From 2c72e662bad11a41839780f86680d4bb05367c78 Mon Sep 17 00:00:00 2001 From: Dan Gudmundsson Date: Mon, 3 Apr 2017 12:19:21 +0200 Subject: New unicode aware string module that works with unicode:chardata() Works with unicode:chardata() as input as was decided on OTP board meeting as response to EEP-35 a long time ago. Works on graphemes clusters as base, with a few exceptions, does not handle classic (nor nfd'ified) Hangul nor the extended grapheme clusters such as the prepend class. That would make handling binaries as input/output very slow. List input => list output, binary input => binary output and mixed input => mixed output for all find/split functions. So that results can be post-processed without the need to invoke unicode:characters_to_list|binary for intermediate data. pad functions return lists of unicode:chardata() for performance. --- lib/stdlib/test/string_SUITE.erl | 893 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 872 insertions(+), 21 deletions(-) (limited to 'lib/stdlib/test/string_SUITE.erl') diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index 836f9e5142..a78ddf761b 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -29,25 +29,46 @@ -export([init_per_testcase/2, end_per_testcase/2]). %% Test cases must be exported. --export([len/1,equal/1,concat/1,chr_rchr/1,str_rstr/1]). --export([span_cspan/1,substr/1,tokens/1,chars/1]). +-export([is_empty/1, length/1, to_graphemes/1, + reverse/1, slice/1, + equal/1, + pad/1, trim/1, chomp/1, take/1, + uppercase/1, lowercase/1, titlecase/1, casefold/1, + prefix/1, split/1, replace/1, find/1, + lexemes/1, nth_lexeme/1, cd_gc/1, meas/1 + ]). + +-export([len/1,old_equal/1,old_concat/1,chr_rchr/1,str_rstr/1]). +-export([span_cspan/1,substr/1,old_tokens/1,chars/1]). -export([copies/1,words/1,strip/1,sub_word/1,left_right/1]). -export([sub_string/1,centre/1, join/1]). -export([to_integer/1,to_float/1]). -export([to_upper_to_lower/1]). +%% Run tests when debugging them +-export([debug/0]). + suite() -> [{ct_hooks,[ts_install_cth]}, {timetrap,{minutes,1}}]. -all() -> - [len, equal, concat, chr_rchr, str_rstr, span_cspan, - substr, tokens, chars, copies, words, strip, sub_word, - left_right, sub_string, centre, join, to_integer, - to_float, to_upper_to_lower]. +all() -> + [{group, chardata}, {group, list_string}]. -groups() -> - []. +groups() -> + [{chardata, + [is_empty, length, to_graphemes, + equal, reverse, slice, + pad, trim, chomp, take, + lexemes, nth_lexeme, + uppercase, lowercase, titlecase, casefold, + prefix, find, split, replace, cd_gc, + meas]}, + {list_string, + [len, old_equal, old_concat, chr_rchr, str_rstr, span_cspan, + substr, old_tokens, chars, copies, words, strip, sub_word, + left_right, sub_string, centre, join, to_integer, + to_float, to_upper_to_lower]}]. init_per_suite(Config) -> Config. @@ -68,8 +89,839 @@ init_per_testcase(_Case, Config) -> end_per_testcase(_Case, _Config) -> ok. +debug() -> + Config = [{data_dir, ?MODULE_STRING++"_data"}], + [io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) || + {_,Tests} <- groups(), Test <- Tests]. + +-define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)). +-define(TEST_EQ(B,C,D), + test(?LINE,?FUNCTION_NAME,B,C,D, true), + test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)). + +-define(TEST_NN(B,C,D), + test(?LINE,?FUNCTION_NAME,B,C,D, false), + test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C)],D, false)). + + +is_empty(_) -> + ?TEST("", [], true), + ?TEST([""|<<>>], [], true), + ?TEST("a", [], false), + ?TEST([""|<<$a>>], [], false), + ?TEST(["",[<<>>]], [], true), + ok. + +length(_) -> + %% invalid arg type + {'EXIT',_} = (catch string:length({})), + {'EXIT',_} = (catch string:length(foo)), + %% Valid signs + ?TEST("", [], 0), + ?TEST([""|<<>>], [], 0), + L = tuple_size(list_to_tuple(atom_to_list(?MODULE))), + ?TEST(atom_to_list(?MODULE), [], L), + ?TEST("Hello", [], 5), + ?TEST("UC Ω ßð", [], 7), + ?TEST(["abc"|<<"abc">>], [], 6), + ?TEST(["abc",["def"]], [], 6), + ?TEST([<<97/utf8, 778/utf8, 98/utf8>>, [776,111,776]], [], 3), %% åäö in nfd + ok. + +equal(_) -> + %% invalid arg type + {'EXIT',_} = (catch string:equal(1, 2)), + {'EXIT',_} = (catch string:equal(1, 2, foo)), + {'EXIT',_} = (catch string:equal(1, 2, true, foo)), + + ?TEST("", [<<"">>], true), + ?TEST("Hello", ["Hello"], true), + ?TEST("Hello", ["Hell"], false), + ?TEST("Hello", ["Hello!"], false), + ?TEST("Hello", [<<"Hello"/utf8>>], true), + ?TEST("Hello", [<<"Mello"/utf8>>], false), + ?TEST("Hello", [<<"Hello!"/utf8>>], false), + ?TEST(["Hello",[" deep"]], ["Hello deep"], true), + ?TEST(["Hello",[<<" deep"/utf8>>]], ["Hello deep"], true), + ?TEST("Hello deep", [["Hello", [" deep"]]], true), + ?TEST("Hello deep", [["Hello", [" d!eep"]]], false), + ?TEST("Hello deep", [["Hello", [<<" deep"/utf8>>]]], true), + false = string:equal("Åäö", [<<97/utf8, 778/utf8, 98/utf8>>, [776,111,776]]), %% nfc vs nfd + + %% case_insensitive_equal() + ?TEST("", ["", true], true), + ?TEST("a", ["b", true], false), + ?TEST("", [<<>>, true], true), + ?TEST("", [[<<>>,[]], true], true), + ?TEST("", [[<<>>,[$a]], true], false), + ?TEST("123", ["123", true], true), + ?TEST("abc", ["abc", true], true), + ?TEST([[],<<>>,"ABC"|<<>>], [["abc",[]], true], true), + ?TEST("ABCa", ["abcå", true], false), + ?TEST("åäö", [{norm,"åäö"}, true], true), + ?TEST("ÅÄÖ", [{norm,"åäö"}, true], true), + ?TEST("MICHAŁ", ["michał", true], true), + ?TEST(["Mic",<<"HAŁ"/utf8>>], ["michał", true], true), + ?TEST("ß SHARP S", ["ss sharp s", true], true), + ?TEST("ẞ SHARP S", [[<<$ß/utf8, $\s>>,"SHARP S"], true], true), + ?TEST("ẞ SHARP ß", ["ss sharp s", true], false), + ?TEST(<<"İ I WITH DOT ABOVE"/utf8>>, ["i̇ i with dot above", true], true), + %% These should be equivalent with the above + true = string:equal(string:casefold(["Mic",<<"HAŁ"/utf8>>]), string:casefold("michał")), + true = string:equal(string:casefold("ẞ SHARP S"), string:casefold([<<$ß/utf8, $\s>>,"SHARP S"])), + false = string:equal(string:casefold("ẞ SHARP ß"), string:casefold("ss sharp s")), + + %% Normalization + ?TEST_NN("", ["", true, none], true), + ?TEST_NN("a", ["b", true, nfc], false), + ?TEST_NN("a", ["b", true, nfd], false), + ?TEST_NN("a", ["b", true, nfkc], false), + ?TEST_NN("a", ["b", true, nfkd], false), + + ?TEST_NN("a", ["A", false, nfc], false), + ?TEST_NN("a", ["A", false, nfd], false), + ?TEST_NN([<<>>,"a"|<<>>], ["A", true, nfkc], true), + ?TEST_NN(<<"a">>, ["A", true, nfkd], true), + + ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, none], false), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfc], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfd], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfkc], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfkd], true), + + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, none], false), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", false, nfc], false), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfc], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfd], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfkc], true), + ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfkd], true), + + ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abホンダ", true, none], false), + ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abホンダ", true, nfc], false), + ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abホンダ", true, nfd], false), + ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abホンダ", true, nfkc], true), + ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abホンダ", true, nfkd], true), + + ?TEST_NN([$a, <<$b>>, "32"], ["ab32", true, none], false), + ?TEST_NN([$a, <<$b>>, "32"], ["ab32", true, nfc], false), + ?TEST_NN([$a, <<$b>>, "32"], ["ab32", true, nfd], false), + ?TEST_NN([$a, <<$b>>, "32"], ["ab32", true, nfkc], true), + ?TEST_NN([$a, <<$b>>, "32"], ["ab32", true, nfkd], true), + + %% Coverage. + ?TEST("", [<<"">>, false, nfc], true), + ?TEST("", [<<"">>, true, nfc], true), + + ok. + +to_graphemes(_) -> + %% More tests are in unicode_util_SUITE.erl + {'EXIT', _} = (catch unicode:characters_to_nfd_binary(["asdåäö", an_atom])), + String = ["abc..åäö", $e, 788, <<"Ωµe`è"/utf8>>, "œŒþæÆħ§ß"], + NFD = unicode:characters_to_nfd_list(String), + [] = string:to_graphemes([]), + [] = string:to_graphemes(<<>>), + GCs = string:to_graphemes(String), + true = erlang:length(GCs) =:= string:length(String), + true = erlang:length(GCs) =:= erlang:length(string:to_graphemes(NFD)), + true = erlang:length(GCs) =:= + erlang:length(string:to_graphemes(unicode:characters_to_nfc_list(String))), + ok. + +reverse(_) -> + {'EXIT',_} = (catch string:reverse(2)), + Str1 = "Hello ", + Str2 = "Ω ßð", + Str3 = "åäö", + ?TEST("", [], ""), + ?TEST(Str1, [], lists:reverse(Str1)), + ?TEST(Str2, [], lists:reverse(Str2)), + ?TEST(Str3, [], lists:reverse(Str3)), + true = string:reverse(Str3) =:= lists:reverse(string:to_graphemes(Str3)), + ok. + +slice(_) -> + {'EXIT',_} = (catch string:slice(2, 2, 2)), + {'EXIT',_} = (catch string:slice("asd", foo, 2)), + {'EXIT',_} = (catch string:slice("asd", 2, -1)), + ?TEST("", [3], ""), + ?TEST("aåä", [1, 0], ""), + ?TEST("aåä", [3], ""), + ?TEST("aåäöbcd", [3], "öbcd"), + ?TEST([<<"aå"/utf8>>,"äöbcd"], [3], "öbcd"), + ?TEST([<<"aåä"/utf8>>,"öbcd"], [3], "öbcd"), + ?TEST([<<"aåä"/utf8>>,"öbcd"], [3, infinity], "öbcd"), + + ?TEST("", [3, 2], ""), + ?TEST("aåä", [3, 2], ""), + ?TEST("aåäöbcd", [3,2], "öb"), + ?TEST([<<"aå"/utf8>>,"äöbcd"], [3,3], "öbc"), + ?TEST([<<"aåä"/utf8>>,"öbcd"], [3,10], "öbcd"), + + ok. + +pad(_) -> + Str = "Hallå", + ?TEST(Str, [7], "Hallå "), + ?TEST(Str, [7, leading], " Hallå"), + ?TEST(Str, [4, both, $.], "Hallå"), + ?TEST(Str, [10, both, $.], "..Hallå..."), + ?TEST(Str, [10, leading, $.], ".....Hallå"), + ?TEST(Str, [10, trailing, $.], "Hallå....."), + ?TEST(Str++["f"], [10, trailing, $.], "Hallåf...."), + ?TEST(Str++[" flåwer"], [10, trailing, $.], "Hallå flåwer"), + ok. + +trim(_) -> + Str = "\t\s..Ha\s.llå..\t\n\r", + ?TEST("", [], ""), + ?TEST(Str, [both, "x"], Str), + ?TEST(Str, [leading], "..Ha\s.llå..\t\n\r"), + ?TEST(Str, [trailing], "\t\s..Ha\s.llå.."), + ?TEST(Str, [], "..Ha .llå.."), + ?TEST(".. ", [both, ""], ".. "), + ?TEST([<<".. ">>], [both, ". "], ""), + ?TEST(".. h.ej ..", [leading, ". "], "h.ej .."), + ?TEST(".. h.ej ..", [trailing, ". "], ".. h.ej"), + ?TEST(".. h.ej ..", [both, ". "], "h.ej"), + ?TEST(["..", <<"h.ej">>, ".."], [both, ". "], "h.ej"), + ?TEST([[], "..", " h.ej ", <<"..">>], [both, ". "], "h.ej"), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [both, ". "], "h.ej"), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [trailing, ". "], ".. h.ej"), + ?TEST([<<".. h.ej .">>, <<"..">>], [both, ". "], "h.ej"), + ?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"), + ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"), + %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST("aaåaa", [both, "a"], "å"), + ?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"), + ?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"), + ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [both, [[$e,778]]], "åäö"), + ?TEST([[<<"!v">>|<<204,128,$v,204,129>>]],[trailing, [[$v,769]]], [$!,$v,768]), + ?TEST([[[<<"v">>|<<204,129,118,204,128,118>>],769,118,769]], [trailing, [[118,769]]], [$v,769,$v,768]), + ?TEST([<<"vv">>|<<204,128,118,204,128>>], [trailing, [[118,768]]], "v"), + ok. + +chomp(_) -> + Str = "åäö\na\r\nsd\n", + Res = "åäö\na\r\nsd", + ?TEST("", [], ""), + ?TEST("\n", [], ""), + ?TEST("str \t", [], "str \t"), + ?TEST("str \t\n\r", [], "str \t\n\r"), + ?TEST(Str, [], Res), + ?TEST([Str,$\n], [], Res), + ?TEST([Str|"\n"], [], Res), + ?TEST([Str|<<"\n">>], [], Res), + ?TEST([Str,$\r|<<"\n">>], [], Res), + ?TEST([Str, <<$\r>>|"\n"], [], Res), + ?TEST([<<$a,$\r>>,"\na\n"], [], "a\r\na"), + ok. + +take(_) -> + Str = "\t\s..Ha\s.llå..\t\n\r", + WS = "\t\s\n\r", + Chars = lists:seq($a,$z)++lists:seq($A,$Z), + %% complement=false, dir=leading + ?TEST("", ["abc"], {"",""}), + ?TEST(Str, ["x"], {[], Str}), + ?TEST(Str, [WS], {"\t\s","..Ha\s.llå..\t\n\r"}), + ?TEST(".. ", ["", false], {"", ".. "}), + ?TEST([<<".. ">>], [". ", false, leading], {".. ", ""}), + ?TEST(".. h.ej ..", [". ", false, leading], {".. ", "h.ej .."}), + ?TEST(["..", <<"h.ej">>, ".."], [". ", false, leading], {"..", "h.ej.."}), + ?TEST([[], "..", " h.ej ", <<"..">>], [". ", false, leading], {".. ","h.ej .."}), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [". ", false, leading], {".. ", "h.ej .."}), + ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [". ", false, leading], {"..", "h.ejsan.."}), + ?TEST([[<<101,204,138,33>>]], [[[$e,778]]], {[$e,778], "!"}), + %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST("aaåaa", ["a", false, leading], {"aa", "åaa"}), + ?TEST(["aaa",778,"äöoo"], ["ao", false, leading], {"aa", "åäöoo"}), + ?TEST([<<"aaa">>,778,"äöoo"], ["ao",false,leading], {"aa", "åäöoo"}), + ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], false, leading], {[$e,778],"åäöe"++[778]}), + + %% complement=true, dir=leading + ?TEST("", ["abc", true], {"",""}), + ?TEST(Str, ["x", true], {Str, []}), + ?TEST(Str, [Chars, true], {"\t\s..","Ha\s.llå..\t\n\r"}), + ?TEST(".. ", ["",true], {".. ", ""}), + ?TEST([<<".. ">>], [Chars, true, leading], {".. ", ""}), + ?TEST(".. h.ej ..", [Chars, true, leading], {".. ", "h.ej .."}), + ?TEST(["..", <<"h.ej">>, ".."], [Chars, true, leading], {"..", "h.ej.."}), + ?TEST([[], "..", " h.ej ", <<"..">>], [Chars, true, leading], {".. ","h.ej .."}), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}), + ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}), + %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}), + ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}), + ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}), + + %% complement=false, dir=trailing + ?TEST(Str, ["", false, trailing], {Str, []}), + ?TEST(Str, ["x", false, trailing], {Str, []}), + ?TEST(Str, [WS, false,trailing], {"\t\s..Ha\s.llå..", "\t\n\r"}), + ?TEST(".. h.ej ..", [". ", false, trailing], {".. h.ej", " .."}), + ?TEST(["..", <<"h.ej">>, ".."], [". ", false, trailing], {"..h.ej", ".."}), + ?TEST([[], "..", " h.ej ", <<"..">>], [". ", false, trailing], {".. h.ej", " .."}), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [". ", false, trailing], {".. h.ej", " .."}), + ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [". ", false, trailing], {"..h.ejsan", ".."}), + ?TEST("aaåaa", ["a", false, trailing], {"aaå", "aa"}), + ?TEST([<<"KMШ"/utf8>>], [[1064], false, trailing], {"KMШ",[]}), + ?TEST([[<<"!\"">>|<<"\"">>]], ["\"", false, trailing], {"!", "\"\""}), + ?TEST([<<$v>>, 769], [[[$v,769]], false, trailing], {"", [$v,769]}), + ?TEST(["aaa",778,"äöoo"], ["ao", false, trailing], {"aaåäö", "oo"}), + ?TEST([<<"aaa">>,778,"äöoo"], ["ao", false, trailing], {"aaåäö", "oo"}), + ?TEST([<<"e">>,778,"åäöee", <<778/utf8>>], [[[$e,778]], false, trailing], {[$e,778|"åäöe"], [$e,778]}), + + %% complement=true, dir=trailing + ?TEST("", ["abc", true, trailing], {"",""}), + ?TEST(Str, ["x", true, trailing], {[], Str}), + %?TEST(Str, [{norm,Chars}, true, trailing], {"\t\s..Ha\s.ll","å..\t\n\r"}), + ?TEST(".. ", ["", true, trailing], {"", ".. "}), + ?TEST([<<".. ">>], [Chars, true, trailing], {"", ".. "}), + ?TEST(".. h.ej ..", [Chars, true, trailing], {".. h.ej", " .."}), + ?TEST(["..", <<"h.ej">>, ".."], [Chars, true, trailing], {"..h.ej", ".."}), + ?TEST([[], "..", " h.ej ", <<"..">>], [Chars, true, trailing], {".. h.ej"," .."}), + ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, trailing], {".. h.ej"," .."}), + ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, trailing], {"..h.ejsan", ".."}), + ?TEST([[<<101,204,138,33>>]], [[[$e,778]], true, trailing], {[$e,778], "!"}), + ?TEST([<<"Fa">>], [[$F], true, trailing], {"F", "a"}), + ?TEST([[<<101,101,204,138>>,1045,778]], ["e", true, trailing], {"e", [101,778,1045,778]}), + ?TEST([[<<101,101,204,138>>,<<1045/utf8,778/utf8>>]], ["e", true, trailing], {"e", [101,778,1045,778]}), + ?TEST([[[118,769,118],<<204,129,118,204,129,120,204,128,118>>,768,120,768]], + [[[118,769]], true, trailing], {[118,769,118,769,118,769],[120,768,118,768,120,768]}), + ?TEST([[<<118,204,128,118>>|<<204,128,118,204,128,118,204,128,206,132,204,129,206,132,204,129>>]], + [[[118,768]], true, trailing], {[118,768,118,768,118,768,118,768], [900,769,900,769]}), + %% Test that it behaves with graphemes (i.e. nfd tests are the hard part) + ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, trailing], {"aae"++[$e,778], "äöoo"}), + ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,trailing], {"aa"++[$e,778], "äöoo"}), + ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, trailing], {[$e,778]++"åäöe"++[778], []}), + ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>, $e, 779], [[[$e,778]], true, trailing], + {[$e,778]++"åäöe"++[778], [$e,779]}), + + ok. + + +uppercase(_) -> + ?TEST("", [], ""), + ?TEST("123", [], "123"), + ?TEST("abc", [], "ABC"), + ?TEST("ABC", [], "ABC"), + ?TEST("abcdefghiljklmnopqrstvxyzåäö",[], "ABCDEFGHILJKLMNOPQRSTVXYZÅÄÖ"), + ?TEST("åäö", [], "ÅÄÖ"), + ?TEST("ÅÄÖ", [], "ÅÄÖ"), + ?TEST("Michał", [], "MICHAŁ"), + ?TEST(["Mic",<<"hał"/utf8>>], [], "MICHAŁ"), + ?TEST("ljLJ", [], "LJLJ"), + ?TEST("LJlj", [], "LJLJ"), + ?TEST("ß sharp s", [], "SS SHARP S"), + ok. + +lowercase(_) -> + ?TEST("", [], ""), + ?TEST("123", [], "123"), + ?TEST("abc", [], "abc"), + ?TEST("ABC", [], "abc"), + ?TEST("åäö", [], "åäö"), + ?TEST("ÅÄÖ", [], "åäö"), + ?TEST("MICHAŁ", [], "michał"), + ?TEST(["Mic",<<"HAŁ"/utf8>>], [], "michał"), + ?TEST("ß SHARP S", [], "ß sharp s"), + ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"), + ok. + +titlecase(_) -> + ?TEST("", [], ""), + ?TEST("123", [], "123"), + %% Titlecase is the same as uppercase for most chars + [?TEST([C,$x], [], string:uppercase([C])++[$x]) || + C <-"abcdefghiljklmnopqrstvxyzåäö"], + %% Example of a different mapping + ?TEST("ljusad", [],"Ljusad"), + ?TEST("ljLJ", [], "LjLJ"), + ?TEST("LJlj", [], "Ljlj"), + ?TEST("ß sharp s", [], "Ss sharp s"), + ok. + +casefold(_) -> + ?TEST("", [], ""), + ?TEST("123", [], "123"), + ?TEST("abc", [], "abc"), + ?TEST("ABC", [], "abc"), + ?TEST("åäö", [], "åäö"), + ?TEST("ÅÄÖ", [], "åäö"), + ?TEST("MICHAŁ", [], "michał"), + ?TEST(["Mic",<<"HAŁ"/utf8>>], [], "michał"), + ?TEST("ß SHARP S", [], "ss sharp s"), + ?TEST("ẞ SHARP S", [], "ss sharp s"), + ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"), + ok. + +prefix(_) -> + ?TEST("", ["a"], nomatch), + ?TEST("a", [""], "a"), + ?TEST("b", ["a"], nomatch), + ?TEST("a", ["a"], ""), + ?TEST("å", ["a"], nomatch), + ?TEST(["a",<<778/utf8>>], ["a"], nomatch), + ?TEST([<<"a"/utf8>>,778], ["a"], nomatch), + ?TEST("hejsan", [""], "hejsan"), + ?TEST("hejsan", ["hej"], "san"), + ?TEST("hejsan", ["hes"], nomatch), + ?TEST(["h", "ejsan"], ["hej"], "san"), + ?TEST(["h", "e", "jsan"], ["hej"], "san"), + ?TEST(["h", "e", "san"], ["hej"], nomatch), + ?TEST(["h", <<"ejsan">>], ["hej"], "san"), + ?TEST(["h", <<"e">>, "jsan"], ["hej"], "san"), + ?TEST(["h", "e", <<"jsan">>], ["hej"], "san"), + ok. + +split(_) -> + Mod = fun(Res) -> + [lists:flatten(unicode:characters_to_nfc_list(io_lib:format("~ts", [Str]))) + || Str <- Res] end, + ?TEST("..", ["", leading], {Mod, [".."]}), + ?TEST("..", ["..", leading], {Mod, [[],[]]}), + ?TEST("abcd", ["..", leading], {Mod, ["abcd"]}), + ?TEST("ab..bc", ["..", leading], {Mod, ["ab","bc"]}), + ?TEST("ab..bc..cd", ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST("..ab", [".."], {Mod, [[],"ab"]}), + ?TEST("ab..", ["..", leading], {Mod, ["ab",[]]}), + ?TEST(["ab..bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab","..bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab",<<"..bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab.",".bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab.",<<".bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab..","bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab..",<<"bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}), + ?TEST(["ab.","bc..cd"], ["..", leading], {Mod, ["ab.bc","cd"]}), + ?TEST("ab...bc", ["..", leading], {Mod, ["ab",".bc"]}), + + ?TEST("..", ["", trailing], {Mod, [".."]}), + ?TEST("..", ["..", trailing], {Mod, [[],[]]}), + ?TEST("abcd", ["..", trailing], {Mod, ["abcd"]}), + ?TEST("ab..bc", ["..", trailing], {Mod, ["ab","bc"]}), + ?TEST("ab..bc..cd", ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST("..ab", ["..", trailing], {Mod, [[],"ab"]}), + ?TEST("ab..", ["..", trailing], {Mod, ["ab",[]]}), + ?TEST(["ab..bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab","..bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab"|<<"a">>], ["a", trailing], {Mod, ["ab",[]]}), + ?TEST(["ab",<<"..bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST([<<"ab.">>,".bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab.",<<".bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab..","bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab..",<<"bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}), + ?TEST(["ab.","bc..cd"], ["..", trailing], {Mod, ["ab.bc","cd"]}), + ?TEST("ab...bc", ["..", trailing], {Mod, ["ab.","bc"]}), + + ?TEST("..", ["..", all], {Mod, [[],[]]}), + ?TEST("abcd", ["..", all], {Mod, ["abcd"]}), + ?TEST("a..b", ["..", all], {Mod, ["a","b"]}), + ?TEST("a..b..c", ["..", all], {Mod, ["a","b","c"]}), + ?TEST("a..", ["..", all], {Mod, ["a",[]]}), + ?TEST(["a..b..c"], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a","..b..c"], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a",<<"..b..c">>], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a.",".b..c"], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a.",<<".b..c">>], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a..","b..c"], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a..",<<"b..c">>], ["..", all], {Mod, ["a","b","c"]}), + ?TEST(["a.","b..c"], ["..", all], {Mod, ["a.b","c"]}), + ?TEST("a...b", ["..", all], {Mod, ["a",".b"]}), + + %% Grapheme (split) tests + ?TEST("aΩΩb", ["Ω", all], {Mod, ["a","","b"]}), + ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], leading], {Mod, ["aa","äöoo"]}), + ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], trailing], {Mod, ["aa","äöoo"]}), + ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], all], {Mod, ["aa","äöoo"]}), + ?TEST([<<"aae">>,778,"öeeåäö"], ["e", leading], {Mod, [[$a, $a, $e,778,$ö],"eåäö"]}), + ?TEST([<<"aae">>,778,"öeeåäö"], ["e", trailing], {Mod, [[$a, $a, $e,778,$ö, $e],"åäö"]}), + ?TEST([<<"aae">>,778,"öeeåäö"], ["e", all], {Mod, [[$a, $a, $e,778,$ö],"", "åäö"]}), + + ok. + +replace(_) -> + ?TEST(["a..b.", [".c"]], ["xxx", "::"], "a..b..c"), + ?TEST(["a..b.", [".c"]], ["..", "::"], "a::b..c"), + ?TEST([<<"a..b.">>, [".c"]], ["..", "::", trailing], "a..b::c"), + ?TEST(["a..b.", [".c"]], ["..", "::", all], "a::b::c"), + ok. + +cd_gc(_) -> + [] = string:next_codepoint(""), + [] = string:next_codepoint(<<>>), + [] = string:next_codepoint([<<>>]), + "abcd" = string:next_codepoint("abcd"), + [$e,778] = string:next_codepoint([$e,778]), + [$e|<<204,138>>] = string:next_codepoint(<<$e,778/utf8>>), + [778|_] = string:next_codepoint(tl(string:next_codepoint(<<$e,778/utf8>>))), + + [] = string:next_grapheme(""), + [] = string:next_grapheme(<<>>), + [] = string:next_grapheme([<<>>]), + "abcd" = string:next_grapheme("abcd"), + [[$e,778]] = string:next_grapheme([$e,778]), + [[$e,778]] = string:next_grapheme(<<$e,778/utf8>>), + + ok. + + +find(_) -> + ?TEST(["h", "ejsan"], [""], "hejsan"), + ?TEST(["h", "ejsan"], [<<>>], "hejsan"), + ?TEST([], [""], ""), + ?TEST([], ["hej"], nomatch), + ?TEST(["h", "ejsan"], ["hej"], "hejsan"), + ?TEST(["h", "e", "jsan"], ["hej"], "hejsan"), + ?TEST(["xh", "e", "san"], ["hej"], nomatch), + ?TEST([<<"xh">>, <<"ejsan">>], ["hej"], "hejsan"), + ?TEST(["xh", <<"ejsan">>], ["hej"], "hejsan"), + ?TEST(["xh", <<"e">>, "jsan"], ["hej"], "hejsan"), + ?TEST(["xh", "e", <<"jsan">>], ["hej"], "hejsan"), + ?TEST(["xh", "er", <<"ljsane">>, "rlang"], ["erl", leading], "erljsanerlang"), + ?TEST("aΩΩb", ["Ω", leading], "ΩΩb"), + ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], leading], [$e,778]++"äöoo"), + ?TEST([<<"aae">>,778,"öeeåäö"], ["e", leading], "eeåäö"), + + ?TEST(["h", "ejsan"], ["", trailing], "hejsan"), + ?TEST([], ["", trailing], ""), + ?TEST([], ["hej", trailing], nomatch), + ?TEST(["h", "ejsan"], ["hej", trailing], "hejsan"), + ?TEST(["h", "e", "jsan"], ["hej", trailing], "hejsan"), + ?TEST(["xh", "e", "san"], ["hej", trailing], nomatch), + ?TEST([<<"xh">>, <<"ejsan">>], ["hej", trailing], "hejsan"), + ?TEST(["xh", <<"ejsan">>], ["hej", trailing], "hejsan"), + ?TEST(["xh", <<"e">>, "jsan"], ["hej", trailing], "hejsan"), + ?TEST(["xh", "e", <<"jsan">>], ["hej", trailing], "hejsan"), + ?TEST(["xh", "er", <<"ljsane">>, "rlang"], ["erl", trailing], "erlang"), + ?TEST("aΩΩb", ["Ω", trailing], "Ωb"), + ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], trailing], [$e,778]++"äöoo"), + ?TEST([<<"aeae">>,778,"äö"], ["e", trailing], "eae"++[778,$ä,$ö]), + + ok. + +lexemes(_) -> + Mod = fun(Res) -> + [unicode:characters_to_nfc_list(io_lib:format("~ts", [Str]))|| Str <- Res] + end, + Res = ["Hej", "san", "Hopp", "san"], + ?TEST("", [" ,."], {Mod, []}), + ?TEST("Hej san", [""], {Mod, ["Hej san"]}), + ?TEST(" ,., ", [" ,."], {Mod, []}), + ?TEST( "Hej san Hopp san", [" ,."], {Mod, Res}), + ?TEST(" Hej san Hopp san ", [" ,."], {Mod, Res}), + ?TEST(" Hej san, .Hopp san ", [" ,."], {Mod, Res}), + + ?TEST([" Hej san",", .Hopp san "], [" ,."], {Mod, Res}), + ?TEST([" Hej sa","n, .Hopp san "], [" ,."], {Mod, Res}), + ?TEST([" Hej san,"," .Hopp san "], [" ,."], {Mod, Res}), + + ?TEST([" Hej san",[", .Hopp san "]], [" ,."], {Mod, Res}), + ?TEST([" Hej sa",["n, .Hopp san "]], [" ,."], {Mod, Res}), + ?TEST([" Hej san,",[" .Hopp san "]], [" ,."], {Mod, Res}), + + ?TEST([" H",<<"ej san, .Hopp "/utf8>>, "san"], [" ,."], {Mod, Res}), + ?TEST([" Hej san",<<", .Hopp "/utf8>>, "san"], [" ,."], {Mod, Res}), + ?TEST([" Hej sa",<<"n, .Hopp"/utf8>>, " san"], [" ,."], {Mod, Res}), + ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [" ,."], {Mod, Res}), + ?TEST([" Hej san",[<<", .Hopp san "/utf8>>]], [" ,."], {Mod, Res}), + ?TEST([" Hej sa",[<<"n, .Hopp san "/utf8>>]], [" ,."], {Mod, Res}), + ?TEST([" Hej san,",[<<" .Hopp san "/utf8>>], <<" ">>], [" ,."], {Mod, Res}), + + ?TEST(" Hej\r\nsan\nnl", ["\r\n\s"], {Mod, ["Hej\r\nsan", "nl"]}), + + ?TEST(["b1ec1e",778,"äöo21"], ["eo"], {Mod, ["b1",[$c,$1,$e,778,$ä,$ö],"21"]}), + ?TEST([<<"b1ec1e">>,778,"äöo21"], ["eo"], {Mod, ["b1",[$c,$1,$e,778,$ä,$ö],"21"]}), + %% Grapheme (split) tests + Str10 = [[[<<"÷"/utf8>>,1101],<<"ë"/utf8>>|<<"\"">>]], + ?TEST(Str10, [[1076]], {Mod, [unicode:characters_to_nfc_list(Str10)]}), + ?TEST("a1Ωb1Ωc1", ["Ω"], {Mod, ["a1","b1","c1"]}), + ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]]], {Mod, ["aa","äöoo"]}), + ?TEST([<<"aae">>,778,"äöo21"], [[[$e,778],$o]], {Mod, ["aa","äö","21"]}), + ?TEST([<<"aae">>,778,"öeeåäö"], ["e"], {Mod, [[$a, $a, $e,778,$ö],"åäö"]}), + ok. + +nth_lexeme(_) -> + {'EXIT', _} = (catch string:nth_lexeme("test test", 0, [])), + {'EXIT', _} = (catch string:nth_lexeme(<<"test test">>, 0, [])), + ?TEST( "", [1, " ,."], []), + ?TEST( "Hej san", [1, ""], "Hej san"), + ?TEST( " ,., ", [1, " ,."], []), + ?TEST( " ,., ", [3, " ,."], []), + ?TEST("Hej san Hopp san", [1, " ,."], "Hej"), + ?TEST("...Hej san Hopp san", [1, " ,."], "Hej"), + ?TEST("Hej san Hopp san", [3, " ,."], "Hopp"), + ?TEST(" Hej san Hopp san ", [3, " ,."], "Hopp"), + ?TEST(" Hej san, .Hopp san ", [3, " ,."], "Hopp"), + ?TEST("ab cd", [3, " "], ""), + + ?TEST([" Hej san",", .Hopp san "], [3, " ,."], "Hopp"), + ?TEST([" Hej sa","n, .Hopp san "], [3, " ,."], "Hopp"), + ?TEST([" Hej san,"," .Hopp san "], [3, " ,."], "Hopp"), + + ?TEST([" Hej san",[", .Hopp san "]], [3," ,."], "Hopp"), + ?TEST([" Hej sa",["n, .Hopp san "]], [3, " ,."], "Hopp"), + ?TEST([" Hej san,",[" .Hopp san "]], [3, " ,."], "Hopp"), + + ?TEST([" Hej san",<<", .Hopp "/utf8>>, "san"], [3, " ,."], "Hopp"), + ?TEST([" Hej sa",<<"n, .Hopp"/utf8>>, " san"], [3, " ,."], "Hopp"), + ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [3, " ,."], "Hopp"), + ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [4, " ,."], "san"), + ?TEST([" Hej san",[<<", .Hopp san "/utf8>>]], [3, " ,."], "Hopp"), + ?TEST([" Hej sa",[<<"n, .Hopp san "/utf8>>]], [3, " ,."], "Hopp"), + ?TEST([" Hej san,",[<<" .Hopp san "/utf8>>], <<" ">>], [3, " ,."], "Hopp"), + + ?TEST(["b1ec1e",778,"äöo21"], [3,"eo"], "21"), + ?TEST([<<"b1ec1e">>,778,"äöo21"], [3, "eo"], "21"), + %% Grapheme (split) tests + ?TEST("a1Ωb1Ωc1", [1, "Ω"], "a1"), + ?TEST([<<"aae">>,778,"äöoo"], [2,[[$e,778]]], "äöoo"), + ?TEST([<<"aae">>,778,"äöo21"], [2,[[$e,778],$o]], "äö"), + ?TEST([<<"aae">>,778,"öeeåäö"], [2,"e"], "åäö"), + ok. + + +meas(Config) -> + case ct:get_timetrap_info() of + {_,{_,Scale}} when Scale > 1 -> + {skip,{will_not_run_in_debug,Scale}}; + _ -> % No scaling + DataDir = proplists:get_value(data_dir, Config), + TestDir = filename:dirname(string:trim(DataDir, trailing, "/")), + do_measure(TestDir) + end. + +do_measure(TestDir) -> + File = filename:join(TestDir, ?MODULE_STRING ++ ".erl"), + io:format("File ~s ",[File]), + {ok, Bin} = file:read_file(File), + io:format("~p~n",[byte_size(Bin)]), + Do = fun(Name, Func, Mode) -> + {N, Mean, Stddev, _} = time_func(Func, Mode, Bin), + io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n", + [Name, Mode, Mean/1000, Stddev/1000, N]) + end, + io:format("----------------------~n"), + Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list), + Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end}, + [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]], + ok. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%% internal functions + +test(Line, Func, Str, Args, Res, Norm) -> + %%io:format("~p: ~p ~w ~w~n",[Line, Func, Str, Args]), + test_1(Line, Func, Str, [Str|norm(none,Args)], Res), + %%io:format("~p: ~p bin ",[Line, Func]), + test_1({Line,list}, Func, Str, + [unicode:characters_to_list(Str)|norm(none,Args)], Res), + Norm andalso + test_1({Line,clist}, Func, Str, + [unicode:characters_to_nfc_list(Str)|norm(nfc,Args)], Res), + Norm andalso + test_1({Line,dlist}, Func, Str, + [unicode:characters_to_nfd_list(Str)|norm(nfd,Args)], Res), + test_1({Line,bin}, Func, Str, + [unicode:characters_to_binary(Str)|norm(none, Args)], Res), + Norm andalso + test_1({Line,cbin}, Func, Str, + [unicode:characters_to_nfc_binary(Str)|norm(nfc,Args)], Res), + Norm andalso + test_1({Line,dbin}, Func, Str, + [unicode:characters_to_nfd_binary(Str)|norm(nfd,Args)], Res), + %%io:format("~n",[]), + ok. + +test_1(Line, Func, Str, Args, Exp) -> + try + Res = apply(string, Func, Args), + check_types(Line, Func, Args, Res), + case res(Res, Exp) of + true -> ok; + {Res1,Exp1} when is_tuple(Exp1) -> + io:format("~p~n",[Args]), + io:format("~p:~p: ~ts~w =>~n :~w:~w~n", + [Func,Line, Str,Str,Res1,Exp1]), + exit({error, Func}); + {Res1,Exp1} -> + io:format("~p:~p: ~ts~w =>~n :~ts~w:~ts~w~n", + [Func,Line, Str,Str, Res1,Res1, Exp1,Exp1]), + exit({error, Func}) + end + catch + error:Exp -> + ok; + error:Reason -> + io:format("~p:~p: Crash ~p ~p~n", + [?MODULE,Line, Reason, erlang:get_stacktrace()]), + exit({error, Func}) + end. + +norm(Type, Args) -> + Norm = case Type of + nfc -> fun unicode:characters_to_nfc_list/1; + nfd -> fun unicode:characters_to_nfd_list/1; + none -> fun(Str) -> Str end + end, + lists:map(fun({norm,Str}) -> Norm(Str); + (Other) -> Other + end, Args). + +res(Str, Str) -> true; +res(Str, Exp) when is_list(Str), is_list(Exp) -> + A = unicode:characters_to_nfc_list(Str), + A==Exp orelse {A,Exp}; +res(Str, Exp) when is_binary(Str), is_list(Exp) -> + A = unicode:characters_to_nfc_list(Str), + A==Exp orelse {A,Exp}; +res(What, {Fun, Exp}) when is_function(Fun) -> + Fun(What) == Exp orelse {Fun(What), Exp}; +res({S1,S2}=S, {Exp1,Exp2}=E) -> %% For take + case {res(S1,Exp1), res(S2,Exp2)} of + {true, true} -> true; + _ -> {S, E} + end; +res(Int, Exp) -> + Int == Exp orelse {Int, Exp}. + + +check_types(_Line, _Func, _Str, Res) + when is_integer(Res); is_boolean(Res); Res =:= nomatch -> + %% length or equal + ok; +check_types(Line, Func, [S1,S2], Res) + when Func =:= concat -> + case check_types_1(type(S1),type(S2)) of + ok -> + case check_types_1(type(S1),type(Res)) of + ok -> ok; + {T1,T2} -> + io:format("Failed: ~p ~p ~p ~p~n",[Line, Func, T1, T2]), + io:format(" ~p ~p => ~p~n", [S1, S2, Res]), + error + end; + _ -> ok + end; +check_types(Line, Func, [Str|_], Res) -> + AddList = fun(mixed) -> mixed; + ({list,{list,_}}) -> {list, deep}; + (R) -> + case lists:member(Func, [lexemes, tokens, split]) of + true -> {list, R}; + false -> R + end + end, + try needs_check(Func) andalso (ok = check_types_1(AddList(type(Str)), type(Res))) of + ok -> ok; + false -> ok + catch _:{badmatch, {T1,T2}} -> + io:format("Failed: ~p ~p: ~p ~p~n",[Line, Func, T1, T2]), + io:format(" ~p => ~p~n", [Str, Res]), + error; + _:Reason -> + io:format("Crash: ~p in~n ~p~n",[Reason, erlang:get_stacktrace()]), + io:format("Failed: ~p ~p: ~p => ~p~n", [Line, Func, Str, Res]), + exit({Reason, erlang:get_stacktrace()}) + end. + +check_types_1(T, T) -> + ok; +check_types_1(Str, Res) + when is_binary(Str), is_binary(Res) -> + ok; +check_types_1({list, _},{list, undefined}) -> + ok; +check_types_1({list, _},{list, codepoints}) -> + ok; +check_types_1({list, _},{list, {list, codepoints}}) -> + ok; +check_types_1({list, {list, _}},{list, {list, codepoints}}) -> + ok; +check_types_1(mixed,_) -> + ok; +check_types_1({list, binary}, binary) -> + ok; +check_types_1({list, binary}, {other, _, _}) -> %% take + ok; +check_types_1({list, deep}, _) -> + ok; +check_types_1({list, {list, deep}}, _) -> + ok; +check_types_1(T1,T2) -> + {T1,T2}. + +type(Bin) when is_binary(Bin) -> + binary; +type([]) -> + {list, undefined}; +type(List) when is_list(List) -> + Deep = fun(L) when is_list(L) -> + lists:any(fun(C) -> is_list(C) orelse is_binary(C) end, L); + (_) -> false + end, + case all(fun(C) -> not is_binary(C) end, List) of + true -> + case all(fun(C) -> is_integer(C) end, List) of + true -> {list, codepoints}; + false -> + case [deep || L <- List, Deep(L)] of + [] -> {list, {list, codepoints}}; + _ -> {list, deep} + end + end; + false -> + case all(fun(C) -> is_binary(C) end, List) of + true -> {list, binary}; + false -> mixed + end + end; +type({R1,R2}) -> + case {type(R1),type(R2)} of + {T,T} -> T; + {{list,undefined}, {list,codepoints}} -> {list,codepoints}; + {{list,codepoints}, {list,undefined}} -> {list,codepoints}; + {T1,T2} -> {other, T1,T2} + end; +type(Other) -> + {other, Other}. + +all(_Check, []) -> + true; +all(Check, [H|T]) -> + Check(H) andalso all(Check,T); +all(Check, Bin) when is_binary(Bin) -> + Check(Bin). + +needs_check(reverse) -> false; +needs_check(pad) -> false; +needs_check(replace) -> false; +needs_check(_) -> true. + +%%%% Timer stuff + +time_func(Fun, Mode, Bin) -> + timer:sleep(100), %% Let emulator catch up and clean things before test runs + Self = self(), + Pid = spawn_link(fun() -> + Str = mode(Mode, Bin), + Self ! {self(),time_func(0,0,0, Fun, Str, undefined)} + end), + receive {Pid,Msg} -> Msg end. + +time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 -> + {Time, Res} = timer:tc(fun() -> Fun(Str) end), + time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res); +time_func(N,Sum,SumSq, _, _, Res) -> + Mean = round(Sum / N), + Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))), + {N, Mean, Stdev, Res}. + +mode(binary, Bin) -> Bin; +mode(list, Bin) -> unicode:characters_to_list(Bin). + %% -%% Test cases starts here. +%% Old string lists Test cases starts here. %% len(Config) when is_list(Config) -> @@ -80,16 +932,14 @@ len(Config) when is_list(Config) -> {'EXIT',_} = (catch string:len({})), ok. -equal(Config) when is_list(Config) -> +old_equal(Config) when is_list(Config) -> true = string:equal("", ""), false = string:equal("", " "), true = string:equal("laban", "laban"), false = string:equal("skvimp", "skvump"), - %% invalid arg type - true = string:equal(2, 2), % not good, should crash ok. -concat(Config) when is_list(Config) -> +old_concat(Config) when is_list(Config) -> "erlang rules" = string:concat("erlang ", "rules"), "" = string:concat("", ""), "x" = string:concat("x", ""), @@ -130,6 +980,7 @@ str_rstr(Config) when is_list(Config) -> 3 = string:rstr("xxxx", "xx"), 3 = string:str("xy z yx", " z"), 3 = string:rstr("xy z yx", " z"), + 3 = string:str("aaab", "ab"), %% invalid arg type {'EXIT',_} = (catch string:str(hello, "he")), %% invalid arg type @@ -184,7 +1035,7 @@ substr(Config) when is_list(Config) -> {'EXIT',_} = (catch string:substr("1234", "1")), ok. -tokens(Config) when is_list(Config) -> +old_tokens(Config) when is_list(Config) -> [] = string:tokens("",""), [] = string:tokens("abc","abc"), ["abc"] = string:tokens("abc", ""), @@ -221,7 +1072,7 @@ replace_sep(C, Seps, New) -> chars(Config) when is_list(Config) -> [] = string:chars($., 0), [] = string:chars($., 0, []), - 10 = length(string:chars(32, 10, [])), + 10 = erlang:length(string:chars(32, 10, [])), "aaargh" = string:chars($a, 3, "rgh"), %% invalid arg type {'EXIT',_} = (catch string:chars($x, [])), @@ -231,7 +1082,7 @@ copies(Config) when is_list(Config) -> "" = string:copies("", 10), "" = string:copies(".", 0), "." = string:copies(".", 1), - 30 = length(string:copies("123", 10)), + 30 = erlang:length(string:copies("123", 10)), %% invalid arg type {'EXIT',_} = (catch string:copies("hej", -1)), {'EXIT',_} = (catch string:copies("hej", 2.0)), @@ -360,7 +1211,7 @@ to_integer(Config) when is_list(Config) -> ok. test_to_integer(Str) -> - io:format("Checking ~p~n", [Str]), + %% io:format("Checking ~p~n", [Str]), case string:to_integer(Str) of {error,_Reason} = Bad -> {'EXIT',_} = (catch list_to_integer(Str)), @@ -403,7 +1254,7 @@ to_float(Config) when is_list(Config) -> ok. test_to_float(Str) -> - io:format("Checking ~p~n", [Str]), + %% io:format("Checking ~p~n", [Str]), case string:to_float(Str) of {error,_Reason} = Bad -> {'EXIT',_} = (catch list_to_float(Str)), @@ -419,7 +1270,7 @@ to_upper_to_lower(Config) when is_list(Config) -> All = lists:seq(0, 255), UC = string:to_upper(All), - 256 = length(UC), + 256 = erlang:length(UC), all_upper_latin1(UC, 0), LC = string:to_lower(All), @@ -450,7 +1301,7 @@ all_lower_latin1([C|T], C) when 0 =< C, C < $A; all_lower_latin1([H|T], C) when $A =< C, C =< $Z; 16#C0 =< C, C =< 16#F6; 16#C8 =< C, C =< 16#DE -> - io:format("~p\n", [{H,C}]), + % io:format("~p\n", [{H,C}]), H = C + 32, all_lower_latin1(T, C+1); all_lower_latin1([], 256) -> ok. -- cgit v1.2.3