New unicode aware string module that works with unicode:chardata()

Works with unicode:chardata() as input as was decided on OTP board meeting as response to EEP-35 a long time ago. Works on graphemes clusters as base, with a few exceptions, does not handle classic (nor nfd'ified) Hangul nor the extended grapheme clusters such as the prepend class. That would make handling binaries as input/output very slow. List input => list output, binary input => binary output and mixed input => mixed output for all find/split functions. So that results can be post-processed without the need to invoke unicode:characters_to_list|binary for intermediate data. pad functions return lists of unicode:chardata() for performance.
author: Dan Gudmundsson <[email protected]> 2017-04-03 12:19:21 +0200
committer: Dan Gudmundsson <[email protected]> 2017-04-24 12:16:56 +0200
commit: 2c72e662bad11a41839780f86680d4bb05367c78 (patch)
tree: 01e9ae9b32fdb953392e571a0773fb2cd059c498 /lib/stdlib/test/string_SUITE.erl
parent: 75fc94b8b462d7b7f6dd4b706bbe32cff77ee575 (diff)
download: otp-2c72e662bad11a41839780f86680d4bb05367c78.tar.gz
otp-2c72e662bad11a41839780f86680d4bb05367c78.tar.bz2
otp-2c72e662bad11a41839780f86680d4bb05367c78.zip
1 files changed, 872 insertions, 21 deletions
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 836f9e5142..a78ddf761b 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -29,25 +29,46 @@
 -export([init_per_testcase/2, end_per_testcase/2]).
 
 %% Test cases must be exported.
--export([len/1,equal/1,concat/1,chr_rchr/1,str_rstr/1]).
--export([span_cspan/1,substr/1,tokens/1,chars/1]).
+-export([is_empty/1, length/1, to_graphemes/1,
+         reverse/1, slice/1,
+         equal/1,
+         pad/1, trim/1, chomp/1, take/1,
+         uppercase/1, lowercase/1, titlecase/1, casefold/1,
+         prefix/1, split/1, replace/1, find/1,
+         lexemes/1, nth_lexeme/1, cd_gc/1, meas/1
+        ]).
+
+-export([len/1,old_equal/1,old_concat/1,chr_rchr/1,str_rstr/1]).
+-export([span_cspan/1,substr/1,old_tokens/1,chars/1]).
 -export([copies/1,words/1,strip/1,sub_word/1,left_right/1]).
 -export([sub_string/1,centre/1, join/1]).
 -export([to_integer/1,to_float/1]).
 -export([to_upper_to_lower/1]).
 
+%% Run tests when debugging them
+-export([debug/0]).
+
 suite() ->
     [{ct_hooks,[ts_install_cth]},
      {timetrap,{minutes,1}}].
 
-all() -> 
-    [len, equal, concat, chr_rchr, str_rstr, span_cspan,
-     substr, tokens, chars, copies, words, strip, sub_word,
-     left_right, sub_string, centre, join, to_integer,
-     to_float, to_upper_to_lower].
+all() ->
+    [{group, chardata}, {group, list_string}].
 
-groups() -> 
-    [].
+groups() ->
+    [{chardata,
+      [is_empty, length, to_graphemes,
+       equal, reverse, slice,
+       pad, trim, chomp, take,
+       lexemes, nth_lexeme,
+       uppercase, lowercase, titlecase, casefold,
+       prefix, find, split, replace, cd_gc,
+       meas]},
+     {list_string,
+      [len, old_equal, old_concat, chr_rchr, str_rstr, span_cspan,
+       substr, old_tokens, chars, copies, words, strip, sub_word,
+       left_right, sub_string, centre, join, to_integer,
+       to_float, to_upper_to_lower]}].
 
 init_per_suite(Config) ->
     Config.
@@ -68,8 +89,839 @@ init_per_testcase(_Case, Config) ->
 end_per_testcase(_Case, _Config) ->
     ok.
 
+debug() ->
+    Config = [{data_dir, ?MODULE_STRING++"_data"}],
+    [io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) ||
+        {_,Tests} <- groups(), Test <- Tests].
+
+-define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)).
+-define(TEST_EQ(B,C,D),
+        test(?LINE,?FUNCTION_NAME,B,C,D, true),
+        test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)).
+
+-define(TEST_NN(B,C,D),
+        test(?LINE,?FUNCTION_NAME,B,C,D, false),
+        test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C)],D, false)).
+
+
+is_empty(_) ->
+    ?TEST("", [], true),
+    ?TEST([""|<<>>], [], true),
+    ?TEST("a", [], false),
+    ?TEST([""|<<$a>>], [], false),
+    ?TEST(["",[<<>>]], [], true),
+    ok.
+
+length(_) ->
+    %% invalid arg type
+    {'EXIT',_} = (catch string:length({})),
+    {'EXIT',_} = (catch string:length(foo)),
+    %% Valid signs
+    ?TEST("", [], 0),
+    ?TEST([""|<<>>], [], 0),
+    L = tuple_size(list_to_tuple(atom_to_list(?MODULE))),
+    ?TEST(atom_to_list(?MODULE), [], L),
+    ?TEST("Hello", [], 5),
+    ?TEST("UC Ω ßð", [], 7),
+    ?TEST(["abc"|<<"abc">>], [], 6),
+    ?TEST(["abc",["def"]], [], 6),
+    ?TEST([<<97/utf8, 778/utf8, 98/utf8>>, [776,111,776]], [], 3), %% åäö in nfd
+    ok.
+
+equal(_) ->
+    %% invalid arg type
+    {'EXIT',_} = (catch string:equal(1, 2)),
+    {'EXIT',_} = (catch string:equal(1, 2, foo)),
+    {'EXIT',_} = (catch string:equal(1, 2, true, foo)),
+
+    ?TEST("", [<<"">>], true),
+    ?TEST("Hello", ["Hello"], true),
+    ?TEST("Hello", ["Hell"], false),
+    ?TEST("Hello", ["Hello!"], false),
+    ?TEST("Hello", [<<"Hello"/utf8>>], true),
+    ?TEST("Hello", [<<"Mello"/utf8>>], false),
+    ?TEST("Hello", [<<"Hello!"/utf8>>], false),
+    ?TEST(["Hello",[" deep"]], ["Hello deep"], true),
+    ?TEST(["Hello",[<<" deep"/utf8>>]], ["Hello deep"], true),
+    ?TEST("Hello deep", [["Hello", [" deep"]]], true),
+    ?TEST("Hello deep", [["Hello", [" d!eep"]]], false),
+    ?TEST("Hello deep", [["Hello", [<<" deep"/utf8>>]]], true),
+    false = string:equal("Åäö", [<<97/utf8, 778/utf8, 98/utf8>>, [776,111,776]]), %% nfc vs nfd
+
+    %% case_insensitive_equal()
+    ?TEST("", ["", true], true),
+    ?TEST("a", ["b", true], false),
+    ?TEST("", [<<>>, true], true),
+    ?TEST("", [[<<>>,[]], true], true),
+    ?TEST("", [[<<>>,[$a]], true], false),
+    ?TEST("123", ["123", true], true),
+    ?TEST("abc", ["abc", true], true),
+    ?TEST([[],<<>>,"ABC"|<<>>], [["abc",[]], true], true),
+    ?TEST("ABCa", ["abcå", true], false),
+    ?TEST("åäö", [{norm,"åäö"}, true], true),
+    ?TEST("ÅÄÖ", [{norm,"åäö"}, true], true),
+    ?TEST("MICHAŁ", ["michał", true], true),
+    ?TEST(["Mic",<<"HAŁ"/utf8>>], ["michał", true], true),
+    ?TEST("ß SHARP S", ["ss sharp s", true], true),
+    ?TEST("ẞ SHARP S", [[<<$ß/utf8, $\s>>,"SHARP S"], true], true),
+    ?TEST("ẞ SHARP ß", ["ss sharp s", true], false),
+    ?TEST(<<"İ I WITH DOT ABOVE"/utf8>>, ["i̇ i with dot above", true], true),
+    %% These should be equivalent with the above
+    true = string:equal(string:casefold(["Mic",<<"HAŁ"/utf8>>]), string:casefold("michał")),
+    true = string:equal(string:casefold("ẞ SHARP S"), string:casefold([<<$ß/utf8, $\s>>,"SHARP S"])),
+    false = string:equal(string:casefold("ẞ SHARP ß"), string:casefold("ss sharp s")),
+
+    %% Normalization
+    ?TEST_NN("", ["", true, none], true),
+    ?TEST_NN("a", ["b", true, nfc], false),
+    ?TEST_NN("a", ["b", true, nfd], false),
+    ?TEST_NN("a", ["b", true, nfkc], false),
+    ?TEST_NN("a", ["b", true, nfkd], false),
+
+    ?TEST_NN("a", ["A", false, nfc], false),
+    ?TEST_NN("a", ["A", false, nfd], false),
+    ?TEST_NN([<<>>,"a"|<<>>], ["A", true, nfkc], true),
+    ?TEST_NN(<<"a">>, ["A", true, nfkd], true),
+
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, none], false),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfc], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfd], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfkc], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abä", false, nfkd], true),
+
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, none], false),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", false, nfc], false),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfc], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfd], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfkc], true),
+    ?TEST_NN([$a, <<$b>>, [97,776]], ["abÄ", true, nfkd], true),
+
+    ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abﾎﾝﾀﾞ", true, none], false),
+    ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abﾎﾝﾀﾞ", true, nfc], false),
+    ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abﾎﾝﾀﾞ", true, nfd], false),
+    ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abﾎﾝﾀﾞ", true, nfkc], true),
+    ?TEST_NN([$a, <<$b>>, "ホンダ"], ["abﾎﾝﾀﾞ", true, nfkd], true),
+
+    ?TEST_NN([$a, <<$b>>, "32"], ["ab３２", true, none], false),
+    ?TEST_NN([$a, <<$b>>, "32"], ["ab３２", true, nfc], false),
+    ?TEST_NN([$a, <<$b>>, "32"], ["ab３２", true, nfd], false),
+    ?TEST_NN([$a, <<$b>>, "32"], ["ab３２", true, nfkc], true),
+    ?TEST_NN([$a, <<$b>>, "32"], ["ab３２", true, nfkd], true),
+
+    %% Coverage.
+    ?TEST("", [<<"">>, false, nfc], true),
+    ?TEST("", [<<"">>, true, nfc], true),
+
+    ok.
+
+to_graphemes(_) ->
+    %% More tests are in unicode_util_SUITE.erl
+    {'EXIT', _} = (catch unicode:characters_to_nfd_binary(["asdåäö", an_atom])),
+    String = ["abc..åäö", $e, 788, <<"Ωµe`è"/utf8>>, "œŒþæÆħ§ß"],
+    NFD = unicode:characters_to_nfd_list(String),
+    [] = string:to_graphemes([]),
+    [] = string:to_graphemes(<<>>),
+    GCs = string:to_graphemes(String),
+    true = erlang:length(GCs) =:= string:length(String),
+    true = erlang:length(GCs) =:= erlang:length(string:to_graphemes(NFD)),
+    true = erlang:length(GCs) =:=
+        erlang:length(string:to_graphemes(unicode:characters_to_nfc_list(String))),
+    ok.
+
+reverse(_) ->
+    {'EXIT',_} = (catch string:reverse(2)),
+    Str1 = "Hello ",
+    Str2 = "Ω ßð",
+    Str3 = "åäö",
+    ?TEST("", [], ""),
+    ?TEST(Str1, [], lists:reverse(Str1)),
+    ?TEST(Str2, [], lists:reverse(Str2)),
+    ?TEST(Str3, [], lists:reverse(Str3)),
+    true = string:reverse(Str3) =:= lists:reverse(string:to_graphemes(Str3)),
+    ok.
+
+slice(_) ->
+    {'EXIT',_} = (catch string:slice(2, 2, 2)),
+    {'EXIT',_} = (catch string:slice("asd", foo, 2)),
+    {'EXIT',_} = (catch string:slice("asd", 2, -1)),
+    ?TEST("", [3], ""),
+    ?TEST("aåä", [1, 0], ""),
+    ?TEST("aåä", [3], ""),
+    ?TEST("aåäöbcd", [3], "öbcd"),
+    ?TEST([<<"aå"/utf8>>,"äöbcd"], [3], "öbcd"),
+    ?TEST([<<"aåä"/utf8>>,"öbcd"], [3], "öbcd"),
+    ?TEST([<<"aåä"/utf8>>,"öbcd"], [3, infinity], "öbcd"),
+
+    ?TEST("", [3, 2], ""),
+    ?TEST("aåä", [3, 2], ""),
+    ?TEST("aåäöbcd", [3,2], "öb"),
+    ?TEST([<<"aå"/utf8>>,"äöbcd"], [3,3], "öbc"),
+    ?TEST([<<"aåä"/utf8>>,"öbcd"], [3,10], "öbcd"),
+
+    ok.
+
+pad(_) ->
+    Str = "Hallå",
+    ?TEST(Str, [7], "Hallå  "),
+    ?TEST(Str, [7, leading], "  Hallå"),
+    ?TEST(Str, [4, both, $.], "Hallå"),
+    ?TEST(Str, [10, both, $.], "..Hallå..."),
+    ?TEST(Str, [10, leading, $.], ".....Hallå"),
+    ?TEST(Str, [10, trailing, $.], "Hallå....."),
+    ?TEST(Str++["f"], [10, trailing, $.], "Hallåf...."),
+    ?TEST(Str++[" flåwer"], [10, trailing, $.], "Hallå flåwer"),
+    ok.
+
+trim(_) ->
+    Str = "\t\s..Ha\s.llå..\t\n\r",
+    ?TEST("", [], ""),
+    ?TEST(Str, [both, "x"], Str),
+    ?TEST(Str, [leading], "..Ha\s.llå..\t\n\r"),
+    ?TEST(Str, [trailing], "\t\s..Ha\s.llå.."),
+    ?TEST(Str, [], "..Ha .llå.."),
+    ?TEST(".. ", [both, ""], ".. "),
+    ?TEST([<<".. ">>], [both, ". "], ""),
+    ?TEST(".. h.ej ..", [leading, ". "], "h.ej .."),
+    ?TEST(".. h.ej ..", [trailing, ". "], ".. h.ej"),
+    ?TEST(".. h.ej ..", [both, ". "], "h.ej"),
+    ?TEST(["..", <<"h.ej">>, ".."], [both, ". "], "h.ej"),
+    ?TEST([[], "..", " h.ej ", <<"..">>], [both, ". "], "h.ej"),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [both, ". "], "h.ej"),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [trailing, ". "], ".. h.ej"),
+    ?TEST([<<"..  h.ej .">>, <<"..">>], [both, ". "], "h.ej"),
+    ?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"),
+    ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"),
+    %% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+    ?TEST("aaåaa", [both, "a"], "å"),
+    ?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"),
+    ?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"),
+    ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [both, [[$e,778]]], "åäö"),
+    ?TEST([[<<"!v">>|<<204,128,$v,204,129>>]],[trailing, [[$v,769]]], [$!,$v,768]),
+    ?TEST([[[<<"v">>|<<204,129,118,204,128,118>>],769,118,769]], [trailing, [[118,769]]], [$v,769,$v,768]),
+    ?TEST([<<"vv">>|<<204,128,118,204,128>>], [trailing, [[118,768]]], "v"),
+    ok.
+
+chomp(_) ->
+    Str = "åäö\na\r\nsd\n",
+    Res = "åäö\na\r\nsd",
+    ?TEST("", [], ""),
+    ?TEST("\n", [], ""),
+    ?TEST("str \t", [], "str \t"),
+    ?TEST("str \t\n\r", [], "str \t\n\r"),
+    ?TEST(Str, [], Res),
+    ?TEST([Str,$\n], [], Res),
+    ?TEST([Str|"\n"], [], Res),
+    ?TEST([Str|<<"\n">>], [], Res),
+    ?TEST([Str,$\r|<<"\n">>], [], Res),
+    ?TEST([Str, <<$\r>>|"\n"], [], Res),
+    ?TEST([<<$a,$\r>>,"\na\n"], [], "a\r\na"),
+    ok.
+
+take(_) ->
+    Str = "\t\s..Ha\s.llå..\t\n\r",
+    WS = "\t\s\n\r",
+    Chars = lists:seq($a,$z)++lists:seq($A,$Z),
+    %% complement=false, dir=leading
+    ?TEST("", ["abc"], {"",""}),
+    ?TEST(Str, ["x"], {[], Str}),
+    ?TEST(Str, [WS], {"\t\s","..Ha\s.llå..\t\n\r"}),
+    ?TEST(".. ", ["", false], {"", ".. "}),
+    ?TEST([<<".. ">>], [". ", false, leading], {".. ", ""}),
+    ?TEST(".. h.ej ..", [". ", false, leading], {".. ", "h.ej .."}),
+    ?TEST(["..", <<"h.ej">>, ".."], [". ", false, leading], {"..", "h.ej.."}),
+    ?TEST([[], "..", " h.ej ", <<"..">>], [". ", false, leading], {".. ","h.ej .."}),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [". ", false, leading], {".. ", "h.ej .."}),
+    ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [". ", false, leading], {"..", "h.ejsan.."}),
+    ?TEST([[<<101,204,138,33>>]], [[[$e,778]]], {[$e,778], "!"}),
+    %% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+    ?TEST("aaåaa", ["a", false, leading], {"aa", "åaa"}),
+    ?TEST(["aaa",778,"äöoo"], ["ao", false, leading], {"aa", "åäöoo"}),
+    ?TEST([<<"aaa">>,778,"äöoo"], ["ao",false,leading], {"aa", "åäöoo"}),
+    ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], false, leading], {[$e,778],"åäöe"++[778]}),
+
+    %% complement=true, dir=leading
+    ?TEST("", ["abc", true], {"",""}),
+    ?TEST(Str, ["x", true], {Str, []}),
+    ?TEST(Str, [Chars, true], {"\t\s..","Ha\s.llå..\t\n\r"}),
+    ?TEST(".. ", ["",true], {".. ", ""}),
+    ?TEST([<<".. ">>], [Chars, true, leading], {".. ", ""}),
+    ?TEST(".. h.ej ..", [Chars, true, leading], {".. ", "h.ej .."}),
+    ?TEST(["..", <<"h.ej">>, ".."], [Chars, true, leading], {"..", "h.ej.."}),
+    ?TEST([[], "..", " h.ej ", <<"..">>], [Chars, true, leading], {".. ","h.ej .."}),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}),
+    ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}),
+    %% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+    ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}),
+    ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}),
+
+    %% complement=false, dir=trailing
+    ?TEST(Str, ["", false, trailing], {Str, []}),
+    ?TEST(Str, ["x", false, trailing], {Str, []}),
+    ?TEST(Str, [WS, false,trailing], {"\t\s..Ha\s.llå..", "\t\n\r"}),
+    ?TEST(".. h.ej ..", [". ", false, trailing], {".. h.ej", " .."}),
+    ?TEST(["..", <<"h.ej">>, ".."], [". ", false, trailing], {"..h.ej", ".."}),
+    ?TEST([[], "..", " h.ej ", <<"..">>], [". ", false, trailing], {".. h.ej", " .."}),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [". ", false, trailing], {".. h.ej", " .."}),
+    ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [". ", false, trailing], {"..h.ejsan", ".."}),
+    ?TEST("aaåaa", ["a", false, trailing], {"aaå", "aa"}),
+    ?TEST([<<"KMÐ¨"/utf8>>], [[1064], false, trailing], {"KMÐ¨",[]}),
+    ?TEST([[<<"!\"">>|<<"\"">>]], ["\"", false, trailing], {"!", "\"\""}),
+    ?TEST([<<$v>>, 769], [[[$v,769]], false, trailing], {"", [$v,769]}),
+    ?TEST(["aaa",778,"äöoo"], ["ao", false, trailing], {"aaåäö", "oo"}),
+    ?TEST([<<"aaa">>,778,"äöoo"], ["ao", false, trailing], {"aaåäö", "oo"}),
+    ?TEST([<<"e">>,778,"åäöee", <<778/utf8>>], [[[$e,778]], false, trailing], {[$e,778|"åäöe"], [$e,778]}),
+
+    %% complement=true, dir=trailing
+    ?TEST("", ["abc", true, trailing], {"",""}),
+    ?TEST(Str, ["x", true, trailing], {[], Str}),
+    %?TEST(Str, [{norm,Chars}, true, trailing], {"\t\s..Ha\s.ll","å..\t\n\r"}),
+    ?TEST(".. ", ["", true, trailing], {"", ".. "}),
+    ?TEST([<<".. ">>], [Chars, true, trailing], {"", ".. "}),
+    ?TEST(".. h.ej ..", [Chars, true, trailing], {".. h.ej", " .."}),
+    ?TEST(["..", <<"h.ej">>, ".."], [Chars, true, trailing], {"..h.ej", ".."}),
+    ?TEST([[], "..", " h.ej ", <<"..">>], [Chars, true, trailing], {".. h.ej"," .."}),
+    ?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, trailing], {".. h.ej"," .."}),
+    ?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, trailing], {"..h.ejsan", ".."}),
+    ?TEST([[<<101,204,138,33>>]], [[[$e,778]], true, trailing], {[$e,778], "!"}),
+    ?TEST([<<"Fa">>], [[$F], true, trailing], {"F", "a"}),
+    ?TEST([[<<101,101,204,138>>,1045,778]], ["e", true, trailing], {"e", [101,778,1045,778]}),
+    ?TEST([[<<101,101,204,138>>,<<1045/utf8,778/utf8>>]], ["e", true, trailing], {"e", [101,778,1045,778]}),
+    ?TEST([[[118,769,118],<<204,129,118,204,129,120,204,128,118>>,768,120,768]],
+          [[[118,769]], true, trailing], {[118,769,118,769,118,769],[120,768,118,768,120,768]}),
+    ?TEST([[<<118,204,128,118>>|<<204,128,118,204,128,118,204,128,206,132,204,129,206,132,204,129>>]],
+          [[[118,768]], true, trailing], {[118,768,118,768,118,768,118,768], [900,769,900,769]}),
+    %% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+    ?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, trailing], {"aae"++[$e,778], "äöoo"}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,trailing], {"aa"++[$e,778], "äöoo"}),
+    ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, trailing], {[$e,778]++"åäöe"++[778], []}),
+    ?TEST([<<"e">>,778,"åäöe", <<778/utf8>>, $e, 779], [[[$e,778]], true, trailing],
+          {[$e,778]++"åäöe"++[778], [$e,779]}),
+
+    ok.
+
+
+uppercase(_) ->
+    ?TEST("", [], ""),
+    ?TEST("123", [], "123"),
+    ?TEST("abc", [], "ABC"),
+    ?TEST("ABC", [], "ABC"),
+    ?TEST("abcdefghiljklmnopqrstvxyzåäö",[], "ABCDEFGHILJKLMNOPQRSTVXYZÅÄÖ"),
+    ?TEST("åäö", [], "ÅÄÖ"),
+    ?TEST("ÅÄÖ", [], "ÅÄÖ"),
+    ?TEST("Michał", [], "MICHAŁ"),
+    ?TEST(["Mic",<<"hał"/utf8>>], [], "MICHAŁ"),
+    ?TEST("ǉǇ", [], "ǇǇ"),
+    ?TEST("Ǉǉ", [], "ǇǇ"),
+    ?TEST("ß sharp s", [], "SS SHARP S"),
+    ok.
+
+lowercase(_) ->
+    ?TEST("", [], ""),
+    ?TEST("123", [], "123"),
+    ?TEST("abc", [], "abc"),
+    ?TEST("ABC", [], "abc"),
+    ?TEST("åäö", [], "åäö"),
+    ?TEST("ÅÄÖ", [], "åäö"),
+    ?TEST("MICHAŁ", [], "michał"),
+    ?TEST(["Mic",<<"HAŁ"/utf8>>], [], "michał"),
+    ?TEST("ß SHARP S", [], "ß sharp s"),
+    ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"),
+    ok.
+
+titlecase(_) ->
+    ?TEST("", [], ""),
+    ?TEST("123", [], "123"),
+    %% Titlecase is the same as uppercase for most chars
+    [?TEST([C,$x], [], string:uppercase([C])++[$x]) ||
+        C <-"abcdefghiljklmnopqrstvxyzåäö"],
+    %% Example of a different mapping
+    ?TEST("ǉusad", [],"ǈusad"),
+    ?TEST("ǉǇ", [], "ǈǇ"),
+    ?TEST("Ǉǉ", [], "ǈǉ"),
+    ?TEST("ß sharp s", [], "Ss sharp s"),
+    ok.
+
+casefold(_) ->
+    ?TEST("", [], ""),
+    ?TEST("123", [], "123"),
+    ?TEST("abc", [], "abc"),
+    ?TEST("ABC", [], "abc"),
+    ?TEST("åäö", [], "åäö"),
+    ?TEST("ÅÄÖ", [], "åäö"),
+    ?TEST("MICHAŁ", [], "michał"),
+    ?TEST(["Mic",<<"HAŁ"/utf8>>], [], "michał"),
+    ?TEST("ß SHARP S", [], "ss sharp s"),
+    ?TEST("ẞ SHARP S", [], "ss sharp s"),
+    ?TEST("İ I WITH DOT ABOVE", [], "i̇ i with dot above"),
+    ok.
+
+prefix(_) ->
+    ?TEST("", ["a"], nomatch),
+    ?TEST("a", [""], "a"),
+    ?TEST("b", ["a"], nomatch),
+    ?TEST("a", ["a"], ""),
+    ?TEST("å", ["a"], nomatch),
+    ?TEST(["a",<<778/utf8>>], ["a"], nomatch),
+    ?TEST([<<"a"/utf8>>,778], ["a"], nomatch),
+    ?TEST("hejsan", [""], "hejsan"),
+    ?TEST("hejsan", ["hej"], "san"),
+    ?TEST("hejsan", ["hes"], nomatch),
+    ?TEST(["h", "ejsan"], ["hej"], "san"),
+    ?TEST(["h", "e", "jsan"], ["hej"], "san"),
+    ?TEST(["h", "e", "san"], ["hej"], nomatch),
+    ?TEST(["h", <<"ejsan">>], ["hej"], "san"),
+    ?TEST(["h", <<"e">>, "jsan"], ["hej"], "san"),
+    ?TEST(["h", "e", <<"jsan">>], ["hej"], "san"),
+    ok.
+
+split(_) ->
+    Mod = fun(Res) ->
+                  [lists:flatten(unicode:characters_to_nfc_list(io_lib:format("~ts", [Str])))
+                   || Str <- Res] end,
+    ?TEST("..", ["", leading], {Mod, [".."]}),
+    ?TEST("..", ["..", leading], {Mod, [[],[]]}),
+    ?TEST("abcd", ["..", leading], {Mod, ["abcd"]}),
+    ?TEST("ab..bc", ["..", leading], {Mod, ["ab","bc"]}),
+    ?TEST("ab..bc..cd", ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST("..ab", [".."], {Mod, [[],"ab"]}),
+    ?TEST("ab..", ["..", leading], {Mod, ["ab",[]]}),
+    ?TEST(["ab..bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab","..bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab",<<"..bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab.",".bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab.",<<".bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab..","bc..cd"], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab..",<<"bc..cd">>], ["..", leading], {Mod, ["ab","bc..cd"]}),
+    ?TEST(["ab.","bc..cd"], ["..", leading], {Mod, ["ab.bc","cd"]}),
+    ?TEST("ab...bc", ["..", leading], {Mod, ["ab",".bc"]}),
+
+    ?TEST("..", ["", trailing], {Mod, [".."]}),
+    ?TEST("..", ["..", trailing], {Mod, [[],[]]}),
+    ?TEST("abcd", ["..", trailing], {Mod, ["abcd"]}),
+    ?TEST("ab..bc", ["..", trailing], {Mod, ["ab","bc"]}),
+    ?TEST("ab..bc..cd", ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST("..ab", ["..", trailing], {Mod, [[],"ab"]}),
+    ?TEST("ab..", ["..", trailing], {Mod, ["ab",[]]}),
+    ?TEST(["ab..bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab","..bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab"|<<"a">>], ["a", trailing], {Mod, ["ab",[]]}),
+    ?TEST(["ab",<<"..bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST([<<"ab.">>,".bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab.",<<".bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab..","bc..cd"], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab..",<<"bc..cd">>], ["..", trailing], {Mod, ["ab..bc","cd"]}),
+    ?TEST(["ab.","bc..cd"], ["..", trailing], {Mod, ["ab.bc","cd"]}),
+    ?TEST("ab...bc", ["..", trailing], {Mod, ["ab.","bc"]}),
+
+    ?TEST("..", ["..", all], {Mod, [[],[]]}),
+    ?TEST("abcd", ["..", all], {Mod, ["abcd"]}),
+    ?TEST("a..b", ["..", all], {Mod, ["a","b"]}),
+    ?TEST("a..b..c", ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST("a..", ["..", all], {Mod, ["a",[]]}),
+    ?TEST(["a..b..c"], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a","..b..c"], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a",<<"..b..c">>], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a.",".b..c"], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a.",<<".b..c">>], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a..","b..c"], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a..",<<"b..c">>], ["..", all], {Mod, ["a","b","c"]}),
+    ?TEST(["a.","b..c"], ["..", all], {Mod, ["a.b","c"]}),
+    ?TEST("a...b", ["..", all], {Mod, ["a",".b"]}),
+
+    %% Grapheme (split) tests
+    ?TEST("aΩΩb", ["Ω", all], {Mod, ["a","","b"]}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], leading], {Mod, ["aa","äöoo"]}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], trailing], {Mod, ["aa","äöoo"]}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], all], {Mod, ["aa","äöoo"]}),
+    ?TEST([<<"aae">>,778,"öeeåäö"], ["e", leading], {Mod, [[$a, $a, $e,778,$ö],"eåäö"]}),
+    ?TEST([<<"aae">>,778,"öeeåäö"], ["e", trailing], {Mod, [[$a, $a, $e,778,$ö, $e],"åäö"]}),
+    ?TEST([<<"aae">>,778,"öeeåäö"], ["e", all], {Mod, [[$a, $a, $e,778,$ö],"", "åäö"]}),
+
+    ok.
+
+replace(_) ->
+    ?TEST(["a..b.", [".c"]], ["xxx", "::"], "a..b..c"),
+    ?TEST(["a..b.", [".c"]], ["..", "::"], "a::b..c"),
+    ?TEST([<<"a..b.">>, [".c"]], ["..", "::", trailing], "a..b::c"),
+    ?TEST(["a..b.", [".c"]], ["..", "::", all], "a::b::c"),
+    ok.
+
+cd_gc(_) ->
+    [] = string:next_codepoint(""),
+    [] = string:next_codepoint(<<>>),
+    [] = string:next_codepoint([<<>>]),
+    "abcd" = string:next_codepoint("abcd"),
+    [$e,778] = string:next_codepoint([$e,778]),
+    [$e|<<204,138>>] = string:next_codepoint(<<$e,778/utf8>>),
+    [778|_] = string:next_codepoint(tl(string:next_codepoint(<<$e,778/utf8>>))),
+
+    [] = string:next_grapheme(""),
+    [] = string:next_grapheme(<<>>),
+    [] = string:next_grapheme([<<>>]),
+    "abcd" = string:next_grapheme("abcd"),
+    [[$e,778]] = string:next_grapheme([$e,778]),
+    [[$e,778]] = string:next_grapheme(<<$e,778/utf8>>),
+
+    ok.
+
+
+find(_) ->
+    ?TEST(["h", "ejsan"], [""], "hejsan"),
+    ?TEST(["h", "ejsan"], [<<>>], "hejsan"),
+    ?TEST([], [""], ""),
+    ?TEST([], ["hej"], nomatch),
+    ?TEST(["h", "ejsan"], ["hej"], "hejsan"),
+    ?TEST(["h", "e", "jsan"], ["hej"], "hejsan"),
+    ?TEST(["xh", "e", "san"], ["hej"], nomatch),
+    ?TEST([<<"xh">>, <<"ejsan">>], ["hej"], "hejsan"),
+    ?TEST(["xh", <<"ejsan">>], ["hej"], "hejsan"),
+    ?TEST(["xh", <<"e">>, "jsan"], ["hej"], "hejsan"),
+    ?TEST(["xh", "e", <<"jsan">>], ["hej"], "hejsan"),
+    ?TEST(["xh", "er", <<"ljsane">>, "rlang"], ["erl", leading], "erljsanerlang"),
+    ?TEST("aΩΩb", ["Ω", leading], "ΩΩb"),
+    ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], leading], [$e,778]++"äöoo"),
+    ?TEST([<<"aae">>,778,"öeeåäö"], ["e", leading], "eeåäö"),
+
+    ?TEST(["h", "ejsan"], ["", trailing], "hejsan"),
+    ?TEST([], ["", trailing], ""),
+    ?TEST([], ["hej", trailing], nomatch),
+    ?TEST(["h", "ejsan"], ["hej", trailing], "hejsan"),
+    ?TEST(["h", "e", "jsan"], ["hej", trailing], "hejsan"),
+    ?TEST(["xh", "e", "san"], ["hej", trailing], nomatch),
+    ?TEST([<<"xh">>, <<"ejsan">>], ["hej", trailing], "hejsan"),
+    ?TEST(["xh", <<"ejsan">>], ["hej", trailing], "hejsan"),
+    ?TEST(["xh", <<"e">>, "jsan"], ["hej", trailing], "hejsan"),
+    ?TEST(["xh", "e", <<"jsan">>], ["hej", trailing], "hejsan"),
+    ?TEST(["xh", "er", <<"ljsane">>, "rlang"], ["erl", trailing], "erlang"),
+    ?TEST("aΩΩb", ["Ω", trailing], "Ωb"),
+    ?TEST([<<"aae">>,778,"äöoo"], [[$e,778], trailing], [$e,778]++"äöoo"),
+    ?TEST([<<"aeae">>,778,"äö"], ["e", trailing], "eae"++[778,$ä,$ö]),
+
+    ok.
+
+lexemes(_) ->
+    Mod = fun(Res) ->
+                  [unicode:characters_to_nfc_list(io_lib:format("~ts", [Str]))|| Str <- Res]
+          end,
+    Res = ["Hej", "san", "Hopp", "san"],
+    ?TEST("", [" ,."],  {Mod, []}),
+    ?TEST("Hej san", [""],  {Mod, ["Hej san"]}),
+    ?TEST("  ,., ", [" ,."],  {Mod, []}),
+    ?TEST( "Hej san Hopp san", [" ,."], {Mod, Res}),
+    ?TEST(" Hej san Hopp san ", [" ,."], {Mod, Res}),
+    ?TEST(" Hej san, .Hopp san ", [" ,."], {Mod, Res}),
+
+    ?TEST([" Hej san",", .Hopp san "], [" ,."], {Mod, Res}),
+    ?TEST([" Hej sa","n, .Hopp san "], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san,"," .Hopp san "], [" ,."], {Mod, Res}),
+
+    ?TEST([" Hej san",[", .Hopp san "]], [" ,."], {Mod, Res}),
+    ?TEST([" Hej sa",["n, .Hopp san "]], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san,",[" .Hopp san "]], [" ,."], {Mod, Res}),
+
+    ?TEST([" H",<<"ej san, .Hopp "/utf8>>, "san"], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san",<<", .Hopp "/utf8>>, "san"], [" ,."], {Mod, Res}),
+    ?TEST([" Hej sa",<<"n, .Hopp"/utf8>>, " san"], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san",[<<", .Hopp san "/utf8>>]], [" ,."], {Mod, Res}),
+    ?TEST([" Hej sa",[<<"n, .Hopp san "/utf8>>]], [" ,."], {Mod, Res}),
+    ?TEST([" Hej san,",[<<" .Hopp san "/utf8>>], <<"  ">>], [" ,."], {Mod, Res}),
+
+    ?TEST(" Hej\r\nsan\nnl", ["\r\n\s"], {Mod, ["Hej\r\nsan", "nl"]}),
+
+    ?TEST(["b1ec1e",778,"äöo21"], ["eo"], {Mod, ["b1",[$c,$1,$e,778,$ä,$ö],"21"]}),
+    ?TEST([<<"b1ec1e">>,778,"äöo21"], ["eo"], {Mod, ["b1",[$c,$1,$e,778,$ä,$ö],"21"]}),
+    %% Grapheme (split) tests
+    Str10 = [[[<<"Ã·"/utf8>>,1101],<<"Ã«"/utf8>>|<<"\"">>]],
+    ?TEST(Str10, [[1076]], {Mod, [unicode:characters_to_nfc_list(Str10)]}),
+    ?TEST("a1Ωb1Ωc1", ["Ω"], {Mod, ["a1","b1","c1"]}),
+    ?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]]], {Mod, ["aa","äöoo"]}),
+    ?TEST([<<"aae">>,778,"äöo21"], [[[$e,778],$o]], {Mod, ["aa","äö","21"]}),
+    ?TEST([<<"aae">>,778,"öeeåäö"], ["e"], {Mod, [[$a, $a, $e,778,$ö],"åäö"]}),
+    ok.
+
+nth_lexeme(_) ->
+    {'EXIT', _} = (catch string:nth_lexeme("test test", 0, [])),
+    {'EXIT', _} = (catch string:nth_lexeme(<<"test test">>, 0, [])),
+    ?TEST( "", [1, " ,."],  []),
+    ?TEST( "Hej san", [1, ""],  "Hej san"),
+    ?TEST( "  ,., ", [1, " ,."],  []),
+    ?TEST( "  ,., ", [3, " ,."],  []),
+    ?TEST("Hej san Hopp san", [1, " ,."], "Hej"),
+    ?TEST("...Hej san Hopp san", [1, " ,."], "Hej"),
+    ?TEST("Hej san Hopp san", [3, " ,."], "Hopp"),
+    ?TEST(" Hej san Hopp san ", [3, " ,."], "Hopp"),
+    ?TEST(" Hej san, .Hopp san ", [3, " ,."], "Hopp"),
+    ?TEST("ab cd", [3, " "], ""),
+
+    ?TEST([" Hej san",", .Hopp san "], [3, " ,."], "Hopp"),
+    ?TEST([" Hej sa","n, .Hopp san "], [3, " ,."], "Hopp"),
+    ?TEST([" Hej san,"," .Hopp san "], [3, " ,."], "Hopp"),
+
+    ?TEST([" Hej san",[", .Hopp san "]], [3," ,."], "Hopp"),
+    ?TEST([" Hej sa",["n, .Hopp san "]], [3, " ,."], "Hopp"),
+    ?TEST([" Hej san,",[" .Hopp san "]], [3, " ,."], "Hopp"),
+
+    ?TEST([" Hej san",<<", .Hopp "/utf8>>, "san"], [3, " ,."], "Hopp"),
+    ?TEST([" Hej sa",<<"n, .Hopp"/utf8>>, " san"], [3, " ,."], "Hopp"),
+    ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [3, " ,."], "Hopp"),
+    ?TEST([" Hej san,",<<" .Hopp s"/utf8>>, "an"], [4, " ,."], "san"),
+    ?TEST([" Hej san",[<<", .Hopp san "/utf8>>]], [3, " ,."], "Hopp"),
+    ?TEST([" Hej sa",[<<"n, .Hopp san "/utf8>>]], [3, " ,."], "Hopp"),
+    ?TEST([" Hej san,",[<<" .Hopp san "/utf8>>], <<"  ">>], [3, " ,."], "Hopp"),
+
+    ?TEST(["b1ec1e",778,"äöo21"], [3,"eo"], "21"),
+    ?TEST([<<"b1ec1e">>,778,"äöo21"], [3, "eo"], "21"),
+    %% Grapheme (split) tests
+    ?TEST("a1Ωb1Ωc1", [1, "Ω"], "a1"),
+    ?TEST([<<"aae">>,778,"äöoo"], [2,[[$e,778]]], "äöoo"),
+    ?TEST([<<"aae">>,778,"äöo21"], [2,[[$e,778],$o]], "äö"),
+    ?TEST([<<"aae">>,778,"öeeåäö"], [2,"e"], "åäö"),
+    ok.
+
+
+meas(Config) ->
+    case ct:get_timetrap_info() of
+        {_,{_,Scale}} when Scale > 1 ->
+            {skip,{will_not_run_in_debug,Scale}};
+        _ -> % No scaling
+            DataDir = proplists:get_value(data_dir, Config),
+            TestDir = filename:dirname(string:trim(DataDir, trailing, "/")),
+            do_measure(TestDir)
+    end.
+
+do_measure(TestDir) ->
+    File =  filename:join(TestDir, ?MODULE_STRING ++ ".erl"),
+    io:format("File ~s ",[File]),
+    {ok, Bin} = file:read_file(File),
+    io:format("~p~n",[byte_size(Bin)]),
+    Do = fun(Name, Func, Mode) ->
+                 {N, Mean, Stddev, _} = time_func(Func, Mode, Bin),
+                 io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n",
+                           [Name, Mode, Mean/1000, Stddev/1000, N])
+         end,
+    io:format("----------------------~n"),
+    Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
+    Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end},
+    [Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]],
+    ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%% internal functions
+
+test(Line, Func, Str, Args, Res, Norm) ->
+    %%io:format("~p: ~p ~w ~w~n",[Line, Func, Str, Args]),
+    test_1(Line, Func, Str, [Str|norm(none,Args)], Res),
+    %%io:format("~p: ~p bin ",[Line, Func]),
+    test_1({Line,list}, Func, Str,
+           [unicode:characters_to_list(Str)|norm(none,Args)], Res),
+    Norm andalso
+        test_1({Line,clist}, Func, Str,
+               [unicode:characters_to_nfc_list(Str)|norm(nfc,Args)], Res),
+    Norm andalso
+        test_1({Line,dlist}, Func, Str,
+               [unicode:characters_to_nfd_list(Str)|norm(nfd,Args)], Res),
+    test_1({Line,bin}, Func, Str,
+           [unicode:characters_to_binary(Str)|norm(none, Args)], Res),
+    Norm andalso
+        test_1({Line,cbin}, Func, Str,
+               [unicode:characters_to_nfc_binary(Str)|norm(nfc,Args)], Res),
+    Norm andalso
+        test_1({Line,dbin}, Func, Str,
+               [unicode:characters_to_nfd_binary(Str)|norm(nfd,Args)], Res),
+    %%io:format("~n",[]),
+    ok.
+
+test_1(Line, Func, Str, Args, Exp) ->
+    try
+        Res = apply(string, Func, Args),
+        check_types(Line, Func, Args, Res),
+        case res(Res, Exp) of
+            true -> ok;
+            {Res1,Exp1} when is_tuple(Exp1) ->
+                io:format("~p~n",[Args]),
+                io:format("~p:~p: ~ts~w =>~n  :~w:~w~n",
+                          [Func,Line, Str,Str,Res1,Exp1]),
+                exit({error, Func});
+            {Res1,Exp1} ->
+                io:format("~p:~p: ~ts~w =>~n  :~ts~w:~ts~w~n",
+                          [Func,Line, Str,Str, Res1,Res1, Exp1,Exp1]),
+                exit({error, Func})
+        end
+    catch
+        error:Exp ->
+            ok;
+        error:Reason ->
+            io:format("~p:~p: Crash ~p ~p~n",
+                      [?MODULE,Line, Reason, erlang:get_stacktrace()]),
+            exit({error, Func})
+    end.
+
+norm(Type, Args) ->
+    Norm = case Type of
+               nfc -> fun unicode:characters_to_nfc_list/1;
+               nfd -> fun unicode:characters_to_nfd_list/1;
+               none -> fun(Str) -> Str end
+           end,
+    lists:map(fun({norm,Str}) -> Norm(Str);
+                 (Other) -> Other
+              end, Args).
+
+res(Str, Str) -> true;
+res(Str, Exp) when is_list(Str), is_list(Exp) ->
+    A = unicode:characters_to_nfc_list(Str),
+    A==Exp orelse {A,Exp};
+res(Str, Exp) when is_binary(Str), is_list(Exp) ->
+    A = unicode:characters_to_nfc_list(Str),
+    A==Exp orelse {A,Exp};
+res(What, {Fun, Exp}) when is_function(Fun) ->
+    Fun(What) == Exp orelse {Fun(What), Exp};
+res({S1,S2}=S, {Exp1,Exp2}=E) -> %% For take
+    case {res(S1,Exp1), res(S2,Exp2)} of
+        {true, true} -> true;
+        _ -> {S, E}
+    end;
+res(Int, Exp) ->
+    Int == Exp orelse {Int, Exp}.
+
+
+check_types(_Line, _Func, _Str, Res)
+  when is_integer(Res); is_boolean(Res); Res =:= nomatch ->
+    %% length or equal
+    ok;
+check_types(Line, Func, [S1,S2], Res)
+  when Func =:= concat ->
+    case check_types_1(type(S1),type(S2)) of
+        ok ->
+            case check_types_1(type(S1),type(Res)) of
+                ok -> ok;
+                {T1,T2} ->
+                    io:format("Failed: ~p ~p ~p ~p~n",[Line, Func, T1, T2]),
+                    io:format("  ~p ~p  => ~p~n", [S1, S2, Res]),
+                    error
+            end;
+        _ -> ok
+    end;
+check_types(Line, Func, [Str|_], Res)  ->
+    AddList = fun(mixed) -> mixed;
+                 ({list,{list,_}}) -> {list, deep};
+                 (R) ->
+                      case lists:member(Func, [lexemes, tokens, split]) of
+                          true -> {list, R};
+                          false -> R
+                      end
+              end,
+    try needs_check(Func) andalso (ok = check_types_1(AddList(type(Str)), type(Res))) of
+        ok -> ok;
+        false -> ok
+    catch _:{badmatch, {T1,T2}} ->
+            io:format("Failed: ~p ~p: ~p ~p~n",[Line, Func, T1, T2]),
+            io:format("  ~p  => ~p~n", [Str, Res]),
+            error;
+          _:Reason ->
+            io:format("Crash: ~p in~n ~p~n",[Reason, erlang:get_stacktrace()]),
+            io:format("Failed: ~p ~p: ~p => ~p~n", [Line, Func, Str, Res]),
+            exit({Reason, erlang:get_stacktrace()})
+    end.
+
+check_types_1(T, T) ->
+    ok;
+check_types_1(Str, Res)
+  when is_binary(Str), is_binary(Res) ->
+    ok;
+check_types_1({list, _},{list, undefined}) ->
+    ok;
+check_types_1({list, _},{list, codepoints}) ->
+    ok;
+check_types_1({list, _},{list, {list, codepoints}}) ->
+    ok;
+check_types_1({list, {list, _}},{list, {list, codepoints}}) ->
+    ok;
+check_types_1(mixed,_) ->
+    ok;
+check_types_1({list, binary}, binary) ->
+    ok;
+check_types_1({list, binary}, {other, _, _}) -> %% take
+    ok;
+check_types_1({list, deep}, _) ->
+    ok;
+check_types_1({list, {list, deep}}, _) ->
+    ok;
+check_types_1(T1,T2) ->
+    {T1,T2}.
+
+type(Bin) when is_binary(Bin) ->
+    binary;
+type([]) ->
+    {list, undefined};
+type(List) when is_list(List) ->
+    Deep = fun(L) when is_list(L) ->
+                   lists:any(fun(C) -> is_list(C) orelse is_binary(C) end, L);
+              (_) -> false
+           end,
+    case all(fun(C) -> not is_binary(C) end, List) of
+        true ->
+            case all(fun(C) -> is_integer(C) end, List) of
+                true -> {list, codepoints};
+                false ->
+                    case [deep || L <- List, Deep(L)] of
+                        [] -> {list, {list, codepoints}};
+                        _ -> {list, deep}
+                    end
+            end;
+        false ->
+            case all(fun(C) -> is_binary(C) end, List) of
+                true -> {list, binary};
+                false -> mixed
+            end
+    end;
+type({R1,R2}) ->
+    case {type(R1),type(R2)} of
+        {T,T} -> T;
+        {{list,undefined}, {list,codepoints}} -> {list,codepoints};
+        {{list,codepoints}, {list,undefined}} -> {list,codepoints};
+        {T1,T2} -> {other, T1,T2}
+    end;
+type(Other) ->
+    {other, Other}.
+
+all(_Check, []) ->
+    true;
+all(Check, [H|T]) ->
+    Check(H) andalso all(Check,T);
+all(Check, Bin) when is_binary(Bin) ->
+    Check(Bin).
+
+needs_check(reverse) -> false;
+needs_check(pad) -> false;
+needs_check(replace) -> false;
+needs_check(_) -> true.
+
+%%%% Timer stuff
+
+time_func(Fun, Mode, Bin) ->
+    timer:sleep(100), %% Let emulator catch up and clean things before test runs
+    Self = self(),
+    Pid = spawn_link(fun() ->
+                             Str = mode(Mode, Bin),
+                             Self ! {self(),time_func(0,0,0, Fun, Str, undefined)}
+                     end),
+    receive {Pid,Msg} -> Msg end.
+
+time_func(N,Sum,SumSq, Fun, Str, _) when N < 50 ->
+    {Time, Res} = timer:tc(fun() -> Fun(Str) end),
+    time_func(N+1,Sum+Time,SumSq+Time*Time, Fun, Str, Res);
+time_func(N,Sum,SumSq, _, _, Res) ->
+    Mean = round(Sum / N),
+    Stdev = round(math:sqrt((SumSq - (Sum*Sum/N))/(N - 1))),
+    {N, Mean, Stdev, Res}.
+
+mode(binary, Bin) -> Bin;
+mode(list, Bin) -> unicode:characters_to_list(Bin).
+
 %%
-%% Test cases starts here.
+%% Old string lists Test cases starts here.
 %%
 
 len(Config) when is_list(Config) ->
@@ -80,16 +932,14 @@ len(Config) when is_list(Config) ->
     {'EXIT',_} = (catch string:len({})),
     ok.
 
-equal(Config) when is_list(Config) ->
+old_equal(Config) when is_list(Config) ->
     true = string:equal("", ""),
     false = string:equal("", " "),
     true = string:equal("laban", "laban"),
     false = string:equal("skvimp", "skvump"),
-    %% invalid arg type
-    true = string:equal(2, 2),			% not good, should crash
     ok.
 
-concat(Config) when is_list(Config) ->
+old_concat(Config) when is_list(Config) ->
     "erlang rules" = string:concat("erlang ", "rules"),
     "" = string:concat("", ""),
     "x" = string:concat("x", ""),
@@ -130,6 +980,7 @@ str_rstr(Config) when is_list(Config) ->
     3 = string:rstr("xxxx", "xx"),
     3 = string:str("xy z yx", " z"),
     3 = string:rstr("xy z yx", " z"),
+    3 = string:str("aaab", "ab"),
     %% invalid arg type
     {'EXIT',_} = (catch string:str(hello, "he")),
     %% invalid arg type
@@ -184,7 +1035,7 @@ substr(Config) when is_list(Config) ->
     {'EXIT',_} = (catch string:substr("1234", "1")),
     ok.
 
-tokens(Config) when is_list(Config) ->
+old_tokens(Config) when is_list(Config) ->
     [] = string:tokens("",""),
     [] = string:tokens("abc","abc"),
     ["abc"] = string:tokens("abc", ""),
@@ -221,7 +1072,7 @@ replace_sep(C, Seps, New) ->
 chars(Config) when is_list(Config) ->
     [] = string:chars($., 0),
     [] = string:chars($., 0, []),
-    10 = length(string:chars(32, 10, [])),
+    10 = erlang:length(string:chars(32, 10, [])),
     "aaargh" = string:chars($a, 3, "rgh"),
     %% invalid arg type
     {'EXIT',_} = (catch string:chars($x, [])),
@@ -231,7 +1082,7 @@ copies(Config) when is_list(Config) ->
     "" = string:copies("", 10),
     "" = string:copies(".", 0),
     "." = string:copies(".", 1),
-    30 = length(string:copies("123", 10)),
+    30 = erlang:length(string:copies("123", 10)),
     %% invalid arg type
     {'EXIT',_} = (catch string:copies("hej", -1)),
     {'EXIT',_} = (catch string:copies("hej", 2.0)),
@@ -360,7 +1211,7 @@ to_integer(Config) when is_list(Config) ->
     ok.
 
 test_to_integer(Str) ->
-    io:format("Checking ~p~n", [Str]),
+    %% io:format("Checking ~p~n", [Str]),
     case string:to_integer(Str) of
 	{error,_Reason} = Bad ->
 	    {'EXIT',_} = (catch list_to_integer(Str)),
@@ -403,7 +1254,7 @@ to_float(Config) when is_list(Config) ->
     ok.
 
 test_to_float(Str) ->
-    io:format("Checking ~p~n", [Str]),
+    %% io:format("Checking ~p~n", [Str]),
     case string:to_float(Str) of
 	{error,_Reason} = Bad ->
 	    {'EXIT',_} = (catch list_to_float(Str)),
@@ -419,7 +1270,7 @@ to_upper_to_lower(Config) when is_list(Config) ->
     All = lists:seq(0, 255),
 
     UC = string:to_upper(All),
-    256 = length(UC),
+    256 = erlang:length(UC),
     all_upper_latin1(UC, 0),
 
     LC = string:to_lower(All),
@@ -450,7 +1301,7 @@ all_lower_latin1([C|T], C) when 0 =< C, C < $A;
 all_lower_latin1([H|T], C) when $A =< C, C =< $Z;
 				16#C0 =< C, C =< 16#F6;
 				16#C8 =< C, C =< 16#DE ->
-    io:format("~p\n", [{H,C}]),
+    % io:format("~p\n", [{H,C}]),
     H = C + 32,
     all_lower_latin1(T, C+1);
 all_lower_latin1([], 256) -> ok.
author	Dan Gudmundsson <[email protected]>	2017-04-03 12:19:21 +0200
committer	Dan Gudmundsson <[email protected]>	2017-04-24 12:16:56 +0200
commit	2c72e662bad11a41839780f86680d4bb05367c78 (patch)
tree	01e9ae9b32fdb953392e571a0773fb2cd059c498 /lib/stdlib/test/string_SUITE.erl
parent	75fc94b8b462d7b7f6dd4b706bbe32cff77ee575 (diff)
download	otp-2c72e662bad11a41839780f86680d4bb05367c78.tar.gz otp-2c72e662bad11a41839780f86680d4bb05367c78.tar.bz2 otp-2c72e662bad11a41839780f86680d4bb05367c78.zip