From cabcd537c30e2c0a4496ee9291cfd4e7713645c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 16 Feb 2015 13:24:39 +0100 Subject: Modernize and strengthen the test case for string:tokens/2 --- lib/stdlib/test/string_SUITE.erl | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl index fccd1bef95..a55c710d50 100644 --- a/lib/stdlib/test/string_SUITE.erl +++ b/lib/stdlib/test/string_SUITE.erl @@ -217,21 +217,39 @@ substr(Config) when is_list(Config) -> ?line {'EXIT',_} = (catch string:substr("1234", "1")), ok. -tokens(suite) -> - []; -tokens(doc) -> - []; tokens(Config) when is_list(Config) -> - ?line [] = string:tokens("",""), - ?line [] = string:tokens("abc","abc"), - ?line ["abc"] = string:tokens("abc", ""), - ?line ["1","2 34","4","5"] = string:tokens("1,2 34,4;5", ";,"), - %% invalid arg type - ?line {'EXIT',_} = (catch string:tokens('x,y', ",")), + [] = string:tokens("",""), + [] = string:tokens("abc","abc"), + ["abc"] = string:tokens("abc", ""), + ["1","2 34","45","5","6","7"] = do_tokens("1,2 34,45;5,;6;,7", ";,"), + %% invalid arg type - ?line {'EXIT',_} = (catch string:tokens("x,y", ',')), + {'EXIT',_} = (catch string:tokens('x,y', ",")), + {'EXIT',_} = (catch string:tokens("x,y", ',')), ok. +do_tokens(S0, Sep0) -> + [H|T] = Sep0, + S = [replace_sep(C, T, H) || C <- S0], + Sep = [H], + io:format("~p ~p\n", [S0,Sep0]), + io:format("~p ~p\n", [S,Sep]), + + Res = string:tokens(S0, Sep0), + Res = string:tokens(Sep0++S0, Sep0), + Res = string:tokens(S0++Sep0, Sep0), + + Res = string:tokens(S, Sep), + Res = string:tokens(Sep++S, Sep), + Res = string:tokens(S++Sep, Sep), + + Res. + +replace_sep(C, Seps, New) -> + case lists:member(C, Seps) of + true -> New; + false -> C + end. chars(suite) -> []; -- cgit v1.2.3 From 53288b441ec721ce3bbdcc4ad65b75e11acc5e1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 16 Feb 2015 13:53:25 +0100 Subject: Optimize string:tokens/2 We can save some time by reversing the original string before starting the tokenization. When there is only one separator, we can save even more time by treating that case specially so that we don't have to call lists:member/2 for each character. --- lib/stdlib/src/string.erl | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index f9b083a56d..f6903d1c3d 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -221,23 +221,47 @@ substr2([_|String], S) -> substr2(String, S-1). Tokens :: [Token :: nonempty_string()]. tokens(S, Seps) -> - tokens1(S, Seps, []). + case Seps of + [] -> + case S of + [] -> []; + [_|_] -> [S] + end; + [C] -> + tokens_single_1(reverse(S), C, []); + [_|_] -> + tokens_multiple_1(reverse(S), Seps, []) + end. -tokens1([C|S], Seps, Toks) -> +tokens_single_1([Sep|S], Sep, Toks) -> + tokens_single_1(S, Sep, Toks); +tokens_single_1([C|S], Sep, Toks) -> + tokens_single_2(S, Sep, Toks, [C]); +tokens_single_1([], _, Toks) -> + Toks. + +tokens_single_2([Sep|S], Sep, Toks, Tok) -> + tokens_single_1(S, Sep, [Tok|Toks]); +tokens_single_2([C|S], Sep, Toks, Tok) -> + tokens_single_2(S, Sep, Toks, [C|Tok]); +tokens_single_2([], _Sep, Toks, Tok) -> + [Tok|Toks]. + +tokens_multiple_1([C|S], Seps, Toks) -> case member(C, Seps) of - true -> tokens1(S, Seps, Toks); - false -> tokens2(S, Seps, Toks, [C]) + true -> tokens_multiple_1(S, Seps, Toks); + false -> tokens_multiple_2(S, Seps, Toks, [C]) end; -tokens1([], _Seps, Toks) -> - reverse(Toks). +tokens_multiple_1([], _Seps, Toks) -> + Toks. -tokens2([C|S], Seps, Toks, Cs) -> +tokens_multiple_2([C|S], Seps, Toks, Tok) -> case member(C, Seps) of - true -> tokens1(S, Seps, [reverse(Cs)|Toks]); - false -> tokens2(S, Seps, Toks, [C|Cs]) + true -> tokens_multiple_1(S, Seps, [Tok|Toks]); + false -> tokens_multiple_2(S, Seps, Toks, [C|Tok]) end; -tokens2([], _Seps, Toks, Cs) -> - reverse([reverse(Cs)|Toks]). +tokens_multiple_2([], _Seps, Toks, Tok) -> + [Tok|Toks]. -spec chars(Character, Number) -> String when Character :: char(), -- cgit v1.2.3