From 53288b441ec721ce3bbdcc4ad65b75e11acc5e1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Mon, 16 Feb 2015 13:53:25 +0100 Subject: Optimize string:tokens/2 We can save some time by reversing the original string before starting the tokenization. When there is only one separator, we can save even more time by treating that case specially so that we don't have to call lists:member/2 for each character. --- lib/stdlib/src/string.erl | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index f9b083a56d..f6903d1c3d 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -221,23 +221,47 @@ substr2([_|String], S) -> substr2(String, S-1). Tokens :: [Token :: nonempty_string()]. tokens(S, Seps) -> - tokens1(S, Seps, []). + case Seps of + [] -> + case S of + [] -> []; + [_|_] -> [S] + end; + [C] -> + tokens_single_1(reverse(S), C, []); + [_|_] -> + tokens_multiple_1(reverse(S), Seps, []) + end. -tokens1([C|S], Seps, Toks) -> +tokens_single_1([Sep|S], Sep, Toks) -> + tokens_single_1(S, Sep, Toks); +tokens_single_1([C|S], Sep, Toks) -> + tokens_single_2(S, Sep, Toks, [C]); +tokens_single_1([], _, Toks) -> + Toks. + +tokens_single_2([Sep|S], Sep, Toks, Tok) -> + tokens_single_1(S, Sep, [Tok|Toks]); +tokens_single_2([C|S], Sep, Toks, Tok) -> + tokens_single_2(S, Sep, Toks, [C|Tok]); +tokens_single_2([], _Sep, Toks, Tok) -> + [Tok|Toks]. + +tokens_multiple_1([C|S], Seps, Toks) -> case member(C, Seps) of - true -> tokens1(S, Seps, Toks); - false -> tokens2(S, Seps, Toks, [C]) + true -> tokens_multiple_1(S, Seps, Toks); + false -> tokens_multiple_2(S, Seps, Toks, [C]) end; -tokens1([], _Seps, Toks) -> - reverse(Toks). +tokens_multiple_1([], _Seps, Toks) -> + Toks. -tokens2([C|S], Seps, Toks, Cs) -> +tokens_multiple_2([C|S], Seps, Toks, Tok) -> case member(C, Seps) of - true -> tokens1(S, Seps, [reverse(Cs)|Toks]); - false -> tokens2(S, Seps, Toks, [C|Cs]) + true -> tokens_multiple_1(S, Seps, [Tok|Toks]); + false -> tokens_multiple_2(S, Seps, Toks, [C|Tok]) end; -tokens2([], _Seps, Toks, Cs) -> - reverse([reverse(Cs)|Toks]). +tokens_multiple_2([], _Seps, Toks, Tok) -> + [Tok|Toks]. -spec chars(Character, Number) -> String when Character :: char(), -- cgit v1.2.3