diff options
author | Björn Gustavsson <[email protected]> | 2015-02-16 13:53:25 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2015-02-18 12:49:42 +0100 |
commit | 53288b441ec721ce3bbdcc4ad65b75e11acc5e1b (patch) | |
tree | 027c8d8157437cca7b297f3813b4532fc028b6de | |
parent | cabcd537c30e2c0a4496ee9291cfd4e7713645c1 (diff) | |
download | otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.tar.gz otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.tar.bz2 otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.zip |
Optimize string:tokens/2
We can save some time by reversing the original string
before starting the tokenization.
When there is only one separator, we can save even more time
by treating that case specially so that we don't have to call
lists:member/2 for each character.
-rw-r--r-- | lib/stdlib/src/string.erl | 46 |
1 files changed, 35 insertions, 11 deletions
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl index f9b083a56d..f6903d1c3d 100644 --- a/lib/stdlib/src/string.erl +++ b/lib/stdlib/src/string.erl @@ -221,23 +221,47 @@ substr2([_|String], S) -> substr2(String, S-1). Tokens :: [Token :: nonempty_string()]. tokens(S, Seps) -> - tokens1(S, Seps, []). + case Seps of + [] -> + case S of + [] -> []; + [_|_] -> [S] + end; + [C] -> + tokens_single_1(reverse(S), C, []); + [_|_] -> + tokens_multiple_1(reverse(S), Seps, []) + end. -tokens1([C|S], Seps, Toks) -> +tokens_single_1([Sep|S], Sep, Toks) -> + tokens_single_1(S, Sep, Toks); +tokens_single_1([C|S], Sep, Toks) -> + tokens_single_2(S, Sep, Toks, [C]); +tokens_single_1([], _, Toks) -> + Toks. + +tokens_single_2([Sep|S], Sep, Toks, Tok) -> + tokens_single_1(S, Sep, [Tok|Toks]); +tokens_single_2([C|S], Sep, Toks, Tok) -> + tokens_single_2(S, Sep, Toks, [C|Tok]); +tokens_single_2([], _Sep, Toks, Tok) -> + [Tok|Toks]. + +tokens_multiple_1([C|S], Seps, Toks) -> case member(C, Seps) of - true -> tokens1(S, Seps, Toks); - false -> tokens2(S, Seps, Toks, [C]) + true -> tokens_multiple_1(S, Seps, Toks); + false -> tokens_multiple_2(S, Seps, Toks, [C]) end; -tokens1([], _Seps, Toks) -> - reverse(Toks). +tokens_multiple_1([], _Seps, Toks) -> + Toks. -tokens2([C|S], Seps, Toks, Cs) -> +tokens_multiple_2([C|S], Seps, Toks, Tok) -> case member(C, Seps) of - true -> tokens1(S, Seps, [reverse(Cs)|Toks]); - false -> tokens2(S, Seps, Toks, [C|Cs]) + true -> tokens_multiple_1(S, Seps, [Tok|Toks]); + false -> tokens_multiple_2(S, Seps, Toks, [C|Tok]) end; -tokens2([], _Seps, Toks, Cs) -> - reverse([reverse(Cs)|Toks]). +tokens_multiple_2([], _Seps, Toks, Tok) -> + [Tok|Toks]. -spec chars(Character, Number) -> String when Character :: char(), |