aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
author: Björn Gustavsson <[email protected]> 2015-02-16 13:53:25 +0100
committer: Björn Gustavsson <[email protected]> 2015-02-18 12:49:42 +0100
commit: 53288b441ec721ce3bbdcc4ad65b75e11acc5e1b (patch)
tree: 027c8d8157437cca7b297f3813b4532fc028b6de /lib/stdlib/src
parent: cabcd537c30e2c0a4496ee9291cfd4e7713645c1 (diff)
download: otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.tar.gz
otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.tar.bz2
otp-53288b441ec721ce3bbdcc4ad65b75e11acc5e1b.zip
Optimize string:tokens/2
We can save some time by reversing the original string before starting the tokenization. When there is only one separator, we can save even more time by treating that case specially so that we don't have to call lists:member/2 for each character.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- lib/stdlib/src/string.erl 46
1 files changed, 35 insertions, 11 deletions
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index f9b083a56d..f6903d1c3d 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -221,23 +221,47 @@ substr2([_|String], S) -> substr2(String, S-1).
Tokens :: [Token :: nonempty_string()].
tokens(S, Seps) ->
- tokens1(S, Seps, []).
+ case Seps of
+ [] ->
+ case S of
+ [] -> [];
+ [_|_] -> [S]
+ end;
+ [C] ->
+ tokens_single_1(reverse(S), C, []);
+ [_|_] ->
+ tokens_multiple_1(reverse(S), Seps, [])
+ end.
-tokens1([C|S], Seps, Toks) ->
+tokens_single_1([Sep|S], Sep, Toks) ->
+ tokens_single_1(S, Sep, Toks);
+tokens_single_1([C|S], Sep, Toks) ->
+ tokens_single_2(S, Sep, Toks, [C]);
+tokens_single_1([], _, Toks) ->
+ Toks.
+
+tokens_single_2([Sep|S], Sep, Toks, Tok) ->
+ tokens_single_1(S, Sep, [Tok|Toks]);
+tokens_single_2([C|S], Sep, Toks, Tok) ->
+ tokens_single_2(S, Sep, Toks, [C|Tok]);
+tokens_single_2([], _Sep, Toks, Tok) ->
+ [Tok|Toks].
+
+tokens_multiple_1([C|S], Seps, Toks) ->
case member(C, Seps) of
- true -> tokens1(S, Seps, Toks);
- false -> tokens2(S, Seps, Toks, [C])
+ true -> tokens_multiple_1(S, Seps, Toks);
+ false -> tokens_multiple_2(S, Seps, Toks, [C])
end;
-tokens1([], _Seps, Toks) ->
- reverse(Toks).
+tokens_multiple_1([], _Seps, Toks) ->
+ Toks.
-tokens2([C|S], Seps, Toks, Cs) ->
+tokens_multiple_2([C|S], Seps, Toks, Tok) ->
case member(C, Seps) of
- true -> tokens1(S, Seps, [reverse(Cs)|Toks]);
- false -> tokens2(S, Seps, Toks, [C|Cs])
+ true -> tokens_multiple_1(S, Seps, [Tok|Toks]);
+ false -> tokens_multiple_2(S, Seps, Toks, [C|Tok])
end;
-tokens2([], _Seps, Toks, Cs) ->
- reverse([reverse(Cs)|Toks]).
+tokens_multiple_2([], _Seps, Toks, Tok) ->
+ [Tok|Toks].
-spec chars(Character, Number) -> String when
Character :: char(),