aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/base64.erl597
-rw-r--r--lib/stdlib/src/erl_compile.erl3
-rw-r--r--lib/stdlib/src/erl_lint.erl44
-rw-r--r--lib/stdlib/src/erl_parse.yrl82
-rw-r--r--lib/stdlib/src/gen.erl1
-rw-r--r--lib/stdlib/src/string.erl747
-rw-r--r--lib/stdlib/src/uri_string.erl229
7 files changed, 1181 insertions, 522 deletions
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index c8cf6fdffe..6ea4147abf 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -24,22 +24,11 @@
-export([encode/1, decode/1, mime_decode/1,
encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]).
-%%-------------------------------------------------------------------------
%% The following type is a subtype of string() for return values
%% of (some) functions of this module.
-%%-------------------------------------------------------------------------
-
-type ascii_string() :: [1..255].
-type ascii_binary() :: binary().
-%%-------------------------------------------------------------------------
-%% encode_to_string(ASCII) -> Base64String
-%% ASCII - string() | binary()
-%% Base64String - string()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
-
-spec encode_to_string(Data) -> Base64String when
Data :: ascii_string() | ascii_binary(),
Base64String :: ascii_string().
@@ -47,66 +36,67 @@
encode_to_string(Bin) when is_binary(Bin) ->
encode_to_string(binary_to_list(Bin));
encode_to_string(List) when is_list(List) ->
- encode_l(List).
-
-%%-------------------------------------------------------------------------
-%% encode(ASCII) -> Base64
-%% ASCII - string() | binary()
-%% Base64 - binary()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
+ encode_list_to_string(List).
-spec encode(Data) -> Base64 when
Data :: ascii_string() | ascii_binary(),
Base64 :: ascii_binary().
encode(Bin) when is_binary(Bin) ->
- encode_binary(Bin);
+ encode_binary(Bin, <<>>);
encode(List) when is_list(List) ->
- list_to_binary(encode_l(List)).
-
--spec encode_l(ascii_string()) -> ascii_string().
+ encode_list(List, <<>>).
-encode_l([]) ->
+encode_list_to_string([]) ->
[];
-encode_l([A]) ->
- [b64e(A bsr 2),
- b64e((A band 3) bsl 4), $=, $=];
-encode_l([A,B]) ->
- [b64e(A bsr 2),
- b64e(((A band 3) bsl 4) bor (B bsr 4)),
- b64e((B band 15) bsl 2), $=];
-encode_l([A,B,C|Ls]) ->
- BB = (A bsl 16) bor (B bsl 8) bor C,
+encode_list_to_string([B1]) ->
+ [b64e(B1 bsr 2),
+ b64e((B1 band 3) bsl 4), $=, $=];
+encode_list_to_string([B1,B2]) ->
+ [b64e(B1 bsr 2),
+ b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)),
+ b64e((B2 band 15) bsl 2), $=];
+encode_list_to_string([B1,B2,B3|Ls]) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
[b64e(BB bsr 18),
b64e((BB bsr 12) band 63),
b64e((BB bsr 6) band 63),
- b64e(BB band 63) | encode_l(Ls)].
-
-encode_binary(Bin) ->
- Split = 3*(byte_size(Bin) div 3),
- <<Main0:Split/binary,Rest/binary>> = Bin,
- Main = << <<(b64e(C)):8>> || <<C:6>> <= Main0 >>,
- case Rest of
- <<A:6,B:6,C:4>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B)):8,(b64e(C bsl 2)):8,$=:8>>;
- <<A:6,B:2>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B bsl 4)):8,$=:8,$=:8>>;
- <<>> ->
- Main
- end.
+ b64e(BB band 63) | encode_list_to_string(Ls)].
-%%-------------------------------------------------------------------------
-%% mime_decode(Base64) -> ASCII
-%% decode(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+encode_binary(<<>>, A) ->
+ A;
+encode_binary(<<B1:8>>, A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_binary(<<B1:8, B2:8>>, A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_binary(Ls,
+ <<A/bits,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+encode_list([], A) ->
+ A;
+encode_list([B1], A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_list([B1,B2], A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_list([B1,B2,B3|Ls], A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_list(Ls,
+ <<A/bits,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+%% mime_decode strips away all characters not Base64 before
+%% converting, whereas decode crashes if an illegal character is found
-spec decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -122,32 +112,13 @@ decode(List) when is_list(List) ->
Data :: ascii_binary().
mime_decode(Bin) when is_binary(Bin) ->
- mime_decode_binary(<<>>, Bin);
+ mime_decode_binary(Bin, <<>>);
mime_decode(List) when is_list(List) ->
- mime_decode(list_to_binary(List)).
+ mime_decode_list(List, <<>>).
--spec decode_l(ascii_string()) -> ascii_string().
-
-decode_l(List) ->
- L = strip_spaces(List, []),
- decode(L, []).
-
--spec mime_decode_l(ascii_string()) -> ascii_string().
-
-mime_decode_l(List) ->
- L = strip_illegal(List, [], 0),
- decode(L, []).
-
-%%-------------------------------------------------------------------------
-%% mime_decode_to_string(Base64) -> ASCII
-%% decode_to_string(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+%% mime_decode_to_string strips away all characters not Base64 before
+%% converting, whereas decode_to_string crashes if an illegal
+%% character is found
-spec decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
@@ -156,7 +127,7 @@ mime_decode_l(List) ->
decode_to_string(Bin) when is_binary(Bin) ->
decode_to_string(binary_to_list(Bin));
decode_to_string(List) when is_list(List) ->
- decode_l(List).
+ decode_list_to_string(List).
-spec mime_decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
@@ -165,115 +136,195 @@ decode_to_string(List) when is_list(List) ->
mime_decode_to_string(Bin) when is_binary(Bin) ->
mime_decode_to_string(binary_to_list(Bin));
mime_decode_to_string(List) when is_list(List) ->
- mime_decode_l(List).
-
-%% One-based decode map.
--define(DECODE_MAP,
- {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
- ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
- 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
- bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
- 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
- bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
- 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+ mime_decode_list_to_string(List).
-decode_binary(<<C1:8, Cs/bits>>, A) ->
- case element(C1, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A);
- B1 -> decode_binary(Cs, A, B1)
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+mime_decode_list([0 | Cs], A) ->
+ mime_decode_list(Cs, A);
+mime_decode_list([C1 | Cs], A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1);
+ _ -> mime_decode_list(Cs, A) % eq is padding
end;
-decode_binary(<<>>, A) ->
+mime_decode_list([], A) ->
A.
-decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
- case element(C2, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1);
- B2 -> decode_binary(Cs, A, B1, B2)
+mime_decode_list([0 | Cs], A, B1) ->
+ mime_decode_list(Cs, A, B1);
+mime_decode_list([C2 | Cs], A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+ _ -> mime_decode_list(Cs, A, B1) % eq is padding
end.
-decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2);
- B3 -> decode_binary(Cs, A, B1, B2, B3)
+mime_decode_list([0 | Cs], A, B1, B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+mime_decode_list([C3 | Cs], A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2)
end.
-decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2, B3);
- eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
- eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
- B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+mime_decode_list([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+mime_decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
+ eq ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2, B3)
end.
-only_ws_binary(<<>>, A) ->
- A;
-only_ws_binary(<<C:8, Cs/bits>>, A) ->
- case element(C, ?DECODE_MAP) of
- ws -> only_ws_binary(Cs, A);
- _ -> erlang:error(function_clause)
+mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_list(Cs, A, B1, B2, B);
+ _ -> mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3)
+ end;
+mime_decode_list_after_eq([], A, B1, B2, eq) ->
+ <<A/bits,B1:6,(B2 bsr 4):2>>;
+mime_decode_list_after_eq([], A, B1, B2, B3) ->
+ <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A) ->
+ mime_decode_binary(Cs, A);
+mime_decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1);
+ _ -> mime_decode_binary(Cs, A) % eq is padding
+ end;
+mime_decode_binary(<<>>, A) ->
+ A.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1) ->
+ mime_decode_binary(Cs, A, B1);
+mime_decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+ _ -> mime_decode_binary(Cs, A, B1) % eq is padding
end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-mime_decode_binary(Result, <<0:8,T/bits>>) ->
- mime_decode_binary(Result, T);
-mime_decode_binary(Result0, <<C:8,T/bits>>) ->
- case element(C, ?DECODE_MAP) of
- Bits when is_integer(Bits) ->
- mime_decode_binary(<<Result0/bits,Bits:6>>, T);
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+mime_decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2)
+ end.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+mime_decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
eq ->
- mime_decode_binary_after_eq(Result0, T, false);
- _ ->
- mime_decode_binary(Result0, T)
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+mime_decode_binary_after_eq(<<C:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_binary(Cs, A, B1, B2, B);
+ _ -> mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3)
end;
-mime_decode_binary(Result, _) ->
- true = is_binary(Result),
- Result.
-
-mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) ->
- mime_decode_binary_after_eq(Result, T, Eq);
-mime_decode_binary_after_eq(Result0, <<C:8,T/bits>>, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- mime_decode_binary_after_eq(Result0, T, Eq);
- ws ->
- mime_decode_binary_after_eq(Result0, T, Eq);
+mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) ->
+ <<A/bits,B1:6,(B2 bsr 4):2>>;
+mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) ->
+ <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_list_to_string([0 | Cs]) ->
+ mime_decode_list_to_string(Cs);
+mime_decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1);
+ _ -> mime_decode_list_to_string(Cs) % eq is padding
+ end;
+mime_decode_list_to_string([]) ->
+ [].
+
+mime_decode_list_to_string([0 | Cs], B1) ->
+ mime_decode_list_to_string(Cs, B1);
+mime_decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+ _ -> mime_decode_list_to_string(Cs, B1) % eq is padding
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+mime_decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+ eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2)
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+mime_decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)];
eq ->
- mime_decode_binary_after_eq(Result0, T, true);
- Bits when is_integer(Bits) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
%% More valid data, skip the eq as invalid
- mime_decode_binary(<<Result0/bits,Bits:6>>, T)
+ case B3 of
+ eq -> mime_decode_list_to_string(Cs, B1, B2, B);
+ _ ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]
+ end;
+ _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3)
end;
-mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
- %% No more valid data.
- case bit_size(Result0) rem 8 of
- 0 ->
- %% '====' is not uncommon.
- Result0;
- 4 when Eq ->
- %% enforce at least one more '=' only ignoring illegals and spacing
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- 2 ->
- %% remove 2 bits
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
- end.
+mime_decode_list_to_string_after_eq([], B1, B2, eq) ->
+ binary_to_list(<<B1:6,(B2 bsr 4):2>>);
+mime_decode_list_to_string_after_eq([], B1, B2, B3) ->
+ binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>).
decode_list([C1 | Cs], A) ->
- case element(C1, ?DECODE_MAP) of
+ case b64d(C1) of
ws -> decode_list(Cs, A);
B1 -> decode_list(Cs, A, B1)
end;
@@ -281,122 +332,130 @@ decode_list([], A) ->
A.
decode_list([C2 | Cs], A, B1) ->
- case element(C2, ?DECODE_MAP) of
+ case b64d(C2) of
ws -> decode_list(Cs, A, B1);
B2 -> decode_list(Cs, A, B1, B2)
end.
decode_list([C3 | Cs], A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
+ case b64d(C3) of
ws -> decode_list(Cs, A, B1, B2);
B3 -> decode_list(Cs, A, B1, B2, B3)
end.
decode_list([C4 | Cs], A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
+ case b64d(C4) of
ws -> decode_list(Cs, A, B1, B2, B3);
- eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
- eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
- B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ eq when B3 =:= eq -> only_ws(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
end.
-only_ws([], A) ->
- A;
-only_ws([C | Cs], A) ->
- case element(C, ?DECODE_MAP) of
- ws -> only_ws(Cs, A);
- _ -> erlang:error(function_clause)
- end.
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
+ end;
+decode_binary(<<>>, A) ->
+ A.
-decode([], A) -> A;
-decode([$=,$=,C2,C1|Cs], A) ->
- Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
- Octet1 = Bits2x6 bsr 16,
- decode(Cs, [Octet1|A]);
-decode([$=,C3,C2,C1|Cs], A) ->
- Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6),
- Octet1 = Bits3x6 bsr 16,
- Octet2 = (Bits3x6 bsr 8) band 16#ff,
- decode(Cs, [Octet1,Octet2|A]);
-decode([C4,C3,C2,C1| Cs], A) ->
- Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6) bor b64d(C4),
- Octet1 = Bits4x6 bsr 16,
- Octet2 = (Bits4x6 bsr 8) band 16#ff,
- Octet3 = Bits4x6 band 16#ff,
- decode(Cs, [Octet1,Octet2,Octet3|A]).
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
-%%%========================================================================
-%%% Internal functions
-%%%========================================================================
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
-strip_spaces([], A) -> A;
-strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
+ end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-strip_illegal([], A, _Cnt) ->
+only_ws_binary(<<>>, A) ->
A;
-strip_illegal([0|Cs], A, Cnt) ->
- strip_illegal(Cs, A, Cnt);
-strip_illegal([C|Cs], A, Cnt) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- strip_illegal(Cs, A, Cnt);
- ws ->
- strip_illegal(Cs, A, Cnt);
- eq ->
- case {tail_contains_more(Cs, false), Cnt rem 4} of
- {{[], _}, 0} ->
- A; %% Ignore extra =
- {{[], true}, 2} ->
- [$=|[$=|A]]; %% 'XX=='
- {{[], _}, 3} ->
- [$=|A]; %% 'XXX='
- {{[H|T], _}, _} ->
- %% more data, skip equals
- strip_illegal(T, [H|A], Cnt+1)
- end;
- _ ->
- strip_illegal(Cs, [C|A], Cnt+1)
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case b64d(C) of
+ ws -> only_ws_binary(Cs, A)
end.
-%% Search the tail for more valid data and remember if we saw
-%% another equals along the way.
-tail_contains_more([], Eq) ->
- {[], Eq};
-tail_contains_more(<<>>, Eq) ->
- {<<>>, Eq};
-tail_contains_more([C|T]=More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
+decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ ws -> decode_list_to_string(Cs);
+ B1 -> decode_list_to_string(Cs, B1)
end;
-tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
+decode_list_to_string([]) ->
+ [].
+
+decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ ws -> decode_list_to_string(Cs, B1);
+ B2 -> decode_list_to_string(Cs, B1, B2)
+ end.
+
+decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_list_to_string(Cs, B1, B2);
+ B3 -> decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ ws ->
+ decode_list_to_string(Cs, B1, B2, B3);
+ eq when B3 =:= eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>));
+ eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>));
+ B4 ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)]
+ end.
+
+only_ws([], A) ->
+ A;
+only_ws([C | Cs], A) ->
+ case b64d(C) of
+ ws -> only_ws(Cs, A)
end.
-
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
%% accessors
+-compile({inline, [{b64d, 1}]}).
+%% One-based decode map.
+b64d(X) ->
+ element(X,
+ {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
+ ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
+ 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
+ bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
+ bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+
+-compile({inline, [{b64e, 1}]}).
b64e(X) ->
element(X+1,
{$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N,
@@ -404,9 +463,3 @@ b64e(X) ->
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n,
$o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
$0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}).
-
-
-b64d(X) ->
- b64d_ok(element(X, ?DECODE_MAP)).
-
-b64d_ok(I) when is_integer(I) -> I.
diff --git a/lib/stdlib/src/erl_compile.erl b/lib/stdlib/src/erl_compile.erl
index 18d7548fdc..f781312ca2 100644
--- a/lib/stdlib/src/erl_compile.erl
+++ b/lib/stdlib/src/erl_compile.erl
@@ -188,6 +188,8 @@ parse_dep_option("", T) ->
{[makedep,{makedep_output,standard_io}],T};
parse_dep_option("D", T) ->
{[makedep],T};
+parse_dep_option("MD", T) ->
+ {[makedep_side_effect],T};
parse_dep_option("F"++Opt, T0) ->
{File,T} = get_option("MF", Opt, T0),
{[makedep,{makedep_output,File}],T};
@@ -221,6 +223,7 @@ usage() ->
"the dependencies"},
{"-MP","add a phony target for each dependency"},
{"-MD","same as -M -MT file (with default 'file')"},
+ {"-MMD","generate dependencies as a side-effect"},
{"-o name","name output directory or file"},
{"-pa path","add path to the front of Erlang's code path"},
{"-pz path","add path to the end of Erlang's code path"},
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index f58cb35cea..1930c462e8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -144,6 +144,7 @@ value_option(Flag, Default, On, OnVal, Off, OffVal, Opts) ->
:: dict:dict(ta(), #typeinfo{}),
exp_types=gb_sets:empty() %Exported types
:: gb_sets:set(ta()),
+ in_try_head=false :: boolean(), %In a try head.
catch_scope = none %Inside/outside try or catch
:: catch_scope()
}).
@@ -312,6 +313,10 @@ format_error({unused_var, V}) ->
io_lib:format("variable ~w is unused", [V]);
format_error({variable_in_record_def,V}) ->
io_lib:format("variable ~w in record definition", [V]);
+format_error({stacktrace_guard,V}) ->
+ io_lib:format("stacktrace variable ~w must not be used in a guard", [V]);
+format_error({stacktrace_bound,V}) ->
+ io_lib:format("stacktrace variable ~w must not be previously bound", [V]);
%% --- binaries ---
format_error({undefined_bittype,Type}) ->
io_lib:format("bit type ~tw undefined", [Type]);
@@ -3218,11 +3223,11 @@ is_module_dialyzer_option(Option) ->
try_clauses(Scs, Ccs, In, Vt, St0) ->
{Csvt0,St1} = icrt_clauses(Scs, Vt, St0),
- St2 = St1#lint{catch_scope=try_catch},
+ St2 = St1#lint{catch_scope=try_catch,in_try_head=true},
{Csvt1,St3} = icrt_clauses(Ccs, Vt, St2),
Csvt = Csvt0 ++ Csvt1,
UpdVt = icrt_export(Csvt, Vt, In, St3),
- {UpdVt,St3}.
+ {UpdVt,St3#lint{in_try_head=false}}.
%% icrt_clauses(Clauses, In, ImportVarTable, State) ->
%% {UpdVt,State}.
@@ -3239,12 +3244,29 @@ icrt_clauses(Cs, Vt, St) ->
mapfoldl(fun (C, St0) -> icrt_clause(C, Vt, St0) end, St, Cs).
icrt_clause({clause,_Line,H,G,B}, Vt0, #lint{catch_scope=Scope}=St0) ->
- {Hvt,Binvt,St1} = head(H, Vt0, St0),
- Vt1 = vtupdate(Hvt, Binvt),
- {Gvt,St2} = guard(G, vtupdate(Vt1, Vt0), St1),
- Vt2 = vtupdate(Gvt, Vt1),
- {Bvt,St3} = exprs(B, vtupdate(Vt2, Vt0), St2),
- {vtupdate(Bvt, Vt2),St3#lint{catch_scope=Scope}}.
+ Vt1 = taint_stack_var(Vt0, H, St0),
+ {Hvt,Binvt,St1} = head(H, Vt1, St0),
+ Vt2 = vtupdate(Hvt, Binvt),
+ Vt3 = taint_stack_var(Vt2, H, St0),
+ {Gvt,St2} = guard(G, vtupdate(Vt3, Vt0), St1#lint{in_try_head=false}),
+ Vt4 = vtupdate(Gvt, Vt2),
+ {Bvt,St3} = exprs(B, vtupdate(Vt4, Vt0), St2),
+ {vtupdate(Bvt, Vt4),St3#lint{catch_scope=Scope}}.
+
+taint_stack_var(Vt, Pat, #lint{in_try_head=true}) ->
+ [{tuple,_,[_,_,{var,_,Stk}]}] = Pat,
+ case Stk of
+ '_' ->
+ Vt;
+ _ ->
+ lists:map(fun({V,{bound,Used,Lines}}) when V =:= Stk ->
+ {V,{stacktrace,Used,Lines}};
+ (B) ->
+ B
+ end, Vt)
+ end;
+taint_stack_var(Vt, _Pat, #lint{in_try_head=false}) ->
+ Vt.
icrt_export(Vts, Vt, {Tag,Attrs}, St) ->
{_File,Loc} = loc(Attrs, St),
@@ -3484,6 +3506,9 @@ pat_var(V, Line, Vt, Bvt, St) ->
{[{V,{bound,used,Ls}}],[],
%% As this is matching, exported vars are risky.
add_warning(Line, {exported_var,V,From}, St)};
+ {ok,{stacktrace,_Usage,Ls}} ->
+ {[{V,{bound,used,Ls}}],[],
+ add_error(Line, {stacktrace_bound,V}, St)};
error when St#lint.recdef_top ->
{[],[{V,{bound,unused,[Line]}}],
add_error(Line, {variable_in_record_def,V}, St)};
@@ -3541,6 +3566,9 @@ expr_var(V, Line, Vt, St) ->
false ->
{[{V,{{export,From},used,Ls}}],St}
end;
+ {ok,{stacktrace,_Usage,Ls}} ->
+ {[{V,{bound,used,Ls}}],
+ add_error(Line, {stacktrace_guard,V}, St)};
error ->
{[{V,{bound,used,[Line]}}],
add_error(Line, {unbound_var,V}, St)}
diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl
index 6e72d64acc..14ca24362e 100644
--- a/lib/stdlib/src/erl_parse.yrl
+++ b/lib/stdlib/src/erl_parse.yrl
@@ -29,6 +29,10 @@ clause_args clause_guard clause_body
expr expr_100 expr_150 expr_160 expr_200 expr_300 expr_400 expr_500
expr_600 expr_700 expr_800
expr_max
+pat_expr pat_expr_200 pat_expr_300 pat_expr_400 pat_expr_500
+pat_expr_600 pat_expr_700 pat_expr_800
+pat_expr_max map_pat_expr record_pat_expr
+pat_argument_list pat_exprs
list tail
list_comprehension lc_expr lc_exprs
binary_comprehension
@@ -37,7 +41,7 @@ record_expr record_tuple record_field record_fields
map_expr map_tuple map_field map_field_assoc map_field_exact map_fields map_key
if_expr if_clause if_clauses case_expr cr_clause cr_clauses receive_expr
fun_expr fun_clause fun_clauses atom_or_var integer_or_var
-try_expr try_catch try_clause try_clauses
+try_expr try_catch try_clause try_clauses try_opt_stacktrace
function_call argument_list
exprs guard
atomic strings
@@ -66,7 +70,7 @@ char integer float atom string var
'spec' 'callback' % helper
dot.
-Expect 2.
+Expect 0.
Rootsymbol form.
@@ -210,7 +214,7 @@ function_clause -> atom clause_args clause_guard clause_body :
{clause,?anno('$1'),element(3, '$1'),'$2','$3','$4'}.
-clause_args -> argument_list : element(1, '$1').
+clause_args -> pat_argument_list : element(1, '$1').
clause_guard -> 'when' guard : '$2'.
clause_guard -> '$empty' : [].
@@ -275,6 +279,53 @@ expr_max -> receive_expr : '$1'.
expr_max -> fun_expr : '$1'.
expr_max -> try_expr : '$1'.
+pat_expr -> pat_expr_200 '=' pat_expr : {match,?anno('$2'),'$1','$3'}.
+pat_expr -> pat_expr_200 : '$1'.
+
+pat_expr_200 -> pat_expr_300 comp_op pat_expr_300 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_200 -> pat_expr_300 : '$1'.
+
+pat_expr_300 -> pat_expr_400 list_op pat_expr_300 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_300 -> pat_expr_400 : '$1'.
+
+pat_expr_400 -> pat_expr_400 add_op pat_expr_500 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_400 -> pat_expr_500 : '$1'.
+
+pat_expr_500 -> pat_expr_500 mult_op pat_expr_600 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_500 -> pat_expr_600 : '$1'.
+
+pat_expr_600 -> prefix_op pat_expr_700 :
+ ?mkop1('$1', '$2').
+pat_expr_600 -> map_pat_expr : '$1'.
+pat_expr_600 -> pat_expr_700 : '$1'.
+
+pat_expr_700 -> record_pat_expr : '$1'.
+pat_expr_700 -> pat_expr_800 : '$1'.
+
+pat_expr_800 -> pat_expr_max : '$1'.
+
+pat_expr_max -> var : '$1'.
+pat_expr_max -> atomic : '$1'.
+pat_expr_max -> list : '$1'.
+pat_expr_max -> binary : '$1'.
+pat_expr_max -> tuple : '$1'.
+pat_expr_max -> '(' pat_expr ')' : '$2'.
+
+map_pat_expr -> '#' map_tuple :
+ {map, ?anno('$1'),'$2'}.
+map_pat_expr -> pat_expr_max '#' map_tuple :
+ {map, ?anno('$2'),'$1','$3'}.
+map_pat_expr -> map_pat_expr '#' map_tuple :
+ {map, ?anno('$2'),'$1','$3'}.
+
+record_pat_expr -> '#' atom '.' atom :
+ {record_index,?anno('$1'),element(3, '$2'),'$4'}.
+record_pat_expr -> '#' atom record_tuple :
+ {record,?anno('$1'),element(3, '$2'),'$3'}.
list -> '[' ']' : {nil,?anno('$1')}.
list -> '[' expr tail : {cons,?anno('$1'),'$2','$3'}.
@@ -397,6 +448,10 @@ case_expr -> 'case' expr 'of' cr_clauses 'end' :
cr_clauses -> cr_clause : ['$1'].
cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3'].
+%% FIXME: merl in syntax_tools depends on patterns in a 'case' being
+%% full expressions. Therefore, we can't use pat_expr here. There
+%% should be a better way.
+
cr_clause -> expr clause_guard clause_body :
{clause,?anno('$1'),['$1'],'$2','$3'}.
@@ -424,11 +479,11 @@ integer_or_var -> var : '$1'.
fun_clauses -> fun_clause : ['$1'].
fun_clauses -> fun_clause ';' fun_clauses : ['$1' | '$3'].
-fun_clause -> argument_list clause_guard clause_body :
+fun_clause -> pat_argument_list clause_guard clause_body :
{Args,Anno} = '$1',
{clause,Anno,'fun',Args,'$2','$3'}.
-fun_clause -> var argument_list clause_guard clause_body :
+fun_clause -> var pat_argument_list clause_guard clause_body :
{clause,element(2, '$1'),element(3, '$1'),element(1, '$2'),'$3','$4'}.
try_expr -> 'try' exprs 'of' cr_clauses try_catch :
@@ -446,24 +501,31 @@ try_catch -> 'after' exprs 'end' :
try_clauses -> try_clause : ['$1'].
try_clauses -> try_clause ';' try_clauses : ['$1' | '$3'].
-try_clause -> expr clause_guard clause_body :
+try_clause -> pat_expr clause_guard clause_body :
A = ?anno('$1'),
{clause,A,[{tuple,A,[{atom,A,throw},'$1',{var,A,'_'}]}],'$2','$3'}.
-try_clause -> atom ':' expr clause_guard clause_body :
+try_clause -> atom ':' pat_expr try_opt_stacktrace clause_guard clause_body :
A = ?anno('$1'),
- {clause,A,[{tuple,A,['$1','$3',{var,A,'_'}]}],'$4','$5'}.
-try_clause -> var ':' expr clause_guard clause_body :
+ {clause,A,[{tuple,A,['$1','$3',{var,A,'$4'}]}],'$5','$6'}.
+try_clause -> var ':' pat_expr try_opt_stacktrace clause_guard clause_body :
A = ?anno('$1'),
- {clause,A,[{tuple,A,['$1','$3',{var,A,'_'}]}],'$4','$5'}.
+ {clause,A,[{tuple,A,['$1','$3',{var,A,'$4'}]}],'$5','$6'}.
+try_opt_stacktrace -> ':' var : element(3, '$2').
+try_opt_stacktrace -> '$empty' : '_'.
argument_list -> '(' ')' : {[],?anno('$1')}.
argument_list -> '(' exprs ')' : {'$2',?anno('$1')}.
+pat_argument_list -> '(' ')' : {[],?anno('$1')}.
+pat_argument_list -> '(' pat_exprs ')' : {'$2',?anno('$1')}.
exprs -> expr : ['$1'].
exprs -> expr ',' exprs : ['$1' | '$3'].
+pat_exprs -> pat_expr : ['$1'].
+pat_exprs -> pat_expr ',' pat_exprs : ['$1' | '$3'].
+
guard -> exprs : ['$1'].
guard -> exprs ';' guard : ['$1'|'$3'].
diff --git a/lib/stdlib/src/gen.erl b/lib/stdlib/src/gen.erl
index 33af0aed8f..4b1d448487 100644
--- a/lib/stdlib/src/gen.erl
+++ b/lib/stdlib/src/gen.erl
@@ -49,6 +49,7 @@
| {'logfile', string()}.
-type option() :: {'timeout', timeout()}
| {'debug', [debug_flag()]}
+ | {'hibernate_after', timeout()}
| {'spawn_opt', [proc_lib:spawn_option()]}.
-type options() :: [option()].
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 5a4d2df2a6..e01bb7d85e 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -74,15 +74,16 @@
-export([to_upper/1, to_lower/1]).
%%
-import(lists,[member/2]).
-
-compile({no_auto_import,[length/1]}).
+-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}).
+-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r).
-export_type([grapheme_cluster/0]).
-type grapheme_cluster() :: char() | [char()].
-type direction() :: 'leading' | 'trailing'.
--dialyzer({no_improper_lists, stack/2}).
+-dialyzer({no_improper_lists, [stack/2, length_b/3]}).
%%% BIFs internal (not documented) should not to be used outside of this module
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
@@ -127,8 +128,10 @@ is_empty(_) -> false.
%% Count the number of grapheme clusters in chardata
-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(<<CP1/utf8, Bin/binary>>) ->
+ length_b(Bin, CP1, 0);
length(CD) ->
- length_1(unicode_util:gc(CD), 0).
+ length_1(CD, 0).
%% Convert a string to a list of grapheme clusters
-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
@@ -176,6 +179,8 @@ equal(A, B, true, Norm) ->
%% Reverse grapheme clusters
-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(<<CP1/utf8, Rest/binary>>) ->
+ reverse_b(Rest, CP1, []);
reverse(CD) ->
reverse_1(CD, []).
@@ -186,7 +191,10 @@ reverse(CD) ->
Start :: non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N) when is_integer(N), N >= 0 ->
- slice_l(CD, N, is_binary(CD)).
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end.
-spec slice(String, Start, Length) -> Slice when
String::unicode:chardata(),
@@ -195,9 +203,15 @@ slice(CD, N) when is_integer(N), N >= 0 ->
Slice :: unicode:chardata().
slice(CD, N, Length)
when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
- slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ L -> slice_trail(L, Length)
+ end;
slice(CD, N, infinity) ->
- slice_l(CD, N, is_binary(CD));
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end;
slice(CD, _, 0) ->
case is_binary(CD) of
true -> <<>>;
@@ -256,18 +270,22 @@ trim(Str, Dir) ->
Dir :: direction() | 'both',
Characters :: [grapheme_cluster()].
trim(Str, _, []) -> Str;
+trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ls(Str, Sep);
trim(Str, leading, Sep) when is_list(Sep) ->
- trim_l(Str, search_pattern(Sep));
-trim(Str, trailing, Sep) when is_list(Sep) ->
- trim_t(Str, 0, search_pattern(Sep));
-trim(Str, both, Sep0) when is_list(Sep0) ->
- Sep = search_pattern(Sep0),
- trim_t(trim_l(Str,Sep), 0, Sep).
+ trim_l(Str, Sep);
+trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ts(Str, Sep);
+trim(Str, trailing, Seps0) when is_list(Seps0) ->
+ Seps = search_pattern(Seps0),
+ trim_t(Str, 0, Seps);
+trim(Str, both, Sep) when is_list(Sep) ->
+ trim(trim(Str,leading,Sep), trailing, Sep).
%% Delete trailing newlines or \r\n
-spec chomp(String::unicode:chardata()) -> unicode:chardata().
chomp(Str) ->
- trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+ trim(Str, trailing, [[$\r,$\n],$\n]).
%% Split String into two parts where the leading part consists of Characters
-spec take(String, Characters) -> {Leading, Trailing} when
@@ -300,8 +318,7 @@ take(Str, [], Complement, Dir) ->
{true, leading} -> {Str, Empty};
{true, trailing} -> {Empty, Str}
end;
-take(Str, Sep0, false, leading) ->
- Sep = search_pattern(Sep0),
+take(Str, Sep, false, leading) ->
take_l(Str, Sep, []);
take(Str, Sep0, true, leading) ->
Sep = search_pattern(Sep0),
@@ -461,6 +478,7 @@ replace(String, SearchPattern, Replacement, Where) ->
SeparatorList::[grapheme_cluster()]) ->
[unicode:chardata()].
lexemes([], _) -> [];
+lexemes(Str, []) -> [Str];
lexemes(Str, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
lexemes_m(Str, Seps, []).
@@ -494,13 +512,13 @@ find(String, SearchPattern, leading) ->
find(String, SearchPattern, trailing) ->
find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
-%% Fetch first codepoint and return rest in tail
+%% Fetch first grapheme cluster and return rest in tail
-spec next_grapheme(String::unicode:chardata()) ->
maybe_improper_list(grapheme_cluster(),unicode:chardata()) |
{error,unicode:chardata()}.
next_grapheme(CD) -> unicode_util:gc(CD).
-%% Fetch first grapheme cluster and return rest in tail
+%% Fetch first codepoint and return rest in tail
-spec next_codepoint(String::unicode:chardata()) ->
maybe_improper_list(char(),unicode:chardata()) |
{error,unicode:chardata()}.
@@ -508,10 +526,23 @@ next_codepoint(CD) -> unicode_util:cp(CD).
%% Internals
-length_1([_|Rest], N) ->
- length_1(unicode_util:gc(Rest), N+1);
-length_1([], N) ->
- N.
+length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
+ length_1(Cont, N+1);
+length_1(Str, N) ->
+ case unicode_util:gc(Str) of
+ [] -> N;
+ [_|Rest] -> length_1(Rest, N+1)
+ end.
+
+length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
+ when ?ASCII_LIST(CP1,CP2) ->
+ length_b(Rest, CP2, N+1);
+length_b(Bin0, CP1, N) ->
+ [_|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> N+1;
+ [CP3|Bin] -> length_b(Bin, CP3, N+1)
+ end.
equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
A =:= B andalso equal_1(AR, BR);
@@ -550,29 +581,66 @@ equal_norm_nocase(A0, B0, Norm) ->
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
+reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
+ reverse_1(Cont, [CP1|Acc]);
reverse_1(CD, Acc) ->
case unicode_util:gc(CD) of
[GC|Rest] -> reverse_1(Rest, [GC|Acc]);
[] -> Acc
end.
-slice_l(CD, N, Binary) when N > 0 ->
+reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ reverse_b(Rest, CP2, [CP1|Acc]);
+reverse_b(Bin0, CP1, Acc) ->
+ [GC|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> [GC|Acc];
+ [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
+ end.
+
+slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
+ slice_lb(Bin, CP1, N);
+slice_l0(L, N) ->
+ slice_l(L, N).
+
+slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ slice_l(Cont, N-1);
+slice_l(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
- [_|Cont] -> slice_l(Cont, N-1, Binary);
- [] when Binary -> <<>>;
+ [_|Cont] -> slice_l(Cont, N-1);
[] -> []
end;
-slice_l(Cont, 0, Binary) ->
- case is_empty(Cont) of
- true when Binary -> <<>>;
- _ -> Cont
+slice_l(Cont, 0) ->
+ Cont.
+
+slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 ->
+ slice_lb(Bin, CP2, N-1);
+slice_lb(Bin, CP1, N) ->
+ [_|Rest] = unicode_util:gc([CP1|Bin]),
+ if N > 1 ->
+ case unicode_util:cp(Rest) of
+ [CP2|Cont] -> slice_lb(Cont, CP2, N-1);
+ [] -> <<>>
+ end;
+ N =:= 1 ->
+ Rest
end.
+slice_trail(Orig, N) when is_binary(Orig) ->
+ case Orig of
+ <<CP1/utf8, Bin/binary>> when N > 0 ->
+ Length = slice_bin(Bin, CP1, N),
+ Sz = byte_size(Orig) - Length,
+ <<Keep:Sz/binary, _/binary>> = Orig,
+ Keep;
+ _ -> <<>>
+ end;
slice_trail(CD, N) when is_list(CD) ->
- slice_list(CD, N);
-slice_trail(CD, N) when is_binary(CD) ->
- slice_bin(CD, N, CD).
+ slice_list(CD, N).
+slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ [CP1|slice_list(Cont, N-1)];
slice_list(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[GC|Cont] -> append(GC, slice_list(Cont, N-1));
@@ -581,17 +649,16 @@ slice_list(CD, N) when N > 0 ->
slice_list(_, 0) ->
[].
-slice_bin(CD, N, Orig) when N > 0 ->
- case unicode_util:gc(CD) of
- [_|Cont] -> slice_bin(Cont, N-1, Orig);
- [] -> Orig
+slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 ->
+ slice_bin(Bin, CP2, N-1);
+slice_bin(CD, CP1, N) when N > 0 ->
+ [_|Bin] = unicode_util:gc([CP1|CD]),
+ case unicode_util:cp(Bin) of
+ [CP2|Cont] -> slice_bin(Cont, CP2, N-1);
+ [] -> 0
end;
-slice_bin([], 0, Orig) ->
- Orig;
-slice_bin(CD, 0, Orig) ->
- Sz = byte_size(Orig) - byte_size(CD),
- <<Keep:Sz/binary, _/binary>> = Orig,
- Keep.
+slice_bin(CD, CP1, 0) ->
+ byte_size(CD)+byte_size(<<CP1/utf8>>).
uppercase_list(CPs0) ->
case unicode_util:uppercase(CPs0) of
@@ -641,16 +708,31 @@ casefold_bin(CPs0, Acc) ->
[] -> Acc
end.
-
+%% Fast path for ascii searching for one character in lists
+trim_ls([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case Sep of
+ CP1 -> trim_ls(Cont, Sep);
+ _ -> Str
+ end;
+trim_ls(Str, Sep) ->
+ trim_l(Str, [Sep]).
+
+trim_l([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Sep) of
+ true -> trim_l(Cont, Sep);
+ false -> Str
+ end;
trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Sep) of
{nomatch, Cont} -> trim_l(Cont, Sep);
Keep -> Keep
end;
-trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+trim_l(Str, Sep) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
+ case lists:member(C, Sep) of
true -> trim_l(Cs, Sep);
false -> Str
end;
@@ -662,15 +744,51 @@ trim_l(Bin, Sep) when is_binary(Bin) ->
[Keep] -> Keep
end.
-trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+%% Fast path for ascii searching for one character in lists
+trim_ts([Sep|Cs1]=Str, Sep) ->
+ case Cs1 of
+ [] -> [];
+ [CP2|_] when ?ASCII_LIST(Sep,CP2) ->
+ Tail = trim_ts(Cs1, Sep),
+ case is_empty(Tail) of
+ true -> [];
+ false -> [Sep|Tail]
+ end;
+ _ ->
+ trim_t(Str, 0, search_pattern([Sep]))
+ end;
+trim_ts([CP|Cont],Sep) when is_integer(CP) ->
+ [CP|trim_ts(Cont, Sep)];
+trim_ts(Str, Sep) ->
+ trim_t(Str, 0, search_pattern([Sep])).
+
+trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Cs1] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
+ end;
+ false ->
+ append(GC,trim_t(Cs1, 0, Seps))
+ end;
+ false ->
+ [CP1|trim_t(Cont, 0, Seps)]
+ end;
+trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,_} ->
- stack(Bin, trim_t(Cont0, 0, Sep));
+ stack(Bin, trim_t(Cont0, 0, Seps));
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- Tail = trim_t(Cont, 0, Sep),
+ Tail = trim_t(Cont, 0, Seps),
case is_empty(Tail) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
@@ -682,67 +800,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t([Bin|Cont], KeepSz, Sep)
+ trim_t([Bin|Cont], KeepSz, Seps)
end
end;
-trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- Tail = trim_t(Cs1, 0, Sep),
- case is_empty(Tail) of
- true -> [];
- false -> append(GC,Tail)
- end;
- false ->
- append(GC,trim_t(Cs1, 0, Sep))
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
end;
false ->
- append(CP,trim_t(Cs, 0, Sep))
+ append(GC,trim_t(Cs1, 0, Seps))
end;
[] -> []
end;
-trim_t(Bin, N, Sep) when is_binary(Bin) ->
+trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> Bin;
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t(Bin, KeepSz, Sep)
+ trim_t(Bin, KeepSz, Seps)
end
end.
-take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Sep) of
+
+take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Seps) of
+ true -> take_l(Cont, Seps, [CP1|Acc]);
+ false -> {rev(Acc), Str}
+ end;
+take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_l(Str, Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
- true -> take_l(Cs, Sep, append(rev(C),Acc));
+ case lists:member(C, Seps) of
+ true -> take_l(Cs, Seps, append(rev(C),Acc));
false -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_l(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Sep) of
+take_l(Bin, Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -751,27 +871,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, Cont0, Sep) of
+
+take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ false -> take_lc(Str, Seps, append(rev(GC),Acc));
+ true -> {rev(Acc), Str0}
+ end;
+ false ->
+ take_lc(Cont, Seps, append(CP1,Acc))
+ end;
+take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, GCs) of
- false -> take_lc(Cs, Sep, append(rev(C),Acc));
+ false -> take_lc(Cs, Seps, append(rev(C),Acc));
true -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, [], Sep) of
+take_lc(Bin, Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -780,148 +914,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+
+take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ true ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Cont, 0, Seps),
+ {[CP1|Head], Tail}
+ end;
+take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t([Bin|Cont], KeepSz, Sep)
+ take_t([Bin|Cont], KeepSz, Seps)
end
end;
-take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- {Head, Tail} = take_t(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
end;
false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_t(Bin, N, Sep) when is_binary(Bin) ->
+take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t(Bin, KeepSz, Sep)
+ take_t(Bin, KeepSz, Seps)
end
end.
-take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, GCs) of
+ false ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(CP1,Tail)};
+ false -> {append(CP1,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ {append(CP1,Head), Tail}
+ end;
+take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, Cont0, Sep) of
+ case bin_search_inv(Rest, Cont0, GCs) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps0),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search(SepStart, Cont1, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, Cont1, Seps) of
{nomatch, Cont} ->
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc([Bin|Cont], KeepSz, Sep)
+ take_tc([Bin|Cont], KeepSz, Seps)
end
end;
-take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
- true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- false ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- true ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- {append(GC,Head), Tail}
- end;
+take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
false ->
- {Head, Tail} = take_tc(Cs, 0, Sep),
- case equal(Tail, Cs) of
- true -> {Head, append(CP,Tail)};
- false -> {append(CP,Head), Tail}
- end
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_tc(Bin, N, Sep) when is_binary(Bin) ->
+take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, [], Sep) of
+ case bin_search_inv(Rest, [], GCs) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search(SepStart, [], Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, [], Seps) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc(Bin, KeepSz, Sep)
+ take_tc(Bin, KeepSz, Seps)
end
end.
-prefix_1(Cs, []) -> Cs;
-prefix_1(Cs, [_]=Pre) ->
- prefix_2(unicode_util:gc(Cs), Pre);
-prefix_1(Cs, Pre) ->
- prefix_2(unicode_util:cp(Cs), Pre).
-
-prefix_2([C|Cs], [C|Pre]) ->
- prefix_1(Cs, Pre);
-prefix_2(_, _) ->
- nomatch.
+prefix_1(Cs0, [GC]) ->
+ case unicode_util:gc(Cs0) of
+ [GC|Cs] -> Cs;
+ _ -> nomatch
+ end;
+prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(Cs0, [Pre|PreR]) ->
+ case unicode_util:cp(Cs0) of
+ [Pre|Cs] -> prefix_1(Cs,PreR);
+ _ -> nomatch
+ end.
+split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) ->
+ case CP1=:=C of
+ true ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+ Rest when Where =:= leading ->
+ [rev(Curr), Rest];
+ Rest when Where =:= trailing ->
+ split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+ Rest when Where =:= all ->
+ split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+ end;
+ false ->
+ split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc)
+ end;
split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
when is_binary(Bin) ->
case bin_search_str(Bin, Start, Cont0, Needle) of
@@ -981,32 +1159,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
end
end.
-lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) ->
+ case lists:member(CP, CPs) of
+ true ->
+ [GC|Cs2] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ lexemes_m(Cs2, Seps, Ts);
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- lexemes_m(Cont, Seps, Ts);
+ lexemes_m(Cont, Seps0, Ts);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
-lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- lexemes_m(Cs, Seps, Ts);
+ lexemes_m(Cs, Seps0, Ts);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
[] ->
lists:reverse(Ts)
end;
-lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], GCs) of
{nomatch,_} ->
lists:reverse(Ts);
[Cs] ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
end.
@@ -1037,7 +1233,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> {rev(Tkn), Cs0};
+ true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false ->
@@ -1047,7 +1243,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
{rev(Tkn), []}
end;
lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin,Tkn), []};
[Left] ->
@@ -1056,35 +1252,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
{btoken(Lexeme, Tkn), Left}
end.
-nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- nth_lexeme_m(Cont, Seps, N);
+ nth_lexeme_m(Cont, Seps0, N);
Cs when N > 1 ->
- Rest = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Rest, Seps, N-1);
+ Rest = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Rest, Seps0, N-1);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme
end;
-nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- nth_lexeme_m(Cs, Seps, N);
+ nth_lexeme_m(Cs, Seps0, N);
false when N > 1 ->
- Cs1 = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Cs1, Seps, N-1);
+ Cs1 = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Cs1, Seps0, N-1);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs0, Seps, []),
Lexeme
end;
[] ->
[]
end;
-nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search_inv(Bin, [], GCs) of
[Cs] when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps),
nth_lexeme_m(Cs1, Seps, N-1);
@@ -1100,16 +1299,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs1, Seps)
end;
-lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} -> lexeme_skip(Cont0, Seps);
- Cs -> Cs
+ Cs -> tl(unicode_util:gc(Cs))
end;
lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
@@ -1118,7 +1318,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
@@ -1127,12 +1327,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
[] ->
[]
end;
-lexeme_skip(Bin, Seps) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+lexeme_skip(Bin, Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} -> <<>>;
- [Left] -> Left
+ [Left] -> tl(unicode_util:gc(Left))
end.
+find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) ->
+ case C1 of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_l(Cs, Needle);
+ _ -> Cs0
+ end;
+ _ ->
+ find_l(Cs, Needle)
+ end;
find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch, _, Cont} ->
@@ -1157,6 +1368,16 @@ find_l(Bin, Needle) ->
{_Before, [Cs], _After} -> Cs
end.
+find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) ->
+ case Cp of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_r(Cs, Needle, Res);
+ _ -> find_r(Cs, Needle, Cs0)
+ end;
+ _ ->
+ find_r(Cs, Needle, Res)
+ end;
find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch,_,Cont} ->
@@ -1227,11 +1448,6 @@ cp_prefix_1(Orig, Until, Cont) ->
%% Binary special
-bin_search(Bin, Seps) ->
- bin_search(Bin, [], Seps).
-
-bin_search(_Bin, Cont, {[],_,_}) ->
- {nomatch, Cont};
bin_search(Bin, Cont, {Seps,_,BP}) ->
bin_search_loop(Bin, 0, BP, Cont, Seps).
@@ -1239,10 +1455,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) ->
%% i.e. å in nfd form $a "COMBINING RING ABOVE"
%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
%% combined with other characters are currently ignored.
+search_pattern({_,_,_}=P) -> P;
search_pattern(Seps) ->
CPs = search_cp(Seps),
- Bin = bin_pattern(CPs),
- {Seps, CPs, Bin}.
+ {Seps, CPs, undefined}.
+
+search_compile({Sep, CPs, undefined}) ->
+ {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))};
+search_compile({_,_,_}=Compiled) -> Compiled.
search_cp([CP|Seps]) when is_integer(CP) ->
[CP|search_cp(Seps)];
@@ -1263,9 +1483,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
case binary:match(Bin, BinSeps) of
nomatch ->
{nomatch,Cont};
+ {Where, _CL} when Cont =:= [] ->
+ <<_:Where/binary, Cont1/binary>> = Bin,
+ [GC|Cont2] = unicode_util:gc(Cont1),
+ case lists:member(GC, Seps) of
+ false when Cont2 =:= [] ->
+ {nomatch, []};
+ false ->
+ Next = byte_size(Bin0) - byte_size(Cont2),
+ bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+ true ->
+ [Cont1]
+ end;
{Where, _CL} ->
<<_:Where/binary, Cont0/binary>> = Bin,
- Cont1 = stack(Cont0, Cont),
+ Cont1 = [Cont0|Cont],
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false ->
@@ -1273,55 +1505,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
[BinR|Cont] when is_binary(BinR) ->
Next = byte_size(Bin0) - byte_size(BinR),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
- BinR when is_binary(BinR), Cont =:= [] ->
- Next = byte_size(Bin0) - byte_size(BinR),
- bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
_ ->
{nomatch, Cont2}
end;
- true when is_list(Cont1) ->
- Cont1;
true ->
- [Cont1]
+ Cont1
end
end.
-bin_search_inv(Bin, Cont, {[], _, _}) ->
- [Bin|Cont];
-bin_search_inv(Bin, Cont, {[Sep], _, _}) ->
- bin_search_inv_1([Bin|Cont], Sep);
-bin_search_inv(Bin, Cont, {Seps, _, _}) ->
- bin_search_inv_n([Bin|Cont], Seps).
-
-bin_search_inv_1([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) ->
- case unicode_util:gc(CPs) of
- [Sep|Bin] when is_binary(Bin), Cont =:= [] ->
- bin_search_inv_1([Bin], Sep);
- [Sep|[Bin|Cont]=Cs] when is_binary(Bin) ->
- bin_search_inv_1(Cs, Sep);
- [Sep|Cs] ->
- {nomatch, Cs};
- _ -> CPs
- end.
+bin_search_inv(<<>>, Cont, _) ->
+ {nomatch, Cont};
+bin_search_inv(Bin, Cont, [Sep]) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+bin_search_inv(Bin, Cont, Seps) ->
+ bin_search_inv_n(Bin, Cont, Seps).
+
+bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case CP1 of
+ Sep -> bin_search_inv_1(BinRest, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ case unicode_util:gc(Bin0) of
+ [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ ->
+ case unicode_util:gc([Bin0|Cont]) of
+ [Sep|[Bin|Cont]] when is_binary(Bin) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+ [Sep|Cs] ->
+ {nomatch, Cs};
+ _ -> [Bin0|Cont]
+ end
+ end;
+bin_search_inv_1(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_1([], Cont, _Sep) ->
+ {nomatch, Cont}.
-bin_search_inv_n([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) ->
- [C|Cs0] = unicode_util:gc(CPs),
- case {lists:member(C, Seps), Cs0} of
- {true, Cs} when is_binary(Cs), Cont =:= [] ->
- bin_search_inv_n([Cs], Seps);
- {true, [Bin|Cont]=Cs} when is_binary(Bin) ->
- bin_search_inv_n(Cs, Seps);
- {true, Cs} -> {nomatch, Cs};
- {false, _} -> CPs
- end.
+bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case lists:member(CP1,Seps) of
+ true -> bin_search_inv_n(BinRest, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ [GC|Bin] = unicode_util:gc(Bin0),
+ case lists:member(GC, Seps) of
+ true -> bin_search_inv_n(Bin, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ ->
+ [GC|Cs0] = unicode_util:gc([Bin0|Cont]),
+ case lists:member(GC, Seps) of
+ false -> [Bin0|Cont];
+ true ->
+ case Cs0 of
+ [Bin|Cont] when is_binary(Bin) ->
+ bin_search_inv_n(Bin, Cont, Seps);
+ _ ->
+ {nomatch, Cs0}
+ end
+ end
+ end;
+bin_search_inv_n(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_n([], Cont, _Sep) ->
+ {nomatch, Cont}.
+
+bin_search_str(Bin0, Start, [], SearchCPs) ->
+ Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
+ bin_search_str_1(Bin0, Start, Compiled, SearchCPs);
bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
+ First = binary:compile_pattern(<<CP/utf8>>),
+ bin_search_str_2(Bin0, Start, Cont, First, SearchCPs).
+
+bin_search_str_1(Bin0, Start, First, SearchCPs) ->
+ <<_:Start/binary, Bin/binary>> = Bin0,
+ case binary:match(Bin, First) of
+ nomatch -> {nomatch, byte_size(Bin0), []};
+ {Where0, _} ->
+ Where = Start+Where0,
+ <<Keep:Where/binary, Cs0/binary>> = Bin0,
+ case prefix_1(Cs0, SearchCPs) of
+ nomatch ->
+ <<_/utf8, Cs/binary>> = Cs0,
+ KeepSz = byte_size(Bin0) - byte_size(Cs),
+ bin_search_str_1(Bin0, KeepSz, First, SearchCPs);
+ [] ->
+ {Keep, [Cs0], <<>>};
+ Rest ->
+ {Keep, [Cs0], Rest}
+ end
+ end.
+
+bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
- case binary:match(Bin, <<CP/utf8>>) of
+ case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), Cont};
{Where0, _} ->
Where = Start+Where0,
@@ -1330,7 +1615,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
case prefix_1(stack(Cs0,Cont), SearchCPs) of
nomatch when is_binary(Cs) ->
KeepSz = byte_size(Bin0) - byte_size(Cs),
- bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+ bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs);
nomatch ->
{nomatch, Where, stack([GC|Cs],Cont)};
[] ->
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 22212da222..a84679c595 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,7 +226,8 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([normalize/1, parse/1,
+-export([compose_query/1, compose_query/2,
+ dissect_query/1, normalize/1, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -381,6 +382,76 @@ transcode(URIString, Options) when is_list(URIString) ->
end.
+%%-------------------------------------------------------------------------
+%% Functions for working with the query part of a URI as a list
+%% of key/value pairs.
+%% HTML5 - 4.10.22.6 URL-encoded form data
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Compose urlencoded query string from a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded encoding algorithm)
+%%-------------------------------------------------------------------------
+-spec compose_query(QueryList) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ QueryString :: uri_string()
+ | error().
+compose_query(List) ->
+ compose_query(List, [{encoding, utf8}]).
+
+
+-spec compose_query(QueryList, Options) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ Options :: [{encoding, atom()}],
+ QueryString :: uri_string()
+ | error().
+compose_query([],_Options) ->
+ [];
+compose_query(List, Options) ->
+ try compose_query(List, Options, false, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+%%
+compose_query([{Key,Value}|Rest], Options, IsList, Acc) ->
+ Separator = get_separator(Rest),
+ K = form_urlencode(Key, Options),
+ V = form_urlencode(Value, Options),
+ IsListNew = IsList orelse is_list(Key) orelse is_list(Value),
+ compose_query(Rest, Options, IsListNew, <<Acc/binary,K/binary,"=",V/binary,Separator/binary>>);
+compose_query([], _Options, IsList, Acc) ->
+ case IsList of
+ true -> convert_to_list(Acc, utf8);
+ false -> Acc
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Dissect a query string into a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded decoding algorithm)
+%%-------------------------------------------------------------------------
+-spec dissect_query(QueryString) -> QueryList when
+ QueryString :: uri_string(),
+ QueryList :: [{uri_string(), uri_string()}]
+ | error().
+dissect_query(<<>>) ->
+ [];
+dissect_query([]) ->
+ [];
+dissect_query(QueryString) when is_list(QueryString) ->
+ try
+ B = convert_to_binary(QueryString, utf8, utf8),
+ dissect_query_key(B, true, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+dissect_query(QueryString) ->
+ try dissect_query_key(QueryString, false, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
%%%========================================================================
%%% Internal functions
%%%========================================================================
@@ -585,6 +656,7 @@ maybe_add_path(Map) ->
end.
+
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
{_, URI1} = parse_hier(Rest, URI),
@@ -1673,6 +1745,161 @@ percent_encode_segment(Segment) ->
%%-------------------------------------------------------------------------
+%% Helper functions for compose_query
+%%-------------------------------------------------------------------------
+
+%% Returns separator to be used between key-value pairs
+get_separator(L) when length(L) =:= 0 ->
+ <<>>;
+get_separator(_L) ->
+ <<"&">>.
+
+
+%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ html5_byte_encode(base10_encode(B));
+form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) ->
+ html5_byte_encode(base10_encode(Cs));
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_list(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ B = convert_to_binary(Cs, utf8, Encoding),
+ html5_byte_encode(B);
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_binary(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ html5_byte_encode(Cs);
+form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) ->
+ throw({error,invalid_encoding, Encoding});
+form_urlencode(Cs, _) ->
+ throw({error,invalid_input, Cs}).
+
+
+%% For each character in the entry's name and value that cannot be expressed using
+%% the selected character encoding, replace the character by a string consisting of
+%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII
+%% digits representing the Unicode code point of the character in base ten, and
+%% finally a ";" (U+003B) character.
+base10_encode(Cs) ->
+ base10_encode(Cs, <<>>).
+%%
+base10_encode(<<>>, Acc) ->
+ Acc;
+base10_encode(<<H/utf8,T/binary>>, Acc) when H > 255 ->
+ Base10 = convert_to_binary(integer_to_list(H,10), utf8, utf8),
+ base10_encode(T, <<Acc/binary,"&#",Base10/binary,$;>>);
+base10_encode(<<H/utf8,T/binary>>, Acc) ->
+ base10_encode(T, <<Acc/binary,H>>).
+
+
+html5_byte_encode(B) ->
+ html5_byte_encode(B, <<>>).
+%%
+html5_byte_encode(<<>>, Acc) ->
+ Acc;
+html5_byte_encode(<<$ ,T/binary>>, Acc) ->
+ html5_byte_encode(T, <<Acc/binary,$+>>);
+html5_byte_encode(<<H,T/binary>>, Acc) ->
+ case is_url_char(H) of
+ true ->
+ html5_byte_encode(T, <<Acc/binary,H>>);
+ false ->
+ <<A:4,B:4>> = <<H>>,
+ html5_byte_encode(T, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>)
+ end;
+html5_byte_encode(H, _Acc) ->
+ throw({error,invalid_input, H}).
+
+
+%% Return true if input char can appear in form-urlencoded string
+%% Allowed chararacters:
+%% 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A,
+%% 0x5F, 0x61 to 0x7A
+is_url_char(C)
+ when C =:= 16#2A; C =:= 16#2D;
+ C =:= 16#2E; C =:= 16#5F;
+ 16#30 =< C, C =< 16#39;
+ 16#41 =< C, C =< 16#5A;
+ 16#61 =< C, C =< 16#7A -> true;
+is_url_char(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for dissect_query
+%%-------------------------------------------------------------------------
+dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, Value);
+dissect_query_key(<<"&#",T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,"&#">>, Value);
+dissect_query_key(<<$&,_T/binary>>, _IsList, _Acc, _Key, _Value) ->
+ throw({error, missing_value, "&"});
+dissect_query_key(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,H>>, Value);
+dissect_query_key(B, _, _, _, _) ->
+ throw({error, missing_value, B}).
+
+
+dissect_query_value(<<$&,T/binary>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ dissect_query_key(T, IsList, [{K,V}|Acc], <<>>, <<>>);
+dissect_query_value(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, <<Value/binary,H>>);
+dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ lists:reverse([{K,V}|Acc]).
+
+
+%% Form-urldecode input based on RFC 1866 [8.2.1]
+form_urldecode(true, B) ->
+ Result = base10_decode(form_urldecode(B, <<>>)),
+ convert_to_list(Result, utf8);
+form_urldecode(false, B) ->
+ base10_decode(form_urldecode(B, <<>>));
+form_urldecode(<<>>, Acc) ->
+ Acc;
+form_urldecode(<<$+,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,$ >>);
+form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ V = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ form_urldecode(T, <<Acc/binary, V>>);
+ false ->
+ L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8),
+ throw({error, invalid_percent_encoding, L})
+ end;
+form_urldecode(<<H/utf8,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,H/utf8>>);
+form_urldecode(<<H,_/binary>>, _Acc) ->
+ throw({error, invalid_character, [H]}).
+
+base10_decode(Cs) ->
+ base10_decode(Cs, <<>>).
+%
+base10_decode(<<>>, Acc) ->
+ Acc;
+base10_decode(<<"&#",T/binary>>, Acc) ->
+ base10_decode_unicode(T, Acc);
+base10_decode(<<H/utf8,T/binary>>, Acc) ->
+ base10_decode(T,<<Acc/binary,H/utf8>>);
+base10_decode(<<H,_/binary>>, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+base10_decode_unicode(B, Acc) ->
+ base10_decode_unicode(B, 0, Acc).
+%%
+base10_decode_unicode(<<H/utf8,T/binary>>, Codepoint, Acc) when $0 =< H, H =< $9 ->
+ Res = Codepoint * 10 + (H - $0),
+ base10_decode_unicode(T, Res, Acc);
+base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc) ->
+ base10_decode(T, <<Acc/binary,Codepoint/utf8>>);
+base10_decode_unicode(<<H,_/binary>>, _, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+%%-------------------------------------------------------------------------
%% Helper functions for normalize
%%-------------------------------------------------------------------------