aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/digraph.xml4
-rw-r--r--lib/stdlib/doc/src/ets.xml2
-rw-r--r--lib/stdlib/doc/src/notes.xml43
-rw-r--r--lib/stdlib/doc/src/unicode.xml9
-rw-r--r--lib/stdlib/doc/src/uri_string.xml98
-rw-r--r--lib/stdlib/src/base64.erl597
-rw-r--r--lib/stdlib/src/erl_compile.erl3
-rw-r--r--lib/stdlib/src/erl_lint.erl44
-rw-r--r--lib/stdlib/src/erl_parse.yrl82
-rw-r--r--lib/stdlib/src/gen.erl1
-rw-r--r--lib/stdlib/src/string.erl747
-rw-r--r--lib/stdlib/src/uri_string.erl229
-rw-r--r--lib/stdlib/test/base64_SUITE.erl109
-rw-r--r--lib/stdlib/test/erl_lint_SUITE.erl39
-rw-r--r--lib/stdlib/test/filelib_SUITE.erl22
-rw-r--r--lib/stdlib/test/qlc_SUITE.erl23
-rw-r--r--lib/stdlib/test/rand_SUITE.erl208
-rw-r--r--lib/stdlib/test/stdlib_bench_SUITE.erl107
-rw-r--r--lib/stdlib/test/string_SUITE.erl122
-rw-r--r--lib/stdlib/test/unicode_util_SUITE.erl17
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl86
-rwxr-xr-xlib/stdlib/uc_spec/gen_unicode_mod.escript31
-rw-r--r--lib/stdlib/vsn.mk2
23 files changed, 1989 insertions, 636 deletions
diff --git a/lib/stdlib/doc/src/digraph.xml b/lib/stdlib/doc/src/digraph.xml
index 5332d7aba5..db96beed6c 100644
--- a/lib/stdlib/doc/src/digraph.xml
+++ b/lib/stdlib/doc/src/digraph.xml
@@ -170,6 +170,10 @@
<p>If the edge would create a cycle in
an <seealso marker="#acyclic_digraph">acyclic digraph</seealso>,
<c>{error,&nbsp;{bad_edge,&nbsp;<anno>Path</anno>}}</c> is returned.
+ If <c><anno>G</anno></c> already has an edge with value
+ <c><anno>E</anno></c> connecting a different pair of vertices,
+ <c>{error,&nbsp;{bad_edge,&nbsp;[<anno>V1</anno>,&nbsp;<anno>V2</anno>]}}</c>
+ is returned.
If either of <c><anno>V1</anno></c> or <c><anno>V2</anno></c> is not
a vertex of digraph <c><anno>G</anno></c>,
<c>{error,&nbsp;{bad_vertex,&nbsp;</c><anno>V</anno><c>}}</c> is
diff --git a/lib/stdlib/doc/src/ets.xml b/lib/stdlib/doc/src/ets.xml
index 576959b1c8..a0ec22c515 100644
--- a/lib/stdlib/doc/src/ets.xml
+++ b/lib/stdlib/doc/src/ets.xml
@@ -1961,7 +1961,7 @@ true</pre>
The return value is a list of the new counter values from each
update operation in the same order as in the operation list. If an
empty list is specified, nothing is updated and an empty list is
- returned. If the function fails, no updates is done.</p>
+ returned. If the function fails, no updates are done.</p>
<p>The specified <c><anno>Key</anno></c> is used to identify the object
by either <em>matching</em> the key of an object in a <c>set</c>
table, or <em>compare equal</em> to the key of an object in an
diff --git a/lib/stdlib/doc/src/notes.xml b/lib/stdlib/doc/src/notes.xml
index d396f1de8f..b61e5b9b9e 100644
--- a/lib/stdlib/doc/src/notes.xml
+++ b/lib/stdlib/doc/src/notes.xml
@@ -31,6 +31,49 @@
</header>
<p>This document describes the changes made to the STDLIB application.</p>
+<section><title>STDLIB 3.4.3</title>
+
+ <section><title>Fixed Bugs and Malfunctions</title>
+ <list>
+ <item>
+ <p> Make <c>ets:i/1</c> exit cleaner when ^D is input
+ while browsing a table. Only the old Erlang shell is
+ affected (<c>erl(1)</c> flag <c>-oldshell</c>). </p>
+ <p>
+ Own Id: OTP-14663</p>
+ </item>
+ <item>
+ <p>
+ Fixed handling of windows UNC paths in module
+ <c>filename</c>.</p>
+ <p>
+ Own Id: OTP-14693</p>
+ </item>
+ </list>
+ </section>
+
+
+ <section><title>Improvements and New Features</title>
+ <list>
+ <item>
+ <p>
+ Improve performance of the new string functionality when
+ handling ASCII characters.</p>
+ <p>
+ Own Id: OTP-14670</p>
+ </item>
+ <item>
+ <p>
+ Added a clarification to the documentation of
+ <c>unicode:characters_to_list/2</c>.</p>
+ <p>
+ Own Id: OTP-14798</p>
+ </item>
+ </list>
+ </section>
+
+</section>
+
<section><title>STDLIB 3.4.2</title>
<section><title>Fixed Bugs and Malfunctions</title>
diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml
index e86f45431f..d822aca89c 100644
--- a/lib/stdlib/doc/src/unicode.xml
+++ b/lib/stdlib/doc/src/unicode.xml
@@ -239,8 +239,13 @@
<c><anno>InEncoding</anno></c>.</p>
</item>
</list>
- <p>Only when <c><anno>InEncoding</anno></c> is one of the UTF
- encodings, integers in the list are allowed to be &gt; 255.</p>
+ <p>
+ Note that integers in the list always represent code points
+ regardless of <c><anno>InEncoding</anno></c> passed. If
+ <c><anno>InEncoding</anno> latin1</c> is passed, only code
+ points &lt; 256 are allowed; otherwise, all valid unicode code
+ points are allowed.
+ </p>
<p>If <c><anno>InEncoding</anno></c> is <c>latin1</c>, parameter
<c><anno>Data</anno></c> corresponds to the <c>iodata()</c> type,
but for <c>unicode</c>, parameter <c><anno>Data</anno></c> can
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 9ace2b0a05..21f470e763 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -31,7 +31,8 @@
<modulesummary>URI processing functions.</modulesummary>
<description>
<p>This module contains functions for parsing and handling URIs
- (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>).
+ (<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
+ form-urlencoded query strings (<url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>).
</p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
@@ -71,6 +72,13 @@
<item>Transforming URIs into a normalized form<br></br>
<seealso marker="#normalize/1"><c>normalize/1</c></seealso>
</item>
+ <item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
+ <seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
+ <seealso marker="#compose_query/2"><c>compose_query/2</c></seealso>
+ </item>
+ <item>Dissecting form-urlencoded query strings into a list of key-value pairs<br></br>
+ <seealso marker="#dissect_query/1"><c>dissect_query/1</c></seealso>
+ </item>
</list>
<p>There are four different encodings present during the handling of URIs:</p>
<list type="bulleted">
@@ -102,12 +110,15 @@
<desc>
<p>Error tuple indicating the type of error. Possible values of the second component:</p>
<list type="bulleted">
+ <item><c>invalid_character</c></item>
+ <item><c>invalid_encoding</c></item>
<item><c>invalid_input</c></item>
<item><c>invalid_map</c></item>
<item><c>invalid_percent_encoding</c></item>
<item><c>invalid_scheme</c></item>
<item><c>invalid_uri</c></item>
<item><c>invalid_utf8</c></item>
+ <item><c>missing_value</c></item>
</list>
<p>The third component is a term providing additional information about the
cause of the error.</p>
@@ -134,6 +145,91 @@
<funcs>
<func>
+ <name name="compose_query" arity="1"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ specification.
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}]).</input>
+<![CDATA["foo+bar=1&city=%C3%B6rebro"]]>
+2> <![CDATA[uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"örebro"/utf8>>}]).]]>
+<![CDATA[<<"foo+bar=1&city=%C3%B6rebro">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="compose_query" arity="2"/>
+ <fsummary>Compose urlencoded query string.</fsummary>
+ <desc>
+ <p>Same as <c>compose_query/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls the encoding ("charset")
+ used by the encoding algorithm. There are two supported encodings: <c>utf8</c>
+ (or <c>unicode</c>) and <c>latin1</c>.
+ </p>
+ <p>Each character in the entry's name and value that cannot be expressed using
+ the selected character encoding, is replaced by a string consisting of a U+0026
+ AMPERSAND character (<![CDATA[&]]>), a "#" (U+0023) character, one or more ASCII
+ digits representing the Unicode code point of the character in base ten, and
+ finally a ";" (U+003B) character.
+ </p>
+ <p>Bytes that are out of the range 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A, 0x5F,
+ 0x61 to 0x7A, are percent-encoded (U+0025 PERCENT SIGN character (%) followed by
+ uppercase ASCII hex digits representing the hexadecimal value of the byte).
+ </p>
+ <p>See also the opposite operation <seealso marker="#dissect_query/1">
+ <c>dissect_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:compose_query([{"foo bar","1"},{"city","örebro"}],</input>
+1> [{encoding, latin1}]).
+<![CDATA["foo+bar=1&city=%F6rebro"
+2> uri_string:compose_query([{<<"foo bar">>,<<"1">>},
+2> {<<"city">>,<<"東京"/utf8>>}], [{encoding, latin1}]).]]>
+<![CDATA[<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="dissect_query" arity="1"/>
+ <fsummary>Dissect query string.</fsummary>
+ <desc>
+ <p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
+ <c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
+ Form-urlencoding is defined in section
+ 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ specification.
+ </p>
+ <p>It is not as strict for its input as the decoding algorithm defined by
+ <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
+ and accepts all unicode characters.</p>
+ <p>See also the opposite operation <seealso marker="#compose_query/1">
+ <c>compose_query/1</c></seealso>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input><![CDATA[uri_string:dissect_query("foo+bar=1&city=%C3%B6rebro").]]></input>
+[{"foo bar","1"},{"city","örebro"}]
+2> <![CDATA[uri_string:dissect_query(<<"foo+bar=1&city=%26%2326481%3B%26%2320140%3B">>).]]>
+<![CDATA[[{<<"foo bar">>,<<"1">>},
+ {<<"city">>,<<230,157,177,228,186,172>>}] ]]>
+ </pre>
+ </desc>
+ </func>
+
+ <func>
<name name="normalize" arity="1"/>
<fsummary>Syntax-based normalization.</fsummary>
<desc>
diff --git a/lib/stdlib/src/base64.erl b/lib/stdlib/src/base64.erl
index c8cf6fdffe..6ea4147abf 100644
--- a/lib/stdlib/src/base64.erl
+++ b/lib/stdlib/src/base64.erl
@@ -24,22 +24,11 @@
-export([encode/1, decode/1, mime_decode/1,
encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]).
-%%-------------------------------------------------------------------------
%% The following type is a subtype of string() for return values
%% of (some) functions of this module.
-%%-------------------------------------------------------------------------
-
-type ascii_string() :: [1..255].
-type ascii_binary() :: binary().
-%%-------------------------------------------------------------------------
-%% encode_to_string(ASCII) -> Base64String
-%% ASCII - string() | binary()
-%% Base64String - string()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
-
-spec encode_to_string(Data) -> Base64String when
Data :: ascii_string() | ascii_binary(),
Base64String :: ascii_string().
@@ -47,66 +36,67 @@
encode_to_string(Bin) when is_binary(Bin) ->
encode_to_string(binary_to_list(Bin));
encode_to_string(List) when is_list(List) ->
- encode_l(List).
-
-%%-------------------------------------------------------------------------
-%% encode(ASCII) -> Base64
-%% ASCII - string() | binary()
-%% Base64 - binary()
-%%
-%% Description: Encodes a plain ASCII string (or binary) into base64.
-%%-------------------------------------------------------------------------
+ encode_list_to_string(List).
-spec encode(Data) -> Base64 when
Data :: ascii_string() | ascii_binary(),
Base64 :: ascii_binary().
encode(Bin) when is_binary(Bin) ->
- encode_binary(Bin);
+ encode_binary(Bin, <<>>);
encode(List) when is_list(List) ->
- list_to_binary(encode_l(List)).
-
--spec encode_l(ascii_string()) -> ascii_string().
+ encode_list(List, <<>>).
-encode_l([]) ->
+encode_list_to_string([]) ->
[];
-encode_l([A]) ->
- [b64e(A bsr 2),
- b64e((A band 3) bsl 4), $=, $=];
-encode_l([A,B]) ->
- [b64e(A bsr 2),
- b64e(((A band 3) bsl 4) bor (B bsr 4)),
- b64e((B band 15) bsl 2), $=];
-encode_l([A,B,C|Ls]) ->
- BB = (A bsl 16) bor (B bsl 8) bor C,
+encode_list_to_string([B1]) ->
+ [b64e(B1 bsr 2),
+ b64e((B1 band 3) bsl 4), $=, $=];
+encode_list_to_string([B1,B2]) ->
+ [b64e(B1 bsr 2),
+ b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)),
+ b64e((B2 band 15) bsl 2), $=];
+encode_list_to_string([B1,B2,B3|Ls]) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
[b64e(BB bsr 18),
b64e((BB bsr 12) band 63),
b64e((BB bsr 6) band 63),
- b64e(BB band 63) | encode_l(Ls)].
-
-encode_binary(Bin) ->
- Split = 3*(byte_size(Bin) div 3),
- <<Main0:Split/binary,Rest/binary>> = Bin,
- Main = << <<(b64e(C)):8>> || <<C:6>> <= Main0 >>,
- case Rest of
- <<A:6,B:6,C:4>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B)):8,(b64e(C bsl 2)):8,$=:8>>;
- <<A:6,B:2>> ->
- <<Main/binary,(b64e(A)):8,(b64e(B bsl 4)):8,$=:8,$=:8>>;
- <<>> ->
- Main
- end.
+ b64e(BB band 63) | encode_list_to_string(Ls)].
-%%-------------------------------------------------------------------------
-%% mime_decode(Base64) -> ASCII
-%% decode(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+encode_binary(<<>>, A) ->
+ A;
+encode_binary(<<B1:8>>, A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_binary(<<B1:8, B2:8>>, A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_binary(Ls,
+ <<A/bits,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+encode_list([], A) ->
+ A;
+encode_list([B1], A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
+encode_list([B1,B2], A) ->
+ <<A/bits,(b64e(B1 bsr 2)):8,
+ (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
+ (b64e((B2 band 15) bsl 2)):8, $=:8>>;
+encode_list([B1,B2,B3|Ls], A) ->
+ BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
+ encode_list(Ls,
+ <<A/bits,(b64e(BB bsr 18)):8,
+ (b64e((BB bsr 12) band 63)):8,
+ (b64e((BB bsr 6) band 63)):8,
+ (b64e(BB band 63)):8>>).
+
+%% mime_decode strips away all characters not Base64 before
+%% converting, whereas decode crashes if an illegal character is found
-spec decode(Base64) -> Data when
Base64 :: ascii_string() | ascii_binary(),
@@ -122,32 +112,13 @@ decode(List) when is_list(List) ->
Data :: ascii_binary().
mime_decode(Bin) when is_binary(Bin) ->
- mime_decode_binary(<<>>, Bin);
+ mime_decode_binary(Bin, <<>>);
mime_decode(List) when is_list(List) ->
- mime_decode(list_to_binary(List)).
+ mime_decode_list(List, <<>>).
--spec decode_l(ascii_string()) -> ascii_string().
-
-decode_l(List) ->
- L = strip_spaces(List, []),
- decode(L, []).
-
--spec mime_decode_l(ascii_string()) -> ascii_string().
-
-mime_decode_l(List) ->
- L = strip_illegal(List, [], 0),
- decode(L, []).
-
-%%-------------------------------------------------------------------------
-%% mime_decode_to_string(Base64) -> ASCII
-%% decode_to_string(Base64) -> ASCII
-%% Base64 - string() | binary()
-%% ASCII - binary()
-%%
-%% Description: Decodes an base64 encoded string to plain ASCII.
-%% mime_decode strips away all characters not Base64 before converting,
-%% whereas decode crashes if an illegal character is found
-%%-------------------------------------------------------------------------
+%% mime_decode_to_string strips away all characters not Base64 before
+%% converting, whereas decode_to_string crashes if an illegal
+%% character is found
-spec decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
@@ -156,7 +127,7 @@ mime_decode_l(List) ->
decode_to_string(Bin) when is_binary(Bin) ->
decode_to_string(binary_to_list(Bin));
decode_to_string(List) when is_list(List) ->
- decode_l(List).
+ decode_list_to_string(List).
-spec mime_decode_to_string(Base64) -> DataString when
Base64 :: ascii_string() | ascii_binary(),
@@ -165,115 +136,195 @@ decode_to_string(List) when is_list(List) ->
mime_decode_to_string(Bin) when is_binary(Bin) ->
mime_decode_to_string(binary_to_list(Bin));
mime_decode_to_string(List) when is_list(List) ->
- mime_decode_l(List).
-
-%% One-based decode map.
--define(DECODE_MAP,
- {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
- ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
- 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
- bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
- 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
- bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
- 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
- bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+ mime_decode_list_to_string(List).
-decode_binary(<<C1:8, Cs/bits>>, A) ->
- case element(C1, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A);
- B1 -> decode_binary(Cs, A, B1)
+%% Skipping pad character if not at end of string. Also liberal about
+%% excess padding and skipping of other illegal (non-base64 alphabet)
+%% characters. See section 3.3 of RFC4648
+mime_decode_list([0 | Cs], A) ->
+ mime_decode_list(Cs, A);
+mime_decode_list([C1 | Cs], A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1);
+ _ -> mime_decode_list(Cs, A) % eq is padding
end;
-decode_binary(<<>>, A) ->
+mime_decode_list([], A) ->
A.
-decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
- case element(C2, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1);
- B2 -> decode_binary(Cs, A, B1, B2)
+mime_decode_list([0 | Cs], A, B1) ->
+ mime_decode_list(Cs, A, B1);
+mime_decode_list([C2 | Cs], A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+ _ -> mime_decode_list(Cs, A, B1) % eq is padding
end.
-decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2);
- B3 -> decode_binary(Cs, A, B1, B2, B3)
+mime_decode_list([0 | Cs], A, B1, B2) ->
+ mime_decode_list(Cs, A, B1, B2);
+mime_decode_list([C3 | Cs], A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2)
end.
-decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
- ws -> decode_binary(Cs, A, B1, B2, B3);
- eq when B3 =:= eq -> only_ws_binary(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
- eq -> only_ws_binary(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
- B4 -> decode_binary(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+mime_decode_list([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list(Cs, A, B1, B2, B3);
+mime_decode_list([C4 | Cs], A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
+ eq ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_list(Cs, A, B1, B2, B3)
end.
-only_ws_binary(<<>>, A) ->
- A;
-only_ws_binary(<<C:8, Cs/bits>>, A) ->
- case element(C, ?DECODE_MAP) of
- ws -> only_ws_binary(Cs, A);
- _ -> erlang:error(function_clause)
+mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) ->
+ mime_decode_list_after_eq(Cs, A, B1, B2, B3);
+mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_list(Cs, A, B1, B2, B);
+ _ -> mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3)
+ end;
+mime_decode_list_after_eq([], A, B1, B2, eq) ->
+ <<A/bits,B1:6,(B2 bsr 4):2>>;
+mime_decode_list_after_eq([], A, B1, B2, B3) ->
+ <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A) ->
+ mime_decode_binary(Cs, A);
+mime_decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1);
+ _ -> mime_decode_binary(Cs, A) % eq is padding
+ end;
+mime_decode_binary(<<>>, A) ->
+ A.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1) ->
+ mime_decode_binary(Cs, A, B1);
+mime_decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+ _ -> mime_decode_binary(Cs, A, B1) % eq is padding
end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-mime_decode_binary(Result, <<0:8,T/bits>>) ->
- mime_decode_binary(Result, T);
-mime_decode_binary(Result0, <<C:8,T/bits>>) ->
- case element(C, ?DECODE_MAP) of
- Bits when is_integer(Bits) ->
- mime_decode_binary(<<Result0/bits,Bits:6>>, T);
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) ->
+ mime_decode_binary(Cs, A, B1, B2);
+mime_decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+ eq=B3 ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2)
+ end.
+
+mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary(Cs, A, B1, B2, B3);
+mime_decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
eq ->
- mime_decode_binary_after_eq(Result0, T, false);
- _ ->
- mime_decode_binary(Result0, T)
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+ _ -> mime_decode_binary(Cs, A, B1, B2, B3)
+ end.
+
+mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
+ mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
+mime_decode_binary_after_eq(<<C:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
+ %% More valid data, skip the eq as invalid
+ case B3 of
+ eq -> mime_decode_binary(Cs, A, B1, B2, B);
+ _ -> mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
+ end;
+ _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3)
end;
-mime_decode_binary(Result, _) ->
- true = is_binary(Result),
- Result.
-
-mime_decode_binary_after_eq(Result, <<0:8,T/bits>>, Eq) ->
- mime_decode_binary_after_eq(Result, T, Eq);
-mime_decode_binary_after_eq(Result0, <<C:8,T/bits>>, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- mime_decode_binary_after_eq(Result0, T, Eq);
- ws ->
- mime_decode_binary_after_eq(Result0, T, Eq);
+mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) ->
+ <<A/bits,B1:6,(B2 bsr 4):2>>;
+mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) ->
+ <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
+
+mime_decode_list_to_string([0 | Cs]) ->
+ mime_decode_list_to_string(Cs);
+mime_decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1);
+ _ -> mime_decode_list_to_string(Cs) % eq is padding
+ end;
+mime_decode_list_to_string([]) ->
+ [].
+
+mime_decode_list_to_string([0 | Cs], B1) ->
+ mime_decode_list_to_string(Cs, B1);
+mime_decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ B2 when is_integer(B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+ _ -> mime_decode_list_to_string(Cs, B1) % eq is padding
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2) ->
+ mime_decode_list_to_string(Cs, B1, B2);
+mime_decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ B3 when is_integer(B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+ eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2)
+ end.
+
+mime_decode_list_to_string([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string(Cs, B1, B2, B3);
+mime_decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ B4 when is_integer(B4) ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)];
eq ->
- mime_decode_binary_after_eq(Result0, T, true);
- Bits when is_integer(Bits) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+ _ -> mime_decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) ->
+ mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
+mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) ->
+ case b64d(C) of
+ B when is_integer(B) ->
%% More valid data, skip the eq as invalid
- mime_decode_binary(<<Result0/bits,Bits:6>>, T)
+ case B3 of
+ eq -> mime_decode_list_to_string(Cs, B1, B2, B);
+ _ ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]
+ end;
+ _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3)
end;
-mime_decode_binary_after_eq(Result0, <<>>, Eq) ->
- %% No more valid data.
- case bit_size(Result0) rem 8 of
- 0 ->
- %% '====' is not uncommon.
- Result0;
- 4 when Eq ->
- %% enforce at least one more '=' only ignoring illegals and spacing
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:4>> = Result0,
- Result;
- 2 ->
- %% remove 2 bits
- Split = byte_size(Result0) - 1,
- <<Result:Split/bytes,_:2>> = Result0,
- Result
- end.
+mime_decode_list_to_string_after_eq([], B1, B2, eq) ->
+ binary_to_list(<<B1:6,(B2 bsr 4):2>>);
+mime_decode_list_to_string_after_eq([], B1, B2, B3) ->
+ binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>).
decode_list([C1 | Cs], A) ->
- case element(C1, ?DECODE_MAP) of
+ case b64d(C1) of
ws -> decode_list(Cs, A);
B1 -> decode_list(Cs, A, B1)
end;
@@ -281,122 +332,130 @@ decode_list([], A) ->
A.
decode_list([C2 | Cs], A, B1) ->
- case element(C2, ?DECODE_MAP) of
+ case b64d(C2) of
ws -> decode_list(Cs, A, B1);
B2 -> decode_list(Cs, A, B1, B2)
end.
decode_list([C3 | Cs], A, B1, B2) ->
- case element(C3, ?DECODE_MAP) of
+ case b64d(C3) of
ws -> decode_list(Cs, A, B1, B2);
B3 -> decode_list(Cs, A, B1, B2, B3)
end.
decode_list([C4 | Cs], A, B1, B2, B3) ->
- case element(C4, ?DECODE_MAP) of
+ case b64d(C4) of
ws -> decode_list(Cs, A, B1, B2, B3);
- eq when B3 =:= eq -> only_ws(Cs, <<A/binary,B1:6,(B2 bsr 4):2>>);
- eq -> only_ws(Cs, <<A/binary,B1:6,B2:6,(B3 bsr 2):4>>);
- B4 -> decode_list(Cs, <<A/binary,B1:6,B2:6,B3:6,B4:6>>)
+ eq when B3 =:= eq -> only_ws(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
end.
-only_ws([], A) ->
- A;
-only_ws([C | Cs], A) ->
- case element(C, ?DECODE_MAP) of
- ws -> only_ws(Cs, A);
- _ -> erlang:error(function_clause)
- end.
+decode_binary(<<C1:8, Cs/bits>>, A) ->
+ case b64d(C1) of
+ ws -> decode_binary(Cs, A);
+ B1 -> decode_binary(Cs, A, B1)
+ end;
+decode_binary(<<>>, A) ->
+ A.
-decode([], A) -> A;
-decode([$=,$=,C2,C1|Cs], A) ->
- Bits2x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12),
- Octet1 = Bits2x6 bsr 16,
- decode(Cs, [Octet1|A]);
-decode([$=,C3,C2,C1|Cs], A) ->
- Bits3x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6),
- Octet1 = Bits3x6 bsr 16,
- Octet2 = (Bits3x6 bsr 8) band 16#ff,
- decode(Cs, [Octet1,Octet2|A]);
-decode([C4,C3,C2,C1| Cs], A) ->
- Bits4x6 = (b64d(C1) bsl 18) bor (b64d(C2) bsl 12)
- bor (b64d(C3) bsl 6) bor b64d(C4),
- Octet1 = Bits4x6 bsr 16,
- Octet2 = (Bits4x6 bsr 8) band 16#ff,
- Octet3 = Bits4x6 band 16#ff,
- decode(Cs, [Octet1,Octet2,Octet3|A]).
+decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
+ case b64d(C2) of
+ ws -> decode_binary(Cs, A, B1);
+ B2 -> decode_binary(Cs, A, B1, B2)
+ end.
-%%%========================================================================
-%%% Internal functions
-%%%========================================================================
+decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_binary(Cs, A, B1, B2);
+ B3 -> decode_binary(Cs, A, B1, B2, B3)
+ end.
-strip_spaces([], A) -> A;
-strip_spaces([$\s|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\t|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\r|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([$\n|Cs], A) -> strip_spaces(Cs, A);
-strip_spaces([C|Cs], A) -> strip_spaces(Cs, [C | A]).
+decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
+ case b64d(C4) of
+ ws -> decode_binary(Cs, A, B1, B2, B3);
+ eq when B3 =:= eq -> only_ws_binary(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
+ eq -> only_ws_binary(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
+ B4 -> decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
+ end.
-%% Skipping pad character if not at end of string. Also liberal about
-%% excess padding and skipping of other illegal (non-base64 alphabet)
-%% characters. See section 3.3 of RFC4648
-strip_illegal([], A, _Cnt) ->
+only_ws_binary(<<>>, A) ->
A;
-strip_illegal([0|Cs], A, Cnt) ->
- strip_illegal(Cs, A, Cnt);
-strip_illegal([C|Cs], A, Cnt) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- strip_illegal(Cs, A, Cnt);
- ws ->
- strip_illegal(Cs, A, Cnt);
- eq ->
- case {tail_contains_more(Cs, false), Cnt rem 4} of
- {{[], _}, 0} ->
- A; %% Ignore extra =
- {{[], true}, 2} ->
- [$=|[$=|A]]; %% 'XX=='
- {{[], _}, 3} ->
- [$=|A]; %% 'XXX='
- {{[H|T], _}, _} ->
- %% more data, skip equals
- strip_illegal(T, [H|A], Cnt+1)
- end;
- _ ->
- strip_illegal(Cs, [C|A], Cnt+1)
+only_ws_binary(<<C:8, Cs/bits>>, A) ->
+ case b64d(C) of
+ ws -> only_ws_binary(Cs, A)
end.
-%% Search the tail for more valid data and remember if we saw
-%% another equals along the way.
-tail_contains_more([], Eq) ->
- {[], Eq};
-tail_contains_more(<<>>, Eq) ->
- {<<>>, Eq};
-tail_contains_more([C|T]=More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
+decode_list_to_string([C1 | Cs]) ->
+ case b64d(C1) of
+ ws -> decode_list_to_string(Cs);
+ B1 -> decode_list_to_string(Cs, B1)
end;
-tail_contains_more(<<C:8,T/bits>> =More, Eq) ->
- case element(C, ?DECODE_MAP) of
- bad ->
- tail_contains_more(T, Eq);
- ws ->
- tail_contains_more(T, Eq);
- eq ->
- tail_contains_more(T, true);
- _ ->
- {More, Eq}
+decode_list_to_string([]) ->
+ [].
+
+decode_list_to_string([C2 | Cs], B1) ->
+ case b64d(C2) of
+ ws -> decode_list_to_string(Cs, B1);
+ B2 -> decode_list_to_string(Cs, B1, B2)
+ end.
+
+decode_list_to_string([C3 | Cs], B1, B2) ->
+ case b64d(C3) of
+ ws -> decode_list_to_string(Cs, B1, B2);
+ B3 -> decode_list_to_string(Cs, B1, B2, B3)
+ end.
+
+decode_list_to_string([C4 | Cs], B1, B2, B3) ->
+ case b64d(C4) of
+ ws ->
+ decode_list_to_string(Cs, B1, B2, B3);
+ eq when B3 =:= eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>));
+ eq ->
+ only_ws(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>));
+ B4 ->
+ Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
+ Octet1 = Bits4x6 bsr 16,
+ Octet2 = (Bits4x6 bsr 8) band 16#ff,
+ Octet3 = Bits4x6 band 16#ff,
+ [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)]
+ end.
+
+only_ws([], A) ->
+ A;
+only_ws([C | Cs], A) ->
+ case b64d(C) of
+ ws -> only_ws(Cs, A)
end.
-
+
+%%%========================================================================
+%%% Internal functions
+%%%========================================================================
+
%% accessors
+-compile({inline, [{b64d, 1}]}).
+%% One-based decode map.
+b64d(X) ->
+ element(X,
+ {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
+ ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
+ 52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
+ bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
+ 15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
+ bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
+ 41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
+ bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
+
+-compile({inline, [{b64e, 1}]}).
b64e(X) ->
element(X+1,
{$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N,
@@ -404,9 +463,3 @@ b64e(X) ->
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n,
$o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
$0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}).
-
-
-b64d(X) ->
- b64d_ok(element(X, ?DECODE_MAP)).
-
-b64d_ok(I) when is_integer(I) -> I.
diff --git a/lib/stdlib/src/erl_compile.erl b/lib/stdlib/src/erl_compile.erl
index 18d7548fdc..f781312ca2 100644
--- a/lib/stdlib/src/erl_compile.erl
+++ b/lib/stdlib/src/erl_compile.erl
@@ -188,6 +188,8 @@ parse_dep_option("", T) ->
{[makedep,{makedep_output,standard_io}],T};
parse_dep_option("D", T) ->
{[makedep],T};
+parse_dep_option("MD", T) ->
+ {[makedep_side_effect],T};
parse_dep_option("F"++Opt, T0) ->
{File,T} = get_option("MF", Opt, T0),
{[makedep,{makedep_output,File}],T};
@@ -221,6 +223,7 @@ usage() ->
"the dependencies"},
{"-MP","add a phony target for each dependency"},
{"-MD","same as -M -MT file (with default 'file')"},
+ {"-MMD","generate dependencies as a side-effect"},
{"-o name","name output directory or file"},
{"-pa path","add path to the front of Erlang's code path"},
{"-pz path","add path to the end of Erlang's code path"},
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index f58cb35cea..1930c462e8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -144,6 +144,7 @@ value_option(Flag, Default, On, OnVal, Off, OffVal, Opts) ->
:: dict:dict(ta(), #typeinfo{}),
exp_types=gb_sets:empty() %Exported types
:: gb_sets:set(ta()),
+ in_try_head=false :: boolean(), %In a try head.
catch_scope = none %Inside/outside try or catch
:: catch_scope()
}).
@@ -312,6 +313,10 @@ format_error({unused_var, V}) ->
io_lib:format("variable ~w is unused", [V]);
format_error({variable_in_record_def,V}) ->
io_lib:format("variable ~w in record definition", [V]);
+format_error({stacktrace_guard,V}) ->
+ io_lib:format("stacktrace variable ~w must not be used in a guard", [V]);
+format_error({stacktrace_bound,V}) ->
+ io_lib:format("stacktrace variable ~w must not be previously bound", [V]);
%% --- binaries ---
format_error({undefined_bittype,Type}) ->
io_lib:format("bit type ~tw undefined", [Type]);
@@ -3218,11 +3223,11 @@ is_module_dialyzer_option(Option) ->
try_clauses(Scs, Ccs, In, Vt, St0) ->
{Csvt0,St1} = icrt_clauses(Scs, Vt, St0),
- St2 = St1#lint{catch_scope=try_catch},
+ St2 = St1#lint{catch_scope=try_catch,in_try_head=true},
{Csvt1,St3} = icrt_clauses(Ccs, Vt, St2),
Csvt = Csvt0 ++ Csvt1,
UpdVt = icrt_export(Csvt, Vt, In, St3),
- {UpdVt,St3}.
+ {UpdVt,St3#lint{in_try_head=false}}.
%% icrt_clauses(Clauses, In, ImportVarTable, State) ->
%% {UpdVt,State}.
@@ -3239,12 +3244,29 @@ icrt_clauses(Cs, Vt, St) ->
mapfoldl(fun (C, St0) -> icrt_clause(C, Vt, St0) end, St, Cs).
icrt_clause({clause,_Line,H,G,B}, Vt0, #lint{catch_scope=Scope}=St0) ->
- {Hvt,Binvt,St1} = head(H, Vt0, St0),
- Vt1 = vtupdate(Hvt, Binvt),
- {Gvt,St2} = guard(G, vtupdate(Vt1, Vt0), St1),
- Vt2 = vtupdate(Gvt, Vt1),
- {Bvt,St3} = exprs(B, vtupdate(Vt2, Vt0), St2),
- {vtupdate(Bvt, Vt2),St3#lint{catch_scope=Scope}}.
+ Vt1 = taint_stack_var(Vt0, H, St0),
+ {Hvt,Binvt,St1} = head(H, Vt1, St0),
+ Vt2 = vtupdate(Hvt, Binvt),
+ Vt3 = taint_stack_var(Vt2, H, St0),
+ {Gvt,St2} = guard(G, vtupdate(Vt3, Vt0), St1#lint{in_try_head=false}),
+ Vt4 = vtupdate(Gvt, Vt2),
+ {Bvt,St3} = exprs(B, vtupdate(Vt4, Vt0), St2),
+ {vtupdate(Bvt, Vt4),St3#lint{catch_scope=Scope}}.
+
+taint_stack_var(Vt, Pat, #lint{in_try_head=true}) ->
+ [{tuple,_,[_,_,{var,_,Stk}]}] = Pat,
+ case Stk of
+ '_' ->
+ Vt;
+ _ ->
+ lists:map(fun({V,{bound,Used,Lines}}) when V =:= Stk ->
+ {V,{stacktrace,Used,Lines}};
+ (B) ->
+ B
+ end, Vt)
+ end;
+taint_stack_var(Vt, _Pat, #lint{in_try_head=false}) ->
+ Vt.
icrt_export(Vts, Vt, {Tag,Attrs}, St) ->
{_File,Loc} = loc(Attrs, St),
@@ -3484,6 +3506,9 @@ pat_var(V, Line, Vt, Bvt, St) ->
{[{V,{bound,used,Ls}}],[],
%% As this is matching, exported vars are risky.
add_warning(Line, {exported_var,V,From}, St)};
+ {ok,{stacktrace,_Usage,Ls}} ->
+ {[{V,{bound,used,Ls}}],[],
+ add_error(Line, {stacktrace_bound,V}, St)};
error when St#lint.recdef_top ->
{[],[{V,{bound,unused,[Line]}}],
add_error(Line, {variable_in_record_def,V}, St)};
@@ -3541,6 +3566,9 @@ expr_var(V, Line, Vt, St) ->
false ->
{[{V,{{export,From},used,Ls}}],St}
end;
+ {ok,{stacktrace,_Usage,Ls}} ->
+ {[{V,{bound,used,Ls}}],
+ add_error(Line, {stacktrace_guard,V}, St)};
error ->
{[{V,{bound,used,[Line]}}],
add_error(Line, {unbound_var,V}, St)}
diff --git a/lib/stdlib/src/erl_parse.yrl b/lib/stdlib/src/erl_parse.yrl
index 6e72d64acc..14ca24362e 100644
--- a/lib/stdlib/src/erl_parse.yrl
+++ b/lib/stdlib/src/erl_parse.yrl
@@ -29,6 +29,10 @@ clause_args clause_guard clause_body
expr expr_100 expr_150 expr_160 expr_200 expr_300 expr_400 expr_500
expr_600 expr_700 expr_800
expr_max
+pat_expr pat_expr_200 pat_expr_300 pat_expr_400 pat_expr_500
+pat_expr_600 pat_expr_700 pat_expr_800
+pat_expr_max map_pat_expr record_pat_expr
+pat_argument_list pat_exprs
list tail
list_comprehension lc_expr lc_exprs
binary_comprehension
@@ -37,7 +41,7 @@ record_expr record_tuple record_field record_fields
map_expr map_tuple map_field map_field_assoc map_field_exact map_fields map_key
if_expr if_clause if_clauses case_expr cr_clause cr_clauses receive_expr
fun_expr fun_clause fun_clauses atom_or_var integer_or_var
-try_expr try_catch try_clause try_clauses
+try_expr try_catch try_clause try_clauses try_opt_stacktrace
function_call argument_list
exprs guard
atomic strings
@@ -66,7 +70,7 @@ char integer float atom string var
'spec' 'callback' % helper
dot.
-Expect 2.
+Expect 0.
Rootsymbol form.
@@ -210,7 +214,7 @@ function_clause -> atom clause_args clause_guard clause_body :
{clause,?anno('$1'),element(3, '$1'),'$2','$3','$4'}.
-clause_args -> argument_list : element(1, '$1').
+clause_args -> pat_argument_list : element(1, '$1').
clause_guard -> 'when' guard : '$2'.
clause_guard -> '$empty' : [].
@@ -275,6 +279,53 @@ expr_max -> receive_expr : '$1'.
expr_max -> fun_expr : '$1'.
expr_max -> try_expr : '$1'.
+pat_expr -> pat_expr_200 '=' pat_expr : {match,?anno('$2'),'$1','$3'}.
+pat_expr -> pat_expr_200 : '$1'.
+
+pat_expr_200 -> pat_expr_300 comp_op pat_expr_300 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_200 -> pat_expr_300 : '$1'.
+
+pat_expr_300 -> pat_expr_400 list_op pat_expr_300 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_300 -> pat_expr_400 : '$1'.
+
+pat_expr_400 -> pat_expr_400 add_op pat_expr_500 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_400 -> pat_expr_500 : '$1'.
+
+pat_expr_500 -> pat_expr_500 mult_op pat_expr_600 :
+ ?mkop2('$1', '$2', '$3').
+pat_expr_500 -> pat_expr_600 : '$1'.
+
+pat_expr_600 -> prefix_op pat_expr_700 :
+ ?mkop1('$1', '$2').
+pat_expr_600 -> map_pat_expr : '$1'.
+pat_expr_600 -> pat_expr_700 : '$1'.
+
+pat_expr_700 -> record_pat_expr : '$1'.
+pat_expr_700 -> pat_expr_800 : '$1'.
+
+pat_expr_800 -> pat_expr_max : '$1'.
+
+pat_expr_max -> var : '$1'.
+pat_expr_max -> atomic : '$1'.
+pat_expr_max -> list : '$1'.
+pat_expr_max -> binary : '$1'.
+pat_expr_max -> tuple : '$1'.
+pat_expr_max -> '(' pat_expr ')' : '$2'.
+
+map_pat_expr -> '#' map_tuple :
+ {map, ?anno('$1'),'$2'}.
+map_pat_expr -> pat_expr_max '#' map_tuple :
+ {map, ?anno('$2'),'$1','$3'}.
+map_pat_expr -> map_pat_expr '#' map_tuple :
+ {map, ?anno('$2'),'$1','$3'}.
+
+record_pat_expr -> '#' atom '.' atom :
+ {record_index,?anno('$1'),element(3, '$2'),'$4'}.
+record_pat_expr -> '#' atom record_tuple :
+ {record,?anno('$1'),element(3, '$2'),'$3'}.
list -> '[' ']' : {nil,?anno('$1')}.
list -> '[' expr tail : {cons,?anno('$1'),'$2','$3'}.
@@ -397,6 +448,10 @@ case_expr -> 'case' expr 'of' cr_clauses 'end' :
cr_clauses -> cr_clause : ['$1'].
cr_clauses -> cr_clause ';' cr_clauses : ['$1' | '$3'].
+%% FIXME: merl in syntax_tools depends on patterns in a 'case' being
+%% full expressions. Therefore, we can't use pat_expr here. There
+%% should be a better way.
+
cr_clause -> expr clause_guard clause_body :
{clause,?anno('$1'),['$1'],'$2','$3'}.
@@ -424,11 +479,11 @@ integer_or_var -> var : '$1'.
fun_clauses -> fun_clause : ['$1'].
fun_clauses -> fun_clause ';' fun_clauses : ['$1' | '$3'].
-fun_clause -> argument_list clause_guard clause_body :
+fun_clause -> pat_argument_list clause_guard clause_body :
{Args,Anno} = '$1',
{clause,Anno,'fun',Args,'$2','$3'}.
-fun_clause -> var argument_list clause_guard clause_body :
+fun_clause -> var pat_argument_list clause_guard clause_body :
{clause,element(2, '$1'),element(3, '$1'),element(1, '$2'),'$3','$4'}.
try_expr -> 'try' exprs 'of' cr_clauses try_catch :
@@ -446,24 +501,31 @@ try_catch -> 'after' exprs 'end' :
try_clauses -> try_clause : ['$1'].
try_clauses -> try_clause ';' try_clauses : ['$1' | '$3'].
-try_clause -> expr clause_guard clause_body :
+try_clause -> pat_expr clause_guard clause_body :
A = ?anno('$1'),
{clause,A,[{tuple,A,[{atom,A,throw},'$1',{var,A,'_'}]}],'$2','$3'}.
-try_clause -> atom ':' expr clause_guard clause_body :
+try_clause -> atom ':' pat_expr try_opt_stacktrace clause_guard clause_body :
A = ?anno('$1'),
- {clause,A,[{tuple,A,['$1','$3',{var,A,'_'}]}],'$4','$5'}.
-try_clause -> var ':' expr clause_guard clause_body :
+ {clause,A,[{tuple,A,['$1','$3',{var,A,'$4'}]}],'$5','$6'}.
+try_clause -> var ':' pat_expr try_opt_stacktrace clause_guard clause_body :
A = ?anno('$1'),
- {clause,A,[{tuple,A,['$1','$3',{var,A,'_'}]}],'$4','$5'}.
+ {clause,A,[{tuple,A,['$1','$3',{var,A,'$4'}]}],'$5','$6'}.
+try_opt_stacktrace -> ':' var : element(3, '$2').
+try_opt_stacktrace -> '$empty' : '_'.
argument_list -> '(' ')' : {[],?anno('$1')}.
argument_list -> '(' exprs ')' : {'$2',?anno('$1')}.
+pat_argument_list -> '(' ')' : {[],?anno('$1')}.
+pat_argument_list -> '(' pat_exprs ')' : {'$2',?anno('$1')}.
exprs -> expr : ['$1'].
exprs -> expr ',' exprs : ['$1' | '$3'].
+pat_exprs -> pat_expr : ['$1'].
+pat_exprs -> pat_expr ',' pat_exprs : ['$1' | '$3'].
+
guard -> exprs : ['$1'].
guard -> exprs ';' guard : ['$1'|'$3'].
diff --git a/lib/stdlib/src/gen.erl b/lib/stdlib/src/gen.erl
index 33af0aed8f..4b1d448487 100644
--- a/lib/stdlib/src/gen.erl
+++ b/lib/stdlib/src/gen.erl
@@ -49,6 +49,7 @@
| {'logfile', string()}.
-type option() :: {'timeout', timeout()}
| {'debug', [debug_flag()]}
+ | {'hibernate_after', timeout()}
| {'spawn_opt', [proc_lib:spawn_option()]}.
-type options() :: [option()].
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index 5a4d2df2a6..e01bb7d85e 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -74,15 +74,16 @@
-export([to_upper/1, to_lower/1]).
%%
-import(lists,[member/2]).
-
-compile({no_auto_import,[length/1]}).
+-compile({inline, [btoken/2, rev/1, append/2, stack/2, search_compile/1]}).
+-define(ASCII_LIST(CP1,CP2), CP1 < 256, CP2 < 256, CP1 =/= $\r).
-export_type([grapheme_cluster/0]).
-type grapheme_cluster() :: char() | [char()].
-type direction() :: 'leading' | 'trailing'.
--dialyzer({no_improper_lists, stack/2}).
+-dialyzer({no_improper_lists, [stack/2, length_b/3]}).
%%% BIFs internal (not documented) should not to be used outside of this module
%%% May be removed
-export([list_to_float/1, list_to_integer/1]).
@@ -127,8 +128,10 @@ is_empty(_) -> false.
%% Count the number of grapheme clusters in chardata
-spec length(String::unicode:chardata()) -> non_neg_integer().
+length(<<CP1/utf8, Bin/binary>>) ->
+ length_b(Bin, CP1, 0);
length(CD) ->
- length_1(unicode_util:gc(CD), 0).
+ length_1(CD, 0).
%% Convert a string to a list of grapheme clusters
-spec to_graphemes(String::unicode:chardata()) -> [grapheme_cluster()].
@@ -176,6 +179,8 @@ equal(A, B, true, Norm) ->
%% Reverse grapheme clusters
-spec reverse(String::unicode:chardata()) -> [grapheme_cluster()].
+reverse(<<CP1/utf8, Rest/binary>>) ->
+ reverse_b(Rest, CP1, []);
reverse(CD) ->
reverse_1(CD, []).
@@ -186,7 +191,10 @@ reverse(CD) ->
Start :: non_neg_integer(),
Slice :: unicode:chardata().
slice(CD, N) when is_integer(N), N >= 0 ->
- slice_l(CD, N, is_binary(CD)).
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end.
-spec slice(String, Start, Length) -> Slice when
String::unicode:chardata(),
@@ -195,9 +203,15 @@ slice(CD, N) when is_integer(N), N >= 0 ->
Slice :: unicode:chardata().
slice(CD, N, Length)
when is_integer(N), N >= 0, is_integer(Length), Length > 0 ->
- slice_trail(slice_l(CD, N, is_binary(CD)), Length);
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ L -> slice_trail(L, Length)
+ end;
slice(CD, N, infinity) ->
- slice_l(CD, N, is_binary(CD));
+ case slice_l0(CD, N) of
+ [] when is_binary(CD) -> <<>>;
+ Res -> Res
+ end;
slice(CD, _, 0) ->
case is_binary(CD) of
true -> <<>>;
@@ -256,18 +270,22 @@ trim(Str, Dir) ->
Dir :: direction() | 'both',
Characters :: [grapheme_cluster()].
trim(Str, _, []) -> Str;
+trim(Str, leading, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ls(Str, Sep);
trim(Str, leading, Sep) when is_list(Sep) ->
- trim_l(Str, search_pattern(Sep));
-trim(Str, trailing, Sep) when is_list(Sep) ->
- trim_t(Str, 0, search_pattern(Sep));
-trim(Str, both, Sep0) when is_list(Sep0) ->
- Sep = search_pattern(Sep0),
- trim_t(trim_l(Str,Sep), 0, Sep).
+ trim_l(Str, Sep);
+trim(Str, trailing, [Sep]) when is_list(Str), Sep < 256 ->
+ trim_ts(Str, Sep);
+trim(Str, trailing, Seps0) when is_list(Seps0) ->
+ Seps = search_pattern(Seps0),
+ trim_t(Str, 0, Seps);
+trim(Str, both, Sep) when is_list(Sep) ->
+ trim(trim(Str,leading,Sep), trailing, Sep).
%% Delete trailing newlines or \r\n
-spec chomp(String::unicode:chardata()) -> unicode:chardata().
chomp(Str) ->
- trim_t(Str,0, {[[$\r,$\n],$\n], [$\r,$\n], [<<$\r>>,<<$\n>>]}).
+ trim(Str, trailing, [[$\r,$\n],$\n]).
%% Split String into two parts where the leading part consists of Characters
-spec take(String, Characters) -> {Leading, Trailing} when
@@ -300,8 +318,7 @@ take(Str, [], Complement, Dir) ->
{true, leading} -> {Str, Empty};
{true, trailing} -> {Empty, Str}
end;
-take(Str, Sep0, false, leading) ->
- Sep = search_pattern(Sep0),
+take(Str, Sep, false, leading) ->
take_l(Str, Sep, []);
take(Str, Sep0, true, leading) ->
Sep = search_pattern(Sep0),
@@ -461,6 +478,7 @@ replace(String, SearchPattern, Replacement, Where) ->
SeparatorList::[grapheme_cluster()]) ->
[unicode:chardata()].
lexemes([], _) -> [];
+lexemes(Str, []) -> [Str];
lexemes(Str, Seps0) when is_list(Seps0) ->
Seps = search_pattern(Seps0),
lexemes_m(Str, Seps, []).
@@ -494,13 +512,13 @@ find(String, SearchPattern, leading) ->
find(String, SearchPattern, trailing) ->
find_r(String, unicode:characters_to_list(SearchPattern), nomatch).
-%% Fetch first codepoint and return rest in tail
+%% Fetch first grapheme cluster and return rest in tail
-spec next_grapheme(String::unicode:chardata()) ->
maybe_improper_list(grapheme_cluster(),unicode:chardata()) |
{error,unicode:chardata()}.
next_grapheme(CD) -> unicode_util:gc(CD).
-%% Fetch first grapheme cluster and return rest in tail
+%% Fetch first codepoint and return rest in tail
-spec next_codepoint(String::unicode:chardata()) ->
maybe_improper_list(char(),unicode:chardata()) |
{error,unicode:chardata()}.
@@ -508,10 +526,23 @@ next_codepoint(CD) -> unicode_util:cp(CD).
%% Internals
-length_1([_|Rest], N) ->
- length_1(unicode_util:gc(Rest), N+1);
-length_1([], N) ->
- N.
+length_1([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2) ->
+ length_1(Cont, N+1);
+length_1(Str, N) ->
+ case unicode_util:gc(Str) of
+ [] -> N;
+ [_|Rest] -> length_1(Rest, N+1)
+ end.
+
+length_b(<<CP2/utf8, Rest/binary>>, CP1, N)
+ when ?ASCII_LIST(CP1,CP2) ->
+ length_b(Rest, CP2, N+1);
+length_b(Bin0, CP1, N) ->
+ [_|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> N+1;
+ [CP3|Bin] -> length_b(Bin, CP3, N+1)
+ end.
equal_1([A|AR], [B|BR]) when is_integer(A), is_integer(B) ->
A =:= B andalso equal_1(AR, BR);
@@ -550,29 +581,66 @@ equal_norm_nocase(A0, B0, Norm) ->
{L1,L2} when is_list(L1), is_list(L2) -> false
end.
+reverse_1([CP1|[CP2|_]=Cont], Acc) when ?ASCII_LIST(CP1,CP2) ->
+ reverse_1(Cont, [CP1|Acc]);
reverse_1(CD, Acc) ->
case unicode_util:gc(CD) of
[GC|Rest] -> reverse_1(Rest, [GC|Acc]);
[] -> Acc
end.
-slice_l(CD, N, Binary) when N > 0 ->
+reverse_b(<<CP2/utf8, Rest/binary>>, CP1, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ reverse_b(Rest, CP2, [CP1|Acc]);
+reverse_b(Bin0, CP1, Acc) ->
+ [GC|Bin1] = unicode_util:gc([CP1|Bin0]),
+ case unicode_util:cp(Bin1) of
+ [] -> [GC|Acc];
+ [CP3|Bin] -> reverse_b(Bin, CP3, [GC|Acc])
+ end.
+
+slice_l0(<<CP1/utf8, Bin/binary>>, N) when N > 0 ->
+ slice_lb(Bin, CP1, N);
+slice_l0(L, N) ->
+ slice_l(L, N).
+
+slice_l([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ slice_l(Cont, N-1);
+slice_l(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
- [_|Cont] -> slice_l(Cont, N-1, Binary);
- [] when Binary -> <<>>;
+ [_|Cont] -> slice_l(Cont, N-1);
[] -> []
end;
-slice_l(Cont, 0, Binary) ->
- case is_empty(Cont) of
- true when Binary -> <<>>;
- _ -> Cont
+slice_l(Cont, 0) ->
+ Cont.
+
+slice_lb(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 1 ->
+ slice_lb(Bin, CP2, N-1);
+slice_lb(Bin, CP1, N) ->
+ [_|Rest] = unicode_util:gc([CP1|Bin]),
+ if N > 1 ->
+ case unicode_util:cp(Rest) of
+ [CP2|Cont] -> slice_lb(Cont, CP2, N-1);
+ [] -> <<>>
+ end;
+ N =:= 1 ->
+ Rest
end.
+slice_trail(Orig, N) when is_binary(Orig) ->
+ case Orig of
+ <<CP1/utf8, Bin/binary>> when N > 0 ->
+ Length = slice_bin(Bin, CP1, N),
+ Sz = byte_size(Orig) - Length,
+ <<Keep:Sz/binary, _/binary>> = Orig,
+ Keep;
+ _ -> <<>>
+ end;
slice_trail(CD, N) when is_list(CD) ->
- slice_list(CD, N);
-slice_trail(CD, N) when is_binary(CD) ->
- slice_bin(CD, N, CD).
+ slice_list(CD, N).
+slice_list([CP1|[CP2|_]=Cont], N) when ?ASCII_LIST(CP1,CP2),N > 0 ->
+ [CP1|slice_list(Cont, N-1)];
slice_list(CD, N) when N > 0 ->
case unicode_util:gc(CD) of
[GC|Cont] -> append(GC, slice_list(Cont, N-1));
@@ -581,17 +649,16 @@ slice_list(CD, N) when N > 0 ->
slice_list(_, 0) ->
[].
-slice_bin(CD, N, Orig) when N > 0 ->
- case unicode_util:gc(CD) of
- [_|Cont] -> slice_bin(Cont, N-1, Orig);
- [] -> Orig
+slice_bin(<<CP2/utf8, Bin/binary>>, CP1, N) when ?ASCII_LIST(CP1,CP2), N > 0 ->
+ slice_bin(Bin, CP2, N-1);
+slice_bin(CD, CP1, N) when N > 0 ->
+ [_|Bin] = unicode_util:gc([CP1|CD]),
+ case unicode_util:cp(Bin) of
+ [CP2|Cont] -> slice_bin(Cont, CP2, N-1);
+ [] -> 0
end;
-slice_bin([], 0, Orig) ->
- Orig;
-slice_bin(CD, 0, Orig) ->
- Sz = byte_size(Orig) - byte_size(CD),
- <<Keep:Sz/binary, _/binary>> = Orig,
- Keep.
+slice_bin(CD, CP1, 0) ->
+ byte_size(CD)+byte_size(<<CP1/utf8>>).
uppercase_list(CPs0) ->
case unicode_util:uppercase(CPs0) of
@@ -641,16 +708,31 @@ casefold_bin(CPs0, Acc) ->
[] -> Acc
end.
-
+%% Fast path for ascii searching for one character in lists
+trim_ls([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case Sep of
+ CP1 -> trim_ls(Cont, Sep);
+ _ -> Str
+ end;
+trim_ls(Str, Sep) ->
+ trim_l(Str, [Sep]).
+
+trim_l([CP1|[CP2|_]=Cont]=Str, Sep)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Sep) of
+ true -> trim_l(Cont, Sep);
+ false -> Str
+ end;
trim_l([Bin|Cont0], Sep) when is_binary(Bin) ->
case bin_search_inv(Bin, Cont0, Sep) of
{nomatch, Cont} -> trim_l(Cont, Sep);
Keep -> Keep
end;
-trim_l(Str, {GCs, _, _}=Sep) when is_list(Str) ->
+trim_l(Str, Sep) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
+ case lists:member(C, Sep) of
true -> trim_l(Cs, Sep);
false -> Str
end;
@@ -662,15 +744,51 @@ trim_l(Bin, Sep) when is_binary(Bin) ->
[Keep] -> Keep
end.
-trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+%% Fast path for ascii searching for one character in lists
+trim_ts([Sep|Cs1]=Str, Sep) ->
+ case Cs1 of
+ [] -> [];
+ [CP2|_] when ?ASCII_LIST(Sep,CP2) ->
+ Tail = trim_ts(Cs1, Sep),
+ case is_empty(Tail) of
+ true -> [];
+ false -> [Sep|Tail]
+ end;
+ _ ->
+ trim_t(Str, 0, search_pattern([Sep]))
+ end;
+trim_ts([CP|Cont],Sep) when is_integer(CP) ->
+ [CP|trim_ts(Cont, Sep)];
+trim_ts(Str, Sep) ->
+ trim_t(Str, 0, search_pattern([Sep])).
+
+trim_t([CP1|Cont]=Cs0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Cs1] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
+ end;
+ false ->
+ append(GC,trim_t(Cs1, 0, Seps))
+ end;
+ false ->
+ [CP1|trim_t(Cont, 0, Seps)]
+ end;
+trim_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,_} ->
- stack(Bin, trim_t(Cont0, 0, Sep));
+ stack(Bin, trim_t(Cont0, 0, Seps));
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- Tail = trim_t(Cont, 0, Sep),
+ Tail = trim_t(Cont, 0, Seps),
case is_empty(Tail) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
@@ -682,67 +800,69 @@ trim_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t([Bin|Cont], KeepSz, Sep)
+ trim_t([Bin|Cont], KeepSz, Seps)
end
end;
-trim_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+trim_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- Tail = trim_t(Cs1, 0, Sep),
- case is_empty(Tail) of
- true -> [];
- false -> append(GC,Tail)
- end;
- false ->
- append(GC,trim_t(Cs1, 0, Sep))
+ Tail = trim_t(Cs1, 0, Seps),
+ case is_empty(Tail) of
+ true -> [];
+ false -> append(GC,Tail)
end;
false ->
- append(CP,trim_t(Cs, 0, Sep))
+ append(GC,trim_t(Cs1, 0, Seps))
end;
[] -> []
end;
-trim_t(Bin, N, Sep) when is_binary(Bin) ->
+trim_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> Bin;
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, _/binary>> = Bin,
Keep;
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- trim_t(Bin, KeepSz, Sep)
+ trim_t(Bin, KeepSz, Seps)
end
end.
-take_l([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Sep) of
+
+take_l([CP1|[CP2|_]=Cont]=Str, Seps, Acc)
+ when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, Seps) of
+ true -> take_l(Cont, Seps, [CP1|Acc]);
+ false -> {rev(Acc), Str}
+ end;
+take_l([Bin|Cont0], Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_l(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_l(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_l(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_l(Str, Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
- case lists:member(C, GCs) of
- true -> take_l(Cs, Sep, append(rev(C),Acc));
+ case lists:member(C, Seps) of
+ true -> take_l(Cs, Seps, append(rev(C),Acc));
false -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_l(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Sep) of
+take_l(Bin, Seps, Acc) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -751,27 +871,41 @@ take_l(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_lc([Bin|Cont0], Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, Cont0, Sep) of
+
+take_lc([CP1|Cont]=Str0, {GCs,CPs,_}=Seps, Acc) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ false -> take_lc(Str, Seps, append(rev(GC),Acc));
+ true -> {rev(Acc), Str0}
+ end;
+ false ->
+ take_lc(Cont, Seps, append(CP1,Acc))
+ end;
+take_lc([Bin|Cont0], Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, Cont0, Seps) of
{nomatch, Cont} ->
Used = cp_prefix(Cont0, Cont),
- take_lc(Cont, Sep, [unicode:characters_to_binary([Bin|Used])|Acc]);
+ take_lc(Cont, Seps, [unicode:characters_to_binary([Bin|Used])|Acc]);
[Bin1|_]=After when is_binary(Bin1) ->
First = byte_size(Bin) - byte_size(Bin1),
<<Keep:First/binary, _/binary>> = Bin,
{btoken(Keep,Acc), After}
end;
-take_lc(Str, {GCs, _, _}=Sep, Acc) when is_list(Str) ->
+take_lc(Str, {GCs,_,_}=Seps, Acc) when is_list(Str) ->
case unicode_util:gc(Str) of
[C|Cs] ->
case lists:member(C, GCs) of
- false -> take_lc(Cs, Sep, append(rev(C),Acc));
+ false -> take_lc(Cs, Seps, append(rev(C),Acc));
true -> {rev(Acc), Str}
end;
[] -> {rev(Acc), []}
end;
-take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
- case bin_search(Bin, [], Sep) of
+take_lc(Bin, Seps0, Acc) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin, Acc), <<>>};
[After] ->
@@ -780,148 +914,192 @@ take_lc(Bin, Sep, Acc) when is_binary(Bin) ->
{btoken(Keep, Acc), After}
end.
-take_t([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+
+take_t([CP1|Cont]=Str0, _, {GCs,CPs,_}=Seps) when is_integer(CP1) ->
+ case lists:member(CP1, CPs) of
+ true ->
+ [GC|Str] = unicode_util:gc(Str0),
+ case lists:member(GC, GCs) of
+ true ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Str, 0, Seps),
+ {append(GC,Head), Tail}
+ end;
+ false ->
+ {Head, Tail} = take_t(Cont, 0, Seps),
+ {[CP1|Head], Tail}
+ end;
+take_t([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Cont0, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, Cont0, Seps) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search_inv(SepStart, Cont1, Sep) of
+ case bin_search_inv(SepStart, Cont1, GCs) of
{nomatch, Cont} ->
- {Head, Tail} = take_t(Cont, 0, Sep),
+ {Head, Tail} = take_t(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t([Bin|Cont], KeepSz, Sep)
+ take_t([Bin|Cont], KeepSz, Seps)
end
end;
-take_t(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
+take_t(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- true ->
- {Head, Tail} = take_t(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
end;
false ->
- {Head, Tail} = take_t(Cs, 0, Sep),
- {append(CP,Head), Tail}
+ {Head, Tail} = take_t(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_t(Bin, N, Sep) when is_binary(Bin) ->
+take_t(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search(Rest, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(Rest, [], Seps) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search_inv(SepStart, [], Sep) of
+ case bin_search_inv(SepStart, [], GCs) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_t(Bin, KeepSz, Sep)
+ take_t(Bin, KeepSz, Seps)
end
end.
-take_tc([Bin|Cont0], N, Sep) when is_binary(Bin) ->
+take_tc([CP1|[CP2|_]=Cont], _, {GCs,_,_}=Seps) when ?ASCII_LIST(CP1,CP2) ->
+ case lists:member(CP1, GCs) of
+ false ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(CP1,Tail)};
+ false -> {append(CP1,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cont, 0, Seps),
+ {append(CP1,Head), Tail}
+ end;
+take_tc([Bin|Cont0], N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, Cont0, Sep) of
+ case bin_search_inv(Rest, Cont0, GCs) of
{nomatch,Cont} ->
Used = cp_prefix(Cont0, Cont),
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps0),
{stack(unicode:characters_to_binary([Bin|Used]), Head), Tail};
[SepStart|Cont1] ->
- case bin_search(SepStart, Cont1, Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, Cont1, Seps) of
{nomatch, Cont} ->
- {Head, Tail} = take_tc(Cont, 0, Sep),
+ {Head, Tail} = take_tc(Cont, 0, Seps),
Used = cp_prefix(Cont0, Cont),
- case equal(Tail, Cont) of
+ case is_empty(Head) of
true ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Keep:KeepSz/binary, End/binary>> = Bin,
- {stack(Keep,Head), stack(stack(End,Used),Tail)};
+ {Keep, stack(stack(End,Used),Tail)};
false ->
{stack(unicode:characters_to_binary([Bin|Used]),Head), Tail}
end;
[NonSep|Cont] when is_binary(NonSep) ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc([Bin|Cont], KeepSz, Sep)
+ take_tc([Bin|Cont], KeepSz, Seps)
end
end;
-take_tc(Str, 0, {GCs,CPs,_}=Sep) when is_list(Str) ->
- case unicode_util:cp(Str) of
- [CP|Cs] ->
- case lists:member(CP, CPs) of
- true ->
- [GC|Cs1] = unicode_util:gc(Str),
- case lists:member(GC, GCs) of
- false ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- case equal(Tail, Cs1) of
- true -> {Head, append(GC,Tail)};
- false -> {append(GC,Head), Tail}
- end;
- true ->
- {Head, Tail} = take_tc(Cs1, 0, Sep),
- {append(GC,Head), Tail}
- end;
+take_tc(Str, 0, {GCs,_,_}=Seps) when is_list(Str) ->
+ case unicode_util:gc(Str) of
+ [GC|Cs1] ->
+ case lists:member(GC, GCs) of
false ->
- {Head, Tail} = take_tc(Cs, 0, Sep),
- case equal(Tail, Cs) of
- true -> {Head, append(CP,Tail)};
- false -> {append(CP,Head), Tail}
- end
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ case is_empty(Head) of
+ true -> {Head, append(GC,Tail)};
+ false -> {append(GC,Head), Tail}
+ end;
+ true ->
+ {Head, Tail} = take_tc(Cs1, 0, Seps),
+ {append(GC,Head), Tail}
end;
[] -> {[],[]}
end;
-take_tc(Bin, N, Sep) when is_binary(Bin) ->
+take_tc(Bin, N, {GCs,_,_}=Seps0) when is_binary(Bin) ->
<<_:N/binary, Rest/binary>> = Bin,
- case bin_search_inv(Rest, [], Sep) of
+ case bin_search_inv(Rest, [], GCs) of
{nomatch,_} -> {Bin, <<>>};
[SepStart] ->
- case bin_search(SepStart, [], Sep) of
+ Seps = search_compile(Seps0),
+ case bin_search(SepStart, [], Seps) of
{nomatch,_} ->
KeepSz = byte_size(Bin) - byte_size(SepStart),
<<Before:KeepSz/binary, End/binary>> = Bin,
{Before, End};
[NonSep] ->
KeepSz = byte_size(Bin) - byte_size(NonSep),
- take_tc(Bin, KeepSz, Sep)
+ take_tc(Bin, KeepSz, Seps)
end
end.
-prefix_1(Cs, []) -> Cs;
-prefix_1(Cs, [_]=Pre) ->
- prefix_2(unicode_util:gc(Cs), Pre);
-prefix_1(Cs, Pre) ->
- prefix_2(unicode_util:cp(Cs), Pre).
-
-prefix_2([C|Cs], [C|Pre]) ->
- prefix_1(Cs, Pre);
-prefix_2(_, _) ->
- nomatch.
+prefix_1(Cs0, [GC]) ->
+ case unicode_util:gc(Cs0) of
+ [GC|Cs] -> Cs;
+ _ -> nomatch
+ end;
+prefix_1([CP|Cs], [Pre|PreR]) when is_integer(CP) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(<<CP/utf8, Cs/binary>>, [Pre|PreR]) ->
+ case CP =:= Pre of
+ true -> prefix_1(Cs,PreR);
+ false -> nomatch
+ end;
+prefix_1(Cs0, [Pre|PreR]) ->
+ case unicode_util:cp(Cs0) of
+ [Pre|Cs] -> prefix_1(Cs,PreR);
+ _ -> nomatch
+ end.
+split_1([CP1|Cs]=Cs0, [C|_]=Needle, _, Where, Curr, Acc) when is_integer(CP1) ->
+ case CP1=:=C of
+ true ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> split_1(Cs, Needle, 0, Where, append(C,Curr), Acc);
+ Rest when Where =:= leading ->
+ [rev(Curr), Rest];
+ Rest when Where =:= trailing ->
+ split_1(Cs, Needle, 0, Where, [C|Curr], [rev(Curr), Rest]);
+ Rest when Where =:= all ->
+ split_1(Rest, Needle, 0, Where, [], [rev(Curr)|Acc])
+ end;
+ false ->
+ split_1(Cs, Needle, 0, Where, append(CP1,Curr), Acc)
+ end;
split_1([Bin|Cont0], Needle, Start, Where, Curr0, Acc)
when is_binary(Bin) ->
case bin_search_str(Bin, Start, Cont0, Needle) of
@@ -981,32 +1159,50 @@ split_1(Bin, [_C|_]=Needle, Start, Where, Curr0, Acc) ->
end
end.
-lexemes_m([Bin|Cont0], Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+lexemes_m([CP|_]=Cs0, {GCs,CPs,_}=Seps, Ts) when is_integer(CP) ->
+ case lists:member(CP, CPs) of
+ true ->
+ [GC|Cs2] = unicode_util:gc(Cs0),
+ case lists:member(GC, GCs) of
+ true ->
+ lexemes_m(Cs2, Seps, Ts);
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+ false ->
+ {Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
+ lexemes_m(Rest, Seps, [Lexeme|Ts])
+ end;
+lexemes_m([Bin|Cont0], {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- lexemes_m(Cont, Seps, Ts);
+ lexemes_m(Cont, Seps0, Ts);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
-lexemes_m(Cs0, {GCs, _, _}=Seps, Ts) when is_list(Cs0) ->
+lexemes_m(Cs0, {GCs, _, _}=Seps0, Ts) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- lexemes_m(Cs, Seps, Ts);
+ lexemes_m(Cs, Seps0, Ts);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs0, Seps, []),
lexemes_m(Rest, Seps, [Lexeme|Ts])
end;
[] ->
lists:reverse(Ts)
end;
-lexemes_m(Bin, Seps, Ts) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+lexemes_m(Bin, {GCs,_,_}=Seps0, Ts) when is_binary(Bin) ->
+ case bin_search_inv(Bin, [], GCs) of
{nomatch,_} ->
lists:reverse(Ts);
[Cs] ->
+ Seps = search_compile(Seps0),
{Lexeme,Rest} = lexeme_pick(Cs, Seps, []),
lexemes_m(Rest, Seps, add_non_empty(Lexeme,Ts))
end.
@@ -1037,7 +1233,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> {rev(Tkn), Cs0};
+ true -> {rev(Tkn), Cs2};
false -> lexeme_pick(Cs2, Seps, append(rev(GC),Tkn))
end;
false ->
@@ -1047,7 +1243,7 @@ lexeme_pick(Cs0, {GCs, CPs, _} = Seps, Tkn) when is_list(Cs0) ->
{rev(Tkn), []}
end;
lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+ case bin_search(Bin, [], Seps) of
{nomatch,_} ->
{btoken(Bin,Tkn), []};
[Left] ->
@@ -1056,35 +1252,38 @@ lexeme_pick(Bin, Seps, Tkn) when is_binary(Bin) ->
{btoken(Lexeme, Tkn), Left}
end.
-nth_lexeme_m([Bin|Cont0], Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, Cont0, Seps) of
+nth_lexeme_m([Bin|Cont0], {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ case bin_search_inv(Bin, Cont0, GCs) of
{nomatch,Cont} ->
- nth_lexeme_m(Cont, Seps, N);
+ nth_lexeme_m(Cont, Seps0, N);
Cs when N > 1 ->
- Rest = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Rest, Seps, N-1);
+ Rest = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Rest, Seps0, N-1);
Cs ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs, Seps, []),
Lexeme
end;
-nth_lexeme_m(Cs0, {GCs, _, _}=Seps, N) when is_list(Cs0) ->
+nth_lexeme_m(Cs0, {GCs, _, _}=Seps0, N) when is_list(Cs0) ->
case unicode_util:gc(Cs0) of
[C|Cs] ->
case lists:member(C, GCs) of
true ->
- nth_lexeme_m(Cs, Seps, N);
+ nth_lexeme_m(Cs, Seps0, N);
false when N > 1 ->
- Cs1 = lexeme_skip(Cs, Seps),
- nth_lexeme_m(Cs1, Seps, N-1);
+ Cs1 = lexeme_skip(Cs, Seps0),
+ nth_lexeme_m(Cs1, Seps0, N-1);
false ->
+ Seps = search_compile(Seps0),
{Lexeme,_} = lexeme_pick(Cs0, Seps, []),
Lexeme
end;
[] ->
[]
end;
-nth_lexeme_m(Bin, Seps, N) when is_binary(Bin) ->
- case bin_search_inv(Bin, [], Seps) of
+nth_lexeme_m(Bin, {GCs,_,_}=Seps0, N) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search_inv(Bin, [], GCs) of
[Cs] when N > 1 ->
Cs1 = lexeme_skip(Cs, Seps),
nth_lexeme_m(Cs1, Seps, N-1);
@@ -1100,16 +1299,17 @@ lexeme_skip([CP|Cs1]=Cs0, {GCs,CPs,_}=Seps) when is_integer(CP) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
lexeme_skip(Cs1, Seps)
end;
-lexeme_skip([Bin|Cont0], Seps) when is_binary(Bin) ->
+lexeme_skip([Bin|Cont0], Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
case bin_search(Bin, Cont0, Seps) of
{nomatch,_} -> lexeme_skip(Cont0, Seps);
- Cs -> Cs
+ Cs -> tl(unicode_util:gc(Cs))
end;
lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
case unicode_util:cp(Cs0) of
@@ -1118,7 +1318,7 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
true ->
[GC|Cs2] = unicode_util:gc(Cs0),
case lists:member(GC, GCs) of
- true -> Cs0;
+ true -> Cs2;
false -> lexeme_skip(Cs2, Seps)
end;
false ->
@@ -1127,12 +1327,23 @@ lexeme_skip(Cs0, {GCs, CPs, _} = Seps) when is_list(Cs0) ->
[] ->
[]
end;
-lexeme_skip(Bin, Seps) when is_binary(Bin) ->
- case bin_search(Bin, Seps) of
+lexeme_skip(Bin, Seps0) when is_binary(Bin) ->
+ Seps = search_compile(Seps0),
+ case bin_search(Bin, [], Seps) of
{nomatch,_} -> <<>>;
- [Left] -> Left
+ [Left] -> tl(unicode_util:gc(Left))
end.
+find_l([C1|Cs]=Cs0, [C|_]=Needle) when is_integer(C1) ->
+ case C1 of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_l(Cs, Needle);
+ _ -> Cs0
+ end;
+ _ ->
+ find_l(Cs, Needle)
+ end;
find_l([Bin|Cont0], Needle) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch, _, Cont} ->
@@ -1157,6 +1368,16 @@ find_l(Bin, Needle) ->
{_Before, [Cs], _After} -> Cs
end.
+find_r([Cp|Cs]=Cs0, [C|_]=Needle, Res) when is_integer(Cp) ->
+ case Cp of
+ C ->
+ case prefix_1(Cs0, Needle) of
+ nomatch -> find_r(Cs, Needle, Res);
+ _ -> find_r(Cs, Needle, Cs0)
+ end;
+ _ ->
+ find_r(Cs, Needle, Res)
+ end;
find_r([Bin|Cont0], Needle, Res) when is_binary(Bin) ->
case bin_search_str(Bin, 0, Cont0, Needle) of
{nomatch,_,Cont} ->
@@ -1227,11 +1448,6 @@ cp_prefix_1(Orig, Until, Cont) ->
%% Binary special
-bin_search(Bin, Seps) ->
- bin_search(Bin, [], Seps).
-
-bin_search(_Bin, Cont, {[],_,_}) ->
- {nomatch, Cont};
bin_search(Bin, Cont, {Seps,_,BP}) ->
bin_search_loop(Bin, 0, BP, Cont, Seps).
@@ -1239,10 +1455,14 @@ bin_search(Bin, Cont, {Seps,_,BP}) ->
%% i.e. å in nfd form $a "COMBINING RING ABOVE"
%% and PREPEND characters like "ARABIC NUMBER SIGN" 1536 <<216,128>>
%% combined with other characters are currently ignored.
+search_pattern({_,_,_}=P) -> P;
search_pattern(Seps) ->
CPs = search_cp(Seps),
- Bin = bin_pattern(CPs),
- {Seps, CPs, Bin}.
+ {Seps, CPs, undefined}.
+
+search_compile({Sep, CPs, undefined}) ->
+ {Sep, CPs, binary:compile_pattern(bin_pattern(CPs))};
+search_compile({_,_,_}=Compiled) -> Compiled.
search_cp([CP|Seps]) when is_integer(CP) ->
[CP|search_cp(Seps)];
@@ -1263,9 +1483,21 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
case binary:match(Bin, BinSeps) of
nomatch ->
{nomatch,Cont};
+ {Where, _CL} when Cont =:= [] ->
+ <<_:Where/binary, Cont1/binary>> = Bin,
+ [GC|Cont2] = unicode_util:gc(Cont1),
+ case lists:member(GC, Seps) of
+ false when Cont2 =:= [] ->
+ {nomatch, []};
+ false ->
+ Next = byte_size(Bin0) - byte_size(Cont2),
+ bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
+ true ->
+ [Cont1]
+ end;
{Where, _CL} ->
<<_:Where/binary, Cont0/binary>> = Bin,
- Cont1 = stack(Cont0, Cont),
+ Cont1 = [Cont0|Cont],
[GC|Cont2] = unicode_util:gc(Cont1),
case lists:member(GC, Seps) of
false ->
@@ -1273,55 +1505,108 @@ bin_search_loop(Bin0, Start, BinSeps, Cont, Seps) ->
[BinR|Cont] when is_binary(BinR) ->
Next = byte_size(Bin0) - byte_size(BinR),
bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
- BinR when is_binary(BinR), Cont =:= [] ->
- Next = byte_size(Bin0) - byte_size(BinR),
- bin_search_loop(Bin0, Next, BinSeps, Cont, Seps);
_ ->
{nomatch, Cont2}
end;
- true when is_list(Cont1) ->
- Cont1;
true ->
- [Cont1]
+ Cont1
end
end.
-bin_search_inv(Bin, Cont, {[], _, _}) ->
- [Bin|Cont];
-bin_search_inv(Bin, Cont, {[Sep], _, _}) ->
- bin_search_inv_1([Bin|Cont], Sep);
-bin_search_inv(Bin, Cont, {Seps, _, _}) ->
- bin_search_inv_n([Bin|Cont], Seps).
-
-bin_search_inv_1([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_1(CPs = [Bin0|Cont], Sep) when is_binary(Bin0) ->
- case unicode_util:gc(CPs) of
- [Sep|Bin] when is_binary(Bin), Cont =:= [] ->
- bin_search_inv_1([Bin], Sep);
- [Sep|[Bin|Cont]=Cs] when is_binary(Bin) ->
- bin_search_inv_1(Cs, Sep);
- [Sep|Cs] ->
- {nomatch, Cs};
- _ -> CPs
- end.
+bin_search_inv(<<>>, Cont, _) ->
+ {nomatch, Cont};
+bin_search_inv(Bin, Cont, [Sep]) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+bin_search_inv(Bin, Cont, Seps) ->
+ bin_search_inv_n(Bin, Cont, Seps).
+
+bin_search_inv_1(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Sep) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case CP1 of
+ Sep -> bin_search_inv_1(BinRest, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ case unicode_util:gc(Bin0) of
+ [Sep|Bin] -> bin_search_inv_1(Bin, Cont, Sep);
+ _ -> [Bin0|Cont]
+ end;
+ _ ->
+ case unicode_util:gc([Bin0|Cont]) of
+ [Sep|[Bin|Cont]] when is_binary(Bin) ->
+ bin_search_inv_1(Bin, Cont, Sep);
+ [Sep|Cs] ->
+ {nomatch, Cs};
+ _ -> [Bin0|Cont]
+ end
+ end;
+bin_search_inv_1(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_1([], Cont, _Sep) ->
+ {nomatch, Cont}.
-bin_search_inv_n([<<>>|CPs], _) ->
- {nomatch, CPs};
-bin_search_inv_n([Bin0|Cont]=CPs, Seps) when is_binary(Bin0) ->
- [C|Cs0] = unicode_util:gc(CPs),
- case {lists:member(C, Seps), Cs0} of
- {true, Cs} when is_binary(Cs), Cont =:= [] ->
- bin_search_inv_n([Cs], Seps);
- {true, [Bin|Cont]=Cs} when is_binary(Bin) ->
- bin_search_inv_n(Cs, Seps);
- {true, Cs} -> {nomatch, Cs};
- {false, _} -> CPs
- end.
+bin_search_inv_n(<<CP1/utf8, BinRest/binary>>=Bin0, Cont, Seps) ->
+ case BinRest of
+ <<CP2/utf8, _/binary>> when ?ASCII_LIST(CP1, CP2) ->
+ case lists:member(CP1,Seps) of
+ true -> bin_search_inv_n(BinRest, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ when Cont =:= [] ->
+ [GC|Bin] = unicode_util:gc(Bin0),
+ case lists:member(GC, Seps) of
+ true -> bin_search_inv_n(Bin, Cont, Seps);
+ false -> [Bin0|Cont]
+ end;
+ _ ->
+ [GC|Cs0] = unicode_util:gc([Bin0|Cont]),
+ case lists:member(GC, Seps) of
+ false -> [Bin0|Cont];
+ true ->
+ case Cs0 of
+ [Bin|Cont] when is_binary(Bin) ->
+ bin_search_inv_n(Bin, Cont, Seps);
+ _ ->
+ {nomatch, Cs0}
+ end
+ end
+ end;
+bin_search_inv_n(<<>>, Cont, _Sep) ->
+ {nomatch, Cont};
+bin_search_inv_n([], Cont, _Sep) ->
+ {nomatch, Cont}.
+
+bin_search_str(Bin0, Start, [], SearchCPs) ->
+ Compiled = binary:compile_pattern(unicode:characters_to_binary(SearchCPs)),
+ bin_search_str_1(Bin0, Start, Compiled, SearchCPs);
bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
+ First = binary:compile_pattern(<<CP/utf8>>),
+ bin_search_str_2(Bin0, Start, Cont, First, SearchCPs).
+
+bin_search_str_1(Bin0, Start, First, SearchCPs) ->
+ <<_:Start/binary, Bin/binary>> = Bin0,
+ case binary:match(Bin, First) of
+ nomatch -> {nomatch, byte_size(Bin0), []};
+ {Where0, _} ->
+ Where = Start+Where0,
+ <<Keep:Where/binary, Cs0/binary>> = Bin0,
+ case prefix_1(Cs0, SearchCPs) of
+ nomatch ->
+ <<_/utf8, Cs/binary>> = Cs0,
+ KeepSz = byte_size(Bin0) - byte_size(Cs),
+ bin_search_str_1(Bin0, KeepSz, First, SearchCPs);
+ [] ->
+ {Keep, [Cs0], <<>>};
+ Rest ->
+ {Keep, [Cs0], Rest}
+ end
+ end.
+
+bin_search_str_2(Bin0, Start, Cont, First, SearchCPs) ->
<<_:Start/binary, Bin/binary>> = Bin0,
- case binary:match(Bin, <<CP/utf8>>) of
+ case binary:match(Bin, First) of
nomatch -> {nomatch, byte_size(Bin0), Cont};
{Where0, _} ->
Where = Start+Where0,
@@ -1330,7 +1615,7 @@ bin_search_str(Bin0, Start, Cont, [CP|_]=SearchCPs) ->
case prefix_1(stack(Cs0,Cont), SearchCPs) of
nomatch when is_binary(Cs) ->
KeepSz = byte_size(Bin0) - byte_size(Cs),
- bin_search_str(Bin0, KeepSz, Cont, SearchCPs);
+ bin_search_str_2(Bin0, KeepSz, Cont, First, SearchCPs);
nomatch ->
{nomatch, Where, stack([GC|Cs],Cont)};
[] ->
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index 22212da222..a84679c595 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -226,7 +226,8 @@
%%-------------------------------------------------------------------------
%% External API
%%-------------------------------------------------------------------------
--export([normalize/1, parse/1,
+-export([compose_query/1, compose_query/2,
+ dissect_query/1, normalize/1, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -381,6 +382,76 @@ transcode(URIString, Options) when is_list(URIString) ->
end.
+%%-------------------------------------------------------------------------
+%% Functions for working with the query part of a URI as a list
+%% of key/value pairs.
+%% HTML5 - 4.10.22.6 URL-encoded form data
+%%-------------------------------------------------------------------------
+
+%%-------------------------------------------------------------------------
+%% Compose urlencoded query string from a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded encoding algorithm)
+%%-------------------------------------------------------------------------
+-spec compose_query(QueryList) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ QueryString :: uri_string()
+ | error().
+compose_query(List) ->
+ compose_query(List, [{encoding, utf8}]).
+
+
+-spec compose_query(QueryList, Options) -> QueryString when
+ QueryList :: [{uri_string(), uri_string()}],
+ Options :: [{encoding, atom()}],
+ QueryString :: uri_string()
+ | error().
+compose_query([],_Options) ->
+ [];
+compose_query(List, Options) ->
+ try compose_query(List, Options, false, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+%%
+compose_query([{Key,Value}|Rest], Options, IsList, Acc) ->
+ Separator = get_separator(Rest),
+ K = form_urlencode(Key, Options),
+ V = form_urlencode(Value, Options),
+ IsListNew = IsList orelse is_list(Key) orelse is_list(Value),
+ compose_query(Rest, Options, IsListNew, <<Acc/binary,K/binary,"=",V/binary,Separator/binary>>);
+compose_query([], _Options, IsList, Acc) ->
+ case IsList of
+ true -> convert_to_list(Acc, utf8);
+ false -> Acc
+ end.
+
+
+%%-------------------------------------------------------------------------
+%% Dissect a query string into a list of unescaped key/value pairs.
+%% (application/x-www-form-urlencoded decoding algorithm)
+%%-------------------------------------------------------------------------
+-spec dissect_query(QueryString) -> QueryList when
+ QueryString :: uri_string(),
+ QueryList :: [{uri_string(), uri_string()}]
+ | error().
+dissect_query(<<>>) ->
+ [];
+dissect_query([]) ->
+ [];
+dissect_query(QueryString) when is_list(QueryString) ->
+ try
+ B = convert_to_binary(QueryString, utf8, utf8),
+ dissect_query_key(B, true, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end;
+dissect_query(QueryString) ->
+ try dissect_query_key(QueryString, false, [], <<>>, <<>>)
+ catch
+ throw:{error, Atom, RestData} -> {error, Atom, RestData}
+ end.
+
+
%%%========================================================================
%%% Internal functions
%%%========================================================================
@@ -585,6 +656,7 @@ maybe_add_path(Map) ->
end.
+
-spec parse_scheme(binary(), uri_map()) -> {binary(), uri_map()}.
parse_scheme(?STRING_REST($:, Rest), URI) ->
{_, URI1} = parse_hier(Rest, URI),
@@ -1673,6 +1745,161 @@ percent_encode_segment(Segment) ->
%%-------------------------------------------------------------------------
+%% Helper functions for compose_query
+%%-------------------------------------------------------------------------
+
+%% Returns separator to be used between key-value pairs
+get_separator(L) when length(L) =:= 0 ->
+ <<>>;
+get_separator(_L) ->
+ <<"&">>.
+
+
+%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
+ B = convert_to_binary(Cs, utf8, utf8),
+ html5_byte_encode(base10_encode(B));
+form_urlencode(Cs, [{encoding, latin1}]) when is_binary(Cs) ->
+ html5_byte_encode(base10_encode(Cs));
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_list(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ B = convert_to_binary(Cs, utf8, Encoding),
+ html5_byte_encode(B);
+form_urlencode(Cs, [{encoding, Encoding}])
+ when is_binary(Cs), Encoding =:= utf8; Encoding =:= unicode ->
+ html5_byte_encode(Cs);
+form_urlencode(Cs, [{encoding, Encoding}]) when is_list(Cs); is_binary(Cs) ->
+ throw({error,invalid_encoding, Encoding});
+form_urlencode(Cs, _) ->
+ throw({error,invalid_input, Cs}).
+
+
+%% For each character in the entry's name and value that cannot be expressed using
+%% the selected character encoding, replace the character by a string consisting of
+%% a U+0026 AMPERSAND character (&), a "#" (U+0023) character, one or more ASCII
+%% digits representing the Unicode code point of the character in base ten, and
+%% finally a ";" (U+003B) character.
+base10_encode(Cs) ->
+ base10_encode(Cs, <<>>).
+%%
+base10_encode(<<>>, Acc) ->
+ Acc;
+base10_encode(<<H/utf8,T/binary>>, Acc) when H > 255 ->
+ Base10 = convert_to_binary(integer_to_list(H,10), utf8, utf8),
+ base10_encode(T, <<Acc/binary,"&#",Base10/binary,$;>>);
+base10_encode(<<H/utf8,T/binary>>, Acc) ->
+ base10_encode(T, <<Acc/binary,H>>).
+
+
+html5_byte_encode(B) ->
+ html5_byte_encode(B, <<>>).
+%%
+html5_byte_encode(<<>>, Acc) ->
+ Acc;
+html5_byte_encode(<<$ ,T/binary>>, Acc) ->
+ html5_byte_encode(T, <<Acc/binary,$+>>);
+html5_byte_encode(<<H,T/binary>>, Acc) ->
+ case is_url_char(H) of
+ true ->
+ html5_byte_encode(T, <<Acc/binary,H>>);
+ false ->
+ <<A:4,B:4>> = <<H>>,
+ html5_byte_encode(T, <<Acc/binary,$%,(?DEC2HEX(A)),(?DEC2HEX(B))>>)
+ end;
+html5_byte_encode(H, _Acc) ->
+ throw({error,invalid_input, H}).
+
+
+%% Return true if input char can appear in form-urlencoded string
+%% Allowed chararacters:
+%% 0x2A, 0x2D, 0x2E, 0x30 to 0x39, 0x41 to 0x5A,
+%% 0x5F, 0x61 to 0x7A
+is_url_char(C)
+ when C =:= 16#2A; C =:= 16#2D;
+ C =:= 16#2E; C =:= 16#5F;
+ 16#30 =< C, C =< 16#39;
+ 16#41 =< C, C =< 16#5A;
+ 16#61 =< C, C =< 16#7A -> true;
+is_url_char(_) -> false.
+
+
+%%-------------------------------------------------------------------------
+%% Helper functions for dissect_query
+%%-------------------------------------------------------------------------
+dissect_query_key(<<$=,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, Value);
+dissect_query_key(<<"&#",T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,"&#">>, Value);
+dissect_query_key(<<$&,_T/binary>>, _IsList, _Acc, _Key, _Value) ->
+ throw({error, missing_value, "&"});
+dissect_query_key(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_key(T, IsList, Acc, <<Key/binary,H>>, Value);
+dissect_query_key(B, _, _, _, _) ->
+ throw({error, missing_value, B}).
+
+
+dissect_query_value(<<$&,T/binary>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ dissect_query_key(T, IsList, [{K,V}|Acc], <<>>, <<>>);
+dissect_query_value(<<H,T/binary>>, IsList, Acc, Key, Value) ->
+ dissect_query_value(T, IsList, Acc, Key, <<Value/binary,H>>);
+dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
+ K = form_urldecode(IsList, Key),
+ V = form_urldecode(IsList, Value),
+ lists:reverse([{K,V}|Acc]).
+
+
+%% Form-urldecode input based on RFC 1866 [8.2.1]
+form_urldecode(true, B) ->
+ Result = base10_decode(form_urldecode(B, <<>>)),
+ convert_to_list(Result, utf8);
+form_urldecode(false, B) ->
+ base10_decode(form_urldecode(B, <<>>));
+form_urldecode(<<>>, Acc) ->
+ Acc;
+form_urldecode(<<$+,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,$ >>);
+form_urldecode(<<$%,C0,C1,T/binary>>, Acc) ->
+ case is_hex_digit(C0) andalso is_hex_digit(C1) of
+ true ->
+ V = ?HEX2DEC(C0)*16+?HEX2DEC(C1),
+ form_urldecode(T, <<Acc/binary, V>>);
+ false ->
+ L = convert_to_list(<<$%,C0,C1,T/binary>>, utf8),
+ throw({error, invalid_percent_encoding, L})
+ end;
+form_urldecode(<<H/utf8,T/binary>>, Acc) ->
+ form_urldecode(T, <<Acc/binary,H/utf8>>);
+form_urldecode(<<H,_/binary>>, _Acc) ->
+ throw({error, invalid_character, [H]}).
+
+base10_decode(Cs) ->
+ base10_decode(Cs, <<>>).
+%
+base10_decode(<<>>, Acc) ->
+ Acc;
+base10_decode(<<"&#",T/binary>>, Acc) ->
+ base10_decode_unicode(T, Acc);
+base10_decode(<<H/utf8,T/binary>>, Acc) ->
+ base10_decode(T,<<Acc/binary,H/utf8>>);
+base10_decode(<<H,_/binary>>, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+base10_decode_unicode(B, Acc) ->
+ base10_decode_unicode(B, 0, Acc).
+%%
+base10_decode_unicode(<<H/utf8,T/binary>>, Codepoint, Acc) when $0 =< H, H =< $9 ->
+ Res = Codepoint * 10 + (H - $0),
+ base10_decode_unicode(T, Res, Acc);
+base10_decode_unicode(<<$;,T/binary>>, Codepoint, Acc) ->
+ base10_decode(T, <<Acc/binary,Codepoint/utf8>>);
+base10_decode_unicode(<<H,_/binary>>, _, _) ->
+ throw({error, invalid_input, [H]}).
+
+
+%%-------------------------------------------------------------------------
%% Helper functions for normalize
%%-------------------------------------------------------------------------
diff --git a/lib/stdlib/test/base64_SUITE.erl b/lib/stdlib/test/base64_SUITE.erl
index 48b3f5f959..1fc4c3fc0e 100644
--- a/lib/stdlib/test/base64_SUITE.erl
+++ b/lib/stdlib/test/base64_SUITE.erl
@@ -97,10 +97,9 @@ base64_otp_5635(Config) when is_list(Config) ->
<<"===">> = base64:decode(base64:encode("===")),
ok.
%%-------------------------------------------------------------------------
-%% OTP-6279: Guard needed so that function fails in a correct
-%% way for faulty input, i.e. function_clause.
+%% OTP-6279: Make sure illegal characters are rejected when decoding.
base64_otp_6279(Config) when is_list(Config) ->
- {'EXIT',{function_clause, _}} = (catch base64:decode("dGVzda==a")),
+ {'EXIT',_} = (catch base64:decode("dGVzda==a")),
ok.
%%-------------------------------------------------------------------------
%% Encode and decode big binaries.
@@ -115,48 +114,61 @@ big(Config) when is_list(Config) ->
%%-------------------------------------------------------------------------
%% Make sure illegal characters are rejected when decoding.
illegal(Config) when is_list(Config) ->
- {'EXIT',{function_clause, _}} = (catch base64:decode("()")),
+ %% A few samples with different error reasons. Nothing can be
+ %% assumed about the reason for the crash.
+ {'EXIT',_} = (catch base64:decode("()")),
+ {'EXIT',_} = (catch base64:decode(<<19:8,20:8,21:8,22:8>>)),
+ {'EXIT',_} = (catch base64:decode([19,20,21,22])),
+ {'EXIT',_} = (catch base64:decode_to_string(<<19:8,20:8,21:8,22:8>>)),
+ {'EXIT',_} = (catch base64:decode_to_string([19,20,21,22])),
ok.
%%-------------------------------------------------------------------------
%% mime_decode and mime_decode_to_string have different implementations
-%% so test both with the same input separately. Both functions have
-%% the same implementation for binary/string arguments.
+%% so test both with the same input separately.
%%
%% Test base64:mime_decode/1.
mime_decode(Config) when is_list(Config) ->
+ MimeDecode = fun(In) ->
+ Out = base64:mime_decode(In),
+ Out = base64:mime_decode(binary_to_list(In))
+ end,
%% Test correct padding
- <<"one">> = base64:mime_decode(<<"b25l">>),
- <<"on">> = base64:mime_decode(<<"b24=">>),
- <<"o">> = base64:mime_decode(<<"bw==">>),
+ <<"one">> = MimeDecode(<<"b25l">>),
+ <<"on">> = MimeDecode(<<"b24=">>),
+ <<"o">> = MimeDecode(<<"bw==">>),
%% Test 1 extra padding
- <<"one">> = base64:mime_decode(<<"b25l= =">>),
- <<"on">> = base64:mime_decode(<<"b24== =">>),
- <<"o">> = base64:mime_decode(<<"bw=== =">>),
+ <<"one">> = MimeDecode(<<"b25l= =">>),
+ <<"on">> = MimeDecode(<<"b24== =">>),
+ <<"o">> = MimeDecode(<<"bw=== =">>),
%% Test 2 extra padding
- <<"one">> = base64:mime_decode(<<"b25l===">>),
- <<"on">> = base64:mime_decode(<<"b24====">>),
- <<"o">> = base64:mime_decode(<<"bw=====">>),
+ <<"one">> = MimeDecode(<<"b25l===">>),
+ <<"on">> = MimeDecode(<<"b24====">>),
+ <<"o">> = MimeDecode(<<"bw=====">>),
%% Test misc embedded padding
- <<"one">> = base64:mime_decode(<<"b2=5l===">>),
- <<"on">> = base64:mime_decode(<<"b=24====">>),
- <<"o">> = base64:mime_decode(<<"b=w=====">>),
+ <<"one">> = MimeDecode(<<"b2=5l===">>),
+ <<"on">> = MimeDecode(<<"b=24====">>),
+ <<"o">> = MimeDecode(<<"b=w=====">>),
%% Test misc white space and illegals with embedded padding
- <<"one">> = base64:mime_decode(<<" b~2=\r\n5()l===">>),
- <<"on">> = base64:mime_decode(<<"\tb =2\"¤4=¤= ==">>),
- <<"o">> = base64:mime_decode(<<"\nb=w=====">>),
+ <<"one">> = MimeDecode(<<" b~2=\r\n5()l===">>),
+ <<"on">> = MimeDecode(<<"\tb =2\"¤4=¤= ==">>),
+ <<"o">> = MimeDecode(<<"\nb=w=====">>),
%% Two pads
<<"Aladdin:open sesame">> =
- base64:mime_decode("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
+ MimeDecode(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>),
%% One pad to ignore, followed by more text
- <<"Hello World!!">> = base64:mime_decode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
+ <<"Hello World!!">> = MimeDecode(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
%% No pad
<<"Aladdin:open sesam">> =
- base64:mime_decode("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"),
+ MimeDecode(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>),
%% Encoded base 64 strings may be divided by non base 64 chars.
%% In this cases whitespaces.
<<"0123456789!@#0^&*();:<>,. []{}">> =
- base64:mime_decode(
+ MimeDecode(
<<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
+ %% Zeroes
+ <<"012">> = MimeDecode(<<"\000M\000D\000E\000y=\000">>),
+ <<"o">> = MimeDecode(<<"bw==\000">>),
+ <<"o">> = MimeDecode(<<"bw=\000=">>),
ok.
%%-------------------------------------------------------------------------
@@ -165,39 +177,48 @@ mime_decode(Config) when is_list(Config) ->
%% Test base64:mime_decode_to_string/1.
mime_decode_to_string(Config) when is_list(Config) ->
+ MimeDecodeToString =
+ fun(In) ->
+ Out = base64:mime_decode_to_string(In),
+ Out = base64:mime_decode_to_string(binary_to_list(In))
+ end,
%% Test correct padding
- "one" = base64:mime_decode_to_string(<<"b25l">>),
- "on" = base64:mime_decode_to_string(<<"b24=">>),
- "o" = base64:mime_decode_to_string(<<"bw==">>),
+ "one" = MimeDecodeToString(<<"b25l">>),
+ "on" = MimeDecodeToString(<<"b24=">>),
+ "o" = MimeDecodeToString(<<"bw==">>),
%% Test 1 extra padding
- "one" = base64:mime_decode_to_string(<<"b25l= =">>),
- "on" = base64:mime_decode_to_string(<<"b24== =">>),
- "o" = base64:mime_decode_to_string(<<"bw=== =">>),
+ "one" = MimeDecodeToString(<<"b25l= =">>),
+ "on" = MimeDecodeToString(<<"b24== =">>),
+ "o" = MimeDecodeToString(<<"bw=== =">>),
%% Test 2 extra padding
- "one" = base64:mime_decode_to_string(<<"b25l===">>),
- "on" = base64:mime_decode_to_string(<<"b24====">>),
- "o" = base64:mime_decode_to_string(<<"bw=====">>),
+ "one" = MimeDecodeToString(<<"b25l===">>),
+ "on" = MimeDecodeToString(<<"b24====">>),
+ "o" = MimeDecodeToString(<<"bw=====">>),
%% Test misc embedded padding
- "one" = base64:mime_decode_to_string(<<"b2=5l===">>),
- "on" = base64:mime_decode_to_string(<<"b=24====">>),
- "o" = base64:mime_decode_to_string(<<"b=w=====">>),
+ "one" = MimeDecodeToString(<<"b2=5l===">>),
+ "on" = MimeDecodeToString(<<"b=24====">>),
+ "o" = MimeDecodeToString(<<"b=w=====">>),
%% Test misc white space and illegals with embedded padding
- "one" = base64:mime_decode_to_string(<<" b~2=\r\n5()l===">>),
- "on" = base64:mime_decode_to_string(<<"\tb =2\"¤4=¤= ==">>),
- "o" = base64:mime_decode_to_string(<<"\nb=w=====">>),
+ "one" = MimeDecodeToString(<<" b~2=\r\n5()l===">>),
+ "on" = MimeDecodeToString(<<"\tb =2\"¤4=¤= ==">>),
+ "o" = MimeDecodeToString(<<"\nb=w=====">>),
%% Two pads
"Aladdin:open sesame" =
- base64:mime_decode_to_string("QWxhZGRpbjpvc()GVuIHNlc2FtZQ=="),
+ MimeDecodeToString(<<"QWxhZGRpbjpvc()GVuIHNlc2FtZQ==">>),
%% One pad to ignore, followed by more text
- "Hello World!!" = base64:mime_decode_to_string(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
+ "Hello World!!" = MimeDecodeToString(<<"SGVsb)(G8gV29ybGQ=h IQ= =">>),
%% No pad
"Aladdin:open sesam" =
- base64:mime_decode_to_string("QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft"),
+ MimeDecodeToString(<<"QWxhZGRpbjpvcG¤\")(VuIHNlc2Ft">>),
%% Encoded base 64 strings may be divided by non base 64 chars.
%% In this cases whitespaces.
"0123456789!@#0^&*();:<>,. []{}" =
- base64:mime_decode_to_string(
+ MimeDecodeToString(
<<"MDEy MzQ1Njc4 \tOSFAIzBeJ \nio)(oKTs6 PD4sLi \r\nBbXXt9">>),
+ %% Zeroes
+ "012" = MimeDecodeToString(<<"\000M\000D\000E\000y=\000">>),
+ "o" = MimeDecodeToString(<<"bw==\000">>),
+ "o" = MimeDecodeToString(<<"bw=\000=">>),
ok.
%%-------------------------------------------------------------------------
diff --git a/lib/stdlib/test/erl_lint_SUITE.erl b/lib/stdlib/test/erl_lint_SUITE.erl
index b76bece07f..5efffc6a5c 100644
--- a/lib/stdlib/test/erl_lint_SUITE.erl
+++ b/lib/stdlib/test/erl_lint_SUITE.erl
@@ -66,7 +66,8 @@
otp_11851/1,otp_11879/1,otp_13230/1,
record_errors/1, otp_11879_cont/1,
non_latin1_module/1, otp_14323/1,
- get_stacktrace/1, otp_14285/1, otp_14378/1]).
+ get_stacktrace/1, stacktrace_syntax/1,
+ otp_14285/1, otp_14378/1]).
suite() ->
[{ct_hooks,[ts_install_cth]},
@@ -87,7 +88,7 @@ all() ->
maps, maps_type, maps_parallel_match,
otp_11851, otp_11879, otp_13230,
record_errors, otp_11879_cont, non_latin1_module, otp_14323,
- get_stacktrace, otp_14285, otp_14378].
+ get_stacktrace, stacktrace_syntax, otp_14285, otp_14378].
groups() ->
[{unused_vars_warn, [],
@@ -4129,6 +4130,40 @@ get_stacktrace(Config) ->
run(Config, Ts),
ok.
+stacktrace_syntax(Config) ->
+ Ts = [{guard,
+ <<"t1() ->
+ try error(foo)
+ catch _:_:Stk when is_number(Stk) -> ok
+ end.
+ ">>,
+ [],
+ {errors,[{3,erl_lint,{stacktrace_guard,'Stk'}}],[]}},
+ {bound,
+ <<"t1() ->
+ Stk = [],
+ try error(foo)
+ catch _:_:Stk -> ok
+ end.
+ ">>,
+ [],
+ {errors,[{4,erl_lint,{stacktrace_bound,'Stk'}}],[]}},
+ {guard_and_bound,
+ <<"t1() ->
+ Stk = [],
+ try error(foo)
+ catch _:_:Stk when is_integer(Stk) -> ok
+ end.
+ ">>,
+ [],
+ {errors,[{4,erl_lint,{stacktrace_bound,'Stk'}},
+ {4,erl_lint,{stacktrace_guard,'Stk'}}],[]}}
+ ],
+
+ run(Config, Ts),
+ ok.
+
+
%% Unicode atoms.
otp_14285(Config) ->
%% A small sample of all the errors and warnings in module erl_lint.
diff --git a/lib/stdlib/test/filelib_SUITE.erl b/lib/stdlib/test/filelib_SUITE.erl
index 1236fe45f4..930cea347f 100644
--- a/lib/stdlib/test/filelib_SUITE.erl
+++ b/lib/stdlib/test/filelib_SUITE.erl
@@ -33,6 +33,8 @@
-include_lib("common_test/include/ct.hrl").
-include_lib("kernel/include/file.hrl").
+-define(PRIM_FILE, prim_file).
+
init_per_testcase(_Case, Config) ->
Config.
@@ -446,10 +448,10 @@ wildcard_symlink(Config) when is_list(Config) ->
erl_prim_loader)),
["sub","symlink"] =
basenames(Dir, filelib:wildcard(filename:join(Dir, "*"),
- prim_file)),
+ ?PRIM_FILE)),
["symlink"] =
basenames(Dir, filelib:wildcard(filename:join(Dir, "symlink"),
- prim_file)),
+ ?PRIM_FILE)),
ok = file:delete(AFile),
%% The symlink should still be visible even when its target
%% has been deleted.
@@ -465,10 +467,10 @@ wildcard_symlink(Config) when is_list(Config) ->
erl_prim_loader)),
["sub","symlink"] =
basenames(Dir, filelib:wildcard(filename:join(Dir, "*"),
- prim_file)),
+ ?PRIM_FILE)),
["symlink"] =
basenames(Dir, filelib:wildcard(filename:join(Dir, "symlink"),
- prim_file)),
+ ?PRIM_FILE)),
ok
end.
@@ -497,17 +499,17 @@ is_file_symlink(Config) ->
ok ->
true = filelib:is_dir(DirAlias),
true = filelib:is_dir(DirAlias, erl_prim_loader),
- true = filelib:is_dir(DirAlias, prim_file),
+ true = filelib:is_dir(DirAlias, ?PRIM_FILE),
true = filelib:is_file(DirAlias),
true = filelib:is_file(DirAlias, erl_prim_loader),
- true = filelib:is_file(DirAlias, prim_file),
+ true = filelib:is_file(DirAlias, ?PRIM_FILE),
ok = file:make_symlink(AFile,FileAlias),
true = filelib:is_file(FileAlias),
true = filelib:is_file(FileAlias, erl_prim_loader),
- true = filelib:is_file(FileAlias, prim_file),
+ true = filelib:is_file(FileAlias, ?PRIM_FILE),
true = filelib:is_regular(FileAlias),
true = filelib:is_regular(FileAlias, erl_prim_loader),
- true = filelib:is_regular(FileAlias, prim_file),
+ true = filelib:is_regular(FileAlias, ?PRIM_FILE),
ok
end.
@@ -528,11 +530,11 @@ file_props_symlink(Config) ->
{_,_} = LastMod = filelib:last_modified(AFile),
LastMod = filelib:last_modified(Alias),
LastMod = filelib:last_modified(Alias, erl_prim_loader),
- LastMod = filelib:last_modified(Alias, prim_file),
+ LastMod = filelib:last_modified(Alias, ?PRIM_FILE),
FileSize = filelib:file_size(AFile),
FileSize = filelib:file_size(Alias),
FileSize = filelib:file_size(Alias, erl_prim_loader),
- FileSize = filelib:file_size(Alias, prim_file)
+ FileSize = filelib:file_size(Alias, ?PRIM_FILE)
end.
find_source(Config) when is_list(Config) ->
diff --git a/lib/stdlib/test/qlc_SUITE.erl b/lib/stdlib/test/qlc_SUITE.erl
index 949142ec77..8f8a0f6e73 100644
--- a/lib/stdlib/test/qlc_SUITE.erl
+++ b/lib/stdlib/test/qlc_SUITE.erl
@@ -1695,28 +1695,7 @@ sort(Config) when is_list(Config) ->
[true || I <- lists:seq(1, 50000), not ets:insert(E, {I, I})],
H = qlc:q([{X,Y} || X <- [a,b], Y <- qlc:sort(ets:table(E))]),
100000 = length(qlc:e(H)),
- ets:delete(E)">>,
-
- begin
- TmpDir = ?privdir,
- [<<"TE = process_flag(trap_exit, true),
- E = ets:new(foo, []),
- [true || I <- lists:seq(1, 50000), not ets:insert(E, {I, I})],
- Ports = erlang:ports(),
- H = qlc:q([{X,Y} || X <- [a,b],
- begin
- [P] = erlang:ports() -- Ports,
- exit(P, port_exit),
- true
- end,
- Y <- qlc:sort(ets:table(E),
- [{tmpdir,\"">>,
- TmpDir, <<"\"}])]),
- {error, qlc, {file_error, _, _}} = (catch qlc:e(H)),
- receive {'EXIT', _, port_exit} -> ok end,
- ets:delete(E),
- process_flag(trap_exit, TE)">>]
- end
+ ets:delete(E)">>
],
run(Config, Ts),
diff --git a/lib/stdlib/test/rand_SUITE.erl b/lib/stdlib/test/rand_SUITE.erl
index ef4f9faad9..3d3241b33d 100644
--- a/lib/stdlib/test/rand_SUITE.erl
+++ b/lib/stdlib/test/rand_SUITE.erl
@@ -29,6 +29,9 @@
basic_stats_uniform_1/1, basic_stats_uniform_2/1,
basic_stats_standard_normal/1,
basic_stats_normal/1,
+ stats_standard_normal_box_muller/1,
+ stats_standard_normal_box_muller_2/1,
+ stats_standard_normal/1,
uniform_real_conv/1,
plugin/1, measure/1,
reference_jump_state/1, reference_jump_procdict/1]).
@@ -57,7 +60,10 @@ all() ->
groups() ->
[{basic_stats, [parallel],
[basic_stats_uniform_1, basic_stats_uniform_2,
- basic_stats_standard_normal]},
+ basic_stats_standard_normal,
+ stats_standard_normal_box_muller,
+ stats_standard_normal_box_muller_2,
+ stats_standard_normal]},
{reference_jump, [parallel],
[reference_jump_state, reference_jump_procdict]}].
@@ -410,6 +416,206 @@ normal_s(Mean, Variance, State0) when Mean == 0, Variance == 1 ->
normal_s(Mean, Variance, State0) ->
rand:normal_s(Mean, Variance, State0).
+
+
+-dialyzer({no_improper_lists, stats_standard_normal_box_muller/1}).
+stats_standard_normal_box_muller(Config) when is_list(Config) ->
+ try math:erfc(1.0) of
+ _ ->
+ TwoPi = 2.0 * math:pi(),
+ NormalS =
+ fun
+ ([S0]) ->
+ {U1, S1} = rand:uniform_real_s(S0),
+ R = math:sqrt(-2.0 * math:log(U1)),
+ {U2, S2} = rand:uniform_s(S1),
+ T = TwoPi * U2,
+ Z0 = R * math:cos(T),
+ Z1 = R * math:sin(T),
+ {Z0, [S2|Z1]};
+ ([S|Z]) ->
+ {Z, [S]}
+ end,
+ State = [rand:seed(exrop)],
+ stats_standard_normal(NormalS, State)
+ catch error:_ ->
+ {skip, "math:erfc/1 not supported"}
+ end.
+
+-dialyzer({no_improper_lists, stats_standard_normal_box_muller_2/1}).
+stats_standard_normal_box_muller_2(Config) when is_list(Config) ->
+ try math:erfc(1.0) of
+ _ ->
+ TwoPi = 2.0 * math:pi(),
+ NormalS =
+ fun
+ ([S0]) ->
+ {U0, S1} = rand:uniform_s(S0),
+ U1 = 1.0 - U0,
+ R = math:sqrt(-2.0 * math:log(U1)),
+ {U2, S2} = rand:uniform_s(S1),
+ T = TwoPi * U2,
+ Z0 = R * math:cos(T),
+ Z1 = R * math:sin(T),
+ {Z0, [S2|Z1]};
+ ([S|Z]) ->
+ {Z, [S]}
+ end,
+ State = [rand:seed(exrop)],
+ stats_standard_normal(NormalS, State)
+ catch error:_ ->
+ {skip, "math:erfc/1 not supported"}
+ end.
+
+
+stats_standard_normal(Config) when is_list(Config) ->
+ try math:erfc(1.0) of
+ _ ->
+ stats_standard_normal(
+ fun rand:normal_s/1, rand:seed_s(exrop))
+ catch error:_ ->
+ {skip, "math:erfc/1 not supported"}
+ end.
+%%
+stats_standard_normal(Fun, S) ->
+%%%
+%%% ct config:
+%%% {rand_SUITE, [{stats_standard_normal,[{seconds, 8}, {std_devs, 4.2}]}]}.
+%%%
+ Seconds = ct:get_config({?MODULE, ?FUNCTION_NAME, seconds}, 8),
+ StdDevs =
+ ct:get_config(
+ {?MODULE, ?FUNCTION_NAME, std_devs},
+ 4.2), % probability erfc(4.2/sqrt(2)) (1/37465) to fail a bucket
+%%%
+ ct:timetrap({seconds, Seconds + 120}),
+ %% Buckets is chosen to get a range where the the probability to land
+ %% in the top catch-all bucket is not vanishingly low, but with
+ %% these values it is about 1/25 of the probability for the low bucket
+ %% (closest to 0).
+ %%
+ %% Rounds is calculated so the expected value for the low
+ %% bucket will be at least TargetHits.
+ %%
+ InvDelta = 512,
+ Buckets = 4 * InvDelta, % 4 std devs range
+ TargetHits = 1024,
+ Sqrt2 = math:sqrt(2.0),
+ W = InvDelta * Sqrt2,
+ P0 = math:erf(1 / W),
+ Rounds = TargetHits * ceil(1.0 / P0),
+ Histogram = array:new({default, 0}),
+ StopTime = erlang:monotonic_time(second) + Seconds,
+ ct:pal(
+ "Running standard normal test against ~w std devs for ~w seconds...",
+ [StdDevs, Seconds]),
+ {PositiveHistogram, NegativeHistogram, Outlier, TotalRounds} =
+ stats_standard_normal(
+ InvDelta, Buckets, Histogram, Histogram, 0.0,
+ Fun, S, Rounds, StopTime, Rounds, 0),
+ Precision = math:sqrt(TotalRounds * P0) / StdDevs,
+ TopP = math:erfc(Buckets / W),
+ TopPrecision = math:sqrt(TotalRounds * TopP) / StdDevs,
+ OutlierProbability = math:erfc(Outlier / Sqrt2) * TotalRounds,
+ InvOP = 1.0 / OutlierProbability,
+ ct:pal(
+ "Total rounds: ~w, tolerance: 1/~.2f..1/~.2f, "
+ "outlier: ~.2f, probability 1/~.2f.",
+ [TotalRounds, Precision, TopPrecision, Outlier, InvOP]),
+ {TotalRounds, [], []} =
+ {TotalRounds,
+ check_histogram(
+ W, TotalRounds, StdDevs, PositiveHistogram, Buckets),
+ check_histogram(
+ W, TotalRounds, StdDevs, NegativeHistogram, Buckets)},
+ %% If the probability for getting this Outlier is lower than 1/50,
+ %% then this is fishy!
+ true = (1/50 =< OutlierProbability),
+ {comment, {tp, TopPrecision, op, InvOP}}.
+%%
+stats_standard_normal(
+ InvDelta, Buckets, PositiveHistogram, NegativeHistogram, Outlier,
+ Fun, S, 0, StopTime, Rounds, TotalRounds) ->
+ case erlang:monotonic_time(second) of
+ Now when Now < StopTime ->
+ stats_standard_normal(
+ InvDelta, Buckets,
+ PositiveHistogram, NegativeHistogram, Outlier,
+ Fun, S, Rounds, StopTime, Rounds, TotalRounds + Rounds);
+ _ ->
+ {PositiveHistogram, NegativeHistogram,
+ Outlier, TotalRounds + Rounds}
+ end;
+stats_standard_normal(
+ InvDelta, Buckets, PositiveHistogram, NegativeHistogram, Outlier,
+ Fun, S, Count, StopTime, Rounds, TotalRounds) ->
+ case Fun(S) of
+ {X, NewS} when 0.0 =< X ->
+ Bucket = min(Buckets, floor(X * InvDelta)),
+ stats_standard_normal(
+ InvDelta, Buckets,
+ increment_bucket(Bucket, PositiveHistogram),
+ NegativeHistogram, max(Outlier, X),
+ Fun, NewS, Count - 1, StopTime, Rounds, TotalRounds);
+ {MinusX, NewS} ->
+ X = -MinusX,
+ Bucket = min(Buckets, floor(X * InvDelta)),
+ stats_standard_normal(
+ InvDelta, Buckets,
+ PositiveHistogram,
+ increment_bucket(Bucket, NegativeHistogram), max(Outlier, X),
+ Fun, NewS, Count - 1, StopTime, Rounds, TotalRounds)
+ end.
+
+increment_bucket(Bucket, Array) ->
+ array:set(Bucket, array:get(Bucket, Array) + 1, Array).
+
+check_histogram(W, Rounds, StdDevs, Histogram, Buckets) ->
+ %%PrevBucket = 512,
+ %%Bucket = PrevBucket - 1,
+ %%P = 0.5 * math:erfc(PrevBucket / W),
+ TargetP = 0.5 * math:erfc(Buckets / W),
+ P = 0.0,
+ N = 0,
+ check_histogram(
+ W, Rounds, StdDevs, Histogram, TargetP,
+ Buckets, Buckets, P, N).
+%%
+check_histogram(
+ _W, _Rounds, _StdDevs, _Histogram, _TargetP,
+ 0, _PrevBucket, _PrevP, _PrevN) ->
+ [];
+check_histogram(
+ W, Rounds, StdDevs, Histogram, TargetP,
+ Bucket, PrevBucket, PrevP, PrevN) ->
+ N = PrevN + array:get(Bucket, Histogram),
+ P = 0.5 * math:erfc(Bucket / W),
+ BucketP = P - PrevP,
+ if
+ TargetP =< BucketP ->
+ check_histogram(
+ W, Rounds, StdDevs, Histogram, TargetP,
+ Bucket - 1, PrevBucket, PrevP, N);
+ true ->
+ Exp = BucketP * Rounds,
+ Var = Rounds * BucketP*(1.0 - BucketP),
+ Threshold = StdDevs * math:sqrt(Var),
+ LowerLimit = floor(Exp - Threshold),
+ UpperLimit = ceil(Exp + Threshold),
+ if
+ N < LowerLimit; UpperLimit < N ->
+ [#{bucket => {Bucket, PrevBucket}, n => N, exp => Exp,
+ lower => LowerLimit, upper => UpperLimit} |
+ check_histogram(
+ W, Rounds, StdDevs, Histogram, TargetP,
+ Bucket - 1, Bucket, P, 0)];
+ true ->
+ check_histogram(
+ W, Rounds, StdDevs, Histogram, TargetP,
+ Bucket - 1, Bucket, P, 0)
+ end
+ end.
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% White box test of the conversion to float
diff --git a/lib/stdlib/test/stdlib_bench_SUITE.erl b/lib/stdlib/test/stdlib_bench_SUITE.erl
index 8670e7029c..2a9981bb9e 100644
--- a/lib/stdlib/test/stdlib_bench_SUITE.erl
+++ b/lib/stdlib/test/stdlib_bench_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2012-2016. All Rights Reserved.
+%% Copyright Ericsson AB 2012-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -28,13 +28,20 @@ suite() -> [{ct_hooks,[{ts_install_cth,[{nodenames,2}]}]}].
all() ->
- [{group,unicode}].
+ [{group,unicode}, {group,base64}].
groups() ->
[{unicode,[{repeat,5}],
[norm_nfc_list, norm_nfc_deep_l, norm_nfc_binary,
string_lexemes_list, string_lexemes_binary
- ]}].
+ ]},
+ {base64,[{repeat,5}],
+ [decode_binary, decode_binary_to_string,
+ decode_list, decode_list_to_string,
+ encode_binary, encode_binary_to_string,
+ encode_list, encode_list_to_string,
+ mime_binary_decode, mime_binary_decode_to_string,
+ mime_list_decode, mime_list_decode_to_string]}].
init_per_group(_GroupName, Config) ->
Config.
@@ -105,3 +112,97 @@ norm_data(Config) ->
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+decode_binary(_Config) ->
+ test(decode, encoded_binary()).
+
+decode_binary_to_string(_Config) ->
+ test(decode_to_string, encoded_binary()).
+
+decode_list(_Config) ->
+ test(decode, encoded_list()).
+
+decode_list_to_string(_Config) ->
+ test(decode_to_string, encoded_list()).
+
+encode_binary(_Config) ->
+ test(encode, binary()).
+
+encode_binary_to_string(_Config) ->
+ test(encode_to_string, binary()).
+
+encode_list(_Config) ->
+ test(encode, list()).
+
+encode_list_to_string(_Config) ->
+ test(encode_to_string, list()).
+
+mime_binary_decode(_Config) ->
+ test(mime_decode, encoded_binary()).
+
+mime_binary_decode_to_string(_Config) ->
+ test(mime_decode_to_string, encoded_binary()).
+
+mime_list_decode(_Config) ->
+ test(mime_decode, encoded_list()).
+
+mime_list_decode_to_string(_Config) ->
+ test(mime_decode_to_string, encoded_list()).
+
+-define(SIZE, 10000).
+-define(N, 1000).
+
+encoded_binary() ->
+ list_to_binary(encoded_list()).
+
+encoded_list() ->
+ L = random_byte_list(round(?SIZE*0.75)),
+ base64:encode_to_string(L).
+
+binary() ->
+ list_to_binary(list()).
+
+list() ->
+ random_byte_list(?SIZE).
+
+test(Func, Data) ->
+ F = fun() -> loop(?N, Func, Data) end,
+ {Time, ok} = timer:tc(fun() -> lspawn(F) end),
+ report_base64(Time).
+
+loop(0, _F, _D) -> garbage_collect(), ok;
+loop(N, F, D) ->
+ _ = base64:F(D),
+ loop(N - 1, F, D).
+
+lspawn(Fun) ->
+ {Pid, Ref} = spawn_monitor(fun() -> exit(Fun()) end),
+ receive
+ {'DOWN', Ref, process, Pid, Rep} -> Rep
+ end.
+
+report_base64(Time) ->
+ Tps = round((?N*1000000)/Time),
+ ct_event:notify(#event{name = benchmark_data,
+ data = [{suite, "stdlib_base64"},
+ {value, Tps}]}),
+ Tps.
+
+%% Copied from base64_SUITE.erl.
+
+random_byte_list(N) ->
+ random_byte_list(N, []).
+
+random_byte_list(0, Acc) ->
+ Acc;
+random_byte_list(N, Acc) ->
+ random_byte_list(N-1, [rand:uniform(255)|Acc]).
+
+make_big_binary(N) ->
+ list_to_binary(mbb(N, [])).
+
+mbb(N, Acc) when N > 256 ->
+ B = list_to_binary(lists:seq(0, 255)),
+ mbb(N - 256, [B | Acc]);
+mbb(N, Acc) ->
+ B = list_to_binary(lists:seq(0, N-1)),
+ lists:reverse(Acc, B).
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index 05f18ef238..d02a6eac0a 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -48,6 +48,7 @@
%% Run tests when debugging them
-export([debug/0, time_func/4]).
+-compile([nowarn_deprecated_function]).
suite() ->
[{ct_hooks,[ts_install_cth]},
@@ -92,14 +93,11 @@ end_per_testcase(_Case, _Config) ->
ok.
debug() ->
- Config = [{data_dir, ?MODULE_STRING++"_data"}],
+ Config = [{data_dir, "./" ++ ?MODULE_STRING++"_data"}],
[io:format("~p:~p~n",[Test,?MODULE:Test(Config)]) ||
{_,Tests} <- groups(), Test <- Tests].
-define(TEST(B,C,D), test(?LINE,?FUNCTION_NAME,B,C,D, true)).
--define(TEST_EQ(B,C,D),
- test(?LINE,?FUNCTION_NAME,B,C,D, true),
- test(?LINE,?FUNCTION_NAME,hd(C),[B|tl(C),D, true)).
-define(TEST_NN(B,C,D),
test(?LINE,?FUNCTION_NAME,B,C,D, false),
@@ -294,6 +292,7 @@ trim(_) ->
?TEST(["..h", ".e", <<"j..">>], [both, ". "], "h.ej"),
?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [both, ". "], "h.ejsan"),
%% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+ ?TEST([1013,101,778,101,101], [trailing, [101]], [1013,101,778]),
?TEST("aaåaa", [both, "a"], "å"),
?TEST(["aaa",778,"äöoo"], [both, "ao"], "åäö"),
?TEST([<<"aaa">>,778,"äöoo"], [both, "ao"], "åäö"),
@@ -353,6 +352,7 @@ take(_) ->
?TEST([<<>>,<<"..">>, " h.ej", <<" ..">>], [Chars, true, leading], {".. ", "h.ej .."}),
?TEST(["..h", <<".ejsa"/utf8>>, "n.."], [Chars, true, leading], {"..", "h.ejsan.."}),
%% Test that it behaves with graphemes (i.e. nfd tests are the hard part)
+ ?TEST([101,778], [[[101, 779]], true], {[101,778], []}),
?TEST(["aaee",778,"äöoo"], [[[$e,778]], true, leading], {"aae", [$e,778|"äöoo"]}),
?TEST([<<"aae">>,778,"äöoo"], [[[$e,778]],true,leading], {"aa", [$e,778|"äöoo"]}),
?TEST([<<"e">>,778,"åäöe", <<778/utf8>>], [[[$e,778]], true, leading], {[], [$e,778]++"åäöe"++[778]}),
@@ -713,29 +713,123 @@ nth_lexeme(_) ->
meas(Config) ->
+ Parent = self(),
+ Exec = fun() ->
+ DataDir0 = proplists:get_value(data_dir, Config),
+ DataDir = filename:join(lists:droplast(filename:split(DataDir0))),
+ case proplists:get_value(profile, Config, false) of
+ false ->
+ do_measure(DataDir);
+ eprof ->
+ eprof:profile(fun() -> do_measure(DataDir) end, [set_on_spawn]),
+ eprof:stop_profiling(),
+ eprof:analyze(),
+ eprof:stop()
+ end,
+ Parent ! {test_done, self()},
+ normal
+ end,
+ ct:timetrap({minutes,2}),
case ct:get_timetrap_info() of
{_,{_,Scale}} when Scale > 1 ->
{skip,{will_not_run_in_debug,Scale}};
- _ -> % No scaling
- DataDir = proplists:get_value(data_dir, Config),
- TestDir = filename:dirname(string:trim(DataDir, trailing, "/")),
- do_measure(TestDir)
+ _ -> % No scaling, run at most 1.5 min
+ Tester = spawn(Exec),
+ receive {test_done, Tester} -> ok
+ after 90000 ->
+ io:format("Timelimit reached stopping~n",[]),
+ exit(Tester, die)
+ end,
+ ok
end.
-do_measure(TestDir) ->
- File = filename:join(TestDir, ?MODULE_STRING ++ ".erl"),
+do_measure(DataDir) ->
+ File = filename:join([DataDir,"unicode_util_SUITE_data","NormalizationTest.txt"]),
io:format("File ~s ",[File]),
{ok, Bin} = file:read_file(File),
io:format("~p~n",[byte_size(Bin)]),
Do = fun(Name, Func, Mode) ->
- {N, Mean, Stddev, _} = time_func(Func, Mode, Bin, 50),
- io:format("~10w ~6w ~6.2fms ±~4.2fms #~.2w gc included~n",
+ {N, Mean, Stddev, _} = time_func(Func, Mode, Bin, 20),
+ io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
[Name, Mode, Mean/1000, Stddev/1000, N])
end,
+ Do2 = fun(Name, Func, Mode) ->
+ {N, Mean, Stddev, _} = time_func(Func, binary, <<>>, 20),
+ io:format("~15w ~6w ~6.2fms ±~5.2fms #~.2w gc included~n",
+ [Name, Mode, Mean/1000, Stddev/1000, N])
+ end,
io:format("----------------------~n"),
- Do(tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
+
+ Do(old_tokens, fun(Str) -> string:tokens(Str, [$\n,$\r]) end, list),
Tokens = {lexemes, fun(Str) -> string:lexemes(Str, [$\n,$\r]) end},
[Do(Name,Fun,Mode) || {Name,Fun} <- [Tokens], Mode <- [list, binary]],
+
+ S0 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....",
+ S0B = <<"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxy.....">>,
+ Do2(old_strip_l, repeat(fun() -> string:strip(S0, left, $x) end), list),
+ Do2(trim_l, repeat(fun() -> string:trim(S0, leading, [$x]) end), list),
+ Do2(trim_l, repeat(fun() -> string:trim(S0B, leading, [$x]) end), binary),
+ Do2(old_strip_r, repeat(fun() -> string:strip(S0, right, $.) end), list),
+ Do2(trim_t, repeat(fun() -> string:trim(S0, trailing, [$.]) end), list),
+ Do2(trim_t, repeat(fun() -> string:trim(S0B, trailing, [$.]) end), binary),
+
+ Do2(old_chr_sub, repeat(fun() -> string:sub_string(S0, string:chr(S0, $.)) end), list),
+ Do2(old_str_sub, repeat(fun() -> string:sub_string(S0, string:str(S0, [$.])) end), list),
+ Do2(find, repeat(fun() -> string:find(S0, [$.]) end), list),
+ Do2(find, repeat(fun() -> string:find(S0B, [$.]) end), binary),
+ Do2(old_str_sub2, repeat(fun() -> N = string:str(S0, "xy.."),
+ {string:sub_string(S0,1,N), string:sub_string(S0,N+4)} end), list),
+ Do2(split, repeat(fun() -> string:split(S0, "xy..") end), list),
+ Do2(split, repeat(fun() -> string:split(S0B, "xy..") end), binary),
+
+ Do2(old_rstr_sub, repeat(fun() -> string:sub_string(S0, string:rstr(S0, [$y])) end), list),
+ Do2(find_t, repeat(fun() -> string:find(S0, [$y], trailing) end), list),
+ Do2(find_t, repeat(fun() -> string:find(S0B, [$y], trailing) end), binary),
+ Do2(old_rstr_sub2, repeat(fun() -> N = string:rstr(S0, "y.."),
+ {string:sub_string(S0,1,N), string:sub_string(S0,N+3)} end), list),
+ Do2(split_t, repeat(fun() -> string:split(S0, "y..", trailing) end), list),
+ Do2(split_t, repeat(fun() -> string:split(S0B, "y..", trailing) end), binary),
+
+ Do2(old_span, repeat(fun() -> N=string:span(S0, [$x, $y]),
+ {string:sub_string(S0,1,N),string:sub_string(S0,N+1)}
+ end), list),
+ Do2(take, repeat(fun() -> string:take(S0, [$x, $y]) end), list),
+ Do2(take, repeat(fun() -> string:take(S0B, [$x, $y]) end), binary),
+
+ Do2(old_cspan, repeat(fun() -> N=string:cspan(S0, [$.,$y]),
+ {string:sub_string(S0,1,N),string:sub_string(S0,N+1)}
+ end), list),
+ Do2(take_c, repeat(fun() -> string:take(S0, [$.,$y], true) end), list),
+ Do2(take_c, repeat(fun() -> string:take(S0B, [$.,$y], true) end), binary),
+
+ Do2(old_substr, repeat(fun() -> string:substr(S0, 21, 15) end), list),
+ Do2(slice, repeat(fun() -> string:slice(S0, 20, 15) end), list),
+ Do2(slice, repeat(fun() -> string:slice(S0B, 20, 15) end), binary),
+
+ io:format("--~n",[]),
+ NthTokens = {nth_lexemes, fun(Str) -> string:nth_lexeme(Str, 18000, [$\n,$\r]) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [NthTokens], Mode <- [list, binary]],
+ Do2(take_t, repeat(fun() -> string:take(S0, [$.,$y], false, trailing) end), list),
+ Do2(take_t, repeat(fun() -> string:take(S0B, [$.,$y], false, trailing) end), binary),
+ Do2(take_tc, repeat(fun() -> string:take(S0, [$x], true, trailing) end), list),
+ Do2(take_tc, repeat(fun() -> string:take(S0B, [$x], true, trailing) end), binary),
+
+ Length = {length, fun(Str) -> string:length(Str) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [Length], Mode <- [list, binary]],
+
+ Reverse = {reverse, fun(Str) -> string:reverse(Str) end},
+ [Do(Name,Fun,Mode) || {Name,Fun} <- [Reverse], Mode <- [list, binary]],
+
+ ok.
+
+repeat(F) ->
+ fun(_) -> repeat_1(F,20000) end.
+
+repeat_1(F, N) when N > 0 ->
+ F(),
+ repeat_1(F, N-1);
+repeat_1(_, _) ->
+ erlang:garbage_collect(),
ok.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -865,8 +959,6 @@ check_types_1({list, _},{list, undefined}) ->
ok;
check_types_1({list, _},{list, codepoints}) ->
ok;
-check_types_1({list, _},{list, {list, codepoints}}) ->
- ok;
check_types_1({list, {list, _}},{list, {list, codepoints}}) ->
ok;
check_types_1(mixed,_) ->
diff --git a/lib/stdlib/test/unicode_util_SUITE.erl b/lib/stdlib/test/unicode_util_SUITE.erl
index 7dba0a2fd0..632d9ae6e6 100644
--- a/lib/stdlib/test/unicode_util_SUITE.erl
+++ b/lib/stdlib/test/unicode_util_SUITE.erl
@@ -312,12 +312,23 @@ get(_) ->
add_get_tests.
count(Config) ->
+ Parent = self(),
+ Exec = fun() ->
+ do_measure(Config),
+ Parent ! {test_done, self()}
+ end,
ct:timetrap({minutes,5}),
case ct:get_timetrap_info() of
- {_,{_,Scale}} ->
+ {_,{_,Scale}} when Scale > 1 ->
{skip,{measurments_skipped_debug,Scale}};
- _ -> % No scaling
- do_measure(Config)
+ _ -> % No scaling, run at most 2 min
+ Tester = spawn(Exec),
+ receive {test_done, Tester} -> ok
+ after 120000 ->
+ io:format("Timelimit reached stopping~n",[]),
+ exit(Tester, die)
+ end,
+ ok
end.
do_measure(Config) ->
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index c625da56c6..fef356355c 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -38,7 +38,10 @@
recompose_query/1, recompose_parse_query/1,
recompose_path/1, recompose_parse_path/1,
recompose_autogen/1, parse_recompose_autogen/1,
- transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1
+ transcode_basic/1, transcode_options/1, transcode_mixed/1, transcode_negative/1,
+ compose_query/1, compose_query_latin1/1, compose_query_negative/1,
+ dissect_query/1, dissect_query_negative/1,
+ interop_query_latin1/1, interop_query_utf8/1
]).
@@ -107,7 +110,14 @@ all() ->
transcode_basic,
transcode_options,
transcode_mixed,
- transcode_negative
+ transcode_negative,
+ compose_query,
+ compose_query_latin1,
+ compose_query_negative,
+ dissect_query,
+ dissect_query_negative,
+ interop_query_latin1,
+ interop_query_utf8
].
groups() ->
@@ -823,6 +833,65 @@ transcode_negative(_Config) ->
{error,invalid_input,<<"ö">>} =
uri_string:transcode("foo%F6bar", [{in_encoding, utf8},{out_encoding, utf8}]).
+compose_query(_Config) ->
+ [] = uri_string:compose_query([]),
+ "foo=1&bar=2" = uri_string:compose_query([{<<"foo">>,"1"}, {"bar", "2"}]),
+ "foo=1&b%C3%A4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,utf8}]),
+ "foo=1&b%C3%A4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,unicode}]),
+ "foo=1&b%E4r=2" = uri_string:compose_query([{"foo","1"}, {"bär", "2"}],[{encoding,latin1}]),
+ "foo+bar=1&%E5%90%88=2" = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
+ "foo+bar=1&%26%2321512%3B=2" =
+ uri_string:compose_query([{"foo bar","1"}, {"合", "2"}],[{encoding,latin1}]),
+ "foo+bar=1&%C3%B6=2" = uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {"ö", <<"2">>}]),
+ <<"foo+bar=1&%C3%B6=2">> =
+ uri_string:compose_query([{<<"foo bar">>,<<"1">>}, {<<"ö"/utf8>>, <<"2">>}]).
+
+compose_query_latin1(_Config) ->
+ Q = uri_string:compose_query([{"合foö bar","1"}, {"合", "合"}],[{encoding,latin1}]),
+ Q1 = uri_string:transcode(Q, [{in_encoding, latin1}]),
+ [{"合foö bar","1"}, {"合", "合"}] = uri_string:dissect_query(Q1),
+ Q2 = uri_string:compose_query([{<<"合foö bar"/utf8>>,<<"1">>}, {<<"合"/utf8>>, <<"合"/utf8>>}],
+ [{encoding,latin1}]),
+ Q3 = uri_string:transcode(Q2, [{in_encoding, latin1}]),
+ [{<<"合foö bar"/utf8>>,<<"1">>}, {<<"合"/utf8>>, <<"合"/utf8>>}] =
+ uri_string:dissect_query(Q3).
+
+compose_query_negative(_Config) ->
+ {error,invalid_input,4} = uri_string:compose_query([{"",4}]),
+ {error,invalid_input,5} = uri_string:compose_query([{5,""}]),
+ {error,invalid_encoding,utf16} =
+ uri_string:compose_query([{"foo bar","1"}, {<<"ö">>, "2"}],[{encoding,utf16}]).
+
+dissect_query(_Config) ->
+ [] = uri_string:dissect_query(""),
+ [{"foo","1"}, {"amp;bar", "2"}] = uri_string:dissect_query("foo=1&amp;bar=2"),
+ [{"foo","1"}, {"bar", "2"}] = uri_string:dissect_query("foo=1&bar=2"),
+ [{"foo","1;bar=2"}] = uri_string:dissect_query("foo=1;bar=2"),
+ [{"foo","1"}, {"bar", "222"}] = uri_string:dissect_query([<<"foo=1&bar=2">>,"22"]),
+ [{"foo","ö"}, {"bar", "2"}] = uri_string:dissect_query("foo=%C3%B6&bar=2"),
+ [{<<"foo">>,<<"ö"/utf8>>}, {<<"bar">>, <<"2">>}] =
+ uri_string:dissect_query(<<"foo=%C3%B6&bar=2">>),
+ [{"foo bar","1"},{"ö","2"}] =
+ uri_string:dissect_query([<<"foo+bar=1&">>,<<"%C3%B6=2">>]),
+ [{"foo bar","1"},{[21512],"2"}] =
+ uri_string:dissect_query("foo+bar=1&%26%2321512%3B=2"),
+ [{<<"foo bar">>,<<"1">>},{<<"合"/utf8>>,<<"2">>}] =
+ uri_string:dissect_query(<<"foo+bar=1&%26%2321512%3B=2">>),
+ [{"föo bar","1"},{"ö","2"}] =
+ uri_string:dissect_query("föo+bar=1&%C3%B6=2"),
+ [{<<"föo bar"/utf8>>,<<"1">>},{<<"ö"/utf8>>,<<"2">>}] =
+ uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2"/utf8>>).
+
+dissect_query_negative(_Config) ->
+ {error,missing_value,"&"} =
+ uri_string:dissect_query("foo1&bar=2"),
+ {error,invalid_percent_encoding,"%XX%B6"} = uri_string:dissect_query("foo=%XX%B6&amp;bar=2"),
+ {error,invalid_input,[153]} =
+ uri_string:dissect_query("foo=%99%B6&amp;bar=2"),
+ {error,invalid_character,"ö"} = uri_string:dissect_query(<<"föo+bar=1&%C3%B6=2">>),
+ {error,invalid_input,<<"ö">>} =
+ uri_string:dissect_query([<<"foo+bar=1&amp;">>,<<"%C3%B6=2ö">>]).
+
normalize(_Config) ->
"/a/g" = uri_string:normalize("/a/b/c/./../../g"),
<<"mid/6">> = uri_string:normalize(<<"mid/content=5/../6">>),
@@ -842,3 +911,16 @@ normalize(_Config) ->
uri_string:normalize(<<"sftp://localhost:22">>),
<<"tftp://localhost">> =
uri_string:normalize(<<"tftp://localhost:69">>).
+
+interop_query_utf8(_Config) ->
+ Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
+ Uri = uri_string:recompose(#{path => "/", query => Q}),
+ #{query := Q1} = uri_string:parse(Uri),
+ [{"foo bar","1"}, {"合", "2"}] = uri_string:dissect_query(Q1).
+
+interop_query_latin1(_Config) ->
+ Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}], [{encoding,latin1}]),
+ Uri = uri_string:recompose(#{path => "/", query => Q}),
+ Uri1 = uri_string:transcode(Uri, [{in_encoding, latin1}]),
+ #{query := Q1} = uri_string:parse(Uri1),
+ [{"foo bar","1"}, {"合", "2"}] = uri_string:dissect_query(Q1).
diff --git a/lib/stdlib/uc_spec/gen_unicode_mod.escript b/lib/stdlib/uc_spec/gen_unicode_mod.escript
index 674e5a0628..fe5a860d45 100755
--- a/lib/stdlib/uc_spec/gen_unicode_mod.escript
+++ b/lib/stdlib/uc_spec/gen_unicode_mod.escript
@@ -170,7 +170,7 @@ gen_header(Fd) ->
io:put_chars(Fd, "-export([spec_version/0, lookup/1, get_case/1]).\n"),
io:put_chars(Fd, "-inline([class/1]).\n"),
io:put_chars(Fd, "-compile(nowarn_unused_vars).\n"),
- io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc_prepend/2, gc_e_cont/2]}).\n"),
+ io:put_chars(Fd, "-dialyzer({no_improper_lists, [cp/1, gc/1, gc_prepend/2, gc_e_cont/2]}).\n"),
io:put_chars(Fd, "-type gc() :: char()|[char()].\n\n\n"),
ok.
@@ -240,7 +240,7 @@ gen_norm(Fd) ->
"-spec nfd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfd(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [decompose(GC)|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -250,7 +250,7 @@ gen_norm(Fd) ->
"-spec nfkd(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfkd(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [decompose_compat(GC)|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -260,7 +260,7 @@ gen_norm(Fd) ->
"-spec nfc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfc(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 255 -> [GC|R];\n"
+ " [GC|R] when GC < 256 -> [GC|R];\n"
" [GC|Str] -> [compose(decompose(GC))|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -270,7 +270,7 @@ gen_norm(Fd) ->
"-spec nfkc(unicode:chardata()) -> maybe_improper_list(gc(),unicode:chardata()) | {error, unicode:chardata()}.\n"
"nfkc(Str0) ->\n"
" case gc(Str0) of\n"
- " [GC|R] when GC < 127 -> [GC|R];\n"
+ " [GC|R] when GC < 128 -> [GC|R];\n"
" [GC|Str] -> [compose_compat_0(decompose_compat(GC))|Str];\n"
" [] -> [];\n"
" {error,_}=Error -> Error\n end.\n\n"
@@ -476,13 +476,30 @@ gen_gc(Fd, GBP) ->
"-spec gc(String::unicode:chardata()) ->"
" maybe_improper_list() | {error, unicode:chardata()}.\n"),
io:put_chars(Fd,
+ "gc([CP1, CP2|_]=T)\n"
+ " when CP1 < 256, CP2 < 256, CP1 =/= $\r -> %% Ascii Fast path\n"
+ " T;\n"
+ "gc(<<CP1/utf8, Rest/binary>>) ->\n"
+ " if CP1 < 256, CP1 =/= $\r ->\n"
+ " case Rest of\n"
+ " <<CP2/utf8, _/binary>> when CP2 < 256 -> %% Ascii Fast path\n"
+ " [CP1|Rest];\n"
+ " _ -> gc_1([CP1|Rest])\n"
+ " end;\n"
+ " true -> gc_1([CP1|Rest])\n"
+ " end;\n"
"gc(Str) ->\n"
" gc_1(cp(Str)).\n\n"
"gc_1([$\\r|R0] = R) ->\n"
" case cp(R0) of % Don't break CRLF\n"
" [$\\n|R1] -> [[$\\r,$\\n]|R1];\n"
" _ -> R\n"
- " end;\n"),
+ " end;\n"
+ %% "gc_1([CP1, CP2|_]=T) when CP1 < 256, CP2 < 256 ->\n"
+ %% " T; %% Fast path\n"
+ %% "gc_1([CP1|<<CP2/utf8, _/binary>>]=T) when CP1 < 256, CP2 < 256 ->\n"
+ %% " T; %% Fast path\n"
+ ),
io:put_chars(Fd, "%% Handle control\n"),
GenControl = fun(Range) -> io:format(Fd, "gc_1~s R0;\n", [gen_clause(Range)]) end,
@@ -490,7 +507,7 @@ gen_gc(Fd, GBP) ->
[R1,R2,R3|Crs] = CRs0,
[GenControl(CP) || CP <- merge_ranges([R1,R2,R3], split), CP =/= {$\r, undefined}],
%%GenControl(R1),GenControl(R2),GenControl(R3),
- io:format(Fd, "gc_1([CP|R]) when CP < 255 -> gc_extend(R,CP);\n", []),
+ io:format(Fd, "gc_1([CP|R]) when CP < 256 -> gc_extend(R,CP);\n", []),
[GenControl(CP) || CP <- Crs],
%% One clause per CP
%% CRs0 = merge_ranges(maps:get(cr, GBP) ++ maps:get(lf, GBP) ++ maps:get(control, GBP)),
diff --git a/lib/stdlib/vsn.mk b/lib/stdlib/vsn.mk
index 48db5dc900..69d258c2f0 100644
--- a/lib/stdlib/vsn.mk
+++ b/lib/stdlib/vsn.mk
@@ -1 +1 @@
-STDLIB_VSN = 3.4.2
+STDLIB_VSN = 3.4.3