aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/erl_tar.xml2
-rw-r--r--lib/stdlib/doc/src/io.xml95
-rw-r--r--lib/stdlib/doc/src/timer.xml2
-rw-r--r--lib/stdlib/doc/src/uri_string.xml57
-rw-r--r--lib/stdlib/src/binary.erl28
-rw-r--r--lib/stdlib/src/erl_lint.erl8
-rw-r--r--lib/stdlib/src/io_lib.erl58
-rw-r--r--lib/stdlib/src/io_lib_format.erl43
-rw-r--r--lib/stdlib/src/string.erl8
-rw-r--r--lib/stdlib/src/uri_string.erl65
-rw-r--r--lib/stdlib/test/io_SUITE.erl87
-rw-r--r--lib/stdlib/test/re_SUITE.erl15
-rw-r--r--lib/stdlib/test/string_SUITE.erl4
-rw-r--r--lib/stdlib/test/uri_string_SUITE.erl55
14 files changed, 389 insertions, 138 deletions
diff --git a/lib/stdlib/doc/src/erl_tar.xml b/lib/stdlib/doc/src/erl_tar.xml
index caf8f4a96d..14c543ee2b 100644
--- a/lib/stdlib/doc/src/erl_tar.xml
+++ b/lib/stdlib/doc/src/erl_tar.xml
@@ -417,7 +417,7 @@
<v>Reason = term()</v>
</type>
<desc>
- <p>Cconverts an error reason term to a human-readable error message
+ <p>Converts an error reason term to a human-readable error message
string.</p>
</desc>
</func>
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml
index 72c774e6ef..f1037ec76b 100644
--- a/lib/stdlib/doc/src/io.xml
+++ b/lib/stdlib/doc/src/io.xml
@@ -137,11 +137,11 @@
Hello world!
ok</pre>
<p>The general format of a control sequence is <c>~F.P.PadModC</c>.</p>
- <p>Character <c>C</c> determines the type of control sequence
- to be used, <c>F</c> and <c>P</c> are optional numeric
- arguments. If <c>F</c>, <c>P</c>, or <c>Pad</c> is <c>*</c>,
- the next argument in <c>Data</c> is used as the numeric value
- of <c>F</c> or <c>P</c>.</p>
+ <p>The character <c>C</c> determines the type of control sequence
+ to be used. It is the only required field. All of <c>F</c>,
+ <c>P</c>, <c>Pad</c>, and <c>Mod</c> are optional. For example,
+ to use a <c>#</c> for <c>Pad</c> but use the default values for
+ <c>F</c> and <c>P</c>, you can write <c>~..#C</c>.</p>
<list type="bulleted">
<item>
<p><c>F</c> is the <c>field width</c> of the printed argument. A
@@ -167,13 +167,26 @@ ok</pre>
The default padding character is <c>' '</c> (space).</p>
</item>
<item>
- <p><c>Mod</c> is the control sequence modifier. It is either a
- single character (<c>t</c>, for Unicode
- translation, and <c>l</c>, for stopping <c>p</c> and
- <c>P</c> from detecting printable characters)
- that changes the interpretation of <c>Data</c>.</p>
+ <p><c>Mod</c> is the control sequence modifier. This is
+ one or more characters that change the interpretation of
+ <c>Data</c>. The current modifiers are <c>t</c>, for Unicode
+ translation, and <c>l</c>, for stopping <c>p</c> and <c>P</c>
+ from detecting printable characters.</p>
</item>
</list>
+ <p>If <c>F</c>, <c>P</c>, or <c>Pad</c> is a <c>*</c> character,
+ the next argument in <c>Data</c> is used as the value.
+ For example:</p>
+ <pre>
+1> <input>io:fwrite("~*.*.0f~n",[9, 5, 3.14159265]).</input>
+003.14159
+ok</pre>
+ <p>To use a literal <c>*</c> character as <c>Pad</c>, it must be
+ passed as an argument:</p>
+ <pre>
+2> <input>io:fwrite("~*.*.*f~n",[9, 5, $*, 3.14159265]).</input>
+**3.14159
+ok</pre>
<p><em>Available control sequences:</em></p>
<taglist>
<tag><c>~</c></tag>
@@ -277,10 +290,9 @@ ok
<c>~w</c>, but breaks terms whose printed representation
is longer than one line into many lines and indents each
line sensibly. Left-justification is not supported.
- It also tries to detect lists of
- printable characters and to output these as strings. The
- Unicode translation modifier is used for determining
- what characters are printable, for example:</p>
+ It also tries to detect flat lists of
+ printable characters and output these as strings.
+ For example:</p>
<pre>
1> <input>T = [{attributes,[[{id,age,1.50000},{mode,explicit},</input>
<input>{typename,"INTEGER"}], [{id,cho},{mode,explicit},{typename,'Cho'}]]},</input>
@@ -302,7 +314,7 @@ ok
{mode,implicit}]
ok</pre>
<p>The field width specifies the maximum line length.
- Defaults to 80. The precision specifies the initial
+ It defaults to 80. The precision specifies the initial
indentation of the term. It defaults to the number of
characters printed on this line in the <em>same</em> call to
<seealso marker="#write/1"><c>write/1</c></seealso> or
@@ -332,18 +344,53 @@ ok
[{a,[97]},
{b,[98]}]
ok</pre>
- <p>Binaries that look like UTF-8 encoded strings are
- output with the string syntax if the Unicode translation
- modifier is specified:</p>
+ <p>The Unicode translation modifier <c>t</c> specifies how to treat
+ characters outside the Latin-1 range of codepoints, in
+ atoms, strings, and binaries. For example, printing an atom
+ containing a character &gt; 255:</p>
+ <pre>
+8> <input>io:fwrite("~p~n",[list_to_atom([1024])]).</input>
+'\x{400}'
+ok
+9> <input>io:fwrite("~tp~n",[list_to_atom([1024])]).</input>
+'Ѐ'
+ok</pre>
+ <p>By default, Erlang only detects lists of characters
+ in the Latin-1 range as strings, but the <c>+pc unicode</c>
+ flag can be used to change this (see <seealso
+ marker="#printable_range/0">
+ <c>printable_range/0</c></seealso> for details). For example:</p>
+ <pre>
+10> <input>io:fwrite("~p~n",[[214]]).</input>
+"Ö"
+ok
+11> <input>io:fwrite("~p~n",[[1024]]).</input>
+[1024]
+ok
+12> <input>io:fwrite("~tp~n",[[1024]]).</input>
+[1024]
+ok
+</pre>
+ <p>but if Erlang was started with <c>+pc unicode</c>:</p>
<pre>
-9> <input>io:fwrite("~p~n",[[1024]]).</input>
+13> <input>io:fwrite("~p~n",[[1024]]).</input>
[1024]
-10> <input>io:fwrite("~tp~n",[[1024]]).</input>
-"\x{400}"
-11> <input>io:fwrite("~tp~n", [&lt;&lt;128,128&gt;&gt;]).</input>
+ok
+14> <input>io:fwrite("~tp~n",[[1024]]).</input>
+"Ѐ"
+ok</pre>
+ <p>Similarly, binaries that look like UTF-8 encoded strings
+ are output with the binary string syntax if the <c>t</c>
+ modifier is specified:</p>
+ <pre>
+15> <input>io:fwrite("~p~n", [&lt;&lt;208,128&gt;&gt;]).</input>
+&lt;&lt;208,128&gt;&gt;
+ok
+16> <input>io:fwrite("~tp~n", [&lt;&lt;208,128&gt;&gt;]).</input>
+&lt;&lt;"Ѐ"/utf8&gt;&gt;
+ok
+17> <input>io:fwrite("~tp~n", [&lt;&lt;128,128&gt;&gt;]).</input>
&lt;&lt;128,128&gt;&gt;
-12> <input>io:fwrite("~tp~n", [&lt;&lt;208,128&gt;&gt;]).</input>
-&lt;&lt;"\x{400}"/utf8&gt;&gt;
ok</pre>
</item>
<tag><c>W</c></tag>
diff --git a/lib/stdlib/doc/src/timer.xml b/lib/stdlib/doc/src/timer.xml
index fcaccdb2cb..350847bf7d 100644
--- a/lib/stdlib/doc/src/timer.xml
+++ b/lib/stdlib/doc/src/timer.xml
@@ -270,7 +270,7 @@
<item>
<p>Evaluates <c>apply(<anno>Module</anno>, <anno>Function</anno>,
<anno>Arguments</anno>)</c> and measures the elapsed real time as
- reported by <seealso marker="os:timestamp/0">
+ reported by <seealso marker="kernel:os#timestamp/0">
<c>os:timestamp/0</c></seealso>.</p>
<p>Returns <c>{<anno>Time</anno>, <anno>Value</anno>}</c>, where
<c><anno>Time</anno></c> is the elapsed real time in
diff --git a/lib/stdlib/doc/src/uri_string.xml b/lib/stdlib/doc/src/uri_string.xml
index 21f470e763..88d4600611 100644
--- a/lib/stdlib/doc/src/uri_string.xml
+++ b/lib/stdlib/doc/src/uri_string.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>2017</year><year>2017</year>
+ <year>2017</year><year>2018</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -24,7 +24,7 @@
<title>uri_string</title>
<prepared>Péter Dimitrov</prepared>
<docno>1</docno>
- <date>2017-10-24</date>
+ <date>2018-02-07</date>
<rev>A</rev>
</header>
<module>uri_string</module>
@@ -32,7 +32,11 @@
<description>
<p>This module contains functions for parsing and handling URIs
(<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>) and
- form-urlencoded query strings (<url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>).
+ form-urlencoded query strings (<url href="https://www.w3.org/TR/html52/">HTML 5.2</url>).
+ </p>
+ <p>
+ Parsing and serializing non-UTF-8 form-urlencoded query strings are also supported
+ (<url href="https://www.w3.org/TR/html50/">HTML 5.0</url>).
</p>
<p>A URI is an identifier consisting of a sequence of characters matching the syntax
rule named <em>URI</em> in <url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.
@@ -70,7 +74,8 @@
<seealso marker="#transcode/2"><c>transcode/2</c></seealso>
</item>
<item>Transforming URIs into a normalized form<br></br>
- <seealso marker="#normalize/1"><c>normalize/1</c></seealso>
+ <seealso marker="#normalize/1"><c>normalize/1</c></seealso><br></br>
+ <seealso marker="#normalize/2"><c>normalize/2</c></seealso>
</item>
<item>Composing form-urlencoded query strings from a list of key-value pairs<br></br>
<seealso marker="#compose_query/1"><c>compose_query/1</c></seealso><br></br>
@@ -151,8 +156,10 @@
<p>Composes a form-urlencoded <c><anno>QueryString</anno></c> based on a
<c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
Form-urlencoding is defined in section
- 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- specification.
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
</p>
<p>See also the opposite operation <seealso marker="#dissect_query/1">
<c>dissect_query/1</c></seealso>.
@@ -209,12 +216,11 @@
<p>Dissects an urlencoded <c><anno>QueryString</anno></c> and returns a
<c><anno>QueryList</anno></c>, a list of non-percent-encoded key-value pairs.
Form-urlencoding is defined in section
- 4.10.22.6 of the <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- specification.
+ 4.10.21.6 of the <url href="https://www.w3.org/TR/html52/">HTML 5.2</url>
+ specification and in section 4.10.22.6 of the
+ <url href="https://www.w3.org/TR/html50/">HTML 5.0</url> specification for
+ non-UTF-8 encodings.
</p>
- <p>It is not as strict for its input as the decoding algorithm defined by
- <url href="https://www.w3.org/TR/html5/forms.html">HTML5</url>
- and accepts all unicode characters.</p>
<p>See also the opposite operation <seealso marker="#compose_query/1">
<c>compose_query/1</c></seealso>.
</p>
@@ -233,7 +239,7 @@
<name name="normalize" arity="1"/>
<fsummary>Syntax-based normalization.</fsummary>
<desc>
- <p>Transforms <c><anno>URIString</anno></c> into a normalized form
+ <p>Transforms an <c><anno>URI</anno></c> into a normalized form
using Syntax-Based Normalization as defined by
<url href="https://www.ietf.org/rfc/rfc3986.txt">RFC 3986</url>.</p>
<p>This function implements case normalization, percent-encoding
@@ -247,6 +253,33 @@
<![CDATA[<<"mid/6">>]]>
3> uri_string:normalize("http://localhost:80").
"https://localhost/"
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}).
+"http://localhost-%C3%B6rebro/a/g"
+ </pre>
+ </desc>
+ </func>
+
+ <func>
+ <name name="normalize" arity="2"/>
+ <fsummary>Syntax-based normalization.</fsummary>
+ <desc>
+ <p>Same as <c>normalize/1</c> but with an additional
+ <c><anno>Options</anno></c> parameter, that controls if the normalized URI
+ shall be returned as an uri_map().
+ There is one supported option: <c>return_map</c>.
+ </p>
+ <p><em>Example:</em></p>
+ <pre>
+1> <input>uri_string:normalize("/a/b/c/./../../g", [return_map]).</input>
+#{path => "/a/g"}
+2> <![CDATA[uri_string:normalize(<<"mid/content=5/../6">>, [return_map]).]]>
+<![CDATA[#{path => <<"mid/6">>}]]>
+3> uri_string:normalize("http://localhost:80", [return_map]).
+#{scheme => "http",path => "/",host => "localhost"}
+4> <input>uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",</input>
+4> host => "localhost-örebro"}, [return_map]).
+#{scheme => "http",path => "/a/g",host => "localhost-örebro"}
</pre>
</desc>
</func>
diff --git a/lib/stdlib/src/binary.erl b/lib/stdlib/src/binary.erl
index 6a64133b45..7d0e42489e 100644
--- a/lib/stdlib/src/binary.erl
+++ b/lib/stdlib/src/binary.erl
@@ -47,23 +47,39 @@ at(_, _) ->
-spec bin_to_list(Subject) -> [byte()] when
Subject :: binary().
-bin_to_list(_) ->
- erlang:nif_error(undef).
+bin_to_list(Subject) ->
+ binary_to_list(Subject).
-spec bin_to_list(Subject, PosLen) -> [byte()] when
Subject :: binary(),
PosLen :: part().
-bin_to_list(_, _) ->
- erlang:nif_error(undef).
+bin_to_list(Subject, {Pos, Len}) ->
+ bin_to_list(Subject, Pos, Len);
+bin_to_list(_Subject, _BadArg) ->
+ erlang:error(badarg).
-spec bin_to_list(Subject, Pos, Len) -> [byte()] when
Subject :: binary(),
Pos :: non_neg_integer(),
Len :: integer().
-bin_to_list(_, _, _) ->
- erlang:nif_error(undef).
+bin_to_list(Subject, Pos, Len) when not is_binary(Subject);
+ not is_integer(Pos);
+ not is_integer(Len) ->
+ %% binary_to_list/3 allows bitstrings as long as the slice fits, and we
+ %% want to badarg when Pos/Len aren't integers instead of raising badarith
+ %% when adjusting args for binary_to_list/3.
+ erlang:error(badarg);
+bin_to_list(Subject, Pos, 0) when Pos >= 0, Pos =< byte_size(Subject) ->
+ %% binary_to_list/3 doesn't handle this case.
+ [];
+bin_to_list(_Subject, _Pos, 0) ->
+ erlang:error(badarg);
+bin_to_list(Subject, Pos, Len) when Len < 0 ->
+ bin_to_list(Subject, Pos + Len, -Len);
+bin_to_list(Subject, Pos, Len) when Len > 0 ->
+ binary_to_list(Subject, Pos + 1, Pos + Len).
-spec compile_pattern(Pattern) -> cp() when
Pattern :: binary() | [binary()].
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 1930c462e8..9a62d21d34 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3971,6 +3971,8 @@ extract_sequence(3, [$.,_|Fmt], Need) ->
extract_sequence(4, Fmt, Need);
extract_sequence(3, Fmt, Need) ->
extract_sequence(4, Fmt, Need);
+extract_sequence(4, [$t, $l | Fmt], Need) ->
+ extract_sequence(4, [$l, $t | Fmt], Need);
extract_sequence(4, [$t, $c | Fmt], Need) ->
extract_sequence(5, [$c|Fmt], Need);
extract_sequence(4, [$t, $s | Fmt], Need) ->
@@ -3987,8 +3989,14 @@ extract_sequence(4, [$t, C | _Fmt], _Need) ->
{error,"invalid control ~t" ++ [C]};
extract_sequence(4, [$l, $p | Fmt], Need) ->
extract_sequence(5, [$p|Fmt], Need);
+extract_sequence(4, [$l, $t, $p | Fmt], Need) ->
+ extract_sequence(5, [$p|Fmt], Need);
extract_sequence(4, [$l, $P | Fmt], Need) ->
extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$l, $t, $P | Fmt], Need) ->
+ extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$l, $t, C | _Fmt], _Need) ->
+ {error,"invalid control ~lt" ++ [C]};
extract_sequence(4, [$l, C | _Fmt], _Need) ->
{error,"invalid control ~l" ++ [C]};
extract_sequence(4, Fmt, Need) ->
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index cacd9f2524..e37c13093b 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -931,7 +931,7 @@ limit_term(Term, Depth) ->
limit(_, 0) -> '...';
limit([H|T]=L, D) ->
if
- D =:= 1 -> '...';
+ D =:= 1 -> ['...'];
true ->
case printable_list(L) of
true -> L;
@@ -944,7 +944,7 @@ limit(Term, D) when is_map(Term) ->
limit({}=T, _D) -> T;
limit(T, D) when is_tuple(T) ->
if
- D =:= 1 -> '...';
+ D =:= 1 -> {'...'};
true ->
list_to_tuple([limit(element(1, T), D-1)|
limit_tail(tl(tuple_to_list(T)), D-1)])
@@ -961,32 +961,29 @@ limit_tail(Other, D) ->
%% Cannot limit maps properly since there is no guarantee that
%% maps:from_list() creates a map with the same internal ordering of
-%% the selected associations as in Map.
+%% the selected associations as in Map. Instead of subtracting one
+%% from the depth as the map associations are traversed (as is done
+%% for tuples and lists), the same depth is applied to each and every
+%% (returned) association.
limit_map(Map, D) ->
- limit_map(maps:iterator(Map), D, []).
+ %% Keep one extra association to make sure the final ',...' is included.
+ limit_map_body(maps:iterator(Map), D + 1, D, []).
-limit_map(_I, 0, Acc) ->
+limit_map_body(_I, 0, _D0, Acc) ->
maps:from_list(Acc);
-limit_map(I, D, Acc) ->
+limit_map_body(I, D, D0, Acc) ->
case maps:next(I) of
{K, V, NextI} ->
- limit_map(NextI, D-1, [{K,V} | Acc]);
+ limit_map_body(NextI, D-1, D0, [limit_map_assoc(K, V, D0) | Acc]);
none ->
maps:from_list(Acc)
end.
-%% maps:from_list(limit_map_body(erts_internal:maps_to_list(Map, D), D)).
+limit_map_assoc(K, V, D) ->
+ %% Keep keys as are to avoid creating duplicated keys.
+ {K, limit(V, D - 1)}.
-%% limit_map_body(_, 0) -> [{'...', '...'}];
-%% limit_map_body([], _) -> [];
-%% limit_map_body([{K,V}], D) -> [limit_map_assoc(K, V, D)];
-%% limit_map_body([{K,V}|KVs], D) ->
-%% [limit_map_assoc(K, V, D) | limit_map_body(KVs, D-1)].
-
-%% limit_map_assoc(K, V, D) ->
-%% {limit(K, D-1), limit(V, D-1)}.
-
-limit_bitstring(B, _D) -> B. %% Keeps all printable binaries.
+limit_bitstring(B, _D) -> B. % Keeps all printable binaries.
test_limit(_, 0) -> throw(limit);
test_limit([H|T]=L, D) when is_integer(D) ->
@@ -1022,18 +1019,21 @@ test_limit_tuple(T, I, Sz, D) ->
test_limit(element(I, T), D-1),
test_limit_tuple(T, I+1, Sz, D-1).
-test_limit_map(_Map, _D) -> ok.
-%% test_limit_map_body(erts_internal:maps_to_list(Map, D), D).
+test_limit_map(Map, D) ->
+ test_limit_map_body(maps:iterator(Map), D).
-%% test_limit_map_body(_, 0) -> throw(limit);
-%% test_limit_map_body([], _) -> ok;
-%% test_limit_map_body([{K,V}], D) -> test_limit_map_assoc(K, V, D);
-%% test_limit_map_body([{K,V}|KVs], D) ->
-%% test_limit_map_assoc(K, V, D),
-%% test_limit_map_body(KVs, D-1).
+test_limit_map_body(_I, 0) -> throw(limit); % cannot happen
+test_limit_map_body(I, D) ->
+ case maps:next(I) of
+ {K, V, NextI} ->
+ test_limit_map_assoc(K, V, D),
+ test_limit_map_body(NextI, D-1);
+ none ->
+ ok
+ end.
-%% test_limit_map_assoc(K, V, D) ->
-%% test_limit(K, D-1),
-%% test_limit(V, D-1).
+test_limit_map_assoc(K, V, D) ->
+ test_limit(K, D - 1),
+ test_limit(V, D - 1).
test_limit_bitstring(_, _) -> ok.
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index e345810ca0..64edbf1824 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -95,7 +95,7 @@ print([]) ->
[].
print(C, F, Ad, P, Pad, Encoding, Strings) ->
- [$~] ++ print_field_width(F, Ad) ++ print_precision(P) ++
+ [$~] ++ print_field_width(F, Ad) ++ print_precision(P, Pad) ++
print_pad_char(Pad) ++ print_encoding(Encoding) ++
print_strings(Strings) ++ [C].
@@ -103,8 +103,9 @@ print_field_width(none, _Ad) -> "";
print_field_width(F, left) -> integer_to_list(-F);
print_field_width(F, right) -> integer_to_list(F).
-print_precision(none) -> "";
-print_precision(P) -> [$. | integer_to_list(P)].
+print_precision(none, $\s) -> "";
+print_precision(none, _Pad) -> "."; % pad must be second dot
+print_precision(P, _Pad) -> [$. | integer_to_list(P)].
print_pad_char($\s) -> ""; % default, no need to make explicit
print_pad_char(Pad) -> [$., Pad].
@@ -126,25 +127,23 @@ collect_cseq(Fmt0, Args0) ->
{F,Ad,Fmt1,Args1} = field_width(Fmt0, Args0),
{P,Fmt2,Args2} = precision(Fmt1, Args1),
{Pad,Fmt3,Args3} = pad_char(Fmt2, Args2),
- {Encoding,Fmt4,Args4} = encoding(Fmt3, Args3),
- {Strings,Fmt5,Args5} = strings(Fmt4, Args4),
- {C,As,Fmt6,Args6} = collect_cc(Fmt5, Args5),
- FormatSpec = #{control_char => C, args => As, width => F, adjust => Ad,
- precision => P, pad_char => Pad, encoding => Encoding,
- strings => Strings},
- {FormatSpec,Fmt6,Args6}.
-
-encoding([$t|Fmt],Args) ->
- true = hd(Fmt) =/= $l,
- {unicode,Fmt,Args};
-encoding(Fmt,Args) ->
- {latin1,Fmt,Args}.
-
-strings([$l|Fmt],Args) ->
- true = hd(Fmt) =/= $t,
- {false,Fmt,Args};
-strings(Fmt,Args) ->
- {true,Fmt,Args}.
+ Spec0 = #{width => F,
+ adjust => Ad,
+ precision => P,
+ pad_char => Pad,
+ encoding => latin1,
+ strings => true},
+ {Spec1,Fmt4} = modifiers(Fmt3, Spec0),
+ {C,As,Fmt5,Args4} = collect_cc(Fmt4, Args3),
+ Spec2 = Spec1#{control_char => C, args => As},
+ {Spec2,Fmt5,Args4}.
+
+modifiers([$t|Fmt], Spec) ->
+ modifiers(Fmt, Spec#{encoding => unicode});
+modifiers([$l|Fmt], Spec) ->
+ modifiers(Fmt, Spec#{strings => false});
+modifiers(Fmt, Spec) ->
+ {Spec, Fmt}.
field_width([$-|Fmt0], Args0) ->
{F,Fmt,Args} = field_value(Fmt0, Args0),
diff --git a/lib/stdlib/src/string.erl b/lib/stdlib/src/string.erl
index e01bb7d85e..4e89819e41 100644
--- a/lib/stdlib/src/string.erl
+++ b/lib/stdlib/src/string.erl
@@ -420,10 +420,12 @@ to_number(_, Number, Rest, _, Tail) ->
%% Return the remaining string with prefix removed or else nomatch
-spec prefix(String::unicode:chardata(), Prefix::unicode:chardata()) ->
'nomatch' | unicode:chardata().
-prefix(Str, []) -> Str;
prefix(Str, Prefix0) ->
- Prefix = unicode:characters_to_list(Prefix0),
- case prefix_1(Str, Prefix) of
+ Result = case unicode:characters_to_list(Prefix0) of
+ [] -> Str;
+ Prefix -> prefix_1(Str, Prefix)
+ end,
+ case Result of
[] when is_binary(Str) -> <<>>;
Res -> Res
end.
diff --git a/lib/stdlib/src/uri_string.erl b/lib/stdlib/src/uri_string.erl
index a84679c595..28d36ea229 100644
--- a/lib/stdlib/src/uri_string.erl
+++ b/lib/stdlib/src/uri_string.erl
@@ -227,7 +227,7 @@
%% External API
%%-------------------------------------------------------------------------
-export([compose_query/1, compose_query/2,
- dissect_query/1, normalize/1, parse/1,
+ dissect_query/1, normalize/1, normalize/2, parse/1,
recompose/1, transcode/2]).
-export_type([error/0, uri_map/0, uri_string/0]).
@@ -292,18 +292,36 @@
%%-------------------------------------------------------------------------
%% Normalize URIs
%%-------------------------------------------------------------------------
--spec normalize(URIString) -> NormalizedURI when
- URIString :: uri_string(),
- NormalizedURI :: uri_string().
-normalize(URIString) ->
- %% Percent-encoding normalization and case normalization for
- %% percent-encoded triplets are achieved by running parse and
- %% recompose on the input URI string.
- recompose(
- normalize_path_segment(
- normalize_scheme_based(
- normalize_case(
- parse(URIString))))).
+-spec normalize(URI) -> NormalizedURI when
+ URI :: uri_string() | uri_map(),
+ NormalizedURI :: uri_string()
+ | error().
+normalize(URIMap) ->
+ normalize(URIMap, []).
+
+
+-spec normalize(URI, Options) -> NormalizedURI when
+ URI :: uri_string() | uri_map(),
+ Options :: [return_map],
+ NormalizedURI :: uri_string() | uri_map().
+normalize(URIMap, []) when is_map(URIMap) ->
+ recompose(normalize_map(URIMap));
+normalize(URIMap, [return_map]) when is_map(URIMap) ->
+ normalize_map(URIMap);
+normalize(URIString, []) ->
+ case parse(URIString) of
+ Value when is_map(Value) ->
+ recompose(normalize_map(Value));
+ Error ->
+ Error
+ end;
+normalize(URIString, [return_map]) ->
+ case parse(URIString) of
+ Value when is_map(Value) ->
+ normalize_map(Value);
+ Error ->
+ Error
+ end.
%%-------------------------------------------------------------------------
@@ -385,7 +403,8 @@ transcode(URIString, Options) when is_list(URIString) ->
%%-------------------------------------------------------------------------
%% Functions for working with the query part of a URI as a list
%% of key/value pairs.
-%% HTML5 - 4.10.22.6 URL-encoded form data
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - non UTF-8
%%-------------------------------------------------------------------------
%%-------------------------------------------------------------------------
@@ -393,7 +412,7 @@ transcode(URIString, Options) when is_list(URIString) ->
%% (application/x-www-form-urlencoded encoding algorithm)
%%-------------------------------------------------------------------------
-spec compose_query(QueryList) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
+ QueryList :: [{unicode:chardata(), unicode:chardata()}],
QueryString :: uri_string()
| error().
compose_query(List) ->
@@ -401,7 +420,7 @@ compose_query(List) ->
-spec compose_query(QueryList, Options) -> QueryString when
- QueryList :: [{uri_string(), uri_string()}],
+ QueryList :: [{unicode:chardata(), unicode:chardata()}],
Options :: [{encoding, atom()}],
QueryString :: uri_string()
| error().
@@ -432,7 +451,7 @@ compose_query([], _Options, IsList, Acc) ->
%%-------------------------------------------------------------------------
-spec dissect_query(QueryString) -> QueryList when
QueryString :: uri_string(),
- QueryList :: [{uri_string(), uri_string()}]
+ QueryList :: [{unicode:chardata(), unicode:chardata()}]
| error().
dissect_query(<<>>) ->
[];
@@ -1755,7 +1774,8 @@ get_separator(_L) ->
<<"&">>.
-%% HTML5 - 4.10.22.6 URL-encoded form data - encoding
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - encoding (non UTF-8)
form_urlencode(Cs, [{encoding, latin1}]) when is_list(Cs) ->
B = convert_to_binary(Cs, utf8, utf8),
html5_byte_encode(base10_encode(B));
@@ -1850,7 +1870,8 @@ dissect_query_value(<<>>, IsList, Acc, Key, Value) ->
lists:reverse([{K,V}|Acc]).
-%% Form-urldecode input based on RFC 1866 [8.2.1]
+%% HTML 5.2 - 4.10.21.6 URL-encoded form data - WHATWG URL (10 Jan 2018) - UTF-8
+%% HTML 5.0 - 4.10.22.6 URL-encoded form data - decoding (non UTF-8)
form_urldecode(true, B) ->
Result = base10_decode(form_urldecode(B, <<>>)),
convert_to_list(Result, utf8);
@@ -1903,6 +1924,12 @@ base10_decode_unicode(<<H,_/binary>>, _, _) ->
%% Helper functions for normalize
%%-------------------------------------------------------------------------
+normalize_map(URIMap) ->
+ normalize_path_segment(
+ normalize_scheme_based(
+ normalize_case(URIMap))).
+
+
%% 6.2.2.1. Case Normalization
normalize_case(#{scheme := Scheme, host := Host} = Map) ->
Map#{scheme => to_lower(Scheme),
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index 45363c0592..6f4e7ad7e0 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -1905,29 +1905,61 @@ otp_10836(Suite) when is_list(Suite) ->
%% OTP-10755. The 'l' modifier
otp_10755(Suite) when is_list(Suite) ->
+ %% printing plain ascii characters
S = "string",
"\"string\"" = fmt("~p", [S]),
"[115,116,114,105,110,103]" = fmt("~lp", [S]),
"\"string\"" = fmt("~P", [S, 2]),
"[115|...]" = fmt("~lP", [S, 2]),
- {'EXIT',{badarg,_}} = (catch fmt("~ltp", [S])),
- {'EXIT',{badarg,_}} = (catch fmt("~tlp", [S])),
- {'EXIT',{badarg,_}} = (catch fmt("~ltP", [S])),
- {'EXIT',{badarg,_}} = (catch fmt("~tlP", [S])),
+ %% printing latin1 chars, with and without modifiers
+ T = {[255],list_to_atom([255]),[a,b,c]},
+ "{\"ÿ\",ÿ,[a,b,c]}" = fmt("~p", [T]),
+ "{\"ÿ\",ÿ,[a,b,c]}" = fmt("~tp", [T]),
+ "{[255],ÿ,[a,b,c]}" = fmt("~lp", [T]),
+ "{[255],ÿ,[a,b,c]}" = fmt("~ltp", [T]),
+ "{[255],ÿ,[a,b,c]}" = fmt("~tlp", [T]),
+ "{\"ÿ\",ÿ,...}" = fmt("~P", [T,3]),
+ "{\"ÿ\",ÿ,...}" = fmt("~tP", [T,3]),
+ "{[255],ÿ,...}" = fmt("~lP", [T,3]),
+ "{[255],ÿ,...}" = fmt("~ltP", [T,3]),
+ "{[255],ÿ,...}" = fmt("~tlP", [T,3]),
+ %% printing unicode chars, with and without modifiers
+ U = {[666],list_to_atom([666]),[a,b,c]},
+ "{[666],'\\x{29A}',[a,b,c]}" = fmt("~p", [U]),
+ case io:printable_range() of
+ unicode ->
+ "{\"ʚ\",'ʚ',[a,b,c]}" = fmt("~tp", [U]),
+ "{\"ʚ\",'ʚ',...}" = fmt("~tP", [U,3]);
+ latin1 ->
+ "{[666],'ʚ',[a,b,c]}" = fmt("~tp", [U]),
+ "{[666],'ʚ',...}" = fmt("~tP", [U,3])
+ end,
+ "{[666],'\\x{29A}',[a,b,c]}" = fmt("~lp", [U]),
+ "{[666],'ʚ',[a,b,c]}" = fmt("~ltp", [U]),
+ "{[666],'ʚ',[a,b,c]}" = fmt("~tlp", [U]),
+ "{[666],'\\x{29A}',...}" = fmt("~P", [U,3]),
+ "{[666],'\\x{29A}',...}" = fmt("~lP", [U,3]),
+ "{[666],'ʚ',...}" = fmt("~ltP", [U,3]),
+ "{[666],'ʚ',...}" = fmt("~tlP", [U,3]),
+ %% the compiler should catch uses of ~l with other than pP
Text =
"-module(l_mod).\n"
"-export([t/0]).\n"
"t() ->\n"
" S = \"string\",\n"
- " io:format(\"~ltp\", [S]),\n"
- " io:format(\"~tlp\", [S]),\n"
- " io:format(\"~ltP\", [S, 1]),\n"
- " io:format(\"~tlP\", [S, 1]).\n",
+ " io:format(\"~lw\", [S]),\n"
+ " io:format(\"~lW\", [S, 1]),\n"
+ " io:format(\"~ltw\", [S]),\n"
+ " io:format(\"~tlw\", [S]),\n"
+ " io:format(\"~ltW\", [S, 1]),\n"
+ " io:format(\"~tlW\", [S, 1]).\n",
{ok,l_mod,[{_File,Ws}]} = compile_file("l_mod.erl", Text, Suite),
- ["format string invalid (invalid control ~lt)",
- "format string invalid (invalid control ~tl)",
- "format string invalid (invalid control ~lt)",
- "format string invalid (invalid control ~tl)"] =
+ ["format string invalid (invalid control ~lw)",
+ "format string invalid (invalid control ~lW)",
+ "format string invalid (invalid control ~ltw)",
+ "format string invalid (invalid control ~ltw)",
+ "format string invalid (invalid control ~ltW)",
+ "format string invalid (invalid control ~ltW)"] =
[lists:flatten(M:format_error(E)) || {_L,M,E} <- Ws],
ok.
@@ -2005,6 +2037,7 @@ writes(N, F1) ->
format_string(_Config) ->
%% All but padding is tested by fmt/2.
+ "xxxxxxxsss" = fmt("~10..xs", ["sss"]),
"xxxxxxsssx" = fmt("~10.4.xs", ["sss"]),
"xxxxxxsssx" = fmt("~10.4.*s", [$x, "sss"]),
ok.
@@ -2384,19 +2417,36 @@ limit_term(_Config) ->
{_, 2} = limt({a,b,c,[d,e]}, 2),
{_, 2} = limt({a,b,c,[d,e]}, 3),
{_, 2} = limt({a,b,c,[d,e]}, 4),
+ T0 = [1|{a,b,c}],
+ {_, 2} = limt(T0, 2),
+ {_, 2} = limt(T0, 3),
+ {_, 2} = limt(T0, 4),
{_, 1} = limt(<<"foo">>, 18),
+ {_, 2} = limt({"",[1,2]}, 3),
+ {_, 2} = limt({"",{1,2}}, 3),
+ true = limt_pp({"123456789012345678901234567890",{1,2}}, 3),
ok = blimt(<<"123456789012345678901234567890">>),
+ true = limt_pp(<<"123456789012345678901234567890">>, 3),
+ {_, 2} = limt({<<"kljlkjsl">>,[1,2,3,4]}, 4),
{_, 1} = limt(<<7:3>>, 2),
{_, 1} = limt(<<7:21>>, 2),
{_, 1} = limt([], 2),
{_, 1} = limt({}, 2),
+ {_, 1} = limt({"", ""}, 4),
{_, 1} = limt(#{}, 2),
- {_, 1} = limt(#{[] => {}}, 2),
+ {_, 2} = limt(#{[] => {}}, 1),
+ {_, 2} = limt(#{[] => {}}, 2),
{_, 1} = limt(#{[] => {}}, 3),
T = #{[] => {},[a] => [b]},
- {_, 1} = limt(T, 2),
+ {_, 1} = limt(T, 0),
+ {_, 2} = limt(T, 1),
+ {_, 2} = limt(T, 2),
{_, 1} = limt(T, 3),
{_, 1} = limt(T, 4),
+ T2 = #{[] => {},{} => []},
+ {_, 2} = limt(T2, 1),
+ {_, 2} = limt(T2, 2),
+ {_, 1} = limt(T2, 3),
ok.
blimt(Binary) ->
@@ -2430,3 +2480,12 @@ limt(Term, Depth) when is_integer(Depth) ->
form(Term, Depth) ->
lists:flatten(io_lib:format("~W", [Term, Depth])).
+
+limt_pp(Term, Depth) when is_integer(Depth) ->
+ T1 = io_lib:limit_term(Term, Depth),
+ S = pp(Term, Depth),
+ S1 = pp(T1, Depth),
+ S1 =:= S.
+
+pp(Term, Depth) ->
+ lists:flatten(io_lib:format("~P", [Term, Depth])).
diff --git a/lib/stdlib/test/re_SUITE.erl b/lib/stdlib/test/re_SUITE.erl
index 71f86e32e5..7b82647416 100644
--- a/lib/stdlib/test/re_SUITE.erl
+++ b/lib/stdlib/test/re_SUITE.erl
@@ -894,10 +894,13 @@ match_limit(Config) when is_list(Config) ->
%% Test that we get sub-binaries if subject is a binary and we capture
%% binaries.
sub_binaries(Config) when is_list(Config) ->
- Bin = list_to_binary(lists:seq(1,255)),
- {match,[B,C]}=re:run(Bin,"(a)",[{capture,all,binary}]),
- 255 = binary:referenced_byte_size(B),
- 255 = binary:referenced_byte_size(C),
- {match,[D]}=re:run(Bin,"(a)",[{capture,[1],binary}]),
- 255 = binary:referenced_byte_size(D),
+ %% The GC can auto-convert tiny sub-binaries to heap binaries, so we
+ %% extract large sequences to make the test more stable.
+ Bin = << <<I>> || I <- lists:seq(1, 4096) >>,
+ {match,[B,C]}=re:run(Bin,"a(.+)$",[{capture,all,binary}]),
+ true = byte_size(B) =/= byte_size(C),
+ 4096 = binary:referenced_byte_size(B),
+ 4096 = binary:referenced_byte_size(C),
+ {match,[D]}=re:run(Bin,"a(.+)$",[{capture,[1],binary}]),
+ 4096 = binary:referenced_byte_size(D),
ok.
diff --git a/lib/stdlib/test/string_SUITE.erl b/lib/stdlib/test/string_SUITE.erl
index c4a469c251..fdff2d24b8 100644
--- a/lib/stdlib/test/string_SUITE.erl
+++ b/lib/stdlib/test/string_SUITE.erl
@@ -486,6 +486,10 @@ to_float(_) ->
prefix(_) ->
?TEST("", ["a"], nomatch),
?TEST("a", [""], "a"),
+ ?TEST("a", [[[]]], "a"),
+ ?TEST("a", [<<>>], "a"),
+ ?TEST("a", [[<<>>]], "a"),
+ ?TEST("a", [[[<<>>]]], "a"),
?TEST("b", ["a"], nomatch),
?TEST("a", ["a"], ""),
?TEST("å", ["a"], nomatch),
diff --git a/lib/stdlib/test/uri_string_SUITE.erl b/lib/stdlib/test/uri_string_SUITE.erl
index fef356355c..92f8bb3292 100644
--- a/lib/stdlib/test/uri_string_SUITE.erl
+++ b/lib/stdlib/test/uri_string_SUITE.erl
@@ -22,7 +22,7 @@
-include_lib("common_test/include/ct.hrl").
-export([all/0, suite/0,groups/0,
- normalize/1,
+ normalize/1, normalize_map/1, normalize_return_map/1, normalize_negative/1,
parse_binary_fragment/1, parse_binary_host/1, parse_binary_host_ipv4/1,
parse_binary_host_ipv6/1,
parse_binary_path/1, parse_binary_pct_encoded_fragment/1, parse_binary_pct_encoded_query/1,
@@ -68,6 +68,9 @@ suite() ->
all() ->
[
normalize,
+ normalize_map,
+ normalize_return_map,
+ normalize_negative,
parse_binary_scheme,
parse_binary_userinfo,
parse_binary_pct_encoded_userinfo,
@@ -912,6 +915,56 @@ normalize(_Config) ->
<<"tftp://localhost">> =
uri_string:normalize(<<"tftp://localhost:69">>).
+normalize_map(_Config) ->
+ "/a/g" = uri_string:normalize(#{path => "/a/b/c/./../../g"}),
+ <<"mid/6">> = uri_string:normalize(#{path => <<"mid/content=5/../6">>}),
+ "http://localhost-%C3%B6rebro/a/g" =
+ uri_string:normalize(#{scheme => "http",port => 80,path => "/a/b/c/./../../g",
+ host => "localhost-örebro"}),
+ <<"http://localhost-%C3%B6rebro/a/g">> =
+ uri_string:normalize(#{scheme => <<"http">>,port => 80,
+ path => <<"/a/b/c/./../../g">>,
+ host => <<"localhost-örebro"/utf8>>}),
+ <<"https://localhost/">> =
+ uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>,
+ host => <<"localhost">>}),
+ <<"https://localhost:445/">> =
+ uri_string:normalize(#{scheme => <<"https">>,port => 445,path => <<>>,
+ host => <<"localhost">>}),
+ <<"ftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"ftp">>,port => 21,path => <<>>,
+ host => <<"localhost">>}),
+ <<"ssh://localhost">> =
+ uri_string:normalize(#{scheme => <<"ssh">>,port => 22,path => <<>>,
+ host => <<"localhost">>}),
+ <<"sftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"sftp">>,port => 22,path => <<>>,
+ host => <<"localhost">>}),
+ <<"tftp://localhost">> =
+ uri_string:normalize(#{scheme => <<"tftp">>,port => 69,path => <<>>,
+ host => <<"localhost">>}).
+
+normalize_return_map(_Config) ->
+ #{scheme := "http",path := "/a/g",host := "localhost-örebro"} =
+ uri_string:normalize("http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g",
+ [return_map]),
+ #{scheme := <<"http">>,path := <<"/a/g">>, host := <<"localhost-örebro"/utf8>>} =
+ uri_string:normalize(<<"http://localhos%74-%c3%b6rebro:80/a/b/c/./../../g">>,
+ [return_map]),
+ #{scheme := <<"https">>,path := <<"/">>, host := <<"localhost">>} =
+ uri_string:normalize(#{scheme => <<"https">>,port => 443,path => <<>>,
+ host => <<"localhost">>}, [return_map]).
+
+normalize_negative(_Config) ->
+ {error,invalid_uri,":"} =
+ uri_string:normalize("http://local>host"),
+ {error,invalid_uri,":"} =
+ uri_string:normalize(<<"http://local>host">>),
+ {error,invalid_uri,":"} =
+ uri_string:normalize("http://[192.168.0.1]", [return_map]),
+ {error,invalid_uri,":"} =
+ uri_string:normalize(<<"http://[192.168.0.1]">>, [return_map]).
+
interop_query_utf8(_Config) ->
Q = uri_string:compose_query([{"foo bar","1"}, {"合", "2"}]),
Uri = uri_string:recompose(#{path => "/", query => Q}),