aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-04-19 16:10:15 +0200
committerHans Bolinder <[email protected]>2017-04-24 14:51:03 +0200
commitef0dbc4f2a43d629d086c3e2b9a762bbc00d034b (patch)
treede3dd7b8d268f0819453920f8b6e8efb7212477e /lib/stdlib
parent739bca3fc267c55d84c8f5c193d16c0b2a7eee13 (diff)
downloadotp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.gz
otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.bz2
otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.zip
stdlib: Add Unicode modifier t to control sequences w and W
As of the introduction of Unicode characters in atoms, the control sequences 'w' and 'W' can return non-Latin-1 characters, unless some measure is taken. This commit makes sure that '~w' and '~W' always return Latin-1 characters, or bytes, which can be output to ports or written to raw files. The Unicode translation modifier 't' is needed to return non-Latin-1 characters.
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/io.xml4
-rw-r--r--lib/stdlib/doc/src/io_lib.xml3
-rw-r--r--lib/stdlib/src/erl_lint.erl4
-rw-r--r--lib/stdlib/src/io_lib.erl87
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
-rw-r--r--lib/stdlib/test/io_SUITE.erl15
6 files changed, 83 insertions, 40 deletions
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml
index 11a64c7f8a..74b57457ea 100644
--- a/lib/stdlib/doc/src/io.xml
+++ b/lib/stdlib/doc/src/io.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>1996</year><year>2016</year>
+ <year>1996</year><year>2017</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -265,6 +265,8 @@ ok
<p>Writes data with the standard syntax. This is used to
output Erlang terms. Atoms are printed within quotes if
they contain embedded non-printable characters.
+ Atom characters &gt; 255 are escaped unless the
+ Unicode translation modifier (<c>t</c>) is used.
Floats are printed accurately as the shortest, correctly
rounded string.</p>
</item>
diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml
index 5ae400da62..bc1d77ac83 100644
--- a/lib/stdlib/doc/src/io_lib.xml
+++ b/lib/stdlib/doc/src/io_lib.xml
@@ -356,7 +356,8 @@
<func>
<name name="write" arity="1"/>
- <name name="write" arity="2"/>
+ <name name="write" arity="2" clause_i="1"/>
+ <name name="write" arity="2" clause_i="2"/>
<fsummary>Write a term.</fsummary>
<desc>
<p>Returns a character list that represents <c><anno>Term</anno></c>.
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 78b7a0e751..7c40058dd8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3883,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) ->
extract_sequence(5, [$p|Fmt], Need);
extract_sequence(4, [$t, $P | Fmt], Need) ->
extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$t, $w | Fmt], Need) ->
+ extract_sequence(5, [$w|Fmt], Need);
+extract_sequence(4, [$t, $W | Fmt], Need) ->
+ extract_sequence(5, [$W|Fmt], Need);
extract_sequence(4, [$t, C | _Fmt], _Need) ->
{error,"invalid control ~t" ++ [C]};
extract_sequence(4, [$l, $p | Fmt], Need) ->
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 28e5007e5a..5ed2f4d888 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -268,47 +268,61 @@ write(Term, D, false) ->
-spec write(Term, Depth) -> chars() when
Term :: term(),
+ Depth :: depth();
+ (Term, Options) -> chars() when
+ Term :: term(),
+ Options :: [Option],
+ Option :: {'depth', Depth}
+ | {'encoding', 'latin1' | 'utf8' | 'unicode'},
Depth :: depth().
-write(_Term, 0) -> "...";
-write(Term, _D) when is_integer(Term) -> integer_to_list(Term);
-write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term);
-write(Atom, _D) when is_atom(Atom) -> write_atom(Atom);
-write(Term, _D) when is_port(Term) -> write_port(Term);
-write(Term, _D) when is_pid(Term) -> pid_to_list(Term);
-write(Term, _D) when is_reference(Term) -> write_ref(Term);
-write(<<_/bitstring>>=Term, D) -> write_binary(Term, D);
-write([], _D) -> "[]";
-write({}, _D) -> "{}";
-write([H|T], D) ->
+write(Term, Options) when is_list(Options) ->
+ Depth = get_option(depth, Options, -1),
+ Encoding = get_option(encoding, Options, epp:default_encoding()),
+ write1(Term, Depth, Encoding);
+write(Term, Depth) ->
+ write1(Term, Depth, latin1).
+
+write1(_Term, 0, _E) -> "...";
+write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term);
+write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term);
+write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom);
+write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom);
+write1(Term, _D, _E) when is_port(Term) -> write_port(Term);
+write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term);
+write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term);
+write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D);
+write1([], _D, _E) -> "[]";
+write1({}, _D, _E) -> "{}";
+write1([H|T], D, E) ->
if
D =:= 1 -> "[...]";
true ->
- [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]]
+ [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]]
end;
-write(F, _D) when is_function(F) ->
+write1(F, _D, _E) when is_function(F) ->
erlang:fun_to_list(F);
-write(Term, D) when is_map(Term) ->
- write_map(Term, D);
-write(T, D) when is_tuple(T) ->
+write1(Term, D, E) when is_map(Term) ->
+ write_map(Term, D, E);
+write1(T, D, E) when is_tuple(T) ->
if
D =:= 1 -> "{...}";
true ->
[${,
- [write(element(1, T), D-1)|
- write_tail(tl(tuple_to_list(T)), D-1, $,)],
+ [write1(element(1, T), D-1, E)|
+ write_tail(tl(tuple_to_list(T)), D-1, E, $,)],
$}]
end.
%% write_tail(List, Depth, CharacterBeforeDots)
%% Test the terminating case first as this looks better with depth.
-write_tail([], _D, _S) -> "";
-write_tail(_, 1, S) -> [S | "..."];
-write_tail([H|T], D, S) ->
- [$,,write(H, D-1)|write_tail(T, D-1, S)];
-write_tail(Other, D, S) ->
- [S,write(Other, D-1)].
+write_tail([], _D, _E, _S) -> "";
+write_tail(_, 1, _E, S) -> [S | "..."];
+write_tail([H|T], D, E, S) ->
+ [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)];
+write_tail(Other, D, E, S) ->
+ [S,write1(Other, D-1, E)].
write_port(Port) ->
erlang:port_to_list(Port).
@@ -316,17 +330,17 @@ write_port(Port) ->
write_ref(Ref) ->
erlang:ref_to_list(Ref).
-write_map(Map, D) when is_integer(D) ->
- [$#,${,write_map_body(maps:to_list(Map), D),$}].
+write_map(Map, D, E) when is_integer(D) ->
+ [$#,${,write_map_body(maps:to_list(Map), D, E),$}].
-write_map_body(_, 0) -> "...";
-write_map_body([],_) -> [];
-write_map_body([{K,V}],D) -> write_map_assoc(K,V,D);
-write_map_body([{K,V}|KVs], D) ->
- [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)].
+write_map_body(_, 0, _E) -> "...";
+write_map_body([], _, _E) -> [];
+write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E);
+write_map_body([{K,V}|KVs], D, E) ->
+ [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)].
-write_map_assoc(K,V,D) ->
- [write(K,D - 1),"=>",write(V,D-1)].
+write_map_assoc(K, V, D, E) ->
+ [write1(K, D - 1, E),"=>",write1(V, D-1, E)].
write_binary(B, D) when is_integer(D) ->
[$<,$<,write_binary_body(B, D),$>,$>].
@@ -344,6 +358,13 @@ write_binary_body(B, _D) ->
<<X:L>> = B,
[integer_to_list(X),$:,integer_to_list(L)].
+get_option(Key, TupleList, Default) ->
+ case lists:keyfind(Key, 1, TupleList) of
+ false -> Default;
+ {Key, Value} -> Value;
+ _ -> Default
+ end.
+
%%% There are two functions to write Unicode atoms:
%%% - they both escape control characters < 160;
%%% - write_atom() never escapes characters >= 160;
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 3113767614..14d925bacf 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -257,12 +257,12 @@ indentation([], I) -> I.
%% This is the main dispatch function for the various formatting commands.
%% Field widths and precisions have already been calculated.
-control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) ->
- term(io_lib:write(A, -1), F, Adj, P, Pad);
+control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) ->
+ term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad);
control($p, [A], F, Adj, P, Pad, Enc, Str, I) ->
print(A, -1, F, Adj, P, Pad, Enc, Str, I);
-control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) ->
- term(io_lib:write(A, Depth), F, Adj, P, Pad);
+control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) ->
+ term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad);
control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) ->
print(A, Depth, F, Adj, P, Pad, Enc, Str, I);
control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) ->
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index b2754e47ba..fb62e3aa3c 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -2348,4 +2348,19 @@ otp_14285(_Config) ->
L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])),
not is_latin1(S)],
L1 = lists:seq(256, 512),
+
+ latin1_fmt("~w", ['кирилли́ческий атом']),
+ latin1_fmt("~w", ['\x{10FFFF}']),
+ "'кирилли́ческий атом'" = fmt("~tw", ['кирилли́ческий атом']),
+ [$',16#10FFFF,$'] = fmt("~tw", ['\x{10FFFF}']),
+
+ latin1_fmt("~W", ['кирилли́ческий атом', 13]),
+ latin1_fmt("~W", ['\x{10FFFF}', 13]),
+ "'кирилли́ческий атом'" = fmt("~tW", ['кирилли́ческий атом', 13]),
+ [$',16#10FFFF,$'] = fmt("~tW", ['\x{10FFFF}', 13]),
+
ok.
+
+latin1_fmt(Fmt, Args) ->
+ L = fmt(Fmt, Args),
+ true = lists:all(fun is_latin1/1, L).