aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorHans Bolinder <hasse@erlang.org>2017-04-27 11:57:28 +0200
committerHans Bolinder <hasse@erlang.org>2017-04-27 11:57:28 +0200
commit89a66ab9110033350d6cd90ba9b3fe47252bc708 (patch)
tree9df65d3b7da87941a34282982673b6bf201b8a3e /lib
parentd9c2e78d4e7b6166245e79a70bd4d95c8a1b742a (diff)
parent4567b6afc41a5d18384916c171ae413112ee57cc (diff)
downloadotp-89a66ab9110033350d6cd90ba9b3fe47252bc708.tar.gz
otp-89a66ab9110033350d6cd90ba9b3fe47252bc708.tar.bz2
otp-89a66ab9110033350d6cd90ba9b3fe47252bc708.zip
Merge branch 'hasse/unicode_atoms/OTP-14285'
* hasse/unicode_atoms/OTP-14285: stdlib: Add Unicode modifier t to control sequence a stdlib: Add Unicode modifier t to control sequences w and W
Diffstat (limited to 'lib')
-rw-r--r--lib/stdlib/doc/src/io.xml6
-rw-r--r--lib/stdlib/doc/src/io_lib.xml3
-rw-r--r--lib/stdlib/src/erl_lint.erl4
-rw-r--r--lib/stdlib/src/io_lib.erl87
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
-rw-r--r--lib/stdlib/src/io_lib_fread.erl11
-rw-r--r--lib/stdlib/test/io_SUITE.erl24
7 files changed, 97 insertions, 48 deletions
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml
index 11a64c7f8a..64fcf4379f 100644
--- a/lib/stdlib/doc/src/io.xml
+++ b/lib/stdlib/doc/src/io.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>1996</year><year>2016</year>
+ <year>1996</year><year>2017</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -265,6 +265,8 @@ ok
<p>Writes data with the standard syntax. This is used to
output Erlang terms. Atoms are printed within quotes if
they contain embedded non-printable characters.
+ Atom characters &gt; 255 are escaped unless the
+ Unicode translation modifier (<c>t</c>) is used.
Floats are printed accurately as the shortest, correctly
rounded string.</p>
</item>
@@ -567,8 +569,6 @@ Prompt> <input>&lt;Characters beyond latin1 range not printable in this medium&g
<item>
<p>Similar to <c>s</c>, but the resulting string is
converted into an atom.</p>
- <p>The Unicode translation modifier is not allowed (atoms
- cannot contain characters beyond the <c>latin1</c> range).</p>
</item>
<tag><c>c</c></tag>
<item>
diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml
index 5ae400da62..bc1d77ac83 100644
--- a/lib/stdlib/doc/src/io_lib.xml
+++ b/lib/stdlib/doc/src/io_lib.xml
@@ -356,7 +356,8 @@
<func>
<name name="write" arity="1"/>
- <name name="write" arity="2"/>
+ <name name="write" arity="2" clause_i="1"/>
+ <name name="write" arity="2" clause_i="2"/>
<fsummary>Write a term.</fsummary>
<desc>
<p>Returns a character list that represents <c><anno>Term</anno></c>.
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 78b7a0e751..7c40058dd8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3883,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) ->
extract_sequence(5, [$p|Fmt], Need);
extract_sequence(4, [$t, $P | Fmt], Need) ->
extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$t, $w | Fmt], Need) ->
+ extract_sequence(5, [$w|Fmt], Need);
+extract_sequence(4, [$t, $W | Fmt], Need) ->
+ extract_sequence(5, [$W|Fmt], Need);
extract_sequence(4, [$t, C | _Fmt], _Need) ->
{error,"invalid control ~t" ++ [C]};
extract_sequence(4, [$l, $p | Fmt], Need) ->
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 28e5007e5a..5ed2f4d888 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -268,47 +268,61 @@ write(Term, D, false) ->
-spec write(Term, Depth) -> chars() when
Term :: term(),
+ Depth :: depth();
+ (Term, Options) -> chars() when
+ Term :: term(),
+ Options :: [Option],
+ Option :: {'depth', Depth}
+ | {'encoding', 'latin1' | 'utf8' | 'unicode'},
Depth :: depth().
-write(_Term, 0) -> "...";
-write(Term, _D) when is_integer(Term) -> integer_to_list(Term);
-write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term);
-write(Atom, _D) when is_atom(Atom) -> write_atom(Atom);
-write(Term, _D) when is_port(Term) -> write_port(Term);
-write(Term, _D) when is_pid(Term) -> pid_to_list(Term);
-write(Term, _D) when is_reference(Term) -> write_ref(Term);
-write(<<_/bitstring>>=Term, D) -> write_binary(Term, D);
-write([], _D) -> "[]";
-write({}, _D) -> "{}";
-write([H|T], D) ->
+write(Term, Options) when is_list(Options) ->
+ Depth = get_option(depth, Options, -1),
+ Encoding = get_option(encoding, Options, epp:default_encoding()),
+ write1(Term, Depth, Encoding);
+write(Term, Depth) ->
+ write1(Term, Depth, latin1).
+
+write1(_Term, 0, _E) -> "...";
+write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term);
+write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term);
+write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom);
+write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom);
+write1(Term, _D, _E) when is_port(Term) -> write_port(Term);
+write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term);
+write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term);
+write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D);
+write1([], _D, _E) -> "[]";
+write1({}, _D, _E) -> "{}";
+write1([H|T], D, E) ->
if
D =:= 1 -> "[...]";
true ->
- [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]]
+ [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]]
end;
-write(F, _D) when is_function(F) ->
+write1(F, _D, _E) when is_function(F) ->
erlang:fun_to_list(F);
-write(Term, D) when is_map(Term) ->
- write_map(Term, D);
-write(T, D) when is_tuple(T) ->
+write1(Term, D, E) when is_map(Term) ->
+ write_map(Term, D, E);
+write1(T, D, E) when is_tuple(T) ->
if
D =:= 1 -> "{...}";
true ->
[${,
- [write(element(1, T), D-1)|
- write_tail(tl(tuple_to_list(T)), D-1, $,)],
+ [write1(element(1, T), D-1, E)|
+ write_tail(tl(tuple_to_list(T)), D-1, E, $,)],
$}]
end.
%% write_tail(List, Depth, CharacterBeforeDots)
%% Test the terminating case first as this looks better with depth.
-write_tail([], _D, _S) -> "";
-write_tail(_, 1, S) -> [S | "..."];
-write_tail([H|T], D, S) ->
- [$,,write(H, D-1)|write_tail(T, D-1, S)];
-write_tail(Other, D, S) ->
- [S,write(Other, D-1)].
+write_tail([], _D, _E, _S) -> "";
+write_tail(_, 1, _E, S) -> [S | "..."];
+write_tail([H|T], D, E, S) ->
+ [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)];
+write_tail(Other, D, E, S) ->
+ [S,write1(Other, D-1, E)].
write_port(Port) ->
erlang:port_to_list(Port).
@@ -316,17 +330,17 @@ write_port(Port) ->
write_ref(Ref) ->
erlang:ref_to_list(Ref).
-write_map(Map, D) when is_integer(D) ->
- [$#,${,write_map_body(maps:to_list(Map), D),$}].
+write_map(Map, D, E) when is_integer(D) ->
+ [$#,${,write_map_body(maps:to_list(Map), D, E),$}].
-write_map_body(_, 0) -> "...";
-write_map_body([],_) -> [];
-write_map_body([{K,V}],D) -> write_map_assoc(K,V,D);
-write_map_body([{K,V}|KVs], D) ->
- [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)].
+write_map_body(_, 0, _E) -> "...";
+write_map_body([], _, _E) -> [];
+write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E);
+write_map_body([{K,V}|KVs], D, E) ->
+ [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)].
-write_map_assoc(K,V,D) ->
- [write(K,D - 1),"=>",write(V,D-1)].
+write_map_assoc(K, V, D, E) ->
+ [write1(K, D - 1, E),"=>",write1(V, D-1, E)].
write_binary(B, D) when is_integer(D) ->
[$<,$<,write_binary_body(B, D),$>,$>].
@@ -344,6 +358,13 @@ write_binary_body(B, _D) ->
<<X:L>> = B,
[integer_to_list(X),$:,integer_to_list(L)].
+get_option(Key, TupleList, Default) ->
+ case lists:keyfind(Key, 1, TupleList) of
+ false -> Default;
+ {Key, Value} -> Value;
+ _ -> Default
+ end.
+
%%% There are two functions to write Unicode atoms:
%%% - they both escape control characters < 160;
%%% - write_atom() never escapes characters >= 160;
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 3113767614..14d925bacf 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -257,12 +257,12 @@ indentation([], I) -> I.
%% This is the main dispatch function for the various formatting commands.
%% Field widths and precisions have already been calculated.
-control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) ->
- term(io_lib:write(A, -1), F, Adj, P, Pad);
+control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) ->
+ term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad);
control($p, [A], F, Adj, P, Pad, Enc, Str, I) ->
print(A, -1, F, Adj, P, Pad, Enc, Str, I);
-control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) ->
- term(io_lib:write(A, Depth), F, Adj, P, Pad);
+control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) ->
+ term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad);
control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) ->
print(A, Depth, F, Adj, P, Pad, Enc, Str, I);
control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) ->
diff --git a/lib/stdlib/src/io_lib_fread.erl b/lib/stdlib/src/io_lib_fread.erl
index 6a8f8f728e..983e8d4566 100644
--- a/lib/stdlib/src/io_lib_fread.erl
+++ b/lib/stdlib/src/io_lib_fread.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -159,8 +159,8 @@ fread_field([$t|Format], F, Sup, _Unic) ->
fread_field(Format, F, Sup, Unic) ->
{Format,F,Sup,Unic}.
-%% fread1(Format, FieldWidth, Suppress, Line, N, Results, AllFormat)
-%% fread1(Format, FieldWidth, Suppress, Line, N, Results)
+%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results, AllFormat)
+%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results)
%% The main dispatch function for the formatting commands. Done in two
%% stages so format commands that need no input can always be processed.
@@ -231,9 +231,8 @@ fread1([$s|Format], none, Sup, U, Line0, N0, Res) ->
fread1([$s|Format], F, Sup, U, Line0, N, Res) ->
{Line,Cs} = fread_chars(Line0, F, U),
fread_string(Cs, Sup, U, Format, Line, N+F, Res);
-%% XXX:PaN Atoms still only latin1...
-fread1([$a|Format], none, Sup, false, Line0, N0, Res) ->
- {Line,N,Cs} = fread_string_cs(Line0, N0, false),
+fread1([$a|Format], none, Sup, U, Line0, N0, Res) ->
+ {Line,N,Cs} = fread_string_cs(Line0, N0, U),
fread_atom(Cs, Sup, Format, Line, N, Res);
fread1([$a|Format], F, Sup, false, Line0, N, Res) ->
{Line,Cs} = fread_chars(Line0, F, false),
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index b2754e47ba..ef3f0be5d7 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -2348,4 +2348,28 @@ otp_14285(_Config) ->
L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])),
not is_latin1(S)],
L1 = lists:seq(256, 512),
+
+ latin1_fmt("~w", ['кирилли́ческий атом']),
+ latin1_fmt("~w", ['\x{10FFFF}']),
+ "'кирилли́ческий атом'" = fmt("~tw", ['кирилли́ческий атом']),
+ [$',16#10FFFF,$'] = fmt("~tw", ['\x{10FFFF}']),
+
+ latin1_fmt("~W", ['кирилли́ческий атом', 13]),
+ latin1_fmt("~W", ['\x{10FFFF}', 13]),
+ "'кирилли́ческий атом'" = fmt("~tW", ['кирилли́ческий атом', 13]),
+ [$',16#10FFFF,$'] = fmt("~tW", ['\x{10FFFF}', 13]),
+
+ {ok, [an_atom],[]} = io_lib:fread("~a", "an_atom"),
+ {ok, [an_atom],[]} = io_lib:fread("~ta", "an_atom"),
+ Str = "\"ab" ++ [1089] ++ "cd\"",
+ {ok, ["\"ab"], [1089]++"cd\""} = io_lib:fread("~s", Str),
+ {ok, ['\"ab'], [1089]++"cd\""} = io_lib:fread("~a", Str),
+ {ok,[Str], []} = io_lib:fread("~ts", Str),
+ {ok,[Atom],[]} = io_lib:fread("~ta", Str),
+ Str = atom_to_list(Atom),
+
ok.
+
+latin1_fmt(Fmt, Args) ->
+ L = fmt(Fmt, Args),
+ true = lists:all(fun is_latin1/1, L).