diff options
author | Hans Bolinder <[email protected]> | 2017-04-19 16:10:15 +0200 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2017-04-24 14:51:03 +0200 |
commit | ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b (patch) | |
tree | de3dd7b8d268f0819453920f8b6e8efb7212477e /lib/stdlib | |
parent | 739bca3fc267c55d84c8f5c193d16c0b2a7eee13 (diff) | |
download | otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.gz otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.bz2 otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.zip |
stdlib: Add Unicode modifier t to control sequences w and W
Since the introduction of Unicode characters in atoms, the control
sequences 'w' and 'W' can return non-Latin-1 characters (code points
above 255) unless such characters are escaped.
This commit makes sure that '~w' and '~W' always return Latin-1
characters, that is, values that fit in a byte, so the result can be
output to ports or written to raw files.
To get non-Latin-1 characters in the result, the Unicode translation
modifier 't' must be used, as in '~tw' and '~tW'.
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/io.xml | 4 | ||||
-rw-r--r-- | lib/stdlib/doc/src/io_lib.xml | 3 | ||||
-rw-r--r-- | lib/stdlib/src/erl_lint.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib.erl | 87 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_format.erl | 10 | ||||
-rw-r--r-- | lib/stdlib/test/io_SUITE.erl | 15 |
6 files changed, 83 insertions, 40 deletions
diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml index 11a64c7f8a..74b57457ea 100644 --- a/lib/stdlib/doc/src/io.xml +++ b/lib/stdlib/doc/src/io.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1996</year><year>2016</year> + <year>1996</year><year>2017</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -265,6 +265,8 @@ ok <p>Writes data with the standard syntax. This is used to output Erlang terms. Atoms are printed within quotes if they contain embedded non-printable characters. + Atom characters > 255 are escaped unless the + Unicode translation modifier (<c>t</c>) is used. Floats are printed accurately as the shortest, correctly rounded string.</p> </item> diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 5ae400da62..bc1d77ac83 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -356,7 +356,8 @@ <func> <name name="write" arity="1"/> - <name name="write" arity="2"/> + <name name="write" arity="2" clause_i="1"/> + <name name="write" arity="2" clause_i="2"/> <fsummary>Write a term.</fsummary> <desc> <p>Returns a character list that represents <c><anno>Term</anno></c>. 
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 78b7a0e751..7c40058dd8 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3883,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) -> extract_sequence(5, [$p|Fmt], Need); extract_sequence(4, [$t, $P | Fmt], Need) -> extract_sequence(5, [$P|Fmt], Need); +extract_sequence(4, [$t, $w | Fmt], Need) -> + extract_sequence(5, [$w|Fmt], Need); +extract_sequence(4, [$t, $W | Fmt], Need) -> + extract_sequence(5, [$W|Fmt], Need); extract_sequence(4, [$t, C | _Fmt], _Need) -> {error,"invalid control ~t" ++ [C]}; extract_sequence(4, [$l, $p | Fmt], Need) -> diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index 28e5007e5a..5ed2f4d888 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -268,47 +268,61 @@ write(Term, D, false) -> -spec write(Term, Depth) -> chars() when Term :: term(), + Depth :: depth(); + (Term, Options) -> chars() when + Term :: term(), + Options :: [Option], + Option :: {'depth', Depth} + | {'encoding', 'latin1' | 'utf8' | 'unicode'}, Depth :: depth(). -write(_Term, 0) -> "..."; -write(Term, _D) when is_integer(Term) -> integer_to_list(Term); -write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term); -write(Atom, _D) when is_atom(Atom) -> write_atom(Atom); -write(Term, _D) when is_port(Term) -> write_port(Term); -write(Term, _D) when is_pid(Term) -> pid_to_list(Term); -write(Term, _D) when is_reference(Term) -> write_ref(Term); -write(<<_/bitstring>>=Term, D) -> write_binary(Term, D); -write([], _D) -> "[]"; -write({}, _D) -> "{}"; -write([H|T], D) -> +write(Term, Options) when is_list(Options) -> + Depth = get_option(depth, Options, -1), + Encoding = get_option(encoding, Options, epp:default_encoding()), + write1(Term, Depth, Encoding); +write(Term, Depth) -> + write1(Term, Depth, latin1). 
+ +write1(_Term, 0, _E) -> "..."; +write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term); +write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term); +write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom); +write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom); +write1(Term, _D, _E) when is_port(Term) -> write_port(Term); +write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term); +write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term); +write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D); +write1([], _D, _E) -> "[]"; +write1({}, _D, _E) -> "{}"; +write1([H|T], D, E) -> if D =:= 1 -> "[...]"; true -> - [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]] + [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]] end; -write(F, _D) when is_function(F) -> +write1(F, _D, _E) when is_function(F) -> erlang:fun_to_list(F); -write(Term, D) when is_map(Term) -> - write_map(Term, D); -write(T, D) when is_tuple(T) -> +write1(Term, D, E) when is_map(Term) -> + write_map(Term, D, E); +write1(T, D, E) when is_tuple(T) -> if D =:= 1 -> "{...}"; true -> [${, - [write(element(1, T), D-1)| - write_tail(tl(tuple_to_list(T)), D-1, $,)], + [write1(element(1, T), D-1, E)| + write_tail(tl(tuple_to_list(T)), D-1, E, $,)], $}] end. %% write_tail(List, Depth, CharacterBeforeDots) %% Test the terminating case first as this looks better with depth. -write_tail([], _D, _S) -> ""; -write_tail(_, 1, S) -> [S | "..."]; -write_tail([H|T], D, S) -> - [$,,write(H, D-1)|write_tail(T, D-1, S)]; -write_tail(Other, D, S) -> - [S,write(Other, D-1)]. +write_tail([], _D, _E, _S) -> ""; +write_tail(_, 1, _E, S) -> [S | "..."]; +write_tail([H|T], D, E, S) -> + [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)]; +write_tail(Other, D, E, S) -> + [S,write1(Other, D-1, E)]. write_port(Port) -> erlang:port_to_list(Port). @@ -316,17 +330,17 @@ write_port(Port) -> write_ref(Ref) -> erlang:ref_to_list(Ref). 
-write_map(Map, D) when is_integer(D) -> - [$#,${,write_map_body(maps:to_list(Map), D),$}]. +write_map(Map, D, E) when is_integer(D) -> + [$#,${,write_map_body(maps:to_list(Map), D, E),$}]. -write_map_body(_, 0) -> "..."; -write_map_body([],_) -> []; -write_map_body([{K,V}],D) -> write_map_assoc(K,V,D); -write_map_body([{K,V}|KVs], D) -> - [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)]. +write_map_body(_, 0, _E) -> "..."; +write_map_body([], _, _E) -> []; +write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E); +write_map_body([{K,V}|KVs], D, E) -> + [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)]. -write_map_assoc(K,V,D) -> - [write(K,D - 1),"=>",write(V,D-1)]. +write_map_assoc(K, V, D, E) -> + [write1(K, D - 1, E),"=>",write1(V, D-1, E)]. write_binary(B, D) when is_integer(D) -> [$<,$<,write_binary_body(B, D),$>,$>]. @@ -344,6 +358,13 @@ write_binary_body(B, _D) -> <<X:L>> = B, [integer_to_list(X),$:,integer_to_list(L)]. +get_option(Key, TupleList, Default) -> + case lists:keyfind(Key, 1, TupleList) of + false -> Default; + {Key, Value} -> Value; + _ -> Default + end. + %%% There are two functions to write Unicode atoms: %%% - they both escape control characters < 160; %%% - write_atom() never escapes characters >= 160; diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 3113767614..14d925bacf 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -257,12 +257,12 @@ indentation([], I) -> I. %% This is the main dispatch function for the various formatting commands. %% Field widths and precisions have already been calculated. 
-control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) -> - term(io_lib:write(A, -1), F, Adj, P, Pad); +control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) -> + term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad); control($p, [A], F, Adj, P, Pad, Enc, Str, I) -> print(A, -1, F, Adj, P, Pad, Enc, Str, I); -control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) -> - term(io_lib:write(A, Depth), F, Adj, P, Pad); +control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) -> + term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad); control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) -> print(A, Depth, F, Adj, P, Pad, Enc, Str, I); control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) -> diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index b2754e47ba..fb62e3aa3c 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2348,4 +2348,19 @@ otp_14285(_Config) -> L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])), not is_latin1(S)], L1 = lists:seq(256, 512), + + latin1_fmt("~w", ['кирилли́ческий атом']), + latin1_fmt("~w", ['\x{10FFFF}']), + "'кирилли́ческий атом'" = fmt("~tw", ['кирилли́ческий атом']), + [$',16#10FFFF,$'] = fmt("~tw", ['\x{10FFFF}']), + + latin1_fmt("~W", ['кирилли́ческий атом', 13]), + latin1_fmt("~W", ['\x{10FFFF}', 13]), + "'кирилли́ческий атом'" = fmt("~tW", ['кирилли́ческий атом', 13]), + [$',16#10FFFF,$'] = fmt("~tW", ['\x{10FFFF}', 13]), + ok. + +latin1_fmt(Fmt, Args) -> + L = fmt(Fmt, Args), + true = lists:all(fun is_latin1/1, L). |