From ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 19 Apr 2017 16:10:15 +0200 Subject: stdlib: Add Unicode modifier t to control sequences w and W As of the introduction of Unicode characters in atoms, the control sequences 'w' and 'W' can return non-Latin-1 characters, unless some measure is taken. This commit makes sure that '~w' and '~W' always return Latin-1 characters, or bytes, which can be output to ports or written to raw files. The Unicode translation modifier 't' is needed to return non-Latin-1 characters. --- lib/stdlib/doc/src/io.xml | 4 +- lib/stdlib/doc/src/io_lib.xml | 3 +- lib/stdlib/src/erl_lint.erl | 4 ++ lib/stdlib/src/io_lib.erl | 87 +++++++++++++++++++++++++--------------- lib/stdlib/src/io_lib_format.erl | 10 ++--- lib/stdlib/test/io_SUITE.erl | 15 +++++++ 6 files changed, 83 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml index 11a64c7f8a..74b57457ea 100644 --- a/lib/stdlib/doc/src/io.xml +++ b/lib/stdlib/doc/src/io.xml @@ -4,7 +4,7 @@
- 19962016 + 19962017 Ericsson AB. All Rights Reserved. @@ -265,6 +265,8 @@ ok

Writes data with the standard syntax. This is used to output Erlang terms. Atoms are printed within quotes if they contain embedded non-printable characters. + Atom characters > 255 are escaped unless the + Unicode translation modifier (t) is used. Floats are printed accurately as the shortest, correctly rounded string.

diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 5ae400da62..bc1d77ac83 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -356,7 +356,8 @@ - + + Write a term.

Returns a character list that represents Term. diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl index 78b7a0e751..7c40058dd8 100644 --- a/lib/stdlib/src/erl_lint.erl +++ b/lib/stdlib/src/erl_lint.erl @@ -3883,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) -> extract_sequence(5, [$p|Fmt], Need); extract_sequence(4, [$t, $P | Fmt], Need) -> extract_sequence(5, [$P|Fmt], Need); +extract_sequence(4, [$t, $w | Fmt], Need) -> + extract_sequence(5, [$w|Fmt], Need); +extract_sequence(4, [$t, $W | Fmt], Need) -> + extract_sequence(5, [$W|Fmt], Need); extract_sequence(4, [$t, C | _Fmt], _Need) -> {error,"invalid control ~t" ++ [C]}; extract_sequence(4, [$l, $p | Fmt], Need) -> diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index 28e5007e5a..5ed2f4d888 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -268,47 +268,61 @@ write(Term, D, false) -> -spec write(Term, Depth) -> chars() when Term :: term(), + Depth :: depth(); + (Term, Options) -> chars() when + Term :: term(), + Options :: [Option], + Option :: {'depth', Depth} + | {'encoding', 'latin1' | 'utf8' | 'unicode'}, Depth :: depth(). -write(_Term, 0) -> "..."; -write(Term, _D) when is_integer(Term) -> integer_to_list(Term); -write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term); -write(Atom, _D) when is_atom(Atom) -> write_atom(Atom); -write(Term, _D) when is_port(Term) -> write_port(Term); -write(Term, _D) when is_pid(Term) -> pid_to_list(Term); -write(Term, _D) when is_reference(Term) -> write_ref(Term); -write(<<_/bitstring>>=Term, D) -> write_binary(Term, D); -write([], _D) -> "[]"; -write({}, _D) -> "{}"; -write([H|T], D) -> +write(Term, Options) when is_list(Options) -> + Depth = get_option(depth, Options, -1), + Encoding = get_option(encoding, Options, epp:default_encoding()), + write1(Term, Depth, Encoding); +write(Term, Depth) -> + write1(Term, Depth, latin1). + +write1(_Term, 0, _E) -> "..."; +write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term); +write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term); +write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom); +write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom); +write1(Term, _D, _E) when is_port(Term) -> write_port(Term); +write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term); +write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term); +write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D); +write1([], _D, _E) -> "[]"; +write1({}, _D, _E) -> "{}"; +write1([H|T], D, E) -> if D =:= 1 -> "[...]"; true -> - [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]] + [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]] end; -write(F, _D) when is_function(F) -> +write1(F, _D, _E) when is_function(F) -> erlang:fun_to_list(F); -write(Term, D) when is_map(Term) -> - write_map(Term, D); -write(T, D) when is_tuple(T) -> +write1(Term, D, E) when is_map(Term) -> + write_map(Term, D, E); +write1(T, D, E) when is_tuple(T) -> if D =:= 1 -> "{...}"; true -> [${, - [write(element(1, T), D-1)| - write_tail(tl(tuple_to_list(T)), D-1, $,)], + [write1(element(1, T), D-1, E)| + write_tail(tl(tuple_to_list(T)), D-1, E, $,)], $}] end. %% write_tail(List, Depth, CharacterBeforeDots) %% Test the terminating case first as this looks better with depth. -write_tail([], _D, _S) -> ""; -write_tail(_, 1, S) -> [S | "..."]; -write_tail([H|T], D, S) -> - [$,,write(H, D-1)|write_tail(T, D-1, S)]; -write_tail(Other, D, S) -> - [S,write(Other, D-1)]. +write_tail([], _D, _E, _S) -> ""; +write_tail(_, 1, _E, S) -> [S | "..."]; +write_tail([H|T], D, E, S) -> + [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)]; +write_tail(Other, D, E, S) -> + [S,write1(Other, D-1, E)]. write_port(Port) -> erlang:port_to_list(Port). @@ -316,17 +330,17 @@ write_port(Port) -> write_ref(Ref) -> erlang:ref_to_list(Ref). -write_map(Map, D) when is_integer(D) -> - [$#,${,write_map_body(maps:to_list(Map), D),$}]. +write_map(Map, D, E) when is_integer(D) -> + [$#,${,write_map_body(maps:to_list(Map), D, E),$}]. -write_map_body(_, 0) -> "..."; -write_map_body([],_) -> []; -write_map_body([{K,V}],D) -> write_map_assoc(K,V,D); -write_map_body([{K,V}|KVs], D) -> - [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)]. +write_map_body(_, 0, _E) -> "..."; +write_map_body([], _, _E) -> []; +write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E); +write_map_body([{K,V}|KVs], D, E) -> + [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)]. -write_map_assoc(K,V,D) -> - [write(K,D - 1),"=>",write(V,D-1)]. +write_map_assoc(K, V, D, E) -> + [write1(K, D - 1, E),"=>",write1(V, D-1, E)]. write_binary(B, D) when is_integer(D) -> [$<,$<,write_binary_body(B, D),$>,$>]. @@ -344,6 +358,13 @@ write_binary_body(B, _D) -> <> = B, [integer_to_list(X),$:,integer_to_list(L)]. +get_option(Key, TupleList, Default) -> + case lists:keyfind(Key, 1, TupleList) of + false -> Default; + {Key, Value} -> Value; + _ -> Default + end. + %%% There are two functions to write Unicode atoms: %%% - they both escape control characters < 160; %%% - write_atom() never escapes characters >= 160; diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 3113767614..14d925bacf 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -257,12 +257,12 @@ indentation([], I) -> I. %% This is the main dispatch function for the various formatting commands. %% Field widths and precisions have already been calculated. -control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) -> - term(io_lib:write(A, -1), F, Adj, P, Pad); +control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) -> + term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad); control($p, [A], F, Adj, P, Pad, Enc, Str, I) -> print(A, -1, F, Adj, P, Pad, Enc, Str, I); -control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) -> - term(io_lib:write(A, Depth), F, Adj, P, Pad); +control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) -> + term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad); control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) -> print(A, Depth, F, Adj, P, Pad, Enc, Str, I); control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) -> diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index b2754e47ba..fb62e3aa3c 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2348,4 +2348,19 @@ otp_14285(_Config) -> L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])), not is_latin1(S)], L1 = lists:seq(256, 512), + + latin1_fmt("~w", ['кирилли́ческий атом']), + latin1_fmt("~w", ['\x{10FFFF}']), + "'кирилли́ческий атом'" = fmt("~tw", ['кирилли́ческий атом']), + [$',16#10FFFF,$'] = fmt("~tw", ['\x{10FFFF}']), + + latin1_fmt("~W", ['кирилли́ческий атом', 13]), + latin1_fmt("~W", ['\x{10FFFF}', 13]), + "'кирилли́ческий атом'" = fmt("~tW", ['кирилли́ческий атом', 13]), + [$',16#10FFFF,$'] = fmt("~tW", ['\x{10FFFF}', 13]), + ok. + +latin1_fmt(Fmt, Args) -> + L = fmt(Fmt, Args), + true = lists:all(fun is_latin1/1, L). -- cgit v1.2.3 From 4567b6afc41a5d18384916c171ae413112ee57cc Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 24 Apr 2017 14:46:57 +0200 Subject: stdlib: Add Unicode modifier t to control sequence a The Unicode translation modifier 't' is added to the io:fread/2,3 control sequence 'a'. --- lib/stdlib/doc/src/io.xml | 2 -- lib/stdlib/src/io_lib_fread.erl | 11 +++++------ lib/stdlib/test/io_SUITE.erl | 9 +++++++++ 3 files changed, 14 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/io.xml b/lib/stdlib/doc/src/io.xml index 74b57457ea..64fcf4379f 100644 --- a/lib/stdlib/doc/src/io.xml +++ b/lib/stdlib/doc/src/io.xml @@ -569,8 +569,6 @@ Prompt> <Characters beyond latin1 range not printable in this medium&g

Similar to s, but the resulting string is converted into an atom.

-

The Unicode translation modifier is not allowed (atoms - cannot contain characters beyond the latin1 range).

c diff --git a/lib/stdlib/src/io_lib_fread.erl b/lib/stdlib/src/io_lib_fread.erl index 6a8f8f728e..983e8d4566 100644 --- a/lib/stdlib/src/io_lib_fread.erl +++ b/lib/stdlib/src/io_lib_fread.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -159,8 +159,8 @@ fread_field([$t|Format], F, Sup, _Unic) -> fread_field(Format, F, Sup, Unic) -> {Format,F,Sup,Unic}. -%% fread1(Format, FieldWidth, Suppress, Line, N, Results, AllFormat) -%% fread1(Format, FieldWidth, Suppress, Line, N, Results) +%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results, AllFormat) +%% fread1(Format, FieldWidth, Suppress, Unicode, Line, N, Results) %% The main dispatch function for the formatting commands. Done in two %% stages so format commands that need no input can always be processed. @@ -231,9 +231,8 @@ fread1([$s|Format], none, Sup, U, Line0, N0, Res) -> fread1([$s|Format], F, Sup, U, Line0, N, Res) -> {Line,Cs} = fread_chars(Line0, F, U), fread_string(Cs, Sup, U, Format, Line, N+F, Res); -%% XXX:PaN Atoms still only latin1... -fread1([$a|Format], none, Sup, false, Line0, N0, Res) -> - {Line,N,Cs} = fread_string_cs(Line0, N0, false), +fread1([$a|Format], none, Sup, U, Line0, N0, Res) -> + {Line,N,Cs} = fread_string_cs(Line0, N0, U), fread_atom(Cs, Sup, Format, Line, N, Res); fread1([$a|Format], F, Sup, false, Line0, N, Res) -> {Line,Cs} = fread_chars(Line0, F, false), diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index fb62e3aa3c..ef3f0be5d7 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2359,6 +2359,15 @@ otp_14285(_Config) -> "'кирилли́ческий атом'" = fmt("~tW", ['кирилли́ческий атом', 13]), [$',16#10FFFF,$'] = fmt("~tW", ['\x{10FFFF}', 13]), + {ok, [an_atom],[]} = io_lib:fread("~a", "an_atom"), + {ok, [an_atom],[]} = io_lib:fread("~ta", "an_atom"), + Str = "\"ab" ++ [1089] ++ "cd\"", + {ok, ["\"ab"], [1089]++"cd\""} = io_lib:fread("~s", Str), + {ok, ['\"ab'], [1089]++"cd\""} = io_lib:fread("~a", Str), + {ok,[Str], []} = io_lib:fread("~ts", Str), + {ok,[Atom],[]} = io_lib:fread("~ta", Str), + Str = atom_to_list(Atom), + ok. latin1_fmt(Fmt, Args) -> -- cgit v1.2.3