about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Hans Bolinder <[email protected]> 2017-03-22 09:41:57 +0100
committer: Hans Bolinder <[email protected]> 2017-04-07 08:57:26 +0200
commit: 92c79e394041f76d8f676cafe9b6af44522497bd (patch)
tree: 7c89b19a9997346aea81cd387dcc370ca02ce08f
parent: 97d1f21f53fabd27e31fe9aa46ffd9f0c00bcbec (diff)
download: otp-92c79e394041f76d8f676cafe9b6af44522497bd.tar.gz
otp-92c79e394041f76d8f676cafe9b6af44522497bd.tar.bz2
otp-92c79e394041f76d8f676cafe9b6af44522497bd.zip
stdlib: Add function to io_lib to handle Unicode atoms
-rw-r--r-- lib/stdlib/doc/src/io_lib.xml 14
-rw-r--r-- lib/stdlib/src/io_lib.erl 24
-rw-r--r-- lib/stdlib/src/io_lib_pretty.erl 14
-rw-r--r-- lib/stdlib/test/io_SUITE.erl 29
4 files changed, 70 insertions, 11 deletions
diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml
index 931e50f6f2..5ae400da62 100644
--- a/lib/stdlib/doc/src/io_lib.xml
+++ b/lib/stdlib/doc/src/io_lib.xml
@@ -4,7 +4,7 @@
<erlref>
<header>
<copyright>
- <year>1996</year><year>2016</year>
+ <year>1996</year><year>2017</year>
<holder>Ericsson AB. All Rights Reserved.</holder>
</copyright>
<legalnotice>
@@ -147,7 +147,7 @@
format string (that is, <c>~ts</c> or <c>~tc</c>), the resulting list
can contain characters beyond the ISO Latin-1 character range
(that is, numbers &gt; 255). If so, the
- result is not an ordinary Erlang <c>string()</c>, but can well be
+ result is still an ordinary Erlang <c>string()</c>, and can well be
used in any context where Unicode data is allowed.</p>
</desc>
</func>
@@ -384,6 +384,16 @@
</func>
<func>
+ <name name="write_atom_as_latin1" arity="1"/>
+ <fsummary>Write an atom, escaping non-Latin-1 characters.</fsummary>
+ <desc>
+ <p>Returns the list of characters needed to print atom
+ <c><anno>Atom</anno></c>. Non-Latin-1 characters
+ are escaped.</p>
+ </desc>
+ </func>
+
+ <func>
<name name="write_char" arity="1"/>
<fsummary>Write a character.</fsummary>
<desc>
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index a91143a764..28e5007e5a 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -68,8 +68,8 @@
-export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1,
write_latin1_string/2, write_char/1, write_latin1_char/1]).
--export([write_string_as_latin1/1, write_string_as_latin1/2,
- write_char_as_latin1/1]).
+-export([write_atom_as_latin1/1, write_string_as_latin1/1,
+ write_string_as_latin1/2, write_char_as_latin1/1]).
-export([quote_atom/2, char_list/1, latin1_char_list/1,
deep_char_list/1, deep_latin1_char_list/1,
@@ -344,6 +344,11 @@ write_binary_body(B, _D) ->
<<X:L>> = B,
[integer_to_list(X),$:,integer_to_list(L)].
+%%% There are two functions to write Unicode atoms:
+%%% - they both escape control characters < 160;
+%%% - write_atom() never escapes characters >= 160;
+%%% - write_atom_as_latin1() also escapes characters > 255.
+
%% write_atom(Atom) -> [Char]
%% Generate the list of characters needed to print an atom.
@@ -351,17 +356,26 @@ write_binary_body(B, _D) ->
Atom :: atom().
write_atom(Atom) ->
+ write_possibly_quoted_atom(Atom, fun write_string/2).
+
+-spec write_atom_as_latin1(Atom) -> latin1_string() when
+ Atom :: atom().
+
+write_atom_as_latin1(Atom) ->
+ write_possibly_quoted_atom(Atom, fun write_string_as_latin1/2).
+
+write_possibly_quoted_atom(Atom, PFun) ->
Chars = atom_to_list(Atom),
case quote_atom(Atom, Chars) of
true ->
- write_string(Chars, $'); %'
+ PFun(Chars, $'); %'
false ->
Chars
end.
%% quote_atom(Atom, CharList)
%% Return 'true' if atom with chars in CharList needs to be quoted, else
-%% return 'false'.
+%% return 'false'. Notice that characters >= 160 are always quoted.
-spec quote_atom(atom(), chars()) -> boolean().
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index aabccfc5d9..ff368d02da 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -105,6 +105,8 @@ print(_, _, _, 0, _M, _RF, _Enc, _Str) -> "...";
print(Term, Col, Ll, D, M, RecDefFun, Enc, Str) when Col =< 0 ->
%% ensure Col is at least 1
print(Term, 1, Ll, D, M, RecDefFun, Enc, Str);
+print(Atom, _Col, _Ll, _D, _M, _RF, Enc, _Str) when is_atom(Atom) ->
+ write_atom(Atom, Enc);
print(Term, Col, Ll, D, M0, RecDefFun, Enc, Str) when is_tuple(Term);
is_list(Term);
is_map(Term);
@@ -407,6 +409,9 @@ print_length({}, _D, _RF, _Enc, _Str) ->
{"{}", 2};
print_length(#{}=M, _D, _RF, _Enc, _Str) when map_size(M) =:= 0 ->
{"#{}", 3};
+print_length(Atom, _D, _RF, Enc, _Str) when is_atom(Atom) ->
+ S = write_atom(Atom, Enc),
+ {S, lists:flatlength(S)};
print_length(List, D, RF, Enc, Str) when is_list(List) ->
%% only flat lists are "printable"
case Str andalso printable_list(List, D, Enc) of
@@ -500,7 +505,7 @@ print_length_tuple(Tuple, D, RF, Enc, Str) ->
print_length_record(_Tuple, 1, _RF, _RDefs, _Enc, _Str) ->
{"{...}", 5};
print_length_record(Tuple, D, RF, RDefs, Enc, Str) ->
- Name = [$# | io_lib:write_atom(element(1, Tuple))],
+ Name = [$# | write_atom(element(1, Tuple), Enc)],
NameL = length(Name),
Elements = tl(tuple_to_list(Tuple)),
L = print_length_fields(RDefs, D - 1, Elements, RF, Enc, Str),
@@ -515,7 +520,7 @@ print_length_fields([Def | Defs], D, [E | Es], RF, Enc, Str) ->
print_length_fields(Defs, D - 1, Es, RF, Enc, Str)].
print_length_field(Def, D, E, RF, Enc, Str) ->
- Name = io_lib:write_atom(Def),
+ Name = write_atom(Def, Enc),
{S, L} = print_length(E, D, RF, Enc, Str),
NameL = length(Name) + 3,
{{field, Name, NameL, {S, L}}, NameL + L}.
@@ -664,6 +669,11 @@ printable_char(C,unicode) ->
C > 16#DFFF andalso C < 16#FFFE orelse
C > 16#FFFF andalso C =< 16#10FFFF.
+write_atom(A, latin1) ->
+ io_lib:write_atom_as_latin1(A);
+write_atom(A, _Uni) ->
+ io_lib:write_atom(A).
+
write_string(S, latin1) ->
io_lib:write_latin1_string(S, $"); %"
write_string(S, _Uni) ->
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index d546e8fad2..b2754e47ba 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -30,7 +30,8 @@
io_lib_print_binary_depth_one/1, otp_10302/1, otp_10755/1,
otp_10836/1, io_lib_width_too_small/1,
io_with_huge_message_queue/1, format_string/1,
- maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1]).
+ maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1,
+ otp_14285/1]).
-export([pretty/2]).
@@ -61,7 +62,8 @@ all() ->
printable_range, bad_printable_range,
io_lib_print_binary_depth_one, otp_10302, otp_10755, otp_10836,
io_lib_width_too_small, io_with_huge_message_queue,
- format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175].
+ format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175,
+ otp_14285].
%% Error cases for output.
error_1(Config) when is_list(Config) ->
@@ -755,6 +757,8 @@ rfd(rrrrr, 3) ->
[f1, f2, f3];
rfd(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, 0) ->
[];
+rfd('\x{400}', 1) ->
+ ['\x{400}'];
rfd(_, _) ->
no.
@@ -1881,6 +1885,7 @@ otp_10302(Suite) when is_list(Suite) ->
pretty(Term, Depth) when is_integer(Depth) ->
Opts = [{column, 1}, {line_length, 20},
{depth, Depth}, {max_chars, 60},
+ {record_print_fun, fun rfd/2},
{encoding, unicode}],
pretty(Term, Opts);
pretty(Term, Opts) when is_list(Opts) ->
@@ -2324,3 +2329,23 @@ text1([T|Ts]) ->
[erl_anno:text(Anno) | text1(Ts)].
-endif. % EXACT
+
+otp_14285(_Config) ->
+ UOpts = [{record_print_fun, fun rfd/2},
+ {encoding, unicode}],
+ LOpts = [{record_print_fun, fun rfd/2},
+ {encoding, latin1}],
+
+ RT = {'\x{400}','\x{400}'},
+ "#'\x{400}'{'\x{400}' = '\x{400}'}" = pretty(RT, UOpts),
+ "#'\\x{400}'{'\\x{400}' = '\\x{400}'}" = pretty(RT, LOpts),
+
+ Chars = lists:seq(0, 512),
+ [] = [C ||
+ C <- Chars,
+ S <- io_lib:write_atom_as_latin1(list_to_atom([C])),
+ not is_latin1(S)],
+ L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])),
+ not is_latin1(S)],
+ L1 = lists:seq(256, 512),
+ ok.