From 92c79e394041f76d8f676cafe9b6af44522497bd Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 22 Mar 2017 09:41:57 +0100 Subject: stdlib: Add function to io_lib to handle Unicode atoms --- lib/stdlib/doc/src/io_lib.xml | 14 ++++++++++++-- lib/stdlib/src/io_lib.erl | 24 +++++++++++++++++++----- lib/stdlib/src/io_lib_pretty.erl | 14 ++++++++++++-- lib/stdlib/test/io_SUITE.erl | 29 +++++++++++++++++++++++++++-- 4 files changed, 70 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 931e50f6f2..5ae400da62 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -4,7 +4,7 @@
- 19962016 + 19962017 Ericsson AB. All Rights Reserved. @@ -147,7 +147,7 @@ format string (that is, ~ts or ~tc), the resulting list can contain characters beyond the ISO Latin-1 character range (that is, numbers > 255). If so, the - result is not an ordinary Erlang string(), but can well be + result is still an ordinary Erlang string(), and can well be used in any context where Unicode data is allowed.

@@ -383,6 +383,16 @@ + + + Write an atom. + +

Returns the list of characters needed to print atom + Atom. Non-Latin-1 characters + are escaped.

+
+
+ Write a character. diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index a91143a764..28e5007e5a 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2016. All Rights Reserved. +%% Copyright Ericsson AB 1996-2017. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -68,8 +68,8 @@ -export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1, write_latin1_string/2, write_char/1, write_latin1_char/1]). --export([write_string_as_latin1/1, write_string_as_latin1/2, - write_char_as_latin1/1]). +-export([write_atom_as_latin1/1, write_string_as_latin1/1, + write_string_as_latin1/2, write_char_as_latin1/1]). -export([quote_atom/2, char_list/1, latin1_char_list/1, deep_char_list/1, deep_latin1_char_list/1, @@ -344,6 +344,11 @@ write_binary_body(B, _D) -> <<X:L>> = B, [integer_to_list(X),$:,integer_to_list(L)]. +%%% There are two functions to write Unicode atoms: +%%% - they both escape control characters < 160; +%%% - write_atom() never escapes characters >= 160; +%%% - write_atom_as_latin1() also escapes characters > 255. + %% write_atom(Atom) -> [Char] %% Generate the list of characters needed to print an atom. @@ -351,17 +356,26 @@ write_binary_body(B, _D) -> Atom :: atom(). write_atom(Atom) -> + write_possibly_quoted_atom(Atom, fun write_string/2). + +-spec write_atom_as_latin1(Atom) -> latin1_string() when + Atom :: atom(). + +write_atom_as_latin1(Atom) -> + write_possibly_quoted_atom(Atom, fun write_string_as_latin1/2). + +write_possibly_quoted_atom(Atom, PFun) -> Chars = atom_to_list(Atom), case quote_atom(Atom, Chars) of true -> - write_string(Chars, $'); %' + PFun(Chars, $'); %' false -> Chars end. %% quote_atom(Atom, CharList) %% Return 'true' if atom with chars in CharList needs to be quoted, else -%% return 'false'. 
Notice that characters >= 160 are always quoted. -spec quote_atom(atom(), chars()) -> boolean(). diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index aabccfc5d9..ff368d02da 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -105,6 +105,8 @@ print(_, _, _, 0, _M, _RF, _Enc, _Str) -> "..."; print(Term, Col, Ll, D, M, RecDefFun, Enc, Str) when Col =< 0 -> %% ensure Col is at least 1 print(Term, 1, Ll, D, M, RecDefFun, Enc, Str); +print(Atom, _Col, _Ll, _D, _M, _RF, Enc, _Str) when is_atom(Atom) -> + write_atom(Atom, Enc); print(Term, Col, Ll, D, M0, RecDefFun, Enc, Str) when is_tuple(Term); is_list(Term); is_map(Term); @@ -407,6 +409,9 @@ print_length({}, _D, _RF, _Enc, _Str) -> {"{}", 2}; print_length(#{}=M, _D, _RF, _Enc, _Str) when map_size(M) =:= 0 -> {"#{}", 3}; +print_length(Atom, _D, _RF, Enc, _Str) when is_atom(Atom) -> + S = write_atom(Atom, Enc), + {S, lists:flatlength(S)}; print_length(List, D, RF, Enc, Str) when is_list(List) -> %% only flat lists are "printable" case Str andalso printable_list(List, D, Enc) of @@ -500,7 +505,7 @@ print_length_tuple(Tuple, D, RF, Enc, Str) -> print_length_record(_Tuple, 1, _RF, _RDefs, _Enc, _Str) -> {"{...}", 5}; print_length_record(Tuple, D, RF, RDefs, Enc, Str) -> - Name = [$# | io_lib:write_atom(element(1, Tuple))], + Name = [$# | write_atom(element(1, Tuple), Enc)], NameL = length(Name), Elements = tl(tuple_to_list(Tuple)), L = print_length_fields(RDefs, D - 1, Elements, RF, Enc, Str), @@ -515,7 +520,7 @@ print_length_fields([Def | Defs], D, [E | Es], RF, Enc, Str) -> print_length_fields(Defs, D - 1, Es, RF, Enc, Str)]. print_length_field(Def, D, E, RF, Enc, Str) -> - Name = io_lib:write_atom(Def), + Name = write_atom(Def, Enc), {S, L} = print_length(E, D, RF, Enc, Str), NameL = length(Name) + 3, {{field, Name, NameL, {S, L}}, NameL + L}. 
@@ -664,6 +669,11 @@ printable_char(C,unicode) -> C > 16#DFFF andalso C < 16#FFFE orelse C > 16#FFFF andalso C =< 16#10FFFF. +write_atom(A, latin1) -> + io_lib:write_atom_as_latin1(A); +write_atom(A, _Uni) -> + io_lib:write_atom(A). + write_string(S, latin1) -> io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index d546e8fad2..b2754e47ba 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -30,7 +30,8 @@ io_lib_print_binary_depth_one/1, otp_10302/1, otp_10755/1, otp_10836/1, io_lib_width_too_small/1, io_with_huge_message_queue/1, format_string/1, - maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1]). + maps/1, coverage/1, otp_14178_unicode_atoms/1, otp_14175/1, + otp_14285/1]). -export([pretty/2]). @@ -61,7 +62,8 @@ all() -> printable_range, bad_printable_range, io_lib_print_binary_depth_one, otp_10302, otp_10755, otp_10836, io_lib_width_too_small, io_with_huge_message_queue, - format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175]. + format_string, maps, coverage, otp_14178_unicode_atoms, otp_14175, + otp_14285]. %% Error cases for output. error_1(Config) when is_list(Config) -> @@ -755,6 +757,8 @@ rfd(rrrrr, 3) -> [f1, f2, f3]; rfd(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, 0) -> []; +rfd('\x{400}', 1) -> + ['\x{400}']; rfd(_, _) -> no. @@ -1881,6 +1885,7 @@ otp_10302(Suite) when is_list(Suite) -> pretty(Term, Depth) when is_integer(Depth) -> Opts = [{column, 1}, {line_length, 20}, {depth, Depth}, {max_chars, 60}, + {record_print_fun, fun rfd/2}, {encoding, unicode}], pretty(Term, Opts); pretty(Term, Opts) when is_list(Opts) -> @@ -2324,3 +2329,23 @@ text1([T|Ts]) -> [erl_anno:text(Anno) | text1(Ts)]. -endif. 
% EXACT + +otp_14285(_Config) -> + UOpts = [{record_print_fun, fun rfd/2}, + {encoding, unicode}], + LOpts = [{record_print_fun, fun rfd/2}, + {encoding, latin1}], + + RT = {'\x{400}','\x{400}'}, + "#'\x{400}'{'\x{400}' = '\x{400}'}" = pretty(RT, UOpts), + "#'\\x{400}'{'\\x{400}' = '\\x{400}'}" = pretty(RT, LOpts), + + Chars = lists:seq(0, 512), + [] = [C || + C <- Chars, + S <- io_lib:write_atom_as_latin1(list_to_atom([C])), + not is_latin1(S)], + L1 = [S || C <- Chars, S <- io_lib:write_atom(list_to_atom([C])), + not is_latin1(S)], + L1 = lists:seq(256, 512), + ok. -- cgit v1.2.3