aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorHans Bolinder <[email protected]>2017-04-19 16:10:15 +0200
committerHans Bolinder <[email protected]>2017-04-24 14:51:03 +0200
commitef0dbc4f2a43d629d086c3e2b9a762bbc00d034b (patch)
treede3dd7b8d268f0819453920f8b6e8efb7212477e /lib/stdlib/src
parent739bca3fc267c55d84c8f5c193d16c0b2a7eee13 (diff)
downloadotp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.gz
otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.tar.bz2
otp-ef0dbc4f2a43d629d086c3e2b9a762bbc00d034b.zip
stdlib: Add Unicode modifier t to control sequences w and W
As of the introduction of Unicode characters in atoms, the control sequences 'w' and 'W' can return non-Latin-1 characters, unless some measure is taken. This commit makes sure that '~w' and '~W' always return Latin-1 characters, or bytes, which can be output to ports or written to raw files. The Unicode translation modifier 't' is needed to return non-Latin-1 characters.
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/erl_lint.erl4
-rw-r--r--lib/stdlib/src/io_lib.erl87
-rw-r--r--lib/stdlib/src/io_lib_format.erl10
3 files changed, 63 insertions, 38 deletions
diff --git a/lib/stdlib/src/erl_lint.erl b/lib/stdlib/src/erl_lint.erl
index 78b7a0e751..7c40058dd8 100644
--- a/lib/stdlib/src/erl_lint.erl
+++ b/lib/stdlib/src/erl_lint.erl
@@ -3883,6 +3883,10 @@ extract_sequence(4, [$t, $p | Fmt], Need) ->
extract_sequence(5, [$p|Fmt], Need);
extract_sequence(4, [$t, $P | Fmt], Need) ->
extract_sequence(5, [$P|Fmt], Need);
+extract_sequence(4, [$t, $w | Fmt], Need) ->
+ extract_sequence(5, [$w|Fmt], Need);
+extract_sequence(4, [$t, $W | Fmt], Need) ->
+ extract_sequence(5, [$W|Fmt], Need);
extract_sequence(4, [$t, C | _Fmt], _Need) ->
{error,"invalid control ~t" ++ [C]};
extract_sequence(4, [$l, $p | Fmt], Need) ->
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 28e5007e5a..5ed2f4d888 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -268,47 +268,61 @@ write(Term, D, false) ->
-spec write(Term, Depth) -> chars() when
Term :: term(),
+ Depth :: depth();
+ (Term, Options) -> chars() when
+ Term :: term(),
+ Options :: [Option],
+ Option :: {'depth', Depth}
+ | {'encoding', 'latin1' | 'utf8' | 'unicode'},
Depth :: depth().
-write(_Term, 0) -> "...";
-write(Term, _D) when is_integer(Term) -> integer_to_list(Term);
-write(Term, _D) when is_float(Term) -> io_lib_format:fwrite_g(Term);
-write(Atom, _D) when is_atom(Atom) -> write_atom(Atom);
-write(Term, _D) when is_port(Term) -> write_port(Term);
-write(Term, _D) when is_pid(Term) -> pid_to_list(Term);
-write(Term, _D) when is_reference(Term) -> write_ref(Term);
-write(<<_/bitstring>>=Term, D) -> write_binary(Term, D);
-write([], _D) -> "[]";
-write({}, _D) -> "{}";
-write([H|T], D) ->
+write(Term, Options) when is_list(Options) ->
+ Depth = get_option(depth, Options, -1),
+ Encoding = get_option(encoding, Options, epp:default_encoding()),
+ write1(Term, Depth, Encoding);
+write(Term, Depth) ->
+ write1(Term, Depth, latin1).
+
+write1(_Term, 0, _E) -> "...";
+write1(Term, _D, _E) when is_integer(Term) -> integer_to_list(Term);
+write1(Term, _D, _E) when is_float(Term) -> io_lib_format:fwrite_g(Term);
+write1(Atom, _D, latin1) when is_atom(Atom) -> write_atom_as_latin1(Atom);
+write1(Atom, _D, _E) when is_atom(Atom) -> write_atom(Atom);
+write1(Term, _D, _E) when is_port(Term) -> write_port(Term);
+write1(Term, _D, _E) when is_pid(Term) -> pid_to_list(Term);
+write1(Term, _D, _E) when is_reference(Term) -> write_ref(Term);
+write1(<<_/bitstring>>=Term, D, _E) -> write_binary(Term, D);
+write1([], _D, _E) -> "[]";
+write1({}, _D, _E) -> "{}";
+write1([H|T], D, E) ->
if
D =:= 1 -> "[...]";
true ->
- [$[,[write(H, D-1)|write_tail(T, D-1, $|)],$]]
+ [$[,[write1(H, D-1, E)|write_tail(T, D-1, E, $|)],$]]
end;
-write(F, _D) when is_function(F) ->
+write1(F, _D, _E) when is_function(F) ->
erlang:fun_to_list(F);
-write(Term, D) when is_map(Term) ->
- write_map(Term, D);
-write(T, D) when is_tuple(T) ->
+write1(Term, D, E) when is_map(Term) ->
+ write_map(Term, D, E);
+write1(T, D, E) when is_tuple(T) ->
if
D =:= 1 -> "{...}";
true ->
[${,
- [write(element(1, T), D-1)|
- write_tail(tl(tuple_to_list(T)), D-1, $,)],
+ [write1(element(1, T), D-1, E)|
+ write_tail(tl(tuple_to_list(T)), D-1, E, $,)],
$}]
end.
%% write_tail(List, Depth, CharacterBeforeDots)
%% Test the terminating case first as this looks better with depth.
-write_tail([], _D, _S) -> "";
-write_tail(_, 1, S) -> [S | "..."];
-write_tail([H|T], D, S) ->
- [$,,write(H, D-1)|write_tail(T, D-1, S)];
-write_tail(Other, D, S) ->
- [S,write(Other, D-1)].
+write_tail([], _D, _E, _S) -> "";
+write_tail(_, 1, _E, S) -> [S | "..."];
+write_tail([H|T], D, E, S) ->
+ [$,,write1(H, D-1, E)|write_tail(T, D-1, E, S)];
+write_tail(Other, D, E, S) ->
+ [S,write1(Other, D-1, E)].
write_port(Port) ->
erlang:port_to_list(Port).
@@ -316,17 +330,17 @@ write_port(Port) ->
write_ref(Ref) ->
erlang:ref_to_list(Ref).
-write_map(Map, D) when is_integer(D) ->
- [$#,${,write_map_body(maps:to_list(Map), D),$}].
+write_map(Map, D, E) when is_integer(D) ->
+ [$#,${,write_map_body(maps:to_list(Map), D, E),$}].
-write_map_body(_, 0) -> "...";
-write_map_body([],_) -> [];
-write_map_body([{K,V}],D) -> write_map_assoc(K,V,D);
-write_map_body([{K,V}|KVs], D) ->
- [write_map_assoc(K,V,D),$, | write_map_body(KVs,D-1)].
+write_map_body(_, 0, _E) -> "...";
+write_map_body([], _, _E) -> [];
+write_map_body([{K,V}], D, E) -> write_map_assoc(K, V, D, E);
+write_map_body([{K,V}|KVs], D, E) ->
+ [write_map_assoc(K, V, D, E),$, | write_map_body(KVs, D-1, E)].
-write_map_assoc(K,V,D) ->
- [write(K,D - 1),"=>",write(V,D-1)].
+write_map_assoc(K, V, D, E) ->
+ [write1(K, D - 1, E),"=>",write1(V, D-1, E)].
write_binary(B, D) when is_integer(D) ->
[$<,$<,write_binary_body(B, D),$>,$>].
@@ -344,6 +358,13 @@ write_binary_body(B, _D) ->
<<X:L>> = B,
[integer_to_list(X),$:,integer_to_list(L)].
+get_option(Key, TupleList, Default) ->
+ case lists:keyfind(Key, 1, TupleList) of
+ false -> Default;
+ {Key, Value} -> Value;
+ _ -> Default
+ end.
+
%%% There are two functions to write Unicode atoms:
%%% - they both escape control characters < 160;
%%% - write_atom() never escapes characters >= 160;
diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl
index 3113767614..14d925bacf 100644
--- a/lib/stdlib/src/io_lib_format.erl
+++ b/lib/stdlib/src/io_lib_format.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2016. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2017. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
@@ -257,12 +257,12 @@ indentation([], I) -> I.
%% This is the main dispatch function for the various formatting commands.
%% Field widths and precisions have already been calculated.
-control($w, [A], F, Adj, P, Pad, _Enc, _Str, _I) ->
- term(io_lib:write(A, -1), F, Adj, P, Pad);
+control($w, [A], F, Adj, P, Pad, Enc, _Str, _I) ->
+ term(io_lib:write(A, [{depth,-1}, {encoding, Enc}]), F, Adj, P, Pad);
control($p, [A], F, Adj, P, Pad, Enc, Str, I) ->
print(A, -1, F, Adj, P, Pad, Enc, Str, I);
-control($W, [A,Depth], F, Adj, P, Pad, _Enc, _Str, _I) when is_integer(Depth) ->
- term(io_lib:write(A, Depth), F, Adj, P, Pad);
+control($W, [A,Depth], F, Adj, P, Pad, Enc, _Str, _I) when is_integer(Depth) ->
+ term(io_lib:write(A, [{depth,Depth}, {encoding, Enc}]), F, Adj, P, Pad);
control($P, [A,Depth], F, Adj, P, Pad, Enc, Str, I) when is_integer(Depth) ->
print(A, Depth, F, Adj, P, Pad, Enc, Str, I);
control($s, [A], F, Adj, P, Pad, latin1, _Str, _I) when is_atom(A) ->