aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib/src
diff options
context:
space:
mode:
authorPatrik Nyblom <[email protected]>2013-02-22 12:06:41 +0100
committerPatrik Nyblom <[email protected]>2013-02-22 12:06:41 +0100
commit7215c49aff685e93765598cd428baf1d4320f752 (patch)
tree76409af3ed6abe7502af3aa438e47b1be98df1e3 /lib/stdlib/src
parent14820e983856654e68e08244e4dfc689f0804fd8 (diff)
parent2a79b74ac371387ce338bacf979f9ca32447b302 (diff)
downloadotp-7215c49aff685e93765598cd428baf1d4320f752.tar.gz
otp-7215c49aff685e93765598cd428baf1d4320f752.tar.bz2
otp-7215c49aff685e93765598cd428baf1d4320f752.zip
Merge branch 'pan/unicode_printable_ranges'
* pan/unicode_printable_ranges: Adapt stdlib tests to ~tp detecting latin1 binaries Update primary bootstrap Make wx debugger use +pc flag when applicable Correct misspelled comments and space at lin ends Make ~tp output latin1 binaries as strings if possible Leave the +pc handling to io and io_lib_pretty Remove newly introduced warning in erlexec.c Make shell_SUITE:otp_10302 use +pc unicode when needed Fix io_proto_SUITE to handle the new io_lib_pretty:print Add testcase for +pc and io:printable_range/0 Make printing of UTF-8 in binaries behave like lists. Document +pc flag and io:printable_range/0 Add usage of and spec for io:printable_range/0 Add +pc {latin1|unicode} switch and io:printable_range/0 Fix some Unicode issues OTP-18084
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r--lib/stdlib/src/erl_pp.erl18
-rw-r--r--lib/stdlib/src/io.erl7
-rw-r--r--lib/stdlib/src/io_lib.erl48
-rw-r--r--lib/stdlib/src/io_lib_pretty.erl55
-rw-r--r--lib/stdlib/src/shell.erl27
5 files changed, 92 insertions, 63 deletions
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl
index 06dae51cc9..7c7566e4ec 100644
--- a/lib/stdlib/src/erl_pp.erl
+++ b/lib/stdlib/src/erl_pp.erl
@@ -42,7 +42,7 @@
| {encoding, latin1 | unicode | utf8}).
-type(options() :: hook_function() | [option()]).
--record(pp, {string_fun, char_fun, term_fun}).
+-record(pp, {string_fun, char_fun}).
-record(options, {hook, encoding, opts}).
@@ -182,13 +182,11 @@ state(_Hook) ->
state() ->
#pp{string_fun = fun io_lib:write_string_as_latin1/1,
- char_fun = fun io_lib:write_char_as_latin1/1,
- term_fun = fun(T) -> io_lib:format("~p", [T]) end}.
+ char_fun = fun io_lib:write_char_as_latin1/1}.
unicode_state() ->
#pp{string_fun = fun io_lib:write_string/1,
- char_fun = fun io_lib:write_char/1,
- term_fun = fun(T) -> io_lib:format("~tp", [T]) end}.
+ char_fun = fun io_lib:write_char/1}.
encoding(Options) ->
case proplists:get_value(encoding, Options, epp:default_encoding()) of
@@ -204,10 +202,10 @@ lform({function,Line,Name,Arity,Clauses}, Opts, _State) ->
lform({rule,Line,Name,Arity,Clauses}, Opts, _State) ->
lrule({rule,Line,Name,Arity,Clauses}, Opts);
%% These are specials to make it easier for the compiler.
-lform({error,E}, _Opts, State) ->
- leaf((State#pp.term_fun)({error,E})++"\n");
-lform({warning,W}, _Opts, State) ->
- leaf((State#pp.term_fun)({warning,W})++"\n");
+lform({error,E}, _Opts, _State) ->
+ leaf(format("~p\n", [{error,E}]));
+lform({warning,W}, _Opts, _State) ->
+ leaf(format("~p\n", [{warning,W}]));
lform({eof,_Line}, _Opts, _State) ->
$\n.
@@ -233,7 +231,7 @@ lattribute(import, Name, _Opts, _State) when is_list(Name) ->
lattribute(import, {From,Falist}, _Opts, _State) ->
attr("import",[{var,0,pname(From)},falist(Falist)]);
lattribute(file, {Name,Line}, _Opts, State) ->
- attr("file", [{var,0,(State#pp.term_fun)(Name)},{integer,0,Line}]);
+ attr("file", [{var,0,(State#pp.string_fun)(Name)},{integer,0,Line}]);
lattribute(record, {Name,Is}, Opts, _State) ->
Nl = leaf(format("-record(~w,", [Name])),
[{first,Nl,record_fields(Is, Opts)},$)];
diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl
index 3dddb0d6e7..c92e9e3ade 100644
--- a/lib/stdlib/src/io.erl
+++ b/lib/stdlib/src/io.erl
@@ -32,6 +32,8 @@
parse_erl_exprs/4,parse_erl_form/1,parse_erl_form/2,
parse_erl_form/3,parse_erl_form/4]).
-export([request/1,request/2,requests/1,requests/2]).
+%% Implemented in native code
+-export([printable_range/0]).
-export_type([device/0, format/0, server_no_data/0]).
@@ -66,6 +68,11 @@ o_request(Io, Request, Func) ->
Other
end.
+%% Request what the user considers printable characters
+-spec printable_range() -> 'unicode' | 'latin1'.
+printable_range() ->
+ erlang:nif_error(undefined).
+
%% Put chars takes mixed *unicode* list from R13 onwards.
-spec put_chars(CharData) -> 'ok' when
CharData :: unicode:chardata().
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index b7ec848e1e..a9b6d4131e 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -72,7 +72,7 @@
-export([quote_atom/2, char_list/1, latin1_char_list/1,
deep_char_list/1, deep_latin1_char_list/1,
- printable_list/1, printable_latin1_list/1]).
+ printable_list/1, printable_latin1_list/1, printable_unicode_list/1]).
%% Utilities for collecting characters.
-export([collect_chars/3, collect_chars/4,
@@ -533,27 +533,45 @@ printable_latin1_list(_) -> false. %Everything else is false
%% Return true if CharList is a list of printable characters, else
%% false. The notion of printable in Unicode terms is somewhat floating.
%% Everything that is not a control character and not invalid unicode
-%% will be considered printable.
+%% will be considered printable.
+%% What the user has noted as printable characters is what actually
+%% specifies when this function will return true. If the VM is started
+%% with +pc latin1, only the latin1 range will be deemed as printable
+%% if on the other hand +pc unicode is given, all characters in the Unicode
+%% character set are deemed printable. latin1 is default.
-spec printable_list(Term) -> boolean() when
Term :: term().
-printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 ->
- printable_list(Cs);
-printable_list([C|Cs])
+printable_list(L) ->
+ %% There will be more alternatives returns from io:printable range
+ %% in the future. To not have a catch-all clause is deliberate.
+ case io:printable_range() of
+ latin1 ->
+ printable_latin1_list(L);
+ unicode ->
+ printable_unicode_list(L)
+ end.
+
+-spec printable_unicode_list(Term) -> boolean() when
+ Term :: term().
+
+printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 ->
+ printable_unicode_list(Cs);
+printable_unicode_list([C|Cs])
when is_integer(C), C >= 16#A0, C < 16#D800;
is_integer(C), C > 16#DFFF, C < 16#FFFE;
is_integer(C), C > 16#FFFF, C =< 16#10FFFF ->
- printable_list(Cs);
-printable_list([$\n|Cs]) -> printable_list(Cs);
-printable_list([$\r|Cs]) -> printable_list(Cs);
-printable_list([$\t|Cs]) -> printable_list(Cs);
-printable_list([$\v|Cs]) -> printable_list(Cs);
-printable_list([$\b|Cs]) -> printable_list(Cs);
-printable_list([$\f|Cs]) -> printable_list(Cs);
-printable_list([$\e|Cs]) -> printable_list(Cs);
-printable_list([]) -> true;
-printable_list(_) -> false. %Everything else is false
+ printable_unicode_list(Cs);
+printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs);
+printable_unicode_list([]) -> true;
+printable_unicode_list(_) -> false. %Everything else is false
%% List = nl()
%% Return a list of characters to generate a newline.
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index 525b534249..7637ad7a3d 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -485,13 +485,18 @@ printable_bin(Bin, Len, D, latin1) ->
false
end;
printable_bin(Bin, Len, D, _Uni) ->
- case printable_unicode(Bin, Len, []) of
- {_, <<>>, L} ->
- {byte_size(Bin) =:= length(L), L};
- {NC, Bin1, L} when D > 0, Len - NC >= D ->
- {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L};
- {_NC, _Bin, _L} ->
- false
+ case valid_utf8(Bin,Len) of
+ true ->
+ case printable_unicode(Bin, Len, [], io:printable_range()) of
+ {_, <<>>, L} ->
+ {byte_size(Bin) =:= length(L), L};
+ {NC, Bin1, L} when D > 0, Len - NC >= D ->
+ {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L};
+ {_NC, _Bin, _L} ->
+ false
+ end;
+ false ->
+ printable_bin(Bin, Len, D, latin1)
end.
printable_bin1(_Bin, _Start, 0) ->
@@ -522,24 +527,36 @@ printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1);
printable_latin1_list([], _) -> all;
printable_latin1_list(_, N) -> N.
-printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 ->
- case printable_char(C) of
+valid_utf8(<<>>,_) ->
+ true;
+valid_utf8(_,0) ->
+ true;
+valid_utf8(<<_/utf8, R/binary>>,N) ->
+ valid_utf8(R,N-1);
+valid_utf8(_,_) ->
+ false.
+
+printable_unicode(<<C/utf8, R/binary>>=Bin, I, L, Range) when I > 0 ->
+ case printable_char(C,Range) of
true ->
- printable_unicode(R, I - 1, [C | L]);
+ printable_unicode(R, I - 1, [C | L],Range);
false ->
{I, Bin, lists:reverse(L)}
end;
-printable_unicode(Bin, I, L) ->
+printable_unicode(Bin, I, L,_) ->
{I, Bin, lists:reverse(L)}.
-printable_char($\n) -> true;
-printable_char($\r) -> true;
-printable_char($\t) -> true;
-printable_char($\v) -> true;
-printable_char($\b) -> true;
-printable_char($\f) -> true;
-printable_char($\e) -> true;
-printable_char(C) ->
+printable_char($\n,_) -> true;
+printable_char($\r,_) -> true;
+printable_char($\t,_) -> true;
+printable_char($\v,_) -> true;
+printable_char($\b,_) -> true;
+printable_char($\f,_) -> true;
+printable_char($\e,_) -> true;
+printable_char(C,latin1) ->
+ C >= $\s andalso C =< $~ orelse
+ C >= 16#A0 andalso C =< 16#FF;
+printable_char(C,unicode) ->
C >= $\s andalso C =< $~ orelse
C >= 16#A0 andalso C < 16#D800 orelse
C > 16#DFFF andalso C < 16#FFFE orelse
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index df66acb97b..96f3e5dd32 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -129,7 +129,7 @@ start_restricted(RShMod) when is_atom(RShMod) ->
error_logger:error_report(
lists:flatten(
io_lib:fwrite(
- "Restricted shell module ~w not found: ~"++cs_p() ++"\n",
+ "Restricted shell module ~w not found: ~tp\n",
[RShMod,What]))),
Error
end.
@@ -214,8 +214,7 @@ server(StartSync) ->
ok;
{RShMod2,What2} ->
io:fwrite(
- ("Warning! Restricted shell module ~w not found: ~"
- ++cs_p()++".\n"
+ ("Warning! Restricted shell module ~w not found: ~tp.\n"
"Only the commands q() and init:stop() will be allowed!\n"),
[RShMod2,What2]),
application:set_env(stdlib, restricted_shell, ?MODULE)
@@ -337,7 +336,7 @@ get_prompt_func() ->
end.
bad_prompt_func(M) ->
- fwrite_severity(benign, "Bad prompt function: ~"++cs_p(), [M]).
+ fwrite_severity(benign, "Bad prompt function: ~tp", [M]).
default_prompt(N) ->
%% Don't bother flattening the list irrespective of what the
@@ -1380,27 +1379,18 @@ pp(V, I, RT, Enc) ->
{record_print_fun, record_print_fun(RT)}]
++ Enc)).
-%% Control sequence 'p' possibly with Unicode translation modifier
-cs_p() ->
- case encoding() of
- latin1 -> "p";
- unicode -> "tp"
- end.
-
columns() ->
case io:columns() of
{ok,N} -> N;
_ -> 80
end.
-
encoding() ->
[{encoding, Encoding}] = enc(),
Encoding.
-
enc() ->
case lists:keyfind(encoding, 1, io:getopts()) of
- false -> [{encoding,latin1}]; % should never happen
- Enc -> [Enc]
+ false -> [{encoding,latin1}]; % should never happen
+ Enc -> [Enc]
end.
garb(Shell) ->
@@ -1424,10 +1414,9 @@ check_env(V) ->
{ok, Val} when is_integer(Val), Val >= 0 ->
ok;
{ok, Val} ->
- Txt = io_lib:fwrite(
- ("Invalid value of STDLIB configuration parameter ~w: ~"
- ++cs_p()++"\n"),
- [V, Val]),
+ Txt = io_lib:fwrite
+ ("Invalid value of STDLIB configuration parameter"
+ "~w: ~tp\n", [V, Val]),
error_logger:info_report(lists:flatten(Txt))
end.