diff options
author | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
commit | 7215c49aff685e93765598cd428baf1d4320f752 (patch) | |
tree | 76409af3ed6abe7502af3aa438e47b1be98df1e3 /lib/stdlib/src | |
parent | 14820e983856654e68e08244e4dfc689f0804fd8 (diff) | |
parent | 2a79b74ac371387ce338bacf979f9ca32447b302 (diff) | |
download | otp-7215c49aff685e93765598cd428baf1d4320f752.tar.gz otp-7215c49aff685e93765598cd428baf1d4320f752.tar.bz2 otp-7215c49aff685e93765598cd428baf1d4320f752.zip |
Merge branch 'pan/unicode_printable_ranges'
* pan/unicode_printable_ranges:
Adapt stdlib tests to ~tp detecting latin1 binaries
Update primary bootstrap
Make wx debugger use +pc flag when applicable
Correct misspelled comments and space at lin ends
Make ~tp output latin1 binaries as strings if possible
Leave the +pc handling to io and io_lib_pretty
Remove newly introduced warning in erlexec.c
Make shell_SUITE:otp_10302 use +pc unicode when needed
Fix io_proto_SUITE to handle the new io_lib_pretty:print
Add testcase for +pc and io:printable_range/0
Make printing of UTF-8 in binaries behave like lists.
Document +pc flag and io:printable_range/0
Add usage of and spec for io:printable_range/0
Add +pc {latin1|unicode} switch and io:printable_range/0
Fix some Unicode issues
OTP-18084
Diffstat (limited to 'lib/stdlib/src')
-rw-r--r-- | lib/stdlib/src/erl_pp.erl | 18 | ||||
-rw-r--r-- | lib/stdlib/src/io.erl | 7 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib.erl | 48 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 55 | ||||
-rw-r--r-- | lib/stdlib/src/shell.erl | 27 |
5 files changed, 92 insertions, 63 deletions
diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 06dae51cc9..7c7566e4ec 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -42,7 +42,7 @@ | {encoding, latin1 | unicode | utf8}). -type(options() :: hook_function() | [option()]). --record(pp, {string_fun, char_fun, term_fun}). +-record(pp, {string_fun, char_fun}). -record(options, {hook, encoding, opts}). @@ -182,13 +182,11 @@ state(_Hook) -> state() -> #pp{string_fun = fun io_lib:write_string_as_latin1/1, - char_fun = fun io_lib:write_char_as_latin1/1, - term_fun = fun(T) -> io_lib:format("~p", [T]) end}. + char_fun = fun io_lib:write_char_as_latin1/1}. unicode_state() -> #pp{string_fun = fun io_lib:write_string/1, - char_fun = fun io_lib:write_char/1, - term_fun = fun(T) -> io_lib:format("~tp", [T]) end}. + char_fun = fun io_lib:write_char/1}. encoding(Options) -> case proplists:get_value(encoding, Options, epp:default_encoding()) of @@ -204,10 +202,10 @@ lform({function,Line,Name,Arity,Clauses}, Opts, _State) -> lform({rule,Line,Name,Arity,Clauses}, Opts, _State) -> lrule({rule,Line,Name,Arity,Clauses}, Opts); %% These are specials to make it easier for the compiler. -lform({error,E}, _Opts, State) -> - leaf((State#pp.term_fun)({error,E})++"\n"); -lform({warning,W}, _Opts, State) -> - leaf((State#pp.term_fun)({warning,W})++"\n"); +lform({error,E}, _Opts, _State) -> + leaf(format("~p\n", [{error,E}])); +lform({warning,W}, _Opts, _State) -> + leaf(format("~p\n", [{warning,W}])); lform({eof,_Line}, _Opts, _State) -> $\n. @@ -233,7 +231,7 @@ lattribute(import, Name, _Opts, _State) when is_list(Name) -> lattribute(import, {From,Falist}, _Opts, _State) -> attr("import",[{var,0,pname(From)},falist(Falist)]); lattribute(file, {Name,Line}, _Opts, State) -> - attr("file", [{var,0,(State#pp.term_fun)(Name)},{integer,0,Line}]); + attr("file", [{var,0,(State#pp.string_fun)(Name)},{integer,0,Line}]); lattribute(record, {Name,Is}, Opts, _State) -> Nl = leaf(format("-record(~w,", [Name])), [{first,Nl,record_fields(Is, Opts)},$)]; diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 3dddb0d6e7..c92e9e3ade 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -32,6 +32,8 @@ parse_erl_exprs/4,parse_erl_form/1,parse_erl_form/2, parse_erl_form/3,parse_erl_form/4]). -export([request/1,request/2,requests/1,requests/2]). +%% Implemented in native code +-export([printable_range/0]). -export_type([device/0, format/0, server_no_data/0]). @@ -66,6 +68,11 @@ o_request(Io, Request, Func) -> Other end. +%% Request what the user considers printable characters +-spec printable_range() -> 'unicode' | 'latin1'. +printable_range() -> + erlang:nif_error(undefined). + %% Put chars takes mixed *unicode* list from R13 onwards. -spec put_chars(CharData) -> 'ok' when CharData :: unicode:chardata(). diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index b7ec848e1e..a9b6d4131e 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -72,7 +72,7 @@ -export([quote_atom/2, char_list/1, latin1_char_list/1, deep_char_list/1, deep_latin1_char_list/1, - printable_list/1, printable_latin1_list/1]). + printable_list/1, printable_latin1_list/1, printable_unicode_list/1]). %% Utilities for collecting characters. -export([collect_chars/3, collect_chars/4, @@ -533,27 +533,45 @@ printable_latin1_list(_) -> false. %Everything else is false %% Return true if CharList is a list of printable characters, else %% false. The notion of printable in Unicode terms is somewhat floating. %% Everything that is not a control character and not invalid unicode -%% will be considered printable. +%% will be considered printable. +%% What the user has noted as printable characters is what actually +%% specifies when this function will return true. If the VM is started +%% with +pc latin1, only the latin1 range will be deemed as printable +%% if on the other hand +pc unicode is given, all characters in the Unicode +%% character set are deemed printable. latin1 is default. -spec printable_list(Term) -> boolean() when Term :: term(). -printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> - printable_list(Cs); -printable_list([C|Cs]) +printable_list(L) -> + %% There will be more alternatives returns from io:printable range + %% in the future. To not have a catch-all clause is deliberate. + case io:printable_range() of + latin1 -> + printable_latin1_list(L); + unicode -> + printable_unicode_list(L) + end. + +-spec printable_unicode_list(Term) -> boolean() when + Term :: term(). + +printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_unicode_list(Cs); +printable_unicode_list([C|Cs]) when is_integer(C), C >= 16#A0, C < 16#D800; is_integer(C), C > 16#DFFF, C < 16#FFFE; is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - printable_list(Cs); -printable_list([$\n|Cs]) -> printable_list(Cs); -printable_list([$\r|Cs]) -> printable_list(Cs); -printable_list([$\t|Cs]) -> printable_list(Cs); -printable_list([$\v|Cs]) -> printable_list(Cs); -printable_list([$\b|Cs]) -> printable_list(Cs); -printable_list([$\f|Cs]) -> printable_list(Cs); -printable_list([$\e|Cs]) -> printable_list(Cs); -printable_list([]) -> true; -printable_list(_) -> false. %Everything else is false + printable_unicode_list(Cs); +printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([]) -> true; +printable_unicode_list(_) -> false. %Everything else is false %% List = nl() %% Return a list of characters to generate a newline. diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 525b534249..7637ad7a3d 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -485,13 +485,18 @@ printable_bin(Bin, Len, D, latin1) -> false end; printable_bin(Bin, Len, D, _Uni) -> - case printable_unicode(Bin, Len, []) of - {_, <<>>, L} -> - {byte_size(Bin) =:= length(L), L}; - {NC, Bin1, L} when D > 0, Len - NC >= D -> - {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; - {_NC, _Bin, _L} -> - false + case valid_utf8(Bin,Len) of + true -> + case printable_unicode(Bin, Len, [], io:printable_range()) of + {_, <<>>, L} -> + {byte_size(Bin) =:= length(L), L}; + {NC, Bin1, L} when D > 0, Len - NC >= D -> + {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; + {_NC, _Bin, _L} -> + false + end; + false -> + printable_bin(Bin, Len, D, latin1) end. printable_bin1(_Bin, _Start, 0) -> @@ -522,24 +527,36 @@ printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); printable_latin1_list([], _) -> all; printable_latin1_list(_, N) -> N. -printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 -> - case printable_char(C) of +valid_utf8(<<>>,_) -> + true; +valid_utf8(_,0) -> + true; +valid_utf8(<<_/utf8, R/binary>>,N) -> + valid_utf8(R,N-1); +valid_utf8(_,_) -> + false. + +printable_unicode(<<C/utf8, R/binary>>=Bin, I, L, Range) when I > 0 -> + case printable_char(C,Range) of true -> - printable_unicode(R, I - 1, [C | L]); + printable_unicode(R, I - 1, [C | L],Range); false -> {I, Bin, lists:reverse(L)} end; -printable_unicode(Bin, I, L) -> +printable_unicode(Bin, I, L,_) -> {I, Bin, lists:reverse(L)}. -printable_char($\n) -> true; -printable_char($\r) -> true; -printable_char($\t) -> true; -printable_char($\v) -> true; -printable_char($\b) -> true; -printable_char($\f) -> true; -printable_char($\e) -> true; -printable_char(C) -> +printable_char($\n,_) -> true; +printable_char($\r,_) -> true; +printable_char($\t,_) -> true; +printable_char($\v,_) -> true; +printable_char($\b,_) -> true; +printable_char($\f,_) -> true; +printable_char($\e,_) -> true; +printable_char(C,latin1) -> + C >= $\s andalso C =< $~ orelse + C >= 16#A0 andalso C =< 16#FF; +printable_char(C,unicode) -> C >= $\s andalso C =< $~ orelse C >= 16#A0 andalso C < 16#D800 orelse C > 16#DFFF andalso C < 16#FFFE orelse diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index df66acb97b..96f3e5dd32 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -129,7 +129,7 @@ start_restricted(RShMod) when is_atom(RShMod) -> error_logger:error_report( lists:flatten( io_lib:fwrite( - "Restricted shell module ~w not found: ~"++cs_p() ++"\n", + "Restricted shell module ~w not found: ~tp\n", [RShMod,What]))), Error end. @@ -214,8 +214,7 @@ server(StartSync) -> ok; {RShMod2,What2} -> io:fwrite( - ("Warning! Restricted shell module ~w not found: ~" - ++cs_p()++".\n" + ("Warning! Restricted shell module ~w not found: ~tp.\n" "Only the commands q() and init:stop() will be allowed!\n"), [RShMod2,What2]), application:set_env(stdlib, restricted_shell, ?MODULE) @@ -337,7 +336,7 @@ get_prompt_func() -> end. bad_prompt_func(M) -> - fwrite_severity(benign, "Bad prompt function: ~"++cs_p(), [M]). + fwrite_severity(benign, "Bad prompt function: ~tp", [M]). default_prompt(N) -> %% Don't bother flattening the list irrespective of what the @@ -1380,27 +1379,18 @@ pp(V, I, RT, Enc) -> {record_print_fun, record_print_fun(RT)}] ++ Enc)). -%% Control sequence 'p' possibly with Unicode translation modifier -cs_p() -> - case encoding() of - latin1 -> "p"; - unicode -> "tp" - end. - columns() -> case io:columns() of {ok,N} -> N; _ -> 80 end. - encoding() -> [{encoding, Encoding}] = enc(), Encoding. - enc() -> case lists:keyfind(encoding, 1, io:getopts()) of - false -> [{encoding,latin1}]; % should never happen - Enc -> [Enc] + false -> [{encoding,latin1}]; % should never happen + Enc -> [Enc] end. garb(Shell) -> @@ -1424,10 +1414,9 @@ check_env(V) -> {ok, Val} when is_integer(Val), Val >= 0 -> ok; {ok, Val} -> - Txt = io_lib:fwrite( - ("Invalid value of STDLIB configuration parameter ~w: ~" - ++cs_p()++"\n"), - [V, Val]), + Txt = io_lib:fwrite + ("Invalid value of STDLIB configuration parameter" + "~w: ~tp\n", [V, Val]), error_logger:info_report(lists:flatten(Txt)) end. |