From 998a64b2237086ebc776de4c72b0df8733e1c4ef Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Sat, 16 Feb 2013 19:04:29 +0100 Subject: Fix some Unicode issues Also let the Erlang shell use the new function io:printable_range(). --- lib/stdlib/src/erl_pp.erl | 18 ++++++++---------- lib/stdlib/src/shell.erl | 32 ++++++++------------------------ 2 files changed, 16 insertions(+), 34 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 06dae51cc9..7c7566e4ec 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -42,7 +42,7 @@ | {encoding, latin1 | unicode | utf8}). -type(options() :: hook_function() | [option()]). --record(pp, {string_fun, char_fun, term_fun}). +-record(pp, {string_fun, char_fun}). -record(options, {hook, encoding, opts}). @@ -182,13 +182,11 @@ state(_Hook) -> state() -> #pp{string_fun = fun io_lib:write_string_as_latin1/1, - char_fun = fun io_lib:write_char_as_latin1/1, - term_fun = fun(T) -> io_lib:format("~p", [T]) end}. + char_fun = fun io_lib:write_char_as_latin1/1}. unicode_state() -> #pp{string_fun = fun io_lib:write_string/1, - char_fun = fun io_lib:write_char/1, - term_fun = fun(T) -> io_lib:format("~tp", [T]) end}. + char_fun = fun io_lib:write_char/1}. encoding(Options) -> case proplists:get_value(encoding, Options, epp:default_encoding()) of @@ -204,10 +202,10 @@ lform({function,Line,Name,Arity,Clauses}, Opts, _State) -> lform({rule,Line,Name,Arity,Clauses}, Opts, _State) -> lrule({rule,Line,Name,Arity,Clauses}, Opts); %% These are specials to make it easier for the compiler. -lform({error,E}, _Opts, State) -> - leaf((State#pp.term_fun)({error,E})++"\n"); -lform({warning,W}, _Opts, State) -> - leaf((State#pp.term_fun)({warning,W})++"\n"); +lform({error,E}, _Opts, _State) -> + leaf(format("~p\n", [{error,E}])); +lform({warning,W}, _Opts, _State) -> + leaf(format("~p\n", [{warning,W}])); lform({eof,_Line}, _Opts, _State) -> $\n. @@ -233,7 +231,7 @@ lattribute(import, Name, _Opts, _State) when is_list(Name) -> lattribute(import, {From,Falist}, _Opts, _State) -> attr("import",[{var,0,pname(From)},falist(Falist)]); lattribute(file, {Name,Line}, _Opts, State) -> - attr("file", [{var,0,(State#pp.term_fun)(Name)},{integer,0,Line}]); + attr("file", [{var,0,(State#pp.string_fun)(Name)},{integer,0,Line}]); lattribute(record, {Name,Is}, Opts, _State) -> Nl = leaf(format("-record(~w,", [Name])), [{first,Nl,record_fields(Is, Opts)},$)]; diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index df66acb97b..c90b5ad5c8 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -129,7 +129,7 @@ start_restricted(RShMod) when is_atom(RShMod) -> error_logger:error_report( lists:flatten( io_lib:fwrite( - "Restricted shell module ~w not found: ~"++cs_p() ++"\n", + "Restricted shell module ~w not found: ~tp\n", [RShMod,What]))), Error end. @@ -214,8 +214,7 @@ server(StartSync) -> ok; {RShMod2,What2} -> io:fwrite( - ("Warning! Restricted shell module ~w not found: ~" - ++cs_p()++".\n" + ("Warning! Restricted shell module ~w not found: ~tp.\n" "Only the commands q() and init:stop() will be allowed!\n"), [RShMod2,What2]), application:set_env(stdlib, restricted_shell, ?MODULE) @@ -337,7 +336,7 @@ get_prompt_func() -> end. bad_prompt_func(M) -> - fwrite_severity(benign, "Bad prompt function: ~"++cs_p(), [M]). + fwrite_severity(benign, "Bad prompt function: ~tp", [M]). default_prompt(N) -> %% Don't bother flattening the list irrespective of what the @@ -571,7 +570,7 @@ report_exception(Class, Severity, {Reason,Stacktrace}, RT) -> I = iolist_size(Tag) + 1, PF = fun(Term, I1) -> pp(Term, I1, RT) end, SF = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, - Enc = encoding(), + Enc = io:printable_range(), Str = lib:format_exception(I, Class, Reason, Stacktrace, SF, PF, Enc), io:requests([{put_chars, latin1, Tag}, {put_chars, unicode, Str}, @@ -1380,28 +1379,14 @@ pp(V, I, RT, Enc) -> {record_print_fun, record_print_fun(RT)}] ++ Enc)). -%% Control sequence 'p' possibly with Unicode translation modifier -cs_p() -> - case encoding() of - latin1 -> "p"; - unicode -> "tp" - end. - columns() -> case io:columns() of {ok,N} -> N; _ -> 80 end. -encoding() -> - [{encoding, Encoding}] = enc(), - Encoding. - enc() -> - case lists:keyfind(encoding, 1, io:getopts()) of - false -> [{encoding,latin1}]; % should never happen - Enc -> [Enc] - end. + [{encoding, io:printable_range()}]. garb(Shell) -> erlang:garbage_collect(Shell), @@ -1424,10 +1409,9 @@ check_env(V) -> {ok, Val} when is_integer(Val), Val >= 0 -> ok; {ok, Val} -> - Txt = io_lib:fwrite( - ("Invalid value of STDLIB configuration parameter ~w: ~" - ++cs_p()++"\n"), - [V, Val]), + Txt = io_lib:fwrite + ("Invalid value of STDLIB configuration parameter" + "~w: ~tp\n", [V, Val]), error_logger:info_report(lists:flatten(Txt)) end. -- cgit v1.2.3 From 75238dc582f5dcf69dfc00b7719dca7d50ca9a68 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Wed, 13 Feb 2013 14:32:47 +0100 Subject: Add usage of and spec for io:printable_range/0 This makes both io:format("~tp", ...) and the shell honor the +pc setting and also reverts the shell to the old behaviour of only interpreting latin1 strings as "strings" by default. +pc unicode turns on the R16A behaviour. Doing io:format("~tp~n", [[1024,1025]]). will show you the difference when starting with +pc unicode. --- lib/stdlib/src/io.erl | 7 +++++++ lib/stdlib/src/io_lib.erl | 48 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 40 insertions(+), 15 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 3dddb0d6e7..c92e9e3ade 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -32,6 +32,8 @@ parse_erl_exprs/4,parse_erl_form/1,parse_erl_form/2, parse_erl_form/3,parse_erl_form/4]). -export([request/1,request/2,requests/1,requests/2]). +%% Implemented in native code +-export([printable_range/0]). -export_type([device/0, format/0, server_no_data/0]). @@ -66,6 +68,11 @@ o_request(Io, Request, Func) -> Other end. +%% Request what the user considers printable characters +-spec printable_range() -> 'unicode' | 'latin1'. +printable_range() -> + erlang:nif_error(undefined). + %% Put chars takes mixed *unicode* list from R13 onwards. -spec put_chars(CharData) -> 'ok' when CharData :: unicode:chardata(). diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index b7ec848e1e..ff389cbe22 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -72,7 +72,7 @@ -export([quote_atom/2, char_list/1, latin1_char_list/1, deep_char_list/1, deep_latin1_char_list/1, - printable_list/1, printable_latin1_list/1]). + printable_list/1, printable_latin1_list/1, printable_unicode_list/1]). %% Utilities for collecting characters. -export([collect_chars/3, collect_chars/4, @@ -533,27 +533,45 @@ printable_latin1_list(_) -> false. %Everything else is false %% Return true if CharList is a list of printable characters, else %% false. The notion of printable in Unicode terms is somewhat floating. %% Everything that is not a control character and not invalid unicode -%% will be considered printable. +%% will be considered printable. +%% What the user has noted as printable characters is what actually +%% specifies when this function will return true. If the VM is started +%% with +pc latin1, only the latin1 range will be deemed as printable +%% if on the other hand +pc unicode is given, all characters in the Unicode +%% character set are deemed printable. latin1 is default. -spec printable_list(Term) -> boolean() when Term :: term(). -printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> - printable_list(Cs); -printable_list([C|Cs]) +printable_list(L) -> + %% There will be more alternatives returns from io:printable range + %% in the future. To not have a catch-all caluse is deliberate. + case io:printable_range() of + latin1 -> + printable_latin1_list(L); + unicode -> + printable_unicode_list(L) + end. + +-spec printable_unicode_list(Term) -> boolean() when + Term :: term(). + +printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_unicode_list(Cs); +printable_unicode_list([C|Cs]) when is_integer(C), C >= 16#A0, C < 16#D800; is_integer(C), C > 16#DFFF, C < 16#FFFE; is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - printable_list(Cs); -printable_list([$\n|Cs]) -> printable_list(Cs); -printable_list([$\r|Cs]) -> printable_list(Cs); -printable_list([$\t|Cs]) -> printable_list(Cs); -printable_list([$\v|Cs]) -> printable_list(Cs); -printable_list([$\b|Cs]) -> printable_list(Cs); -printable_list([$\f|Cs]) -> printable_list(Cs); -printable_list([$\e|Cs]) -> printable_list(Cs); -printable_list([]) -> true; -printable_list(_) -> false. %Everything else is false + printable_unicode_list(Cs); +printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs); +printable_unicode_list([]) -> true; +printable_unicode_list(_) -> false. %Everything else is false %% List = nl() %% Return a list of characters to generate a newline. -- cgit v1.2.3 From fe227eac8af30e646ade187eaf194bdd9c92b2c2 Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Thu, 14 Feb 2013 15:33:00 +0100 Subject: Make printing of UTF-8 in binaries behave like lists. This means that only UTF-8 characters in binaries that conform to the printable_range are printed as <<"..."/utf8>> in io_lib_pretty. --- lib/stdlib/src/io_lib_pretty.erl | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 525b534249..4a61b033b0 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -485,7 +485,7 @@ printable_bin(Bin, Len, D, latin1) -> false end; printable_bin(Bin, Len, D, _Uni) -> - case printable_unicode(Bin, Len, []) of + case printable_unicode(Bin, Len, [], io:printable_range()) of {_, <<>>, L} -> {byte_size(Bin) =:= length(L), L}; {NC, Bin1, L} when D > 0, Len - NC >= D -> @@ -522,24 +522,27 @@ printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); printable_latin1_list([], _) -> all; printable_latin1_list(_, N) -> N. -printable_unicode(<>=Bin, I, L) when I > 0 -> - case printable_char(C) of +printable_unicode(<>=Bin, I, L, Range) when I > 0 -> + case printable_char(C,Range) of true -> - printable_unicode(R, I - 1, [C | L]); + printable_unicode(R, I - 1, [C | L],Range); false -> {I, Bin, lists:reverse(L)} end; -printable_unicode(Bin, I, L) -> +printable_unicode(Bin, I, L,_) -> {I, Bin, lists:reverse(L)}. -printable_char($\n) -> true; -printable_char($\r) -> true; -printable_char($\t) -> true; -printable_char($\v) -> true; -printable_char($\b) -> true; -printable_char($\f) -> true; -printable_char($\e) -> true; -printable_char(C) -> +printable_char($\n,_) -> true; +printable_char($\r,_) -> true; +printable_char($\t,_) -> true; +printable_char($\v,_) -> true; +printable_char($\b,_) -> true; +printable_char($\f,_) -> true; +printable_char($\e,_) -> true; +printable_char(C,latin1) -> + C >= $\s andalso C =< $~ orelse + C >= 16#A0 andalso C =< 16#FF; +printable_char(C,unicode) -> C >= $\s andalso C =< $~ orelse C >= 16#A0 andalso C < 16#D800 orelse C > 16#DFFF andalso C < 16#FFFE orelse -- cgit v1.2.3 From 568cb052c7be7e36f96ddd18001529e371790b4b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 18 Feb 2013 15:12:37 +0100 Subject: Leave the +pc handling to io and io_lib_pretty This makes the shell output binaries and list as intended by the +pc setting. --- lib/stdlib/src/shell.erl | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index c90b5ad5c8..96f3e5dd32 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -570,7 +570,7 @@ report_exception(Class, Severity, {Reason,Stacktrace}, RT) -> I = iolist_size(Tag) + 1, PF = fun(Term, I1) -> pp(Term, I1, RT) end, SF = fun(M, _F, _A) -> (M =:= erl_eval) or (M =:= ?MODULE) end, - Enc = io:printable_range(), + Enc = encoding(), Str = lib:format_exception(I, Class, Reason, Stacktrace, SF, PF, Enc), io:requests([{put_chars, latin1, Tag}, {put_chars, unicode, Str}, @@ -1384,9 +1384,14 @@ columns() -> {ok,N} -> N; _ -> 80 end. - +encoding() -> + [{encoding, Encoding}] = enc(), + Encoding. enc() -> - [{encoding, io:printable_range()}]. + case lists:keyfind(encoding, 1, io:getopts()) of + false -> [{encoding,latin1}]; % should never happen + Enc -> [Enc] + end. garb(Shell) -> erlang:garbage_collect(Shell), -- cgit v1.2.3 From b6e1fa3e42011c7845becd8a000fd77940d6ed9b Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 18 Feb 2013 15:14:18 +0100 Subject: Make ~tp output latin1 binaries as strings if possible --- lib/stdlib/src/io_lib_pretty.erl | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 4a61b033b0..7637ad7a3d 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -485,13 +485,18 @@ printable_bin(Bin, Len, D, latin1) -> false end; printable_bin(Bin, Len, D, _Uni) -> - case printable_unicode(Bin, Len, [], io:printable_range()) of - {_, <<>>, L} -> - {byte_size(Bin) =:= length(L), L}; - {NC, Bin1, L} when D > 0, Len - NC >= D -> - {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; - {_NC, _Bin, _L} -> - false + case valid_utf8(Bin,Len) of + true -> + case printable_unicode(Bin, Len, [], io:printable_range()) of + {_, <<>>, L} -> + {byte_size(Bin) =:= length(L), L}; + {NC, Bin1, L} when D > 0, Len - NC >= D -> + {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; + {_NC, _Bin, _L} -> + false + end; + false -> + printable_bin(Bin, Len, D, latin1) end. printable_bin1(_Bin, _Start, 0) -> @@ -522,6 +527,15 @@ printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); printable_latin1_list([], _) -> all; printable_latin1_list(_, N) -> N. +valid_utf8(<<>>,_) -> + true; +valid_utf8(_,0) -> + true; +valid_utf8(<<_/utf8, R/binary>>,N) -> + valid_utf8(R,N-1); +valid_utf8(_,_) -> + false. + printable_unicode(<>=Bin, I, L, Range) when I > 0 -> case printable_char(C,Range) of true -> -- cgit v1.2.3 From 5fcb4b742820ab66e646f8561ed9f32b253bca9a Mon Sep 17 00:00:00 2001 From: Patrik Nyblom Date: Mon, 18 Feb 2013 16:42:51 +0100 Subject: Correct misspelled comments and space at lin ends --- lib/stdlib/src/io_lib.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/stdlib/src') diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index ff389cbe22..a9b6d4131e 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -545,7 +545,7 @@ printable_latin1_list(_) -> false. %Everything else is false printable_list(L) -> %% There will be more alternatives returns from io:printable range - %% in the future. To not have a catch-all caluse is deliberate. + %% in the future. To not have a catch-all clause is deliberate. case io:printable_range() of latin1 -> printable_latin1_list(L); -- cgit v1.2.3