From 771b21fa519a52a27134806e1d62440d8535f6b5 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Mon, 4 Feb 2013 16:11:43 +0100 Subject: Make Unicode corrections --- lib/kernel/doc/src/file.xml | 4 +--- lib/kernel/src/application_controller.erl | 4 ++-- lib/kernel/src/file.erl | 6 +++--- lib/parsetools/src/leex.erl | 2 +- lib/parsetools/src/yecc.erl | 2 +- lib/parsetools/src/yeccscan.erl | 4 ++-- lib/stdlib/doc/src/unicode_usage.xml | 13 ++++++------- lib/stdlib/src/epp.erl | 4 ++-- lib/stdlib/src/shell.erl | 3 +-- lib/syntax_tools/src/epp_dodger.erl | 2 +- 10 files changed, 20 insertions(+), 24 deletions(-) diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml index 4a9b7d2ceb..a96da0fb4e 100644 --- a/lib/kernel/doc/src/file.xml +++ b/lib/kernel/doc/src/file.xml @@ -156,9 +156,6 @@ - - - @@ -1648,6 +1645,7 @@ send a file to a socket +

Sends Bytes from the file referenced by RawFile beginning at Offset to diff --git a/lib/kernel/src/application_controller.erl b/lib/kernel/src/application_controller.erl index 3c860af48e..1602745669 100644 --- a/lib/kernel/src/application_controller.erl +++ b/lib/kernel/src/application_controller.erl @@ -1447,7 +1447,7 @@ prim_consult(FullName) -> {ok, Bin, _} -> case file_binary_to_list(Bin) of {ok, String} -> - case erl_scan:string(String, 1, [unicode]) of + case erl_scan:string(String) of {ok, Tokens, _EndLine} -> prim_parse(Tokens, []); {error, Reason, _EndLine} -> @@ -1600,7 +1600,7 @@ conv(_) -> []. %%% Fix some day: eliminate the duplicated code here make_term(Str) -> - case erl_scan:string(Str, 1, [unicode]) of + case erl_scan:string(Str) of {ok, Tokens, _} -> case erl_parse:parse_term(Tokens ++ [{dot, 1}]) of {ok, Term} -> diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl index 70c4583ad2..e7a0451011 100644 --- a/lib/kernel/src/file.erl +++ b/lib/kernel/src/file.erl @@ -1196,7 +1196,7 @@ change_time(Name, {{AY, AM, AD}, {AH, AMin, ASec}}=Atime, -spec sendfile(RawFile, Socket, Offset, Bytes, Opts) -> {'ok', non_neg_integer()} | {'error', inet:posix() | closed | badarg | not_owner} when - RawFile :: file:fd(), + RawFile :: fd(), Socket :: inet:socket(), Offset :: non_neg_integer(), Bytes :: non_neg_integer(), @@ -1222,7 +1222,7 @@ sendfile(File, Sock, Offset, Bytes, Opts) -> -spec sendfile(Filename, Socket) -> {'ok', non_neg_integer()} | {'error', inet:posix() | closed | badarg | not_owner} - when Filename :: file:name(), + when Filename :: name(), Socket :: inet:socket(). sendfile(Filename, Sock) -> case file:open(Filename, [read, raw, binary]) of @@ -1345,7 +1345,7 @@ eval_stream(Fd, Handling, Bs) -> eval_stream(Fd, Handling, 1, undefined, [], Bs). eval_stream(Fd, H, Line, Last, E, Bs) -> - eval_stream2(io:parse_erl_exprs(Fd, '', Line, [unicode]), Fd, H, Last, E, Bs). + eval_stream2(io:parse_erl_exprs(Fd, '', Line), Fd, H, Last, E, Bs). eval_stream2({ok,Form,EndLine}, Fd, H, Last, E, Bs0) -> try erl_eval:exprs(Form, Bs0) of diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index 32c513f56c..22b496258f 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -504,7 +504,7 @@ collect_rule(Ifile, Chars, L0) -> collect_action(_Ifile, {error, _}, L, _Cont0) -> {error, {L, leex, cannot_parse}, ignored_end_line}; collect_action(Ifile, Chars, L0, Cont0) -> - case erl_scan:tokens(Cont0, Chars, L0, [unicode]) of + case erl_scan:tokens(Cont0, Chars, L0) of {done,{ok,Toks,_},_} -> {ok,Toks,L0}; {done,{eof,_},_} -> {eof,L0}; {done,{error,E,_},_} -> {error,E,L0}; diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index 2f0f70f39b..30e0db421e 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -2561,7 +2561,7 @@ format_assoc(nonassoc) -> format_symbol(Symbol) -> String = concat([Symbol]), - case erl_scan:string(String, 1, [unicode]) of + case erl_scan:string(String) of {ok, [{atom, _, _}], _} -> io_lib:fwrite(<<"~w">>, [Symbol]); {ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' -> diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl index 9e0e85143a..fa3ce8c73b 100644 --- a/lib/parsetools/src/yeccscan.erl +++ b/lib/parsetools/src/yeccscan.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -24,7 +24,7 @@ scan(Inport) -> scan(Inport, '', 1). scan(Inport, Prompt, Line1) -> - case catch io:scan_erl_form(Inport, Prompt, Line1, [unicode]) of + case catch io:scan_erl_form(Inport, Prompt, Line1) of {eof, Line2} -> {eof, Line2}; {ok, Tokens, Line2} -> diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index 0a75fbeec0..354ec58df3 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -69,12 +69,11 @@ strings.

Character data may be combined from several sources, sometimes available in a mix of strings and binaries. Erlang has for long had the concept of iodata or iolists, where binaries and lists can be combined to represent a sequence of bytes. In the same way, the Unicode aware modules often allow for combinations of binaries and lists where the binaries have characters encoded in UTF-8 and the lists contain such binaries or numbers representing Unicode codepoints:

unicode_binary() = binary() with characters encoded in UTF-8 coding standard -unicode_char() = integer() >= 0 representing valid Unicode codepoint chardata() = charlist() | unicode_binary() -charlist() = [unicode_char() | unicode_binary() | charlist()] - a unicode_binary is allowed as the tail of the list +charlist() = maybe_improper_list(char() | unicode_binary() | charlist(), + unicode_binary() | nil())

The module unicode in STDLIB even supports similar mixes with binaries containing other encodings than UTF-8, but that is a special case to allow for conversions to and from external data:

external_unicode_binary() = binary() with characters coded in @@ -82,10 +81,10 @@ external_unicode_binary() = binary() with characters coded in external_chardata() = external_charlist() | external_unicode_binary() -external_charlist() = [unicode_char() | - external_unicode_binary() | - external_charlist()] - an external_unicode_binary() is allowed as the tail of the list +external_charlist() = maybe_improper_list(char() | + external_unicode_binary() | + external_charlist(), + external_unicode_binary() | nil())
Basic Language Support for Unicode diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index afa39c3fb9..1bb3b95ae2 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -661,7 +661,7 @@ leave_file(From, St) -> %% scan_toks(Tokens, From, EppState) scan_toks(From, St) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of + case io:scan_erl_form(St#epp.file, '', St#epp.location) of {ok,Toks,Cl} -> scan_toks(Toks, From, St#epp{location=Cl}); {error,E,Cl} -> @@ -1035,7 +1035,7 @@ new_location(Ln, {Le,_}, {Lf,_}) -> %% nested conditionals and repeated 'else's. skip_toks(From, St, [I|Sis]) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of + case io:scan_erl_form(St#epp.file, '', St#epp.location) of {ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} -> skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]); {ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} -> diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index 0cd408204e..c94f052b24 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -950,7 +950,7 @@ local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> RecDef = expand_value(RecDef0), RDs = lists:flatten(erl_pp:expr(RecDef)), Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]), - {ok, Tokens, _} = erl_scan:string(Attr, 1, [unicode]), + {ok, Tokens, _} = erl_scan:string(Attr), case erl_parse:parse_form(Tokens) of {ok,AttrForm} -> [RN] = add_records([AttrForm], Bs, RT), @@ -1397,7 +1397,6 @@ enc() -> garb(Shell) -> erlang:garbage_collect(Shell), catch erlang:garbage_collect(whereis(user)), - catch erlang:garbage_collect(whereis(group)), catch erlang:garbage_collect(group_leader()), erlang:garbage_collect(). diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl index 70395848a1..131be4e8e4 100644 --- a/lib/syntax_tools/src/epp_dodger.erl +++ b/lib/syntax_tools/src/epp_dodger.erl @@ -401,7 +401,7 @@ quick_parse_form(Dev, L0, Options) -> parse_form(Dev, L0, Parser, Options) -> NoFail = proplists:get_bool(no_fail, Options), Opt = #opt{clever = proplists:get_bool(clever, Options)}, - case io:scan_erl_form(Dev, "", L0, [unicode]) of + case io:scan_erl_form(Dev, "", L0) of {ok, Ts, L1} -> case catch {ok, Parser(Ts, Opt)} of {'EXIT', Term} -> -- cgit v1.2.3 From b333d6f828b396e8174b3a5d2a6d34f91a872d42 Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Wed, 6 Feb 2013 12:45:35 +0100 Subject: [stdlib] Fix a bug concerning pretty printing and Unicode Binaries were pretty printed too often. --- lib/stdlib/src/io_lib_pretty.erl | 68 ++++++++++++++++++++++------------------ lib/stdlib/test/io_SUITE.erl | 2 ++ 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index a8f610558a..b05db3d290 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -452,18 +452,6 @@ printable_list(L, _D, latin1) -> io_lib:printable_latin1_list(L); printable_list(L, _D, _Uni) -> io_lib:printable_list(L). -%% Truncated lists could break some existing code. -% printable_list(L, D, Enc) when D >= 0 -> -% Len = ?CHARS * (D - 1), -% case printable_list1(L, Len, Enc) of -% all -> -% true; -% N when is_integer(N), Len - N >= D - 1 -> -% {L1, _} = lists:split(Len - N, L), -% {true, L1}; -% N when is_integer(N) -> -% false -% end. printable_bin(Bin, D, Enc) when D >= 0, ?CHARS * D =< byte_size(Bin) -> printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D, Enc); @@ -473,7 +461,7 @@ printable_bin(Bin, D, Enc) -> printable_bin(Bin, Len, D, latin1) -> N = erlang:min(20, Len), L = binary_to_list(Bin, 1, N), - case printable_list1(L, N) of + case printable_latin1_list(L, N) of all when N =:= byte_size(Bin) -> {true, L}; all when N =:= Len -> % N < byte_size(Bin) @@ -507,7 +495,7 @@ printable_bin1(_Bin, _Start, 0) -> printable_bin1(Bin, Start, Len) -> N = erlang:min(10000, Len), L = binary_to_list(Bin, Start, Start + N - 1), - case printable_list1(L, N) of + case printable_latin1_list(L, N) of all -> printable_bin1(Bin, Start + N, Len - N); NC when is_integer(NC) -> @@ -515,26 +503,44 @@ printable_bin1(Bin, Start, Len) -> end. %% -> all | integer() >=0. Adopted from io_lib.erl. -% printable_list1([_ | _], 0) -> 0; -printable_list1([C | Cs], N) when is_integer(C), C >= $\s, C =< $~ -> - printable_list1(Cs, N - 1); -printable_list1([C | Cs], N) when is_integer(C), C >= $\240, C =< $\377 -> - printable_list1(Cs, N - 1); -printable_list1([$\n | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\r | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\t | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\v | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\b | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\f | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([], _) -> all; -printable_list1(_, N) -> N. - -printable_unicode(<>, I, L) when I > 0 -> - printable_unicode(R, I - 1, [C | L]); +% printable_latin1_list([_ | _], 0) -> 0; +printable_latin1_list([C | Cs], N) when C >= $\s, C =< $~ -> + printable_latin1_list(Cs, N - 1); +printable_latin1_list([C | Cs], N) when C >= $\240, C =< $\377 -> + printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\n | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\r | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\t | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\v | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\b | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\f | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([], _) -> all; +printable_latin1_list(_, N) -> N. + +printable_unicode(<>=Bin, I, L) when I > 0 -> + case printable_char(C) of + true -> + printable_unicode(R, I - 1, [C | L]); + false -> + {I, Bin, lists:reverse(L)} + end; printable_unicode(Bin, I, L) -> {I, Bin, lists:reverse(L)}. +printable_char($\n) -> true; +printable_char($\r) -> true; +printable_char($\t) -> true; +printable_char($\v) -> true; +printable_char($\b) -> true; +printable_char($\f) -> true; +printable_char($\e) -> true; +printable_char(C) -> + C >= $\s andalso C =< $~ orelse + C >= 16#A0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF. + write_string(S, latin1) -> io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 4d2b53b265..05009fa570 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2049,6 +2049,8 @@ otp_10302(Suite) when is_list(Suite) -> "<<\"apel\"...>>" = pretty(<<"apelsin">>, 2), "<<228,112,112,108>>" = fmt("~tp", [<<"äppl">>]), "<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]), + "<<0,0,0,0,0,0,1,0>>" = fmt("~p", [<<256:64/unsigned-integer>>]), + "<<0,0,0,0,0,0,1,0>>" = fmt("~tp", [<<256:64/unsigned-integer>>]), Chars = lists:seq(0, 512), % just a few... [] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C), -- cgit v1.2.3