diff options
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/unicode_usage.xml | 13 | ||||
-rw-r--r-- | lib/stdlib/src/epp.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 68 | ||||
-rw-r--r-- | lib/stdlib/src/shell.erl | 3 | ||||
-rw-r--r-- | lib/stdlib/test/io_SUITE.erl | 2 |
5 files changed, 48 insertions, 42 deletions
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml index 0a75fbeec0..354ec58df3 100644 --- a/lib/stdlib/doc/src/unicode_usage.xml +++ b/lib/stdlib/doc/src/unicode_usage.xml @@ -69,12 +69,11 @@ strings.</p> <p>Character data may be combined from several sources, sometimes available in a mix of strings and binaries. Erlang has for long had the concept of <c>iodata</c> or <c>iolists</c>, where binaries and lists can be combined to represent a sequence of bytes. In the same way, the Unicode aware modules often allow for combinations of binaries and lists where the binaries have characters encoded in UTF-8 and the lists contain such binaries or numbers representing Unicode codepoints:</p> <code type="none"> unicode_binary() = binary() with characters encoded in UTF-8 coding standard -unicode_char() = integer() >= 0 representing valid Unicode codepoint chardata() = charlist() | unicode_binary() -charlist() = [unicode_char() | unicode_binary() | charlist()] - a unicode_binary is allowed as the tail of the list</code> +charlist() = maybe_improper_list(char() | unicode_binary() | charlist(), + unicode_binary() | nil())</code> <p>The module <c>unicode</c> in STDLIB even supports similar mixes with binaries containing other encodings than UTF-8, but that is a special case to allow for conversions to and from external data:</p> <code type="none"> external_unicode_binary() = binary() with characters coded in @@ -82,10 +81,10 @@ external_unicode_binary() = binary() with characters coded in external_chardata() = external_charlist() | external_unicode_binary() -external_charlist() = [unicode_char() | - external_unicode_binary() | - external_charlist()] - an external_unicode_binary() is allowed as the tail of the list</code> +external_charlist() = maybe_improper_list(char() | + external_unicode_binary() | + external_charlist(), + external_unicode_binary() | nil())</code> </section> <section> <title>Basic Language Support for Unicode</title> diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index afa39c3fb9..1bb3b95ae2 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -661,7 +661,7 @@ leave_file(From, St) -> %% scan_toks(Tokens, From, EppState) scan_toks(From, St) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of + case io:scan_erl_form(St#epp.file, '', St#epp.location) of {ok,Toks,Cl} -> scan_toks(Toks, From, St#epp{location=Cl}); {error,E,Cl} -> @@ -1035,7 +1035,7 @@ new_location(Ln, {Le,_}, {Lf,_}) -> %% nested conditionals and repeated 'else's. skip_toks(From, St, [I|Sis]) -> - case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of + case io:scan_erl_form(St#epp.file, '', St#epp.location) of {ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} -> skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]); {ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} -> diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index a8f610558a..b05db3d290 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -452,18 +452,6 @@ printable_list(L, _D, latin1) -> io_lib:printable_latin1_list(L); printable_list(L, _D, _Uni) -> io_lib:printable_list(L). -%% Truncated lists could break some existing code. -% printable_list(L, D, Enc) when D >= 0 -> -% Len = ?CHARS * (D - 1), -% case printable_list1(L, Len, Enc) of -% all -> -% true; -% N when is_integer(N), Len - N >= D - 1 -> -% {L1, _} = lists:split(Len - N, L), -% {true, L1}; -% N when is_integer(N) -> -% false -% end. printable_bin(Bin, D, Enc) when D >= 0, ?CHARS * D =< byte_size(Bin) -> printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D, Enc); @@ -473,7 +461,7 @@ printable_bin(Bin, D, Enc) -> printable_bin(Bin, Len, D, latin1) -> N = erlang:min(20, Len), L = binary_to_list(Bin, 1, N), - case printable_list1(L, N) of + case printable_latin1_list(L, N) of all when N =:= byte_size(Bin) -> {true, L}; all when N =:= Len -> % N < byte_size(Bin) @@ -507,7 +495,7 @@ printable_bin1(_Bin, _Start, 0) -> printable_bin1(Bin, Start, Len) -> N = erlang:min(10000, Len), L = binary_to_list(Bin, Start, Start + N - 1), - case printable_list1(L, N) of + case printable_latin1_list(L, N) of all -> printable_bin1(Bin, Start + N, Len - N); NC when is_integer(NC) -> @@ -515,26 +503,44 @@ printable_bin1(Bin, Start, Len) -> end. %% -> all | integer() >=0. Adopted from io_lib.erl. -% printable_list1([_ | _], 0) -> 0; -printable_list1([C | Cs], N) when is_integer(C), C >= $\s, C =< $~ -> - printable_list1(Cs, N - 1); -printable_list1([C | Cs], N) when is_integer(C), C >= $\240, C =< $\377 -> - printable_list1(Cs, N - 1); -printable_list1([$\n | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\r | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\t | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\v | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\b | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\f | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1); -printable_list1([], _) -> all; -printable_list1(_, N) -> N. - -printable_unicode(<<C/utf8, R/binary>>, I, L) when I > 0 -> - printable_unicode(R, I - 1, [C | L]); +% printable_latin1_list([_ | _], 0) -> 0; +printable_latin1_list([C | Cs], N) when C >= $\s, C =< $~ -> + printable_latin1_list(Cs, N - 1); +printable_latin1_list([C | Cs], N) when C >= $\240, C =< $\377 -> + printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\n | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\r | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\t | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\v | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\b | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\f | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); +printable_latin1_list([], _) -> all; +printable_latin1_list(_, N) -> N. + +printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 -> + case printable_char(C) of + true -> + printable_unicode(R, I - 1, [C | L]); + false -> + {I, Bin, lists:reverse(L)} + end; printable_unicode(Bin, I, L) -> {I, Bin, lists:reverse(L)}. +printable_char($\n) -> true; +printable_char($\r) -> true; +printable_char($\t) -> true; +printable_char($\v) -> true; +printable_char($\b) -> true; +printable_char($\f) -> true; +printable_char($\e) -> true; +printable_char(C) -> + C >= $\s andalso C =< $~ orelse + C >= 16#A0 andalso C < 16#D800 orelse + C > 16#DFFF andalso C < 16#FFFE orelse + C > 16#FFFF andalso C =< 16#10FFFF. + write_string(S, latin1) -> io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl index 0cd408204e..c94f052b24 100644 --- a/lib/stdlib/src/shell.erl +++ b/lib/stdlib/src/shell.erl @@ -950,7 +950,7 @@ local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) -> RecDef = expand_value(RecDef0), RDs = lists:flatten(erl_pp:expr(RecDef)), Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]), - {ok, Tokens, _} = erl_scan:string(Attr, 1, [unicode]), + {ok, Tokens, _} = erl_scan:string(Attr), case erl_parse:parse_form(Tokens) of {ok,AttrForm} -> [RN] = add_records([AttrForm], Bs, RT), @@ -1397,7 +1397,6 @@ enc() -> garb(Shell) -> erlang:garbage_collect(Shell), catch erlang:garbage_collect(whereis(user)), - catch erlang:garbage_collect(whereis(group)), catch erlang:garbage_collect(group_leader()), erlang:garbage_collect(). diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 4d2b53b265..05009fa570 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2049,6 +2049,8 @@ otp_10302(Suite) when is_list(Suite) -> "<<\"apel\"...>>" = pretty(<<"apelsin">>, 2), "<<228,112,112,108>>" = fmt("~tp", [<<"äppl">>]), "<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]), + "<<0,0,0,0,0,0,1,0>>" = fmt("~p", [<<256:64/unsigned-integer>>]), + "<<0,0,0,0,0,0,1,0>>" = fmt("~tp", [<<256:64/unsigned-integer>>]), Chars = lists:seq(0, 512), % just a few... [] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C), |