aboutsummaryrefslogtreecommitdiffstats
path: root/lib/stdlib
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib')
-rw-r--r--lib/stdlib/doc/src/unicode_usage.xml13
-rw-r--r--lib/stdlib/src/epp.erl4
-rw-r--r--lib/stdlib/src/io_lib_pretty.erl68
-rw-r--r--lib/stdlib/src/shell.erl3
-rw-r--r--lib/stdlib/test/io_SUITE.erl2
5 files changed, 48 insertions, 42 deletions
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 0a75fbeec0..354ec58df3 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -69,12 +69,11 @@ strings.</p>
<p>Character data may be combined from several sources, sometimes available in a mix of strings and binaries. Erlang has for long had the concept of <c>iodata</c> or <c>iolists</c>, where binaries and lists can be combined to represent a sequence of bytes. In the same way, the Unicode aware modules often allow for combinations of binaries and lists where the binaries have characters encoded in UTF-8 and the lists contain such binaries or numbers representing Unicode codepoints:</p>
<code type="none">
unicode_binary() = binary() with characters encoded in UTF-8 coding standard
-unicode_char() = integer() >= 0 representing valid Unicode codepoint
chardata() = charlist() | unicode_binary()
-charlist() = [unicode_char() | unicode_binary() | charlist()]
- a unicode_binary is allowed as the tail of the list</code>
+charlist() = maybe_improper_list(char() | unicode_binary() | charlist(),
+ unicode_binary() | nil())</code>
<p>The module <c>unicode</c> in STDLIB even supports similar mixes with binaries containing other encodings than UTF-8, but that is a special case to allow for conversions to and from external data:</p>
<code type="none">
external_unicode_binary() = binary() with characters coded in
@@ -82,10 +81,10 @@ external_unicode_binary() = binary() with characters coded in
external_chardata() = external_charlist() | external_unicode_binary()
-external_charlist() = [unicode_char() |
- external_unicode_binary() |
- external_charlist()]
- an external_unicode_binary() is allowed as the tail of the list</code>
+external_charlist() = maybe_improper_list(char() |
+ external_unicode_binary() |
+ external_charlist(),
+ external_unicode_binary() | nil())</code>
</section>
<section>
<title>Basic Language Support for Unicode</title>
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index afa39c3fb9..1bb3b95ae2 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -661,7 +661,7 @@ leave_file(From, St) ->
%% scan_toks(Tokens, From, EppState)
scan_toks(From, St) ->
- case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of
+ case io:scan_erl_form(St#epp.file, '', St#epp.location) of
{ok,Toks,Cl} ->
scan_toks(Toks, From, St#epp{location=Cl});
{error,E,Cl} ->
@@ -1035,7 +1035,7 @@ new_location(Ln, {Le,_}, {Lf,_}) ->
%% nested conditionals and repeated 'else's.
skip_toks(From, St, [I|Sis]) ->
- case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of
+ case io:scan_erl_form(St#epp.file, '', St#epp.location) of
{ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} ->
skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]);
{ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} ->
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index a8f610558a..b05db3d290 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -452,18 +452,6 @@ printable_list(L, _D, latin1) ->
io_lib:printable_latin1_list(L);
printable_list(L, _D, _Uni) ->
io_lib:printable_list(L).
-%% Truncated lists could break some existing code.
-% printable_list(L, D, Enc) when D >= 0 ->
-% Len = ?CHARS * (D - 1),
-% case printable_list1(L, Len, Enc) of
-% all ->
-% true;
-% N when is_integer(N), Len - N >= D - 1 ->
-% {L1, _} = lists:split(Len - N, L),
-% {true, L1};
-% N when is_integer(N) ->
-% false
-% end.
printable_bin(Bin, D, Enc) when D >= 0, ?CHARS * D =< byte_size(Bin) ->
printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D, Enc);
@@ -473,7 +461,7 @@ printable_bin(Bin, D, Enc) ->
printable_bin(Bin, Len, D, latin1) ->
N = erlang:min(20, Len),
L = binary_to_list(Bin, 1, N),
- case printable_list1(L, N) of
+ case printable_latin1_list(L, N) of
all when N =:= byte_size(Bin) ->
{true, L};
all when N =:= Len -> % N < byte_size(Bin)
@@ -507,7 +495,7 @@ printable_bin1(_Bin, _Start, 0) ->
printable_bin1(Bin, Start, Len) ->
N = erlang:min(10000, Len),
L = binary_to_list(Bin, Start, Start + N - 1),
- case printable_list1(L, N) of
+ case printable_latin1_list(L, N) of
all ->
printable_bin1(Bin, Start + N, Len - N);
NC when is_integer(NC) ->
@@ -515,26 +503,44 @@ printable_bin1(Bin, Start, Len) ->
end.
%% -> all | integer() >=0. Adopted from io_lib.erl.
-% printable_list1([_ | _], 0) -> 0;
-printable_list1([C | Cs], N) when is_integer(C), C >= $\s, C =< $~ ->
- printable_list1(Cs, N - 1);
-printable_list1([C | Cs], N) when is_integer(C), C >= $\240, C =< $\377 ->
- printable_list1(Cs, N - 1);
-printable_list1([$\n | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\r | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\t | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\v | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\b | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\f | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([], _) -> all;
-printable_list1(_, N) -> N.
-
-printable_unicode(<<C/utf8, R/binary>>, I, L) when I > 0 ->
- printable_unicode(R, I - 1, [C | L]);
+% printable_latin1_list([_ | _], 0) -> 0;
+printable_latin1_list([C | Cs], N) when C >= $\s, C =< $~ ->
+ printable_latin1_list(Cs, N - 1);
+printable_latin1_list([C | Cs], N) when C >= $\240, C =< $\377 ->
+ printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\n | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\r | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\t | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\v | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\b | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\f | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([], _) -> all;
+printable_latin1_list(_, N) -> N.
+
+printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 ->
+ case printable_char(C) of
+ true ->
+ printable_unicode(R, I - 1, [C | L]);
+ false ->
+ {I, Bin, lists:reverse(L)}
+ end;
printable_unicode(Bin, I, L) ->
{I, Bin, lists:reverse(L)}.
+printable_char($\n) -> true;
+printable_char($\r) -> true;
+printable_char($\t) -> true;
+printable_char($\v) -> true;
+printable_char($\b) -> true;
+printable_char($\f) -> true;
+printable_char($\e) -> true;
+printable_char(C) ->
+ C >= $\s andalso C =< $~ orelse
+ C >= 16#A0 andalso C < 16#D800 orelse
+ C > 16#DFFF andalso C < 16#FFFE orelse
+ C > 16#FFFF andalso C =< 16#10FFFF.
+
write_string(S, latin1) ->
io_lib:write_latin1_string(S, $"); %"
write_string(S, _Uni) ->
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index 0cd408204e..c94f052b24 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -950,7 +950,7 @@ local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
RecDef = expand_value(RecDef0),
RDs = lists:flatten(erl_pp:expr(RecDef)),
Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]),
- {ok, Tokens, _} = erl_scan:string(Attr, 1, [unicode]),
+ {ok, Tokens, _} = erl_scan:string(Attr),
case erl_parse:parse_form(Tokens) of
{ok,AttrForm} ->
[RN] = add_records([AttrForm], Bs, RT),
@@ -1397,7 +1397,6 @@ enc() ->
garb(Shell) ->
erlang:garbage_collect(Shell),
catch erlang:garbage_collect(whereis(user)),
- catch erlang:garbage_collect(whereis(group)),
catch erlang:garbage_collect(group_leader()),
erlang:garbage_collect().
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index 4d2b53b265..05009fa570 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -2049,6 +2049,8 @@ otp_10302(Suite) when is_list(Suite) ->
"<<\"apel\"...>>" = pretty(<<"apelsin">>, 2),
"<<228,112,112,108>>" = fmt("~tp", [<<"äppl">>]),
"<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]),
+ "<<0,0,0,0,0,0,1,0>>" = fmt("~p", [<<256:64/unsigned-integer>>]),
+ "<<0,0,0,0,0,0,1,0>>" = fmt("~tp", [<<256:64/unsigned-integer>>]),
Chars = lists:seq(0, 512), % just a few...
[] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C),