about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Hans Bolinder <[email protected]> 2013-02-11 16:15:27 +0100
committer Hans Bolinder <[email protected]> 2013-02-11 16:15:27 +0100
commit dc1cdb1d12c7198fa46ad88a9632bfc8832aa796 (patch)
tree 6e9a50eb167bcbec7391e637ecbcb496b9107a04
parent e1676e6c579fa34d07694784afd0902433802bfb (diff)
parent b333d6f828b396e8174b3a5d2a6d34f91a872d42 (diff)
download otp-dc1cdb1d12c7198fa46ad88a9632bfc8832aa796.tar.gz
otp-dc1cdb1d12c7198fa46ad88a9632bfc8832aa796.tar.bz2
otp-dc1cdb1d12c7198fa46ad88a9632bfc8832aa796.zip
Merge branch 'hb/stdlib/unicode_corrections/OTP-10820'
* hb/stdlib/unicode_corrections/OTP-10820:
  [stdlib] Fix a bug concerning pretty printing and Unicode
  Make Unicode corrections
-rw-r--r-- lib/kernel/doc/src/file.xml 4
-rw-r--r-- lib/kernel/src/application_controller.erl 4
-rw-r--r-- lib/kernel/src/file.erl 6
-rw-r--r-- lib/parsetools/src/leex.erl 2
-rw-r--r-- lib/parsetools/src/yecc.erl 2
-rw-r--r-- lib/parsetools/src/yeccscan.erl 4
-rw-r--r-- lib/stdlib/doc/src/unicode_usage.xml 13
-rw-r--r-- lib/stdlib/src/epp.erl 4
-rw-r--r-- lib/stdlib/src/io_lib_pretty.erl 68
-rw-r--r-- lib/stdlib/src/shell.erl 3
-rw-r--r-- lib/stdlib/test/io_SUITE.erl 2
-rw-r--r-- lib/syntax_tools/src/epp_dodger.erl 2
12 files changed, 59 insertions, 55 deletions
diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml
index 4a9b7d2ceb..a96da0fb4e 100644
--- a/lib/kernel/doc/src/file.xml
+++ b/lib/kernel/doc/src/file.xml
@@ -156,9 +156,6 @@
<datatype>
<name name="file_info_option"/>
</datatype>
- <datatype>
- <name name="sendfile_option"/>
- </datatype>
</datatypes>
<funcs>
@@ -1648,6 +1645,7 @@
<func>
<name name="sendfile" arity="5"/>
<fsummary>send a file to a socket</fsummary>
+ <type name="sendfile_option"/>
<desc>
<p>Sends <c>Bytes</c> from the file
referenced by <c>RawFile</c> beginning at <c>Offset</c> to
diff --git a/lib/kernel/src/application_controller.erl b/lib/kernel/src/application_controller.erl
index 3c860af48e..1602745669 100644
--- a/lib/kernel/src/application_controller.erl
+++ b/lib/kernel/src/application_controller.erl
@@ -1447,7 +1447,7 @@ prim_consult(FullName) ->
{ok, Bin, _} ->
case file_binary_to_list(Bin) of
{ok, String} ->
- case erl_scan:string(String, 1, [unicode]) of
+ case erl_scan:string(String) of
{ok, Tokens, _EndLine} ->
prim_parse(Tokens, []);
{error, Reason, _EndLine} ->
@@ -1600,7 +1600,7 @@ conv(_) -> [].
%%% Fix some day: eliminate the duplicated code here
make_term(Str) ->
- case erl_scan:string(Str, 1, [unicode]) of
+ case erl_scan:string(Str) of
{ok, Tokens, _} ->
case erl_parse:parse_term(Tokens ++ [{dot, 1}]) of
{ok, Term} ->
diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl
index 70c4583ad2..e7a0451011 100644
--- a/lib/kernel/src/file.erl
+++ b/lib/kernel/src/file.erl
@@ -1196,7 +1196,7 @@ change_time(Name, {{AY, AM, AD}, {AH, AMin, ASec}}=Atime,
-spec sendfile(RawFile, Socket, Offset, Bytes, Opts) ->
{'ok', non_neg_integer()} | {'error', inet:posix() |
closed | badarg | not_owner} when
- RawFile :: file:fd(),
+ RawFile :: fd(),
Socket :: inet:socket(),
Offset :: non_neg_integer(),
Bytes :: non_neg_integer(),
@@ -1222,7 +1222,7 @@ sendfile(File, Sock, Offset, Bytes, Opts) ->
-spec sendfile(Filename, Socket) ->
{'ok', non_neg_integer()} | {'error', inet:posix() |
closed | badarg | not_owner}
- when Filename :: file:name(),
+ when Filename :: name(),
Socket :: inet:socket().
sendfile(Filename, Sock) ->
case file:open(Filename, [read, raw, binary]) of
@@ -1345,7 +1345,7 @@ eval_stream(Fd, Handling, Bs) ->
eval_stream(Fd, Handling, 1, undefined, [], Bs).
eval_stream(Fd, H, Line, Last, E, Bs) ->
- eval_stream2(io:parse_erl_exprs(Fd, '', Line, [unicode]), Fd, H, Last, E, Bs).
+ eval_stream2(io:parse_erl_exprs(Fd, '', Line), Fd, H, Last, E, Bs).
eval_stream2({ok,Form,EndLine}, Fd, H, Last, E, Bs0) ->
try erl_eval:exprs(Form, Bs0) of
diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl
index 32c513f56c..22b496258f 100644
--- a/lib/parsetools/src/leex.erl
+++ b/lib/parsetools/src/leex.erl
@@ -504,7 +504,7 @@ collect_rule(Ifile, Chars, L0) ->
collect_action(_Ifile, {error, _}, L, _Cont0) ->
{error, {L, leex, cannot_parse}, ignored_end_line};
collect_action(Ifile, Chars, L0, Cont0) ->
- case erl_scan:tokens(Cont0, Chars, L0, [unicode]) of
+ case erl_scan:tokens(Cont0, Chars, L0) of
{done,{ok,Toks,_},_} -> {ok,Toks,L0};
{done,{eof,_},_} -> {eof,L0};
{done,{error,E,_},_} -> {error,E,L0};
diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl
index 2f0f70f39b..30e0db421e 100644
--- a/lib/parsetools/src/yecc.erl
+++ b/lib/parsetools/src/yecc.erl
@@ -2561,7 +2561,7 @@ format_assoc(nonassoc) ->
format_symbol(Symbol) ->
String = concat([Symbol]),
- case erl_scan:string(String, 1, [unicode]) of
+ case erl_scan:string(String) of
{ok, [{atom, _, _}], _} ->
io_lib:fwrite(<<"~w">>, [Symbol]);
{ok, [{Word, _}], _} when Word =/= ':', Word =/= '->' ->
diff --git a/lib/parsetools/src/yeccscan.erl b/lib/parsetools/src/yeccscan.erl
index 9e0e85143a..fa3ce8c73b 100644
--- a/lib/parsetools/src/yeccscan.erl
+++ b/lib/parsetools/src/yeccscan.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 1996-2012. All Rights Reserved.
+%% Copyright Ericsson AB 1996-2013. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -24,7 +24,7 @@ scan(Inport) ->
scan(Inport, '', 1).
scan(Inport, Prompt, Line1) ->
- case catch io:scan_erl_form(Inport, Prompt, Line1, [unicode]) of
+ case catch io:scan_erl_form(Inport, Prompt, Line1) of
{eof, Line2} ->
{eof, Line2};
{ok, Tokens, Line2} ->
diff --git a/lib/stdlib/doc/src/unicode_usage.xml b/lib/stdlib/doc/src/unicode_usage.xml
index 0a75fbeec0..354ec58df3 100644
--- a/lib/stdlib/doc/src/unicode_usage.xml
+++ b/lib/stdlib/doc/src/unicode_usage.xml
@@ -69,12 +69,11 @@ strings.</p>
<p>Character data may be combined from several sources, sometimes available in a mix of strings and binaries. Erlang has for long had the concept of <c>iodata</c> or <c>iolists</c>, where binaries and lists can be combined to represent a sequence of bytes. In the same way, the Unicode aware modules often allow for combinations of binaries and lists where the binaries have characters encoded in UTF-8 and the lists contain such binaries or numbers representing Unicode codepoints:</p>
<code type="none">
unicode_binary() = binary() with characters encoded in UTF-8 coding standard
-unicode_char() = integer() >= 0 representing valid Unicode codepoint
chardata() = charlist() | unicode_binary()
-charlist() = [unicode_char() | unicode_binary() | charlist()]
- a unicode_binary is allowed as the tail of the list</code>
+charlist() = maybe_improper_list(char() | unicode_binary() | charlist(),
+ unicode_binary() | nil())</code>
<p>The module <c>unicode</c> in STDLIB even supports similar mixes with binaries containing other encodings than UTF-8, but that is a special case to allow for conversions to and from external data:</p>
<code type="none">
external_unicode_binary() = binary() with characters coded in
@@ -82,10 +81,10 @@ external_unicode_binary() = binary() with characters coded in
external_chardata() = external_charlist() | external_unicode_binary()
-external_charlist() = [unicode_char() |
- external_unicode_binary() |
- external_charlist()]
- an external_unicode_binary() is allowed as the tail of the list</code>
+external_charlist() = maybe_improper_list(char() |
+ external_unicode_binary() |
+ external_charlist(),
+ external_unicode_binary() | nil())</code>
</section>
<section>
<title>Basic Language Support for Unicode</title>
diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl
index afa39c3fb9..1bb3b95ae2 100644
--- a/lib/stdlib/src/epp.erl
+++ b/lib/stdlib/src/epp.erl
@@ -661,7 +661,7 @@ leave_file(From, St) ->
%% scan_toks(Tokens, From, EppState)
scan_toks(From, St) ->
- case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of
+ case io:scan_erl_form(St#epp.file, '', St#epp.location) of
{ok,Toks,Cl} ->
scan_toks(Toks, From, St#epp{location=Cl});
{error,E,Cl} ->
@@ -1035,7 +1035,7 @@ new_location(Ln, {Le,_}, {Lf,_}) ->
%% nested conditionals and repeated 'else's.
skip_toks(From, St, [I|Sis]) ->
- case io:scan_erl_form(St#epp.file, '', St#epp.location, [unicode]) of
+ case io:scan_erl_form(St#epp.file, '', St#epp.location) of
{ok,[{'-',_Lh},{atom,_Li,ifdef}|_Toks],Cl} ->
skip_toks(From, St#epp{location=Cl}, [ifdef,I|Sis]);
{ok,[{'-',_Lh},{atom,_Li,ifndef}|_Toks],Cl} ->
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl
index a8f610558a..b05db3d290 100644
--- a/lib/stdlib/src/io_lib_pretty.erl
+++ b/lib/stdlib/src/io_lib_pretty.erl
@@ -452,18 +452,6 @@ printable_list(L, _D, latin1) ->
io_lib:printable_latin1_list(L);
printable_list(L, _D, _Uni) ->
io_lib:printable_list(L).
-%% Truncated lists could break some existing code.
-% printable_list(L, D, Enc) when D >= 0 ->
-% Len = ?CHARS * (D - 1),
-% case printable_list1(L, Len, Enc) of
-% all ->
-% true;
-% N when is_integer(N), Len - N >= D - 1 ->
-% {L1, _} = lists:split(Len - N, L),
-% {true, L1};
-% N when is_integer(N) ->
-% false
-% end.
printable_bin(Bin, D, Enc) when D >= 0, ?CHARS * D =< byte_size(Bin) ->
printable_bin(Bin, erlang:min(?CHARS * D, byte_size(Bin)), D, Enc);
@@ -473,7 +461,7 @@ printable_bin(Bin, D, Enc) ->
printable_bin(Bin, Len, D, latin1) ->
N = erlang:min(20, Len),
L = binary_to_list(Bin, 1, N),
- case printable_list1(L, N) of
+ case printable_latin1_list(L, N) of
all when N =:= byte_size(Bin) ->
{true, L};
all when N =:= Len -> % N < byte_size(Bin)
@@ -507,7 +495,7 @@ printable_bin1(_Bin, _Start, 0) ->
printable_bin1(Bin, Start, Len) ->
N = erlang:min(10000, Len),
L = binary_to_list(Bin, Start, Start + N - 1),
- case printable_list1(L, N) of
+ case printable_latin1_list(L, N) of
all ->
printable_bin1(Bin, Start + N, Len - N);
NC when is_integer(NC) ->
@@ -515,26 +503,44 @@ printable_bin1(Bin, Start, Len) ->
end.
%% -> all | integer() >=0. Adopted from io_lib.erl.
-% printable_list1([_ | _], 0) -> 0;
-printable_list1([C | Cs], N) when is_integer(C), C >= $\s, C =< $~ ->
- printable_list1(Cs, N - 1);
-printable_list1([C | Cs], N) when is_integer(C), C >= $\240, C =< $\377 ->
- printable_list1(Cs, N - 1);
-printable_list1([$\n | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\r | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\t | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\v | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\b | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\f | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([$\e | Cs], N) -> printable_list1(Cs, N - 1);
-printable_list1([], _) -> all;
-printable_list1(_, N) -> N.
-
-printable_unicode(<<C/utf8, R/binary>>, I, L) when I > 0 ->
- printable_unicode(R, I - 1, [C | L]);
+% printable_latin1_list([_ | _], 0) -> 0;
+printable_latin1_list([C | Cs], N) when C >= $\s, C =< $~ ->
+ printable_latin1_list(Cs, N - 1);
+printable_latin1_list([C | Cs], N) when C >= $\240, C =< $\377 ->
+ printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\n | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\r | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\t | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\v | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\b | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\f | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1);
+printable_latin1_list([], _) -> all;
+printable_latin1_list(_, N) -> N.
+
+printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 ->
+ case printable_char(C) of
+ true ->
+ printable_unicode(R, I - 1, [C | L]);
+ false ->
+ {I, Bin, lists:reverse(L)}
+ end;
printable_unicode(Bin, I, L) ->
{I, Bin, lists:reverse(L)}.
+printable_char($\n) -> true;
+printable_char($\r) -> true;
+printable_char($\t) -> true;
+printable_char($\v) -> true;
+printable_char($\b) -> true;
+printable_char($\f) -> true;
+printable_char($\e) -> true;
+printable_char(C) ->
+ C >= $\s andalso C =< $~ orelse
+ C >= 16#A0 andalso C < 16#D800 orelse
+ C > 16#DFFF andalso C < 16#FFFE orelse
+ C > 16#FFFF andalso C =< 16#10FFFF.
+
write_string(S, latin1) ->
io_lib:write_latin1_string(S, $"); %"
write_string(S, _Uni) ->
diff --git a/lib/stdlib/src/shell.erl b/lib/stdlib/src/shell.erl
index 0cd408204e..c94f052b24 100644
--- a/lib/stdlib/src/shell.erl
+++ b/lib/stdlib/src/shell.erl
@@ -950,7 +950,7 @@ local_func(rd, [{atom,_,RecName},RecDef0], Bs, _Shell, RT, _Lf, _Ef) ->
RecDef = expand_value(RecDef0),
RDs = lists:flatten(erl_pp:expr(RecDef)),
Attr = lists:concat(["-record('", RecName, "',", RDs, ")."]),
- {ok, Tokens, _} = erl_scan:string(Attr, 1, [unicode]),
+ {ok, Tokens, _} = erl_scan:string(Attr),
case erl_parse:parse_form(Tokens) of
{ok,AttrForm} ->
[RN] = add_records([AttrForm], Bs, RT),
@@ -1397,7 +1397,6 @@ enc() ->
garb(Shell) ->
erlang:garbage_collect(Shell),
catch erlang:garbage_collect(whereis(user)),
- catch erlang:garbage_collect(whereis(group)),
catch erlang:garbage_collect(group_leader()),
erlang:garbage_collect().
diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl
index 4d2b53b265..05009fa570 100644
--- a/lib/stdlib/test/io_SUITE.erl
+++ b/lib/stdlib/test/io_SUITE.erl
@@ -2049,6 +2049,8 @@ otp_10302(Suite) when is_list(Suite) ->
"<<\"apel\"...>>" = pretty(<<"apelsin">>, 2),
"<<228,112,112,108>>" = fmt("~tp", [<<"äppl">>]),
"<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]),
+ "<<0,0,0,0,0,0,1,0>>" = fmt("~p", [<<256:64/unsigned-integer>>]),
+ "<<0,0,0,0,0,0,1,0>>" = fmt("~tp", [<<256:64/unsigned-integer>>]),
Chars = lists:seq(0, 512), % just a few...
[] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C),
diff --git a/lib/syntax_tools/src/epp_dodger.erl b/lib/syntax_tools/src/epp_dodger.erl
index 70395848a1..131be4e8e4 100644
--- a/lib/syntax_tools/src/epp_dodger.erl
+++ b/lib/syntax_tools/src/epp_dodger.erl
@@ -401,7 +401,7 @@ quick_parse_form(Dev, L0, Options) ->
parse_form(Dev, L0, Parser, Options) ->
NoFail = proplists:get_bool(no_fail, Options),
Opt = #opt{clever = proplists:get_bool(clever, Options)},
- case io:scan_erl_form(Dev, "", L0, [unicode]) of
+ case io:scan_erl_form(Dev, "", L0) of
{ok, Ts, L1} ->
case catch {ok, Parser(Ts, Opt)} of
{'EXIT', Term} ->