diff options
author | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
---|---|---|
committer | Patrik Nyblom <[email protected]> | 2013-02-22 12:06:41 +0100 |
commit | 7215c49aff685e93765598cd428baf1d4320f752 (patch) | |
tree | 76409af3ed6abe7502af3aa438e47b1be98df1e3 /lib/stdlib/src/io_lib_pretty.erl | |
parent | 14820e983856654e68e08244e4dfc689f0804fd8 (diff) | |
parent | 2a79b74ac371387ce338bacf979f9ca32447b302 (diff) | |
download | otp-7215c49aff685e93765598cd428baf1d4320f752.tar.gz otp-7215c49aff685e93765598cd428baf1d4320f752.tar.bz2 otp-7215c49aff685e93765598cd428baf1d4320f752.zip |
Merge branch 'pan/unicode_printable_ranges'
* pan/unicode_printable_ranges:
Adapt stdlib tests to ~tp detecting latin1 binaries
Update primary bootstrap
Make wx debugger use +pc flag when applicable
Correct misspelled comments and space at lin ends
Make ~tp output latin1 binaries as strings if possible
Leave the +pc handling to io and io_lib_pretty
Remove newly introduced warning in erlexec.c
Make shell_SUITE:otp_10302 use +pc unicode when needed
Fix io_proto_SUITE to handle the new io_lib_pretty:print
Add testcase for +pc and io:printable_range/0
Make printing of UTF-8 in binaries behave like lists.
Document +pc flag and io:printable_range/0
Add usage of and spec for io:printable_range/0
Add +pc {latin1|unicode} switch and io:printable_range/0
Fix some Unicode issues
OTP-18084
Diffstat (limited to 'lib/stdlib/src/io_lib_pretty.erl')
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 55 |
1 files changed, 36 insertions, 19 deletions
diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 525b534249..7637ad7a3d 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -485,13 +485,18 @@ printable_bin(Bin, Len, D, latin1) -> false end; printable_bin(Bin, Len, D, _Uni) -> - case printable_unicode(Bin, Len, []) of - {_, <<>>, L} -> - {byte_size(Bin) =:= length(L), L}; - {NC, Bin1, L} when D > 0, Len - NC >= D -> - {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; - {_NC, _Bin, _L} -> - false + case valid_utf8(Bin,Len) of + true -> + case printable_unicode(Bin, Len, [], io:printable_range()) of + {_, <<>>, L} -> + {byte_size(Bin) =:= length(L), L}; + {NC, Bin1, L} when D > 0, Len - NC >= D -> + {byte_size(Bin)-byte_size(Bin1) =:= length(L), true, L}; + {_NC, _Bin, _L} -> + false + end; + false -> + printable_bin(Bin, Len, D, latin1) end. printable_bin1(_Bin, _Start, 0) -> @@ -522,24 +527,36 @@ printable_latin1_list([$\e | Cs], N) -> printable_latin1_list(Cs, N - 1); printable_latin1_list([], _) -> all; printable_latin1_list(_, N) -> N. -printable_unicode(<<C/utf8, R/binary>>=Bin, I, L) when I > 0 -> - case printable_char(C) of +valid_utf8(<<>>,_) -> + true; +valid_utf8(_,0) -> + true; +valid_utf8(<<_/utf8, R/binary>>,N) -> + valid_utf8(R,N-1); +valid_utf8(_,_) -> + false. + +printable_unicode(<<C/utf8, R/binary>>=Bin, I, L, Range) when I > 0 -> + case printable_char(C,Range) of true -> - printable_unicode(R, I - 1, [C | L]); + printable_unicode(R, I - 1, [C | L],Range); false -> {I, Bin, lists:reverse(L)} end; -printable_unicode(Bin, I, L) -> +printable_unicode(Bin, I, L,_) -> {I, Bin, lists:reverse(L)}. -printable_char($\n) -> true; -printable_char($\r) -> true; -printable_char($\t) -> true; -printable_char($\v) -> true; -printable_char($\b) -> true; -printable_char($\f) -> true; -printable_char($\e) -> true; -printable_char(C) -> +printable_char($\n,_) -> true; +printable_char($\r,_) -> true; +printable_char($\t,_) -> true; +printable_char($\v,_) -> true; +printable_char($\b,_) -> true; +printable_char($\f,_) -> true; +printable_char($\e,_) -> true; +printable_char(C,latin1) -> + C >= $\s andalso C =< $~ orelse + C >= 16#A0 andalso C =< 16#FF; +printable_char(C,unicode) -> C >= $\s andalso C =< $~ orelse C >= 16#A0 andalso C < 16#D800 orelse C > 16#DFFF andalso C < 16#FFFE orelse |