diff options
author | Hans Bolinder <[email protected]> | 2013-01-11 14:42:54 +0100 |
---|---|---|
committer | Hans Bolinder <[email protected]> | 2013-01-25 12:54:27 +0100 |
commit | b2b0feab152063f71c4bf58f985cd52fc9e0105a (patch) | |
tree | be4cb2fbdfc1950a8aeb91b1d22f994e949b1be4 /lib/stdlib | |
parent | 965e2e01db25a55c4976ccce5940c6631b0e989c (diff) | |
download | otp-b2b0feab152063f71c4bf58f985cd52fc9e0105a.tar.gz otp-b2b0feab152063f71c4bf58f985cd52fc9e0105a.tar.bz2 otp-b2b0feab152063f71c4bf58f985cd52fc9e0105a.zip |
Extend char() to Unicode characters
The code related to the introduction of unicode_string() and
unicode_char() has been removed. The types char() and string() have
been extended to include Unicode characters.
In fact char() was changed some time ago; this commit is about
cleaning up the documentation and introduce better names for some
functions.
Diffstat (limited to 'lib/stdlib')
-rw-r--r-- | lib/stdlib/doc/src/io_lib.xml | 67 | ||||
-rw-r--r-- | lib/stdlib/doc/src/unicode.xml | 20 | ||||
-rw-r--r-- | lib/stdlib/src/epp.erl | 6 | ||||
-rw-r--r-- | lib/stdlib/src/erl_pp.erl | 10 | ||||
-rw-r--r-- | lib/stdlib/src/erl_scan.erl | 4 | ||||
-rw-r--r-- | lib/stdlib/src/io.erl | 8 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib.erl | 224 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_format.erl | 13 | ||||
-rw-r--r-- | lib/stdlib/src/io_lib_pretty.erl | 28 | ||||
-rw-r--r-- | lib/stdlib/src/lib.erl | 6 | ||||
-rw-r--r-- | lib/stdlib/src/unicode.erl | 23 | ||||
-rw-r--r-- | lib/stdlib/test/io_SUITE.erl | 8 |
12 files changed, 240 insertions, 177 deletions
diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 7487cb5740..001d34a7c2 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -4,7 +4,7 @@ <erlref> <header> <copyright> - <year>1996</year><year>2012</year> + <year>1996</year><year>2013</year> <holder>Ericsson AB. All Rights Reserved.</holder> </copyright> <legalnotice> @@ -53,6 +53,9 @@ <datatype> <name name="fread_error"/> </datatype> + <datatype> + <name name="latin1_string"/> + </datatype> </datatypes> <funcs> <func> @@ -207,8 +210,25 @@ <name name="write_string" arity="1"/> <fsummary>Write a string</fsummary> <desc> - <p>Returns the list of characters needed to print <c><anno>String</anno></c> - as a string.</p> + <p>Returns the list of characters needed to print + <c><anno>String</anno></c> as a string.</p> + </desc> + </func> + <func> + <name name="write_string_as_latin1" arity="1"/> + <fsummary>Write a string</fsummary> + <desc> + <p>Returns the list of characters needed to print + <c><anno>String</anno></c> as a string. Non-Latin-1 + characters are escaped.</p> + </desc> + </func> + <func> + <name name="write_latin1_string" arity="1"/> + <fsummary>Write an ISO-latin-1 string</fsummary> + <desc> + <p>Returns the list of characters needed to print + <c><anno>Latin1String</anno></c> as a string.</p> </desc> </func> <func> @@ -216,6 +236,23 @@ <fsummary>Write a character</fsummary> <desc> <p>Returns the list of characters needed to print a character + constant in the Unicode character set.</p> + </desc> + </func> + <func> + <name name="write_char_as_latin1" arity="1"/> + <fsummary>Write a character</fsummary> + <desc> + <p>Returns the list of characters needed to print a character + constant in the Unicode character set. Non-Latin-1 characters + are escaped.</p> + </desc> + </func> + <func> + <name name="write_latin1_char" arity="1"/> + <fsummary>Write an ISO-latin-1 character</fsummary> + <desc> + <p>Returns the list of characters needed to print a character constant in the ISO-latin-1 character set.</p> </desc> </func> @@ -232,6 +269,14 @@ <fsummary>Test for a list of characters</fsummary> <desc> <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a flat list of + characters in the Unicode range, otherwise it returns <c>false</c>.</p> + </desc> + </func> + <func> + <name name="latin1_char_list" arity="1"/> + <fsummary>Test for a list of ISO-latin-1 characters</fsummary> + <desc> + <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a flat list of characters in the ISO-latin-1 range, otherwise it returns <c>false</c>.</p> </desc> </func> @@ -240,11 +285,27 @@ <fsummary>Test for a deep list of characters</fsummary> <desc> <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a, possibly deep, list + of characters in the Unicode range, otherwise it returns <c>false</c>.</p> + </desc> + </func> + <func> + <name name="deep_latin1_char_list" arity="1"/> + <fsummary>Test for a deep list of characters</fsummary> + <desc> + <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a, possibly deep, list of characters in the ISO-latin-1 range, otherwise it returns <c>false</c>.</p> </desc> </func> <func> <name name="printable_list" arity="1"/> + <fsummary>Test for a list of printable characters</fsummary> + <desc> + <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a flat list of + printable Unicode characters, otherwise it returns <c>false</c>.</p> + </desc> + </func> + <func> + <name name="printable_latin1_list" arity="1"/> <fsummary>Test for a list of printable ISO-latin-1 characters</fsummary> <desc> <p>Returns <c>true</c> if <c><anno>Term</anno></c> is a flat list of diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml index d235f3e180..deba6adb11 100644 --- a/lib/stdlib/doc/src/unicode.xml +++ b/lib/stdlib/doc/src/unicode.xml @@ -5,7 +5,7 @@ <header> <copyright> <year>1996</year> - <year>2012</year> + <year>2013</year> <holder>Ericsson AB, All Rights Reserved</holder> </copyright> <legalnotice> @@ -52,19 +52,10 @@ </desc> </datatype> <datatype> - <name name="unicode_char"/> - <desc> - <p>An <c>integer()</c> representing a valid Unicode codepoint.</p> - </desc> - </datatype> - <datatype> <name name="chardata"/> </datatype> <datatype> <name name="charlist"/> - <desc> - <p>A <c>unicode_binary()</c> is allowed as the tail of the list.</p> - </desc> </datatype> <datatype> <name name="external_unicode_binary"/> @@ -78,10 +69,6 @@ </datatype> <datatype> <name name="external_charlist"/> - <desc> - <p>An <c>external_unicode_binary()</c> is allowed as the tail - of the list.</p> - </desc> </datatype> <datatype> <name name="latin1_binary"/> @@ -96,11 +83,12 @@ </datatype> <datatype> <name name="latin1_chardata"/> + <desc><p>The same as <c>iodata()</c>.</p> + </desc> </datatype> <datatype> <name name="latin1_charlist"/> - <desc><p>A <c>latin1_binary()</c> is allowed as the tail of - the list.</p> + <desc><p>The same as <c>iolist()</c>.</p> </desc> </datatype> </datatypes> diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index ebabf8d700..475b1bf933 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -1276,9 +1276,9 @@ token_src({X, _}) when is_atom(X) -> token_src({var, _, X}) -> atom_to_list(X); token_src({char,_,C}) -> - io_lib:write_unicode_char(C); + io_lib:write_char(C); token_src({string, _, X}) -> - io_lib:write_unicode_string(X); + io_lib:write_string(X); token_src({_, _, X}) -> io_lib:format("~w", [X]). diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 0e1156075a..a868867a81 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -179,12 +179,12 @@ state(_Hook) -> state(). state() -> - #pp{string_fun = fun io_lib:write_unicode_string_as_latin1/1, - char_fun = fun io_lib:write_unicode_char_as_latin1/1}. + #pp{string_fun = fun io_lib:write_string_as_latin1/1, + char_fun = fun io_lib:write_char_as_latin1/1}. unicode_state() -> - #pp{string_fun = fun io_lib:write_unicode_string/1, - char_fun = fun io_lib:write_unicode_char/1}. + #pp{string_fun = fun io_lib:write_string/1, + char_fun = fun io_lib:write_char/1}. encoding(Options) -> case proplists:get_value(encoding, Options, epp:default_encoding()) of diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index bc0eaf015d..f4accde8e7 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -115,7 +115,7 @@ format_error({string,Quote,Head}) -> lists:flatten(["unterminated " ++ string_thing(Quote) ++ " starting with " ++ - io_lib:write_unicode_string(Head, Quote)]); + io_lib:write_string(Head, Quote)]); format_error({illegal,Type}) -> lists:flatten(io_lib:fwrite("illegal ~w", [Type])); format_error(char) -> "unterminated character"; diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 9b436c224c..79cb988c5d 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -127,7 +127,7 @@ rows(Io) -> -spec get_chars(Prompt, Count) -> Data | server_no_data() when Prompt :: prompt(), Count :: non_neg_integer(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_chars(Prompt, N) -> get_chars(default_input(), Prompt, N). @@ -136,14 +136,14 @@ get_chars(Prompt, N) -> IoDevice :: device(), Prompt :: prompt(), Count :: non_neg_integer(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_chars(Io, Prompt, N) when is_integer(N), N >= 0 -> request(Io, {get_chars,unicode,Prompt,N}). -spec get_line(Prompt) -> Data | server_no_data() when Prompt :: prompt(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_line(Prompt) -> get_line(default_input(), Prompt). @@ -151,7 +151,7 @@ get_line(Prompt) -> -spec get_line(IoDevice, Prompt) -> Data | server_no_data() when IoDevice :: device(), Prompt :: prompt(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_line(Io, Prompt) -> request(Io, {get_line,unicode,Prompt}). diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index df5f9b8c25..b7ec848e1e 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -64,29 +64,31 @@ -export([print/1,print/4,indentation/2]). -export([write/1,write/2,write/3,nl/0,format_prompt/1,format_prompt/2]). --export([write_atom/1,write_string/1,write_string/2,write_unicode_string/1, - write_unicode_string/2, write_char/1, write_unicode_char/1]). +-export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1, + write_latin1_string/2, write_char/1, write_latin1_char/1]). --export([write_unicode_string_as_latin1/1, write_unicode_string_as_latin1/2, - write_unicode_char_as_latin1/1]). +-export([write_string_as_latin1/1, write_string_as_latin1/2, + write_char_as_latin1/1]). --export([quote_atom/2, char_list/1, unicode_char_list/1, - deep_char_list/1, deep_unicode_char_list/1, - printable_list/1, printable_unicode_list/1]). +-export([quote_atom/2, char_list/1, latin1_char_list/1, + deep_char_list/1, deep_latin1_char_list/1, + printable_list/1, printable_latin1_list/1]). %% Utilities for collecting characters. -export([collect_chars/3, collect_chars/4, collect_line/2, collect_line/3, collect_line/4, get_until/3, get_until/4]). --export_type([chars/0, unicode_chars/0, unicode_string/0, continuation/0, - fread_error/0]). +%% The following functions were used by Yecc's include-file. +-export([write_unicode_string/1, write_unicode_char/1, + deep_unicode_char_list/1]). + +-export_type([chars/0, latin1_string/0, continuation/0, fread_error/0]). %%---------------------------------------------------------------------- -type chars() :: [char() | chars()]. --type unicode_chars() :: [unicode:unicode_char() | unicode_chars()]. --type unicode_string() :: [unicode:unicode_char()]. +-type latin1_string() :: [unicode:latin1_char()]. -type depth() :: -1 | non_neg_integer(). -opaque continuation() :: {Format :: string(), @@ -108,10 +110,8 @@ %% Interface calls to sub-modules. --spec fwrite(Format, Data) -> chars() | UnicodeList when +-spec fwrite(Format, Data) -> chars() when Format :: io:format(), - Data :: [term()], - UnicodeList :: [unicode:unicode_char()], Data :: [term()]. fwrite(Format, Args) -> @@ -142,10 +142,9 @@ fread(Chars, Format) -> fread(Cont, Chars, Format) -> io_lib_fread:fread(Cont, Chars, Format). --spec format(Format, Data) -> chars() | UnicodeList when +-spec format(Format, Data) -> chars() when Format :: io:format(), - Data :: [term()], - UnicodeList :: [unicode:unicode_char()]. + Data :: [term()]. format(Format, Args) -> case catch io_lib_format:fwrite(Format, Args) of @@ -340,6 +339,11 @@ name_char($_) -> true; name_char($@) -> true; name_char(_) -> false. +%%% There are two functions to write Unicode strings: +%%% - they both escape control characters < 160; +%%% - write_string() never escapes characters >= 160; +%%% - write_string_as_latin1() also escapes characters >= 255. + %% write_string([Char]) -> [Char] %% Generate the list of characters needed to print a string. @@ -352,33 +356,32 @@ write_string(S) -> -spec write_string(string(), char()) -> chars(). write_string(S, Q) -> - [Q|write_string1(latin1, S, Q)]. + [Q|write_string1(unicode_as_unicode, S, Q)]. -%%% There are two functions to write Unicode strings: -%%% - they both escape control characters < 160; -%%% - write_unicode_string() never escapes characters >= 160; -%%% - write_unicode_string_as_latin1() also escapes characters >= 255. +%% Backwards compatibility. +write_unicode_string(S) -> + write_string(S). --spec write_unicode_string(UnicodeString) -> unicode_string() when - UnicodeString :: unicode_string(). +-spec write_latin1_string(Latin1String) -> latin1_string() when + Latin1String :: latin1_string(). -write_unicode_string(S) -> - write_unicode_string(S, $"). %" +write_latin1_string(S) -> + write_latin1_string(S, $"). %" --spec write_unicode_string(unicode_string(), char()) -> unicode_string(). +-spec write_latin1_string(latin1_string(), char()) -> latin1_string(). -write_unicode_string(S, Q) -> - [Q|write_string1(unicode_as_unicode, S, Q)]. +write_latin1_string(S, Q) -> + [Q|write_string1(latin1, S, Q)]. --spec write_unicode_string_as_latin1(UnicodeString) -> string() when - UnicodeString :: unicode_string(). +-spec write_string_as_latin1(String) -> latin1_string() when + String :: string(). -write_unicode_string_as_latin1(S) -> - write_unicode_string_as_latin1(S, $"). %" +write_string_as_latin1(S) -> + write_string_as_latin1(S, $"). %" --spec write_unicode_string_as_latin1(unicode_string(), char()) -> string(). +-spec write_string_as_latin1(string(), char()) -> latin1_string(). -write_unicode_string_as_latin1(S, Q) -> +write_string_as_latin1(S, Q) -> [Q|write_string1(unicode_as_latin1, S, Q)]. write_string1(_,[], Q) -> @@ -412,6 +415,11 @@ string_char(_,C, _, Tail) when C < $\240-> %Other control characters. C3 = (C band 7) + $0, [$\\,C1,C2,C3|Tail]. +%%% There are two functions to write a Unicode character: +%%% - they both escape control characters < 160; +%%% - write_char() never escapes characters >= 160; +%%% - write_char_as_latin1() also escapes characters >= 255. + %% write_char(Char) -> [char()]. %% Generate the list of characters needed to print a character constant. %% Must special case SPACE, $\s, here. @@ -420,48 +428,63 @@ string_char(_,C, _, Tail) when C < $\240-> %Other control characters. Char :: char(). write_char($\s) -> "$\\s"; %Must special case this. -write_char(C) when is_integer(C), C >= $\000, C =< $\377 -> - [$$|string_char(latin1,C, -1, [])]. +write_char(C) when is_integer(C), C >= $\000 -> + [$$|string_char(unicode_as_unicode, C, -1, [])]. -%%% There are two functions to write a Unicode character: -%%% - they both escape control characters < 160; -%%% - write_unicode_char() never escapes characters >= 160; -%%% - write_unicode_char_as_latin1() also escapes characters >= 255. +%% Backwards compatibility. +write_unicode_char(C) -> + write_char(C). --spec write_unicode_char(UnicodeChar) -> unicode_string() when - UnicodeChar :: unicode:unicode_char(). +-spec write_latin1_char(Latin1Char) -> latin1_string() when + Latin1Char :: unicode:latin1_char(). -write_unicode_char(Uni) when is_integer(Uni), Uni >= $\000 -> - [$$|string_char(unicode_as_unicode,Uni, -1, [])]. +write_latin1_char(Lat1) when is_integer(Lat1), Lat1 >= $\000, Lat1 =< $\377 -> + [$$|string_char(latin1, Lat1, -1, [])]. --spec write_unicode_char_as_latin1(UnicodeChar) -> string() when - UnicodeChar :: unicode:unicode_char(). +-spec write_char_as_latin1(Char) -> latin1_string() when + Char :: char(). -write_unicode_char_as_latin1(Uni) when is_integer(Uni), Uni >= $\000 -> +write_char_as_latin1(Uni) when is_integer(Uni), Uni >= $\000 -> [$$|string_char(unicode_as_latin1,Uni, -1, [])]. -%% char_list(CharList) -%% deep_char_list(CharList) -%% Return true if CharList is a (possibly deep) list of characters, else -%% false. +%% latin1_char_list(CharList) +%% deep_latin1_char_list(CharList) +%% Return true if CharList is a (possibly deep) list of Latin-1 +%% characters, else false. + +-spec latin1_char_list(Term) -> boolean() when + Term :: term(). + +latin1_char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> + latin1_char_list(Cs); +latin1_char_list([]) -> true; +latin1_char_list(_) -> false. %Everything else is false -spec char_list(Term) -> boolean() when Term :: term(). -char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> +char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> char_list(Cs); char_list([]) -> true; char_list(_) -> false. %Everything else is false --spec unicode_char_list(Term) -> boolean() when +-spec deep_latin1_char_list(Term) -> boolean() when Term :: term(). -unicode_char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - unicode_char_list(Cs); -unicode_char_list([]) -> true; -unicode_char_list(_) -> false. %Everything else is false +deep_latin1_char_list(Cs) -> + deep_latin1_char_list(Cs, []). + +deep_latin1_char_list([C|Cs], More) when is_list(C) -> + deep_latin1_char_list(C, [Cs|More]); +deep_latin1_char_list([C|Cs], More) when is_integer(C), C >= $\000, C =< $\377 -> + deep_latin1_char_list(Cs, More); +deep_latin1_char_list([], [Cs|More]) -> + deep_latin1_char_list(Cs, More); +deep_latin1_char_list([], []) -> true; +deep_latin1_char_list(_, _More) -> %Everything else is false + false. -spec deep_char_list(Term) -> boolean() when Term :: term(). @@ -471,43 +494,56 @@ deep_char_list(Cs) -> deep_char_list([C|Cs], More) when is_list(C) -> deep_char_list(C, [Cs|More]); -deep_char_list([C|Cs], More) when is_integer(C), C >= $\000, C =< $\377 -> +deep_char_list([C|Cs], More) + when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> deep_char_list(Cs, More); deep_char_list([], [Cs|More]) -> deep_char_list(Cs, More); deep_char_list([], []) -> true; -deep_char_list(_, _More) -> %Everything else is false +deep_char_list(_, _More) -> %Everything else is false false. --spec deep_unicode_char_list(Term) -> boolean() when - Term :: term(). +deep_unicode_char_list(Term) -> + deep_char_list(Term). -deep_unicode_char_list(Cs) -> - deep_unicode_char_list(Cs, []). +%% printable_latin1_list([Char]) -> boolean() +%% Return true if CharList is a list of printable Latin1 characters, else +%% false. -deep_unicode_char_list([C|Cs], More) when is_list(C) -> - deep_unicode_char_list(C, [Cs|More]); -deep_unicode_char_list([C|Cs], More) - when is_integer(C), C >= 0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - deep_unicode_char_list(Cs, More); -deep_unicode_char_list([], [Cs|More]) -> - deep_unicode_char_list(Cs, More); -deep_unicode_char_list([], []) -> true; -deep_unicode_char_list(_, _More) -> %Everything else is false - false. +-spec printable_latin1_list(Term) -> boolean() when + Term :: term(). + +printable_latin1_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_latin1_list(Cs); +printable_latin1_list([C|Cs]) when is_integer(C), C >= $\240, C =< $\377 -> + printable_latin1_list(Cs); +printable_latin1_list([$\n|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\r|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\t|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\v|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\b|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\f|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\e|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([]) -> true; +printable_latin1_list(_) -> false. %Everything else is false %% printable_list([Char]) -> boolean() %% Return true if CharList is a list of printable characters, else -%% false. +%% false. The notion of printable in Unicode terms is somewhat floating. +%% Everything that is not a control character and not invalid unicode +%% will be considered printable. -spec printable_list(Term) -> boolean() when Term :: term(). printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> printable_list(Cs); -printable_list([C|Cs]) when is_integer(C), C >= $\240, C =< $\377 -> +printable_list([C|Cs]) + when is_integer(C), C >= 16#A0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> printable_list(Cs); printable_list([$\n|Cs]) -> printable_list(Cs); printable_list([$\r|Cs]) -> printable_list(Cs); @@ -517,33 +553,7 @@ printable_list([$\b|Cs]) -> printable_list(Cs); printable_list([$\f|Cs]) -> printable_list(Cs); printable_list([$\e|Cs]) -> printable_list(Cs); printable_list([]) -> true; -printable_list(_) -> false. %Everything else is false - -%% printable_unicode_list([Char]) -> boolean() -%% Return true if CharList is a list of printable characters, else -%% false. The notion of printable in Unicode terms is somewhat floating. -%% Everything that is not a control character and not invalid unicode -%% will be considered printable. - --spec printable_unicode_list(Term) -> boolean() when - Term :: term(). - -printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> - printable_unicode_list(Cs); -printable_unicode_list([C|Cs]) - when is_integer(C), C >= 16#A0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - printable_unicode_list(Cs); -printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([]) -> true; -printable_unicode_list(_) -> false. %Everything else is false +printable_list(_) -> false. %Everything else is false %% List = nl() %% Return a list of characters to generate a newline. diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 5680f83ab6..6a06d9448b 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,7 +22,7 @@ -export([fwrite/2,fwrite_g/1,indentation/2]). -%% fwrite(Format, ArgList) -> [unicode:unicode:char()]. +%% fwrite(Format, ArgList) -> string(). %% Format the arguments in ArgList after string Format. Just generate %% an error if there is an error in the arguments. %% @@ -133,7 +133,7 @@ pcount([{$P,_As,_F,_Ad,_P,_Pad,_Enc}|Cs], Acc) -> pcount(Cs, Acc+1); pcount([_|Cs], Acc) -> pcount(Cs, Acc); pcount([], Acc) -> Acc. -%% build([Control], Pc, Indentation) -> [unicode:unicode_char()]. +%% build([Control], Pc, Indentation) -> string(). %% Interpret the control structures. Count the number of print %% remaining and only calculate indentation when necessary. Must also %% be smart when calculating indentation for characters in format. @@ -154,7 +154,7 @@ decr_pc($p, Pc) -> Pc - 1; decr_pc($P, Pc) -> Pc - 1; decr_pc(_, Pc) -> Pc. -%% indentation([unicode:unicode_char()], Indentation) -> Indentation. +%% indentation(String, Indentation) -> Indentation. %% Calculate the indentation of the end of a string given its start %% indentation. We assume tabs at 8 cols. @@ -167,8 +167,7 @@ indentation([C|Cs], I) -> indentation([], I) -> I. %% control(FormatChar, [Argument], FieldWidth, Adjust, Precision, PadChar, -%% Encoding, Indentation) -> -%% [unicode:unicode_char()] +%% Encoding, Indentation) -> String %% This is the main dispatch function for the various formatting commands. %% Field widths and precisions have already been calculated. @@ -613,7 +612,7 @@ prefixed_integer(Int, F, Adj, Base, Pad, Prefix, Lowercase) term([Prefix|S], F, Adj, none, Pad) end. -%% char(Char, Field, Adjust, Precision, PadChar) -> [unicode:unicode_char()]. +%% char(Char, Field, Adjust, Precision, PadChar) -> string(). char(C, none, _Adj, none, _Pad) -> [C]; char(C, F, _Adj, none, _Pad) -> chars(C, F); diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 99ad281a9b..dbf6fd74e4 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -51,7 +51,6 @@ print(Term) -> -type max_chars() :: integer(). -type chars() :: io_lib:chars(). --type unicode_chars() :: io_lib:unicode_chars(). -type option() :: {column, column()} | {line_length, line_length()} | {depth, depth()} @@ -60,8 +59,8 @@ print(Term) -> | {encoding, latin1 | utf8 | unicode}. -type options() :: [option()]. --spec print(term(), rec_print_fun()) -> chars() | unicode_chars(); - (term(), options()) -> chars() | unicode_chars(). +-spec print(term(), rec_print_fun()) -> chars(); + (term(), options()) -> chars(). print(Term, Options) when is_list(Options) -> Col = proplists:get_value(column, Options, 1), @@ -74,24 +73,23 @@ print(Term, Options) when is_list(Options) -> print(Term, RecDefFun) -> print(Term, -1, RecDefFun). --spec print(term(), depth(), rec_print_fun()) -> chars() | unicode_chars(). +-spec print(term(), depth(), rec_print_fun()) -> chars(). print(Term, Depth, RecDefFun) -> print(Term, 1, 80, Depth, RecDefFun). --spec print(term(), column(), line_length(), depth()) -> - chars() | unicode_chars(). +-spec print(term(), column(), line_length(), depth()) -> chars(). print(Term, Col, Ll, D) -> print(Term, Col, Ll, D, _M=-1, no_fun, latin1). -spec print(term(), column(), line_length(), depth(), rec_print_fun()) -> - chars() | unicode_chars(). + chars(). print(Term, Col, Ll, D, RecDefFun) -> print(Term, Col, Ll, D, _M=-1, RecDefFun). -spec print(term(), column(), line_length(), depth(), max_chars(), - rec_print_fun()) -> chars() | unicode_chars(). + rec_print_fun()) -> chars(). print(Term, Col, Ll, D, M, RecDefFun) -> print(Term, Col, Ll, D, M, RecDefFun, latin1). @@ -369,13 +367,13 @@ print_length(<<_/bitstring>>=Bin, D, _RF, Enc) -> S = io_lib:write_string(List, $"), %" {[$<,$<,S,$>,$>], 4 + length(S)}; {false, List} when is_list(List) -> - S = io_lib:write_unicode_string(List, $"), %" + S = io_lib:write_string(List, $"), %" {[$<,$<,S,"/utf8>>"], 9 + length(S)}; {true, true, Prefix} -> S = io_lib:write_string(Prefix, $"), %" {[$<,$<, S | "...>>"], 7 + length(S)}; {false, true, Prefix} -> - S = io_lib:write_unicode_string(Prefix, $"), %" + S = io_lib:write_string(Prefix, $"), %" {[$<,$<, S | "/utf8...>>"], 12 + length(S)}; false -> S = io_lib:write(Bin, D), @@ -451,9 +449,9 @@ list_length_tail({_, Len}, Acc) -> printable_list(_L, 1, _Enc) -> false; printable_list(L, _D, latin1) -> - io_lib:printable_list(L); + io_lib:printable_latin1_list(L); printable_list(L, _D, _Uni) -> - io_lib:printable_unicode_list(L). + io_lib:printable_list(L). %% Truncated lists could break some existing code. % printable_list(L, D, Enc) when D >= 0 -> % Len = ?CHARS * (D - 1), @@ -538,9 +536,9 @@ printable_unicode(Bin, I, L) -> {I, Bin, lists:reverse(L)}. write_string(S, latin1) -> - io_lib:write_string(S, $"); %" + io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> - io_lib:write_unicode_string(S, $"). %" + io_lib:write_string(S, $"). %" %% Throw 'no_good' if the indentation exceeds half the line length %% unless there is room for M characters on the line. diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl index b2ce2a5a8f..b778f3bf64 100644 --- a/lib/stdlib/src/lib.erl +++ b/lib/stdlib/src/lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -426,9 +426,9 @@ brackets_to_parens(S, Enc) -> [$(,R,$)]. printable_list(latin1, As) -> - io_lib:printable_list(As); + io_lib:printable_latin1_list(As); printable_list(_, As) -> - io_lib:printable_unicode_list(As). + io_lib:printable_list(As). mfa_to_string(M, F, A) -> io_lib:fwrite(<<"~s/~w">>, [mf_to_string({M, F}, A), A]). diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl index 8b9412fb1b..10b652cdde 100644 --- a/lib/stdlib/src/unicode.erl +++ b/lib/stdlib/src/unicode.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2012. All Rights Reserved. +%% Copyright Ericsson AB 2008-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -25,25 +25,32 @@ -export_type([chardata/0, charlist/0, encoding/0, external_chardata/0, external_charlist/0, latin1_chardata/0, - latin1_charlist/0, unicode_binary/0, unicode_char/0]). + latin1_charlist/0, unicode_binary/0]). -type encoding() :: 'latin1' | 'unicode' | 'utf8' | 'utf16' | {'utf16', endian()} | 'utf32' | {'utf32', endian()}. -type endian() :: 'big' | 'little'. -type unicode_binary() :: binary(). --type unicode_char() :: non_neg_integer(). --type charlist() :: [unicode_char() | unicode_binary() | charlist()]. +-type charlist() :: + maybe_improper_list(char() | unicode_binary() | charlist(), + unicode_binary() | nil()). -type chardata() :: charlist() | unicode_binary(). -type external_unicode_binary() :: binary(). -type external_chardata() :: external_charlist() | external_unicode_binary(). --type external_charlist() :: [unicode_char() | external_unicode_binary() - | external_charlist()]. +-type external_charlist() :: + maybe_improper_list(char() | + external_unicode_binary() | + external_charlist(), + external_unicode_binary() | nil()). -type latin1_binary() :: binary(). -type latin1_char() :: byte(). -type latin1_chardata() :: latin1_charlist() | latin1_binary(). --type latin1_charlist() :: [latin1_char() | latin1_binary() - | latin1_charlist()]. +-type latin1_charlist() :: + maybe_improper_list(latin1_char() | + latin1_binary() | + latin1_charlist(), + latin1_binary() | nil()). %%% BIFs %%% diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 521d7255ea..4d2b53b265 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2051,15 +2051,15 @@ otp_10302(Suite) when is_list(Suite) -> "<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]), Chars = lists:seq(0, 512), % just a few... - [] = [C || C <- Chars, S <- io_lib:write_unicode_char_as_latin1(C), + [] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C), not is_latin1(S)], - L1 = [S || C <- Chars, S <- io_lib:write_unicode_char(C), + L1 = [S || C <- Chars, S <- io_lib:write_char(C), not is_latin1(S)], L1 = lists:seq(256, 512), - [] = [C || C <- Chars, S <- io_lib:write_unicode_string_as_latin1([C]), + [] = [C || C <- Chars, S <- io_lib:write_string_as_latin1([C]), not is_latin1(S)], - L2 = [S || C <- Chars, S <- io_lib:write_unicode_string([C]), + L2 = [S || C <- Chars, S <- io_lib:write_string([C]), not is_latin1(S)], L2 = lists:seq(256, 512), |