From b2b0feab152063f71c4bf58f985cd52fc9e0105a Mon Sep 17 00:00:00 2001 From: Hans Bolinder Date: Fri, 11 Jan 2013 14:42:54 +0100 Subject: Extend char() to Unicode characters The code related to the introduction of unicode_string() and unicode_char() has been removed. The types char() and string() have been extended to include Unicode characters. In fact char() was changed some time ago; this commit is about cleaning up the documentation and introduce better names for some functions. --- erts/preloaded/src/prim_file.erl | 7 +- lib/dialyzer/test/r9c_SUITE_data/results/inets | 2 +- lib/hipe/cerl/erl_types.erl | 34 +--- lib/parsetools/include/yeccpre.hrl | 8 +- lib/parsetools/src/leex.erl | 2 +- lib/parsetools/src/yecc.erl | 6 +- lib/parsetools/src/yeccparser.erl | 4 +- lib/stdlib/doc/src/io_lib.xml | 67 +++++++- lib/stdlib/doc/src/unicode.xml | 20 +-- lib/stdlib/src/epp.erl | 6 +- lib/stdlib/src/erl_pp.erl | 10 +- lib/stdlib/src/erl_scan.erl | 4 +- lib/stdlib/src/io.erl | 8 +- lib/stdlib/src/io_lib.erl | 224 +++++++++++++------------ lib/stdlib/src/io_lib_format.erl | 13 +- lib/stdlib/src/io_lib_pretty.erl | 28 ++-- lib/stdlib/src/lib.erl | 6 +- lib/stdlib/src/unicode.erl | 23 ++- lib/stdlib/test/io_SUITE.erl | 8 +- lib/syntax_tools/src/erl_syntax.erl | 12 +- system/doc/reference_manual/typespec.xml | 4 +- 21 files changed, 263 insertions(+), 233 deletions(-) diff --git a/erts/preloaded/src/prim_file.erl b/erts/preloaded/src/prim_file.erl index c412b7faf2..50adf9c89d 100644 --- a/erts/preloaded/src/prim_file.erl +++ b/erts/preloaded/src/prim_file.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2000-2012. All Rights Reserved. +%% Copyright Ericsson AB 2000-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -154,8 +154,7 @@ internal_native2name/1, internal_normalize_utf8/1]). --type unicode_string() :: [unicode:unicode_char()]. --type prim_file_name() :: unicode_string() | unicode:unicode_binary(). +-type prim_file_name() :: string() | unicode:unicode_binary(). -spec internal_name2native(prim_file_name()) -> binary(). @@ -167,7 +166,7 @@ internal_name2native(_) -> internal_native2name(_) -> erlang:nif_error(undefined). --spec internal_normalize_utf8(unicode:unicode_binary()) -> unicode_string(). +-spec internal_normalize_utf8(unicode:unicode_binary()) -> string(). internal_normalize_utf8(_) -> erlang:nif_error(undefined). diff --git a/lib/dialyzer/test/r9c_SUITE_data/results/inets b/lib/dialyzer/test/r9c_SUITE_data/results/inets index d789d8d246..629378d673 100644 --- a/lib/dialyzer/test/r9c_SUITE_data/results/inets +++ b/lib/dialyzer/test/r9c_SUITE_data/results/inets @@ -35,7 +35,7 @@ mod_auth_plain.erl:159: The variable _ can never match since previous clauses co mod_auth_plain.erl:83: The variable O can never match since previous clauses completely covered the type [[any()]] mod_cgi.erl:372: The pattern {'http_response', NewAccResponse} can never match the type 'ok' mod_dir.erl:101: The call lists:flatten(nonempty_improper_list(atom() | [any()] | char(),atom())) will never return since it differs in the 1st argument from the success typing arguments: ([any()]) -mod_dir.erl:72: The pattern {'error', Reason} can never match the type {'ok',[[[any()] | non_neg_integer()],...]} +mod_dir.erl:72: The pattern {'error', Reason} can never match the type {'ok',[[[any()] | char()],...]} mod_get.erl:135: The pattern <{'enfile', _}, _Info, Path> can never match the type mod_head.erl:80: The pattern <{'enfile', _}, _Info, Path> can never match the type mod_htaccess.erl:460: The pattern {'error', BadData} can never match the type {'ok',_} diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl index 7631f5289e..0f88b32db1 100644 --- a/lib/hipe/cerl/erl_types.erl +++ b/lib/hipe/cerl/erl_types.erl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2003-2012. All Rights Reserved. +%% Copyright Ericsson AB 2003-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -63,7 +63,6 @@ t_boolean/0, t_byte/0, t_char/0, - t_charlist/0, t_collect_vars/1, t_cons/0, t_cons/2, @@ -197,7 +196,6 @@ t_tuple_size/1, t_tuple_sizes/1, t_tuple_subtypes/1, - t_unicode_string/0, t_unify/2, t_unify/3, t_unit/0, @@ -1458,21 +1456,6 @@ t_is_tuple(_) -> false. t_bitstrlist() -> t_iolist(1, t_bitstr()). --spec t_charlist() -> erl_type(). - -t_charlist() -> - t_charlist(1). - --spec t_charlist(non_neg_integer()) -> erl_type(). - -t_charlist(N) when N > 0 -> - t_maybe_improper_list(t_sup([t_unicode_char(), - t_unicode_binary(), - t_charlist(N-1)]), - t_sup(t_unicode_binary(), t_nil())); -t_charlist(0) -> - t_maybe_improper_list(t_any(), t_sup(t_unicode_binary(), t_nil())). - -spec t_constant() -> erl_type(). t_constant() -> @@ -1568,21 +1551,6 @@ t_parameterized_module() -> t_timeout() -> t_sup(t_non_neg_integer(), t_atom('infinity')). --spec t_unicode_binary() -> erl_type(). - -t_unicode_binary() -> - t_binary(). % with characters encoded in UTF-8 coding standard - --spec t_unicode_char() -> erl_type(). - -t_unicode_char() -> - t_integer(). % representing a valid unicode codepoint - --spec t_unicode_string() -> erl_type(). - -t_unicode_string() -> - t_list(t_unicode_char()). - %%----------------------------------------------------------------------------- %% Some built-in opaque types %% diff --git a/lib/parsetools/include/yeccpre.hrl b/lib/parsetools/include/yeccpre.hrl index e4c3ba52be..855bff5fdc 100644 --- a/lib/parsetools/include/yeccpre.hrl +++ b/lib/parsetools/include/yeccpre.hrl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -36,7 +36,7 @@ parse_and_scan({M, F, A}) -> -spec format_error(any()) -> [char() | list()]. format_error(Message) -> - case io_lib:deep_unicode_char_list(Message) of + case io_lib:deep_char_list(Message) of true -> Message; _ -> @@ -164,9 +164,9 @@ yecctoken_location(Token) -> yecctoken2string({atom, _, A}) -> io_lib:write(A); yecctoken2string({integer,_,N}) -> io_lib:write(N); yecctoken2string({float,_,F}) -> io_lib:write(F); -yecctoken2string({char,_,C}) -> io_lib:write_unicode_char(C); +yecctoken2string({char,_,C}) -> io_lib:write_char(C); yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]); -yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S); +yecctoken2string({string,_,S}) -> io_lib:write_string(S); yecctoken2string({reserved_symbol, _, A}) -> io_lib:write(A); yecctoken2string({_Cat, _, Val}) -> io_lib:format("~p",[Val]); yecctoken2string({dot, _}) -> "'.'"; diff --git a/lib/parsetools/src/leex.erl b/lib/parsetools/src/leex.erl index bbef4053b4..32c513f56c 100644 --- a/lib/parsetools/src/leex.erl +++ b/lib/parsetools/src/leex.erl @@ -1669,7 +1669,7 @@ quote($\d) -> "\\\\d"; quote($\\) -> "\\\\"; quote(C) when is_integer(C) -> %% Must remove the $ and get the \'s right. - case io_lib:write_unicode_char(C) of + case io_lib:write_char(C) of [$$,$\\|Cs] -> "\\\\" ++ Cs; [$$|Cs] -> Cs end; diff --git a/lib/parsetools/src/yecc.erl b/lib/parsetools/src/yecc.erl index dbb7d025ae..2f0f70f39b 100644 --- a/lib/parsetools/src/yecc.erl +++ b/lib/parsetools/src/yecc.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -2494,8 +2494,8 @@ pp_tokens1([T | Ts], Line0, Enc, T0) -> pp_symbol({var,_,Var}, _Enc) -> Var; pp_symbol({string,_,String}, latin1) -> - io_lib:write_unicode_string_as_latin1(String); -pp_symbol({string,_,String}, _Enc) -> io_lib:write_unicode_string(String); + io_lib:write_string_as_latin1(String); +pp_symbol({string,_,String}, _Enc) -> io_lib:write_string(String); pp_symbol({_,_,Symbol}, latin1) -> io_lib:fwrite(<<"~p">>, [Symbol]); pp_symbol({_,_,Symbol}, _Enc) -> io_lib:fwrite(<<"~tp">>, [Symbol]); pp_symbol({Symbol, _}, _Enc) -> Symbol. diff --git a/lib/parsetools/src/yeccparser.erl b/lib/parsetools/src/yeccparser.erl index e4b8b06db5..54f9ba5a58 100644 --- a/lib/parsetools/src/yeccparser.erl +++ b/lib/parsetools/src/yeccparser.erl @@ -17,7 +17,7 @@ line_of(Token) -> %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2011. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -178,7 +178,7 @@ yecctoken2string({integer,_,N}) -> io_lib:write(N); yecctoken2string({float,_,F}) -> io_lib:write(F); yecctoken2string({char,_,C}) -> io_lib:write_char(C); yecctoken2string({var,_,V}) -> io_lib:format("~s", [V]); -yecctoken2string({string,_,S}) -> io_lib:write_unicode_string(S); +yecctoken2string({string,_,S}) -> io_lib:write_string(S); yecctoken2string({reserved_symbol, _, A}) -> io_lib:write(A); yecctoken2string({_Cat, _, Val}) -> io_lib:write(Val); yecctoken2string({dot, _}) -> "'.'"; diff --git a/lib/stdlib/doc/src/io_lib.xml b/lib/stdlib/doc/src/io_lib.xml index 7487cb5740..001d34a7c2 100644 --- a/lib/stdlib/doc/src/io_lib.xml +++ b/lib/stdlib/doc/src/io_lib.xml @@ -4,7 +4,7 @@
- 19962012 + 19962013 Ericsson AB. All Rights Reserved. @@ -53,6 +53,9 @@ + + + @@ -207,13 +210,47 @@ Write a string -

Returns the list of characters needed to print String - as a string.

+

Returns the list of characters needed to print + String as a string.

+
+
+ + + Write a string + +

Returns the list of characters needed to print + String as a string. Non-Latin-1 + characters are escaped.

+
+
+ + + Write an ISO-latin-1 string + +

Returns the list of characters needed to print + Latin1String as a string.

Write a character + +

Returns the list of characters needed to print a character + constant in the Unicode character set.

+
+
+ + + Write a character + +

Returns the list of characters needed to print a character + constant in the Unicode character set. Non-Latin-1 characters + are escaped.

+
+
+ + + Write an ISO-latin-1 character

Returns the list of characters needed to print a character constant in the ISO-latin-1 character set.

@@ -230,6 +267,14 @@ Test for a list of characters + +

Returns true if Term is a flat list of + characters in the Unicode range, otherwise it returns false.

+
+
+ + + Test for a list of ISO-latin-1 characters

Returns true if Term is a flat list of characters in the ISO-latin-1 range, otherwise it returns false.

@@ -238,6 +283,14 @@ Test for a deep list of characters + +

Returns true if Term is a, possibly deep, list + of characters in the Unicode range, otherwise it returns false.

+
+
+ + + Test for a deep list of characters

Returns true if Term is a, possibly deep, list of characters in the ISO-latin-1 range, otherwise it returns false.

@@ -245,6 +298,14 @@
+ Test for a list of printable characters + +

Returns true if Term is a flat list of + printable Unicode characters, otherwise it returns false.

+
+
+ + Test for a list of printable ISO-latin-1 characters

Returns true if Term is a flat list of diff --git a/lib/stdlib/doc/src/unicode.xml b/lib/stdlib/doc/src/unicode.xml index d235f3e180..deba6adb11 100644 --- a/lib/stdlib/doc/src/unicode.xml +++ b/lib/stdlib/doc/src/unicode.xml @@ -5,7 +5,7 @@

1996 - 2012 + 2013 Ericsson AB, All Rights Reserved @@ -51,20 +51,11 @@

A binary() with characters encoded in the UTF-8 coding standard.

- - - -

An integer() representing a valid Unicode codepoint.

-
-
- -

A unicode_binary() is allowed as the tail of the list.

-
@@ -78,10 +69,6 @@ - -

An external_unicode_binary() is allowed as the tail - of the list.

-
@@ -96,11 +83,12 @@ +

The same as iodata().

+
-

A latin1_binary() is allowed as the tail of - the list.

+

The same as iolist().

diff --git a/lib/stdlib/src/epp.erl b/lib/stdlib/src/epp.erl index ebabf8d700..475b1bf933 100644 --- a/lib/stdlib/src/epp.erl +++ b/lib/stdlib/src/epp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -1276,9 +1276,9 @@ token_src({X, _}) when is_atom(X) -> token_src({var, _, X}) -> atom_to_list(X); token_src({char,_,C}) -> - io_lib:write_unicode_char(C); + io_lib:write_char(C); token_src({string, _, X}) -> - io_lib:write_unicode_string(X); + io_lib:write_string(X); token_src({_, _, X}) -> io_lib:format("~w", [X]). diff --git a/lib/stdlib/src/erl_pp.erl b/lib/stdlib/src/erl_pp.erl index 0e1156075a..a868867a81 100644 --- a/lib/stdlib/src/erl_pp.erl +++ b/lib/stdlib/src/erl_pp.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -179,12 +179,12 @@ state(_Hook) -> state(). state() -> - #pp{string_fun = fun io_lib:write_unicode_string_as_latin1/1, - char_fun = fun io_lib:write_unicode_char_as_latin1/1}. + #pp{string_fun = fun io_lib:write_string_as_latin1/1, + char_fun = fun io_lib:write_char_as_latin1/1}. unicode_state() -> - #pp{string_fun = fun io_lib:write_unicode_string/1, - char_fun = fun io_lib:write_unicode_char/1}. + #pp{string_fun = fun io_lib:write_string/1, + char_fun = fun io_lib:write_char/1}. encoding(Options) -> case proplists:get_value(encoding, Options, epp:default_encoding()) of diff --git a/lib/stdlib/src/erl_scan.erl b/lib/stdlib/src/erl_scan.erl index bc0eaf015d..f4accde8e7 100644 --- a/lib/stdlib/src/erl_scan.erl +++ b/lib/stdlib/src/erl_scan.erl @@ -2,7 +2,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -115,7 +115,7 @@ format_error({string,Quote,Head}) -> lists:flatten(["unterminated " ++ string_thing(Quote) ++ " starting with " ++ - io_lib:write_unicode_string(Head, Quote)]); + io_lib:write_string(Head, Quote)]); format_error({illegal,Type}) -> lists:flatten(io_lib:fwrite("illegal ~w", [Type])); format_error(char) -> "unterminated character"; diff --git a/lib/stdlib/src/io.erl b/lib/stdlib/src/io.erl index 9b436c224c..79cb988c5d 100644 --- a/lib/stdlib/src/io.erl +++ b/lib/stdlib/src/io.erl @@ -127,7 +127,7 @@ rows(Io) -> -spec get_chars(Prompt, Count) -> Data | server_no_data() when Prompt :: prompt(), Count :: non_neg_integer(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_chars(Prompt, N) -> get_chars(default_input(), Prompt, N). @@ -136,14 +136,14 @@ get_chars(Prompt, N) -> IoDevice :: device(), Prompt :: prompt(), Count :: non_neg_integer(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_chars(Io, Prompt, N) when is_integer(N), N >= 0 -> request(Io, {get_chars,unicode,Prompt,N}). -spec get_line(Prompt) -> Data | server_no_data() when Prompt :: prompt(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_line(Prompt) -> get_line(default_input(), Prompt). @@ -151,7 +151,7 @@ get_line(Prompt) -> -spec get_line(IoDevice, Prompt) -> Data | server_no_data() when IoDevice :: device(), Prompt :: prompt(), - Data :: [unicode:unicode_char()] | unicode:unicode_binary(). + Data :: string() | unicode:unicode_binary(). get_line(Io, Prompt) -> request(Io, {get_line,unicode,Prompt}). diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl index df5f9b8c25..b7ec848e1e 100644 --- a/lib/stdlib/src/io_lib.erl +++ b/lib/stdlib/src/io_lib.erl @@ -64,29 +64,31 @@ -export([print/1,print/4,indentation/2]). -export([write/1,write/2,write/3,nl/0,format_prompt/1,format_prompt/2]). --export([write_atom/1,write_string/1,write_string/2,write_unicode_string/1, - write_unicode_string/2, write_char/1, write_unicode_char/1]). +-export([write_atom/1,write_string/1,write_string/2,write_latin1_string/1, + write_latin1_string/2, write_char/1, write_latin1_char/1]). --export([write_unicode_string_as_latin1/1, write_unicode_string_as_latin1/2, - write_unicode_char_as_latin1/1]). +-export([write_string_as_latin1/1, write_string_as_latin1/2, + write_char_as_latin1/1]). --export([quote_atom/2, char_list/1, unicode_char_list/1, - deep_char_list/1, deep_unicode_char_list/1, - printable_list/1, printable_unicode_list/1]). +-export([quote_atom/2, char_list/1, latin1_char_list/1, + deep_char_list/1, deep_latin1_char_list/1, + printable_list/1, printable_latin1_list/1]). %% Utilities for collecting characters. -export([collect_chars/3, collect_chars/4, collect_line/2, collect_line/3, collect_line/4, get_until/3, get_until/4]). --export_type([chars/0, unicode_chars/0, unicode_string/0, continuation/0, - fread_error/0]). +%% The following functions were used by Yecc's include-file. +-export([write_unicode_string/1, write_unicode_char/1, + deep_unicode_char_list/1]). + +-export_type([chars/0, latin1_string/0, continuation/0, fread_error/0]). %%---------------------------------------------------------------------- -type chars() :: [char() | chars()]. --type unicode_chars() :: [unicode:unicode_char() | unicode_chars()]. --type unicode_string() :: [unicode:unicode_char()]. +-type latin1_string() :: [unicode:latin1_char()]. -type depth() :: -1 | non_neg_integer(). -opaque continuation() :: {Format :: string(), @@ -108,10 +110,8 @@ %% Interface calls to sub-modules. --spec fwrite(Format, Data) -> chars() | UnicodeList when +-spec fwrite(Format, Data) -> chars() when Format :: io:format(), - Data :: [term()], - UnicodeList :: [unicode:unicode_char()], Data :: [term()]. fwrite(Format, Args) -> @@ -142,10 +142,9 @@ fread(Chars, Format) -> fread(Cont, Chars, Format) -> io_lib_fread:fread(Cont, Chars, Format). --spec format(Format, Data) -> chars() | UnicodeList when +-spec format(Format, Data) -> chars() when Format :: io:format(), - Data :: [term()], - UnicodeList :: [unicode:unicode_char()]. + Data :: [term()]. format(Format, Args) -> case catch io_lib_format:fwrite(Format, Args) of @@ -340,6 +339,11 @@ name_char($_) -> true; name_char($@) -> true; name_char(_) -> false. +%%% There are two functions to write Unicode strings: +%%% - they both escape control characters < 160; +%%% - write_string() never escapes characters >= 160; +%%% - write_string_as_latin1() also escapes characters >= 255. + %% write_string([Char]) -> [Char] %% Generate the list of characters needed to print a string. @@ -352,33 +356,32 @@ write_string(S) -> -spec write_string(string(), char()) -> chars(). write_string(S, Q) -> - [Q|write_string1(latin1, S, Q)]. + [Q|write_string1(unicode_as_unicode, S, Q)]. -%%% There are two functions to write Unicode strings: -%%% - they both escape control characters < 160; -%%% - write_unicode_string() never escapes characters >= 160; -%%% - write_unicode_string_as_latin1() also escapes characters >= 255. +%% Backwards compatibility. +write_unicode_string(S) -> + write_string(S). --spec write_unicode_string(UnicodeString) -> unicode_string() when - UnicodeString :: unicode_string(). +-spec write_latin1_string(Latin1String) -> latin1_string() when + Latin1String :: latin1_string(). -write_unicode_string(S) -> - write_unicode_string(S, $"). %" +write_latin1_string(S) -> + write_latin1_string(S, $"). %" --spec write_unicode_string(unicode_string(), char()) -> unicode_string(). +-spec write_latin1_string(latin1_string(), char()) -> latin1_string(). -write_unicode_string(S, Q) -> - [Q|write_string1(unicode_as_unicode, S, Q)]. +write_latin1_string(S, Q) -> + [Q|write_string1(latin1, S, Q)]. --spec write_unicode_string_as_latin1(UnicodeString) -> string() when - UnicodeString :: unicode_string(). +-spec write_string_as_latin1(String) -> latin1_string() when + String :: string(). -write_unicode_string_as_latin1(S) -> - write_unicode_string_as_latin1(S, $"). %" +write_string_as_latin1(S) -> + write_string_as_latin1(S, $"). %" --spec write_unicode_string_as_latin1(unicode_string(), char()) -> string(). +-spec write_string_as_latin1(string(), char()) -> latin1_string(). -write_unicode_string_as_latin1(S, Q) -> +write_string_as_latin1(S, Q) -> [Q|write_string1(unicode_as_latin1, S, Q)]. write_string1(_,[], Q) -> @@ -412,6 +415,11 @@ string_char(_,C, _, Tail) when C < $\240-> %Other control characters. C3 = (C band 7) + $0, [$\\,C1,C2,C3|Tail]. +%%% There are two functions to write a Unicode character: +%%% - they both escape control characters < 160; +%%% - write_char() never escapes characters >= 160; +%%% - write_char_as_latin1() also escapes characters >= 255. + %% write_char(Char) -> [char()]. %% Generate the list of characters needed to print a character constant. %% Must special case SPACE, $\s, here. @@ -420,48 +428,63 @@ string_char(_,C, _, Tail) when C < $\240-> %Other control characters. Char :: char(). write_char($\s) -> "$\\s"; %Must special case this. -write_char(C) when is_integer(C), C >= $\000, C =< $\377 -> - [$$|string_char(latin1,C, -1, [])]. +write_char(C) when is_integer(C), C >= $\000 -> + [$$|string_char(unicode_as_unicode, C, -1, [])]. -%%% There are two functions to write a Unicode character: -%%% - they both escape control characters < 160; -%%% - write_unicode_char() never escapes characters >= 160; -%%% - write_unicode_char_as_latin1() also escapes characters >= 255. +%% Backwards compatibility. +write_unicode_char(C) -> + write_char(C). --spec write_unicode_char(UnicodeChar) -> unicode_string() when - UnicodeChar :: unicode:unicode_char(). +-spec write_latin1_char(Latin1Char) -> latin1_string() when + Latin1Char :: unicode:latin1_char(). -write_unicode_char(Uni) when is_integer(Uni), Uni >= $\000 -> - [$$|string_char(unicode_as_unicode,Uni, -1, [])]. +write_latin1_char(Lat1) when is_integer(Lat1), Lat1 >= $\000, Lat1 =< $\377 -> + [$$|string_char(latin1, Lat1, -1, [])]. --spec write_unicode_char_as_latin1(UnicodeChar) -> string() when - UnicodeChar :: unicode:unicode_char(). +-spec write_char_as_latin1(Char) -> latin1_string() when + Char :: char(). -write_unicode_char_as_latin1(Uni) when is_integer(Uni), Uni >= $\000 -> +write_char_as_latin1(Uni) when is_integer(Uni), Uni >= $\000 -> [$$|string_char(unicode_as_latin1,Uni, -1, [])]. -%% char_list(CharList) -%% deep_char_list(CharList) -%% Return true if CharList is a (possibly deep) list of characters, else -%% false. +%% latin1_char_list(CharList) +%% deep_latin1_char_list(CharList) +%% Return true if CharList is a (possibly deep) list of Latin-1 +%% characters, else false. + +-spec latin1_char_list(Term) -> boolean() when + Term :: term(). + +latin1_char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> + latin1_char_list(Cs); +latin1_char_list([]) -> true; +latin1_char_list(_) -> false. %Everything else is false -spec char_list(Term) -> boolean() when Term :: term(). -char_list([C|Cs]) when is_integer(C), C >= $\000, C =< $\377 -> +char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> char_list(Cs); char_list([]) -> true; char_list(_) -> false. %Everything else is false --spec unicode_char_list(Term) -> boolean() when +-spec deep_latin1_char_list(Term) -> boolean() when Term :: term(). -unicode_char_list([C|Cs]) when is_integer(C), C >= 0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - unicode_char_list(Cs); -unicode_char_list([]) -> true; -unicode_char_list(_) -> false. %Everything else is false +deep_latin1_char_list(Cs) -> + deep_latin1_char_list(Cs, []). + +deep_latin1_char_list([C|Cs], More) when is_list(C) -> + deep_latin1_char_list(C, [Cs|More]); +deep_latin1_char_list([C|Cs], More) when is_integer(C), C >= $\000, C =< $\377 -> + deep_latin1_char_list(Cs, More); +deep_latin1_char_list([], [Cs|More]) -> + deep_latin1_char_list(Cs, More); +deep_latin1_char_list([], []) -> true; +deep_latin1_char_list(_, _More) -> %Everything else is false + false. -spec deep_char_list(Term) -> boolean() when Term :: term(). @@ -471,43 +494,56 @@ deep_char_list(Cs) -> deep_char_list([C|Cs], More) when is_list(C) -> deep_char_list(C, [Cs|More]); -deep_char_list([C|Cs], More) when is_integer(C), C >= $\000, C =< $\377 -> +deep_char_list([C|Cs], More) + when is_integer(C), C >= 0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> deep_char_list(Cs, More); deep_char_list([], [Cs|More]) -> deep_char_list(Cs, More); deep_char_list([], []) -> true; -deep_char_list(_, _More) -> %Everything else is false +deep_char_list(_, _More) -> %Everything else is false false. --spec deep_unicode_char_list(Term) -> boolean() when - Term :: term(). +deep_unicode_char_list(Term) -> + deep_char_list(Term). -deep_unicode_char_list(Cs) -> - deep_unicode_char_list(Cs, []). +%% printable_latin1_list([Char]) -> boolean() +%% Return true if CharList is a list of printable Latin1 characters, else +%% false. -deep_unicode_char_list([C|Cs], More) when is_list(C) -> - deep_unicode_char_list(C, [Cs|More]); -deep_unicode_char_list([C|Cs], More) - when is_integer(C), C >= 0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - deep_unicode_char_list(Cs, More); -deep_unicode_char_list([], [Cs|More]) -> - deep_unicode_char_list(Cs, More); -deep_unicode_char_list([], []) -> true; -deep_unicode_char_list(_, _More) -> %Everything else is false - false. +-spec printable_latin1_list(Term) -> boolean() when + Term :: term(). + +printable_latin1_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> + printable_latin1_list(Cs); +printable_latin1_list([C|Cs]) when is_integer(C), C >= $\240, C =< $\377 -> + printable_latin1_list(Cs); +printable_latin1_list([$\n|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\r|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\t|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\v|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\b|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\f|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([$\e|Cs]) -> printable_latin1_list(Cs); +printable_latin1_list([]) -> true; +printable_latin1_list(_) -> false. %Everything else is false %% printable_list([Char]) -> boolean() %% Return true if CharList is a list of printable characters, else -%% false. +%% false. The notion of printable in Unicode terms is somewhat floating. +%% Everything that is not a control character and not invalid unicode +%% will be considered printable. -spec printable_list(Term) -> boolean() when Term :: term(). printable_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> printable_list(Cs); -printable_list([C|Cs]) when is_integer(C), C >= $\240, C =< $\377 -> +printable_list([C|Cs]) + when is_integer(C), C >= 16#A0, C < 16#D800; + is_integer(C), C > 16#DFFF, C < 16#FFFE; + is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> printable_list(Cs); printable_list([$\n|Cs]) -> printable_list(Cs); printable_list([$\r|Cs]) -> printable_list(Cs); @@ -517,33 +553,7 @@ printable_list([$\b|Cs]) -> printable_list(Cs); printable_list([$\f|Cs]) -> printable_list(Cs); printable_list([$\e|Cs]) -> printable_list(Cs); printable_list([]) -> true; -printable_list(_) -> false. %Everything else is false - -%% printable_unicode_list([Char]) -> boolean() -%% Return true if CharList is a list of printable characters, else -%% false. The notion of printable in Unicode terms is somewhat floating. -%% Everything that is not a control character and not invalid unicode -%% will be considered printable. - --spec printable_unicode_list(Term) -> boolean() when - Term :: term(). - -printable_unicode_list([C|Cs]) when is_integer(C), C >= $\040, C =< $\176 -> - printable_unicode_list(Cs); -printable_unicode_list([C|Cs]) - when is_integer(C), C >= 16#A0, C < 16#D800; - is_integer(C), C > 16#DFFF, C < 16#FFFE; - is_integer(C), C > 16#FFFF, C =< 16#10FFFF -> - printable_unicode_list(Cs); -printable_unicode_list([$\n|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\r|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\t|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\v|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\b|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\f|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([$\e|Cs]) -> printable_unicode_list(Cs); -printable_unicode_list([]) -> true; -printable_unicode_list(_) -> false. %Everything else is false +printable_list(_) -> false. %Everything else is false %% List = nl() %% Return a list of characters to generate a newline. diff --git a/lib/stdlib/src/io_lib_format.erl b/lib/stdlib/src/io_lib_format.erl index 5680f83ab6..6a06d9448b 100644 --- a/lib/stdlib/src/io_lib_format.erl +++ b/lib/stdlib/src/io_lib_format.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -22,7 +22,7 @@ -export([fwrite/2,fwrite_g/1,indentation/2]). -%% fwrite(Format, ArgList) -> [unicode:unicode:char()]. +%% fwrite(Format, ArgList) -> string(). %% Format the arguments in ArgList after string Format. Just generate %% an error if there is an error in the arguments. %% @@ -133,7 +133,7 @@ pcount([{$P,_As,_F,_Ad,_P,_Pad,_Enc}|Cs], Acc) -> pcount(Cs, Acc+1); pcount([_|Cs], Acc) -> pcount(Cs, Acc); pcount([], Acc) -> Acc. -%% build([Control], Pc, Indentation) -> [unicode:unicode_char()]. +%% build([Control], Pc, Indentation) -> string(). %% Interpret the control structures. Count the number of print %% remaining and only calculate indentation when necessary. Must also %% be smart when calculating indentation for characters in format. @@ -154,7 +154,7 @@ decr_pc($p, Pc) -> Pc - 1; decr_pc($P, Pc) -> Pc - 1; decr_pc(_, Pc) -> Pc. -%% indentation([unicode:unicode_char()], Indentation) -> Indentation. +%% indentation(String, Indentation) -> Indentation. %% Calculate the indentation of the end of a string given its start %% indentation. We assume tabs at 8 cols. @@ -167,8 +167,7 @@ indentation([C|Cs], I) -> indentation([], I) -> I. %% control(FormatChar, [Argument], FieldWidth, Adjust, Precision, PadChar, -%% Encoding, Indentation) -> -%% [unicode:unicode_char()] +%% Encoding, Indentation) -> String %% This is the main dispatch function for the various formatting commands. %% Field widths and precisions have already been calculated. @@ -613,7 +612,7 @@ prefixed_integer(Int, F, Adj, Base, Pad, Prefix, Lowercase) term([Prefix|S], F, Adj, none, Pad) end. -%% char(Char, Field, Adjust, Precision, PadChar) -> [unicode:unicode_char()]. +%% char(Char, Field, Adjust, Precision, PadChar) -> string(). char(C, none, _Adj, none, _Pad) -> [C]; char(C, F, _Adj, none, _Pad) -> chars(C, F); diff --git a/lib/stdlib/src/io_lib_pretty.erl b/lib/stdlib/src/io_lib_pretty.erl index 99ad281a9b..dbf6fd74e4 100644 --- a/lib/stdlib/src/io_lib_pretty.erl +++ b/lib/stdlib/src/io_lib_pretty.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -51,7 +51,6 @@ print(Term) -> -type max_chars() :: integer(). -type chars() :: io_lib:chars(). --type unicode_chars() :: io_lib:unicode_chars(). -type option() :: {column, column()} | {line_length, line_length()} | {depth, depth()} @@ -60,8 +59,8 @@ print(Term) -> | {encoding, latin1 | utf8 | unicode}. -type options() :: [option()]. --spec print(term(), rec_print_fun()) -> chars() | unicode_chars(); - (term(), options()) -> chars() | unicode_chars(). +-spec print(term(), rec_print_fun()) -> chars(); + (term(), options()) -> chars(). print(Term, Options) when is_list(Options) -> Col = proplists:get_value(column, Options, 1), @@ -74,24 +73,23 @@ print(Term, Options) when is_list(Options) -> print(Term, RecDefFun) -> print(Term, -1, RecDefFun). --spec print(term(), depth(), rec_print_fun()) -> chars() | unicode_chars(). +-spec print(term(), depth(), rec_print_fun()) -> chars(). print(Term, Depth, RecDefFun) -> print(Term, 1, 80, Depth, RecDefFun). --spec print(term(), column(), line_length(), depth()) -> - chars() | unicode_chars(). +-spec print(term(), column(), line_length(), depth()) -> chars(). print(Term, Col, Ll, D) -> print(Term, Col, Ll, D, _M=-1, no_fun, latin1). -spec print(term(), column(), line_length(), depth(), rec_print_fun()) -> - chars() | unicode_chars(). + chars(). print(Term, Col, Ll, D, RecDefFun) -> print(Term, Col, Ll, D, _M=-1, RecDefFun). -spec print(term(), column(), line_length(), depth(), max_chars(), - rec_print_fun()) -> chars() | unicode_chars(). + rec_print_fun()) -> chars(). print(Term, Col, Ll, D, M, RecDefFun) -> print(Term, Col, Ll, D, M, RecDefFun, latin1). @@ -369,13 +367,13 @@ print_length(<<_/bitstring>>=Bin, D, _RF, Enc) -> S = io_lib:write_string(List, $"), %" {[$<,$<,S,$>,$>], 4 + length(S)}; {false, List} when is_list(List) -> - S = io_lib:write_unicode_string(List, $"), %" + S = io_lib:write_string(List, $"), %" {[$<,$<,S,"/utf8>>"], 9 + length(S)}; {true, true, Prefix} -> S = io_lib:write_string(Prefix, $"), %" {[$<,$<, S | "...>>"], 7 + length(S)}; {false, true, Prefix} -> - S = io_lib:write_unicode_string(Prefix, $"), %" + S = io_lib:write_string(Prefix, $"), %" {[$<,$<, S | "/utf8...>>"], 12 + length(S)}; false -> S = io_lib:write(Bin, D), @@ -451,9 +449,9 @@ list_length_tail({_, Len}, Acc) -> printable_list(_L, 1, _Enc) -> false; printable_list(L, _D, latin1) -> - io_lib:printable_list(L); + io_lib:printable_latin1_list(L); printable_list(L, _D, _Uni) -> - io_lib:printable_unicode_list(L). + io_lib:printable_list(L). %% Truncated lists could break some existing code. % printable_list(L, D, Enc) when D >= 0 -> % Len = ?CHARS * (D - 1), @@ -538,9 +536,9 @@ printable_unicode(Bin, I, L) -> {I, Bin, lists:reverse(L)}. write_string(S, latin1) -> - io_lib:write_string(S, $"); %" + io_lib:write_latin1_string(S, $"); %" write_string(S, _Uni) -> - io_lib:write_unicode_string(S, $"). %" + io_lib:write_string(S, $"). %" %% Throw 'no_good' if the indentation exceeds half the line length %% unless there is room for M characters on the line. diff --git a/lib/stdlib/src/lib.erl b/lib/stdlib/src/lib.erl index b2ce2a5a8f..b778f3bf64 100644 --- a/lib/stdlib/src/lib.erl +++ b/lib/stdlib/src/lib.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1996-2012. All Rights Reserved. +%% Copyright Ericsson AB 1996-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -426,9 +426,9 @@ brackets_to_parens(S, Enc) -> [$(,R,$)]. printable_list(latin1, As) -> - io_lib:printable_list(As); + io_lib:printable_latin1_list(As); printable_list(_, As) -> - io_lib:printable_unicode_list(As). + io_lib:printable_list(As). mfa_to_string(M, F, A) -> io_lib:fwrite(<<"~s/~w">>, [mf_to_string({M, F}, A), A]). diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl index 8b9412fb1b..10b652cdde 100644 --- a/lib/stdlib/src/unicode.erl +++ b/lib/stdlib/src/unicode.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2008-2012. All Rights Reserved. +%% Copyright Ericsson AB 2008-2013. All Rights Reserved. %% %% The contents of this file are subject to the Erlang Public License, %% Version 1.1, (the "License"); you may not use this file except in @@ -25,25 +25,32 @@ -export_type([chardata/0, charlist/0, encoding/0, external_chardata/0, external_charlist/0, latin1_chardata/0, - latin1_charlist/0, unicode_binary/0, unicode_char/0]). + latin1_charlist/0, unicode_binary/0]). -type encoding() :: 'latin1' | 'unicode' | 'utf8' | 'utf16' | {'utf16', endian()} | 'utf32' | {'utf32', endian()}. -type endian() :: 'big' | 'little'. -type unicode_binary() :: binary(). --type unicode_char() :: non_neg_integer(). --type charlist() :: [unicode_char() | unicode_binary() | charlist()]. +-type charlist() :: + maybe_improper_list(char() | unicode_binary() | charlist(), + unicode_binary() | nil()). -type chardata() :: charlist() | unicode_binary(). -type external_unicode_binary() :: binary(). -type external_chardata() :: external_charlist() | external_unicode_binary(). --type external_charlist() :: [unicode_char() | external_unicode_binary() - | external_charlist()]. +-type external_charlist() :: + maybe_improper_list(char() | + external_unicode_binary() | + external_charlist(), + external_unicode_binary() | nil()). -type latin1_binary() :: binary(). -type latin1_char() :: byte(). -type latin1_chardata() :: latin1_charlist() | latin1_binary(). --type latin1_charlist() :: [latin1_char() | latin1_binary() - | latin1_charlist()]. +-type latin1_charlist() :: + maybe_improper_list(latin1_char() | + latin1_binary() | + latin1_charlist(), + latin1_binary() | nil()). %%% BIFs %%% diff --git a/lib/stdlib/test/io_SUITE.erl b/lib/stdlib/test/io_SUITE.erl index 521d7255ea..4d2b53b265 100644 --- a/lib/stdlib/test/io_SUITE.erl +++ b/lib/stdlib/test/io_SUITE.erl @@ -2051,15 +2051,15 @@ otp_10302(Suite) when is_list(Suite) -> "<<228,...>>" = fmt("~tP", [<<"äppl">>, 2]), Chars = lists:seq(0, 512), % just a few... - [] = [C || C <- Chars, S <- io_lib:write_unicode_char_as_latin1(C), + [] = [C || C <- Chars, S <- io_lib:write_char_as_latin1(C), not is_latin1(S)], - L1 = [S || C <- Chars, S <- io_lib:write_unicode_char(C), + L1 = [S || C <- Chars, S <- io_lib:write_char(C), not is_latin1(S)], L1 = lists:seq(256, 512), - [] = [C || C <- Chars, S <- io_lib:write_unicode_string_as_latin1([C]), + [] = [C || C <- Chars, S <- io_lib:write_string_as_latin1([C]), not is_latin1(S)], - L2 = [S || C <- Chars, S <- io_lib:write_unicode_string([C]), + L2 = [S || C <- Chars, S <- io_lib:write_string([C]), not is_latin1(S)], L2 = lists:seq(256, 512), diff --git a/lib/syntax_tools/src/erl_syntax.erl b/lib/syntax_tools/src/erl_syntax.erl index 41a1708925..bdb2b5bcd7 100644 --- a/lib/syntax_tools/src/erl_syntax.erl +++ b/lib/syntax_tools/src/erl_syntax.erl @@ -1700,11 +1700,11 @@ char_literal(Node) -> -spec char_literal(syntaxTree(), encoding()) -> nonempty_string(). char_literal(Node, unicode) -> - io_lib:write_unicode_char(char_value(Node)); + io_lib:write_char(char_value(Node)); char_literal(Node, utf8) -> - io_lib:write_unicode_char(char_value(Node)); + io_lib:write_char(char_value(Node)); char_literal(Node, latin1) -> - io_lib:write_unicode_char_as_latin1(char_value(Node)). + io_lib:write_char_as_latin1(char_value(Node)). %% ===================================================================== @@ -1801,11 +1801,11 @@ string_literal(Node) -> -spec string_literal(syntaxTree(), encoding()) -> nonempty_string(). string_literal(Node, utf8) -> - io_lib:write_unicode_string(string_value(Node)); + io_lib:write_string(string_value(Node)); string_literal(Node, unicode) -> - io_lib:write_unicode_string(string_value(Node)); + io_lib:write_string(string_value(Node)); string_literal(Node, latin1) -> - io_lib:write_unicode_string_as_latin1(string_value(Node)). + io_lib:write_string_as_latin1(string_value(Node)). %% ===================================================================== diff --git a/system/doc/reference_manual/typespec.xml b/system/doc/reference_manual/typespec.xml index c3620f83f6..9207d536d5 100644 --- a/system/doc/reference_manual/typespec.xml +++ b/system/doc/reference_manual/typespec.xml @@ -4,7 +4,7 @@
- 20032011 + 20032013 Ericsson AB. All Rights Reserved. @@ -196,7 +196,7 @@ TList :: Type nonempty_string()[char(),...] - iolist()maybe_improper_list(char() | binary() | iolist(), binary() | []) + iolist()maybe_improper_list(byte() | binary() | iolist(), binary() | []) module()atom() -- cgit v1.2.3