diff options
Diffstat (limited to 'lib/stdlib/src/unicode.erl')
-rw-r--r-- | lib/stdlib/src/unicode.erl | 63 |
1 files changed, 56 insertions, 7 deletions
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl index 12bc60623d..e9b90befe6 100644 --- a/lib/stdlib/src/unicode.erl +++ b/lib/stdlib/src/unicode.erl @@ -30,12 +30,34 @@ characters_to_binary/3, bom_to_encoding/1, encoding_to_bom/1]). --export_type([encoding/0]). +-export_type([chardata/0, charlist/0, encoding/0, external_chardata/0, + external_charlist/0, latin1_chardata/0, + latin1_charlist/0, unicode_binary/0, unicode_char/0]). -type encoding() :: 'latin1' | 'unicode' | 'utf8' | 'utf16' | {'utf16', endian()} | 'utf32' | {'utf32', endian()}. -type endian() :: 'big' | 'little'. +-type unicode_binary() :: binary(). +-type unicode_char() :: non_neg_integer(). +-type charlist() :: [unicode_char() | unicode_binary() | charlist()]. +-type chardata() :: charlist() | unicode_binary(). +-type external_unicode_binary() :: binary(). +-type external_chardata() :: external_charlist() | external_unicode_binary(). +-type external_charlist() :: [unicode_char() | external_unicode_binary() + | external_charlist()]. +-type latin1_binary() :: binary(). +-type latin1_char() :: byte(). +-type latin1_chardata() :: latin1_charlist() | latin1_binary(). +-type latin1_charlist() :: [latin1_char() | latin1_binary() + | latin1_charlist()]. + +-spec characters_to_list(Data) -> Result when + Data :: latin1_chardata() | chardata() | external_chardata(), + Result :: list() + | {error, list(), RestData} + | {incomplete, list(), binary()}, + RestData :: latin1_chardata() | chardata() | external_chardata(). characters_to_list(ML) -> unicode:characters_to_list(ML,unicode). @@ -51,7 +73,7 @@ characters_to_list_int(ML, Encoding) -> _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML,Encoding])), erlang:raise(error,TheError,[{Mod,characters_to_list,L}|Rest]) @@ -69,6 +91,13 @@ do_characters_to_list(ML, Encoding) -> end. +-spec characters_to_binary(Data) -> Result when + Data :: latin1_chardata() | chardata() | external_chardata(), + Result :: binary() + | {error, binary(), RestData} + | {incomplete, binary(), binary()}, + RestData :: latin1_chardata() | chardata() | external_chardata(). + characters_to_binary(ML) -> try unicode:characters_to_binary(ML,unicode) @@ -80,7 +109,7 @@ characters_to_binary(ML) -> _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML])), erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) @@ -98,12 +127,21 @@ characters_to_binary_int(ML,InEncoding) -> _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML,InEncoding])), erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) end. +-spec characters_to_binary(Data, InEncoding, OutEncoding) -> Result when + Data :: latin1_chardata() | chardata() | external_chardata(), + InEncoding :: encoding(), + OutEncoding :: encoding(), + Result :: binary() + | {error, binary(), RestData} + | {incomplete, binary(), binary()}, + RestData :: latin1_chardata() | chardata() | external_chardata(). + characters_to_binary(ML, latin1, latin1) when is_binary(ML) -> ML; characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or (Uni =:= unicode)) -> @@ -121,7 +159,7 @@ characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML,latin1,Uni])), erlang:raise(error,TheError, @@ -143,7 +181,7 @@ characters_to_binary(ML,Uni,latin1) when is_binary(ML) and ((Uni =:= utf8) or _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML,Uni,latin1])), erlang:raise(error,TheError, @@ -162,7 +200,7 @@ characters_to_binary(ML, InEncoding, OutEncoding) -> _ -> badarg end, - {'EXIT',{new_stacktrace,[{Mod,_,L}|Rest]}} = + {'EXIT',{new_stacktrace,[{Mod,_,L,_}|Rest]}} = (catch erlang:error(new_stacktrace, [ML,InEncoding,OutEncoding])), erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest]) @@ -215,6 +253,13 @@ characters_to_binary_int(ML, InEncoding, OutEncoding) -> Res end. +-spec bom_to_encoding(Bin) -> {Encoding, Length} when + Bin :: binary(), + Encoding :: 'latin1' | 'utf8' + | {'utf16', endian()} + | {'utf32', endian()}, + Length :: non_neg_integer(). + bom_to_encoding(<<239,187,191,_/binary>>) -> {utf8,3}; bom_to_encoding(<<0,0,254,255,_/binary>>) -> @@ -228,6 +273,10 @@ bom_to_encoding(<<255,254,_/binary>>) -> bom_to_encoding(Bin) when is_binary(Bin) -> {latin1,0}. +-spec encoding_to_bom(InEncoding) -> Bin when + Bin :: binary(), + InEncoding :: encoding(). + encoding_to_bom(unicode) -> <<239,187,191>>; encoding_to_bom(utf8) -> |