diff options
Diffstat (limited to 'lib/stdlib/src/unicode.erl')
-rw-r--r-- | lib/stdlib/src/unicode.erl | 64 |
1 files changed, 61 insertions, 3 deletions
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 09b1deff9c..a5d9965ca2 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
 %%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
@@ -25,8 +25,39 @@
 %% InEncoding is not {latin1 | unicode | utf8})
 %%
 
--export([characters_to_list/1, characters_to_list_int/2, characters_to_binary/1,characters_to_binary_int/2, characters_to_binary/3,bom_to_encoding/1, encoding_to_bom/1]).
-
+-export([characters_to_list/1, characters_to_list_int/2,
+         characters_to_binary/1, characters_to_binary_int/2,
+         characters_to_binary/3,
+         bom_to_encoding/1, encoding_to_bom/1]).
+
+-export_type([chardata/0, charlist/0, encoding/0, external_chardata/0,
+              external_charlist/0, latin1_chardata/0,
+              latin1_charlist/0, unicode_binary/0, unicode_char/0]).
+
+-type encoding() :: 'latin1' | 'unicode' | 'utf8'
+                  | 'utf16' | {'utf16', endian()}
+                  | 'utf32' | {'utf32', endian()}.
+-type endian() :: 'big' | 'little'.
+-type unicode_binary() :: binary().
+-type unicode_char() :: non_neg_integer().
+-type charlist() :: [unicode_char() | unicode_binary() | charlist()].
+-type chardata() :: charlist() | unicode_binary().
+-type external_unicode_binary() :: binary().
+-type external_chardata() :: external_charlist() | external_unicode_binary().
+-type external_charlist() :: [unicode_char() | external_unicode_binary()
+                              | external_charlist()].
+-type latin1_binary() :: binary().
+-type latin1_char() :: byte().
+-type latin1_chardata() :: latin1_charlist() | latin1_binary().
+-type latin1_charlist() :: [latin1_char() | latin1_binary()
+                            | latin1_charlist()].
+
+-spec characters_to_list(Data) -> Result when
+      Data :: latin1_chardata() | chardata() | external_chardata(),
+      Result :: list()
+              | {error, list(), RestData}
+              | {incomplete, list(), binary()},
+      RestData :: latin1_chardata() | chardata() | external_chardata().
 characters_to_list(ML) ->
     unicode:characters_to_list(ML,unicode).
 
@@ -60,6 +91,13 @@ do_characters_to_list(ML, Encoding) ->
     end.
 
 
+-spec characters_to_binary(Data) -> Result when
+      Data :: latin1_chardata() | chardata() | external_chardata(),
+      Result :: binary()
+              | {error, binary(), RestData}
+              | {incomplete, binary(), binary()},
+      RestData :: latin1_chardata() | chardata() | external_chardata().
+
 characters_to_binary(ML) ->
     try
         unicode:characters_to_binary(ML,unicode)
@@ -95,6 +133,15 @@ characters_to_binary_int(ML,InEncoding) ->
             erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
     end.
 
+-spec characters_to_binary(Data, InEncoding, OutEncoding) -> Result when
+      Data :: latin1_chardata() | chardata() | external_chardata(),
+      InEncoding :: encoding(),
+      OutEncoding :: encoding(),
+      Result :: binary()
+              | {error, binary(), RestData}
+              | {incomplete, binary(), binary()},
+      RestData :: latin1_chardata() | chardata() | external_chardata().
+
 characters_to_binary(ML, latin1, latin1) when is_binary(ML) ->
     ML;
 characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or (Uni =:= unicode)) ->
@@ -206,6 +253,13 @@ characters_to_binary_int(ML, InEncoding, OutEncoding) ->
             Res
     end.
 
+-spec bom_to_encoding(Bin) -> {Encoding, Length} when
+      Bin :: binary(),
+      Encoding :: 'latin1' | 'utf8'
+                | {'utf16', endian()}
+                | {'utf32', endian()},
+      Length :: non_neg_integer().
+
 bom_to_encoding(<<239,187,191,_/binary>>) ->
     {utf8,3};
 bom_to_encoding(<<0,0,254,255,_/binary>>) ->
@@ -219,6 +273,10 @@ bom_to_encoding(<<255,254,_/binary>>) ->
 bom_to_encoding(Bin) when is_binary(Bin) ->
     {latin1,0}.
 
+-spec encoding_to_bom(InEncoding) -> Bin when
+      Bin :: binary(),
+      InEncoding :: encoding().
+
 encoding_to_bom(unicode) ->
     <<239,187,191>>;
 encoding_to_bom(utf8) -> |