about summary refs log tree commit diff stats
path: root/lib/stdlib/src/unicode.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/stdlib/src/unicode.erl')
-rw-r--r-- lib/stdlib/src/unicode.erl 64
1 files changed, 61 insertions, 3 deletions
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 09b1deff9c..a5d9965ca2 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2008-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2008-2011. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -25,8 +25,39 @@
%% InEncoding is not {latin1 | unicode | utf8})
%%
--export([characters_to_list/1, characters_to_list_int/2, characters_to_binary/1,characters_to_binary_int/2, characters_to_binary/3,bom_to_encoding/1, encoding_to_bom/1]).
-
+-export([characters_to_list/1, characters_to_list_int/2,
+ characters_to_binary/1, characters_to_binary_int/2,
+ characters_to_binary/3,
+ bom_to_encoding/1, encoding_to_bom/1]).
+
+-export_type([chardata/0, charlist/0, encoding/0, external_chardata/0,
+ external_charlist/0, latin1_chardata/0,
+ latin1_charlist/0, unicode_binary/0, unicode_char/0]).
+
+-type encoding() :: 'latin1' | 'unicode' | 'utf8'
+ | 'utf16' | {'utf16', endian()}
+ | 'utf32' | {'utf32', endian()}.
+-type endian() :: 'big' | 'little'.
+-type unicode_binary() :: binary().
+-type unicode_char() :: non_neg_integer().
+-type charlist() :: [unicode_char() | unicode_binary() | charlist()].
+-type chardata() :: charlist() | unicode_binary().
+-type external_unicode_binary() :: binary().
+-type external_chardata() :: external_charlist() | external_unicode_binary().
+-type external_charlist() :: [unicode_char() | external_unicode_binary()
+ | external_charlist()].
+-type latin1_binary() :: binary().
+-type latin1_char() :: byte().
+-type latin1_chardata() :: latin1_charlist() | latin1_binary().
+-type latin1_charlist() :: [latin1_char() | latin1_binary()
+ | latin1_charlist()].
+
+-spec characters_to_list(Data) -> Result when
+ Data :: latin1_chardata() | chardata() | external_chardata(),
+ Result :: list()
+ | {error, list(), RestData}
+ | {incomplete, list(), binary()},
+ RestData :: latin1_chardata() | chardata() | external_chardata().
characters_to_list(ML) ->
unicode:characters_to_list(ML,unicode).
@@ -60,6 +91,13 @@ do_characters_to_list(ML, Encoding) ->
end.
+-spec characters_to_binary(Data) -> Result when
+ Data :: latin1_chardata() | chardata() | external_chardata(),
+ Result :: binary()
+ | {error, binary(), RestData}
+ | {incomplete, binary(), binary()},
+ RestData :: latin1_chardata() | chardata() | external_chardata().
+
characters_to_binary(ML) ->
try
unicode:characters_to_binary(ML,unicode)
@@ -95,6 +133,15 @@ characters_to_binary_int(ML,InEncoding) ->
erlang:raise(error,TheError,[{Mod,characters_to_binary,L}|Rest])
end.
+-spec characters_to_binary(Data, InEncoding, OutEncoding) -> Result when
+ Data :: latin1_chardata() | chardata() | external_chardata(),
+ InEncoding :: encoding(),
+ OutEncoding :: encoding(),
+ Result :: binary()
+ | {error, binary(), RestData}
+ | {incomplete, binary(), binary()},
+ RestData :: latin1_chardata() | chardata() | external_chardata().
+
characters_to_binary(ML, latin1, latin1) when is_binary(ML) ->
ML;
characters_to_binary(ML, latin1, Uni) when is_binary(ML) and ((Uni =:= utf8) or (Uni =:= unicode)) ->
@@ -206,6 +253,13 @@ characters_to_binary_int(ML, InEncoding, OutEncoding) ->
Res
end.
+-spec bom_to_encoding(Bin) -> {Encoding, Length} when
+ Bin :: binary(),
+ Encoding :: 'latin1' | 'utf8'
+ | {'utf16', endian()}
+ | {'utf32', endian()},
+ Length :: non_neg_integer().
+
bom_to_encoding(<<239,187,191,_/binary>>) ->
{utf8,3};
bom_to_encoding(<<0,0,254,255,_/binary>>) ->
@@ -219,6 +273,10 @@ bom_to_encoding(<<255,254,_/binary>>) ->
bom_to_encoding(Bin) when is_binary(Bin) ->
{latin1,0}.
+-spec encoding_to_bom(InEncoding) -> Bin when
+ Bin :: binary(),
+ InEncoding :: encoding().
+
encoding_to_bom(unicode) ->
<<239,187,191>>;
encoding_to_bom(utf8) ->