From c5651a54b7248a6100c546a1104f23db414ff8f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Tue, 25 Mar 2014 10:22:48 +0100 Subject: Correct the UTF8String description and example --- lib/asn1/doc/src/asn1_ug.xml | 93 ++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/asn1/doc/src/asn1_ug.xml b/lib/asn1/doc/src/asn1_ug.xml index 1da4cce5a9..2475eaa153 100644 --- a/lib/asn1/doc/src/asn1_ug.xml +++ b/lib/asn1/doc/src/asn1_ug.xml @@ -748,51 +748,58 @@ ok {ok,<<30,20,0,66,0,77,0,80,0,32,0,115,0,116,0,114,0,105,0,110,0,103>>} 7> 'PrimStrings':decode('BMP', Bytes3). {ok,"BMP string"} -

The UTF8String is represented in Erlang as a list of integers, - where each integer represents the unicode value of one - character. When a value shall be encoded one first has to - transform it to a UTF8 encoded binary, then it can be encoded by - asn1. When decoding the result is a UTF8 encoded binary, which - may be transformed to an integer list. The transformation - functions, utf8_binary_to_list and - utf8_list_to_binary, are in the asn1rt module. In - the example below we assume an asn1 definition UTF ::= UTF8String in a module UTF.asn:

+ +

The UTF8String type is represented as a UTF-8 encoded binary in + Erlang. Such binaries can be created directly using the bit syntax + (with the utf8 type specifier) or by converting from a list of Unicode code points using the + unicode:characters_to_binary/1 function.

+ +

Here are some examples showing how UTF-8 encoded binaries can + be created and manipulated:

+ +
+1> Gs = "Мой маленький Гном".
+[1052,1086,1081,32,1084,1072,1083,1077,1085,1100,1082,1080,
+ 1081,32,1043,1085,1086,1084]
+2> Gbin = unicode:characters_to_binary(Gs).
+<<208,156,208,190,208,185,32,208,188,208,176,208,187,208,
+  181,208,189,209,140,208,186,208,184,208,185,32,208,147,
+  208,...>>
+3> Gbin = <<"Мой маленький Гном"/utf8>>.
+<<208,156,208,190,208,185,32,208,188,208,176,208,187,208,
+  181,208,189,209,140,208,186,208,184,208,185,32,208,147,
+  208,...>>
+4> Gs = unicode:characters_to_list(Gbin).
+[1052,1086,1081,32,1084,1072,1083,1077,1085,1100,1082,1080,
+ 1081,32,1043,1085,1086,1084]
+      
+ +

See the unicode module + for more details.

+ +

In the following example we will use this ASN.1 specification:

-1> asn1ct:compile('UTF',[ber]).
-Erlang ASN.1 version "1.4.3.3" compiling "UTF.asn" 
-Compiler Options: [ber]
---{generated,"UTF.asn1db"}--
---{generated,"UTF.erl"}--
+UTF DEFINITIONS AUTOMATIC TAGS ::=
+BEGIN
+   UTF ::= UTF8String
+END
+      
+ +

Encoding and decoding a string with Unicode characters:

+ +
+5> asn1ct:compile('UTF', [ber]).
+ok
+6> {ok,Bytes1} = 'UTF':encode('UTF', <<"Гном"/utf8>>).
+{ok,<<12,8,208,147,208,189,208,190,208,188>>}
+7> {ok,Bin1} = 'UTF':decode('UTF', Bytes1).
+{ok,<<208,147,208,189,208,190,208,188>>}
+8> io:format("~ts\n", [Bin1]).
+Гном
 ok
-2> UTF8Val1 = "hello".
-"hello"
-3> {ok,UTF8bin1} = asn1rt:utf8_list_to_binary(UTF8Val1).
-{ok,<<104,101,108,108,111>>}
-4> {ok,B}='UTF':encode('UTF',UTF8bin1).
-{ok,[12,
-     5,
-     <<104,101,108,108,111>>]}
-5> Bin = list_to_binary(B).
-<<12,5,104,101,108,108,111>>
-6> {ok,UTF8bin1}='UTF':decode('UTF',Bin).
-{ok,<<104,101,108,108,111>>}
-7> asn1rt:utf8_binary_to_list(UTF8bin1).
-{ok,"hello"}           
-8> UTF8Val2 = [16#00,16#100,16#ffff,16#ffffff].
-[0,256,65535,16777215]
-9> {ok,UTF8bin2} = asn1rt:utf8_list_to_binary(UTF8Val2).
-{ok,<<0,196,128,239,191,191,248,191,191,191,191>>}
-10> {ok,B2} = 'UTF':encode('UTF',UTF8bin2).
-{ok,[12,
-     11,
-     <<0,196,128,239,191,191,248,191,191,191,191>>]}
-11> Bin2 = list_to_binary(B2).
-<<12,11,0,196,128,239,191,191,248,191,191,191,191>>
-12> {ok,UTF8bin2} = 'UTF':decode('UTF',Bin2).
-{ok,<<0,196,128,239,191,191,248,191,191,191,191>>}
-13> asn1rt:utf8_binary_to_list(UTF8bin2).
-{ok,[0,256,65535,16777215]}
-14>       
+9> unicode:characters_to_list(Bin1). +[1043,1085,1086,1084] +
-- cgit v1.2.3