From b553664f54034e8c04ae6f9cc44f16b7f516518b Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Fri, 11 Jan 2013 17:27:29 +0100 Subject: erl_interface: utf8 atoms continued --- lib/erl_interface/doc/src/ei.xml | 59 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 57 insertions(+), 2 deletions(-) (limited to 'lib/erl_interface/doc/src/ei.xml') diff --git a/lib/erl_interface/doc/src/ei.xml b/lib/erl_interface/doc/src/ei.xml index 539e16d837..0b0b1eeb79 100644 --- a/lib/erl_interface/doc/src/ei.xml +++ b/lib/erl_interface/doc/src/ei.xml @@ -82,6 +82,22 @@ function returns the size required (note that for strings an extra byte is needed for the 0 string terminator).

+
+ DATA TYPES + + + enum erlang_char_encoding + +

+ +enum erlang_char_encoding { + ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER +}; + +

The character encoding used for atoms.

+
+
+
voidei_set_compat_rel(release_number) @@ -225,11 +241,31 @@ Encode an atom

Encodes an atom in the binary format. The parameter - is the name of the atom. Only upto bytes + is the name of the atom in latin1 encoding. Only up to MAXATOMLEN-1 bytes are encoded. The name should be zero-terminated, except for the function.

+ + intei_encode_atom_as(char *buf, int *index, const char *p, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc) + intei_encode_atom_len_as(char *buf, int *index, const char *p, int len, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc) + intei_x_encode_atom_as(ei_x_buff* x, const char *p, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc) + intei_x_encode_atom_len_as(ei_x_buff* x, const char *p, int len, enum erlang_char_encoding from_enc, enum erlang_char_encoding to_enc) + Encode an atom + +

Encodes an atom in the binary format with character encoding + to_enc (latin1 or utf8). + The p parameter is the name of the atom with character encoding + from_enc. + Either the name must be zero-terminated, or a function variant with a len + parameter must be used.

+

The encoding will fail if the atom is too long or if it cannot be represented + with character encoding to_enc.

+

These functions were introduced in the R16 release of Erlang/OTP as part of a first step + to support UTF8 atoms. Atoms encoded with ERLANG_UTF8 + cannot be decoded by releases earlier than R16.

+
+
intei_encode_binary(char *buf, int *index, const void *p, long len) intei_x_encode_binary(ei_x_buff* x, const void *p, long len) @@ -490,10 +526,29 @@ ei_x_encode_empty_list(&x); Decode an atom

This function decodes an atom from the binary format. The - name of the atom is placed at . There can be at most + null-terminated name of the atom is placed at . There can be at most bytes placed in the buffer.

+ + intei_decode_atom_as(const char *buf, int *index, char *p, int plen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result) + Decode an atom + +

This function decodes an atom from the binary format. The + null-terminated name of the atom is placed in the buffer at p, which is + plen bytes long.

+

The wanted string encoding is specified by + want. The original encoding used in the + binary format (latin1 or utf8) can be obtained from *was. The actual encoding of the resulting string + (7-bit ascii, latin1 or utf8) can be obtained from *result. Both was and result can be NULL. + *result may differ from want if want is ERLANG_WHATEVER or if + the decoded name turns out to be pure 7-bit ascii (compatible with both latin1 and utf8).

+

This function fails if the atom is too long for the buffer + or if it cannot be represented with encoding want.

+

This function was introduced in the R16 release of Erlang/OTP as part of a first step + to support UTF8 atoms.

+
+
intei_decode_binary(const char *buf, int *index, void *p, long *len) Decode a binary -- cgit v1.2.3