From c596e17cf3d69cf5e10d28ee2a8ee35162786da1 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 23 Jan 2013 16:04:38 +0100 Subject: erl_interface: Changed erlang_char_encoding interface to allow bitwise-or'd combinations. --- lib/erl_interface/doc/src/ei.xml | 13 +++++++------ lib/erl_interface/include/ei.h | 7 ++++++- lib/erl_interface/src/decode/decode_atom.c | 2 +- lib/erl_interface/src/encode/encode_atom.c | 3 +-- lib/erl_interface/src/legacy/erl_marshal.c | 6 +++--- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/lib/erl_interface/doc/src/ei.xml b/lib/erl_interface/doc/src/ei.xml index e9c7c644b5..117c787da6 100644 --- a/lib/erl_interface/doc/src/ei.xml +++ b/lib/erl_interface/doc/src/ei.xml @@ -91,14 +91,13 @@

enum erlang_char_encoding { - ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER + ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8 };

The character encoding used for atoms. ERLANG_ASCII represents 7-bit ASCII. Latin1 and UTF8 are different extensions of 7-bit ASCII. All 7-bit ASCII characters are valid Latin1 and UTF8 characters. ASCII and Latin1 both represent each character - by one byte. A UTF8 character can consist of one to four bytes. ERLANG_WHATEVER - is not an encoding but rather used as a wildcard.

+ by one byte. A UTF8 character can consist of one to four bytes.

@@ -545,11 +544,13 @@ ei_x_encode_empty_list(&x); want. The original encoding used in the binary format (latin1 or utf8) can be obtained from *was. The actual encoding of the resulting string (7-bit ascii, latin1 or utf8) can be obtained from *result. Both was and result can be NULL. - *result may differ from want if want is ERLANG_WHATEVER or if - *result turn out to be pure 7-bit ascii (compatible with both latin1 and utf8).

+ + *result may differ from want if want is a bitwise-or'd combination like + ERLANG_LATIN1|ERLANG_UTF8 or if *result turns out to be pure 7-bit ascii + (compatible with both latin1 and utf8).

This function fails if the atom is too long for the buffer or if it can not be represented with encoding want.

-

This functions was introduced in R16 release of Erlang/OTP as part of a first step +

This function was introduced in the R16 release of Erlang/OTP as part of a first step to support UTF8 atoms.

diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h index 20e575f64d..2278a28adb 100644 --- a/lib/erl_interface/include/ei.h +++ b/lib/erl_interface/include/ei.h @@ -190,7 +190,12 @@ extern volatile int __erl_errno; #define MAXATOMLEN_UTF8 (255*4 + 1) #define MAXNODELEN EI_MAXALIVELEN+1+EI_MAXHOSTNAMELEN -enum erlang_char_encoding { ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER }; +enum erlang_char_encoding { + ERLANG_ASCII = 1, + ERLANG_LATIN1 = 2, + ERLANG_UTF8 = 4, + ERLANG_ANY = ERLANG_ASCII|ERLANG_LATIN1|ERLANG_UTF8 +}; /* a pid */ typedef struct { diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c index 2ada418243..556c400cb3 100644 --- a/lib/erl_interface/src/decode/decode_atom.c +++ b/lib/erl_interface/src/decode/decode_atom.c @@ -58,7 +58,7 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen, return -1; } - if (want_enc == got_enc || want_enc == ERLANG_WHATEVER || want_enc == ERLANG_ASCII) { + if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) { int i, found_non_ascii = 0; if (len >= destlen) return -1; diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c index 8bbe962396..044f17cb60 100644 --- a/lib/erl_interface/src/encode/encode_atom.c +++ b/lib/erl_interface/src/encode/encode_atom.c @@ -59,8 +59,7 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, char *s0 = s; int offs; - if (len >= MAXATOMLEN && (from_enc == ERLANG_LATIN1 || - from_enc == ERLANG_ASCII)) { + if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) { return -1; } diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c index 884e9d421b..4c45cebb02 100644 --- a/lib/erl_interface/src/legacy/erl_marshal.c +++ b/lib/erl_interface/src/legacy/erl_marshal.c @@ -662,7 +662,7 @@ static int read_atom(unsigned char** ext, Erl_Atom_data* a) int offs = 0; enum 
erlang_char_encoding enc; int ret = ei_decode_atom_as((char*)*ext, &offs, buf, MAXATOMLEN_UTF8, - ERLANG_WHATEVER, NULL, &enc); + ERLANG_LATIN1|ERLANG_UTF8, NULL, &enc); *ext += offs; if (ret == 0) { @@ -674,11 +674,11 @@ static int read_atom(unsigned char** ext, Erl_Atom_data* a) a->lenL = 0; a->utf8 = NULL; a->lenU = 0; - if (enc == ERLANG_LATIN1 || enc == ERLANG_ASCII) { + if (enc & (ERLANG_LATIN1 | ERLANG_ASCII)) { a->latin1 = clone; a->lenL = i; } - if (enc == ERLANG_UTF8 || enc == ERLANG_ASCII) { + if (enc & (ERLANG_UTF8 | ERLANG_ASCII)) { a->utf8 = clone; a->lenU = i; } -- cgit v1.2.3