+
+
+ void ei_set_compat_rel(release_number)
@@ -225,11 +241,31 @@
Encode an atom
Encodes an atom in the binary format. The parameter
- is the name of the atom. Only upto bytes
+ is the name of the atom in latin1 encoding. Only up to MAXATOMLEN-1 bytes
are encoded. The name should be zero-terminated, except for
the function.
Encodes an atom in the binary format with character encoding
+ to_enc (latin1 or utf8).
+ The p parameter is the name of the atom with character encoding
+ from_enc.
+ The name must either be zero-terminated or a function variant with a len
+ parameter must be used.
+
The encoding will fail if the atom is too long or if it can not be represented
+ with character encoding to_enc.
+
These functions were introduced in the R16 release of Erlang/OTP as part of a first step
+ to support UTF8 atoms. Atoms encoded with ERLANG_UTF8
+ cannot be decoded by releases earlier than R16.
+
+ int ei_encode_binary(char *buf, int *index, const void *p, long len) int ei_x_encode_binary(ei_x_buff* x, const void *p, long len)
@@ -490,10 +526,29 @@ ei_x_encode_empty_list(&x);
Decode an atom
This function decodes an atom from the binary format. The
- name of the atom is placed at . There can be at most
+ null terminated name of the atom is placed at p. There can be at most
MAXATOMLEN bytes placed in the buffer.
+
+ int ei_decode_atom_as(const char *buf, int *index, char *p, int plen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result)
+ Decode an atom
+
+
This function decodes an atom from the binary format. The
+ null terminated name of the atom is placed in buffer at p of length
+ plen bytes.
+
The wanted string encoding is specified by
+ want. The original encoding used in the
+ binary format (latin1 or utf8) can be obtained from *was. The actual encoding of the resulting string
+ (7-bit ascii, latin1 or utf8) can be obtained from *result. Both was and result can be NULL.
+ *result may differ from want if want is ERLANG_WHATEVER or if
+ *result turns out to be pure 7-bit ascii (compatible with both latin1 and utf8).
+
This function fails if the atom is too long for the buffer
+ or if it can not be represented with encoding want.
+
This function was introduced in the R16 release of Erlang/OTP as part of a first step
+ to support UTF8 atoms.
+
+ int ei_decode_binary(const char *buf, int *index, void *p, long *len) Decode a binary
diff --git a/lib/erl_interface/doc/src/erl_eterm.xml b/lib/erl_interface/doc/src/erl_eterm.xml
index f403618c59..c7840d7813 100644
--- a/lib/erl_interface/doc/src/erl_eterm.xml
+++ b/lib/erl_interface/doc/src/erl_eterm.xml
@@ -77,10 +77,12 @@
+ A string representing atom .
- The length (in characters) of atom t.
+
+ The length (in bytes) of atom t.A pointer to the contents of
@@ -92,6 +94,7 @@
The floating point value of .
+ The Node in pid .The sequence number in pid .
@@ -104,6 +107,7 @@
The creation number in port .
+ The node in port .The first part of the reference number in ref . Use
@@ -296,7 +300,7 @@ iohead ::= Binary
ETERM *erl_mk_atom(string)Creates an atom
- char *string;
+ const char *string;
Creates an atom.
@@ -305,10 +309,12 @@ iohead ::= Binary
Returns an Erlang term containing an atom. Note that it is
the caller's responsibility to make sure that string
contains a valid name for an atom.
-
can be used to retrieve the
- atom name (as a string). Note that the string is not
- 0-terminated in the atom. returns
- the length of the atom name.
+
ERL_ATOM_PTR(atom) and ERL_ATOM_PTR_UTF8(atom)
+ can be used to retrieve the atom name (as a null terminated string).
+ ERL_ATOM_SIZE(atom) and ERL_ATOM_SIZE_UTF8(atom) return the length of the atom name.
+
Note that the UTF8 variants were introduced in Erlang/OTP release R16
+ and that the string returned by ERL_ATOM_PTR(atom) was not null terminated in older releases.
+
diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h
index 8f07b24852..20e575f64d 100644
--- a/lib/erl_interface/include/ei.h
+++ b/lib/erl_interface/include/ei.h
@@ -116,7 +116,8 @@
#define NEW_FLOAT_EXT 'F'
#define ERL_ATOM_EXT 'd'
#define ERL_SMALL_ATOM_EXT 's'
-#define ERL_UNICODE_ATOM_EXT 'v'
+#define ERL_ATOM_UTF8_EXT 'v'
+#define ERL_SMALL_ATOM_UTF8_EXT 'w'
#define ERL_REFERENCE_EXT 'e'
#define ERL_NEW_REFERENCE_EXT 'r'
#define ERL_PORT_EXT 'f'
@@ -185,12 +186,16 @@ extern volatile int __erl_errno;
#define EI_MAXHOSTNAMELEN 64
#define EI_MAXALIVELEN 63
#define EI_MAX_COOKIE_SIZE 512
-#define MAXATOMLEN 255
+#define MAXATOMLEN (255 + 1)
+#define MAXATOMLEN_UTF8 (255*4 + 1)
#define MAXNODELEN EI_MAXALIVELEN+1+EI_MAXHOSTNAMELEN
+enum erlang_char_encoding { ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER };
+
/* a pid */
typedef struct {
- char node[MAXATOMLEN+1];
+ char node[MAXATOMLEN_UTF8];
+ enum erlang_char_encoding node_org_enc;
unsigned int num;
unsigned int serial;
unsigned int creation;
@@ -198,14 +203,16 @@ typedef struct {
/* a port */
typedef struct {
- char node[MAXATOMLEN+1];
+ char node[MAXATOMLEN_UTF8];
+ enum erlang_char_encoding node_org_enc;
unsigned int id;
unsigned int creation;
} erlang_port;
/* a ref */
typedef struct {
- char node[MAXATOMLEN+1];
+ char node[MAXATOMLEN_UTF8];
+ enum erlang_char_encoding node_org_enc;
int len;
unsigned int n[3];
unsigned int creation;
@@ -225,15 +232,16 @@ typedef struct {
long msgtype;
erlang_pid from;
erlang_pid to;
- char toname[MAXATOMLEN+1];
- char cookie[MAXATOMLEN+1];
+ char toname[MAXATOMLEN_UTF8];
+ char cookie[MAXATOMLEN_UTF8];
erlang_trace token;
} erlang_msg;
/* a fun */
typedef struct {
long arity;
- char module[MAXATOMLEN+1];
+ char module[MAXATOMLEN_UTF8];
+ enum erlang_char_encoding module_org_enc;
char md5[16];
long index;
long old_index;
@@ -258,7 +266,7 @@ typedef struct {
union {
long i_val;
double d_val;
- char atom_name[MAXATOMLEN+1];
+ char atom_name[MAXATOMLEN_UTF8];
erlang_pid pid;
erlang_port port;
erlang_ref ref;
@@ -427,9 +435,17 @@ int ei_encode_string_len(char *buf, int *index, const char *p, int len);
int ei_x_encode_string(ei_x_buff* x, const char* s);
int ei_x_encode_string_len(ei_x_buff* x, const char* s, int len);
int ei_encode_atom(char *buf, int *index, const char *p);
+int ei_encode_atom_as(char *buf, int *index, const char *p,
+ enum erlang_char_encoding from, enum erlang_char_encoding to);
int ei_encode_atom_len(char *buf, int *index, const char *p, int len);
+int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
+ enum erlang_char_encoding from, enum erlang_char_encoding to);
int ei_x_encode_atom(ei_x_buff* x, const char* s);
+int ei_x_encode_atom_as(ei_x_buff* x, const char* s,
+ enum erlang_char_encoding from, enum erlang_char_encoding to);
int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len);
+int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len,
+ enum erlang_char_encoding from, enum erlang_char_encoding to);
int ei_encode_binary(char *buf, int *index, const void *p, long len);
int ei_x_encode_binary(ei_x_buff* x, const void* s, int len);
int ei_encode_pid(char *buf, int *index, const erlang_pid *p);
@@ -479,6 +495,7 @@ int ei_decode_boolean(const char *buf, int *index, int *p);
int ei_decode_char(const char *buf, int *index, char *p);
int ei_decode_string(const char *buf, int *index, char *p);
int ei_decode_atom(const char *buf, int *index, char *p);
+int ei_decode_atom_as(const char *buf, int *index, char *p, int destlen, enum erlang_char_encoding want, enum erlang_char_encoding* was, enum erlang_char_encoding* result);
int ei_decode_binary(const char *buf, int *index, void *p, long *len);
int ei_decode_fun(const char* buf, int* index, erlang_fun* p);
void free_fun(erlang_fun* f);
diff --git a/lib/erl_interface/include/erl_interface.h b/lib/erl_interface/include/erl_interface.h
index 1c4a94700d..98acc0d71d 100644
--- a/lib/erl_interface/include/erl_interface.h
+++ b/lib/erl_interface/include/erl_interface.h
@@ -95,19 +95,24 @@
#define ERL_FLOAT_VALUE(x) ((x)->uval.fval.f)
-#define ERL_ATOM_PTR(x) ((x)->uval.aval.a)
-#define ERL_ATOM_SIZE(x) ((x)->uval.aval.len)
+#define ERL_ATOM_PTR(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_PTR_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_SIZE(x) erl_atom_size_latin1((Erl_Atom_data*) &(x)->uval.aval.d)
+#define ERL_ATOM_SIZE_UTF8(x) erl_atom_size_utf8((Erl_Atom_data*) &(x)->uval.aval.d)
-#define ERL_PID_NODE(x) ((x)->uval.pidval.node)
+#define ERL_PID_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.pidval.node)
+#define ERL_PID_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.pidval.node)
#define ERL_PID_NUMBER(x) ((x)->uval.pidval.number)
#define ERL_PID_SERIAL(x) ((x)->uval.pidval.serial)
#define ERL_PID_CREATION(x) ((x)->uval.pidval.creation)
-#define ERL_PORT_NODE(x) ((x)->uval.portval.node)
+#define ERL_PORT_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.portval.node)
+#define ERL_PORT_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.portval.node)
#define ERL_PORT_NUMBER(x) ((x)->uval.portval.number)
#define ERL_PORT_CREATION(x) ((x)->uval.portval.creation)
-#define ERL_REF_NODE(x) ((x)->uval.refval.node)
+#define ERL_REF_NODE(x) erl_atom_ptr_latin1((Erl_Atom_data*) &(x)->uval.refval.node)
+#define ERL_REF_NODE_UTF8(x) erl_atom_ptr_utf8((Erl_Atom_data*) &(x)->uval.refval.node)
#define ERL_REF_NUMBER(x) ((x)->uval.refval.n[0])
#define ERL_REF_NUMBERS(x) ((x)->uval.refval.n)
#define ERL_REF_LEN(x) ((x)->uval.refval.len)
@@ -182,15 +187,27 @@ typedef struct {
double f;
} Erl_Float;
+typedef struct { /* atom name cached in up to two encodings; see the erl_atom_ptr_ and erl_atom_size_ accessors */
+ char *utf8; /* name in UTF-8, or NULL until erl_atom_ptr_utf8() builds it */
+ int lenU; /* length of utf8 in bytes */
+ char *latin1; /* name in latin1, or NULL until erl_atom_ptr_latin1() builds it (may alias utf8 for pure ASCII names) */
+ int lenL; /* length of latin1 in bytes */
+} Erl_Atom_data;
+
+char* erl_atom_ptr_latin1(Erl_Atom_data*);
+char* erl_atom_ptr_utf8(Erl_Atom_data*);
+int erl_atom_size_latin1(Erl_Atom_data*);
+int erl_atom_size_utf8(Erl_Atom_data*);
+char* erl_atom_init_latin1(Erl_Atom_data*, const char*);
+
typedef struct {
Erl_Header h;
- int len;
- char *a;
+ Erl_Atom_data d;
} Erl_Atom;
typedef struct {
Erl_Header h;
- char * node;
+ Erl_Atom_data node;
unsigned int number;
unsigned int serial;
unsigned char creation;
@@ -198,14 +215,14 @@ typedef struct {
typedef struct {
Erl_Header h;
- char * node;
+ Erl_Atom_data node;
unsigned int number;
unsigned char creation;
} Erl_Port;
typedef struct {
Erl_Header h;
- char * node;
+ Erl_Atom_data node;
int len;
unsigned int n[3];
unsigned char creation;
@@ -289,7 +306,7 @@ typedef struct _eterm {
} ETERM;
-#define MAXREGLEN 255 /* max length of registered (atom) name */
+#define MAXREGLEN (255*4) /* max length of registered (atom) name */
typedef struct {
int type; /* one of the message type constants in eiext.h */
@@ -409,6 +426,7 @@ unsigned char erl_ext_type(unsigned char*); /* Note: returned 'char' before R9C
unsigned char *erl_peek_ext(unsigned char*,int);
int erl_term_len(ETERM*);
+int cmp_latin1_vs_utf8(const char* sL, int lenL, const char* sU, int lenU);
/* -------------------------------------------------------------------- */
/* Wrappers around ei functions */
diff --git a/lib/erl_interface/src/connect/ei_connect.c b/lib/erl_interface/src/connect/ei_connect.c
index 34362b4b9f..a17257795e 100644
--- a/lib/erl_interface/src/connect/ei_connect.c
+++ b/lib/erl_interface/src/connect/ei_connect.c
@@ -459,6 +459,7 @@ int ei_connect_xinit(ei_cnode* ec, const char *thishostname,
/* memmove(&ec->this_ipaddr, thisipaddr, sizeof(ec->this_ipaddr)); */
strcpy(ec->self.node,thisnodename);
+ ec->self.node_org_enc = ERLANG_LATIN1;
ec->self.num = 0;
ec->self.serial = 0;
ec->self.creation = creation;
@@ -1332,7 +1333,9 @@ static int send_name_or_challenge(int fd, char *nodename,
| DFLAG_EXTENDED_PIDS_PORTS
| DFLAG_FUN_TAGS
| DFLAG_NEW_FUN_TAGS
- | DFLAG_NEW_FLOATS));
+ | DFLAG_NEW_FLOATS
+ | DFLAG_SMALL_ATOM_TAGS
+ | DFLAG_UTF8_ATOMS));
if (f_chall)
put32be(s, challenge);
memcpy(s, nodename, strlen(nodename));
diff --git a/lib/erl_interface/src/connect/ei_connect_int.h b/lib/erl_interface/src/connect/ei_connect_int.h
index 3c42b49b82..81c384e38d 100644
--- a/lib/erl_interface/src/connect/ei_connect_int.h
+++ b/lib/erl_interface/src/connect/ei_connect_int.h
@@ -102,6 +102,8 @@ extern int h_errno;
#define DFLAG_NEW_FUN_TAGS 0x80
#define DFLAG_EXTENDED_PIDS_PORTS 0x100
#define DFLAG_NEW_FLOATS 0x800
+#define DFLAG_SMALL_ATOM_TAGS 0x4000
+#define DFLAG_UTF8_ATOMS 0x10000
ei_cnode *ei_fd_to_cnode(int fd);
int ei_distversion(int fd);
diff --git a/lib/erl_interface/src/connect/eirecv.c b/lib/erl_interface/src/connect/eirecv.c
index 86852f947d..075f78e3d2 100644
--- a/lib/erl_interface/src/connect/eirecv.c
+++ b/lib/erl_interface/src/connect/eirecv.c
@@ -108,7 +108,7 @@ ei_recv_internal (int fd,
switch (msg->msgtype) {
case ERL_SEND: /* { SEND, Cookie, ToPid } */
if (ei_tracelevel >= 4) show_this_msg = 1;
- if (ei_decode_atom(header,&index,msg->cookie)
+ if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg->to))
{
erl_errno = EIO;
@@ -120,8 +120,8 @@ ei_recv_internal (int fd,
case ERL_REG_SEND: /* { REG_SEND, From, Cookie, ToName } */
if (ei_tracelevel >= 4) show_this_msg = 1;
if (ei_decode_pid(header,&index,&msg->from)
- || ei_decode_atom(header,&index,msg->cookie)
- || ei_decode_atom(header,&index,msg->toname))
+ || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL))
{
erl_errno = EIO;
return -1;
@@ -157,7 +157,7 @@ ei_recv_internal (int fd,
case ERL_SEND_TT: /* { SEND_TT, Cookie, ToPid, TraceToken } */
if (ei_tracelevel >= 4) show_this_msg = 1;
- if (ei_decode_atom(header,&index,msg->cookie)
+ if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg->to)
|| ei_decode_trace(header,&index,&msg->token))
{
@@ -171,8 +171,8 @@ ei_recv_internal (int fd,
case ERL_REG_SEND_TT: /* { REG_SEND_TT, From, Cookie, ToName, TraceToken } */
if (ei_tracelevel >= 4) show_this_msg = 1;
if (ei_decode_pid(header,&index,&msg->from)
- || ei_decode_atom(header,&index,msg->cookie)
- || ei_decode_atom(header,&index,msg->toname)
+ || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL)
|| ei_decode_trace(header,&index,&msg->token))
{
erl_errno = EIO;
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index 84edf1766a..2ada418243 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -21,76 +21,155 @@
#include "eiext.h"
#include "putget.h"
-static int utf8_to_latin1(char* dest, const char* source, unsigned len);
int ei_decode_atom(const char *buf, int *index, char *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- int len;
-
- switch (get8(s)) {
- case ERL_ATOM_EXT:
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- if (p) {
- memmove(p,s,len);
- p[len] = (char)0;
- }
- break;
-
- case ERL_SMALL_ATOM_EXT:
- len = get8(s);
- if (p) {
- memmove(p,s,len);
- p[len] = (char)0;
- }
- break;
-
- case ERL_UNICODE_ATOM_EXT:
- len = get16be(s);
-
- if (len > 2*MAXATOMLEN) return -1;
-
- if (p && utf8_to_latin1(p, s, len) < 0) return -1;
- break;
-
- default:
- return -1;
- }
-
- s += len;
- *index += s-s0;
- return 0;
+ return ei_decode_atom_as(buf, index, p, MAXATOMLEN, ERLANG_LATIN1, NULL, NULL);
}
-int ei_internal_get_atom(const char** bufp, char* p)
+int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
+ enum erlang_char_encoding want_enc,
+ enum erlang_char_encoding* was_encp,
+ enum erlang_char_encoding* res_encp)
{
- int ix = 0;
- if (ei_decode_atom(*bufp, &ix, p) < 0) return -1;
- *bufp += ix;
+ const char *s = buf + *index;
+ const char *s0 = s;
+ int len;
+ enum erlang_char_encoding got_enc;
+
+ switch (get8(s)) {
+ case ERL_ATOM_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_SMALL_ATOM_EXT:
+ len = get8(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_ATOM_UTF8_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ len = get8(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ default:
+ return -1;
+ }
+
+ if (want_enc == got_enc || want_enc == ERLANG_WHATEVER || want_enc == ERLANG_ASCII) {
+ int i, found_non_ascii = 0;
+ if (len >= destlen)
+ return -1;
+ for (i=0; i 0 && dest < dest_end) {
- if ((source[0] & 0x80) == 0) {
- *dest++ = *source++;
+ while (slen > 0) {
+ if (dst >= dst_end) return -1;
+ if ((src[0] & 0x80) == 0) {
+ if (dst_start) {
+ *dst = *src;
+ }
+ ++dst;
+ ++src;
--slen;
}
else if (slen > 1 &&
- (source[0] & 0xFE) == 0xC2 &&
- (source[1] & 0xC0) == 0x80) {
- *dest++ = (char) ((source[0] << 6) | (source[1] & 0x3F));
- source += 2;
+ (src[0] & 0xFE) == 0xC2 &&
+ (src[1] & 0xC0) == 0x80) {
+ if (dst_start) {
+ *dst = (char) ((src[0] << 6) | (src[1] & 0x3F));
+ }
+ ++dst;
+ src += 2;
slen -= 2;
+ found_non_ascii = 1;
}
else return -1;
}
- *dest = 0;
+ if (res_encp) {
+ *res_encp = found_non_ascii ? ERLANG_LATIN1 : ERLANG_ASCII;
+ }
+ return dst - dst_start;
+}
+
+/*
+ * Convert slen bytes of latin1 text at src into UTF-8.
+ *
+ * dst      - destination buffer, or NULL to only compute the UTF-8 length
+ * destlen  - maximum number of bytes that may be produced
+ * res_encp - if non-NULL, set on success to ERLANG_ASCII when every source
+ *            byte was 7-bit, otherwise to ERLANG_UTF8
+ *
+ * Returns the number of bytes produced (no terminator is written), or -1 if
+ * the result does not fit in destlen bytes.
+ */
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
+                   enum erlang_char_encoding* res_encp)
+{
+    int found_non_ascii = 0;
+    int used = 0;
+    int i;
+
+    for (i = 0; i < slen; i++) {
+        unsigned char ch = (unsigned char) src[i];
+        /* Bytes above 0x7F become a two byte UTF-8 sequence. */
+        int need = (ch & 0x80) ? 2 : 1;
+
+        /* Check room for the whole sequence, not just its first byte, so a
+           two byte sequence can never run one byte past the limit. */
+        if (need > destlen - used) return -1;
+        if (dst) {
+            if (need == 1) {
+                dst[used] = (char) ch;
+            }
+            else {
+                dst[used] = (char) (0xC0 | (ch >> 6));
+                dst[used + 1] = (char) (0x80 | (ch & 0x3F));
+            }
+        }
+        if (need == 2) found_non_ascii = 1;
+        used += need;
+    }
+    if (res_encp) {
+        *res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII;
+    }
+    return used;
+}
+
+
+
+int ei_internal_get_atom(const char** bufp, char* p,
+ enum erlang_char_encoding* was_encp)
+{
+ int ix = 0;
+ if (ei_decode_atom_as(*bufp, &ix, p, MAXATOMLEN_UTF8, ERLANG_UTF8, was_encp, NULL) < 0)
+ return -1;
+ *bufp += ix;
return 0;
}
+
diff --git a/lib/erl_interface/src/decode/decode_boolean.c b/lib/erl_interface/src/decode/decode_boolean.c
index 0a7a06f1d4..f20690249b 100644
--- a/lib/erl_interface/src/decode/decode_boolean.c
+++ b/lib/erl_interface/src/decode/decode_boolean.c
@@ -24,12 +24,11 @@
/* c non-zero -> erlang "true" atom, otherwise "false" */
int ei_decode_boolean(const char *buf, int *index, int *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- char tbuf[MAXATOMLEN+1];
+ char tbuf[6];
int t;
- if (get_atom(&s, tbuf) < 0) return -1;
+ if (ei_decode_atom_as(buf, index, tbuf, sizeof(tbuf), ERLANG_ASCII, NULL, NULL) < 0)
+ return -1;
if (memcmp(tbuf, "true", 5) == 0)
t = 1;
@@ -39,7 +38,6 @@ int ei_decode_boolean(const char *buf, int *index, int *p)
return -1;
if (p) *p = t;
- *index += s-s0;
return 0;
}
diff --git a/lib/erl_interface/src/decode/decode_fun.c b/lib/erl_interface/src/decode/decode_fun.c
index 64fb9e86d8..7bbef5db44 100644
--- a/lib/erl_interface/src/decode/decode_fun.c
+++ b/lib/erl_interface/src/decode/decode_fun.c
@@ -42,7 +42,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (ei_decode_pid(s, &ix, (p == NULL ? (erlang_pid*)NULL : &p->pid)) < 0)
return -1;
/* then the module (atom) */
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->index)) < 0)
@@ -84,7 +85,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (p != NULL) p->n_free_vars = i;
/* then the module (atom) */
ix = 0;
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the old_index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->old_index)) < 0)
diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c
index a762ae499e..e79952195d 100644
--- a/lib/erl_interface/src/decode/decode_pid.c
+++ b/lib/erl_interface/src/decode/decode_pid.c
@@ -26,12 +26,11 @@ int ei_decode_pid(const char *buf, int *index, erlang_pid *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PID_EXT) return -1;
/* first the nodename */
- if (get_atom(&s, p->node) < 0) return -1;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c
index 6eb2bc9197..5fd96b51a4 100644
--- a/lib/erl_interface/src/decode/decode_port.c
+++ b/lib/erl_interface/src/decode/decode_port.c
@@ -25,12 +25,11 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PORT_EXT) return -1;
/* first the nodename */
- if (get_atom(&s, p->node) < 0) return -1;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c
index df3c30777b..7294e5d239 100644
--- a/lib/erl_interface/src/decode/decode_ref.c
+++ b/lib/erl_interface/src/decode/decode_ref.c
@@ -26,13 +26,13 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int count, len, i;
+ int count, i;
switch (get8(s)) {
case ERL_REFERENCE_EXT:
/* nodename */
- if (get_atom(&s, p->node) < 0) return -1;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
@@ -53,7 +53,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
if (p) p->len = count;
/* then the nodename */
- if (get_atom(&s, p->node) < 0) return -1;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* creation */
if (p) {
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index 6f41f045e0..a3d7c4c759 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -26,25 +26,95 @@ int ei_encode_atom(char *buf, int *index, const char *p)
{
size_t len = strlen(p);
- if (len >= INT_MAX) return -1;
- return ei_encode_atom_len(buf, index, p, len);
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN - 1;
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}
int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
+{
+ /* This function is documented to truncate at MAXATOMLEN (256) */
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN - 1;
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+}
+
+/*
+ * Encode the zero-terminated atom name p, given in encoding from_enc, as an
+ * atom in encoding to_enc. Measures p with strlen() and hands the work to
+ * ei_encode_atom_len_as(), whose result is returned unchanged.
+ */
+int ei_encode_atom_as(char *buf, int *index, const char *p,
+                      enum erlang_char_encoding from_enc,
+                      enum erlang_char_encoding to_enc)
+{
+    int name_len = strlen(p);
+
+    return ei_encode_atom_len_as(buf, index, p, name_len, from_enc, to_enc);
+}
+
+int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
{
char *s = buf + *index;
char *s0 = s;
+ int offs;
- /* This function is documented to truncate at MAXATOMLEN (256) */
- if (len > MAXATOMLEN)
- len = MAXATOMLEN;
+ if (from_enc == ERLANG_LATIN1 && len >= MAXATOMLEN) {
+ return -1;
+ }
- if (!buf) s += 3;
- else {
- put8(s,ERL_ATOM_EXT);
- put16be(s,len);
+ switch(to_enc) {
+ case ERLANG_LATIN1:
+ if (buf) {
+ put8(s,ERL_ATOM_EXT);
+ switch (from_enc) {
+ case ERLANG_UTF8:
+ len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
+ if (len < 0) return -1;
+ break;
+ case ERLANG_ASCII:
+ case ERLANG_LATIN1:
+ memcpy(s+2, p, len);
+ break;
+ default:
+ return -1;
+ }
+ put16be(s,len);
+ }
+ else {
+ s += 3;
+ if (from_enc == ERLANG_UTF8) {
+ len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
+ if (len < 0) return -1;
+ }
+ }
+ break;
+
+ case ERLANG_UTF8:
+ offs = 1 + 1;
+ switch (from_enc) {
+ case ERLANG_LATIN1:
+ if (len >= 256/2) offs++;
+ len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
+ break;
+ case ERLANG_ASCII:
+ case ERLANG_UTF8:
+ if (len >= 256) offs++;
+ if (buf) memcpy(s+offs, p, len);
+ break;
+ default:
+ return -1;
+ }
+ if (buf) {
+ if (offs == 2) {
+ put8(s, ERL_SMALL_ATOM_UTF8_EXT);
+ put8(s, len);
+ }
+ else {
+ put8(s, ERL_ATOM_UTF8_EXT);
+ put16be(s, len);
+ }
+ }
+ else s+= offs;
+ break;
- memmove(s,p,len); /* unterminated string */
+ default:
+ return -1;
}
s += len;
@@ -53,3 +123,13 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
return 0;
}
+/*
+ * Encode the slen byte UTF-8 atom name p at *bufp using encoding to_enc.
+ * On success *bufp is advanced by the index delta reported by
+ * ei_encode_atom_len_as() and 0 is returned; on failure -1 is returned and
+ * *bufp is left as it was.
+ */
+int
+ei_internal_put_atom(char** bufp, const char* p, int slen,
+                     enum erlang_char_encoding to_enc)
+{
+    int advance = 0;
+    int rc = ei_encode_atom_len_as(*bufp, &advance, p, slen, ERLANG_UTF8, to_enc);
+
+    if (rc < 0) {
+        return -1;
+    }
+    *bufp += advance;
+    return 0;
+}
diff --git a/lib/erl_interface/src/encode/encode_fun.c b/lib/erl_interface/src/encode/encode_fun.c
index 54ee2083d6..4daee32648 100644
--- a/lib/erl_interface/src/encode/encode_fun.c
+++ b/lib/erl_interface/src/encode/encode_fun.c
@@ -35,7 +35,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p)
ix += sizeof(char) + 4;
if (ei_encode_pid(buf, &ix, &p->pid) < 0)
return -1;
- if (ei_encode_atom(buf, &ix, p->module) < 0)
+ if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0)
return -1;
if (ei_encode_long(buf, &ix, p->index) < 0)
return -1;
@@ -60,7 +60,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p)
} else
size_p = NULL;
ix += 1 + 4 + 1 + sizeof(p->md5) + 4 + 4;
- if (ei_encode_atom(buf, &ix, p->module) < 0)
+ if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0)
return -1;
if (ei_encode_long(buf, &ix, p->old_index) < 0)
return -1;
diff --git a/lib/erl_interface/src/encode/encode_pid.c b/lib/erl_interface/src/encode/encode_pid.c
index ee7f235c17..0cf3ef4efb 100644
--- a/lib/erl_interface/src/encode/encode_pid.c
+++ b/lib/erl_interface/src/encode/encode_pid.c
@@ -24,29 +24,23 @@
int ei_encode_pid(char *buf, int *index, const erlang_pid *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
-
- if (!buf) s += 13 + len;
- else {
- put8(s,ERL_PID_EXT);
- /* first the nodename */
- put8(s,ERL_ATOM_EXT);
+ ++(*index); /* skip ERL_PID_EXT */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, p->node_org_enc) < 0)
+ return -1;
+
+ if (buf) {
+ put8(s,ERL_PID_EXT);
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put32be(s,p->num & 0x7fff); /* 15 bits */
put32be(s,p->serial & 0x1fff); /* 13 bits */
put8(s,(p->creation & 0x03)); /* 2 bits */
}
-
- *index += s-s0;
-
+
+ *index += 4 + 4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/encode/encode_port.c b/lib/erl_interface/src/encode/encode_port.c
index fbbb33182e..2bf9e26d78 100644
--- a/lib/erl_interface/src/encode/encode_port.c
+++ b/lib/erl_interface/src/encode/encode_port.c
@@ -24,28 +24,23 @@
int ei_encode_port(char *buf, int *index, const erlang_port *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
-
- if (!buf) s += 9 + len;
- else {
- put8(s,ERL_PORT_EXT);
- /* first the nodename */
- put8(s,ERL_ATOM_EXT);
+ ++(*index); /* skip ERL_PORT_EXT */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8,
+ p->node_org_enc) < 0) {
+ return -1;
+ }
+ if (buf) {
+ put8(s,ERL_PORT_EXT);
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put32be(s,p->id & 0x0fffffff /* 28 bits */);
put8(s,(p->creation & 0x03));
}
- *index += s-s0;
-
+ *index += 4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/encode/encode_ref.c b/lib/erl_interface/src/encode/encode_ref.c
index 292b452864..e8b3173315 100644
--- a/lib/erl_interface/src/encode/encode_ref.c
+++ b/lib/erl_interface/src/encode/encode_ref.c
@@ -24,36 +24,32 @@
int ei_encode_ref(char *buf, int *index, const erlang_ref *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
int i;
+ (*index) += 1 + 2; /* skip to node atom */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8,
+ p->node_org_enc) < 0) {
+ return -1;
+ }
+
/* Always encode as an extended reference; all participating parties
are now expected to be able to decode extended references. */
- if (!buf) s += 1 + 2 + (3+len) + p->len*4 + 1;
- else {
+ if (buf) {
put8(s,ERL_NEW_REFERENCE_EXT);
/* first, number of integers */
put16be(s, p->len);
/* then the nodename */
- put8(s,ERL_ATOM_EXT);
-
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put8(s,(p->creation & 0x03));
for (i = 0; i < p->len; i++)
put32be(s,p->n[i]);
-
- }
-
- *index += s-s0;
+ }
+ *index += p->len*4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/legacy/erl_connect.c b/lib/erl_interface/src/legacy/erl_connect.c
index 41d4fa3138..be83fa8469 100644
--- a/lib/erl_interface/src/legacy/erl_connect.c
+++ b/lib/erl_interface/src/legacy/erl_connect.c
@@ -247,9 +247,15 @@ int erl_send(int fd, ETERM *to ,ETERM *msg)
erl_errno = EINVAL;
return -1;
}
-
- strncpy(topid.node, (char *)ERL_PID_NODE(to), sizeof(topid.node));
- topid.node[sizeof(topid.node)-1] = '\0';
+
+ if (to->uval.pidval.node.latin1) {
+ strcpy(topid.node, to->uval.pidval.node.latin1);
+ topid.node_org_enc = ERLANG_LATIN1;
+ }
+ else {
+ strcpy(topid.node, to->uval.pidval.node.utf8);
+ topid.node_org_enc = ERLANG_UTF8;
+ }
topid.num = ERL_PID_NUMBER(to);
topid.serial = ERL_PID_SERIAL(to);
topid.creation = ERL_PID_CREATION(to);
@@ -263,7 +269,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg)
erlang_msg msg;
int r;
- msg.from.node[0] = msg.to.node[0] = '\0';
+ msg.from.node[0] = msg.to.node[0] = msg.toname[0] = '\0';
r = ei_do_receive_msg(fd, 0, &msg, x, 0);
if (r == ERL_MSG) {
@@ -299,7 +305,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg)
emsg->to = erl_mk_pid(msg.to.node, msg.to.num, msg.to.serial, msg.to.creation);
else
emsg->to = NULL;
- memcpy(emsg->to_name, msg.toname, MAXATOMLEN+1);
+ strcpy(emsg->to_name, msg.toname);
return r;
}
diff --git a/lib/erl_interface/src/legacy/erl_eterm.c b/lib/erl_interface/src/legacy/erl_eterm.c
index 8d559f0f55..aa0fd5ddcf 100644
--- a/lib/erl_interface/src/legacy/erl_eterm.c
+++ b/lib/erl_interface/src/legacy/erl_eterm.c
@@ -36,6 +36,7 @@
#include "erl_error.h"
#include "erl_internal.h"
#include "ei_internal.h"
+#include "putget.h"
#define ERL_IS_BYTE(x) (ERL_IS_INTEGER(x) && (ERL_INT_VALUE(x) & ~0xFF) == 0)
@@ -142,9 +143,7 @@ ETERM *erl_mk_atom (const char *s)
ep = erl_alloc_eterm(ERL_ATOM);
ERL_COUNT(ep) = 1;
- ERL_ATOM_SIZE(ep) = strlen(s);
- if ((ERL_ATOM_PTR(ep) = strsave(s)) == NULL)
- {
+ if (erl_atom_init_latin1(&ep->uval.aval.d, s) == NULL) {
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
@@ -152,6 +151,65 @@ ETERM *erl_mk_atom (const char *s)
return ep;
}
+/*
+ * Return the atom name in latin1, converting it from the UTF-8 form on
+ * first use and caching the result in a.
+ *
+ * When the name is pure 7-bit ASCII the UTF-8 buffer is shared instead of
+ * copied; otherwise a null terminated converted copy is allocated.
+ * Returns NULL, with a->lenL reset to 0, if the name cannot be converted
+ * to latin1 or if the buffer for the converted copy cannot be allocated.
+ */
+char* erl_atom_ptr_latin1(Erl_Atom_data* a)
+{
+    if (a->latin1 == NULL) {
+        enum erlang_char_encoding enc;
+        char* conv;
+
+        /* A NULL destination makes this first pass measure only. */
+        a->lenL = utf8_to_latin1(NULL, a->utf8, a->lenU, a->lenU, &enc);
+        if (a->lenL < 0) {
+            a->lenL = 0;
+            return NULL;
+        }
+        if (enc == ERLANG_ASCII) {
+            a->latin1 = a->utf8;
+        }
+        else {
+            conv = malloc(a->lenL+1);
+            if (conv == NULL) {
+                /* Out of memory: report failure instead of writing
+                   through a NULL pointer. */
+                a->lenL = 0;
+                return NULL;
+            }
+            utf8_to_latin1(conv, a->utf8, a->lenU, a->lenL, NULL);
+            conv[a->lenL] = '\0';
+            a->latin1 = conv;
+        }
+    }
+    return a->latin1;
+}
+
+/*
+ * Return the atom name in UTF-8, converting it from the latin1 form on
+ * first use and caching the null terminated result in a.
+ *
+ * Returns NULL, with a->lenU reset to 0, if the buffer for the converted
+ * name cannot be allocated.
+ */
+char* erl_atom_ptr_utf8(Erl_Atom_data* a)
+{
+    if (a->utf8 == NULL) {
+        /* Each latin1 byte becomes at most two UTF-8 bytes. */
+        int dlen = a->lenL * 2; /* over estimation */
+        char* conv = malloc(dlen + 1);
+
+        if (conv == NULL) {
+            /* Out of memory: report failure instead of writing through
+               a NULL pointer. */
+            a->lenU = 0;
+            return NULL;
+        }
+        a->lenU = latin1_to_utf8(conv, a->latin1, a->lenL, dlen, NULL);
+        conv[a->lenU] = '\0';
+        a->utf8 = conv;
+    }
+    return a->utf8;
+}
+/*
+ * Length in bytes of the atom name in latin1. If the latin1 form has not
+ * been produced yet, erl_atom_ptr_latin1() is called first so that a->lenL
+ * is filled in.
+ */
+int erl_atom_size_latin1(Erl_Atom_data* a)
+{
+    if (!a->latin1)
+        (void) erl_atom_ptr_latin1(a);
+    return a->lenL;
+}
+/*
+ * Length in bytes of the atom name in UTF-8. If the UTF-8 form has not been
+ * produced yet, erl_atom_ptr_utf8() is called first so that a->lenU is
+ * filled in.
+ */
+int erl_atom_size_utf8(Erl_Atom_data* a)
+{
+    if (!a->utf8)
+        (void) erl_atom_ptr_utf8(a);
+    return a->lenU;
+}
+/*
+ * Initialise a from the zero-terminated latin1 name s: record its length,
+ * store the string returned by strsave(s) (presumably a heap copy - confirm
+ * against strsave) and mark the UTF-8 form as not yet built.
+ * Returns the stored string, or NULL if strsave() fails; in that case the
+ * UTF-8 fields are not touched.
+ */
+char* erl_atom_init_latin1(Erl_Atom_data* a, const char* s)
+{
+    char* saved;
+
+    a->lenL = strlen(s);
+    saved = strsave(s);
+    a->latin1 = saved;
+    if (saved == NULL) {
+        return NULL;
+    }
+    a->utf8 = NULL;
+    a->lenU = 0;
+    return saved;
+}
+
+
/*
* Given a string as input, creates a list.
*/
@@ -208,12 +266,19 @@ ETERM *erl_mk_pid(const char *node,
ep = erl_alloc_eterm(ERL_PID);
ERL_COUNT(ep) = 1;
- if ((ERL_PID_NODE(ep) = strsave(node)) == NULL)
+ if (erl_atom_init_latin1(&ep->uval.pidval.node, node) == NULL)
{
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
}
+ erl_mk_pid_helper(ep, number, serial, creation);
+ return ep;
+}
+
+void erl_mk_pid_helper(ETERM *ep, unsigned int number,
+ unsigned int serial, unsigned char creation)
+{
ERL_PID_NUMBER(ep) = number & 0x7fff; /* 15 bits */
if (ei_internal_use_r9_pids_ports()) {
ERL_PID_SERIAL(ep) = serial & 0x07; /* 3 bits */
@@ -222,7 +287,6 @@ ETERM *erl_mk_pid(const char *node,
ERL_PID_SERIAL(ep) = serial & 0x1fff; /* 13 bits */
}
ERL_PID_CREATION(ep) = creation & 0x03; /* 2 bits */
- return ep;
}
/*
@@ -239,12 +303,18 @@ ETERM *erl_mk_port(const char *node,
ep = erl_alloc_eterm(ERL_PORT);
ERL_COUNT(ep) = 1;
- if ((ERL_PORT_NODE(ep) = strsave(node)) == NULL)
+ if (erl_atom_init_latin1(&ep->uval.portval.node, node) == NULL)
{
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
}
+ erl_mk_port_helper(ep, number, creation);
+ return ep;
+}
+
+void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation)
+{
if (ei_internal_use_r9_pids_ports()) {
ERL_PORT_NUMBER(ep) = number & 0x3ffff; /* 18 bits */
}
@@ -252,29 +322,29 @@ ETERM *erl_mk_port(const char *node,
ERL_PORT_NUMBER(ep) = number & 0x0fffffff; /* 18 bits */
}
ERL_PORT_CREATION(ep) = creation & 0x03; /* 2 bits */
- return ep;
}
/*
* Create any kind of reference.
*/
-ETERM *__erl_mk_reference (const char *node,
+ETERM *__erl_mk_reference (ETERM* t,
+ const char *node,
size_t len,
unsigned int n[],
unsigned char creation)
{
- ETERM * t;
-
- if (node == NULL) return NULL;
-
- t = erl_alloc_eterm(ERL_REF);
- ERL_COUNT(t) = 1;
-
- if ((ERL_REF_NODE(t) = strsave(node)) == NULL)
- {
- erl_free_term(t);
- erl_errno = ENOMEM;
- return NULL;
+ if (t == NULL) {
+ if (node == NULL) return NULL;
+
+ t = erl_alloc_eterm(ERL_REF);
+ ERL_COUNT(t) = 1;
+
+ if (erl_atom_init_latin1(&t->uval.refval.node, node) == NULL)
+ {
+ erl_free_term(t);
+ erl_errno = ENOMEM;
+ return NULL;
+ }
}
ERL_REF_LEN(t) = len;
ERL_REF_NUMBERS(t)[0] = n[0] & 0x3ffff; /* 18 bits */
@@ -294,7 +364,7 @@ ETERM *erl_mk_ref (const char *node,
{
unsigned int n[3] = {0, 0, 0};
n[0] = number;
- return __erl_mk_reference(node, 1, n, creation);
+ return __erl_mk_reference(NULL, node, 1, n, creation);
}
/*
@@ -307,7 +377,7 @@ erl_mk_long_ref (const char *node,
{
unsigned int n[3] = {0, 0, 0};
n[0] = n3; n[1] = n2; n[2] = n1;
- return __erl_mk_reference(node, 3, n, creation);
+ return __erl_mk_reference(NULL, node, 3, n, creation);
}
/*
@@ -758,6 +828,28 @@ int erl_iolist_length (const ETERM* term)
return -1;
}
+/*
+ * Duplicate the atom name held in src into dst.
+ *
+ * Exactly one string is copied with strsave():
+ *  - when src->latin1 and src->utf8 are the same pointer, dst gets one copy
+ *    that both of its pointers share;
+ *  - otherwise the latin1 form is copied if present, else the UTF-8 form.
+ * The representation that is not copied is reset to NULL / length 0.
+ *
+ * Returns non-zero on success and 0 when strsave() returned NULL, i.e.
+ * when dst ended up without any name at all.
+ */
+static int erl_atom_copy(Erl_Atom_data* dst, const Erl_Atom_data* src)
+{
+    if (src->latin1 == src->utf8) {
+        dst->latin1 = dst->utf8 = strsave(src->latin1);
+        dst->lenL = dst->lenU = strlen(src->latin1);
+    }
+    else if (src->latin1) {
+        dst->latin1 = strsave(src->latin1);
+        dst->lenL = strlen(src->latin1);
+        dst->utf8 = NULL;
+        dst->lenU = 0;
+    }
+    else {
+        dst->utf8 = strsave(src->utf8);
+        dst->lenU = strlen(src->utf8);
+        dst->latin1 = NULL;
+        dst->lenL = 0;
+    }
+    /* Every branch sets at least one pointer on success and none on failure. */
+    return (dst->latin1 != NULL || dst->utf8 != NULL);
+}
+
+
/*
* Return a brand NEW COPY of an ETERM.
*/
@@ -796,9 +888,7 @@ ETERM *erl_copy_term(const ETERM *ep)
ERL_FLOAT_VALUE(cp) = ERL_FLOAT_VALUE(ep);
break;
case ERL_ATOM:
- ERL_ATOM_SIZE(cp) = ERL_ATOM_SIZE(ep);
- ERL_ATOM_PTR(cp) = strsave(ERL_ATOM_PTR(ep));
- if (ERL_ATOM_PTR(cp) == NULL)
+ if (!erl_atom_copy(&cp->uval.aval.d, &ep->uval.aval.d))
{
erl_free_term(cp);
erl_errno = ENOMEM;
@@ -810,17 +900,17 @@ ETERM *erl_copy_term(const ETERM *ep)
name and plug in. Somewhat ugly (also done with port and
ref below). */
memcpy(&cp->uval.pidval, &ep->uval.pidval, sizeof(Erl_Pid));
- ERL_PID_NODE(cp) = strsave(ERL_PID_NODE(ep));
+ erl_atom_copy(&cp->uval.pidval.node, &ep->uval.pidval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_PORT:
memcpy(&cp->uval.portval, &ep->uval.portval, sizeof(Erl_Port));
- ERL_PORT_NODE(cp) = strsave(ERL_PORT_NODE(ep));
+ erl_atom_copy(&cp->uval.portval.node, &ep->uval.portval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_REF:
memcpy(&cp->uval.refval, &ep->uval.refval, sizeof(Erl_Ref));
- ERL_REF_NODE(cp) = strsave(ERL_REF_NODE(ep));
+ erl_atom_copy(&cp->uval.refval.node, &ep->uval.refval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_LIST:
@@ -883,29 +973,29 @@ int erl_print_term(FILE *fp, const ETERM *ep)
j = i = doquote = 0;
switch(ERL_TYPE(ep))
{
- case ERL_ATOM:
+ case ERL_ATOM: {
+ char* adata = ERL_ATOM_PTR(ep);
+ /* <ctype.h> classifiers take an unsigned char value (or EOF); a plain
+  * char is negative for bytes >= 0x80 where char is signed, so cast. */
/* FIXME: what if some weird locale is in use? */
- if (!islower((int)ERL_ATOM_PTR(ep)[0]))
+ if (!islower((unsigned char)adata[0]))
doquote = 1;
for (i = 0; !doquote && i < ERL_ATOM_SIZE(ep); i++)
{
- doquote = !(isalnum((int)ERL_ATOM_PTR(ep)[i])
- || (ERL_ATOM_PTR(ep)[i] == '_'));
+ doquote = !(isalnum((unsigned char)adata[i]) || (adata[i] == '_'));
}
if (doquote) {
putc('\'', fp);
ch_written++;
}
- fputs(ERL_ATOM_PTR(ep), fp);
+ fputs(adata, fp);
ch_written += ERL_ATOM_SIZE(ep);
if (doquote) {
putc('\'', fp);
ch_written++;
}
break;
-
+ }
case ERL_VARIABLE:
if (!isupper((int)ERL_VAR_NAME(ep)[0])) {
doquote = 1;
diff --git a/lib/erl_interface/src/legacy/erl_eterm.h b/lib/erl_interface/src/legacy/erl_eterm.h
index 41b008f04f..2e8129d9cd 100644
--- a/lib/erl_interface/src/legacy/erl_eterm.h
+++ b/lib/erl_interface/src/legacy/erl_eterm.h
@@ -55,7 +55,9 @@ typedef struct _heapmark {
} Erl_HeapMark;
-ETERM * __erl_mk_reference(const char *, size_t, unsigned int n[], unsigned char);
+void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation);
+void erl_mk_pid_helper(ETERM*, unsigned,unsigned, unsigned char);
+ETERM * __erl_mk_reference(ETERM*, const char *, size_t, unsigned int n[], unsigned char);
int erl_current_fix_desc(void);
#endif /* _ERL_ETERM_H */
diff --git a/lib/erl_interface/src/legacy/erl_format.c b/lib/erl_interface/src/legacy/erl_format.c
index dc85806c36..533241e396 100644
--- a/lib/erl_interface/src/legacy/erl_format.c
+++ b/lib/erl_interface/src/legacy/erl_format.c
@@ -574,10 +574,22 @@ static int ematch(ETERM *p, ETERM *t)
switch (type_p) {
- case ERL_ATOM:
- return p->uval.aval.len == t->uval.aval.len &&
- memcmp(p->uval.aval.a, t->uval.aval.a, p->uval.aval.len) == 0;
-
+ case ERL_ATOM: {
+ Erl_Atom_data* pa = &p->uval.aval.d;
+ Erl_Atom_data* ta = &t->uval.aval.d;
+ if (pa->utf8 && ta->utf8) {
+ return pa->lenU == ta->lenU && memcmp(pa->utf8, ta->utf8, pa->lenU)==0;
+ }
+ else if (pa->latin1 && ta->latin1) {
+ return pa->lenL == ta->lenL && memcmp(pa->latin1, ta->latin1, pa->lenL)==0;
+ }
+ else if (pa->latin1) {
+ return cmp_latin1_vs_utf8(pa->latin1, pa->lenL, ta->utf8, ta->lenU)==0;
+ }
+ else {
+ return cmp_latin1_vs_utf8(ta->latin1, ta->lenL, pa->utf8, pa->lenU)==0;
+ }
+ }
case ERL_VARIABLE:
if (strcmp(p->uval.vval.name, "_") == 0) /* anon. variable */
return ERL_TRUE;
diff --git a/lib/erl_interface/src/legacy/erl_malloc.c b/lib/erl_interface/src/legacy/erl_malloc.c
index f51a6c69b3..d09239e02d 100644
--- a/lib/erl_interface/src/legacy/erl_malloc.c
+++ b/lib/erl_interface/src/legacy/erl_malloc.c
@@ -112,6 +112,18 @@ do { \
(ptr) = NULL; \
} while (0)
+/*
+ * Release the name buffers owned by an atom record and reset it to empty.
+ *
+ * latin1 and utf8 may point at the same buffer; in that case it is freed
+ * only once.
+ */
+static void erl_atom_free(Erl_Atom_data* p)
+{
+    /* Compare before freeing: a pointer's value must not be read once the
+     * memory it referred to has been released. */
+    int shared = (p->utf8 == p->latin1);
+
+    erl_free(p->latin1);
+    if (!shared) {
+        erl_free(p->utf8);
+    }
+    p->latin1 = NULL;
+    p->utf8 = NULL;
+    p->lenL = 0;
+    p->lenU = 0;
+}
+
static void _erl_free_term (ETERM *ep, int external, int compound)
{
restart:
@@ -122,7 +134,7 @@ restart:
switch(ERL_TYPE(ep))
{
case ERL_ATOM:
- FREE_AND_CLEAR(ERL_ATOM_PTR(ep));
+ erl_atom_free(&ep->uval.aval.d);
break;
case ERL_VARIABLE:
FREE_AND_CLEAR(ERL_VAR_NAME(ep));
@@ -161,13 +173,13 @@ restart:
FREE_AND_CLEAR(ERL_BIN_PTR(ep));
break;
case ERL_PID:
- FREE_AND_CLEAR(ERL_PID_NODE(ep));
+ erl_atom_free(&ep->uval.pidval.node);
break;
case ERL_PORT:
- FREE_AND_CLEAR(ERL_PORT_NODE(ep));
+ erl_atom_free(&ep->uval.portval.node);
break;
case ERL_REF:
- FREE_AND_CLEAR(ERL_REF_NODE(ep));
+ erl_atom_free(&ep->uval.refval.node);
break;
case ERL_EMPTY_LIST:
case ERL_INTEGER:
diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c
index 775d7e82ca..884e9d421b 100644
--- a/lib/erl_interface/src/legacy/erl_marshal.c
+++ b/lib/erl_interface/src/legacy/erl_marshal.c
@@ -44,6 +44,9 @@ int erl_fp_compare(unsigned *a, unsigned *b);
static void erl_long_to_fp(long l, unsigned *d);
#endif
+static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2);
+static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1, unsigned char* s2, int l2, unsigned char tag2);
+
/* Used when comparing two encoded byte arrays */
/* this global data is ok (from threading point of view) since it is
* initialized once and never changed
@@ -111,8 +114,9 @@ void erl_init_marshal(void)
cmp_array[ERL_SMALL_BIG_EXT] = ERL_NUM_CMP;
cmp_array[ERL_LARGE_BIG_EXT] = ERL_NUM_CMP;
cmp_array[ERL_ATOM_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_ATOM_UTF8_EXT] = ERL_ATOM_CMP;
cmp_array[ERL_SMALL_ATOM_EXT] = ERL_ATOM_CMP;
- cmp_array[ERL_UNICODE_ATOM_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_SMALL_ATOM_UTF8_EXT] = ERL_ATOM_CMP;
cmp_array[ERL_REFERENCE_EXT] = ERL_REF_CMP;
cmp_array[ERL_NEW_REFERENCE_EXT] = ERL_REF_CMP;
cmp_array[ERL_FUN_EXT] = ERL_FUN_CMP;
@@ -162,6 +166,21 @@ static int erl_length_x(const ETERM *ep) {
*==============================================================
*/
+/*
+ * Write the atom `a` at *ext in external format and advance *ext past it.
+ *
+ * A latin1 form, when present, is encoded as latin1. Otherwise the UTF-8
+ * form is first offered for encoding as latin1, and encoded as UTF-8 only
+ * if that attempt returns a negative result.
+ */
+static void encode_atom(Erl_Atom_data* a, unsigned char **ext)
+{
+    char* out = (char*)*ext;
+    int ix = 0;
+
+    if (a->latin1 != NULL) {
+        ei_encode_atom_len_as(out, &ix, a->latin1, a->lenL,
+                              ERLANG_LATIN1, ERLANG_LATIN1);
+    }
+    else {
+        int rc = ei_encode_atom_len_as(out, &ix, a->utf8, a->lenU,
+                                       ERLANG_UTF8, ERLANG_LATIN1);
+        if (rc < 0) {
+            ei_encode_atom_len_as(out, &ix, a->utf8, a->lenU,
+                                  ERLANG_UTF8, ERLANG_UTF8);
+        }
+    }
+    *ext += ix;
+}
+
/*
* The actual ENCODE engine.
* Returns 0 on success, otherwise 1.
@@ -176,12 +195,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
switch(ERL_TYPE(ep))
{
case ERL_ATOM:
- i = ep->uval.aval.len;
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy((void *) *ext, (const void *) ep->uval.aval.a, i);
- *ext += i;
+ encode_atom(&ep->uval.aval.d, ext);
return 0;
case ERL_INTEGER:
@@ -292,12 +306,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
case ERL_PID:
*(*ext)++ = ERL_PID_EXT;
/* First poke in node as an atom */
- i = strlen((char *)ERL_PID_NODE(ep));
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_PID_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.pidval.node, ext);
/* And then fill in the integer fields */
i = ERL_PID_NUMBER(ep);
*(*ext)++ = (i >> 24) &0xff;
@@ -325,11 +334,8 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
*(*ext)++ = (len >> 8) &0xff;
*(*ext)++ = len &0xff;
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >> 8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_REF_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.refval.node, ext);
+
*(*ext)++ = ERL_REF_CREATION(ep);
/* Then the integer fields */
for (j = 0; j < ERL_REF_LEN(ep); j++) {
@@ -344,12 +350,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
case ERL_PORT:
*(*ext)++ = ERL_PORT_EXT;
/* First poke in node as an atom */
- i = strlen((char *)ERL_PORT_NODE(ep));
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_PORT_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.portval.node, ext);
/* Then the integer fields */
i = ERL_PORT_NUMBER(ep);
*(*ext)++ = (i >> 24) &0xff;
@@ -500,6 +501,16 @@ int erl_term_len(ETERM *ep)
return 1+erl_term_len_helper(ep, 4);
}
+/*
+ * Number of bytes reserved for atom `a` in external format.
+ *
+ * If a latin1 form can be obtained: tag byte + 2 length bytes + name.
+ * Otherwise the UTF-8 name is counted with a tag byte and a 1-byte length,
+ * or a 2-byte length when the name is longer than 255 bytes.
+ */
+static int atom_len_helper(Erl_Atom_data* a)
+{
+    int header;
+
+    if (erl_atom_ptr_latin1(a) != NULL)
+        return 3 + a->lenL; /* ERL_ATOM_EXT */
+
+    header = (a->lenU > 255) ? 3 : 2;
+    return header + a->lenU;
+}
+
static int erl_term_len_helper(ETERM *ep, int dist)
{
int len = 0;
@@ -511,8 +522,7 @@ static int erl_term_len_helper(ETERM *ep, int dist)
if (ep) {
switch (ERL_TYPE(ep)) {
case ERL_ATOM:
- i = ep->uval.aval.len;
- len = i + 3;
+ len = atom_len_helper(&ep->uval.aval.d);
break;
case ERL_INTEGER:
@@ -544,20 +554,15 @@ static int erl_term_len_helper(ETERM *ep, int dist)
break;
case ERL_PID:
- /* 1 + N + 4 + 4 + 1 where N = 3 + strlen */
- i = strlen((char *)ERL_PID_NODE(ep));
- len = 13 + i;
+ len = 1 + atom_len_helper(&ep->uval.pidval.node) + 4 + 4 + 1;
break;
case ERL_REF:
- i = strlen((char *)ERL_REF_NODE(ep));
- len = 1 + 2 + (i+3) + 1 + ERL_REF_LEN(ep) * 4;
+ len = 1 + 2 + atom_len_helper(&ep->uval.refval.node) + 1 + ERL_REF_LEN(ep) * 4;
break;
case ERL_PORT:
- /* 1 + N + 4 + 1 where N = 3 + strlen */
- i = strlen((char *)ERL_PORT_NODE(ep));
- len = 9 + i;
+ len = 1 + atom_len_helper(&ep->uval.portval.node) + 4 + 1;
break;
case ERL_EMPTY_LIST:
@@ -651,11 +656,33 @@ int erl_encode_buf(ETERM *ep, unsigned char **ext)
} /* erl_encode_buf */
+/*
+ * Decode the atom found at *ext into the atom record `a`.
+ *
+ * *ext is advanced by the offset that ei_decode_atom_as() stored in offs,
+ * whether or not decoding succeeded. When the decoder returns 0 the name is
+ * cloned with erl_malloc() and stored in `a`; otherwise `a` is not touched.
+ * Returns the result of ei_decode_atom_as().
+ */
-static int read_atom(unsigned char** ext, char* dst)
+static int read_atom(unsigned char** ext, Erl_Atom_data* a)
{
+ char buf[MAXATOMLEN_UTF8];
int offs = 0;
- int ret = ei_decode_atom((char*)*ext, &offs, dst);
+ enum erlang_char_encoding enc;
+ /* ERLANG_WHATEVER: no particular encoding is requested; enc receives
+  * the encoding of the string actually placed in buf. */
+ int ret = ei_decode_atom_as((char*)*ext, &offs, buf, MAXATOMLEN_UTF8,
+ ERLANG_WHATEVER, NULL, &enc);
*ext += offs;
+
+ if (ret == 0) {
+ int i = strlen(buf);
+ char* clone = erl_malloc(i+1);
+ memcpy(clone, buf, i+1);
+
+ a->latin1 = NULL;
+ a->lenL = 0;
+ a->utf8 = NULL;
+ a->lenU = 0;
+ /* The two tests overlap on ERLANG_ASCII: a pure ASCII name is stored
+  * as both the latin1 and the UTF-8 form, sharing the one clone. */
+ if (enc == ERLANG_LATIN1 || enc == ERLANG_ASCII) {
+ a->latin1 = clone;
+ a->lenL = i;
+ }
+ if (enc == ERLANG_UTF8 || enc == ERLANG_ASCII) {
+ a->utf8 = clone;
+ a->lenU = i;
+ }
+ }
return ret;
}
@@ -665,7 +692,6 @@ static int read_atom(unsigned char** ext, char* dst)
*/
static ETERM *erl_decode_it(unsigned char **ext)
{
- char atom_buf[MAXATOMLEN+1];
char *cp;
ETERM *ep,*tp,*np;
unsigned int u,sign;
@@ -765,127 +791,89 @@ static ETERM *erl_decode_it(unsigned char **ext)
case ERL_ATOM_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+
ERL_TYPE(ep) = ERL_ATOM;
--(*ext);
- if (read_atom(ext, atom_buf) < 0) return NULL;
-
- i = strlen(atom_buf);
- ep->uval.aval.len = i;
- ep->uval.aval.a = (char *) erl_malloc(i+1);
- memcpy(ep->uval.aval.a, atom_buf, i+1);
+ if (read_atom(ext, &ep->uval.aval.d) < 0) return NULL;
return ep;
case ERL_PID_EXT:
- erl_free_term(ep);
- { /* Why not use the constructors? */
- char* node = atom_buf;
+ {
unsigned int number, serial;
unsigned char creation;
- ETERM *eterm_p;
- if (read_atom(ext, node) < 0) return NULL;
+ ERL_TYPE(ep) = ERL_PID;
+ if (read_atom(ext, &ep->uval.pidval.node) < 0) return NULL;
/* get the integers */
-#if 0
- /* FIXME: Remove code or whatever....
- Ints on the wire are big-endian (== network byte order)
- so use ntoh[sl]. (But some are little-endian! Arrrgh!)
- Also, the libc authors can be expected to optimize them
- heavily. However, the marshalling makes no guarantees
- about alignments -- so it won't work at all. */
- number = ntohl(*((unsigned int *)*ext)++);
- serial = ntohl(*((unsigned int *)*ext)++);
-#else
number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
serial = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_pid(node, number, serial, creation);
- return eterm_p;
+ erl_mk_pid_helper(ep, number, serial, creation);
+ return ep;
}
case ERL_REFERENCE_EXT:
- erl_free_term(ep);
{
- char* node = atom_buf;
- unsigned int number;
+ unsigned int n[3] = {0, 0, 0};
unsigned char creation;
- ETERM *eterm_p;
- if (read_atom(ext, node) < 0) return NULL;
+ ERL_TYPE(ep) = ERL_REF;
+ if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL;
/* get the integers */
-#if 0
- number = ntohl(*((unsigned int *)*ext)++);
-#else
- number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
+ n[0] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_ref(node, number, creation);
- return eterm_p;
+ __erl_mk_reference(ep, NULL, 1, n, creation);
+ return ep;
}
case ERL_NEW_REFERENCE_EXT:
- erl_free_term(ep);
{
- char* node = atom_buf;
size_t cnt, i;
unsigned int n[3];
unsigned char creation;
- ETERM *eterm_p;
-#if 0
- cnt = ntohs(*((unsigned short *)*ext)++);
-#else
+ ERL_TYPE(ep) = ERL_REF;
cnt = ((*ext)[0] << 8) | (*ext)[1];
*ext += 2;
-#endif
- if (read_atom(ext, node) < 0) return NULL;
+ if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL;
/* get the integers */
creation = *(*ext)++;
for(i = 0; i < cnt; i++)
{
-#if 0
- n[i] = ntohl(*((unsigned int *)*ext)++);
-#else
n[i] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
}
- eterm_p = __erl_mk_reference(node, cnt, n, creation);
- return eterm_p;
+ __erl_mk_reference(ep, NULL, cnt, n, creation);
+ return ep;
}
case ERL_PORT_EXT:
- erl_free_term(ep);
{
- char* node = atom_buf;
unsigned int number;
unsigned char creation;
- ETERM *eterm_p;
- if (read_atom(ext, node) < 0) return NULL;
+ ERL_TYPE(ep) = ERL_PORT;
+ if (read_atom(ext, &ep->uval.portval.node) < 0) return NULL;
/* get the integers */
-#if 0
- number = ntohl(*((unsigned int *)*ext)++);
-#else
number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_port(node, number, creation);
- return eterm_p;
+ erl_mk_port_helper(ep, number, creation);
+ return ep;
}
case ERL_NIL_EXT:
@@ -1120,8 +1108,9 @@ unsigned char erl_ext_type(unsigned char *ext)
case ERL_INTEGER_EXT:
return ERL_INTEGER;
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
return ERL_ATOM;
case ERL_PID_EXT:
return ERL_PID;
@@ -1173,8 +1162,9 @@ int erl_ext_size(unsigned char *t)
case ERL_SMALL_INTEGER_EXT:
case ERL_INTEGER_EXT:
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
case ERL_PID_EXT:
case ERL_PORT_EXT:
case ERL_REFERENCE_EXT:
@@ -1221,12 +1211,13 @@ static int jump_atom(unsigned char** ext)
switch (*e++) {
case ERL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
len = (e[0] << 8) | e[1];
e += (len + 2);
break;
case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
len = e[0];
e += (len + 1);
break;
@@ -1259,8 +1250,9 @@ static int jump(unsigned char **ext)
*ext += 1;
break;
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
jump_atom(ext);
break;
case ERL_PID_EXT:
@@ -1426,6 +1418,58 @@ static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2)
} /* cmpbytes */
+#define tag2enc(T) ((T)==ERL_ATOM_EXT || (T)==ERL_SMALL_ATOM_EXT ? ERLANG_LATIN1 : ERLANG_UTF8)
+
+/*
+ * Compare two encoded atom names, each described by its bytes, length and
+ * the external-format tag it was read under (which fixes its encoding).
+ *
+ * Names in the same encoding are compared bytewise with cmpbytes(). For a
+ * mixed pair the latin1 name is always passed first to
+ * cmp_latin1_vs_utf8(); the result is negated when that swaps the operands.
+ */
+static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1,
+                    unsigned char* s2, int l2, unsigned char tag2)
+{
+    const int first_is_latin1 = (tag2enc(tag1) == ERLANG_LATIN1);
+    const int second_is_latin1 = (tag2enc(tag2) == ERLANG_LATIN1);
+
+    if (first_is_latin1 == second_is_latin1)
+        return cmpbytes(s1, l1, s2, l2);
+
+    if (first_is_latin1)
+        return cmp_latin1_vs_utf8((char*)s1, l1, (char*)s2, l2);
+
+    return -cmp_latin1_vs_utf8((char*)s2, l2, (char*)s1, l1);
+}
+
+/*
+ * Compare a latin1 string with a UTF-8 string, code point by code point.
+ *
+ * strL/lenL: latin1 bytes and their count.
+ * strU/lenU: UTF-8 bytes and their count.
+ *
+ * Returns -1, 0 or 1 when the latin1 string sorts before, equal to or
+ * after the UTF-8 string. A UTF-8 sequence that encodes a code point above
+ * 0xFF, or that is cut short by lenU, makes the latin1 string sort first.
+ */
+int cmp_latin1_vs_utf8(const char* strL, int lenL, const char* strU, int lenU)
+{
+    const unsigned char* sL = (const unsigned char*)strL;
+    const unsigned char* sU = (const unsigned char*)strU;
+    const unsigned char* sL_end = sL + lenL;
+    const unsigned char* sU_end = sU + lenU;
+
+    while (sL < sL_end && sU < sU_end) {
+        unsigned char UasL;   /* current UTF-8 code point as a latin1 byte */
+
+        if (*sU < 0x80) {
+            /* ASCII is a single byte in UTF-8, whatever the latin1 side holds. */
+            UasL = *sU;
+            sU += 1;
+        }
+        else if (*sL < 0x80) {
+            /* ASCII on the latin1 side sorts before any non-ASCII code point. */
+            return -1;
+        }
+        else if (*sU < 0xC4 && (sU+1) < sU_end) {
+            /* Lead bytes below 0xC4 start two-byte sequences for code
+             * points up to 0xFF; fold the pair back into one latin1 byte. */
+            UasL = (unsigned char)(((sU[0] & 0x3) << 6) | (sU[1] & 0x3F));
+            sU += 2;
+        }
+        else {
+            return -1;
+        }
+
+        if (*sL < UasL) return -1;
+        if (*sL > UasL) return 1;
+        sL++;
+    }
+
+    return (sU >= sU_end) - (sL >= sL_end); /* -1, 0 or 1 */
+}
+
+
#define CMP_EXT_ERROR_CODE 4711
#define CMP_EXT_INT32_BE(AP, BP) \
@@ -1561,6 +1605,7 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
int min, ret,i,j,k;
double ff1, ff2;
unsigned char *tmp1, *tmp2;
+ unsigned char tag1, tag2;
if ( ((*e1)[0] == ERL_STRING_EXT) && ((*e2)[0] == ERL_LIST_EXT) ) {
return cmp_string_list(e1, e2);
@@ -1568,9 +1613,10 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
return -cmp_string_list(e2, e1);
}
- *e2 += 1;
+ tag1 = *(*e1)++;
+ tag2 = *(*e2)++;
i = j = 0;
- switch (*(*e1)++)
+ switch (tag1)
{
case ERL_SMALL_INTEGER_EXT:
if (**e1 < **e2) ret = -1;
@@ -1590,14 +1636,15 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
*e1 += 4; *e2 += 4;
return ret;
case ERL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
i = (**e1) << 8; (*e1)++;
j = (**e2) << 8; (*e2)++;
/*fall through*/
case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
i |= (**e1); (*e1)++;
j |= (**e2); (*e2)++;
- ret = cmpbytes(*e1, i, *e2, j);
+ ret = cmpatoms(*e1, i, tag1, *e2, j, tag2);
*e1 += i;
*e2 += j;
return ret;
diff --git a/lib/erl_interface/src/legacy/global_whereis.c b/lib/erl_interface/src/legacy/global_whereis.c
index 2afb193504..e6c556d907 100644
--- a/lib/erl_interface/src/legacy/global_whereis.c
+++ b/lib/erl_interface/src/legacy/global_whereis.c
@@ -85,7 +85,16 @@ ETERM *erl_global_whereis(int fd, const char *name, char *node)
opid = erl_decode((unsigned char*)buf);
/* extract the nodename for the caller */
- if (node) strcpy(node,epid.node);
+ if (node) {
+ char* node_str = ERL_PID_NODE(opid);
+ if (node_str) {
+ strcpy(node, node_str);
+ }
+ else {
+ erl_free_term(opid);
+ return NULL;
+ }
+ }
return opid;
}
diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c
index 6773f90bfc..65afee89cc 100644
--- a/lib/erl_interface/src/misc/ei_decode_term.c
+++ b/lib/erl_interface/src/misc/ei_decode_term.c
@@ -32,7 +32,7 @@
int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
{
const char* s = buf + *index, * s0 = s;
- int len, i, n, sign;
+ int i, n, sign;
char c;
if (term == NULL) return -1;
@@ -48,12 +48,13 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
case NEW_FLOAT_EXT:
return ei_decode_double(buf, index, &term->value.d_val);
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
return ei_decode_atom(buf, index, term->value.atom_name);
case ERL_REFERENCE_EXT:
/* first the nodename */
- if (get_atom(&s, term->value.ref.node) < 0) return -1;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
term->value.ref.n[0] = get32be(s);
term->value.ref.len = 1;
@@ -63,7 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
/* first the integer count */
term->value.ref.len = get16be(s);
/* then the nodename */
- if (get_atom(&s, term->value.ref.node) < 0) return -1;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* creation */
term->value.ref.creation = get8(s) & 0x03;
/* finally the id integers */
@@ -75,12 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
}
break;
case ERL_PORT_EXT:
- if (get_atom(&s, term->value.port.node) < 0) return -1;
+ if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1;
term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */;
term->value.port.creation = get8(s) & 0x03;
break;
case ERL_PID_EXT:
- if (get_atom(&s, term->value.pid.node) < 0) return -1;
+ /* The node's original encoding belongs to the pid member being filled
+  * in here, not to value.port. */
+ if (get_atom(&s, term->value.pid.node, &term->value.pid.node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */
term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */
diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c
index 620c6e72e2..91fe73e68c 100644
--- a/lib/erl_interface/src/misc/ei_printterm.c
+++ b/lib/erl_interface/src/misc/ei_printterm.c
@@ -132,9 +132,10 @@ static int print_term(FILE* fp, ei_x_buff* x,
doquote = 0;
ei_get_type_internal(buf, index, &ty, &n);
switch (ty) {
- case ERL_ATOM_EXT:
+ case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
case ERL_SMALL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
if (ei_decode_atom(buf, index, a) < 0)
goto err;
doquote = !islower((int)a[0]);
diff --git a/lib/erl_interface/src/misc/ei_x_encode.c b/lib/erl_interface/src/misc/ei_x_encode.c
index fa1e26ccbb..44dcff7664 100644
--- a/lib/erl_interface/src/misc/ei_x_encode.c
+++ b/lib/erl_interface/src/misc/ei_x_encode.c
@@ -197,18 +197,33 @@ int ei_x_encode_tuple_header(ei_x_buff* x, long n)
+/*
+ * Encode the zero-terminated atom name s into x. The name is passed on
+ * as latin1 input to be encoded as latin1 (ERLANG_LATIN1 for both
+ * encodings). Returns the result of ei_x_encode_atom_len_as().
+ */
int ei_x_encode_atom(ei_x_buff* x, const char* s)
{
- return ei_x_encode_atom_len(x, s, strlen(s));
+ return ei_x_encode_atom_len_as(x, s, strlen(s), ERLANG_LATIN1, ERLANG_LATIN1);
}
+/*
+ * Encode the first len bytes of s into x as an atom. The name is passed
+ * on as latin1 input to be encoded as latin1 (ERLANG_LATIN1 for both
+ * encodings). Returns the result of ei_x_encode_atom_len_as().
+ */
int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len)
+{
+ return ei_x_encode_atom_len_as(x, s, len, ERLANG_LATIN1, ERLANG_LATIN1);
+}
+
+/*
+ * Encode the zero-terminated atom name s into x, converting it from
+ * character encoding from_enc to to_enc. The length is taken with
+ * strlen(); the work is done by ei_x_encode_atom_len_as().
+ */
+int ei_x_encode_atom_as(ei_x_buff* x, const char* s,
+                        enum erlang_char_encoding from_enc,
+                        enum erlang_char_encoding to_enc)
+{
+    const int len = strlen(s);
+
+    return ei_x_encode_atom_len_as(x, s, len, from_enc, to_enc);
+}
+
+/*
+ * Encode the first len bytes of s into x as an atom, converting from
+ * character encoding from_enc to to_enc.
+ *
+ * Returns -1 if x_fix_buff() fails, otherwise the result of
+ * ei_encode_atom_len_as() on x's buffer and index.
+ */
+int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
{
int i = x->index;
- ei_encode_atom_len(NULL, &i, s, len);
+ /* No output buffer is supplied on this first call; the index it leaves
+  * in i is what x_fix_buff() is asked to accommodate before the real
+  * encode below. Its return value is not examined. */
+ ei_encode_atom_len_as(NULL, &i, s, len, from_enc, to_enc);
if (!x_fix_buff(x, i))
return -1;
- return ei_encode_atom_len(x->buff, &x->index, s, len);
+ return ei_encode_atom_len_as(x->buff, &x->index, s, len, from_enc, to_enc);
}
+
int ei_x_encode_pid(ei_x_buff* x, const erlang_pid* pid)
{
int i = x->index;
diff --git a/lib/erl_interface/src/misc/get_type.c b/lib/erl_interface/src/misc/get_type.c
index c9a040fbbd..54465196b0 100644
--- a/lib/erl_interface/src/misc/get_type.c
+++ b/lib/erl_interface/src/misc/get_type.c
@@ -33,80 +33,6 @@ int ei_get_type(const char *buf, const int *index, int *type, int *len)
return ei_get_type_internal(buf, index, type, len);
}
-#if 0
-int ei_get_type(const char *buf, const int *index, int *type, int *len)
-{
- const char *s = buf + *index;
- int itype = get8(s); /* Internal type */
-
- *len = 0;
-
- switch (*type) {
-
- case ERL_SMALL_INTEGER_EXT:
- case ERL_INTEGER_EXT:
- case ERL_SMALL_BIG_EXT:
- case ERL_LARGE_BIG_EXT:
- *type = EI_TYPE_INTEGER;
- break;
-
- case ERL_FLOAT_EXT:
- *type = EI_TYPE_FLOAT;
- break;
-
- case ERL_SMALL_TUPLE_EXT:
- case ERL_SMALL_ATOM_EXT:
- *len = get8(s);
- break;
-
- case ERL_ATOM_EXT:
- case ERL_UNICODE_ATOM_EXT:
- case ERL_STRING_EXT:
- *len = get16be(s);
- break;
-
- case ERL_LARGE_TUPLE_EXT:
- case ERL_LIST_EXT:
- case ERL_BINARY_EXT:
- *len = get32be(s);
- break;
-
- case ERL_SMALL_BIG_EXT:
- *len = (get8(s)+1)/2; /* big arity */
- break;
-
- case ERL_LARGE_BIG_EXT:
- *len = (get32be(s)+1)/2; /* big arity */
- break;
-
- case ERL_BINARY_EXT:
- *type = EI_TYPE_BINARY;
- break;
-
- case ERL_PID_EXT:
- *type = EI_TYPE_PID;
- break;
-
- case ERL_PORT_EXT:
- *type = EI_TYPE_PORT;
- break;
-
- case ERL_REFERENCE_EXT:
- case ERL_NEW_REFERENCE_EXT:
- *type = EI_TYPE_REF;
- break;
-
- default:
- break;
- }
-
- /* leave index unchanged */
- return 0;
-}
-#endif
-
-
-/* Old definition of function above */
int ei_get_type_internal(const char *buf, const int *index,
int *type, int *len)
@@ -117,12 +43,13 @@ int ei_get_type_internal(const char *buf, const int *index,
switch (*type) {
case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
*type = ERL_ATOM_EXT;
case ERL_SMALL_TUPLE_EXT:
*len = get8(s);
break;
- case ERL_UNICODE_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
*type = ERL_ATOM_EXT;
case ERL_ATOM_EXT:
case ERL_STRING_EXT:
diff --git a/lib/erl_interface/src/misc/putget.h b/lib/erl_interface/src/misc/putget.h
index 8b0d4d3404..77ae168f8c 100644
--- a/lib/erl_interface/src/misc/putget.h
+++ b/lib/erl_interface/src/misc/putget.h
@@ -105,8 +105,12 @@
((EI_ULONGLONG)((unsigned char *)(s))[-2] << 8) | \
(EI_ULONGLONG)((unsigned char *)(s))[-1]))
-int ei_internal_get_atom(const char** bufp, char* p);
+int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int ei_internal_get_atom(const char** bufp, char* p, enum erlang_char_encoding*);
+int ei_internal_put_atom(char** bufp, const char* p, int slen, enum erlang_char_encoding);
#define get_atom ei_internal_get_atom
+#define put_atom ei_internal_put_atom
typedef union float_ext {
double d;
diff --git a/lib/erl_interface/src/misc/show_msg.c b/lib/erl_interface/src/misc/show_msg.c
index 194296798b..ca46b15aff 100644
--- a/lib/erl_interface/src/misc/show_msg.c
+++ b/lib/erl_interface/src/misc/show_msg.c
@@ -132,13 +132,13 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
switch (msg.msgtype) {
case ERL_SEND:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)) return -1;
mbuf = msgbuf;
break;
case ERL_SEND_TT:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
@@ -146,15 +146,15 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
case ERL_REG_SEND:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)) return -1;
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)) return -1;
mbuf = msgbuf;
break;
case ERL_REG_SEND_TT:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
break;
diff --git a/lib/erl_interface/test/ei_decode_encode_SUITE.erl b/lib/erl_interface/test/ei_decode_encode_SUITE.erl
index 85cb62239b..e8ae7a6f81 100644
--- a/lib/erl_interface/test/ei_decode_encode_SUITE.erl
+++ b/lib/erl_interface/test/ei_decode_encode_SUITE.erl
@@ -118,6 +118,9 @@ test_ei_decode_encode(Config) when is_list(Config) ->
?line send_rec(P, OXPort),
?line send_rec(P, OXRef),
+ %% Unicode atoms
+ [send_rec(P, Atom) || Atom <- unicode_atom_data()],
+
?line runner:recv_eot(P),
ok.
@@ -127,7 +130,7 @@ test_ei_decode_encode(Config) when is_list(Config) ->
% We read two packets for each test, the ei_decode_encode and ei_x_decode_encode version....
send_rec(P, Term) when is_port(P) ->
- ?t:format("Testing: ~p~n", [Term]),
+ %%?t:format("Testing: ~p~n", [Term]),
P ! {self(), {command, term_to_binary(Term)}},
{_B,Term} = get_buf_and_term(P).
@@ -146,7 +149,7 @@ get_buf_and_term(P) ->
_ ->
B1 = list_to_binary([131,B]), % No magic, add
T = binary_to_term(B1),
- io:format("~w\n~w\n(got no magic)\n",[B,T]),
+ %io:format("~w\n~w\n(got no magic)\n",[B,T]),
{B,T}
end.
@@ -160,7 +163,7 @@ get_binary(P) ->
case runner:get_term(P) of
{bytes,L} ->
B = list_to_binary(L),
- io:format("~w\n",[L]),
+ %%io:format("~w\n",[L]),
% For strange reasons <<131>> show up as <>....
% io:format("~w\n",[B]),
B;
@@ -305,3 +308,60 @@ mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName),
exit({unexpected_binary_to_term_result, Other})
end.
+
+
+%% Returns the list of atoms that are sent to the C test program:
+%% ten atoms built from fixed code point ranges followed by 2000
+%% generated atoms of 255 code points each (2010 atoms in total).
+%% The C side (ei_decode_encode_test.c) loops 2010 times, so the two
+%% counts have to be kept in sync.
+unicode_atom_data() ->
+ [uc_atup(lists:seq(16#1f600, 16#1f600+254)),
+ uc_atup(lists:seq(16#1f600, 16#1f600+63)),
+ %% NOTE(review): starts at 1, presumably to keep NUL out of the
+ %% zero-terminated C string -- confirm.
+ uc_atup(lists:seq(1, 255)),
+ uc_atup(lists:seq(100, 163)),
+ uc_atup(lists:seq(200, 354)),
+ uc_atup(lists:seq(200, 263)),
+ uc_atup(lists:seq(2000, 2254)),
+ uc_atup(lists:seq(2000, 2063)),
+ uc_atup(lists:seq(65500, 65754)),
+ uc_atup(lists:seq(65500, 65563))
+ | lists:map(fun (N) ->
+ uc_atup(lists:seq(64000+N, 64254+N))
+ end,
+ lists:seq(1, 2000))].
+
+%% Converts a list of code points into an atom; thin wrapper around
+%% string_to_atom/1.
+uc_atup(ATxt) ->
+ string_to_atom(ATxt).
+
+%% Creates an atom from a list of code points by hand-assembling its
+%% external term format and decoding it with binary_to_term/1.
+%% 131 is the version byte; tag 119 (SMALL_ATOM_UTF8_EXT) carries a
+%% one byte length and is used when the UTF-8 text is shorter than
+%% 256 bytes, otherwise tag 118 (ATOM_UTF8_EXT) with a two byte
+%% big-endian length is used.
+string_to_atom(String) ->
+ Utf8List = string_to_utf8_list(String),
+ Len = length(Utf8List),
+ TagLen = case Len < 256 of
+ true -> [119, Len];
+ false -> [118, Len bsr 8, Len band 16#ff]
+ end,
+ binary_to_term(list_to_binary([131, TagLen, Utf8List])).
+
+%% Encodes a list of integer code points as a flat list of UTF-8 bytes.
+%% One clause per encoded length (1, 2, 3 and 4 bytes). A code point
+%% outside 0..16#10FFFF, or a non-integer element, matches no clause
+%% and raises function_clause.
+%% The three byte clause accepts the whole range 16#800..16#FFFF; the
+%% surrogate range is not excluded.
+string_to_utf8_list([]) ->
+ [];
+%% 1 byte: 0xxxxxxx
+string_to_utf8_list([CP|CPs]) when is_integer(CP),
+ 0 =< CP,
+ CP =< 16#7F ->
+ [CP | string_to_utf8_list(CPs)];
+%% 2 bytes: 110xxxxx 10xxxxxx
+string_to_utf8_list([CP|CPs]) when is_integer(CP),
+ 16#80 =< CP,
+ CP =< 16#7FF ->
+ [16#C0 bor (CP bsr 6),
+ 16#80 bor (16#3F band CP)
+ | string_to_utf8_list(CPs)];
+%% 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
+string_to_utf8_list([CP|CPs]) when is_integer(CP),
+ 16#800 =< CP,
+ CP =< 16#FFFF ->
+ [16#E0 bor (CP bsr 12),
+ 16#80 bor (16#3F band (CP bsr 6)),
+ 16#80 bor (16#3F band CP)
+ | string_to_utf8_list(CPs)];
+%% 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+string_to_utf8_list([CP|CPs]) when is_integer(CP),
+ 16#10000 =< CP,
+ CP =< 16#10FFFF ->
+ [16#F0 bor (CP bsr 18),
+ 16#80 bor (16#3F band (CP bsr 12)),
+ 16#80 bor (16#3F band (CP bsr 6)),
+ 16#80 bor (16#3F band CP)
+ | string_to_utf8_list(CPs)].
diff --git a/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c b/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c
index 406f02ecfb..194ce9057b 100644
--- a/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c
+++ b/lib/erl_interface/test/ei_decode_encode_SUITE_data/ei_decode_encode_test.c
@@ -29,171 +29,222 @@
* Author: kent@erix.ericsson.se
*/
-#define EI_DECODE_ENCODE(FUNC,TYPE) \
- { \
- char *buf; \
- char buf2[1024]; \
- TYPE p; \
- int size1 = 0; \
- int size2 = 0; \
- int size3 = 0; \
- int err; \
- ei_x_buff arg; \
-\
- message("ei_decode_" #FUNC ", arg is type " #TYPE); \
- buf = read_packet(NULL); \
- err = ei_decode_ ## FUNC(buf+1, &size1, &p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("decode returned non zero but not -1"); \
- } else { \
- fail("decode returned non zero"); \
- } \
- return; \
- } \
- if (size1 < 1) { \
- fail("size is < 1"); \
- return; \
- } \
-\
- message("ei_encode_" #FUNC " buf is NULL, arg is type " #TYPE); \
- err = ei_encode_ ## FUNC(NULL, &size2, &p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("size calculation returned non zero but not -1"); \
- return; \
- } else { \
- fail("size calculation returned non zero"); \
- return; \
- } \
- } \
- if (size1 != size2) { \
- message("size1 = %d, size2 = %d\n",size1,size2); \
- fail("decode and encode size differs when buf is NULL"); \
- return; \
- } \
- message("ei_encode_" #FUNC ", arg is type " #TYPE); \
- err = ei_encode_ ## FUNC(buf2, &size3, &p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("returned non zero but not -1"); \
- } else { \
- fail("returned non zero"); \
- } \
- return; \
- } \
- if (size1 != size3) { \
- message("size1 = %d, size2 = %d\n",size1,size3); \
- fail("decode and encode size differs"); \
- return; \
- } \
- send_buffer(buf2, size1); \
-\
- message("ei_x_encode_" #FUNC ", arg is type " #TYPE); \
- ei_x_new(&arg); \
- err = ei_x_encode_ ## FUNC(&arg, &p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("returned non zero but not -1"); \
- } else { \
- fail("returned non zero"); \
- } \
- ei_x_free(&arg); \
- return; \
- } \
- if (arg.index < 1) { \
- fail("size is < 1"); \
- ei_x_free(&arg); \
- return; \
- } \
- send_buffer(arg.buff, arg.index); \
- ei_x_free(&arg); \
- }
-
-#define EI_DECODE_ENCODE_BIG(FUNC,TYPE) \
- { \
- char *buf; \
- char buf2[2048]; \
- TYPE *p; \
- int size1 = 0; \
- int size2 = 0; \
- int size3 = 0; \
- int err, index = 0, len, type; \
- ei_x_buff arg; \
-\
- message("ei_decode_" #FUNC ", arg is type " #TYPE); \
- buf = read_packet(NULL); \
- ei_get_type(buf+1, &index, &type, &len); \
- p = ei_alloc_big(len); \
- err = ei_decode_ ## FUNC(buf+1, &size1, p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("decode returned non zero but not -1"); \
- } else { \
- fail("decode returned non zero"); \
- } \
- return; \
- } \
- if (size1 < 1) { \
- fail("size is < 1"); \
- return; \
- } \
-\
- message("ei_encode_" #FUNC " buf is NULL, arg is type " #TYPE); \
- err = ei_encode_ ## FUNC(NULL, &size2, p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("size calculation returned non zero but not -1"); \
- return; \
- } else { \
- fail("size calculation returned non zero"); \
- return; \
- } \
- } \
- if (size1 != size2) { \
- message("size1 = %d, size2 = %d\n",size1,size2); \
- fail("decode and encode size differs when buf is NULL"); \
- return; \
- } \
- message("ei_encode_" #FUNC ", arg is type " #TYPE); \
- err = ei_encode_ ## FUNC(buf2, &size3, p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("returned non zero but not -1"); \
- } else { \
- fail("returned non zero"); \
- } \
- return; \
- } \
- if (size1 != size3) { \
- message("size1 = %d, size2 = %d\n",size1,size3); \
- fail("decode and encode size differs"); \
- return; \
- } \
- send_buffer(buf2, size1); \
-\
- message("ei_x_encode_" #FUNC ", arg is type " #TYPE); \
- ei_x_new(&arg); \
- err = ei_x_encode_ ## FUNC(&arg, p); \
- if (err != 0) { \
- if (err != -1) { \
- fail("returned non zero but not -1"); \
- } else { \
- fail("returned non zero"); \
- } \
- ei_x_free(&arg); \
- return; \
- } \
- if (arg.index < 1) { \
- fail("size is < 1"); \
- ei_x_free(&arg); \
- return; \
- } \
- send_buffer(arg.buff, arg.index); \
- ei_x_free(&arg); \
- ei_free_big(p); \
- }
+/*
+ * Trace output is compiled out: MESSAGE() expands to nothing, so each
+ * "MESSAGE(...);" below is an empty statement. Swap the two definitions
+ * to forward the trace text to message().
+ */
+/*#define MESSAGE(FMT,A1,A2) message(FMT,A1,A2)*/
+#define MESSAGE(FMT,A1,A2)
+/*
+ * Generic signatures for the three ei functions exercised per type.
+ * The object being decoded/encoded is passed as void* so that one
+ * driver function can handle every type.
+ */
+typedef int decodeFT(const char *buf, int *index, void*);
+typedef int encodeFT(char *buf, int *index, void*);
+typedef int x_encodeFT(ei_x_buff*, void*);
+
+/*
+ * Describes one type under test: its name and C type as strings (used
+ * only in trace messages) and the decode / encode / x_encode functions
+ * to call for it.
+ */
+struct Type {
+ char* name;
+ char* type;
+ decodeFT* ei_decode_fp;
+ encodeFT* ei_encode_fp;
+ x_encodeFT* ei_x_encode_fp;
+};
+
+/*
+ * An atom as handled by this test: the atom text (filled in as UTF-8 by
+ * ei_decode_my_atom) together with the character encoding the atom had
+ * in the received binary format.
+ */
+typedef struct
+{
+ char name[MAXATOMLEN_UTF8];
+ enum erlang_char_encoding enc;
+}my_atom;
+
+/*
+ * Decodes an atom into a->name, requesting UTF-8 as the resulting
+ * string encoding, and stores the encoding that was used in the binary
+ * format in a->enc. The actual result encoding is not requested (NULL).
+ * Returns the result of ei_decode_atom_as().
+ */
+int ei_decode_my_atom(const char *buf, int *index, my_atom* a)
+{
+ return ei_decode_atom_as(buf, index, a->name, sizeof(a->name), ERLANG_UTF8, &a->enc, NULL);
+}
+/*
+ * Encodes a->name with ei_encode_atom_as(), passing ERLANG_UTF8 and
+ * a->enc as the encoding arguments -- presumably "from UTF-8 to the
+ * encoding the atom originally had", so that the re-encoded bytes match
+ * what was received (TODO confirm argument order against ei.h).
+ */
+int ei_encode_my_atom(char *buf, int *index, my_atom* a)
+{
+ return ei_encode_atom_as(buf, index, a->name, ERLANG_UTF8, a->enc);
+}
+/*
+ * ei_x_buff variant of ei_encode_my_atom(): appends a->name to x using
+ * ei_x_encode_atom_as() with the same encoding arguments
+ * (ERLANG_UTF8, a->enc).
+ */
+int ei_x_encode_my_atom(ei_x_buff* x, my_atom* a)
+{
+ return ei_x_encode_atom_as(x, a->name, ERLANG_UTF8, a->enc);
+}
+
+/*
+ * Round-trip driver for one term of the type described by t.
+ *
+ * Reads one packet, decodes it into obj, then re-encodes obj three ways:
+ *   1. with a NULL buffer (size calculation only),
+ *   2. into a local buffer, which is sent back with send_buffer(),
+ *   3. into an ei_x_buff, which is also sent back.
+ * The sizes reported by the decode and by both plain encodes must agree.
+ * Any failure is reported through fail() and ends the function early.
+ *
+ * obj must point to storage large enough for the type's C
+ * representation; it is filled in by the decode call.
+ */
+void decode_encode(struct Type* t, void* obj)
+{
+ char *buf;
+ char buf2[1024];
+ int size1 = 0;
+ int size2 = 0;
+ int size3 = 0;
+ int err;
+ ei_x_buff arg;
+
+ MESSAGE("ei_decode_%s, arg is type %s", t->name, t->type);
+ buf = read_packet(NULL);
+ /* buf+1 skips the first byte of the packet before decoding */
+ err = t->ei_decode_fp(buf+1, &size1, obj);
+ if (err != 0) {
+ if (err != -1) {
+ fail("decode returned non zero but not -1");
+ } else {
+ fail("decode returned non zero");
+ }
+ return;
+ }
+ if (size1 < 1) {
+ fail("size is < 1");
+ return;
+ }
+
+ /* Pass 1: NULL buffer, only the index is advanced */
+ MESSAGE("ei_encode_%s buf is NULL, arg is type %s", t->name, t->type);
+ err = t->ei_encode_fp(NULL, &size2, obj);
+ if (err != 0) {
+ if (err != -1) {
+ fail("size calculation returned non zero but not -1");
+ return;
+ } else {
+ fail("size calculation returned non zero");
+ return;
+ }
+ }
+ if (size1 != size2) {
+ MESSAGE("size1 = %d, size2 = %d\n",size1,size2);
+ fail("decode and encode size differs when buf is NULL");
+ return;
+ }
+ /* Pass 2: encode into the fixed-size local buffer */
+ MESSAGE("ei_encode_%s, arg is type %s", t->name, t->type);
+ err = t->ei_encode_fp(buf2, &size3, obj);
+ if (err != 0) {
+ if (err != -1) {
+ fail("returned non zero but not -1");
+ } else {
+ fail("returned non zero");
+ }
+ return;
+ }
+ if (size1 != size3) {
+ MESSAGE("size1 = %d, size2 = %d\n",size1,size3);
+ fail("decode and encode size differs");
+ return;
+ }
+ send_buffer(buf2, size1);
+
+ /* Pass 3: encode into a dynamically growing ei_x_buff */
+ MESSAGE("ei_x_encode_%s, arg is type %s", t->name, t->type);
+ ei_x_new(&arg);
+ err = t->ei_x_encode_fp(&arg, obj);
+ if (err != 0) {
+ if (err != -1) {
+ fail("returned non zero but not -1");
+ } else {
+ fail("returned non zero");
+ }
+ ei_x_free(&arg);
+ return;
+ }
+ if (arg.index < 1) {
+ fail("size is < 1");
+ ei_x_free(&arg);
+ return;
+ }
+ send_buffer(arg.buff, arg.index);
+ ei_x_free(&arg);
+}
+
+
+/*
+ * Runs decode_encode() for one type. TYPE is the suffix of the
+ * ei_decode_/ei_encode_/ei_x_encode_ function names and ERLANG_TYPE is
+ * the C type of the object; a local object of that type is used as
+ * scratch storage for the round trip.
+ * The typed ei functions are cast to the generic void* signatures.
+ * NOTE(review): calling through a function pointer of a different
+ * type is formally undefined in ISO C, even if it works on the
+ * supported platforms.
+ */
+#define EI_DECODE_ENCODE(TYPE, ERLANG_TYPE) { \
+ struct Type type_struct = {#TYPE, #ERLANG_TYPE, \
+ (decodeFT*)ei_decode_##TYPE, \
+ (encodeFT*)ei_encode_##TYPE, \
+ (x_encodeFT*)ei_x_encode_##TYPE }; \
+ ERLANG_TYPE type_obj; \
+ decode_encode(&type_struct, &type_obj); \
+ }
+
+
+/*
+ * Round-trip driver for bignum terms of the type described by t.
+ *
+ * Works like decode_encode(), except that the object is allocated here
+ * with ei_alloc_big(), sized from the length reported by ei_get_type().
+ * The term is decoded, then encoded with a NULL buffer (size only),
+ * into a local buffer and into an ei_x_buff; both encoded results are
+ * sent back with send_buffer().
+ *
+ * Failures are reported through fail(). The allocated object is
+ * released on every exit path; previously it was freed only on
+ * success.
+ *
+ * NOTE(review): the packet returned by read_packet() is not released
+ * here -- confirm who owns it.
+ */
+void decode_encode_big(struct Type* t)
+{
+    char *buf;
+    char buf2[2048];
+    void *p = NULL; /* (TYPE*) */
+    int size1 = 0;
+    int size2 = 0;
+    int size3 = 0;
+    int err, index = 0, len = 0, type = 0;
+    ei_x_buff arg;
+
+    MESSAGE("ei_decode_%s, arg is type %s", t->name, t->type);
+    buf = read_packet(NULL);
+    /* len is needed to size the object, so a failed lookup is fatal */
+    if (ei_get_type(buf+1, &index, &type, &len) != 0) {
+        fail("ei_get_type failed");
+        return;
+    }
+    p = ei_alloc_big(len);
+    if (p == NULL) {
+        fail("ei_alloc_big failed");
+        return;
+    }
+    err = t->ei_decode_fp(buf+1, &size1, p);
+    if (err != 0) {
+        if (err != -1) {
+            fail("decode returned non zero but not -1");
+        } else {
+            fail("decode returned non zero");
+        }
+        goto done;
+    }
+    if (size1 < 1) {
+        fail("size is < 1");
+        goto done;
+    }
+
+    /* Pass 1: NULL buffer, only the index is advanced */
+    MESSAGE("ei_encode_%s buf is NULL, arg is type %s", t->name, t->type);
+    err = t->ei_encode_fp(NULL, &size2, p);
+    if (err != 0) {
+        if (err != -1) {
+            fail("size calculation returned non zero but not -1");
+        } else {
+            fail("size calculation returned non zero");
+        }
+        goto done;
+    }
+    if (size1 != size2) {
+        MESSAGE("size1 = %d, size2 = %d\n",size1,size2);
+        fail("decode and encode size differs when buf is NULL");
+        goto done;
+    }
+
+    /* Pass 2: encode into the fixed-size local buffer */
+    MESSAGE("ei_encode_%s, arg is type %s", t->name, t->type);
+    err = t->ei_encode_fp(buf2, &size3, p);
+    if (err != 0) {
+        if (err != -1) {
+            fail("returned non zero but not -1");
+        } else {
+            fail("returned non zero");
+        }
+        goto done;
+    }
+    if (size1 != size3) {
+        MESSAGE("size1 = %d, size2 = %d\n",size1,size3);
+        fail("decode and encode size differs");
+        goto done;
+    }
+    send_buffer(buf2, size1);
+
+    /* Pass 3: encode into a dynamically growing ei_x_buff */
+    MESSAGE("ei_x_encode_%s, arg is type %s", t->name, t->type);
+    ei_x_new(&arg);
+    err = t->ei_x_encode_fp(&arg, p);
+    if (err != 0) {
+        if (err != -1) {
+            fail("returned non zero but not -1");
+        } else {
+            fail("returned non zero");
+        }
+    } else if (arg.index < 1) {
+        fail("size is < 1");
+    } else {
+        send_buffer(arg.buff, arg.index);
+    }
+    ei_x_free(&arg);
+
+done:
+    ei_free_big(p);
+}
+
+/*
+ * Runs decode_encode_big() for one type. TYPE is the suffix of the
+ * ei_decode_/ei_encode_/ei_x_encode_ function names; ERLANG_TYPE is
+ * only stringified for trace messages, since the object itself is
+ * allocated inside decode_encode_big().
+ */
+#define EI_DECODE_ENCODE_BIG(TYPE, ERLANG_TYPE) { \
+ struct Type type_struct = {#TYPE, #ERLANG_TYPE, \
+ (decodeFT*)ei_decode_##TYPE, \
+ (encodeFT*)ei_encode_##TYPE, \
+ (x_encodeFT*)ei_x_encode_##TYPE }; \
+ decode_encode_big(&type_struct); \
+ }
@@ -201,6 +252,8 @@
TESTCASE(test_ei_decode_encode)
{
+ int i;
+
EI_DECODE_ENCODE(fun , erlang_fun);
EI_DECODE_ENCODE(pid , erlang_pid);
EI_DECODE_ENCODE(port , erlang_port);
@@ -223,6 +276,11 @@ TESTCASE(test_ei_decode_encode)
EI_DECODE_ENCODE(port , erlang_port);
EI_DECODE_ENCODE(ref , erlang_ref);
+ /* Unicode atoms */
+ for (i=0; i<2010; i++) {
+ EI_DECODE_ENCODE(my_atom , my_atom);
+ }
+
report(1);
}
diff --git a/lib/ic/examples/all-against-all/client.c b/lib/ic/examples/all-against-all/client.c
index e0a52b142d..5dece9cfa6 100644
--- a/lib/ic/examples/all-against-all/client.c
+++ b/lib/ic/examples/all-against-all/client.c
@@ -88,6 +88,7 @@ int main(){
/* Initiating pid*/
strcpy(pid.node,client_node);
+ pid.node_org_enc = ERLANG_LATIN1;
pid.num = 99;
pid.serial = 0;
pid.creation = 0;
diff --git a/lib/ic/examples/c-client/client.c b/lib/ic/examples/c-client/client.c
index 816477cf15..5b11510ce3 100644
--- a/lib/ic/examples/c-client/client.c
+++ b/lib/ic/examples/c-client/client.c
@@ -64,6 +64,7 @@ int main()
/* Initiating pid*/
strcpy(pid.node,CLNODE);
+ pid.node_org_enc = ERLANG_LATIN1;
pid.num = 99;
pid.serial = 0;
pid.creation = 0;
diff --git a/lib/ic/examples/c-server/client.c b/lib/ic/examples/c-server/client.c
index fa570089b5..605e41ddb1 100644
--- a/lib/ic/examples/c-server/client.c
+++ b/lib/ic/examples/c-server/client.c
@@ -58,6 +58,7 @@ int main()
/* Initiating pid*/
strcpy(pid.node, CLNODE);
+ pid.node_org_enc = ERLANG_LATIN1;
pid.num = 99;
pid.serial = 0;
pid.creation = 0;
--
cgit v1.2.3
From a912b3c6f4759a6a8e60fc4ea559c19edb02448c Mon Sep 17 00:00:00 2001
From: Rickard Green
Date: Thu, 17 Jan 2013 21:09:46 +0100
Subject: test case
---
erts/emulator/test/distribution_SUITE.erl | 197 ++++++++++++++++++++++++++----
1 file changed, 175 insertions(+), 22 deletions(-)
diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl
index aaf6420ccd..e7bd8d19c3 100644
--- a/erts/emulator/test/distribution_SUITE.erl
+++ b/erts/emulator/test/distribution_SUITE.erl
@@ -18,7 +18,17 @@
%%
-module(distribution_SUITE).
--compile(r13).
+-compile(r15).
+
+-define(VERSION_MAGIC, 131).
+
+-define(ATOM_EXT, 100).
+-define(REFERENCE_EXT, 101).
+-define(PORT_EXT, 102).
+-define(PID_EXT, 103).
+-define(NEW_REFERENCE_EXT, 114).
+-define(ATOM_UTF8_EXT, 118).
+-define(SMALL_ATOM_UTF8_EXT, 119).
%% Tests distribution and the tcp driver.
@@ -1139,27 +1149,66 @@ atom_data() ->
lists:seq(1, 2000)).
verify_atom_data(AtomData) ->
- lists:foreach(fun ({Atom, AtomTxt}) ->
- AtomTxt = atom_to_list(Atom)
+ lists:foreach(fun ({Atom, AtomTxt}) when is_atom(Atom) ->
+ AtomTxt = atom_to_list(Atom);
+ ({PPR, AtomTxt}) ->
+ % Pid, Port, or Ref
+ AtomTxt = atom_to_list(node(PPR))
end,
AtomData).
-uc_atup(ATxt) ->
- {string_to_atom(ATxt), ATxt}.
+%% Builds the atom whose text is the code point list ATxt, checks that
+%% atom_to_list/1 gives ATxt back, and returns {Atom, ATxt}.
+uc_atom_tup(ATxt) ->
+ Atom = string_to_atom(ATxt),
+ ATxt = atom_to_list(Atom),
+ {Atom, ATxt}.
+
+%% Builds a pid whose node name is ATxt (creation 1, number 4711,
+%% serial 17), checks that node/1 on it yields an atom whose text is
+%% ATxt, and returns {Pid, ATxt}.
+uc_pid_tup(ATxt) ->
+ ATxtExt = string_to_atom_ext(ATxt),
+ Pid = mk_pid({ATxtExt, 1}, 4711,17),
+ true = is_pid(Pid),
+ Atom = node(Pid),
+ true = is_atom(Atom),
+ ATxt = atom_to_list(Atom),
+ {Pid, ATxt}.
+
+%% Builds a port whose node name is ATxt (creation 2, number 4711),
+%% checks that node/1 on it yields an atom whose text is ATxt, and
+%% returns {Port, ATxt}.
+uc_port_tup(ATxt) ->
+ ATxtExt = string_to_atom_ext(ATxt),
+ Port = mk_port({ATxtExt, 2}, 4711),
+ true = is_port(Port),
+ Atom = node(Port),
+ true = is_atom(Atom),
+ ATxt = atom_to_list(Atom),
+ {Port, ATxt}.
+
+%% Builds a reference whose node name is ATxt (creation 3, three id
+%% words), checks that node/1 on it yields an atom whose text is ATxt,
+%% and returns {Ref, ATxt}.
+uc_ref_tup(ATxt) ->
+ ATxtExt = string_to_atom_ext(ATxt),
+ Ref = mk_ref({ATxtExt, 3}, [4711,17, 4711]),
+ true = is_reference(Ref),
+ Atom = node(Ref),
+ true = is_atom(Atom),
+ ATxt = atom_to_list(Atom),
+ {Ref, ATxt}.
+
unicode_atom_data() ->
- [uc_atup(lists:seq(16#1f600, 16#1f600+254)),
- uc_atup(lists:seq(16#1f600, 16#1f600+63)),
- uc_atup(lists:seq(0, 254)),
- uc_atup(lists:seq(100, 163)),
- uc_atup(lists:seq(200, 354)),
- uc_atup(lists:seq(200, 263)),
- uc_atup(lists:seq(2000, 2254)),
- uc_atup(lists:seq(2000, 2063)),
- uc_atup(lists:seq(65500, 65754)),
- uc_atup(lists:seq(65500, 65563))
+ [uc_pid_tup(lists:seq(16#1f600, 16#1f600+249) ++ "@host"),
+ uc_pid_tup(lists:seq(16#1f600, 16#1f600+30) ++ "@host"),
+ uc_port_tup(lists:seq(16#1f600, 16#1f600+249) ++ "@host"),
+ uc_port_tup(lists:seq(16#1f600, 16#1f600+30) ++ "@host"),
+ uc_ref_tup(lists:seq(16#1f600, 16#1f600+249) ++ "@host"),
+ uc_ref_tup(lists:seq(16#1f600, 16#1f600+30) ++ "@host"),
+ uc_atom_tup(lists:seq(16#1f600, 16#1f600+254)),
+ uc_atom_tup(lists:seq(16#1f600, 16#1f600+63)),
+ uc_atom_tup(lists:seq(0, 254)),
+ uc_atom_tup(lists:seq(100, 163)),
+ uc_atom_tup(lists:seq(200, 354)),
+ uc_atom_tup(lists:seq(200, 263)),
+ uc_atom_tup(lists:seq(2000, 2254)),
+ uc_atom_tup(lists:seq(2000, 2063)),
+ uc_atom_tup(lists:seq(65500, 65754)),
+ uc_atom_tup(lists:seq(65500, 65563))
| lists:map(fun (N) ->
- uc_atup(lists:seq(64000+N, 64254+N))
+ uc_atom_tup(lists:seq(64000+N, 64254+N))
end,
lists:seq(1, 2000))].
@@ -2193,14 +2242,19 @@ repeat(Fun, N) ->
Fun(),
repeat(Fun, N-1).
-string_to_atom(String) ->
+string_to_atom_ext(String) ->
Utf8List = string_to_utf8_list(String),
Len = length(Utf8List),
- TagLen = case Len < 256 of
- true -> [119, Len];
- false -> [118, Len bsr 8, Len band 16#ff]
- end,
- binary_to_term(list_to_binary([131, TagLen, Utf8List])).
+ case Len < 256 of
+ true ->
+ [?SMALL_ATOM_UTF8_EXT, Len | Utf8List];
+ false ->
+ [?ATOM_UTF8_EXT, Len bsr 8, Len band 16#ff | Utf8List]
+ end.
+
+string_to_atom(String) ->
+ binary_to_term(list_to_binary([?VERSION_MAGIC
+ | string_to_atom_ext(String)])).
string_to_utf8_list([]) ->
[];
@@ -2275,3 +2329,102 @@ utf8_list_to_string([B0, B1, B2, B3 | Bs]) when is_integer(B0),
bor ((B2 band 16#3F) bsl 6)
bor (B3 band 16#3F))
| utf8_list_to_string(Bs)].
+
+%% Creates a pid on an arbitrary node by hand-assembling a PID_EXT term
+%% (node, 32 bit number, 32 bit serial, 8 bit creation) and decoding it
+%% with binary_to_term/1.
+%% The node may be given as an atom or as an already encoded external
+%% atom (an iolist without the version byte). In the atom case the
+%% version byte is stripped from term_to_binary/1 output to get the
+%% external atom encoding.
+%% Exits with {badarg, mk_pid, Args} if the term cannot be decoded.
+mk_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) ->
+    <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+    mk_pid({NodeNameExt, Creation}, Number, Serial);
+mk_pid({NodeNameExt, Creation}, Number, Serial) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+                                              ?PID_EXT,
+                                              NodeNameExt,
+                                              uint32_be(Number),
+                                              uint32_be(Serial),
+                                              uint8(Creation)])) of
+        Pid when is_pid(Pid) ->
+            Pid;
+        {'EXIT', {badarg, _}} ->
+            exit({badarg, mk_pid, [{NodeNameExt, Creation}, Number, Serial]});
+        Other ->
+            exit({unexpected_binary_to_term_result, Other})
+    end.
+
+%% Creates a port on an arbitrary node by hand-assembling a PORT_EXT
+%% term (node, 32 bit number, 8 bit creation) and decoding it with
+%% binary_to_term/1.
+%% The node may be given as an atom or as an already encoded external
+%% atom (an iolist without the version byte). In the atom case the
+%% version byte is stripped from term_to_binary/1 output to get the
+%% external atom encoding.
+%% Exits with {badarg, mk_port, Args} if the term cannot be decoded.
+mk_port({NodeName, Creation}, Number) when is_atom(NodeName) ->
+    <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+    mk_port({NodeNameExt, Creation}, Number);
+mk_port({NodeNameExt, Creation}, Number) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+                                              ?PORT_EXT,
+                                              NodeNameExt,
+                                              uint32_be(Number),
+                                              uint8(Creation)])) of
+        Port when is_port(Port) ->
+            Port;
+        {'EXIT', {badarg, _}} ->
+            exit({badarg, mk_port, [{NodeNameExt, Creation}, Number]});
+        Other ->
+            exit({unexpected_binary_to_term_result, Other})
+    end.
+
+%% Creates a reference on an arbitrary node by hand-assembling its
+%% external term format and decoding it with binary_to_term/1.
+%% A single-element id list produces a REFERENCE_EXT term (node, 32 bit
+%% number, 8 bit creation); any other list produces a NEW_REFERENCE_EXT
+%% term (16 bit id count, node, 8 bit creation, 32 bit ids).
+%% The node may be given as an atom or as an already encoded external
+%% atom (an iolist without the version byte). In the atom case the
+%% version byte is stripped from term_to_binary/1 output to get the
+%% external atom encoding.
+%% Exits with {badarg, mk_ref, Args} if the term cannot be decoded.
+mk_ref({NodeName, Creation}, [Number] = NL) when is_atom(NodeName),
+                                                 is_integer(Creation),
+                                                 is_integer(Number) ->
+    <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+    mk_ref({NodeNameExt, Creation}, NL);
+mk_ref({NodeNameExt, Creation}, [Number]) when is_integer(Creation),
+                                               is_integer(Number) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+                                              ?REFERENCE_EXT,
+                                              NodeNameExt,
+                                              uint32_be(Number),
+                                              uint8(Creation)])) of
+        Ref when is_reference(Ref) ->
+            Ref;
+        {'EXIT', {badarg, _}} ->
+            exit({badarg, mk_ref, [{NodeNameExt, Creation}, [Number]]});
+        Other ->
+            exit({unexpected_binary_to_term_result, Other})
+    end;
+mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName),
+                                           is_integer(Creation),
+                                           is_list(Numbers) ->
+    <<?VERSION_MAGIC, NodeNameExt/binary>> = term_to_binary(NodeName),
+    mk_ref({NodeNameExt, Creation}, Numbers);
+mk_ref({NodeNameExt, Creation}, Numbers) when is_integer(Creation),
+                                              is_list(Numbers) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+                                              ?NEW_REFERENCE_EXT,
+                                              uint16_be(length(Numbers)),
+                                              NodeNameExt,
+                                              uint8(Creation),
+                                              lists:map(fun (N) ->
+                                                                uint32_be(N)
+                                                        end,
+                                                        Numbers)])) of
+        Ref when is_reference(Ref) ->
+            Ref;
+        {'EXIT', {badarg, _}} ->
+            exit({badarg, mk_ref, [{NodeNameExt, Creation}, Numbers]});
+        Other ->
+            exit({unexpected_binary_to_term_result, Other})
+    end.
+
+
+%% Returns Uint as a list of four bytes in big-endian order.
+%% Exits with {badarg, uint32_be, [Uint]} unless Uint is an integer in
+%% the range 0..2^32-1.
+uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 ->
+ [(Uint bsr 24) band 16#ff,
+ (Uint bsr 16) band 16#ff,
+ (Uint bsr 8) band 16#ff,
+ Uint band 16#ff];
+uint32_be(Uint) ->
+ exit({badarg, uint32_be, [Uint]}).
+
+
+%% Returns Uint as a list of two bytes in big-endian order.
+%% Exits with {badarg, uint16_be, [Uint]} unless Uint is an integer in
+%% the range 0..2^16-1.
+uint16_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 16 ->
+ [(Uint bsr 8) band 16#ff,
+ Uint band 16#ff];
+uint16_be(Uint) ->
+ exit({badarg, uint16_be, [Uint]}).
+
+%% Returns Uint as a single byte value.
+%% Exits with {badarg, uint8, [Uint]} unless Uint is an integer in the
+%% range 0..255.
+uint8(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 8 ->
+ Uint band 16#ff;
+uint8(Uint) ->
+ exit({badarg, uint8, [Uint]}).
--
cgit v1.2.3
From b15688d40d5147c1122aaad3b82495fbbc4dede8 Mon Sep 17 00:00:00 2001
From: Rickard Green
Date: Sat, 19 Jan 2013 00:45:16 +0100
Subject: UTF-8 atom documentation
---
erts/doc/src/erl_dist_protocol.xml | 288 ++++++++++++++++++++-
erts/doc/src/erl_ext_dist.xml | 121 ++++++++-
erts/doc/src/erlang.xml | 10 +-
lib/kernel/internal_doc/distribution_handshake.txt | 216 +---------------
4 files changed, 398 insertions(+), 237 deletions(-)
diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml
index 6c725fc82d..0252187be5 100644
--- a/erts/doc/src/erl_dist_protocol.xml
+++ b/erts/doc/src/erl_dist_protocol.xml
@@ -547,13 +547,289 @@ If Result > 0, the packet only consists of [119, Result].
-->
-
+
- Handshake
-
- The handshake is discussed in detail in the internal documentation for
- the kernel (Erlang) application.
-
+ Distribution Handshake
+
+ This section describes the distribution handshake protocol
+ introduced in the OTP-R6 release of Erlang/OTP. This
+ description was previously located in
+ $ERL_TOP/lib/kernel/internal_doc/distribution_handshake.txt,
+ and has been copied and reformatted here with only minor changes.
+ The text has remained largely unchanged since 1999, but the
+ handshake itself should not have changed much since then either.
+
+
+ General
+
+ The TCP/IP distribution uses a handshake which expects a
+ connection based protocol, i.e. the protocol does not include
+ any authentication after the handshake procedure.
+
+
+ This is not entirely safe, as it is vulnerable against takeover
+ attacks, but it is a tradeoff between fair safety and performance.
+
+
+ The cookies are never sent in cleartext and the handshake procedure
+ expects the client (called A) to be the first one to prove that it can
+ generate a sufficient digest. The digest is generated with the
+ MD5 message digest algorithm and the challenges are expected to be very
+ random numbers.
+
+
+
+ Definitions
+
+ A challenge is a 32 bit integer number in big endian order. Below the function
+ gen_challenge() returns a random 32 bit integer used as a challenge.
+
+
+ A digest is a (16 bytes) MD5 hash of the Challenge (as text) concatenated
+ with the cookie (as text). Below, the function gen_digest(Challenge, Cookie)
+ generates a digest as described above.
+
+
+ An out_cookie is the cookie used in outgoing communication to a certain node,
+ so that A's out_cookie for B should correspond with B's in_cookie for A and
+ the other way around. A's out_cookie for B and A's in_cookie for B need NOT
+ be the same. Below the function out_cookie(Node) returns the current
+ node's out_cookie for Node.
+
+
+ An in_cookie is the cookie expected to be used by another node when
+ communicating with us, so that A's in_cookie for B corresponds with B's
+ out_cookie for A. Below the function in_cookie(Node) returns the current
+ node's in_cookie for Node.
+
+
+ The cookies are text strings that can be viewed as passwords.
+
+
+ Every message in the handshake starts with a 16 bit big endian integer
+ which contains the length of the message (not counting the two initial bytes).
+ In erlang this corresponds to the gen_tcp option {packet, 2}. Note that after
+ the handshake, the distribution switches to 4 byte packet headers.
+
+
+
+
+ The Handshake in Detail
+
+ Imagine two nodes, node A, which initiates the handshake and node B, which
+ accepts the connection.
+
+
+ 1) connect/accept
+
A connects to B via TCP/IP and B accepts the connection.
+ 2) send_name/receive_name
+
A sends an initial identification to B. B receives the message.
+ The message looks like this (every "square" being one byte and the packet
+ header removed):
+
+ The 'n' is just a message tag.
+ Version0 and Version1 is the distribution version selected by node A,
+ based on information from EPMD. (16 bit big endian)
+ Flag0 ... Flag3 are capability flags, the capabilities defined in
+ $ERL_TOP/lib/kernel/include/dist.hrl.
+ (32 bit big endian)
+ Name0 ... NameN is the full nodename of A, as a string of bytes (the
+ packet length denotes how long it is).
+
+ 3) recv_status/send_status
+
B sends a status message to A, which indicates
+ if the connection is allowed. The following status codes are defined:
+
+ ok
+ The handshake will continue.
+ ok_simultaneous
+ The handshake will continue, but A is informed that B
+ has another ongoing connection attempt that will be
+ shut down (simultaneous connect where A's name is
+ greater than B's name, compared literally).
+ nok
+ The handshake will not continue, as B already has an ongoing handshake
+ which it itself has initiated. (simultaneous connect where B's name is
+ greater than A's).
+ not_allowed
+ The connection is disallowed for some (unspecified) security
+ reason.
+ alive
+ A connection to the node is already active, which either means
+ that node A is confused or that the TCP connection breakdown
+ of a previous node with this name has not yet reached node B.
+ See 3B below.
+
+
+ 's' is the message tag. Status0 ... StatusN is the status as a string (not terminated).
+
+
+ 3B) send_status/recv_status
+
If status was 'alive', node A will answer with
+ another status message containing either 'true' which means that the
+ connection should continue (The old connection from this node is broken), or
+ 'false', which simply means that the connection should be closed, the
+ connection attempt was a mistake.
+ 4) recv_challenge/send_challenge
+
If the status was ok or ok_simultaneous,
+ the handshake continues with B sending A another message, the challenge.
+ The challenge contains the same type of information as the "name" message
+ initially sent from A to B, with the addition of a 32 bit challenge:
+ Where 'r' is the tag, Chal0 ... Chal3 is A's challenge for B to handle and
+ Dige0 ... Dige15 is the digest that A constructed from the challenge B sent
+ in the previous step.
+
+ 6) recv_challenge_ack/send_challenge_ack
+
B checks that the digest received from A is correct and generates a
+ digest from the challenge received from A. The digest is then sent to A. The
+ message looks like this:
+
+
+
+ The Currently Defined Distribution Flags
+
+ Currently (OTP-R16) the following capability flags are defined:
+
+
+%% The node should be published and part of the global namespace
+-define(DFLAG_PUBLISHED,1).
+
+%% The node implements an atom cache (obsolete)
+-define(DFLAG_ATOM_CACHE,2).
+
+%% The node implements extended (3 * 32 bits) references. This is
+%% required today. If not present connection will be refused.
+-define(DFLAG_EXTENDED_REFERENCES,4).
+
+%% The node implements distributed process monitoring.
+-define(DFLAG_DIST_MONITOR,8).
+
+%% The node uses separate tag for fun's (lambdas) in the distribution protocol.
+-define(DFLAG_FUN_TAGS,16#10).
+
+%% The node implements distributed named process monitoring.
+-define(DFLAG_DIST_MONITOR_NAME,16#20).
+
+%% The (hidden) node implements atom cache (obsolete)
+-define(DFLAG_HIDDEN_ATOM_CACHE,16#40).
+
+%% The node understands new fun-tags
+-define(DFLAG_NEW_FUN_TAGS,16#80).
+
+%% The node is capable of handling extended pids and ports. This is
+%% required today. If not present connection will be refused.
+-define(DFLAG_EXTENDED_PIDS_PORTS,16#100).
+
+%%
+-define(DFLAG_EXPORT_PTR_TAG,16#200).
+
+%%
+-define(DFLAG_BIT_BINARIES,16#400).
+
+%% The node understands new float format
+-define(DFLAG_NEW_FLOATS,16#800).
+
+%%
+-define(DFLAG_UNICODE_IO,16#1000).
+
+%% The node implements atom cache in distribution header.
+-define(DFLAG_DIST_HDR_ATOM_CACHE,16#2000).
+
+%% The node understands the SMALL_ATOM_EXT tag
+-define(DFLAG_SMALL_ATOM_TAGS, 16#4000).
+
+%% The node understands UTF-8 encoded atoms
+-define(DFLAG_UTF8_ATOMS, 16#10000).
+
+
+
diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml
index fd2da2cfe3..28afea8b29 100644
--- a/erts/doc/src/erl_ext_dist.xml
+++ b/erts/doc/src/erl_ext_dist.xml
@@ -119,10 +119,39 @@
Data
+
+
+
As of ERTS version 5.10 (OTP-R16) support
+ for UTF-8 encoded atoms has been introduced in the external format.
+ However, only characters that can be encoded using Latin1 (ISO-8859-1)
+ are currently supported in atoms. The support for UTF-8 encoded atoms
+ in the external format has been implemented in order to be able to support
+ all Unicode characters in atoms in some future release. Full
+ support for Unicode atoms will not happen before OTP-R18, and might
+ be introduced even later than that. Until full Unicode support for
+ atoms has been introduced, it is an error to pass atoms containing
+ characters that cannot be encoded in Latin1, and the behavior is
+ undefined.
+
When the
+ DFLAG_UTF8_ATOMS
+ distribution flag has been exchanged between both nodes in the
+ distribution handshake,
+ all atoms in the distribution header will be encoded in UTF-8; otherwise,
+ all atoms in the distribution header will be encoded in Latin1. The two
+ new tags ATOM_UTF8_EXT, and
+ SMALL_ATOM_UTF8_EXT
+ will only be used if the DFLAG_UTF8_ATOMS distribution flag has
+ been exchanged between nodes, or if an atom containing characters
+ that cannot be encoded in Latin1 is encountered.
+
+
The maximum number of allowed characters in an atom is 255. In the
+ UTF-8 case each character may need 4 bytes to be encoded.
+
+
-
-
+
+ Distribution header
As of erts version 5.7.2 the old atom cache protocol was
@@ -219,8 +248,7 @@
The least significant bit in that half byte is the LongAtoms
flag. If it is set, 2 bytes are used for atom lengths instead of
- 1 byte in the distribution header. However, the current emulator
- cannot handle long atoms, so it will currently always be 0.
+ 1 byte in the distribution header.
After the Flags field follow the AtomCacheRefs. The
@@ -247,15 +275,25 @@
InternalSegmentIndex together with the SegmentIndex
completely identify the location of an atom cache entry in the
- atom cache. Length is number of one byte characters that
- the atom text consists of. Length is a two byte big endian integer
+ atom cache. Length is the number of bytes that AtomText
+ consists of. Length is a two byte big endian integer
if the LongAtoms flag has been set, otherwise a one byte
- integer. Subsequent CachedAtomRefs with the same
+ integer. When the
+ DFLAG_UTF8_ATOMS
+ distribution flag has been exchanged between both nodes in the
+ distribution handshake,
+ characters in AtomText are encoded in UTF-8; otherwise,
+ they are encoded in Latin1. Subsequent CachedAtomRefs with the same
SegmentIndex and InternalSegmentIndex as this
NewAtomCacheRef will refer to this atom until a new
NewAtomCacheRef with the same SegmentIndex
and InternalSegmentIndex appear.
+
+ For more information on encoding of atoms, see
+ note on UTF-8 encoded atoms
+ in the beginning of this document.
+
If the NewCacheEntryFlag for the next AtomCacheRef
has not been set, a CachedAtomRef on the following format
@@ -383,9 +421,9 @@
An atom is stored with a 2 byte unsigned length in big-endian order,
- followed by Len numbers of 8 bit characters that forms the
- AtomName.
- Note: The maximum allowed value for Len is 255.
+ followed by Len 8-bit Latin1 characters that form
+ the AtomName.
+ Note: The maximum allowed value for Len is 255.
@@ -754,12 +792,14 @@
An atom is stored with a 1 byte unsigned length,
- followed by Len numbers of 8 bit characters that
+ followed by Len numbers of 8 bit Latin1 characters that
forms the AtomName. Longer atoms can be represented
by ATOM_EXT. Note
the SMALL_ATOM_EXT was introduced in erts version 5.7.2 and
- require a small atom distribution flag exchanged in the distribution
- handshake.
+ require an exchange of the
+ DFLAG_SMALL_ATOM_TAGS
+ distribution flag in the
+ distribution handshake.
@@ -1007,7 +1047,62 @@
This term is used in minor version 1 of the external format.
+
+
+ ATOM_UTF8_EXT
+
+
+
+ 1
+ 2
+ Len
+
+
+ 118
+ Len
+ AtomName
+
+
+
+ An atom is stored with a 2 byte unsigned length in big-endian order,
+ followed by Len bytes containing the AtomName encoded
+ in UTF-8.
+
+
+ For more information on encoding of atoms, see
+ note on UTF-8 encoded atoms
+ in the beginning of this document.
+
+
+
+
+ SMALL_ATOM_UTF8_EXT
+
+
+
+ 1
+ 1
+ Len
+
+
+ 119
+ Len
+ AtomName
+
+
+
+ An atom is stored with a 1 byte unsigned length,
+ followed by Len bytes containing the AtomName encoded
+ in UTF-8. Longer atoms encoded in UTF-8 can be represented using
+ ATOM_UTF8_EXT.
+
+
+ For more information on encoding of atoms, see
+ note on UTF-8 encoded atoms
+ in the beginning of this document.
+
+
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 5002c48ca1..0077c0096c 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -277,7 +277,9 @@
the binary contains Unicode characters greater than 16#FF.
In a future release, such Unicode characters might be allowed
and binary_to_atom(Binary, utf8)
- will not fail in that case.
+ will not fail in that case. For more information on Unicode support in atoms
+ see note on UTF-8 encoded atoms
+ in the chapter about the external term format in the ERTS User's Guide.
Returns the atom whose text representation is String.
String may only contain ISO-latin-1
- characterns (i.e. numbers below 256) as the current
+ characters (i.e. numbers below 256) as the current
implementation does not allow unicode characters >= 256 in
- atoms.
+ atoms. For more information on Unicode support in atoms
+ see note on UTF-8 encoded atoms
+ in the chapter about the external term format in the ERTS User's Guide.
> list_to_atom("Erlang").
'Erlang'
diff --git a/lib/kernel/internal_doc/distribution_handshake.txt b/lib/kernel/internal_doc/distribution_handshake.txt
index 6a3ee22ed3..d00c4ceb02 100644
--- a/lib/kernel/internal_doc/distribution_handshake.txt
+++ b/lib/kernel/internal_doc/distribution_handshake.txt
@@ -1,215 +1 @@
-HOW THE DISTRIBUTION HANDSHAKE WORKS
-------------------------------------
-
-This document describes the distribution handshake introduced in
-the R6 release of Erlang/OTP.
-
-GENERAL
--------
-
-The TCP/IP distribution uses a handshake which expects a
-connection based protocol, i.e. the protocol does not include
-any authentication after the handshake procedure.
-
-This is not entirely safe, as it is vulnerable against takeover
-attacks, but it is a tradeoff between fair safety and performance.
-
-The cookies are never sent in cleartext and the handshake procedure
-expects the client (called A) to be the first one to prove that it can
-generate a sufficient digest. The digest is generated with the
-MD5 message digest algorithm and the challenges are expected to be very
-random numbers.
-
-DEFINITIONS
------------
-
-A challenge is a 32 bit integer number in big endian order. Below the function
-gen_challenge() returns a random 32 bit integer used as a challenge.
-
-A digest is a (16 bytes) MD5 hash of [the Challenge (as text) concatenated
-with the cookie (as text)]. Below, the function gen_digest(Challenge, Cookie)
-generates a digest as described above.
-
-An out_cookie is the cookie used in outgoing communication to a certain node,
-so that A's out_cookie for B should correspond with B's in_cookie for A and
-the other way around. A's out_cookie for B and A's in_cookie for B need *NOT*
-be the same. Below the function out_cookie(Node) returns the current
-node's out_cookie for Node.
-
-An in_cookie is the cookie expected to be used by another node when
-communicating with us, so that A's in_cookie for B corresponds with B's
-out_cookie for A. Below the function in_cookie(Node) returns the current
-node's in_cookie for Node.
-
-The cookies are text strings that can be viewed as passwords.
-
-Every message in the handshake starts with a 16 bit big endian integer
-which contains the length of the message (not counting the two initial bytes).
-In erlang this corresponds to the gen_tcp option {packet, 2}. Note that after
-the handshake, the distribution switches to 4 byte packet headers.
-
-THE HANDSHAKE IN DETAIL
------------------------
-
-Imagine two nodes, node A, which initiates the handshake and node B, which
-accepts the connection.
-
-1) connect/accept: A connects to B via TCP/IP and B accepts the connection.
-
-2) send_name/receive_name: A sends an initial identification to B.
-B receives the message. The message looks
-like this (every "square" being one byte and the packet header removed):
-
-+---+--------+--------+-----+-----+-----+-----+-----+-----+-...-+-----+
-|'n'|Version0|Version1|Flag0|Flag1|Flag2|Flag3|Name0|Name1| ... |NameN|
-+---+--------+--------+-----+-----+-----+-----+-----+-----+-... +-----+
-
-The 'n' is just a message tag,
-Version0 & Version1 is the distribution version selected by node A,
- based on information from EPMD. (16 bit big endian)
-Flag0 ... Flag3 are capability flags, the capabilities defined in dist.hrl.
- (32 bit big endian)
-Name0 ... NameN is the full nodename of A, as a string of bytes (the
- packet length denotes how long it is).
-
-3) recv_status/send_status: B sends a status message to A, which indicates
-if the connection is allowed. Four different status codes are defined:
-ok: The handshake will continue.
-ok_simultaneous: The handshake will continue, but A is informed that B
- has another ongoing connection attempt that will be
- shut down (simultaneous connect where A's name is
- greater than B's name, compared literally),
-nok: The handshake will not continue, as B already has an ongoing handshake
- which it itself has initiated. (simultaneous connect where B's name is
- greater than A's)
-not_allowed: The connection is disallowed for some (unspecified) security
- reason.
-alive: A connection to the node is already active, which either means
- that node A is confused or that the TCP connection breakdown
- of a previous node with this name has not yet reached node B.
- See 3B below.
-
-This is the format of the status message:
-
-+---+-------+-------+-...-+-------+
-|'s'|Status0|Status1| ... |StatusN|
-+---+-------+-------+-...-+-------+
-
-'s' is the message tag
-Status0 ... StatusN is the status as a string (not terminated)
-
-3B) send_status/recv_status: If status was 'alive', node A will answer with
-another status message containing either 'true' which means that the
-connection should continue (The old connection from this node is broken), or
-'false', which simply means that the connection should be closed, the
-connection attempt was a mistake.
-
-4) recv_challenge/send_challenge: If the status was 'ok' or 'ok_simultaneous',
-The handshake continues with B sending A another message, the challenge.
-The challenge contains the same type of information as the "name" message
-initially sent from A to B, with the addition of a 32 bit challenge:
-
-+---+--------+--------+-----+-----+-----+-----+-----+-----+-----+-----+---
-|'n'|Version0|Version1|Flag0|Flag1|Flag2|Flag3|Chal0|Chal1|Chal2|Chal3|
-+---+--------+--------+-----+-----+-----+-----+-----+-----+-----+-----+---
- ------+-----+-...-+-----+
- Name0|Name1| ... |NameN|
- ------+-----+-... +-----+
-
-Where Chal0 ... Chal3 is the challenge as a 32 bit big endian integer
-and the other fields are B's version, flags and full nodename.
-
-5) send_challenge_reply/recv_challenge_reply: Now A has generated
-a digest and its own challenge. Those are sent together in a package
-to B:
-
-+---+-----+-----+-----+-----+-----+-----+-----+-----+-...-+------+
-|'r'|Chal0|Chal1|Chal2|Chal3|Dige0|Dige1|Dige2|Dige3| ... |Dige15|
-+---+-----+-----+-----+-----+-----+-----+-----+-----+-...-+------+
-
-Where 'r' is the tag, Chal0 ... Chal3 is A's challenge for B to handle and
-Dige0 ... Dige15 is the digest that A constructed from the challenge B sent
-in the previous step.
-
-6) recv_challenge_ack/send_challenge_ack: B checks that the digest received
-from A is correct and generates a digest from the challenge received from
-A. The digest is then sent to A. The message looks like this:
-
-+---+-----+-----+-----+-----+-...-+------+
-|'a'|Dige0|Dige1|Dige2|Dige3| ... |Dige15|
-+---+-----+-----+-----+-----+-...-+------+
-
-Where 'a' is the tag and Dige0 ... Dige15 is the digest calculated by B
-for A's challenge.
-
-7) A checks the digest from B and the connection is up.
-
-SEMIGRAPHIC VIEW
-----------------
-
-A (initiator) B (acceptor)
-
-TCP connect ----------------------------------------->
- TCP accept
-
-send_name ----------------------------------------->
- recv_name
-
- <---------------------------------------- send_status
-recv_status
-(if status was 'alive'
- send_status - - - - - - - - - - - - - - - - - - - ->
- recv_status)
- ChB = gen_challenge()
- (ChB)
- <---------------------------------------- send_challenge
-recv_challenge
-
-ChA = gen_challenge(),
-OCA = out_cookie(B),
-DiA = gen_digest(ChB,OCA)
- (ChA, DiA)
-send_challenge_reply -------------------------------->
- recv_challenge_reply
- ICB = in_cookie(A),
- check:
- DiA == gen_digest
- (ChB, ICB) ?
- - if OK:
- OCB = out_cookie(A),
- DiB = gen_digest
- (DiB) (ChA, OCB)
- <----------------------------------------- send_challenge_ack
-recv_challenge_ack DONE
-ICA = in_cookie(B), - else
-check: CLOSE
-DiB == gen_digest(ChA,ICA) ?
-- if OK
- DONE
-- else
- CLOSE
-
-
-THE CURRENTLY DEFINED FLAGS
----------------------------
-Currently the following capability flags are defined:
-
-%% The node should be published and part of the global namespace
--define(DFLAG_PUBLISHED,1).
-
-%% The node implements an atom cache
--define(DFLAG_ATOM_CACHE,2).
-
-%% The node implements extended (3 * 32 bits) references
--define(DFLAG_EXTENDED_REFERENCES,4).
-
-%% The node implements distributed process monitoring.
--define(DFLAG_DIST_MONITOR,8).
-
-%% The node uses separate tag for fun's (lambdas) in the distribution protocol.
--define(DFLAG_FUN_TAGS,16).
-
-An R6 erlang node implements all of the above, while a C or Java node only
-implements DFLAG_EXTENDED_REFERENCES.
-
-Last modified 1999-11-08 -- Patrik Nyblom, OTP
+This information has been moved to the "Distribution Protocol" chapter of "ERTS User's Guide".
--
cgit v1.2.3
From 6347bc07dac9f958b4ac3cb751dabae08d350b8b Mon Sep 17 00:00:00 2001
From: Rickard Green
Date: Sat, 19 Jan 2013 00:46:21 +0100
Subject: Fix merge conflict with hasse
---
lib/stdlib/src/io_lib.erl | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/stdlib/src/io_lib.erl b/lib/stdlib/src/io_lib.erl
index 0f1f417d01..513d904c39 100644
--- a/lib/stdlib/src/io_lib.erl
+++ b/lib/stdlib/src/io_lib.erl
@@ -276,7 +276,7 @@ write_atom(Atom) ->
Chars = atom_to_list(Atom),
case quote_atom(Atom, Chars) of
true ->
- write_unicode_string(Chars, $'); %'
+ write_string(Chars, $'); %'
false ->
Chars
end.
--
cgit v1.2.3
From 5ee4c7136ce8f311e3d3384ae0feb29bcbff6e85 Mon Sep 17 00:00:00 2001
From: Rickard Green
Date: Mon, 21 Jan 2013 15:44:33 +0100
Subject: workaround...
---
erts/emulator/test/distribution_SUITE.erl | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl
index e7bd8d19c3..7edd92d56f 100644
--- a/erts/emulator/test/distribution_SUITE.erl
+++ b/erts/emulator/test/distribution_SUITE.erl
@@ -1240,9 +1240,9 @@ contended_atom_cache_entry_test(Config, Type) ->
get_conflicting_unicode_atoms(CIX,
ProcessPairs)
end,
- io:format("Testing with the following atoms all using "
- "cache index ~p:~n ~p~n",
- [CIX, TestAtoms]),
+% io:format("Testing with the following atoms all using "
+% "cache index ~p:~n ~p~n",
+% [CIX, TestAtoms]),
Ps = lists:map(
fun (A) ->
Ref = make_ref(),
--
cgit v1.2.3
From 3aa60cc472bc330dbe9360eb27a1f340b7e23dc6 Mon Sep 17 00:00:00 2001
From: Rickard Green
Date: Tue, 22 Jan 2013 18:35:35 +0100
Subject: Add UTF-8 node name support for epmd
---
erts/epmd/src/epmd_cli.c | 13 ++-
erts/epmd/src/epmd_int.h | 20 +++-
erts/epmd/src/epmd_srv.c | 215 ++++++++++++++++++++++++++++++++++++------
erts/epmd/test/epmd_SUITE.erl | 47 ++++++++-
4 files changed, 254 insertions(+), 41 deletions(-)
diff --git a/erts/epmd/src/epmd_cli.c b/erts/epmd/src/epmd_cli.c
index 74408e3ebe..1d4de64b63 100644
--- a/erts/epmd/src/epmd_cli.c
+++ b/erts/epmd/src/epmd_cli.c
@@ -22,6 +22,7 @@
#endif
#include "epmd.h" /* Renamed from 'epmd_r4.h' */
#include "epmd_int.h"
+#include "erl_printf.h" /* erts_snprintf */
/* forward declarations */
@@ -114,16 +115,18 @@ void epmd_call(EpmdVars *g,int what)
epmd_cleanup_exit(g,1);
}
j = ntohl(i);
- if (!g->silent)
- printf("epmd: up and running on port %d with data:\n", j);
+ if (!g->silent) {
+ rval = erts_snprintf(buf, OUTBUF_SIZE,
+ "epmd: up and running on port %d with data:\n", j);
+ write(1, buf, rval);
+ }
while(1) {
- if ((rval = read(fd,buf,1)) <= 0) {
+ if ((rval = read(fd,buf,OUTBUF_SIZE)) <= 0) {
close(fd);
epmd_cleanup_exit(g,0);
}
- buf[rval] = '\0';
if (!g->silent)
- printf("%s",buf);
+ write(1, buf, rval); /* Potentially UTF-8 encoded */
}
}
diff --git a/erts/epmd/src/epmd_int.h b/erts/epmd/src/epmd_int.h
index 14d05c3f19..b25412c905 100644
--- a/erts/epmd/src/epmd_int.h
+++ b/erts/epmd/src/epmd_int.h
@@ -226,13 +226,25 @@
#define MAX_UNREG_COUNT 1000
#define DEBUG_MAX_UNREG_COUNT 5
-/* Maximum length of a node name == atom name */
-#define MAXSYMLEN 255
+/*
+ * Maximum length of a node name == atom name
+ * 255 characters; UTF-8 encoded -> max 255*4
+ */
+#define MAXSYMLEN (255*4)
#define MAX_LISTEN_SOCKETS 16
-#define INBUF_SIZE 1024
-#define OUTBUF_SIZE 1024
+/*
+ * Largest request: ALIVE2_REQ
+ * 2 + 13 + 2*MAXSYMLEN
+ * Largest response: PORT2_RESP
+ * 2 + 14 + 2*MAXSYMLEN
+ *
+ * That is, 3*MAXSYMLEN should be large enough
+ */
+
+#define INBUF_SIZE (3*MAXSYMLEN)
+#define OUTBUF_SIZE (3*MAXSYMLEN)
#define get_int16(s) ((((unsigned char*) (s))[0] << 8) | \
(((unsigned char*) (s))[1]))
diff --git a/erts/epmd/src/epmd_srv.c b/erts/epmd/src/epmd_srv.c
index 36565b7438..2a74c4955e 100644
--- a/erts/epmd/src/epmd_srv.c
+++ b/erts/epmd/src/epmd_srv.c
@@ -73,7 +73,7 @@ static int conn_open(EpmdVars*,int);
static int conn_close_fd(EpmdVars*,int);
static void node_init(EpmdVars*);
-static Node *node_reg2(EpmdVars*,char*, int, int, unsigned char, unsigned char, int, int, int, char*);
+static Node *node_reg2(EpmdVars*, int, char*, int, int, unsigned char, unsigned char, int, int, int, char*);
static int node_unreg(EpmdVars*,char*);
static int node_unreg_sock(EpmdVars*,int);
@@ -81,6 +81,113 @@ static int reply(EpmdVars*,int,char *,int);
static void dbg_print_buf(EpmdVars*,char *,int);
static void print_names(EpmdVars*);
+static int is_same_str(char *x, char *y)
+{
+ int i = 0;
+ /*
+ * Byte-wise equality test: returns 1 when x and y are identical up to
+ * and including the terminating '\0', otherwise 0 (hand-rolled strcmp).
+ *
+ * Both x and y must be null-terminated (assumed correctly encoded UTF-8).
+ */
+ while (x[i] == y[i]) {
+ if (x[i] == '\0')
+ return 1;
+ i++;
+ }
+ return 0;
+}
+
+static int copy_str(char *x, char *y)
+{
+ int i = 0;
+ /*
+ * Copies y, including its terminating '\0', into x and returns the
+ * number of bytes copied excluding the '\0' (hand-rolled strcpy).
+ *
+ * No bounds check: x must have room for y; y must be null-terminated.
+ */
+ while (1) {
+ x[i] = y[i];
+ if (y[i] == '\0')
+ return i;
+ i++;
+ }
+}
+
+static int length_str(char *x)
+{
+ int i = 0;
+ /*
+ * Returns the number of bytes in x before the terminating '\0'
+ * (hand-rolled strlen); this is a byte count, not a character count.
+ *
+ * x must be null-terminated (assumed correctly encoded UTF-8).
+ */
+ while (x[i])
+ i++;
+ return i;
+}
+
+static int verify_utf8(const char *src, int sz, int null_term)
+{ /* Returns the number of UTF-8 characters in src, or -1 if src is not valid UTF-8 */
+ unsigned char *source = (unsigned char *) src;
+ int size = sz; /* bytes left to examine */
+ int num_chars = 0; /* characters validated so far */
+ while (size) {
+ if (null_term && (*source) == 0) /* stop at '\0' when null_term is set */
+ return num_chars;
+ if (((*source) & ((unsigned char) 0x80)) == 0) { /* 1 byte: 0xxxxxxx */
+ source++;
+ --size;
+ } else if (((*source) & ((unsigned char) 0xE0)) == 0xC0) { /* 2 bytes: 110xxxxx */
+ if (size < 2) /* truncated sequence */
+ return -1;
+ if (((source[1] & ((unsigned char) 0xC0)) != 0x80) || /* need 10xxxxxx */
+ ((*source) < 0xC2) /* overlong */) {
+ return -1;
+ }
+ source += 2;
+ size -= 2;
+ } else if (((*source) & ((unsigned char) 0xF0)) == 0xE0) { /* 3 bytes: 1110xxxx */
+ if (size < 3) /* truncated sequence */
+ return -1;
+ if (((source[1] & ((unsigned char) 0xC0)) != 0x80) ||
+ ((source[2] & ((unsigned char) 0xC0)) != 0x80) ||
+ (((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
+ return -1;
+ }
+ if ((((*source) & ((unsigned char) 0xF)) == 0xD) && /* lead byte 0xED ... */
+ ((source[1] & 0x20) != 0)) { /* ... then 0xA0..0xBF: surrogate U+D800..U+DFFF */
+ return -1;
+ }
+ source += 3;
+ size -= 3;
+ } else if (((*source) & ((unsigned char) 0xF8)) == 0xF0) { /* 4 bytes: 11110xxx */
+ if (size < 4) /* truncated sequence */
+ return -1;
+ if (((source[1] & ((unsigned char) 0xC0)) != 0x80) ||
+ ((source[2] & ((unsigned char) 0xC0)) != 0x80) ||
+ ((source[3] & ((unsigned char) 0xC0)) != 0x80) ||
+ (((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
+ return -1;
+ }
+ if ((((*source) & ((unsigned char)0x7)) > 0x4U) || /* lead byte above 0xF4 */
+ ((((*source) & ((unsigned char)0x7)) == 0x4U) &&
+ ((source[1] & ((unsigned char)0x3F)) > 0xFU))) { /* 0xF4 0x90..: above U+10FFFF */
+ return -1;
+ }
+ source += 4;
+ size -= 4;
+ } else { /* stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx) */
+ return -1;
+ }
+ ++num_chars;
+ }
+ return num_chars;
+}
+
+
static EPMD_INLINE void select_fd_set(EpmdVars* g, int fd)
{
FD_SET(fd, &g->orig_read_mask);
@@ -525,10 +632,11 @@ static void do_request(g, fd, s, buf, bsize)
}
name = &buf[11];
name[namelen]='\000';
+
extra = &buf[11+namelen+2];
extra[extralen]='\000';
wbuf[0] = EPMD_ALIVE2_RESP;
- if ((node = node_reg2(g, name, fd, eport, nodetype, protocol,
+ if ((node = node_reg2(g, namelen, name, fd, eport, nodetype, protocol,
highvsn, lowvsn, extralen, extra)) == NULL) {
wbuf[1] = 1; /* error */
put_int16(99, wbuf+2);
@@ -573,22 +681,28 @@ static void do_request(g, fd, s, buf, bsize)
{
char *name = &buf[1]; /* Points to node name */
+ int nsz;
Node *node;
-
+
+ nsz = verify_utf8(name, bsize, 0);
+ if (nsz < 1 || 255 < nsz) {
+ dbg_printf(g,0,"invalid node name in PORT2_REQ");
+ return;
+ }
+
wbuf[0] = EPMD_PORT2_RESP;
for (node = g->nodes.reg; node; node = node->next) {
int offset;
- if (strcmp(node->symname, name) == 0) {
+ if (is_same_str(node->symname, name)) {
wbuf[1] = 0; /* ok */
put_int16(node->port,wbuf+2);
wbuf[4] = node->nodetype;
wbuf[5] = node->protocol;
put_int16(node->highvsn,wbuf+6);
put_int16(node->lowvsn,wbuf+8);
- put_int16(strlen(node->symname),wbuf+10);
+ put_int16(length_str(node->symname),wbuf+10);
offset = 12;
- strcpy(wbuf + offset,node->symname);
- offset += strlen(node->symname);
+ offset += copy_str(wbuf + offset,node->symname);
put_int16(node->extralen,wbuf + offset);
offset += 2;
memcpy(wbuf + offset,node->extra,node->extralen);
@@ -629,15 +743,22 @@ static void do_request(g, fd, s, buf, bsize)
for (node = g->nodes.reg; node; node = node->next)
{
- int len;
+ int len = 0;
+ int r;
/* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
change in syntax will break < OTP R3A */
- erts_snprintf(wbuf, sizeof(wbuf), "name %s at port %d\n",node->symname, node->port);
- len = strlen(wbuf);
+ len += copy_str(&wbuf[len], "name ");
+ len += copy_str(&wbuf[len], node->symname);
+ r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+ " at port %d\n", node->port);
+ if (r < 0)
+ goto failed_names_resp;
+ len += r;
if (reply(g, fd, wbuf, len) != len)
{
+ failed_names_resp:
dbg_tty_printf(g,1,"failed to send NAMES_RESP");
return;
}
@@ -665,16 +786,22 @@ static void do_request(g, fd, s, buf, bsize)
for (node = g->nodes.reg; node; node = node->next)
{
- int len;
+ int len = 0, r;
/* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
change in syntax will break < OTP R3A */
- erts_snprintf(wbuf, sizeof(wbuf), "active name <%s> at port %d, fd = %d\n",
- node->symname, node->port, node->fd);
- len = strlen(wbuf) + 1;
- if (reply(g, fd,wbuf,len) != len)
+ len += copy_str(&wbuf[len], "active name <");
+ len += copy_str(&wbuf[len], node->symname);
+ r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+ "> at port %d, fd = %d\n",
+ node->port, node->fd);
+ if (r < 0)
+ goto failed_dump_resp;
+ len += r + 1;
+ if (reply(g, fd,wbuf,len) != len)
{
+ failed_dump_resp:
dbg_tty_printf(g,1,"failed to send DUMP_RESP");
return;
}
@@ -682,16 +809,22 @@ static void do_request(g, fd, s, buf, bsize)
for (node = g->nodes.unreg; node; node = node->next)
{
- int len;
+ int len = 0, r;
/* CAREFUL!!! These are parsed by "erl_epmd.erl" so a slight
change in syntax will break < OTP R3A */
- erts_snprintf(wbuf, sizeof(wbuf), "old/unused name <%s>, port = %d, fd = %d \n",
- node->symname,node->port, node->fd);
- len = strlen(wbuf) + 1;
- if (reply(g, fd,wbuf,len) != len)
+ len += copy_str(&wbuf[len], "old/unused name <");
+ len += copy_str(&wbuf[len], node->symname);
+ r = erts_snprintf(&wbuf[len], sizeof(wbuf)-len,
+ ">, port = %d, fd = %d \n",
+ node->port, node->fd);
+ if (r < 0)
+ goto failed_dump_resp2;
+ len += r + 1;
+ if (reply(g, fd,wbuf,len) != len)
{
+ failed_dump_resp2:
dbg_tty_printf(g,1,"failed to send DUMP_RESP");
return;
}
@@ -933,7 +1066,7 @@ static int node_unreg(EpmdVars *g,char *name)
Node *node = g->nodes.reg; /* Point to first node */
for (; node; prev = &node->next, node = node->next)
- if (strcmp(node->symname, name) == 0)
+ if (is_same_str(node->symname, name))
{
dbg_tty_printf(g,1,"unregistering '%s:%d', port %d",
node->symname, node->creation, node->port);
@@ -1013,6 +1146,7 @@ static int node_unreg_sock(EpmdVars *g,int fd)
*/
static Node *node_reg2(EpmdVars *g,
+ int namelen,
char* name,
int fd,
int port,
@@ -1025,6 +1159,7 @@ static Node *node_reg2(EpmdVars *g,
{
Node *prev; /* Point to previous node or NULL */
Node *node; /* Point to first node */
+ int sz;
/* Can be NULL; means old style */
if (extra == NULL)
@@ -1032,21 +1167,47 @@ static Node *node_reg2(EpmdVars *g,
/* Fail if node name is too long */
- if (strlen(name) > MAXSYMLEN)
+
+ if (namelen > MAXSYMLEN)
{
- dbg_printf(g,0,"node name is too long (%d) %s", strlen(name), name);
+ too_long_name:
+ dbg_printf(g,0,"node name is too long (%d) %s", namelen, name);
return NULL;
}
+
+ sz = verify_utf8(name, namelen, 0);
+ if (sz > 255)
+ goto too_long_name;
+
+ if (sz < 0) {
+ dbg_printf(g,0,"invalid node name encoding");
+ return NULL;
+ }
+
if (extralen > MAXSYMLEN)
{
- dbg_printf(g,0,"extra data is too long (%d) %s", strlen(name), name);
+#if 0
+ too_long_extra:
+#endif
+ dbg_printf(g,0,"extra data is too long (%d) %s", extralen, extra);
return NULL;
}
+#if 0 /* Should we require valid utf8 here? */
+ sz = verify_utf8(extra, extralen, 0);
+ if (sz > 255)
+ goto too_long_extra;
+
+ if (sz < 0) {
+ dbg_printf(g,0,"invalid extra data encoding");
+ return NULL;
+ }
+#endif
+
/* Fail if it is already registered */
for (node = g->nodes.reg; node; node = node->next)
- if (strcmp(node->symname, name) == 0)
+ if (is_same_str(node->symname, name))
{
dbg_printf(g,0,"node name already occupied %s", name);
return NULL;
@@ -1058,7 +1219,7 @@ static Node *node_reg2(EpmdVars *g,
prev = NULL;
for (node = g->nodes.unreg; node; prev = node, node = node->next)
- if (strcmp(node->symname, name) == 0)
+ if (is_same_str(node->symname, name))
{
dbg_tty_printf(g,1,"reusing slot with same name '%s'", node->symname);
@@ -1126,7 +1287,7 @@ static Node *node_reg2(EpmdVars *g,
node->lowvsn = lowvsn;
node->extralen = extralen;
memcpy(node->extra,extra,extralen);
- strcpy(node->symname,name);
+ copy_str(node->symname,name);
select_fd_set(g, fd);
if (highvsn == 0) {
diff --git a/erts/epmd/test/epmd_SUITE.erl b/erts/epmd/test/epmd_SUITE.erl
index fd9969ae2b..fc0abef400 100644
--- a/erts/epmd/test/epmd_SUITE.erl
+++ b/erts/epmd/test/epmd_SUITE.erl
@@ -45,6 +45,8 @@
register_names_1/1,
register_names_2/1,
register_duplicate_name/1,
+ unicode_name/1,
+ long_unicode_name/1,
get_port_nr/1,
slow_get_port_nr/1,
unregister_others_name_1/1,
@@ -107,7 +109,8 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[register_name, register_names_1, register_names_2,
- register_duplicate_name, get_port_nr, slow_get_port_nr,
+ register_duplicate_name, unicode_name, long_unicode_name,
+ get_port_nr, slow_get_port_nr,
unregister_others_name_1, unregister_others_name_2,
register_overflow, name_with_null_inside,
name_null_terminated, stupid_names_req, no_data,
@@ -197,6 +200,37 @@ register_duplicate_name(Config) when is_list(Config) ->
?line ok = close(Sock), % Unregister
ok.
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+unicode_name(doc) ->
+ ["Check that we can register and lookup a unicode name"];
+unicode_name(suite) ->
+ [];
+unicode_name(Config) when is_list(Config) ->
+ ok = epmdrun(),
+ NodeName = [16#1f608], % one code point outside the BMP (4 bytes in UTF-8)
+ {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []),
+ {ok,NodeInfo} = port_please_v2(NodeName),
+ NodeName = NodeInfo#node_info.node_name, % match: looked-up name equals the registered one
+ ok = close(Sock),
+ ok.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+long_unicode_name(doc) ->
+ ["Check that we can register and lookup a long unicode name"];
+long_unicode_name(suite) ->
+ [];
+long_unicode_name(Config) when is_list(Config) ->
+ ok = epmdrun(),
+ BaseChar = 16#1f600,
+ NodeName = lists:seq(BaseChar, BaseChar+200), % 201 code points, 4 bytes each in UTF-8 = 804 bytes
+ {ok,Sock} = register_node_v2(4711, 72, 0, 5, 5, NodeName, []),
+ {ok,NodeInfo} = port_please_v2(NodeName),
+ NodeName = NodeInfo#node_info.node_name, % match: looked-up name equals the registered one
+ ok = close(Sock),
+ ok.
+
% Internal function to register a node name, no close, i.e. unregister
register_node(Name) ->
@@ -205,9 +239,10 @@ register_node(Name,Port) ->
register_node_v2(Port,$M,0,5,5,Name,"").
register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) ->
+ Utf8Name = unicode:characters_to_binary(Name),
Req = [?EPMD_ALIVE2_REQ, put16(Port), NodeType, Prot,
put16(HVsn), put16(LVsn),
- size16(Name), Name,
+ put16(size(Utf8Name)), binary_to_list(Utf8Name),
size16(Extra), Extra],
case send_req(Req) of
{ok,Sock} ->
@@ -226,7 +261,8 @@ register_node_v2(Port, NodeType, Prot, HVsn, LVsn, Name, Extra) ->
% Internal function to fetch information about a node
port_please_v2(Name) ->
- case send_req([?EPMD_PORT_PLEASE2_REQ, Name]) of
+ case send_req([?EPMD_PORT_PLEASE2_REQ,
+ binary_to_list(unicode:characters_to_binary(Name))]) of
{ok,Sock} ->
case recv_until_sock_closes(Sock) of
{ok, Resp} ->
@@ -247,7 +283,7 @@ parse_port2_resp(Resp) ->
ELen:16,Extra:ELen/binary>> when Res =:= 0 ->
{ok, #node_info{port=Port,node_type=NodeType,prot=Prot,
hvsn=HVsn,lvsn=LVsn,
- node_name=binary_to_list(NodeName),
+ node_name=unicode:characters_to_list(NodeName),
extra=binary_to_list(Extra)}};
_Other ->
test_server:format("invalid port2 resp: ~p~n",
@@ -737,7 +773,7 @@ buffer_overrun_2(doc) ->
["Test security vulnerability in fake extra lengths in alive2_req"];
buffer_overrun_2(Config) when is_list(Config) ->
?line ok = epmdrun(),
- ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255,10000)],
+ ?line [false | Rest] = [hostile2(N) || N <- lists:seq(255*4,10000)],
?line true = alltrue(Rest),
ok.
hostile(N) ->
@@ -880,6 +916,7 @@ no_live_killing(Config) when is_list(Config) ->
?line close(Sock3),
ok.
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Terminate all tests with killing epmd.
--
cgit v1.2.3
From 8eb544073fe243a8935a54f83f9c9f1f7478e3c5 Mon Sep 17 00:00:00 2001
From: Sverker Eriksson
Date: Tue, 22 Jan 2013 17:20:17 +0100
Subject: erts: Fix bug in analyze_utf8 causing faulty latin1 detection
---
erts/emulator/beam/erl_unicode.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index c00293de89..883405d066 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1180,13 +1180,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
((*source) < 0xC2) /* overlong */) {
return ERTS_UTF8_ERROR;
}
- source += 2;
- size -= 2;
if (num_latin1_chars) {
latin1_count++;
if ((source[0] & ((byte) 0xFC)) != ((byte) 0xC0))
is_latin1 = 0;
}
+ source += 2;
+ size -= 2;
} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
if (size < 3) {
return ERTS_UTF8_INCOMPLETE;
--
cgit v1.2.3
From 1f4765cca4874fa92fcfad888fbe6d5f2fbf74d1 Mon Sep 17 00:00:00 2001
From: Sverker Eriksson
Date: Tue, 22 Jan 2013 19:25:36 +0100
Subject: erl_interface: even more utf8 atom stuff
---
lib/erl_interface/doc/src/ei.xml | 12 +++--
lib/erl_interface/src/connect/ei_connect.c | 2 +-
lib/erl_interface/src/encode/encode_atom.c | 59 +++++++++++++++++++-
lib/erl_interface/src/legacy/erl_connect.c | 2 +-
lib/erl_interface/src/misc/ei_format.c | 4 +-
lib/erl_interface/src/misc/ei_printterm.c | 2 +-
lib/erl_interface/src/misc/show_msg.c | 2 +-
lib/erl_interface/src/prog/ei_fake_prog.c | 6 +++
lib/erl_interface/test/ei_decode_encode_SUITE.erl | 63 +++++++++++-----------
.../ei_decode_encode_test.c | 16 ++++--
10 files changed, 122 insertions(+), 46 deletions(-)
diff --git a/lib/erl_interface/doc/src/ei.xml b/lib/erl_interface/doc/src/ei.xml
index 0b0b1eeb79..e9c7c644b5 100644
--- a/lib/erl_interface/doc/src/ei.xml
+++ b/lib/erl_interface/doc/src/ei.xml
@@ -94,7 +94,11 @@ enum erlang_char_encoding {
ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER
};
-
The character encoding used for atoms.
+
The character encoding used for atoms. ERLANG_ASCII represents 7-bit ASCII.
+ Latin1 and UTF8 are different extensions of 7-bit ASCII. All 7-bit ASCII characters
+ are valid Latin1 and UTF8 characters. ASCII and Latin1 both represent each character
+ by one byte. A UTF8 character can consist of one to four bytes. ERLANG_WHATEVER
+ is not an encoding but rather used as a wildcard.
@@ -256,11 +260,11 @@ enum erlang_char_encoding {
Encodes an atom in the binary format with character encoding
to_enc (latin1 or utf8).
The p parameter is the name of the atom with character encoding
- from_enc.
+ from_enc (ascii, latin1 or utf8).
The name must either be zero-terminated or a function variant with a len
parameter must be used.
-
The encoding will fail if the atom is too long or if it can not be represented
- with character encoding to_enc.
+
The encoding will fail if p is not a valid string in encoding from_enc,
+ if the string is too long or if it can not be represented with character encoding to_enc.
These functions were introduced in R16 release of Erlang/OTP as part of a first step
to support UTF8 atoms. Atoms encoded with ERLANG_UTF8
can not be decoded by earlier releases than R16.
The character encoding used for atoms. ERLANG_ASCII represents 7-bit ASCII.
Latin1 and UTF8 are different extensions of 7-bit ASCII. All 7-bit ASCII characters
are valid Latin1 and UTF8 characters. ASCII and Latin1 both represent each character
- by one byte. A UTF8 character can consist of one to four bytes. ERLANG_WHATEVER
- is not an encoding but rather used as a wildcard.
+ by one byte. A UTF8 character can consist of one to four bytes.
@@ -545,11 +544,13 @@ ei_x_encode_empty_list(&x);
want. The original encoding used in the
binary format (latin1 or utf8) can be obtained from *was. The actual encoding of the resulting string
(7-bit ascii, latin1 or utf8) can be obtained from *result. Both was and result can be NULL.
- *result may differ from want if want is ERLANG_WHATEVER or if
- *result turn out to be pure 7-bit ascii (compatible with both latin1 and utf8).
+
+ *result may differ from want if want is a bitwise-or'd combination like
+ ERLANG_LATIN1|ERLANG_UTF8 or if *result turns out to be pure 7-bit ascii
+ (compatible with both latin1 and utf8).
This function fails if the atom is too long for the buffer
or if it can not be represented with encoding want.
-
This functions was introduced in R16 release of Erlang/OTP as part of a first step
+
This function was introduced in R16 release of Erlang/OTP as part of a first step
to support UTF8 atoms.
diff --git a/lib/erl_interface/include/ei.h b/lib/erl_interface/include/ei.h
index 20e575f64d..2278a28adb 100644
--- a/lib/erl_interface/include/ei.h
+++ b/lib/erl_interface/include/ei.h
@@ -190,7 +190,12 @@ extern volatile int __erl_errno;
#define MAXATOMLEN_UTF8 (255*4 + 1)
#define MAXNODELEN EI_MAXALIVELEN+1+EI_MAXHOSTNAMELEN
-enum erlang_char_encoding { ERLANG_ASCII, ERLANG_LATIN1, ERLANG_UTF8, ERLANG_WHATEVER };
+enum erlang_char_encoding {
+ ERLANG_ASCII = 1,
+ ERLANG_LATIN1 = 2,
+ ERLANG_UTF8 = 4,
+ ERLANG_ANY = ERLANG_ASCII|ERLANG_LATIN1|ERLANG_UTF8
+};
/* a pid */
typedef struct {
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index 2ada418243..556c400cb3 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -58,7 +58,7 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
return -1;
}
- if (want_enc == got_enc || want_enc == ERLANG_WHATEVER || want_enc == ERLANG_ASCII) {
+ if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) {
int i, found_non_ascii = 0;
if (len >= destlen)
return -1;
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index 8bbe962396..044f17cb60 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -59,8 +59,7 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
char *s0 = s;
int offs;
- if (len >= MAXATOMLEN && (from_enc == ERLANG_LATIN1 ||
- from_enc == ERLANG_ASCII)) {
+ if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
return -1;
}
diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c
index 884e9d421b..4c45cebb02 100644
--- a/lib/erl_interface/src/legacy/erl_marshal.c
+++ b/lib/erl_interface/src/legacy/erl_marshal.c
@@ -662,7 +662,7 @@ static int read_atom(unsigned char** ext, Erl_Atom_data* a)
int offs = 0;
enum erlang_char_encoding enc;
int ret = ei_decode_atom_as((char*)*ext, &offs, buf, MAXATOMLEN_UTF8,
- ERLANG_WHATEVER, NULL, &enc);
+ ERLANG_LATIN1|ERLANG_UTF8, NULL, &enc);
*ext += offs;
if (ret == 0) {
@@ -674,11 +674,11 @@ static int read_atom(unsigned char** ext, Erl_Atom_data* a)
a->lenL = 0;
a->utf8 = NULL;
a->lenU = 0;
- if (enc == ERLANG_LATIN1 || enc == ERLANG_ASCII) {
+ if (enc & (ERLANG_LATIN1 | ERLANG_ASCII)) {
a->latin1 = clone;
a->lenL = i;
}
- if (enc == ERLANG_UTF8 || enc == ERLANG_ASCII) {
+ if (enc & (ERLANG_UTF8 | ERLANG_ASCII)) {
a->utf8 = clone;
a->lenU = i;
}
--
cgit v1.2.3
From d6e3e256b850050b7a86323b2948009d5fcc30a9 Mon Sep 17 00:00:00 2001
From: Lukas Larsson
Date: Wed, 23 Jan 2013 10:17:29 +0100
Subject: erl_interface: Fix bug when transcoding atoms from and to UTF8
---
lib/erl_interface/src/decode/decode_atom.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index 556c400cb3..9779ad3f35 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -76,8 +76,8 @@ int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
}
else {
int plen = (got_enc == ERLANG_LATIN1) ?
- utf8_to_latin1(p, s, len, destlen-1, res_encp) :
- latin1_to_utf8(p, s, len, destlen-1, res_encp);
+ latin1_to_utf8(p, s, len, destlen-1, res_encp) :
+ utf8_to_latin1(p, s, len, destlen-1, res_encp);
if (plen < 0) return -1;
if (p) p[plen] = 0;
}
--
cgit v1.2.3