aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
committerSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
commitb8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch)
tree708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/erl_interface/src
parente99df74bee7c245ec76678e336fcd09d4b51a089 (diff)
parentd6e3e256b850050b7a86323b2948009d5fcc30a9 (diff)
downloadotp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms: erl_interface: Fix bug when transcoding atoms from and to UTF8 erl_interface: Changed erlang_char_encoding interface erts: Testcase doing unicode atom printout with ~w erl_interface: even more utf8 atom stuff erts: Fix bug in analyze_utf8 causing faulty latin1 detection Add UTF-8 node name support for epmd workaround... Fix merge conflict with hasse UTF-8 atom documentation test case erl_interface: utf8 atoms continued Add utf8 atom distribution test cases atom fixes for NIFs and atom_to_binary UTF-8 support for distribution Implement UTF-8 atom support for jinterface erl_interface: Enable decode of unicode atoms stdlib: Fix printing of unicode atoms erts: Change internal representation of atoms to utf8 erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity Conflicts: erts/emulator/beam/io.c OTP-10753
Diffstat (limited to 'lib/erl_interface/src')
-rw-r--r--lib/erl_interface/src/connect/ei_connect.c7
-rw-r--r--lib/erl_interface/src/connect/ei_connect_int.h2
-rw-r--r--lib/erl_interface/src/connect/eirecv.c12
-rw-r--r--lib/erl_interface/src/decode/decode_atom.c159
-rw-r--r--lib/erl_interface/src/decode/decode_boolean.c36
-rw-r--r--lib/erl_interface/src/decode/decode_fun.c6
-rw-r--r--lib/erl_interface/src/decode/decode_pid.c14
-rw-r--r--lib/erl_interface/src/decode/decode_port.c13
-rw-r--r--lib/erl_interface/src/decode/decode_ref.c28
-rw-r--r--lib/erl_interface/src/encode/encode_atom.c154
-rw-r--r--lib/erl_interface/src/encode/encode_fun.c4
-rw-r--r--lib/erl_interface/src/encode/encode_pid.c24
-rw-r--r--lib/erl_interface/src/encode/encode_port.c23
-rw-r--r--lib/erl_interface/src/encode/encode_ref.c24
-rw-r--r--lib/erl_interface/src/legacy/erl_connect.c18
-rw-r--r--lib/erl_interface/src/legacy/erl_eterm.c158
-rw-r--r--lib/erl_interface/src/legacy/erl_eterm.h4
-rw-r--r--lib/erl_interface/src/legacy/erl_format.c20
-rw-r--r--lib/erl_interface/src/legacy/erl_malloc.c20
-rw-r--r--lib/erl_interface/src/legacy/erl_marshal.c413
-rw-r--r--lib/erl_interface/src/legacy/global_whereis.c11
-rw-r--r--lib/erl_interface/src/misc/ei_decode_term.c40
-rw-r--r--lib/erl_interface/src/misc/ei_format.c4
-rw-r--r--lib/erl_interface/src/misc/ei_printterm.c7
-rw-r--r--lib/erl_interface/src/misc/ei_x_encode.c21
-rw-r--r--lib/erl_interface/src/misc/get_type.c79
-rw-r--r--lib/erl_interface/src/misc/putget.h7
-rw-r--r--lib/erl_interface/src/misc/show_msg.c14
-rw-r--r--lib/erl_interface/src/prog/ei_fake_prog.c6
29 files changed, 830 insertions, 498 deletions
diff --git a/lib/erl_interface/src/connect/ei_connect.c b/lib/erl_interface/src/connect/ei_connect.c
index 34362b4b9f..4421bbb7fe 100644
--- a/lib/erl_interface/src/connect/ei_connect.c
+++ b/lib/erl_interface/src/connect/ei_connect.c
@@ -459,6 +459,7 @@ int ei_connect_xinit(ei_cnode* ec, const char *thishostname,
/* memmove(&ec->this_ipaddr, thisipaddr, sizeof(ec->this_ipaddr)); */
strcpy(ec->self.node,thisnodename);
+ ec->self.node_org_enc = ERLANG_LATIN1;
ec->self.num = 0;
ec->self.serial = 0;
ec->self.creation = creation;
@@ -1070,7 +1071,7 @@ int ei_rpc(ei_cnode* ec, int fd, char *mod, char *fun,
int i, index;
ei_term t;
erlang_msg msg;
- char rex[MAXATOMLEN+1];
+ char rex[MAXATOMLEN];
if (ei_rpc_to(ec, fd, mod, fun, inbuf, inbuflen) < 0) {
return -1;
@@ -1332,7 +1333,9 @@ static int send_name_or_challenge(int fd, char *nodename,
| DFLAG_EXTENDED_PIDS_PORTS
| DFLAG_FUN_TAGS
| DFLAG_NEW_FUN_TAGS
- | DFLAG_NEW_FLOATS));
+ | DFLAG_NEW_FLOATS
+ | DFLAG_SMALL_ATOM_TAGS
+ | DFLAG_UTF8_ATOMS));
if (f_chall)
put32be(s, challenge);
memcpy(s, nodename, strlen(nodename));
diff --git a/lib/erl_interface/src/connect/ei_connect_int.h b/lib/erl_interface/src/connect/ei_connect_int.h
index 3c42b49b82..81c384e38d 100644
--- a/lib/erl_interface/src/connect/ei_connect_int.h
+++ b/lib/erl_interface/src/connect/ei_connect_int.h
@@ -102,6 +102,8 @@ extern int h_errno;
#define DFLAG_NEW_FUN_TAGS 0x80
#define DFLAG_EXTENDED_PIDS_PORTS 0x100
#define DFLAG_NEW_FLOATS 0x800
+#define DFLAG_SMALL_ATOM_TAGS 0x4000
+#define DFLAG_UTF8_ATOMS 0x10000
ei_cnode *ei_fd_to_cnode(int fd);
int ei_distversion(int fd);
diff --git a/lib/erl_interface/src/connect/eirecv.c b/lib/erl_interface/src/connect/eirecv.c
index 86852f947d..075f78e3d2 100644
--- a/lib/erl_interface/src/connect/eirecv.c
+++ b/lib/erl_interface/src/connect/eirecv.c
@@ -108,7 +108,7 @@ ei_recv_internal (int fd,
switch (msg->msgtype) {
case ERL_SEND: /* { SEND, Cookie, ToPid } */
if (ei_tracelevel >= 4) show_this_msg = 1;
- if (ei_decode_atom(header,&index,msg->cookie)
+ if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg->to))
{
erl_errno = EIO;
@@ -120,8 +120,8 @@ ei_recv_internal (int fd,
case ERL_REG_SEND: /* { REG_SEND, From, Cookie, ToName } */
if (ei_tracelevel >= 4) show_this_msg = 1;
if (ei_decode_pid(header,&index,&msg->from)
- || ei_decode_atom(header,&index,msg->cookie)
- || ei_decode_atom(header,&index,msg->toname))
+ || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL))
{
erl_errno = EIO;
return -1;
@@ -157,7 +157,7 @@ ei_recv_internal (int fd,
case ERL_SEND_TT: /* { SEND_TT, Cookie, ToPid, TraceToken } */
if (ei_tracelevel >= 4) show_this_msg = 1;
- if (ei_decode_atom(header,&index,msg->cookie)
+ if (ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg->to)
|| ei_decode_trace(header,&index,&msg->token))
{
@@ -171,8 +171,8 @@ ei_recv_internal (int fd,
case ERL_REG_SEND_TT: /* { REG_SEND_TT, From, Cookie, ToName, TraceToken } */
if (ei_tracelevel >= 4) show_this_msg = 1;
if (ei_decode_pid(header,&index,&msg->from)
- || ei_decode_atom(header,&index,msg->cookie)
- || ei_decode_atom(header,&index,msg->toname)
+ || ei_decode_atom_as(header,&index,msg->cookie,sizeof(msg->cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg->toname,sizeof(msg->toname),ERLANG_UTF8,NULL,NULL)
|| ei_decode_trace(header,&index,&msg->token))
{
erl_errno = EIO;
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index c2e6a0426e..9779ad3f35 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -21,24 +21,155 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_atom(const char *buf, int *index, char *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- int len;
+ return ei_decode_atom_as(buf, index, p, MAXATOMLEN, ERLANG_LATIN1, NULL, NULL);
+}
+
+int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
+ enum erlang_char_encoding want_enc,
+ enum erlang_char_encoding* was_encp,
+ enum erlang_char_encoding* res_encp)
+{
+ const char *s = buf + *index;
+ const char *s0 = s;
+ int len;
+ enum erlang_char_encoding got_enc;
+
+ switch (get8(s)) {
+ case ERL_ATOM_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_SMALL_ATOM_EXT:
+ len = get8(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_ATOM_UTF8_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ len = get8(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ default:
+ return -1;
+ }
+
+ if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) {
+ int i, found_non_ascii = 0;
+ if (len >= destlen)
+ return -1;
+ for (i=0; i<len; i++) {
+ if (s[i] & 0x80) found_non_ascii = 1;
+ if (p) p[i] = s[i];
+ }
+ if (p) p[len] = 0;
+ if (want_enc == ERLANG_ASCII && found_non_ascii) {
+ return -1;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? got_enc : ERLANG_ASCII;
+ }
+ }
+ else {
+ int plen = (got_enc == ERLANG_LATIN1) ?
+ latin1_to_utf8(p, s, len, destlen-1, res_encp) :
+ utf8_to_latin1(p, s, len, destlen-1, res_encp);
+ if (plen < 0) return -1;
+ if (p) p[plen] = 0;
+ }
+ if (was_encp) {
+ *was_encp = got_enc;
+ }
+
+ s += len;
+ *index += s-s0;
+ return 0;
+}
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
+int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
+ enum erlang_char_encoding* res_encp)
+{
+ const char* const dst_start = dst;
+ const char* const dst_end = dst + destlen;
+ int found_non_ascii = 0;
+
+ while (slen > 0) {
+ if (dst >= dst_end) return -1;
+ if ((src[0] & 0x80) == 0) {
+ if (dst_start) {
+ *dst = *src;
+ }
+ ++dst;
+ ++src;
+ --slen;
+ }
+ else if (slen > 1 &&
+ (src[0] & 0xFE) == 0xC2 &&
+ (src[1] & 0xC0) == 0x80) {
+ if (dst_start) {
+ *dst = (char) ((src[0] << 6) | (src[1] & 0x3F));
+ }
+ ++dst;
+ src += 2;
+ slen -= 2;
+ found_non_ascii = 1;
+ }
+ else return -1;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? ERLANG_LATIN1 : ERLANG_ASCII;
+ }
+ return dst - dst_start;
+}
- if (len > MAXATOMLEN) return -1;
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
+ enum erlang_char_encoding* res_encp)
+{
+ const char* const src_end = src + slen;
+ const char* const dst_start = dst;
+ const char* const dst_end = dst + destlen;
+ int found_non_ascii = 0;
- if (p) {
- memmove(p,s,len);
- p[len] = (char)0;
- }
- s += len;
- *index += s-s0;
-
- return 0;
+ while (src < src_end) {
+ if (dst >= dst_end) return -1;
+ if ((src[0] & 0x80) == 0) {
+ if (dst_start) {
+ *dst = *src;
+ }
+ ++dst;
+ }
+ else {
+ if (dst_start) {
+ unsigned char ch = *src;
+ dst[0] = 0xC0 | (ch >> 6);
+ dst[1] = 0x80 | (ch & 0x3F);
+ }
+ dst += 2;
+ found_non_ascii = 1;
+ }
+ ++src;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII;
+ }
+ return dst - dst_start;
}
+
+
+
+int ei_internal_get_atom(const char** bufp, char* p,
+ enum erlang_char_encoding* was_encp)
+{
+ int ix = 0;
+ if (ei_decode_atom_as(*bufp, &ix, p, MAXATOMLEN_UTF8, ERLANG_UTF8, was_encp, NULL) < 0)
+ return -1;
+ *bufp += ix;
+ return 0;
+}
+
+
diff --git a/lib/erl_interface/src/decode/decode_boolean.c b/lib/erl_interface/src/decode/decode_boolean.c
index 9fd09c63f1..f20690249b 100644
--- a/lib/erl_interface/src/decode/decode_boolean.c
+++ b/lib/erl_interface/src/decode/decode_boolean.c
@@ -24,34 +24,20 @@
/* c non-zero -> erlang "true" atom, otherwise "false" */
int ei_decode_boolean(const char *buf, int *index, int *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- int len;
+ char tbuf[6];
int t;
- if (get8(s) != ERL_ATOM_EXT) return -1;
+ if (ei_decode_atom_as(buf, index, tbuf, sizeof(tbuf), ERLANG_ASCII, NULL, NULL) < 0)
+ return -1;
- len = get16be(s);
-
- switch (len) {
- case 4:
- /* typecast makes ansi happy */
- if (strncmp((char*)s,"true",4)) return -1;
- t = 1;
- break;
-
- case 5:
- if (strncmp((char*)s,"false",5)) return -1;
- t = 0;
- break;
-
- default:
- return -1;
- }
-
- s += len;
+ if (memcmp(tbuf, "true", 5) == 0)
+ t = 1;
+ else if (memcmp(tbuf, "false", 6) == 0)
+ t = 0;
+ else
+ return -1;
+
if (p) *p = t;
- *index += s-s0;
-
return 0;
}
+
diff --git a/lib/erl_interface/src/decode/decode_fun.c b/lib/erl_interface/src/decode/decode_fun.c
index 64fb9e86d8..7bbef5db44 100644
--- a/lib/erl_interface/src/decode/decode_fun.c
+++ b/lib/erl_interface/src/decode/decode_fun.c
@@ -42,7 +42,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (ei_decode_pid(s, &ix, (p == NULL ? (erlang_pid*)NULL : &p->pid)) < 0)
return -1;
/* then the module (atom) */
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->index)) < 0)
@@ -84,7 +85,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (p != NULL) p->n_free_vars = i;
/* then the module (atom) */
ix = 0;
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the old_index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->old_index)) < 0)
diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c
index 9ed1c36db6..e79952195d 100644
--- a/lib/erl_interface/src/decode/decode_pid.c
+++ b/lib/erl_interface/src/decode/decode_pid.c
@@ -21,26 +21,16 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_pid(const char *buf, int *index, erlang_pid *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PID_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c
index 28abed801a..5fd96b51a4 100644
--- a/lib/erl_interface/src/decode/decode_port.c
+++ b/lib/erl_interface/src/decode/decode_port.c
@@ -25,22 +25,11 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PORT_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c
index 7b15808bc5..7294e5d239 100644
--- a/lib/erl_interface/src/decode/decode_ref.c
+++ b/lib/erl_interface/src/decode/decode_ref.c
@@ -21,27 +21,18 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int count, len, i;
+ int count, i;
switch (get8(s)) {
case ERL_REFERENCE_EXT:
- /* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ /* nodename */
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
@@ -62,15 +53,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
if (p) p->len = count;
/* then the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* creation */
if (p) {
@@ -95,3 +78,4 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
return -1;
}
}
+
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index 6f41f045e0..044f17cb60 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -22,29 +22,108 @@
#include "eiext.h"
#include "putget.h"
+
+static int copy_ascii_atom(char* dst, const char* src, int slen);
+static int copy_utf8_atom(char* dst, const char* src, int slen);
+
+
int ei_encode_atom(char *buf, int *index, const char *p)
{
size_t len = strlen(p);
- if (len >= INT_MAX) return -1;
- return ei_encode_atom_len(buf, index, p, len);
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN - 1;
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
}
int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
{
+ /* This function is documented to truncate at MAXATOMLEN (256) */
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN - 1;
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+}
+
+int ei_encode_atom_as(char *buf, int *index, const char *p,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
+ return ei_encode_atom_len_as(buf, index, p, strlen(p), from_enc, to_enc);
+}
+
+int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
char *s = buf + *index;
char *s0 = s;
+ int offs;
- /* This function is documented to truncate at MAXATOMLEN (256) */
- if (len > MAXATOMLEN)
- len = MAXATOMLEN;
+ if (len >= MAXATOMLEN && (from_enc & (ERLANG_LATIN1|ERLANG_ASCII))) {
+ return -1;
+ }
- if (!buf) s += 3;
- else {
- put8(s,ERL_ATOM_EXT);
- put16be(s,len);
+ switch(to_enc) {
+ case ERLANG_LATIN1:
+ if (buf) {
+ put8(s,ERL_ATOM_EXT);
+ switch (from_enc) {
+ case ERLANG_UTF8:
+ len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
+ if (len < 0) return -1;
+ break;
+ case ERLANG_ASCII:
+ if (copy_ascii_atom(s+2, p, len) < 0) return -1;
+ break;
+ case ERLANG_LATIN1:
+ memcpy(s+2, p, len);
+ break;
+ default:
+ return -1;
+ }
+ put16be(s,len);
+ }
+ else {
+ s += 3;
+ if (from_enc == ERLANG_UTF8) {
+ len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
+ if (len < 0) return -1;
+ }
+ }
+ break;
+
+ case ERLANG_UTF8:
+ offs = 1 + 1;
+ switch (from_enc) {
+ case ERLANG_LATIN1:
+ if (len >= 256/2) offs++;
+ len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
+ break;
+ case ERLANG_ASCII:
+ if (buf && copy_ascii_atom(s+offs, p, len) < 0) return -1;
+ break;
+ case ERLANG_UTF8:
+ if (len >= 256) offs++;
+ if (buf && copy_utf8_atom(s+offs, p, len) < 0) return -1;
+ break;
+ default:
+ return -1;
+ }
+ if (buf) {
+ if (offs == 2) {
+ put8(s, ERL_SMALL_ATOM_UTF8_EXT);
+ put8(s, len);
+ }
+ else {
+ put8(s, ERL_ATOM_UTF8_EXT);
+ put16be(s, len);
+ }
+ }
+ else s+= offs;
+ break;
- memmove(s,p,len); /* unterminated string */
+ default:
+ return -1;
}
s += len;
@@ -53,3 +132,58 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
return 0;
}
+int
+ei_internal_put_atom(char** bufp, const char* p, int slen,
+ enum erlang_char_encoding to_enc)
+{
+ int ix = 0;
+ if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0)
+ return -1;
+ *bufp += ix;
+ return 0;
+}
+
+
+int copy_ascii_atom(char* dst, const char* src, int slen)
+{
+ while (slen > 0) {
+ if ((src[0] & 0x80) != 0) return -1;
+ *dst++ = *src++;
+ slen--;
+ }
+ return 0;
+}
+
+int copy_utf8_atom(char* dst, const char* src, int slen)
+{
+ int num_chars = 0;
+
+ while (slen > 0) {
+ if (++num_chars >= MAXATOMLEN) return -1;
+ if ((src[0] & 0x80) != 0) {
+ if ((src[0] & 0xE0) == 0xC0) {
+ if (slen < 2 || (src[1] & 0xC0) != 0x80) return -1;
+ *dst++ = *src++;
+ slen--;
+ }
+ else if ((src[0] & 0xF0) == 0xE0) {
+ if (slen < 3 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80) return -1;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ slen -= 2;
+ }
+ else if ((src[0] & 0xF8) == 0xF0) {
+ if (slen < 4 || (src[1] & 0xC0) != 0x80 || (src[2] & 0xC0) != 0x80 || (src[3] & 0xC0) != 0x80) return -1;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ *dst++ = *src++;
+ slen -= 3;
+ }
+ else return -1;
+ }
+ *dst++ = *src++;
+ slen--;
+ }
+ return 0;
+}
+
diff --git a/lib/erl_interface/src/encode/encode_fun.c b/lib/erl_interface/src/encode/encode_fun.c
index 54ee2083d6..4daee32648 100644
--- a/lib/erl_interface/src/encode/encode_fun.c
+++ b/lib/erl_interface/src/encode/encode_fun.c
@@ -35,7 +35,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p)
ix += sizeof(char) + 4;
if (ei_encode_pid(buf, &ix, &p->pid) < 0)
return -1;
- if (ei_encode_atom(buf, &ix, p->module) < 0)
+ if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0)
return -1;
if (ei_encode_long(buf, &ix, p->index) < 0)
return -1;
@@ -60,7 +60,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p)
} else
size_p = NULL;
ix += 1 + 4 + 1 + sizeof(p->md5) + 4 + 4;
- if (ei_encode_atom(buf, &ix, p->module) < 0)
+ if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0)
return -1;
if (ei_encode_long(buf, &ix, p->old_index) < 0)
return -1;
diff --git a/lib/erl_interface/src/encode/encode_pid.c b/lib/erl_interface/src/encode/encode_pid.c
index ee7f235c17..0cf3ef4efb 100644
--- a/lib/erl_interface/src/encode/encode_pid.c
+++ b/lib/erl_interface/src/encode/encode_pid.c
@@ -24,29 +24,23 @@
int ei_encode_pid(char *buf, int *index, const erlang_pid *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
-
- if (!buf) s += 13 + len;
- else {
- put8(s,ERL_PID_EXT);
- /* first the nodename */
- put8(s,ERL_ATOM_EXT);
+ ++(*index); /* skip ERL_PID_EXT */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, p->node_org_enc) < 0)
+ return -1;
+
+ if (buf) {
+ put8(s,ERL_PID_EXT);
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put32be(s,p->num & 0x7fff); /* 15 bits */
put32be(s,p->serial & 0x1fff); /* 13 bits */
put8(s,(p->creation & 0x03)); /* 2 bits */
}
-
- *index += s-s0;
-
+
+ *index += 4 + 4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/encode/encode_port.c b/lib/erl_interface/src/encode/encode_port.c
index fbbb33182e..2bf9e26d78 100644
--- a/lib/erl_interface/src/encode/encode_port.c
+++ b/lib/erl_interface/src/encode/encode_port.c
@@ -24,28 +24,23 @@
int ei_encode_port(char *buf, int *index, const erlang_port *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
-
- if (!buf) s += 9 + len;
- else {
- put8(s,ERL_PORT_EXT);
- /* first the nodename */
- put8(s,ERL_ATOM_EXT);
+ ++(*index); /* skip ERL_PORT_EXT */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8,
+ p->node_org_enc) < 0) {
+ return -1;
+ }
+ if (buf) {
+ put8(s,ERL_PORT_EXT);
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put32be(s,p->id & 0x0fffffff /* 28 bits */);
put8(s,(p->creation & 0x03));
}
- *index += s-s0;
-
+ *index += 4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/encode/encode_ref.c b/lib/erl_interface/src/encode/encode_ref.c
index 292b452864..e8b3173315 100644
--- a/lib/erl_interface/src/encode/encode_ref.c
+++ b/lib/erl_interface/src/encode/encode_ref.c
@@ -24,36 +24,32 @@
int ei_encode_ref(char *buf, int *index, const erlang_ref *p)
{
char *s = buf + *index;
- char *s0 = s;
- int len = strlen(p->node);
int i;
+ (*index) += 1 + 2; /* skip to node atom */
+ if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8,
+ p->node_org_enc) < 0) {
+ return -1;
+ }
+
/* Always encode as an extended reference; all participating parties
are now expected to be able to decode extended references. */
- if (!buf) s += 1 + 2 + (3+len) + p->len*4 + 1;
- else {
+ if (buf) {
put8(s,ERL_NEW_REFERENCE_EXT);
/* first, number of integers */
put16be(s, p->len);
/* then the nodename */
- put8(s,ERL_ATOM_EXT);
-
- put16be(s,len);
-
- memmove(s, p->node, len);
- s += len;
+ s = buf + *index;
/* now the integers */
put8(s,(p->creation & 0x03));
for (i = 0; i < p->len; i++)
put32be(s,p->n[i]);
-
- }
-
- *index += s-s0;
+ }
+ *index += p->len*4 + 1;
return 0;
}
diff --git a/lib/erl_interface/src/legacy/erl_connect.c b/lib/erl_interface/src/legacy/erl_connect.c
index 41d4fa3138..f82704ea8b 100644
--- a/lib/erl_interface/src/legacy/erl_connect.c
+++ b/lib/erl_interface/src/legacy/erl_connect.c
@@ -125,7 +125,7 @@ static ei_cnode erl_if_ec;
int erl_connect_init(int this_node_number, char *cookie, short creation)
{
- char nn[MAXATOMLEN+1];
+ char nn[MAXATOMLEN];
sprintf(nn, "c%d", this_node_number);
@@ -247,9 +247,15 @@ int erl_send(int fd, ETERM *to ,ETERM *msg)
erl_errno = EINVAL;
return -1;
}
-
- strncpy(topid.node, (char *)ERL_PID_NODE(to), sizeof(topid.node));
- topid.node[sizeof(topid.node)-1] = '\0';
+
+ if (to->uval.pidval.node.latin1) {
+ strcpy(topid.node, to->uval.pidval.node.latin1);
+ topid.node_org_enc = ERLANG_LATIN1;
+ }
+ else {
+ strcpy(topid.node, to->uval.pidval.node.utf8);
+ topid.node_org_enc = ERLANG_UTF8;
+ }
topid.num = ERL_PID_NUMBER(to);
topid.serial = ERL_PID_SERIAL(to);
topid.creation = ERL_PID_CREATION(to);
@@ -263,7 +269,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg)
erlang_msg msg;
int r;
- msg.from.node[0] = msg.to.node[0] = '\0';
+ msg.from.node[0] = msg.to.node[0] = msg.toname[0] = '\0';
r = ei_do_receive_msg(fd, 0, &msg, x, 0);
if (r == ERL_MSG) {
@@ -299,7 +305,7 @@ static int erl_do_receive_msg(int fd, ei_x_buff* x, ErlMessage* emsg)
emsg->to = erl_mk_pid(msg.to.node, msg.to.num, msg.to.serial, msg.to.creation);
else
emsg->to = NULL;
- memcpy(emsg->to_name, msg.toname, MAXATOMLEN+1);
+ strcpy(emsg->to_name, msg.toname);
return r;
}
diff --git a/lib/erl_interface/src/legacy/erl_eterm.c b/lib/erl_interface/src/legacy/erl_eterm.c
index 8d559f0f55..aa0fd5ddcf 100644
--- a/lib/erl_interface/src/legacy/erl_eterm.c
+++ b/lib/erl_interface/src/legacy/erl_eterm.c
@@ -36,6 +36,7 @@
#include "erl_error.h"
#include "erl_internal.h"
#include "ei_internal.h"
+#include "putget.h"
#define ERL_IS_BYTE(x) (ERL_IS_INTEGER(x) && (ERL_INT_VALUE(x) & ~0xFF) == 0)
@@ -142,9 +143,7 @@ ETERM *erl_mk_atom (const char *s)
ep = erl_alloc_eterm(ERL_ATOM);
ERL_COUNT(ep) = 1;
- ERL_ATOM_SIZE(ep) = strlen(s);
- if ((ERL_ATOM_PTR(ep) = strsave(s)) == NULL)
- {
+ if (erl_atom_init_latin1(&ep->uval.aval.d, s) == NULL) {
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
@@ -152,6 +151,65 @@ ETERM *erl_mk_atom (const char *s)
return ep;
}
+char* erl_atom_ptr_latin1(Erl_Atom_data* a)
+{
+ if (a->latin1 == NULL) {
+ enum erlang_char_encoding enc;
+ a->lenL = utf8_to_latin1(NULL, a->utf8, a->lenU, a->lenU, &enc);
+ if (a->lenL < 0) {
+ a->lenL = 0;
+ return NULL;
+ }
+ if (enc == ERLANG_ASCII) {
+ a->latin1 = a->utf8;
+ }
+ else {
+ a->latin1 = malloc(a->lenL+1);
+ utf8_to_latin1(a->latin1, a->utf8, a->lenU, a->lenL, NULL);
+ a->latin1[a->lenL] = '\0';
+ }
+ }
+ return a->latin1;
+}
+
+char* erl_atom_ptr_utf8(Erl_Atom_data* a)
+{
+ if (a->utf8 == NULL) {
+ int dlen = a->lenL * 2; /* over estimation */
+ a->utf8 = malloc(dlen + 1);
+ a->lenU = latin1_to_utf8(a->utf8, a->latin1, a->lenL, dlen, NULL);
+ a->utf8[a->lenU] = '\0';
+ }
+ return a->utf8;
+
+}
+int erl_atom_size_latin1(Erl_Atom_data* a)
+{
+ if (a->latin1 == NULL) {
+ erl_atom_ptr_latin1(a);
+ }
+ return a->lenL;
+}
+int erl_atom_size_utf8(Erl_Atom_data* a)
+{
+ if (a->utf8 == NULL) {
+ erl_atom_ptr_utf8(a);
+ }
+ return a->lenU;
+}
+char* erl_atom_init_latin1(Erl_Atom_data* a, const char* s)
+{
+ a->lenL = strlen(s);
+ if ((a->latin1 = strsave(s)) == NULL)
+ {
+ return NULL;
+ }
+ a->utf8 = NULL;
+ a->lenU = 0;
+ return a->latin1;
+}
+
+
/*
* Given a string as input, creates a list.
*/
@@ -208,12 +266,19 @@ ETERM *erl_mk_pid(const char *node,
ep = erl_alloc_eterm(ERL_PID);
ERL_COUNT(ep) = 1;
- if ((ERL_PID_NODE(ep) = strsave(node)) == NULL)
+ if (erl_atom_init_latin1(&ep->uval.pidval.node, node) == NULL)
{
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
}
+ erl_mk_pid_helper(ep, number, serial, creation);
+ return ep;
+}
+
+void erl_mk_pid_helper(ETERM *ep, unsigned int number,
+ unsigned int serial, unsigned char creation)
+{
ERL_PID_NUMBER(ep) = number & 0x7fff; /* 15 bits */
if (ei_internal_use_r9_pids_ports()) {
ERL_PID_SERIAL(ep) = serial & 0x07; /* 3 bits */
@@ -222,7 +287,6 @@ ETERM *erl_mk_pid(const char *node,
ERL_PID_SERIAL(ep) = serial & 0x1fff; /* 13 bits */
}
ERL_PID_CREATION(ep) = creation & 0x03; /* 2 bits */
- return ep;
}
/*
@@ -239,12 +303,18 @@ ETERM *erl_mk_port(const char *node,
ep = erl_alloc_eterm(ERL_PORT);
ERL_COUNT(ep) = 1;
- if ((ERL_PORT_NODE(ep) = strsave(node)) == NULL)
+ if (erl_atom_init_latin1(&ep->uval.portval.node, node) == NULL)
{
erl_free_term(ep);
erl_errno = ENOMEM;
return NULL;
}
+ erl_mk_port_helper(ep, number, creation);
+ return ep;
+}
+
+void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation)
+{
if (ei_internal_use_r9_pids_ports()) {
ERL_PORT_NUMBER(ep) = number & 0x3ffff; /* 18 bits */
}
@@ -252,29 +322,29 @@ ETERM *erl_mk_port(const char *node,
ERL_PORT_NUMBER(ep) = number & 0x0fffffff; /* 18 bits */
}
ERL_PORT_CREATION(ep) = creation & 0x03; /* 2 bits */
- return ep;
}
/*
* Create any kind of reference.
*/
-ETERM *__erl_mk_reference (const char *node,
+ETERM *__erl_mk_reference (ETERM* t,
+ const char *node,
size_t len,
unsigned int n[],
unsigned char creation)
{
- ETERM * t;
-
- if (node == NULL) return NULL;
-
- t = erl_alloc_eterm(ERL_REF);
- ERL_COUNT(t) = 1;
-
- if ((ERL_REF_NODE(t) = strsave(node)) == NULL)
- {
- erl_free_term(t);
- erl_errno = ENOMEM;
- return NULL;
+ if (t == NULL) {
+ if (node == NULL) return NULL;
+
+ t = erl_alloc_eterm(ERL_REF);
+ ERL_COUNT(t) = 1;
+
+ if (erl_atom_init_latin1(&t->uval.refval.node, node) == NULL)
+ {
+ erl_free_term(t);
+ erl_errno = ENOMEM;
+ return NULL;
+ }
}
ERL_REF_LEN(t) = len;
ERL_REF_NUMBERS(t)[0] = n[0] & 0x3ffff; /* 18 bits */
@@ -294,7 +364,7 @@ ETERM *erl_mk_ref (const char *node,
{
unsigned int n[3] = {0, 0, 0};
n[0] = number;
- return __erl_mk_reference(node, 1, n, creation);
+ return __erl_mk_reference(NULL, node, 1, n, creation);
}
/*
@@ -307,7 +377,7 @@ erl_mk_long_ref (const char *node,
{
unsigned int n[3] = {0, 0, 0};
n[0] = n3; n[1] = n2; n[2] = n1;
- return __erl_mk_reference(node, 3, n, creation);
+ return __erl_mk_reference(NULL, node, 3, n, creation);
}
/*
@@ -758,6 +828,28 @@ int erl_iolist_length (const ETERM* term)
return -1;
}
+static int erl_atom_copy(Erl_Atom_data* dst, const Erl_Atom_data* src)
+{
+ if (src->latin1 == src->utf8) {
+ dst->latin1 = dst->utf8 = strsave(src->latin1);
+ dst->lenL = dst->lenU = strlen(src->latin1);
+ }
+ else if (src->latin1) {
+ dst->latin1 = strsave(src->latin1);
+ dst->lenL = strlen(src->latin1);
+ dst->utf8 = NULL;
+ dst->lenU = 0;
+ }
+ else {
+ dst->utf8 = strsave(src->utf8);
+ dst->lenU = strlen(src->utf8);
+ dst->latin1 = NULL;
+ dst->lenL = 0;
+ }
+ return (dst->latin1 != NULL || dst->utf8 == NULL);
+}
+
+
/*
* Return a brand NEW COPY of an ETERM.
*/
@@ -796,9 +888,7 @@ ETERM *erl_copy_term(const ETERM *ep)
ERL_FLOAT_VALUE(cp) = ERL_FLOAT_VALUE(ep);
break;
case ERL_ATOM:
- ERL_ATOM_SIZE(cp) = ERL_ATOM_SIZE(ep);
- ERL_ATOM_PTR(cp) = strsave(ERL_ATOM_PTR(ep));
- if (ERL_ATOM_PTR(cp) == NULL)
+ if (!erl_atom_copy(&cp->uval.aval.d, &ep->uval.aval.d))
{
erl_free_term(cp);
erl_errno = ENOMEM;
@@ -810,17 +900,17 @@ ETERM *erl_copy_term(const ETERM *ep)
name and plug in. Somewhat ugly (also done with port and
ref below). */
memcpy(&cp->uval.pidval, &ep->uval.pidval, sizeof(Erl_Pid));
- ERL_PID_NODE(cp) = strsave(ERL_PID_NODE(ep));
+ erl_atom_copy(&cp->uval.pidval.node, &ep->uval.pidval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_PORT:
memcpy(&cp->uval.portval, &ep->uval.portval, sizeof(Erl_Port));
- ERL_PORT_NODE(cp) = strsave(ERL_PORT_NODE(ep));
+ erl_atom_copy(&cp->uval.portval.node, &ep->uval.portval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_REF:
memcpy(&cp->uval.refval, &ep->uval.refval, sizeof(Erl_Ref));
- ERL_REF_NODE(cp) = strsave(ERL_REF_NODE(ep));
+ erl_atom_copy(&cp->uval.refval.node, &ep->uval.refval.node);
ERL_COUNT(cp) = 1;
break;
case ERL_LIST:
@@ -883,29 +973,29 @@ int erl_print_term(FILE *fp, const ETERM *ep)
j = i = doquote = 0;
switch(ERL_TYPE(ep))
{
- case ERL_ATOM:
+ case ERL_ATOM: {
+ char* adata = ERL_ATOM_PTR(ep);
/* FIXME: what if some weird locale is in use? */
- if (!islower((int)ERL_ATOM_PTR(ep)[0]))
+ if (!islower(adata[0]))
doquote = 1;
for (i = 0; !doquote && i < ERL_ATOM_SIZE(ep); i++)
{
- doquote = !(isalnum((int)ERL_ATOM_PTR(ep)[i])
- || (ERL_ATOM_PTR(ep)[i] == '_'));
+ doquote = !(isalnum(adata[i]) || (adata[i] == '_'));
}
if (doquote) {
putc('\'', fp);
ch_written++;
}
- fputs(ERL_ATOM_PTR(ep), fp);
+ fputs(adata, fp);
ch_written += ERL_ATOM_SIZE(ep);
if (doquote) {
putc('\'', fp);
ch_written++;
}
break;
-
+ }
case ERL_VARIABLE:
if (!isupper((int)ERL_VAR_NAME(ep)[0])) {
doquote = 1;
diff --git a/lib/erl_interface/src/legacy/erl_eterm.h b/lib/erl_interface/src/legacy/erl_eterm.h
index 41b008f04f..2e8129d9cd 100644
--- a/lib/erl_interface/src/legacy/erl_eterm.h
+++ b/lib/erl_interface/src/legacy/erl_eterm.h
@@ -55,7 +55,9 @@ typedef struct _heapmark {
} Erl_HeapMark;
-ETERM * __erl_mk_reference(const char *, size_t, unsigned int n[], unsigned char);
+void erl_mk_port_helper(ETERM* ep, unsigned number, unsigned char creation);
+void erl_mk_pid_helper(ETERM*, unsigned,unsigned, unsigned char);
+ETERM * __erl_mk_reference(ETERM*, const char *, size_t, unsigned int n[], unsigned char);
int erl_current_fix_desc(void);
#endif /* _ERL_ETERM_H */
diff --git a/lib/erl_interface/src/legacy/erl_format.c b/lib/erl_interface/src/legacy/erl_format.c
index dc85806c36..533241e396 100644
--- a/lib/erl_interface/src/legacy/erl_format.c
+++ b/lib/erl_interface/src/legacy/erl_format.c
@@ -574,10 +574,22 @@ static int ematch(ETERM *p, ETERM *t)
switch (type_p) {
- case ERL_ATOM:
- return p->uval.aval.len == t->uval.aval.len &&
- memcmp(p->uval.aval.a, t->uval.aval.a, p->uval.aval.len) == 0;
-
+ case ERL_ATOM: {
+ Erl_Atom_data* pa = &p->uval.aval.d;
+ Erl_Atom_data* ta = &t->uval.aval.d;
+ if (pa->utf8 && ta->utf8) {
+ return pa->lenU == ta->lenU && memcmp(pa->utf8, ta->utf8, pa->lenU)==0;
+ }
+ else if (pa->latin1 && ta->latin1) {
+ return pa->lenL == ta->lenL && memcmp(pa->latin1, ta->latin1, pa->lenL)==0;
+ }
+ else if (pa->latin1) {
+ return cmp_latin1_vs_utf8(pa->latin1, pa->lenL, ta->utf8, ta->lenU)==0;
+ }
+ else {
+ return cmp_latin1_vs_utf8(ta->latin1, ta->lenL, pa->utf8, pa->lenU)==0;
+ }
+ }
case ERL_VARIABLE:
if (strcmp(p->uval.vval.name, "_") == 0) /* anon. variable */
return ERL_TRUE;
diff --git a/lib/erl_interface/src/legacy/erl_malloc.c b/lib/erl_interface/src/legacy/erl_malloc.c
index f51a6c69b3..d09239e02d 100644
--- a/lib/erl_interface/src/legacy/erl_malloc.c
+++ b/lib/erl_interface/src/legacy/erl_malloc.c
@@ -112,6 +112,18 @@ do { \
(ptr) = NULL; \
} while (0)
+static void erl_atom_free(Erl_Atom_data* p)
+{
+ erl_free(p->latin1);
+ if (p->utf8 != p->latin1) {
+ erl_free(p->utf8);
+ }
+ p->latin1 = NULL;
+ p->utf8 = NULL;
+ p->lenL = 0;
+ p->lenU = 0;
+}
+
static void _erl_free_term (ETERM *ep, int external, int compound)
{
restart:
@@ -122,7 +134,7 @@ restart:
switch(ERL_TYPE(ep))
{
case ERL_ATOM:
- FREE_AND_CLEAR(ERL_ATOM_PTR(ep));
+ erl_atom_free(&ep->uval.aval.d);
break;
case ERL_VARIABLE:
FREE_AND_CLEAR(ERL_VAR_NAME(ep));
@@ -161,13 +173,13 @@ restart:
FREE_AND_CLEAR(ERL_BIN_PTR(ep));
break;
case ERL_PID:
- FREE_AND_CLEAR(ERL_PID_NODE(ep));
+ erl_atom_free(&ep->uval.pidval.node);
break;
case ERL_PORT:
- FREE_AND_CLEAR(ERL_PORT_NODE(ep));
+ erl_atom_free(&ep->uval.portval.node);
break;
case ERL_REF:
- FREE_AND_CLEAR(ERL_REF_NODE(ep));
+ erl_atom_free(&ep->uval.refval.node);
break;
case ERL_EMPTY_LIST:
case ERL_INTEGER:
diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c
index dad715c762..4c45cebb02 100644
--- a/lib/erl_interface/src/legacy/erl_marshal.c
+++ b/lib/erl_interface/src/legacy/erl_marshal.c
@@ -44,6 +44,9 @@ int erl_fp_compare(unsigned *a, unsigned *b);
static void erl_long_to_fp(long l, unsigned *d);
#endif
+static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2);
+static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1, unsigned char* s2, int l2, unsigned char tag2);
+
/* Used when comparing two encoded byte arrays */
/* this global data is ok (from threading point of view) since it is
* initialized once and never changed
@@ -51,7 +54,13 @@ static void erl_long_to_fp(long l, unsigned *d);
#define CMP_ARRAY_SIZE 256
/* FIXME problem for threaded ? */
-static char cmp_array[CMP_ARRAY_SIZE];
+
+static enum
+{
+ ERL_NUM_CMP=1, ERL_ATOM_CMP, ERL_REF_CMP, ERL_FUN_CMP, ERL_PORT_CMP,
+ ERL_PID_CMP, ERL_TUPLE_CMP, ERL_NIL_CMP, ERL_LIST_CMP, ERL_BIN_CMP
+}cmp_array[CMP_ARRAY_SIZE];
+
static int init_cmp_array_p=1; /* initialize array, the first time */
#if defined(VXWORKS) && CPU == PPC860
@@ -69,10 +78,8 @@ static int init_cmp_array_p=1; /* initialize array, the first time */
static int cmp_floats(double f1, double f2);
static INLINE double to_float(long l);
-#define ERL_NUM_CMP 1
-#define ERL_REF_CMP 3
-
#define IS_ERL_NUM(t) (cmp_array[t]==ERL_NUM_CMP)
+#define IS_ERL_ATOM(t) (cmp_array[t]==ERL_ATOM_CMP)
#define CMP_NUM_CLASS_SIZE 256
static unsigned char cmp_num_class[CMP_NUM_CLASS_SIZE];
@@ -100,25 +107,28 @@ void erl_init_marshal(void)
{
if (init_cmp_array_p) {
memset(cmp_array, 0, CMP_ARRAY_SIZE);
- cmp_array[ERL_SMALL_INTEGER_EXT] = 1;
- cmp_array[ERL_INTEGER_EXT] = 1;
- cmp_array[ERL_FLOAT_EXT] = 1;
- cmp_array[NEW_FLOAT_EXT] = 1;
- cmp_array[ERL_SMALL_BIG_EXT] = 1;
- cmp_array[ERL_LARGE_BIG_EXT] = 1;
- cmp_array[ERL_ATOM_EXT] = 2;
- cmp_array[ERL_REFERENCE_EXT] = 3;
- cmp_array[ERL_NEW_REFERENCE_EXT] = 3;
- cmp_array[ERL_FUN_EXT] = 4;
- cmp_array[ERL_NEW_FUN_EXT] = 4;
- cmp_array[ERL_PORT_EXT] = 5;
- cmp_array[ERL_PID_EXT] = 6;
- cmp_array[ERL_SMALL_TUPLE_EXT] = 7;
- cmp_array[ERL_LARGE_TUPLE_EXT] = 7;
- cmp_array[ERL_NIL_EXT] = 8;
- cmp_array[ERL_STRING_EXT] = 9;
- cmp_array[ERL_LIST_EXT] = 9;
- cmp_array[ERL_BINARY_EXT] = 10;
+ cmp_array[ERL_SMALL_INTEGER_EXT] = ERL_NUM_CMP;
+ cmp_array[ERL_INTEGER_EXT] = ERL_NUM_CMP;
+ cmp_array[ERL_FLOAT_EXT] = ERL_NUM_CMP;
+ cmp_array[NEW_FLOAT_EXT] = ERL_NUM_CMP;
+ cmp_array[ERL_SMALL_BIG_EXT] = ERL_NUM_CMP;
+ cmp_array[ERL_LARGE_BIG_EXT] = ERL_NUM_CMP;
+ cmp_array[ERL_ATOM_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_ATOM_UTF8_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_SMALL_ATOM_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_SMALL_ATOM_UTF8_EXT] = ERL_ATOM_CMP;
+ cmp_array[ERL_REFERENCE_EXT] = ERL_REF_CMP;
+ cmp_array[ERL_NEW_REFERENCE_EXT] = ERL_REF_CMP;
+ cmp_array[ERL_FUN_EXT] = ERL_FUN_CMP;
+ cmp_array[ERL_NEW_FUN_EXT] = ERL_FUN_CMP;
+ cmp_array[ERL_PORT_EXT] = ERL_PORT_CMP;
+ cmp_array[ERL_PID_EXT] = ERL_PID_CMP;
+ cmp_array[ERL_SMALL_TUPLE_EXT] = ERL_TUPLE_CMP;
+ cmp_array[ERL_LARGE_TUPLE_EXT] = ERL_TUPLE_CMP;
+ cmp_array[ERL_NIL_EXT] = ERL_NIL_CMP;
+ cmp_array[ERL_STRING_EXT] = ERL_LIST_CMP;
+ cmp_array[ERL_LIST_EXT] = ERL_LIST_CMP;
+ cmp_array[ERL_BINARY_EXT] = ERL_BIN_CMP;
init_cmp_array_p = 0;
}
if (init_cmp_num_class_p) {
@@ -156,6 +166,21 @@ static int erl_length_x(const ETERM *ep) {
*==============================================================
*/
+static void encode_atom(Erl_Atom_data* a, unsigned char **ext)
+{
+ int ix = 0;
+ if (a->latin1) {
+ ei_encode_atom_len_as((char*)*ext, &ix, a->latin1, a->lenL,
+ ERLANG_LATIN1, ERLANG_LATIN1);
+ }
+ else if (ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU,
+ ERLANG_UTF8, ERLANG_LATIN1) < 0) {
+ ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU,
+ ERLANG_UTF8, ERLANG_UTF8);
+ }
+ *ext += ix;
+}
+
/*
* The actual ENCODE engine.
* Returns 0 on success, otherwise 1.
@@ -170,12 +195,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
switch(ERL_TYPE(ep))
{
case ERL_ATOM:
- i = ep->uval.aval.len;
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy((void *) *ext, (const void *) ep->uval.aval.a, i);
- *ext += i;
+ encode_atom(&ep->uval.aval.d, ext);
return 0;
case ERL_INTEGER:
@@ -286,12 +306,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
case ERL_PID:
*(*ext)++ = ERL_PID_EXT;
/* First poke in node as an atom */
- i = strlen((char *)ERL_PID_NODE(ep));
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_PID_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.pidval.node, ext);
/* And then fill in the integer fields */
i = ERL_PID_NUMBER(ep);
*(*ext)++ = (i >> 24) &0xff;
@@ -319,11 +334,8 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
*(*ext)++ = (len >> 8) &0xff;
*(*ext)++ = len &0xff;
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >> 8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_REF_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.refval.node, ext);
+
*(*ext)++ = ERL_REF_CREATION(ep);
/* Then the integer fields */
for (j = 0; j < ERL_REF_LEN(ep); j++) {
@@ -338,12 +350,7 @@ int erl_encode_it(ETERM *ep, unsigned char **ext, int dist)
case ERL_PORT:
*(*ext)++ = ERL_PORT_EXT;
/* First poke in node as an atom */
- i = strlen((char *)ERL_PORT_NODE(ep));
- *(*ext)++ = ERL_ATOM_EXT;
- *(*ext)++ = (i >>8) &0xff;
- *(*ext)++ = i &0xff;
- memcpy(*ext, ERL_PORT_NODE(ep), i);
- *ext += i;
+ encode_atom(&ep->uval.portval.node, ext);
/* Then the integer fields */
i = ERL_PORT_NUMBER(ep);
*(*ext)++ = (i >> 24) &0xff;
@@ -494,6 +501,16 @@ int erl_term_len(ETERM *ep)
return 1+erl_term_len_helper(ep, 4);
}
+static int atom_len_helper(Erl_Atom_data* a)
+{
+ if (erl_atom_ptr_latin1(a)) {
+ return 1 + 2 + a->lenL; /* ERL_ATOM_EXT */
+ }
+ else {
+ return 1 + 1 + (a->lenU > 255) + a->lenU;
+ }
+}
+
static int erl_term_len_helper(ETERM *ep, int dist)
{
int len = 0;
@@ -505,8 +522,7 @@ static int erl_term_len_helper(ETERM *ep, int dist)
if (ep) {
switch (ERL_TYPE(ep)) {
case ERL_ATOM:
- i = ep->uval.aval.len;
- len = i + 3;
+ len = atom_len_helper(&ep->uval.aval.d);
break;
case ERL_INTEGER:
@@ -538,20 +554,15 @@ static int erl_term_len_helper(ETERM *ep, int dist)
break;
case ERL_PID:
- /* 1 + N + 4 + 4 + 1 where N = 3 + strlen */
- i = strlen((char *)ERL_PID_NODE(ep));
- len = 13 + i;
+ len = 1 + atom_len_helper(&ep->uval.pidval.node) + 4 + 4 + 1;
break;
case ERL_REF:
- i = strlen((char *)ERL_REF_NODE(ep));
- len = 1 + 2 + (i+3) + 1 + ERL_REF_LEN(ep) * 4;
+ len = 1 + 2 + atom_len_helper(&ep->uval.refval.node) + 1 + ERL_REF_LEN(ep) * 4;
break;
case ERL_PORT:
- /* 1 + N + 4 + 1 where N = 3 + strlen */
- i = strlen((char *)ERL_PORT_NODE(ep));
- len = 9 + i;
+ len = 1 + atom_len_helper(&ep->uval.portval.node) + 4 + 1;
break;
case ERL_EMPTY_LIST:
@@ -644,31 +655,36 @@ int erl_encode_buf(ETERM *ep, unsigned char **ext)
} /* erl_encode_buf */
-/*
- * A nice macro to make it look cleaner in the
- * cases of PID's,PORT's and REF's below.
- * It reads the NODE name from a buffer.
- */
-#define READ_THE_NODE(ext,cp,len,i) \
-/* eat first atom, repr. the node */ \
-if (**ext != ERL_ATOM_EXT) \
- return (ETERM *) NULL; \
-*ext += 1; \
-i = (**ext << 8) | (*ext)[1]; \
-cp = (char *) *(ext) + 2; \
-*ext += (i + 2); \
-len = i
-
-#define STATIC_NODE_BUF_SZ 30
-
-#define SET_NODE(node,node_buf,cp,len) \
-if (len >= STATIC_NODE_BUF_SZ) node = erl_malloc(len+1); \
-else node = node_buf; \
-memcpy(node, cp, len); \
-node[len] = '\0'
-
-#define RESET_NODE(node,len) \
-if (len >= STATIC_NODE_BUF_SZ) free(node)
+
+static int read_atom(unsigned char** ext, Erl_Atom_data* a)
+{
+ char buf[MAXATOMLEN_UTF8];
+ int offs = 0;
+ enum erlang_char_encoding enc;
+ int ret = ei_decode_atom_as((char*)*ext, &offs, buf, MAXATOMLEN_UTF8,
+ ERLANG_LATIN1|ERLANG_UTF8, NULL, &enc);
+ *ext += offs;
+
+ if (ret == 0) {
+ int i = strlen(buf);
+ char* clone = erl_malloc(i+1);
+ memcpy(clone, buf, i+1);
+
+ a->latin1 = NULL;
+ a->lenL = 0;
+ a->utf8 = NULL;
+ a->lenU = 0;
+ if (enc & (ERLANG_LATIN1 | ERLANG_ASCII)) {
+ a->latin1 = clone;
+ a->lenL = i;
+ }
+ if (enc & (ERLANG_UTF8 | ERLANG_ASCII)) {
+ a->utf8 = clone;
+ a->lenU = i;
+ }
+ }
+ return ret;
+}
/*
* The actual DECODE engine.
@@ -679,13 +695,13 @@ static ETERM *erl_decode_it(unsigned char **ext)
char *cp;
ETERM *ep,*tp,*np;
unsigned int u,sign;
- int i,j,len,arity;
+ int i,j,arity;
double ff;
/* Assume we are going to decode an integer */
ep = erl_alloc_eterm(ERL_INTEGER);
ERL_COUNT(ep) = 1;
-
+
switch (*(*ext)++)
{
case ERL_INTEGER_EXT:
@@ -774,138 +790,90 @@ static ETERM *erl_decode_it(unsigned char **ext)
return ep;
case ERL_ATOM_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+
ERL_TYPE(ep) = ERL_ATOM;
- i = (**ext << 8) | (*ext)[1];
- cp = (char *) *(ext) + 2;
- *ext += (i + 2);
- ep->uval.aval.len = i;
- ep->uval.aval.a = (char *) erl_malloc(i+1);
- memcpy(ep->uval.aval.a, cp, i);
- ep->uval.aval.a[i]='\0';
+ --(*ext);
+ if (read_atom(ext, &ep->uval.aval.d) < 0) return NULL;
return ep;
case ERL_PID_EXT:
- erl_free_term(ep);
- { /* Why not use the constructors? */
- char *node;
- char node_buf[STATIC_NODE_BUF_SZ];
+ {
unsigned int number, serial;
unsigned char creation;
- ETERM *eterm_p;
- READ_THE_NODE(ext,cp,len,i);
- SET_NODE(node,node_buf,cp,len);
+ ERL_TYPE(ep) = ERL_PID;
+ if (read_atom(ext, &ep->uval.pidval.node) < 0) return NULL;
/* get the integers */
-#if 0
- /* FIXME: Remove code or whatever....
- Ints on the wire are big-endian (== network byte order)
- so use ntoh[sl]. (But some are little-endian! Arrrgh!)
- Also, the libc authors can be expected to optimize them
- heavily. However, the marshalling makes no guarantees
- about alignments -- so it won't work at all. */
- number = ntohl(*((unsigned int *)*ext)++);
- serial = ntohl(*((unsigned int *)*ext)++);
-#else
number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
serial = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_pid(node, number, serial, creation);
- RESET_NODE(node,len);
- return eterm_p;
+ erl_mk_pid_helper(ep, number, serial, creation);
+ return ep;
}
case ERL_REFERENCE_EXT:
- erl_free_term(ep);
{
- char *node;
- char node_buf[STATIC_NODE_BUF_SZ];
- unsigned int number;
+ unsigned int n[3] = {0, 0, 0};
unsigned char creation;
- ETERM *eterm_p;
- READ_THE_NODE(ext,cp,len,i);
- SET_NODE(node,node_buf,cp,len);
+ ERL_TYPE(ep) = ERL_REF;
+ if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL;
/* get the integers */
-#if 0
- number = ntohl(*((unsigned int *)*ext)++);
-#else
- number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
+ n[0] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_ref(node, number, creation);
- RESET_NODE(node,len);
- return eterm_p;
+ __erl_mk_reference(ep, NULL, 1, n, creation);
+ return ep;
}
case ERL_NEW_REFERENCE_EXT:
- erl_free_term(ep);
{
- char *node;
- char node_buf[STATIC_NODE_BUF_SZ];
size_t cnt, i;
unsigned int n[3];
unsigned char creation;
- ETERM *eterm_p;
-#if 0
- cnt = ntohs(*((unsigned short *)*ext)++);
-#else
+ ERL_TYPE(ep) = ERL_REF;
cnt = ((*ext)[0] << 8) | (*ext)[1];
*ext += 2;
-#endif
- READ_THE_NODE(ext,cp,len,i);
- SET_NODE(node,node_buf,cp,len);
+ if (read_atom(ext, &ep->uval.refval.node) < 0) return NULL;
/* get the integers */
creation = *(*ext)++;
for(i = 0; i < cnt; i++)
{
-#if 0
- n[i] = ntohl(*((unsigned int *)*ext)++);
-#else
n[i] = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
}
- eterm_p = __erl_mk_reference(node, cnt, n, creation);
- RESET_NODE(node,len);
- return eterm_p;
+ __erl_mk_reference(ep, NULL, cnt, n, creation);
+ return ep;
}
case ERL_PORT_EXT:
- erl_free_term(ep);
{
- char *node;
- char node_buf[STATIC_NODE_BUF_SZ];
unsigned int number;
unsigned char creation;
- ETERM *eterm_p;
- READ_THE_NODE(ext,cp,len,i);
- SET_NODE(node,node_buf,cp,len);
+ ERL_TYPE(ep) = ERL_PORT;
+ if (read_atom(ext, &ep->uval.portval.node) < 0) return NULL;
/* get the integers */
-#if 0
- number = ntohl(*((unsigned int *)*ext)++);
-#else
number = ((*ext)[0] << 24) | ((*ext)[1]) << 16 |
((*ext)[2]) << 8 | ((*ext)[3]);
*ext += 4;
-#endif
creation = *(*ext)++;
- eterm_p = erl_mk_port(node, number, creation);
- RESET_NODE(node,len);
- return eterm_p;
+ erl_mk_port_helper(ep, number, creation);
+ return ep;
}
case ERL_NIL_EXT:
@@ -1140,6 +1108,9 @@ unsigned char erl_ext_type(unsigned char *ext)
case ERL_INTEGER_EXT:
return ERL_INTEGER;
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
return ERL_ATOM;
case ERL_PID_EXT:
return ERL_PID;
@@ -1191,6 +1162,9 @@ int erl_ext_size(unsigned char *t)
case ERL_SMALL_INTEGER_EXT:
case ERL_INTEGER_EXT:
case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
case ERL_PID_EXT:
case ERL_PORT_EXT:
case ERL_REFERENCE_EXT:
@@ -1229,15 +1203,32 @@ int erl_ext_size(unsigned char *t)
} /* ext_size */
-/*
- * A nice macro that eats up the atom pointed to.
- */
-#define JUMP_ATOM(ext,i) \
-if (**ext != ERL_ATOM_EXT) \
- return 0; \
-*ext += 1; \
-i = (**ext << 8) | (*ext)[1]; \
-*ext += (i + 2)
+
+static int jump_atom(unsigned char** ext)
+{
+ unsigned char* e = *ext;
+ int len;
+
+ switch (*e++) {
+ case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ len = (e[0] << 8) | e[1];
+ e += (len + 2);
+ break;
+
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ len = e[0];
+ e += (len + 1);
+ break;
+
+ default:
+ return 0;
+ }
+ *ext = e;
+ return 1;
+}
+
/*
* MOVE the POINTER PAST the ENCODED ETERM we
@@ -1259,25 +1250,27 @@ static int jump(unsigned char **ext)
*ext += 1;
break;
case ERL_ATOM_EXT:
- i = (**ext << 8) | (*ext)[1];
- *ext += (i + 2);
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ jump_atom(ext);
break;
case ERL_PID_EXT:
/* eat first atom */
- JUMP_ATOM(ext,i);
+ if (!jump_atom(ext)) return 0;
*ext += 9; /* Two int's and the creation field */
break;
case ERL_REFERENCE_EXT:
case ERL_PORT_EXT:
/* first field is an atom */
- JUMP_ATOM(ext,i);
+ if (!jump_atom(ext)) return 0;
*ext += 5; /* One int and the creation field */
break;
case ERL_NEW_REFERENCE_EXT:
n = (**ext << 8) | (*ext)[1];
*ext += 2;
/* first field is an atom */
- JUMP_ATOM(ext,i);
+ if (!jump_atom(ext)) return 0;
*ext += 4*n+1;
break;
case ERL_NIL_EXT:
@@ -1425,6 +1418,58 @@ static int cmpbytes(unsigned char* s1,int l1,unsigned char* s2,int l2)
} /* cmpbytes */
+#define tag2enc(T) ((T)==ERL_ATOM_EXT || (T)==ERL_SMALL_ATOM_EXT ? ERLANG_LATIN1 : ERLANG_UTF8)
+
+static int cmpatoms(unsigned char* s1, int l1, unsigned char tag1,
+ unsigned char* s2, int l2, unsigned char tag2)
+{
+ enum erlang_char_encoding enc1 = tag2enc(tag1);
+ enum erlang_char_encoding enc2 = tag2enc(tag2);
+
+ if (enc1 == enc2) {
+ return cmpbytes(s1, l1,s2,l2);
+ }
+
+ if (enc1 == ERLANG_LATIN1) {
+ return cmp_latin1_vs_utf8((char*)s1, l1, (char*)s2, l2);
+ }
+ else {
+ return -cmp_latin1_vs_utf8((char*)s2, l2, (char*)s1, l1);
+ }
+}
+
+int cmp_latin1_vs_utf8(const char* strL, int lenL, const char* strU, int lenU)
+{
+ unsigned char* sL = (unsigned char*)strL;
+ unsigned char* sU = (unsigned char*)strU;
+ unsigned char* sL_end = sL + lenL;
+ unsigned char* sU_end = sU + lenU;
+
+ while(sL < sL_end && sU < sU_end) {
+ unsigned char UasL;
+ if (*sL >= 0x80) {
+ if (*sU < 0xC4 && (sU+1) < sU_end) {
+ UasL = ((sU[0] & 0x3) << 6) | (sU[1] & 0x3F);
+ }
+ else return -1;
+ }
+ else {
+ UasL = *sU;
+ }
+ if (*sL < UasL) return -1;
+ if (*sL > UasL) return 1;
+
+ sL++;
+ if (*sU < 0x80) sU++;
+ else if (*sU < 0xE0) sU += 2;
+ else if (*sU < 0xF0) sU += 3;
+ else /*if (*sU < 0xF8)*/ sU += 4;
+ }
+
+ return (sU >= sU_end) - (sL >= sL_end); /* -1, 0 or 1 */
+}
+
+
#define CMP_EXT_ERROR_CODE 4711
#define CMP_EXT_INT32_BE(AP, BP) \
@@ -1437,9 +1482,8 @@ do { \
#define CMP_EXT_SKIP_ATOM(EP) \
do { \
- if ((EP)[0] != ERL_ATOM_EXT) \
+ if (!jump_atom(&(EP))) \
return CMP_EXT_ERROR_CODE; \
- (EP) += 3 + ((EP)[1] << 8 | (EP)[2]); \
} while (0)
/*
@@ -1561,6 +1605,7 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
int min, ret,i,j,k;
double ff1, ff2;
unsigned char *tmp1, *tmp2;
+ unsigned char tag1, tag2;
if ( ((*e1)[0] == ERL_STRING_EXT) && ((*e2)[0] == ERL_LIST_EXT) ) {
return cmp_string_list(e1, e2);
@@ -1568,8 +1613,10 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
return -cmp_string_list(e2, e1);
}
- *e2 += 1;
- switch (*(*e1)++)
+ tag1 = *(*e1)++;
+ tag2 = *(*e2)++;
+ i = j = 0;
+ switch (tag1)
{
case ERL_SMALL_INTEGER_EXT:
if (**e1 < **e2) ret = -1;
@@ -1589,11 +1636,17 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
*e1 += 4; *e2 += 4;
return ret;
case ERL_ATOM_EXT:
- i = (**e1 << 8) | (*e1)[1];
- j = (**e2 << 8) | (*e2)[1];
- ret = cmpbytes(*e1 +2, i, *e2 +2, j);
- *e1 += (i + 2);
- *e2 += (j + 2);
+ case ERL_ATOM_UTF8_EXT:
+ i = (**e1) << 8; (*e1)++;
+ j = (**e2) << 8; (*e2)++;
+ /*fall through*/
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ i |= (**e1); (*e1)++;
+ j |= (**e2); (*e2)++;
+ ret = cmpatoms(*e1, i, tag1, *e2, j, tag2);
+ *e1 += i;
+ *e2 += j;
return ret;
case ERL_PID_EXT: {
unsigned char *n1 = *e1;
@@ -1622,7 +1675,7 @@ static int cmp_exe2(unsigned char **e1, unsigned char **e2)
}
case ERL_PORT_EXT:
/* First compare node names ... */
- if (**e1 != ERL_ATOM_EXT || **e2 != ERL_ATOM_EXT)
+ if (!IS_ERL_ATOM(**e1) || !IS_ERL_ATOM(**e2))
return CMP_EXT_ERROR_CODE;
ret = cmp_exe2(e1, e2);
*e1 += 5; *e2 += 5;
diff --git a/lib/erl_interface/src/legacy/global_whereis.c b/lib/erl_interface/src/legacy/global_whereis.c
index 2afb193504..e6c556d907 100644
--- a/lib/erl_interface/src/legacy/global_whereis.c
+++ b/lib/erl_interface/src/legacy/global_whereis.c
@@ -85,7 +85,16 @@ ETERM *erl_global_whereis(int fd, const char *name, char *node)
opid = erl_decode((unsigned char*)buf);
/* extract the nodename for the caller */
- if (node) strcpy(node,epid.node);
+ if (node) {
+ char* node_str = ERL_PID_NODE(opid);
+ if (node_str) {
+ strcpy(node, node_str);
+ }
+ else {
+ erl_free_term(opid);
+ return NULL;
+ }
+ }
return opid;
}
diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c
index 0b82ef0e35..65afee89cc 100644
--- a/lib/erl_interface/src/misc/ei_decode_term.c
+++ b/lib/erl_interface/src/misc/ei_decode_term.c
@@ -32,7 +32,7 @@
int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
{
const char* s = buf + *index, * s0 = s;
- int len, i, n, sign;
+ int i, n, sign;
char c;
if (term == NULL) return -1;
@@ -48,20 +48,13 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
case NEW_FLOAT_EXT:
return ei_decode_double(buf, index, &term->value.d_val);
case ERL_ATOM_EXT:
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.atom_name, s, len);
- term->value.atom_name[len] = '\0';
- s += len;
- break;
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ return ei_decode_atom(buf, index, term->value.atom_name);
case ERL_REFERENCE_EXT:
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.ref.node, s, len);
- term->value.ref.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
term->value.ref.n[0] = get32be(s);
term->value.ref.len = 1;
@@ -71,12 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
/* first the integer count */
term->value.ref.len = get16be(s);
/* then the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.ref.node, s, len);
- term->value.ref.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* creation */
term->value.ref.creation = get8(s) & 0x03;
/* finally the id integers */
@@ -88,22 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
}
break;
case ERL_PORT_EXT:
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.port.node, s, len);
- term->value.port.node[len] = '\0';
+ if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1;
term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */;
term->value.port.creation = get8(s) & 0x03;
break;
case ERL_PID_EXT:
- if (get8(s) != ERL_ATOM_EXT) return -1;
- /* name first */
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.pid.node, s, len);
- term->value.pid.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.pid.node, &term->value.port.node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */
term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */
diff --git a/lib/erl_interface/src/misc/ei_format.c b/lib/erl_interface/src/misc/ei_format.c
index 281a192535..b5f11e618e 100644
--- a/lib/erl_interface/src/misc/ei_format.c
+++ b/lib/erl_interface/src/misc/ei_format.c
@@ -139,8 +139,8 @@ static int patom(const char** fmt, ei_x_buff* x)
--(*fmt);
len = *fmt - start;
/* FIXME why truncate atom name and not fail?! */
- if (len > MAXATOMLEN)
- len = MAXATOMLEN;
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN-1;
return ei_x_encode_atom_len(x, start, len);
}
diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c
index 5fc6b3542c..f3003a6172 100644
--- a/lib/erl_interface/src/misc/ei_printterm.c
+++ b/lib/erl_interface/src/misc/ei_printterm.c
@@ -115,7 +115,7 @@ static int print_term(FILE* fp, ei_x_buff* x,
const char* buf, int* index)
{
int i, doquote, n, m, ty, r;
- char a[MAXATOMLEN+1], *p;
+ char a[MAXATOMLEN], *p;
int ch_written = 0; /* counter of written chars */
erlang_pid pid;
erlang_port port;
@@ -132,7 +132,10 @@ static int print_term(FILE* fp, ei_x_buff* x,
doquote = 0;
ei_get_type_internal(buf, index, &ty, &n);
switch (ty) {
- case ERL_ATOM_EXT:
+ case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
if (ei_decode_atom(buf, index, a) < 0)
goto err;
doquote = !islower((int)a[0]);
diff --git a/lib/erl_interface/src/misc/ei_x_encode.c b/lib/erl_interface/src/misc/ei_x_encode.c
index fa1e26ccbb..44dcff7664 100644
--- a/lib/erl_interface/src/misc/ei_x_encode.c
+++ b/lib/erl_interface/src/misc/ei_x_encode.c
@@ -197,18 +197,33 @@ int ei_x_encode_tuple_header(ei_x_buff* x, long n)
int ei_x_encode_atom(ei_x_buff* x, const char* s)
{
- return ei_x_encode_atom_len(x, s, strlen(s));
+ return ei_x_encode_atom_len_as(x, s, strlen(s), ERLANG_LATIN1, ERLANG_LATIN1);
}
int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len)
{
+ return ei_x_encode_atom_len_as(x, s, len, ERLANG_LATIN1, ERLANG_LATIN1);
+}
+
+int ei_x_encode_atom_as(ei_x_buff* x, const char* s,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
+ return ei_x_encode_atom_len_as(x, s, strlen(s), from_enc, to_enc);
+}
+
+int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
int i = x->index;
- ei_encode_atom_len(NULL, &i, s, len);
+ ei_encode_atom_len_as(NULL, &i, s, len, from_enc, to_enc);
if (!x_fix_buff(x, i))
return -1;
- return ei_encode_atom_len(x->buff, &x->index, s, len);
+ return ei_encode_atom_len_as(x->buff, &x->index, s, len, from_enc, to_enc);
}
+
int ei_x_encode_pid(ei_x_buff* x, const erlang_pid* pid)
{
int i = x->index;
diff --git a/lib/erl_interface/src/misc/get_type.c b/lib/erl_interface/src/misc/get_type.c
index 2a680d0f94..54465196b0 100644
--- a/lib/erl_interface/src/misc/get_type.c
+++ b/lib/erl_interface/src/misc/get_type.c
@@ -33,78 +33,6 @@ int ei_get_type(const char *buf, const int *index, int *type, int *len)
return ei_get_type_internal(buf, index, type, len);
}
-#if 0
-int ei_get_type(const char *buf, const int *index, int *type, int *len)
-{
- const char *s = buf + *index;
- int itype = get8(s); /* Internal type */
-
- *len = 0;
-
- switch (*type) {
-
- case ERL_SMALL_INTEGER_EXT:
- case ERL_INTEGER_EXT:
- case ERL_SMALL_BIG_EXT:
- case ERL_LARGE_BIG_EXT:
- *type = EI_TYPE_INTEGER;
- break;
-
- case ERL_FLOAT_EXT:
- *type = EI_TYPE_FLOAT;
- break;
-
- case ERL_SMALL_TUPLE_EXT:
- *len = get8(s);
- break;
-
- case ERL_ATOM_EXT:
- case ERL_STRING_EXT:
- *len = get16be(s);
- break;
-
- case ERL_LARGE_TUPLE_EXT:
- case ERL_LIST_EXT:
- case ERL_BINARY_EXT:
- *len = get32be(s);
- break;
-
- case ERL_SMALL_BIG_EXT:
- *len = (get8(s)+1)/2; /* big arity */
- break;
-
- case ERL_LARGE_BIG_EXT:
- *len = (get32be(s)+1)/2; /* big arity */
- break;
-
- case ERL_BINARY_EXT:
- *type = EI_TYPE_BINARY;
- break;
-
- case ERL_PID_EXT:
- *type = EI_TYPE_PID;
- break;
-
- case ERL_PORT_EXT:
- *type = EI_TYPE_PORT;
- break;
-
- case ERL_REFERENCE_EXT:
- case ERL_NEW_REFERENCE_EXT:
- *type = EI_TYPE_REF;
- break;
-
- default:
- break;
- }
-
- /* leave index unchanged */
- return 0;
-}
-#endif
-
-
-/* Old definition of function above */
int ei_get_type_internal(const char *buf, const int *index,
int *type, int *len)
@@ -114,10 +42,15 @@ int ei_get_type_internal(const char *buf, const int *index,
*type = get8(s);
switch (*type) {
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ *type = ERL_ATOM_EXT;
case ERL_SMALL_TUPLE_EXT:
*len = get8(s);
break;
-
+
+ case ERL_ATOM_UTF8_EXT:
+ *type = ERL_ATOM_EXT;
case ERL_ATOM_EXT:
case ERL_STRING_EXT:
*len = get16be(s);
diff --git a/lib/erl_interface/src/misc/putget.h b/lib/erl_interface/src/misc/putget.h
index 7a43de324b..77ae168f8c 100644
--- a/lib/erl_interface/src/misc/putget.h
+++ b/lib/erl_interface/src/misc/putget.h
@@ -105,6 +105,13 @@
((EI_ULONGLONG)((unsigned char *)(s))[-2] << 8) | \
(EI_ULONGLONG)((unsigned char *)(s))[-1]))
+int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int ei_internal_get_atom(const char** bufp, char* p, enum erlang_char_encoding*);
+int ei_internal_put_atom(char** bufp, const char* p, int slen, enum erlang_char_encoding);
+#define get_atom ei_internal_get_atom
+#define put_atom ei_internal_put_atom
+
typedef union float_ext {
double d;
EI_ULONGLONG val;
diff --git a/lib/erl_interface/src/misc/show_msg.c b/lib/erl_interface/src/misc/show_msg.c
index 194296798b..33b09643ca 100644
--- a/lib/erl_interface/src/misc/show_msg.c
+++ b/lib/erl_interface/src/misc/show_msg.c
@@ -132,13 +132,13 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
switch (msg.msgtype) {
case ERL_SEND:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)) return -1;
mbuf = msgbuf;
break;
case ERL_SEND_TT:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
@@ -146,15 +146,15 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
case ERL_REG_SEND:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)) return -1;
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)) return -1;
mbuf = msgbuf;
break;
case ERL_REG_SEND_TT:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
break;
@@ -457,7 +457,7 @@ static void show_term(const char *termbuf, int *index, FILE *stream)
break;
case ERL_FUN_EXT: {
- char atom[MAXATOMLEN+1];
+ char atom[MAXATOMLEN];
long idx;
long uniq;
const char* s = termbuf + *index, * s0 = s;
diff --git a/lib/erl_interface/src/prog/ei_fake_prog.c b/lib/erl_interface/src/prog/ei_fake_prog.c
index 68eb537211..34101a2851 100644
--- a/lib/erl_interface/src/prog/ei_fake_prog.c
+++ b/lib/erl_interface/src/prog/ei_fake_prog.c
@@ -96,6 +96,7 @@ int main(void)
EI_ULONGLONG *ulonglongp = (EI_ULONGLONG*)NULL;
EI_ULONGLONG ulonglongx = 0;
#endif
+ enum erlang_char_encoding enc;
intx = erl_errno;
@@ -148,9 +149,13 @@ int main(void)
ei_x_encode_string(&eix, charp);
ei_x_encode_string_len(&eix, charp, intx);
ei_encode_atom(charp, intp, charp);
+ ei_encode_atom_as(charp, intp, charp, ERLANG_LATIN1, ERLANG_UTF8);
ei_encode_atom_len(charp, intp, charp, intx);
+ ei_encode_atom_len_as(charp, intp, charp, intx, ERLANG_ASCII, ERLANG_LATIN1);
ei_x_encode_atom(&eix, charp);
+ ei_x_encode_atom_as(&eix, charp, ERLANG_LATIN1, ERLANG_UTF8);
ei_x_encode_atom_len(&eix, charp, intx);
+ ei_x_encode_atom_len_as(&eix, charp, intx, ERLANG_LATIN1, ERLANG_UTF8);
ei_encode_binary(charp, intp, (void *)0, longx);
ei_x_encode_binary(&eix, (void*)0, intx);
ei_encode_pid(charp, intp, &epid);
@@ -181,6 +186,7 @@ int main(void)
ei_decode_char(charp, intp, charp);
ei_decode_string(charp, intp, charp);
ei_decode_atom(charp, intp, charp);
+ ei_decode_atom_as(charp, intp, charp, MAXATOMLEN_UTF8, ERLANG_WHATEVER, &enc, &enc);
ei_decode_binary(charp, intp, (void *)0, longp);
ei_decode_fun(charp, intp, &efun);
free_fun(&efun);