aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src/misc
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
committerSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
commitb8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch)
tree708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/erl_interface/src/misc
parente99df74bee7c245ec76678e336fcd09d4b51a089 (diff)
parentd6e3e256b850050b7a86323b2948009d5fcc30a9 (diff)
downloadotp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms: erl_interface: Fix bug when transcoding atoms from and to UTF8 erl_interface: Changed erlang_char_encoding interface erts: Testcase doing unicode atom printout with ~w erl_interface: even more utf8 atom stuff erts: Fix bug in analyze_utf8 causing faulty latin1 detection Add UTF-8 node name support for epmd workaround... Fix merge conflict with hasse UTF-8 atom documentation test case erl_interface: utf8 atoms continued Add utf8 atom distribution test cases atom fixes for NIFs and atom_to_binary UTF-8 support for distribution Implement UTF-8 atom support for jinterface erl_interface: Enable decode of unicode atoms stdlib: Fix printing of unicode atoms erts: Change internal representation of atoms to utf8 erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity Conflicts: erts/emulator/beam/io.c OTP-10753
Diffstat (limited to 'lib/erl_interface/src/misc')
-rw-r--r--lib/erl_interface/src/misc/ei_decode_term.c40
-rw-r--r--lib/erl_interface/src/misc/ei_format.c4
-rw-r--r--lib/erl_interface/src/misc/ei_printterm.c7
-rw-r--r--lib/erl_interface/src/misc/ei_x_encode.c21
-rw-r--r--lib/erl_interface/src/misc/get_type.c79
-rw-r--r--lib/erl_interface/src/misc/putget.h7
-rw-r--r--lib/erl_interface/src/misc/show_msg.c14
7 files changed, 54 insertions, 118 deletions
diff --git a/lib/erl_interface/src/misc/ei_decode_term.c b/lib/erl_interface/src/misc/ei_decode_term.c
index 0b82ef0e35..65afee89cc 100644
--- a/lib/erl_interface/src/misc/ei_decode_term.c
+++ b/lib/erl_interface/src/misc/ei_decode_term.c
@@ -32,7 +32,7 @@
int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
{
const char* s = buf + *index, * s0 = s;
- int len, i, n, sign;
+ int i, n, sign;
char c;
if (term == NULL) return -1;
@@ -48,20 +48,13 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
case NEW_FLOAT_EXT:
return ei_decode_double(buf, index, &term->value.d_val);
case ERL_ATOM_EXT:
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.atom_name, s, len);
- term->value.atom_name[len] = '\0';
- s += len;
- break;
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ return ei_decode_atom(buf, index, term->value.atom_name);
case ERL_REFERENCE_EXT:
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.ref.node, s, len);
- term->value.ref.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
term->value.ref.n[0] = get32be(s);
term->value.ref.len = 1;
@@ -71,12 +64,7 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
/* first the integer count */
term->value.ref.len = get16be(s);
/* then the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.ref.node, s, len);
- term->value.ref.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.ref.node, &term->value.ref.node_org_enc) < 0) return -1;
/* creation */
term->value.ref.creation = get8(s) & 0x03;
/* finally the id integers */
@@ -88,22 +76,12 @@ int ei_decode_ei_term(const char* buf, int* index, ei_term* term)
}
break;
case ERL_PORT_EXT:
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.port.node, s, len);
- term->value.port.node[len] = '\0';
+ if (get_atom(&s, term->value.port.node, &term->value.port.node_org_enc) < 0) return -1;
term->value.port.id = get32be(s) & 0x0fffffff; /* 28 bits */;
term->value.port.creation = get8(s) & 0x03;
break;
case ERL_PID_EXT:
- if (get8(s) != ERL_ATOM_EXT) return -1;
- /* name first */
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
- memcpy(term->value.pid.node, s, len);
- term->value.pid.node[len] = '\0';
- s += len;
+ if (get_atom(&s, term->value.pid.node, &term->value.port.node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
term->value.pid.num = get32be(s) & 0x7fff; /* 15 bits */
term->value.pid.serial = get32be(s) & 0x1fff; /* 13 bits */
diff --git a/lib/erl_interface/src/misc/ei_format.c b/lib/erl_interface/src/misc/ei_format.c
index 281a192535..b5f11e618e 100644
--- a/lib/erl_interface/src/misc/ei_format.c
+++ b/lib/erl_interface/src/misc/ei_format.c
@@ -139,8 +139,8 @@ static int patom(const char** fmt, ei_x_buff* x)
--(*fmt);
len = *fmt - start;
/* FIXME why truncate atom name and not fail?! */
- if (len > MAXATOMLEN)
- len = MAXATOMLEN;
+ if (len >= MAXATOMLEN)
+ len = MAXATOMLEN-1;
return ei_x_encode_atom_len(x, start, len);
}
diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c
index 5fc6b3542c..f3003a6172 100644
--- a/lib/erl_interface/src/misc/ei_printterm.c
+++ b/lib/erl_interface/src/misc/ei_printterm.c
@@ -115,7 +115,7 @@ static int print_term(FILE* fp, ei_x_buff* x,
const char* buf, int* index)
{
int i, doquote, n, m, ty, r;
- char a[MAXATOMLEN+1], *p;
+ char a[MAXATOMLEN], *p;
int ch_written = 0; /* counter of written chars */
erlang_pid pid;
erlang_port port;
@@ -132,7 +132,10 @@ static int print_term(FILE* fp, ei_x_buff* x,
doquote = 0;
ei_get_type_internal(buf, index, &ty, &n);
switch (ty) {
- case ERL_ATOM_EXT:
+ case ERL_ATOM_EXT:
+ case ERL_ATOM_UTF8_EXT:
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
if (ei_decode_atom(buf, index, a) < 0)
goto err;
doquote = !islower((int)a[0]);
diff --git a/lib/erl_interface/src/misc/ei_x_encode.c b/lib/erl_interface/src/misc/ei_x_encode.c
index fa1e26ccbb..44dcff7664 100644
--- a/lib/erl_interface/src/misc/ei_x_encode.c
+++ b/lib/erl_interface/src/misc/ei_x_encode.c
@@ -197,18 +197,33 @@ int ei_x_encode_tuple_header(ei_x_buff* x, long n)
int ei_x_encode_atom(ei_x_buff* x, const char* s)
{
- return ei_x_encode_atom_len(x, s, strlen(s));
+ return ei_x_encode_atom_len_as(x, s, strlen(s), ERLANG_LATIN1, ERLANG_LATIN1);
}
int ei_x_encode_atom_len(ei_x_buff* x, const char* s, int len)
{
+ return ei_x_encode_atom_len_as(x, s, len, ERLANG_LATIN1, ERLANG_LATIN1);
+}
+
+int ei_x_encode_atom_as(ei_x_buff* x, const char* s,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
+ return ei_x_encode_atom_len_as(x, s, strlen(s), from_enc, to_enc);
+}
+
+int ei_x_encode_atom_len_as(ei_x_buff* x, const char* s, int len,
+ enum erlang_char_encoding from_enc,
+ enum erlang_char_encoding to_enc)
+{
int i = x->index;
- ei_encode_atom_len(NULL, &i, s, len);
+ ei_encode_atom_len_as(NULL, &i, s, len, from_enc, to_enc);
if (!x_fix_buff(x, i))
return -1;
- return ei_encode_atom_len(x->buff, &x->index, s, len);
+ return ei_encode_atom_len_as(x->buff, &x->index, s, len, from_enc, to_enc);
}
+
int ei_x_encode_pid(ei_x_buff* x, const erlang_pid* pid)
{
int i = x->index;
diff --git a/lib/erl_interface/src/misc/get_type.c b/lib/erl_interface/src/misc/get_type.c
index 2a680d0f94..54465196b0 100644
--- a/lib/erl_interface/src/misc/get_type.c
+++ b/lib/erl_interface/src/misc/get_type.c
@@ -33,78 +33,6 @@ int ei_get_type(const char *buf, const int *index, int *type, int *len)
return ei_get_type_internal(buf, index, type, len);
}
-#if 0
-int ei_get_type(const char *buf, const int *index, int *type, int *len)
-{
- const char *s = buf + *index;
- int itype = get8(s); /* Internal type */
-
- *len = 0;
-
- switch (*type) {
-
- case ERL_SMALL_INTEGER_EXT:
- case ERL_INTEGER_EXT:
- case ERL_SMALL_BIG_EXT:
- case ERL_LARGE_BIG_EXT:
- *type = EI_TYPE_INTEGER;
- break;
-
- case ERL_FLOAT_EXT:
- *type = EI_TYPE_FLOAT;
- break;
-
- case ERL_SMALL_TUPLE_EXT:
- *len = get8(s);
- break;
-
- case ERL_ATOM_EXT:
- case ERL_STRING_EXT:
- *len = get16be(s);
- break;
-
- case ERL_LARGE_TUPLE_EXT:
- case ERL_LIST_EXT:
- case ERL_BINARY_EXT:
- *len = get32be(s);
- break;
-
- case ERL_SMALL_BIG_EXT:
- *len = (get8(s)+1)/2; /* big arity */
- break;
-
- case ERL_LARGE_BIG_EXT:
- *len = (get32be(s)+1)/2; /* big arity */
- break;
-
- case ERL_BINARY_EXT:
- *type = EI_TYPE_BINARY;
- break;
-
- case ERL_PID_EXT:
- *type = EI_TYPE_PID;
- break;
-
- case ERL_PORT_EXT:
- *type = EI_TYPE_PORT;
- break;
-
- case ERL_REFERENCE_EXT:
- case ERL_NEW_REFERENCE_EXT:
- *type = EI_TYPE_REF;
- break;
-
- default:
- break;
- }
-
- /* leave index unchanged */
- return 0;
-}
-#endif
-
-
-/* Old definition of function above */
int ei_get_type_internal(const char *buf, const int *index,
int *type, int *len)
@@ -114,10 +42,15 @@ int ei_get_type_internal(const char *buf, const int *index,
*type = get8(s);
switch (*type) {
+ case ERL_SMALL_ATOM_EXT:
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ *type = ERL_ATOM_EXT;
case ERL_SMALL_TUPLE_EXT:
*len = get8(s);
break;
-
+
+ case ERL_ATOM_UTF8_EXT:
+ *type = ERL_ATOM_EXT;
case ERL_ATOM_EXT:
case ERL_STRING_EXT:
*len = get16be(s);
diff --git a/lib/erl_interface/src/misc/putget.h b/lib/erl_interface/src/misc/putget.h
index 7a43de324b..77ae168f8c 100644
--- a/lib/erl_interface/src/misc/putget.h
+++ b/lib/erl_interface/src/misc/putget.h
@@ -105,6 +105,13 @@
((EI_ULONGLONG)((unsigned char *)(s))[-2] << 8) | \
(EI_ULONGLONG)((unsigned char *)(s))[-1]))
+int utf8_to_latin1(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen, enum erlang_char_encoding* res_encp);
+int ei_internal_get_atom(const char** bufp, char* p, enum erlang_char_encoding*);
+int ei_internal_put_atom(char** bufp, const char* p, int slen, enum erlang_char_encoding);
+#define get_atom ei_internal_get_atom
+#define put_atom ei_internal_put_atom
+
typedef union float_ext {
double d;
EI_ULONGLONG val;
diff --git a/lib/erl_interface/src/misc/show_msg.c b/lib/erl_interface/src/misc/show_msg.c
index 194296798b..33b09643ca 100644
--- a/lib/erl_interface/src/misc/show_msg.c
+++ b/lib/erl_interface/src/misc/show_msg.c
@@ -132,13 +132,13 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
switch (msg.msgtype) {
case ERL_SEND:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)) return -1;
mbuf = msgbuf;
break;
case ERL_SEND_TT:
- if (ei_decode_atom(header,&index,msg.cookie)
+ if (ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
|| ei_decode_pid(header,&index,&msg.to)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
@@ -146,15 +146,15 @@ int ei_show_sendmsg(FILE *stream, const char *header, const char *msgbuf)
case ERL_REG_SEND:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)) return -1;
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)) return -1;
mbuf = msgbuf;
break;
case ERL_REG_SEND_TT:
if (ei_decode_pid(header,&index,&msg.from)
- || ei_decode_atom(header,&index,msg.cookie)
- || ei_decode_atom(header,&index,msg.toname)
+ || ei_decode_atom_as(header,&index,msg.cookie,sizeof(msg.cookie),ERLANG_UTF8,NULL,NULL)
+ || ei_decode_atom_as(header,&index,msg.toname,sizeof(msg.toname),ERLANG_UTF8,NULL,NULL)
|| ei_decode_trace(header,&index,&msg.token)) return -1;
mbuf = msgbuf;
break;
@@ -457,7 +457,7 @@ static void show_term(const char *termbuf, int *index, FILE *stream)
break;
case ERL_FUN_EXT: {
- char atom[MAXATOMLEN+1];
+ char atom[MAXATOMLEN];
long idx;
long uniq;
const char* s = termbuf + *index, * s0 = s;