aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src/decode
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
committerSverker Eriksson <[email protected]>2013-01-23 18:09:35 +0100
commitb8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch)
tree708d64e36e18b61ae1801c02ec3aeef42a697be3 /lib/erl_interface/src/decode
parente99df74bee7c245ec76678e336fcd09d4b51a089 (diff)
parentd6e3e256b850050b7a86323b2948009d5fcc30a9 (diff)
downloadotp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2
otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms: erl_interface: Fix bug when transcoding atoms from and to UTF8 erl_interface: Changed erlang_char_encoding interface erts: Testcase doing unicode atom printout with ~w erl_interface: even more utf8 atom stuff erts: Fix bug in analyze_utf8 causing faulty latin1 detection Add UTF-8 node name support for epmd workaround... Fix merge conflict with hasse UTF-8 atom documentation test case erl_interface: utf8 atoms continued Add utf8 atom distribution test cases atom fixes for NIFs and atom_to_binary UTF-8 support for distribution Implement UTF-8 atom support for jinterface erl_interface: Enable decode of unicode atoms stdlib: Fix printing of unicode atoms erts: Change internal representation of atoms to utf8 erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity Conflicts: erts/emulator/beam/io.c OTP-10753
Diffstat (limited to 'lib/erl_interface/src/decode')
-rw-r--r--lib/erl_interface/src/decode/decode_atom.c159
-rw-r--r--lib/erl_interface/src/decode/decode_boolean.c36
-rw-r--r--lib/erl_interface/src/decode/decode_fun.c6
-rw-r--r--lib/erl_interface/src/decode/decode_pid.c14
-rw-r--r--lib/erl_interface/src/decode/decode_port.c13
-rw-r--r--lib/erl_interface/src/decode/decode_ref.c28
6 files changed, 169 insertions, 87 deletions
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index c2e6a0426e..9779ad3f35 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -21,24 +21,155 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_atom(const char *buf, int *index, char *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- int len;
+ return ei_decode_atom_as(buf, index, p, MAXATOMLEN, ERLANG_LATIN1, NULL, NULL);
+}
+
+int ei_decode_atom_as(const char *buf, int *index, char* p, int destlen,
+ enum erlang_char_encoding want_enc,
+ enum erlang_char_encoding* was_encp,
+ enum erlang_char_encoding* res_encp)
+{
+ const char *s = buf + *index;
+ const char *s0 = s;
+ int len;
+ enum erlang_char_encoding got_enc;
+
+ switch (get8(s)) {
+ case ERL_ATOM_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_SMALL_ATOM_EXT:
+ len = get8(s);
+ got_enc = ERLANG_LATIN1;
+ break;
+ case ERL_ATOM_UTF8_EXT:
+ len = get16be(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ case ERL_SMALL_ATOM_UTF8_EXT:
+ len = get8(s);
+ got_enc = ERLANG_UTF8;
+ break;
+ default:
+ return -1;
+ }
+
+ if ((want_enc & got_enc) || want_enc == ERLANG_ASCII) {
+ int i, found_non_ascii = 0;
+ if (len >= destlen)
+ return -1;
+ for (i=0; i<len; i++) {
+ if (s[i] & 0x80) found_non_ascii = 1;
+ if (p) p[i] = s[i];
+ }
+ if (p) p[len] = 0;
+ if (want_enc == ERLANG_ASCII && found_non_ascii) {
+ return -1;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? got_enc : ERLANG_ASCII;
+ }
+ }
+ else {
+ int plen = (got_enc == ERLANG_LATIN1) ?
+ latin1_to_utf8(p, s, len, destlen-1, res_encp) :
+ utf8_to_latin1(p, s, len, destlen-1, res_encp);
+ if (plen < 0) return -1;
+ if (p) p[plen] = 0;
+ }
+ if (was_encp) {
+ *was_encp = got_enc;
+ }
+
+ s += len;
+ *index += s-s0;
+ return 0;
+}
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
+int utf8_to_latin1(char* dst, const char* src, int slen, int destlen,
+ enum erlang_char_encoding* res_encp)
+{
+ const char* const dst_start = dst;
+ const char* const dst_end = dst + destlen;
+ int found_non_ascii = 0;
+
+ while (slen > 0) {
+ if (dst >= dst_end) return -1;
+ if ((src[0] & 0x80) == 0) {
+ if (dst_start) {
+ *dst = *src;
+ }
+ ++dst;
+ ++src;
+ --slen;
+ }
+ else if (slen > 1 &&
+ (src[0] & 0xFE) == 0xC2 &&
+ (src[1] & 0xC0) == 0x80) {
+ if (dst_start) {
+ *dst = (char) ((src[0] << 6) | (src[1] & 0x3F));
+ }
+ ++dst;
+ src += 2;
+ slen -= 2;
+ found_non_ascii = 1;
+ }
+ else return -1;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? ERLANG_LATIN1 : ERLANG_ASCII;
+ }
+ return dst - dst_start;
+}
- if (len > MAXATOMLEN) return -1;
+int latin1_to_utf8(char* dst, const char* src, int slen, int destlen,
+ enum erlang_char_encoding* res_encp)
+{
+ const char* const src_end = src + slen;
+ const char* const dst_start = dst;
+ const char* const dst_end = dst + destlen;
+ int found_non_ascii = 0;
- if (p) {
- memmove(p,s,len);
- p[len] = (char)0;
- }
- s += len;
- *index += s-s0;
-
- return 0;
+ while (src < src_end) {
+ if (dst >= dst_end) return -1;
+ if ((src[0] & 0x80) == 0) {
+ if (dst_start) {
+ *dst = *src;
+ }
+ ++dst;
+ }
+ else {
+ if (dst_start) {
+ unsigned char ch = *src;
+ dst[0] = 0xC0 | (ch >> 6);
+ dst[1] = 0x80 | (ch & 0x3F);
+ }
+ dst += 2;
+ found_non_ascii = 1;
+ }
+ ++src;
+ }
+ if (res_encp) {
+ *res_encp = found_non_ascii ? ERLANG_UTF8 : ERLANG_ASCII;
+ }
+ return dst - dst_start;
}
+
+
+
+int ei_internal_get_atom(const char** bufp, char* p,
+ enum erlang_char_encoding* was_encp)
+{
+ int ix = 0;
+ if (ei_decode_atom_as(*bufp, &ix, p, MAXATOMLEN_UTF8, ERLANG_UTF8, was_encp, NULL) < 0)
+ return -1;
+ *bufp += ix;
+ return 0;
+}
+
+
diff --git a/lib/erl_interface/src/decode/decode_boolean.c b/lib/erl_interface/src/decode/decode_boolean.c
index 9fd09c63f1..f20690249b 100644
--- a/lib/erl_interface/src/decode/decode_boolean.c
+++ b/lib/erl_interface/src/decode/decode_boolean.c
@@ -24,34 +24,20 @@
/* c non-zero -> erlang "true" atom, otherwise "false" */
int ei_decode_boolean(const char *buf, int *index, int *p)
{
- const char *s = buf + *index;
- const char *s0 = s;
- int len;
+ char tbuf[6];
int t;
- if (get8(s) != ERL_ATOM_EXT) return -1;
+ if (ei_decode_atom_as(buf, index, tbuf, sizeof(tbuf), ERLANG_ASCII, NULL, NULL) < 0)
+ return -1;
- len = get16be(s);
-
- switch (len) {
- case 4:
- /* typecast makes ansi happy */
- if (strncmp((char*)s,"true",4)) return -1;
- t = 1;
- break;
-
- case 5:
- if (strncmp((char*)s,"false",5)) return -1;
- t = 0;
- break;
-
- default:
- return -1;
- }
-
- s += len;
+ if (memcmp(tbuf, "true", 5) == 0)
+ t = 1;
+ else if (memcmp(tbuf, "false", 6) == 0)
+ t = 0;
+ else
+ return -1;
+
if (p) *p = t;
- *index += s-s0;
-
return 0;
}
+
diff --git a/lib/erl_interface/src/decode/decode_fun.c b/lib/erl_interface/src/decode/decode_fun.c
index 64fb9e86d8..7bbef5db44 100644
--- a/lib/erl_interface/src/decode/decode_fun.c
+++ b/lib/erl_interface/src/decode/decode_fun.c
@@ -42,7 +42,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (ei_decode_pid(s, &ix, (p == NULL ? (erlang_pid*)NULL : &p->pid)) < 0)
return -1;
/* then the module (atom) */
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->index)) < 0)
@@ -84,7 +85,8 @@ int ei_decode_fun(const char *buf, int *index, erlang_fun *p)
if (p != NULL) p->n_free_vars = i;
/* then the module (atom) */
ix = 0;
- if (ei_decode_atom(s, &ix, (p == NULL ? (char*)NULL : p->module)) < 0)
+ if (ei_decode_atom_as(s, &ix, (p == NULL ? (char*)NULL : p->module),
+ MAXATOMLEN_UTF8, ERLANG_UTF8, &p->module_org_enc, NULL) < 0)
return -1;
/* then the old_index */
if (ei_decode_long(s, &ix, (p == NULL ? (long*)NULL : &p->old_index)) < 0)
diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c
index 9ed1c36db6..e79952195d 100644
--- a/lib/erl_interface/src/decode/decode_pid.c
+++ b/lib/erl_interface/src/decode/decode_pid.c
@@ -21,26 +21,16 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_pid(const char *buf, int *index, erlang_pid *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PID_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c
index 28abed801a..5fd96b51a4 100644
--- a/lib/erl_interface/src/decode/decode_port.c
+++ b/lib/erl_interface/src/decode/decode_port.c
@@ -25,22 +25,11 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
if (get8(s) != ERL_PORT_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c
index 7b15808bc5..7294e5d239 100644
--- a/lib/erl_interface/src/decode/decode_ref.c
+++ b/lib/erl_interface/src/decode/decode_ref.c
@@ -21,27 +21,18 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int count, len, i;
+ int count, i;
switch (get8(s)) {
case ERL_REFERENCE_EXT:
- /* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ /* nodename */
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
@@ -62,15 +53,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
if (p) p->len = count;
/* then the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node, &p->node_org_enc) < 0) return -1;
/* creation */
if (p) {
@@ -95,3 +78,4 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
return -1;
}
}
+