about | summary | refs | log | tree | commit | diff | stats
path: root/lib/erl_interface/src/decode
diff options
context:
space:
mode:
author: Sverker Eriksson <[email protected]> 2012-12-21 15:50:21 +0100
committer: Sverker Eriksson <[email protected]> 2013-01-08 11:15:01 +0100
commit: 685d009efcfd7521e9c918a14b58eac19755299d (patch)
tree: 83515a7dcc21f52ecc008bf16da1e7278750e9ca /lib/erl_interface/src/decode
parent: e4e007afd032f7aca359d2665f91ddb12727521a (diff)
download: otp-685d009efcfd7521e9c918a14b58eac19755299d.tar.gz
otp-685d009efcfd7521e9c918a14b58eac19755299d.tar.bz2
otp-685d009efcfd7521e9c918a14b58eac19755299d.zip
erl_interface: Enable decode of unicode atoms
No API changes or additions. Just the ability for erl_interface to decode unicode atoms and convert them into latin1 strings to preserve backward compatibility for the existing API.
Diffstat (limited to 'lib/erl_interface/src/decode')
-rw-r--r-- lib/erl_interface/src/decode/decode_atom.c 68
-rw-r--r-- lib/erl_interface/src/decode/decode_boolean.c 32
-rw-r--r-- lib/erl_interface/src/decode/decode_pid.c 13
-rw-r--r-- lib/erl_interface/src/decode/decode_port.c 12
-rw-r--r-- lib/erl_interface/src/decode/decode_ref.c 26
5 files changed, 78 insertions, 73 deletions
diff --git a/lib/erl_interface/src/decode/decode_atom.c b/lib/erl_interface/src/decode/decode_atom.c
index c2e6a0426e..84edf1766a 100644
--- a/lib/erl_interface/src/decode/decode_atom.c
+++ b/lib/erl_interface/src/decode/decode_atom.c
@@ -21,24 +21,76 @@
#include "eiext.h"
#include "putget.h"
+static int utf8_to_latin1(char* dest, const char* source, unsigned len);
+
int ei_decode_atom(const char *buf, int *index, char *p)
{
const char *s = buf + *index;
const char *s0 = s;
int len;
- if (get8(s) != ERL_ATOM_EXT) return -1;
+ switch (get8(s)) {
+ case ERL_ATOM_EXT:
+ len = get16be(s);
+ if (len > MAXATOMLEN) return -1;
+ if (p) {
+ memmove(p,s,len);
+ p[len] = (char)0;
+ }
+ break;
+
+ case ERL_SMALL_ATOM_EXT:
+ len = get8(s);
+ if (p) {
+ memmove(p,s,len);
+ p[len] = (char)0;
+ }
+ break;
+
+ case ERL_UNICODE_ATOM_EXT:
+ len = get16be(s);
- len = get16be(s);
+ if (len > 2*MAXATOMLEN) return -1;
- if (len > MAXATOMLEN) return -1;
+ if (p && utf8_to_latin1(p, s, len) < 0) return -1;
+ break;
- if (p) {
- memmove(p,s,len);
- p[len] = (char)0;
+ default:
+ return -1;
}
+
s += len;
*index += s-s0;
-
- return 0;
+ return 0;
+}
+
+int ei_internal_get_atom(const char** bufp, char* p)
+{
+ int ix = 0;
+ if (ei_decode_atom(*bufp, &ix, p) < 0) return -1;
+ *bufp += ix;
+ return 0;
+}
+
+static int utf8_to_latin1(char* dest, const char* source, unsigned slen)
+{
+ const char* dest_end = dest + MAXATOMLEN - 1;
+
+ while (slen > 0 && dest < dest_end) {
+ if ((source[0] & 0x80) == 0) {
+ *dest++ = *source++;
+ --slen;
+ }
+ else if (slen > 1 &&
+ (source[0] & 0xFE) == 0xC2 &&
+ (source[1] & 0xC0) == 0x80) {
+ *dest++ = (char) ((source[0] << 6) | (source[1] & 0x3F));
+ source += 2;
+ slen -= 2;
+ }
+ else return -1;
+ }
+ *dest = 0;
+ return 0;
}
+
diff --git a/lib/erl_interface/src/decode/decode_boolean.c b/lib/erl_interface/src/decode/decode_boolean.c
index 9fd09c63f1..0a7a06f1d4 100644
--- a/lib/erl_interface/src/decode/decode_boolean.c
+++ b/lib/erl_interface/src/decode/decode_boolean.c
@@ -26,32 +26,20 @@ int ei_decode_boolean(const char *buf, int *index, int *p)
{
const char *s = buf + *index;
const char *s0 = s;
- int len;
+ char tbuf[MAXATOMLEN+1];
int t;
- if (get8(s) != ERL_ATOM_EXT) return -1;
+ if (get_atom(&s, tbuf) < 0) return -1;
- len = get16be(s);
-
- switch (len) {
- case 4:
- /* typecast makes ansi happy */
- if (strncmp((char*)s,"true",4)) return -1;
- t = 1;
- break;
-
- case 5:
- if (strncmp((char*)s,"false",5)) return -1;
- t = 0;
- break;
-
- default:
- return -1;
- }
-
- s += len;
+ if (memcmp(tbuf, "true", 5) == 0)
+ t = 1;
+ else if (memcmp(tbuf, "false", 6) == 0)
+ t = 0;
+ else
+ return -1;
+
if (p) *p = t;
*index += s-s0;
-
return 0;
}
+
diff --git a/lib/erl_interface/src/decode/decode_pid.c b/lib/erl_interface/src/decode/decode_pid.c
index 9ed1c36db6..a762ae499e 100644
--- a/lib/erl_interface/src/decode/decode_pid.c
+++ b/lib/erl_interface/src/decode/decode_pid.c
@@ -21,6 +21,7 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_pid(const char *buf, int *index, erlang_pid *p)
{
const char *s = buf + *index;
@@ -30,17 +31,7 @@ int ei_decode_pid(const char *buf, int *index, erlang_pid *p)
if (get8(s) != ERL_PID_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node) < 0) return -1;
/* now the numbers: num (4), serial (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_port.c b/lib/erl_interface/src/decode/decode_port.c
index 28abed801a..6eb2bc9197 100644
--- a/lib/erl_interface/src/decode/decode_port.c
+++ b/lib/erl_interface/src/decode/decode_port.c
@@ -30,17 +30,7 @@ int ei_decode_port(const char *buf, int *index, erlang_port *p)
if (get8(s) != ERL_PORT_EXT) return -1;
/* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
diff --git a/lib/erl_interface/src/decode/decode_ref.c b/lib/erl_interface/src/decode/decode_ref.c
index 7b15808bc5..df3c30777b 100644
--- a/lib/erl_interface/src/decode/decode_ref.c
+++ b/lib/erl_interface/src/decode/decode_ref.c
@@ -21,6 +21,7 @@
#include "eiext.h"
#include "putget.h"
+
int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
{
const char *s = buf + *index;
@@ -30,18 +31,8 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
switch (get8(s)) {
case ERL_REFERENCE_EXT:
- /* first the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
-
- len = get16be(s);
-
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ /* nodename */
+ if (get_atom(&s, p->node) < 0) return -1;
/* now the numbers: num (4), creation (1) */
if (p) {
@@ -62,15 +53,7 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
if (p) p->len = count;
/* then the nodename */
- if (get8(s) != ERL_ATOM_EXT) return -1;
- len = get16be(s);
- if (len > MAXATOMLEN) return -1;
-
- if (p) {
- memmove(p->node, s, len);
- p->node[len] = (char)0;
- }
- s += len;
+ if (get_atom(&s, p->node) < 0) return -1;
/* creation */
if (p) {
@@ -95,3 +78,4 @@ int ei_decode_ref(const char *buf, int *index, erlang_ref *p)
return -1;
}
}
+