aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src/encode
diff options
context:
space:
mode:
author: Sverker Eriksson <[email protected]>, 2013-01-22 19:25:36 +0100
committer: Sverker Eriksson <[email protected]>, 2013-01-22 19:34:03 +0100
commit: 1f4765cca4874fa92fcfad888fbe6d5f2fbf74d1 (patch)
tree: bf152c1b3dbf855dfc5a8724c3e043e161a971b6 /lib/erl_interface/src/encode
parent: 8eb544073fe243a8935a54f83f9c9f1f7478e3c5 (diff)
download: otp-1f4765cca4874fa92fcfad888fbe6d5f2fbf74d1.tar.gz
otp-1f4765cca4874fa92fcfad888fbe6d5f2fbf74d1.tar.bz2
otp-1f4765cca4874fa92fcfad888fbe6d5f2fbf74d1.zip
erl_interface: even more utf8 atom stuff
Diffstat (limited to 'lib/erl_interface/src/encode')
-rw-r--r-- lib/erl_interface/src/encode/encode_atom.c | 59
1 files changed, 57 insertions, 2 deletions
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index a3d7c4c759..8bbe962396 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -22,6 +22,11 @@
#include "eiext.h"
#include "putget.h"
+/* File-local copy helpers, defined further down; each returns 0, or -1 on bad input. */
+static int copy_ascii_atom(char* dst, const char* src, int slen);
+static int copy_utf8_atom(char* dst, const char* src, int slen);
+
+
int ei_encode_atom(char *buf, int *index, const char *p)
{
size_t len = strlen(p);
@@ -54,7 +59,8 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
char *s0 = s;
int offs;
- if (from_enc == ERLANG_LATIN1 && len >= MAXATOMLEN) {
+ if (len >= MAXATOMLEN && (from_enc == ERLANG_LATIN1 ||
+ from_enc == ERLANG_ASCII)) {
return -1;
}
@@ -68,6 +74,8 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
if (len < 0) return -1;
break;
case ERLANG_ASCII:
+ if (copy_ascii_atom(s+2, p, len) < 0) return -1;
+ break;
case ERLANG_LATIN1:
memcpy(s+2, p, len);
break;
@@ -93,9 +101,11 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL);
break;
case ERLANG_ASCII:
+ if (buf && copy_ascii_atom(s+offs, p, len) < 0) return -1;
+ break;
case ERLANG_UTF8:
if (len >= 256) offs++;
- if (buf) memcpy(s+offs, p, len);
+ if (buf && copy_utf8_atom(s+offs, p, len) < 0) return -1;
break;
default:
return -1;
@@ -133,3 +143,48 @@ ei_internal_put_atom(char** bufp, const char* p, int slen,
*bufp += ix;
return 0;
}
+
+
+/* Copy slen bytes from src to dst; stop with -1 at the first byte that has bit 7 set. */
+int copy_ascii_atom(char* dst, const char* src, int slen)
+{
+    for (; slen > 0; slen--, src++, dst++) {
+        if ((*src & 0x80) != 0) return -1;   /* not 7-bit ASCII */
+        *dst = *src;
+    }
+    return 0;
+}
+
+/*
+ * Copy the slen-byte UTF-8 atom name at src to dst, validating it on the way.
+ * Returns 0 on success, or -1 if the name has MAXATOMLEN or more characters or
+ * is not well-formed UTF-8 per RFC 3629 (truncated, bad continuation byte,
+ * overlong form, surrogate, above U+10FFFF). dst may be partly written on -1.
+ */
+int copy_utf8_atom(char* dst, const char* src, int slen)
+{
+    int num_chars = 0;
+
+    while (slen > 0) {
+        unsigned char c = (unsigned char) src[0];
+        int n, i;   /* n: number of bytes in the current character */
+        if (++num_chars >= MAXATOMLEN) return -1;
+        if (c < 0x80) n = 1;
+        else if (c >= 0xC2 && c <= 0xDF) n = 2;   /* C0, C1: always overlong */
+        else if ((c & 0xF0) == 0xE0) n = 3;
+        else if (c >= 0xF0 && c <= 0xF4) n = 4;   /* F5..F7: above U+10FFFF */
+        else return -1;
+        if (slen < n) return -1;
+        for (i = 1; i < n; i++)
+            if ((src[i] & 0xC0) != 0x80) return -1;
+        if (n > 2) {   /* second byte narrows the range for E0, ED, F0, F4 */
+            unsigned char c1 = (unsigned char) src[1];
+            if ((c == 0xE0 && c1 < 0xA0) || (c == 0xED && c1 > 0x9F) ||
+                (c == 0xF0 && c1 < 0x90) || (c == 0xF4 && c1 > 0x8F)) return -1;
+        }
+        for (i = 0; i < n; i++) *dst++ = *src++;
+        slen -= n;
+    }
+    return 0;
+}
+