From b553664f54034e8c04ae6f9cc44f16b7f516518b Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Fri, 11 Jan 2013 17:27:29 +0100 Subject: erl_interface: utf8 atoms continued --- lib/erl_interface/src/encode/encode_atom.c | 100 ++++++++++++++++++++++++++--- lib/erl_interface/src/encode/encode_fun.c | 4 +- lib/erl_interface/src/encode/encode_pid.c | 24 +++---- lib/erl_interface/src/encode/encode_port.c | 23 +++---- lib/erl_interface/src/encode/encode_ref.c | 24 +++---- 5 files changed, 120 insertions(+), 55 deletions(-) (limited to 'lib/erl_interface/src/encode') diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c index 6f41f045e0..a3d7c4c759 100644 --- a/lib/erl_interface/src/encode/encode_atom.c +++ b/lib/erl_interface/src/encode/encode_atom.c @@ -26,25 +26,95 @@ int ei_encode_atom(char *buf, int *index, const char *p) { size_t len = strlen(p); - if (len >= INT_MAX) return -1; - return ei_encode_atom_len(buf, index, p, len); + if (len >= MAXATOMLEN) + len = MAXATOMLEN - 1; + return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1); } int ei_encode_atom_len(char *buf, int *index, const char *p, int len) +{ + /* This function is documented to truncate at MAXATOMLEN (256) */ + if (len >= MAXATOMLEN) + len = MAXATOMLEN - 1; + return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1); +} + +int ei_encode_atom_as(char *buf, int *index, const char *p, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) +{ + return ei_encode_atom_len_as(buf, index, p, strlen(p), from_enc, to_enc); +} + +int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len, + enum erlang_char_encoding from_enc, + enum erlang_char_encoding to_enc) { char *s = buf + *index; char *s0 = s; + int offs; - /* This function is documented to truncate at MAXATOMLEN (256) */ - if (len > MAXATOMLEN) - len = MAXATOMLEN; + if (from_enc == ERLANG_LATIN1 && len >= MAXATOMLEN) { + return -1; + } - if (!buf) s += 3; - else { - put8(s,ERL_ATOM_EXT); - put16be(s,len); + switch(to_enc) { + case ERLANG_LATIN1: + if (buf) { + put8(s,ERL_ATOM_EXT); + switch (from_enc) { + case ERLANG_UTF8: + len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL); + if (len < 0) return -1; + break; + case ERLANG_ASCII: + case ERLANG_LATIN1: + memcpy(s+2, p, len); + break; + default: + return -1; + } + put16be(s,len); + } + else { + s += 3; + if (from_enc == ERLANG_UTF8) { + len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL); + if (len < 0) return -1; + } + } + break; + + case ERLANG_UTF8: + offs = 1 + 1; + switch (from_enc) { + case ERLANG_LATIN1: + if (len >= 256/2) offs++; + len = latin1_to_utf8((buf ? s+offs : NULL), p, len, MAXATOMLEN_UTF8-1, NULL); + break; + case ERLANG_ASCII: + case ERLANG_UTF8: + if (len >= 256) offs++; + if (buf) memcpy(s+offs, p, len); + break; + default: + return -1; + } + if (buf) { + if (offs == 2) { + put8(s, ERL_SMALL_ATOM_UTF8_EXT); + put8(s, len); + } + else { + put8(s, ERL_ATOM_UTF8_EXT); + put16be(s, len); + } + } + else s+= offs; + break; - memmove(s,p,len); /* unterminated string */ + default: + return -1; } s += len; @@ -53,3 +123,13 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len) return 0; } +int +ei_internal_put_atom(char** bufp, const char* p, int slen, + enum erlang_char_encoding to_enc) +{ + int ix = 0; + if (ei_encode_atom_len_as(*bufp, &ix, p, slen, ERLANG_UTF8, to_enc) < 0) + return -1; + *bufp += ix; + return 0; +} diff --git a/lib/erl_interface/src/encode/encode_fun.c b/lib/erl_interface/src/encode/encode_fun.c index 54ee2083d6..4daee32648 100644 --- a/lib/erl_interface/src/encode/encode_fun.c +++ b/lib/erl_interface/src/encode/encode_fun.c @@ -35,7 +35,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p) ix += sizeof(char) + 4; if (ei_encode_pid(buf, &ix, &p->pid) < 0) return -1; - if (ei_encode_atom(buf, &ix, p->module) < 0) + if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0) return -1; if (ei_encode_long(buf, &ix, p->index) < 0) return -1; @@ -60,7 +60,7 @@ int ei_encode_fun(char *buf, int *index, const erlang_fun *p) } else size_p = NULL; ix += 1 + 4 + 1 + sizeof(p->md5) + 4 + 4; - if (ei_encode_atom(buf, &ix, p->module) < 0) + if (ei_encode_atom_as(buf, &ix, p->module, ERLANG_UTF8, p->module_org_enc) < 0) return -1; if (ei_encode_long(buf, &ix, p->old_index) < 0) return -1; diff --git a/lib/erl_interface/src/encode/encode_pid.c b/lib/erl_interface/src/encode/encode_pid.c index ee7f235c17..0cf3ef4efb 100644 --- a/lib/erl_interface/src/encode/encode_pid.c +++ b/lib/erl_interface/src/encode/encode_pid.c @@ -24,29 +24,23 @@ int ei_encode_pid(char *buf, int *index, const erlang_pid *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); - - if (!buf) s += 13 + len; - else { - put8(s,ERL_PID_EXT); - /* first the nodename */ - put8(s,ERL_ATOM_EXT); + ++(*index); /* skip ERL_PID_EXT */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, p->node_org_enc) < 0) + return -1; + + if (buf) { + put8(s,ERL_PID_EXT); - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put32be(s,p->num & 0x7fff); /* 15 bits */ put32be(s,p->serial & 0x1fff); /* 13 bits */ put8(s,(p->creation & 0x03)); /* 2 bits */ } - - *index += s-s0; - + + *index += 4 + 4 + 1; return 0; } diff --git a/lib/erl_interface/src/encode/encode_port.c b/lib/erl_interface/src/encode/encode_port.c index fbbb33182e..2bf9e26d78 100644 --- a/lib/erl_interface/src/encode/encode_port.c +++ b/lib/erl_interface/src/encode/encode_port.c @@ -24,28 +24,23 @@ int ei_encode_port(char *buf, int *index, const erlang_port *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); - - if (!buf) s += 9 + len; - else { - put8(s,ERL_PORT_EXT); - /* first the nodename */ - put8(s,ERL_ATOM_EXT); + ++(*index); /* skip ERL_PORT_EXT */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, + p->node_org_enc) < 0) { + return -1; + } + if (buf) { + put8(s,ERL_PORT_EXT); - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put32be(s,p->id & 0x0fffffff /* 28 bits */); put8(s,(p->creation & 0x03)); } - *index += s-s0; - + *index += 4 + 1; return 0; } diff --git a/lib/erl_interface/src/encode/encode_ref.c b/lib/erl_interface/src/encode/encode_ref.c index 292b452864..e8b3173315 100644 --- a/lib/erl_interface/src/encode/encode_ref.c +++ b/lib/erl_interface/src/encode/encode_ref.c @@ -24,36 +24,32 @@ int ei_encode_ref(char *buf, int *index, const erlang_ref *p) { char *s = buf + *index; - char *s0 = s; - int len = strlen(p->node); int i; + (*index) += 1 + 2; /* skip to node atom */ + if (ei_encode_atom_len_as(buf, index, p->node, strlen(p->node), ERLANG_UTF8, + p->node_org_enc) < 0) { + return -1; + } + /* Always encode as an extended reference; all participating parties are now expected to be able to decode extended references. */ - if (!buf) s += 1 + 2 + (3+len) + p->len*4 + 1; - else { + if (buf) { put8(s,ERL_NEW_REFERENCE_EXT); /* first, number of integers */ put16be(s, p->len); /* then the nodename */ - put8(s,ERL_ATOM_EXT); - - put16be(s,len); - - memmove(s, p->node, len); - s += len; + s = buf + *index; /* now the integers */ put8(s,(p->creation & 0x03)); for (i = 0; i < p->len; i++) put32be(s,p->n[i]); - - } - - *index += s-s0; + } + *index += p->len*4 + 1; return 0; } -- cgit v1.2.3