aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2017-03-08 14:46:24 +0100
committerSverker Eriksson <[email protected]>2017-03-08 18:07:09 +0100
commit32d13d59910a384ad09682cafc83c7300c96c694 (patch)
tree1f5bfca2565243210b16a6de2bdc3ae66cbff8bb /lib/erl_interface/src
parent65b04e233e09e3cc2e0fda3c28e155b95c5a4baf (diff)
downloadotp-32d13d59910a384ad09682cafc83c7300c96c694.tar.gz
otp-32d13d59910a384ad09682cafc83c7300c96c694.tar.bz2
otp-32d13d59910a384ad09682cafc83c7300c96c694.zip
erl_interface: Do not generate atoms on old latin1 ext format
Solved by letting ei_encode_atom_as ignore 'to_enc' argument and always encode in UTF8 format.
Diffstat (limited to 'lib/erl_interface/src')
-rw-r--r--lib/erl_interface/src/encode/encode_atom.c64
-rw-r--r--lib/erl_interface/src/encode/encode_boolean.c8
-rw-r--r--lib/erl_interface/src/legacy/erl_eterm.c16
-rw-r--r--lib/erl_interface/src/legacy/erl_marshal.c13
4 files changed, 26 insertions, 75 deletions
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index c1817628e5..1fd7811a0e 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -26,7 +26,6 @@
static int verify_ascii_atom(const char* src, int slen);
static int verify_utf8_atom(const char* src, int slen);
-static int is_latin1_as_utf8(const char *p, int len);
int ei_encode_atom(char *buf, int *index, const char *p)
{
@@ -34,7 +33,7 @@ int ei_encode_atom(char *buf, int *index, const char *p)
if (len >= MAXATOMLEN)
len = MAXATOMLEN - 1;
- return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
}
int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
@@ -42,7 +41,7 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
/* This function is documented to truncate at MAXATOMLEN (256) */
if (len >= MAXATOMLEN)
len = MAXATOMLEN - 1;
- return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
}
int ei_encode_atom_as(char *buf, int *index, const char *p,
@@ -64,46 +63,11 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
return -1;
}
- if (to_enc == (ERLANG_LATIN1 | ERLANG_UTF8)) {
- if (from_enc == ERLANG_UTF8) {
- to_enc = is_latin1_as_utf8(p, len) ? ERLANG_LATIN1 : ERLANG_UTF8;
- }
- else {
- to_enc = from_enc;
- }
- }
- switch(to_enc) {
- case ERLANG_LATIN1:
- if (buf) {
- put8(s,ERL_ATOM_EXT);
- switch (from_enc) {
- case ERLANG_UTF8:
- len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
- if (len < 0) return -1;
- break;
- case ERLANG_ASCII:
- if (verify_ascii_atom(p, len) < 0) return -1;
- memcpy(s+2, p, len);
- break;
- case ERLANG_LATIN1:
- memcpy(s+2, p, len);
- break;
- default:
- return -1;
- }
- put16be(s,len);
- }
- else {
- s += 3;
- if (from_enc == ERLANG_UTF8) {
- len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
- if (len < 0) return -1;
- } else if (from_enc == ERLANG_ASCII)
- if (verify_ascii_atom(p, len) < 0) return -1;
- }
- break;
-
- case ERLANG_UTF8:
+ /*
+ * Since OTP 20 we totally ignore 'to_enc'
+ * and alway encode as UTF8.
+ */
+ {
offs = 1 + 1;
switch (from_enc) {
case ERLANG_LATIN1:
@@ -133,10 +97,6 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
}
}
else s+= offs;
- break;
-
- default:
- return -1;
}
s += len;
@@ -197,13 +157,3 @@ static int verify_utf8_atom(const char* src, int slen)
return 0;
}
-/* Only latin1 code points in utf8 string?
- */
-static int is_latin1_as_utf8(const char *p, int len)
-{
- int i;
- for (i=0; i<len; i++) {
- if ((unsigned char)p[i] > 0xC3) return 0;
- }
- return 1;
-}
diff --git a/lib/erl_interface/src/encode/encode_boolean.c b/lib/erl_interface/src/encode/encode_boolean.c
index 61e7e5e6e7..053029af05 100644
--- a/lib/erl_interface/src/encode/encode_boolean.c
+++ b/lib/erl_interface/src/encode/encode_boolean.c
@@ -32,12 +32,12 @@ int ei_encode_boolean(char *buf, int *index, int p)
val = p ? "true" : "false";
len = strlen(val);
- if (!buf) s += 3;
+ if (!buf) s += 2;
else {
- put8(s,ERL_ATOM_EXT);
- put16be(s,len);
+ put8(s, ERL_SMALL_ATOM_UTF8_EXT);
+ put8(s, len);
- memmove(s,val,len); /* unterminated string */
+ memcpy(s,val,len); /* unterminated string */
}
s += len;
diff --git a/lib/erl_interface/src/legacy/erl_eterm.c b/lib/erl_interface/src/legacy/erl_eterm.c
index e4b3b49c7d..5153d0f2e7 100644
--- a/lib/erl_interface/src/legacy/erl_eterm.c
+++ b/lib/erl_interface/src/legacy/erl_eterm.c
@@ -188,14 +188,20 @@ char* erl_atom_ptr_latin1(Erl_Atom_data* a)
char* erl_atom_ptr_utf8(Erl_Atom_data* a)
{
if (a->utf8 == NULL) {
- int dlen = a->lenL * 2; /* over estimation */
- a->utf8 = malloc(dlen + 1);
- a->lenU = latin1_to_utf8(a->utf8, a->latin1, a->lenL, dlen, NULL);
- a->utf8[a->lenU] = '\0';
+ erlang_char_encoding enc;
+ a->lenU = latin1_to_utf8(NULL, a->latin1, a->lenL, a->lenL*2, &enc);
+ if (enc == ERLANG_ASCII) {
+ a->utf8 = a->latin1;
+ }
+ else {
+ a->utf8 = malloc(a->lenU + 1);
+ latin1_to_utf8(a->utf8, a->latin1, a->lenL, a->lenU, NULL);
+ a->utf8[a->lenU] = '\0';
+ }
}
return a->utf8;
-
}
+
int erl_atom_size_latin1(Erl_Atom_data* a)
{
if (a->latin1 == NULL) {
diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c
index 527ae0ef8f..b7a8455313 100644
--- a/lib/erl_interface/src/legacy/erl_marshal.c
+++ b/lib/erl_interface/src/legacy/erl_marshal.c
@@ -175,10 +175,9 @@ static void encode_atom(Erl_Atom_data* a, unsigned char **ext)
int ix = 0;
if (a->latin1) {
ei_encode_atom_len_as((char*)*ext, &ix, a->latin1, a->lenL,
- ERLANG_LATIN1, ERLANG_LATIN1);
+ ERLANG_LATIN1, ERLANG_UTF8);
}
- else if (ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU,
- ERLANG_UTF8, ERLANG_LATIN1) < 0) {
+ else {
ei_encode_atom_len_as((char*)*ext, &ix, a->utf8, a->lenU,
ERLANG_UTF8, ERLANG_UTF8);
}
@@ -542,12 +541,8 @@ int erl_term_len(ETERM *ep)
static int atom_len_helper(Erl_Atom_data* a)
{
- if (erl_atom_ptr_latin1(a)) {
- return 1 + 2 + a->lenL; /* ERL_ATOM_EXT */
- }
- else {
- return 1 + 1 + (a->lenU > 255) + a->lenU;
- }
+ (void) erl_atom_ptr_utf8(a);
+ return 1 + 1 + (a->lenU > 255) + a->lenU;
}
static int erl_term_len_helper(ETERM *ep, int dist)