aboutsummaryrefslogtreecommitdiffstats
path: root/lib/erl_interface/src/encode/encode_atom.c
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2017-03-08 14:46:24 +0100
committerSverker Eriksson <[email protected]>2017-03-08 18:07:09 +0100
commit32d13d59910a384ad09682cafc83c7300c96c694 (patch)
tree1f5bfca2565243210b16a6de2bdc3ae66cbff8bb /lib/erl_interface/src/encode/encode_atom.c
parent65b04e233e09e3cc2e0fda3c28e155b95c5a4baf (diff)
downloadotp-32d13d59910a384ad09682cafc83c7300c96c694.tar.gz
otp-32d13d59910a384ad09682cafc83c7300c96c694.tar.bz2
otp-32d13d59910a384ad09682cafc83c7300c96c694.zip
erl_interface: Do not generate atoms on old latin1 ext format
Solved by letting ei_encode_atom_as ignore 'to_enc' argument and always encode in UTF8 format.
Diffstat (limited to 'lib/erl_interface/src/encode/encode_atom.c')
-rw-r--r--lib/erl_interface/src/encode/encode_atom.c64
1 files changed, 7 insertions, 57 deletions
diff --git a/lib/erl_interface/src/encode/encode_atom.c b/lib/erl_interface/src/encode/encode_atom.c
index c1817628e5..1fd7811a0e 100644
--- a/lib/erl_interface/src/encode/encode_atom.c
+++ b/lib/erl_interface/src/encode/encode_atom.c
@@ -26,7 +26,6 @@
static int verify_ascii_atom(const char* src, int slen);
static int verify_utf8_atom(const char* src, int slen);
-static int is_latin1_as_utf8(const char *p, int len);
int ei_encode_atom(char *buf, int *index, const char *p)
{
@@ -34,7 +33,7 @@ int ei_encode_atom(char *buf, int *index, const char *p)
if (len >= MAXATOMLEN)
len = MAXATOMLEN - 1;
- return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
}
int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
@@ -42,7 +41,7 @@ int ei_encode_atom_len(char *buf, int *index, const char *p, int len)
/* This function is documented to truncate at MAXATOMLEN (256) */
if (len >= MAXATOMLEN)
len = MAXATOMLEN - 1;
- return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, ERLANG_LATIN1);
+ return ei_encode_atom_len_as(buf, index, p, len, ERLANG_LATIN1, 0);
}
int ei_encode_atom_as(char *buf, int *index, const char *p,
@@ -64,46 +63,11 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
return -1;
}
- if (to_enc == (ERLANG_LATIN1 | ERLANG_UTF8)) {
- if (from_enc == ERLANG_UTF8) {
- to_enc = is_latin1_as_utf8(p, len) ? ERLANG_LATIN1 : ERLANG_UTF8;
- }
- else {
- to_enc = from_enc;
- }
- }
- switch(to_enc) {
- case ERLANG_LATIN1:
- if (buf) {
- put8(s,ERL_ATOM_EXT);
- switch (from_enc) {
- case ERLANG_UTF8:
- len = utf8_to_latin1(s+2, p, len, MAXATOMLEN-1, NULL);
- if (len < 0) return -1;
- break;
- case ERLANG_ASCII:
- if (verify_ascii_atom(p, len) < 0) return -1;
- memcpy(s+2, p, len);
- break;
- case ERLANG_LATIN1:
- memcpy(s+2, p, len);
- break;
- default:
- return -1;
- }
- put16be(s,len);
- }
- else {
- s += 3;
- if (from_enc == ERLANG_UTF8) {
- len = utf8_to_latin1(NULL, p, len, MAXATOMLEN-1, NULL);
- if (len < 0) return -1;
- } else if (from_enc == ERLANG_ASCII)
- if (verify_ascii_atom(p, len) < 0) return -1;
- }
- break;
-
- case ERLANG_UTF8:
+ /*
+ * Since OTP 20 we totally ignore 'to_enc'
+ * and alway encode as UTF8.
+ */
+ {
offs = 1 + 1;
switch (from_enc) {
case ERLANG_LATIN1:
@@ -133,10 +97,6 @@ int ei_encode_atom_len_as(char *buf, int *index, const char *p, int len,
}
}
else s+= offs;
- break;
-
- default:
- return -1;
}
s += len;
@@ -197,13 +157,3 @@ static int verify_utf8_atom(const char* src, int slen)
return 0;
}
-/* Only latin1 code points in utf8 string?
- */
-static int is_latin1_as_utf8(const char *p, int len)
-{
- int i;
- for (i=0; i<len; i++) {
- if ((unsigned char)p[i] > 0xC3) return 0;
- }
- return 1;
-}