aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r--erts/emulator/beam/erl_unicode.c152
1 files changed, 117 insertions, 35 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index e5c7a9502b..604f0be127 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2017. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -1158,14 +1158,15 @@ static ERTS_INLINE int
analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left,
Sint *num_latin1_chars, Uint max_chars)
{
+ int res = ERTS_UTF8_OK;
Uint latin1_count;
int is_latin1;
+ Uint nchars = 0;
*err_pos = source;
if (num_latin1_chars) {
is_latin1 = 1;
latin1_count = 0;
}
- *num_chars = 0;
while (size) {
if (((*source) & ((byte) 0x80)) == 0) {
source++;
@@ -1174,11 +1175,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
latin1_count++;
} else if (((*source) & ((byte) 0xE0)) == 0xC0) {
if (size < 2) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((*source) < 0xC2) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if (num_latin1_chars) {
latin1_count++;
@@ -1189,16 +1192,19 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
size -= 2;
} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
if (size < 3) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte) 0xF)) == 0xD) &&
((source[1] & 0x20) != 0)) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 3;
size -= 3;
@@ -1206,37 +1212,47 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
is_latin1 = 0;
} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
if (size < 4) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
((source[3] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte)0x7)) > 0x4U) ||
((((*source) & ((byte)0x7)) == 0x4U) &&
((source[1] & ((byte)0x3F)) > 0xFU))) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 4;
size -= 4;
if (num_latin1_chars)
is_latin1 = 0;
} else {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
- ++(*num_chars);
+ ++nchars;
*err_pos = source;
- if (max_chars && size > 0 && *num_chars == max_chars)
- return ERTS_UTF8_OK_MAX_CHARS;
+ if (max_chars && size > 0 && nchars == max_chars) {
+ res = ERTS_UTF8_OK_MAX_CHARS;
+ break;
+ }
if (left && --(*left) <= 0 && size) {
- return ERTS_UTF8_ANALYZE_MORE;
+ res = ERTS_UTF8_ANALYZE_MORE;
+ break;
}
}
+
+ *num_chars = nchars;
if (num_latin1_chars)
*num_latin1_chars = is_latin1 ? latin1_count : -1;
- return ERTS_UTF8_OK;
+
+ return res;
}
int erts_analyze_utf8(byte *source, Uint size,
@@ -1252,29 +1268,18 @@ int erts_analyze_utf8_x(byte *source, Uint size,
return analyze_utf8(source, size, err_pos, num_chars, left, num_latin1_chars, max_chars);
}
-/*
- * No errors should be able to occur - no overlongs, no malformed, no nothing
- */
-static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
- Uint left,
- Uint *num_built, Uint *num_eaten, Eterm tail)
+static ERTS_INLINE Eterm
+make_list_from_utf8_buf(Eterm **hpp, Uint num,
+ byte *bytes, Uint sz,
+ Uint *num_built, Uint *num_eaten,
+ Eterm tail)
{
Eterm *hp;
Eterm ret;
+ Uint left = num;
byte *source, *ssource;
Uint unipoint;
-
- ASSERT(num > 0);
- if (left < num) {
- if (left > 0)
- num = left;
- else
- num = 1;
- }
-
- *num_built = num; /* Always */
-
- hp = HAlloc(p,num * 2);
+ hp = *hpp;
ret = tail;
source = bytes + sz;
ssource = source;
@@ -1302,20 +1307,97 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
}
ret = CONS(hp,make_small(unipoint),ret);
hp += 2;
- if (--num <= 0) {
+ if (--left <= 0) {
break;
}
}
+ *hpp = hp;
+ *num_built = num; /* Always */
*num_eaten = (ssource - source);
return ret;
}
+/*
+ * No errors should be able to occur - no overlongs, no malformed, no nothing
+ */
+static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
+ Uint left,
+ Uint *num_built, Uint *num_eaten, Eterm tail)
+{
+ Eterm *hp;
+
+ ASSERT(num > 0);
+ if (left < num) {
+ if (left > 0)
+ num = left;
+ else
+ num = 1;
+ }
+
+ hp = HAlloc(p,num * 2);
+
+ return make_list_from_utf8_buf(&hp, num, bytes, sz,
+ num_built, num_eaten,
+ tail);
+}
Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
Uint *num_built, Uint *num_eaten, Eterm tail)
{
return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail);
}
+Uint erts_atom_to_string_length(Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+
+ if (ap->latin1_chars >= 0)
+ return (Uint) ap->len;
+ else {
+ byte* err_pos;
+ Uint num_chars;
+#ifdef DEBUG
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ return num_chars;
+ }
+}
+
+Eterm erts_atom_to_string(Eterm **hpp, Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+ if (ap->latin1_chars >= 0)
+ return buf_to_intlist(hpp, (char*)ap->name, ap->len, NIL);
+ else {
+ Eterm res;
+ byte* err_pos;
+ Uint num_chars, num_built, num_eaten;
+#ifdef DEBUG
+ Eterm *hp_start = *hpp;
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ res = make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len,
+ &num_built, &num_eaten, NIL);
+
+ ASSERT(num_built == num_chars);
+ ASSERT(num_eaten == ap->len);
+ ASSERT(*hpp - hp_start == 2*num_chars);
+
+ return res;
+ }
+}
+
static int is_candidate(Uint cp)
{
int index,pos;