aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r--erts/emulator/beam/erl_unicode.c250
1 files changed, 156 insertions, 94 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index bd5e1482fb..c5fc6f9815 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2016. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -123,19 +123,16 @@ static void cleanup_restart_context(RestartContext *rc)
}
}
-static void cleanup_restart_context_bin(Binary *bp)
+static int cleanup_restart_context_bin(Binary *bp)
{
RestartContext *rc = ERTS_MAGIC_BIN_DATA(bp);
cleanup_restart_context(rc);
+ return 1;
}
-static RestartContext *get_rc_from_bin(Eterm bin)
+static RestartContext *get_rc_from_bin(Eterm mref)
{
- Binary *mbp;
- ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin));
-
- mbp = ((ProcBin *) binary_val(bin))->val;
-
+ Binary *mbp = erts_magic_ref2bin(mref);
ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp)
== cleanup_restart_context_bin);
return (RestartContext *) ERTS_MAGIC_BIN_DATA(mbp);
@@ -148,8 +145,8 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc)
RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp);
Eterm *hp;
memcpy(restartp,rc,sizeof(RestartContext));
- hp = HAlloc(p, PROC_BIN_SIZE);
- return erts_mk_magic_binary_term(&hp, &MSO(p), mbp);
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+ return erts_mk_magic_ref(&hp, &MSO(p), mbp);
}
@@ -1161,14 +1158,15 @@ static ERTS_INLINE int
analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left,
Sint *num_latin1_chars, Uint max_chars)
{
+ int res = ERTS_UTF8_OK;
Uint latin1_count;
int is_latin1;
+ Uint nchars = 0;
*err_pos = source;
if (num_latin1_chars) {
is_latin1 = 1;
latin1_count = 0;
}
- *num_chars = 0;
while (size) {
if (((*source) & ((byte) 0x80)) == 0) {
source++;
@@ -1177,11 +1175,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
latin1_count++;
} else if (((*source) & ((byte) 0xE0)) == 0xC0) {
if (size < 2) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((*source) < 0xC2) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if (num_latin1_chars) {
latin1_count++;
@@ -1192,16 +1192,19 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
size -= 2;
} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
if (size < 3) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte) 0xF)) == 0xD) &&
((source[1] & 0x20) != 0)) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 3;
size -= 3;
@@ -1209,37 +1212,47 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
is_latin1 = 0;
} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
if (size < 4) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
((source[3] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte)0x7)) > 0x4U) ||
((((*source) & ((byte)0x7)) == 0x4U) &&
((source[1] & ((byte)0x3F)) > 0xFU))) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 4;
size -= 4;
if (num_latin1_chars)
is_latin1 = 0;
} else {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
- ++(*num_chars);
+ ++nchars;
*err_pos = source;
- if (max_chars && size > 0 && *num_chars == max_chars)
- return ERTS_UTF8_OK_MAX_CHARS;
+ if (max_chars && size > 0 && nchars == max_chars) {
+ res = ERTS_UTF8_OK_MAX_CHARS;
+ break;
+ }
if (left && --(*left) <= 0 && size) {
- return ERTS_UTF8_ANALYZE_MORE;
+ res = ERTS_UTF8_ANALYZE_MORE;
+ break;
}
}
+
+ *num_chars = nchars;
if (num_latin1_chars)
*num_latin1_chars = is_latin1 ? latin1_count : -1;
- return ERTS_UTF8_OK;
+
+ return res;
}
int erts_analyze_utf8(byte *source, Uint size,
@@ -1255,29 +1268,18 @@ int erts_analyze_utf8_x(byte *source, Uint size,
return analyze_utf8(source, size, err_pos, num_chars, left, num_latin1_chars, max_chars);
}
-/*
- * No errors should be able to occur - no overlongs, no malformed, no nothing
- */
-static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
- Uint left,
- Uint *num_built, Uint *num_eaten, Eterm tail)
+static ERTS_INLINE Eterm
+make_list_from_utf8_buf(Eterm **hpp, Uint num,
+ byte *bytes, Uint sz,
+ Uint *num_built, Uint *num_eaten,
+ Eterm tail)
{
Eterm *hp;
Eterm ret;
+ Uint left = num;
byte *source, *ssource;
Uint unipoint;
-
- ASSERT(num > 0);
- if (left < num) {
- if (left > 0)
- num = left;
- else
- num = 1;
- }
-
- *num_built = num; /* Always */
-
- hp = HAlloc(p,num * 2);
+ hp = *hpp;
ret = tail;
source = bytes + sz;
ssource = source;
@@ -1305,20 +1307,97 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
}
ret = CONS(hp,make_small(unipoint),ret);
hp += 2;
- if (--num <= 0) {
+ if (--left <= 0) {
break;
}
}
+ *hpp = hp;
+ *num_built = num; /* Always */
*num_eaten = (ssource - source);
return ret;
}
+/*
+ * No errors should be able to occur - no overlongs, no malformed, no nothing
+ */
+static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
+ Uint left,
+ Uint *num_built, Uint *num_eaten, Eterm tail)
+{
+ Eterm *hp;
+
+ ASSERT(num > 0);
+ if (left < num) {
+ if (left > 0)
+ num = left;
+ else
+ num = 1;
+ }
+
+ hp = HAlloc(p,num * 2);
+
+ return make_list_from_utf8_buf(&hp, num, bytes, sz,
+ num_built, num_eaten,
+ tail);
+}
Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
Uint *num_built, Uint *num_eaten, Eterm tail)
{
return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail);
}
+Uint erts_atom_to_string_length(Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+
+ if (ap->latin1_chars >= 0)
+ return (Uint) ap->len;
+ else {
+ byte* err_pos;
+ Uint num_chars;
+#ifdef DEBUG
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ return num_chars;
+ }
+}
+
+Eterm erts_atom_to_string(Eterm **hpp, Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+ if (ap->latin1_chars >= 0)
+ return buf_to_intlist(hpp, (char*)ap->name, ap->len, NIL);
+ else {
+ Eterm res;
+ byte* err_pos;
+ Uint num_chars, num_built, num_eaten;
+#ifdef DEBUG
+ Eterm *hp_start = *hpp;
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ res = make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len,
+ &num_built, &num_eaten, NIL);
+
+ ASSERT(num_built == num_chars);
+ ASSERT(num_eaten == ap->len);
+ ASSERT(*hpp - hp_start == 2*num_chars);
+
+ return res;
+ }
+}
+
static int is_candidate(Uint cp)
{
int index,pos;
@@ -1890,74 +1969,57 @@ binary_to_atom(Process* proc, Eterm bin, Eterm enc, int must_exist)
byte* bytes;
byte *temp_alloc = NULL;
Uint bin_size;
+ Eterm a;
if ((bytes = erts_get_aligned_binary_bytes(bin, &temp_alloc)) == 0) {
BIF_ERROR(proc, BADARG);
}
bin_size = binary_size(bin);
+
if (enc == am_latin1) {
- Eterm a;
- if (bin_size > MAX_ATOM_CHARACTERS) {
- system_limit:
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_ERROR(proc, SYSTEM_LIMIT);
- }
if (!must_exist) {
- a = erts_atom_put((byte *) bytes,
- bin_size,
- ERTS_ATOM_ENC_LATIN1,
- 0);
- erts_free_aligned_binary_bytes(temp_alloc);
- if (is_non_value(a))
- goto badarg;
- BIF_RET(a);
- } else if (erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) {
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_RET(a);
- } else {
+ int lix = erts_atom_put_index((byte *) bytes,
+ bin_size,
+ ERTS_ATOM_ENC_LATIN1,
+ 0);
+ if (lix == ATOM_BAD_ENCODING_ERROR) {
+ badarg:
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(proc, BADARG);
+ } else if (lix == ATOM_MAX_CHARS_ERROR) {
+ system_limit:
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_ERROR(proc, SYSTEM_LIMIT);
+ }
+
+ a = make_atom(lix);
+ } else if (!erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) {
goto badarg;
}
- } else if (enc == am_utf8 || enc == am_unicode) {
- Eterm res;
- Uint num_chars = 0;
- const byte* p = bytes;
- Uint left = bin_size;
- while (left) {
- if (++num_chars > MAX_ATOM_CHARACTERS) {
+ } else if (enc == am_utf8 || enc == am_unicode) {
+ if (!must_exist) {
+ int uix = erts_atom_put_index((byte *) bytes,
+ bin_size,
+ ERTS_ATOM_ENC_UTF8,
+ 0);
+ if (uix == ATOM_BAD_ENCODING_ERROR) {
+ goto badarg;
+ } else if (uix == ATOM_MAX_CHARS_ERROR) {
goto system_limit;
}
- if ((p[0] & 0x80) == 0) {
- ++p;
- --left;
- }
- else if (left >= 2
- && (p[0] & 0xFE) == 0xC2 /* only allow latin1 subset */
- && (p[1] & 0xC0) == 0x80) {
- p += 2;
- left -= 2;
- }
- else goto badarg;
- }
- if (!must_exist) {
- res = erts_atom_put((byte *) bytes,
- bin_size,
- ERTS_ATOM_ENC_UTF8,
- 0);
+ a = make_atom(uix);
}
- else if (!erts_atom_get((char*)bytes, bin_size, &res, ERTS_ATOM_ENC_UTF8)) {
+ else if (!erts_atom_get((char*)bytes, bin_size, &a, ERTS_ATOM_ENC_UTF8)) {
goto badarg;
}
- erts_free_aligned_binary_bytes(temp_alloc);
- if (is_non_value(res))
- goto badarg;
- BIF_RET(res);
} else {
- badarg:
- erts_free_aligned_binary_bytes(temp_alloc);
- BIF_ERROR(proc, BADARG);
+ goto badarg;
}
+
+ erts_free_aligned_binary_bytes(temp_alloc);
+ BIF_RET(a);
}
BIF_RETTYPE binary_to_atom_2(BIF_ALIST_2)