aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r--erts/emulator/beam/erl_unicode.c194
1 files changed, 127 insertions, 67 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index b7a5c45fea..8673e029e6 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2017. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -144,7 +144,7 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc)
cleanup_restart_context_bin);
RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp);
Eterm *hp;
- memcpy(restartp,rc,sizeof(RestartContext));
+ sys_memcpy(restartp,rc,sizeof(RestartContext));
hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
return erts_mk_magic_ref(&hp, &MSO(p), mbp);
}
@@ -254,12 +254,12 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size,
ASSERT(need > 0);
ASSERT(from_source > 0);
if (size < from_source) {
- memcpy(leftover + (*num_leftovers), source, size);
+ sys_memcpy(leftover + (*num_leftovers), source, size);
*num_leftovers += size;
return 0;
}
/* leftover has room for four bytes (see bif) */
- memcpy(leftover + (*num_leftovers),source,from_source);
+ sys_memcpy(leftover + (*num_leftovers),source,from_source);
c = copy_utf8_bin(target, leftover, need, NULL, NULL, &tmp_err_pos, characters);
if (tmp_err_pos != 0) {
*err_pos = source;
@@ -291,7 +291,7 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size,
size -= 2; copied += 2;
} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
if (leftover && size < 3) {
- memcpy(leftover, source, (int) size);
+ sys_memcpy(leftover, source, (int) size);
*num_leftovers = (int) size;
break;
}
@@ -313,7 +313,7 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size,
size -= 3; copied += 3;
} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
if (leftover && size < 4) {
- memcpy(leftover, source, (int) size);
+ sys_memcpy(leftover, source, (int) size);
*num_leftovers = (int) size;
break;
}
@@ -1158,14 +1158,15 @@ static ERTS_INLINE int
analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left,
Sint *num_latin1_chars, Uint max_chars)
{
+ int res = ERTS_UTF8_OK;
Uint latin1_count;
int is_latin1;
+ Uint nchars = 0;
*err_pos = source;
if (num_latin1_chars) {
is_latin1 = 1;
latin1_count = 0;
}
- *num_chars = 0;
while (size) {
if (((*source) & ((byte) 0x80)) == 0) {
source++;
@@ -1174,11 +1175,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
latin1_count++;
} else if (((*source) & ((byte) 0xE0)) == 0xC0) {
if (size < 2) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((*source) < 0xC2) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if (num_latin1_chars) {
latin1_count++;
@@ -1189,16 +1192,19 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
size -= 2;
} else if (((*source) & ((byte) 0xF0)) == 0xE0) {
if (size < 3) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte) 0xF)) == 0xD) &&
((source[1] & 0x20) != 0)) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 3;
size -= 3;
@@ -1206,37 +1212,47 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left
is_latin1 = 0;
} else if (((*source) & ((byte) 0xF8)) == 0xF0) {
if (size < 4) {
- return ERTS_UTF8_INCOMPLETE;
+ res = ERTS_UTF8_INCOMPLETE;
+ break;
}
if (((source[1] & ((byte) 0xC0)) != 0x80) ||
((source[2] & ((byte) 0xC0)) != 0x80) ||
((source[3] & ((byte) 0xC0)) != 0x80) ||
(((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
if ((((*source) & ((byte)0x7)) > 0x4U) ||
((((*source) & ((byte)0x7)) == 0x4U) &&
((source[1] & ((byte)0x3F)) > 0xFU))) {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
source += 4;
size -= 4;
if (num_latin1_chars)
is_latin1 = 0;
} else {
- return ERTS_UTF8_ERROR;
+ res = ERTS_UTF8_ERROR;
+ break;
}
- ++(*num_chars);
+ ++nchars;
*err_pos = source;
- if (max_chars && size > 0 && *num_chars == max_chars)
- return ERTS_UTF8_OK_MAX_CHARS;
+ if (max_chars && size > 0 && nchars == max_chars) {
+ res = ERTS_UTF8_OK_MAX_CHARS;
+ break;
+ }
if (left && --(*left) <= 0 && size) {
- return ERTS_UTF8_ANALYZE_MORE;
+ res = ERTS_UTF8_ANALYZE_MORE;
+ break;
}
}
+
+ *num_chars = nchars;
if (num_latin1_chars)
*num_latin1_chars = is_latin1 ? latin1_count : -1;
- return ERTS_UTF8_OK;
+
+ return res;
}
int erts_analyze_utf8(byte *source, Uint size,
@@ -1252,29 +1268,18 @@ int erts_analyze_utf8_x(byte *source, Uint size,
return analyze_utf8(source, size, err_pos, num_chars, left, num_latin1_chars, max_chars);
}
-/*
- * No errors should be able to occur - no overlongs, no malformed, no nothing
- */
-static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
- Uint left,
- Uint *num_built, Uint *num_eaten, Eterm tail)
+static ERTS_INLINE Eterm
+make_list_from_utf8_buf(Eterm **hpp, Uint num,
+ byte *bytes, Uint sz,
+ Uint *num_built, Uint *num_eaten,
+ Eterm tail)
{
Eterm *hp;
Eterm ret;
+ Uint left = num;
byte *source, *ssource;
Uint unipoint;
-
- ASSERT(num > 0);
- if (left < num) {
- if (left > 0)
- num = left;
- else
- num = 1;
- }
-
- *num_built = num; /* Always */
-
- hp = HAlloc(p,num * 2);
+ hp = *hpp;
ret = tail;
source = bytes + sz;
ssource = source;
@@ -1302,20 +1307,97 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
}
ret = CONS(hp,make_small(unipoint),ret);
hp += 2;
- if (--num <= 0) {
+ if (--left <= 0) {
break;
}
}
+ *hpp = hp;
+ *num_built = num; /* Always */
*num_eaten = (ssource - source);
return ret;
}
+/*
+ * No errors should be able to occur - no overlongs, no malformed, no nothing
+ */
+static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz,
+ Uint left,
+ Uint *num_built, Uint *num_eaten, Eterm tail)
+{
+ Eterm *hp;
+
+ ASSERT(num > 0);
+ if (left < num) {
+ if (left > 0)
+ num = left;
+ else
+ num = 1;
+ }
+
+ hp = HAlloc(p,num * 2);
+
+ return make_list_from_utf8_buf(&hp, num, bytes, sz,
+ num_built, num_eaten,
+ tail);
+}
Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left,
Uint *num_built, Uint *num_eaten, Eterm tail)
{
return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail);
}
+Uint erts_atom_to_string_length(Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+
+ if (ap->latin1_chars >= 0)
+ return (Uint) ap->len;
+ else {
+ byte* err_pos;
+ Uint num_chars;
+#ifdef DEBUG
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ return num_chars;
+ }
+}
+
+Eterm erts_atom_to_string(Eterm **hpp, Eterm atom)
+{
+ Atom *ap;
+
+ ASSERT(is_atom(atom));
+ ap = atom_tab(atom_val(atom));
+ if (ap->latin1_chars >= 0)
+ return buf_to_intlist(hpp, (char*)ap->name, ap->len, NIL);
+ else {
+ Eterm res;
+ byte* err_pos;
+ Uint num_chars, num_built, num_eaten;
+#ifdef DEBUG
+ Eterm *hp_start = *hpp;
+ int ares =
+#endif
+ erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
+ ASSERT(ares == ERTS_UTF8_OK);
+
+ res = make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len,
+ &num_built, &num_eaten, NIL);
+
+ ASSERT(num_built == num_chars);
+ ASSERT(num_eaten == ap->len);
+ ASSERT(*hpp - hp_start == 2*num_chars);
+
+ return res;
+ }
+}
+
static int is_candidate(Uint cp)
{
int index,pos;
@@ -2026,7 +2108,7 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu
} else {
name_buf = statbuf;
}
- memcpy(name_buf,bytes,size);
+ sys_memcpy(name_buf,bytes,size);
name_buf[size]=0;
} else {
name_buf = erts_convert_filename_to_wchar(bytes, size,
@@ -2083,18 +2165,9 @@ char* erts_convert_filename_to_wchar(byte* bytes, Uint size,
return name_buf;
}
-
-static int filename_len_16bit(byte *str)
-{
- byte *p = str;
- while(*p != '\0' || p[1] != '\0') {
- p += 2;
- }
- return (p - str);
-}
-Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
+Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes)
{
- Uint size,num_chars;
+ Uint num_chars;
Eterm *hp;
byte *err_pos;
Uint num_built; /* characters */
@@ -2108,7 +2181,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
case ERL_FILENAME_UTF8_MAC:
mac = 1;
case ERL_FILENAME_UTF8:
- size = strlen((char *) bytes);
if (size == 0)
return NIL;
if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) {
@@ -2123,7 +2195,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
}
return ret;
case ERL_FILENAME_WIN_WCHAR:
- size=filename_len_16bit(bytes);
if ((size % 2) != 0) { /* Panic fixup to avoid crashing the emulator */
size--;
hp = HAlloc(p, size+2);
@@ -2146,7 +2217,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes)
goto noconvert;
}
noconvert:
- size = strlen((char *) bytes);
hp = HAlloc(p, 2 * size);
return erts_bin_bytes_to_list(NIL, hp, bytes, size, 0);
}
@@ -2158,7 +2228,6 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
Eterm obj;
DECLARE_ESTACK(stack);
Sint need = 0;
- int seen_null = 0;
if (is_atom(ioterm)) {
Atom* ap;
@@ -2203,9 +2272,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding)
byte *name = ap->name;
int len = ap->len;
for (i = 0; i < len; i++) {
- if (name[i] == 0)
- seen_null = 1;
- else if (seen_null) {
+ if (name[i] == 0) {
need = -1;
break;
}
@@ -2245,9 +2312,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */
* Do not allow null in
* the middle of filenames
*/
- if (x == 0)
- seen_null = 1;
- else if (seen_null) {
+ if (x == 0) {
DESTROY_ESTACK(stack);
return ((Sint) -1);
}
@@ -2580,7 +2645,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
BIF_ERROR(BIF_P,BADARG);
}
if (is_binary(BIF_ARG_1)) {
- int seen_null = 0;
byte *temp_alloc = NULL;
byte *bytes;
byte *err_pos;
@@ -2597,8 +2661,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
for (i = 0; i < size; i++) {
/* Don't allow null in the middle of filenames... */
if (bytes[i] == 0)
- seen_null = 1;
- else if (seen_null)
goto bin_name_error;
bin_p[i] = bytes[i];
}
@@ -2617,8 +2679,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1)
while (size--) {
/* Don't allow null in the middle of filenames... */
if (*bytes == 0)
- seen_null = 1;
- else if (seen_null)
goto bin_name_error;
*bin_p++ = *bytes++;
*bin_p++ = 0;