diff options
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r-- | erts/emulator/beam/erl_unicode.c | 194 |
1 files changed, 127 insertions, 67 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index b7a5c45fea..8673e029e6 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2008-2017. All Rights Reserved. + * Copyright Ericsson AB 2008-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -144,7 +144,7 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc) cleanup_restart_context_bin); RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp); Eterm *hp; - memcpy(restartp,rc,sizeof(RestartContext)); + sys_memcpy(restartp,rc,sizeof(RestartContext)); hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); return erts_mk_magic_ref(&hp, &MSO(p), mbp); } @@ -254,12 +254,12 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size, ASSERT(need > 0); ASSERT(from_source > 0); if (size < from_source) { - memcpy(leftover + (*num_leftovers), source, size); + sys_memcpy(leftover + (*num_leftovers), source, size); *num_leftovers += size; return 0; } /* leftover has room for four bytes (see bif) */ - memcpy(leftover + (*num_leftovers),source,from_source); + sys_memcpy(leftover + (*num_leftovers),source,from_source); c = copy_utf8_bin(target, leftover, need, NULL, NULL, &tmp_err_pos, characters); if (tmp_err_pos != 0) { *err_pos = source; @@ -291,7 +291,7 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size, size -= 2; copied += 2; } else if (((*source) & ((byte) 0xF0)) == 0xE0) { if (leftover && size < 3) { - memcpy(leftover, source, (int) size); + sys_memcpy(leftover, source, (int) size); *num_leftovers = (int) size; break; } @@ -313,7 +313,7 @@ static Uint copy_utf8_bin(byte *target, byte *source, Uint size, size -= 3; copied += 3; } else if (((*source) & ((byte) 0xF8)) == 0xF0) { if (leftover && size < 4) { - memcpy(leftover, source, (int) size); + sys_memcpy(leftover, source, (int) size); *num_leftovers = (int) size; break; } @@ -1158,14 +1158,15 @@ static ERTS_INLINE int analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left, Sint *num_latin1_chars, Uint max_chars) { + int res = ERTS_UTF8_OK; Uint latin1_count; int is_latin1; + Uint nchars = 0; *err_pos = source; if (num_latin1_chars) { is_latin1 = 1; latin1_count = 0; } - *num_chars = 0; while (size) { if (((*source) & ((byte) 0x80)) == 0) { source++; @@ -1174,11 +1175,13 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left latin1_count++; } else if (((*source) & ((byte) 0xE0)) == 0xC0) { if (size < 2) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((*source) < 0xC2) /* overlong */) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if (num_latin1_chars) { latin1_count++; @@ -1189,16 +1192,19 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left size -= 2; } else if (((*source) & ((byte) 0xF0)) == 0xE0) { if (size < 3) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xE0) && (source[1] < 0xA0)) /* overlong */ ) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if ((((*source) & ((byte) 0xF)) == 0xD) && ((source[1] & 0x20) != 0)) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } source += 3; size -= 3; @@ -1206,37 +1212,47 @@ analyze_utf8(byte *source, Uint size, byte **err_pos, Uint *num_chars, int *left is_latin1 = 0; } else if (((*source) & ((byte) 0xF8)) == 0xF0) { if (size < 4) { - return ERTS_UTF8_INCOMPLETE; + res = ERTS_UTF8_INCOMPLETE; + break; } if (((source[1] & ((byte) 0xC0)) != 0x80) || ((source[2] & ((byte) 0xC0)) != 0x80) || ((source[3] & ((byte) 0xC0)) != 0x80) || (((*source) == 0xF0) && (source[1] < 0x90)) /* overlong */) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } if ((((*source) & ((byte)0x7)) > 0x4U) || ((((*source) & ((byte)0x7)) == 0x4U) && ((source[1] & ((byte)0x3F)) > 0xFU))) { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } source += 4; size -= 4; if (num_latin1_chars) is_latin1 = 0; } else { - return ERTS_UTF8_ERROR; + res = ERTS_UTF8_ERROR; + break; } - ++(*num_chars); + ++nchars; *err_pos = source; - if (max_chars && size > 0 && *num_chars == max_chars) - return ERTS_UTF8_OK_MAX_CHARS; + if (max_chars && size > 0 && nchars == max_chars) { + res = ERTS_UTF8_OK_MAX_CHARS; + break; + } if (left && --(*left) <= 0 && size) { - return ERTS_UTF8_ANALYZE_MORE; + res = ERTS_UTF8_ANALYZE_MORE; + break; } } + + *num_chars = nchars; if (num_latin1_chars) *num_latin1_chars = is_latin1 ? latin1_count : -1; - return ERTS_UTF8_OK; + + return res; } int erts_analyze_utf8(byte *source, Uint size, @@ -1252,29 +1268,18 @@ int erts_analyze_utf8_x(byte *source, Uint size, return analyze_utf8(source, size, err_pos, num_chars, left, num_latin1_chars, max_chars); } -/* - * No errors should be able to occur - no overlongs, no malformed, no nothing - */ -static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, - Uint left, - Uint *num_built, Uint *num_eaten, Eterm tail) +static ERTS_INLINE Eterm +make_list_from_utf8_buf(Eterm **hpp, Uint num, + byte *bytes, Uint sz, + Uint *num_built, Uint *num_eaten, + Eterm tail) { Eterm *hp; Eterm ret; + Uint left = num; byte *source, *ssource; Uint unipoint; - - ASSERT(num > 0); - if (left < num) { - if (left > 0) - num = left; - else - num = 1; - } - - *num_built = num; /* Always */ - - hp = HAlloc(p,num * 2); + hp = *hpp; ret = tail; source = bytes + sz; ssource = source; @@ -1302,20 +1307,97 @@ static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, } ret = CONS(hp,make_small(unipoint),ret); hp += 2; - if (--num <= 0) { + if (--left <= 0) { break; } } + *hpp = hp; + *num_built = num; /* Always */ *num_eaten = (ssource - source); return ret; } +/* + * No errors should be able to occur - no overlongs, no malformed, no nothing + */ +static Eterm do_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, + Uint left, + Uint *num_built, Uint *num_eaten, Eterm tail) +{ + Eterm *hp; + + ASSERT(num > 0); + if (left < num) { + if (left > 0) + num = left; + else + num = 1; + } + + hp = HAlloc(p,num * 2); + + return make_list_from_utf8_buf(&hp, num, bytes, sz, + num_built, num_eaten, + tail); +} Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail) { return do_utf8_to_list(p, num, bytes, sz, left, num_built, num_eaten, tail); } +Uint erts_atom_to_string_length(Eterm atom) +{ + Atom *ap; + + ASSERT(is_atom(atom)); + ap = atom_tab(atom_val(atom)); + + if (ap->latin1_chars >= 0) + return (Uint) ap->len; + else { + byte* err_pos; + Uint num_chars; +#ifdef DEBUG + int ares = +#endif + erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + ASSERT(ares == ERTS_UTF8_OK); + + return num_chars; + } +} + +Eterm erts_atom_to_string(Eterm **hpp, Eterm atom) +{ + Atom *ap; + + ASSERT(is_atom(atom)); + ap = atom_tab(atom_val(atom)); + if (ap->latin1_chars >= 0) + return buf_to_intlist(hpp, (char*)ap->name, ap->len, NIL); + else { + Eterm res; + byte* err_pos; + Uint num_chars, num_built, num_eaten; +#ifdef DEBUG + Eterm *hp_start = *hpp; + int ares = +#endif + erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL); + ASSERT(ares == ERTS_UTF8_OK); + + res = make_list_from_utf8_buf(hpp, num_chars, ap->name, ap->len, + &num_built, &num_eaten, NIL); + + ASSERT(num_built == num_chars); + ASSERT(num_eaten == ap->len); + ASSERT(*hpp - hp_start == 2*num_chars); + + return res; + } +} + static int is_candidate(Uint cp) { int index,pos; @@ -2026,7 +2108,7 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu } else { name_buf = statbuf; } - memcpy(name_buf,bytes,size); + sys_memcpy(name_buf,bytes,size); name_buf[size]=0; } else { name_buf = erts_convert_filename_to_wchar(bytes, size, @@ -2083,18 +2165,9 @@ char* erts_convert_filename_to_wchar(byte* bytes, Uint size, return name_buf; } - -static int filename_len_16bit(byte *str) -{ - byte *p = str; - while(*p != '\0' || p[1] != '\0') { - p += 2; - } - return (p - str); -} -Eterm erts_convert_native_to_filename(Process *p, byte *bytes) +Eterm erts_convert_native_to_filename(Process *p, size_t size, byte *bytes) { - Uint size,num_chars; + Uint num_chars; Eterm *hp; byte *err_pos; Uint num_built; /* characters */ @@ -2108,7 +2181,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes) case ERL_FILENAME_UTF8_MAC: mac = 1; case ERL_FILENAME_UTF8: - size = strlen((char *) bytes); if (size == 0) return NIL; if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { @@ -2123,7 +2195,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes) } return ret; case ERL_FILENAME_WIN_WCHAR: - size=filename_len_16bit(bytes); if ((size % 2) != 0) { /* Panic fixup to avoid crashing the emulator */ size--; hp = HAlloc(p, size+2); @@ -2146,7 +2217,6 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes) goto noconvert; } noconvert: - size = strlen((char *) bytes); hp = HAlloc(p, 2 * size); return erts_bin_bytes_to_list(NIL, hp, bytes, size, 0); } @@ -2158,7 +2228,6 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) Eterm obj; DECLARE_ESTACK(stack); Sint need = 0; - int seen_null = 0; if (is_atom(ioterm)) { Atom* ap; @@ -2203,9 +2272,7 @@ Sint erts_native_filename_need(Eterm ioterm, int encoding) byte *name = ap->name; int len = ap->len; for (i = 0; i < len; i++) { - if (name[i] == 0) - seen_null = 1; - else if (seen_null) { + if (name[i] == 0) { need = -1; break; } @@ -2245,9 +2312,7 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ * Do not allow null in * the middle of filenames */ - if (x == 0) - seen_null = 1; - else if (seen_null) { + if (x == 0) { DESTROY_ESTACK(stack); return ((Sint) -1); } @@ -2580,7 +2645,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) BIF_ERROR(BIF_P,BADARG); } if (is_binary(BIF_ARG_1)) { - int seen_null = 0; byte *temp_alloc = NULL; byte *bytes; byte *err_pos; @@ -2597,8 +2661,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) for (i = 0; i < size; i++) { /* Don't allow null in the middle of filenames... */ if (bytes[i] == 0) - seen_null = 1; - else if (seen_null) goto bin_name_error; bin_p[i] = bytes[i]; } @@ -2617,8 +2679,6 @@ BIF_RETTYPE prim_file_internal_name2native_1(BIF_ALIST_1) while (size--) { /* Don't allow null in the middle of filenames... */ if (*bytes == 0) - seen_null = 1; - else if (seen_null) goto bin_name_error; *bin_p++ = *bytes++; *bin_p++ = 0; |