diff options
Diffstat (limited to 'erts/emulator/beam/erl_unicode.c')
-rw-r--r-- | erts/emulator/beam/erl_unicode.c | 181 |
1 files changed, 83 insertions, 98 deletions
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 3a968594f3..2d1d1443a7 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1,18 +1,19 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2008-2013. All Rights Reserved. + * Copyright Ericsson AB 2008-2017. All Rights Reserved. * - * The contents of this file are subject to the Erlang Public License, - * Version 1.1, (the "License"); you may not use this file except in - * compliance with the License. You should have received a copy of the - * Erlang Public License along with this software. If not, it can be - * retrieved online at http://www.erlang.org/. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * * %CopyrightEnd% */ @@ -54,7 +55,7 @@ static BIF_RETTYPE finalize_list_to_list(Process *p, Uint num_processed_bytes, Uint num_bytes_to_process, Uint num_resulting_chars, - int state, int left, + int state, Sint left, Eterm tail); static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3); static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3); @@ -122,19 +123,16 @@ static void cleanup_restart_context(RestartContext *rc) } } -static void cleanup_restart_context_bin(Binary *bp) +static int cleanup_restart_context_bin(Binary *bp) { RestartContext *rc = ERTS_MAGIC_BIN_DATA(bp); cleanup_restart_context(rc); + return 1; } -static RestartContext *get_rc_from_bin(Eterm bin) +static RestartContext *get_rc_from_bin(Eterm mref) { - Binary *mbp; - ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin)); - - mbp = ((ProcBin *) binary_val(bin))->val; - + Binary *mbp = erts_magic_ref2bin(mref); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp) == cleanup_restart_context_bin); return (RestartContext *) ERTS_MAGIC_BIN_DATA(mbp); @@ -147,8 +145,8 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc) RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp); Eterm *hp; memcpy(restartp,rc,sizeof(RestartContext)); - hp = HAlloc(p, PROC_BIN_SIZE); - return erts_mk_magic_binary_term(&hp, &MSO(p), mbp); + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &MSO(p), mbp); } @@ -172,12 +170,13 @@ static ERTS_INLINE int allowed_iterations(Process *p) else return tmp; } -static ERTS_INLINE int cost_to_proc(Process *p, int cost) + +static ERTS_INLINE void cost_to_proc(Process *p, Sint cost) { - int x = (cost / LOOP_FACTOR); + Sint x = (cost / LOOP_FACTOR); BUMP_REDS(p,x); - return x; } + static ERTS_INLINE int simple_loops_to_common(int cost) { int factor = (LOOP_FACTOR_SIMPLE / LOOP_FACTOR); @@ -242,14 +241,15 @@ static int utf8_len(byte first) return -1; } -static int copy_utf8_bin(byte *target, byte *source, Uint size, - byte *leftover, int *num_leftovers, - byte **err_pos, Uint *characters) { - int copied = 0; +static Uint copy_utf8_bin(byte *target, byte *source, Uint size, + byte *leftover, int *num_leftovers, + byte **err_pos, Uint *characters) +{ + Uint copied = 0; if (leftover != NULL && *num_leftovers) { int need = utf8_len(leftover[0]); int from_source = need - (*num_leftovers); - int c; + Uint c; byte *tmp_err_pos = NULL; ASSERT(need > 0); ASSERT(from_source > 0); @@ -501,8 +501,8 @@ L_Again: /* Restart with sublist, old listend was pushed on stack */ } -static Eterm do_build_utf8(Process *p, Eterm ioterm, int *left, int latin1, - byte *target, int *pos, Uint *characters, int *err, +static Eterm do_build_utf8(Process *p, Eterm ioterm, Sint *left, int latin1, + byte *target, Uint *pos, Uint *characters, int *err, byte *leftover, int *num_leftovers) { int c; @@ -572,7 +572,7 @@ static Eterm do_build_utf8(Process *p, Eterm ioterm, int *left, int latin1, } if (!latin1) { - int num; + Uint num; byte *err_pos = NULL; num = copy_utf8_bin(target + (*pos), bytes, size, leftover, num_leftovers,&err_pos,characters); @@ -803,7 +803,7 @@ static int check_leftovers(byte *source, int size) -static BIF_RETTYPE build_utf8_return(Process *p,Eterm bin,int pos, +static BIF_RETTYPE build_utf8_return(Process *p,Eterm bin,Uint pos, Eterm rest_term,int err, byte *leftover,int num_leftovers,Eterm latin1) { @@ -858,8 +858,8 @@ static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3) #endif byte* bytes; Eterm rest_term; - int left, sleft; - int pos; + Sint left, sleft; + Uint pos; int err; byte leftover[4]; /* used for temp buffer too, otherwise 3 bytes would have been enough */ @@ -873,7 +873,7 @@ static BIF_RETTYPE characters_to_utf8_trap(BIF_ALIST_3) real_bin = binary_val(BIF_ARG_1); ASSERT(*real_bin == HEADER_PROC_BIN); #endif - pos = (int) binary_size(BIF_ARG_1); + pos = binary_size(BIF_ARG_1); bytes = binary_bytes(BIF_ARG_1); sleft = left = allowed_iterations(BIF_P); err = 0; @@ -933,9 +933,9 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2) int latin1; Eterm bin; byte *bytes; - int pos; + Uint pos; int err; - int left, sleft; + Sint left, sleft; Eterm rest_term, subject; byte leftover[4]; /* used for temp buffer too, o therwise 3 bytes would have been enough */ @@ -998,7 +998,7 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2) byte *t = NULL; Uint sz = binary_size(bin); byte *by = erts_get_aligned_binary_bytes(bin,&t); - int i; + Uint i; erts_printf("<<"); for (i = 0;i < sz; ++i) { erts_printf((i == sz -1) ? "0x%X" : "0x%X, ", (unsigned) by[i]); @@ -1006,7 +1006,7 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2) erts_printf(">>: "); erts_free_aligned_binary_bytes(t); } - erts_printf("%d - %d = %d\n",sleft,left,sleft - left); + erts_printf("%ld - %ld = %ld\n", sleft, left, sleft - left); } #endif cost_to_proc(BIF_P, sleft - left); @@ -1014,10 +1014,10 @@ BIF_RETTYPE unicode_characters_to_binary_2(BIF_ALIST_2) leftover,num_leftovers,BIF_ARG_2); } -static BIF_RETTYPE build_list_return(Process *p, byte *bytes, int pos, Uint characters, +static BIF_RETTYPE build_list_return(Process *p, byte *bytes, Uint pos, Uint characters, Eterm rest_term, int err, byte *leftover, int num_leftovers, - Eterm latin1, int left) + Eterm latin1, Sint left) { Eterm *hp; @@ -1069,11 +1069,11 @@ static BIF_RETTYPE characters_to_list_trap_1(BIF_ALIST_3) { RestartContext *rc; byte* bytes; - int pos; + Uint pos; Uint characters; int err; Eterm rest_term; - int left, sleft; + Sint left, sleft; int latin1 = 0; byte leftover[4]; /* used for temp buffer too, @@ -1106,9 +1106,9 @@ BIF_RETTYPE unicode_characters_to_list_2(BIF_ALIST_2) int latin1; Uint characters = 0; byte *bytes; - int pos; + Uint pos; int err; - int left, sleft; + Sint left, sleft; Eterm rest_term; byte leftover[4]; /* used for temp buffer too, o therwise 3 bytes would have been enough */ @@ -1540,7 +1540,7 @@ static BIF_RETTYPE finalize_list_to_list(Process *p, Uint num_processed_bytes, Uint num_bytes_to_process, Uint num_resulting_chars, - int state, int left, + int state, Sint left, Eterm tail) { Uint num_built; /* characters */ @@ -1887,74 +1887,57 @@ binary_to_atom(Process* proc, Eterm bin, Eterm enc, int must_exist) byte* bytes; byte *temp_alloc = NULL; Uint bin_size; + Eterm a; if ((bytes = erts_get_aligned_binary_bytes(bin, &temp_alloc)) == 0) { BIF_ERROR(proc, BADARG); } bin_size = binary_size(bin); + if (enc == am_latin1) { - Eterm a; - if (bin_size > MAX_ATOM_CHARACTERS) { - system_limit: - erts_free_aligned_binary_bytes(temp_alloc); - BIF_ERROR(proc, SYSTEM_LIMIT); - } if (!must_exist) { - a = erts_atom_put((byte *) bytes, - bin_size, - ERTS_ATOM_ENC_LATIN1, - 0); - erts_free_aligned_binary_bytes(temp_alloc); - if (is_non_value(a)) - goto badarg; - BIF_RET(a); - } else if (erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) { - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(a); - } else { + int lix = erts_atom_put_index((byte *) bytes, + bin_size, + ERTS_ATOM_ENC_LATIN1, + 0); + if (lix == ATOM_BAD_ENCODING_ERROR) { + badarg: + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(proc, BADARG); + } else if (lix == ATOM_MAX_CHARS_ERROR) { + system_limit: + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(proc, SYSTEM_LIMIT); + } + + a = make_atom(lix); + } else if (!erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) { goto badarg; } - } else if (enc == am_utf8 || enc == am_unicode) { - Eterm res; - Uint num_chars = 0; - const byte* p = bytes; - Uint left = bin_size; - while (left) { - if (++num_chars > MAX_ATOM_CHARACTERS) { + } else if (enc == am_utf8 || enc == am_unicode) { + if (!must_exist) { + int uix = erts_atom_put_index((byte *) bytes, + bin_size, + ERTS_ATOM_ENC_UTF8, + 0); + if (uix == ATOM_BAD_ENCODING_ERROR) { + goto badarg; + } else if (uix == ATOM_MAX_CHARS_ERROR) { goto system_limit; } - if ((p[0] & 0x80) == 0) { - ++p; - --left; - } - else if (left >= 2 - && (p[0] & 0xFE) == 0xC2 /* only allow latin1 subset */ - && (p[1] & 0xC0) == 0x80) { - p += 2; - left -= 2; - } - else goto badarg; - } - if (!must_exist) { - res = erts_atom_put((byte *) bytes, - bin_size, - ERTS_ATOM_ENC_UTF8, - 0); + a = make_atom(uix); } - else if (!erts_atom_get((char*)bytes, bin_size, &res, ERTS_ATOM_ENC_UTF8)) { + else if (!erts_atom_get((char*)bytes, bin_size, &a, ERTS_ATOM_ENC_UTF8)) { goto badarg; } - erts_free_aligned_binary_bytes(temp_alloc); - if (is_non_value(res)) - goto badarg; - BIF_RET(res); } else { - badarg: - erts_free_aligned_binary_bytes(temp_alloc); - BIF_ERROR(proc, BADARG); + goto badarg; } + + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(a); } BIF_RETTYPE binary_to_atom_2(BIF_ALIST_2) @@ -2015,7 +1998,7 @@ char *erts_convert_filename_to_encoding(Eterm name, char *statbuf, size_t statbu ++need; } if (used) - *used = (Sint) need; + *used = need; if (need+extra > statbuf_size) { name_buf = (char *) erts_alloc(alloc_type, need+extra); } else { @@ -2126,6 +2109,8 @@ Eterm erts_convert_native_to_filename(Process *p, byte *bytes) mac = 1; case ERL_FILENAME_UTF8: size = strlen((char *) bytes); + if (size == 0) + return NIL; if (erts_analyze_utf8(bytes,size,&err_pos,&num_chars,NULL) != ERTS_UTF8_OK) { goto noconvert; } @@ -2504,7 +2489,7 @@ void erts_copy_utf8_to_utf16_little(byte *target, byte *bytes, int num_chars) ((Uint) (bytes[3] & ((byte) 0x3F))); bytes += 4; } else { - erl_exit(1,"Internal unicode error in prim_file:internal_name2native/1"); + erts_exit(ERTS_ERROR_EXIT,"Internal unicode error in prim_file:internal_name2native/1"); } *target++ = (byte) (unipoint & 0xFF); *target++ = (byte) ((unipoint >> 8) & 0xFF); |