From 26b59dfe67ef551cd94765557cdd8c79794bcc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Tue, 31 May 2016 14:28:54 +0200 Subject: Add new AtU8 beam chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson). --- erts/emulator/beam/erl_unicode.c | 83 ++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 50 deletions(-) (limited to 'erts/emulator/beam/erl_unicode.c') diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index bd5e1482fb..8919898181 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1890,74 +1890,57 @@ binary_to_atom(Process* proc, Eterm bin, Eterm enc, int must_exist) byte* bytes; byte *temp_alloc = NULL; Uint bin_size; + Eterm a; if ((bytes = erts_get_aligned_binary_bytes(bin, &temp_alloc)) == 0) { BIF_ERROR(proc, BADARG); } bin_size = binary_size(bin); + if (enc == am_latin1) { - Eterm a; - if (bin_size > MAX_ATOM_CHARACTERS) { - system_limit: - erts_free_aligned_binary_bytes(temp_alloc); - BIF_ERROR(proc, SYSTEM_LIMIT); - } if (!must_exist) { - a = erts_atom_put((byte *) bytes, - bin_size, - ERTS_ATOM_ENC_LATIN1, - 0); - erts_free_aligned_binary_bytes(temp_alloc); - if (is_non_value(a)) - goto badarg; - BIF_RET(a); - } else if (erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) { - erts_free_aligned_binary_bytes(temp_alloc); - BIF_RET(a); - } else { + int lix = erts_atom_put_index((byte *) bytes, + bin_size, + ERTS_ATOM_ENC_LATIN1, + 0); + if (lix == ATOM_BAD_ENCODING_ERROR) { + badarg: + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(proc, BADARG); + } else if (lix == ATOM_MAX_CHARS_ERROR) { + system_limit: + erts_free_aligned_binary_bytes(temp_alloc); + BIF_ERROR(proc, SYSTEM_LIMIT); + } + + a = make_atom(lix); + } else if (!erts_atom_get((char *)bytes, bin_size, &a, ERTS_ATOM_ENC_LATIN1)) { goto badarg; } - } else if (enc == am_utf8 || enc == am_unicode) { - Eterm res; - Uint num_chars = 0; - const byte* p = bytes; - Uint left = bin_size; - while (left) { - if (++num_chars > MAX_ATOM_CHARACTERS) { + } else if (enc == am_utf8 || enc == am_unicode) { + if (!must_exist) { + int uix = erts_atom_put_index((byte *) bytes, + bin_size, + ERTS_ATOM_ENC_UTF8, + 0); + if (uix == ATOM_BAD_ENCODING_ERROR) { + goto badarg; + } else if (uix == ATOM_MAX_CHARS_ERROR) { goto system_limit; } - if ((p[0] & 0x80) == 0) { - ++p; - --left; - } - else if (left >= 2 - && (p[0] & 0xFE) == 0xC2 /* only allow latin1 subset */ - && (p[1] & 0xC0) == 0x80) { - p += 2; - left -= 2; - } - else goto badarg; - } - if (!must_exist) { - res = erts_atom_put((byte *) bytes, - bin_size, - ERTS_ATOM_ENC_UTF8, - 0); + a = make_atom(uix); } - else if (!erts_atom_get((char*)bytes, bin_size, &res, ERTS_ATOM_ENC_UTF8)) { + else if (!erts_atom_get((char*)bytes, bin_size, &a, ERTS_ATOM_ENC_UTF8)) { goto badarg; } - erts_free_aligned_binary_bytes(temp_alloc); - if (is_non_value(res)) - goto badarg; - BIF_RET(res); } else { - badarg: - erts_free_aligned_binary_bytes(temp_alloc); - BIF_ERROR(proc, BADARG); + goto badarg; } + + erts_free_aligned_binary_bytes(temp_alloc); + BIF_RET(a); } BIF_RETTYPE binary_to_atom_2(BIF_ALIST_2) -- cgit v1.2.3 From 40d55ced08579e9fc0ade28ba0810bf800690394 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Tue, 24 Jan 2017 15:11:28 +0100 Subject: Use magic refs for unicode static NIFs traps --- erts/emulator/beam/erl_unicode.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'erts/emulator/beam/erl_unicode.c') diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 8919898181..dd7c589c3d 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -129,13 +129,9 @@ static void cleanup_restart_context_bin(Binary *bp) cleanup_restart_context(rc); } -static RestartContext *get_rc_from_bin(Eterm bin) +static RestartContext *get_rc_from_bin(Eterm mref) { - Binary *mbp; - ASSERT(ERTS_TERM_IS_MAGIC_BINARY(bin)); - - mbp = ((ProcBin *) binary_val(bin))->val; - + Binary *mbp = erts_magic_ref2bin(mref); ASSERT(ERTS_MAGIC_BIN_DESTRUCTOR(mbp) == cleanup_restart_context_bin); return (RestartContext *) ERTS_MAGIC_BIN_DATA(mbp); @@ -148,8 +144,8 @@ static Eterm make_magic_bin_for_restart(Process *p, RestartContext *rc) RestartContext *restartp = ERTS_MAGIC_BIN_DATA(mbp); Eterm *hp; memcpy(restartp,rc,sizeof(RestartContext)); - hp = HAlloc(p, PROC_BIN_SIZE); - return erts_mk_magic_binary_term(&hp, &MSO(p), mbp); + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &MSO(p), mbp); } -- cgit v1.2.3 From 118de47d703e303aea7f4575849a37c11416ba14 Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Tue, 14 Feb 2017 18:26:31 +0100 Subject: erts: Add deallocation veto for magic destructors A magic destructor can return 0 and thereby take control and prolong the lifetime of a magic binary. --- erts/emulator/beam/erl_unicode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'erts/emulator/beam/erl_unicode.c') diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index dd7c589c3d..01629db9ad 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -123,10 +123,11 @@ static void cleanup_restart_context(RestartContext *rc) } } -static void cleanup_restart_context_bin(Binary *bp) +static int cleanup_restart_context_bin(Binary *bp) { RestartContext *rc = ERTS_MAGIC_BIN_DATA(bp); cleanup_restart_context(rc); + return 1; } static RestartContext *get_rc_from_bin(Eterm mref) -- cgit v1.2.3