From 26b59dfe67ef551cd94765557cdd8c79794bcc38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Tue, 31 May 2016 14:28:54 +0200 Subject: Add new AtU8 beam chunk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new chunk stores atoms encoded in UTF-8. beam_lib has also been modified to handle the new 'utf8_atoms' attribute while the 'atoms' attribute may be a missing chunk from now on. The binary_to_atom/2 BIF can now encode any utf8 binary with up to 255 characters. The list_to_atom/1 BIF can now accept codepoints higher than 255 with up to 255 characters (thanks to Björn Gustavsson). --- erts/emulator/beam/atom.c | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) (limited to 'erts/emulator/beam/atom.c') diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 2b5ad097a0..2055c29190 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -233,10 +233,10 @@ need_convertion: } /* - * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned! + * erts_atom_put_index() may fail. Returns negative indexes for errors. */ -Eterm -erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) +int +erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc) { byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1]; const byte *text = name; @@ -253,7 +253,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) if (trunc) tlen = 0; else - return THE_NON_VALUE; + return ATOM_MAX_CHARS_ERROR; } switch (enc) { @@ -262,7 +262,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) if (trunc) tlen = MAX_ATOM_CHARACTERS; else - return THE_NON_VALUE; + return ATOM_MAX_CHARS_ERROR; } #ifdef DEBUG for (aix = 0; aix < len; aix++) { @@ -276,7 +276,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) if (trunc) tlen = MAX_ATOM_CHARACTERS; else - return THE_NON_VALUE; + return ATOM_MAX_CHARS_ERROR; } no_latin1_chars = tlen; latin1_to_utf8(utf8_copy, &text, &tlen); @@ -284,7 +284,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) case ERTS_ATOM_ENC_UTF8: /* First sanity check; need to verify later */ if (tlen > MAX_ATOM_SZ_LIMIT && !trunc) - return THE_NON_VALUE; + return ATOM_MAX_CHARS_ERROR; break; } @@ -295,7 +295,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) atom_read_unlock(); if (aix >= 0) { /* Already in table no need to verify it */ - return make_atom(aix); + return aix; } if (enc == ERTS_ATOM_ENC_UTF8) { @@ -314,13 +314,13 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) case ERTS_UTF8_OK_MAX_CHARS: /* Truncated... */ if (!trunc) - return THE_NON_VALUE; + return ATOM_MAX_CHARS_ERROR; ASSERT(no_chars == MAX_ATOM_CHARACTERS); tlen = err_pos - text; break; default: /* Bad utf8... */ - return THE_NON_VALUE; + return ATOM_BAD_ENCODING_ERROR; } } @@ -333,7 +333,20 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) atom_write_lock(); aix = index_put(&erts_atom_table, (void*) &a); atom_write_unlock(); - return make_atom(aix); + return aix; +} + +/* + * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned! + */ +Eterm +erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) +{ + int aix = erts_atom_put_index(name, len, enc, trunc); + if (aix >= 0) + return make_atom(aix); + else + return THE_NON_VALUE; } Eterm -- cgit v1.2.3