From e81e9e367eb0f6bae1c818f5351a7118f832c31c Mon Sep 17 00:00:00 2001 From: Sverker Eriksson Date: Wed, 16 Jan 2013 20:44:09 +0100 Subject: atom fixes for NIFs and atom_to_binary --- erts/emulator/beam/erl_nif.c | 24 +++++++++----- erts/emulator/beam/erl_unicode.c | 67 +++++----------------------------------- erts/emulator/beam/global.h | 2 -- 3 files changed, 24 insertions(+), 69 deletions(-) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 185ac75d73..f00d5f86ce 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -743,16 +743,23 @@ int enif_get_atom(ErlNifEnv* env, Eterm atom, char* buf, unsigned len, { Atom* ap; ASSERT(encoding == ERL_NIF_LATIN1); - if (is_not_atom(atom)) { + if (is_not_atom(atom) || len==0) { return 0; } ap = atom_tab(atom_val(atom)); - if (ap->len+1 > len) { + + if (ap->latin1_chars < 0 || ap->latin1_chars >= len) { return 0; } - sys_memcpy(buf, ap->name, ap->len); - buf[ap->len] = '\0'; - return ap->len + 1; + if (ap->latin1_chars == ap->len) { + sys_memcpy(buf, ap->name, ap->len); + } + else { + int dlen = erts_utf8_to_latin1((byte*)buf, ap->name, ap->len); + ASSERT(dlen == ap->latin1_chars); (void)dlen; + } + buf[ap->latin1_chars] = '\0'; + return ap->latin1_chars + 1; } int enif_get_int(ErlNifEnv* env, Eterm term, int* ip) @@ -854,7 +861,10 @@ int enif_get_atom_length(ErlNifEnv* env, Eterm atom, unsigned* len, ASSERT(enc == ERL_NIF_LATIN1); if (is_not_atom(atom)) return 0; ap = atom_tab(atom_val(atom)); - *len = ap->len; + if (ap->latin1_chars < 0) { + return 0; + } + *len = ap->latin1_chars; return 1; } @@ -961,7 +971,7 @@ ERL_NIF_TERM enif_make_atom(ErlNifEnv* env, const char* name) ERL_NIF_TERM enif_make_atom_len(ErlNifEnv* env, const char* name, size_t len) { - return am_atom_put(name, len); + return erts_atom_put((byte*)name, len, ERTS_ATOM_ENC_LATIN1, 1); } int enif_make_existing_atom(ErlNifEnv* env, const char* name, ERL_NIF_TERM* atom, diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c index 6600ce4a4a..c00293de89 100644 --- a/erts/emulator/beam/erl_unicode.c +++ b/erts/emulator/beam/erl_unicode.c @@ -1853,32 +1853,21 @@ BIF_RETTYPE atom_to_binary_2(BIF_ALIST_2) ap = atom_tab(atom_val(BIF_ARG_1)); if (BIF_ARG_2 == am_latin1) { - int i; Eterm bin_term; - int bin_size = ap->len; - - for (i = 0; i < ap->len; ) { - if (ap->name[i] < 0x80) i++; - else { - ASSERT(ap->name[i] >= 0xC0); - if (ap->name[i] < 0xE0) { - ASSERT(i+1 < ap->len && (ap->name[i+1] & 0xC0) == 0x80); - i += 2; - bin_size -= 1; - } - else goto error; - } + + if (ap->latin1_chars < 0) { + goto error; } - if (bin_size == ap->len) { + if (ap->latin1_chars == ap->len) { bin_term = new_binary(BIF_P, ap->name, ap->len); } else { byte* bin_p; int dbg_sz; - bin_term = new_binary(BIF_P, 0, bin_size); + bin_term = new_binary(BIF_P, 0, ap->latin1_chars); bin_p = binary_bytes(bin_term); dbg_sz = erts_utf8_to_latin1(bin_p, ap->name, ap->len); - ASSERT(dbg_sz == bin_size); (void)dbg_sz; + ASSERT(dbg_sz == ap->latin1_chars); (void)dbg_sz; } BIF_RET(bin_term); } else if (BIF_ARG_2 == am_utf8 || BIF_ARG_2 == am_unicode) { @@ -2676,23 +2665,6 @@ BIF_RETTYPE file_native_name_encoding_0(BIF_ALIST_0) } } -int erts_utf8_is_latin1_string(const byte *string, int len) -{ - /* Assumes string is encoded in valid UTF-8 */ - int i; - while (i < len) { - if ((string[i] & 0x80) == 0) - i++; - else if (i+1 < len - && (string[i] & 0xFE) == 0xC2 - && (string[i+1] & 0xC0) == 0x80) - i +=2; - else - return 0; - } - return 1; -} - int erts_utf8_to_latin1(byte* dest, const byte* source, int slen) { /* @@ -2700,6 +2672,7 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen) * and that dest has enough room. */ byte* dp = dest; + while (slen > 0) { if ((source[0] & 0x80) == 0) { *dp++ = *source++; @@ -2717,29 +2690,3 @@ int erts_utf8_to_latin1(byte* dest, const byte* source, int slen) return dp - dest; } -int erts_utf8_to_latin1_backwards(byte *dest, const byte *source, int slen) -{ - /* - * Assumes source contains valid utf8 that can be encoded as latin1, - * and that dest has enough room. - */ - int dix = 0; - int six = slen; - while (six > 0) { - six--; - dix--; - if ((source[six] & 0x80) == 0) - dest[dix] = source[six]; - else { - byte c; - ASSERT(six > 0); - ASSERT((source[six] & 0xC0) == 0x80); - ASSERT((source[six-1] & 0xFE) == 0xC2); - c = source[six] & 0x3F; - six--; - c |= source[six] << 6; - dest[dix] = c; - } - } - return -dix; -} diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h index eccdf10c75..649352ca91 100755 --- a/erts/emulator/beam/global.h +++ b/erts/emulator/beam/global.h @@ -1530,9 +1530,7 @@ char *erts_convert_filename_to_native(Eterm name, char *statbuf, Eterm erts_convert_native_to_filename(Process *p, byte *bytes); Eterm erts_utf8_to_list(Process *p, Uint num, byte *bytes, Uint sz, Uint left, Uint *num_built, Uint *num_eaten, Eterm tail); -int erts_utf8_is_latin1_string(const byte *string, int len); int erts_utf8_to_latin1(byte* dest, const byte* source, int slen); -int erts_utf8_to_latin1_backwards(byte* dest, const byte* source, int slen); #define ERTS_UTF8_OK 0 #define ERTS_UTF8_INCOMPLETE 1 #define ERTS_UTF8_ERROR 2 -- cgit v1.2.3