diff options
-rw-r--r-- | erts/emulator/beam/atom.c | 77 | ||||
-rw-r--r-- | erts/emulator/beam/atom.h | 8 | ||||
-rw-r--r-- | erts/emulator/test/bif_SUITE.erl | 10 |
3 files changed, 73 insertions, 22 deletions
diff --git a/erts/emulator/beam/atom.c b/erts/emulator/beam/atom.c index 59b51fd15e..5a70509ffd 100644 --- a/erts/emulator/beam/atom.c +++ b/erts/emulator/beam/atom.c @@ -200,11 +200,15 @@ atom_free(Atom* obj) ASSERT(obj->slot.index == atom_val(am_ErtsSecretAtom)); } -static void latin1_to_utf8(byte* conv_buf, const byte** srcp, int* lenp) +static void latin1_to_utf8(byte* conv_buf, Uint buf_sz, + const byte** srcp, Uint* lenp) { byte* dst; const byte* src = *srcp; - int i, len = *lenp; + Uint i, len = *lenp; + + ASSERT(len <= MAX_ATOM_CHARACTERS); + ASSERT(buf_sz >= MAX_ATOM_SZ_FROM_LATIN1); for (i=0 ; i < len; ++i) { if (src[i] & 0x80) { @@ -234,11 +238,11 @@ need_convertion: * erts_atom_put_index() may fail. Returns negative indexes for errors. */ int -erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc) +erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) { byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1]; const byte *text = name; - int tlen = len; + Uint tlen; Sint no_latin1_chars; Atom a; int aix; @@ -247,13 +251,16 @@ erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc) erts_atomic_inc_nob(&atom_put_ops); #endif - if (tlen < 0) { - if (trunc) - tlen = 0; - else - return ATOM_MAX_CHARS_ERROR; + if (len < 0) { + if (trunc) { + len = 0; + } else { + return ATOM_MAX_CHARS_ERROR; + } } + tlen = len; + switch (enc) { case ERTS_ATOM_ENC_7BIT_ASCII: if (tlen > MAX_ATOM_CHARACTERS) { @@ -277,7 +284,7 @@ erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc) return ATOM_MAX_CHARS_ERROR; } no_latin1_chars = tlen; - latin1_to_utf8(utf8_copy, &text, &tlen); + latin1_to_utf8(utf8_copy, sizeof(utf8_copy), &text, &tlen); break; case ERTS_ATOM_ENC_UTF8: /* First sanity check; need to verify later */ @@ -338,7 +345,7 @@ erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc) * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned! */ Eterm -erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) +erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc) { int aix = erts_atom_put_index(name, len, enc, trunc); if (aix >= 0) @@ -348,7 +355,7 @@ erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc) } Eterm -am_atom_put(const char* name, int len) +am_atom_put(const char* name, Sint len) { /* Assumes 7-bit ascii; use erts_atom_put() for other encodings... */ return erts_atom_put((byte *) name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1); @@ -379,23 +386,57 @@ int atom_table_sz(void) } int -erts_atom_get(const char *name, int len, Eterm* ap, ErtsAtomEncoding enc) +erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc) { byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1]; Atom a; int i; int res; - a.len = (Sint16) len; - a.name = (byte *)name; - if (enc == ERTS_ATOM_ENC_LATIN1) { - latin1_to_utf8(utf8_copy, (const byte**)&a.name, &len); - a.len = (Sint16) len; + switch (enc) { + case ERTS_ATOM_ENC_LATIN1: + if (len > MAX_ATOM_CHARACTERS) { + return 0; + } + + latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len); + + a.name = (byte*)name; + a.len = (Sint16)len; + break; + case ERTS_ATOM_ENC_7BIT_ASCII: + if (len > MAX_ATOM_CHARACTERS) { + return 0; + } + + for (i = 0; i < len; i++) { + if (name[i] & 0x80) { + return 0; + } + } + + a.len = (Sint16)len; + a.name = (byte*)name; + break; + case ERTS_ATOM_ENC_UTF8: + if (len > MAX_ATOM_SZ_LIMIT) { + return 0; + } + + /* We don't need to check whether the encoding is legal as all atom + * names are stored as UTF-8 and we know a lookup with a badly encoded + * name will fail. */ + + a.len = (Sint16)len; + a.name = (byte*)name; + break; } + atom_read_lock(); i = index_get(&erts_atom_table, (void*) &a); res = i < 0 ? 0 : (*ap = make_atom(i), 1); atom_read_unlock(); + return res; } diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index ca920679c6..f51c5a8c62 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -133,14 +133,14 @@ typedef enum { int atom_table_size(void); /* number of elements */ int atom_table_sz(void); /* table size in bytes, excluding stored objects */ -Eterm am_atom_put(const char*, int); /* ONLY 7-bit ascii! */ -Eterm erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc); -int erts_atom_put_index(const byte *name, int len, ErtsAtomEncoding enc, int trunc); +Eterm am_atom_put(const char*, Sint); /* ONLY 7-bit ascii! */ +Eterm erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); +int erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc); void init_atom_table(void); void atom_info(fmtfn_t, void *); void dump_atoms(fmtfn_t, void *); Uint erts_get_atom_limit(void); -int erts_atom_get(const char* name, int len, Eterm* ap, ErtsAtomEncoding enc); +int erts_atom_get(const char* name, Uint len, Eterm* ap, ErtsAtomEncoding enc); void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used); #endif diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl index 9e7bcd5255..9f6e1059b9 100644 --- a/erts/emulator/test/bif_SUITE.erl +++ b/erts/emulator/test/bif_SUITE.erl @@ -609,6 +609,16 @@ binary_to_existing_atom(Config) when is_list(Config) -> UnlikelyAtom = binary_to_atom(id(UnlikelyBin), latin1), UnlikelyAtom = binary_to_existing_atom(UnlikelyBin, latin1), + + %% ERL-944; a binary that was too large would overflow the latin1-to-utf8 + %% conversion buffer. + OverflowAtom = <<0:511/unit:8, + 196, 133, 196, 133, 196, 133, 196, 133, 196, 133, + 196, 133, 196, 133, 196, 133, 196, 133, 196, 133, + 196, 133, 196, 133, 196, 133, 196, 133, 196, 133, + 196, 133, 196, 133, 196, 133, 196, 133, 196, 133>>, + {'EXIT', _} = (catch binary_to_existing_atom(OverflowAtom, latin1)), + ok. |