diff options
author | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2013-01-23 18:09:35 +0100 |
commit | b8e623410d1c22fe6d5fdeb8ccb0b2305533f033 (patch) | |
tree | 708d64e36e18b61ae1801c02ec3aeef42a697be3 /erts/emulator/beam/atom.h | |
parent | e99df74bee7c245ec76678e336fcd09d4b51a089 (diff) | |
parent | d6e3e256b850050b7a86323b2948009d5fcc30a9 (diff) | |
download | otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.gz otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.tar.bz2 otp-b8e623410d1c22fe6d5fdeb8ccb0b2305533f033.zip |
Merge branch 'sverk/r16/utf8-atoms'
* sverk/r16/utf8-atoms:
erl_interface: Fix bug when transcoding atoms from and to UTF8
erl_interface: Changed erlang_char_encoding interface
erts: Testcase doing unicode atom printout with ~w
erl_interface: even more utf8 atom stuff
erts: Fix bug in analyze_utf8 causing faulty latin1 detection
Add UTF-8 node name support for epmd
workaround...
Fix merge conflict with hasse
UTF-8 atom documentation
test case
erl_interface: utf8 atoms continued
Add utf8 atom distribution test cases
atom fixes for NIFs and atom_to_binary
UTF-8 support for distribution
Implement UTF-8 atom support for jinterface
erl_interface: Enable decode of unicode atoms
stdlib: Fix printing of unicode atoms
erts: Change internal representation of atoms to utf8
erts: Refactor rename DFLAG(S)_INTERNAL_TAGS for conformity
Conflicts:
erts/emulator/beam/io.c
OTP-10753
Diffstat (limited to 'erts/emulator/beam/atom.h')
-rw-r--r-- | erts/emulator/beam/atom.h | 62 |
1 files changed, 46 insertions, 16 deletions
diff --git a/erts/emulator/beam/atom.h b/erts/emulator/beam/atom.h index fd9c04d3d0..f721999a4c 100644 --- a/erts/emulator/beam/atom.h +++ b/erts/emulator/beam/atom.h @@ -26,7 +26,9 @@ #include "erl_atom_table.h" -#define MAX_ATOM_LENGTH 255 +#define MAX_ATOM_CHARACTERS 255 +#define MAX_ATOM_SZ_FROM_LATIN1 (2*MAX_ATOM_CHARACTERS) +#define MAX_ATOM_SZ_LIMIT (4*MAX_ATOM_CHARACTERS) /* theoretical byte limit */ #define ATOM_LIMIT (1024*1024) #define MIN_ATOM_TABLE_SIZE 8192 @@ -45,7 +47,8 @@ */ typedef struct atom { IndexSlot slot; /* MUST BE LOCATED AT TOP OF STRUCT!!! */ - int len; /* length of atom name */ + Sint16 len; /* length of atom name (UTF-8 encoded) */ + Sint16 latin1_chars; /* 0-255 if atom can be encoded in latin1; otherwise, -1 */ int ord0; /* ordinal value of first 3 bytes + 7 bits */ byte* name; /* name of atom */ } Atom; @@ -53,8 +56,8 @@ typedef struct atom { extern IndexTable erts_atom_table; ERTS_GLB_INLINE Atom* atom_tab(Uint i); -ERTS_GLB_INLINE int erts_is_atom_bytes(byte *text, size_t len, Eterm term); -ERTS_GLB_INLINE int erts_is_atom_str(char *str, Eterm term); +ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term); +ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1); #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE Atom* @@ -63,7 +66,7 @@ atom_tab(Uint i) return (Atom *) erts_index_lookup(&erts_atom_table, i); } -ERTS_GLB_INLINE int erts_is_atom_bytes(byte *text, size_t len, Eterm term) +ERTS_GLB_INLINE int erts_is_atom_utf8_bytes(byte *text, size_t len, Eterm term) { Atom *a; if (!is_atom(term)) @@ -73,43 +76,70 @@ ERTS_GLB_INLINE int erts_is_atom_bytes(byte *text, size_t len, Eterm term) && sys_memcmp((void *) a->name, (void *) text, len) == 0); } -ERTS_GLB_INLINE int erts_is_atom_str(char *str, Eterm term) +ERTS_GLB_INLINE int erts_is_atom_str(const char *str, Eterm term, int is_latin1) { Atom *a; int i, len; - char *aname; + const byte* aname; + const byte* s = (const byte*) str; + if (!is_atom(term)) return 0; a = atom_tab(atom_val(term)); len = a->len; - aname = (char *) a->name; - for (i = 0; i < len; i++) - if (aname[i] != str[i] || str[i] == '\0') - return 0; - return str[len] == '\0'; + aname = a->name; + if (is_latin1) { + for (i = 0; i < len; s++) { + if (aname[i] < 0x80) { + if (aname[i] != *s || *s == '\0') + return 0; + i++; + } + else { + if (aname[i] != (0xC0 | (*s >> 6)) || + aname[i+1] != (0x80 | (*s & 0x3F))) { + return 0; + } + i += 2; + } + } + } + else { + for (i = 0; i < len; i++, s++) + if (aname[i] != *s || *s == '\0') + return 0; + } + return *s == '\0'; } #endif +typedef enum { + ERTS_ATOM_ENC_7BIT_ASCII, + ERTS_ATOM_ENC_LATIN1, + ERTS_ATOM_ENC_UTF8 +} ErtsAtomEncoding; + /* * Note, ERTS_IS_ATOM_STR() expects the first argument to be a - * string literal. + * 7-bit ASCII string literal. */ #define ERTS_IS_ATOM_STR(LSTR, TERM) \ - (erts_is_atom_bytes((byte *) LSTR, sizeof(LSTR) - 1, (TERM))) + (erts_is_atom_utf8_bytes((byte *) LSTR, sizeof(LSTR) - 1, (TERM))) #define ERTS_DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1) #define ERTS_INIT_AM(S) AM_ ## S = am_atom_put(#S, sizeof(#S) - 1) int atom_table_size(void); /* number of elements */ int atom_table_sz(void); /* table size in bytes, excluding stored objects */ -Eterm am_atom_put(const char*, int); /* most callers pass plain char*'s */ +Eterm am_atom_put(const char*, int); /* ONLY 7-bit ascii! */ +Eterm erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc); int atom_erase(byte*, int); int atom_static_put(byte*, int); void init_atom_table(void); void atom_info(int, void *); void dump_atoms(int, void *); -int erts_atom_get(const char* name, int len, Eterm* ap); +int erts_atom_get(const char* name, int len, Eterm* ap, int is_latin1); void erts_atom_get_text_space_sizes(Uint *reserved, Uint *used); #endif |