/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 1996-2018. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* %CopyrightEnd%
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "sys.h"
#include "erl_sys_driver.h"
#include "erl_vm.h"
#include "global.h"
#include "hash.h"
#include "atom.h"
/*
 * Size argument handed to erts_index_init() when the atom table is
 * created in init_atom_table().
 */
#define ATOM_SIZE 3000
IndexTable erts_atom_table; /* The index table */
/* Read/write mutex for the atom table; initialized in init_atom_table(). */
static erts_rwmtx_t atom_table_lock;
/*
 * Shorthands for taking and releasing atom_table_lock through the
 * erts_rwmtx read (r) and read/write (rw) operations.
 */
#define atom_read_lock() erts_rwmtx_rlock(&atom_table_lock)
#define atom_read_unlock() erts_rwmtx_runlock(&atom_table_lock)
#define atom_write_lock() erts_rwmtx_rwlock(&atom_table_lock)
#define atom_write_unlock() erts_rwmtx_rwunlock(&atom_table_lock)
/*
 * Change to '#if 1' to count the calls made to erts_atom_put_index();
 * the counter is then printed by atom_info().
 */
#if 0
#define ERTS_ATOM_PUT_OPS_STAT
#endif
#ifdef ERTS_ATOM_PUT_OPS_STAT
/* Number of erts_atom_put_index() calls; only present when stats are on. */
static erts_atomic_t atom_put_ops;
#endif
/*
 * Functions for allocating space for the text of atoms. We do not
 * use malloc for each atom to prevent excessive memory fragmentation.
 */
/*
 * One segment of atom text storage. Segments are chained through 'next';
 * more_atom_space() pushes each new segment on the front of the chain.
 */
typedef struct _atom_text {
struct _atom_text* next;
unsigned char text[ATOM_TEXT_SIZE];
} AtomText;
static AtomText* text_list; /* List of text buffers */
static byte *atom_text_pos; /* Next free byte in the newest text buffer */
static byte *atom_text_end; /* One past the last byte of the newest text buffer */
static Uint reserved_atom_space; /* Total amount of atom text space */
static Uint atom_space; /* Amount of atom text space used */
/*
 * Print statistics about the atom index table to the given output.
 *
 * The atom table read lock is held while printing, except when the
 * emulator is crash dumping, in which case no lock is taken.
 */
void atom_info(fmtfn_t to, void *to_arg)
{
    const int need_lock = !ERTS_IS_CRASH_DUMPING;

    if (need_lock) {
        atom_read_lock();
    }

    index_info(to, to_arg, &erts_atom_table);
#ifdef ERTS_ATOM_PUT_OPS_STAT
    erts_print(to, to_arg, "atom_put_ops: %ld\n",
               erts_atomic_read_nob(&atom_put_ops));
#endif

    if (need_lock) {
        atom_read_unlock();
    }
}
/*
 * Allocate a fresh atom text segment and make it the current one.
 *
 * Whatever room was left in the previous segment is no longer used for
 * new allocations, since atom_text_pos/atom_text_end are redirected.
 */
static void
more_atom_space(void)
{
    AtomText *segment = (AtomText *) erts_alloc(ERTS_ALC_T_ATOM_TXT,
                                                sizeof(AtomText));

    /* Push the new segment on the front of the segment list. */
    segment->next = text_list;
    text_list = segment;

    /* Subsequent text allocations are carved out of the new segment. */
    atom_text_pos = segment->text;
    atom_text_end = segment->text + ATOM_TEXT_SIZE;

    reserved_atom_space += sizeof(AtomText);

    VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
}
/*
 * Allocate string space within an atom text segment.
 *
 * bytes: number of bytes wanted; asserted to be at most MAX_ATOM_SZ_LIMIT.
 *
 * Returns a pointer to 'bytes' bytes of atom text storage. When the
 * current segment does not have more than 'bytes' bytes left, a new
 * segment is allocated first and the allocation is taken from it.
 */
static byte*
atom_text_alloc(int bytes)
{
    byte *res;

    ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);
    /*
     * Compare against the room that is left instead of computing
     * atom_text_pos + bytes: that sum could point beyond the end of the
     * segment, which is undefined behaviour for pointer arithmetic.
     * The outcome of the test is the same as before.
     */
    if (bytes >= atom_text_end - atom_text_pos) {
        more_atom_space();
    }
    res = atom_text_pos;
    atom_text_pos += bytes;
    atom_space += bytes;
    return res;
}
/*
 * Hash an atom name with hashpjw (the algorithm from the Dragon Book).
 *
 * A two-byte sequence whose lead byte is 0xC2 or 0xC3 and whose second
 * byte has the form 10xxxxxx is folded into one byte before it is fed
 * to the hash, so such a pair contributes a single value.
 */
static HashValue
atom_hash(Atom* obj)
{
    const byte *cur = obj->name;
    int remaining = obj->len;
    HashValue hash = 0;

    while (remaining != 0) {
        HashValue top;
        byte v = *cur++;

        remaining--;

        /* latin1 kludge for r16: merge lead byte and continuation byte */
        if (remaining != 0 && (v & 0xFE) == 0xC2 && (*cur & 0xC0) == 0x80) {
            v = (v << 6) | (*cur & 0x3F);
            cur++;
            remaining--;
        }

        /* Plain hashpjw step for v */
        hash = (hash << 4) + v;
        top = hash & 0xf0000000;
        if (top != 0) {
            hash ^= (top >> 24);
            hash ^= top;
        }
    }

    return hash;
}
/*
 * Compare two atoms by name.
 *
 * Returns 0 when both names have the same length and the same bytes,
 * and 1 otherwise.
 */
static int
atom_cmp(Atom* tmpl, Atom* obj)
{
    if (tmpl->len != obj->len) {
        return 1;
    }
    return sys_memcmp(tmpl->name, obj->name, tmpl->len) != 0;
}
/*
 * Create a new Atom from the template 'tmpl'.
 *
 * The name bytes are copied into atom text space and the length and
 * latin1_chars fields are taken over from the template. The slot index
 * is set to -1.
 */
static Atom*
atom_alloc(Atom* tmpl)
{
    Atom *atom = (Atom *) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));

    atom->name = atom_text_alloc(tmpl->len);
    sys_memcpy(atom->name, tmpl->name, tmpl->len);
    atom->len = tmpl->len;
    atom->latin1_chars = tmpl->latin1_chars;
    atom->slot.index = -1;

    /*
     * Precompute ordinal value of first 3 bytes + 7 bits.
     * This is used by erl_utils.h:erts_cmp_atoms().
     * We cannot use the full 32 bits of the first 4 bytes,
     * since we use the sign of the difference between two
     * ordinal values to represent their relative order.
     */
    {
        /* Leading name bytes, zero padded when the name is shorter than 4. */
        unsigned char prefix[4] = { '\0', '\0', '\0', '\0' };
        int avail = (tmpl->len < 4) ? tmpl->len : 4;
        int k;

        for (k = 0; k < avail; k++) {
            prefix[k] = tmpl->name[k];
        }

        atom->ord0 = (prefix[0] << 23) + (prefix[1] << 15)
            + (prefix[2] << 7) + (prefix[3] >> 1);
    }

    return atom;
}
/*
 * Free callback installed in the atom table's HashFunctions.
 *
 * Nothing is released here. The assertion documents that the only entry
 * expected to be passed in is am_ErtsSecretAtom, which init_atom_table()
 * erases from the hash table.
 */
static void
atom_free(Atom* obj)
{
ASSERT(obj->slot.index == atom_val(am_ErtsSecretAtom));
}
/*
 * Convert a Latin-1 string to UTF-8 when it contains any byte >= 0x80.
 *
 * conv_buf: scratch buffer receiving the converted text
 * buf_sz:   size of conv_buf; asserted >= MAX_ATOM_SZ_FROM_LATIN1
 * srcp:     in: the Latin-1 text; out: conv_buf if a conversion was done
 * lenp:     in: length of the text; out: length in bytes of the result
 *
 * When every byte is below 0x80, *srcp and *lenp are left untouched and
 * conv_buf is not written.
 */
static void latin1_to_utf8(byte* conv_buf, Uint buf_sz,
                           const byte** srcp, Uint* lenp)
{
    const byte *in = *srcp;
    const Uint in_len = *lenp;
    byte *out;
    Uint pos = 0;

    ASSERT(in_len <= MAX_ATOM_CHARACTERS);
    ASSERT(buf_sz >= MAX_ATOM_SZ_FROM_LATIN1);

    /* Skip the leading run of bytes that need no conversion. */
    while (pos < in_len && !(in[pos] & 0x80)) {
        pos++;
    }
    if (pos == in_len) {
        return;
    }

    /* Copy the plain prefix as is, then convert the remainder. */
    sys_memcpy(conv_buf, in, pos);
    out = conv_buf + pos;

    while (pos < in_len) {
        const unsigned char ch = in[pos];

        pos++;
        if (ch & 0x80) {
            /* Bytes 0x80..0xFF become a two-byte sequence. */
            *out++ = 0xC0 | (ch >> 6);
            *out++ = 0x80 | (ch & 0x3F);
        } else {
            *out++ = ch;
        }
    }

    *srcp = conv_buf;
    *lenp = out - conv_buf;
}
/*
 * erts_atom_put_index() may fail. Returns negative indexes for errors.
 *
 * Look up the atom with the given name and insert it when it is not
 * already in the atom table.
 *
 * name:  the atom text, encoded as stated by 'enc'
 * len:   length of 'name' in bytes
 * enc:   encoding of 'name' (7-bit ASCII, Latin-1 or UTF-8)
 * trunc: when non-zero, over-long names are truncated instead of being
 *        rejected, and a negative 'len' is treated as 0
 *
 * Returns the atom index on success, ATOM_MAX_CHARS_ERROR when the name
 * is too long (or 'len' is negative) and 'trunc' is zero, or
 * ATOM_BAD_ENCODING_ERROR when UTF-8 validation fails.
 */
int
erts_atom_put_index(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
{
byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
const byte *text = name;
Uint tlen;
Sint no_latin1_chars;
Atom a;
int aix;
#ifdef ERTS_ATOM_PUT_OPS_STAT
erts_atomic_inc_nob(&atom_put_ops);
#endif
if (len < 0) {
if (trunc) {
len = 0;
} else {
return ATOM_MAX_CHARS_ERROR;
}
}
tlen = len;
switch (enc) {
case ERTS_ATOM_ENC_7BIT_ASCII:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return ATOM_MAX_CHARS_ERROR;
}
#ifdef DEBUG
/* NOTE(review): this walks 'len' bytes, not the possibly truncated tlen. */
for (aix = 0; aix < len; aix++) {
ASSERT((name[aix] & 0x80) == 0);
}
#endif
no_latin1_chars = tlen;
break;
case ERTS_ATOM_ENC_LATIN1:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return ATOM_MAX_CHARS_ERROR;
}
no_latin1_chars = tlen;
/* May redirect 'text' to utf8_copy and update tlen to the UTF-8 length. */
latin1_to_utf8(utf8_copy, sizeof(utf8_copy), &text, &tlen);
break;
case ERTS_ATOM_ENC_UTF8:
/* First sanity check; need to verify later */
if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
return ATOM_MAX_CHARS_ERROR;
break;
}
/*
 * Look for an existing atom first, holding only the read lock.
 * NOTE(review): for ERTS_ATOM_ENC_UTF8 with 'trunc' set, tlen has not
 * been bounded at this point. erts_atom_get() casts to Sint16 when it
 * fills in the same field, so a very long input may not fit - confirm.
 */
a.len = tlen;
a.name = (byte *) text;
atom_read_lock();
aix = index_get(&erts_atom_table, (void*) &a);
atom_read_unlock();
if (aix >= 0) {
/* Already in table no need to verify it */
return aix;
}
if (enc == ERTS_ATOM_ENC_UTF8) {
/* Need to verify encoding and length */
byte *err_pos;
Uint no_chars;
switch (erts_analyze_utf8_x((byte *) text,
(Uint) tlen,
&err_pos,
&no_chars, NULL,
&no_latin1_chars,
MAX_ATOM_CHARACTERS)) {
case ERTS_UTF8_OK:
ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
break;
case ERTS_UTF8_OK_MAX_CHARS:
/* Truncated... */
if (!trunc)
return ATOM_MAX_CHARS_ERROR;
ASSERT(no_chars == MAX_ATOM_CHARACTERS);
/* Keep only the bytes that precede err_pos. */
tlen = err_pos - text;
break;
default:
/* Bad utf8... */
return ATOM_BAD_ENCODING_ERROR;
}
}
ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
a.len = tlen;
a.latin1_chars = (Sint16) no_latin1_chars;
a.name = (byte *) text;
/*
 * Insert under the write lock.
 * NOTE(review): the read lock was released above, so another thread may
 * have inserted the same name in between; this presumably relies on
 * index_put() returning the existing entry in that case - confirm.
 */
atom_write_lock();
aix = index_put(&erts_atom_table, (void*) &a);
atom_write_unlock();
return aix;
}
/*
 * erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
 *
 * Same as erts_atom_put_index(), but the result is delivered as an atom
 * term instead of as a table index.
 */
Eterm
erts_atom_put(const byte *name, Sint len, ErtsAtomEncoding enc, int trunc)
{
    const int ix = erts_atom_put_index(name, len, enc, trunc);

    if (ix < 0) {
        return THE_NON_VALUE;
    }
    return make_atom(ix);
}
/*
 * Create (or look up) an atom from a 7-bit ASCII name, truncating the
 * name when it is too long. Use erts_atom_put() for other encodings.
 */
Eterm
am_atom_put(const char* name, Sint len)
{
    const byte *ascii_name = (const byte *) name;

    return erts_atom_put(ascii_name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1);
}
/*
 * Return the 'entries' field of the atom table.
 *
 * The value is read under the atom table read lock, unless the emulator
 * is crash dumping, in which case it is read without locking.
 */
int atom_table_size(void)
{
    int n_entries;

    if (ERTS_IS_CRASH_DUMPING) {
        return erts_atom_table.entries;
    }

    atom_read_lock();
    n_entries = erts_atom_table.entries;
    atom_read_unlock();

    return n_entries;
}
/*
 * Return what index_table_sz() reports for the atom table.
 *
 * The call is made under the atom table read lock, unless the emulator
 * is crash dumping, in which case no lock is taken.
 */
int atom_table_sz(void)
{
    int size;

    if (ERTS_IS_CRASH_DUMPING) {
        return index_table_sz(&erts_atom_table);
    }

    atom_read_lock();
    size = index_table_sz(&erts_atom_table);
    atom_read_unlock();

    return size;
}
/*
 * Look up an existing atom by name without creating it.
 *
 * name: the atom text, encoded as stated by 'enc'
 * len:  length of 'name' in bytes
 * ap:   receives the atom term when the atom exists
 * enc:  encoding of 'name'
 *
 * Returns 1 and stores the atom in *ap when found. Returns 0, leaving
 * *ap untouched, when the atom does not exist, when the name is too
 * long, or when a name given as 7-bit ASCII contains a byte with the
 * high bit set.
 */
int
erts_atom_get(const char *name, Uint len, Eterm* ap, ErtsAtomEncoding enc)
{
    byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
    Atom key;
    Uint k;
    int ix;

    switch (enc) {
    case ERTS_ATOM_ENC_LATIN1:
        if (len > MAX_ATOM_CHARACTERS) {
            return 0;
        }
        /* May redirect 'name' to utf8_copy and adjust 'len'. */
        latin1_to_utf8(utf8_copy, sizeof(utf8_copy), (const byte**)&name, &len);
        break;
    case ERTS_ATOM_ENC_7BIT_ASCII:
        if (len > MAX_ATOM_CHARACTERS) {
            return 0;
        }
        for (k = 0; k < len; k++) {
            if (name[k] & 0x80) {
                return 0;
            }
        }
        break;
    case ERTS_ATOM_ENC_UTF8:
        if (len > MAX_ATOM_SZ_LIMIT) {
            return 0;
        }
        /* We don't need to check whether the encoding is legal as all atom
         * names are stored as UTF-8 and we know a lookup with a badly encoded
         * name will fail. */
        break;
    }

    /* Every encoding ends up with the same lookup key layout. */
    key.name = (byte*)name;
    key.len = (Sint16)len;

    atom_read_lock();
    ix = index_get(&erts_atom_table, (void*) &key);
    if (ix >= 0) {
        *ap = make_atom(ix);
    }
    atom_read_unlock();

    return ix >= 0;
}
/*
 * Report the atom text space counters.
 *
 * reserved: when non-NULL, receives the total amount of atom text space
 * used:     when non-NULL, receives the amount of atom text space in use
 *
 * The counters are read under the atom table read lock, unless the
 * emulator is crash dumping, in which case no lock is taken.
 */
void
erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
{
    const int take_lock = !ERTS_IS_CRASH_DUMPING;
    Uint total;
    Uint in_use;

    if (take_lock) {
        atom_read_lock();
    }
    total = reserved_atom_space;
    in_use = atom_space;
    if (take_lock) {
        atom_read_unlock();
    }

    if (reserved) {
        *reserved = total;
    }
    if (used) {
        *used = in_use;
    }
}
/*
 * Initialize the atom table: set up the lock, the hash callbacks, the
 * atom text allocator and the index table, then insert every name found
 * in erl_atom_names and finally hide am_ErtsSecretAtom.
 */
void
init_atom_table(void)
{
HashFunctions f;
int i;
Atom a;
erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER;
rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED;
#ifdef ERTS_ATOM_PUT_OPS_STAT
erts_atomic_init_nob(&atom_put_ops, 0);
#endif
erts_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab", NIL,
ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
/* Callbacks used by the index/hash table for Atom entries. */
f.hash = (H_FUN) atom_hash;
f.cmp = (HCMP_FUN) atom_cmp;
f.alloc = (HALLOC_FUN) atom_alloc;
f.free = (HFREE_FUN) atom_free;
f.meta_alloc = (HMALLOC_FUN) erts_alloc;
f.meta_free = (HMFREE_FUN) erts_free;
f.meta_print = (HMPRINT_FUN) erts_print;
/* Reset the atom text allocator state. */
atom_text_pos = NULL;
atom_text_end = NULL;
reserved_atom_space = 0;
atom_space = 0;
text_list = NULL;
erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
"atom_tab", ATOM_SIZE, erts_atom_table_size, f);
/* Allocate the first text segment so that atom_text_alloc() can be used. */
more_atom_space();
/* Ordinary atoms */
for (i = 0; erl_atom_names[i] != 0; i++) {
int ix;
a.len = sys_strlen(erl_atom_names[i]);
a.latin1_chars = a.len;
a.name = (byte*)erl_atom_names[i];
a.slot.index = i;
#ifdef DEBUG
/* Verify 7-bit ascii */
for (ix = 0; ix < a.len; ix++) {
ASSERT((a.name[ix] & 0x80) == 0);
}
#endif
ix = index_put(&erts_atom_table, (void*) &a);
/*
 * Hand back the text space consumed for this name and let the entry
 * refer to the string in erl_atom_names instead. (The space was
 * presumably taken by atom_alloc(), invoked through f.alloc.)
 */
atom_text_pos -= a.len;
atom_space -= a.len;
atom_tab(ix)->name = (byte*)erl_atom_names[i];
}
/* Hide am_ErtsSecretAtom */
hash_erase(&erts_atom_table.htable, atom_tab(atom_val(am_ErtsSecretAtom)));
}
/*
 * Print every atom in the table, one per line, to the given output.
 * Atoms are printed starting from the end of the table; indexes for
 * which erts_index_lookup() gives nothing are skipped.
 */
void
dump_atoms(fmtfn_t to, void *to_arg)
{
    int ix;

    for (ix = erts_atom_table.entries - 1; ix >= 0; ix--) {
        if (erts_index_lookup(&erts_atom_table, ix)) {
            erts_print(to, to_arg, "%T\n", make_atom(ix));
        }
    }
}
Uint
erts_get_atom_limit(void)
{
return erts_atom_table.limit;
}