/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 1996-2013. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* %CopyrightEnd%
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "sys.h"
#include "erl_sys_driver.h"
#include "erl_vm.h"
#include "global.h"
#include "hash.h"
#include "atom.h"
/* Size argument handed to erts_index_init() when the atom table is created. */
#define ATOM_SIZE 3000
IndexTable erts_atom_table; /* The index table */
#include "erl_smp.h"
/*
* Lock taken around accesses to erts_atom_table in this file;
* it is initialized in init_atom_table().
*/
static erts_smp_rwmtx_t atom_table_lock;
/* Shorthands for the lock/unlock operations on atom_table_lock. */
#define atom_read_lock() erts_smp_rwmtx_rlock(&atom_table_lock)
#define atom_read_unlock() erts_smp_rwmtx_runlock(&atom_table_lock)
#define atom_write_lock() erts_smp_rwmtx_rwlock(&atom_table_lock)
#define atom_write_unlock() erts_smp_rwmtx_rwunlock(&atom_table_lock)
/*
* Change "#if 0" to "#if 1" to define ERTS_ATOM_PUT_OPS_STAT, which makes
* erts_atom_put() count its calls and atom_info() print the counter.
*/
#if 0
#define ERTS_ATOM_PUT_OPS_STAT
#endif
#ifdef ERTS_ATOM_PUT_OPS_STAT
/* Number of erts_atom_put() calls; only present when statistics are enabled. */
static erts_smp_atomic_t atom_put_ops;
#endif
/* Functions for allocating space for the text of atoms. We do not
* use malloc for each atom, in order to prevent excessive memory
* fragmentation.
*/
/*
* One segment of atom text storage. Segments are chained through
* 'next' with the most recently allocated segment first (see
* more_atom_space()); atom names are carved out of 'text' by
* atom_text_alloc().
*/
typedef struct _atom_text {
struct _atom_text* next; /* Previously allocated segment, or NULL */
unsigned char text[ATOM_TEXT_SIZE]; /* Raw storage for atom names */
} AtomText;
static AtomText* text_list; /* List of text buffers */
static byte *atom_text_pos; /* Next unused byte in the newest text buffer */
static byte *atom_text_end; /* One past the last byte of the newest text buffer */
static Uint reserved_atom_space; /* Total amount of atom text space */
static Uint atom_space; /* Amount of atom text space used */
/*
 * Print information about the atom table on the output identified by
 * 'to'/'to_arg'.
 *
 * The table is read-locked while printing, except when a crash dump is
 * being written, in which case no lock is taken.
 */
void atom_info(int to, void *to_arg)
{
    const int take_lock = !ERTS_IS_CRASH_DUMPING;

    if (take_lock) {
        atom_read_lock();
    }

    index_info(to, to_arg, &erts_atom_table);
#ifdef ERTS_ATOM_PUT_OPS_STAT
    erts_print(to, to_arg, "atom_put_ops: %ld\n",
               erts_smp_atomic_read_nob(&atom_put_ops));
#endif

    if (take_lock) {
        atom_read_unlock();
    }
}
/*
 * Allocate a fresh atom text segment and make it the current one.
 *
 * The new segment is pushed on the front of text_list, the text
 * cursor (atom_text_pos/atom_text_end) is moved to it, and the
 * reserved-space counter grows by the size of the whole segment.
 */
static void
more_atom_space(void)
{
    AtomText *segment =
        (AtomText *) erts_alloc(ERTS_ALC_T_ATOM_TXT, sizeof(AtomText));

    /* Link in front of the existing segments. */
    segment->next = text_list;
    text_list = segment;

    /* Subsequent text allocations are served from the new segment. */
    atom_text_pos = segment->text;
    atom_text_end = segment->text + ATOM_TEXT_SIZE;

    reserved_atom_space += sizeof(AtomText);

    VERBOSE(DEBUG_SYSTEM,("Allocated %d atom space\n",ATOM_TEXT_SIZE));
}
/*
 * Reserve 'bytes' bytes of string space in the current atom text
 * segment and return a pointer to the start of the reserved area.
 *
 * When the request would reach the end of the current segment, a new
 * segment is allocated first and the space is taken from that one.
 */
static byte*
atom_text_alloc(int bytes)
{
    byte *start;

    ASSERT(bytes <= MAX_ATOM_SZ_LIMIT);

    if (atom_text_end <= atom_text_pos + bytes) {
        more_atom_space();
    }

    start = atom_text_pos;
    atom_text_pos = start + bytes;
    atom_space += bytes;
    return start;
}
/*
 * Calculate the hash value of an atom name (using the hash algorithm
 * hashpjw from the Dragon Book).
 *
 * A two-byte sequence whose first byte is 0xC2 or 0xC3 and whose second
 * byte has the form 10xxxxxx is folded into one byte before hashing
 * (the low eight bits of (first << 6) | (second & 0x3F)), so such a
 * pair contributes a single value to the hash.
 */
static HashValue
atom_hash(Atom* obj)
{
    const byte *cursor = obj->name;
    int remaining = obj->len;
    HashValue hash = 0;

    while (remaining != 0) {
        HashValue top;
        byte ch = *cursor++;

        remaining--;

        /* Fold a two-byte sequence into one byte (only if a byte follows). */
        if (remaining != 0
            && (ch & 0xFE) == 0xC2
            && (*cursor & 0xC0) == 0x80) {
            ch = (ch << 6) | (*cursor & 0x3F);
            cursor++;
            remaining--;
        }

        /* Ordinary hashpjw step for 'ch'. */
        hash = (hash << 4) + ch;
        top = hash & 0xf0000000;
        if (top != 0) {
            hash ^= (top >> 24);
            hash ^= top;
        }
    }
    return hash;
}
/*
 * Compare a lookup template with a stored atom.
 *
 * Returns 0 when both have the same length and identical name bytes,
 * and 1 otherwise.
 */
static int
atom_cmp(Atom* tmpl, Atom* obj)
{
    if (tmpl->len != obj->len) {
        return 1;
    }
    return sys_memcmp(tmpl->name, obj->name, tmpl->len) != 0;
}
/*
 * Create a new Atom from the template 'tmpl'.
 *
 * The name bytes are copied into atom text space, 'len' and
 * 'latin1_chars' are taken over from the template, and the slot index
 * is set to -1.
 */
static Atom*
atom_alloc(Atom* tmpl)
{
    Atom *atom = (Atom *) erts_alloc(ERTS_ALC_T_ATOM, sizeof(Atom));
    unsigned char lead[4];
    int k;

    atom->name = atom_text_alloc(tmpl->len);
    sys_memcpy(atom->name, tmpl->name, tmpl->len);
    atom->len = tmpl->len;
    atom->latin1_chars = tmpl->latin1_chars;
    atom->slot.index = -1;

    /*
     * Precompute the ordinal value of the first 3 bytes + 7 bits of
     * the name; names shorter than 4 bytes are padded with zero bytes.
     * This is used by utils.c:cmp_atoms().
     * The full 32 bits of the first 4 bytes cannot be used, since the
     * sign of the difference between two ordinal values represents
     * their relative order.
     */
    for (k = 0; k < 4; k++) {
        lead[k] = (k < tmpl->len) ? tmpl->name[k] : '\0';
    }
    atom->ord0 = (lead[0] << 23) + (lead[1] << 15) + (lead[2] << 7) + (lead[3] >> 1);

    return atom;
}
/*
* Release the Atom struct 'obj'. Installed as the 'free' callback of
* the atom table in init_atom_table(). Only the struct itself is
* released here; the name bytes it points to are left alone.
*/
static void
atom_free(Atom* obj)
{
erts_free(ERTS_ALC_T_ATOM, (void*) obj);
}
/*
 * Convert a Latin-1 string to UTF-8 if it contains any 8-bit character.
 *
 * conv_buf - destination buffer; the caller must make it large enough
 *            for two output bytes per input byte.
 * srcp     - in: the Latin-1 text; out: conv_buf if a conversion was
 *            done, otherwise left unchanged.
 * lenp     - in: number of input bytes; out: number of bytes in the
 *            converted text, or left unchanged if nothing was converted.
 *
 * Text consisting only of 7-bit characters is not copied at all.
 */
static void latin1_to_utf8(byte* conv_buf, const byte** srcp, int* lenp)
{
    const byte *in = *srcp;
    const int in_len = *lenp;
    byte *out;
    int pos = 0;

    /* Skip the leading run of 7-bit characters. */
    while (pos < in_len && !(in[pos] & 0x80)) {
        pos++;
    }
    if (pos >= in_len) {
        /* No 8-bit character found; the input is usable as it is. */
        return;
    }

    /* The 7-bit prefix is copied verbatim; the rest is converted. */
    sys_memcpy(conv_buf, in, pos);
    out = conv_buf + pos;
    while (pos < in_len) {
        const unsigned char ch = in[pos++];

        if (ch & 0x80) {
            /* Two-byte sequence: 110000xx 10xxxxxx */
            *out++ = 0xC0 | (ch >> 6);
            *out++ = 0x80 | (ch & 0x3F);
        } else {
            *out++ = ch;
        }
    }

    *srcp = conv_buf;
    *lenp = out - conv_buf;
}
/*
* Return the atom whose text is given by 'name'/'len', inserting it into
* the atom table if it is not already there.
*
* erts_atom_put() may fail. If it fails THE_NON_VALUE is returned!
*
* name  - the atom text, encoded as indicated by 'enc'.
* len   - number of bytes in 'name'.
* enc   - ERTS_ATOM_ENC_7BIT_ASCII, ERTS_ATOM_ENC_LATIN1 or
*         ERTS_ATOM_ENC_UTF8.
* trunc - if non-zero, a too long name is truncated and a negative
*         length is treated as 0, instead of causing a failure.
*
* Failures visible in this function: a negative or too large length
* when 'trunc' is zero, and UTF-8 text that erts_analyze_utf8_x()
* does not report as OK.
*/
Eterm
erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc)
{
byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
const byte *text = name;
int tlen = len;
Sint no_latin1_chars;
Atom a;
int aix;
#ifdef ERTS_ATOM_PUT_OPS_STAT
erts_smp_atomic_inc_nob(&atom_put_ops);
#endif
/* A negative length is only tolerated when truncation is allowed. */
if (tlen < 0) {
if (trunc)
tlen = 0;
else
return THE_NON_VALUE;
}
switch (enc) {
case ERTS_ATOM_ENC_7BIT_ASCII:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return THE_NON_VALUE;
}
#ifdef DEBUG
/* Note: checks all 'len' input bytes, not only the first 'tlen'. */
for (aix = 0; aix < len; aix++) {
ASSERT((name[aix] & 0x80) == 0);
}
#endif
no_latin1_chars = tlen;
break;
case ERTS_ATOM_ENC_LATIN1:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return THE_NON_VALUE;
}
no_latin1_chars = tlen;
/*
* If the name contains 8-bit characters, 'text' and 'tlen' are
* redirected to a UTF-8 version in utf8_copy; otherwise they are
* left pointing at the caller's bytes.
*/
latin1_to_utf8(utf8_copy, &text, &tlen);
break;
case ERTS_ATOM_ENC_UTF8:
/* First sanity check; need to verify later */
/*
* NOTE(review): with 'trunc' set, a tlen above MAX_ATOM_SZ_LIMIT
* continues to the lookup below unclamped - confirm that a.len
* can represent such a value.
*/
if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
return THE_NON_VALUE;
break;
}
/* Look the name up first, holding only the read lock. */
a.len = tlen;
a.name = (byte *) text;
atom_read_lock();
aix = index_get(&erts_atom_table, (void*) &a);
atom_read_unlock();
if (aix >= 0) {
/* Already in table no need to verify it */
return make_atom(aix);
}
if (enc == ERTS_ATOM_ENC_UTF8) {
/* Need to verify encoding and length */
byte *err_pos;
Uint no_chars;
switch (erts_analyze_utf8_x((byte *) text,
(Uint) tlen,
&err_pos,
&no_chars, NULL,
&no_latin1_chars,
MAX_ATOM_CHARACTERS)) {
case ERTS_UTF8_OK:
ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
break;
case ERTS_UTF8_OK_MAX_CHARS:
/* Truncated... */
if (!trunc)
return THE_NON_VALUE;
ASSERT(no_chars == MAX_ATOM_CHARACTERS);
/* Keep only the bytes that precede err_pos. */
tlen = err_pos - text;
break;
default:
/* Bad utf8... */
return THE_NON_VALUE;
}
}
ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
a.len = tlen;
a.latin1_chars = (Sint16) no_latin1_chars;
a.name = (byte *) text;
/*
* NOTE(review): the read lock was released above, so another thread
* may have inserted the same name in the meantime; this relies on
* index_put() coping with an already present entry - confirm.
*/
atom_write_lock();
aix = index_put(&erts_atom_table, (void*) &a);
atom_write_unlock();
return make_atom(aix);
}
/*
* Convenience wrapper around erts_atom_put() for 7-bit ASCII names.
* It passes 1 as the 'trunc' argument and returns whatever
* erts_atom_put() returns.
*/
Eterm
am_atom_put(const char* name, int len)
{
/* Assumes 7-bit ascii; use erts_atom_put() for other encodings... */
return erts_atom_put((byte *) name, len, ERTS_ATOM_ENC_7BIT_ASCII, 1);
}
/*
 * Return the 'entries' field of the atom table.
 *
 * In ERTS_SMP builds the table is read-locked while the field is read,
 * unless a crash dump is being written.
 */
int atom_table_size(void)
{
    int entries;
#ifdef ERTS_SMP
    const int need_lock = !ERTS_IS_CRASH_DUMPING;

    if (need_lock) {
        atom_read_lock();
    }
#endif

    entries = erts_atom_table.entries;

#ifdef ERTS_SMP
    if (need_lock) {
        atom_read_unlock();
    }
#endif
    return entries;
}
/*
 * Return the value reported by index_table_sz() for the atom table.
 *
 * In ERTS_SMP builds the table is read-locked during the call, unless
 * a crash dump is being written.
 */
int atom_table_sz(void)
{
    int table_sz;
#ifdef ERTS_SMP
    const int need_lock = !ERTS_IS_CRASH_DUMPING;

    if (need_lock) {
        atom_read_lock();
    }
#endif

    table_sz = index_table_sz(&erts_atom_table);

#ifdef ERTS_SMP
    if (need_lock) {
        atom_read_unlock();
    }
#endif
    return table_sz;
}
/*
 * Look up an existing atom by name; the atom table is never modified.
 *
 * name - the atom text, encoded as indicated by 'enc'.
 * len  - number of bytes in 'name'.
 * ap   - receives the atom when it is found; untouched otherwise.
 * enc  - encoding of 'name'. ERTS_ATOM_ENC_LATIN1 text is converted to
 *        UTF-8 before the lookup; any other encoding is looked up as is.
 *
 * Returns 1 if the atom exists (and stores it in *ap), 0 otherwise.
 * A name whose length is negative, would not fit the conversion buffer,
 * or cannot be represented in the length field of an Atom is reported
 * as not found.
 */
int
erts_atom_get(const char *name, int len, Eterm* ap, ErtsAtomEncoding enc)
{
    byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
    Atom a;
    int i;
    int res;

    /* A negative length would make atom_hash() run far out of bounds. */
    if (len < 0) {
        return 0;
    }

    a.name = (byte *)name;
    if (enc == ERTS_ATOM_ENC_LATIN1) {
        /*
         * latin1_to_utf8() may emit two bytes per input byte, so a
         * longer name would overflow utf8_copy.
         */
        if (len > (int) (sizeof(utf8_copy) / 2)) {
            return 0;
        }
        latin1_to_utf8(utf8_copy, (const byte**)&a.name, &len);
    }

    /*
     * Refuse lengths that change when narrowed: a wrapped length could
     * otherwise match a shorter, unrelated atom.
     */
    a.len = (Sint16) len;
    if (a.len != len) {
        return 0;
    }

    atom_read_lock();
    i = index_get(&erts_atom_table, (void*) &a);
    res = i < 0 ? 0 : (*ap = make_atom(i), 1);
    atom_read_unlock();
    return res;
}
/*
 * Report the atom text space counters.
 *
 * reserved - if non-NULL, receives the total amount of atom text space.
 * used     - if non-NULL, receives the amount of atom text space in use.
 *
 * In ERTS_SMP builds the counters are read under the atom table read
 * lock, unless a crash dump is being written.
 */
void
erts_atom_get_text_space_sizes(Uint *reserved, Uint *used)
{
#ifdef ERTS_SMP
    const int need_lock = !ERTS_IS_CRASH_DUMPING;

    if (need_lock) {
        atom_read_lock();
    }
#endif

    if (reserved) {
        *reserved = reserved_atom_space;
    }
    if (used) {
        *used = atom_space;
    }

#ifdef ERTS_SMP
    if (need_lock) {
        atom_read_unlock();
    }
#endif
}
/*
* Initialize the atom table: set up the table lock, the hash callbacks
* and the text space bookkeeping, create the index table, allocate the
* first text segment, and insert every name found in erl_atom_names[]
* (the array is terminated by a 0 entry).
*/
void
init_atom_table(void)
{
HashFunctions f;
int i;
Atom a;
erts_smp_rwmtx_opt_t rwmtx_opt = ERTS_SMP_RWMTX_OPT_DEFAULT_INITER;
rwmtx_opt.type = ERTS_SMP_RWMTX_TYPE_FREQUENT_READ;
rwmtx_opt.lived = ERTS_SMP_RWMTX_LONG_LIVED;
#ifdef ERTS_ATOM_PUT_OPS_STAT
erts_smp_atomic_init_nob(&atom_put_ops, 0);
#endif
erts_smp_rwmtx_init_opt(&atom_table_lock, &rwmtx_opt, "atom_tab");
/* Callbacks used by the index table for Atom entries. */
f.hash = (H_FUN) atom_hash;
f.cmp = (HCMP_FUN) atom_cmp;
f.alloc = (HALLOC_FUN) atom_alloc;
f.free = (HFREE_FUN) atom_free;
/* Start with no text segments and zeroed space counters. */
atom_text_pos = NULL;
atom_text_end = NULL;
reserved_atom_space = 0;
atom_space = 0;
text_list = NULL;
erts_index_init(ERTS_ALC_T_ATOM_TABLE, &erts_atom_table,
"atom_tab", ATOM_SIZE, erts_atom_table_size, f);
/* Make sure there is a text segment before the first atom is inserted. */
more_atom_space();
/* Ordinary atoms */
for (i = 0; erl_atom_names[i] != 0; i++) {
int ix;
a.len = strlen(erl_atom_names[i]);
a.latin1_chars = a.len;
a.name = (byte*)erl_atom_names[i];
a.slot.index = i;
#ifdef DEBUG
/* Verify 7-bit ascii */
for (ix = 0; ix < a.len; ix++) {
ASSERT((a.name[ix] & 0x80) == 0);
}
#endif
ix = index_put(&erts_atom_table, (void*) &a);
/*
* index_put() is expected to have created the entry through
* atom_alloc(), which copied the name into atom text space.
* Hand those bytes back and let the atom point directly at the
* string in erl_atom_names[] instead.
*/
atom_text_pos -= a.len;
atom_space -= a.len;
atom_tab(ix)->name = (byte*)erl_atom_names[i];
}
}
/*
 * Print every atom in the table, one per line, on the output
 * identified by 'to'/'to_arg'.
 *
 * The table is walked from the highest index down to 0; indices for
 * which erts_index_lookup() returns nothing are skipped.
 */
void
dump_atoms(int to, void *to_arg)
{
    int ix = erts_atom_table.entries;

    while (ix-- > 0) {
        if (!erts_index_lookup(&erts_atom_table, ix)) {
            continue;
        }
        erts_print(to, to_arg, "%T\n", make_atom(ix));
    }
}