aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_db.c
diff options
context:
space:
mode:
authorErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
committerErlang/OTP <[email protected]>2009-11-20 14:54:40 +0000
commit84adefa331c4159d432d22840663c38f155cd4c1 (patch)
treebff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/beam/erl_db.c
downloadotp-84adefa331c4159d432d22840663c38f155cd4c1.tar.gz
otp-84adefa331c4159d432d22840663c38f155cd4c1.tar.bz2
otp-84adefa331c4159d432d22840663c38f155cd4c1.zip
The R13B03 release.OTP_R13B03
Diffstat (limited to 'erts/emulator/beam/erl_db.c')
-rw-r--r--erts/emulator/beam/erl_db.c3631
1 files changed, 3631 insertions, 0 deletions
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c
new file mode 100644
index 0000000000..b02150008f
--- /dev/null
+++ b/erts/emulator/beam/erl_db.c
@@ -0,0 +1,3631 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 1996-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * This file contains the BIF interface functions and
+ * the handling of the "meta tables", i.e. the tables of
+ * db tables.
+ */
+
+/*
+#ifdef DEBUG
+#define HARDDEBUG 1
+#endif
+*/
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "error.h"
+#define ERTS_WANT_DB_INTERNAL__
+#include "erl_db.h"
+#include "bif.h"
+#include "big.h"
+
+
+erts_smp_atomic_t erts_ets_misc_mem_size; /* NOTE(review): presumably the counter adjusted via ERTS_ETS_MISC_MEM_ADD(); confirm in erl_db.h */
+
+/*
+** Utility macros
+*/
+
+/* Fetch the key of the tagged object 'obj', using the key position
+ * stored in 'tb' (tb->common.keypos); delegates to db_getkey(). */
+#define TERM_GETKEY(tb, obj) db_getkey((tb)->common.keypos, (obj))
+
+
+/* How safe are we from double-hits or missed objects
+** when iterating without fixation?
+**
+** ITERATION_SAFETY(Proc,Tab) below yields ITER_SAFE for tree tables and
+** whenever ONLY_WRITER(Proc,Tab) holds. For all other tables an SMP
+** build yields ITER_UNSAFE if the table is DB_FINE_LOCKED and
+** ITER_SAFE_LOCKED if it is not, while a non-SMP build always yields
+** ITER_SAFE_LOCKED. */
+enum DbIterSafety {
+ ITER_UNSAFE, /* Must fixate to be safe */
+ ITER_SAFE_LOCKED, /* Safe while table is locked, not between trap calls */
+ ITER_SAFE /* No need to fixate at all */
+};
+#ifdef ERTS_SMP
+# define ITERATION_SAFETY(Proc,Tab) \
+ ((IS_TREE_TABLE((Tab)->common.status) || ONLY_WRITER(Proc,Tab)) ? ITER_SAFE \
+ : (((Tab)->common.status & DB_FINE_LOCKED) ? ITER_UNSAFE : ITER_SAFE_LOCKED))
+#else
+# define ITERATION_SAFETY(Proc,Tab) \
+ ((IS_TREE_TABLE((Tab)->common.status) || ONLY_WRITER(Proc,Tab)) \
+ ? ITER_SAFE : ITER_SAFE_LOCKED)
+#endif
+
+#define DID_TRAP(P,Ret) (!is_value(Ret) && ((P)->freason == TRAP)) /* true when Ret is not a value and P's freason is TRAP */
+
+
+/*
+** The main meta table, containing all ets tables.
+** It is indexed by slot number. In SMP builds a slot is guarded by the
+** spinlock meta_main_tab_locks[slot % META_MAIN_TAB_LOCK_CNT]
+** (see meta_main_tab_lock() and meta_main_tab_unlock()).
+*/
+#ifdef ERTS_SMP
+# define META_MAIN_TAB_LOCK_CNT 16
+static union {
+ erts_smp_spinlock_t lck;
+ byte _cache_line_alignment[64]; /* makes each element at least 64 bytes; presumably one cache line per lock */
+}meta_main_tab_locks[META_MAIN_TAB_LOCK_CNT];
+#endif
+static struct {
+ union {
+ DbTable *tb; /* Only directly readable if slot is ALIVE */
+ Uint next_free; /* (index<<2)|1 if slot is FREE */
+ }u;
+} *meta_main_tab;
+
+/* A slot in meta_main_tab can have three states:
+ * FREE : Free to use for new table. Part of linked free-list.
+ * ALIVE: Contains a table
+ * DEAD : Contains a table that is being removed.
+ *
+ * The state is encoded in the two lowest bits of the slot word:
+ * bit 0 set means FREE, bit 1 set means DEAD, both clear means ALIVE.
+ * A FREE slot holds the index of the next free slot shifted left by two
+ * bits. GET_ANY_SLOT_TAB masks off both bits to recover the table
+ * pointer of a DEAD or ALIVE slot.
+ */
+#define IS_SLOT_FREE(i) (meta_main_tab[(i)].u.next_free & 1)
+#define IS_SLOT_DEAD(i) (meta_main_tab[(i)].u.next_free & 2)
+#define IS_SLOT_ALIVE(i) (!(meta_main_tab[(i)].u.next_free & (1|2)))
+#define GET_NEXT_FREE_SLOT(i) (meta_main_tab[(i)].u.next_free >> 2)
+#define SET_NEXT_FREE_SLOT(i,next) (meta_main_tab[(i)].u.next_free = ((next)<<2)|1)
+#define MARK_SLOT_DEAD(i) (meta_main_tab[(i)].u.next_free |= 2)
+#define GET_ANY_SLOT_TAB(i) ((DbTable*)(meta_main_tab[(i)].u.next_free & ~(1|2))) /* dead or alive */
+
+/*
+ * Take the spinlock that guards meta_main_tab slot 'slot'.
+ * Only SMP builds lock anything; otherwise the body is empty.
+ */
+static ERTS_INLINE void meta_main_tab_lock(unsigned slot)
+{
+#ifdef ERTS_SMP
+    const unsigned lock_ix = slot % META_MAIN_TAB_LOCK_CNT;
+
+    erts_smp_spin_lock(&meta_main_tab_locks[lock_ix].lck);
+#endif
+}
+
+/*
+ * Release the spinlock that guards meta_main_tab slot 'slot'.
+ * Only SMP builds unlock anything; otherwise the body is empty.
+ */
+static ERTS_INLINE void meta_main_tab_unlock(unsigned slot)
+{
+#ifdef ERTS_SMP
+    const unsigned lock_ix = slot % META_MAIN_TAB_LOCK_CNT;
+
+    erts_smp_spin_unlock(&meta_main_tab_locks[lock_ix].lck);
+#endif
+}
+
+static erts_smp_spinlock_t meta_main_tab_main_lock; /* held while updating meta_main_tab_first_free and meta_main_tab_cnt */
+static Uint meta_main_tab_first_free; /* Index of first free slot */
+static int meta_main_tab_cnt; /* Number of active tables */
+static Uint meta_main_tab_slot_mask; /* The slot index part of an unnamed table id */
+static Uint meta_main_tab_seq_incr; /* step added to meta_main_tab_seq_cnt for every unnamed table created */
+static Uint meta_main_tab_seq_cnt = 0; /* To give unique(-ish) table identifiers */
+
+
+
+/*
+** The meta hash table of all NAMED ets tables
+** In SMP builds bucket 'bix' is guarded by the read/write lock
+** meta_name_tab_rwlocks[bix % META_NAME_TAB_LOCK_CNT]
+** (see meta_name_tab_bucket()).
+*/
+#ifdef ERTS_SMP
+# define META_NAME_TAB_LOCK_CNT 16
+union {
+ erts_smp_rwmtx_t lck;
+ byte _cache_line_alignment[64]; /* makes each element at least 64 bytes; presumably one cache line per lock */
+}meta_name_tab_rwlocks[META_NAME_TAB_LOCK_CNT];
+#endif
+static struct meta_name_tab_entry { /* a bucket is empty (pu.tb == NULL), holds one table, or holds a vector of entries */
+ union {
+ Eterm name_atom; /* table name when this entry describes a single table */
+ Eterm mcnt; /* Length of mvec in multiple tab entry */
+ }u;
+ union {
+ DbTable *tb; /* the named table; NULL marks an empty bucket */
+ struct meta_name_tab_entry* mvec; /* entry vector of a multiple tab bucket */
+ }pu;
+} *meta_name_tab;
+
+static unsigned meta_name_tab_mask; /* ANDed with atom_val(name) to select the bucket index */
+
+/*
+ * Locate the meta_name_tab bucket for the table name 'name'.
+ * The bucket index is atom_val(name) masked with meta_name_tab_mask.
+ * In SMP builds *lockp is set to the rwlock guarding that bucket;
+ * in non-SMP builds *lockp is left untouched.
+ */
+static ERTS_INLINE
+struct meta_name_tab_entry* meta_name_tab_bucket(Eterm name,
+                                                 erts_smp_rwmtx_t** lockp)
+{
+    const unsigned index = atom_val(name) & meta_name_tab_mask;
+
+#ifdef ERTS_SMP
+    *lockp = &meta_name_tab_rwlocks[index % META_NAME_TAB_LOCK_CNT].lck;
+#endif
+    return &meta_name_tab[index];
+}
+
+
+typedef enum { /* lock modes accepted by db_lock(), db_unlock() and db_get_table() */
+ LCK_READ=1, /* read only access */
+ LCK_WRITE=2, /* exclusive table write access */
+ LCK_WRITE_REC=3 /* record write access */
+} db_lock_kind_t;
+
+extern DbTableMethod db_hash; /* method table selected for hash tables in ets_new_2 */
+extern DbTableMethod db_tree; /* method table selected for tree tables in ets_new_2 */
+
+int user_requested_db_max_tabs;
+int erts_ets_realloc_always_moves;
+static int db_max_tabs; /* upper bound for slot indices and for meta_main_tab_cnt */
+static DbTable *meta_pid_to_tab; /* Pid mapped to owned tables */
+static DbTable *meta_pid_to_fixed_tab; /* Pid mapped to fixed tables */
+static Eterm ms_delete_all;
+static Eterm ms_delete_all_buff[8]; /* To compare with for deletion
+ of all objects */
+
+/*
+** Forward declarations of static functions used before their definition
+*/
+
+static void fix_table_locked(Process* p, DbTable* tb);
+static void unfix_table_locked(Process* p, DbTable* tb, db_lock_kind_t* kind);
+static void set_heir(Process* me, DbTable* tb, Eterm heir, Eterm heir_data);
+static void free_heir_data(DbTable*);
+static void free_fixations_locked(DbTable *tb);
+
+static int free_table_cont(Process *p,
+ DbTable *tb,
+ int first,
+ int clean_meta_tab);
+static void print_table(int to, void *to_arg, int show, DbTable* tb);
+static BIF_RETTYPE ets_select_delete_1(Process *p, Eterm a1);
+static BIF_RETTYPE ets_select_count_1(Process *p, Eterm a1);
+static BIF_RETTYPE ets_select_trap_1(Process *p, Eterm a1);
+static BIF_RETTYPE ets_delete_trap(Process *p, Eterm a1);
+static Eterm table_info(Process* p, DbTable* tb, Eterm What);
+
+/*
+ * Exported globals (Export entries with external linkage).
+ * NOTE(review): presumably the trap targets of the select BIFs; confirm
+ * where they are initialised.
+ */
+Export ets_select_delete_continue_exp;
+Export ets_select_count_continue_exp;
+Export ets_select_continue_exp;
+
+/*
+ * Static traps (file-local Export entry)
+ */
+static Export ets_delete_continue_exp;
+
+/*
+ * Add one reference to 'tb' and hand the pointer back.
+ * A NULL 'tb' is accepted and returned unchanged.
+ */
+static ERTS_INLINE DbTable* db_ref(DbTable* tb)
+{
+    if (tb == NULL) {
+        return NULL;
+    }
+    erts_refc_inc(&tb->common.ref, 2);
+    return tb;
+}
+
+static ERTS_INLINE DbTable* db_unref(DbTable* tb)
+{ /* Drop one reference to 'tb'. If erts_refc_dectest() returns zero, the
+   * table's locks are destroyed (SMP builds), the DbTable struct is freed
+   * and NULL is returned; otherwise 'tb' is returned unchanged. */
+ if (!erts_refc_dectest(&tb->common.ref, 0)) {
+#ifdef HARDDEBUG
+ if (erts_smp_atomic_read(&tb->common.memory_size) != sizeof(DbTable)) {
+ erts_fprintf(stderr, "ets: db_unref memory remain=%ld fix=%x\n",
+ erts_smp_atomic_read(&tb->common.memory_size)-sizeof(DbTable),
+ tb->common.fixations);
+ }
+ erts_fprintf(stderr, "ets: db_unref(%T) deleted!!!\r\n",
+ tb->common.id);
+
+ erts_fprintf(stderr, "ets: db_unref: meta_pid_to_tab common.memory_size = %ld\n",
+ erts_smp_atomic_read(&meta_pid_to_tab->common.memory_size));
+ print_table(ERTS_PRINT_STDOUT, NULL, 1, meta_pid_to_tab);
+
+
+ erts_fprintf(stderr, "ets: db_unref: meta_pid_to_fixed_tab common.memory_size = %ld\n",
+ erts_smp_atomic_read(&meta_pid_to_fixed_tab->common.memory_size));
+ print_table(ERTS_PRINT_STDOUT, NULL, 1, meta_pid_to_fixed_tab);
+
+#endif
+#ifdef ERTS_SMP
+ erts_smp_rwmtx_destroy(&tb->common.rwlock);
+ erts_smp_mtx_destroy(&tb->common.fixlock);
+#endif
+ ASSERT(is_immed(tb->common.heir_data)); /* heir data is expected to be an immediate term by now */
+ erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable));
+ ERTS_ETS_MISC_MEM_ADD(-sizeof(DbTable));
+ return NULL;
+ }
+ return tb;
+}
+
+static ERTS_INLINE void db_init_lock(DbTable* tb, char *rwname, char* fixname)
+{ /* Initialise the reference counters of 'tb' (ref = 1, fixref = 0) and,
+   * in SMP builds, its rwlock (named 'rwname') and fixlock (named
+   * 'fixname'). Reads tb->common.status, and with lock counting enabled
+   * also tb->common.the_name, so both must be set before the call. */
+ erts_refc_init(&tb->common.ref, 1);
+ erts_refc_init(&tb->common.fixref, 0);
+#ifdef ERTS_SMP
+# ifdef ERTS_ENABLE_LOCK_COUNT
+ erts_smp_rwmtx_init_x(&tb->common.rwlock, rwname, tb->common.the_name);
+ erts_smp_mtx_init_x(&tb->common.fixlock, fixname, tb->common.the_name);
+# else
+ erts_smp_rwmtx_init(&tb->common.rwlock, rwname);
+ erts_smp_mtx_init(&tb->common.fixlock, fixname);
+# endif
+ tb->common.is_thread_safe = !(tb->common.status & DB_FINE_LOCKED); /* fine-locked tables start out as not thread safe */
+#endif
+}
+
+static ERTS_INLINE void db_lock_take_over_ref(DbTable* tb, db_lock_kind_t kind)
+{ /* Lock 'tb' in mode 'kind' without incrementing its reference count;
+   * the caller is expected to hold a reference already (db_unlock()
+   * drops one). Does nothing in non-SMP builds. */
+#ifdef ERTS_SMP
+ ASSERT(tb != meta_pid_to_tab && tb != meta_pid_to_fixed_tab);
+ if (tb->common.type & DB_FINE_LOCKED) {
+ if (kind == LCK_WRITE) { /* whole-table write: exclusive lock */
+ erts_smp_rwmtx_rwlock(&tb->common.rwlock);
+ tb->common.is_thread_safe = 1;
+ } else { /* LCK_READ and LCK_WRITE_REC both take the shared (read) lock */
+ erts_smp_rwmtx_rlock(&tb->common.rwlock);
+ ASSERT(!tb->common.is_thread_safe);
+ }
+ }
+ else
+ {
+ switch (kind) {
+ case LCK_WRITE:
+ case LCK_WRITE_REC: /* not fine locked: record writes also take the exclusive lock */
+ erts_smp_rwmtx_rwlock(&tb->common.rwlock);
+ break;
+ default:
+ erts_smp_rwmtx_rlock(&tb->common.rwlock);
+ }
+ ASSERT(tb->common.is_thread_safe);
+ }
+#endif
+}
+
+/*
+ * Take a new reference to 'tb' and then lock it in mode 'kind'.
+ * In non-SMP builds only the reference is taken.
+ */
+static ERTS_INLINE void db_lock(DbTable* tb, db_lock_kind_t kind)
+{
+    /* The returned pointer is not needed here. */
+    (void) db_ref(tb);
+
+#ifdef ERTS_SMP
+    db_lock_take_over_ref(tb, kind);
+#endif
+}
+
+static ERTS_INLINE void db_unlock(DbTable* tb, db_lock_kind_t kind)
+{ /* Release the table lock held in mode 'kind' (SMP builds) and drop one
+   * reference to 'tb'. 'tb' may have been freed when this returns. */
+#ifdef ERTS_SMP
+ ASSERT(tb != meta_pid_to_tab && tb != meta_pid_to_fixed_tab);
+
+ if (tb->common.type & DB_FINE_LOCKED) {
+ if (tb->common.is_thread_safe) { /* set only while the exclusive LCK_WRITE lock is held */
+ ASSERT(kind == LCK_WRITE);
+ tb->common.is_thread_safe = 0;
+ erts_smp_rwmtx_rwunlock(&tb->common.rwlock);
+ }
+ else {
+ ASSERT(kind != LCK_WRITE);
+ erts_smp_rwmtx_runlock(&tb->common.rwlock);
+ }
+ }
+ else {
+ ASSERT(tb->common.is_thread_safe);
+ switch (kind) {
+ case LCK_WRITE:
+ case LCK_WRITE_REC:
+ erts_smp_rwmtx_rwunlock(&tb->common.rwlock);
+ break;
+ default:
+ erts_smp_rwmtx_runlock(&tb->common.rwlock);
+ }
+ }
+#endif
+ (void) db_unref(tb); /* May delete table... */
+}
+
+
+/*
+ * "Lock" one of the two pid meta tables. Nothing is actually locked:
+ * the function only asserts that 'tb' is meta_pid_to_tab or
+ * meta_pid_to_fixed_tab and that 'kind' is not LCK_WRITE.
+ */
+static ERTS_INLINE void db_meta_lock(DbTable* tb, db_lock_kind_t kind)
+{
+    ASSERT(tb == meta_pid_to_tab || tb == meta_pid_to_fixed_tab);
+    ASSERT(kind != LCK_WRITE);
+
+    /* As long as we only lock for READ we don't have to lock at all. */
+}
+
+/*
+ * Counterpart of db_meta_lock(). Nothing is unlocked: the function only
+ * asserts that 'tb' is one of the two pid meta tables and that 'kind'
+ * is not LCK_WRITE.
+ */
+static ERTS_INLINE void db_meta_unlock(DbTable* tb, db_lock_kind_t kind)
+{
+    ASSERT(tb == meta_pid_to_tab || tb == meta_pid_to_fixed_tab);
+    ASSERT(kind != LCK_WRITE);
+}
+
+static ERTS_INLINE
+DbTable* db_get_table(Process *p,
+ Eterm id,
+ int what,
+ db_lock_kind_t kind)
+{ /* Look up the table with identifier 'id' (a small integer or an atom)
+   * and lock it in mode 'kind'. Returns the locked table, or NULL if no
+   * such table is found or if (status & what) is zero and 'p' is not
+   * the owner. Callers release a returned table with db_unlock(). */
+ DbTable *tb = NULL;
+
+ if (is_small(id)) { /* unnamed table: the slot index is part of the id */
+ Uint slot = unsigned_val(id) & meta_main_tab_slot_mask;
+ meta_main_tab_lock(slot);
+ if (slot < db_max_tabs && IS_SLOT_ALIVE(slot)) {
+ /* SMP: inc to prevent race, between unlock of meta_main_tab_lock
+ * and the table locking outside the meta_main_tab_lock
+ */
+ tb = db_ref(meta_main_tab[slot].u.tb);
+ }
+ meta_main_tab_unlock(slot);
+ }
+ else if (is_atom(id)) { /* named table: search the name hash bucket */
+ erts_smp_rwmtx_t* rwlock;
+ struct meta_name_tab_entry* bucket = meta_name_tab_bucket(id,&rwlock);
+ erts_smp_rwmtx_rlock(rwlock);
+ if (bucket->pu.tb != NULL) {
+ if (is_atom(bucket->u.name_atom)) { /* single */
+ if (bucket->u.name_atom == id) {
+ tb = db_ref(bucket->pu.tb);
+ }
+ }
+ else { /* multi */
+ Uint cnt = unsigned_val(bucket->u.mcnt);
+ Uint i;
+ for (i=0; i<cnt; i++) {
+ if (bucket->pu.mvec[i].u.name_atom == id) {
+ tb = db_ref(bucket->pu.mvec[i].pu.tb);
+ break;
+ }
+ }
+ }
+ }
+ erts_smp_rwmtx_runlock(rwlock);
+ }
+ if (tb) {
+ db_lock_take_over_ref(tb, kind); /* uses the reference taken by db_ref() above */
+ if (tb->common.id == id && ((tb->common.status & what) != 0 ||
+ p->id == tb->common.owner)) { /* id is checked again with the table lock held, then access rights */
+ return tb;
+ }
+ db_unlock(tb, kind);
+ }
+ return NULL;
+}
+
+/*
+ * Put 'slot' at the head of the free-slot list and decrement the count
+ * of active tables, both under meta_main_tab_main_lock.
+ * Requires meta_main_tab_locks[slot] locked.
+ */
+static ERTS_INLINE void free_slot(int slot)
+{
+    ASSERT(!IS_SLOT_FREE(slot));
+
+    erts_smp_spin_lock(&meta_main_tab_main_lock);
+    SET_NEXT_FREE_SLOT(slot, meta_main_tab_first_free);
+    meta_main_tab_first_free = slot;
+    --meta_main_tab_cnt;
+    erts_smp_spin_unlock(&meta_main_tab_main_lock);
+}
+
+static int insert_named_tab(Eterm name_atom, DbTable* tb)
+{ /* Register 'tb' under 'name_atom' in meta_name_tab, holding the
+   * bucket's rwlock exclusively. Returns 1 on success and 0 if the name
+   * is already present. An empty bucket is used directly; a single
+   * bucket is converted to a two-element vector; a vector bucket is
+   * grown by one element. */
+ int ret = 0;
+ erts_smp_rwmtx_t* rwlock;
+ struct meta_name_tab_entry* new_entry;
+ struct meta_name_tab_entry* bucket = meta_name_tab_bucket(name_atom,
+ &rwlock);
+
+ erts_smp_rwmtx_rwlock(rwlock);
+
+ if (bucket->pu.tb == NULL) { /* empty */
+ new_entry = bucket;
+ }
+ else {
+ struct meta_name_tab_entry* entries;
+ Uint cnt;
+ if (is_atom(bucket->u.name_atom)) { /* single */
+ size_t size;
+ if (bucket->u.name_atom == name_atom) {
+ goto done; /* name already taken, ret stays 0 */
+ }
+ cnt = 2;
+ size = sizeof(struct meta_name_tab_entry)*cnt;
+ entries = erts_db_alloc_nt(ERTS_ALC_T_DB_NTAB_ENT, size);
+ ERTS_ETS_MISC_MEM_ADD(size);
+ new_entry = &entries[0];
+ entries[1] = *bucket; /* the previous single entry moves into the vector */
+ }
+ else { /* multi */
+ size_t size, old_size;
+ Uint i;
+ cnt = unsigned_val(bucket->u.mcnt);
+ for (i=0; i<cnt; i++) {
+ if (bucket->pu.mvec[i].u.name_atom == name_atom) {
+ goto done; /* name already taken, ret stays 0 */
+ }
+ }
+ old_size = sizeof(struct meta_name_tab_entry)*cnt;
+ size = sizeof(struct meta_name_tab_entry)*(cnt+1);
+ entries = erts_db_realloc_nt(ERTS_ALC_T_DB_NTAB_ENT,
+ bucket->pu.mvec,
+ old_size,
+ size);
+ ERTS_ETS_MISC_MEM_ADD(size-old_size);
+ new_entry = &entries[cnt];
+ cnt++;
+ }
+ bucket->pu.mvec = entries;
+ bucket->u.mcnt = make_small(cnt); /* a small integer here (not an atom) marks the bucket as multi */
+ }
+ new_entry->pu.tb = tb;
+ new_entry->u.name_atom = name_atom;
+ ret = 1; /* Ok */
+
+done:
+ erts_smp_rwmtx_rwunlock(rwlock);
+ return ret;
+}
+
+static int remove_named_tab(Eterm name_atom)
+{ /* Remove the entry for 'name_atom' from meta_name_tab, holding the
+   * bucket's rwlock exclusively. Returns 1 if an entry was removed and 0
+   * if the name was not found. A two-element vector collapses back to a
+   * single bucket; larger vectors shrink by one element. */
+ int ret = 0;
+ erts_smp_rwmtx_t* rwlock;
+ struct meta_name_tab_entry* bucket = meta_name_tab_bucket(name_atom,
+ &rwlock);
+ erts_smp_rwmtx_rwlock(rwlock);
+ if (bucket->pu.tb == NULL) { /* empty bucket: nothing to remove */
+ goto done;
+ }
+ else if (is_atom(bucket->u.name_atom)) { /* single */
+ if (bucket->u.name_atom != name_atom) {
+ goto done;
+ }
+ bucket->pu.tb = NULL; /* marks the bucket as empty */
+ }
+ else { /* multi */
+ Uint cnt = unsigned_val(bucket->u.mcnt);
+ Uint i = 0;
+ for (;;) { /* find the index i of the entry to remove */
+ if (bucket->pu.mvec[i].u.name_atom == name_atom) {
+ break;
+ }
+ if (++i >= cnt) {
+ goto done;
+ }
+ }
+ if (cnt == 2) { /* multi -> single */
+ size_t size;
+ struct meta_name_tab_entry* entries = bucket->pu.mvec;
+ *bucket = entries[1-i]; /* keep the other of the two entries */
+ size = sizeof(struct meta_name_tab_entry)*cnt;
+ erts_db_free_nt(ERTS_ALC_T_DB_NTAB_ENT, entries, size);
+ ERTS_ETS_MISC_MEM_ADD(-size);
+ ASSERT(is_atom(bucket->u.name_atom));
+ }
+ else {
+ size_t size, old_size;
+ ASSERT(cnt > 2);
+ bucket->u.mcnt = make_small(--cnt);
+ if (i != cnt) {
+ /* reposition last one before realloc destroys it */
+ bucket->pu.mvec[i] = bucket->pu.mvec[cnt];
+ }
+ old_size = sizeof(struct meta_name_tab_entry)*(cnt+1);
+ size = sizeof(struct meta_name_tab_entry)*cnt;
+ bucket->pu.mvec = erts_db_realloc_nt(ERTS_ALC_T_DB_NTAB_ENT,
+ bucket->pu.mvec,
+ old_size,
+ size);
+ ERTS_ETS_MISC_MEM_ADD(size - old_size);
+
+ }
+ }
+ ret = 1; /* Ok */
+
+done:
+ erts_smp_rwmtx_rwunlock(rwlock);
+ return ret;
+}
+
+/*
+** Fast fixation of a hash table: only the table's fixref counter is
+** incremented.
+** Must be matched by a local unfix before releasing table lock.
+*/
+static ERTS_INLINE void local_fix_table(DbTable* tb)
+{
+    erts_refc_inc(&tb->common.fixref, 1);
+}
+/*
+** Undo one local_fix_table(). When the fixref counter drops to zero the
+** hash table is unfixed through db_unfix_table_hash().
+*/
+static ERTS_INLINE void local_unfix_table(DbTable* tb)
+{
+    if (erts_refc_dectest(&tb->common.fixref, 0) != 0) {
+        return; /* the fix count has not reached zero yet */
+    }
+    ASSERT(IS_HASH_TABLE(tb->common.status));
+    db_unfix_table_hash(&(tb->hash));
+}
+
+
+/*
+ * BIFs.
+ */
+
+BIF_RETTYPE ets_safe_fixtable_2(BIF_ALIST_2)
+{ /* ets:safe_fixtable(Tab, true | false).
+   * Fixes (true) or unfixes (false) the table on behalf of the calling
+   * process and returns true. Fails with badarg if the table cannot be
+   * obtained with read access or if the second argument is neither
+   * true nor false. */
+ DbTable *tb;
+ db_lock_kind_t kind;
+#ifdef HARDDEBUG
+ erts_fprintf(stderr,
+ "ets:safe_fixtable(%T,%T); Process: %T, initial: %T:%T/%bpu\n",
+ BIF_ARG_1, BIF_ARG_2, BIF_P->id,
+ BIF_P->initial[0], BIF_P->initial[1], BIF_P->initial[2]);
+#endif
+ kind = (BIF_ARG_2 == am_true) ? LCK_READ : LCK_WRITE_REC; /* anything but 'true' locks with LCK_WRITE_REC */
+
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, kind)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ if (BIF_ARG_2 == am_true) {
+ fix_table_locked(BIF_P, tb);
+ }
+ else if (BIF_ARG_2 == am_false) {
+ if (IS_FIXED(tb)) {
+ unfix_table_locked(BIF_P, tb, &kind); /* 'kind' is passed by pointer, so it may be changed before the db_unlock() below */
+ }
+ }
+ else { /* not a boolean: release the table before failing */
+ db_unlock(tb, kind);
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ db_unlock(tb, kind);
+ BIF_RET(am_true);
+}
+
+
+/*
+** ets:first(Tab)
+** Returns the first Key in a table, as produced by the table type's
+** db_first method. Fails with badarg if the table cannot be read or
+** if db_first reports an error.
+*/
+BIF_RETTYPE ets_first_1(BIF_ALIST_1)
+{
+    Eterm first_key;
+    int status;
+    DbTable* table;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_first(BIF_P, table, &first_key);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(first_key);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+** ets:next(Tab, Key)
+** Given a key, returns the "next" key as produced by the table type's
+** db_next method. Fails with badarg if the table cannot be read or if
+** db_next reports an error.
+*/
+BIF_RETTYPE ets_next_2(BIF_ALIST_2)
+{
+    Eterm next_key;
+    int status;
+    DbTable* table;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_next(BIF_P, table, BIF_ARG_2, &next_key);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(next_key);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+** ets:last(Tab)
+** Returns the last Key in a table, as produced by the table type's
+** db_last method. Fails with badarg if the table cannot be read or if
+** db_last reports an error.
+*/
+BIF_RETTYPE ets_last_1(BIF_ALIST_1)
+{
+    Eterm last_key;
+    int status;
+    DbTable* table;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_last(BIF_P, table, &last_key);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(last_key);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+** ets:prev(Tab, Key)
+** Given a key, returns the "previous" key as produced by the table
+** type's db_prev method. Fails with badarg if the table cannot be read
+** or if db_prev reports an error.
+*/
+BIF_RETTYPE ets_prev_2(BIF_ALIST_2)
+{
+    Eterm prev_key;
+    int status;
+    DbTable* table;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_prev(BIF_P, table, BIF_ARG_2, &prev_key);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(prev_key);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+** update_element(Tab, Key, {Pos, Value})
+** update_element(Tab, Key, [{Pos, Value}])
+**
+** Returns true if the object was updated and false if Key was not found.
+** Fails with badarg if the table is not accessible for writing, is not
+** a set or ordered_set, if the update list is malformed, or if a Pos is
+** below 1, equal to the key position or larger than the arity of the
+** stored tuple. The whole list is validated before anything is
+** changed.
+*/
+BIF_RETTYPE ets_update_element_3(BIF_ALIST_3)
+{
+ DbTable* tb;
+ int cret = DB_ERROR_BADITEM;
+ Eterm list;
+ Eterm iter;
+ Eterm cell[2];
+ DbUpdateHandle handle;
+
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) {
+ goto bail_out;
+ }
+ if (is_tuple(BIF_ARG_3)) { /* wrap a single {Pos,Value} in a one-element list built in 'cell' */
+ list = CONS(cell, BIF_ARG_3, NIL);
+ }
+ else {
+ list = BIF_ARG_3;
+ }
+
+ if (!tb->common.meth->db_lookup_dbterm(tb, BIF_ARG_2, &handle)) {
+ cret = DB_ERROR_BADKEY;
+ goto bail_out; /* no handle was obtained, so db_finalize_dbterm is skipped */
+ }
+
+ /* First verify that list is ok to avoid nasty rollback scenarios
+ */
+ for (iter=list ; is_not_nil(iter); iter = CDR(list_val(iter))) {
+ Eterm pv;
+ Eterm* pvp;
+ Sint position;
+
+ if (is_not_list(iter)) {
+ goto finalize;
+ }
+ pv = CAR(list_val(iter)); /* {Pos,Value} */
+ if (is_not_tuple(pv)) {
+ goto finalize;
+ }
+ pvp = tuple_val(pv);
+ if (arityval(*pvp) != 2 || !is_small(pvp[1])) {
+ goto finalize;
+ }
+ position = signed_val(pvp[1]);
+ if (position < 1 || position == tb->common.keypos ||
+ position > arityval(handle.dbterm->tpl[0])) {
+ goto finalize;
+ }
+ }
+ /* The point of no return, no failures from here on.
+ */
+ cret = DB_ERROR_NONE;
+
+ for (iter=list ; is_not_nil(iter); iter = CDR(list_val(iter))) {
+ Eterm* pvp = tuple_val(CAR(list_val(iter))); /* {Pos,Value} */
+ db_do_update_element(&handle, signed_val(pvp[1]), pvp[2]);
+ }
+
+finalize:
+ tb->common.meth->db_finalize_dbterm(&handle);
+
+bail_out:
+ db_unlock(tb, LCK_WRITE_REC);
+
+ switch (cret) {
+ case DB_ERROR_NONE:
+ BIF_RET(am_true);
+ case DB_ERROR_BADKEY:
+ BIF_RET(am_false);
+ case DB_ERROR_SYSRES:
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+ default:
+ BIF_ERROR(BIF_P, BADARG);
+ break;
+ }
+}
+
+/*
+** update_counter(Tab, Key, Incr)
+** update_counter(Tab, Key, {Upop})
+** update_counter(Tab, Key, [{Upop}])
+** Upop = {Pos,Incr} | {Pos,Incr,Threshold,WarpTo}
+** Returns new value(s) (integer or [integer])
+**
+** A bare Incr is applied to the element at keypos+1. Fails with badarg
+** if the table is not accessible for writing, is not a set or
+** ordered_set, if Key is not found, if the operation list is
+** malformed, or if a Pos is out of range, is the key position or
+** refers to a non-integer element. The whole list is validated, and
+** the heap need estimated, before anything is changed.
+*/
+BIF_RETTYPE ets_update_counter_3(BIF_ALIST_3)
+{
+ DbTable* tb;
+ int cret = DB_ERROR_BADITEM;
+ Eterm upop_list;
+ int list_size;
+ Eterm ret; /* int or [int] */
+ Eterm* ret_list_currp = NULL;
+ Eterm* ret_list_prevp = NULL;
+ Eterm iter;
+ Eterm cell[2];
+ Eterm tuple[3];
+ DbUpdateHandle handle;
+ Uint halloc_size = 0; /* overestimated heap usage */
+ Eterm* htop; /* actual heap usage */
+ Eterm* hstart;
+ Eterm* hend;
+
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) {
+ goto bail_out;
+ }
+ if (is_integer(BIF_ARG_3)) { /* Incr */
+ upop_list = CONS(cell, TUPLE2(tuple, make_small(tb->common.keypos+1),
+ BIF_ARG_3), NIL);
+ }
+ else if (is_tuple(BIF_ARG_3)) { /* {Upop} */
+ upop_list = CONS(cell, BIF_ARG_3, NIL);
+ }
+ else { /* [{Upop}] (probably) */
+ upop_list = BIF_ARG_3;
+ ret_list_prevp = &ret; /* non-NULL also serves as the "return a list" flag below */
+ }
+
+ if (!tb->common.meth->db_lookup_dbterm(tb, BIF_ARG_2, &handle)) {
+ goto bail_out; /* key not found */
+ }
+
+ /* First verify that list is ok to avoid nasty rollback scenarios
+ */
+ list_size = 0;
+ for (iter=upop_list ; is_not_nil(iter); iter = CDR(list_val(iter)),
+ list_size += 2) {
+ Eterm upop;
+ Eterm* tpl;
+ Sint position;
+ Eterm incr, warp, oldcnt;
+
+ if (is_not_list(iter)) {
+ goto finalize;
+ }
+ upop = CAR(list_val(iter));
+ if (is_not_tuple(upop)) {
+ goto finalize;
+ }
+ tpl = tuple_val(upop);
+ switch (arityval(*tpl)) {
+ case 4: /* threshold specified */
+ if (is_not_integer(tpl[3])) {
+ goto finalize;
+ }
+ warp = tpl[4];
+ if (is_big(warp)) {
+ halloc_size += BIG_NEED_SIZE(big_arity(warp));
+ }
+ else if (is_not_small(warp)) {
+ goto finalize;
+ }
+ /* Fall through */
+ case 2:
+ if (!is_small(tpl[1])) {
+ goto finalize;
+ }
+ incr = tpl[2];
+ if (is_big(incr)) {
+ halloc_size += BIG_NEED_SIZE(big_arity(incr));
+ }
+ else if (is_not_small(incr)) {
+ goto finalize;
+ }
+ position = signed_val(tpl[1]);
+ if (position < 1 || position == tb->common.keypos ||
+ position > arityval(handle.dbterm->tpl[0])) {
+ goto finalize;
+ }
+ oldcnt = handle.dbterm->tpl[position];
+ if (is_big(oldcnt)) {
+ halloc_size += BIG_NEED_SIZE(big_arity(oldcnt));
+ }
+ else if (is_not_small(oldcnt)) {
+ goto finalize;
+ }
+ break;
+ default:
+ goto finalize;
+ }
+ halloc_size += 2; /* worst growth case: small(0)+small(0)=big(2) */
+ }
+
+ /* The point of no return, no failures from here on.
+ */
+ cret = DB_ERROR_NONE;
+
+ if (ret_list_prevp) { /* Prepare to return a list */
+ ret = NIL;
+ halloc_size += list_size;
+ hstart = HAlloc(BIF_P, halloc_size);
+ ret_list_currp = hstart; /* the list cells occupy the first list_size words */
+ htop = hstart + list_size; /* counter results are built after the cells */
+ hend = hstart + halloc_size; /* redundant: assigned again right after this if/else */
+ }
+ else {
+ hstart = htop = HAlloc(BIF_P, halloc_size);
+ }
+ hend = hstart + halloc_size;
+
+ for (iter=upop_list ; is_not_nil(iter); iter = CDR(list_val(iter))) {
+
+ Eterm* tpl = tuple_val(CAR(list_val(iter)));
+ Sint position = signed_val(tpl[1]);
+ Eterm incr = tpl[2];
+ Eterm oldcnt = handle.dbterm->tpl[position];
+ Eterm newcnt = db_add_counter(&htop, oldcnt, incr);
+
+ if (newcnt == NIL) {
+ cret = DB_ERROR_SYSRES; /* Can only happen if BIG_ARITY_MAX */
+ ret = NIL; /* is reached, ie should not happen */
+ htop = hstart;
+ break;
+ }
+ ASSERT(is_integer(newcnt));
+
+ if (arityval(*tpl) == 4) { /* Maybe warp it */
+ Eterm threshold = tpl[3];
+ if ((cmp(incr,make_small(0)) < 0) ? /* negative increment? */
+ (cmp(newcnt,threshold) < 0) : /* if negative, check if below */
+ (cmp(newcnt,threshold) > 0)) { /* else check if above threshold */
+
+ newcnt = tpl[4];
+ }
+ }
+
+ db_do_update_element(&handle,position,newcnt);
+
+ if (ret_list_prevp) { /* append newcnt to the result list */
+ *ret_list_prevp = CONS(ret_list_currp,newcnt,NIL);
+ ret_list_prevp = &CDR(ret_list_currp);
+ ret_list_currp += 2;
+ }
+ else { /* single operation: the result is the new counter value */
+ ret = newcnt;
+ break;
+ }
+ }
+
+ ASSERT(is_integer(ret) || is_nil(ret) ||
+ (is_list(ret) && (list_val(ret)+list_size)==ret_list_currp));
+ ASSERT(htop <= hend);
+
+ HRelease(BIF_P,hend,htop); /* give back the part of the overestimated allocation that was not used */
+
+finalize:
+ tb->common.meth->db_finalize_dbterm(&handle);
+
+bail_out:
+ db_unlock(tb, LCK_WRITE_REC);
+
+ switch (cret) {
+ case DB_ERROR_NONE:
+ BIF_RET(ret);
+ case DB_ERROR_SYSRES:
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+ default:
+ BIF_ERROR(BIF_P, BADARG);
+ break;
+ }
+}
+
+/*
+** The put BIF
+**
+** ets:insert(Tab, Object | [Object])
+** Returns true. Fails with badarg if the table is not accessible for
+** writing, if the second argument is not a tuple or a proper list of
+** tuples, or if a tuple has fewer elements than the key position.
+** Fails with system_limit if db_put reports DB_ERROR_SYSRES.
+** A list is validated completely before the first object is stored.
+*/
+BIF_RETTYPE ets_insert_2(BIF_ALIST_2)
+{
+ DbTable* tb;
+ int cret = DB_ERROR_NONE;
+ Eterm lst;
+ DbTableMethod* meth;
+ db_lock_kind_t kind;
+
+ CHECK_TABLES();
+
+ /* Write lock table if more than one object to keep atomicity */
+ kind = ((is_list(BIF_ARG_2) && CDR(list_val(BIF_ARG_2)) != NIL)
+ ? LCK_WRITE : LCK_WRITE_REC);
+
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (BIF_ARG_2 == NIL) { /* empty list: nothing to insert */
+ db_unlock(tb, kind);
+ BIF_RET(am_true);
+ }
+ meth = tb->common.meth;
+ if (is_list(BIF_ARG_2)) {
+ for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) {
+ if (is_not_tuple(CAR(list_val(lst))) ||
+ (arityval(*tuple_val(CAR(list_val(lst)))) < tb->common.keypos)) {
+ goto badarg;
+ }
+ }
+ if (lst != NIL) { /* improper list */
+ goto badarg;
+ }
+ for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) {
+ cret = meth->db_put(tb, CAR(list_val(lst)), 0);
+ if (cret != DB_ERROR_NONE)
+ break;
+ }
+ } else {
+ if (is_not_tuple(BIF_ARG_2) ||
+ (arityval(*tuple_val(BIF_ARG_2)) < tb->common.keypos)) {
+ goto badarg;
+ }
+ cret = meth->db_put(tb, BIF_ARG_2, 0);
+ }
+
+ db_unlock(tb, kind);
+
+ switch (cret) {
+ case DB_ERROR_NONE:
+ BIF_RET(am_true);
+ case DB_ERROR_SYSRES:
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+ default:
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ badarg:
+ db_unlock(tb, kind);
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+
+/*
+** The put-if-not-already-there BIF...
+**
+** ets:insert_new(Tab, Object | [Object])
+** Returns true if the object(s) were stored and false if a key already
+** existed. For a list with more than one element every key is checked
+** with db_member under LCK_WRITE before anything is stored. A single
+** object is stored with db_put's key_clash_fail flag set, and
+** DB_ERROR_BADKEY is reported as false.
+*/
+BIF_RETTYPE ets_insert_new_2(BIF_ALIST_2)
+{
+ DbTable* tb;
+ int cret = DB_ERROR_NONE;
+ Eterm ret = am_true;
+ Eterm obj;
+ db_lock_kind_t kind;
+
+ CHECK_TABLES();
+
+ if (is_list(BIF_ARG_2)) {
+ if (CDR(list_val(BIF_ARG_2)) != NIL) {
+ Eterm lst;
+ Eterm lookup_ret;
+ DbTableMethod* meth;
+
+ /* More than one object, use LCK_WRITE to keep atomicity */
+ kind = LCK_WRITE;
+ tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind);
+ if (tb == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ meth = tb->common.meth;
+ for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) {
+ if (is_not_tuple(CAR(list_val(lst)))
+ || (arityval(*tuple_val(CAR(list_val(lst))))
+ < tb->common.keypos)) {
+ goto badarg;
+ }
+ }
+ if (lst != NIL) { /* improper list */
+ goto badarg;
+ }
+ for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) {
+ cret = meth->db_member(tb, TERM_GETKEY(tb,CAR(list_val(lst))),
+ &lookup_ret);
+ if ((cret != DB_ERROR_NONE) || (lookup_ret != am_false)) {
+ ret = am_false; /* a key already exists (or the lookup failed): store nothing */
+ goto done;
+ }
+ }
+
+ for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) {
+ cret = meth->db_put(tb,CAR(list_val(lst)), 0);
+ if (cret != DB_ERROR_NONE)
+ break;
+ }
+ goto done;
+ }
+ obj = CAR(list_val(BIF_ARG_2)); /* one-element list: treat as a single object */
+ }
+ else {
+ obj = BIF_ARG_2;
+ }
+ /* Only one object (or NIL)
+ */
+ kind = LCK_WRITE_REC;
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (BIF_ARG_2 == NIL) {
+ db_unlock(tb, kind);
+ BIF_RET(am_true);
+ }
+ if (is_not_tuple(obj)
+ || (arityval(*tuple_val(obj)) < tb->common.keypos)) {
+ goto badarg;
+ }
+ cret = tb->common.meth->db_put(tb, obj,
+ 1); /* key_clash_fail */
+
+done:
+ db_unlock(tb, kind);
+ switch (cret) {
+ case DB_ERROR_NONE:
+ BIF_RET(ret);
+ case DB_ERROR_BADKEY:
+ BIF_RET(am_false);
+ case DB_ERROR_SYSRES:
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+ default:
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ badarg:
+ db_unlock(tb, kind);
+ BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+** Rename a (possibly) named table
+**
+** ets:rename(Tab, Name)
+**
+** Requires write access to the table, which is locked with LCK_WRITE.
+** For an unnamed table only the_name is changed and the unchanged table
+** identifier is returned. For a named table the new name is inserted
+** in the name table before the old one is removed; then id and
+** the_name are both set to Name, which is also the return value.
+** Fails with badarg if the table is not accessible, if Name is not an
+** atom, or if Name is already registered in the name table.
+*/
+
+BIF_RETTYPE ets_rename_2(BIF_ALIST_2)
+{
+    DbTable* tb;
+    Eterm ret;
+
+#ifdef HARDDEBUG
+    erts_fprintf(stderr,
+                 "ets:rename(%T,%T); Process: %T, initial: %T:%T/%bpu\n",
+                 BIF_ARG_1, BIF_ARG_2, BIF_P->id,
+                 BIF_P->initial[0], BIF_P->initial[1], BIF_P->initial[2]);
+#endif
+
+    if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    if (is_not_atom(BIF_ARG_2)) {
+        goto badarg;
+    }
+
+    if (is_not_atom(tb->common.id)) { /* Not a named table */
+        tb->common.the_name = BIF_ARG_2;
+        goto done;
+    }
+
+    /* Insert the new name first so that a clash leaves the table
+     * registered under its old name. */
+    if (!insert_named_tab(BIF_ARG_2,tb)) {
+        goto badarg;
+    }
+    if (!remove_named_tab(tb->common.id)) {
+        /* common.id is an Eterm (an atom here), not a C string, so it
+         * must be formatted with %T; %s would interpret the term word
+         * as a char pointer. */
+        erl_exit(1,"Could not find named tab %T", tb->common.id);
+    }
+
+    tb->common.id = tb->common.the_name = BIF_ARG_2;
+
+ done:
+    ret = tb->common.id; /* read before db_unlock(), which may free the table */
+    db_unlock(tb, LCK_WRITE);
+    BIF_RET(ret);
+ badarg:
+    db_unlock(tb, LCK_WRITE);
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+
+/*
+** The create table BIF
+** Args: (Name, Properties)
+*/
+
+BIF_RETTYPE ets_new_2(BIF_ALIST_2)
+{
+ DbTable* tb = NULL;
+ int slot;
+ Eterm list;
+ Eterm val;
+ Eterm ret;
+ Eterm heir;
+ Eterm heir_data;
+ Uint32 status;
+ Sint keypos;
+ int is_named, is_fine_locked;
+ int cret;
+ Eterm meta_tuple[3];
+ DbTableMethod* meth;
+
+ if (is_not_atom(BIF_ARG_1)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (is_not_nil(BIF_ARG_2) && is_not_list(BIF_ARG_2)) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ status = DB_NORMAL | DB_SET | DB_PROTECTED;
+ keypos = 1;
+ is_named = 0;
+ is_fine_locked = 0;
+ heir = am_none;
+ heir_data = am_undefined;
+
+ list = BIF_ARG_2;
+ while(is_list(list)) {
+ val = CAR(list_val(list));
+ if (val == am_bag) {
+ status |= DB_BAG;
+ status &= ~(DB_SET | DB_DUPLICATE_BAG | DB_ORDERED_SET);
+ }
+ else if (val == am_duplicate_bag) {
+ status |= DB_DUPLICATE_BAG;
+ status &= ~(DB_SET | DB_BAG | DB_ORDERED_SET);
+ }
+ else if (val == am_ordered_set) {
+ status |= DB_ORDERED_SET;
+ status &= ~(DB_SET | DB_BAG | DB_DUPLICATE_BAG);
+ }
+ /*TT*/
+ else if (is_tuple(val)) {
+ Eterm *tp = tuple_val(val);
+ if (arityval(tp[0]) == 2) {
+ if (tp[1] == am_keypos
+ && is_small(tp[2]) && (signed_val(tp[2]) > 0)) {
+ keypos = signed_val(tp[2]);
+ }
+ else if (tp[1] == am_write_concurrency) {
+ if (tp[2] == am_true) {
+ is_fine_locked = 1;
+ } else if (tp[2] == am_false) {
+ is_fine_locked = 0;
+ } else break;
+ }
+ else if (tp[1] == am_heir && tp[2] == am_none) {
+ heir = am_none;
+ heir_data = am_undefined;
+ }
+ else break;
+ }
+ else if (arityval(tp[0]) == 3 && tp[1] == am_heir
+ && is_internal_pid(tp[2])) {
+ heir = tp[2];
+ heir_data = tp[3];
+ }
+ else break;
+ }
+ else if (val == am_public) {
+ status |= DB_PUBLIC;
+ status &= ~(DB_PROTECTED|DB_PRIVATE);
+ }
+ else if (val == am_private) {
+ status |= DB_PRIVATE;
+ status &= ~(DB_PROTECTED|DB_PUBLIC);
+ }
+ else if (val == am_named_table) {
+ is_named = 1;
+ }
+ else if (val == am_set || val == am_protected)
+ ;
+ else break;
+
+ list = CDR(list_val(list));
+ }
+ if (is_not_nil(list)) { /* bad opt or not a well formed list */
+ BIF_ERROR(BIF_P, BADARG);
+ }
+ if (IS_HASH_TABLE(status)) {
+ meth = &db_hash;
+ #ifdef ERTS_SMP
+ if (is_fine_locked && !(status & DB_PRIVATE)) {
+ status |= DB_FINE_LOCKED;
+ }
+ #endif
+ }
+ else if (IS_TREE_TABLE(status)) {
+ meth = &db_tree;
+ }
+ else {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ /* We create the table outside any table lock
+ * and accept the unusual cost of destroying it again
+ * if no free slot is found.
+ */
+ {
+ DbTable init_tb;
+
+ erts_smp_atomic_init(&init_tb.common.memory_size, 0);
+ tb = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE,
+ &init_tb, sizeof(DbTable));
+ ERTS_ETS_MISC_MEM_ADD(sizeof(DbTable));
+ erts_smp_atomic_init(&tb->common.memory_size,
+ erts_smp_atomic_read(&init_tb.common.memory_size));
+ }
+
+ tb->common.meth = meth;
+ tb->common.the_name = BIF_ARG_1;
+ tb->common.status = status;
+#ifdef ERTS_SMP
+ tb->common.type = status & ERTS_ETS_TABLE_TYPES;
+ /* Note, 'type' is *read only* from now on... */
+#endif
+ db_init_lock(tb, "db_tab", "db_tab_fix");
+ tb->common.keypos = keypos;
+ tb->common.owner = BIF_P->id;
+ set_heir(BIF_P, tb, heir, heir_data);
+
+ erts_smp_atomic_init(&tb->common.nitems, 0);
+
+ tb->common.fixations = NULL;
+
+ cret = meth->db_create(BIF_P, tb);
+ ASSERT(cret == DB_ERROR_NONE);
+
+ erts_smp_spin_lock(&meta_main_tab_main_lock);
+
+ if (meta_main_tab_cnt >= db_max_tabs) {
+ erts_smp_spin_unlock(&meta_main_tab_main_lock);
+ erts_send_error_to_logger_str(BIF_P->group_leader,
+ "** Too many db tables **\n");
+ free_heir_data(tb);
+ tb->common.meth->db_free_table(tb);
+ erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable));
+ ERTS_ETS_MISC_MEM_ADD(-sizeof(DbTable));
+ BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+ }
+
+ slot = meta_main_tab_first_free;
+ ASSERT(slot>=0 && slot<db_max_tabs);
+ meta_main_tab_first_free = GET_NEXT_FREE_SLOT(slot);
+ meta_main_tab_cnt++;
+
+ if (is_named) {
+ ret = BIF_ARG_1;
+ }
+ else {
+ ret = make_small(slot | meta_main_tab_seq_cnt);
+ meta_main_tab_seq_cnt += meta_main_tab_seq_incr;
+ ASSERT((unsigned_val(ret) & meta_main_tab_slot_mask) == slot);
+ }
+ erts_smp_spin_unlock(&meta_main_tab_main_lock);
+
+ tb->common.id = ret;
+ tb->common.slot = slot; /* store slot for erase */
+
+ meta_main_tab_lock(slot);
+ meta_main_tab[slot].u.tb = tb;
+ ASSERT(IS_SLOT_ALIVE(slot));
+ meta_main_tab_unlock(slot);
+
+ if (is_named && !insert_named_tab(BIF_ARG_1, tb)) {
+ meta_main_tab_lock(slot);
+ free_slot(slot);
+ meta_main_tab_unlock(slot);
+
+ db_lock_take_over_ref(tb,LCK_WRITE);
+ free_heir_data(tb);
+ tb->common.meth->db_free_table(tb);
+ db_unlock(tb,LCK_WRITE);
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ BIF_P->flags |= F_USING_DB; /* So we can remove tb if p dies */
+
+#ifdef HARDDEBUG
+ erts_fprintf(stderr,
+ "ets:new(%T,%T)=%T; Process: %T, initial: %T:%T/%bpu\n",
+ BIF_ARG_1, BIF_ARG_2, ret, BIF_P->id,
+ BIF_P->initial[0], BIF_P->initial[1], BIF_P->initial[2]);
+ erts_fprintf(stderr, "ets: new: meta_pid_to_tab common.memory_size = %ld\n",
+ erts_smp_atomic_read(&meta_pid_to_tab->common.memory_size));
+ erts_fprintf(stderr, "ets: new: meta_pid_to_fixed_tab common.memory_size = %ld\n",
+ erts_smp_atomic_read(&meta_pid_to_fixed_tab->common.memory_size));
+#endif
+
+ db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC);
+ if (db_put_hash(meta_pid_to_tab,
+ TUPLE2(meta_tuple, BIF_P->id, make_small(slot)),
+ 0) != DB_ERROR_NONE) {
+ erl_exit(1,"Could not update ets metadata.");
+ }
+ db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC);
+
+ BIF_RET(ret);
+}
+
+/*
+ * The lookup BIF.
+ *
+ * Obtains the table given in BIF_ARG_1 for reading (read lock), lets the
+ * table's method vector (db_get) look up the key in BIF_ARG_2 and drops
+ * the lock again before the outcome is reported:
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_get() is returned
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ *
+ * BADARG is also raised when db_get_table() returns NULL.
+ */
+BIF_RETTYPE ets_lookup_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm found;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_get(BIF_P, table, BIF_ARG_2, &found);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(found);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * The member BIF.
+ *
+ * Holds a read lock on the table given in BIF_ARG_1 while the table's
+ * method vector (db_member) is asked about the key in BIF_ARG_2.  Note
+ * that db_member() is called without the process pointer.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_member() is returned
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ *
+ * BADARG is also raised when db_get_table() returns NULL.
+ */
+BIF_RETTYPE ets_member_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm answer;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_member(table, BIF_ARG_2, &answer);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(answer);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * Get an element from a stored term:
+ * lookup_element(Tab, Key, Index)
+ *
+ * Returns the element, or a list of elements if the table is a bag.
+ * Index (BIF_ARG_3) must be a small integer >= 1, otherwise BADARG.
+ * The table is read locked for the duration of db_get_element().
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_get_element()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_lookup_element_3(BIF_ALIST_3)
+{
+    DbTable* table;
+    Sint position;
+    Eterm element;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    /* A non-small index is mapped to 0 so that one range test rejects it. */
+    position = is_not_small(BIF_ARG_3) ? 0 : signed_val(BIF_ARG_3);
+    if (position < 1) {
+        db_unlock(table, LCK_READ);
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_get_element(BIF_P, table,
+                                                BIF_ARG_2, position,
+                                                &element);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(element);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * BIF to erase a whole table and release all memory it holds
+ *
+ * Fails with BADARG when db_get_table() does not return the table for
+ * write access.  Otherwise the table is marked DB_DELETE with its access
+ * bits cleared, its slot is marked dead, a name registration (atom id)
+ * is removed, ownership is moved to the caller if needed, heir data and
+ * fixations are released and free_table_cont() starts freeing the
+ * contents.  Returns am_true when free_table_cont() reports completion;
+ * otherwise traps to ets_delete_continue_exp with the table pointer
+ * wrapped in a bignum.
+ */
+BIF_RETTYPE ets_delete_1(BIF_ALIST_1)
+{
+ int trap; /* non-zero when free_table_cont() has to be continued in a trap */
+ DbTable* tb;
+
+#ifdef HARDDEBUG
+ erts_fprintf(stderr,
+ "ets:delete(%T); Process: %T, initial: %T:%T/%bpu\n",
+ BIF_ARG_1, BIF_P->id,
+ BIF_P->initial[0], BIF_P->initial[1], BIF_P->initial[2]);
+#endif
+
+ CHECK_TABLES();
+
+ if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL) {
+ BIF_ERROR(BIF_P, BADARG);
+ }
+
+ /*
+ * Clear all access bits to prevent any ets operation from accessing
+ * the table while it is being deleted.
+ */
+ tb->common.status &= ~(DB_PROTECTED|DB_PUBLIC|DB_PRIVATE);
+ tb->common.status |= DB_DELETE;
+
+ meta_main_tab_lock(tb->common.slot);
+ /* We must keep the slot, to be found by db_proc_dead() if process dies */
+ MARK_SLOT_DEAD(tb->common.slot);
+ meta_main_tab_unlock(tb->common.slot);
+ if (is_atom(tb->common.id)) { /* an atom id means the table was registered under a name */
+ remove_named_tab(tb->common.id);
+ }
+
+ if (tb->common.owner != BIF_P->id) {
+ Eterm meta_tuple[3];
+
+ /*
+ * The table is being deleted by a process other than its owner.
+ * To make sure that the table will be completely deleted if the
+ * current process is killed (e.g. by an EXIT signal), we
+ * now transfer the ownership to the current process.
+ */
+ db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC);
+ db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner,
+ make_small(tb->common.slot));
+
+ BIF_P->flags |= F_USING_DB;
+ tb->common.owner = BIF_P->id;
+
+ db_put_hash(meta_pid_to_tab,
+ TUPLE2(meta_tuple,BIF_P->id,make_small(tb->common.slot)),
+ 0); /* NOTE(review): the result of db_put_hash() is not checked here */
+ db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC);
+ }
+ /* disable inheritance */
+ free_heir_data(tb);
+ tb->common.heir = am_none;
+
+ free_fixations_locked(tb);
+
+ trap = free_table_cont(BIF_P, tb, 1, 1);
+ db_unlock(tb, LCK_WRITE);
+ if (trap) {
+ /*
+ * Package the DbTable* pointer into a bignum so that it can be safely
+ * passed through a trap. We used to pass the DbTable* pointer directly
+ * (it looks like a continuation pointer), but that will crash the
+ * emulator if this BIF is call traced.
+ */
+ Eterm *hp = HAlloc(BIF_P, 2); /* one header word plus one word holding the pointer */
+ hp[0] = make_pos_bignum_header(1);
+ hp[1] = (Eterm) tb;
+ BIF_TRAP1(&ets_delete_continue_exp, BIF_P, make_big(hp));
+ }
+ else {
+ BIF_RET(am_true);
+ }
+}
+
+/*
+ * BIF ets:give_away(Tab, Pid, GiftData)
+ *
+ * Makes Pid (BIF_ARG_2) the owner of the table Tab (BIF_ARG_1) and sends
+ * it a 4-tuple {am_ETS_TRANSFER, TableId, FromPid, GiftData}.
+ *
+ * Fails with BADARG when
+ *   - Pid is not an internal pid or erts_pid2proc() finds no process,
+ *   - db_get_table() does not return the table for write access,
+ *   - the caller is not the current owner, or
+ *   - Pid already is the owner.
+ *
+ * Returns am_true on success.
+ */
+BIF_RETTYPE ets_give_away_3(BIF_ALIST_3)
+{
+    Process* to_proc = NULL;
+    ErtsProcLocks to_locks = ERTS_PROC_LOCK_MAIN;
+    Eterm buf[5];               /* big enough for the TUPLE2 and the TUPLE4 below */
+    Eterm to_pid = BIF_ARG_2;
+    Eterm from_pid;
+    Eterm tid;
+    DbTable* tb = NULL;
+
+    if (!is_internal_pid(to_pid)) {
+        goto badarg;
+    }
+    to_proc = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN, to_pid, to_locks);
+    if (to_proc == NULL) {
+        goto badarg;
+    }
+
+    if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL
+        || tb->common.owner != BIF_P->id) {
+        goto badarg;
+    }
+    from_pid = tb->common.owner;
+    if (to_pid == from_pid) {
+        goto badarg;  /* or should we be idempotent? return false maybe */
+    }
+
+    /* Move the {Owner, Slot} entry in the pid -> table meta table. */
+    db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC);
+    db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner,
+                        make_small(tb->common.slot));
+
+    to_proc->flags |= F_USING_DB;
+    tb->common.owner = to_pid;
+
+    db_put_hash(meta_pid_to_tab,
+                TUPLE2(buf,to_pid,make_small(tb->common.slot)),
+                0);
+    db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC);
+
+    /*
+     * Fetch the table id while the write lock is still held.  Once
+     * db_unlock() has returned this function no longer holds anything
+     * that keeps 'tb' from being changed or released by someone else,
+     * so 'tb' must not be dereferenced after this point.
+     */
+    tid = tb->common.id;
+    db_unlock(tb,LCK_WRITE);
+    erts_send_message(BIF_P, to_proc, &to_locks,
+                      TUPLE4(buf, am_ETS_TRANSFER, tid, from_pid, BIF_ARG_3),
+                      0);
+    erts_smp_proc_unlock(to_proc, to_locks);
+    BIF_RET(am_true);
+
+badarg:
+    /*
+     * The receiver is only unlocked when it is not the caller itself
+     * (presumably the caller's own main lock must stay held - confirm
+     * against erts_pid2proc()).
+     */
+    if (to_proc != NULL && to_proc != BIF_P) erts_smp_proc_unlock(to_proc, to_locks);
+    if (tb != NULL) db_unlock(tb, LCK_WRITE);
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * ets_setopts_2(Tab, Opts)
+ *
+ * Opts (BIF_ARG_2) is either one option tuple or a proper list of option
+ * tuples.  Recognised options:
+ *
+ *   {heir, none}                    clear the heir
+ *   {heir, Pid, HeirData}           Pid must be an internal pid
+ *   {protection, private | protected | public}
+ *
+ * Each option may be given at most once.  All options are validated
+ * before the table is obtained; the caller must be the table owner.
+ * Any violation gives BADARG, success gives am_true.
+ */
+BIF_RETTYPE ets_setopts_2(BIF_ALIST_2)
+{
+    DbTable* tb = NULL;
+    Eterm* elems;
+    Eterm option;
+    Eterm new_heir = THE_NON_VALUE;
+    Eterm new_heir_data = THE_NON_VALUE;
+    Uint32 new_protection = 0;
+    Eterm single[2];
+    Eterm rest;
+
+    /* A lone tuple is wrapped in a one-element list built on the stack. */
+    if (is_tuple(BIF_ARG_2)) {
+        rest = CONS(single, BIF_ARG_2, NIL);
+    }
+    else {
+        rest = BIF_ARG_2;
+    }
+
+    while (is_list(rest)) {
+        option = CAR(list_val(rest));
+        if (!is_tuple(option)) {
+            goto badarg;
+        }
+        elems = tuple_val(option);
+        if (arityval(elems[0]) < 2) {
+            goto badarg;
+        }
+
+        if (elems[1] == am_heir) {
+            if (new_heir != THE_NON_VALUE) {
+                goto badarg;            /* heir given twice */
+            }
+            new_heir = elems[2];
+            if (arityval(elems[0]) == 2 && new_heir == am_none) {
+                new_heir_data = am_undefined;
+            }
+            else if (arityval(elems[0]) == 3 && is_internal_pid(new_heir)) {
+                new_heir_data = elems[3];
+            }
+            else {
+                goto badarg;
+            }
+        }
+        else if (elems[1] == am_protection) {
+            if (arityval(elems[0]) != 2 || new_protection != 0) {
+                goto badarg;            /* wrong arity or given twice */
+            }
+            if (elems[2] == am_private) {
+                new_protection = DB_PRIVATE;
+            }
+            else if (elems[2] == am_protected) {
+                new_protection = DB_PROTECTED;
+            }
+            else if (elems[2] == am_public) {
+                new_protection = DB_PUBLIC;
+            }
+            else {
+                goto badarg;
+            }
+        }
+        else {
+            goto badarg;
+        }
+
+        rest = CDR(list_val(rest));
+    }
+
+    if (rest != NIL) {
+        goto badarg;                    /* not a proper list */
+    }
+    tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE);
+    if (tb == NULL) {
+        goto badarg;
+    }
+    if (tb->common.owner != BIF_P->id) {
+        goto badarg;
+    }
+
+    /* heir data is only set once a complete heir option has been accepted */
+    if (new_heir_data != THE_NON_VALUE) {
+        free_heir_data(tb);
+        set_heir(BIF_P, tb, new_heir, new_heir_data);
+    }
+    if (new_protection) {
+        tb->common.status &= ~(DB_PRIVATE|DB_PROTECTED|DB_PUBLIC);
+        tb->common.status |= new_protection;
+    }
+
+    db_unlock(tb, LCK_WRITE);
+    BIF_RET(am_true);
+
+badarg:
+    if (tb != NULL) {
+        db_unlock(tb, LCK_WRITE);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * BIF to delete all objects of a table.
+ *
+ * Obtains the table in BIF_ARG_1 with the write lock and calls the
+ * method vector's db_delete_all_objects().  Returns am_true; BADARG if
+ * db_get_table() returns NULL.
+ */
+BIF_RETTYPE ets_delete_all_objects_1(BIF_ALIST_1)
+{
+    DbTable* table;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    table->common.meth->db_delete_all_objects(BIF_P, table);
+    db_unlock(table, LCK_WRITE);
+
+    BIF_RET(am_true);
+}
+
+/*
+ * Erase the object with the given key, or maybe several objects if the
+ * table is a bag.  Called as delete(Tab, Key), where Key is the key
+ * element of the object(s) to erase.
+ *
+ * The table is obtained with LCK_WRITE_REC and handed to the method
+ * vector's db_erase().
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_erase()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_delete_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm outcome;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_erase(table, BIF_ARG_2, &outcome);
+    db_unlock(table, LCK_WRITE_REC);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(outcome);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * Erase a specific object, or maybe several objects if the table is a
+ * bag.
+ *
+ * BIF_ARG_2 must be a tuple whose arity is at least the table's key
+ * position, otherwise BADARG.  The table is obtained with LCK_WRITE_REC
+ * and handed to the method vector's db_erase_object().
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_erase_object()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_delete_object_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm outcome;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    if (is_not_tuple(BIF_ARG_2)) {
+        db_unlock(table, LCK_WRITE_REC);
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    /* the object must be wide enough to contain the key element */
+    if (arityval(*tuple_val(BIF_ARG_2)) < table->common.keypos) {
+        db_unlock(table, LCK_WRITE_REC);
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_erase_object(table, BIF_ARG_2, &outcome);
+    db_unlock(table, LCK_WRITE_REC);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(outcome);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * Continuation of select_delete.  This is for trapping, it cannot be
+ * called directly.
+ *
+ * a1 is the continuation tuple; its first element identifies the table.
+ * The table is obtained with LCK_WRITE_REC and the method vector's
+ * db_select_delete_continue() does the work.  When the operation did
+ * not trap again and the table is not ITER_SAFE for this process,
+ * unfix_table_locked() is called; it is handed the address of the lock
+ * kind, and db_unlock() uses whatever kind is left there.
+ *
+ * DB_ERROR_NONE yields the term from the continue call, every other
+ * status yields BADARG.
+ */
+static BIF_RETTYPE ets_select_delete_1(Process *p, Eterm a1)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm *cont;
+    Eterm out;
+    int status;
+    db_lock_kind_t lock_kind = LCK_WRITE_REC;
+
+    CHECK_TABLES();
+    ASSERT(is_tuple(a1));
+    cont = tuple_val(a1);
+    ASSERT(arityval(*cont) >= 1);
+
+    table = db_get_table(p, cont[1], DB_WRITE, lock_kind);
+    if (table == NULL) {
+        BIF_ERROR(p,BADARG);
+    }
+
+    status = table->common.meth->db_select_delete_continue(p, table, a1, &out);
+
+    if (!DID_TRAP(p, out)) {
+        if (ITERATION_SAFETY(p, table) != ITER_SAFE) {
+            unfix_table_locked(p, table, &lock_kind);
+        }
+    }
+
+    db_unlock(table, lock_kind);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, p, BADARG);
+    }
+    erts_match_set_release_result(p);
+
+    return result;
+}
+
+
+/*
+ * select_delete(Tab, MatchSpec)
+ *
+ * Fast path: when the match spec equals ms_delete_all the table is
+ * obtained with LCK_WRITE, the item count is read, all objects are
+ * deleted and the count read beforehand is returned.
+ *
+ * Otherwise the table is obtained with LCK_WRITE_REC and the method
+ * vector's db_select_delete() runs.  While it runs the table is locally
+ * fixed if it is ITER_UNSAFE for this process; if the call trapped and
+ * the table is not ITER_SAFE, fix_table_locked() is called before the
+ * lock is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_delete()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_delete_2(BIF_ALIST_2)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    if (eq(BIF_ARG_2, ms_delete_all)) {
+        int count;
+
+        table = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE);
+        if (table == NULL) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+        count = erts_smp_atomic_read(&table->common.nitems);
+        table->common.meth->db_delete_all_objects(BIF_P, table);
+        db_unlock(table, LCK_WRITE);
+        BIF_RET(erts_make_integer(count,BIF_P));
+    }
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select_delete(BIF_P, table, BIF_ARG_2, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_WRITE_REC);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+
+    erts_match_set_release_result(BIF_P);
+
+    return result;
+}
+
+/*
+ * Return a list of tables on this node.
+ *
+ * The table count and the slot limit are sampled under
+ * meta_main_tab_main_lock.  Heap for that many cons cells is allocated
+ * up front; the slots are then scanned one by one (each under its own
+ * slot lock) and the id of every live table is consed onto the result
+ * until either the slot limit or the sampled count is reached.  Heap
+ * that was not needed is handed back with HRelease().
+ */
+BIF_RETTYPE ets_all_0(BIF_ALIST_0)
+{
+    DbTable* table;
+    Eterm id_list = NIL;
+    Eterm* heap;
+    Eterm* heap_end;
+    int slot;
+    int collected = 0;
+    int sampled_cnt;
+    int sampled_max;
+
+    erts_smp_spin_lock(&meta_main_tab_main_lock);
+    sampled_cnt = meta_main_tab_cnt;
+    sampled_max = db_max_tabs;
+    erts_smp_spin_unlock(&meta_main_tab_main_lock);
+
+    /* two words per cons cell */
+    heap = HAlloc(BIF_P, 2*sampled_cnt);
+    heap_end = heap + 2*sampled_cnt;
+
+    slot = 0;
+    while (slot < sampled_max && collected < sampled_cnt) {
+        meta_main_tab_lock(slot);
+        if (IS_SLOT_ALIVE(slot)) {
+            collected++;
+            table = meta_main_tab[slot].u.tb;
+            id_list = CONS(heap, table->common.id, id_list);
+            heap += 2;
+        }
+        meta_main_tab_unlock(slot);
+        slot++;
+    }
+    HRelease(BIF_P, heap_end, heap);
+    BIF_RET(id_list);
+}
+
+
+/*
+ * slot(Db, Slot) -> [Items].
+ *
+ * The slot number (BIF_ARG_2) is not validated here; it is checked in
+ * the table specific code behind the method vector's db_slot().  The
+ * table is read locked during the call.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_slot()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_slot_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm items;
+    int status;
+
+    CHECK_TABLES();
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    status = table->common.meth->db_slot(BIF_P, table, BIF_ARG_2, &items);
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        BIF_RET(items);
+    }
+    if (status == DB_ERROR_SYSRES) {
+        BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+/*
+ * The match BIF, called as ets:match(Table, Pattern),
+ * ets:match(Continuation) or ets:match(Table, Pattern, ChunkSize).
+ */
+
+/*
+ * ets:match(Continuation): the continuation is passed unchanged to
+ * ets_select_1() and its result is returned as is.
+ */
+BIF_RETTYPE ets_match_1(BIF_ALIST_1)
+{
+    BIF_RETTYPE forwarded;
+
+    forwarded = ets_select_1(BIF_P, BIF_ARG_1);
+    return forwarded;
+}
+
+/*
+ * ets:match(Table, Pattern)
+ *
+ * Builds the one-clause match spec [{Pattern, [], [am_DollarDollar]}]
+ * in a buffer on the C stack (not on the process heap) and passes it
+ * to ets_select_2().
+ */
+BIF_RETTYPE ets_match_2(BIF_ALIST_2)
+{
+    Eterm cells[8];               /* 2 (cons) + 4 (3-tuple) + 2 (cons) words */
+    Eterm *body_cell = cells;
+    Eterm *clause_cell = cells + 2;
+    Eterm *spec_cell = cells + 6;
+    Eterm body;
+    Eterm clause;
+    Eterm spec;
+
+    body = CONS(body_cell, am_DollarDollar, NIL);
+    clause = TUPLE3(clause_cell, BIF_ARG_2, NIL, body);
+    spec = CONS(spec_cell, clause, NIL);
+
+    return ets_select_2(BIF_P, BIF_ARG_1, spec);
+}
+
+/*
+ * ets:match(Table, Pattern, ChunkSize)
+ *
+ * Builds the one-clause match spec [{Pattern, [], [am_DollarDollar]}]
+ * in a buffer on the C stack (not on the process heap) and passes it,
+ * together with the chunk size, to ets_select_3().
+ */
+BIF_RETTYPE ets_match_3(BIF_ALIST_3)
+{
+    Eterm cells[8];               /* 2 (cons) + 4 (3-tuple) + 2 (cons) words */
+    Eterm *body_cell = cells;
+    Eterm *clause_cell = cells + 2;
+    Eterm *spec_cell = cells + 6;
+    Eterm body;
+    Eterm clause;
+    Eterm spec;
+
+    body = CONS(body_cell, am_DollarDollar, NIL);
+    clause = TUPLE3(clause_cell, BIF_ARG_2, NIL, body);
+    spec = CONS(spec_cell, clause, NIL);
+
+    return ets_select_3(BIF_P, BIF_ARG_1, spec, BIF_ARG_3);
+}
+
+
+/*
+ * select(Table, MatchSpec, ChunkSize)
+ *
+ * ChunkSize (BIF_ARG_3) must be a small integer strictly greater than
+ * zero; this is checked before the table is obtained.  The table is
+ * read locked and the method vector's db_select_chunk() is run in the
+ * forward (not reversed) direction.  While it runs the table is locally
+ * fixed if it is ITER_UNSAFE for this process; if the call trapped and
+ * the table is not ITER_SAFE, fix_table_locked() is called before the
+ * lock is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_chunk()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_3(BIF_ALIST_3)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    Sint chunk_size;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    if (is_not_small(BIF_ARG_3)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    chunk_size = signed_val(BIF_ARG_3);
+    if (chunk_size <= 0) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select_chunk(BIF_P, table,
+                                                 BIF_ARG_2, chunk_size,
+                                                 0 /* not reversed */,
+                                                 &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+
+    erts_match_set_release_result(BIF_P);
+
+    return result;
+}
+
+
+/*
+ * We get here instead of in the real BIF when trapping.
+ *
+ * a1 is the continuation tuple; its first element identifies the table.
+ * The table is read locked and the method vector's db_select_continue()
+ * does the work.  When the operation did not trap again and the table
+ * is not ITER_SAFE for this process, unfix_table_locked() is called; it
+ * is handed the address of the lock kind, and db_unlock() uses whatever
+ * kind is left there.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_continue()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+static BIF_RETTYPE ets_select_trap_1(Process *p, Eterm a1)
+{
+    BIF_RETTYPE result;
+    DbTable* tb;
+    int cret;
+    Eterm ret;
+    Eterm *tptr;
+    db_lock_kind_t kind = LCK_READ;
+
+    CHECK_TABLES();
+
+    tptr = tuple_val(a1);
+    /*
+     * Terminated with a semicolon so that the statement is well formed
+     * whether ASSERT() expands to a statement, an expression or nothing.
+     */
+    ASSERT(arityval(*tptr) >= 1);
+
+    if ((tb = db_get_table(p, tptr[1], DB_READ, kind)) == NULL) {
+        BIF_ERROR(p, BADARG);
+    }
+
+    cret = tb->common.meth->db_select_continue(p, tb, a1,
+                                               &ret);
+
+    if (!DID_TRAP(p,ret) && ITERATION_SAFETY(p,tb) != ITER_SAFE) {
+        unfix_table_locked(p, tb, &kind);
+    }
+    db_unlock(tb, kind);
+
+    switch (cret) {
+    case DB_ERROR_NONE:
+        ERTS_BIF_PREP_RET(result, ret);
+        break;
+    case DB_ERROR_SYSRES:
+        ERTS_BIF_PREP_ERROR(result, p, SYSTEM_LIMIT);
+        break;
+    default:
+        ERTS_BIF_PREP_ERROR(result, p, BADARG);
+        break;
+    }
+
+    erts_match_set_release_result(p);
+
+    return result;
+}
+
+
+/*
+ * select(Continuation)
+ *
+ * A non-tuple argument is accepted only if it is am_EOT, which is then
+ * returned as is; any other non-tuple gives BADARG.  For a tuple, the
+ * first element identifies the table, which must exist: it is read
+ * locked and the method vector's db_select_continue() is run with the
+ * whole continuation.  While it runs the table is locally fixed if it
+ * is ITER_UNSAFE for this process; if the call trapped and the table is
+ * not ITER_SAFE, fix_table_locked() is called before the lock is
+ * dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_continue()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_1(BIF_ALIST_1)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm *cont;
+    Eterm out;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    if (!is_tuple(BIF_ARG_1)) {
+        if (BIF_ARG_1 != am_EOT) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+        BIF_RET(am_EOT);
+    }
+
+    /* Make sure that the table exists. */
+    cont = tuple_val(BIF_ARG_1);
+    if (arityval(*cont) < 1) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    table = db_get_table(BIF_P, cont[1], DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select_continue(BIF_P, table,
+                                                    BIF_ARG_1, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+
+    erts_match_set_release_result(BIF_P);
+
+    return result;
+}
+
+/*
+ * select(Table, MatchSpec)
+ *
+ * The table must exist: it is read locked and the method vector's
+ * db_select() is run in the forward (not reversed) direction.  While
+ * it runs the table is locally fixed if it is ITER_UNSAFE for this
+ * process; if the call trapped and the table is not ITER_SAFE,
+ * fix_table_locked() is called before the lock is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_2(BIF_ALIST_2)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    /* Make sure that the table exists. */
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select(BIF_P, table, BIF_ARG_2,
+                                           0 /* not reversed */, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+
+    erts_match_set_release_result(BIF_P);
+
+    return result;
+}
+
+/*
+ * We get here instead of in the real BIF when trapping.
+ *
+ * a1 is the continuation tuple; its first element identifies the table.
+ * The table is read locked and the method vector's
+ * db_select_count_continue() does the work.  When the operation did not
+ * trap again and the table is not ITER_SAFE for this process,
+ * unfix_table_locked() is called; it is handed the address of the lock
+ * kind, and db_unlock() uses whatever kind is left there.
+ *
+ *   DB_ERROR_NONE    -> the term produced by the continue call
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+static BIF_RETTYPE ets_select_count_1(Process *p, Eterm a1)
+{
+    BIF_RETTYPE result;
+    DbTable* tb;
+    int cret;
+    Eterm ret;
+    Eterm *tptr;
+    db_lock_kind_t kind = LCK_READ;
+
+    CHECK_TABLES();
+
+    tptr = tuple_val(a1);
+    /*
+     * Terminated with a semicolon so that the statement is well formed
+     * whether ASSERT() expands to a statement, an expression or nothing.
+     */
+    ASSERT(arityval(*tptr) >= 1);
+    if ((tb = db_get_table(p, tptr[1], DB_READ, kind)) == NULL) {
+        BIF_ERROR(p, BADARG);
+    }
+
+    cret = tb->common.meth->db_select_count_continue(p, tb, a1, &ret);
+
+    if (!DID_TRAP(p,ret) && ITERATION_SAFETY(p,tb) != ITER_SAFE) {
+        unfix_table_locked(p, tb, &kind);
+    }
+    db_unlock(tb, kind);
+
+    switch (cret) {
+    case DB_ERROR_NONE:
+        ERTS_BIF_PREP_RET(result, ret);
+        break;
+    case DB_ERROR_SYSRES:
+        ERTS_BIF_PREP_ERROR(result, p, SYSTEM_LIMIT);
+        break;
+    default:
+        ERTS_BIF_PREP_ERROR(result, p, BADARG);
+        break;
+    }
+
+    erts_match_set_release_result(p);
+
+    return result;
+}
+
+/*
+ * select_count(Table, MatchSpec)
+ *
+ * The table must exist: it is read locked and the method vector's
+ * db_select_count() is run.  While it runs the table is locally fixed
+ * if it is ITER_UNSAFE for this process; if the call trapped and the
+ * table is not ITER_SAFE, fix_table_locked() is called before the lock
+ * is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_count()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_count_2(BIF_ALIST_2)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    /* Make sure that the table exists. */
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select_count(BIF_P, table, BIF_ARG_2, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+
+    erts_match_set_release_result(BIF_P);
+
+    return result;
+}
+
+
+/*
+ * select_reverse(Table, MatchSpec, ChunkSize)
+ *
+ * The table must exist and is read locked first; ChunkSize (BIF_ARG_3)
+ * is validated afterwards and must be a small integer strictly greater
+ * than zero (the lock is released again if it is not).  The method
+ * vector's db_select_chunk() is then run in the reversed direction.
+ * While it runs the table is locally fixed if it is ITER_UNSAFE for
+ * this process; if the call trapped and the table is not ITER_SAFE,
+ * fix_table_locked() is called before the lock is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select_chunk()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_reverse_3(BIF_ALIST_3)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    Sint chunk_size;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    /* Make sure that the table exists. */
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    /* Chunk size strictly greater than 0 */
+    if (is_not_small(BIF_ARG_3)) {
+        db_unlock(table, LCK_READ);
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    chunk_size = signed_val(BIF_ARG_3);
+    if (chunk_size <= 0) {
+        db_unlock(table, LCK_READ);
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select_chunk(BIF_P, table,
+                                                 BIF_ARG_2, chunk_size,
+                                                 1 /* reversed */, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+    erts_match_set_release_result(BIF_P);
+    return result;
+}
+
+/*
+ * select_reverse(Continuation): the continuation is passed unchanged to
+ * ets_select_1() and its result is returned as is.
+ */
+BIF_RETTYPE ets_select_reverse_1(BIF_ALIST_1)
+{
+    BIF_RETTYPE forwarded;
+
+    forwarded = ets_select_1(BIF_P, BIF_ARG_1);
+    return forwarded;
+}
+
+/*
+ * select_reverse(Table, MatchSpec)
+ *
+ * The table must exist: it is read locked and the method vector's
+ * db_select() is run in the reversed direction.  While it runs the
+ * table is locally fixed if it is ITER_UNSAFE for this process; if the
+ * call trapped and the table is not ITER_SAFE, fix_table_locked() is
+ * called before the lock is dropped.
+ *
+ *   DB_ERROR_NONE    -> the term produced by db_select()
+ *   DB_ERROR_SYSRES  -> SYSTEM_LIMIT
+ *   anything else    -> BADARG
+ */
+BIF_RETTYPE ets_select_reverse_2(BIF_ALIST_2)
+{
+    BIF_RETTYPE result;
+    DbTable* table;
+    Eterm out;
+    int status;
+    enum DbIterSafety safety;
+
+    CHECK_TABLES();
+
+    /* Make sure that the table exists. */
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ);
+    if (table == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    safety = ITERATION_SAFETY(BIF_P, table);
+    if (safety == ITER_UNSAFE) {
+        local_fix_table(table);
+    }
+
+    status = table->common.meth->db_select(BIF_P, table, BIF_ARG_2,
+                                           1 /*reversed*/, &out);
+
+    if (DID_TRAP(BIF_P, out)) {
+        if (safety != ITER_SAFE) {
+            fix_table_locked(BIF_P, table);
+        }
+    }
+    if (safety == ITER_UNSAFE) {
+        local_unfix_table(table);
+    }
+    db_unlock(table, LCK_READ);
+
+    if (status == DB_ERROR_NONE) {
+        ERTS_BIF_PREP_RET(result, out);
+    }
+    else if (status == DB_ERROR_SYSRES) {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT);
+    }
+    else {
+        ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG);
+    }
+    erts_match_set_release_result(BIF_P);
+    return result;
+}
+
+
+/*
+ * ets:match_object(Continuation), ets:match_object(Table, Pattern),
+ * ets:match_object(Table, Pattern, ChunkSize)
+ */
+
+/*
+ * ets:match_object(Continuation): the continuation is passed unchanged
+ * to ets_select_1() and its result is returned as is.
+ */
+BIF_RETTYPE ets_match_object_1(BIF_ALIST_1)
+{
+    BIF_RETTYPE forwarded;
+
+    forwarded = ets_select_1(BIF_P, BIF_ARG_1);
+    return forwarded;
+}
+
+/*
+ * ets:match_object(Table, Pattern)
+ *
+ * Builds the one-clause match spec
+ * [{Pattern, [], [am_DollarUnderscore]}] in a buffer on the C stack
+ * (not on the process heap) and passes it to ets_select_2().
+ */
+BIF_RETTYPE ets_match_object_2(BIF_ALIST_2)
+{
+    Eterm cells[8];               /* 2 (cons) + 4 (3-tuple) + 2 (cons) words */
+    Eterm *body_cell = cells;
+    Eterm *clause_cell = cells + 2;
+    Eterm *spec_cell = cells + 6;
+    Eterm body;
+    Eterm clause;
+    Eterm spec;
+
+    body = CONS(body_cell, am_DollarUnderscore, NIL);
+    clause = TUPLE3(clause_cell, BIF_ARG_2, NIL, body);
+    spec = CONS(spec_cell, clause, NIL);
+
+    return ets_select_2(BIF_P, BIF_ARG_1, spec);
+}
+
+/*
+ * ets:match_object(Table, Pattern, ChunkSize)
+ *
+ * Builds the one-clause match spec
+ * [{Pattern, [], [am_DollarUnderscore]}] in a buffer on the C stack
+ * (not on the process heap) and passes it, together with the chunk
+ * size, to ets_select_3().
+ */
+BIF_RETTYPE ets_match_object_3(BIF_ALIST_3)
+{
+    Eterm cells[8];               /* 2 (cons) + 4 (3-tuple) + 2 (cons) words */
+    Eterm *body_cell = cells;
+    Eterm *clause_cell = cells + 2;
+    Eterm *spec_cell = cells + 6;
+    Eterm body;
+    Eterm clause;
+    Eterm spec;
+
+    body = CONS(body_cell, am_DollarUnderscore, NIL);
+    clause = TUPLE3(clause_cell, BIF_ARG_2, NIL, body);
+    spec = CONS(spec_cell, clause, NIL);
+
+    return ets_select_3(BIF_P, BIF_ARG_1, spec, BIF_ARG_3);
+}
+
+/*
+ * BIF to extract information about a particular table.
+ *
+ * Collects table_info() for a fixed set of items while the table is
+ * read locked, then builds a list of {Item, Value} tuples on the
+ * process heap after the lock has been released.  The list is built by
+ * prepending, so the items appear in the reverse of the order in which
+ * they are listed in the item array.
+ *
+ * When db_get_table() returns NULL the result is am_undefined if the
+ * argument is an atom or a small integer, and BADARG otherwise.
+ */
+
+BIF_RETTYPE ets_info_1(BIF_ALIST_1)
+{
+    static Eterm items[] = {am_protection, am_keypos, am_type, am_named_table,
+                            am_node, am_size, am_name, am_heir, am_owner, am_memory};
+    Eterm values[sizeof(items)/sizeof(Eterm)];
+    DbTable* table;
+    Eterm info_list;
+    Eterm pair;
+    Eterm* heap;
+    Eterm* cons_cell;
+    int ix;
+    /*Process* rp = NULL;*/
+    Eterm owner;
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ);
+    if (table == NULL) {
+        if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) {
+            BIF_RET(am_undefined);
+        }
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    /* only consumed by the disabled code below */
+    owner = table->common.owner;
+
+    /* If/when we implement lockless private tables:
+    if ((tb->common.status & DB_PRIVATE) && owner != BIF_P->id) {
+        db_unlock(tb, LCK_READ);
+        rp = erts_pid2proc_not_running(BIF_P, ERTS_PROC_LOCK_MAIN,
+                                       owner, ERTS_PROC_LOCK_MAIN);
+        if (rp == NULL) {
+            BIF_RET(am_undefined);
+        }
+        if (rp == ERTS_PROC_LOCK_BUSY) {
+            ERTS_BIF_YIELD1(bif_export[BIF_ets_info_1], BIF_P, BIF_ARG_1);
+        }
+        if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ)) == NULL
+            || tb->common.owner != owner) {
+            if (BIF_P != rp)
+                erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_MAIN);
+            if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) {
+                BIF_RET(am_undefined);
+            }
+            BIF_ERROR(BIF_P, BADARG);
+        }
+    }*/
+    ix = 0;
+    while (ix < sizeof(items)/sizeof(Eterm)) {
+        values[ix] = table_info(BIF_P, table, items[ix]);
+        ASSERT(is_value(values[ix]));
+        ix++;
+    }
+    db_unlock(table, LCK_READ);
+
+    /*if (rp != NULL && rp != BIF_P)
+        erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_MAIN);*/
+
+    /* five words per item: three for the 2-tuple, two for the cons cell */
+    heap = HAlloc(BIF_P, 5*sizeof(items)/sizeof(Eterm));
+    info_list = NIL;
+    ix = 0;
+    while (ix < sizeof(items)/sizeof(Eterm)) {
+        cons_cell = heap + 3;
+        pair = TUPLE2(heap, items[ix], values[ix]);
+        info_list = CONS(cons_cell, pair, info_list);
+        heap += 5;
+        ix++;
+    }
+    BIF_RET(info_list);
+}
+
+/*
+ * BIF to extract one item of information about a particular table.
+ *
+ * table_info() is asked for the item in BIF_ARG_2 while the table is
+ * read locked.  A non-value answer from table_info() gives BADARG.
+ *
+ * When db_get_table() returns NULL the result is am_undefined if the
+ * table argument is an atom or a small integer, and BADARG otherwise.
+ */
+
+BIF_RETTYPE ets_info_2(BIF_ALIST_2)
+{
+    DbTable* table;
+    Eterm value = THE_NON_VALUE;
+
+    table = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ);
+    if (table == NULL) {
+        if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) {
+            BIF_RET(am_undefined);
+        }
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    value = table_info(BIF_P, table, BIF_ARG_2);
+    db_unlock(table, LCK_READ);
+
+    if (is_non_value(value)) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+    BIF_RET(value);
+}
+
+
+/*
+ * is_compiled_ms(Term): am_true when erts_db_is_compiled_ms() accepts
+ * the argument, am_false otherwise.  Never fails.
+ */
+BIF_RETTYPE ets_is_compiled_ms_1(BIF_ALIST_1)
+{
+    Eterm answer;
+
+    answer = erts_db_is_compiled_ms(BIF_ARG_1) ? am_true : am_false;
+    BIF_RET(answer);
+}
+
+/*
+ * match_spec_compile(MatchSpec)
+ *
+ * Compiles the match spec with db_match_set_compile() (DCOMP_TABLE) and
+ * returns it wrapped as a magic binary term allocated on the caller's
+ * heap.  BADARG when the compilation returns NULL.
+ */
+BIF_RETTYPE ets_match_spec_compile_1(BIF_ALIST_1)
+{
+    Binary *compiled;
+    Eterm *heap;
+
+    compiled = db_match_set_compile(BIF_P, BIF_ARG_1, DCOMP_TABLE);
+    if (compiled == NULL) {
+        BIF_ERROR(BIF_P, BADARG);
+    }
+
+    heap = HAlloc(BIF_P, PROC_BIN_SIZE);
+
+    BIF_RET(erts_mk_magic_binary_term(&heap, &MSO(BIF_P), compiled));
+}
+
+/*
+ * match_spec_run_r(List, CompiledMS, Acc)
+ *
+ * List must be a proper list and CompiledMS a reference counted binary
+ * holding a match program, otherwise BADARG.  Each element of List is
+ * run through db_prog_match(); every result that is a value is copied
+ * to the process heap and consed onto the accumulator, so results end
+ * up in reverse order relative to List.
+ *
+ * After more than CONTEXT_REDS elements in one call the BIF consumes
+ * all reductions and traps to itself with the remaining list and the
+ * accumulator built so far.  On normal completion the number of
+ * elements visited is passed along with the result via BIF_RET2().
+ */
+BIF_RETTYPE ets_match_spec_run_r_3(BIF_ALIST_3)
+{
+    Eterm acc = BIF_ARG_3;
+    int visited = 0;
+    Eterm *heap;
+    Eterm rest;
+    ProcBin *pbin;
+    Binary *prog;
+    Eterm match;
+    Uint32 ignored;
+    Uint words;
+
+    if (!is_list(BIF_ARG_1) && BIF_ARG_1 != NIL) {
+        goto error;
+    }
+    if (!is_binary(BIF_ARG_2)) {
+        goto error;
+    }
+
+    pbin = (ProcBin*) binary_val(BIF_ARG_2);
+    if (thing_subtag(pbin->thing_word) != REFC_BINARY_SUBTAG) {
+        goto error;
+    }
+    prog = pbin->val;
+    if (!IsMatchProgBinary(prog)) {
+        goto error;
+    }
+
+    if (BIF_ARG_1 == NIL) {
+        BIF_RET(BIF_ARG_3);
+    }
+
+    rest = BIF_ARG_1;
+    while (is_list(rest)) {
+        visited++;
+        if (visited > CONTEXT_REDS) {
+            BUMP_ALL_REDS(BIF_P);
+            BIF_TRAP3(bif_export[BIF_ets_match_spec_run_r_3],
+                      BIF_P,rest,BIF_ARG_2,acc);
+        }
+        match = db_prog_match(BIF_P, prog, CAR(list_val(rest)), 0, &ignored);
+        if (is_value(match)) {
+            /* room for the copy plus one cons cell */
+            words = size_object(match);
+            heap = HAlloc(BIF_P, words + 2);
+            match = copy_struct(match, words, &heap, &MSO(BIF_P));
+            /* presumably copy_struct() has moved 'heap' past the copy */
+            acc = CONS(heap,match,acc);
+        }
+        rest = CDR(list_val(rest));
+    }
+    if (rest != NIL) {
+        goto error;                     /* improper list */
+    }
+    BIF_RET2(acc,visited);
+
+error:
+    BIF_ERROR(BIF_P, BADARG);
+}
+
+
+/*
+** External interface (NOT BIF's)
+*/
+
+
+/* Init the db */
+
+void init_db(void)
+{
+ DbTable init_tb;
+ int i;
+ extern Eterm* em_apply_bif;
+ Eterm *hp;
+ unsigned bits;
+ size_t size;
+
+#ifdef ERTS_SMP
+ for (i=0; i<META_MAIN_TAB_LOCK_CNT; i++) {
+#ifdef ERTS_ENABLE_LOCK_COUNT
+ erts_smp_spinlock_init_x(&meta_main_tab_locks[i].lck, "meta_main_tab_slot", make_small(i));
+#else
+ erts_smp_spinlock_init(&meta_main_tab_locks[i].lck, "meta_main_tab_slot");
+#endif
+ }
+ erts_smp_spinlock_init(&meta_main_tab_main_lock, "meta_main_tab_main");
+ for (i=0; i<META_NAME_TAB_LOCK_CNT; i++) {
+#ifdef ERTS_ENABLE_LOCK_COUNT
+ erts_smp_rwmtx_init_x(&meta_name_tab_rwlocks[i].lck, "meta_name_tab", make_small(i));
+#else
+ erts_smp_rwmtx_init(&meta_name_tab_rwlocks[i].lck, "meta_name_tab");
+#endif
+ }
+#endif
+
+ erts_smp_atomic_init(&erts_ets_misc_mem_size, 0);
+ db_initialize_util();
+
+ if (user_requested_db_max_tabs < DB_DEF_MAX_TABS)
+ db_max_tabs = DB_DEF_MAX_TABS;
+ else
+ db_max_tabs = user_requested_db_max_tabs;
+
+ bits = erts_fit_in_bits(db_max_tabs-1);
+ if (bits > SMALL_BITS) {
+ erl_exit(1,"Max limit for ets tabled too high %u (max %u).",
+ db_max_tabs, 1L<<SMALL_BITS);
+ }
+ meta_main_tab_slot_mask = (1L<<bits) - 1;
+ meta_main_tab_seq_incr = (1L<<bits);
+
+ size = sizeof(*meta_main_tab)*db_max_tabs;
+ meta_main_tab = erts_db_alloc_nt(ERTS_ALC_T_DB_TABLES, size);
+ ERTS_ETS_MISC_MEM_ADD(size);
+
+ meta_main_tab_cnt = 0;
+ for (i=1; i<db_max_tabs; i++) {
+ SET_NEXT_FREE_SLOT(i-1,i);
+ }
+ SET_NEXT_FREE_SLOT(db_max_tabs-1, (Uint)-1);
+ meta_main_tab_first_free = 0;
+
+ meta_name_tab_mask = (1L<<(bits-1)) - 1; /* At least half the size of main tab */
+ size = sizeof(struct meta_name_tab_entry)*(meta_name_tab_mask+1);
+ meta_name_tab = erts_db_alloc_nt(ERTS_ALC_T_DB_TABLES, size);
+ ERTS_ETS_MISC_MEM_ADD(size);
+
+ for (i=0; i<=meta_name_tab_mask; i++) {
+ meta_name_tab[i].pu.tb = NULL;
+ meta_name_tab[i].u.name_atom = NIL;
+ }
+
+ db_initialize_hash();
+ db_initialize_tree();
+
+ /*TT*/
+ /* Create meta table invertion. */
+ erts_smp_atomic_init(&init_tb.common.memory_size, 0);
+ meta_pid_to_tab = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE,
+ &init_tb,
+ sizeof(DbTable));
+ ERTS_ETS_MISC_MEM_ADD(sizeof(DbTable));
+ erts_smp_atomic_init(&meta_pid_to_tab->common.memory_size,
+ erts_smp_atomic_read(&init_tb.common.memory_size));
+
+ meta_pid_to_tab->common.id = NIL;
+ meta_pid_to_tab->common.the_name = am_true;
+ meta_pid_to_tab->common.status = (DB_NORMAL | DB_BAG | DB_PUBLIC | DB_FINE_LOCKED);
+#ifdef ERTS_SMP
+ meta_pid_to_tab->common.type
+ = meta_pid_to_tab->common.status & ERTS_ETS_TABLE_TYPES;
+ /* Note, 'type' is *read only* from now on... */
+ meta_pid_to_tab->common.is_thread_safe = 0;
+#endif
+ meta_pid_to_tab->common.keypos = 1;
+ meta_pid_to_tab->common.owner = NIL;
+ erts_smp_atomic_init(&meta_pid_to_tab->common.nitems, 0);
+ meta_pid_to_tab->common.slot = -1;
+ meta_pid_to_tab->common.meth = &db_hash;
+
+ erts_refc_init(&meta_pid_to_tab->common.ref, 1);
+ erts_refc_init(&meta_pid_to_tab->common.fixref, 0);
+ /* Neither rwlock or fixlock used
+ db_init_lock(meta_pid_to_tab, "meta_pid_to_tab", "meta_pid_to_tab_FIX");*/
+
+ if (db_create_hash(NULL, meta_pid_to_tab) != DB_ERROR_NONE) {
+ erl_exit(1,"Unable to create ets metadata tables.");
+ }
+
+ erts_smp_atomic_set(&init_tb.common.memory_size, 0);
+ meta_pid_to_fixed_tab = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE,
+ &init_tb,
+ sizeof(DbTable));
+ ERTS_ETS_MISC_MEM_ADD(sizeof(DbTable));
+ erts_smp_atomic_init(&meta_pid_to_fixed_tab->common.memory_size,
+ erts_smp_atomic_read(&init_tb.common.memory_size));
+
+ meta_pid_to_fixed_tab->common.id = NIL;
+ meta_pid_to_fixed_tab->common.the_name = am_true;
+ meta_pid_to_fixed_tab->common.status = (DB_NORMAL | DB_BAG | DB_PUBLIC | DB_FINE_LOCKED);
+#ifdef ERTS_SMP
+ meta_pid_to_fixed_tab->common.type
+ = meta_pid_to_fixed_tab->common.status & ERTS_ETS_TABLE_TYPES;
+ /* Note, 'type' is *read only* from now on... */
+ meta_pid_to_fixed_tab->common.is_thread_safe = 0;
+#endif
+ meta_pid_to_fixed_tab->common.keypos = 1;
+ meta_pid_to_fixed_tab->common.owner = NIL;
+ erts_smp_atomic_init(&meta_pid_to_fixed_tab->common.nitems, 0);
+ meta_pid_to_fixed_tab->common.slot = -1;
+ meta_pid_to_fixed_tab->common.meth = &db_hash;
+
+ erts_refc_init(&meta_pid_to_fixed_tab->common.ref, 1);
+ erts_refc_init(&meta_pid_to_fixed_tab->common.fixref, 0);
+ /* Neither rwlock or fixlock used
+ db_init_lock(meta_pid_to_fixed_tab, "meta_pid_to_fixed_tab", "meta_pid_to_fixed_tab_FIX");*/
+
+ if (db_create_hash(NULL, meta_pid_to_fixed_tab) != DB_ERROR_NONE) {
+ erl_exit(1,"Unable to create ets metadata tables.");
+ }
+
+ /* Non visual BIF to trap to. */
+ memset(&ets_select_delete_continue_exp, 0, sizeof(Export));
+ ets_select_delete_continue_exp.address =
+ &ets_select_delete_continue_exp.code[3];
+ ets_select_delete_continue_exp.code[0] = am_ets;
+ ets_select_delete_continue_exp.code[1] = am_atom_put("delete_trap",11);
+ ets_select_delete_continue_exp.code[2] = 1;
+ ets_select_delete_continue_exp.code[3] =
+ (Eterm) em_apply_bif;
+ ets_select_delete_continue_exp.code[4] =
+ (Eterm) &ets_select_delete_1;
+
+ /* Non visual BIF to trap to. */
+ memset(&ets_select_count_continue_exp, 0, sizeof(Export));
+ ets_select_count_continue_exp.address =
+ &ets_select_count_continue_exp.code[3];
+ ets_select_count_continue_exp.code[0] = am_ets;
+ ets_select_count_continue_exp.code[1] = am_atom_put("count_trap",11);
+ ets_select_count_continue_exp.code[2] = 1;
+ ets_select_count_continue_exp.code[3] =
+ (Eterm) em_apply_bif;
+ ets_select_count_continue_exp.code[4] =
+ (Eterm) &ets_select_count_1;
+
+ /* Non visual BIF to trap to. */
+ memset(&ets_select_continue_exp, 0, sizeof(Export));
+ ets_select_continue_exp.address =
+ &ets_select_continue_exp.code[3];
+ ets_select_continue_exp.code[0] = am_ets;
+ ets_select_continue_exp.code[1] = am_atom_put("select_trap",11);
+ ets_select_continue_exp.code[2] = 1;
+ ets_select_continue_exp.code[3] =
+ (Eterm) em_apply_bif;
+ ets_select_continue_exp.code[4] =
+ (Eterm) &ets_select_trap_1;
+
+ /* Non visual BIF to trap to. */
+ memset(&ets_delete_continue_exp, 0, sizeof(Export));
+ ets_delete_continue_exp.address = &ets_delete_continue_exp.code[3];
+ ets_delete_continue_exp.code[0] = am_ets;
+ ets_delete_continue_exp.code[1] = am_atom_put("delete_trap",11);
+ ets_delete_continue_exp.code[2] = 1;
+ ets_delete_continue_exp.code[3] = (Eterm) em_apply_bif;
+ ets_delete_continue_exp.code[4] = (Eterm) &ets_delete_trap;
+
+ hp = ms_delete_all_buff;
+ ms_delete_all = CONS(hp, am_true, NIL);
+ hp += 2;
+ ms_delete_all = TUPLE3(hp,am_Underscore,NIL,ms_delete_all);
+ hp +=4;
+ ms_delete_all = CONS(hp, ms_delete_all,NIL);
+}
+
+#define ARRAY_CHUNK 100 /* capacity of ErtsDbProcCleanupState.slots.arr */
+
+typedef enum { /* coarse phase of the exit cleanup; consulted when yielding */
+ ErtsDbProcCleanupProgressTables,
+ ErtsDbProcCleanupProgressFixations,
+ ErtsDbProcCleanupProgressDone,
+} ErtsDbProcCleanupProgress;
+
+typedef enum { /* next step to run in erts_db_process_exiting() */
+ ErtsDbProcCleanupOpGetTables,
+ ErtsDbProcCleanupOpDeleteTables,
+ ErtsDbProcCleanupOpGetFixations,
+ ErtsDbProcCleanupOpDeleteFixations,
+ ErtsDbProcCleanupOpDone
+} ErtsDbProcCleanupOperation;
+
+typedef struct { /* resumable state of erts_db_process_exiting() */
+ ErtsDbProcCleanupProgress progress;
+ ErtsDbProcCleanupOperation op;
+ struct { /* current chunk of slot terms read from a meta table */
+ Eterm arr[ARRAY_CHUNK]; /* slot terms; decoded with unsigned_val() by the user */
+ int size; /* set to ARRAY_CHUNK before a lookup, updated by db_get_element_array() */
+ int ix; /* index of the next entry in arr to process */
+ int clean_ix; /* entries below this index are already erased from the meta table */
+ } slots;
+} ErtsDbProcCleanupState;
+
+
+/*
+ * Common worker for the two proc_exit_cleanup_*_meta_data() wrappers.
+ *
+ * Erases from meta_tab the entries stored under pid for the slots in
+ * state->slots.arr[clean_ix .. ix-1], i.e. the slots handled since the
+ * previous cleanup, and then advances clean_ix to ix. Does nothing when
+ * clean_ix == ix. The meta table is locked LCK_WRITE_REC while erasing.
+ */
+static void
+proc_exit_cleanup_meta_data(DbTable *meta_tab, Eterm pid,
+                            ErtsDbProcCleanupState *state)
+{
+    ASSERT(state->slots.clean_ix <= state->slots.ix);
+    if (state->slots.clean_ix >= state->slots.ix)
+        return;
+
+    db_meta_lock(meta_tab, LCK_WRITE_REC);
+    if (state->slots.size < ARRAY_CHUNK
+        && state->slots.ix == state->slots.size) {
+        /*
+         * The chunk was not full and every entry in it has been
+         * processed: erase everything stored under pid in one call.
+         */
+        Eterm dummy;
+        db_erase_hash(meta_tab, pid, &dummy);
+    }
+    else {
+        int ix;
+        /* Need to erase each explicitly */
+        for (ix = state->slots.clean_ix; ix < state->slots.ix; ix++)
+            db_erase_bag_exact2(meta_tab, pid, state->slots.arr[ix]);
+    }
+    db_meta_unlock(meta_tab, LCK_WRITE_REC);
+    state->slots.clean_ix = state->slots.ix;
+}
+
+/* Erase the already processed {pid, slot} entries from meta_pid_to_tab. */
+static void
+proc_exit_cleanup_tables_meta_data(Eterm pid, ErtsDbProcCleanupState *state)
+{
+    proc_exit_cleanup_meta_data(meta_pid_to_tab, pid, state);
+}
+
+/* Erase the already processed {pid, slot} entries from meta_pid_to_fixed_tab. */
+static void
+proc_exit_cleanup_fixations_meta_data(Eterm pid, ErtsDbProcCleanupState *state)
+{
+    proc_exit_cleanup_meta_data(meta_pid_to_fixed_tab, pid, state);
+}
+
+/* In: Table LCK_WRITE
+** Return TRUE : ok, table not mine and NOT locked anymore.
+** Return FALSE: failed, table still mine (LCK_WRITE)
+**
+** Tries to transfer ownership of tb from the process p to the table's
+** heir. The heir process is looked up and main-locked; if that lock is
+** busy the table lock is dropped (while holding a reference to the
+** table) to wait for it, after which owner and heir are re-validated.
+** On success the {owner, slot} entry in meta_pid_to_tab is moved to the
+** heir, F_USING_DB is set on the heir and a tuple
+** {am_ETS_TRANSFER, TableId, p->id, HeirData} is sent to it.
+*/
+static int give_away_to_heir(Process* p, DbTable* tb)
+{
+    Process* to_proc;
+    ErtsProcLocks to_locks = ERTS_PROC_LOCK_MAIN;
+    Eterm buf[5];
+    Eterm to_pid;
+    Eterm heir_data;
+
+    ASSERT(tb->common.owner == p->id);
+    ASSERT(is_internal_pid(tb->common.heir));
+    ASSERT(tb->common.heir != p->id);
+retry:
+    to_pid = tb->common.heir;
+    to_proc = erts_pid2proc_opt(p, ERTS_PROC_LOCK_MAIN,
+                                to_pid, to_locks,
+                                ERTS_P2P_FLG_TRY_LOCK);
+    if (to_proc == ERTS_PROC_LOCK_BUSY) {
+        db_ref(tb); /* while unlocked */
+        db_unlock(tb,LCK_WRITE);
+        to_proc = erts_pid2proc(p, ERTS_PROC_LOCK_MAIN,
+                                to_pid, to_locks);
+        db_lock(tb,LCK_WRITE);
+        tb = db_unref(tb);
+        ASSERT(tb != NULL);
+
+        if (tb->common.owner != p->id) {
+            if (to_proc != NULL ) {
+                erts_smp_proc_unlock(to_proc, to_locks);
+            }
+            db_unlock(tb,LCK_WRITE);
+            return !0; /* ok, someone already gave my table away */
+        }
+        if (tb->common.heir != to_pid) { /* someone changed the heir */
+            if (to_proc != NULL ) {
+                erts_smp_proc_unlock(to_proc, to_locks);
+            }
+            /*
+             * Inspect the *new* heir. The old value in to_pid was
+             * asserted on entry to be a real pid other than p->id, so
+             * testing it here could never detect "no heir".
+             */
+            if (tb->common.heir == p->id || tb->common.heir == am_none) {
+                return 0; /* no real heir, table still mine */
+            }
+            goto retry;
+        }
+    }
+    if (to_proc == NULL) {
+        return 0; /* heir not alive, table still mine */
+    }
+    if (erts_cmp_timeval(&to_proc->started, &tb->common.heir_started) != 0) {
+        erts_smp_proc_unlock(to_proc, to_locks);
+        return 0; /* heir dead and pid reused, table still mine */
+    }
+
+    /* Move the {owner, slot} entry in the pid->table index to the heir. */
+    db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC);
+    db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner,
+                        make_small(tb->common.slot));
+
+    to_proc->flags |= F_USING_DB;
+    tb->common.owner = to_pid;
+
+    db_put_hash(meta_pid_to_tab,
+                TUPLE2(buf,to_pid,make_small(tb->common.slot)),
+                0);
+    db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC);
+
+    db_unlock(tb,LCK_WRITE);
+    heir_data = tb->common.heir_data;
+    if (!is_immed(heir_data)) {
+        /* Non-immediate heir data is stored wrapped in a 1-tuple. */
+        Eterm* tpv = DBTERM_BUF((DbTerm*)heir_data); /* tuple_val */
+        ASSERT(arityval(*tpv) == 1);
+        heir_data = tpv[1];
+    }
+    erts_send_message(p, to_proc, &to_locks,
+                      TUPLE4(buf, am_ETS_TRANSFER, tb->common.id, p->id, heir_data),
+                      0);
+    erts_smp_proc_unlock(to_proc, to_locks);
+    return !0;
+}
+
+/*
+ * erts_db_process_exiting() is called when a process terminates.
+ * It returns 0 when completely done, and !0 when it wants to
+ * yield. c_p->u.exit_data can hold a pointer to a state while
+ * yielding.
+ *
+ * The work is a state machine driven by state->op:
+ *   GetTables / DeleteTables: look up (at most ARRAY_CHUNK at a time)
+ *     the slots stored for this pid in meta_pid_to_tab and, for each
+ *     table still owned by the pid, either give it to its heir or
+ *     delete it. Repeated until the lookup reports DB_ERROR_BADKEY.
+ *   GetFixations / DeleteFixations: the same for meta_pid_to_fixed_tab,
+ *     removing this pid's fixation from each table.
+ *   Done: free a heap allocated state, clear c_p->u.exit_data, return 0.
+ * When yielding, the meta table entries of the slots handled so far are
+ * erased, and a state that still lives on the stack is copied to a
+ * block stored in c_p->u.exit_data.
+ * The c_p_locks argument is not referenced in this function.
+ */
+#define ERTS_DB_INTERNAL_ERROR(LSTR) \
+ erl_exit(ERTS_ABORT_EXIT, "%s:%d:erts_db_process_exiting(): " LSTR "\n", \
+ __FILE__, __LINE__)
+
+int
+erts_db_process_exiting(Process *c_p, ErtsProcLocks c_p_locks)
+{
+ ErtsDbProcCleanupState *state = (ErtsDbProcCleanupState *) c_p->u.exit_data; /* non-NULL when resuming after a yield */
+ Eterm pid = c_p->id;
+ ErtsDbProcCleanupState default_state;
+ int ret;
+
+ if (!state) { /* first call: start with a state on the stack */
+ state = &default_state;
+ state->progress = ErtsDbProcCleanupProgressTables;
+ state->op = ErtsDbProcCleanupOpGetTables;
+ }
+
+ while (!0) {
+ switch (state->op) {
+ case ErtsDbProcCleanupOpGetTables: /* read next chunk of slots stored for pid */
+ state->slots.size = ARRAY_CHUNK;
+ db_meta_lock(meta_pid_to_tab, LCK_READ);
+ ret = db_get_element_array(meta_pid_to_tab,
+ pid,
+ 2,
+ state->slots.arr,
+ &state->slots.size);
+ db_meta_unlock(meta_pid_to_tab, LCK_READ);
+ if (ret == DB_ERROR_BADKEY) {
+ /* Done with tables; now fixations */
+ state->progress = ErtsDbProcCleanupProgressFixations;
+ state->op = ErtsDbProcCleanupOpGetFixations;
+ break;
+ } else if (ret != DB_ERROR_NONE) {
+ ERTS_DB_INTERNAL_ERROR("Inconsistent ets table metadata");
+ }
+
+ state->slots.ix = 0;
+ state->slots.clean_ix = 0;
+ state->op = ErtsDbProcCleanupOpDeleteTables;
+ /* Fall through */
+
+ case ErtsDbProcCleanupOpDeleteTables:
+
+ while (state->slots.ix < state->slots.size) {
+ DbTable *tb = NULL;
+ Sint ix = unsigned_val(state->slots.arr[state->slots.ix]); /* index into meta_main_tab */
+ meta_main_tab_lock(ix);
+ if (!IS_SLOT_FREE(ix)) {
+ tb = db_ref(GET_ANY_SLOT_TAB(ix));
+ ASSERT(tb);
+ }
+ meta_main_tab_unlock(ix);
+ if (tb) {
+ int do_yield;
+ db_lock_take_over_ref(tb, LCK_WRITE);
+ /* Ownership may have changed since
+ we looked up the table. */
+ if (tb->common.owner != pid) {
+ do_yield = 0;
+ db_unlock(tb, LCK_WRITE);
+ }
+ else if (tb->common.heir != am_none
+ && tb->common.heir != pid
+ && give_away_to_heir(c_p, tb)) { /* on success the table is already unlocked */
+ do_yield = 0;
+ }
+ else {
+ int first_call;
+#ifdef HARDDEBUG
+ erts_fprintf(stderr,
+ "erts_db_process_exiting(); Table: %T, "
+ "Process: %T\n",
+ tb->common.id, pid);
+#endif
+ first_call = (tb->common.status & DB_DELETE) == 0; /* deletion of this table has not been started yet */
+ if (first_call) {
+ /* Clear all access bits. */
+ tb->common.status &= ~(DB_PROTECTED
+ | DB_PUBLIC
+ | DB_PRIVATE);
+ tb->common.status |= DB_DELETE;
+
+ if (is_atom(tb->common.id))
+ remove_named_tab(tb->common.id);
+
+ free_heir_data(tb);
+ free_fixations_locked(tb);
+ }
+
+ do_yield = free_table_cont(c_p, tb, first_call, 0);
+ db_unlock(tb, LCK_WRITE);
+ }
+ if (do_yield) /* slots.ix is not advanced, so this slot is revisited on resume */
+ goto yield;
+ }
+ state->slots.ix++;
+ if (ERTS_BIF_REDS_LEFT(c_p) <= 0)
+ goto yield;
+ }
+
+ proc_exit_cleanup_tables_meta_data(pid, state);
+ state->op = ErtsDbProcCleanupOpGetTables; /* look for more slots stored for pid */
+ break;
+
+ case ErtsDbProcCleanupOpGetFixations: /* read next chunk of fixed-table slots stored for pid */
+ state->slots.size = ARRAY_CHUNK;
+ db_meta_lock(meta_pid_to_fixed_tab, LCK_READ);
+ ret = db_get_element_array(meta_pid_to_fixed_tab,
+ pid,
+ 2,
+ state->slots.arr,
+ &state->slots.size);
+ db_meta_unlock(meta_pid_to_fixed_tab, LCK_READ);
+
+ if (ret == DB_ERROR_BADKEY) {
+ /* Done */
+ state->progress = ErtsDbProcCleanupProgressDone;
+ state->op = ErtsDbProcCleanupOpDone;
+ break;
+ } else if (ret != DB_ERROR_NONE) {
+ ERTS_DB_INTERNAL_ERROR("Inconsistent ets fix table metadata");
+ }
+
+ state->slots.ix = 0;
+ state->slots.clean_ix = 0;
+ state->op = ErtsDbProcCleanupOpDeleteFixations;
+ /* Fall through */
+
+ case ErtsDbProcCleanupOpDeleteFixations:
+
+ while (state->slots.ix < state->slots.size) {
+ DbTable *tb = NULL;
+ Sint ix = unsigned_val(state->slots.arr[state->slots.ix]); /* index into meta_main_tab */
+ meta_main_tab_lock(ix);
+ if (IS_SLOT_ALIVE(ix)) {
+ tb = db_ref(meta_main_tab[ix].u.tb);
+ ASSERT(tb);
+ }
+ meta_main_tab_unlock(ix);
+ if (tb) {
+ int reds;
+ DbFixation** pp;
+
+ db_lock_take_over_ref(tb, LCK_WRITE_REC);
+ #ifdef ERTS_SMP
+ erts_smp_mtx_lock(&tb->common.fixlock);
+ #endif
+ reds = 10; /* reductions charged for handling this table */
+
+ for (pp = &tb->common.fixations; *pp != NULL;
+ pp = &(*pp)->next) {
+ if ((*pp)->pid == pid) {
+ DbFixation* fix = *pp;
+ erts_refc_add(&tb->common.fixref,-fix->counter,0); /* drop all fixations counted for this pid */
+ *pp = fix->next;
+ erts_db_free(ERTS_ALC_T_DB_FIXATION,
+ tb, fix, sizeof(DbFixation));
+ ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation));
+ break; /* stop at the first entry for this pid */
+ }
+ }
+ #ifdef ERTS_SMP
+ erts_smp_mtx_unlock(&tb->common.fixlock);
+ #endif
+ if (!IS_FIXED(tb) && IS_HASH_TABLE(tb->common.status)) {
+ db_unfix_table_hash(&(tb->hash));
+ reds += 40;
+ }
+ db_unlock(tb, LCK_WRITE_REC);
+ BUMP_REDS(c_p, reds);
+ }
+ state->slots.ix++;
+ if (ERTS_BIF_REDS_LEFT(c_p) <= 0)
+ goto yield;
+ }
+
+ proc_exit_cleanup_fixations_meta_data(pid, state);
+ state->op = ErtsDbProcCleanupOpGetFixations; /* look for more slots stored for pid */
+ break;
+
+ case ErtsDbProcCleanupOpDone:
+
+ if (state != &default_state) /* state was moved to the heap at an earlier yield */
+ erts_free(ERTS_ALC_T_DB_PROC_CLEANUP, state);
+ c_p->u.exit_data = NULL;
+ return 0;
+
+ default:
+ ERTS_DB_INTERNAL_ERROR("Bad internal state");
+ }
+ }
+
+ yield: /* erase meta entries of the slots handled so far, then save state */
+
+ switch (state->progress) {
+ case ErtsDbProcCleanupProgressTables:
+ proc_exit_cleanup_tables_meta_data(pid, state);
+ break;
+ case ErtsDbProcCleanupProgressFixations:
+ proc_exit_cleanup_fixations_meta_data(pid, state);
+ break;
+ default:
+ break;
+ }
+
+ ASSERT(c_p->u.exit_data == (void *) state
+ || state == &default_state);
+
+ if (state == &default_state) { /* first yield: the stack state must outlive this call */
+ c_p->u.exit_data = erts_alloc(ERTS_ALC_T_DB_PROC_CLEANUP,
+ sizeof(ErtsDbProcCleanupState));
+ sys_memcpy(c_p->u.exit_data,
+ (void*) state,
+ sizeof(ErtsDbProcCleanupState));
+ }
+
+ return !0;
+}
+
+/*
+ * Record one more fixation of tb made by process p.
+ *
+ * The table's fixref counter is always incremented. If p already has an
+ * entry in the table's fixation list its counter is bumped; otherwise a
+ * new entry is linked in, p is flagged F_USING_DB and {pid, slot} is
+ * inserted into meta_pid_to_fixed_tab. The time is stored in the table
+ * when the fixation list was empty on entry.
+ *
+ * SMP note: table only need to be LCK_READ locked
+ */
+static void fix_table_locked(Process* p, DbTable* tb)
+{
+    DbFixation *entry;
+    Eterm tuple_buf[3];
+    Eterm meta_obj;
+
+#ifdef ERTS_SMP
+    erts_smp_mtx_lock(&tb->common.fixlock);
+#endif
+    erts_refc_inc(&tb->common.fixref,1);
+
+    if (tb->common.fixations == NULL) {
+        /* No fixations yet: remember when the table became fixed. */
+        get_now(&(tb->common.megasec),
+                &(tb->common.sec),
+                &(tb->common.microsec));
+    }
+
+    for (entry = tb->common.fixations; entry != NULL; entry = entry->next) {
+        if (entry->pid != p->id)
+            continue;
+        /* p has fixed this table before; count one more and be done. */
+        ++(entry->counter);
+#ifdef ERTS_SMP
+        erts_smp_mtx_unlock(&tb->common.fixlock);
+#endif
+        return;
+    }
+
+    /* First fixation by p: push a new entry on the list. */
+    entry = (DbFixation *) erts_db_alloc(ERTS_ALC_T_DB_FIXATION,
+                                         tb, sizeof(DbFixation));
+    ERTS_ETS_MISC_MEM_ADD(sizeof(DbFixation));
+    entry->pid = p->id;
+    entry->counter = 1;
+    entry->next = tb->common.fixations;
+    tb->common.fixations = entry;
+#ifdef ERTS_SMP
+    erts_smp_mtx_unlock(&tb->common.fixlock);
+#endif
+
+    p->flags |= F_USING_DB;
+
+    db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+    meta_obj = TUPLE2(tuple_buf, p->id, make_small(tb->common.slot));
+    if (db_put_hash(meta_pid_to_fixed_tab, meta_obj, 0) != DB_ERROR_NONE) {
+        erl_exit(1,"Could not insert ets metadata in safe_fixtable.");
+    }
+    db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+}
+
+/*
+ * Release one fixation of tb held by process p.
+ *
+ * If p has an entry in the fixation list, the table's fixref and the
+ * entry's counter are decremented; when the counter reaches zero the
+ * entry is unlinked, its {pid, slot} record is erased from
+ * meta_pid_to_fixed_tab and the entry is freed. Afterwards, if the
+ * table is a hash table that is no longer fixed and has a non-NULL
+ * fixdel value, db_unfix_table_hash() is called.
+ *
+ * SMP note: May re-lock table. When *kind_p is LCK_READ on a thread
+ * safe table the read lock is exchanged for a write lock and *kind_p
+ * is updated to LCK_WRITE.
+ */
+static void unfix_table_locked(Process* p, DbTable* tb,
+                               db_lock_kind_t* kind_p)
+{
+    DbFixation** link;
+    DbFixation* released = NULL;
+
+#ifdef ERTS_SMP
+    erts_smp_mtx_lock(&tb->common.fixlock);
+#endif
+    for (link = &tb->common.fixations; *link != NULL; link = &(*link)->next) {
+        DbFixation* cur = *link;
+
+        if (cur->pid != p->id)
+            continue;
+
+        erts_refc_dec(&tb->common.fixref,0);
+        --(cur->counter);
+        ASSERT(cur->counter >= 0);
+        if (!(cur->counter > 0)) {
+            /* Last fixation by p: unlink, clean up after unlocking. */
+            *link = cur->next;
+            released = cur;
+        }
+        break;
+    }
+#ifdef ERTS_SMP
+    erts_smp_mtx_unlock(&tb->common.fixlock);
+#endif
+
+    if (released != NULL) {
+        db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+        db_erase_bag_exact2(meta_pid_to_fixed_tab,
+                            p->id, make_small(tb->common.slot));
+        db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+        erts_db_free(ERTS_ALC_T_DB_FIXATION,
+                     tb, (void *) released, sizeof(DbFixation));
+        ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation));
+    }
+
+    if (IS_FIXED(tb)
+        || !IS_HASH_TABLE(tb->common.status)
+        || erts_smp_atomic_read(&tb->hash.fixdel) == (long)NULL) {
+        return;
+    }
+
+#ifdef ERTS_SMP
+    if (*kind_p == LCK_READ && tb->common.is_thread_safe) {
+        /* Must have write lock while purging pseudo-deleted (OTP-8166) */
+        erts_smp_rwmtx_runlock(&tb->common.rwlock);
+        erts_smp_rwmtx_rwlock(&tb->common.rwlock);
+        *kind_p = LCK_WRITE;
+        if (tb->common.status & DB_DELETE) return;
+    }
+#endif
+    db_unfix_table_hash(&(tb->hash));
+}
+
+/*
+ * Drop every fixation entry of tb: for each entry the {pid, slot}
+ * record is erased from meta_pid_to_fixed_tab and the entry is freed.
+ * The table's fixation list is left empty.
+ *
+ * Assume that tb is WRITE locked
+ */
+static void free_fixations_locked(DbTable *tb)
+{
+    DbFixation *cur;
+    DbFixation *following;
+
+    for (cur = tb->common.fixations; cur != NULL; cur = following) {
+        /* Fetch the successor first; cur is freed below. */
+        following = cur->next;
+
+        db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+        db_erase_bag_exact2(meta_pid_to_fixed_tab,
+                            cur->pid,
+                            make_small(tb->common.slot));
+        db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC);
+
+        erts_db_free(ERTS_ALC_T_DB_FIXATION,
+                     tb, (void *) cur, sizeof(DbFixation));
+        ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation));
+    }
+    tb->common.fixations = NULL;
+}
+
+/*
+ * Install heir and heir_data in tb on behalf of process me.
+ *
+ * heir == am_none : only the heir field is written.
+ * heir == me->id  : the start time of me is recorded.
+ * otherwise       : the heir process is looked up and its start time
+ *                   recorded; if it cannot be found the table ends up
+ *                   with no heir (am_none).
+ *
+ * Non-immediate heir_data is copied into the table, wrapped in a
+ * 1-tuple, with db_get_term(). That copy is made only when the table
+ * really gets an heir: free_heir_data() releases the copy only when
+ * heir != am_none, so copying for a missing heir would leak it.
+ */
+static void set_heir(Process* me, DbTable* tb, Eterm heir, Eterm heir_data)
+{
+    tb->common.heir = heir;
+    if (heir == am_none) {
+        return;
+    }
+    if (heir == me->id) {
+        tb->common.heir_started = me->started;
+    }
+    else {
+        Process* heir_proc= erts_pid2proc_opt(me, ERTS_PROC_LOCK_MAIN, heir,
+                                              0, ERTS_P2P_FLG_SMP_INC_REFC);
+        if (heir_proc == NULL) {
+            /* Heir is gone: no heir, and no heir data to keep. */
+            tb->common.heir = am_none;
+            tb->common.heir_data = am_undefined;
+            return;
+        }
+        tb->common.heir_started = heir_proc->started;
+        erts_smp_proc_dec_refc(heir_proc);
+    }
+
+    if (!is_immed(heir_data)) {
+        Eterm tmp[2];
+        /* Make a dummy 1-tuple around data to use db_get_term() */
+        heir_data = (Eterm) db_get_term(&tb->common, NULL, 0,
+                                        TUPLE1(tmp,heir_data));
+        ASSERT(!is_immed(heir_data));
+    }
+    tb->common.heir_data = heir_data;
+}
+
+/*
+ * Release the table's private copy of the heir data, if there is one.
+ * A copy exists only when the table has an heir and the stored heir
+ * data is not an immediate term. In DEBUG builds the heir_data field is
+ * overwritten with am_undefined afterwards.
+ */
+static void free_heir_data(DbTable* tb)
+{
+    DbTerm* copy = NULL;
+
+    if (tb->common.heir != am_none && !is_immed(tb->common.heir_data)) {
+        copy = (DbTerm*) tb->common.heir_data;
+    }
+    if (copy != NULL) {
+        db_free_term_data(copy);
+        erts_db_free(ERTS_ALC_T_DB_TERM, tb, (void *)copy,
+                     sizeof(DbTerm) + (copy->size-1)*sizeof(Eterm));
+    }
+#ifdef DEBUG
+    tb->common.heir_data = am_undefined;
+#endif
+}
+
+/*
+ * Trap target used to continue the deletion of a table.
+ * cont is a one-word positive bignum whose payload word holds the
+ * DbTable pointer. Runs one more step of free_table_cont() with the
+ * table write locked; traps to itself again while more work remains
+ * and returns am_true when the table is gone.
+ */
+static BIF_RETTYPE ets_delete_trap(Process *p, Eterm cont)
+{
+    Eterm* cont_words = big_val(cont);
+    DbTable *tb = (DbTable *) cont_words[1];
+    int more_work;
+
+    ASSERT(*cont_words == make_pos_bignum_header(1));
+
+    db_lock(tb, LCK_WRITE);
+    more_work = free_table_cont(p, tb, 0, 1);
+    db_unlock(tb, LCK_WRITE);
+
+    if (!more_work) {
+        BIF_RET(am_true);
+    }
+    BIF_TRAP1(&ets_delete_continue_exp, p, cont);
+}
+
+
+/*
+ * free_table_cont() returns 0 when done and !0 when more work is needed.
+ *
+ * Runs one step of the table type's db_free_table_continue() callback.
+ * While that callback reports 0 all reductions of p are consumed and !0
+ * is returned. Once it reports completion the table's slot is freed,
+ * the {owner, slot} record is erased from meta_pid_to_tab when
+ * clean_meta_tab is set, one reference to the table is dropped and
+ * 100 reductions are charged to p.
+ */
+static int free_table_cont(Process *p,
+                           DbTable *tb,
+                           int first,
+                           int clean_meta_tab)
+{
+    Eterm step_result;
+
+#ifdef HARDDEBUG
+    if (!first) {
+        erts_fprintf(stderr,"ets: free_table_cont %T (continue)\r\n",
+                     tb->common.id);
+    }
+#endif
+
+    step_result = tb->common.meth->db_free_table_continue(tb);
+
+    if (step_result == 0) {
+#ifdef HARDDEBUG
+        erts_fprintf(stderr,"ets: free_table_cont %T (continue begin)\r\n",
+                     tb->common.id);
+#endif
+        /* More work to be done. Let other processes work and call us again. */
+        BUMP_ALL_REDS(p);
+        return !0;
+    }
+
+#ifdef HARDDEBUG
+    erts_fprintf(stderr,"ets: free_table_cont %T (continue end)\r\n",
+                 tb->common.id);
+#endif
+    /* Completely done - we will not get called again. */
+    meta_main_tab_lock(tb->common.slot);
+    free_slot(tb->common.slot);
+    meta_main_tab_unlock(tb->common.slot);
+
+    if (clean_meta_tab) {
+        db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC);
+        db_erase_bag_exact2(meta_pid_to_tab,tb->common.owner,
+                            make_small(tb->common.slot));
+        db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC);
+    }
+    db_unref(tb);
+    BUMP_REDS(p, 100);
+    return 0;
+}
+
+/*
+ * Return the value of the info item What for table tb, building any
+ * heap terms on the heap of process p. THE_NON_VALUE is returned for
+ * an item that is not recognised (and for am_protection when none of
+ * the access bits is set).
+ */
+static Eterm table_info(Process* p, DbTable* tb, Eterm What)
+{
+    if (What == am_size) {
+        return make_small(erts_smp_atomic_read(&tb->common.nitems));
+    }
+    if (What == am_type) {
+        if (tb->common.status & DB_SET)
+            return am_set;
+        if (tb->common.status & DB_DUPLICATE_BAG)
+            return am_duplicate_bag;
+        if (tb->common.status & DB_ORDERED_SET)
+            return am_ordered_set;
+        /*TT*/
+        ASSERT(tb->common.status & DB_BAG);
+        return am_bag;
+    }
+    if (What == am_memory) {
+        /* Byte count rounded up to whole words. */
+        Uint words = (Uint) ((erts_smp_atomic_read(&tb->common.memory_size)
+                              + sizeof(Uint)
+                              - 1)
+                             / sizeof(Uint));
+        return erts_make_integer(words, p);
+    }
+    if (What == am_owner) {
+        return tb->common.owner;
+    }
+    if (What == am_heir) {
+        return tb->common.heir;
+    }
+    if (What == am_protection) {
+        if (tb->common.status & DB_PRIVATE)
+            return am_private;
+        if (tb->common.status & DB_PROTECTED)
+            return am_protected;
+        if (tb->common.status & DB_PUBLIC)
+            return am_public;
+        return THE_NON_VALUE;
+    }
+    if (What == am_name) {
+        return tb->common.the_name;
+    }
+    if (What == am_keypos) {
+        return make_small(tb->common.keypos);
+    }
+    if (What == am_node) {
+        return erts_this_dist_entry->sysname;
+    }
+    if (What == am_named_table) {
+        return is_atom(tb->common.id) ? am_true : am_false;
+    }
+    /*
+     * For debugging purposes
+     */
+    if (What == am_data) {
+        print_table(ERTS_PRINT_STDOUT, NULL, 1, tb);
+        return am_true;
+    }
+    if (What == am_atom_put("fixed",5)) {
+        if (IS_FIXED(tb))
+            return am_true;
+        return am_false;
+    }
+    if (What == am_atom_put("kept_objects",12)) {
+        return make_small(IS_HASH_TABLE(tb->common.status)
+                          ? db_kept_items_hash(&tb->hash) : 0);
+    }
+    if (What == am_atom_put("safe_fixed",10)) {
+        Eterm fixed_info = am_false;
+#ifdef ERTS_SMP
+        erts_smp_mtx_lock(&tb->common.fixlock);
+#endif
+        if (IS_FIXED(tb)) {
+            /* 7 words for the two outer tuples, 5 per fixation entry. */
+            Uint need = 7;
+            Eterm *hp;
+            Eterm tpl;
+            Eterm lst = NIL;
+            DbFixation *fix;
+
+            for (fix = tb->common.fixations; fix != NULL; fix = fix->next) {
+                need += 5;
+            }
+            hp = HAlloc(p, need);
+            for (fix = tb->common.fixations; fix != NULL; fix = fix->next) {
+                tpl = TUPLE2(hp,fix->pid,make_small(fix->counter));
+                hp += 3;
+                lst = CONS(hp,tpl,lst);
+                hp += 2;
+            }
+            tpl = TUPLE3(hp,
+                         make_small(tb->common.megasec),
+                         make_small(tb->common.sec),
+                         make_small(tb->common.microsec));
+            hp += 4;
+            fixed_info = TUPLE2(hp, tpl, lst);
+        }
+#ifdef ERTS_SMP
+        erts_smp_mtx_unlock(&tb->common.fixlock);
+#endif
+        return fixed_info;
+    }
+    if (What == am_atom_put("stats",5)) {
+        FloatDef f;
+        DbHashStats stats;
+        Eterm avg, std_dev_real, std_dev_exp;
+        Eterm* hp;
+
+        if (!IS_HASH_TABLE(tb->common.status)) {
+            return am_false;
+        }
+
+        db_calc_stats_hash(&tb->hash, &stats);
+        hp = HAlloc(p, 1 + 6 + FLOAT_SIZE_OBJECT*3);
+
+        f.fd = stats.avg_chain_len;
+        avg = make_float(hp);
+        PUT_DOUBLE(f, hp);
+        hp += FLOAT_SIZE_OBJECT;
+
+        f.fd = stats.std_dev_chain_len;
+        std_dev_real = make_float(hp);
+        PUT_DOUBLE(f, hp);
+        hp += FLOAT_SIZE_OBJECT;
+
+        f.fd = stats.std_dev_expected;
+        std_dev_exp = make_float(hp);
+        PUT_DOUBLE(f, hp);
+        hp += FLOAT_SIZE_OBJECT;
+
+        return TUPLE6(hp, make_small(erts_smp_atomic_read(&tb->hash.nactive)),
+                      avg, std_dev_real, std_dev_exp,
+                      make_small(stats.min_chain_len),
+                      make_small(stats.max_chain_len));
+    }
+    return THE_NON_VALUE;
+}
+
+/*
+ * Print a description of tb to the destination (to, to_arg): id, name,
+ * the type specific output of the table's db_print() callback, the
+ * number of objects and the memory size rounded up to whole words.
+ */
+static void print_table(int to, void *to_arg, int show, DbTable* tb)
+{
+    int object_count;
+    Uint word_count;
+
+    erts_print(to, to_arg, "Table: %T\n", tb->common.id);
+    erts_print(to, to_arg, "Name: %T\n", tb->common.the_name);
+
+    tb->common.meth->db_print(to, to_arg, show, tb);
+
+    object_count = (int)erts_smp_atomic_read(&tb->common.nitems);
+    erts_print(to, to_arg, "Objects: %d\n", object_count);
+
+    word_count = (Uint) ((erts_smp_atomic_read(&tb->common.memory_size)
+                          + sizeof(Uint)
+                          - 1)
+                         / sizeof(Uint));
+    erts_print(to, to_arg, "Words: %bpu\n", word_count);
+}
+
+/*
+ * Print every live table slot (owner, slot number and table details)
+ * to the destination (to, to_arg). In DEBUG builds the two internal
+ * meta tables are printed as well.
+ */
+void db_info(int to, void *to_arg, int show) /* Called by break handler */
+{
+    int slot;
+
+    for (slot = 0; slot < db_max_tabs; slot++) {
+        DbTable *tb;
+
+        if (!IS_SLOT_ALIVE(slot))
+            continue;
+
+        tb = meta_main_tab[slot].u.tb;
+        erts_print(to, to_arg, "=ets:%T\n", tb->common.owner);
+        erts_print(to, to_arg, "Slot: %d\n", slot);
+        print_table(to, to_arg, show, tb);
+    }
+#ifdef DEBUG
+    erts_print(to, to_arg, "=internal_ets: Process to table index\n");
+    print_table(to, to_arg, show, meta_pid_to_tab);
+    erts_print(to, to_arg, "=internal_ets: Process to fixation index\n");
+    print_table(to, to_arg, show, meta_pid_to_fixed_tab);
+#endif
+}
+
+/*
+ * Return the current value of the erts_ets_misc_mem_size counter,
+ * i.e. ets memory that is not allocated in ets_alloc.
+ */
+Uint
+erts_get_ets_misc_mem_size(void)
+{
+    Uint misc_bytes;
+
+    /* Memory not allocated in ets_alloc */
+    misc_bytes = (Uint) erts_smp_atomic_read(&erts_ets_misc_mem_size);
+    return misc_bytes;
+}
+
+/*
+ * Call func(table, arg) for every live table. The scan over the slots
+ * stops as soon as meta_main_tab_cnt tables have been visited.
+ *
+ * SMP Note: May only be used when system is locked
+ */
+void
+erts_db_foreach_table(void (*func)(DbTable *, void *), void *arg)
+{
+    int slot;
+    int visited = 0;
+
+    for (slot = 0; slot < db_max_tabs && visited < meta_main_tab_cnt; slot++) {
+        if (!IS_SLOT_ALIVE(slot))
+            continue;
+        visited++;
+        (*func)(meta_main_tab[slot].u.tb, arg);
+    }
+    ASSERT(visited == meta_main_tab_cnt);
+}
+
+/*
+ * Forward (func, arg) to the db_foreach_offheap() callback of the
+ * table's method table.
+ *
+ * SMP Note: May only be used when system is locked
+ */
+void
+erts_db_foreach_offheap(DbTable *tb,
+                        void (*func)(ErlOffHeap *, void *),
+                        void *arg)
+{
+    (*tb->common.meth->db_foreach_offheap)(tb, func, arg);
+}
+
+/*
+ * For testing of meta tables only.
+ *
+ * Given a name atom (as returned from ets:new/2), return a list of 'cnt'
+ * number of other names that will hash to the same bucket in meta_name_tab.
+ *
+ * Candidate atom indices are visited in steps of the name table size,
+ * starting at the bucket index of 'name'; atoms named "am<hex>" are
+ * created until the atom table is large enough to contain a candidate.
+ *
+ * WARNING: Will bloat the atom table!
+ */
+Eterm
+erts_ets_colliding_names(Process* p, Eterm name, Uint cnt)
+{
+    Eterm list = NIL;
+    Eterm* hp = HAlloc(p,cnt*2);
+    Uint index = atom_val(name) & meta_name_tab_mask;
+
+    for (; cnt; index += meta_name_tab_mask + 1) {
+        if (index == atom_val(name)) {
+            continue; /* the name itself is not reported */
+        }
+        /* Grow the atom table until 'index' denotes an existing atom. */
+        while (index >= atom_table_size()) {
+            char tmp[20];
+            erts_snprintf(tmp, sizeof(tmp), "am%x", atom_table_size());
+            am_atom_put(tmp,strlen(tmp));
+        }
+        list = CONS(hp, make_atom(index), list);
+        hp += 2;
+        --cnt;
+    }
+    return list;
+}
+
+
+#ifdef HARDDEBUG /* Here comes some debug functions */
+
+/*
+ * Run the db_check_table() callback on every live table.
+ * Does nothing in SMP builds.
+ */
+void db_check_tables(void)
+{
+#ifdef ERTS_SMP
+    return;
+#else
+    int i;
+
+    for (i = 0; i < db_max_tabs; i++) {
+        if (IS_SLOT_ALIVE(i)) {
+            /* The table pointer lives in the 'u.tb' member of a slot. */
+            DbTable* tb = meta_main_tab[i].u.tb;
+            tb->common.meth->db_check_table(tb);
+        }
+    }
+#endif
+}
+
+#endif /* HARDDEBUG */