diff options
Diffstat (limited to 'erts/emulator/beam/erl_db.c')
-rw-r--r-- | erts/emulator/beam/erl_db.c | 2871 |
1 files changed, 1657 insertions, 1214 deletions
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index 23c8a5bae9..c009a3bde8 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2017. All Rights Reserved. + * Copyright Ericsson AB 1996-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,7 @@ */ /* - * This file contains the bif interface functions and - * the handling of the "meta tables" ie the tables of - * db tables. + * This file contains the 'ets' bif interface functions. */ /* @@ -43,14 +41,43 @@ #include "erl_db.h" #include "bif.h" #include "big.h" +#include "erl_binary.h" -erts_smp_atomic_t erts_ets_misc_mem_size; +erts_atomic_t erts_ets_misc_mem_size; /* ** Utility macros */ +#define DB_BIF_GET_TABLE(TB, WHAT, KIND, BIF_IX) \ + DB_GET_TABLE(TB, BIF_ARG_1, WHAT, KIND, BIF_IX, NULL, BIF_P) + +#define DB_TRAP_GET_TABLE(TB, TID, WHAT, KIND, BIF_EXP) \ + DB_GET_TABLE(TB, TID, WHAT, KIND, 0, BIF_EXP, BIF_P) + +#define DB_GET_TABLE(TB, TID, WHAT, KIND, BIF_IX, BIF_EXP, PROC) \ +do { \ + Uint freason__; \ + if (!(TB = db_get_table(PROC, TID, WHAT, KIND, &freason__))) { \ + return db_bif_fail(PROC, freason__, BIF_IX, BIF_EXP); \ + } \ +}while(0) + +static BIF_RETTYPE db_bif_fail(Process* p, Uint freason, + Uint bif_ix, Export* bif_exp) +{ + if (freason == TRAP) { + if (!bif_exp) + bif_exp = bif_export[bif_ix]; + p->arity = bif_exp->info.mfa.arity; + p->i = (BeamInstr*) bif_exp->addressv[erts_active_code_ix()]; + } + p->freason = freason; + return THE_NON_VALUE; +} + + /* Get a key from any table structure and a tagged object */ #define TERM_GETKEY(tb, obj) db_getkey((tb)->common.keypos, (obj)) @@ -62,85 +89,244 @@ enum DbIterSafety { ITER_SAFE_LOCKED, /* Safe while table is locked, not between trap calls */ ITER_SAFE /* No need to fixate at all */ }; -#ifdef ERTS_SMP # define ITERATION_SAFETY(Proc,Tab) \ ((IS_TREE_TABLE((Tab)->common.status) || ONLY_WRITER(Proc,Tab)) ? ITER_SAFE \ : (((Tab)->common.status & DB_FINE_LOCKED) ? ITER_UNSAFE : ITER_SAFE_LOCKED)) -#else -# define ITERATION_SAFETY(Proc,Tab) \ - ((IS_TREE_TABLE((Tab)->common.status) || ONLY_WRITER(Proc,Tab)) \ - ? ITER_SAFE : ITER_SAFE_LOCKED) -#endif #define DID_TRAP(P,Ret) (!is_value(Ret) && ((P)->freason == TRAP)) +/* + * "fixed_tabs": list of all fixed tables for a process + */ +#ifdef DEBUG +static int fixed_tabs_find(DbFixation* first, DbFixation* fix); +#endif -/* -** The main meta table, containing all ets tables. -*/ -#ifdef ERTS_SMP +static void fixed_tabs_insert(Process* p, DbFixation* fix) +{ + DbFixation* first = erts_psd_get(p, ERTS_PSD_ETS_FIXED_TABLES); + + if (!first) { + fix->tabs.next = fix->tabs.prev = fix; + erts_psd_set(p, ERTS_PSD_ETS_FIXED_TABLES, fix); + } + else { + ASSERT(!fixed_tabs_find(first, fix)); + fix->tabs.prev = first->tabs.prev; + fix->tabs.next = first; + fix->tabs.prev->tabs.next = fix; + first->tabs.prev = fix; + } +} + +static void fixed_tabs_delete(Process *p, DbFixation* fix) +{ + if (fix->tabs.next == fix) { + DbFixation* old; + ASSERT(fix->tabs.prev == fix); + old = erts_psd_set(p, ERTS_PSD_ETS_FIXED_TABLES, NULL); + ASSERT(old == fix); (void)old; + } + else { + DbFixation *first = (DbFixation*) erts_psd_get(p, ERTS_PSD_ETS_FIXED_TABLES); -#define ERTS_META_MAIN_TAB_LOCK_TAB_BITS 8 -#define ERTS_META_MAIN_TAB_LOCK_TAB_SIZE (1 << ERTS_META_MAIN_TAB_LOCK_TAB_BITS) -#define ERTS_META_MAIN_TAB_LOCK_TAB_MASK (ERTS_META_MAIN_TAB_LOCK_TAB_SIZE - 1) + ASSERT(fixed_tabs_find(first, fix)); + fix->tabs.prev->tabs.next = fix->tabs.next; + fix->tabs.next->tabs.prev = fix->tabs.prev; -typedef union { - erts_smp_rwmtx_t rwmtx; - byte cache_line_align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE( - sizeof(erts_smp_rwmtx_t))]; -} erts_meta_main_tab_lock_t; + if (fix == first) + erts_psd_set(p, ERTS_PSD_ETS_FIXED_TABLES, fix->tabs.next); + } +} -static erts_meta_main_tab_lock_t *meta_main_tab_locks; +#ifdef DEBUG +static int fixed_tabs_find(DbFixation* first, DbFixation* fix) +{ + DbFixation* p; + if (!first) { + first = (DbFixation*) erts_psd_get(fix->procs.p, ERTS_PSD_ETS_FIXED_TABLES); + } + p = first; + do { + if (p == fix) + return 1; + ASSERT(p->procs.p == fix->procs.p); + ASSERT(p->tabs.next->tabs.prev == p); + p = p->tabs.next; + } while (p != first); + return 0; +} #endif -static struct { - union { - DbTable *tb; /* Only directly readable if slot is ALIVE */ - UWord next_free; /* (index<<2)|1 if slot is FREE */ - }u; -} *meta_main_tab; -/* A slot in meta_main_tab can have three states: - * FREE : Free to use for new table. Part of linked free-list. - * ALIVE: Contains a table - * DEAD : Contains a table that is being removed. + +/* + * fixing_procs: tree of all processes fixating a table */ -#define IS_SLOT_FREE(i) (meta_main_tab[(i)].u.next_free & 1) -#define IS_SLOT_DEAD(i) (meta_main_tab[(i)].u.next_free & 2) -#define IS_SLOT_ALIVE(i) (!(meta_main_tab[(i)].u.next_free & (1|2))) -#define GET_NEXT_FREE_SLOT(i) (meta_main_tab[(i)].u.next_free >> 2) -#define SET_NEXT_FREE_SLOT(i,next) (meta_main_tab[(i)].u.next_free = ((next)<<2)|1) -#define MARK_SLOT_DEAD(i) (meta_main_tab[(i)].u.next_free |= 2) -#define GET_ANY_SLOT_TAB(i) ((DbTable*)(meta_main_tab[(i)].u.next_free & ~(1|2))) /* dead or alive */ - -static ERTS_INLINE erts_smp_rwmtx_t * -get_meta_main_tab_lock(unsigned slot) -{ -#ifdef ERTS_SMP - return &meta_main_tab_locks[slot & ERTS_META_MAIN_TAB_LOCK_TAB_MASK].rwmtx; +#define ERTS_RBT_PREFIX fixing_procs +#define ERTS_RBT_T DbFixation +#define ERTS_RBT_KEY_T Process* +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->procs.parent = NULL; \ + (T)->procs.right = NULL; \ + (T)->procs.left = NULL; \ + } while (0) +#define ERTS_RBT_IS_RED(T) ((T)->procs.is_red) +#define ERTS_RBT_SET_RED(T) ((T)->procs.is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!(T)->procs.is_red) +#define ERTS_RBT_SET_BLACK(T) ((T)->procs.is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->procs.is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->procs.is_red = (F)) +#define ERTS_RBT_GET_PARENT(T) ((T)->procs.parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->procs.parent = (P)) +#define ERTS_RBT_GET_RIGHT(T) ((T)->procs.right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->procs.right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->procs.left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->procs.left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->procs.p) +#define ERTS_RBT_IS_LT(KX, KY) ((KX) < (KY)) +#define ERTS_RBT_IS_EQ(KX, KY) ((KX) == (KY)) + +#define ERTS_RBT_WANT_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_WANT_FOREACH +#define ERTS_RBT_WANT_FOREACH_DESTROY +#ifdef DEBUG +# define ERTS_RBT_WANT_LOOKUP +#endif +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +#ifdef HARDDEBUG +# error Do something useful with CHECK_TABLES maybe #else - return NULL; +# define CHECK_TABLES() #endif + + +static void +send_ets_transfer_message(Process *c_p, Process *proc, + ErtsProcLocks *locks, + DbTable *tb, Eterm heir_data); +static void schedule_free_dbtable(DbTable* tb); +static void delete_sched_table(Process *c_p, DbTable *tb); + +static void table_dec_refc(DbTable *tb, erts_aint_t min_val) +{ + if (erts_refc_dectest(&tb->common.refc, min_val) == 0) + schedule_free_dbtable(tb); } -static erts_smp_spinlock_t meta_main_tab_main_lock; -static Uint meta_main_tab_first_free; /* Index of first free slot */ -static int meta_main_tab_cnt; /* Number of active tables */ -static int meta_main_tab_top; /* Highest ever used slot + 1 */ -static Uint meta_main_tab_slot_mask; /* The slot index part of an unnamed table id */ -static Uint meta_main_tab_seq_incr; -static Uint meta_main_tab_seq_cnt = 0; /* To give unique(-ish) table identifiers */ +static int +db_table_tid_destructor(Binary *unused) +{ + return 1; +} + +static ERTS_INLINE void +make_btid(DbTable *tb) +{ + Binary *btid = erts_create_magic_indirection(db_table_tid_destructor); + erts_atomic_t *tbref = erts_binary_to_magic_indirection(btid); + erts_atomic_init_nob(tbref, (erts_aint_t) tb); + tb->common.btid = btid; + /* + * Table and magic indirection refer eachother, + * and table is refered once by being alive... + */ + erts_refc_init(&tb->common.refc, 2); + erts_refc_inc(&btid->intern.refc, 1); +} + +static ERTS_INLINE DbTable* btid2tab(Binary* btid) +{ + erts_atomic_t *tbref = erts_binary_to_magic_indirection(btid); + return (DbTable *) erts_atomic_read_nob(tbref); +} + +static DbTable * +tid2tab(Eterm tid) +{ + DbTable *tb; + Binary *btid; + erts_atomic_t *tbref; + if (!is_internal_magic_ref(tid)) + return NULL; + + btid = erts_magic_ref2bin(tid); + if (ERTS_MAGIC_BIN_DESTRUCTOR(btid) != db_table_tid_destructor) + return NULL; + + tbref = erts_binary_to_magic_indirection(btid); + tb = (DbTable *) erts_atomic_read_nob(tbref); + + ASSERT(!tb || tb->common.btid == btid); + + return tb; +} + +static ERTS_INLINE int +is_table_alive(DbTable *tb) +{ + erts_atomic_t *tbref; + DbTable *rtb; + + tbref = erts_binary_to_magic_indirection(tb->common.btid); + rtb = (DbTable *) erts_atomic_read_nob(tbref); + + ASSERT(!rtb || rtb == tb); + + return !!rtb; +} + +static ERTS_INLINE int +is_table_named(DbTable *tb) +{ + return tb->common.type & DB_NAMED_TABLE; +} + + +static ERTS_INLINE void +tid_clear(Process *c_p, DbTable *tb) +{ + DbTable *rtb; + Binary *btid = tb->common.btid; + erts_atomic_t *tbref = erts_binary_to_magic_indirection(btid); + rtb = (DbTable *) erts_atomic_xchg_nob(tbref, (erts_aint_t) NULL); + ASSERT(!rtb || tb == rtb); + if (rtb) { + table_dec_refc(tb, 1); + delete_sched_table(c_p, tb); + } +} + +static ERTS_INLINE Eterm +make_tid(Process *c_p, DbTable *tb) +{ + Eterm *hp = HAlloc(c_p, ERTS_MAGIC_REF_THING_SIZE); + return erts_mk_magic_ref(&hp, &c_p->off_heap, tb->common.btid); +} + +Eterm +erts_db_make_tid(Process *c_p, DbTableCommon *tb) +{ + return make_tid(c_p, (DbTable*)tb); +} + + /* ** The meta hash table of all NAMED ets tables */ -#ifdef ERTS_SMP -# define META_NAME_TAB_LOCK_CNT 16 +# define META_NAME_TAB_LOCK_CNT 256 union { - erts_smp_rwmtx_t lck; - byte _cache_line_alignment[64]; + erts_rwmtx_t lck; + byte align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_rwmtx_t))]; }meta_name_tab_rwlocks[META_NAME_TAB_LOCK_CNT]; -#endif static struct meta_name_tab_entry { union { Eterm name_atom; @@ -156,13 +342,11 @@ static unsigned meta_name_tab_mask; static ERTS_INLINE struct meta_name_tab_entry* meta_name_tab_bucket(Eterm name, - erts_smp_rwmtx_t** lockp) + erts_rwmtx_t** lockp) { unsigned bix = atom_val(name) & meta_name_tab_mask; struct meta_name_tab_entry* bucket = &meta_name_tab[bix]; -#ifdef ERTS_SMP *lockp = &meta_name_tab_rwlocks[bix % META_NAME_TAB_LOCK_CNT].lck; -#endif return bucket; } @@ -170,8 +354,7 @@ struct meta_name_tab_entry* meta_name_tab_bucket(Eterm name, typedef enum { LCK_READ=1, /* read only access */ LCK_WRITE=2, /* exclusive table write access */ - LCK_WRITE_REC=3, /* record write access */ - LCK_NONE=4 + LCK_WRITE_REC=3 /* record write access */ } db_lock_kind_t; extern DbTableMethod db_hash; @@ -181,11 +364,6 @@ int user_requested_db_max_tabs; int erts_ets_realloc_always_moves; int erts_ets_always_compress; static int db_max_tabs; -static DbTable *meta_pid_to_tab; /* Pid mapped to owned tables */ -static DbTable *meta_pid_to_fixed_tab; /* Pid mapped to fixed tables */ -static Eterm ms_delete_all; -static Eterm ms_delete_all_buff[8]; /* To compare with for deletion - of all objects */ /* ** Forward decls, static functions @@ -195,22 +373,21 @@ static void fix_table_locked(Process* p, DbTable* tb); static void unfix_table_locked(Process* p, DbTable* tb, db_lock_kind_t* kind); static void set_heir(Process* me, DbTable* tb, Eterm heir, UWord heir_data); static void free_heir_data(DbTable*); -static void free_fixations_locked(DbTable *tb); +static SWord free_fixations_locked(Process* p, DbTable *tb); -static int free_table_cont(Process *p, - DbTable *tb, - int first, - int clean_meta_tab); +static void delete_all_objects_continue(Process* p, DbTable* tb); +static SWord free_table_continue(Process *p, DbTable *tb, SWord reds); static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb); -static BIF_RETTYPE ets_select_delete_1(BIF_ALIST_1); +static BIF_RETTYPE ets_select_delete_trap_1(BIF_ALIST_1); static BIF_RETTYPE ets_select_count_1(BIF_ALIST_1); +static BIF_RETTYPE ets_select_replace_1(BIF_ALIST_1); static BIF_RETTYPE ets_select_trap_1(BIF_ALIST_1); static BIF_RETTYPE ets_delete_trap(BIF_ALIST_1); static Eterm table_info(Process* p, DbTable* tb, Eterm What); -static BIF_RETTYPE ets_select1(Process* p, Eterm arg1); -static BIF_RETTYPE ets_select2(Process* p, Eterm arg1, Eterm arg2); -static BIF_RETTYPE ets_select3(Process* p, Eterm arg1, Eterm arg2, Eterm arg3); +static BIF_RETTYPE ets_select1(Process* p, int bif_ix, Eterm arg1); +static BIF_RETTYPE ets_select2(Process* p, DbTable*, Eterm tid, Eterm ms); +static BIF_RETTYPE ets_select3(Process* p, DbTable*, Eterm tid, Eterm ms, Sint chunk_size); /* @@ -218,6 +395,7 @@ static BIF_RETTYPE ets_select3(Process* p, Eterm arg1, Eterm arg2, Eterm arg3); */ Export ets_select_delete_continue_exp; Export ets_select_count_continue_exp; +Export ets_select_replace_continue_exp; Export ets_select_continue_exp; /* @@ -230,28 +408,19 @@ free_dbtable(void *vtb) { DbTable *tb = (DbTable *) vtb; #ifdef HARDDEBUG - if (erts_smp_atomic_read_nob(&tb->common.memory_size) != sizeof(DbTable)) { + if (erts_atomic_read_nob(&tb->common.memory_size) != sizeof(DbTable)) { erts_fprintf(stderr, "ets: free_dbtable memory remain=%ld fix=%x\n", - erts_smp_atomic_read_nob(&tb->common.memory_size)-sizeof(DbTable), + erts_atomic_read_nob(&tb->common.memory_size)-sizeof(DbTable), tb->common.fixations); } - erts_fprintf(stderr, "ets: free_dbtable(%T) deleted!!!\r\n", - tb->common.id); - - erts_fprintf(stderr, "ets: free_dbtable: meta_pid_to_tab common.memory_size = %ld\n", - erts_smp_atomic_read_nob(&meta_pid_to_tab->common.memory_size)); - print_table(ERTS_PRINT_STDOUT, NULL, 1, meta_pid_to_tab); - - - erts_fprintf(stderr, "ets: free_dbtable: meta_pid_to_fixed_tab common.memory_size = %ld\n", - erts_smp_atomic_read_nob(&meta_pid_to_fixed_tab->common.memory_size)); - print_table(ERTS_PRINT_STDOUT, NULL, 1, meta_pid_to_fixed_tab); -#endif -#ifdef ERTS_SMP - erts_smp_rwmtx_destroy(&tb->common.rwlock); - erts_smp_mtx_destroy(&tb->common.fixlock); #endif + erts_rwmtx_destroy(&tb->common.rwlock); + erts_mtx_destroy(&tb->common.fixlock); ASSERT(is_immed(tb->common.heir_data)); + + if (tb->common.btid) + erts_bin_release(tb->common.btid); + erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable)); } @@ -266,41 +435,184 @@ static void schedule_free_dbtable(DbTable* tb) * Caller is *not* allowed to access the specialized part * (hash or tree) of *tb after this function has returned. */ - ASSERT(erts_smp_refc_read(&tb->common.ref, 0) == 0); + ASSERT(erts_refc_read(&tb->common.refc, 0) == 0); + ASSERT(erts_refc_read(&tb->common.fix_count, 0) == 0); erts_schedule_thr_prgr_later_cleanup_op(free_dbtable, (void *) tb, &tb->release.data, sizeof(DbTable)); } -static ERTS_INLINE void db_init_lock(DbTable* tb, int use_frequent_read_lock, - char *rwname, char* fixname) +static ERTS_INLINE void +save_sched_table(Process *c_p, DbTable *tb) { -#ifdef ERTS_SMP - erts_smp_rwmtx_opt_t rwmtx_opt = ERTS_SMP_RWMTX_OPT_DEFAULT_INITER; + ErtsSchedulerData *esdp = erts_proc_sched_data(c_p); + DbTable *first; + + ASSERT(esdp); + erts_atomic_inc_nob(&esdp->ets_tables.count); + erts_refc_inc(&tb->common.refc, 1); + + first = esdp->ets_tables.clist; + if (!first) { + tb->common.all.next = tb->common.all.prev = tb; + esdp->ets_tables.clist = tb; + } + else { + tb->common.all.prev = first->common.all.prev; + tb->common.all.next = first; + tb->common.all.prev->common.all.next = tb; + first->common.all.prev = tb; + } +} + +static ERTS_INLINE void +remove_sched_table(ErtsSchedulerData *esdp, DbTable *tb) +{ + ErtsEtsAllYieldData *eaydp; + ASSERT(esdp); + ASSERT(erts_get_ref_numbers_thr_id(ERTS_MAGIC_BIN_REFN(tb->common.btid)) + == (Uint32) esdp->no); + + ASSERT(erts_atomic_read_nob(&esdp->ets_tables.count) > 0); + erts_atomic_dec_nob(&esdp->ets_tables.count); + + eaydp = ERTS_SCHED_AUX_YIELD_DATA(esdp, ets_all); + if (eaydp->ongoing) { + /* ets:all() op process list from last to first... */ + if (eaydp->tab == tb) { + if (eaydp->tab == esdp->ets_tables.clist) + eaydp->tab = NULL; + else + eaydp->tab = tb->common.all.prev; + } + } + + if (tb->common.all.next == tb) { + ASSERT(tb->common.all.prev == tb); + ASSERT(esdp->ets_tables.clist == tb); + esdp->ets_tables.clist = NULL; + } + else { +#ifdef DEBUG + DbTable *tmp = esdp->ets_tables.clist; + do { + if (tmp == tb) break; + tmp = tmp->common.all.next; + } while (tmp != esdp->ets_tables.clist); + ASSERT(tmp == tb); +#endif + tb->common.all.prev->common.all.next = tb->common.all.next; + tb->common.all.next->common.all.prev = tb->common.all.prev; + + if (esdp->ets_tables.clist == tb) + esdp->ets_tables.clist = tb->common.all.next; + + } + + table_dec_refc(tb, 0); +} + +static void +scheduled_remove_sched_table(void *vtb) +{ + remove_sched_table(erts_get_scheduler_data(), (DbTable *) vtb); +} + +static void +delete_sched_table(Process *c_p, DbTable *tb) +{ + ErtsSchedulerData *esdp = erts_proc_sched_data(c_p); + Uint32 sid; + + ASSERT(esdp); + + ASSERT(tb->common.btid); + sid = erts_get_ref_numbers_thr_id(ERTS_MAGIC_BIN_REFN(tb->common.btid)); + ASSERT(1 <= sid && sid <= erts_no_schedulers); + if (sid == (Uint32) esdp->no) + remove_sched_table(esdp, tb); + else + erts_schedule_misc_aux_work((int) sid, scheduled_remove_sched_table, tb); +} + +static ERTS_INLINE void +save_owned_table(Process *c_p, DbTable *tb) +{ + DbTable *first; + + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + + first = (DbTable*) erts_psd_get(c_p, ERTS_PSD_ETS_OWNED_TABLES); + + erts_refc_inc(&tb->common.refc, 1); + + if (!first) { + tb->common.owned.next = tb->common.owned.prev = tb; + erts_psd_set(c_p, ERTS_PSD_ETS_OWNED_TABLES, tb); + } + else { + tb->common.owned.prev = first->common.owned.prev; + tb->common.owned.next = first; + tb->common.owned.prev->common.owned.next = tb; + first->common.owned.prev = tb; + } + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); +} + +static ERTS_INLINE void +delete_owned_table(Process *p, DbTable *tb) +{ + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (tb->common.owned.next == tb) { + DbTable* old; + ASSERT(tb->common.owned.prev == tb); + old = erts_psd_set(p, ERTS_PSD_ETS_OWNED_TABLES, NULL); + ASSERT(old == tb); (void)old; + } + else { + DbTable *first = (DbTable*) erts_psd_get(p, ERTS_PSD_ETS_OWNED_TABLES); +#ifdef DEBUG + DbTable *tmp = first; + do { + if (tmp == tb) break; + tmp = tmp->common.owned.next; + } while (tmp != first); + ASSERT(tmp == tb); +#endif + tb->common.owned.prev->common.owned.next = tb->common.owned.next; + tb->common.owned.next->common.owned.prev = tb->common.owned.prev; + + if (tb == first) + erts_psd_set(p, ERTS_PSD_ETS_OWNED_TABLES, tb->common.owned.next); + } + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + + table_dec_refc(tb, 1); +} + +static ERTS_INLINE void db_init_lock(DbTable* tb, int use_frequent_read_lock) +{ + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; if (use_frequent_read_lock) - rwmtx_opt.type = ERTS_SMP_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; if (erts_ets_rwmtx_spin_count >= 0) rwmtx_opt.main_spincount = erts_ets_rwmtx_spin_count; -#endif -#ifdef ERTS_SMP - erts_smp_rwmtx_init_opt_x(&tb->common.rwlock, &rwmtx_opt, - rwname, tb->common.the_name); - erts_smp_mtx_init_x(&tb->common.fixlock, fixname, tb->common.the_name); + erts_rwmtx_init_opt(&tb->common.rwlock, &rwmtx_opt, "db_tab", + tb->common.the_name, ERTS_LOCK_FLAGS_CATEGORY_DB); + erts_mtx_init(&tb->common.fixlock, "db_tab_fix", + tb->common.the_name, ERTS_LOCK_FLAGS_CATEGORY_DB); tb->common.is_thread_safe = !(tb->common.status & DB_FINE_LOCKED); -#endif } static ERTS_INLINE void db_lock(DbTable* tb, db_lock_kind_t kind) { -#ifdef ERTS_SMP - ASSERT(tb != meta_pid_to_tab && tb != meta_pid_to_fixed_tab); if (tb->common.type & DB_FINE_LOCKED) { if (kind == LCK_WRITE) { - erts_smp_rwmtx_rwlock(&tb->common.rwlock); + erts_rwmtx_rwlock(&tb->common.rwlock); tb->common.is_thread_safe = 1; } else { - erts_smp_rwmtx_rlock(&tb->common.rwlock); + erts_rwmtx_rlock(&tb->common.rwlock); ASSERT(!tb->common.is_thread_safe); } } @@ -309,14 +621,13 @@ static ERTS_INLINE void db_lock(DbTable* tb, db_lock_kind_t kind) switch (kind) { case LCK_WRITE: case LCK_WRITE_REC: - erts_smp_rwmtx_rwlock(&tb->common.rwlock); + erts_rwmtx_rwlock(&tb->common.rwlock); break; default: - erts_smp_rwmtx_rlock(&tb->common.rwlock); + erts_rwmtx_rlock(&tb->common.rwlock); } ASSERT(tb->common.is_thread_safe); } -#endif } static ERTS_INLINE void db_unlock(DbTable* tb, db_lock_kind_t kind) @@ -326,18 +637,15 @@ static ERTS_INLINE void db_unlock(DbTable* tb, db_lock_kind_t kind) * DbTable structure. That is, ONLY the SMP case is allowed * to follow the tb pointer! */ -#ifdef ERTS_SMP - ASSERT(tb != meta_pid_to_tab && tb != meta_pid_to_fixed_tab); - if (tb->common.type & DB_FINE_LOCKED) { if (kind == LCK_WRITE) { ASSERT(tb->common.is_thread_safe); tb->common.is_thread_safe = 0; - erts_smp_rwmtx_rwunlock(&tb->common.rwlock); + erts_rwmtx_rwunlock(&tb->common.rwlock); } else { ASSERT(!tb->common.is_thread_safe); - erts_smp_rwmtx_runlock(&tb->common.rwlock); + erts_rwmtx_runlock(&tb->common.rwlock); } } else { @@ -345,27 +653,39 @@ static ERTS_INLINE void db_unlock(DbTable* tb, db_lock_kind_t kind) switch (kind) { case LCK_WRITE: case LCK_WRITE_REC: - erts_smp_rwmtx_rwunlock(&tb->common.rwlock); + erts_rwmtx_rwunlock(&tb->common.rwlock); break; default: - erts_smp_rwmtx_runlock(&tb->common.rwlock); + erts_rwmtx_runlock(&tb->common.rwlock); } } -#endif } - -static ERTS_INLINE void db_meta_lock(DbTable* tb, db_lock_kind_t kind) +static ERTS_INLINE int db_is_exclusive(DbTable* tb, db_lock_kind_t kind) { - ASSERT(tb == meta_pid_to_tab || tb == meta_pid_to_fixed_tab); - ASSERT(kind != LCK_WRITE); - /* As long as we only lock for READ we don't have to lock at all. */ + return kind != LCK_READ && tb->common.is_thread_safe; } -static ERTS_INLINE void db_meta_unlock(DbTable* tb, db_lock_kind_t kind) +static DbTable* handle_lacking_permission(Process* p, DbTable* tb, + db_lock_kind_t kind, + Uint* freason_p) { - ASSERT(tb == meta_pid_to_tab || tb == meta_pid_to_fixed_tab); - ASSERT(kind != LCK_WRITE); + if (tb->common.status & DB_BUSY) { + if (!db_is_exclusive(tb, kind)) { + db_unlock(tb, kind); + db_lock(tb, LCK_WRITE); + } + delete_all_objects_continue(p, tb); + db_unlock(tb, LCK_WRITE); + tb = NULL; + *freason_p = TRAP; + } + else if (p->common.id != tb->common.owner) { + db_unlock(tb, kind); + tb = NULL; + *freason_p = BADARG; + } + return tb; } static ERTS_INLINE @@ -373,10 +693,10 @@ DbTable* db_get_table_aux(Process *p, Eterm id, int what, db_lock_kind_t kind, - int meta_already_locked) + int meta_already_locked, + Uint* freason_p) { - DbTable *tb = NULL; - erts_smp_rwmtx_t *mtl = NULL; + DbTable *tb; /* * IMPORTANT: Only scheduler threads are allowed @@ -385,32 +705,16 @@ DbTable* db_get_table_aux(Process *p, */ ASSERT(erts_get_scheduler_data()); - if (is_small(id)) { - Uint slot = unsigned_val(id) & meta_main_tab_slot_mask; - if (!meta_already_locked) { - mtl = get_meta_main_tab_lock(slot); - erts_smp_rwmtx_rlock(mtl); - } -#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) - else { - erts_smp_rwmtx_t *test_mtl = get_meta_main_tab_lock(slot); - ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(test_mtl) - || erts_lc_rwmtx_is_rwlocked(test_mtl)); - } -#endif - if (slot < db_max_tabs && IS_SLOT_ALIVE(slot)) - tb = meta_main_tab[slot].u.tb; - } - else if (is_atom(id)) { + if (is_atom(id)) { + erts_rwmtx_t *mtl; struct meta_name_tab_entry* bucket = meta_name_tab_bucket(id,&mtl); if (!meta_already_locked) - erts_smp_rwmtx_rlock(mtl); + erts_rwmtx_rlock(mtl); else{ - ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(mtl) + ERTS_LC_ASSERT(erts_lc_rwmtx_is_rlocked(mtl) || erts_lc_rwmtx_is_rwlocked(mtl)); - mtl = NULL; } - + tb = NULL; if (bucket->pu.tb != NULL) { if (is_atom(bucket->u.name_atom)) { /* single */ if (bucket->u.name_atom == id) @@ -427,18 +731,29 @@ DbTable* db_get_table_aux(Process *p, } } } + if (!meta_already_locked) + erts_rwmtx_runlock(mtl); } + else + tb = tid2tab(id); + if (tb) { db_lock(tb, kind); - if (tb->common.id != id - || ((tb->common.status & what) == 0 - && p->common.id != tb->common.owner)) { - db_unlock(tb, kind); - tb = NULL; - } +#ifdef ETS_DBG_FORCE_TRAP + if (erts_atomic_read_nob(&tb->common.dbg_force_trap) && + erts_atomic_add_read_nob(&tb->common.dbg_force_trap, 2) & 2) { + db_unlock(tb, kind); + tb = NULL; + *freason_p = TRAP; + } + else +#endif + if (ERTS_UNLIKELY(!(tb->common.status & what))) + tb = handle_lacking_permission(p, tb, kind, freason_p); } - if (mtl) - erts_smp_rwmtx_runlock(mtl); + else + *freason_p = BADARG; + return tb; } @@ -446,32 +761,21 @@ static ERTS_INLINE DbTable* db_get_table(Process *p, Eterm id, int what, - db_lock_kind_t kind) -{ - return db_get_table_aux(p, id, what, kind, 0); -} - -/* Requires meta_main_tab_locks[slot] locked. -*/ -static ERTS_INLINE void free_slot(int slot) + db_lock_kind_t kind, + Uint* freason_p) { - ASSERT(!IS_SLOT_FREE(slot)); - erts_smp_spin_lock(&meta_main_tab_main_lock); - SET_NEXT_FREE_SLOT(slot,meta_main_tab_first_free); - meta_main_tab_first_free = slot; - meta_main_tab_cnt--; - erts_smp_spin_unlock(&meta_main_tab_main_lock); + return db_get_table_aux(p, id, what, kind, 0, freason_p); } static int insert_named_tab(Eterm name_atom, DbTable* tb, int have_lock) { int ret = 0; - erts_smp_rwmtx_t* rwlock; + erts_rwmtx_t* rwlock; struct meta_name_tab_entry* new_entry; struct meta_name_tab_entry* bucket = meta_name_tab_bucket(name_atom, &rwlock); if (!have_lock) - erts_smp_rwmtx_rwlock(rwlock); + erts_rwmtx_rwlock(rwlock); if (bucket->pu.tb == NULL) { /* empty */ new_entry = bucket; @@ -519,26 +823,25 @@ static int insert_named_tab(Eterm name_atom, DbTable* tb, int have_lock) done: if (!have_lock) - erts_smp_rwmtx_rwunlock(rwlock); + erts_rwmtx_rwunlock(rwlock); return ret; } static int remove_named_tab(DbTable *tb, int have_lock) { int ret = 0; - erts_smp_rwmtx_t* rwlock; - Eterm name_atom = tb->common.id; + erts_rwmtx_t* rwlock; + Eterm name_atom = tb->common.the_name; struct meta_name_tab_entry* bucket = meta_name_tab_bucket(name_atom, &rwlock); -#ifdef ERTS_SMP - if (!have_lock && erts_smp_rwmtx_tryrwlock(rwlock) == EBUSY) { + ASSERT(is_table_named(tb)); + if (!have_lock && erts_rwmtx_tryrwlock(rwlock) == EBUSY) { db_unlock(tb, LCK_WRITE); - erts_smp_rwmtx_rwlock(rwlock); + erts_rwmtx_rwlock(rwlock); db_lock(tb, LCK_WRITE); } -#endif - ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(rwlock)); + ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(rwlock)); if (bucket->pu.tb == NULL) { goto done; @@ -591,7 +894,7 @@ static int remove_named_tab(DbTable *tb, int have_lock) done: if (!have_lock) - erts_smp_rwmtx_rwunlock(rwlock); + erts_rwmtx_rwunlock(rwlock); return ret; } @@ -600,11 +903,11 @@ done: */ static ERTS_INLINE void local_fix_table(DbTable* tb) { - erts_smp_refc_inc(&tb->common.ref, 1); + erts_refc_inc(&tb->common.fix_count, 1); } static ERTS_INLINE void local_unfix_table(DbTable* tb) { - if (erts_smp_refc_dectest(&tb->common.ref, 0) == 0) { + if (erts_refc_dectest(&tb->common.fix_count, 0) == 0) { ASSERT(IS_HASH_TABLE(tb->common.status)); db_unfix_table_hash(&(tb->hash)); } @@ -627,9 +930,7 @@ BIF_RETTYPE ets_safe_fixtable_2(BIF_ALIST_2) #endif kind = (BIF_ARG_2 == am_true) ? LCK_READ : LCK_WRITE_REC; - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, kind)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, kind, BIF_ets_safe_fixtable_2); if (BIF_ARG_2 == am_true) { fix_table_locked(BIF_P, tb); @@ -659,11 +960,7 @@ BIF_RETTYPE ets_first_1(BIF_ALIST_1) CHECK_TABLES(); - tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ); - - if (!tb) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_first_1); cret = tb->common.meth->db_first(BIF_P, tb, &ret); @@ -686,11 +983,7 @@ BIF_RETTYPE ets_next_2(BIF_ALIST_2) CHECK_TABLES(); - tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ); - - if (!tb) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_next_2); cret = tb->common.meth->db_next(BIF_P, tb, BIF_ARG_2, &ret); @@ -713,11 +1006,7 @@ BIF_RETTYPE ets_last_1(BIF_ALIST_1) CHECK_TABLES(); - tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ); - - if (!tb) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_last_1); cret = tb->common.meth->db_last(BIF_P, tb, &ret); @@ -740,11 +1029,7 @@ BIF_RETTYPE ets_prev_2(BIF_ALIST_2) CHECK_TABLES(); - tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ); - - if (!tb) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_prev_2); cret = tb->common.meth->db_prev(BIF_P,tb,BIF_ARG_2,&ret); @@ -762,21 +1047,15 @@ BIF_RETTYPE ets_prev_2(BIF_ALIST_2) BIF_RETTYPE ets_take_2(BIF_ALIST_2) { DbTable* tb; -#ifdef DEBUG int cret; -#endif Eterm ret; CHECK_TABLES(); - tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC); - if (!tb) { - BIF_ERROR(BIF_P, BADARG); - } -#ifdef DEBUG - cret = -#endif - tb->common.meth->db_take(BIF_P, tb, BIF_ARG_2, &ret); - ASSERT(cret == DB_ERROR_NONE); + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_take_2); + + cret = tb->common.meth->db_take(BIF_P, tb, BIF_ARG_2, &ret); + + ASSERT(cret == DB_ERROR_NONE); (void)cret; db_unlock(tb, LCK_WRITE_REC); BIF_RET(ret); } @@ -794,9 +1073,8 @@ BIF_RETTYPE ets_update_element_3(BIF_ALIST_3) DeclareTmpHeap(cell,2,BIF_P); DbUpdateHandle handle; - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_update_element_3); + UseTmpHeap(2,BIF_P); if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) { goto bail_out; @@ -867,9 +1145,9 @@ bail_out: } static BIF_RETTYPE -do_update_counter(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, Eterm arg4) +do_update_counter(Process *p, DbTable* tb, + Eterm arg2, Eterm arg3, Eterm arg4) { - DbTable* tb; int cret = DB_ERROR_BADITEM; Eterm upop_list; int list_size; @@ -885,10 +1163,6 @@ do_update_counter(Process *p, Eterm arg1, Eterm arg2, Eterm arg3, Eterm arg4) Eterm* hstart; Eterm* hend; - if ((tb = db_get_table(p, arg1, DB_WRITE, LCK_WRITE_REC)) == NULL) { - BIF_ERROR(p, BADARG); - } - UseTmpHeap(5, p); if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) { @@ -1062,7 +1336,11 @@ bail_out: */ BIF_RETTYPE ets_update_counter_3(BIF_ALIST_3) { - return do_update_counter(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, THE_NON_VALUE); + DbTable* tb; + + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_update_counter_3); + + return do_update_counter(BIF_P, tb, BIF_ARG_2, BIF_ARG_3, THE_NON_VALUE); } /* @@ -1074,10 +1352,14 @@ BIF_RETTYPE ets_update_counter_3(BIF_ALIST_3) */ BIF_RETTYPE ets_update_counter_4(BIF_ALIST_4) { + DbTable* tb; + if (is_not_tuple(BIF_ARG_4)) { BIF_ERROR(BIF_P, BADARG); } - return do_update_counter(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3, BIF_ARG_4); + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_update_counter_4); + + return do_update_counter(BIF_P, tb, BIF_ARG_2, BIF_ARG_3, BIF_ARG_4); } @@ -1098,9 +1380,8 @@ BIF_RETTYPE ets_insert_2(BIF_ALIST_2) kind = ((is_list(BIF_ARG_2) && CDR(list_val(BIF_ARG_2)) != NIL) ? LCK_WRITE : LCK_WRITE_REC); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, kind, BIF_ets_insert_2); + if (BIF_ARG_2 == NIL) { db_unlock(tb, kind); BIF_RET(am_true); @@ -1166,11 +1447,9 @@ BIF_RETTYPE ets_insert_new_2(BIF_ALIST_2) /* More than one object, use LCK_WRITE to keep atomicity */ kind = LCK_WRITE; - tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind); - if (tb == NULL) { - BIF_ERROR(BIF_P, BADARG); - } - meth = tb->common.meth; + DB_BIF_GET_TABLE(tb, DB_WRITE, kind, BIF_ets_insert_new_2); + + meth = tb->common.meth; for (lst = BIF_ARG_2; is_list(lst); lst = CDR(list_val(lst))) { if (is_not_tuple(CAR(list_val(lst))) || (arityval(*tuple_val(CAR(list_val(lst)))) @@ -1205,9 +1484,8 @@ BIF_RETTYPE ets_insert_new_2(BIF_ALIST_2) /* Only one object (or NIL) */ kind = LCK_WRITE_REC; - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, kind)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, kind, BIF_ets_insert_new_2); + if (BIF_ARG_2 == NIL) { db_unlock(tb, kind); BIF_RET(am_true); @@ -1244,7 +1522,9 @@ BIF_RETTYPE ets_rename_2(BIF_ALIST_2) { DbTable* tb; Eterm ret; - erts_smp_rwmtx_t *lck1, *lck2; + Eterm old_name; + erts_rwmtx_t *lck1, *lck2; + Uint freason; #ifdef HARDDEBUG erts_fprintf(stderr, @@ -1260,59 +1540,69 @@ BIF_RETTYPE ets_rename_2(BIF_ALIST_2) (void) meta_name_tab_bucket(BIF_ARG_2, &lck1); - if (is_small(BIF_ARG_1)) { - Uint slot = unsigned_val(BIF_ARG_1) & meta_main_tab_slot_mask; - lck2 = get_meta_main_tab_lock(slot); - } - else if (is_atom(BIF_ARG_1)) { - (void) meta_name_tab_bucket(BIF_ARG_1, &lck2); + if (is_atom(BIF_ARG_1)) { + old_name = BIF_ARG_1; + named_tab: + (void) meta_name_tab_bucket(old_name, &lck2); if (lck1 == lck2) lck2 = NULL; else if (lck1 > lck2) { - erts_smp_rwmtx_t *tmp = lck1; + erts_rwmtx_t *tmp = lck1; lck1 = lck2; lck2 = tmp; } } else { - BIF_ERROR(BIF_P, BADARG); + tb = tid2tab(BIF_ARG_1); + if (!tb) + BIF_ERROR(BIF_P, BADARG); + else { + if (is_table_named(tb)) { + old_name = tb->common.the_name; + goto named_tab; + } + lck2 = NULL; + } } - erts_smp_rwmtx_rwlock(lck1); + erts_rwmtx_rwlock(lck1); if (lck2) - erts_smp_rwmtx_rwlock(lck2); + erts_rwmtx_rwlock(lck2); - tb = db_get_table_aux(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE, 1); + tb = db_get_table_aux(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE, 1, &freason); if (!tb) - goto badarg; - - if (is_not_atom(tb->common.id)) { /* Not a named table */ - tb->common.the_name = BIF_ARG_2; - goto done; - } - - if (!insert_named_tab(BIF_ARG_2, tb, 1)) - goto badarg; + goto fail; - if (!remove_named_tab(tb, 1)) - erts_exit(ERTS_ERROR_EXIT,"Could not find named tab %s", tb->common.id); + if (is_table_named(tb)) { + if (!insert_named_tab(BIF_ARG_2, tb, 1)) + goto badarg; - tb->common.id = tb->common.the_name = BIF_ARG_2; + if (!remove_named_tab(tb, 1)) + erts_exit(ERTS_ERROR_EXIT,"Could not find named tab %s", tb->common.the_name); + ret = BIF_ARG_2; + } + else { /* Not a named table */ + ret = BIF_ARG_1; + } + tb->common.the_name = BIF_ARG_2; - done: - ret = tb->common.id; db_unlock(tb, LCK_WRITE); - erts_smp_rwmtx_rwunlock(lck1); + erts_rwmtx_rwunlock(lck1); if (lck2) - erts_smp_rwmtx_rwunlock(lck2); + erts_rwmtx_rwunlock(lck2); BIF_RET(ret); - badarg: + +badarg: + freason = BADARG; + +fail: if (tb) db_unlock(tb, LCK_WRITE); - erts_smp_rwmtx_rwunlock(lck1); + erts_rwmtx_rwunlock(lck1); if (lck2) - erts_smp_rwmtx_rwunlock(lck2); - BIF_ERROR(BIF_P, BADARG); + erts_rwmtx_rwunlock(lck2); + + return db_bif_fail(BIF_P, freason, BIF_ets_rename_2, NULL); } @@ -1324,7 +1614,6 @@ BIF_RETTYPE ets_rename_2(BIF_ALIST_2) BIF_RETTYPE ets_new_2(BIF_ALIST_2) { DbTable* tb = NULL; - int slot; Eterm list; Eterm val; Eterm ret; @@ -1333,15 +1622,9 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) Uint32 status; Sint keypos; int is_named, is_compressed; -#ifdef ERTS_SMP int is_fine_locked, frequent_read; -#endif -#ifdef DEBUG int cret; -#endif - DeclareTmpHeap(meta_tuple,3,BIF_P); DbTableMethod* meth; - erts_smp_rwmtx_t *mmtl; if (is_not_atom(BIF_ARG_1)) { BIF_ERROR(BIF_P, BADARG); @@ -1350,13 +1633,11 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) BIF_ERROR(BIF_P, BADARG); } - status = DB_NORMAL | DB_SET | DB_PROTECTED; + status = DB_SET | DB_PROTECTED; keypos = 1; is_named = 0; -#ifdef ERTS_SMP is_fine_locked = 0; frequent_read = 0; -#endif heir = am_none; heir_data = (UWord) am_undefined; is_compressed = erts_ets_always_compress; @@ -1384,30 +1665,18 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) keypos = signed_val(tp[2]); } else if (tp[1] == am_write_concurrency) { -#ifdef ERTS_SMP if (tp[2] == am_true) { is_fine_locked = 1; } else if (tp[2] == am_false) { is_fine_locked = 0; } else break; -#else - if ((tp[2] != am_true) && (tp[2] != am_false)) { - break; - } -#endif } else if (tp[1] == am_read_concurrency) { -#ifdef ERTS_SMP if (tp[2] == am_true) { frequent_read = 1; } else if (tp[2] == am_false) { frequent_read = 0; } else break; -#else - if ((tp[2] != am_true) && (tp[2] != am_false)) { - break; - } -#endif } else if (tp[1] == am_heir && tp[2] == am_none) { @@ -1433,6 +1702,7 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) } else if (val == am_named_table) { is_named = 1; + status |= DB_NAMED_TABLE; } else if (val == am_compressed) { is_compressed = 1; @@ -1448,11 +1718,9 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) } if (IS_HASH_TABLE(status)) { meth = &db_hash; -#ifdef ERTS_SMP if (is_fine_locked && !(status & DB_PRIVATE)) { status |= DB_FINE_LOCKED; } -#endif } else if (IS_TREE_TABLE(status)) { meth = &db_tree; @@ -1461,10 +1729,8 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) BIF_ERROR(BIF_P, BADARG); } -#ifdef ERTS_SMP if (frequent_read && !(status & DB_PRIVATE)) status |= DB_FREQ_READ; -#endif /* we create table outside any table lock * and take the unusal cost of destroy table if it @@ -1473,89 +1739,54 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) { DbTable init_tb; - erts_smp_atomic_init_nob(&init_tb.common.memory_size, 0); + erts_atomic_init_nob(&init_tb.common.memory_size, 0); tb = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE, &init_tb, sizeof(DbTable)); - erts_smp_atomic_init_nob(&tb->common.memory_size, - erts_smp_atomic_read_nob(&init_tb.common.memory_size)); + erts_atomic_init_nob(&tb->common.memory_size, + erts_atomic_read_nob(&init_tb.common.memory_size)); } tb->common.meth = meth; tb->common.the_name = BIF_ARG_1; tb->common.status = status; -#ifdef ERTS_SMP - tb->common.type = status & ERTS_ETS_TABLE_TYPES; + tb->common.type = status; /* Note, 'type' is *read only* from now on... */ -#endif - erts_smp_refc_init(&tb->common.ref, 0); - db_init_lock(tb, status & (DB_FINE_LOCKED|DB_FREQ_READ), - "db_tab", "db_tab_fix"); + erts_refc_init(&tb->common.fix_count, 0); + db_init_lock(tb, status & (DB_FINE_LOCKED|DB_FREQ_READ)); tb->common.keypos = keypos; tb->common.owner = BIF_P->common.id; set_heir(BIF_P, tb, heir, heir_data); - erts_smp_atomic_init_nob(&tb->common.nitems, 0); + erts_atomic_init_nob(&tb->common.nitems, 0); - tb->common.fixations = NULL; + tb->common.fixing_procs = NULL; tb->common.compress = is_compressed; - -#ifdef DEBUG - cret = +#ifdef ETS_DBG_FORCE_TRAP + erts_atomic_init_nob(&tb->common.dbg_force_trap, erts_ets_dbg_force_trap); #endif - meth->db_create(BIF_P, tb); - ASSERT(cret == DB_ERROR_NONE); - erts_smp_spin_lock(&meta_main_tab_main_lock); + cret = meth->db_create(BIF_P, tb); + ASSERT(cret == DB_ERROR_NONE); (void)cret; - if (meta_main_tab_cnt >= db_max_tabs) { - erts_smp_spin_unlock(&meta_main_tab_main_lock); - erts_send_error_to_logger_str(BIF_P->group_leader, - "** Too many db tables **\n"); - free_heir_data(tb); - tb->common.meth->db_free_table(tb); - free_dbtable((void *) tb); - BIF_ERROR(BIF_P, SYSTEM_LIMIT); - } - - slot = meta_main_tab_first_free; - ASSERT(slot>=0 && slot<db_max_tabs); - meta_main_tab_first_free = GET_NEXT_FREE_SLOT(slot); - meta_main_tab_cnt++; - if (slot >= meta_main_tab_top) { - ASSERT(slot == meta_main_tab_top); - meta_main_tab_top = slot + 1; - } - - if (is_named) { - ret = BIF_ARG_1; - } - else { - ret = make_small(slot | meta_main_tab_seq_cnt); - meta_main_tab_seq_cnt += meta_main_tab_seq_incr; - ASSERT((unsigned_val(ret) & meta_main_tab_slot_mask) == slot); - } - erts_smp_spin_unlock(&meta_main_tab_main_lock); + make_btid(tb); - tb->common.id = ret; - tb->common.slot = slot; /* store slot for erase */ + if (is_named) + ret = BIF_ARG_1; + else + ret = make_tid(BIF_P, tb); - mmtl = get_meta_main_tab_lock(slot); - erts_smp_rwmtx_rwlock(mmtl); - meta_main_tab[slot].u.tb = tb; - ASSERT(IS_SLOT_ALIVE(slot)); - erts_smp_rwmtx_rwunlock(mmtl); + save_sched_table(BIF_P, tb); + save_owned_table(BIF_P, tb); if (is_named && !insert_named_tab(BIF_ARG_1, tb, 0)) { - mmtl = get_meta_main_tab_lock(slot); - erts_smp_rwmtx_rwlock(mmtl); - free_slot(slot); - erts_smp_rwmtx_rwunlock(mmtl); + tid_clear(BIF_P, tb); + delete_owned_table(BIF_P, tb); db_lock(tb,LCK_WRITE); free_heir_data(tb); - tb->common.meth->db_free_table(tb); - schedule_free_dbtable(tb); + tb->common.meth->db_free_empty_table(tb); db_unlock(tb,LCK_WRITE); + table_dec_refc(tb, 0); BIF_ERROR(BIF_P, BADARG); } @@ -1566,27 +1797,37 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2) "ets:new(%T,%T)=%T; Process: %T, initial: %T:%T/%bpu\n", BIF_ARG_1, BIF_ARG_2, ret, BIF_P->common.id, BIF_P->u.initial[0], BIF_P->u.initial[1], BIF_P->u.initial[2]); - erts_fprintf(stderr, "ets: new: meta_pid_to_tab common.memory_size = %ld\n", - erts_smp_atomic_read_nob(&meta_pid_to_tab->common.memory_size)); - erts_fprintf(stderr, "ets: new: meta_pid_to_fixed_tab common.memory_size = %ld\n", - erts_smp_atomic_read_nob(&meta_pid_to_fixed_tab->common.memory_size)); #endif - UseTmpHeap(3,BIF_P); + BIF_RET(ret); +} + +/* +** Retrieves the tid() of a named ets table. +*/ +BIF_RETTYPE ets_whereis_1(BIF_ALIST_1) +{ + DbTable* tb; + Eterm res; + Uint freason; - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - if (db_put_hash(meta_pid_to_tab, - TUPLE2(meta_tuple, - BIF_P->common.id, - make_small(slot)), - 0) != DB_ERROR_NONE) { - erts_exit(ERTS_ERROR_EXIT,"Could not update ets metadata."); + if (is_not_atom(BIF_ARG_1)) { + BIF_ERROR(BIF_P, BADARG); } - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); - UnUseTmpHeap(3,BIF_P); + if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { + if (freason == BADARG) + BIF_RET(am_undefined); + else { + //ToDo: Could we avoid this + return db_bif_fail(BIF_P, freason, BIF_ets_whereis_1, NULL); + } + } - BIF_RET(ret); + res = make_tid(BIF_P, tb); + db_unlock(tb, LCK_READ); + + BIF_RET(res); } /* @@ -1600,9 +1841,7 @@ BIF_RETTYPE ets_lookup_2(BIF_ALIST_2) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_lookup_2); cret = tb->common.meth->db_get(BIF_P, tb, BIF_ARG_2, &ret); @@ -1630,9 +1869,7 @@ BIF_RETTYPE ets_member_2(BIF_ALIST_2) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_member_2); cret = tb->common.meth->db_member(tb, BIF_ARG_2, &ret); @@ -1663,9 +1900,7 @@ BIF_RETTYPE ets_lookup_element_3(BIF_ALIST_3) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_lookup_element_3); if (is_not_small(BIF_ARG_3) || ((index = signed_val(BIF_ARG_3)) < 1)) { db_unlock(tb, LCK_READ); @@ -1690,9 +1925,9 @@ BIF_RETTYPE ets_lookup_element_3(BIF_ALIST_3) */ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) { - int trap; + SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P); + SWord reds = initial_reds; DbTable* tb; - erts_smp_rwmtx_t *mmtl; #ifdef HARDDEBUG erts_fprintf(stderr, @@ -1703,9 +1938,7 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_delete_1); /* * Clear all access bits to prevent any ets operation to access the @@ -1715,7 +1948,6 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) tb->common.status |= DB_DELETE; if (tb->common.owner != BIF_P->common.id) { - DeclareTmpHeap(meta_tuple,3,BIF_P); /* * The table is being deleted by a process other than its owner. @@ -1723,50 +1955,33 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) * current process will be killed (e.g. by an EXIT signal), we will * now transfer the ownership to the current process. */ - UseTmpHeap(3,BIF_P); - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner, - make_small(tb->common.slot)); - - BIF_P->flags |= F_USING_DB; - tb->common.owner = BIF_P->common.id; - - db_put_hash(meta_pid_to_tab, - TUPLE2(meta_tuple, - BIF_P->common.id, - make_small(tb->common.slot)), - 0); - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); - UnUseTmpHeap(3,BIF_P); - } - mmtl = get_meta_main_tab_lock(tb->common.slot); -#ifdef ERTS_SMP - if (erts_smp_rwmtx_tryrwlock(mmtl) == EBUSY) { - /* - * We keep our increased refc over this op in order to - * prevent the table from disapearing. - */ - db_unlock(tb, LCK_WRITE); - erts_smp_rwmtx_rwlock(mmtl); - db_lock(tb, LCK_WRITE); + Process *rp = erts_proc_lookup_raw(tb->common.owner); + /* + * Process 'rp' might be exiting, but our table lock prevents it + * from terminating as it cannot complete erts_db_process_exiting(). + */ + ASSERT(!(ERTS_PSFLG_FREE & erts_atomic32_read_nob(&rp->state))); + + delete_owned_table(rp, tb); + BIF_P->flags |= F_USING_DB; + tb->common.owner = BIF_P->common.id; + save_owned_table(BIF_P, tb); } -#endif - /* We must keep the slot, to be found by db_proc_dead() if process dies */ - MARK_SLOT_DEAD(tb->common.slot); - erts_smp_rwmtx_rwunlock(mmtl); - if (is_atom(tb->common.id)) + + if (is_table_named(tb)) remove_named_tab(tb, 0); /* disable inheritance */ free_heir_data(tb); tb->common.heir = am_none; - free_fixations_locked(tb); - - trap = free_table_cont(BIF_P, tb, 1, 1); + reds -= free_fixations_locked(BIF_P, tb); + tid_clear(BIF_P, tb); db_unlock(tb, LCK_WRITE); - if (trap) { + + reds = free_table_continue(BIF_P, tb, reds); + if (reds < 0) { /* * Package the DbTable* pointer into a bignum so that it can be safely * passed through a trap. We used to pass the DbTable* pointer directly @@ -1776,9 +1991,11 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) Eterm *hp = HAlloc(BIF_P, 2); hp[0] = make_pos_bignum_header(1); hp[1] = (Eterm) tb; + BUMP_ALL_REDS(BIF_P); BIF_TRAP1(&ets_delete_continue_exp, BIF_P, make_big(hp)); } else { + BUMP_REDS(BIF_P, (initial_reds - reds)); BIF_RET(am_true); } } @@ -1790,10 +2007,10 @@ BIF_RETTYPE ets_give_away_3(BIF_ALIST_3) { Process* to_proc = NULL; ErtsProcLocks to_locks = ERTS_PROC_LOCK_MAIN; - DeclareTmpHeap(buf,5,BIF_P); Eterm to_pid = BIF_ARG_2; Eterm from_pid; DbTable* tb = NULL; + Uint freason; if (!is_internal_pid(to_pid)) { goto badarg; @@ -1803,43 +2020,35 @@ BIF_RETTYPE ets_give_away_3(BIF_ALIST_3) goto badarg; } - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL - || tb->common.owner != BIF_P->common.id) { - goto badarg; - } + if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE, &freason)) == NULL) + goto fail; + if (tb->common.owner != BIF_P->common.id) + goto badarg; + from_pid = tb->common.owner; if (to_pid == from_pid) { goto badarg; /* or should we be idempotent? return false maybe */ } - UseTmpHeap(5,BIF_P); - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner, - make_small(tb->common.slot)); - + delete_owned_table(BIF_P, tb); to_proc->flags |= F_USING_DB; tb->common.owner = to_pid; - - db_put_hash(meta_pid_to_tab, - TUPLE2(buf,to_pid,make_small(tb->common.slot)), - 0); - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); + save_owned_table(to_proc, tb); db_unlock(tb,LCK_WRITE); - erts_send_message(BIF_P, to_proc, &to_locks, - TUPLE4(buf, am_ETS_TRANSFER, - tb->common.id, - from_pid, - BIF_ARG_3), - 0); - erts_smp_proc_unlock(to_proc, to_locks); + send_ets_transfer_message(BIF_P, to_proc, &to_locks, + tb, BIF_ARG_3); + erts_proc_unlock(to_proc, to_locks); UnUseTmpHeap(5,BIF_P); BIF_RET(am_true); badarg: - if (to_proc != NULL && to_proc != BIF_P) erts_smp_proc_unlock(to_proc, to_locks); + freason = BADARG; +fail: + if (to_proc != NULL && to_proc != BIF_P) erts_proc_unlock(to_proc, to_locks); if (tb != NULL) db_unlock(tb, LCK_WRITE); - BIF_ERROR(BIF_P, BADARG); + + return db_bif_fail(BIF_P, freason, BIF_ets_give_away_3, NULL); } BIF_RETTYPE ets_setopts_2(BIF_ALIST_2) @@ -1890,11 +2099,13 @@ BIF_RETTYPE ets_setopts_2(BIF_ALIST_2) } } - if (tail != NIL - || (tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL - || tb->common.owner != BIF_P->common.id) { + if (tail != NIL) + goto badarg; + + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_setopts_2); + + if (tb->common.owner != BIF_P->common.id) goto badarg; - } if (heir_data != THE_NON_VALUE) { free_heir_data(tb); @@ -1918,23 +2129,84 @@ badarg: } /* -** BIF to erase a whole table and release all memory it holds -*/ -BIF_RETTYPE ets_delete_all_objects_1(BIF_ALIST_1) + * Common for delete_all_objects and select_delete(DeleteAll). + */ +BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2) { + SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P); + SWord reds = initial_reds; + Eterm nitems; DbTable* tb; CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_internal_delete_all_2); + + if (BIF_ARG_2 == am_undefined) { + nitems = erts_make_integer(erts_atomic_read_nob(&tb->common.nitems), + BIF_P); + + reds = tb->common.meth->db_delete_all_objects(BIF_P, tb, reds); + + ASSERT(!(tb->common.status & DB_BUSY)); + + if (reds < 0) { + /* + * Oboy, need to trap AND need to be atomic. + * Solved by cooperative trapping where every process trying to + * access this table (including this process) will "fail" to lookup + * the table and instead pitch in deleting objects + * (in delete_all_objects_continue) and then trap to self. + */ + ASSERT((tb->common.status & (DB_PRIVATE|DB_PROTECTED|DB_PUBLIC)) + == + (tb->common.type & (DB_PRIVATE|DB_PROTECTED|DB_PUBLIC))); + tb->common.status &= ~(DB_PRIVATE|DB_PROTECTED|DB_PUBLIC); + tb->common.status |= DB_BUSY; + db_unlock(tb, LCK_WRITE); + BUMP_ALL_REDS(BIF_P); + BIF_TRAP2(bif_export[BIF_ets_internal_delete_all_2], BIF_P, + BIF_ARG_1, nitems); + } + else { + /* Done, no trapping needed */ + BUMP_REDS(BIF_P, (initial_reds - reds)); + } - tb->common.meth->db_delete_all_objects(BIF_P, tb); + } + else { + /* + * The table lookup succeeded and second argument is nitems + * and not 'undefined', which means we have trapped at least once + * and are now done. + */ + nitems = BIF_ARG_2; + } db_unlock(tb, LCK_WRITE); + BIF_RET(nitems); +} - BIF_RET(am_true); +static void delete_all_objects_continue(Process* p, DbTable* tb) +{ + SWord initial_reds = ERTS_BIF_REDS_LEFT(p); + SWord reds = initial_reds; + + ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&tb->common.rwlock)); + + if ((tb->common.status & (DB_DELETE|DB_BUSY)) != DB_BUSY) + return; + + reds = tb->common.meth->db_delete_all_objects(p, tb, reds); + + if (reds < 0) { + BUMP_ALL_REDS(p); + } + else { + tb->common.status |= tb->common.type & (DB_PRIVATE|DB_PROTECTED|DB_PUBLIC); + tb->common.status &= ~DB_BUSY; + BUMP_REDS(p, (initial_reds - reds)); + } } /* @@ -1950,9 +2222,7 @@ BIF_RETTYPE ets_delete_2(BIF_ALIST_2) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_delete_2); cret = tb->common.meth->db_erase(tb,BIF_ARG_2,&ret); @@ -1979,9 +2249,8 @@ BIF_RETTYPE ets_delete_object_2(BIF_ALIST_2) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_delete_object_2); + if (is_not_tuple(BIF_ARG_2) || (arityval(*tuple_val(BIF_ARG_2)) < tb->common.keypos)) { db_unlock(tb, LCK_WRITE_REC); @@ -2004,7 +2273,7 @@ BIF_RETTYPE ets_delete_object_2(BIF_ALIST_2) /* ** This is for trapping, cannot be called directly. */ -static BIF_RETTYPE ets_select_delete_1(BIF_ALIST_1) +static BIF_RETTYPE ets_select_delete_trap_1(BIF_ALIST_1) { Process *p = BIF_P; Eterm a1 = BIF_ARG_1; @@ -2014,15 +2283,14 @@ static BIF_RETTYPE ets_select_delete_1(BIF_ALIST_1) Eterm ret; Eterm *tptr; db_lock_kind_t kind = LCK_WRITE_REC; - + CHECK_TABLES(); ASSERT(is_tuple(a1)); tptr = tuple_val(a1); ASSERT(arityval(*tptr) >= 1); - if ((tb = db_get_table(p, tptr[1], DB_WRITE, kind)) == NULL) { - BIF_ERROR(p,BADARG); - } + DB_TRAP_GET_TABLE(tb, tptr[1], DB_WRITE, kind, + &ets_select_delete_continue_exp); cret = tb->common.meth->db_select_delete_continue(p,tb,a1,&ret); @@ -2046,7 +2314,10 @@ static BIF_RETTYPE ets_select_delete_1(BIF_ALIST_1) } -BIF_RETTYPE ets_select_delete_2(BIF_ALIST_2) +/* + * ets:select_delete/2 without special case for "delete-all". + */ +BIF_RETTYPE ets_internal_select_delete_2(BIF_ALIST_2) { BIF_RETTYPE result; DbTable* tb; @@ -2056,25 +2327,13 @@ BIF_RETTYPE ets_select_delete_2(BIF_ALIST_2) CHECK_TABLES(); - if(eq(BIF_ARG_2, ms_delete_all)) { - int nitems; - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } - nitems = erts_smp_atomic_read_nob(&tb->common.nitems); - tb->common.meth->db_delete_all_objects(BIF_P, tb); - db_unlock(tb, LCK_WRITE); - BIF_RET(erts_make_integer(nitems,BIF_P)); - } + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_internal_select_delete_2); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_WRITE, LCK_WRITE_REC)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } safety = ITERATION_SAFETY(BIF_P,tb); if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select_delete(BIF_P, tb, BIF_ARG_2, &ret); + cret = tb->common.meth->db_select_delete(BIF_P, tb, BIF_ARG_1, BIF_ARG_2, &ret); if (DID_TRAP(BIF_P,ret) && safety != ITER_SAFE) { fix_table_locked(BIF_P,tb); @@ -2101,48 +2360,254 @@ BIF_RETTYPE ets_select_delete_2(BIF_ALIST_2) return result; } -/* -** Return a list of tables on this node -*/ -BIF_RETTYPE ets_all_0(BIF_ALIST_0) +/* + * ets:all/0 + * + * ets:all() calls ets:internal_request_all/0 which + * requests information about all tables from + * each scheduler thread. Each scheduler replies + * to the calling process with information about + * existing tables created on that specific scheduler. + */ + +struct ErtsEtsAllReq_ { + erts_atomic32_t refc; + Process *proc; + ErtsOIRefStorage ref; + ErtsEtsAllReqList list[1]; /* one per scheduler */ +}; + +#define ERTS_ETS_ALL_REQ_SIZE \ + (sizeof(ErtsEtsAllReq) \ + + (sizeof(ErtsEtsAllReqList) \ + * (erts_no_schedulers - 1))) + +typedef struct { + ErtsEtsAllReq *ongoing; + ErlHeapFragment *hfrag; + DbTable *tab; + ErtsEtsAllReq *queue; +} ErtsEtsAllData; + +/* Tables handled before yielding */ +#define ERTS_ETS_ALL_TB_YCNT 200 +/* + * Min yield count required before starting + * an operation that will require yield. + */ +#define ERTS_ETS_ALL_TB_YCNT_START 10 + +#ifdef DEBUG +/* Test yielding... */ +#undef ERTS_ETS_ALL_TB_YCNT +#undef ERTS_ETS_ALL_TB_YCNT_START +#define ERTS_ETS_ALL_TB_YCNT 10 +#define ERTS_ETS_ALL_TB_YCNT_START 1 +#endif + +static int +ets_all_reply(ErtsSchedulerData *esdp, ErtsEtsAllReq **reqpp, + ErlHeapFragment **hfragpp, DbTable **tablepp, + int *yield_count_p) { - DbTable* tb; - Eterm previous; - int i; - Eterm* hp; - Eterm* hendp; - int t_tabs_cnt; - int t_top; - - erts_smp_spin_lock(&meta_main_tab_main_lock); - t_tabs_cnt = meta_main_tab_cnt; - t_top = meta_main_tab_top; - erts_smp_spin_unlock(&meta_main_tab_main_lock); - - hp = HAlloc(BIF_P, 2*t_tabs_cnt); - hendp = hp + 2*t_tabs_cnt; - - previous = NIL; - for(i = 0; i < t_top; i++) { - erts_smp_rwmtx_t *mmtl = get_meta_main_tab_lock(i); - erts_smp_rwmtx_rlock(mmtl); - if (IS_SLOT_ALIVE(i)) { - if (hp == hendp) { - /* Racing table creator, grab some more heap space */ - t_tabs_cnt = 10; - hp = HAlloc(BIF_P, 2*t_tabs_cnt); - hendp = hp + 2*t_tabs_cnt; - } - tb = meta_main_tab[i].u.tb; - previous = CONS(hp, tb->common.id, previous); - hp += 2; - } - erts_smp_rwmtx_runlock(mmtl); + ErtsEtsAllReq *reqp = *reqpp; + ErlHeapFragment *hfragp = *hfragpp; + int ycount = *yield_count_p; + DbTable *tb, *first; + Uint sz; + Eterm list, msg, ref, *hp; + ErlOffHeap *ohp; + ErtsMessage *mp; + + /* + * - save_sched_table() inserts at end of circular list. + * + * - This function scans from the end so we know that + * the amount of tables to scan wont grow even if we + * yield. + * + * - remove_sched_table() updates the table we yielded + * on if it removes it. + */ + + if (hfragp) { + /* Restart of a yielded operation... */ + ASSERT(hfragp->used_size < hfragp->alloc_size); + ohp = &hfragp->off_heap; + hp = &hfragp->mem[hfragp->used_size]; + list = *hp; + hfragp->used_size = hfragp->alloc_size; + first = esdp->ets_tables.clist; + tb = *tablepp; + } + else { + /* A new operation... */ + ASSERT(!*tablepp); + + /* Max heap size needed... */ + sz = erts_atomic_read_nob(&esdp->ets_tables.count); + sz *= ERTS_MAGIC_REF_THING_SIZE + 2; + sz += 3 + ERTS_REF_THING_SIZE; + hfragp = new_message_buffer(sz); + + hp = &hfragp->mem[0]; + ohp = &hfragp->off_heap; + list = NIL; + first = esdp->ets_tables.clist; + tb = first ? first->common.all.prev : NULL; + } + + if (tb) { + while (1) { + if (is_table_alive(tb)) { + Eterm tid; + if (is_table_named(tb)) + tid = tb->common.the_name; + else + tid = erts_mk_magic_ref(&hp, ohp, tb->common.btid); + list = CONS(hp, tid, list); + hp += 2; + } + + if (tb == first) + break; + + tb = tb->common.all.prev; + + if (--ycount <= 0) { + sz = hp - &hfragp->mem[0]; + ASSERT(hfragp->alloc_size > sz + 1); + *hp = list; + hfragp->used_size = sz; + *hfragpp = hfragp; + *reqpp = reqp; + *tablepp = tb; + *yield_count_p = 0; + return 1; /* Yield! */ + } + } } - HRelease(BIF_P, hendp, hp); - BIF_RET(previous); + + ref = erts_oiref_storage_make_ref(&reqp->ref, &hp); + msg = TUPLE2(hp, ref, list); + hp += 3; + + sz = hp - &hfragp->mem[0]; + ASSERT(sz <= hfragp->alloc_size); + + hfragp = erts_resize_message_buffer(hfragp, sz, &msg, 1); + + mp = erts_alloc_message(0, NULL); + mp->data.heap_frag = hfragp; + + erts_queue_message(reqp->proc, 0, mp, msg, am_system); + + erts_proc_dec_refc(reqp->proc); + + if (erts_atomic32_dec_read_nob(&reqp->refc) == 0) + erts_free(ERTS_ALC_T_ETS_ALL_REQ, reqp); + + *reqpp = NULL; + *hfragpp = NULL; + *tablepp = NULL; + *yield_count_p = ycount; + + return 0; } +int +erts_handle_yielded_ets_all_request(ErtsSchedulerData *esdp, + ErtsEtsAllYieldData *eaydp) +{ + int ix = (int) esdp->no - 1; + int yc = ERTS_ETS_ALL_TB_YCNT; + + while (1) { + if (!eaydp->ongoing) { + ErtsEtsAllReq *ongoing; + + if (!eaydp->queue) + return 0; /* All work completed! */ + + if (yc < ERTS_ETS_ALL_TB_YCNT_START && + yc > erts_atomic_read_nob(&esdp->ets_tables.count)) + return 1; /* Yield! */ + + eaydp->ongoing = ongoing = eaydp->queue; + if (ongoing->list[ix].next == ongoing) + eaydp->queue = NULL; + else { + ongoing->list[ix].next->list[ix].prev = ongoing->list[ix].prev; + ongoing->list[ix].prev->list[ix].next = ongoing->list[ix].next; + eaydp->queue = ongoing->list[ix].next; + } + ASSERT(!eaydp->hfrag); + ASSERT(!eaydp->tab); + } + + if (ets_all_reply(esdp, &eaydp->ongoing, &eaydp->hfrag, &eaydp->tab, &yc)) + return 1; /* Yield! */ + } +} + +static void +handle_ets_all_request(void *vreq) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + ErtsEtsAllYieldData *eayp = ERTS_SCHED_AUX_YIELD_DATA(esdp, ets_all); + ErtsEtsAllReq *req = (ErtsEtsAllReq *) vreq; + + if (!eayp->ongoing && !eayp->queue) { + /* No ets:all() operations ongoing... */ + ErlHeapFragment *hf = NULL; + DbTable *tb = NULL; + int yc = ERTS_ETS_ALL_TB_YCNT; + if (ets_all_reply(esdp, &req, &hf, &tb, &yc)) { + /* Yielded... */ + ASSERT(hf); + eayp->ongoing = req; + eayp->hfrag = hf; + eayp->tab = tb; + erts_notify_new_aux_yield_work(esdp); + } + } + else { + /* Ongoing ets:all() operations; queue up this request... */ + int ix = (int) esdp->no - 1; + if (!eayp->queue) { + req->list[ix].next = req; + req->list[ix].prev = req; + eayp->queue = req; + } + else { + req->list[ix].next = eayp->queue; + req->list[ix].prev = eayp->queue->list[ix].prev; + eayp->queue->list[ix].prev = req; + req->list[ix].prev->list[ix].next = req; + } + } +} + +BIF_RETTYPE ets_internal_request_all_0(BIF_ALIST_0) +{ + Eterm ref = erts_make_ref(BIF_P); + ErtsEtsAllReq *req = erts_alloc(ERTS_ALC_T_ETS_ALL_REQ, + ERTS_ETS_ALL_REQ_SIZE); + erts_atomic32_init_nob(&req->refc, + (erts_aint32_t) erts_no_schedulers); + erts_oiref_storage_save(&req->ref, ref); + req->proc = BIF_P; + erts_proc_add_refc(BIF_P, (Sint) erts_no_schedulers); + + if (erts_no_schedulers > 1) + erts_schedule_multi_misc_aux_work(1, + erts_no_schedulers, + handle_ets_all_request, + (void *) req); + + handle_ets_all_request((void *) req); + BIF_RET(ref); +} /* ** db_slot(Db, Slot) -> [Items]. @@ -2155,9 +2620,8 @@ BIF_RETTYPE ets_slot_2(BIF_ALIST_2) CHECK_TABLES(); - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_slot_2); + /* The slot number is checked in table specific code. */ cret = tb->common.meth->db_slot(BIF_P, tb, BIF_ARG_2, &ret); db_unlock(tb, LCK_READ); @@ -2177,41 +2641,53 @@ BIF_RETTYPE ets_slot_2(BIF_ALIST_2) BIF_RETTYPE ets_match_1(BIF_ALIST_1) { - return ets_select1(BIF_P, BIF_ARG_1); + return ets_select1(BIF_P, BIF_ets_match_1, BIF_ARG_1); } BIF_RETTYPE ets_match_2(BIF_ALIST_2) { + DbTable* tb; Eterm ms; DeclareTmpHeap(buff,8,BIF_P); Eterm *hp = buff; Eterm res; + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_match_2); + UseTmpHeap(8,BIF_P); ms = CONS(hp, am_DollarDollar, NIL); hp += 2; ms = TUPLE3(hp, BIF_ARG_2, NIL, ms); hp += 4; ms = CONS(hp, ms, NIL); - res = ets_select2(BIF_P, BIF_ARG_1, ms); + res = ets_select2(BIF_P, tb, BIF_ARG_1, ms); UnUseTmpHeap(8,BIF_P); return res; } BIF_RETTYPE ets_match_3(BIF_ALIST_3) { + DbTable* tb; Eterm ms; + Sint chunk_size; DeclareTmpHeap(buff,8,BIF_P); Eterm *hp = buff; Eterm res; + /* Chunk size strictly greater than 0 */ + if (is_not_small(BIF_ARG_3) || (chunk_size = signed_val(BIF_ARG_3)) <= 0) { + BIF_ERROR(BIF_P, BADARG); + } + + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_match_3); + UseTmpHeap(8,BIF_P); ms = CONS(hp, am_DollarDollar, NIL); hp += 2; ms = TUPLE3(hp, BIF_ARG_2, NIL, ms); hp += 4; ms = CONS(hp, ms, NIL); - res = ets_select3(BIF_P, BIF_ARG_1, ms, BIF_ARG_3); + res = ets_select3(BIF_P, tb, BIF_ARG_1, ms, chunk_size); UnUseTmpHeap(8,BIF_P); return res; } @@ -2219,34 +2695,35 @@ BIF_RETTYPE ets_match_3(BIF_ALIST_3) BIF_RETTYPE ets_select_3(BIF_ALIST_3) { - return ets_select3(BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); + DbTable* tb; + Sint chunk_size; + + /* Chunk size strictly greater than 0 */ + if (is_not_small(BIF_ARG_3) || (chunk_size = signed_val(BIF_ARG_3)) <= 0) { + BIF_ERROR(BIF_P, BADARG); + } + + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_select_3); + + return ets_select3(BIF_P, tb, BIF_ARG_1, BIF_ARG_2, chunk_size); } static BIF_RETTYPE -ets_select3(Process* p, Eterm arg1, Eterm arg2, Eterm arg3) +ets_select3(Process* p, DbTable* tb, Eterm tid, Eterm ms, Sint chunk_size) { BIF_RETTYPE result; - DbTable* tb; int cret; Eterm ret; - Sint chunk_size; enum DbIterSafety safety; CHECK_TABLES(); - /* Chunk size strictly greater than 0 */ - if (is_not_small(arg3) || (chunk_size = signed_val(arg3)) <= 0) { - BIF_ERROR(p, BADARG); - } - if ((tb = db_get_table(p, arg1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(p, BADARG); - } safety = ITERATION_SAFETY(p,tb); if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select_chunk(p, tb, - arg2, chunk_size, + cret = tb->common.meth->db_select_chunk(p, tb, tid, + ms, chunk_size, 0 /* not reversed */, &ret); if (DID_TRAP(p,ret) && safety != ITER_SAFE) { @@ -2292,9 +2769,8 @@ static BIF_RETTYPE ets_select_trap_1(BIF_ALIST_1) tptr = tuple_val(a1); ASSERT(arityval(*tptr) >= 1); - if ((tb = db_get_table(p, tptr[1], DB_READ, kind)) == NULL) { - BIF_ERROR(p, BADARG); - } + DB_TRAP_GET_TABLE(tb, tptr[1], DB_READ, kind, + &ets_select_continue_exp); cret = tb->common.meth->db_select_continue(p, tb, a1, &ret); @@ -2324,10 +2800,10 @@ static BIF_RETTYPE ets_select_trap_1(BIF_ALIST_1) BIF_RETTYPE ets_select_1(BIF_ALIST_1) { - return ets_select1(BIF_P, BIF_ARG_1); + return ets_select1(BIF_P, BIF_ets_select_1, BIF_ARG_1); } -static BIF_RETTYPE ets_select1(Process *p, Eterm arg1) +static BIF_RETTYPE ets_select1(Process *p, int bif_ix, Eterm arg1) { BIF_RETTYPE result; DbTable* tb; @@ -2349,10 +2825,10 @@ static BIF_RETTYPE ets_select1(Process *p, Eterm arg1) BIF_ERROR(p, BADARG); } tptr = tuple_val(arg1); - if (arityval(*tptr) < 1 || - (tb = db_get_table(p, tptr[1], DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(p, BADARG); - } + if (arityval(*tptr) < 1) + BIF_ERROR(p, BADARG); + + DB_GET_TABLE(tb, tptr[1], DB_READ, LCK_READ, bif_ix, NULL, p); safety = ITERATION_SAFETY(p,tb); if (safety == ITER_UNSAFE) { @@ -2388,34 +2864,27 @@ static BIF_RETTYPE ets_select1(Process *p, Eterm arg1) BIF_RETTYPE ets_select_2(BIF_ALIST_2) { - return ets_select2(BIF_P, BIF_ARG_1, BIF_ARG_2); + DbTable* tb; + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_select_2); + return ets_select2(BIF_P, tb, BIF_ARG_1, BIF_ARG_2); } static BIF_RETTYPE -ets_select2(Process* p, Eterm arg1, Eterm arg2) +ets_select2(Process* p, DbTable* tb, Eterm tid, Eterm ms) { BIF_RETTYPE result; - DbTable* tb; int cret; enum DbIterSafety safety; Eterm ret; CHECK_TABLES(); - /* - * Make sure that the table exists. - */ - - if ((tb = db_get_table(p, arg1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(p, BADARG); - } safety = ITERATION_SAFETY(p,tb); if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select(p, tb, arg2, - 0, &ret); + cret = tb->common.meth->db_select(p, tb, tid, ms, 0, &ret); if (DID_TRAP(p,ret) && safety != ITER_SAFE) { fix_table_locked(p, tb); @@ -2458,9 +2927,9 @@ static BIF_RETTYPE ets_select_count_1(BIF_ALIST_1) tptr = tuple_val(a1); ASSERT(arityval(*tptr) >= 1); - if ((tb = db_get_table(p, tptr[1], DB_READ, kind)) == NULL) { - BIF_ERROR(p, BADARG); - } + + DB_TRAP_GET_TABLE(tb, tptr[1], DB_READ, kind, + &ets_select_count_continue_exp); cret = tb->common.meth->db_select_count_continue(p, tb, a1, &ret); @@ -2495,18 +2964,14 @@ BIF_RETTYPE ets_select_count_2(BIF_ALIST_2) Eterm ret; CHECK_TABLES(); - /* - * Make sure that the table exists. - */ - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_select_count_2); + safety = ITERATION_SAFETY(BIF_P,tb); if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select_count(BIF_P,tb,BIF_ARG_2, &ret); + cret = tb->common.meth->db_select_count(BIF_P,tb, BIF_ARG_1, BIF_ARG_2, &ret); if (DID_TRAP(BIF_P,ret) && safety != ITER_SAFE) { fix_table_locked(BIF_P, tb); @@ -2532,6 +2997,100 @@ BIF_RETTYPE ets_select_count_2(BIF_ALIST_2) return result; } +/* + ** This is for trapping, cannot be called directly. + */ +static BIF_RETTYPE ets_select_replace_1(BIF_ALIST_1) +{ + Process *p = BIF_P; + Eterm a1 = BIF_ARG_1; + BIF_RETTYPE result; + DbTable* tb; + int cret; + Eterm ret; + Eterm *tptr; + db_lock_kind_t kind = LCK_WRITE_REC; + + CHECK_TABLES(); + ASSERT(is_tuple(a1)); + tptr = tuple_val(a1); + ASSERT(arityval(*tptr) >= 1); + + DB_TRAP_GET_TABLE(tb, tptr[1], DB_WRITE, kind, + &ets_select_replace_continue_exp); + + cret = tb->common.meth->db_select_replace_continue(p,tb,a1,&ret); + + if(!DID_TRAP(p,ret) && ITERATION_SAFETY(p,tb) != ITER_SAFE) { + unfix_table_locked(p, tb, &kind); + } + + db_unlock(tb, kind); + + switch (cret) { + case DB_ERROR_NONE: + ERTS_BIF_PREP_RET(result, ret); + break; + default: + ERTS_BIF_PREP_ERROR(result, p, BADARG); + break; + } + erts_match_set_release_result(p); + + return result; +} + + +BIF_RETTYPE ets_select_replace_2(BIF_ALIST_2) +{ + BIF_RETTYPE result; + DbTable* tb; + int cret; + Eterm ret; + enum DbIterSafety safety; + + CHECK_TABLES(); + + DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_select_replace_2); + + if (tb->common.status & DB_BAG) { + /* Bag implementation presented both semantic consistency + and performance issues */ + db_unlock(tb, LCK_WRITE_REC); + BIF_ERROR(BIF_P, BADARG); + } + + safety = ITERATION_SAFETY(BIF_P,tb); + if (safety == ITER_UNSAFE) { + local_fix_table(tb); + } + cret = tb->common.meth->db_select_replace(BIF_P, tb, BIF_ARG_1, BIF_ARG_2, &ret); + + if (DID_TRAP(BIF_P,ret) && safety != ITER_SAFE) { + fix_table_locked(BIF_P,tb); + } + if (safety == ITER_UNSAFE) { + local_unfix_table(tb); + } + db_unlock(tb, LCK_WRITE_REC); + + switch (cret) { + case DB_ERROR_NONE: + ERTS_BIF_PREP_RET(result, ret); + break; + case DB_ERROR_SYSRES: + ERTS_BIF_PREP_ERROR(result, BIF_P, SYSTEM_LIMIT); + break; + default: + ERTS_BIF_PREP_ERROR(result, BIF_P, BADARG); + break; + } + + erts_match_set_release_result(BIF_P); + + return result; +} + BIF_RETTYPE ets_select_reverse_3(BIF_ALIST_3) { @@ -2543,13 +3102,8 @@ BIF_RETTYPE ets_select_reverse_3(BIF_ALIST_3) Sint chunk_size; CHECK_TABLES(); - /* - * Make sure that the table exists. - */ - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_select_reverse_3); /* Chunk size strictly greater than 0 */ if (is_not_small(BIF_ARG_3) || (chunk_size = signed_val(BIF_ARG_3)) <= 0) { @@ -2560,7 +3114,7 @@ BIF_RETTYPE ets_select_reverse_3(BIF_ALIST_3) if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select_chunk(BIF_P,tb, + cret = tb->common.meth->db_select_chunk(BIF_P,tb, BIF_ARG_1, BIF_ARG_2, chunk_size, 1 /* reversed */, &ret); if (DID_TRAP(BIF_P,ret) && safety != ITER_SAFE) { @@ -2587,7 +3141,7 @@ BIF_RETTYPE ets_select_reverse_3(BIF_ALIST_3) BIF_RETTYPE ets_select_reverse_1(BIF_ALIST_1) { - return ets_select1(BIF_P, BIF_ARG_1); + return ets_select1(BIF_P, BIF_ets_select_reverse_1, BIF_ARG_1); } BIF_RETTYPE ets_select_reverse_2(BIF_ALIST_2) @@ -2599,18 +3153,14 @@ BIF_RETTYPE ets_select_reverse_2(BIF_ALIST_2) Eterm ret; CHECK_TABLES(); - /* - * Make sure that the table exists. - */ - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_READ, LCK_READ)) == NULL) { - BIF_ERROR(BIF_P, BADARG); - } + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_select_reverse_2); + safety = ITERATION_SAFETY(BIF_P,tb); if (safety == ITER_UNSAFE) { local_fix_table(tb); } - cret = tb->common.meth->db_select(BIF_P,tb,BIF_ARG_2, + cret = tb->common.meth->db_select(BIF_P,tb, BIF_ARG_1, BIF_ARG_2, 1 /*reversed*/, &ret); if (DID_TRAP(BIF_P,ret) && safety != ITER_SAFE) { @@ -2637,45 +3187,63 @@ BIF_RETTYPE ets_select_reverse_2(BIF_ALIST_2) /* -** ets:match_object(Continuation), ets:match_object(Table, Pattern), ets:match_object(Table,Pattern,ChunkSize) +** ets:match_object(Continuation) */ BIF_RETTYPE ets_match_object_1(BIF_ALIST_1) { - return ets_select1(BIF_P, BIF_ARG_1); + return ets_select1(BIF_P, BIF_ets_match_object_1, BIF_ARG_1); } +/* +** ets:match_object(Table, Pattern) +*/ BIF_RETTYPE ets_match_object_2(BIF_ALIST_2) { + DbTable* tb; Eterm ms; DeclareTmpHeap(buff,8,BIF_P); Eterm *hp = buff; Eterm res; + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_match_object_2); + UseTmpHeap(8,BIF_P); ms = CONS(hp, am_DollarUnderscore, NIL); hp += 2; ms = TUPLE3(hp, BIF_ARG_2, NIL, ms); hp += 4; ms = CONS(hp, ms, NIL); - res = ets_select2(BIF_P, BIF_ARG_1, ms); + res = ets_select2(BIF_P, tb, BIF_ARG_1, ms); UnUseTmpHeap(8,BIF_P); return res; } +/* +** ets:match_object(Table,Pattern,ChunkSize) +*/ BIF_RETTYPE ets_match_object_3(BIF_ALIST_3) { + DbTable* tb; + Sint chunk_size; Eterm ms; DeclareTmpHeap(buff,8,BIF_P); Eterm *hp = buff; Eterm res; + /* Chunk size strictly greater than 0 */ + if (is_not_small(BIF_ARG_3) || (chunk_size = signed_val(BIF_ARG_3)) <= 0) { + BIF_ERROR(BIF_P, BADARG); + } + + DB_BIF_GET_TABLE(tb, DB_READ, LCK_READ, BIF_ets_match_object_3); + UseTmpHeap(8,BIF_P); ms = CONS(hp, am_DollarUnderscore, NIL); hp += 2; ms = TUPLE3(hp, BIF_ARG_2, NIL, ms); hp += 4; ms = CONS(hp, ms, NIL); - res = ets_select3(BIF_P, BIF_ARG_1, ms, BIF_ARG_3); + res = ets_select3(BIF_P, tb, BIF_ARG_1, ms, chunk_size); UnUseTmpHeap(8,BIF_P); return res; } @@ -2689,22 +3257,24 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) static Eterm fields[] = {am_protection, am_keypos, am_type, am_named_table, am_node, am_size, am_name, am_heir, am_owner, am_memory, am_compressed, am_write_concurrency, - am_read_concurrency}; + am_read_concurrency, + am_id}; Eterm results[sizeof(fields)/sizeof(Eterm)]; DbTable* tb; Eterm res; int i; Eterm* hp; + Uint freason; /*Process* rp = NULL;*/ /* If/when we implement lockless private tables: Eterm owner; */ - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ)) == NULL) { - if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) { + if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { + if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) BIF_RET(am_undefined); - } - BIF_ERROR(BIF_P, BADARG); + else + return db_bif_fail(BIF_P, freason, BIF_ets_info_1, NULL); } /* If/when we implement lockless private tables: @@ -2725,7 +3295,7 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ)) == NULL || tb->common.owner != owner) { if (BIF_P != rp) - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_MAIN); + erts_proc_unlock(rp, ERTS_PROC_LOCK_MAIN); if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) { BIF_RET(am_undefined); } @@ -2739,7 +3309,7 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1) db_unlock(tb, LCK_READ); /*if (rp != NULL && rp != BIF_P) - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_MAIN);*/ + erts_proc_unlock(rp, ERTS_PROC_LOCK_MAIN);*/ hp = HAlloc(BIF_P, 5*sizeof(fields)/sizeof(Eterm)); res = NIL; @@ -2761,12 +3331,13 @@ BIF_RETTYPE ets_info_2(BIF_ALIST_2) { DbTable* tb; Eterm ret = THE_NON_VALUE; + Uint freason; - if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ)) == NULL) { - if (is_atom(BIF_ARG_1) || is_small(BIF_ARG_1)) { + if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) { + if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1))) BIF_RET(am_undefined); - } - BIF_ERROR(BIF_P, BADARG); + else + return db_bif_fail(BIF_P, freason, BIF_ets_info_2, NULL); } ret = table_info(BIF_P, tb, BIF_ARG_2); db_unlock(tb, LCK_READ); @@ -2779,7 +3350,7 @@ BIF_RETTYPE ets_info_2(BIF_ALIST_2) BIF_RETTYPE ets_is_compiled_ms_1(BIF_ALIST_1) { - if (erts_db_is_compiled_ms(BIF_ARG_1)) { + if (erts_db_get_match_prog_binary(BIF_ARG_1)) { BIF_RET(am_true); } else { BIF_RET(am_false); @@ -2794,9 +3365,9 @@ BIF_RETTYPE ets_match_spec_compile_1(BIF_ALIST_1) BIF_ERROR(BIF_P, BADARG); } - hp = HAlloc(BIF_P, PROC_BIN_SIZE); + hp = HAlloc(BIF_P, ERTS_MAGIC_REF_THING_SIZE); - BIF_RET(erts_mk_magic_binary_term(&hp, &MSO(BIF_P), mp)); + BIF_RET(erts_db_make_match_prog_ref(BIF_P, mp, &hp)); } BIF_RETTYPE ets_match_spec_run_r_3(BIF_ALIST_3) @@ -2805,24 +3376,18 @@ BIF_RETTYPE ets_match_spec_run_r_3(BIF_ALIST_3) int i = 0; Eterm *hp; Eterm lst; - ProcBin *bp; Binary *mp; Eterm res; Uint32 dummy; - if (!(is_list(BIF_ARG_1) || BIF_ARG_1 == NIL) || !is_binary(BIF_ARG_2)) { + if (!(is_list(BIF_ARG_1) || BIF_ARG_1 == NIL)) { error: BIF_ERROR(BIF_P, BADARG); } - bp = (ProcBin*) binary_val(BIF_ARG_2); - if (thing_subtag(bp->thing_word) != REFC_BINARY_SUBTAG) { - goto error; - } - mp = bp->val; - if (!IsMatchProgBinary(mp)) { + mp = erts_db_get_match_prog_binary(BIF_ARG_2); + if (!mp) goto error; - } if (BIF_ARG_1 == NIL) { BIF_RET(BIF_ARG_3); @@ -2859,17 +3424,14 @@ int erts_ets_rwmtx_spin_count = -1; void init_db(ErtsDbSpinCount db_spin_count) { - DbTable init_tb; int i; - Eterm *hp; unsigned bits; size_t size; -#ifdef ERTS_SMP int max_spin_count = (1 << 15) - 1; /* internal limit */ - erts_smp_rwmtx_opt_t rwmtx_opt = ERTS_SMP_RWMTX_OPT_DEFAULT_INITER; - rwmtx_opt.type = ERTS_SMP_RWMTX_TYPE_FREQUENT_READ; - rwmtx_opt.lived = ERTS_SMP_RWMTX_LONG_LIVED; + erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER; + rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ; + rwmtx_opt.lived = ERTS_RWMTX_LONG_LIVED; switch (db_spin_count) { case ERTS_DB_SPNCNT_NONE: @@ -2908,23 +3470,13 @@ void init_db(ErtsDbSpinCount db_spin_count) if (erts_ets_rwmtx_spin_count >= 0) rwmtx_opt.main_spincount = erts_ets_rwmtx_spin_count; - meta_main_tab_locks = - erts_alloc_permanent_cache_aligned(ERTS_ALC_T_DB_TABLES, - sizeof(erts_meta_main_tab_lock_t) - * ERTS_META_MAIN_TAB_LOCK_TAB_SIZE); - - for (i = 0; i < ERTS_META_MAIN_TAB_LOCK_TAB_SIZE; i++) { - erts_smp_rwmtx_init_opt_x(&meta_main_tab_locks[i].rwmtx, &rwmtx_opt, - "meta_main_tab_slot", make_small(i)); - } - erts_smp_spinlock_init(&meta_main_tab_main_lock, "meta_main_tab_main"); for (i=0; i<META_NAME_TAB_LOCK_CNT; i++) { - erts_smp_rwmtx_init_opt_x(&meta_name_tab_rwlocks[i].lck, &rwmtx_opt, - "meta_name_tab", make_small(i)); + erts_rwmtx_init_opt(&meta_name_tab_rwlocks[i].lck, &rwmtx_opt, + "meta_name_tab", make_small(i), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DB); } -#endif - erts_smp_atomic_init_nob(&erts_ets_misc_mem_size, 0); + erts_atomic_init_nob(&erts_ets_misc_mem_size, 0); db_initialize_util(); if (user_requested_db_max_tabs < DB_DEF_MAX_TABS) @@ -2937,22 +3489,12 @@ void init_db(ErtsDbSpinCount db_spin_count) erts_exit(ERTS_ERROR_EXIT,"Max limit for ets tabled too high %u (max %u).", db_max_tabs, ((Uint)1)<<SMALL_BITS); } - meta_main_tab_slot_mask = (((Uint)1)<<bits) - 1; - meta_main_tab_seq_incr = (((Uint)1)<<bits); - - size = sizeof(*meta_main_tab)*db_max_tabs; - meta_main_tab = erts_db_alloc_nt(ERTS_ALC_T_DB_TABLES, size); - ERTS_ETS_MISC_MEM_ADD(size); - - meta_main_tab_cnt = 0; - meta_main_tab_top = 0; - for (i=1; i<db_max_tabs; i++) { - SET_NEXT_FREE_SLOT(i-1,i); - } - SET_NEXT_FREE_SLOT(db_max_tabs-1, (Uint)-1); - meta_main_tab_first_free = 0; - meta_name_tab_mask = (((Uint) 1)<<(bits-1)) - 1; /* At least half the size of main tab */ + /* + * We don't have ony hard limit for number of tables anymore, . + * but we use 'db_max_tabs' to determine size of name hash table. + */ + meta_name_tab_mask = (((Uint) 1)<<bits) - 1; size = sizeof(struct meta_name_tab_entry)*(meta_name_tab_mask+1); meta_name_tab = erts_db_alloc_nt(ERTS_ALC_T_DB_TABLES, size); ERTS_ETS_MISC_MEM_ADD(size); @@ -2965,173 +3507,44 @@ void init_db(ErtsDbSpinCount db_spin_count) db_initialize_hash(); db_initialize_tree(); - /*TT*/ - /* Create meta table invertion. */ - erts_smp_atomic_init_nob(&init_tb.common.memory_size, 0); - meta_pid_to_tab = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE, - &init_tb, - sizeof(DbTable)); - erts_smp_atomic_init_nob(&meta_pid_to_tab->common.memory_size, - erts_smp_atomic_read_nob(&init_tb.common.memory_size)); - - meta_pid_to_tab->common.id = NIL; - meta_pid_to_tab->common.the_name = am_true; - meta_pid_to_tab->common.status = (DB_NORMAL | DB_BAG | DB_PUBLIC | DB_FINE_LOCKED); -#ifdef ERTS_SMP - meta_pid_to_tab->common.type - = meta_pid_to_tab->common.status & ERTS_ETS_TABLE_TYPES; - /* Note, 'type' is *read only* from now on... */ - meta_pid_to_tab->common.is_thread_safe = 0; -#endif - meta_pid_to_tab->common.keypos = 1; - meta_pid_to_tab->common.owner = NIL; - erts_smp_atomic_init_nob(&meta_pid_to_tab->common.nitems, 0); - meta_pid_to_tab->common.slot = -1; - meta_pid_to_tab->common.meth = &db_hash; - meta_pid_to_tab->common.compress = 0; - - erts_smp_refc_init(&meta_pid_to_tab->common.ref, 0); - /* Neither rwlock or fixlock used - db_init_lock(meta_pid_to_tab, "meta_pid_to_tab", "meta_pid_to_tab_FIX");*/ - - if (db_create_hash(NULL, meta_pid_to_tab) != DB_ERROR_NONE) { - erts_exit(ERTS_ERROR_EXIT,"Unable to create ets metadata tables."); - } - - erts_smp_atomic_set_nob(&init_tb.common.memory_size, 0); - meta_pid_to_fixed_tab = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE, - &init_tb, - sizeof(DbTable)); - erts_smp_atomic_init_nob(&meta_pid_to_fixed_tab->common.memory_size, - erts_smp_atomic_read_nob(&init_tb.common.memory_size)); - - meta_pid_to_fixed_tab->common.id = NIL; - meta_pid_to_fixed_tab->common.the_name = am_true; - meta_pid_to_fixed_tab->common.status = (DB_NORMAL | DB_BAG | DB_PUBLIC | DB_FINE_LOCKED); -#ifdef ERTS_SMP - meta_pid_to_fixed_tab->common.type - = meta_pid_to_fixed_tab->common.status & ERTS_ETS_TABLE_TYPES; - /* Note, 'type' is *read only* from now on... */ - meta_pid_to_fixed_tab->common.is_thread_safe = 0; -#endif - meta_pid_to_fixed_tab->common.keypos = 1; - meta_pid_to_fixed_tab->common.owner = NIL; - erts_smp_atomic_init_nob(&meta_pid_to_fixed_tab->common.nitems, 0); - meta_pid_to_fixed_tab->common.slot = -1; - meta_pid_to_fixed_tab->common.meth = &db_hash; - meta_pid_to_fixed_tab->common.compress = 0; - - erts_smp_refc_init(&meta_pid_to_fixed_tab->common.ref, 0); - /* Neither rwlock or fixlock used - db_init_lock(meta_pid_to_fixed_tab, "meta_pid_to_fixed_tab", "meta_pid_to_fixed_tab_FIX");*/ - - if (db_create_hash(NULL, meta_pid_to_fixed_tab) != DB_ERROR_NONE) { - erts_exit(ERTS_ERROR_EXIT,"Unable to create ets metadata tables."); - } - /* Non visual BIF to trap to. */ erts_init_trap_export(&ets_select_delete_continue_exp, - am_ets, am_atom_put("delete_trap",11), 1, - &ets_select_delete_1); + am_ets, ERTS_MAKE_AM("select_delete_trap"), 1, + &ets_select_delete_trap_1); /* Non visual BIF to trap to. */ erts_init_trap_export(&ets_select_count_continue_exp, - am_ets, am_atom_put("count_trap",11), 1, + am_ets, ERTS_MAKE_AM("count_trap"), 1, &ets_select_count_1); /* Non visual BIF to trap to. */ + erts_init_trap_export(&ets_select_replace_continue_exp, + am_ets, ERTS_MAKE_AM("replace_trap"), 1, + &ets_select_replace_1); + + /* Non visual BIF to trap to. */ erts_init_trap_export(&ets_select_continue_exp, - am_ets, am_atom_put("select_trap",11), 1, + am_ets, ERTS_MAKE_AM("select_trap"), 1, &ets_select_trap_1); /* Non visual BIF to trap to. */ erts_init_trap_export(&ets_delete_continue_exp, - am_ets, am_atom_put("delete_trap",11), 1, + am_ets, ERTS_MAKE_AM("delete_trap"), 1, &ets_delete_trap); - - hp = ms_delete_all_buff; - ms_delete_all = CONS(hp, am_true, NIL); - hp += 2; - ms_delete_all = TUPLE3(hp,am_Underscore,NIL,ms_delete_all); - hp +=4; - ms_delete_all = CONS(hp, ms_delete_all,NIL); } -#define ARRAY_CHUNK 100 - -typedef enum { - ErtsDbProcCleanupProgressTables, - ErtsDbProcCleanupProgressFixations, - ErtsDbProcCleanupProgressDone, -} ErtsDbProcCleanupProgress; - -typedef enum { - ErtsDbProcCleanupOpGetTables, - ErtsDbProcCleanupOpDeleteTables, - ErtsDbProcCleanupOpGetFixations, - ErtsDbProcCleanupOpDeleteFixations, - ErtsDbProcCleanupOpDone -} ErtsDbProcCleanupOperation; - -typedef struct { - ErtsDbProcCleanupProgress progress; - ErtsDbProcCleanupOperation op; - struct { - Eterm arr[ARRAY_CHUNK]; - int size; - int ix; - int clean_ix; - } slots; -} ErtsDbProcCleanupState; - - -static void -proc_exit_cleanup_tables_meta_data(Eterm pid, ErtsDbProcCleanupState *state) -{ - ASSERT(state->slots.clean_ix <= state->slots.ix); - if (state->slots.clean_ix < state->slots.ix) { - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - if (state->slots.size < ARRAY_CHUNK - && state->slots.ix == state->slots.size) { - Eterm dummy; - db_erase_hash(meta_pid_to_tab,pid,&dummy); - } - else { - int ix; - /* Need to erase each explicitly */ - for (ix = state->slots.clean_ix; ix < state->slots.ix; ix++) - db_erase_bag_exact2(meta_pid_to_tab, - pid, - state->slots.arr[ix]); - } - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); - state->slots.clean_ix = state->slots.ix; - } +void +erts_ets_sched_spec_data_init(ErtsSchedulerData *esdp) +{ + ErtsEtsAllYieldData *eaydp = ERTS_SCHED_AUX_YIELD_DATA(esdp, ets_all); + eaydp->ongoing = NULL; + eaydp->hfrag = NULL; + eaydp->tab = NULL; + eaydp->queue = NULL; + esdp->ets_tables.clist = NULL; + erts_atomic_init_nob(&esdp->ets_tables.count, 0); } -static void -proc_exit_cleanup_fixations_meta_data(Eterm pid, ErtsDbProcCleanupState *state) -{ - ASSERT(state->slots.clean_ix <= state->slots.ix); - if (state->slots.clean_ix < state->slots.ix) { - db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - if (state->slots.size < ARRAY_CHUNK - && state->slots.ix == state->slots.size) { - Eterm dummy; - db_erase_hash(meta_pid_to_fixed_tab,pid,&dummy); - } - else { - int ix; - /* Need to erase each explicitly */ - for (ix = state->slots.clean_ix; ix < state->slots.ix; ix++) - db_erase_bag_exact2(meta_pid_to_fixed_tab, - pid, - state->slots.arr[ix]); - } - db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - state->slots.clean_ix = state->slots.ix; - } -} /* In: Table LCK_WRITE ** Return TRUE : ok, table not mine and NOT locked anymore. @@ -3141,7 +3554,6 @@ static int give_away_to_heir(Process* p, DbTable* tb) { Process* to_proc; ErtsProcLocks to_locks = ERTS_PROC_LOCK_MAIN; - DeclareTmpHeap(buf,5,p); Eterm to_pid; UWord heir_data; @@ -3162,14 +3574,14 @@ retry: if (tb->common.owner != p->common.id) { if (to_proc != NULL ) { - erts_smp_proc_unlock(to_proc, to_locks); + erts_proc_unlock(to_proc, to_locks); } db_unlock(tb,LCK_WRITE); return !0; /* ok, someone already gave my table away */ } if (tb->common.heir != to_pid) { /* someone changed the heir */ if (to_proc != NULL ) { - erts_smp_proc_unlock(to_proc, to_locks); + erts_proc_unlock(to_proc, to_locks); } if (to_pid == p->common.id || to_pid == am_none) { return 0; /* no real heir, table still mine */ @@ -3182,22 +3594,15 @@ retry: } if (to_proc->common.u.alive.started_interval != tb->common.heir_started_interval) { - erts_smp_proc_unlock(to_proc, to_locks); + erts_proc_unlock(to_proc, to_locks); return 0; /* heir dead and pid reused, table still mine */ } - UseTmpHeap(5,p); - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_tab, tb->common.owner, - make_small(tb->common.slot)); + delete_owned_table(p, tb); to_proc->flags |= F_USING_DB; tb->common.owner = to_pid; - - db_put_hash(meta_pid_to_tab, - TUPLE2(buf,to_pid,make_small(tb->common.slot)), - 0); - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); - UnUseTmpHeap(5,p); + save_owned_table(to_proc, tb); + db_unlock(tb,LCK_WRITE); heir_data = tb->common.heir_data; if (!is_immed(heir_data)) { @@ -3205,17 +3610,89 @@ retry: ASSERT(arityval(*tpv) == 1); heir_data = tpv[1]; } - erts_send_message(p, to_proc, &to_locks, - TUPLE4(buf, - am_ETS_TRANSFER, - tb->common.id, - p->common.id, - heir_data), - 0); - erts_smp_proc_unlock(to_proc, to_locks); + send_ets_transfer_message(p, to_proc, &to_locks, tb, heir_data); + erts_proc_unlock(to_proc, to_locks); return !0; } +static void +send_ets_transfer_message(Process *c_p, Process *proc, + ErtsProcLocks *locks, + DbTable *tb, Eterm heir_data) +{ + Uint hsz, hd_sz; + ErtsMessage *mp; + Eterm *hp; + ErlOffHeap *ohp; + Eterm tid, hd_copy, msg, sender; + + hsz = 5; + if (!is_table_named(tb)) + hsz += ERTS_MAGIC_REF_THING_SIZE; + if (is_immed(heir_data)) + hd_sz = 0; + else { + hd_sz = size_object(heir_data); + hsz += hd_sz; + } + + mp = erts_alloc_message_heap(proc, locks, hsz, &hp, &ohp); + if (is_table_named(tb)) + tid = tb->common.the_name; + else + tid = erts_mk_magic_ref(&hp, ohp, tb->common.btid); + if (!hd_sz) + hd_copy = heir_data; + else + hd_copy = copy_struct(heir_data, hd_sz, &hp, ohp); + sender = c_p->common.id; + msg = TUPLE4(hp, am_ETS_TRANSFER, tid, sender, hd_copy); + ERL_MESSAGE_TOKEN(mp) = am_undefined; + erts_queue_proc_message(c_p, proc, *locks, mp, msg); +} + + +/* Auto-release fixation from exiting process */ +static SWord proc_cleanup_fixed_table(Process* p, DbFixation* fix) +{ + DbTable* tb = btid2tab(fix->tabs.btid); + SWord work = 0; + + ASSERT(fix->procs.p == p); (void)p; + if (tb) { + db_lock(tb, LCK_WRITE_REC); + if (!(tb->common.status & DB_DELETE)) { + erts_aint_t diff; + erts_mtx_lock(&tb->common.fixlock); + + ASSERT(fixing_procs_rbt_lookup(tb->common.fixing_procs, p)); + + diff = -((erts_aint_t) fix->counter); + erts_refc_add(&tb->common.fix_count,diff,0); + fix->counter = 0; + + fixing_procs_rbt_delete(&tb->common.fixing_procs, fix); + + erts_mtx_unlock(&tb->common.fixlock); + if (!IS_FIXED(tb) && IS_HASH_TABLE(tb->common.status)) { + work += db_unfix_table_hash(&(tb->hash)); + } + + ASSERT(sizeof(DbFixation) == ERTS_ALC_DBG_BLK_SZ(fix)); + ERTS_DB_ALC_MEM_UPDATE_(tb, sizeof(DbFixation), 0); + } + db_unlock(tb, LCK_WRITE_REC); + } + + erts_bin_release(fix->tabs.btid); + erts_free(ERTS_ALC_T_DB_FIXATION, fix); + ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); + ++work; + + return work; +} + + /* * erts_db_process_exiting() is called when a process terminates. * It returns 0 when completely done, and !0 when it wants to @@ -3229,276 +3706,154 @@ retry: int erts_db_process_exiting(Process *c_p, ErtsProcLocks c_p_locks) { - ErtsDbProcCleanupState *state = (ErtsDbProcCleanupState *) c_p->u.terminate; + typedef struct { + enum { + GET_OWNED_TABLE, + FREE_OWNED_TABLE, + UNFIX_TABLES, + }op; + DbTable *tb; + } CleanupState; + CleanupState *state = (CleanupState *) c_p->u.terminate; Eterm pid = c_p->common.id; - ErtsDbProcCleanupState default_state; - int ret; + CleanupState default_state; + SWord initial_reds = ERTS_BIF_REDS_LEFT(c_p); + SWord reds = initial_reds; if (!state) { state = &default_state; - state->progress = ErtsDbProcCleanupProgressTables; - state->op = ErtsDbProcCleanupOpGetTables; + state->op = GET_OWNED_TABLE; + state->tb = NULL; } - while (!0) { + do { switch (state->op) { - case ErtsDbProcCleanupOpGetTables: - state->slots.size = ARRAY_CHUNK; - db_meta_lock(meta_pid_to_tab, LCK_READ); - ret = db_get_element_array(meta_pid_to_tab, - pid, - 2, - state->slots.arr, - &state->slots.size); - db_meta_unlock(meta_pid_to_tab, LCK_READ); - if (ret == DB_ERROR_BADKEY) { - /* Done with tables; now fixations */ - state->progress = ErtsDbProcCleanupProgressFixations; - state->op = ErtsDbProcCleanupOpGetFixations; - break; - } else if (ret != DB_ERROR_NONE) { - ERTS_DB_INTERNAL_ERROR("Inconsistent ets table metadata"); - } - - state->slots.ix = 0; - state->slots.clean_ix = 0; - state->op = ErtsDbProcCleanupOpDeleteTables; - /* Fall through */ - - case ErtsDbProcCleanupOpDeleteTables: - - while (state->slots.ix < state->slots.size) { - DbTable *tb = NULL; - Sint ix = unsigned_val(state->slots.arr[state->slots.ix]); - erts_smp_rwmtx_t *mmtl = get_meta_main_tab_lock(ix); - erts_smp_rwmtx_rlock(mmtl); - if (!IS_SLOT_FREE(ix)) { - tb = GET_ANY_SLOT_TAB(ix); - ASSERT(tb); - } - erts_smp_rwmtx_runlock(mmtl); - if (tb) { - int do_yield; - db_lock(tb, LCK_WRITE); - /* Ownership may have changed since - we looked up the table. */ - if (tb->common.owner != pid) { - do_yield = 0; - db_unlock(tb, LCK_WRITE); - } - else if (tb->common.heir != am_none - && tb->common.heir != pid - && give_away_to_heir(c_p, tb)) { - do_yield = 0; - } - else { - int first_call; -#ifdef HARDDEBUG - erts_fprintf(stderr, - "erts_db_process_exiting(); Table: %T, " - "Process: %T\n", - tb->common.id, pid); -#endif - first_call = (tb->common.status & DB_DELETE) == 0; - if (first_call) { - /* Clear all access bits. */ - tb->common.status &= ~(DB_PROTECTED - | DB_PUBLIC - | DB_PRIVATE); - tb->common.status |= DB_DELETE; - - if (is_atom(tb->common.id)) - remove_named_tab(tb, 0); - - free_heir_data(tb); - free_fixations_locked(tb); - } - - do_yield = free_table_cont(c_p, tb, first_call, 0); - db_unlock(tb, LCK_WRITE); - } - if (do_yield) - goto yield; - } - state->slots.ix++; - if (ERTS_BIF_REDS_LEFT(c_p) <= 0) - goto yield; - } + case GET_OWNED_TABLE: { + DbTable* tb; + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + tb = (DbTable*) erts_psd_get(c_p, ERTS_PSD_ETS_OWNED_TABLES); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + + if (!tb) { + /* Done with owned tables; now fixations */ + state->op = UNFIX_TABLES; + break; + } - proc_exit_cleanup_tables_meta_data(pid, state); - state->op = ErtsDbProcCleanupOpGetTables; - break; + ASSERT(tb != state->tb); + state->tb = tb; + db_lock(tb, LCK_WRITE); + /* + * Ownership may have changed since we looked up the table. + */ + if (tb->common.owner != pid) { + db_unlock(tb, LCK_WRITE); + break; + } + if (tb->common.heir != am_none + && tb->common.heir != pid + && give_away_to_heir(c_p, tb)) { + break; + } + /* Clear all access bits. */ + tb->common.status &= ~(DB_PROTECTED | DB_PUBLIC | DB_PRIVATE); + tb->common.status |= DB_DELETE; + + if (is_table_named(tb)) + remove_named_tab(tb, 0); + + free_heir_data(tb); + reds -= free_fixations_locked(c_p, tb); + tid_clear(c_p, tb); + db_unlock(tb, LCK_WRITE); + state->op = FREE_OWNED_TABLE; + break; + } + case FREE_OWNED_TABLE: + reds = free_table_continue(c_p, state->tb, reds); + if (reds < 0) + goto yield; - case ErtsDbProcCleanupOpGetFixations: - state->slots.size = ARRAY_CHUNK; - db_meta_lock(meta_pid_to_fixed_tab, LCK_READ); - ret = db_get_element_array(meta_pid_to_fixed_tab, - pid, - 2, - state->slots.arr, - &state->slots.size); - db_meta_unlock(meta_pid_to_fixed_tab, LCK_READ); - - if (ret == DB_ERROR_BADKEY) { - /* Done */ - state->progress = ErtsDbProcCleanupProgressDone; - state->op = ErtsDbProcCleanupOpDone; - break; - } else if (ret != DB_ERROR_NONE) { - ERTS_DB_INTERNAL_ERROR("Inconsistent ets fix table metadata"); - } + state->op = GET_OWNED_TABLE; + break; - state->slots.ix = 0; - state->slots.clean_ix = 0; - state->op = ErtsDbProcCleanupOpDeleteFixations; - /* Fall through */ + case UNFIX_TABLES: { + DbFixation* fix; - case ErtsDbProcCleanupOpDeleteFixations: + fix = (DbFixation*) erts_psd_get(c_p, ERTS_PSD_ETS_FIXED_TABLES); - while (state->slots.ix < state->slots.size) { - DbTable *tb = NULL; - Sint ix = unsigned_val(state->slots.arr[state->slots.ix]); - erts_smp_rwmtx_t *mmtl = get_meta_main_tab_lock(ix); - erts_smp_rwmtx_rlock(mmtl); - if (IS_SLOT_ALIVE(ix)) { - tb = meta_main_tab[ix].u.tb; - ASSERT(tb); - } - erts_smp_rwmtx_runlock(mmtl); - if (tb) { - int reds = 0; - - db_lock(tb, LCK_WRITE_REC); - if (!(tb->common.status & DB_DELETE)) { - DbFixation** pp; - - #ifdef ERTS_SMP - erts_smp_mtx_lock(&tb->common.fixlock); - #endif - reds = 10; - - for (pp = &tb->common.fixations; *pp != NULL; - pp = &(*pp)->next) { - if ((*pp)->pid == pid) { - DbFixation* fix = *pp; - erts_aint_t diff = -((erts_aint_t) fix->counter); - erts_smp_refc_add(&tb->common.ref,diff,0); - *pp = fix->next; - erts_db_free(ERTS_ALC_T_DB_FIXATION, - tb, fix, sizeof(DbFixation)); - ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); - break; - } - } - #ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); - #endif - if (!IS_FIXED(tb) && IS_HASH_TABLE(tb->common.status)) { - db_unfix_table_hash(&(tb->hash)); - reds += 40; - } - } - db_unlock(tb, LCK_WRITE_REC); - BUMP_REDS(c_p, reds); - } - state->slots.ix++; - if (ERTS_BIF_REDS_LEFT(c_p) <= 0) - goto yield; - } + if (!fix) { + /* Done */ - proc_exit_cleanup_fixations_meta_data(pid, state); - state->op = ErtsDbProcCleanupOpGetFixations; - break; + if (state != &default_state) + erts_free(ERTS_ALC_T_DB_PROC_CLEANUP, state); + c_p->u.terminate = NULL; - case ErtsDbProcCleanupOpDone: + BUMP_REDS(c_p, (initial_reds - reds)); + return 0; + } - if (state != &default_state) - erts_free(ERTS_ALC_T_DB_PROC_CLEANUP, state); - c_p->u.terminate = NULL; - return 0; + fixed_tabs_delete(c_p, fix); + reds -= proc_cleanup_fixed_table(c_p, fix); + break; + } default: ERTS_DB_INTERNAL_ERROR("Bad internal state"); - } - } - - yield: + } - switch (state->progress) { - case ErtsDbProcCleanupProgressTables: - proc_exit_cleanup_tables_meta_data(pid, state); - break; - case ErtsDbProcCleanupProgressFixations: - proc_exit_cleanup_fixations_meta_data(pid, state); - break; - default: - break; - } + } while (reds > 0); - ASSERT(c_p->u.terminate == (void *) state - || state == &default_state); + yield: if (state == &default_state) { c_p->u.terminate = erts_alloc(ERTS_ALC_T_DB_PROC_CLEANUP, - sizeof(ErtsDbProcCleanupState)); - sys_memcpy(c_p->u.terminate, - (void*) state, - sizeof(ErtsDbProcCleanupState)); + sizeof(CleanupState)); + sys_memcpy(c_p->u.terminate, (void*) state, sizeof(CleanupState)); } + else + ASSERT(state == c_p->u.terminate); return !0; } + /* SMP note: table only need to be LCK_READ locked */ static void fix_table_locked(Process* p, DbTable* tb) { DbFixation *fix; - DeclareTmpHeap(meta_tuple,3,p); -#ifdef ERTS_SMP - erts_smp_mtx_lock(&tb->common.fixlock); -#endif - erts_smp_refc_inc(&tb->common.ref,1); - fix = tb->common.fixations; + erts_mtx_lock(&tb->common.fixlock); + erts_refc_inc(&tb->common.fix_count,1); + fix = tb->common.fixing_procs; if (fix == NULL) { tb->common.time.monotonic = erts_get_monotonic_time(erts_proc_sched_data(p)); tb->common.time.offset = erts_get_time_offset(); } else { - for (; fix != NULL; fix = fix->next) { - if (fix->pid == p->common.id) { - ++(fix->counter); -#ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); -#endif - return; - } + fix = fixing_procs_rbt_lookup(fix, p); + if (fix) { + ASSERT(fixed_tabs_find(NULL, fix)); + ++(fix->counter); + + erts_mtx_unlock(&tb->common.fixlock); + return; } } fix = (DbFixation *) erts_db_alloc(ERTS_ALC_T_DB_FIXATION, tb, sizeof(DbFixation)); ERTS_ETS_MISC_MEM_ADD(sizeof(DbFixation)); - fix->pid = p->common.id; + fix->tabs.btid = tb->common.btid; + erts_refc_inc(&fix->tabs.btid->intern.refc, 2); + fix->procs.p = p; fix->counter = 1; - fix->next = tb->common.fixations; - tb->common.fixations = fix; -#ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); -#endif - p->flags |= F_USING_DB; - UseTmpHeap(3,p); - db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - if (db_put_hash(meta_pid_to_fixed_tab, - TUPLE2(meta_tuple, - p->common.id, - make_small(tb->common.slot)), - 0) != DB_ERROR_NONE) { - UnUseTmpHeap(3,p); - erts_exit(ERTS_ERROR_EXIT,"Could not insert ets metadata in safe_fixtable."); - } - UnUseTmpHeap(3,p); - db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC); + fixing_procs_rbt_insert(&tb->common.fixing_procs, fix); + + erts_mtx_unlock(&tb->common.fixlock); + p->flags |= F_USING_DB; + + fixed_tabs_insert(p, fix); } /* SMP note: May re-lock table @@ -3506,77 +3861,116 @@ static void fix_table_locked(Process* p, DbTable* tb) static void unfix_table_locked(Process* p, DbTable* tb, db_lock_kind_t* kind_p) { - DbFixation** pp; + DbFixation* fix; + + erts_mtx_lock(&tb->common.fixlock); + fix = fixing_procs_rbt_lookup(tb->common.fixing_procs, p); + + if (fix) { + erts_refc_dec(&tb->common.fix_count,0); + --(fix->counter); + ASSERT(fix->counter >= 0); + if (fix->counter == 0) { + fixing_procs_rbt_delete(&tb->common.fixing_procs, fix); + erts_mtx_unlock(&tb->common.fixlock); + fixed_tabs_delete(p, fix); + + erts_refc_dec(&fix->tabs.btid->intern.refc, 1); -#ifdef ERTS_SMP - erts_smp_mtx_lock(&tb->common.fixlock); -#endif - for (pp = &tb->common.fixations; *pp != NULL; pp = &(*pp)->next) { - if ((*pp)->pid == p->common.id) { - DbFixation* fix = *pp; - erts_smp_refc_dec(&tb->common.ref,0); - --(fix->counter); - ASSERT(fix->counter >= 0); - if (fix->counter > 0) { - break; - } - *pp = fix->next; -#ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); -#endif - db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_fixed_tab, - p->common.id, make_small(tb->common.slot)); - db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC); erts_db_free(ERTS_ALC_T_DB_FIXATION, tb, (void *) fix, sizeof(DbFixation)); ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); goto unlocked; } } -#ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); -#endif + erts_mtx_unlock(&tb->common.fixlock); unlocked: if (!IS_FIXED(tb) && IS_HASH_TABLE(tb->common.status) - && erts_smp_atomic_read_nob(&tb->hash.fixdel) != (erts_aint_t)NULL) { -#ifdef ERTS_SMP + && erts_atomic_read_nob(&tb->hash.fixdel) != (erts_aint_t)NULL) { if (*kind_p == LCK_READ && tb->common.is_thread_safe) { /* Must have write lock while purging pseudo-deleted (OTP-8166) */ - erts_smp_rwmtx_runlock(&tb->common.rwlock); - erts_smp_rwmtx_rwlock(&tb->common.rwlock); + erts_rwmtx_runlock(&tb->common.rwlock); + erts_rwmtx_rwlock(&tb->common.rwlock); *kind_p = LCK_WRITE; - if (tb->common.status & DB_DELETE) return; + if (tb->common.status & (DB_DELETE|DB_BUSY)) + return; } -#endif db_unfix_table_hash(&(tb->hash)); } } -/* Assume that tb is WRITE locked */ -static void free_fixations_locked(DbTable *tb) +struct free_fixations_ctx { - DbFixation *fix; - DbFixation *next_fix; + Process* p; + DbTable* tb; + SWord cnt; +}; + +static void free_fixations_op(DbFixation* fix, void* vctx) +{ + struct free_fixations_ctx* ctx = (struct free_fixations_ctx*) vctx; + erts_aint_t diff; + + ASSERT(btid2tab(fix->tabs.btid) == ctx->tb); + ASSERT(fix->counter > 0); + ASSERT(ctx->tb->common.status & DB_DELETE); + + diff = -((erts_aint_t) fix->counter); + erts_refc_add(&ctx->tb->common.fix_count, diff, 0); - fix = tb->common.fixations; - while (fix != NULL) { - erts_aint_t diff = -((erts_aint_t) fix->counter); - erts_smp_refc_add(&tb->common.ref,diff,0); - next_fix = fix->next; - db_meta_lock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_fixed_tab, - fix->pid, - make_small(tb->common.slot)); - db_meta_unlock(meta_pid_to_fixed_tab, LCK_WRITE_REC); - erts_db_free(ERTS_ALC_T_DB_FIXATION, - tb, (void *) fix, sizeof(DbFixation)); - ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); + if (fix->procs.p != ctx->p) { /* Fixated by other process */ + fix->counter = 0; - fix = next_fix; + /* Fake memory stats for table */ + ASSERT(sizeof(DbFixation) == ERTS_ALC_DBG_BLK_SZ(fix)); + ERTS_DB_ALC_MEM_UPDATE_(ctx->tb, sizeof(DbFixation), 0); + + erts_schedule_ets_free_fixation(fix->procs.p->common.id, fix); + /* + * Either sys task is scheduled and erts_db_execute_free_fixation() + * will remove 'fix' or process will exit, drop sys task and + * proc_cleanup_fixed_table() will remove 'fix'. + */ + } + else + { + fixed_tabs_delete(fix->procs.p, fix); + + erts_bin_release(fix->tabs.btid); + + erts_db_free(ERTS_ALC_T_DB_FIXATION, + ctx->tb, (void *) fix, sizeof(DbFixation)); + ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); } - tb->common.fixations = NULL; + ctx->cnt++; +} + +int erts_db_execute_free_fixation(Process* p, DbFixation* fix) +{ + ASSERT(fix->counter == 0); + fixed_tabs_delete(p, fix); + + erts_bin_release(fix->tabs.btid); + + erts_free(ERTS_ALC_T_DB_FIXATION, fix); + ERTS_ETS_MISC_MEM_ADD(-sizeof(DbFixation)); + return 1; +} + +static SWord free_fixations_locked(Process* p, DbTable *tb) +{ + struct free_fixations_ctx ctx; + + ERTS_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&tb->common.rwlock)); + + ctx.p = p; + ctx.tb = tb; + ctx.cnt = 0; + fixing_procs_rbt_foreach_destroy(&tb->common.fixing_procs, + free_fixations_op, &ctx); + tb->common.fixing_procs = NULL; + return ctx.cnt; } static void set_heir(Process* me, DbTable* tb, Eterm heir, UWord heir_data) @@ -3640,55 +4034,41 @@ static void free_heir_data(DbTable* tb) static BIF_RETTYPE ets_delete_trap(BIF_ALIST_1) { - Process *p = BIF_P; + SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P); + SWord reds = initial_reds; Eterm cont = BIF_ARG_1; - int trap; Eterm* ptr = big_val(cont); DbTable *tb = *((DbTable **) (UWord) (ptr + 1)); ASSERT(*ptr == make_pos_bignum_header(1)); - db_lock(tb, LCK_WRITE); - trap = free_table_cont(p, tb, 0, 1); - db_unlock(tb, LCK_WRITE); - - if (trap) { - BIF_TRAP1(&ets_delete_continue_exp, p, cont); + reds = free_table_continue(BIF_P, tb, reds); + if (reds < 0) { + BUMP_ALL_REDS(BIF_P); + BIF_TRAP1(&ets_delete_continue_exp, BIF_P, cont); } else { + BUMP_REDS(BIF_P, (initial_reds - reds)); BIF_RET(am_true); } } /* - * free_table_cont() returns 0 when done and !0 when more work is needed. + * free_table_continue() returns reductions left + * done if >= 0 + * yield if < 0 */ -static int free_table_cont(Process *p, - DbTable *tb, - int first, - int clean_meta_tab) +static SWord free_table_continue(Process *p, DbTable *tb, SWord reds) { - Eterm result; - erts_smp_rwmtx_t *mmtl; - -#ifdef HARDDEBUG - if (!first) { - erts_fprintf(stderr,"ets: free_table_cont %T (continue)\r\n", - tb->common.id); - } -#endif - - result = tb->common.meth->db_free_table_continue(tb); + reds = tb->common.meth->db_free_table_continue(tb, reds); - if (result == 0) { + if (reds < 0) { #ifdef HARDDEBUG erts_fprintf(stderr,"ets: free_table_cont %T (continue begin)\r\n", tb->common.id); #endif /* More work to be done. Let other processes work and call us again. */ - BUMP_ALL_REDS(p); - return !0; } else { #ifdef HARDDEBUG @@ -3696,27 +4076,28 @@ static int free_table_cont(Process *p, tb->common.id); #endif /* Completely done - we will not get called again. */ - mmtl = get_meta_main_tab_lock(tb->common.slot); -#ifdef ERTS_SMP - if (erts_smp_rwmtx_tryrwlock(mmtl) == EBUSY) { - erts_smp_rwmtx_rwunlock(&tb->common.rwlock); - erts_smp_rwmtx_rwlock(mmtl); - erts_smp_rwmtx_rwlock(&tb->common.rwlock); - } -#endif - free_slot(tb->common.slot); - erts_smp_rwmtx_rwunlock(mmtl); - - if (clean_meta_tab) { - db_meta_lock(meta_pid_to_tab, LCK_WRITE_REC); - db_erase_bag_exact2(meta_pid_to_tab,tb->common.owner, - make_small(tb->common.slot)); - db_meta_unlock(meta_pid_to_tab, LCK_WRITE_REC); - } - schedule_free_dbtable(tb); - BUMP_REDS(p, 100); - return 0; + delete_owned_table(p, tb); + table_dec_refc(tb, 0); } + return reds; +} + +struct fixing_procs_info_ctx +{ + Process* p; + Eterm list; +}; + +static void fixing_procs_info_op(DbFixation* fix, void* vctx) +{ + struct fixing_procs_info_ctx* ctx = (struct fixing_procs_info_ctx*) vctx; + Eterm* hp; + Eterm tpl; + + hp = HAllocX(ctx->p, 5, 100); + tpl = TUPLE2(hp, fix->procs.p->common.id, make_small(fix->counter)); + hp += 3; + ctx->list = CONS(hp, tpl, ctx->list); } static Eterm table_info(Process* p, DbTable* tb, Eterm What) @@ -3725,7 +4106,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) int use_monotonic; if (What == am_size) { - ret = make_small(erts_smp_atomic_read_nob(&tb->common.nitems)); + ret = make_small(erts_atomic_read_nob(&tb->common.nitems)); } else if (What == am_type) { if (tb->common.status & DB_SET) { ret = am_set; @@ -3738,7 +4119,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) ret = am_bag; } } else if (What == am_memory) { - Uint words = (Uint) ((erts_smp_atomic_read_nob(&tb->common.memory_size) + Uint words = (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + sizeof(Uint) - 1) / sizeof(Uint)); @@ -3765,17 +4146,20 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) } else if (What == am_node) { ret = erts_this_dist_entry->sysname; } else if (What == am_named_table) { - ret = is_atom(tb->common.id) ? am_true : am_false; + ret = is_table_named(tb) ? am_true : am_false; } else if (What == am_compressed) { ret = tb->common.compress ? am_true : am_false; + } else if (What == am_id) { + ret = make_tid(p, tb); } + /* * For debugging purposes */ else if (What == am_data) { print_table(ERTS_PRINT_STDOUT, NULL, 1, tb); ret = am_true; - } else if (What == am_atom_put("fixed",5)) { + } else if (ERTS_IS_ATOM_STR("fixed",What)) { if (IS_FIXED(tb)) ret = am_true; else @@ -3784,15 +4168,13 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) = ERTS_IS_ATOM_STR("safe_fixed_monotonic_time", What)) || ERTS_IS_ATOM_STR("safe_fixed", What)) { -#ifdef ERTS_SMP - erts_smp_mtx_lock(&tb->common.fixlock); -#endif + erts_mtx_lock(&tb->common.fixlock); if (IS_FIXED(tb)) { Uint need; Eterm *hp; - Eterm tpl, lst; - DbFixation *fix; + Eterm time; Sint64 mtime; + struct fixing_procs_info_ctx ctx; need = 3; if (use_monotonic) { @@ -3805,19 +4187,15 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) mtime = 0; need += 4; } - for (fix = tb->common.fixations; fix != NULL; fix = fix->next) { - need += 5; - } + ctx.p = p; + ctx.list = NIL; + fixing_procs_rbt_foreach(tb->common.fixing_procs, + fixing_procs_info_op, + &ctx); + hp = HAlloc(p, need); - lst = NIL; - for (fix = tb->common.fixations; fix != NULL; fix = fix->next) { - tpl = TUPLE2(hp,fix->pid,make_small(fix->counter)); - hp += 3; - lst = CONS(hp,tpl,lst); - hp += 2; - } if (use_monotonic) - tpl = (IS_SSMALL(mtime) + time = (IS_SSMALL(mtime) ? make_small(mtime) : erts_sint64_to_big(mtime, &hp)); else { @@ -3825,17 +4203,15 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) erts_make_timestamp_value(&ms, &s, &us, tb->common.time.monotonic, tb->common.time.offset); - tpl = TUPLE3(hp, make_small(ms), make_small(s), make_small(us)); + time = TUPLE3(hp, make_small(ms), make_small(s), make_small(us)); hp += 4; } - ret = TUPLE2(hp, tpl, lst); + ret = TUPLE2(hp, time, ctx.list); } else { ret = am_false; } -#ifdef ERTS_SMP - erts_smp_mtx_unlock(&tb->common.fixlock); -#endif - } else if (What == am_atom_put("stats",5)) { + erts_mtx_unlock(&tb->common.fixlock); + } else if (ERTS_IS_ATOM_STR("stats",What)) { if (IS_HASH_TABLE(tb->common.status)) { FloatDef f; DbHashStats stats; @@ -3858,7 +4234,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) std_dev_exp = make_float(hp); PUT_DOUBLE(f, hp); hp += FLOAT_SIZE_OBJECT; - ret = TUPLE7(hp, make_small(erts_smp_atomic_read_nob(&tb->hash.nactive)), + ret = TUPLE7(hp, make_small(erts_atomic_read_nob(&tb->hash.nactive)), avg, std_dev_real, std_dev_exp, make_small(stats.min_chain_len), make_small(stats.max_chain_len), @@ -3873,14 +4249,26 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What) static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb) { - erts_print(to, to_arg, "Table: %T\n", tb->common.id); + Eterm tid; + Eterm heap[ERTS_MAGIC_REF_THING_SIZE]; + + if (is_table_named(tb)) { + tid = tb->common.the_name; + } else { + ErlOffHeap oh; + ERTS_INIT_OFF_HEAP(&oh); + write_magic_ref_thing(heap, &oh, (ErtsMagicBinary *) tb->common.btid); + tid = make_internal_ref(heap); + } + + erts_print(to, to_arg, "Table: %T\n", tid); erts_print(to, to_arg, "Name: %T\n", tb->common.the_name); tb->common.meth->db_print(to, to_arg, show, tb); - erts_print(to, to_arg, "Objects: %d\n", (int)erts_smp_atomic_read_nob(&tb->common.nitems)); + erts_print(to, to_arg, "Objects: %d\n", (int)erts_atomic_read_nob(&tb->common.nitems)); erts_print(to, to_arg, "Words: %bpu\n", - (Uint) ((erts_smp_atomic_read_nob(&tb->common.memory_size) + (Uint) ((erts_atomic_read_nob(&tb->common.memory_size) + sizeof(Uint) - 1) / sizeof(Uint))); @@ -3891,44 +4279,60 @@ static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb) erts_print(to, to_arg, "Read Concurrency: %T\n", table_info(NULL, tb, am_read_concurrency)); } +typedef struct { + fmtfn_t to; + void *to_arg; + int show; +} ErtsPrintDbInfo; + +static void +db_info_print(DbTable *tb, void *vpdbip) +{ + ErtsPrintDbInfo *pdbip = (ErtsPrintDbInfo *) vpdbip; + erts_print(pdbip->to, pdbip->to_arg, "=ets:%T\n", tb->common.owner); + erts_print(pdbip->to, pdbip->to_arg, "Slot: %bpu\n", (Uint) tb); + print_table(pdbip->to, pdbip->to_arg, pdbip->show, tb); +} + void db_info(fmtfn_t to, void *to_arg, int show) /* Called by break handler */ { - int i; - for (i=0; i < db_max_tabs; i++) - if (IS_SLOT_ALIVE(i)) { - erts_print(to, to_arg, "=ets:%T\n", meta_main_tab[i].u.tb->common.owner); - erts_print(to, to_arg, "Slot: %d\n", i); - print_table(to, to_arg, show, meta_main_tab[i].u.tb); - } -#ifdef DEBUG - erts_print(to, to_arg, "=internal_ets: Process to table index\n"); - print_table(to, to_arg, show, meta_pid_to_tab); - erts_print(to, to_arg, "=internal_ets: Process to fixation index\n"); - print_table(to, to_arg, show, meta_pid_to_fixed_tab); -#endif + ErtsPrintDbInfo pdbi; + + pdbi.to = to; + pdbi.to_arg = to_arg; + pdbi.show = show; + + erts_db_foreach_table(db_info_print, &pdbi); } Uint erts_get_ets_misc_mem_size(void) { - ERTS_SMP_MEMORY_BARRIER; + ERTS_THR_MEMORY_BARRIER; /* Memory not allocated in ets_alloc */ - return (Uint) erts_smp_atomic_read_nob(&erts_ets_misc_mem_size); + return (Uint) erts_atomic_read_nob(&erts_ets_misc_mem_size); } /* SMP Note: May only be used when system is locked */ void erts_db_foreach_table(void (*func)(DbTable *, void *), void *arg) { - int i, j; - j = 0; - for(i = 0; (i < db_max_tabs && j < meta_main_tab_cnt); i++) { - if (IS_SLOT_ALIVE(i)) { - j++; - (*func)(meta_main_tab[i].u.tb, arg); - } + int ix; + + ASSERT(erts_thr_progress_is_blocking()); + + for (ix = 0; ix < erts_no_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); + DbTable *first = esdp->ets_tables.clist; + if (first) { + DbTable *tb = first; + do { + if (is_table_alive(tb)) + (*func)(tb, arg); + tb = tb->common.all.next; + } while (tb != first); + } } - ASSERT(j == meta_main_tab_cnt); } /* SMP Note: May only be used when system is locked */ @@ -3947,6 +4351,18 @@ erts_db_get_max_tabs() return db_max_tabs; } +Uint erts_ets_table_count(void) +{ + Uint tb_count = 0; + Uint six; + + for (six = 0; six < erts_no_schedulers; six++) { + ErtsSchedulerData *esdp = &erts_aligned_scheduler_data[six].esd; + tb_count += erts_atomic_read_nob(&esdp->ets_tables.count); + } + return tb_count; +} + /* * For testing of meta tables only. * @@ -3967,7 +4383,7 @@ erts_ets_colliding_names(Process* p, Eterm name, Uint cnt) while (index >= atom_table_size()) { char tmp[20]; erts_snprintf(tmp, sizeof(tmp), "am%x", atom_table_size()); - erts_atom_put((byte *) tmp, strlen(tmp), ERTS_ATOM_ENC_LATIN1, 1); + erts_atom_put((byte *) tmp, sys_strlen(tmp), ERTS_ATOM_ENC_LATIN1, 1); } list = CONS(hp, make_atom(index), list); hp += 2; @@ -3978,23 +4394,50 @@ erts_ets_colliding_names(Process* p, Eterm name, Uint cnt) return list; } +#ifdef ERTS_ENABLE_LOCK_COUNT -#ifdef HARDDEBUG /* Here comes some debug functions */ +void erts_lcnt_enable_db_lock_count(DbTable *tb, int enable) { + if(enable) { + erts_lcnt_install_new_lock_info(&tb->common.rwlock.lcnt, "db_tab", + tb->common.the_name, ERTS_LOCK_TYPE_RWMUTEX | ERTS_LOCK_FLAGS_CATEGORY_DB); + erts_lcnt_install_new_lock_info(&tb->common.fixlock.lcnt, "db_tab_fix", + tb->common.the_name, ERTS_LOCK_TYPE_MUTEX | ERTS_LOCK_FLAGS_CATEGORY_DB); + } else { + erts_lcnt_uninstall(&tb->common.rwlock.lcnt); + erts_lcnt_uninstall(&tb->common.fixlock.lcnt); + } -void db_check_tables(void) -{ -#ifdef ERTS_SMP - return; -#else - int i; + if(IS_HASH_TABLE(tb->common.status)) { + erts_lcnt_enable_db_hash_lock_count(&tb->hash, enable); + } +} - for (i = 0; i < db_max_tabs; i++) { - if (IS_SLOT_ALIVE(i)) { - DbTable* tb = meta_main_tab[i].t; - tb->common.meth->db_check_table(tb); - } +static void lcnt_update_db_locks_per_sched(void *enable) { + ErtsSchedulerData *esdp; + DbTable *head; + + esdp = erts_get_scheduler_data(); + head = esdp->ets_tables.clist; + + if(head) { + DbTable *iterator = head; + + do { + if(is_table_alive(iterator)) { + erts_lcnt_enable_db_lock_count(iterator, !!enable); + } + + iterator = iterator->common.all.next; + } while (iterator != head); } -#endif } -#endif /* HARDDEBUG */ +void erts_lcnt_update_db_locks(int enable) { + erts_schedule_multi_misc_aux_work(0, erts_no_schedulers, + &lcnt_update_db_locks_per_sched, (void*)(UWord)enable); +} +#endif /* ERTS_ENABLE_LOCK_COUNT */ + +#ifdef ETS_DBG_FORCE_TRAP +erts_aint_t erts_ets_dbg_force_trap = 0; +#endif |