aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_db_hash.c
diff options
context:
space:
mode:
authorKjell Winblad <[email protected]>2019-02-22 16:49:37 +0100
committerKjell Winblad <[email protected]>2019-04-10 15:42:42 +0200
commitc5e9766712436bea2b91bccd062f66a3ad1841bb (patch)
tree25ce2b16b9f847088997b0d4e77af46dbb6d2d66 /erts/emulator/beam/erl_db_hash.c
parent326c3cb70c1b37c794b781a42c50725766098810 (diff)
downloadotp-c5e9766712436bea2b91bccd062f66a3ad1841bb.tar.gz
otp-c5e9766712436bea2b91bccd062f66a3ad1841bb.tar.bz2
otp-c5e9766712436bea2b91bccd062f66a3ad1841bb.zip
Decentralized counters for ETS ordered_set with write_concurrency
Previously, all ETS tables used centralized counter variables to keep track of the number of items stored and the amount of memory consumed. These counters can cause scalability problems (especially on big NUMA systems). This commit adds an implementation of a decentralized counter and modifies the implementation of ETS so that ETS tables of type ordered_set with write_concurrency enabled use the decentralized counter. [Experiments][1] indicate that this change substantially improves the scalability of ETS ordered_set tables with write_concurrency enabled in scenarios with frequent `ets:insert/2` and `ets:delete/2` calls. The new counter is implemented in the module erts_flxctr (`erts_flxctr.h` and `erts_flxctr.c`). The module has the suffix flxctr as it contains the implementation of a flexible counter (i.e., counter instances can be configured to be either centralized or decentralized). Counters that are configured to be centralized are implemented with a single counter variable which is modified with atomic operations. Decentralized counters are spread over several cache lines (how many can be configured with the parameter `+dcg`). The scheduler threads are mapped to cache lines so that there is no single point of contention when decentralized counters are updated. The thread progress functionality of the Erlang VM is utilized to implement support for linearizable snapshots of decentralized counters. The snapshot functionality is used by the `ets:info/1` and `ets:info/2` functions. [1]: http://winsh.me/ets_catree_benchmark/flxctr_res.html
Diffstat (limited to 'erts/emulator/beam/erl_db_hash.c')
-rw-r--r--erts/emulator/beam/erl_db_hash.c63
1 files changed, 47 insertions, 16 deletions
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index f225730029..ceaccf7e44 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -85,6 +85,14 @@
#include "erl_db_hash.h"
+#define ADD_NITEMS(DB, TO_ADD) \
+ erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD)
+#define INC_NITEMS(DB) \
+ erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define DEC_NITEMS(DB) \
+ erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define RESET_NITEMS(DB) \
+ erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
/*
* The following symbols can be manipulated to "tune" the linear hash array
*/
@@ -121,7 +129,9 @@
: ((struct segment**) erts_atomic_read_nob(&(tb)->segtab)))
#endif
#define NACTIVE(tb) ((int)erts_atomic_read_nob(&(tb)->nactive))
-#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems))
+#define NITEMS(tb) \
+ ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID))
#define SLOT_IX_TO_SEG_IX(i) (((i)+(EXT_SEGSZ-FIRST_SEGSZ)) >> EXT_SEGSZ_EXP)
@@ -444,7 +454,12 @@ static void db_foreach_offheap_hash(DbTable *,
void (*)(ErlOffHeap *, void *),
void *);
-static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds);
+static SWord db_delete_all_objects_hash(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb);
+static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p,
+ Eterm nitems_holder);
#ifdef HARDDEBUG
static void db_check_table_hash(DbTableHash *tb);
#endif
@@ -548,6 +563,7 @@ DbTableMethod db_hash =
db_select_replace_continue_hash,
db_take_hash,
db_delete_all_objects_hash,
+ db_delete_all_objects_get_nitems_from_holder_hash,
db_free_empty_table_hash,
db_free_table_continue_hash,
db_print_hash,
@@ -806,7 +822,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
if (tb->common.status & DB_SET) {
HashDbTerm* bnext = b->next;
if (is_pseudo_deleted(b)) {
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
b->pseudo_deleted = 0;
}
else if (key_clash_fail) {
@@ -835,7 +851,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
do {
if (db_eq(&tb->common,obj,&q->dbterm)) {
if (is_pseudo_deleted(q)) {
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
q->pseudo_deleted = 0;
ASSERT(q->hvalue == hval);
if (q != b) { /* must move to preserve key insertion order */
@@ -858,7 +874,7 @@ Lnew:
q->pseudo_deleted = 0;
q->next = b;
*bp = q;
- nitems = erts_atomic_inc_read_nob(&tb->common.nitems);
+ nitems = INC_NITEMS(tb);
WUNLOCK_HASH(lck);
{
int nactive = NACTIVE(tb);
@@ -1056,7 +1072,7 @@ int db_erase_hash(DbTable *tbl, Eterm key, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -1117,7 +1133,7 @@ static int db_erase_object_hash(DbTable *tbl, Eterm object, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -2023,7 +2039,7 @@ static int select_delete_on_match_res(traverse_context_t* ctx_base, Sint slot_ix
del->next = ctx->free_us;
ctx->free_us = del;
}
- erts_atomic_dec_nob(&ctx->base.tb->common.nitems);
+ DEC_NITEMS(ctx->base.tb);
return 1;
}
@@ -2300,7 +2316,7 @@ static int db_take_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -2360,7 +2376,7 @@ static SWord db_mark_all_deleted_hash(DbTable *tbl, SWord reds)
fixdel->slot = NACTIVE(tb) - 1;
fixdel->all = 1;
fixdel->trap = 0;
- erts_atomic_set_nob(&tb->common.nitems, 0);
+ RESET_NITEMS(tb);
return loops < 0 ? 0 : loops / LOOPS_PER_REDUCTION;
}
@@ -2468,7 +2484,8 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds)
(void*)tb->locks, sizeof(DbTableHashFineLocks));
tb->locks = NULL;
}
- ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable));
+ ASSERT(sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters,
+ ERTS_DB_TABLE_MEM_COUNTER_ID));
return reds; /* Done */
}
@@ -3080,7 +3097,7 @@ db_lookup_dbterm_hash(Process *p, DbTable *tbl, Eterm key, Eterm obj,
ASSERT(q->hvalue == hval);
q->pseudo_deleted = 0;
*bp = b = q;
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
}
HRelease(p, hend, htop);
@@ -3123,7 +3140,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
}
WUNLOCK_HASH(lck);
- erts_atomic_dec_nob(&tb->common.nitems);
+ DEC_NITEMS(tb);
try_shrink(tb);
} else {
if (handle->flags & DB_MUST_RESIZE) {
@@ -3132,7 +3149,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
}
if (handle->flags & DB_INC_TRY_GROW) {
int nactive;
- int nitems = erts_atomic_inc_read_nob(&tb->common.nitems);
+ int nitems = INC_NITEMS(tb);
WUNLOCK_HASH(lck);
nactive = NACTIVE(tb);
@@ -3153,8 +3170,17 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
return;
}
-static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds)
+static SWord db_delete_all_objects_hash(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb)
{
+ if (nitems_holder_wb != NULL) {
+ Uint nr_of_items =
+ erts_flxctr_read_centralized(&tbl->common.counters,
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID);
+ *nitems_holder_wb = erts_make_integer(nr_of_items, p);
+ }
if (IS_FIXED(tbl)) {
reds = db_mark_all_deleted_hash(tbl, reds);
} else {
@@ -3163,11 +3189,16 @@ static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds)
return reds;
db_create_hash(p, tbl);
- erts_atomic_set_nob(&tbl->hash.common.nitems, 0);
+ RESET_NITEMS(tbl);
}
return reds;
}
+static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p,
+ Eterm nitems_holder){
+ return nitems_holder;
+}
+
void db_foreach_offheap_hash(DbTable *tbl,
void (*func)(ErlOffHeap *, void *),
void * arg)