aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
Diffstat (limited to 'erts')
-rw-r--r--erts/doc/src/erl.xml23
-rw-r--r--erts/emulator/Makefile.in2
-rw-r--r--erts/emulator/beam/erl_bif_info.c2
-rw-r--r--erts/emulator/beam/erl_cpu_topology.c41
-rw-r--r--erts/emulator/beam/erl_cpu_topology.h22
-rw-r--r--erts/emulator/beam/erl_db.c149
-rw-r--r--erts/emulator/beam/erl_db.h12
-rw-r--r--erts/emulator/beam/erl_db_catree.c60
-rw-r--r--erts/emulator/beam/erl_db_catree.h5
-rw-r--r--erts/emulator/beam/erl_db_hash.c63
-rw-r--r--erts/emulator/beam/erl_db_tree.c97
-rw-r--r--erts/emulator/beam/erl_db_tree_util.h18
-rw-r--r--erts/emulator/beam/erl_db_util.h19
-rw-r--r--erts/emulator/beam/erl_flxctr.c370
-rw-r--r--erts/emulator/beam/erl_flxctr.h406
-rw-r--r--erts/emulator/beam/erl_init.c30
-rw-r--r--erts/emulator/beam/erl_process.h2
17 files changed, 1219 insertions, 102 deletions
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml
index 88ddb03e97..471d7caa5a 100644
--- a/erts/doc/src/erl.xml
+++ b/erts/doc/src/erl.xml
@@ -636,6 +636,29 @@
produces a crash dump. On Unix systems, sending an emulator process
a <c>SIGUSR1</c> signal also forces a crash dump.</p>
</item>
+ <tag><marker id="+dcg"/><c><![CDATA[+rg DecentralizedCounterGroupsLimit]]></c></tag>
+ <item>
+ <p>Limits the number of decentralized counter groups used by
+ decentralized counters optimized for update operations in the
+ Erlang runtime system. By default, the limit is 256.</p>
+ <p>When the number of schedulers is less than or equal to the
+ limit, each scheduler has its own group. When the
+ number of schedulers is larger than the groups limit,
+ schedulers share groups. Shared groups degrade
+ the performance for updating counters while many reader groups
+ degrade the performance for reading counters. So, the limit is a tradeoff
+ between performance for update operations and performance for
+ read operations. Each group consumes 64 bytes in each
+ counter.</p>
+ <p>Notice that a runtime system using decentralized
+ counter groups benefits from <seealso marker="#+sbt">binding
+ schedulers to logical processors</seealso>, as the groups are
+ distributed better between schedulers with this option.</p>
+ <p>This option only affects decentralized counters used for
+ the counters that are keeping track of the memory consumption
+ and the number of terms in ETS tables of type ordered_set with
+ the write_concurrency option activated.</p>
+ </item>
<tag><marker id="+e"/><c><![CDATA[+e Number]]></c></tag>
<item>
<p>Sets the maximum number of ETS tables. This limit is
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 21351df656..448f41b523 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -894,7 +894,7 @@ RUN_OBJS += \
$(OBJDIR)/erl_ptab.o $(OBJDIR)/erl_map.o \
$(OBJDIR)/erl_msacc.o $(OBJDIR)/erl_lock_flags.o \
$(OBJDIR)/erl_io_queue.o $(OBJDIR)/erl_db_catree.o \
- $(ESOCK_RUN_OBJS)
+ $(ESOCK_RUN_OBJS) $(OBJDIR)/erl_flxctr.o
LTTNG_OBJS = $(OBJDIR)/erlang_lttng.o
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 39d42d9757..a2f6284c9d 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -3028,6 +3028,8 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_RET(erts_nif_taints(BIF_P));
} else if (ERTS_IS_ATOM_STR("reader_groups_map", BIF_ARG_1)) {
BIF_RET(erts_get_reader_groups_map(BIF_P));
+ } else if (ERTS_IS_ATOM_STR("decentralized_counter_groups_map", BIF_ARG_1)) {
+ BIF_RET(erts_get_decentralized_counter_groups_map(BIF_P));
} else if (ERTS_IS_ATOM_STR("dist_buf_busy_limit", BIF_ARG_1)) {
Uint hsz = 0;
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
index 6f8d2f8c35..6a4f43297e 100644
--- a/erts/emulator/beam/erl_cpu_topology.c
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -34,6 +34,7 @@
#include "error.h"
#include "bif.h"
#include "erl_cpu_topology.h"
+#include "erl_flxctr.h"
#define ERTS_MAX_READER_GROUPS 64
@@ -58,6 +59,7 @@ static erts_cpu_info_t *cpuinfo;
static int max_main_threads;
static int reader_groups;
+static int decentralized_counter_groups;
static ErtsCpuBindData *scheduler2cpu_map;
static erts_rwmtx_t cpuinfo_rwmtx;
@@ -127,6 +129,8 @@ static erts_cpu_groups_map_t *cpu_groups_maps;
static erts_cpu_groups_map_t *reader_groups_map;
+static erts_cpu_groups_map_t *decentralized_counter_groups_map;
+
#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
@@ -138,6 +142,7 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
+static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *);
static erts_cpu_groups_map_t *add_cpu_groups(int groups,
erts_cpu_groups_callback_t callback,
void *arg);
@@ -1646,7 +1651,8 @@ erts_get_logical_processors(int *conf, int *onln, int *avail)
}
void
-erts_pre_early_init_cpu_topology(int *max_rg_p,
+erts_pre_early_init_cpu_topology(int *max_dcg_p,
+ int *max_rg_p,
int *conf_p,
int *onln_p,
int *avail_p)
@@ -1654,6 +1660,7 @@ erts_pre_early_init_cpu_topology(int *max_rg_p,
cpu_groups_maps = NULL;
no_cpu_groups_callbacks = 0;
*max_rg_p = ERTS_MAX_READER_GROUPS;
+ *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS;
cpuinfo = erts_cpu_info_create();
get_logical_processors(conf_p, onln_p, avail_p);
}
@@ -1662,7 +1669,9 @@ void
erts_early_init_cpu_topology(int no_schedulers,
int *max_main_threads_p,
int max_reader_groups,
- int *reader_groups_p)
+ int *reader_groups_p,
+ int max_decentralized_counter_groups,
+ int *decentralized_counter_groups_p)
{
user_cpudata = NULL;
user_cpudata_size = 0;
@@ -1687,6 +1696,12 @@ erts_early_init_cpu_topology(int no_schedulers,
max_main_threads = no_schedulers;
*max_main_threads_p = max_main_threads;
+ decentralized_counter_groups = max_main_threads;
+ if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1)
+ decentralized_counter_groups = 1;
+ if (decentralized_counter_groups > max_decentralized_counter_groups)
+ decentralized_counter_groups = max_decentralized_counter_groups;
+ *decentralized_counter_groups_p = decentralized_counter_groups;
reader_groups = max_main_threads;
if (reader_groups <= 1 || max_reader_groups <= 1)
reader_groups = 0;
@@ -1718,6 +1733,9 @@ erts_init_cpu_topology(void)
reader_groups_map = add_cpu_groups(reader_groups,
reader_groups_callback,
NULL);
+ decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups,
+ flxctr_groups_callback,
+ NULL);
if (cpu_bind_order == ERTS_CPU_BIND_NONE)
erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
@@ -1789,6 +1807,15 @@ reader_groups_callback(int suspending,
erts_rwmtx_set_reader_group(suspending ? 0 : group+1);
}
+void
+flxctr_groups_callback(int suspending,
+ ErtsSchedulerData *esdp,
+ int group,
+ void *unused)
+{
+ erts_flxctr_set_slot(suspending ? 0 : group+1);
+}
+
static Eterm get_cpu_groups_map(Process *c_p,
erts_cpu_groups_map_t *map,
int offset);
@@ -1821,6 +1848,16 @@ erts_get_reader_groups_map(Process *c_p)
return res;
}
+Eterm
+erts_get_decentralized_counter_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1);
+ erts_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
/*
* CPU groups
*/
diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h
index 88bcad79ab..4a428d7972 100644
--- a/erts/emulator/beam/erl_cpu_topology.h
+++ b/erts/emulator/beam/erl_cpu_topology.h
@@ -27,14 +27,19 @@
#ifndef ERL_CPU_TOPOLOGY_H__
#define ERL_CPU_TOPOLOGY_H__
-void erts_pre_early_init_cpu_topology(int *max_rg_p,
- int *conf_p,
- int *onln_p,
- int *avail_p);
-void erts_early_init_cpu_topology(int no_schedulers,
- int *max_main_threads_p,
- int max_reader_groups,
- int *reader_groups_p);
+void
+erts_pre_early_init_cpu_topology(int *max_dcg_p,
+ int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p);
+void
+erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p,
+ int max_decentralized_counter_groups,
+ int *decentralized_counter_groups_p);
void erts_init_cpu_topology(void);
@@ -70,6 +75,7 @@ Eterm erts_bind_schedulers(Process *c_p, Eterm how);
Eterm erts_get_schedulers_binds(Process *c_p);
Eterm erts_get_reader_groups_map(Process *c_p);
+Eterm erts_get_decentralized_counter_groups_map(Process *c_p);
Eterm erts_set_cpu_topology(Process *c_p, Eterm term);
Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c
index 0a50af4d1a..c0f5c506f4 100644
--- a/erts/emulator/beam/erl_db.c
+++ b/erts/emulator/beam/erl_db.c
@@ -42,6 +42,7 @@
#include "bif.h"
#include "big.h"
#include "erl_binary.h"
+#include "bif.h"
erts_atomic_t erts_ets_misc_mem_size;
@@ -64,6 +65,11 @@ do { \
} \
}while(0)
+#define DB_GET_APPROX_NITEMS(DB) \
+ erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define DB_GET_APPROX_MEM_CONSUMED(DB) \
+ erts_flxctr_read_approx(&(DB)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID)
+
static BIF_RETTYPE db_bif_fail(Process* p, Uint freason,
Uint bif_ix, Export* bif_exp)
{
@@ -398,8 +404,9 @@ static void
free_dbtable(void *vtb)
{
DbTable *tb = (DbTable *) vtb;
-
- ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable));
+ ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) ||
+ sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters,
+ ERTS_DB_TABLE_MEM_COUNTER_ID));
erts_rwmtx_destroy(&tb->common.rwlock);
erts_mtx_destroy(&tb->common.fixlock);
@@ -408,7 +415,8 @@ free_dbtable(void *vtb)
if (tb->common.btid)
erts_bin_release(tb->common.btid);
- erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable));
+ erts_flxctr_destroy(&tb->common.counters, ERTS_ALC_T_DB_TABLE);
+ erts_free(ERTS_ALC_T_DB_TABLE, tb);
}
static void schedule_free_dbtable(DbTable* tb)
@@ -1731,12 +1739,16 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
*/
{
DbTable init_tb;
-
- erts_atomic_init_nob(&init_tb.common.memory_size, 0);
+ erts_flxctr_init(&init_tb.common.counters, 0, 2, ERTS_ALC_T_DB_TABLE);
tb = (DbTable*) erts_db_alloc(ERTS_ALC_T_DB_TABLE,
&init_tb, sizeof(DbTable));
- erts_atomic_init_nob(&tb->common.memory_size,
- erts_atomic_read_nob(&init_tb.common.memory_size));
+ erts_flxctr_init(&tb->common.counters,
+ status & DB_CA_ORDERED_SET,
+ 2,
+ ERTS_ALC_T_DB_TABLE);
+ erts_flxctr_add(&tb->common.counters,
+ ERTS_DB_TABLE_MEM_COUNTER_ID,
+ DB_GET_APPROX_MEM_CONSUMED(&init_tb));
}
tb->common.meth = meth;
@@ -1750,8 +1762,6 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
tb->common.owner = BIF_P->common.id;
set_heir(BIF_P, tb, heir, heir_data);
- erts_atomic_init_nob(&tb->common.nitems, 0);
-
tb->common.fixing_procs = NULL;
tb->common.compress = is_compressed;
#ifdef ETS_DBG_FORCE_TRAP
@@ -2128,19 +2138,18 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2)
{
SWord initial_reds = ERTS_BIF_REDS_LEFT(BIF_P);
SWord reds = initial_reds;
- Eterm nitems;
+ Eterm nitems_holder = THE_NON_VALUE;
DbTable* tb;
-
CHECK_TABLES();
DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE, BIF_ets_internal_delete_all_2);
if (BIF_ARG_2 == am_undefined) {
- nitems = erts_make_integer(erts_atomic_read_nob(&tb->common.nitems),
- BIF_P);
-
- reds = tb->common.meth->db_delete_all_objects(BIF_P, tb, reds);
-
+ reds = tb->common.meth->db_delete_all_objects(BIF_P,
+ tb,
+ reds,
+ &nitems_holder);
+ ASSERT(nitems_holder != THE_NON_VALUE);
ASSERT(!(tb->common.status & DB_BUSY));
if (reds < 0) {
@@ -2159,7 +2168,7 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2)
db_unlock(tb, LCK_WRITE);
BUMP_ALL_REDS(BIF_P);
BIF_TRAP2(bif_export[BIF_ets_internal_delete_all_2], BIF_P,
- BIF_ARG_1, nitems);
+ BIF_ARG_1, nitems_holder);
}
else {
/* Done, no trapping needed */
@@ -2169,15 +2178,19 @@ BIF_RETTYPE ets_internal_delete_all_2(BIF_ALIST_2)
}
else {
/*
- * The table lookup succeeded and second argument is nitems
+ * The table lookup succeeded and second argument is nitems_holder
* and not 'undefined', which means we have trapped at least once
* and are now done.
*/
- nitems = BIF_ARG_2;
+ nitems_holder = BIF_ARG_2;
}
-
db_unlock(tb, LCK_WRITE);
+ {
+ Eterm nitems =
+ tb->common.meth->db_delete_all_objects_get_nitems_from_holder(BIF_P,
+ nitems_holder);
BIF_RET(nitems);
+ }
}
static void delete_all_objects_continue(Process* p, DbTable* tb)
@@ -2190,7 +2203,7 @@ static void delete_all_objects_continue(Process* p, DbTable* tb)
if ((tb->common.status & (DB_DELETE|DB_BUSY)) != DB_BUSY)
return;
- reds = tb->common.meth->db_delete_all_objects(p, tb, reds);
+ reds = tb->common.meth->db_delete_all_objects(p, tb, reds, NULL);
if (reds < 0) {
BUMP_ALL_REDS(p);
@@ -3277,13 +3290,29 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1)
int i;
Eterm* hp;
Uint freason;
+ Sint size = -1;
+ Sint memory = -1;
+ Eterm table;
+ int is_ctrs_read_result_set = 0;
/*Process* rp = NULL;*/
/* If/when we implement lockless private tables:
Eterm owner;
*/
-
- if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) {
- if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1)))
+ if(is_tuple(BIF_ARG_1) &&
+ is_tuple_arity(BIF_ARG_1, 2) &&
+ erts_flxctr_is_snapshot_result(tuple_val(BIF_ARG_1)[1])) {
+ Eterm counter_read_result = tuple_val(BIF_ARG_1)[1];
+ table = tuple_val(BIF_ARG_1)[2];
+ size = erts_flxctr_get_snapshot_result_after_trap(counter_read_result,
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID);
+ memory = erts_flxctr_get_snapshot_result_after_trap(counter_read_result,
+ ERTS_DB_TABLE_MEM_COUNTER_ID);
+ is_ctrs_read_result_set = 1;
+ } else {
+ table = BIF_ARG_1;
+ }
+ if ((tb = db_get_table(BIF_P, table, DB_INFO, LCK_READ, &freason)) == NULL) {
+ if (freason == BADARG && (is_atom(table) || is_ref(table)))
BIF_RET(am_undefined);
else
return db_bif_fail(BIF_P, freason, BIF_ets_info_1, NULL);
@@ -3314,9 +3343,35 @@ BIF_RETTYPE ets_info_1(BIF_ALIST_1)
BIF_ERROR(BIF_P, BADARG);
}
}*/
+
+ if (!is_ctrs_read_result_set) {
+ ErtsFlxCtrSnapshotResult res =
+ erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P);
+ if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) {
+ Eterm tuple;
+ db_unlock(tb, LCK_READ);
+ hp = HAlloc(BIF_P, 3);
+ tuple = TUPLE2(hp, res.trap_resume_state, table);
+ BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, tuple);
+ } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) {
+ db_unlock(tb, LCK_READ);
+ BIF_TRAP1(bif_export[BIF_ets_info_1], BIF_P, table);
+ } else {
+ size = res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID];
+ memory = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID];
+ is_ctrs_read_result_set = 1;
+ }
+ }
for (i = 0; i < sizeof(fields)/sizeof(Eterm); i++) {
- results[i] = table_info(BIF_P, tb, fields[i]);
- ASSERT(is_value(results[i]));
+ if (is_ctrs_read_result_set && am_size == fields[i]) {
+ results[i] = erts_make_integer(size, BIF_P);
+ } else if (is_ctrs_read_result_set && am_memory == fields[i]) {
+ Sint words = (Sint) ((memory + sizeof(Sint) - 1) / sizeof(Sint));
+ results[i] = erts_make_integer(words, BIF_P);
+ } else {
+ results[i] = table_info(BIF_P, tb, fields[i]);
+ ASSERT(is_value(results[i]));
+ }
}
db_unlock(tb, LCK_READ);
@@ -3344,14 +3399,43 @@ BIF_RETTYPE ets_info_2(BIF_ALIST_2)
DbTable* tb;
Eterm ret = THE_NON_VALUE;
Uint freason;
-
+ if (erts_flxctr_is_snapshot_result(BIF_ARG_1)) {
+ Sint res;
+ if (am_memory == BIF_ARG_2) {
+ res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1,
+ ERTS_DB_TABLE_MEM_COUNTER_ID);
+ res = (Sint) ((res + sizeof(Sint) - 1) / sizeof(Sint));
+ } else {
+ res = erts_flxctr_get_snapshot_result_after_trap(BIF_ARG_1,
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID);
+ }
+ BIF_RET(erts_make_integer(res, BIF_P));
+ }
if ((tb = db_get_table(BIF_P, BIF_ARG_1, DB_INFO, LCK_READ, &freason)) == NULL) {
if (freason == BADARG && (is_atom(BIF_ARG_1) || is_ref(BIF_ARG_1)))
BIF_RET(am_undefined);
else
return db_bif_fail(BIF_P, freason, BIF_ets_info_2, NULL);
}
- ret = table_info(BIF_P, tb, BIF_ARG_2);
+ if (BIF_ARG_2 == am_size || BIF_ARG_2 == am_memory) {
+ ErtsFlxCtrSnapshotResult res =
+ erts_flxctr_snapshot(&tb->common.counters, ERTS_ALC_T_DB_TABLE, BIF_P);
+ if (ERTS_FLXCTR_GET_RESULT_AFTER_TRAP == res.type) {
+ db_unlock(tb, LCK_READ);
+ BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, res.trap_resume_state, BIF_ARG_2);
+ } else if (res.type == ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP) {
+ db_unlock(tb, LCK_READ);
+ BIF_TRAP2(bif_export[BIF_ets_info_2], BIF_P, BIF_ARG_1, BIF_ARG_2);
+ } else if (BIF_ARG_2 == am_size) {
+ ret = erts_make_integer(res.result[ERTS_DB_TABLE_NITEMS_COUNTER_ID], BIF_P);
+ } else { /* BIF_ARG_2 == am_memory */
+ Sint r = res.result[ERTS_DB_TABLE_MEM_COUNTER_ID];
+ r = (Sint) ((r + sizeof(Sint) - 1) / sizeof(Sint));
+ ret = erts_make_integer(r, BIF_P);
+ }
+ } else {
+ ret = table_info(BIF_P, tb, BIF_ARG_2);
+ }
db_unlock(tb, LCK_READ);
if (is_non_value(ret)) {
BIF_ERROR(BIF_P, BADARG);
@@ -4121,7 +4205,8 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What)
int use_monotonic;
if (What == am_size) {
- ret = make_small(erts_atomic_read_nob(&tb->common.nitems));
+ Uint size = (Uint) (DB_GET_APPROX_NITEMS(tb));
+ ret = erts_make_integer(size, p);
} else if (What == am_type) {
if (tb->common.status & DB_SET) {
ret = am_set;
@@ -4136,7 +4221,7 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What)
ret = am_bag;
}
} else if (What == am_memory) {
- Uint words = (Uint) ((erts_atomic_read_nob(&tb->common.memory_size)
+ Uint words = (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb)
+ sizeof(Uint)
- 1)
/ sizeof(Uint));
@@ -4294,9 +4379,9 @@ static void print_table(fmtfn_t to, void *to_arg, int show, DbTable* tb)
tb->common.meth->db_print(to, to_arg, show, tb);
- erts_print(to, to_arg, "Objects: %d\n", (int)erts_atomic_read_nob(&tb->common.nitems));
+ erts_print(to, to_arg, "Objects: %d\n", (int)DB_GET_APPROX_NITEMS(tb));
erts_print(to, to_arg, "Words: %bpu\n",
- (Uint) ((erts_atomic_read_nob(&tb->common.memory_size)
+ (Uint) ((DB_GET_APPROX_MEM_CONSUMED(tb)
+ sizeof(Uint)
- 1)
/ sizeof(Uint)));
diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h
index dc77fbb60c..b22f35a5ef 100644
--- a/erts/emulator/beam/erl_db.h
+++ b/erts/emulator/beam/erl_db.h
@@ -160,7 +160,9 @@ do { \
erts_aint_t sz__ = (((erts_aint_t) (ALLOC_SZ)) \
- ((erts_aint_t) (FREE_SZ))); \
ASSERT((TAB)); \
- erts_atomic_add_nob(&(TAB)->common.memory_size, sz__); \
+ erts_flxctr_add(&(TAB)->common.counters, \
+ ERTS_DB_TABLE_MEM_COUNTER_ID, \
+ sz__); \
} while (0)
#define ERTS_ETS_MISC_MEM_ADD(SZ) \
@@ -305,10 +307,10 @@ erts_db_free(ErtsAlcType_t type, DbTable *tab, void *ptr, Uint size)
ASSERT(ptr != 0);
ASSERT(size == ERTS_ALC_DBG_BLK_SZ(ptr));
ERTS_DB_ALC_MEM_UPDATE_(tab, size, 0);
-
- ASSERT(((void *) tab) != ptr
- || erts_atomic_read_nob(&tab->common.memory_size) == 0);
-
+ ASSERT(((void *) tab) != ptr ||
+ tab->common.counters.is_decentralized ||
+ 0 == erts_flxctr_read_centralized(&tab->common.counters,
+ ERTS_DB_TABLE_MEM_COUNTER_ID));
erts_free(type, ptr);
}
diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c
index 0402c6b7b4..962fe4c4f8 100644
--- a/erts/emulator/beam/erl_db_catree.c
+++ b/erts/emulator/beam/erl_db_catree.c
@@ -149,7 +149,12 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord);
static void db_foreach_offheap_catree(DbTable *,
void (*)(ErlOffHeap *, void *),
void *);
-static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds);
+static SWord db_delete_all_objects_catree(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb);
+static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p,
+ Eterm nitems_holder);
static int
db_lookup_dbterm_catree(Process *, DbTable *, Eterm key, Eterm obj,
DbUpdateHandle*);
@@ -191,6 +196,7 @@ DbTableMethod db_catree =
db_select_replace_continue_catree,
db_take_catree,
db_delete_all_objects_catree,
+ db_delete_all_objects_get_nitems_from_holder_catree,
db_free_table_catree,
db_free_table_continue_catree,
db_print_catree,
@@ -1357,6 +1363,8 @@ static SWord do_free_base_node_cont(DbTableCATree *tb, SWord num_left)
PUSH_NODE(&tb->free_stack_elems, root);
root = p;
} else {
+ DEC_NITEMS((DbTable*)tb);
+ tb->nr_of_deleted_items++;
free_term((DbTable*)tb, root);
if (--num_left >= 0) {
break;
@@ -1397,6 +1405,7 @@ int db_create_catree(Process *p, DbTable *tbl)
root = create_base_node(tb, NULL);
tb->deletion = 0;
tb->base_nodes_to_free_list = NULL;
+ tb->nr_of_deleted_items = 0;
erts_atomic_init_relb(&(tb->root), (erts_aint_t)root);
return DB_ERROR_NONE;
}
@@ -2050,6 +2059,7 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds)
PUSH_NODE(&tb->free_stack_rnodes, GET_ROOT(tb));
tb->is_routing_nodes_freed = 0;
tb->base_nodes_to_free_list = NULL;
+ tb->nr_of_deleted_items = 0;
}
if ( ! tb->is_routing_nodes_freed ) {
reds = do_free_routing_nodes_catree_cont(tb, reds);
@@ -2079,13 +2089,57 @@ static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds)
return 1;
}
-static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds)
+static
+int db_catree_nr_of_items_deleted_wb_dtor(Binary *context_bin) {
+ (void)context_bin;
+ return 1;
+}
+
+typedef struct {
+ Uint nr_of_deleted_items;
+} DbCATreeNrOfItemsDeletedWb;
+
+static Eterm
+create_and_install_num_of_deleted_items_wb_bin(Process *p, DbTableCATree *tb)
+{
+ Binary* bin =
+ erts_create_magic_binary(sizeof(DbCATreeNrOfItemsDeletedWb),
+ db_catree_nr_of_items_deleted_wb_dtor);
+ DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin);
+ Eterm* hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+ Eterm mref = erts_mk_magic_ref(&hp, &MSO(p), bin);
+ data->nr_of_deleted_items = 0;
+ tb->nr_of_deleted_items_wb = bin;
+ erts_refc_inctest(&bin->intern.refc, 2);
+ return mref;
+}
+
+static Eterm db_delete_all_objects_get_nitems_from_holder_catree(Process* p,
+ Eterm mref)
{
+ Binary* bin = erts_magic_ref2bin(mref);
+ DbCATreeNrOfItemsDeletedWb* data = ERTS_MAGIC_BIN_DATA(bin);
+ return erts_make_integer(data->nr_of_deleted_items, p);
+}
+
+static SWord db_delete_all_objects_catree(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb)
+{
+ DbTableCATree *tb = &tbl->catree;
+ DbCATreeNrOfItemsDeletedWb* data;
+ if (!tb->deletion) {
+ *nitems_holder_wb =
+ create_and_install_num_of_deleted_items_wb_bin(p, tb);
+ }
reds = db_free_table_continue_catree(tbl, reds);
if (reds < 0)
return reds;
+ data = ERTS_MAGIC_BIN_DATA(tb->nr_of_deleted_items_wb);
+ data->nr_of_deleted_items = tb->nr_of_deleted_items;
+ erts_bin_release(tb->nr_of_deleted_items_wb);
db_create_catree(p, tbl);
- erts_atomic_set_nob(&tbl->catree.common.nitems, 0);
return reds;
}
diff --git a/erts/emulator/beam/erl_db_catree.h b/erts/emulator/beam/erl_db_catree.h
index 418837be8e..fde442eaf5 100644
--- a/erts/emulator/beam/erl_db_catree.h
+++ b/erts/emulator/beam/erl_db_catree.h
@@ -87,6 +87,10 @@ typedef struct db_table_catree {
CATreeNodeStack free_stack_rnodes;
DbTableCATreeNode *base_nodes_to_free_list;
int is_routing_nodes_freed;
+ /* The fields below are used by delete_all_objects and
+ select_delete(DeleteAll)*/
+ Uint nr_of_deleted_items;
+ Binary* nr_of_deleted_items_wb;
} DbTableCATree;
typedef struct {
@@ -104,7 +108,6 @@ void db_initialize_catree(void);
int db_create_catree(Process *p, DbTable *tbl);
-
TreeDbTerm** catree_find_root(Eterm key, CATreeRootIterator*);
TreeDbTerm** catree_find_next_from_pb_key_root(Eterm key, CATreeRootIterator*);
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index f225730029..ceaccf7e44 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -85,6 +85,14 @@
#include "erl_db_hash.h"
+#define ADD_NITEMS(DB, TO_ADD) \
+ erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD)
+#define INC_NITEMS(DB) \
+ erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define DEC_NITEMS(DB) \
+ erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define RESET_NITEMS(DB) \
+ erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
/*
* The following symbols can be manipulated to "tune" the linear hash array
*/
@@ -121,7 +129,9 @@
: ((struct segment**) erts_atomic_read_nob(&(tb)->segtab)))
#endif
#define NACTIVE(tb) ((int)erts_atomic_read_nob(&(tb)->nactive))
-#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems))
+#define NITEMS(tb) \
+ ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID))
#define SLOT_IX_TO_SEG_IX(i) (((i)+(EXT_SEGSZ-FIRST_SEGSZ)) >> EXT_SEGSZ_EXP)
@@ -444,7 +454,12 @@ static void db_foreach_offheap_hash(DbTable *,
void (*)(ErlOffHeap *, void *),
void *);
-static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds);
+static SWord db_delete_all_objects_hash(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb);
+static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p,
+ Eterm nitems_holder);
#ifdef HARDDEBUG
static void db_check_table_hash(DbTableHash *tb);
#endif
@@ -548,6 +563,7 @@ DbTableMethod db_hash =
db_select_replace_continue_hash,
db_take_hash,
db_delete_all_objects_hash,
+ db_delete_all_objects_get_nitems_from_holder_hash,
db_free_empty_table_hash,
db_free_table_continue_hash,
db_print_hash,
@@ -806,7 +822,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
if (tb->common.status & DB_SET) {
HashDbTerm* bnext = b->next;
if (is_pseudo_deleted(b)) {
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
b->pseudo_deleted = 0;
}
else if (key_clash_fail) {
@@ -835,7 +851,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
do {
if (db_eq(&tb->common,obj,&q->dbterm)) {
if (is_pseudo_deleted(q)) {
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
q->pseudo_deleted = 0;
ASSERT(q->hvalue == hval);
if (q != b) { /* must move to preserve key insertion order */
@@ -858,7 +874,7 @@ Lnew:
q->pseudo_deleted = 0;
q->next = b;
*bp = q;
- nitems = erts_atomic_inc_read_nob(&tb->common.nitems);
+ nitems = INC_NITEMS(tb);
WUNLOCK_HASH(lck);
{
int nactive = NACTIVE(tb);
@@ -1056,7 +1072,7 @@ int db_erase_hash(DbTable *tbl, Eterm key, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -1117,7 +1133,7 @@ static int db_erase_object_hash(DbTable *tbl, Eterm object, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -2023,7 +2039,7 @@ static int select_delete_on_match_res(traverse_context_t* ctx_base, Sint slot_ix
del->next = ctx->free_us;
ctx->free_us = del;
}
- erts_atomic_dec_nob(&ctx->base.tb->common.nitems);
+ DEC_NITEMS(ctx->base.tb);
return 1;
}
@@ -2300,7 +2316,7 @@ static int db_take_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
}
WUNLOCK_HASH(lck);
if (nitems_diff) {
- erts_atomic_add_nob(&tb->common.nitems, nitems_diff);
+ ADD_NITEMS(tb, nitems_diff);
try_shrink(tb);
}
free_term_list(tb, free_us);
@@ -2360,7 +2376,7 @@ static SWord db_mark_all_deleted_hash(DbTable *tbl, SWord reds)
fixdel->slot = NACTIVE(tb) - 1;
fixdel->all = 1;
fixdel->trap = 0;
- erts_atomic_set_nob(&tb->common.nitems, 0);
+ RESET_NITEMS(tb);
return loops < 0 ? 0 : loops / LOOPS_PER_REDUCTION;
}
@@ -2468,7 +2484,8 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds)
(void*)tb->locks, sizeof(DbTableHashFineLocks));
tb->locks = NULL;
}
- ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable));
+ ASSERT(sizeof(DbTable) == erts_flxctr_read_approx(&tb->common.counters,
+ ERTS_DB_TABLE_MEM_COUNTER_ID));
return reds; /* Done */
}
@@ -3080,7 +3097,7 @@ db_lookup_dbterm_hash(Process *p, DbTable *tbl, Eterm key, Eterm obj,
ASSERT(q->hvalue == hval);
q->pseudo_deleted = 0;
*bp = b = q;
- erts_atomic_inc_nob(&tb->common.nitems);
+ INC_NITEMS(tb);
}
HRelease(p, hend, htop);
@@ -3123,7 +3140,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
}
WUNLOCK_HASH(lck);
- erts_atomic_dec_nob(&tb->common.nitems);
+ DEC_NITEMS(tb);
try_shrink(tb);
} else {
if (handle->flags & DB_MUST_RESIZE) {
@@ -3132,7 +3149,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
}
if (handle->flags & DB_INC_TRY_GROW) {
int nactive;
- int nitems = erts_atomic_inc_read_nob(&tb->common.nitems);
+ int nitems = INC_NITEMS(tb);
WUNLOCK_HASH(lck);
nactive = NACTIVE(tb);
@@ -3153,8 +3170,17 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
return;
}
-static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds)
+static SWord db_delete_all_objects_hash(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb)
{
+ if (nitems_holder_wb != NULL) {
+ Uint nr_of_items =
+ erts_flxctr_read_centralized(&tbl->common.counters,
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID);
+ *nitems_holder_wb = erts_make_integer(nr_of_items, p);
+ }
if (IS_FIXED(tbl)) {
reds = db_mark_all_deleted_hash(tbl, reds);
} else {
@@ -3163,11 +3189,16 @@ static SWord db_delete_all_objects_hash(Process* p, DbTable* tbl, SWord reds)
return reds;
db_create_hash(p, tbl);
- erts_atomic_set_nob(&tbl->hash.common.nitems, 0);
+ RESET_NITEMS(tbl);
}
return reds;
}
+static Eterm db_delete_all_objects_get_nitems_from_holder_hash(Process* p,
+ Eterm nitems_holder){
+ return nitems_holder;
+}
+
void db_foreach_offheap_hash(DbTable *tbl,
void (*func)(ErlOffHeap *, void *),
void * arg)
diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c
index f9ba04f399..492ea81b63 100644
--- a/erts/emulator/beam/erl_db_tree.c
+++ b/erts/emulator/beam/erl_db_tree.c
@@ -51,9 +51,20 @@
#include "erl_db_tree_util.h"
#define GETKEY_WITH_POS(Keypos, Tplp) (*((Tplp) + Keypos))
-#define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems))
-#define TREE_MAX_ELEMENTS 0xFFFFFFFFUL
+#define NITEMS_CENTRALIZED(tb) \
+ ((Sint)erts_flxctr_read_centralized(&(tb)->common.counters, \
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID))
+#define ADD_NITEMS(DB, TO_ADD) \
+ erts_flxctr_add(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID, TO_ADD)
+#define INC_NITEMS(DB) \
+ erts_flxctr_inc(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define INC_NITEMS_CENTRALIZED(DB) \
+ erts_flxctr_inc_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define RESET_NITEMS(DB) \
+ erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+#define IS_CENTRALIZED_CTR(tb) (!(tb)->common.counters.is_decentralized)
+#define APPROX_MEM_CONSUMED(tb) erts_flxctr_read_approx(&(tb)->common.counters, ERTS_DB_TABLE_MEM_COUNTER_ID)
#define TOPN_NODE(Dtt, Pos) \
(((Pos) < Dtt->pos) ? \
@@ -296,7 +307,7 @@ int tree_balance_right(TreeDbTerm **this);
static int delsub(TreeDbTerm **this);
static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot,
DbTable *tb, DbTableTree *stack_container,
- CATreeRootIterator *iter);
+ CATreeRootIterator *iter, int* is_EOT);
static TreeDbTerm *find_node(DbTableCommon *tb, TreeDbTerm *root,
Eterm key, DbTableTree *stack_container);
static TreeDbTerm **find_node2(DbTableCommon *tb, TreeDbTerm **root, Eterm key);
@@ -433,8 +444,12 @@ static void db_foreach_offheap_tree(DbTable *,
void (*)(ErlOffHeap *, void *),
void *);
-static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds);
-
+static SWord db_delete_all_objects_tree(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb);
+static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p,
+ Eterm nitems_holder);
#ifdef HARDDEBUG
static void db_check_table_tree(DbTable *tbl);
#endif
@@ -478,6 +493,7 @@ DbTableMethod db_tree =
db_select_replace_continue_tree,
db_take_tree,
db_delete_all_objects_tree,
+ db_delete_all_objects_get_nitems_from_holder_tree,
db_free_empty_table_tree,
db_free_table_continue_tree,
db_print_tree,
@@ -595,7 +611,8 @@ int db_last_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
}
if (stack) {
PUSH_NODE(stack, this);
- stack->slot = NITEMS(tbl);
+ /* Always centralized counters when static stack is used */
+ stack->slot = NITEMS_CENTRALIZED(tbl);
release_stack(tbl,stack_container,stack);
}
*ret = db_copy_key(p, tbl, &this->dbterm);
@@ -661,10 +678,7 @@ int db_put_tree_common(DbTableCommon *tb, TreeDbTerm **root, Eterm obj,
for (;;)
if (!*this) { /* Found our place */
state = 1;
- if (erts_atomic_inc_read_nob(&tb->nitems) >= TREE_MAX_ELEMENTS) {
- erts_atomic_dec_nob(&tb->nitems);
- return DB_ERROR_SYSRES;
- }
+ INC_NITEMS(((DbTable*)tb));
*this = new_dbterm(tb, obj);
(*this)->balance = 0;
(*this)->left = (*this)->right = NULL;
@@ -888,7 +902,7 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
TreeDbTerm *st;
Eterm *hp, *hend;
Eterm copy;
-
+ int is_EOT = 0;
/*
* The notion of a "slot" is not natural in a tree, but we try to
* simulate it by giving the n'th node in the tree instead.
@@ -899,10 +913,10 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
if (is_not_small(slot_term) ||
((slot = signed_val(slot_term)) < 0) ||
- (slot > NITEMS(tbl)))
+ (IS_CENTRALIZED_CTR(tbl) && slot > NITEMS_CENTRALIZED(tbl)))
return DB_ERROR_BADPARAM;
- if (slot == NITEMS(tbl)) {
+ if (IS_CENTRALIZED_CTR(tbl) && slot == NITEMS_CENTRALIZED(tbl)) {
*ret = am_EOT;
return DB_ERROR_NONE;
}
@@ -912,7 +926,11 @@ int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
* are counted from 1 and up.
*/
++slot;
- st = slot_search(p, root, slot, tbl, stack_container, iter);
+ st = slot_search(p, root, slot, tbl, stack_container, iter, &is_EOT);
+ if (is_EOT) {
+ *ret = am_EOT;
+ return DB_ERROR_NONE;
+ }
if (st == NULL) {
*ret = am_false;
return DB_ERROR_UNSPEC;
@@ -2244,7 +2262,8 @@ void db_print_tree_common(fmtfn_t to, void *to_arg,
erts_print(to, to_arg, "\n"
"------------------------------------------------\n");
#else
- erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", NITEMS(tbl));
+ erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n",
+ erts_flxctr_read_approx(&tbl->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID));
#endif
}
@@ -2281,24 +2300,41 @@ static SWord db_free_table_continue_tree(DbTable *tbl, SWord reds)
(DbTable *) tb,
(void *) tb->static_stack.array,
sizeof(TreeDbTerm *) * STACK_NEED);
- ASSERT((erts_atomic_read_nob(&tb->common.memory_size)
- == sizeof(DbTable)) ||
- (erts_atomic_read_nob(&tb->common.memory_size)
- == (sizeof(DbTable) + sizeof(DbFixation))));
+ ASSERT(erts_flxctr_is_snapshot_ongoing(&tb->common.counters) ||
+ ((APPROX_MEM_CONSUMED(tb)
+ == sizeof(DbTable)) ||
+ (APPROX_MEM_CONSUMED(tb)
+ == (sizeof(DbTable) + sizeof(DbFixation)))));
}
return reds;
}
-static SWord db_delete_all_objects_tree(Process* p, DbTable* tbl, SWord reds)
+static SWord db_delete_all_objects_tree(Process* p,
+ DbTable* tbl,
+ SWord reds,
+ Eterm* nitems_holder_wb)
{
+ if (nitems_holder_wb != NULL) {
+ Uint nr_of_items =
+ erts_flxctr_read_centralized(&tbl->common.counters,
+ ERTS_DB_TABLE_NITEMS_COUNTER_ID);
+ *nitems_holder_wb = erts_make_integer(nr_of_items, p);
+ }
reds = db_free_table_continue_tree(tbl, reds);
if (reds < 0)
return reds;
db_create_tree(p, tbl);
- erts_atomic_set_nob(&tbl->tree.common.nitems, 0);
+ RESET_NITEMS(tbl);
return reds;
}
+static Eterm db_delete_all_objects_get_nitems_from_holder_tree(Process* p,
+ Eterm holder)
+{
+ (void)p;
+ return holder;
+}
+
static void do_db_tree_foreach_offheap(TreeDbTerm *,
void (*)(ErlOffHeap *, void *),
void *);
@@ -2383,7 +2419,7 @@ static TreeDbTerm *linkout_tree(DbTableCommon *tb, TreeDbTerm **root,
tstack[tpos++] = this;
state = delsub(this);
}
- erts_atomic_dec_nob(&tb->nitems);
+ DEC_NITEMS(((DbTable*)tb));
break;
}
}
@@ -2450,7 +2486,7 @@ static TreeDbTerm *linkout_object_tree(DbTableCommon *tb, TreeDbTerm **root,
tstack[tpos++] = this;
state = delsub(this);
}
- erts_atomic_dec_nob(&tb->nitems);
+ DEC_NITEMS(((DbTable*)tb));
break;
}
}
@@ -2745,7 +2781,8 @@ static int delsub(TreeDbTerm **this)
static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root,
Sint slot, DbTable *tb,
DbTableTree *stack_container,
- CATreeRootIterator *iter)
+ CATreeRootIterator *iter,
+ int* is_EOT)
{
TreeDbTerm *this;
TreeDbTerm *tmp;
@@ -2837,8 +2874,12 @@ static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root,
break;
next_root:
- if (!iter)
+ if (!iter) {
+ if (stack->slot == (slot-1)) {
+ *is_EOT = 1;
+ }
break; /* EOT */
+ }
ASSERT(slot > stack->slot);
if (lastobj) {
@@ -2846,8 +2887,12 @@ next_root:
lastobj = NULL;
}
pp = catree_find_next_root(iter, &lastkey);
- if (!pp)
+ if (!pp) {
+ if (stack->slot == (slot-1)) {
+ *is_EOT = 1;
+ }
break; /* EOT */
+ }
root = *pp;
stack->pos = 0;
find_next(&tb->common, root, stack, lastkey);
diff --git a/erts/emulator/beam/erl_db_tree_util.h b/erts/emulator/beam/erl_db_tree_util.h
index 02df74678d..ba4a8f79e5 100644
--- a/erts/emulator/beam/erl_db_tree_util.h
+++ b/erts/emulator/beam/erl_db_tree_util.h
@@ -25,6 +25,8 @@
** Internal functions and macros used by both the CA tree and the AVL tree
*/
+
+#if defined(ARCH_32)
/*
** A stack of this size is enough for an AVL tree with more than
** 0xFFFFFFFF elements. May be subject to change if
@@ -34,8 +36,19 @@
** Where n denotes the number of nodes, h(n) the height of the tree
** with n nodes and log is the binary logarithm.
*/
-
#define STACK_NEED 50
+#elif defined(ARCH_64)
+/*
+** A stack of this size is enough for an AVL tree with more than
+** 2^61 elements.
+** The Maximal height of an AVL tree is calculated as above.
+*/
+#define STACK_NEED 90
+#else
+#error "Unsported architecture"
+#endif
+
+
#define PUSH_NODE(Dtt, Tdt) \
((Dtt)->array[(Dtt)->pos++] = Tdt)
@@ -50,6 +63,9 @@
#define EMPTY_NODE(Dtt) (TOP_NODE(Dtt) == NULL)
+#define DEC_NITEMS(DB) \
+ erts_flxctr_dec(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
+
static ERTS_INLINE void free_term(DbTable *tb, TreeDbTerm* p)
{
db_free_term(tb, p, offsetof(TreeDbTerm, dbterm));
diff --git a/erts/emulator/beam/erl_db_util.h b/erts/emulator/beam/erl_db_util.h
index e3d3c0e804..97f2848679 100644
--- a/erts/emulator/beam/erl_db_util.h
+++ b/erts/emulator/beam/erl_db_util.h
@@ -21,6 +21,7 @@
#ifndef _DB_UTIL_H
#define _DB_UTIL_H
+#include "erl_flxctr.h"
#include "global.h"
#include "erl_message.h"
#include "erl_bif_unique.h"
@@ -207,8 +208,12 @@ typedef struct db_table_method
enum DbIterSafety*);
int (*db_take)(Process *, DbTable *, Eterm, Eterm *);
- SWord (*db_delete_all_objects)(Process* p, DbTable* db, SWord reds);
-
+ SWord (*db_delete_all_objects)(Process* p,
+ DbTable* db,
+ SWord reds,
+ Eterm* nitems_holder_wb);
+ Eterm (*db_delete_all_objects_get_nitems_from_holder)(Process* p,
+ Eterm nitems_holder);
int (*db_free_empty_table)(DbTable* db);
SWord (*db_free_table_continue)(DbTable* db, SWord reds);
@@ -257,6 +262,9 @@ typedef struct {
DbTable *prev;
} DbTableList;
+#define ERTS_DB_TABLE_NITEMS_COUNTER_ID 0
+#define ERTS_DB_TABLE_MEM_COUNTER_ID 1
+
/*
* This structure contains data for all different types of database
* tables. Note that these fields must match the same fields
@@ -281,8 +289,11 @@ typedef struct db_table_common {
Eterm the_name; /* an atom */
Binary *btid;
DbTableMethod* meth; /* table methods */
- erts_atomic_t nitems; /* Total number of items in table */
- erts_atomic_t memory_size;/* Total memory size. NOTE: in bytes! */
+ /* The ErtsFlxCtr below contains:
+ * - Total number of items in table
+ * - Total memory size (NOTE: in bytes!) */
+ ErtsFlxCtr counters;
+ char extra_for_flxctr[ERTS_FLXCTR_NR_OF_EXTRA_BYTES(2)];
struct { /* Last fixation time */
ErtsMonotonicTime monotonic;
ErtsMonotonicTime offset;
diff --git a/erts/emulator/beam/erl_flxctr.c b/erts/emulator/beam/erl_flxctr.c
new file mode 100644
index 0000000000..35f4a21508
--- /dev/null
+++ b/erts/emulator/beam/erl_flxctr.c
@@ -0,0 +1,370 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2019. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Author: Kjell Winblad
+ */
+
+#include "erl_flxctr.h"
+
+static int reader_groups_array_size = 0;
+#define ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS (reader_groups_array_size)
+
+static int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin);
+static int erts_flxctr_wait_dtor(Binary *context_bin);
+
+typedef struct {
+ ErtsThrPrgrLaterOp later_op;
+ Process* process;
+ ErtsFlxCtrDecentralizedCtrArray* array;
+ ErtsFlxCtrDecentralizedCtrArray* next_array;
+ ErtsAlcType_t alloc_type;
+ int nr_of_counters;
+ Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE];
+} DecentralizedReadSnapshotInfo;
+
+typedef enum {
+ ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING = 0,
+ ERTS_FLXCTR_SNAPSHOT_ONGOING = 1,
+ ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE = 2
+} erts_flxctr_snapshot_status;
+
+static void
+thr_prg_wake_up_and_count(void* bin_p)
+{
+ Binary* bin = bin_p;
+ DecentralizedReadSnapshotInfo* info = ERTS_MAGIC_BIN_DATA(bin);
+ Process* p = info->process;
+ ErtsFlxCtrDecentralizedCtrArray* array = info->array;
+ ErtsFlxCtrDecentralizedCtrArray* next = info->next_array;
+ int i, sched;
+ /* Reset result array */
+ for (i = 0; i < info->nr_of_counters; i++) {
+ info->result[i] = 0;
+ }
+ /* Read result from snapshot */
+ for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) {
+ for (i = 0; i < info->nr_of_counters; i++) {
+ info->result[i] = info->result[i] +
+ erts_atomic_read_nob(&array->array[sched].counters[i]);
+ }
+ }
+ /* Update the next decentralized counter array */
+ for (i = 0; i < info->nr_of_counters; i++) {
+ erts_atomic_add_nob(&next->array[0].counters[i], info->result[i]);
+ }
+ /* Announce that the snapshot is done */
+ {
+ Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING;
+ if (expected != erts_atomic_cmpxchg_mb(&next->snapshot_status,
+ ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING,
+ expected)) {
+ /* The CAS failed which means that this thread need to free the next array. */
+ erts_free(info->alloc_type, next->block_start);
+ }
+ }
+ /* Resume the process that requested the snapshot */
+ erts_proc_lock(p, ERTS_PROC_LOCK_STATUS);
+ if (!ERTS_PROC_IS_EXITING(p)) {
+ erts_resume(p, ERTS_PROC_LOCK_STATUS);
+ }
+ /* Free the memory that is no longer needed */
+ erts_free(info->alloc_type, array->block_start);
+ erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS);
+ erts_proc_dec_refc(p);
+ erts_bin_release(bin);
+}
+
+typedef struct {
+ ErtsThrPrgrLaterOp later_op;
+ Process* process;
+} ErtsFlxCtrWakeUpLaterInfo;
+
+static void
+thr_prg_wake_up_later(void* bin_p)
+{
+ Binary* bin = bin_p;
+ ErtsFlxCtrWakeUpLaterInfo* info = ERTS_MAGIC_BIN_DATA(bin);
+ Process* p = info->process;
+ /* Resume the requesting process */
+ erts_proc_lock(p, ERTS_PROC_LOCK_STATUS);
+ if (!ERTS_PROC_IS_EXITING(p)) {
+ erts_resume(p, ERTS_PROC_LOCK_STATUS);
+ }
+ erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS);
+ /* Free data */
+ erts_proc_dec_refc(p);
+ erts_bin_release(bin);
+}
+
+static
+int erts_flxctr_read_ctx_bin_dtor(Binary *context_bin) {
+ (void)context_bin;
+ return 1;
+}
+
+static
+int erts_flxctr_wait_dtor(Binary *context_bin) {
+ (void)context_bin;
+ return 1;
+}
+
+static void suspend_until_thr_prg(Process* p)
+{
+ Binary* state_bin;
+ ErtsFlxCtrWakeUpLaterInfo* info;
+ state_bin = erts_create_magic_binary(sizeof(ErtsFlxCtrWakeUpLaterInfo),
+ erts_flxctr_wait_dtor);
+ info = ERTS_MAGIC_BIN_DATA(state_bin);
+ info->process = p;
+ erts_refc_inctest(&state_bin->intern.refc, 1);
+ erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL);
+ erts_proc_inc_refc(p);
+ ERTS_VBUMP_ALL_REDS(p);
+ erts_schedule_thr_prgr_later_op(thr_prg_wake_up_later, state_bin, &info->later_op);
+}
+
+
+static ErtsFlxCtrDecentralizedCtrArray*
+create_decentralized_ctr_array(ErtsAlcType_t alloc_type, Uint nr_of_counters) {
+ /* Allocate an ErtsFlxCtrDecentralizedCtrArray and make sure that
+ the array field is located at the start of a cache line */
+ char* bytes =
+ erts_alloc(alloc_type,
+ sizeof(ErtsFlxCtrDecentralizedCtrArray) +
+ (sizeof(ErtsFlxCtrDecentralizedCtrArrayElem) *
+ ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS) +
+ ERTS_CACHE_LINE_SIZE);
+ void* block_start = bytes;
+ int bytes_to_next_cacheline_border;
+ ErtsFlxCtrDecentralizedCtrArray* array;
+ int i, sched;
+ bytes = &bytes[offsetof(ErtsFlxCtrDecentralizedCtrArray, array)];
+ bytes_to_next_cacheline_border =
+ ERTS_CACHE_LINE_SIZE - (((Uint)bytes) % ERTS_CACHE_LINE_SIZE);
+ array = (ErtsFlxCtrDecentralizedCtrArray*)
+ (&bytes[bytes_to_next_cacheline_border -
+ (int)offsetof(ErtsFlxCtrDecentralizedCtrArray, array)]);
+ ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0);
+ ASSERT(((Uint)array - (Uint)block_start) <= ERTS_CACHE_LINE_SIZE);
+ /* Initialize fields */
+ erts_atomic_init_nob(&array->snapshot_status, ERTS_FLXCTR_SNAPSHOT_ONGOING);
+ for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) {
+ for (i = 0; i < nr_of_counters; i++) {
+ erts_atomic_init_nob(&array->array[sched].counters[i], 0);
+ }
+ }
+ array->block_start = block_start;
+ return array;
+}
+
+void erts_flxctr_setup(int decentralized_counter_groups)
+{
+ reader_groups_array_size = decentralized_counter_groups+1;
+}
+
+void erts_flxctr_init(ErtsFlxCtr* c,
+ int is_decentralized,
+ Uint nr_of_counters,
+ ErtsAlcType_t alloc_type)
+{
+ ASSERT(nr_of_counters <= ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE);
+ c->is_decentralized = is_decentralized;
+ c->nr_of_counters = nr_of_counters;
+ if (c->is_decentralized) {
+ ErtsFlxCtrDecentralizedCtrArray* array =
+ create_decentralized_ctr_array(alloc_type, nr_of_counters);
+ erts_atomic_set_nob(&array->snapshot_status,
+ ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING);
+ erts_atomic_init_nob(&c->u.counters_ptr, (Sint)array);
+ ASSERT(((Uint)array->array) % ERTS_CACHE_LINE_SIZE == 0);
+ } else {
+ int i;
+ for (i = 0; i < nr_of_counters; i++) {
+ erts_atomic_init_nob(&c->u.counters[i], 0);
+ }
+ }
+}
+
+void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t type)
+{
+ if (c->is_decentralized) {
+ if (erts_flxctr_is_snapshot_ongoing(c)) {
+ ErtsFlxCtrDecentralizedCtrArray* array =
+ ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c);
+ /* Try to delegate the resposibilty of freeing to
+ thr_prg_wake_up_and_count */
+ Sint expected = ERTS_FLXCTR_SNAPSHOT_ONGOING;
+ if (expected !=
+ erts_atomic_cmpxchg_mb(&array->snapshot_status,
+ ERTS_FLXCTR_SNAPSHOT_ONGOING_TP_THREAD_DO_FREE,
+ expected)) {
+ /* The delegation was unsuccessful which means that no
+ snapshot is ongoing anymore and the freeing needs
+ to be done here */
+ ERTS_ASSERT(!erts_flxctr_is_snapshot_ongoing(c));
+ erts_free(type, array->block_start);
+ }
+ } else {
+ erts_free(type, ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->block_start);
+ }
+ }
+}
+
+ErtsFlxCtrSnapshotResult
+erts_flxctr_snapshot(ErtsFlxCtr* c,
+ ErtsAlcType_t alloc_type,
+ Process* p)
+{
+ if (c->is_decentralized) {
+ ErtsFlxCtrDecentralizedCtrArray* array = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c);
+ if (erts_flxctr_is_snapshot_ongoing(c)) {
+ /* Let the caller try again later */
+ ErtsFlxCtrSnapshotResult res =
+ {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP};
+ suspend_until_thr_prg(p);
+ return res;
+ } else {
+ Eterm* hp;
+ Binary* state_bin;
+ Eterm state_mref;
+ DecentralizedReadSnapshotInfo* info;
+ ErtsFlxCtrDecentralizedCtrArray* new_array =
+ create_decentralized_ctr_array(alloc_type, c->nr_of_counters);
+ int success =
+ ((Sint)array) == erts_atomic_cmpxchg_mb(&c->u.counters_ptr,
+ (Sint)new_array,
+ (Sint)array);
+ if (!success) {
+ /* Let the caller try again later */
+ ErtsFlxCtrSnapshotResult res =
+ {.type = ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP};
+ suspend_until_thr_prg(p);
+ erts_free(alloc_type, new_array->block_start);
+ return res;
+ }
+ /* Create binary with info about the operation that can be
+ sent to the caller and to a thread progress function */
+ state_bin =
+ erts_create_magic_binary(sizeof(DecentralizedReadSnapshotInfo),
+ erts_flxctr_read_ctx_bin_dtor);
+ hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+ state_mref = erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+ info = ERTS_MAGIC_BIN_DATA(state_bin);
+ info->alloc_type = alloc_type;
+ info->array = array;
+ info->next_array = new_array;
+ info->process = p;
+ info->nr_of_counters = c->nr_of_counters;
+ erts_proc_inc_refc(p);
+ erts_refc_inctest(&state_bin->intern.refc, 2);
+ erts_suspend(p, ERTS_PROC_LOCK_MAIN, NULL);
+ ERTS_VBUMP_ALL_REDS(p);
+ erts_schedule_thr_prgr_later_op(thr_prg_wake_up_and_count,
+ state_bin,
+ &info->later_op);
+ {
+ ErtsFlxCtrSnapshotResult res = {
+ .type = ERTS_FLXCTR_GET_RESULT_AFTER_TRAP,
+ .trap_resume_state = state_mref};
+ return res;
+ }
+ }
+ } else {
+ ErtsFlxCtrSnapshotResult res;
+ int i;
+ res.type = ERTS_FLXCTR_DONE;
+ for (i = 0; i < c->nr_of_counters; i++){
+ res.result[i] = erts_flxctr_read_centralized(c, i);
+ }
+ return res;
+ }
+}
+
+
+Sint erts_flxctr_get_snapshot_result_after_trap(Eterm result_holder,
+ Uint counter_nr)
+{
+ Binary* bin = erts_magic_ref2bin(result_holder);
+ DecentralizedReadSnapshotInfo* data = ERTS_MAGIC_BIN_DATA(bin);;
+ return data->result[counter_nr];
+}
+
+int erts_flxctr_is_snapshot_result(Eterm term)
+{
+ if (is_internal_magic_ref(term)) {
+ Binary* bin = erts_magic_ref2bin(term);
+ return ERTS_MAGIC_BIN_DESTRUCTOR(bin) == erts_flxctr_read_ctx_bin_dtor;
+ } else return 0;
+}
+
+Sint erts_flxctr_read_approx(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ if (c->is_decentralized) {
+ ErtsFlxCtrDecentralizedCtrArray* counter = ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c);
+ Sint sum = 0;
+ int sched;
+ for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) {
+ sum = sum + erts_atomic_read_nob(&counter->array[sched].counters[counter_nr]);
+ }
+ return sum;
+ } else {
+ return erts_flxctr_read_centralized(c, counter_nr);
+ }
+}
+
+int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c)
+{
+ return c->is_decentralized &&
+ (ERTS_FLXCTR_SNAPSHOT_NOT_ONGOING !=
+ erts_atomic_read_acqb(&ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c)->snapshot_status));
+}
+
+int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p)
+{
+ if (erts_flxctr_is_snapshot_ongoing(c)) {
+ suspend_until_thr_prg(p);
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+void erts_flxctr_reset(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ if (c->is_decentralized) {
+ int sched;
+ ErtsFlxCtrDecentralizedCtrArray* counter =
+ ERTS_FLXCTR_GET_CTR_ARRAY_PTR(c);
+ for (sched = 0; sched < ERTS_FLXCTR_DECENTRALIZED_NO_SLOTS; sched++) {
+ erts_atomic_set_nob(&counter->array[sched].counters[counter_nr], 0);
+ }
+ } else {
+ erts_atomic_set_nob(&c->u.counters[counter_nr], 0);
+ }
+}
+
+
+void erts_flxctr_set_slot(int group) {
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ esdp->flxctr_slot_no = group;
+}
diff --git a/erts/emulator/beam/erl_flxctr.h b/erts/emulator/beam/erl_flxctr.h
new file mode 100644
index 0000000000..5cab02b9eb
--- /dev/null
+++ b/erts/emulator/beam/erl_flxctr.h
@@ -0,0 +1,406 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2019. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/**
+ * @file erl_flxctr.h
+ *
+ * @brief This file contains the API of a flexible counter. The
+ * counter can be configured during its initialization to be
+ * centralized or decentralized. The centralized configuration makes
+ * it possible to read the counter value extremely efficiently, but
+ * updates of the counter value can easily cause contention. The
+ * decentralized configuration has the reverse trade-off (i.e.,
+ * updates are efficient and scalable but reading the counter value is
+ * slow and may cause contention).
+ *
+ * @author Kjell Winblad
+ */
+
+#ifndef ERL_FLXCTR_H__
+#define ERL_FLXCTR_H__
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "error.h"
+#include "bif.h"
+#include "big.h"
+#include "erl_binary.h"
+#include "bif.h"
+#include <stddef.h>
+
+/* Public Interface */
+
+#define ERTS_MAX_FLXCTR_GROUPS 256
+#define ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE (ERTS_CACHE_LINE_SIZE / sizeof(erts_atomic_t))
+
+typedef struct {
+ int nr_of_counters;
+ int is_decentralized;
+ union {
+ erts_atomic_t counters_ptr;
+ erts_atomic_t counters[1];
+ } u;
+} ErtsFlxCtr;
+
+#define ERTS_FLXCTR_NR_OF_EXTRA_BYTES(NR_OF_COUNTERS) \
+ ((NR_OF_COUNTERS-1) * sizeof(erts_atomic_t))
+
+/* Called by early_init */
+void erts_flxctr_setup(int decentralized_counter_groups);
+
+/**
+ * @brief Initializes an ErtsFlxCtr. The macro
+ * ERTS_FLXCTR_NR_OF_EXTRA_BYTES should be used to determine how much
+ * extra space that needs to be allocated directly after the
+ * ErtsFlxCtr when is_decentralized is set to zero. Each ErtsFlxCtr
+ * instance may contain up to ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE
+ * counters. These counters are numbered from zero to
+ * (ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE-1). Most of the functions in
+ * this module take a parameter named counter_nr that controls which
+ * of the ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE counters in the given
+ * ErtsFlxCtr that should be operated on.
+ *
+ * @param c The counter to initialize
+ * @param is_decentralized Non-zero value to make c decentralized
+ * @param nr_of_counters The number of counters included in c
+ * (max ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE)
+ * @param alloc_type
+ */
+void erts_flxctr_init(ErtsFlxCtr* c,
+ int is_decentralized,
+ Uint nr_of_counters,
+ ErtsAlcType_t alloc_type);
+
+/**
+ * @brief Destroys an initialized counter.
+ *
+ * @param c The counter that should be destroyed
+ * @param alloc_type The allocation type (needs to be the same as the
+ * one passed to erts_flxctr_init when c was
+ * initialized)
+ */
+void erts_flxctr_destroy(ErtsFlxCtr* c, ErtsAlcType_t alloc_type);
+
+/**
+ * @brief Adds to_add to the counter with counter_nr in c
+ *
+ * @param c the ErtsFlxCtr to operate on
+ * @param counter_nr The number of the counter in c to modify
+ * @param to_add The amount that should be added to the specified counter
+ */
+ERTS_GLB_INLINE
+void erts_flxctr_add(ErtsFlxCtr* c,
+ Uint counter_nr,
+ int to_add);
+
+/**
+ * @brief Increases the specified counter by 1
+ *
+ * @param c The ErtsFlxCtr instance to operate on
+ * @param counter_nr The number of the counter within c to operate on
+ */
+ERTS_GLB_INLINE
+void erts_flxctr_inc(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief Decreases the specified counter by 1
+ */
+ERTS_GLB_INLINE
+void erts_flxctr_dec(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief This function tries to return the current value of the
+ * specified counter but may return an incorrect result if the counter
+ * is decentralized and other threads are accessing the counter
+ * concurrently.
+ *
+ * @param c The ErtsFlxCtr instance to operate on
+ * @param counter_nr The number of the counter within c to operate on
+ *
+ * @return A snapshot of the specifed counter if c is centralized or a
+ * possibly incorrect estimate of the counter value if c is
+ * decentralized
+ */
+Sint erts_flxctr_read_approx(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief This function can only be used together with an ErtsFlxCtr
+ * that is configured to be centralized. The function increments the
+ * specified counter by 1 and returns the value of the counter after
+ * the increment.
+ */
+ERTS_GLB_INLINE
+Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief This function can only be used together with a ErtsFlxCtr
+ * that is configured to be centralized. The function decrements the
+ * specified counter by 1 and returns the value of the counter after
+ * the operation.
+ */
+ERTS_GLB_INLINE
+Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief This function can only be used together with an ErtsFlxCtr
+ * that is configured to be centralized. The function returns the
+ * current value of the specified counter.
+ */
+ERTS_GLB_INLINE
+Sint erts_flxctr_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+
+typedef enum {
+ ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP,
+ ERTS_FLXCTR_DONE,
+ ERTS_FLXCTR_GET_RESULT_AFTER_TRAP
+} ErtsFlxctrSnapshotResultType;
+
+typedef struct {
+ ErtsFlxctrSnapshotResultType type;
+ Eterm trap_resume_state;
+ Sint result[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE];
+} ErtsFlxCtrSnapshotResult;
+
+/**
+ * @brief This function initiates an atomic snapshot of an ErtsFlxCtr
+ * to read out the values of one or more of the counters that are
+ * stored in the given ErtsFlxCtr. The caller needs to perform
+ * different actions after the return of this function depending on
+ * the value of the type field in the returned struct:
+ *
+ * - The caller needs to trap and try again after the trap if the
+ * return value has the type ERTS_FLXCTR_TRY_AGAIN_AFTER_TRAP.
+ *
+ * - The caller can get the result directly from the result field of
+ * the returned struct if the return value has the type
+ * ERTS_FLXCTR_DONE. The value at index i in the result field
+ * correspond to counter number i.
+ *
+ * - Finally, if the return value has the type
+ * ERTS_FLXCTR_GET_RESULT_AFTER_TRAP, then the caller needs to save
+ * the value of the field trap_resume_state from the returned struct
+ * and trap. After the trap, the values of the counters can be
+ * obtained by using the function
+ * erts_flxctr_get_snapshot_result_after_trap. Note that the
+ * function erts_flxctr_is_snapshot_result can be used to check if a
+ * value is obtained from the trap_resume_state field in the
+ * returned struct (this can be useful when the calling function
+ * wakes up again after the trap).
+ *
+ * The snapshot operation that is initiated by this function should be
+ * considered to be ongoing from the issuing of this function until a
+ * struct with the type field set to ERTS_FLXCTR_DONE has been
+ * returned from the function or until the caller of this function has
+ * woken up after trapping.
+ *
+ * @param c The ErtsFlxCtr that the snapshot shall be taken from
+ * @param alloc_type The allocation type (needs to be the same as the
+ * type passed to erts_flxctr_init when c was
+ * initialized)
+ * @param p The Erlang process that is doing the call
+ *
+ * @return See the description above
+ *
+ */
+ErtsFlxCtrSnapshotResult
+erts_flxctr_snapshot(ErtsFlxCtr* c,
+ ErtsAlcType_t alloc_type,
+ Process* p);
+
+/**
+ * @brief Checks if the parameter term is a snapshot result (i.e.,
+ * something obtained from the trap_resume_state field of an
+ * ErtsFlxCtrSnapshotResult struct that has been returned from
+ * erts_flxctr_snapshot).
+ *
+ * @param term The term to check
+ *
+ * @return A nonzero value iff the term is a snapshot result
+ */
+int erts_flxctr_is_snapshot_result(Eterm term);
+
+/**
+ * @brief Returns the result of a snapshot for a counter given a
+ * snapshot result returned by a call to erts_flxctr_snapshot (i.e.,
+ * the value stored in the trap_resume_state field of a struct
+ * returned by erts_flxctr_snapshot). The caller needs to trap between
+ * the return of erts_flxctr_snapshot and the call to this function.
+ */
+Sint erts_flxctr_get_snapshot_result_after_trap(Eterm trap_resume_state,
+ Uint counter_nr);
+
+/**
+ * @brief Resets the specified counter to 0. This function is unsafe
+ * to call while a snapshot operation may be active (initiated with
+ * the erts_flxctr_snapshot function).
+ */
+void erts_flxctr_reset(ErtsFlxCtr* c,
+ Uint counter_nr);
+
+/**
+ * @brief Checks if a snapshot operation is active (snapshots are
+ * initiated with the erts_flxctr_snapshot function).
+ *
+ * @return nonzero value iff a snapshot was active at some point
+ * between the invocation and return of the function
+ */
+int erts_flxctr_is_snapshot_ongoing(ErtsFlxCtr* c);
+
+/**
+ * @brief This function checks if a snapshot operation is ongoing
+ * (snapshots are initiated with the erts_flxctr_snapshot function)
+ * and suspend the given process until thread progress has happened if
+ * it detected an ongoing snapshot operation. The caller needs to trap
+ * if a non-zero value is returned.
+ *
+ * @param c The ErtsFlxCtr to check
+ * @param p The calling process
+ *
+ * @return nonzero value if the given process has got suspended
+ */
+int erts_flxctr_suspend_until_thr_prg_if_snapshot_ongoing(ErtsFlxCtr* c, Process* p);
+
+/* End: Public Interface */
+
+/* Internal Declarations */
+
+#define ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C) \
+ ((ErtsFlxCtrDecentralizedCtrArray*) erts_atomic_read_acqb(&(C)->u.counters_ptr))
+#define ERTS_FLXCTR_GET_CTR_PTR(C, SCHEDULER_ID, COUNTER_ID) \
+ &(ERTS_FLXCTR_GET_CTR_ARRAY_PTR(C))->array[SCHEDULER_ID].counters[COUNTER_ID]
+
+
+typedef union {
+ erts_atomic_t counters[ERTS_FLXCTR_ATOMICS_PER_CACHE_LINE];
+ char pad[ERTS_CACHE_LINE_SIZE];
+} ErtsFlxCtrDecentralizedCtrArrayElem;
+
+typedef struct ErtsFlxCtrDecentralizedCtrArray {
+ void* block_start;
+ erts_atomic_t snapshot_status;
+ ErtsFlxCtrDecentralizedCtrArrayElem array[];
+} ErtsFlxCtrDecentralizedCtrArray;
+
+void erts_flxctr_set_slot(int group);
+
+ERTS_GLB_INLINE
+int erts_flxctr_get_slot_index(void);
+
+/* End: Internal Declarations */
+
+
+/* Implementation of inlined functions */
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE
+int erts_flxctr_get_slot_index(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp));
+ ASSERT(esdp->flxctr_slot_no > 0);
+ return esdp->flxctr_slot_no;
+}
+
+ERTS_GLB_INLINE
+void erts_flxctr_add(ErtsFlxCtr* c,
+ Uint counter_nr,
+ int to_add)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ if (c->is_decentralized) {
+ erts_atomic_add_nob(ERTS_FLXCTR_GET_CTR_PTR(c,
+ erts_flxctr_get_slot_index(),
+ counter_nr),
+ to_add);
+ } else {
+ erts_atomic_add_nob(&c->u.counters[counter_nr], to_add);
+ }
+}
+
+ERTS_GLB_INLINE
+void erts_flxctr_inc(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ if (c->is_decentralized) {
+ erts_atomic_inc_nob(ERTS_FLXCTR_GET_CTR_PTR(c,
+ erts_flxctr_get_slot_index(),
+ counter_nr));
+ } else {
+ erts_atomic_inc_read_nob(&c->u.counters[counter_nr]);
+ }
+}
+
+ERTS_GLB_INLINE
+void erts_flxctr_dec(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ if (c->is_decentralized) {
+ erts_atomic_dec_nob(ERTS_FLXCTR_GET_CTR_PTR(c,
+ erts_flxctr_get_slot_index(),
+ counter_nr));
+ } else {
+ erts_atomic_dec_nob(&c->u.counters[counter_nr]);
+ }
+}
+
+ERTS_GLB_INLINE
+Sint erts_flxctr_inc_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ ASSERT(!c->is_decentralized);
+ return erts_atomic_inc_read_nob(&c->u.counters[counter_nr]);
+}
+
+ERTS_GLB_INLINE
+Sint erts_flxctr_dec_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ ASSERT(!c->is_decentralized);
+ return erts_atomic_dec_read_nob(&c->u.counters[counter_nr]);
+}
+
+ERTS_GLB_INLINE
+Sint erts_flxctr_read_centralized(ErtsFlxCtr* c,
+ Uint counter_nr)
+{
+ ASSERT(counter_nr < c->nr_of_counters);
+ ASSERT(!c->is_decentralized);
+ return erts_atomic_read_nob(&((erts_atomic_t*)(c->u.counters))[counter_nr]);
+}
+
+#endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+#endif /* ERL_FLXCTR_H__ */
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 12750b9aa6..547e4064a2 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -593,6 +593,7 @@ void erts_usage(void)
erts_fprintf(stderr, " no_time_warp|single_time_warp|multi_time_warp\n");
erts_fprintf(stderr, "-d don't write a crash dump for internally detected errors\n");
erts_fprintf(stderr, " (halt(String) will still produce a crash dump)\n");
+ erts_fprintf(stderr, "-dcg set the limit for the number of decentralized counter groups\n");
erts_fprintf(stderr, "-fn[u|a|l] Control how filenames are interpreted\n");
erts_fprintf(stderr, "-hms size set minimum heap size in words (default %d)\n",
H_DEFAULT_SIZE);
@@ -785,6 +786,8 @@ early_init(int *argc, char **argv) /*
int dirty_io_scheds;
int max_reader_groups;
int reader_groups;
+ int max_decentralized_counter_groups;
+ int decentralized_counter_groups;
char envbuf[21]; /* enough for any 64-bit integer */
size_t envbufsz;
@@ -804,7 +807,8 @@ early_init(int *argc, char **argv) /*
erts_initialized = 0;
- erts_pre_early_init_cpu_topology(&max_reader_groups,
+ erts_pre_early_init_cpu_topology(&max_decentralized_counter_groups,
+ &max_reader_groups,
&ncpu,
&ncpuonln,
&ncpuavail);
@@ -865,6 +869,24 @@ early_init(int *argc, char **argv) /*
}
if (argv[i][0] == '-') {
switch (argv[i][1]) {
+ case 'd': {
+ char *sub_param = argv[i]+2;
+ if (has_prefix("cg", sub_param)) {
+ char *arg = get_arg(sub_param+2, argv[i+1], &i);
+ if (sscanf(arg, "%d", &max_decentralized_counter_groups) != 1) {
+ erts_fprintf(stderr,
+ "bad decentralized counter groups limit: %s\n", arg);
+ erts_usage();
+ }
+ if (max_decentralized_counter_groups < 0) {
+ erts_fprintf(stderr,
+ "bad decentralized counter groups limit: %d\n",
+ max_decentralized_counter_groups);
+ erts_usage();
+ }
+ }
+ break;
+ }
case 'r': {
char *sub_param = argv[i]+2;
if (has_prefix("g", sub_param)) {
@@ -1186,8 +1208,10 @@ early_init(int *argc, char **argv) /*
erts_early_init_cpu_topology(no_schedulers,
&max_main_threads,
max_reader_groups,
- &reader_groups);
-
+ &reader_groups,
+ max_decentralized_counter_groups,
+ &decentralized_counter_groups);
+ erts_flxctr_setup(decentralized_counter_groups);
{
erts_thr_late_init_data_t elid = ERTS_THR_LATE_INIT_DATA_DEF_INITER;
elid.mem.std.alloc = ethr_std_alloc;
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 711b73417d..6118c671ee 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -641,6 +641,7 @@ struct ErtsSchedulerData_ {
ErtsSchedType type;
Uint no; /* Scheduler number for normal schedulers */
Uint dirty_no; /* Scheduler number for dirty schedulers */
+ int flxctr_slot_no; /* slot nr when a flxctr is used */
struct enif_environment_t *current_nif;
Process *dirty_shadow_process;
Port *current_port;
@@ -1847,6 +1848,7 @@ int erts_resume_processes(ErtsProcList *);
void erts_deep_process_dump(fmtfn_t, void *);
Eterm erts_get_reader_groups_map(Process *c_p);
+Eterm erts_get_decentralized_counter_groups_map(Process *c_p);
Eterm erts_debug_reader_groups_map(Process *c_p, int groups);
Uint erts_debug_nbalance(void);