aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_cpu_topology.c
diff options
context:
space:
mode:
authorKjell Winblad <[email protected]>2019-02-22 16:49:37 +0100
committerKjell Winblad <[email protected]>2019-04-10 15:42:42 +0200
commitc5e9766712436bea2b91bccd062f66a3ad1841bb (patch)
tree25ce2b16b9f847088997b0d4e77af46dbb6d2d66 /erts/emulator/beam/erl_cpu_topology.c
parent326c3cb70c1b37c794b781a42c50725766098810 (diff)
downloadotp-c5e9766712436bea2b91bccd062f66a3ad1841bb.tar.gz
otp-c5e9766712436bea2b91bccd062f66a3ad1841bb.tar.bz2
otp-c5e9766712436bea2b91bccd062f66a3ad1841bb.zip
Decentralized counters for ETS ordered_set with write_concurrency
Previously, all ETS tables used centralized counter variables to keep track of the number of items stored and the amount of memory consumed. These counters can cause scalability problems (especially on big NUMA systems). This commit adds an implementation of a decentralized counter and modifies the implementation of ETS so that ETS tables of type ordered_set with write_concurrency enabled use the decentralized counter. [Experiments][1] indicate that this change substantially improves the scalability of ETS ordered_set tables with write_concurrency enabled in scenarios with frequent `ets:insert/2` and `ets:delete/2` calls. The new counter is implemented in the module erts_flxctr (`erts_flxctr.h` and `erts_flxctr.c`). The module has the suffix flxctr as it contains the implementation of a flexible counter (i.e., counter instances can be configured to be either centralized or decentralized). Counters that are configured to be centralized are implemented with a single counter variable which is modified with atomic operations. Decentralized counters are spread over several cache lines (how many can be configured with the parameter `+dcg`). The scheduler threads are mapped to cache lines so that there is no single point of contention when decentralized counters are updated. The thread progress functionality of the Erlang VM is utilized to implement support for linearizable snapshots of decentralized counters. The snapshot functionality is used by the `ets:info/1` and `ets:info/2` functions. [1]: http://winsh.me/ets_catree_benchmark/flxctr_res.html
Diffstat (limited to 'erts/emulator/beam/erl_cpu_topology.c')
-rw-r--r--erts/emulator/beam/erl_cpu_topology.c41
1 files changed, 39 insertions, 2 deletions
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
index 6f8d2f8c35..6a4f43297e 100644
--- a/erts/emulator/beam/erl_cpu_topology.c
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -34,6 +34,7 @@
#include "error.h"
#include "bif.h"
#include "erl_cpu_topology.h"
+#include "erl_flxctr.h"
#define ERTS_MAX_READER_GROUPS 64
@@ -58,6 +59,7 @@ static erts_cpu_info_t *cpuinfo;
static int max_main_threads;
static int reader_groups;
+static int decentralized_counter_groups;
static ErtsCpuBindData *scheduler2cpu_map;
static erts_rwmtx_t cpuinfo_rwmtx;
@@ -127,6 +129,8 @@ static erts_cpu_groups_map_t *cpu_groups_maps;
static erts_cpu_groups_map_t *reader_groups_map;
+static erts_cpu_groups_map_t *decentralized_counter_groups_map;
+
#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
@@ -138,6 +142,7 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
+static void flxctr_groups_callback(int, ErtsSchedulerData *, int, void *);
static erts_cpu_groups_map_t *add_cpu_groups(int groups,
erts_cpu_groups_callback_t callback,
void *arg);
@@ -1646,7 +1651,8 @@ erts_get_logical_processors(int *conf, int *onln, int *avail)
}
void
-erts_pre_early_init_cpu_topology(int *max_rg_p,
+erts_pre_early_init_cpu_topology(int *max_dcg_p,
+ int *max_rg_p,
int *conf_p,
int *onln_p,
int *avail_p)
@@ -1654,6 +1660,7 @@ erts_pre_early_init_cpu_topology(int *max_rg_p,
cpu_groups_maps = NULL;
no_cpu_groups_callbacks = 0;
*max_rg_p = ERTS_MAX_READER_GROUPS;
+ *max_dcg_p = ERTS_MAX_FLXCTR_GROUPS;
cpuinfo = erts_cpu_info_create();
get_logical_processors(conf_p, onln_p, avail_p);
}
@@ -1662,7 +1669,9 @@ void
erts_early_init_cpu_topology(int no_schedulers,
int *max_main_threads_p,
int max_reader_groups,
- int *reader_groups_p)
+ int *reader_groups_p,
+ int max_decentralized_counter_groups,
+ int *decentralized_counter_groups_p)
{
user_cpudata = NULL;
user_cpudata_size = 0;
@@ -1687,6 +1696,12 @@ erts_early_init_cpu_topology(int no_schedulers,
max_main_threads = no_schedulers;
*max_main_threads_p = max_main_threads;
+ decentralized_counter_groups = max_main_threads;
+ if (decentralized_counter_groups <= 1 || max_decentralized_counter_groups <= 1)
+ decentralized_counter_groups = 1;
+ if (decentralized_counter_groups > max_decentralized_counter_groups)
+ decentralized_counter_groups = max_decentralized_counter_groups;
+ *decentralized_counter_groups_p = decentralized_counter_groups;
reader_groups = max_main_threads;
if (reader_groups <= 1 || max_reader_groups <= 1)
reader_groups = 0;
@@ -1718,6 +1733,9 @@ erts_init_cpu_topology(void)
reader_groups_map = add_cpu_groups(reader_groups,
reader_groups_callback,
NULL);
+ decentralized_counter_groups_map = add_cpu_groups(decentralized_counter_groups,
+ flxctr_groups_callback,
+ NULL);
if (cpu_bind_order == ERTS_CPU_BIND_NONE)
erts_rwmtx_rwunlock(&cpuinfo_rwmtx);
@@ -1789,6 +1807,15 @@ reader_groups_callback(int suspending,
erts_rwmtx_set_reader_group(suspending ? 0 : group+1);
}
+void
+flxctr_groups_callback(int suspending,
+ ErtsSchedulerData *esdp,
+ int group,
+ void *unused)
+{
+ erts_flxctr_set_slot(suspending ? 0 : group+1);
+}
+
static Eterm get_cpu_groups_map(Process *c_p,
erts_cpu_groups_map_t *map,
int offset);
@@ -1821,6 +1848,16 @@ erts_get_reader_groups_map(Process *c_p)
return res;
}
+Eterm
+erts_get_decentralized_counter_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = get_cpu_groups_map(c_p, decentralized_counter_groups_map, 1);
+ erts_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
/*
* CPU groups
*/