aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_cpu_topology.c
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2010-11-18 14:28:51 +0100
committerRickard Green <[email protected]>2010-11-18 14:28:51 +0100
commit648d80a4fa2fb48bb185d2dfb40043476b297073 (patch)
tree946c9c985f91b1ff32c41560dd2924162e627666 /erts/emulator/beam/erl_cpu_topology.c
parent45fb0ee83f92b8686955cebae038b3f930bf59e9 (diff)
parentf4aa12fc5f5756d7574311cf66cd5ec8025df682 (diff)
downloadotp-648d80a4fa2fb48bb185d2dfb40043476b297073.tar.gz
otp-648d80a4fa2fb48bb185d2dfb40043476b297073.tar.bz2
otp-648d80a4fa2fb48bb185d2dfb40043476b297073.zip
Merge branch 'rickard/cpu-groups/OTP-8861' into dev
* rickard/cpu-groups/OTP-8861: Generalize reader groups Move cpu topology functionality into erl_cpu_topology.[ch] Do not use more reader groups for schedulers than schedulers Conflicts: erts/emulator/beam/erl_init.c
Diffstat (limited to 'erts/emulator/beam/erl_cpu_topology.c')
-rw-r--r--erts/emulator/beam/erl_cpu_topology.c2359
1 files changed, 2359 insertions, 0 deletions
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
new file mode 100644
index 0000000000..db95c4a5d4
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -0,0 +1,2359 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: CPU topology and related functionality
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ctype.h>
+
+#include "global.h"
+#include "error.h"
+#include "bif.h"
+#include "erl_cpu_topology.h"
+
+#define ERTS_MAX_READER_GROUPS 8
+
+/*
+ * Cpu topology hierarchy.
+ */
+#define ERTS_TOPOLOGY_NODE 0
+#define ERTS_TOPOLOGY_PROCESSOR 1
+#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
+#define ERTS_TOPOLOGY_CORE 3
+#define ERTS_TOPOLOGY_THREAD 4
+#define ERTS_TOPOLOGY_LOGICAL 5
+
+#define ERTS_TOPOLOGY_MAX_DEPTH 6
+
+typedef struct {
+ int bind_id;
+ int bound_id;
+} ErtsCpuBindData;
+
+static erts_cpu_info_t *cpuinfo;
+
+static int max_main_threads;
+static int reader_groups;
+
+static ErtsCpuBindData *scheduler2cpu_map;
+static erts_smp_rwmtx_t cpuinfo_rwmtx;
+
+typedef enum {
+ ERTS_CPU_BIND_UNDEFINED,
+ ERTS_CPU_BIND_SPREAD,
+ ERTS_CPU_BIND_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_THREAD_SPREAD,
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
+ ERTS_CPU_BIND_NO_SPREAD,
+ ERTS_CPU_BIND_NONE
+} ErtsCpuBindOrder;
+
+#define ERTS_CPU_BIND_DEFAULT_BIND \
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+
+static int no_cpu_groups_callbacks;
+static ErtsCpuBindOrder cpu_bind_order;
+
+static erts_cpu_topology_t *user_cpudata;
+static int user_cpudata_size;
+static erts_cpu_topology_t *system_cpudata;
+static int system_cpudata_size;
+
+typedef struct {
+ int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
+} erts_avail_cput;
+
+typedef struct {
+ int id;
+ int sub_levels;
+ int cpu_groups;
+} erts_cpu_groups_count_t;
+
+typedef struct {
+ int logical;
+ int cpu_group;
+} erts_cpu_groups_map_array_t;
+
+typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
+struct erts_cpu_groups_callback_list_t_ {
+ erts_cpu_groups_callback_list_t *next;
+ erts_cpu_groups_callback_t callback;
+ void *arg;
+};
+
+typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
+struct erts_cpu_groups_map_t_ {
+ erts_cpu_groups_map_t *next;
+ int groups;
+ erts_cpu_groups_map_array_t *array;
+ int size;
+ int logical_processors;
+ erts_cpu_groups_callback_list_t *callback_list;
+};
+
+typedef struct {
+ erts_cpu_groups_callback_t callback;
+ int ix;
+ void *arg;
+} erts_cpu_groups_callback_call_t;
+
+static erts_cpu_groups_map_t *cpu_groups_maps;
+
+static erts_cpu_groups_map_t *reader_groups_map;
+
+#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
+
+#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
+
+#ifdef ERTS_SMP
+static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq);
+static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
+#endif
+
+static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
+static erts_cpu_groups_map_t *add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg);
+static void update_cpu_groups_maps(void);
+static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
+static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
+ ErtsSchedulerData *esdp);
+
+static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
+ int *cpudata_size);
+static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
+
+static int
+int_cmp(const void *vx, const void *vy)
+{
+ return *((int *) vx) - *((int *) vy);
+}
+
+static int
+cpu_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ return 0;
+}
+
+static int
+cpu_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_no_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+static ERTS_INLINE void
+make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
+{
+ int ix;
+ int node = -1;
+ int processor = -1;
+ int processor_node = -1;
+ int processor_node_node = -1;
+ int core = -1;
+ int thread = -1;
+ int old_node = -1;
+ int old_processor = -1;
+ int old_processor_node = -1;
+ int old_core = -1;
+ int old_thread = -1;
+
+ for (ix = 0; ix < size; ix++) {
+ if (!no_node || cpudata[ix].node >= 0) {
+ if (old_node == cpudata[ix].node)
+ cpudata[ix].node = node;
+ else {
+ old_node = cpudata[ix].node;
+ old_processor = processor = -1;
+ if (!no_node)
+ old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node || cpudata[ix].node >= 0)
+ cpudata[ix].node = ++node;
+ }
+ }
+ if (old_processor == cpudata[ix].processor)
+ cpudata[ix].processor = processor;
+ else {
+ old_processor = cpudata[ix].processor;
+ if (!no_node)
+ processor_node_node = old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ cpudata[ix].processor = ++processor;
+ }
+ if (no_node && cpudata[ix].processor_node < 0)
+ old_processor_node = -1;
+ else {
+ if (old_processor_node == cpudata[ix].processor_node) {
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = node;
+ else {
+ if (processor_node_node >= 0)
+ cpudata[ix].node = processor_node_node;
+ cpudata[ix].processor_node = processor_node;
+ }
+ }
+ else {
+ old_processor_node = cpudata[ix].processor_node;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = ++node;
+ else {
+ cpudata[ix].node = processor_node_node = ++node;
+ cpudata[ix].processor_node = ++processor_node;
+ }
+ }
+ }
+ if (!no_node && cpudata[ix].processor_node < 0)
+ cpudata[ix].processor_node = 0;
+ if (old_core == cpudata[ix].core)
+ cpudata[ix].core = core;
+ else {
+ old_core = cpudata[ix].core;
+ old_thread = thread = -1;
+ cpudata[ix].core = ++core;
+ }
+ if (old_thread == cpudata[ix].thread)
+ cpudata[ix].thread = thread;
+ else
+ old_thread = cpudata[ix].thread = ++thread;
+ }
+}
+
+static void
+cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq)
+{
+ if (size > 1) {
+ int no_node = 0;
+ int (*cmp_func)(const void *, const void *);
+ switch (bind_order) {
+ case ERTS_CPU_BIND_SPREAD:
+ cmp_func = cpu_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD:
+ cmp_func = cpu_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_SPREAD:
+ cmp_func = cpu_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_SPREAD:
+ cmp_func = cpu_no_spread_order_cmp;
+ break;
+ default:
+ cmp_func = NULL;
+ erl_exit(ERTS_ABORT_EXIT,
+ "Bad cpu bind type: %d\n",
+ (int) cpu_bind_order);
+ break;
+ }
+
+ if (mk_seq)
+ make_cpudata_id_seq(cpudata, size, no_node);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
+ }
+}
+
+static int
+processor_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
+{
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+ int cgcc_ix;
+
+ /* Unbind from cpu */
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0
+ && erts_unbind_from_cpu(cpuinfo) == 0) {
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ }
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(1,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(0, 0);
+
+}
+
+void
+erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
+{
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+
+ /* Make sure we check if we should bind to a cpu or not... */
+ if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
+ else
+ esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+}
+
+#endif
+
+void
+erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int res, cpu_id, cgcc_ix;
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+#ifdef ERTS_SMP
+ if (erts_common_run_queue)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
+ else {
+ esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ }
+#endif
+ erts_smp_runq_unlock(esdp->run_queue);
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ cpu_id = scheduler2cpu_map[esdp->no].bind_id;
+ if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
+ res = erts_bind_to_cpu(cpuinfo, cpu_id);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
+ else {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0)
+ goto unbind;
+ }
+ }
+ else if (cpu_id < 0) {
+ unbind:
+ /* Get rid of old binding */
+ res = erts_unbind_from_cpu(cpuinfo);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ else if (res != -ENOTSUP) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+ }
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(0,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ erts_smp_runq_lock(esdp->run_queue);
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int cgcc_ix;
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(0,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+}
+#endif
+
+static void
+write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
+{
+ int s_ix = 1;
+ int cpu_ix;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
+
+ cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
+
+ for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
+ if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
+ scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
+ }
+
+ if (s_ix <= erts_no_schedulers)
+ for (; s_ix <= erts_no_schedulers; s_ix++)
+ scheduler2cpu_map[s_ix].bind_id = -1;
+}
+
+int
+erts_init_scheduler_bind_type_string(char *how)
+{
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
+ return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
+
+ if (!system_cpudata && !user_cpudata)
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
+
+ if (sys_strcmp(how, "db") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (sys_strcmp(how, "s") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (sys_strcmp(how, "ps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "ts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (sys_strcmp(how, "tnnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (sys_strcmp(how, "ns") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (sys_strcmp(how, "u") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
+
+ return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
+}
+
+static Eterm
+bound_schedulers_term(ErtsCpuBindOrder order)
+{
+ switch (order) {
+ case ERTS_CPU_BIND_SPREAD: {
+ ERTS_DECL_AM(spread);
+ return AM_spread;
+ }
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(processor_spread);
+ return AM_processor_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_SPREAD: {
+ ERTS_DECL_AM(thread_spread);
+ return AM_thread_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(thread_no_node_processor_spread);
+ return AM_thread_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(no_node_processor_spread);
+ return AM_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
+ ERTS_DECL_AM(no_node_thread_spread);
+ return AM_no_node_thread_spread;
+ }
+ case ERTS_CPU_BIND_NO_SPREAD: {
+ ERTS_DECL_AM(no_spread);
+ return AM_no_spread;
+ }
+ case ERTS_CPU_BIND_NONE: {
+ ERTS_DECL_AM(unbound);
+ return AM_unbound;
+ }
+ default:
+ ASSERT(0);
+ return THE_NON_VALUE;
+ }
+}
+
+Eterm
+erts_bound_schedulers_term(Process *c_p)
+{
+ ErtsCpuBindOrder order;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ order = cpu_bind_order;
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return bound_schedulers_term(order);
+}
+
+Eterm
+erts_bind_schedulers(Process *c_p, Eterm how)
+{
+ int notify = 0;
+ Eterm res;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ ErtsCpuBindOrder old_cpu_bind_order;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
+ ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
+ }
+ else {
+
+ old_cpu_bind_order = cpu_bind_order;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+
+ if (!cpudata) {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ notify = 1;
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ res = bound_schedulers_term(old_cpu_bind_order);
+ }
+
+ done:
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ if (notify)
+ erts_sched_notify_check_cpu_bind();
+
+ return res;
+}
+
+int
+erts_sched_bind_atthrcreate_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ return esdp != NULL && erts_is_scheduler_bound(esdp);
+}
+
+int
+erts_sched_bind_atthrcreate_child(int unbind)
+{
+ int res = 0;
+ if (unbind) {
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = erts_unbind_from_cpu(cpuinfo);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ }
+ return res;
+}
+
+void
+erts_sched_bind_atthrcreate_parent(int unbind)
+{
+
+}
+
+int
+erts_sched_bind_atfork_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
+ if (unbind)
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ return unbind;
+}
+
+int
+erts_sched_bind_atfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_unbind_from_cpu(cpuinfo);
+ }
+ return 0;
+}
+
+char *
+erts_sched_bind_atvfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_get_unbind_from_cpu_str(cpuinfo);
+ }
+ return "false";
+}
+
+void
+erts_sched_bind_atfork_parent(int unbind)
+{
+ if (unbind)
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+Eterm
+erts_fake_scheduler_bindings(Process *p, Eterm how)
+{
+ ErtsCpuBindOrder fake_cpu_bind_order;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ Eterm res;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ ERTS_BIF_PREP_ERROR(res, p, BADARG);
+ return res;
+ }
+
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+ if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
+ ERTS_BIF_PREP_RET(res, am_false);
+ else {
+ int i;
+ Eterm *hp;
+
+ cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
+
+#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
+
+ erts_fprintf(stderr, "node: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].node);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "processor: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor);
+ erts_fprintf(stderr, "\n");
+ if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
+ erts_fprintf(stderr, "processor_node:");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
+ erts_fprintf(stderr, "\n");
+ }
+ erts_fprintf(stderr, "core: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].core);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "thread: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].thread);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "logical: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].logical);
+ erts_fprintf(stderr, "\n");
+#endif
+
+ hp = HAlloc(p, cpudata_size+1);
+ ERTS_BIF_PREP_RET(res, make_tuple(hp));
+ *hp++ = make_arityval((Uint) cpudata_size);
+ for (i = 0; i < cpudata_size; i++)
+ *hp++ = make_small((Uint) cpudata[i].logical);
+ }
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_schedulers_binds(Process *c_p)
+{
+ int ix;
+ ERTS_DECL_AM(unbound);
+ Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
+ Eterm res = make_tuple(hp);
+
+ *(hp++) = make_arityval(erts_no_schedulers);
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ for (ix = 1; ix <= erts_no_schedulers; ix++)
+ *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
+ ? make_small(scheduler2cpu_map[ix].bound_id)
+ : AM_unbound);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+/*
+ * CPU topology
+ */
+
+typedef struct {
+ int *id;
+ int used;
+ int size;
+} ErtsCpuTopIdSeq;
+
+typedef struct {
+ ErtsCpuTopIdSeq logical;
+ ErtsCpuTopIdSeq thread;
+ ErtsCpuTopIdSeq core;
+ ErtsCpuTopIdSeq processor_node;
+ ErtsCpuTopIdSeq processor;
+ ErtsCpuTopIdSeq node;
+} ErtsCpuTopEntry;
+
+static void
+init_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ int size = 10;
+ cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->logical.size = size;
+ cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->thread.size = size;
+ cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->core.size = size;
+ cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor_node.size = size;
+ cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor.size = size;
+ cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->node.size = size;
+}
+
+static void
+destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
+}
+
+static int
+get_cput_value_or_range(int *v, int *vr, char **str)
+{
+ long l;
+ char *c = *str;
+ errno = 0;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ *v = (int) l;
+ if (*c == '-') {
+ c++;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ *vr = (int) l;
+ }
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
+{
+ int ix = 0;
+ int need_size = 0;
+ char *c = *str;
+
+ while (1) {
+ int res;
+ int val;
+ int nids;
+ int val_range = -1;
+ res = get_cput_value_or_range(&val, &val_range, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ if (val_range < 0 || val_range == val)
+ nids = 1;
+ else {
+ if (val_range > val)
+ nids = val_range - val + 1;
+ else
+ nids = val - val_range + 1;
+ }
+ need_size += nids;
+ if (need_size > idseq->size) {
+ idseq->size = need_size + 10;
+ idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
+ idseq->id,
+ sizeof(int)*idseq->size);
+ }
+ if (nids == 1)
+ idseq->id[ix++] = val;
+ else if (val_range > val) {
+ for (; val <= val_range; val++)
+ idseq->id[ix++] = val;
+ }
+ else {
+ for (; val >= val_range; val--)
+ idseq->id[ix++] = val;
+ }
+ if (*c != ',')
+ break;
+ c++;
+ }
+ *str = c;
+ idseq->used = ix;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_entry(ErtsCpuTopEntry *cput, char **str)
+{
+ int h;
+ char *c = *str;
+
+ cput->logical.used = 0;
+ cput->thread.id[0] = 0;
+ cput->thread.used = 1;
+ cput->core.id[0] = 0;
+ cput->core.used = 1;
+ cput->processor_node.id[0] = -1;
+ cput->processor_node.used = 1;
+ cput->processor.id[0] = 0;
+ cput->processor.used = 1;
+ cput->node.id[0] = -1;
+ cput->node.used = 1;
+
+ h = ERTS_TOPOLOGY_MAX_DEPTH;
+ while (*c != ':' && *c != '\0') {
+ int res;
+ ErtsCpuTopIdSeq *idseqp;
+ switch (*c++) {
+ case 'L':
+ if (h <= ERTS_TOPOLOGY_LOGICAL)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->logical;
+ h = ERTS_TOPOLOGY_LOGICAL;
+ break;
+ case 't':
+ case 'T':
+ if (h <= ERTS_TOPOLOGY_THREAD)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->thread;
+ h = ERTS_TOPOLOGY_THREAD;
+ break;
+ case 'c':
+ case 'C':
+ if (h <= ERTS_TOPOLOGY_CORE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->core;
+ h = ERTS_TOPOLOGY_CORE;
+ break;
+ case 'p':
+ case 'P':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor;
+ h = ERTS_TOPOLOGY_PROCESSOR;
+ break;
+ case 'n':
+ case 'N':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR) {
+ do_node:
+ if (h <= ERTS_TOPOLOGY_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->node;
+ h = ERTS_TOPOLOGY_NODE;
+ }
+ else {
+ int p_node = 0;
+ char *p_chk = c;
+ while (*p_chk != '\0' && *p_chk != ':') {
+ if (*p_chk == 'p' || *p_chk == 'P') {
+ p_node = 1;
+ break;
+ }
+ p_chk++;
+ }
+ if (!p_node)
+ goto do_node;
+ if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor_node;
+ h = ERTS_TOPOLOGY_PROCESSOR_NODE;
+ }
+ break;
+ default:
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
+ }
+ res = get_cput_id_seq(idseqp, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ }
+
+ if (cput->logical.used < 1)
+ return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
+
+ if (*c == ':') {
+ c++;
+ }
+
+ if (cput->thread.used != 1
+ && cput->thread.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->core.used != 1
+ && cput->core.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor_node.used != 1
+ && cput->processor_node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor.used != 1
+ && cput->processor.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->node.used != 1
+ && cput->node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+verify_topology(erts_cpu_topology_t *cpudata, int size)
+{
+ if (size > 0) {
+ int *logical;
+ int node, processor, no_nodes, i;
+
+ /* Verify logical ids */
+ logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
+
+ for (i = 0; i < size; i++)
+ logical[i] = cpudata[i].logical;
+
+ qsort(logical, size, sizeof(int), int_cmp);
+ for (i = 0; i < size-1; i++) {
+ if (logical[i] == logical[i+1]) {
+ erts_free(ERTS_ALC_T_TMP, logical);
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
+ }
+ }
+
+ erts_free(ERTS_ALC_T_TMP, logical);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
+
+ /* Verify unique entities */
+
+ for (i = 1; i < size; i++) {
+ if (cpudata[i-1].processor == cpudata[i].processor
+ && cpudata[i-1].node == cpudata[i].node
+ && (cpudata[i-1].processor_node
+ == cpudata[i].processor_node)
+ && cpudata[i-1].core == cpudata[i].core
+ && cpudata[i-1].thread == cpudata[i].thread) {
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
+ }
+ }
+
+ /* Verify numa nodes */
+ node = cpudata[0].node;
+ processor = cpudata[0].processor;
+ no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
+ for (i = 1; i < size; i++) {
+ if (no_nodes) {
+ if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ else {
+ if (cpudata[i].processor == processor && cpudata[i].node != node)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ node = cpudata[i].node;
+ processor = cpudata[i].processor;
+ if (node >= 0 && cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ if (node < 0 && cpudata[i].processor_node < 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ }
+ }
+
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+int
+erts_init_cpu_topology_string(char *topology_str)
+{
+ ErtsCpuTopEntry cput;
+ int need_size;
+ char *c;
+ int ix;
+ int error = ERTS_INIT_CPU_TOPOLOGY_OK;
+
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 10;
+
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+
+ init_cpu_top_entry(&cput);
+
+ ix = 0;
+ need_size = 0;
+
+ c = topology_str;
+ if (*c == '\0') {
+ error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
+ goto fail;
+ }
+ do {
+ int r;
+ error = get_cput_entry(&cput, &c);
+ if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
+ goto fail;
+ need_size += cput.logical.used;
+ if (user_cpudata_size < need_size) {
+ user_cpudata_size = need_size + 10;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ ASSERT(cput.thread.used == 1
+ || cput.thread.used == cput.logical.used);
+ ASSERT(cput.core.used == 1
+ || cput.core.used == cput.logical.used);
+ ASSERT(cput.processor_node.used == 1
+ || cput.processor_node.used == cput.logical.used);
+ ASSERT(cput.processor.used == 1
+ || cput.processor.used == cput.logical.used);
+ ASSERT(cput.node.used == 1
+ || cput.node.used == cput.logical.used);
+
+ for (r = 0; r < cput.logical.used; r++) {
+ user_cpudata[ix].logical = cput.logical.id[r];
+ user_cpudata[ix].thread =
+ cput.thread.id[cput.thread.used == 1 ? 0 : r];
+ user_cpudata[ix].core =
+ cput.core.id[cput.core.used == 1 ? 0 : r];
+ user_cpudata[ix].processor_node =
+ cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
+ user_cpudata[ix].processor =
+ cput.processor.id[cput.processor.used == 1 ? 0 : r];
+ user_cpudata[ix].node =
+ cput.node.id[cput.node.used == 1 ? 0 : r];
+ ix++;
+ }
+ } while (*c != '\0');
+
+ if (user_cpudata_size != ix) {
+ user_cpudata_size = ix;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ error = verify_topology(user_cpudata, user_cpudata_size);
+ if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
+ destroy_cpu_top_entry(&cput);
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+ }
+
+ fail:
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 0;
+ destroy_cpu_top_entry(&cput);
+ return error;
+}
+
+#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
+#define ERTS_GET_USED_CPU_TOPOLOGY 0
+#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
+#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
+
+static Eterm get_cpu_topology_term(Process *c_p, int type);
+
+Eterm
+erts_set_cpu_topology(Process *c_p, Eterm term)
+{
+ erts_cpu_topology_t *cpudata = NULL;
+ int cpudata_size = 0;
+ Eterm res;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
+ if (term == am_undefined) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
+ cpudata_size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+ }
+ else if (is_not_list(term)) {
+ error:
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ res = THE_NON_VALUE;
+ goto done;
+ }
+ else {
+ Eterm list = term;
+ int ix = 0;
+
+ cpudata_size = 100;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ while (is_list(list)) {
+ Eterm *lp = list_val(list);
+ Eterm cpu = CAR(lp);
+ Eterm* tp;
+ Sint id;
+
+ if (is_not_tuple(cpu))
+ goto error;
+
+ tp = tuple_val(cpu);
+
+ if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
+ goto error;
+
+ if (ix >= cpudata_size) {
+ cpudata_size += 100;
+ cpudata = erts_realloc(ERTS_ALC_T_TMP,
+ cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+ }
+
+ id = signed_val(tp[2]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].node = (int) id;
+
+ id = signed_val(tp[3]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor = (int) id;
+
+ id = signed_val(tp[4]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor_node = (int) id;
+
+ id = signed_val(tp[5]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].core = (int) id;
+
+ id = signed_val(tp[6]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].thread = (int) id;
+
+ id = signed_val(tp[7]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].logical = (int) id;
+
+ list = CDR(lp);
+ ix++;
+ }
+
+ if (is_not_nil(list))
+ goto error;
+
+ cpudata_size = ix;
+
+ if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
+ goto error;
+
+ if (user_cpudata_size != cpudata_size) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ user_cpudata_size = cpudata_size;
+ }
+
+ sys_memcpy((void *) user_cpudata,
+ (void *) cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+
+ update_cpu_groups_maps();
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+
+ done:
+
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+static void
+create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
+{
+ if (user_cpudata) {
+ *cpudata_size = user_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) user_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else if (system_cpudata) {
+ *cpudata_size = system_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else {
+ *cpudata = NULL;
+ *cpudata_size = 0;
+ }
+}
+
+static void
+destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
+{
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+}
+
+
+static Eterm
+bld_topology_term(Eterm **hpp,
+ Uint *hszp,
+ erts_cpu_topology_t *cpudata,
+ int size)
+{
+ Eterm res = NIL;
+ int i;
+
+ if (size == 0)
+ return am_undefined;
+
+ for (i = size-1; i >= 0; i--) {
+ res = erts_bld_cons(hpp,
+ hszp,
+ erts_bld_tuple(hpp,
+ hszp,
+ 7,
+ am_cpu,
+ make_small(cpudata[i].node),
+ make_small(cpudata[i].processor),
+ make_small(cpudata[i].processor_node),
+ make_small(cpudata[i].core),
+ make_small(cpudata[i].thread),
+ make_small(cpudata[i].logical)),
+ res);
+ }
+ return res;
+}
+
+static Eterm
+get_cpu_topology_term(Process *c_p, int type)
+{
+#ifdef DEBUG
+ Eterm *hp_end;
+#endif
+ Eterm *hp;
+ Uint hsz;
+ Eterm res = THE_NON_VALUE;
+ erts_cpu_topology_t *cpudata = NULL;
+ int size = 0;
+
+ switch (type) {
+ case ERTS_GET_USED_CPU_TOPOLOGY:
+ if (user_cpudata)
+ goto defined;
+ else
+ goto detected;
+ case ERTS_GET_DETECTED_CPU_TOPOLOGY:
+ detected:
+ if (!system_cpudata)
+ res = am_undefined;
+ else {
+ size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * size));
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*size);
+ }
+ break;
+ case ERTS_GET_DEFINED_CPU_TOPOLOGY:
+ defined:
+ if (!user_cpudata)
+ res = am_undefined;
+ else {
+ size = user_cpudata_size;
+ cpudata = user_cpudata;
+ }
+ break;
+ default:
+ erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
+ break;
+ }
+
+ if (res == am_undefined) {
+ ASSERT(!cpudata);
+ return res;
+ }
+
+ hsz = 0;
+
+ bld_topology_term(NULL, &hsz,
+ cpudata, size);
+
+ hp = HAlloc(c_p, hsz);
+
+#ifdef DEBUG
+ hp_end = hp + hsz;
+#endif
+
+ res = bld_topology_term(&hp, NULL,
+ cpudata, size);
+
+ ASSERT(hp_end == hp);
+
+ if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_cpu_topology_term(Process *c_p, Eterm which)
+{
+ Eterm res;
+ int type;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ if (ERTS_IS_ATOM_STR("used", which))
+ type = ERTS_GET_USED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("detected", which))
+ type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("defined", which))
+ type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
+ else
+ type = ERTS_GET_CPU_TOPOLOGY_ERROR;
+ if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
+ res = THE_NON_VALUE;
+ else
+ res = get_cpu_topology_term(c_p, type);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+static void
+get_logical_processors(int *conf, int *onln, int *avail)
+{
+ if (conf)
+ *conf = erts_get_cpu_configured(cpuinfo);
+ if (onln)
+ *onln = erts_get_cpu_online(cpuinfo);
+ if (avail)
+ *avail = erts_get_cpu_available(cpuinfo);
+}
+
+void
+erts_get_logical_processors(int *conf, int *onln, int *avail)
+{
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ get_logical_processors(conf, onln, avail);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+void
+erts_pre_early_init_cpu_topology(int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p)
+{
+ cpu_groups_maps = NULL;
+ no_cpu_groups_callbacks = 0;
+ *max_rg_p = ERTS_MAX_READER_GROUPS;
+ cpuinfo = erts_cpu_info_create();
+ get_logical_processors(conf_p, onln_p, avail_p);
+}
+
+void
+erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p)
+{
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
+ system_cpudata_size)) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+
+ max_main_threads = erts_get_cpu_configured(cpuinfo);
+ if (max_main_threads > no_schedulers)
+ max_main_threads = no_schedulers;
+ *max_main_threads_p = max_main_threads;
+
+ reader_groups = max_main_threads;
+ if (reader_groups <= 1 || max_reader_groups <= 1)
+ reader_groups = 0;
+ if (reader_groups > max_reader_groups)
+ reader_groups = max_reader_groups;
+ *reader_groups_p = reader_groups;
+}
+
+void
+erts_init_cpu_topology(void)
+{
+ int ix;
+
+ erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info");
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+ scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(ErtsCpuBindData)
+ * (erts_no_schedulers+1)));
+ for (ix = 1; ix <= erts_no_schedulers; ix++) {
+ scheduler2cpu_map[ix].bind_id = -1;
+ scheduler2cpu_map[ix].bound_id = -1;
+ }
+
+ if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
+ int ncpus = erts_get_cpu_configured(cpuinfo);
+ if (ncpus < 1 || erts_no_schedulers < ncpus)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ cpu_bind_order = ((system_cpudata || user_cpudata)
+ && (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP)
+ ? ERTS_CPU_BIND_DEFAULT_BIND
+ : ERTS_CPU_BIND_NONE);
+ }
+
+ reader_groups_map = add_cpu_groups(reader_groups,
+ reader_groups_callback,
+ NULL);
+
+ if (cpu_bind_order == ERTS_CPU_BIND_NONE)
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ else {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+}
+
+int
+erts_update_cpu_info(void)
+{
+ int changed;
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ changed = erts_cpu_info_update(cpuinfo);
+ if (changed) {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+
+ if (system_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ if (!system_cpudata_size)
+ system_cpudata = NULL;
+ else {
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || (ERTS_INIT_CPU_TOPOLOGY_OK
+ != verify_topology(system_cpudata,
+ system_cpudata_size))) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+ }
+
+ update_cpu_groups_maps();
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ if (changed)
+ erts_sched_notify_check_cpu_bind();
+ return changed;
+}
+
+/*
+ * reader groups map
+ */
+
+void
+reader_groups_callback(int suspending,
+ ErtsSchedulerData *esdp,
+ int group,
+ void *unused)
+{
+ if (reader_groups && esdp->no <= max_main_threads)
+ erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1);
+}
+
+static Eterm get_cpu_groups_map(Process *c_p,
+ erts_cpu_groups_map_t *map,
+ int offset);
+Eterm
+erts_debug_reader_groups_map(Process *c_p, int groups)
+{
+ Eterm res;
+ erts_cpu_groups_map_t test;
+
+ test.array = NULL;
+ test.groups = groups;
+ make_cpu_groups_map(&test, 1);
+ if (!test.array)
+ res = NIL;
+ else {
+ res = get_cpu_groups_map(c_p, &test, 1);
+ erts_free(ERTS_ALC_T_TMP, test.array);
+ }
+ return res;
+}
+
+
+Eterm
+erts_get_reader_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = get_cpu_groups_map(c_p, reader_groups_map, 1);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+/*
+ * CPU groups
+ */
+
+static Eterm
+get_cpu_groups_map(Process *c_p,
+ erts_cpu_groups_map_t *map,
+ int offset)
+{
+#ifdef DEBUG
+ Eterm *endp;
+#endif
+ Eterm res = NIL, tuple;
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(c_p, map->logical_processors*(2+3));
+#ifdef DEBUG
+ endp = hp + map->logical_processors*(2+3);
+#endif
+ for (i = map->size - 1; i >= 0; i--) {
+ if (map->array[i].logical >= 0) {
+ tuple = TUPLE2(hp,
+ make_small(map->array[i].logical),
+ make_small(map->array[i].cpu_group + offset));
+ hp += 3;
+ res = CONS(hp, tuple, res);
+ hp += 2;
+ }
+ }
+ ASSERT(hp == endp);
+ return res;
+}
+
+static void
+make_available_cpu_topology(erts_avail_cput *no,
+ erts_avail_cput *avail,
+ erts_cpu_topology_t *cpudata,
+ int *size,
+ int test)
+{
+ int len = *size;
+ erts_cpu_topology_t last;
+ int a, i, j;
+
+ no->level[ERTS_TOPOLOGY_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_CORE] = -1;
+ no->level[ERTS_TOPOLOGY_THREAD] = -1;
+ no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
+
+ last.node = INT_MIN;
+ last.processor = INT_MIN;
+ last.processor_node = INT_MIN;
+ last.core = INT_MIN;
+ last.thread = INT_MIN;
+ last.logical = INT_MIN;
+
+ a = 0;
+
+ for (i = 0; i < len; i++) {
+
+ if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
+ continue;
+
+ if (last.node != cpudata[i].node)
+ goto node;
+ if (last.processor != cpudata[i].processor)
+ goto processor;
+ if (last.processor_node != cpudata[i].processor_node)
+ goto processor_node;
+ if (last.core != cpudata[i].core)
+ goto core;
+ ASSERT(last.thread != cpudata[i].thread);
+ goto thread;
+
+ node:
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ processor:
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ processor_node:
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ core:
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ thread:
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
+ avail[a].level[j] = no->level[j];
+
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
+ avail[a].level[ERTS_TOPOLOGY_CG] = 0;
+
+ ASSERT(last.logical != cpudata[i].logical);
+
+ last = cpudata[i];
+ a++;
+ }
+
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ *size = a;
+}
+
+static void
+cpu_group_insert(erts_cpu_groups_map_t *map,
+ int logical, int cpu_group)
+{
+ int start = logical % map->size;
+ int ix = start;
+
+ do {
+ if (map->array[ix].logical < 0) {
+ map->array[ix].logical = logical;
+ map->array[ix].cpu_group = cpu_group;
+ return;
+ }
+ ix++;
+ if (ix == map->size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
+}
+
+
+static int
+sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int sub_level = level+1;
+ int last = -1;
+ cgc->sub_levels = 0;
+
+ do {
+ if (last != avail[aix].level[sub_level]) {
+ cgc->sub_levels++;
+ last = avail[aix].level[sub_level];
+ }
+ aix++;
+ }
+ while (aix < avail_sz && cgc->id == avail[aix].level[level]);
+ cgc->cpu_groups = 0;
+ return aix;
+}
+
+static int
+write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
+ int level, int a,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int cg = *cgp;
+ int sub_level = level+1;
+ int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
+ int xsl = cgcp->sub_levels % cgcp->cpu_groups;
+ int sls = 0;
+ int last = -1;
+ int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;
+
+ ASSERT(level < 0 || avail[a].level[level] == cgcp->id);
+
+ do {
+ if (last != avail[a].level[sub_level]) {
+ if (!sls) {
+ sls = sl_per_gr;
+ cg++;
+ if (cg >= xsl_cg_lim)
+ sls++;
+ }
+ last = avail[a].level[sub_level];
+ sls--;
+ }
+ avail[a].level[ERTS_TOPOLOGY_CG] = cg;
+ a++;
+ } while (a < avail_sz && (level < 0
+ || avail[a].level[level] == cgcp->id));
+
+ ASSERT(cgcp->cpu_groups == cg - *cgp);
+
+ *cgp = cg;
+
+ return a;
+}
+
+static int
+cg_count_sub_levels_compare(const void *vx, const void *vy)
+{
+ erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
+ erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
+ if (x->sub_levels != y->sub_levels)
+ return y->sub_levels - x->sub_levels;
+ return x->id - y->id;
+}
+
+static int
+cg_count_id_compare(const void *vx, const void *vy)
+{
+ erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
+ erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
+ return x->id - y->id;
+}
+
+static void
+make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
+{
+ int i, spread_level, avail_sz;
+ erts_avail_cput no, *avail;
+ erts_cpu_topology_t *cpudata;
+ ErtsAlcType_t alc_type = (test
+ ? ERTS_ALC_T_TMP
+ : ERTS_ALC_T_CPU_GRPS_MAP);
+
+ if (map->array)
+ erts_free(alc_type, map->array);
+
+ map->array = NULL;
+ map->logical_processors = 0;
+ map->size = 0;
+
+ if (!map->groups)
+ return;
+
+ create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
+
+ if (!cpudata)
+ return;
+
+ cpu_bind_order_sort(cpudata,
+ avail_sz,
+ ERTS_CPU_BIND_NO_SPREAD,
+ 1);
+
+ avail = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(erts_avail_cput)*avail_sz);
+
+ make_available_cpu_topology(&no, avail, cpudata,
+ &avail_sz, test);
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ map->size = avail_sz*2+1;
+
+ map->array = erts_alloc(alc_type,
+ (sizeof(erts_cpu_groups_map_array_t)
+ * map->size));;
+ map->logical_processors = avail_sz;
+
+ for (i = 0; i < map->size; i++) {
+ map->array[i].logical = -1;
+ map->array[i].cpu_group = -1;
+ }
+
+ spread_level = ERTS_TOPOLOGY_CORE;
+ for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
+ if (no.level[i] > map->groups) {
+ spread_level = i;
+ break;
+ }
+ }
+
+ if (no.level[spread_level] <= map->groups) {
+ int a, cg, last = -1;
+ cg = -1;
+ ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
+ for (a = 0; a < avail_sz; a++) {
+ if (last != avail[a].level[spread_level]) {
+ cg++;
+ last = avail[a].level[spread_level];
+ }
+ cpu_group_insert(map,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ cg);
+ }
+ }
+ else { /* map->groups < no.level[spread_level] */
+ erts_cpu_groups_count_t *cg_count;
+ int a, cg, tl, toplevels;
+
+ tl = spread_level-1;
+
+ if (spread_level == ERTS_TOPOLOGY_NODE)
+ toplevels = 1;
+ else
+ toplevels = no.level[tl];
+
+ cg_count = erts_alloc(ERTS_ALC_T_TMP,
+ toplevels*sizeof(erts_cpu_groups_count_t));
+
+ if (toplevels == 1) {
+ cg_count[0].id = 0;
+ cg_count[0].sub_levels = no.level[spread_level];
+ cg_count[0].cpu_groups = map->groups;
+ }
+ else {
+ int cgs_per_tl, cgs;
+ cgs = map->groups;
+ cgs_per_tl = cgs / toplevels;
+
+ a = 0;
+ for (i = 0; i < toplevels; i++) {
+ cg_count[i].id = avail[a].level[tl];
+ a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
+ }
+
+ qsort(cg_count,
+ toplevels,
+ sizeof(erts_cpu_groups_count_t),
+ cg_count_sub_levels_compare);
+
+ for (i = 0; i < toplevels; i++) {
+ if (cg_count[i].sub_levels < cgs_per_tl) {
+ cg_count[i].cpu_groups = cg_count[i].sub_levels;
+ cgs -= cg_count[i].sub_levels;
+ }
+ else {
+ cg_count[i].cpu_groups = cgs_per_tl;
+ cgs -= cgs_per_tl;
+ }
+ }
+
+ while (cgs > 0) {
+ for (i = 0; i < toplevels; i++) {
+ if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
+ break;
+ else {
+ cg_count[i].cpu_groups++;
+ if (--cgs == 0)
+ break;
+ }
+ }
+ }
+
+ qsort(cg_count,
+ toplevels,
+ sizeof(erts_cpu_groups_count_t),
+ cg_count_id_compare);
+ }
+
+ a = i = 0;
+ cg = -1;
+ while (a < avail_sz) {
+ a = write_cpu_groups(&cg, &cg_count[i], tl,
+ a, avail_sz, avail);
+ i++;
+ }
+
+ ASSERT(map->groups == cg + 1);
+
+ for (a = 0; a < avail_sz; a++)
+ cpu_group_insert(map,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ avail[a].level[ERTS_TOPOLOGY_CG]);
+
+ erts_free(ERTS_ALC_T_TMP, cg_count);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, avail);
+}
+
+static erts_cpu_groups_map_t *
+add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ int use_groups = groups;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_map_t *cgm;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (use_groups > max_main_threads)
+ use_groups = max_main_threads;
+
+ if (!use_groups)
+ return NULL;
+
+ no_cpu_groups_callbacks++;
+ cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+ sizeof(erts_cpu_groups_callback_list_t));
+ cgcl->callback = callback;
+ cgcl->arg = arg;
+
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ if (cgm->groups == use_groups) {
+ cgcl->next = cgm->callback_list;
+ cgm->callback_list = cgcl;
+ return cgm;
+ }
+ }
+
+
+ cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+ sizeof(erts_cpu_groups_map_t));
+ cgm->next = cpu_groups_maps;
+ cgm->groups = use_groups;
+ cgm->array = NULL;
+ cgm->size = 0;
+ cgm->logical_processors = 0;
+ cgm->callback_list = cgcl;
+
+ cgcl->next = NULL;
+
+ make_cpu_groups_map(cgm, 0);
+
+ cpu_groups_maps = cgm;
+
+ return cgm;
+}
+
+static void
+remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg)
+{
+ erts_cpu_groups_map_t *prev_cgm, *cgm;
+ erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ no_cpu_groups_callbacks--;
+
+ prev_cgm = NULL;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ prev_cgcl = NULL;
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ if (cgcl->callback == callback && cgcl->arg == arg) {
+ if (prev_cgcl)
+ prev_cgcl->next = cgcl->next;
+ else
+ cgm->callback_list = cgcl->next;
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl);
+ if (!cgm->callback_list) {
+ if (prev_cgm)
+ prev_cgm->next = cgm->next;
+ else
+ cpu_groups_maps = cgm->next;
+ if (cgm->array)
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array);
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm);
+ }
+ return;
+ }
+ prev_cgcl = cgcl;
+ }
+ prev_cgm = cgm;
+ }
+
+ erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n");
+}
+
+static int
+cpu_groups_lookup(erts_cpu_groups_map_t *map,
+ ErtsSchedulerData *esdp)
+{
+ int start, logical, ix;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (esdp->cpu_id < 0)
+ return (((int) esdp->no) - 1) % map->groups;
+
+ logical = esdp->cpu_id;
+ start = logical % map->size;
+ ix = start;
+
+ do {
+ if (map->array[ix].logical == logical) {
+ int group = map->array[ix].cpu_group;
+ ASSERT(0 <= group && group < map->groups);
+ return group;
+ }
+ ix++;
+ if (ix == map->size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
+}
+
+static void
+update_cpu_groups_maps(void)
+{
+ erts_cpu_groups_map_t *cgm;
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next)
+ make_cpu_groups_map(cgm, 0);
+}
+
+void
+erts_add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ add_cpu_groups(groups, callback, arg);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}
+
+void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ remove_cpu_groups(callback, arg);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}