aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRickard Green <rickard@erlang.org>2010-09-23 10:17:29 +0200
committerRickard Green <rickard@erlang.org>2010-11-18 14:13:11 +0100
commit728b62363b9ec6248d14438f36adf03f7d737f89 (patch)
tree4dab7fb0582b89a26ebe1bed1a8c2b4b3e47b1c5
parentc4b78dcacf24f871cbd2ecc6375c3af2c229d299 (diff)
downloadotp-728b62363b9ec6248d14438f36adf03f7d737f89.tar.gz
otp-728b62363b9ec6248d14438f36adf03f7d737f89.tar.bz2
otp-728b62363b9ec6248d14438f36adf03f7d737f89.zip
Move cpu topology functionality into erl_cpu_topology.[ch]
-rw-r--r--erts/emulator/Makefile.in2
-rw-r--r--erts/emulator/beam/erl_bif_info.c13
-rw-r--r--erts/emulator/beam/erl_cpu_topology.c2162
-rw-r--r--erts/emulator/beam/erl_cpu_topology.h94
-rw-r--r--erts/emulator/beam/erl_init.c84
-rw-r--r--erts/emulator/beam/erl_lock_check.c2
-rw-r--r--erts/emulator/beam/erl_process.c2050
-rw-r--r--erts/emulator/beam/erl_process.h38
-rw-r--r--erts/emulator/beam/erl_process_lock.c4
-rw-r--r--erts/emulator/beam/erl_process_lock.h4
-rw-r--r--erts/emulator/beam/erl_threads.h3
-rw-r--r--erts/emulator/beam/global.h7
-rw-r--r--erts/emulator/beam/sys.h2
-rw-r--r--erts/emulator/sys/unix/sys.c28
-rw-r--r--erts/emulator/sys/win32/sys.c51
15 files changed, 2406 insertions, 2138 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 903abe6f5c..8707272f40 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -733,7 +733,7 @@ RUN_OBJS = \
$(OBJDIR)/erl_fun.o $(OBJDIR)/erl_bif_port.o \
$(OBJDIR)/erl_term.o $(OBJDIR)/erl_node_tables.o \
$(OBJDIR)/erl_monitors.o $(OBJDIR)/erl_process_dump.o \
- $(OBJDIR)/erl_bif_timer.o \
+ $(OBJDIR)/erl_bif_timer.o $(OBJDIR)/erl_cpu_topology.o \
$(OBJDIR)/erl_drv_thread.o $(OBJDIR)/erl_bif_chksum.o \
$(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \
$(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 40d8dc097c..4970cb406c 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -38,6 +38,7 @@
#include "erl_instrument.h"
#include "dist.h"
#include "erl_gc.h"
+#include "erl_cpu_topology.h"
#ifdef HIPE
#include "hipe_arch.h"
#endif
@@ -2345,9 +2346,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
/* Arguments that are unusual follow ... */
else if (ERTS_IS_ATOM_STR("logical_processors", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_configured(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(&no, NULL, NULL);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
@@ -2357,9 +2356,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
}
else if (ERTS_IS_ATOM_STR("logical_processors_online", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_online(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(NULL, &no, NULL);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
@@ -2369,9 +2366,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
}
else if (ERTS_IS_ATOM_STR("logical_processors_available", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_available(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(NULL, NULL, &no);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
new file mode 100644
index 0000000000..befab6f3b7
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -0,0 +1,2162 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: CPU topology and related functionality
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ctype.h>
+
+#include "global.h"
+#include "error.h"
+#include "bif.h"
+#include "erl_cpu_topology.h"
+
+#define ERTS_MAX_READER_GROUPS 8
+
+/*
+ * Cpu topology hierarchy.
+ */
+#define ERTS_TOPOLOGY_NODE 0
+#define ERTS_TOPOLOGY_PROCESSOR 1
+#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
+#define ERTS_TOPOLOGY_CORE 3
+#define ERTS_TOPOLOGY_THREAD 4
+#define ERTS_TOPOLOGY_LOGICAL 5
+
+#define ERTS_TOPOLOGY_MAX_DEPTH 6
+
+typedef struct {
+ int bind_id;
+ int bound_id;
+} ErtsCpuBindData;
+
+static erts_cpu_info_t *cpuinfo;
+
+static int max_main_threads;
+static int reader_groups;
+
+static ErtsCpuBindData *scheduler2cpu_map;
+static erts_smp_rwmtx_t cpuinfo_rwmtx;
+
+typedef enum {
+ ERTS_CPU_BIND_UNDEFINED,
+ ERTS_CPU_BIND_SPREAD,
+ ERTS_CPU_BIND_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_THREAD_SPREAD,
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
+ ERTS_CPU_BIND_NO_SPREAD,
+ ERTS_CPU_BIND_NONE
+} ErtsCpuBindOrder;
+
+#define ERTS_CPU_BIND_DEFAULT_BIND \
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+
+ErtsCpuBindOrder cpu_bind_order;
+
+static erts_cpu_topology_t *user_cpudata;
+static int user_cpudata_size;
+static erts_cpu_topology_t *system_cpudata;
+static int system_cpudata_size;
+
+typedef struct {
+ int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
+} erts_avail_cput;
+
+typedef struct {
+ int *map;
+ int size;
+ int groups;
+} erts_reader_groups_map_test;
+
+typedef struct {
+ int id;
+ int sub_levels;
+ int reader_groups;
+} erts_rg_count_t;
+
+typedef struct {
+ int logical;
+ int reader_group;
+} erts_reader_groups_map_t;
+
+typedef struct {
+ erts_reader_groups_map_t *map;
+ int map_size;
+ int logical_processors;
+ int groups;
+} erts_make_reader_groups_map_test;
+
+static int reader_groups_available_cpu_check;
+static int reader_groups_logical_processors;
+static int reader_groups_map_size;
+static erts_reader_groups_map_t *reader_groups_map;
+
+#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH
+
+#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
+
+#ifdef ERTS_SMP
+static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq);
+static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
+#endif
+
+static void make_reader_groups_map(erts_make_reader_groups_map_test *test);
+static int reader_group_lookup(int logical);
+
+static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
+ int *cpudata_size);
+static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
+
+static int
+int_cmp(const void *vx, const void *vy)
+{
+ return *((int *) vx) - *((int *) vy);
+}
+
+static int
+cpu_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ return 0;
+}
+
+static int
+cpu_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_no_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+static ERTS_INLINE void
+make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
+{
+ int ix;
+ int node = -1;
+ int processor = -1;
+ int processor_node = -1;
+ int processor_node_node = -1;
+ int core = -1;
+ int thread = -1;
+ int old_node = -1;
+ int old_processor = -1;
+ int old_processor_node = -1;
+ int old_core = -1;
+ int old_thread = -1;
+
+ for (ix = 0; ix < size; ix++) {
+ if (!no_node || cpudata[ix].node >= 0) {
+ if (old_node == cpudata[ix].node)
+ cpudata[ix].node = node;
+ else {
+ old_node = cpudata[ix].node;
+ old_processor = processor = -1;
+ if (!no_node)
+ old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node || cpudata[ix].node >= 0)
+ cpudata[ix].node = ++node;
+ }
+ }
+ if (old_processor == cpudata[ix].processor)
+ cpudata[ix].processor = processor;
+ else {
+ old_processor = cpudata[ix].processor;
+ if (!no_node)
+ processor_node_node = old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ cpudata[ix].processor = ++processor;
+ }
+ if (no_node && cpudata[ix].processor_node < 0)
+ old_processor_node = -1;
+ else {
+ if (old_processor_node == cpudata[ix].processor_node) {
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = node;
+ else {
+ if (processor_node_node >= 0)
+ cpudata[ix].node = processor_node_node;
+ cpudata[ix].processor_node = processor_node;
+ }
+ }
+ else {
+ old_processor_node = cpudata[ix].processor_node;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = ++node;
+ else {
+ cpudata[ix].node = processor_node_node = ++node;
+ cpudata[ix].processor_node = ++processor_node;
+ }
+ }
+ }
+ if (!no_node && cpudata[ix].processor_node < 0)
+ cpudata[ix].processor_node = 0;
+ if (old_core == cpudata[ix].core)
+ cpudata[ix].core = core;
+ else {
+ old_core = cpudata[ix].core;
+ old_thread = thread = -1;
+ cpudata[ix].core = ++core;
+ }
+ if (old_thread == cpudata[ix].thread)
+ cpudata[ix].thread = thread;
+ else
+ old_thread = cpudata[ix].thread = ++thread;
+ }
+}
+
+static void
+cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq)
+{
+ if (size > 1) {
+ int no_node = 0;
+ int (*cmp_func)(const void *, const void *);
+ switch (bind_order) {
+ case ERTS_CPU_BIND_SPREAD:
+ cmp_func = cpu_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD:
+ cmp_func = cpu_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_SPREAD:
+ cmp_func = cpu_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_SPREAD:
+ cmp_func = cpu_no_spread_order_cmp;
+ break;
+ default:
+ cmp_func = NULL;
+ erl_exit(ERTS_ABORT_EXIT,
+ "Bad cpu bind type: %d\n",
+ (int) cpu_bind_order);
+ break;
+ }
+
+ if (mk_seq)
+ make_cpudata_id_seq(cpudata, size, no_node);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
+ }
+}
+
+static int
+processor_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
+{
+ int reset_read_group = 0;
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+ /* Unbind from cpu */
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0
+ && erts_unbind_from_cpu(cpuinfo) == 0) {
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ reset_read_group = 1;
+ }
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ if (reset_read_group)
+ erts_smp_rwmtx_set_reader_group(0);
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(0, 0);
+}
+
+void
+erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
+{
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+
+ /* Make sure we check if we should bind to a cpu or not... */
+ if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
+ else
+ esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+}
+
+#endif
+
+void
+erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int rg = 0;
+ int res;
+ int cpu_id;
+ erts_smp_runq_unlock(esdp->run_queue);
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ cpu_id = scheduler2cpu_map[esdp->no].bind_id;
+ if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
+ res = erts_bind_to_cpu(cpuinfo, cpu_id);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
+ else {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0)
+ goto unbind;
+ }
+ }
+ else if (cpu_id < 0) {
+ unbind:
+ /* Get rid of old binding */
+ res = erts_unbind_from_cpu(cpuinfo);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ else if (res != -ENOTSUP) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+ }
+ if (reader_groups) {
+ if (esdp->cpu_id >= 0)
+ rg = reader_group_lookup(esdp->cpu_id);
+ else
+ rg = (((int) esdp->no) - 1) % reader_groups + 1;
+ }
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_smp_runq_lock(esdp->run_queue);
+#ifdef ERTS_SMP
+ if (erts_common_run_queue)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
+ else {
+ esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ }
+#endif
+ if (reader_groups)
+ erts_smp_rwmtx_set_reader_group(rg);
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int no = (int) esdp->no;
+ if (no <= max_main_threads) {
+ erts_thr_set_main_status(1, (int) no);
+ if (reader_groups) {
+ int rg = (int) no;
+ if (rg > reader_groups)
+ rg = (((int) no) - 1) % reader_groups + 1;
+ erts_smp_rwmtx_set_reader_group(rg);
+ }
+ }
+}
+#endif
+
+static void
+write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
+{
+ int s_ix = 1;
+ int cpu_ix;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
+
+ cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
+
+ for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
+ if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
+ scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
+ }
+
+ if (s_ix <= erts_no_schedulers)
+ for (; s_ix <= erts_no_schedulers; s_ix++)
+ scheduler2cpu_map[s_ix].bind_id = -1;
+}
+
+int
+erts_init_scheduler_bind_type_string(char *how)
+{
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
+ return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
+
+ if (!system_cpudata && !user_cpudata)
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
+
+ if (sys_strcmp(how, "db") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (sys_strcmp(how, "s") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (sys_strcmp(how, "ps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "ts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (sys_strcmp(how, "tnnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (sys_strcmp(how, "ns") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (sys_strcmp(how, "u") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
+
+ return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
+}
+
+static Eterm
+bound_schedulers_term(ErtsCpuBindOrder order)
+{
+ switch (order) {
+ case ERTS_CPU_BIND_SPREAD: {
+ ERTS_DECL_AM(spread);
+ return AM_spread;
+ }
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(processor_spread);
+ return AM_processor_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_SPREAD: {
+ ERTS_DECL_AM(thread_spread);
+ return AM_thread_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(thread_no_node_processor_spread);
+ return AM_thread_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(no_node_processor_spread);
+ return AM_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
+ ERTS_DECL_AM(no_node_thread_spread);
+ return AM_no_node_thread_spread;
+ }
+ case ERTS_CPU_BIND_NO_SPREAD: {
+ ERTS_DECL_AM(no_spread);
+ return AM_no_spread;
+ }
+ case ERTS_CPU_BIND_NONE: {
+ ERTS_DECL_AM(unbound);
+ return AM_unbound;
+ }
+ default:
+ ASSERT(0);
+ return THE_NON_VALUE;
+ }
+}
+
+Eterm
+erts_bound_schedulers_term(Process *c_p)
+{
+ ErtsCpuBindOrder order;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ order = cpu_bind_order;
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return bound_schedulers_term(order);
+}
+
+Eterm
+erts_bind_schedulers(Process *c_p, Eterm how)
+{
+ int notify = 0;
+ Eterm res;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ ErtsCpuBindOrder old_cpu_bind_order;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
+ ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
+ }
+ else {
+
+ old_cpu_bind_order = cpu_bind_order;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+
+ if (!cpudata) {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ notify = 1;
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ res = bound_schedulers_term(old_cpu_bind_order);
+ }
+
+ done:
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ if (notify)
+ erts_sched_notify_check_cpu_bind();
+
+ return res;
+}
+
+int
+erts_sched_bind_atthrcreate_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ return esdp != NULL && erts_is_scheduler_bound(esdp);
+}
+
+int
+erts_sched_bind_atthrcreate_child(int unbind)
+{
+ int res = 0;
+ if (unbind) {
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = erts_unbind_from_cpu(cpuinfo);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ }
+ return res;
+}
+
+void
+erts_sched_bind_atthrcreate_parent(int unbind)
+{
+
+}
+
+int
+erts_sched_bind_atfork_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
+ if (unbind)
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ return unbind;
+}
+
+int
+erts_sched_bind_atfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_unbind_from_cpu(cpuinfo);
+ }
+ return 0;
+}
+
+char *
+erts_sched_bind_atvfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_get_unbind_from_cpu_str(cpuinfo);
+ }
+ return "false";
+}
+
+void
+erts_sched_bind_atfork_parent(int unbind)
+{
+ if (unbind)
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+Eterm
+erts_fake_scheduler_bindings(Process *p, Eterm how)
+{
+ ErtsCpuBindOrder fake_cpu_bind_order;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ Eterm res;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ ERTS_BIF_PREP_ERROR(res, p, BADARG);
+ return res;
+ }
+
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+ if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
+ ERTS_BIF_PREP_RET(res, am_false);
+ else {
+ int i;
+ Eterm *hp;
+
+ cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
+
+#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
+
+ erts_fprintf(stderr, "node: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].node);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "processor: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor);
+ erts_fprintf(stderr, "\n");
+ if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
+ erts_fprintf(stderr, "processor_node:");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
+ erts_fprintf(stderr, "\n");
+ }
+ erts_fprintf(stderr, "core: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].core);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "thread: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].thread);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "logical: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].logical);
+ erts_fprintf(stderr, "\n");
+#endif
+
+ hp = HAlloc(p, cpudata_size+1);
+ ERTS_BIF_PREP_RET(res, make_tuple(hp));
+ *hp++ = make_arityval((Uint) cpudata_size);
+ for (i = 0; i < cpudata_size; i++)
+ *hp++ = make_small((Uint) cpudata[i].logical);
+ }
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_schedulers_binds(Process *c_p)
+{
+ int ix;
+ ERTS_DECL_AM(unbound);
+ Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
+ Eterm res = make_tuple(hp);
+
+ *(hp++) = make_arityval(erts_no_schedulers);
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ for (ix = 1; ix <= erts_no_schedulers; ix++)
+ *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
+ ? make_small(scheduler2cpu_map[ix].bound_id)
+ : AM_unbound);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+/*
+ * CPU topology
+ */
+
+typedef struct {
+ int *id;
+ int used;
+ int size;
+} ErtsCpuTopIdSeq;
+
+typedef struct {
+ ErtsCpuTopIdSeq logical;
+ ErtsCpuTopIdSeq thread;
+ ErtsCpuTopIdSeq core;
+ ErtsCpuTopIdSeq processor_node;
+ ErtsCpuTopIdSeq processor;
+ ErtsCpuTopIdSeq node;
+} ErtsCpuTopEntry;
+
+static void
+init_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ int size = 10;
+ cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->logical.size = size;
+ cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->thread.size = size;
+ cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->core.size = size;
+ cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor_node.size = size;
+ cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor.size = size;
+ cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->node.size = size;
+}
+
+static void
+destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
+}
+
+static int
+get_cput_value_or_range(int *v, int *vr, char **str)
+{
+ long l;
+ char *c = *str;
+ errno = 0;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ *v = (int) l;
+ if (*c == '-') {
+ c++;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ *vr = (int) l;
+ }
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
+{
+ int ix = 0;
+ int need_size = 0;
+ char *c = *str;
+
+ while (1) {
+ int res;
+ int val;
+ int nids;
+ int val_range = -1;
+ res = get_cput_value_or_range(&val, &val_range, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ if (val_range < 0 || val_range == val)
+ nids = 1;
+ else {
+ if (val_range > val)
+ nids = val_range - val + 1;
+ else
+ nids = val - val_range + 1;
+ }
+ need_size += nids;
+ if (need_size > idseq->size) {
+ idseq->size = need_size + 10;
+ idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
+ idseq->id,
+ sizeof(int)*idseq->size);
+ }
+ if (nids == 1)
+ idseq->id[ix++] = val;
+ else if (val_range > val) {
+ for (; val <= val_range; val++)
+ idseq->id[ix++] = val;
+ }
+ else {
+ for (; val >= val_range; val--)
+ idseq->id[ix++] = val;
+ }
+ if (*c != ',')
+ break;
+ c++;
+ }
+ *str = c;
+ idseq->used = ix;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_entry(ErtsCpuTopEntry *cput, char **str)
+{
+ int h;
+ char *c = *str;
+
+ cput->logical.used = 0;
+ cput->thread.id[0] = 0;
+ cput->thread.used = 1;
+ cput->core.id[0] = 0;
+ cput->core.used = 1;
+ cput->processor_node.id[0] = -1;
+ cput->processor_node.used = 1;
+ cput->processor.id[0] = 0;
+ cput->processor.used = 1;
+ cput->node.id[0] = -1;
+ cput->node.used = 1;
+
+ h = ERTS_TOPOLOGY_MAX_DEPTH;
+ while (*c != ':' && *c != '\0') {
+ int res;
+ ErtsCpuTopIdSeq *idseqp;
+ switch (*c++) {
+ case 'L':
+ if (h <= ERTS_TOPOLOGY_LOGICAL)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->logical;
+ h = ERTS_TOPOLOGY_LOGICAL;
+ break;
+ case 't':
+ case 'T':
+ if (h <= ERTS_TOPOLOGY_THREAD)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->thread;
+ h = ERTS_TOPOLOGY_THREAD;
+ break;
+ case 'c':
+ case 'C':
+ if (h <= ERTS_TOPOLOGY_CORE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->core;
+ h = ERTS_TOPOLOGY_CORE;
+ break;
+ case 'p':
+ case 'P':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor;
+ h = ERTS_TOPOLOGY_PROCESSOR;
+ break;
+ case 'n':
+ case 'N':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR) {
+ do_node:
+ if (h <= ERTS_TOPOLOGY_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->node;
+ h = ERTS_TOPOLOGY_NODE;
+ }
+ else {
+ int p_node = 0;
+ char *p_chk = c;
+ while (*p_chk != '\0' && *p_chk != ':') {
+ if (*p_chk == 'p' || *p_chk == 'P') {
+ p_node = 1;
+ break;
+ }
+ p_chk++;
+ }
+ if (!p_node)
+ goto do_node;
+ if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor_node;
+ h = ERTS_TOPOLOGY_PROCESSOR_NODE;
+ }
+ break;
+ default:
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
+ }
+ res = get_cput_id_seq(idseqp, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ }
+
+ if (cput->logical.used < 1)
+ return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
+
+ if (*c == ':') {
+ c++;
+ }
+
+ if (cput->thread.used != 1
+ && cput->thread.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->core.used != 1
+ && cput->core.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor_node.used != 1
+ && cput->processor_node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor.used != 1
+ && cput->processor.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->node.used != 1
+ && cput->node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+verify_topology(erts_cpu_topology_t *cpudata, int size)
+{
+ if (size > 0) {
+ int *logical;
+ int node, processor, no_nodes, i;
+
+ /* Verify logical ids */
+ logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
+
+ for (i = 0; i < size; i++)
+ logical[i] = cpudata[i].logical;
+
+ qsort(logical, size, sizeof(int), int_cmp);
+ for (i = 0; i < size-1; i++) {
+ if (logical[i] == logical[i+1]) {
+ erts_free(ERTS_ALC_T_TMP, logical);
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
+ }
+ }
+
+ erts_free(ERTS_ALC_T_TMP, logical);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
+
+ /* Verify unique entities */
+
+ for (i = 1; i < size; i++) {
+ if (cpudata[i-1].processor == cpudata[i].processor
+ && cpudata[i-1].node == cpudata[i].node
+ && (cpudata[i-1].processor_node
+ == cpudata[i].processor_node)
+ && cpudata[i-1].core == cpudata[i].core
+ && cpudata[i-1].thread == cpudata[i].thread) {
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
+ }
+ }
+
+ /* Verify numa nodes */
+ node = cpudata[0].node;
+ processor = cpudata[0].processor;
+ no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
+ for (i = 1; i < size; i++) {
+ if (no_nodes) {
+ if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ else {
+ if (cpudata[i].processor == processor && cpudata[i].node != node)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ node = cpudata[i].node;
+ processor = cpudata[i].processor;
+ if (node >= 0 && cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ if (node < 0 && cpudata[i].processor_node < 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ }
+ }
+
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+int
+erts_init_cpu_topology_string(char *topology_str)
+{
+ ErtsCpuTopEntry cput;
+ int need_size;
+ char *c;
+ int ix;
+ int error = ERTS_INIT_CPU_TOPOLOGY_OK;
+
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 10;
+
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+
+ init_cpu_top_entry(&cput);
+
+ ix = 0;
+ need_size = 0;
+
+ c = topology_str;
+ if (*c == '\0') {
+ error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
+ goto fail;
+ }
+ do {
+ int r;
+ error = get_cput_entry(&cput, &c);
+ if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
+ goto fail;
+ need_size += cput.logical.used;
+ if (user_cpudata_size < need_size) {
+ user_cpudata_size = need_size + 10;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ ASSERT(cput.thread.used == 1
+ || cput.thread.used == cput.logical.used);
+ ASSERT(cput.core.used == 1
+ || cput.core.used == cput.logical.used);
+ ASSERT(cput.processor_node.used == 1
+ || cput.processor_node.used == cput.logical.used);
+ ASSERT(cput.processor.used == 1
+ || cput.processor.used == cput.logical.used);
+ ASSERT(cput.node.used == 1
+ || cput.node.used == cput.logical.used);
+
+ for (r = 0; r < cput.logical.used; r++) {
+ user_cpudata[ix].logical = cput.logical.id[r];
+ user_cpudata[ix].thread =
+ cput.thread.id[cput.thread.used == 1 ? 0 : r];
+ user_cpudata[ix].core =
+ cput.core.id[cput.core.used == 1 ? 0 : r];
+ user_cpudata[ix].processor_node =
+ cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
+ user_cpudata[ix].processor =
+ cput.processor.id[cput.processor.used == 1 ? 0 : r];
+ user_cpudata[ix].node =
+ cput.node.id[cput.node.used == 1 ? 0 : r];
+ ix++;
+ }
+ } while (*c != '\0');
+
+ if (user_cpudata_size != ix) {
+ user_cpudata_size = ix;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ error = verify_topology(user_cpudata, user_cpudata_size);
+ if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
+ destroy_cpu_top_entry(&cput);
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+ }
+
+ fail:
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 0;
+ destroy_cpu_top_entry(&cput);
+ return error;
+}
+
+#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
+#define ERTS_GET_USED_CPU_TOPOLOGY 0
+#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
+#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
+
+static Eterm get_cpu_topology_term(Process *c_p, int type);
+
+Eterm
+erts_set_cpu_topology(Process *c_p, Eterm term)
+{
+ erts_cpu_topology_t *cpudata = NULL;
+ int cpudata_size = 0;
+ Eterm res;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
+ if (term == am_undefined) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
+ cpudata_size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+ }
+ else if (is_not_list(term)) {
+ error:
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ res = THE_NON_VALUE;
+ goto done;
+ }
+ else {
+ Eterm list = term;
+ int ix = 0;
+
+ cpudata_size = 100;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ while (is_list(list)) {
+ Eterm *lp = list_val(list);
+ Eterm cpu = CAR(lp);
+ Eterm* tp;
+ Sint id;
+
+ if (is_not_tuple(cpu))
+ goto error;
+
+ tp = tuple_val(cpu);
+
+ if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
+ goto error;
+
+ if (ix >= cpudata_size) {
+ cpudata_size += 100;
+ cpudata = erts_realloc(ERTS_ALC_T_TMP,
+ cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+ }
+
+ id = signed_val(tp[2]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].node = (int) id;
+
+ id = signed_val(tp[3]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor = (int) id;
+
+ id = signed_val(tp[4]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor_node = (int) id;
+
+ id = signed_val(tp[5]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].core = (int) id;
+
+ id = signed_val(tp[6]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].thread = (int) id;
+
+ id = signed_val(tp[7]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].logical = (int) id;
+
+ list = CDR(lp);
+ ix++;
+ }
+
+ if (is_not_nil(list))
+ goto error;
+
+ cpudata_size = ix;
+
+ if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
+ goto error;
+
+ if (user_cpudata_size != cpudata_size) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ user_cpudata_size = cpudata_size;
+ }
+
+ sys_memcpy((void *) user_cpudata,
+ (void *) cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+
+ make_reader_groups_map(NULL);
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+
+ done:
+
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+static void
+create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
+{
+ if (user_cpudata) {
+ *cpudata_size = user_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) user_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else if (system_cpudata) {
+ *cpudata_size = system_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else {
+ *cpudata = NULL;
+ *cpudata_size = 0;
+ }
+}
+
+static void
+destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
+{
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+}
+
+
+static Eterm
+bld_topology_term(Eterm **hpp,
+ Uint *hszp,
+ erts_cpu_topology_t *cpudata,
+ int size)
+{
+ Eterm res = NIL;
+ int i;
+
+ if (size == 0)
+ return am_undefined;
+
+ for (i = size-1; i >= 0; i--) {
+ res = erts_bld_cons(hpp,
+ hszp,
+ erts_bld_tuple(hpp,
+ hszp,
+ 7,
+ am_cpu,
+ make_small(cpudata[i].node),
+ make_small(cpudata[i].processor),
+ make_small(cpudata[i].processor_node),
+ make_small(cpudata[i].core),
+ make_small(cpudata[i].thread),
+ make_small(cpudata[i].logical)),
+ res);
+ }
+ return res;
+}
+
+static Eterm
+get_cpu_topology_term(Process *c_p, int type)
+{
+#ifdef DEBUG
+ Eterm *hp_end;
+#endif
+ Eterm *hp;
+ Uint hsz;
+ Eterm res = THE_NON_VALUE;
+ erts_cpu_topology_t *cpudata = NULL;
+ int size = 0;
+
+ switch (type) {
+ case ERTS_GET_USED_CPU_TOPOLOGY:
+ if (user_cpudata)
+ goto defined;
+ else
+ goto detected;
+ case ERTS_GET_DETECTED_CPU_TOPOLOGY:
+ detected:
+ if (!system_cpudata)
+ res = am_undefined;
+ else {
+ size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * size));
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*size);
+ }
+ break;
+ case ERTS_GET_DEFINED_CPU_TOPOLOGY:
+ defined:
+ if (!user_cpudata)
+ res = am_undefined;
+ else {
+ size = user_cpudata_size;
+ cpudata = user_cpudata;
+ }
+ break;
+ default:
+ erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
+ break;
+ }
+
+ if (res == am_undefined) {
+ ASSERT(!cpudata);
+ return res;
+ }
+
+ hsz = 0;
+
+ bld_topology_term(NULL, &hsz,
+ cpudata, size);
+
+ hp = HAlloc(c_p, hsz);
+
+#ifdef DEBUG
+ hp_end = hp + hsz;
+#endif
+
+ res = bld_topology_term(&hp, NULL,
+ cpudata, size);
+
+ ASSERT(hp_end == hp);
+
+ if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_cpu_topology_term(Process *c_p, Eterm which)
+{
+ Eterm res;
+ int type;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ if (ERTS_IS_ATOM_STR("used", which))
+ type = ERTS_GET_USED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("detected", which))
+ type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("defined", which))
+ type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
+ else
+ type = ERTS_GET_CPU_TOPOLOGY_ERROR;
+ if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
+ res = THE_NON_VALUE;
+ else
+ res = get_cpu_topology_term(c_p, type);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+static void
+get_logical_processors(int *conf, int *onln, int *avail)
+{
+ if (conf)
+ *conf = erts_get_cpu_configured(cpuinfo);
+ if (onln)
+ *onln = erts_get_cpu_online(cpuinfo);
+ if (avail)
+ *avail = erts_get_cpu_available(cpuinfo);
+}
+
+void
+erts_get_logical_processors(int *conf, int *onln, int *avail)
+{
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ get_logical_processors(conf, onln, avail);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+void
+erts_pre_early_init_cpu_topology(int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p)
+{
+ *max_rg_p = ERTS_MAX_READER_GROUPS;
+ cpuinfo = erts_cpu_info_create();
+ get_logical_processors(conf_p, onln_p, avail_p);
+}
+
+void
+erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p)
+{
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
+
+ reader_groups_available_cpu_check = 1;
+ reader_groups_logical_processors = 0;
+ reader_groups_map_size = 0;
+ reader_groups_map = NULL;
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
+ system_cpudata_size)) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+
+ max_main_threads = erts_get_cpu_configured(cpuinfo);
+ if (max_main_threads > no_schedulers)
+ max_main_threads = no_schedulers;
+ *max_main_threads_p = max_main_threads;
+
+ reader_groups = max_main_threads;
+ if (reader_groups <= 1 || max_reader_groups <= 1)
+ reader_groups = 0;
+ if (reader_groups > max_reader_groups)
+ reader_groups = max_reader_groups;
+ *reader_groups_p = reader_groups;
+}
+
+void
+erts_init_cpu_topology(void)
+{
+ int ix;
+
+ erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info");
+
+ scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(ErtsCpuBindData)
+ * (erts_no_schedulers+1)));
+ for (ix = 1; ix <= erts_no_schedulers; ix++) {
+ scheduler2cpu_map[ix].bind_id = -1;
+ scheduler2cpu_map[ix].bound_id = -1;
+ }
+
+ if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
+ int ncpus = erts_get_cpu_configured(cpuinfo);
+ if (ncpus < 1 || erts_no_schedulers < ncpus)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ cpu_bind_order = ((system_cpudata || user_cpudata)
+ && (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP)
+ ? ERTS_CPU_BIND_DEFAULT_BIND
+ : ERTS_CPU_BIND_NONE);
+ }
+
+ make_reader_groups_map(NULL);
+
+ if (cpu_bind_order == ERTS_CPU_BIND_NONE)
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ else {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+}
+
+int
+erts_update_cpu_info(void)
+{
+ int changed;
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ changed = erts_cpu_info_update(cpuinfo);
+ if (changed) {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+
+ if (system_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ if (!system_cpudata_size)
+ system_cpudata = NULL;
+ else {
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || (ERTS_INIT_CPU_TOPOLOGY_OK
+ != verify_topology(system_cpudata,
+ system_cpudata_size))) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+ }
+
+ make_reader_groups_map(NULL);
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ if (changed)
+ erts_sched_notify_check_cpu_bind();
+ return changed;
+}
+
+/*
+ * reader groups map
+ */
+
+static Eterm
+get_reader_groups_map(Process *c_p,
+ erts_reader_groups_map_t *map,
+ int map_size,
+ int logical_processors)
+{
+#ifdef DEBUG
+ Eterm *endp;
+#endif
+ Eterm res = NIL, tuple;
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(c_p, logical_processors*(2+3));
+#ifdef DEBUG
+ endp = hp + logical_processors*(2+3);
+#endif
+ for (i = map_size - 1; i >= 0; i--) {
+ if (map[i].logical >= 0) {
+ tuple = TUPLE2(hp,
+ make_small(map[i].logical),
+ make_small(map[i].reader_group));
+ hp += 3;
+ res = CONS(hp, tuple, res);
+ hp += 2;
+ }
+ }
+ ASSERT(hp == endp);
+ return res;
+}
+
+Eterm
+erts_debug_reader_groups_map(Process *c_p, int groups)
+{
+ Eterm res;
+ erts_make_reader_groups_map_test test;
+
+ test.groups = groups;
+ make_reader_groups_map(&test);
+ if (!test.map)
+ res = NIL;
+ else {
+ res = get_reader_groups_map(c_p,
+ test.map,
+ test.map_size,
+ test.logical_processors);
+ erts_free(ERTS_ALC_T_TMP, test.map);
+ }
+ return res;
+}
+
+
+Eterm
+erts_get_reader_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = get_reader_groups_map(c_p,
+ reader_groups_map,
+ reader_groups_map_size,
+ reader_groups_logical_processors);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+static void
+make_available_cpu_topology(erts_avail_cput *no,
+ erts_avail_cput *avail,
+ erts_cpu_topology_t *cpudata,
+ int *size,
+ int test)
+{
+ int len = *size;
+ erts_cpu_topology_t last;
+ int a, i, j;
+
+ no->level[ERTS_TOPOLOGY_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_CORE] = -1;
+ no->level[ERTS_TOPOLOGY_THREAD] = -1;
+ no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
+
+ last.node = INT_MIN;
+ last.processor = INT_MIN;
+ last.processor_node = INT_MIN;
+ last.core = INT_MIN;
+ last.thread = INT_MIN;
+ last.logical = INT_MIN;
+
+ a = 0;
+
+ for (i = 0; i < len; i++) {
+
+ if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
+ continue;
+
+ if (last.node != cpudata[i].node)
+ goto node;
+ if (last.processor != cpudata[i].processor)
+ goto processor;
+ if (last.processor_node != cpudata[i].processor_node)
+ goto processor_node;
+ if (last.core != cpudata[i].core)
+ goto core;
+ ASSERT(last.thread != cpudata[i].thread);
+ goto thread;
+
+ node:
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ processor:
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ processor_node:
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ core:
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ thread:
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
+ avail[a].level[j] = no->level[j];
+
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
+ avail[a].level[ERTS_TOPOLOGY_RG] = 0;
+
+ ASSERT(last.logical != cpudata[i].logical);
+
+ last = cpudata[i];
+ a++;
+ }
+
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ *size = a;
+}
+
+static int
+reader_group_lookup(int logical)
+{
+ int start = logical % reader_groups_map_size;
+ int ix = start;
+
+ do {
+ if (reader_groups_map[ix].logical == logical) {
+ ASSERT(reader_groups_map[ix].reader_group > 0);
+ return reader_groups_map[ix].reader_group;
+ }
+ ix++;
+ if (ix == reader_groups_map_size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
+}
+
+static void
+reader_group_insert(erts_reader_groups_map_t *map, int map_size,
+ int logical, int reader_group)
+{
+ int start = logical % map_size;
+ int ix = start;
+
+ do {
+ if (map[ix].logical < 0) {
+ map[ix].logical = logical;
+ map[ix].reader_group = reader_group;
+ return;
+ }
+ ix++;
+ if (ix == map_size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
+}
+
+
+static int
+sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail)
+{
+ int sub_level = level+1;
+ int last = -1;
+ rgc->sub_levels = 0;
+
+ do {
+ if (last != avail[aix].level[sub_level]) {
+ rgc->sub_levels++;
+ last = avail[aix].level[sub_level];
+ }
+ aix++;
+ }
+ while (aix < avail_sz && rgc->id == avail[aix].level[level]);
+ rgc->reader_groups = 0;
+ return aix;
+}
+
+static int
+write_reader_groups(int *rgp, erts_rg_count_t *rgcp,
+ int level, int a,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int rg = *rgp;
+ int sub_level = level+1;
+ int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups;
+ int xsl = rgcp->sub_levels % rgcp->reader_groups;
+ int sls = 0;
+ int last = -1;
+ int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1;
+
+ ASSERT(level < 0 || avail[a].level[level] == rgcp->id)
+
+ do {
+ if (last != avail[a].level[sub_level]) {
+ if (!sls) {
+ sls = sl_per_gr;
+ rg++;
+ if (rg >= xsl_rg_lim)
+ sls++;
+ }
+ last = avail[a].level[sub_level];
+ sls--;
+ }
+ avail[a].level[ERTS_TOPOLOGY_RG] = rg;
+ a++;
+ } while (a < avail_sz && (level < 0
+ || avail[a].level[level] == rgcp->id));
+
+ ASSERT(rgcp->reader_groups == rg - *rgp);
+
+ *rgp = rg;
+
+ return a;
+}
+
+static int
+rg_count_sub_levels_compare(const void *vx, const void *vy)
+{
+ erts_rg_count_t *x = (erts_rg_count_t *) vx;
+ erts_rg_count_t *y = (erts_rg_count_t *) vy;
+ if (x->sub_levels != y->sub_levels)
+ return y->sub_levels - x->sub_levels;
+ return x->id - y->id;
+}
+
+static int
+rg_count_id_compare(const void *vx, const void *vy)
+{
+ erts_rg_count_t *x = (erts_rg_count_t *) vx;
+ erts_rg_count_t *y = (erts_rg_count_t *) vy;
+ return x->id - y->id;
+}
+
+static void
+make_reader_groups_map(erts_make_reader_groups_map_test *test)
+{
+ int i, spread_level, avail_sz;
+ erts_avail_cput no, *avail;
+ erts_cpu_topology_t *cpudata;
+ erts_reader_groups_map_t *map;
+ int map_sz;
+ int groups = reader_groups;
+
+ if (test) {
+ test->map = NULL;
+ test->map_size = 0;
+ groups = test->groups;
+ }
+
+ if (!groups)
+ return;
+
+ if (!test) {
+ if (reader_groups_map)
+ erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map);
+
+ reader_groups_logical_processors = 0;
+ reader_groups_map_size = 0;
+ reader_groups_map = NULL;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
+
+ if (!cpudata)
+ return;
+
+ cpu_bind_order_sort(cpudata,
+ avail_sz,
+ ERTS_CPU_BIND_NO_SPREAD,
+ 1);
+
+ avail = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(erts_avail_cput)*avail_sz);
+
+ make_available_cpu_topology(&no, avail, cpudata,
+ &avail_sz, test != NULL);
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ map_sz = avail_sz*2+1;
+
+ if (test) {
+ map = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_reader_groups_map_t)
+ * map_sz));
+ test->map = map;
+ test->map_size = map_sz;
+ test->logical_processors = avail_sz;
+ }
+ else {
+ map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP,
+ (sizeof(erts_reader_groups_map_t)
+ * map_sz));
+ reader_groups_map = map;
+ reader_groups_logical_processors = avail_sz;
+ reader_groups_map_size = map_sz;
+
+ }
+
+ for (i = 0; i < map_sz; i++) {
+ map[i].logical = -1;
+ map[i].reader_group = 0;
+ }
+
+ spread_level = ERTS_TOPOLOGY_CORE;
+ for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
+ if (no.level[i] > groups) {
+ spread_level = i;
+ break;
+ }
+ }
+
+ if (no.level[spread_level] <= groups) {
+ int a, rg, last = -1;
+ rg = 0;
+ ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
+ for (a = 0; a < avail_sz; a++) {
+ if (last != avail[a].level[spread_level]) {
+ rg++;
+ last = avail[a].level[spread_level];
+ }
+ reader_group_insert(map,
+ map_sz,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ rg);
+ }
+ }
+ else { /* groups < no.level[spread_level] */
+ erts_rg_count_t *rg_count;
+ int a, rg, tl, toplevels;
+
+ tl = spread_level-1;
+
+ if (spread_level == ERTS_TOPOLOGY_NODE)
+ toplevels = 1;
+ else
+ toplevels = no.level[tl];
+
+ rg_count = erts_alloc(ERTS_ALC_T_TMP,
+ toplevels*sizeof(erts_rg_count_t));
+
+ if (toplevels == 1) {
+ rg_count[0].id = 0;
+ rg_count[0].sub_levels = no.level[spread_level];
+ rg_count[0].reader_groups = groups;
+ }
+ else {
+ int rgs_per_tl, rgs;
+ rgs = groups;
+ rgs_per_tl = rgs / toplevels;
+
+ a = 0;
+ for (i = 0; i < toplevels; i++) {
+ rg_count[i].id = avail[a].level[tl];
+ a = sub_levels(&rg_count[i], tl, a, avail_sz, avail);
+ }
+
+ qsort(rg_count,
+ toplevels,
+ sizeof(erts_rg_count_t),
+ rg_count_sub_levels_compare);
+
+ for (i = 0; i < toplevels; i++) {
+ if (rg_count[i].sub_levels < rgs_per_tl) {
+ rg_count[i].reader_groups = rg_count[i].sub_levels;
+ rgs -= rg_count[i].sub_levels;
+ }
+ else {
+ rg_count[i].reader_groups = rgs_per_tl;
+ rgs -= rgs_per_tl;
+ }
+ }
+
+ while (rgs > 0) {
+ for (i = 0; i < toplevels; i++) {
+ if (rg_count[i].sub_levels == rg_count[i].reader_groups)
+ break;
+ else {
+ rg_count[i].reader_groups++;
+ if (--rgs == 0)
+ break;
+ }
+ }
+ }
+
+ qsort(rg_count,
+ toplevels,
+ sizeof(erts_rg_count_t),
+ rg_count_id_compare);
+ }
+
+ a = i = rg = 0;
+ while (a < avail_sz) {
+ a = write_reader_groups(&rg, &rg_count[i], tl,
+ a, avail_sz, avail);
+ i++;
+ }
+
+ ASSERT(groups == rg);
+
+ for (a = 0; a < avail_sz; a++)
+ reader_group_insert(map,
+ map_sz,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ avail[a].level[ERTS_TOPOLOGY_RG]);
+
+ erts_free(ERTS_ALC_T_TMP, rg_count);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, avail);
+}
diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h
new file mode 100644
index 0000000000..b83ddc25da
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.h
@@ -0,0 +1,94 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: CPU topology and related functionality
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERL_CPU_TOPOLOGY_H__
+#define ERL_CPU_TOPOLOGY_H__
+
+void erts_pre_early_init_cpu_topology(int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p);
+void erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p);
+void erts_init_cpu_topology(void);
+
+
+#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0
+#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3
+
+int erts_init_scheduler_bind_type_string(char *how);
+
+
+#define ERTS_INIT_CPU_TOPOLOGY_OK 0
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9
+
+int erts_init_cpu_topology_string(char *topology_str);
+
+void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp);
+#ifdef ERTS_SMP
+void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp);
+#endif
+
+int erts_update_cpu_info(void);
+
+Eterm erts_bind_schedulers(Process *c_p, Eterm how);
+Eterm erts_get_schedulers_binds(Process *c_p);
+
+Eterm erts_get_reader_groups_map(Process *c_p);
+
+Eterm erts_set_cpu_topology(Process *c_p, Eterm term);
+Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
+
+int erts_update_cpu_info(void);
+void erts_get_logical_processors(int *conf, int *onln, int *avail);
+
+int erts_sched_bind_atthrcreate_prepare(void);
+int erts_sched_bind_atthrcreate_child(int unbind);
+void erts_sched_bind_atthrcreate_parent(int unbind);
+
+int erts_sched_bind_atfork_prepare(void);
+int erts_sched_bind_atfork_child(int unbind);
+char *erts_sched_bind_atvfork_child(int unbind);
+void erts_sched_bind_atfork_parent(int unbind);
+
+Eterm erts_fake_scheduler_bindings(Process *p, Eterm how);
+Eterm erts_debug_cpu_groups_map(Process *c_p, int groups);
+
+
+#endif
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index e2f2cccb7e..e21728333b 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -41,6 +41,7 @@
#include "erl_printf_term.h"
#include "erl_misc_utils.h"
#include "packet_parser.h"
+#include "erl_cpu_topology.h"
#ifdef HIPE
#include "hipe_mode_switch.h" /* for hipe_mode_switch_init() */
@@ -63,6 +64,8 @@ extern void ConNormalExit(void);
extern void ConWaitForExit(void);
#endif
+static void erl_init(int ncpu);
+
#define ERTS_MIN_COMPAT_REL 7
#ifdef ERTS_SMP
@@ -76,9 +79,6 @@ int erts_initialized = 0;
static erts_tid_t main_thread;
#endif
-erts_cpu_info_t *erts_cpuinfo;
-
-int erts_reader_groups;
int erts_use_sender_punish;
/*
@@ -111,7 +111,6 @@ int erts_compat_rel;
static int use_multi_run_queue;
static int no_schedulers;
static int no_schedulers_online;
-static int max_reader_groups;
#ifdef DEBUG
Uint32 verbose; /* See erl_debug.h for information about verbose */
@@ -230,18 +229,18 @@ void erl_error(char *fmt, va_list args)
erts_vfprintf(stderr, fmt, args);
}
-static void early_init(int *argc, char **argv);
+static int early_init(int *argc, char **argv);
void
erts_short_init(void)
{
- early_init(NULL, NULL);
- erl_init();
+ int ncpu = early_init(NULL, NULL);
+ erl_init(ncpu);
erts_initialized = 1;
}
-void
-erl_init(void)
+static void
+erl_init(int ncpu)
{
init_benchmarking();
@@ -252,11 +251,11 @@ erl_init(void)
erts_init_monitors();
erts_init_gc();
init_time();
- erts_init_process();
+ erts_init_process(ncpu);
erts_init_scheduling(use_multi_run_queue,
no_schedulers,
no_schedulers_online);
-
+ erts_init_cpu_topology(); /* Must be after init_scheduling */
H_MIN_SIZE = erts_next_heap_size(H_MIN_SIZE, 0);
BIN_VH_MIN_SIZE = erts_next_heap_size(BIN_VH_MIN_SIZE, 0);
@@ -587,7 +586,7 @@ static void ethr_ll_free(void *ptr)
#endif
-static void
+static int
early_init(int *argc, char **argv) /*
* Only put things here which are
* really important initialize
@@ -600,6 +599,10 @@ early_init(int *argc, char **argv) /*
int ncpuavail;
int schdlrs;
int schdlrs_onln;
+ int max_main_threads;
+ int max_reader_groups;
+ int reader_groups;
+
use_multi_run_queue = 1;
erts_printf_eterm_func = erts_printf_term;
erts_disable_tolerant_timeofday = 0;
@@ -615,13 +618,11 @@ early_init(int *argc, char **argv) /*
erts_use_sender_punish = 1;
- erts_cpuinfo = erts_cpu_info_create();
-
-#ifdef ERTS_SMP
- ncpu = erts_get_cpu_configured(erts_cpuinfo);
- ncpuonln = erts_get_cpu_online(erts_cpuinfo);
- ncpuavail = erts_get_cpu_available(erts_cpuinfo);
-#else
+ erts_pre_early_init_cpu_topology(&max_reader_groups,
+ &ncpu,
+ &ncpuonln,
+ &ncpuavail);
+#ifndef ERTS_SMP
ncpu = 1;
ncpuonln = 1;
ncpuavail = 1;
@@ -664,15 +665,9 @@ early_init(int *argc, char **argv) /*
? ncpuavail
: (ncpuonln > 0 ? ncpuonln : no_schedulers));
-#ifdef ERTS_SMP
- erts_max_main_threads = no_schedulers_online;
-#endif
-
schdlrs = no_schedulers;
schdlrs_onln = no_schedulers_online;
- max_reader_groups = ERTS_MAX_READER_GROUPS;
-
if (argc && argv) {
int i = 1;
while (i < *argc) {
@@ -768,9 +763,13 @@ early_init(int *argc, char **argv) /*
erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes)
-M flags. */
-
- erts_early_init_scheduling(); /* Require allocators */
- erts_init_utils(); /* Require allocators */
+ /* Require allocators */
+ erts_early_init_scheduling();
+ erts_init_utils();
+ erts_early_init_cpu_topology(no_schedulers,
+ &max_main_threads,
+ max_reader_groups,
+ &reader_groups);
#ifdef USE_THREADS
{
@@ -784,26 +783,13 @@ early_init(int *argc, char **argv) /*
elid.mem.ll.alloc = ethr_ll_alloc;
elid.mem.ll.realloc = ethr_ll_realloc;
elid.mem.ll.free = ethr_ll_free;
-
-#ifdef ERTS_SMP
- if (erts_max_main_threads > no_schedulers)
- erts_max_main_threads = no_schedulers;
- elid.main_threads = erts_max_main_threads;
-#else
- elid.main_threads = 1;
-#endif
- elid.reader_groups = (elid.main_threads > 1
- ? elid.main_threads
- : 0);
- if (max_reader_groups <= 1)
- elid.reader_groups = 0;
- if (elid.reader_groups > max_reader_groups)
- elid.reader_groups = max_reader_groups;
- erts_reader_groups = elid.reader_groups;
+ elid.main_threads = max_main_threads;
+ elid.reader_groups = reader_groups;
erts_thr_late_init(&elid);
}
#endif
+
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_late_init();
#endif
@@ -821,6 +807,7 @@ early_init(int *argc, char **argv) /*
erts_ets_realloc_always_moves = 0;
+ return ncpu;
}
#ifndef ERTS_SMP
@@ -854,8 +841,7 @@ erl_start(int argc, char **argv)
char envbuf[21]; /* enough for any 64-bit integer */
size_t envbufsz;
int async_max_threads = erts_async_max_threads;
-
- early_init(&argc, argv);
+ int ncpu = early_init(&argc, argv);
envbufsz = sizeof(envbuf);
if (erts_sys_getenv(ERL_MAX_ETS_TABLES_ENV, envbuf, &envbufsz) == 0)
@@ -1112,7 +1098,7 @@ erl_start(int argc, char **argv)
char *sub_param = argv[i]+2;
if (has_prefix("bt", sub_param)) {
arg = get_arg(sub_param+2, argv[i+1], &i);
- res = erts_init_scheduler_bind_type(arg);
+ res = erts_init_scheduler_bind_type_string(arg);
if (res != ERTS_INIT_SCHED_BIND_TYPE_SUCCESS) {
switch (res) {
case ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED:
@@ -1137,7 +1123,7 @@ erl_start(int argc, char **argv)
}
else if (has_prefix("ct", sub_param)) {
arg = get_arg(sub_param+2, argv[i+1], &i);
- res = erts_init_cpu_topology(arg);
+ res = erts_init_cpu_topology_string(arg);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK) {
switch (res) {
case ERTS_INIT_CPU_TOPOLOGY_INVALID_ID:
@@ -1388,7 +1374,7 @@ erl_start(int argc, char **argv)
boot_argc = argc - i; /* Number of arguments to init */
boot_argv = &argv[i];
- erl_init();
+ erl_init(ncpu);
init_shared_memory(boot_argc, boot_argv);
load_preloaded();
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index d6138fa4e4..04c7dbd2ec 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -128,8 +128,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "removed_fd_pre_alloc_lock", NULL },
{ "state_prealloc", NULL },
{ "schdlr_sspnd", NULL },
- { "cpu_bind", NULL },
{ "run_queue", "address" },
+ { "cpu_info", NULL },
{ "pollset", "address" },
#ifdef __WIN32__
{ "pollwaiter", "address" },
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 901167a315..4940344108 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -24,7 +24,6 @@
#endif
#include <stddef.h> /* offsetof() */
-#include <ctype.h>
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
@@ -39,6 +38,7 @@
#include "erl_threads.h"
#include "erl_binary.h"
#include "beam_bp.h"
+#include "erl_cpu_topology.h"
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -63,8 +63,6 @@
#define ERTS_WAKEUP_OTHER_DEC 10
#define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10)
-#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
-
#if 0 || defined(DEBUG)
#define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
#endif
@@ -119,10 +117,6 @@ Uint erts_process_tab_index_mask;
static int wakeup_other_limit;
-#ifdef ERTS_SMP
-Uint erts_max_main_threads;
-#endif
-
int erts_sched_thread_suggested_stack_size = -1;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -195,48 +189,6 @@ do { \
#endif
-/*
- * Cpu topology hierarchy.
- */
-#define ERTS_TOPOLOGY_NODE 0
-#define ERTS_TOPOLOGY_PROCESSOR 1
-#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
-#define ERTS_TOPOLOGY_CORE 3
-#define ERTS_TOPOLOGY_THREAD 4
-#define ERTS_TOPOLOGY_LOGICAL 5
-
-#define ERTS_TOPOLOGY_MAX_DEPTH 6
-
-typedef struct {
- int bind_id;
- int bound_id;
-} ErtsCpuBindData;
-
-static ErtsCpuBindData *scheduler2cpu_map;
-erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
-
-typedef enum {
- ERTS_CPU_BIND_UNDEFINED,
- ERTS_CPU_BIND_SPREAD,
- ERTS_CPU_BIND_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_THREAD_SPREAD,
- ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
- ERTS_CPU_BIND_NO_SPREAD,
- ERTS_CPU_BIND_NONE
-} ErtsCpuBindOrder;
-
-#define ERTS_CPU_BIND_DEFAULT_BIND \
- ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
-
-ErtsCpuBindOrder cpu_bind_order;
-
-static erts_cpu_topology_t *user_cpudata;
-static int user_cpudata_size;
-static erts_cpu_topology_t *system_cpudata;
-static int system_cpudata_size;
-
erts_sched_stat_t erts_sched_stat;
ErtsRunQueue *erts_common_run_queue;
@@ -259,11 +211,6 @@ ErtsSchedulerData *erts_scheduler_data;
ErtsAlignedRunQueue *erts_aligned_run_queues;
Uint erts_no_run_queues;
-typedef union {
- ErtsSchedulerData esd;
- char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
-} ErtsAlignedSchedulerData;
-
ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
#ifdef ERTS_SMP
@@ -334,12 +281,6 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
200,
ERTS_ALC_T_PROC_LIST)
-#define ERTS_RUNQ_IX(IX) \
- (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \
- &erts_aligned_run_queues[(IX)].runq)
-#define ERTS_SCHEDULER_IX(IX) \
- (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
- &erts_aligned_scheduler_data[(IX)].esd)
#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
(ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
&aligned_sched_sleep_info[(IX)].ssi)
@@ -398,22 +339,8 @@ static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
#ifdef ERTS_SMP
static void handle_pending_exiters(ErtsProcList *);
-static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq);
-static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
-
#endif
-static int reader_group_lookup(int logical);
-static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
- int *cpudata_size);
-static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
-
-static void early_cpu_bind_init(void);
-static void late_cpu_bind_init(void);
-
#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK)
int
erts_smp_lc_runq_is_locked(ErtsRunQueue *runq)
@@ -469,13 +396,13 @@ erts_pre_init_process(void)
/* initialize the scheduler */
void
-erts_init_process(void)
+erts_init_process(int ncpu)
{
Uint proc_bits = ERTS_PROC_BITS;
#ifdef ERTS_SMP
erts_disable_proc_not_running_opt = 0;
- erts_init_proc_lock();
+ erts_init_proc_lock(ncpu);
#endif
init_proclist_alloc();
@@ -1242,7 +1169,7 @@ wake_scheduler(ErtsRunQueue *rq, int incq, int one)
do {
ErtsSchedulerSleepInfo *wake_ssi = ssi;
ssi = ssi->next;
- erts_sched_finish_poke(ssi, ssi_flags_set_wake(wake_ssi));
+ erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
} while (ssi);
}
}
@@ -1335,6 +1262,31 @@ erts_smp_notify_inc_runq(ErtsRunQueue *runq)
smp_notify_inc_runq(runq);
}
+void
+erts_sched_notify_check_cpu_bind(void)
+{
+#ifdef ERTS_SMP
+ int ix;
+ if (erts_common_run_queue) {
+ for (ix = 0; ix < erts_no_schedulers; ix++)
+ erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1);
+ wake_all_schedulers();
+ }
+ else {
+ for (ix = 0; ix < erts_no_run_queues; ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+ erts_smp_runq_lock(rq);
+ rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_smp_runq_unlock(rq);
+ wake_scheduler(rq, 0, 1);
+ };
+ }
+#else
+ erts_sched_check_cpu_bind(erts_get_scheduler_data());
+#endif
+}
+
+
#ifdef ERTS_SMP
ErtsRunQueue *
@@ -2379,7 +2331,6 @@ erts_debug_nbalance(void)
void
erts_early_init_scheduling(void)
{
- early_cpu_bind_init();
wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
}
@@ -2656,8 +2607,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
/* init port tasks */
erts_port_task_init();
-
- late_cpu_bind_init();
}
ErtsRunQueue *
@@ -2883,12 +2832,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
long flgs;
int changing;
long no = (long) esdp->no;
- ErtsRunQueue *rq = esdp->run_queue;
ErtsSchedulerSleepInfo *ssi = esdp->ssi;
long active_schedulers;
int curr_online = 1;
int wake = 0;
- int reset_read_group = 0;
#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
|| defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
long aux_work;
@@ -2907,22 +2854,9 @@ suspend_scheduler(ErtsSchedulerData *esdp)
ASSERT(no != 1);
- erts_smp_runq_unlock(esdp->run_queue);
-
- /* Unbind from cpu */
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0
- && erts_unbind_from_cpu(erts_cpuinfo) == 0) {
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- reset_read_group = 1;
- }
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (reset_read_group)
- erts_smp_rwmtx_set_reader_group(0);
+ erts_sched_check_cpu_bind_prep_suspend(esdp);
- if (esdp->no <= erts_max_main_threads)
- erts_thr_set_main_status(0, 0);
+ erts_smp_runq_unlock(esdp->run_queue);
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_inactive);
@@ -3056,17 +2990,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_active);
- if (esdp->no <= erts_max_main_threads)
- erts_thr_set_main_status(1, (int) esdp->no);
-
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
- /* Make sure we check if we should bind to a cpu or not... */
- if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
- else
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_sched_check_cpu_bind_post_suspend(esdp);
}
#define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \
@@ -3583,15 +3510,7 @@ sched_thread_func(void *vesdp)
erts_tsd_set(sched_data_key, vesdp);
#ifdef ERTS_SMP
- if (no <= erts_max_main_threads) {
- erts_thr_set_main_status(1, (int) no);
- if (erts_reader_groups) {
- int rg = (int) no;
- if (rg > erts_reader_groups)
- rg = (((int) no) - 1) % erts_reader_groups + 1;
- erts_smp_rwmtx_set_reader_group(rg);
- }
- }
+ erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp);
erts_proc_lock_prepare_proc_lock_waiter();
ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
@@ -3693,1907 +3612,6 @@ erts_start_schedulers(void)
#endif /* ERTS_SMP */
-static int
-int_cmp(const void *vx, const void *vy)
-{
- return *((int *) vx) - *((int *) vy);
-}
-
-static int
-cpu_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- return 0;
-}
-
-static int
-cpu_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_no_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static ERTS_INLINE void
-make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
-{
- int ix;
- int node = -1;
- int processor = -1;
- int processor_node = -1;
- int processor_node_node = -1;
- int core = -1;
- int thread = -1;
- int old_node = -1;
- int old_processor = -1;
- int old_processor_node = -1;
- int old_core = -1;
- int old_thread = -1;
-
- for (ix = 0; ix < size; ix++) {
- if (!no_node || cpudata[ix].node >= 0) {
- if (old_node == cpudata[ix].node)
- cpudata[ix].node = node;
- else {
- old_node = cpudata[ix].node;
- old_processor = processor = -1;
- if (!no_node)
- old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node || cpudata[ix].node >= 0)
- cpudata[ix].node = ++node;
- }
- }
- if (old_processor == cpudata[ix].processor)
- cpudata[ix].processor = processor;
- else {
- old_processor = cpudata[ix].processor;
- if (!no_node)
- processor_node_node = old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- cpudata[ix].processor = ++processor;
- }
- if (no_node && cpudata[ix].processor_node < 0)
- old_processor_node = -1;
- else {
- if (old_processor_node == cpudata[ix].processor_node) {
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = node;
- else {
- if (processor_node_node >= 0)
- cpudata[ix].node = processor_node_node;
- cpudata[ix].processor_node = processor_node;
- }
- }
- else {
- old_processor_node = cpudata[ix].processor_node;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = ++node;
- else {
- cpudata[ix].node = processor_node_node = ++node;
- cpudata[ix].processor_node = ++processor_node;
- }
- }
- }
- if (!no_node && cpudata[ix].processor_node < 0)
- cpudata[ix].processor_node = 0;
- if (old_core == cpudata[ix].core)
- cpudata[ix].core = core;
- else {
- old_core = cpudata[ix].core;
- old_thread = thread = -1;
- cpudata[ix].core = ++core;
- }
- if (old_thread == cpudata[ix].thread)
- cpudata[ix].thread = thread;
- else
- old_thread = cpudata[ix].thread = ++thread;
- }
-}
-
-static void
-cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq)
-{
- if (size > 1) {
- int no_node = 0;
- int (*cmp_func)(const void *, const void *);
- switch (bind_order) {
- case ERTS_CPU_BIND_SPREAD:
- cmp_func = cpu_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_PROCESSOR_SPREAD:
- cmp_func = cpu_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_SPREAD:
- cmp_func = cpu_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_SPREAD:
- cmp_func = cpu_no_spread_order_cmp;
- break;
- default:
- cmp_func = NULL;
- erl_exit(ERTS_ABORT_EXIT,
- "Bad cpu bind type: %d\n",
- (int) cpu_bind_order);
- break;
- }
-
- if (mk_seq)
- make_cpudata_id_seq(cpudata, size, no_node);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
- }
-}
-
-static int
-processor_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static void
-check_cpu_bind(ErtsSchedulerData *esdp)
-{
- int rg = 0;
- int res;
- int cpu_id;
- erts_smp_runq_unlock(esdp->run_queue);
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- cpu_id = scheduler2cpu_map[esdp->no].bind_id;
- if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
- res = erts_bind_to_cpu(erts_cpuinfo, cpu_id);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
- else {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0)
- goto unbind;
- }
- }
- else if (cpu_id < 0) {
- unbind:
- /* Get rid of old binding */
- res = erts_unbind_from_cpu(erts_cpuinfo);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- else if (res != -ENOTSUP) {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- }
- }
- if (erts_reader_groups) {
- if (esdp->cpu_id >= 0)
- rg = reader_group_lookup(esdp->cpu_id);
- else
- rg = (((int) esdp->no) - 1) % erts_reader_groups + 1;
- }
- erts_smp_runq_lock(esdp->run_queue);
-#ifdef ERTS_SMP
- if (erts_common_run_queue)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
- else {
- esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
- }
-#endif
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (erts_reader_groups)
- erts_smp_rwmtx_set_reader_group(rg);
-}
-
-static void
-signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
-{
- int s_ix = 1;
- int cpu_ix;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
-
- cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
-
- for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
- if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical))
- scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
- }
-
- if (s_ix <= erts_no_schedulers)
- for (; s_ix <= erts_no_schedulers; s_ix++)
- scheduler2cpu_map[s_ix].bind_id = -1;
-
-#ifdef ERTS_SMP
- if (erts_common_run_queue) {
- for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1);
- wake_all_schedulers();
- }
- else {
- for (s_ix = 0; s_ix < erts_no_run_queues; s_ix++) {
- ErtsRunQueue *rq = ERTS_RUNQ_IX(s_ix);
- erts_smp_runq_lock(rq);
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
- erts_smp_runq_unlock(rq);
- wake_scheduler(rq, 0, 1);
- };
- }
-#else
- check_cpu_bind(erts_get_scheduler_data());
-#endif
-}
-
-int
-erts_init_scheduler_bind_type(char *how)
-{
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP)
- return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
-
- if (!system_cpudata && !user_cpudata)
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
-
- if (sys_strcmp(how, "db") == 0)
- cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (sys_strcmp(how, "s") == 0)
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (sys_strcmp(how, "ps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "ts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (sys_strcmp(how, "tnnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (sys_strcmp(how, "ns") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (sys_strcmp(how, "u") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
-
- return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
-}
-
-/*
- * reader groups map
- */
-
-typedef struct {
- int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
-} erts_avail_cput;
-
-typedef struct {
- int *map;
- int size;
- int groups;
-} erts_reader_groups_map_test;
-
-typedef struct {
- int id;
- int sub_levels;
- int reader_groups;
-} erts_rg_count_t;
-
-typedef struct {
- int logical;
- int reader_group;
-} erts_reader_groups_map_t;
-
-typedef struct {
- erts_reader_groups_map_t *map;
- int map_size;
- int logical_processors;
- int groups;
-} erts_make_reader_groups_map_test;
-
-static int reader_groups_available_cpu_check;
-static int reader_groups_logical_processors;
-static int reader_groups_map_size;
-static erts_reader_groups_map_t *reader_groups_map;
-
-#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test);
-
-static Eterm
-get_reader_groups_map(Process *c_p,
- erts_reader_groups_map_t *map,
- int map_size,
- int logical_processors)
-{
-#ifdef DEBUG
- Eterm *endp;
-#endif
- Eterm res = NIL, tuple;
- Eterm *hp;
- int i;
-
- hp = HAlloc(c_p, logical_processors*(2+3));
-#ifdef DEBUG
- endp = hp + logical_processors*(2+3);
-#endif
- for (i = map_size - 1; i >= 0; i--) {
- if (map[i].logical >= 0) {
- tuple = TUPLE2(hp,
- make_small(map[i].logical),
- make_small(map[i].reader_group));
- hp += 3;
- res = CONS(hp, tuple, res);
- hp += 2;
- }
- }
- ASSERT(hp == endp);
- return res;
-}
-
-Eterm
-erts_debug_reader_groups_map(Process *c_p, int groups)
-{
- Eterm res;
- erts_make_reader_groups_map_test test;
-
- test.groups = groups;
- make_reader_groups_map(&test);
- if (!test.map)
- res = NIL;
- else {
- res = get_reader_groups_map(c_p,
- test.map,
- test.map_size,
- test.logical_processors);
- erts_free(ERTS_ALC_T_TMP, test.map);
- }
- return res;
-}
-
-
-Eterm
-erts_get_reader_groups_map(Process *c_p)
-{
- Eterm res;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- res = get_reader_groups_map(c_p,
- reader_groups_map,
- reader_groups_map_size,
- reader_groups_logical_processors);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static void
-make_available_cpu_topology(erts_avail_cput *no,
- erts_avail_cput *avail,
- erts_cpu_topology_t *cpudata,
- int *size,
- int test)
-{
- int len = *size;
- erts_cpu_topology_t last;
- int a, i, j;
-
- no->level[ERTS_TOPOLOGY_NODE] = -1;
- no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
- no->level[ERTS_TOPOLOGY_CORE] = -1;
- no->level[ERTS_TOPOLOGY_THREAD] = -1;
- no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
-
- last.node = INT_MIN;
- last.processor = INT_MIN;
- last.processor_node = INT_MIN;
- last.core = INT_MIN;
- last.thread = INT_MIN;
- last.logical = INT_MIN;
-
- a = 0;
-
- for (i = 0; i < len; i++) {
-
- if (!test && !erts_is_cpu_available(erts_cpuinfo, cpudata[i].logical))
- continue;
-
- if (last.node != cpudata[i].node)
- goto node;
- if (last.processor != cpudata[i].processor)
- goto processor;
- if (last.processor_node != cpudata[i].processor_node)
- goto processor_node;
- if (last.core != cpudata[i].core)
- goto core;
- ASSERT(last.thread != cpudata[i].thread);
- goto thread;
-
- node:
- no->level[ERTS_TOPOLOGY_NODE]++;
- processor:
- no->level[ERTS_TOPOLOGY_PROCESSOR]++;
- processor_node:
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
- core:
- no->level[ERTS_TOPOLOGY_CORE]++;
- thread:
- no->level[ERTS_TOPOLOGY_THREAD]++;
-
- no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
- for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
- avail[a].level[j] = no->level[j];
-
- avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
- avail[a].level[ERTS_TOPOLOGY_RG] = 0;
-
- ASSERT(last.logical != cpudata[a].logical);
-
- last = cpudata[i];
- a++;
- }
-
- no->level[ERTS_TOPOLOGY_NODE]++;
- no->level[ERTS_TOPOLOGY_PROCESSOR]++;
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
- no->level[ERTS_TOPOLOGY_CORE]++;
- no->level[ERTS_TOPOLOGY_THREAD]++;
- no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
- *size = a;
-}
-
-static int
-reader_group_lookup(int logical)
-{
- int start = logical % reader_groups_map_size;
- int ix = start;
-
- do {
- if (reader_groups_map[ix].logical == logical) {
- ASSERT(reader_groups_map[ix].reader_group > 0);
- return reader_groups_map[ix].reader_group;
- }
- ix++;
- if (ix == reader_groups_map_size)
- ix = 0;
- } while (ix != start);
-
- erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
-}
-
-static void
-reader_group_insert(erts_reader_groups_map_t *map, int map_size,
- int logical, int reader_group)
-{
- int start = logical % map_size;
- int ix = start;
-
- do {
- if (map[ix].logical < 0) {
- map[ix].logical = logical;
- map[ix].reader_group = reader_group;
- return;
- }
- ix++;
- if (ix == map_size)
- ix = 0;
- } while (ix != start);
-
- erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
-}
-
-
-static int
-sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail)
-{
- int sub_level = level+1;
- int last = -1;
- rgc->sub_levels = 0;
-
- do {
- if (last != avail[aix].level[sub_level]) {
- rgc->sub_levels++;
- last = avail[aix].level[sub_level];
- }
- aix++;
- }
- while (aix < avail_sz && rgc->id == avail[aix].level[level]);
- rgc->reader_groups = 0;
- return aix;
-}
-
-static int
-write_reader_groups(int *rgp, erts_rg_count_t *rgcp,
- int level, int a,
- int avail_sz, erts_avail_cput *avail)
-{
- int rg = *rgp;
- int sub_level = level+1;
- int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups;
- int xsl = rgcp->sub_levels % rgcp->reader_groups;
- int sls = 0;
- int last = -1;
- int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1;
-
- ASSERT(level < 0 || avail[a].level[level] == rgcp->id)
-
- do {
- if (last != avail[a].level[sub_level]) {
- if (!sls) {
- sls = sl_per_gr;
- rg++;
- if (rg >= xsl_rg_lim)
- sls++;
- }
- last = avail[a].level[sub_level];
- sls--;
- }
- avail[a].level[ERTS_TOPOLOGY_RG] = rg;
- a++;
- } while (a < avail_sz && (level < 0
- || avail[a].level[level] == rgcp->id));
-
- ASSERT(rgcp->reader_groups == rg - *rgp);
-
- *rgp = rg;
-
- return a;
-}
-
-static int
-rg_count_sub_levels_compare(const void *vx, const void *vy)
-{
- erts_rg_count_t *x = (erts_rg_count_t *) vx;
- erts_rg_count_t *y = (erts_rg_count_t *) vy;
- if (x->sub_levels != y->sub_levels)
- return y->sub_levels - x->sub_levels;
- return x->id - y->id;
-}
-
-static int
-rg_count_id_compare(const void *vx, const void *vy)
-{
- erts_rg_count_t *x = (erts_rg_count_t *) vx;
- erts_rg_count_t *y = (erts_rg_count_t *) vy;
- return x->id - y->id;
-}
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test)
-{
- int i, spread_level, avail_sz;
- erts_avail_cput no, *avail;
- erts_cpu_topology_t *cpudata;
- erts_reader_groups_map_t *map;
- int map_sz;
- int groups = erts_reader_groups;
-
- if (test) {
- test->map = NULL;
- test->map_size = 0;
- groups = test->groups;
- }
-
- if (!groups)
- return;
-
- if (!test) {
- if (reader_groups_map)
- erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map);
-
- reader_groups_logical_processors = 0;
- reader_groups_map_size = 0;
- reader_groups_map = NULL;
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
-
- if (!cpudata)
- return;
-
- cpu_bind_order_sort(cpudata,
- avail_sz,
- ERTS_CPU_BIND_NO_SPREAD,
- 1);
-
- avail = erts_alloc(ERTS_ALC_T_TMP,
- sizeof(erts_avail_cput)*avail_sz);
-
- make_available_cpu_topology(&no, avail, cpudata,
- &avail_sz, test != NULL);
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- map_sz = avail_sz*2+1;
-
- if (test) {
- map = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_reader_groups_map_t)
- * map_sz));
- test->map = map;
- test->map_size = map_sz;
- test->logical_processors = avail_sz;
- }
- else {
- map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP,
- (sizeof(erts_reader_groups_map_t)
- * map_sz));
- reader_groups_map = map;
- reader_groups_logical_processors = avail_sz;
- reader_groups_map_size = map_sz;
-
- }
-
- for (i = 0; i < map_sz; i++) {
- map[i].logical = -1;
- map[i].reader_group = 0;
- }
-
- spread_level = ERTS_TOPOLOGY_CORE;
- for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
- if (no.level[i] > groups) {
- spread_level = i;
- break;
- }
- }
-
- if (no.level[spread_level] <= groups) {
- int a, rg, last = -1;
- rg = 0;
- ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
- for (a = 0; a < avail_sz; a++) {
- if (last != avail[a].level[spread_level]) {
- rg++;
- last = avail[a].level[spread_level];
- }
- reader_group_insert(map,
- map_sz,
- avail[a].level[ERTS_TOPOLOGY_LOGICAL],
- rg);
- }
- }
- else { /* groups < no.level[spread_level] */
- erts_rg_count_t *rg_count;
- int a, rg, tl, toplevels;
-
- tl = spread_level-1;
-
- if (spread_level == ERTS_TOPOLOGY_NODE)
- toplevels = 1;
- else
- toplevels = no.level[tl];
-
- rg_count = erts_alloc(ERTS_ALC_T_TMP,
- toplevels*sizeof(erts_rg_count_t));
-
- if (toplevels == 1) {
- rg_count[0].id = 0;
- rg_count[0].sub_levels = no.level[spread_level];
- rg_count[0].reader_groups = groups;
- }
- else {
- int rgs_per_tl, rgs;
- rgs = groups;
- rgs_per_tl = rgs / toplevels;
-
- a = 0;
- for (i = 0; i < toplevels; i++) {
- rg_count[i].id = avail[a].level[tl];
- a = sub_levels(&rg_count[i], tl, a, avail_sz, avail);
- }
-
- qsort(rg_count,
- toplevels,
- sizeof(erts_rg_count_t),
- rg_count_sub_levels_compare);
-
- for (i = 0; i < toplevels; i++) {
- if (rg_count[i].sub_levels < rgs_per_tl) {
- rg_count[i].reader_groups = rg_count[i].sub_levels;
- rgs -= rg_count[i].sub_levels;
- }
- else {
- rg_count[i].reader_groups = rgs_per_tl;
- rgs -= rgs_per_tl;
- }
- }
-
- while (rgs > 0) {
- for (i = 0; i < toplevels; i++) {
- if (rg_count[i].sub_levels == rg_count[i].reader_groups)
- break;
- else {
- rg_count[i].reader_groups++;
- if (--rgs == 0)
- break;
- }
- }
- }
-
- qsort(rg_count,
- toplevels,
- sizeof(erts_rg_count_t),
- rg_count_id_compare);
- }
-
- a = i = rg = 0;
- while (a < avail_sz) {
- a = write_reader_groups(&rg, &rg_count[i], tl,
- a, avail_sz, avail);
- i++;
- }
-
- ASSERT(groups == rg);
-
- for (a = 0; a < avail_sz; a++)
- reader_group_insert(map,
- map_sz,
- avail[a].level[ERTS_TOPOLOGY_LOGICAL],
- avail[a].level[ERTS_TOPOLOGY_RG]);
-
- erts_free(ERTS_ALC_T_TMP, rg_count);
- }
-
- erts_free(ERTS_ALC_T_TMP, avail);
-}
-
-/*
- * CPU topology
- */
-
-typedef struct {
- int *id;
- int used;
- int size;
-} ErtsCpuTopIdSeq;
-
-typedef struct {
- ErtsCpuTopIdSeq logical;
- ErtsCpuTopIdSeq thread;
- ErtsCpuTopIdSeq core;
- ErtsCpuTopIdSeq processor_node;
- ErtsCpuTopIdSeq processor;
- ErtsCpuTopIdSeq node;
-} ErtsCpuTopEntry;
-
-static void
-init_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- int size = 10;
- cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->logical.size = size;
- cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->thread.size = size;
- cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->core.size = size;
- cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor_node.size = size;
- cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor.size = size;
- cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->node.size = size;
-}
-
-static void
-destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
-}
-
-static int
-get_cput_value_or_range(int *v, int *vr, char **str)
-{
- long l;
- char *c = *str;
- errno = 0;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- *v = (int) l;
- if (*c == '-') {
- c++;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- *vr = (int) l;
- }
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
-{
- int ix = 0;
- int need_size = 0;
- char *c = *str;
-
- while (1) {
- int res;
- int val;
- int nids;
- int val_range = -1;
- res = get_cput_value_or_range(&val, &val_range, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- if (val_range < 0 || val_range == val)
- nids = 1;
- else {
- if (val_range > val)
- nids = val_range - val + 1;
- else
- nids = val - val_range + 1;
- }
- need_size += nids;
- if (need_size > idseq->size) {
- idseq->size = need_size + 10;
- idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
- idseq->id,
- sizeof(int)*idseq->size);
- }
- if (nids == 1)
- idseq->id[ix++] = val;
- else if (val_range > val) {
- for (; val <= val_range; val++)
- idseq->id[ix++] = val;
- }
- else {
- for (; val >= val_range; val--)
- idseq->id[ix++] = val;
- }
- if (*c != ',')
- break;
- c++;
- }
- *str = c;
- idseq->used = ix;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_entry(ErtsCpuTopEntry *cput, char **str)
-{
- int h;
- char *c = *str;
-
- cput->logical.used = 0;
- cput->thread.id[0] = 0;
- cput->thread.used = 1;
- cput->core.id[0] = 0;
- cput->core.used = 1;
- cput->processor_node.id[0] = -1;
- cput->processor_node.used = 1;
- cput->processor.id[0] = 0;
- cput->processor.used = 1;
- cput->node.id[0] = -1;
- cput->node.used = 1;
-
- h = ERTS_TOPOLOGY_MAX_DEPTH;
- while (*c != ':' && *c != '\0') {
- int res;
- ErtsCpuTopIdSeq *idseqp;
- switch (*c++) {
- case 'L':
- if (h <= ERTS_TOPOLOGY_LOGICAL)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->logical;
- h = ERTS_TOPOLOGY_LOGICAL;
- break;
- case 't':
- case 'T':
- if (h <= ERTS_TOPOLOGY_THREAD)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->thread;
- h = ERTS_TOPOLOGY_THREAD;
- break;
- case 'c':
- case 'C':
- if (h <= ERTS_TOPOLOGY_CORE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->core;
- h = ERTS_TOPOLOGY_CORE;
- break;
- case 'p':
- case 'P':
- if (h <= ERTS_TOPOLOGY_PROCESSOR)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor;
- h = ERTS_TOPOLOGY_PROCESSOR;
- break;
- case 'n':
- case 'N':
- if (h <= ERTS_TOPOLOGY_PROCESSOR) {
- do_node:
- if (h <= ERTS_TOPOLOGY_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->node;
- h = ERTS_TOPOLOGY_NODE;
- }
- else {
- int p_node = 0;
- char *p_chk = c;
- while (*p_chk != '\0' && *p_chk != ':') {
- if (*p_chk == 'p' || *p_chk == 'P') {
- p_node = 1;
- break;
- }
- p_chk++;
- }
- if (!p_node)
- goto do_node;
- if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor_node;
- h = ERTS_TOPOLOGY_PROCESSOR_NODE;
- }
- break;
- default:
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
- }
- res = get_cput_id_seq(idseqp, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- }
-
- if (cput->logical.used < 1)
- return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
-
- if (*c == ':') {
- c++;
- }
-
- if (cput->thread.used != 1
- && cput->thread.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->core.used != 1
- && cput->core.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor_node.used != 1
- && cput->processor_node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor.used != 1
- && cput->processor.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->node.used != 1
- && cput->node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-verify_topology(erts_cpu_topology_t *cpudata, int size)
-{
- if (size > 0) {
- int *logical;
- int node, processor, no_nodes, i;
-
- /* Verify logical ids */
- logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
-
- for (i = 0; i < size; i++)
- logical[i] = cpudata[i].logical;
-
- qsort(logical, size, sizeof(int), int_cmp);
- for (i = 0; i < size-1; i++) {
- if (logical[i] == logical[i+1]) {
- erts_free(ERTS_ALC_T_TMP, logical);
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
- }
- }
-
- erts_free(ERTS_ALC_T_TMP, logical);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
-
- /* Verify unique entities */
-
- for (i = 1; i < size; i++) {
- if (cpudata[i-1].processor == cpudata[i].processor
- && cpudata[i-1].node == cpudata[i].node
- && (cpudata[i-1].processor_node
- == cpudata[i].processor_node)
- && cpudata[i-1].core == cpudata[i].core
- && cpudata[i-1].thread == cpudata[i].thread) {
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
- }
- }
-
- /* Verify numa nodes */
- node = cpudata[0].node;
- processor = cpudata[0].processor;
- no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
- for (i = 1; i < size; i++) {
- if (no_nodes) {
- if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- else {
- if (cpudata[i].processor == processor && cpudata[i].node != node)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- node = cpudata[i].node;
- processor = cpudata[i].processor;
- if (node >= 0 && cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- if (node < 0 && cpudata[i].processor_node < 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- }
- }
-
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-int
-erts_init_cpu_topology(char *topology_str)
-{
- ErtsCpuTopEntry cput;
- int need_size;
- char *c;
- int ix;
- int error = ERTS_INIT_CPU_TOPOLOGY_OK;
-
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 10;
-
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
-
- init_cpu_top_entry(&cput);
-
- ix = 0;
- need_size = 0;
-
- c = topology_str;
- if (*c == '\0') {
- error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
- goto fail;
- }
- do {
- int r;
- error = get_cput_entry(&cput, &c);
- if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
- goto fail;
- need_size += cput.logical.used;
- if (user_cpudata_size < need_size) {
- user_cpudata_size = need_size + 10;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- ASSERT(cput.thread.used == 1
- || cput.thread.used == cput.logical.used);
- ASSERT(cput.core.used == 1
- || cput.core.used == cput.logical.used);
- ASSERT(cput.processor_node.used == 1
- || cput.processor_node.used == cput.logical.used);
- ASSERT(cput.processor.used == 1
- || cput.processor.used == cput.logical.used);
- ASSERT(cput.node.used == 1
- || cput.node.used == cput.logical.used);
-
- for (r = 0; r < cput.logical.used; r++) {
- user_cpudata[ix].logical = cput.logical.id[r];
- user_cpudata[ix].thread =
- cput.thread.id[cput.thread.used == 1 ? 0 : r];
- user_cpudata[ix].core =
- cput.core.id[cput.core.used == 1 ? 0 : r];
- user_cpudata[ix].processor_node =
- cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
- user_cpudata[ix].processor =
- cput.processor.id[cput.processor.used == 1 ? 0 : r];
- user_cpudata[ix].node =
- cput.node.id[cput.node.used == 1 ? 0 : r];
- ix++;
- }
- } while (*c != '\0');
-
- if (user_cpudata_size != ix) {
- user_cpudata_size = ix;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- error = verify_topology(user_cpudata, user_cpudata_size);
- if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
- destroy_cpu_top_entry(&cput);
- return ERTS_INIT_CPU_TOPOLOGY_OK;
- }
-
- fail:
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 0;
- destroy_cpu_top_entry(&cput);
- return error;
-}
-
-#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
-#define ERTS_GET_USED_CPU_TOPOLOGY 0
-#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
-#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
-
-static Eterm get_cpu_topology_term(Process *c_p, int type);
-
-Eterm
-erts_set_cpu_topology(Process *c_p, Eterm term)
-{
- erts_cpu_topology_t *cpudata = NULL;
- int cpudata_size = 0;
- Eterm res;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
- if (term == am_undefined) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
- cpudata_size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
- }
- else if (is_not_list(term)) {
- error:
- res = THE_NON_VALUE;
- goto done;
- }
- else {
- Eterm list = term;
- int ix = 0;
-
- cpudata_size = 100;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- while (is_list(list)) {
- Eterm *lp = list_val(list);
- Eterm cpu = CAR(lp);
- Eterm* tp;
- Sint id;
-
- if (is_not_tuple(cpu))
- goto error;
-
- tp = tuple_val(cpu);
-
- if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
- goto error;
-
- if (ix >= cpudata_size) {
- cpudata_size += 100;
- cpudata = erts_realloc(ERTS_ALC_T_TMP,
- cpudata,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
- }
-
- id = signed_val(tp[2]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].node = (int) id;
-
- id = signed_val(tp[3]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor = (int) id;
-
- id = signed_val(tp[4]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor_node = (int) id;
-
- id = signed_val(tp[5]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].core = (int) id;
-
- id = signed_val(tp[6]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].thread = (int) id;
-
- id = signed_val(tp[7]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].logical = (int) id;
-
- list = CDR(lp);
- ix++;
- }
-
- if (is_not_nil(list))
- goto error;
-
- cpudata_size = ix;
-
- if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
- goto error;
-
- if (user_cpudata_size != cpudata_size) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- user_cpudata_size = cpudata_size;
- }
-
- sys_memcpy((void *) user_cpudata,
- (void *) cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
-
- make_reader_groups_map(NULL);
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- done:
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-static Eterm
-bound_schedulers_term(ErtsCpuBindOrder order)
-{
- switch (order) {
- case ERTS_CPU_BIND_SPREAD: {
- ERTS_DECL_AM(spread);
- return AM_spread;
- }
- case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(processor_spread);
- return AM_processor_spread;
- }
- case ERTS_CPU_BIND_THREAD_SPREAD: {
- ERTS_DECL_AM(thread_spread);
- return AM_thread_spread;
- }
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(thread_no_node_processor_spread);
- return AM_thread_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(no_node_processor_spread);
- return AM_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
- ERTS_DECL_AM(no_node_thread_spread);
- return AM_no_node_thread_spread;
- }
- case ERTS_CPU_BIND_NO_SPREAD: {
- ERTS_DECL_AM(no_spread);
- return AM_no_spread;
- }
- case ERTS_CPU_BIND_NONE: {
- ERTS_DECL_AM(unbound);
- return AM_unbound;
- }
- default:
- ASSERT(0);
- return THE_NON_VALUE;
- }
-}
-
-Eterm
-erts_bound_schedulers_term(Process *c_p)
-{
- ErtsCpuBindOrder order;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- order = cpu_bind_order;
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return bound_schedulers_term(order);
-}
-
-static void
-create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
-{
- if (user_cpudata) {
- *cpudata_size = user_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) user_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else if (system_cpudata) {
- *cpudata_size = system_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else {
- *cpudata = NULL;
- *cpudata_size = 0;
- }
-}
-
-static void
-destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
-{
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-}
-
-Eterm
-erts_bind_schedulers(Process *c_p, Eterm how)
-{
- Eterm res;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- ErtsCpuBindOrder old_cpu_bind_order;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) {
- ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
- }
- else {
-
- old_cpu_bind_order = cpu_bind_order;
-
- if (ERTS_IS_ATOM_STR("default_bind", how))
- cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (ERTS_IS_ATOM_STR("spread", how))
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-
- if (!cpudata) {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- res = bound_schedulers_term(old_cpu_bind_order);
- }
-
- done:
-
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- return res;
-}
-
-Eterm
-erts_fake_scheduler_bindings(Process *p, Eterm how)
-{
- ErtsCpuBindOrder fake_cpu_bind_order;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- Eterm res;
-
- if (ERTS_IS_ATOM_STR("default_bind", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (ERTS_IS_ATOM_STR("spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- ERTS_BIF_PREP_ERROR(res, p, BADARG);
- return res;
- }
-
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-
- if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
- ERTS_BIF_PREP_RET(res, am_false);
- else {
- int i;
- Eterm *hp;
-
- cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
-
-#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
-
- erts_fprintf(stderr, "node: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].node);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "processor: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor);
- erts_fprintf(stderr, "\n");
- if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
- erts_fprintf(stderr, "processor_node:");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
- erts_fprintf(stderr, "\n");
- }
- erts_fprintf(stderr, "core: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].core);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "thread: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].thread);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "logical: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].logical);
- erts_fprintf(stderr, "\n");
-#endif
-
- hp = HAlloc(p, cpudata_size+1);
- ERTS_BIF_PREP_RET(res, make_tuple(hp));
- *hp++ = make_arityval((Uint) cpudata_size);
- for (i = 0; i < cpudata_size; i++)
- *hp++ = make_small((Uint) cpudata[i].logical);
- }
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_schedulers_binds(Process *c_p)
-{
- int ix;
- ERTS_DECL_AM(unbound);
- Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
- Eterm res = make_tuple(hp);
-
- *(hp++) = make_arityval(erts_no_schedulers);
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- for (ix = 1; ix <= erts_no_schedulers; ix++)
- *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
- ? make_small(scheduler2cpu_map[ix].bound_id)
- : AM_unbound);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static Eterm
-bld_topology_term(Eterm **hpp,
- Uint *hszp,
- erts_cpu_topology_t *cpudata,
- int size)
-{
- Eterm res = NIL;
- int i;
-
- if (size == 0)
- return am_undefined;
-
- for (i = size-1; i >= 0; i--) {
- res = erts_bld_cons(hpp,
- hszp,
- erts_bld_tuple(hpp,
- hszp,
- 7,
- am_cpu,
- make_small(cpudata[i].node),
- make_small(cpudata[i].processor),
- make_small(cpudata[i].processor_node),
- make_small(cpudata[i].core),
- make_small(cpudata[i].thread),
- make_small(cpudata[i].logical)),
- res);
- }
- return res;
-}
-
-static Eterm
-get_cpu_topology_term(Process *c_p, int type)
-{
-#ifdef DEBUG
- Eterm *hp_end;
-#endif
- Eterm *hp;
- Uint hsz;
- Eterm res = THE_NON_VALUE;
- erts_cpu_topology_t *cpudata = NULL;
- int size = 0;
-
- switch (type) {
- case ERTS_GET_USED_CPU_TOPOLOGY:
- if (user_cpudata)
- goto defined;
- else
- goto detected;
- case ERTS_GET_DETECTED_CPU_TOPOLOGY:
- detected:
- if (!system_cpudata)
- res = am_undefined;
- else {
- size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * size));
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*size);
- }
- break;
- case ERTS_GET_DEFINED_CPU_TOPOLOGY:
- defined:
- if (!user_cpudata)
- res = am_undefined;
- else {
- size = user_cpudata_size;
- cpudata = user_cpudata;
- }
- break;
- default:
- erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
- break;
- }
-
- if (res == am_undefined) {
- ASSERT(!cpudata);
- return res;
- }
-
- hsz = 0;
-
- bld_topology_term(NULL, &hsz,
- cpudata, size);
-
- hp = HAlloc(c_p, hsz);
-
-#ifdef DEBUG
- hp_end = hp + hsz;
-#endif
-
- res = bld_topology_term(&hp, NULL,
- cpudata, size);
-
- ASSERT(hp_end == hp);
-
- if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_cpu_topology_term(Process *c_p, Eterm which)
-{
- Eterm res;
- int type;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- if (ERTS_IS_ATOM_STR("used", which))
- type = ERTS_GET_USED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("detected", which))
- type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("defined", which))
- type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
- else
- type = ERTS_GET_CPU_TOPOLOGY_ERROR;
- if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
- res = THE_NON_VALUE;
- else
- res = get_cpu_topology_term(c_p, type);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static void
-early_cpu_bind_init(void)
-{
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
- system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * system_cpudata_size));
-
- cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
-
- reader_groups_available_cpu_check = 1;
- reader_groups_logical_processors = 0;
- reader_groups_map_size = 0;
- reader_groups_map = NULL;
-
- if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
- || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
- system_cpudata_size)) {
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
- system_cpudata = NULL;
- system_cpudata_size = 0;
- }
-}
-
-static void
-late_cpu_bind_init(void)
-{
- int ix;
-
- erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind");
-
- scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(ErtsCpuBindData)
- * (erts_no_schedulers+1)));
- for (ix = 1; ix <= erts_no_schedulers; ix++) {
- scheduler2cpu_map[ix].bind_id = -1;
- scheduler2cpu_map[ix].bound_id = -1;
- }
-
- if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
- int ncpus = erts_get_cpu_configured(erts_cpuinfo);
- if (ncpus < 1 || erts_no_schedulers < ncpus)
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else
- cpu_bind_order = ((system_cpudata || user_cpudata)
- && (erts_bind_to_cpu(erts_cpuinfo, -1) != -ENOTSUP)
- ? ERTS_CPU_BIND_DEFAULT_BIND
- : ERTS_CPU_BIND_NONE);
- }
-
- make_reader_groups_map(NULL);
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- signal_schedulers_bind_change(cpudata, cpudata_size);
- destroy_tmp_cpu_topology_copy(cpudata);
- }
-}
-
-int
-erts_update_cpu_info(void)
-{
- int changed;
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- changed = erts_cpu_info_update(erts_cpuinfo);
- if (changed) {
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
-
- if (system_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
-
- system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
- if (!system_cpudata_size)
- system_cpudata = NULL;
- else {
- system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * system_cpudata_size));
-
- if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
- || (ERTS_INIT_CPU_TOPOLOGY_OK
- != verify_topology(system_cpudata,
- system_cpudata_size))) {
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
- system_cpudata = NULL;
- system_cpudata_size = 0;
- }
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- signal_schedulers_bind_change(cpudata, cpudata_size);
- destroy_tmp_cpu_topology_copy(cpudata);
- }
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
- return changed;
-}
-
#ifdef ERTS_SMP
static void
@@ -7069,7 +5087,7 @@ Process *schedule(Process *p, int calls)
}
if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND)
|| erts_smp_atomic_read(&esdp->chk_cpu_bind)) {
- check_cpu_bind(esdp);
+ erts_sched_check_cpu_bind(esdp);
}
}
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 4365e409e5..c038e57b65 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -89,7 +89,6 @@ extern int erts_sched_thread_suggested_stack_size;
#define ERTS_SCHED_THREAD_MAX_STACK_SIZE 8192 /* Kilo words */
#ifdef ERTS_SMP
-extern Uint erts_max_main_threads;
#include "erl_bits.h"
#endif
@@ -426,6 +425,13 @@ struct ErtsSchedulerData_ {
#endif
};
+typedef union {
+ ErtsSchedulerData esd;
+ char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
+} ErtsAlignedSchedulerData;
+
+extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+
#ifndef ERTS_SMP
extern ErtsSchedulerData *erts_scheduler_data;
#endif
@@ -1007,27 +1013,12 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags;
(p)->flags &= ~F_TIMO; \
} while (0)
-
-#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0
-#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3
-
-int erts_init_scheduler_bind_type(char *how);
-
-#define ERTS_INIT_CPU_TOPOLOGY_OK 0
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9
-
-int erts_init_cpu_topology(char *topology_str);
-int erts_update_cpu_info(void);
+#define ERTS_RUNQ_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \
+ &erts_aligned_run_queues[(IX)].runq)
+#define ERTS_SCHEDULER_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
+ &erts_aligned_scheduler_data[(IX)].esd)
void erts_pre_init_process(void);
void erts_late_init_process(void);
@@ -1058,8 +1049,9 @@ Eterm erts_multi_scheduling_blockers(Process *);
void erts_start_schedulers(void);
void erts_smp_notify_check_children_needed(void);
#endif
+void erts_sched_notify_check_cpu_bind(void);
Uint erts_active_schedulers(void);
-void erts_init_process(void);
+void erts_init_process(int);
Eterm erts_process_status(Process *, ErtsProcLocks, Process *, Eterm);
Uint erts_run_queues_len(Uint *);
void erts_add_to_runq(Process *);
diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c
index a4d12139e9..1bebcdb911 100644
--- a/erts/emulator/beam/erl_process_lock.c
+++ b/erts/emulator/beam/erl_process_lock.c
@@ -117,10 +117,9 @@ static int aux_thr_proc_lock_spin_count;
static void cleanup_tse(void);
void
-erts_init_proc_lock(void)
+erts_init_proc_lock(int cpus)
{
int i;
- int cpus;
erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc");
for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) {
#ifdef ERTS_ENABLE_LOCK_COUNT
@@ -138,7 +137,6 @@ erts_init_proc_lock(void)
lc_id.proc_lock_msgq = erts_lc_get_lock_order_id("proc_msgq");
lc_id.proc_lock_status = erts_lc_get_lock_order_id("proc_status");
#endif
- cpus = erts_get_cpu_configured(erts_cpuinfo);
if (cpus > 1) {
proc_lock_spin_count = ERTS_PROC_LOCK_SPIN_COUNT_BASE;
proc_lock_spin_count += (ERTS_PROC_LOCK_SPIN_COUNT_SCHED_INC
diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h
index 7cfc9893fa..4fe30c7209 100644
--- a/erts/emulator/beam/erl_process_lock.h
+++ b/erts/emulator/beam/erl_process_lock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2007-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2007-2010. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -334,7 +334,7 @@ erts_proc_lock_flags_cmpxchg(erts_proc_lock_t *lck, ErtsProcLocks new,
extern erts_pix_lock_t erts_pix_locks[ERTS_NO_OF_PIX_LOCKS];
-void erts_init_proc_lock(void);
+void erts_init_proc_lock(int cpus);
void erts_proc_lock_prepare_proc_lock_waiter(void);
void erts_proc_lock_failed(Process *,
erts_pix_lock_t *,
diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h
index 0b7269262e..a74cf79b8c 100644
--- a/erts/emulator/beam/erl_threads.h
+++ b/erts/emulator/beam/erl_threads.h
@@ -27,9 +27,6 @@
#define ERTS_SPIN_BODY ETHR_SPIN_BODY
-#define ERTS_MAX_READER_GROUPS 8
-extern int erts_reader_groups;
-
#include "sys.h"
#ifdef USE_THREADS
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 280421952e..12536f6cde 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -1499,7 +1499,7 @@ erts_cmp_timeval(SysTimeval *t1p, SysTimeval *t2p)
#endif
#ifdef DEBUG
-void p_slpq(_VOID_);
+void p_slpq(void);
#endif
/* utils.c */
@@ -1728,11 +1728,6 @@ Uint erts_current_reductions(Process* current, Process *p);
int erts_print_system_version(int to, void *arg, Process *c_p);
-/*
- * Interface to erl_init
- */
-void erl_init(void);
-
#define seq_trace_output(token, msg, type, receiver, process) \
seq_trace_output_generic((token), (msg), (type), (receiver), (process), NIL)
#define seq_trace_output_exit(token, msg, type, receiver, exitfrom) \
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 0031568af6..93203a08a9 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -466,8 +466,6 @@ static const int zero_value = 0, one_value = 1;
# endif /* !__WIN32__ */
#endif /* WANT_NONBLOCKING */
-extern erts_cpu_info_t *erts_cpuinfo; /* erl_init.c */
-
__decl_noreturn void __noreturn erl_exit(int n, char*, ...);
/* Some special erl_exit() codes: */
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index af4ab693dc..01ba773688 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -75,6 +75,7 @@ static erts_smp_rwmtx_t environ_rwmtx;
#include "erl_sys_driver.h"
#include "erl_check_io.h"
+#include "erl_cpu_topology.h"
#ifndef DISABLE_VFORK
#define DISABLE_VFORK 0
@@ -399,7 +400,7 @@ typedef struct {
#ifdef ERTS_THR_HAVE_SIG_FUNCS
sigset_t saved_sigmask;
#endif
- int unbind_child;
+ int sched_bind_data;
} erts_thr_create_data_t;
/*
@@ -410,15 +411,13 @@ static void *
thr_create_prepare(void)
{
erts_thr_create_data_t *tcdp;
- ErtsSchedulerData *esdp;
tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
#ifdef ERTS_THR_HAVE_SIG_FUNCS
erts_thr_sigmask(SIG_BLOCK, &thr_create_sigmask, &tcdp->saved_sigmask);
#endif
- esdp = erts_get_scheduler_data();
- tcdp->unbind_child = esdp && erts_is_scheduler_bound(esdp);
+ tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
return (void *) tcdp;
}
@@ -430,6 +429,8 @@ thr_create_cleanup(void *vtcdp)
{
erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+ erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
#ifdef ERTS_THR_HAVE_SIG_FUNCS
/* Restore signalmask... */
erts_thr_sigmask(SIG_SETMASK, &tcdp->saved_sigmask, NULL);
@@ -456,12 +457,7 @@ thr_create_prepare_child(void *vtcdp)
erts_thread_disable_fpe();
#endif
- if (tcdp->unbind_child) {
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- erts_unbind_from_cpu(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- }
-
+ erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
}
#endif /* #ifdef USE_THREADS */
@@ -1461,9 +1457,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
CHLD_STAT_LOCK;
- unbind = erts_is_scheduler_bound(NULL);
- if (unbind)
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
+ unbind = erts_sched_bind_atfork_prepare();
#if !DISABLE_VFORK
/* See fork/vfork discussion before this function. */
@@ -1476,7 +1470,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
if (pid == 0) {
/* The child! Setup child... */
- if (unbind && erts_unbind_from_cpu(erts_cpuinfo) != 0)
+ if (erts_sched_bind_atfork_child(unbind) != 0)
goto child_error;
/* OBSERVE!
@@ -1577,8 +1571,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog;
cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : ".";
- cs_argv[CS_ARGV_UNBIND_IX]
- = (unbind ? erts_get_unbind_from_cpu_str(erts_cpuinfo) : "false");
+ cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind);
cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range;
for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++)
cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0];
@@ -1627,8 +1620,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
}
#endif
- if (unbind)
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_sched_bind_atfork_parent(unbind);
if (pid == -1) {
saved_errno = errno;
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 15d4cd7361..d24347b3aa 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -31,7 +31,7 @@
#include "global.h"
#include "erl_threads.h"
#include "../../drivers/win32/win_con.h"
-
+#include "erl_cpu_topology.h"
void erts_sys_init_float(void);
@@ -2973,13 +2973,50 @@ check_supported_os_version(void)
}
#ifdef USE_THREADS
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
+typedef struct {
+ int sched_bind_data;
+} erts_thr_create_data_t;
+
+/*
+ * thr_create_prepare() is called in parent thread before thread creation.
+ * Returned value is passed as argument to thr_create_cleanup().
+ */
+static void *
+thr_create_prepare(void)
+{
+ erts_thr_create_data_t *tcdp;
+
+ tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
+ tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
+
+ return (void *) tcdp;
+}
+
+
+/* thr_create_cleanup() is called in parent thread after thread creation. */
+static void
+thr_create_cleanup(void *vtcdp)
+{
+ erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+ erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
+ erts_free(ERTS_ALC_T_TMP, tcdp);
+}
+
static void
thr_create_prepare_child(void *vtcdp)
{
+ erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_thread_setup();
-}
#endif /* ERTS_ENABLE_LOCK_COUNT */
+
+ erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
+}
+
#endif /* USE_THREADS */
void
@@ -2991,9 +3028,13 @@ erts_sys_pre_init(void)
#ifdef USE_THREADS
{
erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
eid.thread_create_child_func = thr_create_prepare_child;
-#endif
+ /* Before creation in parent */
+ eid.thread_create_prepare_func = thr_create_prepare;
+ /* After creation in parent */
+ eid.thread_create_parent_func = thr_create_cleanup,
+
erts_thr_init(&eid);
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_init();