/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2010-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
* compliance with the License. You should have received a copy of the
* Erlang Public License along with this software. If not, it can be
* retrieved online at http://www.erlang.org/.
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* %CopyrightEnd%
*/
/*
* Description: CPU topology and related functionality
*
* Author: Rickard Green
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <ctype.h>
#include "global.h"
#include "error.h"
#include "bif.h"
#include "erl_cpu_topology.h"
#define ERTS_MAX_READER_GROUPS 8
/*
* Cpu topology hierarchy.
*/
#define ERTS_TOPOLOGY_NODE 0
#define ERTS_TOPOLOGY_PROCESSOR 1
#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
#define ERTS_TOPOLOGY_CORE 3
#define ERTS_TOPOLOGY_THREAD 4
#define ERTS_TOPOLOGY_LOGICAL 5
#define ERTS_TOPOLOGY_MAX_DEPTH 6
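/*
 * The levels above are ordered from the top of the hierarchy (node)
 * down to the leaves (logical cpu ids). A topology entry carries one
 * integer id per level; -1 means that the level is not present
 * (e.g. no NUMA node information). The extra ERTS_TOPOLOGY_CG slot
 * (defined further down) is used for cpu group ids when building
 * cpu group maps.
 */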
typedef struct {
int bind_id;
int bound_id;
} ErtsCpuBindData;
static erts_cpu_info_t *cpuinfo;
static int max_main_threads;
static int reader_groups;
static ErtsCpuBindData *scheduler2cpu_map;
static erts_smp_rwmtx_t cpuinfo_rwmtx;
typedef enum {
ERTS_CPU_BIND_UNDEFINED,
ERTS_CPU_BIND_SPREAD,
ERTS_CPU_BIND_PROCESSOR_SPREAD,
ERTS_CPU_BIND_THREAD_SPREAD,
ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
ERTS_CPU_BIND_NO_SPREAD,
ERTS_CPU_BIND_NONE
} ErtsCpuBindOrder;
#define ERTS_CPU_BIND_DEFAULT_BIND \
ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
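/*
 * The bind orders above correspond to the bind type strings accepted
 * by erts_init_scheduler_bind_type_string() below ("s", "ps", "ts",
 * "tnnps", "nnps", "nnts", "ns", "db" and "u") and to the atoms
 * accepted by erts_bind_schedulers(). The order determines how
 * schedulers are spread over the cpu topology when they are bound;
 * thread-no-node-processor spread is the default bind order.
 */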
static int no_cpu_groups_callbacks;
static ErtsCpuBindOrder cpu_bind_order;
static erts_cpu_topology_t *user_cpudata;
static int user_cpudata_size;
static erts_cpu_topology_t *system_cpudata;
static int system_cpudata_size;
typedef struct {
int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
} erts_avail_cput;
typedef struct {
int id;
int sub_levels;
int cpu_groups;
} erts_cpu_groups_count_t;
typedef struct {
int logical;
int cpu_group;
} erts_cpu_groups_map_array_t;
typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
struct erts_cpu_groups_callback_list_t_ {
erts_cpu_groups_callback_list_t *next;
erts_cpu_groups_callback_t callback;
void *arg;
};
typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
struct erts_cpu_groups_map_t_ {
erts_cpu_groups_map_t *next;
int groups;
erts_cpu_groups_map_array_t *array;
int size;
int logical_processors;
erts_cpu_groups_callback_list_t *callback_list;
};
typedef struct {
erts_cpu_groups_callback_t callback;
int ix;
void *arg;
} erts_cpu_groups_callback_call_t;
static erts_cpu_groups_map_t *cpu_groups_maps;
static erts_cpu_groups_map_t *reader_groups_map;
#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
#ifdef ERTS_SMP
static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
int size,
ErtsCpuBindOrder bind_order,
int mk_seq);
static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
#endif
static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
static erts_cpu_groups_map_t *add_cpu_groups(int groups,
erts_cpu_groups_callback_t callback,
void *arg);
static void update_cpu_groups_maps(void);
static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
ErtsSchedulerData *esdp);
static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
int *cpudata_size);
static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
static int
int_cmp(const void *vx, const void *vy)
{
return *((int *) vx) - *((int *) vy);
}
static int
cpu_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->core != y->core)
return x->core - y->core;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->node != y->node)
return x->node - y->node;
return 0;
}
static int
cpu_processor_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
return 0;
}
static int
cpu_thread_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
return 0;
}
static int
cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->node != y->node)
return x->node - y->node;
if (x->core != y->core)
return x->core - y->core;
if (x->processor != y->processor)
return x->processor - y->processor;
return 0;
}
static int
cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->core != y->core)
return x->core - y->core;
if (x->processor != y->processor)
return x->processor - y->processor;
return 0;
}
static int
cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->thread != y->thread)
return x->thread - y->thread;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->core != y->core)
return x->core - y->core;
return 0;
}
static int
cpu_no_spread_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->node != y->node)
return x->node - y->node;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->thread != y->thread)
return x->thread - y->thread;
return 0;
}
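/*
 * make_cpudata_id_seq() renumbers the ids of a topology so that each
 * level becomes a dense sequence starting at 0; equal ids on
 * consecutive entries are mapped to the same new id. When 'no_node'
 * is set, node and processor node are collapsed into a single node
 * level. The renumbering lets the spread comparators above
 * interleave entries evenly across the topology.
 */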
static ERTS_INLINE void
make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
{
int ix;
int node = -1;
int processor = -1;
int processor_node = -1;
int processor_node_node = -1;
int core = -1;
int thread = -1;
int old_node = -1;
int old_processor = -1;
int old_processor_node = -1;
int old_core = -1;
int old_thread = -1;
for (ix = 0; ix < size; ix++) {
if (!no_node || cpudata[ix].node >= 0) {
if (old_node == cpudata[ix].node)
cpudata[ix].node = node;
else {
old_node = cpudata[ix].node;
old_processor = processor = -1;
if (!no_node)
old_processor_node = processor_node = -1;
old_core = core = -1;
old_thread = thread = -1;
if (no_node || cpudata[ix].node >= 0)
cpudata[ix].node = ++node;
}
}
if (old_processor == cpudata[ix].processor)
cpudata[ix].processor = processor;
else {
old_processor = cpudata[ix].processor;
if (!no_node)
processor_node_node = old_processor_node = processor_node = -1;
old_core = core = -1;
old_thread = thread = -1;
cpudata[ix].processor = ++processor;
}
if (no_node && cpudata[ix].processor_node < 0)
old_processor_node = -1;
else {
if (old_processor_node == cpudata[ix].processor_node) {
if (no_node)
cpudata[ix].node = cpudata[ix].processor_node = node;
else {
if (processor_node_node >= 0)
cpudata[ix].node = processor_node_node;
cpudata[ix].processor_node = processor_node;
}
}
else {
old_processor_node = cpudata[ix].processor_node;
old_core = core = -1;
old_thread = thread = -1;
if (no_node)
cpudata[ix].node = cpudata[ix].processor_node = ++node;
else {
cpudata[ix].node = processor_node_node = ++node;
cpudata[ix].processor_node = ++processor_node;
}
}
}
if (!no_node && cpudata[ix].processor_node < 0)
cpudata[ix].processor_node = 0;
if (old_core == cpudata[ix].core)
cpudata[ix].core = core;
else {
old_core = cpudata[ix].core;
old_thread = thread = -1;
cpudata[ix].core = ++core;
}
if (old_thread == cpudata[ix].thread)
cpudata[ix].thread = thread;
else
old_thread = cpudata[ix].thread = ++thread;
}
}
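/*
 * Sort a topology according to the given bind order. When 'mk_seq'
 * is set, the ids are first renumbered into dense sequences (with
 * the node levels collapsed for the "no node" orders). After the
 * sort, write_schedulers_bind_change() hands out the logical ids in
 * array order to scheduler 1, 2, ... when binding.
 */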
static void
cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
int size,
ErtsCpuBindOrder bind_order,
int mk_seq)
{
if (size > 1) {
int no_node = 0;
int (*cmp_func)(const void *, const void *);
switch (bind_order) {
case ERTS_CPU_BIND_SPREAD:
cmp_func = cpu_spread_order_cmp;
break;
case ERTS_CPU_BIND_PROCESSOR_SPREAD:
cmp_func = cpu_processor_spread_order_cmp;
break;
case ERTS_CPU_BIND_THREAD_SPREAD:
cmp_func = cpu_thread_spread_order_cmp;
break;
case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
no_node = 1;
cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
break;
case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
no_node = 1;
cmp_func = cpu_no_node_processor_spread_order_cmp;
break;
case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
no_node = 1;
cmp_func = cpu_no_node_thread_spread_order_cmp;
break;
case ERTS_CPU_BIND_NO_SPREAD:
cmp_func = cpu_no_spread_order_cmp;
break;
default:
cmp_func = NULL;
erl_exit(ERTS_ABORT_EXIT,
"Bad cpu bind type: %d\n",
(int) cpu_bind_order);
break;
}
if (mk_seq)
make_cpudata_id_seq(cpudata, size, no_node);
qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
}
}
static int
processor_order_cmp(const void *vx, const void *vy)
{
erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
if (x->processor != y->processor)
return x->processor - y->processor;
if (x->node != y->node)
return x->node - y->node;
if (x->processor_node != y->processor_node)
return x->processor_node - y->processor_node;
if (x->core != y->core)
return x->core - y->core;
if (x->thread != y->thread)
return x->thread - y->thread;
return 0;
}
#ifdef ERTS_SMP
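/*
 * Called by a scheduler thread that is about to be suspended: unbind
 * it from its cpu (if bound) and call all registered cpu groups
 * callbacks with the 'suspending' flag set, outside the cpuinfo lock.
 */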
void
erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
{
erts_cpu_groups_map_t *cgm;
erts_cpu_groups_callback_list_t *cgcl;
erts_cpu_groups_callback_call_t *cgcc;
int cgcc_ix;
/* Unbind from cpu */
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
if (scheduler2cpu_map[esdp->no].bound_id >= 0
&& erts_unbind_from_cpu(cpuinfo) == 0) {
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
}
cgcc = erts_alloc(ERTS_ALC_T_TMP,
(no_cpu_groups_callbacks
* sizeof(erts_cpu_groups_callback_call_t)));
cgcc_ix = 0;
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
cgcc[cgcc_ix].callback = cgcl->callback;
cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
cgcc[cgcc_ix].arg = cgcl->arg;
cgcc_ix++;
}
}
ASSERT(no_cpu_groups_callbacks == cgcc_ix);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
cgcc[cgcc_ix].callback(1,
esdp,
cgcc[cgcc_ix].ix,
cgcc[cgcc_ix].arg);
erts_free(ERTS_ALC_T_TMP, cgcc);
if (esdp->no <= max_main_threads)
erts_thr_set_main_status(0, 0);
}
void
erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
{
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
if (esdp->no <= max_main_threads)
erts_thr_set_main_status(1, (int) esdp->no);
/* Make sure we check if we should bind to a cpu or not... */
if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
erts_smp_atomic32_set(&esdp->chk_cpu_bind, 1);
else
esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
}
#endif
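/*
 * erts_sched_check_cpu_bind() is run by a scheduler that has been
 * flagged to check its cpu binding. It binds or unbinds the calling
 * scheduler according to scheduler2cpu_map[], logs any failure, and
 * then invokes all registered cpu groups callbacks with the
 * scheduler's current cpu group. The run queue lock is released
 * while this is done and reacquired before returning.
 */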
void
erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
{
int res, cpu_id, cgcc_ix;
erts_cpu_groups_map_t *cgm;
erts_cpu_groups_callback_list_t *cgcl;
erts_cpu_groups_callback_call_t *cgcc;
#ifdef ERTS_SMP
if (erts_common_run_queue)
erts_smp_atomic32_set(&esdp->chk_cpu_bind, 0);
else {
esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
}
#endif
erts_smp_runq_unlock(esdp->run_queue);
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
cpu_id = scheduler2cpu_map[esdp->no].bind_id;
if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
res = erts_bind_to_cpu(cpuinfo, cpu_id);
if (res == 0)
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
else {
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
(int) esdp->no, cpu_id, erl_errno_id(-res));
erts_send_error_to_logger_nogl(dsbufp);
if (scheduler2cpu_map[esdp->no].bound_id >= 0)
goto unbind;
}
}
else if (cpu_id < 0) {
unbind:
/* Get rid of old binding */
res = erts_unbind_from_cpu(cpuinfo);
if (res == 0)
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
else if (res != -ENOTSUP) {
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
(int) esdp->no, cpu_id, erl_errno_id(-res));
erts_send_error_to_logger_nogl(dsbufp);
}
}
cgcc = erts_alloc(ERTS_ALC_T_TMP,
(no_cpu_groups_callbacks
* sizeof(erts_cpu_groups_callback_call_t)));
cgcc_ix = 0;
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
cgcc[cgcc_ix].callback = cgcl->callback;
cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
cgcc[cgcc_ix].arg = cgcl->arg;
cgcc_ix++;
}
}
ASSERT(no_cpu_groups_callbacks == cgcc_ix);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
cgcc[cgcc_ix].callback(0,
esdp,
cgcc[cgcc_ix].ix,
cgcc[cgcc_ix].arg);
erts_free(ERTS_ALC_T_TMP, cgcc);
erts_smp_runq_lock(esdp->run_queue);
}
#ifdef ERTS_SMP
void
erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
{
int cgcc_ix;
erts_cpu_groups_map_t *cgm;
erts_cpu_groups_callback_list_t *cgcl;
erts_cpu_groups_callback_call_t *cgcc;
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
cgcc = erts_alloc(ERTS_ALC_T_TMP,
(no_cpu_groups_callbacks
* sizeof(erts_cpu_groups_callback_call_t)));
cgcc_ix = 0;
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
cgcc[cgcc_ix].callback = cgcl->callback;
cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
cgcc[cgcc_ix].arg = cgcl->arg;
cgcc_ix++;
}
}
ASSERT(no_cpu_groups_callbacks == cgcc_ix);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
cgcc[cgcc_ix].callback(0,
esdp,
cgcc[cgcc_ix].ix,
cgcc[cgcc_ix].arg);
erts_free(ERTS_ALC_T_TMP, cgcc);
if (esdp->no <= max_main_threads)
erts_thr_set_main_status(1, (int) esdp->no);
}
#endif
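/*
 * Compute new bind_id:s for all schedulers from the given topology:
 * sort it in the current bind order, skip logical cpus that are not
 * available, and hand the remaining logical ids to scheduler 1..N in
 * order. Schedulers left without a cpu get bind_id -1 (unbound). The
 * schedulers pick the change up in erts_sched_check_cpu_bind().
 */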
static void
write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
{
int s_ix = 1;
int cpu_ix;
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
}
if (s_ix <= erts_no_schedulers)
for (; s_ix <= erts_no_schedulers; s_ix++)
scheduler2cpu_map[s_ix].bind_id = -1;
}
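/*
 * Set the scheduler bind type from a command line string (typically
 * the +sbt emulator flag, e.g. "+sbt tnnps"). "u" (unbound) always
 * succeeds; all other types require cpu binding support and a known
 * cpu topology.
 */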
int
erts_init_scheduler_bind_type_string(char *how)
{
if (sys_strcmp(how, "u") == 0)
cpu_bind_order = ERTS_CPU_BIND_NONE;
else if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
else if (!system_cpudata && !user_cpudata)
return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
else if (sys_strcmp(how, "db") == 0)
cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
else if (sys_strcmp(how, "s") == 0)
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (sys_strcmp(how, "ps") == 0)
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "ts") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (sys_strcmp(how, "tnnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "nnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "nnts") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (sys_strcmp(how, "ns") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else
return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
}
static Eterm
bound_schedulers_term(ErtsCpuBindOrder order)
{
switch (order) {
case ERTS_CPU_BIND_SPREAD: {
ERTS_DECL_AM(spread);
return AM_spread;
}
case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
ERTS_DECL_AM(processor_spread);
return AM_processor_spread;
}
case ERTS_CPU_BIND_THREAD_SPREAD: {
ERTS_DECL_AM(thread_spread);
return AM_thread_spread;
}
case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
ERTS_DECL_AM(thread_no_node_processor_spread);
return AM_thread_no_node_processor_spread;
}
case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
ERTS_DECL_AM(no_node_processor_spread);
return AM_no_node_processor_spread;
}
case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
ERTS_DECL_AM(no_node_thread_spread);
return AM_no_node_thread_spread;
}
case ERTS_CPU_BIND_NO_SPREAD: {
ERTS_DECL_AM(no_spread);
return AM_no_spread;
}
case ERTS_CPU_BIND_NONE: {
ERTS_DECL_AM(unbound);
return AM_unbound;
}
default:
ASSERT(0);
return THE_NON_VALUE;
}
}
Eterm
erts_bound_schedulers_term(Process *c_p)
{
ErtsCpuBindOrder order;
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
order = cpu_bind_order;
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
return bound_schedulers_term(order);
}
Eterm
erts_bind_schedulers(Process *c_p, Eterm how)
{
int notify = 0;
Eterm res;
erts_cpu_topology_t *cpudata;
int cpudata_size;
ErtsCpuBindOrder old_cpu_bind_order;
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
if (cpu_bind_order == ERTS_CPU_BIND_NONE
&& ERTS_IS_ATOM_STR("unbound", how)) {
res = bound_schedulers_term(ERTS_CPU_BIND_NONE);
goto done;
}
ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
}
else {
old_cpu_bind_order = cpu_bind_order;
if (ERTS_IS_ATOM_STR("default_bind", how))
cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
else if (ERTS_IS_ATOM_STR("spread", how))
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("no_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else if (ERTS_IS_ATOM_STR("unbound", how))
cpu_bind_order = ERTS_CPU_BIND_NONE;
else {
cpu_bind_order = old_cpu_bind_order;
ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
goto done;
}
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
if (!cpudata) {
cpu_bind_order = old_cpu_bind_order;
ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
goto done;
}
write_schedulers_bind_change(cpudata, cpudata_size);
notify = 1;
destroy_tmp_cpu_topology_copy(cpudata);
res = bound_schedulers_term(old_cpu_bind_order);
}
done:
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
if (notify)
erts_sched_notify_check_cpu_bind();
return res;
}
int
erts_sched_bind_atthrcreate_prepare(void)
{
ErtsSchedulerData *esdp = erts_get_scheduler_data();
return esdp != NULL && erts_is_scheduler_bound(esdp);
}
int
erts_sched_bind_atthrcreate_child(int unbind)
{
int res = 0;
if (unbind) {
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
res = erts_unbind_from_cpu(cpuinfo);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
}
return res;
}
void
erts_sched_bind_atthrcreate_parent(int unbind)
{
}
int
erts_sched_bind_atfork_prepare(void)
{
ErtsSchedulerData *esdp = erts_get_scheduler_data();
int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
if (unbind)
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
return unbind;
}
int
erts_sched_bind_atfork_child(int unbind)
{
if (unbind) {
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
|| erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
return erts_unbind_from_cpu(cpuinfo);
}
return 0;
}
char *
erts_sched_bind_atvfork_child(int unbind)
{
if (unbind) {
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
|| erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
return erts_get_unbind_from_cpu_str(cpuinfo);
}
return "false";
}
void
erts_sched_bind_atfork_parent(int unbind)
{
if (unbind)
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
}
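/*
 * erts_fake_scheduler_bindings() computes, without actually binding
 * anything, which logical cpu ids schedulers would be bound to for
 * the given bind type, and returns them as a tuple ('false' is
 * returned for 'unbound' or when no topology is known). Mainly
 * useful for debugging and for inspecting how a bind order spreads
 * over the topology (see the optional printout below).
 */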
Eterm
erts_fake_scheduler_bindings(Process *p, Eterm how)
{
ErtsCpuBindOrder fake_cpu_bind_order;
erts_cpu_topology_t *cpudata;
int cpudata_size;
Eterm res;
if (ERTS_IS_ATOM_STR("default_bind", how))
fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
else if (ERTS_IS_ATOM_STR("spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
else if (ERTS_IS_ATOM_STR("no_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
else if (ERTS_IS_ATOM_STR("unbound", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
else {
ERTS_BIF_PREP_ERROR(res, p, BADARG);
return res;
}
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
ERTS_BIF_PREP_RET(res, am_false);
else {
int i;
Eterm *hp;
cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
erts_fprintf(stderr, "node: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].node);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "processor: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].processor);
erts_fprintf(stderr, "\n");
if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
&& fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
&& fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
erts_fprintf(stderr, "processor_node:");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
erts_fprintf(stderr, "\n");
}
erts_fprintf(stderr, "core: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].core);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "thread: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].thread);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "logical: ");
for (i = 0; i < cpudata_size; i++)
erts_fprintf(stderr, " %2d", cpudata[i].logical);
erts_fprintf(stderr, "\n");
#endif
hp = HAlloc(p, cpudata_size+1);
ERTS_BIF_PREP_RET(res, make_tuple(hp));
*hp++ = make_arityval((Uint) cpudata_size);
for (i = 0; i < cpudata_size; i++)
*hp++ = make_small((Uint) cpudata[i].logical);
}
destroy_tmp_cpu_topology_copy(cpudata);
return res;
}
Eterm
erts_get_schedulers_binds(Process *c_p)
{
int ix;
ERTS_DECL_AM(unbound);
Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
Eterm res = make_tuple(hp);
*(hp++) = make_arityval(erts_no_schedulers);
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
for (ix = 1; ix <= erts_no_schedulers; ix++)
*(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
? make_small(scheduler2cpu_map[ix].bound_id)
: AM_unbound);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
return res;
}
/*
* CPU topology
*/
typedef struct {
int *id;
int used;
int size;
} ErtsCpuTopIdSeq;
typedef struct {
ErtsCpuTopIdSeq logical;
ErtsCpuTopIdSeq thread;
ErtsCpuTopIdSeq core;
ErtsCpuTopIdSeq processor_node;
ErtsCpuTopIdSeq processor;
ErtsCpuTopIdSeq node;
} ErtsCpuTopEntry;
static void
init_cpu_top_entry(ErtsCpuTopEntry *cte)
{
int size = 10;
cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->logical.size = size;
cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->thread.size = size;
cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->core.size = size;
cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->processor_node.size = size;
cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->processor.size = size;
cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
sizeof(int)*size);
cte->node.size = size;
}
static void
destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
{
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
}
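/*
 * Parsing of user defined cpu topology strings (typically given with
 * the +sct emulator flag). A topology string consists of entries
 * separated by ':'. Each entry lists ids per level using the letters
 * L (logical), t/T (thread), c/C (core), p/P (processor) and n/N
 * (node, or processor node when a processor follows); ids are comma
 * separated values or ranges such as "0-3". Levels must be given
 * from logical towards node, logical ids are mandatory, and every
 * other level must supply either one id or one id per logical id.
 * Illustrative example: "L0-3c0-3p0N0:L4-7c0-3p1N1".
 */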
static int
get_cput_value_or_range(int *v, int *vr, char **str)
{
long l;
char *c = *str;
errno = 0;
if (!isdigit((unsigned char)*c))
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
l = strtol(c, &c, 10);
if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
*v = (int) l;
if (*c == '-') {
c++;
if (!isdigit((unsigned char)*c))
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
l = strtol(c, &c, 10);
if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
*vr = (int) l;
}
*str = c;
return ERTS_INIT_CPU_TOPOLOGY_OK;
}
static int
get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
{
int ix = 0;
int need_size = 0;
char *c = *str;
while (1) {
int res;
int val;
int nids;
int val_range = -1;
res = get_cput_value_or_range(&val, &val_range, &c);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
return res;
if (val_range < 0 || val_range == val)
nids = 1;
else {
if (val_range > val)
nids = val_range - val + 1;
else
nids = val - val_range + 1;
}
need_size += nids;
if (need_size > idseq->size) {
idseq->size = need_size + 10;
idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
idseq->id,
sizeof(int)*idseq->size);
}
if (nids == 1)
idseq->id[ix++] = val;
else if (val_range > val) {
for (; val <= val_range; val++)
idseq->id[ix++] = val;
}
else {
for (; val >= val_range; val--)
idseq->id[ix++] = val;
}
if (*c != ',')
break;
c++;
}
*str = c;
idseq->used = ix;
return ERTS_INIT_CPU_TOPOLOGY_OK;
}
static int
get_cput_entry(ErtsCpuTopEntry *cput, char **str)
{
int h;
char *c = *str;
cput->logical.used = 0;
cput->thread.id[0] = 0;
cput->thread.used = 1;
cput->core.id[0] = 0;
cput->core.used = 1;
cput->processor_node.id[0] = -1;
cput->processor_node.used = 1;
cput->processor.id[0] = 0;
cput->processor.used = 1;
cput->node.id[0] = -1;
cput->node.used = 1;
h = ERTS_TOPOLOGY_MAX_DEPTH;
while (*c != ':' && *c != '\0') {
int res;
ErtsCpuTopIdSeq *idseqp;
switch (*c++) {
case 'L':
if (h <= ERTS_TOPOLOGY_LOGICAL)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->logical;
h = ERTS_TOPOLOGY_LOGICAL;
break;
case 't':
case 'T':
if (h <= ERTS_TOPOLOGY_THREAD)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->thread;
h = ERTS_TOPOLOGY_THREAD;
break;
case 'c':
case 'C':
if (h <= ERTS_TOPOLOGY_CORE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->core;
h = ERTS_TOPOLOGY_CORE;
break;
case 'p':
case 'P':
if (h <= ERTS_TOPOLOGY_PROCESSOR)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->processor;
h = ERTS_TOPOLOGY_PROCESSOR;
break;
case 'n':
case 'N':
if (h <= ERTS_TOPOLOGY_PROCESSOR) {
do_node:
if (h <= ERTS_TOPOLOGY_NODE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->node;
h = ERTS_TOPOLOGY_NODE;
}
else {
int p_node = 0;
char *p_chk = c;
while (*p_chk != '\0' && *p_chk != ':') {
if (*p_chk == 'p' || *p_chk == 'P') {
p_node = 1;
break;
}
p_chk++;
}
if (!p_node)
goto do_node;
if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
idseqp = &cput->processor_node;
h = ERTS_TOPOLOGY_PROCESSOR_NODE;
}
break;
default:
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
}
res = get_cput_id_seq(idseqp, &c);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
return res;
}
if (cput->logical.used < 1)
return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
if (*c == ':') {
c++;
}
if (cput->thread.used != 1
&& cput->thread.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->core.used != 1
&& cput->core.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->processor_node.used != 1
&& cput->processor_node.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->processor.used != 1
&& cput->processor.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
if (cput->node.used != 1
&& cput->node.used != cput->logical.used)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
*str = c;
return ERTS_INIT_CPU_TOPOLOGY_OK;
}
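/*
 * Sanity check a topology: logical ids must be unique, no two
 * entries may describe the same <node, processor, processor node,
 * core, thread> combination, and NUMA node information must be
 * consistent: either no entry carries node info, or every entry has
 * exactly one of node and processor node set and no processor spans
 * two nodes.
 */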
static int
verify_topology(erts_cpu_topology_t *cpudata, int size)
{
if (size > 0) {
int *logical;
int node, processor, no_nodes, i;
/* Verify logical ids */
logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
for (i = 0; i < size; i++)
logical[i] = cpudata[i].logical;
qsort(logical, size, sizeof(int), int_cmp);
for (i = 0; i < size-1; i++) {
if (logical[i] == logical[i+1]) {
erts_free(ERTS_ALC_T_TMP, logical);
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
}
}
erts_free(ERTS_ALC_T_TMP, logical);
qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
/* Verify unique entities */
for (i = 1; i < size; i++) {
if (cpudata[i-1].processor == cpudata[i].processor
&& cpudata[i-1].node == cpudata[i].node
&& (cpudata[i-1].processor_node
== cpudata[i].processor_node)
&& cpudata[i-1].core == cpudata[i].core
&& cpudata[i-1].thread == cpudata[i].thread) {
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
}
}
/* Verify numa nodes */
node = cpudata[0].node;
processor = cpudata[0].processor;
no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
for (i = 1; i < size; i++) {
if (no_nodes) {
if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
}
else {
if (cpudata[i].processor == processor && cpudata[i].node != node)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
node = cpudata[i].node;
processor = cpudata[i].processor;
if (node >= 0 && cpudata[i].processor_node >= 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
if (node < 0 && cpudata[i].processor_node < 0)
return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
}
}
}
return ERTS_INIT_CPU_TOPOLOGY_OK;
}
int
erts_init_cpu_topology_string(char *topology_str)
{
ErtsCpuTopEntry cput;
int need_size;
char *c;
int ix;
int error = ERTS_INIT_CPU_TOPOLOGY_OK;
if (user_cpudata)
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata_size = 10;
user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
init_cpu_top_entry(&cput);
ix = 0;
need_size = 0;
c = topology_str;
if (*c == '\0') {
error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
goto fail;
}
do {
int r;
error = get_cput_entry(&cput, &c);
if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
goto fail;
need_size += cput.logical.used;
if (user_cpudata_size < need_size) {
user_cpudata_size = need_size + 10;
user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
user_cpudata,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
}
ASSERT(cput.thread.used == 1
|| cput.thread.used == cput.logical.used);
ASSERT(cput.core.used == 1
|| cput.core.used == cput.logical.used);
ASSERT(cput.processor_node.used == 1
|| cput.processor_node.used == cput.logical.used);
ASSERT(cput.processor.used == 1
|| cput.processor.used == cput.logical.used);
ASSERT(cput.node.used == 1
|| cput.node.used == cput.logical.used);
for (r = 0; r < cput.logical.used; r++) {
user_cpudata[ix].logical = cput.logical.id[r];
user_cpudata[ix].thread =
cput.thread.id[cput.thread.used == 1 ? 0 : r];
user_cpudata[ix].core =
cput.core.id[cput.core.used == 1 ? 0 : r];
user_cpudata[ix].processor_node =
cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
user_cpudata[ix].processor =
cput.processor.id[cput.processor.used == 1 ? 0 : r];
user_cpudata[ix].node =
cput.node.id[cput.node.used == 1 ? 0 : r];
ix++;
}
} while (*c != '\0');
if (user_cpudata_size != ix) {
user_cpudata_size = ix;
user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
user_cpudata,
(sizeof(erts_cpu_topology_t)
* user_cpudata_size));
}
error = verify_topology(user_cpudata, user_cpudata_size);
if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
destroy_cpu_top_entry(&cput);
return ERTS_INIT_CPU_TOPOLOGY_OK;
}
fail:
if (user_cpudata)
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata_size = 0;
destroy_cpu_top_entry(&cput);
return error;
}
#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
#define ERTS_GET_USED_CPU_TOPOLOGY 0
#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
static Eterm get_cpu_topology_term(Process *c_p, int type);
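/*
 * Set the user defined cpu topology from an Erlang term (presumably
 * reached via erlang:system_flag(cpu_topology, _)). The term is
 * either 'undefined', which drops the user defined topology and
 * falls back to the detected one, or a list of
 *     {cpu, Node, Processor, ProcessorNode, Core, Thread, Logical}
 * tuples with ids in the range -1..ERTS_MAX_CPU_TOPOLOGY_ID. The
 * previously used topology term is returned on success;
 * THE_NON_VALUE is returned on error.
 */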
Eterm
erts_set_cpu_topology(Process *c_p, Eterm term)
{
erts_cpu_topology_t *cpudata = NULL;
int cpudata_size = 0;
Eterm res;
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
if (term == am_undefined) {
if (user_cpudata)
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata = NULL;
user_cpudata_size = 0;
if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
cpudata_size = system_cpudata_size;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* cpudata_size));
sys_memcpy((void *) cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*cpudata_size);
}
}
else if (is_not_list(term)) {
error:
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
res = THE_NON_VALUE;
goto done;
}
else {
Eterm list = term;
int ix = 0;
cpudata_size = 100;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* cpudata_size));
while (is_list(list)) {
Eterm *lp = list_val(list);
Eterm cpu = CAR(lp);
Eterm* tp;
Sint id;
if (is_not_tuple(cpu))
goto error;
tp = tuple_val(cpu);
if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
goto error;
if (ix >= cpudata_size) {
cpudata_size += 100;
cpudata = erts_realloc(ERTS_ALC_T_TMP,
cpudata,
(sizeof(erts_cpu_topology_t)
* cpudata_size));
}
id = signed_val(tp[2]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].node = (int) id;
id = signed_val(tp[3]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].processor = (int) id;
id = signed_val(tp[4]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].processor_node = (int) id;
id = signed_val(tp[5]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].core = (int) id;
id = signed_val(tp[6]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].thread = (int) id;
id = signed_val(tp[7]);
if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
goto error;
cpudata[ix].logical = (int) id;
list = CDR(lp);
ix++;
}
if (is_not_nil(list))
goto error;
cpudata_size = ix;
if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
goto error;
if (user_cpudata_size != cpudata_size) {
if (user_cpudata)
erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
sizeof(erts_cpu_topology_t)*cpudata_size);
user_cpudata_size = cpudata_size;
}
sys_memcpy((void *) user_cpudata,
(void *) cpudata,
sizeof(erts_cpu_topology_t)*cpudata_size);
}
update_cpu_groups_maps();
write_schedulers_bind_change(cpudata, cpudata_size);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
erts_sched_notify_check_cpu_bind();
done:
if (cpudata)
erts_free(ERTS_ALC_T_TMP, cpudata);
return res;
}
static void
create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
{
if (user_cpudata) {
*cpudata_size = user_cpudata_size;
*cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* (*cpudata_size)));
sys_memcpy((void *) *cpudata,
(void *) user_cpudata,
sizeof(erts_cpu_topology_t)*(*cpudata_size));
}
else if (system_cpudata) {
*cpudata_size = system_cpudata_size;
*cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* (*cpudata_size)));
sys_memcpy((void *) *cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*(*cpudata_size));
}
else {
*cpudata = NULL;
*cpudata_size = 0;
}
}
static void
destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
{
if (cpudata)
erts_free(ERTS_ALC_T_TMP, cpudata);
}
static Eterm
bld_topology_term(Eterm **hpp,
Uint *hszp,
erts_cpu_topology_t *cpudata,
int size)
{
Eterm res = NIL;
int i;
if (size == 0)
return am_undefined;
for (i = size-1; i >= 0; i--) {
res = erts_bld_cons(hpp,
hszp,
erts_bld_tuple(hpp,
hszp,
7,
am_cpu,
make_small(cpudata[i].node),
make_small(cpudata[i].processor),
make_small(cpudata[i].processor_node),
make_small(cpudata[i].core),
make_small(cpudata[i].thread),
make_small(cpudata[i].logical)),
res);
}
return res;
}
static Eterm
get_cpu_topology_term(Process *c_p, int type)
{
#ifdef DEBUG
Eterm *hp_end;
#endif
Eterm *hp;
Uint hsz;
Eterm res = THE_NON_VALUE;
erts_cpu_topology_t *cpudata = NULL;
int size = 0;
switch (type) {
case ERTS_GET_USED_CPU_TOPOLOGY:
if (user_cpudata)
goto defined;
else
goto detected;
case ERTS_GET_DETECTED_CPU_TOPOLOGY:
detected:
if (!system_cpudata)
res = am_undefined;
else {
size = system_cpudata_size;
cpudata = erts_alloc(ERTS_ALC_T_TMP,
(sizeof(erts_cpu_topology_t)
* size));
sys_memcpy((void *) cpudata,
(void *) system_cpudata,
sizeof(erts_cpu_topology_t)*size);
}
break;
case ERTS_GET_DEFINED_CPU_TOPOLOGY:
defined:
if (!user_cpudata)
res = am_undefined;
else {
size = user_cpudata_size;
cpudata = user_cpudata;
}
break;
default:
erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
break;
}
if (res == am_undefined) {
ASSERT(!cpudata);
return res;
}
hsz = 0;
bld_topology_term(NULL, &hsz,
cpudata, size);
hp = HAlloc(c_p, hsz);
#ifdef DEBUG
hp_end = hp + hsz;
#endif
res = bld_topology_term(&hp, NULL,
cpudata, size);
ASSERT(hp_end == hp);
if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
erts_free(ERTS_ALC_T_TMP, cpudata);
return res;
}
Eterm
erts_get_cpu_topology_term(Process *c_p, Eterm which)
{
Eterm res;
int type;
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
if (ERTS_IS_ATOM_STR("used", which))
type = ERTS_GET_USED_CPU_TOPOLOGY;
else if (ERTS_IS_ATOM_STR("detected", which))
type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
else if (ERTS_IS_ATOM_STR("defined", which))
type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
else
type = ERTS_GET_CPU_TOPOLOGY_ERROR;
if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
res = THE_NON_VALUE;
else
res = get_cpu_topology_term(c_p, type);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
return res;
}
static void
get_logical_processors(int *conf, int *onln, int *avail)
{
if (conf)
*conf = erts_get_cpu_configured(cpuinfo);
if (onln)
*onln = erts_get_cpu_online(cpuinfo);
if (avail)
*avail = erts_get_cpu_available(cpuinfo);
}
void
erts_get_logical_processors(int *conf, int *onln, int *avail)
{
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
get_logical_processors(conf, onln, avail);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
}
void
erts_pre_early_init_cpu_topology(int *max_rg_p,
int *conf_p,
int *onln_p,
int *avail_p)
{
cpu_groups_maps = NULL;
no_cpu_groups_callbacks = 0;
*max_rg_p = ERTS_MAX_READER_GROUPS;
cpuinfo = erts_cpu_info_create();
get_logical_processors(conf_p, onln_p, avail_p);
}
void
erts_early_init_cpu_topology(int no_schedulers,
int *max_main_threads_p,
int max_reader_groups,
int *reader_groups_p)
{
user_cpudata = NULL;
user_cpudata_size = 0;
system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* system_cpudata_size));
cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
|| ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
system_cpudata_size)) {
erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
system_cpudata = NULL;
system_cpudata_size = 0;
}
max_main_threads = erts_get_cpu_configured(cpuinfo);
if (max_main_threads > no_schedulers)
max_main_threads = no_schedulers;
*max_main_threads_p = max_main_threads;
reader_groups = max_main_threads;
if (reader_groups <= 1 || max_reader_groups <= 1)
reader_groups = 0;
if (reader_groups > max_reader_groups)
reader_groups = max_reader_groups;
*reader_groups_p = reader_groups;
}
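/*
 * erts_init_cpu_topology() runs when the number of schedulers is
 * known: it sets up the scheduler-to-cpu map, picks the default bind
 * order (default bind only when a topology is known, binding is
 * supported and there are at least as many schedulers as configured
 * cpus), creates the reader groups map, and triggers an initial bind
 * check if schedulers are to be bound.
 */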
void
erts_init_cpu_topology(void)
{
int ix;
erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info");
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(ErtsCpuBindData)
* (erts_no_schedulers+1)));
for (ix = 1; ix <= erts_no_schedulers; ix++) {
scheduler2cpu_map[ix].bind_id = -1;
scheduler2cpu_map[ix].bound_id = -1;
}
if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
int ncpus = erts_get_cpu_configured(cpuinfo);
if (ncpus < 1 || erts_no_schedulers < ncpus)
cpu_bind_order = ERTS_CPU_BIND_NONE;
else
cpu_bind_order = ((system_cpudata || user_cpudata)
&& (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP)
? ERTS_CPU_BIND_DEFAULT_BIND
: ERTS_CPU_BIND_NONE);
}
reader_groups_map = add_cpu_groups(reader_groups,
reader_groups_callback,
NULL);
if (cpu_bind_order == ERTS_CPU_BIND_NONE)
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
else {
erts_cpu_topology_t *cpudata;
int cpudata_size;
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
write_schedulers_bind_change(cpudata, cpudata_size);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
erts_sched_notify_check_cpu_bind();
destroy_tmp_cpu_topology_copy(cpudata);
}
}
int
erts_update_cpu_info(void)
{
int changed;
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
changed = erts_cpu_info_update(cpuinfo);
if (changed) {
erts_cpu_topology_t *cpudata;
int cpudata_size;
if (system_cpudata)
erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
if (!system_cpudata_size)
system_cpudata = NULL;
else {
system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
(sizeof(erts_cpu_topology_t)
* system_cpudata_size));
if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
|| (ERTS_INIT_CPU_TOPOLOGY_OK
!= verify_topology(system_cpudata,
system_cpudata_size))) {
erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
system_cpudata = NULL;
system_cpudata_size = 0;
}
}
update_cpu_groups_maps();
create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
write_schedulers_bind_change(cpudata, cpudata_size);
destroy_tmp_cpu_topology_copy(cpudata);
}
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
if (changed)
erts_sched_notify_check_cpu_bind();
return changed;
}
/*
* reader groups map
*/
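/*
 * The reader groups map assigns each scheduler a reader group,
 * presumably consumed by reader optimized rwlocks via
 * erts_smp_rwmtx_set_reader_group(). Group 0 means "no group": a
 * suspended scheduler is moved to group 0 and gets its group back
 * (group index + 1) when it resumes.
 */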
static void
reader_groups_callback(int suspending,
ErtsSchedulerData *esdp,
int group,
void *unused)
{
if (reader_groups && esdp->no <= max_main_threads)
erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1);
}
static Eterm get_cpu_groups_map(Process *c_p,
erts_cpu_groups_map_t *map,
int offset);
Eterm
erts_debug_reader_groups_map(Process *c_p, int groups)
{
Eterm res;
erts_cpu_groups_map_t test;
test.array = NULL;
test.groups = groups;
make_cpu_groups_map(&test, 1);
if (!test.array)
res = NIL;
else {
res = get_cpu_groups_map(c_p, &test, 1);
erts_free(ERTS_ALC_T_TMP, test.array);
}
return res;
}
Eterm
erts_get_reader_groups_map(Process *c_p)
{
Eterm res;
erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
res = get_cpu_groups_map(c_p, reader_groups_map, 1);
erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
return res;
}
/*
* CPU groups
*/
static Eterm
get_cpu_groups_map(Process *c_p,
erts_cpu_groups_map_t *map,
int offset)
{
#ifdef DEBUG
Eterm *endp;
#endif
Eterm res = NIL, tuple;
Eterm *hp;
int i;
hp = HAlloc(c_p, map->logical_processors*(2+3));
#ifdef DEBUG
endp = hp + map->logical_processors*(2+3);
#endif
for (i = map->size - 1; i >= 0; i--) {
if (map->array[i].logical >= 0) {
tuple = TUPLE2(hp,
make_small(map->array[i].logical),
make_small(map->array[i].cpu_group + offset));
hp += 3;
res = CONS(hp, tuple, res);
hp += 2;
}
}
ASSERT(hp == endp);
return res;
}
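/*
 * Reduce a topology (sorted in no spread order) to the cpus that are
 * currently available, unless 'test' is set in which case all cpus
 * are kept. no->level[] is filled in with the number of distinct
 * units found at each level, and avail[] with one entry per kept
 * logical cpu.
 */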
static void
make_available_cpu_topology(erts_avail_cput *no,
erts_avail_cput *avail,
erts_cpu_topology_t *cpudata,
int *size,
int test)
{
int len = *size;
erts_cpu_topology_t last;
int a, i, j;
no->level[ERTS_TOPOLOGY_NODE] = -1;
no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
no->level[ERTS_TOPOLOGY_CORE] = -1;
no->level[ERTS_TOPOLOGY_THREAD] = -1;
no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
last.node = INT_MIN;
last.processor = INT_MIN;
last.processor_node = INT_MIN;
last.core = INT_MIN;
last.thread = INT_MIN;
last.logical = INT_MIN;
a = 0;
for (i = 0; i < len; i++) {
if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
continue;
if (last.node != cpudata[i].node)
goto node;
if (last.processor != cpudata[i].processor)
goto processor;
if (last.processor_node != cpudata[i].processor_node)
goto processor_node;
if (last.core != cpudata[i].core)
goto core;
ASSERT(last.thread != cpudata[i].thread);
goto thread;
node:
no->level[ERTS_TOPOLOGY_NODE]++;
processor:
no->level[ERTS_TOPOLOGY_PROCESSOR]++;
processor_node:
no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
core:
no->level[ERTS_TOPOLOGY_CORE]++;
thread:
no->level[ERTS_TOPOLOGY_THREAD]++;
no->level[ERTS_TOPOLOGY_LOGICAL]++;
for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
avail[a].level[j] = no->level[j];
avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
avail[a].level[ERTS_TOPOLOGY_CG] = 0;
ASSERT(last.logical != cpudata[i].logical);
last = cpudata[i];
a++;
}
no->level[ERTS_TOPOLOGY_NODE]++;
no->level[ERTS_TOPOLOGY_PROCESSOR]++;
no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
no->level[ERTS_TOPOLOGY_CORE]++;
no->level[ERTS_TOPOLOGY_THREAD]++;
no->level[ERTS_TOPOLOGY_LOGICAL]++;
*size = a;
}
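/*
 * A cpu groups map is an open addressing hash table with linear
 * probing, keyed on logical cpu id (logical % map->size). Since
 * map->size is 2*logical_processors+1 it never fills up during
 * normal operation; a full map is treated as a fatal error.
 */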
static void
cpu_group_insert(erts_cpu_groups_map_t *map,
int logical, int cpu_group)
{
int start = logical % map->size;
int ix = start;
do {
if (map->array[ix].logical < 0) {
map->array[ix].logical = logical;
map->array[ix].cpu_group = cpu_group;
return;
}
ix++;
if (ix == map->size)
ix = 0;
} while (ix != start);
erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
}
static int
sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
int avail_sz, erts_avail_cput *avail)
{
int sub_level = level+1;
int last = -1;
cgc->sub_levels = 0;
do {
if (last != avail[aix].level[sub_level]) {
cgc->sub_levels++;
last = avail[aix].level[sub_level];
}
aix++;
}
while (aix < avail_sz && cgc->id == avail[aix].level[level]);
cgc->cpu_groups = 0;
return aix;
}
static int
write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
int level, int a,
int avail_sz, erts_avail_cput *avail)
{
int cg = *cgp;
int sub_level = level+1;
int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
int xsl = cgcp->sub_levels % cgcp->cpu_groups;
int sls = 0;
int last = -1;
int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;
ASSERT(level < 0 || avail[a].level[level] == cgcp->id);
do {
if (last != avail[a].level[sub_level]) {
if (!sls) {
sls = sl_per_gr;
cg++;
if (cg >= xsl_cg_lim)
sls++;
}
last = avail[a].level[sub_level];
sls--;
}
avail[a].level[ERTS_TOPOLOGY_CG] = cg;
a++;
} while (a < avail_sz && (level < 0
|| avail[a].level[level] == cgcp->id));
ASSERT(cgcp->cpu_groups == cg - *cgp);
*cgp = cg;
return a;
}
static int
cg_count_sub_levels_compare(const void *vx, const void *vy)
{
erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
if (x->sub_levels != y->sub_levels)
return y->sub_levels - x->sub_levels;
return x->id - y->id;
}
static int
cg_count_id_compare(const void *vx, const void *vy)
{
erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
return x->id - y->id;
}
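/*
 * Build the map from logical cpu id to cpu group for map->groups
 * groups. The available topology is scanned for the topmost level
 * with more units than requested groups (the spread level). If no
 * such level exists, every core simply becomes its own group.
 * Otherwise the groups are divided about evenly over the units one
 * level above the spread level, never giving a unit more groups than
 * it has spread level units (sub_levels() counts them,
 * write_cpu_groups() assigns the group numbers).
 */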
static void
make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
{
int i, spread_level, avail_sz;
erts_avail_cput no, *avail;
erts_cpu_topology_t *cpudata;
ErtsAlcType_t alc_type = (test
? ERTS_ALC_T_TMP
: ERTS_ALC_T_CPU_GRPS_MAP);
if (map->array)
erts_free(alc_type, map->array);
map->array = NULL;
map->logical_processors = 0;
map->size = 0;
if (!map->groups)
return;
create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
if (!cpudata)
return;
cpu_bind_order_sort(cpudata,
avail_sz,
ERTS_CPU_BIND_NO_SPREAD,
1);
avail = erts_alloc(ERTS_ALC_T_TMP,
sizeof(erts_avail_cput)*avail_sz);
make_available_cpu_topology(&no, avail, cpudata,
&avail_sz, test);
destroy_tmp_cpu_topology_copy(cpudata);
map->size = avail_sz*2+1;
map->array = erts_alloc(alc_type,
(sizeof(erts_cpu_groups_map_array_t)
* map->size));
map->logical_processors = avail_sz;
for (i = 0; i < map->size; i++) {
map->array[i].logical = -1;
map->array[i].cpu_group = -1;
}
spread_level = ERTS_TOPOLOGY_CORE;
for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
if (no.level[i] > map->groups) {
spread_level = i;
break;
}
}
if (no.level[spread_level] <= map->groups) {
int a, cg, last = -1;
cg = -1;
ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
for (a = 0; a < avail_sz; a++) {
if (last != avail[a].level[spread_level]) {
cg++;
last = avail[a].level[spread_level];
}
cpu_group_insert(map,
avail[a].level[ERTS_TOPOLOGY_LOGICAL],
cg);
}
}
else { /* map->groups < no.level[spread_level] */
erts_cpu_groups_count_t *cg_count;
int a, cg, tl, toplevels;
tl = spread_level-1;
if (spread_level == ERTS_TOPOLOGY_NODE)
toplevels = 1;
else
toplevels = no.level[tl];
cg_count = erts_alloc(ERTS_ALC_T_TMP,
toplevels*sizeof(erts_cpu_groups_count_t));
if (toplevels == 1) {
cg_count[0].id = 0;
cg_count[0].sub_levels = no.level[spread_level];
cg_count[0].cpu_groups = map->groups;
}
else {
int cgs_per_tl, cgs;
cgs = map->groups;
cgs_per_tl = cgs / toplevels;
a = 0;
for (i = 0; i < toplevels; i++) {
cg_count[i].id = avail[a].level[tl];
a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
}
qsort(cg_count,
toplevels,
sizeof(erts_cpu_groups_count_t),
cg_count_sub_levels_compare);
for (i = 0; i < toplevels; i++) {
if (cg_count[i].sub_levels < cgs_per_tl) {
cg_count[i].cpu_groups = cg_count[i].sub_levels;
cgs -= cg_count[i].sub_levels;
}
else {
cg_count[i].cpu_groups = cgs_per_tl;
cgs -= cgs_per_tl;
}
}
while (cgs > 0) {
for (i = 0; i < toplevels; i++) {
if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
break;
else {
cg_count[i].cpu_groups++;
if (--cgs == 0)
break;
}
}
}
qsort(cg_count,
toplevels,
sizeof(erts_cpu_groups_count_t),
cg_count_id_compare);
}
a = i = 0;
cg = -1;
while (a < avail_sz) {
a = write_cpu_groups(&cg, &cg_count[i], tl,
a, avail_sz, avail);
i++;
}
ASSERT(map->groups == cg + 1);
for (a = 0; a < avail_sz; a++)
cpu_group_insert(map,
avail[a].level[ERTS_TOPOLOGY_LOGICAL],
avail[a].level[ERTS_TOPOLOGY_CG]);
erts_free(ERTS_ALC_T_TMP, cg_count);
}
erts_free(ERTS_ALC_T_TMP, avail);
}
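/*
 * Register a callback that wants to know which of 'groups' cpu
 * groups each scheduler belongs to. The group count is capped at the
 * number of main threads, maps with the same group count are shared
 * between callbacks, and NULL is returned when no groups are used.
 */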
static erts_cpu_groups_map_t *
add_cpu_groups(int groups,
erts_cpu_groups_callback_t callback,
void *arg)
{
int use_groups = groups;
erts_cpu_groups_callback_list_t *cgcl;
erts_cpu_groups_map_t *cgm;
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
if (use_groups > max_main_threads)
use_groups = max_main_threads;
if (!use_groups)
return NULL;
no_cpu_groups_callbacks++;
cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
sizeof(erts_cpu_groups_callback_list_t));
cgcl->callback = callback;
cgcl->arg = arg;
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
if (cgm->groups == use_groups) {
cgcl->next = cgm->callback_list;
cgm->callback_list = cgcl;
return cgm;
}
}
cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
sizeof(erts_cpu_groups_map_t));
cgm->next = cpu_groups_maps;
cgm->groups = use_groups;
cgm->array = NULL;
cgm->size = 0;
cgm->logical_processors = 0;
cgm->callback_list = cgcl;
cgcl->next = NULL;
make_cpu_groups_map(cgm, 0);
cpu_groups_maps = cgm;
return cgm;
}
static void
remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg)
{
erts_cpu_groups_map_t *prev_cgm, *cgm;
erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl;
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
no_cpu_groups_callbacks--;
prev_cgm = NULL;
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
prev_cgcl = NULL;
for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
if (cgcl->callback == callback && cgcl->arg == arg) {
if (prev_cgcl)
prev_cgcl->next = cgcl->next;
else
cgm->callback_list = cgcl->next;
erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl);
if (!cgm->callback_list) {
if (prev_cgm)
prev_cgm->next = cgm->next;
else
cpu_groups_maps = cgm->next;
if (cgm->array)
erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array);
erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm);
}
return;
}
prev_cgcl = cgcl;
}
prev_cgm = cgm;
}
erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n");
}
static int
cpu_groups_lookup(erts_cpu_groups_map_t *map,
ErtsSchedulerData *esdp)
{
int start, logical, ix;
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
|| erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
if (esdp->cpu_id < 0)
return (((int) esdp->no) - 1) % map->groups;
logical = esdp->cpu_id;
start = logical % map->size;
ix = start;
do {
if (map->array[ix].logical == logical) {
int group = map->array[ix].cpu_group;
ASSERT(0 <= group && group < map->groups);
return group;
}
ix++;
if (ix == map->size)
ix = 0;
} while (ix != start);
erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
}
static void
update_cpu_groups_maps(void)
{
erts_cpu_groups_map_t *cgm;
ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
for (cgm = cpu_groups_maps; cgm; cgm = cgm->next)
make_cpu_groups_map(cgm, 0);
}
void
erts_add_cpu_groups(int groups,
erts_cpu_groups_callback_t callback,
void *arg)
{
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
add_cpu_groups(groups, callback, arg);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
}
void
erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg)
{
erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
remove_cpu_groups(callback, arg);
erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
}