diff options
-rw-r--r-- | erts/emulator/beam/erl_alloc.types | 2 | ||||
-rw-r--r-- | erts/emulator/beam/erl_cpu_topology.c | 673 | ||||
-rw-r--r-- | erts/emulator/beam/erl_cpu_topology.h | 11 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 4 |
4 files changed, 449 insertions, 241 deletions
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 7df9f19af0..408ffd12f7 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -247,7 +247,7 @@ type CPUDATA LONG_LIVED SYSTEM cpu_data type TMP_CPU_IDS SHORT_LIVED SYSTEM tmp_cpu_ids type EXT_TERM_DATA SHORT_LIVED PROCESSES external_term_data type ZLIB STANDARD SYSTEM zlib -type RDR_GRPS_MAP LONG_LIVED SYSTEM reader_groups_map +type CPU_GRPS_MAP LONG_LIVED SYSTEM cpu_groups_map +if smp type ASYNC SHORT_LIVED SYSTEM async diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c index befab6f3b7..db95c4a5d4 100644 --- a/erts/emulator/beam/erl_cpu_topology.c +++ b/erts/emulator/beam/erl_cpu_topology.c @@ -76,7 +76,8 @@ typedef enum { #define ERTS_CPU_BIND_DEFAULT_BIND \ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD -ErtsCpuBindOrder cpu_bind_order; +static int no_cpu_groups_callbacks; +static ErtsCpuBindOrder cpu_bind_order; static erts_cpu_topology_t *user_cpudata; static int user_cpudata_size; @@ -88,35 +89,44 @@ typedef struct { } erts_avail_cput; typedef struct { - int *map; - int size; - int groups; -} erts_reader_groups_map_test; - -typedef struct { int id; int sub_levels; - int reader_groups; -} erts_rg_count_t; + int cpu_groups; +} erts_cpu_groups_count_t; typedef struct { int logical; - int reader_group; -} erts_reader_groups_map_t; + int cpu_group; +} erts_cpu_groups_map_array_t; + +typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t; +struct erts_cpu_groups_callback_list_t_ { + erts_cpu_groups_callback_list_t *next; + erts_cpu_groups_callback_t callback; + void *arg; +}; + +typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t; +struct erts_cpu_groups_map_t_ { + erts_cpu_groups_map_t *next; + int groups; + erts_cpu_groups_map_array_t *array; + int size; + int logical_processors; + erts_cpu_groups_callback_list_t *callback_list; +}; typedef struct { - erts_reader_groups_map_t *map; - int map_size; - int logical_processors; - int groups; -} erts_make_reader_groups_map_test; + erts_cpu_groups_callback_t callback; + int ix; + void *arg; +} erts_cpu_groups_callback_call_t; + +static erts_cpu_groups_map_t *cpu_groups_maps; -static int reader_groups_available_cpu_check; -static int reader_groups_logical_processors; -static int reader_groups_map_size; -static erts_reader_groups_map_t *reader_groups_map; +static erts_cpu_groups_map_t *reader_groups_map; -#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH +#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH #define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) @@ -128,8 +138,14 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); #endif -static void make_reader_groups_map(erts_make_reader_groups_map_test *test); -static int reader_group_lookup(int logical); +static void reader_groups_callback(int, ErtsSchedulerData *, int, void *); +static erts_cpu_groups_map_t *add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +static void update_cpu_groups_maps(void); +static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test); +static int cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp); static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size); @@ -421,28 +437,51 @@ processor_order_cmp(const void *vx, const void *vy) void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp) { - int reset_read_group = 0; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + int cgcc_ix; + /* Unbind from cpu */ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); if (scheduler2cpu_map[esdp->no].bound_id >= 0 && erts_unbind_from_cpu(cpuinfo) == 0) { esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - reset_read_group = 1; } + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } + } + ASSERT(no_cpu_groups_callbacks == cgcc_ix); erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); - if (reset_read_group) - erts_smp_rwmtx_set_reader_group(0); + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(1, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); if (esdp->no <= max_main_threads) erts_thr_set_main_status(0, 0); + } void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp) { ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + if (esdp->no <= max_main_threads) erts_thr_set_main_status(1, (int) esdp->no); @@ -458,9 +497,17 @@ erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp) void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp) { - int rg = 0; - int res; - int cpu_id; + int res, cpu_id, cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; +#ifdef ERTS_SMP + if (erts_common_run_queue) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); + else { + esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; + } +#endif erts_smp_runq_unlock(esdp->run_queue); erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); cpu_id = scheduler2cpu_map[esdp->no].bind_id; @@ -490,39 +537,71 @@ erts_sched_check_cpu_bind(ErtsSchedulerData *esdp) erts_send_error_to_logger_nogl(dsbufp); } } - if (reader_groups) { - if (esdp->cpu_id >= 0) - rg = reader_group_lookup(esdp->cpu_id); - else - rg = (((int) esdp->no) - 1) % reader_groups + 1; + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + erts_smp_runq_lock(esdp->run_queue); -#ifdef ERTS_SMP - if (erts_common_run_queue) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); - else { - esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; - } -#endif - if (reader_groups) - erts_smp_rwmtx_set_reader_group(rg); } #ifdef ERTS_SMP void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp) { - int no = (int) esdp->no; - if (no <= max_main_threads) { - erts_thr_set_main_status(1, (int) no); - if (reader_groups) { - int rg = (int) no; - if (rg > reader_groups) - rg = (((int) no) - 1) % reader_groups + 1; - erts_smp_rwmtx_set_reader_group(rg); + int cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; } } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + + if (esdp->no <= max_main_threads) + erts_thr_set_main_status(1, (int) esdp->no); } #endif @@ -1388,7 +1467,7 @@ erts_set_cpu_topology(Process *c_p, Eterm term) sizeof(erts_cpu_topology_t)*cpudata_size); } - make_reader_groups_map(NULL); + update_cpu_groups_maps(); write_schedulers_bind_change(cpudata, cpudata_size); @@ -1588,6 +1667,8 @@ erts_pre_early_init_cpu_topology(int *max_rg_p, int *onln_p, int *avail_p) { + cpu_groups_maps = NULL; + no_cpu_groups_callbacks = 0; *max_rg_p = ERTS_MAX_READER_GROUPS; cpuinfo = erts_cpu_info_create(); get_logical_processors(conf_p, onln_p, avail_p); @@ -1609,11 +1690,6 @@ erts_early_init_cpu_topology(int no_schedulers, cpu_bind_order = ERTS_CPU_BIND_UNDEFINED; - reader_groups_available_cpu_check = 1; - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - if (!erts_get_cpu_topology(cpuinfo, system_cpudata) || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, system_cpudata_size)) { @@ -1641,6 +1717,7 @@ erts_init_cpu_topology(void) int ix; erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info"); + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, (sizeof(ErtsCpuBindData) @@ -1661,7 +1738,9 @@ erts_init_cpu_topology(void) : ERTS_CPU_BIND_NONE); } - make_reader_groups_map(NULL); + reader_groups_map = add_cpu_groups(reader_groups, + reader_groups_callback, + NULL); if (cpu_bind_order == ERTS_CPU_BIND_NONE) erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); @@ -1707,7 +1786,7 @@ erts_update_cpu_info(void) } } - make_reader_groups_map(NULL); + update_cpu_groups_maps(); create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); write_schedulers_bind_change(cpudata, cpudata_size); @@ -1723,53 +1802,33 @@ erts_update_cpu_info(void) * reader groups map */ -static Eterm -get_reader_groups_map(Process *c_p, - erts_reader_groups_map_t *map, - int map_size, - int logical_processors) +void +reader_groups_callback(int suspending, + ErtsSchedulerData *esdp, + int group, + void *unused) { -#ifdef DEBUG - Eterm *endp; -#endif - Eterm res = NIL, tuple; - Eterm *hp; - int i; - - hp = HAlloc(c_p, logical_processors*(2+3)); -#ifdef DEBUG - endp = hp + logical_processors*(2+3); -#endif - for (i = map_size - 1; i >= 0; i--) { - if (map[i].logical >= 0) { - tuple = TUPLE2(hp, - make_small(map[i].logical), - make_small(map[i].reader_group)); - hp += 3; - res = CONS(hp, tuple, res); - hp += 2; - } - } - ASSERT(hp == endp); - return res; + if (reader_groups && esdp->no <= max_main_threads) + erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1); } +static Eterm get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset); Eterm erts_debug_reader_groups_map(Process *c_p, int groups) { Eterm res; - erts_make_reader_groups_map_test test; + erts_cpu_groups_map_t test; + test.array = NULL; test.groups = groups; - make_reader_groups_map(&test); - if (!test.map) + make_cpu_groups_map(&test, 1); + if (!test.array) res = NIL; else { - res = get_reader_groups_map(c_p, - test.map, - test.map_size, - test.logical_processors); - erts_free(ERTS_ALC_T_TMP, test.map); + res = get_cpu_groups_map(c_p, &test, 1); + erts_free(ERTS_ALC_T_TMP, test.array); } return res; } @@ -1780,14 +1839,45 @@ erts_get_reader_groups_map(Process *c_p) { Eterm res; erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); - res = get_reader_groups_map(c_p, - reader_groups_map, - reader_groups_map_size, - reader_groups_logical_processors); + res = get_cpu_groups_map(c_p, reader_groups_map, 1); erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); return res; } +/* + * CPU groups + */ + +static Eterm +get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset) +{ +#ifdef DEBUG + Eterm *endp; +#endif + Eterm res = NIL, tuple; + Eterm *hp; + int i; + + hp = HAlloc(c_p, map->logical_processors*(2+3)); +#ifdef DEBUG + endp = hp + map->logical_processors*(2+3); +#endif + for (i = map->size - 1; i >= 0; i--) { + if (map->array[i].logical >= 0) { + tuple = TUPLE2(hp, + make_small(map->array[i].logical), + make_small(map->array[i].cpu_group + offset)); + hp += 3; + res = CONS(hp, tuple, res); + hp += 2; + } + } + ASSERT(hp == endp); + return res; +} + static void make_available_cpu_topology(erts_avail_cput *no, erts_avail_cput *avail, @@ -1848,7 +1938,7 @@ make_available_cpu_topology(erts_avail_cput *no, avail[a].level[j] = no->level[j]; avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical; - avail[a].level[ERTS_TOPOLOGY_RG] = 0; + avail[a].level[ERTS_TOPOLOGY_CG] = 0; ASSERT(last.logical != cpudata[i].logical); @@ -1866,40 +1956,21 @@ make_available_cpu_topology(erts_avail_cput *no, *size = a; } -static int -reader_group_lookup(int logical) -{ - int start = logical % reader_groups_map_size; - int ix = start; - - do { - if (reader_groups_map[ix].logical == logical) { - ASSERT(reader_groups_map[ix].reader_group > 0); - return reader_groups_map[ix].reader_group; - } - ix++; - if (ix == reader_groups_map_size) - ix = 0; - } while (ix != start); - - erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); -} - static void -reader_group_insert(erts_reader_groups_map_t *map, int map_size, - int logical, int reader_group) +cpu_group_insert(erts_cpu_groups_map_t *map, + int logical, int cpu_group) { - int start = logical % map_size; + int start = logical % map->size; int ix = start; do { - if (map[ix].logical < 0) { - map[ix].logical = logical; - map[ix].reader_group = reader_group; + if (map->array[ix].logical < 0) { + map->array[ix].logical = logical; + map->array[ix].cpu_group = cpu_group; return; } ix++; - if (ix == map_size) + if (ix == map->size) ix = 0; } while (ix != start); @@ -1908,107 +1979,100 @@ reader_group_insert(erts_reader_groups_map_t *map, int map_size, static int -sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail) +sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix, + int avail_sz, erts_avail_cput *avail) { int sub_level = level+1; int last = -1; - rgc->sub_levels = 0; + cgc->sub_levels = 0; do { if (last != avail[aix].level[sub_level]) { - rgc->sub_levels++; + cgc->sub_levels++; last = avail[aix].level[sub_level]; } aix++; } - while (aix < avail_sz && rgc->id == avail[aix].level[level]); - rgc->reader_groups = 0; + while (aix < avail_sz && cgc->id == avail[aix].level[level]); + cgc->cpu_groups = 0; return aix; } static int -write_reader_groups(int *rgp, erts_rg_count_t *rgcp, +write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp, int level, int a, int avail_sz, erts_avail_cput *avail) { - int rg = *rgp; + int cg = *cgp; int sub_level = level+1; - int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups; - int xsl = rgcp->sub_levels % rgcp->reader_groups; + int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups; + int xsl = cgcp->sub_levels % cgcp->cpu_groups; int sls = 0; int last = -1; - int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1; + int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1; - ASSERT(level < 0 || avail[a].level[level] == rgcp->id) + ASSERT(level < 0 || avail[a].level[level] == cgcp->id); do { if (last != avail[a].level[sub_level]) { if (!sls) { sls = sl_per_gr; - rg++; - if (rg >= xsl_rg_lim) + cg++; + if (cg >= xsl_cg_lim) sls++; } last = avail[a].level[sub_level]; sls--; } - avail[a].level[ERTS_TOPOLOGY_RG] = rg; + avail[a].level[ERTS_TOPOLOGY_CG] = cg; a++; } while (a < avail_sz && (level < 0 - || avail[a].level[level] == rgcp->id)); + || avail[a].level[level] == cgcp->id)); - ASSERT(rgcp->reader_groups == rg - *rgp); + ASSERT(cgcp->cpu_groups == cg - *cgp); - *rgp = rg; + *cgp = cg; return a; } static int -rg_count_sub_levels_compare(const void *vx, const void *vy) +cg_count_sub_levels_compare(const void *vx, const void *vy) { - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; if (x->sub_levels != y->sub_levels) return y->sub_levels - x->sub_levels; return x->id - y->id; } static int -rg_count_id_compare(const void *vx, const void *vy) +cg_count_id_compare(const void *vx, const void *vy) { - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; return x->id - y->id; } static void -make_reader_groups_map(erts_make_reader_groups_map_test *test) +make_cpu_groups_map(erts_cpu_groups_map_t *map, int test) { int i, spread_level, avail_sz; erts_avail_cput no, *avail; erts_cpu_topology_t *cpudata; - erts_reader_groups_map_t *map; - int map_sz; - int groups = reader_groups; - - if (test) { - test->map = NULL; - test->map_size = 0; - groups = test->groups; - } + ErtsAlcType_t alc_type = (test + ? ERTS_ALC_T_TMP + : ERTS_ALC_T_CPU_GRPS_MAP); - if (!groups) - return; + if (map->array) + erts_free(alc_type, map->array); - if (!test) { - if (reader_groups_map) - erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map); + map->array = NULL; + map->logical_processors = 0; + map->size = 0; - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - } + if (!map->groups) + return; create_tmp_cpu_topology_copy(&cpudata, &avail_sz); @@ -2024,61 +2088,47 @@ make_reader_groups_map(erts_make_reader_groups_map_test *test) sizeof(erts_avail_cput)*avail_sz); make_available_cpu_topology(&no, avail, cpudata, - &avail_sz, test != NULL); + &avail_sz, test); destroy_tmp_cpu_topology_copy(cpudata); - map_sz = avail_sz*2+1; - - if (test) { - map = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - test->map = map; - test->map_size = map_sz; - test->logical_processors = avail_sz; - } - else { - map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - reader_groups_map = map; - reader_groups_logical_processors = avail_sz; - reader_groups_map_size = map_sz; + map->size = avail_sz*2+1; - } + map->array = erts_alloc(alc_type, + (sizeof(erts_cpu_groups_map_array_t) + * map->size));; + map->logical_processors = avail_sz; - for (i = 0; i < map_sz; i++) { - map[i].logical = -1; - map[i].reader_group = 0; + for (i = 0; i < map->size; i++) { + map->array[i].logical = -1; + map->array[i].cpu_group = -1; } spread_level = ERTS_TOPOLOGY_CORE; for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) { - if (no.level[i] > groups) { + if (no.level[i] > map->groups) { spread_level = i; break; } } - if (no.level[spread_level] <= groups) { - int a, rg, last = -1; - rg = 0; + if (no.level[spread_level] <= map->groups) { + int a, cg, last = -1; + cg = -1; ASSERT(spread_level == ERTS_TOPOLOGY_CORE); for (a = 0; a < avail_sz; a++) { if (last != avail[a].level[spread_level]) { - rg++; + cg++; last = avail[a].level[spread_level]; } - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - rg); + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + cg); } } - else { /* groups < no.level[spread_level] */ - erts_rg_count_t *rg_count; - int a, rg, tl, toplevels; + else { /* map->groups < no.level[spread_level] */ + erts_cpu_groups_count_t *cg_count; + int a, cg, tl, toplevels; tl = spread_level-1; @@ -2087,76 +2137,223 @@ make_reader_groups_map(erts_make_reader_groups_map_test *test) else toplevels = no.level[tl]; - rg_count = erts_alloc(ERTS_ALC_T_TMP, - toplevels*sizeof(erts_rg_count_t)); + cg_count = erts_alloc(ERTS_ALC_T_TMP, + toplevels*sizeof(erts_cpu_groups_count_t)); if (toplevels == 1) { - rg_count[0].id = 0; - rg_count[0].sub_levels = no.level[spread_level]; - rg_count[0].reader_groups = groups; + cg_count[0].id = 0; + cg_count[0].sub_levels = no.level[spread_level]; + cg_count[0].cpu_groups = map->groups; } else { - int rgs_per_tl, rgs; - rgs = groups; - rgs_per_tl = rgs / toplevels; + int cgs_per_tl, cgs; + cgs = map->groups; + cgs_per_tl = cgs / toplevels; a = 0; for (i = 0; i < toplevels; i++) { - rg_count[i].id = avail[a].level[tl]; - a = sub_levels(&rg_count[i], tl, a, avail_sz, avail); + cg_count[i].id = avail[a].level[tl]; + a = sub_levels(&cg_count[i], tl, a, avail_sz, avail); } - qsort(rg_count, + qsort(cg_count, toplevels, - sizeof(erts_rg_count_t), - rg_count_sub_levels_compare); + sizeof(erts_cpu_groups_count_t), + cg_count_sub_levels_compare); for (i = 0; i < toplevels; i++) { - if (rg_count[i].sub_levels < rgs_per_tl) { - rg_count[i].reader_groups = rg_count[i].sub_levels; - rgs -= rg_count[i].sub_levels; + if (cg_count[i].sub_levels < cgs_per_tl) { + cg_count[i].cpu_groups = cg_count[i].sub_levels; + cgs -= cg_count[i].sub_levels; } else { - rg_count[i].reader_groups = rgs_per_tl; - rgs -= rgs_per_tl; + cg_count[i].cpu_groups = cgs_per_tl; + cgs -= cgs_per_tl; } } - while (rgs > 0) { + while (cgs > 0) { for (i = 0; i < toplevels; i++) { - if (rg_count[i].sub_levels == rg_count[i].reader_groups) + if (cg_count[i].sub_levels == cg_count[i].cpu_groups) break; else { - rg_count[i].reader_groups++; - if (--rgs == 0) + cg_count[i].cpu_groups++; + if (--cgs == 0) break; } } } - qsort(rg_count, + qsort(cg_count, toplevels, - sizeof(erts_rg_count_t), - rg_count_id_compare); + sizeof(erts_cpu_groups_count_t), + cg_count_id_compare); } - a = i = rg = 0; + a = i = 0; + cg = -1; while (a < avail_sz) { - a = write_reader_groups(&rg, &rg_count[i], tl, - a, avail_sz, avail); + a = write_cpu_groups(&cg, &cg_count[i], tl, + a, avail_sz, avail); i++; } - ASSERT(groups == rg); + ASSERT(map->groups == cg + 1); for (a = 0; a < avail_sz; a++) - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - avail[a].level[ERTS_TOPOLOGY_RG]); + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + avail[a].level[ERTS_TOPOLOGY_CG]); - erts_free(ERTS_ALC_T_TMP, rg_count); + erts_free(ERTS_ALC_T_TMP, cg_count); } erts_free(ERTS_ALC_T_TMP, avail); } + +static erts_cpu_groups_map_t * +add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + int use_groups = groups; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_map_t *cgm; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (use_groups > max_main_threads) + use_groups = max_main_threads; + + if (!use_groups) + return NULL; + + no_cpu_groups_callbacks++; + cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_callback_list_t)); + cgcl->callback = callback; + cgcl->arg = arg; + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + if (cgm->groups == use_groups) { + cgcl->next = cgm->callback_list; + cgm->callback_list = cgcl; + return cgm; + } + } + + + cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_map_t)); + cgm->next = cpu_groups_maps; + cgm->groups = use_groups; + cgm->array = NULL; + cgm->size = 0; + cgm->logical_processors = 0; + cgm->callback_list = cgcl; + + cgcl->next = NULL; + + make_cpu_groups_map(cgm, 0); + + cpu_groups_maps = cgm; + + return cgm; +} + +static void +remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg) +{ + erts_cpu_groups_map_t *prev_cgm, *cgm; + erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + no_cpu_groups_callbacks--; + + prev_cgm = NULL; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + prev_cgcl = NULL; + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + if (cgcl->callback == callback && cgcl->arg == arg) { + if (prev_cgcl) + prev_cgcl->next = cgcl->next; + else + cgm->callback_list = cgcl->next; + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl); + if (!cgm->callback_list) { + if (prev_cgm) + prev_cgm->next = cgm->next; + else + cpu_groups_maps = cgm->next; + if (cgm->array) + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array); + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm); + } + return; + } + prev_cgcl = cgcl; + } + prev_cgm = cgm; + } + + erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n"); +} + +static int +cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp) +{ + int start, logical, ix; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx) + || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (esdp->cpu_id < 0) + return (((int) esdp->no) - 1) % map->groups; + + logical = esdp->cpu_id; + start = logical % map->size; + ix = start; + + do { + if (map->array[ix].logical == logical) { + int group = map->array[ix].cpu_group; + ASSERT(0 <= group && group < map->groups); + return group; + } + ix++; + if (ix == map->size) + ix = 0; + } while (ix != start); + + erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); +} + +static void +update_cpu_groups_maps(void) +{ + erts_cpu_groups_map_t *cgm; + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) + make_cpu_groups_map(cgm, 0); +} + +void +erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + add_cpu_groups(groups, callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} + +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + remove_cpu_groups(callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h index b83ddc25da..c5a9520b61 100644 --- a/erts/emulator/beam/erl_cpu_topology.h +++ b/erts/emulator/beam/erl_cpu_topology.h @@ -91,4 +91,15 @@ Eterm erts_fake_scheduler_bindings(Process *p, Eterm how); Eterm erts_debug_cpu_groups_map(Process *c_p, int groups); +typedef void (*erts_cpu_groups_callback_t)(int, + ErtsSchedulerData *, + int, + void *); + +void erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg); + #endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 4940344108..ee282ebbee 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -2854,10 +2854,10 @@ suspend_scheduler(ErtsSchedulerData *esdp) ASSERT(no != 1); - erts_sched_check_cpu_bind_prep_suspend(esdp); - erts_smp_runq_unlock(esdp->run_queue); + erts_sched_check_cpu_bind_prep_suspend(esdp); + if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_inactive); |