author     Rickard Green <[email protected]>  2010-09-24 11:09:55 +0200
committer  Rickard Green <[email protected]>  2010-11-18 14:14:25 +0100
commit     f4aa12fc5f5756d7574311cf66cd5ec8025df682 (patch)
tree       c0b36f4aeb7a950b88b858b0b9975aad8668aaf4
parent     728b62363b9ec6248d14438f36adf03f7d737f89 (diff)
Generalize reader groups
Reader groups have been generalized into cpu groups, which are used to
implement reader groups and can also be used to implement other
functionality in the future.
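For context, the public interface this patch adds (the erts_cpu_groups_callback_t typedef together with erts_add_cpu_groups() and erts_remove_cpu_groups(), see the erl_cpu_topology.h hunk in the diff below) is meant to be consumed by other emulator subsystems. The following is only a sketch of such a consumer; the callback name, its body, and the group count used here are hypothetical and not part of this patch.

/*
 * Sketch of a consumer of the new cpu groups interface. The typedef and
 * the erts_add_cpu_groups()/erts_remove_cpu_groups() calls are from this
 * patch; everything named my_* is illustrative only.
 */
#include "erl_cpu_topology.h"

/* Invoked for a scheduler when its cpu group assignment is (re)computed
 * (suspending == 0) or when the scheduler is about to be suspended
 * (suspending != 0). The group index is 0-based. */
static void
my_cpu_groups_callback(int suspending,
                       ErtsSchedulerData *esdp,
                       int group,
                       void *arg)
{
    (void) esdp;
    (void) arg;
    (void) group;
    (void) suspending;
    /* The in-tree reader groups implementation, for example, forwards
     * this to erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1). */
}

static void
my_subsystem_init(void)
{
    /* Request up to 8 cpu groups; the actual number is capped by the
     * number of main threads (see add_cpu_groups() in the diff). */
    erts_add_cpu_groups(8, my_cpu_groups_callback, NULL);
}

static void
my_subsystem_fini(void)
{
    /* The (callback, arg) pair identifies the registration to remove. */
    erts_remove_cpu_groups(my_cpu_groups_callback, NULL);
}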
-rw-r--r--  erts/emulator/beam/erl_alloc.types    |   2
-rw-r--r--  erts/emulator/beam/erl_cpu_topology.c | 673
-rw-r--r--  erts/emulator/beam/erl_cpu_topology.h |  11
-rw-r--r--  erts/emulator/beam/erl_process.c      |   4
4 files changed, 449 insertions(+), 241 deletions(-)
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 7df9f19af0..408ffd12f7 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -247,7 +247,7 @@ type CPUDATA LONG_LIVED SYSTEM cpu_data type TMP_CPU_IDS SHORT_LIVED SYSTEM tmp_cpu_ids type EXT_TERM_DATA SHORT_LIVED PROCESSES external_term_data type ZLIB STANDARD SYSTEM zlib -type RDR_GRPS_MAP LONG_LIVED SYSTEM reader_groups_map +type CPU_GRPS_MAP LONG_LIVED SYSTEM cpu_groups_map +if smp type ASYNC SHORT_LIVED SYSTEM async diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c index befab6f3b7..db95c4a5d4 100644 --- a/erts/emulator/beam/erl_cpu_topology.c +++ b/erts/emulator/beam/erl_cpu_topology.c @@ -76,7 +76,8 @@ typedef enum { #define ERTS_CPU_BIND_DEFAULT_BIND \ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD -ErtsCpuBindOrder cpu_bind_order; +static int no_cpu_groups_callbacks; +static ErtsCpuBindOrder cpu_bind_order; static erts_cpu_topology_t *user_cpudata; static int user_cpudata_size; @@ -88,35 +89,44 @@ typedef struct { } erts_avail_cput; typedef struct { - int *map; - int size; - int groups; -} erts_reader_groups_map_test; - -typedef struct { int id; int sub_levels; - int reader_groups; -} erts_rg_count_t; + int cpu_groups; +} erts_cpu_groups_count_t; typedef struct { int logical; - int reader_group; -} erts_reader_groups_map_t; + int cpu_group; +} erts_cpu_groups_map_array_t; + +typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t; +struct erts_cpu_groups_callback_list_t_ { + erts_cpu_groups_callback_list_t *next; + erts_cpu_groups_callback_t callback; + void *arg; +}; + +typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t; +struct erts_cpu_groups_map_t_ { + erts_cpu_groups_map_t *next; + int groups; + erts_cpu_groups_map_array_t *array; + int size; + int logical_processors; + erts_cpu_groups_callback_list_t *callback_list; +}; typedef struct { - erts_reader_groups_map_t *map; - int map_size; - int logical_processors; - int groups; -} erts_make_reader_groups_map_test; + erts_cpu_groups_callback_t callback; + int ix; + void *arg; +} erts_cpu_groups_callback_call_t; + +static erts_cpu_groups_map_t *cpu_groups_maps; -static int reader_groups_available_cpu_check; -static int reader_groups_logical_processors; -static int reader_groups_map_size; -static erts_reader_groups_map_t *reader_groups_map; +static erts_cpu_groups_map_t *reader_groups_map; -#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH +#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH #define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) @@ -128,8 +138,14 @@ static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); #endif -static void make_reader_groups_map(erts_make_reader_groups_map_test *test); -static int reader_group_lookup(int logical); +static void reader_groups_callback(int, ErtsSchedulerData *, int, void *); +static erts_cpu_groups_map_t *add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +static void update_cpu_groups_maps(void); +static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test); +static int cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp); static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size); @@ -421,28 +437,51 @@ processor_order_cmp(const void *vx, const void *vy) void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp) 
{ - int reset_read_group = 0; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + int cgcc_ix; + /* Unbind from cpu */ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); if (scheduler2cpu_map[esdp->no].bound_id >= 0 && erts_unbind_from_cpu(cpuinfo) == 0) { esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - reset_read_group = 1; } + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } + } + ASSERT(no_cpu_groups_callbacks == cgcc_ix); erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); - if (reset_read_group) - erts_smp_rwmtx_set_reader_group(0); + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(1, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); if (esdp->no <= max_main_threads) erts_thr_set_main_status(0, 0); + } void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp) { ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + if (esdp->no <= max_main_threads) erts_thr_set_main_status(1, (int) esdp->no); @@ -458,9 +497,17 @@ erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp) void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp) { - int rg = 0; - int res; - int cpu_id; + int res, cpu_id, cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; +#ifdef ERTS_SMP + if (erts_common_run_queue) + erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); + else { + esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; + } +#endif erts_smp_runq_unlock(esdp->run_queue); erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); cpu_id = scheduler2cpu_map[esdp->no].bind_id; @@ -490,39 +537,71 @@ erts_sched_check_cpu_bind(ErtsSchedulerData *esdp) erts_send_error_to_logger_nogl(dsbufp); } } - if (reader_groups) { - if (esdp->cpu_id >= 0) - rg = reader_group_lookup(esdp->cpu_id); - else - rg = (((int) esdp->no) - 1) % reader_groups + 1; + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; + } } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + erts_smp_runq_lock(esdp->run_queue); -#ifdef ERTS_SMP - if (erts_common_run_queue) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); - else { - esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; - } -#endif - if (reader_groups) - erts_smp_rwmtx_set_reader_group(rg); } #ifdef ERTS_SMP void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp) { - int no = (int) esdp->no; - if (no <= max_main_threads) { - erts_thr_set_main_status(1, (int) no); - if (reader_groups) { - int rg = (int) no; - if (rg > reader_groups) - rg = (((int) no) - 1) % 
reader_groups + 1; - erts_smp_rwmtx_set_reader_group(rg); + int cgcc_ix; + erts_cpu_groups_map_t *cgm; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_callback_call_t *cgcc; + + erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); + + cgcc = erts_alloc(ERTS_ALC_T_TMP, + (no_cpu_groups_callbacks + * sizeof(erts_cpu_groups_callback_call_t))); + cgcc_ix = 0; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + cgcc[cgcc_ix].callback = cgcl->callback; + cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp); + cgcc[cgcc_ix].arg = cgcl->arg; + cgcc_ix++; } } + + ASSERT(no_cpu_groups_callbacks == cgcc_ix); + erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); + + for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++) + cgcc[cgcc_ix].callback(0, + esdp, + cgcc[cgcc_ix].ix, + cgcc[cgcc_ix].arg); + + erts_free(ERTS_ALC_T_TMP, cgcc); + + if (esdp->no <= max_main_threads) + erts_thr_set_main_status(1, (int) esdp->no); } #endif @@ -1388,7 +1467,7 @@ erts_set_cpu_topology(Process *c_p, Eterm term) sizeof(erts_cpu_topology_t)*cpudata_size); } - make_reader_groups_map(NULL); + update_cpu_groups_maps(); write_schedulers_bind_change(cpudata, cpudata_size); @@ -1588,6 +1667,8 @@ erts_pre_early_init_cpu_topology(int *max_rg_p, int *onln_p, int *avail_p) { + cpu_groups_maps = NULL; + no_cpu_groups_callbacks = 0; *max_rg_p = ERTS_MAX_READER_GROUPS; cpuinfo = erts_cpu_info_create(); get_logical_processors(conf_p, onln_p, avail_p); @@ -1609,11 +1690,6 @@ erts_early_init_cpu_topology(int no_schedulers, cpu_bind_order = ERTS_CPU_BIND_UNDEFINED; - reader_groups_available_cpu_check = 1; - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - if (!erts_get_cpu_topology(cpuinfo, system_cpudata) || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, system_cpudata_size)) { @@ -1641,6 +1717,7 @@ erts_init_cpu_topology(void) int ix; erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info"); + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, (sizeof(ErtsCpuBindData) @@ -1661,7 +1738,9 @@ erts_init_cpu_topology(void) : ERTS_CPU_BIND_NONE); } - make_reader_groups_map(NULL); + reader_groups_map = add_cpu_groups(reader_groups, + reader_groups_callback, + NULL); if (cpu_bind_order == ERTS_CPU_BIND_NONE) erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); @@ -1707,7 +1786,7 @@ erts_update_cpu_info(void) } } - make_reader_groups_map(NULL); + update_cpu_groups_maps(); create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); write_schedulers_bind_change(cpudata, cpudata_size); @@ -1723,53 +1802,33 @@ erts_update_cpu_info(void) * reader groups map */ -static Eterm -get_reader_groups_map(Process *c_p, - erts_reader_groups_map_t *map, - int map_size, - int logical_processors) +void +reader_groups_callback(int suspending, + ErtsSchedulerData *esdp, + int group, + void *unused) { -#ifdef DEBUG - Eterm *endp; -#endif - Eterm res = NIL, tuple; - Eterm *hp; - int i; - - hp = HAlloc(c_p, logical_processors*(2+3)); -#ifdef DEBUG - endp = hp + logical_processors*(2+3); -#endif - for (i = map_size - 1; i >= 0; i--) { - if (map[i].logical >= 0) { - tuple = TUPLE2(hp, - make_small(map[i].logical), - make_small(map[i].reader_group)); - hp += 3; - res = CONS(hp, tuple, res); - hp += 2; - } - } - ASSERT(hp == endp); - return res; + if (reader_groups && esdp->no <= max_main_threads) + erts_smp_rwmtx_set_reader_group(suspending ? 
0 : group+1); } +static Eterm get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset); Eterm erts_debug_reader_groups_map(Process *c_p, int groups) { Eterm res; - erts_make_reader_groups_map_test test; + erts_cpu_groups_map_t test; + test.array = NULL; test.groups = groups; - make_reader_groups_map(&test); - if (!test.map) + make_cpu_groups_map(&test, 1); + if (!test.array) res = NIL; else { - res = get_reader_groups_map(c_p, - test.map, - test.map_size, - test.logical_processors); - erts_free(ERTS_ALC_T_TMP, test.map); + res = get_cpu_groups_map(c_p, &test, 1); + erts_free(ERTS_ALC_T_TMP, test.array); } return res; } @@ -1780,14 +1839,45 @@ erts_get_reader_groups_map(Process *c_p) { Eterm res; erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); - res = get_reader_groups_map(c_p, - reader_groups_map, - reader_groups_map_size, - reader_groups_logical_processors); + res = get_cpu_groups_map(c_p, reader_groups_map, 1); erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); return res; } +/* + * CPU groups + */ + +static Eterm +get_cpu_groups_map(Process *c_p, + erts_cpu_groups_map_t *map, + int offset) +{ +#ifdef DEBUG + Eterm *endp; +#endif + Eterm res = NIL, tuple; + Eterm *hp; + int i; + + hp = HAlloc(c_p, map->logical_processors*(2+3)); +#ifdef DEBUG + endp = hp + map->logical_processors*(2+3); +#endif + for (i = map->size - 1; i >= 0; i--) { + if (map->array[i].logical >= 0) { + tuple = TUPLE2(hp, + make_small(map->array[i].logical), + make_small(map->array[i].cpu_group + offset)); + hp += 3; + res = CONS(hp, tuple, res); + hp += 2; + } + } + ASSERT(hp == endp); + return res; +} + static void make_available_cpu_topology(erts_avail_cput *no, erts_avail_cput *avail, @@ -1848,7 +1938,7 @@ make_available_cpu_topology(erts_avail_cput *no, avail[a].level[j] = no->level[j]; avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical; - avail[a].level[ERTS_TOPOLOGY_RG] = 0; + avail[a].level[ERTS_TOPOLOGY_CG] = 0; ASSERT(last.logical != cpudata[i].logical); @@ -1866,40 +1956,21 @@ make_available_cpu_topology(erts_avail_cput *no, *size = a; } -static int -reader_group_lookup(int logical) -{ - int start = logical % reader_groups_map_size; - int ix = start; - - do { - if (reader_groups_map[ix].logical == logical) { - ASSERT(reader_groups_map[ix].reader_group > 0); - return reader_groups_map[ix].reader_group; - } - ix++; - if (ix == reader_groups_map_size) - ix = 0; - } while (ix != start); - - erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); -} - static void -reader_group_insert(erts_reader_groups_map_t *map, int map_size, - int logical, int reader_group) +cpu_group_insert(erts_cpu_groups_map_t *map, + int logical, int cpu_group) { - int start = logical % map_size; + int start = logical % map->size; int ix = start; do { - if (map[ix].logical < 0) { - map[ix].logical = logical; - map[ix].reader_group = reader_group; + if (map->array[ix].logical < 0) { + map->array[ix].logical = logical; + map->array[ix].cpu_group = cpu_group; return; } ix++; - if (ix == map_size) + if (ix == map->size) ix = 0; } while (ix != start); @@ -1908,107 +1979,100 @@ reader_group_insert(erts_reader_groups_map_t *map, int map_size, static int -sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail) +sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix, + int avail_sz, erts_avail_cput *avail) { int sub_level = level+1; int last = -1; - rgc->sub_levels = 0; + cgc->sub_levels = 0; do { if (last != avail[aix].level[sub_level]) { - rgc->sub_levels++; + cgc->sub_levels++; last = 
avail[aix].level[sub_level]; } aix++; } - while (aix < avail_sz && rgc->id == avail[aix].level[level]); - rgc->reader_groups = 0; + while (aix < avail_sz && cgc->id == avail[aix].level[level]); + cgc->cpu_groups = 0; return aix; } static int -write_reader_groups(int *rgp, erts_rg_count_t *rgcp, +write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp, int level, int a, int avail_sz, erts_avail_cput *avail) { - int rg = *rgp; + int cg = *cgp; int sub_level = level+1; - int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups; - int xsl = rgcp->sub_levels % rgcp->reader_groups; + int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups; + int xsl = cgcp->sub_levels % cgcp->cpu_groups; int sls = 0; int last = -1; - int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1; + int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1; - ASSERT(level < 0 || avail[a].level[level] == rgcp->id) + ASSERT(level < 0 || avail[a].level[level] == cgcp->id); do { if (last != avail[a].level[sub_level]) { if (!sls) { sls = sl_per_gr; - rg++; - if (rg >= xsl_rg_lim) + cg++; + if (cg >= xsl_cg_lim) sls++; } last = avail[a].level[sub_level]; sls--; } - avail[a].level[ERTS_TOPOLOGY_RG] = rg; + avail[a].level[ERTS_TOPOLOGY_CG] = cg; a++; } while (a < avail_sz && (level < 0 - || avail[a].level[level] == rgcp->id)); + || avail[a].level[level] == cgcp->id)); - ASSERT(rgcp->reader_groups == rg - *rgp); + ASSERT(cgcp->cpu_groups == cg - *cgp); - *rgp = rg; + *cgp = cg; return a; } static int -rg_count_sub_levels_compare(const void *vx, const void *vy) +cg_count_sub_levels_compare(const void *vx, const void *vy) { - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; if (x->sub_levels != y->sub_levels) return y->sub_levels - x->sub_levels; return x->id - y->id; } static int -rg_count_id_compare(const void *vx, const void *vy) +cg_count_id_compare(const void *vx, const void *vy) { - erts_rg_count_t *x = (erts_rg_count_t *) vx; - erts_rg_count_t *y = (erts_rg_count_t *) vy; + erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx; + erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy; return x->id - y->id; } static void -make_reader_groups_map(erts_make_reader_groups_map_test *test) +make_cpu_groups_map(erts_cpu_groups_map_t *map, int test) { int i, spread_level, avail_sz; erts_avail_cput no, *avail; erts_cpu_topology_t *cpudata; - erts_reader_groups_map_t *map; - int map_sz; - int groups = reader_groups; - - if (test) { - test->map = NULL; - test->map_size = 0; - groups = test->groups; - } + ErtsAlcType_t alc_type = (test + ? 
ERTS_ALC_T_TMP + : ERTS_ALC_T_CPU_GRPS_MAP); - if (!groups) - return; + if (map->array) + erts_free(alc_type, map->array); - if (!test) { - if (reader_groups_map) - erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map); + map->array = NULL; + map->logical_processors = 0; + map->size = 0; - reader_groups_logical_processors = 0; - reader_groups_map_size = 0; - reader_groups_map = NULL; - } + if (!map->groups) + return; create_tmp_cpu_topology_copy(&cpudata, &avail_sz); @@ -2024,61 +2088,47 @@ make_reader_groups_map(erts_make_reader_groups_map_test *test) sizeof(erts_avail_cput)*avail_sz); make_available_cpu_topology(&no, avail, cpudata, - &avail_sz, test != NULL); + &avail_sz, test); destroy_tmp_cpu_topology_copy(cpudata); - map_sz = avail_sz*2+1; - - if (test) { - map = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - test->map = map; - test->map_size = map_sz; - test->logical_processors = avail_sz; - } - else { - map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP, - (sizeof(erts_reader_groups_map_t) - * map_sz)); - reader_groups_map = map; - reader_groups_logical_processors = avail_sz; - reader_groups_map_size = map_sz; + map->size = avail_sz*2+1; - } + map->array = erts_alloc(alc_type, + (sizeof(erts_cpu_groups_map_array_t) + * map->size));; + map->logical_processors = avail_sz; - for (i = 0; i < map_sz; i++) { - map[i].logical = -1; - map[i].reader_group = 0; + for (i = 0; i < map->size; i++) { + map->array[i].logical = -1; + map->array[i].cpu_group = -1; } spread_level = ERTS_TOPOLOGY_CORE; for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) { - if (no.level[i] > groups) { + if (no.level[i] > map->groups) { spread_level = i; break; } } - if (no.level[spread_level] <= groups) { - int a, rg, last = -1; - rg = 0; + if (no.level[spread_level] <= map->groups) { + int a, cg, last = -1; + cg = -1; ASSERT(spread_level == ERTS_TOPOLOGY_CORE); for (a = 0; a < avail_sz; a++) { if (last != avail[a].level[spread_level]) { - rg++; + cg++; last = avail[a].level[spread_level]; } - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - rg); + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + cg); } } - else { /* groups < no.level[spread_level] */ - erts_rg_count_t *rg_count; - int a, rg, tl, toplevels; + else { /* map->groups < no.level[spread_level] */ + erts_cpu_groups_count_t *cg_count; + int a, cg, tl, toplevels; tl = spread_level-1; @@ -2087,76 +2137,223 @@ make_reader_groups_map(erts_make_reader_groups_map_test *test) else toplevels = no.level[tl]; - rg_count = erts_alloc(ERTS_ALC_T_TMP, - toplevels*sizeof(erts_rg_count_t)); + cg_count = erts_alloc(ERTS_ALC_T_TMP, + toplevels*sizeof(erts_cpu_groups_count_t)); if (toplevels == 1) { - rg_count[0].id = 0; - rg_count[0].sub_levels = no.level[spread_level]; - rg_count[0].reader_groups = groups; + cg_count[0].id = 0; + cg_count[0].sub_levels = no.level[spread_level]; + cg_count[0].cpu_groups = map->groups; } else { - int rgs_per_tl, rgs; - rgs = groups; - rgs_per_tl = rgs / toplevels; + int cgs_per_tl, cgs; + cgs = map->groups; + cgs_per_tl = cgs / toplevels; a = 0; for (i = 0; i < toplevels; i++) { - rg_count[i].id = avail[a].level[tl]; - a = sub_levels(&rg_count[i], tl, a, avail_sz, avail); + cg_count[i].id = avail[a].level[tl]; + a = sub_levels(&cg_count[i], tl, a, avail_sz, avail); } - qsort(rg_count, + qsort(cg_count, toplevels, - sizeof(erts_rg_count_t), - rg_count_sub_levels_compare); + sizeof(erts_cpu_groups_count_t), + cg_count_sub_levels_compare); for (i = 0; i < toplevels; i++) 
{ - if (rg_count[i].sub_levels < rgs_per_tl) { - rg_count[i].reader_groups = rg_count[i].sub_levels; - rgs -= rg_count[i].sub_levels; + if (cg_count[i].sub_levels < cgs_per_tl) { + cg_count[i].cpu_groups = cg_count[i].sub_levels; + cgs -= cg_count[i].sub_levels; } else { - rg_count[i].reader_groups = rgs_per_tl; - rgs -= rgs_per_tl; + cg_count[i].cpu_groups = cgs_per_tl; + cgs -= cgs_per_tl; } } - while (rgs > 0) { + while (cgs > 0) { for (i = 0; i < toplevels; i++) { - if (rg_count[i].sub_levels == rg_count[i].reader_groups) + if (cg_count[i].sub_levels == cg_count[i].cpu_groups) break; else { - rg_count[i].reader_groups++; - if (--rgs == 0) + cg_count[i].cpu_groups++; + if (--cgs == 0) break; } } } - qsort(rg_count, + qsort(cg_count, toplevels, - sizeof(erts_rg_count_t), - rg_count_id_compare); + sizeof(erts_cpu_groups_count_t), + cg_count_id_compare); } - a = i = rg = 0; + a = i = 0; + cg = -1; while (a < avail_sz) { - a = write_reader_groups(&rg, &rg_count[i], tl, - a, avail_sz, avail); + a = write_cpu_groups(&cg, &cg_count[i], tl, + a, avail_sz, avail); i++; } - ASSERT(groups == rg); + ASSERT(map->groups == cg + 1); for (a = 0; a < avail_sz; a++) - reader_group_insert(map, - map_sz, - avail[a].level[ERTS_TOPOLOGY_LOGICAL], - avail[a].level[ERTS_TOPOLOGY_RG]); + cpu_group_insert(map, + avail[a].level[ERTS_TOPOLOGY_LOGICAL], + avail[a].level[ERTS_TOPOLOGY_CG]); - erts_free(ERTS_ALC_T_TMP, rg_count); + erts_free(ERTS_ALC_T_TMP, cg_count); } erts_free(ERTS_ALC_T_TMP, avail); } + +static erts_cpu_groups_map_t * +add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + int use_groups = groups; + erts_cpu_groups_callback_list_t *cgcl; + erts_cpu_groups_map_t *cgm; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (use_groups > max_main_threads) + use_groups = max_main_threads; + + if (!use_groups) + return NULL; + + no_cpu_groups_callbacks++; + cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_callback_list_t)); + cgcl->callback = callback; + cgcl->arg = arg; + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + if (cgm->groups == use_groups) { + cgcl->next = cgm->callback_list; + cgm->callback_list = cgcl; + return cgm; + } + } + + + cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP, + sizeof(erts_cpu_groups_map_t)); + cgm->next = cpu_groups_maps; + cgm->groups = use_groups; + cgm->array = NULL; + cgm->size = 0; + cgm->logical_processors = 0; + cgm->callback_list = cgcl; + + cgcl->next = NULL; + + make_cpu_groups_map(cgm, 0); + + cpu_groups_maps = cgm; + + return cgm; +} + +static void +remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg) +{ + erts_cpu_groups_map_t *prev_cgm, *cgm; + erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + no_cpu_groups_callbacks--; + + prev_cgm = NULL; + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) { + prev_cgcl = NULL; + for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) { + if (cgcl->callback == callback && cgcl->arg == arg) { + if (prev_cgcl) + prev_cgcl->next = cgcl->next; + else + cgm->callback_list = cgcl->next; + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl); + if (!cgm->callback_list) { + if (prev_cgm) + prev_cgm->next = cgm->next; + else + cpu_groups_maps = cgm->next; + if (cgm->array) + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array); + erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm); + } + return; + } + prev_cgcl = cgcl; + } + prev_cgm = cgm; + } + + erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n"); +} + 
+static int +cpu_groups_lookup(erts_cpu_groups_map_t *map, + ErtsSchedulerData *esdp) +{ + int start, logical, ix; + + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx) + || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + if (esdp->cpu_id < 0) + return (((int) esdp->no) - 1) % map->groups; + + logical = esdp->cpu_id; + start = logical % map->size; + ix = start; + + do { + if (map->array[ix].logical == logical) { + int group = map->array[ix].cpu_group; + ASSERT(0 <= group && group < map->groups); + return group; + } + ix++; + if (ix == map->size) + ix = 0; + } while (ix != start); + + erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical); +} + +static void +update_cpu_groups_maps(void) +{ + erts_cpu_groups_map_t *cgm; + ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx)); + + for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) + make_cpu_groups_map(cgm, 0); +} + +void +erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + add_cpu_groups(groups, callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} + +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg) +{ + erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx); + remove_cpu_groups(callback, arg); + erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx); +} diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h index b83ddc25da..c5a9520b61 100644 --- a/erts/emulator/beam/erl_cpu_topology.h +++ b/erts/emulator/beam/erl_cpu_topology.h @@ -91,4 +91,15 @@ Eterm erts_fake_scheduler_bindings(Process *p, Eterm how); Eterm erts_debug_cpu_groups_map(Process *c_p, int groups); +typedef void (*erts_cpu_groups_callback_t)(int, + ErtsSchedulerData *, + int, + void *); + +void erts_add_cpu_groups(int groups, + erts_cpu_groups_callback_t callback, + void *arg); +void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback, + void *arg); + #endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 4940344108..ee282ebbee 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -2854,10 +2854,10 @@ suspend_scheduler(ErtsSchedulerData *esdp) ASSERT(no != 1); - erts_sched_check_cpu_bind_prep_suspend(esdp); - erts_smp_runq_unlock(esdp->run_queue); + erts_sched_check_cpu_bind_prep_suspend(esdp); + if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_inactive); |
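The lookup scheme underlying the new maps may be easier to see outside the diff. Below is a standalone, simplified sketch (hypothetical sizes and data; not the emulator code itself) of the open-addressed array that cpu_group_insert() fills and cpu_groups_lookup() probes: entries are keyed by logical cpu id, the array has 2*N+1 slots for N logical processors, and probing is linear starting at logical % size.

/* Standalone sketch of the {logical cpu id, cpu group} map layout used
 * by cpu_group_insert()/cpu_groups_lookup() in the diff above. */
#include <stdio.h>

#define MAP_SIZE 7              /* 2*3+1 slots for 3 logical processors */

struct entry { int logical; int cpu_group; };

static struct entry map[MAP_SIZE];

static void insert(int logical, int cpu_group)
{
    int start = logical % MAP_SIZE, ix = start;
    do {
        if (map[ix].logical < 0) {          /* free slot */
            map[ix].logical = logical;
            map[ix].cpu_group = cpu_group;
            return;
        }
        if (++ix == MAP_SIZE)
            ix = 0;
    } while (ix != start);
}

static int lookup(int logical)
{
    int start = logical % MAP_SIZE, ix = start;
    do {
        if (map[ix].logical == logical)
            return map[ix].cpu_group;
        if (++ix == MAP_SIZE)
            ix = 0;
    } while (ix != start);
    return -1;                              /* the emulator aborts instead */
}

int main(void)
{
    int i;
    for (i = 0; i < MAP_SIZE; i++) {
        map[i].logical = -1;
        map[i].cpu_group = -1;
    }

    /* three logical cpus spread over two groups */
    insert(0, 0);
    insert(1, 0);
    insert(2, 1);

    printf("cpu 2 -> group %d\n", lookup(2));   /* prints: cpu 2 -> group 1 */
    return 0;
}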