aboutsummaryrefslogtreecommitdiffstats
path: root/erts/lib_src/common/erl_misc_utils.c
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2010-08-17 19:43:08 +0200
committerRickard Green <[email protected]>2010-08-30 11:59:30 +0200
commitf45560001d8128193ed4933ab9dca1b105ae752a (patch)
tree0e115639c2a3202fd2f986d02847a8283f695988 /erts/lib_src/common/erl_misc_utils.c
parent1b273b618002d65159453fdfb9520a9476e4423a (diff)
downloadotp-f45560001d8128193ed4933ab9dca1b105ae752a.tar.gz
otp-f45560001d8128193ed4933ab9dca1b105ae752a.tar.bz2
otp-f45560001d8128193ed4933ab9dca1b105ae752a.zip
Implement automatic detection of CPU topology on Windows
The CPU topology is now automatically detected on Windows systems with less than 33 logical processors. The runtime system will now, also on Windows, by default bind schedulers to logical processors using the 'default_bind' bind type if the amount of schedulers is at least equal to the amount of logical processors configured, binding of schedulers is supported, and a CPU topology is available at startup.
Diffstat (limited to 'erts/lib_src/common/erl_misc_utils.c')
-rw-r--r--erts/lib_src/common/erl_misc_utils.c381
1 files changed, 335 insertions, 46 deletions
diff --git a/erts/lib_src/common/erl_misc_utils.c b/erts/lib_src/common/erl_misc_utils.c
index 6346fe361d..116c9886d8 100644
--- a/erts/lib_src/common/erl_misc_utils.c
+++ b/erts/lib_src/common/erl_misc_utils.c
@@ -21,10 +21,13 @@
#include "config.h"
#endif
+#if defined(__WIN32__)
+# include <windows.h>
+#endif
+
#include "erl_misc_utils.h"
#if defined(__WIN32__)
-# include <windows.h>
#elif defined(VXWORKS)
# include <selectLib.h>
#else /* UNIX */
@@ -684,6 +687,56 @@ cpu_cmp(const void *vx, const void *vy)
return 0;
}
+static void
+adjust_processor_nodes(erts_cpu_info_t *cpuinfo, int no_nodes)
+{
+ erts_cpu_topology_t *prev, *this, *last;
+ if (no_nodes > 1) {
+ int processor = -1;
+ int processor_node = 0;
+ int node = -1;
+
+ qsort(cpuinfo->topology,
+ cpuinfo->topology_size,
+ sizeof(erts_cpu_topology_t),
+ pn_cmp);
+
+ prev = NULL;
+ this = &cpuinfo->topology[0];
+ last = &cpuinfo->topology[cpuinfo->configured-1];
+ while (1) {
+ if (processor == this->processor) {
+ if (node != this->node)
+ processor_node = 1;
+ }
+ else {
+ if (processor_node) {
+ make_processor_node:
+ while (prev->processor == processor) {
+ prev->processor_node = prev->node;
+ prev->node = -1;
+ if (prev == &cpuinfo->topology[0])
+ break;
+ prev--;
+ }
+ processor_node = 0;
+ }
+ processor = this->processor;
+ node = this->node;
+ }
+ if (this == last) {
+ if (processor_node) {
+ prev = this;
+ goto make_processor_node;
+ }
+ break;
+ }
+ prev = this++;
+ }
+ }
+}
+
+
#ifdef __linux__
static int
@@ -850,49 +903,7 @@ read_topology(erts_cpu_info_t *cpuinfo)
cpuinfo->topology = t;
}
- if (no_nodes > 1) {
- int processor = -1;
- int processor_node = 0;
- int node = -1;
-
- qsort(cpuinfo->topology,
- cpuinfo->topology_size,
- sizeof(erts_cpu_topology_t),
- pn_cmp);
-
- prev = NULL;
- this = &cpuinfo->topology[0];
- last = &cpuinfo->topology[cpuinfo->configured-1];
- while (1) {
- if (processor == this->processor) {
- if (node != this->node)
- processor_node = 1;
- }
- else {
- if (processor_node) {
- make_processor_node:
- while (prev->processor == processor) {
- prev->processor_node = prev->node;
- prev->node = -1;
- if (prev == &cpuinfo->topology[0])
- break;
- prev--;
- }
- processor_node = 0;
- }
- processor = this->processor;
- node = this->node;
- }
- if (this == last) {
- if (processor_node) {
- prev = this;
- goto make_processor_node;
- }
- break;
- }
- prev = this++;
- }
- }
+ adjust_processor_nodes(cpuinfo, no_nodes);
qsort(cpuinfo->topology,
cpuinfo->topology_size,
@@ -1075,6 +1086,8 @@ read_topology(erts_cpu_info_t *cpuinfo)
}
}
+ adjust_processor_nodes(cpuinfo, 1);
+
error:
if (res == 0) {
@@ -1093,6 +1106,275 @@ read_topology(erts_cpu_info_t *cpuinfo)
}
+#elif defined(__WIN32__)
+
+/*
+ * We cannot use Relation* out of the box since all of them are not
+ * always part of the LOGICAL_PROCESSOR_RELATIONSHIP enum. They are
+ * however documented as follows...
+ */
+#define ERTS_MU_RELATION_PROCESSOR_CORE 0 /* RelationProcessorCore */
+#define ERTS_MU_RELATION_NUMA_NODE 1 /* RelationNumaNode */
+#define ERTS_MU_RELATION_CACHE 2 /* RelationCache */
+#define ERTS_MU_RELATION_PROCESSOR_PACKAGE 3 /* RelationProcessorPackage */
+
+static __forceinline int
+rel_cmp_val(int r)
+{
+ switch (r) {
+ case ERTS_MU_RELATION_NUMA_NODE: return 0;
+ case ERTS_MU_RELATION_PROCESSOR_PACKAGE: return 1;
+ case ERTS_MU_RELATION_PROCESSOR_CORE: return 2;
+ default: /* currently not used */ return 3;
+ }
+}
+
+static int
+slpi_cmp(const void *vx, const void *vy)
+{
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION x, y;
+ x = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) vx;
+ y = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION) vy;
+
+ if ((int) x->Relationship != (int) y->Relationship)
+ return (rel_cmp_val((int) x->Relationship)
+ - rel_cmp_val((int) y->Relationship));
+
+ switch ((int) x->Relationship) {
+ case ERTS_MU_RELATION_NUMA_NODE:
+ if (x->NumaNode.NodeNumber == y->NumaNode.NodeNumber)
+ break;
+ return ((int) x->NumaNode.NodeNumber) - ((int) y->NumaNode.NodeNumber);
+ case ERTS_MU_RELATION_PROCESSOR_CORE:
+ case ERTS_MU_RELATION_PROCESSOR_PACKAGE:
+ default:
+ break;
+ }
+
+ if (x->ProcessorMask == y->ProcessorMask)
+ return 0;
+ return x->ProcessorMask < y->ProcessorMask ? -1 : 1;
+}
+
+typedef BOOL (WINAPI *glpi_t)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD);
+
+static int
+read_topology(erts_cpu_info_t *cpuinfo)
+{
+ int res = 0;
+ glpi_t glpi;
+ int *core_id = NULL;
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION slpip = NULL;
+ int wix, rix, max_l, l, packages, nodes, no_slpi;
+ DWORD slpi_size = 0;
+
+
+ glpi = (glpi_t) GetProcAddress(GetModuleHandle("kernel32"),
+ "GetLogicalProcessorInformation");
+ if (!glpi)
+ return -ENOTSUP;
+
+ cpuinfo->topology = NULL;
+
+ if (cpuinfo->configured < 1 || sizeof(ULONG_PTR)*8 < cpuinfo->configured)
+ goto error;
+
+ while (1) {
+ DWORD werr;
+ if (TRUE == glpi(slpip, &slpi_size))
+ break;
+ werr = GetLastError();
+ if (werr != ERROR_INSUFFICIENT_BUFFER) {
+ res = -erts_map_win_error_to_errno(werr);
+ goto error;
+ }
+ if (slpip)
+ free(slpip);
+ slpip = malloc(slpi_size);
+ if (!slpip) {
+ res = -ENOMEM;
+ goto error;
+ }
+ }
+
+ no_slpi = (int) slpi_size/sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+
+ qsort(slpip,
+ no_slpi,
+ sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION),
+ slpi_cmp);
+
+ /*
+ * Now numa node relations appear before package relations which
+ * appear before core relations which appear before relations
+ * we aren't interested in...
+ */
+
+ max_l = 0;
+ packages = 0;
+ nodes = 0;
+ for (rix = 0; rix < no_slpi; rix++) {
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION this = &slpip[rix];
+ for (l = sizeof(ULONG_PTR)*8 - 1; l > 0; l--) {
+ if (slpip[rix].ProcessorMask & (((ULONG_PTR) 1) << l)) {
+ if (max_l < l)
+ max_l = l;
+ break;
+ }
+ }
+ if ((int) slpip[rix].Relationship == ERTS_MU_RELATION_PROCESSOR_PACKAGE)
+ packages++;
+ if ((int) slpip[rix].Relationship == ERTS_MU_RELATION_NUMA_NODE)
+ nodes++;
+ }
+
+ core_id = malloc(sizeof(int)*(packages ? packages : 1));
+ if (!core_id) {
+ res = -ENOMEM;
+ goto error;
+ }
+
+ for (rix = 0; rix < packages; rix++)
+ core_id[rix] = 0;
+
+ cpuinfo->topology_size = max_l + 1;
+ cpuinfo->topology = malloc(sizeof(erts_cpu_topology_t)
+ * cpuinfo->topology_size);
+ if (!cpuinfo->topology) {
+ res = -ENOMEM;
+ goto error;
+ }
+
+ for (wix = 0; wix < cpuinfo->topology_size; wix++) {
+ cpuinfo->topology[wix].node = -1;
+ cpuinfo->topology[wix].processor = -1;
+ cpuinfo->topology[wix].processor_node = -1;
+ cpuinfo->topology[wix].core = -1;
+ cpuinfo->topology[wix].thread = -1;
+ cpuinfo->topology[wix].logical = -1;
+ }
+
+ nodes = 0;
+ packages = 0;
+
+ for (rix = 0; rix < no_slpi; rix++) {
+
+ switch ((int) slpip[rix].Relationship) {
+ case ERTS_MU_RELATION_NUMA_NODE:
+ for (l = 0; l < sizeof(ULONG_PTR)*8; l++) {
+ if (slpip[rix].ProcessorMask & (((ULONG_PTR) 1) << l)) {
+ cpuinfo->topology[l].logical = l;
+ cpuinfo->topology[l].node = slpip[rix].NumaNode.NodeNumber;
+ }
+ }
+ nodes++;
+ break;
+ case ERTS_MU_RELATION_PROCESSOR_PACKAGE:
+ for (l = 0; l < sizeof(ULONG_PTR)*8; l++) {
+ if (slpip[rix].ProcessorMask & (((ULONG_PTR) 1) << l)) {
+ cpuinfo->topology[l].logical = l;
+ cpuinfo->topology[l].processor = packages;
+ }
+ }
+ packages++;
+ break;
+ case ERTS_MU_RELATION_PROCESSOR_CORE: {
+ int thread = 0;
+ int processor = -1;
+ for (l = 0; l < sizeof(ULONG_PTR)*8; l++) {
+ /*
+ * Nodes and packages may not be supported; pretend
+ * that there are one if this is the case...
+ */
+ if (!nodes)
+ cpuinfo->topology[l].node = 0;
+ if (!packages)
+ cpuinfo->topology[l].processor = 0;
+ if (slpip[rix].ProcessorMask & (((ULONG_PTR) 1) << l)) {
+ if (processor < 0) {
+ processor = cpuinfo->topology[l].processor;
+ if (processor < 0) {
+ res = -EINVAL;
+ goto error;
+ }
+ }
+ else if (processor != cpuinfo->topology[l].processor) {
+ res = -EINVAL;
+ goto error;
+ }
+ cpuinfo->topology[l].logical = l;
+ cpuinfo->topology[l].thread = thread;
+ cpuinfo->topology[l].core = core_id[processor];
+ thread++;
+ }
+ }
+ core_id[processor]++;
+ break;
+ }
+ default:
+ /*
+ * We have reached the end of the relationships
+ * that we (currently) are interested in...
+ */
+ goto relationships_done;
+ }
+ }
+
+ relationships_done:
+
+ /*
+ * There may be unused entries; remove them...
+ */
+ for (rix = wix = 0; rix < cpuinfo->topology_size; rix++) {
+ if (cpuinfo->topology[rix].logical >= 0) {
+ if (wix != rix)
+ cpuinfo->topology[wix] = cpuinfo->topology[rix];
+ wix++;
+ }
+ }
+
+ if (cpuinfo->topology_size != wix) {
+ erts_cpu_topology_t *new = cpuinfo->topology;
+ new = realloc(cpuinfo->topology,
+ sizeof(erts_cpu_topology_t)*wix);
+ if (!new) {
+ res = -ENOMEM;
+ goto error;
+ }
+ cpuinfo->topology = new;
+ cpuinfo->topology_size = wix;
+ }
+
+ res = wix;
+
+ adjust_processor_nodes(cpuinfo, nodes);
+
+ qsort(cpuinfo->topology,
+ cpuinfo->topology_size,
+ sizeof(erts_cpu_topology_t),
+ cpu_cmp);
+
+ if (res < cpuinfo->online)
+ res = -EINVAL;
+
+ error:
+
+ if (res <= 0) {
+ cpuinfo->topology_size = 0;
+ if (cpuinfo->topology) {
+ free(cpuinfo->topology);
+ cpuinfo->topology = NULL;
+ }
+ }
+
+ if (slpip)
+ free(slpip);
+ if (core_id)
+ free(core_id);
+
+ return res;
+}
+
#else
static int
@@ -1106,9 +1388,9 @@ read_topology(erts_cpu_info_t *cpuinfo)
#if defined(__WIN32__)
int
-erts_get_last_win_errno(void)
+erts_map_win_error_to_errno(DWORD win_error)
{
- switch (GetLastError()) {
+ switch (win_error) {
case ERROR_INVALID_FUNCTION: return EINVAL; /* 1 */
case ERROR_FILE_NOT_FOUND: return ENOENT; /* 2 */
case ERROR_PATH_NOT_FOUND: return ENOENT; /* 3 */
@@ -1189,4 +1471,11 @@ erts_get_last_win_errno(void)
}
}
+int
+erts_get_last_win_errno(void)
+{
+ return erts_map_win_error_to_errno(GetLastError());
+}
+
+
#endif