aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/Makefile.in2
-rw-r--r--erts/emulator/beam/dist.c95
-rw-r--r--erts/emulator/beam/dist.h8
-rw-r--r--erts/emulator/beam/erl_alloc.c7
-rw-r--r--erts/emulator/beam/erl_alloc.types2
-rw-r--r--erts/emulator/beam/erl_bif_info.c24
-rw-r--r--erts/emulator/beam/erl_cpu_topology.c2359
-rw-r--r--erts/emulator/beam/erl_cpu_topology.h105
-rw-r--r--erts/emulator/beam/erl_db.c30
-rw-r--r--erts/emulator/beam/erl_db.h3
-rw-r--r--erts/emulator/beam/erl_db_hash.c206
-rw-r--r--erts/emulator/beam/erl_db_tree.c189
-rw-r--r--erts/emulator/beam/erl_db_util.c499
-rw-r--r--erts/emulator/beam/erl_db_util.h96
-rw-r--r--erts/emulator/beam/erl_drv_thread.c2
-rw-r--r--erts/emulator/beam/erl_init.c128
-rw-r--r--erts/emulator/beam/erl_lock_check.c2
-rw-r--r--erts/emulator/beam/erl_nif.c30
-rw-r--r--erts/emulator/beam/erl_node_tables.c10
-rw-r--r--erts/emulator/beam/erl_node_tables.h2
-rw-r--r--erts/emulator/beam/erl_process.c2063
-rw-r--r--erts/emulator/beam/erl_process.h38
-rw-r--r--erts/emulator/beam/erl_process_lock.c4
-rw-r--r--erts/emulator/beam/erl_process_lock.h4
-rw-r--r--erts/emulator/beam/erl_threads.h3
-rw-r--r--erts/emulator/beam/erl_vm.h1
-rw-r--r--erts/emulator/beam/external.c329
-rw-r--r--erts/emulator/beam/external.h8
-rw-r--r--erts/emulator/beam/global.h6
-rw-r--r--erts/emulator/beam/io.c16
-rw-r--r--erts/emulator/beam/packet_parser.c9
-rw-r--r--erts/emulator/beam/sys.h12
-rw-r--r--erts/emulator/drivers/common/inet_drv.c1065
-rwxr-xr-x[-rw-r--r--]erts/emulator/drivers/win32/win_efile.c131
-rw-r--r--erts/emulator/sys/unix/sys.c28
-rw-r--r--erts/emulator/sys/win32/sys.c167
-rw-r--r--erts/emulator/test/distribution_SUITE.erl93
-rw-r--r--erts/emulator/test/nif_SUITE.erl11
-rw-r--r--erts/emulator/test/port_SUITE.erl27
-rw-r--r--erts/emulator/test/port_SUITE_data/dead_port.c6
-rw-r--r--erts/emulator/test/send_term_SUITE.erl4
-rw-r--r--erts/emulator/test/system_info_SUITE.erl1
42 files changed, 4896 insertions, 2929 deletions
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 76d782b159..4ed0ccabc6 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -734,7 +734,7 @@ RUN_OBJS = \
$(OBJDIR)/erl_fun.o $(OBJDIR)/erl_bif_port.o \
$(OBJDIR)/erl_term.o $(OBJDIR)/erl_node_tables.o \
$(OBJDIR)/erl_monitors.o $(OBJDIR)/erl_process_dump.o \
- $(OBJDIR)/erl_bif_timer.o \
+ $(OBJDIR)/erl_bif_timer.o $(OBJDIR)/erl_cpu_topology.o \
$(OBJDIR)/erl_drv_thread.o $(OBJDIR)/erl_bif_chksum.o \
$(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \
$(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index 16b6aeac3f..694460d702 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -97,6 +97,8 @@ dist_msg_dbg(ErtsDistExternal *edep, char *what, byte *buf, int sz)
#define PASS_THROUGH 'p' /* This code should go */
int erts_is_alive; /* System must be blocked on change */
+int erts_dist_buf_busy_limit;
+
/* distribution trap functions */
Export* dsend2_trap = NULL;
@@ -160,7 +162,7 @@ Uint erts_dist_cache_size(void)
static ErtsProcList *
get_suspended_on_de(DistEntry *dep, Uint32 unset_qflgs)
{
- ERTS_SMP_LC_ASSERT(erts_smp_lc_spinlock_is_locked(&dep->qlock));
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&dep->qlock));
dep->qflgs &= ~unset_qflgs;
if (dep->qflgs & ERTS_DE_QFLG_EXIT) {
/* No resume when exit has been scheduled */
@@ -453,17 +455,17 @@ int erts_do_net_exits(DistEntry *dep, Eterm reason)
if (dep->status & ERTS_DE_SFLG_EXITING) {
#ifdef DEBUG
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qflgs & ERTS_DE_QFLG_EXIT);
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
#endif
}
else {
dep->status |= ERTS_DE_SFLG_EXITING;
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT));
dep->qflgs |= ERTS_DE_QFLG_EXIT;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
}
erts_smp_de_links_lock(dep);
@@ -577,7 +579,7 @@ static void clear_dist_entry(DistEntry *dep)
erts_smp_de_links_unlock(dep);
#endif
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
if (!dep->out_queue.last)
obuf = dep->finalized_out_queue.first;
@@ -593,7 +595,7 @@ static void clear_dist_entry(DistEntry *dep)
dep->status = 0;
suspendees = get_suspended_on_de(dep, ERTS_DE_QFLGS_ALL);
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
erts_smp_atomic_set(&dep->dist_cmd_scheduled, 0);
dep->send = NULL;
erts_smp_de_rwunlock(dep);
@@ -611,10 +613,10 @@ static void clear_dist_entry(DistEntry *dep)
}
if (obufsize) {
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qsize >= obufsize);
dep->qsize -= obufsize;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
}
}
@@ -1453,8 +1455,6 @@ int erts_net_message(Port *prt,
return -1;
}
-#define ERTS_DE_BUSY_LIMIT (128*1024)
-
static int
dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy)
{
@@ -1538,18 +1538,18 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy)
}
else {
ErtsProcList *plp = NULL;
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
dep->qsize += size_obuf(obuf);
- if (dep->qsize >= ERTS_DE_BUSY_LIMIT)
+ if (dep->qsize >= erts_dist_buf_busy_limit)
dep->qflgs |= ERTS_DE_QFLG_BUSY;
if (!force_busy && (dep->qflgs & ERTS_DE_QFLG_BUSY)) {
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
plp = erts_proclist_create(c_p);
plp->next = NULL;
erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
suspended = 1;
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
}
/* Enqueue obuf on dist entry */
@@ -1575,7 +1575,7 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy)
}
}
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
erts_schedule_dist_command(NULL, dep);
erts_smp_de_runlock(dep);
@@ -1708,10 +1708,8 @@ erts_dist_command(Port *prt, int reds_limit)
{
Sint reds = ERTS_PORT_REDS_DIST_CMD_START;
int prt_busy;
- int de_busy;
Uint32 status;
Uint32 flags;
- Uint32 qflgs;
Sint obufsize = 0;
ErtsDistOutputQueue oq, foq;
DistEntry *dep = prt->dist_entry;
@@ -1746,13 +1744,12 @@ erts_dist_command(Port *prt, int reds_limit)
* a mess.
*/
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
oq.first = dep->out_queue.first;
oq.last = dep->out_queue.last;
dep->out_queue.first = NULL;
dep->out_queue.last = NULL;
- qflgs = dep->qflgs;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
foq.first = dep->finalized_out_queue.first;
foq.last = dep->finalized_out_queue.last;
@@ -1763,17 +1760,8 @@ erts_dist_command(Port *prt, int reds_limit)
goto preempted;
prt_busy = (int) (prt->status & ERTS_PORT_SFLG_PORT_BUSY);
- de_busy = (int) (qflgs & ERTS_DE_QFLG_BUSY);
- if (prt_busy) {
- if (!de_busy) {
- erts_smp_spin_lock(&dep->qlock);
- dep->qflgs |= ERTS_DE_QFLG_BUSY;
- erts_smp_spin_unlock(&dep->qlock);
- de_busy = 1;
- }
- }
- else if (foq.first) {
+ if (!prt_busy && foq.first) {
int preempt = 0;
do {
Uint size;
@@ -1791,10 +1779,7 @@ erts_dist_command(Port *prt, int reds_limit)
free_dist_obuf(fob);
preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD);
if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) {
- erts_smp_spin_lock(&dep->qlock);
- dep->qflgs |= ERTS_DE_QFLG_BUSY;
- erts_smp_spin_unlock(&dep->qlock);
- de_busy = prt_busy = 1;
+ prt_busy = 1;
break;
}
} while (foq.first && !preempt);
@@ -1877,10 +1862,7 @@ erts_dist_command(Port *prt, int reds_limit)
free_dist_obuf(fob);
preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD);
if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) {
- erts_smp_spin_lock(&dep->qlock);
- dep->qflgs |= ERTS_DE_QFLG_BUSY;
- erts_smp_spin_unlock(&dep->qlock);
- de_busy = prt_busy = 1;
+ prt_busy = 1;
if (oq.first && !preempt)
goto finalize_only;
}
@@ -1907,22 +1889,23 @@ erts_dist_command(Port *prt, int reds_limit)
* dist entry in a non-busy state and resume suspended
* processes.
*/
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qsize >= obufsize);
dep->qsize -= obufsize;
obufsize = 0;
- if (de_busy && !prt_busy && dep->qsize < ERTS_DE_BUSY_LIMIT) {
+ if (!prt_busy
+ && (dep->qflgs & ERTS_DE_QFLG_BUSY)
+ && dep->qsize < erts_dist_buf_busy_limit) {
ErtsProcList *suspendees;
int resumed;
suspendees = get_suspended_on_de(dep, ERTS_DE_QFLG_BUSY);
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
resumed = erts_resume_processes(suspendees);
reds += resumed*ERTS_PORT_REDS_DIST_CMD_RESUMED;
- de_busy = 0;
}
else
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
}
ASSERT(!oq.first && !oq.last);
@@ -1931,10 +1914,10 @@ erts_dist_command(Port *prt, int reds_limit)
if (obufsize != 0) {
ASSERT(obufsize > 0);
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qsize >= obufsize);
dep->qsize -= obufsize;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
}
ASSERT(foq.first || !foq.last);
@@ -1984,9 +1967,9 @@ erts_dist_command(Port *prt, int reds_limit)
foq.last = NULL;
#ifdef DEBUG
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qsize == obufsize);
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
#endif
}
else {
@@ -1995,14 +1978,14 @@ erts_dist_command(Port *prt, int reds_limit)
* Unhandle buffers need to be put back first
* in out_queue.
*/
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
dep->qsize -= obufsize;
obufsize = 0;
oq.last->next = dep->out_queue.first;
dep->out_queue.first = oq.first;
if (!dep->out_queue.last)
dep->out_queue.last = oq.last;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
}
erts_schedule_dist_command(prt, NULL);
@@ -2026,10 +2009,10 @@ erts_kill_dist_connection(DistEntry *dep, Uint32 connection_id)
dep->status |= ERTS_DE_SFLG_EXITING;
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT));
dep->qflgs |= ERTS_DE_QFLG_EXIT;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
erts_schedule_dist_command(NULL, dep);
}
@@ -2400,13 +2383,13 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3)
ErtsProcList *plp = erts_proclist_create(BIF_P);
plp->next = NULL;
erts_suspend(BIF_P, ERTS_PROC_LOCK_MAIN, NULL);
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
if (dep->suspended.last)
dep->suspended.last->next = plp;
else
dep->suspended.first = plp;
dep->suspended.last = plp;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
goto yield;
}
@@ -2434,9 +2417,9 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3)
ASSERT(dep->send);
#ifdef DEBUG
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
ASSERT(dep->qsize == 0);
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
#endif
erts_set_dist_entry_connected(dep, BIF_ARG_2, flags);
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index fa19c7fb45..9ccc3e5ba9 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -38,6 +38,7 @@
#define DFLAG_UNICODE_IO 0x1000
#define DFLAG_DIST_HDR_ATOM_CACHE 0x2000
#define DFLAG_SMALL_ATOM_TAGS 0x4000
+#define DFLAGS_INTERNAL_TAGS 0x8000
/* All flags that should be enabled when term_to_binary/1 is used. */
#define TERM_TO_BINARY_DFLAGS (DFLAG_EXTENDED_REFERENCES \
@@ -99,7 +100,8 @@ typedef struct {
#define ERTS_DE_IS_CONNECTED(DEP) \
(!ERTS_DE_IS_NOT_CONNECTED((DEP)))
-
+#define ERTS_DE_BUSY_LIMIT (1024*1024)
+extern int erts_dist_buf_busy_limit;
extern int erts_is_alive;
/*
@@ -153,10 +155,10 @@ erts_dsig_prepare(ErtsDSigData *dsdp,
}
if (no_suspend) {
failure = ERTS_DSIG_PREP_CONNECTED;
- erts_smp_spin_lock(&dep->qlock);
+ erts_smp_mtx_lock(&dep->qlock);
if (dep->qflgs & ERTS_DE_QFLG_BUSY)
failure = ERTS_DSIG_PREP_WOULD_SUSPEND;
- erts_smp_spin_unlock(&dep->qlock);
+ erts_smp_mtx_unlock(&dep->qlock);
if (failure == ERTS_DSIG_PREP_WOULD_SUSPEND)
goto fail;
}
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 07b4167b27..7793f60f4f 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -1348,6 +1348,13 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init)
argv[j++] = argv[i];
}
*argc = j;
+#if HALFWORD_HEAP
+ /* If halfword heap, silently ignore any disabling of internal
+ allocators */
+ for (i = 0; i < aui_sz; ++i)
+ aui[i]->enable = 1;
+#endif
+
}
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 7df9f19af0..408ffd12f7 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -247,7 +247,7 @@ type CPUDATA LONG_LIVED SYSTEM cpu_data
type TMP_CPU_IDS SHORT_LIVED SYSTEM tmp_cpu_ids
type EXT_TERM_DATA SHORT_LIVED PROCESSES external_term_data
type ZLIB STANDARD SYSTEM zlib
-type RDR_GRPS_MAP LONG_LIVED SYSTEM reader_groups_map
+type CPU_GRPS_MAP LONG_LIVED SYSTEM cpu_groups_map
+if smp
type ASYNC SHORT_LIVED SYSTEM async
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 40d8dc097c..75d8db880c 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -38,6 +38,7 @@
#include "erl_instrument.h"
#include "dist.h"
#include "erl_gc.h"
+#include "erl_cpu_topology.h"
#ifdef HIPE
#include "hipe_arch.h"
#endif
@@ -1687,6 +1688,8 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */
return erts_get_cpu_topology_term(BIF_P, *tp);
} else if (ERTS_IS_ATOM_STR("cpu_topology", sel) && arity == 2) {
Eterm res = erts_get_cpu_topology_term(BIF_P, *tp);
+ if (res == THE_NON_VALUE)
+ goto badarg;
ERTS_BIF_PREP_TRAP1(ret, erts_format_cpu_topology_trap, BIF_P, res);
return ret;
#if defined(PURIFY) || defined(VALGRIND)
@@ -1999,6 +2002,8 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_RET(db_get_trace_control_word_0(BIF_P));
} else if (ERTS_IS_ATOM_STR("ets_realloc_moves", BIF_ARG_1)) {
BIF_RET((erts_ets_realloc_always_moves) ? am_true : am_false);
+ } else if (ERTS_IS_ATOM_STR("ets_always_compress", BIF_ARG_1)) {
+ BIF_RET((erts_ets_always_compress) ? am_true : am_false);
} else if (ERTS_IS_ATOM_STR("snifs", BIF_ARG_1)) {
Uint size = 0;
Uint *szp;
@@ -2345,9 +2350,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
/* Arguments that are unusual follow ... */
else if (ERTS_IS_ATOM_STR("logical_processors", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_configured(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(&no, NULL, NULL);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
@@ -2357,9 +2360,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
}
else if (ERTS_IS_ATOM_STR("logical_processors_online", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_online(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(NULL, &no, NULL);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
@@ -2369,9 +2370,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
}
else if (ERTS_IS_ATOM_STR("logical_processors_available", BIF_ARG_1)) {
int no;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- no = erts_get_cpu_available(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_get_logical_processors(NULL, NULL, &no);
if (no > 0)
BIF_RET(make_small((Uint) no));
else {
@@ -2533,6 +2532,13 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
BIF_RET(erts_nif_taints(BIF_P));
} else if (ERTS_IS_ATOM_STR("reader_groups_map", BIF_ARG_1)) {
BIF_RET(erts_get_reader_groups_map(BIF_P));
+ } else if (ERTS_IS_ATOM_STR("dist_buf_busy_limit", BIF_ARG_1)) {
+ Uint hsz = 0;
+
+ (void) erts_bld_uint(NULL, &hsz, erts_dist_buf_busy_limit);
+ hp = hsz ? HAlloc(BIF_P, hsz) : NULL;
+ res = erts_bld_uint(&hp, NULL, erts_dist_buf_busy_limit);
+ BIF_RET(res);
}
BIF_ERROR(BIF_P, BADARG);
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
new file mode 100644
index 0000000000..db95c4a5d4
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -0,0 +1,2359 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: CPU topology and related functionality
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <ctype.h>
+
+#include "global.h"
+#include "error.h"
+#include "bif.h"
+#include "erl_cpu_topology.h"
+
+#define ERTS_MAX_READER_GROUPS 8
+
+/*
+ * Cpu topology hierarchy.
+ */
+#define ERTS_TOPOLOGY_NODE 0
+#define ERTS_TOPOLOGY_PROCESSOR 1
+#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
+#define ERTS_TOPOLOGY_CORE 3
+#define ERTS_TOPOLOGY_THREAD 4
+#define ERTS_TOPOLOGY_LOGICAL 5
+
+#define ERTS_TOPOLOGY_MAX_DEPTH 6
+
+typedef struct {
+ int bind_id;
+ int bound_id;
+} ErtsCpuBindData;
+
+static erts_cpu_info_t *cpuinfo;
+
+static int max_main_threads;
+static int reader_groups;
+
+static ErtsCpuBindData *scheduler2cpu_map;
+static erts_smp_rwmtx_t cpuinfo_rwmtx;
+
+typedef enum {
+ ERTS_CPU_BIND_UNDEFINED,
+ ERTS_CPU_BIND_SPREAD,
+ ERTS_CPU_BIND_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_THREAD_SPREAD,
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
+ ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
+ ERTS_CPU_BIND_NO_SPREAD,
+ ERTS_CPU_BIND_NONE
+} ErtsCpuBindOrder;
+
+#define ERTS_CPU_BIND_DEFAULT_BIND \
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+
+static int no_cpu_groups_callbacks;
+static ErtsCpuBindOrder cpu_bind_order;
+
+static erts_cpu_topology_t *user_cpudata;
+static int user_cpudata_size;
+static erts_cpu_topology_t *system_cpudata;
+static int system_cpudata_size;
+
+typedef struct {
+ int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
+} erts_avail_cput;
+
+typedef struct {
+ int id;
+ int sub_levels;
+ int cpu_groups;
+} erts_cpu_groups_count_t;
+
+typedef struct {
+ int logical;
+ int cpu_group;
+} erts_cpu_groups_map_array_t;
+
+typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
+struct erts_cpu_groups_callback_list_t_ {
+ erts_cpu_groups_callback_list_t *next;
+ erts_cpu_groups_callback_t callback;
+ void *arg;
+};
+
+typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
+struct erts_cpu_groups_map_t_ {
+ erts_cpu_groups_map_t *next;
+ int groups;
+ erts_cpu_groups_map_array_t *array;
+ int size;
+ int logical_processors;
+ erts_cpu_groups_callback_list_t *callback_list;
+};
+
+typedef struct {
+ erts_cpu_groups_callback_t callback;
+ int ix;
+ void *arg;
+} erts_cpu_groups_callback_call_t;
+
+static erts_cpu_groups_map_t *cpu_groups_maps;
+
+static erts_cpu_groups_map_t *reader_groups_map;
+
+#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
+
+#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
+
+#ifdef ERTS_SMP
+static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq);
+static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
+#endif
+
+static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
+static erts_cpu_groups_map_t *add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg);
+static void update_cpu_groups_maps(void);
+static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
+static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
+ ErtsSchedulerData *esdp);
+
+static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
+ int *cpudata_size);
+static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
+
+static int
+int_cmp(const void *vx, const void *vy)
+{
+ return *((int *) vx) - *((int *) vy);
+}
+
+static int
+cpu_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ return 0;
+}
+
+static int
+cpu_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ return 0;
+}
+
+static int
+cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->core != y->core)
+ return x->core - y->core;
+ return 0;
+}
+
+static int
+cpu_no_spread_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+static ERTS_INLINE void
+make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
+{
+ int ix;
+ int node = -1;
+ int processor = -1;
+ int processor_node = -1;
+ int processor_node_node = -1;
+ int core = -1;
+ int thread = -1;
+ int old_node = -1;
+ int old_processor = -1;
+ int old_processor_node = -1;
+ int old_core = -1;
+ int old_thread = -1;
+
+ for (ix = 0; ix < size; ix++) {
+ if (!no_node || cpudata[ix].node >= 0) {
+ if (old_node == cpudata[ix].node)
+ cpudata[ix].node = node;
+ else {
+ old_node = cpudata[ix].node;
+ old_processor = processor = -1;
+ if (!no_node)
+ old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node || cpudata[ix].node >= 0)
+ cpudata[ix].node = ++node;
+ }
+ }
+ if (old_processor == cpudata[ix].processor)
+ cpudata[ix].processor = processor;
+ else {
+ old_processor = cpudata[ix].processor;
+ if (!no_node)
+ processor_node_node = old_processor_node = processor_node = -1;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ cpudata[ix].processor = ++processor;
+ }
+ if (no_node && cpudata[ix].processor_node < 0)
+ old_processor_node = -1;
+ else {
+ if (old_processor_node == cpudata[ix].processor_node) {
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = node;
+ else {
+ if (processor_node_node >= 0)
+ cpudata[ix].node = processor_node_node;
+ cpudata[ix].processor_node = processor_node;
+ }
+ }
+ else {
+ old_processor_node = cpudata[ix].processor_node;
+ old_core = core = -1;
+ old_thread = thread = -1;
+ if (no_node)
+ cpudata[ix].node = cpudata[ix].processor_node = ++node;
+ else {
+ cpudata[ix].node = processor_node_node = ++node;
+ cpudata[ix].processor_node = ++processor_node;
+ }
+ }
+ }
+ if (!no_node && cpudata[ix].processor_node < 0)
+ cpudata[ix].processor_node = 0;
+ if (old_core == cpudata[ix].core)
+ cpudata[ix].core = core;
+ else {
+ old_core = cpudata[ix].core;
+ old_thread = thread = -1;
+ cpudata[ix].core = ++core;
+ }
+ if (old_thread == cpudata[ix].thread)
+ cpudata[ix].thread = thread;
+ else
+ old_thread = cpudata[ix].thread = ++thread;
+ }
+}
+
+static void
+cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+ int size,
+ ErtsCpuBindOrder bind_order,
+ int mk_seq)
+{
+ if (size > 1) {
+ int no_node = 0;
+ int (*cmp_func)(const void *, const void *);
+ switch (bind_order) {
+ case ERTS_CPU_BIND_SPREAD:
+ cmp_func = cpu_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD:
+ cmp_func = cpu_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_SPREAD:
+ cmp_func = cpu_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_processor_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
+ no_node = 1;
+ cmp_func = cpu_no_node_thread_spread_order_cmp;
+ break;
+ case ERTS_CPU_BIND_NO_SPREAD:
+ cmp_func = cpu_no_spread_order_cmp;
+ break;
+ default:
+ cmp_func = NULL;
+ erl_exit(ERTS_ABORT_EXIT,
+ "Bad cpu bind type: %d\n",
+ (int) cpu_bind_order);
+ break;
+ }
+
+ if (mk_seq)
+ make_cpudata_id_seq(cpudata, size, no_node);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
+ }
+}
+
+static int
+processor_order_cmp(const void *vx, const void *vy)
+{
+ erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+ erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+ if (x->processor != y->processor)
+ return x->processor - y->processor;
+ if (x->node != y->node)
+ return x->node - y->node;
+ if (x->processor_node != y->processor_node)
+ return x->processor_node - y->processor_node;
+ if (x->core != y->core)
+ return x->core - y->core;
+ if (x->thread != y->thread)
+ return x->thread - y->thread;
+ return 0;
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
+{
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+ int cgcc_ix;
+
+ /* Unbind from cpu */
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0
+ && erts_unbind_from_cpu(cpuinfo) == 0) {
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ }
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(1,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(0, 0);
+
+}
+
+void
+erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
+{
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+
+ /* Make sure we check if we should bind to a cpu or not... */
+ if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
+ else
+ esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+}
+
+#endif
+
+void
+erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int res, cpu_id, cgcc_ix;
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+#ifdef ERTS_SMP
+ if (erts_common_run_queue)
+ erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
+ else {
+ esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ }
+#endif
+ erts_smp_runq_unlock(esdp->run_queue);
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ cpu_id = scheduler2cpu_map[esdp->no].bind_id;
+ if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
+ res = erts_bind_to_cpu(cpuinfo, cpu_id);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
+ else {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ if (scheduler2cpu_map[esdp->no].bound_id >= 0)
+ goto unbind;
+ }
+ }
+ else if (cpu_id < 0) {
+ unbind:
+ /* Get rid of old binding */
+ res = erts_unbind_from_cpu(cpuinfo);
+ if (res == 0)
+ esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ else if (res != -ENOTSUP) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
+ (int) esdp->no, cpu_id, erl_errno_id(-res));
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+ }
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(0,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ erts_smp_runq_lock(esdp->run_queue);
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+ int cgcc_ix;
+ erts_cpu_groups_map_t *cgm;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_callback_call_t *cgcc;
+
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+
+ cgcc = erts_alloc(ERTS_ALC_T_TMP,
+ (no_cpu_groups_callbacks
+ * sizeof(erts_cpu_groups_callback_call_t)));
+ cgcc_ix = 0;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ cgcc[cgcc_ix].callback = cgcl->callback;
+ cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+ cgcc[cgcc_ix].arg = cgcl->arg;
+ cgcc_ix++;
+ }
+ }
+
+ ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+ for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+ cgcc[cgcc_ix].callback(0,
+ esdp,
+ cgcc[cgcc_ix].ix,
+ cgcc[cgcc_ix].arg);
+
+ erts_free(ERTS_ALC_T_TMP, cgcc);
+
+ if (esdp->no <= max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+}
+#endif
+
+static void
+write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
+{
+ int s_ix = 1;
+ int cpu_ix;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
+
+ cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
+
+ for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
+ if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
+ scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
+ }
+
+ if (s_ix <= erts_no_schedulers)
+ for (; s_ix <= erts_no_schedulers; s_ix++)
+ scheduler2cpu_map[s_ix].bind_id = -1;
+}
+
+int
+erts_init_scheduler_bind_type_string(char *how)
+{
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
+ return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
+
+ if (!system_cpudata && !user_cpudata)
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
+
+ if (sys_strcmp(how, "db") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (sys_strcmp(how, "s") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (sys_strcmp(how, "ps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "ts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (sys_strcmp(how, "tnnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnps") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (sys_strcmp(how, "nnts") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (sys_strcmp(how, "ns") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (sys_strcmp(how, "u") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
+
+ return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
+}
+
+static Eterm
+bound_schedulers_term(ErtsCpuBindOrder order)
+{
+ switch (order) {
+ case ERTS_CPU_BIND_SPREAD: {
+ ERTS_DECL_AM(spread);
+ return AM_spread;
+ }
+ case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(processor_spread);
+ return AM_processor_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_SPREAD: {
+ ERTS_DECL_AM(thread_spread);
+ return AM_thread_spread;
+ }
+ case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(thread_no_node_processor_spread);
+ return AM_thread_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
+ ERTS_DECL_AM(no_node_processor_spread);
+ return AM_no_node_processor_spread;
+ }
+ case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
+ ERTS_DECL_AM(no_node_thread_spread);
+ return AM_no_node_thread_spread;
+ }
+ case ERTS_CPU_BIND_NO_SPREAD: {
+ ERTS_DECL_AM(no_spread);
+ return AM_no_spread;
+ }
+ case ERTS_CPU_BIND_NONE: {
+ ERTS_DECL_AM(unbound);
+ return AM_unbound;
+ }
+ default:
+ ASSERT(0);
+ return THE_NON_VALUE;
+ }
+}
+
+Eterm
+erts_bound_schedulers_term(Process *c_p)
+{
+ ErtsCpuBindOrder order;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ order = cpu_bind_order;
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return bound_schedulers_term(order);
+}
+
+Eterm
+erts_bind_schedulers(Process *c_p, Eterm how)
+{
+ int notify = 0;
+ Eterm res;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ ErtsCpuBindOrder old_cpu_bind_order;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+ if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
+ ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
+ }
+ else {
+
+ old_cpu_bind_order = cpu_bind_order;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+
+ if (!cpudata) {
+ cpu_bind_order = old_cpu_bind_order;
+ ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+ goto done;
+ }
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ notify = 1;
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ res = bound_schedulers_term(old_cpu_bind_order);
+ }
+
+ done:
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+ if (notify)
+ erts_sched_notify_check_cpu_bind();
+
+ return res;
+}
+
+int
+erts_sched_bind_atthrcreate_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ return esdp != NULL && erts_is_scheduler_bound(esdp);
+}
+
+int
+erts_sched_bind_atthrcreate_child(int unbind)
+{
+ int res = 0;
+ if (unbind) {
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = erts_unbind_from_cpu(cpuinfo);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ }
+ return res;
+}
+
+void
+erts_sched_bind_atthrcreate_parent(int unbind)
+{
+
+}
+
+int
+erts_sched_bind_atfork_prepare(void)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
+ if (unbind)
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ return unbind;
+}
+
+int
+erts_sched_bind_atfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_unbind_from_cpu(cpuinfo);
+ }
+ return 0;
+}
+
+char *
+erts_sched_bind_atvfork_child(int unbind)
+{
+ if (unbind) {
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+ return erts_get_unbind_from_cpu_str(cpuinfo);
+ }
+ return "false";
+}
+
+void
+erts_sched_bind_atfork_parent(int unbind)
+{
+ if (unbind)
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+Eterm
+erts_fake_scheduler_bindings(Process *p, Eterm how)
+{
+ ErtsCpuBindOrder fake_cpu_bind_order;
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ Eterm res;
+
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+ else if (ERTS_IS_ATOM_STR("processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+ else if (ERTS_IS_ATOM_STR("no_spread", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+ else if (ERTS_IS_ATOM_STR("unbound", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else {
+ ERTS_BIF_PREP_ERROR(res, p, BADARG);
+ return res;
+ }
+
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+ if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
+ ERTS_BIF_PREP_RET(res, am_false);
+ else {
+ int i;
+ Eterm *hp;
+
+ cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
+
+#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
+
+ erts_fprintf(stderr, "node: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].node);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "processor: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor);
+ erts_fprintf(stderr, "\n");
+ if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
+ && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
+ erts_fprintf(stderr, "processor_node:");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
+ erts_fprintf(stderr, "\n");
+ }
+ erts_fprintf(stderr, "core: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].core);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "thread: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].thread);
+ erts_fprintf(stderr, "\n");
+ erts_fprintf(stderr, "logical: ");
+ for (i = 0; i < cpudata_size; i++)
+ erts_fprintf(stderr, " %2d", cpudata[i].logical);
+ erts_fprintf(stderr, "\n");
+#endif
+
+ hp = HAlloc(p, cpudata_size+1);
+ ERTS_BIF_PREP_RET(res, make_tuple(hp));
+ *hp++ = make_arityval((Uint) cpudata_size);
+ for (i = 0; i < cpudata_size; i++)
+ *hp++ = make_small((Uint) cpudata[i].logical);
+ }
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_schedulers_binds(Process *c_p)
+{
+ int ix;
+ ERTS_DECL_AM(unbound);
+ Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
+ Eterm res = make_tuple(hp);
+
+ *(hp++) = make_arityval(erts_no_schedulers);
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ for (ix = 1; ix <= erts_no_schedulers; ix++)
+ *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
+ ? make_small(scheduler2cpu_map[ix].bound_id)
+ : AM_unbound);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+/*
+ * CPU topology
+ */
+
+typedef struct {
+ int *id;
+ int used;
+ int size;
+} ErtsCpuTopIdSeq;
+
+typedef struct {
+ ErtsCpuTopIdSeq logical;
+ ErtsCpuTopIdSeq thread;
+ ErtsCpuTopIdSeq core;
+ ErtsCpuTopIdSeq processor_node;
+ ErtsCpuTopIdSeq processor;
+ ErtsCpuTopIdSeq node;
+} ErtsCpuTopEntry;
+
+static void
+init_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ int size = 10;
+ cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->logical.size = size;
+ cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->thread.size = size;
+ cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->core.size = size;
+ cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor_node.size = size;
+ cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->processor.size = size;
+ cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+ sizeof(int)*size);
+ cte->node.size = size;
+}
+
+static void
+destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
+ erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
+}
+
+static int
+get_cput_value_or_range(int *v, int *vr, char **str)
+{
+ long l;
+ char *c = *str;
+ errno = 0;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+ *v = (int) l;
+ if (*c == '-') {
+ c++;
+ if (!isdigit((unsigned char)*c))
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ l = strtol(c, &c, 10);
+ if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ *vr = (int) l;
+ }
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
+{
+ int ix = 0;
+ int need_size = 0;
+ char *c = *str;
+
+ while (1) {
+ int res;
+ int val;
+ int nids;
+ int val_range = -1;
+ res = get_cput_value_or_range(&val, &val_range, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ if (val_range < 0 || val_range == val)
+ nids = 1;
+ else {
+ if (val_range > val)
+ nids = val_range - val + 1;
+ else
+ nids = val - val_range + 1;
+ }
+ need_size += nids;
+ if (need_size > idseq->size) {
+ idseq->size = need_size + 10;
+ idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
+ idseq->id,
+ sizeof(int)*idseq->size);
+ }
+ if (nids == 1)
+ idseq->id[ix++] = val;
+ else if (val_range > val) {
+ for (; val <= val_range; val++)
+ idseq->id[ix++] = val;
+ }
+ else {
+ for (; val >= val_range; val--)
+ idseq->id[ix++] = val;
+ }
+ if (*c != ',')
+ break;
+ c++;
+ }
+ *str = c;
+ idseq->used = ix;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+get_cput_entry(ErtsCpuTopEntry *cput, char **str)
+{
+ int h;
+ char *c = *str;
+
+ cput->logical.used = 0;
+ cput->thread.id[0] = 0;
+ cput->thread.used = 1;
+ cput->core.id[0] = 0;
+ cput->core.used = 1;
+ cput->processor_node.id[0] = -1;
+ cput->processor_node.used = 1;
+ cput->processor.id[0] = 0;
+ cput->processor.used = 1;
+ cput->node.id[0] = -1;
+ cput->node.used = 1;
+
+ h = ERTS_TOPOLOGY_MAX_DEPTH;
+ while (*c != ':' && *c != '\0') {
+ int res;
+ ErtsCpuTopIdSeq *idseqp;
+ switch (*c++) {
+ case 'L':
+ if (h <= ERTS_TOPOLOGY_LOGICAL)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->logical;
+ h = ERTS_TOPOLOGY_LOGICAL;
+ break;
+ case 't':
+ case 'T':
+ if (h <= ERTS_TOPOLOGY_THREAD)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->thread;
+ h = ERTS_TOPOLOGY_THREAD;
+ break;
+ case 'c':
+ case 'C':
+ if (h <= ERTS_TOPOLOGY_CORE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->core;
+ h = ERTS_TOPOLOGY_CORE;
+ break;
+ case 'p':
+ case 'P':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor;
+ h = ERTS_TOPOLOGY_PROCESSOR;
+ break;
+ case 'n':
+ case 'N':
+ if (h <= ERTS_TOPOLOGY_PROCESSOR) {
+ do_node:
+ if (h <= ERTS_TOPOLOGY_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->node;
+ h = ERTS_TOPOLOGY_NODE;
+ }
+ else {
+ int p_node = 0;
+ char *p_chk = c;
+ while (*p_chk != '\0' && *p_chk != ':') {
+ if (*p_chk == 'p' || *p_chk == 'P') {
+ p_node = 1;
+ break;
+ }
+ p_chk++;
+ }
+ if (!p_node)
+ goto do_node;
+ if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+ idseqp = &cput->processor_node;
+ h = ERTS_TOPOLOGY_PROCESSOR_NODE;
+ }
+ break;
+ default:
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
+ }
+ res = get_cput_id_seq(idseqp, &c);
+ if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+ return res;
+ }
+
+ if (cput->logical.used < 1)
+ return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
+
+ if (*c == ':') {
+ c++;
+ }
+
+ if (cput->thread.used != 1
+ && cput->thread.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->core.used != 1
+ && cput->core.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor_node.used != 1
+ && cput->processor_node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->processor.used != 1
+ && cput->processor.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+ if (cput->node.used != 1
+ && cput->node.used != cput->logical.used)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+
+ *str = c;
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+static int
+verify_topology(erts_cpu_topology_t *cpudata, int size)
+{
+ if (size > 0) {
+ int *logical;
+ int node, processor, no_nodes, i;
+
+ /* Verify logical ids */
+ logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
+
+ for (i = 0; i < size; i++)
+ logical[i] = cpudata[i].logical;
+
+ qsort(logical, size, sizeof(int), int_cmp);
+ for (i = 0; i < size-1; i++) {
+ if (logical[i] == logical[i+1]) {
+ erts_free(ERTS_ALC_T_TMP, logical);
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
+ }
+ }
+
+ erts_free(ERTS_ALC_T_TMP, logical);
+
+ qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
+
+ /* Verify unique entities */
+
+ for (i = 1; i < size; i++) {
+ if (cpudata[i-1].processor == cpudata[i].processor
+ && cpudata[i-1].node == cpudata[i].node
+ && (cpudata[i-1].processor_node
+ == cpudata[i].processor_node)
+ && cpudata[i-1].core == cpudata[i].core
+ && cpudata[i-1].thread == cpudata[i].thread) {
+ return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
+ }
+ }
+
+ /* Verify numa nodes */
+ node = cpudata[0].node;
+ processor = cpudata[0].processor;
+ no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
+ for (i = 1; i < size; i++) {
+ if (no_nodes) {
+ if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ else {
+ if (cpudata[i].processor == processor && cpudata[i].node != node)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ node = cpudata[i].node;
+ processor = cpudata[i].processor;
+ if (node >= 0 && cpudata[i].processor_node >= 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ if (node < 0 && cpudata[i].processor_node < 0)
+ return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+ }
+ }
+ }
+
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+int
+erts_init_cpu_topology_string(char *topology_str)
+{
+ ErtsCpuTopEntry cput;
+ int need_size;
+ char *c;
+ int ix;
+ int error = ERTS_INIT_CPU_TOPOLOGY_OK;
+
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 10;
+
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+
+ init_cpu_top_entry(&cput);
+
+ ix = 0;
+ need_size = 0;
+
+ c = topology_str;
+ if (*c == '\0') {
+ error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
+ goto fail;
+ }
+ do {
+ int r;
+ error = get_cput_entry(&cput, &c);
+ if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
+ goto fail;
+ need_size += cput.logical.used;
+ if (user_cpudata_size < need_size) {
+ user_cpudata_size = need_size + 10;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ ASSERT(cput.thread.used == 1
+ || cput.thread.used == cput.logical.used);
+ ASSERT(cput.core.used == 1
+ || cput.core.used == cput.logical.used);
+ ASSERT(cput.processor_node.used == 1
+ || cput.processor_node.used == cput.logical.used);
+ ASSERT(cput.processor.used == 1
+ || cput.processor.used == cput.logical.used);
+ ASSERT(cput.node.used == 1
+ || cput.node.used == cput.logical.used);
+
+ for (r = 0; r < cput.logical.used; r++) {
+ user_cpudata[ix].logical = cput.logical.id[r];
+ user_cpudata[ix].thread =
+ cput.thread.id[cput.thread.used == 1 ? 0 : r];
+ user_cpudata[ix].core =
+ cput.core.id[cput.core.used == 1 ? 0 : r];
+ user_cpudata[ix].processor_node =
+ cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
+ user_cpudata[ix].processor =
+ cput.processor.id[cput.processor.used == 1 ? 0 : r];
+ user_cpudata[ix].node =
+ cput.node.id[cput.node.used == 1 ? 0 : r];
+ ix++;
+ }
+ } while (*c != '\0');
+
+ if (user_cpudata_size != ix) {
+ user_cpudata_size = ix;
+ user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+ user_cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * user_cpudata_size));
+ }
+
+ error = verify_topology(user_cpudata, user_cpudata_size);
+ if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
+ destroy_cpu_top_entry(&cput);
+ return ERTS_INIT_CPU_TOPOLOGY_OK;
+ }
+
+ fail:
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata_size = 0;
+ destroy_cpu_top_entry(&cput);
+ return error;
+}
+
+#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
+#define ERTS_GET_USED_CPU_TOPOLOGY 0
+#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
+#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
+
+static Eterm get_cpu_topology_term(Process *c_p, int type);
+
+Eterm
+erts_set_cpu_topology(Process *c_p, Eterm term)
+{
+ erts_cpu_topology_t *cpudata = NULL;
+ int cpudata_size = 0;
+ Eterm res;
+
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
+ if (term == am_undefined) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
+ cpudata_size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+ }
+ else if (is_not_list(term)) {
+ error:
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ res = THE_NON_VALUE;
+ goto done;
+ }
+ else {
+ Eterm list = term;
+ int ix = 0;
+
+ cpudata_size = 100;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+
+ while (is_list(list)) {
+ Eterm *lp = list_val(list);
+ Eterm cpu = CAR(lp);
+ Eterm* tp;
+ Sint id;
+
+ if (is_not_tuple(cpu))
+ goto error;
+
+ tp = tuple_val(cpu);
+
+ if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
+ goto error;
+
+ if (ix >= cpudata_size) {
+ cpudata_size += 100;
+ cpudata = erts_realloc(ERTS_ALC_T_TMP,
+ cpudata,
+ (sizeof(erts_cpu_topology_t)
+ * cpudata_size));
+ }
+
+ id = signed_val(tp[2]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].node = (int) id;
+
+ id = signed_val(tp[3]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor = (int) id;
+
+ id = signed_val(tp[4]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].processor_node = (int) id;
+
+ id = signed_val(tp[5]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].core = (int) id;
+
+ id = signed_val(tp[6]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].thread = (int) id;
+
+ id = signed_val(tp[7]);
+ if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+ goto error;
+ cpudata[ix].logical = (int) id;
+
+ list = CDR(lp);
+ ix++;
+ }
+
+ if (is_not_nil(list))
+ goto error;
+
+ cpudata_size = ix;
+
+ if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
+ goto error;
+
+ if (user_cpudata_size != cpudata_size) {
+ if (user_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+ user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ user_cpudata_size = cpudata_size;
+ }
+
+ sys_memcpy((void *) user_cpudata,
+ (void *) cpudata,
+ sizeof(erts_cpu_topology_t)*cpudata_size);
+ }
+
+ update_cpu_groups_maps();
+
+ write_schedulers_bind_change(cpudata, cpudata_size);
+
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+
+ done:
+
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+static void
+create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
+{
+ if (user_cpudata) {
+ *cpudata_size = user_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) user_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else if (system_cpudata) {
+ *cpudata_size = system_cpudata_size;
+ *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * (*cpudata_size)));
+ sys_memcpy((void *) *cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*(*cpudata_size));
+ }
+ else {
+ *cpudata = NULL;
+ *cpudata_size = 0;
+ }
+}
+
+static void
+destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
+{
+ if (cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+}
+
+
+static Eterm
+bld_topology_term(Eterm **hpp,
+ Uint *hszp,
+ erts_cpu_topology_t *cpudata,
+ int size)
+{
+ Eterm res = NIL;
+ int i;
+
+ if (size == 0)
+ return am_undefined;
+
+ for (i = size-1; i >= 0; i--) {
+ res = erts_bld_cons(hpp,
+ hszp,
+ erts_bld_tuple(hpp,
+ hszp,
+ 7,
+ am_cpu,
+ make_small(cpudata[i].node),
+ make_small(cpudata[i].processor),
+ make_small(cpudata[i].processor_node),
+ make_small(cpudata[i].core),
+ make_small(cpudata[i].thread),
+ make_small(cpudata[i].logical)),
+ res);
+ }
+ return res;
+}
+
+static Eterm
+get_cpu_topology_term(Process *c_p, int type)
+{
+#ifdef DEBUG
+ Eterm *hp_end;
+#endif
+ Eterm *hp;
+ Uint hsz;
+ Eterm res = THE_NON_VALUE;
+ erts_cpu_topology_t *cpudata = NULL;
+ int size = 0;
+
+ switch (type) {
+ case ERTS_GET_USED_CPU_TOPOLOGY:
+ if (user_cpudata)
+ goto defined;
+ else
+ goto detected;
+ case ERTS_GET_DETECTED_CPU_TOPOLOGY:
+ detected:
+ if (!system_cpudata)
+ res = am_undefined;
+ else {
+ size = system_cpudata_size;
+ cpudata = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_cpu_topology_t)
+ * size));
+ sys_memcpy((void *) cpudata,
+ (void *) system_cpudata,
+ sizeof(erts_cpu_topology_t)*size);
+ }
+ break;
+ case ERTS_GET_DEFINED_CPU_TOPOLOGY:
+ defined:
+ if (!user_cpudata)
+ res = am_undefined;
+ else {
+ size = user_cpudata_size;
+ cpudata = user_cpudata;
+ }
+ break;
+ default:
+ erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
+ break;
+ }
+
+ if (res == am_undefined) {
+ ASSERT(!cpudata);
+ return res;
+ }
+
+ hsz = 0;
+
+ bld_topology_term(NULL, &hsz,
+ cpudata, size);
+
+ hp = HAlloc(c_p, hsz);
+
+#ifdef DEBUG
+ hp_end = hp + hsz;
+#endif
+
+ res = bld_topology_term(&hp, NULL,
+ cpudata, size);
+
+ ASSERT(hp_end == hp);
+
+ if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
+ erts_free(ERTS_ALC_T_TMP, cpudata);
+
+ return res;
+}
+
+Eterm
+erts_get_cpu_topology_term(Process *c_p, Eterm which)
+{
+ Eterm res;
+ int type;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ if (ERTS_IS_ATOM_STR("used", which))
+ type = ERTS_GET_USED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("detected", which))
+ type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
+ else if (ERTS_IS_ATOM_STR("defined", which))
+ type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
+ else
+ type = ERTS_GET_CPU_TOPOLOGY_ERROR;
+ if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
+ res = THE_NON_VALUE;
+ else
+ res = get_cpu_topology_term(c_p, type);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+static void
+get_logical_processors(int *conf, int *onln, int *avail)
+{
+ if (conf)
+ *conf = erts_get_cpu_configured(cpuinfo);
+ if (onln)
+ *onln = erts_get_cpu_online(cpuinfo);
+ if (avail)
+ *avail = erts_get_cpu_available(cpuinfo);
+}
+
+void
+erts_get_logical_processors(int *conf, int *onln, int *avail)
+{
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ get_logical_processors(conf, onln, avail);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+void
+erts_pre_early_init_cpu_topology(int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p)
+{
+ cpu_groups_maps = NULL;
+ no_cpu_groups_callbacks = 0;
+ *max_rg_p = ERTS_MAX_READER_GROUPS;
+ cpuinfo = erts_cpu_info_create();
+ get_logical_processors(conf_p, onln_p, avail_p);
+}
+
+void
+erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p)
+{
+ user_cpudata = NULL;
+ user_cpudata_size = 0;
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
+ system_cpudata_size)) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+
+ max_main_threads = erts_get_cpu_configured(cpuinfo);
+ if (max_main_threads > no_schedulers)
+ max_main_threads = no_schedulers;
+ *max_main_threads_p = max_main_threads;
+
+ reader_groups = max_main_threads;
+ if (reader_groups <= 1 || max_reader_groups <= 1)
+ reader_groups = 0;
+ if (reader_groups > max_reader_groups)
+ reader_groups = max_reader_groups;
+ *reader_groups_p = reader_groups;
+}
+
+void
+erts_init_cpu_topology(void)
+{
+ int ix;
+
+ erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info");
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+ scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(ErtsCpuBindData)
+ * (erts_no_schedulers+1)));
+ for (ix = 1; ix <= erts_no_schedulers; ix++) {
+ scheduler2cpu_map[ix].bind_id = -1;
+ scheduler2cpu_map[ix].bound_id = -1;
+ }
+
+ if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
+ int ncpus = erts_get_cpu_configured(cpuinfo);
+ if (ncpus < 1 || erts_no_schedulers < ncpus)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ cpu_bind_order = ((system_cpudata || user_cpudata)
+ && (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP)
+ ? ERTS_CPU_BIND_DEFAULT_BIND
+ : ERTS_CPU_BIND_NONE);
+ }
+
+ reader_groups_map = add_cpu_groups(reader_groups,
+ reader_groups_callback,
+ NULL);
+
+ if (cpu_bind_order == ERTS_CPU_BIND_NONE)
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ else {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ erts_sched_notify_check_cpu_bind();
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+}
+
+int
+erts_update_cpu_info(void)
+{
+ int changed;
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ changed = erts_cpu_info_update(cpuinfo);
+ if (changed) {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+
+ if (system_cpudata)
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+
+ system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+ if (!system_cpudata_size)
+ system_cpudata = NULL;
+ else {
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+ || (ERTS_INIT_CPU_TOPOLOGY_OK
+ != verify_topology(system_cpudata,
+ system_cpudata_size))) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+ }
+
+ update_cpu_groups_maps();
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ write_schedulers_bind_change(cpudata, cpudata_size);
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+ if (changed)
+ erts_sched_notify_check_cpu_bind();
+ return changed;
+}
+
+/*
+ * reader groups map
+ */
+
+void
+reader_groups_callback(int suspending,
+ ErtsSchedulerData *esdp,
+ int group,
+ void *unused)
+{
+ if (reader_groups && esdp->no <= max_main_threads)
+ erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1);
+}
+
+static Eterm get_cpu_groups_map(Process *c_p,
+ erts_cpu_groups_map_t *map,
+ int offset);
+Eterm
+erts_debug_reader_groups_map(Process *c_p, int groups)
+{
+ Eterm res;
+ erts_cpu_groups_map_t test;
+
+ test.array = NULL;
+ test.groups = groups;
+ make_cpu_groups_map(&test, 1);
+ if (!test.array)
+ res = NIL;
+ else {
+ res = get_cpu_groups_map(c_p, &test, 1);
+ erts_free(ERTS_ALC_T_TMP, test.array);
+ }
+ return res;
+}
+
+
+Eterm
+erts_get_reader_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+ res = get_cpu_groups_map(c_p, reader_groups_map, 1);
+ erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+ return res;
+}
+
+/*
+ * CPU groups
+ */
+
+static Eterm
+get_cpu_groups_map(Process *c_p,
+ erts_cpu_groups_map_t *map,
+ int offset)
+{
+#ifdef DEBUG
+ Eterm *endp;
+#endif
+ Eterm res = NIL, tuple;
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(c_p, map->logical_processors*(2+3));
+#ifdef DEBUG
+ endp = hp + map->logical_processors*(2+3);
+#endif
+ for (i = map->size - 1; i >= 0; i--) {
+ if (map->array[i].logical >= 0) {
+ tuple = TUPLE2(hp,
+ make_small(map->array[i].logical),
+ make_small(map->array[i].cpu_group + offset));
+ hp += 3;
+ res = CONS(hp, tuple, res);
+ hp += 2;
+ }
+ }
+ ASSERT(hp == endp);
+ return res;
+}
+
+static void
+make_available_cpu_topology(erts_avail_cput *no,
+ erts_avail_cput *avail,
+ erts_cpu_topology_t *cpudata,
+ int *size,
+ int test)
+{
+ int len = *size;
+ erts_cpu_topology_t last;
+ int a, i, j;
+
+ no->level[ERTS_TOPOLOGY_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_CORE] = -1;
+ no->level[ERTS_TOPOLOGY_THREAD] = -1;
+ no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
+
+ last.node = INT_MIN;
+ last.processor = INT_MIN;
+ last.processor_node = INT_MIN;
+ last.core = INT_MIN;
+ last.thread = INT_MIN;
+ last.logical = INT_MIN;
+
+ a = 0;
+
+ for (i = 0; i < len; i++) {
+
+ if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
+ continue;
+
+ if (last.node != cpudata[i].node)
+ goto node;
+ if (last.processor != cpudata[i].processor)
+ goto processor;
+ if (last.processor_node != cpudata[i].processor_node)
+ goto processor_node;
+ if (last.core != cpudata[i].core)
+ goto core;
+ ASSERT(last.thread != cpudata[i].thread);
+ goto thread;
+
+ node:
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ processor:
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ processor_node:
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ core:
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ thread:
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
+ avail[a].level[j] = no->level[j];
+
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
+ avail[a].level[ERTS_TOPOLOGY_CG] = 0;
+
+ ASSERT(last.logical != cpudata[i].logical);
+
+ last = cpudata[i];
+ a++;
+ }
+
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ *size = a;
+}
+
+static void
+cpu_group_insert(erts_cpu_groups_map_t *map,
+ int logical, int cpu_group)
+{
+ int start = logical % map->size;
+ int ix = start;
+
+ do {
+ if (map->array[ix].logical < 0) {
+ map->array[ix].logical = logical;
+ map->array[ix].cpu_group = cpu_group;
+ return;
+ }
+ ix++;
+ if (ix == map->size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
+}
+
+
+static int
+sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int sub_level = level+1;
+ int last = -1;
+ cgc->sub_levels = 0;
+
+ do {
+ if (last != avail[aix].level[sub_level]) {
+ cgc->sub_levels++;
+ last = avail[aix].level[sub_level];
+ }
+ aix++;
+ }
+ while (aix < avail_sz && cgc->id == avail[aix].level[level]);
+ cgc->cpu_groups = 0;
+ return aix;
+}
+
+static int
+write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
+ int level, int a,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int cg = *cgp;
+ int sub_level = level+1;
+ int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
+ int xsl = cgcp->sub_levels % cgcp->cpu_groups;
+ int sls = 0;
+ int last = -1;
+ int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;
+
+ ASSERT(level < 0 || avail[a].level[level] == cgcp->id);
+
+ do {
+ if (last != avail[a].level[sub_level]) {
+ if (!sls) {
+ sls = sl_per_gr;
+ cg++;
+ if (cg >= xsl_cg_lim)
+ sls++;
+ }
+ last = avail[a].level[sub_level];
+ sls--;
+ }
+ avail[a].level[ERTS_TOPOLOGY_CG] = cg;
+ a++;
+ } while (a < avail_sz && (level < 0
+ || avail[a].level[level] == cgcp->id));
+
+ ASSERT(cgcp->cpu_groups == cg - *cgp);
+
+ *cgp = cg;
+
+ return a;
+}
+
+static int
+cg_count_sub_levels_compare(const void *vx, const void *vy)
+{
+ erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
+ erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
+ if (x->sub_levels != y->sub_levels)
+ return y->sub_levels - x->sub_levels;
+ return x->id - y->id;
+}
+
+static int
+cg_count_id_compare(const void *vx, const void *vy)
+{
+ erts_cpu_groups_count_t *x = (erts_cpu_groups_count_t *) vx;
+ erts_cpu_groups_count_t *y = (erts_cpu_groups_count_t *) vy;
+ return x->id - y->id;
+}
+
+static void
+make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
+{
+ int i, spread_level, avail_sz;
+ erts_avail_cput no, *avail;
+ erts_cpu_topology_t *cpudata;
+ ErtsAlcType_t alc_type = (test
+ ? ERTS_ALC_T_TMP
+ : ERTS_ALC_T_CPU_GRPS_MAP);
+
+ if (map->array)
+ erts_free(alc_type, map->array);
+
+ map->array = NULL;
+ map->logical_processors = 0;
+ map->size = 0;
+
+ if (!map->groups)
+ return;
+
+ create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
+
+ if (!cpudata)
+ return;
+
+ cpu_bind_order_sort(cpudata,
+ avail_sz,
+ ERTS_CPU_BIND_NO_SPREAD,
+ 1);
+
+ avail = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(erts_avail_cput)*avail_sz);
+
+ make_available_cpu_topology(&no, avail, cpudata,
+ &avail_sz, test);
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ map->size = avail_sz*2+1;
+
+ map->array = erts_alloc(alc_type,
+ (sizeof(erts_cpu_groups_map_array_t)
+ * map->size));;
+ map->logical_processors = avail_sz;
+
+ for (i = 0; i < map->size; i++) {
+ map->array[i].logical = -1;
+ map->array[i].cpu_group = -1;
+ }
+
+ spread_level = ERTS_TOPOLOGY_CORE;
+ for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
+ if (no.level[i] > map->groups) {
+ spread_level = i;
+ break;
+ }
+ }
+
+ if (no.level[spread_level] <= map->groups) {
+ int a, cg, last = -1;
+ cg = -1;
+ ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
+ for (a = 0; a < avail_sz; a++) {
+ if (last != avail[a].level[spread_level]) {
+ cg++;
+ last = avail[a].level[spread_level];
+ }
+ cpu_group_insert(map,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ cg);
+ }
+ }
+ else { /* map->groups < no.level[spread_level] */
+ erts_cpu_groups_count_t *cg_count;
+ int a, cg, tl, toplevels;
+
+ tl = spread_level-1;
+
+ if (spread_level == ERTS_TOPOLOGY_NODE)
+ toplevels = 1;
+ else
+ toplevels = no.level[tl];
+
+ cg_count = erts_alloc(ERTS_ALC_T_TMP,
+ toplevels*sizeof(erts_cpu_groups_count_t));
+
+ if (toplevels == 1) {
+ cg_count[0].id = 0;
+ cg_count[0].sub_levels = no.level[spread_level];
+ cg_count[0].cpu_groups = map->groups;
+ }
+ else {
+ int cgs_per_tl, cgs;
+ cgs = map->groups;
+ cgs_per_tl = cgs / toplevels;
+
+ a = 0;
+ for (i = 0; i < toplevels; i++) {
+ cg_count[i].id = avail[a].level[tl];
+ a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
+ }
+
+ qsort(cg_count,
+ toplevels,
+ sizeof(erts_cpu_groups_count_t),
+ cg_count_sub_levels_compare);
+
+ for (i = 0; i < toplevels; i++) {
+ if (cg_count[i].sub_levels < cgs_per_tl) {
+ cg_count[i].cpu_groups = cg_count[i].sub_levels;
+ cgs -= cg_count[i].sub_levels;
+ }
+ else {
+ cg_count[i].cpu_groups = cgs_per_tl;
+ cgs -= cgs_per_tl;
+ }
+ }
+
+ while (cgs > 0) {
+ for (i = 0; i < toplevels; i++) {
+ if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
+ break;
+ else {
+ cg_count[i].cpu_groups++;
+ if (--cgs == 0)
+ break;
+ }
+ }
+ }
+
+ qsort(cg_count,
+ toplevels,
+ sizeof(erts_cpu_groups_count_t),
+ cg_count_id_compare);
+ }
+
+ a = i = 0;
+ cg = -1;
+ while (a < avail_sz) {
+ a = write_cpu_groups(&cg, &cg_count[i], tl,
+ a, avail_sz, avail);
+ i++;
+ }
+
+ ASSERT(map->groups == cg + 1);
+
+ for (a = 0; a < avail_sz; a++)
+ cpu_group_insert(map,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ avail[a].level[ERTS_TOPOLOGY_CG]);
+
+ erts_free(ERTS_ALC_T_TMP, cg_count);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, avail);
+}
+
+static erts_cpu_groups_map_t *
+add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ int use_groups = groups;
+ erts_cpu_groups_callback_list_t *cgcl;
+ erts_cpu_groups_map_t *cgm;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (use_groups > max_main_threads)
+ use_groups = max_main_threads;
+
+ if (!use_groups)
+ return NULL;
+
+ no_cpu_groups_callbacks++;
+ cgcl = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+ sizeof(erts_cpu_groups_callback_list_t));
+ cgcl->callback = callback;
+ cgcl->arg = arg;
+
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ if (cgm->groups == use_groups) {
+ cgcl->next = cgm->callback_list;
+ cgm->callback_list = cgcl;
+ return cgm;
+ }
+ }
+
+
+ cgm = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+ sizeof(erts_cpu_groups_map_t));
+ cgm->next = cpu_groups_maps;
+ cgm->groups = use_groups;
+ cgm->array = NULL;
+ cgm->size = 0;
+ cgm->logical_processors = 0;
+ cgm->callback_list = cgcl;
+
+ cgcl->next = NULL;
+
+ make_cpu_groups_map(cgm, 0);
+
+ cpu_groups_maps = cgm;
+
+ return cgm;
+}
+
+static void
+remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg)
+{
+ erts_cpu_groups_map_t *prev_cgm, *cgm;
+ erts_cpu_groups_callback_list_t *prev_cgcl, *cgcl;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ no_cpu_groups_callbacks--;
+
+ prev_cgm = NULL;
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+ prev_cgcl = NULL;
+ for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+ if (cgcl->callback == callback && cgcl->arg == arg) {
+ if (prev_cgcl)
+ prev_cgcl->next = cgcl->next;
+ else
+ cgm->callback_list = cgcl->next;
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgcl);
+ if (!cgm->callback_list) {
+ if (prev_cgm)
+ prev_cgm->next = cgm->next;
+ else
+ cpu_groups_maps = cgm->next;
+ if (cgm->array)
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm->array);
+ erts_free(ERTS_ALC_T_CPU_GRPS_MAP, cgm);
+ }
+ return;
+ }
+ prev_cgcl = cgcl;
+ }
+ prev_cgm = cgm;
+ }
+
+ erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n");
+}
+
+static int
+cpu_groups_lookup(erts_cpu_groups_map_t *map,
+ ErtsSchedulerData *esdp)
+{
+ int start, logical, ix;
+
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+ || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ if (esdp->cpu_id < 0)
+ return (((int) esdp->no) - 1) % map->groups;
+
+ logical = esdp->cpu_id;
+ start = logical % map->size;
+ ix = start;
+
+ do {
+ if (map->array[ix].logical == logical) {
+ int group = map->array[ix].cpu_group;
+ ASSERT(0 <= group && group < map->groups);
+ return group;
+ }
+ ix++;
+ if (ix == map->size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
+}
+
+static void
+update_cpu_groups_maps(void)
+{
+ erts_cpu_groups_map_t *cgm;
+ ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+ for (cgm = cpu_groups_maps; cgm; cgm = cgm->next)
+ make_cpu_groups_map(cgm, 0);
+}
+
+void
+erts_add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ add_cpu_groups(groups, callback, arg);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}
+
+void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback,
+ void *arg)
+{
+ erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+ remove_cpu_groups(callback, arg);
+ erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}
diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h
new file mode 100644
index 0000000000..c5a9520b61
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.h
@@ -0,0 +1,105 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: CPU topology and related functionality
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERL_CPU_TOPOLOGY_H__
+#define ERL_CPU_TOPOLOGY_H__
+
+void erts_pre_early_init_cpu_topology(int *max_rg_p,
+ int *conf_p,
+ int *onln_p,
+ int *avail_p);
+void erts_early_init_cpu_topology(int no_schedulers,
+ int *max_main_threads_p,
+ int max_reader_groups,
+ int *reader_groups_p);
+void erts_init_cpu_topology(void);
+
+
+#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0
+#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3
+
+int erts_init_scheduler_bind_type_string(char *how);
+
+
+#define ERTS_INIT_CPU_TOPOLOGY_OK 0
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9
+
+int erts_init_cpu_topology_string(char *topology_str);
+
+void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp);
+#ifdef ERTS_SMP
+void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp);
+#endif
+
+int erts_update_cpu_info(void);
+
+Eterm erts_bind_schedulers(Process *c_p, Eterm how);
+Eterm erts_get_schedulers_binds(Process *c_p);
+
+Eterm erts_get_reader_groups_map(Process *c_p);
+
+Eterm erts_set_cpu_topology(Process *c_p, Eterm term);
+Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
+
+int erts_update_cpu_info(void);
+void erts_get_logical_processors(int *conf, int *onln, int *avail);
+
+int erts_sched_bind_atthrcreate_prepare(void);
+int erts_sched_bind_atthrcreate_child(int unbind);
+void erts_sched_bind_atthrcreate_parent(int unbind);
+
+int erts_sched_bind_atfork_prepare(void);
+int erts_sched_bind_atfork_child(int unbind);
+char *erts_sched_bind_atvfork_child(int unbind);
+void erts_sched_bind_atfork_parent(int unbind);
+
+Eterm erts_fake_scheduler_bindings(Process *p, Eterm how);
+Eterm erts_debug_cpu_groups_map(Process *c_p, int groups);
+
+
+typedef void (*erts_cpu_groups_callback_t)(int,
+ ErtsSchedulerData *,
+ int,
+ void *);
+
+void erts_add_cpu_groups(int groups,
+ erts_cpu_groups_callback_t callback,
+ void *arg);
+void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback,
+ void *arg);
+
+#endif
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c
index 1c2c0fe4cb..8577354d27 100644
--- a/erts/emulator/beam/erl_db.c
+++ b/erts/emulator/beam/erl_db.c
@@ -179,6 +179,7 @@ extern DbTableMethod db_tree;
int user_requested_db_max_tabs;
int erts_ets_realloc_always_moves;
+int erts_ets_always_compress;
static int db_max_tabs;
static DbTable *meta_pid_to_tab; /* Pid mapped to owned tables */
static DbTable *meta_pid_to_fixed_tab; /* Pid mapped to fixed tables */
@@ -931,7 +932,7 @@ BIF_RETTYPE ets_update_counter_3(BIF_ALIST_3)
position > arityval(handle.dbterm->tpl[0])) {
goto finalize;
}
- oldcnt = handle.dbterm->tpl[position];
+ oldcnt = db_do_read_element(&handle, position);
if (is_big(oldcnt)) {
halloc_size += BIG_NEED_SIZE(big_arity(oldcnt));
}
@@ -1276,7 +1277,7 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
UWord heir_data;
Uint32 status;
Sint keypos;
- int is_named, is_fine_locked, frequent_read;
+ int is_named, is_fine_locked, frequent_read, is_compressed;
int cret;
DeclareTmpHeap(meta_tuple,3,BIF_P);
DbTableMethod* meth;
@@ -1296,6 +1297,7 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
frequent_read = 0;
heir = am_none;
heir_data = (UWord) am_undefined;
+ is_compressed = erts_ets_always_compress;
list = BIF_ARG_2;
while(is_list(list)) {
@@ -1358,6 +1360,9 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
else if (val == am_named_table) {
is_named = 1;
}
+ else if (val == am_compressed) {
+ is_compressed = 1;
+ }
else if (val == am_set || val == am_protected)
;
else break;
@@ -1418,6 +1423,7 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
erts_smp_atomic_init(&tb->common.nitems, 0);
tb->common.fixations = NULL;
+ tb->common.compress = is_compressed;
cret = meth->db_create(BIF_P, tb);
ASSERT(cret == DB_ERROR_NONE);
@@ -2572,7 +2578,7 @@ BIF_RETTYPE ets_match_object_3(BIF_ALIST_3)
BIF_RETTYPE ets_info_1(BIF_ALIST_1)
{
static Eterm fields[] = {am_protection, am_keypos, am_type, am_named_table,
- am_node, am_size, am_name, am_heir, am_owner, am_memory};
+ am_node, am_size, am_name, am_heir, am_owner, am_memory, am_compressed};
Eterm results[sizeof(fields)/sizeof(Eterm)];
DbTable* tb;
Eterm res;
@@ -2837,6 +2843,7 @@ void init_db(void)
erts_smp_atomic_init(&meta_pid_to_tab->common.nitems, 0);
meta_pid_to_tab->common.slot = -1;
meta_pid_to_tab->common.meth = &db_hash;
+ meta_pid_to_tab->common.compress = 0;
erts_refc_init(&meta_pid_to_tab->common.ref, 1);
erts_refc_init(&meta_pid_to_tab->common.fixref, 0);
@@ -2869,6 +2876,7 @@ void init_db(void)
erts_smp_atomic_init(&meta_pid_to_fixed_tab->common.nitems, 0);
meta_pid_to_fixed_tab->common.slot = -1;
meta_pid_to_fixed_tab->common.meth = &db_hash;
+ meta_pid_to_fixed_tab->common.compress = 0;
erts_refc_init(&meta_pid_to_fixed_tab->common.ref, 1);
erts_refc_init(&meta_pid_to_fixed_tab->common.fixref, 0);
@@ -3077,7 +3085,7 @@ retry:
db_unlock(tb,LCK_WRITE);
heir_data = tb->common.heir_data;
if (!is_immed(heir_data)) {
- Eterm* tpv = DBTERM_BUF((DbTerm*)heir_data); /* tuple_val */
+ Eterm* tpv = ((DbTerm*)heir_data)->tpl; /* tuple_val */
ASSERT(arityval(*tpv) == 1);
heir_data = tpv[1];
}
@@ -3251,7 +3259,8 @@ erts_db_process_exiting(Process *c_p, ErtsProcLocks c_p_locks)
pp = &(*pp)->next) {
if ((*pp)->pid == pid) {
DbFixation* fix = *pp;
- erts_refc_add(&tb->common.fixref,-fix->counter,0);
+ long diff = -(long)fix->counter;
+ erts_refc_add(&tb->common.fixref,diff,0);
*pp = fix->next;
erts_db_free(ERTS_ALC_T_DB_FIXATION,
tb, fix, sizeof(DbFixation));
@@ -3469,8 +3478,8 @@ static void set_heir(Process* me, DbTable* tb, Eterm heir, UWord heir_data)
UseTmpHeap(2,me);
/* Make a dummy 1-tuple around data to use db_get_term() */
- heir_data = (UWord) db_get_term(&tb->common, NULL, 0,
- TUPLE1(tmp,heir_data));
+ heir_data = (UWord) db_store_term(&tb->common, NULL, 0,
+ TUPLE1(tmp,heir_data));
UnUseTmpHeap(2,me);
ASSERT(!is_immed(heir_data));
}
@@ -3481,7 +3490,7 @@ static void free_heir_data(DbTable* tb)
{
if (tb->common.heir != am_none && !is_immed(tb->common.heir_data)) {
DbTerm* p = (DbTerm*) tb->common.heir_data;
- db_free_term_data(p);
+ db_cleanup_offheap_comp(p);
erts_db_free(ERTS_ALC_T_DB_TERM, tb, (void *)p,
sizeof(DbTerm) + (p->size-1)*sizeof(Eterm));
}
@@ -3618,10 +3627,13 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What)
ret = erts_this_dist_entry->sysname;
} else if (What == am_named_table) {
ret = is_atom(tb->common.id) ? am_true : am_false;
+ } else if (What == am_compressed) {
+ ret = tb->common.compress ? am_true : am_false;
+ }
/*
* For debugging purposes
*/
- } else if (What == am_data) {
+ else if (What == am_data) {
print_table(ERTS_PRINT_STDOUT, NULL, 1, tb);
ret = am_true;
} else if (What == am_atom_put("fixed",5)) {
diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h
index 7da28fad29..cb2da603f0 100644
--- a/erts/emulator/beam/erl_db.h
+++ b/erts/emulator/beam/erl_db.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1996-2009. All Rights Reserved.
+ * Copyright Ericsson AB 1996-2010. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -61,6 +61,7 @@ void erts_db_foreach_offheap(DbTable *,
extern int user_requested_db_max_tabs; /* set in erl_init */
extern int erts_ets_realloc_always_moves; /* set in erl_init */
+extern int erts_ets_always_compress; /* set in erl_init */
extern Export ets_select_delete_continue_exp;
extern Export ets_select_count_continue_exp;
extern Export ets_select_continue_exp;
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index fa707f4eed..14ee63100a 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -267,11 +267,11 @@ static ERTS_INLINE Sint next_slot_w(DbTableHash* tb, Uint ix,
*/
#define BIN_FLAG_ALL_OBJECTS BIN_FLAG_USR1
-/*
- * Size calculations
- */
-#define SIZ_OVERHEAD ((sizeof(HashDbTerm)/sizeof(Eterm)) - 1)
-#define SIZ_DBTERM(HDT) (SIZ_OVERHEAD + (HDT)->dbterm.size)
+
+static ERTS_INLINE void free_term(DbTableHash *tb, HashDbTerm* p)
+{
+ db_free_term((DbTable*)tb, p, offsetof(HashDbTerm, dbterm));
+}
/*
* Local types
@@ -358,10 +358,8 @@ static HashDbTerm* search_list(DbTableHash* tb, Eterm key,
HashValue hval, HashDbTerm *list);
static void shrink(DbTableHash* tb, int nactive);
static void grow(DbTableHash* tb, int nactive);
-static void free_term(DbTableHash *tb, HashDbTerm* p);
-static Eterm put_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2);
-static HashDbTerm* get_term(DbTableHash* tb, HashDbTerm* old,
- Eterm obj, HashValue hval);
+static Eterm build_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2,
+ DbTableHash*);
static int analyze_pattern(DbTableHash *tb, Eterm pattern,
struct mp_info *mpi);
@@ -442,6 +440,7 @@ static ERTS_INLINE int has_live_key(DbTableHash* tb, HashDbTerm* b,
if (b->hvalue != hval) return 0;
else {
Eterm itemKey = GETKEY(tb, b->dbterm.tpl);
+ ASSERT(!is_header(itemKey));
return EQ(key,itemKey);
}
}
@@ -454,10 +453,38 @@ static ERTS_INLINE int has_key(DbTableHash* tb, HashDbTerm* b,
if (b->hvalue != hval && b->hvalue != INVALID_HASH) return 0;
else {
Eterm itemKey = GETKEY(tb, b->dbterm.tpl);
+ ASSERT(!is_header(itemKey));
return EQ(key,itemKey);
}
}
+static ERTS_INLINE HashDbTerm* new_dbterm(DbTableHash* tb, Eterm obj)
+{
+ HashDbTerm* p;
+ if (tb->common.compress) {
+ p = db_store_term_comp(&tb->common, NULL, offsetof(HashDbTerm,dbterm), obj);
+ }
+ else {
+ p = db_store_term(&tb->common, NULL, offsetof(HashDbTerm,dbterm), obj);
+ }
+ return p;
+}
+
+static ERTS_INLINE HashDbTerm* replace_dbterm(DbTableHash* tb, HashDbTerm* old,
+ Eterm obj)
+{
+ HashDbTerm* ret;
+ ASSERT(old != NULL);
+ if (tb->common.compress) {
+ ret = db_store_term_comp(&tb->common, &(old->dbterm), offsetof(HashDbTerm,dbterm), obj);
+ }
+ else {
+ ret = db_store_term(&tb->common, &(old->dbterm), offsetof(HashDbTerm,dbterm), obj);
+ }
+ return ret;
+}
+
+
/*
** External interface
@@ -764,7 +791,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
ret = DB_ERROR_BADKEY;
goto Ldone;
}
- q = get_term(tb, b, obj, hval);
+ q = replace_dbterm(tb, b, obj);
q->next = bnext;
q->hvalue = hval; /* In case of INVALID_HASH */
*bp = q;
@@ -784,7 +811,7 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
HashDbTerm** qp = bp;
q = b;
do {
- if (eq(make_tuple(q->dbterm.tpl), obj)) {
+ if (db_eq(&tb->common,obj,&q->dbterm)) {
if (q->hvalue == INVALID_HASH) {
erts_smp_atomic_inc(&tb->common.nitems);
q->hvalue = hval;
@@ -803,7 +830,8 @@ int db_put_hash(DbTable *tbl, Eterm obj, int key_clash_fail)
/*else DB_DUPLICATE_BAG */
Lnew:
- q = get_term(tb, NULL, obj, hval);
+ q = new_dbterm(tb, obj);
+ q->hvalue = hval;
q->next = b;
*bp = q;
nitems = erts_smp_atomic_inctest(&tb->common.nitems);
@@ -844,7 +872,7 @@ int db_get_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
while(b2 != NULL && has_key(tb,b2,key,hval))
b2 = b2->next;
}
- copy = put_term_list(p, b1, b2);
+ copy = build_term_list(p, b1, b2, tb);
CHECK_TABLES();
*ret = copy;
goto done;
@@ -967,13 +995,10 @@ static int db_get_element_hash(Process *p, DbTable *tbl,
while(b1 != 0) {
if (has_live_key(tb,b1,key,hval)) {
- Eterm copy;
-
if (ndex > arityval(b1->dbterm.tpl[0])) {
retval = DB_ERROR_BADITEM;
goto done;
}
-
if (tb->common.status & (DB_BAG | DB_DUPLICATE_BAG)) {
HashDbTerm* b;
HashDbTerm* b2 = b1->next;
@@ -987,15 +1012,12 @@ static int db_get_element_hash(Process *p, DbTable *tbl,
}
b2 = b2->next;
}
-
b = b1;
while(b != b2) {
if (b->hvalue != INVALID_HASH) {
Eterm *hp;
- Uint sz = size_object(b->dbterm.tpl[ndex])+2;
-
- hp = HAlloc(p, sz);
- copy = copy_struct(b->dbterm.tpl[ndex], sz-2, &hp, &MSO(p));
+ Eterm copy = db_copy_element_from_ets(&tb->common, p,
+ &b->dbterm, ndex, &hp, 2);
elem_list = CONS(hp, copy, elem_list);
hp += 2;
}
@@ -1004,8 +1026,8 @@ static int db_get_element_hash(Process *p, DbTable *tbl,
*ret = elem_list;
}
else {
- COPY_OBJECT(b1->dbterm.tpl[ndex], p, &copy);
- *ret = copy;
+ Eterm* hp;
+ *ret = db_copy_element_from_ets(&tb->common, p, &b1->dbterm, ndex, &hp, 0);
}
retval = DB_ERROR_NONE;
goto done;
@@ -1040,6 +1062,7 @@ int db_erase_bag_exact2(DbTable *tbl, Eterm key, Eterm value)
ASSERT(!IS_FIXED(tb));
ASSERT((tb->common.status & DB_BAG));
+ ASSERT(!tb->common.compress);
while(b != 0) {
if (has_live_key(tb,b,key,hval)) {
@@ -1139,7 +1162,7 @@ static int db_erase_object_hash(DbTable *tbl, Eterm object, Eterm *ret)
while(b != 0) {
if (has_live_key(tb,b,key,hval)) {
++nkeys;
- if (eq(object, make_tuple(b->dbterm.tpl))) {
+ if (db_eq(&tb->common,object, &b->dbterm)) {
--nitems_diff;
if (nkeys==1 && IS_FIXED(tb)) { /* Pseudo remove */
add_fixed_deletion(tb,ix);
@@ -1188,7 +1211,7 @@ static int db_slot_hash(Process *p, DbTable *tbl, Eterm slot_term, Eterm *ret)
lck = RLOCK_HASH(tb, slot);
nactive = NACTIVE(tb);
if (slot < nactive) {
- *ret = put_term_list(p, BUCKET(tb, slot), 0);
+ *ret = build_term_list(p, BUCKET(tb, slot), 0, tb);
retval = DB_ERROR_NONE;
}
else if (slot == nactive) {
@@ -1232,8 +1255,6 @@ static int db_select_continue_hash(Process *p,
int num_left = 1000;
HashDbTerm *current = 0;
Eterm match_list;
- Uint32 dummy;
- unsigned sz;
Eterm *hp;
Eterm match_res;
Sint got;
@@ -1285,26 +1306,14 @@ static int db_select_continue_hash(Process *p,
}
for(;;) {
if (current->hvalue != INVALID_HASH &&
- (match_res =
- db_prog_match(p,mp,
- make_tuple(current->dbterm.tpl),
- NULL,0,&dummy),
+ (match_res = db_prog_match_and_copy(&tb->common, p, mp, all_objects,
+ &current->dbterm, &hp, 2),
is_value(match_res))) {
- if (all_objects) {
- hp = HAlloc(p, current->dbterm.size + 2);
- match_res = copy_shallow(DBTERM_BUF(&current->dbterm),
- current->dbterm.size,
- &hp,
- &MSO(p));
- } else {
- sz = size_object(match_res);
-
- hp = HAlloc(p, sz + 2);
- match_res = copy_struct(match_res, sz, &hp, &MSO(p));
- }
- match_list = CONS(hp, match_res, match_list);
+
+ match_list = CONS(hp, match_res, match_list);
++got;
}
+
--num_left;
save_slot_ix = slot_ix;
if ((current = next(tb, (Uint*)&slot_ix, &lck, current)) == NULL) {
@@ -1395,9 +1404,7 @@ static int db_select_chunk_hash(Process *p, DbTable *tbl,
HashDbTerm *current = 0;
unsigned current_list_pos = 0;
Eterm match_list;
- Uint32 dummy;
Eterm match_res;
- unsigned sz;
Eterm *hp;
int num_left = 1000;
Uint got = 0;
@@ -1464,22 +1471,9 @@ static int db_select_chunk_hash(Process *p, DbTable *tbl,
for(;;) {
if (current != NULL) {
if (current->hvalue != INVALID_HASH) {
- match_res = db_prog_match(p,mpi.mp,
- make_tuple(current->dbterm.tpl),
- NULL,0,&dummy);
+ match_res = db_prog_match_and_copy(&tb->common, p, mpi.mp, 0,
+ &current->dbterm, &hp, 2);
if (is_value(match_res)) {
- if (mpi.all_objects) {
- hp = HAlloc(p, current->dbterm.size + 2);
- match_res = copy_shallow(DBTERM_BUF(&current->dbterm),
- current->dbterm.size,
- &hp,
- &MSO(p));
- } else {
- sz = size_object(match_res);
-
- hp = HAlloc(p, sz + 2);
- match_res = copy_struct(match_res, sz, &hp, &MSO(p));
- }
match_list = CONS(hp, match_res, match_list);
++got;
}
@@ -1594,7 +1588,6 @@ static int db_select_count_hash(Process *p,
Uint slot_ix = 0;
HashDbTerm* current = NULL;
unsigned current_list_pos = 0;
- Uint32 dummy;
Eterm *hp;
int num_left = 1000;
Uint got = 0;
@@ -1644,8 +1637,8 @@ static int db_select_count_hash(Process *p,
for(;;) {
if (current != NULL) {
if (current->hvalue != INVALID_HASH) {
- if (db_prog_match(p, mpi.mp, make_tuple(current->dbterm.tpl),
- NULL,0, &dummy) == am_true) {
+ if (db_prog_match_and_copy(&tb->common, p, mpi.mp, 0,
+ &current->dbterm, NULL,0) == am_true) {
++got;
}
--num_left;
@@ -1713,7 +1706,6 @@ static int db_select_delete_hash(Process *p,
Uint slot_ix = 0;
HashDbTerm **current = NULL;
unsigned current_list_pos = 0;
- Uint32 dummy;
Eterm *hp;
int num_left = 1000;
Uint got = 0;
@@ -1794,9 +1786,8 @@ static int db_select_delete_hash(Process *p,
}
else {
int did_erase = 0;
- if ((db_prog_match(p,mpi.mp,
- make_tuple((*current)->dbterm.tpl),
- NULL,0,&dummy)) == am_true) {
+ if (db_prog_match_and_copy(&tb->common, p, mpi.mp, 0,
+ &(*current)->dbterm, NULL, 0) == am_true) {
if (NFIXED(tb) > fixated_by_me) { /* fixated by others? */
if (slot_ix != last_pseudo_delete) {
add_fixed_deletion(tb, slot_ix);
@@ -1859,7 +1850,6 @@ static int db_select_delete_continue_hash(Process *p,
Uint slot_ix;
Uint last_pseudo_delete = (Uint)-1;
HashDbTerm **current = NULL;
- Uint32 dummy;
Eterm *hp;
int num_left = 1000;
Uint got;
@@ -1907,8 +1897,8 @@ static int db_select_delete_continue_hash(Process *p,
}
else {
int did_erase = 0;
- if ((db_prog_match(p,mp,make_tuple((*current)->dbterm.tpl),
- NULL,0,&dummy)) == am_true) {
+ if (db_prog_match_and_copy(&tb->common, p, mp, 0,
+ &(*current)->dbterm, NULL, 0) == am_true) {
if (NFIXED(tb) > fixated_by_me) { /* fixated by others? */
if (slot_ix != last_pseudo_delete) {
add_fixed_deletion(tb, slot_ix);
@@ -1970,7 +1960,6 @@ static int db_select_count_continue_hash(Process *p,
DbTableHash *tb = &tbl->hash;
Uint slot_ix;
HashDbTerm* current;
- Uint32 dummy;
Eterm *hp;
int num_left = 1000;
Uint got;
@@ -2008,8 +1997,8 @@ static int db_select_count_continue_hash(Process *p,
current = current->next;
continue;
}
- if (db_prog_match(p, mp, make_tuple(current->dbterm.tpl),
- NULL,0,&dummy) == am_true) {
+ if (db_prog_match_and_copy(&tb->common, p, mp, 0, &current->dbterm,
+ NULL, 0) == am_true) {
++got;
}
--num_left;
@@ -2454,31 +2443,19 @@ static int free_seg(DbTableHash *tb, int free_records)
}
-static HashDbTerm* get_term(DbTableHash* tb, HashDbTerm* old,
- Eterm obj, HashValue hval)
-{
- HashDbTerm* p = db_get_term((DbTableCommon *) tb,
- (old != NULL) ? &(old->dbterm) : NULL,
- ((char *) &(old->dbterm)) - ((char *) old),
- obj);
- p->hvalue = hval;
- /*p->next = NULL;*/ /*No Need */
- return p;
-}
-
-
/*
** Copy terms from ptr1 until ptr2
** works for ptr1 == ptr2 == 0 => []
** or ptr2 == 0
*/
-static Eterm put_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2)
+static Eterm build_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2,
+ DbTableHash* tb)
{
int sz = 0;
HashDbTerm* ptr;
Eterm list = NIL;
Eterm copy;
- Eterm *hp;
+ Eterm *hp, *hend;
ptr = ptr1;
while(ptr != ptr2) {
@@ -2490,26 +2467,20 @@ static Eterm put_term_list(Process* p, HashDbTerm* ptr1, HashDbTerm* ptr2)
}
hp = HAlloc(p, sz);
+ hend = hp + sz;
ptr = ptr1;
while(ptr != ptr2) {
if (ptr->hvalue != INVALID_HASH) {
- copy = copy_shallow(DBTERM_BUF(&ptr->dbterm), ptr->dbterm.size, &hp, &MSO(p));
+ copy = db_copy_object_from_ets(&tb->common, &ptr->dbterm, &hp, &MSO(p));
list = CONS(hp, copy, list);
hp += 2;
}
ptr = ptr->next;
}
- return list;
-}
+ HRelease(p,hend,hp);
-static void free_term(DbTableHash *tb, HashDbTerm* p)
-{
- db_free_term_data(&(p->dbterm));
- erts_db_free(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- (void *) p,
- SIZ_DBTERM(p)*sizeof(Eterm));
+ return list;
}
/* Grow table with one new bucket.
@@ -2720,8 +2691,8 @@ static int db_lookup_dbterm_hash(DbTable *tbl, Eterm key, DbUpdateHandle* handle
handle->tb = tbl;
handle->bp = (void**) prevp;
handle->dbterm = &b->dbterm;
- handle->new_size = b->dbterm.size;
handle->mustResize = 0;
+ handle->new_size = b->dbterm.size;
handle->lck = lck;
/* KEEP hval WLOCKED, db_finalize_dbterm_hash will WUNLOCK */
return 1;
@@ -2742,37 +2713,14 @@ static void db_finalize_dbterm_hash(DbUpdateHandle* handle)
erts_smp_rwmtx_t* lck = (erts_smp_rwmtx_t*) handle->lck;
ERTS_SMP_LC_ASSERT(IS_HASH_WLOCKED(&tbl->hash,lck)); /* locked by db_lookup_dbterm_hash */
- ASSERT(&oldp->dbterm == handle->dbterm);
- if (handle->mustResize) {
- ErlOffHeap tmp_offheap;
- Eterm* top;
- Eterm copy;
- DbTerm* newDbTerm;
- HashDbTerm* newp = erts_db_alloc(ERTS_ALC_T_DB_TERM, tbl,
- sizeof(HashDbTerm)+sizeof(Eterm)*(handle->new_size-1));
- sys_memcpy(newp, oldp, sizeof(HashDbTerm)-sizeof(DbTerm)); /* copy only hashtab header */
- *(handle->bp) = newp;
- newDbTerm = &newp->dbterm;
-
- newDbTerm->size = handle->new_size;
- tmp_offheap.first = NULL;
- tmp_offheap.overhead = 0;
-
- /* make a flat copy */
- top = DBTERM_BUF(newDbTerm);
- copy = copy_struct(make_tuple(handle->dbterm->tpl),
- handle->new_size,
- &top, &tmp_offheap);
- newDbTerm->first_oh = tmp_offheap.first;
- DBTERM_SET_TPL(newDbTerm,tuple_val(copy));
+ ASSERT((&oldp->dbterm == handle->dbterm) == !(tbl->common.compress && handle->mustResize));
+ if (handle->mustResize) {
+ db_finalize_resize(handle, offsetof(HashDbTerm,dbterm));
WUNLOCK_HASH(lck);
-
- db_free_term_data(handle->dbterm);
- erts_db_free(ERTS_ALC_T_DB_TERM, tbl,
- (void *) (((char *) handle->dbterm) - (sizeof(HashDbTerm) - sizeof(DbTerm))),
- sizeof(HashDbTerm) + sizeof(Eterm)*(handle->dbterm->size-1));
+
+ free_term(&tbl->hash, oldp);
}
else {
WUNLOCK_HASH(lck);
@@ -2781,7 +2729,7 @@ static void db_finalize_dbterm_hash(DbUpdateHandle* handle)
handle->dbterm = 0;
#endif
return;
-}
+}
static int db_delete_all_objects_hash(Process* p, DbTable* tbl)
{
diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c
index 5644e85f97..8108494fc5 100644
--- a/erts/emulator/beam/erl_db_tree.c
+++ b/erts/emulator/beam/erl_db_tree.c
@@ -122,12 +122,41 @@ static void release_stack(DbTableTree* tb, DbTreeStack* stack)
}
}
-static void reset_static_stack(DbTableTree* tb)
+static ERTS_INLINE void reset_static_stack(DbTableTree* tb)
{
tb->static_stack.pos = 0;
tb->static_stack.slot = 0;
}
+static ERTS_INLINE void free_term(DbTableTree *tb, TreeDbTerm* p)
+{
+ db_free_term((DbTable*)tb, p, offsetof(TreeDbTerm, dbterm));
+}
+
+static ERTS_INLINE TreeDbTerm* new_dbterm(DbTableTree *tb, Eterm obj)
+{
+ TreeDbTerm* p;
+ if (tb->common.compress) {
+ p = db_store_term_comp(&tb->common, NULL, offsetof(TreeDbTerm,dbterm), obj);
+ }
+ else {
+ p = db_store_term(&tb->common, NULL, offsetof(TreeDbTerm,dbterm), obj);
+ }
+ return p;
+}
+static ERTS_INLINE TreeDbTerm* replace_dbterm(DbTableTree *tb, TreeDbTerm* old,
+ Eterm obj)
+{
+ TreeDbTerm* p;
+ ASSERT(old != NULL);
+ if (tb->common.compress) {
+ p = db_store_term_comp(&tb->common, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
+ }
+ else {
+ p = db_store_term(&tb->common, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
+ }
+ return p;
+}
/*
** Some macros for "direction stacks"
@@ -178,12 +207,6 @@ static void do_dump_tree2(int to, void *to_arg, int show, TreeDbTerm *t,
#endif
/*
- * Size calculations
- */
-#define SIZ_OVERHEAD ((sizeof(TreeDbTerm)/sizeof(Eterm)) - 1)
-#define SIZ_DBTERM(TDT) (SIZ_OVERHEAD + (TDT)->dbterm.size)
-
-/*
** Datatypes
*/
@@ -263,9 +286,6 @@ static TreeDbTerm *linkout_tree(DbTableTree *tb, Eterm key);
static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
Eterm object);
static int do_free_tree_cont(DbTableTree *tb, int num_left);
-static TreeDbTerm* get_term(DbTableTree *tb,
- TreeDbTerm* old,
- Eterm obj);
static void free_term(DbTableTree *tb, TreeDbTerm* p);
static int balance_left(TreeDbTerm **this);
static int balance_right(TreeDbTerm **this);
@@ -622,7 +642,7 @@ static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
erts_smp_atomic_dec(&tb->common.nitems);
return DB_ERROR_SYSRES;
}
- *this = get_term(tb, NULL, obj);
+ *this = new_dbterm(tb, obj);
(*this)->balance = 0;
(*this)->left = (*this)->right = NULL;
break;
@@ -636,7 +656,7 @@ static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
tstack[tpos++] = this;
this = &((*this)->right);
} else if (!key_clash_fail) { /* Equal key and this is a set, replace. */
- *this = get_term(tb, *this, obj);
+ *this = replace_dbterm(tb, *this, obj);
break;
} else {
return DB_ERROR_BADKEY; /* key already exists */
@@ -714,7 +734,7 @@ static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
{
DbTableTree *tb = &tbl->tree;
Eterm copy;
- Eterm *hp;
+ Eterm *hp, *hend;
TreeDbTerm *this;
/*
@@ -728,11 +748,11 @@ static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
*ret = NIL;
} else {
hp = HAlloc(p, this->dbterm.size + 2);
- copy = copy_shallow(DBTERM_BUF(&this->dbterm),
- this->dbterm.size,
- &hp,
- &MSO(p));
+ hend = hp + this->dbterm.size + 2;
+ copy = db_copy_object_from_ets(&tb->common, &this->dbterm, &hp, &MSO(p));
*ret = CONS(hp, copy, NIL);
+ hp += 2;
+ HRelease(p,hend,hp);
}
return DB_ERROR_NONE;
}
@@ -766,18 +786,10 @@ static int db_get_element_tree(Process *p, DbTable *tbl,
if (this == NULL) {
return DB_ERROR_BADKEY;
} else {
- Eterm element;
- Uint sz;
if (ndex > arityval(this->dbterm.tpl[0])) {
return DB_ERROR_BADPARAM;
}
- element = this->dbterm.tpl[ndex];
- sz = size_object(element);
- hp = HAlloc(p, sz);
- *ret = copy_struct(element,
- sz,
- &hp,
- &MSO(p));
+ *ret = db_copy_element_from_ets(&tb->common, p, &this->dbterm, ndex, &hp, 0);
}
return DB_ERROR_NONE;
}
@@ -815,7 +827,7 @@ static int db_slot_tree(Process *p, DbTable *tbl,
DbTableTree *tb = &tbl->tree;
Sint slot;
TreeDbTerm *st;
- Eterm *hp;
+ Eterm *hp, *hend;
Eterm copy;
/*
@@ -847,11 +859,11 @@ static int db_slot_tree(Process *p, DbTable *tbl,
return DB_ERROR_UNSPEC;
}
hp = HAlloc(p, st->dbterm.size + 2);
- copy = copy_shallow(DBTERM_BUF(&st->dbterm),
- st->dbterm.size,
- &hp,
- &MSO(p));
+ hend = hp + st->dbterm.size + 2;
+ copy = db_copy_object_from_ets(&tb->common, &st->dbterm, &hp, &MSO(p));
*ret = CONS(hp, copy, NIL);
+ hp += 2;
+ HRelease(p,hend,hp);
return DB_ERROR_NONE;
}
@@ -1738,7 +1750,7 @@ static int db_select_delete_tree(Process *p, DbTable *tbl,
** Other interface routines (not directly coupled to one bif)
*/
-/* Display hash table contents (for dump) */
+/* Display tree contents (for dump) */
static void db_print_tree(int to, void *to_arg,
int show,
DbTable *tbl)
@@ -1926,7 +1938,7 @@ static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
tstack[tpos++] = this;
this = &((*this)->right);
} else { /* Equal key, found the only possible matching object*/
- if (!eq(object,make_tuple((*this)->dbterm.tpl))) {
+ if (!db_eq(&tb->common,object,&(*this)->dbterm)) {
return NULL;
}
q = (*this);
@@ -2079,15 +2091,6 @@ static void do_dump_tree(int to, void *to_arg, TreeDbTerm *t)
}
}
-static void free_term(DbTableTree *tb, TreeDbTerm* p)
-{
- db_free_term_data(&(p->dbterm));
- erts_db_free(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- (void *) p,
- SIZ_DBTERM(p)*sizeof(Uint));
-}
-
static int do_free_tree_cont(DbTableTree *tb, int num_left)
{
TreeDbTerm *root;
@@ -2118,17 +2121,6 @@ static int do_free_tree_cont(DbTableTree *tb, int num_left)
return 1;
}
-static TreeDbTerm* get_term(DbTableTree *tb,
- TreeDbTerm* old,
- Eterm obj)
-{
- TreeDbTerm* p = db_get_term((DbTableCommon *) tb,
- (old != NULL) ? &(old->dbterm) : NULL,
- ((char *) &(old->dbterm)) - ((char *) old),
- obj);
- return p;
-}
-
/*
* Deletion helpers
*/
@@ -2570,46 +2562,21 @@ static int db_lookup_dbterm_tree(DbTable *tbl, Eterm key, DbUpdateHandle* handle
handle->tb = tbl;
handle->dbterm = &(*pp)->dbterm;
+ handle->mustResize = 0;
handle->bp = (void**) pp;
handle->new_size = (*pp)->dbterm.size;
- handle->mustResize = 0;
return 1;
}
static void db_finalize_dbterm_tree(DbUpdateHandle* handle)
{
if (handle->mustResize) {
- ErlOffHeap tmp_offheap;
- Eterm* top;
- Eterm copy;
- DbTerm* newDbTerm;
- DbTableTree *tb = &handle->tb->tree;
TreeDbTerm* oldp = (TreeDbTerm*) *handle->bp;
- TreeDbTerm* newp = erts_db_alloc(ERTS_ALC_T_DB_TERM,
- handle->tb,
- sizeof(TreeDbTerm)+sizeof(Eterm)*(handle->new_size-1));
- memcpy(newp, oldp, sizeof(TreeDbTerm)-sizeof(DbTerm)); /* copy only tree header */
- *(handle->bp) = newp;
- reset_static_stack(tb);
- newDbTerm = &newp->dbterm;
-
- newDbTerm->size = handle->new_size;
- tmp_offheap.first = NULL;
- tmp_offheap.overhead = 0;
-
- /* make a flat copy */
- top = DBTERM_BUF(newDbTerm);
- copy = copy_struct(make_tuple(handle->dbterm->tpl),
- handle->new_size,
- &top, &tmp_offheap);
- newDbTerm->first_oh = tmp_offheap.first;
- DBTERM_SET_TPL(newDbTerm,tuple_val(copy));
-
- db_free_term_data(handle->dbterm);
- erts_db_free(ERTS_ALC_T_DB_TERM,
- handle->tb,
- (void *) (((char *) handle->dbterm) - (sizeof(TreeDbTerm) - sizeof(DbTerm))),
- sizeof(TreeDbTerm) + sizeof(Eterm)*(handle->dbterm->size-1));
+
+ db_finalize_resize(handle, offsetof(TreeDbTerm,dbterm));
+ reset_static_stack(&handle->tb->tree);
+
+ free_term(&handle->tb->tree, oldp);
}
#ifdef DEBUG
handle->dbterm = 0;
@@ -3009,7 +2976,7 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr,
{
struct select_context *sc = (struct select_context *) ptr;
Eterm ret;
- Uint32 dummy;
+ Eterm* hp;
sc->lastobj = this->dbterm.tpl;
@@ -3024,24 +2991,9 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr,
this->dbterm.tpl)) > 0))) {
return 0;
}
- ret = db_prog_match(sc->p, sc->mp,
- make_tuple(this->dbterm.tpl),
- NULL,0, &dummy);
+ ret = db_prog_match_and_copy(&tb->common,sc->p,sc->mp,sc->all_objects,
+ &this->dbterm, &hp, 2);
if (is_value(ret)) {
- Uint sz;
- Eterm *hp;
- if (sc->all_objects) {
- hp = HAlloc(sc->p, this->dbterm.size + 2);
- ret = copy_shallow(DBTERM_BUF(&this->dbterm),
- this->dbterm.size,
- &hp,
- &MSO(sc->p));
- } else {
- sz = size_object(ret);
- hp = HAlloc(sc->p, sz + 2);
- ret = copy_struct(ret, sz,
- &hp, &MSO(sc->p));
- }
sc->accum = CONS(hp, ret, sc->accum);
}
if (MBUF(sc->p)) {
@@ -3062,7 +3014,6 @@ static int doit_select_count(DbTableTree *tb, TreeDbTerm *this, void *ptr,
{
struct select_count_context *sc = (struct select_count_context *) ptr;
Eterm ret;
- Uint32 dummy;
sc->lastobj = this->dbterm.tpl;
@@ -3073,9 +3024,8 @@ static int doit_select_count(DbTableTree *tb, TreeDbTerm *this, void *ptr,
this->dbterm.tpl)) > 0)) {
return 0;
}
- ret = db_prog_match(sc->p, sc->mp,
- make_tuple(this->dbterm.tpl),
- NULL,0, &dummy);
+ ret = db_prog_match_and_copy(&tb->common, sc->p, sc->mp, 0,
+ &this->dbterm, NULL, 0);
if (ret == am_true) {
++(sc->got);
}
@@ -3090,7 +3040,7 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr,
{
struct select_context *sc = (struct select_context *) ptr;
Eterm ret;
- Uint32 dummy;
+ Eterm* hp;
sc->lastobj = this->dbterm.tpl;
@@ -3106,25 +3056,10 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr,
return 0;
}
- ret = db_prog_match(sc->p, sc->mp,
- make_tuple(this->dbterm.tpl),
- NULL,0, &dummy);
+ ret = db_prog_match_and_copy(&tb->common, sc->p, sc->mp, sc->all_objects,
+ &this->dbterm, &hp, 2);
if (is_value(ret)) {
- Uint sz;
- Eterm *hp;
-
++(sc->got);
- if (sc->all_objects) {
- hp = HAlloc(sc->p, this->dbterm.size + 2);
- ret = copy_shallow(DBTERM_BUF(&this->dbterm),
- this->dbterm.size,
- &hp,
- &MSO(sc->p));
- } else {
- sz = size_object(ret);
- hp = HAlloc(sc->p, sz + 2);
- ret = copy_struct(ret, sz, &hp, &MSO(sc->p));
- }
sc->accum = CONS(hp, ret, sc->accum);
}
if (MBUF(sc->p)) {
@@ -3146,7 +3081,6 @@ static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
{
struct select_delete_context *sc = (struct select_delete_context *) ptr;
Eterm ret;
- Uint32 dummy;
Eterm key;
if (sc->erase_lastterm)
@@ -3159,9 +3093,8 @@ static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
GETKEY_WITH_POS(sc->keypos,
this->dbterm.tpl)) > 0)
return 0;
- ret = db_prog_match(sc->p, sc->mp,
- make_tuple(this->dbterm.tpl),
- NULL,0, &dummy);
+ ret = db_prog_match_and_copy(&tb->common, sc->p, sc->mp, 0,
+ &this->dbterm, NULL, 0);
if (ret == am_true) {
key = GETKEY(sc->tb, this->dbterm.tpl);
linkout_tree(sc->tb, key);
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index 2f34561234..e773361619 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -25,7 +25,6 @@
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
-
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
@@ -890,6 +889,8 @@ static Eterm match_spec_test(Process *p, Eterm against, Eterm spec, int trace);
static Eterm seq_trace_fake(Process *p, Eterm arg1);
+static void db_free_tmp_uncompressed(DbTerm* obj);
+
/*
** Interface routines.
@@ -1604,6 +1605,7 @@ static Eterm dpm_array_to_list(Process *psp, Eterm *arr, int arity)
}
return ret;
}
+
/*
** Execution of the match program, this is Pam.
** May return THE_NON_VALUE, which is a bailout.
@@ -2391,33 +2393,46 @@ void db_do_update_element(DbUpdateHandle* handle,
if (is_both_immed(newval,oldval)) {
handle->dbterm->tpl[position] = newval;
+#ifdef DEBUG_CLONE
+ if (handle->dbterm->debug_clone) {
+ handle->dbterm->debug_clone[position] = newval;
+ }
+#endif
return;
}
- else if (!handle->mustResize && is_boxed(newval)) {
- newp = boxed_val(newval);
- switch (*newp & _TAG_HEADER_MASK) {
- case _TAG_HEADER_POS_BIG:
- case _TAG_HEADER_NEG_BIG:
- case _TAG_HEADER_FLOAT:
- case _TAG_HEADER_HEAP_BIN:
- newval_sz = header_arity(*newp) + 1;
- if (is_boxed(oldval)) {
- oldp = boxed_val(oldval);
- switch (*oldp & _TAG_HEADER_MASK) {
- case _TAG_HEADER_POS_BIG:
- case _TAG_HEADER_NEG_BIG:
- case _TAG_HEADER_FLOAT:
- case _TAG_HEADER_HEAP_BIN:
- oldval_sz = header_arity(*oldp) + 1;
- if (oldval_sz == newval_sz) {
- /* "self contained" terms of same size, do memcpy */
- sys_memcpy(oldp, newp, newval_sz*sizeof(Eterm));
- return;
+ if (!handle->mustResize) {
+ if (handle->tb->common.compress) {
+ handle->dbterm = db_alloc_tmp_uncompressed(&handle->tb->common,
+ handle->dbterm);
+ handle->mustResize = 1;
+ oldval = handle->dbterm->tpl[position];
+ }
+ else if (is_boxed(newval)) {
+ newp = boxed_val(newval);
+ switch (*newp & _TAG_HEADER_MASK) {
+ case _TAG_HEADER_POS_BIG:
+ case _TAG_HEADER_NEG_BIG:
+ case _TAG_HEADER_FLOAT:
+ case _TAG_HEADER_HEAP_BIN:
+ newval_sz = header_arity(*newp) + 1;
+ if (is_boxed(oldval)) {
+ oldp = boxed_val(oldval);
+ switch (*oldp & _TAG_HEADER_MASK) {
+ case _TAG_HEADER_POS_BIG:
+ case _TAG_HEADER_NEG_BIG:
+ case _TAG_HEADER_FLOAT:
+ case _TAG_HEADER_HEAP_BIN:
+ oldval_sz = header_arity(*oldp) + 1;
+ if (oldval_sz == newval_sz) {
+ /* "self contained" terms of same size, do memcpy */
+ sys_memcpy(oldp, newp, newval_sz*sizeof(Eterm));
+ return;
+ }
+ goto both_size_set;
}
- goto both_size_set;
}
+ goto new_size_set;
}
- goto new_size_set;
}
}
/* Not possible for simple memcpy or dbterm is already non-contiguous, */
@@ -2436,84 +2451,372 @@ both_size_set:
handle->mustResize = 1;
}
+static ERTS_INLINE byte* db_realloc_term(DbTableCommon* tb, void* old,
+ Uint old_sz, Uint new_sz, Uint offset)
+{
+ byte* ret;
+ if (erts_ets_realloc_always_moves) {
+ ret = erts_db_alloc(ERTS_ALC_T_DB_TERM, (DbTable*)tb, new_sz);
+ sys_memcpy(ret, old, offset);
+ erts_db_free(ERTS_ALC_T_DB_TERM, (DbTable*)tb, old, old_sz);
+ } else {
+ ret = erts_db_realloc(ERTS_ALC_T_DB_TERM, (DbTable*)tb,
+ old, old_sz, new_sz);
+ }
+ return ret;
+}
+
+/* Allocated size of a compressed dbterm
+*/
+static ERTS_INLINE Uint db_alloced_size_comp(DbTerm* obj)
+{
+ return obj->tpl[arityval(*obj->tpl) + 1];
+}
+
+void db_free_term(DbTable *tb, void* basep, Uint offset)
+{
+ DbTerm* db = (DbTerm*) ((byte*)basep + offset);
+ Uint size;
+ if (tb->common.compress) {
+ db_cleanup_offheap_comp(db);
+ size = db_alloced_size_comp(db);
+ }
+ else {
+ ErlOffHeap tmp_oh;
+ tmp_oh.first = db->first_oh;
+ erts_cleanup_offheap(&tmp_oh);
+ size = offset + offsetof(DbTerm,tpl) + db->size*sizeof(Eterm);
+ }
+ erts_db_free(ERTS_ALC_T_DB_TERM, tb, basep, size);
+}
+
+static ERTS_INLINE Uint align_up(Uint value, Uint pow2)
+{
+ ASSERT((pow2 & (pow2-1)) == 0);
+ return (value + (pow2-1)) & ~(pow2-1);
+}
+
+/* Compressed size of an uncompressed term
+*/
+static Uint db_size_dbterm_comp(DbTableCommon* tb, Eterm obj)
+{
+ Eterm* tpl = tuple_val(obj);
+ int i;
+ Uint size = sizeof(DbTerm)
+ + arityval(*tpl) * sizeof(Eterm)
+ + sizeof(Uint); /* "alloc_size" */
+
+ for (i = arityval(*tpl); i>0; i--) {
+ if (i != tb->keypos && is_not_immed(tpl[i])) {
+ size += erts_encode_ext_size_ets(tpl[i]);
+ }
+ }
+ size += size_object(tpl[tb->keypos]) * sizeof(Eterm);
+ return align_up(size, sizeof(Uint));
+}
+
+/* Conversion between top tuple element and pointer to compressed data
+*/
+static ERTS_INLINE Eterm ext2elem(Eterm* tpl, byte* ext)
+{
+ return (((Uint)(ext - (byte*)tpl)) << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER;
+}
+static ERTS_INLINE byte* elem2ext(Eterm* tpl, Uint ix)
+{
+ ASSERT(is_header(tpl[ix]));
+ return (byte*)tpl + (tpl[ix] >> _TAG_PRIMARY_SIZE);
+}
+
+static void* copy_to_comp(DbTableCommon* tb, Eterm obj, DbTerm* dest,
+ Uint alloc_size)
+{
+ ErlOffHeap tmp_offheap;
+ Eterm* src = tuple_val(obj);
+ Eterm* tpl = dest->tpl;
+ Eterm key = src[tb->keypos];
+ int arity = arityval(src[0]);
+ union {
+ Eterm* ep;
+ byte* cp;
+ UWord ui;
+ }top;
+ int i;
+
+ top.ep = tpl+ 1 + arity + 1;
+ tpl[0] = src[0];
+ tpl[arity + 1] = alloc_size;
+
+ tmp_offheap.first = NULL;
+ tpl[tb->keypos] = copy_struct(key, size_object(key), &top.ep, &tmp_offheap);
+ dest->first_oh = tmp_offheap.first;
+ for (i=1; i<=arity; i++) {
+ if (i != tb->keypos) {
+ if (is_immed(src[i])) {
+ tpl[i] = src[i];
+ }
+ else {
+ tpl[i] = ext2elem(tpl, top.cp);
+ top.cp = erts_encode_ext_ets(src[i], top.cp, &dest->first_oh);
+ }
+ }
+ }
+
+#ifdef DEBUG_CLONE
+ {
+ Eterm* dbg_top = erts_alloc(ERTS_ALC_T_DB_TERM, dest->size * sizeof(Eterm));
+ dest->debug_clone = dbg_top;
+ tmp_offheap.first = dest->first_oh;
+ copy_struct(obj, dest->size, &dbg_top, &tmp_offheap);
+ dest->first_oh = tmp_offheap.first;
+ ASSERT(dbg_top == dest->debug_clone + dest->size);
+ }
+#endif
+ return top.cp;
+}
/*
** Copy the object into a possibly new DbTerm,
** offset is the offset of the DbTerm from the start
-** of the sysAllocaed structure, The possibly realloced and copied
+** of the allocated structure, The possibly realloced and copied
** structure is returned. Make sure (((char *) old) - offset) is a
** pointer to a ERTS_ALC_T_DB_TERM allocated data area.
*/
-void* db_get_term(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj)
+void* db_store_term(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj)
{
+ byte* basep;
+ DbTerm* newp;
+ Eterm* top;
int size = size_object(obj);
- void *structp = ((char*) old) - offset;
- DbTerm* p;
- Eterm copy;
- Eterm *top;
ErlOffHeap tmp_offheap;
if (old != 0) {
+ basep = ((byte*) old) - offset;
tmp_offheap.first = old->first_oh;
- tmp_offheap.overhead = 0;
erts_cleanup_offheap(&tmp_offheap);
old->first_oh = tmp_offheap.first;
if (size == old->size) {
- p = old;
- } else {
+ newp = old;
+ }
+ else {
Uint new_sz = offset + sizeof(DbTerm) + sizeof(Eterm)*(size-1);
Uint old_sz = offset + sizeof(DbTerm) + sizeof(Eterm)*(old->size-1);
- if (erts_ets_realloc_always_moves) {
- void *nstructp = erts_db_alloc(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- new_sz);
- memcpy(nstructp,structp,offset);
- erts_db_free(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- structp,
- old_sz);
- structp = nstructp;
- } else {
- structp = erts_db_realloc(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- structp,
- old_sz,
- new_sz);
- }
- p = (DbTerm*) ((void *)(((char *) structp) + offset));
+ basep = db_realloc_term(tb, basep, old_sz, new_sz, offset);
+ newp = (DbTerm*) (basep + offset);
}
}
else {
- structp = erts_db_alloc(ERTS_ALC_T_DB_TERM,
- (DbTable *) tb,
- (offset
- + sizeof(DbTerm)
- + sizeof(Eterm)*(size-1)));
- p = (DbTerm*) ((void *)(((char *) structp) + offset));
- }
- p->size = size;
+ basep = erts_db_alloc(ERTS_ALC_T_DB_TERM, (DbTable *)tb,
+ (offset + sizeof(DbTerm) + sizeof(Eterm)*(size-1)));
+ newp = (DbTerm*) (basep + offset);
+ }
+ newp->size = size;
+ top = newp->tpl;
tmp_offheap.first = NULL;
- tmp_offheap.overhead = 0;
+ copy_struct(obj, size, &top, &tmp_offheap);
+ newp->first_oh = tmp_offheap.first;
+#ifdef DEBUG_CLONE
+ newp->debug_clone = NULL;
+#endif
+ return basep;
+}
+
+
+void* db_store_term_comp(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj)
+{
+ Uint new_sz = offset + db_size_dbterm_comp(tb, obj);
+ byte* basep;
+ DbTerm* newp;
+ byte* top;
+
+ ASSERT(tb->compress);
+ if (old != 0) {
+ Uint old_sz = db_alloced_size_comp(old);
+ db_cleanup_offheap_comp(old);
+
+ basep = ((byte*) old) - offset;
+ if (new_sz == old_sz) {
+ newp = old;
+ }
+ else {
+ basep = db_realloc_term(tb, basep, old_sz, new_sz, offset);
+ newp = (DbTerm*) (basep + offset);
+ }
+ }
+ else {
+ basep = erts_db_alloc(ERTS_ALC_T_DB_TERM, (DbTable*)tb, new_sz);
+ newp = (DbTerm*) (basep + offset);
+ }
+
+ newp->size = size_object(obj);
+ top = copy_to_comp(tb, obj, newp, new_sz);
+ ASSERT(top <= basep + new_sz);
+
+ // SVERK: realloc?
+
+ return basep;
+}
+
+
+void db_finalize_resize(DbUpdateHandle* handle, Uint offset)
+{
+ DbTable* tbl = handle->tb;
+ DbTerm* newDbTerm;
+ Uint alloc_sz = offset +
+ (tbl->common.compress ?
+ db_size_dbterm_comp(&tbl->common, make_tuple(handle->dbterm->tpl)) :
+ sizeof(DbTerm)+sizeof(Eterm)*(handle->new_size-1));
+ byte* newp = erts_db_alloc(ERTS_ALC_T_DB_TERM, tbl, alloc_sz);
+ byte* oldp = *(handle->bp);
+
+ sys_memcpy(newp, oldp, offset); /* copy only hash/tree header */
+ *(handle->bp) = newp;
+ newDbTerm = (DbTerm*) (newp + offset);
+ newDbTerm->size = handle->new_size;
+
+ /* make a flat copy */
+
+ if (tbl->common.compress) {
+ copy_to_comp(&tbl->common, make_tuple(handle->dbterm->tpl),
+ newDbTerm, alloc_sz);
+ db_free_tmp_uncompressed(handle->dbterm);
+ }
+ else {
+ Eterm* top;
+ ErlOffHeap tmp_offheap;
+ tmp_offheap.first = NULL;
+ top = newDbTerm->tpl;
+ copy_struct(make_tuple(handle->dbterm->tpl), handle->new_size,
+ &top, &tmp_offheap);
+ newDbTerm->first_oh = tmp_offheap.first;
+#ifdef DEBUG_CLONE
+ newDbTerm->debug_clone = NULL;
+#endif
+ ASSERT((byte*)top <= (newp + alloc_sz));
+ }
+}
- top = DBTERM_BUF(p);
- copy = copy_struct(obj, size, &top, &tmp_offheap);
- p->first_oh = tmp_offheap.first;
- DBTERM_SET_TPL(p,tuple_val(copy));
+Eterm db_copy_from_comp(DbTableCommon* tb, DbTerm* bp, Eterm** hpp,
+ ErlOffHeap* off_heap)
+{
+ Eterm* hp = *hpp;
+ int i, arity = arityval(bp->tpl[0]);
+
+ hp[0] = bp->tpl[0];
+ *hpp += arity + 1;
+
+ hp[tb->keypos] = copy_struct(bp->tpl[tb->keypos],
+ size_object(bp->tpl[tb->keypos]),
+ hpp, off_heap);
+ for (i=arity; i>0; i--) {
+ if (i != tb->keypos) {
+ if (is_immed(bp->tpl[i])) {
+ hp[i] = bp->tpl[i];
+ }
+ else {
+ hp[i] = erts_decode_ext_ets(hpp, off_heap,
+ elem2ext(bp->tpl, i));
+ }
+ }
+ }
+ ASSERT((*hpp - hp) <= bp->size);
+#ifdef DEBUG_CLONE
+ ASSERT(eq(make_tuple(hp),make_tuple(bp->debug_clone)));
+#endif
+ return make_tuple(hp);
+}
- return structp;
+Eterm db_copy_element_from_ets(DbTableCommon* tb, Process* p,
+ DbTerm* obj, Uint pos,
+ Eterm** hpp, Uint extra)
+{
+ if (is_immed(obj->tpl[pos])) {
+ *hpp = HAlloc(p, extra);
+ return obj->tpl[pos];
+ }
+ if (tb->compress && pos != tb->keypos) {
+ byte* ext = elem2ext(obj->tpl, pos);
+ Sint sz = erts_decode_ext_size_ets(ext, db_alloced_size_comp(obj)) + extra;
+ Eterm* hp = HAlloc(p, sz);
+ Eterm* endp = hp + sz;
+ Eterm copy = erts_decode_ext_ets(&hp, &MSO(p), ext);
+ *hpp = hp;
+ hp += extra;
+ HRelease(p, endp, hp);
+#ifdef DEBUG_CLONE
+ ASSERT(eq(copy,obj->debug_clone[pos]));
+#endif
+ return copy;
+ }
+ else {
+ Uint sz = size_object(obj->tpl[pos]);
+ *hpp = HAlloc(p, sz + extra);
+ return copy_struct(obj->tpl[pos], sz, hpp, &MSO(p));
+ }
}
-void db_free_term_data(DbTerm* p)
+/* Our own "cleanup_offheap"
+ * as refc-binaries may be unaligned in compressed terms
+*/
+void db_cleanup_offheap_comp(DbTerm* obj)
+{
+ union erl_off_heap_ptr u;
+ ProcBin tmp;
+
+ for (u.hdr = obj->first_oh; u.hdr; u.hdr = u.hdr->next) {
+ if ((UWord)u.voidp % sizeof(UWord) != 0) { /* unaligned ptr */
+ sys_memcpy(&tmp, u.voidp, sizeof(tmp));
+ /* Warning, must pass (void*)-variable to memcpy. Otherwise it will
+ cause Bus error on Sparc due to false compile time assumptions
+ about word aligned memory (type cast is not enough) */
+ u.pb = &tmp;
+ }
+ switch (thing_subtag(u.hdr->thing_word)) {
+ case REFC_BINARY_SUBTAG:
+ if (erts_refc_dectest(&u.pb->val->refc, 0) == 0) {
+ erts_bin_free(u.pb->val);
+ }
+ break;
+ case FUN_SUBTAG:
+ ASSERT(u.pb != &tmp);
+ if (erts_refc_dectest(&u.fun->fe->refc, 0) == 0) {
+ erts_erase_fun_entry(u.fun->fe);
+ }
+ break;
+ default:
+ ASSERT(is_external_header(u.hdr->thing_word));
+ ASSERT(u.pb != &tmp);
+ erts_deref_node_entry(u.ext->node);
+ break;
+ }
+ }
+#ifdef DEBUG_CLONE
+ if (obj->debug_clone != NULL) {
+ erts_free(ERTS_ALC_T_DB_TERM, obj->debug_clone);
+ obj->debug_clone = NULL;
+ }
+#endif
+}
+
+int db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b)
{
ErlOffHeap tmp_offheap;
- tmp_offheap.first = p->first_oh;
- tmp_offheap.overhead = 0;
+ Eterm* allocp;
+ Eterm* hp;
+ Eterm tmp_b;
+ int is_eq;
+
+ ASSERT(tb->compress);
+ hp = allocp = erts_alloc(ERTS_ALC_T_TMP, b->size*sizeof(Eterm));
+ tmp_offheap.first = NULL;
+ tmp_b = db_copy_from_comp(tb, b, &hp, &tmp_offheap);
+ is_eq = eq(a,tmp_b);
erts_cleanup_offheap(&tmp_offheap);
+ erts_free(ERTS_ALC_T_TMP, allocp);
+ return is_eq;
}
-
/*
** Check if object represents a "match" variable
** i.e and atom $N where N is an integer
@@ -4404,7 +4707,65 @@ static Eterm seq_trace_fake(Process *p, Eterm arg1)
}
return result;
}
-
+
+DbTerm* db_alloc_tmp_uncompressed(DbTableCommon* tb, DbTerm* org)
+{
+ ErlOffHeap tmp_offheap;
+ DbTerm* res = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(DbTerm) + org->size*sizeof(Eterm));
+ Eterm* hp = res->tpl;
+ tmp_offheap.first = NULL;
+ db_copy_from_comp(tb, org, &hp, &tmp_offheap);
+ res->first_oh = tmp_offheap.first;
+ res->size = org->size;
+#ifdef DEBUG_CLONE
+ res->debug_clone = NULL;
+#endif
+ return res;
+}
+
+void db_free_tmp_uncompressed(DbTerm* obj)
+{
+ ErlOffHeap off_heap;
+ off_heap.first = obj->first_oh;
+ erts_cleanup_offheap(&off_heap);
+#ifdef DEBUG_CLONE
+ ASSERT(obj->debug_clone == NULL);
+#endif
+ erts_free(ERTS_ALC_T_TMP, obj);
+}
+
+Eterm db_prog_match_and_copy(DbTableCommon* tb, Process* c_p, Binary* bprog,
+ int all, DbTerm* obj, Eterm** hpp, Uint extra)
+{
+ Uint32 dummy;
+ Eterm res;
+
+ if (tb->compress) {
+ obj = db_alloc_tmp_uncompressed(tb, obj);
+ }
+
+ res = db_prog_match(c_p, bprog, make_tuple(obj->tpl), NULL, 0, &dummy);
+
+ if (is_value(res) && hpp!=NULL) {
+ if (all) {
+ *hpp = HAlloc(c_p, obj->size + extra);
+ res = copy_shallow(obj->tpl, obj->size, hpp, &MSO(c_p));
+ }
+ else {
+ Uint sz = size_object(res);
+ *hpp = HAlloc(c_p, sz + extra);
+ res = copy_struct(res, sz, hpp, &MSO(c_p));
+ }
+ }
+
+ if (tb->compress) {
+ db_free_tmp_uncompressed(obj);
+ }
+ return res;
+}
+
+
#ifdef DMC_DEBUG
/*
** Disassemble match program
diff --git a/erts/emulator/beam/erl_db_util.h b/erts/emulator/beam/erl_db_util.h
index 0f333e8b34..10ba755e80 100644
--- a/erts/emulator/beam/erl_db_util.h
+++ b/erts/emulator/beam/erl_db_util.h
@@ -52,22 +52,27 @@
is broken.*/
#define DB_ERROR_UNSPEC -10 /* Unspecified error */
+/*#define DEBUG_CLONE*/
/*
* A datatype for a database entry stored out of a process heap
*/
typedef struct db_term {
struct erl_off_heap_header* first_oh; /* Off heap data for term. */
- Uint size; /* Size of term in "words" */
- Eterm tpl[1]; /* Untagged "constant pointer" to top tuple */
- /* (assumed to be first in buffer) */
+ Uint size; /* Heap size of term in "words" */
+#ifdef DEBUG_CLONE
+ Eterm* debug_clone; /* An uncompressed copy */
+#endif
+ Eterm tpl[1]; /* Term data. Top tuple always first */
+
+ /* Compression: is_immed and key element are uncompressed.
+ Compressed elements are stored in external format after each other
+ last in dbterm. The top tuple elements contains byte offsets, to
+ the start of the data, tagged as headers.
+ The allocated size of the dbterm in bytes is stored at tpl[arity+1].
+ */
} DbTerm;
-/* "Assign" a value to DbTerm.tpl */
-#define DBTERM_SET_TPL(dbtermPtr,tplPtr) ASSERT((tplPtr)==(dbtermPtr->tpl))
-/* Get start of term buffer */
-#define DBTERM_BUF(dbtermPtr) ((dbtermPtr)->tpl)
-
union db_table;
typedef union db_table DbTable;
@@ -186,6 +191,12 @@ typedef struct db_table_method
} DbTableMethod;
+typedef struct db_fixation {
+ Eterm pid;
+ Uint counter;
+ struct db_fixation *next;
+} DbFixation;
+
/*
* This structure contains data for all different types of database
* tables. Note that these fields must match the same fields
@@ -194,13 +205,6 @@ typedef struct db_table_method
* operations may be the same on different types of tables.
*/
-typedef struct db_fixation {
- Eterm pid;
- Uint counter;
- struct db_fixation *next;
-} DbFixation;
-
-
typedef struct db_table_common {
erts_refc_t ref;
erts_refc_t fixref; /* fixation counter */
@@ -226,6 +230,7 @@ typedef struct db_table_common {
Uint32 status; /* bit masks defined below */
int slot; /* slot index in meta_main_tab */
int keypos; /* defaults to 1 */
+ int compress;
} DbTableCommon;
/* These are status bit patterns */
@@ -252,6 +257,54 @@ typedef struct db_table_common {
#define IS_FIXED(T) (NFIXED(T) != 0)
Eterm erts_ets_copy_object(Eterm, Process*);
+Eterm db_copy_from_comp(DbTableCommon* tb, DbTerm* bp, Eterm** hpp,
+ ErlOffHeap* off_heap);
+int db_eq_comp(DbTableCommon* tb, Eterm a, DbTerm* b);
+DbTerm* db_alloc_tmp_uncompressed(DbTableCommon* tb, DbTerm* org);
+
+ERTS_GLB_INLINE Eterm db_copy_object_from_ets(DbTableCommon* tb, DbTerm* bp,
+ Eterm** hpp, ErlOffHeap* off_heap);
+ERTS_GLB_INLINE int db_eq(DbTableCommon* tb, Eterm a, DbTerm* b);
+ERTS_GLB_INLINE Eterm db_do_read_element(DbUpdateHandle* handle, Sint position);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE Eterm db_copy_object_from_ets(DbTableCommon* tb, DbTerm* bp,
+ Eterm** hpp, ErlOffHeap* off_heap)
+{
+ if (tb->compress) {
+ return db_copy_from_comp(tb, bp, hpp, off_heap);
+ }
+ else {
+ return copy_shallow(bp->tpl, bp->size, hpp, off_heap);
+ }
+}
+
+ERTS_GLB_INLINE int db_eq(DbTableCommon* tb, Eterm a, DbTerm* b)
+{
+ if (!tb->compress) {
+ return eq(a, make_tuple(b->tpl));
+ }
+ else {
+ return db_eq_comp(tb, a, b);
+ }
+}
+
+/* Must be called to read elements after db_lookup_dbterm.
+** Will decompress if needed. */
+ERTS_GLB_INLINE Eterm db_do_read_element(DbUpdateHandle* handle, Sint position)
+{
+ Eterm elem = handle->dbterm->tpl[position];
+ if (!is_header(elem)) {
+ return elem;
+ }
+ ASSERT(((DbTableCommon*)handle->tb)->compress);
+ ASSERT(!handle->mustResize);
+ handle->dbterm = db_alloc_tmp_uncompressed((DbTableCommon*)handle->tb, handle->dbterm);
+ handle->mustResize = 1;
+ return handle->dbterm->tpl[position];
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
/* optimised version of copy_object (normal case? atomic object) */
#define COPY_OBJECT(obj, p, objp) \
@@ -277,14 +330,19 @@ Eterm db_set_trace_control_word_1(Process *p, Eterm val);
void db_initialize_util(void);
Eterm db_getkey(int keypos, Eterm obj);
-void db_free_term_data(DbTerm* p);
-void* db_get_term(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj);
+void db_cleanup_offheap_comp(DbTerm* p);
+void db_free_term(DbTable *tb, void* basep, Uint offset);
+void* db_store_term(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj);
+void* db_store_term_comp(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj);
+Eterm db_copy_element_from_ets(DbTableCommon* tb, Process* p, DbTerm* obj,
+ Uint pos, Eterm** hpp, Uint extra);
int db_has_variable(Eterm obj);
int db_is_variable(Eterm obj);
+Eterm db_do_read_element(DbUpdateHandle* handle, Sint position);
void db_do_update_element(DbUpdateHandle* handle,
Sint position,
Eterm newval);
-void db_finalize_update_element(DbUpdateHandle* handle);
+void db_finalize_resize(DbUpdateHandle* handle, Uint offset);
Eterm db_add_counter(Eterm** hpp, Eterm counter, Eterm incr);
Eterm db_match_set_lint(Process *p, Eterm matchexpr, Uint flags);
Binary *db_match_set_compile(Process *p, Eterm matchexpr,
@@ -366,6 +424,8 @@ Binary *db_match_compile(Eterm *matchexpr, Eterm *guards,
Eterm *body, int num_matches,
Uint flags,
DMCErrInfo *err_info);
+Eterm db_prog_match_and_copy(DbTableCommon* tb, Process* c_p, Binary* bprog,
+ int all, DbTerm* obj, Eterm** hpp, Uint extra);
/* Returns newly allocated MatchProg binary with refc == 0*/
Eterm db_prog_match(Process *p, Binary *prog, Eterm term, Eterm *termp, int arity,
Uint32 *return_flags /* Zeroed on enter */);
diff --git a/erts/emulator/beam/erl_drv_thread.c b/erts/emulator/beam/erl_drv_thread.c
index d42820ddf3..17b08a71d4 100644
--- a/erts/emulator/beam/erl_drv_thread.c
+++ b/erts/emulator/beam/erl_drv_thread.c
@@ -528,7 +528,7 @@ erl_drv_tsd_get(ErlDrvTSDKey key)
if (!dtid)
return NULL;
#endif
- if (ERL_DRV_TSD_LEN__ < key)
+ if (ERL_DRV_TSD_LEN__ <= key)
return NULL;
return ERL_DRV_TSD__[key];
}
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 4ae656a3ad..a9f4f041ac 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -41,6 +41,7 @@
#include "erl_printf_term.h"
#include "erl_misc_utils.h"
#include "packet_parser.h"
+#include "erl_cpu_topology.h"
#ifdef HIPE
#include "hipe_mode_switch.h" /* for hipe_mode_switch_init() */
@@ -63,6 +64,8 @@ extern void ConNormalExit(void);
extern void ConWaitForExit(void);
#endif
+static void erl_init(int ncpu);
+
#define ERTS_MIN_COMPAT_REL 7
#ifdef ERTS_SMP
@@ -76,9 +79,6 @@ int erts_initialized = 0;
static erts_tid_t main_thread;
#endif
-erts_cpu_info_t *erts_cpuinfo;
-
-int erts_reader_groups;
int erts_use_sender_punish;
/*
@@ -111,7 +111,6 @@ int erts_compat_rel;
static int use_multi_run_queue;
static int no_schedulers;
static int no_schedulers_online;
-static int max_reader_groups;
#ifdef DEBUG
Uint32 verbose; /* See erl_debug.h for information about verbose */
@@ -230,18 +229,18 @@ void erl_error(char *fmt, va_list args)
erts_vfprintf(stderr, fmt, args);
}
-static void early_init(int *argc, char **argv);
+static int early_init(int *argc, char **argv);
void
erts_short_init(void)
{
- early_init(NULL, NULL);
- erl_init();
+ int ncpu = early_init(NULL, NULL);
+ erl_init(ncpu);
erts_initialized = 1;
}
-void
-erl_init(void)
+static void
+erl_init(int ncpu)
{
init_benchmarking();
@@ -252,11 +251,11 @@ erl_init(void)
erts_init_monitors();
erts_init_gc();
init_time();
- erts_init_process();
+ erts_init_process(ncpu);
erts_init_scheduling(use_multi_run_queue,
no_schedulers,
no_schedulers_online);
-
+ erts_init_cpu_topology(); /* Must be after init_scheduling */
H_MIN_SIZE = erts_next_heap_size(H_MIN_SIZE, 0);
BIN_VH_MIN_SIZE = erts_next_heap_size(BIN_VH_MIN_SIZE, 0);
@@ -535,7 +534,8 @@ void erts_usage(void)
erts_fprintf(stderr, "-W<i|w> set error logger warnings mapping,\n");
erts_fprintf(stderr, " see error_logger documentation for details\n");
-
+ erts_fprintf(stderr, "-zdbbl size set the distribution buffer busy limit in kilobytes\n");
+ erts_fprintf(stderr, " valid range is [1-%d]\n", INT_MAX/1024);
erts_fprintf(stderr, "\n");
erts_fprintf(stderr, "Note that if the emulator is started with erlexec (typically\n");
erts_fprintf(stderr, "from the erl script), these flags should be specified with +.\n");
@@ -587,7 +587,7 @@ static void ethr_ll_free(void *ptr)
#endif
-static void
+static int
early_init(int *argc, char **argv) /*
* Only put things here which are
* really important initialize
@@ -600,6 +600,10 @@ early_init(int *argc, char **argv) /*
int ncpuavail;
int schdlrs;
int schdlrs_onln;
+ int max_main_threads;
+ int max_reader_groups;
+ int reader_groups;
+
use_multi_run_queue = 1;
erts_printf_eterm_func = erts_printf_term;
erts_disable_tolerant_timeofday = 0;
@@ -615,13 +619,11 @@ early_init(int *argc, char **argv) /*
erts_use_sender_punish = 1;
- erts_cpuinfo = erts_cpu_info_create();
-
-#ifdef ERTS_SMP
- ncpu = erts_get_cpu_configured(erts_cpuinfo);
- ncpuonln = erts_get_cpu_online(erts_cpuinfo);
- ncpuavail = erts_get_cpu_available(erts_cpuinfo);
-#else
+ erts_pre_early_init_cpu_topology(&max_reader_groups,
+ &ncpu,
+ &ncpuonln,
+ &ncpuavail);
+#ifndef ERTS_SMP
ncpu = 1;
ncpuonln = 1;
ncpuavail = 1;
@@ -664,15 +666,9 @@ early_init(int *argc, char **argv) /*
? ncpuavail
: (ncpuonln > 0 ? ncpuonln : no_schedulers));
-#ifdef ERTS_SMP
- erts_max_main_threads = no_schedulers_online;
-#endif
-
schdlrs = no_schedulers;
schdlrs_onln = no_schedulers_online;
- max_reader_groups = ERTS_MAX_READER_GROUPS;
-
if (argc && argv) {
int i = 1;
while (i < *argc) {
@@ -768,9 +764,13 @@ early_init(int *argc, char **argv) /*
erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes)
-M flags. */
-
- erts_early_init_scheduling(); /* Require allocators */
- erts_init_utils(); /* Require allocators */
+ /* Require allocators */
+ erts_early_init_scheduling();
+ erts_init_utils();
+ erts_early_init_cpu_topology(no_schedulers,
+ &max_main_threads,
+ max_reader_groups,
+ &reader_groups);
#ifdef USE_THREADS
{
@@ -784,24 +784,13 @@ early_init(int *argc, char **argv) /*
elid.mem.ll.alloc = ethr_ll_alloc;
elid.mem.ll.realloc = ethr_ll_realloc;
elid.mem.ll.free = ethr_ll_free;
-
-#ifdef ERTS_SMP
- elid.main_threads = erts_max_main_threads;
-#else
- elid.main_threads = 1;
-#endif
- elid.reader_groups = (elid.main_threads > 1
- ? elid.main_threads
- : 0);
- if (max_reader_groups <= 1)
- elid.reader_groups = 0;
- if (elid.reader_groups > max_reader_groups)
- elid.reader_groups = max_reader_groups;
- erts_reader_groups = elid.reader_groups;
+ elid.main_threads = max_main_threads;
+ elid.reader_groups = reader_groups;
erts_thr_late_init(&elid);
}
#endif
+
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_late_init();
#endif
@@ -818,7 +807,10 @@ early_init(int *argc, char **argv) /*
erl_sys_args(argc, argv);
erts_ets_realloc_always_moves = 0;
+ erts_ets_always_compress = 0;
+ erts_dist_buf_busy_limit = ERTS_DE_BUSY_LIMIT;
+ return ncpu;
}
#ifndef ERTS_SMP
@@ -852,8 +844,7 @@ erl_start(int argc, char **argv)
char envbuf[21]; /* enough for any 64-bit integer */
size_t envbufsz;
int async_max_threads = erts_async_max_threads;
-
- early_init(&argc, argv);
+ int ncpu = early_init(&argc, argv);
envbufsz = sizeof(envbuf);
if (erts_sys_getenv(ERL_MAX_ETS_TABLES_ENV, envbuf, &envbufsz) == 0)
@@ -1038,15 +1029,20 @@ erl_start(int argc, char **argv)
break;
case 'e':
- /* set maximum number of ets tables */
- arg = get_arg(argv[i]+2, argv[i+1], &i);
- if (( user_requested_db_max_tabs = atoi(arg) ) < 0) {
- erts_fprintf(stderr, "bad maximum number of ets tables %s\n", arg);
- erts_usage();
+ if (sys_strcmp("c", argv[i]+2) == 0) {
+ erts_ets_always_compress = 1;
+ }
+ else {
+ /* set maximum number of ets tables */
+ arg = get_arg(argv[i]+2, argv[i+1], &i);
+ if (( user_requested_db_max_tabs = atoi(arg) ) < 0) {
+ erts_fprintf(stderr, "bad maximum number of ets tables %s\n", arg);
+ erts_usage();
+ }
+ VERBOSE(DEBUG_SYSTEM,
+ ("using maximum number of ets tables %d\n",
+ user_requested_db_max_tabs));
}
- VERBOSE(DEBUG_SYSTEM,
- ("using maximum number of ets tables %d\n",
- user_requested_db_max_tabs));
break;
case 'i':
@@ -1110,7 +1106,7 @@ erl_start(int argc, char **argv)
char *sub_param = argv[i]+2;
if (has_prefix("bt", sub_param)) {
arg = get_arg(sub_param+2, argv[i+1], &i);
- res = erts_init_scheduler_bind_type(arg);
+ res = erts_init_scheduler_bind_type_string(arg);
if (res != ERTS_INIT_SCHED_BIND_TYPE_SUCCESS) {
switch (res) {
case ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED:
@@ -1135,7 +1131,7 @@ erl_start(int argc, char **argv)
}
else if (has_prefix("ct", sub_param)) {
arg = get_arg(sub_param+2, argv[i+1], &i);
- res = erts_init_cpu_topology(arg);
+ res = erts_init_cpu_topology_string(arg);
if (res != ERTS_INIT_CPU_TOPOLOGY_OK) {
switch (res) {
case ERTS_INIT_CPU_TOPOLOGY_INVALID_ID:
@@ -1346,6 +1342,26 @@ erl_start(int argc, char **argv)
}
break;
+ case 'z': {
+ char *sub_param = argv[i]+2;
+ int new_limit;
+
+ if (has_prefix("dbbl", sub_param)) {
+ arg = get_arg(sub_param+4, argv[i+1], &i);
+ new_limit = atoi(arg);
+ if (new_limit < 1 || INT_MAX/1024 < new_limit) {
+ erts_fprintf(stderr, "Invalid dbbl limit: %d\n", new_limit);
+ erts_usage();
+ } else {
+ erts_dist_buf_busy_limit = new_limit*1024;
+ }
+ } else {
+ erts_fprintf(stderr, "bad -z option %s\n", argv[i]);
+ erts_usage();
+ }
+ break;
+ }
+
default:
erts_fprintf(stderr, "%s unknown flag %s\n", argv[0], argv[i]);
erts_usage();
@@ -1386,7 +1402,7 @@ erl_start(int argc, char **argv)
boot_argc = argc - i; /* Number of arguments to init */
boot_argv = &argv[i];
- erl_init();
+ erl_init(ncpu);
init_shared_memory(boot_argc, boot_argv);
load_preloaded();
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index d6138fa4e4..04c7dbd2ec 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -128,8 +128,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "removed_fd_pre_alloc_lock", NULL },
{ "state_prealloc", NULL },
{ "schdlr_sspnd", NULL },
- { "cpu_bind", NULL },
{ "run_queue", "address" },
+ { "cpu_info", NULL },
{ "pollset", "address" },
#ifdef __WIN32__
{ "pollwaiter", "address" },
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index 1dd9c8bd4a..a680097c2d 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -99,6 +99,16 @@ static Eterm* alloc_heap_heavy(ErlNifEnv* env, unsigned need, Eterm* hp)
return hp;
}
+#if SIZEOF_LONG != ERTS_SIZEOF_ETERM
+static ERTS_INLINE void ensure_heap(ErlNifEnv* env, unsigned may_need)
+{
+ if (env->hp + may_need > env->hp_end) {
+ alloc_heap_heavy(env, may_need, env->hp);
+ env->hp -= may_need;
+ }
+}
+#endif
+
void erts_pre_nif(ErlNifEnv* env, Process* p, struct erl_module_nif* mod_nif)
{
env->mod_nif = mod_nif;
@@ -730,9 +740,8 @@ int enif_get_long(ErlNifEnv* env, Eterm term, long* ip)
{
#if SIZEOF_LONG == ERTS_SIZEOF_ETERM
return term_to_Sint(term, ip);
-#elif SIZEOF_INT == ERTS_SIZEOF_ETERM
- Sint i;
- return term_to_Sint(term, &i) ? (*ip = (long) i, 1) : 0;
+#elif SIZEOF_LONG == 8
+ return term_to_Sint64(term, ip);
#else
# error Unknown long word size
#endif
@@ -742,9 +751,8 @@ int enif_get_ulong(ErlNifEnv* env, Eterm term, unsigned long* ip)
{
#if SIZEOF_LONG == ERTS_SIZEOF_ETERM
return term_to_Uint(term, ip);
-#elif SIZEOF_INT == ERTS_SIZEOF_ETERM
- Uint u;
- return term_to_Uint(term, &u) ? (*ip = (unsigned long) u, 1) : 0;
+#elif SIZEOF_LONG == 8
+ return term_to_Uint64(term, ip);
#else
# error Unknown long word size
#endif
@@ -821,12 +829,22 @@ ERL_NIF_TERM enif_make_uint(ErlNifEnv* env, unsigned i)
ERL_NIF_TERM enif_make_long(ErlNifEnv* env, long i)
{
+#if SIZEOF_LONG == ERTS_SIZEOF_ETERM
return IS_SSMALL(i) ? make_small(i) : small_to_big(i, alloc_heap(env,2));
+#elif SIZEOF_LONG == 8
+ ensure_heap(env,3);
+ return erts_sint64_to_big(i, &env->hp);
+#endif
}
ERL_NIF_TERM enif_make_ulong(ErlNifEnv* env, unsigned long i)
{
+#if SIZEOF_LONG == ERTS_SIZEOF_ETERM
return IS_USMALL(0,i) ? make_small(i) : uint_to_big(i,alloc_heap(env,2));
+#elif SIZEOF_LONG == 8
+ ensure_heap(env,3);
+ return erts_uint64_to_big(i, &env->hp);
+#endif
}
#if HAVE_INT64 && SIZEOF_LONG != 8
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index d0b08bf72e..8cdda395df 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -107,7 +107,7 @@ dist_table_alloc(void *dep_tmpl)
dep->nlinks = NULL;
dep->monitors = NULL;
- erts_smp_spinlock_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr);
+ erts_smp_mtx_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr);
dep->qflgs = 0;
dep->qsize = 0;
dep->out_queue.first = NULL;
@@ -172,7 +172,7 @@ dist_table_free(void *vdep)
ASSERT(!dep->cache);
erts_smp_rwmtx_destroy(&dep->rwmtx);
erts_smp_mtx_destroy(&dep->lnk_mtx);
- erts_smp_spinlock_destroy(&dep->qlock);
+ erts_smp_mtx_destroy(&dep->qlock);
#ifdef DEBUG
sys_memset(vdep, 0x77, sizeof(DistEntry));
@@ -755,9 +755,9 @@ void erts_init_node_tables(void)
erts_this_dist_entry->nlinks = NULL;
erts_this_dist_entry->monitors = NULL;
- erts_smp_spinlock_init_x(&erts_this_dist_entry->qlock,
- "dist_entry_out_queue",
- make_small(ERST_INTERNAL_CHANNEL_NO));
+ erts_smp_mtx_init_x(&erts_this_dist_entry->qlock,
+ "dist_entry_out_queue",
+ make_small(ERST_INTERNAL_CHANNEL_NO));
erts_this_dist_entry->qflgs = 0;
erts_this_dist_entry->qsize = 0;
erts_this_dist_entry->out_queue.first = NULL;
diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h
index eb759b87e9..b0a63ae035 100644
--- a/erts/emulator/beam/erl_node_tables.h
+++ b/erts/emulator/beam/erl_node_tables.h
@@ -131,7 +131,7 @@ typedef struct dist_entry_ {
ErtsLink *nlinks; /* Link tree with subtrees */
ErtsMonitor *monitors; /* Monitor tree */
- erts_smp_spinlock_t qlock; /* Protects qflgs and out_queue */
+ erts_smp_mtx_t qlock; /* Protects qflgs and out_queue */
Uint32 qflgs;
Sint qsize;
ErtsDistOutputQueue out_queue;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index b47ce97c46..f252c2cbe2 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -24,7 +24,6 @@
#endif
#include <stddef.h> /* offsetof() */
-#include <ctype.h>
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
@@ -39,6 +38,7 @@
#include "erl_threads.h"
#include "erl_binary.h"
#include "beam_bp.h"
+#include "erl_cpu_topology.h"
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -63,8 +63,6 @@
#define ERTS_WAKEUP_OTHER_DEC 10
#define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10)
-#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
-
#if 0 || defined(DEBUG)
#define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
#endif
@@ -119,10 +117,6 @@ Uint erts_process_tab_index_mask;
static int wakeup_other_limit;
-#ifdef ERTS_SMP
-Uint erts_max_main_threads;
-#endif
-
int erts_sched_thread_suggested_stack_size = -1;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -195,48 +189,6 @@ do { \
#endif
-/*
- * Cpu topology hierarchy.
- */
-#define ERTS_TOPOLOGY_NODE 0
-#define ERTS_TOPOLOGY_PROCESSOR 1
-#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
-#define ERTS_TOPOLOGY_CORE 3
-#define ERTS_TOPOLOGY_THREAD 4
-#define ERTS_TOPOLOGY_LOGICAL 5
-
-#define ERTS_TOPOLOGY_MAX_DEPTH 6
-
-typedef struct {
- int bind_id;
- int bound_id;
-} ErtsCpuBindData;
-
-static ErtsCpuBindData *scheduler2cpu_map;
-erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
-
-typedef enum {
- ERTS_CPU_BIND_UNDEFINED,
- ERTS_CPU_BIND_SPREAD,
- ERTS_CPU_BIND_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_THREAD_SPREAD,
- ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
- ERTS_CPU_BIND_NO_SPREAD,
- ERTS_CPU_BIND_NONE
-} ErtsCpuBindOrder;
-
-#define ERTS_CPU_BIND_DEFAULT_BIND \
- ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
-
-ErtsCpuBindOrder cpu_bind_order;
-
-static erts_cpu_topology_t *user_cpudata;
-static int user_cpudata_size;
-static erts_cpu_topology_t *system_cpudata;
-static int system_cpudata_size;
-
erts_sched_stat_t erts_sched_stat;
ErtsRunQueue *erts_common_run_queue;
@@ -259,11 +211,6 @@ ErtsSchedulerData *erts_scheduler_data;
ErtsAlignedRunQueue *erts_aligned_run_queues;
Uint erts_no_run_queues;
-typedef union {
- ErtsSchedulerData esd;
- char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
-} ErtsAlignedSchedulerData;
-
ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
#ifdef ERTS_SMP
@@ -334,12 +281,6 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
200,
ERTS_ALC_T_PROC_LIST)
-#define ERTS_RUNQ_IX(IX) \
- (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \
- &erts_aligned_run_queues[(IX)].runq)
-#define ERTS_SCHEDULER_IX(IX) \
- (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
- &erts_aligned_scheduler_data[(IX)].esd)
#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
(ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
&aligned_sched_sleep_info[(IX)].ssi)
@@ -398,22 +339,8 @@ static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
#ifdef ERTS_SMP
static void handle_pending_exiters(ErtsProcList *);
-static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq);
-static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
-
#endif
-static int reader_group_lookup(int logical);
-static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
- int *cpudata_size);
-static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
-
-static void early_cpu_bind_init(void);
-static void late_cpu_bind_init(void);
-
#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK)
int
erts_smp_lc_runq_is_locked(ErtsRunQueue *runq)
@@ -469,13 +396,13 @@ erts_pre_init_process(void)
/* initialize the scheduler */
void
-erts_init_process(void)
+erts_init_process(int ncpu)
{
Uint proc_bits = ERTS_PROC_BITS;
#ifdef ERTS_SMP
erts_disable_proc_not_running_opt = 0;
- erts_init_proc_lock();
+ erts_init_proc_lock(ncpu);
#endif
init_proclist_alloc();
@@ -1060,6 +987,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sys_poll_aux_work:
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
erl_sys_schedule(1); /* Might give us something to do */
dt = do_time_read_and_reset();
@@ -1155,6 +1084,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
erts_smp_runq_unlock(rq);
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
erl_sys_schedule(0);
dt = do_time_read_and_reset();
@@ -1242,7 +1173,7 @@ wake_scheduler(ErtsRunQueue *rq, int incq, int one)
do {
ErtsSchedulerSleepInfo *wake_ssi = ssi;
ssi = ssi->next;
- erts_sched_finish_poke(ssi, ssi_flags_set_wake(wake_ssi));
+ erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
} while (ssi);
}
}
@@ -1335,6 +1266,31 @@ erts_smp_notify_inc_runq(ErtsRunQueue *runq)
smp_notify_inc_runq(runq);
}
+void
+erts_sched_notify_check_cpu_bind(void)
+{
+#ifdef ERTS_SMP
+ int ix;
+ if (erts_common_run_queue) {
+ for (ix = 0; ix < erts_no_schedulers; ix++)
+ erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1);
+ wake_all_schedulers();
+ }
+ else {
+ for (ix = 0; ix < erts_no_run_queues; ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+ erts_smp_runq_lock(rq);
+ rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_smp_runq_unlock(rq);
+ wake_scheduler(rq, 0, 1);
+ };
+ }
+#else
+ erts_sched_check_cpu_bind(erts_get_scheduler_data());
+#endif
+}
+
+
#ifdef ERTS_SMP
ErtsRunQueue *
@@ -2379,7 +2335,6 @@ erts_debug_nbalance(void)
void
erts_early_init_scheduling(void)
{
- early_cpu_bind_init();
wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
}
@@ -2656,8 +2611,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
/* init port tasks */
erts_port_task_init();
-
- late_cpu_bind_init();
}
ErtsRunQueue *
@@ -2883,12 +2836,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
long flgs;
int changing;
long no = (long) esdp->no;
- ErtsRunQueue *rq = esdp->run_queue;
ErtsSchedulerSleepInfo *ssi = esdp->ssi;
long active_schedulers;
int curr_online = 1;
int wake = 0;
- int reset_read_group = 0;
#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
|| defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
long aux_work;
@@ -2909,20 +2860,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
erts_smp_runq_unlock(esdp->run_queue);
- /* Unbind from cpu */
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0
- && erts_unbind_from_cpu(erts_cpuinfo) == 0) {
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- reset_read_group = 1;
- }
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (reset_read_group)
- erts_smp_rwmtx_set_reader_group(0);
-
- if (esdp->no <= erts_max_main_threads)
- erts_thr_set_main_status(0, 0);
+ erts_sched_check_cpu_bind_prep_suspend(esdp);
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_inactive);
@@ -3056,17 +2994,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_active);
- if (esdp->no <= erts_max_main_threads)
- erts_thr_set_main_status(1, (int) esdp->no);
-
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
- /* Make sure we check if we should bind to a cpu or not... */
- if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
- else
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_sched_check_cpu_bind_post_suspend(esdp);
}
#define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \
@@ -3583,15 +3514,7 @@ sched_thread_func(void *vesdp)
erts_tsd_set(sched_data_key, vesdp);
#ifdef ERTS_SMP
- if (no <= erts_max_main_threads) {
- erts_thr_set_main_status(1, (int) no);
- if (erts_reader_groups) {
- int rg = (int) no;
- if (rg > erts_reader_groups)
- rg = (((int) no) - 1) % erts_reader_groups + 1;
- erts_smp_rwmtx_set_reader_group(rg);
- }
- }
+ erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp);
erts_proc_lock_prepare_proc_lock_waiter();
ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
@@ -3693,1907 +3616,6 @@ erts_start_schedulers(void)
#endif /* ERTS_SMP */
-static int
-int_cmp(const void *vx, const void *vy)
-{
- return *((int *) vx) - *((int *) vy);
-}
-
-static int
-cpu_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- return 0;
-}
-
-static int
-cpu_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_no_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static ERTS_INLINE void
-make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
-{
- int ix;
- int node = -1;
- int processor = -1;
- int processor_node = -1;
- int processor_node_node = -1;
- int core = -1;
- int thread = -1;
- int old_node = -1;
- int old_processor = -1;
- int old_processor_node = -1;
- int old_core = -1;
- int old_thread = -1;
-
- for (ix = 0; ix < size; ix++) {
- if (!no_node || cpudata[ix].node >= 0) {
- if (old_node == cpudata[ix].node)
- cpudata[ix].node = node;
- else {
- old_node = cpudata[ix].node;
- old_processor = processor = -1;
- if (!no_node)
- old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node || cpudata[ix].node >= 0)
- cpudata[ix].node = ++node;
- }
- }
- if (old_processor == cpudata[ix].processor)
- cpudata[ix].processor = processor;
- else {
- old_processor = cpudata[ix].processor;
- if (!no_node)
- processor_node_node = old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- cpudata[ix].processor = ++processor;
- }
- if (no_node && cpudata[ix].processor_node < 0)
- old_processor_node = -1;
- else {
- if (old_processor_node == cpudata[ix].processor_node) {
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = node;
- else {
- if (processor_node_node >= 0)
- cpudata[ix].node = processor_node_node;
- cpudata[ix].processor_node = processor_node;
- }
- }
- else {
- old_processor_node = cpudata[ix].processor_node;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = ++node;
- else {
- cpudata[ix].node = processor_node_node = ++node;
- cpudata[ix].processor_node = ++processor_node;
- }
- }
- }
- if (!no_node && cpudata[ix].processor_node < 0)
- cpudata[ix].processor_node = 0;
- if (old_core == cpudata[ix].core)
- cpudata[ix].core = core;
- else {
- old_core = cpudata[ix].core;
- old_thread = thread = -1;
- cpudata[ix].core = ++core;
- }
- if (old_thread == cpudata[ix].thread)
- cpudata[ix].thread = thread;
- else
- old_thread = cpudata[ix].thread = ++thread;
- }
-}
-
-static void
-cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq)
-{
- if (size > 1) {
- int no_node = 0;
- int (*cmp_func)(const void *, const void *);
- switch (bind_order) {
- case ERTS_CPU_BIND_SPREAD:
- cmp_func = cpu_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_PROCESSOR_SPREAD:
- cmp_func = cpu_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_SPREAD:
- cmp_func = cpu_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_SPREAD:
- cmp_func = cpu_no_spread_order_cmp;
- break;
- default:
- cmp_func = NULL;
- erl_exit(ERTS_ABORT_EXIT,
- "Bad cpu bind type: %d\n",
- (int) cpu_bind_order);
- break;
- }
-
- if (mk_seq)
- make_cpudata_id_seq(cpudata, size, no_node);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
- }
-}
-
-static int
-processor_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static void
-check_cpu_bind(ErtsSchedulerData *esdp)
-{
- int rg = 0;
- int res;
- int cpu_id;
- erts_smp_runq_unlock(esdp->run_queue);
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- cpu_id = scheduler2cpu_map[esdp->no].bind_id;
- if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
- res = erts_bind_to_cpu(erts_cpuinfo, cpu_id);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
- else {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0)
- goto unbind;
- }
- }
- else if (cpu_id < 0) {
- unbind:
- /* Get rid of old binding */
- res = erts_unbind_from_cpu(erts_cpuinfo);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- else if (res != -ENOTSUP) {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- }
- }
- if (erts_reader_groups) {
- if (esdp->cpu_id >= 0)
- rg = reader_group_lookup(esdp->cpu_id);
- else
- rg = (((int) esdp->no) - 1) % erts_reader_groups + 1;
- }
- erts_smp_runq_lock(esdp->run_queue);
-#ifdef ERTS_SMP
- if (erts_common_run_queue)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
- else {
- esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
- }
-#endif
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (erts_reader_groups)
- erts_smp_rwmtx_set_reader_group(rg);
-}
-
-static void
-signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
-{
- int s_ix = 1;
- int cpu_ix;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
-
- cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
-
- for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
- if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical))
- scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
- }
-
- if (s_ix <= erts_no_schedulers)
- for (; s_ix <= erts_no_schedulers; s_ix++)
- scheduler2cpu_map[s_ix].bind_id = -1;
-
-#ifdef ERTS_SMP
- if (erts_common_run_queue) {
- for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1);
- wake_all_schedulers();
- }
- else {
- for (s_ix = 0; s_ix < erts_no_run_queues; s_ix++) {
- ErtsRunQueue *rq = ERTS_RUNQ_IX(s_ix);
- erts_smp_runq_lock(rq);
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
- erts_smp_runq_unlock(rq);
- wake_scheduler(rq, 0, 1);
- };
- }
-#else
- check_cpu_bind(erts_get_scheduler_data());
-#endif
-}
-
-int
-erts_init_scheduler_bind_type(char *how)
-{
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP)
- return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
-
- if (!system_cpudata && !user_cpudata)
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
-
- if (sys_strcmp(how, "db") == 0)
- cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (sys_strcmp(how, "s") == 0)
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (sys_strcmp(how, "ps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "ts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (sys_strcmp(how, "tnnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (sys_strcmp(how, "ns") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (sys_strcmp(how, "u") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
-
- return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
-}
-
-/*
- * reader groups map
- */
-
-typedef struct {
- int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
-} erts_avail_cput;
-
-typedef struct {
- int *map;
- int size;
- int groups;
-} erts_reader_groups_map_test;
-
-typedef struct {
- int id;
- int sub_levels;
- int reader_groups;
-} erts_rg_count_t;
-
-typedef struct {
- int logical;
- int reader_group;
-} erts_reader_groups_map_t;
-
-typedef struct {
- erts_reader_groups_map_t *map;
- int map_size;
- int logical_processors;
- int groups;
-} erts_make_reader_groups_map_test;
-
-static int reader_groups_available_cpu_check;
-static int reader_groups_logical_processors;
-static int reader_groups_map_size;
-static erts_reader_groups_map_t *reader_groups_map;
-
-#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test);
-
-static Eterm
-get_reader_groups_map(Process *c_p,
- erts_reader_groups_map_t *map,
- int map_size,
- int logical_processors)
-{
-#ifdef DEBUG
- Eterm *endp;
-#endif
- Eterm res = NIL, tuple;
- Eterm *hp;
- int i;
-
- hp = HAlloc(c_p, logical_processors*(2+3));
-#ifdef DEBUG
- endp = hp + logical_processors*(2+3);
-#endif
- for (i = map_size - 1; i >= 0; i--) {
- if (map[i].logical >= 0) {
- tuple = TUPLE2(hp,
- make_small(map[i].logical),
- make_small(map[i].reader_group));
- hp += 3;
- res = CONS(hp, tuple, res);
- hp += 2;
- }
- }
- ASSERT(hp == endp);
- return res;
-}
-
-Eterm
-erts_debug_reader_groups_map(Process *c_p, int groups)
-{
- Eterm res;
- erts_make_reader_groups_map_test test;
-
- test.groups = groups;
- make_reader_groups_map(&test);
- if (!test.map)
- res = NIL;
- else {
- res = get_reader_groups_map(c_p,
- test.map,
- test.map_size,
- test.logical_processors);
- erts_free(ERTS_ALC_T_TMP, test.map);
- }
- return res;
-}
-
-
-Eterm
-erts_get_reader_groups_map(Process *c_p)
-{
- Eterm res;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- res = get_reader_groups_map(c_p,
- reader_groups_map,
- reader_groups_map_size,
- reader_groups_logical_processors);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static void
-make_available_cpu_topology(erts_avail_cput *no,
- erts_avail_cput *avail,
- erts_cpu_topology_t *cpudata,
- int *size,
- int test)
-{
- int len = *size;
- erts_cpu_topology_t last;
- int a, i, j;
-
- no->level[ERTS_TOPOLOGY_NODE] = -1;
- no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
- no->level[ERTS_TOPOLOGY_CORE] = -1;
- no->level[ERTS_TOPOLOGY_THREAD] = -1;
- no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
-
- last.node = INT_MIN;
- last.processor = INT_MIN;
- last.processor_node = INT_MIN;
- last.core = INT_MIN;
- last.thread = INT_MIN;
- last.logical = INT_MIN;
-
- a = 0;
-
- for (i = 0; i < len; i++) {
-
- if (!test && !erts_is_cpu_available(erts_cpuinfo, cpudata[i].logical))
- continue;
-
- if (last.node != cpudata[i].node)
- goto node;
- if (last.processor != cpudata[i].processor)
- goto processor;
- if (last.processor_node != cpudata[i].processor_node)
- goto processor_node;
- if (last.core != cpudata[i].core)
- goto core;
- ASSERT(last.thread != cpudata[i].thread);
- goto thread;
-
- node:
- no->level[ERTS_TOPOLOGY_NODE]++;
- processor:
- no->level[ERTS_TOPOLOGY_PROCESSOR]++;
- processor_node:
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
- core:
- no->level[ERTS_TOPOLOGY_CORE]++;
- thread:
- no->level[ERTS_TOPOLOGY_THREAD]++;
-
- no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
- for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
- avail[a].level[j] = no->level[j];
-
- avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
- avail[a].level[ERTS_TOPOLOGY_RG] = 0;
-
- ASSERT(last.logical != cpudata[a].logical);
-
- last = cpudata[i];
- a++;
- }
-
- no->level[ERTS_TOPOLOGY_NODE]++;
- no->level[ERTS_TOPOLOGY_PROCESSOR]++;
- no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
- no->level[ERTS_TOPOLOGY_CORE]++;
- no->level[ERTS_TOPOLOGY_THREAD]++;
- no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
- *size = a;
-}
-
-static int
-reader_group_lookup(int logical)
-{
- int start = logical % reader_groups_map_size;
- int ix = start;
-
- do {
- if (reader_groups_map[ix].logical == logical) {
- ASSERT(reader_groups_map[ix].reader_group > 0);
- return reader_groups_map[ix].reader_group;
- }
- ix++;
- if (ix == reader_groups_map_size)
- ix = 0;
- } while (ix != start);
-
- erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
-}
-
-static void
-reader_group_insert(erts_reader_groups_map_t *map, int map_size,
- int logical, int reader_group)
-{
- int start = logical % map_size;
- int ix = start;
-
- do {
- if (map[ix].logical < 0) {
- map[ix].logical = logical;
- map[ix].reader_group = reader_group;
- return;
- }
- ix++;
- if (ix == map_size)
- ix = 0;
- } while (ix != start);
-
- erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
-}
-
-
-static int
-sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail)
-{
- int sub_level = level+1;
- int last = -1;
- rgc->sub_levels = 0;
-
- do {
- if (last != avail[aix].level[sub_level]) {
- rgc->sub_levels++;
- last = avail[aix].level[sub_level];
- }
- aix++;
- }
- while (aix < avail_sz && rgc->id == avail[aix].level[level]);
- rgc->reader_groups = 0;
- return aix;
-}
-
-static int
-write_reader_groups(int *rgp, erts_rg_count_t *rgcp,
- int level, int a,
- int avail_sz, erts_avail_cput *avail)
-{
- int rg = *rgp;
- int sub_level = level+1;
- int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups;
- int xsl = rgcp->sub_levels % rgcp->reader_groups;
- int sls = 0;
- int last = -1;
- int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1;
-
- ASSERT(level < 0 || avail[a].level[level] == rgcp->id)
-
- do {
- if (last != avail[a].level[sub_level]) {
- if (!sls) {
- sls = sl_per_gr;
- rg++;
- if (rg >= xsl_rg_lim)
- sls++;
- }
- last = avail[a].level[sub_level];
- sls--;
- }
- avail[a].level[ERTS_TOPOLOGY_RG] = rg;
- a++;
- } while (a < avail_sz && (level < 0
- || avail[a].level[level] == rgcp->id));
-
- ASSERT(rgcp->reader_groups == rg - *rgp);
-
- *rgp = rg;
-
- return a;
-}
-
-static int
-rg_count_sub_levels_compare(const void *vx, const void *vy)
-{
- erts_rg_count_t *x = (erts_rg_count_t *) vx;
- erts_rg_count_t *y = (erts_rg_count_t *) vy;
- if (x->sub_levels != y->sub_levels)
- return y->sub_levels - x->sub_levels;
- return x->id - y->id;
-}
-
-static int
-rg_count_id_compare(const void *vx, const void *vy)
-{
- erts_rg_count_t *x = (erts_rg_count_t *) vx;
- erts_rg_count_t *y = (erts_rg_count_t *) vy;
- return x->id - y->id;
-}
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test)
-{
- int i, spread_level, avail_sz;
- erts_avail_cput no, *avail;
- erts_cpu_topology_t *cpudata;
- erts_reader_groups_map_t *map;
- int map_sz;
- int groups = erts_reader_groups;
-
- if (test) {
- test->map = NULL;
- test->map_size = 0;
- groups = test->groups;
- }
-
- if (!groups)
- return;
-
- if (!test) {
- if (reader_groups_map)
- erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map);
-
- reader_groups_logical_processors = 0;
- reader_groups_map_size = 0;
- reader_groups_map = NULL;
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
-
- if (!cpudata)
- return;
-
- cpu_bind_order_sort(cpudata,
- avail_sz,
- ERTS_CPU_BIND_NO_SPREAD,
- 1);
-
- avail = erts_alloc(ERTS_ALC_T_TMP,
- sizeof(erts_avail_cput)*avail_sz);
-
- make_available_cpu_topology(&no, avail, cpudata,
- &avail_sz, test != NULL);
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- map_sz = avail_sz*2+1;
-
- if (test) {
- map = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_reader_groups_map_t)
- * map_sz));
- test->map = map;
- test->map_size = map_sz;
- test->logical_processors = avail_sz;
- }
- else {
- map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP,
- (sizeof(erts_reader_groups_map_t)
- * map_sz));
- reader_groups_map = map;
- reader_groups_logical_processors = avail_sz;
- reader_groups_map_size = map_sz;
-
- }
-
- for (i = 0; i < map_sz; i++) {
- map[i].logical = -1;
- map[i].reader_group = 0;
- }
-
- spread_level = ERTS_TOPOLOGY_CORE;
- for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
- if (no.level[i] > groups) {
- spread_level = i;
- break;
- }
- }
-
- if (no.level[spread_level] <= groups) {
- int a, rg, last = -1;
- rg = 0;
- ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
- for (a = 0; a < avail_sz; a++) {
- if (last != avail[a].level[spread_level]) {
- rg++;
- last = avail[a].level[spread_level];
- }
- reader_group_insert(map,
- map_sz,
- avail[a].level[ERTS_TOPOLOGY_LOGICAL],
- rg);
- }
- }
- else { /* groups < no.level[spread_level] */
- erts_rg_count_t *rg_count;
- int a, rg, tl, toplevels;
-
- tl = spread_level-1;
-
- if (spread_level == ERTS_TOPOLOGY_NODE)
- toplevels = 1;
- else
- toplevels = no.level[tl];
-
- rg_count = erts_alloc(ERTS_ALC_T_TMP,
- toplevels*sizeof(erts_rg_count_t));
-
- if (toplevels == 1) {
- rg_count[0].id = 0;
- rg_count[0].sub_levels = no.level[spread_level];
- rg_count[0].reader_groups = groups;
- }
- else {
- int rgs_per_tl, rgs;
- rgs = groups;
- rgs_per_tl = rgs / toplevels;
-
- a = 0;
- for (i = 0; i < toplevels; i++) {
- rg_count[i].id = avail[a].level[tl];
- a = sub_levels(&rg_count[i], tl, a, avail_sz, avail);
- }
-
- qsort(rg_count,
- toplevels,
- sizeof(erts_rg_count_t),
- rg_count_sub_levels_compare);
-
- for (i = 0; i < toplevels; i++) {
- if (rg_count[i].sub_levels < rgs_per_tl) {
- rg_count[i].reader_groups = rg_count[i].sub_levels;
- rgs -= rg_count[i].sub_levels;
- }
- else {
- rg_count[i].reader_groups = rgs_per_tl;
- rgs -= rgs_per_tl;
- }
- }
-
- while (rgs > 0) {
- for (i = 0; i < toplevels; i++) {
- if (rg_count[i].sub_levels == rg_count[i].reader_groups)
- break;
- else {
- rg_count[i].reader_groups++;
- if (--rgs == 0)
- break;
- }
- }
- }
-
- qsort(rg_count,
- toplevels,
- sizeof(erts_rg_count_t),
- rg_count_id_compare);
- }
-
- a = i = rg = 0;
- while (a < avail_sz) {
- a = write_reader_groups(&rg, &rg_count[i], tl,
- a, avail_sz, avail);
- i++;
- }
-
- ASSERT(groups == rg);
-
- for (a = 0; a < avail_sz; a++)
- reader_group_insert(map,
- map_sz,
- avail[a].level[ERTS_TOPOLOGY_LOGICAL],
- avail[a].level[ERTS_TOPOLOGY_RG]);
-
- erts_free(ERTS_ALC_T_TMP, rg_count);
- }
-
- erts_free(ERTS_ALC_T_TMP, avail);
-}
-
-/*
- * CPU topology
- */
-
-typedef struct {
- int *id;
- int used;
- int size;
-} ErtsCpuTopIdSeq;
-
-typedef struct {
- ErtsCpuTopIdSeq logical;
- ErtsCpuTopIdSeq thread;
- ErtsCpuTopIdSeq core;
- ErtsCpuTopIdSeq processor_node;
- ErtsCpuTopIdSeq processor;
- ErtsCpuTopIdSeq node;
-} ErtsCpuTopEntry;
-
-static void
-init_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- int size = 10;
- cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->logical.size = size;
- cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->thread.size = size;
- cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->core.size = size;
- cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor_node.size = size;
- cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor.size = size;
- cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->node.size = size;
-}
-
-static void
-destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
-}
-
-static int
-get_cput_value_or_range(int *v, int *vr, char **str)
-{
- long l;
- char *c = *str;
- errno = 0;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- *v = (int) l;
- if (*c == '-') {
- c++;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- *vr = (int) l;
- }
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
-{
- int ix = 0;
- int need_size = 0;
- char *c = *str;
-
- while (1) {
- int res;
- int val;
- int nids;
- int val_range = -1;
- res = get_cput_value_or_range(&val, &val_range, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- if (val_range < 0 || val_range == val)
- nids = 1;
- else {
- if (val_range > val)
- nids = val_range - val + 1;
- else
- nids = val - val_range + 1;
- }
- need_size += nids;
- if (need_size > idseq->size) {
- idseq->size = need_size + 10;
- idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
- idseq->id,
- sizeof(int)*idseq->size);
- }
- if (nids == 1)
- idseq->id[ix++] = val;
- else if (val_range > val) {
- for (; val <= val_range; val++)
- idseq->id[ix++] = val;
- }
- else {
- for (; val >= val_range; val--)
- idseq->id[ix++] = val;
- }
- if (*c != ',')
- break;
- c++;
- }
- *str = c;
- idseq->used = ix;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_entry(ErtsCpuTopEntry *cput, char **str)
-{
- int h;
- char *c = *str;
-
- cput->logical.used = 0;
- cput->thread.id[0] = 0;
- cput->thread.used = 1;
- cput->core.id[0] = 0;
- cput->core.used = 1;
- cput->processor_node.id[0] = -1;
- cput->processor_node.used = 1;
- cput->processor.id[0] = 0;
- cput->processor.used = 1;
- cput->node.id[0] = -1;
- cput->node.used = 1;
-
- h = ERTS_TOPOLOGY_MAX_DEPTH;
- while (*c != ':' && *c != '\0') {
- int res;
- ErtsCpuTopIdSeq *idseqp;
- switch (*c++) {
- case 'L':
- if (h <= ERTS_TOPOLOGY_LOGICAL)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->logical;
- h = ERTS_TOPOLOGY_LOGICAL;
- break;
- case 't':
- case 'T':
- if (h <= ERTS_TOPOLOGY_THREAD)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->thread;
- h = ERTS_TOPOLOGY_THREAD;
- break;
- case 'c':
- case 'C':
- if (h <= ERTS_TOPOLOGY_CORE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->core;
- h = ERTS_TOPOLOGY_CORE;
- break;
- case 'p':
- case 'P':
- if (h <= ERTS_TOPOLOGY_PROCESSOR)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor;
- h = ERTS_TOPOLOGY_PROCESSOR;
- break;
- case 'n':
- case 'N':
- if (h <= ERTS_TOPOLOGY_PROCESSOR) {
- do_node:
- if (h <= ERTS_TOPOLOGY_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->node;
- h = ERTS_TOPOLOGY_NODE;
- }
- else {
- int p_node = 0;
- char *p_chk = c;
- while (*p_chk != '\0' && *p_chk != ':') {
- if (*p_chk == 'p' || *p_chk == 'P') {
- p_node = 1;
- break;
- }
- p_chk++;
- }
- if (!p_node)
- goto do_node;
- if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor_node;
- h = ERTS_TOPOLOGY_PROCESSOR_NODE;
- }
- break;
- default:
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
- }
- res = get_cput_id_seq(idseqp, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- }
-
- if (cput->logical.used < 1)
- return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
-
- if (*c == ':') {
- c++;
- }
-
- if (cput->thread.used != 1
- && cput->thread.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->core.used != 1
- && cput->core.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor_node.used != 1
- && cput->processor_node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor.used != 1
- && cput->processor.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->node.used != 1
- && cput->node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-verify_topology(erts_cpu_topology_t *cpudata, int size)
-{
- if (size > 0) {
- int *logical;
- int node, processor, no_nodes, i;
-
- /* Verify logical ids */
- logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
-
- for (i = 0; i < size; i++)
- logical[i] = cpudata[i].logical;
-
- qsort(logical, size, sizeof(int), int_cmp);
- for (i = 0; i < size-1; i++) {
- if (logical[i] == logical[i+1]) {
- erts_free(ERTS_ALC_T_TMP, logical);
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
- }
- }
-
- erts_free(ERTS_ALC_T_TMP, logical);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
-
- /* Verify unique entities */
-
- for (i = 1; i < size; i++) {
- if (cpudata[i-1].processor == cpudata[i].processor
- && cpudata[i-1].node == cpudata[i].node
- && (cpudata[i-1].processor_node
- == cpudata[i].processor_node)
- && cpudata[i-1].core == cpudata[i].core
- && cpudata[i-1].thread == cpudata[i].thread) {
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
- }
- }
-
- /* Verify numa nodes */
- node = cpudata[0].node;
- processor = cpudata[0].processor;
- no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
- for (i = 1; i < size; i++) {
- if (no_nodes) {
- if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- else {
- if (cpudata[i].processor == processor && cpudata[i].node != node)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- node = cpudata[i].node;
- processor = cpudata[i].processor;
- if (node >= 0 && cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- if (node < 0 && cpudata[i].processor_node < 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- }
- }
-
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-int
-erts_init_cpu_topology(char *topology_str)
-{
- ErtsCpuTopEntry cput;
- int need_size;
- char *c;
- int ix;
- int error = ERTS_INIT_CPU_TOPOLOGY_OK;
-
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 10;
-
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
-
- init_cpu_top_entry(&cput);
-
- ix = 0;
- need_size = 0;
-
- c = topology_str;
- if (*c == '\0') {
- error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
- goto fail;
- }
- do {
- int r;
- error = get_cput_entry(&cput, &c);
- if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
- goto fail;
- need_size += cput.logical.used;
- if (user_cpudata_size < need_size) {
- user_cpudata_size = need_size + 10;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- ASSERT(cput.thread.used == 1
- || cput.thread.used == cput.logical.used);
- ASSERT(cput.core.used == 1
- || cput.core.used == cput.logical.used);
- ASSERT(cput.processor_node.used == 1
- || cput.processor_node.used == cput.logical.used);
- ASSERT(cput.processor.used == 1
- || cput.processor.used == cput.logical.used);
- ASSERT(cput.node.used == 1
- || cput.node.used == cput.logical.used);
-
- for (r = 0; r < cput.logical.used; r++) {
- user_cpudata[ix].logical = cput.logical.id[r];
- user_cpudata[ix].thread =
- cput.thread.id[cput.thread.used == 1 ? 0 : r];
- user_cpudata[ix].core =
- cput.core.id[cput.core.used == 1 ? 0 : r];
- user_cpudata[ix].processor_node =
- cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
- user_cpudata[ix].processor =
- cput.processor.id[cput.processor.used == 1 ? 0 : r];
- user_cpudata[ix].node =
- cput.node.id[cput.node.used == 1 ? 0 : r];
- ix++;
- }
- } while (*c != '\0');
-
- if (user_cpudata_size != ix) {
- user_cpudata_size = ix;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- error = verify_topology(user_cpudata, user_cpudata_size);
- if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
- destroy_cpu_top_entry(&cput);
- return ERTS_INIT_CPU_TOPOLOGY_OK;
- }
-
- fail:
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 0;
- destroy_cpu_top_entry(&cput);
- return error;
-}
-
-#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
-#define ERTS_GET_USED_CPU_TOPOLOGY 0
-#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
-#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
-
-static Eterm get_cpu_topology_term(Process *c_p, int type);
-
-Eterm
-erts_set_cpu_topology(Process *c_p, Eterm term)
-{
- erts_cpu_topology_t *cpudata = NULL;
- int cpudata_size = 0;
- Eterm res;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
- if (term == am_undefined) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
- cpudata_size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
- }
- else if (is_not_list(term)) {
- error:
- res = THE_NON_VALUE;
- goto done;
- }
- else {
- Eterm list = term;
- int ix = 0;
-
- cpudata_size = 100;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- while (is_list(list)) {
- Eterm *lp = list_val(list);
- Eterm cpu = CAR(lp);
- Eterm* tp;
- Sint id;
-
- if (is_not_tuple(cpu))
- goto error;
-
- tp = tuple_val(cpu);
-
- if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
- goto error;
-
- if (ix >= cpudata_size) {
- cpudata_size += 100;
- cpudata = erts_realloc(ERTS_ALC_T_TMP,
- cpudata,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
- }
-
- id = signed_val(tp[2]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].node = (int) id;
-
- id = signed_val(tp[3]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor = (int) id;
-
- id = signed_val(tp[4]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor_node = (int) id;
-
- id = signed_val(tp[5]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].core = (int) id;
-
- id = signed_val(tp[6]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].thread = (int) id;
-
- id = signed_val(tp[7]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].logical = (int) id;
-
- list = CDR(lp);
- ix++;
- }
-
- if (is_not_nil(list))
- goto error;
-
- cpudata_size = ix;
-
- if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
- goto error;
-
- if (user_cpudata_size != cpudata_size) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- user_cpudata_size = cpudata_size;
- }
-
- sys_memcpy((void *) user_cpudata,
- (void *) cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
-
- make_reader_groups_map(NULL);
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- done:
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-static Eterm
-bound_schedulers_term(ErtsCpuBindOrder order)
-{
- switch (order) {
- case ERTS_CPU_BIND_SPREAD: {
- ERTS_DECL_AM(spread);
- return AM_spread;
- }
- case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(processor_spread);
- return AM_processor_spread;
- }
- case ERTS_CPU_BIND_THREAD_SPREAD: {
- ERTS_DECL_AM(thread_spread);
- return AM_thread_spread;
- }
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(thread_no_node_processor_spread);
- return AM_thread_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(no_node_processor_spread);
- return AM_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
- ERTS_DECL_AM(no_node_thread_spread);
- return AM_no_node_thread_spread;
- }
- case ERTS_CPU_BIND_NO_SPREAD: {
- ERTS_DECL_AM(no_spread);
- return AM_no_spread;
- }
- case ERTS_CPU_BIND_NONE: {
- ERTS_DECL_AM(unbound);
- return AM_unbound;
- }
- default:
- ASSERT(0);
- return THE_NON_VALUE;
- }
-}
-
-Eterm
-erts_bound_schedulers_term(Process *c_p)
-{
- ErtsCpuBindOrder order;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- order = cpu_bind_order;
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return bound_schedulers_term(order);
-}
-
-static void
-create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
-{
- if (user_cpudata) {
- *cpudata_size = user_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) user_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else if (system_cpudata) {
- *cpudata_size = system_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else {
- *cpudata = NULL;
- *cpudata_size = 0;
- }
-}
-
-static void
-destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
-{
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-}
-
-Eterm
-erts_bind_schedulers(Process *c_p, Eterm how)
-{
- Eterm res;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- ErtsCpuBindOrder old_cpu_bind_order;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) {
- ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
- }
- else {
-
- old_cpu_bind_order = cpu_bind_order;
-
- if (ERTS_IS_ATOM_STR("default_bind", how))
- cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (ERTS_IS_ATOM_STR("spread", how))
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-
- if (!cpudata) {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- res = bound_schedulers_term(old_cpu_bind_order);
- }
-
- done:
-
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- return res;
-}
-
-Eterm
-erts_fake_scheduler_bindings(Process *p, Eterm how)
-{
- ErtsCpuBindOrder fake_cpu_bind_order;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- Eterm res;
-
- if (ERTS_IS_ATOM_STR("default_bind", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
- else if (ERTS_IS_ATOM_STR("spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- ERTS_BIF_PREP_ERROR(res, p, BADARG);
- return res;
- }
-
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-
- if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
- ERTS_BIF_PREP_RET(res, am_false);
- else {
- int i;
- Eterm *hp;
-
- cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
-
-#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
-
- erts_fprintf(stderr, "node: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].node);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "processor: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor);
- erts_fprintf(stderr, "\n");
- if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
- erts_fprintf(stderr, "processor_node:");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
- erts_fprintf(stderr, "\n");
- }
- erts_fprintf(stderr, "core: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].core);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "thread: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].thread);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "logical: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].logical);
- erts_fprintf(stderr, "\n");
-#endif
-
- hp = HAlloc(p, cpudata_size+1);
- ERTS_BIF_PREP_RET(res, make_tuple(hp));
- *hp++ = make_arityval((Uint) cpudata_size);
- for (i = 0; i < cpudata_size; i++)
- *hp++ = make_small((Uint) cpudata[i].logical);
- }
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_schedulers_binds(Process *c_p)
-{
- int ix;
- ERTS_DECL_AM(unbound);
- Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
- Eterm res = make_tuple(hp);
-
- *(hp++) = make_arityval(erts_no_schedulers);
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- for (ix = 1; ix <= erts_no_schedulers; ix++)
- *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
- ? make_small(scheduler2cpu_map[ix].bound_id)
- : AM_unbound);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static Eterm
-bld_topology_term(Eterm **hpp,
- Uint *hszp,
- erts_cpu_topology_t *cpudata,
- int size)
-{
- Eterm res = NIL;
- int i;
-
- if (size == 0)
- return am_undefined;
-
- for (i = size-1; i >= 0; i--) {
- res = erts_bld_cons(hpp,
- hszp,
- erts_bld_tuple(hpp,
- hszp,
- 7,
- am_cpu,
- make_small(cpudata[i].node),
- make_small(cpudata[i].processor),
- make_small(cpudata[i].processor_node),
- make_small(cpudata[i].core),
- make_small(cpudata[i].thread),
- make_small(cpudata[i].logical)),
- res);
- }
- return res;
-}
-
-static Eterm
-get_cpu_topology_term(Process *c_p, int type)
-{
-#ifdef DEBUG
- Eterm *hp_end;
-#endif
- Eterm *hp;
- Uint hsz;
- Eterm res = THE_NON_VALUE;
- erts_cpu_topology_t *cpudata = NULL;
- int size = 0;
-
- switch (type) {
- case ERTS_GET_USED_CPU_TOPOLOGY:
- if (user_cpudata)
- goto defined;
- else
- goto detected;
- case ERTS_GET_DETECTED_CPU_TOPOLOGY:
- detected:
- if (!system_cpudata)
- res = am_undefined;
- else {
- size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * size));
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*size);
- }
- break;
- case ERTS_GET_DEFINED_CPU_TOPOLOGY:
- defined:
- if (!user_cpudata)
- res = am_undefined;
- else {
- size = user_cpudata_size;
- cpudata = user_cpudata;
- }
- break;
- default:
- erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
- break;
- }
-
- if (res == am_undefined) {
- ASSERT(!cpudata);
- return res;
- }
-
- hsz = 0;
-
- bld_topology_term(NULL, &hsz,
- cpudata, size);
-
- hp = HAlloc(c_p, hsz);
-
-#ifdef DEBUG
- hp_end = hp + hsz;
-#endif
-
- res = bld_topology_term(&hp, NULL,
- cpudata, size);
-
- ASSERT(hp_end == hp);
-
- if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_cpu_topology_term(Process *c_p, Eterm which)
-{
- Eterm res;
- int type;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- if (ERTS_IS_ATOM_STR("used", which))
- type = ERTS_GET_USED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("detected", which))
- type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("defined", which))
- type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
- else
- type = ERTS_GET_CPU_TOPOLOGY_ERROR;
- if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
- res = THE_NON_VALUE;
- else
- res = get_cpu_topology_term(c_p, type);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static void
-early_cpu_bind_init(void)
-{
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
- system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * system_cpudata_size));
-
- cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
-
- reader_groups_available_cpu_check = 1;
- reader_groups_logical_processors = 0;
- reader_groups_map_size = 0;
- reader_groups_map = NULL;
-
- if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
- || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
- system_cpudata_size)) {
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
- system_cpudata = NULL;
- system_cpudata_size = 0;
- }
-}
-
-static void
-late_cpu_bind_init(void)
-{
- int ix;
-
- erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind");
-
- scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(ErtsCpuBindData)
- * (erts_no_schedulers+1)));
- for (ix = 1; ix <= erts_no_schedulers; ix++) {
- scheduler2cpu_map[ix].bind_id = -1;
- scheduler2cpu_map[ix].bound_id = -1;
- }
-
- if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
- int ncpus = erts_get_cpu_configured(erts_cpuinfo);
- if (ncpus < 1 || erts_no_schedulers < ncpus)
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else
- cpu_bind_order = ((system_cpudata || user_cpudata)
- && (erts_bind_to_cpu(erts_cpuinfo, -1) != -ENOTSUP)
- ? ERTS_CPU_BIND_DEFAULT_BIND
- : ERTS_CPU_BIND_NONE);
- }
-
- make_reader_groups_map(NULL);
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- signal_schedulers_bind_change(cpudata, cpudata_size);
- destroy_tmp_cpu_topology_copy(cpudata);
- }
-}
-
-int
-erts_update_cpu_info(void)
-{
- int changed;
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- changed = erts_cpu_info_update(erts_cpuinfo);
- if (changed) {
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
-
- if (system_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
-
- system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
- if (!system_cpudata_size)
- system_cpudata = NULL;
- else {
- system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * system_cpudata_size));
-
- if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
- || (ERTS_INIT_CPU_TOPOLOGY_OK
- != verify_topology(system_cpudata,
- system_cpudata_size))) {
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
- system_cpudata = NULL;
- system_cpudata_size = 0;
- }
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- signal_schedulers_bind_change(cpudata, cpudata_size);
- destroy_tmp_cpu_topology_copy(cpudata);
- }
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
- return changed;
-}
-
#ifdef ERTS_SMP
static void
@@ -7069,7 +5091,7 @@ Process *schedule(Process *p, int calls)
}
if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND)
|| erts_smp_atomic_read(&esdp->chk_cpu_bind)) {
- check_cpu_bind(esdp);
+ erts_sched_check_cpu_bind(esdp);
}
}
@@ -7165,7 +5187,9 @@ Process *schedule(Process *p, int calls)
erts_smp_atomic_set(&function_calls, 0);
fcalls = 0;
+
ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
#ifdef ERTS_SMP
/* erts_sys_schedule_interrupt(0); */
#endif
@@ -7497,6 +5521,15 @@ erts_schedule_misc_op(void (*func)(void *), void *arg)
ErtsRunQueue *rq = erts_get_runq_current(NULL);
ErtsMiscOpList *molp = misc_op_list_alloc();
+ if (!rq) {
+ /*
+ * This can only happen when the sys msg dispatcher
+ * thread schedules misc ops (this happens *very*
+ * seldom; only when trace drivers are unloaded).
+ */
+ rq = ERTS_RUNQ_IX(0);
+ }
+
erts_smp_runq_lock(rq);
while (rq->misc.evac_runq) {
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 4365e409e5..c038e57b65 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -89,7 +89,6 @@ extern int erts_sched_thread_suggested_stack_size;
#define ERTS_SCHED_THREAD_MAX_STACK_SIZE 8192 /* Kilo words */
#ifdef ERTS_SMP
-extern Uint erts_max_main_threads;
#include "erl_bits.h"
#endif
@@ -426,6 +425,13 @@ struct ErtsSchedulerData_ {
#endif
};
+typedef union {
+ ErtsSchedulerData esd;
+ char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
+} ErtsAlignedSchedulerData;
+
+extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+
#ifndef ERTS_SMP
extern ErtsSchedulerData *erts_scheduler_data;
#endif
@@ -1007,27 +1013,12 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags;
(p)->flags &= ~F_TIMO; \
} while (0)
-
-#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS 0
-#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED 1
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY 2
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE 3
-
-int erts_init_scheduler_bind_type(char *how);
-
-#define ERTS_INIT_CPU_TOPOLOGY_OK 0
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID 1
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE 2
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY 3
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE 4
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES 5
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID 6
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS 7
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES 8
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING 9
-
-int erts_init_cpu_topology(char *topology_str);
-int erts_update_cpu_info(void);
+#define ERTS_RUNQ_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \
+ &erts_aligned_run_queues[(IX)].runq)
+#define ERTS_SCHEDULER_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
+ &erts_aligned_scheduler_data[(IX)].esd)
void erts_pre_init_process(void);
void erts_late_init_process(void);
@@ -1058,8 +1049,9 @@ Eterm erts_multi_scheduling_blockers(Process *);
void erts_start_schedulers(void);
void erts_smp_notify_check_children_needed(void);
#endif
+void erts_sched_notify_check_cpu_bind(void);
Uint erts_active_schedulers(void);
-void erts_init_process(void);
+void erts_init_process(int);
Eterm erts_process_status(Process *, ErtsProcLocks, Process *, Eterm);
Uint erts_run_queues_len(Uint *);
void erts_add_to_runq(Process *);
diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c
index a4d12139e9..1bebcdb911 100644
--- a/erts/emulator/beam/erl_process_lock.c
+++ b/erts/emulator/beam/erl_process_lock.c
@@ -117,10 +117,9 @@ static int aux_thr_proc_lock_spin_count;
static void cleanup_tse(void);
void
-erts_init_proc_lock(void)
+erts_init_proc_lock(int cpus)
{
int i;
- int cpus;
erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc");
for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) {
#ifdef ERTS_ENABLE_LOCK_COUNT
@@ -138,7 +137,6 @@ erts_init_proc_lock(void)
lc_id.proc_lock_msgq = erts_lc_get_lock_order_id("proc_msgq");
lc_id.proc_lock_status = erts_lc_get_lock_order_id("proc_status");
#endif
- cpus = erts_get_cpu_configured(erts_cpuinfo);
if (cpus > 1) {
proc_lock_spin_count = ERTS_PROC_LOCK_SPIN_COUNT_BASE;
proc_lock_spin_count += (ERTS_PROC_LOCK_SPIN_COUNT_SCHED_INC
diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h
index 7cfc9893fa..4fe30c7209 100644
--- a/erts/emulator/beam/erl_process_lock.h
+++ b/erts/emulator/beam/erl_process_lock.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2007-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2007-2010. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -334,7 +334,7 @@ erts_proc_lock_flags_cmpxchg(erts_proc_lock_t *lck, ErtsProcLocks new,
extern erts_pix_lock_t erts_pix_locks[ERTS_NO_OF_PIX_LOCKS];
-void erts_init_proc_lock(void);
+void erts_init_proc_lock(int cpus);
void erts_proc_lock_prepare_proc_lock_waiter(void);
void erts_proc_lock_failed(Process *,
erts_pix_lock_t *,
diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h
index 0b7269262e..a74cf79b8c 100644
--- a/erts/emulator/beam/erl_threads.h
+++ b/erts/emulator/beam/erl_threads.h
@@ -27,9 +27,6 @@
#define ERTS_SPIN_BODY ETHR_SPIN_BODY
-#define ERTS_MAX_READER_GROUPS 8
-extern int erts_reader_groups;
-
#include "sys.h"
#ifdef USE_THREADS
diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h
index cd63401581..48fa99934e 100644
--- a/erts/emulator/beam/erl_vm.h
+++ b/erts/emulator/beam/erl_vm.h
@@ -199,6 +199,7 @@ extern int BIN_VH_MIN_SIZE; /* minimum virtual (bin) heap */
extern int erts_atom_table_size;/* Atom table size */
#define ORIG_CREATION 0
+#define INTERNAL_CREATION 255
/* macros for extracting bytes from uint16's */
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index d7c8aa84e9..328aa2be6a 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -49,10 +49,8 @@
#define in_area(ptr,start,nbytes) ((Uint)((char*)(ptr) - (char*)(start)) < (nbytes))
#define MAX_STRING_LEN 0xffff
-#define dec_set_creation(nodename,creat) \
- (((nodename) == erts_this_node->sysname && (creat) == ORIG_CREATION) \
- ? erts_this_node->creation \
- : (creat))
+
+#define is_valid_creation(Cre) ((unsigned)(Cre) < MAX_CREATION || (Cre) == INTERNAL_CREATION)
#undef ERTS_DEBUG_USE_DIST_SEP
#ifdef DEBUG
@@ -83,14 +81,14 @@
*
*/
-static byte* enc_term(ErtsAtomCacheMap *, Eterm, byte*, Uint32);
+static byte* enc_term(ErtsAtomCacheMap *, Eterm, byte*, Uint32, struct erl_off_heap_header** off_heap);
static Uint is_external_string(Eterm obj, int* p_is_string);
static byte* enc_atom(ErtsAtomCacheMap *, Eterm, byte*, Uint32);
static byte* enc_pid(ErtsAtomCacheMap *, Eterm, byte*, Uint32);
static byte* dec_term(ErtsDistExternal *, Eterm**, byte*, ErlOffHeap*, Eterm*);
static byte* dec_atom(ErtsDistExternal *, byte*, Eterm*);
static byte* dec_pid(ErtsDistExternal *, Eterm**, byte*, ErlOffHeap*, Eterm*);
-static Sint decoded_size(byte *ep, byte* endp, int only_heap_bins);
+static Sint decoded_size(byte *ep, byte* endp, int only_heap_bins, int internal_tags);
static Uint encode_size_struct2(ErtsAtomCacheMap *, Eterm, unsigned);
@@ -461,6 +459,12 @@ Uint erts_encode_ext_size(Eterm term)
+ 1 /* VERSION_MAGIC */;
}
+Uint erts_encode_ext_size_ets(Eterm term)
+{
+ return encode_size_struct2(NULL, term, TERM_TO_BINARY_DFLAGS|DFLAGS_INTERNAL_TAGS);
+}
+
+
void erts_encode_dist_ext(Eterm term, byte **ext, Uint32 flags, ErtsAtomCacheMap *acmp)
{
byte *ep = *ext;
@@ -468,7 +472,7 @@ void erts_encode_dist_ext(Eterm term, byte **ext, Uint32 flags, ErtsAtomCacheMap
if (!(flags & DFLAG_DIST_HDR_ATOM_CACHE))
#endif
*ep++ = VERSION_MAGIC;
- ep = enc_term(acmp, term, ep, flags);
+ ep = enc_term(acmp, term, ep, flags, NULL);
if (!ep)
erl_exit(ERTS_ABORT_EXIT,
"%s:%d:erts_encode_dist_ext(): Internal data structure error\n",
@@ -480,7 +484,7 @@ void erts_encode_ext(Eterm term, byte **ext)
{
byte *ep = *ext;
*ep++ = VERSION_MAGIC;
- ep = enc_term(NULL, term, ep, TERM_TO_BINARY_DFLAGS);
+ ep = enc_term(NULL, term, ep, TERM_TO_BINARY_DFLAGS, NULL);
if (!ep)
erl_exit(ERTS_ABORT_EXIT,
"%s:%d:erts_encode_ext(): Internal data structure error\n",
@@ -488,6 +492,12 @@ void erts_encode_ext(Eterm term, byte **ext)
*ext = ep;
}
+byte* erts_encode_ext_ets(Eterm term, byte *ep, struct erl_off_heap_header** off_heap)
+{
+ return enc_term(NULL, term, ep, TERM_TO_BINARY_DFLAGS|DFLAGS_INTERNAL_TAGS,
+ off_heap);
+}
+
ErtsDistExternal *
erts_make_dist_ext_copy(ErtsDistExternal *edep, Uint xsize)
{
@@ -813,7 +823,7 @@ erts_decode_dist_ext_size(ErtsDistExternal *edep, int no_refc_bins)
goto fail;
ep = edep->extp+1;
}
- res = decoded_size(ep, edep->ext_endp, no_refc_bins);
+ res = decoded_size(ep, edep->ext_endp, no_refc_bins, 0);
if (res >= 0)
return res;
fail:
@@ -825,9 +835,17 @@ Sint erts_decode_ext_size(byte *ext, Uint size, int no_refc_bins)
{
if (size == 0 || *ext != VERSION_MAGIC)
return -1;
- return decoded_size(ext+1, ext+size, no_refc_bins);
+ return decoded_size(ext+1, ext+size, no_refc_bins, 0);
}
+Sint erts_decode_ext_size_ets(byte *ext, Uint size)
+{
+ Sint sz = decoded_size(ext, ext+size, 0, 1);
+ ASSERT(sz >= 0);
+ return sz;
+}
+
+
/*
** hpp is set to either a &p->htop or
** a pointer to a memory pointer (form message buffers)
@@ -887,7 +905,13 @@ Eterm erts_decode_ext(Eterm **hpp, ErlOffHeap *off_heap, byte **ext)
return obj;
}
-
+Eterm erts_decode_ext_ets(Eterm **hpp, ErlOffHeap *off_heap, byte *ext)
+{
+ Eterm obj;
+ ext = dec_term(NULL, hpp, ext, off_heap, &obj);
+ ASSERT(ext);
+ return obj;
+}
/**********************************************************************/
@@ -964,6 +988,7 @@ term_to_binary_1(Process* p, Eterm Term)
return erts_term_to_binary(p, Term, 0, TERM_TO_BINARY_DFLAGS);
}
+
Eterm
term_to_binary_2(Process* p, Eterm Term, Eterm Flags)
{
@@ -1075,7 +1100,7 @@ binary2term_prepare(ErtsBinary2TermState *state, byte *data, Sint data_size)
goto error;
size = (Sint) dest_len;
}
- res = decoded_size(state->extp, state->extp + size, 0);
+ res = decoded_size(state->extp, state->extp + size, 0, 0);
if (res < 0)
goto error;
return res;
@@ -1183,7 +1208,8 @@ BIF_RETTYPE binary_to_term_2(BIF_ALIST_2)
opt = CAR(list_val(opts));
if (opt == am_safe) {
fakedep.flags |= ERTS_DIST_EXT_BTT_SAFE;
- } else {
+ }
+ else {
goto error;
}
opts = CDR(list_val(opts));
@@ -1255,7 +1281,7 @@ erts_term_to_binary(Process* p, Eterm Term, int level, Uint flags)
bytes = erts_alloc(ERTS_ALC_T_TMP, size);
}
- if ((endp = enc_term(NULL, Term, bytes, flags))
+ if ((endp = enc_term(NULL, Term, bytes, flags, NULL))
== NULL) {
erl_exit(1, "%s, line %d: bad term: %x\n",
__FILE__, __LINE__, Term);
@@ -1300,7 +1326,7 @@ erts_term_to_binary(Process* p, Eterm Term, int level, Uint flags)
bin = new_binary(p, (byte *)NULL, size);
bytes = binary_bytes(bin);
bytes[0] = VERSION_MAGIC;
- if ((endp = enc_term(NULL, Term, bytes+1, flags))
+ if ((endp = enc_term(NULL, Term, bytes+1, flags, NULL))
== NULL) {
erl_exit(1, "%s, line %d: bad term: %x\n",
__FILE__, __LINE__, Term);
@@ -1330,6 +1356,21 @@ enc_atom(ErtsAtomCacheMap *acmp, Eterm atom, byte *ep, Uint32 dflags)
ASSERT(is_atom(atom));
+ if (dflags & DFLAGS_INTERNAL_TAGS) {
+ Uint aval = atom_val(atom);
+ ASSERT(aval < (1<<24));
+ if (aval >= (1 << 16)) {
+ *ep++ = ATOM_INTERNAL_REF3;
+ put_int24(aval, ep);
+ ep += 3;
+ }
+ else {
+ *ep++ = ATOM_INTERNAL_REF2;
+ put_int16(aval, ep);
+ ep += 2;
+ }
+ return ep;
+ }
/*
* term_to_binary/1,2 and the initial distribution message
* don't use the cache.
@@ -1379,7 +1420,8 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags)
ep += 4;
put_int32(os, ep);
ep += 4;
- *ep++ = pid_creation(pid);
+ *ep++ = (is_internal_pid(pid) && (dflags & DFLAGS_INTERNAL_TAGS)) ?
+ INTERNAL_CREATION : pid_creation(pid);
return ep;
}
@@ -1418,6 +1460,23 @@ dec_atom(ErtsDistExternal *edep, byte* ep, Eterm* objp)
}
ep += len;
break;
+ case ATOM_INTERNAL_REF2:
+ n = get_int16(ep);
+ ep += 2;
+ if (n >= atom_table_size()) {
+ goto error;
+ }
+ *objp = make_atom(n);
+ break;
+ case ATOM_INTERNAL_REF3:
+ n = get_int24(ep);
+ ep += 3;
+ if (n >= atom_table_size()) {
+ goto error;
+ }
+ *objp = make_atom(n);
+ break;
+
default:
error:
*objp = NIL; /* Don't leave a hole in the heap */
@@ -1426,6 +1485,19 @@ dec_atom(ErtsDistExternal *edep, byte* ep, Eterm* objp)
return ep;
}
+static ERTS_INLINE ErlNode* dec_get_node(Eterm sysname, Uint creation)
+{
+ switch (creation) {
+ case INTERNAL_CREATION:
+ return erts_this_node;
+ case ORIG_CREATION:
+ if (sysname == erts_this_node->sysname) {
+ creation = erts_this_node->creation;
+ }
+ }
+ return erts_find_or_insert_node(sysname,creation);
+}
+
static byte*
dec_pid(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, Eterm* objp)
{
@@ -1449,18 +1521,20 @@ dec_pid(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, Ete
ep += 4;
if (ser > ERTS_MAX_PID_SERIAL)
return NULL;
- if ((cre = get_int8(ep)) >= MAX_CREATION)
- return NULL;
+ cre = get_int8(ep);
ep += 1;
+ if (!is_valid_creation(cre)) {
+ return NULL;
+ }
+ data = make_pid_data(ser, num);
+
/*
* We are careful to create the node entry only after all
* validity tests are done.
*/
- cre = dec_set_creation(sysname,cre);
- node = erts_find_or_insert_node(sysname,cre);
+ node = dec_get_node(sysname, cre);
- data = make_pid_data(ser, num);
if(node == erts_this_node) {
*objp = make_internal_pid(data);
} else {
@@ -1485,7 +1559,8 @@ dec_pid(ErtsDistExternal *edep, Eterm** hpp, byte* ep, ErlOffHeap* off_heap, Ete
#define ENC_LAST_ARRAY_ELEMENT ((Eterm) 3)
static byte*
-enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
+enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags,
+ struct erl_off_heap_header** off_heap)
{
DECLARE_WSTACK(s);
Uint n;
@@ -1637,12 +1712,14 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
Uint32 *ref_num;
ASSERT(dflags & DFLAG_EXTENDED_REFERENCES);
+
*ep++ = NEW_REFERENCE_EXT;
i = ref_no_of_numbers(obj);
put_int16(i, ep);
ep += 2;
ep = enc_atom(acmp,ref_node_name(obj),ep,dflags);
- *ep++ = ref_creation(obj);
+ *ep++ = ((dflags & DFLAGS_INTERNAL_TAGS) && is_internal_ref(obj)) ?
+ INTERNAL_CREATION : ref_creation(obj);
ref_num = ref_numbers(obj);
for (j = 0; j < i; j++) {
put_int32(ref_num[j], ep);
@@ -1658,7 +1735,8 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
j = port_number(obj);
put_int32(j, ep);
ep += 4;
- *ep++ = port_creation(obj);
+ *ep++ = ((dflags & DFLAGS_INTERNAL_TAGS) && is_internal_port(obj)) ?
+ INTERNAL_CREATION : port_creation(obj);
break;
case LIST_DEF:
@@ -1738,6 +1816,41 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
byte* bytes;
ERTS_GET_BINARY_BYTES(obj, bytes, bitoffs, bitsize);
+ if (dflags & DFLAGS_INTERNAL_TAGS) {
+ ProcBin* pb = (ProcBin*) binary_val(obj);
+ Uint bytesize = pb->size;
+ if (pb->thing_word == HEADER_SUB_BIN) {
+ ErlSubBin* sub = (ErlSubBin*)pb;
+ pb = (ProcBin*) binary_val(sub->orig);
+ ASSERT(bytesize == sub->size);
+ bytesize += (bitoffs + bitsize + 7) / 8;
+ }
+ if (pb->thing_word == HEADER_PROC_BIN
+ && heap_bin_size(bytesize) > PROC_BIN_SIZE) {
+ ProcBin tmp;
+ if (bitoffs || bitsize) {
+ *ep++ = BIT_BINARY_INTERNAL_REF;
+ *ep++ = bitoffs;
+ *ep++ = bitsize;
+ }
+ else {
+ *ep++ = BINARY_INTERNAL_REF;
+ }
+ if (pb->flags) {
+ erts_emasculate_writable_binary(pb);
+ }
+ erts_refc_inc(&pb->val->refc, 2);
+
+ sys_memcpy(&tmp, pb, sizeof(ProcBin));
+ tmp.next = *off_heap;
+ tmp.bytes = bytes;
+ tmp.size = bytesize;
+ sys_memcpy(ep, &tmp, sizeof(ProcBin));
+ *off_heap = (struct erl_off_heap_header*) ep;
+ ep += sizeof(ProcBin);
+ break;
+ }
+ }
if (bitsize == 0) {
/* Plain old byte-sized binary. */
*ep++ = BINARY_EXT;
@@ -1773,8 +1886,8 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
*ep++ = SMALL_INTEGER_EXT;
*ep++ = bitsize;
}
- break;
}
+ break;
case EXPORT_DEF:
{
Export* exp = *((Export **) (export_val(obj) + 1));
@@ -1782,7 +1895,7 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
*ep++ = EXPORT_EXT;
ep = enc_atom(acmp, exp->code[0], ep, dflags);
ep = enc_atom(acmp, exp->code[1], ep, dflags);
- ep = enc_term(acmp, make_small(exp->code[2]), ep, dflags);
+ ep = enc_term(acmp, make_small(exp->code[2]), ep, dflags, off_heap);
} else {
/* Tag, arity */
*ep++ = SMALL_TUPLE_EXT;
@@ -1818,8 +1931,8 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
put_int32(funp->num_free, ep);
ep += 4;
ep = enc_atom(acmp, funp->fe->module, ep, dflags);
- ep = enc_term(acmp, make_small(funp->fe->old_index), ep, dflags);
- ep = enc_term(acmp, make_small(funp->fe->old_uniq), ep, dflags);
+ ep = enc_term(acmp, make_small(funp->fe->old_index), ep, dflags, off_heap);
+ ep = enc_term(acmp, make_small(funp->fe->old_uniq), ep, dflags, off_heap);
ep = enc_pid(acmp, funp->creator, ep, dflags);
fun_env:
@@ -1872,7 +1985,8 @@ enc_term(ErtsAtomCacheMap *acmp, Eterm obj, byte* ep, Uint32 dflags)
return ep;
}
-static Uint
+static
+Uint
is_external_string(Eterm list, int* p_is_string)
{
Uint len = 0;
@@ -2162,13 +2276,13 @@ dec_term_atom_common:
goto error;
}
ep += 4;
- if ((cre = get_int8(ep)) >= MAX_CREATION) {
+ cre = get_int8(ep);
+ ep++;
+ if (!is_valid_creation(cre)) {
goto error;
}
- ep++;
- cre = dec_set_creation(sysname,cre);
- node = erts_find_or_insert_node(sysname, cre);
+ node = dec_get_node(sysname, cre);
if(node == erts_this_node) {
*objp = make_internal_port(num);
}
@@ -2205,9 +2319,11 @@ dec_term_atom_common:
goto error;
ep += 4;
- if ((cre = get_int8(ep)) >= MAX_CREATION)
- goto error;
+ cre = get_int8(ep);
ep += 1;
+ if (!is_valid_creation(cre)) {
+ goto error;
+ }
goto ref_ext_common;
case NEW_REFERENCE_EXT:
@@ -2220,10 +2336,11 @@ dec_term_atom_common:
if ((ep = dec_atom(edep, ep, &sysname)) == NULL)
goto error;
- if ((cre = get_int8(ep)) >= MAX_CREATION)
- goto error;
+ cre = get_int8(ep);
ep += 1;
-
+ if (!is_valid_creation(cre)) {
+ goto error;
+ }
r0 = get_int32(ep);
ep += 4;
if (r0 >= MAX_REFERENCE)
@@ -2231,8 +2348,7 @@ dec_term_atom_common:
ref_ext_common:
- cre = dec_set_creation(sysname, cre);
- node = erts_find_or_insert_node(sysname, cre);
+ node = dec_get_node(sysname, cre);
if(node == erts_this_node) {
RefThing *rtp = (RefThing *) hp;
ref_num = (Uint32 *) (hp + REF_THING_HEAD_SIZE);
@@ -2560,6 +2676,66 @@ dec_term_atom_common:
}
break;
}
+ case ATOM_INTERNAL_REF2:
+ n = get_int16(ep);
+ ep += 2;
+ if (n >= atom_table_size()) {
+ goto error;
+ }
+ *objp = make_atom(n);
+ break;
+ case ATOM_INTERNAL_REF3:
+ n = get_int24(ep);
+ ep += 3;
+ if (n >= atom_table_size()) {
+ goto error;
+ }
+ *objp = make_atom(n);
+ break;
+
+ case BINARY_INTERNAL_REF:
+ {
+ ProcBin* pb = (ProcBin*) hp;
+ sys_memcpy(pb, ep, sizeof(ProcBin));
+ ep += sizeof(ProcBin);
+
+ erts_refc_inc(&pb->val->refc, 1);
+ hp += PROC_BIN_SIZE;
+ pb->next = off_heap->first;
+ off_heap->first = (struct erl_off_heap_header*)pb;
+ pb->flags = 0;
+ *objp = make_binary(pb);
+ break;
+ }
+ case BIT_BINARY_INTERNAL_REF:
+ {
+ Sint bitoffs = *ep++;
+ Sint bitsize = *ep++;
+ ProcBin* pb = (ProcBin*) hp;
+ ErlSubBin* sub;
+ sys_memcpy(pb, ep, sizeof(ProcBin));
+ ep += sizeof(ProcBin);
+
+ erts_refc_inc(&pb->val->refc, 1);
+ hp += PROC_BIN_SIZE;
+ pb->next = off_heap->first;
+ off_heap->first = (struct erl_off_heap_header*)pb;
+ pb->flags = 0;
+
+ sub = (ErlSubBin*)hp;
+ sub->thing_word = HEADER_SUB_BIN;
+ sub->size = pb->size - (bitoffs + bitsize + 7)/8;
+ sub->offs = 0;
+ sub->bitoffs = bitoffs;
+ sub->bitsize = bitsize;
+ sub->is_writable = 0;
+ sub->orig = make_binary(pb);
+
+ hp += ERL_SUB_BIN_SIZE;
+ *objp = make_binary(sub);
+ break;
+ }
+
default:
error:
/* UNDO:
@@ -2642,20 +2818,29 @@ encode_size_struct2(ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags)
case NIL_DEF:
result++;
break;
- case ATOM_DEF: {
- int alen = atom_tab(atom_val(obj))->len;
- if ((MAX_ATOM_LENGTH <= 255 || alen <= 255)
- && (dflags & DFLAG_SMALL_ATOM_TAGS)) {
- /* Make sure a SMALL_ATOM_EXT fits: SMALL_ATOM_EXT l t1 t2... */
- result += 1 + 1 + alen;
+ case ATOM_DEF:
+ if (dflags & DFLAGS_INTERNAL_TAGS) {
+ if (atom_val(obj) >= (1<<16)) {
+ result += 1 + 3;
+ }
+ else {
+ result += 1 + 2;
+ }
}
else {
- /* Make sure an ATOM_EXT fits: ATOM_EXT l1 l0 t1 t2... */
- result += 1 + 2 + alen;
+ int alen = atom_tab(atom_val(obj))->len;
+ if ((MAX_ATOM_LENGTH <= 255 || alen <= 255)
+ && (dflags & DFLAG_SMALL_ATOM_TAGS)) {
+ /* Make sure a SMALL_ATOM_EXT fits: SMALL_ATOM_EXT l t1 t2... */
+ result += 1 + 1 + alen;
+ }
+ else {
+ /* Make sure an ATOM_EXT fits: ATOM_EXT l1 l0 t1 t2... */
+ result += 1 + 2 + alen;
+ }
+ insert_acache_map(acmp, obj);
}
- insert_acache_map(acmp, obj);
break;
- }
case SMALL_DEF:
{
Sint val = signed_val(obj);
@@ -2734,8 +2919,25 @@ encode_size_struct2(ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags)
}
break;
case BINARY_DEF:
+ if (dflags & DFLAGS_INTERNAL_TAGS) {
+ ProcBin* pb = (ProcBin*) binary_val(obj);
+ Uint sub_extra = 0;
+ Uint tot_bytes = pb->size;
+ if (pb->thing_word == HEADER_SUB_BIN) {
+ ErlSubBin* sub = (ErlSubBin*) pb;
+ pb = (ProcBin*) binary_val(sub->orig);
+ sub_extra = 2; /* bitoffs and bitsize */
+ tot_bytes += (sub->bitoffs + sub->bitsize+ 7) / 8;
+ }
+ if (pb->thing_word == HEADER_PROC_BIN
+ && heap_bin_size(tot_bytes) > PROC_BIN_SIZE) {
+
+ result += 1 + sub_extra + sizeof(ProcBin);
+ break;
+ }
+ }
result += 1 + 4 + binary_size(obj) +
- 5; /* For unaligned binary */
+ 5; /* For unaligned binary */
break;
case FUN_DEF:
{
@@ -2807,7 +3009,7 @@ encode_size_struct2(ErtsAtomCacheMap *acmp, Eterm obj, unsigned dflags)
}
static Sint
-decoded_size(byte *ep, byte* endp, int no_refc_bins)
+decoded_size(byte *ep, byte* endp, int no_refc_bins, int internal_tags)
{
int heap_size = 0;
int terms;
@@ -3017,6 +3219,29 @@ decoded_size(byte *ep, byte* endp, int no_refc_bins)
heap_size += ERL_FUN_SIZE + num_free;
break;
}
+ case ATOM_INTERNAL_REF2:
+ SKIP(2+atom_extra_skip);
+ atom_extra_skip = 0;
+ break;
+ case ATOM_INTERNAL_REF3:
+ SKIP(3+atom_extra_skip);
+ atom_extra_skip = 0;
+ break;
+
+ case BINARY_INTERNAL_REF:
+ if (!internal_tags) {
+ return -1;
+ }
+ SKIP(sizeof(ProcBin));
+ heap_size += PROC_BIN_SIZE;
+ break;
+ case BIT_BINARY_INTERNAL_REF:
+ if (!internal_tags) {
+ return -1;
+ }
+ SKIP(2+sizeof(ProcBin));
+ heap_size += PROC_BIN_SIZE + ERL_SUB_BIN_SIZE;
+ break;
default:
return -1;
}
diff --git a/erts/emulator/beam/external.h b/erts/emulator/beam/external.h
index cee48bbeb0..d8287b96a4 100644
--- a/erts/emulator/beam/external.h
+++ b/erts/emulator/beam/external.h
@@ -54,6 +54,10 @@
#define DIST_HEADER 'D'
#define ATOM_CACHE_REF 'R'
+#define ATOM_INTERNAL_REF2 'I'
+#define ATOM_INTERNAL_REF3 'K'
+#define BINARY_INTERNAL_REF 'J'
+#define BIT_BINARY_INTERNAL_REF 'L'
#define COMPRESSED 'P'
#if 0
@@ -156,7 +160,9 @@ Uint erts_encode_dist_ext_size(Eterm, Uint32, ErtsAtomCacheMap *);
void erts_encode_dist_ext(Eterm, byte **, Uint32, ErtsAtomCacheMap *);
Uint erts_encode_ext_size(Eterm);
+Uint erts_encode_ext_size_ets(Eterm);
void erts_encode_ext(Eterm, byte **);
+byte* erts_encode_ext_ets(Eterm, byte *, struct erl_off_heap_header** ext_off_heap);
#ifdef ERTS_WANT_EXTERNAL_TAGS
ERTS_GLB_INLINE void erts_peek_dist_header(ErtsDistHeaderPeek *, byte *, Uint);
@@ -172,7 +178,9 @@ Sint erts_decode_dist_ext_size(ErtsDistExternal *, int);
Eterm erts_decode_dist_ext(Eterm **, ErlOffHeap *, ErtsDistExternal *);
Sint erts_decode_ext_size(byte*, Uint, int);
+Sint erts_decode_ext_size_ets(byte*, Uint);
Eterm erts_decode_ext(Eterm **, ErlOffHeap *, byte**);
+Eterm erts_decode_ext_ets(Eterm **, ErlOffHeap *, byte*);
Eterm erts_term_to_binary(Process* p, Eterm Term, int level, Uint flags);
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index ecd3c8f68a..89c6625550 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -522,6 +522,7 @@ union erl_off_heap_ptr {
struct erl_fun_thing* fun;
struct external_thing_* ext;
Eterm* ep;
+ void* voidp;
};
/* arrays that get malloced at startup */
@@ -1728,11 +1729,6 @@ Uint erts_current_reductions(Process* current, Process *p);
int erts_print_system_version(int to, void *arg, Process *c_p);
-/*
- * Interface to erl_init
- */
-void erl_init(void);
-
#define seq_trace_output(token, msg, type, receiver, process) \
seq_trace_output_generic((token), (msg), (type), (receiver), (process), NIL)
#define seq_trace_output_exit(token, msg, type, receiver, exitfrom) \
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 79022d5dd7..9ed92bbe03 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -2802,17 +2802,25 @@ driver_deliver_term(ErlDrvPort port,
break;
case ERL_DRV_INT: /* signed int argument */
ERTS_DDT_CHK_ENOUGH_ARGS(1);
+#if HALFWORD_HEAP
+ erts_bld_sint64(NULL, &need, (Sint64)ptr[0]);
+#else
/* check for bignum */
if (!IS_SSMALL((Sint)ptr[0]))
need += BIG_UINT_HEAP_SIZE; /* use small_to_big */
+#endif
ptr++;
depth++;
break;
case ERL_DRV_UINT: /* unsigned int argument */
ERTS_DDT_CHK_ENOUGH_ARGS(1);
+#if HALFWORD_HEAP
+ erts_bld_uint64(NULL, &need, (Uint64)ptr[0]);
+#else
/* check for bignum */
if (!IS_USMALL(0, (Uint)ptr[0]))
need += BIG_UINT_HEAP_SIZE; /* use small_to_big */
+#endif
ptr++;
depth++;
break;
@@ -2979,22 +2987,30 @@ driver_deliver_term(ErlDrvPort port,
break;
case ERL_DRV_INT: /* signed int argument */
+#if HALFWORD_HEAP
+ mess = erts_bld_sint64(&hp, NULL, (Sint64)ptr[0]);
+#else
if (IS_SSMALL((Sint)ptr[0]))
mess = make_small((Sint)ptr[0]);
else {
mess = small_to_big((Sint)ptr[0], hp);
hp += BIG_UINT_HEAP_SIZE;
}
+#endif
ptr++;
break;
case ERL_DRV_UINT: /* unsigned int argument */
+#if HALFWORD_HEAP
+ mess = erts_bld_uint64(&hp, NULL, (Uint64)ptr[0]);
+#else
if (IS_USMALL(0, (Uint)ptr[0]))
mess = make_small((Uint)ptr[0]);
else {
mess = uint_to_big((Uint)ptr[0], hp);
hp += BIG_UINT_HEAP_SIZE;
}
+#endif
ptr++;
break;
diff --git a/erts/emulator/beam/packet_parser.c b/erts/emulator/beam/packet_parser.c
index 5bcd567b5f..a66d60aa22 100644
--- a/erts/emulator/beam/packet_parser.c
+++ b/erts/emulator/beam/packet_parser.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2008-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2008-2010. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -47,11 +47,6 @@
(((unsigned char*) (s))[1] << 8) | \
(((unsigned char*) (s))[0]))
-#define put_int24(s, x) ((((unsigned char*)(s))[0] = ((x) >> 16) & 0xff), \
- (((unsigned char*)(s))[1] = ((x) >> 8) & 0xff), \
- (((unsigned char*)(s))[2] = (x) & 0xff))
-
-
#if !defined(__WIN32__) && !defined(HAVE_STRNCASECMP)
#define STRNCASECMP my_strncasecmp
@@ -833,7 +828,7 @@ int packet_parse_ssl(const char* buf, int len,
char prefix[4];
/* <<1:8,Length:24,Data/binary>> */
prefix[0] = 1;
- put_int24(&prefix[1],len-3);
+ put_int24(len-3,&prefix[1]);
return pcb->ssl_tls(arg, 22, major, minor, buf+3, len-3, prefix, sizeof(prefix));
}
else {
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 0031568af6..0d15272aa8 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -466,8 +466,6 @@ static const int zero_value = 0, one_value = 1;
# endif /* !__WIN32__ */
#endif /* WANT_NONBLOCKING */
-extern erts_cpu_info_t *erts_cpuinfo; /* erl_init.c */
-
__decl_noreturn void __noreturn erl_exit(int n, char*, ...);
/* Some special erl_exit() codes: */
@@ -1168,6 +1166,15 @@ void* sys_calloc2(Uint, Uint);
((char*)(s))[3] = (char)(i) & 0xff;} \
while (0)
+#define get_int24(s) ((((unsigned char*) (s))[0] << 16) | \
+ (((unsigned char*) (s))[1] << 8) | \
+ (((unsigned char*) (s))[2]))
+
+#define put_int24(i, s) do {((char*)(s))[0] = (char)((i) >> 16) & 0xff; \
+ ((char*)(s))[1] = (char)((i) >> 8) & 0xff; \
+ ((char*)(s))[2] = (char)(i) & 0xff;} \
+ while (0)
+
#define get_int16(s) ((((unsigned char*) (s))[0] << 8) | \
(((unsigned char*) (s))[1]))
@@ -1181,6 +1188,7 @@ void* sys_calloc2(Uint, Uint);
#define put_int8(i, s) do {((unsigned char*)(s))[0] = (i) & 0xff;} while (0)
+
/*
* Use DEBUGF as you would use printf, but use double parentheses:
*
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 3de48194fb..18f7cdd15a 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -54,6 +54,9 @@
#ifdef HAVE_IFADDRS_H
#include <ifaddrs.h>
#endif
+#ifdef HAVE_NETPACKET_PACKET_H
+#include <netpacket/packet.h>
+#endif
/* All platforms fail on malloc errors. */
#define FATAL_MALLOC
@@ -85,6 +88,7 @@
#include <winsock2.h>
#endif
#include <windows.h>
+#include <iphlpapi.h>
#include <Ws2tcpip.h> /* NEED VC 6.0 !!! */
@@ -467,6 +471,7 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
#define INET_REQ_IFGET 22
#define INET_REQ_IFSET 23
#define INET_REQ_SUBSCRIBE 24
+#define INET_REQ_GETIFADDRS 25
/* TCP requests */
#define TCP_REQ_ACCEPT 40
#define TCP_REQ_LISTEN 41
@@ -632,15 +637,12 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
#define IS_BUSY(d) \
(((d)->state & INET_F_BUSY) == INET_F_BUSY)
+#define INET_MAX_OPT_BUFFER (64*1024)
+
#define INET_DEF_BUFFER 1460 /* default buffer size */
#define INET_MIN_BUFFER 1 /* internal min buffer */
-#define INET_MAX_BUFFER (1024*64) /* internal max buffer */
-/* Note: INET_HIGH_WATERMARK MUST be less than 2*INET_MAX_BUFFER */
#define INET_HIGH_WATERMARK (1024*8) /* 8k pending high => busy */
-/* Note: INET_LOW_WATERMARK MUST be less than INET_MAX_BUFFER and
-** less than INET_HIGH_WATERMARK
-*/
#define INET_LOW_WATERMARK (1024*4) /* 4k pending => allow more */
#define INET_INFINITY 0xffffffff /* infinity value */
@@ -1251,139 +1253,136 @@ static int load_ip_and_port
LOAD_ATOM((spec), (i), (flag) ? am_true : am_false);
#endif /* HAVE_SCTP */
+/* Assume a cache line size of 64 bytes */
+#define INET_DRV_CACHE_LINE_SIZE ((ErlDrvUInt) 64)
+#define INET_DRV_CACHE_LINE_MASK (INET_DRV_CACHE_LINE_SIZE - 1)
+
/*
** Binary Buffer Managment
** We keep a stack of usable buffers
*/
-#define BUFFER_STACK_SIZE 16
+#define BUFFER_STACK_SIZE 14
+#define BUFFER_STACK_MAX_MEM_SIZE (1024*1024)
-static erts_smp_spinlock_t inet_buffer_stack_lock;
-static ErlDrvBinary* buffer_stack[BUFFER_STACK_SIZE];
-static int buffer_stack_pos = 0;
-
-
-/*
- * XXX
- * The erts_smp_spin_* functions should not be used by drivers (but this
- * driver is special). Replace when driver locking api has been implemented.
- * /rickard
- */
-#define BUFSTK_LOCK erts_smp_spin_lock(&inet_buffer_stack_lock);
-#define BUFSTK_UNLOCK erts_smp_spin_unlock(&inet_buffer_stack_lock);
+ErlDrvTSDKey buffer_stack_key;
-#ifdef DEBUG
-static int tot_buf_allocated = 0; /* memory in use for i_buf */
-static int tot_buf_stacked = 0; /* memory on stack */
-static int max_buf_allocated = 0; /* max allocated */
-
-#define COUNT_BUF_ALLOC(sz) do { \
- BUFSTK_LOCK; \
- tot_buf_allocated += (sz); \
- if (tot_buf_allocated > max_buf_allocated) \
- max_buf_allocated = tot_buf_allocated; \
- BUFSTK_UNLOCK; \
-} while(0)
-
-#define COUNT_BUF_FREE(sz) do { \
- BUFSTK_LOCK; \
- tot_buf_allocated -= (sz); \
- BUFSTK_UNLOCK; \
- } while(0)
-
-#define COUNT_BUF_STACK(sz) do { \
- BUFSTK_LOCK; \
- tot_buf_stacked += (sz); \
- BUFSTK_UNLOCK; \
- } while(0)
+typedef struct {
+ int mem_size;
+ int pos;
+ ErlDrvBinary* stk[BUFFER_STACK_SIZE];
+} InetDrvBufStkBase;
-#else
+typedef struct {
+ InetDrvBufStkBase buf;
+ char align[(((sizeof(InetDrvBufStkBase) - 1) / INET_DRV_CACHE_LINE_SIZE) + 1)
+ * INET_DRV_CACHE_LINE_SIZE];
+} InetDrvBufStk;
+
+static InetDrvBufStk *get_bufstk(void)
+{
+ InetDrvBufStk *bs = erl_drv_tsd_get(buffer_stack_key);
+ if (bs)
+ return bs;
+ bs = driver_alloc(sizeof(InetDrvBufStk)
+ + INET_DRV_CACHE_LINE_SIZE - 1);
+ if (!bs)
+ return NULL;
+ if ((((ErlDrvUInt) bs) & INET_DRV_CACHE_LINE_MASK) != 0)
+ bs = ((InetDrvBufStk *)
+ ((((ErlDrvUInt) bs) & ~INET_DRV_CACHE_LINE_MASK)
+ + INET_DRV_CACHE_LINE_SIZE));
+ erl_drv_tsd_set(buffer_stack_key, bs);
+ bs->buf.pos = 0;
+ bs->buf.mem_size = 0;
-#define COUNT_BUF_ALLOC(sz)
-#define COUNT_BUF_FREE(sz)
-#define COUNT_BUF_STACK(sz)
+ ASSERT(bs == erl_drv_tsd_get(buffer_stack_key));
-#endif
+ return bs;
+}
static ErlDrvBinary* alloc_buffer(long minsz)
{
- ErlDrvBinary* buf = NULL;
+ InetDrvBufStk *bs = get_bufstk();
- BUFSTK_LOCK;
+ DEBUGF(("alloc_buffer: %ld\r\n", minsz));
- DEBUGF(("alloc_buffer: sz = %ld, tot = %d, max = %d\r\n",
- minsz, tot_buf_allocated, max_buf_allocated));
+ if (bs && bs->buf.pos > 0) {
+ long size;
+ ErlDrvBinary* buf = bs->buf.stk[--bs->buf.pos];
+ size = buf->orig_size;
+ bs->buf.mem_size -= size;
+ ASSERT(0 <= bs->buf.mem_size
+ && bs->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
+ if (size >= minsz)
+ return buf;
- if (buffer_stack_pos > 0) {
- int origsz;
+ driver_free_binary(buf);
+ }
- buf = buffer_stack[--buffer_stack_pos];
- origsz = buf->orig_size;
- BUFSTK_UNLOCK;
- COUNT_BUF_STACK(-origsz);
- if (origsz < minsz) {
- if ((buf = driver_realloc_binary(buf, minsz)) == NULL)
- return NULL;
- COUNT_BUF_ALLOC(buf->orig_size - origsz);
+ ASSERT(!bs || bs->buf.pos != 0 || bs->buf.mem_size == 0);
+
+ return driver_alloc_binary(minsz);
+}
+
+/*#define CHECK_DOUBLE_RELEASE 1*/
+#ifdef CHECK_DOUBLE_RELEASE
+static void
+check_double_release(InetDrvBufStk *bs, ErlDrvBinary* buf)
+{
+#ifdef __GNUC__
+#warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator
+#endif
+ int i;
+ for (i = 0; i < bs->buf.pos; ++i) {
+ if (bs->buf.stk[i] == buf) {
+ erl_exit(ERTS_ABORT_EXIT,
+ "Multiple buffer release in inet_drv, this "
+ "is a bug, save the core and send it to "
}
}
- else {
- BUFSTK_UNLOCK;
- if ((buf = driver_alloc_binary(minsz)) == NULL)
- return NULL;
- COUNT_BUF_ALLOC(buf->orig_size);
- }
- return buf;
}
+#endif
-/*
-** Max buffer memory "cached" BUFFER_STACK_SIZE * INET_MAX_BUFFER
-** (16 * 64k ~ 1M)
-*/
-/*#define CHECK_DOUBLE_RELEASE 1*/
static void release_buffer(ErlDrvBinary* buf)
{
+ InetDrvBufStk *bs;
+ long size;
+
DEBUGF(("release_buffer: %ld\r\n", (buf==NULL) ? 0 : buf->orig_size));
- if (buf == NULL)
+
+ if (!buf)
return;
- BUFSTK_LOCK;
- if ((buf->orig_size > INET_MAX_BUFFER) ||
- (buffer_stack_pos >= BUFFER_STACK_SIZE)) {
- BUFSTK_UNLOCK;
- COUNT_BUF_FREE(buf->orig_size);
+
+ size = buf->orig_size;
+
+ if (size > BUFFER_STACK_MAX_MEM_SIZE)
+ goto free_binary;
+
+ bs = get_bufstk();
+ if (!bs
+ || (bs->buf.mem_size + size > BUFFER_STACK_MAX_MEM_SIZE)
+ || (bs->buf.pos >= BUFFER_STACK_SIZE)) {
+ free_binary:
driver_free_binary(buf);
}
else {
#ifdef CHECK_DOUBLE_RELEASE
-#ifdef __GNUC__
-#warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator
+ check_double_release(bs, buf);
#endif
- int i;
- for (i = 0; i < buffer_stack_pos; ++i) {
- if (buffer_stack[i] == buf) {
- erl_exit(1,"Multiple buffer release in inet_drv, this is a "
- "bug, save the core and send it to "
- }
- }
-#endif
- buffer_stack[buffer_stack_pos++] = buf;
- BUFSTK_UNLOCK;
- COUNT_BUF_STACK(buf->orig_size);
+ ASSERT(bs->buf.pos != 0 || bs->buf.mem_size == 0);
+
+ bs->buf.mem_size += size;
+ bs->buf.stk[bs->buf.pos++] = buf;
+
+ ASSERT(0 <= bs->buf.mem_size
+ && bs->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
}
}
static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, long newsz)
{
- ErlDrvBinary* bin;
-#ifdef DEBUG
- long orig_size = buf->orig_size;
-#endif
-
- if ((bin = driver_realloc_binary(buf,newsz)) != NULL) {
- COUNT_BUF_ALLOC(newsz - orig_size);
- ;
- }
- return bin;
+ return driver_realloc_binary(buf, newsz);
}
/* use a TRICK, access the refc field to see if any one else has
@@ -1397,10 +1396,8 @@ static void free_buffer(ErlDrvBinary* buf)
if (buf != NULL) {
if (driver_binary_get_refc(buf) == 1)
release_buffer(buf);
- else {
- COUNT_BUF_FREE(buf->orig_size);
+ else
driver_free_binary(buf);
- }
}
}
@@ -3404,20 +3401,14 @@ static int inet_init()
if (!sock_init())
goto error;
- buffer_stack_pos = 0;
-
- erts_smp_spinlock_init(&inet_buffer_stack_lock, "inet_buffer_stack_lock");
+ if (0 != erl_drv_tsd_key_create("inet_buffer_stack_key", &buffer_stack_key))
+ goto error;
ASSERT(sizeof(struct in_addr) == 4);
# if defined(HAVE_IN6) && defined(AF_INET6)
ASSERT(sizeof(struct in6_addr) == 16);
# endif
-#ifdef DEBUG
- tot_buf_allocated = 0;
- max_buf_allocated = 0;
- tot_buf_stacked = 0;
-#endif
INIT_ATOM(ok);
INIT_ATOM(tcp);
INIT_ATOM(udp);
@@ -3824,39 +3815,81 @@ do { if ((end)-(ptr) < (n)) goto error; } while(0)
static char* sockaddr_to_buf(struct sockaddr* addr, char* ptr, char* end)
{
if (addr->sa_family == AF_INET || addr->sa_family == 0) {
- struct in_addr a;
- buf_check(ptr,end,sizeof(struct in_addr));
- a = ((struct sockaddr_in*) addr)->sin_addr;
- sys_memcpy(ptr, (char*)&a, sizeof(struct in_addr));
- return ptr + sizeof(struct in_addr);
+ struct in_addr *p = &(((struct sockaddr_in*) addr)->sin_addr);
+ buf_check(ptr, end, 1 + sizeof(struct in_addr));
+ *ptr = INET_AF_INET;
+ sys_memcpy(ptr+1, (char*)p, sizeof(struct in_addr));
+ return ptr + 1 + sizeof(struct in_addr);
}
#if defined(HAVE_IN6) && defined(AF_INET6)
else if (addr->sa_family == AF_INET6) {
- struct in6_addr a;
- buf_check(ptr,end,sizeof(struct in6_addr));
- a = ((struct sockaddr_in6*) addr)->sin6_addr;
- sys_memcpy(ptr, (char*)&a, sizeof(struct in6_addr));
- return ptr + sizeof(struct in6_addr);
+ struct in6_addr *p = &(((struct sockaddr_in6*) addr)->sin6_addr);
+ buf_check(ptr, end, 1 + sizeof(struct in6_addr));
+ *ptr = INET_AF_INET6;
+ sys_memcpy(ptr+1, (char*)p, sizeof(struct in6_addr));
+ return ptr + 1 + sizeof(struct in6_addr);
+ }
+#endif
+#if defined(AF_LINK)
+ else if (addr->sa_family == AF_LINK) {
+ struct sockaddr_dl *sdl_p = (struct sockaddr_dl*) addr;
+ buf_check(ptr, end, 2 + sdl_p->sdl_alen);
+ put_int16(sdl_p->sdl_alen, ptr); ptr += 2;
+ sys_memcpy(ptr, sdl_p->sdl_data + sdl_p->sdl_nlen, sdl_p->sdl_alen);
+ return ptr + sdl_p->sdl_alen;
+ }
+#endif
+#if defined(AF_PACKET) && defined(HAVE_NETPACKET_PACKET_H)
+ else if(addr->sa_family == AF_PACKET) {
+ struct sockaddr_ll *sll_p = (struct sockaddr_ll*) addr;
+ buf_check(ptr, end, 2 + sll_p->sll_halen);
+ put_int16(sll_p->sll_halen, ptr); ptr += 2;
+ sys_memcpy(ptr, sll_p->sll_addr, sll_p->sll_halen);
+ return ptr + sll_p->sll_halen;
}
#endif
+ return ptr;
error:
return NULL;
-
}
static char* buf_to_sockaddr(char* ptr, char* end, struct sockaddr* addr)
{
- buf_check(ptr,end,sizeof(struct in_addr));
- sys_memcpy((char*) &((struct sockaddr_in*)addr)->sin_addr, ptr,
- sizeof(struct in_addr));
- addr->sa_family = AF_INET;
- return ptr + sizeof(struct in_addr);
-
+ buf_check(ptr,end,1);
+ switch (*ptr++) {
+ case INET_AF_INET: {
+ struct in_addr *p = &((struct sockaddr_in*)addr)->sin_addr;
+ buf_check(ptr,end,sizeof(struct in_addr));
+ sys_memcpy((char*) p, ptr, sizeof(struct in_addr));
+ addr->sa_family = AF_INET;
+ return ptr + sizeof(struct in_addr);
+ }
+ case INET_AF_INET6: {
+ struct in6_addr *p = &((struct sockaddr_in6*)addr)->sin6_addr;
+ buf_check(ptr,end,sizeof(struct in6_addr));
+ sys_memcpy((char*) p, ptr, sizeof(struct in6_addr));
+ addr->sa_family = AF_INET6;
+ return ptr + sizeof(struct in6_addr);
+ }
+ }
error:
return NULL;
}
+#if defined (IFF_POINTOPOINT)
+#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTOPOINT)
+#elif defined IFF_POINTTOPOINT
+#define IFGET_FLAGS(cflags) IFGET_FLAGS_P2P(cflags, IFF_POINTTOPOINT)
+#endif
+
+#define IFGET_FLAGS_P2P(cflags, iff_ptp) \
+ ((((cflags) & IFF_UP) ? INET_IFF_UP : 0) | \
+ (((cflags) & IFF_BROADCAST) ? INET_IFF_BROADCAST : 0) | \
+ (((cflags) & IFF_LOOPBACK) ? INET_IFF_LOOPBACK : 0) | \
+ (((cflags) & iff_ptp) ? INET_IFF_POINTTOPOINT : 0) | \
+ (((cflags) & IFF_UP) ? INET_IFF_RUNNING : 0) | /* emulate running ? */ \
+ (((cflags) & IFF_MULTICAST) ? INET_IFF_MULTICAST : 0))
#if defined(__WIN32__) && defined(SIO_GET_INTERFACE_LIST)
@@ -3894,7 +3927,6 @@ static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize)
return ctl_reply(INET_REP_OK, sbuf, sptr - sbuf, rbuf, rsize);
}
-
/* input is an ip-address in string format i.e A.B.C.D
** scan the INTERFACE_LIST to get the options
*/
@@ -3980,27 +4012,12 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len,
break;
case INET_IFOPT_FLAGS: {
- long eflags = 0;
int flags = ifp->iiFlags;
/* just enumerate the interfaces (no names) */
- /* translate flags */
- if (flags & IFF_UP)
- eflags |= INET_IFF_UP;
- if (flags & IFF_BROADCAST)
- eflags |= INET_IFF_BROADCAST;
- if (flags & IFF_LOOPBACK)
- eflags |= INET_IFF_LOOPBACK;
- if (flags & IFF_POINTTOPOINT)
- eflags |= INET_IFF_POINTTOPOINT;
- if (flags & IFF_UP) /* emulate runnign ? */
- eflags |= INET_IFF_RUNNING;
- if (flags & IFF_MULTICAST)
- eflags |= INET_IFF_MULTICAST;
-
buf_check(sptr, s_end, 5);
*sptr++ = INET_IFOPT_FLAGS;
- put_int32(eflags, sptr);
+ put_int32(IFGET_FLAGS(flags), sptr);
sptr += 4;
break;
}
@@ -4021,7 +4038,6 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len,
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
}
-
#elif defined(SIOCGIFCONF) && defined(SIOCSIFFLAGS)
/* cygwin has SIOCGIFCONF but not SIOCSIFFLAGS (Nov 2002) */
@@ -4032,69 +4048,77 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len,
#define SIZEA(p) (sizeof (p))
#endif
-
-static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize)
-{
- struct ifconf ifc;
- struct ifreq *ifr;
- char *buf;
- int buflen, ifc_len, i;
- char *sbuf, *sp;
-
- /* Courtesy of Per Bergqvist and W. Richard Stevens */
-
- ifc_len = 0;
- buflen = 100 * sizeof(struct ifreq);
- buf = ALLOC(buflen);
+static int get_ifconf(SOCKET s, struct ifconf *ifcp) {
+ int ifc_len = 0;
+ int buflen = 100 * sizeof(struct ifreq);
+ char *buf = ALLOC(buflen);
for (;;) {
- ifc.ifc_len = buflen;
- ifc.ifc_buf = buf;
- if (ioctl(desc->s, SIOCGIFCONF, (char *)&ifc) < 0) {
+ ifcp->ifc_len = buflen;
+ ifcp->ifc_buf = buf;
+ if (ioctl(s, SIOCGIFCONF, (char *)ifcp) < 0) {
int res = sock_errno();
if (res != EINVAL || ifc_len) {
FREE(buf);
- return ctl_error(res, rbuf, rsize);
+ return -1;
}
} else {
- if (ifc.ifc_len == ifc_len) break; /* buf large enough */
- ifc_len = ifc.ifc_len;
+ if (ifcp->ifc_len == ifc_len) break; /* buf large enough */
+ ifc_len = ifcp->ifc_len;
}
buflen += 10 * sizeof(struct ifreq);
buf = (char *)REALLOC(buf, buflen);
}
-
- sp = sbuf = ALLOC(ifc_len+1);
+ return 0;
+}
+
+static void free_ifconf(struct ifconf *ifcp) {
+ FREE(ifcp->ifc_buf);
+}
+
+static int inet_ctl_getiflist(inet_descriptor* desc, char** rbuf, int rsize)
+{
+ struct ifconf ifc;
+ struct ifreq *ifrp;
+ char *sbuf, *sp;
+ int i;
+
+ /* Courtesy of Per Bergqvist and W. Richard Stevens */
+
+ if (get_ifconf(desc->s, &ifc) < 0) {
+ return ctl_error(sock_errno(), rbuf, rsize);
+ }
+
+ sp = sbuf = ALLOC(ifc.ifc_len+1);
*sp++ = INET_REP_OK;
i = 0;
for (;;) {
int n;
-
- ifr = (struct ifreq *) VOIDP(buf + i);
- n = sizeof(ifr->ifr_name) + SIZEA(ifr->ifr_addr);
- if (n < sizeof(*ifr)) n = sizeof(*ifr);
- if (i+n > ifc_len) break;
+
+ ifrp = (struct ifreq *) VOIDP(ifc.ifc_buf + i);
+ n = sizeof(ifrp->ifr_name) + SIZEA(ifrp->ifr_addr);
+ if (n < sizeof(*ifrp)) n = sizeof(*ifrp);
+ if (i+n > ifc.ifc_len) break;
i += n;
-
- switch (ifr->ifr_addr.sa_family) {
+
+ switch (ifrp->ifr_addr.sa_family) {
#if defined(HAVE_IN6) && defined(AF_INET6)
case AF_INET6:
#endif
case AF_INET:
- ASSERT(sp+IFNAMSIZ+1 < sbuf+buflen+1)
- strncpy(sp, ifr->ifr_name, IFNAMSIZ);
+ ASSERT(sp+IFNAMSIZ+1 < sbuf+ifc.ifc_len+1)
+ strncpy(sp, ifrp->ifr_name, IFNAMSIZ);
sp[IFNAMSIZ] = '\0';
sp += strlen(sp), ++sp;
}
-
- if (i >= ifc_len) break;
+
+ if (i >= ifc.ifc_len) break;
}
- FREE(buf);
+ free_ifconf(&ifc);
*rbuf = sbuf;
return sp - sbuf;
}
-
/* FIXME: temporary hack */
#ifndef IFHWADDRLEN
#define IFHWADDRLEN 6
@@ -4133,37 +4157,52 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len,
#ifdef SIOCGIFHWADDR
if (ioctl(desc->s, SIOCGIFHWADDR, (char *)&ifreq) < 0)
break;
- buf_check(sptr, s_end, 1+IFHWADDRLEN);
+ buf_check(sptr, s_end, 1+2+IFHWADDRLEN);
*sptr++ = INET_IFOPT_HWADDR;
+ put_int16(IFHWADDRLEN, sptr); sptr += 2;
/* raw memcpy (fix include autoconf later) */
sys_memcpy(sptr, (char*)(&ifreq.ifr_hwaddr.sa_data), IFHWADDRLEN);
sptr += IFHWADDRLEN;
-#elif defined(HAVE_GETIFADDRS)
- struct ifaddrs *ifa, *ifp;
- int found = 0;
-
- if (getifaddrs(&ifa) == -1)
- goto error;
+#elif defined(SIOCGENADDR)
+ if (ioctl(desc->s, SIOCGENADDR, (char *)&ifreq) < 0)
+ break;
+ buf_check(sptr, s_end, 1+2+sizeof(ifreq.ifr_enaddr));
+ *sptr++ = INET_IFOPT_HWADDR;
+ put_int16(sizeof(ifreq.ifr_enaddr), sptr); sptr += 2;
+ /* raw memcpy (fix include autoconf later) */
+ sys_memcpy(sptr, (char*)(&ifreq.ifr_enaddr),
+ sizeof(ifreq.ifr_enaddr));
+ sptr += sizeof(ifreq.ifr_enaddr);
+#elif defined(HAVE_GETIFADDRS) && defined(AF_LINK)
+ struct ifaddrs *ifa, *ifp;
+ struct sockaddr_dl *sdlp;
+ int found = 0;
+
+ if (getifaddrs(&ifa) == -1)
+ goto error;
- for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
- if ((ifp->ifa_addr->sa_family == AF_LINK) &&
- (sys_strcmp(ifp->ifa_name, ifreq.ifr_name) == 0)) {
- found = 1;
- break;
- }
- }
+ for (ifp = ifa; ifp; ifp = ifp->ifa_next) {
+ if ((ifp->ifa_addr->sa_family == AF_LINK) &&
+ (sys_strcmp(ifp->ifa_name, ifreq.ifr_name) == 0)) {
+ found = 1;
+ break;
+ }
+ }
- if (found == 0) {
- freeifaddrs(ifa);
- break;
- }
+ if (found == 0) {
+ freeifaddrs(ifa);
+ break;
+ }
+ sdlp = (struct sockaddr_dl *)ifp->ifa_addr;
- buf_check(sptr, s_end, 1+IFHWADDRLEN);
- *sptr++ = INET_IFOPT_HWADDR;
- sys_memcpy(sptr, ((struct sockaddr_dl *)ifp->ifa_addr)->sdl_data +
- ((struct sockaddr_dl *)ifp->ifa_addr)->sdl_nlen, IFHWADDRLEN);
- freeifaddrs(ifa);
- sptr += IFHWADDRLEN;
+ buf_check(sptr, s_end, 1+2+sdlp->sdl_alen);
+ *sptr++ = INET_IFOPT_HWADDR;
+ put_int16(sdlp->sdl_alen, sptr); sptr += 2;
+ sys_memcpy(sptr,
+ sdlp->sdl_data + sdlp->sdl_nlen,
+ sdlp->sdl_alen);
+ freeifaddrs(ifa);
+ sptr += sdlp->sdl_alen;
#endif
break;
}
@@ -4240,29 +4279,15 @@ static int inet_ctl_ifget(inet_descriptor* desc, char* buf, int len,
case INET_IFOPT_FLAGS: {
int flags;
- int eflags = 0;
if (ioctl(desc->s, SIOCGIFFLAGS, (char*)&ifreq) < 0)
flags = 0;
else
flags = ifreq.ifr_flags;
- /* translate flags */
- if (flags & IFF_UP)
- eflags |= INET_IFF_UP;
- if (flags & IFF_BROADCAST)
- eflags |= INET_IFF_BROADCAST;
- if (flags & IFF_LOOPBACK)
- eflags |= INET_IFF_LOOPBACK;
- if (flags & IFF_POINTOPOINT)
- eflags |= INET_IFF_POINTTOPOINT;
- if (flags & IFF_RUNNING)
- eflags |= INET_IFF_RUNNING;
- if (flags & IFF_MULTICAST)
- eflags |= INET_IFF_MULTICAST;
buf_check(sptr, s_end, 5);
*sptr++ = INET_IFOPT_FLAGS;
- put_int32(eflags, sptr);
+ put_int32(IFGET_FLAGS(flags), sptr);
sptr += 4;
break;
}
@@ -4300,17 +4325,22 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len,
(void) ioctl(desc->s, SIOCSIFADDR, (char*)&ifreq);
break;
- case INET_IFOPT_HWADDR:
- buf_check(buf, b_end, IFHWADDRLEN);
+ case INET_IFOPT_HWADDR: {
+ unsigned int len;
+ buf_check(buf, b_end, 2);
+ len = get_int16(buf); buf += 2;
+ buf_check(buf, b_end, len);
#ifdef SIOCSIFHWADDR
/* raw memcpy (fix include autoconf later) */
- sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, IFHWADDRLEN);
+ sys_memset((char*)(&ifreq.ifr_hwaddr.sa_data),
+ '\0', sizeof(ifreq.ifr_hwaddr.sa_data));
+ sys_memcpy((char*)(&ifreq.ifr_hwaddr.sa_data), buf, len);
(void) ioctl(desc->s, SIOCSIFHWADDR, (char *)&ifreq);
#endif
- buf += IFHWADDRLEN;
+ buf += len;
break;
-
+ }
case INET_IFOPT_BROADADDR:
#ifdef SIOCSIFBRDADDR
@@ -4415,6 +4445,551 @@ static int inet_ctl_ifset(inet_descriptor* desc, char* buf, int len,
#endif
+
+
+/* Latin-1 to utf8 */
+
+static int utf8_len(const char *c, int m) {
+ int l;
+ for (l = 0; m; c++, l++, m--) {
+ if (*c == '\0') break;
+ if ((*c & 0x7f) != *c) l++;
+ }
+ return l;
+}
+
+static void utf8_encode(const char *c, int m, char *p) {
+ for (; m; c++, m--) {
+ if (*c == '\0') break;
+ if ((*c & 0x7f) != *c) {
+ *p++ = (char) (0xC0 | (0x03 & (*c >> 6)));
+ *p++ = (char) (0x80 | (0x3F & *c));
+ } else {
+ *p++ = (char) *c;
+ }
+ }
+}
+
+#if defined(__WIN32__)
+
+static void set_netmask_bytes(char *c, int len, int pref_len) {
+ int i, m;
+ for (i = 0, m = pref_len >> 3; i < m && i < len; i++) c[i] = '\xFF';
+ if (i < len) c[i++] = 0xFF << (8 - (pref_len & 7));
+ for (; i < len; i++) c[i] = '\0';
+}
+
+
+int eq_masked_bytes(char *a, char *b, int pref_len) {
+ int i, m;
+ for (i = 0, m = pref_len >> 3; i < m; i++) {
+ if (a[i] != b[i]) return 0;
+ }
+ m = pref_len & 7;
+ if (m) {
+ m = 0xFF & (0xFF << (8 - m));
+ if ((a[i] & m) != (b[i] & m)) return 0;
+ }
+ return !0;
+}
+
+static int inet_ctl_getifaddrs(inet_descriptor* desc_p,
+ char **rbuf_pp, int rsize)
+{
+ int i;
+ DWORD ret, n;
+ IP_INTERFACE_INFO *info_p;
+ MIB_IPADDRTABLE *ip_addrs_p;
+ IP_ADAPTER_ADDRESSES *ip_adaddrs_p, *ia_p;
+
+ char *buf_p;
+ char *buf_alloc_p;
+ int buf_size =512;
+# define BUF_ENSURE(Size) \
+ do { \
+ int NEED_, GOT_ = buf_p - buf_alloc_p; \
+ NEED_ = GOT_ + (Size); \
+ if (NEED_ > buf_size) { \
+ buf_size = NEED_ + 512; \
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \
+ buf_p = buf_alloc_p + GOT_; \
+ } \
+ } while(0)
+# define SOCKADDR_TO_BUF(opt, sa) \
+ do { \
+ if (sa) { \
+ char *P_; \
+ *buf_p++ = (opt); \
+ while (! (P_ = sockaddr_to_buf((sa), buf_p, \
+ buf_alloc_p+buf_size))) { \
+ int GOT_ = buf_p - buf_alloc_p; \
+ buf_size += 512; \
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \
+ buf_p = buf_alloc_p + GOT_; \
+ } \
+ if (P_ == buf_p) { \
+ buf_p--; \
+ } else { \
+ buf_p = P_; \
+ } \
+ } \
+ } while (0)
+
+ {
+ /* Try GetAdaptersAddresses, if it is available */
+ unsigned long ip_adaddrs_size = 16 * 1024;
+ ULONG family = AF_UNSPEC;
+ ULONG flags =
+ GAA_FLAG_INCLUDE_PREFIX | GAA_FLAG_SKIP_ANYCAST |
+ GAA_FLAG_SKIP_DNS_SERVER | GAA_FLAG_SKIP_FRIENDLY_NAME |
+ GAA_FLAG_SKIP_MULTICAST;
+ ULONG (WINAPI *fpGetAdaptersAddresses)
+ (ULONG, ULONG, PVOID, PIP_ADAPTER_ADDRESSES, PULONG);
+ HMODULE iphlpapi = GetModuleHandle("iphlpapi");
+ fpGetAdaptersAddresses = (void *)
+ (iphlpapi ?
+ GetProcAddress(iphlpapi, "GetAdaptersAddresses") :
+ NULL);
+ if (fpGetAdaptersAddresses) {
+ ip_adaddrs_p = ALLOC(ip_adaddrs_size);
+ for (i = 17; i; i--) {
+ ret = fpGetAdaptersAddresses(
+ family, flags, NULL, ip_adaddrs_p, &ip_adaddrs_size);
+ ip_adaddrs_p = REALLOC(ip_adaddrs_p, ip_adaddrs_size);
+ if (ret == NO_ERROR) break;
+ if (ret == ERROR_BUFFER_OVERFLOW) continue;
+ i = 0;
+ }
+ if (! i) {
+ FREE(ip_adaddrs_p);
+ ip_adaddrs_p = NULL;
+ }
+ } else ip_adaddrs_p = NULL;
+ }
+
+ {
+ /* Load the IP_INTERFACE_INFO table (only IPv4 interfaces),
+ * reliable source of interface names on XP
+ */
+ unsigned long info_size = 4 * 1024;
+ info_p = ALLOC(info_size);
+ for (i = 17; i; i--) {
+ ret = GetInterfaceInfo(info_p, &info_size);
+ info_p = REALLOC(info_p, info_size);
+ if (ret == NO_ERROR) break;
+ if (ret == ERROR_INSUFFICIENT_BUFFER) continue;
+ i = 0;
+ }
+ if (! i) {
+ FREE(info_p);
+ info_p = NULL;
+ }
+ }
+
+ if (! ip_adaddrs_p) {
+ /* If GetAdaptersAddresses gave nothing we fall back to
+ * MIB_IPADDRTABLE (only IPv4 interfaces)
+ */
+ unsigned long ip_addrs_size = 16 * sizeof(*ip_addrs_p);
+ ip_addrs_p = ALLOC(ip_addrs_size);
+ for (i = 17; i; i--) {
+ ret = GetIpAddrTable(ip_addrs_p, &ip_addrs_size, FALSE);
+ ip_addrs_p = REALLOC(ip_addrs_p, ip_addrs_size);
+ if (ret == NO_ERROR) break;
+ if (ret == ERROR_INSUFFICIENT_BUFFER) continue;
+ i = 0;
+ }
+ if (! i) {
+ if (info_p) FREE(info_p);
+ FREE(ip_addrs_p);
+ return ctl_reply(INET_REP_OK, NULL, 0, rbuf_pp, rsize);
+ }
+ } else ip_addrs_p = NULL;
+
+ buf_p = buf_alloc_p = ALLOC(buf_size);
+ *buf_p++ = INET_REP_OK;
+
+ /* Iterate over MIB_IPADDRTABLE or IP_ADAPTER_ADDRESSES */
+ for (ia_p = NULL, ip_addrs_p ? ((void *)(i = 0)) : (ia_p = ip_adaddrs_p);
+ ip_addrs_p ? (i < ip_addrs_p->dwNumEntries) : (ia_p != NULL);
+ ip_addrs_p ? ((void *)(i++)) : (ia_p = ia_p->Next)) {
+ MIB_IPADDRROW *ipaddrrow_p = NULL;
+ DWORD flags = INET_IFF_MULTICAST;
+ DWORD index = 0;
+ WCHAR *wname_p = NULL;
+ MIB_IFROW ifrow;
+
+ if (ip_addrs_p) {
+ ipaddrrow_p = ip_addrs_p->table + i;
+ index = ipaddrrow_p->dwIndex;
+ } else {
+ index = ia_p->IfIndex;
+ if (ia_p->Flags & IP_ADAPTER_NO_MULTICAST) {
+ flags &= ~INET_IFF_MULTICAST;
+ }
+ }
+index:
+ if (! index) goto done;
+ sys_memzero(&ifrow, sizeof(ifrow));
+ ifrow.dwIndex = index;
+ if (GetIfEntry(&ifrow) != NO_ERROR) break;
+ /* Find the interface name - first try MIB_IFROW.wzname */
+ if (ifrow.wszName[0] != 0) {
+ wname_p = ifrow.wszName;
+ } else {
+ /* Then try IP_ADAPTER_INDEX_MAP.Name (only IPv4 adapters) */
+ int j;
+ for (j = 0; j < info_p->NumAdapters; j++) {
+ if (info_p->Adapter[j].Index == (ULONG) ifrow.dwIndex) {
+ if (info_p->Adapter[j].Name[0] != 0) {
+ wname_p = info_p->Adapter[j].Name;
+ }
+ break;
+ }
+ }
+ }
+ if (wname_p) {
+ int len;
+ /* Convert interface name to UTF-8 */
+ len =
+ WideCharToMultiByte(
+ CP_UTF8, 0, wname_p, -1, NULL, 0, NULL, NULL);
+ if (! len) break;
+ BUF_ENSURE(len);
+ WideCharToMultiByte(
+ CP_UTF8, 0, wname_p, -1, buf_p, len, NULL, NULL);
+ buf_p += len;
+ } else {
+ /* Found no name -
+ * use "MIB_IFROW.dwIndex: MIB_IFROW.bDescr" as name instead */
+ int l;
+ l = utf8_len(ifrow.bDescr, ifrow.dwDescrLen);
+ BUF_ENSURE(9 + l+1);
+ buf_p +=
+ erts_sprintf(
+ buf_p, "%lu: ", (unsigned long) ifrow.dwIndex);
+ utf8_encode(ifrow.bDescr, ifrow.dwDescrLen, buf_p);
+ buf_p += l;
+ *buf_p++ = '\0';
+ }
+ /* Interface flags, often make up broadcast and multicast flags */
+ switch (ifrow.dwType) {
+ case IF_TYPE_ETHERNET_CSMACD:
+ flags |= INET_IFF_BROADCAST;
+ break;
+ case IF_TYPE_SOFTWARE_LOOPBACK:
+ flags |= INET_IFF_LOOPBACK;
+ flags &= ~INET_IFF_MULTICAST;
+ break;
+ default:
+ flags &= ~INET_IFF_MULTICAST;
+ break;
+ }
+ if (ifrow.dwAdminStatus) {
+ flags |= INET_IFF_UP;
+ switch (ifrow.dwOperStatus) {
+ case IF_OPER_STATUS_CONNECTING:
+ flags |= INET_IFF_POINTTOPOINT;
+ break;
+ case IF_OPER_STATUS_CONNECTED:
+ flags |= INET_IFF_RUNNING | INET_IFF_POINTTOPOINT;
+ break;
+ case IF_OPER_STATUS_OPERATIONAL:
+ flags |= INET_IFF_RUNNING;
+ break;
+ }
+ }
+ BUF_ENSURE(1 + 4);
+ *buf_p++ = INET_IFOPT_FLAGS;
+ put_int32(flags, buf_p); buf_p += 4;
+ if (ipaddrrow_p) {
+ /* Legacy implementation through GetIpAddrTable */
+ struct sockaddr_in sin;
+ /* IP Address */
+ sys_memzero(&sin, sizeof(sin));
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = ipaddrrow_p->dwAddr;
+ BUF_ENSURE(1);
+ /* Netmask */
+ SOCKADDR_TO_BUF(INET_IFOPT_ADDR, (struct sockaddr *) &sin);
+ sin.sin_addr.s_addr = ipaddrrow_p->dwMask;
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, (struct sockaddr *) &sin);
+ if (flags & INET_IFF_BROADCAST) {
+ /* Broadcast address - fake it*/
+ sin.sin_addr.s_addr = ipaddrrow_p->dwAddr;
+ sin.sin_addr.s_addr |= ~ipaddrrow_p->dwMask;
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(
+ INET_IFOPT_BROADADDR, (struct sockaddr *) &sin);
+ }
+ } else {
+ IP_ADAPTER_UNICAST_ADDRESS *p;
+ /* IP Address(es) */
+ for (p = ia_p->FirstUnicastAddress;
+ p;
+ p = p->Next)
+ {
+ IP_ADAPTER_PREFIX *q;
+ ULONG shortest_length;
+ struct sockaddr *shortest_p, *sa_p = p->Address.lpSockaddr;
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_ADDR, sa_p);
+ shortest_p = NULL;
+ shortest_length = 0;
+ for (q = ia_p->FirstPrefix;
+ q;
+ q = q->Next) {
+ struct sockaddr *sp_p = q->Address.lpSockaddr;
+ if (sa_p->sa_family != sp_p->sa_family) continue;
+ switch (sa_p->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in sin;
+ DWORD sa, sp, mask;
+ sa = ntohl((DWORD)
+ ((struct sockaddr_in *)
+ sa_p)->sin_addr.s_addr);
+ sp = ntohl((DWORD)
+ ((struct sockaddr_in *)
+ sp_p)->sin_addr.s_addr);
+ mask = 0xFFFFFFFF << (32 - q->PrefixLength);
+ if ((sa & mask) != (sp & mask)) continue;
+ if ((! shortest_p)
+ || q->PrefixLength < shortest_length) {
+ shortest_p = sp_p;
+ shortest_length = q->PrefixLength;
+ }
+ } break;
+ case AF_INET6: {
+ struct sockaddr_in6 sin6;
+ if (!eq_masked_bytes((char *)
+ &((struct sockaddr_in6 *)
+ sa_p)->sin6_addr,
+ (char *)
+ &((struct sockaddr_in6 *)
+ sp_p)->sin6_addr,
+ q->PrefixLength)) {
+ continue;
+ }
+ if ((! shortest_p)
+ || q->PrefixLength < shortest_length) {
+ shortest_p = sp_p;
+ shortest_length = q->PrefixLength;
+ }
+ } break;
+ }
+ }
+ if (! shortest_p) {
+ /* Found no shortest prefix */
+ shortest_p = sa_p;
+ switch (shortest_p->sa_family) {
+ case AF_INET: {
+ /* Fall back to old classfull network addresses */
+ DWORD addr = ntohl(((struct sockaddr_in *)shortest_p)
+ ->sin_addr.s_addr);
+ if (! (addr & 0x800000)) {
+ /* Class A */
+ shortest_length = 8;
+ } else if (! (addr & 0x400000)) {
+ /* Class B */
+ shortest_length = 16;
+ } else if (! (addr & 0x200000)) {
+ /* Class C */
+ shortest_length = 24;
+ } else {
+ shortest_length = 32;
+ }
+ } break;
+ case AF_INET6: {
+ /* Just play it safe */
+ shortest_length = 128;
+ } break;
+ }
+ }
+ switch (shortest_p->sa_family) {
+ case AF_INET: {
+ struct sockaddr_in sin;
+ DWORD mask = 0xFFFFFFFF << (32 - shortest_length);
+ sys_memzero(&sin, sizeof(sin));
+ sin.sin_family = shortest_p->sa_family;
+ sin.sin_addr.s_addr = htonl(mask);
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_NETMASK,
+ (struct sockaddr *) &sin);
+ if (flags & INET_IFF_BROADCAST) {
+ DWORD sp =
+ ntohl((DWORD)
+ ((struct sockaddr_in *)shortest_p)
+ -> sin_addr.s_addr);
+ sin.sin_addr.s_addr = htonl(sp | ~mask);
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR,
+ (struct sockaddr *) &sin);
+ }
+ } break;
+ case AF_INET6: {
+ struct sockaddr_in6 sin6;
+ sys_memzero(&sin6, sizeof(sin6));
+ sin6.sin6_family = shortest_p->sa_family;
+ set_netmask_bytes((char *) &sin6.sin6_addr,
+ 16,
+ shortest_length);
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_NETMASK,
+ (struct sockaddr *) &sin6);
+ } break;
+ }
+ }
+ }
+ if (ifrow.dwPhysAddrLen) {
+ /* Hardware Address */
+ BUF_ENSURE(1 + 2 + ifrow.dwPhysAddrLen);
+ *buf_p++ = INET_IFOPT_HWADDR;
+ put_int16(ifrow.dwPhysAddrLen, buf_p); buf_p += 2;
+ sys_memcpy(buf_p, ifrow.bPhysAddr, ifrow.dwPhysAddrLen);
+ buf_p += ifrow.dwPhysAddrLen;
+ }
+
+done:
+ /* That is all for this interface */
+ BUF_ENSURE(1);
+ *buf_p++ = '\0';
+ if (ia_p &&
+ ia_p->Ipv6IfIndex &&
+ ia_p->Ipv6IfIndex != index)
+ {
+ /* Oops, there was an other interface for IPv6. Possible? XXX */
+ index = ia_p->Ipv6IfIndex;
+ goto index;
+ }
+ }
+
+ if (ip_adaddrs_p) FREE(ip_adaddrs_p);
+ if (info_p) FREE(info_p);
+ if (ip_addrs_p) FREE(ip_addrs_p);
+
+ buf_size = buf_p - buf_alloc_p;
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size);
+ /* buf_p is now unreliable */
+ *rbuf_pp = buf_alloc_p;
+ return buf_size;
+# undef BUF_ENSURE
+}
+
+#elif defined(HAVE_GETIFADDRS)
+
+static int inet_ctl_getifaddrs(inet_descriptor* desc_p,
+ char **rbuf_pp, int rsize)
+{
+ struct ifaddrs *ifa_p, *ifa_free_p;
+
+ int buf_size;
+ char *buf_p;
+ char *buf_alloc_p;
+
+ buf_size = 512;
+ buf_alloc_p = ALLOC(buf_size);
+ buf_p = buf_alloc_p;
+# define BUF_ENSURE(Size) \
+ do { \
+ int NEED_, GOT_ = buf_p - buf_alloc_p; \
+ NEED_ = GOT_ + (Size); \
+ if (NEED_ > buf_size) { \
+ buf_size = NEED_ + 512; \
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \
+ buf_p = buf_alloc_p + GOT_; \
+ } \
+ } while (0)
+# define SOCKADDR_TO_BUF(opt, sa) \
+ do { \
+ if (sa) { \
+ char *P_; \
+ *buf_p++ = (opt); \
+ while (! (P_ = sockaddr_to_buf((sa), buf_p, \
+ buf_alloc_p+buf_size))) { \
+ int GOT_ = buf_p - buf_alloc_p; \
+ buf_size += 512; \
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size); \
+ buf_p = buf_alloc_p + GOT_; \
+ } \
+ if (P_ == buf_p) { \
+ buf_p--; \
+ } else { \
+ buf_p = P_; \
+ } \
+ } \
+ } while (0)
+
+ if (getifaddrs(&ifa_p) < 0) {
+ return ctl_error(sock_errno(), rbuf_pp, rsize);
+ }
+ ifa_free_p = ifa_p;
+ *buf_p++ = INET_REP_OK;
+ for (; ifa_p; ifa_p = ifa_p->ifa_next) {
+ int len = utf8_len(ifa_p->ifa_name, -1);
+ BUF_ENSURE(len+1 + 1+4 + 1);
+ utf8_encode(ifa_p->ifa_name, -1, buf_p);
+ buf_p += len;
+ *buf_p++ = '\0';
+ *buf_p++ = INET_IFOPT_FLAGS;
+ put_int32(IFGET_FLAGS(ifa_p->ifa_flags), buf_p); buf_p += 4;
+ if (ifa_p->ifa_addr->sa_family == AF_INET
+#if defined(AF_INET6)
+ || ifa_p->ifa_addr->sa_family == AF_INET6
+#endif
+ ) {
+ SOCKADDR_TO_BUF(INET_IFOPT_ADDR, ifa_p->ifa_addr);
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_NETMASK, ifa_p->ifa_netmask);
+ if (ifa_p->ifa_flags & IFF_POINTOPOINT) {
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_DSTADDR, ifa_p->ifa_dstaddr);
+ } else if (ifa_p->ifa_flags & IFF_BROADCAST) {
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_BROADADDR, ifa_p->ifa_broadaddr);
+ }
+ }
+#if defined(AF_LINK) || defined(AF_PACKET)
+ else if (
+#if defined(AF_LINK)
+ ifa_p->ifa_addr->sa_family == AF_LINK
+#else
+ 0
+#endif
+#if defined(AF_PACKET)
+ || ifa_p->ifa_addr->sa_family == AF_PACKET
+#endif
+ ) {
+ char *bp = buf_p;
+ BUF_ENSURE(1);
+ SOCKADDR_TO_BUF(INET_IFOPT_HWADDR, ifa_p->ifa_addr);
+ if (buf_p - bp < 4) buf_p = bp; /* Empty hwaddr */
+ }
+#endif
+ BUF_ENSURE(1);
+ *buf_p++ = '\0';
+ }
+ buf_size = buf_p - buf_alloc_p;
+ buf_alloc_p = REALLOC(buf_alloc_p, buf_size);
+ /* buf_p is now unreliable */
+ freeifaddrs(ifa_free_p);
+ *rbuf_pp = buf_alloc_p;
+ return buf_size;
+# undef BUF_ENSURE
+}
+
+#else
+
+static int inet_ctl_getifaddrs(inet_descriptor* desc_p,
+ char **rbuf_pp, int rsize)
+{
+ return ctl_error(ENOTSUP, rbuf_pp, rsize);
+}
+
+#endif
+
+
+
#ifdef VXWORKS
/*
** THIS is a terrible creature, a bug in the TCP part
@@ -4576,8 +5151,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
case INET_LOPT_BUFFER:
DEBUGF(("inet_set_opts(%ld): s=%d, BUFFER=%d\r\n",
(long)desc->port, desc->s, ival));
- if (ival > INET_MAX_BUFFER) ival = INET_MAX_BUFFER;
- else if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
+ if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
desc->bufsz = ival;
continue;
@@ -4642,7 +5216,6 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
if (desc->stype == SOCK_STREAM) {
tcp_descriptor* tdesc = (tcp_descriptor*) desc;
if (ival < 0) ival = 0;
- else if (ival > INET_MAX_BUFFER*2) ival = INET_MAX_BUFFER*2;
if (tdesc->low > ival)
tdesc->low = ival;
tdesc->high = ival;
@@ -4653,7 +5226,6 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
if (desc->stype == SOCK_STREAM) {
tcp_descriptor* tdesc = (tcp_descriptor*) desc;
if (ival < 0) ival = 0;
- else if (ival > INET_MAX_BUFFER) ival = INET_MAX_BUFFER;
if (tdesc->high < ival)
tdesc->high = ival;
tdesc->low = ival;
@@ -4999,9 +5571,6 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
case INET_LOPT_BUFFER:
desc->bufsz = get_int32(curr); curr += 4;
- if (desc->bufsz > INET_MAX_BUFFER)
- desc->bufsz = INET_MAX_BUFFER;
- else
if (desc->bufsz < INET_MIN_BUFFER)
desc->bufsz = INET_MIN_BUFFER;
res = 0; /* This does not affect the kernel buffer size */
@@ -5293,12 +5862,15 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
if (pmtud_enable) cflags |= SPP_PMTUD_ENABLE;
if (pmtud_disable) cflags |= SPP_PMTUD_DISABLE;
+# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
+ /* The followings are missing in FreeBSD 7.1 */
sackdelay_enable =eflags& SCTP_FLAG_SACDELAY_ENABLE;
sackdelay_disable=eflags& SCTP_FLAG_SACDELAY_DISABLE;
if (sackdelay_enable && sackdelay_disable)
return -1;
if (sackdelay_enable) cflags |= SPP_SACKDELAY_ENABLE;
if (sackdelay_disable) cflags |= SPP_SACKDELAY_DISABLE;
+# endif
arg.pap.spp_flags = cflags;
# endif
@@ -5436,7 +6008,7 @@ static int inet_fill_opts(inet_descriptor* desc,
#define PLACE_FOR(Size,Ptr) \
do { \
int need = dest_used + (Size); \
- if (need > INET_MAX_BUFFER) { \
+ if (need > INET_MAX_OPT_BUFFER) { \
RETURN_ERROR(); \
} \
if (need > dest_allocated) { \
@@ -5660,7 +6232,7 @@ static int inet_fill_opts(inet_descriptor* desc,
buf += 4;
data_provided = (int) *buf++;
arg_sz = get_int32(buf);
- if (arg_sz > INET_MAX_BUFFER) {
+ if (arg_sz > INET_MAX_OPT_BUFFER) {
RETURN_ERROR();
}
buf += 4;
@@ -5774,7 +6346,7 @@ static int sctp_fill_opts(inet_descriptor* desc, char* buf, int buflen,
"miscalculated buffer size"); \
} \
need = (Index) + (N); \
- if (need > INET_MAX_BUFFER/sizeof(ErlDrvTermData)) { \
+ if (need > INET_MAX_OPT_BUFFER/sizeof(ErlDrvTermData)) {\
RETURN_ERROR((Spec), -ENOMEM); \
} \
if (need > spec_allocated) { \
@@ -6199,13 +6771,15 @@ static int sctp_fill_opts(inet_descriptor* desc, char* buf, int buflen,
if (ap.spp_flags & SPP_PMTUD_DISABLE)
{ i = LOAD_ATOM (spec, i, am_pmtud_disable); n++; }
-
+# ifdef HAVE_STRUCT_SCTP_PADDRPARAMS_SPP_SACKDELAY
+ /* SPP_SACKDELAY_* not in FreeBSD 7.1 */
if (ap.spp_flags & SPP_SACKDELAY_ENABLE)
{ i = LOAD_ATOM (spec, i, am_sackdelay_enable); n++; }
if (ap.spp_flags & SPP_SACKDELAY_DISABLE)
{ i = LOAD_ATOM (spec, i, am_sackdelay_disable); n++; }
# endif
+# endif
PLACE_FOR(spec, i,
LOAD_NIL_CNT + LOAD_LIST_CNT + 2*LOAD_TUPLE_CNT);
@@ -6625,7 +7199,7 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len,
}
}
DEBUGF(("inet_ctl(%ld): GETSTAT\r\n", (long) desc->port));
- if (dstlen > INET_MAX_BUFFER) /* sanity check */
+ if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
return 0;
if (dstlen > rsize) {
if ((dst = (char*) ALLOC(dstlen)) == NULL)
@@ -6641,7 +7215,7 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len,
char* dst;
int dstlen = 1 /* Reply code */ + len*5;
DEBUGF(("inet_ctl(%ld): INET_REQ_SUBSCRIBE\r\n", (long) desc->port));
- if (dstlen > INET_MAX_BUFFER) /* sanity check */
+ if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
return 0;
if (dstlen > rsize) {
if ((dst = (char*) ALLOC(dstlen)) == NULL)
@@ -6676,6 +7250,13 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len,
return inet_ctl_getiflist(desc, rbuf, rsize);
}
+ case INET_REQ_GETIFADDRS: {
+ DEBUGF(("inet_ctl(%ld): GETIFADDRS\r\n", (long)desc->port));
+ if (!IS_OPEN(desc))
+ return ctl_xerror(EXBADPORT, rbuf, rsize);
+ return inet_ctl_getifaddrs(desc, rbuf, rsize);
+ }
+
case INET_REQ_IFGET: {
DEBUGF(("inet_ctl(%ld): IFGET\r\n", (long)desc->port));
if (!IS_OPEN(desc))
diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c
index 04bd1139f5..6de08e2fa6 100644..100755
--- a/erts/emulator/drivers/win32/win_efile.c
+++ b/erts/emulator/drivers/win32/win_efile.c
@@ -862,6 +862,7 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo,
findbuf.nFileSizeLow = 0;
findbuf.cFileName[0] = '\0';
+ pInfo->links = 1;
pInfo->modifyTime.year = 1980;
pInfo->modifyTime.month = 1;
pInfo->modifyTime.day = 1;
@@ -874,6 +875,33 @@ efile_fileinfo(Efile_error* errInfo, Efile_info* pInfo,
SYSTEMTIME SystemTime;
FILETIME LocalFTime;
+ /*first check if we are a symlink */
+ if (!info_for_link && (findbuf.dwFileAttributes &
+ FILE_ATTRIBUTE_REPARSE_POINT)){
+ /*
+ * given that we know this is a symlink,
+ we should be able to find its target */
+ char target_name[256];
+ if (efile_readlink(errInfo, name, target_name,256) == 1) {
+ return efile_fileinfo(errInfo, pInfo,
+ target_name, info_for_link);
+ }
+ }
+
+#if 0
+ /* number of links: */
+ {
+ HANDLE handle; /* Handle returned by CreateFile() */
+ BY_HANDLE_FILE_INFORMATION fileInfo; /* from CreateFile() */
+ if (handle = CreateFile(name, GENERIC_READ, 0,NULL,
+ OPEN_EXISTING, 0, NULL)) {
+ GetFileInformationByHandle(handle, &fileInfo);
+ pInfo->links = fileInfo.nNumberOfLinks;
+ CloseHandle(handle);
+ }
+ }
+#endif
+
#define GET_TIME(dst, src) \
if (!FileTimeToLocalFileTime(&findbuf.src, &LocalFTime) || \
!FileTimeToSystemTime(&LocalFTime, &SystemTime)) { \
@@ -908,7 +936,10 @@ if (!FileTimeToLocalFileTime(&findbuf.src, &LocalFTime) || \
pInfo->size_low = findbuf.nFileSizeLow;
pInfo->size_high = findbuf.nFileSizeHigh;
- if (findbuf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
+ if (info_for_link && (findbuf.dwFileAttributes &
+ FILE_ATTRIBUTE_REPARSE_POINT))
+ pInfo->type = FT_SYMLINK;
+ else if (findbuf.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
pInfo->type = FT_DIRECTORY;
else
pInfo->type = FT_REGULAR;
@@ -919,7 +950,6 @@ if (!FileTimeToLocalFileTime(&findbuf.src, &LocalFTime) || \
pInfo->access = FA_READ|FA_WRITE;
pInfo->mode = dos_to_posix_mode(findbuf.dwFileAttributes, name);
- pInfo->links = 1;
pInfo->major_device = drive;
pInfo->minor_device = 0;
pInfo->inode = 0;
@@ -1082,12 +1112,17 @@ char* buf; /* Buffer to write. */
size_t count; /* Number of bytes to write. */
{
DWORD written; /* Bytes written in last operation. */
+ OVERLAPPED overlapped;
+ OVERLAPPED* pOverlapped = NULL;
if (flags & EFILE_MODE_APPEND) {
- (void) SetFilePointer((HANDLE) fd, 0, NULL, FILE_END);
+ memset(&overlapped, 0, sizeof(overlapped));
+ overlapped.Offset = 0xffffffff;
+ overlapped.OffsetHigh = 0xffffffff;
+ pOverlapped = &overlapped;
}
while (count > 0) {
- if (!WriteFile((HANDLE) fd, buf, count, &written, NULL))
+ if (!WriteFile((HANDLE) fd, buf, count, &written, pOverlapped))
return set_error(errInfo);
buf += written;
count -= written;
@@ -1107,11 +1142,16 @@ efile_writev(Efile_error* errInfo, /* Where to return error codes */
size_t size) /* Number of bytes to write */
{
int cnt; /* Buffers so far written */
+ OVERLAPPED overlapped;
+ OVERLAPPED* pOverlapped = NULL;
ASSERT(iovcnt >= 0);
if (flags & EFILE_MODE_APPEND) {
- (void) SetFilePointer((HANDLE) fd, 0, NULL, FILE_END);
+ memset(&overlapped, 0, sizeof(overlapped));
+ overlapped.Offset = 0xffffffff;
+ overlapped.OffsetHigh = 0xffffffff;
+ pOverlapped = &overlapped;
}
for (cnt = 0; cnt < iovcnt; cnt++) {
if (iov[cnt].iov_base && iov[cnt].iov_len > 0) {
@@ -1123,7 +1163,7 @@ efile_writev(Efile_error* errInfo, /* Where to return error codes */
iov[cnt].iov_base + p,
iov[cnt].iov_len - p,
&w,
- NULL))
+ pOverlapped))
return set_error(errInfo);
}
}
@@ -1343,6 +1383,48 @@ dos_to_posix_mode(int attr, const char *name)
int
efile_readlink(Efile_error* errInfo, char* name, char* buffer, size_t size)
{
+ /*
+ * load dll and see if we have CreateSymbolicLink at runtime:
+ * (Vista only)
+ */
+ HINSTANCE hModule = NULL;
+ if ((hModule = LoadLibrary("kernel32.dll")) != NULL) {
+ typedef DWORD (WINAPI * GETFINALPATHNAMEBYHANDLEPTR)(
+ HANDLE hFile,
+ LPCSTR lpFilePath,
+ DWORD cchFilePath,
+ DWORD dwFlags);
+
+ GETFINALPATHNAMEBYHANDLEPTR pGetFinalPathNameByHandle =
+ (GETFINALPATHNAMEBYHANDLEPTR)GetProcAddress(hModule, "GetFinalPathNameByHandleA");
+
+ if (pGetFinalPathNameByHandle == NULL) {
+ FreeLibrary(hModule);
+ } else {
+ /* first check if file is a symlink; {error, einval} otherwise */
+ DWORD fileAttributes = GetFileAttributes(name);
+ if ((fileAttributes & FILE_ATTRIBUTE_REPARSE_POINT)) {
+ BOOLEAN success = 0;
+ HANDLE h = CreateFile(name, GENERIC_READ, 0,NULL, OPEN_EXISTING, 0, NULL);
+ if(h != INVALID_HANDLE_VALUE) {
+ success = pGetFinalPathNameByHandle(h, buffer, size,0);
+ /* GetFinalPathNameByHandle prepends path with "\\?\": */
+ sprintf(buffer, buffer+4);
+ CloseHandle(h);
+ }
+ FreeLibrary(hModule);
+ if (success) {
+ return 1;
+ } else {
+ return set_error(errInfo);
+ }
+ } else {
+ FreeLibrary(hModule);
+ errno = EINVAL;
+ return check_error(-1, errInfo);
+ }
+ }
+ }
errno = ENOTSUP;
return check_error(-1, errInfo);
}
@@ -1427,13 +1509,46 @@ efile_altname(Efile_error* errInfo, char* orig_name, char* buffer, size_t size)
int
efile_link(Efile_error* errInfo, char* old, char* new)
{
- errno = ENOTSUP;
- return check_error(-1, errInfo);
+ if(!CreateHardLink(new, old, NULL)) {
+ return set_error(errInfo);
+ }
+ return 1;
}
int
efile_symlink(Efile_error* errInfo, char* old, char* new)
{
+ /*
+ * Load dll and see if we have CreateSymbolicLink at runtime:
+ * (Vista only)
+ */
+ HINSTANCE hModule = NULL;
+ if ((hModule = LoadLibrary("kernel32.dll")) != NULL) {
+ typedef BOOLEAN (WINAPI * CREATESYMBOLICLINKFUNCPTR) (
+ LPCSTR lpSymlinkFileName,
+ LPCSTR lpTargetFileName,
+ DWORD dwFlags);
+
+ CREATESYMBOLICLINKFUNCPTR pCreateSymbolicLink =
+ (CREATESYMBOLICLINKFUNCPTR) GetProcAddress(hModule,
+ "CreateSymbolicLinkA");
+ /* A for MBCS, W for UNICODE... char* above implies 'A'! */
+ if (pCreateSymbolicLink != NULL) {
+ DWORD attr = GetFileAttributes(old);
+ int flag = (attr != INVALID_FILE_ATTRIBUTES &&
+ attr & FILE_ATTRIBUTE_DIRECTORY) ? 1 : 0;
+ /* SYMBOLIC_LINK_FLAG_DIRECTORY = 1 */
+ BOOLEAN success = pCreateSymbolicLink(new, old, flag);
+ FreeLibrary(hModule);
+
+ if (success) {
+ return 1;
+ } else {
+ return set_error(errInfo);
+ }
+ } else
+ FreeLibrary(hModule);
+ }
errno = ENOTSUP;
return check_error(-1, errInfo);
}
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index af4ab693dc..01ba773688 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -75,6 +75,7 @@ static erts_smp_rwmtx_t environ_rwmtx;
#include "erl_sys_driver.h"
#include "erl_check_io.h"
+#include "erl_cpu_topology.h"
#ifndef DISABLE_VFORK
#define DISABLE_VFORK 0
@@ -399,7 +400,7 @@ typedef struct {
#ifdef ERTS_THR_HAVE_SIG_FUNCS
sigset_t saved_sigmask;
#endif
- int unbind_child;
+ int sched_bind_data;
} erts_thr_create_data_t;
/*
@@ -410,15 +411,13 @@ static void *
thr_create_prepare(void)
{
erts_thr_create_data_t *tcdp;
- ErtsSchedulerData *esdp;
tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
#ifdef ERTS_THR_HAVE_SIG_FUNCS
erts_thr_sigmask(SIG_BLOCK, &thr_create_sigmask, &tcdp->saved_sigmask);
#endif
- esdp = erts_get_scheduler_data();
- tcdp->unbind_child = esdp && erts_is_scheduler_bound(esdp);
+ tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
return (void *) tcdp;
}
@@ -430,6 +429,8 @@ thr_create_cleanup(void *vtcdp)
{
erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+ erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
#ifdef ERTS_THR_HAVE_SIG_FUNCS
/* Restore signalmask... */
erts_thr_sigmask(SIG_SETMASK, &tcdp->saved_sigmask, NULL);
@@ -456,12 +457,7 @@ thr_create_prepare_child(void *vtcdp)
erts_thread_disable_fpe();
#endif
- if (tcdp->unbind_child) {
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- erts_unbind_from_cpu(erts_cpuinfo);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- }
-
+ erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
}
#endif /* #ifdef USE_THREADS */
@@ -1461,9 +1457,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
CHLD_STAT_LOCK;
- unbind = erts_is_scheduler_bound(NULL);
- if (unbind)
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
+ unbind = erts_sched_bind_atfork_prepare();
#if !DISABLE_VFORK
/* See fork/vfork discussion before this function. */
@@ -1476,7 +1470,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
if (pid == 0) {
/* The child! Setup child... */
- if (unbind && erts_unbind_from_cpu(erts_cpuinfo) != 0)
+ if (erts_sched_bind_atfork_child(unbind) != 0)
goto child_error;
/* OBSERVE!
@@ -1577,8 +1571,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog;
cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : ".";
- cs_argv[CS_ARGV_UNBIND_IX]
- = (unbind ? erts_get_unbind_from_cpu_str(erts_cpuinfo) : "false");
+ cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind);
cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range;
for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++)
cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0];
@@ -1627,8 +1620,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
}
#endif
- if (unbind)
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ erts_sched_bind_atfork_parent(unbind);
if (pid == -1) {
saved_errno = errno;
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 15d4cd7361..39b04b26a9 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -31,7 +31,7 @@
#include "global.h"
#include "erl_threads.h"
#include "../../drivers/win32/win_con.h"
-
+#include "erl_cpu_topology.h"
void erts_sys_init_float(void);
@@ -97,7 +97,7 @@ static int driver_write(long, HANDLE, byte*, int);
static void common_stop(int);
static int create_file_thread(struct async_io* aio, int mode);
#ifdef ERTS_SMP
-static void close_active_handles(ErlDrvPort, const HANDLE* handles, int cnt);
+static void close_active_handle(ErlDrvPort, HANDLE handle);
static DWORD WINAPI threaded_handle_closer(LPVOID param);
#endif
static DWORD WINAPI threaded_reader(LPVOID param);
@@ -137,7 +137,11 @@ static BOOL win_console = FALSE;
static OSVERSIONINFO int_os_version; /* Version information for Win32. */
-#ifdef ERTS_SMP
+/*#define USE_CANCELIOEX
+ Disabled the use of CancelIoEx as its been seen to cause problem with some
+ drivers. Not sure what to blame; faulty drivers or some form of invalid use.
+*/
+#if defined(ERTS_SMP) && defined(USE_CANCELIOEX)
static BOOL (WINAPI *fpCancelIoEx)(HANDLE,LPOVERLAPPED);
#endif
@@ -684,6 +688,7 @@ release_driver_data(DriverData* dp)
erts_smp_mtx_lock(&sys_driver_data_lock);
#ifdef ERTS_SMP
+#ifdef USE_CANCELIOEX
if (fpCancelIoEx != NULL) {
if (dp->in.thread == (HANDLE) -1 && dp->in.fd != INVALID_HANDLE_VALUE) {
(*fpCancelIoEx)(dp->in.fd, NULL);
@@ -692,10 +697,12 @@ release_driver_data(DriverData* dp)
(*fpCancelIoEx)(dp->out.fd, NULL);
}
}
- else {
+ else
+#endif
+ {
/* This is a workaround for the fact that CancelIo cant cancel
requests issued by another thread and that we cant use
- CancelIoEx as that's only availabele in Vista etc.
+ CancelIoEx as that's only available in Vista etc.
R14: Avoid scheduler deadlock by only wait for 10ms, and then spawn
a thread that will keep waiting in in order to close handles. */
HANDLE handles[2];
@@ -706,7 +713,7 @@ release_driver_data(DriverData* dp)
dp->in.fd = INVALID_HANDLE_VALUE;
DEBUGF(("Waiting for the in event thingie"));
if (WaitForSingleObject(dp->in.ov.hEvent,timeout) == WAIT_TIMEOUT) {
- handles[i++] = dp->in.ov.hEvent;
+ close_active_handle(dp->port_num, dp->in.ov.hEvent);
dp->in.ov.hEvent = NULL;
timeout = 0;
}
@@ -717,14 +724,11 @@ release_driver_data(DriverData* dp)
dp->out.fd = INVALID_HANDLE_VALUE;
DEBUGF(("Waiting for the out event thingie"));
if (WaitForSingleObject(dp->out.ov.hEvent,timeout) == WAIT_TIMEOUT) {
- handles[i++] = dp->out.ov.hEvent;
+ close_active_handle(dp->port_num, dp->out.ov.hEvent);
dp->out.ov.hEvent = NULL;
}
DEBUGF(("...done\n"));
}
- if (i > 0) {
- close_active_handles(dp->port_num, handles, i);
- }
}
#else
if (dp->in.thread == (HANDLE) -1 && dp->in.fd != INVALID_HANDLE_VALUE) {
@@ -772,42 +776,82 @@ release_driver_data(DriverData* dp)
#ifdef ERTS_SMP
-struct handles_to_be_closed
-{
- int cnt;
- HANDLE handles[2];
+struct handles_to_be_closed {
+ HANDLE handles[MAXIMUM_WAIT_OBJECTS];
+ unsigned cnt;
};
+static struct handles_to_be_closed* htbc_curr = NULL;
+CRITICAL_SECTION htbc_lock;
-static void close_active_handles(ErlDrvPort port_num, const HANDLE* handles, int cnt)
+static void close_active_handle(ErlDrvPort port_num, HANDLE handle)
{
- DWORD tid;
- HANDLE thread;
+ struct handles_to_be_closed* htbc;
int i;
- struct handles_to_be_closed* htbc = erts_alloc(ERTS_ALC_T_DRV_TAB,
- sizeof(struct handles_to_be_closed));
- htbc->cnt = cnt;
- for (i=0; i < cnt; ++i) {
- htbc->handles[i] = handles[i];
- (void) driver_select(port_num, (ErlDrvEvent)handles[i],
- ERL_DRV_USE_NO_CALLBACK, 0);
+ EnterCriticalSection(&htbc_lock);
+ htbc = htbc_curr;
+ if (htbc == NULL || htbc->cnt >= MAXIMUM_WAIT_OBJECTS) {
+ DWORD tid;
+ HANDLE thread;
+
+ htbc = (struct handles_to_be_closed*) erts_alloc(ERTS_ALC_T_DRV_TAB,
+ sizeof(*htbc));
+ htbc->handles[0] = CreateAutoEvent(FALSE);
+ htbc->cnt = 1;
+ thread = (HANDLE *) _beginthreadex(NULL, 0, threaded_handle_closer, htbc, 0, &tid);
+ CloseHandle(thread);
}
- thread = (HANDLE *) _beginthreadex(NULL, 0, threaded_handle_closer, htbc, 0, &tid);
- CloseHandle(thread);
+ htbc->handles[htbc->cnt++] = handle;
+ driver_select(port_num, (ErlDrvEvent)handle, ERL_DRV_USE_NO_CALLBACK, 0);
+ SetEvent(htbc->handles[0]);
+ htbc_curr = htbc;
+ LeaveCriticalSection(&htbc_lock);
}
-
static DWORD WINAPI
threaded_handle_closer(LPVOID param)
{
struct handles_to_be_closed* htbc = (struct handles_to_be_closed*) param;
- int i;
- DEBUGF(("threaded_handle_closer waiting for %d handles\r\n",htbc->cnt));
- WaitForMultipleObjects(htbc->cnt, htbc->handles, TRUE, INFINITE);
- for (i=0; i < htbc->cnt; ++i) {
- CloseHandle(htbc->handles[i]);
+ unsigned ix;
+ DWORD res;
+ DEBUGF(("threaded_handle_closer %p started\r\n", htbc));
+ EnterCriticalSection(&htbc_lock);
+ for (;;) {
+ {
+ HANDLE* handles = htbc->handles;
+ unsigned cnt = htbc->cnt;
+ DWORD timeout = (htbc == htbc_curr) ? INFINITE : 10*1000;
+
+ LeaveCriticalSection(&htbc_lock);
+ DEBUGF(("threaded_handle_closer %p waiting for %d handles\r\n", htbc, cnt));
+ res = WaitForMultipleObjects(cnt, handles, FALSE, timeout);
+ }
+ EnterCriticalSection(&htbc_lock);
+ switch (res) {
+ case WAIT_OBJECT_0:
+ case WAIT_TIMEOUT:
+ break; /* got some more handles to wait for maybe */
+ default:
+ ix = res - WAIT_OBJECT_0;
+ if (ix > 0 && ix < htbc->cnt) {
+ CloseHandle(htbc->handles[ix]);
+ htbc->handles[ix] = htbc->handles[--htbc->cnt];
+ }
+ }
+ if (htbc != htbc_curr) {
+ if (htbc->cnt == 1) { /* no real handles left */
+ break;
+ }
+ /* The thread with most free slots will be "current" */
+ if (htbc->cnt < htbc_curr->cnt) {
+ htbc_curr = htbc;
+ DEBUGF(("threaded_handle_closer %p made current\r\n", htbc));
+ }
+ }
}
+ LeaveCriticalSection(&htbc_lock);
+ CloseHandle(htbc->handles[0]);
erts_free(ERTS_ALC_T_DRV_TAB, htbc);
- DEBUGF(("threaded_handle_closer terminating\r\n"));
+ DEBUGF(("threaded_handle_closer %p terminating\r\n", htbc));
return 0;
}
#endif /* ERTS_SMP */
@@ -1101,11 +1145,10 @@ static int
spawn_init()
{
int i;
-#ifdef ERTS_SMP
+#if defined(ERTS_SMP) && defined(USE_CANCELIOEX)
HMODULE module = GetModuleHandle("kernel32");
- fpCancelIoEx = (module != NULL) ?
- (BOOL (WINAPI *)(HANDLE,LPOVERLAPPED))
- GetProcAddress(module,"CancelIoEx") : NULL;
+ fpCancelIoEx = (BOOL (WINAPI *)(HANDLE,LPOVERLAPPED))
+ ((module != NULL) ? GetProcAddress(module,"CancelIoEx") : NULL);
DEBUGF(("fpCancelIoEx = %p\r\n", fpCancelIoEx));
#endif
driver_data = (struct driver_data *)
@@ -2973,13 +3016,50 @@ check_supported_os_version(void)
}
#ifdef USE_THREADS
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
+typedef struct {
+ int sched_bind_data;
+} erts_thr_create_data_t;
+
+/*
+ * thr_create_prepare() is called in parent thread before thread creation.
+ * Returned value is passed as argument to thr_create_cleanup().
+ */
+static void *
+thr_create_prepare(void)
+{
+ erts_thr_create_data_t *tcdp;
+
+ tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
+ tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
+
+ return (void *) tcdp;
+}
+
+
+/* thr_create_cleanup() is called in parent thread after thread creation. */
+static void
+thr_create_cleanup(void *vtcdp)
+{
+ erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+ erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
+ erts_free(ERTS_ALC_T_TMP, tcdp);
+}
+
static void
thr_create_prepare_child(void *vtcdp)
{
+ erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_thread_setup();
-}
#endif /* ERTS_ENABLE_LOCK_COUNT */
+
+ erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
+}
+
#endif /* USE_THREADS */
void
@@ -2991,9 +3071,13 @@ erts_sys_pre_init(void)
#ifdef USE_THREADS
{
erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
eid.thread_create_child_func = thr_create_prepare_child;
-#endif
+ /* Before creation in parent */
+ eid.thread_create_prepare_func = thr_create_prepare;
+ /* After creation in parent */
+ eid.thread_create_parent_func = thr_create_cleanup,
+
erts_thr_init(&eid);
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_init();
@@ -3027,6 +3111,7 @@ void erl_sys_init(void)
#ifdef ERTS_SMP
erts_smp_tsd_key_create(&win32_errstr_key);
+ InitializeCriticalSection(&htbc_lock);
#endif
erts_smp_atomic_init(&pipe_creation_counter,0);
/*
diff --git a/erts/emulator/test/distribution_SUITE.erl b/erts/emulator/test/distribution_SUITE.erl
index 7c19274696..79252d0593 100644
--- a/erts/emulator/test/distribution_SUITE.erl
+++ b/erts/emulator/test/distribution_SUITE.erl
@@ -27,6 +27,7 @@
-export([all/1,
ping/1, bulk_send/1, bulk_send_small/1,
bulk_send_big/1,
+ bulk_send_bigbig/1,
local_send/1, local_send_small/1, local_send_big/1,
local_send_legal/1, link_to_busy/1, exit_to_busy/1,
lost_exit/1, link_to_dead/1, link_to_dead_new_node/1,
@@ -50,7 +51,8 @@
-export([sender/3, receiver2/2, dummy_waiter/0, dead_process/0,
roundtrip/1, bounce/1, do_dist_auto_connect/1, inet_rpc_server/1,
dist_parallel_sender/3, dist_parallel_receiver/0,
- dist_evil_parallel_receiver/0]).
+ dist_evil_parallel_receiver/0,
+ sendersender/4, sendersender2/4]).
all(suite) -> [
ping, bulk_send, local_send, link_to_busy, exit_to_busy,
@@ -121,7 +123,7 @@ bulk_send(doc) ->
"the time. This tests that a process that is suspended on a ",
"busy port will eventually be resumed."];
bulk_send(suite) ->
- [bulk_send_small, bulk_send_big].
+ [bulk_send_small, bulk_send_big, bulk_send_bigbig].
bulk_send_small(Config) when is_list(Config) ->
?line bulk_send(64, 32).
@@ -129,6 +131,9 @@ bulk_send_small(Config) when is_list(Config) ->
bulk_send_big(Config) when is_list(Config) ->
?line bulk_send(32, 64).
+bulk_send_bigbig(Config) when is_list(Config) ->
+ ?line bulk_sendsend(32*5, 4).
+
bulk_send(Terms, BinSize) ->
?line Dog = test_server:timetrap(test_server:seconds(30)),
@@ -145,6 +150,53 @@ bulk_send(Terms, BinSize) ->
?line test_server:timetrap_cancel(Dog),
{comment, integer_to_list(trunc(Size/1024/Elapsed+0.5)) ++ " K/s"}.
+bulk_sendsend(Terms, BinSize) ->
+ {Rate1, MonitorCount1} = bulk_sendsend2(Terms, BinSize, 5),
+ {Rate2, MonitorCount2} = bulk_sendsend2(Terms, BinSize, 995),
+ Ratio = if MonitorCount2 == 0 -> MonitorCount1 / 1.0;
+ true -> MonitorCount1 / MonitorCount2
+ end,
+ %% A somewhat arbitrary ratio, but hopefully one that will accomodate
+ %% a wide range of CPU speeds.
+ true = (Ratio > 8.0),
+ {comment,
+ integer_to_list(Rate1) ++ " K/s, " ++
+ integer_to_list(Rate2) ++ " K/s, " ++
+ integer_to_list(MonitorCount1) ++ " monitor msgs, " ++
+ integer_to_list(MonitorCount2) ++ " monitor msgs, " ++
+ float_to_list(Ratio) ++ " monitor ratio"}.
+
+bulk_sendsend2(Terms, BinSize, BusyBufSize) ->
+ ?line Dog = test_server:timetrap(test_server:seconds(30)),
+
+ ?line io:format("Sending ~w binaries, each of size ~w K",
+ [Terms, BinSize]),
+ ?line {ok, NodeRecv} = start_node(bulk_receiver),
+ ?line Recv = spawn(NodeRecv, erlang, apply, [fun receiver/2, [0, 0]]),
+ ?line Bin = list_to_binary(lists:duplicate(BinSize*1024, 253)),
+ ?line Size = Terms*size(Bin),
+
+ %% SLF LEFT OFF HERE.
+ %% When the caller uses small hunks, like 4k via
+ %% bulk_sendsend(32*5, 4), then (on my laptop at least), we get
+ %% zero monitor messages. But if we use "+zdbbl 5", then we
+ %% get a lot of monitor messages. So, if we can count up the
+ %% total number of monitor messages that we get when running both
+ %% default busy size and "+zdbbl 5", and if the 5 case gets
+ %% "many many more" monitor messages, then we know we're working.
+
+ ?line {ok, NodeSend} = start_node(bulk_sender, "+zdbbl " ++ integer_to_list(BusyBufSize)),
+ ?line _Send = spawn(NodeSend, erlang, apply, [fun sendersender/4, [self(), Recv, Bin, Terms]]),
+ ?line {Elapsed, {TermsN, SizeN}, MonitorCount} =
+ receive {sendersender, BigRes} ->
+ BigRes
+ end,
+ ?line stop_node(NodeRecv),
+ ?line stop_node(NodeSend),
+
+ ?line test_server:timetrap_cancel(Dog),
+ {trunc(SizeN/1024/Elapsed+0.5), MonitorCount}.
+
sender(To, _Bin, 0) ->
To ! {done, self()},
receive
@@ -155,6 +207,43 @@ sender(To, Bin, Left) ->
To ! {term, Bin},
sender(To, Bin, Left-1).
+%% Sender process to be run on a slave node
+
+sendersender(Parent, To, Bin, Left) ->
+ erlang:system_monitor(self(), [busy_dist_port]),
+ [spawn(fun() -> sendersender2(To, Bin, Left, false) end) ||
+ _ <- lists:seq(1,1)],
+ {USec, {Res, MonitorCount}} =
+ timer:tc(?MODULE, sendersender2, [To, Bin, Left, true]),
+ Parent ! {sendersender, {USec/1000000, Res, MonitorCount}}.
+
+sendersender2(To, Bin, Left, SendDone) ->
+ sendersender3(To, Bin, Left, SendDone, 0).
+
+sendersender3(To, _Bin, 0, SendDone, MonitorCount) ->
+ if SendDone ->
+ To ! {done, self()};
+ true ->
+ ok
+ end,
+ receive
+ {monitor, _Pid, _Type, _Info} = M ->
+ sendersender3(To, _Bin, 0, SendDone, MonitorCount + 1)
+ after 0 ->
+ if SendDone ->
+ receive
+ Any when is_tuple(Any), size(Any) == 2 ->
+ {Any, MonitorCount}
+ end;
+ true ->
+ exit(normal)
+ end
+ end;
+sendersender3(To, Bin, Left, SendDone, MonitorCount) ->
+ To ! {term, Bin},
+ %%timer:sleep(50),
+ sendersender3(To, Bin, Left-1, SendDone, MonitorCount).
+
%% Receiver process to be run on a slave node.
receiver(Terms, Size) ->
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index f45cfa3e4a..42947aa6be 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -73,7 +73,7 @@ basic(Config) when is_list(Config) ->
?line true = (lib_version() =/= undefined),
?line [{load,1,1,101},{lib_version,1,2,102}] = call_history(),
?line [] = call_history(),
- ?line [?MODULE] = erlang:system_info(taints),
+ ?line true = lists:member(?MODULE, erlang:system_info(taints)),
ok.
reload(doc) -> ["Test reload callback in nif lib"];
@@ -107,7 +107,8 @@ reload(Config) when is_list(Config) ->
?line true = erlang:purge_module(nif_mod),
?line [{unload,1,3,103}] = nif_mod_call_history(),
- ?line [?MODULE, nif_mod] = erlang:system_info(taints),
+ ?line true = lists:member(?MODULE, erlang:system_info(taints)),
+ ?line true = lists:member(nif_mod, erlang:system_info(taints)),
?line verify_tmpmem(TmpMem),
ok.
@@ -197,7 +198,8 @@ upgrade(Config) when is_list(Config) ->
?line true = erlang:purge_module(nif_mod),
?line [{unload,2,4,204}] = nif_mod_call_history(),
- ?line [?MODULE, nif_mod] = erlang:system_info(taints),
+ ?line true = lists:member(?MODULE, erlang:system_info(taints)),
+ ?line true = lists:member(nif_mod, erlang:system_info(taints)),
?line verify_tmpmem(TmpMem),
ok.
@@ -727,7 +729,8 @@ resource_takeover(Config) when is_list(Config) ->
?line ok = forget_resource(AN4),
?line [] = nif_mod_call_history(),
- ?line [?MODULE, nif_mod] = erlang:system_info(taints),
+ ?line true = lists:member(?MODULE, erlang:system_info(taints)),
+ ?line true = lists:member(nif_mod, erlang:system_info(taints)),
?line verify_tmpmem(TmpMem),
ok.
diff --git a/erts/emulator/test/port_SUITE.erl b/erts/emulator/test/port_SUITE.erl
index a7476ca9bb..7fe532abd0 100644
--- a/erts/emulator/test/port_SUITE.erl
+++ b/erts/emulator/test/port_SUITE.erl
@@ -2302,14 +2302,35 @@ load_driver(Dir, Driver) ->
end.
-close_deaf_port(doc) -> ["Send data to port program that does not read it, then close port."];
+close_deaf_port(doc) -> ["Send data to port program that does not read it, then close port."
+ "Primary targeting Windows to test threaded_handle_closer in sys.c"];
close_deaf_port(suite) -> [];
close_deaf_port(Config) when is_list(Config) ->
?line Dog = test_server:timetrap(test_server:seconds(100)),
?line DataDir = ?config(data_dir, Config),
?line DeadPort = os:find_executable("dead_port", DataDir),
-
?line Port = open_port({spawn,DeadPort++" 60"},[]),
?line erlang:port_command(Port,"Hello, can you hear me!?!?"),
?line port_close(Port),
- ok.
+
+ Res = close_deaf_port_1(0, DeadPort),
+ io:format("Waiting for OS procs to terminate...\n"),
+ receive after 5*1000 -> ok end,
+ ?line test_server:timetrap_cancel(Dog),
+ Res.
+
+close_deaf_port_1(1000, _) ->
+ ok;
+close_deaf_port_1(N, Cmd) ->
+ Timeout = integer_to_list(random:uniform(5*1000)),
+ ?line try open_port({spawn_executable,Cmd},[{args,[Timeout]}]) of
+ Port ->
+ ?line erlang:port_command(Port,"Hello, can you hear me!?!?"),
+ ?line port_close(Port),
+ close_deaf_port_1(N+1, Cmd)
+ catch
+ _:eagain ->
+ {comment, "Could not spawn more than " ++ integer_to_list(N) ++ " OS processes."}
+ end.
+
+
diff --git a/erts/emulator/test/port_SUITE_data/dead_port.c b/erts/emulator/test/port_SUITE_data/dead_port.c
index 6fa77112be..68e96fbf14 100644
--- a/erts/emulator/test/port_SUITE_data/dead_port.c
+++ b/erts/emulator/test/port_SUITE_data/dead_port.c
@@ -72,14 +72,14 @@ char *argv[];
{
int x;
if (argc < 2) {
- fprintf(stderr,"Usage %s <seconds>\n",argv[0]);
+ fprintf(stderr,"Usage %s <milliseconds>\n",argv[0]);
return 1;
}
if ((x = atoi(argv[1])) <= 0) {
- fprintf(stderr,"Usage %s <seconds>\n",argv[0]);
+ fprintf(stderr,"Usage %s <milliseconds>\n",argv[0]);
return 1;
}
- delay(x*1000);
+ delay(x);
return 0;
}
diff --git a/erts/emulator/test/send_term_SUITE.erl b/erts/emulator/test/send_term_SUITE.erl
index 5fd01a9ac5..819aa34886 100644
--- a/erts/emulator/test/send_term_SUITE.erl
+++ b/erts/emulator/test/send_term_SUITE.erl
@@ -1,7 +1,7 @@
%%
%% %CopyrightBegin%
%%
-%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2005-2010. All Rights Reserved.
%%
%% The contents of this file are subject to the Erlang Public License,
%% Version 1.1, (the "License"); you may not use this file except in
@@ -61,7 +61,7 @@ basic(Config) when is_list(Config) ->
?line ExpectExt2Term = term(P, 5),
%% ERL_DRV_INT, ERL_DRV_UINT
- ?line case erlang:system_info(wordsize) of
+ ?line case erlang:system_info({wordsize, external}) of
4 ->
?line {-1, 4294967295} = term(P, 6);
8 ->
diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl
index ba433d4e11..cd940f3ddf 100644
--- a/erts/emulator/test/system_info_SUITE.erl
+++ b/erts/emulator/test/system_info_SUITE.erl
@@ -132,6 +132,7 @@ misc_smoke_tests(Config) when is_list(Config) ->
?line true = is_binary(erlang:system_info(procs)),
?line true = is_binary(erlang:system_info(loaded)),
?line true = is_binary(erlang:system_info(dist)),
+ ?line ok = try erlang:system_info({cpu_topology,erts_get_cpu_topology_error_case}), fail catch error:badarg -> ok end,
?line ok.