aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_process.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_process.c')
-rw-r--r--erts/emulator/beam/erl_process.c2979
1 files changed, 1067 insertions, 1912 deletions
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 996806fc75..fc950af8ce 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -24,7 +24,6 @@
#endif
#include <stddef.h> /* offsetof() */
-#include <ctype.h>
#include "sys.h"
#include "erl_vm.h"
#include "global.h"
@@ -38,6 +37,8 @@
#include "erl_instrument.h"
#include "erl_threads.h"
#include "erl_binary.h"
+#include "beam_bp.h"
+#include "erl_cpu_topology.h"
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -45,14 +46,23 @@
#define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10)
-#define ERTS_SCHED_SLEEP_SPINCOUNT 10000
+#define ERTS_SCHED_SPIN_UNTIL_YIELD 100
+
+#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT 10
+#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT 1000
+#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT \
+ (ERTS_SCHED_SYS_SLEEP_SPINCOUNT*ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT)
+#define ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT 0
+
+#define ERTS_WAKEUP_OTHER_LIMIT_VERY_HIGH (200*CONTEXT_REDS)
+#define ERTS_WAKEUP_OTHER_LIMIT_HIGH (50*CONTEXT_REDS)
+#define ERTS_WAKEUP_OTHER_LIMIT_MEDIUM (10*CONTEXT_REDS)
+#define ERTS_WAKEUP_OTHER_LIMIT_LOW (CONTEXT_REDS)
+#define ERTS_WAKEUP_OTHER_LIMIT_VERY_LOW (CONTEXT_REDS/10)
-#define ERTS_WAKEUP_OTHER_LIMIT (100*CONTEXT_REDS/2)
#define ERTS_WAKEUP_OTHER_DEC 10
#define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10)
-#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
-
#if 0 || defined(DEBUG)
#define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
#endif
@@ -91,9 +101,9 @@ do { \
#define ERTS_EMPTY_RUNQ(RQ) \
((RQ)->len == 0 && (RQ)->misc.start == NULL)
-extern Eterm beam_apply[];
-extern Eterm beam_exit[];
-extern Eterm beam_continue_exit[];
+extern BeamInstr beam_apply[];
+extern BeamInstr beam_exit[];
+extern BeamInstr beam_continue_exit[];
static Sint p_last;
static Sint p_next;
@@ -105,6 +115,8 @@ Uint erts_no_schedulers;
Uint erts_max_processes = ERTS_DEFAULT_MAX_PROCESSES;
Uint erts_process_tab_index_mask;
+static int wakeup_other_limit;
+
int erts_sched_thread_suggested_stack_size = -1;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -115,16 +127,34 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
int erts_disable_proc_not_running_opt;
-#define ERTS_SCHED_CHANGING_ONLINE 1
-#define ERTS_SCHED_CHANGING_MULTI_SCHED 2
+#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((long) 1) << 0)
+#define ERTS_SCHDLR_SSPND_CHNG_MSB (((long) 1) << 1)
+#define ERTS_SCHDLR_SSPND_CHNG_ONLN (((long) 1) << 2)
+
+#ifndef DEBUG
+
+#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \
+ erts_smp_atomic_set(&schdlr_sspnd.changing, (VAL))
+
+#else
+
+#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \
+do { \
+ long old_val__ = erts_smp_atomic_xchg(&schdlr_sspnd.changing, \
+ (VAL)); \
+ ASSERT(old_val__ == (OLD_VAL)); \
+} while (0)
+
+#endif
+
static struct {
erts_smp_mtx_t mtx;
erts_smp_cnd_t cnd;
- int changing;
int online;
int curr_online;
int wait_curr_online;
+ erts_smp_atomic_t changing;
erts_smp_atomic_t active;
struct {
erts_smp_atomic_t ongoing;
@@ -159,44 +189,6 @@ do { \
#endif
-/*
- * Cpu topology hierarchy.
- */
-#define ERTS_TOPOLOGY_NODE 0
-#define ERTS_TOPOLOGY_PROCESSOR 1
-#define ERTS_TOPOLOGY_PROCESSOR_NODE 2
-#define ERTS_TOPOLOGY_CORE 3
-#define ERTS_TOPOLOGY_THREAD 4
-#define ERTS_TOPOLOGY_LOGICAL 5
-
-#define ERTS_TOPOLOGY_MAX_DEPTH 6
-
-typedef struct {
- int bind_id;
- int bound_id;
-} ErtsCpuBindData;
-
-static ErtsCpuBindData *scheduler2cpu_map;
-erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
-
-typedef enum {
- ERTS_CPU_BIND_SPREAD,
- ERTS_CPU_BIND_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_THREAD_SPREAD,
- ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
- ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
- ERTS_CPU_BIND_NO_SPREAD,
- ERTS_CPU_BIND_NONE
-} ErtsCpuBindOrder;
-
-ErtsCpuBindOrder cpu_bind_order;
-
-static erts_cpu_topology_t *user_cpudata;
-static int user_cpudata_size;
-static erts_cpu_topology_t *system_cpudata;
-static int system_cpudata_size;
-
erts_sched_stat_t erts_sched_stat;
ErtsRunQueue *erts_common_run_queue;
@@ -219,12 +211,18 @@ ErtsSchedulerData *erts_scheduler_data;
ErtsAlignedRunQueue *erts_aligned_run_queues;
Uint erts_no_run_queues;
+ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+
+#ifdef ERTS_SMP
+
typedef union {
- ErtsSchedulerData esd;
- char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
-} ErtsAlignedSchedulerData;
+ ErtsSchedulerSleepInfo ssi;
+ char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerSleepInfo))];
+} ErtsAlignedSchedulerSleepInfo;
-ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info;
+
+#endif
#ifndef BM_COUNTERS
static int processes_busy;
@@ -283,8 +281,9 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
200,
ERTS_ALC_T_PROC_LIST)
-#define ERTS_RUNQ_IX(IX) (&erts_aligned_run_queues[(IX)].runq)
-#define ERTS_SCHEDULER_IX(IX) (&erts_aligned_scheduler_data[(IX)].esd)
+#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
+ &aligned_sched_sleep_info[(IX)].ssi)
#define ERTS_FOREACH_RUNQ(RQVAR, DO) \
do { \
@@ -334,23 +333,14 @@ do { \
static void init_processes_bif(void);
static void save_terminating_process(Process *p);
static void exec_misc_ops(ErtsRunQueue *);
-static void print_function_from_pc(int to, void *to_arg, Eterm* x);
+static void print_function_from_pc(int to, void *to_arg, BeamInstr* x);
static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
int yreg);
#ifdef ERTS_SMP
static void handle_pending_exiters(ErtsProcList *);
-static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq);
-static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
-
#endif
-static void early_cpu_bind_init(void);
-static void late_cpu_bind_init(void);
-
#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK)
int
erts_smp_lc_runq_is_locked(ErtsRunQueue *runq)
@@ -388,7 +378,12 @@ erts_pre_init_process(void)
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks
= ERTS_PSD_DIST_ENTRY_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks
- = ERTS_PSD_DIST_ENTRY_GET_LOCKS;
+ = ERTS_PSD_DIST_ENTRY_SET_LOCKS;
+
+ erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks
+ = ERTS_PSD_CALL_TIME_BP_GET_LOCKS;
+ erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks
+ = ERTS_PSD_CALL_TIME_BP_SET_LOCKS;
/* Check that we have locks for all entries */
for (ix = 0; ix < ERTS_PSD_SIZE; ix++) {
@@ -401,13 +396,13 @@ erts_pre_init_process(void)
/* initialize the scheduler */
void
-erts_init_process(void)
+erts_init_process(int ncpu)
{
Uint proc_bits = ERTS_PROC_BITS;
#ifdef ERTS_SMP
erts_disable_proc_not_running_opt = 0;
- erts_init_proc_lock();
+ erts_init_proc_lock(ncpu);
#endif
init_proclist_alloc();
@@ -572,6 +567,76 @@ erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data)
#ifdef ERTS_SMP
+void
+erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, long flags)
+{ /*
+   * Complete a wake-up of the scheduler described by ssi. The sleep-type
+   * bits of flags (flags & ERTS_SSI_FLGS_SLEEP_TYPE) select how the
+   * scheduler is signalled; wake_scheduler() passes the value returned
+   * by ssi_flags_set_wake() as flags.
+   */
+ switch (flags & ERTS_SSI_FLGS_SLEEP_TYPE) {
+ case ERTS_SSI_FLG_POLL_SLEEPING: /* NOTE(review): presumably the scheduler
+                                   * is blocked in erl_sys_schedule() and
+                                   * the interrupt makes it return --
+                                   * confirm */
+ erts_sys_schedule_interrupt(1);
+ break;
+ case ERTS_SSI_FLG_TSE_SLEEPING: /* signal the scheduler's thread event */
+ erts_tse_set(ssi->event);
+ break;
+ case 0: /* no sleep-type bit set: nothing to signal */
+ break;
+ default: /* any other bit combination aborts the emulator */
+ erl_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error\n",
+ __FILE__, __LINE__);
+ break;
+ }
+}
+
+#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
+void
+erts_smp_notify_check_children_needed(void)
+{ /*
+   * For every scheduler index below erts_no_schedulers: atomically OR
+   * ERTS_SSI_AUX_WORK_CHECK_CHILDREN into the scheduler's aux_work word
+   * and poke the scheduler unless the value returned by the atomic OR
+   * already had that bit set.
+   */
+ int i;
+
+ for (i = 0; i < erts_no_schedulers; i++) {
+ long aux_work;
+ ErtsSchedulerSleepInfo *ssi;
+ ssi = ERTS_SCHED_SLEEP_INFO_IX(i);
+ aux_work = erts_smp_atomic_bor(&ssi->aux_work,
+ ERTS_SSI_AUX_WORK_CHECK_CHILDREN); /* NOTE(review): presumably the
+                                     * returned value is the word as it
+                                     * was before the OR -- confirm */
+ if (!(aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN))
+ erts_sched_poke(ssi);
+ }
+}
+#endif
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+static ERTS_INLINE long
+blockable_aux_work(ErtsSchedulerData *esdp,
+ ErtsSchedulerSleepInfo *ssi,
+ long aux_work)
+{ /*
+   * Perform the auxiliary work selected by the bits of aux_work that fall
+   * within ERTS_SSI_BLOCKABLE_AUX_WORK_MASK and return the remaining
+   * aux_work value. The only work handled here is the child check, and
+   * only when ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN is defined.
+   * esdp is not used by the code visible here.
+   */
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
+ if (aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN) {
+ aux_work = erts_smp_atomic_band(&ssi->aux_work,
+ ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN); /* clear the request bit in the
+                                      * shared word before doing the
+                                      * work */
+ aux_work &= ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN; /* and in the local copy */
+ erts_check_children();
+ }
+#endif
+ }
+ return aux_work;
+}
+
+#endif
+
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+/*
+ * Counterpart of blockable_aux_work() for the bits of aux_work that fall
+ * within ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK. No such work is implemented
+ * here, so the masked branch is intentionally empty.
+ *
+ * esdp and ssi are accepted for symmetry with blockable_aux_work() and are
+ * not used.
+ *
+ * Returns aux_work unchanged. An explicit return is required: the function
+ * is declared to return long, and letting control fall off the end would
+ * make any use of the result undefined.
+ */
+static ERTS_INLINE long
+nonblockable_aux_work(ErtsSchedulerData *esdp,
+                      ErtsSchedulerSleepInfo *ssi,
+                      long aux_work)
+{
+    if (aux_work & ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK) {
+        /* No nonblockable aux work is defined. */
+    }
+    return aux_work;
+}
+#endif
+
static void
prepare_for_block(void *vrq)
{
@@ -624,7 +689,31 @@ erts_active_schedulers(void)
return as;
}
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{ /*
+   * Decide whether the caller may go on to call erl_sys_schedule().
+   * Returns 1 if it may, 0 if it may not.
+   *
+   * With ERTS_SMP: returns 1 only when no I/O tasks are outstanding and
+   * the exchange on doing_sys_schedule yielded 0; in that case
+   * doing_sys_schedule is left set to 1 on return. If I/O tasks show up
+   * after the exchange, the flag is set back to 0 and the attempt is
+   * repeated.
+   *
+   * Without ERTS_SMP: returns 1 exactly when no I/O tasks are
+   * outstanding.
+   */
#ifdef ERTS_SMP
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) {
+ if (!erts_port_task_have_outstanding_io_tasks()) /* re-check after the
+                                                   * exchange */
+ return 1;
+ erts_smp_atomic_set(&doing_sys_schedule, 0); /* give the flag back and
+                                               * retry */
+ }
+ return 0;
+#else
+ return !erts_port_task_have_outstanding_io_tasks();
+#endif
+}
+
+#ifdef ERTS_SMP
+
+static ERTS_INLINE void
+sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq)
+{ /*
+   * Negate rq->waiting, which must be negative on entry. The run queue
+   * lock must be held by the caller (asserted when lock checking is
+   * enabled). The parameter no is not used.
+   * NOTE(review): presumably a negative count marks a waiter that sits in
+   * erl_sys_schedule() -- confirm against sched_waiting_sys().
+   */
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ ASSERT(rq->waiting < 0);
+ rq->waiting *= -1;
+}
static ERTS_INLINE void
sched_waiting(Uint no, ErtsRunQueue *rq)
@@ -666,7 +755,11 @@ empty_runq(ErtsRunQueue *rq)
if (oifls & ERTS_RUNQ_IFLG_NONEMPTY) {
#ifdef DEBUG
long empty = erts_smp_atomic_read(&no_empty_run_queues);
- ASSERT(0 <= empty && empty < erts_no_run_queues);
+ /*
+ * For a short period of time no_empty_run_queues may have
+ * been increased twice for a specific run queue.
+ */
+ ASSERT(0 <= empty && empty < 2*erts_no_run_queues);
#endif
erts_smp_atomic_inc(&no_empty_run_queues);
}
@@ -679,242 +772,422 @@ non_empty_runq(ErtsRunQueue *rq)
if (!(oifls & ERTS_RUNQ_IFLG_NONEMPTY)) {
#ifdef DEBUG
long empty = erts_smp_atomic_read(&no_empty_run_queues);
- ASSERT(0 < empty && empty <= erts_no_run_queues);
+ /*
+ * For a short period of time no_empty_run_queues may have
+ * been increased twice for a specific run queue.
+ */
+ ASSERT(0 < empty && empty <= 2*erts_no_run_queues);
#endif
erts_smp_atomic_dec(&no_empty_run_queues);
}
}
-static ERTS_INLINE int
-sched_spin_wake(ErtsRunQueue *rq)
+static long
+sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi)
{
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- return 0;
-#else
- long val;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING);
+ long xflgs = 0;
- val = erts_smp_atomic_read(&rq->spin_waiter);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_atomic_inc(&rq->spin_wake);
- return 1;
- }
- return 0;
-#endif
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ xflgs = oflgs;
+ } while (!(oflgs & ERTS_SSI_FLG_SUSPENDED));
+ return oflgs;
}
-static ERTS_INLINE int
-sched_spin_wake_all(ErtsRunQueue *rq)
+static long
+sched_prep_cont_spin_wait(ErtsSchedulerSleepInfo *ssi)
{
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- return 0;
-#else
- long val;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING);
+ long xflgs = ERTS_SSI_FLG_WAITING;
- val = erts_smp_atomic_read(&rq->spin_waiter);
- ASSERT(val >= 0);
- if (val != 0)
- erts_smp_atomic_add(&rq->spin_wake, val);
- return val;
-#endif
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ xflgs = oflgs;
+ nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED;
+ } while (oflgs & ERTS_SSI_FLG_WAITING);
+ return oflgs;
}
+static long
+sched_spin_wait(ErtsSchedulerSleepInfo *ssi, int spincount)
+{ /*
+   * Spin on ssi->flags until either ERTS_SSI_FLG_SLEEPING or
+   * ERTS_SSI_FLG_WAITING is no longer set, or until spincount iterations
+   * have been made. The flags are always read at least once, even when
+   * spincount is zero or negative, because the loop is a do/while.
+   *
+   * Returns the last value read from ssi->flags.
+   */
+ long until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ int sc = spincount;
+ long flgs;
+
+ do {
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if ((flgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
+ != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
+ break; /* at least one of the two bits has been cleared */
+ }
+ ERTS_SPIN_BODY;
+ if (--until_yield == 0) { /* yield the thread once every
+                            * ERTS_SCHED_SPIN_UNTIL_YIELD iterations */
+ until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ erts_thr_yield();
+ }
+ } while (--sc > 0);
+ return flgs;
+}
+
+static long
+sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, long sleep_type)
+{ /*
+   * Try to add sleep_type to ssi->flags while both ERTS_SSI_FLG_SLEEPING
+   * and ERTS_SSI_FLG_WAITING are still set.
+   *
+   * Returns the new flag value when the compare-and-exchange succeeds.
+   * If the flags observed no longer have both SLEEPING and WAITING set,
+   * nothing is written and the observed flags are returned instead.
+   */
+ long oflgs;
+ long nflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING|sleep_type;
+ long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
+
+ if (sleep_type == ERTS_SSI_FLG_TSE_SLEEPING) /* the event is reset before
+                                               * the sleep type is
+                                               * published */
+ erts_tse_reset(ssi->event);
+
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ if ((oflgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
+ != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
+ return oflgs;
+ }
+ xflgs = oflgs; /* retry against the value actually observed ... */
+ nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED; /* ... carrying over the
+                                           * suspended bit */
+ }
+}
+
+#define ERTS_SCHED_WAIT_WOKEN(FLGS) \
+ (((FLGS) & (ERTS_SSI_FLG_WAITING|ERTS_SSI_FLG_SUSPENDED)) \
+ != ERTS_SSI_FLG_WAITING)
+
static void
-sched_sys_wait(Uint no, ErtsRunQueue *rq)
+scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
- long dt;
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- int val;
- int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
+ int spincount;
+ long flgs;
+#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
+ long aux_work;
+#endif
+
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ erts_smp_spin_lock(&rq->sleepers.lock);
+ flgs = sched_prep_spin_wait(ssi);
+ if (flgs & ERTS_SSI_FLG_SUSPENDED) {
+ /* Go suspend instead... */
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+ return;
+ }
+
+ ssi->prev = NULL;
+ ssi->next = rq->sleepers.list;
+ if (rq->sleepers.list)
+ rq->sleepers.list->prev = ssi;
+ rq->sleepers.list = ssi;
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+
+ /*
+ * If all schedulers are waiting, one of them *should*
+ * be waiting in erl_sys_schedule()
+ */
+
+ if (!prepare_for_sys_schedule()) {
+
+ sched_waiting(esdp->no, rq);
+
+ erts_smp_runq_unlock(rq);
+
+ spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
+
+ tse_wait:
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ tse_blockable_aux_work:
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
#endif
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- sched_waiting_sys(no, rq);
+ while (1) {
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- erts_smp_atomic_inc(&rq->spin_waiter);
- erts_smp_runq_unlock(rq);
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
- erl_sys_schedule(1); /* Might give us something to do */
+ flgs = sched_spin_wait(ssi, spincount);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ int res;
+ ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
+ }
- dt = do_time_read_and_reset();
- if (dt) bump_timer(dt);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ break;
+ }
+
+ flgs = sched_prep_cont_spin_wait(ssi);
+ spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
+
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ break;
+ }
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ goto tse_blockable_aux_work;
+ }
+#endif
- while (spincount-- > 0) {
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_runq_lock(rq);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0)
- goto woken;
- if (spincount == 0)
- goto sleep;
- erts_smp_runq_unlock(rq);
}
- }
- erts_smp_runq_lock(rq);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- woken:
- erts_smp_atomic_dec(&rq->spin_wake);
- ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
+ erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+
+ erts_smp_runq_lock(rq);
+ sched_active(esdp->no, rq);
+
}
else {
- sleep:
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
+ long dt;
+
+ erts_smp_atomic_set(&function_calls, 0);
+ *fcalls = 0;
+
+ sched_waiting_sys(esdp->no, rq);
+
+ erts_smp_runq_unlock(rq);
+
+ spincount = ERTS_SCHED_SYS_SLEEP_SPINCOUNT;
+
+ while (spincount-- > 0) {
+
+ sys_poll_aux_work:
+
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
+ erl_sys_schedule(1); /* Might give us something to do */
+
+ dt = do_time_read_and_reset();
+ if (dt) bump_timer(dt);
+
+ sys_aux_work:
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
+#endif
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
+
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ }
+
+ /*
+ * If we got new I/O tasks we aren't allowed to
+ * call erl_sys_schedule() until it is handled.
+ */
+ if (erts_port_task_have_outstanding_io_tasks()) {
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ /*
+ * Got to check that we still got I/O tasks; otherwise
+ * we have to continue checking for I/O...
+ */
+ if (!prepare_for_sys_schedule()) {
+ spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
+ goto tse_wait;
+ }
+ }
+ }
+
+ erts_smp_runq_lock(rq);
+
/*
* If we got new I/O tasks we aren't allowed to
* sleep in erl_sys_schedule().
*/
- if (!erts_port_task_have_outstanding_io_tasks()) {
-#endif
+ if (erts_port_task_have_outstanding_io_tasks()) {
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ /*
+ * Got to check that we still got I/O tasks; otherwise
+ * we have to wait in erl_sys_schedule() after all...
+ */
+ if (prepare_for_sys_schedule())
+ goto do_sys_schedule;
+
+ /*
+ * Not allowed to wait in erl_sys_schedule;
+ * do tse wait instead...
+ */
+ sched_change_waiting_sys_to_waiting(esdp->no, rq);
+ erts_smp_runq_unlock(rq);
+ spincount = 0;
+ goto tse_wait;
+ }
+ else {
+ do_sys_schedule:
erts_sys_schedule_interrupt(0);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
+ if (!(flgs & ERTS_SSI_FLG_WAITING))
+ goto sys_locked_woken;
+ erts_smp_runq_unlock(rq);
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+ goto sys_poll_aux_work;
+ }
+
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+
erts_smp_runq_unlock(rq);
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
erl_sys_schedule(0);
dt = do_time_read_and_reset();
if (dt) bump_timer(dt);
- erts_smp_runq_lock(rq);
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (flgs & ERTS_SSI_FLG_WAITING)
+ goto sys_aux_work;
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
+ sys_woken:
+ erts_smp_runq_lock(rq);
+ sys_locked_woken:
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
+ erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+ sched_active_sys(esdp->no, rq);
}
}
-#endif
- sched_active_sys(no, rq);
-}
-
-static void
-sched_cnd_wait(Uint no, ErtsRunQueue *rq)
-{
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- int val;
- int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
-#endif
-
- sched_waiting(no, rq);
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- prepare_for_block,
- resume_after_block,
- (void *) rq);
-
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
-#else
- erts_smp_atomic_inc(&rq->spin_waiter);
- erts_smp_mtx_unlock(&rq->mtx);
-
- while (spincount-- > 0) {
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_mtx_lock(&rq->mtx);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0)
- goto woken;
- if (spincount == 0)
- goto sleep;
- erts_smp_mtx_unlock(&rq->mtx);
- }
- }
-
- erts_smp_mtx_lock(&rq->mtx);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val == 0) {
- sleep:
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
- erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
- }
- else {
- woken:
- erts_smp_atomic_dec(&rq->spin_wake);
- ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
- }
-#endif
-
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- prepare_for_block,
- resume_after_block,
- (void *) rq);
-
- sched_active(no, rq);
}
-static void
-wake_one_scheduler(void)
-{
- ASSERT(erts_common_run_queue);
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(erts_common_run_queue));
- if (erts_common_run_queue->waiting) {
- if (!sched_spin_wake(erts_common_run_queue)) {
- if (erts_common_run_queue->waiting == -1) /* One scheduler waiting
- and doing so in
- sys_schedule */
- erts_sys_schedule_interrupt(1);
- else
- erts_smp_cnd_signal(&erts_common_run_queue->cnd);
- }
+static ERTS_INLINE long
+ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi)
+{ /*
+   * Compare-and-exchange loop on ssi->flags. The first attempt expects
+   * ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING and writes 0; every retry
+   * expects the value just observed and writes that value masked with
+   * ERTS_SSI_FLG_SUSPENDED.
+   *
+   * Returns the flags observed by the successful exchange, i.e. the value
+   * before it was replaced; wake_scheduler() hands this value to
+   * erts_sched_finish_poke().
+   */
+ /* reset all flags but suspended */
+ long oflgs;
+ long nflgs = 0;
+ long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return oflgs;
+ nflgs = oflgs & ERTS_SSI_FLG_SUSPENDED;
+ xflgs = oflgs;
}
}
static void
-wake_scheduler(ErtsRunQueue *rq, int incq)
+wake_scheduler(ErtsRunQueue *rq, int incq, int one)
{
- ASSERT(!erts_common_run_queue);
- ASSERT(-1 <= rq->waiting && rq->waiting <= 1);
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
- if (rq->waiting && !rq->woken) {
- if (!sched_spin_wake(rq)) {
- if (rq->waiting < 0)
- erts_sys_schedule_interrupt(1);
- else
- erts_smp_cnd_signal(&rq->cnd);
+ int res;
+ ErtsSchedulerSleepInfo *ssi;
+ ErtsSchedulerSleepList *sl;
+
+ /*
+ * The unlocked run queue is not strictly necessary
+ * from a thread safety or deadlock prevention
+ * perspective. It will, however, cost us performance
+ * if it is locked during wakeup of another scheduler,
+ * so all code *should* handle this without having
+ * the lock on the run queue.
+ */
+ ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq));
+
+ sl = &rq->sleepers;
+
+ erts_smp_spin_lock(&sl->lock);
+ ssi = sl->list;
+ if (!ssi)
+ erts_smp_spin_unlock(&sl->lock);
+ else if (one) {
+ long flgs;
+ if (ssi->prev)
+ ssi->prev->next = ssi->next;
+ else {
+ ASSERT(sl->list == ssi);
+ sl->list = ssi->next;
}
- rq->woken = 1;
- if (incq)
+ if (ssi->next)
+ ssi->next->prev = ssi->prev;
+
+ res = sl->list != NULL;
+ erts_smp_spin_unlock(&sl->lock);
+
+ flgs = ssi_flags_set_wake(ssi);
+ erts_sched_finish_poke(ssi, flgs);
+
+ if (incq && !erts_common_run_queue && (flgs & ERTS_SSI_FLG_WAITING))
non_empty_runq(rq);
}
+ else {
+ sl->list = NULL;
+ erts_smp_spin_unlock(&sl->lock);
+ do {
+ ErtsSchedulerSleepInfo *wake_ssi = ssi;
+ ssi = ssi->next;
+ erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
+ } while (ssi);
+ }
}
static void
wake_all_schedulers(void)
{
- if (erts_common_run_queue) {
- erts_smp_runq_lock(erts_common_run_queue);
- if (erts_common_run_queue->waiting) {
- if (erts_common_run_queue->waiting < 0)
- erts_sys_schedule_interrupt(1);
- sched_spin_wake_all(erts_common_run_queue);
- erts_smp_cnd_broadcast(&erts_common_run_queue->cnd);
- }
- erts_smp_runq_unlock(erts_common_run_queue);
- }
+ if (erts_common_run_queue)
+ wake_scheduler(erts_common_run_queue, 0, 0);
else {
int ix;
for (ix = 0; ix < erts_no_run_queues; ix++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
- erts_smp_runq_lock(rq);
- wake_scheduler(rq, 0);
- erts_smp_runq_unlock(rq);
+ wake_scheduler(rq, 0, 1);
}
}
}
@@ -929,14 +1202,14 @@ chk_wake_sched(ErtsRunQueue *crq, int ix, int activate)
wrq = ERTS_RUNQ_IX(ix);
iflgs = erts_smp_atomic_read(&wrq->info_flags);
if (!(iflgs & (ERTS_RUNQ_IFLG_SUSPENDED|ERTS_RUNQ_IFLG_NONEMPTY))) {
- erts_smp_xrunq_lock(crq, wrq);
if (activate) {
if (ix == erts_smp_atomic_cmpxchg(&balance_info.active_runqs, ix+1, ix)) {
+ erts_smp_xrunq_lock(crq, wrq);
wrq->flags &= ~ERTS_RUNQ_FLG_INACTIVE;
+ erts_smp_xrunq_unlock(crq, wrq);
}
}
- wake_scheduler(wrq, 0);
- erts_smp_xrunq_unlock(crq, wrq);
+ wake_scheduler(wrq, 0, 1);
return 1;
}
return 0;
@@ -982,19 +1255,42 @@ static ERTS_INLINE void
smp_notify_inc_runq(ErtsRunQueue *runq)
{
#ifdef ERTS_SMP
- if (erts_common_run_queue)
- wake_one_scheduler();
- else
- wake_scheduler(runq, 1);
+ if (runq)
+ wake_scheduler(runq, 1, 1);
#endif
}
void
-erts_smp_notify_inc_runq__(ErtsRunQueue *runq)
+erts_smp_notify_inc_runq(ErtsRunQueue *runq)
{
smp_notify_inc_runq(runq);
}
+void
+erts_sched_notify_check_cpu_bind(void)
+{ /*
+   * Request that schedulers check their CPU binding.
+   *
+   * With ERTS_SMP and a common run queue: chk_cpu_bind is set to 1 in
+   * every scheduler's data and all schedulers are then woken.
+   * With ERTS_SMP and separate run queues: ERTS_RUNQ_FLG_CHK_CPU_BIND is
+   * set on each run queue while holding its lock, and wake_scheduler()
+   * is called for the queue after the lock has been released.
+   * Without ERTS_SMP: erts_sched_check_cpu_bind() is called directly
+   * with the current scheduler data.
+   */
+#ifdef ERTS_SMP
+ int ix;
+ if (erts_common_run_queue) {
+ for (ix = 0; ix < erts_no_schedulers; ix++)
+ erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1);
+ wake_all_schedulers();
+ }
+ else {
+ for (ix = 0; ix < erts_no_run_queues; ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+ erts_smp_runq_lock(rq);
+ rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_smp_runq_unlock(rq); /* wake_scheduler() asserts that the run
+                            * queue is not locked */
+ wake_scheduler(rq, 0, 1);
+ };
+ }
+#else
+ erts_sched_check_cpu_bind(erts_get_scheduler_data());
+#endif
+}
+
+
#ifdef ERTS_SMP
ErtsRunQueue *
@@ -1136,20 +1432,23 @@ static void
evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
{
Port *prt;
+ int notify_to_rq = 0;
int prio;
int prt_locked = 0;
int rq_locked = 0;
int evac_rq_locked = 1;
+ ErtsMigrateResult mres;
erts_smp_runq_lock(evac_rq);
+ erts_smp_atomic_bor(&evac_rq->scheduler->ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+
evac_rq->flags &= ~ERTS_RUNQ_FLGS_IMMIGRATE_QMASK;
evac_rq->flags |= (ERTS_RUNQ_FLGS_EMIGRATE_QMASK
| ERTS_RUNQ_FLGS_EVACUATE_QMASK
| ERTS_RUNQ_FLG_SUSPENDED);
erts_smp_atomic_bor(&evac_rq->info_flags, ERTS_RUNQ_IFLG_SUSPENDED);
-
/*
* Need to set up evacuation paths first since we
* may release the run queue lock on evac_rq
@@ -1177,9 +1476,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
/* Evacuate scheduled ports */
prt = evac_rq->ports.start;
while (prt) {
- (void) erts_port_migrate(prt, &prt_locked,
+ mres = erts_port_migrate(prt, &prt_locked,
evac_rq, &evac_rq_locked,
rq, &rq_locked);
+ if (mres == ERTS_MIGRATE_SUCCESS)
+ notify_to_rq = 1;
if (prt_locked)
erts_smp_port_unlock(prt);
if (!evac_rq_locked) {
@@ -1208,9 +1509,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
goto end_of_proc;
}
- (void) erts_proc_migrate(proc, &proc_locks,
+ mres = erts_proc_migrate(proc, &proc_locks,
evac_rq, &evac_rq_locked,
rq, &rq_locked);
+ if (mres == ERTS_MIGRATE_SUCCESS)
+ notify_to_rq = 1;
if (proc_locks)
erts_smp_proc_unlock(proc, proc_locks);
if (!evac_rq_locked) {
@@ -1242,10 +1545,13 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
if (rq_locked)
erts_smp_runq_unlock(rq);
- if (!evac_rq_locked)
- erts_smp_runq_lock(evac_rq);
- wake_scheduler(evac_rq, 0);
- erts_smp_runq_unlock(evac_rq);
+ if (evac_rq_locked)
+ erts_smp_runq_unlock(evac_rq);
+
+ if (notify_to_rq)
+ smp_notify_inc_runq(rq);
+
+ wake_scheduler(evac_rq, 0, 1);
}
static int
@@ -1473,31 +1779,6 @@ try_steal_task(ErtsRunQueue *rq)
return res;
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
-void
-erts_smp_notify_check_children_needed(void)
-{
- int i;
- for (i = 0; i < erts_no_schedulers; i++) {
- erts_smp_runq_lock(ERTS_SCHEDULER_IX(i)->run_queue);
- ERTS_SCHEDULER_IX(i)->check_children = 1;
- if (!erts_common_run_queue)
- wake_scheduler(ERTS_SCHEDULER_IX(i)->run_queue, 0);
- erts_smp_runq_unlock(ERTS_SCHEDULER_IX(i)->run_queue);
- }
- if (ongoing_multi_scheduling_block()) {
- /* Also blocked schedulers need to check children */
- erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- for (i = 0; i < erts_no_schedulers; i++)
- ERTS_SCHEDULER_IX(i)->blocked_check_children = 1;
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- }
- if (erts_common_run_queue)
- wake_all_schedulers();
-}
-#endif
-
/* Run queue balancing */
typedef struct {
@@ -1561,6 +1842,9 @@ do { \
static void
check_balance(ErtsRunQueue *c_rq)
{
+#if ERTS_MAX_PROCESSES >= (1 << 27)
+# error check_balance() assumes ERTS_MAX_PROCESS < (1 << 27)
+#endif
ErtsRunQueueBalance avg = {0};
Sint64 scheds_reds, full_scheds_reds;
int forced, active, current_active, oowc, half_full_scheds, full_scheds,
@@ -1684,12 +1968,14 @@ check_balance(ErtsRunQueue *c_rq)
run_queue_info[qix].prio[pix].avail = 0;
}
else {
- int xreds = 0;
- int procreds = treds;
- procreds -= run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds;
+ Sint64 xreds = 0;
+ Sint64 procreds = treds;
+ procreds -=
+ ((Sint64)
+ run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds);
for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) {
- int av;
+ Sint64 av;
if (xreds == 0)
av = 100;
@@ -1700,9 +1986,10 @@ check_balance(ErtsRunQueue *c_rq)
if (av == 0)
av = 1;
}
- run_queue_info[qix].prio[pix].avail = av;
+ run_queue_info[qix].prio[pix].avail = (int) av;
+ ASSERT(run_queue_info[qix].prio[pix].avail >= 0);
if (pix < PRIORITY_NORMAL) /* ie., max or high */
- xreds += run_queue_info[qix].prio[pix].reds;
+ xreds += (Sint64) run_queue_info[qix].prio[pix].reds;
}
run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].avail = 100;
}
@@ -1807,7 +2094,8 @@ check_balance(ErtsRunQueue *c_rq)
if (max_len != 0) {
int avail = avg.prio[pix].avail;
if (avail != 0) {
- max_len = ((100*max_len - 1) / avail) + 1;
+ max_len = (int) ((100*((Sint64) max_len) - 1)
+ / ((Sint64) avail)) + 1;
avg.prio[pix].max_len = max_len;
ASSERT(max_len >= 0);
}
@@ -1824,9 +2112,10 @@ check_balance(ErtsRunQueue *c_rq)
|| run_queue_info[qix].prio[pix].avail == 0)
limit = 0;
else
- limit = (((avg.prio[pix].max_len
- * run_queue_info[qix].prio[pix].avail) - 1)
- / 100 + 1);
+ limit = (int) (((((Sint64) avg.prio[pix].max_len)
+ * ((Sint64) run_queue_info[qix].prio[pix].avail))
+ - 1)
+ / 100 + 1);
run_queue_info[qix].prio[pix].migration_limit = limit;
}
}
@@ -2054,8 +2343,27 @@ erts_debug_nbalance(void)
void
erts_early_init_scheduling(void)
{
- early_cpu_bind_init();
+ wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
+}
+
+/*
+ * Select the wakeup-other limit by name.
+ *
+ * str is compared against "very_high", "high", "medium", "low" and
+ * "very_low". On a match, wakeup_other_limit is set to the corresponding
+ * ERTS_WAKEUP_OTHER_LIMIT_* value and 0 is returned. Any other string
+ * leaves wakeup_other_limit untouched and EINVAL is returned.
+ */
+int
+erts_sched_set_wakeup_limit(char *str)
+{
+    int limit;
+
+    if (sys_strcmp(str, "very_high") == 0) {
+        limit = ERTS_WAKEUP_OTHER_LIMIT_VERY_HIGH;
+    } else if (sys_strcmp(str, "high") == 0) {
+        limit = ERTS_WAKEUP_OTHER_LIMIT_HIGH;
+    } else if (sys_strcmp(str, "medium") == 0) {
+        limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
+    } else if (sys_strcmp(str, "low") == 0) {
+        limit = ERTS_WAKEUP_OTHER_LIMIT_LOW;
+    } else if (sys_strcmp(str, "very_low") == 0) {
+        limit = ERTS_WAKEUP_OTHER_LIMIT_VERY_LOW;
+    } else {
+        /* Unknown name: report it without changing the current limit. */
+        return EINVAL;
+    }
+
+    wakeup_other_limit = limit;
+    return 0;
+}
+
void
erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
@@ -2078,16 +2386,20 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_aligned_run_queues = erts_alloc(ERTS_ALC_T_RUNQS,
(sizeof(ErtsAlignedRunQueue)*(n+1)));
- if ((((Uint) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0)
+ if ((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) != 0)
erts_aligned_run_queues = ((ErtsAlignedRunQueue *)
- ((((Uint) erts_aligned_run_queues)
+ ((((UWord) erts_aligned_run_queues)
& ~ERTS_CACHE_LINE_MASK)
+ ERTS_CACHE_LINE_SIZE));
+ ASSERT((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0);
+
#ifdef ERTS_SMP
erts_smp_atomic_init(&no_empty_run_queues, 0);
#endif
+ erts_no_run_queues = n;
+
for (ix = 0; ix < n; ix++) {
int pix, rix;
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
@@ -2102,8 +2414,10 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1));
erts_smp_cnd_init(&rq->cnd);
- erts_smp_atomic_init(&rq->spin_waiter, 0);
- erts_smp_atomic_init(&rq->spin_wake, 0);
+#ifdef ERTS_SMP
+ erts_smp_spinlock_init(&rq->sleepers.lock, "run_queue_sleep_list");
+ rq->sleepers.list = NULL;
+#endif
rq->waiting = 0;
rq->woken = 0;
@@ -2154,7 +2468,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
}
erts_common_run_queue = !mrq ? ERTS_RUNQ_IX(0) : NULL;
- erts_no_run_queues = n;
#ifdef ERTS_SMP
@@ -2169,23 +2482,59 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
#endif
+ n = (int) no_schedulers;
+ erts_no_schedulers = n;
+
+#ifdef ERTS_SMP
+    /* Create and initialize scheduler sleep info */
+
+    aligned_sched_sleep_info = erts_alloc(ERTS_ALC_T_SCHDLR_SLP_INFO,
+                                          (sizeof(ErtsAlignedSchedulerSleepInfo)
+                                           *(n+1)));
+    /*
+     * One element more than needed (n+1) is allocated so that the start
+     * of the array can be moved forward to the next cache line boundary.
+     * That adjustment must be made when the block is NOT already aligned
+     * (mask bits != 0), exactly as done for erts_aligned_run_queues and
+     * erts_aligned_scheduler_data in this function; testing for == 0
+     * would leave an unaligned block unaligned.
+     */
+    if ((((UWord) aligned_sched_sleep_info) & ERTS_CACHE_LINE_MASK) != 0)
+        aligned_sched_sleep_info = ((ErtsAlignedSchedulerSleepInfo *)
+                                    ((((UWord) aligned_sched_sleep_info)
+                                      & ~ERTS_CACHE_LINE_MASK)
+                                     + ERTS_CACHE_LINE_SIZE));
+
+    ASSERT((((UWord) aligned_sched_sleep_info) & ERTS_CACHE_LINE_MASK) == 0);
+
+    for (ix = 0; ix < n; ix++) {
+        ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+#if 0 /* no need to initialize these... */
+        ssi->next = NULL;
+        ssi->prev = NULL;
+#endif
+        erts_smp_atomic_init(&ssi->flags, 0);
+        ssi->event = NULL; /* initialized in sched_thread_func */
+        erts_smp_atomic_init(&ssi->aux_work, 0);
+    }
+#endif
+
/* Create and initialize scheduler specific data */
- n = (int) no_schedulers;
erts_aligned_scheduler_data = erts_alloc(ERTS_ALC_T_SCHDLR_DATA,
(sizeof(ErtsAlignedSchedulerData)
*(n+1)));
- if ((((Uint) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0)
+ if ((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) != 0)
erts_aligned_scheduler_data = ((ErtsAlignedSchedulerData *)
- ((((Uint) erts_aligned_scheduler_data)
+ ((((UWord) erts_aligned_scheduler_data)
& ~ERTS_CACHE_LINE_MASK)
+ ERTS_CACHE_LINE_SIZE));
+
+ ASSERT((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0);
+
for (ix = 0; ix < n; ix++) {
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix);
#ifdef ERTS_SMP
erts_bits_init_state(&esdp->erl_bits_state);
esdp->match_pseudo_process = NULL;
+ esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
esdp->free_process = NULL;
+#if HALFWORD_HEAP
+ /* Registers need to be heap allocated (correct memory range) for tracing to work */
+ esdp->save_reg = erts_alloc(ERTS_ALC_T_BEAM_REGISTER, ERTS_X_REGS_ALLOCATED * sizeof(Eterm));
+#endif
+#endif
+#if !HEAP_ON_C_STACK
+ esdp->num_tmp_heap_used = 0;
#endif
esdp->no = (Uint) ix+1;
esdp->current_process = NULL;
@@ -2206,11 +2555,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
}
#ifdef ERTS_SMP
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- esdp->check_children = 0;
- esdp->blocked_check_children = 0;
-#endif
- erts_smp_atomic_init(&esdp->suspended, 0);
erts_smp_atomic_init(&esdp->chk_cpu_bind, 0);
#endif
}
@@ -2219,7 +2563,7 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd");
erts_smp_cnd_init(&schdlr_sspnd.cnd);
- schdlr_sspnd.changing = 0;
+ erts_smp_atomic_init(&schdlr_sspnd.changing, 0);
schdlr_sspnd.online = no_schedulers_online;
schdlr_sspnd.curr_online = no_schedulers;
erts_smp_atomic_init(&schdlr_sspnd.msb.ongoing, 0);
@@ -2242,7 +2586,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
if (no_schedulers_online < no_schedulers) {
if (erts_common_run_queue) {
for (ix = no_schedulers_online; ix < no_schedulers; ix++)
- erts_smp_atomic_set(&(ERTS_SCHEDULER_IX(ix)->suspended), 1);
+ erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ERTS_SSI_FLG_SUSPENDED);
}
else {
for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++)
@@ -2253,7 +2598,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
schdlr_sspnd.wait_curr_online = no_schedulers_online;
schdlr_sspnd.curr_online *= 2; /* Boot strapping... */
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
erts_smp_atomic_init(&doing_sys_schedule, 0);
@@ -2273,8 +2619,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
/* init port tasks */
erts_port_task_init();
-
- late_cpu_bind_init();
}
ErtsRunQueue *
@@ -2401,13 +2745,113 @@ susp_sched_resume_block(void *unused)
}
+/*
+ * Clear the sleep-info flags of the scheduler with index 'ix' and poke it.
+ *
+ * The first attempt expects the flags to be exactly
+ * SLEEPING | TSE_SLEEPING | WAITING | SUSPENDED. If the value found differs,
+ * the attempt is repeated with the value found as the new expectation, but
+ * only for as long as that value still has ERTS_SSI_FLG_SUSPENDED set. When
+ * an attempt succeeds, erts_sched_finish_poke() is called with the flags
+ * that were replaced. If SUSPENDED is found to be cleared already, nothing
+ * more is done.
+ *
+ * NOTE(review): erts_smp_atomic_cmpxchg() is read here as
+ * (variable, new value, expected value) returning the value found --
+ * confirm against the atomics API.
+ */
static void
+scheduler_ix_resume_wake(Uint ix)
+{
+ ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+ /* Flags expected by the next cmpxchg attempt */
+ long xflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ /* Flags actually found by the last cmpxchg attempt */
+ long oflgs;
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, 0, xflgs);
+ if (oflgs == xflgs) {
+ /* Flags were replaced by 0; finish the wakeup using the old flags */
+ erts_sched_finish_poke(ssi, oflgs);
+ break;
+ }
+ /* Flags changed under us; retry with what was found */
+ xflgs = oflgs;
+ } while (oflgs & ERTS_SSI_FLG_SUSPENDED);
+}
+
+/*
+ * Try to move the flags of 'ssi' to SLEEPING | WAITING | SUSPENDED.
+ *
+ * 'xpct' is the flag value expected on the first attempt. If another value
+ * is found, the attempt is repeated with that value as the expectation for
+ * as long as it has ERTS_SSI_FLG_SUSPENDED set.
+ *
+ * Returns the newly installed flags (SLEEPING | WAITING | SUSPENDED) on
+ * success; otherwise the flags found, which then do not have
+ * ERTS_SSI_FLG_SUSPENDED set.
+ */
+static long
+sched_prep_spin_suspended(ErtsSchedulerSleepInfo *ssi, long xpct)
+{
+ long oflgs;
+ /* Flags to install */
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ /* Flags expected by the next cmpxchg attempt */
+ long xflgs = xpct;
+
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ /* Someone else changed the flags; retry from the value found */
+ xflgs = oflgs;
+ } while (oflgs & ERTS_SSI_FLG_SUSPENDED);
+
+ /* SUSPENDED is no longer set; report what was found */
+ return oflgs;
+}
+
+/*
+ * Spin while the flags of 'ssi' still have all of SLEEPING, WAITING and
+ * SUSPENDED set, for at most 'spincount' iterations (always at least one
+ * read of the flags is made). erts_thr_yield() is called once every
+ * ERTS_SCHED_SPIN_UNTIL_YIELD iterations.
+ *
+ * Returns the flags as last read.
+ */
+static long
+sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount)
+{
+    const long susp_sleep_mask = (ERTS_SSI_FLG_SLEEPING
+                                  | ERTS_SSI_FLG_WAITING
+                                  | ERTS_SSI_FLG_SUSPENDED);
+    int yield_countdown = ERTS_SCHED_SPIN_UNTIL_YIELD;
+    int spins_left = spincount;
+    long flgs;
+
+    for (;;) {
+        flgs = erts_smp_atomic_read(&ssi->flags);
+
+        /* Stop as soon as any of the three bits has been cleared */
+        if ((flgs & susp_sleep_mask) != susp_sleep_mask)
+            return flgs;
+
+        ERTS_SPIN_BODY;
+
+        if (--yield_countdown == 0) {
+            yield_countdown = ERTS_SCHED_SPIN_UNTIL_YIELD;
+            erts_thr_yield();
+        }
+
+        /* Spin budget used up */
+        if (--spins_left <= 0)
+            return flgs;
+    }
+}
+
+/*
+ * Add ERTS_SSI_FLG_TSE_SLEEPING to the flags of a suspended scheduler.
+ *
+ * The event of 'ssi' is reset with erts_tse_reset() before the flags are
+ * touched. The flags are then moved from SLEEPING | WAITING | SUSPENDED to
+ * the same set plus TSE_SLEEPING. If another value is found that still has
+ * all of SLEEPING, WAITING and SUSPENDED set, the attempt is repeated with
+ * that value as the expectation.
+ *
+ * Returns the newly installed flags on success; otherwise the flags found,
+ * which then lack at least one of SLEEPING, WAITING and SUSPENDED.
+ */
+static long
+sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi)
+{
+ long oflgs;
+ /* Flags to install */
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ /* Flags expected by the next cmpxchg attempt */
+ long xflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+
+ erts_tse_reset(ssi->event);
+
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ /* Give up if any of SLEEPING, WAITING or SUSPENDED has been cleared */
+ if ((oflgs & (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED))
+ != (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ return oflgs;
+ }
+ /* Only other bits changed; retry from the value found */
+ xflgs = oflgs;
+ }
+}
+
+static void
suspend_scheduler(ErtsSchedulerData *esdp)
{
+ long flgs;
+ int changing;
long no = (long) esdp->no;
- ErtsRunQueue *rq = esdp->run_queue;
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
long active_schedulers;
int curr_online = 1;
int wake = 0;
+#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
+ long aux_work;
+#endif
/*
* Schedulers may be suspended in two different ways:
@@ -2424,126 +2868,144 @@ suspend_scheduler(ErtsSchedulerData *esdp)
erts_smp_runq_unlock(esdp->run_queue);
- /* Unbind from cpu */
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0
- && erts_unbind_from_cpu(erts_cpuinfo) == 0) {
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- }
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
+ erts_sched_check_cpu_bind_prep_suspend(esdp);
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_inactive);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active);
- ASSERT(active_schedulers >= 1);
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED) {
- if (active_schedulers == schdlr_sspnd.msb.wait_active)
- wake = 1;
- if (active_schedulers == 1)
- schdlr_sspnd.changing = 0;
- }
-
- while (1) {
+ flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED);
+ if (flgs & ERTS_SSI_FLG_SUSPENDED) {
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- int check_children;
- erts_smp_runq_lock(esdp->run_queue);
- check_children = esdp->check_children;
- esdp->check_children = 0;
- erts_smp_runq_unlock(esdp->run_queue);
- if (check_children) {
- erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- erts_check_children();
- erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active);
+ ASSERT(active_schedulers >= 1);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) {
+ if (active_schedulers == schdlr_sspnd.msb.wait_active)
+ wake = 1;
+ if (active_schedulers == 1) {
+ changing = erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_MSB);
+ changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB;
+ }
}
-#endif
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) {
- int changed = 0;
- if (no > schdlr_sspnd.online && curr_online) {
- schdlr_sspnd.curr_online--;
- curr_online = 0;
- changed = 1;
+ while (1) {
+ if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) {
+ int changed = 0;
+ if (no > schdlr_sspnd.online && curr_online) {
+ schdlr_sspnd.curr_online--;
+ curr_online = 0;
+ changed = 1;
+ }
+ else if (no <= schdlr_sspnd.online && !curr_online) {
+ schdlr_sspnd.curr_online++;
+ curr_online = 1;
+ changed = 1;
+ }
+ if (changed
+ && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
+ wake = 1;
+ if (schdlr_sspnd.online == schdlr_sspnd.curr_online) {
+ changing = erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_ONLN);
+ changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN;
+ }
}
- else if (no <= schdlr_sspnd.online && !curr_online) {
- schdlr_sspnd.curr_online++;
- curr_online = 1;
- changed = 1;
+
+ if (wake) {
+ erts_smp_cnd_signal(&schdlr_sspnd.cnd);
+ wake = 0;
}
- if (changed
- && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
- wake = 1;
- if (schdlr_sspnd.online == schdlr_sspnd.curr_online)
- schdlr_sspnd.changing = 0;
- }
- if (wake) {
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- wake = 0;
- }
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if (!(flgs & ERTS_SSI_FLG_SUSPENDED))
+ break;
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ|ERTS_RUNQ_FLG_SUSPENDED)))
- break;
- if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- && !erts_smp_atomic_read(&esdp->suspended))
- break;
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ blockable_aux_work:
+ blockable_aux_work(esdp, ssi, aux_work);
+#endif
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- while (1) {
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ while (1) {
+ long flgs;
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- if (esdp->blocked_check_children)
- break;
+ flgs = sched_spin_suspended(ssi, ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT);
+ if (flgs == (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ flgs = sched_set_suspended_sleeptype(ssi);
+ if (flgs == (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ int res;
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
+ }
+
+ flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED));
+ if (!(flgs & ERTS_SSI_FLG_SUSPENDED))
+ break;
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)
+ break;
+
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ goto blockable_aux_work;
+ }
#endif
- erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
+ }
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE)
- break;
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ
- | ERTS_RUNQ_FLG_SUSPENDED)))
- break;
- if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- && !erts_smp_atomic_read(&esdp->suspended))
- break;
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- esdp->blocked_check_children = 0;
-#endif
+ active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB)
+ && schdlr_sspnd.online == active_schedulers) {
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_MSB);
+ }
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- }
+ ASSERT(no <= schdlr_sspnd.online);
+ ASSERT(!erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing));
- active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active);
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED
- && schdlr_sspnd.online == active_schedulers) {
- schdlr_sspnd.changing = 0;
}
+
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+ ASSERT(curr_online);
+
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_active);
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
- /* Make sure we check if we should bind to a cpu or not... */
- if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
- else
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+ erts_sched_check_cpu_bind_post_suspend(esdp);
}
#define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \
@@ -2600,8 +3062,10 @@ erts_schedulers_state(Uint *total,
int yield_allowed)
{
int res;
+ long changing;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- if (yield_allowed && schdlr_sspnd.changing)
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER))
res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
else {
*active = *online = schdlr_sspnd.online;
@@ -2621,6 +3085,7 @@ erts_set_schedulers_online(Process *p,
Sint *old_no)
{
int ix, res, no, have_unlocked_plocks;
+ long changing;
if (new_no < 1 || erts_no_schedulers < new_no)
return ERTS_SCHDLR_SSPND_EINVAL;
@@ -2630,7 +3095,8 @@ erts_set_schedulers_online(Process *p,
have_unlocked_plocks = 0;
no = (int) new_no;
- if (schdlr_sspnd.changing) {
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing) {
res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
}
else {
@@ -2639,17 +3105,19 @@ erts_set_schedulers_online(Process *p,
res = ERTS_SCHDLR_SSPND_DONE;
}
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
schdlr_sspnd.online = no;
if (no > online) {
int ix;
schdlr_sspnd.wait_curr_online = no;
- if (ongoing_multi_scheduling_block())
- /* No schedulers to resume */;
+ if (ongoing_multi_scheduling_block()) {
+ for (ix = online; ix < no; ix++)
+ erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix));
+ }
else if (erts_common_run_queue) {
for (ix = online; ix < no; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended,
- 0);
+ scheduler_ix_resume_wake(ix);
}
else {
if (plocks) {
@@ -2663,6 +3131,7 @@ erts_set_schedulers_online(Process *p,
erts_smp_runq_lock(rq);
ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x5);
erts_smp_runq_unlock(rq);
+ scheduler_ix_resume_wake(ix);
}
/*
* Spread evacuation paths among all online
@@ -2677,7 +3146,6 @@ erts_set_schedulers_online(Process *p,
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
}
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
res = ERTS_SCHDLR_SSPND_DONE;
}
else /* if (no < online) */ {
@@ -2694,12 +3162,17 @@ erts_set_schedulers_online(Process *p,
schdlr_sspnd.wait_curr_online = no+1;
}
- if (ongoing_multi_scheduling_block())
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- else if (erts_common_run_queue) {
+ if (ongoing_multi_scheduling_block()) {
for (ix = no; ix < online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended,
- 1);
+ erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix));
+ }
+ else if (erts_common_run_queue) {
+ for (ix = no; ix < online; ix++) {
+ ErtsSchedulerSleepInfo *ssi;
+ ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+ erts_smp_atomic_bor(&ssi->flags,
+ ERTS_SSI_FLG_SUSPENDED);
+ }
wake_all_schedulers();
}
else {
@@ -2726,7 +3199,10 @@ erts_set_schedulers_online(Process *p,
erts_smp_atomic_set(&balance_info.used_runqs, no);
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- ERTS_FOREACH_OP_RUNQ(rq, wake_scheduler(rq, 0));
+ for (ix = no; ix < online; ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+ wake_scheduler(rq, 0, 1);
+ }
}
}
@@ -2740,6 +3216,12 @@ erts_set_schedulers_online(Process *p,
susp_sched_prep_block,
susp_sched_resume_block,
NULL);
+ ASSERT(res != ERTS_SCHDLR_SSPND_DONE
+ ? (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ & erts_smp_atomic_read(&schdlr_sspnd.changing))
+ : (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ == erts_smp_atomic_read(&schdlr_sspnd.changing)));
+ erts_smp_atomic_band(&schdlr_sspnd.changing, ~ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
}
@@ -2754,15 +3236,16 @@ ErtsSchedSuspendResult
erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
{
int ix, res, have_unlocked_plocks = 0;
+ long changing;
ErtsProcList *plp;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
-
- if (schdlr_sspnd.changing) {
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing) {
res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */
}
else if (on) { /* ------ BLOCK ------ */
- if (erts_is_multi_scheduling_blocked()) {
+ if (schdlr_sspnd.msb.procs) {
plp = proclist_create(p);
plp->next = schdlr_sspnd.msb.procs;
schdlr_sspnd.msb.procs = plp;
@@ -2772,19 +3255,22 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
}
else {
+ int online = schdlr_sspnd.online;
p->flags |= F_HAVE_BLCKD_MSCHED;
if (plocks) {
have_unlocked_plocks = 1;
erts_smp_proc_unlock(p, plocks);
}
+ ASSERT(0 == erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing));
erts_smp_atomic_set(&schdlr_sspnd.msb.ongoing, 1);
- if (schdlr_sspnd.online == 1) {
+ if (online == 1) {
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
ASSERT(p->scheduler_data->no == 1);
}
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
if (p->scheduler_data->no == 1) {
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
schdlr_sspnd.msb.wait_active = 1;
@@ -2798,17 +3284,19 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
schdlr_sspnd.msb.wait_active = 2;
}
if (erts_common_run_queue) {
- for (ix = 1; ix < schdlr_sspnd.online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 1);
+ for (ix = 1; ix < online; ix++)
+ erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ERTS_SSI_FLG_SUSPENDED);
wake_all_schedulers();
}
else {
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
erts_smp_mtx_lock(&balance_info.update_mtx);
erts_smp_atomic_set(&balance_info.used_runqs, 1);
- for (ix = 0; ix < schdlr_sspnd.online; ix++) {
+ for (ix = 0; ix < online; ix++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
erts_smp_runq_lock(rq);
+ ASSERT(!(rq->flags & ERTS_RUNQ_FLG_SUSPENDED));
ERTS_RUNQ_RESET_MIGRATION_PATHS(rq, 0x7);
erts_smp_runq_unlock(rq);
}
@@ -2833,6 +3321,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
susp_sched_prep_block,
susp_sched_resume_block,
NULL);
+ ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED
+ ? (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ & erts_smp_atomic_read(&schdlr_sspnd.changing))
+ : (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ == erts_smp_atomic_read(&schdlr_sspnd.changing)));
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
plp = proclist_create(p);
plp->next = schdlr_sspnd.msb.procs;
@@ -2876,7 +3371,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
if (schdlr_sspnd.msb.procs)
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED;
+ ERTS_SCHDLR_SSPND_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0);
#ifdef DEBUG
ERTS_FOREACH_RUNQ(rq,
{
@@ -2903,13 +3398,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
if (schdlr_sspnd.online == 1) {
/* No schedulers to resume */
ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
- schdlr_sspnd.changing = 0;
+ ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB);
}
else if (erts_common_run_queue) {
for (ix = 1; ix < schdlr_sspnd.online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 0);
+ erts_smp_atomic_band(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ~ERTS_SSI_FLG_SUSPENDED);
wake_all_schedulers();
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
}
else {
int online = schdlr_sspnd.online;
@@ -2926,6 +3421,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
erts_smp_runq_lock(rq);
ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x4);
erts_smp_runq_unlock(rq);
+ scheduler_ix_resume_wake(ix);
}
/* Spread evacuation paths among all online run queues */
@@ -2941,7 +3437,6 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
erts_smp_runq_unlock(ERTS_RUNQ_IX(0));
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
}
res = ERTS_SCHDLR_SSPND_DONE;
}
@@ -2968,8 +3463,11 @@ erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value)
int
erts_is_multi_scheduling_blocked(void)
{
- return (erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing)
- && erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
+ int res;
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ res = schdlr_sspnd.msb.procs != NULL;
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+ return res;
}
Eterm
@@ -2978,7 +3476,7 @@ erts_multi_scheduling_blockers(Process *p)
Eterm res = NIL;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- if (erts_is_multi_scheduling_blocked()) {
+ if (schdlr_sspnd.msb.procs) {
Eterm *hp, *hp_end;
ErtsProcList *plp1, *plp2;
Uint max_size;
@@ -3010,18 +3508,26 @@ erts_multi_scheduling_blockers(Process *p)
static void *
sched_thread_func(void *vesdp)
{
+#ifdef ERTS_SMP
+ Uint no = ((ErtsSchedulerData *) vesdp)->no;
+#endif
#ifdef ERTS_ENABLE_LOCK_CHECK
{
char buf[31];
- Uint no = ((ErtsSchedulerData *) vesdp)->no;
erts_snprintf(&buf[0], 31, "scheduler %bpu", no);
erts_lc_set_thread_name(&buf[0]);
}
#endif
- erts_alloc_reg_scheduler_id(((ErtsSchedulerData *) vesdp)->no);
+ erts_alloc_reg_scheduler_id(no);
erts_tsd_set(sched_data_key, vesdp);
#ifdef ERTS_SMP
+
+ erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp);
+
erts_proc_lock_prepare_proc_lock_waiter();
+ ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
+
+
#endif
erts_register_blockable_thread();
#ifdef HIPE
@@ -3030,30 +3536,30 @@ sched_thread_func(void *vesdp)
erts_thread_init_float();
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- ASSERT(schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE);
+ ASSERT(erts_smp_atomic_read(&schdlr_sspnd.changing)
+ & ERTS_SCHDLR_SSPND_CHNG_ONLN);
- schdlr_sspnd.curr_online--;
+ if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) {
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_ONLN);
+ if (((ErtsSchedulerData *) vesdp)->no != 1)
+ erts_smp_cnd_signal(&schdlr_sspnd.cnd);
+ }
- if (((ErtsSchedulerData *) vesdp)->no != 1) {
- if (schdlr_sspnd.online == schdlr_sspnd.curr_online) {
- schdlr_sspnd.changing = 0;
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
+ if (((ErtsSchedulerData *) vesdp)->no == 1) {
+ if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) {
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
+ susp_sched_prep_block,
+ susp_sched_resume_block,
+ NULL);
+ while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
+ erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
+ susp_sched_prep_block,
+ susp_sched_resume_block,
+ NULL);
}
- }
- else if (schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
- schdlr_sspnd.changing = 0;
- else {
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
- erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- ASSERT(!schdlr_sspnd.changing);
+ ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
@@ -3089,11 +3595,7 @@ erts_start_schedulers(void)
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual);
actual++;
ASSERT(actual == esdp->no);
-#ifdef ERTS_ENABLE_LOCK_COUNT
- res = erts_lcnt_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts);
-#else
res = ethr_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts);
-#endif
if (res != 0) {
actual--;
break;
@@ -3122,1351 +3624,6 @@ erts_start_schedulers(void)
#endif /* ERTS_SMP */
-static int
-int_cmp(const void *vx, const void *vy)
-{
- return *((int *) vx) - *((int *) vy);
-}
-
-static int
-cpu_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- return 0;
-}
-
-static int
-cpu_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- return 0;
-}
-
-static int
-cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->core != y->core)
- return x->core - y->core;
- return 0;
-}
-
-static int
-cpu_no_spread_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static ERTS_INLINE void
-make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
-{
- int ix;
- int node = -1;
- int processor = -1;
- int processor_node = -1;
- int processor_node_node = -1;
- int core = -1;
- int thread = -1;
- int old_node = -1;
- int old_processor = -1;
- int old_processor_node = -1;
- int old_core = -1;
- int old_thread = -1;
-
- for (ix = 0; ix < size; ix++) {
- if (!no_node || cpudata[ix].node >= 0) {
- if (old_node == cpudata[ix].node)
- cpudata[ix].node = node;
- else {
- old_node = cpudata[ix].node;
- old_processor = processor = -1;
- if (!no_node)
- old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node || cpudata[ix].node >= 0)
- cpudata[ix].node = ++node;
- }
- }
- if (old_processor == cpudata[ix].processor)
- cpudata[ix].processor = processor;
- else {
- old_processor = cpudata[ix].processor;
- if (!no_node)
- processor_node_node = old_processor_node = processor_node = -1;
- old_core = core = -1;
- old_thread = thread = -1;
- cpudata[ix].processor = ++processor;
- }
- if (no_node && cpudata[ix].processor_node < 0)
- old_processor_node = -1;
- else {
- if (old_processor_node == cpudata[ix].processor_node) {
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = node;
- else {
- if (processor_node_node >= 0)
- cpudata[ix].node = processor_node_node;
- cpudata[ix].processor_node = processor_node;
- }
- }
- else {
- old_processor_node = cpudata[ix].processor_node;
- old_core = core = -1;
- old_thread = thread = -1;
- if (no_node)
- cpudata[ix].node = cpudata[ix].processor_node = ++node;
- else {
- cpudata[ix].node = processor_node_node = ++node;
- cpudata[ix].processor_node = ++processor_node;
- }
- }
- }
- if (!no_node && cpudata[ix].processor_node < 0)
- cpudata[ix].processor_node = 0;
- if (old_core == cpudata[ix].core)
- cpudata[ix].core = core;
- else {
- old_core = cpudata[ix].core;
- old_thread = thread = -1;
- cpudata[ix].core = ++core;
- }
- if (old_thread == cpudata[ix].thread)
- cpudata[ix].thread = thread;
- else
- old_thread = cpudata[ix].thread = ++thread;
- }
-}
-
-static void
-cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
- int size,
- ErtsCpuBindOrder bind_order,
- int mk_seq)
-{
- if (size > 1) {
- int no_node = 0;
- int (*cmp_func)(const void *, const void *);
- switch (bind_order) {
- case ERTS_CPU_BIND_SPREAD:
- cmp_func = cpu_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_PROCESSOR_SPREAD:
- cmp_func = cpu_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_SPREAD:
- cmp_func = cpu_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_processor_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
- no_node = 1;
- cmp_func = cpu_no_node_thread_spread_order_cmp;
- break;
- case ERTS_CPU_BIND_NO_SPREAD:
- cmp_func = cpu_no_spread_order_cmp;
- break;
- default:
- cmp_func = NULL;
- erl_exit(ERTS_ABORT_EXIT,
- "Bad cpu bind type: %d\n",
- (int) cpu_bind_order);
- break;
- }
-
- if (mk_seq)
- make_cpudata_id_seq(cpudata, size, no_node);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
- }
-}
-
-static int
-processor_order_cmp(const void *vx, const void *vy)
-{
- erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
- erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
- if (x->processor != y->processor)
- return x->processor - y->processor;
- if (x->node != y->node)
- return x->node - y->node;
- if (x->processor_node != y->processor_node)
- return x->processor_node - y->processor_node;
- if (x->core != y->core)
- return x->core - y->core;
- if (x->thread != y->thread)
- return x->thread - y->thread;
- return 0;
-}
-
-static void
-check_cpu_bind(ErtsSchedulerData *esdp)
-{
- int res;
- int cpu_id;
- erts_smp_runq_unlock(esdp->run_queue);
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- cpu_id = scheduler2cpu_map[esdp->no].bind_id;
- if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
- res = erts_bind_to_cpu(erts_cpuinfo, cpu_id);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
- else {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- if (scheduler2cpu_map[esdp->no].bound_id >= 0)
- goto unbind;
- }
- }
- else if (cpu_id < 0 && scheduler2cpu_map[esdp->no].bound_id >= 0) {
- unbind:
- /* Get rid of old binding */
- res = erts_unbind_from_cpu(erts_cpuinfo);
- if (res == 0)
- esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- else {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
- (int) esdp->no, cpu_id, erl_errno_id(-res));
- erts_send_error_to_logger_nogl(dsbufp);
- }
- }
- erts_smp_runq_lock(esdp->run_queue);
-#ifdef ERTS_SMP
- if (erts_common_run_queue)
- erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
- else {
- esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
- }
-#endif
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
-}
-
-static void
-signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
-{
- int s_ix = 1;
- int cpu_ix;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
-
- cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
-
- for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
- if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical))
- scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
- }
-
- if (s_ix <= erts_no_schedulers)
- for (; s_ix <= erts_no_schedulers; s_ix++)
- scheduler2cpu_map[s_ix].bind_id = -1;
-
-#ifdef ERTS_SMP
- if (erts_common_run_queue) {
- for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1);
- wake_all_schedulers();
- }
- else {
- ERTS_FOREACH_RUNQ(rq,
- {
- rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
- wake_scheduler(rq, 0);
- });
- }
-#else
- check_cpu_bind(erts_get_scheduler_data());
-#endif
-}
-
-int
-erts_init_scheduler_bind_type(char *how)
-{
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP)
- return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
-
- if (!system_cpudata && !user_cpudata)
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
-
- if (sys_strcmp(how, "s") == 0)
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (sys_strcmp(how, "ps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "ts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (sys_strcmp(how, "db") == 0
- || sys_strcmp(how, "tnnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnps") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (sys_strcmp(how, "nnts") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (sys_strcmp(how, "ns") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (sys_strcmp(how, "u") == 0)
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else
- return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
-
- return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
-}
-
-typedef struct {
- int *id;
- int used;
- int size;
-} ErtsCpuTopIdSeq;
-
-typedef struct {
- ErtsCpuTopIdSeq logical;
- ErtsCpuTopIdSeq thread;
- ErtsCpuTopIdSeq core;
- ErtsCpuTopIdSeq processor_node;
- ErtsCpuTopIdSeq processor;
- ErtsCpuTopIdSeq node;
-} ErtsCpuTopEntry;
-
-static void
-init_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- int size = 10;
- cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->logical.size = size;
- cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->thread.size = size;
- cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->core.size = size;
- cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor_node.size = size;
- cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->processor.size = size;
- cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
- sizeof(int)*size);
- cte->node.size = size;
-}
-
-static void
-destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
- erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
-}
-
-static int
-get_cput_value_or_range(int *v, int *vr, char **str)
-{
- long l;
- char *c = *str;
- errno = 0;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
- *v = (int) l;
- if (*c == '-') {
- c++;
- if (!isdigit((unsigned char)*c))
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- l = strtol(c, &c, 10);
- if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- *vr = (int) l;
- }
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
-{
- int ix = 0;
- int need_size = 0;
- char *c = *str;
-
- while (1) {
- int res;
- int val;
- int nids;
- int val_range = -1;
- res = get_cput_value_or_range(&val, &val_range, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- if (val_range < 0 || val_range == val)
- nids = 1;
- else {
- if (val_range > val)
- nids = val_range - val + 1;
- else
- nids = val - val_range + 1;
- }
- need_size += nids;
- if (need_size > idseq->size) {
- idseq->size = need_size + 10;
- idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
- idseq->id,
- sizeof(int)*idseq->size);
- }
- if (nids == 1)
- idseq->id[ix++] = val;
- else if (val_range > val) {
- for (; val <= val_range; val++)
- idseq->id[ix++] = val;
- }
- else {
- for (; val >= val_range; val--)
- idseq->id[ix++] = val;
- }
- if (*c != ',')
- break;
- c++;
- }
- *str = c;
- idseq->used = ix;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_entry(ErtsCpuTopEntry *cput, char **str)
-{
- int h;
- char *c = *str;
-
- cput->logical.used = 0;
- cput->thread.id[0] = 0;
- cput->thread.used = 1;
- cput->core.id[0] = 0;
- cput->core.used = 1;
- cput->processor_node.id[0] = -1;
- cput->processor_node.used = 1;
- cput->processor.id[0] = 0;
- cput->processor.used = 1;
- cput->node.id[0] = -1;
- cput->node.used = 1;
-
- h = ERTS_TOPOLOGY_MAX_DEPTH;
- while (*c != ':' && *c != '\0') {
- int res;
- ErtsCpuTopIdSeq *idseqp;
- switch (*c++) {
- case 'L':
- if (h <= ERTS_TOPOLOGY_LOGICAL)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->logical;
- h = ERTS_TOPOLOGY_LOGICAL;
- break;
- case 't':
- case 'T':
- if (h <= ERTS_TOPOLOGY_THREAD)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->thread;
- h = ERTS_TOPOLOGY_THREAD;
- break;
- case 'c':
- case 'C':
- if (h <= ERTS_TOPOLOGY_CORE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->core;
- h = ERTS_TOPOLOGY_CORE;
- break;
- case 'p':
- case 'P':
- if (h <= ERTS_TOPOLOGY_PROCESSOR)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor;
- h = ERTS_TOPOLOGY_PROCESSOR;
- break;
- case 'n':
- case 'N':
- if (h <= ERTS_TOPOLOGY_PROCESSOR) {
- do_node:
- if (h <= ERTS_TOPOLOGY_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->node;
- h = ERTS_TOPOLOGY_NODE;
- }
- else {
- int p_node = 0;
- char *p_chk = c;
- while (*p_chk != '\0' && *p_chk != ':') {
- if (*p_chk == 'p' || *p_chk == 'P') {
- p_node = 1;
- break;
- }
- p_chk++;
- }
- if (!p_node)
- goto do_node;
- if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
- idseqp = &cput->processor_node;
- h = ERTS_TOPOLOGY_PROCESSOR_NODE;
- }
- break;
- default:
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
- }
- res = get_cput_id_seq(idseqp, &c);
- if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
- return res;
- }
-
- if (cput->logical.used < 1)
- return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
-
- if (*c == ':') {
- c++;
- }
-
- if (cput->thread.used != 1
- && cput->thread.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->core.used != 1
- && cput->core.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor_node.used != 1
- && cput->processor_node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->processor.used != 1
- && cput->processor.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
- if (cput->node.used != 1
- && cput->node.used != cput->logical.used)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-
- *str = c;
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-verify_topology(erts_cpu_topology_t *cpudata, int size)
-{
- if (size > 0) {
- int *logical;
- int node, processor, no_nodes, i;
-
- /* Verify logical ids */
- logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
-
- for (i = 0; i < user_cpudata_size; i++)
- logical[i] = user_cpudata[i].logical;
-
- qsort(logical, user_cpudata_size, sizeof(int), int_cmp);
- for (i = 0; i < user_cpudata_size-1; i++) {
- if (logical[i] == logical[i+1]) {
- erts_free(ERTS_ALC_T_TMP, logical);
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
- }
- }
-
- erts_free(ERTS_ALC_T_TMP, logical);
-
- qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
-
- /* Verify unique entities */
-
- for (i = 1; i < user_cpudata_size; i++) {
- if (user_cpudata[i-1].processor == user_cpudata[i].processor
- && user_cpudata[i-1].node == user_cpudata[i].node
- && (user_cpudata[i-1].processor_node
- == user_cpudata[i].processor_node)
- && user_cpudata[i-1].core == user_cpudata[i].core
- && user_cpudata[i-1].thread == user_cpudata[i].thread) {
- return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
- }
- }
-
- /* Verify numa nodes */
- node = cpudata[0].node;
- processor = cpudata[0].processor;
- no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
- for (i = 1; i < size; i++) {
- if (no_nodes) {
- if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- else {
- if (cpudata[i].processor == processor && cpudata[i].node != node)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- node = cpudata[i].node;
- processor = cpudata[i].processor;
- if (node >= 0 && cpudata[i].processor_node >= 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- if (node < 0 && cpudata[i].processor_node < 0)
- return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
- }
- }
- }
-
- return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-int
-erts_init_cpu_topology(char *topology_str)
-{
- ErtsCpuTopEntry cput;
- int need_size;
- char *c;
- int ix;
- int error = ERTS_INIT_CPU_TOPOLOGY_OK;
-
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 10;
-
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
-
- init_cpu_top_entry(&cput);
-
- ix = 0;
- need_size = 0;
-
- c = topology_str;
- if (*c == '\0') {
- error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
- goto fail;
- }
- do {
- int r;
- error = get_cput_entry(&cput, &c);
- if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
- goto fail;
- need_size += cput.logical.used;
- if (user_cpudata_size < need_size) {
- user_cpudata_size = need_size + 10;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- ASSERT(cput.thread.used == 1
- || cput.thread.used == cput.logical.used);
- ASSERT(cput.core.used == 1
- || cput.core.used == cput.logical.used);
- ASSERT(cput.processor_node.used == 1
- || cput.processor_node.used == cput.logical.used);
- ASSERT(cput.processor.used == 1
- || cput.processor.used == cput.logical.used);
- ASSERT(cput.node.used == 1
- || cput.node.used == cput.logical.used);
-
- for (r = 0; r < cput.logical.used; r++) {
- user_cpudata[ix].logical = cput.logical.id[r];
- user_cpudata[ix].thread =
- cput.thread.id[cput.thread.used == 1 ? 0 : r];
- user_cpudata[ix].core =
- cput.core.id[cput.core.used == 1 ? 0 : r];
- user_cpudata[ix].processor_node =
- cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
- user_cpudata[ix].processor =
- cput.processor.id[cput.processor.used == 1 ? 0 : r];
- user_cpudata[ix].node =
- cput.node.id[cput.node.used == 1 ? 0 : r];
- ix++;
- }
- } while (*c != '\0');
-
- if (user_cpudata_size != ix) {
- user_cpudata_size = ix;
- user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
- user_cpudata,
- (sizeof(erts_cpu_topology_t)
- * user_cpudata_size));
- }
-
- error = verify_topology(user_cpudata, user_cpudata_size);
- if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
- destroy_cpu_top_entry(&cput);
- return ERTS_INIT_CPU_TOPOLOGY_OK;
- }
-
- fail:
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata_size = 0;
- destroy_cpu_top_entry(&cput);
- return error;
-}
-
-#define ERTS_GET_CPU_TOPOLOGY_ERROR -1
-#define ERTS_GET_USED_CPU_TOPOLOGY 0
-#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1
-#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2
-
-static Eterm get_cpu_topology_term(Process *c_p, int type);
-
-Eterm
-erts_set_cpu_topology(Process *c_p, Eterm term)
-{
- erts_cpu_topology_t *cpudata = NULL;
- int cpudata_size = 0;
- Eterm res;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
- res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
- if (term == am_undefined) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
- cpudata_size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
- }
- else if (is_not_list(term)) {
- error:
- res = THE_NON_VALUE;
- goto done;
- }
- else {
- Eterm list = term;
- int ix = 0;
-
- cpudata_size = 100;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
-
- while (is_list(list)) {
- Eterm *lp = list_val(list);
- Eterm cpu = CAR(lp);
- Eterm* tp;
- Sint id;
-
- if (is_not_tuple(cpu))
- goto error;
-
- tp = tuple_val(cpu);
-
- if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
- goto error;
-
- if (ix >= cpudata_size) {
- cpudata_size += 100;
- cpudata = erts_realloc(ERTS_ALC_T_TMP,
- cpudata,
- (sizeof(erts_cpu_topology_t)
- * cpudata_size));
- }
-
- id = signed_val(tp[2]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].node = (int) id;
-
- id = signed_val(tp[3]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor = (int) id;
-
- id = signed_val(tp[4]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].processor_node = (int) id;
-
- id = signed_val(tp[5]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].core = (int) id;
-
- id = signed_val(tp[6]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].thread = (int) id;
-
- id = signed_val(tp[7]);
- if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
- goto error;
- cpudata[ix].logical = (int) id;
-
- list = CDR(lp);
- ix++;
- }
-
- if (is_not_nil(list))
- goto error;
-
- cpudata_size = ix;
-
- if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
- goto error;
-
- if (user_cpudata_size != cpudata_size) {
- if (user_cpudata)
- erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
- user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- user_cpudata_size = cpudata_size;
- }
-
- sys_memcpy((void *) user_cpudata,
- (void *) cpudata,
- sizeof(erts_cpu_topology_t)*cpudata_size);
- }
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- done:
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-static Eterm
-bound_schedulers_term(ErtsCpuBindOrder order)
-{
- switch (order) {
- case ERTS_CPU_BIND_SPREAD: {
- ERTS_DECL_AM(spread);
- return AM_spread;
- }
- case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(processor_spread);
- return AM_processor_spread;
- }
- case ERTS_CPU_BIND_THREAD_SPREAD: {
- ERTS_DECL_AM(thread_spread);
- return AM_thread_spread;
- }
- case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(thread_no_node_processor_spread);
- return AM_thread_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
- ERTS_DECL_AM(no_node_processor_spread);
- return AM_no_node_processor_spread;
- }
- case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
- ERTS_DECL_AM(no_node_thread_spread);
- return AM_no_node_thread_spread;
- }
- case ERTS_CPU_BIND_NO_SPREAD: {
- ERTS_DECL_AM(no_spread);
- return AM_no_spread;
- }
- case ERTS_CPU_BIND_NONE: {
- ERTS_DECL_AM(unbound);
- return AM_unbound;
- }
- default:
- ASSERT(0);
- return THE_NON_VALUE;
- }
-}
-
-Eterm
-erts_bound_schedulers_term(Process *c_p)
-{
- ErtsCpuBindOrder order;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- order = cpu_bind_order;
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return bound_schedulers_term(order);
-}
-
-static void
-create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
-{
- if (user_cpudata) {
- *cpudata_size = user_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) user_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else if (system_cpudata) {
- *cpudata_size = system_cpudata_size;
- *cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * (*cpudata_size)));
- sys_memcpy((void *) *cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*(*cpudata_size));
- }
- else {
- *cpudata = NULL;
- *cpudata_size = 0;
- }
-}
-
-static void
-destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
-{
- if (cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-}
-
-Eterm
-erts_bind_schedulers(Process *c_p, Eterm how)
-{
- Eterm res;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- ErtsCpuBindOrder old_cpu_bind_order;
-
- erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-
- if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) {
- ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
- }
- else {
-
- old_cpu_bind_order = cpu_bind_order;
-
- if (ERTS_IS_ATOM_STR("spread", how))
- cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("default_bind", how)
- || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-
- if (!cpudata) {
- cpu_bind_order = old_cpu_bind_order;
- ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
- goto done;
- }
-
- signal_schedulers_bind_change(cpudata, cpudata_size);
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- res = bound_schedulers_term(old_cpu_bind_order);
- }
-
- done:
-
- erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
- return res;
-}
-
-Eterm
-erts_fake_scheduler_bindings(Process *p, Eterm how)
-{
- ErtsCpuBindOrder fake_cpu_bind_order;
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- Eterm res;
-
- if (ERTS_IS_ATOM_STR("spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
- else if (ERTS_IS_ATOM_STR("processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("default_bind", how)
- || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("no_spread", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
- else if (ERTS_IS_ATOM_STR("unbound", how))
- fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
- else {
- ERTS_BIF_PREP_ERROR(res, p, BADARG);
- return res;
- }
-
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-
- if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
- ERTS_BIF_PREP_RET(res, am_false);
- else {
- int i;
- Eterm *hp;
-
- cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
-
-#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
-
- erts_fprintf(stderr, "node: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].node);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "processor: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor);
- erts_fprintf(stderr, "\n");
- if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
- && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
- erts_fprintf(stderr, "processor_node:");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
- erts_fprintf(stderr, "\n");
- }
- erts_fprintf(stderr, "core: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].core);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "thread: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].thread);
- erts_fprintf(stderr, "\n");
- erts_fprintf(stderr, "logical: ");
- for (i = 0; i < cpudata_size; i++)
- erts_fprintf(stderr, " %2d", cpudata[i].logical);
- erts_fprintf(stderr, "\n");
-#endif
-
- hp = HAlloc(p, cpudata_size+1);
- ERTS_BIF_PREP_RET(res, make_tuple(hp));
- *hp++ = make_arityval((Uint) cpudata_size);
- for (i = 0; i < cpudata_size; i++)
- *hp++ = make_small((Uint) cpudata[i].logical);
- }
-
- destroy_tmp_cpu_topology_copy(cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_schedulers_binds(Process *c_p)
-{
- int ix;
- ERTS_DECL_AM(unbound);
- Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
- Eterm res = make_tuple(hp);
-
- *(hp++) = make_arityval(erts_no_schedulers);
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- for (ix = 1; ix <= erts_no_schedulers; ix++)
- *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
- ? make_small(scheduler2cpu_map[ix].bound_id)
- : AM_unbound);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static Eterm
-bld_topology_term(Eterm **hpp,
- Uint *hszp,
- erts_cpu_topology_t *cpudata,
- int size)
-{
- Eterm res = NIL;
- int i;
-
- if (size == 0)
- return am_undefined;
-
- for (i = size-1; i >= 0; i--) {
- res = erts_bld_cons(hpp,
- hszp,
- erts_bld_tuple(hpp,
- hszp,
- 7,
- am_cpu,
- make_small(cpudata[i].node),
- make_small(cpudata[i].processor),
- make_small(cpudata[i].processor_node),
- make_small(cpudata[i].core),
- make_small(cpudata[i].thread),
- make_small(cpudata[i].logical)),
- res);
- }
- return res;
-}
-
-static Eterm
-get_cpu_topology_term(Process *c_p, int type)
-{
-#ifdef DEBUG
- Eterm *hp_end;
-#endif
- Eterm *hp;
- Uint hsz;
- Eterm res = THE_NON_VALUE;
- erts_cpu_topology_t *cpudata = NULL;
- int size = 0;
-
- switch (type) {
- case ERTS_GET_USED_CPU_TOPOLOGY:
- if (user_cpudata)
- goto defined;
- else
- goto detected;
- case ERTS_GET_DETECTED_CPU_TOPOLOGY:
- detected:
- if (!system_cpudata)
- res = am_undefined;
- else {
- size = system_cpudata_size;
- cpudata = erts_alloc(ERTS_ALC_T_TMP,
- (sizeof(erts_cpu_topology_t)
- * size));
- sys_memcpy((void *) cpudata,
- (void *) system_cpudata,
- sizeof(erts_cpu_topology_t)*size);
- }
- break;
- case ERTS_GET_DEFINED_CPU_TOPOLOGY:
- defined:
- if (!user_cpudata)
- res = am_undefined;
- else {
- size = user_cpudata_size;
- cpudata = user_cpudata;
- }
- break;
- default:
- erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
- break;
- }
-
- if (res == am_undefined) {
- ASSERT(!cpudata);
- return res;
- }
-
- hsz = 0;
-
- bld_topology_term(NULL, &hsz,
- cpudata, size);
-
- hp = HAlloc(c_p, hsz);
-
-#ifdef DEBUG
- hp_end = hp + hsz;
-#endif
-
- res = bld_topology_term(&hp, NULL,
- cpudata, size);
-
- ASSERT(hp_end == hp);
-
- if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
- erts_free(ERTS_ALC_T_TMP, cpudata);
-
- return res;
-}
-
-Eterm
-erts_get_cpu_topology_term(Process *c_p, Eterm which)
-{
- Eterm res;
- int type;
- erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
- if (ERTS_IS_ATOM_STR("used", which))
- type = ERTS_GET_USED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("detected", which))
- type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
- else if (ERTS_IS_ATOM_STR("defined", which))
- type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
- else
- type = ERTS_GET_CPU_TOPOLOGY_ERROR;
- if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
- res = THE_NON_VALUE;
- else
- res = get_cpu_topology_term(c_p, type);
- erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
- return res;
-}
-
-static void
-early_cpu_bind_init(void)
-{
- user_cpudata = NULL;
- user_cpudata_size = 0;
-
- system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
- system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(erts_cpu_topology_t)
- * system_cpudata_size));
-
- cpu_bind_order = ERTS_CPU_BIND_NONE;
-
- if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
- || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
- system_cpudata_size)) {
- erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
- system_cpudata = NULL;
- system_cpudata_size = 0;
- }
-}
-
-static void
-late_cpu_bind_init(void)
-{
- int ix;
-
- erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind");
-
- scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
- (sizeof(ErtsCpuBindData)
- * (erts_no_schedulers+1)));
- for (ix = 1; ix <= erts_no_schedulers; ix++) {
- scheduler2cpu_map[ix].bind_id = -1;
- scheduler2cpu_map[ix].bound_id = -1;
- }
-
- if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
- erts_cpu_topology_t *cpudata;
- int cpudata_size;
- create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
- ASSERT(cpudata);
- signal_schedulers_bind_change(cpudata, cpudata_size);
- destroy_tmp_cpu_topology_copy(cpudata);
- }
-}
-
#ifdef ERTS_SMP
static void
@@ -4481,7 +3638,7 @@ add_pend_suspend(Process *suspendee,
sizeof(ErtsPendingSuspend));
psp->next = NULL;
#ifdef DEBUG
-#ifdef ARCH_64
+#if defined(ARCH_64) && !HALFWORD_HEAP
psp->end = (ErtsPendingSuspend *) 0xdeaddeaddeaddead;
#else
psp->end = (ErtsPendingSuspend *) 0xdeaddead;
@@ -5322,7 +4479,7 @@ dequeue_process(ErtsRunQueue *runq, Process *p)
}
/* schedule a process */
-static ERTS_INLINE void
+static ERTS_INLINE ErtsRunQueue *
internal_add_to_runq(ErtsRunQueue *runq, Process *p)
{
Uint32 prev_status = p->status;
@@ -5333,12 +4490,12 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p)
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq));
if (p->status_flags & ERTS_PROC_SFLG_INRUNQ)
- return;
+ return NULL;
else if (p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) {
ASSERT(p->status != P_SUSPENDED);
ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p);
p->status_flags |= ERTS_PROC_SFLG_PENDADD2SCHEDQ;
- return;
+ return NULL;
}
ASSERT(!p->scheduler_data);
#endif
@@ -5377,20 +4534,23 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p)
profile_runnable_proc(p, am_active);
}
- smp_notify_inc_runq(add_runq);
-
if (add_runq != runq)
erts_smp_runq_unlock(add_runq);
+
+ return add_runq;
}
void
erts_add_to_runq(Process *p)
{
+ ErtsRunQueue *notify_runq;
ErtsRunQueue *runq = erts_get_runq_proc(p);
erts_smp_runq_lock(runq);
- internal_add_to_runq(runq, p);
+ notify_runq = internal_add_to_runq(runq, p);
erts_smp_runq_unlock(runq);
+ smp_notify_inc_runq(notify_runq);
+
}
/* Possibly remove a scheduled process we need to suspend */
@@ -5529,8 +4689,6 @@ erts_proc_migrate(Process *p, ErtsProcLocks *plcks,
p->run_queue = to_rq;
enqueue_process(to_rq, p);
- smp_notify_inc_runq(to_rq);
-
return ERTS_MIGRATE_SUCCESS;
}
#endif /* ERTS_SMP */
@@ -5727,30 +4885,6 @@ erts_set_process_priority(Process *p, Eterm new_value)
return old_value;
}
-#ifdef ERTS_SMP
-
-static ERTS_INLINE int
-prepare_for_sys_schedule(void)
-{
- while (!erts_port_task_have_outstanding_io_tasks()
- && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) {
- if (!erts_port_task_have_outstanding_io_tasks())
- return 1;
- erts_smp_atomic_set(&doing_sys_schedule, 0);
- }
- return 0;
-}
-
-#else
-
-static ERTS_INLINE int
-prepare_for_sys_schedule(void)
-{
- return !erts_port_task_have_outstanding_io_tasks();
-}
-
-#endif
-
/* note that P_RUNNING is only set so that we don't try to remove
** running processes from the schedule queue if they exit - a running
** process not being in the schedule queue!!
@@ -5839,6 +4973,9 @@ Process *schedule(Process *p, int calls)
}
if (IS_TRACED(p)) {
+ if (IS_TRACED_FL(p, F_TRACE_CALLS) && p->status != P_FREE) {
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_OUT);
+ }
switch (p->status) {
case P_EXITING:
if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT))
@@ -5882,8 +5019,11 @@ Process *schedule(Process *p, int calls)
p->status_flags &= ~ERTS_PROC_SFLG_RUNNING;
if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) {
+ ErtsRunQueue *notify_runq;
p->status_flags &= ~ERTS_PROC_SFLG_PENDADD2SCHEDQ;
- internal_add_to_runq(rq, p);
+ notify_runq = internal_add_to_runq(rq, p);
+ if (notify_runq != rq)
+ smp_notify_inc_runq(notify_runq);
}
#endif
@@ -5951,21 +5091,33 @@ Process *schedule(Process *p, int calls)
| ERTS_RUNQ_FLG_CHK_CPU_BIND
| ERTS_RUNQ_FLG_SUSPENDED)) {
if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED)
- || erts_smp_atomic_read(&esdp->suspended)) {
+ || (erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED)) {
+ ASSERT(erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED);
suspend_scheduler(esdp);
}
if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND)
|| erts_smp_atomic_read(&esdp->chk_cpu_bind)) {
- check_cpu_bind(esdp);
+ erts_sched_check_cpu_bind(esdp);
}
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- if (esdp->check_children) {
- esdp->check_children = 0;
- erts_smp_runq_unlock(rq);
- erts_check_children();
- erts_smp_runq_lock(rq);
+#if defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK)
+ {
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
+ long aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work) {
+ erts_smp_runq_unlock(rq);
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
+#endif
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
+ erts_smp_runq_lock(rq);
+ }
}
#endif
@@ -5997,7 +5149,10 @@ Process *schedule(Process *p, int calls)
if (rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ
| ERTS_RUNQ_FLG_SUSPENDED)) {
if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED)
- || erts_smp_atomic_read(&esdp->suspended)) {
+ || (erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED)) {
+ ASSERT(erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED);
non_empty_runq(rq);
goto continue_check_activities_to_run;
}
@@ -6014,17 +5169,7 @@ Process *schedule(Process *p, int calls)
}
}
- if (prepare_for_sys_schedule()) {
- erts_smp_atomic_set(&function_calls, 0);
- fcalls = 0;
- sched_sys_wait(esdp->no, rq);
- erts_smp_atomic_set(&doing_sys_schedule, 0);
- }
- else {
- /* If all schedulers are waiting, one of them *should*
- be waiting in erl_sys_schedule() */
- sched_cnd_wait(esdp->no, rq);
- }
+ scheduler_wait(&fcalls, esdp, rq);
non_empty_runq(rq);
@@ -6050,7 +5195,9 @@ Process *schedule(Process *p, int calls)
erts_smp_atomic_set(&function_calls, 0);
fcalls = 0;
+
ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
#ifdef ERTS_SMP
/* erts_sys_schedule_interrupt(0); */
#endif
@@ -6081,12 +5228,12 @@ Process *schedule(Process *p, int calls)
if (rq->wakeup_other < 0)
rq->wakeup_other = 0;
}
- else if (rq->wakeup_other < ERTS_WAKEUP_OTHER_LIMIT)
+ else if (rq->wakeup_other < wakeup_other_limit)
rq->wakeup_other += rq->len*wo_reds + ERTS_WAKEUP_OTHER_FIXED_INC;
else {
if (erts_common_run_queue) {
if (erts_common_run_queue->waiting)
- wake_one_scheduler();
+ wake_scheduler(erts_common_run_queue, 0, 1);
}
else if (erts_smp_atomic_read(&no_empty_run_queues) != 0) {
wake_scheduler_on_empty_runq(rq);
@@ -6231,10 +5378,10 @@ Process *schedule(Process *p, int calls)
erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS);
if (erts_sched_stat.enabled) {
- Uint old = ERTS_PROC_SCHED_ID(p,
+ UWord old = ERTS_PROC_SCHED_ID(p,
(ERTS_PROC_LOCK_MAIN
| ERTS_PROC_LOCK_STATUS),
- esdp->no);
+ (UWord) esdp->no);
int migrated = old && old != esdp->no;
erts_smp_spin_lock(&erts_sched_stat.lock);
@@ -6275,7 +5422,11 @@ Process *schedule(Process *p, int calls)
trace_virtual_sched(p, am_in);
break;
}
+ if (IS_TRACED_FL(p, F_TRACE_CALLS)) {
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_IN);
+ }
}
+
if (p->status != P_EXITING)
p->status = P_RUNNING;
@@ -6378,6 +5529,15 @@ erts_schedule_misc_op(void (*func)(void *), void *arg)
ErtsRunQueue *rq = erts_get_runq_current(NULL);
ErtsMiscOpList *molp = misc_op_list_alloc();
+ if (!rq) {
+ /*
+ * This can only happen when the sys msg dispatcher
+ * thread schedules misc ops (this happens *very*
+ * seldom; only when trace drivers are unloaded).
+ */
+ rq = ERTS_RUNQ_IX(0);
+ }
+
erts_smp_runq_lock(rq);
while (rq->misc.evac_runq) {
@@ -6397,8 +5557,8 @@ erts_schedule_misc_op(void (*func)(void *), void *arg)
else
rq->misc.start = molp;
rq->misc.end = molp;
- smp_notify_inc_runq(rq);
erts_smp_runq_unlock(rq);
+ smp_notify_inc_runq(rq);
}
static void
@@ -6640,7 +5800,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
Eterm args, /* Arguments for function (must be well-formed list). */
ErlSpawnOpts* so) /* Options for spawn. */
{
- ErtsRunQueue *rq;
+ ErtsRunQueue *rq, *notify_runq;
Process *p;
Sint arity; /* Number of arguments. */
#ifndef HYBRID
@@ -6719,11 +5879,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
/*
* Must initialize binary lists here before copying binaries to process.
*/
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
heap_need +=
@@ -6757,13 +5913,14 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
p->bin_vheap_sz = p->min_vheap_size;
p->bin_old_vheap_sz = p->min_vheap_size;
p->bin_old_vheap = 0;
+ p->bin_vheap_mature = 0;
/* No need to initialize p->fcalls. */
p->current = p->initial+INITIAL_MOD;
- p->i = (Eterm *) beam_apply;
- p->cp = (Eterm *) beam_apply+1;
+ p->i = (BeamInstr *) beam_apply;
+ p->cp = (BeamInstr *) beam_apply+1;
p->arg_reg = p->def_arg_reg;
p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]);
@@ -6813,7 +5970,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
p->group_leader =
IS_CONST(parent->group_leader)
? parent->group_leader
- : STORE_NC(&p->htop, &p->off_heap.externals, parent->group_leader);
+ : STORE_NC(&p->htop, &p->off_heap, parent->group_leader);
}
erts_get_default_tracing(&p->trace_flags, &p->tracer_proc);
@@ -6957,10 +6114,12 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
#endif
p->status = P_WAITING;
- internal_add_to_runq(rq, p);
+ notify_runq = internal_add_to_runq(rq, p);
erts_smp_runq_unlock(rq);
+ smp_notify_inc_runq(notify_runq);
+
res = p->id;
erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL);
@@ -7007,6 +6166,7 @@ void erts_init_empty_process(Process *p)
p->bin_vheap_sz = BIN_VH_MIN_SIZE;
p->bin_old_vheap_sz = BIN_VH_MIN_SIZE;
p->bin_old_vheap = 0;
+ p->bin_vheap_mature = 0;
#ifdef ERTS_SMP
p->u.ptimer = NULL;
p->bound_runq = NULL;
@@ -7014,11 +6174,7 @@ void erts_init_empty_process(Process *p)
memset(&(p->u.tm), 0, sizeof(ErlTimer));
#endif
p->next = NULL;
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
p->reg = NULL;
p->heap_sz = 0;
@@ -7165,11 +6321,7 @@ erts_debug_verify_clean_empty_process(Process* p)
/* Thing that erts_cleanup_empty_process() cleans up */
- ASSERT(p->off_heap.mso == NULL);
-#ifndef HYBRID /* FIND ME! */
- ASSERT(p->off_heap.funs == NULL);
-#endif
- ASSERT(p->off_heap.externals == NULL);
+ ASSERT(p->off_heap.first == NULL);
ASSERT(p->off_heap.overhead == 0);
ASSERT(p->mbuf == NULL);
@@ -7180,25 +6332,16 @@ erts_debug_verify_clean_empty_process(Process* p)
void
erts_cleanup_empty_process(Process* p)
{
- ErlHeapFragment* mbufp;
-
/* We only check fields that are known to be used... */
erts_cleanup_offheap(&p->off_heap);
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
- mbufp = p->mbuf;
- while (mbufp) {
- ErlHeapFragment *next = mbufp->next;
- free_message_buffer(mbufp);
- mbufp = next;
+ if (p->mbuf != NULL) {
+ free_message_buffer(p->mbuf);
+ p->mbuf = NULL;
}
- p->mbuf = NULL;
#if defined(ERTS_ENABLE_LOCK_COUNT) && defined(ERTS_SMP)
erts_lcnt_proc_lock_destroy(p);
#endif
@@ -7214,7 +6357,6 @@ static void
delete_process(Process* p)
{
ErlMessage* mp;
- ErlHeapFragment* bp;
VERBOSE(DEBUG_PROCESSES, ("Removing process: %T\n",p->id));
@@ -7230,7 +6372,7 @@ delete_process(Process* p)
* The mso list should not be used anymore, but if it is, make sure that
* we'll notice.
*/
- p->off_heap.mso = (void *) 0x8DEFFACD;
+ p->off_heap.first = (void *) 0x8DEFFACD;
if (p->arg_reg != p->def_arg_reg) {
erts_free(ERTS_ALC_T_ARG_REG, p->arg_reg);
@@ -7264,11 +6406,8 @@ delete_process(Process* p)
/*
* Free all pending message buffers.
*/
- bp = p->mbuf;
- while (bp != NULL) {
- ErlHeapFragment* next_bp = bp->next;
- free_message_buffer(bp);
- bp = next_bp;
+ if (p->mbuf != NULL) {
+ free_message_buffer(p->mbuf);
}
erts_erase_dicts(p);
@@ -7348,7 +6487,7 @@ set_proc_exiting(Process *p, Eterm reason, ErlHeapFragment *bp)
p->freason = EXTAG_EXIT;
KILL_CATCHES(p);
cancel_timer(p);
- p->i = (Eterm *) beam_exit;
+ p->i = (BeamInstr *) beam_exit;
}
@@ -7778,9 +6917,10 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext)
erts_port_release(prt);
} else if (is_internal_pid(mon->pid)) {/* local by name or pid */
Eterm watched;
- Eterm lhp[3];
+ DeclareTmpHeapNoproc(lhp,3);
ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK
| ERTS_PROC_LOCKS_MSG_SEND);
+ UseTmpHeapNoproc(3);
rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks);
if (rp == NULL) {
goto done;
@@ -7795,6 +6935,7 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext)
erts_queue_monitor_message(rp, &rp_locks, mon->ref, am_process,
watched, pcontext->reason);
}
+ UnUseTmpHeapNoproc(3);
/* else: demonitor while we exited, i.e. do nothing... */
erts_smp_proc_unlock(rp, rp_locks);
} else { /* external by pid or name */
@@ -8025,8 +7166,13 @@ erts_do_exit_process(Process* p, Eterm reason)
ERTS_SMP_MSGQ_MV_INQ2PRIVQ(p);
#endif
- if (IS_TRACED_FL(p,F_TRACE_PROCS))
- trace_proc(p, p, am_exit, reason);
+ if (IS_TRACED(p)) {
+ if (IS_TRACED_FL(p, F_TRACE_CALLS))
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_EXITING);
+
+ if (IS_TRACED_FL(p,F_TRACE_PROCS))
+ trace_proc(p, p, am_exit, reason);
+ }
erts_trace_check_exiting(p->id);
@@ -8075,6 +7221,8 @@ continue_exit_process(Process *p
Eterm reason = p->fvalue;
DistEntry *dep;
struct saved_calls *scb;
+ process_breakpoint_time_t *pbt;
+
#ifdef DEBUG
int yield_allowed = 1;
#endif
@@ -8214,6 +7362,7 @@ continue_exit_process(Process *p
? ERTS_PROC_SET_DIST_ENTRY(p, ERTS_PROC_LOCKS_ALL, NULL)
: NULL);
scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, ERTS_PROC_LOCKS_ALL, NULL);
+ pbt = ERTS_PROC_SET_CALL_TIME(p, ERTS_PROC_LOCKS_ALL, NULL);
erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL);
processes_busy--;
@@ -8228,11 +7377,12 @@ continue_exit_process(Process *p
* Pre-build the EXIT tuple if there are any links.
*/
if (lnk) {
- Eterm tmp_heap[4];
+ DeclareTmpHeap(tmp_heap,4,p);
Eterm exit_tuple;
Uint exit_tuple_sz;
Eterm* hp;
+ UseTmpHeap(4,p);
hp = &tmp_heap[0];
exit_tuple = TUPLE3(hp, am_EXIT, p->id, reason);
@@ -8243,16 +7393,21 @@ continue_exit_process(Process *p
ExitLinkContext context = {p, reason, exit_tuple, exit_tuple_sz};
erts_sweep_links(lnk, &doit_exit_link, &context);
}
+ UnUseTmpHeap(4,p);
}
{
ExitMonitorContext context = {reason, p};
- erts_sweep_monitors(mon,&doit_exit_monitor,&context);
+ erts_sweep_monitors(mon,&doit_exit_monitor,&context); /* Allocates TmpHeap, but we
+ have none here */
}
if (scb)
erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb);
+ if (pbt)
+ erts_free(ERTS_ALC_T_BPD, (void *) pbt);
+
delete_process(p);
erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN);
@@ -8271,7 +7426,7 @@ continue_exit_process(Process *p
ASSERT(p->status == P_EXITING);
- p->i = (Eterm *) beam_continue_exit;
+ p->i = (BeamInstr *) beam_continue_exit;
if (!(curr_locks & ERTS_PROC_LOCK_STATUS)) {
erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS);
@@ -8291,7 +7446,7 @@ continue_exit_process(Process *p
static void
timeout_proc(Process* p)
{
- p->i = (Eterm *) p->def_arg_reg[0];
+ p->i = *((BeamInstr **) (UWord) p->def_arg_reg);
p->flags |= F_TIMO;
p->flags &= ~F_INSLPQUEUE;
@@ -8390,9 +7545,9 @@ erts_program_counter_info(int to, void *to_arg, Process *p)
}
static void
-print_function_from_pc(int to, void *to_arg, Eterm* x)
+print_function_from_pc(int to, void *to_arg, BeamInstr* x)
{
- Eterm* addr = find_function_from_pc(x);
+ BeamInstr* addr = find_function_from_pc(x);
if (addr == NULL) {
if (x == beam_exit) {
erts_print(to, to_arg, "<terminate process>");
@@ -8426,7 +7581,7 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg)
}
if (is_CP(x)) {
- erts_print(to, to_arg, "Return addr %p (", (Eterm *) x);
+ erts_print(to, to_arg, "Return addr %p (", (Eterm *) EXPAND_POINTER(x));
print_function_from_pc(to, to_arg, cp_val(x));
erts_print(to, to_arg, ")\n");
yreg = 0;
@@ -9255,8 +8410,8 @@ init_processes_bif(void)
processes_trap_export.code[0] = am_erlang;
processes_trap_export.code[1] = am_processes_trap;
processes_trap_export.code[2] = 2;
- processes_trap_export.code[3] = (Eterm) em_apply_bif;
- processes_trap_export.code[4] = (Eterm) &processes_trap;
+ processes_trap_export.code[3] = (BeamInstr) em_apply_bif;
+ processes_trap_export.code[4] = (BeamInstr) &processes_trap;
#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST
erts_get_emu_time(&debug_tv_start);