aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_process.c
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam/erl_process.c')
-rw-r--r--erts/emulator/beam/erl_process.c2100
1 files changed, 1591 insertions, 509 deletions
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 996806fc75..761096e9ad 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -38,6 +38,7 @@
#include "erl_instrument.h"
#include "erl_threads.h"
#include "erl_binary.h"
+#include "beam_bp.h"
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -45,7 +46,13 @@
#define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10)
-#define ERTS_SCHED_SLEEP_SPINCOUNT 10000
+#define ERTS_SCHED_SPIN_UNTIL_YIELD 100
+
+#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT 10
+#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT 1000
+#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT \
+ (ERTS_SCHED_SYS_SLEEP_SPINCOUNT*ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT)
+#define ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT 0
#define ERTS_WAKEUP_OTHER_LIMIT (100*CONTEXT_REDS/2)
#define ERTS_WAKEUP_OTHER_DEC 10
@@ -91,9 +98,9 @@ do { \
#define ERTS_EMPTY_RUNQ(RQ) \
((RQ)->len == 0 && (RQ)->misc.start == NULL)
-extern Eterm beam_apply[];
-extern Eterm beam_exit[];
-extern Eterm beam_continue_exit[];
+extern BeamInstr beam_apply[];
+extern BeamInstr beam_exit[];
+extern BeamInstr beam_continue_exit[];
static Sint p_last;
static Sint p_next;
@@ -105,6 +112,10 @@ Uint erts_no_schedulers;
Uint erts_max_processes = ERTS_DEFAULT_MAX_PROCESSES;
Uint erts_process_tab_index_mask;
+#ifdef ERTS_SMP
+Uint erts_max_main_threads;
+#endif
+
int erts_sched_thread_suggested_stack_size = -1;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -115,16 +126,34 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
int erts_disable_proc_not_running_opt;
-#define ERTS_SCHED_CHANGING_ONLINE 1
-#define ERTS_SCHED_CHANGING_MULTI_SCHED 2
+#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((long) 1) << 0)
+#define ERTS_SCHDLR_SSPND_CHNG_MSB (((long) 1) << 1)
+#define ERTS_SCHDLR_SSPND_CHNG_ONLN (((long) 1) << 2)
+
+#ifndef DEBUG
+
+#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \
+ erts_smp_atomic_set(&schdlr_sspnd.changing, (VAL))
+
+#else
+
+#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \
+do { \
+ long old_val__ = erts_smp_atomic_xchg(&schdlr_sspnd.changing, \
+ (VAL)); \
+ ASSERT(old_val__ == (OLD_VAL)); \
+} while (0)
+
+#endif
+
static struct {
erts_smp_mtx_t mtx;
erts_smp_cnd_t cnd;
- int changing;
int online;
int curr_online;
int wait_curr_online;
+ erts_smp_atomic_t changing;
erts_smp_atomic_t active;
struct {
erts_smp_atomic_t ongoing;
@@ -180,6 +209,7 @@ static ErtsCpuBindData *scheduler2cpu_map;
erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
typedef enum {
+ ERTS_CPU_BIND_UNDEFINED,
ERTS_CPU_BIND_SPREAD,
ERTS_CPU_BIND_PROCESSOR_SPREAD,
ERTS_CPU_BIND_THREAD_SPREAD,
@@ -190,6 +220,9 @@ typedef enum {
ERTS_CPU_BIND_NONE
} ErtsCpuBindOrder;
+#define ERTS_CPU_BIND_DEFAULT_BIND \
+ ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+
ErtsCpuBindOrder cpu_bind_order;
static erts_cpu_topology_t *user_cpudata;
@@ -226,6 +259,17 @@ typedef union {
ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+#ifdef ERTS_SMP
+
+typedef union {
+ ErtsSchedulerSleepInfo ssi;
+ char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerSleepInfo))];
+} ErtsAlignedSchedulerSleepInfo;
+
+static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info;
+
+#endif
+
#ifndef BM_COUNTERS
static int processes_busy;
#endif
@@ -283,8 +327,15 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
200,
ERTS_ALC_T_PROC_LIST)
-#define ERTS_RUNQ_IX(IX) (&erts_aligned_run_queues[(IX)].runq)
-#define ERTS_SCHEDULER_IX(IX) (&erts_aligned_scheduler_data[(IX)].esd)
+#define ERTS_RUNQ_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues), \
+ &erts_aligned_run_queues[(IX)].runq)
+#define ERTS_SCHEDULER_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
+ &erts_aligned_scheduler_data[(IX)].esd)
+#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
+ (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
+ &aligned_sched_sleep_info[(IX)].ssi)
#define ERTS_FOREACH_RUNQ(RQVAR, DO) \
do { \
@@ -334,7 +385,7 @@ do { \
static void init_processes_bif(void);
static void save_terminating_process(Process *p);
static void exec_misc_ops(ErtsRunQueue *);
-static void print_function_from_pc(int to, void *to_arg, Eterm* x);
+static void print_function_from_pc(int to, void *to_arg, BeamInstr* x);
static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
int yreg);
#ifdef ERTS_SMP
@@ -348,6 +399,11 @@ static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size
#endif
+static int reader_group_lookup(int logical);
+static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
+ int *cpudata_size);
+static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
+
static void early_cpu_bind_init(void);
static void late_cpu_bind_init(void);
@@ -388,7 +444,12 @@ erts_pre_init_process(void)
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks
= ERTS_PSD_DIST_ENTRY_GET_LOCKS;
erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks
- = ERTS_PSD_DIST_ENTRY_GET_LOCKS;
+ = ERTS_PSD_DIST_ENTRY_SET_LOCKS;
+
+ erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks
+ = ERTS_PSD_CALL_TIME_BP_GET_LOCKS;
+ erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks
+ = ERTS_PSD_CALL_TIME_BP_SET_LOCKS;
/* Check that we have locks for all entries */
for (ix = 0; ix < ERTS_PSD_SIZE; ix++) {
@@ -572,6 +633,76 @@ erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data)
#ifdef ERTS_SMP
+void
+erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, long flags)
+{
+ switch (flags & ERTS_SSI_FLGS_SLEEP_TYPE) {
+ case ERTS_SSI_FLG_POLL_SLEEPING:
+ erts_sys_schedule_interrupt(1);
+ break;
+ case ERTS_SSI_FLG_TSE_SLEEPING:
+ erts_tse_set(ssi->event);
+ break;
+ case 0:
+ break;
+ default:
+ erl_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error\n",
+ __FILE__, __LINE__);
+ break;
+ }
+}
+
+#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
+void
+erts_smp_notify_check_children_needed(void)
+{
+ int i;
+
+ for (i = 0; i < erts_no_schedulers; i++) {
+ long aux_work;
+ ErtsSchedulerSleepInfo *ssi;
+ ssi = ERTS_SCHED_SLEEP_INFO_IX(i);
+ aux_work = erts_smp_atomic_bor(&ssi->aux_work,
+ ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
+ if (!(aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN))
+ erts_sched_poke(ssi);
+ }
+}
+#endif
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+static ERTS_INLINE long
+blockable_aux_work(ErtsSchedulerData *esdp,
+ ErtsSchedulerSleepInfo *ssi,
+ long aux_work)
+{
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
+ if (aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN) {
+ aux_work = erts_smp_atomic_band(&ssi->aux_work,
+ ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
+ aux_work &= ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN;
+ erts_check_children();
+ }
+#endif
+ }
+ return aux_work;
+}
+
+#endif
+
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+static ERTS_INLINE long
+nonblockable_aux_work(ErtsSchedulerData *esdp,
+ ErtsSchedulerSleepInfo *ssi,
+ long aux_work)
+{
+ if (aux_work & ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK) {
+
+ }
+}
+#endif
+
static void
prepare_for_block(void *vrq)
{
@@ -624,7 +755,31 @@ erts_active_schedulers(void)
return as;
}
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
#ifdef ERTS_SMP
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ }
+ return 0;
+#else
+ return !erts_port_task_have_outstanding_io_tasks();
+#endif
+}
+
+#ifdef ERTS_SMP
+
+static ERTS_INLINE void
+sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq)
+{
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ ASSERT(rq->waiting < 0);
+ rq->waiting *= -1;
+}
static ERTS_INLINE void
sched_waiting(Uint no, ErtsRunQueue *rq)
@@ -666,7 +821,11 @@ empty_runq(ErtsRunQueue *rq)
if (oifls & ERTS_RUNQ_IFLG_NONEMPTY) {
#ifdef DEBUG
long empty = erts_smp_atomic_read(&no_empty_run_queues);
- ASSERT(0 <= empty && empty < erts_no_run_queues);
+ /*
+ * For a short period of time no_empty_run_queues may have
+ * been increased twice for a specific run queue.
+ */
+ ASSERT(0 <= empty && empty < 2*erts_no_run_queues);
#endif
erts_smp_atomic_inc(&no_empty_run_queues);
}
@@ -679,107 +838,314 @@ non_empty_runq(ErtsRunQueue *rq)
if (!(oifls & ERTS_RUNQ_IFLG_NONEMPTY)) {
#ifdef DEBUG
long empty = erts_smp_atomic_read(&no_empty_run_queues);
- ASSERT(0 < empty && empty <= erts_no_run_queues);
+ /*
+ * For a short period of time no_empty_run_queues may have
+ * been increased twice for a specific run queue.
+ */
+ ASSERT(0 < empty && empty <= 2*erts_no_run_queues);
#endif
erts_smp_atomic_dec(&no_empty_run_queues);
}
}
-static ERTS_INLINE int
-sched_spin_wake(ErtsRunQueue *rq)
+static long
+sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi)
{
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- return 0;
-#else
- long val;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING);
+ long xflgs = 0;
- val = erts_smp_atomic_read(&rq->spin_waiter);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_atomic_inc(&rq->spin_wake);
- return 1;
- }
- return 0;
-#endif
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ xflgs = oflgs;
+ } while (!(oflgs & ERTS_SSI_FLG_SUSPENDED));
+ return oflgs;
}
-static ERTS_INLINE int
-sched_spin_wake_all(ErtsRunQueue *rq)
+static long
+sched_prep_cont_spin_wait(ErtsSchedulerSleepInfo *ssi)
{
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- return 0;
-#else
- long val;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING);
+ long xflgs = ERTS_SSI_FLG_WAITING;
- val = erts_smp_atomic_read(&rq->spin_waiter);
- ASSERT(val >= 0);
- if (val != 0)
- erts_smp_atomic_add(&rq->spin_wake, val);
- return val;
-#endif
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ xflgs = oflgs;
+ nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED;
+ } while (oflgs & ERTS_SSI_FLG_WAITING);
+ return oflgs;
}
+static long
+sched_spin_wait(ErtsSchedulerSleepInfo *ssi, int spincount)
+{
+ long until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ int sc = spincount;
+ long flgs;
+
+ do {
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if ((flgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
+ != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
+ break;
+ }
+ ERTS_SPIN_BODY;
+ if (--until_yield == 0) {
+ until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ erts_thr_yield();
+ }
+ } while (--sc > 0);
+ return flgs;
+}
+
+static long
+sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, long sleep_type)
+{
+ long oflgs;
+ long nflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING|sleep_type;
+ long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
+
+ if (sleep_type == ERTS_SSI_FLG_TSE_SLEEPING)
+ erts_tse_reset(ssi->event);
+
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ if ((oflgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING))
+ != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) {
+ return oflgs;
+ }
+ xflgs = oflgs;
+ nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED;
+ }
+}
+
+#define ERTS_SCHED_WAIT_WOKEN(FLGS) \
+ (((FLGS) & (ERTS_SSI_FLG_WAITING|ERTS_SSI_FLG_SUSPENDED)) \
+ != ERTS_SSI_FLG_WAITING)
+
static void
-sched_sys_wait(Uint no, ErtsRunQueue *rq)
+scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
- long dt;
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- int val;
- int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
+ int spincount;
+ long flgs;
+#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
+ long aux_work;
+#endif
+
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+ erts_smp_spin_lock(&rq->sleepers.lock);
+ flgs = sched_prep_spin_wait(ssi);
+ if (flgs & ERTS_SSI_FLG_SUSPENDED) {
+ /* Go suspend instead... */
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+ return;
+ }
+
+ ssi->prev = NULL;
+ ssi->next = rq->sleepers.list;
+ if (rq->sleepers.list)
+ rq->sleepers.list->prev = ssi;
+ rq->sleepers.list = ssi;
+ erts_smp_spin_unlock(&rq->sleepers.lock);
+
+ /*
+ * If all schedulers are waiting, one of them *should*
+ * be waiting in erl_sys_schedule()
+ */
+
+ if (!prepare_for_sys_schedule()) {
+
+ sched_waiting(esdp->no, rq);
+
+ erts_smp_runq_unlock(rq);
+
+ spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
+
+ tse_wait:
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ tse_blockable_aux_work:
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
#endif
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- sched_waiting_sys(no, rq);
+ while (1) {
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- erts_smp_atomic_inc(&rq->spin_waiter);
- erts_smp_runq_unlock(rq);
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
- erl_sys_schedule(1); /* Might give us something to do */
+ flgs = sched_spin_wait(ssi, spincount);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ int res;
+ ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
+ }
- dt = do_time_read_and_reset();
- if (dt) bump_timer(dt);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ break;
+ }
+
+ flgs = sched_prep_cont_spin_wait(ssi);
+ spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT;
+
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ break;
+ }
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ goto tse_blockable_aux_work;
+ }
+#endif
- while (spincount-- > 0) {
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_runq_lock(rq);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0)
- goto woken;
- if (spincount == 0)
- goto sleep;
- erts_smp_runq_unlock(rq);
}
- }
- erts_smp_runq_lock(rq);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- woken:
- erts_smp_atomic_dec(&rq->spin_wake);
- ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
+ erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+
+ erts_smp_runq_lock(rq);
+ sched_active(esdp->no, rq);
+
}
else {
- sleep:
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
+ long dt;
+
+ erts_smp_atomic_set(&function_calls, 0);
+ *fcalls = 0;
+
+ sched_waiting_sys(esdp->no, rq);
+
+ erts_smp_runq_unlock(rq);
+
+ spincount = ERTS_SCHED_SYS_SLEEP_SPINCOUNT;
+
+ while (spincount-- > 0) {
+
+ sys_poll_aux_work:
+
+ erl_sys_schedule(1); /* Might give us something to do */
+
+ dt = do_time_read_and_reset();
+ if (dt) bump_timer(dt);
+
+ sys_aux_work:
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
+#endif
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
+
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ }
+
+ /*
+ * If we got new I/O tasks we aren't allowed to
+ * call erl_sys_schedule() until it is handled.
+ */
+ if (erts_port_task_have_outstanding_io_tasks()) {
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ /*
+ * Got to check that we still got I/O tasks; otherwise
+ * we have to continue checking for I/O...
+ */
+ if (!prepare_for_sys_schedule()) {
+ spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
+ goto tse_wait;
+ }
+ }
+ }
+
+ erts_smp_runq_lock(rq);
+
/*
* If we got new I/O tasks we aren't allowed to
* sleep in erl_sys_schedule().
*/
- if (!erts_port_task_have_outstanding_io_tasks()) {
-#endif
+ if (erts_port_task_have_outstanding_io_tasks()) {
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ /*
+ * Got to check that we still got I/O tasks; otherwise
+ * we have to wait in erl_sys_schedule() after all...
+ */
+ if (prepare_for_sys_schedule())
+ goto do_sys_schedule;
+
+ /*
+ * Not allowed to wait in erl_sys_schedule;
+ * do tse wait instead...
+ */
+ sched_change_waiting_sys_to_waiting(esdp->no, rq);
+ erts_smp_runq_unlock(rq);
+ spincount = 0;
+ goto tse_wait;
+ }
+ else {
+ do_sys_schedule:
erts_sys_schedule_interrupt(0);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
+ if (!(flgs & ERTS_SSI_FLG_WAITING))
+ goto sys_locked_woken;
+ erts_smp_runq_unlock(rq);
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
+ }
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+ goto sys_poll_aux_work;
+ }
+
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+
erts_smp_runq_unlock(rq);
erl_sys_schedule(0);
@@ -787,134 +1153,103 @@ sched_sys_wait(Uint no, ErtsRunQueue *rq)
dt = do_time_read_and_reset();
if (dt) bump_timer(dt);
- erts_smp_runq_lock(rq);
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (flgs & ERTS_SSI_FLG_WAITING)
+ goto sys_aux_work;
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
+ sys_woken:
+ erts_smp_runq_lock(rq);
+ sys_locked_woken:
+ erts_smp_atomic_set(&doing_sys_schedule, 0);
+ if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
+ erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+ sched_active_sys(esdp->no, rq);
}
}
-#endif
-
- sched_active_sys(no, rq);
-}
-static void
-sched_cnd_wait(Uint no, ErtsRunQueue *rq)
-{
-#if ERTS_SCHED_SLEEP_SPINCOUNT != 0
- int val;
- int spincount = ERTS_SCHED_SLEEP_SPINCOUNT;
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
-#endif
-
- sched_waiting(no, rq);
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- prepare_for_block,
- resume_after_block,
- (void *) rq);
-
-#if ERTS_SCHED_SLEEP_SPINCOUNT == 0
- erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
-#else
- erts_smp_atomic_inc(&rq->spin_waiter);
- erts_smp_mtx_unlock(&rq->mtx);
-
- while (spincount-- > 0) {
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0) {
- erts_smp_mtx_lock(&rq->mtx);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val != 0)
- goto woken;
- if (spincount == 0)
- goto sleep;
- erts_smp_mtx_unlock(&rq->mtx);
- }
- }
-
- erts_smp_mtx_lock(&rq->mtx);
- val = erts_smp_atomic_read(&rq->spin_wake);
- ASSERT(val >= 0);
- if (val == 0) {
- sleep:
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
- erts_smp_cnd_wait(&rq->cnd, &rq->mtx);
- }
- else {
- woken:
- erts_smp_atomic_dec(&rq->spin_wake);
- ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0);
- erts_smp_atomic_dec(&rq->spin_waiter);
- ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0);
- }
-#endif
-
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- prepare_for_block,
- resume_after_block,
- (void *) rq);
-
- sched_active(no, rq);
}
-static void
-wake_one_scheduler(void)
-{
- ASSERT(erts_common_run_queue);
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(erts_common_run_queue));
- if (erts_common_run_queue->waiting) {
- if (!sched_spin_wake(erts_common_run_queue)) {
- if (erts_common_run_queue->waiting == -1) /* One scheduler waiting
- and doing so in
- sys_schedule */
- erts_sys_schedule_interrupt(1);
- else
- erts_smp_cnd_signal(&erts_common_run_queue->cnd);
- }
+static ERTS_INLINE long
+ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi)
+{
+ /* reset all flags but suspended */
+ long oflgs;
+ long nflgs = 0;
+ long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING;
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return oflgs;
+ nflgs = oflgs & ERTS_SSI_FLG_SUSPENDED;
+ xflgs = oflgs;
}
}
static void
-wake_scheduler(ErtsRunQueue *rq, int incq)
+wake_scheduler(ErtsRunQueue *rq, int incq, int one)
{
- ASSERT(!erts_common_run_queue);
- ASSERT(-1 <= rq->waiting && rq->waiting <= 1);
- ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
- if (rq->waiting && !rq->woken) {
- if (!sched_spin_wake(rq)) {
- if (rq->waiting < 0)
- erts_sys_schedule_interrupt(1);
- else
- erts_smp_cnd_signal(&rq->cnd);
+ int res;
+ ErtsSchedulerSleepInfo *ssi;
+ ErtsSchedulerSleepList *sl;
+
+ /*
+ * The unlocked run queue is not strictly necessary
+ * from a thread safety or deadlock prevention
+ * perspective. It will, however, cost us performance
+ * if it is locked during wakup of another scheduler,
+ * so all code *should* handle this without having
+ * the lock on the run queue.
+ */
+ ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq));
+
+ sl = &rq->sleepers;
+
+ erts_smp_spin_lock(&sl->lock);
+ ssi = sl->list;
+ if (!ssi)
+ erts_smp_spin_unlock(&sl->lock);
+ else if (one) {
+ long flgs;
+ if (ssi->prev)
+ ssi->prev->next = ssi->next;
+ else {
+ ASSERT(sl->list == ssi);
+ sl->list = ssi->next;
}
- rq->woken = 1;
- if (incq)
+ if (ssi->next)
+ ssi->next->prev = ssi->prev;
+
+ res = sl->list != NULL;
+ erts_smp_spin_unlock(&sl->lock);
+
+ flgs = ssi_flags_set_wake(ssi);
+ erts_sched_finish_poke(ssi, flgs);
+
+ if (incq && !erts_common_run_queue && (flgs & ERTS_SSI_FLG_WAITING))
non_empty_runq(rq);
}
+ else {
+ sl->list = NULL;
+ erts_smp_spin_unlock(&sl->lock);
+ do {
+ ErtsSchedulerSleepInfo *wake_ssi = ssi;
+ ssi = ssi->next;
+ erts_sched_finish_poke(ssi, ssi_flags_set_wake(wake_ssi));
+ } while (ssi);
+ }
}
static void
wake_all_schedulers(void)
{
- if (erts_common_run_queue) {
- erts_smp_runq_lock(erts_common_run_queue);
- if (erts_common_run_queue->waiting) {
- if (erts_common_run_queue->waiting < 0)
- erts_sys_schedule_interrupt(1);
- sched_spin_wake_all(erts_common_run_queue);
- erts_smp_cnd_broadcast(&erts_common_run_queue->cnd);
- }
- erts_smp_runq_unlock(erts_common_run_queue);
- }
+ if (erts_common_run_queue)
+ wake_scheduler(erts_common_run_queue, 0, 0);
else {
int ix;
for (ix = 0; ix < erts_no_run_queues; ix++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
- erts_smp_runq_lock(rq);
- wake_scheduler(rq, 0);
- erts_smp_runq_unlock(rq);
+ wake_scheduler(rq, 0, 1);
}
}
}
@@ -929,14 +1264,14 @@ chk_wake_sched(ErtsRunQueue *crq, int ix, int activate)
wrq = ERTS_RUNQ_IX(ix);
iflgs = erts_smp_atomic_read(&wrq->info_flags);
if (!(iflgs & (ERTS_RUNQ_IFLG_SUSPENDED|ERTS_RUNQ_IFLG_NONEMPTY))) {
- erts_smp_xrunq_lock(crq, wrq);
if (activate) {
if (ix == erts_smp_atomic_cmpxchg(&balance_info.active_runqs, ix+1, ix)) {
+ erts_smp_xrunq_lock(crq, wrq);
wrq->flags &= ~ERTS_RUNQ_FLG_INACTIVE;
+ erts_smp_xrunq_unlock(crq, wrq);
}
}
- wake_scheduler(wrq, 0);
- erts_smp_xrunq_unlock(crq, wrq);
+ wake_scheduler(wrq, 0, 1);
return 1;
}
return 0;
@@ -982,15 +1317,13 @@ static ERTS_INLINE void
smp_notify_inc_runq(ErtsRunQueue *runq)
{
#ifdef ERTS_SMP
- if (erts_common_run_queue)
- wake_one_scheduler();
- else
- wake_scheduler(runq, 1);
+ if (runq)
+ wake_scheduler(runq, 1, 1);
#endif
}
void
-erts_smp_notify_inc_runq__(ErtsRunQueue *runq)
+erts_smp_notify_inc_runq(ErtsRunQueue *runq)
{
smp_notify_inc_runq(runq);
}
@@ -1136,20 +1469,23 @@ static void
evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
{
Port *prt;
+ int notify_to_rq = 0;
int prio;
int prt_locked = 0;
int rq_locked = 0;
int evac_rq_locked = 1;
+ ErtsMigrateResult mres;
erts_smp_runq_lock(evac_rq);
+ erts_smp_atomic_bor(&evac_rq->scheduler->ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+
evac_rq->flags &= ~ERTS_RUNQ_FLGS_IMMIGRATE_QMASK;
evac_rq->flags |= (ERTS_RUNQ_FLGS_EMIGRATE_QMASK
| ERTS_RUNQ_FLGS_EVACUATE_QMASK
| ERTS_RUNQ_FLG_SUSPENDED);
erts_smp_atomic_bor(&evac_rq->info_flags, ERTS_RUNQ_IFLG_SUSPENDED);
-
/*
* Need to set up evacuation paths first since we
* may release the run queue lock on evac_rq
@@ -1177,9 +1513,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
/* Evacuate scheduled ports */
prt = evac_rq->ports.start;
while (prt) {
- (void) erts_port_migrate(prt, &prt_locked,
+ mres = erts_port_migrate(prt, &prt_locked,
evac_rq, &evac_rq_locked,
rq, &rq_locked);
+ if (mres == ERTS_MIGRATE_SUCCESS)
+ notify_to_rq = 1;
if (prt_locked)
erts_smp_port_unlock(prt);
if (!evac_rq_locked) {
@@ -1208,9 +1546,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
goto end_of_proc;
}
- (void) erts_proc_migrate(proc, &proc_locks,
+ mres = erts_proc_migrate(proc, &proc_locks,
evac_rq, &evac_rq_locked,
rq, &rq_locked);
+ if (mres == ERTS_MIGRATE_SUCCESS)
+ notify_to_rq = 1;
if (proc_locks)
erts_smp_proc_unlock(proc, proc_locks);
if (!evac_rq_locked) {
@@ -1242,10 +1582,13 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq)
if (rq_locked)
erts_smp_runq_unlock(rq);
- if (!evac_rq_locked)
- erts_smp_runq_lock(evac_rq);
- wake_scheduler(evac_rq, 0);
- erts_smp_runq_unlock(evac_rq);
+ if (evac_rq_locked)
+ erts_smp_runq_unlock(evac_rq);
+
+ if (notify_to_rq)
+ smp_notify_inc_runq(rq);
+
+ wake_scheduler(evac_rq, 0, 1);
}
static int
@@ -1473,31 +1816,6 @@ try_steal_task(ErtsRunQueue *rq)
return res;
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
-void
-erts_smp_notify_check_children_needed(void)
-{
- int i;
- for (i = 0; i < erts_no_schedulers; i++) {
- erts_smp_runq_lock(ERTS_SCHEDULER_IX(i)->run_queue);
- ERTS_SCHEDULER_IX(i)->check_children = 1;
- if (!erts_common_run_queue)
- wake_scheduler(ERTS_SCHEDULER_IX(i)->run_queue, 0);
- erts_smp_runq_unlock(ERTS_SCHEDULER_IX(i)->run_queue);
- }
- if (ongoing_multi_scheduling_block()) {
- /* Also blocked schedulers need to check children */
- erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- for (i = 0; i < erts_no_schedulers; i++)
- ERTS_SCHEDULER_IX(i)->blocked_check_children = 1;
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- }
- if (erts_common_run_queue)
- wake_all_schedulers();
-}
-#endif
-
/* Run queue balancing */
typedef struct {
@@ -2078,16 +2396,20 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_aligned_run_queues = erts_alloc(ERTS_ALC_T_RUNQS,
(sizeof(ErtsAlignedRunQueue)*(n+1)));
- if ((((Uint) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0)
+ if ((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) != 0)
erts_aligned_run_queues = ((ErtsAlignedRunQueue *)
- ((((Uint) erts_aligned_run_queues)
+ ((((UWord) erts_aligned_run_queues)
& ~ERTS_CACHE_LINE_MASK)
+ ERTS_CACHE_LINE_SIZE));
+ ASSERT((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0);
+
#ifdef ERTS_SMP
erts_smp_atomic_init(&no_empty_run_queues, 0);
#endif
+ erts_no_run_queues = n;
+
for (ix = 0; ix < n; ix++) {
int pix, rix;
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
@@ -2102,8 +2424,10 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1));
erts_smp_cnd_init(&rq->cnd);
- erts_smp_atomic_init(&rq->spin_waiter, 0);
- erts_smp_atomic_init(&rq->spin_wake, 0);
+#ifdef ERTS_SMP
+ erts_smp_spinlock_init(&rq->sleepers.lock, "run_queue_sleep_list");
+ rq->sleepers.list = NULL;
+#endif
rq->waiting = 0;
rq->woken = 0;
@@ -2154,7 +2478,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
}
erts_common_run_queue = !mrq ? ERTS_RUNQ_IX(0) : NULL;
- erts_no_run_queues = n;
#ifdef ERTS_SMP
@@ -2169,23 +2492,59 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
#endif
+ n = (int) no_schedulers;
+ erts_no_schedulers = n;
+
+#ifdef ERTS_SMP
+ /* Create and initialize scheduler sleep info */
+
+ aligned_sched_sleep_info = erts_alloc(ERTS_ALC_T_SCHDLR_SLP_INFO,
+ (sizeof(ErtsAlignedSchedulerSleepInfo)
+ *(n+1)));
+ if ((((Uint) aligned_sched_sleep_info) & ERTS_CACHE_LINE_MASK) == 0)
+ aligned_sched_sleep_info = ((ErtsAlignedSchedulerSleepInfo *)
+ ((((Uint) aligned_sched_sleep_info)
+ & ~ERTS_CACHE_LINE_MASK)
+ + ERTS_CACHE_LINE_SIZE));
+ for (ix = 0; ix < n; ix++) {
+ ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+#if 0 /* no need to initialize these... */
+ ssi->next = NULL;
+ ssi->prev = NULL;
+#endif
+ erts_smp_atomic_init(&ssi->flags, 0);
+ ssi->event = NULL; /* initialized in sched_thread_func */
+ erts_smp_atomic_init(&ssi->aux_work, 0);
+ }
+#endif
+
/* Create and initialize scheduler specific data */
- n = (int) no_schedulers;
erts_aligned_scheduler_data = erts_alloc(ERTS_ALC_T_SCHDLR_DATA,
(sizeof(ErtsAlignedSchedulerData)
*(n+1)));
- if ((((Uint) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0)
+ if ((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) != 0)
erts_aligned_scheduler_data = ((ErtsAlignedSchedulerData *)
- ((((Uint) erts_aligned_scheduler_data)
+ ((((UWord) erts_aligned_scheduler_data)
& ~ERTS_CACHE_LINE_MASK)
+ ERTS_CACHE_LINE_SIZE));
+
+ ASSERT((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0);
+
for (ix = 0; ix < n; ix++) {
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix);
#ifdef ERTS_SMP
erts_bits_init_state(&esdp->erl_bits_state);
esdp->match_pseudo_process = NULL;
+ esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
esdp->free_process = NULL;
+#if HALFWORD_HEAP
+ /* Registers need to be heap allocated (correct memory range) for tracing to work */
+ esdp->save_reg = erts_alloc(ERTS_ALC_T_BEAM_REGISTER, ERTS_X_REGS_ALLOCATED * sizeof(Eterm));
+#endif
+#endif
+#if !HEAP_ON_C_STACK
+ esdp->num_tmp_heap_used = 0;
#endif
esdp->no = (Uint) ix+1;
esdp->current_process = NULL;
@@ -2206,11 +2565,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
}
#ifdef ERTS_SMP
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- esdp->check_children = 0;
- esdp->blocked_check_children = 0;
-#endif
- erts_smp_atomic_init(&esdp->suspended, 0);
erts_smp_atomic_init(&esdp->chk_cpu_bind, 0);
#endif
}
@@ -2219,7 +2573,7 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd");
erts_smp_cnd_init(&schdlr_sspnd.cnd);
- schdlr_sspnd.changing = 0;
+ erts_smp_atomic_init(&schdlr_sspnd.changing, 0);
schdlr_sspnd.online = no_schedulers_online;
schdlr_sspnd.curr_online = no_schedulers;
erts_smp_atomic_init(&schdlr_sspnd.msb.ongoing, 0);
@@ -2242,7 +2596,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
if (no_schedulers_online < no_schedulers) {
if (erts_common_run_queue) {
for (ix = no_schedulers_online; ix < no_schedulers; ix++)
- erts_smp_atomic_set(&(ERTS_SCHEDULER_IX(ix)->suspended), 1);
+ erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ERTS_SSI_FLG_SUSPENDED);
}
else {
for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++)
@@ -2253,7 +2608,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
schdlr_sspnd.wait_curr_online = no_schedulers_online;
schdlr_sspnd.curr_online *= 2; /* Boot strapping... */
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
erts_smp_atomic_init(&doing_sys_schedule, 0);
@@ -2401,13 +2757,115 @@ susp_sched_resume_block(void *unused)
}
static void
+scheduler_ix_resume_wake(Uint ix)
+{
+ ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+ long xflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ long oflgs;
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, 0, xflgs);
+ if (oflgs == xflgs) {
+ erts_sched_finish_poke(ssi, oflgs);
+ break;
+ }
+ xflgs = oflgs;
+ } while (oflgs & ERTS_SSI_FLG_SUSPENDED);
+}
+
+static long
+sched_prep_spin_suspended(ErtsSchedulerSleepInfo *ssi, long xpct)
+{
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ long xflgs = xpct;
+
+ do {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ xflgs = oflgs;
+ } while (oflgs & ERTS_SSI_FLG_SUSPENDED);
+
+ return oflgs;
+}
+
+static long
+sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount)
+{
+ int until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ int sc = spincount;
+ long flgs;
+
+ do {
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if ((flgs & (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED))
+ != (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ break;
+ }
+ ERTS_SPIN_BODY;
+ if (--until_yield == 0) {
+ until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD;
+ erts_thr_yield();
+ }
+ } while (--sc > 0);
+ return flgs;
+}
+
+static long
+sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi)
+{
+ long oflgs;
+ long nflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+ long xflgs = (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED);
+
+ erts_tse_reset(ssi->event);
+
+ while (1) {
+ oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs);
+ if (oflgs == xflgs)
+ return nflgs;
+ if ((oflgs & (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED))
+ != (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ return oflgs;
+ }
+ xflgs = oflgs;
+ }
+}
+
+static void
suspend_scheduler(ErtsSchedulerData *esdp)
{
+ long flgs;
+ int changing;
long no = (long) esdp->no;
ErtsRunQueue *rq = esdp->run_queue;
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
long active_schedulers;
int curr_online = 1;
int wake = 0;
+ int reset_read_group = 0;
+#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
+ long aux_work;
+#endif
/*
* Schedulers may be suspended in two different ways:
@@ -2429,113 +2887,151 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (scheduler2cpu_map[esdp->no].bound_id >= 0
&& erts_unbind_from_cpu(erts_cpuinfo) == 0) {
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+ reset_read_group = 1;
}
erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
+ if (reset_read_group)
+ erts_smp_rwmtx_set_reader_group(0);
+
+ if (esdp->no <= erts_max_main_threads)
+ erts_thr_set_main_status(0, 0);
+
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_inactive);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active);
- ASSERT(active_schedulers >= 1);
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED) {
- if (active_schedulers == schdlr_sspnd.msb.wait_active)
- wake = 1;
- if (active_schedulers == 1)
- schdlr_sspnd.changing = 0;
- }
+ flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED);
+ if (flgs & ERTS_SSI_FLG_SUSPENDED) {
- while (1) {
-
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- int check_children;
- erts_smp_runq_lock(esdp->run_queue);
- check_children = esdp->check_children;
- esdp->check_children = 0;
- erts_smp_runq_unlock(esdp->run_queue);
- if (check_children) {
- erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- erts_check_children();
- erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active);
+ ASSERT(active_schedulers >= 1);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) {
+ if (active_schedulers == schdlr_sspnd.msb.wait_active)
+ wake = 1;
+ if (active_schedulers == 1) {
+ changing = erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_MSB);
+ changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB;
+ }
}
-#endif
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) {
- int changed = 0;
- if (no > schdlr_sspnd.online && curr_online) {
- schdlr_sspnd.curr_online--;
- curr_online = 0;
- changed = 1;
+ while (1) {
+ if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) {
+ int changed = 0;
+ if (no > schdlr_sspnd.online && curr_online) {
+ schdlr_sspnd.curr_online--;
+ curr_online = 0;
+ changed = 1;
+ }
+ else if (no <= schdlr_sspnd.online && !curr_online) {
+ schdlr_sspnd.curr_online++;
+ curr_online = 1;
+ changed = 1;
+ }
+ if (changed
+ && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
+ wake = 1;
+ if (schdlr_sspnd.online == schdlr_sspnd.curr_online) {
+ changing = erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_ONLN);
+ changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN;
+ }
}
- else if (no <= schdlr_sspnd.online && !curr_online) {
- schdlr_sspnd.curr_online++;
- curr_online = 1;
- changed = 1;
+
+ if (wake) {
+ erts_smp_cnd_signal(&schdlr_sspnd.cnd);
+ wake = 0;
}
- if (changed
- && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
- wake = 1;
- if (schdlr_sspnd.online == schdlr_sspnd.curr_online)
- schdlr_sspnd.changing = 0;
- }
- if (wake) {
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- wake = 0;
- }
+ flgs = erts_smp_atomic_read(&ssi->flags);
+ if (!(flgs & ERTS_SSI_FLG_SUSPENDED))
+ break;
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
- if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ|ERTS_RUNQ_FLG_SUSPENDED)))
- break;
- if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- && !erts_smp_atomic_read(&esdp->suspended))
- break;
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ blockable_aux_work:
+ blockable_aux_work(esdp, ssi, aux_work);
+#endif
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- while (1) {
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ while (1) {
+ long flgs;
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+#endif
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- if (esdp->blocked_check_children)
- break;
+ flgs = sched_spin_suspended(ssi, ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT);
+ if (flgs == (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ flgs = sched_set_suspended_sleeptype(ssi);
+ if (flgs == (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ int res;
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
+ }
+
+ flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED));
+ if (!(flgs & ERTS_SSI_FLG_SUSPENDED))
+ break;
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)
+ break;
+
+
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ goto blockable_aux_work;
+ }
#endif
- erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
+ }
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE)
- break;
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ
- | ERTS_RUNQ_FLG_SUSPENDED)))
- break;
- if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
- && !erts_smp_atomic_read(&esdp->suspended))
- break;
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- esdp->blocked_check_children = 0;
-#endif
+ active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active);
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB)
+ && schdlr_sspnd.online == active_schedulers) {
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_MSB);
+ }
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- }
+ ASSERT(no <= schdlr_sspnd.online);
+ ASSERT(!erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing));
- active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active);
- if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED
- && schdlr_sspnd.online == active_schedulers) {
- schdlr_sspnd.changing = 0;
}
+
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+ ASSERT(curr_online);
+
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_active);
+ if (esdp->no <= erts_max_main_threads)
+ erts_thr_set_main_status(1, (int) esdp->no);
+
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
@@ -2600,8 +3096,10 @@ erts_schedulers_state(Uint *total,
int yield_allowed)
{
int res;
+ long changing;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- if (yield_allowed && schdlr_sspnd.changing)
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER))
res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
else {
*active = *online = schdlr_sspnd.online;
@@ -2621,6 +3119,7 @@ erts_set_schedulers_online(Process *p,
Sint *old_no)
{
int ix, res, no, have_unlocked_plocks;
+ long changing;
if (new_no < 1 || erts_no_schedulers < new_no)
return ERTS_SCHDLR_SSPND_EINVAL;
@@ -2630,7 +3129,8 @@ erts_set_schedulers_online(Process *p,
have_unlocked_plocks = 0;
no = (int) new_no;
- if (schdlr_sspnd.changing) {
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing) {
res = ERTS_SCHDLR_SSPND_YIELD_RESTART;
}
else {
@@ -2639,17 +3139,19 @@ erts_set_schedulers_online(Process *p,
res = ERTS_SCHDLR_SSPND_DONE;
}
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
schdlr_sspnd.online = no;
if (no > online) {
int ix;
schdlr_sspnd.wait_curr_online = no;
- if (ongoing_multi_scheduling_block())
- /* No schedulers to resume */;
+ if (ongoing_multi_scheduling_block()) {
+ for (ix = online; ix < no; ix++)
+ erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix));
+ }
else if (erts_common_run_queue) {
for (ix = online; ix < no; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended,
- 0);
+ scheduler_ix_resume_wake(ix);
}
else {
if (plocks) {
@@ -2663,6 +3165,7 @@ erts_set_schedulers_online(Process *p,
erts_smp_runq_lock(rq);
ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x5);
erts_smp_runq_unlock(rq);
+ scheduler_ix_resume_wake(ix);
}
/*
* Spread evacuation paths among all online
@@ -2677,7 +3180,6 @@ erts_set_schedulers_online(Process *p,
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
}
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
res = ERTS_SCHDLR_SSPND_DONE;
}
else /* if (no < online) */ {
@@ -2694,12 +3196,17 @@ erts_set_schedulers_online(Process *p,
schdlr_sspnd.wait_curr_online = no+1;
}
- if (ongoing_multi_scheduling_block())
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
- else if (erts_common_run_queue) {
+ if (ongoing_multi_scheduling_block()) {
for (ix = no; ix < online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended,
- 1);
+ erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix));
+ }
+ else if (erts_common_run_queue) {
+ for (ix = no; ix < online; ix++) {
+ ErtsSchedulerSleepInfo *ssi;
+ ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+ erts_smp_atomic_bor(&ssi->flags,
+ ERTS_SSI_FLG_SUSPENDED);
+ }
wake_all_schedulers();
}
else {
@@ -2726,7 +3233,10 @@ erts_set_schedulers_online(Process *p,
erts_smp_atomic_set(&balance_info.used_runqs, no);
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- ERTS_FOREACH_OP_RUNQ(rq, wake_scheduler(rq, 0));
+ for (ix = no; ix < online; ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+ wake_scheduler(rq, 0, 1);
+ }
}
}
@@ -2740,6 +3250,12 @@ erts_set_schedulers_online(Process *p,
susp_sched_prep_block,
susp_sched_resume_block,
NULL);
+ ASSERT(res != ERTS_SCHDLR_SSPND_DONE
+ ? (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ & erts_smp_atomic_read(&schdlr_sspnd.changing))
+ : (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ == erts_smp_atomic_read(&schdlr_sspnd.changing)));
+ erts_smp_atomic_band(&schdlr_sspnd.changing, ~ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
}
@@ -2754,15 +3270,16 @@ ErtsSchedSuspendResult
erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
{
int ix, res, have_unlocked_plocks = 0;
+ long changing;
ErtsProcList *plp;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
-
- if (schdlr_sspnd.changing) {
+ changing = erts_smp_atomic_read(&schdlr_sspnd.changing);
+ if (changing) {
res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */
}
else if (on) { /* ------ BLOCK ------ */
- if (erts_is_multi_scheduling_blocked()) {
+ if (schdlr_sspnd.msb.procs) {
plp = proclist_create(p);
plp->next = schdlr_sspnd.msb.procs;
schdlr_sspnd.msb.procs = plp;
@@ -2772,19 +3289,22 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
}
else {
+ int online = schdlr_sspnd.online;
p->flags |= F_HAVE_BLCKD_MSCHED;
if (plocks) {
have_unlocked_plocks = 1;
erts_smp_proc_unlock(p, plocks);
}
+ ASSERT(0 == erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing));
erts_smp_atomic_set(&schdlr_sspnd.msb.ongoing, 1);
- if (schdlr_sspnd.online == 1) {
+ if (online == 1) {
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
ASSERT(p->scheduler_data->no == 1);
}
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED;
+ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB
+ | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0);
if (p->scheduler_data->no == 1) {
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
schdlr_sspnd.msb.wait_active = 1;
@@ -2798,17 +3318,19 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
schdlr_sspnd.msb.wait_active = 2;
}
if (erts_common_run_queue) {
- for (ix = 1; ix < schdlr_sspnd.online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 1);
+ for (ix = 1; ix < online; ix++)
+ erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ERTS_SSI_FLG_SUSPENDED);
wake_all_schedulers();
}
else {
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
erts_smp_mtx_lock(&balance_info.update_mtx);
erts_smp_atomic_set(&balance_info.used_runqs, 1);
- for (ix = 0; ix < schdlr_sspnd.online; ix++) {
+ for (ix = 0; ix < online; ix++) {
ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
erts_smp_runq_lock(rq);
+ ASSERT(!(rq->flags & ERTS_RUNQ_FLG_SUSPENDED));
ERTS_RUNQ_RESET_MIGRATION_PATHS(rq, 0x7);
erts_smp_runq_unlock(rq);
}
@@ -2833,6 +3355,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
susp_sched_prep_block,
susp_sched_resume_block,
NULL);
+ ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED
+ ? (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ & erts_smp_atomic_read(&schdlr_sspnd.changing))
+ : (ERTS_SCHDLR_SSPND_CHNG_WAITER
+ == erts_smp_atomic_read(&schdlr_sspnd.changing)));
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
plp = proclist_create(p);
plp->next = schdlr_sspnd.msb.procs;
@@ -2876,7 +3405,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
if (schdlr_sspnd.msb.procs)
res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED;
else {
- schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED;
+ ERTS_SCHDLR_SSPND_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0);
#ifdef DEBUG
ERTS_FOREACH_RUNQ(rq,
{
@@ -2903,13 +3432,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
if (schdlr_sspnd.online == 1) {
/* No schedulers to resume */
ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
- schdlr_sspnd.changing = 0;
+ ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB);
}
else if (erts_common_run_queue) {
for (ix = 1; ix < schdlr_sspnd.online; ix++)
- erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 0);
+ erts_smp_atomic_band(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags,
+ ~ERTS_SSI_FLG_SUSPENDED);
wake_all_schedulers();
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
}
else {
int online = schdlr_sspnd.online;
@@ -2926,6 +3455,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
erts_smp_runq_lock(rq);
ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x4);
erts_smp_runq_unlock(rq);
+ scheduler_ix_resume_wake(ix);
}
/* Spread evacuation paths among all online run queues */
@@ -2941,7 +3471,6 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all)
erts_smp_runq_unlock(ERTS_RUNQ_IX(0));
erts_smp_mtx_unlock(&balance_info.update_mtx);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
}
res = ERTS_SCHDLR_SSPND_DONE;
}
@@ -2968,8 +3497,11 @@ erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value)
int
erts_is_multi_scheduling_blocked(void)
{
- return (erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing)
- && erts_smp_atomic_read(&schdlr_sspnd.active) == 1);
+ int res;
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ res = schdlr_sspnd.msb.procs != NULL;
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+ return res;
}
Eterm
@@ -2978,7 +3510,7 @@ erts_multi_scheduling_blockers(Process *p)
Eterm res = NIL;
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- if (erts_is_multi_scheduling_blocked()) {
+ if (schdlr_sspnd.msb.procs) {
Eterm *hp, *hp_end;
ErtsProcList *plp1, *plp2;
Uint max_size;
@@ -3010,18 +3542,34 @@ erts_multi_scheduling_blockers(Process *p)
static void *
sched_thread_func(void *vesdp)
{
+#ifdef ERTS_SMP
+ Uint no = ((ErtsSchedulerData *) vesdp)->no;
+#endif
#ifdef ERTS_ENABLE_LOCK_CHECK
{
char buf[31];
- Uint no = ((ErtsSchedulerData *) vesdp)->no;
erts_snprintf(&buf[0], 31, "scheduler %bpu", no);
erts_lc_set_thread_name(&buf[0]);
}
#endif
- erts_alloc_reg_scheduler_id(((ErtsSchedulerData *) vesdp)->no);
+ erts_alloc_reg_scheduler_id(no);
erts_tsd_set(sched_data_key, vesdp);
#ifdef ERTS_SMP
+
+ if (no <= erts_max_main_threads) {
+ erts_thr_set_main_status(1, (int) no);
+ if (erts_reader_groups) {
+ int rg = (int) no;
+ if (rg > erts_reader_groups)
+ rg = (((int) no) - 1) % erts_reader_groups + 1;
+ erts_smp_rwmtx_set_reader_group(rg);
+ }
+ }
+
erts_proc_lock_prepare_proc_lock_waiter();
+ ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
+
+
#endif
erts_register_blockable_thread();
#ifdef HIPE
@@ -3030,30 +3578,30 @@ sched_thread_func(void *vesdp)
erts_thread_init_float();
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
- ASSERT(schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE);
+ ASSERT(erts_smp_atomic_read(&schdlr_sspnd.changing)
+ & ERTS_SCHDLR_SSPND_CHNG_ONLN);
- schdlr_sspnd.curr_online--;
+ if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) {
+ erts_smp_atomic_band(&schdlr_sspnd.changing,
+ ~ERTS_SCHDLR_SSPND_CHNG_ONLN);
+ if (((ErtsSchedulerData *) vesdp)->no != 1)
+ erts_smp_cnd_signal(&schdlr_sspnd.cnd);
+ }
- if (((ErtsSchedulerData *) vesdp)->no != 1) {
- if (schdlr_sspnd.online == schdlr_sspnd.curr_online) {
- schdlr_sspnd.changing = 0;
- erts_smp_cnd_broadcast(&schdlr_sspnd.cnd);
+ if (((ErtsSchedulerData *) vesdp)->no == 1) {
+ if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) {
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
+ susp_sched_prep_block,
+ susp_sched_resume_block,
+ NULL);
+ while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
+ erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
+ susp_sched_prep_block,
+ susp_sched_resume_block,
+ NULL);
}
- }
- else if (schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online)
- schdlr_sspnd.changing = 0;
- else {
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
- erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
- susp_sched_prep_block,
- susp_sched_resume_block,
- NULL);
- ASSERT(!schdlr_sspnd.changing);
+ ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER);
}
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
@@ -3089,11 +3637,7 @@ erts_start_schedulers(void)
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual);
actual++;
ASSERT(actual == esdp->no);
-#ifdef ERTS_ENABLE_LOCK_COUNT
- res = erts_lcnt_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts);
-#else
res = ethr_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts);
-#endif
if (res != 0) {
actual--;
break;
@@ -3407,6 +3951,7 @@ processor_order_cmp(const void *vx, const void *vy)
static void
check_cpu_bind(ErtsSchedulerData *esdp)
{
+ int rg = 0;
int res;
int cpu_id;
erts_smp_runq_unlock(esdp->run_queue);
@@ -3425,19 +3970,25 @@ check_cpu_bind(ErtsSchedulerData *esdp)
goto unbind;
}
}
- else if (cpu_id < 0 && scheduler2cpu_map[esdp->no].bound_id >= 0) {
+ else if (cpu_id < 0) {
unbind:
/* Get rid of old binding */
res = erts_unbind_from_cpu(erts_cpuinfo);
if (res == 0)
esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
- else {
+ else if (res != -ENOTSUP) {
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
(int) esdp->no, cpu_id, erl_errno_id(-res));
erts_send_error_to_logger_nogl(dsbufp);
}
}
+ if (erts_reader_groups) {
+ if (esdp->cpu_id >= 0)
+ rg = reader_group_lookup(esdp->cpu_id);
+ else
+ rg = (((int) esdp->no) - 1) % erts_reader_groups + 1;
+ }
erts_smp_runq_lock(esdp->run_queue);
#ifdef ERTS_SMP
if (erts_common_run_queue)
@@ -3448,6 +3999,8 @@ check_cpu_bind(ErtsSchedulerData *esdp)
#endif
erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
+ if (erts_reader_groups)
+ erts_smp_rwmtx_set_reader_group(rg);
}
static void
@@ -3476,11 +4029,13 @@ signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
wake_all_schedulers();
}
else {
- ERTS_FOREACH_RUNQ(rq,
- {
+ for (s_ix = 0; s_ix < erts_no_run_queues; s_ix++) {
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(s_ix);
+ erts_smp_runq_lock(rq);
rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
- wake_scheduler(rq, 0);
- });
+ erts_smp_runq_unlock(rq);
+ wake_scheduler(rq, 0, 1);
+ };
}
#else
check_cpu_bind(erts_get_scheduler_data());
@@ -3496,14 +4051,15 @@ erts_init_scheduler_bind_type(char *how)
if (!system_cpudata && !user_cpudata)
return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
- if (sys_strcmp(how, "s") == 0)
+ if (sys_strcmp(how, "db") == 0)
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (sys_strcmp(how, "s") == 0)
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (sys_strcmp(how, "ps") == 0)
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "ts") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (sys_strcmp(how, "db") == 0
- || sys_strcmp(how, "tnnps") == 0)
+ else if (sys_strcmp(how, "tnnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (sys_strcmp(how, "nnps") == 0)
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
@@ -3519,6 +4075,490 @@ erts_init_scheduler_bind_type(char *how)
return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
}
+/*
+ * reader groups map
+ */
+
+typedef struct {
+ int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
+} erts_avail_cput;
+
+typedef struct {
+ int *map;
+ int size;
+ int groups;
+} erts_reader_groups_map_test;
+
+typedef struct {
+ int id;
+ int sub_levels;
+ int reader_groups;
+} erts_rg_count_t;
+
+typedef struct {
+ int logical;
+ int reader_group;
+} erts_reader_groups_map_t;
+
+typedef struct {
+ erts_reader_groups_map_t *map;
+ int map_size;
+ int logical_processors;
+ int groups;
+} erts_make_reader_groups_map_test;
+
+static int reader_groups_available_cpu_check;
+static int reader_groups_logical_processors;
+static int reader_groups_map_size;
+static erts_reader_groups_map_t *reader_groups_map;
+
+#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH
+
+static void
+make_reader_groups_map(erts_make_reader_groups_map_test *test);
+
+static Eterm
+get_reader_groups_map(Process *c_p,
+ erts_reader_groups_map_t *map,
+ int map_size,
+ int logical_processors)
+{
+#ifdef DEBUG
+ Eterm *endp;
+#endif
+ Eterm res = NIL, tuple;
+ Eterm *hp;
+ int i;
+
+ hp = HAlloc(c_p, logical_processors*(2+3));
+#ifdef DEBUG
+ endp = hp + logical_processors*(2+3);
+#endif
+ for (i = map_size - 1; i >= 0; i--) {
+ if (map[i].logical >= 0) {
+ tuple = TUPLE2(hp,
+ make_small(map[i].logical),
+ make_small(map[i].reader_group));
+ hp += 3;
+ res = CONS(hp, tuple, res);
+ hp += 2;
+ }
+ }
+ ASSERT(hp == endp);
+ return res;
+}
+
+Eterm
+erts_debug_reader_groups_map(Process *c_p, int groups)
+{
+ Eterm res;
+ erts_make_reader_groups_map_test test;
+
+ test.groups = groups;
+ make_reader_groups_map(&test);
+ if (!test.map)
+ res = NIL;
+ else {
+ res = get_reader_groups_map(c_p,
+ test.map,
+ test.map_size,
+ test.logical_processors);
+ erts_free(ERTS_ALC_T_TMP, test.map);
+ }
+ return res;
+}
+
+
+Eterm
+erts_get_reader_groups_map(Process *c_p)
+{
+ Eterm res;
+ erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
+ res = get_reader_groups_map(c_p,
+ reader_groups_map,
+ reader_groups_map_size,
+ reader_groups_logical_processors);
+ erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+ return res;
+}
+
+static void
+make_available_cpu_topology(erts_avail_cput *no,
+ erts_avail_cput *avail,
+ erts_cpu_topology_t *cpudata,
+ int *size,
+ int test)
+{
+ int len = *size;
+ erts_cpu_topology_t last;
+ int a, i, j;
+
+ no->level[ERTS_TOPOLOGY_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
+ no->level[ERTS_TOPOLOGY_CORE] = -1;
+ no->level[ERTS_TOPOLOGY_THREAD] = -1;
+ no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
+
+ last.node = INT_MIN;
+ last.processor = INT_MIN;
+ last.processor_node = INT_MIN;
+ last.core = INT_MIN;
+ last.thread = INT_MIN;
+ last.logical = INT_MIN;
+
+ a = 0;
+
+ for (i = 0; i < len; i++) {
+
+ if (!test && !erts_is_cpu_available(erts_cpuinfo, cpudata[i].logical))
+ continue;
+
+ if (last.node != cpudata[i].node)
+ goto node;
+ if (last.processor != cpudata[i].processor)
+ goto processor;
+ if (last.processor_node != cpudata[i].processor_node)
+ goto processor_node;
+ if (last.core != cpudata[i].core)
+ goto core;
+ ASSERT(last.thread != cpudata[i].thread);
+ goto thread;
+
+ node:
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ processor:
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ processor_node:
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ core:
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ thread:
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
+ avail[a].level[j] = no->level[j];
+
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
+ avail[a].level[ERTS_TOPOLOGY_RG] = 0;
+
+ ASSERT(last.logical != cpudata[a].logical);
+
+ last = cpudata[i];
+ a++;
+ }
+
+ no->level[ERTS_TOPOLOGY_NODE]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+ no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+ no->level[ERTS_TOPOLOGY_CORE]++;
+ no->level[ERTS_TOPOLOGY_THREAD]++;
+ no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+ *size = a;
+}
+
+static int
+reader_group_lookup(int logical)
+{
+ int start = logical % reader_groups_map_size;
+ int ix = start;
+
+ do {
+ if (reader_groups_map[ix].logical == logical) {
+ ASSERT(reader_groups_map[ix].reader_group > 0);
+ return reader_groups_map[ix].reader_group;
+ }
+ ix++;
+ if (ix == reader_groups_map_size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
+}
+
+static void
+reader_group_insert(erts_reader_groups_map_t *map, int map_size,
+ int logical, int reader_group)
+{
+ int start = logical % map_size;
+ int ix = start;
+
+ do {
+ if (map[ix].logical < 0) {
+ map[ix].logical = logical;
+ map[ix].reader_group = reader_group;
+ return;
+ }
+ ix++;
+ if (ix == map_size)
+ ix = 0;
+ } while (ix != start);
+
+ erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
+}
+
+
+static int
+sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail)
+{
+ int sub_level = level+1;
+ int last = -1;
+ rgc->sub_levels = 0;
+
+ do {
+ if (last != avail[aix].level[sub_level]) {
+ rgc->sub_levels++;
+ last = avail[aix].level[sub_level];
+ }
+ aix++;
+ }
+ while (aix < avail_sz && rgc->id == avail[aix].level[level]);
+ rgc->reader_groups = 0;
+ return aix;
+}
+
+static int
+write_reader_groups(int *rgp, erts_rg_count_t *rgcp,
+ int level, int a,
+ int avail_sz, erts_avail_cput *avail)
+{
+ int rg = *rgp;
+ int sub_level = level+1;
+ int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups;
+ int xsl = rgcp->sub_levels % rgcp->reader_groups;
+ int sls = 0;
+ int last = -1;
+ int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1;
+
+ ASSERT(level < 0 || avail[a].level[level] == rgcp->id)
+
+ do {
+ if (last != avail[a].level[sub_level]) {
+ if (!sls) {
+ sls = sl_per_gr;
+ rg++;
+ if (rg >= xsl_rg_lim)
+ sls++;
+ }
+ last = avail[a].level[sub_level];
+ sls--;
+ }
+ avail[a].level[ERTS_TOPOLOGY_RG] = rg;
+ a++;
+ } while (a < avail_sz && (level < 0
+ || avail[a].level[level] == rgcp->id));
+
+ ASSERT(rgcp->reader_groups == rg - *rgp);
+
+ *rgp = rg;
+
+ return a;
+}
+
+static int
+rg_count_sub_levels_compare(const void *vx, const void *vy)
+{
+ erts_rg_count_t *x = (erts_rg_count_t *) vx;
+ erts_rg_count_t *y = (erts_rg_count_t *) vy;
+ if (x->sub_levels != y->sub_levels)
+ return y->sub_levels - x->sub_levels;
+ return x->id - y->id;
+}
+
+static int
+rg_count_id_compare(const void *vx, const void *vy)
+{
+ erts_rg_count_t *x = (erts_rg_count_t *) vx;
+ erts_rg_count_t *y = (erts_rg_count_t *) vy;
+ return x->id - y->id;
+}
+
+static void
+make_reader_groups_map(erts_make_reader_groups_map_test *test)
+{
+ int i, spread_level, avail_sz;
+ erts_avail_cput no, *avail;
+ erts_cpu_topology_t *cpudata;
+ erts_reader_groups_map_t *map;
+ int map_sz;
+ int groups = erts_reader_groups;
+
+ if (test) {
+ test->map = NULL;
+ test->map_size = 0;
+ groups = test->groups;
+ }
+
+ if (!groups)
+ return;
+
+ if (!test) {
+ if (reader_groups_map)
+ erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map);
+
+ reader_groups_logical_processors = 0;
+ reader_groups_map_size = 0;
+ reader_groups_map = NULL;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
+
+ if (!cpudata)
+ return;
+
+ cpu_bind_order_sort(cpudata,
+ avail_sz,
+ ERTS_CPU_BIND_NO_SPREAD,
+ 1);
+
+ avail = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(erts_avail_cput)*avail_sz);
+
+ make_available_cpu_topology(&no, avail, cpudata,
+ &avail_sz, test != NULL);
+
+ destroy_tmp_cpu_topology_copy(cpudata);
+
+ map_sz = avail_sz*2+1;
+
+ if (test) {
+ map = erts_alloc(ERTS_ALC_T_TMP,
+ (sizeof(erts_reader_groups_map_t)
+ * map_sz));
+ test->map = map;
+ test->map_size = map_sz;
+ test->logical_processors = avail_sz;
+ }
+ else {
+ map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP,
+ (sizeof(erts_reader_groups_map_t)
+ * map_sz));
+ reader_groups_map = map;
+ reader_groups_logical_processors = avail_sz;
+ reader_groups_map_size = map_sz;
+
+ }
+
+ for (i = 0; i < map_sz; i++) {
+ map[i].logical = -1;
+ map[i].reader_group = 0;
+ }
+
+ spread_level = ERTS_TOPOLOGY_CORE;
+ for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
+ if (no.level[i] > groups) {
+ spread_level = i;
+ break;
+ }
+ }
+
+ if (no.level[spread_level] <= groups) {
+ int a, rg, last = -1;
+ rg = 0;
+ ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
+ for (a = 0; a < avail_sz; a++) {
+ if (last != avail[a].level[spread_level]) {
+ rg++;
+ last = avail[a].level[spread_level];
+ }
+ reader_group_insert(map,
+ map_sz,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ rg);
+ }
+ }
+ else { /* groups < no.level[spread_level] */
+ erts_rg_count_t *rg_count;
+ int a, rg, tl, toplevels;
+
+ tl = spread_level-1;
+
+ if (spread_level == ERTS_TOPOLOGY_NODE)
+ toplevels = 1;
+ else
+ toplevels = no.level[tl];
+
+ rg_count = erts_alloc(ERTS_ALC_T_TMP,
+ toplevels*sizeof(erts_rg_count_t));
+
+ if (toplevels == 1) {
+ rg_count[0].id = 0;
+ rg_count[0].sub_levels = no.level[spread_level];
+ rg_count[0].reader_groups = groups;
+ }
+ else {
+ int rgs_per_tl, rgs;
+ rgs = groups;
+ rgs_per_tl = rgs / toplevels;
+
+ a = 0;
+ for (i = 0; i < toplevels; i++) {
+ rg_count[i].id = avail[a].level[tl];
+ a = sub_levels(&rg_count[i], tl, a, avail_sz, avail);
+ }
+
+ qsort(rg_count,
+ toplevels,
+ sizeof(erts_rg_count_t),
+ rg_count_sub_levels_compare);
+
+ for (i = 0; i < toplevels; i++) {
+ if (rg_count[i].sub_levels < rgs_per_tl) {
+ rg_count[i].reader_groups = rg_count[i].sub_levels;
+ rgs -= rg_count[i].sub_levels;
+ }
+ else {
+ rg_count[i].reader_groups = rgs_per_tl;
+ rgs -= rgs_per_tl;
+ }
+ }
+
+ while (rgs > 0) {
+ for (i = 0; i < toplevels; i++) {
+ if (rg_count[i].sub_levels == rg_count[i].reader_groups)
+ break;
+ else {
+ rg_count[i].reader_groups++;
+ if (--rgs == 0)
+ break;
+ }
+ }
+ }
+
+ qsort(rg_count,
+ toplevels,
+ sizeof(erts_rg_count_t),
+ rg_count_id_compare);
+ }
+
+ a = i = rg = 0;
+ while (a < avail_sz) {
+ a = write_reader_groups(&rg, &rg_count[i], tl,
+ a, avail_sz, avail);
+ i++;
+ }
+
+ ASSERT(groups == rg);
+
+ for (a = 0; a < avail_sz; a++)
+ reader_group_insert(map,
+ map_sz,
+ avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+ avail[a].level[ERTS_TOPOLOGY_RG]);
+
+ erts_free(ERTS_ALC_T_TMP, rg_count);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, avail);
+}
+
+/*
+ * CPU topology
+ */
+
typedef struct {
int *id;
int used;
@@ -3765,11 +4805,11 @@ verify_topology(erts_cpu_topology_t *cpudata, int size)
/* Verify logical ids */
logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
- for (i = 0; i < user_cpudata_size; i++)
- logical[i] = user_cpudata[i].logical;
+ for (i = 0; i < size; i++)
+ logical[i] = cpudata[i].logical;
- qsort(logical, user_cpudata_size, sizeof(int), int_cmp);
- for (i = 0; i < user_cpudata_size-1; i++) {
+ qsort(logical, size, sizeof(int), int_cmp);
+ for (i = 0; i < size-1; i++) {
if (logical[i] == logical[i+1]) {
erts_free(ERTS_ALC_T_TMP, logical);
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
@@ -3782,13 +4822,13 @@ verify_topology(erts_cpu_topology_t *cpudata, int size)
/* Verify unique entities */
- for (i = 1; i < user_cpudata_size; i++) {
- if (user_cpudata[i-1].processor == user_cpudata[i].processor
- && user_cpudata[i-1].node == user_cpudata[i].node
- && (user_cpudata[i-1].processor_node
- == user_cpudata[i].processor_node)
- && user_cpudata[i-1].core == user_cpudata[i].core
- && user_cpudata[i-1].thread == user_cpudata[i].thread) {
+ for (i = 1; i < size; i++) {
+ if (cpudata[i-1].processor == cpudata[i].processor
+ && cpudata[i-1].node == cpudata[i].node
+ && (cpudata[i-1].processor_node
+ == cpudata[i].processor_node)
+ && cpudata[i-1].core == cpudata[i].core
+ && cpudata[i-1].thread == cpudata[i].thread) {
return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
}
}
@@ -4032,6 +5072,8 @@ erts_set_cpu_topology(Process *c_p, Eterm term)
sizeof(erts_cpu_topology_t)*cpudata_size);
}
+ make_reader_groups_map(NULL);
+
signal_schedulers_bind_change(cpudata, cpudata_size);
done:
@@ -4146,14 +5188,15 @@ erts_bind_schedulers(Process *c_p, Eterm how)
old_cpu_bind_order = cpu_bind_order;
- if (ERTS_IS_ATOM_STR("spread", how))
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("default_bind", how)
- || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
@@ -4199,14 +5242,15 @@ erts_fake_scheduler_bindings(Process *p, Eterm how)
int cpudata_size;
Eterm res;
- if (ERTS_IS_ATOM_STR("spread", how))
+ if (ERTS_IS_ATOM_STR("default_bind", how))
+ fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+ else if (ERTS_IS_ATOM_STR("spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
else if (ERTS_IS_ATOM_STR("processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("thread_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
- else if (ERTS_IS_ATOM_STR("default_bind", how)
- || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+ else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
@@ -4431,7 +5475,12 @@ early_cpu_bind_init(void)
(sizeof(erts_cpu_topology_t)
* system_cpudata_size));
- cpu_bind_order = ERTS_CPU_BIND_NONE;
+ cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
+
+ reader_groups_available_cpu_check = 1;
+ reader_groups_logical_processors = 0;
+ reader_groups_map_size = 0;
+ reader_groups_map = NULL;
if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
|| ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
@@ -4457,6 +5506,19 @@ late_cpu_bind_init(void)
scheduler2cpu_map[ix].bound_id = -1;
}
+ if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
+ int ncpus = erts_get_cpu_configured(erts_cpuinfo);
+ if (ncpus < 1 || erts_no_schedulers < ncpus)
+ cpu_bind_order = ERTS_CPU_BIND_NONE;
+ else
+ cpu_bind_order = ((system_cpudata || user_cpudata)
+ && (erts_bind_to_cpu(erts_cpuinfo, -1) != -ENOTSUP)
+ ? ERTS_CPU_BIND_DEFAULT_BIND
+ : ERTS_CPU_BIND_NONE);
+ }
+
+ make_reader_groups_map(NULL);
+
if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
erts_cpu_topology_t *cpudata;
int cpudata_size;
@@ -4467,6 +5529,39 @@ late_cpu_bind_init(void)
}
}
+int
+erts_update_cpu_info(void)
+{
+ int changed;
+ erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
+ changed = erts_cpu_info_update(erts_cpuinfo);
+ if (changed) {
+ erts_cpu_topology_t *cpudata;
+ int cpudata_size;
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+
+ system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
+ system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+ (sizeof(erts_cpu_topology_t)
+ * system_cpudata_size));
+
+ if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
+ || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
+ system_cpudata_size)) {
+ erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+ system_cpudata = NULL;
+ system_cpudata_size = 0;
+ }
+
+ create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+ ASSERT(cpudata);
+ signal_schedulers_bind_change(cpudata, cpudata_size);
+ destroy_tmp_cpu_topology_copy(cpudata);
+ }
+ erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
+ return changed;
+}
+
#ifdef ERTS_SMP
static void
@@ -4481,7 +5576,7 @@ add_pend_suspend(Process *suspendee,
sizeof(ErtsPendingSuspend));
psp->next = NULL;
#ifdef DEBUG
-#ifdef ARCH_64
+#if defined(ARCH_64) && !HALFWORD_HEAP
psp->end = (ErtsPendingSuspend *) 0xdeaddeaddeaddead;
#else
psp->end = (ErtsPendingSuspend *) 0xdeaddead;
@@ -5322,7 +6417,7 @@ dequeue_process(ErtsRunQueue *runq, Process *p)
}
/* schedule a process */
-static ERTS_INLINE void
+static ERTS_INLINE ErtsRunQueue *
internal_add_to_runq(ErtsRunQueue *runq, Process *p)
{
Uint32 prev_status = p->status;
@@ -5333,12 +6428,12 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p)
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq));
if (p->status_flags & ERTS_PROC_SFLG_INRUNQ)
- return;
+ return NULL;
else if (p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) {
ASSERT(p->status != P_SUSPENDED);
ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p);
p->status_flags |= ERTS_PROC_SFLG_PENDADD2SCHEDQ;
- return;
+ return NULL;
}
ASSERT(!p->scheduler_data);
#endif
@@ -5377,20 +6472,23 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p)
profile_runnable_proc(p, am_active);
}
- smp_notify_inc_runq(add_runq);
-
if (add_runq != runq)
erts_smp_runq_unlock(add_runq);
+
+ return add_runq;
}
void
erts_add_to_runq(Process *p)
{
+ ErtsRunQueue *notify_runq;
ErtsRunQueue *runq = erts_get_runq_proc(p);
erts_smp_runq_lock(runq);
- internal_add_to_runq(runq, p);
+ notify_runq = internal_add_to_runq(runq, p);
erts_smp_runq_unlock(runq);
+ smp_notify_inc_runq(notify_runq);
+
}
/* Possibly remove a scheduled process we need to suspend */
@@ -5529,8 +6627,6 @@ erts_proc_migrate(Process *p, ErtsProcLocks *plcks,
p->run_queue = to_rq;
enqueue_process(to_rq, p);
- smp_notify_inc_runq(to_rq);
-
return ERTS_MIGRATE_SUCCESS;
}
#endif /* ERTS_SMP */
@@ -5727,30 +6823,6 @@ erts_set_process_priority(Process *p, Eterm new_value)
return old_value;
}
-#ifdef ERTS_SMP
-
-static ERTS_INLINE int
-prepare_for_sys_schedule(void)
-{
- while (!erts_port_task_have_outstanding_io_tasks()
- && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) {
- if (!erts_port_task_have_outstanding_io_tasks())
- return 1;
- erts_smp_atomic_set(&doing_sys_schedule, 0);
- }
- return 0;
-}
-
-#else
-
-static ERTS_INLINE int
-prepare_for_sys_schedule(void)
-{
- return !erts_port_task_have_outstanding_io_tasks();
-}
-
-#endif
-
/* note that P_RUNNING is only set so that we don't try to remove
** running processes from the schedule queue if they exit - a running
** process not being in the schedule queue!!
@@ -5839,6 +6911,9 @@ Process *schedule(Process *p, int calls)
}
if (IS_TRACED(p)) {
+ if (IS_TRACED_FL(p, F_TRACE_CALLS) && p->status != P_FREE) {
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_OUT);
+ }
switch (p->status) {
case P_EXITING:
if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT))
@@ -5882,8 +6957,11 @@ Process *schedule(Process *p, int calls)
p->status_flags &= ~ERTS_PROC_SFLG_RUNNING;
if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) {
+ ErtsRunQueue *notify_runq;
p->status_flags &= ~ERTS_PROC_SFLG_PENDADD2SCHEDQ;
- internal_add_to_runq(rq, p);
+ notify_runq = internal_add_to_runq(rq, p);
+ if (notify_runq != rq)
+ smp_notify_inc_runq(notify_runq);
}
#endif
@@ -5951,7 +7029,10 @@ Process *schedule(Process *p, int calls)
| ERTS_RUNQ_FLG_CHK_CPU_BIND
| ERTS_RUNQ_FLG_SUSPENDED)) {
if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED)
- || erts_smp_atomic_read(&esdp->suspended)) {
+ || (erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED)) {
+ ASSERT(erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED);
suspend_scheduler(esdp);
}
if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND)
@@ -5960,12 +7041,21 @@ Process *schedule(Process *p, int calls)
}
}
-#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- if (esdp->check_children) {
- esdp->check_children = 0;
- erts_smp_runq_unlock(rq);
- erts_check_children();
- erts_smp_runq_lock(rq);
+#if defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) \
+ || defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK)
+ {
+ ErtsSchedulerSleepInfo *ssi = esdp->ssi;
+ long aux_work = erts_smp_atomic_read(&ssi->aux_work);
+ if (aux_work) {
+ erts_smp_runq_unlock(rq);
+#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+ aux_work = blockable_aux_work(esdp, ssi, aux_work);
+#endif
+#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
+ nonblockable_aux_work(esdp, ssi, aux_work);
+#endif
+ erts_smp_runq_lock(rq);
+ }
}
#endif
@@ -5997,7 +7087,10 @@ Process *schedule(Process *p, int calls)
if (rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ
| ERTS_RUNQ_FLG_SUSPENDED)) {
if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED)
- || erts_smp_atomic_read(&esdp->suspended)) {
+ || (erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED)) {
+ ASSERT(erts_smp_atomic_read(&esdp->ssi->flags)
+ & ERTS_SSI_FLG_SUSPENDED);
non_empty_runq(rq);
goto continue_check_activities_to_run;
}
@@ -6014,17 +7107,7 @@ Process *schedule(Process *p, int calls)
}
}
- if (prepare_for_sys_schedule()) {
- erts_smp_atomic_set(&function_calls, 0);
- fcalls = 0;
- sched_sys_wait(esdp->no, rq);
- erts_smp_atomic_set(&doing_sys_schedule, 0);
- }
- else {
- /* If all schedulers are waiting, one of them *should*
- be waiting in erl_sys_schedule() */
- sched_cnd_wait(esdp->no, rq);
- }
+ scheduler_wait(&fcalls, esdp, rq);
non_empty_runq(rq);
@@ -6086,7 +7169,7 @@ Process *schedule(Process *p, int calls)
else {
if (erts_common_run_queue) {
if (erts_common_run_queue->waiting)
- wake_one_scheduler();
+ wake_scheduler(erts_common_run_queue, 0, 1);
}
else if (erts_smp_atomic_read(&no_empty_run_queues) != 0) {
wake_scheduler_on_empty_runq(rq);
@@ -6231,10 +7314,10 @@ Process *schedule(Process *p, int calls)
erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS);
if (erts_sched_stat.enabled) {
- Uint old = ERTS_PROC_SCHED_ID(p,
+ UWord old = ERTS_PROC_SCHED_ID(p,
(ERTS_PROC_LOCK_MAIN
| ERTS_PROC_LOCK_STATUS),
- esdp->no);
+ (UWord) esdp->no);
int migrated = old && old != esdp->no;
erts_smp_spin_lock(&erts_sched_stat.lock);
@@ -6275,7 +7358,11 @@ Process *schedule(Process *p, int calls)
trace_virtual_sched(p, am_in);
break;
}
+ if (IS_TRACED_FL(p, F_TRACE_CALLS)) {
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_IN);
+ }
}
+
if (p->status != P_EXITING)
p->status = P_RUNNING;
@@ -6397,8 +7484,8 @@ erts_schedule_misc_op(void (*func)(void *), void *arg)
else
rq->misc.start = molp;
rq->misc.end = molp;
- smp_notify_inc_runq(rq);
erts_smp_runq_unlock(rq);
+ smp_notify_inc_runq(rq);
}
static void
@@ -6640,7 +7727,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
Eterm args, /* Arguments for function (must be well-formed list). */
ErlSpawnOpts* so) /* Options for spawn. */
{
- ErtsRunQueue *rq;
+ ErtsRunQueue *rq, *notify_runq;
Process *p;
Sint arity; /* Number of arguments. */
#ifndef HYBRID
@@ -6719,11 +7806,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
/*
* Must initialize binary lists here before copying binaries to process.
*/
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
heap_need +=
@@ -6757,13 +7840,14 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
p->bin_vheap_sz = p->min_vheap_size;
p->bin_old_vheap_sz = p->min_vheap_size;
p->bin_old_vheap = 0;
+ p->bin_vheap_mature = 0;
/* No need to initialize p->fcalls. */
p->current = p->initial+INITIAL_MOD;
- p->i = (Eterm *) beam_apply;
- p->cp = (Eterm *) beam_apply+1;
+ p->i = (BeamInstr *) beam_apply;
+ p->cp = (BeamInstr *) beam_apply+1;
p->arg_reg = p->def_arg_reg;
p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]);
@@ -6813,7 +7897,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
p->group_leader =
IS_CONST(parent->group_leader)
? parent->group_leader
- : STORE_NC(&p->htop, &p->off_heap.externals, parent->group_leader);
+ : STORE_NC(&p->htop, &p->off_heap, parent->group_leader);
}
erts_get_default_tracing(&p->trace_flags, &p->tracer_proc);
@@ -6957,10 +8041,12 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
#endif
p->status = P_WAITING;
- internal_add_to_runq(rq, p);
+ notify_runq = internal_add_to_runq(rq, p);
erts_smp_runq_unlock(rq);
+ smp_notify_inc_runq(notify_runq);
+
res = p->id;
erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL);
@@ -7007,6 +8093,7 @@ void erts_init_empty_process(Process *p)
p->bin_vheap_sz = BIN_VH_MIN_SIZE;
p->bin_old_vheap_sz = BIN_VH_MIN_SIZE;
p->bin_old_vheap = 0;
+ p->bin_vheap_mature = 0;
#ifdef ERTS_SMP
p->u.ptimer = NULL;
p->bound_runq = NULL;
@@ -7014,11 +8101,7 @@ void erts_init_empty_process(Process *p)
memset(&(p->u.tm), 0, sizeof(ErlTimer));
#endif
p->next = NULL;
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
p->reg = NULL;
p->heap_sz = 0;
@@ -7165,11 +8248,7 @@ erts_debug_verify_clean_empty_process(Process* p)
/* Thing that erts_cleanup_empty_process() cleans up */
- ASSERT(p->off_heap.mso == NULL);
-#ifndef HYBRID /* FIND ME! */
- ASSERT(p->off_heap.funs == NULL);
-#endif
- ASSERT(p->off_heap.externals == NULL);
+ ASSERT(p->off_heap.first == NULL);
ASSERT(p->off_heap.overhead == 0);
ASSERT(p->mbuf == NULL);
@@ -7180,25 +8259,16 @@ erts_debug_verify_clean_empty_process(Process* p)
void
erts_cleanup_empty_process(Process* p)
{
- ErlHeapFragment* mbufp;
-
/* We only check fields that are known to be used... */
erts_cleanup_offheap(&p->off_heap);
- p->off_heap.mso = NULL;
-#ifndef HYBRID /* FIND ME! */
- p->off_heap.funs = NULL;
-#endif
- p->off_heap.externals = NULL;
+ p->off_heap.first = NULL;
p->off_heap.overhead = 0;
- mbufp = p->mbuf;
- while (mbufp) {
- ErlHeapFragment *next = mbufp->next;
- free_message_buffer(mbufp);
- mbufp = next;
+ if (p->mbuf != NULL) {
+ free_message_buffer(p->mbuf);
+ p->mbuf = NULL;
}
- p->mbuf = NULL;
#if defined(ERTS_ENABLE_LOCK_COUNT) && defined(ERTS_SMP)
erts_lcnt_proc_lock_destroy(p);
#endif
@@ -7214,7 +8284,6 @@ static void
delete_process(Process* p)
{
ErlMessage* mp;
- ErlHeapFragment* bp;
VERBOSE(DEBUG_PROCESSES, ("Removing process: %T\n",p->id));
@@ -7230,7 +8299,7 @@ delete_process(Process* p)
* The mso list should not be used anymore, but if it is, make sure that
* we'll notice.
*/
- p->off_heap.mso = (void *) 0x8DEFFACD;
+ p->off_heap.first = (void *) 0x8DEFFACD;
if (p->arg_reg != p->def_arg_reg) {
erts_free(ERTS_ALC_T_ARG_REG, p->arg_reg);
@@ -7264,11 +8333,8 @@ delete_process(Process* p)
/*
* Free all pending message buffers.
*/
- bp = p->mbuf;
- while (bp != NULL) {
- ErlHeapFragment* next_bp = bp->next;
- free_message_buffer(bp);
- bp = next_bp;
+ if (p->mbuf != NULL) {
+ free_message_buffer(p->mbuf);
}
erts_erase_dicts(p);
@@ -7348,7 +8414,7 @@ set_proc_exiting(Process *p, Eterm reason, ErlHeapFragment *bp)
p->freason = EXTAG_EXIT;
KILL_CATCHES(p);
cancel_timer(p);
- p->i = (Eterm *) beam_exit;
+ p->i = (BeamInstr *) beam_exit;
}
@@ -7778,9 +8844,10 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext)
erts_port_release(prt);
} else if (is_internal_pid(mon->pid)) {/* local by name or pid */
Eterm watched;
- Eterm lhp[3];
+ DeclareTmpHeapNoproc(lhp,3);
ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK
| ERTS_PROC_LOCKS_MSG_SEND);
+ UseTmpHeapNoproc(3);
rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks);
if (rp == NULL) {
goto done;
@@ -7795,6 +8862,7 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext)
erts_queue_monitor_message(rp, &rp_locks, mon->ref, am_process,
watched, pcontext->reason);
}
+ UnUseTmpHeapNoproc(3);
/* else: demonitor while we exited, i.e. do nothing... */
erts_smp_proc_unlock(rp, rp_locks);
} else { /* external by pid or name */
@@ -8025,8 +9093,13 @@ erts_do_exit_process(Process* p, Eterm reason)
ERTS_SMP_MSGQ_MV_INQ2PRIVQ(p);
#endif
- if (IS_TRACED_FL(p,F_TRACE_PROCS))
- trace_proc(p, p, am_exit, reason);
+ if (IS_TRACED(p)) {
+ if (IS_TRACED_FL(p, F_TRACE_CALLS))
+ erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_EXITING);
+
+ if (IS_TRACED_FL(p,F_TRACE_PROCS))
+ trace_proc(p, p, am_exit, reason);
+ }
erts_trace_check_exiting(p->id);
@@ -8075,6 +9148,8 @@ continue_exit_process(Process *p
Eterm reason = p->fvalue;
DistEntry *dep;
struct saved_calls *scb;
+ process_breakpoint_time_t *pbt;
+
#ifdef DEBUG
int yield_allowed = 1;
#endif
@@ -8214,6 +9289,7 @@ continue_exit_process(Process *p
? ERTS_PROC_SET_DIST_ENTRY(p, ERTS_PROC_LOCKS_ALL, NULL)
: NULL);
scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, ERTS_PROC_LOCKS_ALL, NULL);
+ pbt = ERTS_PROC_SET_CALL_TIME(p, ERTS_PROC_LOCKS_ALL, NULL);
erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL);
processes_busy--;
@@ -8228,11 +9304,12 @@ continue_exit_process(Process *p
* Pre-build the EXIT tuple if there are any links.
*/
if (lnk) {
- Eterm tmp_heap[4];
+ DeclareTmpHeap(tmp_heap,4,p);
Eterm exit_tuple;
Uint exit_tuple_sz;
Eterm* hp;
+ UseTmpHeap(4,p);
hp = &tmp_heap[0];
exit_tuple = TUPLE3(hp, am_EXIT, p->id, reason);
@@ -8243,16 +9320,21 @@ continue_exit_process(Process *p
ExitLinkContext context = {p, reason, exit_tuple, exit_tuple_sz};
erts_sweep_links(lnk, &doit_exit_link, &context);
}
+ UnUseTmpHeap(4,p);
}
{
ExitMonitorContext context = {reason, p};
- erts_sweep_monitors(mon,&doit_exit_monitor,&context);
+ erts_sweep_monitors(mon,&doit_exit_monitor,&context); /* Allocates TmpHeap, but we
+ have none here */
}
if (scb)
erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb);
+ if (pbt)
+ erts_free(ERTS_ALC_T_BPD, (void *) pbt);
+
delete_process(p);
erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN);
@@ -8271,7 +9353,7 @@ continue_exit_process(Process *p
ASSERT(p->status == P_EXITING);
- p->i = (Eterm *) beam_continue_exit;
+ p->i = (BeamInstr *) beam_continue_exit;
if (!(curr_locks & ERTS_PROC_LOCK_STATUS)) {
erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS);
@@ -8291,7 +9373,7 @@ continue_exit_process(Process *p
static void
timeout_proc(Process* p)
{
- p->i = (Eterm *) p->def_arg_reg[0];
+ p->i = *((BeamInstr **) (UWord) p->def_arg_reg);
p->flags |= F_TIMO;
p->flags &= ~F_INSLPQUEUE;
@@ -8390,9 +9472,9 @@ erts_program_counter_info(int to, void *to_arg, Process *p)
}
static void
-print_function_from_pc(int to, void *to_arg, Eterm* x)
+print_function_from_pc(int to, void *to_arg, BeamInstr* x)
{
- Eterm* addr = find_function_from_pc(x);
+ BeamInstr* addr = find_function_from_pc(x);
if (addr == NULL) {
if (x == beam_exit) {
erts_print(to, to_arg, "<terminate process>");
@@ -8426,7 +9508,7 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg)
}
if (is_CP(x)) {
- erts_print(to, to_arg, "Return addr %p (", (Eterm *) x);
+ erts_print(to, to_arg, "Return addr %p (", (Eterm *) EXPAND_POINTER(x));
print_function_from_pc(to, to_arg, cp_val(x));
erts_print(to, to_arg, ")\n");
yreg = 0;
@@ -9255,8 +10337,8 @@ init_processes_bif(void)
processes_trap_export.code[0] = am_erlang;
processes_trap_export.code[1] = am_processes_trap;
processes_trap_export.code[2] = 2;
- processes_trap_export.code[3] = (Eterm) em_apply_bif;
- processes_trap_export.code[4] = (Eterm) &processes_trap;
+ processes_trap_export.code[3] = (BeamInstr) em_apply_bif;
+ processes_trap_export.code[4] = (BeamInstr) &processes_trap;
#if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST
erts_get_emu_time(&debug_tv_start);