diff options
Diffstat (limited to 'erts/emulator/beam/erl_process.c')
-rw-r--r-- | erts/emulator/beam/erl_process.c | 2979 |
1 files changed, 1067 insertions, 1912 deletions
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 996806fc75..fc950af8ce 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -24,7 +24,6 @@ #endif #include <stddef.h> /* offsetof() */ -#include <ctype.h> #include "sys.h" #include "erl_vm.h" #include "global.h" @@ -38,6 +37,8 @@ #include "erl_instrument.h" #include "erl_threads.h" #include "erl_binary.h" +#include "beam_bp.h" +#include "erl_cpu_topology.h" #define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS) #define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \ @@ -45,14 +46,23 @@ #define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10) -#define ERTS_SCHED_SLEEP_SPINCOUNT 10000 +#define ERTS_SCHED_SPIN_UNTIL_YIELD 100 + +#define ERTS_SCHED_SYS_SLEEP_SPINCOUNT 10 +#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT 1000 +#define ERTS_SCHED_TSE_SLEEP_SPINCOUNT \ + (ERTS_SCHED_SYS_SLEEP_SPINCOUNT*ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT) +#define ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT 0 + +#define ERTS_WAKEUP_OTHER_LIMIT_VERY_HIGH (200*CONTEXT_REDS) +#define ERTS_WAKEUP_OTHER_LIMIT_HIGH (50*CONTEXT_REDS) +#define ERTS_WAKEUP_OTHER_LIMIT_MEDIUM (10*CONTEXT_REDS) +#define ERTS_WAKEUP_OTHER_LIMIT_LOW (CONTEXT_REDS) +#define ERTS_WAKEUP_OTHER_LIMIT_VERY_LOW (CONTEXT_REDS/10) -#define ERTS_WAKEUP_OTHER_LIMIT (100*CONTEXT_REDS/2) #define ERTS_WAKEUP_OTHER_DEC 10 #define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10) -#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff) - #if 0 || defined(DEBUG) #define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA #endif @@ -91,9 +101,9 @@ do { \ #define ERTS_EMPTY_RUNQ(RQ) \ ((RQ)->len == 0 && (RQ)->misc.start == NULL) -extern Eterm beam_apply[]; -extern Eterm beam_exit[]; -extern Eterm beam_continue_exit[]; +extern BeamInstr beam_apply[]; +extern BeamInstr beam_exit[]; +extern BeamInstr beam_continue_exit[]; static Sint p_last; static Sint p_next; @@ -105,6 +115,8 @@ Uint erts_no_schedulers; Uint erts_max_processes = ERTS_DEFAULT_MAX_PROCESSES; Uint erts_process_tab_index_mask; +static int wakeup_other_limit; + int erts_sched_thread_suggested_stack_size = -1; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -115,16 +127,34 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; int erts_disable_proc_not_running_opt; -#define ERTS_SCHED_CHANGING_ONLINE 1 -#define ERTS_SCHED_CHANGING_MULTI_SCHED 2 +#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((long) 1) << 0) +#define ERTS_SCHDLR_SSPND_CHNG_MSB (((long) 1) << 1) +#define ERTS_SCHDLR_SSPND_CHNG_ONLN (((long) 1) << 2) + +#ifndef DEBUG + +#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ + erts_smp_atomic_set(&schdlr_sspnd.changing, (VAL)) + +#else + +#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ +do { \ + long old_val__ = erts_smp_atomic_xchg(&schdlr_sspnd.changing, \ + (VAL)); \ + ASSERT(old_val__ == (OLD_VAL)); \ +} while (0) + +#endif + static struct { erts_smp_mtx_t mtx; erts_smp_cnd_t cnd; - int changing; int online; int curr_online; int wait_curr_online; + erts_smp_atomic_t changing; erts_smp_atomic_t active; struct { erts_smp_atomic_t ongoing; @@ -159,44 +189,6 @@ do { \ #endif -/* - * Cpu topology hierarchy. - */ -#define ERTS_TOPOLOGY_NODE 0 -#define ERTS_TOPOLOGY_PROCESSOR 1 -#define ERTS_TOPOLOGY_PROCESSOR_NODE 2 -#define ERTS_TOPOLOGY_CORE 3 -#define ERTS_TOPOLOGY_THREAD 4 -#define ERTS_TOPOLOGY_LOGICAL 5 - -#define ERTS_TOPOLOGY_MAX_DEPTH 6 - -typedef struct { - int bind_id; - int bound_id; -} ErtsCpuBindData; - -static ErtsCpuBindData *scheduler2cpu_map; -erts_smp_rwmtx_t erts_cpu_bind_rwmtx; - -typedef enum { - ERTS_CPU_BIND_SPREAD, - ERTS_CPU_BIND_PROCESSOR_SPREAD, - ERTS_CPU_BIND_THREAD_SPREAD, - ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD, - ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD, - ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD, - ERTS_CPU_BIND_NO_SPREAD, - ERTS_CPU_BIND_NONE -} ErtsCpuBindOrder; - -ErtsCpuBindOrder cpu_bind_order; - -static erts_cpu_topology_t *user_cpudata; -static int user_cpudata_size; -static erts_cpu_topology_t *system_cpudata; -static int system_cpudata_size; - erts_sched_stat_t erts_sched_stat; ErtsRunQueue *erts_common_run_queue; @@ -219,12 +211,18 @@ ErtsSchedulerData *erts_scheduler_data; ErtsAlignedRunQueue *erts_aligned_run_queues; Uint erts_no_run_queues; +ErtsAlignedSchedulerData *erts_aligned_scheduler_data; + +#ifdef ERTS_SMP + typedef union { - ErtsSchedulerData esd; - char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))]; -} ErtsAlignedSchedulerData; + ErtsSchedulerSleepInfo ssi; + char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerSleepInfo))]; +} ErtsAlignedSchedulerSleepInfo; -ErtsAlignedSchedulerData *erts_aligned_scheduler_data; +static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info; + +#endif #ifndef BM_COUNTERS static int processes_busy; @@ -283,8 +281,9 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, 200, ERTS_ALC_T_PROC_LIST) -#define ERTS_RUNQ_IX(IX) (&erts_aligned_run_queues[(IX)].runq) -#define ERTS_SCHEDULER_IX(IX) (&erts_aligned_scheduler_data[(IX)].esd) +#define ERTS_SCHED_SLEEP_INFO_IX(IX) \ + (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \ + &aligned_sched_sleep_info[(IX)].ssi) #define ERTS_FOREACH_RUNQ(RQVAR, DO) \ do { \ @@ -334,23 +333,14 @@ do { \ static void init_processes_bif(void); static void save_terminating_process(Process *p); static void exec_misc_ops(ErtsRunQueue *); -static void print_function_from_pc(int to, void *to_arg, Eterm* x); +static void print_function_from_pc(int to, void *to_arg, BeamInstr* x); static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg); #ifdef ERTS_SMP static void handle_pending_exiters(ErtsProcList *); -static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata, - int size, - ErtsCpuBindOrder bind_order, - int mk_seq); -static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size); - #endif -static void early_cpu_bind_init(void); -static void late_cpu_bind_init(void); - #if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) int erts_smp_lc_runq_is_locked(ErtsRunQueue *runq) @@ -388,7 +378,12 @@ erts_pre_init_process(void) erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks = ERTS_PSD_DIST_ENTRY_GET_LOCKS; erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks - = ERTS_PSD_DIST_ENTRY_GET_LOCKS; + = ERTS_PSD_DIST_ENTRY_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks + = ERTS_PSD_CALL_TIME_BP_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks + = ERTS_PSD_CALL_TIME_BP_SET_LOCKS; /* Check that we have locks for all entries */ for (ix = 0; ix < ERTS_PSD_SIZE; ix++) { @@ -401,13 +396,13 @@ erts_pre_init_process(void) /* initialize the scheduler */ void -erts_init_process(void) +erts_init_process(int ncpu) { Uint proc_bits = ERTS_PROC_BITS; #ifdef ERTS_SMP erts_disable_proc_not_running_opt = 0; - erts_init_proc_lock(); + erts_init_proc_lock(ncpu); #endif init_proclist_alloc(); @@ -572,6 +567,76 @@ erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data) #ifdef ERTS_SMP +void +erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, long flags) +{ + switch (flags & ERTS_SSI_FLGS_SLEEP_TYPE) { + case ERTS_SSI_FLG_POLL_SLEEPING: + erts_sys_schedule_interrupt(1); + break; + case ERTS_SSI_FLG_TSE_SLEEPING: + erts_tse_set(ssi->event); + break; + case 0: + break; + default: + erl_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error\n", + __FILE__, __LINE__); + break; + } +} + +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN +void +erts_smp_notify_check_children_needed(void) +{ + int i; + + for (i = 0; i < erts_no_schedulers; i++) { + long aux_work; + ErtsSchedulerSleepInfo *ssi; + ssi = ERTS_SCHED_SLEEP_INFO_IX(i); + aux_work = erts_smp_atomic_bor(&ssi->aux_work, + ERTS_SSI_AUX_WORK_CHECK_CHILDREN); + if (!(aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN)) + erts_sched_poke(ssi); + } +} +#endif + +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK +static ERTS_INLINE long +blockable_aux_work(ErtsSchedulerData *esdp, + ErtsSchedulerSleepInfo *ssi, + long aux_work) +{ + if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) { +#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN + if (aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN) { + aux_work = erts_smp_atomic_band(&ssi->aux_work, + ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN); + aux_work &= ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN; + erts_check_children(); + } +#endif + } + return aux_work; +} + +#endif + +#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK +static ERTS_INLINE long +nonblockable_aux_work(ErtsSchedulerData *esdp, + ErtsSchedulerSleepInfo *ssi, + long aux_work) +{ + if (aux_work & ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK) { + + } +} +#endif + static void prepare_for_block(void *vrq) { @@ -624,7 +689,31 @@ erts_active_schedulers(void) return as; } +static ERTS_INLINE int +prepare_for_sys_schedule(void) +{ #ifdef ERTS_SMP + while (!erts_port_task_have_outstanding_io_tasks() + && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) { + if (!erts_port_task_have_outstanding_io_tasks()) + return 1; + erts_smp_atomic_set(&doing_sys_schedule, 0); + } + return 0; +#else + return !erts_port_task_have_outstanding_io_tasks(); +#endif +} + +#ifdef ERTS_SMP + +static ERTS_INLINE void +sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq) +{ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ASSERT(rq->waiting < 0); + rq->waiting *= -1; +} static ERTS_INLINE void sched_waiting(Uint no, ErtsRunQueue *rq) @@ -666,7 +755,11 @@ empty_runq(ErtsRunQueue *rq) if (oifls & ERTS_RUNQ_IFLG_NONEMPTY) { #ifdef DEBUG long empty = erts_smp_atomic_read(&no_empty_run_queues); - ASSERT(0 <= empty && empty < erts_no_run_queues); + /* + * For a short period of time no_empty_run_queues may have + * been increased twice for a specific run queue. + */ + ASSERT(0 <= empty && empty < 2*erts_no_run_queues); #endif erts_smp_atomic_inc(&no_empty_run_queues); } @@ -679,242 +772,422 @@ non_empty_runq(ErtsRunQueue *rq) if (!(oifls & ERTS_RUNQ_IFLG_NONEMPTY)) { #ifdef DEBUG long empty = erts_smp_atomic_read(&no_empty_run_queues); - ASSERT(0 < empty && empty <= erts_no_run_queues); + /* + * For a short period of time no_empty_run_queues may have + * been increased twice for a specific run queue. + */ + ASSERT(0 < empty && empty <= 2*erts_no_run_queues); #endif erts_smp_atomic_dec(&no_empty_run_queues); } } -static ERTS_INLINE int -sched_spin_wake(ErtsRunQueue *rq) +static long +sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi) { -#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 - return 0; -#else - long val; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + long oflgs; + long nflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING); + long xflgs = 0; - val = erts_smp_atomic_read(&rq->spin_waiter); - ASSERT(val >= 0); - if (val != 0) { - erts_smp_atomic_inc(&rq->spin_wake); - return 1; - } - return 0; -#endif + do { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return nflgs; + xflgs = oflgs; + } while (!(oflgs & ERTS_SSI_FLG_SUSPENDED)); + return oflgs; } -static ERTS_INLINE int -sched_spin_wake_all(ErtsRunQueue *rq) +static long +sched_prep_cont_spin_wait(ErtsSchedulerSleepInfo *ssi) { -#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 - return 0; -#else - long val; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + long oflgs; + long nflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING); + long xflgs = ERTS_SSI_FLG_WAITING; - val = erts_smp_atomic_read(&rq->spin_waiter); - ASSERT(val >= 0); - if (val != 0) - erts_smp_atomic_add(&rq->spin_wake, val); - return val; -#endif + do { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return nflgs; + xflgs = oflgs; + nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED; + } while (oflgs & ERTS_SSI_FLG_WAITING); + return oflgs; } +static long +sched_spin_wait(ErtsSchedulerSleepInfo *ssi, int spincount) +{ + long until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD; + int sc = spincount; + long flgs; + + do { + flgs = erts_smp_atomic_read(&ssi->flags); + if ((flgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) + != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) { + break; + } + ERTS_SPIN_BODY; + if (--until_yield == 0) { + until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD; + erts_thr_yield(); + } + } while (--sc > 0); + return flgs; +} + +static long +sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, long sleep_type) +{ + long oflgs; + long nflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING|sleep_type; + long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING; + + if (sleep_type == ERTS_SSI_FLG_TSE_SLEEPING) + erts_tse_reset(ssi->event); + + while (1) { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return nflgs; + if ((oflgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) + != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) { + return oflgs; + } + xflgs = oflgs; + nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED; + } +} + +#define ERTS_SCHED_WAIT_WOKEN(FLGS) \ + (((FLGS) & (ERTS_SSI_FLG_WAITING|ERTS_SSI_FLG_SUSPENDED)) \ + != ERTS_SSI_FLG_WAITING) + static void -sched_sys_wait(Uint no, ErtsRunQueue *rq) +scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { - long dt; -#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 - int val; - int spincount = ERTS_SCHED_SLEEP_SPINCOUNT; + ErtsSchedulerSleepInfo *ssi = esdp->ssi; + int spincount; + long flgs; +#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \ + || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) + long aux_work; +#endif + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + erts_smp_spin_lock(&rq->sleepers.lock); + flgs = sched_prep_spin_wait(ssi); + if (flgs & ERTS_SSI_FLG_SUSPENDED) { + /* Go suspend instead... */ + erts_smp_spin_unlock(&rq->sleepers.lock); + return; + } + + ssi->prev = NULL; + ssi->next = rq->sleepers.list; + if (rq->sleepers.list) + rq->sleepers.list->prev = ssi; + rq->sleepers.list = ssi; + erts_smp_spin_unlock(&rq->sleepers.lock); + + /* + * If all schedulers are waiting, one of them *should* + * be waiting in erl_sys_schedule() + */ + + if (!prepare_for_sys_schedule()) { + + sched_waiting(esdp->no, rq); + + erts_smp_runq_unlock(rq); + + spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT; + + tse_wait: + +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); + tse_blockable_aux_work: + aux_work = blockable_aux_work(esdp, ssi, aux_work); #endif + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); - sched_waiting_sys(no, rq); + while (1) { -#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 - erts_smp_atomic_inc(&rq->spin_waiter); - erts_smp_runq_unlock(rq); +#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK +#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); +#endif + nonblockable_aux_work(esdp, ssi, aux_work); +#endif - erl_sys_schedule(1); /* Might give us something to do */ + flgs = sched_spin_wait(ssi, spincount); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + int res; + ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + } + } - dt = do_time_read_and_reset(); - if (dt) bump_timer(dt); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; + } + + flgs = sched_prep_cont_spin_wait(ssi); + spincount = ERTS_SCHED_TSE_SLEEP_SPINCOUNT; + + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; + } + +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); + if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) { + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + goto tse_blockable_aux_work; + } +#endif - while (spincount-- > 0) { - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val != 0) { - erts_smp_runq_lock(rq); - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val != 0) - goto woken; - if (spincount == 0) - goto sleep; - erts_smp_runq_unlock(rq); } - } - erts_smp_runq_lock(rq); - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val != 0) { - woken: - erts_smp_atomic_dec(&rq->spin_wake); - ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0); - erts_smp_atomic_dec(&rq->spin_waiter); - ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + + if (flgs & ~ERTS_SSI_FLG_SUSPENDED) + erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + + erts_smp_runq_lock(rq); + sched_active(esdp->no, rq); + } else { - sleep: - erts_smp_atomic_dec(&rq->spin_waiter); - ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); + long dt; + + erts_smp_atomic_set(&function_calls, 0); + *fcalls = 0; + + sched_waiting_sys(esdp->no, rq); + + erts_smp_runq_unlock(rq); + + spincount = ERTS_SCHED_SYS_SLEEP_SPINCOUNT; + + while (spincount-- > 0) { + + sys_poll_aux_work: + + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + + erl_sys_schedule(1); /* Might give us something to do */ + + dt = do_time_read_and_reset(); + if (dt) bump_timer(dt); + + sys_aux_work: + +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); + aux_work = blockable_aux_work(esdp, ssi, aux_work); +#endif +#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK +#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); +#endif + nonblockable_aux_work(esdp, ssi, aux_work); +#endif + + flgs = erts_smp_atomic_read(&ssi->flags); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + goto sys_woken; + } + if (!(flgs & ERTS_SSI_FLG_SLEEPING)) { + flgs = sched_prep_cont_spin_wait(ssi); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + goto sys_woken; + } + } + + /* + * If we got new I/O tasks we aren't allowed to + * call erl_sys_schedule() until it is handled. + */ + if (erts_port_task_have_outstanding_io_tasks()) { + erts_smp_atomic_set(&doing_sys_schedule, 0); + /* + * Got to check that we still got I/O tasks; otherwise + * we have to continue checking for I/O... + */ + if (!prepare_for_sys_schedule()) { + spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT; + goto tse_wait; + } + } + } + + erts_smp_runq_lock(rq); + /* * If we got new I/O tasks we aren't allowed to * sleep in erl_sys_schedule(). */ - if (!erts_port_task_have_outstanding_io_tasks()) { -#endif + if (erts_port_task_have_outstanding_io_tasks()) { + erts_smp_atomic_set(&doing_sys_schedule, 0); + /* + * Got to check that we still got I/O tasks; otherwise + * we have to wait in erl_sys_schedule() after all... + */ + if (prepare_for_sys_schedule()) + goto do_sys_schedule; + + /* + * Not allowed to wait in erl_sys_schedule; + * do tse wait instead... + */ + sched_change_waiting_sys_to_waiting(esdp->no, rq); + erts_smp_runq_unlock(rq); + spincount = 0; + goto tse_wait; + } + else { + do_sys_schedule: erts_sys_schedule_interrupt(0); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (!(flgs & ERTS_SSI_FLG_SLEEPING)) { + if (!(flgs & ERTS_SSI_FLG_WAITING)) + goto sys_locked_woken; + erts_smp_runq_unlock(rq); + flgs = sched_prep_cont_spin_wait(ssi); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + goto sys_woken; + } + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + goto sys_poll_aux_work; + } + + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_smp_runq_unlock(rq); + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + erl_sys_schedule(0); dt = do_time_read_and_reset(); if (dt) bump_timer(dt); - erts_smp_runq_lock(rq); + flgs = sched_prep_cont_spin_wait(ssi); + if (flgs & ERTS_SSI_FLG_WAITING) + goto sys_aux_work; -#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 + sys_woken: + erts_smp_runq_lock(rq); + sys_locked_woken: + erts_smp_atomic_set(&doing_sys_schedule, 0); + if (flgs & ~ERTS_SSI_FLG_SUSPENDED) + erts_smp_atomic_band(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + sched_active_sys(esdp->no, rq); } } -#endif - sched_active_sys(no, rq); -} - -static void -sched_cnd_wait(Uint no, ErtsRunQueue *rq) -{ -#if ERTS_SCHED_SLEEP_SPINCOUNT != 0 - int val; - int spincount = ERTS_SCHED_SLEEP_SPINCOUNT; ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); -#endif - - sched_waiting(no, rq); - erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, - prepare_for_block, - resume_after_block, - (void *) rq); - -#if ERTS_SCHED_SLEEP_SPINCOUNT == 0 - erts_smp_cnd_wait(&rq->cnd, &rq->mtx); -#else - erts_smp_atomic_inc(&rq->spin_waiter); - erts_smp_mtx_unlock(&rq->mtx); - - while (spincount-- > 0) { - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val != 0) { - erts_smp_mtx_lock(&rq->mtx); - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val != 0) - goto woken; - if (spincount == 0) - goto sleep; - erts_smp_mtx_unlock(&rq->mtx); - } - } - - erts_smp_mtx_lock(&rq->mtx); - val = erts_smp_atomic_read(&rq->spin_wake); - ASSERT(val >= 0); - if (val == 0) { - sleep: - erts_smp_atomic_dec(&rq->spin_waiter); - ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); - erts_smp_cnd_wait(&rq->cnd, &rq->mtx); - } - else { - woken: - erts_smp_atomic_dec(&rq->spin_wake); - ASSERT(erts_smp_atomic_read(&rq->spin_wake) >= 0); - erts_smp_atomic_dec(&rq->spin_waiter); - ASSERT(erts_smp_atomic_read(&rq->spin_waiter) >= 0); - } -#endif - - erts_smp_activity_end(ERTS_ACTIVITY_WAIT, - prepare_for_block, - resume_after_block, - (void *) rq); - - sched_active(no, rq); } -static void -wake_one_scheduler(void) -{ - ASSERT(erts_common_run_queue); - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(erts_common_run_queue)); - if (erts_common_run_queue->waiting) { - if (!sched_spin_wake(erts_common_run_queue)) { - if (erts_common_run_queue->waiting == -1) /* One scheduler waiting - and doing so in - sys_schedule */ - erts_sys_schedule_interrupt(1); - else - erts_smp_cnd_signal(&erts_common_run_queue->cnd); - } +static ERTS_INLINE long +ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi) +{ + /* reset all flags but suspended */ + long oflgs; + long nflgs = 0; + long xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING; + while (1) { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return oflgs; + nflgs = oflgs & ERTS_SSI_FLG_SUSPENDED; + xflgs = oflgs; } } static void -wake_scheduler(ErtsRunQueue *rq, int incq) +wake_scheduler(ErtsRunQueue *rq, int incq, int one) { - ASSERT(!erts_common_run_queue); - ASSERT(-1 <= rq->waiting && rq->waiting <= 1); - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); - if (rq->waiting && !rq->woken) { - if (!sched_spin_wake(rq)) { - if (rq->waiting < 0) - erts_sys_schedule_interrupt(1); - else - erts_smp_cnd_signal(&rq->cnd); + int res; + ErtsSchedulerSleepInfo *ssi; + ErtsSchedulerSleepList *sl; + + /* + * The unlocked run queue is not strictly necessary + * from a thread safety or deadlock prevention + * perspective. It will, however, cost us performance + * if it is locked during wakup of another scheduler, + * so all code *should* handle this without having + * the lock on the run queue. + */ + ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq)); + + sl = &rq->sleepers; + + erts_smp_spin_lock(&sl->lock); + ssi = sl->list; + if (!ssi) + erts_smp_spin_unlock(&sl->lock); + else if (one) { + long flgs; + if (ssi->prev) + ssi->prev->next = ssi->next; + else { + ASSERT(sl->list == ssi); + sl->list = ssi->next; } - rq->woken = 1; - if (incq) + if (ssi->next) + ssi->next->prev = ssi->prev; + + res = sl->list != NULL; + erts_smp_spin_unlock(&sl->lock); + + flgs = ssi_flags_set_wake(ssi); + erts_sched_finish_poke(ssi, flgs); + + if (incq && !erts_common_run_queue && (flgs & ERTS_SSI_FLG_WAITING)) non_empty_runq(rq); } + else { + sl->list = NULL; + erts_smp_spin_unlock(&sl->lock); + do { + ErtsSchedulerSleepInfo *wake_ssi = ssi; + ssi = ssi->next; + erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi)); + } while (ssi); + } } static void wake_all_schedulers(void) { - if (erts_common_run_queue) { - erts_smp_runq_lock(erts_common_run_queue); - if (erts_common_run_queue->waiting) { - if (erts_common_run_queue->waiting < 0) - erts_sys_schedule_interrupt(1); - sched_spin_wake_all(erts_common_run_queue); - erts_smp_cnd_broadcast(&erts_common_run_queue->cnd); - } - erts_smp_runq_unlock(erts_common_run_queue); - } + if (erts_common_run_queue) + wake_scheduler(erts_common_run_queue, 0, 0); else { int ix; for (ix = 0; ix < erts_no_run_queues; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - erts_smp_runq_lock(rq); - wake_scheduler(rq, 0); - erts_smp_runq_unlock(rq); + wake_scheduler(rq, 0, 1); } } } @@ -929,14 +1202,14 @@ chk_wake_sched(ErtsRunQueue *crq, int ix, int activate) wrq = ERTS_RUNQ_IX(ix); iflgs = erts_smp_atomic_read(&wrq->info_flags); if (!(iflgs & (ERTS_RUNQ_IFLG_SUSPENDED|ERTS_RUNQ_IFLG_NONEMPTY))) { - erts_smp_xrunq_lock(crq, wrq); if (activate) { if (ix == erts_smp_atomic_cmpxchg(&balance_info.active_runqs, ix+1, ix)) { + erts_smp_xrunq_lock(crq, wrq); wrq->flags &= ~ERTS_RUNQ_FLG_INACTIVE; + erts_smp_xrunq_unlock(crq, wrq); } } - wake_scheduler(wrq, 0); - erts_smp_xrunq_unlock(crq, wrq); + wake_scheduler(wrq, 0, 1); return 1; } return 0; @@ -982,19 +1255,42 @@ static ERTS_INLINE void smp_notify_inc_runq(ErtsRunQueue *runq) { #ifdef ERTS_SMP - if (erts_common_run_queue) - wake_one_scheduler(); - else - wake_scheduler(runq, 1); + if (runq) + wake_scheduler(runq, 1, 1); #endif } void -erts_smp_notify_inc_runq__(ErtsRunQueue *runq) +erts_smp_notify_inc_runq(ErtsRunQueue *runq) { smp_notify_inc_runq(runq); } +void +erts_sched_notify_check_cpu_bind(void) +{ +#ifdef ERTS_SMP + int ix; + if (erts_common_run_queue) { + for (ix = 0; ix < erts_no_schedulers; ix++) + erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1); + wake_all_schedulers(); + } + else { + for (ix = 0; ix < erts_no_run_queues; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + erts_smp_runq_lock(rq); + rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; + erts_smp_runq_unlock(rq); + wake_scheduler(rq, 0, 1); + }; + } +#else + erts_sched_check_cpu_bind(erts_get_scheduler_data()); +#endif +} + + #ifdef ERTS_SMP ErtsRunQueue * @@ -1136,20 +1432,23 @@ static void evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq) { Port *prt; + int notify_to_rq = 0; int prio; int prt_locked = 0; int rq_locked = 0; int evac_rq_locked = 1; + ErtsMigrateResult mres; erts_smp_runq_lock(evac_rq); + erts_smp_atomic_bor(&evac_rq->scheduler->ssi->flags, ERTS_SSI_FLG_SUSPENDED); + evac_rq->flags &= ~ERTS_RUNQ_FLGS_IMMIGRATE_QMASK; evac_rq->flags |= (ERTS_RUNQ_FLGS_EMIGRATE_QMASK | ERTS_RUNQ_FLGS_EVACUATE_QMASK | ERTS_RUNQ_FLG_SUSPENDED); erts_smp_atomic_bor(&evac_rq->info_flags, ERTS_RUNQ_IFLG_SUSPENDED); - /* * Need to set up evacuation paths first since we * may release the run queue lock on evac_rq @@ -1177,9 +1476,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq) /* Evacuate scheduled ports */ prt = evac_rq->ports.start; while (prt) { - (void) erts_port_migrate(prt, &prt_locked, + mres = erts_port_migrate(prt, &prt_locked, evac_rq, &evac_rq_locked, rq, &rq_locked); + if (mres == ERTS_MIGRATE_SUCCESS) + notify_to_rq = 1; if (prt_locked) erts_smp_port_unlock(prt); if (!evac_rq_locked) { @@ -1208,9 +1509,11 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq) goto end_of_proc; } - (void) erts_proc_migrate(proc, &proc_locks, + mres = erts_proc_migrate(proc, &proc_locks, evac_rq, &evac_rq_locked, rq, &rq_locked); + if (mres == ERTS_MIGRATE_SUCCESS) + notify_to_rq = 1; if (proc_locks) erts_smp_proc_unlock(proc, proc_locks); if (!evac_rq_locked) { @@ -1242,10 +1545,13 @@ evacuate_run_queue(ErtsRunQueue *evac_rq, ErtsRunQueue *rq) if (rq_locked) erts_smp_runq_unlock(rq); - if (!evac_rq_locked) - erts_smp_runq_lock(evac_rq); - wake_scheduler(evac_rq, 0); - erts_smp_runq_unlock(evac_rq); + if (evac_rq_locked) + erts_smp_runq_unlock(evac_rq); + + if (notify_to_rq) + smp_notify_inc_runq(rq); + + wake_scheduler(evac_rq, 0, 1); } static int @@ -1473,31 +1779,6 @@ try_steal_task(ErtsRunQueue *rq) return res; } -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN -void -erts_smp_notify_check_children_needed(void) -{ - int i; - for (i = 0; i < erts_no_schedulers; i++) { - erts_smp_runq_lock(ERTS_SCHEDULER_IX(i)->run_queue); - ERTS_SCHEDULER_IX(i)->check_children = 1; - if (!erts_common_run_queue) - wake_scheduler(ERTS_SCHEDULER_IX(i)->run_queue, 0); - erts_smp_runq_unlock(ERTS_SCHEDULER_IX(i)->run_queue); - } - if (ongoing_multi_scheduling_block()) { - /* Also blocked schedulers need to check children */ - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - for (i = 0; i < erts_no_schedulers; i++) - ERTS_SCHEDULER_IX(i)->blocked_check_children = 1; - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - } - if (erts_common_run_queue) - wake_all_schedulers(); -} -#endif - /* Run queue balancing */ typedef struct { @@ -1561,6 +1842,9 @@ do { \ static void check_balance(ErtsRunQueue *c_rq) { +#if ERTS_MAX_PROCESSES >= (1 << 27) +# error check_balance() assumes ERTS_MAX_PROCESS < (1 << 27) +#endif ErtsRunQueueBalance avg = {0}; Sint64 scheds_reds, full_scheds_reds; int forced, active, current_active, oowc, half_full_scheds, full_scheds, @@ -1684,12 +1968,14 @@ check_balance(ErtsRunQueue *c_rq) run_queue_info[qix].prio[pix].avail = 0; } else { - int xreds = 0; - int procreds = treds; - procreds -= run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds; + Sint64 xreds = 0; + Sint64 procreds = treds; + procreds -= + ((Sint64) + run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].reds); for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { - int av; + Sint64 av; if (xreds == 0) av = 100; @@ -1700,9 +1986,10 @@ check_balance(ErtsRunQueue *c_rq) if (av == 0) av = 1; } - run_queue_info[qix].prio[pix].avail = av; + run_queue_info[qix].prio[pix].avail = (int) av; + ASSERT(run_queue_info[qix].prio[pix].avail >= 0); if (pix < PRIORITY_NORMAL) /* ie., max or high */ - xreds += run_queue_info[qix].prio[pix].reds; + xreds += (Sint64) run_queue_info[qix].prio[pix].reds; } run_queue_info[qix].prio[ERTS_PORT_PRIO_LEVEL].avail = 100; } @@ -1807,7 +2094,8 @@ check_balance(ErtsRunQueue *c_rq) if (max_len != 0) { int avail = avg.prio[pix].avail; if (avail != 0) { - max_len = ((100*max_len - 1) / avail) + 1; + max_len = (int) ((100*((Sint64) max_len) - 1) + / ((Sint64) avail)) + 1; avg.prio[pix].max_len = max_len; ASSERT(max_len >= 0); } @@ -1824,9 +2112,10 @@ check_balance(ErtsRunQueue *c_rq) || run_queue_info[qix].prio[pix].avail == 0) limit = 0; else - limit = (((avg.prio[pix].max_len - * run_queue_info[qix].prio[pix].avail) - 1) - / 100 + 1); + limit = (int) (((((Sint64) avg.prio[pix].max_len) + * ((Sint64) run_queue_info[qix].prio[pix].avail)) + - 1) + / 100 + 1); run_queue_info[qix].prio[pix].migration_limit = limit; } } @@ -2054,8 +2343,27 @@ erts_debug_nbalance(void) void erts_early_init_scheduling(void) { - early_cpu_bind_init(); + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM; +} + +int +erts_sched_set_wakeup_limit(char *str) +{ + if (sys_strcmp(str, "very_high") == 0) + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_VERY_HIGH; + else if (sys_strcmp(str, "high") == 0) + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_HIGH; + else if (sys_strcmp(str, "medium") == 0) + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM; + else if (sys_strcmp(str, "low") == 0) + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_LOW; + else if (sys_strcmp(str, "very_low") == 0) + wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_VERY_LOW; + else + return EINVAL; + return 0; } + void erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) @@ -2078,16 +2386,20 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) erts_aligned_run_queues = erts_alloc(ERTS_ALC_T_RUNQS, (sizeof(ErtsAlignedRunQueue)*(n+1))); - if ((((Uint) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0) + if ((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) != 0) erts_aligned_run_queues = ((ErtsAlignedRunQueue *) - ((((Uint) erts_aligned_run_queues) + ((((UWord) erts_aligned_run_queues) & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE)); + ASSERT((((UWord) erts_aligned_run_queues) & ERTS_CACHE_LINE_MASK) == 0); + #ifdef ERTS_SMP erts_smp_atomic_init(&no_empty_run_queues, 0); #endif + erts_no_run_queues = n; + for (ix = 0; ix < n; ix++) { int pix, rix; ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); @@ -2102,8 +2414,10 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1)); erts_smp_cnd_init(&rq->cnd); - erts_smp_atomic_init(&rq->spin_waiter, 0); - erts_smp_atomic_init(&rq->spin_wake, 0); +#ifdef ERTS_SMP + erts_smp_spinlock_init(&rq->sleepers.lock, "run_queue_sleep_list"); + rq->sleepers.list = NULL; +#endif rq->waiting = 0; rq->woken = 0; @@ -2154,7 +2468,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) } erts_common_run_queue = !mrq ? ERTS_RUNQ_IX(0) : NULL; - erts_no_run_queues = n; #ifdef ERTS_SMP @@ -2169,23 +2482,59 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) #endif + n = (int) no_schedulers; + erts_no_schedulers = n; + +#ifdef ERTS_SMP + /* Create and initialize scheduler sleep info */ + + aligned_sched_sleep_info = erts_alloc(ERTS_ALC_T_SCHDLR_SLP_INFO, + (sizeof(ErtsAlignedSchedulerSleepInfo) + *(n+1))); + if ((((UWord) aligned_sched_sleep_info) & ERTS_CACHE_LINE_MASK) == 0) + aligned_sched_sleep_info = ((ErtsAlignedSchedulerSleepInfo *) + ((((UWord) aligned_sched_sleep_info) + & ~ERTS_CACHE_LINE_MASK) + + ERTS_CACHE_LINE_SIZE)); + for (ix = 0; ix < n; ix++) { + ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); +#if 0 /* no need to initialize these... */ + ssi->next = NULL; + ssi->prev = NULL; +#endif + erts_smp_atomic_init(&ssi->flags, 0); + ssi->event = NULL; /* initialized in sched_thread_func */ + erts_smp_atomic_init(&ssi->aux_work, 0); + } +#endif + /* Create and initialize scheduler specific data */ - n = (int) no_schedulers; erts_aligned_scheduler_data = erts_alloc(ERTS_ALC_T_SCHDLR_DATA, (sizeof(ErtsAlignedSchedulerData) *(n+1))); - if ((((Uint) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0) + if ((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) != 0) erts_aligned_scheduler_data = ((ErtsAlignedSchedulerData *) - ((((Uint) erts_aligned_scheduler_data) + ((((UWord) erts_aligned_scheduler_data) & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE)); + + ASSERT((((UWord) erts_aligned_scheduler_data) & ERTS_CACHE_LINE_MASK) == 0); + for (ix = 0; ix < n; ix++) { ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); #ifdef ERTS_SMP erts_bits_init_state(&esdp->erl_bits_state); esdp->match_pseudo_process = NULL; + esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); esdp->free_process = NULL; +#if HALFWORD_HEAP + /* Registers need to be heap allocated (correct memory range) for tracing to work */ + esdp->save_reg = erts_alloc(ERTS_ALC_T_BEAM_REGISTER, ERTS_X_REGS_ALLOCATED * sizeof(Eterm)); +#endif +#endif +#if !HEAP_ON_C_STACK + esdp->num_tmp_heap_used = 0; #endif esdp->no = (Uint) ix+1; esdp->current_process = NULL; @@ -2206,11 +2555,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) } #ifdef ERTS_SMP -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - esdp->check_children = 0; - esdp->blocked_check_children = 0; -#endif - erts_smp_atomic_init(&esdp->suspended, 0); erts_smp_atomic_init(&esdp->chk_cpu_bind, 0); #endif } @@ -2219,7 +2563,7 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd"); erts_smp_cnd_init(&schdlr_sspnd.cnd); - schdlr_sspnd.changing = 0; + erts_smp_atomic_init(&schdlr_sspnd.changing, 0); schdlr_sspnd.online = no_schedulers_online; schdlr_sspnd.curr_online = no_schedulers; erts_smp_atomic_init(&schdlr_sspnd.msb.ongoing, 0); @@ -2242,7 +2586,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) if (no_schedulers_online < no_schedulers) { if (erts_common_run_queue) { for (ix = no_schedulers_online; ix < no_schedulers; ix++) - erts_smp_atomic_set(&(ERTS_SCHEDULER_IX(ix)->suspended), 1); + erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags, + ERTS_SSI_FLG_SUSPENDED); } else { for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++) @@ -2253,7 +2598,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) schdlr_sspnd.wait_curr_online = no_schedulers_online; schdlr_sspnd.curr_online *= 2; /* Boot strapping... */ - schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE; + ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); erts_smp_atomic_init(&doing_sys_schedule, 0); @@ -2273,8 +2619,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online) /* init port tasks */ erts_port_task_init(); - - late_cpu_bind_init(); } ErtsRunQueue * @@ -2401,13 +2745,113 @@ susp_sched_resume_block(void *unused) } static void +scheduler_ix_resume_wake(Uint ix) +{ + ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); + long xflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED); + long oflgs; + do { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, 0, xflgs); + if (oflgs == xflgs) { + erts_sched_finish_poke(ssi, oflgs); + break; + } + xflgs = oflgs; + } while (oflgs & ERTS_SSI_FLG_SUSPENDED); +} + +static long +sched_prep_spin_suspended(ErtsSchedulerSleepInfo *ssi, long xpct) +{ + long oflgs; + long nflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED); + long xflgs = xpct; + + do { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return nflgs; + xflgs = oflgs; + } while (oflgs & ERTS_SSI_FLG_SUSPENDED); + + return oflgs; +} + +static long +sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount) +{ + int until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD; + int sc = spincount; + long flgs; + + do { + flgs = erts_smp_atomic_read(&ssi->flags); + if ((flgs & (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) + != (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + break; + } + ERTS_SPIN_BODY; + if (--until_yield == 0) { + until_yield = ERTS_SCHED_SPIN_UNTIL_YIELD; + erts_thr_yield(); + } + } while (--sc > 0); + return flgs; +} + +static long +sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) +{ + long oflgs; + long nflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED); + long xflgs = (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED); + + erts_tse_reset(ssi->event); + + while (1) { + oflgs = erts_smp_atomic_cmpxchg(&ssi->flags, nflgs, xflgs); + if (oflgs == xflgs) + return nflgs; + if ((oflgs & (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) + != (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + return oflgs; + } + xflgs = oflgs; + } +} + +static void suspend_scheduler(ErtsSchedulerData *esdp) { + long flgs; + int changing; long no = (long) esdp->no; - ErtsRunQueue *rq = esdp->run_queue; + ErtsSchedulerSleepInfo *ssi = esdp->ssi; long active_schedulers; int curr_online = 1; int wake = 0; +#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \ + || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) + long aux_work; +#endif /* * Schedulers may be suspended in two different ways: @@ -2424,126 +2868,144 @@ suspend_scheduler(ErtsSchedulerData *esdp) erts_smp_runq_unlock(esdp->run_queue); - /* Unbind from cpu */ - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - if (scheduler2cpu_map[esdp->no].bound_id >= 0 - && erts_unbind_from_cpu(erts_cpuinfo) == 0) { - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - } - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); + erts_sched_check_cpu_bind_prep_suspend(esdp); if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_inactive); erts_smp_mtx_lock(&schdlr_sspnd.mtx); - active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active); - ASSERT(active_schedulers >= 1); - if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED) { - if (active_schedulers == schdlr_sspnd.msb.wait_active) - wake = 1; - if (active_schedulers == 1) - schdlr_sspnd.changing = 0; - } - - while (1) { + flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); + if (flgs & ERTS_SSI_FLG_SUSPENDED) { -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - int check_children; - erts_smp_runq_lock(esdp->run_queue); - check_children = esdp->check_children; - esdp->check_children = 0; - erts_smp_runq_unlock(esdp->run_queue); - if (check_children) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - erts_check_children(); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + active_schedulers = erts_smp_atomic_dectest(&schdlr_sspnd.active); + ASSERT(active_schedulers >= 1); + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { + if (active_schedulers == schdlr_sspnd.msb.wait_active) + wake = 1; + if (active_schedulers == 1) { + changing = erts_smp_atomic_band(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_MSB); + changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; + } } -#endif - if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) { - int changed = 0; - if (no > schdlr_sspnd.online && curr_online) { - schdlr_sspnd.curr_online--; - curr_online = 0; - changed = 1; + while (1) { + if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + int changed = 0; + if (no > schdlr_sspnd.online && curr_online) { + schdlr_sspnd.curr_online--; + curr_online = 0; + changed = 1; + } + else if (no <= schdlr_sspnd.online && !curr_online) { + schdlr_sspnd.curr_online++; + curr_online = 1; + changed = 1; + } + if (changed + && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) + wake = 1; + if (schdlr_sspnd.online == schdlr_sspnd.curr_online) { + changing = erts_smp_atomic_band(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_ONLN); + changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; + } } - else if (no <= schdlr_sspnd.online && !curr_online) { - schdlr_sspnd.curr_online++; - curr_online = 1; - changed = 1; + + if (wake) { + erts_smp_cnd_signal(&schdlr_sspnd.cnd); + wake = 0; } - if (changed - && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) - wake = 1; - if (schdlr_sspnd.online == schdlr_sspnd.curr_online) - schdlr_sspnd.changing = 0; - } - if (wake) { - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - wake = 0; - } + flgs = erts_smp_atomic_read(&ssi->flags); + if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) + break; + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ|ERTS_RUNQ_FLG_SUSPENDED))) - break; - if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) - && !erts_smp_atomic_read(&esdp->suspended)) - break; +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); + blockable_aux_work: + blockable_aux_work(esdp, ssi, aux_work); +#endif - erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, - susp_sched_prep_block, - susp_sched_resume_block, - NULL); - while (1) { + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + while (1) { + long flgs; +#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK +#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); +#endif + nonblockable_aux_work(esdp, ssi, aux_work); +#endif -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - if (esdp->blocked_check_children) - break; + flgs = sched_spin_suspended(ssi, ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + flgs = sched_set_suspended_sleeptype(ssi); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + int res; + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + } + } + + flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)); + if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) + break; + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) + break; + + +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = erts_smp_atomic_read(&ssi->aux_work); + if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) { + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + goto blockable_aux_work; + } #endif - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + } - if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE) - break; + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); - if (!(rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ - | ERTS_RUNQ_FLG_SUSPENDED))) - break; - if ((rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) - && !erts_smp_atomic_read(&esdp->suspended)) - break; + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); } -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - esdp->blocked_check_children = 0; -#endif + active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active); + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) + && schdlr_sspnd.online == active_schedulers) { + erts_smp_atomic_band(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_MSB); + } - erts_smp_activity_end(ERTS_ACTIVITY_WAIT, - susp_sched_prep_block, - susp_sched_resume_block, - NULL); - } + ASSERT(no <= schdlr_sspnd.online); + ASSERT(!erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing)); - active_schedulers = erts_smp_atomic_inctest(&schdlr_sspnd.active); - if (schdlr_sspnd.changing == ERTS_SCHED_CHANGING_MULTI_SCHED - && schdlr_sspnd.online == active_schedulers) { - schdlr_sspnd.changing = 0; } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + ASSERT(curr_online); + if (erts_system_profile_flags.scheduler) profile_scheduler(make_small(esdp->no), am_active); erts_smp_runq_lock(esdp->run_queue); non_empty_runq(esdp->run_queue); - /* Make sure we check if we should bind to a cpu or not... */ - if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 1); - else - rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; + erts_sched_check_cpu_bind_post_suspend(esdp); } #define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID) \ @@ -2600,8 +3062,10 @@ erts_schedulers_state(Uint *total, int yield_allowed) { int res; + long changing; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - if (yield_allowed && schdlr_sspnd.changing) + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) res = ERTS_SCHDLR_SSPND_YIELD_RESTART; else { *active = *online = schdlr_sspnd.online; @@ -2621,6 +3085,7 @@ erts_set_schedulers_online(Process *p, Sint *old_no) { int ix, res, no, have_unlocked_plocks; + long changing; if (new_no < 1 || erts_no_schedulers < new_no) return ERTS_SCHDLR_SSPND_EINVAL; @@ -2630,7 +3095,8 @@ erts_set_schedulers_online(Process *p, have_unlocked_plocks = 0; no = (int) new_no; - if (schdlr_sspnd.changing) { + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if (changing) { res = ERTS_SCHDLR_SSPND_YIELD_RESTART; } else { @@ -2639,17 +3105,19 @@ erts_set_schedulers_online(Process *p, res = ERTS_SCHDLR_SSPND_DONE; } else { - schdlr_sspnd.changing = ERTS_SCHED_CHANGING_ONLINE; + ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); schdlr_sspnd.online = no; if (no > online) { int ix; schdlr_sspnd.wait_curr_online = no; - if (ongoing_multi_scheduling_block()) - /* No schedulers to resume */; + if (ongoing_multi_scheduling_block()) { + for (ix = online; ix < no; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } else if (erts_common_run_queue) { for (ix = online; ix < no; ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, - 0); + scheduler_ix_resume_wake(ix); } else { if (plocks) { @@ -2663,6 +3131,7 @@ erts_set_schedulers_online(Process *p, erts_smp_runq_lock(rq); ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x5); erts_smp_runq_unlock(rq); + scheduler_ix_resume_wake(ix); } /* * Spread evacuation paths among all online @@ -2677,7 +3146,6 @@ erts_set_schedulers_online(Process *p, erts_smp_mtx_unlock(&balance_info.update_mtx); erts_smp_mtx_lock(&schdlr_sspnd.mtx); } - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); res = ERTS_SCHDLR_SSPND_DONE; } else /* if (no < online) */ { @@ -2694,12 +3162,17 @@ erts_set_schedulers_online(Process *p, schdlr_sspnd.wait_curr_online = no+1; } - if (ongoing_multi_scheduling_block()) - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - else if (erts_common_run_queue) { + if (ongoing_multi_scheduling_block()) { for (ix = no; ix < online; ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, - 1); + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + else if (erts_common_run_queue) { + for (ix = no; ix < online; ix++) { + ErtsSchedulerSleepInfo *ssi; + ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic_bor(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } wake_all_schedulers(); } else { @@ -2726,7 +3199,10 @@ erts_set_schedulers_online(Process *p, erts_smp_atomic_set(&balance_info.used_runqs, no); erts_smp_mtx_unlock(&balance_info.update_mtx); erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ERTS_FOREACH_OP_RUNQ(rq, wake_scheduler(rq, 0)); + for (ix = no; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + wake_scheduler(rq, 0, 1); + } } } @@ -2740,6 +3216,12 @@ erts_set_schedulers_online(Process *p, susp_sched_prep_block, susp_sched_resume_block, NULL); + ASSERT(res != ERTS_SCHDLR_SSPND_DONE + ? (ERTS_SCHDLR_SSPND_CHNG_WAITER + & erts_smp_atomic_read(&schdlr_sspnd.changing)) + : (ERTS_SCHDLR_SSPND_CHNG_WAITER + == erts_smp_atomic_read(&schdlr_sspnd.changing))); + erts_smp_atomic_band(&schdlr_sspnd.changing, ~ERTS_SCHDLR_SSPND_CHNG_WAITER); } } @@ -2754,15 +3236,16 @@ ErtsSchedSuspendResult erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) { int ix, res, have_unlocked_plocks = 0; + long changing; ErtsProcList *plp; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - if (schdlr_sspnd.changing) { + changing = erts_smp_atomic_read(&schdlr_sspnd.changing); + if (changing) { res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */ } else if (on) { /* ------ BLOCK ------ */ - if (erts_is_multi_scheduling_blocked()) { + if (schdlr_sspnd.msb.procs) { plp = proclist_create(p); plp->next = schdlr_sspnd.msb.procs; schdlr_sspnd.msb.procs = plp; @@ -2772,19 +3255,22 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; } else { + int online = schdlr_sspnd.online; p->flags |= F_HAVE_BLCKD_MSCHED; if (plocks) { have_unlocked_plocks = 1; erts_smp_proc_unlock(p, plocks); } + ASSERT(0 == erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing)); erts_smp_atomic_set(&schdlr_sspnd.msb.ongoing, 1); - if (schdlr_sspnd.online == 1) { + if (online == 1) { res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1); ASSERT(p->scheduler_data->no == 1); } else { - schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED; + ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); if (p->scheduler_data->no == 1) { res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; schdlr_sspnd.msb.wait_active = 1; @@ -2798,17 +3284,19 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) schdlr_sspnd.msb.wait_active = 2; } if (erts_common_run_queue) { - for (ix = 1; ix < schdlr_sspnd.online; ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 1); + for (ix = 1; ix < online; ix++) + erts_smp_atomic_bor(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags, + ERTS_SSI_FLG_SUSPENDED); wake_all_schedulers(); } else { erts_smp_mtx_unlock(&schdlr_sspnd.mtx); erts_smp_mtx_lock(&balance_info.update_mtx); erts_smp_atomic_set(&balance_info.used_runqs, 1); - for (ix = 0; ix < schdlr_sspnd.online; ix++) { + for (ix = 0; ix < online; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); erts_smp_runq_lock(rq); + ASSERT(!(rq->flags & ERTS_RUNQ_FLG_SUSPENDED)); ERTS_RUNQ_RESET_MIGRATION_PATHS(rq, 0x7); erts_smp_runq_unlock(rq); } @@ -2833,6 +3321,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) susp_sched_prep_block, susp_sched_resume_block, NULL); + ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED + ? (ERTS_SCHDLR_SSPND_CHNG_WAITER + & erts_smp_atomic_read(&schdlr_sspnd.changing)) + : (ERTS_SCHDLR_SSPND_CHNG_WAITER + == erts_smp_atomic_read(&schdlr_sspnd.changing))); + erts_smp_atomic_band(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_WAITER); } plp = proclist_create(p); plp->next = schdlr_sspnd.msb.procs; @@ -2876,7 +3371,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) if (schdlr_sspnd.msb.procs) res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; else { - schdlr_sspnd.changing = ERTS_SCHED_CHANGING_MULTI_SCHED; + ERTS_SCHDLR_SSPND_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); #ifdef DEBUG ERTS_FOREACH_RUNQ(rq, { @@ -2903,13 +3398,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) if (schdlr_sspnd.online == 1) { /* No schedulers to resume */ ASSERT(erts_smp_atomic_read(&schdlr_sspnd.active) == 1); - schdlr_sspnd.changing = 0; + ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB); } else if (erts_common_run_queue) { for (ix = 1; ix < schdlr_sspnd.online; ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->suspended, 0); + erts_smp_atomic_band(&ERTS_SCHED_SLEEP_INFO_IX(ix)->flags, + ~ERTS_SSI_FLG_SUSPENDED); wake_all_schedulers(); - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); } else { int online = schdlr_sspnd.online; @@ -2926,6 +3421,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) erts_smp_runq_lock(rq); ERTS_RUNQ_RESET_SUSPEND_INFO(rq, 0x4); erts_smp_runq_unlock(rq); + scheduler_ix_resume_wake(ix); } /* Spread evacuation paths among all online run queues */ @@ -2941,7 +3437,6 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) erts_smp_runq_unlock(ERTS_RUNQ_IX(0)); erts_smp_mtx_unlock(&balance_info.update_mtx); erts_smp_mtx_lock(&schdlr_sspnd.mtx); - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); } res = ERTS_SCHDLR_SSPND_DONE; } @@ -2968,8 +3463,11 @@ erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value) int erts_is_multi_scheduling_blocked(void) { - return (erts_smp_atomic_read(&schdlr_sspnd.msb.ongoing) - && erts_smp_atomic_read(&schdlr_sspnd.active) == 1); + int res; + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + res = schdlr_sspnd.msb.procs != NULL; + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + return res; } Eterm @@ -2978,7 +3476,7 @@ erts_multi_scheduling_blockers(Process *p) Eterm res = NIL; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - if (erts_is_multi_scheduling_blocked()) { + if (schdlr_sspnd.msb.procs) { Eterm *hp, *hp_end; ErtsProcList *plp1, *plp2; Uint max_size; @@ -3010,18 +3508,26 @@ erts_multi_scheduling_blockers(Process *p) static void * sched_thread_func(void *vesdp) { +#ifdef ERTS_SMP + Uint no = ((ErtsSchedulerData *) vesdp)->no; +#endif #ifdef ERTS_ENABLE_LOCK_CHECK { char buf[31]; - Uint no = ((ErtsSchedulerData *) vesdp)->no; erts_snprintf(&buf[0], 31, "scheduler %bpu", no); erts_lc_set_thread_name(&buf[0]); } #endif - erts_alloc_reg_scheduler_id(((ErtsSchedulerData *) vesdp)->no); + erts_alloc_reg_scheduler_id(no); erts_tsd_set(sched_data_key, vesdp); #ifdef ERTS_SMP + + erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp); + erts_proc_lock_prepare_proc_lock_waiter(); + ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch(); + + #endif erts_register_blockable_thread(); #ifdef HIPE @@ -3030,30 +3536,30 @@ sched_thread_func(void *vesdp) erts_thread_init_float(); erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ASSERT(schdlr_sspnd.changing == ERTS_SCHED_CHANGING_ONLINE); + ASSERT(erts_smp_atomic_read(&schdlr_sspnd.changing) + & ERTS_SCHDLR_SSPND_CHNG_ONLN); - schdlr_sspnd.curr_online--; + if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) { + erts_smp_atomic_band(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_ONLN); + if (((ErtsSchedulerData *) vesdp)->no != 1) + erts_smp_cnd_signal(&schdlr_sspnd.cnd); + } - if (((ErtsSchedulerData *) vesdp)->no != 1) { - if (schdlr_sspnd.online == schdlr_sspnd.curr_online) { - schdlr_sspnd.changing = 0; - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + if (((ErtsSchedulerData *) vesdp)->no == 1) { + if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); + while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, + susp_sched_prep_block, + susp_sched_resume_block, + NULL); } - } - else if (schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) - schdlr_sspnd.changing = 0; - else { - erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, - susp_sched_prep_block, - susp_sched_resume_block, - NULL); - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - erts_smp_activity_end(ERTS_ACTIVITY_WAIT, - susp_sched_prep_block, - susp_sched_resume_block, - NULL); - ASSERT(!schdlr_sspnd.changing); + ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); } erts_smp_mtx_unlock(&schdlr_sspnd.mtx); @@ -3089,11 +3595,7 @@ erts_start_schedulers(void) ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual); actual++; ASSERT(actual == esdp->no); -#ifdef ERTS_ENABLE_LOCK_COUNT - res = erts_lcnt_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts); -#else res = ethr_thr_create(&esdp->tid,sched_thread_func,(void*)esdp,&opts); -#endif if (res != 0) { actual--; break; @@ -3122,1351 +3624,6 @@ erts_start_schedulers(void) #endif /* ERTS_SMP */ -static int -int_cmp(const void *vx, const void *vy) -{ - return *((int *) vx) - *((int *) vy); -} - -static int -cpu_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->core != y->core) - return x->core - y->core; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->node != y->node) - return x->node - y->node; - return 0; -} - -static int -cpu_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_thread_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - return 0; -} - -static int -cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->node != y->node) - return x->node - y->node; - if (x->core != y->core) - return x->core - y->core; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->core != y->core) - return x->core - y->core; - if (x->processor != y->processor) - return x->processor - y->processor; - return 0; -} - -static int -cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->thread != y->thread) - return x->thread - y->thread; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->core != y->core) - return x->core - y->core; - return 0; -} - -static int -cpu_no_spread_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->node != y->node) - return x->node - y->node; - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->thread != y->thread) - return x->thread - y->thread; - return 0; -} - -static ERTS_INLINE void -make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node) -{ - int ix; - int node = -1; - int processor = -1; - int processor_node = -1; - int processor_node_node = -1; - int core = -1; - int thread = -1; - int old_node = -1; - int old_processor = -1; - int old_processor_node = -1; - int old_core = -1; - int old_thread = -1; - - for (ix = 0; ix < size; ix++) { - if (!no_node || cpudata[ix].node >= 0) { - if (old_node == cpudata[ix].node) - cpudata[ix].node = node; - else { - old_node = cpudata[ix].node; - old_processor = processor = -1; - if (!no_node) - old_processor_node = processor_node = -1; - old_core = core = -1; - old_thread = thread = -1; - if (no_node || cpudata[ix].node >= 0) - cpudata[ix].node = ++node; - } - } - if (old_processor == cpudata[ix].processor) - cpudata[ix].processor = processor; - else { - old_processor = cpudata[ix].processor; - if (!no_node) - processor_node_node = old_processor_node = processor_node = -1; - old_core = core = -1; - old_thread = thread = -1; - cpudata[ix].processor = ++processor; - } - if (no_node && cpudata[ix].processor_node < 0) - old_processor_node = -1; - else { - if (old_processor_node == cpudata[ix].processor_node) { - if (no_node) - cpudata[ix].node = cpudata[ix].processor_node = node; - else { - if (processor_node_node >= 0) - cpudata[ix].node = processor_node_node; - cpudata[ix].processor_node = processor_node; - } - } - else { - old_processor_node = cpudata[ix].processor_node; - old_core = core = -1; - old_thread = thread = -1; - if (no_node) - cpudata[ix].node = cpudata[ix].processor_node = ++node; - else { - cpudata[ix].node = processor_node_node = ++node; - cpudata[ix].processor_node = ++processor_node; - } - } - } - if (!no_node && cpudata[ix].processor_node < 0) - cpudata[ix].processor_node = 0; - if (old_core == cpudata[ix].core) - cpudata[ix].core = core; - else { - old_core = cpudata[ix].core; - old_thread = thread = -1; - cpudata[ix].core = ++core; - } - if (old_thread == cpudata[ix].thread) - cpudata[ix].thread = thread; - else - old_thread = cpudata[ix].thread = ++thread; - } -} - -static void -cpu_bind_order_sort(erts_cpu_topology_t *cpudata, - int size, - ErtsCpuBindOrder bind_order, - int mk_seq) -{ - if (size > 1) { - int no_node = 0; - int (*cmp_func)(const void *, const void *); - switch (bind_order) { - case ERTS_CPU_BIND_SPREAD: - cmp_func = cpu_spread_order_cmp; - break; - case ERTS_CPU_BIND_PROCESSOR_SPREAD: - cmp_func = cpu_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_THREAD_SPREAD: - cmp_func = cpu_thread_spread_order_cmp; - break; - case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: - no_node = 1; - cmp_func = cpu_thread_no_node_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: - no_node = 1; - cmp_func = cpu_no_node_processor_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: - no_node = 1; - cmp_func = cpu_no_node_thread_spread_order_cmp; - break; - case ERTS_CPU_BIND_NO_SPREAD: - cmp_func = cpu_no_spread_order_cmp; - break; - default: - cmp_func = NULL; - erl_exit(ERTS_ABORT_EXIT, - "Bad cpu bind type: %d\n", - (int) cpu_bind_order); - break; - } - - if (mk_seq) - make_cpudata_id_seq(cpudata, size, no_node); - - qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func); - } -} - -static int -processor_order_cmp(const void *vx, const void *vy) -{ - erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx; - erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy; - - if (x->processor != y->processor) - return x->processor - y->processor; - if (x->node != y->node) - return x->node - y->node; - if (x->processor_node != y->processor_node) - return x->processor_node - y->processor_node; - if (x->core != y->core) - return x->core - y->core; - if (x->thread != y->thread) - return x->thread - y->thread; - return 0; -} - -static void -check_cpu_bind(ErtsSchedulerData *esdp) -{ - int res; - int cpu_id; - erts_smp_runq_unlock(esdp->run_queue); - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - cpu_id = scheduler2cpu_map[esdp->no].bind_id; - if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) { - res = erts_bind_to_cpu(erts_cpuinfo, cpu_id); - if (res == 0) - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id; - else { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n", - (int) esdp->no, cpu_id, erl_errno_id(-res)); - erts_send_error_to_logger_nogl(dsbufp); - if (scheduler2cpu_map[esdp->no].bound_id >= 0) - goto unbind; - } - } - else if (cpu_id < 0 && scheduler2cpu_map[esdp->no].bound_id >= 0) { - unbind: - /* Get rid of old binding */ - res = erts_unbind_from_cpu(erts_cpuinfo); - if (res == 0) - esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1; - else { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n", - (int) esdp->no, cpu_id, erl_errno_id(-res)); - erts_send_error_to_logger_nogl(dsbufp); - } - } - erts_smp_runq_lock(esdp->run_queue); -#ifdef ERTS_SMP - if (erts_common_run_queue) - erts_smp_atomic_set(&esdp->chk_cpu_bind, 0); - else { - esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; - } -#endif - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - -} - -static void -signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size) -{ - int s_ix = 1; - int cpu_ix; - - if (cpu_bind_order != ERTS_CPU_BIND_NONE) { - - cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1); - - for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++) - if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical)) - scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical; - } - - if (s_ix <= erts_no_schedulers) - for (; s_ix <= erts_no_schedulers; s_ix++) - scheduler2cpu_map[s_ix].bind_id = -1; - -#ifdef ERTS_SMP - if (erts_common_run_queue) { - for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++) - erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1); - wake_all_schedulers(); - } - else { - ERTS_FOREACH_RUNQ(rq, - { - rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND; - wake_scheduler(rq, 0); - }); - } -#else - check_cpu_bind(erts_get_scheduler_data()); -#endif -} - -int -erts_init_scheduler_bind_type(char *how) -{ - if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) - return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED; - - if (!system_cpudata && !user_cpudata) - return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY; - - if (sys_strcmp(how, "s") == 0) - cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (sys_strcmp(how, "ps") == 0) - cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "ts") == 0) - cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (sys_strcmp(how, "db") == 0 - || sys_strcmp(how, "tnnps") == 0) - cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "nnps") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (sys_strcmp(how, "nnts") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (sys_strcmp(how, "ns") == 0) - cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (sys_strcmp(how, "u") == 0) - cpu_bind_order = ERTS_CPU_BIND_NONE; - else - return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE; - - return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS; -} - -typedef struct { - int *id; - int used; - int size; -} ErtsCpuTopIdSeq; - -typedef struct { - ErtsCpuTopIdSeq logical; - ErtsCpuTopIdSeq thread; - ErtsCpuTopIdSeq core; - ErtsCpuTopIdSeq processor_node; - ErtsCpuTopIdSeq processor; - ErtsCpuTopIdSeq node; -} ErtsCpuTopEntry; - -static void -init_cpu_top_entry(ErtsCpuTopEntry *cte) -{ - int size = 10; - cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->logical.size = size; - cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->thread.size = size; - cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->core.size = size; - cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->processor_node.size = size; - cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->processor.size = size; - cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS, - sizeof(int)*size); - cte->node.size = size; -} - -static void -destroy_cpu_top_entry(ErtsCpuTopEntry *cte) -{ - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id); - erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id); -} - -static int -get_cput_value_or_range(int *v, int *vr, char **str) -{ - long l; - char *c = *str; - errno = 0; - if (!isdigit((unsigned char)*c)) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; - l = strtol(c, &c, 10); - if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID; - *v = (int) l; - if (*c == '-') { - c++; - if (!isdigit((unsigned char)*c)) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - l = strtol(c, &c, 10); - if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - *vr = (int) l; - } - *str = c; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str) -{ - int ix = 0; - int need_size = 0; - char *c = *str; - - while (1) { - int res; - int val; - int nids; - int val_range = -1; - res = get_cput_value_or_range(&val, &val_range, &c); - if (res != ERTS_INIT_CPU_TOPOLOGY_OK) - return res; - if (val_range < 0 || val_range == val) - nids = 1; - else { - if (val_range > val) - nids = val_range - val + 1; - else - nids = val - val_range + 1; - } - need_size += nids; - if (need_size > idseq->size) { - idseq->size = need_size + 10; - idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS, - idseq->id, - sizeof(int)*idseq->size); - } - if (nids == 1) - idseq->id[ix++] = val; - else if (val_range > val) { - for (; val <= val_range; val++) - idseq->id[ix++] = val; - } - else { - for (; val >= val_range; val--) - idseq->id[ix++] = val; - } - if (*c != ',') - break; - c++; - } - *str = c; - idseq->used = ix; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -get_cput_entry(ErtsCpuTopEntry *cput, char **str) -{ - int h; - char *c = *str; - - cput->logical.used = 0; - cput->thread.id[0] = 0; - cput->thread.used = 1; - cput->core.id[0] = 0; - cput->core.used = 1; - cput->processor_node.id[0] = -1; - cput->processor_node.used = 1; - cput->processor.id[0] = 0; - cput->processor.used = 1; - cput->node.id[0] = -1; - cput->node.used = 1; - - h = ERTS_TOPOLOGY_MAX_DEPTH; - while (*c != ':' && *c != '\0') { - int res; - ErtsCpuTopIdSeq *idseqp; - switch (*c++) { - case 'L': - if (h <= ERTS_TOPOLOGY_LOGICAL) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->logical; - h = ERTS_TOPOLOGY_LOGICAL; - break; - case 't': - case 'T': - if (h <= ERTS_TOPOLOGY_THREAD) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->thread; - h = ERTS_TOPOLOGY_THREAD; - break; - case 'c': - case 'C': - if (h <= ERTS_TOPOLOGY_CORE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->core; - h = ERTS_TOPOLOGY_CORE; - break; - case 'p': - case 'P': - if (h <= ERTS_TOPOLOGY_PROCESSOR) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->processor; - h = ERTS_TOPOLOGY_PROCESSOR; - break; - case 'n': - case 'N': - if (h <= ERTS_TOPOLOGY_PROCESSOR) { - do_node: - if (h <= ERTS_TOPOLOGY_NODE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->node; - h = ERTS_TOPOLOGY_NODE; - } - else { - int p_node = 0; - char *p_chk = c; - while (*p_chk != '\0' && *p_chk != ':') { - if (*p_chk == 'p' || *p_chk == 'P') { - p_node = 1; - break; - } - p_chk++; - } - if (!p_node) - goto do_node; - if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY; - idseqp = &cput->processor_node; - h = ERTS_TOPOLOGY_PROCESSOR_NODE; - } - break; - default: - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE; - } - res = get_cput_id_seq(idseqp, &c); - if (res != ERTS_INIT_CPU_TOPOLOGY_OK) - return res; - } - - if (cput->logical.used < 1) - return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID; - - if (*c == ':') { - c++; - } - - if (cput->thread.used != 1 - && cput->thread.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->core.used != 1 - && cput->core.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->processor_node.used != 1 - && cput->processor_node.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->processor.used != 1 - && cput->processor.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - if (cput->node.used != 1 - && cput->node.used != cput->logical.used) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE; - - *str = c; - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -static int -verify_topology(erts_cpu_topology_t *cpudata, int size) -{ - if (size > 0) { - int *logical; - int node, processor, no_nodes, i; - - /* Verify logical ids */ - logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size); - - for (i = 0; i < user_cpudata_size; i++) - logical[i] = user_cpudata[i].logical; - - qsort(logical, user_cpudata_size, sizeof(int), int_cmp); - for (i = 0; i < user_cpudata_size-1; i++) { - if (logical[i] == logical[i+1]) { - erts_free(ERTS_ALC_T_TMP, logical); - return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS; - } - } - - erts_free(ERTS_ALC_T_TMP, logical); - - qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp); - - /* Verify unique entities */ - - for (i = 1; i < user_cpudata_size; i++) { - if (user_cpudata[i-1].processor == user_cpudata[i].processor - && user_cpudata[i-1].node == user_cpudata[i].node - && (user_cpudata[i-1].processor_node - == user_cpudata[i].processor_node) - && user_cpudata[i-1].core == user_cpudata[i].core - && user_cpudata[i-1].thread == user_cpudata[i].thread) { - return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES; - } - } - - /* Verify numa nodes */ - node = cpudata[0].node; - processor = cpudata[0].processor; - no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0; - for (i = 1; i < size; i++) { - if (no_nodes) { - if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - } - else { - if (cpudata[i].processor == processor && cpudata[i].node != node) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - node = cpudata[i].node; - processor = cpudata[i].processor; - if (node >= 0 && cpudata[i].processor_node >= 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - if (node < 0 && cpudata[i].processor_node < 0) - return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES; - } - } - } - - return ERTS_INIT_CPU_TOPOLOGY_OK; -} - -int -erts_init_cpu_topology(char *topology_str) -{ - ErtsCpuTopEntry cput; - int need_size; - char *c; - int ix; - int error = ERTS_INIT_CPU_TOPOLOGY_OK; - - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata_size = 10; - - user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - - init_cpu_top_entry(&cput); - - ix = 0; - need_size = 0; - - c = topology_str; - if (*c == '\0') { - error = ERTS_INIT_CPU_TOPOLOGY_MISSING; - goto fail; - } - do { - int r; - error = get_cput_entry(&cput, &c); - if (error != ERTS_INIT_CPU_TOPOLOGY_OK) - goto fail; - need_size += cput.logical.used; - if (user_cpudata_size < need_size) { - user_cpudata_size = need_size + 10; - user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, - user_cpudata, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - } - - ASSERT(cput.thread.used == 1 - || cput.thread.used == cput.logical.used); - ASSERT(cput.core.used == 1 - || cput.core.used == cput.logical.used); - ASSERT(cput.processor_node.used == 1 - || cput.processor_node.used == cput.logical.used); - ASSERT(cput.processor.used == 1 - || cput.processor.used == cput.logical.used); - ASSERT(cput.node.used == 1 - || cput.node.used == cput.logical.used); - - for (r = 0; r < cput.logical.used; r++) { - user_cpudata[ix].logical = cput.logical.id[r]; - user_cpudata[ix].thread = - cput.thread.id[cput.thread.used == 1 ? 0 : r]; - user_cpudata[ix].core = - cput.core.id[cput.core.used == 1 ? 0 : r]; - user_cpudata[ix].processor_node = - cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r]; - user_cpudata[ix].processor = - cput.processor.id[cput.processor.used == 1 ? 0 : r]; - user_cpudata[ix].node = - cput.node.id[cput.node.used == 1 ? 0 : r]; - ix++; - } - } while (*c != '\0'); - - if (user_cpudata_size != ix) { - user_cpudata_size = ix; - user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA, - user_cpudata, - (sizeof(erts_cpu_topology_t) - * user_cpudata_size)); - } - - error = verify_topology(user_cpudata, user_cpudata_size); - if (error == ERTS_INIT_CPU_TOPOLOGY_OK) { - destroy_cpu_top_entry(&cput); - return ERTS_INIT_CPU_TOPOLOGY_OK; - } - - fail: - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata_size = 0; - destroy_cpu_top_entry(&cput); - return error; -} - -#define ERTS_GET_CPU_TOPOLOGY_ERROR -1 -#define ERTS_GET_USED_CPU_TOPOLOGY 0 -#define ERTS_GET_DETECTED_CPU_TOPOLOGY 1 -#define ERTS_GET_DEFINED_CPU_TOPOLOGY 2 - -static Eterm get_cpu_topology_term(Process *c_p, int type); - -Eterm -erts_set_cpu_topology(Process *c_p, Eterm term) -{ - erts_cpu_topology_t *cpudata = NULL; - int cpudata_size = 0; - Eterm res; - - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY); - if (term == am_undefined) { - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata = NULL; - user_cpudata_size = 0; - - if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) { - cpudata_size = system_cpudata_size; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - - sys_memcpy((void *) cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*cpudata_size); - } - } - else if (is_not_list(term)) { - error: - res = THE_NON_VALUE; - goto done; - } - else { - Eterm list = term; - int ix = 0; - - cpudata_size = 100; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - - while (is_list(list)) { - Eterm *lp = list_val(list); - Eterm cpu = CAR(lp); - Eterm* tp; - Sint id; - - if (is_not_tuple(cpu)) - goto error; - - tp = tuple_val(cpu); - - if (arityval(tp[0]) != 7 || tp[1] != am_cpu) - goto error; - - if (ix >= cpudata_size) { - cpudata_size += 100; - cpudata = erts_realloc(ERTS_ALC_T_TMP, - cpudata, - (sizeof(erts_cpu_topology_t) - * cpudata_size)); - } - - id = signed_val(tp[2]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].node = (int) id; - - id = signed_val(tp[3]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].processor = (int) id; - - id = signed_val(tp[4]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].processor_node = (int) id; - - id = signed_val(tp[5]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].core = (int) id; - - id = signed_val(tp[6]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].thread = (int) id; - - id = signed_val(tp[7]); - if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id) - goto error; - cpudata[ix].logical = (int) id; - - list = CDR(lp); - ix++; - } - - if (is_not_nil(list)) - goto error; - - cpudata_size = ix; - - if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size)) - goto error; - - if (user_cpudata_size != cpudata_size) { - if (user_cpudata) - erts_free(ERTS_ALC_T_CPUDATA, user_cpudata); - user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - sizeof(erts_cpu_topology_t)*cpudata_size); - user_cpudata_size = cpudata_size; - } - - sys_memcpy((void *) user_cpudata, - (void *) cpudata, - sizeof(erts_cpu_topology_t)*cpudata_size); - } - - signal_schedulers_bind_change(cpudata, cpudata_size); - - done: - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - if (cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); - - return res; -} - -static Eterm -bound_schedulers_term(ErtsCpuBindOrder order) -{ - switch (order) { - case ERTS_CPU_BIND_SPREAD: { - ERTS_DECL_AM(spread); - return AM_spread; - } - case ERTS_CPU_BIND_PROCESSOR_SPREAD: { - ERTS_DECL_AM(processor_spread); - return AM_processor_spread; - } - case ERTS_CPU_BIND_THREAD_SPREAD: { - ERTS_DECL_AM(thread_spread); - return AM_thread_spread; - } - case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: { - ERTS_DECL_AM(thread_no_node_processor_spread); - return AM_thread_no_node_processor_spread; - } - case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: { - ERTS_DECL_AM(no_node_processor_spread); - return AM_no_node_processor_spread; - } - case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: { - ERTS_DECL_AM(no_node_thread_spread); - return AM_no_node_thread_spread; - } - case ERTS_CPU_BIND_NO_SPREAD: { - ERTS_DECL_AM(no_spread); - return AM_no_spread; - } - case ERTS_CPU_BIND_NONE: { - ERTS_DECL_AM(unbound); - return AM_unbound; - } - default: - ASSERT(0); - return THE_NON_VALUE; - } -} - -Eterm -erts_bound_schedulers_term(Process *c_p) -{ - ErtsCpuBindOrder order; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - order = cpu_bind_order; - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return bound_schedulers_term(order); -} - -static void -create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size) -{ - if (user_cpudata) { - *cpudata_size = user_cpudata_size; - *cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * (*cpudata_size))); - sys_memcpy((void *) *cpudata, - (void *) user_cpudata, - sizeof(erts_cpu_topology_t)*(*cpudata_size)); - } - else if (system_cpudata) { - *cpudata_size = system_cpudata_size; - *cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * (*cpudata_size))); - sys_memcpy((void *) *cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*(*cpudata_size)); - } - else { - *cpudata = NULL; - *cpudata_size = 0; - } -} - -static void -destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata) -{ - if (cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); -} - -Eterm -erts_bind_schedulers(Process *c_p, Eterm how) -{ - Eterm res; - erts_cpu_topology_t *cpudata; - int cpudata_size; - ErtsCpuBindOrder old_cpu_bind_order; - - erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx); - - if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) { - ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP); - } - else { - - old_cpu_bind_order = cpu_bind_order; - - if (ERTS_IS_ATOM_STR("spread", how)) - cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (ERTS_IS_ATOM_STR("processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("default_bind", how) - || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("no_spread", how)) - cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (ERTS_IS_ATOM_STR("unbound", how)) - cpu_bind_order = ERTS_CPU_BIND_NONE; - else { - cpu_bind_order = old_cpu_bind_order; - ERTS_BIF_PREP_ERROR(res, c_p, BADARG); - goto done; - } - - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - - if (!cpudata) { - cpu_bind_order = old_cpu_bind_order; - ERTS_BIF_PREP_ERROR(res, c_p, BADARG); - goto done; - } - - signal_schedulers_bind_change(cpudata, cpudata_size); - - destroy_tmp_cpu_topology_copy(cpudata); - - res = bound_schedulers_term(old_cpu_bind_order); - } - - done: - - erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx); - - return res; -} - -Eterm -erts_fake_scheduler_bindings(Process *p, Eterm how) -{ - ErtsCpuBindOrder fake_cpu_bind_order; - erts_cpu_topology_t *cpudata; - int cpudata_size; - Eterm res; - - if (ERTS_IS_ATOM_STR("spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD; - else if (ERTS_IS_ATOM_STR("processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("thread_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("default_bind", how) - || ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD; - else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD; - else if (ERTS_IS_ATOM_STR("no_spread", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD; - else if (ERTS_IS_ATOM_STR("unbound", how)) - fake_cpu_bind_order = ERTS_CPU_BIND_NONE; - else { - ERTS_BIF_PREP_ERROR(res, p, BADARG); - return res; - } - - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - - if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE) - ERTS_BIF_PREP_RET(res, am_false); - else { - int i; - Eterm *hp; - - cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1); - -#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA - - erts_fprintf(stderr, "node: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].node); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "processor: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].processor); - erts_fprintf(stderr, "\n"); - if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD - && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD - && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) { - erts_fprintf(stderr, "processor_node:"); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].processor_node); - erts_fprintf(stderr, "\n"); - } - erts_fprintf(stderr, "core: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].core); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "thread: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].thread); - erts_fprintf(stderr, "\n"); - erts_fprintf(stderr, "logical: "); - for (i = 0; i < cpudata_size; i++) - erts_fprintf(stderr, " %2d", cpudata[i].logical); - erts_fprintf(stderr, "\n"); -#endif - - hp = HAlloc(p, cpudata_size+1); - ERTS_BIF_PREP_RET(res, make_tuple(hp)); - *hp++ = make_arityval((Uint) cpudata_size); - for (i = 0; i < cpudata_size; i++) - *hp++ = make_small((Uint) cpudata[i].logical); - } - - destroy_tmp_cpu_topology_copy(cpudata); - - return res; -} - -Eterm -erts_get_schedulers_binds(Process *c_p) -{ - int ix; - ERTS_DECL_AM(unbound); - Eterm *hp = HAlloc(c_p, erts_no_schedulers+1); - Eterm res = make_tuple(hp); - - *(hp++) = make_arityval(erts_no_schedulers); - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - for (ix = 1; ix <= erts_no_schedulers; ix++) - *(hp++) = (scheduler2cpu_map[ix].bound_id >= 0 - ? make_small(scheduler2cpu_map[ix].bound_id) - : AM_unbound); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return res; -} - -static Eterm -bld_topology_term(Eterm **hpp, - Uint *hszp, - erts_cpu_topology_t *cpudata, - int size) -{ - Eterm res = NIL; - int i; - - if (size == 0) - return am_undefined; - - for (i = size-1; i >= 0; i--) { - res = erts_bld_cons(hpp, - hszp, - erts_bld_tuple(hpp, - hszp, - 7, - am_cpu, - make_small(cpudata[i].node), - make_small(cpudata[i].processor), - make_small(cpudata[i].processor_node), - make_small(cpudata[i].core), - make_small(cpudata[i].thread), - make_small(cpudata[i].logical)), - res); - } - return res; -} - -static Eterm -get_cpu_topology_term(Process *c_p, int type) -{ -#ifdef DEBUG - Eterm *hp_end; -#endif - Eterm *hp; - Uint hsz; - Eterm res = THE_NON_VALUE; - erts_cpu_topology_t *cpudata = NULL; - int size = 0; - - switch (type) { - case ERTS_GET_USED_CPU_TOPOLOGY: - if (user_cpudata) - goto defined; - else - goto detected; - case ERTS_GET_DETECTED_CPU_TOPOLOGY: - detected: - if (!system_cpudata) - res = am_undefined; - else { - size = system_cpudata_size; - cpudata = erts_alloc(ERTS_ALC_T_TMP, - (sizeof(erts_cpu_topology_t) - * size)); - sys_memcpy((void *) cpudata, - (void *) system_cpudata, - sizeof(erts_cpu_topology_t)*size); - } - break; - case ERTS_GET_DEFINED_CPU_TOPOLOGY: - defined: - if (!user_cpudata) - res = am_undefined; - else { - size = user_cpudata_size; - cpudata = user_cpudata; - } - break; - default: - erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type); - break; - } - - if (res == am_undefined) { - ASSERT(!cpudata); - return res; - } - - hsz = 0; - - bld_topology_term(NULL, &hsz, - cpudata, size); - - hp = HAlloc(c_p, hsz); - -#ifdef DEBUG - hp_end = hp + hsz; -#endif - - res = bld_topology_term(&hp, NULL, - cpudata, size); - - ASSERT(hp_end == hp); - - if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata) - erts_free(ERTS_ALC_T_TMP, cpudata); - - return res; -} - -Eterm -erts_get_cpu_topology_term(Process *c_p, Eterm which) -{ - Eterm res; - int type; - erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx); - if (ERTS_IS_ATOM_STR("used", which)) - type = ERTS_GET_USED_CPU_TOPOLOGY; - else if (ERTS_IS_ATOM_STR("detected", which)) - type = ERTS_GET_DETECTED_CPU_TOPOLOGY; - else if (ERTS_IS_ATOM_STR("defined", which)) - type = ERTS_GET_DEFINED_CPU_TOPOLOGY; - else - type = ERTS_GET_CPU_TOPOLOGY_ERROR; - if (type == ERTS_GET_CPU_TOPOLOGY_ERROR) - res = THE_NON_VALUE; - else - res = get_cpu_topology_term(c_p, type); - erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx); - return res; -} - -static void -early_cpu_bind_init(void) -{ - user_cpudata = NULL; - user_cpudata_size = 0; - - system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo); - system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(erts_cpu_topology_t) - * system_cpudata_size)); - - cpu_bind_order = ERTS_CPU_BIND_NONE; - - if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata) - || ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata, - system_cpudata_size)) { - erts_free(ERTS_ALC_T_CPUDATA, system_cpudata); - system_cpudata = NULL; - system_cpudata_size = 0; - } -} - -static void -late_cpu_bind_init(void) -{ - int ix; - - erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind"); - - scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA, - (sizeof(ErtsCpuBindData) - * (erts_no_schedulers+1))); - for (ix = 1; ix <= erts_no_schedulers; ix++) { - scheduler2cpu_map[ix].bind_id = -1; - scheduler2cpu_map[ix].bound_id = -1; - } - - if (cpu_bind_order != ERTS_CPU_BIND_NONE) { - erts_cpu_topology_t *cpudata; - int cpudata_size; - create_tmp_cpu_topology_copy(&cpudata, &cpudata_size); - ASSERT(cpudata); - signal_schedulers_bind_change(cpudata, cpudata_size); - destroy_tmp_cpu_topology_copy(cpudata); - } -} - #ifdef ERTS_SMP static void @@ -4481,7 +3638,7 @@ add_pend_suspend(Process *suspendee, sizeof(ErtsPendingSuspend)); psp->next = NULL; #ifdef DEBUG -#ifdef ARCH_64 +#if defined(ARCH_64) && !HALFWORD_HEAP psp->end = (ErtsPendingSuspend *) 0xdeaddeaddeaddead; #else psp->end = (ErtsPendingSuspend *) 0xdeaddead; @@ -5322,7 +4479,7 @@ dequeue_process(ErtsRunQueue *runq, Process *p) } /* schedule a process */ -static ERTS_INLINE void +static ERTS_INLINE ErtsRunQueue * internal_add_to_runq(ErtsRunQueue *runq, Process *p) { Uint32 prev_status = p->status; @@ -5333,12 +4490,12 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p) ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); if (p->status_flags & ERTS_PROC_SFLG_INRUNQ) - return; + return NULL; else if (p->runq_flags & ERTS_PROC_RUNQ_FLG_RUNNING) { ASSERT(p->status != P_SUSPENDED); ERTS_DBG_CHK_PROCS_RUNQ_NOPROC(runq, p); p->status_flags |= ERTS_PROC_SFLG_PENDADD2SCHEDQ; - return; + return NULL; } ASSERT(!p->scheduler_data); #endif @@ -5377,20 +4534,23 @@ internal_add_to_runq(ErtsRunQueue *runq, Process *p) profile_runnable_proc(p, am_active); } - smp_notify_inc_runq(add_runq); - if (add_runq != runq) erts_smp_runq_unlock(add_runq); + + return add_runq; } void erts_add_to_runq(Process *p) { + ErtsRunQueue *notify_runq; ErtsRunQueue *runq = erts_get_runq_proc(p); erts_smp_runq_lock(runq); - internal_add_to_runq(runq, p); + notify_runq = internal_add_to_runq(runq, p); erts_smp_runq_unlock(runq); + smp_notify_inc_runq(notify_runq); + } /* Possibly remove a scheduled process we need to suspend */ @@ -5529,8 +4689,6 @@ erts_proc_migrate(Process *p, ErtsProcLocks *plcks, p->run_queue = to_rq; enqueue_process(to_rq, p); - smp_notify_inc_runq(to_rq); - return ERTS_MIGRATE_SUCCESS; } #endif /* ERTS_SMP */ @@ -5727,30 +4885,6 @@ erts_set_process_priority(Process *p, Eterm new_value) return old_value; } -#ifdef ERTS_SMP - -static ERTS_INLINE int -prepare_for_sys_schedule(void) -{ - while (!erts_port_task_have_outstanding_io_tasks() - && !erts_smp_atomic_xchg(&doing_sys_schedule, 1)) { - if (!erts_port_task_have_outstanding_io_tasks()) - return 1; - erts_smp_atomic_set(&doing_sys_schedule, 0); - } - return 0; -} - -#else - -static ERTS_INLINE int -prepare_for_sys_schedule(void) -{ - return !erts_port_task_have_outstanding_io_tasks(); -} - -#endif - /* note that P_RUNNING is only set so that we don't try to remove ** running processes from the schedule queue if they exit - a running ** process not being in the schedule queue!! @@ -5839,6 +4973,9 @@ Process *schedule(Process *p, int calls) } if (IS_TRACED(p)) { + if (IS_TRACED_FL(p, F_TRACE_CALLS) && p->status != P_FREE) { + erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_OUT); + } switch (p->status) { case P_EXITING: if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT)) @@ -5882,8 +5019,11 @@ Process *schedule(Process *p, int calls) p->status_flags &= ~ERTS_PROC_SFLG_RUNNING; if (p->status_flags & ERTS_PROC_SFLG_PENDADD2SCHEDQ) { + ErtsRunQueue *notify_runq; p->status_flags &= ~ERTS_PROC_SFLG_PENDADD2SCHEDQ; - internal_add_to_runq(rq, p); + notify_runq = internal_add_to_runq(rq, p); + if (notify_runq != rq) + smp_notify_inc_runq(notify_runq); } #endif @@ -5951,21 +5091,33 @@ Process *schedule(Process *p, int calls) | ERTS_RUNQ_FLG_CHK_CPU_BIND | ERTS_RUNQ_FLG_SUSPENDED)) { if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED) - || erts_smp_atomic_read(&esdp->suspended)) { + || (erts_smp_atomic_read(&esdp->ssi->flags) + & ERTS_SSI_FLG_SUSPENDED)) { + ASSERT(erts_smp_atomic_read(&esdp->ssi->flags) + & ERTS_SSI_FLG_SUSPENDED); suspend_scheduler(esdp); } if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) || erts_smp_atomic_read(&esdp->chk_cpu_bind)) { - check_cpu_bind(esdp); + erts_sched_check_cpu_bind(esdp); } } -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - if (esdp->check_children) { - esdp->check_children = 0; - erts_smp_runq_unlock(rq); - erts_check_children(); - erts_smp_runq_lock(rq); +#if defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) \ + || defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) + { + ErtsSchedulerSleepInfo *ssi = esdp->ssi; + long aux_work = erts_smp_atomic_read(&ssi->aux_work); + if (aux_work) { + erts_smp_runq_unlock(rq); +#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK + aux_work = blockable_aux_work(esdp, ssi, aux_work); +#endif +#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK + nonblockable_aux_work(esdp, ssi, aux_work); +#endif + erts_smp_runq_lock(rq); + } } #endif @@ -5997,7 +5149,10 @@ Process *schedule(Process *p, int calls) if (rq->flags & (ERTS_RUNQ_FLG_SHARED_RUNQ | ERTS_RUNQ_FLG_SUSPENDED)) { if ((rq->flags & ERTS_RUNQ_FLG_SUSPENDED) - || erts_smp_atomic_read(&esdp->suspended)) { + || (erts_smp_atomic_read(&esdp->ssi->flags) + & ERTS_SSI_FLG_SUSPENDED)) { + ASSERT(erts_smp_atomic_read(&esdp->ssi->flags) + & ERTS_SSI_FLG_SUSPENDED); non_empty_runq(rq); goto continue_check_activities_to_run; } @@ -6014,17 +5169,7 @@ Process *schedule(Process *p, int calls) } } - if (prepare_for_sys_schedule()) { - erts_smp_atomic_set(&function_calls, 0); - fcalls = 0; - sched_sys_wait(esdp->no, rq); - erts_smp_atomic_set(&doing_sys_schedule, 0); - } - else { - /* If all schedulers are waiting, one of them *should* - be waiting in erl_sys_schedule() */ - sched_cnd_wait(esdp->no, rq); - } + scheduler_wait(&fcalls, esdp, rq); non_empty_runq(rq); @@ -6050,7 +5195,9 @@ Process *schedule(Process *p, int calls) erts_smp_atomic_set(&function_calls, 0); fcalls = 0; + ASSERT(!erts_port_task_have_outstanding_io_tasks()); + #ifdef ERTS_SMP /* erts_sys_schedule_interrupt(0); */ #endif @@ -6081,12 +5228,12 @@ Process *schedule(Process *p, int calls) if (rq->wakeup_other < 0) rq->wakeup_other = 0; } - else if (rq->wakeup_other < ERTS_WAKEUP_OTHER_LIMIT) + else if (rq->wakeup_other < wakeup_other_limit) rq->wakeup_other += rq->len*wo_reds + ERTS_WAKEUP_OTHER_FIXED_INC; else { if (erts_common_run_queue) { if (erts_common_run_queue->waiting) - wake_one_scheduler(); + wake_scheduler(erts_common_run_queue, 0, 1); } else if (erts_smp_atomic_read(&no_empty_run_queues) != 0) { wake_scheduler_on_empty_runq(rq); @@ -6231,10 +5378,10 @@ Process *schedule(Process *p, int calls) erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); if (erts_sched_stat.enabled) { - Uint old = ERTS_PROC_SCHED_ID(p, + UWord old = ERTS_PROC_SCHED_ID(p, (ERTS_PROC_LOCK_MAIN | ERTS_PROC_LOCK_STATUS), - esdp->no); + (UWord) esdp->no); int migrated = old && old != esdp->no; erts_smp_spin_lock(&erts_sched_stat.lock); @@ -6275,7 +5422,11 @@ Process *schedule(Process *p, int calls) trace_virtual_sched(p, am_in); break; } + if (IS_TRACED_FL(p, F_TRACE_CALLS)) { + erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_IN); + } } + if (p->status != P_EXITING) p->status = P_RUNNING; @@ -6378,6 +5529,15 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) ErtsRunQueue *rq = erts_get_runq_current(NULL); ErtsMiscOpList *molp = misc_op_list_alloc(); + if (!rq) { + /* + * This can only happen when the sys msg dispatcher + * thread schedules misc ops (this happens *very* + * seldom; only when trace drivers are unloaded). + */ + rq = ERTS_RUNQ_IX(0); + } + erts_smp_runq_lock(rq); while (rq->misc.evac_runq) { @@ -6397,8 +5557,8 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) else rq->misc.start = molp; rq->misc.end = molp; - smp_notify_inc_runq(rq); erts_smp_runq_unlock(rq); + smp_notify_inc_runq(rq); } static void @@ -6640,7 +5800,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). Eterm args, /* Arguments for function (must be well-formed list). */ ErlSpawnOpts* so) /* Options for spawn. */ { - ErtsRunQueue *rq; + ErtsRunQueue *rq, *notify_runq; Process *p; Sint arity; /* Number of arguments. */ #ifndef HYBRID @@ -6719,11 +5879,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). /* * Must initialize binary lists here before copying binaries to process. */ - p->off_heap.mso = NULL; -#ifndef HYBRID /* FIND ME! */ - p->off_heap.funs = NULL; -#endif - p->off_heap.externals = NULL; + p->off_heap.first = NULL; p->off_heap.overhead = 0; heap_need += @@ -6757,13 +5913,14 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->bin_vheap_sz = p->min_vheap_size; p->bin_old_vheap_sz = p->min_vheap_size; p->bin_old_vheap = 0; + p->bin_vheap_mature = 0; /* No need to initialize p->fcalls. */ p->current = p->initial+INITIAL_MOD; - p->i = (Eterm *) beam_apply; - p->cp = (Eterm *) beam_apply+1; + p->i = (BeamInstr *) beam_apply; + p->cp = (BeamInstr *) beam_apply+1; p->arg_reg = p->def_arg_reg; p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]); @@ -6813,7 +5970,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->group_leader = IS_CONST(parent->group_leader) ? parent->group_leader - : STORE_NC(&p->htop, &p->off_heap.externals, parent->group_leader); + : STORE_NC(&p->htop, &p->off_heap, parent->group_leader); } erts_get_default_tracing(&p->trace_flags, &p->tracer_proc); @@ -6957,10 +6114,12 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). #endif p->status = P_WAITING; - internal_add_to_runq(rq, p); + notify_runq = internal_add_to_runq(rq, p); erts_smp_runq_unlock(rq); + smp_notify_inc_runq(notify_runq); + res = p->id; erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); @@ -7007,6 +6166,7 @@ void erts_init_empty_process(Process *p) p->bin_vheap_sz = BIN_VH_MIN_SIZE; p->bin_old_vheap_sz = BIN_VH_MIN_SIZE; p->bin_old_vheap = 0; + p->bin_vheap_mature = 0; #ifdef ERTS_SMP p->u.ptimer = NULL; p->bound_runq = NULL; @@ -7014,11 +6174,7 @@ void erts_init_empty_process(Process *p) memset(&(p->u.tm), 0, sizeof(ErlTimer)); #endif p->next = NULL; - p->off_heap.mso = NULL; -#ifndef HYBRID /* FIND ME! */ - p->off_heap.funs = NULL; -#endif - p->off_heap.externals = NULL; + p->off_heap.first = NULL; p->off_heap.overhead = 0; p->reg = NULL; p->heap_sz = 0; @@ -7165,11 +6321,7 @@ erts_debug_verify_clean_empty_process(Process* p) /* Thing that erts_cleanup_empty_process() cleans up */ - ASSERT(p->off_heap.mso == NULL); -#ifndef HYBRID /* FIND ME! */ - ASSERT(p->off_heap.funs == NULL); -#endif - ASSERT(p->off_heap.externals == NULL); + ASSERT(p->off_heap.first == NULL); ASSERT(p->off_heap.overhead == 0); ASSERT(p->mbuf == NULL); @@ -7180,25 +6332,16 @@ erts_debug_verify_clean_empty_process(Process* p) void erts_cleanup_empty_process(Process* p) { - ErlHeapFragment* mbufp; - /* We only check fields that are known to be used... */ erts_cleanup_offheap(&p->off_heap); - p->off_heap.mso = NULL; -#ifndef HYBRID /* FIND ME! */ - p->off_heap.funs = NULL; -#endif - p->off_heap.externals = NULL; + p->off_heap.first = NULL; p->off_heap.overhead = 0; - mbufp = p->mbuf; - while (mbufp) { - ErlHeapFragment *next = mbufp->next; - free_message_buffer(mbufp); - mbufp = next; + if (p->mbuf != NULL) { + free_message_buffer(p->mbuf); + p->mbuf = NULL; } - p->mbuf = NULL; #if defined(ERTS_ENABLE_LOCK_COUNT) && defined(ERTS_SMP) erts_lcnt_proc_lock_destroy(p); #endif @@ -7214,7 +6357,6 @@ static void delete_process(Process* p) { ErlMessage* mp; - ErlHeapFragment* bp; VERBOSE(DEBUG_PROCESSES, ("Removing process: %T\n",p->id)); @@ -7230,7 +6372,7 @@ delete_process(Process* p) * The mso list should not be used anymore, but if it is, make sure that * we'll notice. */ - p->off_heap.mso = (void *) 0x8DEFFACD; + p->off_heap.first = (void *) 0x8DEFFACD; if (p->arg_reg != p->def_arg_reg) { erts_free(ERTS_ALC_T_ARG_REG, p->arg_reg); @@ -7264,11 +6406,8 @@ delete_process(Process* p) /* * Free all pending message buffers. */ - bp = p->mbuf; - while (bp != NULL) { - ErlHeapFragment* next_bp = bp->next; - free_message_buffer(bp); - bp = next_bp; + if (p->mbuf != NULL) { + free_message_buffer(p->mbuf); } erts_erase_dicts(p); @@ -7348,7 +6487,7 @@ set_proc_exiting(Process *p, Eterm reason, ErlHeapFragment *bp) p->freason = EXTAG_EXIT; KILL_CATCHES(p); cancel_timer(p); - p->i = (Eterm *) beam_exit; + p->i = (BeamInstr *) beam_exit; } @@ -7778,9 +6917,10 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) erts_port_release(prt); } else if (is_internal_pid(mon->pid)) {/* local by name or pid */ Eterm watched; - Eterm lhp[3]; + DeclareTmpHeapNoproc(lhp,3); ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK | ERTS_PROC_LOCKS_MSG_SEND); + UseTmpHeapNoproc(3); rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks); if (rp == NULL) { goto done; @@ -7795,6 +6935,7 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) erts_queue_monitor_message(rp, &rp_locks, mon->ref, am_process, watched, pcontext->reason); } + UnUseTmpHeapNoproc(3); /* else: demonitor while we exited, i.e. do nothing... */ erts_smp_proc_unlock(rp, rp_locks); } else { /* external by pid or name */ @@ -8025,8 +7166,13 @@ erts_do_exit_process(Process* p, Eterm reason) ERTS_SMP_MSGQ_MV_INQ2PRIVQ(p); #endif - if (IS_TRACED_FL(p,F_TRACE_PROCS)) - trace_proc(p, p, am_exit, reason); + if (IS_TRACED(p)) { + if (IS_TRACED_FL(p, F_TRACE_CALLS)) + erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_EXITING); + + if (IS_TRACED_FL(p,F_TRACE_PROCS)) + trace_proc(p, p, am_exit, reason); + } erts_trace_check_exiting(p->id); @@ -8075,6 +7221,8 @@ continue_exit_process(Process *p Eterm reason = p->fvalue; DistEntry *dep; struct saved_calls *scb; + process_breakpoint_time_t *pbt; + #ifdef DEBUG int yield_allowed = 1; #endif @@ -8214,6 +7362,7 @@ continue_exit_process(Process *p ? ERTS_PROC_SET_DIST_ENTRY(p, ERTS_PROC_LOCKS_ALL, NULL) : NULL); scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, ERTS_PROC_LOCKS_ALL, NULL); + pbt = ERTS_PROC_SET_CALL_TIME(p, ERTS_PROC_LOCKS_ALL, NULL); erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); processes_busy--; @@ -8228,11 +7377,12 @@ continue_exit_process(Process *p * Pre-build the EXIT tuple if there are any links. */ if (lnk) { - Eterm tmp_heap[4]; + DeclareTmpHeap(tmp_heap,4,p); Eterm exit_tuple; Uint exit_tuple_sz; Eterm* hp; + UseTmpHeap(4,p); hp = &tmp_heap[0]; exit_tuple = TUPLE3(hp, am_EXIT, p->id, reason); @@ -8243,16 +7393,21 @@ continue_exit_process(Process *p ExitLinkContext context = {p, reason, exit_tuple, exit_tuple_sz}; erts_sweep_links(lnk, &doit_exit_link, &context); } + UnUseTmpHeap(4,p); } { ExitMonitorContext context = {reason, p}; - erts_sweep_monitors(mon,&doit_exit_monitor,&context); + erts_sweep_monitors(mon,&doit_exit_monitor,&context); /* Allocates TmpHeap, but we + have none here */ } if (scb) erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb); + if (pbt) + erts_free(ERTS_ALC_T_BPD, (void *) pbt); + delete_process(p); erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN); @@ -8271,7 +7426,7 @@ continue_exit_process(Process *p ASSERT(p->status == P_EXITING); - p->i = (Eterm *) beam_continue_exit; + p->i = (BeamInstr *) beam_continue_exit; if (!(curr_locks & ERTS_PROC_LOCK_STATUS)) { erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); @@ -8291,7 +7446,7 @@ continue_exit_process(Process *p static void timeout_proc(Process* p) { - p->i = (Eterm *) p->def_arg_reg[0]; + p->i = *((BeamInstr **) (UWord) p->def_arg_reg); p->flags |= F_TIMO; p->flags &= ~F_INSLPQUEUE; @@ -8390,9 +7545,9 @@ erts_program_counter_info(int to, void *to_arg, Process *p) } static void -print_function_from_pc(int to, void *to_arg, Eterm* x) +print_function_from_pc(int to, void *to_arg, BeamInstr* x) { - Eterm* addr = find_function_from_pc(x); + BeamInstr* addr = find_function_from_pc(x); if (addr == NULL) { if (x == beam_exit) { erts_print(to, to_arg, "<terminate process>"); @@ -8426,7 +7581,7 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg) } if (is_CP(x)) { - erts_print(to, to_arg, "Return addr %p (", (Eterm *) x); + erts_print(to, to_arg, "Return addr %p (", (Eterm *) EXPAND_POINTER(x)); print_function_from_pc(to, to_arg, cp_val(x)); erts_print(to, to_arg, ")\n"); yreg = 0; @@ -9255,8 +8410,8 @@ init_processes_bif(void) processes_trap_export.code[0] = am_erlang; processes_trap_export.code[1] = am_processes_trap; processes_trap_export.code[2] = 2; - processes_trap_export.code[3] = (Eterm) em_apply_bif; - processes_trap_export.code[4] = (Eterm) &processes_trap; + processes_trap_export.code[3] = (BeamInstr) em_apply_bif; + processes_trap_export.code[4] = (BeamInstr) &processes_trap; #if ERTS_PROCESSES_BIF_DEBUGLEVEL >= ERTS_PROCS_DBGLVL_CHK_TERM_PROC_LIST erts_get_emu_time(&debug_tv_start); |