diff options
Diffstat (limited to 'erts/emulator/beam/erl_process.c')
-rw-r--r-- | erts/emulator/beam/erl_process.c | 9336 |
1 files changed, 5372 insertions, 3964 deletions
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index fd02f10540..3c0a126fe2 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -1,18 +1,19 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2014. All Rights Reserved. + * Copyright Ericsson AB 1996-2017. All Rights Reserved. * - * The contents of this file are subject to the Erlang Public License, - * Version 1.1, (the "License"); you may not use this file except in - * compliance with the License. You should have received a copy of the - * Erlang Public License along with this software. If not, it can be - * retrieved online at http://www.erlang.org/. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at * - * Software distributed under the License is distributed on an "AS IS" - * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See - * the License for the specific language governing rights and limitations - * under the License. + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. * * %CopyrightEnd% */ @@ -23,6 +24,8 @@ # include "config.h" #endif +#define ERTS_WANT_BREAK_HANDLING + #include <stddef.h> /* offsetof() */ #include "sys.h" #include "erl_vm.h" @@ -42,9 +45,16 @@ #include "erl_thr_queue.h" #include "erl_async.h" #include "dtrace-wrapper.h" +#include "lttng-wrapper.h" #include "erl_ptab.h" - - +#include "erl_bif_unique.h" +#define ERTS_WANT_TIMER_WHEEL_API +#include "erl_time.h" +#include "erl_nfunc_sched.h" +#include "erl_check_io.h" +#include "erl_poll.h" + +#define ERTS_CHECK_TIME_REDS CONTEXT_REDS #define ERTS_DELAYED_WAKEUP_INFINITY (~(Uint64) 0) #define ERTS_DELAYED_WAKEUP_REDUCTIONS ((Uint64) CONTEXT_REDS/2) @@ -54,11 +64,7 @@ #define ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST (CONTEXT_REDS/10) -#ifndef ERTS_SCHED_MIN_SPIN #define ERTS_SCHED_SPIN_UNTIL_YIELD 100 -#else -#define ERTS_SCHED_SPIN_UNTIL_YIELD 1 -#endif #define ERTS_SCHED_SYS_SLEEP_SPINCOUNT_VERY_LONG 40 #define ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_VERY_LONG 1000 @@ -105,17 +111,39 @@ #define LOW_BIT (1 << PRIORITY_LOW) #define PORT_BIT (1 << ERTS_PORT_PRIO_LEVEL) -#define ERTS_EMPTY_RUNQ(RQ) \ - ((ERTS_RUNQ_FLGS_GET_NOB((RQ)) & ERTS_RUNQ_FLGS_QMASK) == 0 \ - && (RQ)->misc.start == NULL) +#define ERTS_IS_RUNQ_EMPTY_FLGS(FLGS) \ + (!((FLGS) & (ERTS_RUNQ_FLGS_QMASK|ERTS_RUNQ_FLG_MISC_OP))) + +#define ERTS_IS_RUNQ_EMPTY_PORTS_FLGS(FLGS) \ + (!((FLGS) & (PORT_BIT|ERTS_RUNQ_FLG_MISC_OP))) + +#define ERTS_EMPTY_RUNQ(RQ) \ + ERTS_IS_RUNQ_EMPTY_FLGS(ERTS_RUNQ_FLGS_GET_NOB((RQ))) + +#define ERTS_EMPTY_RUNQ_PORTS(RQ) \ + ERTS_IS_RUNQ_EMPTY_FLGS(ERTS_RUNQ_FLGS_GET_NOB((RQ))) + +static ERTS_INLINE int +runq_got_work_to_execute_flags(Uint32 flags) +{ + if (flags & ERTS_RUNQ_FLG_HALTING) + return !ERTS_IS_RUNQ_EMPTY_PORTS_FLGS(flags); + return !ERTS_IS_RUNQ_EMPTY_FLGS(flags); +} + +static ERTS_INLINE int +runq_got_work_to_execute(ErtsRunQueue *rq) +{ + return runq_got_work_to_execute_flags(ERTS_RUNQ_FLGS_GET_NOB(rq)); +} #undef RUNQ_READ_RQ #undef RUNQ_SET_RQ -#define RUNQ_READ_RQ(X) ((ErtsRunQueue *) erts_smp_atomic_read_nob((X))) -#define RUNQ_SET_RQ(X, RQ) erts_smp_atomic_set_nob((X), (erts_aint_t) (RQ)) +#define RUNQ_READ_RQ(X) ((ErtsRunQueue *) erts_atomic_read_nob((X))) +#define RUNQ_SET_RQ(X, RQ) erts_atomic_set_nob((X), (erts_aint_t) (RQ)) #ifdef DEBUG -# if defined(ARCH_64) && !HALFWORD_HEAP +# if defined(ARCH_64) # define ERTS_DBG_SET_INVALID_RUNQP(RQP, N) \ (RUNQ_SET_RQ((RQP), (0xdeadbeefdead0003LL | ((N) << 4))) # define ERTS_DBG_VERIFY_VALID_RUNQP(RQP) \ @@ -139,22 +167,22 @@ do { \ # define ERTS_DBG_VERIFY_VALID_RUNQP(RQP) #endif -#define ERTS_EMPTY_RUNQ_PORTS(RQ) \ - (RUNQ_READ_LEN(&(RQ)->ports.info.len) == 0 && (RQ)->misc.start == NULL) - const Process erts_invalid_process = {{ERTS_INVALID_PID}}; extern BeamInstr beam_apply[]; extern BeamInstr beam_exit[]; extern BeamInstr beam_continue_exit[]; -int erts_sched_compact_load; -int erts_sched_balance_util = 0; -Uint erts_no_schedulers; -#ifdef ERTS_DIRTY_SCHEDULERS -Uint erts_no_dirty_cpu_schedulers; -Uint erts_no_dirty_io_schedulers; -#endif +int ERTS_WRITE_UNLIKELY(erts_default_spo_flags) = SPO_ON_HEAP_MSGQ; +int ERTS_WRITE_UNLIKELY(erts_sched_compact_load); +int ERTS_WRITE_UNLIKELY(erts_sched_balance_util) = 0; +Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers); +Uint ERTS_WRITE_UNLIKELY(erts_no_total_schedulers); +Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers) = 0; +Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers) = 0; + +static char *erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_NO_FLAGS] = {0}; +int erts_aux_work_no_flags = ERTS_SSI_AUX_WORK_NO_FLAGS; #define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_VERY_LAZY (4*1024*1024) #define ERTS_THR_PRGR_LATER_CLEANUP_OP_THRESHOLD_LAZY (512*1024) @@ -167,107 +195,157 @@ static UWord thr_prgr_later_cleanup_op_threshold = ERTS_THR_PRGR_LATER_CLEANUP_O ErtsPTab erts_proc erts_align_attribute(ERTS_CACHE_LINE_SIZE); int erts_sched_thread_suggested_stack_size = -1; - +int erts_dcpu_sched_thread_suggested_stack_size = -1; +int erts_dio_sched_thread_suggested_stack_size = -1; #ifdef ERTS_ENABLE_LOCK_CHECK ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; #endif -static struct { +static struct ErtsSchedBusyWait_ { int aux_work; int tse; int sys_schedule; } sched_busy_wait; -#ifdef ERTS_SMP int erts_disable_proc_not_running_opt; static ErtsAuxWorkData *aux_thread_aux_work_data; +static ErtsAuxWorkData *poll_thread_aux_work_data; -#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((erts_aint32_t) 1) << 0) +#define ERTS_SCHDLR_SSPND_CHNG_NMSB (((erts_aint32_t) 1) << 0) #define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1) #define ERTS_SCHDLR_SSPND_CHNG_ONLN (((erts_aint32_t) 1) << 2) +#define ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN (((erts_aint32_t) 1) << 3) + +typedef struct ErtsMultiSchedulingBlock_ { + int ongoing; + ErtsProcList *blckrs; + ErtsProcList *chngq; +} ErtsMultiSchedulingBlock; + +typedef struct ErtsSchedTypeCounters_ { + Uint32 normal; + Uint32 dirty_cpu; + Uint32 dirty_io; +} ErtsSchedTypeCounters; + +static struct ErtsSchedSuspend_ { + erts_mtx_t mtx; + ErtsSchedTypeCounters online; + ErtsSchedTypeCounters curr_online; + ErtsSchedTypeCounters active; + erts_atomic32_t changing; + ErtsProcList *chngq; + Eterm changer; + ErtsMultiSchedulingBlock nmsb; /* Normal multi Scheduling Block */ + ErtsMultiSchedulingBlock msb; /* Multi Scheduling Block */ + ErtsSchedType last_msb_dirty_type; +} schdlr_sspnd; -#ifndef DEBUG - -#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.changing, (VAL)) +static void init_scheduler_suspend(void); -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_cpu_changing, (VAL)) -#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_io_changing, (VAL)) -#endif - -#else +static ERTS_INLINE Uint32 +schdlr_sspnd_eq_nscheds(ErtsSchedTypeCounters *val1p, ErtsSchedTypeCounters *val2p) +{ + int res = val1p->normal == val2p->normal; + res &= val1p->dirty_cpu == val2p->dirty_cpu; + res &= val1p->dirty_io == val2p->dirty_io; + return res; +} -#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) +static ERTS_INLINE Uint32 +schdlr_sspnd_get_nscheds(ErtsSchedTypeCounters *valp, + ErtsSchedType type) +{ + switch (type) { + case ERTS_SCHED_NORMAL: + return valp->normal; + case ERTS_SCHED_DIRTY_CPU: + return valp->dirty_cpu; + case ERTS_SCHED_DIRTY_IO: + return valp->dirty_io; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + return 0; + } +} -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_cpu_changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) -#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_io_changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) +#ifdef DEBUG +static ERTS_INLINE Uint32 +schdlr_sspnd_get_nscheds_tot(ErtsSchedTypeCounters *valp) +{ + Uint32 res = valp->normal; + res += valp->dirty_cpu; + res += valp->dirty_io; + return res; +} #endif -#endif +static ERTS_INLINE void +schdlr_sspnd_dec_nscheds(ErtsSchedTypeCounters *valp, + ErtsSchedType type) +{ + ASSERT(schdlr_sspnd_get_nscheds(valp, type) > 0); + switch (type) { + case ERTS_SCHED_NORMAL: + valp->normal--; + break; + case ERTS_SCHED_DIRTY_CPU: + valp->dirty_cpu--; + break; + case ERTS_SCHED_DIRTY_IO: + valp->dirty_io--; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } +} -static struct { - erts_smp_mtx_t mtx; - erts_smp_cnd_t cnd; - int online; - int curr_online; - int wait_curr_online; -#ifdef ERTS_DIRTY_SCHEDULERS - int dirty_cpu_online; - int dirty_cpu_curr_online; - int dirty_cpu_wait_curr_online; - int dirty_io_online; - int dirty_io_curr_online; - int dirty_io_wait_curr_online; -#endif - erts_smp_atomic32_t changing; - erts_smp_atomic32_t active; -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_atomic32_t dirty_cpu_changing; - erts_smp_atomic32_t dirty_cpu_active; - erts_smp_atomic32_t dirty_io_changing; - erts_smp_atomic32_t dirty_io_active; -#endif - struct { - int ongoing; - long wait_active; -#ifdef ERTS_DIRTY_SCHEDULERS - long dirty_cpu_wait_active; - long dirty_io_wait_active; -#endif - ErtsProcList *procs; - } msb; /* Multi Scheduling Block */ -} schdlr_sspnd; +static ERTS_INLINE void +schdlr_sspnd_inc_nscheds(ErtsSchedTypeCounters *valp, + ErtsSchedType type) +{ + switch (type) { + case ERTS_SCHED_NORMAL: + valp->normal++; + break; + case ERTS_SCHED_DIRTY_CPU: + valp->dirty_cpu++; + break; + case ERTS_SCHED_DIRTY_IO: + valp->dirty_io++; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } +} + +static ERTS_INLINE void +schdlr_sspnd_set_nscheds(ErtsSchedTypeCounters *valp, + ErtsSchedType type, Uint32 no) +{ + switch (type) { + case ERTS_SCHED_NORMAL: + valp->normal = no; + break; + case ERTS_SCHED_DIRTY_CPU: + valp->dirty_cpu = no; + break; + case ERTS_SCHED_DIRTY_IO: + valp->dirty_io = no; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } +} static struct { - erts_smp_mtx_t update_mtx; - erts_smp_atomic32_t no_runqs; + erts_mtx_t update_mtx; + erts_atomic32_t no_runqs; int last_active_runqs; int forced_check_balance; - erts_smp_atomic32_t checking_balance; + erts_atomic32_t checking_balance; int halftime; int full_reds_history_index; struct { @@ -285,35 +363,69 @@ do { \ balance_info.prev_rise.reds = (REDS); \ } while (0) -#endif erts_sched_stat_t erts_sched_stat; -#ifdef USE_THREADS -static erts_tsd_key_t sched_data_key; -#endif +static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key); -static erts_smp_atomic32_t function_calls; - -#ifdef ERTS_SMP -static erts_smp_atomic32_t doing_sys_schedule; -static erts_smp_atomic32_t no_empty_run_queues; +static erts_atomic32_t no_empty_run_queues; long erts_runq_supervision_interval = 0; static ethr_event runq_supervision_event; static erts_tid_t runq_supervisor_tid; static erts_atomic_t runq_supervisor_sleeping; -#else /* !ERTS_SMP */ -ErtsSchedulerData *erts_scheduler_data; -#endif -ErtsAlignedRunQueue *erts_aligned_run_queues; -Uint erts_no_run_queues; +ErtsAlignedRunQueue * ERTS_WRITE_UNLIKELY(erts_aligned_run_queues); +Uint ERTS_WRITE_UNLIKELY(erts_no_run_queues); -ErtsAlignedSchedulerData *erts_aligned_scheduler_data; -#ifdef ERTS_DIRTY_SCHEDULERS -ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data; -ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data; -#endif + +struct { + union { + erts_atomic32_t active; + char align__[ERTS_CACHE_LINE_SIZE]; + } cpu; + union { + erts_atomic32_t active; + char align__[ERTS_CACHE_LINE_SIZE]; + } io; +} dirty_count erts_align_attribute(ERTS_CACHE_LINE_SIZE); + + +static ERTS_INLINE void +dirty_active(ErtsSchedulerData *esdp, erts_aint32_t add) +{ + erts_aint32_t val; + erts_atomic32_t *ap; + switch (esdp->type) { + case ERTS_SCHED_DIRTY_CPU: + ap = &dirty_count.cpu.active; + break; + case ERTS_SCHED_DIRTY_IO: + ap = &dirty_count.io.active; + break; + default: + ap = NULL; + ERTS_INTERNAL_ERROR("Not a dirty scheduler"); + break; + } + + /* + * All updates done under run-queue lock, so + * no inc or dec needed... + */ + ERTS_LC_ASSERT(erts_lc_runq_is_locked(esdp->run_queue)); + + val = erts_atomic32_read_nob(ap); + val += add; + erts_atomic32_set_nob(ap, val); +} + +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_scheduler_data); +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_cpu_scheduler_data); +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_io_scheduler_data); +typedef union { + Process dsp; + char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(Process))]; +} ErtsAlignedDirtyShadowProcess; typedef union { ErtsSchedulerSleepInfo ssi; @@ -321,33 +433,35 @@ typedef union { } ErtsAlignedSchedulerSleepInfo; static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info; -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP static ErtsAlignedSchedulerSleepInfo *aligned_dirty_cpu_sched_sleep_info; static ErtsAlignedSchedulerSleepInfo *aligned_dirty_io_sched_sleep_info; -#endif -#endif +static ErtsAlignedSchedulerSleepInfo *aligned_poll_thread_sleep_info; static Uint last_reductions; static Uint last_exact_reductions; -Uint erts_default_process_flags; -Eterm erts_system_monitor; -Eterm erts_system_monitor_long_gc; -Uint erts_system_monitor_long_schedule; -Eterm erts_system_monitor_large_heap; +Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor); +Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_long_gc); +Uint ERTS_WRITE_UNLIKELY(erts_system_monitor_long_schedule); +Eterm ERTS_WRITE_UNLIKELY(erts_system_monitor_large_heap); struct erts_system_monitor_flags_t erts_system_monitor_flags; /* system performance monitor */ Eterm erts_system_profile; struct erts_system_profile_flags_t erts_system_profile_flags; +int erts_system_profile_ts_type = ERTS_TRACE_FLG_NOW_TIMESTAMP; #if ERTS_MAX_PROCESSES > 0x7fffffff #error "Need to store process_count in another type" #endif typedef enum { - ERTS_PSTT_GC, /* Garbage Collect */ - ERTS_PSTT_CPC /* Check Process Code */ + ERTS_PSTT_GC_MAJOR, /* Garbage Collect: Fullsweep */ + ERTS_PSTT_GC_MINOR, /* Garbage Collect: Generational */ + ERTS_PSTT_CPC, /* Check Process Code */ + ERTS_PSTT_CLA, /* Copy Literal Area */ + ERTS_PSTT_COHMQ, /* Change off heap message queue */ + ERTS_PSTT_FTMQ, /* Flush trace msg queue */ + ERTS_PSTT_ETS_FREE_FIXATION } ErtsProcSysTaskType; #define ERTS_MAX_PROC_SYS_TASK_ARGS 2 @@ -389,11 +503,14 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, 200, ERTS_ALC_T_PROC_LIST) +#define ERTS_POLL_THREAD_SLEEP_INFO_IX(IX) \ + (ASSERT(0 <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_poll_threads)), \ + &aligned_poll_thread_sleep_info[(IX)].ssi) #define ERTS_SCHED_SLEEP_INFO_IX(IX) \ - (ASSERT(-1 <= ((int) (IX)) \ - && ((int) (IX)) < ((int) erts_no_schedulers)), \ + (ASSERT(((int)-1) <= ((int) (IX)) \ + && ((int) (IX)) < ((int) erts_no_schedulers)), \ &aligned_sched_sleep_info[(IX)].ssi) -#ifdef ERTS_DIRTY_SCHEDULERS #define ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(IX) \ (ASSERT(0 <= ((int) (IX)) \ && ((int) (IX)) < ((int) erts_no_dirty_cpu_schedulers)), \ @@ -402,7 +519,6 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist, (ASSERT(0 <= ((int) (IX)) \ && ((int) (IX)) < ((int) erts_no_dirty_io_schedulers)), \ &aligned_dirty_io_sched_sleep_info[(IX)].ssi) -#endif #define ERTS_FOREACH_RUNQ(RQVAR, DO) \ do { \ @@ -410,9 +526,9 @@ do { \ int ix__; \ for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \ RQVAR = ERTS_RUNQ_IX(ix__); \ - erts_smp_runq_lock(RQVAR); \ + erts_runq_lock(RQVAR); \ { DO; } \ - erts_smp_runq_unlock(RQVAR); \ + erts_runq_unlock(RQVAR); \ } \ } while (0) @@ -420,44 +536,50 @@ do { \ do { \ ErtsRunQueue *RQVAR; \ int ix__; \ - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&schdlr_sspnd.mtx)); \ - for (ix__ = 0; ix__ < schdlr_sspnd.online; ix__++) { \ + int online__ = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, \ + ERTS_SCHED_NORMAL); \ + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&schdlr_sspnd.mtx)); \ + for (ix__ = 0; ix__ < online__; ix__++) { \ RQVAR = ERTS_RUNQ_IX(ix__); \ - erts_smp_runq_lock(RQVAR); \ + erts_runq_lock(RQVAR); \ { DO; } \ - erts_smp_runq_unlock(RQVAR); \ + erts_runq_unlock(RQVAR); \ } \ } while (0) -#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, DOX) \ +#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, NRQS, DO, DOX) \ do { \ ErtsRunQueue *RQVAR; \ + int nrqs = (NRQS); \ int ix__; \ - for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \ + for (ix__ = 0; ix__ < nrqs; ix__++) { \ RQVAR = ERTS_RUNQ_IX(ix__); \ - erts_smp_runq_lock(RQVAR); \ + erts_runq_lock(RQVAR); \ { DO; } \ } \ { DOX; } \ - for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) \ - erts_smp_runq_unlock(ERTS_RUNQ_IX(ix__)); \ + for (ix__ = 0; ix__ < nrqs; ix__++) \ + erts_runq_unlock(ERTS_RUNQ_IX(ix__)); \ } while (0) -#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \ - ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, ) +#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \ + ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, DO, ) + +#define ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(RQVAR, DO) \ + ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues, DO, ) + + /* * Local functions. */ static void exec_misc_ops(ErtsRunQueue *); -static void print_function_from_pc(int to, void *to_arg, BeamInstr* x); -static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, - int yreg); +static void print_function_from_pc(fmtfn_t to, void *to_arg, BeamInstr* x); +static int stack_element_dump(fmtfn_t to, void *to_arg, Eterm* sp, int yreg); static void aux_work_timeout(void *unused); static void aux_work_timeout_early_init(int no_schedulers); -static void aux_work_timeout_late_init(void); -static void setup_aux_work_timer(void); +static void setup_aux_work_timer(ErtsSchedulerData *esdp); static int execute_sys_tasks(Process *c_p, erts_aint32_t *statep, @@ -478,29 +600,27 @@ dbg_chk_aux_work_val(erts_aint32_t value) valid |= ERTS_SSI_AUX_WORK_MISC; valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM; valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC; -#if ERTS_USE_ASYNC_READY_Q valid |= ERTS_SSI_AUX_WORK_ASYNC_READY; valid |= ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN; -#endif -#ifdef ERTS_SMP valid |= ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP; valid |= ERTS_SSI_AUX_WORK_MISC_THR_PRGR; valid |= ERTS_SSI_AUX_WORK_DD; valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR; + valid |= ERTS_SSI_AUX_WORK_CNCLD_TMRS; + valid |= ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR; valid |= ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP; -#endif + valid |= ERTS_SSI_AUX_WORK_PENDING_EXITERS; #if HAVE_ERTS_MSEG valid |= ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK; #endif -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - valid |= ERTS_SSI_AUX_WORK_CHECK_CHILDREN; -#endif #ifdef ERTS_SSI_AUX_WORK_REAP_PORTS valid |= ERTS_SSI_AUX_WORK_REAP_PORTS; #endif + valid |= ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED; + valid |= ERTS_SSI_AUX_WORK_YIELD; if (~valid & value) - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Invalid aux_work value found: 0x%x\n", ~valid & value); } @@ -511,16 +631,14 @@ dbg_chk_aux_work_val(erts_aint32_t value) #define ERTS_DBG_CHK_SSI_AUX_WORK(SSI) #endif -#ifdef ERTS_SMP -static void handle_pending_exiters(ErtsProcList *); +static void do_handle_pending_exiters(ErtsProcList *); static void wake_scheduler(ErtsRunQueue *rq); -#endif -#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) +#if defined(ERTS_ENABLE_LOCK_CHECK) int -erts_smp_lc_runq_is_locked(ErtsRunQueue *runq) +erts_lc_runq_is_locked(ErtsRunQueue *runq) { - return erts_smp_lc_mtx_is_locked(&runq->mtx); + return erts_lc_mtx_is_locked(&runq->mtx); } #endif @@ -528,13 +646,13 @@ erts_smp_lc_runq_is_locked(ErtsRunQueue *runq) static ERTS_INLINE Uint64 ensure_later_proc_interval(Uint64 interval) { - return erts_smp_ensure_later_interval_nob(erts_ptab_interval(&erts_proc), interval); + return erts_ensure_later_interval_nob(erts_ptab_interval(&erts_proc), interval); } Uint64 erts_get_proc_interval(void) { - return erts_smp_current_interval_nob(erts_ptab_interval(&erts_proc)); + return erts_current_interval_nob(erts_ptab_interval(&erts_proc)); } Uint64 @@ -546,102 +664,129 @@ erts_ensure_later_proc_interval(Uint64 interval) Uint64 erts_step_proc_interval(void) { - return erts_smp_step_interval_nob(erts_ptab_interval(&erts_proc)); + return erts_step_interval_nob(erts_ptab_interval(&erts_proc)); } void erts_pre_init_process(void) { -#ifdef USE_THREADS erts_tsd_key_create(&sched_data_key, "erts_sched_data_key"); -#endif + + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP_IX] + = "DELAYED_AW_WAKEUP"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DD_IX] + = "DD"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DD_THR_PRGR_IX] + = "DD_THR_PRGR"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC_IX] + = "FIX_ALLOC_DEALLOC"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM_IX] + = "FIX_ALLOC_LOWER_LIM"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP_IX] + = "THR_PRGR_LATER_OP"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_CNCLD_TMRS_IX] + = "CNCLD_TMRS"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR_IX] + = "CNCLD_TMRS_THR_PRGR"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_ASYNC_READY_IX] + = "ASYNC_READY"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN_IX] + = "ASYNC_READY_CLEAN"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MISC_THR_PRGR_IX] + = "MISC_THR_PRGR"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MISC_IX] + = "MISC"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_PENDING_EXITERS_IX] + = "PENDING_EXITERS"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_SET_TMO_IX] + = "SET_TMO"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK_IX] + = "MSEG_CACHE_CHECK"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_YIELD_IX] + = "YIELD"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_REAP_PORTS_IX] + = "REAP_PORTS"; + erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED_IX] + = "DEBUG_WAIT_COMPLETED"; #ifdef ERTS_ENABLE_LOCK_CHECK - { - int ix; - - erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].get_locks - = ERTS_PSD_ERROR_HANDLER_BUF_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].set_locks - = ERTS_PSD_ERROR_HANDLER_BUF_SET_LOCKS; - - erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].get_locks - = ERTS_PSD_SAVED_CALLS_BUF_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].set_locks - = ERTS_PSD_SAVED_CALLS_BUF_SET_LOCKS; - - erts_psd_required_locks[ERTS_PSD_SCHED_ID].get_locks - = ERTS_PSD_SCHED_ID_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_SCHED_ID].set_locks - = ERTS_PSD_SCHED_ID_SET_LOCKS; - - erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks - = ERTS_PSD_DIST_ENTRY_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks - = ERTS_PSD_DIST_ENTRY_SET_LOCKS; - - erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks - = ERTS_PSD_CALL_TIME_BP_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks - = ERTS_PSD_CALL_TIME_BP_SET_LOCKS; - - erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].get_locks - = ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks - = ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS; - -#ifdef ERTS_DIRTY_SCHEDULERS - erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].get_locks - = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].set_locks - = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS; -#endif - /* Check that we have locks for all entries */ - for (ix = 0; ix < ERTS_PSD_SIZE; ix++) { - ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].get_locks); - ERTS_SMP_LC_ASSERT(erts_psd_required_locks[ix].set_locks); - } - } + erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].get_locks + = ERTS_PSD_ERROR_HANDLER_BUF_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_ERROR_HANDLER].set_locks + = ERTS_PSD_ERROR_HANDLER_BUF_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].get_locks + = ERTS_PSD_SAVED_CALLS_BUF_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_SAVED_CALLS_BUF].set_locks + = ERTS_PSD_SAVED_CALLS_BUF_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_SCHED_ID].get_locks + = ERTS_PSD_SCHED_ID_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_SCHED_ID].set_locks + = ERTS_PSD_SCHED_ID_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].get_locks + = ERTS_PSD_CALL_TIME_BP_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_CALL_TIME_BP].set_locks + = ERTS_PSD_CALL_TIME_BP_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].get_locks + = ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks + = ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_NIF_TRAP_EXPORT].get_locks + = ERTS_PSD_NIF_TRAP_EXPORT_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_NIF_TRAP_EXPORT].set_locks + = ERTS_PSD_NIF_TRAP_EXPORT_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_ETS_OWNED_TABLES].get_locks + = ERTS_PSD_ETS_OWNED_TABLES_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_ETS_OWNED_TABLES].set_locks + = ERTS_PSD_ETS_OWNED_TABLES_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_ETS_FIXED_TABLES].get_locks + = ERTS_PSD_ETS_FIXED_TABLES_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_ETS_FIXED_TABLES].set_locks + = ERTS_PSD_ETS_FIXED_TABLES_SET_LOCKS; + + erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].get_locks + = ERTS_PSD_DIST_ENTRY_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_DIST_ENTRY].set_locks + = ERTS_PSD_DIST_ENTRY_SET_LOCKS; #endif } -#ifdef ERTS_SMP static void release_process(void *vproc) { - erts_smp_proc_dec_refc((Process *) vproc); + erts_proc_dec_refc((Process *) vproc); } -#endif /* initialize the scheduler */ void erts_init_process(int ncpu, int proc_tab_size, int legacy_proc_tab) { -#ifdef ERTS_SMP erts_disable_proc_not_running_opt = 0; erts_init_proc_lock(ncpu); -#endif init_proclist_alloc(); erts_ptab_init_table(&erts_proc, ERTS_ALC_T_PROC_TABLE, -#ifdef ERTS_SMP release_process, -#else - NULL, -#endif (ErtsPTabElementCommon *) &erts_invalid_process.common, proc_tab_size, sizeof(Process), "process_table", - legacy_proc_tab); + legacy_proc_tab, + 1 + ); last_reductions = 0; last_exact_reductions = 0; - erts_default_process_flags = 0; } void @@ -649,7 +794,9 @@ erts_late_init_process(void) { int ix; - erts_smp_spinlock_init(&erts_sched_stat.lock, "sched_stat"); + erts_spinlock_init(&erts_sched_stat.lock, "sched_stat", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER); + for (ix = 0; ix < ERTS_NO_PRIO_LEVELS; ix++) { Eterm atom; char *atom_str; @@ -684,22 +831,33 @@ erts_late_init_process(void) } +#define ERTS_SCHED_WTIME_IDLE ~((Uint64) 0) + static void -init_sched_wall_time(ErtsSchedWallTime *swtp) +init_sched_wall_time(ErtsSchedulerData *esdp, Uint64 time_stamp) { - swtp->need = erts_sched_balance_util; - swtp->enabled = 0; - swtp->start = 0; - swtp->working.total = 0; - swtp->working.start = 0; - swtp->working.currently = 0; + if (esdp->type != ERTS_SCHED_NORMAL) { + erts_atomic32_init_nob(&esdp->sched_wall_time.u.mod, 0); + esdp->sched_wall_time.enabled = 1; + esdp->sched_wall_time.start = time_stamp; + esdp->sched_wall_time.working.total = 0; + esdp->sched_wall_time.working.start = ERTS_SCHED_WTIME_IDLE; + } + else + { + esdp->sched_wall_time.u.need = erts_sched_balance_util; + esdp->sched_wall_time.enabled = 0; + esdp->sched_wall_time.start = 0; + esdp->sched_wall_time.working.total = 0; + esdp->sched_wall_time.working.start = 0; + } } static ERTS_INLINE Uint64 sched_wall_time_ts(void) { -#ifdef HAVE_GETHRTIME - return (Uint64) sys_gethrtime(); +#ifdef ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT + return (Uint64) erts_os_monotonic_time(); #else Uint64 res; SysTimeval tv; @@ -712,72 +870,24 @@ sched_wall_time_ts(void) #if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT -#ifdef ARCH_64 - static ERTS_INLINE Uint64 aschedtime_read(ErtsAtomicSchedTime *var) { - return (Uint64) erts_atomic_read_nob((erts_atomic_t *) var); + return (Uint64) erts_atomic64_read_nob((erts_atomic64_t *) var); } static ERTS_INLINE void aschedtime_set(ErtsAtomicSchedTime *var, Uint64 val) { - erts_atomic_set_nob((erts_atomic_t *) var, (erts_aint_t) val); + erts_atomic64_set_nob((erts_atomic64_t *) var, (erts_aint64_t) val); } static ERTS_INLINE void aschedtime_init(ErtsAtomicSchedTime *var) { - erts_atomic_init_nob((erts_atomic_t *) var, (erts_aint_t) 0); + erts_atomic64_init_nob((erts_atomic64_t *) var, (erts_aint64_t) 0); } -#elif defined(ARCH_32) - -static ERTS_INLINE Uint64 -aschedtime_read(ErtsAtomicSchedTime *var) -{ - erts_dw_aint_t dw; - erts_dw_atomic_read_nob((erts_dw_atomic_t *) var, &dw); -#ifdef ETHR_SU_DW_NAINT_T__ - return (Uint64) dw.dw_sint; -#else - { - Uint64 res; - res = (Uint64) ((Uint32) dw.sint[ERTS_DW_AINT_HIGH_WORD]); - res <<= 32; - res |= (Uint64) ((Uint32) dw.sint[ERTS_DW_AINT_LOW_WORD]); - return res; - } -#endif -} - -static ERTS_INLINE void -aschedtime_set(ErtsAtomicSchedTime *var, Uint64 val) -{ - erts_dw_aint_t dw; -#ifdef ETHR_SU_DW_NAINT_T__ - dw.dw_sint = (ETHR_SU_DW_NAINT_T__) val; -#else - dw.sint[ERTS_DW_AINT_LOW_WORD] = (erts_aint_t) (val & 0xffffffff); - dw.sint[ERTS_DW_AINT_HIGH_WORD] = (erts_aint_t) ((val >> 32) & 0xffffffff); -#endif - erts_dw_atomic_set_nob((erts_dw_atomic_t *) var, &dw); -} - -static ERTS_INLINE void -aschedtime_init(ErtsAtomicSchedTime *var) -{ - erts_dw_aint_t dw; - dw.sint[ERTS_DW_AINT_LOW_WORD] = (erts_aint_t) 0; - dw.sint[ERTS_DW_AINT_HIGH_WORD] = (erts_aint_t) 0; - erts_dw_atomic_init_nob((erts_dw_atomic_t *) var, &dw); -} - -#else -# error :-/ -#endif - #define ERTS_GET_AVG_MAX_UNLOCKED_TRY 50 #define ERTS_SCHED_AVG_UTIL_WRITE_MARKER (~((Uint64) 0)) @@ -882,14 +992,14 @@ erts_get_sched_util(ErtsRunQueue *rq, int initially_locked, int short_interval) if (!locked) { if (++try >= ERTS_GET_AVG_MAX_UNLOCKED_TRY) { /* Writer will eventually block on runq-lock */ - erts_smp_runq_lock(rq); + erts_runq_lock(rq); locked = 1; } } } if (!initially_locked && locked) - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); now = sched_wall_time_ts(); worktime = calc_sched_worktime(is_working, now, last, interval, old_worktime); @@ -931,34 +1041,148 @@ init_runq_sched_util(ErtsRunQueueSchedUtil *rqsu, int enabled) #endif /* ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT */ -static ERTS_INLINE void + +typedef struct { + Uint64 working; + Uint64 total; +} ErtsDirtySchedWallTime; + +static void +read_dirty_sched_wall_time(ErtsSchedulerData *esdp, ErtsDirtySchedWallTime *info) +{ + erts_aint32_t mod1; + Uint64 working, start, ts; + + mod1 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod); + + while (1) { + erts_aint32_t mod2; + + /* Spin until values are not written... */ + while (1) { + if ((mod1 & 1) == 0) + break; + ERTS_SPIN_BODY; + mod1 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod); + } + + ERTS_THR_READ_MEMORY_BARRIER; + + working = esdp->sched_wall_time.working.total; + start = esdp->sched_wall_time.working.start; + + ERTS_THR_READ_MEMORY_BARRIER; + + mod2 = erts_atomic32_read_nob(&esdp->sched_wall_time.u.mod); + if (mod1 == mod2) + break; + mod1 = mod2; + } + + ts = sched_wall_time_ts(); + ts -= esdp->sched_wall_time.start; + + info->total = ts; + + if (start == ERTS_SCHED_WTIME_IDLE || ts < start) + info->working = working; + else + info->working = working + (ts - start); + + if (info->working > info->total) + info->working = info->total; +} + + + +static void +dirty_sched_wall_time_change(ErtsSchedulerData *esdp, int working) +{ + erts_aint32_t mod; + Uint64 ts = sched_wall_time_ts(); + + ts -= esdp->sched_wall_time.start; + + /* + * This thread is the only thread writing in + * this sched_wall_time struct. We set 'mod' to + * an odd value while writing... + */ + mod = erts_atomic32_read_dirty(&esdp->sched_wall_time.u.mod); + ASSERT((mod & 1) == 0); + mod++; + + erts_atomic32_set_nob(&esdp->sched_wall_time.u.mod, mod); + ERTS_THR_WRITE_MEMORY_BARRIER; + + if (working) { + ASSERT(esdp->sched_wall_time.working.start + == ERTS_SCHED_WTIME_IDLE); + + esdp->sched_wall_time.working.start = ts; + + } + else { + Uint64 total; + + ASSERT(esdp->sched_wall_time.working.start + != ERTS_SCHED_WTIME_IDLE); + + total = esdp->sched_wall_time.working.total; + total += ts - esdp->sched_wall_time.working.start; + + esdp->sched_wall_time.working.total = total; + esdp->sched_wall_time.working.start = ERTS_SCHED_WTIME_IDLE; + + + } + + ERTS_THR_WRITE_MEMORY_BARRIER; + mod++; + erts_atomic32_set_nob(&esdp->sched_wall_time.u.mod, mod); + + if (!working) { + ERTS_MSACC_SET_STATE_X(ERTS_MSACC_STATE_BUSY_WAIT); + } else { + ERTS_MSACC_SET_STATE_X(ERTS_MSACC_STATE_OTHER); + } +} + + +static void sched_wall_time_change(ErtsSchedulerData *esdp, int working) { - if (esdp->sched_wall_time.need) { + if (esdp->sched_wall_time.u.need) { Uint64 ts = sched_wall_time_ts(); #if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT - update_avg_sched_util(esdp, ts, working); + update_avg_sched_util(esdp, ts, working); #endif if (esdp->sched_wall_time.enabled) { if (working) { -#ifdef DEBUG - ASSERT(!esdp->sched_wall_time.working.currently); - esdp->sched_wall_time.working.currently = 1; -#endif + ASSERT(esdp->sched_wall_time.working.start + == ERTS_SCHED_WTIME_IDLE); ts -= esdp->sched_wall_time.start; esdp->sched_wall_time.working.start = ts; } else { -#ifdef DEBUG - ASSERT(esdp->sched_wall_time.working.currently); - esdp->sched_wall_time.working.currently = 0; -#endif + ASSERT(esdp->sched_wall_time.working.start + != ERTS_SCHED_WTIME_IDLE); ts -= esdp->sched_wall_time.start; ts -= esdp->sched_wall_time.working.start; esdp->sched_wall_time.working.total += ts; +#ifdef DEBUG + esdp->sched_wall_time.working.start + = ERTS_SCHED_WTIME_IDLE; +#endif } } } + if (!working) { + ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_BUSY_WAIT); + } else { + ERTS_MSACC_SET_STATE_M_X(ERTS_MSACC_STATE_OTHER); + } + } typedef struct { @@ -966,30 +1190,32 @@ typedef struct { int enable; Process *proc; Eterm ref; - Eterm ref_heap[REF_THING_SIZE]; + Eterm ref_heap[ERTS_REF_THING_SIZE]; Uint req_sched; - erts_smp_atomic32_t refc; + erts_atomic32_t refc; + int want_dirty_cpu; + int want_dirty_io; } ErtsSchedWallTimeReq; -#if !HALFWORD_HEAP +typedef struct { + Process *proc; + Eterm ref; + Eterm ref_heap[ERTS_REF_THING_SIZE]; + Uint req_sched; + erts_atomic32_t refc; +} ErtsSystemCheckReq; + + ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(swtreq, ErtsSchedWallTimeReq, 5, ERTS_ALC_T_SCHED_WTIME_REQ) -#else -static ERTS_INLINE ErtsSchedWallTimeReq * -swtreq_alloc(void) -{ - return erts_alloc(ERTS_ALC_T_SCHED_WTIME_REQ, - sizeof(ErtsSchedWallTimeReq)); -} -static ERTS_INLINE void -swtreq_free(ErtsSchedWallTimeReq *ptr) -{ - erts_free(ERTS_ALC_T_SCHED_WTIME_REQ, ptr); -} -#endif +ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(screq, + ErtsSystemCheckReq, + 5, + ERTS_ALC_T_SYS_CHECK_REQ) + static void reply_sched_wall_time(void *vswtrp) @@ -1006,31 +1232,29 @@ reply_sched_wall_time(void *vswtrp) Eterm **hpp; Uint sz, *szp; ErlOffHeap *ohp = NULL; - ErlHeapFragment *bp = NULL; + ErtsMessage *mp = NULL; + + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); - ASSERT(esdp); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); -#endif if (swtrp->set) { if (!swtrp->enable && esdp->sched_wall_time.enabled) { - esdp->sched_wall_time.need = erts_sched_balance_util; + esdp->sched_wall_time.u.need = erts_sched_balance_util; esdp->sched_wall_time.enabled = 0; } else if (swtrp->enable && !esdp->sched_wall_time.enabled) { Uint64 ts = sched_wall_time_ts(); - esdp->sched_wall_time.need = 1; + esdp->sched_wall_time.u.need = 1; esdp->sched_wall_time.enabled = 1; esdp->sched_wall_time.start = ts; esdp->sched_wall_time.working.total = 0; esdp->sched_wall_time.working.start = 0; - esdp->sched_wall_time.working.currently = 1; } } if (esdp->sched_wall_time.enabled) { Uint64 ts = sched_wall_time_ts(); - ASSERT(esdp->sched_wall_time.working.currently); + ASSERT(esdp->sched_wall_time.working.start + != ERTS_SCHED_WTIME_IDLE); ts -= esdp->sched_wall_time.start; total = ts; ts -= esdp->sched_wall_time.working.start; @@ -1041,63 +1265,144 @@ reply_sched_wall_time(void *vswtrp) hpp = NULL; szp = &sz; - while (1) { - if (hpp) - ref_copy = STORE_NC(hpp, ohp, swtrp->ref); - else - *szp += REF_THING_SIZE; + if (esdp->sched_wall_time.enabled + && swtrp->req_sched == esdp->no + && (swtrp->want_dirty_cpu || swtrp->want_dirty_io)) { + /* Reply with info about this scheduler and all dirty schedulers... */ + ErtsDirtySchedWallTime *dswt; + int ix, no_dirty_scheds, want_dcpu, want_dio, soffset; + + want_dcpu = swtrp->want_dirty_cpu; + want_dio = swtrp->want_dirty_io; + + no_dirty_scheds = 0; + if (want_dcpu) + no_dirty_scheds += erts_no_dirty_cpu_schedulers; + if (want_dio) + no_dirty_scheds += erts_no_dirty_io_schedulers; + + ASSERT(no_dirty_scheds); + + dswt = erts_alloc(ERTS_ALC_T_TMP, + sizeof(ErtsDirtySchedWallTime) + * no_dirty_scheds); + + for (ix = 0; ix < no_dirty_scheds; ix++) { + ErtsSchedulerData *esdp; + if (want_dcpu && ix < erts_no_dirty_cpu_schedulers) + esdp = &erts_aligned_dirty_cpu_scheduler_data[ix].esd; + else { + int dio_ix = ix - erts_no_dirty_cpu_schedulers; + esdp = &erts_aligned_dirty_io_scheduler_data[dio_ix].esd; + } + read_dirty_sched_wall_time(esdp, &dswt[ix]); + } - if (swtrp->set) - msg = ref_copy; - else { - msg = (!esdp->sched_wall_time.enabled - ? am_notsup - : erts_bld_tuple(hpp, szp, 3, - make_small(esdp->no), - erts_bld_uint64(hpp, szp, working), - erts_bld_uint64(hpp, szp, total))); + soffset = erts_no_schedulers + 1; - msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg); - } - if (hpp) - break; + if (!want_dcpu) { + ASSERT(want_dio); + soffset += erts_no_dirty_cpu_schedulers; + } - hp = erts_alloc_message_heap(sz, &bp, &ohp, rp, &rp_locks); - szp = NULL; - hpp = &hp; + while (1) { + if (hpp) + ref_copy = STORE_NC(hpp, ohp, swtrp->ref); + else + *szp += ERTS_REF_THING_SIZE; + + ASSERT(!swtrp->set); + + /* info about dirty schedulers... */ + msg = NIL; + for (ix = no_dirty_scheds-1; ix >= 0; ix--) { + msg = erts_bld_cons(hpp, szp, + erts_bld_tuple(hpp, szp, 3, + make_small(ix+soffset), + erts_bld_uint64(hpp, szp, + dswt[ix].working), + erts_bld_uint64(hpp, szp, + dswt[ix].total)), + msg); + } + /* info about this scheduler... */ + msg = erts_bld_cons(hpp, szp, + erts_bld_tuple(hpp, szp, 3, + make_small(esdp->no), + erts_bld_uint64(hpp, szp, working), + erts_bld_uint64(hpp, szp, total)), + msg); + + msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg); + + if (hpp) + break; + + mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp); + szp = NULL; + hpp = &hp; + } + + erts_free(ERTS_ALC_T_TMP, dswt); + } + else + { + /* Reply with info about this scheduler only... */ + + while (1) { + if (hpp) + ref_copy = STORE_NC(hpp, ohp, swtrp->ref); + else + *szp += ERTS_REF_THING_SIZE; + + if (swtrp->set) + msg = ref_copy; + else { + msg = (!esdp->sched_wall_time.enabled + ? am_undefined + : erts_bld_tuple(hpp, szp, 3, + make_small(esdp->no), + erts_bld_uint64(hpp, szp, working), + erts_bld_uint64(hpp, szp, total))); + + msg = erts_bld_tuple(hpp, szp, 2, ref_copy, msg); + } + if (hpp) + break; + + mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp); + szp = NULL; + hpp = &hp; + } } - erts_queue_message(rp, &rp_locks, bp, msg, NIL -#ifdef USE_VM_PROBES - , NIL -#endif - ); + erts_queue_message(rp, rp_locks, mp, msg, am_system); if (swtrp->req_sched == esdp->no) rp_locks &= ~ERTS_PROC_LOCK_MAIN; if (rp_locks) - erts_smp_proc_unlock(rp, rp_locks); + erts_proc_unlock(rp, rp_locks); - erts_smp_proc_dec_refc(rp); + erts_proc_dec_refc(rp); - if (erts_smp_atomic32_dec_read_nob(&swtrp->refc) == 0) + if (erts_atomic32_dec_read_nob(&swtrp->refc) == 0) swtreq_free(vswtrp); } Eterm -erts_sched_wall_time_request(Process *c_p, int set, int enable) +erts_sched_wall_time_request(Process *c_p, int set, int enable, + int want_dirty_cpu, int want_dirty_io) { - ErtsSchedulerData *esdp = ERTS_PROC_GET_SCHDATA(c_p); + ErtsSchedulerData *esdp = erts_proc_sched_data(c_p); Eterm ref; ErtsSchedWallTimeReq *swtrp; Eterm *hp; + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); + if (!set && !esdp->sched_wall_time.enabled) return THE_NON_VALUE; -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); -#endif swtrp = swtreq_alloc(); ref = erts_make_ref(c_p); @@ -1108,24 +1413,88 @@ erts_sched_wall_time_request(Process *c_p, int set, int enable) swtrp->proc = c_p; swtrp->ref = STORE_NC(&hp, NULL, ref); swtrp->req_sched = esdp->no; - erts_smp_atomic32_init_nob(&swtrp->refc, + swtrp->want_dirty_cpu = want_dirty_cpu; + swtrp->want_dirty_io = want_dirty_io; + erts_atomic32_init_nob(&swtrp->refc, (erts_aint32_t) erts_no_schedulers); - erts_smp_proc_add_refc(c_p, (Sint32) erts_no_schedulers); + erts_proc_add_refc(c_p, (Sint32) erts_no_schedulers); -#ifdef ERTS_SMP if (erts_no_schedulers > 1) erts_schedule_multi_misc_aux_work(1, erts_no_schedulers, reply_sched_wall_time, (void *) swtrp); -#endif reply_sched_wall_time((void *) swtrp); return ref; } +static void +reply_system_check(void *vscrp) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + ErtsSystemCheckReq *scrp = (ErtsSystemCheckReq *) vscrp; + ErtsProcLocks rp_locks = (scrp->req_sched == esdp->no ? ERTS_PROC_LOCK_MAIN : 0); + Process *rp = scrp->proc; + Eterm msg; + Eterm *hp = NULL; + Eterm **hpp; + Uint sz; + ErlOffHeap *ohp = NULL; + ErtsMessage *mp = NULL; + + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); + + sz = ERTS_REF_THING_SIZE; + mp = erts_alloc_message_heap(rp, &rp_locks, sz, &hp, &ohp); + hpp = &hp; + msg = STORE_NC(hpp, ohp, scrp->ref); + + erts_queue_message(rp, rp_locks, mp, msg, am_system); + + if (scrp->req_sched == esdp->no) + rp_locks &= ~ERTS_PROC_LOCK_MAIN; + + if (rp_locks) + erts_proc_unlock(rp, rp_locks); + + erts_proc_dec_refc(rp); + + if (erts_atomic32_dec_read_nob(&scrp->refc) == 0) + screq_free(vscrp); +} + + +Eterm erts_system_check_request(Process *c_p) { + ErtsSchedulerData *esdp = erts_proc_sched_data(c_p); + Eterm ref; + ErtsSystemCheckReq *scrp; + Eterm *hp; + + scrp = screq_alloc(); + ref = erts_make_ref(c_p); + hp = &scrp->ref_heap[0]; + + scrp->proc = c_p; + scrp->ref = STORE_NC(&hp, NULL, ref); + scrp->req_sched = esdp->no; + erts_atomic32_init_nob(&scrp->refc, (erts_aint32_t) erts_no_schedulers); + + erts_proc_add_refc(c_p, (Sint) erts_no_schedulers); + + if (erts_no_schedulers > 1) + erts_schedule_multi_misc_aux_work(1, + erts_no_schedulers, + reply_system_check, + (void *) scrp); + + reply_system_check((void *) scrp); + + return ref; +} + static ERTS_INLINE ErtsProcList * proclist_create(Process *p) { @@ -1136,6 +1505,15 @@ proclist_create(Process *p) return plp; } +static ERTS_INLINE ErtsProcList * +proclist_copy(ErtsProcList *plp0) +{ + ErtsProcList *plp1 = proclist_alloc(); + plp1->pid = plp0->pid; + plp1->started_interval = plp0->started_interval; + return plp1; +} + static ERTS_INLINE void proclist_destroy(ErtsProcList *plp) { @@ -1148,6 +1526,12 @@ erts_proclist_create(Process *p) return proclist_create(p); } +ErtsProcList * +erts_proclist_copy(ErtsProcList *plp) +{ + return proclist_copy(plp); +} + void erts_proclist_destroy(ErtsProcList *plp) { @@ -1155,64 +1539,43 @@ erts_proclist_destroy(ErtsProcList *plp) } void * -erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data) +erts_psd_set_init(Process *p, int ix, void *data) { void *old; - ErtsProcLocks xplocks; - int refc = 0; - ErtsPSD *psd = erts_alloc(ERTS_ALC_T_PSD, sizeof(ErtsPSD)); + ErtsPSD *psd, *new_psd; int i; - for (i = 0; i < ERTS_PSD_SIZE; i++) - psd->data[i] = NULL; - ERTS_SMP_LC_ASSERT(plocks); - ERTS_SMP_LC_ASSERT(plocks == erts_proc_lc_my_proc_locks(p)); + new_psd = erts_alloc(ERTS_ALC_T_PSD, sizeof(ErtsPSD)); + for (i = 0; i < ERTS_PSD_SIZE; i++) + new_psd->data[i] = NULL; - xplocks = ERTS_PROC_LOCKS_ALL; - xplocks &= ~plocks; - if (xplocks && erts_smp_proc_trylock(p, xplocks) == EBUSY) { - if (xplocks & ERTS_PROC_LOCK_MAIN) { - erts_smp_proc_inc_refc(p); - erts_smp_proc_unlock(p, plocks); - erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL); - refc = 1; - } - else { - if (plocks & ERTS_PROC_LOCKS_ALL_MINOR) - erts_smp_proc_unlock(p, plocks & ERTS_PROC_LOCKS_ALL_MINOR); - erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); - } - } - if (!p->psd) - p->psd = psd; - if (xplocks) - erts_smp_proc_unlock(p, xplocks); - if (refc) - erts_smp_proc_dec_refc(p); - ASSERT(p->psd); - if (p->psd != psd) - erts_free(ERTS_ALC_T_PSD, psd); - old = p->psd->data[ix]; - p->psd->data[ix] = data; - ERTS_SMP_LC_ASSERT(plocks == erts_proc_lc_my_proc_locks(p)); + psd = (ErtsPSD *) erts_atomic_cmpxchg_mb(&p->psd, + (erts_aint_t) new_psd, + (erts_aint_t) NULL); + if (psd) + erts_free(ERTS_ALC_T_PSD, new_psd); + else + psd = new_psd; + old = psd->data[ix]; + psd->data[ix] = data; return old; } -#ifdef ERTS_SMP void -erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flags) +erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, + erts_aint32_t flags) { switch (flags & ERTS_SSI_FLGS_SLEEP_TYPE) { case ERTS_SSI_FLG_POLL_SLEEPING: - erts_sys_schedule_interrupt(1); + erts_check_io_interrupt(ssi->psi, 1); break; case ERTS_SSI_FLG_POLL_SLEEPING|ERTS_SSI_FLG_TSE_SLEEPING: /* * Thread progress blocking while poll sleeping; need * to signal on both... */ - erts_sys_schedule_interrupt(1); + erts_check_io_interrupt(ssi->psi, 1); /* fall through */ case ERTS_SSI_FLG_TSE_SLEEPING: erts_tse_set(ssi->event); @@ -1220,13 +1583,12 @@ erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flags) case 0: break; default: - erl_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error\n", + erts_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error\n", __FILE__, __LINE__); break; } } -#endif static ERTS_INLINE void set_aux_work_flags_wakeup_nob(ErtsSchedulerSleepInfo *ssi, @@ -1237,16 +1599,12 @@ set_aux_work_flags_wakeup_nob(ErtsSchedulerSleepInfo *ssi, ERTS_DBG_CHK_SSI_AUX_WORK(ssi); old_flgs = erts_atomic32_read_nob(&ssi->aux_work); - if ((old_flgs & flgs) == 0) { + if ((old_flgs & flgs) != flgs) { old_flgs = erts_atomic32_read_bor_nob(&ssi->aux_work, flgs); - if ((old_flgs & flgs) == 0) { -#ifdef ERTS_SMP + if ((old_flgs & flgs) != flgs) { erts_sched_poke(ssi); -#else - erts_sys_schedule_interrupt(1); -#endif } } } @@ -1261,12 +1619,8 @@ set_aux_work_flags_wakeup_relb(ErtsSchedulerSleepInfo *ssi, old_flgs = erts_atomic32_read_bor_relb(&ssi->aux_work, flgs); - if ((old_flgs & flgs) == 0) { -#ifdef ERTS_SMP + if ((old_flgs & flgs) != flgs) { erts_sched_poke(ssi); -#else - erts_sys_schedule_interrupt(1); -#endif } } @@ -1282,7 +1636,6 @@ unset_aux_work_flags(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flgs) return erts_atomic32_read_band_nob(&ssi->aux_work, ~flgs); } -#ifdef ERTS_SMP static ERTS_INLINE void haw_chk_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) @@ -1352,9 +1705,9 @@ static ERTS_INLINE void haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp) { ErtsThrPrgrVal current = awdp->current_thr_prgr; -#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + if (current != ERTS_THR_PRGR_INVALID && !erts_thr_progress_equal(current, erts_thr_progress_current())) { /* @@ -1371,9 +1724,7 @@ handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, in { int jix, max_jix; -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif ASSERT(awdp->delayed_wakeup.next != ERTS_DELAYED_WAKEUP_INFINITY); @@ -1431,7 +1782,6 @@ schedule_aux_work_wakeup(ErtsAuxWorkData *awdp, } } -#endif typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t; struct erts_misc_aux_work_t_ { @@ -1472,11 +1822,7 @@ init_misc_aux_work(void) sizeof(erts_algnd_misc_aux_work_q_t) * (erts_no_schedulers+1)); -#ifdef ERTS_SMP ix = 0; /* aux_thread + schedulers */ -#else - ix = 1; /* scheduler only */ -#endif for (; ix <= erts_no_schedulers; ix++) { qinit.arg = (void *) ERTS_SCHED_SLEEP_INFO_IX(ix-1); @@ -1494,10 +1840,8 @@ misc_aux_work_clean(ErtsThrQ_t *q, set_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_MISC); return aux_work | ERTS_SSI_AUX_WORK_MISC; case ERTS_THR_Q_NEED_THR_PRGR: -#ifdef ERTS_SMP set_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_MISC_THR_PRGR); haw_thr_prgr_soft_wakeup(awdp, erts_thr_q_need_thr_progress(q)); -#endif case ERTS_THR_Q_CLEAN: break; } @@ -1523,16 +1867,14 @@ handle_misc_aux_work(ErtsAuxWorkData *awdp, return misc_aux_work_clean(q, awdp, aux_work & ~ERTS_SSI_AUX_WORK_MISC); } -#ifdef ERTS_SMP static ERTS_INLINE erts_aint32_t handle_misc_aux_work_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) { -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + if (!erts_thr_progress_has_reached_this(haw_thr_prgr_current(awdp), awdp->misc.thr_prgr)) return aux_work & ~ERTS_SSI_AUX_WORK_MISC_THR_PRGR; @@ -1544,7 +1886,6 @@ handle_misc_aux_work_thr_prgr(ErtsAuxWorkData *awdp, aux_work & ~ERTS_SSI_AUX_WORK_MISC_THR_PRGR); } -#endif static ERTS_INLINE void schedule_misc_aux_work(int sched_id, @@ -1554,11 +1895,7 @@ schedule_misc_aux_work(int sched_id, ErtsThrQ_t *q; erts_misc_aux_work_t *mawp; -#ifdef ERTS_SMP ASSERT(0 <= sched_id && sched_id <= erts_no_schedulers); -#else - ASSERT(sched_id == 1); -#endif q = &misc_aux_work_queues[sched_id].q; mawp = misc_aux_work_alloc(); @@ -1584,12 +1921,13 @@ erts_schedule_multi_misc_aux_work(int ignore_self, int id, self = 0; if (ignore_self) { - ErtsSchedulerData *esdp = erts_get_scheduler_data(); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); -#endif - if (esdp) - self = (int) esdp->no; + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + + /* ignore_self is meaningless on dirty schedulers since aux work can + * only run on normal schedulers, and their ids do not translate. */ + if(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { + self = (int)esdp->no; + } } ASSERT(0 < max_sched && max_sched <= erts_no_schedulers); @@ -1601,7 +1939,6 @@ erts_schedule_multi_misc_aux_work(int ignore_self, } } -#if ERTS_USE_ASYNC_READY_Q void erts_notify_check_async_ready_queue(void *vno) @@ -1617,9 +1954,9 @@ handle_async_ready(ErtsAuxWorkData *awdp, int waiting) { ErtsSchedulerSleepInfo *ssi = awdp->ssi; -#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_ASYNC_READY); if (erts_check_async_ready(awdp->async_ready.queue)) { if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_ASYNC_READY) @@ -1629,9 +1966,7 @@ handle_async_ready(ErtsAuxWorkData *awdp, } return aux_work; } -#ifdef ERTS_SMP awdp->async_ready.need_thr_prgr = 0; -#endif set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN); return ((aux_work & ~ERTS_SSI_AUX_WORK_ASYNC_READY) | ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN); @@ -1644,10 +1979,8 @@ handle_async_ready_clean(ErtsAuxWorkData *awdp, { void *thr_prgr_p; -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif -#ifdef ERTS_SMP + if (awdp->async_ready.need_thr_prgr && !erts_thr_progress_has_reached_this(haw_thr_prgr_current(awdp), awdp->async_ready.thr_prgr)) { @@ -1656,26 +1989,20 @@ handle_async_ready_clean(ErtsAuxWorkData *awdp, awdp->async_ready.need_thr_prgr = 0; thr_prgr_p = (void *) &awdp->async_ready.thr_prgr; -#else - thr_prgr_p = NULL; -#endif switch (erts_async_ready_clean(awdp->async_ready.queue, thr_prgr_p)) { case ERTS_ASYNC_READY_CLEAN: unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN); return aux_work & ~ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN; -#ifdef ERTS_SMP case ERTS_ASYNC_READY_NEED_THR_PRGR: haw_thr_prgr_soft_wakeup(awdp, awdp->async_ready.thr_prgr); awdp->async_ready.need_thr_prgr = 1; return aux_work & ~ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN; -#endif default: return aux_work; } } -#endif /* ERTS_USE_ASYNC_READY_Q */ static ERTS_INLINE erts_aint32_t @@ -1684,9 +2011,8 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) ErtsSchedulerSleepInfo *ssi = awdp->ssi; erts_aint32_t res; -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + unset_aux_work_flags(ssi, (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC)); aux_work &= ~(ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM @@ -1700,7 +2026,6 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) return aux_work; } -#ifdef ERTS_SMP void erts_alloc_notify_delayed_dealloc(int ix) @@ -1733,15 +2058,17 @@ handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waitin int need_thr_progress = 0; ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; int more_work = 0; + ERTS_MSACC_PUSH_STATE_M_X(); -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD); + ERTS_MSACC_SET_STATE_CACHED_M_X(ERTS_MSACC_STATE_ALLOC); erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp, &need_thr_progress, &wakeup, &more_work); + ERTS_MSACC_POP_STATE_M_X(); if (more_work) { if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD) & ERTS_SSI_AUX_WORK_DD_THR_PRGR) { @@ -1759,11 +2086,6 @@ handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waitin awdp->dd.thr_prgr = wakeup; haw_thr_prgr_soft_wakeup(awdp, wakeup); } - else if (awdp->dd.completed_callback) { - awdp->dd.completed_callback(awdp->dd.completed_arg); - awdp->dd.completed_callback = NULL; - awdp->dd.completed_arg = NULL; - } return aux_work & ~ERTS_SSI_AUX_WORK_DD; } @@ -1776,9 +2098,8 @@ handle_delayed_dealloc_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, i ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; ErtsThrPrgrVal current = haw_thr_prgr_current(awdp); -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + if (!erts_thr_progress_has_reached_this(current, awdp->dd.thr_prgr)) return aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR; @@ -1805,17 +2126,105 @@ handle_delayed_dealloc_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, i } else { unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR); - if (awdp->dd.completed_callback) { - awdp->dd.completed_callback(awdp->dd.completed_arg); - awdp->dd.completed_callback = NULL; - awdp->dd.completed_arg = NULL; - } } return aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR; } /* + * Canceled timers + */ + +void +erts_notify_canceled_timer(ErtsSchedulerData *esdp, int rsid) +{ + ASSERT(esdp && esdp == erts_get_scheduler_data()); + if (esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)) + schedule_aux_work_wakeup(&esdp->aux_work_data, + rsid, + ERTS_SSI_AUX_WORK_CNCLD_TMRS); + else + set_aux_work_flags_wakeup_relb(ERTS_SCHED_SLEEP_INFO_IX(rsid-1), + ERTS_SSI_AUX_WORK_CNCLD_TMRS); +} + +static ERTS_INLINE erts_aint32_t +handle_canceled_timers(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) +{ + ErtsSchedulerSleepInfo *ssi = awdp->ssi; + int need_thr_progress = 0; + ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; + int more_work = 0; + + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); + + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS); + erts_handle_canceled_timers((void *) awdp->esdp, + &need_thr_progress, + &wakeup, + &more_work); + if (more_work) { + if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS) + & ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR) { + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR); + aux_work &= ~ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR; + } + return aux_work; + } + + if (need_thr_progress) { + if (wakeup == ERTS_THR_PRGR_INVALID) + wakeup = erts_thr_progress_later(awdp->esdp); + awdp->cncld_tmrs.thr_prgr = wakeup; + set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR); + haw_thr_prgr_soft_wakeup(awdp, wakeup); + } + return aux_work & ~ERTS_SSI_AUX_WORK_CNCLD_TMRS; +} + +static ERTS_INLINE erts_aint32_t +handle_canceled_timers_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) +{ + ErtsSchedulerSleepInfo *ssi; + int need_thr_progress; + int more_work; + ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; + ErtsThrPrgrVal current = haw_thr_prgr_current(awdp); + + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); + + if (!erts_thr_progress_has_reached_this(current, awdp->cncld_tmrs.thr_prgr)) + return aux_work & ~ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR; + + ssi = awdp->ssi; + need_thr_progress = 0; + more_work = 0; + + erts_handle_canceled_timers((void *) awdp->esdp, + &need_thr_progress, + &wakeup, + &more_work); + if (more_work) { + set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS); + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR); + return ((aux_work & ~ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR) + | ERTS_SSI_AUX_WORK_CNCLD_TMRS); + } + + if (need_thr_progress) { + if (wakeup == ERTS_THR_PRGR_INVALID) + wakeup = erts_thr_progress_later(awdp->esdp); + awdp->cncld_tmrs.thr_prgr = wakeup; + haw_thr_prgr_soft_wakeup(awdp, wakeup); + } + else { + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR); + } + + return aux_work & ~ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR; +} + +/* * Handle scheduled thread progress later operations. */ #define ERTS_MAX_THR_PRGR_LATER_OPS 50 @@ -1826,11 +2235,11 @@ handle_thr_prgr_later_op(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int wait int lops; ErtsThrPrgrVal current = haw_thr_prgr_current(awdp); -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); -#endif + for (lops = 0; lops < ERTS_MAX_THR_PRGR_LATER_OPS; lops++) { ErtsThrPrgrLaterOp *lop = awdp->later_op.first; + if (!erts_thr_progress_has_reached_this(current, lop->later)) return aux_work & ~ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP; awdp->later_op.first = lop->next; @@ -1857,7 +2266,7 @@ enqueue_later_op(ErtsSchedulerData *esdp, ErtsThrPrgrLaterOp *lop) { ErtsThrPrgrVal later = erts_thr_progress_later(esdp); - ASSERT(esdp); + ASSERT(esdp && !ERTS_SCHEDULER_IS_DIRTY(esdp)); lop->func = later_func; lop->data = later_data; @@ -1873,20 +2282,15 @@ enqueue_later_op(ErtsSchedulerData *esdp, return later; } -#endif /* ERTS_SMP */ void erts_schedule_thr_prgr_later_op(void (*later_func)(void *), void *later_data, ErtsThrPrgrLaterOp *lop) { -#ifndef ERTS_SMP - later_func(later_data); -#else ErtsSchedulerData *esdp = erts_get_scheduler_data(); ErtsThrPrgrVal later = enqueue_later_op(esdp, later_func, later_data, lop); haw_thr_prgr_wakeup(&esdp->aux_work_data, later); -#endif } void @@ -1895,118 +2299,143 @@ erts_schedule_thr_prgr_later_cleanup_op(void (*later_func)(void *), ErtsThrPrgrLaterOp *lop, UWord size) { -#ifndef ERTS_SMP - later_func(later_data); -#else ErtsSchedulerData *esdp = erts_get_scheduler_data(); ErtsThrPrgrVal later = enqueue_later_op(esdp, later_func, later_data, lop); haw_thr_prgr_later_cleanup_op_wakeup(&esdp->aux_work_data, later, size); -#endif } -#ifdef ERTS_SMP +static ERTS_INLINE erts_aint32_t +handle_debug_wait_completed(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) +{ + ErtsSchedulerSleepInfo *ssi = awdp->ssi; + erts_aint32_t saved_aux_work, flags; + + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); + + flags = awdp->debug.wait_completed.flags; + + if (aux_work & flags) + return aux_work; -static erts_atomic32_t completed_dealloc_count; + saved_aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + + if (saved_aux_work & flags) + return aux_work & ~ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED; + + awdp->debug.wait_completed.callback(awdp->debug.wait_completed.arg); + + awdp->debug.wait_completed.flags = 0; + awdp->debug.wait_completed.callback = NULL; + awdp->debug.wait_completed.arg = NULL; + + unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED); + + return aux_work & ~ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED; +} + +static erts_atomic32_t debug_wait_completed_count; +static int debug_wait_completed_flags; static void -completed_dealloc(void *vproc) +thr_debug_wait_completed(void *vproc) { - if (erts_atomic32_dec_read_mb(&completed_dealloc_count) == 0) { + if (erts_atomic32_dec_read_mb(&debug_wait_completed_count) == 0) { erts_resume((Process *) vproc, (ErtsProcLocks) 0); - erts_smp_proc_dec_refc((Process *) vproc); + erts_proc_dec_refc((Process *) vproc); } } static void -setup_completed_dealloc(void *vproc) +setup_thr_debug_wait_completed(void *vproc) { ErtsSchedulerData *esdp = erts_get_scheduler_data(); - ErtsAuxWorkData *awdp = (esdp - ? &esdp->aux_work_data - : aux_thread_aux_work_data); - erts_alloc_fix_alloc_shrink(awdp->sched_id, 0); - set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DD); - awdp->dd.completed_callback = completed_dealloc; - awdp->dd.completed_arg = vproc; + ErtsAuxWorkData *awdp; + erts_aint32_t wait_flags, aux_work_flags; + awdp = esdp ? &esdp->aux_work_data : aux_thread_aux_work_data; + + wait_flags = 0; + aux_work_flags = ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED; + + if (debug_wait_completed_flags & ERTS_DEBUG_WAIT_COMPLETED_DEALLOCATIONS) { + erts_alloc_fix_alloc_shrink(awdp->sched_id, 0); + wait_flags |= (ERTS_SSI_AUX_WORK_DD + | ERTS_SSI_AUX_WORK_DD_THR_PRGR); + aux_work_flags |= ERTS_SSI_AUX_WORK_DD; + } + + if (debug_wait_completed_flags & ERTS_DEBUG_WAIT_COMPLETED_TIMER_CANCELLATIONS) { + wait_flags |= (ERTS_SSI_AUX_WORK_CNCLD_TMRS + | ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR); + if (awdp->esdp && !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)) + aux_work_flags |= ERTS_SSI_AUX_WORK_CNCLD_TMRS; + } + + set_aux_work_flags_wakeup_nob(awdp->ssi, aux_work_flags); + + awdp->debug.wait_completed.flags = wait_flags; + awdp->debug.wait_completed.callback = thr_debug_wait_completed; + awdp->debug.wait_completed.arg = vproc; } -static void -prep_setup_completed_dealloc(void *vproc) +struct debug_lop { + ErtsThrPrgrLaterOp lop; + Process *proc; +}; + +static void later_thr_debug_wait_completed(void *vlop) { - erts_aint32_t count = (erts_aint32_t) (erts_no_schedulers+1); - if (erts_atomic32_dec_read_mb(&completed_dealloc_count) == count) { - /* scheduler threads */ - erts_schedule_multi_misc_aux_work(0, - erts_no_schedulers, - setup_completed_dealloc, - vproc); - /* aux_thread */ - erts_schedule_misc_aux_work(0, - setup_completed_dealloc, - vproc); + struct debug_lop *lop = vlop; + erts_aint32_t count = (erts_aint32_t) erts_no_schedulers; + count += 1; /* aux thread */ + if (erts_atomic32_dec_read_mb(&debug_wait_completed_count) == count) { + /* scheduler threads */ + erts_schedule_multi_misc_aux_work(0, + erts_no_schedulers, + setup_thr_debug_wait_completed, + lop->proc); + /* aux_thread */ + erts_schedule_misc_aux_work(0, + setup_thr_debug_wait_completed, + lop->proc); } + erts_free(ERTS_ALC_T_DEBUG, lop); +} + + +static void +init_thr_debug_wait_completed(void *vproc) +{ + struct debug_lop* lop = erts_alloc(ERTS_ALC_T_DEBUG, + sizeof(struct debug_lop)); + lop->proc = vproc; + erts_schedule_thr_prgr_later_op(later_thr_debug_wait_completed, lop, &lop->lop); } -#endif /* ERTS_SMP */ int -erts_debug_wait_deallocations(Process *c_p) +erts_debug_wait_completed(Process *c_p, int flags) { -#ifndef ERTS_SMP - erts_alloc_fix_alloc_shrink(1, 0); - return 1; -#else /* Only one process at a time can do this */ - erts_aint32_t count = (erts_aint32_t) (2*(erts_no_schedulers+1)); - if (0 == erts_atomic32_cmpxchg_mb(&completed_dealloc_count, + erts_aint32_t count = (erts_aint32_t) (2*erts_no_schedulers); + count += 1; /* aux thread */ + if (0 == erts_atomic32_cmpxchg_mb(&debug_wait_completed_count, count, 0)) { + debug_wait_completed_flags = flags; erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL); - erts_smp_proc_inc_refc(c_p); - /* scheduler threads */ + erts_proc_inc_refc(c_p); + + /* First flush later-ops on all scheduler threads */ erts_schedule_multi_misc_aux_work(0, erts_no_schedulers, - prep_setup_completed_dealloc, + init_thr_debug_wait_completed, (void *) c_p); - /* aux_thread */ - erts_schedule_misc_aux_work(0, - prep_setup_completed_dealloc, - (void *) c_p); return 1; } return 0; -#endif } -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN -void -erts_smp_notify_check_children_needed(void) -{ - int i; - for (i = 0; i < erts_no_schedulers; i++) - set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(i), - ERTS_SSI_AUX_WORK_CHECK_CHILDREN); -#ifdef ERTS_DIRTY_SCHEDULERS - for (i = 0; i < erts_no_dirty_cpu_schedulers; i++) - set_aux_work_flags_wakeup_nob(ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(i), - ERTS_SSI_AUX_WORK_CHECK_CHILDREN); - for (i = 0; i < erts_no_dirty_io_schedulers; i++) - set_aux_work_flags_wakeup_nob(ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(i), - ERTS_SSI_AUX_WORK_CHECK_CHILDREN); -#endif -} - -static ERTS_INLINE erts_aint32_t -handle_check_children(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) -{ - unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_CHECK_CHILDREN); - erts_check_children(); - return aux_work & ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN; -} - -#endif - static void notify_reap_ports_relb(void) { @@ -2017,17 +2446,18 @@ notify_reap_ports_relb(void) } } -erts_smp_atomic32_t erts_halt_progress; +erts_atomic32_t erts_halt_progress; int erts_halt_code; static ERTS_INLINE erts_aint32_t handle_reap_ports(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) { unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_REAP_PORTS); - awdp->esdp->run_queue->halt_in_progress = 1; - if (erts_smp_atomic32_dec_read_acqb(&erts_halt_progress) == 0) { + ERTS_RUNQ_FLGS_SET(awdp->esdp->run_queue, ERTS_RUNQ_FLG_HALTING); + + if (erts_atomic32_dec_read_acqb(&erts_halt_progress) == 0) { int i, max = erts_ptab_max(&erts_port); - erts_smp_atomic32_set_nob(&erts_halt_progress, 1); + erts_atomic32_set_nob(&erts_halt_progress, 1); for (i = 0; i < max; i++) { erts_aint32_t state; Port *prt = erts_pix2port(i); @@ -2040,27 +2470,69 @@ handle_reap_ports(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) /* We need to set the halt flag - get the port lock */ - erts_smp_port_lock(prt); + erts_port_lock(prt); state = erts_atomic32_read_nob(&prt->state); if (!(state & (ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP | ERTS_PORT_SFLG_HALT))) { state = erts_atomic32_read_bor_relb(&prt->state, ERTS_PORT_SFLG_HALT); - erts_smp_atomic32_inc_nob(&erts_halt_progress); + erts_atomic32_inc_nob(&erts_halt_progress); if (!(state & (ERTS_PORT_SFLG_EXITING|ERTS_PORT_SFLG_CLOSING))) - erts_deliver_port_exit(prt, prt->common.id, am_killed, 0); + erts_deliver_port_exit(prt, prt->common.id, am_killed, 0, 1); } erts_port_release(prt); } - if (erts_smp_atomic32_dec_read_nob(&erts_halt_progress) == 0) { - erl_exit_flush_async(erts_halt_code, ""); + if (erts_atomic32_dec_read_nob(&erts_halt_progress) == 0) { + erts_flush_async_exit(erts_halt_code, ""); } } return aux_work & ~ERTS_SSI_AUX_WORK_REAP_PORTS; } +void +erts_notify_new_aux_yield_work(ErtsSchedulerData *esdp) +{ + ASSERT(esdp == erts_get_scheduler_data()); + /* Always called by the scheduler itself... */ + set_aux_work_flags_wakeup_nob(esdp->ssi, ERTS_SSI_AUX_WORK_YIELD); +} + +static ERTS_INLINE erts_aint32_t +handle_yield(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) +{ + int yield = 0; + /* + * Yield operations are always requested by the scheduler itself. + * + * The following handlers should *not* set the ERTS_SSI_AUX_WORK_YIELD + * flag in order to indicate more work. They should instead return + * information so this "main handler" can manipulate the flag... + * + * The following handlers should be able to handle being called + * even though no work is to be done... + */ + + /* Various yielding operations... */ + + yield |= erts_handle_yielded_ets_all_request(awdp->esdp, + &awdp->yield.ets_all); + + /* + * Other yielding operations... + * + */ + + if (!yield) { + unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_YIELD); + return aux_work & ~ERTS_SSI_AUX_WORK_YIELD; + } + + return aux_work; +} + + #if HAVE_ERTS_MSEG static ERTS_INLINE erts_aint32_t @@ -2073,11 +2545,33 @@ handle_mseg_cache_check(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiti #endif + +static ERTS_INLINE erts_aint32_t +handle_pending_exiters(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) +{ + ErtsProcList *pnd_xtrs; + ErtsRunQueue *rq; + + rq = awdp->esdp->run_queue; + unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_PENDING_EXITERS); + + erts_runq_lock(rq); + pnd_xtrs = rq->procs.pending_exiters; + rq->procs.pending_exiters = NULL; + erts_runq_unlock(rq); + + if (erts_proclist_fetch(&pnd_xtrs, NULL)) + do_handle_pending_exiters(pnd_xtrs); + + return aux_work & ~ERTS_SSI_AUX_WORK_PENDING_EXITERS; +} + + static ERTS_INLINE erts_aint32_t handle_setup_aux_work_timer(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) { unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_SET_TMO); - setup_aux_work_timer(); + setup_aux_work_timer(awdp->esdp); return aux_work & ~ERTS_SSI_AUX_WORK_SET_TMO; } @@ -2092,16 +2586,18 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); \ if (!(aux_work & ~ignore)) { \ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); \ + ERTS_MSACC_UPDATE_CACHE(); \ + ERTS_MSACC_POP_STATE_M(); \ return aux_work; \ } \ } erts_aint32_t aux_work = orig_aux_work; erts_aint32_t ignore = 0; + ERTS_MSACC_PUSH_AND_SET_STATE_M(ERTS_MSACC_STATE_AUX); -#ifdef ERTS_SMP + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); haw_thr_prgr_current_reset(awdp); -#endif ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); ASSERT(aux_work); @@ -2120,7 +2616,6 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) * Keep ERTS_SSI_AUX_WORK flags in expected frequency order relative * eachother. Most frequent first. */ -#ifdef ERTS_SMP HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DELAYED_AW_WAKEUP, handle_delayed_aux_work_wakeup); HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD, @@ -2128,37 +2623,33 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) /* DD must be before DD_THR_PRGR */ HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DD_THR_PRGR, handle_delayed_dealloc_thr_prgr); -#endif HANDLE_AUX_WORK((ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC), handle_fix_alloc); -#ifdef ERTS_SMP HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_THR_PRGR_LATER_OP, handle_thr_prgr_later_op); -#endif + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_CNCLD_TMRS, + handle_canceled_timers); + /* CNCLD_TMRS must be before CNCLD_TMRS_THR_PRGR */ + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_CNCLD_TMRS_THR_PRGR, + handle_canceled_timers_thr_prgr); -#if ERTS_USE_ASYNC_READY_Q HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_ASYNC_READY, handle_async_ready); /* ASYNC_READY must be before ASYNC_READY_CLEAN */ HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_ASYNC_READY_CLEAN, handle_async_ready_clean); -#endif -#ifdef ERTS_SMP HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_MISC_THR_PRGR, handle_misc_aux_work_thr_prgr); -#endif /* MISC_THR_PRGR must be before MISC */ HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_MISC, handle_misc_aux_work); -#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN - HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_CHECK_CHILDREN, - handle_check_children); -#endif + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_PENDING_EXITERS, + handle_pending_exiters); HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_SET_TMO, handle_setup_aux_work_timer); @@ -2168,16 +2659,27 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) handle_mseg_cache_check); #endif + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_YIELD, + handle_yield); + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_REAP_PORTS, handle_reap_ports); + /* + * ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED *need* to be + * the last flag checked! + */ + + HANDLE_AUX_WORK(ERTS_SSI_AUX_WORK_DEBUG_WAIT_COMPLETED, + handle_debug_wait_completed); + ERTS_DBG_CHK_AUX_WORK_VAL(aux_work); -#ifdef ERTS_SMP if (waiting && !aux_work) haw_thr_prgr_current_check_progress(awdp); -#endif + ERTS_MSACC_UPDATE_CACHE(); + ERTS_MSACC_POP_STATE_M(); return aux_work; #undef HANDLE_AUX_WORK @@ -2186,17 +2688,42 @@ handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t orig_aux_work, int waiting) typedef struct { union { - ErlTimer data; - char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErlTimer))]; + ErtsTWheelTimer data; + char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsTWheelTimer))]; } timer; int initialized; erts_atomic32_t refc; +#ifdef DEBUG + erts_atomic32_t used; +#endif erts_atomic32_t type[1]; } ErtsAuxWorkTmo; static ErtsAuxWorkTmo *aux_work_tmo; +static ERTS_INLINE void +start_aux_work_timer(ErtsSchedulerData *esdp) +{ + ErtsMonotonicTime tmo = erts_get_monotonic_time(esdp); +#ifdef DEBUG + Uint no = (Uint) erts_atomic32_xchg_mb(&aux_work_tmo->used, + (erts_aint32_t) esdp->no); + ASSERT(esdp->type == ERTS_SCHED_NORMAL); + ASSERT(!no); +#endif + + tmo = ERTS_MONOTONIC_TO_CLKTCKS(tmo-1); + tmo += ERTS_MSEC_TO_CLKTCKS(1000) + 1; + erts_twheel_init_timer(&aux_work_tmo->timer.data); + ASSERT(esdp); + erts_twheel_set_timer(esdp->timer_wheel, + &aux_work_tmo->timer.data, + aux_work_timeout, + (void *) esdp, + tmo); +} + static void aux_work_timeout_early_init(int no_schedulers) { @@ -2211,6 +2738,9 @@ aux_work_timeout_early_init(int no_schedulers) p = (UWord) malloc((sizeof(ErtsAuxWorkTmo) + sizeof(erts_atomic32_t)*(no_schedulers+1)) + ERTS_CACHE_LINE_SIZE-1); + if (!p) { + ERTS_INTERNAL_ERROR("malloc failed to allocate memory!"); + } if (p & ERTS_CACHE_LINE_MASK) p = (p & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE; ASSERT((p & ERTS_CACHE_LINE_MASK) == 0); @@ -2218,35 +2748,35 @@ aux_work_timeout_early_init(int no_schedulers) aux_work_tmo = (ErtsAuxWorkTmo *) p; aux_work_tmo->initialized = 0; erts_atomic32_init_nob(&aux_work_tmo->refc, 0); +#ifdef DEBUG + erts_atomic32_init_nob(&aux_work_tmo->used, 0); +#endif for (i = 0; i <= no_schedulers; i++) erts_atomic32_init_nob(&aux_work_tmo->type[i], 0); } void -aux_work_timeout_late_init(void) +erts_aux_work_timeout_late_init(ErtsSchedulerData *esdp) { aux_work_tmo->initialized = 1; - if (erts_atomic32_read_nob(&aux_work_tmo->refc)) { - aux_work_tmo->timer.data.active = 0; - erts_set_timer(&aux_work_tmo->timer.data, - aux_work_timeout, - NULL, - NULL, - 1000); - } + if (erts_atomic32_read_acqb(&aux_work_tmo->refc)) + start_aux_work_timer(esdp); } static void -aux_work_timeout(void *unused) +aux_work_timeout(void *vesdp) { erts_aint32_t refc; int i; -#ifdef ERTS_SMP - i = 0; -#else - i = 1; +#ifdef DEBUG + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + Uint no = (Uint) erts_atomic32_xchg_mb(&aux_work_tmo->used, 0); + ASSERT(no == esdp->no); + ASSERT(esdp == (ErtsSchedulerData *) vesdp); #endif + i = 0; + for (; i <= erts_no_schedulers; i++) { erts_aint32_t type; type = erts_atomic32_read_acqb(&aux_work_tmo->type[i]); @@ -2260,32 +2790,18 @@ aux_work_timeout(void *unused) if (refc != 1 || 1 != erts_atomic32_cmpxchg_relb(&aux_work_tmo->refc, 0, 1)) { /* Setup next timeout... */ - aux_work_tmo->timer.data.active = 0; - erts_set_timer(&aux_work_tmo->timer.data, - aux_work_timeout, - NULL, - NULL, - 1000); + start_aux_work_timer((ErtsSchedulerData *) vesdp); } } static void -setup_aux_work_timer(void) +setup_aux_work_timer(ErtsSchedulerData *esdp) { -#ifndef ERTS_SMP - if (!erts_get_scheduler_data()) + if (!esdp || !esdp->timer_wheel) set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(0), ERTS_SSI_AUX_WORK_SET_TMO); else -#endif - { - aux_work_tmo->timer.data.active = 0; - erts_set_timer(&aux_work_tmo->timer.data, - aux_work_timeout, - NULL, - NULL, - 1000); - } + start_aux_work_timer(esdp); } erts_aint32_t @@ -2293,9 +2809,6 @@ erts_set_aux_work_timeout(int ix, erts_aint32_t type, int enable) { erts_aint32_t old, refc; -#ifndef ERTS_SMP - ix = 1; -#endif ERTS_DBG_CHK_AUX_WORK_VAL(type); ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&aux_work_tmo->type[ix])); @@ -2316,112 +2829,26 @@ erts_set_aux_work_timeout(int ix, erts_aint32_t type, int enable) if (refc == 1) { erts_atomic32_inc_acqb(&aux_work_tmo->refc); if (aux_work_tmo->initialized) - setup_aux_work_timer(); + setup_aux_work_timer(erts_get_scheduler_data()); } } return old; } - - -static ERTS_INLINE void -sched_waiting_sys(Uint no, ErtsRunQueue *rq) -{ - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); - ASSERT(rq->waiting >= 0); - (void) ERTS_RUNQ_FLGS_SET(rq, (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); - rq->waiting++; - rq->waiting *= -1; - rq->woken = 0; - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(no), am_inactive); -} - -static ERTS_INLINE void -sched_active_sys(Uint no, ErtsRunQueue *rq) -{ - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); -#endif - ASSERT(rq->waiting < 0); - rq->waiting *= -1; - rq->waiting--; - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(no), am_active); -} - Uint erts_active_schedulers(void) { Uint as = erts_no_schedulers; - ERTS_ATOMIC_FOREACH_RUNQ(rq, as -= abs(rq->waiting)); + ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(rq, as -= abs(rq->waiting)); - ASSERT(as >= 0); return as; } -#ifdef ERTS_SMP - -static ERTS_INLINE void -clear_sys_scheduling(void) -{ - erts_smp_atomic32_set_mb(&doing_sys_schedule, 0); -} - -static ERTS_INLINE int -try_set_sys_scheduling(void) -{ - return 0 == erts_smp_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0); -} - -#endif - -static ERTS_INLINE int -prepare_for_sys_schedule(ErtsSchedulerData *esdp) -{ -#ifdef ERTS_SMP - while (!erts_port_task_have_outstanding_io_tasks() - && try_set_sys_scheduling()) { -#ifdef ERTS_SCHED_ONLY_POLL_SCHED_1 - if (esdp->no != 1) { - /* If we are not scheduler 1 and ERTS_SCHED_ONLY_POLL_SCHED_1 is used - then we make sure to wake scheduler 1 */ - ErtsRunQueue *rq = ERTS_RUNQ_IX(0); - clear_sys_scheduling(); - wake_scheduler(rq); - return 0; - } -#endif - if (!erts_port_task_have_outstanding_io_tasks()) - return 1; - clear_sys_scheduling(); - } - return 0; -#else - return !erts_port_task_have_outstanding_io_tasks(); -#endif -} - -#ifdef ERTS_SMP - -static ERTS_INLINE void -sched_change_waiting_sys_to_waiting(Uint no, ErtsRunQueue *rq) -{ - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); -#endif - ASSERT(rq->waiting < 0); - rq->waiting *= -1; -} - static ERTS_INLINE void sched_waiting(Uint no, ErtsRunQueue *rq) { - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); (void) ERTS_RUNQ_FLGS_SET(rq, (ERTS_RUNQ_FLG_OUT_OF_WORK | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); if (rq->waiting < 0) @@ -2436,7 +2863,7 @@ sched_waiting(Uint no, ErtsRunQueue *rq) static ERTS_INLINE void sched_active(Uint no, ErtsRunQueue *rq) { - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); if (rq->waiting < 0) rq->waiting++; else @@ -2445,19 +2872,12 @@ sched_active(Uint no, ErtsRunQueue *rq) profile_scheduler(make_small(no), am_active); } -static int ERTS_INLINE -ongoing_multi_scheduling_block(void) -{ - ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&schdlr_sspnd.mtx)); - return schdlr_sspnd.msb.ongoing; -} - static ERTS_INLINE void empty_runq_aux(ErtsRunQueue *rq, Uint32 old_flags) { if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && old_flags & ERTS_RUNQ_FLG_NONEMPTY) { #ifdef DEBUG - erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues); + erts_aint32_t empty = erts_atomic32_read_nob(&no_empty_run_queues); /* * For a short period of time no_empty_run_queues may have * been increased twice for a specific run queue. @@ -2465,9 +2885,9 @@ empty_runq_aux(ErtsRunQueue *rq, Uint32 old_flags) ASSERT(0 <= empty && empty < 2*erts_no_run_queues); #endif if (!erts_runq_supervision_interval) - erts_smp_atomic32_inc_relb(&no_empty_run_queues); + erts_atomic32_inc_relb(&no_empty_run_queues); else { - erts_smp_atomic32_inc_mb(&no_empty_run_queues); + erts_atomic32_inc_mb(&no_empty_run_queues); if (erts_atomic_read_nob(&runq_supervisor_sleeping)) ethr_event_set(&runq_supervision_event); } @@ -2497,7 +2917,7 @@ non_empty_runq(ErtsRunQueue *rq) Uint32 old_flags = ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_NONEMPTY); if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && (!(old_flags & ERTS_RUNQ_FLG_NONEMPTY))) { #ifdef DEBUG - erts_aint32_t empty = erts_smp_atomic32_read_nob(&no_empty_run_queues); + erts_aint32_t empty = erts_atomic32_read_nob(&no_empty_run_queues); /* * For a short period of time no_empty_run_queues may have * been increased twice for a specific run queue. @@ -2505,10 +2925,10 @@ non_empty_runq(ErtsRunQueue *rq) ASSERT(0 < empty && empty <= 2*erts_no_run_queues); #endif if (!erts_runq_supervision_interval) - erts_smp_atomic32_dec_relb(&no_empty_run_queues); + erts_atomic32_dec_relb(&no_empty_run_queues); else { erts_aint32_t no; - no = erts_smp_atomic32_dec_read_mb(&no_empty_run_queues); + no = erts_atomic32_dec_read_mb(&no_empty_run_queues); if (no > 0 && erts_atomic_read_nob(&runq_supervisor_sleeping)) ethr_event_set(&runq_supervision_event); } @@ -2531,12 +2951,13 @@ static erts_aint32_t sched_prep_spin_wait(ErtsSchedulerSleepInfo *ssi) { erts_aint32_t oflgs; - erts_aint32_t nflgs = (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_WAITING); + erts_aint32_t nflgs; erts_aint32_t xflgs = 0; do { - oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + nflgs = (xflgs & ERTS_SSI_FLG_MSB_EXEC); + nflgs |= ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING; + oflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return nflgs; xflgs = oflgs; @@ -2553,11 +2974,11 @@ sched_prep_cont_spin_wait(ErtsSchedulerSleepInfo *ssi) erts_aint32_t xflgs = ERTS_SSI_FLG_WAITING; do { - oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + oflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return nflgs; xflgs = oflgs; - nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED; + nflgs |= oflgs & (ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC); } while (oflgs & ERTS_SSI_FLG_WAITING); return oflgs; } @@ -2570,7 +2991,7 @@ sched_spin_wait(ErtsSchedulerSleepInfo *ssi, int spincount) erts_aint32_t flgs; do { - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); + flgs = erts_atomic32_read_acqb(&ssi->flags); if ((flgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) != (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) { break; @@ -2595,11 +3016,11 @@ sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type) erts_tse_reset(ssi->event); else { ASSERT(sleep_type == ERTS_SSI_FLG_POLL_SLEEPING); - erts_sys_schedule_interrupt(0); + erts_check_io_interrupt(ssi->psi, 0); } while (1) { - oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + oflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return nflgs; if ((oflgs & (ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING)) @@ -2607,7 +3028,7 @@ sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type) return oflgs; } xflgs = oflgs; - nflgs |= oflgs & ERTS_SSI_FLG_SUSPENDED; + nflgs |= oflgs & (ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC); } } @@ -2626,7 +3047,7 @@ static void thr_prgr_prep_wait(void *vssi) { ErtsSchedulerSleepInfo *ssi = (ErtsSchedulerSleepInfo *) vssi; - erts_smp_atomic32_read_bor_acqb(&ssi->flags, + erts_atomic32_read_bor_acqb(&ssi->flags, ERTS_SSI_FLG_SLEEPING); } @@ -2641,7 +3062,7 @@ thr_prgr_wait(void *vssi) while (1) { erts_aint32_t aflgs, nflgs; nflgs = xflgs | ERTS_SSI_FLG_TSE_SLEEPING; - aflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + aflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (aflgs == xflgs) { erts_tse_wait(ssi->event); break; @@ -2656,13 +3077,19 @@ static void thr_prgr_fin_wait(void *vssi) { ErtsSchedulerSleepInfo *ssi = (ErtsSchedulerSleepInfo *) vssi; - erts_smp_atomic32_read_band_nob(&ssi->flags, + erts_atomic32_read_band_nob(&ssi->flags, ~(ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_TSE_SLEEPING)); } static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp); +void +erts_aux_thread_poke() +{ + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(-1)); +} + static void * aux_thread(void *unused) { @@ -2671,9 +3098,20 @@ aux_thread(void *unused) erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; int thr_prgr_active = 1; + ERTS_MSACC_DECLARE_CACHE(); + +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[] = "aux_thread"; + erts_lc_set_thread_name(buf); + } +#endif + erts_port_task_pre_alloc_init_thread(); ssi->event = erts_tse_fetch(); + erts_msacc_init_thread("aux", 1, 1); + callbacks.arg = (void *) ssi; callbacks.wakeup = thr_prgr_wakeup; callbacks.prepare_wait = thr_prgr_prep_wait; @@ -2684,8 +3122,14 @@ aux_thread(void *unused) init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; +#if ERTS_POLL_USE_FALLBACK + ssi->psi = erts_create_pollset_thread(-1); +#endif + sched_prep_spin_wait(ssi); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + while (1) { erts_aint32_t flgs; @@ -2694,32 +3138,54 @@ aux_thread(void *unused) if (!thr_prgr_active) erts_thr_progress_active(NULL, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); if (aux_work && erts_thr_progress_update(NULL)) erts_thr_progress_leader_update(NULL); } if (!aux_work) { + +#ifdef ERTS_BREAK_REQUESTED + if (ERTS_BREAK_REQUESTED) + erts_do_break_handling(); +#endif + if (thr_prgr_active) erts_thr_progress_active(NULL, thr_prgr_active = 0); - erts_thr_progress_prepare_wait(NULL); - ERTS_SCHED_FAIR_YIELD(); +#if ERTS_POLL_USE_FALLBACK flgs = sched_spin_wait(ssi, 0); if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi); + } + } +#else + erts_thr_progress_prepare_wait(NULL); + + flgs = sched_spin_wait(ssi, 0); + + if (flgs & ERTS_SSI_FLG_SLEEPING) { flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); if (flgs & ERTS_SSI_FLG_SLEEPING) { - int res; + int res; ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); } - } - erts_thr_progress_finalize_wait(NULL); + } + erts_thr_progress_finalize_wait(NULL); +#endif } flgs = sched_prep_spin_wait(ssi); @@ -2727,7 +3193,79 @@ aux_thread(void *unused) return NULL; } -#endif /* ERTS_SMP */ +static void * +poll_thread(void *arg) +{ + int id = (int)(UWord)arg; + ErtsAuxWorkData *awdp = poll_thread_aux_work_data+id; + ErtsSchedulerSleepInfo *ssi = ERTS_POLL_THREAD_SLEEP_INFO_IX(id); + erts_aint32_t aux_work; + ErtsThrPrgrCallbacks callbacks; + int thr_prgr_active = 1; + struct erts_poll_thread *psi = erts_create_pollset_thread(id); + ERTS_MSACC_DECLARE_CACHE(); + +#ifdef ERTS_ENABLE_LOCK_CHECK + { + char buf[] = "poll_thread"; + erts_lc_set_thread_name(buf); + } +#endif + + erts_port_task_pre_alloc_init_thread(); + ssi->event = erts_tse_fetch(); + + erts_msacc_init_thread("poll", id, 0); + + callbacks.arg = (void *) ssi; + callbacks.wakeup = thr_prgr_wakeup; + callbacks.prepare_wait = thr_prgr_prep_wait; + callbacks.wait = thr_prgr_wait; + callbacks.finalize_wait = thr_prgr_fin_wait; + + erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + init_aux_work_data(awdp, NULL, NULL); + awdp->ssi = ssi; + ssi->psi = psi; + + sched_prep_spin_wait(ssi); + + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + + while (1) { + erts_aint32_t flgs; + + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + if (aux_work) { + if (!thr_prgr_active) + erts_thr_progress_active(NULL, thr_prgr_active = 1); + aux_work = handle_aux_work(awdp, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); + if (aux_work && erts_thr_progress_update(NULL)) + erts_thr_progress_leader_update(NULL); + } + + if (!aux_work) { + if (thr_prgr_active) + erts_thr_progress_active(NULL, thr_prgr_active = 0); + + flgs = sched_spin_wait(ssi, 0); + + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(psi); + } + } + } + + flgs = sched_prep_spin_wait(ssi); + } + return NULL; +} static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) @@ -2736,314 +3274,168 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) ErtsSchedulerSleepInfo *ssi = esdp->ssi; int spincount; erts_aint32_t aux_work = 0; -#ifdef ERTS_SMP int thr_prgr_active = 1; erts_aint32_t flgs; + ERTS_MSACC_PUSH_STATE(); - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); -#ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_lock(&rq->sleepers.lock); -#endif + erts_spin_lock(&rq->sleepers.lock); flgs = sched_prep_spin_wait(ssi); if (flgs & ERTS_SSI_FLG_SUSPENDED) { /* Go suspend instead... */ -#ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_unlock(&rq->sleepers.lock); -#endif + erts_spin_unlock(&rq->sleepers.lock); return; } -#ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { ssi->prev = NULL; ssi->next = rq->sleepers.list; if (rq->sleepers.list) rq->sleepers.list->prev = ssi; rq->sleepers.list = ssi; - erts_smp_spin_unlock(&rq->sleepers.lock); + erts_spin_unlock(&rq->sleepers.lock); + dirty_active(esdp, -1); } -#endif - - /* - * If all schedulers are waiting, one of them *should* - * be waiting in erl_sys_schedule() - */ - - if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule(esdp)) { - - sched_waiting(esdp->no, rq); - - erts_smp_runq_unlock(rq); - - spincount = sched_busy_wait.tse; - - tse_wait: - - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && thr_prgr_active != working) - sched_wall_time_change(esdp, thr_prgr_active); - - while (1) { - - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); - if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp) - && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); - } - if (aux_work) - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); - else { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - } + sched_waiting(esdp->no, rq); - ERTS_SCHED_FAIR_YIELD(); + erts_runq_unlock(rq); - flgs = sched_spin_wait(ssi, spincount); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); - if (flgs & ERTS_SSI_FLG_SLEEPING) { - int res; - ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); - } - } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - erts_thr_progress_finalize_wait(esdp); - } + spincount = sched_busy_wait.tse; - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - break; - } + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_sched_wall_time_change(esdp, working = 0); + else if (thr_prgr_active != working) + sched_wall_time_change(esdp, working = thr_prgr_active); - flgs = sched_prep_cont_spin_wait(ssi); - spincount = sched_busy_wait.aux_work; + while (1) { + ErtsMonotonicTime current_time = 0; - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - break; - } + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); + ERTS_MSACC_UPDATE_CACHE(); + if (aux_work && erts_thr_progress_update(esdp)) + erts_thr_progress_leader_update(esdp); + } - } + if (aux_work) { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + flgs = erts_atomic32_read_acqb(&ssi->flags); + current_time = erts_get_monotonic_time(esdp); + if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + erts_bump_timers(esdp->timer_wheel, current_time); + } + } + } + else { + ErtsMonotonicTime timeout_time; + int do_timeout = 0; + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + timeout_time = erts_check_next_timeout_time(esdp); + current_time = erts_get_monotonic_time(esdp); + do_timeout = (current_time >= timeout_time); + } else { + current_time = 0; + timeout_time = ERTS_MONOTONIC_TIME_MAX; + } + if (do_timeout) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + } + else { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(esdp); + } + + flgs = sched_spin_wait(ssi, spincount); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + int res; + ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : + erts_get_monotonic_time(esdp); + do { + Sint64 timeout; + if (current_time >= timeout_time) + break; + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + timeout = ERTS_MONOTONIC_TO_NSEC(timeout_time + - current_time + - 1) + 1; + } else + timeout = -1; + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + res = erts_tse_twait(ssi->event, timeout); + ERTS_MSACC_POP_STATE(); + current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : + erts_get_monotonic_time(esdp); + } while (res == EINTR); + } + } + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) + erts_thr_progress_finalize_wait(esdp); + } + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) + erts_bump_timers(esdp->timer_wheel, current_time); + } - if (flgs & ~ERTS_SSI_FLG_SUSPENDED) - erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; + } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } + flgs = sched_prep_cont_spin_wait(ssi); + spincount = sched_busy_wait.aux_work; - erts_smp_runq_lock(rq); - sched_active(esdp->no, rq); + if (!(flgs & ERTS_SSI_FLG_WAITING)) { + ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); + break; + } } - else -#endif - { - erts_aint_t dt; - - erts_smp_atomic32_set_relb(&function_calls, 0); - *fcalls = 0; - -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); -#endif - -#ifdef ERTS_SCHED_ONLY_POLL_SCHED_1 - ASSERT(esdp->no == 1); -#endif - sched_waiting_sys(esdp->no, rq); - - - erts_smp_runq_unlock(rq); - - ASSERT(working); - sched_wall_time_change(esdp, working = 0); - - spincount = sched_busy_wait.sys_schedule; - if (spincount == 0) - goto sys_aux_work; - - while (spincount-- > 0) { - - sys_poll_aux_work: - - if (working) - sched_wall_time_change(esdp, working = 0); - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - erl_sys_schedule(1); /* Might give us something to do */ + if (flgs & ~(ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) + erts_atomic32_read_band_nob(&ssi->flags, + (ERTS_SSI_FLG_SUSPENDED + | ERTS_SSI_FLG_MSB_EXEC)); - dt = erts_do_time_read_and_reset(); - if (dt) erts_bump_timer(dt); - - sys_aux_work: -#ifndef ERTS_SMP - erts_sys_schedule_interrupt(0); -#endif - - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work) { - if (!working) - sched_wall_time_change(esdp, working = 1); -#ifdef ERTS_SMP - if (!thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 1); -#endif - aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); -#ifdef ERTS_SMP - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); -#endif - } - -#ifndef ERTS_SMP - if (rq->len != 0 || rq->misc.start) - goto sys_woken; -#else - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_woken; - } - - /* - * If we got new I/O tasks we aren't allowed to - * call erl_sys_schedule() until it is handled. - */ - if (erts_port_task_have_outstanding_io_tasks()) { - clear_sys_scheduling(); - /* - * Got to check that we still got I/O tasks; otherwise - * we have to continue checking for I/O... - */ - if (!prepare_for_sys_schedule(esdp)) { - spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT; - goto tse_wait; - } - } -#endif - } - - erts_smp_runq_lock(rq); - -#ifdef ERTS_SMP - /* - * If we got new I/O tasks we aren't allowed to - * sleep in erl_sys_schedule(). - */ - if (erts_port_task_have_outstanding_io_tasks()) { - clear_sys_scheduling(); - - /* - * Got to check that we still got I/O tasks; otherwise - * we have to wait in erl_sys_schedule() after all... - */ - if (!prepare_for_sys_schedule(esdp)) { - /* - * Not allowed to wait in erl_sys_schedule; - * do tse wait instead... - */ - sched_change_waiting_sys_to_waiting(esdp->no, rq); - erts_smp_runq_unlock(rq); - spincount = 0; - goto tse_wait; - } - } -#endif - if (aux_work) { - erts_smp_runq_unlock(rq); - goto sys_poll_aux_work; - } -#ifdef ERTS_SMP - flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); - if (!(flgs & ERTS_SSI_FLG_SLEEPING)) { - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_locked_woken; - } - erts_smp_runq_unlock(rq); - flgs = sched_prep_cont_spin_wait(ssi); - if (!(flgs & ERTS_SSI_FLG_WAITING)) { - ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING)); - goto sys_woken; - } - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - goto sys_poll_aux_work; - } - - ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); - ASSERT(flgs & ERTS_SSI_FLG_WAITING); -#endif - - erts_smp_runq_unlock(rq); - - if (working) - sched_wall_time_change(esdp, working = 0); - -#ifdef ERTS_SMP - if (thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 0); -#endif - - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - - erl_sys_schedule(0); + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_sched_wall_time_change(esdp, working = 1); + else if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } - dt = erts_do_time_read_and_reset(); - if (dt) erts_bump_timer(dt); + erts_runq_lock(rq); + sched_active(esdp->no, rq); -#ifndef ERTS_SMP - if (rq->len == 0 && !rq->misc.start) - goto sys_aux_work; - sys_woken: -#else - flgs = sched_prep_cont_spin_wait(ssi); - if (flgs & ERTS_SSI_FLG_WAITING) - goto sys_aux_work; - - sys_woken: - if (!thr_prgr_active) - erts_thr_progress_active(esdp, thr_prgr_active = 1); - erts_smp_runq_lock(rq); - sys_locked_woken: - if (!thr_prgr_active) { - erts_smp_runq_unlock(rq); - erts_thr_progress_active(esdp, thr_prgr_active = 1); - erts_smp_runq_lock(rq); - } - clear_sys_scheduling(); - if (flgs & ~ERTS_SSI_FLG_SUSPENDED) - erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); -#endif - if (!working) - sched_wall_time_change(esdp, working = 1); - sched_active_sys(esdp->no, rq); - } + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_active(esdp, 1); - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); } -#ifdef ERTS_SMP static ERTS_INLINE erts_aint32_t ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi) @@ -3053,20 +3445,55 @@ ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi) erts_aint32_t nflgs = 0; erts_aint32_t xflgs = ERTS_SSI_FLG_SLEEPING|ERTS_SSI_FLG_WAITING; while (1) { - oflgs = erts_smp_atomic32_cmpxchg_relb(&ssi->flags, nflgs, xflgs); + oflgs = erts_atomic32_cmpxchg_relb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return oflgs; - nflgs = oflgs & ERTS_SSI_FLG_SUSPENDED; + nflgs = oflgs & (ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC); xflgs = oflgs; } } +static ERTS_INLINE void +ssi_wake(ErtsSchedulerSleepInfo *ssi) +{ + erts_sched_finish_poke(ssi, ssi_flags_set_wake(ssi)); +} + + static void -wake_scheduler(ErtsRunQueue *rq) +dcpu_sched_ix_suspend_wake(Uint ix) { - ErtsSchedulerSleepInfo *ssi; - erts_aint32_t flgs; + ErtsSchedulerSleepInfo* ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_atomic32_read_bor_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + ssi_wake(ssi); +} + +static void +dio_sched_ix_suspend_wake(Uint ix) +{ + ErtsSchedulerSleepInfo* ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + erts_atomic32_read_bor_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + ssi_wake(ssi); +} +static void +dcpu_sched_ix_wake(Uint ix) +{ + ssi_wake(ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix)); +} + +#if 0 +static void +dio_sched_ix_wake(Uint ix) +{ + ssi_wake(ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix)); +} +#endif + + +static void +wake_scheduler(ErtsRunQueue *rq) +{ /* * The unlocked run queue is not strictly necessary * from a thread safety or deadlock prevention @@ -3075,15 +3502,12 @@ wake_scheduler(ErtsRunQueue *rq) * so all code *should* handle this without having * the lock on the run queue. */ - ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq)); - - ssi = rq->scheduler->ssi; + ERTS_LC_ASSERT(!erts_lc_runq_is_locked(rq) + || ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); - flgs = ssi_flags_set_wake(ssi); - erts_sched_finish_poke(ssi, flgs); + ssi_wake(rq->scheduler->ssi); } -#ifdef ERTS_DIRTY_SCHEDULERS static void wake_dirty_schedulers(ErtsRunQueue *rq, int one) { @@ -3093,10 +3517,10 @@ wake_dirty_schedulers(ErtsRunQueue *rq, int one) ASSERT(ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); sl = &rq->sleepers; - erts_smp_spin_lock(&sl->lock); + erts_spin_lock(&sl->lock); ssi = sl->list; if (!ssi) { - erts_smp_spin_unlock(&sl->lock); + erts_spin_unlock(&sl->lock); if (one) wake_scheduler(rq); } else if (one) { @@ -3110,14 +3534,14 @@ wake_dirty_schedulers(ErtsRunQueue *rq, int one) if (ssi->next) ssi->next->prev = ssi->prev; - erts_smp_spin_unlock(&sl->lock); + erts_spin_unlock(&sl->lock); ERTS_THR_MEMORY_BARRIER; flgs = ssi_flags_set_wake(ssi); erts_sched_finish_poke(ssi, flgs); } else { sl->list = NULL; - erts_smp_spin_unlock(&sl->lock); + erts_spin_unlock(&sl->lock); ERTS_THR_MEMORY_BARRIER; do { @@ -3127,10 +3551,16 @@ wake_dirty_schedulers(ErtsRunQueue *rq, int one) } while (ssi); } } -#endif + +static void +wake_dirty_scheduler(ErtsRunQueue *rq) +{ + wake_dirty_schedulers(rq, 1); +} + #define ERTS_NO_USED_RUNQS_SHIFT 16 -#define ERTS_NO_RUNQS_MASK 0xffff +#define ERTS_NO_RUNQS_MASK 0xffffU #if ERTS_MAX_NO_OF_SCHEDULERS > ERTS_NO_RUNQS_MASK # error "Too large amount of schedulers allowed" @@ -3141,13 +3571,13 @@ init_no_runqs(int active, int used) { erts_aint32_t no_runqs = (erts_aint32_t) (active & ERTS_NO_RUNQS_MASK); no_runqs |= (erts_aint32_t) ((used & ERTS_NO_RUNQS_MASK) << ERTS_NO_USED_RUNQS_SHIFT); - erts_smp_atomic32_init_nob(&balance_info.no_runqs, no_runqs); + erts_atomic32_init_nob(&balance_info.no_runqs, no_runqs); } static ERTS_INLINE void get_no_runqs(int *active, int *used) { - erts_aint32_t no_runqs = erts_smp_atomic32_read_nob(&balance_info.no_runqs); + erts_aint32_t no_runqs = erts_atomic32_read_nob(&balance_info.no_runqs); if (active) *active = (int) (no_runqs & ERTS_NO_RUNQS_MASK); if (used) @@ -3157,12 +3587,12 @@ get_no_runqs(int *active, int *used) static ERTS_INLINE void set_no_used_runqs(int used) { - erts_aint32_t exp = erts_smp_atomic32_read_nob(&balance_info.no_runqs); + erts_aint32_t exp = erts_atomic32_read_nob(&balance_info.no_runqs); while (1) { erts_aint32_t act, new; new = (used & ERTS_NO_RUNQS_MASK) << ERTS_NO_USED_RUNQS_SHIFT; new |= exp & ERTS_NO_RUNQS_MASK; - act = erts_smp_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); + act = erts_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); if (act == exp) break; exp = act; @@ -3172,14 +3602,14 @@ set_no_used_runqs(int used) static ERTS_INLINE void set_no_active_runqs(int active) { - erts_aint32_t exp = erts_smp_atomic32_read_nob(&balance_info.no_runqs); + erts_aint32_t exp = erts_atomic32_read_nob(&balance_info.no_runqs); while (1) { erts_aint32_t act, new; if ((exp & ERTS_NO_RUNQS_MASK) == active) break; new = exp & (ERTS_NO_RUNQS_MASK << ERTS_NO_USED_RUNQS_SHIFT); new |= active & ERTS_NO_RUNQS_MASK; - act = erts_smp_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); + act = erts_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); if (act == exp) break; exp = act; @@ -3189,14 +3619,14 @@ set_no_active_runqs(int active) static ERTS_INLINE int try_inc_no_active_runqs(int active) { - erts_aint32_t exp = erts_smp_atomic32_read_nob(&balance_info.no_runqs); + erts_aint32_t exp = erts_atomic32_read_nob(&balance_info.no_runqs); if (((exp >> ERTS_NO_USED_RUNQS_SHIFT) & ERTS_NO_RUNQS_MASK) < active) return 0; if ((exp & ERTS_NO_RUNQS_MASK) + 1 == active) { erts_aint32_t new, act; new = exp & (ERTS_NO_RUNQS_MASK << ERTS_NO_USED_RUNQS_SHIFT); new |= active & ERTS_NO_RUNQS_MASK; - act = erts_smp_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); + act = erts_atomic32_cmpxchg_nob(&balance_info.no_runqs, new, exp); if (act == exp) return 1; } @@ -3212,11 +3642,11 @@ chk_wake_sched(ErtsRunQueue *crq, int ix, int activate) return 0; wrq = ERTS_RUNQ_IX(ix); flags = ERTS_RUNQ_FLGS_GET(wrq); + if (activate && !(flags & ERTS_RUNQ_FLG_SUSPENDED)) { + if (try_inc_no_active_runqs(ix+1)) + (void) ERTS_RUNQ_FLGS_UNSET(wrq, ERTS_RUNQ_FLG_INACTIVE); + } if (!(flags & (ERTS_RUNQ_FLG_SUSPENDED|ERTS_RUNQ_FLG_NONEMPTY))) { - if (activate) { - if (try_inc_no_active_runqs(ix+1)) - (void) ERTS_RUNQ_FLGS_UNSET(wrq, ERTS_RUNQ_FLG_INACTIVE); - } wake_scheduler(wrq); return 1; } @@ -3258,25 +3688,20 @@ wake_scheduler_on_empty_runq(ErtsRunQueue *crq) } } -#endif /* ERTS_SMP */ static ERTS_INLINE void smp_notify_inc_runq(ErtsRunQueue *runq) { -#ifdef ERTS_SMP if (runq) { -#ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) - wake_dirty_schedulers(runq, 1); + wake_dirty_scheduler(runq); else -#endif wake_scheduler(runq); } -#endif } void -erts_smp_notify_inc_runq(ErtsRunQueue *runq) +erts_notify_inc_runq(ErtsRunQueue *runq) { smp_notify_inc_runq(runq); } @@ -3284,16 +3709,12 @@ erts_smp_notify_inc_runq(ErtsRunQueue *runq) void erts_sched_notify_check_cpu_bind(void) { -#ifdef ERTS_SMP int ix; for (ix = 0; ix < erts_no_run_queues; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); (void) ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_CHK_CPU_BIND); wake_scheduler(rq); } -#else - erts_sched_check_cpu_bind(erts_get_scheduler_data()); -#endif } @@ -3302,9 +3723,9 @@ enqueue_process(ErtsRunQueue *runq, int prio, Process *p) { ErtsRunPrioQueue *rpq; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); - erts_smp_inc_runq_len(runq, &runq->procs.prio_info[prio], prio); + erts_inc_runq_len(runq, &runq->procs.prio_info[prio], prio); if (prio == PRIORITY_LOW) { p->schedule_count = RESCHEDULE_LOW; @@ -3332,7 +3753,7 @@ unqueue_process(ErtsRunQueue *runq, Process *prev_proc, Process *proc) { - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); if (prev_proc) prev_proc->next = proc->next; @@ -3344,7 +3765,7 @@ unqueue_process(ErtsRunQueue *runq, if (!rpq->first) rpq->last = NULL; - erts_smp_dec_runq_len(runq, rqi, prio); + erts_dec_runq_len(runq, rqi, prio); } @@ -3357,7 +3778,7 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) ErtsRunQueueInfo *rqi; Process *p; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); ASSERT(PRIORITY_NORMAL == prio_q || PRIORITY_HIGH == prio_q @@ -3368,9 +3789,9 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) if (!p) return NULL; - ERTS_SMP_DATA_DEPENDENCY_READ_MEMORY_BARRIER; + ERTS_THR_DATA_DEPENDENCY_READ_MEMORY_BARRIER; - state = erts_smp_atomic32_read_nob(&p->state); + state = erts_atomic32_read_nob(&p->state); if (statep) *statep = state; @@ -3400,7 +3821,13 @@ check_requeue_process(ErtsRunQueue *rq, int prio_q) return 0; } -#ifdef ERTS_SMP +static ERTS_INLINE void +free_proxy_proc(Process *proxy) +{ + ASSERT(erts_atomic32_read_nob(&proxy->state) & ERTS_PSFLG_PROXY); + erts_free(ERTS_ALC_T_PROC, proxy); +} + static ErtsRunQueue * check_immigration_need(ErtsRunQueue *c_rq, ErtsMigrationPath *mp, int prio) @@ -3453,7 +3880,7 @@ static void immigrate(ErtsRunQueue *c_rq, ErtsMigrationPath *mp) { Uint32 iflags, iflag; - erts_smp_runq_unlock(c_rq); + erts_runq_unlock(c_rq); ASSERT(erts_thr_progress_is_managed_thread()); @@ -3482,7 +3909,7 @@ immigrate(ErtsRunQueue *c_rq, ErtsMigrationPath *mp) prio = ERTS_PORT_PRIO_LEVEL; break; default: - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "%s:%d:%s(): Invalid immigrate queue mask", __FILE__, __LINE__, __func__); prio = 0; @@ -3494,25 +3921,25 @@ immigrate(ErtsRunQueue *c_rq, ErtsMigrationPath *mp) rq = check_immigration_need(c_rq, mp, prio); if (rq) { - erts_smp_runq_lock(rq); + erts_runq_lock(rq); if (prio == ERTS_PORT_PRIO_LEVEL) { Port *prt; prt = erts_dequeue_port(rq); if (prt) RUNQ_SET_RQ(&prt->run_queue, c_rq); - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); if (prt) { /* port might terminate while we have no lock... */ rq = erts_port_runq(prt); if (rq) { if (rq != c_rq) - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "%s:%d:%s(): Internal error", __FILE__, __LINE__, __func__); erts_enqueue_port(c_rq, prt); if (!iflag) return; /* done */ - erts_smp_runq_unlock(c_rq); + erts_runq_unlock(c_rq); } } } @@ -3526,76 +3953,84 @@ immigrate(ErtsRunQueue *c_rq, ErtsMigrationPath *mp) while (proc) { erts_aint32_t state; - state = erts_smp_atomic32_read_acqb(&proc->state); + state = erts_atomic32_read_acqb(&proc->state); if (!(ERTS_PSFLG_BOUND & state) && (prio == (int) ERTS_PSFLGS_GET_PRQ_PRIO(state))) { ErtsRunQueueInfo *rqi = &rq->procs.prio_info[prio]; unqueue_process(rq, rpq, rqi, prio, prev_proc, proc); - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); RUNQ_SET_RQ(&proc->run_queue, c_rq); rq_locked = 0; - erts_smp_runq_lock(c_rq); + erts_runq_lock(c_rq); enqueue_process(c_rq, prio, proc); if (!iflag) return; /* done */ - erts_smp_runq_unlock(c_rq); + erts_runq_unlock(c_rq); break; } prev_proc = proc; proc = proc->next; } if (rq_locked) - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); } } } - erts_smp_runq_lock(c_rq); + erts_runq_lock(c_rq); } static ERTS_INLINE void suspend_run_queue(ErtsRunQueue *rq) { - erts_smp_atomic32_read_bor_nob(&rq->scheduler->ssi->flags, + erts_atomic32_read_bor_nob(&rq->scheduler->ssi->flags, ERTS_SSI_FLG_SUSPENDED); (void) ERTS_RUNQ_FLGS_SET(rq, ERTS_RUNQ_FLG_SUSPENDED); wake_scheduler(rq); } -static void scheduler_ix_resume_wake(Uint ix); -static void scheduler_ssi_resume_wake(ErtsSchedulerSleepInfo *ssi); +static void nrml_sched_ix_resume_wake(Uint ix); static ERTS_INLINE void resume_run_queue(ErtsRunQueue *rq) { int pix; + Uint32 oflgs; - erts_smp_runq_lock(rq); + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); + + erts_runq_lock(rq); + + oflgs = ERTS_RUNQ_FLGS_READ_BSET(rq, + (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK + | ERTS_RUNQ_FLG_SUSPENDED + | ERTS_RUNQ_FLG_MSB_EXEC), + (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); - (void) ERTS_RUNQ_FLGS_READ_BSET(rq, - (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK - | ERTS_RUNQ_FLG_SUSPENDED), - (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); + if (oflgs & (ERTS_RUNQ_FLG_SUSPENDED|ERTS_RUNQ_FLG_MSB_EXEC)) { + erts_aint32_t len; + + rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + len = erts_atomic32_read_dirty(&rq->procs.prio_info[pix].len); + rq->procs.prio_info[pix].max_len = len; + rq->procs.prio_info[pix].reds = 0; + } + len = erts_atomic32_read_dirty(&rq->ports.info.len); + rq->ports.info.max_len = len; + rq->ports.info.reds = 0; + len = erts_atomic32_read_dirty(&rq->len); + rq->max_len = len; - rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; - for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { - rq->procs.prio_info[pix].max_len = 0; - rq->procs.prio_info[pix].reds = 0; } - rq->ports.info.max_len = 0; - rq->ports.info.reds = 0; - rq->max_len = 0; - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) -#endif - scheduler_ix_resume_wake(rq->ix); + nrml_sched_ix_resume_wake(rq->ix); } typedef struct { @@ -3608,17 +4043,42 @@ schedule_bound_processes(ErtsRunQueue *rq, ErtsStuckBoundProcesses *sbpp) { Process *proc, *next; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); proc = sbpp->first; while (proc) { - erts_aint32_t state = erts_smp_atomic32_read_acqb(&proc->state); + erts_aint32_t state = erts_atomic32_read_acqb(&proc->state); next = proc->next; enqueue_process(rq, (int) ERTS_PSFLGS_GET_PRQ_PRIO(state), proc); proc = next; } } + +static ERTS_INLINE void +clear_proc_dirty_queue_bit(Process *p, ErtsRunQueue *rq, int prio_bit) +{ +#ifdef DEBUG + erts_aint32_t old; +#endif + erts_aint32_t qb = prio_bit; + if (rq == ERTS_DIRTY_CPU_RUNQ) + qb <<= ERTS_PDSFLGS_IN_CPU_PRQ_MASK_OFFSET; + else { + ASSERT(rq == ERTS_DIRTY_IO_RUNQ); + qb <<= ERTS_PDSFLGS_IN_IO_PRQ_MASK_OFFSET; + } +#ifdef DEBUG + old = (int) +#else + (void) +#endif + erts_atomic32_read_band_mb(&p->dirty_state, ~qb); + ASSERT(old & qb); +} + + + static void evacuate_run_queue(ErtsRunQueue *rq, ErtsStuckBoundProcesses *sbpp) @@ -3626,28 +4086,22 @@ evacuate_run_queue(ErtsRunQueue *rq, int prio_q; ErtsRunQueue *to_rq; ErtsMigrationPaths *mps; - ErtsMigrationPath *mp = NULL; + ErtsMigrationPath *mp; - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) -#endif - { - mps = erts_get_migration_paths_managed(); - mp = &mps->mpath[rq->ix]; - } + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); + + mps = erts_get_migration_paths_managed(); + mp = &mps->mpath[rq->ix]; /* Evacuate scheduled misc ops */ if (rq->misc.start) { ErtsMiscOpList *start, *end; -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); -#endif to_rq = mp->misc_evac_runq; if (!to_rq) return; @@ -3656,9 +4110,10 @@ evacuate_run_queue(ErtsRunQueue *rq, end = rq->misc.end; rq->misc.start = NULL; rq->misc.end = NULL; - erts_smp_runq_unlock(rq); + ERTS_RUNQ_FLGS_UNSET_NOB(rq, ERTS_RUNQ_FLG_MISC_OP); + erts_runq_unlock(rq); - erts_smp_runq_lock(to_rq); + erts_runq_lock(to_rq); if (to_rq->misc.end) to_rq->misc.end->next = start; else @@ -3668,17 +4123,14 @@ evacuate_run_queue(ErtsRunQueue *rq, non_empty_runq(to_rq); - erts_smp_runq_unlock(to_rq); + erts_runq_unlock(to_rq); smp_notify_inc_runq(to_rq); - erts_smp_runq_lock(to_rq); + erts_runq_lock(to_rq); } if (rq->ports.start) { Port *prt; -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); -#endif to_rq = mp->prio[ERTS_PORT_PRIO_LEVEL].runq; if (!to_rq) return; @@ -3689,7 +4141,7 @@ evacuate_run_queue(ErtsRunQueue *rq, ErtsRunQueue *prt_rq; prt = erts_dequeue_port(rq); RUNQ_SET_RQ(&prt->run_queue, to_rq); - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); /* * The port might terminate while * we have no lock on it... @@ -3697,13 +4149,13 @@ evacuate_run_queue(ErtsRunQueue *rq, prt_rq = erts_port_runq(prt); if (prt_rq) { if (prt_rq != to_rq) - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "%s:%d:%s() internal error\n", __FILE__, __LINE__, __func__); erts_enqueue_port(to_rq, prt); - erts_smp_runq_unlock(to_rq); + erts_runq_unlock(to_rq); } - erts_smp_runq_lock(rq); + erts_runq_lock(rq); prt = rq->ports.start; } smp_notify_inc_runq(to_rq); @@ -3714,27 +4166,68 @@ evacuate_run_queue(ErtsRunQueue *rq, erts_aint32_t state; Process *proc; int notify = 0; -#ifdef ERTS_DIRTY_SCHEDULERS - int requeue; -#endif to_rq = NULL; -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) -#endif - { - if (!mp->prio[prio_q].runq) - return; - if (prio_q == PRIORITY_NORMAL && !mp->prio[PRIORITY_LOW].runq) - return; - } + if (!mp->prio[prio_q].runq) + return; + if (prio_q == PRIORITY_NORMAL && !mp->prio[PRIORITY_LOW].runq) + return; proc = dequeue_process(rq, prio_q, &state); while (proc) { -#ifdef ERTS_DIRTY_SCHEDULERS - requeue = 1; + Process *real_proc; + int prio; + erts_aint32_t max_qbit, qbit, real_state; + + prio = ERTS_PSFLGS_GET_PRQ_PRIO(state); + qbit = ((erts_aint32_t) 1) << prio; + + if (!(state & ERTS_PSFLG_PROXY)) { + real_proc = proc; + real_state = state; + } + else { + real_proc = erts_proc_lookup_raw(proc->common.id); + if (!real_proc) { + free_proxy_proc(proc); + goto handle_next_proc; + } + real_state = erts_atomic32_read_acqb(&real_proc->state); + } + + max_qbit = (state >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET); + max_qbit &= ERTS_PSFLGS_QMASK; + max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; + max_qbit &= -max_qbit; + + if (qbit > max_qbit) { + /* Process already queued with higher prio; drop it... */ + if (real_proc != proc) + free_proxy_proc(proc); + else { + erts_aint32_t clr_bits; +#ifdef DEBUG + erts_aint32_t old; +#endif + + clr_bits = ERTS_PSFLG_IN_RUNQ; + clr_bits |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; + +#ifdef DEBUG + old = +#else + (void) #endif - if (ERTS_PSFLG_BOUND & state) { + erts_atomic32_read_band_mb(&proc->state, + ~clr_bits); + ASSERT((old & clr_bits) == clr_bits); + + } + + goto handle_next_proc; + } + + if (ERTS_PSFLG_BOUND & real_state) { /* Bound processes get stuck here... */ proc->next = NULL; if (sbpp->last) @@ -3742,52 +4235,23 @@ evacuate_run_queue(ErtsRunQueue *rq, else sbpp->first = proc; sbpp->last = proc; -#ifdef ERTS_DIRTY_SCHEDULERS - requeue = 0; -#endif - } -#ifdef ERTS_DIRTY_SCHEDULERS - else if (state & ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q) { - erts_aint32_t old; - old = erts_smp_atomic32_read_band_nob(&proc->state, - ~(ERTS_PSFLG_DIRTY_CPU_PROC - | ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q)); - /* assert that no other dirty flags are set */ - ASSERT(!(old & (ERTS_PSFLG_DIRTY_IO_PROC|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))); - } else if (state & ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) { - erts_aint32_t old; - old = erts_smp_atomic32_read_band_nob(&proc->state, - ~(ERTS_PSFLG_DIRTY_IO_PROC - | ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); - /* assert that no other dirty flags are set */ - ASSERT(!(old & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q))); } - if (requeue) { -#else else { -#endif int prio = (int) ERTS_PSFLGS_GET_PRQ_PRIO(state); - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - /* - * dirty run queues evacuate only to run - * queue 0 during multi-scheduling blocking - */ - to_rq = ERTS_RUNQ_IX(0); - else -#endif - to_rq = mp->prio[prio].runq; + to_rq = mp->prio[prio].runq; RUNQ_SET_RQ(&proc->run_queue, to_rq); - erts_smp_runq_lock(to_rq); + erts_runq_lock(to_rq); enqueue_process(to_rq, prio, proc); - erts_smp_runq_unlock(to_rq); + erts_runq_unlock(to_rq); notify = 1; - erts_smp_runq_lock(rq); + erts_runq_lock(rq); } + + handle_next_proc: proc = dequeue_process(rq, prio_q, &state); } if (notify) @@ -3803,15 +4267,15 @@ try_steal_task_from_victim(ErtsRunQueue *rq, int *rq_lockedp, ErtsRunQueue *vrq, ErtsRunPrioQueue *rpq; if (*rq_lockedp) { - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); *rq_lockedp = 0; } - ERTS_SMP_LC_ASSERT(!erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(!erts_lc_runq_is_locked(rq)); - erts_smp_runq_lock(vrq); + erts_runq_lock(vrq); - if (rq->halt_in_progress) + if (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_HALTING) goto no_procs; /* @@ -3845,16 +4309,16 @@ try_steal_task_from_victim(ErtsRunQueue *rq, int *rq_lockedp, ErtsRunQueue *vrq, proc = rpq->first; while (proc) { - erts_aint32_t state = erts_smp_atomic32_read_acqb(&proc->state); + erts_aint32_t state = erts_atomic32_read_acqb(&proc->state); if (!(ERTS_PSFLG_BOUND & state)) { /* Steal process */ int prio = (int) ERTS_PSFLGS_GET_PRQ_PRIO(state); ErtsRunQueueInfo *rqi = &vrq->procs.prio_info[prio]; unqueue_process(vrq, rpq, rqi, prio, prev_proc, proc); - erts_smp_runq_unlock(vrq); + erts_runq_unlock(vrq); RUNQ_SET_RQ(&proc->run_queue, rq); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); *rq_lockedp = 1; enqueue_process(rq, prio, proc); return !0; @@ -3868,7 +4332,7 @@ try_steal_task_from_victim(ErtsRunQueue *rq, int *rq_lockedp, ErtsRunQueue *vrq, no_procs: - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(vrq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(vrq)); /* * Check for a runnable port to steal... @@ -3878,7 +4342,7 @@ no_procs: ErtsRunQueue *prt_rq; Port *prt = erts_dequeue_port(vrq); RUNQ_SET_RQ(&prt->run_queue, rq); - erts_smp_runq_unlock(vrq); + erts_runq_unlock(vrq); /* * The port might terminate while @@ -3890,7 +4354,7 @@ no_procs: return 0; else { if (prt_rq != rq) - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "%s:%d:%s() internal error\n", __FILE__, __LINE__, __func__); *rq_lockedp = 1; @@ -3899,7 +4363,7 @@ no_procs: } } - erts_smp_runq_unlock(vrq); + erts_runq_unlock(vrq); return 0; } @@ -3910,8 +4374,7 @@ check_possible_steal_victim(ErtsRunQueue *rq, int *rq_lockedp, int vix) { ErtsRunQueue *vrq = ERTS_RUNQ_IX(vix); Uint32 flags = ERTS_RUNQ_FLGS_GET(vrq); - if ((flags & (ERTS_RUNQ_FLG_NONEMPTY - | ERTS_RUNQ_FLG_PROTECTED)) == ERTS_RUNQ_FLG_NONEMPTY) + if (runq_got_work_to_execute_flags(flags) & (!(flags & ERTS_RUNQ_FLG_PROTECTED))) return try_steal_task_from_victim(rq, rq_lockedp, vrq, flags); else return 0; @@ -3932,7 +4395,7 @@ try_steal_task(ErtsRunQueue *rq) res = 0; rq_locked = 1; - ERTS_SMP_LC_CHK_RUNQ_LOCK(rq, rq_locked); + ERTS_LC_CHK_RUNQ_LOCK(rq, rq_locked); get_no_runqs(&active_rqs, &blnc_rqs); @@ -3945,7 +4408,7 @@ try_steal_task(ErtsRunQueue *rq) if (active_rqs < blnc_rqs) { int no = blnc_rqs - active_rqs; int stop_ix = vix = active_rqs + rq->ix % no; - while (erts_smp_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) { + while (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) { res = check_possible_steal_victim(rq, &rq_locked, vix); if (res) goto done; @@ -3960,7 +4423,7 @@ try_steal_task(ErtsRunQueue *rq) vix = rq->ix; /* ... then try to steal a job from another active queue... */ - while (erts_smp_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) { + while (erts_atomic32_read_acqb(&no_empty_run_queues) < blnc_rqs) { vix++; if (vix >= active_rqs) vix = 0; @@ -3977,13 +4440,11 @@ try_steal_task(ErtsRunQueue *rq) done: if (!rq_locked) - erts_smp_runq_lock(rq); - - if (!res) - res = rq->halt_in_progress ? - !ERTS_EMPTY_RUNQ_PORTS(rq) : !ERTS_EMPTY_RUNQ(rq); + erts_runq_lock(rq); - return res; + if (res) + return res; + return runq_got_work_to_execute(rq); } /* Run queue balancing */ @@ -4105,7 +4566,7 @@ alloc_mpaths(void) { void *block; ErtsMigrationPaths *res; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&balance_info.update_mtx)); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&balance_info.update_mtx)); res = mpaths.freelist; if (res) { @@ -4128,7 +4589,7 @@ retire_mpaths(ErtsMigrationPaths *mps) { ErtsThrPrgrVal current; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&balance_info.update_mtx)); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&balance_info.update_mtx)); current = erts_thr_progress_current(); @@ -4174,7 +4635,7 @@ check_balance(ErtsRunQueue *c_rq) int sched_util_balancing; #endif - if (erts_smp_atomic32_xchg_nob(&balance_info.checking_balance, 1)) { + if (erts_atomic32_xchg_nob(&balance_info.checking_balance, 1)) { c_rq->check_balance_reds = INT_MAX; return; } @@ -4182,15 +4643,15 @@ check_balance(ErtsRunQueue *c_rq) get_no_runqs(NULL, &blnc_no_rqs); if (blnc_no_rqs == 1) { c_rq->check_balance_reds = INT_MAX; - erts_smp_atomic32_set_nob(&balance_info.checking_balance, 0); + erts_atomic32_set_nob(&balance_info.checking_balance, 0); return; } - erts_smp_runq_unlock(c_rq); + erts_runq_unlock(c_rq); if (balance_info.halftime) { balance_info.halftime = 0; - erts_smp_atomic32_set_nob(&balance_info.checking_balance, 0); + erts_atomic32_set_nob(&balance_info.checking_balance, 0); ERTS_FOREACH_RUNQ(rq, { if (rq->waiting) @@ -4200,7 +4661,7 @@ check_balance(ErtsRunQueue *c_rq) rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; }); - erts_smp_runq_lock(c_rq); + erts_runq_lock(c_rq); return; } @@ -4213,7 +4674,7 @@ check_balance(ErtsRunQueue *c_rq) * is manipulated. Such updates of the migration information * might clash with balancing. */ - erts_smp_mtx_lock(&balance_info.update_mtx); + erts_mtx_lock(&balance_info.update_mtx); forced = balance_info.forced_check_balance; balance_info.forced_check_balance = 0; @@ -4221,10 +4682,10 @@ check_balance(ErtsRunQueue *c_rq) get_no_runqs(¤t_active, &blnc_no_rqs); if (blnc_no_rqs == 1) { - erts_smp_mtx_unlock(&balance_info.update_mtx); - erts_smp_runq_lock(c_rq); + erts_mtx_unlock(&balance_info.update_mtx); + erts_runq_lock(c_rq); c_rq->check_balance_reds = INT_MAX; - erts_smp_atomic32_set_nob(&balance_info.checking_balance, 0); + erts_atomic32_set_nob(&balance_info.checking_balance, 0); return; } @@ -4240,7 +4701,7 @@ check_balance(ErtsRunQueue *c_rq) /* Read balance information for all run queues */ for (qix = 0; qix < blnc_no_rqs; qix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(qix); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); run_queue_info[qix].flags = ERTS_RUNQ_FLGS_GET_NOB(rq); for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { @@ -4268,7 +4729,7 @@ check_balance(ErtsRunQueue *c_rq) run_queue_info[qix].sched_util = erts_get_sched_util(rq, 1, 0); #endif - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); } full_scheds = 0; @@ -4350,7 +4811,7 @@ check_balance(ErtsRunQueue *c_rq) sched_util_balancing = 1; /* * In order to avoid renaming a large amount of fields - * we write utilization values instead of lenght values + * we write utilization values instead of length values * in the 'max_len' and 'migration_limit' fields... */ for (qix = 0; qix < blnc_no_rqs; qix++) { @@ -4707,7 +5168,7 @@ erts_fprintf(stderr, "--------------------------------\n"); Uint32 flags = run_queue_info[qix].flags; ErtsRunQueue *rq = ERTS_RUNQ_IX(qix); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK)); if (rq->waiting) flags |= ERTS_RUNQ_FLG_OUT_OF_WORK; @@ -4722,27 +5183,27 @@ erts_fprintf(stderr, "--------------------------------\n"); rq->out_of_work_count = 0; (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags); - rq->max_len = rq->len; + rq->max_len = erts_atomic32_read_dirty(&rq->len); for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { ErtsRunQueueInfo *rqi; rqi = (pix == ERTS_PORT_PRIO_LEVEL ? &rq->ports.info : &rq->procs.prio_info[pix]); - erts_smp_reset_max_len(rq, rqi); + erts_reset_max_len(rq, rqi); rqi->reds = 0; } rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); } - erts_smp_atomic32_set_nob(&balance_info.checking_balance, 0); + erts_atomic32_set_nob(&balance_info.checking_balance, 0); balance_info.n++; retire_mpaths(old_mpaths); - erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_mtx_unlock(&balance_info.update_mtx); - erts_smp_runq_lock(c_rq); + erts_runq_lock(c_rq); } static void @@ -4750,7 +5211,7 @@ change_no_used_runqs(int used) { ErtsMigrationPaths *new_mpaths, *old_mpaths; int qix; - erts_smp_mtx_lock(&balance_info.update_mtx); + erts_mtx_lock(&balance_info.update_mtx); set_no_used_runqs(used); old_mpaths = erts_get_migration_paths_managed(); @@ -4797,28 +5258,23 @@ change_no_used_runqs(int used) /* Make sure that we balance soon... */ balance_info.forced_check_balance = 1; - erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_mtx_unlock(&balance_info.update_mtx); - erts_smp_runq_lock(ERTS_RUNQ_IX(0)); + erts_runq_lock(ERTS_RUNQ_IX(0)); ERTS_RUNQ_IX(0)->check_balance_reds = 0; - erts_smp_runq_unlock(ERTS_RUNQ_IX(0)); + erts_runq_unlock(ERTS_RUNQ_IX(0)); } -#endif /* #ifdef ERTS_SMP */ Uint erts_debug_nbalance(void) { -#ifdef ERTS_SMP Uint n; - erts_smp_mtx_lock(&balance_info.update_mtx); + erts_mtx_lock(&balance_info.update_mtx); n = balance_info.n; - erts_smp_mtx_unlock(&balance_info.update_mtx); + erts_mtx_unlock(&balance_info.update_mtx); return n; -#else - return 0; -#endif } /* Wakeup other schedulers */ @@ -4864,7 +5320,6 @@ typedef enum { #define ERTS_WAKEUP_OTHER_DEC_LEGACY 10 #define ERTS_WAKEUP_OTHER_FIXED_INC_LEGACY (CONTEXT_REDS/10) -#ifdef ERTS_SMP static struct { ErtsSchedWakeupOtherThreshold threshold; @@ -4880,7 +5335,7 @@ wakeup_other_check(ErtsRunQueue *rq, Uint32 flags) { int wo_reds = rq->wakeup_other_reds; if (wo_reds) { - int left_len = rq->len - 1; + int left_len = erts_atomic32_read_dirty(&rq->len) - 1; if (left_len < 1) { int wo_reduce = wo_reds << wakeup_other.dec_shift; wo_reduce &= wakeup_other.dec_mask; @@ -4892,14 +5347,14 @@ wakeup_other_check(ErtsRunQueue *rq, Uint32 flags) rq->wakeup_other += (left_len*wo_reds + ERTS_WAKEUP_OTHER_FIXED_INC); if (rq->wakeup_other > wakeup_other.limit) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && rq->waiting) - wake_dirty_schedulers(rq, 1); - else -#endif + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + if (rq->waiting) { + wake_dirty_scheduler(rq); + } + } else { int empty_rqs = - erts_smp_atomic32_read_acqb(&no_empty_run_queues); + erts_atomic32_read_acqb(&no_empty_run_queues); if (flags & ERTS_RUNQ_FLG_PROTECTED) (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); if (empty_rqs != 0) @@ -4951,7 +5406,7 @@ wakeup_other_check_legacy(ErtsRunQueue *rq, Uint32 flags) { int wo_reds = rq->wakeup_other_reds; if (wo_reds) { - erts_aint32_t len = rq->len; + erts_aint32_t len = erts_atomic32_read_dirty(&rq->len); if (len < 2) { rq->wakeup_other -= ERTS_WAKEUP_OTHER_DEC_LEGACY*wo_reds; if (rq->wakeup_other < 0) @@ -4962,7 +5417,7 @@ wakeup_other_check_legacy(ErtsRunQueue *rq, Uint32 flags) else { if (flags & ERTS_RUNQ_FLG_PROTECTED) (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); - if (erts_smp_atomic32_read_acqb(&no_empty_run_queues) != 0) { + if (erts_atomic32_read_acqb(&no_empty_run_queues) != 0) { wake_scheduler_on_empty_runq(rq); rq->wakeup_other = 0; } @@ -5013,7 +5468,7 @@ static int no_runqs_to_supervise(void) { int used; - erts_aint32_t nerq = erts_smp_atomic32_read_acqb(&no_empty_run_queues); + erts_aint32_t nerq = erts_atomic32_read_acqb(&no_empty_run_queues); if (nerq <= 0) return 0; get_no_runqs(NULL, &used); @@ -5046,43 +5501,33 @@ runq_supervisor(void *unused) for (ix = 0; ix < no_rqs; ix++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); if (ERTS_RUNQ_FLGS_GET(rq) & ERTS_RUNQ_FLG_NONEMPTY) { - erts_smp_runq_lock(rq); - if (rq->len != 0) + erts_runq_lock(rq); + if (erts_atomic32_read_dirty(&rq->len) != 0) wake_scheduler_on_empty_runq(rq); /* forced wakeup... */ - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); } } } return NULL; } -#endif void erts_early_init_scheduling(int no_schedulers) { aux_work_timeout_early_init(no_schedulers); -#ifdef ERTS_SMP wakeup_other.threshold = ERTS_SCHED_WAKEUP_OTHER_THRESHOLD_MEDIUM; wakeup_other.type = ERTS_SCHED_WAKEUP_OTHER_TYPE_DEFAULT; -#endif -#ifndef ERTS_SCHED_MIN_SPIN sched_busy_wait.sys_schedule = ERTS_SCHED_SYS_SLEEP_SPINCOUNT_MEDIUM; sched_busy_wait.tse = (ERTS_SCHED_SYS_SLEEP_SPINCOUNT_MEDIUM * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT); sched_busy_wait.aux_work = (ERTS_SCHED_SYS_SLEEP_SPINCOUNT_MEDIUM * ERTS_SCHED_AUX_WORK_SLEEP_SPINCOUNT_FACT_MEDIUM); -#else - sched_busy_wait.sys_schedule = ERTS_SCHED_SYS_SLEEP_SPINCOUNT_NONE; - sched_busy_wait.tse = ERTS_SCHED_SYS_SLEEP_SPINCOUNT_NONE; - sched_busy_wait.aux_work = ERTS_SCHED_SYS_SLEEP_SPINCOUNT_NONE; -#endif } int erts_sched_set_wakeup_other_thresold(char *str) { -#ifdef ERTS_SMP ErtsSchedWakeupOtherThreshold threshold; if (sys_strcmp(str, "very_high") == 0) threshold = ERTS_SCHED_WAKEUP_OTHER_THRESHOLD_VERY_HIGH; @@ -5099,20 +5544,11 @@ erts_sched_set_wakeup_other_thresold(char *str) wakeup_other.threshold = threshold; set_wakeup_other_data(); return 0; -#else - if (sys_strcmp(str, "very_high") == 0 || sys_strcmp(str, "high") == 0 || - sys_strcmp(str, "medium") == 0 || sys_strcmp(str, "low") == 0 || - sys_strcmp(str, "very_low") == 0) { - return 0; - } - return EINVAL; -#endif } int erts_sched_set_wakeup_other_type(char *str) { -#ifdef ERTS_SMP ErtsSchedWakeupOtherType type; if (sys_strcmp(str, "default") == 0) type = ERTS_SCHED_WAKEUP_OTHER_TYPE_DEFAULT; @@ -5122,12 +5558,6 @@ erts_sched_set_wakeup_other_type(char *str) return EINVAL; wakeup_other.type = type; return 0; -#else - if (sys_strcmp(str, "default") == 0 || sys_strcmp(str, "legacy") == 0) { - return 0; - } - return EINVAL; -#endif } int @@ -5192,35 +5622,41 @@ erts_sched_set_wake_cleanup_threshold(char *str) static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) { - if (!esdp) - awdp->sched_id = 0; -#ifdef ERTS_DIRTY_SCHEDULERS - else if (ERTS_SCHEDULER_IS_DIRTY(esdp)) - awdp->sched_id = (int) ERTS_DIRTY_SCHEDULER_NO(esdp); -#endif - else - awdp->sched_id = (int) esdp->no; + int id = 0; + if (esdp) { + switch (esdp->type) { + case ERTS_SCHED_NORMAL: + id = (int) esdp->no; + break; + case ERTS_SCHED_DIRTY_CPU: + id = (int) erts_no_schedulers; + id += (int) esdp->dirty_no; + break; + case ERTS_SCHED_DIRTY_IO: + id = (int) erts_no_schedulers; + id += (int) erts_no_dirty_cpu_schedulers; + id += (int) esdp->dirty_no; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + break; + } + } + + awdp->sched_id = id; awdp->esdp = esdp; awdp->ssi = esdp ? esdp->ssi : NULL; -#ifdef ERTS_SMP awdp->latest_wakeup = ERTS_THR_PRGR_VAL_FIRST; awdp->misc.thr_prgr = ERTS_THR_PRGR_VAL_WAITING; awdp->dd.thr_prgr = ERTS_THR_PRGR_VAL_WAITING; - awdp->dd.completed_callback = NULL; - awdp->dd.completed_arg = NULL; + awdp->cncld_tmrs.thr_prgr = ERTS_THR_PRGR_VAL_WAITING; awdp->later_op.thr_prgr = ERTS_THR_PRGR_VAL_FIRST; awdp->later_op.size = 0; awdp->later_op.first = NULL; awdp->later_op.last = NULL; -#endif -#ifdef ERTS_USE_ASYNC_READY_Q -#ifdef ERTS_SMP awdp->async_ready.need_thr_prgr = 0; awdp->async_ready.thr_prgr = ERTS_THR_PRGR_VAL_WAITING; -#endif awdp->async_ready.queue = NULL; -#endif -#ifdef ERTS_SMP awdp->delayed_wakeup.next = ERTS_DELAYED_WAKEUP_INFINITY; if (!dawwp) { awdp->delayed_wakeup.job = NULL; @@ -5236,20 +5672,23 @@ init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) for (i = 0; i <= erts_no_schedulers; i++) awdp->delayed_wakeup.sched2jix[i] = -1; } -#endif + awdp->debug.wait_completed.flags = 0; + awdp->debug.wait_completed.callback = NULL; + awdp->debug.wait_completed.arg = NULL; } static void init_scheduler_data(ErtsSchedulerData* esdp, int num, ErtsSchedulerSleepInfo* ssi, ErtsRunQueue* runq, - char** daww_ptr, size_t daww_sz) + char** daww_ptr, size_t daww_sz, + Process *shadow_proc, + Uint64 time_stamp) { -#ifdef ERTS_SMP + esdp->timer_wheel = NULL; erts_bits_init_state(&esdp->erl_bits_state); esdp->match_pseudo_process = NULL; esdp->free_process = NULL; -#endif esdp->x_reg_array = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, ERTS_X_REGS_ALLOCATED * @@ -5257,21 +5696,41 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, esdp->f_reg_array = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_BEAM_REGISTER, MAX_REG * sizeof(FloatDef)); -#if !HEAP_ON_C_STACK - esdp->num_tmp_heap_used = 0; -#endif -#ifdef ERTS_DIRTY_SCHEDULERS + esdp->run_queue = runq; if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) { esdp->no = 0; - ERTS_DIRTY_SCHEDULER_NO(esdp) = (Uint) num; + if (runq == ERTS_DIRTY_CPU_RUNQ) + esdp->type = ERTS_SCHED_DIRTY_CPU; + else { + ASSERT(runq == ERTS_DIRTY_IO_RUNQ); + esdp->type = ERTS_SCHED_DIRTY_IO; + } + esdp->dirty_no = (Uint) num; + if (num == 1) { + /* + * Multi-scheduling block functionality depends + * on finding dirty scheduler number 1 here... + */ + runq->scheduler = esdp; + } } else { + esdp->type = ERTS_SCHED_NORMAL; esdp->no = (Uint) num; - ERTS_DIRTY_SCHEDULER_NO(esdp) = 0; + esdp->dirty_no = 0; + runq->scheduler = esdp; } -#else - esdp->no = (Uint) num; -#endif + esdp->dirty_shadow_process = shadow_proc; + if (shadow_proc) { + erts_init_empty_process(shadow_proc); + erts_atomic32_init_nob(&shadow_proc->state, + (ERTS_PSFLG_ACTIVE + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_PROXY)); + shadow_proc->static_flags = ERTS_STC_FLG_SHADOW_PROC; + } + + ssi->esdp = esdp; esdp->ssi = ssi; esdp->current_process = NULL; esdp->current_port = NULL; @@ -5281,41 +5740,42 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, erts_init_atom_cache_map(&esdp->atom_cache_map); - esdp->run_queue = runq; - esdp->run_queue->scheduler = esdp; + esdp->last_monotonic_time = 0; + esdp->check_time_reds = 0; + + esdp->thr_id = (Uint32) num; + erts_sched_bif_unique_init(esdp); + + esdp->io.out = (Uint64) 0; + esdp->io.in = (Uint64) 0; if (daww_ptr) { init_aux_work_data(&esdp->aux_work_data, esdp, *daww_ptr); -#ifdef ERTS_SMP *daww_ptr += daww_sz; -#endif } esdp->reductions = 0; - init_sched_wall_time(&esdp->sched_wall_time); + init_sched_wall_time(esdp, time_stamp); erts_port_task_handle_init(&esdp->nosuspend_port_task_handle); } void -erts_init_scheduling(int no_schedulers, int no_schedulers_online -#ifdef ERTS_DIRTY_SCHEDULERS - , int no_dirty_cpu_schedulers, int no_dirty_cpu_schedulers_online, +erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_threads, + int no_dirty_cpu_schedulers, int no_dirty_cpu_schedulers_online, int no_dirty_io_schedulers -#endif ) { - int ix, n, no_ssi; + int ix, n, no_ssi, tot_rqs; char *daww_ptr; size_t daww_sz; size_t size_runqs; + erts_aint32_t set_schdlr_sspnd_change_flags; init_misc_op_list_alloc(); init_proc_sys_task_queues_alloc(); -#ifdef ERTS_SMP set_wakeup_other_data(); -#endif #if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT if (erts_sched_balance_util) @@ -5325,36 +5785,26 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online ASSERT(no_schedulers_online <= no_schedulers); ASSERT(no_schedulers_online >= 1); ASSERT(no_schedulers >= 1); -#ifdef ERTS_DIRTY_SCHEDULERS ASSERT(no_dirty_cpu_schedulers <= no_schedulers); ASSERT(no_dirty_cpu_schedulers >= 1); ASSERT(no_dirty_cpu_schedulers_online <= no_schedulers_online); ASSERT(no_dirty_cpu_schedulers_online >= 1); -#endif + ASSERT(erts_no_poll_threads == no_poll_threads); /* Create and initialize run queues */ n = no_schedulers; - size_runqs = sizeof(ErtsAlignedRunQueue) * (n + ERTS_NUM_DIRTY_RUNQS); + tot_rqs = (n + ERTS_NUM_DIRTY_RUNQS); + size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs; erts_aligned_run_queues = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs); -#ifdef ERTS_SMP -#ifdef ERTS_DIRTY_SCHEDULERS - erts_aligned_run_queues += ERTS_NUM_DIRTY_RUNQS; -#endif - erts_smp_atomic32_init_nob(&no_empty_run_queues, 0); -#endif + erts_atomic32_init_nob(&no_empty_run_queues, 0); erts_no_run_queues = n; - for (ix = -(ERTS_NUM_DIRTY_RUNQS); ix < n; ix++) { + for (ix = 0; ix < tot_rqs; ix++) { int pix, rix; -#ifdef ERTS_DIRTY_SCHEDULERS - ErtsRunQueue *rq = ERTS_RUNQ_IX_IS_DIRTY(ix) ? - ERTS_DIRTY_RUNQ_IX(ix) : ERTS_RUNQ_IX(ix); -#else ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); -#endif rq->ix = ix; @@ -5362,16 +5812,16 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online * id if the esdp->no <-> ix+1 mapping change. */ - erts_smp_mtx_init_x(&rq->mtx, "run_queue", make_small(ix + 1)); - erts_smp_cnd_init(&rq->cnd); + erts_mtx_init(&rq->mtx, "run_queue", make_small(ix + 1), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER); + erts_cnd_init(&rq->cnd); -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP - if (ERTS_RUNQ_IX_IS_DIRTY(ix)) - erts_smp_spinlock_init(&rq->sleepers.lock, "dirty_run_queue_sleep_list"); + if (ERTS_RUNQ_IX_IS_DIRTY(ix)) { + erts_spinlock_init(&rq->sleepers.lock, "dirty_run_queue_sleep_list", + make_small(ix + 1), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER); + } rq->sleepers.list = NULL; -#endif -#endif rq->waiting = 0; rq->woken = 0; @@ -5384,17 +5834,16 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online } rq->out_of_work_count = 0; rq->max_len = 0; - rq->len = 0; + erts_atomic32_set_nob(&rq->len, 0); rq->wakeup_other = 0; rq->wakeup_other_reds = 0; - rq->halt_in_progress = 0; rq->procs.pending_exiters = NULL; rq->procs.context_switches = 0; rq->procs.reductions = 0; for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { - erts_smp_atomic32_init_nob(&rq->procs.prio_info[pix].len, 0); + erts_atomic32_init_nob(&rq->procs.prio_info[pix].len, 0); rq->procs.prio_info[pix].max_len = 0; rq->procs.prio_info[pix].reds = 0; if (pix < ERTS_NO_PROC_PRIO_LEVELS - 1) { @@ -5406,7 +5855,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online rq->misc.start = NULL; rq->misc.end = NULL; - erts_smp_atomic32_init_nob(&rq->ports.info.len, 0); + erts_atomic32_init_nob(&rq->ports.info.len, 0); rq->ports.info.max_len = 0; rq->ports.info.reds = 0; rq->ports.start = NULL; @@ -5418,7 +5867,6 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online } -#ifdef ERTS_SMP if (erts_no_run_queues != 1) { run_queue_info = erts_alloc(ERTS_ALC_T_RUNQ_BLNS, @@ -5429,49 +5877,42 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online * erts_no_run_queues)); } -#endif n = (int) no_schedulers; erts_no_schedulers = n; -#ifdef ERTS_DIRTY_SCHEDULERS + erts_no_total_schedulers = n; erts_no_dirty_cpu_schedulers = no_dirty_cpu_schedulers; + erts_no_total_schedulers += no_dirty_cpu_schedulers; erts_no_dirty_io_schedulers = no_dirty_io_schedulers; -#endif + erts_no_total_schedulers += no_dirty_io_schedulers; /* Create and initialize scheduler sleep info */ -#ifdef ERTS_SMP - no_ssi = n+1; -#else - no_ssi = 1; -#endif + no_ssi = n + 1 /* aux thread */; aligned_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, no_ssi*sizeof(ErtsAlignedSchedulerSleepInfo)); for (ix = 0; ix < no_ssi; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_sched_sleep_info[ix].ssi; -#ifdef ERTS_SMP #if 0 /* no need to initialize these... */ ssi->next = NULL; ssi->prev = NULL; #endif - erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->esdp = NULL; + erts_atomic32_init_nob(&ssi->flags, 0); ssi->event = NULL; /* initialized in sched_thread_func */ -#endif erts_atomic32_init_nob(&ssi->aux_work, 0); } -#ifdef ERTS_SMP - aligned_sched_sleep_info++; + aligned_sched_sleep_info += 1 /* aux thread */; -#ifdef ERTS_DIRTY_SCHEDULERS aligned_dirty_cpu_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi; - erts_smp_atomic32_init_nob(&ssi->flags, 0); + erts_atomic32_init_nob(&ssi->flags, 0); ssi->event = NULL; /* initialized in sched_dirty_cpu_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } @@ -5481,98 +5922,95 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); for (ix = 0; ix < no_dirty_io_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi; - erts_smp_atomic32_init_nob(&ssi->flags, 0); + erts_atomic32_init_nob(&ssi->flags, 0); ssi->event = NULL; /* initialized in sched_dirty_io_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } -#endif -#endif + + aligned_poll_thread_sleep_info = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_SLP_INFO, + no_poll_threads*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < no_poll_threads; ix++) { + ErtsSchedulerSleepInfo *ssi = &aligned_poll_thread_sleep_info[ix].ssi; + ssi->esdp = NULL; + erts_atomic32_init_nob(&ssi->flags, 0); + ssi->event = NULL; /* initialized in poll_thread */ + erts_atomic32_init_nob(&ssi->aux_work, 0); + } /* Create and initialize scheduler specific data */ -#ifdef ERTS_SMP daww_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE((sizeof(ErtsDelayedAuxWorkWakeupJob) + sizeof(int))*(n+1)); daww_ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, daww_sz*n); -#else - daww_sz = 0; - daww_ptr = NULL; -#endif erts_aligned_scheduler_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, - n*sizeof(ErtsAlignedSchedulerData)); + n*sizeof(ErtsAlignedSchedulerData)); for (ix = 0; ix < n; ix++) { ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(ix); init_scheduler_data(esdp, ix+1, ERTS_SCHED_SLEEP_INFO_IX(ix), - ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz); + ERTS_RUNQ_IX(ix), &daww_ptr, daww_sz, + NULL, 0); } -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP - erts_aligned_dirty_cpu_scheduler_data = - erts_alloc_permanent_cache_aligned( - ERTS_ALC_T_SCHDLR_DATA, - no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerData)); - for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { - ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); - init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix), - ERTS_DIRTY_CPU_RUNQ, NULL, 0); - } - erts_aligned_dirty_io_scheduler_data = - erts_alloc_permanent_cache_aligned( - ERTS_ALC_T_SCHDLR_DATA, - no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerData)); - for (ix = 0; ix < no_dirty_io_schedulers; ix++) { - ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); - init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix), - ERTS_DIRTY_IO_RUNQ, NULL, 0); + { + Uint64 ts = sched_wall_time_ts(); + int dirty_scheds = no_dirty_cpu_schedulers + no_dirty_io_schedulers; + int adspix = 0; + ErtsAlignedDirtyShadowProcess *adsp = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_DATA, + dirty_scheds * sizeof(ErtsAlignedDirtyShadowProcess)); + + erts_aligned_dirty_cpu_scheduler_data = + erts_alloc_permanent_cache_aligned( + ERTS_ALC_T_SCHDLR_DATA, + dirty_scheds * sizeof(ErtsAlignedSchedulerData)); + + erts_aligned_dirty_io_scheduler_data = + &erts_aligned_dirty_cpu_scheduler_data[no_dirty_cpu_schedulers]; + + for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix), + ERTS_DIRTY_CPU_RUNQ, NULL, 0, + &adsp[adspix++].dsp, ts); + } + for (ix = 0; ix < no_dirty_io_schedulers; ix++) { + ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); + init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix), + ERTS_DIRTY_IO_RUNQ, NULL, 0, + &adsp[adspix++].dsp, ts); + } } -#endif -#endif init_misc_aux_work(); -#if !HALFWORD_HEAP init_swtreq_alloc(); -#endif - - -#ifdef ERTS_SMP + init_screq_alloc(); - erts_atomic32_init_nob(&completed_dealloc_count, 0); /* debug only */ + erts_atomic32_init_nob(&debug_wait_completed_count, 0); /* debug only */ + debug_wait_completed_flags = 0; aux_thread_aux_work_data = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, sizeof(ErtsAuxWorkData)); - erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd"); - erts_smp_cnd_init(&schdlr_sspnd.cnd); + poll_thread_aux_work_data = + erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, + no_poll_threads * sizeof(ErtsAuxWorkData)); - erts_smp_atomic32_init_nob(&schdlr_sspnd.changing, 0); - schdlr_sspnd.online = no_schedulers_online; - schdlr_sspnd.curr_online = no_schedulers; - schdlr_sspnd.msb.ongoing = 0; - erts_smp_atomic32_init_nob(&schdlr_sspnd.active, no_schedulers); -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_changing, 0); - schdlr_sspnd.dirty_cpu_online = no_dirty_cpu_schedulers_online; - schdlr_sspnd.dirty_cpu_curr_online = no_dirty_cpu_schedulers; - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_active, no_dirty_cpu_schedulers); - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_changing, 0); - schdlr_sspnd.dirty_io_online = no_dirty_io_schedulers; - schdlr_sspnd.dirty_io_curr_online = no_dirty_io_schedulers; - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_active, no_dirty_io_schedulers); -#endif - schdlr_sspnd.msb.procs = NULL; init_no_runqs(no_schedulers_online, no_schedulers_online); balance_info.last_active_runqs = no_schedulers; - erts_smp_mtx_init(&balance_info.update_mtx, "migration_info_update"); + erts_mtx_init(&balance_info.update_mtx, "migration_info_update", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER); balance_info.forced_check_balance = 0; balance_info.halftime = 1; balance_info.full_reds_history_index = 0; - erts_smp_atomic32_init_nob(&balance_info.checking_balance, 0); + erts_atomic32_init_nob(&balance_info.checking_balance, 0); balance_info.prev_rise.active_runqs = 0; balance_info.prev_rise.max_len = 0; balance_info.prev_rise.reds = 0; @@ -5580,74 +6018,79 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online init_migration_paths(); - if (no_schedulers_online < no_schedulers) { + init_scheduler_suspend(); + + set_schdlr_sspnd_change_flags = 0; + + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL, + no_schedulers_online); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_NORMAL, + no_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL, + no_schedulers); + + if (no_schedulers_online != no_schedulers) { + ASSERT(no_schedulers_online < no_schedulers); + set_schdlr_sspnd_change_flags |= ERTS_SCHDLR_SSPND_CHNG_ONLN; + schdlr_sspnd.changer = am_init; change_no_used_runqs(no_schedulers_online); for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); } - schdlr_sspnd.wait_curr_online = no_schedulers_online; - schdlr_sspnd.curr_online *= 2; /* Boot strapping... */ - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); -#ifdef ERTS_DIRTY_SCHEDULERS - schdlr_sspnd.dirty_cpu_wait_curr_online = no_dirty_cpu_schedulers_online; - schdlr_sspnd.dirty_cpu_curr_online *= 2; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = no_dirty_cpu_schedulers_online; ix < no_dirty_cpu_schedulers; ix++) { - ErtsSchedulerData* esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); - erts_smp_atomic32_read_bor_nob(&esdp->ssi->flags, ERTS_SSI_FLG_SUSPENDED); - } - - schdlr_sspnd.dirty_io_wait_curr_online = no_dirty_io_schedulers; - schdlr_sspnd.dirty_io_curr_online *= 2; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); -#endif - erts_smp_atomic32_init_nob(&doing_sys_schedule, 0); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers_online); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers); - init_misc_aux_work(); - -#else /* !ERTS_SMP */ - { - ErtsSchedulerData *esdp; - esdp = ERTS_SCHEDULER_IX(0); - erts_scheduler_data = esdp; -#ifdef USE_THREADS - erts_tsd_set(sched_data_key, (void *) esdp); -#endif + if (no_dirty_cpu_schedulers_online != no_dirty_cpu_schedulers) { + ASSERT(no_dirty_cpu_schedulers_online < no_dirty_cpu_schedulers); + set_schdlr_sspnd_change_flags |= ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + for (ix = no_dirty_cpu_schedulers_online; ix < no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData* esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + erts_atomic32_read_bor_nob(&esdp->ssi->flags, ERTS_SSI_FLG_SUSPENDED); + } } - erts_no_schedulers = 1; -#ifdef ERTS_DIRTY_SCHEDULERS - erts_no_dirty_cpu_schedulers = 0; - erts_no_dirty_io_schedulers = 0; -#endif -#endif - erts_smp_atomic32_init_nob(&function_calls, 0); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + + erts_atomic32_init_nob(&dirty_count.cpu.active, + (erts_aint32_t) no_dirty_cpu_schedulers); + erts_atomic32_init_nob(&dirty_count.io.active, + (erts_aint32_t) no_dirty_io_schedulers); + + + if (set_schdlr_sspnd_change_flags) + erts_atomic32_set_nob(&schdlr_sspnd.changing, + set_schdlr_sspnd_change_flags); + + init_misc_aux_work(); + /* init port tasks */ erts_port_task_init(); - aux_work_timeout_late_init(); -#ifndef ERTS_SMP -#ifdef ERTS_DO_VERIFY_UNUSED_TEMP_ALLOC - erts_scheduler_data->verify_unused_temp_alloc - = erts_alloc_get_verify_unused_temp_alloc( - &erts_scheduler_data->verify_unused_temp_alloc_data); - ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL); -#endif -#endif - - erts_smp_atomic32_init_relb(&erts_halt_progress, -1); + erts_atomic32_init_relb(&erts_halt_progress, -1); erts_halt_code = 0; -#if !defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) - erts_lc_set_thread_name("scheduler 1"); -#endif } @@ -5660,7 +6103,6 @@ erts_schedid2runq(Uint id) return ERTS_RUNQ_IX(ix); } -#ifdef USE_THREADS ErtsSchedulerData * erts_get_scheduler_data(void) @@ -5668,16 +6110,13 @@ erts_get_scheduler_data(void) return (ErtsSchedulerData *) erts_tsd_get(sched_data_key); } -#endif static Process * make_proxy_proc(Process *prev_proxy, Process *proc, erts_aint32_t prio) { erts_aint32_t state; Process *proxy; -#ifdef ERTS_SMP ErtsRunQueue *rq = RUNQ_READ_RQ(&proc->run_queue); -#endif state = (ERTS_PSFLG_PROXY | ERTS_PSFLG_IN_RUNQ @@ -5688,11 +6127,9 @@ make_proxy_proc(Process *prev_proxy, Process *proc, erts_aint32_t prio) if (prev_proxy) { proxy = prev_proxy; - ASSERT(erts_smp_atomic32_read_nob(&proxy->state) & ERTS_PSFLG_PROXY); - erts_smp_atomic32_set_nob(&proxy->state, state); -#ifdef ERTS_SMP + ASSERT(erts_atomic32_read_nob(&proxy->state) & ERTS_PSFLG_PROXY); + erts_atomic32_set_nob(&proxy->state, state); RUNQ_SET_RQ(&proc->run_queue, rq); -#endif } else { proxy = erts_alloc(ERTS_ALC_T_PROC, sizeof(Process)); @@ -5704,11 +6141,9 @@ make_proxy_proc(Process *prev_proxy, Process *proc, erts_aint32_t prio) ui32[i] = (Uint32) 0xdeadbeef; } #endif - erts_smp_atomic32_init_nob(&proxy->state, state); -#ifdef ERTS_SMP - erts_smp_atomic_init_nob(&proxy->run_queue, - erts_smp_atomic_read_nob(&proc->run_queue)); -#endif + erts_atomic32_init_nob(&proxy->state, state); + erts_atomic_init_nob(&proxy->run_queue, + erts_atomic_read_nob(&proc->run_queue)); } proxy->common.id = proc->common.id; @@ -5716,19 +6151,104 @@ make_proxy_proc(Process *prev_proxy, Process *proc, erts_aint32_t prio) return proxy; } -static ERTS_INLINE void -free_proxy_proc(Process *proxy) -{ - ASSERT(erts_smp_atomic32_read_nob(&proxy->state) & ERTS_PSFLG_PROXY); - erts_free(ERTS_ALC_T_PROC, proxy); -} - #define ERTS_ENQUEUE_NOT 0 #define ERTS_ENQUEUE_NORMAL_QUEUE 1 -#ifdef ERTS_DIRTY_SCHEDULERS #define ERTS_ENQUEUE_DIRTY_CPU_QUEUE 2 #define ERTS_ENQUEUE_DIRTY_IO_QUEUE 3 + + +static int +check_dirty_enqueue_in_prio_queue(Process *c_p, + erts_aint32_t *newp, + erts_aint32_t actual, + erts_aint32_t aprio, + erts_aint32_t qbit) +{ + int queue; + erts_aint32_t dact, max_qbit; + + /* Do not enqueue free process... */ + if (actual & ERTS_PSFLG_FREE) { + *newp &= ~ERTS_PSFLGS_DIRTY_WORK; + return ERTS_ENQUEUE_NOT; + } + + /* Termination should be done on an ordinary scheduler */ + if ((*newp) & ERTS_PSFLG_EXITING) { + *newp &= ~ERTS_PSFLGS_DIRTY_WORK; + return ERTS_ENQUEUE_NORMAL_QUEUE; + } + + /* + * If we have system tasks, we enqueue on ordinary run-queue + * and take care of those system tasks first. + */ + if ((*newp) & ERTS_PSFLG_ACTIVE_SYS) + return ERTS_ENQUEUE_NORMAL_QUEUE; + + dact = erts_atomic32_read_mb(&c_p->dirty_state); + if (actual & (ERTS_PSFLG_DIRTY_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_CPU_PROC)) { + max_qbit = ((dact >> ERTS_PDSFLGS_IN_CPU_PRQ_MASK_OFFSET) + & ERTS_PDSFLGS_QMASK); + queue = ERTS_ENQUEUE_DIRTY_CPU_QUEUE; + } + else { + ASSERT(actual & ERTS_PSFLG_DIRTY_IO_PROC); + max_qbit = ((dact >> ERTS_PDSFLGS_IN_IO_PRQ_MASK_OFFSET) + & ERTS_PDSFLGS_QMASK); + queue = ERTS_ENQUEUE_DIRTY_IO_QUEUE; + } + + max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; + max_qbit &= -max_qbit; + + if (qbit >= max_qbit) + return ERTS_ENQUEUE_NOT; /* Already queued in higher or equal prio */ + if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK)) + != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) { + /* + * Process struct already enqueued, or actual prio not + * equal to user prio, i.e., enqueue using proxy. + */ + return -1*queue; + } + + /* + * Enqueue using process struct. + */ + *newp &= ~ERTS_PSFLGS_PRQ_PRIO_MASK; + *newp |= ERTS_PSFLG_IN_RUNQ | (aprio << ERTS_PSFLGS_PRQ_PRIO_OFFSET); + return queue; +} + +static ERTS_INLINE int +fin_dirty_enq_s_change(Process *p, + int pstruct_reserved, + erts_aint32_t enq_prio, + int qmask_offset) +{ + erts_aint32_t qbit = 1 << enq_prio; + qbit <<= qmask_offset; + + if (qbit & erts_atomic32_read_bor_mb(&p->dirty_state, qbit)) { + /* Already enqueue by someone else... */ + if (pstruct_reserved) { + /* We reserved process struct for enqueue; clear it... */ +#ifdef DEBUG + erts_aint32_t old = +#else + (void) #endif + erts_atomic32_read_band_nob(&p->state, ~ERTS_PSFLG_IN_RUNQ); + ASSERT(old & ERTS_PSFLG_IN_RUNQ); + } + return 0; + } + + return !0; +} + static ERTS_INLINE int check_enqueue_in_prio_queue(Process *c_p, @@ -5743,62 +6263,13 @@ check_enqueue_in_prio_queue(Process *c_p, *prq_prio_p = aprio; -#ifdef ERTS_DIRTY_SCHEDULERS - if (actual & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) { - /* - * If we have system tasks of a priority higher - * or equal to the user priority, we enqueue - * on ordinary run-queue and take care of - * those system tasks first. - */ - if (actual & ERTS_PSFLG_ACTIVE_SYS) { - erts_aint32_t uprio, stprio, qmask; - uprio = (actual >> ERTS_PSFLGS_USR_PRIO_OFFSET) & ERTS_PSFLGS_PRIO_MASK; - if (aprio < uprio) - goto enqueue_normal_runq; /* system tasks with higher prio */ - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); - qmask = c_p->sys_task_qs->qmask; - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); - switch (qmask & -qmask) { - case MAX_BIT: - stprio = PRIORITY_MAX; - break; - case HIGH_BIT: - stprio = PRIORITY_HIGH; - break; - case NORMAL_BIT: - stprio = PRIORITY_NORMAL; - break; - case LOW_BIT: - stprio = PRIORITY_LOW; - break; - default: - stprio = PRIORITY_LOW+1; - break; - } - if (stprio <= uprio) - goto enqueue_normal_runq; /* system tasks with higher prio */ - } - - /* Enqueue in dirty run queue if not already enqueued */ - if (actual & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)) - return ERTS_ENQUEUE_NOT; /* already in queue */ - if (actual & ERTS_PSFLG_DIRTY_CPU_PROC) { - *newp |= ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q; - if (actual & ERTS_PSFLG_IN_RUNQ) - return -ERTS_ENQUEUE_DIRTY_CPU_QUEUE; /* use proxy */ - *newp |= ERTS_PSFLG_IN_RUNQ; - return ERTS_ENQUEUE_DIRTY_CPU_QUEUE; - } - *newp |= ERTS_PSFLG_DIRTY_IO_PROC_IN_Q; - if (actual & ERTS_PSFLG_IN_RUNQ) - return -ERTS_ENQUEUE_DIRTY_IO_QUEUE; /* use proxy */ - *newp |= ERTS_PSFLG_IN_RUNQ; - return ERTS_ENQUEUE_DIRTY_IO_QUEUE; + if (actual & ERTS_PSFLGS_DIRTY_WORK) { + int res = check_dirty_enqueue_in_prio_queue(c_p, newp, actual, + aprio, qbit); + if (res != ERTS_ENQUEUE_NORMAL_QUEUE) + return res; } - enqueue_normal_runq: -#endif max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK; max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; max_qbit &= -max_qbit; @@ -5830,46 +6301,128 @@ check_enqueue_in_prio_queue(Process *c_p, return ERTS_ENQUEUE_NORMAL_QUEUE; } +static ERTS_INLINE ErtsRunQueue * +select_enqueue_run_queue(int enqueue, int enq_prio, Process *p, erts_aint32_t state) +{ + + switch (enqueue) { + + case ERTS_ENQUEUE_NOT: + + return NULL; + + + case ERTS_ENQUEUE_DIRTY_CPU_QUEUE: + case -ERTS_ENQUEUE_DIRTY_CPU_QUEUE: + + if (fin_dirty_enq_s_change(p, enqueue > 0, enq_prio, + ERTS_PDSFLGS_IN_CPU_PRQ_MASK_OFFSET)) + return ERTS_DIRTY_CPU_RUNQ; + + return NULL; + + + case ERTS_ENQUEUE_DIRTY_IO_QUEUE: + case -ERTS_ENQUEUE_DIRTY_IO_QUEUE: + + if (fin_dirty_enq_s_change(p, enqueue > 0, enq_prio, + ERTS_PDSFLGS_IN_IO_PRQ_MASK_OFFSET)) + return ERTS_DIRTY_IO_RUNQ; + + return NULL; + + + default: { + ErtsRunQueue* runq; + + ASSERT(enqueue == ERTS_ENQUEUE_NORMAL_QUEUE + || enqueue == -ERTS_ENQUEUE_NORMAL_QUEUE); + + runq = erts_get_runq_proc(p); + + if (!(ERTS_PSFLG_BOUND & state)) { + ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio); + if (new_runq) { + RUNQ_SET_RQ(&p->run_queue, new_runq); + runq = new_runq; + } + } + + ASSERT(runq); + + return runq; + } + } +} + + /* * schedule_out_process() return with c_rq locked. + * + * Return non-zero value if caller should decrease + * reference count on the process when done with it... */ static ERTS_INLINE int -schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Process *proxy) +schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, + Process *proxy, int is_normal_sched) { - erts_aint32_t a, e, n, enq_prio = -1; + erts_aint32_t a, e, n, enq_prio = -1, running_flgs; int enqueue; /* < 0 -> use proxy */ - Process* sched_p; ErtsRunQueue* runq; -#ifdef ERTS_SMP - int check_emigration_need; -#endif + + if (!is_normal_sched) + running_flgs = ERTS_PSFLG_DIRTY_RUNNING|ERTS_PSFLG_DIRTY_RUNNING_SYS; + else { + running_flgs = ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS; + if (state & ERTS_PSFLG_DIRTY_ACTIVE_SYS + && (p->flags & (F_DELAY_GC|F_DISABLE_GC))) { + /* + * Delay dirty GC; will be enabled automatically + * again by next GC... + */ + + /* + * No normal execution until dirty CLA or hibernat has + * been handled... + */ + ASSERT(!(p->flags & (F_DIRTY_CLA | F_DIRTY_GC_HIBERNATE))); + + state = erts_atomic32_read_band_nob(&p->state, + ~ERTS_PSFLG_DIRTY_ACTIVE_SYS); + state &= ~ERTS_PSFLG_DIRTY_ACTIVE_SYS; + } + } a = state; while (1) { n = e = a; - ASSERT(a & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS)); + ASSERT(a & running_flgs); enqueue = ERTS_ENQUEUE_NOT; - n &= ~(ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS); - if (a & ERTS_PSFLG_ACTIVE_SYS + n &= ~running_flgs; + if ((a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) || (a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE) { enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); } - a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); + a = erts_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; } - switch (enqueue) { - case ERTS_ENQUEUE_NOT: + runq = select_enqueue_run_queue(enqueue, enq_prio, p, n); + + if (!runq) { + if (erts_system_profile_flags.runnable_procs) { - if (!(a & ERTS_PSFLG_ACTIVE_SYS) - && (!(a & ERTS_PSFLG_ACTIVE) - || (a & ERTS_PSFLG_SUSPENDED))) { + /* Status lock prevents out of order "runnable proc" trace msgs */ + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + + if (!(a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + && (!(a & ERTS_PSFLG_ACTIVE) || (a & ERTS_PSFLG_SUSPENDED))) { /* Process inactive */ profile_runnable_proc(p, am_inactive); } @@ -5878,101 +6431,99 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces if (proxy) free_proxy_proc(proxy); - erts_smp_runq_lock(c_rq); - return 0; + erts_runq_lock(c_rq); -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP - case ERTS_ENQUEUE_DIRTY_CPU_QUEUE: - case -ERTS_ENQUEUE_DIRTY_CPU_QUEUE: - runq = ERTS_DIRTY_CPU_RUNQ; - ASSERT(ERTS_SCHEDULER_IS_DIRTY_CPU(runq->scheduler)); -#ifdef ERTS_SMP - check_emigration_need = 0; -#endif - break; + /* Decrement refc if scheduled out from dirty scheduler... */ + return !is_normal_sched; + } + else { + Process* sched_p; - case ERTS_ENQUEUE_DIRTY_IO_QUEUE: - case -ERTS_ENQUEUE_DIRTY_IO_QUEUE: - runq = ERTS_DIRTY_IO_RUNQ; - ASSERT(ERTS_SCHEDULER_IS_DIRTY_IO(runq->scheduler)); -#ifdef ERTS_SMP - check_emigration_need = 0; -#endif - break; -#endif -#endif + ASSERT(!(n & ERTS_PSFLG_FREE)); + ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & (ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS))); - default: - ASSERT(enqueue == ERTS_ENQUEUE_NORMAL_QUEUE - || enqueue == -ERTS_ENQUEUE_NORMAL_QUEUE); + if (enqueue < 0) + sched_p = make_proxy_proc(proxy, p, enq_prio); + else { + sched_p = p; + if (proxy) + free_proxy_proc(proxy); + } - runq = erts_get_runq_proc(p); -#ifdef ERTS_SMP - check_emigration_need = !(ERTS_PSFLG_BOUND & n); -#endif - break; - } + ASSERT(runq); - ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & ERTS_PSFLG_ACTIVE_SYS)); + erts_runq_lock(runq); - if (enqueue < 0) - sched_p = make_proxy_proc(proxy, p, enq_prio); - else { - sched_p = p; - if (proxy) - free_proxy_proc(proxy); - } + if (is_normal_sched && sched_p == p && ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) + erts_proc_inc_refc(p); /* Needs to be done before enqueue_process() */ -#ifdef ERTS_SMP - if (check_emigration_need) { - ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio); - if (new_runq) { - RUNQ_SET_RQ(&sched_p->run_queue, new_runq); - runq = new_runq; - } - } -#endif + /* Enqueue the process */ + enqueue_process(runq, (int) enq_prio, sched_p); - ASSERT(runq); + if (runq == c_rq) + return 0; - erts_smp_runq_lock(runq); + erts_runq_unlock(runq); - /* Enqueue the process */ - enqueue_process(runq, (int) enq_prio, sched_p); + smp_notify_inc_runq(runq); - if (runq == c_rq) - return 1; - erts_smp_runq_unlock(runq); - smp_notify_inc_runq(runq); - erts_smp_runq_lock(c_rq); - return 1; + erts_runq_lock(c_rq); + + /* + * Decrement refc if process is scheduled out by a + * dirty scheduler, and we have not just scheduled + * the process using the ordinary process struct + * on a dirty run-queue again... + */ + return !is_normal_sched && (sched_p != p + || !ERTS_RUNQ_IX_IS_DIRTY(runq->ix)); + } } static ERTS_INLINE void -add2runq(Process *p, erts_aint32_t state, erts_aint32_t prio) +add2runq(int enqueue, erts_aint32_t prio, + Process *proc, erts_aint32_t state, + Process **proxy) { - ErtsRunQueue *runq = erts_get_runq_proc(p); + ErtsRunQueue *runq; -#ifdef ERTS_SMP - if (!(ERTS_PSFLG_BOUND & state)) { - ErtsRunQueue *new_runq = erts_check_emigration_need(runq, (int) prio); - if (new_runq) { - RUNQ_SET_RQ(&p->run_queue, new_runq); - runq = new_runq; - } - } -#endif - ASSERT(runq); + runq = select_enqueue_run_queue(enqueue, prio, proc, state); - erts_smp_runq_lock(runq); + if (runq) { + Process *sched_p; - /* Enqueue the process */ - enqueue_process(runq, (int) prio, p); + if (enqueue > 0) { + sched_p = proc; + /* + * Refc on process struct (i.e. true struct, + * not proxy-struct) increased while in a + * dirty run-queue or executing on a dirty + * scheduler. + */ + if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) + erts_proc_inc_refc(proc); + } + else { + Process *pxy; - erts_smp_runq_unlock(runq); - smp_notify_inc_runq(runq); + if (!proxy) + pxy = NULL; + else { + pxy = *proxy; + *proxy = NULL; + } + sched_p = make_proxy_proc(pxy, proc, prio); + } + + erts_runq_lock(runq); + /* Enqueue the process */ + enqueue_process(runq, (int) prio, sched_p); + + erts_runq_unlock(runq); + smp_notify_inc_runq(runq); + } } static ERTS_INLINE int @@ -5980,24 +6531,38 @@ change_proc_schedule_state(Process *p, erts_aint32_t clear_state_flags, erts_aint32_t set_state_flags, erts_aint32_t *statep, - erts_aint32_t *enq_prio_p) + erts_aint32_t *enq_prio_p, + ErtsProcLocks locks) { /* - * NOTE: ERTS_PSFLG_RUNNING, ERTS_PSFLG_RUNNING_SYS and - * ERTS_PSFLG_ACTIVE_SYS are not allowed to be + * NOTE: ERTS_PSFLG_RUNNING, ERTS_PSFLG_RUNNING_SYS, + * ERTS_PSFLG_DIRTY_RUNNING, ERTS_PSFLG_DIRTY_RUNNING_SYS + * and ERTS_PSFLG_ACTIVE_SYS are not allowed to be * altered by this function! */ erts_aint32_t a = *statep, n; int enqueue; /* < 0 -> use proxy */ + unsigned int prof_runnable_procs = erts_system_profile_flags.runnable_procs; + unsigned int lock_status = (prof_runnable_procs + && !(locks & ERTS_PROC_LOCK_STATUS)); + + ERTS_LC_ASSERT(locks == erts_proc_lc_my_proc_locks(p)); ASSERT(!(a & ERTS_PSFLG_PROXY)); ASSERT((clear_state_flags & (ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS | ERTS_PSFLG_ACTIVE_SYS)) == 0); ASSERT((set_state_flags & (ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS | ERTS_PSFLG_ACTIVE_SYS)) == 0); + if (lock_status) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + while (1) { erts_aint32_t e; n = e = a; @@ -6016,8 +6581,14 @@ change_proc_schedule_state(Process *p, if ((n & (ERTS_PSFLG_SUSPENDED | ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS | ERTS_PSFLG_IN_RUNQ - | ERTS_PSFLG_ACTIVE)) == ERTS_PSFLG_ACTIVE) { + | ERTS_PSFLG_ACTIVE)) == ERTS_PSFLG_ACTIVE + || (n & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_EXITING)) == ERTS_PSFLG_EXITING + ) { /* * Active and seemingly need to be enqueued, but * process may be in a run queue via proxy, need @@ -6026,35 +6597,42 @@ change_proc_schedule_state(Process *p, enqueue = check_enqueue_in_prio_queue(p, enq_prio_p, &n, a); } - a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); + a = erts_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; if (enqueue == ERTS_ENQUEUE_NOT && n == a) break; } - if (erts_system_profile_flags.runnable_procs) { + if (prof_runnable_procs) { + + /* Status lock prevents out of order "runnable proc" trace msgs */ if (((n & (ERTS_PSFLG_SUSPENDED | ERTS_PSFLG_ACTIVE)) == ERTS_PSFLG_ACTIVE) && (!(a & (ERTS_PSFLG_ACTIVE_SYS | ERTS_PSFLG_RUNNING - | ERTS_PSFLG_RUNNING_SYS) + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS) && (!(a & ERTS_PSFLG_ACTIVE) || (a & ERTS_PSFLG_SUSPENDED))))) { /* We activated a prevously inactive process */ profile_runnable_proc(p, am_active); } + if (lock_status) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } + *statep = a; return enqueue; } static ERTS_INLINE void -schedule_process(Process *p, erts_aint32_t in_state) +schedule_process(Process *p, erts_aint32_t in_state, ErtsProcLocks locks) { erts_aint32_t enq_prio = -1; erts_aint32_t state = in_state; @@ -6062,24 +6640,114 @@ schedule_process(Process *p, erts_aint32_t in_state) 0, ERTS_PSFLG_ACTIVE, &state, - &enq_prio); - if (enqueue != ERTS_ENQUEUE_NOT) - add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), - state, - enq_prio); + &enq_prio, + locks); + add2runq(enqueue, enq_prio, p, state, NULL); } void -erts_schedule_process(Process *p, erts_aint32_t state) +erts_schedule_process(Process *p, erts_aint32_t state, ErtsProcLocks locks) { - schedule_process(p, state); + schedule_process(p, state, locks); } -static void -schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) +static int +schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, + erts_aint32_t *fail_state_p) { - erts_aint32_t a = state, n, enq_prio = -1; + int res; + int locked; + ErtsProcSysTaskQs *stqs, *free_stqs; + erts_aint32_t fail_state, state, a, n, enq_prio; int enqueue; /* < 0 -> use proxy */ + unsigned int prof_runnable_procs; + + fail_state = *fail_state_p; + + res = 1; /* prepare for success */ + st->next = st->prev = st; /* Prep for empty prio queue */ + state = erts_atomic32_read_nob(&p->state); + prof_runnable_procs = erts_system_profile_flags.runnable_procs; + locked = 0; + free_stqs = NULL; + if (state & ERTS_PSFLG_ACTIVE_SYS) + stqs = NULL; + else { + alloc_qs: + stqs = proc_sys_task_queues_alloc(); + stqs->qmask = 1 << prio; + stqs->ncount = 0; + stqs->q[PRIORITY_MAX] = NULL; + stqs->q[PRIORITY_HIGH] = NULL; + stqs->q[PRIORITY_NORMAL] = NULL; + stqs->q[PRIORITY_LOW] = NULL; + stqs->q[prio] = st; + } + + if (!locked) { + locked = 1; + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + + state = erts_atomic32_read_nob(&p->state); + if (state & fail_state) { + *fail_state_p = (state & fail_state); + free_stqs = stqs; + res = 0; + goto cleanup; + } + } + + if (!p->sys_task_qs) { + if (stqs) + p->sys_task_qs = stqs; + else + goto alloc_qs; + } + else { + free_stqs = stqs; + stqs = p->sys_task_qs; + if (!stqs->q[prio]) { + stqs->q[prio] = st; + stqs->qmask |= 1 << prio; + } + else { + st->next = stqs->q[prio]; + st->prev = stqs->q[prio]->prev; + st->next->prev = st; + st->prev->next = st; + ASSERT(stqs->qmask & (1 << prio)); + } + } + + if (ERTS_PSFLGS_GET_ACT_PRIO(state) > prio) { + erts_aint32_t n, a, e; + /* Need to elevate actual prio */ + + a = state; + do { + if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { + n = a; + break; + } + n = e = a; + n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; + n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); + a = erts_atomic32_cmpxchg_nob(&p->state, n, e); + } while (a != e); + state = n; + } + + + a = state; + enq_prio = -1; + + /* Status lock prevents out of order "runnable proc" trace msgs */ + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + + if (!prof_runnable_procs) { + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + locked = 0; + } ASSERT(!(state & ERTS_PSFLG_PROXY)); @@ -6088,45 +6756,51 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) n = e = a; if (a & ERTS_PSFLG_FREE) - return; /* We don't want to schedule free processes... */ + goto cleanup; /* We don't want to schedule free processes... */ enqueue = ERTS_ENQUEUE_NOT; n |= ERTS_PSFLG_ACTIVE_SYS; - if (!(a & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS))) + if (!(a & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS))) enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); - a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); + a = erts_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; if (a == n && enqueue == ERTS_ENQUEUE_NOT) goto cleanup; } - if (erts_system_profile_flags.runnable_procs) { + if (prof_runnable_procs) { if (!(a & (ERTS_PSFLG_ACTIVE_SYS | ERTS_PSFLG_RUNNING - | ERTS_PSFLG_RUNNING_SYS)) + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) && (!(a & ERTS_PSFLG_ACTIVE) || (a & ERTS_PSFLG_SUSPENDED))) { /* We activated a prevously inactive process */ profile_runnable_proc(p, am_active); } + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + locked = 0; } - if (enqueue != ERTS_ENQUEUE_NOT) { - Process *sched_p; - if (enqueue > 0) - sched_p = p; - else { - sched_p = make_proxy_proc(proxy, p, enq_prio); - proxy = NULL; - } - add2runq(sched_p, n, enq_prio); - } + add2runq(enqueue, enq_prio, p, n, NULL); cleanup: - if (proxy) - free_proxy_proc(proxy); + + if (locked) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + + if (free_stqs) + proc_sys_task_queues_free(free_stqs); + + ERTS_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); + + return res; } static ERTS_INLINE int @@ -6134,26 +6808,31 @@ suspend_process(Process *c_p, Process *p) { erts_aint32_t state; int suspended = 0; - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); - state = erts_smp_atomic32_read_acqb(&p->state); + state = erts_atomic32_read_acqb(&p->state); if ((state & ERTS_PSFLG_SUSPENDED)) suspended = -1; else { if (c_p == p) { - state = erts_smp_atomic32_read_bor_relb(&p->state, + state = erts_atomic32_read_bor_relb(&p->state, ERTS_PSFLG_SUSPENDED); - ASSERT(state & ERTS_PSFLG_RUNNING); + ASSERT(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)); suspended = (state & ERTS_PSFLG_SUSPENDED) ? -1: 1; } else { - while (!(state & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_EXITING))) { + while (!(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_EXITING))) { erts_aint32_t n, e; n = e = state; n |= ERTS_PSFLG_SUSPENDED; - state = erts_smp_atomic32_cmpxchg_relb(&p->state, n, e); + state = erts_atomic32_cmpxchg_relb(&p->state, n, e); if (state == e) { suspended = 1; break; @@ -6168,17 +6847,17 @@ suspend_process(Process *c_p, Process *p) if (suspended) { - ASSERT(!(ERTS_PSFLG_RUNNING & state) - || p == erts_get_current_process()); - if (suspended > 0 && erts_system_profile_flags.runnable_procs) { /* 'state' is before our change... */ if ((state & (ERTS_PSFLG_ACTIVE | ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS | ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS | ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE) { /* We made process inactive */ profile_runnable_proc(p, am_inactive); @@ -6193,54 +6872,31 @@ suspend_process(Process *c_p, Process *p) } static ERTS_INLINE void -resume_process(Process *p) +resume_process(Process *p, ErtsProcLocks locks) { erts_aint32_t state, enq_prio = -1; int enqueue; - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); ASSERT(p->rcount > 0); if (--p->rcount > 0) /* multiple suspend */ return; - state = erts_smp_atomic32_read_nob(&p->state); + state = erts_atomic32_read_nob(&p->state); enqueue = change_proc_schedule_state(p, ERTS_PSFLG_SUSPENDED, 0, &state, - &enq_prio); - if (enqueue) - add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), - state, - enq_prio); -} - -int -erts_get_max_no_executing_schedulers(void) -{ -#ifdef ERTS_SMP - if (erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - return (int) erts_no_schedulers; - ERTS_THR_MEMORY_BARRIER; - return (int) erts_smp_atomic32_read_nob(&schdlr_sspnd.active); -#else - return 1; -#endif + &enq_prio, + locks); + add2runq(enqueue, enq_prio, p, state, NULL); } -#ifdef ERTS_SMP - -static void -scheduler_ix_resume_wake(Uint ix) -{ - ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); -} -static void -scheduler_ssi_resume_wake(ErtsSchedulerSleepInfo *ssi) +static ERTS_INLINE void +sched_resume_wake__(ErtsSchedulerSleepInfo *ssi) { erts_aint32_t xflgs = (ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_TSE_SLEEPING @@ -6248,15 +6904,35 @@ scheduler_ssi_resume_wake(ErtsSchedulerSleepInfo *ssi) | ERTS_SSI_FLG_SUSPENDED); erts_aint32_t oflgs; do { - oflgs = erts_smp_atomic32_cmpxchg_relb(&ssi->flags, 0, xflgs); + oflgs = erts_atomic32_cmpxchg_relb(&ssi->flags, 0, xflgs); if (oflgs == xflgs) { erts_sched_finish_poke(ssi, oflgs); break; } xflgs = oflgs; - } while (oflgs & ERTS_SSI_FLG_SUSPENDED); + } while (oflgs & (ERTS_SSI_FLG_MSB_EXEC|ERTS_SSI_FLG_SUSPENDED)); +} + +static void +nrml_sched_ix_resume_wake(Uint ix) +{ + sched_resume_wake__(ERTS_SCHED_SLEEP_INFO_IX(ix)); } + +static void +dcpu_sched_ix_resume_wake(Uint ix) +{ + sched_resume_wake__(ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix)); +} + +static void +dio_sched_ix_resume_wake(Uint ix) +{ + sched_resume_wake__(ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix)); +} + + static erts_aint32_t sched_prep_spin_suspended(ErtsSchedulerSleepInfo *ssi, erts_aint32_t xpct) { @@ -6267,7 +6943,7 @@ sched_prep_spin_suspended(ErtsSchedulerSleepInfo *ssi, erts_aint32_t xpct) erts_aint32_t xflgs = xpct; do { - oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + oflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return nflgs; xflgs = oflgs; @@ -6284,7 +6960,7 @@ sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount) erts_aint32_t flgs; do { - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); + flgs = erts_atomic32_read_acqb(&ssi->flags); if ((flgs & (ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_WAITING | ERTS_SSI_FLG_SUSPENDED)) @@ -6303,21 +6979,23 @@ sched_spin_suspended(ErtsSchedulerSleepInfo *ssi, int spincount) } static erts_aint32_t -sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) +sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi, + erts_aint32_t sleep_type) { erts_aint32_t oflgs; - erts_aint32_t nflgs = (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED); + erts_aint32_t nflgs = ((ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED) + | sleep_type); erts_aint32_t xflgs = (ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_WAITING | ERTS_SSI_FLG_SUSPENDED); + ASSERT(sleep_type == ERTS_SSI_FLG_TSE_SLEEPING); erts_tse_reset(ssi->event); while (1) { - oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); + oflgs = erts_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs); if (oflgs == xflgs) return nflgs; if ((oflgs & (ERTS_SSI_FLG_SLEEPING @@ -6332,1154 +7010,1143 @@ sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) } } -#ifdef ERTS_DIRTY_SCHEDULERS +static void +init_scheduler_suspend(void) +{ + erts_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_SCHEDULER); + schdlr_sspnd.online.normal = 1; + schdlr_sspnd.curr_online.normal = 1; + schdlr_sspnd.active.normal = 1; + schdlr_sspnd.online.dirty_cpu = 0; + schdlr_sspnd.curr_online.dirty_cpu = 0; + schdlr_sspnd.active.dirty_cpu = 0; + schdlr_sspnd.online.dirty_io = 0; + schdlr_sspnd.curr_online.dirty_io = 0; + schdlr_sspnd.active.dirty_io = 0; + schdlr_sspnd.last_msb_dirty_type = ERTS_SCHED_DIRTY_IO; + erts_atomic32_init_nob(&schdlr_sspnd.changing, 0); + schdlr_sspnd.chngq = NULL; + schdlr_sspnd.changer = am_false; + schdlr_sspnd.nmsb.ongoing = 0; + schdlr_sspnd.nmsb.blckrs = NULL; + schdlr_sspnd.nmsb.chngq = NULL; + schdlr_sspnd.msb.ongoing = 0; + schdlr_sspnd.msb.blckrs = NULL; + schdlr_sspnd.msb.chngq = NULL; +} + +typedef struct { + struct { + Eterm chngr; + Eterm nxt; + } onln; + struct { + ErtsProcList *chngrs; + } msb; +} ErtsSchdlrSspndResume; static void -suspend_scheduler(ErtsSchedulerData *esdp) +schdlr_sspnd_resume_proc(ErtsSchedType sched_type, Eterm pid) { - erts_aint32_t flgs; - erts_aint32_t changing; -#ifdef ERTS_DIRTY_SCHEDULERS - long no = (long) (ERTS_SCHEDULER_IS_DIRTY(esdp) - ? ERTS_DIRTY_SCHEDULER_NO(esdp) - : esdp->no); -#else - long no = (long) esdp->no; -#endif - ErtsSchedulerSleepInfo *ssi = esdp->ssi; - long active_schedulers; - int curr_online = 1; - int wake = 0; - erts_aint32_t aux_work; - int thr_prgr_active = 1; - ErtsStuckBoundProcesses sbp = {NULL, NULL}; - int* ss_onlinep; - int* ss_curr_onlinep; - int* ss_wait_curr_onlinep; - long* ss_wait_activep; - long ss_wait_active_target; - erts_smp_atomic32_t* ss_changingp; - erts_smp_atomic32_t* ss_activep; + Process *p; + p = erts_pid2proc_opt(NULL, 0, pid, ERTS_PROC_LOCK_STATUS, + (sched_type != ERTS_SCHED_NORMAL + ? ERTS_P2P_FLG_INC_REFC + : 0)); + if (p) { + resume_process(p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + if (sched_type != ERTS_SCHED_NORMAL) + erts_proc_dec_refc(p); + } +} - /* - * Schedulers may be suspended in two different ways: - * - A scheduler may be suspended since it is not online. - * All schedulers with scheduler ids greater than - * schdlr_sspnd.online are suspended; same for dirty - * schedulers and schdlr_sspnd.dirty_cpu_online and - * schdlr_sspnd.dirty_io_online. - * - Multi scheduling is blocked. All schedulers except the - * scheduler with scheduler id 1 are suspended, and all - * dirty CPU and dirty I/O schedulers are suspended. - * - * Regardless of why a scheduler is suspended, it ends up here. - */ +static ERTS_INLINE void +schdlr_sspnd_resume_procs(ErtsSchedType sched_type, + ErtsSchdlrSspndResume *resume) +{ + if (is_internal_pid(resume->onln.chngr)) { + schdlr_sspnd_resume_proc(sched_type, + resume->onln.chngr); + resume->onln.chngr = NIL; + } + if (is_internal_pid(resume->onln.nxt)) { + schdlr_sspnd_resume_proc(sched_type, + resume->onln.nxt); + resume->onln.nxt = NIL; + } + while (resume->msb.chngrs) { + ErtsProcList *plp = resume->msb.chngrs; + resume->msb.chngrs = plp->next; + schdlr_sspnd_resume_proc(sched_type, + plp->pid); + proclist_destroy(plp); + } +} - ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) || no != 1); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (erts_smp_mtx_trylock(&schdlr_sspnd.mtx) == EBUSY) { - erts_smp_runq_unlock(esdp->run_queue); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - erts_smp_runq_lock(esdp->run_queue); - } - if (ongoing_multi_scheduling_block()) - evacuate_run_queue(esdp->run_queue, &sbp); - } else -#endif - evacuate_run_queue(esdp->run_queue, &sbp); +static ERTS_INLINE int +have_dirty_work(void) +{ + return !(ERTS_EMPTY_RUNQ(ERTS_DIRTY_CPU_RUNQ) + | ERTS_EMPTY_RUNQ(ERTS_DIRTY_IO_RUNQ)); +} - erts_smp_runq_unlock(esdp->run_queue); +#define ERTS_MSB_NONE_PRIO_BIT PORT_BIT -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - erts_sched_check_cpu_bind_prep_suspend(esdp); +static ERTS_INLINE Uint32 +msb_runq_prio_bit(Uint32 flgs) +{ + int pbit; - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_inactive); + pbit = (int) (flgs & ERTS_RUNQ_FLGS_PROCS_QMASK); + if (flgs & PORT_BIT) { + /* rate ports as proc prio high */ + pbit |= HIGH_BIT; + } + if (flgs & ERTS_RUNQ_FLG_MISC_OP) { + /* rate misc ops as proc prio normal */ + pbit |= NORMAL_BIT; + } + if (flgs & LOW_BIT) { + /* rate low prio as normal (avoid starvation) */ + pbit |= NORMAL_BIT; + } + if (!pbit) + pbit = (int) ERTS_MSB_NONE_PRIO_BIT; + else + pbit &= -pbit; /* least significant bit set... */ + ASSERT(pbit); - sched_wall_time_change(esdp, 0); + /* High prio low value; low prio high value... */ + return (Uint32) pbit; +} - erts_smp_mtx_lock(&schdlr_sspnd.mtx); +static ERTS_INLINE void +msb_runq_prio_bits(Uint32 *nrmlp, Uint32 *dcpup, Uint32 *diop) +{ + Uint32 flgs = ERTS_RUNQ_FLGS_GET(ERTS_RUNQ_IX(0)); + if (flgs & ERTS_RUNQ_FLG_HALTING) { + /* + * Emulator is halting; only execute port jobs + * on normal scheduler. Ensure that we switch + * to the normal scheduler. + */ + *nrmlp = HIGH_BIT; + *dcpup = ERTS_MSB_NONE_PRIO_BIT; + *diop = ERTS_MSB_NONE_PRIO_BIT; } + else { + *nrmlp = msb_runq_prio_bit(flgs); - flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); - if (flgs & ERTS_SSI_FLG_SUSPENDED) { + flgs = ERTS_RUNQ_FLGS_GET(ERTS_DIRTY_CPU_RUNQ); + *dcpup = msb_runq_prio_bit(flgs); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(esdp->run_queue)) { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_cpu_active); - ASSERT(active_schedulers >= 0); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); - ss_onlinep = &schdlr_sspnd.dirty_cpu_online; - ss_curr_onlinep = &schdlr_sspnd.dirty_cpu_curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.dirty_cpu_wait_curr_online; - ss_changingp = &schdlr_sspnd.dirty_cpu_changing; - ss_wait_activep = &schdlr_sspnd.msb.dirty_cpu_wait_active; - ss_activep = &schdlr_sspnd.dirty_cpu_active; - } else { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_io_active); - ASSERT(active_schedulers >= 0); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing); - ss_onlinep = &schdlr_sspnd.dirty_io_online; - ss_curr_onlinep = &schdlr_sspnd.dirty_io_curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.dirty_io_wait_curr_online; - ss_changingp = &schdlr_sspnd.dirty_io_changing; - ss_wait_activep = &schdlr_sspnd.msb.dirty_io_wait_active; - ss_activep = &schdlr_sspnd.dirty_io_active; - } - ss_wait_active_target = 0; - } - else + flgs = ERTS_RUNQ_FLGS_GET(ERTS_DIRTY_IO_RUNQ); + *diop = msb_runq_prio_bit(flgs); + } +} + +static int +msb_scheduler_type_switch(ErtsSchedType sched_type, + ErtsSchedulerData *esdp, + long no) +{ + Uint32 nrml_prio, dcpu_prio, dio_prio; + ErtsSchedType exec_type; + ErtsRunQueue *exec_rq; +#ifdef DEBUG + erts_aint32_t dbg_val; #endif - { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.active); - ASSERT(active_schedulers >= 1); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - ss_onlinep = &schdlr_sspnd.online; - ss_curr_onlinep = &schdlr_sspnd.curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.wait_curr_online; - ss_changingp = &schdlr_sspnd.changing; - ss_wait_activep = &schdlr_sspnd.msb.wait_active; - ss_activep = &schdlr_sspnd.active; - ss_wait_active_target = 1; - } - if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { - if (active_schedulers == *ss_wait_activep) - wake = 1; - if (active_schedulers == ss_wait_active_target) { - changing = erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; - } - } - while (1) { - if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { - int changed = 0; - if (no > *ss_onlinep && curr_online) { - (*ss_curr_onlinep)--; - curr_online = 0; - changed = 1; - } - else if (no <= *ss_onlinep && !curr_online) { - (*ss_curr_onlinep)++; - curr_online = 1; - changed = 1; - } - if (changed - && *ss_curr_onlinep == *ss_wait_curr_onlinep) - wake = 1; - if (*ss_onlinep == *ss_curr_onlinep) { - changing = erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; - } - } + ASSERT(schdlr_sspnd.msb.ongoing); - if (wake) { - erts_smp_cnd_signal(&schdlr_sspnd.cnd); - wake = 0; - } + /* + * This function determines how to switch + * between scheduler types when multi-scheduling + * is blocked. + * + * If no dirty work exist, we always select + * execution of normal scheduler. If nothing + * executes, normal scheduler 1 should be waiting + * in sys_schedule(), otherwise we cannot react + * on I/O events. + * + * We unconditionally switch back to normal + * scheduler after executing dirty in order to + * make sure we check for I/O... + */ - if (curr_online && !ongoing_multi_scheduling_block()) { - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); - if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) - break; - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + msb_runq_prio_bits(&nrml_prio, &dcpu_prio, &dio_prio); - while (1) { - erts_aint32_t qmask; - erts_aint32_t flgs; - - qmask = (ERTS_RUNQ_FLGS_GET(esdp->run_queue) - & ERTS_RUNQ_FLGS_QMASK); - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work|qmask) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - if (aux_work) - aux_work = handle_aux_work(&esdp->aux_work_data, - aux_work, - 1); - - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && - (aux_work && erts_thr_progress_update(esdp))) - erts_thr_progress_leader_update(esdp); - if (qmask) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - erts_smp_runq_lock(esdp->run_queue); - if (ongoing_multi_scheduling_block()) - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - } else -#endif - { - erts_smp_runq_lock(esdp->run_queue); - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); - } - } - } + exec_type = ERTS_SCHED_NORMAL; + if (sched_type == ERTS_SCHED_NORMAL) { - if (!aux_work) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - } - flgs = sched_spin_suspended(ssi, - ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - flgs = sched_set_suspended_sleeptype(ssi); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - int res; - - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); - } - } -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - erts_thr_progress_finalize_wait(esdp); - } + /* + * Check priorities of work in the + * different run-queues and determine + * run-queue with highest prio job... + */ - flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)); - if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) - break; - changing = erts_smp_atomic32_read_nob(ss_changingp); - if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) - break; - } + if ((dcpu_prio == ERTS_MSB_NONE_PRIO_BIT) + & (dio_prio == ERTS_MSB_NONE_PRIO_BIT)) { + /* + * No dirty work exist; continue on normal + * scheduler... + */ + return 0; + } - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(ss_changingp); - } + if (dcpu_prio < nrml_prio) { + exec_type = ERTS_SCHED_DIRTY_CPU; + if (dio_prio < dcpu_prio) + exec_type = ERTS_SCHED_DIRTY_IO; + } + else { + if (dio_prio < nrml_prio) + exec_type = ERTS_SCHED_DIRTY_IO; + } - active_schedulers = erts_smp_atomic32_inc_read_nob(ss_activep); - changing = erts_smp_atomic32_read_nob(ss_changingp); - if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) - && *ss_onlinep == active_schedulers) { - erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - } + /* + * Make sure to alternate between dirty types + * inbetween normal execution if highest + * priorities are equal. + */ -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - ASSERT(no <= *ss_onlinep); - ASSERT(!ongoing_multi_scheduling_block()); + if (exec_type == ERTS_SCHED_NORMAL) { + if (dcpu_prio == nrml_prio) + exec_type = ERTS_SCHED_DIRTY_CPU; + else if (dio_prio == nrml_prio) + exec_type = ERTS_SCHED_DIRTY_IO; + else { + /* + * Normal work has higher prio than + * dirty work; continue on normal + * scheduler... + */ + return 0; + } + } + ASSERT(exec_type != ERTS_SCHED_NORMAL); + if (dio_prio == dcpu_prio) { + /* Alter between dirty types... */ + if (schdlr_sspnd.last_msb_dirty_type == ERTS_SCHED_DIRTY_IO) + exec_type = ERTS_SCHED_DIRTY_CPU; + else + exec_type = ERTS_SCHED_DIRTY_IO; + } } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - ASSERT(curr_online); + ASSERT(sched_type != exec_type); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_active); + if (exec_type != ERTS_SCHED_NORMAL) + schdlr_sspnd.last_msb_dirty_type = exec_type; + else { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } + if ((nrml_prio == ERTS_MSB_NONE_PRIO_BIT) + & ((dcpu_prio != ERTS_MSB_NONE_PRIO_BIT) + | (dio_prio != ERTS_MSB_NONE_PRIO_BIT))) { + /* + * We have dirty work, but an empty + * normal run-queue. + * + * Since the normal run-queue is + * empty, the normal scheduler will + * go to sleep when selected for + * execution. We have dirty work to + * do, so we only want it to check + * I/O, and then come back here and + * switch to dirty execution. + * + * To prevent the scheduler from going + * to sleep we trick it into believing + * it has work to do... + */ + ERTS_RUNQ_FLGS_SET_NOB(ERTS_RUNQ_IX(0), + ERTS_RUNQ_FLG_MISC_OP); + } } - erts_smp_runq_lock(esdp->run_queue); - non_empty_runq(esdp->run_queue); + /* + * Suspend this scheduler and wake up scheduler + * number one of another type... + */ +#ifdef DEBUG + dbg_val = +#else + (void) +#endif + erts_atomic32_read_bset_mb(&esdp->ssi->flags, + (ERTS_SSI_FLG_SUSPENDED + | ERTS_SSI_FLG_MSB_EXEC), + ERTS_SSI_FLG_SUSPENDED); + ASSERT(dbg_val & ERTS_SSI_FLG_MSB_EXEC); + + switch (exec_type) { + case ERTS_SCHED_NORMAL: + exec_rq = ERTS_RUNQ_IX(0); + break; + case ERTS_SCHED_DIRTY_CPU: + exec_rq = ERTS_DIRTY_CPU_RUNQ; + break; + case ERTS_SCHED_DIRTY_IO: + exec_rq = ERTS_DIRTY_IO_RUNQ; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + exec_rq = NULL; + break; + } -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#ifdef DEBUG + dbg_val = +#else + (void) #endif - { - schedule_bound_processes(esdp->run_queue, &sbp); + erts_atomic32_read_bset_mb(&exec_rq->scheduler->ssi->flags, + (ERTS_SSI_FLG_SUSPENDED + | ERTS_SSI_FLG_MSB_EXEC), + ERTS_SSI_FLG_MSB_EXEC); + ASSERT(dbg_val & ERTS_SSI_FLG_SUSPENDED); - erts_sched_check_cpu_bind_post_suspend(esdp); + wake_scheduler(exec_rq); + + return 1; /* suspend this scheduler... */ + +} + +static ERTS_INLINE void +suspend_normal_scheduler_sleep(ErtsSchedulerData *esdp) +{ + ErtsSchedulerSleepInfo *ssi = esdp->ssi; + erts_aint32_t flgs = sched_spin_suspended(ssi, + ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + flgs = sched_set_suspended_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + int res; + + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + } } } -#else /* !ERTS_DIRTY_SCHEDULERS */ +static ERTS_INLINE void +suspend_dirty_scheduler_sleep(ErtsSchedulerData *esdp) +{ + suspend_normal_scheduler_sleep(esdp); +} static void suspend_scheduler(ErtsSchedulerData *esdp) { erts_aint32_t flgs; erts_aint32_t changing; - long no = (long) esdp->no; + long no; ErtsSchedulerSleepInfo *ssi = esdp->ssi; - long active_schedulers; int curr_online = 1; - int wake = 0; + ErtsSchdlrSspndResume resume = {{NIL, NIL}, {NULL}}; erts_aint32_t aux_work; int thr_prgr_active = 1; ErtsStuckBoundProcesses sbp = {NULL, NULL}; + ErtsSchedType sched_type; + erts_aint32_t online_flag; /* * Schedulers may be suspended in two different ways: * - A scheduler may be suspended since it is not online. - * All schedulers with scheduler ids greater than - * schdlr_sspnd.online are suspended. * - Multi scheduling is blocked. All schedulers except the - * scheduler with scheduler id 1 are suspended. + * scheduler with scheduler id 1 are suspended, and all + * dirty CPU and dirty I/O schedulers are suspended. * * Regardless of why a scheduler is suspended, it ends up here. */ - ASSERT(no != 1); - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); + sched_type = esdp->type; + switch (sched_type) { + case ERTS_SCHED_NORMAL: + online_flag = ERTS_SCHDLR_SSPND_CHNG_ONLN; + no = esdp->no; + break; + case ERTS_SCHED_DIRTY_CPU: + online_flag = ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + no = esdp->dirty_no; + break; + case ERTS_SCHED_DIRTY_IO: + online_flag = 0; + no = esdp->dirty_no; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + return; + } + + if (erts_atomic32_read_nob(&ssi->flags) & ERTS_SSI_FLG_MSB_EXEC) { + ASSERT(no == 1); + if (!msb_scheduler_type_switch(sched_type, esdp, no)) + return; + /* Suspend and let scheduler 1 of another type execute... */ + } - erts_sched_check_cpu_bind_prep_suspend(esdp); - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_inactive); + if (sched_type != ERTS_SCHED_NORMAL) { + dirty_active(esdp, -1); + erts_runq_unlock(esdp->run_queue); + dirty_sched_wall_time_change(esdp, 0); + } + else { + if (no != 1) + evacuate_run_queue(esdp->run_queue, &sbp); - sched_wall_time_change(esdp, 0); + erts_runq_unlock(esdp->run_queue); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + erts_sched_check_cpu_bind_prep_suspend(esdp); + + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_inactive); + } + + erts_mtx_lock(&schdlr_sspnd.mtx); flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); if (flgs & ERTS_SSI_FLG_SUSPENDED) { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.active); - ASSERT(active_schedulers >= 1); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { - if (active_schedulers == schdlr_sspnd.msb.wait_active) - wake = 1; - if (active_schedulers == 1) { - changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; - } - } + schdlr_sspnd_dec_nscheds(&schdlr_sspnd.active, sched_type); + + changing = erts_atomic32_read_nob(&schdlr_sspnd.changing); while (1) { - if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + + if (changing & (ERTS_SCHDLR_SSPND_CHNG_NMSB + | ERTS_SCHDLR_SSPND_CHNG_MSB)) { + int i = 0; + ErtsMultiSchedulingBlock *msb[3] = {0}; + if (changing & ERTS_SCHDLR_SSPND_CHNG_NMSB) + msb[i++] = &schdlr_sspnd.nmsb; + if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) + msb[i++] = &schdlr_sspnd.msb; + + for (i = 0; msb[i]; i++) { + erts_aint32_t clr_flg = 0; + + if (msb[i] == &schdlr_sspnd.nmsb + && schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) == 1) { + clr_flg = ERTS_SCHDLR_SSPND_CHNG_NMSB; + } + else if (schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) == 1 + && schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_CPU) == 0 + && schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_IO) == 0) { + clr_flg = ERTS_SCHDLR_SSPND_CHNG_MSB; + } + + if (clr_flg) { + ErtsProcList *plp, *end_plp; + changing = erts_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~clr_flg); + changing &= ~clr_flg; + (void) erts_proclist_fetch(&msb[i]->chngq, &end_plp); + /* resume processes that initiated the multi scheduling block... */ + plp = msb[i]->chngq; + if (plp) { + ASSERT(end_plp); + ASSERT(msb[i]->ongoing); + do { + erts_proclist_store_last(&msb[i]->blckrs, + proclist_copy(plp)); + plp = plp->next; + } while (plp); + end_plp->next = resume.msb.chngrs; + resume.msb.chngrs = msb[i]->chngq; + msb[i]->chngq = NULL; + } + } + } + } + + if (changing & online_flag) { int changed = 0; - if (no > schdlr_sspnd.online && curr_online) { - schdlr_sspnd.curr_online--; + Uint32 st_online; + + st_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + sched_type); + if (no > st_online && curr_online) { + schdlr_sspnd_dec_nscheds(&schdlr_sspnd.curr_online, + sched_type); curr_online = 0; changed = 1; } - else if (no <= schdlr_sspnd.online && !curr_online) { - schdlr_sspnd.curr_online++; + else if (no <= st_online && !curr_online) { + schdlr_sspnd_inc_nscheds(&schdlr_sspnd.curr_online, + sched_type); curr_online = 1; changed = 1; } if (changed - && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) - wake = 1; - if (schdlr_sspnd.online == schdlr_sspnd.curr_online) { - changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; + && (schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + sched_type) + == schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + sched_type))) { + ErtsProcList *plp; + changing = erts_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~online_flag); + changing &= ~online_flag; + if (sched_type == ERTS_SCHED_NORMAL) { + ASSERT(is_internal_pid(schdlr_sspnd.changer) + || schdlr_sspnd.changer == am_init); + /* resume process that initiated this change... */ + resume.onln.chngr = schdlr_sspnd.changer; + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (!plp) + schdlr_sspnd.changer = am_false; + else { + schdlr_sspnd.changer = am_true; /* change right in transit */ + /* resume process that is queued for next change... */ + resume.onln.nxt = plp->pid; + ASSERT(is_internal_pid(resume.onln.nxt)); + } + } } } - if (wake) { - erts_smp_cnd_signal(&schdlr_sspnd.cnd); - wake = 0; - } - - if (curr_online && !ongoing_multi_scheduling_block()) { - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); + if (curr_online) { + flgs = erts_atomic32_read_acqb(&ssi->flags); if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) break; } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_mtx_unlock(&schdlr_sspnd.mtx); + + schdlr_sspnd_resume_procs(sched_type, &resume); while (1) { - erts_aint32_t qmask; - erts_aint32_t flgs; - - qmask = (ERTS_RUNQ_FLGS_GET(esdp->run_queue) - & ERTS_RUNQ_FLGS_QMASK); - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work|qmask) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - if (aux_work) - aux_work = handle_aux_work(&esdp->aux_work_data, - aux_work, - 1); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); - if (qmask) { - erts_smp_runq_lock(esdp->run_queue); - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); - } - } + if (sched_type != ERTS_SCHED_NORMAL) + suspend_dirty_scheduler_sleep(esdp); + else + { + ErtsMonotonicTime current_time, timeout_time; + int evacuate = no == 1 ? 0 : !ERTS_EMPTY_RUNQ(esdp->run_queue); - if (!aux_work) { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - flgs = sched_spin_suspended(ssi, - ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - flgs = sched_set_suspended_sleeptype(ssi); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - int res; - - do { - res = erts_tse_wait(ssi->event); - } while (res == EINTR); + ASSERT(sched_type == ERTS_SCHED_NORMAL); + + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + + if (aux_work|evacuate) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + if (aux_work) + aux_work = handle_aux_work(&esdp->aux_work_data, + aux_work, + 1); + + if (aux_work && erts_thr_progress_update(esdp)) + erts_thr_progress_leader_update(esdp); + if (evacuate) { + erts_runq_lock(esdp->run_queue); + evacuate_run_queue(esdp->run_queue, &sbp); + erts_runq_unlock(esdp->run_queue); } } - erts_thr_progress_finalize_wait(esdp); - } + + + if (aux_work) + timeout_time = erts_next_timeout_time(esdp->next_tmo_ref); + else + timeout_time = erts_check_next_timeout_time(esdp); + + current_time = erts_get_monotonic_time(esdp); + + if (!aux_work && current_time < timeout_time) { + /* go to sleep... */ + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(NULL); + suspend_normal_scheduler_sleep(esdp); + erts_thr_progress_finalize_wait(NULL); + current_time = erts_get_monotonic_time(esdp); + } + + if (current_time >= timeout_time) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + erts_bump_timers(esdp->timer_wheel, current_time); + } + } flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING | ERTS_SSI_FLG_SUSPENDED)); if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) break; - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) + changing = erts_atomic32_read_nob(&schdlr_sspnd.changing); + if (changing) break; } - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - } - - active_schedulers = erts_smp_atomic32_inc_read_nob(&schdlr_sspnd.active); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) - && schdlr_sspnd.online == active_schedulers) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); + erts_mtx_lock(&schdlr_sspnd.mtx); + changing = erts_atomic32_read_nob(&schdlr_sspnd.changing); } - ASSERT(no <= schdlr_sspnd.online); - ASSERT(!ongoing_multi_scheduling_block()); - + schdlr_sspnd_inc_nscheds(&schdlr_sspnd.active, sched_type); + changing = erts_atomic32_read_nob(&schdlr_sspnd.changing); + if (changing) { + if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) + && !schdlr_sspnd.msb.ongoing + && schdlr_sspnd_eq_nscheds(&schdlr_sspnd.online, + &schdlr_sspnd.active)) { + erts_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_MSB); + } + if ((changing & ERTS_SCHDLR_SSPND_CHNG_NMSB) + && !schdlr_sspnd.nmsb.ongoing + && (schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL) + == schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL))) { + erts_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_NMSB); + } + } + ASSERT(no <= schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, sched_type)); } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_mtx_unlock(&schdlr_sspnd.mtx); + + schdlr_sspnd_resume_procs(sched_type, &resume); ASSERT(curr_online); - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_active); + if (sched_type != ERTS_SCHED_NORMAL) + dirty_sched_wall_time_change(esdp, 1); + else { + (void) erts_get_monotonic_time(esdp); + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_active); - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } } - erts_smp_runq_lock(esdp->run_queue); + erts_runq_lock(esdp->run_queue); non_empty_runq(esdp->run_queue); - schedule_bound_processes(esdp->run_queue, &sbp); + if (sched_type != ERTS_SCHED_NORMAL) + dirty_active(esdp, 1); + else { + schedule_bound_processes(esdp->run_queue, &sbp); - erts_sched_check_cpu_bind_post_suspend(esdp); + erts_sched_check_cpu_bind_post_suspend(esdp); + } } -#endif - -ErtsSchedSuspendResult +void erts_schedulers_state(Uint *total, Uint *online, Uint *active, Uint *dirty_cpu, Uint *dirty_cpu_online, + Uint *dirty_cpu_active, Uint *dirty_io, - int yield_allowed) + Uint *dirty_io_active) { - int res = ERTS_SCHDLR_SSPND_EINVAL; - erts_aint32_t changing; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); -#endif - if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - else { + if (active || online || dirty_cpu_online + || dirty_cpu_active || dirty_io_active) { + erts_mtx_lock(&schdlr_sspnd.mtx); if (active) - *active = schdlr_sspnd.online; + *active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL); if (online) - *online = schdlr_sspnd.online; - if (ongoing_multi_scheduling_block() && active) - *active = 1; -#ifdef ERTS_DIRTY_SCHEDULERS + *online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_NORMAL); + if (dirty_cpu_active) + *dirty_cpu_active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_CPU); if (dirty_cpu_online) - *dirty_cpu_online = schdlr_sspnd.dirty_cpu_online; -#endif - res = ERTS_SCHDLR_SSPND_DONE; + *dirty_cpu_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_CPU); + if (dirty_io_active) + *dirty_io_active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_IO); + erts_mtx_unlock(&schdlr_sspnd.mtx); } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (total) *total = erts_no_schedulers; -#ifdef ERTS_DIRTY_SCHEDULERS if (dirty_cpu) *dirty_cpu = erts_no_dirty_cpu_schedulers; if (dirty_io) *dirty_io = erts_no_dirty_io_schedulers; -#endif - return res; } -#ifdef ERTS_DIRTY_SCHEDULERS +static void +abort_sched_onln_chng_waitq(Process *p) +{ + Eterm resume = NIL; + + erts_mtx_lock(&schdlr_sspnd.mtx); + +#ifdef DEBUG + { + int found_it = 0; + ErtsProcList *plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + while (plp) { + if (erts_proclist_same(plp, p)) + found_it++; + plp = erts_proclist_peek_next(schdlr_sspnd.chngq, plp); + } + ASSERT(found_it == !!(p->flags & F_SCHDLR_ONLN_WAITQ)); + } +#endif + + if (p->flags & F_SCHDLR_ONLN_WAITQ) { + ErtsProcList *plp = NULL; + + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (plp) { + if (erts_proclist_same(plp, p) + && schdlr_sspnd.changer == am_true) { + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + /* + * Change right was in transit to us; + * transfer it to the next process by + * resuming it... + */ + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (plp) + resume = plp->pid; + else + schdlr_sspnd.changer = am_false; + } + else { + do { + if (erts_proclist_same(plp, p)) { + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + break; + } + plp = erts_proclist_peek_next(schdlr_sspnd.chngq, plp); + } while (plp); + } + } + } + + erts_mtx_unlock(&schdlr_sspnd.mtx); + + if (is_internal_pid(resume)) + schdlr_sspnd_resume_proc(ERTS_SCHED_NORMAL, resume); +} ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, Sint new_no, - Sint *old_no -#ifdef ERTS_DIRTY_SCHEDULERS - , int dirty_only -#endif - ) + Sint *old_no, + int dirty_only) { - ErtsSchedulerData *esdp; - int ix, res = -1, no, have_unlocked_plocks, end_wait; - erts_aint32_t changing = 0; -#ifdef ERTS_DIRTY_SCHEDULERS - ErtsSchedulerSleepInfo* ssi; - int dirty_no, change_dirty; -#endif + int resume_proc, ix, res = -1, no, have_unlocked_plocks; + erts_aint32_t changing = 0, change_flags; + int online, increase; + ErtsProcList *plp; + int dirty_no, change_dirty, dirty_online; if (new_no < 1) return ERTS_SCHDLR_SSPND_EINVAL; -#ifdef ERTS_DIRTY_SCHEDULERS else if (dirty_only && erts_no_dirty_cpu_schedulers < new_no) return ERTS_SCHDLR_SSPND_EINVAL; -#endif else if (erts_no_schedulers < new_no) return ERTS_SCHDLR_SSPND_EINVAL; - esdp = ERTS_PROC_GET_SCHDATA(p); - end_wait = 0; + if (dirty_only) + resume_proc = 0; + else + { + resume_proc = 1; + /* + * If we suspend current process we need to suspend before + * requesting the change; otherwise, we got a resume/suspend + * race... + */ + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + suspend_process(p, p); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + erts_mtx_lock(&schdlr_sspnd.mtx); + change_flags = 0; have_unlocked_plocks = 0; no = (int) new_no; -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(schdlr_sspnd.dirty_cpu_online <= erts_no_dirty_cpu_schedulers); + if (!dirty_only) + { + changing = erts_atomic32_read_nob(&schdlr_sspnd.changing); + if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + enqueue_wait: + p->flags |= F_SCHDLR_ONLN_WAITQ; + plp = proclist_create(p); + erts_proclist_store_last(&schdlr_sspnd.chngq, plp); + resume_proc = 0; + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + goto done; + } + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (!plp) { + ASSERT(schdlr_sspnd.changer == am_false); + } + else { + ASSERT(schdlr_sspnd.changer == am_true); + if (!erts_proclist_same(plp, p)) + goto enqueue_wait; + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + } + } + + *old_no = online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); + dirty_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU); + if (dirty_only) + *old_no = dirty_online; + + ASSERT(dirty_online <= erts_no_dirty_cpu_schedulers); + if (dirty_only) { - if (no > schdlr_sspnd.online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - return ERTS_SCHDLR_SSPND_EINVAL; + if (no > online) { + res = ERTS_SCHDLR_SSPND_EINVAL; + goto done; } dirty_no = no; + if (dirty_no == dirty_online) { + res = ERTS_SCHDLR_SSPND_DONE; + goto done; + } + change_dirty = 1; } else { /* * Adjust the number of dirty CPU schedulers online relative to the * adjustment made to the number of normal schedulers online. */ int total_pct = erts_no_dirty_cpu_schedulers*100/erts_no_schedulers; - int onln_pct = no*total_pct/schdlr_sspnd.online; - dirty_no = schdlr_sspnd.dirty_cpu_online*onln_pct/100; + int onln_pct = no*total_pct/online; + dirty_no = dirty_online*onln_pct/100; if (dirty_no == 0) dirty_no = 1; ASSERT(dirty_no <= erts_no_dirty_cpu_schedulers); - } -#endif - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); -#endif - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - } - else { - int online = *old_no = schdlr_sspnd.online; -#ifdef ERTS_DIRTY_SCHEDULERS - int dirty_online = schdlr_sspnd.dirty_cpu_online; - if (dirty_only) { - *old_no = schdlr_sspnd.dirty_cpu_online; - if (dirty_no == schdlr_sspnd.dirty_cpu_online) { + if (no != online) + change_dirty = (dirty_no != dirty_online); + else { + dirty_only = 1; + if (dirty_no == dirty_online) { res = ERTS_SCHDLR_SSPND_DONE; + goto done; } change_dirty = 1; - } else { -#endif - if (no == schdlr_sspnd.online) { -#ifdef ERTS_DIRTY_SCHEDULERS - dirty_only = 1; - if (dirty_no == schdlr_sspnd.dirty_cpu_online) -#endif - res = ERTS_SCHDLR_SSPND_DONE; -#ifdef ERTS_DIRTY_SCHEDULERS - else - change_dirty = 1; -#endif - } -#ifdef ERTS_DIRTY_SCHEDULERS - else - change_dirty = (dirty_no != schdlr_sspnd.dirty_cpu_online); } -#endif - if (res == -1) - { - int increase = (no > online); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!dirty_only) { -#endif - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.online = no; -#ifdef ERTS_DIRTY_SCHEDULERS - } else - increase = (dirty_no > dirty_online); - if (change_dirty) { - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.dirty_cpu_online = dirty_no; - } -#endif - if (increase) { - int ix; -#ifdef ERTS_DIRTY_SCHEDULERS - if (!dirty_only) { -#endif - schdlr_sspnd.wait_curr_online = no; - if (ongoing_multi_scheduling_block()) { - for (ix = online; ix < no; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - change_no_used_runqs(no); - - for (ix = online; ix < no; ix++) - resume_run_queue(ERTS_RUNQ_IX(ix)); - - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - } -#ifdef ERTS_DIRTY_SCHEDULERS - } - if (change_dirty) { - schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; - ASSERT(schdlr_sspnd.dirty_cpu_curr_online != - schdlr_sspnd.dirty_cpu_wait_curr_online); - if (ongoing_multi_scheduling_block()) { - for (ix = dirty_online; ix < dirty_no; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_sched_poke(ssi); - } - } else { - for (ix = dirty_online; ix < dirty_no; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - } - } -#endif - res = ERTS_SCHDLR_SSPND_DONE; - } - else /* if (no < online) */ { -#ifdef ERTS_DIRTY_SCHEDULERS - if (change_dirty) { - schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; - ASSERT(schdlr_sspnd.dirty_cpu_curr_online != - schdlr_sspnd.dirty_cpu_wait_curr_online); - if (ongoing_multi_scheduling_block()) { - for (ix = dirty_no; ix < dirty_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_sched_poke(ssi); - } - } else { - for (ix = dirty_no; ix < dirty_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - } - } - if (dirty_only) { - res = ERTS_SCHDLR_SSPND_DONE; - } - else -#endif - { - if (p->scheduler_data->no <= no) { - res = ERTS_SCHDLR_SSPND_DONE; - schdlr_sspnd.wait_curr_online = no; - } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE; - schdlr_sspnd.wait_curr_online = no+1; - } + } + if (change_dirty) { + change_flags |= ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU, + dirty_no); + } - if (ongoing_multi_scheduling_block()) { - for (ix = no; ix < online; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } + if (dirty_only) + increase = (dirty_no > dirty_online); + else + { + change_flags |= ERTS_SCHDLR_SSPND_CHNG_ONLN; + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL, + no); + increase = (no > online); + } - change_no_used_runqs(no); - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); + erts_atomic32_read_bor_nob(&schdlr_sspnd.changing, change_flags); - for (ix = no; ix < online; ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq); - } - } + res = ERTS_SCHDLR_SSPND_DONE; + if (increase) { + int ix; + if (change_dirty) { + ErtsSchedulerSleepInfo* ssi; + if (schdlr_sspnd.msb.ongoing) { + for (ix = dirty_online; ix < dirty_no; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_sched_poke(ssi); } + } else { + for (ix = dirty_online; ix < dirty_no; ix++) + dcpu_sched_ix_resume_wake(ix); } - -#ifdef ERTS_DIRTY_SCHEDULERS - if (change_dirty) { - while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + } + if (!dirty_only) + { + if (schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) { + for (ix = online; ix < no; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); } - if (!dirty_only) -#endif - { - if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (plocks && !have_unlocked_plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - end_wait = 1; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_proc_unlock(p, plocks); } + change_no_used_runqs(no); - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + for (ix = online; ix < no; ix++) + resume_run_queue(ERTS_RUNQ_IX(ix)); - ASSERT(res != ERTS_SCHDLR_SSPND_DONE - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); } } } - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(schdlr_sspnd.dirty_cpu_online <= schdlr_sspnd.online); - if (!dirty_only) -#endif - { - if (end_wait) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); - } - if (have_unlocked_plocks) - erts_smp_proc_lock(p, plocks); - } - - return res; -} - -#else /* !ERTS_DIRTY_SCHEDULERS */ - -ErtsSchedSuspendResult -erts_set_schedulers_online(Process *p, - ErtsProcLocks plocks, - Sint new_no, - Sint *old_no) -{ - ErtsSchedulerData *esdp; - int ix, res, no, have_unlocked_plocks, end_wait; - erts_aint32_t changing; - - if (new_no < 1 || erts_no_schedulers < new_no) - return ERTS_SCHDLR_SSPND_EINVAL; - - esdp = ERTS_PROC_GET_SCHDATA(p); - end_wait = 0; - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - have_unlocked_plocks = 0; - no = (int) new_no; - - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - } - else { - int online = *old_no = schdlr_sspnd.online; - if (no == schdlr_sspnd.online) { - res = ERTS_SCHDLR_SSPND_DONE; + else /* if decrease */ { + if (change_dirty) { + if (schdlr_sspnd.msb.ongoing) { + for (ix = dirty_no; ix < dirty_online; ix++) + erts_sched_poke(ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix)); + } + else { + for (ix = dirty_no; ix < dirty_online; ix++) + dcpu_sched_ix_suspend_wake(ix); + /* + * Newly suspended scheduler may have just been + * about to handle a task. Make sure someone takes + * care of such a task... + */ + dcpu_sched_ix_wake(0); + } } - else { - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.online = no; - if (no > online) { - int ix; - schdlr_sspnd.wait_curr_online = no; - if (ongoing_multi_scheduling_block()) { - for (ix = online; ix < no; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - change_no_used_runqs(no); - - for (ix = online; ix < no; ix++) - resume_run_queue(ERTS_RUNQ_IX(ix)); - - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - } - res = ERTS_SCHDLR_SSPND_DONE; + if (!dirty_only) + { + if (schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) { + for (ix = no; ix < online; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); } - else /* if (no < online) */ { - if (p->scheduler_data->no <= no) { - res = ERTS_SCHDLR_SSPND_DONE; - schdlr_sspnd.wait_curr_online = no; - } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE; - schdlr_sspnd.wait_curr_online = no+1; - } - - if (ongoing_multi_scheduling_block()) { - for (ix = no; ix < online; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_proc_unlock(p, plocks); } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - change_no_used_runqs(no); - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); + change_no_used_runqs(no); + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); - for (ix = no; ix < online; ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq); - } + for (ix = no; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + wake_scheduler(rq); } } + } + } - if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (plocks && !have_unlocked_plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - end_wait = 1; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - } + if (change_flags & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + /* Suspend and wait for requested change to complete... */ + schdlr_sspnd.changer = p->common.id; + resume_proc = 0; + res = ERTS_SCHDLR_SSPND_YIELD_DONE; + } + +done: - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ASSERT(schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU) + <= schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL)); - ASSERT(res != ERTS_SCHDLR_SSPND_DONE - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + erts_mtx_unlock(&schdlr_sspnd.mtx); - } - } + if (have_unlocked_plocks) + erts_proc_lock(p, plocks); - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (end_wait) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); + if (resume_proc) { + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + resume_process(p, plocks|ERTS_PROC_LOCK_STATUS); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } - if (have_unlocked_plocks) - erts_smp_proc_lock(p, plocks); return res; } -#endif - ErtsSchedSuspendResult -erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) +erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int normal, int all) { - int ix, res, have_unlocked_plocks = 0, online; - erts_aint32_t changing; + int resume_proc, ix, res, have_unlocked_plocks = 0; ErtsProcList *plp; -#ifdef ERTS_DIRTY_SCHEDULERS - ErtsSchedulerSleepInfo* ssi; -#endif + ErtsMultiSchedulingBlock *msbp; + erts_aint32_t chng_flg; + int have_blckd_flg; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); -#endif - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */ + if (normal) { + chng_flg = ERTS_SCHDLR_SSPND_CHNG_NMSB; + have_blckd_flg = F_HAVE_BLCKD_NMSCHED; + msbp = &schdlr_sspnd.nmsb; + } + else { + chng_flg = ERTS_SCHDLR_SSPND_CHNG_MSB; + have_blckd_flg = F_HAVE_BLCKD_MSCHED; + msbp = &schdlr_sspnd.msb; + } + + /* + * If we suspend current process we need to suspend before + * requesting the change; otherwise, we got a resume/suspend + * race... + */ + if (!on) { + /* We never suspend current process when unblocking... */ + resume_proc = 0; } - else if (on) { /* ------ BLOCK ------ */ - if (schdlr_sspnd.msb.procs) { + else { + resume_proc = 1; + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + suspend_process(p, p); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } + + erts_mtx_lock(&schdlr_sspnd.mtx); + if (on) { /* ------ BLOCK ------ */ + if (msbp->chngq) { + ASSERT(msbp->ongoing); + p->flags |= have_blckd_flg; + goto wait_until_msb; + } + else if (msbp->blckrs || (normal && erts_no_schedulers == 1)) { + ASSERT(!msbp->blckrs || msbp->ongoing); + msbp->ongoing = 1; plp = proclist_create(p); - erts_proclist_store_last(&schdlr_sspnd.msb.procs, plp); - p->flags |= F_HAVE_BLCKD_MSCHED; - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 0); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) == 0); -#endif - ASSERT(p->scheduler_data->no == 1); - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - } else { - int online = schdlr_sspnd.online; - p->flags |= F_HAVE_BLCKD_MSCHED; + erts_proclist_store_last(&msbp->blckrs, plp); + p->flags |= have_blckd_flg; + ASSERT(normal + ? 1 == schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) + : schdlr_sspnd_get_nscheds_tot(&schdlr_sspnd.active) == 1); + ASSERT(erts_proc_sched_data(p)->no == 1); + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED; + } + else { + int online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); + ASSERT(!msbp->ongoing); + p->flags |= have_blckd_flg; if (plocks) { have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - ASSERT(!ongoing_multi_scheduling_block()); - schdlr_sspnd.msb.ongoing = 1; - if (online == 1) { - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 1); - ASSERT(!(erts_smp_atomic32_read_nob(&ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0)->flags) - & ERTS_SSI_FLG_SUSPENDED)); - schdlr_sspnd.msb.dirty_cpu_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0); - erts_smp_atomic32_read_bor_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) - != schdlr_sspnd.msb.dirty_cpu_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - - schdlr_sspnd.msb.dirty_io_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) - != schdlr_sspnd.msb.dirty_io_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); -#endif - ASSERT(p->scheduler_data->no == 1); + erts_proc_unlock(p, plocks); } - else { - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - if (p->scheduler_data->no == 1) { - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - schdlr_sspnd.msb.wait_active = 1; - } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED; - schdlr_sspnd.msb.wait_active = 2; - } + ASSERT(!msbp->ongoing); + msbp->ongoing = 1; -#ifdef ERTS_DIRTY_SCHEDULERS - schdlr_sspnd.msb.dirty_cpu_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) - != schdlr_sspnd.msb.dirty_cpu_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - ASSERT(schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_online); - - schdlr_sspnd.msb.dirty_io_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) - != schdlr_sspnd.msb.dirty_io_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - ASSERT(schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_online); -#endif - change_no_used_runqs(1); - for (ix = 1; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - - for (ix = 1; ix < online; ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq); - } - - if (erts_smp_atomic32_read_nob(&schdlr_sspnd.active) - != schdlr_sspnd.msb.wait_active) { - ErtsSchedulerData *esdp; - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - if (plocks && !have_unlocked_plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } + erts_atomic32_read_bor_nob(&schdlr_sspnd.changing, + chng_flg); + change_no_used_runqs(1); + for (ix = 1; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); - esdp = ERTS_PROC_GET_SCHDATA(p); - - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + for (ix = 1; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + wake_scheduler(rq); + } - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.active) - != schdlr_sspnd.msb.wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, - &schdlr_sspnd.mtx); - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - erts_thr_progress_active(esdp, 1); - erts_thr_progress_finalize_wait(esdp); + if (!normal) { + ERTS_RUNQ_FLGS_SET_NOB(ERTS_RUNQ_IX(0), ERTS_RUNQ_FLG_MSB_EXEC); + erts_atomic32_read_bor_nob(&ERTS_RUNQ_IX(0)->scheduler->ssi->flags, + ERTS_SSI_FLG_MSB_EXEC); + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) + dcpu_sched_ix_suspend_wake(ix); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) + dio_sched_ix_suspend_wake(ix); + } - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + wait_until_msb: - } + ASSERT(chng_flg & erts_atomic32_read_nob(&schdlr_sspnd.changing)); - ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - plp = proclist_create(p); - erts_proclist_store_last(&schdlr_sspnd.msb.procs, plp); - ASSERT(p->scheduler_data); + plp = proclist_create(p); + erts_proclist_store_last(&msbp->chngq, plp); + resume_proc = 0; + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED; + ASSERT(erts_proc_sched_data(p)); } } - else if (!ongoing_multi_scheduling_block()) { - /* unblock not ongoing */ - ASSERT(!schdlr_sspnd.msb.procs); - res = ERTS_SCHDLR_SSPND_DONE; + else if (!msbp->ongoing) { + ASSERT(!msbp->blckrs); + goto unblock_res; } else { /* ------ UNBLOCK ------ */ - if (p->flags & F_HAVE_BLCKD_MSCHED) { - ErtsProcList *plp = erts_proclist_peek_first(schdlr_sspnd.msb.procs); - - while (plp) { - ErtsProcList *tmp_plp = plp; - plp = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp); - if (erts_proclist_same(tmp_plp, p)) { - erts_proclist_remove(&schdlr_sspnd.msb.procs, tmp_plp); - proclist_destroy(tmp_plp); - if (!all) - break; + if (p->flags & have_blckd_flg) { + ErtsProcList **plpps[3] = {0}; + ErtsProcList *plp; + + plpps[0] = &msbp->blckrs; + if (all) + plpps[1] = &msbp->chngq; + + for (ix = 0; plpps[ix]; ix++) { + plp = erts_proclist_peek_first(*plpps[ix]); + while (plp) { + ErtsProcList *tmp_plp = plp; + plp = erts_proclist_peek_next(*plpps[ix], plp); + if (erts_proclist_same(tmp_plp, p)) { + erts_proclist_remove(plpps[ix], tmp_plp); + proclist_destroy(tmp_plp); + if (!all) + break; + } } } } - if (schdlr_sspnd.msb.procs) - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - else { - ERTS_SCHDLR_SSPND_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - p->flags &= ~F_HAVE_BLCKD_MSCHED; - schdlr_sspnd.msb.ongoing = 0; - if (schdlr_sspnd.online == 1) { - /* No normal schedulers to resume */ - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); - ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB); - } - else { - online = schdlr_sspnd.online; - if (plocks) { + if (!msbp->blckrs && !msbp->chngq) { + int online; + erts_atomic32_read_bor_nob(&schdlr_sspnd.changing, + chng_flg); + p->flags &= ~have_blckd_flg; + msbp->ongoing = 0; + if (!(schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing)) { + if (plocks) { have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); + erts_proc_unlock(p, plocks); } + online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); change_no_used_runqs(online); /* Resume all online run queues */ @@ -7489,84 +8156,86 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) for (ix = online; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); } -#ifdef ERTS_DIRTY_SCHEDULERS - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - schdlr_sspnd.msb.dirty_cpu_wait_active = schdlr_sspnd.dirty_cpu_online; - for (ix = 0; ix < schdlr_sspnd.dirty_cpu_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - schdlr_sspnd.msb.dirty_io_wait_active = erts_no_dirty_io_schedulers; - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); + if (!schdlr_sspnd.msb.ongoing) { + /* Get rid of msb-exec flag in run-queue of scheduler 1 */ + resume_run_queue(ERTS_RUNQ_IX(0)); + online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU); + for (ix = 0; ix < online; ix++) + dcpu_sched_ix_resume_wake(ix); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) + dio_sched_ix_resume_wake(ix); } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); -#endif - res = ERTS_SCHDLR_SSPND_DONE; } + + unblock_res: + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else if (schdlr_sspnd.nmsb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_DONE; } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_mtx_unlock(&schdlr_sspnd.mtx); + if (have_unlocked_plocks) - erts_smp_proc_lock(p, plocks); - return res; -} + erts_proc_lock(p, plocks); -#ifdef DEBUG -void -erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value) -{ - if (return_value == am_blocked) { - erts_aint32_t active = erts_smp_atomic32_read_nob(&schdlr_sspnd.active); - ASSERT(1 <= active && active <= 2); - ASSERT(ERTS_PROC_GET_SCHDATA(p)->no == 1); + if (resume_proc) { + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + resume_process(p, plocks|ERTS_PROC_LOCK_STATUS); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } + + return res; } -#endif int erts_is_multi_scheduling_blocked(void) { int res; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - res = schdlr_sspnd.msb.procs != NULL; - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_mtx_lock(&schdlr_sspnd.mtx); + if (schdlr_sspnd.msb.blckrs) + res = 1; + else if (schdlr_sspnd.nmsb.blckrs) + res = -1; + else + res = 0; + erts_mtx_unlock(&schdlr_sspnd.mtx); return res; } Eterm -erts_multi_scheduling_blockers(Process *p) +erts_multi_scheduling_blockers(Process *p, int normal) { Eterm res = NIL; + ErtsMultiSchedulingBlock *msbp; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - if (!erts_proclist_is_empty(schdlr_sspnd.msb.procs)) { + msbp = normal ? &schdlr_sspnd.nmsb : &schdlr_sspnd.msb; + + erts_mtx_lock(&schdlr_sspnd.mtx); + if (!erts_proclist_is_empty(msbp->blckrs)) { Eterm *hp, *hp_end; ErtsProcList *plp1, *plp2; Uint max_size = 0; - for (plp1 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + for (plp1 = erts_proclist_peek_first(msbp->blckrs); plp1; - plp1 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp1)) { + plp1 = erts_proclist_peek_next(msbp->blckrs, plp1)) { max_size += 2; } ASSERT(max_size); hp = HAlloc(p, max_size); hp_end = hp + max_size; - for (plp1 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + for (plp1 = erts_proclist_peek_first(msbp->blckrs); plp1; - plp1 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp1)) { - for (plp2 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + plp1 = erts_proclist_peek_next(msbp->blckrs, plp1)) { + for (plp2 = erts_proclist_peek_first(msbp->blckrs); plp2->pid != plp1->pid; - plp2 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp2)); + plp2 = erts_proclist_peek_next(msbp->blckrs, plp2)); if (plp2 == plp1) { res = CONS(hp, plp1->pid, res); hp += 2; @@ -7575,7 +8244,7 @@ erts_multi_scheduling_blockers(Process *p) } HRelease(p, hp_end, hp); } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + erts_mtx_unlock(&schdlr_sspnd.mtx); return res; } @@ -7585,17 +8254,30 @@ sched_thread_func(void *vesdp) ErtsThrPrgrCallbacks callbacks; ErtsSchedulerData *esdp = vesdp; Uint no = esdp->no; -#ifdef ERTS_SMP - ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch(); + erts_tse_t *tse; + + erts_port_task_pre_alloc_init_thread(); + erts_sched_init_time_sup(esdp); + + if (no == 1) + erts_aux_work_timeout_late_init(esdp); + + (void) ERTS_RUNQ_FLGS_SET_NOB(esdp->run_queue, + ERTS_RUNQ_FLG_EXEC); + + tse = erts_tse_fetch(); + erts_tse_prepare_timed(tse); + ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = tse; callbacks.arg = (void *) esdp->ssi; callbacks.wakeup = thr_prgr_wakeup; callbacks.prepare_wait = thr_prgr_prep_wait; callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; + erts_msacc_init_thread("scheduler", no, 1); + erts_thr_progress_register_managed_thread(esdp, &callbacks, 0); erts_alloc_register_scheduler(vesdp); -#endif #ifdef ERTS_ENABLE_LOCK_CHECK { char buf[31]; @@ -7604,57 +8286,20 @@ sched_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); -#ifdef ERTS_SMP #if HAVE_ERTS_MSEG erts_mseg_late_init(); #endif -#if ERTS_USE_ASYNC_READY_Q esdp->aux_work_data.async_ready.queue = erts_get_async_ready_queue(no); -#endif erts_sched_init_check_cpu_bind(esdp); erts_proc_lock_prepare_proc_lock_waiter(); -#endif #ifdef HIPE hipe_thread_signal_init(); #endif erts_thread_init_float(); - if (no == 1) { - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - } - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); -#else - erts_smp_cnd_signal(&schdlr_sspnd.cnd); -#endif - } - - if (no == 1) { - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - if (no == 1) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); - } - #ifdef ERTS_DO_VERIFY_UNUSED_TEMP_ALLOC esdp->verify_unused_temp_alloc = erts_alloc_get_verify_unused_temp_alloc( @@ -7662,23 +8307,23 @@ sched_thread_func(void *vesdp) ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL); #endif - process_main(); + erts_ets_sched_spec_data_init(esdp); + + process_main(esdp->x_reg_array, esdp->f_reg_array); + /* No schedulers should *ever* terminate */ - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Scheduler thread number %beu terminated\n", no); return NULL; } -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP static void* sched_dirty_cpu_thread_func(void *vesdp) { ErtsThrPrgrCallbacks callbacks; ErtsSchedulerData *esdp = vesdp; - Uint no = ERTS_DIRTY_SCHEDULER_NO(esdp); - ERTS_DIRTY_SCHEDULER_TYPE(esdp) = ERTS_DIRTY_CPU_SCHEDULER; + Uint no = esdp->dirty_no; ASSERT(no != 0); ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); callbacks.arg = (void *) esdp->ssi; @@ -7687,6 +8332,12 @@ sched_dirty_cpu_thread_func(void *vesdp) callbacks.wait = NULL; callbacks.finalize_wait = NULL; + dirty_sched_wall_time_change(esdp, 1); + + esdp->thr_id += erts_no_schedulers; + + erts_msacc_init_thread("dirty_cpu_scheduler", no, 0); + erts_thr_progress_register_unmanaged_thread(&callbacks); #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -7696,9 +8347,7 @@ sched_dirty_cpu_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); -#if ERTS_USE_ASYNC_READY_Q esdp->aux_work_data.async_ready.queue = NULL; -#endif erts_proc_lock_prepare_proc_lock_waiter(); @@ -7707,27 +8356,9 @@ sched_dirty_cpu_thread_func(void *vesdp) #endif erts_thread_init_float(); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - } - - if (no == 1) { - while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - process_main(); + erts_dirty_process_main(esdp); /* No schedulers should *ever* terminate */ - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Dirty CPU scheduler thread number %beu terminated\n", no); return NULL; @@ -7738,8 +8369,7 @@ sched_dirty_io_thread_func(void *vesdp) { ErtsThrPrgrCallbacks callbacks; ErtsSchedulerData *esdp = vesdp; - Uint no = ERTS_DIRTY_SCHEDULER_NO(esdp); - ERTS_DIRTY_SCHEDULER_TYPE(esdp) = ERTS_DIRTY_IO_SCHEDULER; + Uint no = esdp->dirty_no; ASSERT(no != 0); ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); callbacks.arg = (void *) esdp->ssi; @@ -7748,6 +8378,12 @@ sched_dirty_io_thread_func(void *vesdp) callbacks.wait = NULL; callbacks.finalize_wait = NULL; + dirty_sched_wall_time_change(esdp, 1); + + esdp->thr_id += erts_no_schedulers + erts_no_dirty_cpu_schedulers; + + erts_msacc_init_thread("dirty_io_scheduler", no, 0); + erts_thr_progress_register_unmanaged_thread(&callbacks); #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -7757,9 +8393,7 @@ sched_dirty_io_thread_func(void *vesdp) } #endif erts_tsd_set(sched_data_key, vesdp); -#if ERTS_USE_ASYNC_READY_Q esdp->aux_work_data.async_ready.queue = NULL; -#endif erts_proc_lock_prepare_proc_lock_waiter(); @@ -7768,68 +8402,45 @@ sched_dirty_io_thread_func(void *vesdp) #endif erts_thread_init_float(); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_io_changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - } - - if (no == 1) { - while (schdlr_sspnd.dirty_io_curr_online != schdlr_sspnd.dirty_io_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - process_main(); + erts_dirty_process_main(esdp); /* No schedulers should *ever* terminate */ - erl_exit(ERTS_ABORT_EXIT, + erts_exit(ERTS_ABORT_EXIT, "Dirty I/O scheduler thread number %beu terminated\n", no); return NULL; } -#endif -#endif - -static ethr_tid aux_tid; void erts_start_schedulers(void) { + ethr_tid tid; int res = 0; Uint actual; Uint wanted = erts_no_schedulers; Uint wanted_no_schedulers = erts_no_schedulers; + char name[16]; ethr_thr_opts opts = ETHR_THR_OPTS_DEFAULT_INITER; + int ix; opts.detached = 1; -#ifdef ETHR_HAVE_THREAD_NAMES - opts.name = malloc(80); -#endif + opts.name = name; -#ifdef ERTS_SMP if (erts_runq_supervision_interval) { opts.suggested_stack_size = 16; -#ifdef ETHR_HAVE_THREAD_NAMES - sprintf(opts.name, "runq_supervisor"); -#endif + erts_snprintf(opts.name, 16, "runq_supervisor"); erts_atomic_init_nob(&runq_supervisor_sleeping, 0); if (0 != ethr_event_init(&runq_supervision_event)) - erl_exit(1, "Failed to create run-queue supervision event\n"); - if (0 != ethr_thr_create(&runq_supervisor_tid, - runq_supervisor, - NULL, - &opts)) - erl_exit(1, "Failed to create run-queue supervision thread\n"); + erts_exit(ERTS_ERROR_EXIT, "Failed to create run-queue supervision event\n"); + res = ethr_thr_create(&runq_supervisor_tid, + runq_supervisor, + NULL, + &opts); + if (0 != res) + erts_exit(ERTS_ERROR_EXIT, "Failed to create run-queue supervision thread, " + "error = %d\n", res); } -#endif opts.suggested_stack_size = erts_sched_thread_suggested_stack_size; @@ -7845,14 +8456,7 @@ erts_start_schedulers(void) ASSERT(actual == esdp->no - 1); -#ifdef ETHR_HAVE_THREAD_NAMES - sprintf(opts.name, "scheduler_%d", actual + 1); -#endif - -#ifdef __OSE__ - /* This should be done in the bind strategy */ - opts.coreNo = (actual+1) % ose_num_cpus(); -#endif + erts_snprintf(opts.name, 16, "%lu_scheduler", actual + 1); res = ethr_thr_create(&esdp->tid, sched_thread_func, (void*)esdp, &opts); @@ -7860,51 +8464,45 @@ erts_start_schedulers(void) break; } } - erts_no_schedulers = actual; -#ifdef ERTS_DIRTY_SCHEDULERS -#ifdef ERTS_SMP { - int ix; for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); -#ifdef ETHR_HAVE_THREAD_NAMES - sprintf(opts.name,"dirty_cpu_scheduler_%d", ix + 1); -#endif + erts_snprintf(opts.name, 16, "%d_dirty_cpu_scheduler", ix + 1); + opts.suggested_stack_size = erts_dcpu_sched_thread_suggested_stack_size; res = ethr_thr_create(&esdp->tid,sched_dirty_cpu_thread_func,(void*)esdp,&opts); if (res != 0) - erl_exit(1, "Failed to create dirty cpu scheduler thread %d\n", ix); + erts_exit(ERTS_ERROR_EXIT, "Failed to create dirty cpu scheduler thread %d, error = %d\n", ix, res); } for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); -#ifdef ETHR_HAVE_THREAD_NAMES - sprintf(opts.name,"dirty_io_scheduler_%d", ix + 1); -#endif + erts_snprintf(opts.name, 16, "%d_dirty_io_scheduler", ix + 1); + opts.suggested_stack_size = erts_dio_sched_thread_suggested_stack_size; res = ethr_thr_create(&esdp->tid,sched_dirty_io_thread_func,(void*)esdp,&opts); if (res != 0) - erl_exit(1, "Failed to create dirty io scheduler thread %d\n", ix); + erts_exit(ERTS_ERROR_EXIT, "Failed to create dirty io scheduler thread %d, error = %d\n", ix, res); } } -#endif -#endif ERTS_THR_MEMORY_BARRIER; -#ifdef ETHR_HAVE_THREAD_NAMES - sprintf(opts.name, "aux"); -#endif - -#ifdef __OSE__ - opts.coreNo = 0; -#endif /* __OSE__ */ + erts_snprintf(opts.name, 16, "aux"); - res = ethr_thr_create(&aux_tid, aux_thread, NULL, &opts); + res = ethr_thr_create(&tid, aux_thread, NULL, &opts); if (res != 0) - erl_exit(1, "Failed to create aux thread\n"); + erts_exit(ERTS_ERROR_EXIT, "Failed to create aux thread, error = %d\n", res); + + for (ix = 0; ix < erts_no_poll_threads; ix++) { + erts_snprintf(opts.name, 16, "%d_poller", ix); + + res = ethr_thr_create(&tid, poll_thread, (void*)(UWord)ix, &opts); + if (res != 0) + erts_exit(ERTS_ERROR_EXIT, "Failed to create poll thread\n"); + } if (actual < 1) - erl_exit(1, + erts_exit(ERTS_ERROR_EXIT, "Failed to create any scheduler-threads: %s (%d)\n", erl_errno_id(res), res); @@ -7918,15 +8516,9 @@ erts_start_schedulers(void) actual, actual == 1 ? " was" : "s were"); erts_send_error_to_logger_nogl(dsbufp); } - -#ifdef ETHR_HAVE_THREAD_NAMES - free(opts.name); -#endif } -#endif /* ERTS_SMP */ -#ifdef ERTS_SMP static void add_pend_suspend(Process *suspendee, @@ -7940,7 +8532,7 @@ add_pend_suspend(Process *suspendee, sizeof(ErtsPendingSuspend)); psp->next = NULL; #ifdef DEBUG -#if defined(ARCH_64) && !HALFWORD_HEAP +#if defined(ARCH_64) psp->end = (ErtsPendingSuspend *) 0xdeaddeaddeaddead; #else psp->end = (ErtsPendingSuspend *) 0xdeaddead; @@ -7962,7 +8554,7 @@ handle_pending_suspend(Process *p, ErtsProcLocks p_locks) ErtsPendingSuspend *psp; int is_alive = !ERTS_PROC_IS_EXITING(p); - ERTS_SMP_LC_ASSERT(p_locks & ERTS_PROC_LOCK_STATUS); + ERTS_LC_ASSERT(p_locks & ERTS_PROC_LOCK_STATUS); /* * New pending suspenders might appear while we are processing @@ -7988,15 +8580,15 @@ cancel_suspend_of_suspendee(Process *p, ErtsProcLocks p_locks) if (is_not_nil(p->suspendee)) { Process *rp; if (!(p_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); rp = erts_pid2proc(p, p_locks|ERTS_PROC_LOCK_STATUS, p->suspendee, ERTS_PROC_LOCK_STATUS); if (rp) { erts_resume(rp, ERTS_PROC_LOCK_STATUS); - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); } if (!(p_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); p->suspendee = NIL; } } @@ -8009,7 +8601,7 @@ handle_pend_sync_suspend(Process *suspendee, { Process *suspender; - ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + ERTS_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); suspender = erts_pid2proc(suspendee, suspendee_locks, @@ -8023,8 +8615,9 @@ handle_pend_sync_suspend(Process *suspendee, } /* suspender is suspended waiting for suspendee to suspend; resume suspender */ - resume_process(suspender); - erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_STATUS); + ASSERT(suspendee != suspender); + resume_process(suspender, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(suspender, ERTS_PROC_LOCK_STATUS); } } @@ -8035,10 +8628,10 @@ pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, Process *rp; int unlock_c_p_status; - ERTS_SMP_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p)); + ERTS_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p)); - ERTS_SMP_LC_ASSERT(c_p_locks & ERTS_PROC_LOCK_MAIN); - ERTS_SMP_LC_ASSERT(pid_locks & (ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS)); + ERTS_LC_ASSERT(c_p_locks & ERTS_PROC_LOCK_MAIN); + ERTS_LC_ASSERT(pid_locks & (ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS)); if (c_p->common.id == pid) return erts_pid2proc(c_p, c_p_locks, pid, pid_locks); @@ -8047,7 +8640,7 @@ pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, unlock_c_p_status = 0; else { unlock_c_p_status = 1; - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); } if (c_p->suspendee == pid) { @@ -8058,12 +8651,11 @@ pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, ASSERT(c_p->flags & F_P2PNR_RESCHED); c_p->flags &= ~F_P2PNR_RESCHED; if (!suspend && rp) - resume_process(rp); + resume_process(rp, rp_locks); } else { - rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, - pid, pid_locks|ERTS_PROC_LOCK_STATUS); + pid, ERTS_PROC_LOCK_STATUS); if (!rp) { c_p->flags &= ~F_P2PNR_RESCHED; @@ -8072,44 +8664,106 @@ pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, ASSERT(!(c_p->flags & F_P2PNR_RESCHED)); - if (suspend) { - if (suspend_process(c_p, rp)) - goto done; - } - else { - if (!((ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS) - & erts_smp_atomic32_read_acqb(&rp->state))) + /* + * Suspend the other process in order to prevent + * it from being selected for normal execution. + * This will however not prevent it from being + * selected for execution of a system task. If + * it is selected for execution of a system task + * we might be blocked for quite a while if the + * try-lock below fails. That is, there is room + * for improvement here... + */ + + if (!suspend_process(c_p, rp)) { + /* Other process running */ + + ASSERT((ERTS_PSFLG_RUNNING | ERTS_PSFLG_DIRTY_RUNNING) + & erts_atomic32_read_nob(&rp->state)); + + if (!suspend + && (erts_atomic32_read_nob(&rp->state) + & ERTS_PSFLG_DIRTY_RUNNING)) { + ErtsProcLocks need_locks = pid_locks & ~ERTS_PROC_LOCK_STATUS; + if (need_locks && erts_proc_trylock(rp, need_locks) == EBUSY) { + erts_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, + pid, pid_locks|ERTS_PROC_LOCK_STATUS); + } goto done; + } - } + running: - /* Other process running */ + /* + * If we got pending suspenders and suspend ourselves waiting + * to suspend another process we might deadlock. + * In this case we have to yield, be suspended by + * someone else and then do it all over again. + */ + if (!c_p->pending_suspenders) { + /* Mark rp pending for suspend by c_p */ + add_pend_suspend(rp, c_p->common.id, handle_pend_sync_suspend); + ASSERT(is_nil(c_p->suspendee)); - /* - * If we got pending suspenders and suspend ourselves waiting - * to suspend another process we might deadlock. - * In this case we have to yield, be suspended by - * someone else and then do it all over again. - */ - if (!c_p->pending_suspenders) { - /* Mark rp pending for suspend by c_p */ - add_pend_suspend(rp, c_p->common.id, handle_pend_sync_suspend); - ASSERT(is_nil(c_p->suspendee)); + /* Suspend c_p; when rp is suspended c_p will be resumed. */ + suspend_process(c_p, c_p); + c_p->flags |= F_P2PNR_RESCHED; + } + /* Yield (caller is assumed to yield immediately in bif). */ + erts_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + rp = ERTS_PROC_LOCK_BUSY; + } + else { + ErtsProcLocks need_locks = pid_locks & ~ERTS_PROC_LOCK_STATUS; + if (need_locks && erts_proc_trylock(rp, need_locks) == EBUSY) { + if ((ERTS_PSFLG_RUNNING_SYS|ERTS_PSFLG_DIRTY_RUNNING_SYS) + & erts_atomic32_read_nob(&rp->state)) { + /* Executing system task... */ + resume_process(rp, ERTS_PROC_LOCK_STATUS); + goto running; + } + erts_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + /* + * If we are unlucky, the process just got selected for + * execution of a system task. In this case we may be + * blocked here for quite a while... Execution of system + * tasks are fortunately quite rare events. We try to + * avoid this by checking if it is in a state executing + * system tasks (above), but it will not prevent all + * scenarios for a long block here... + */ + rp = erts_pid2proc(c_p, c_p_locks|ERTS_PROC_LOCK_STATUS, + pid, pid_locks|ERTS_PROC_LOCK_STATUS); + if (!rp) + goto done; + } - /* Suspend c_p; when rp is suspended c_p will be resumed. */ - suspend_process(c_p, c_p); - c_p->flags |= F_P2PNR_RESCHED; + /* + * The previous suspend has prevented the process + * from being selected for normal execution regardless + * of locks held or not held on it... + */ +#ifdef DEBUG + { + erts_aint32_t state; + state = erts_atomic32_read_nob(&rp->state); + ASSERT((state & ERTS_PSFLG_PENDING_EXIT) + || !(state & ERTS_PSFLG_RUNNING)); + } +#endif + + if (!suspend) + resume_process(rp, pid_locks|ERTS_PROC_LOCK_STATUS); } - /* Yield (caller is assumed to yield immediately in bif). */ - erts_smp_proc_unlock(rp, pid_locks|ERTS_PROC_LOCK_STATUS); - rp = ERTS_PROC_LOCK_BUSY; } done: + if (rp && rp != ERTS_PROC_LOCK_BUSY && !(pid_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); if (unlock_c_p_status) - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); return rp; } @@ -8132,17 +8786,6 @@ erts_pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, } /* - * Like erts_pid2proc_not_running(), but hands over the process - * in a suspended state unless (c_p is looked up). - */ -Process * -erts_pid2proc_suspend(Process *c_p, ErtsProcLocks c_p_locks, - Eterm pid, ErtsProcLocks pid_locks) -{ - return pid2proc_not_running(c_p, c_p_locks, pid, pid_locks, 1); -} - -/* * erts_pid2proc_nropt() is normally the same as * erts_pid2proc_not_running(). However it is only * to be used when 'not running' is a pure optimization, @@ -8166,7 +8809,7 @@ do_bif_suspend_process(Process *c_p, { ASSERT(suspendee); ASSERT(!ERTS_PROC_IS_EXITING(suspendee)); - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(suspendee)); if (smon) { if (!smon->active) { @@ -8189,7 +8832,7 @@ handle_pend_bif_sync_suspend(Process *suspendee, { Process *suspender; - ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + ERTS_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); suspender = erts_pid2proc(suspendee, suspendee_locks, @@ -8216,8 +8859,9 @@ handle_pend_bif_sync_suspend(Process *suspendee, } /* suspender is suspended waiting for suspendee to suspend; resume suspender */ - resume_process(suspender); - erts_smp_proc_unlock(suspender, + ASSERT(suspender != suspendee); + resume_process(suspender, ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(suspender, ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); } } @@ -8231,7 +8875,7 @@ handle_pend_bif_async_suspend(Process *suspendee, Process *suspender; - ERTS_SMP_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); + ERTS_LC_ASSERT(suspendee_locks & ERTS_PROC_LOCK_STATUS); suspender = erts_pid2proc(suspendee, suspendee_locks, @@ -8255,26 +8899,10 @@ handle_pend_bif_async_suspend(Process *suspendee, do_bif_suspend_process(suspendee, smon, suspendee); ASSERT(!smon || res != 0); } - erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_LINK); + erts_proc_unlock(suspender, ERTS_PROC_LOCK_LINK); } } -#else - -/* - * Non-smp version of erts_pid2proc_suspend(). - */ -Process * -erts_pid2proc_suspend(Process *c_p, ErtsProcLocks c_p_locks, - Eterm pid, ErtsProcLocks pid_locks) -{ - Process *rp = erts_pid2proc(c_p, c_p_locks, pid, pid_locks); - if (rp) - erts_suspend(rp, pid_locks, NULL); - return rp; -} - -#endif /* ERTS_SMP */ /* * The erlang:suspend_process/2 BIF @@ -8323,7 +8951,7 @@ suspend_process_2(BIF_ALIST_2) ? (ErtsProcLocks) 0 : ERTS_PROC_LOCK_STATUS); - erts_smp_proc_lock(BIF_P, xlocks); + erts_proc_lock(BIF_P, xlocks); suspendee = erts_pid2proc(BIF_P, ERTS_PROC_LOCK_MAIN|xlocks, @@ -8334,34 +8962,15 @@ suspend_process_2(BIF_ALIST_2) smon = erts_add_or_lookup_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); -#ifndef ERTS_SMP /* no ERTS_SMP */ - - /* This is really a piece of cake without SMP support... */ - if (!smon->active) { - erts_smp_atomic32_read_bor_nob(&suspendee->state, ERTS_PSFLG_SUSPENDED); - suspend_process(BIF_P, suspendee); - smon->active++; - res = am_true; - } - else if (unless_suspending) - res = am_false; - else if (smon->active == INT_MAX) - goto system_limit; - else { - smon->active++; - res = am_true; - } - -#else /* ERTS_SMP */ /* ... but a little trickier with SMP support ... */ if (asynchronous) { /* --- Asynchronous suspend begin ---------------------------------- */ - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_LINK + ERTS_LC_ASSERT(ERTS_PROC_LOCK_LINK & erts_proc_lc_my_proc_locks(BIF_P)); - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS == erts_proc_lc_my_proc_locks(suspendee)); if (smon->active) { @@ -8401,10 +9010,10 @@ suspend_process_2(BIF_ALIST_2) else /* if (!asynchronous) */ { /* --- Synchronous suspend begin ----------------------------------- */ - ERTS_SMP_LC_ASSERT(((ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS) + ERTS_LC_ASSERT(((ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS) & erts_proc_lc_my_proc_locks(BIF_P)) == (ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS)); - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS == erts_proc_lc_my_proc_locks(suspendee)); if (BIF_P->suspendee == BIF_ARG_1) { @@ -8470,10 +9079,9 @@ suspend_process_2(BIF_ALIST_2) /* --- Synchronous suspend end ------------------------------------- */ } -#endif /* ERTS_SMP */ #ifdef DEBUG { - erts_aint32_t state = erts_smp_atomic32_read_acqb(&suspendee->state); + erts_aint32_t state = erts_atomic32_read_acqb(&suspendee->state); ASSERT((state & ERTS_PSFLG_SUSPENDED) || (asynchronous && smon->pending)); ASSERT((state & ERTS_PSFLG_SUSPENDED) @@ -8481,8 +9089,8 @@ suspend_process_2(BIF_ALIST_2) } #endif - erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); - erts_smp_proc_unlock(BIF_P, xlocks); + erts_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(BIF_P, xlocks); BIF_RET(res); system_limit: @@ -8490,26 +9098,22 @@ suspend_process_2(BIF_ALIST_2) goto do_return; no_suspendee: -#ifdef ERTS_SMP BIF_P->suspendee = NIL; -#endif erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); badarg: ERTS_BIF_PREP_ERROR(res, BIF_P, BADARG); -#ifdef ERTS_SMP goto do_return; yield: ERTS_BIF_PREP_YIELD2(res, bif_export[BIF_suspend_process_2], BIF_P, BIF_ARG_1, BIF_ARG_2); -#endif do_return: if (suspendee) - erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); if (xlocks) - erts_smp_proc_unlock(BIF_P, xlocks); + erts_proc_unlock(BIF_P, xlocks); return res; } @@ -8529,7 +9133,7 @@ resume_process_1(BIF_ALIST_1) if (BIF_P->common.id == BIF_ARG_1) BIF_ERROR(BIF_P, BADARG); - erts_smp_proc_lock(BIF_P, ERTS_PROC_LOCK_LINK); + erts_proc_lock(BIF_P, ERTS_PROC_LOCK_LINK); smon = erts_lookup_suspend_monitor(BIF_P->suspend_monitors, BIF_ARG_1); if (!smon) { @@ -8575,16 +9179,17 @@ resume_process_1(BIF_ALIST_1) goto no_suspendee; ASSERT(ERTS_PSFLG_SUSPENDED - & erts_smp_atomic32_read_nob(&suspendee->state)); - resume_process(suspendee); + & erts_atomic32_read_nob(&suspendee->state)); + ASSERT(BIF_P != suspendee); + resume_process(suspendee, ERTS_PROC_LOCK_STATUS); - erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); } if (!smon->active && !smon->pending) erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); - erts_smp_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); + erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); BIF_RET(am_true); @@ -8593,74 +9198,148 @@ resume_process_1(BIF_ALIST_1) erts_delete_suspend_monitor(&BIF_P->suspend_monitors, BIF_ARG_1); error: - erts_smp_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); + erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_LINK); BIF_ERROR(BIF_P, BADARG); } +BIF_RETTYPE +erts_internal_is_process_executing_dirty_1(BIF_ALIST_1) +{ + if (is_not_internal_pid(BIF_ARG_1)) + BIF_ERROR(BIF_P, BADARG); + else { + Process *rp = erts_proc_lookup(BIF_ARG_1); + if (rp) { + erts_aint32_t state = erts_atomic32_read_nob(&rp->state); + if (state & (ERTS_PSFLG_DIRTY_RUNNING + |ERTS_PSFLG_DIRTY_RUNNING_SYS)) { + BIF_RET(am_true); + } + } + } + BIF_RET(am_false); +} + +static ERTS_INLINE void +run_queues_len_aux(ErtsRunQueue *rq, Uint *tot_len, Uint *qlen, int *ip, int incl_active_sched, int locked) +{ + Sint rq_len; + + if (locked) + rq_len = (Sint) erts_atomic32_read_dirty(&rq->len); + else + rq_len = (Sint) erts_atomic32_read_nob(&rq->len); + ASSERT(rq_len >= 0); + + if (incl_active_sched) { + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_aint32_t dcnt; + if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(rq)) { + dcnt = erts_atomic32_read_nob(&dirty_count.cpu.active); + ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_cpu_schedulers); + } + else { + ASSERT(ERTS_RUNQ_IS_DIRTY_IO_RUNQ(rq)); + dcnt = erts_atomic32_read_nob(&dirty_count.io.active); + ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_io_schedulers); + } + rq_len += (Sint) dcnt; + } + else + { + if (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC) + rq_len++; + } + } + if (qlen) + qlen[(*ip)++] = rq_len; + *tot_len += (Uint) rq_len; +} + Uint -erts_run_queues_len(Uint *qlen) +erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched, + int incl_dirty_io) { - int i = 0; + int i = 0, j = 0; Uint len = 0; - ERTS_ATOMIC_FOREACH_RUNQ(rq, - { - Sint pqlen = 0; - int pix; - for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) - pqlen += RUNQ_READ_LEN(&rq->procs.prio_info[pix].len); + int no_rqs = erts_no_run_queues; + + if (incl_dirty_io) + no_rqs += ERTS_NUM_DIRTY_RUNQS; + else + no_rqs += ERTS_NUM_DIRTY_CPU_RUNQS; + + if (atomic_queues_read) { + ERTS_ATOMIC_FOREACH_RUNQ_X(rq, no_rqs, + run_queues_len_aux(rq, &len, qlen, &j, + incl_active_sched, 1), + /* Nothing... */); + } + else { + for (i = 0; i < no_rqs; i++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(i); + run_queues_len_aux(rq, &len, qlen, &j, incl_active_sched, 0); + } - if (pqlen < 0) - pqlen = 0; - if (qlen) - qlen[i++] = pqlen; - len += pqlen; } - ); return len; } Eterm -erts_process_status(Process *c_p, ErtsProcLocks c_p_locks, - Process *rp, Eterm rpid) +erts_process_state2status(erts_aint32_t state) +{ + if (state & ERTS_PSFLG_FREE) + return am_free; + + if (state & ERTS_PSFLG_EXITING) + return am_exiting; + + if (state & ERTS_PSFLG_GC) + return am_garbage_collecting; + + if (state & ERTS_PSFLG_SUSPENDED) + return am_suspended; + + if (state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) + return am_running; + + if (state & (ERTS_PSFLG_ACTIVE + | ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + return am_runnable; + + return am_waiting; +} + +Eterm +erts_process_status(Process *rp, Eterm rpid) { Eterm res = am_undefined; Process *p = rp ? rp : erts_proc_lookup_raw(rpid); if (p) { - erts_aint32_t state = erts_smp_atomic32_read_acqb(&p->state); - if (state & ERTS_PSFLG_FREE) - res = am_free; - else if (state & ERTS_PSFLG_EXITING) - res = am_exiting; - else if (state & ERTS_PSFLG_GC) - res = am_garbage_collecting; - else if (state & ERTS_PSFLG_SUSPENDED) - res = am_suspended; - else if (state & ERTS_PSFLG_RUNNING) - res = am_running; - else if (state & ERTS_PSFLG_ACTIVE) - res = am_runnable; - else - res = am_waiting; + erts_aint32_t state = erts_atomic32_read_acqb(&p->state); + res = erts_process_state2status(state); } -#ifdef ERTS_SMP else { int i; ErtsSchedulerData *esdp; for (i = 0; i < erts_no_schedulers; i++) { esdp = ERTS_SCHEDULER_IX(i); - erts_smp_runq_lock(esdp->run_queue); + erts_runq_lock(esdp->run_queue); if (esdp->free_process && esdp->free_process->common.id == rpid) { res = am_free; - erts_smp_runq_unlock(esdp->run_queue); + erts_runq_unlock(esdp->run_queue); break; } - erts_smp_runq_unlock(esdp->run_queue); + erts_runq_unlock(esdp->run_queue); } } -#endif return res; } @@ -8676,9 +9355,9 @@ erts_suspend(Process* c_p, ErtsProcLocks c_p_locks, Port *busy_port) int suspend; ASSERT(c_p == erts_get_current_process()); - ERTS_SMP_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p)); + ERTS_LC_ASSERT(c_p_locks == erts_proc_lc_my_proc_locks(c_p)); if (!(c_p_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); if (busy_port) suspend = erts_save_suspend_process_on_port(busy_port, c_p); @@ -8694,7 +9373,7 @@ erts_suspend(Process* c_p, ErtsProcLocks c_p_locks, Port *busy_port) } if (!(c_p_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); if (suspend && busy_port && erts_system_monitor_flags.busy_port) monitor_generic(c_p, am_busy_port, busy_port->common.id); @@ -8703,12 +9382,12 @@ erts_suspend(Process* c_p, ErtsProcLocks c_p_locks, Port *busy_port) void erts_resume(Process* process, ErtsProcLocks process_locks) { - ERTS_SMP_LC_ASSERT(process_locks == erts_proc_lc_my_proc_locks(process)); + ERTS_LC_ASSERT(process_locks == erts_proc_lc_my_proc_locks(process)); if (!(process_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_lock(process, ERTS_PROC_LOCK_STATUS); - resume_process(process); + erts_proc_lock(process, ERTS_PROC_LOCK_STATUS); + resume_process(process, process_locks|ERTS_PROC_LOCK_STATUS); if (!(process_locks & ERTS_PROC_LOCK_STATUS)) - erts_smp_proc_unlock(process, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(process, ERTS_PROC_LOCK_STATUS); } int @@ -8725,10 +9404,10 @@ erts_resume_processes(ErtsProcList *list) proc = erts_pid2proc(NULL, 0, plp->pid, ERTS_PROC_LOCK_STATUS); if (proc) { if (erts_proclist_same(plp, proc)) { - resume_process(proc); + resume_process(proc, ERTS_PROC_LOCK_STATUS); nresumed++; } - erts_smp_proc_unlock(proc, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(proc, ERTS_PROC_LOCK_STATUS); } fplp = plp; plp = plp->next; @@ -8740,7 +9419,7 @@ erts_resume_processes(ErtsProcList *list) Eterm erts_get_process_priority(Process *p) { - erts_aint32_t state = erts_smp_atomic32_read_nob(&p->state); + erts_aint32_t state = erts_atomic32_read_nob(&p->state); switch (ERTS_PSFLGS_GET_USR_PRIO(state)) { case PRIORITY_MAX: return am_max; case PRIORITY_HIGH: return am_high; @@ -8763,7 +9442,7 @@ erts_set_process_priority(Process *p, Eterm value) default: return THE_NON_VALUE; break; } - a = erts_smp_atomic32_read_nob(&p->state); + a = erts_atomic32_read_nob(&p->state); if (nprio == ERTS_PSFLGS_GET_USR_PRIO(a)) oprio = nprio; else { @@ -8771,7 +9450,7 @@ erts_set_process_priority(Process *p, Eterm value) erts_aint32_t e, n, aprio; if (a & ERTS_PSFLG_ACTIVE_SYS) { - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); slocked = 1; } @@ -8785,7 +9464,7 @@ erts_set_process_priority(Process *p, Eterm value) int max_qbit; if (!slocked) { - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); slocked = 1; } @@ -8826,8 +9505,12 @@ erts_set_process_priority(Process *p, Eterm value) n |= ((nprio << ERTS_PSFLGS_USR_PRIO_OFFSET) | (aprio << ERTS_PSFLGS_ACT_PRIO_OFFSET)); - a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); + a = erts_atomic32_cmpxchg_mb(&p->state, n, e); } while (a != e); + + if (slocked) + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } switch (oprio) { @@ -8839,6 +9522,27 @@ erts_set_process_priority(Process *p, Eterm value) } } +#ifdef __WIN32__ +Sint64 +erts_time2reds(ErtsMonotonicTime start, ErtsMonotonicTime end) +{ + return ERTS_TIME2REDS_IMPL__(start, end); +} +#endif + +static int +scheduler_gc_proc(Process *c_p, int reds_left) +{ + int fcalls, reds; + if (!ERTS_PROC_GET_SAVED_CALLS_BUF(c_p)) + fcalls = reds_left; + else + fcalls = reds_left - CONTEXT_REDS; + reds = erts_garbage_collect_nobump(c_p, 0, c_p->arg_reg, c_p->arity, fcalls); + ASSERT(reds_left >= reds); + return reds; +} + /* * schedule() is called from BEAM (process_main()) or HiPE * (hipe_mode_switch()) when the current process is to be @@ -8857,19 +9561,19 @@ erts_set_process_priority(Process *p, Eterm value) * so that normal processes get to run more frequently. */ -Process *schedule(Process *p, int calls) +Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) { Process *proxy_p = NULL; ErtsRunQueue *rq; - erts_aint_t dt; - ErtsSchedulerData *esdp; int context_reds; int fcalls; - int input_reductions; int actual_reds; int reds; Uint32 flags; erts_aint32_t state = 0; /* Supress warning... */ + int is_normal_sched; + + ERTS_MSACC_DECLARE_CACHE(); #ifdef USE_VM_PROBES if (p != NULL && DTRACE_ENABLED(process_unscheduled)) { @@ -8882,145 +9586,166 @@ Process *schedule(Process *p, int calls) if (ERTS_USE_MODIFIED_TIMING()) { context_reds = ERTS_MODIFIED_TIMING_CONTEXT_REDS; - input_reductions = ERTS_MODIFIED_TIMING_INPUT_REDS; } else { context_reds = CONTEXT_REDS; - input_reductions = INPUT_REDUCTIONS; } - ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data()) + ERTS_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data()) || !erts_thr_progress_is_blocking()); /* * Clean up after the process being scheduled out. */ if (!p) { /* NULL in the very first schedule() call */ - esdp = erts_get_scheduler_data(); + is_normal_sched = !esdp; + if (is_normal_sched) { + esdp = erts_get_scheduler_data(); + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); + } + else { + ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp)); + } rq = erts_get_runq_current(esdp); ASSERT(esdp); - fcalls = (int) erts_smp_atomic32_read_acqb(&function_calls); actual_reds = reds = 0; - erts_smp_runq_lock(rq); + erts_runq_lock(rq); } else { - sched_out_proc: - -#ifdef ERTS_SMP - ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); - esdp = p->scheduler_data; + is_normal_sched = !esdp; + if (is_normal_sched) { + esdp = p->scheduler_data; + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); + } + else { + ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp)); + } ASSERT(esdp->current_process == p || esdp->free_process == p); -#else - esdp = erts_scheduler_data; - ASSERT(esdp->current_process == p); -#endif + + sched_out_proc: + + ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); + reds = actual_reds = calls - esdp->virtual_reds; + + ASSERT(actual_reds >= 0); if (reds < ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST) reds = ERTS_PROC_MIN_CONTEXT_SWITCH_REDS_COST; esdp->virtual_reds = 0; - fcalls = (int) erts_smp_atomic32_add_read_acqb(&function_calls, reds); ASSERT(esdp && esdp == erts_get_scheduler_data()); rq = erts_get_runq_current(esdp); p->reds += actual_reds; - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); - - state = erts_smp_atomic32_read_acqb(&p->state); + state = erts_atomic32_read_nob(&p->state); if (IS_TRACED(p)) { if (IS_TRACED_FL(p, F_TRACE_CALLS) && !(state & ERTS_PSFLG_FREE)) erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_OUT); - if (state & (ERTS_PSFLG_FREE|ERTS_PSFLG_EXITING)) { + if ((state & (ERTS_PSFLG_FREE|ERTS_PSFLG_EXITING)) == ERTS_PSFLG_EXITING) { if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT)) - trace_sched(p, ((state & ERTS_PSFLG_FREE) - ? am_out_exited - : am_out_exiting)); + trace_sched(p, ERTS_PROC_LOCK_MAIN, + ((state & ERTS_PSFLG_FREE) + ? am_out_exited + : am_out_exiting)); } else { - if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED)) - trace_sched(p, am_out); - else if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS)) - trace_virtual_sched(p, am_out); + if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED) || + ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS)) + trace_sched(p, ERTS_PROC_LOCK_MAIN, am_out); } } -#ifdef ERTS_SMP - if (state & ERTS_PSFLG_PENDING_EXIT) + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + + if (p->trace_msg_q) { + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + erts_schedule_flush_trace_messages(p, 1); + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + } + + /* have to re-read state after taking lock */ + state = erts_atomic32_read_nob(&p->state); + + if (is_normal_sched && (state & ERTS_PSFLG_PENDING_EXIT)) erts_handle_pending_exit(p, (ERTS_PROC_LOCK_MAIN + | ERTS_PROC_LOCK_TRACE | ERTS_PROC_LOCK_STATUS)); if (p->pending_suspenders) handle_pending_suspend(p, (ERTS_PROC_LOCK_MAIN + | ERTS_PROC_LOCK_TRACE | ERTS_PROC_LOCK_STATUS)); -#endif esdp->reductions += reds; - schedule_out_process(rq, state, p, proxy_p); /* Returns with rq locked! */ - proxy_p = NULL; - - ERTS_PROC_REDUCTIONS_EXECUTED(rq, - (int) ERTS_PSFLGS_GET_USR_PRIO(state), - reds, - actual_reds); - - esdp->current_process = NULL; -#ifdef ERTS_SMP - p->scheduler_data = NULL; -#endif - - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + { + int dec_refc; + + /* schedule_out_process() returns with rq locked! */ + dec_refc = schedule_out_process(rq, state, p, + proxy_p, is_normal_sched); + proxy_p = NULL; + + ERTS_PROC_REDUCTIONS_EXECUTED(esdp, rq, + (int) ERTS_PSFLGS_GET_USR_PRIO(state), + reds, + actual_reds); + + esdp->current_process = NULL; + if (is_normal_sched) + p->scheduler_data = NULL; + + erts_proc_unlock(p, (ERTS_PROC_LOCK_MAIN + | ERTS_PROC_LOCK_STATUS + | ERTS_PROC_LOCK_TRACE)); + + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); + + if (state & ERTS_PSFLG_FREE) { + if (!is_normal_sched) { + ASSERT(p->flags & F_DELAYED_DEL_PROC); + } + else { + ASSERT(esdp->free_process == p); + esdp->free_process = NULL; + } + } - if (state & ERTS_PSFLG_FREE) { -#ifdef ERTS_SMP - ASSERT(esdp->free_process == p); - esdp->free_process = NULL; -#else - state = erts_smp_atomic32_read_nob(&p->state); - if (!(state & ERTS_PSFLG_IN_RUNQ)) - erts_free_proc(p); -#endif - } + if (dec_refc) + erts_proc_dec_refc(p); + } -#ifdef ERTS_SMP ASSERT(!esdp->free_process); -#endif ASSERT(!esdp->current_process); - ERTS_SMP_CHK_NO_PROC_LOCKS; - - dt = erts_do_time_read_and_reset(); - if (dt) { - erts_smp_runq_unlock(rq); - erts_bump_timer(dt); - erts_smp_runq_lock(rq); - } - BM_STOP_TIMER(system); + ERTS_CHK_NO_PROC_LOCKS; } - ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) - || !erts_thr_progress_is_blocking()); + ERTS_LC_ASSERT(!is_normal_sched || !erts_thr_progress_is_blocking()); check_activities_to_run: { -#ifdef ERTS_SMP + erts_aint32_t psflg_running, psflg_running_sys; ErtsMigrationPaths *mps; ErtsMigrationPath *mp; - ErtsProcList *pnd_xtrs = rq->procs.pending_exiters; - if (erts_proclist_fetch(&pnd_xtrs, NULL)) { - rq->procs.pending_exiters = NULL; - erts_smp_runq_unlock(rq); - handle_pending_exiters(pnd_xtrs); - erts_smp_runq_lock(rq); - } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (is_normal_sched) { + + if (esdp->check_time_reds >= ERTS_CHECK_TIME_REDS) + (void) erts_get_monotonic_time(esdp); + + if (esdp->last_monotonic_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { + erts_runq_unlock(rq); + erts_bump_timers(esdp->timer_wheel, esdp->last_monotonic_time); + erts_runq_lock(rq); + } + if (rq->check_balance_reds <= 0) check_balance(rq); - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + ERTS_LC_ASSERT(!erts_thr_progress_is_blocking()); mps = erts_get_migration_paths_managed(); mp = &mps->mpath[rq->ix]; @@ -9029,79 +9754,66 @@ Process *schedule(Process *p, int calls) immigrate(rq, mp); } - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); continue_check_activities_to_run: flags = ERTS_RUNQ_FLGS_GET_NOB(rq); continue_check_activities_to_run_known_flags: - ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) - || flags & ERTS_RUNQ_FLG_NONEMPTY); + ASSERT(!is_normal_sched || (flags & ERTS_RUNQ_FLG_NONEMPTY)); - if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND|ERTS_RUNQ_FLG_SUSPENDED)) { - if (flags & ERTS_RUNQ_FLG_SUSPENDED) { + if (!is_normal_sched) { + if (erts_atomic32_read_acqb(&esdp->ssi->flags) + & (ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) { suspend_scheduler(esdp); - flags = ERTS_RUNQ_FLGS_GET_NOB(rq); - } - if (flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) { - flags = ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_CHK_CPU_BIND); - flags &= ~ ERTS_RUNQ_FLG_CHK_CPU_BIND; - erts_sched_check_cpu_bind(esdp); } } -#ifdef ERTS_DIRTY_SCHEDULERS - else if (ERTS_SCHEDULER_IS_DIRTY(esdp) - && (erts_smp_atomic32_read_acqb(&esdp->ssi->flags) - & ERTS_SSI_FLG_SUSPENDED)) - suspend_scheduler(esdp); -#endif - - { + else { erts_aint32_t aux_work; - int leader_update = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 - : erts_thr_progress_update(esdp); + int leader_update; + + ASSERT(is_normal_sched); + + if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND + | ERTS_RUNQ_FLG_SUSPENDED + | ERTS_RUNQ_FLG_MSB_EXEC)) { + if (flags & (ERTS_RUNQ_FLG_SUSPENDED|ERTS_RUNQ_FLG_MSB_EXEC)) { + (void) ERTS_RUNQ_FLGS_UNSET_NOB(rq, ERTS_RUNQ_FLG_EXEC); + suspend_scheduler(esdp); + flags = ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_EXEC); + flags |= ERTS_RUNQ_FLG_EXEC; + } + if (flags & ERTS_RUNQ_FLG_CHK_CPU_BIND) { + flags = ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_CHK_CPU_BIND); + flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND; + erts_sched_check_cpu_bind(esdp); + } + } + + leader_update = erts_thr_progress_update(esdp); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); - if (aux_work | leader_update | ERTS_SCHED_FAIR) { - erts_smp_runq_unlock(rq); + if (aux_work | leader_update) { + erts_runq_unlock(rq); if (leader_update) erts_thr_progress_leader_update(esdp); - else if (ERTS_SCHED_FAIR) - ERTS_SCHED_FAIR_YIELD(); if (aux_work) handle_aux_work(&esdp->aux_work_data, aux_work, 0); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); } - ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) - || !erts_thr_progress_is_blocking()); + ERTS_LC_ASSERT(!erts_thr_progress_is_blocking()); } - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); -#else /* ERTS_SMP */ - { - erts_aint32_t aux_work; - aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); - if (aux_work) - handle_aux_work(&esdp->aux_work_data, aux_work, 0); - } -#endif /* ERTS_SMP */ flags = ERTS_RUNQ_FLGS_GET_NOB(rq); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && rq->halt_in_progress) { - /* - * TODO: if halt in progress, need to put the dirty scheduler - * to sleep somewhere around here to prevent it from picking up - * new work - */ + if (!is_normal_sched & !!(flags & ERTS_RUNQ_FLG_HALTING)) { + /* Wait for emulator to terminate... */ + while (1) + erts_milli_sleep(1000*1000); } - else -#endif - - if ((!(flags & ERTS_RUNQ_FLGS_QMASK) && !rq->misc.start) - || (rq->halt_in_progress && ERTS_EMPTY_RUNQ_PORTS(rq))) { + else if (!runq_got_work_to_execute_flags(flags)) { /* Prepare for scheduler wait */ -#ifdef ERTS_SMP - ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); + ERTS_LC_ASSERT(erts_lc_runq_is_locked(rq)); rq->wakeup_other = 0; rq->wakeup_other_reds = 0; @@ -9112,94 +9824,69 @@ Process *schedule(Process *p, int calls) if (flags & ERTS_RUNQ_FLG_INACTIVE) empty_runq(rq); else { - if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && try_steal_task(rq)) - goto continue_check_activities_to_run; - - empty_runq(rq); - - /* - * Check for ERTS_RUNQ_FLG_SUSPENDED has to be done - * after trying to steal a task. - */ - flags = ERTS_RUNQ_FLGS_GET_NOB(rq); - if (flags & ERTS_RUNQ_FLG_SUSPENDED) { - non_empty_runq(rq); - flags |= ERTS_RUNQ_FLG_NONEMPTY; - goto continue_check_activities_to_run_known_flags; + ASSERT(!runq_got_work_to_execute(rq)); + if (!is_normal_sched) { + /* Dirty scheduler */ + if (erts_atomic32_read_acqb(&esdp->ssi->flags) + & (ERTS_SSI_FLG_SUSPENDED|ERTS_SSI_FLG_MSB_EXEC)) { + /* Go suspend... */ + goto continue_check_activities_to_run_known_flags; + } + } + else { + /* Normal scheduler */ + if (try_steal_task(rq)) + goto continue_check_activities_to_run; + /* + * Check for suspend has to be done after trying + * to steal a task... + */ + flags = ERTS_RUNQ_FLGS_GET_NOB(rq); + if ((flags & ERTS_RUNQ_FLG_SUSPENDED) + /* If multi scheduling block and we have + * dirty work, suspend and let dirty + * scheduler handle work... */ + || ((((flags & (ERTS_RUNQ_FLG_HALTING + | ERTS_RUNQ_FLG_MSB_EXEC)) + == ERTS_RUNQ_FLG_MSB_EXEC)) + && have_dirty_work()) + ) { + non_empty_runq(rq); + flags |= ERTS_RUNQ_FLG_NONEMPTY; + /* + * Go suspend... + */ + goto continue_check_activities_to_run_known_flags; + } } + empty_runq(rq); } -#endif + (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_EXEC); scheduler_wait(&fcalls, esdp, rq); - -#ifdef ERTS_SMP + flags = ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_EXEC); + flags |= ERTS_RUNQ_FLG_EXEC; + ERTS_MSACC_UPDATE_CACHE(); non_empty_runq(rq); -#endif goto check_activities_to_run; } - else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && - (fcalls > input_reductions && - prepare_for_sys_schedule(esdp))) { - /* - * Schedule system-level activities. - */ - erts_smp_atomic32_set_relb(&function_calls, 0); - fcalls = 0; - - ASSERT(!erts_port_task_have_outstanding_io_tasks()); - -#if 0 /* Not needed since we wont wait in sys schedule */ - erts_sys_schedule_interrupt(0); -#endif - erts_smp_runq_unlock(rq); - erl_sys_schedule(1); - dt = erts_do_time_read_and_reset(); - if (dt) erts_bump_timer(dt); - -#ifdef ERTS_SMP - erts_smp_runq_lock(rq); - clear_sys_scheduling(); - goto continue_check_activities_to_run; -#else - goto check_activities_to_run; -#endif - } - - if (rq->misc.start) + if (flags & ERTS_RUNQ_FLG_MISC_OP) exec_misc_ops(rq); -#ifdef ERTS_SMP wakeup_other.check(rq, flags); -#endif /* * Find a new port to run. */ - if (RUNQ_READ_LEN(&rq->ports.info.len)) { - int have_outstanding_io; - have_outstanding_io = erts_port_task_execute(rq, &esdp->current_port); - if ((have_outstanding_io && fcalls > 2*input_reductions) - || rq->halt_in_progress) { - /* - * If we have performed more than 2*INPUT_REDUCTIONS since - * last call to erl_sys_schedule() and we still haven't - * handled all I/O tasks we stop running processes and - * focus completely on ports. - * - * One could argue that this is a strange behavior. The - * reason for doing it this way is that it is similar - * to the behavior before port tasks were introduced. - * We don't want to change the behavior too much, at - * least not at the time of writing. This behavior - * might change in the future. - * - * /rickard - */ - goto check_activities_to_run; - } + flags = ERTS_RUNQ_FLGS_GET_NOB(rq); + + if (flags & PORT_BIT) { + erts_port_task_execute(rq, &esdp->current_port); + if (flags & ERTS_RUNQ_FLG_HALTING) + goto check_activities_to_run; } /* @@ -9208,11 +9895,12 @@ Process *schedule(Process *p, int calls) pick_next_process: { erts_aint32_t psflg_band_mask; int prio_q; - int qmask; + int qmask, qbit; flags = ERTS_RUNQ_FLGS_GET_NOB(rq); qmask = (int) (flags & ERTS_RUNQ_FLGS_PROCS_QMASK); - switch (qmask & -qmask) { + qbit = qmask & -qmask; + switch (qbit) { case MAX_BIT: prio_q = PRIORITY_MAX; break; @@ -9228,11 +9916,10 @@ Process *schedule(Process *p, int calls) case 0: /* No process at all */ default: ASSERT(qmask == 0); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); goto check_activities_to_run; } - BM_START_TIMER(system); - /* * Take the chosen process out of the queue. */ @@ -9240,70 +9927,95 @@ Process *schedule(Process *p, int calls) ASSERT(p); /* Wrong qmask in rq->flags? */ - psflg_band_mask = ~(((erts_aint32_t) 1) << (ERTS_PSFLGS_GET_PRQ_PRIO(state) - + ERTS_PSFLGS_IN_PRQ_MASK_OFFSET)); - -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT((state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) != - (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)); - if (state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) { - ASSERT((ERTS_SCHEDULER_IS_DIRTY_CPU(esdp) && (state & ERTS_PSFLG_DIRTY_CPU_PROC)) || - (ERTS_SCHEDULER_IS_DIRTY_IO(esdp) && (state & ERTS_PSFLG_DIRTY_IO_PROC))); - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !(state & ERTS_PSFLG_ACTIVE_SYS)) - goto pick_next_process; - state &= ~(ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); + if (is_normal_sched) { + psflg_running = ERTS_PSFLG_RUNNING; + psflg_running_sys = ERTS_PSFLG_RUNNING_SYS; + psflg_band_mask = ~(((erts_aint32_t) 1) << (ERTS_PSFLGS_GET_PRQ_PRIO(state) + + ERTS_PSFLGS_IN_PRQ_MASK_OFFSET)); + } + else { + psflg_running = ERTS_PSFLG_DIRTY_RUNNING; + psflg_running_sys = ERTS_PSFLG_DIRTY_RUNNING_SYS; + psflg_band_mask = ~((erts_aint32_t) 0); } -#endif if (!(state & ERTS_PSFLG_PROXY)) psflg_band_mask &= ~ERTS_PSFLG_IN_RUNQ; else { + Eterm pid = p->common.id; proxy_p = p; - p = erts_proc_lookup_raw(proxy_p->common.id); + p = (is_normal_sched + ? erts_proc_lookup_raw(pid) + : erts_pid2proc_opt(NULL, 0, pid, 0, + ERTS_P2P_FLG_INC_REFC)); if (!p) { free_proxy_proc(proxy_p); proxy_p = NULL; goto pick_next_process; } - state = erts_smp_atomic32_read_nob(&p->state); + state = erts_atomic32_read_nob(&p->state); } + if (!is_normal_sched) + clear_proc_dirty_queue_bit(p, rq, qbit); + while (1) { - erts_aint32_t exp, new, tmp; - tmp = new = exp = state; + erts_aint32_t exp, new; + int run_process; + new = exp = state; new &= psflg_band_mask; - if (!(state & (ERTS_PSFLG_RUNNING - | ERTS_PSFLG_RUNNING_SYS))) { - tmp = state & (ERTS_PSFLG_SUSPENDED - | ERTS_PSFLG_PENDING_EXIT - | ERTS_PSFLG_ACTIVE_SYS); - if (tmp != ERTS_PSFLG_SUSPENDED) { - if (state & ERTS_PSFLG_ACTIVE_SYS) - new |= ERTS_PSFLG_RUNNING_SYS; - else - new |= ERTS_PSFLG_RUNNING; - } + /* + * Run process if not already running (or free) + * or exiting and not running on a normal + * scheduler, and not suspended (and not in a + * state where suspend should be ignored). + */ + run_process = (((!(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS + | ERTS_PSFLG_FREE))) + | (((state & (ERTS_PSFLG_RUNNING + + | ERTS_PSFLG_FREE + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING_SYS + | ERTS_PSFLG_EXITING)) + == ERTS_PSFLG_EXITING) + & (!!is_normal_sched)) + ) + & ((state & (ERTS_PSFLG_SUSPENDED + | ERTS_PSFLG_EXITING + | ERTS_PSFLG_FREE + | ERTS_PSFLG_PENDING_EXIT + | ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + != ERTS_PSFLG_SUSPENDED) + & (!(state & (ERTS_PSFLG_EXITING + | ERTS_PSFLG_PENDING_EXIT)) + | (!!is_normal_sched)) + ); + + if (run_process) { + if (state & (ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + new |= psflg_running_sys; + else + new |= psflg_running; } - state = erts_smp_atomic32_cmpxchg_relb(&p->state, new, exp); + state = erts_atomic32_cmpxchg_relb(&p->state, new, exp); if (state == exp) { - if ((state & (ERTS_PSFLG_RUNNING - | ERTS_PSFLG_RUNNING_SYS - | ERTS_PSFLG_FREE)) - || ((state & (ERTS_PSFLG_SUSPENDED - | ERTS_PSFLG_PENDING_EXIT - | ERTS_PSFLG_ACTIVE_SYS)) - == ERTS_PSFLG_SUSPENDED)) { - if (state & ERTS_PSFLG_FREE) { -#ifdef ERTS_SMP - erts_smp_proc_dec_refc(p); -#else - erts_free_proc(p); -#endif - } + if (!run_process) { if (proxy_p) { free_proxy_proc(proxy_p); proxy_p = NULL; } + else if (state & ERTS_PSFLG_FREE) { + /* free and not queued by proxy */ + erts_proc_dec_refc(p); + } + if (!is_normal_sched) + erts_proc_dec_refc(p); goto pick_next_process; } state = new; @@ -9315,145 +10027,219 @@ Process *schedule(Process *p, int calls) esdp->current_process = p; + calls = 0; + reds = context_reds; + + erts_runq_unlock(rq); + } -#ifdef ERTS_SMP - erts_smp_runq_unlock(rq); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_EMULATOR); + if (flags & ERTS_RUNQ_FLG_PROTECTED) (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); - ERTS_SMP_CHK_NO_PROC_LOCKS; + ERTS_CHK_NO_PROC_LOCKS; - erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + erts_proc_lock(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); + + state = erts_atomic32_read_nob(&p->state); if (erts_sched_stat.enabled) { int prio; - UWord old = ERTS_PROC_SCHED_ID(p, - (ERTS_PROC_LOCK_MAIN - | ERTS_PROC_LOCK_STATUS), - (UWord) esdp->no); + UWord old = ERTS_PROC_SCHED_ID(p, (UWord) esdp->no); int migrated = old && old != esdp->no; -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); -#endif + ASSERT(is_normal_sched); prio = (int) ERTS_PSFLGS_GET_USR_PRIO(state); - erts_smp_spin_lock(&erts_sched_stat.lock); + erts_spin_lock(&erts_sched_stat.lock); erts_sched_stat.prio[prio].total_executed++; erts_sched_stat.prio[prio].executed++; if (migrated) { erts_sched_stat.prio[prio].total_migrated++; erts_sched_stat.prio[prio].migrated++; } - erts_smp_spin_unlock(&erts_sched_stat.lock); + erts_spin_unlock(&erts_sched_stat.lock); } - if (ERTS_PROC_PENDING_EXIT(p)) { + state = erts_atomic32_read_nob(&p->state); + + if (is_normal_sched) { + if ((!!(state & ERTS_PSFLGS_DIRTY_WORK)) + & (!(state & ERTS_PSFLG_ACTIVE_SYS))) { + /* Migrate to dirty scheduler... */ + sunlock_sched_out_proc: + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + goto sched_out_proc; + } + ASSERT(!p->scheduler_data); + p->scheduler_data = esdp; + } + else { + if (state & (ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_PENDING_EXIT + | ERTS_PSFLG_EXITING)) { + /* + * IMPORTANT! We need to take care of + * scheduled check-process-code requests + * before continuing with dirty execution! + */ + /* Migrate to normal scheduler... */ + goto sunlock_sched_out_proc; + } + if ((state & ERTS_PSFLG_DIRTY_ACTIVE_SYS) + && rq == ERTS_DIRTY_IO_RUNQ) { + /* Migrate to dirty cpu scheduler... */ + goto sunlock_sched_out_proc; + } + + ASSERT(rq == ERTS_DIRTY_CPU_RUNQ + ? (state & (ERTS_PSFLG_DIRTY_CPU_PROC + | ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + : (rq == ERTS_DIRTY_IO_RUNQ + && (state & ERTS_PSFLG_DIRTY_IO_PROC))); + } + + if (state & ERTS_PSFLG_PENDING_EXIT) { erts_handle_pending_exit(p, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); - state = erts_smp_atomic32_read_nob(&p->state); + state = erts_atomic32_read_nob(&p->state); } - ASSERT(!p->scheduler_data); - p->scheduler_data = esdp; -#endif - reds = context_reds; - if (IS_TRACED(p)) { + + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + + /* Clear tracer if it has been removed */ + if (IS_TRACED(p) && erts_is_tracer_proc_enabled( + p, ERTS_PROC_LOCK_MAIN, &p->common)) { + if (state & ERTS_PSFLG_EXITING) { if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_EXIT)) - trace_sched(p, am_in_exiting); + trace_sched(p, ERTS_PROC_LOCK_MAIN, am_in_exiting); } else { - if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED)) - trace_sched(p, am_in); - else if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS)) - trace_virtual_sched(p, am_in); + if (ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED) || + ARE_TRACE_FLAGS_ON(p, F_TRACE_SCHED_PROCS)) + trace_sched(p, ERTS_PROC_LOCK_MAIN, am_in); } if (IS_TRACED_FL(p, F_TRACE_CALLS)) { erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_IN); } } - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); - -#ifdef ERTS_SMP - if (is_not_nil(ERTS_TRACER_PROC(p))) - erts_check_my_tracer_proc(p); -#endif - - if (state & ERTS_PSFLG_RUNNING_SYS) { - reds -= execute_sys_tasks(p, &state, reds); - if (reds <= 0 -#ifdef ERTS_DIRTY_SCHEDULERS - || (state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) -#endif - ) { - p->fcalls = reds; - goto sched_out_proc; - } - - ASSERT(state & ERTS_PSFLG_RUNNING_SYS); - ASSERT(!(state & ERTS_PSFLG_RUNNING)); - - while (1) { - erts_aint32_t n, e; - - if (((state & (ERTS_PSFLG_SUSPENDED - | ERTS_PSFLG_ACTIVE)) != ERTS_PSFLG_ACTIVE) - && !(state & ERTS_PSFLG_EXITING)) - goto sched_out_proc; - - n = e = state; - n &= ~ERTS_PSFLG_RUNNING_SYS; - n |= ERTS_PSFLG_RUNNING; - - state = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); - if (state == e) { - state = n; - break; - } - - ASSERT(state & ERTS_PSFLG_RUNNING_SYS); - ASSERT(!(state & ERTS_PSFLG_RUNNING)); - } - } + if (is_normal_sched) { + + if (state & ERTS_PSFLG_RUNNING_SYS) { + /* + * GC is normally never delayed when a process + * is scheduled out, but might be when executing + * hand written beam assembly in + * prim_eval:'receive'. If GC is delayed we are + * not allowed to execute system tasks. + */ + if (!(p->flags & F_DELAY_GC)) { + int cost = execute_sys_tasks(p, &state, reds); + calls += cost; + reds -= cost; + if (reds <= 0) + goto sched_out_proc; + if (state & ERTS_PSFLGS_DIRTY_WORK) + goto sched_out_proc; + } + + ASSERT(state & psflg_running_sys); + ASSERT(!(state & psflg_running)); + + while (1) { + erts_aint32_t n, e; + + if (((state & (ERTS_PSFLG_SUSPENDED + | ERTS_PSFLG_ACTIVE)) != ERTS_PSFLG_ACTIVE) + && !(state & ERTS_PSFLG_EXITING)) { + goto sched_out_proc; + } + + n = e = state; + n &= ~psflg_running_sys; + n |= psflg_running; + + state = erts_atomic32_cmpxchg_mb(&p->state, n, e); + if (state == e) { + state = n; + break; + } + + ASSERT(state & psflg_running_sys); + ASSERT(!(state & psflg_running)); + } + } - if (!(state & ERTS_PSFLG_EXITING) - && ((FLAGS(p) & F_FORCE_GC) - || (MSO(p).overhead > BIN_VHEAP_SZ(p)))) { - reds -= erts_garbage_collect(p, 0, p->arg_reg, p->arity); - if (reds <= 0) { - p->fcalls = reds; - goto sched_out_proc; - } - } + if (ERTS_IS_GC_DESIRED(p)) { + if (!(state & ERTS_PSFLG_EXITING) + && !(p->flags & (F_DELAY_GC|F_DISABLE_GC))) { + int cost = scheduler_gc_proc(p, reds); + calls += cost; + reds -= cost; + if (reds <= 0) + goto sched_out_proc; + if (p->flags & (F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC)) + goto sched_out_proc; + } + } + } if (proxy_p) { free_proxy_proc(proxy_p); proxy_p = NULL; } - + p->fcalls = reds; - ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); + + ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); /* Never run a suspended process */ - ASSERT(!(ERTS_PSFLG_SUSPENDED & erts_smp_atomic32_read_nob(&p->state))); +#ifdef DEBUG + { + erts_aint32_t dstate = erts_atomic32_read_nob(&p->state); + ASSERT(!(ERTS_PSFLG_SUSPENDED & dstate) + || (ERTS_PSFLG_DIRTY_RUNNING_SYS & dstate)); + } +#endif + + ASSERT(erts_proc_read_refc(p) > 0); + + if (!(state & ERTS_PSFLG_EXITING) && ERTS_PTMR_IS_TIMED_OUT(p)) { + BeamInstr** pi; + ETHR_MEMBAR(ETHR_LoadLoad|ETHR_LoadStore); + pi = (BeamInstr **) p->def_arg_reg; + p->i = *pi; + p->flags &= ~F_INSLPQUEUE; + p->flags |= F_TIMO; + ERTS_PTMR_CLEAR(p); + } return p; } } static int -notify_sys_task_executed(Process *c_p, ErtsProcSysTask *st, Eterm st_result) +notify_sys_task_executed(Process *c_p, ErtsProcSysTask *st, + Eterm st_result, int normal_sched) { - Process *rp = erts_proc_lookup(st->requester); + Process *rp; + if (!normal_sched) + rp = erts_pid2proc_opt(c_p, ERTS_PROC_LOCK_MAIN, + st->requester, 0, + ERTS_P2P_FLG_INC_REFC); + else + rp = erts_proc_lookup(st->requester); if (rp) { ErtsProcLocks rp_locks; ErlOffHeap *ohp; - ErlHeapFragment* bp; + ErtsMessage *mp; Eterm *hp, msg, req_id, result; Uint st_result_sz, hsz; #ifdef DEBUG @@ -9465,11 +10251,7 @@ notify_sys_task_executed(Process *c_p, ErtsProcSysTask *st, Eterm st_result) st_result_sz = is_immed(st_result) ? 0 : size_object(st_result); hsz = st->req_id_sz + st_result_sz + 4 /* 3-tuple */; - hp = erts_alloc_message_heap(hsz, - &bp, - &ohp, - rp, - &rp_locks); + mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp); #ifdef DEBUG hp_start = hp; @@ -9494,21 +10276,16 @@ notify_sys_task_executed(Process *c_p, ErtsProcSysTask *st, Eterm st_result) ASSERT(hp_start + hsz == hp); #endif - erts_queue_message(rp, - &rp_locks, - bp, - msg, - NIL -#ifdef USE_VM_PROBES - , NIL -#endif - ); + erts_queue_message(rp, rp_locks, mp, msg, c_p->common.id); if (c_p == rp) rp_locks &= ~ERTS_PROC_LOCK_MAIN; if (rp_locks) - erts_smp_proc_unlock(rp, rp_locks); + erts_proc_unlock(rp, rp_locks); + + if (!normal_sched) + erts_proc_dec_refc(rp); } erts_cleanup_offheap(&st->off_heap); @@ -9527,7 +10304,7 @@ fetch_sys_task(Process *c_p, erts_aint32_t state, int *qmaskp, int *priop) *priop = -1; /* Shut up annoying erroneous warning */ - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); if (!c_p->sys_task_qs) { qmask = 0; @@ -9647,13 +10424,13 @@ fetch_sys_task(Process *c_p, erts_aint32_t state, int *qmaskp, int *priop) if (a == n) break; - a = erts_smp_atomic32_cmpxchg_nob(&c_p->state, n, e); + a = erts_atomic32_cmpxchg_nob(&c_p->state, n, e); } while (a != e); } done: - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); if (unused_qs) proc_sys_task_queues_free(unused_qs); @@ -9664,28 +10441,28 @@ done: } static void save_gc_task(Process *c_p, ErtsProcSysTask *st, int prio); +static void save_dirty_task(Process *c_p, ErtsProcSysTask *st); static int execute_sys_tasks(Process *c_p, erts_aint32_t *statep, int in_reds) { - int garbage_collected = 0; + int minor_gc = 0, major_gc = 0; erts_aint32_t state = *statep; - int max_reds = in_reds; - int reds = 0; + int reds = in_reds; int qmask = 0; - ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCK_MAIN); + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(erts_proc_sched_data(c_p))); + ERTS_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCK_MAIN); do { + ErtsProcSysTaskType type; ErtsProcSysTask *st; int st_prio; Eterm st_res; if (state & (ERTS_PSFLG_EXITING|ERTS_PSFLG_PENDING_EXIT)) { -#ifdef ERTS_SMP if (state & ERTS_PSFLG_PENDING_EXIT) erts_handle_pending_exit(c_p, ERTS_PROC_LOCK_MAIN); -#endif ASSERT(ERTS_PROC_IS_EXITING(c_p)); break; } @@ -9694,35 +10471,95 @@ execute_sys_tasks(Process *c_p, erts_aint32_t *statep, int in_reds) if (!st) break; - switch (st->type) { - case ERTS_PSTT_GC: + type = st->type; + + switch (type) { + case ERTS_PSTT_GC_MAJOR: + case ERTS_PSTT_GC_MINOR: if (c_p->flags & F_DISABLE_GC) { save_gc_task(c_p, st, st_prio); st = NULL; - reds++; + reds--; } else { - if (!garbage_collected) { - FLAGS(c_p) |= F_NEED_FULLSWEEP; - reds += erts_garbage_collect(c_p, - 0, - c_p->arg_reg, - c_p->arity); - garbage_collected = 1; + if ((!minor_gc + || (!major_gc && type == ERTS_PSTT_GC_MAJOR)) + && !(c_p->flags & F_HIBERNATED)) { + if (type == ERTS_PSTT_GC_MAJOR) { + FLAGS(c_p) |= F_NEED_FULLSWEEP; + } + reds -= scheduler_gc_proc(c_p, reds); + if (c_p->flags & (F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC)) { + save_dirty_task(c_p, st); + st = NULL; + break; + } + if (type == ERTS_PSTT_GC_MAJOR) + minor_gc = major_gc = 1; + else + minor_gc = 1; } st_res = am_true; } break; - case ERTS_PSTT_CPC: + case ERTS_PSTT_CPC: { + int fcalls; + int cpc_reds = 0; + if (!ERTS_PROC_GET_SAVED_CALLS_BUF(c_p)) + fcalls = reds; + else + fcalls = reds - CONTEXT_REDS; st_res = erts_check_process_code(c_p, st->arg[0], - st->arg[1] == am_true, - &reds); + &cpc_reds, + fcalls); + reds -= cpc_reds; + if (is_non_value(st_res)) { + /* Needed gc, but gc was disabled */ + save_gc_task(c_p, st, st_prio); + st = NULL; + } + break; + } + case ERTS_PSTT_CLA: { + int fcalls; + int cla_reds = 0; + int do_gc; + + if (!ERTS_PROC_GET_SAVED_CALLS_BUF(c_p)) + fcalls = reds; + else + fcalls = reds - CONTEXT_REDS; + do_gc = st->arg[0] == am_true; + st_res = erts_proc_copy_literal_area(c_p, &cla_reds, + fcalls, do_gc); + reds -= cla_reds; if (is_non_value(st_res)) { + if (c_p->flags & F_DIRTY_CLA) { + save_dirty_task(c_p, st); + st = NULL; + break; + } /* Needed gc, but gc was disabled */ save_gc_task(c_p, st, st_prio); st = NULL; + break; } + if (do_gc) /* We did a major gc */ + minor_gc = major_gc = 1; + break; + } + case ERTS_PSTT_COHMQ: + reds -= erts_complete_off_heap_message_queue_change(c_p); + st_res = am_true; + break; + case ERTS_PSTT_FTMQ: + reds -= erts_flush_trace_messages(c_p, ERTS_PROC_LOCK_MAIN); + st_res = am_true; + break; + case ERTS_PSTT_ETS_FREE_FIXATION: + reds -= erts_db_execute_free_fixation(c_p, (DbFixation*)st->arg[0]); + st_res = am_true; break; default: ERTS_INTERNAL_ERROR("Invalid process sys task type"); @@ -9730,14 +10567,17 @@ execute_sys_tasks(Process *c_p, erts_aint32_t *statep, int in_reds) } if (st) - reds += notify_sys_task_executed(c_p, st, st_res); + reds += notify_sys_task_executed(c_p, st, st_res, 1); - state = erts_smp_atomic32_read_acqb(&c_p->state); - } while (qmask && reds < max_reds); + state = erts_atomic32_read_acqb(&c_p->state); + } while (qmask && reds > 0); *statep = state; - return reds; + if (in_reds < reds) + return in_reds; + + return in_reds - reds; } static int @@ -9746,58 +10586,218 @@ cleanup_sys_tasks(Process *c_p, erts_aint32_t in_state, int in_reds) erts_aint32_t state = in_state; int max_reds = in_reds; int reds = 0; - int qmask = 0; + int qmask = 1; /* Set to 1 to force looping as long as there + * are dirty tasks. + */ - ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCK_MAIN); + ERTS_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCK_MAIN); do { ErtsProcSysTask *st; Eterm st_res; int st_prio; - st = fetch_sys_task(c_p, state, &qmask, &st_prio); - if (!st) - break; + if (c_p->dirty_sys_tasks) { + st = c_p->dirty_sys_tasks; + c_p->dirty_sys_tasks = st->next; + } + else + { + st = fetch_sys_task(c_p, state, &qmask, &st_prio); + if (!st) + break; + } switch (st->type) { - case ERTS_PSTT_GC: - st_res = am_false; - break; + case ERTS_PSTT_GC_MAJOR: + case ERTS_PSTT_GC_MINOR: case ERTS_PSTT_CPC: + case ERTS_PSTT_COHMQ: + case ERTS_PSTT_ETS_FREE_FIXATION: st_res = am_false; break; + case ERTS_PSTT_CLA: + st_res = am_ok; + break; + case ERTS_PSTT_FTMQ: + reds -= erts_flush_trace_messages(c_p, ERTS_PROC_LOCK_MAIN); + st_res = am_true; + break; default: ERTS_INTERNAL_ERROR("Invalid process sys task type"); st_res = am_false; break; } - reds += notify_sys_task_executed(c_p, st, st_res); + reds += notify_sys_task_executed(c_p, st, st_res, 1); - state = erts_smp_atomic32_read_acqb(&c_p->state); + state = erts_atomic32_read_acqb(&c_p->state); } while (qmask && reds < max_reds); return reds; } -BIF_RETTYPE -erts_internal_request_system_task_3(BIF_ALIST_3) + +void +erts_execute_dirty_system_task(Process *c_p) +{ + Eterm cla_res = THE_NON_VALUE; + ErtsProcSysTask *stasks; + + /* + * If multiple operations, perform them in the following + * order (in order to avoid unnecessary GC): + * 1. Copy Literal Area (implies major GC). + * 2. GC Hibernate (implies major GC if not woken). + * 3. Major GC (implies minor GC). + * 4. Minor GC. + * + * System task requests are handled after the actual + * operations have been performed... + */ + + ASSERT(!(c_p->flags & (F_DELAY_GC|F_DISABLE_GC))); + + if (c_p->flags & F_DIRTY_CLA) { + int cla_reds = 0; + cla_res = erts_proc_copy_literal_area(c_p, &cla_reds, c_p->fcalls, 1); + ASSERT(is_value(cla_res)); + } + + if (c_p->flags & F_DIRTY_GC_HIBERNATE) { + erts_proc_lock(c_p, ERTS_PROC_LOCK_MSGQ|ERTS_PROC_LOCK_STATUS); + ERTS_MSGQ_MV_INQ2PRIVQ(c_p); + if (c_p->msg.len) + c_p->flags &= ~F_DIRTY_GC_HIBERNATE; /* operation aborted... */ + else { + erts_proc_unlock(c_p, ERTS_PROC_LOCK_MSGQ|ERTS_PROC_LOCK_STATUS); + c_p->fvalue = NIL; + erts_garbage_collect_hibernate(c_p); + ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p); + erts_proc_lock(c_p, ERTS_PROC_LOCK_MSGQ|ERTS_PROC_LOCK_STATUS); + ASSERT(!ERTS_PROC_IS_EXITING(c_p)); + } + erts_proc_unlock(c_p, ERTS_PROC_LOCK_MSGQ|ERTS_PROC_LOCK_STATUS); + } + + if (c_p->flags & (F_DIRTY_MAJOR_GC|F_DIRTY_MINOR_GC)) { + if (c_p->flags & F_DIRTY_MAJOR_GC) + c_p->flags |= F_NEED_FULLSWEEP; + (void) erts_garbage_collect_nobump(c_p, 0, c_p->arg_reg, + c_p->arity, c_p->fcalls); + } + + ASSERT(!(c_p->flags & (F_DIRTY_CLA + | F_DIRTY_GC_HIBERNATE + | F_DIRTY_MAJOR_GC + | F_DIRTY_MINOR_GC))); + + stasks = c_p->dirty_sys_tasks; + c_p->dirty_sys_tasks = NULL; + + while (stasks) { + Eterm st_res; + ErtsProcSysTask *st = stasks; + stasks = st->next; + + switch (st->type) { + case ERTS_PSTT_CLA: + ASSERT(is_value(cla_res)); + st_res = cla_res; + break; + case ERTS_PSTT_GC_MAJOR: + st_res = am_true; + break; + case ERTS_PSTT_GC_MINOR: + st_res = am_true; + break; + + default: + ERTS_INTERNAL_ERROR("Not supported dirty system task"); + break; + } + + (void) notify_sys_task_executed(c_p, st, st_res, 0); + + } + + erts_atomic32_read_band_relb(&c_p->state, ~ERTS_PSFLG_DIRTY_ACTIVE_SYS); +} + +static BIF_RETTYPE +dispatch_system_task(Process *c_p, erts_aint_t fail_state, + ErtsProcSysTask *st, Eterm target, + Eterm prio, Eterm operation) { - Process *rp = erts_proc_lookup(BIF_ARG_1); - ErtsProcSysTaskQs *stqs, *free_stqs = NULL; + Process *rp; + ErtsProcLocks rp_locks = 0; + ErlOffHeap *ohp; + ErtsMessage *mp; + Eterm *hp, msg; + Uint hsz, osz; + BIF_RETTYPE ret; + + switch (st->type) { + case ERTS_PSTT_CPC: + rp = erts_dirty_process_code_checker; + ASSERT(fail_state & (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)); + if (c_p == rp) { + ERTS_BIF_PREP_RET(ret, am_dirty_execution); + return ret; + } + break; + default: + rp = NULL; + ERTS_INTERNAL_ERROR("Non-dispatchable system task"); + break; + } + + ERTS_BIF_PREP_RET(ret, am_ok); + + /* + * Send message on the form: {Requester, Target, Operation} + */ + + ASSERT(is_immed(st->requester)); + ASSERT(is_immed(target)); + ASSERT(is_immed(prio)); + + osz = size_object(operation); + hsz = 5 /* 4-tuple */ + osz; + + mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp); + + msg = copy_struct(operation, osz, &hp, ohp); + msg = TUPLE4(hp, st->requester, target, prio, msg); + + erts_queue_message(rp, rp_locks, mp, msg, st->requester); + + if (rp_locks) + erts_proc_unlock(rp, rp_locks); + + return ret; +} + + +static BIF_RETTYPE +request_system_task(Process *c_p, Eterm requester, Eterm target, + Eterm priority, Eterm operation) +{ + BIF_RETTYPE ret; + Process *rp = erts_proc_lookup(target); ErtsProcSysTask *st = NULL; - erts_aint32_t prio, rp_state; - int rp_locked; + erts_aint32_t prio, fail_state = ERTS_PSFLG_EXITING; Eterm noproc_res, req_type; - if (!rp && !is_internal_pid(BIF_ARG_1)) { - if (!is_external_pid(BIF_ARG_1)) + if (!rp && !is_internal_pid(target)) { + if (!is_external_pid(target)) goto badarg; - if (external_pid_dist_entry(BIF_ARG_1) != erts_this_dist_entry) + if (external_pid_dist_entry(target) != erts_this_dist_entry) goto badarg; } - switch (BIF_ARG_2) { + switch (priority) { case am_max: prio = PRIORITY_MAX; break; case am_high: prio = PRIORITY_HIGH; break; case am_normal: prio = PRIORITY_NORMAL; break; @@ -9805,11 +10805,11 @@ erts_internal_request_system_task_3(BIF_ALIST_3) default: goto badarg; } - if (is_not_tuple(BIF_ARG_3)) + if (is_not_tuple(operation)) goto badarg; else { int i; - Eterm *tp = tuple_val(BIF_ARG_3); + Eterm *tp = tuple_val(operation); Uint arity = arityval(*tp); Eterm req_id; Uint req_id_sz; @@ -9844,11 +10844,10 @@ erts_internal_request_system_task_3(BIF_ALIST_3) } st = erts_alloc(ERTS_ALC_T_PROC_SYS_TSK, ERTS_PROC_SYS_TASK_SIZE(tot_sz)); - st->next = st->prev = st; /* Prep for empty prio queue */ ERTS_INIT_OFF_HEAP(&st->off_heap); hp = &st->heap[0]; - st->requester = BIF_P->common.id; + st->requester = requester; st->reply_tag = req_type; st->req_id_sz = req_id_sz; st->req_id = req_id_sz == 0 ? req_id : copy_struct(req_id, @@ -9867,125 +10866,208 @@ erts_internal_request_system_task_3(BIF_ALIST_3) switch (req_type) { case am_garbage_collect: - st->type = ERTS_PSTT_GC; - noproc_res = am_false; - if (!rp) + switch (st->arg[0]) { + case am_minor: st->type = ERTS_PSTT_GC_MINOR; break; + case am_major: st->type = ERTS_PSTT_GC_MAJOR; break; + default: goto badarg; + } + noproc_res = am_false; + if (!rp) goto noproc; break; case am_check_process_code: if (is_not_atom(st->arg[0])) goto badarg; - if (st->arg[1] != am_true && st->arg[1] != am_false) - goto badarg; noproc_res = am_false; st->type = ERTS_PSTT_CPC; if (!rp) goto noproc; + /* + * If the process should start executing dirty + * code it is important that this task is + * aborted. Therefore this strict fail state... + */ + fail_state |= (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS); + break; + + case am_copy_literals: + if (st->arg[0] != am_true && st->arg[0] != am_false) + goto badarg; + st->type = ERTS_PSTT_CLA; + noproc_res = am_ok; + if (!rp) + goto noproc; break; default: goto badarg; } - rp_state = erts_smp_atomic32_read_nob(&rp->state); + if (!schedule_process_sys_task(rp, prio, st, &fail_state)) { + Eterm failure; + if (fail_state & ERTS_PSFLG_EXITING) { + noproc: + failure = noproc_res; + } + else if (fail_state & (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) { + ret = dispatch_system_task(c_p, fail_state, st, + target, priority, operation); + goto cleanup_return; + } + else { + ERTS_INTERNAL_ERROR("Unknown failure schedule_process_sys_task()"); + failure = am_internal_error; + } + notify_sys_task_executed(c_p, st, failure, 1); + } - rp_locked = 0; + ERTS_BIF_PREP_RET(ret, am_ok); - free_stqs = NULL; - if (rp_state & ERTS_PSFLG_ACTIVE_SYS) - stqs = NULL; - else { - alloc_qs: - stqs = proc_sys_task_queues_alloc(); - stqs->qmask = 1 << prio; - stqs->ncount = 0; - stqs->q[PRIORITY_MAX] = NULL; - stqs->q[PRIORITY_HIGH] = NULL; - stqs->q[PRIORITY_NORMAL] = NULL; - stqs->q[PRIORITY_LOW] = NULL; - stqs->q[prio] = st; - } + return ret; - if (!rp_locked) { - rp_locked = 1; - erts_smp_proc_lock(rp, ERTS_PROC_LOCK_STATUS); +badarg: - rp_state = erts_smp_atomic32_read_nob(&rp->state); - if (rp_state & ERTS_PSFLG_EXITING) { - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); - rp = NULL; - free_stqs = stqs; - goto noproc; - } - } + ERTS_BIF_PREP_ERROR(ret, c_p, BADARG); - if (!rp->sys_task_qs) { - if (stqs) - rp->sys_task_qs = stqs; - else - goto alloc_qs; +cleanup_return: + + if (st) { + erts_cleanup_offheap(&st->off_heap); + erts_free(ERTS_ALC_T_PROC_SYS_TSK, st); } - else { - if (stqs) - free_stqs = stqs; - stqs = rp->sys_task_qs; - if (!stqs->q[prio]) { - stqs->q[prio] = st; - stqs->qmask |= 1 << prio; - } - else { - st->next = stqs->q[prio]; - st->prev = stqs->q[prio]->prev; - st->next->prev = st; - st->prev->next = st; - ASSERT(stqs->qmask & (1 << prio)); - } + + return ret; +} + +BIF_RETTYPE +erts_internal_request_system_task_3(BIF_ALIST_3) +{ + return request_system_task(BIF_P, BIF_P->common.id, + BIF_ARG_1, BIF_ARG_2, BIF_ARG_3); +} + +BIF_RETTYPE +erts_internal_request_system_task_4(BIF_ALIST_4) +{ + return request_system_task(BIF_P, BIF_ARG_1, + BIF_ARG_2, BIF_ARG_3, BIF_ARG_4); +} + +static void +erts_schedule_generic_sys_task(Eterm pid, ErtsProcSysTaskType type, void* arg) +{ + Process *rp = erts_proc_lookup(pid); + if (rp) { + ErtsProcSysTask *st; + erts_aint32_t state, fail_state; + + st = erts_alloc(ERTS_ALC_T_PROC_SYS_TSK, + ERTS_PROC_SYS_TASK_SIZE(0)); + st->type = type; + st->requester = NIL; + st->reply_tag = NIL; + st->req_id = NIL; + st->req_id_sz = 0; + st->arg[0] = (Eterm)arg; + ERTS_INIT_OFF_HEAP(&st->off_heap); + state = erts_atomic32_read_nob(&rp->state); + + fail_state = ERTS_PSFLG_EXITING; + + if (!schedule_process_sys_task(rp, ERTS_PSFLGS_GET_USR_PRIO(state), + st, &fail_state)) + erts_free(ERTS_ALC_T_PROC_SYS_TSK, st); } +} - if (ERTS_PSFLGS_GET_ACT_PRIO(rp_state) > prio) { - erts_aint32_t n, a, e; - /* Need to elevate actual prio */ - a = rp_state; - do { - if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { - n = a; - break; - } - n = e = a; - n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; - n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); - a = erts_smp_atomic32_cmpxchg_nob(&rp->state, n, e); - } while (a != e); - rp_state = n; +void +erts_schedule_complete_off_heap_message_queue_change(Eterm pid) +{ + erts_schedule_generic_sys_task(pid, ERTS_PSTT_COHMQ, NULL); +} + +void +erts_schedule_ets_free_fixation(Eterm pid, DbFixation* fix) +{ + erts_schedule_generic_sys_task(pid, ERTS_PSTT_ETS_FREE_FIXATION, fix); +} + + +static void +flush_dirty_trace_messages(void *vpid) +{ + Process *proc; + Eterm pid; +#ifdef ARCH_64 + pid = (Eterm) vpid; +#else + pid = *((Eterm *) vpid); + erts_free(ERTS_ALC_T_DIRTY_SL, vpid); +#endif + + proc = erts_pid2proc_opt(NULL, 0, pid, ERTS_PROC_LOCK_MAIN, 0); + if (proc) { + (void) erts_flush_trace_messages(proc, ERTS_PROC_LOCK_MAIN); + erts_proc_unlock(proc, ERTS_PROC_LOCK_MAIN); } +} - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); - schedule_process_sys_task(rp, rp_state, NULL); +void +erts_schedule_flush_trace_messages(Process *proc, int force_on_proc) +{ + ErtsThrPrgrDelayHandle dhndl; + Eterm pid = proc->common.id; - if (free_stqs) - proc_sys_task_queues_free(free_stqs); + erts_aint32_t state; - BIF_RET(am_ok); + if (!force_on_proc) { + state = erts_atomic32_read_nob(&proc->state); + if (state & (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) { + goto sched_flush_dirty; + } + } -noproc: + dhndl = erts_thr_progress_unmanaged_delay(); - notify_sys_task_executed(BIF_P, st, noproc_res); - if (free_stqs) - proc_sys_task_queues_free(free_stqs); - BIF_RET(am_ok); + erts_schedule_generic_sys_task(pid, ERTS_PSTT_FTMQ, NULL); -badarg: + erts_thr_progress_unmanaged_continue(dhndl); - if (st) { - erts_cleanup_offheap(&st->off_heap); - erts_free(ERTS_ALC_T_PROC_SYS_TSK, st); + if (!force_on_proc) { + state = erts_atomic32_read_mb(&proc->state); + if (state & (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) { + void *vargp; + + sched_flush_dirty: + /* + * We traced 'proc' from another thread than + * it is executing on, and it is executing + * on a dirty scheduler. It might take a + * significant amount of time before it is + * scheduled out (where it gets opportunity + * to flush messages). We therefore schedule + * the flush on the first ordinary scheduler. + */ + +#ifdef ARCH_64 + vargp = (void *) pid; +#else + { + Eterm *argp = erts_alloc(ERTS_ALC_T_DIRTY_SL, sizeof(Eterm)); + *argp = pid; + vargp = (void *) argp; + } +#endif + erts_schedule_misc_aux_work(1, flush_dirty_trace_messages, vargp); + } } - if (free_stqs) - proc_sys_task_queues_free(free_stqs); - BIF_ERROR(BIF_P, BADARG); } static void @@ -9994,7 +11076,7 @@ save_gc_task(Process *c_p, ErtsProcSysTask *st, int prio) erts_aint32_t state; ErtsProcSysTaskQs *qs; - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p)); qs = ERTS_PROC_GET_DELAYED_GC_TASK_QS(c_p); if (!qs) { @@ -10007,7 +11089,7 @@ save_gc_task(Process *c_p, ErtsProcSysTask *st, int prio) qs->q[PRIORITY_NORMAL] = NULL; qs->q[PRIORITY_LOW] = NULL; qs->q[prio] = st; - (void) ERTS_PROC_SET_DELAYED_GC_TASK_QS(c_p, ERTS_PROC_LOCK_MAIN, qs); + (void) ERTS_PROC_SET_DELAYED_GC_TASK_QS(c_p, qs); } else { if (!qs->q[prio]) { @@ -10024,8 +11106,11 @@ save_gc_task(Process *c_p, ErtsProcSysTask *st, int prio) } } - state = erts_smp_atomic32_read_nob(&c_p->state); - ASSERT((ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS) & state); + state = erts_atomic32_read_nob(&c_p->state); + ASSERT((ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS) & state); while (!(state & ERTS_PSFLG_DELAYED_SYS) || prio < ERTS_PSFLGS_GET_ACT_PRIO(state)) { @@ -10037,20 +11122,27 @@ save_gc_task(Process *c_p, ErtsProcSysTask *st, int prio) n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; n |= prio << ERTS_PSFLGS_ACT_PRIO_OFFSET; } - state = erts_smp_atomic32_cmpxchg_relb(&c_p->state, n, e); + state = erts_atomic32_cmpxchg_relb(&c_p->state, n, e); if (state == e) break; } } +static void +save_dirty_task(Process *c_p, ErtsProcSysTask *st) +{ + st->next = c_p->dirty_sys_tasks; + c_p->dirty_sys_tasks = st; +} + int erts_set_gc_state(Process *c_p, int enable) { ErtsProcSysTaskQs *dgc_tsk_qs; ASSERT(c_p == erts_get_current_process()); ASSERT((ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS) - & erts_smp_atomic32_read_nob(&c_p->state)); - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p)); + & erts_atomic32_read_nob(&c_p->state)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p)); if (!enable) { c_p->flags |= F_DISABLE_GC; @@ -10065,7 +11157,7 @@ erts_set_gc_state(Process *c_p, int enable) /* Move delayed gc tasks into sys tasks queues. */ - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); if (!c_p->sys_task_qs) { c_p->sys_task_qs = dgc_tsk_qs; @@ -10138,7 +11230,7 @@ erts_set_gc_state(Process *c_p, int enable) erts_aint32_t aprio, state = #endif - erts_smp_atomic32_read_bset_nob(&c_p->state, + erts_atomic32_read_bset_nob(&c_p->state, (ERTS_PSFLG_DELAYED_SYS | ERTS_PSFLG_ACTIVE_SYS), ERTS_PSFLG_ACTIVE_SYS); @@ -10152,9 +11244,9 @@ erts_set_gc_state(Process *c_p, int enable) } #endif - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); - (void) ERTS_PROC_SET_DELAYED_GC_TASK_QS(c_p, ERTS_PROC_LOCK_MAIN, NULL); + (void) ERTS_PROC_SET_DELAYED_GC_TASK_QS(c_p, NULL); if (dgc_tsk_qs) proc_sys_task_queues_free(dgc_tsk_qs); @@ -10168,24 +11260,24 @@ erts_sched_stat_modify(int what) int ix; switch (what) { case ERTS_SCHED_STAT_MODIFY_ENABLE: - erts_smp_thr_progress_block(); + erts_thr_progress_block(); erts_sched_stat.enabled = 1; - erts_smp_thr_progress_unblock(); + erts_thr_progress_unblock(); break; case ERTS_SCHED_STAT_MODIFY_DISABLE: - erts_smp_thr_progress_block(); + erts_thr_progress_block(); erts_sched_stat.enabled = 0; - erts_smp_thr_progress_unblock(); + erts_thr_progress_unblock(); break; case ERTS_SCHED_STAT_MODIFY_CLEAR: - erts_smp_spin_lock(&erts_sched_stat.lock); + erts_spin_lock(&erts_sched_stat.lock); for (ix = 0; ix < ERTS_NO_PRIO_LEVELS; ix++) { erts_sched_stat.prio[ix].total_executed = 0; erts_sched_stat.prio[ix].executed = 0; erts_sched_stat.prio[ix].total_migrated = 0; erts_sched_stat.prio[ix].migrated = 0; } - erts_smp_spin_unlock(&erts_sched_stat.lock); + erts_spin_unlock(&erts_sched_stat.lock); break; } } @@ -10199,7 +11291,7 @@ erts_sched_stat_term(Process *p, int total) Uint executed[ERTS_NO_PRIO_LEVELS]; Uint migrated[ERTS_NO_PRIO_LEVELS]; - erts_smp_spin_lock(&erts_sched_stat.lock); + erts_spin_lock(&erts_sched_stat.lock); if (total) { int i; for (i = 0; i < ERTS_NO_PRIO_LEVELS; i++) { @@ -10218,7 +11310,7 @@ erts_sched_stat_term(Process *p, int total) erts_sched_stat.prio[i].migrated = 0; } } - erts_smp_spin_unlock(&erts_sched_stat.lock); + erts_spin_unlock(&erts_sched_stat.lock); sz = 0; (void) erts_bld_atom_2uint_3tup_list(NULL, &sz, ERTS_NO_PRIO_LEVELS, @@ -10238,7 +11330,6 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) ErtsSchedulerData *esdp = erts_get_scheduler_data(); ErtsRunQueue *rq = esdp ? esdp->run_queue : ERTS_RUNQ_IX(0); ErtsMiscOpList *molp = misc_op_list_alloc(); -#ifdef ERTS_SMP ErtsMigrationPaths *mpaths = erts_get_migration_paths(); if (!mpaths) @@ -10248,9 +11339,8 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) if (erq) rq = erq; } -#endif - erts_smp_runq_lock(rq); + erts_runq_lock(rq); molp->next = NULL; molp->func = func; @@ -10261,11 +11351,11 @@ erts_schedule_misc_op(void (*func)(void *), void *arg) rq->misc.start = molp; rq->misc.end = molp; -#ifdef ERTS_SMP non_empty_runq(rq); -#endif - erts_smp_runq_unlock(rq); + ERTS_RUNQ_FLGS_SET_NOB(rq, ERTS_RUNQ_FLG_MISC_OP); + + erts_runq_unlock(rq); smp_notify_inc_runq(rq); } @@ -10295,7 +11385,10 @@ exec_misc_ops(ErtsRunQueue *rq) rq->misc.end = NULL; } - erts_smp_runq_unlock(rq); + if (!rq->misc.start) + ERTS_RUNQ_FLGS_UNSET_NOB(rq, ERTS_RUNQ_FLG_MISC_OP); + + erts_runq_unlock(rq); while (molp) { tmp_molp = molp; @@ -10304,7 +11397,7 @@ exec_misc_ops(ErtsRunQueue *rq) misc_op_list_free(tmp_molp); } - erts_smp_runq_lock(rq); + erts_runq_lock(rq); } Uint @@ -10321,6 +11414,8 @@ erts_get_total_reductions(Uint *redsp, Uint *diffp) Uint reds = 0; ERTS_ATOMIC_FOREACH_RUNQ_X(rq, + erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, + reds += rq->procs.reductions, if (redsp) *redsp = reds; @@ -10333,12 +11428,12 @@ erts_get_exact_total_reductions(Process *c_p, Uint *redsp, Uint *diffp) { Uint reds = erts_current_reductions(c_p, c_p); int ix; - erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_MAIN); + erts_proc_unlock(c_p, ERTS_PROC_LOCK_MAIN); /* * Wait for other schedulers to schedule out their processes * and update 'reductions'. */ - erts_smp_thr_progress_block(); + erts_thr_progress_block(); for (reds = 0, ix = 0; ix < erts_no_run_queues; ix++) reds += ERTS_RUNQ_IX(ix)->procs.reductions; if (redsp) @@ -10346,16 +11441,20 @@ erts_get_exact_total_reductions(Process *c_p, Uint *redsp, Uint *diffp) if (diffp) *diffp = reds - last_exact_reductions; last_exact_reductions = reds; - erts_smp_thr_progress_unblock(); - erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_MAIN); + erts_thr_progress_unblock(); + erts_proc_lock(c_p, ERTS_PROC_LOCK_MAIN); } +static void delete_process(Process* p); + void erts_free_proc(Process *p) { -#ifdef ERTS_SMP erts_proc_lock_fin(p); -#endif + ASSERT(erts_atomic32_read_nob(&p->state) & ERTS_PSFLG_FREE); + ASSERT(0 == erts_proc_read_refc(p)); + if (p->flags & F_DELAYED_DEL_PROC) + delete_process(p); erts_free(ERTS_ALC_T_PROC, (void *) p); } @@ -10371,13 +11470,13 @@ static void early_init_process_struct(void *varg, Eterm data) Process *proc = arg->proc; proc->common.id = make_internal_pid(data); - erts_smp_atomic32_init_relb(&proc->state, arg->state); + erts_atomic32_init_nob(&proc->dirty_state, 0); + proc->dirty_sys_tasks = NULL; + erts_atomic32_init_relb(&proc->state, arg->state); -#ifdef ERTS_SMP RUNQ_SET_RQ(&proc->run_queue, arg->run_queue); erts_proc_lock_init(proc); /* All locks locked */ -#endif } @@ -10398,7 +11497,7 @@ alloc_process(ErtsRunQueue *rq, erts_aint32_t state) init_arg.run_queue = rq; init_arg.state = state; - ASSERT(((char *) p) == ((char *) &p->common)); + ERTS_CT_ASSERT(offsetof(Process,common) == 0); if (!erts_ptab_new_element(&erts_proc, &p->common, @@ -10408,6 +11507,8 @@ alloc_process(ErtsRunQueue *rq, erts_aint32_t state) return NULL; } + ASSERT(erts_proc_read_refc(p) > 0); + ASSERT(internal_pid_serial(p->common.id) <= ERTS_MAX_PID_SERIAL); p->approx_started = erts_get_approx_time(); @@ -10429,6 +11530,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). Eterm args, /* Arguments for function (must be well-formed list). */ ErlSpawnOpts* so) /* Options for spawn. */ { + Uint flags = 0; ErtsRunQueue *rq = NULL; Process *p; Sint arity; /* Number of arguments. */ @@ -10438,11 +11540,17 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). Eterm res = THE_NON_VALUE; erts_aint32_t state = 0; erts_aint32_t prio = (erts_aint32_t) PRIORITY_NORMAL; - -#ifdef ERTS_SMP - erts_smp_proc_lock(parent, ERTS_PROC_LOCKS_ALL_MINOR); + ErtsProcLocks locks = ERTS_PROC_LOCKS_ALL; +#ifdef SHCOPY_SPAWN + erts_shcopy_t info; + INITIALIZE_SHCOPY(info); +#else + erts_literal_area_t litarea; + INITIALIZE_LITERAL_PURGE_AREA(litarea); #endif + erts_proc_lock(parent, ERTS_PROC_LOCKS_ALL_MINOR); + /* * Check for errors. */ @@ -10457,6 +11565,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). int ix = so->scheduler-1; ASSERT(0 <= ix && ix < erts_no_run_queues); rq = ERTS_RUNQ_IX(ix); + /* Unsupported feature... */ state |= ERTS_PSFLG_BOUND; } prio = (erts_aint32_t) so->priority; @@ -10465,6 +11574,17 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). state |= (((prio & ERTS_PSFLGS_PRIO_MASK) << ERTS_PSFLGS_ACT_PRIO_OFFSET) | ((prio & ERTS_PSFLGS_PRIO_MASK) << ERTS_PSFLGS_USR_PRIO_OFFSET)); + if (so->flags & SPO_OFF_HEAP_MSGQ) { + state |= ERTS_PSFLG_OFF_HEAP_MSGQ; + flags |= F_OFF_HEAP_MSGQ; + } + else if (so->flags & SPO_ON_HEAP_MSGQ) { + state |= ERTS_PSFLG_ON_HEAP_MSGQ; + flags |= F_ON_HEAP_MSGQ; + } + + ASSERT((flags & F_ON_HEAP_MSGQ) || (flags & F_OFF_HEAP_MSGQ)); + if (!rq) rq = erts_get_runq_proc(parent); @@ -10477,33 +11597,42 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). goto error; } -#ifdef BM_COUNTERS - processes_busy++; -#endif - BM_COUNT(processes_spawned); + ASSERT((erts_atomic32_read_nob(&p->state) + & ERTS_PSFLG_ON_HEAP_MSGQ) + || (erts_atomic32_read_nob(&p->state) + & ERTS_PSFLG_OFF_HEAP_MSGQ)); - BM_SWAP_TIMER(system,size); - arg_size = size_object(args); - BM_SWAP_TIMER(size,system); +#ifdef SHCOPY_SPAWN + arg_size = copy_shared_calculate(args, &info); +#else + arg_size = size_object_litopt(args, &litarea); +#endif heap_need = arg_size; - p->flags = erts_default_process_flags; + p->flags = flags; + p->static_flags = 0; + if (so->flags & SPO_SYSTEM_PROC) + p->static_flags |= ERTS_STC_FLG_SYSTEM_PROC; if (so->flags & SPO_USE_ARGS) { p->min_heap_size = so->min_heap_size; p->min_vheap_size = so->min_vheap_size; p->max_gen_gcs = so->max_gen_gcs; + MAX_HEAP_SIZE_SET(p, so->max_heap_size); + MAX_HEAP_SIZE_FLAGS_SET(p, so->max_heap_flags); } else { p->min_heap_size = H_MIN_SIZE; p->min_vheap_size = BIN_VH_MIN_SIZE; - p->max_gen_gcs = (Uint16) erts_smp_atomic32_read_nob(&erts_max_gen_gcs); + MAX_HEAP_SIZE_SET(p, H_MAX_SIZE); + MAX_HEAP_SIZE_FLAGS_SET(p, H_MAX_FLAGS); + p->max_gen_gcs = (Uint16) erts_atomic32_read_nob(&erts_max_gen_gcs); } p->schedule_count = 0; ASSERT(p->min_heap_size == erts_next_heap_size(p->min_heap_size, 0)); - - p->initial[INITIAL_MOD] = mod; - p->initial[INITIAL_FUN] = func; - p->initial[INITIAL_ARI] = (Uint) arity; + + p->u.initial.module = mod; + p->u.initial.function = func; + p->u.initial.arity = (Uint) arity; /* * Must initialize binary lists here before copying binaries to process. @@ -10522,10 +11651,8 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). #ifdef HIPE hipe_init_process(&p->hipe); -#ifdef ERTS_SMP hipe_init_process_smp(&p->hipe_smp); #endif -#endif p->heap = (Eterm *) ERTS_HEAP_ALLOC(ERTS_ALC_T_HEAP, sizeof(Eterm)*sz); p->old_hend = p->old_htop = p->old_heap = NULL; p->high_water = p->heap; @@ -10533,18 +11660,19 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->stop = p->hend = p->heap + sz; p->htop = p->heap; p->heap_sz = sz; + p->abandoned_heap = NULL; + p->live_hf_end = ERTS_INVALID_HFRAG_PTR; p->catches = 0; p->bin_vheap_sz = p->min_vheap_size; p->bin_old_vheap_sz = p->min_vheap_size; p->bin_old_vheap = 0; - p->bin_vheap_mature = 0; p->sys_task_qs = NULL; /* No need to initialize p->fcalls. */ - p->current = p->initial+INITIAL_MOD; + p->current = &p->u.initial; p->i = (BeamInstr *) beam_apply; p->cp = (BeamInstr *) beam_apply+1; @@ -10553,13 +11681,12 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]); p->arg_reg[0] = mod; p->arg_reg[1] = func; - BM_STOP_TIMER(system); - BM_MESSAGE(args,p,parent); - BM_START_TIMER(system); - BM_SWAP_TIMER(system,copy); - p->arg_reg[2] = copy_struct(args, arg_size, &p->htop, &p->off_heap); - BM_MESSAGE_COPIED(arg_size); - BM_SWAP_TIMER(copy,system); +#ifdef SHCOPY_SPAWN + p->arg_reg[2] = copy_shared_perform(args, arg_size, &info, &p->htop, &p->off_heap); + DESTROY_SHCOPY(info); +#else + p->arg_reg[2] = copy_struct_litopt(args, arg_size, &p->htop, &p->off_heap, &litarea); +#endif p->arity = 3; p->fvalue = NIL; @@ -10567,11 +11694,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->ftrace = NIL; p->reds = 0; -#ifdef ERTS_SMP - p->common.u.alive.ptimer = NULL; -#else - sys_memset(&p->common.u.alive.tm, 0, sizeof(ErlTimer)); -#endif + ERTS_PTMR_INIT(p); p->common.u.alive.reg = NULL; ERTS_P_LINKS(p) = NULL; @@ -10591,21 +11714,21 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). : STORE_NC(&p->htop, &p->off_heap, parent->group_leader); } - erts_get_default_tracing(&ERTS_TRACE_FLAGS(p), &ERTS_TRACER_PROC(p)); + erts_get_default_proc_tracing(&ERTS_TRACE_FLAGS(p), &ERTS_TRACER(p)); p->msg.first = NULL; p->msg.last = &p->msg.first; p->msg.save = &p->msg.first; + p->msg.saved_last = &p->msg.first; p->msg.len = 0; -#ifdef ERTS_SMP p->msg_inq.first = NULL; p->msg_inq.last = &p->msg_inq.first; p->msg_inq.len = 0; -#endif - p->u.bif_timers = NULL; + p->bif_timers = NULL; p->mbuf = NULL; + p->msg_frag = NULL; p->mbuf_sz = 0; - p->psd = NULL; + erts_atomic_init_nob(&p->psd, (erts_aint_t) NULL); p->dictionary = NULL; p->seq_trace_lastcnt = 0; p->seq_trace_clock = 0; @@ -10623,21 +11746,58 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). p->last_old_htop = NULL; #endif + p->trace_msg_q = NULL; + p->scheduler_data = NULL; + p->suspendee = NIL; + p->pending_suspenders = NULL; + p->pending_exit.reason = THE_NON_VALUE; + p->pending_exit.bp = NULL; + +#if !defined(NO_FPE_SIGNALS) || defined(HIPE) + p->fp_exception = 0; +#endif + if (IS_TRACED(parent)) { if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOS) { ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent) & TRACEE_FLAGS); - ERTS_TRACER_PROC(p) = ERTS_TRACER_PROC(parent); - } - if (ARE_TRACE_FLAGS_ON(parent, F_TRACE_PROCS)) { - trace_proc_spawn(parent, p->common.id, mod, func, args); + erts_tracer_replace(&p->common, ERTS_TRACER(parent)); } - if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOS1) { + if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOS1) { /* Overrides TRACE_CHILDREN */ ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent) & TRACEE_FLAGS); - ERTS_TRACER_PROC(p) = ERTS_TRACER_PROC(parent); + erts_tracer_replace(&p->common, ERTS_TRACER(parent)); ERTS_TRACE_FLAGS(p) &= ~(F_TRACE_SOS1 | F_TRACE_SOS); ERTS_TRACE_FLAGS(parent) &= ~(F_TRACE_SOS1 | F_TRACE_SOS); } + if (so->flags & SPO_LINK && ERTS_TRACE_FLAGS(parent) & (F_TRACE_SOL|F_TRACE_SOL1)) { + ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent)&TRACEE_FLAGS); + erts_tracer_replace(&p->common, ERTS_TRACER(parent)); + if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOL1) {/*maybe override*/ + ERTS_TRACE_FLAGS(p) &= ~(F_TRACE_SOL1 | F_TRACE_SOL); + ERTS_TRACE_FLAGS(parent) &= ~(F_TRACE_SOL1 | F_TRACE_SOL); + } + } + if (ARE_TRACE_FLAGS_ON(parent, F_TRACE_PROCS)) { + locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + trace_proc_spawn(parent, am_spawn, p->common.id, mod, func, args); + if (so->flags & SPO_LINK) + trace_proc(parent, locks, parent, am_link, p->common.id); + } + } + + if (IS_TRACED_FL(p, F_TRACE_PROCS)) { + if ((locks & (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) + == (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) { + /* This happens when parent was not traced, but child is */ + locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE); + } + trace_proc_spawn(p, am_spawned, parent->common.id, mod, func, args); + if (so->flags & SPO_LINK) + trace_proc(p, locks, p, am_getting_linked, parent->common.id); } /* @@ -10648,10 +11808,6 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). #ifdef DEBUG int ret; #endif - if (IS_TRACED_FL(parent, F_TRACE_PROCS)) { - trace_proc(parent, parent, am_link, p->common.id); - } - #ifdef DEBUG ret = erts_add_link(&ERTS_P_LINKS(parent), LINK_PID, p->common.id); ASSERT(ret == 0); @@ -10662,17 +11818,6 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). erts_add_link(&ERTS_P_LINKS(p), LINK_PID, parent->common.id); #endif - if (IS_TRACED(parent)) { - if (ERTS_TRACE_FLAGS(parent) & (F_TRACE_SOL|F_TRACE_SOL1)) { - ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent)&TRACEE_FLAGS); - ERTS_TRACER_PROC(p) = ERTS_TRACER_PROC(parent); /*maybe steal*/ - - if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOL1) {/*maybe override*/ - ERTS_TRACE_FLAGS(p) &= ~(F_TRACE_SOL1 | F_TRACE_SOL); - ERTS_TRACE_FLAGS(parent) &= ~(F_TRACE_SOL1 | F_TRACE_SOL); - } - } - } } /* @@ -10687,19 +11832,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). so->mref = mref; } -#ifdef ERTS_SMP - p->scheduler_data = NULL; - p->suspendee = NIL; - p->pending_suspenders = NULL; - p->pending_exit.reason = THE_NON_VALUE; - p->pending_exit.bp = NULL; -#endif - -#if !defined(NO_FPE_SIGNALS) || defined(HIPE) - p->fp_exception = 0; -#endif - - erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + erts_proc_unlock(p, locks); res = p->common.id; @@ -10707,23 +11840,27 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). * Schedule process for execution. */ - schedule_process(p, state); + erts_proc_unlock(parent, locks & ERTS_PROC_LOCKS_ALL_MINOR); + + schedule_process(p, state, 0); VERBOSE(DEBUG_PROCESSES, ("Created a new process: %T\n",p->common.id)); #ifdef USE_VM_PROBES if (DTRACE_ENABLED(process_spawn)) { + ErtsCodeMFA cmfa = {mod, func, arity}; DTRACE_CHARBUF(process_name, DTRACE_TERM_BUF_SIZE); - DTRACE_CHARBUF(mfa, DTRACE_TERM_BUF_SIZE); + DTRACE_CHARBUF(mfa_buf, DTRACE_TERM_BUF_SIZE); - dtrace_fun_decode(p, mod, func, arity, process_name, mfa); - DTRACE2(process_spawn, process_name, mfa); + dtrace_fun_decode(p, &cmfa, process_name, mfa_buf); + DTRACE2(process_spawn, process_name, mfa_buf); } #endif + return res; error: - erts_smp_proc_unlock(parent, ERTS_PROC_LOCKS_ALL_MINOR); + erts_proc_unlock(parent, locks & ERTS_PROC_LOCKS_ALL_MINOR); return res; } @@ -10739,6 +11876,8 @@ void erts_init_empty_process(Process *p) p->stop = NULL; p->hend = NULL; p->heap = NULL; + p->abandoned_heap = NULL; + p->live_hf_end = ERTS_INVALID_HFRAG_PTR; p->gen_gcs = 0; p->max_gen_gcs = 0; p->min_heap_size = 0; @@ -10746,7 +11885,7 @@ void erts_init_empty_process(Process *p) p->rcount = 0; p->common.id = ERTS_INVALID_PID; p->reds = 0; - ERTS_TRACER_PROC(p) = NIL; + ERTS_TRACER(p) = erts_tracer_nil; ERTS_TRACE_FLAGS(p) = F_INITIAL_TRACE_FLAGS; p->group_leader = ERTS_INVALID_PID; p->flags = 0; @@ -10759,12 +11898,7 @@ void erts_init_empty_process(Process *p) p->bin_old_vheap_sz = BIN_VH_MIN_SIZE; p->bin_old_vheap = 0; p->sys_task_qs = NULL; - p->bin_vheap_mature = 0; -#ifdef ERTS_SMP - p->common.u.alive.ptimer = NULL; -#else - memset(&(p->common.u.alive.tm), 0, sizeof(ErlTimer)); -#endif + ERTS_PTMR_INIT(p); p->next = NULL; p->off_heap.first = NULL; p->off_heap.overhead = 0; @@ -10775,8 +11909,9 @@ void erts_init_empty_process(Process *p) p->old_htop = NULL; p->old_heap = NULL; p->mbuf = NULL; + p->msg_frag = NULL; p->mbuf_sz = 0; - p->psd = NULL; + erts_atomic_init_nob(&p->psd, (erts_aint_t) NULL); ERTS_P_MONITORS(p) = NULL; ERTS_P_LINKS(p) = NULL; /* List of links */ p->nodes_monitors = NULL; @@ -10785,14 +11920,14 @@ void erts_init_empty_process(Process *p) p->msg.last = &p->msg.first; p->msg.save = &p->msg.first; p->msg.len = 0; - p->u.bif_timers = NULL; + p->bif_timers = NULL; p->dictionary = NULL; p->seq_trace_clock = 0; p->seq_trace_lastcnt = 0; p->seq_trace_token = NIL; - p->initial[0] = 0; - p->initial[1] = 0; - p->initial[2] = 0; + p->u.initial.module = 0; + p->u.initial.function = 0; + p->u.initial.arity = 0; p->catches = 0; p->cp = NULL; p->i = NULL; @@ -10813,23 +11948,24 @@ void erts_init_empty_process(Process *p) p->parent = NIL; p->approx_started = 0; + p->static_flags = 0; + p->common.u.alive.started_interval = 0; #ifdef HIPE hipe_init_process(&p->hipe); -#ifdef ERTS_SMP hipe_init_process_smp(&p->hipe_smp); #endif -#endif INIT_HOLE_CHECK(p); #ifdef DEBUG p->last_old_htop = NULL; #endif - erts_smp_atomic32_init_nob(&p->state, (erts_aint32_t) PRIORITY_NORMAL); + erts_atomic32_init_nob(&p->dirty_state, 0); + p->dirty_sys_tasks = NULL; + erts_atomic32_init_nob(&p->state, (erts_aint32_t) PRIORITY_NORMAL); -#ifdef ERTS_SMP p->scheduler_data = NULL; p->msg_inq.first = NULL; p->msg_inq.last = &p->msg_inq.first; @@ -10839,9 +11975,8 @@ void erts_init_empty_process(Process *p) p->pending_exit.reason = THE_NON_VALUE; p->pending_exit.bp = NULL; erts_proc_lock_init(p); - erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + erts_proc_unlock(p, ERTS_PROC_LOCKS_ALL); RUNQ_SET_RQ(&p->run_queue, ERTS_RUNQ_IX(0)); -#endif #if !defined(NO_FPE_SIGNALS) || defined(HIPE) p->fp_exception = 0; @@ -10858,9 +11993,11 @@ erts_debug_verify_clean_empty_process(Process* p) ASSERT(p->htop == NULL); ASSERT(p->stop == NULL); ASSERT(p->hend == NULL); + ASSERT(p->abandoned_heap == NULL); + ASSERT(p->live_hf_end == ERTS_INVALID_HFRAG_PTR); ASSERT(p->heap == NULL); ASSERT(p->common.id == ERTS_INVALID_PID); - ASSERT(ERTS_TRACER_PROC(p) == NIL); + ASSERT(ERTS_TRACER_IS_NIL(ERTS_TRACER(p))); ASSERT(ERTS_TRACE_FLAGS(p) == F_INITIAL_TRACE_FLAGS); ASSERT(p->group_leader == ERTS_INVALID_PID); ASSERT(p->next == NULL); @@ -10877,7 +12014,7 @@ erts_debug_verify_clean_empty_process(Process* p) ASSERT(p->suspend_monitors == NULL); ASSERT(p->msg.first == NULL); ASSERT(p->msg.len == 0); - ASSERT(p->u.bif_timers == NULL); + ASSERT(p->bif_timers == NULL); ASSERT(p->dictionary == NULL); ASSERT(p->catches == 0); ASSERT(p->cp == NULL); @@ -10886,14 +12023,12 @@ erts_debug_verify_clean_empty_process(Process* p) ASSERT(p->parent == NIL); -#ifdef ERTS_SMP ASSERT(p->msg_inq.first == NULL); ASSERT(p->msg_inq.len == 0); ASSERT(p->suspendee == NIL); ASSERT(p->pending_suspenders == NULL); ASSERT(p->pending_exit.reason == THE_NON_VALUE); ASSERT(p->pending_exit.bp == NULL); -#endif /* Thing that erts_cleanup_empty_process() cleans up */ @@ -10918,28 +12053,44 @@ erts_cleanup_empty_process(Process* p) free_message_buffer(p->mbuf); p->mbuf = NULL; } -#ifdef ERTS_SMP erts_proc_lock_fin(p); -#endif #ifdef DEBUG erts_debug_verify_clean_empty_process(p); #endif } -/* - * p must be the currently executing process. - */ static void delete_process(Process* p) { - ErlMessage* mp; + ErtsPSD *psd; + struct saved_calls *scb; + process_breakpoint_time_t *pbt; VERBOSE(DEBUG_PROCESSES, ("Removing process: %T\n",p->common.id)); + VERBOSE(DEBUG_SHCOPY, ("[pid=%T] delete process: %p %p %p %p\n", p->common.id, + HEAP_START(p), HEAP_END(p), OLD_HEAP(p), OLD_HEND(p))); + + scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, NULL); + + if (scb) { + p->fcalls += CONTEXT_REDS; /* Reduction counting depends on this... */ + erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb); + } + + pbt = ERTS_PROC_SET_CALL_TIME(p, NULL); + if (pbt) + erts_free(ERTS_ALC_T_BPD, (void *) pbt); + + erts_destroy_nif_export(p); /* Cleanup psd */ - if (p->psd) - erts_free(ERTS_ALC_T_PSD, p->psd); + psd = (ErtsPSD *) erts_atomic_read_nob(&p->psd); + + if (psd) { + erts_atomic_set_nob(&p->psd, (erts_aint_t) NULL); /* Reduction counting depends on this... */ + erts_free(ERTS_ALC_T_PSD, psd); + } /* Clean binaries and funs */ erts_cleanup_offheap(&p->off_heap); @@ -10958,16 +12109,12 @@ delete_process(Process* p) * Release heaps. Clobber contents in DEBUG build. */ - -#ifdef DEBUG - sys_memset(p->heap, DEBUG_BAD_BYTE, p->heap_sz*sizeof(Eterm)); -#endif - #ifdef HIPE hipe_delete_process(&p->hipe); #endif - ERTS_HEAP_FREE(ERTS_ALC_T_HEAP, (void*) p->heap, p->heap_sz*sizeof(Eterm)); + erts_deallocate_young_generation(p); + if (p->old_heap != NULL) { #ifdef DEBUG @@ -10979,34 +12126,11 @@ delete_process(Process* p) (p->old_hend-p->old_heap)*sizeof(Eterm)); } - /* - * Free all pending message buffers. - */ - if (p->mbuf != NULL) { - free_message_buffer(p->mbuf); - } - erts_erase_dicts(p); /* free all pending messages */ - mp = p->msg.first; - while(mp != NULL) { - ErlMessage* next_mp = mp->next; - if (mp->data.attached) { - if (is_value(mp->m[0])) - free_message_buffer(mp->data.heap_frag); - else { - if (is_not_nil(mp->m[1])) { - ErlHeapFragment *heap_frag; - heap_frag = (ErlHeapFragment *) mp->data.dist_ext->ext_endp; - erts_cleanup_offheap(&heap_frag->off_heap); - } - erts_free_dist_ext_copy(mp->data.dist_ext); - } - } - free_message(mp); - mp = next_mp; - } + erts_cleanup_messages(p->msg.first); + p->msg.first = NULL; ASSERT(!p->nodes_monitors); ASSERT(!p->suspend_monitors); @@ -11022,13 +12146,16 @@ set_proc_exiting(Process *p, { erts_aint32_t state = in_state, enq_prio = -1; int enqueue; - ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(p) == ERTS_PROC_LOCKS_ALL); + ERTS_LC_ASSERT(erts_proc_lc_my_proc_locks(p) == ERTS_PROC_LOCKS_ALL); enqueue = change_proc_schedule_state(p, - ERTS_PSFLG_SUSPENDED|ERTS_PSFLG_PENDING_EXIT, + (ERTS_PSFLG_SUSPENDED + | ERTS_PSFLG_PENDING_EXIT + | ERTS_PSFLGS_DIRTY_WORK), ERTS_PSFLG_EXITING|ERTS_PSFLG_ACTIVE, &state, - &enq_prio); + &enq_prio, + ERTS_PROC_LOCKS_ALL); p->fvalue = reason; if (bp) @@ -11040,13 +12167,10 @@ set_proc_exiting(Process *p, */ p->freason = EXTAG_EXIT; KILL_CATCHES(p); - cancel_timer(p); p->i = (BeamInstr *) beam_exit; - if (enqueue) - add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), - state, - enq_prio); + + add2runq(enqueue, enq_prio, p, state, NULL); } static ERTS_INLINE erts_aint32_t @@ -11057,10 +12181,13 @@ set_proc_self_exiting(Process *c_p) #endif erts_aint32_t state, enq_prio = -1; - ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCKS_ALL); + ERTS_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCKS_ALL); - state = erts_smp_atomic32_read_nob(&c_p->state); - ASSERT(state & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS)); + state = erts_atomic32_read_nob(&c_p->state); + ASSERT(state & (ERTS_PSFLG_RUNNING + |ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)); #ifdef DEBUG enqueue = @@ -11069,99 +12196,127 @@ set_proc_self_exiting(Process *c_p) ERTS_PSFLG_SUSPENDED|ERTS_PSFLG_PENDING_EXIT, ERTS_PSFLG_EXITING|ERTS_PSFLG_ACTIVE, &state, - &enq_prio); + &enq_prio, + ERTS_PROC_LOCKS_ALL); ASSERT(!enqueue); return state; } -#ifdef ERTS_SMP void erts_handle_pending_exit(Process *c_p, ErtsProcLocks locks) { ErtsProcLocks xlocks; ASSERT(is_value(c_p->pending_exit.reason)); - ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == locks); - ERTS_SMP_LC_ASSERT(locks & ERTS_PROC_LOCK_MAIN); - ERTS_SMP_LC_ASSERT(!((ERTS_PSFLG_EXITING|ERTS_PSFLG_FREE) - & erts_smp_atomic32_read_nob(&c_p->state))); + ERTS_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == locks); + ERTS_LC_ASSERT(locks & ERTS_PROC_LOCK_MAIN); + ERTS_LC_ASSERT(!((ERTS_PSFLG_EXITING|ERTS_PSFLG_FREE) + & erts_atomic32_read_nob(&c_p->state))); /* Ensure that all locks on c_p are locked before proceeding... */ if (locks == ERTS_PROC_LOCKS_ALL) xlocks = 0; else { xlocks = ~locks & ERTS_PROC_LOCKS_ALL; - if (erts_smp_proc_trylock(c_p, xlocks) == EBUSY) { - erts_smp_proc_unlock(c_p, locks & ~ERTS_PROC_LOCK_MAIN); - erts_smp_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR); + if (erts_proc_trylock(c_p, xlocks) == EBUSY) { + erts_proc_unlock(c_p, locks & ~ERTS_PROC_LOCK_MAIN); + erts_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR); } } set_proc_exiting(c_p, - erts_smp_atomic32_read_acqb(&c_p->state), + erts_atomic32_read_acqb(&c_p->state), c_p->pending_exit.reason, c_p->pending_exit.bp); c_p->pending_exit.reason = THE_NON_VALUE; c_p->pending_exit.bp = NULL; if (xlocks) - erts_smp_proc_unlock(c_p, xlocks); + erts_proc_unlock(c_p, xlocks); } +static void save_pending_exiter(Process *p, ErtsProcList *plp); + static void -handle_pending_exiters(ErtsProcList *pnd_xtrs) +do_handle_pending_exiters(ErtsProcList *pnd_xtrs) { /* 'list' is expected to have been fetched (i.e. not a ring anymore) */ ErtsProcList *plp = pnd_xtrs; while (plp) { - ErtsProcList *free_plp; - Process *p = erts_pid2proc(NULL, 0, plp->pid, ERTS_PROC_LOCKS_ALL); + ErtsProcList *next_plp = plp->next; + Process *p = erts_proc_lookup(plp->pid); if (p) { - if (erts_proclist_same(plp, p)) { - erts_aint32_t state = erts_smp_atomic32_read_acqb(&p->state); - if (!(state & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS))) { - ASSERT(state & ERTS_PSFLG_PENDING_EXIT); - erts_handle_pending_exit(p, ERTS_PROC_LOCKS_ALL); + erts_aint32_t state; + /* + * If the process is running on a normal scheduler, the + * pending exit will soon be detected and handled by the + * scheduler running the process (at schedule in/out). + */ + if (erts_proc_trylock(p, ERTS_PROC_LOCKS_ALL) != EBUSY) { + if (erts_proclist_same(plp, p)) { + state = erts_atomic32_read_acqb(&p->state); + if (!(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_EXITING))) { + ASSERT(state & ERTS_PSFLG_PENDING_EXIT); + erts_handle_pending_exit(p, ERTS_PROC_LOCKS_ALL); + } } + erts_proc_unlock(p, ERTS_PROC_LOCKS_ALL); + } + else { + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + if (erts_proclist_same(plp, p)) { + state = erts_atomic32_read_acqb(&p->state); + if (!(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_EXITING))) { + /* + * Save process and try to acquire all + * locks at a later time... + */ + save_pending_exiter(p, plp); + plp = NULL; + } + } + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } - erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); } - free_plp = plp; - plp = plp->next; - proclist_destroy(free_plp); + if (plp) + proclist_destroy(plp); + plp = next_plp; } } static void -save_pending_exiter(Process *p) +save_pending_exiter(Process *p, ErtsProcList *plp) { - ErtsProcList *plp; + ErtsSchedulerSleepInfo *ssi; ErtsRunQueue *rq; - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); - rq = erts_get_runq_current(NULL); + rq = RUNQ_READ_RQ(&p->run_queue); + ASSERT(rq && !ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); - plp = proclist_create(p); + if (!plp) + plp = proclist_create(p); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); erts_proclist_store_last(&rq->procs.pending_exiters, plp); non_empty_runq(rq); - erts_smp_runq_unlock(rq); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - wake_dirty_schedulers(rq, 0); - else -#endif - wake_scheduler(rq); + ssi = rq->scheduler->ssi; + + erts_runq_unlock(rq); + + set_aux_work_flags_wakeup_nob(ssi, ERTS_SSI_AUX_WORK_PENDING_EXITERS); } -#endif /* * This function delivers an EXIT message to a process @@ -11172,43 +12327,47 @@ static ERTS_INLINE void send_exit_message(Process *to, ErtsProcLocks *to_locksp, Eterm exit_term, Uint term_size, Eterm token) { - if (token == NIL -#ifdef USE_VM_PROBES - || token == am_have_dt_utag -#endif - ) { - Eterm* hp; - Eterm mess; - ErlHeapFragment* bp; - ErlOffHeap *ohp; - - hp = erts_alloc_message_heap(term_size, &bp, &ohp, to, to_locksp); + ErtsMessage *mp; + ErlOffHeap *ohp; + Eterm* hp; + Eterm mess; +#ifdef SHCOPY_SEND + erts_shcopy_t info; +#endif + + if (!have_seqtrace(token)) { +#ifdef SHCOPY_SEND + INITIALIZE_SHCOPY(info); + term_size = copy_shared_calculate(exit_term, &info); + mp = erts_alloc_message_heap(to, to_locksp, term_size, &hp, &ohp); + mess = copy_shared_perform(exit_term, term_size, &info, &hp, ohp); + DESTROY_SHCOPY(info); +#else + mp = erts_alloc_message_heap(to, to_locksp, term_size, &hp, &ohp); mess = copy_struct(exit_term, term_size, &hp, ohp); - erts_queue_message(to, to_locksp, bp, mess, NIL -#ifdef USE_VM_PROBES - , NIL #endif - ); + erts_queue_message(to, *to_locksp, mp, mess, am_system); } else { - ErlHeapFragment* bp; - Eterm* hp; - Eterm mess; Eterm temp_token; Uint sz_token; ASSERT(is_tuple(token)); sz_token = size_object(token); - bp = new_message_buffer(term_size+sz_token); - hp = bp->mem; - mess = copy_struct(exit_term, term_size, &hp, &bp->off_heap); - /* the trace token must in this case be updated by the caller */ - seq_trace_output(token, mess, SEQ_TRACE_SEND, to->common.id, NULL); - temp_token = copy_struct(token, sz_token, &hp, &bp->off_heap); - erts_queue_message(to, to_locksp, bp, mess, temp_token -#ifdef USE_VM_PROBES - , NIL +#ifdef SHCOPY_SEND + INITIALIZE_SHCOPY(info); + term_size = copy_shared_calculate(exit_term, &info); + mp = erts_alloc_message_heap(to, to_locksp, term_size+sz_token, &hp, &ohp); + mess = copy_shared_perform(exit_term, term_size, &info, &hp, ohp); + DESTROY_SHCOPY(info); +#else + mp = erts_alloc_message_heap(to, to_locksp, term_size+sz_token, &hp, &ohp); + mess = copy_struct(exit_term, term_size, &hp, ohp); #endif - ); + /* the trace token must in this case be updated by the caller */ + seq_trace_output(token, mess, SEQ_TRACE_SEND, to->common.id, to); + temp_token = copy_struct(token, sz_token, &hp, ohp); + ERL_MESSAGE_TOKEN(mp) = temp_token; + erts_queue_message(to, *to_locksp, mp, mess, am_system); } } @@ -11293,11 +12452,11 @@ send_exit_signal(Process *c_p, /* current process if and only Uint32 flags /* flags */ ) { - erts_aint32_t state = erts_smp_atomic32_read_nob(&rp->state); + erts_aint32_t state = erts_atomic32_read_nob(&rp->state); Eterm rsn = reason == am_kill ? am_killed : reason; - ERTS_SMP_LC_ASSERT(*rp_locks == erts_proc_lc_my_proc_locks(rp)); - ERTS_SMP_LC_ASSERT((*rp_locks & ERTS_PROC_LOCKS_XSIG_SEND) + ERTS_LC_ASSERT(*rp_locks == erts_proc_lc_my_proc_locks(rp)); + ERTS_LC_ASSERT((*rp_locks & ERTS_PROC_LOCKS_XSIG_SEND) == ERTS_PROC_LOCKS_XSIG_SEND); ASSERT(reason != THE_NON_VALUE); @@ -11317,11 +12476,10 @@ send_exit_signal(Process *c_p, /* current process if and only if ((state & ERTS_PSFLG_TRAP_EXIT) && (reason != am_kill || (flags & ERTS_XSIG_FLG_IGN_KILL))) { - if (is_not_nil(token) -#ifdef USE_VM_PROBES - && token != am_have_dt_utag -#endif - && token_update) + /* have to release the status and trace lock in order to send the exit message */ + erts_proc_unlock(rp, *rp_locks & (ERTS_PROC_LOCKS_XSIG_SEND|ERTS_PROC_LOCK_TRACE)); + *rp_locks &= ~(ERTS_PROC_LOCKS_XSIG_SEND|ERTS_PROC_LOCK_TRACE); + if (have_seqtrace(token) && token_update) seq_trace_update_send(token_update); if (is_value(exit_tuple)) send_exit_message(rp, rp_locks, exit_tuple, exit_tuple_sz, token); @@ -11330,7 +12488,6 @@ send_exit_signal(Process *c_p, /* current process if and only return 1; /* Receiver will get a message */ } else if (reason != am_normal || (flags & ERTS_XSIG_FLG_NO_IGN_NORMAL)) { -#ifdef ERTS_SMP if (!(state & (ERTS_PSFLG_EXITING|ERTS_PSFLG_PENDING_EXIT))) { ASSERT(!rp->pending_exit.bp); @@ -11340,41 +12497,60 @@ send_exit_signal(Process *c_p, /* current process if and only if (*rp_locks != ERTS_PROC_LOCKS_ALL) { ErtsProcLocks need_locks = (~(*rp_locks) & ERTS_PROC_LOCKS_ALL); - if (erts_smp_proc_trylock(c_p, need_locks) == EBUSY) { - erts_smp_proc_unlock(c_p, + if (erts_proc_trylock(c_p, need_locks) == EBUSY) { + erts_proc_unlock(c_p, *rp_locks & ~ERTS_PROC_LOCK_MAIN); - erts_smp_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR); + erts_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR); } *rp_locks = ERTS_PROC_LOCKS_ALL; } set_proc_exiting(c_p, state, rsn, NULL); } - else if (!(state & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS))) { + else if (!(state & (ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING_SYS))) { /* Process not running ... */ ErtsProcLocks need_locks = ~(*rp_locks) & ERTS_PROC_LOCKS_ALL; + ErlHeapFragment *bp = NULL; + Eterm rsn_cpy; if (need_locks - && erts_smp_proc_trylock(rp, need_locks) == EBUSY) { + && erts_proc_trylock(rp, need_locks) == EBUSY) { /* ... but we havn't got all locks on it ... */ - save_pending_exiter(rp); + save_pending_exiter(rp, NULL); /* * The pending exit will be discovered when next * process is scheduled in */ - goto set_pending_exit; + goto set_pending_exit; } + /* ...and we have all locks on it... */ + *rp_locks = ERTS_PROC_LOCKS_ALL; + + state = erts_atomic32_read_nob(&rp->state); + + if (is_immed(rsn)) + rsn_cpy = rsn; else { - /* ...and we have all locks on it... */ - *rp_locks = ERTS_PROC_LOCKS_ALL; - set_proc_exiting(rp, - state, - (is_immed(rsn) - ? rsn - : copy_object(rsn, rp)), - NULL); + Eterm *hp; + ErlOffHeap *ohp; + Uint rsn_sz = size_object(rsn); + if (state & ERTS_PSFLG_DIRTY_RUNNING) { + bp = new_message_buffer(rsn_sz); + ohp = &bp->off_heap; + hp = &bp->mem[0]; + } + else + { + hp = HAlloc(rp, rsn_sz); + ohp = &rp->off_heap; + } + rsn_cpy = copy_struct(rsn, rsn_sz, &hp, ohp); } + + set_proc_exiting(rp, state, rsn_cpy, bp); } else { /* Process running... */ - + /* * The pending exit will be discovered when the process * is scheduled out if not discovered earlier. @@ -11396,8 +12572,30 @@ send_exit_signal(Process *c_p, /* current process if and only &bp->off_heap); rp->pending_exit.bp = bp; } - erts_smp_atomic32_read_bor_relb(&rp->state, - ERTS_PSFLG_PENDING_EXIT); + + /* + * If no dirty work has been scheduled, pending exit will + * be discovered when the process is scheduled. If dirty work + * has been scheduled, we may need to add it to a normal run + * queue... + */ + { + erts_aint32_t a = erts_atomic32_read_nob(&rp->state); + while (1) { + erts_aint32_t n, e; + int dwork; + n = e = a; + n |= ERTS_PSFLG_PENDING_EXIT; + dwork = !!(n & ERTS_PSFLGS_DIRTY_WORK); + n &= ~ERTS_PSFLGS_DIRTY_WORK; + a = erts_atomic32_cmpxchg_mb(&rp->state, n, e); + if (a == e) { + if (dwork) + erts_schedule_process(rp, n, *rp_locks); + break; + } + } + } } } /* else: @@ -11409,17 +12607,6 @@ send_exit_signal(Process *c_p, /* current process if and only * that the receiver *will* exit; either on the pending * exit or by itself before seeing the pending exit. */ -#else /* !ERTS_SMP */ - erts_aint32_t state = erts_smp_atomic32_read_nob(&rp->state); - if (!(state & ERTS_PSFLG_EXITING)) { - set_proc_exiting(rp, - state, - (is_immed(rsn) || c_p == rp - ? rsn - : copy_object(rsn, rp)), - NULL); - } -#endif return -1; /* Receiver will exit */ } @@ -11459,24 +12646,24 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) ExitMonitorContext *pcontext = vpcontext; DistEntry *dep; ErtsMonitor *rmon; - Process *rp; - if (mon->type == MON_ORIGIN) { + switch (mon->type) { + case MON_ORIGIN: /* We are monitoring someone else, we need to demonitor that one.. */ - if (is_atom(mon->pid)) { /* remote by name */ - ASSERT(is_node_name_atom(mon->pid)); - dep = erts_sysname_to_connected_dist_entry(mon->pid); + if (is_atom(mon->u.pid)) { /* remote by name */ + ASSERT(is_node_name_atom(mon->u.pid)); + dep = erts_sysname_to_connected_dist_entry(mon->u.pid); if (dep) { - erts_smp_de_links_lock(dep); + erts_de_links_lock(dep); rmon = erts_remove_monitor(&(dep->monitors), mon->ref); - erts_smp_de_links_unlock(dep); + erts_de_links_unlock(dep); if (rmon) { ErtsDSigData dsd; int code = erts_dsig_prepare(&dsd, dep, NULL, ERTS_DSP_NO_LOCK, 0); if (code == ERTS_DSIG_PREP_CONNECTED) { code = erts_dsig_send_demonitor(&dsd, - rmon->pid, + rmon->u.pid, mon->name, mon->ref, 1); @@ -11484,37 +12671,46 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) } erts_destroy_monitor(rmon); } - erts_deref_dist_entry(dep); } } else { - ASSERT(is_pid(mon->pid)); - if (is_internal_pid(mon->pid)) { /* local by pid or name */ - rp = erts_pid2proc(NULL, 0, mon->pid, ERTS_PROC_LOCK_LINK); + ASSERT(is_pid(mon->u.pid) || is_port(mon->u.pid)); + /* if is local by pid or name */ + if (is_internal_pid(mon->u.pid)) { + Process *rp = erts_pid2proc(NULL, 0, mon->u.pid, ERTS_PROC_LOCK_LINK); if (!rp) { goto done; } rmon = erts_remove_monitor(&ERTS_P_MONITORS(rp), mon->ref); - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_LINK); + erts_proc_unlock(rp, ERTS_PROC_LOCK_LINK); if (rmon == NULL) { goto done; } erts_destroy_monitor(rmon); - } else { /* remote by pid */ - ASSERT(is_external_pid(mon->pid)); - dep = external_pid_dist_entry(mon->pid); + } else if (is_internal_port(mon->u.pid)) { + /* Is a local port */ + Port *prt = erts_port_lookup_raw(mon->u.pid); + if (!prt) { + goto done; + } + erts_port_demonitor(pcontext->p, + ERTS_PORT_DEMONITOR_ORIGIN_ON_DEATHBED, + prt, mon->ref, NULL); + } else { /* remote by pid */ + ASSERT(is_external_pid(mon->u.pid)); + dep = external_pid_dist_entry(mon->u.pid); ASSERT(dep != NULL); if (dep) { - erts_smp_de_links_lock(dep); + erts_de_links_lock(dep); rmon = erts_remove_monitor(&(dep->monitors), mon->ref); - erts_smp_de_links_unlock(dep); + erts_de_links_unlock(dep); if (rmon) { ErtsDSigData dsd; int code = erts_dsig_prepare(&dsd, dep, NULL, ERTS_DSP_NO_LOCK, 0); if (code == ERTS_DSIG_PREP_CONNECTED) { code = erts_dsig_send_demonitor(&dsd, - rmon->pid, - mon->pid, + rmon->u.pid, + mon->u.pid, mon->ref, 1); ASSERT(code == ERTS_DSIG_SEND_OK); @@ -11524,22 +12720,23 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) } } } - } else { /* type == MON_TARGET */ - ASSERT(mon->type == MON_TARGET); - ASSERT(is_pid(mon->pid) || is_internal_port(mon->pid)); - if (is_internal_port(mon->pid)) { - Port *prt = erts_id2port(mon->pid); + break; + case MON_TARGET: + ASSERT(is_pid(mon->u.pid) || is_internal_port(mon->u.pid)); + if (is_internal_port(mon->u.pid)) { + Port *prt = erts_id2port(mon->u.pid); if (prt == NULL) { goto done; } erts_fire_port_monitor(prt, mon->ref); erts_port_release(prt); - } else if (is_internal_pid(mon->pid)) {/* local by name or pid */ + } else if (is_internal_pid(mon->u.pid)) {/* local by name or pid */ Eterm watched; + Process *rp; DeclareTmpHeapNoproc(lhp,3); ErtsProcLocks rp_locks = (ERTS_PROC_LOCK_LINK | ERTS_PROC_LOCKS_MSG_SEND); - rp = erts_pid2proc(NULL, 0, mon->pid, rp_locks); + rp = erts_pid2proc(NULL, 0, mon->u.pid, rp_locks); if (rp == NULL) { goto done; } @@ -11556,25 +12753,25 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) } UnUseTmpHeapNoproc(3); /* else: demonitor while we exited, i.e. do nothing... */ - erts_smp_proc_unlock(rp, rp_locks); + erts_proc_unlock(rp, rp_locks); } else { /* external by pid or name */ - ASSERT(is_external_pid(mon->pid)); - dep = external_pid_dist_entry(mon->pid); + ASSERT(is_external_pid(mon->u.pid)); + dep = external_pid_dist_entry(mon->u.pid); ASSERT(dep != NULL); if (dep) { - erts_smp_de_links_lock(dep); + erts_de_links_lock(dep); rmon = erts_remove_monitor(&(dep->monitors), mon->ref); - erts_smp_de_links_unlock(dep); + erts_de_links_unlock(dep); if (rmon) { ErtsDSigData dsd; int code = erts_dsig_prepare(&dsd, dep, NULL, ERTS_DSP_NO_LOCK, 0); if (code == ERTS_DSIG_PREP_CONNECTED) { code = erts_dsig_send_m_exit(&dsd, - mon->pid, + mon->u.pid, (rmon->name != NIL ? rmon->name - : rmon->pid), + : rmon->u.pid), mon->ref, pcontext->reason); ASSERT(code == ERTS_DSIG_SEND_OK); @@ -11583,6 +12780,17 @@ static void doit_exit_monitor(ErtsMonitor *mon, void *vpcontext) } } } + break; + case MON_NIF_TARGET: + erts_fire_nif_monitor(mon->u.resource, + pcontext->p->common.id, + mon->ref); + break; + case MON_TIME_OFFSET: + erts_demonitor_time_offset(mon->ref); + break; + default: + ERTS_INTERNAL_ERROR("Invalid monitor type"); } done: /* As the monitors are previously removed from the process, @@ -11612,6 +12820,7 @@ static void doit_exit_link(ErtsLink *lnk, void *vpcontext) DistEntry *dep; Process *rp; + switch(lnk->type) { case LINK_PID: if(is_internal_port(item)) { @@ -11659,12 +12868,16 @@ static void doit_exit_link(ErtsLink *lnk, void *vpcontext) if (xres >= 0 && IS_TRACED_FL(rp, F_TRACE_PROCS)) { /* We didn't exit the process and it is traced */ if (IS_TRACED_FL(rp, F_TRACE_PROCS)) { - trace_proc(p, rp, am_getting_unlinked, p->common.id); + if (rp_locks & ERTS_PROC_LOCKS_XSIG_SEND) { + erts_proc_unlock(rp, ERTS_PROC_LOCKS_XSIG_SEND); + rp_locks &= ~ERTS_PROC_LOCKS_XSIG_SEND; + } + trace_proc(NULL, 0, rp, am_getting_unlinked, p->common.id); } } } ASSERT(rp != p); - erts_smp_proc_unlock(rp, rp_locks); + erts_proc_unlock(rp, rp_locks); } } else if (is_external_pid(item)) { @@ -11674,14 +12887,14 @@ static void doit_exit_link(ErtsLink *lnk, void *vpcontext) int code; ErtsDistLinkData dld; erts_remove_dist_link(&dld, p->common.id, item, dep); - erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN); + erts_proc_lock(p, ERTS_PROC_LOCK_MAIN); code = erts_dsig_prepare(&dsd, dep, p, ERTS_DSP_NO_LOCK, 0); if (code == ERTS_DSIG_PREP_CONNECTED) { code = erts_dsig_send_exit_tt(&dsd, p->common.id, item, reason, SEQ_TRACE_TOKEN(p)); ASSERT(code == ERTS_DSIG_SEND_OK); } - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_MAIN); + erts_proc_unlock(p, ERTS_PROC_LOCK_MAIN); erts_destroy_dist_link(&dld); } } @@ -11692,17 +12905,16 @@ static void doit_exit_link(ErtsLink *lnk, void *vpcontext) if(dep) { /* dist entries have node links in a separate structure to avoid confusion */ - erts_smp_de_links_lock(dep); + erts_de_links_lock(dep); rlnk = erts_remove_link(&(dep->node_links), p->common.id); - erts_smp_de_links_unlock(dep); + erts_de_links_unlock(dep); if (rlnk) erts_destroy_link(rlnk); - erts_deref_dist_entry(dep); } break; default: - erl_exit(1, "bad type in link list\n"); + erts_exit(ERTS_ERROR_EXIT, "bad type in link list\n"); break; } erts_destroy_link(lnk); @@ -11714,9 +12926,10 @@ resume_suspend_monitor(ErtsSuspendMonitor *smon, void *vc_p) Process *suspendee = erts_pid2proc((Process *) vc_p, ERTS_PROC_LOCK_MAIN, smon->pid, ERTS_PROC_LOCK_STATUS); if (suspendee) { + ASSERT(suspendee != vc_p); if (smon->active) - resume_process(suspendee); - erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); + resume_process(suspendee, ERTS_PROC_LOCK_STATUS); + erts_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); } erts_destroy_suspend_monitor(smon); } @@ -11728,6 +12941,7 @@ erts_do_exit_process(Process* p, Eterm reason) { p->arity = 0; /* No live registers */ p->fvalue = reason; + #ifdef USE_VM_PROBES if (DTRACE_ENABLED(process_exit)) { @@ -11740,18 +12954,17 @@ erts_do_exit_process(Process* p, Eterm reason) } #endif -#ifdef ERTS_SMP - ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); + if (p->static_flags & ERTS_STC_FLG_SYSTEM_PROC) + erts_exit(ERTS_DUMP_EXIT, "System process %T terminated: %T\n", + p->common.id, reason); + + ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); /* By locking all locks (main lock is already locked) when going to exiting state (ERTS_PSFLG_EXITING), it is enough to take any lock when looking up a process (erts_pid2proc()) to prevent the looked up process from exiting until the lock has been released. */ - erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); -#endif + erts_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); -#ifndef ERTS_SMP - set_proc_self_exiting(p); -#else if (ERTS_PSFLG_PENDING_EXIT & set_proc_self_exiting(p)) { /* Process exited before pending exit was received... */ p->pending_exit.reason = THE_NON_VALUE; @@ -11763,15 +12976,12 @@ erts_do_exit_process(Process* p, Eterm reason) cancel_suspend_of_suspendee(p, ERTS_PROC_LOCKS_ALL); - ERTS_SMP_MSGQ_MV_INQ2PRIVQ(p); -#endif + ERTS_MSGQ_MV_INQ2PRIVQ(p); if (IS_TRACED(p)) { if (IS_TRACED_FL(p, F_TRACE_CALLS)) erts_schedule_time_break(p, ERTS_BP_CALL_TIME_SCHEDULE_EXITING); - if (IS_TRACED_FL(p,F_TRACE_PROCS)) - trace_proc(p, p, am_exit, reason); } erts_trace_check_exiting(p->common.id); @@ -11779,20 +12989,24 @@ erts_do_exit_process(Process* p, Eterm reason) ASSERT((ERTS_TRACE_FLAGS(p) & F_INITIAL_TRACE_FLAGS) == F_INITIAL_TRACE_FLAGS); - cancel_timer(p); /* Always cancel timer just in case */ + ASSERT(erts_proc_read_refc(p) > 0); + if (ERTS_PTMR_IS_SET(p)) { + erts_cancel_proc_timer(p); + ASSERT(erts_proc_read_refc(p) > 0); + } + + erts_proc_unlock(p, ERTS_PROC_LOCKS_ALL_MINOR); - if (p->u.bif_timers) - erts_cancel_bif_timers(p, ERTS_PROC_LOCKS_ALL); + if (IS_TRACED_FL(p,F_TRACE_PROCS)) + trace_proc(p, ERTS_PROC_LOCK_MAIN, p, am_exit, reason); - erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL_MINOR); /* - * The p->u.bif_timers of this process can *not* be used anymore; + * p->u.initial of this process can *not* be used anymore; * will be overwritten by misc termination data. */ p->u.terminate = NULL; - erts_continue_exit_process(p); } @@ -11803,39 +13017,71 @@ erts_continue_exit_process(Process *p) ErtsMonitor *mon; ErtsProcLocks curr_locks = ERTS_PROC_LOCK_MAIN; Eterm reason = p->fvalue; - DistEntry *dep; - struct saved_calls *scb; - process_breakpoint_time_t *pbt; + DistEntry *dep = NULL; erts_aint32_t state; + int delay_del_proc = 0; #ifdef DEBUG int yield_allowed = 1; #endif - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); ASSERT(ERTS_PROC_IS_EXITING(p)); -#ifdef ERTS_SMP + ASSERT(erts_proc_read_refc(p) > 0); + if (p->bif_timers) { + if (erts_cancel_bif_timers(p, &p->bif_timers, &p->u.terminate)) { + ASSERT(erts_proc_read_refc(p) > 0); + goto yield; + } + ASSERT(erts_proc_read_refc(p) > 0); + p->bif_timers = NULL; + } + + if (p->flags & F_SCHDLR_ONLN_WAITQ) + abort_sched_onln_chng_waitq(p); + if (p->flags & F_HAVE_BLCKD_MSCHED) { ErtsSchedSuspendResult ssr; - ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 1); + ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 0, 1); + switch (ssr) { + case ERTS_SCHDLR_SSPND_YIELD_RESTART: + goto yield; + case ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE: + case ERTS_SCHDLR_SSPND_YIELD_DONE: + p->flags &= ~F_HAVE_BLCKD_MSCHED; + break; + case ERTS_SCHDLR_SSPND_EINVAL: + default: + erts_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error: %d\n", + __FILE__, __LINE__, (int) ssr); + } + } + if (p->flags & F_HAVE_BLCKD_NMSCHED) { + ErtsSchedSuspendResult ssr; + ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 1, 1); switch (ssr) { case ERTS_SCHDLR_SSPND_YIELD_RESTART: goto yield; case ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED: case ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED: case ERTS_SCHDLR_SSPND_DONE: case ERTS_SCHDLR_SSPND_YIELD_DONE: p->flags &= ~F_HAVE_BLCKD_MSCHED; break; case ERTS_SCHDLR_SSPND_EINVAL: default: - erl_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error: %d\n", + erts_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error: %d\n", __FILE__, __LINE__, (int) ssr); } } -#endif if (p->flags & F_USING_DB) { if (erts_db_process_exiting(p, ERTS_PROC_LOCK_MAIN)) @@ -11844,12 +13090,22 @@ erts_continue_exit_process(Process *p) } erts_set_gc_state(p, 1); - state = erts_smp_atomic32_read_acqb(&p->state); - if (state & ERTS_PSFLG_ACTIVE_SYS) { + state = erts_atomic32_read_acqb(&p->state); + if (state & ERTS_PSFLG_ACTIVE_SYS + || p->dirty_sys_tasks + ) { if (cleanup_sys_tasks(p, state, CONTEXT_REDS) >= CONTEXT_REDS/2) goto yield; } +#ifdef DEBUG + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); + ASSERT(p->sys_task_qs == NULL); + ASSERT(ERTS_PROC_GET_DELAYED_GC_TASK_QS(p) == NULL); + ASSERT(p->dirty_sys_tasks == NULL); + erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS); +#endif + if (p->flags & F_USING_DDLL) { erts_ddll_proc_dead(p, ERTS_PROC_LOCK_MAIN); p->flags &= ~F_USING_DDLL; @@ -11877,7 +13133,10 @@ erts_continue_exit_process(Process *p) ASSERT(!p->common.u.alive.reg); } - erts_smp_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); + if (IS_TRACED_FL(p, F_TRACE_SCHED_EXIT)) + trace_sched(p, curr_locks, am_out_exited); + + erts_proc_lock(p, ERTS_PROC_LOCKS_ALL_MINOR); curr_locks = ERTS_PROC_LOCKS_ALL; /* @@ -11898,23 +13157,21 @@ erts_continue_exit_process(Process *p) { /* Do *not* use erts_get_runq_proc() */ ErtsRunQueue *rq; - rq = erts_get_runq_current(ERTS_GET_SCHEDULER_DATA_FROM_PROC(p)); + rq = erts_get_runq_current(erts_proc_sched_data(p)); - erts_smp_runq_lock(rq); + erts_runq_lock(rq); -#ifdef ERTS_SMP ASSERT(p->scheduler_data); ASSERT(p->scheduler_data->current_process == p); ASSERT(p->scheduler_data->free_process == NULL); p->scheduler_data->current_process = NULL; p->scheduler_data->free_process = p; -#endif /* Time of death! */ erts_ptab_delete_element(&erts_proc, &p->common); - erts_smp_runq_unlock(rq); + erts_runq_unlock(rq); } /* @@ -11926,47 +13183,45 @@ erts_continue_exit_process(Process *p) { /* Inactivate and notify free */ - erts_aint32_t n, e, a = erts_smp_atomic32_read_nob(&p->state); -#ifdef ERTS_SMP + erts_aint32_t n, e, a = erts_atomic32_read_nob(&p->state); int refc_inced = 0; -#endif while (1) { n = e = a; ASSERT(a & ERTS_PSFLG_EXITING); n |= ERTS_PSFLG_FREE; n &= ~ERTS_PSFLG_ACTIVE; -#ifdef ERTS_SMP if ((n & ERTS_PSFLG_IN_RUNQ) && !refc_inced) { - erts_smp_proc_inc_refc(p); + erts_proc_inc_refc(p); refc_inced = 1; } -#endif - a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); + a = erts_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; } -#ifdef ERTS_SMP + if (a & (ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS)) { + p->flags |= F_DELAYED_DEL_PROC; + delay_del_proc = 1; + /* + * The dirty scheduler decrease refc + * when done with the process... + */ + } + if (refc_inced && !(n & ERTS_PSFLG_IN_RUNQ)) - erts_smp_proc_dec_refc(p); -#endif + erts_proc_dec_refc(p); } - + dep = ((p->flags & F_DISTRIBUTION) - ? ERTS_PROC_SET_DIST_ENTRY(p, ERTS_PROC_LOCKS_ALL, NULL) - : NULL); - scb = ERTS_PROC_SET_SAVED_CALLS_BUF(p, ERTS_PROC_LOCKS_ALL, NULL); - pbt = ERTS_PROC_SET_CALL_TIME(p, ERTS_PROC_LOCKS_ALL, NULL); - - erts_smp_proc_unlock(p, ERTS_PROC_LOCKS_ALL); -#ifdef BM_COUNTERS - processes_busy--; -#endif + ? ERTS_PROC_SET_DIST_ENTRY(p, NULL) + : NULL); + + erts_proc_unlock(p, ERTS_PROC_LOCKS_ALL); if (dep) { - erts_do_net_exits(dep, reason); - if(dep) - erts_deref_dist_entry(dep); + erts_do_net_exits(dep, (reason == am_kill) ? am_killed : reason); + erts_deref_dist_entry(dep); } /* @@ -11998,18 +13253,15 @@ erts_continue_exit_process(Process *p) have none here */ } - if (scb) - erts_free(ERTS_ALC_T_CALLS_BUF, (void *) scb); + erts_proc_lock(p, ERTS_PROC_LOCK_MAIN); + ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); - if (pbt) - erts_free(ERTS_ALC_T_BPD, (void *) pbt); + erts_flush_trace_messages(p, ERTS_PROC_LOCK_MAIN); - delete_process(p); + ERTS_TRACER_CLEAR(&ERTS_TRACER(p)); -#ifdef ERTS_SMP - erts_smp_proc_lock(p, ERTS_PROC_LOCK_MAIN); - ERTS_SMP_CHK_HAVE_ONLY_MAIN_PROC_LOCK(p); -#endif + if (!delay_del_proc) + delete_process(p); return; @@ -12019,79 +13271,22 @@ erts_continue_exit_process(Process *p) ASSERT(yield_allowed); #endif - ERTS_SMP_LC_ASSERT(curr_locks == erts_proc_lc_my_proc_locks(p)); - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & curr_locks); + ERTS_LC_ASSERT(curr_locks == erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN & curr_locks); p->i = (BeamInstr *) beam_continue_exit; if (!(curr_locks & ERTS_PROC_LOCK_STATUS)) { - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + erts_proc_lock(p, ERTS_PROC_LOCK_STATUS); curr_locks |= ERTS_PROC_LOCK_STATUS; } if (curr_locks != ERTS_PROC_LOCK_MAIN) - erts_smp_proc_unlock(p, ~ERTS_PROC_LOCK_MAIN & curr_locks); - - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); - -} - -/* Callback for process timeout */ -static void -timeout_proc(Process* p) -{ - erts_aint32_t state; - BeamInstr** pi = (BeamInstr **) p->def_arg_reg; - p->i = *pi; - p->flags |= F_TIMO; - p->flags &= ~F_INSLPQUEUE; - - state = erts_smp_atomic32_read_acqb(&p->state); - if (!(state & ERTS_PSFLG_ACTIVE)) - schedule_process(p, state); -} - - -void -cancel_timer(Process* p) -{ - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(p)); - p->flags &= ~(F_INSLPQUEUE|F_TIMO); -#ifdef ERTS_SMP - erts_cancel_smp_ptimer(p->common.u.alive.ptimer); -#else - erts_cancel_timer(&p->common.u.alive.tm); -#endif -} + erts_proc_unlock(p, ~ERTS_PROC_LOCK_MAIN & curr_locks); -/* - * Insert a process into the time queue, with a timeout 'timeout' in ms. - */ -void -set_timer(Process* p, Uint timeout) -{ - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(p)); + ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(p)); - /* check for special case timeout=0 DONT ADD TO time queue */ - if (timeout == 0) { - p->flags |= F_TIMO; - return; - } - p->flags |= F_INSLPQUEUE; - p->flags &= ~F_TIMO; - -#ifdef ERTS_SMP - erts_create_smp_ptimer(&p->common.u.alive.ptimer, - p->common.id, - (ErlTimeoutProc) timeout_proc, - timeout); -#else - erts_set_timer(&p->common.u.alive.tm, - (ErlTimeoutProc) timeout_proc, - NULL, - (void*) p, - timeout); -#endif + BUMP_ALL_REDS(p); } /* @@ -12099,7 +13294,7 @@ set_timer(Process* p, Uint timeout) */ void -erts_stack_dump(int to, void *to_arg, Process *p) +erts_stack_dump(fmtfn_t to, void *to_arg, Process *p) { Eterm* sp; int yreg = -1; @@ -12109,12 +13304,12 @@ erts_stack_dump(int to, void *to_arg, Process *p) } erts_program_counter_info(to, to_arg, p); for (sp = p->stop; sp < STACK_START(p); sp++) { - yreg = stack_element_dump(to, to_arg, p, sp, yreg); + yreg = stack_element_dump(to, to_arg, sp, yreg); } } void -erts_program_counter_info(int to, void *to_arg, Process *p) +erts_program_counter_info(fmtfn_t to, void *to_arg, Process *p) { erts_aint32_t state; int i; @@ -12125,7 +13320,7 @@ erts_program_counter_info(int to, void *to_arg, Process *p) erts_print(to, to_arg, "CP: %p (", p->cp); print_function_from_pc(to, to_arg, p->cp); erts_print(to, to_arg, ")\n"); - state = erts_smp_atomic32_read_acqb(&p->state); + state = erts_atomic32_read_acqb(&p->state); if (!(state & (ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS | ERTS_PSFLG_GC))) { @@ -12144,10 +13339,10 @@ erts_program_counter_info(int to, void *to_arg, Process *p) } static void -print_function_from_pc(int to, void *to_arg, BeamInstr* x) +print_function_from_pc(fmtfn_t to, void *to_arg, BeamInstr* x) { - BeamInstr* addr = find_function_from_pc(x); - if (addr == NULL) { + ErtsCodeMFA *cmfa = find_function_from_pc(x); + if (cmfa == NULL) { if (x == beam_exit) { erts_print(to, to_arg, "<terminate process>"); } else if (x == beam_continue_exit) { @@ -12161,12 +13356,13 @@ print_function_from_pc(int to, void *to_arg, BeamInstr* x) } } else { erts_print(to, to_arg, "%T:%T/%d + %d", - addr[0], addr[1], addr[2], ((x-addr)-2) * sizeof(Eterm)); + cmfa->module, cmfa->function, cmfa->arity, + (x-(BeamInstr*)cmfa) * sizeof(Eterm)); } } static int -stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg) +stack_element_dump(fmtfn_t to, void *to_arg, Eterm* sp, int yreg) { Eterm x = *sp; @@ -12180,7 +13376,7 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg) } if (is_CP(x)) { - erts_print(to, to_arg, "Return addr %p (", (Eterm *) EXPAND_POINTER(x)); + erts_print(to, to_arg, "Return addr %p (", (Eterm *) x); print_function_from_pc(to, to_arg, cp_val(x)); erts_print(to, to_arg, ")\n"); yreg = 0; @@ -12195,48 +13391,260 @@ stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp, int yreg) } /* + * Print scheduler information + */ +void +erts_print_scheduler_info(fmtfn_t to, void *to_arg, ErtsSchedulerData *esdp) { + int i; + erts_aint32_t flg; + Process *p; + + erts_print(to, to_arg, "=scheduler:%u\n", esdp->no); + + flg = erts_atomic32_read_dirty(&esdp->ssi->flags); + erts_print(to, to_arg, "Scheduler Sleep Info Flags: "); + for (i = 0; i < ERTS_SSI_FLGS_MAX && flg; i++) { + erts_aint32_t chk = (1 << i); + if (flg & chk) { + switch (chk) { + case ERTS_SSI_FLG_SLEEPING: + erts_print(to, to_arg, "SLEEPING"); break; + case ERTS_SSI_FLG_POLL_SLEEPING: + erts_print(to, to_arg, "POLL_SLEEPING"); break; + case ERTS_SSI_FLG_TSE_SLEEPING: + erts_print(to, to_arg, "TSE_SLEEPING"); break; + case ERTS_SSI_FLG_WAITING: + erts_print(to, to_arg, "WAITING"); break; + case ERTS_SSI_FLG_SUSPENDED: + erts_print(to, to_arg, "SUSPENDED"); break; + case ERTS_SSI_FLG_MSB_EXEC: + erts_print(to, to_arg, "MSB_EXEC"); break; + default: + erts_print(to, to_arg, "UNKNOWN(%d)", flg); break; + } + if (flg > chk) + erts_print(to, to_arg, " | "); + flg -= chk; + } + } + erts_print(to, to_arg, "\n"); + + flg = erts_atomic32_read_dirty(&esdp->ssi->aux_work); + erts_print(to, to_arg, "Scheduler Sleep Info Aux Work: "); + for (i = 0; i < ERTS_SSI_AUX_WORK_NO_FLAGS && flg; i++) { + erts_aint32_t chk = (1 << i); + if (flg & chk) { + if (erts_aux_work_flag_descr[i]) + erts_print(to, to_arg, "%s", erts_aux_work_flag_descr[i]); + else + erts_print(to, to_arg, "1<<%d", i); + if (flg > chk) + erts_print(to, to_arg, " | "); + flg -= chk; + } + } + erts_print(to, to_arg, "\n"); + + erts_print(to, to_arg, "Current Port: "); + if (esdp->current_port) + erts_print(to, to_arg, "%T", esdp->current_port->common.id); + erts_print(to, to_arg, "\n"); + + for (i = 0; i < ERTS_NO_PROC_PRIO_LEVELS; i++) { + erts_print(to, to_arg, "Run Queue "); + switch (i) { + case PRIORITY_MAX: + erts_print(to, to_arg, "Max "); + break; + case PRIORITY_HIGH: + erts_print(to, to_arg, "High "); + break; + case PRIORITY_NORMAL: + erts_print(to, to_arg, "Normal "); + break; + case PRIORITY_LOW: + erts_print(to, to_arg, "Low "); + break; + default: + erts_print(to, to_arg, "Unknown "); + break; + } + erts_print(to, to_arg, "Length: %d\n", + erts_atomic32_read_dirty(&esdp->run_queue->procs.prio_info[i].len)); + } + erts_print(to, to_arg, "Run Queue Port Length: %d\n", + erts_atomic32_read_dirty(&esdp->run_queue->ports.info.len)); + + flg = erts_atomic32_read_dirty(&esdp->run_queue->flags); + erts_print(to, to_arg, "Run Queue Flags: "); + for (i = 0; i < ERTS_RUNQ_FLG_MAX && flg; i++) { + erts_aint32_t chk = (1 << i); + if (flg & chk) { + switch (chk) { + case (1 << PRIORITY_MAX): + erts_print(to, to_arg, "NONEMPTY_MAX"); break; + case (1 << PRIORITY_HIGH): + erts_print(to, to_arg, "NONEMPTY_HIGH"); break; + case (1 << PRIORITY_NORMAL): + erts_print(to, to_arg, "NONEMPTY_NORMAL"); break; + case (1 << PRIORITY_LOW): + erts_print(to, to_arg, "NONEMPTY_LOW"); break; + case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)): + erts_print(to, to_arg, "EMIGRATE_MAX"); break; + case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)): + erts_print(to, to_arg, "EMIGRATE_HIGH"); break; + case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)): + erts_print(to, to_arg, "EMIGRATE_NORMAL"); break; + case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_EMIGRATE_SHFT)): + erts_print(to, to_arg, "EMIGRATE_LOW"); break; + case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)): + erts_print(to, to_arg, "IMMIGRATE_MAX"); break; + case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)): + erts_print(to, to_arg, "IMMIGRATE_HIGH"); break; + case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)): + erts_print(to, to_arg, "IMMIGRATE_NORMAL"); break; + case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_IMMIGRATE_SHFT)): + erts_print(to, to_arg, "IMMIGRATE_LOW"); break; + case (1 << (PRIORITY_MAX + ERTS_RUNQ_FLGS_EVACUATE_SHFT)): + erts_print(to, to_arg, "EVACUATE_MAX"); break; + case (1 << (PRIORITY_HIGH + ERTS_RUNQ_FLGS_EVACUATE_SHFT)): + erts_print(to, to_arg, "EVACUATE_HIGH"); break; + case (1 << (PRIORITY_NORMAL + ERTS_RUNQ_FLGS_EVACUATE_SHFT)): + erts_print(to, to_arg, "EVACUATE_NORMAL"); break; + case (1 << (PRIORITY_LOW + ERTS_RUNQ_FLGS_EVACUATE_SHFT)): + erts_print(to, to_arg, "EVACUATE_LOW"); break; + case ERTS_RUNQ_FLG_OUT_OF_WORK: + erts_print(to, to_arg, "OUT_OF_WORK"); break; + case ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK: + erts_print(to, to_arg, "HALFTIME_OUT_OF_WORK"); break; + case ERTS_RUNQ_FLG_SUSPENDED: + erts_print(to, to_arg, "SUSPENDED"); break; + case ERTS_RUNQ_FLG_CHK_CPU_BIND: + erts_print(to, to_arg, "CHK_CPU_BIND"); break; + case ERTS_RUNQ_FLG_INACTIVE: + erts_print(to, to_arg, "INACTIVE"); break; + case ERTS_RUNQ_FLG_NONEMPTY: + erts_print(to, to_arg, "NONEMPTY"); break; + case ERTS_RUNQ_FLG_PROTECTED: + erts_print(to, to_arg, "PROTECTED"); break; + case ERTS_RUNQ_FLG_EXEC: + erts_print(to, to_arg, "EXEC"); break; + case ERTS_RUNQ_FLG_MSB_EXEC: + erts_print(to, to_arg, "MSB_EXEC"); break; + case ERTS_RUNQ_FLG_MISC_OP: + erts_print(to, to_arg, "MISC_OP"); break; + default: + erts_print(to, to_arg, "UNKNOWN(%d)", flg); break; + } + if (flg > chk) + erts_print(to, to_arg, " | "); + flg -= chk; + } + } + erts_print(to, to_arg, "\n"); + + /* This *MUST* to be the last information in scheduler block */ + p = esdp->current_process; + erts_print(to, to_arg, "Current Process: "); + if (esdp->current_process && !(ERTS_TRACE_FLAGS(p) & F_SENSITIVE)) { + flg = erts_atomic32_read_dirty(&p->state); + erts_print(to, to_arg, "%T\n", p->common.id); + + erts_print(to, to_arg, "Current Process State: "); + erts_dump_process_state(to, to_arg, flg); + + erts_print(to, to_arg, "Current Process Internal State: "); + erts_dump_extended_process_state(to, to_arg, flg); + + erts_print(to, to_arg, "Current Process Program counter: %p (", p->i); + print_function_from_pc(to, to_arg, p->i); + erts_print(to, to_arg, ")\n"); + erts_print(to, to_arg, "Current Process CP: %p (", p->cp); + print_function_from_pc(to, to_arg, p->cp); + erts_print(to, to_arg, ")\n"); + + /* Getting this stacktrace can segfault if we are very very + unlucky if called while a process is being garbage collected. + Therefore we only call this on other schedulers if we either + have protection against segfaults, or we know that the process + is not garbage collecting. It *should* always be safe to call + on a process owned by us, even if it is currently being garbage + collected. + */ + erts_print(to, to_arg, "Current Process Limited Stack Trace:\n"); + erts_limited_stack_trace(to, to_arg, p); + } else + erts_print(to, to_arg, "\n"); + +} + +/* * A nice system halt closing all open port goes as follows: * 1) This function schedules the aux work ERTS_SSI_AUX_WORK_REAP_PORTS * on all schedulers, then schedules itself out. - * 2) All shedulers detect this and set the flag halt_in_progress + * 2) All shedulers detect this and set the flag ERTS_RUNQ_FLG_HALTING * on their run queue. The last scheduler sets all non-closed ports * ERTS_PORT_SFLG_HALT. Global atomic erts_halt_progress is used * as refcount to determine which is last. - * 3) While the run ques has flag halt_in_progress no processes + * 3) While the run queues has flag ERTS_RUNQ_FLG_HALTING no processes * will be scheduled, only ports. * 4) When the last port closes that scheduler calls erlang:halt/1. * The same global atomic is used as refcount. * * A BIF that calls this should make sure to schedule out to never come back: - * erl_halt((int)(- code)); + * erts_halt(code); * ERTS_BIF_YIELD1(bif_export[BIF_erlang_halt_1], BIF_P, NIL); */ -void erl_halt(int code) +void erts_halt(int code) { - if (-1 == erts_smp_atomic32_cmpxchg_acqb(&erts_halt_progress, + if (-1 == erts_atomic32_cmpxchg_acqb(&erts_halt_progress, erts_no_schedulers, -1)) { -#ifdef ERTS_DIRTY_SCHEDULERS - ERTS_DIRTY_CPU_RUNQ->halt_in_progress = 1; - ERTS_DIRTY_IO_RUNQ->halt_in_progress = 1; -#endif + ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_CPU_RUNQ, ERTS_RUNQ_FLG_HALTING); + ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_IO_RUNQ, ERTS_RUNQ_FLG_HALTING); erts_halt_code = code; notify_reap_ports_relb(); } } -#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) +#if defined(ERTS_ENABLE_LOCK_CHECK) int erts_dbg_check_halloc_lock(Process *p) { + ErtsSchedulerData *esdp; if (ERTS_PROC_LOCK_MAIN & erts_proc_lc_my_proc_locks(p)) return 1; + if ((p->static_flags & ERTS_STC_FLG_SHADOW_PROC) + && ERTS_SCHEDULER_IS_DIRTY(erts_get_scheduler_data())) + return 1; if (p->common.id == ERTS_INVALID_PID) return 1; - if (p->scheduler_data && p == p->scheduler_data->match_pseudo_process) + esdp = erts_proc_sched_data(p); + if (esdp && p == esdp->match_pseudo_process) return 1; if (erts_thr_progress_is_blocking()) return 1; return 0; } #endif + +void +erts_debug_later_op_foreach(void (*callback)(void*), + void (*func)(void *, ErtsThrPrgrVal, void *), + void *arg) +{ + int six; + if (!erts_thr_progress_is_blocking()) + ERTS_INTERNAL_ERROR("Not blocking thread progress"); + + for (six = 0; six < erts_no_schedulers; six++) { + ErtsSchedulerData *esdp = &erts_aligned_scheduler_data[six].esd; + ErtsThrPrgrLaterOp *lop = esdp->aux_work_data.later_op.first; + + while (lop) { + if (lop->func == callback) + func(arg, lop->later, lop->data); + lop = lop->next; + } + } +} |