diff options
-rw-r--r-- | erts/configure.in | 2 | ||||
-rw-r--r-- | erts/doc/src/erlang.xml | 33 | ||||
-rw-r--r-- | erts/emulator/beam/atom.names | 1 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 6 | ||||
-rw-r--r-- | erts/emulator/beam/bif.c | 33 | ||||
-rw-r--r-- | erts/emulator/beam/erl_drv_nif.h | 7 | ||||
-rw-r--r-- | erts/emulator/beam/erl_init.c | 25 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.c | 24 | ||||
-rw-r--r-- | erts/emulator/beam/erl_nif.h | 4 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 1363 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 46 | ||||
-rw-r--r-- | erts/emulator/test/scheduler_SUITE.erl | 95 | ||||
-rw-r--r-- | erts/etc/common/erlexec.c | 10 |
13 files changed, 1430 insertions, 219 deletions
diff --git a/erts/configure.in b/erts/configure.in index 02e5d12918..51e2e99c88 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -1243,6 +1243,7 @@ if test $emu_threads != yes; then AC_MSG_CHECKING(whether dirty schedulers should be enabled) if test "x$enable_dirty_schedulers" != "xno"; then AC_DEFINE(ERL_NIF_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) + AC_DEFINE(ERL_DRV_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) AC_MSG_RESULT(yes) else AC_MSG_RESULT(no) @@ -1271,6 +1272,7 @@ else EMU_THR_DEFS="$EMU_THR_DEFS -DERTS_DIRTY_SCHEDULERS" AC_DEFINE(ERTS_DIRTY_SCHEDULERS, 1, [Define if the emulator supports dirty schedulers]) AC_DEFINE(ERL_NIF_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) + AC_DEFINE(ERL_DRV_DIRTY_SCHEDULER_SUPPORT, 1, [Dirty scheduler support]) AC_MSG_RESULT(yes) else AC_MSG_RESULT(no) diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 0f312fb430..e440a3d270 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -5203,9 +5203,16 @@ ok <c>erlang:system_info(schedulers_online)</c>. </p> <p>Returns the old value of the flag.</p> - <p><em>Note that the dirty schedulers functionality is experimental</em>, and - that you have to enable support for dirty schedulers when building OTP in - order to try the functionality out.</p> + <p>Note that the number of dirty CPU schedulers online may change if the number of + schedulers online changes. For example, if there are 12 schedulers and all are + online, and 6 dirty CPU schedulers, all online as well, and <c>system_flag/2</c> + is used to set the number of schedulers online to 6, then the number of dirty + CPU schedulers online is automatically decreased by half as well, down to 3. + Similarly, the number of dirty CPU schedulers online increases proportionally + to increases in the number of schedulers online.</p> + <p><em>Note that the dirty schedulers functionality is experimental</em>, and + that you have to enable support for dirty schedulers when building OTP in order + to try out the functionality.</p> <p>For more information see <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso> and @@ -5438,6 +5445,15 @@ ok <![CDATA[1 <= SchedulersOnline <= erlang:system_info(schedulers)]]>. </p> <p>Returns the old value of the flag.</p> + <p>Note that if the emulator was built with support for <seealso + marker="#system_flag_dirty_cpu_schedulers_online">dirty schedulers</seealso>, + changing the number of schedulers online can also change the number of dirty + CPU schedulers online. For example, if there are 12 schedulers and all are + online, and 6 dirty CPU schedulers, all online as well, and <c>system_flag/2</c> + is used to set the number of schedulers online to 6, then the number of dirty + CPU schedulers online is automatically decreased by half as well, down to 3. + Similarly, the number of dirty CPU schedulers online increases proportionally + to increases in the number of schedulers online.</p> <p>For more information see, <seealso marker="#system_info_schedulers">erlang:system_info(schedulers)</seealso>, and @@ -5817,12 +5833,13 @@ ok boot time and cannot be changed after that. The number of dirty CPU scheduler threads online can however be changed at any time. The number of dirty CPU schedulers can be set on startup by passing - the <seealso marker="erts:erl#+SDcpu">+SDcpu</seealso> command line flag, see - <seealso marker="erts:erl#+SDcpu">erl(1)</seealso>. + the <seealso marker="erts:erl#+SDcpu">+SDcpu</seealso> or + <seealso marker="erts:erl#+SDPcpu">+SDPcpu</seealso> command line flags, + see <seealso marker="erts:erl#+SDcpu">erl(1)</seealso>. </p> <p><em>Note that the dirty schedulers functionality is experimental</em>, and that you have to enable support for dirty schedulers when building OTP in - order to try the functionality out.</p> + order to try out the functionality.</p> <p>See also <seealso marker="#system_flag_dirty_cpu_schedulers_online">erlang:system_flag(dirty_cpu_schedulers_online, DirtyCPUSchedulersOnline)</seealso>, <seealso marker="#system_info_dirty_cpu_schedulers_online">erlang:system_info(dirty_cpu_schedulers_online)</seealso>, <seealso marker="#system_info_dirty_io_schedulers">erlang:system_info(dirty_io_schedulers)</seealso>, @@ -5844,7 +5861,7 @@ ok </p> <p><em>Note that the dirty schedulers functionality is experimental</em>, and that you have to enable support for dirty schedulers when building OTP in - order to try the functionality out.</p> + order to try out the functionality.</p> <p>For more information, see <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso>, <seealso marker="#system_info_dirty_io_schedulers">erlang:system_info(dirty_io_schedulers)</seealso>, @@ -5864,7 +5881,7 @@ ok </p> <p><em>Note that the dirty schedulers functionality is experimental</em>, and that you have to enable support for dirty schedulers when building OTP in - order to try the functionality out.</p> + order to try out the functionality.</p> <p>For more information, see <seealso marker="#system_info_dirty_cpu_schedulers">erlang:system_info(dirty_cpu_schedulers)</seealso>, <seealso marker="#system_info_dirty_cpu_schedulers_online">erlang:system_info(dirty_cpu_schedulers_online)</seealso>, and diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 96547ba743..d28e519ae1 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -179,6 +179,7 @@ atom dexit atom depth atom dgroup_leader atom dictionary +atom dirty_cpu_schedulers_online atom disable_trace atom disabled atom display_items diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 6f295530b9..80c1265541 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1203,7 +1203,11 @@ void process_main(void) if (start_time != 0) { Sint64 diff = erts_timestamp_millis() - start_time; - if (diff > 0 && (Uint) diff > erts_system_monitor_long_schedule) { + if (diff > 0 && (Uint) diff > erts_system_monitor_long_schedule +#ifdef ERTS_DIRTY_SCHEDULERS + && !ERTS_SCHEDULER_IS_DIRTY(c_p->scheduler_data) +#endif + ) { BeamInstr *inptr = find_function_from_pc(start_time_i); BeamInstr *outptr = find_function_from_pc(c_p->i); monitor_long_schedule_proc(c_p,inptr,outptr,(Uint) diff); diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 9c4801041f..f34045a589 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -4333,7 +4333,11 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) switch (erts_set_schedulers_online(BIF_P, ERTS_PROC_LOCK_MAIN, signed_val(BIF_ARG_2), - &old_no)) { + &old_no +#ifdef ERTS_DIRTY_SCHEDULERS + , 0 +#endif + )) { case ERTS_SCHDLR_SSPND_DONE: BIF_RET(make_small(old_no)); case ERTS_SCHDLR_SSPND_YIELD_RESTART: @@ -4465,6 +4469,33 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) ref, old ? am_true : am_false); } +#if defined(ERTS_SMP) && defined(ERTS_DIRTY_SCHEDULERS) + } else if (BIF_ARG_1 == am_dirty_cpu_schedulers_online) { + Sint old_no; + if (!is_small(BIF_ARG_2)) + goto error; + switch (erts_set_schedulers_online(BIF_P, + ERTS_PROC_LOCK_MAIN, + signed_val(BIF_ARG_2), + &old_no, + 1)) { + case ERTS_SCHDLR_SSPND_DONE: + BIF_RET(make_small(old_no)); + case ERTS_SCHDLR_SSPND_YIELD_RESTART: + ERTS_VBUMP_ALL_REDS(BIF_P); + BIF_TRAP2(bif_export[BIF_system_flag_2], + BIF_P, BIF_ARG_1, BIF_ARG_2); + case ERTS_SCHDLR_SSPND_YIELD_DONE: + ERTS_BIF_YIELD_RETURN_X(BIF_P, make_small(old_no), + am_dirty_cpu_schedulers_online); + case ERTS_SCHDLR_SSPND_EINVAL: + goto error; + default: + ASSERT(0); + BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); + break; + } +#endif } else if (ERTS_IS_ATOM_STR("scheduling_statistics", BIF_ARG_1)) { int what; if (ERTS_IS_ATOM_STR("disable", BIF_ARG_2)) diff --git a/erts/emulator/beam/erl_drv_nif.h b/erts/emulator/beam/erl_drv_nif.h index ea013a49a3..3f829ea7ea 100644 --- a/erts/emulator/beam/erl_drv_nif.h +++ b/erts/emulator/beam/erl_drv_nif.h @@ -41,6 +41,13 @@ typedef struct { int suggested_stack_size; } ErlDrvThreadOpts; +#if defined(ERL_DRV_DIRTY_SCHEDULER_SUPPORT) || defined(ERL_NIF_DIRTY_SCHEDULER_SUPPORT) +typedef enum { + ERL_DRV_DIRTY_JOB_CPU_BOUND = 1, + ERL_DRV_DIRTY_JOB_IO_BOUND = 2 +} ErlDrvDirtyJobFlags; +#endif + #endif /* __ERL_DRV_NIF_H__ */ diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index c17256f466..5bdebc493c 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -178,6 +178,11 @@ int erts_compat_rel; static int no_schedulers; static int no_schedulers_online; +#ifdef ERTS_DIRTY_SCHEDULERS +static int no_dirty_cpu_schedulers; +static int no_dirty_cpu_schedulers_online; +static int no_dirty_io_schedulers; +#endif #ifdef DEBUG Uint32 verbose; /* See erl_debug.h for information about verbose */ @@ -304,7 +309,13 @@ erl_init(int ncpu, erts_init_sys_common_misc(); erts_init_process(ncpu, proc_tab_sz, legacy_proc_tab); erts_init_scheduling(no_schedulers, - no_schedulers_online); + no_schedulers_online +#ifdef ERTS_DIRTY_SCHEDULERS + , no_dirty_cpu_schedulers, + no_dirty_cpu_schedulers_online, + no_dirty_io_schedulers +#endif + ); erts_init_cpu_topology(); /* Must be after init_scheduling */ erts_init_gc(); /* Must be after init_scheduling */ erts_alloc_late_init(); @@ -835,7 +846,7 @@ early_init(int *argc, char **argv) /* else if (argv[i][2] == 'D') { char *arg; char *type = argv[i]+3; - if (strcmp(type, "Pcpu") == 0) { + if (strncmp(type, "Pcpu", 4) == 0) { int ptot, ponln; arg = get_arg(argv[i]+7, argv[i+1], &i); switch (sscanf(arg, "%d:%d", &ptot, &ponln)) { @@ -872,7 +883,7 @@ early_init(int *argc, char **argv) /* VERBOSE(DEBUG_SYSTEM, ("using %d:%d dirty CPU scheduler percentages\n", dirty_cpu_scheds_pctg, dirty_cpu_scheds_onln_pctg)); - } else if (strcmp(type, "cpu") == 0) { + } else if (strncmp(type, "cpu", 3) == 0) { int tot, onln; arg = get_arg(argv[i]+6, argv[i+1], &i); switch (sscanf(arg, "%d:%d", &tot, &onln)) { @@ -923,7 +934,7 @@ early_init(int *argc, char **argv) /* } VERBOSE(DEBUG_SYSTEM, ("using %d:%d dirty CPU scheduler(s)\n", tot, onln)); - } else if (strcmp(type, "io") == 0) { + } else if (strncmp(type, "io", 2) == 0) { arg = get_arg(argv[i]+5, argv[i+1], &i); dirty_io_scheds = atoi(arg); if (dirty_io_scheds < 0 || @@ -1055,9 +1066,9 @@ early_init(int *argc, char **argv) /* erts_no_schedulers = (Uint) no_schedulers; #endif #ifdef ERTS_DIRTY_SCHEDULERS - erts_no_dirty_cpu_schedulers = dirty_cpu_scheds; - erts_no_dirty_cpu_schedulers_online = dirty_cpu_scheds_online; - erts_no_dirty_io_schedulers = dirty_io_scheds; + erts_no_dirty_cpu_schedulers = no_dirty_cpu_schedulers = dirty_cpu_scheds; + no_dirty_cpu_schedulers_online = dirty_cpu_scheds_online; + erts_no_dirty_io_schedulers = no_dirty_io_schedulers = dirty_io_scheds; #endif erts_early_init_scheduling(no_schedulers); diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index 4126df6f34..63e0d6d0c2 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1570,6 +1570,13 @@ enif_schedule_dirty_nif(ErlNifEnv* env, int flags, a = erts_smp_atomic32_read_acqb(&proc->state); while (1) { n = state = a; + /* + * clear any current dirty flags and dirty queue indicators, + * in case the application is shifting a job from one type + * of dirty scheduler to the other + */ + n &= ~(ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC + |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); if (chkflgs == ERL_NIF_DIRTY_JOB_CPU_BOUND) n |= ERTS_PSFLG_DIRTY_CPU_PROC; else @@ -1600,22 +1607,15 @@ enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result, ERL_NIF_TERM (*fp)(ErlNifEnv*, ERL_NIF_TERM)) { #ifdef USE_THREADS - erts_aint32_t state, n, a; Process* proc = env->proc; Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; Export* ep; - a = erts_smp_atomic32_read_acqb(&proc->state); - while (1) { - n = state = a; - if (!(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))) - break; - n &= ~(ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC - |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); - a = erts_smp_atomic32_cmpxchg_mb(&proc->state, n, state); - if (a == state) - break; - } + erts_smp_atomic32_read_band_mb(&proc->state, + ~(ERTS_PSFLG_DIRTY_CPU_PROC + |ERTS_PSFLG_DIRTY_IO_PROC + |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q + |ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); if (!(ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc))) alloc_proc_psd(proc, &ep); ERTS_VBUMP_ALL_REDS(proc); diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index 7613446f64..c12ba4d554 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -172,8 +172,8 @@ typedef ErlDrvThreadOpts ErlNifThreadOpts; #ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT typedef enum { - ERL_NIF_DIRTY_JOB_CPU_BOUND = 1, - ERL_NIF_DIRTY_JOB_IO_BOUND = 2 + ERL_NIF_DIRTY_JOB_CPU_BOUND = ERL_DRV_DIRTY_JOB_CPU_BOUND, + ERL_NIF_DIRTY_JOB_IO_BOUND = ERL_DRV_DIRTY_JOB_IO_BOUND }ErlNifDirtyTaskFlags; #endif diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 937881212a..1d0275c524 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -148,7 +148,6 @@ int erts_sched_balance_util = 0; Uint erts_no_schedulers; #ifdef ERTS_DIRTY_SCHEDULERS Uint erts_no_dirty_cpu_schedulers; -Uint erts_no_dirty_cpu_schedulers_online; Uint erts_no_dirty_io_schedulers; #endif @@ -188,6 +187,13 @@ static ErtsAuxWorkData *aux_thread_aux_work_data; #define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ erts_smp_atomic32_set_nob(&schdlr_sspnd.changing, (VAL)) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ + erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_cpu_changing, (VAL)) +#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ + erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_io_changing, (VAL)) +#endif + #else #define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ @@ -198,6 +204,23 @@ do { \ ASSERT(old_val__ == (OLD_VAL)); \ } while (0) +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ +do { \ + erts_aint32_t old_val__; \ + old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_cpu_changing, \ + (VAL)); \ + ASSERT(old_val__ == (OLD_VAL)); \ +} while (0) +#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ +do { \ + erts_aint32_t old_val__; \ + old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_io_changing, \ + (VAL)); \ + ASSERT(old_val__ == (OLD_VAL)); \ +} while (0) +#endif + #endif @@ -207,11 +230,29 @@ static struct { int online; int curr_online; int wait_curr_online; +#ifdef ERTS_DIRTY_SCHEDULERS + int dirty_cpu_online; + int dirty_cpu_curr_online; + int dirty_cpu_wait_curr_online; + int dirty_io_online; + int dirty_io_curr_online; + int dirty_io_wait_curr_online; +#endif erts_smp_atomic32_t changing; erts_smp_atomic32_t active; +#ifdef ERTS_DIRTY_SCHEDULERS + erts_smp_atomic32_t dirty_cpu_changing; + erts_smp_atomic32_t dirty_cpu_active; + erts_smp_atomic32_t dirty_io_changing; + erts_smp_atomic32_t dirty_io_active; +#endif struct { int ongoing; long wait_active; +#ifdef ERTS_DIRTY_SCHEDULERS + long dirty_cpu_wait_active; + long dirty_io_wait_active; +#endif ErtsProcList *procs; } msb; /* Multi Scheduling Block */ } schdlr_sspnd; @@ -1306,6 +1347,9 @@ static ERTS_INLINE void haw_thr_prgr_current_check_progress(ErtsAuxWorkData *awdp) { ErtsThrPrgrVal current = awdp->current_thr_prgr; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif if (current != ERTS_THR_PRGR_INVALID && !erts_thr_progress_equal(current, erts_thr_progress_current())) { /* @@ -1322,6 +1366,10 @@ handle_delayed_aux_work_wakeup(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, in { int jix, max_jix; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif + ASSERT(awdp->delayed_wakeup.next != ERTS_DELAYED_WAKEUP_INFINITY); if (!waiting && awdp->delayed_wakeup.next > awdp->esdp->reductions) @@ -1477,6 +1525,9 @@ handle_misc_aux_work_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) { +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif if (!erts_thr_progress_has_reached_this(haw_thr_prgr_current(awdp), awdp->misc.thr_prgr)) return aux_work & ~ERTS_SSI_AUX_WORK_MISC_THR_PRGR; @@ -1561,6 +1612,9 @@ handle_async_ready(ErtsAuxWorkData *awdp, int waiting) { ErtsSchedulerSleepInfo *ssi = awdp->ssi; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_ASYNC_READY); if (erts_check_async_ready(awdp->async_ready.queue)) { if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_ASYNC_READY) @@ -1585,6 +1639,9 @@ handle_async_ready_clean(ErtsAuxWorkData *awdp, { void *thr_prgr_p; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif #ifdef ERTS_SMP if (awdp->async_ready.need_thr_prgr && !erts_thr_progress_has_reached_this(haw_thr_prgr_current(awdp), @@ -1622,6 +1679,9 @@ handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waiting) ErtsSchedulerSleepInfo *ssi = awdp->ssi; erts_aint32_t res; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif unset_aux_work_flags(ssi, (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC)); aux_work &= ~(ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM @@ -1655,11 +1715,7 @@ erts_alloc_ensure_handle_delayed_dealloc_call(int ix) { #ifdef DEBUG ErtsSchedulerData *esdp = erts_get_scheduler_data(); -#ifdef ERTS_DIRTY_SCHEDULERS - if (esdp && ERTS_SCHEDULER_IS_DIRTY(esdp)) - return; -#endif - ASSERT(!esdp || ix == (int) esdp->no); + ASSERT(!esdp || (ERTS_SCHEDULER_IS_DIRTY(esdp) || ix == (int) esdp->no)); #endif set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1), ERTS_SSI_AUX_WORK_DD); @@ -1673,6 +1729,9 @@ handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int waitin ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; int more_work = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD); erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp, &need_thr_progress, @@ -1712,6 +1771,9 @@ handle_delayed_dealloc_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, i ErtsThrPrgrVal wakeup = ERTS_THR_PRGR_INVALID; ErtsThrPrgrVal current = haw_thr_prgr_current(awdp); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif if (!erts_thr_progress_has_reached_this(current, awdp->dd.thr_prgr)) return aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR; @@ -1759,6 +1821,9 @@ handle_thr_prgr_later_op(ErtsAuxWorkData *awdp, erts_aint32_t aux_work, int wait int lops; ErtsThrPrgrVal current = haw_thr_prgr_current(awdp); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!awdp->esdp || !ERTS_SCHEDULER_IS_DIRTY(awdp->esdp)); +#endif for (lops = 0; lops < ERTS_MAX_THR_PRGR_LATER_OPS; lops++) { ErtsThrPrgrLaterOp *lop = awdp->later_op.first; if (!erts_thr_progress_has_reached_this(current, lop->later)) @@ -1917,6 +1982,14 @@ erts_smp_notify_check_children_needed(void) for (i = 0; i < erts_no_schedulers; i++) set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(i), ERTS_SSI_AUX_WORK_CHECK_CHILDREN); +#ifdef ERTS_DIRTY_SCHEDULERS + for (i = 0; i < erts_no_dirty_cpu_schedulers; i++) + set_aux_work_flags_wakeup_nob(ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(i), + ERTS_SSI_AUX_WORK_CHECK_CHILDREN); + for (i = 0; i < erts_no_dirty_io_schedulers; i++) + set_aux_work_flags_wakeup_nob(ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(i), + ERTS_SSI_AUX_WORK_CHECK_CHILDREN); +#endif } static ERTS_INLINE erts_aint32_t @@ -2697,15 +2770,15 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) while (1) { - aux_work = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : - erts_atomic32_read_acqb(&ssi->aux_work); + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { - if (!thr_prgr_active) { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); - if (aux_work && erts_thr_progress_update(esdp)) + if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp) + && erts_thr_progress_update(esdp)) erts_thr_progress_leader_update(esdp); } @@ -2990,7 +3063,7 @@ wake_scheduler(ErtsRunQueue *rq) #ifdef ERTS_DIRTY_SCHEDULERS static void -wake_dirty_scheduler(ErtsRunQueue *rq) +wake_dirty_schedulers(ErtsRunQueue *rq, int one) { ErtsSchedulerSleepInfo *ssi; ErtsSchedulerSleepList *sl; @@ -3000,9 +3073,27 @@ wake_dirty_scheduler(ErtsRunQueue *rq) sl = &rq->sleepers; erts_smp_spin_lock(&sl->lock); ssi = sl->list; - if (!ssi) + if (!ssi) { erts_smp_spin_unlock(&sl->lock); - else { + if (one) + wake_scheduler(rq); + } else if (one) { + erts_aint32_t flgs; + if (ssi->prev) + ssi->prev->next = ssi->next; + else { + ASSERT(sl->list == ssi); + sl->list = ssi->next; + } + if (ssi->next) + ssi->next->prev = ssi->prev; + + erts_smp_spin_unlock(&sl->lock); + + ERTS_THR_MEMORY_BARRIER; + flgs = ssi_flags_set_wake(ssi); + erts_sched_finish_poke(ssi, flgs); + } else { sl->list = NULL; erts_smp_spin_unlock(&sl->lock); @@ -3154,7 +3245,7 @@ smp_notify_inc_runq(ErtsRunQueue *runq) if (runq) { #ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) - wake_dirty_scheduler(runq); + wake_dirty_schedulers(runq, 1); else #endif wake_scheduler(runq); @@ -3452,6 +3543,7 @@ suspend_run_queue(ErtsRunQueue *rq) } static void scheduler_ix_resume_wake(Uint ix); +static void scheduler_ssi_resume_wake(ErtsSchedulerSleepInfo *ssi); static ERTS_INLINE void resume_run_queue(ErtsRunQueue *rq) @@ -3478,7 +3570,10 @@ resume_run_queue(ErtsRunQueue *rq) erts_smp_runq_unlock(rq); - scheduler_ix_resume_wake(rq->ix); +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) +#endif + scheduler_ix_resume_wake(rq->ix); } typedef struct { @@ -3509,20 +3604,28 @@ evacuate_run_queue(ErtsRunQueue *rq, int prio_q; ErtsRunQueue *to_rq; ErtsMigrationPaths *mps; - ErtsMigrationPath *mp; + ErtsMigrationPath *mp = NULL; ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); - mps = erts_get_migration_paths_managed(); - mp = &mps->mpath[rq->ix]; +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) +#endif + { + mps = erts_get_migration_paths_managed(); + mp = &mps->mpath[rq->ix]; + } /* Evacuate scheduled misc ops */ if (rq->misc.start) { ErtsMiscOpList *start, *end; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); +#endif to_rq = mp->misc_evac_runq; if (!to_rq) return; @@ -3551,6 +3654,9 @@ evacuate_run_queue(ErtsRunQueue *rq, if (rq->ports.start) { Port *prt; +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); +#endif to_rq = mp->prio[ERTS_PORT_PRIO_LEVEL].runq; if (!to_rq) return; @@ -3586,15 +3692,26 @@ evacuate_run_queue(ErtsRunQueue *rq, erts_aint32_t state; Process *proc; int notify = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + int requeue; +#endif to_rq = NULL; - if (!mp->prio[prio_q].runq) - return; - if (prio_q == PRIORITY_NORMAL && !mp->prio[PRIORITY_LOW].runq) - return; +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) +#endif + { + if (!mp->prio[prio_q].runq) + return; + if (prio_q == PRIORITY_NORMAL && !mp->prio[PRIORITY_LOW].runq) + return; + } proc = dequeue_process(rq, prio_q, &state); while (proc) { +#ifdef ERTS_DIRTY_SCHEDULERS + requeue = 1; +#endif if (ERTS_PSFLG_BOUND & state) { /* Bound processes get stuck here... */ proc->next = NULL; @@ -3603,12 +3720,43 @@ evacuate_run_queue(ErtsRunQueue *rq, else sbpp->first = proc; sbpp->last = proc; +#ifdef ERTS_DIRTY_SCHEDULERS + requeue = 0; +#endif + } +#ifdef ERTS_DIRTY_SCHEDULERS + else if (state & ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q) { + erts_aint32_t old; + old = erts_smp_atomic32_read_band_nob(&proc->state, + ~(ERTS_PSFLG_DIRTY_CPU_PROC + | ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q)); + /* assert that no other dirty flags are set */ + ASSERT(!(old & (ERTS_PSFLG_DIRTY_IO_PROC|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))); + } else if (state & ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) { + erts_aint32_t old; + old = erts_smp_atomic32_read_band_nob(&proc->state, + ~(ERTS_PSFLG_DIRTY_IO_PROC + | ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); + /* assert that no other dirty flags are set */ + ASSERT(!(old & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q))); } + if (requeue) { +#else else { +#endif int prio = (int) ERTS_PSFLGS_GET_PRQ_PRIO(state); erts_smp_runq_unlock(rq); - to_rq = mp->prio[prio].runq; +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) + /* + * dirty run queues evacuate only to run + * queue 0 during multi-scheduling blocking + */ + to_rq = ERTS_RUNQ_IX(0); + else +#endif + to_rq = mp->prio[prio].runq; RUNQ_SET_RQ(&proc->run_queue, to_rq); erts_smp_runq_lock(to_rq); @@ -4722,13 +4870,20 @@ wakeup_other_check(ErtsRunQueue *rq, Uint32 flags) rq->wakeup_other += (left_len*wo_reds + ERTS_WAKEUP_OTHER_FIXED_INC); if (rq->wakeup_other > wakeup_other.limit) { - int empty_rqs = - erts_smp_atomic32_read_acqb(&no_empty_run_queues); - if (flags & ERTS_RUNQ_FLG_PROTECTED) - (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); - if (empty_rqs != 0) - wake_scheduler_on_empty_runq(rq); - rq->wakeup_other = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && rq->waiting) + wake_dirty_schedulers(rq, 1); + else +#endif + { + int empty_rqs = + erts_smp_atomic32_read_acqb(&no_empty_run_queues); + if (flags & ERTS_RUNQ_FLG_PROTECTED) + (void) ERTS_RUNQ_FLGS_UNSET(rq, ERTS_RUNQ_FLG_PROTECTED); + if (empty_rqs != 0) + wake_scheduler_on_empty_runq(rq); + rq->wakeup_other = 0; + } } } rq->wakeup_other_reds = 0; @@ -5009,8 +5164,12 @@ erts_sched_set_wake_cleanup_threshold(char *str) static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp, char *dawwp) { - if (!esdp || ERTS_SCHEDULER_IS_DIRTY(esdp)) + if (!esdp) awdp->sched_id = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + else if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + awdp->sched_id = (int) ERTS_DIRTY_SCHEDULER_NO(esdp); +#endif else awdp->sched_id = (int) esdp->no; awdp->esdp = esdp; @@ -5076,11 +5235,11 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, #ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(runq->ix)) { esdp->no = 0; - esdp->dirty_no = (Uint) num; + ERTS_DIRTY_SCHEDULER_NO(esdp) = (Uint) num; } else { esdp->no = (Uint) num; - esdp->dirty_no = 0; + ERTS_DIRTY_SCHEDULER_NO(esdp) = 0; } #else esdp->no = (Uint) num; @@ -5111,7 +5270,12 @@ init_scheduler_data(ErtsSchedulerData* esdp, int num, } void -erts_init_scheduling(int no_schedulers, int no_schedulers_online) +erts_init_scheduling(int no_schedulers, int no_schedulers_online +#ifdef ERTS_DIRTY_SCHEDULERS + , int no_dirty_cpu_schedulers, int no_dirty_cpu_schedulers_online, + int no_dirty_io_schedulers +#endif + ) { int ix, n, no_ssi; char *daww_ptr; @@ -5133,6 +5297,12 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) ASSERT(no_schedulers_online <= no_schedulers); ASSERT(no_schedulers_online >= 1); ASSERT(no_schedulers >= 1); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(no_dirty_cpu_schedulers <= no_schedulers); + ASSERT(no_dirty_cpu_schedulers >= 1); + ASSERT(no_dirty_cpu_schedulers_online <= no_schedulers_online); + ASSERT(no_dirty_cpu_schedulers_online >= 1); +#endif /* Create and initialize run queues */ @@ -5169,10 +5339,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) #ifdef ERTS_DIRTY_SCHEDULERS #ifdef ERTS_SMP - if (ERTS_RUNQ_IX_IS_DIRTY(ix)) { + if (ERTS_RUNQ_IX_IS_DIRTY(ix)) erts_smp_spinlock_init(&rq->sleepers.lock, "dirty_run_queue_sleep_list"); - rq->sleepers.list = NULL; - } + rq->sleepers.list = NULL; #endif #endif @@ -5236,6 +5405,10 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) n = (int) no_schedulers; erts_no_schedulers = n; +#ifdef ERTS_DIRTY_SCHEDULERS + erts_no_dirty_cpu_schedulers = no_dirty_cpu_schedulers; + erts_no_dirty_io_schedulers = no_dirty_io_schedulers; +#endif /* Create and initialize scheduler sleep info */ #ifdef ERTS_SMP @@ -5267,21 +5440,21 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) aligned_dirty_cpu_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, - erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); - for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); - ssi->event = NULL; /* initialized in sched_thread_func */ + ssi->event = NULL; /* initialized in sched_dirty_cpu_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } aligned_dirty_io_sched_sleep_info = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_SLP_INFO, - erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerSleepInfo)); + for (ix = 0; ix < no_dirty_io_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); - ssi->event = NULL; /* initialized in sched_thread_func */ + ssi->event = NULL; /* initialized in sched_dirty_io_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } #endif @@ -5314,8 +5487,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) erts_aligned_dirty_cpu_scheduler_data = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_DATA, - erts_no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerData)); - for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + no_dirty_cpu_schedulers*sizeof(ErtsAlignedSchedulerData)); + for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); init_scheduler_data(esdp, ix+1, ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix), ERTS_DIRTY_CPU_RUNQ, NULL, 0); @@ -5323,8 +5496,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) erts_aligned_dirty_io_scheduler_data = erts_alloc_permanent_cache_aligned( ERTS_ALC_T_SCHDLR_DATA, - erts_no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerData)); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + no_dirty_io_schedulers*sizeof(ErtsAlignedSchedulerData)); + for (ix = 0; ix < no_dirty_io_schedulers; ix++) { ErtsSchedulerData *esdp = ERTS_DIRTY_IO_SCHEDULER_IX(ix); init_scheduler_data(esdp, ix+1, ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix), ERTS_DIRTY_IO_RUNQ, NULL, 0); @@ -5354,6 +5527,16 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) schdlr_sspnd.curr_online = no_schedulers; schdlr_sspnd.msb.ongoing = 0; erts_smp_atomic32_init_nob(&schdlr_sspnd.active, no_schedulers); +#ifdef ERTS_DIRTY_SCHEDULERS + erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_changing, 0); + schdlr_sspnd.dirty_cpu_online = no_dirty_cpu_schedulers_online; + schdlr_sspnd.dirty_cpu_curr_online = no_dirty_cpu_schedulers; + erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_active, no_dirty_cpu_schedulers); + erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_changing, 0); + schdlr_sspnd.dirty_io_online = no_dirty_io_schedulers; + schdlr_sspnd.dirty_io_curr_online = no_dirty_io_schedulers; + erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_active, no_dirty_io_schedulers); +#endif schdlr_sspnd.msb.procs = NULL; init_no_runqs(no_schedulers_online, no_schedulers_online); balance_info.last_active_runqs = no_schedulers; @@ -5379,6 +5562,21 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online) schdlr_sspnd.curr_online *= 2; /* Boot strapping... */ ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); +#ifdef ERTS_DIRTY_SCHEDULERS + schdlr_sspnd.dirty_cpu_wait_curr_online = no_dirty_cpu_schedulers_online; + schdlr_sspnd.dirty_cpu_curr_online *= 2; + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + for (ix = no_dirty_cpu_schedulers_online; ix < no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData* esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + erts_smp_atomic32_read_bor_nob(&esdp->ssi->flags, ERTS_SSI_FLG_SUSPENDED); + } + + schdlr_sspnd.dirty_io_wait_curr_online = no_dirty_io_schedulers; + schdlr_sspnd.dirty_io_curr_online *= 2; + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); +#endif erts_smp_atomic32_init_nob(&doing_sys_schedule, 0); @@ -5497,9 +5695,16 @@ free_proxy_proc(Process *proxy) erts_free(ERTS_ALC_T_PROC, proxy); } +#define ERTS_ENQUEUE_NOT 0 +#define ERTS_ENQUEUE_NORMAL_QUEUE 1 +#ifdef ERTS_DIRTY_SCHEDULERS +#define ERTS_ENQUEUE_DIRTY_CPU_QUEUE 2 +#define ERTS_ENQUEUE_DIRTY_IO_QUEUE 3 +#endif static ERTS_INLINE int -check_enqueue_in_prio_queue(erts_aint32_t *prq_prio_p, +check_enqueue_in_prio_queue(Process *c_p, + erts_aint32_t *prq_prio_p, erts_aint32_t *newp, erts_aint32_t actual) { @@ -5511,56 +5716,105 @@ check_enqueue_in_prio_queue(erts_aint32_t *prq_prio_p, *prq_prio_p = aprio; #ifdef ERTS_DIRTY_SCHEDULERS - if (!(actual & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC))) { + if (actual & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) { + /* + * If we have system tasks of a priority higher + * or equal to the user priority, we enqueue + * on ordinary run-queue and take care of + * those system tasks first. + */ + if (actual & ERTS_PSFLG_ACTIVE_SYS) { + erts_aint32_t uprio, stprio, qmask; + uprio = (actual >> ERTS_PSFLGS_USR_PRIO_OFFSET) & ERTS_PSFLGS_PRIO_MASK; + if (aprio < uprio) + goto enqueue_normal_runq; /* system tasks with higher prio */ + erts_smp_proc_lock(c_p, ERTS_PROC_LOCK_STATUS); + qmask = c_p->sys_task_qs->qmask; + erts_smp_proc_unlock(c_p, ERTS_PROC_LOCK_STATUS); + switch (qmask & -qmask) { + case MAX_BIT: + stprio = PRIORITY_MAX; + break; + case HIGH_BIT: + stprio = PRIORITY_HIGH; + break; + case NORMAL_BIT: + stprio = PRIORITY_NORMAL; + break; + case LOW_BIT: + stprio = PRIORITY_LOW; + break; + default: + stprio = PRIORITY_LOW+1; + break; + } + if (stprio <= uprio) + goto enqueue_normal_runq; /* system tasks with higher prio */ + } + + /* Enqueue in dirty run queue if not already enqueued */ + if (actual & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)) + return ERTS_ENQUEUE_NOT; /* already in queue */ + if (actual & ERTS_PSFLG_DIRTY_CPU_PROC) { + *newp |= ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q; + if (actual & ERTS_PSFLG_IN_RUNQ) + return -ERTS_ENQUEUE_DIRTY_CPU_QUEUE; /* use proxy */ + *newp |= ERTS_PSFLG_IN_RUNQ; + return ERTS_ENQUEUE_DIRTY_CPU_QUEUE; + } + *newp |= ERTS_PSFLG_DIRTY_IO_PROC_IN_Q; + if (actual & ERTS_PSFLG_IN_RUNQ) + return -ERTS_ENQUEUE_DIRTY_IO_QUEUE; /* use proxy */ + *newp |= ERTS_PSFLG_IN_RUNQ; + return ERTS_ENQUEUE_DIRTY_IO_QUEUE; + } + + enqueue_normal_runq: #endif - max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK; - max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; - max_qbit &= -max_qbit; - /* - * max_qbit now either contain bit set for highest prio queue or a bit - * out of range (which will have a value larger than valid range). - */ + max_qbit = (actual >> ERTS_PSFLGS_IN_PRQ_MASK_OFFSET) & ERTS_PSFLGS_QMASK; + max_qbit |= 1 << ERTS_PSFLGS_QMASK_BITS; + max_qbit &= -max_qbit; + /* + * max_qbit now either contain bit set for highest prio queue or a bit + * out of range (which will have a value larger than valid range). + */ - if (qbit >= max_qbit) - return 0; /* Already queued in higher or equal prio */ + if (qbit >= max_qbit) + return ERTS_ENQUEUE_NOT; /* Already queued in higher or equal prio */ - /* Need to enqueue (if already enqueued, it is in lower prio) */ - *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; + /* Need to enqueue (if already enqueued, it is in lower prio) */ + *newp |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; - if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK)) - != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) { - /* - * Process struct already enqueued, or actual prio not - * equal to user prio, i.e., enqueue using proxy. - */ - return -1; - } -#ifdef ERTS_DIRTY_SCHEDULERS - } else { - if (actual & ERTS_PSFLG_DIRTY_CPU_PROC) - *newp |= ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q; - else - *newp |= ERTS_PSFLG_DIRTY_IO_PROC_IN_Q; + if ((actual & (ERTS_PSFLG_IN_RUNQ|ERTS_PSFLGS_USR_PRIO_MASK)) + != (aprio << ERTS_PSFLGS_USR_PRIO_OFFSET)) { + /* + * Process struct already enqueued, or actual prio not + * equal to user prio, i.e., enqueue using proxy. + */ + return -ERTS_ENQUEUE_NORMAL_QUEUE; } -#endif /* * Enqueue using process struct. */ *newp &= ~ERTS_PSFLGS_PRQ_PRIO_MASK; *newp |= ERTS_PSFLG_IN_RUNQ | (aprio << ERTS_PSFLGS_PRQ_PRIO_OFFSET); - return 1; + return ERTS_ENQUEUE_NORMAL_QUEUE; } /* - * scheduler_out_process() return with c_rq locked. + * schedule_out_process() return with c_rq locked. */ static ERTS_INLINE int schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Process *proxy) { erts_aint32_t a, e, n, enq_prio = -1; - int res = 0; int enqueue; /* < 0 -> use proxy */ + Process* sched_p; + ErtsRunQueue* runq; +#ifdef ERTS_SMP + int check_emigration_need; +#endif a = state; @@ -5569,20 +5823,20 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces ASSERT(a & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS)); - enqueue = 0; + enqueue = ERTS_ENQUEUE_NOT; n &= ~(ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS); if (a & ERTS_PSFLG_ACTIVE_SYS || (a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE) { - enqueue = check_enqueue_in_prio_queue(&enq_prio, &n, a); + enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); } a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; } - if (!enqueue) { - + switch (enqueue) { + case ERTS_ENQUEUE_NOT: if (erts_system_profile_flags.runnable_procs) { if (!(a & ERTS_PSFLG_ACTIVE_SYS) @@ -5595,60 +5849,76 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces if (proxy) free_proxy_proc(proxy); - } - else { - Process *sched_p; - ErtsRunQueue *runq; - ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & ERTS_PSFLG_ACTIVE_SYS)); + erts_smp_runq_lock(c_rq); + return 0; #ifdef ERTS_DIRTY_SCHEDULERS #ifdef ERTS_SMP - if (ERTS_PSFLG_DIRTY_CPU_PROC & a) - runq = ERTS_DIRTY_CPU_RUNQ; - else if (ERTS_PSFLG_DIRTY_IO_PROC & a) - runq = ERTS_DIRTY_IO_RUNQ; - else + case ERTS_ENQUEUE_DIRTY_CPU_QUEUE: + case -ERTS_ENQUEUE_DIRTY_CPU_QUEUE: + runq = ERTS_DIRTY_CPU_RUNQ; + ASSERT(ERTS_SCHEDULER_IS_DIRTY_CPU(runq->scheduler)); +#ifdef ERTS_SMP + check_emigration_need = 0; +#endif + break; + + case ERTS_ENQUEUE_DIRTY_IO_QUEUE: + case -ERTS_ENQUEUE_DIRTY_IO_QUEUE: + runq = ERTS_DIRTY_IO_RUNQ; + ASSERT(ERTS_SCHEDULER_IS_DIRTY_IO(runq->scheduler)); +#ifdef ERTS_SMP + check_emigration_need = 0; +#endif + break; #endif #endif - runq = erts_get_runq_proc(p); - if (enqueue < 0) - sched_p = make_proxy_proc(proxy, p, enq_prio); - else { - sched_p = p; - if (proxy) - free_proxy_proc(proxy); - } + default: + ASSERT(enqueue == ERTS_ENQUEUE_NORMAL_QUEUE + || enqueue == -ERTS_ENQUEUE_NORMAL_QUEUE); + runq = erts_get_runq_proc(p); #ifdef ERTS_SMP - if (!(ERTS_PSFLG_BOUND & n) -#ifdef ERTS_DIRTY_SCHEDULERS - && !(n & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)) + check_emigration_need = !(ERTS_PSFLG_BOUND & n); #endif - ) { - ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio); - if (new_runq) { - RUNQ_SET_RQ(&sched_p->run_queue, new_runq); - runq = new_runq; - } + break; + } + + ASSERT(!(n & ERTS_PSFLG_SUSPENDED) || (n & ERTS_PSFLG_ACTIVE_SYS)); + + if (enqueue < 0) + sched_p = make_proxy_proc(proxy, p, enq_prio); + else { + sched_p = p; + if (proxy) + free_proxy_proc(proxy); + } + +#ifdef ERTS_SMP + if (check_emigration_need) { + ErtsRunQueue *new_runq = erts_check_emigration_need(runq, enq_prio); + if (new_runq) { + RUNQ_SET_RQ(&sched_p->run_queue, new_runq); + runq = new_runq; } + } #endif - ASSERT(runq); - res = 1; - erts_smp_runq_lock(runq); + ASSERT(runq); - /* Enqueue the process */ - enqueue_process(runq, (int) enq_prio, sched_p); + erts_smp_runq_lock(runq); - if (runq == c_rq) - return res; - erts_smp_runq_unlock(runq); - smp_notify_inc_runq(runq); - } + /* Enqueue the process */ + enqueue_process(runq, (int) enq_prio, sched_p); + + if (runq == c_rq) + return 1; + erts_smp_runq_unlock(runq); + smp_notify_inc_runq(runq); erts_smp_runq_lock(c_rq); - return res; + return 1; } static ERTS_INLINE void @@ -5704,7 +5974,7 @@ change_proc_schedule_state(Process *p, erts_aint32_t e; n = e = a; - enqueue = 0; + enqueue = ERTS_ENQUEUE_NOT; if (a & ERTS_PSFLG_FREE) break; /* We don't want to schedule free processes... */ @@ -5725,13 +5995,13 @@ change_proc_schedule_state(Process *p, * process may be in a run queue via proxy, need * further inspection... */ - enqueue = check_enqueue_in_prio_queue(enq_prio_p, &n, a); + enqueue = check_enqueue_in_prio_queue(p, enq_prio_p, &n, a); } a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; - if (enqueue == 0 && n == a) + if (enqueue == ERTS_ENQUEUE_NOT && n == a) break; } @@ -5765,7 +6035,7 @@ schedule_process(Process *p, erts_aint32_t in_state) ERTS_PSFLG_ACTIVE, &state, &enq_prio); - if (enqueue) + if (enqueue != ERTS_ENQUEUE_NOT) add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), state, enq_prio); @@ -5792,14 +6062,14 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) if (a & ERTS_PSFLG_FREE) return; /* We don't want to schedule free processes... */ - enqueue = 0; + enqueue = ERTS_ENQUEUE_NOT; n |= ERTS_PSFLG_ACTIVE_SYS; if (!(a & (ERTS_PSFLG_RUNNING|ERTS_PSFLG_RUNNING_SYS))) - enqueue = check_enqueue_in_prio_queue(&enq_prio, &n, a); + enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); if (a == e) break; - if (a == n && !enqueue) + if (a == n && enqueue == ERTS_ENQUEUE_NOT) goto cleanup; } @@ -5815,7 +6085,7 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) } - if (enqueue) { + if (enqueue != ERTS_ENQUEUE_NOT) { Process *sched_p; if (enqueue > 0) sched_p = p; @@ -5938,6 +6208,12 @@ static void scheduler_ix_resume_wake(Uint ix) { ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); +} + +static void +scheduler_ssi_resume_wake(ErtsSchedulerSleepInfo *ssi) +{ erts_aint32_t xflgs = (ERTS_SSI_FLG_SLEEPING | ERTS_SSI_FLG_TSE_SLEEPING | ERTS_SSI_FLG_WAITING @@ -6028,12 +6304,20 @@ sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) } } +#ifdef ERTS_DIRTY_SCHEDULERS + static void suspend_scheduler(ErtsSchedulerData *esdp) { erts_aint32_t flgs; erts_aint32_t changing; +#ifdef ERTS_DIRTY_SCHEDULERS + long no = (long) (ERTS_SCHEDULER_IS_DIRTY(esdp) + ? ERTS_DIRTY_SCHEDULER_NO(esdp) + : esdp->no); +#else long no = (long) esdp->no; +#endif ErtsSchedulerSleepInfo *ssi = esdp->ssi; long active_schedulers; int curr_online = 1; @@ -6041,21 +6325,305 @@ suspend_scheduler(ErtsSchedulerData *esdp) erts_aint32_t aux_work; int thr_prgr_active = 1; ErtsStuckBoundProcesses sbp = {NULL, NULL}; + int* ss_onlinep; + int* ss_curr_onlinep; + int* ss_wait_curr_onlinep; + long* ss_wait_activep; + long ss_wait_active_target; + erts_smp_atomic32_t* ss_changingp; + erts_smp_atomic32_t* ss_activep; /* * Schedulers may be suspended in two different ways: * - A scheduler may be suspended since it is not online. * All schedulers with scheduler ids greater than - * schdlr_sspnd.online are suspended. + * schdlr_sspnd.online are suspended; same for dirty + * schedulers and schdlr_sspnd.dirty_cpu_online and + * schdlr_sspnd.dirty_io_online. * - Multi scheduling is blocked. All schedulers except the - * scheduler with scheduler id 1 are suspended. + * scheduler with scheduler id 1 are suspended, and all + * dirty CPU and dirty I/O schedulers are suspended. * * Regardless of why a scheduler is suspended, it ends up here. */ + ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) || no != 1); + #ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(!ERTS_SCHEDULER_IS_DIRTY(esdp)); + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (erts_smp_mtx_trylock(&schdlr_sspnd.mtx) == EBUSY) { + erts_smp_runq_unlock(esdp->run_queue); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + erts_smp_runq_lock(esdp->run_queue); + } + if (ongoing_multi_scheduling_block()) + evacuate_run_queue(esdp->run_queue, &sbp); + } else +#endif + evacuate_run_queue(esdp->run_queue, &sbp); + + erts_smp_runq_unlock(esdp->run_queue); + +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) #endif + { + erts_sched_check_cpu_bind_prep_suspend(esdp); + + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_inactive); + + sched_wall_time_change(esdp, 0); + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + } + + flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); + if (flgs & ERTS_SSI_FLG_SUSPENDED) { + +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(esdp->run_queue)) { + active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_cpu_active); + ASSERT(active_schedulers >= 0); + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); + ss_onlinep = &schdlr_sspnd.dirty_cpu_online; + ss_curr_onlinep = &schdlr_sspnd.dirty_cpu_curr_online; + ss_wait_curr_onlinep = &schdlr_sspnd.dirty_cpu_wait_curr_online; + ss_changingp = &schdlr_sspnd.dirty_cpu_changing; + ss_wait_activep = &schdlr_sspnd.msb.dirty_cpu_wait_active; + ss_activep = &schdlr_sspnd.dirty_cpu_active; + } else { + active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_io_active); + ASSERT(active_schedulers >= 0); + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing); + ss_onlinep = &schdlr_sspnd.dirty_io_online; + ss_curr_onlinep = &schdlr_sspnd.dirty_io_curr_online; + ss_wait_curr_onlinep = &schdlr_sspnd.dirty_io_wait_curr_online; + ss_changingp = &schdlr_sspnd.dirty_io_changing; + ss_wait_activep = &schdlr_sspnd.msb.dirty_io_wait_active; + ss_activep = &schdlr_sspnd.dirty_io_active; + } + ss_wait_active_target = 0; + } + else +#endif + { + active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.active); + ASSERT(active_schedulers >= 1); + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); + ss_onlinep = &schdlr_sspnd.online; + ss_curr_onlinep = &schdlr_sspnd.curr_online; + ss_wait_curr_onlinep = &schdlr_sspnd.wait_curr_online; + ss_changingp = &schdlr_sspnd.changing; + ss_wait_activep = &schdlr_sspnd.msb.wait_active; + ss_activep = &schdlr_sspnd.active; + ss_wait_active_target = 1; + } + if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { + if (active_schedulers == *ss_wait_activep) + wake = 1; + if (active_schedulers == ss_wait_active_target) { + changing = erts_smp_atomic32_read_band_nob(ss_changingp, + ~ERTS_SCHDLR_SSPND_CHNG_MSB); + changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; + } + } + + while (1) { + if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + int changed = 0; + if (no > *ss_onlinep && curr_online) { + (*ss_curr_onlinep)--; + curr_online = 0; + changed = 1; + } + else if (no <= *ss_onlinep && !curr_online) { + (*ss_curr_onlinep)++; + curr_online = 1; + changed = 1; + } + if (changed + && *ss_curr_onlinep == *ss_wait_curr_onlinep) + wake = 1; + if (*ss_onlinep == *ss_curr_onlinep) { + changing = erts_smp_atomic32_read_band_nob(ss_changingp, + ~ERTS_SCHDLR_SSPND_CHNG_ONLN); + changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; + } + } + + if (wake) { + erts_smp_cnd_signal(&schdlr_sspnd.cnd); + wake = 0; + } + + if (curr_online && !ongoing_multi_scheduling_block()) { + flgs = erts_smp_atomic32_read_acqb(&ssi->flags); + if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) + break; + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + + while (1) { + erts_aint32_t qmask; + erts_aint32_t flgs; + + qmask = (ERTS_RUNQ_FLGS_GET(esdp->run_queue) + & ERTS_RUNQ_FLGS_QMASK); + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + if (aux_work|qmask) { + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + if (aux_work) + aux_work = handle_aux_work(&esdp->aux_work_data, + aux_work, + 1); + + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && + (aux_work && erts_thr_progress_update(esdp))) + erts_thr_progress_leader_update(esdp); + if (qmask) { +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + erts_smp_runq_lock(esdp->run_queue); + if (ongoing_multi_scheduling_block()) + evacuate_run_queue(esdp->run_queue, &sbp); + erts_smp_runq_unlock(esdp->run_queue); + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + } else +#endif + { + erts_smp_runq_lock(esdp->run_queue); + evacuate_run_queue(esdp->run_queue, &sbp); + erts_smp_runq_unlock(esdp->run_queue); + } + } + } + + if (!aux_work) { +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#endif + { + if (thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + erts_thr_progress_prepare_wait(esdp); + } + flgs = sched_spin_suspended(ssi, + ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + flgs = sched_set_suspended_sleeptype(ssi); + if (flgs == (ERTS_SSI_FLG_SLEEPING + | ERTS_SSI_FLG_TSE_SLEEPING + | ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)) { + int res; + + do { + res = erts_tse_wait(ssi->event); + } while (res == EINTR); + } + } +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#endif + erts_thr_progress_finalize_wait(esdp); + } + + flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING + | ERTS_SSI_FLG_SUSPENDED)); + if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) + break; + changing = erts_smp_atomic32_read_nob(ss_changingp); + if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) + break; + } + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + changing = erts_smp_atomic32_read_nob(ss_changingp); + } + + active_schedulers = erts_smp_atomic32_inc_read_nob(ss_activep); + changing = erts_smp_atomic32_read_nob(ss_changingp); + if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) + && *ss_onlinep == active_schedulers) { + erts_smp_atomic32_read_band_nob(ss_changingp, + ~ERTS_SCHDLR_SSPND_CHNG_MSB); + } + +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#endif + ASSERT(no <= *ss_onlinep); + ASSERT(!ongoing_multi_scheduling_block()); + + } + + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + + ASSERT(curr_online); + +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#endif + { + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_active); + + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } + } + + erts_smp_runq_lock(esdp->run_queue); + non_empty_runq(esdp->run_queue); + +#ifdef ERTS_DIRTY_SCHEDULERS + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) +#endif + { + schedule_bound_processes(esdp->run_queue, &sbp); + + erts_sched_check_cpu_bind_post_suspend(esdp); + } +} + +#else /* !ERTS_DIRTY_SCHEDULERS */ + +static void +suspend_scheduler(ErtsSchedulerData *esdp) +{ + erts_aint32_t flgs; + erts_aint32_t changing; + long no = (long) esdp->no; + ErtsSchedulerSleepInfo *ssi = esdp->ssi; + long active_schedulers; + int curr_online = 1; + int wake = 0; + erts_aint32_t aux_work; + int thr_prgr_active = 1; + ErtsStuckBoundProcesses sbp = {NULL, NULL}; + + /* + * Schedulers may be suspended in two different ways: + * - A scheduler may be suspended since it is not online. + * All schedulers with scheduler ids greater than + * schdlr_sspnd.online are suspended. + * - Multi scheduling is blocked. All schedulers except the + * scheduler with scheduler id 1 are suspended. + * + * Regardless of why a scheduler is suspended, it ends up here. + */ + ASSERT(no != 1); evacuate_run_queue(esdp->run_queue, &sbp); @@ -6219,6 +6787,8 @@ suspend_scheduler(ErtsSchedulerData *esdp) erts_sched_check_cpu_bind_post_suspend(esdp); } +#endif + ErtsSchedSuspendResult erts_schedulers_state(Uint *total, Uint *online, @@ -6230,40 +6800,315 @@ erts_schedulers_state(Uint *total, { int res = ERTS_SCHDLR_SSPND_EINVAL; erts_aint32_t changing; - if (total) { - ASSERT(online); - ASSERT(active); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - else { - *active = *online = schdlr_sspnd.online; - if (ongoing_multi_scheduling_block()) - *active = 1; - res = ERTS_SCHDLR_SSPND_DONE; - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - *total = erts_no_schedulers; + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); +#ifdef ERTS_DIRTY_SCHEDULERS + changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) + | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); +#endif + if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + else { + if (active) + *active = schdlr_sspnd.online; + if (online) + *online = schdlr_sspnd.online; + if (ongoing_multi_scheduling_block() && active) + *active = 1; +#ifdef ERTS_DIRTY_SCHEDULERS + if (dirty_cpu_online) + *dirty_cpu_online = schdlr_sspnd.dirty_cpu_online; +#endif + res = ERTS_SCHDLR_SSPND_DONE; } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (total) + *total = erts_no_schedulers; #ifdef ERTS_DIRTY_SCHEDULERS if (dirty_cpu) *dirty_cpu = erts_no_dirty_cpu_schedulers; - if (dirty_cpu_online) - *dirty_cpu_online = erts_no_dirty_cpu_schedulers_online; if (dirty_io) *dirty_io = erts_no_dirty_io_schedulers; -#else - if (dirty_cpu) - *dirty_cpu = 0; - if (dirty_cpu_online) - *dirty_cpu_online = 0; - if (dirty_io) - *dirty_io = 0; #endif return res; } +#ifdef ERTS_DIRTY_SCHEDULERS + +ErtsSchedSuspendResult +erts_set_schedulers_online(Process *p, + ErtsProcLocks plocks, + Sint new_no, + Sint *old_no +#ifdef ERTS_DIRTY_SCHEDULERS + , int dirty_only +#endif + ) +{ + ErtsSchedulerData *esdp; + int ix, res = -1, no, have_unlocked_plocks, end_wait; + erts_aint32_t changing = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + ErtsSchedulerSleepInfo* ssi; + int dirty_no, change_dirty; +#endif + + if (new_no < 1) + return ERTS_SCHDLR_SSPND_EINVAL; +#ifdef ERTS_DIRTY_SCHEDULERS + else if (dirty_only && erts_no_dirty_cpu_schedulers < new_no) + return ERTS_SCHDLR_SSPND_EINVAL; +#endif + else if (erts_no_schedulers < new_no) + return ERTS_SCHDLR_SSPND_EINVAL; + + esdp = ERTS_PROC_GET_SCHDATA(p); + end_wait = 0; + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + + have_unlocked_plocks = 0; + no = (int) new_no; + +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(schdlr_sspnd.dirty_cpu_online <= erts_no_dirty_cpu_schedulers); + if (dirty_only) { + if (no > schdlr_sspnd.online) { + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + return ERTS_SCHDLR_SSPND_EINVAL; + } + dirty_no = no; + } else { + /* + * Adjust the number of dirty CPU schedulers online relative to the + * adjustment made to the number of normal schedulers online. + */ + int total_pct = erts_no_dirty_cpu_schedulers*100/erts_no_schedulers; + int onln_pct = no*total_pct/schdlr_sspnd.online; + dirty_no = schdlr_sspnd.dirty_cpu_online*onln_pct/100; + if (dirty_no == 0) + dirty_no = 1; + ASSERT(dirty_no <= erts_no_dirty_cpu_schedulers); + } +#endif + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); +#ifdef ERTS_DIRTY_SCHEDULERS + changing |= erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); +#endif + if (changing) { + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + } + else { + int online = *old_no = schdlr_sspnd.online; +#ifdef ERTS_DIRTY_SCHEDULERS + int dirty_online = schdlr_sspnd.dirty_cpu_online; + + if (dirty_only) { + *old_no = schdlr_sspnd.dirty_cpu_online; + if (dirty_no == schdlr_sspnd.dirty_cpu_online) { + res = ERTS_SCHDLR_SSPND_DONE; + } + change_dirty = 1; + } else { +#endif + if (no == schdlr_sspnd.online) { +#ifdef ERTS_DIRTY_SCHEDULERS + dirty_only = 1; + if (dirty_no == schdlr_sspnd.dirty_cpu_online) +#endif + res = ERTS_SCHDLR_SSPND_DONE; +#ifdef ERTS_DIRTY_SCHEDULERS + else + change_dirty = 1; +#endif + } +#ifdef ERTS_DIRTY_SCHEDULERS + else + change_dirty = (dirty_no != schdlr_sspnd.dirty_cpu_online); + } +#endif + if (res == -1) + { + int increase = (no > online); +#ifdef ERTS_DIRTY_SCHEDULERS + if (!dirty_only) { +#endif + ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + schdlr_sspnd.online = no; +#ifdef ERTS_DIRTY_SCHEDULERS + } else + increase = (dirty_no > dirty_online); + if (change_dirty) { + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + schdlr_sspnd.dirty_cpu_online = dirty_no; + } +#endif + if (increase) { + int ix; +#ifdef ERTS_DIRTY_SCHEDULERS + if (!dirty_only) { +#endif + schdlr_sspnd.wait_curr_online = no; + if (ongoing_multi_scheduling_block()) { + for (ix = online; ix < no; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); + } + change_no_used_runqs(no); + + for (ix = online; ix < no; ix++) + resume_run_queue(ERTS_RUNQ_IX(ix)); + + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); + } +#ifdef ERTS_DIRTY_SCHEDULERS + } + if (change_dirty) { + schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; + ASSERT(schdlr_sspnd.dirty_cpu_curr_online != + schdlr_sspnd.dirty_cpu_wait_curr_online); + if (ongoing_multi_scheduling_block()) { + for (ix = dirty_online; ix < dirty_no; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_sched_poke(ssi); + } + } else { + for (ix = dirty_online; ix < dirty_no; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); + erts_smp_atomic32_read_band_nob(&ssi->flags, + ~ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); + } + } +#endif + res = ERTS_SCHDLR_SSPND_DONE; + } + else /* if (no < online) */ { +#ifdef ERTS_DIRTY_SCHEDULERS + if (change_dirty) { + schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; + ASSERT(schdlr_sspnd.dirty_cpu_curr_online != + schdlr_sspnd.dirty_cpu_wait_curr_online); + if (ongoing_multi_scheduling_block()) { + for (ix = dirty_no; ix < dirty_online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_sched_poke(ssi); + } + } else { + for (ix = dirty_no; ix < dirty_online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); + } + } + if (dirty_only) { + res = ERTS_SCHDLR_SSPND_DONE; + } + else +#endif + { + if (p->scheduler_data->no <= no) { + res = ERTS_SCHDLR_SSPND_DONE; + schdlr_sspnd.wait_curr_online = no; + } + else { + /* + * Yield! Current process needs to migrate + * before bif returns. + */ + res = ERTS_SCHDLR_SSPND_YIELD_DONE; + schdlr_sspnd.wait_curr_online = no+1; + } + + if (ongoing_multi_scheduling_block()) { + for (ix = no; ix < online; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); + } + + change_no_used_runqs(no); + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); + + for (ix = no; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + wake_scheduler(rq); + } + } + } + } + +#ifdef ERTS_DIRTY_SCHEDULERS + if (change_dirty) { + while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, + ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + } + if (!dirty_only) +#endif + { + if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (plocks && !have_unlocked_plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); + } + erts_thr_progress_active(esdp, 0); + erts_thr_progress_prepare_wait(esdp); + end_wait = 1; + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + } + + while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + + ASSERT(res != ERTS_SCHDLR_SSPND_DONE + ? (ERTS_SCHDLR_SSPND_CHNG_WAITER + & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) + : (ERTS_SCHDLR_SSPND_CHNG_WAITER + == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); + erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + } + } + } + + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(schdlr_sspnd.dirty_cpu_online <= schdlr_sspnd.online); + if (!dirty_only) +#endif + { + if (end_wait) { + erts_thr_progress_finalize_wait(esdp); + erts_thr_progress_active(esdp, 1); + } + if (have_unlocked_plocks) + erts_smp_proc_lock(p, plocks); + } + + return res; +} + +#else /* !ERTS_DIRTY_SCHEDULERS */ + ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, @@ -6352,10 +7197,6 @@ erts_set_schedulers_online(Process *p, ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); wake_scheduler(rq); } -#ifdef ERTS_DIRTY_SCHEDULERS - wake_dirty_scheduler(ERTS_DIRTY_CPU_RUNQ); - wake_dirty_scheduler(ERTS_DIRTY_IO_RUNQ); -#endif } } @@ -6396,15 +7237,24 @@ erts_set_schedulers_online(Process *p, return res; } +#endif + ErtsSchedSuspendResult erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) { - int ix, res, have_unlocked_plocks = 0; + int ix, res, have_unlocked_plocks = 0, online; erts_aint32_t changing; ErtsProcList *plp; +#ifdef ERTS_DIRTY_SCHEDULERS + ErtsSchedulerSleepInfo* ssi; +#endif erts_smp_mtx_lock(&schdlr_sspnd.mtx); changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); +#ifdef ERTS_DIRTY_SCHEDULERS + changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) + | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); +#endif if (changing) { res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */ } @@ -6414,10 +7264,13 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) erts_proclist_store_last(&schdlr_sspnd.msb.procs, plp); p->flags |= F_HAVE_BLCKD_MSCHED; ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 0); + ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) == 0); +#endif ASSERT(p->scheduler_data->no == 1); res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - } - else { + } else { int online = schdlr_sspnd.online; p->flags |= F_HAVE_BLCKD_MSCHED; if (plocks) { @@ -6429,6 +7282,35 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) if (online == 1) { res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 1); + ASSERT(!(erts_smp_atomic32_read_nob(&ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0)->flags) + & ERTS_SSI_FLG_SUSPENDED)); + schdlr_sspnd.msb.dirty_cpu_wait_active = 0; + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0); + erts_smp_atomic32_read_bor_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); + while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) + != schdlr_sspnd.msb.dirty_cpu_wait_active) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + + schdlr_sspnd.msb.dirty_io_wait_active = 0; + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); + while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) + != schdlr_sspnd.msb.dirty_io_wait_active) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); +#endif ASSERT(p->scheduler_data->no == 1); } else { @@ -6447,6 +7329,37 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) schdlr_sspnd.msb.wait_active = 2; } +#ifdef ERTS_DIRTY_SCHEDULERS + schdlr_sspnd.msb.dirty_cpu_wait_active = 0; + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); + while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) + != schdlr_sspnd.msb.dirty_cpu_wait_active) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + ASSERT(schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_online); + + schdlr_sspnd.msb.dirty_io_wait_active = 0; + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB + | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); + while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) + != schdlr_sspnd.msb.dirty_io_wait_active) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + ASSERT(schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_online); +#endif change_no_used_runqs(1); for (ix = 1; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); @@ -6487,6 +7400,7 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) erts_smp_mtx_lock(&schdlr_sspnd.mtx); } + ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED ? (ERTS_SCHDLR_SSPND_CHNG_WAITER & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) @@ -6527,12 +7441,12 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) p->flags &= ~F_HAVE_BLCKD_MSCHED; schdlr_sspnd.msb.ongoing = 0; if (schdlr_sspnd.online == 1) { - /* No schedulers to resume */ + /* No normal schedulers to resume */ ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB); } else { - int online = schdlr_sspnd.online; + online = schdlr_sspnd.online; if (plocks) { have_unlocked_plocks = 1; erts_smp_proc_unlock(p, plocks); @@ -6547,6 +7461,27 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) for (ix = online; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); } +#ifdef ERTS_DIRTY_SCHEDULERS + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); + schdlr_sspnd.msb.dirty_cpu_wait_active = schdlr_sspnd.dirty_cpu_online; + for (ix = 0; ix < schdlr_sspnd.dirty_cpu_online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); + erts_smp_atomic32_read_band_nob(&ssi->flags, + ~ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); + + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); + schdlr_sspnd.msb.dirty_io_wait_active = erts_no_dirty_io_schedulers; + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); + erts_smp_atomic32_read_band_nob(&ssi->flags, + ~ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); +#endif res = ERTS_SCHDLR_SSPND_DONE; } } @@ -6673,7 +7608,11 @@ sched_thread_func(void *vesdp) erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, ~ERTS_SCHDLR_SSPND_CHNG_ONLN); if (no != 1) +#ifdef ERTS_DIRTY_SCHEDULERS + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); +#else erts_smp_cnd_signal(&schdlr_sspnd.cnd); +#endif } if (no == 1) { @@ -6710,7 +7649,9 @@ sched_dirty_cpu_thread_func(void *vesdp) { ErtsThrPrgrCallbacks callbacks; ErtsSchedulerData *esdp = vesdp; - Uint no = esdp->dirty_no; + Uint no = ERTS_DIRTY_SCHEDULER_NO(esdp); + ERTS_DIRTY_SCHEDULER_TYPE(esdp) = ERTS_DIRTY_CPU_SCHEDULER; + ASSERT(no != 0); ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); callbacks.arg = (void *) esdp->ssi; callbacks.wakeup = thr_prgr_wakeup; @@ -6738,6 +7679,24 @@ sched_dirty_cpu_thread_func(void *vesdp) #endif erts_thread_init_float(); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) + & ERTS_SCHDLR_SSPND_CHNG_ONLN); + + if (--schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_wait_curr_online) { + erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, + ~ERTS_SCHDLR_SSPND_CHNG_ONLN); + if (no != 1) + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + } + + if (no == 1) { + while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + process_main(); /* No schedulers should *ever* terminate */ erl_exit(ERTS_ABORT_EXIT, @@ -6751,7 +7710,9 @@ sched_dirty_io_thread_func(void *vesdp) { ErtsThrPrgrCallbacks callbacks; ErtsSchedulerData *esdp = vesdp; - Uint no = esdp->dirty_no; + Uint no = ERTS_DIRTY_SCHEDULER_NO(esdp); + ERTS_DIRTY_SCHEDULER_TYPE(esdp) = ERTS_DIRTY_IO_SCHEDULER; + ASSERT(no != 0); ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(no-1)->event = erts_tse_fetch(); callbacks.arg = (void *) esdp->ssi; callbacks.wakeup = thr_prgr_wakeup; @@ -6779,6 +7740,24 @@ sched_dirty_io_thread_func(void *vesdp) #endif erts_thread_init_float(); + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing) + & ERTS_SCHDLR_SSPND_CHNG_ONLN); + + if (--schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_wait_curr_online) { + erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_io_changing, + ~ERTS_SCHDLR_SSPND_CHNG_ONLN); + if (no != 1) + erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); + } + + if (no == 1) { + while (schdlr_sspnd.dirty_io_curr_online != schdlr_sspnd.dirty_io_wait_curr_online) + erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); + ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); + } + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + process_main(); /* No schedulers should *ever* terminate */ erl_exit(ERTS_ABORT_EXIT, @@ -7995,7 +8974,6 @@ Process *schedule(Process *p, int calls) || flags & ERTS_RUNQ_FLG_NONEMPTY); if (flags & (ERTS_RUNQ_FLG_CHK_CPU_BIND|ERTS_RUNQ_FLG_SUSPENDED)) { - if (flags & ERTS_RUNQ_FLG_SUSPENDED) { suspend_scheduler(esdp); flags = ERTS_RUNQ_FLGS_GET_NOB(rq); @@ -8006,10 +8984,17 @@ Process *schedule(Process *p, int calls) erts_sched_check_cpu_bind(esdp); } } +#ifdef ERTS_DIRTY_SCHEDULERS + else if (ERTS_SCHEDULER_IS_DIRTY(esdp) + && (erts_smp_atomic32_read_acqb(&esdp->ssi->flags) + & ERTS_SSI_FLG_SUSPENDED)) + suspend_scheduler(esdp); +#endif - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + { erts_aint32_t aux_work; - int leader_update = erts_thr_progress_update(esdp); + int leader_update = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 + : erts_thr_progress_update(esdp); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); if (aux_work | leader_update) { erts_smp_runq_unlock(rq); @@ -8020,7 +9005,8 @@ Process *schedule(Process *p, int calls) erts_smp_runq_lock(rq); } - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + ERTS_SMP_LC_ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) + || !erts_thr_progress_is_blocking()); } ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); @@ -8035,6 +9021,17 @@ Process *schedule(Process *p, int calls) flags = ERTS_RUNQ_FLGS_GET_NOB(rq); +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix) && rq->halt_in_progress) { + /* + * TODO: if halt in progress, need to put the dirty scheduler + * to sleep somewhere around here to prevent it from picking up + * new work + */ + } + else +#endif + if ((!(flags & ERTS_RUNQ_FLGS_QMASK) && !rq->misc.start) || (rq->halt_in_progress && ERTS_EMPTY_RUNQ_PORTS(rq))) { /* Prepare for scheduler wait */ @@ -8182,11 +9179,15 @@ Process *schedule(Process *p, int calls) + ERTS_PSFLGS_IN_PRQ_MASK_OFFSET)); #ifdef ERTS_DIRTY_SCHEDULERS - /* if a non-dirty scheduler picks up a process marked as already being - in a dirty run queue, just drop it and go get another process */ - if (state & (ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) && - !ERTS_SCHEDULER_IS_DIRTY(esdp)) - goto pick_next_process; + ASSERT((state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) != + (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)); + if (state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) { + ASSERT((ERTS_SCHEDULER_IS_DIRTY_CPU(esdp) && (state & ERTS_PSFLG_DIRTY_CPU_PROC)) || + (ERTS_SCHEDULER_IS_DIRTY_IO(esdp) && (state & ERTS_PSFLG_DIRTY_IO_PROC))); + if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && !(state & ERTS_PSFLG_ACTIVE_SYS)) + goto pick_next_process; + state &= ~(ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); + } #endif if (!(state & ERTS_PSFLG_PROXY)) @@ -8322,7 +9323,11 @@ Process *schedule(Process *p, int calls) if (state & ERTS_PSFLG_RUNNING_SYS) { reds -= execute_sys_tasks(p, &state, reds); - if (reds <= 0) { + if (reds <= 0 +#ifdef ERTS_DIRTY_SCHEDULERS + || (state & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC)) +#endif + ) { p->fcalls = reds; goto sched_out_proc; } @@ -10087,7 +11092,7 @@ save_pending_exiter(Process *p) erts_smp_runq_unlock(rq); #ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - wake_dirty_scheduler(rq); + wake_dirty_schedulers(rq, 0); else #endif wake_scheduler(rq); @@ -11148,6 +12153,10 @@ void erl_halt(int code) if (-1 == erts_smp_atomic32_cmpxchg_acqb(&erts_halt_progress, erts_no_schedulers, -1)) { +#ifdef ERTS_DIRTY_SCHEDULERS + ERTS_DIRTY_CPU_RUNQ->halt_in_progress = 1; + ERTS_DIRTY_IO_RUNQ->halt_in_progress = 1; +#endif erts_halt_code = code; notify_reap_ports_relb(); } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index dcb9251d0d..f6b185c0ad 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -109,7 +109,6 @@ extern int erts_sched_balance_util; extern Uint erts_no_schedulers; #ifdef ERTS_DIRTY_SCHEDULERS extern Uint erts_no_dirty_cpu_schedulers; -extern Uint erts_no_dirty_cpu_schedulers_online; extern Uint erts_no_dirty_io_schedulers; #endif extern Uint erts_no_run_queues; @@ -544,6 +543,21 @@ typedef struct { #endif } ErtsAuxWorkData; +#ifdef ERTS_DIRTY_SCHEDULERS +typedef enum { + ERTS_DIRTY_CPU_SCHEDULER, + ERTS_DIRTY_IO_SCHEDULER +} ErtsDirtySchedulerType; + +typedef union { + struct { + ErtsDirtySchedulerType type: 1; + unsigned num: 31; + } s; + Uint no; +} ErtsDirtySchedId; +#endif + struct ErtsSchedulerData_ { /* * Keep X registers first (so we get as many low @@ -570,7 +584,7 @@ struct ErtsSchedulerData_ { Process *current_process; Uint no; /* Scheduler number for normal schedulers */ #ifdef ERTS_DIRTY_SCHEDULERS - Uint dirty_no; /* Scheduler number for dirty schedulers */ + ErtsDirtySchedId dirty_no; /* Scheduler number for dirty schedulers */ #endif Port *current_port; ErtsRunQueue *run_queue; @@ -1292,6 +1306,8 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags; &erts_aligned_run_queues[(IX)].runq) #define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[-1].runq) #define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[-2].runq) +#define ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(RQ) ((RQ)->ix == -1) +#define ERTS_RUNQ_IS_DIRTY_IO_RUNQ(RQ) ((RQ)->ix == -2) #else #define ERTS_RUNQ_IX_IS_DIRTY(IX) 0 #endif @@ -1305,21 +1321,37 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags; #define ERTS_DIRTY_IO_SCHEDULER_IX(IX) \ (ASSERT(0 <= (IX) && (IX) < erts_no_dirty_io_schedulers), \ &erts_aligned_dirty_io_scheduler_data[(IX)].esd) +#define ERTS_DIRTY_SCHEDULER_NO(ESDP) \ + ((ESDP)->dirty_no.s.num) +#define ERTS_DIRTY_SCHEDULER_TYPE(ESDP) \ + ((ESDP)->dirty_no.s.type) #ifdef ERTS_SMP #define ERTS_SCHEDULER_IS_DIRTY(ESDP) \ - ((ESDP)->dirty_no != 0) + ((ESDP)->dirty_no.s.num != 0) +#define ERTS_SCHEDULER_IS_DIRTY_CPU(ESDP) \ + ((ESDP)->dirty_no.s.type == 0) +#define ERTS_SCHEDULER_IS_DIRTY_IO(ESDP) \ + ((ESDP)->dirty_no.s.type == 1) #else #define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0 +#define ERTS_SCHEDULER_IS_DIRTY_CPU(ESDP) 0 +#define ERTS_SCHEDULER_IS_DIRTY_IO(ESDP) 0 #endif #else #define ERTS_RUNQ_IX_IS_DIRTY(IX) 0 #define ERTS_SCHEDULER_IS_DIRTY(ESDP) 0 +#define ERTS_SCHEDULER_IS_DIRTY_CPU(ESDP) 0 +#define ERTS_SCHEDULER_IS_DIRTY_IO(ESDP) 0 #endif void erts_pre_init_process(void); void erts_late_init_process(void); void erts_early_init_scheduling(int); -void erts_init_scheduling(int, int); +void erts_init_scheduling(int, int +#ifdef ERTS_DIRTY_SCHEDULERS + , int, int, int +#endif + ); int erts_set_gc_state(Process *c_p, int enable); Eterm erts_sched_wall_time_request(Process *c_p, int set, int enable); @@ -1526,7 +1558,11 @@ ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, Sint new_no, - Sint *old_no); + Sint *old_no +#ifdef ERTS_DIRTY_SCHEDULERS + , int dirty_only +#endif + ); ErtsSchedSuspendResult erts_block_multi_scheduling(Process *, ErtsProcLocks, int, int); int erts_is_multi_scheduling_blocked(void); diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index 6a43e2b0e7..3906471f87 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -54,6 +54,7 @@ sbt_cmd/1, scheduler_threads/1, scheduler_suspend/1, + dirty_scheduler_threads/1, reader_groups/1]). -define(DEFAULT_TIMEOUT, ?t:minutes(15)). @@ -68,6 +69,7 @@ all() -> equal_and_high_with_part_time_max, equal_with_high, equal_with_high_max, bound_process, {group, scheduler_bind}, scheduler_threads, scheduler_suspend, + dirty_scheduler_threads, reader_groups]. groups() -> @@ -1092,6 +1094,55 @@ scheduler_threads(Config) when is_list(Config) -> end, ok. +dirty_scheduler_threads(Config) when is_list(Config) -> + SmpSupport = erlang:system_info(smp_support), + try + erlang:system_info(dirty_cpu_schedulers), + dirty_scheduler_threads_test(Config, SmpSupport) + catch + error:badarg -> + {skipped, "No dirty scheduler support"} + end. + +dirty_scheduler_threads_test(Config, SmpSupport) -> + {Sched, SchedOnln, _} = get_dsstate(Config, ""), + {HalfSched, HalfSchedOnln} = case SmpSupport of + false -> {1,1}; + true -> + {Sched div 2, + SchedOnln div 2} + end, + Cmd1 = "+SDcpu "++integer_to_list(HalfSched)++":"++ + integer_to_list(HalfSchedOnln), + {HalfSched, HalfSchedOnln, _} = get_dsstate(Config, Cmd1), + {HalfSched, HalfSchedOnln, _} = get_dsstate(Config, "+SDPcpu 50:50"), + IOSched = 20, + {_, _, IOSched} = get_dsstate(Config, "+SDio "++integer_to_list(IOSched)), + {ok, Node} = start_node(Config, ""), + [ok] = mcall(Node, [fun() -> dirty_schedulers_online_test() end]), + ok. + +dirty_schedulers_online_test() -> + dirty_schedulers_online_test(erlang:system_info(smp_support)). +dirty_schedulers_online_test(false) -> ok; +dirty_schedulers_online_test(true) -> + dirty_schedulers_online_smp_test(erlang:system_info(schedulers_online)). +dirty_schedulers_online_smp_test(SchedOnln) when SchedOnln < 4 -> ok; +dirty_schedulers_online_smp_test(SchedOnln) -> + DirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), + SchedOnln = DirtyCPUSchedOnln, + HalfSchedOnln = SchedOnln div 2, + SchedOnln = erlang:system_flag(schedulers_online, HalfSchedOnln), + HalfDirtyCPUSchedOnln = DirtyCPUSchedOnln div 2, + HalfDirtyCPUSchedOnln = erlang:system_flag(schedulers_online, SchedOnln), + DirtyCPUSchedOnln = erlang:system_flag(dirty_cpu_schedulers_online, + HalfDirtyCPUSchedOnln), + HalfDirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), + QrtrDirtyCPUSchedOnln = HalfDirtyCPUSchedOnln div 2, + SchedOnln = erlang:system_flag(schedulers_online, HalfSchedOnln), + QrtrDirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), + ok. + get_sstate(Config, Cmd) -> {ok, Node} = start_node(Config, Cmd), [SState] = mcall(Node, [fun () -> @@ -1100,6 +1151,20 @@ get_sstate(Config, Cmd) -> stop_node(Node), SState. +get_dsstate(Config, Cmd) -> + {ok, Node} = start_node(Config, Cmd), + [DSCPU] = mcall(Node, [fun () -> + erlang:system_info(dirty_cpu_schedulers) + end]), + [DSCPUOnln] = mcall(Node, [fun () -> + erlang:system_info(dirty_cpu_schedulers_online) + end]), + [DSIO] = mcall(Node, [fun () -> + erlang:system_info(dirty_io_schedulers) + end]), + stop_node(Node), + {DSCPU, DSCPUOnln, DSIO}. + scheduler_suspend(Config) when is_list(Config) -> ?line Dog = ?t:timetrap(?t:minutes(5)), ?line lists:foreach(fun (S) -> scheduler_suspend_test(Config, S) end, @@ -1171,16 +1236,40 @@ sst2_loop(N) -> erlang:system_flag(multi_scheduling, unblock), sst2_loop(N-1). -sst3_loop(_S, 0) -> - ok; sst3_loop(S, N) -> + try erlang:system_info(dirty_cpu_schedulers) of + DS -> + sst3_loop_with_dirty_schedulers(S, DS, N) + catch + error:badarg -> + sst3_loop_normal_schedulers_only(S, N) + end. + +sst3_loop_normal_schedulers_only(_S, 0) -> + ok; +sst3_loop_normal_schedulers_only(S, N) -> + erlang:system_flag(schedulers_online, (S div 2)+1), + erlang:system_flag(schedulers_online, 1), + erlang:system_flag(schedulers_online, (S div 2)+1), + erlang:system_flag(schedulers_online, S), + erlang:system_flag(schedulers_online, 1), + erlang:system_flag(schedulers_online, S), + sst3_loop_normal_schedulers_only(S, N-1). + +sst3_loop_with_dirty_schedulers(_S, _DS, 0) -> + ok; +sst3_loop_with_dirty_schedulers(S, DS, N) -> erlang:system_flag(schedulers_online, (S div 2)+1), + erlang:system_flag(dirty_cpu_schedulers_online, (DS div 2)+1), erlang:system_flag(schedulers_online, 1), erlang:system_flag(schedulers_online, (S div 2)+1), + erlang:system_flag(dirty_cpu_schedulers_online, 1), erlang:system_flag(schedulers_online, S), + erlang:system_flag(dirty_cpu_schedulers_online, DS), erlang:system_flag(schedulers_online, 1), erlang:system_flag(schedulers_online, S), - sst3_loop(S, N-1). + erlang:system_flag(dirty_cpu_schedulers_online, DS), + sst3_loop_with_dirty_schedulers(S, DS, N-1). reader_groups(Config) when is_list(Config) -> %% White box testing. These results are correct, but other results diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c index 2cf7280ebc..709c6f02d1 100644 --- a/erts/etc/common/erlexec.c +++ b/erts/etc/common/erlexec.c @@ -833,9 +833,13 @@ int main(int argc, char **argv) #ifdef ERTS_DIRTY_SCHEDULERS else if (argv[i][2] == 'D') { char* type = argv[i]+3; - if (strcmp(type, "cpu") != 0 && - strcmp(type, "Pcpu") != 0 && - strcmp(type, "io") != 0) + if (strncmp(type, "cpu", 3) != 0 && + strncmp(type, "Pcpu", 4) != 0 && + strncmp(type, "io", 2) != 0) + usage(argv[i]); + if ((argv[i][3] == 'c' && argv[i][6] != '\0') || + (argv[i][3] == 'P' && argv[i][7] != '\0') || + (argv[i][3] == 'i' && argv[i][5] != '\0')) goto the_default; } #endif |