From 207a569d4caf80b10a54eec2220ac672a3377f00 Mon Sep 17 00:00:00 2001 From: Rickard Green Date: Fri, 22 Jan 2016 17:48:49 +0100 Subject: Improved scheduler suspend functionality - The calling process is now suspended while synchronizing scheduler suspends via erlang:system_flag(schedulers_online, _) and erlang:system_flag(multi_scheduling, _), instead of blocking the scheduler thread in the BIF call waiting for the operation to synchronize. Besides releasing the scheduler for other work (or immediate suspend) it also makes it possible to abort the operation by killing the process. - erlang:system_flag(schedulers_online, _) now only wait for normal schedulers to complete before it returns. This since it may take a very long time before all dirty schedulers suspends. - erlang:system_flag(multi_scheduling, block_normal|unblock_normal) which only operate on normal schedulers has been introduced. This since there are use cases where suspend of dirty schedulers are not of interest (hipe loader). - erlang:system_flag(multi_scheduling, block) still blocks all dirty schedulers as well as all normal schedulers except one since it is hard to redefine what multi scheduling block means. - The three operations: - changing amount of schedulers online - blocking/unblocking normal multi scheduling - blocking/unblocking full multi scheduling can now be done in parallel. This is important since otherwise a full multi scheduling block would potentially delay the other operations for a very long time. --- erts/emulator/beam/atom.names | 6 + erts/emulator/beam/bif.c | 66 +- erts/emulator/beam/bif.h | 25 +- erts/emulator/beam/erl_bif_info.c | 172 ++- erts/emulator/beam/erl_process.c | 2162 +++++++++++++++----------------- erts/emulator/beam/erl_process.h | 26 +- erts/emulator/test/scheduler_SUITE.erl | 176 ++- 7 files changed, 1339 insertions(+), 1294 deletions(-) (limited to 'erts/emulator') diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index dca8b503bf..169b071cd7 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -125,7 +125,9 @@ atom binary_longest_suffix_trap atom binary_to_list_continue atom binary_to_term_trap atom block +atom block_normal atom blocked +atom blocked_normal atom bm atom bnot atom bor @@ -190,7 +192,9 @@ atom dexit atom depth atom dgroup_leader atom dictionary +atom dirty_cpu atom dirty_cpu_schedulers_online +atom dirty_io atom disable_trace atom disabled atom display_items @@ -305,6 +309,7 @@ atom index atom infinity atom info atom info_msg +atom init atom initial_call atom input atom internal @@ -612,6 +617,7 @@ atom use_stdio atom used atom utf8 atom unblock +atom unblock_normal atom uniq atom unless_suspending atom unloaded diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index 6ab75496a0..37bb28c6f8 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -4197,22 +4197,33 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) Sint n; if (BIF_ARG_1 == am_multi_scheduling) { - if (BIF_ARG_2 == am_block || BIF_ARG_2 == am_unblock) { + if (BIF_ARG_2 == am_block || BIF_ARG_2 == am_unblock + || BIF_ARG_2 == am_block_normal || BIF_ARG_2 == am_unblock_normal) { #ifndef ERTS_SMP BIF_RET(am_disabled); #else + int block = (BIF_ARG_2 == am_block + || BIF_ARG_2 == am_block_normal); + int normal = (BIF_ARG_2 == am_block_normal + || BIF_ARG_2 == am_unblock_normal); if (erts_no_schedulers == 1) BIF_RET(am_disabled); else { switch (erts_block_multi_scheduling(BIF_P, ERTS_PROC_LOCK_MAIN, - BIF_ARG_2 == am_block, + block, + normal, 0)) { case ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED: BIF_RET(am_blocked); + case ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED: + BIF_RET(am_blocked_normal); case ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED: ERTS_BIF_YIELD_RETURN_X(BIF_P, am_blocked, am_multi_scheduling); + case ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED: + ERTS_BIF_YIELD_RETURN_X(BIF_P, am_blocked_normal, + am_multi_scheduling); case ERTS_SCHDLR_SSPND_DONE: BIF_RET(am_enabled); case ERTS_SCHDLR_SSPND_YIELD_RESTART: @@ -4245,11 +4256,7 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2) switch (erts_set_schedulers_online(BIF_P, ERTS_PROC_LOCK_MAIN, signed_val(BIF_ARG_2), - &old_no -#ifdef ERTS_DIRTY_SCHEDULERS - , 0 -#endif - )) { + &old_no, 0)) { case ERTS_SCHDLR_SSPND_DONE: BIF_RET(make_small(old_no)); case ERTS_SCHDLR_SSPND_YIELD_RESTART: @@ -4619,29 +4626,27 @@ BIF_RETTYPE erts_internal_cmp_term_2(BIF_ALIST_2) { /* * Processes doing yield on return in a bif ends up in bif_return_trap(). */ -static BIF_RETTYPE bif_return_trap( -#ifdef DEBUG - BIF_ALIST_2 -#else - BIF_ALIST_1 -#endif - ) +static BIF_RETTYPE bif_return_trap(BIF_ALIST_2) { -#ifdef DEBUG + Eterm res = BIF_ARG_1; + switch (BIF_ARG_2) { - case am_multi_scheduling: #ifdef ERTS_SMP - erts_dbg_multi_scheduling_return_trap(BIF_P, BIF_ARG_1); -#endif - break; - case am_schedulers_online: + case am_multi_scheduling: { + int msb = erts_is_multi_scheduling_blocked(); + if (msb > 0) + res = am_blocked; + else if (msb < 0) + res = am_blocked_normal; + else + ERTS_INTERNAL_ERROR("Unexpected multi scheduling block state"); break; + } +#endif default: break; } -#endif - - BIF_RET(BIF_ARG_1); + BIF_RET(res); } /* @@ -4746,17 +4751,12 @@ void erts_init_bif(void) erts_smp_atomic_init_nob(&erts_dead_ports_ptr, (erts_aint_t) NULL); /* - * bif_return_trap/1 is a hidden BIF that bifs that need to - * yield the calling process traps to. The only thing it does: - * return the value passed as argument. + * bif_return_trap/2 is a hidden BIF that bifs that need to + * yield the calling process traps to. */ - erts_init_trap_export(&bif_return_trap_export, am_erlang, am_bif_return_trap, -#ifdef DEBUG - 2 -#else - 1 -#endif - , &bif_return_trap); + erts_init_trap_export(&bif_return_trap_export, + am_erlang, am_bif_return_trap, 2, + &bif_return_trap); erts_await_result = erts_export_put(am_erts_internal, am_await_result, diff --git a/erts/emulator/beam/bif.h b/erts/emulator/beam/bif.h index a62eddf36b..46af552b39 100644 --- a/erts/emulator/beam/bif.h +++ b/erts/emulator/beam/bif.h @@ -338,37 +338,20 @@ do { \ } while(0) extern Export bif_return_trap_export; -#ifdef DEBUG -#define ERTS_BIF_PREP_YIELD_RETURN_X(RET, P, VAL, DEBUG_VAL) \ +#define ERTS_BIF_PREP_YIELD_RETURN_X(RET, P, VAL, OP) \ do { \ ERTS_VBUMP_ALL_REDS(P); \ - ERTS_BIF_PREP_TRAP2(RET, &bif_return_trap_export, (P), (VAL), \ - (DEBUG_VAL)); \ + ERTS_BIF_PREP_TRAP2(RET, &bif_return_trap_export, (P), (VAL), (OP));\ } while (0) -#else -#define ERTS_BIF_PREP_YIELD_RETURN_X(RET, P, VAL, DEBUG_VAL) \ -do { \ - ERTS_VBUMP_ALL_REDS(P); \ - ERTS_BIF_PREP_TRAP1(RET, &bif_return_trap_export, (P), (VAL)); \ -} while (0) -#endif #define ERTS_BIF_PREP_YIELD_RETURN(RET, P, VAL) \ ERTS_BIF_PREP_YIELD_RETURN_X(RET, (P), (VAL), am_undefined) -#ifdef DEBUG -#define ERTS_BIF_YIELD_RETURN_X(P, VAL, DEBUG_VAL) \ +#define ERTS_BIF_YIELD_RETURN_X(P, VAL, OP) \ do { \ ERTS_VBUMP_ALL_REDS(P); \ - BIF_TRAP2(&bif_return_trap_export, (P), (VAL), (DEBUG_VAL)); \ + BIF_TRAP2(&bif_return_trap_export, (P), (VAL), (OP)); \ } while (0) -#else -#define ERTS_BIF_YIELD_RETURN_X(P, VAL, DEBUG_VAL) \ -do { \ - ERTS_VBUMP_ALL_REDS(P); \ - BIF_TRAP1(&bif_return_trap_export, (P), (VAL)); \ -} while (0) -#endif #define ERTS_BIF_RETURN_YIELD(P) ERTS_VBUMP_ALL_REDS((P)) diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 39eb257c2c..8c748c9bf7 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -320,13 +320,11 @@ erts_print_system_version(int to, void *arg, Process *c_p) char *ov = otp_version; #ifdef ERTS_SMP Uint total, online, active; -#ifdef ERTS_DIRTY_SCHEDULERS Uint dirty_cpu, dirty_cpu_onln, dirty_io; - (void) erts_schedulers_state(&total, &online, &active, &dirty_cpu, &dirty_cpu_onln, &dirty_io, 0); -#else - (void) erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 0); -#endif + erts_schedulers_state(&total, &online, &active, + &dirty_cpu, &dirty_cpu_onln, NULL, + &dirty_io, NULL); #endif for (i = 0; i < sizeof(otp_version)-4; i++) { if (ov[i] == '-' && ov[i+1] == 'r' && ov[i+2] == 'c') @@ -2044,12 +2042,18 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) #ifndef ERTS_SMP BIF_RET(am_disabled); #else +#ifndef ERTS_DIRTY_SCHEDULERS if (erts_no_schedulers == 1) BIF_RET(am_disabled); - else { - BIF_RET(erts_is_multi_scheduling_blocked() - ? am_blocked - : am_enabled); + else +#endif + { + int msb = erts_is_multi_scheduling_blocked(); + BIF_RET(!msb + ? am_enabled + : (msb > 0 + ? am_blocked + : am_blocked_normal)); } #endif } else if (BIF_ARG_1 == am_build_type) { @@ -2518,77 +2522,120 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) res = TUPLE3(hp, make_small(1), make_small(1), make_small(1)); BIF_RET(res); #else + Eterm *hp; Uint total, online, active; - switch (erts_schedulers_state(&total, - &online, - &active, - NULL, - NULL, - NULL, - 1)) { - case ERTS_SCHDLR_SSPND_DONE: { - Eterm *hp = HAlloc(BIF_P, 4); - res = TUPLE3(hp, - make_small(total), - make_small(online), - make_small(active)); - BIF_RET(res); + erts_schedulers_state(&total, &online, &active, + NULL, NULL, NULL, NULL, NULL); + hp = HAlloc(BIF_P, 4); + res = TUPLE3(hp, + make_small(total), + make_small(online), + make_small(active)); + BIF_RET(res); +#endif + } else if (ERTS_IS_ATOM_STR("schedulers_state", BIF_ARG_1)) { +#ifndef ERTS_SMP + Eterm *hp = HAlloc(BIF_P, 4); + res = TUPLE3(hp, make_small(1), make_small(1), make_small(1)); + BIF_RET(res); +#else + Eterm *hp; + Uint total, online, active; + erts_schedulers_state(&total, &online, &active, + NULL, NULL, NULL, NULL, NULL); + hp = HAlloc(BIF_P, 4); + res = TUPLE3(hp, + make_small(total), + make_small(online), + make_small(active)); + BIF_RET(res); +#endif + } else if (ERTS_IS_ATOM_STR("all_schedulers_state", BIF_ARG_1)) { +#ifndef ERTS_SMP + Eterm *hp = HAlloc(BIF_P, 2+5); + res = CONS(hp+5, + TUPLE4(hp, + am_normal, + make_small(1), + make_small(1), + make_small(1)), + NIL); + BIF_RET(res); +#else + Eterm *hp, tpl; + Uint sz, total, online, active, + dirty_cpu_total, dirty_cpu_online, dirty_cpu_active, + dirty_io_total, dirty_io_active; + erts_schedulers_state(&total, &online, &active, + &dirty_cpu_total, &dirty_cpu_online, &dirty_cpu_active, + &dirty_io_total, &dirty_io_active); + + sz = 2+5; + if (dirty_cpu_total) + sz += 2+5; + if (dirty_io_total) + sz += 2+5; + + hp = HAlloc(BIF_P, sz); + + res = NIL; + if (dirty_io_total) { + tpl = TUPLE4(hp, + am_dirty_io, + make_small(dirty_io_total), + make_small(dirty_io_total), + make_small(dirty_io_active)); + hp += 5; + res = CONS(hp, tpl, res); + hp += 2; } - case ERTS_SCHDLR_SSPND_YIELD_RESTART: - ERTS_VBUMP_ALL_REDS(BIF_P); - BIF_TRAP1(bif_export[BIF_system_info_1], - BIF_P, BIF_ARG_1); - default: - ASSERT(0); - BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); + if (dirty_cpu_total) { + tpl = TUPLE4(hp, + am_dirty_cpu, + make_small(dirty_cpu_total), + make_small(dirty_cpu_online), + make_small(dirty_cpu_active)); + hp += 5; + res = CONS(hp, tpl, res); + hp += 2; } + tpl = TUPLE4(hp, + am_normal, + make_small(total), + make_small(online), + make_small(active)); + hp += 5; + res = CONS(hp, tpl, res); + BIF_RET(res); #endif } else if (ERTS_IS_ATOM_STR("schedulers_online", BIF_ARG_1)) { #ifndef ERTS_SMP BIF_RET(make_small(1)); #else - Uint total, online, active; - switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) { - case ERTS_SCHDLR_SSPND_DONE: - BIF_RET(make_small(online)); - case ERTS_SCHDLR_SSPND_YIELD_RESTART: - ERTS_VBUMP_ALL_REDS(BIF_P); - BIF_TRAP1(bif_export[BIF_system_info_1], - BIF_P, BIF_ARG_1); - default: - ASSERT(0); - BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); - } + Uint online; + erts_schedulers_state(NULL, &online, NULL, NULL, NULL, NULL, NULL, NULL); + BIF_RET(make_small(online)); #endif } else if (ERTS_IS_ATOM_STR("schedulers_active", BIF_ARG_1)) { #ifndef ERTS_SMP BIF_RET(make_small(1)); #else - Uint total, online, active; - switch (erts_schedulers_state(&total, &online, &active, NULL, NULL, NULL, 1)) { - case ERTS_SCHDLR_SSPND_DONE: - BIF_RET(make_small(active)); - case ERTS_SCHDLR_SSPND_YIELD_RESTART: - ERTS_VBUMP_ALL_REDS(BIF_P); - BIF_TRAP1(bif_export[BIF_system_info_1], - BIF_P, BIF_ARG_1); - default: - ASSERT(0); - BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); - } + Uint active; + erts_schedulers_state(NULL, NULL, &active, NULL, NULL, NULL, NULL, NULL); + BIF_RET(make_small(active)); #endif #if defined(ERTS_SMP) && defined(ERTS_DIRTY_SCHEDULERS) } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers", BIF_ARG_1)) { Uint dirty_cpu; - erts_schedulers_state(NULL, NULL, NULL, &dirty_cpu, NULL, NULL, 1); + erts_schedulers_state(NULL, NULL, NULL, &dirty_cpu, NULL, NULL, NULL, NULL); BIF_RET(make_small(dirty_cpu)); } else if (ERTS_IS_ATOM_STR("dirty_cpu_schedulers_online", BIF_ARG_1)) { Uint dirty_cpu_onln; - erts_schedulers_state(NULL, NULL, NULL, NULL, &dirty_cpu_onln, NULL, 1); + erts_schedulers_state(NULL, NULL, NULL, NULL, &dirty_cpu_onln, NULL, NULL, NULL); BIF_RET(make_small(dirty_cpu_onln)); } else if (ERTS_IS_ATOM_STR("dirty_io_schedulers", BIF_ARG_1)) { Uint dirty_io; - erts_schedulers_state(NULL, NULL, NULL, NULL, NULL, &dirty_io, 1); + erts_schedulers_state(NULL, NULL, NULL, NULL, NULL, NULL, &dirty_io, NULL); BIF_RET(make_small(dirty_io)); #endif } else if (ERTS_IS_ATOM_STR("run_queues", BIF_ARG_1)) { @@ -2642,7 +2689,16 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1) if (erts_no_schedulers == 1) BIF_RET(NIL); else - BIF_RET(erts_multi_scheduling_blockers(BIF_P)); + BIF_RET(erts_multi_scheduling_blockers(BIF_P, 0)); +#endif + } else if (ERTS_IS_ATOM_STR("normal_multi_scheduling_blockers", BIF_ARG_1)) { +#ifndef ERTS_SMP + BIF_RET(NIL); +#else + if (erts_no_schedulers == 1) + BIF_RET(NIL); + else + BIF_RET(erts_multi_scheduling_blockers(BIF_P, 1)); #endif } else if (ERTS_IS_ATOM_STR("modified_timing_level", BIF_ARG_1)) { BIF_RET(ERTS_USE_MODIFIED_TIMING() diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index c69cbc023c..41e471a96a 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -153,10 +153,8 @@ int ERTS_WRITE_UNLIKELY(erts_eager_check_io) = 1; int ERTS_WRITE_UNLIKELY(erts_sched_compact_load); int ERTS_WRITE_UNLIKELY(erts_sched_balance_util) = 0; Uint ERTS_WRITE_UNLIKELY(erts_no_schedulers); -#ifdef ERTS_DIRTY_SCHEDULERS -Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers); -Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers); -#endif +Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_cpu_schedulers) = 0; +Uint ERTS_WRITE_UNLIKELY(erts_no_dirty_io_schedulers) = 0; static char *erts_aux_work_flag_descr[ERTS_SSI_AUX_WORK_NO_FLAGS] = {0}; int erts_aux_work_no_flags = ERTS_SSI_AUX_WORK_NO_FLAGS; @@ -188,84 +186,176 @@ int erts_disable_proc_not_running_opt; static ErtsAuxWorkData *aux_thread_aux_work_data; -#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((erts_aint32_t) 1) << 0) +#define ERTS_SCHDLR_SSPND_CHNG_NMSB (((erts_aint32_t) 1) << 0) #define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1) #define ERTS_SCHDLR_SSPND_CHNG_ONLN (((erts_aint32_t) 1) << 2) +#define ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN (((erts_aint32_t) 1) << 3) -#ifndef DEBUG +typedef enum { + ERTS_SCHED_NORMAL, + ERTS_SCHED_DIRTY_CPU, + ERTS_SCHED_DIRTY_IO +} ErtsSchedType; -#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.changing, (VAL)) +typedef struct { + int ongoing; + ErtsProcList *blckrs; + ErtsProcList *chngq; +} ErtsMultiSchedulingBlock; -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_cpu_changing, (VAL)) -#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ - erts_smp_atomic32_set_nob(&schdlr_sspnd.dirty_io_changing, (VAL)) -#endif +static struct { + erts_smp_mtx_t mtx; + Uint32 online; + Uint32 curr_online; + Uint32 active; + erts_smp_atomic32_t changing; + ErtsProcList *chngq; + Eterm changer; + ErtsMultiSchedulingBlock nmsb; /* Normal multi Scheduling Block */ + ErtsMultiSchedulingBlock msb; /* Multi Scheduling Block */ +} schdlr_sspnd; -#else +#define ERTS_SCHDLR_SSPND_S_BITS 10 +#define ERTS_SCHDLR_SSPND_DCS_BITS 11 +#define ERTS_SCHDLR_SSPND_DIS_BITS 11 -#define ERTS_SCHDLR_SSPND_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) +#define ERTS_SCHDLR_SSPND_S_MASK ((1 << ERTS_SCHDLR_SSPND_S_BITS)-1) +#define ERTS_SCHDLR_SSPND_DCS_MASK ((1 << ERTS_SCHDLR_SSPND_DCS_BITS)-1) +#define ERTS_SCHDLR_SSPND_DIS_MASK ((1 << ERTS_SCHDLR_SSPND_DIS_BITS)-1) -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_cpu_changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) -#define ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(VAL, OLD_VAL) \ -do { \ - erts_aint32_t old_val__; \ - old_val__ = erts_smp_atomic32_xchg_nob(&schdlr_sspnd.dirty_io_changing, \ - (VAL)); \ - ASSERT(old_val__ == (OLD_VAL)); \ -} while (0) -#endif +#define ERTS_SCHDLR_SSPND_S_SHIFT 0 +#define ERTS_SCHDLR_SSPND_DCS_SHIFT (ERTS_SCHDLR_SSPND_S_SHIFT \ + + ERTS_SCHDLR_SSPND_S_BITS) +#define ERTS_SCHDLR_SSPND_DIS_SHIFT (ERTS_SCHDLR_SSPND_DCS_SHIFT \ + + ERTS_SCHDLR_SSPND_DCS_BITS) +#if (ERTS_SCHDLR_SSPND_S_BITS \ + + ERTS_SCHDLR_SSPND_DCS_BITS \ + + ERTS_SCHDLR_SSPND_DIS_BITS) > 32 +# error Wont fit in Uint32 #endif - -static struct { - erts_smp_mtx_t mtx; - erts_smp_cnd_t cnd; - int online; - int curr_online; - int wait_curr_online; -#ifdef ERTS_DIRTY_SCHEDULERS - int dirty_cpu_online; - int dirty_cpu_curr_online; - int dirty_cpu_wait_curr_online; - int dirty_io_online; - int dirty_io_curr_online; - int dirty_io_wait_curr_online; +#if (ERTS_MAX_NO_OF_SCHEDULERS-1) > ERTS_SCHDLR_SSPND_S_MASK +# error Max no schedulers wont fit in its bit-field #endif - erts_smp_atomic32_t changing; - erts_smp_atomic32_t active; -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_atomic32_t dirty_cpu_changing; - erts_smp_atomic32_t dirty_cpu_active; - erts_smp_atomic32_t dirty_io_changing; - erts_smp_atomic32_t dirty_io_active; +#if ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS > ERTS_SCHDLR_SSPND_DCS_MASK +# error Max no dirty cpu schedulers wont fit in its bit-field #endif - struct { - int ongoing; - long wait_active; -#ifdef ERTS_DIRTY_SCHEDULERS - long dirty_cpu_wait_active; - long dirty_io_wait_active; +#if ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS > ERTS_SCHDLR_SSPND_DIS_MASK +# error Max no dirty io schedulers wont fit in its bit-field #endif - ErtsProcList *procs; - } msb; /* Multi Scheduling Block */ -} schdlr_sspnd; + +#define ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(S, DCS, DIS) \ + ((((Uint32) (((S) & ERTS_SCHDLR_SSPND_S_MASK))-1) \ + << ERTS_SCHDLR_SSPND_S_SHIFT) \ + | ((((Uint32) ((DCS) & ERTS_SCHDLR_SSPND_DCS_MASK)) \ + << ERTS_SCHDLR_SSPND_DCS_SHIFT)) \ + | ((((Uint32) ((DIS) & ERTS_SCHDLR_SSPND_DIS_MASK)) \ + << ERTS_SCHDLR_SSPND_DIS_SHIFT))) + +static void init_scheduler_suspend(void); + +static ERTS_INLINE Uint32 +schdlr_sspnd_get_nscheds(Uint32 *valp, ErtsSchedType type) +{ + Uint32 res = (Uint32) (*valp); + switch (type) { + case ERTS_SCHED_NORMAL: + res >>= ERTS_SCHDLR_SSPND_S_SHIFT; + res &= (Uint32) ERTS_SCHDLR_SSPND_S_MASK; + res++; + break; + case ERTS_SCHED_DIRTY_CPU: + res >>= ERTS_SCHDLR_SSPND_DCS_SHIFT; + res &= (Uint32) ERTS_SCHDLR_SSPND_DCS_MASK; + break; + case ERTS_SCHED_DIRTY_IO: + res >>= ERTS_SCHDLR_SSPND_DIS_SHIFT; + res &= (Uint32) ERTS_SCHDLR_SSPND_DIS_MASK; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + return 0; + } + + return res; +} + +static ERTS_INLINE void +schdlr_sspnd_dec_nscheds(Uint32 *valp, ErtsSchedType type) +{ + ASSERT(schdlr_sspnd_get_nscheds(valp, type) > 0); + + switch (type) { + case ERTS_SCHED_NORMAL: + *valp -= ((Uint32) 1) << ERTS_SCHDLR_SSPND_S_SHIFT; + break; + case ERTS_SCHED_DIRTY_CPU: + *valp -= ((Uint32) 1) << ERTS_SCHDLR_SSPND_DCS_SHIFT; + break; + case ERTS_SCHED_DIRTY_IO: + *valp -= ((Uint32) 1) << ERTS_SCHDLR_SSPND_DIS_SHIFT; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } +} + +static ERTS_INLINE void +schdlr_sspnd_inc_nscheds(Uint32 *valp, ErtsSchedType type) +{ + switch (type) { + case ERTS_SCHED_NORMAL: + ASSERT(schdlr_sspnd_get_nscheds(valp, type) + < ERTS_MAX_NO_OF_SCHEDULERS-1); + *valp += ((Uint32) 1) << ERTS_SCHDLR_SSPND_S_SHIFT; + break; + case ERTS_SCHED_DIRTY_CPU: + ASSERT(schdlr_sspnd_get_nscheds(valp, type) + < ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS); + *valp += ((Uint32) 1) << ERTS_SCHDLR_SSPND_DCS_SHIFT; + break; + case ERTS_SCHED_DIRTY_IO: + ASSERT(schdlr_sspnd_get_nscheds(valp, type) + < ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS); + *valp += ((Uint32) 1) << ERTS_SCHDLR_SSPND_DIS_SHIFT; + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } +} + +static ERTS_INLINE void +schdlr_sspnd_set_nscheds(Uint32 *valp, ErtsSchedType type, Uint32 no) +{ + Uint32 val = *valp; + + switch (type) { + case ERTS_SCHED_NORMAL: + ASSERT(no > 0); + val &= ~(((Uint32) ERTS_SCHDLR_SSPND_S_MASK) + << ERTS_SCHDLR_SSPND_S_SHIFT); + val |= (((no-1) & ((Uint32) ERTS_SCHDLR_SSPND_S_MASK)) + << ERTS_SCHDLR_SSPND_S_SHIFT); + break; + case ERTS_SCHED_DIRTY_CPU: + val &= ~(((Uint32) ERTS_SCHDLR_SSPND_DCS_MASK) + << ERTS_SCHDLR_SSPND_DCS_SHIFT); + val |= ((no & ((Uint32) ERTS_SCHDLR_SSPND_DCS_MASK)) + << ERTS_SCHDLR_SSPND_DCS_SHIFT); + break; + case ERTS_SCHED_DIRTY_IO: + val &= ~(((Uint32) ERTS_SCHDLR_SSPND_DIS_MASK) + << ERTS_SCHDLR_SSPND_DIS_SHIFT); + val |= ((no & ((Uint32) ERTS_SCHDLR_SSPND_DIS_MASK)) + << ERTS_SCHDLR_SSPND_DIS_SHIFT); + break; + default: + ERTS_INTERNAL_ERROR("Invalid scheduler type"); + } + + *valp = val; +} static struct { erts_smp_mtx_t update_mtx; @@ -426,8 +516,10 @@ do { \ do { \ ErtsRunQueue *RQVAR; \ int ix__; \ + int online__ = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, \ + ERTS_SCHED_NORMAL); \ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&schdlr_sspnd.mtx)); \ - for (ix__ = 0; ix__ < schdlr_sspnd.online; ix__++) { \ + for (ix__ = 0; ix__ < online__; ix__++) { \ RQVAR = ERTS_RUNQ_IX(ix__); \ erts_smp_runq_lock(RQVAR); \ { DO; } \ @@ -1188,12 +1280,27 @@ proclist_create(Process *p) return plp; } +static ERTS_INLINE ErtsProcList * +proclist_copy(ErtsProcList *plp0) +{ + ErtsProcList *plp1 = proclist_alloc(); + plp1->pid = plp0->pid; + plp1->started_interval = plp0->started_interval; + return plp1; +} + static ERTS_INLINE void proclist_destroy(ErtsProcList *plp) { proclist_free(plp); } +ErtsProcList * +erts_proclist_copy(ErtsProcList *plp) +{ + return proclist_copy(plp); +} + ErtsProcList * erts_proclist_create(Process *p) { @@ -2632,13 +2739,6 @@ sched_active(Uint no, ErtsRunQueue *rq) profile_scheduler(make_small(no), am_active); } -static int ERTS_INLINE -ongoing_multi_scheduling_block(void) -{ - ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&schdlr_sspnd.mtx)); - return schdlr_sspnd.msb.ongoing; -} - static ERTS_INLINE void empty_runq_aux(ErtsRunQueue *rq, Uint32 old_flags) { @@ -3025,8 +3125,10 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) timeout_time = erts_check_next_timeout_time(esdp); current_time = erts_get_monotonic_time(esdp); do_timeout = (current_time >= timeout_time); - } else + } else { + current_time = 0; timeout_time = ERTS_MONOTONIC_TIME_MAX; + } if (do_timeout) { if (!thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); @@ -3836,24 +3938,33 @@ static ERTS_INLINE void resume_run_queue(ErtsRunQueue *rq) { int pix; + Uint32 oflgs; erts_smp_runq_lock(rq); - (void) ERTS_RUNQ_FLGS_READ_BSET(rq, - (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK - | ERTS_RUNQ_FLG_SUSPENDED), - (ERTS_RUNQ_FLG_OUT_OF_WORK - | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); + oflgs = ERTS_RUNQ_FLGS_READ_BSET(rq, + (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK + | ERTS_RUNQ_FLG_SUSPENDED), + (ERTS_RUNQ_FLG_OUT_OF_WORK + | ERTS_RUNQ_FLG_HALFTIME_OUT_OF_WORK)); + + if (oflgs & ERTS_RUNQ_FLG_SUSPENDED) { + erts_aint32_t len; + + rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { + len = erts_smp_atomic32_read_dirty(&rq->procs.prio_info[pix].len); + rq->procs.prio_info[pix].max_len = len; + rq->procs.prio_info[pix].reds = 0; + } + len = erts_smp_atomic32_read_dirty(&rq->ports.info.len); + rq->ports.info.max_len = len; + rq->ports.info.reds = 0; + len = erts_smp_atomic32_read_dirty(&rq->len); + rq->max_len = len; - rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; - for (pix = 0; pix < ERTS_NO_PROC_PRIO_LEVELS; pix++) { - rq->procs.prio_info[pix].max_len = 0; - rq->procs.prio_info[pix].reds = 0; } - rq->ports.info.max_len = 0; - rq->ports.info.reds = 0; - rq->max_len = 0; erts_smp_runq_unlock(rq); @@ -5632,6 +5743,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online char *daww_ptr; size_t daww_sz; size_t size_runqs; +#ifdef ERTS_SMP + erts_aint32_t set_schdlr_sspnd_change_flags; +#endif init_misc_op_list_alloc(); init_proc_sys_task_queues_alloc(); @@ -5869,25 +5983,6 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA, sizeof(ErtsAuxWorkData)); - erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd"); - erts_smp_cnd_init(&schdlr_sspnd.cnd); - - erts_smp_atomic32_init_nob(&schdlr_sspnd.changing, 0); - schdlr_sspnd.online = no_schedulers_online; - schdlr_sspnd.curr_online = no_schedulers; - schdlr_sspnd.msb.ongoing = 0; - erts_smp_atomic32_init_nob(&schdlr_sspnd.active, no_schedulers); -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_changing, 0); - schdlr_sspnd.dirty_cpu_online = no_dirty_cpu_schedulers_online; - schdlr_sspnd.dirty_cpu_curr_online = no_dirty_cpu_schedulers; - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_cpu_active, no_dirty_cpu_schedulers); - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_changing, 0); - schdlr_sspnd.dirty_io_online = no_dirty_io_schedulers; - schdlr_sspnd.dirty_io_curr_online = no_dirty_io_schedulers; - erts_smp_atomic32_init_nob(&schdlr_sspnd.dirty_io_active, no_dirty_io_schedulers); -#endif - schdlr_sspnd.msb.procs = NULL; init_no_runqs(no_schedulers_online, no_schedulers_online); balance_info.last_active_runqs = no_schedulers; erts_smp_mtx_init(&balance_info.update_mtx, "migration_info_update"); @@ -5902,32 +5997,66 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online init_migration_paths(); - if (no_schedulers_online < no_schedulers) { + init_scheduler_suspend(); + + set_schdlr_sspnd_change_flags = 0; + + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL, + no_schedulers_online); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_NORMAL, + no_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL, + no_schedulers); + + if (no_schedulers_online != no_schedulers) { + ASSERT(no_schedulers_online < no_schedulers); + set_schdlr_sspnd_change_flags |= ERTS_SCHDLR_SSPND_CHNG_ONLN; + schdlr_sspnd.changer = am_init; change_no_used_runqs(no_schedulers_online); for (ix = no_schedulers_online; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); } - schdlr_sspnd.wait_curr_online = no_schedulers_online; - schdlr_sspnd.curr_online *= 2; /* Boot strapping... */ - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); #ifdef ERTS_DIRTY_SCHEDULERS - schdlr_sspnd.dirty_cpu_wait_curr_online = no_dirty_cpu_schedulers_online; - schdlr_sspnd.dirty_cpu_curr_online *= 2; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = no_dirty_cpu_schedulers_online; ix < no_dirty_cpu_schedulers; ix++) { - ErtsSchedulerData* esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); - erts_smp_atomic32_read_bor_nob(&esdp->ssi->flags, ERTS_SSI_FLG_SUSPENDED); + + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers_online); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_CPU, + no_dirty_cpu_schedulers); + + if (no_dirty_cpu_schedulers_online != no_dirty_cpu_schedulers) { + ASSERT(no_dirty_cpu_schedulers_online < no_dirty_cpu_schedulers); + set_schdlr_sspnd_change_flags |= ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + for (ix = no_dirty_cpu_schedulers_online; ix < no_dirty_cpu_schedulers; ix++) { + ErtsSchedulerData* esdp = ERTS_DIRTY_CPU_SCHEDULER_IX(ix); + erts_smp_atomic32_read_bor_nob(&esdp->ssi->flags, ERTS_SSI_FLG_SUSPENDED); + } } - schdlr_sspnd.dirty_io_wait_curr_online = no_dirty_io_schedulers; - schdlr_sspnd.dirty_io_curr_online *= 2; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + schdlr_sspnd_set_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_IO, + no_dirty_io_schedulers); + #endif + if (set_schdlr_sspnd_change_flags) + erts_smp_atomic32_set_nob(&schdlr_sspnd.changing, + set_schdlr_sspnd_change_flags); + erts_smp_atomic32_init_nob(&doing_sys_schedule, 0); init_misc_aux_work(); @@ -5942,10 +6071,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online #endif } erts_no_schedulers = 1; -#ifdef ERTS_DIRTY_SCHEDULERS erts_no_dirty_cpu_schedulers = 0; erts_no_dirty_io_schedulers = 0; -#endif #endif erts_smp_atomic32_init_nob(&function_calls, 0); @@ -6705,19 +6832,6 @@ resume_process(Process *p, ErtsProcLocks locks) add2runq(enqueue, enq_prio, p, state, NULL); } -int -erts_get_max_no_executing_schedulers(void) -{ -#ifdef ERTS_SMP - if (erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - return (int) erts_no_schedulers; - ERTS_THR_MEMORY_BARRIER; - return (int) erts_smp_atomic32_read_nob(&schdlr_sspnd.active); -#else - return 1; -#endif -} - #ifdef ERTS_SMP static void @@ -6820,42 +6934,82 @@ sched_set_suspended_sleeptype(ErtsSchedulerSleepInfo *ssi) } } -#ifdef ERTS_DIRTY_SCHEDULERS +static void +init_scheduler_suspend(void) +{ + erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd"); + schdlr_sspnd.online = ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0); + schdlr_sspnd.curr_online = ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0); + schdlr_sspnd.active = ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0); + erts_smp_atomic32_init_nob(&schdlr_sspnd.changing, 0); + schdlr_sspnd.chngq = NULL; + schdlr_sspnd.changer = am_false; + schdlr_sspnd.nmsb.ongoing = 0; + schdlr_sspnd.nmsb.blckrs = NULL; + schdlr_sspnd.nmsb.chngq = NULL; + schdlr_sspnd.msb.ongoing = 0; + schdlr_sspnd.msb.blckrs = NULL; + schdlr_sspnd.msb.chngq = NULL; +} + +typedef struct { + struct { + Eterm chngr; + Eterm nxt; + } onln; + struct { + ErtsProcList *chngrs; + } msb; +} ErtsSchdlrSspndResume; + +static void +schdlr_sspnd_resume_proc(Eterm pid) +{ + Process *p = erts_pid2proc(NULL, 0, pid, ERTS_PROC_LOCK_STATUS); + if (p) { + resume_process(p, ERTS_PROC_LOCK_STATUS); + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } +} + +static ERTS_INLINE void +schdlr_sspnd_resume_procs(ErtsSchedType sched_type, + ErtsSchdlrSspndResume *resume) +{ + if (is_internal_pid(resume->onln.chngr)) { + schdlr_sspnd_resume_proc(resume->onln.chngr); + resume->onln.chngr = NIL; + } + if (is_internal_pid(resume->onln.nxt)) { + schdlr_sspnd_resume_proc(resume->onln.nxt); + resume->onln.nxt = NIL; + } + while (resume->msb.chngrs) { + ErtsProcList *plp = resume->msb.chngrs; + resume->msb.chngrs = plp->next; + schdlr_sspnd_resume_proc(plp->pid); + proclist_destroy(plp); + } +} static void suspend_scheduler(ErtsSchedulerData *esdp) { erts_aint32_t flgs; erts_aint32_t changing; -#ifdef ERTS_DIRTY_SCHEDULERS - long no = (long) (ERTS_SCHEDULER_IS_DIRTY(esdp) - ? ERTS_DIRTY_SCHEDULER_NO(esdp) - : esdp->no); -#else - long no = (long) esdp->no; -#endif + long no; ErtsSchedulerSleepInfo *ssi = esdp->ssi; - long active_schedulers; int curr_online = 1; - int wake = 0; + ErtsSchdlrSspndResume resume = {{NIL, NIL}, {NULL}}; erts_aint32_t aux_work; int thr_prgr_active = 1; ErtsStuckBoundProcesses sbp = {NULL, NULL}; - int* ss_onlinep; - int* ss_curr_onlinep; - int* ss_wait_curr_onlinep; - long* ss_wait_activep; - long ss_wait_active_target; - erts_smp_atomic32_t* ss_changingp; - erts_smp_atomic32_t* ss_activep; + ErtsSchedType sched_type; + erts_aint32_t online_flag; /* * Schedulers may be suspended in two different ways: * - A scheduler may be suspended since it is not online. - * All schedulers with scheduler ids greater than - * schdlr_sspnd.online are suspended; same for dirty - * schedulers and schdlr_sspnd.dirty_cpu_online and - * schdlr_sspnd.dirty_io_online. * - Multi scheduling is blocked. All schedulers except the * scheduler with scheduler id 1 are suspended, and all * dirty CPU and dirty I/O schedulers are suspended. @@ -6863,27 +7017,43 @@ suspend_scheduler(ErtsSchedulerData *esdp) * Regardless of why a scheduler is suspended, it ends up here. */ - ASSERT(ERTS_SCHEDULER_IS_DIRTY(esdp) || no != 1); - #ifdef ERTS_DIRTY_SCHEDULERS if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { + no = ERTS_DIRTY_SCHEDULER_NO(esdp); + if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(esdp->run_queue)) { + online_flag = ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + sched_type = ERTS_SCHED_DIRTY_CPU; + } + else { + online_flag = 0; + sched_type = ERTS_SCHED_DIRTY_IO; + } + } + else +#endif + { + online_flag = ERTS_SCHDLR_SSPND_CHNG_ONLN; + no = esdp->no; + sched_type = ERTS_SCHED_NORMAL; + } + + ASSERT(sched_type != ERTS_SCHED_NORMAL || no != 1); + + if (sched_type != ERTS_SCHED_NORMAL) { if (erts_smp_mtx_trylock(&schdlr_sspnd.mtx) == EBUSY) { erts_smp_runq_unlock(esdp->run_queue); erts_smp_mtx_lock(&schdlr_sspnd.mtx); erts_smp_runq_lock(esdp->run_queue); } - if (ongoing_multi_scheduling_block()) + if (schdlr_sspnd.msb.ongoing) evacuate_run_queue(esdp->run_queue, &sbp); - } else -#endif + erts_smp_runq_unlock(esdp->run_queue); + } + else { evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); + erts_smp_runq_unlock(esdp->run_queue); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { erts_sched_check_cpu_bind_prep_suspend(esdp); if (erts_system_profile_flags.scheduler) @@ -6897,99 +7067,139 @@ suspend_scheduler(ErtsSchedulerData *esdp) flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); if (flgs & ERTS_SSI_FLG_SUSPENDED) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(esdp->run_queue)) { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_cpu_active); - ASSERT(active_schedulers >= 0); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); - ss_onlinep = &schdlr_sspnd.dirty_cpu_online; - ss_curr_onlinep = &schdlr_sspnd.dirty_cpu_curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.dirty_cpu_wait_curr_online; - ss_changingp = &schdlr_sspnd.dirty_cpu_changing; - ss_wait_activep = &schdlr_sspnd.msb.dirty_cpu_wait_active; - ss_activep = &schdlr_sspnd.dirty_cpu_active; - } else { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.dirty_io_active); - ASSERT(active_schedulers >= 0); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing); - ss_onlinep = &schdlr_sspnd.dirty_io_online; - ss_curr_onlinep = &schdlr_sspnd.dirty_io_curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.dirty_io_wait_curr_online; - ss_changingp = &schdlr_sspnd.dirty_io_changing; - ss_wait_activep = &schdlr_sspnd.msb.dirty_io_wait_active; - ss_activep = &schdlr_sspnd.dirty_io_active; - } - ss_wait_active_target = 0; - } - else -#endif - { - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.active); - ASSERT(active_schedulers >= 1); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - ss_onlinep = &schdlr_sspnd.online; - ss_curr_onlinep = &schdlr_sspnd.curr_online; - ss_wait_curr_onlinep = &schdlr_sspnd.wait_curr_online; - ss_changingp = &schdlr_sspnd.changing; - ss_wait_activep = &schdlr_sspnd.msb.wait_active; - ss_activep = &schdlr_sspnd.active; - ss_wait_active_target = 1; - } - if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { - if (active_schedulers == *ss_wait_activep) - wake = 1; - if (active_schedulers == ss_wait_active_target) { - changing = erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; - } - } + schdlr_sspnd_dec_nscheds(&schdlr_sspnd.active, sched_type); + + ASSERT(schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) >= 1); + + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); while (1) { - if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + + if (changing & (ERTS_SCHDLR_SSPND_CHNG_NMSB + | ERTS_SCHDLR_SSPND_CHNG_MSB)) { + int i = 0; + ErtsMultiSchedulingBlock *msb[3] = {0}; + if (changing & ERTS_SCHDLR_SSPND_CHNG_NMSB) + msb[i++] = &schdlr_sspnd.nmsb; + if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) + msb[i++] = &schdlr_sspnd.msb; + + for (i = 0; msb[i]; i++) { + erts_aint32_t clr_flg = 0; + + if (msb[i] == &schdlr_sspnd.nmsb + && schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) == 1) { + clr_flg = ERTS_SCHDLR_SSPND_CHNG_NMSB; + } + else if (schdlr_sspnd.active + == ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0)) { + clr_flg = ERTS_SCHDLR_SSPND_CHNG_MSB; + } + + if (clr_flg) { + ErtsProcList *plp, *end_plp; + changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~clr_flg); + changing &= ~clr_flg; + (void) erts_proclist_fetch(&msb[i]->chngq, &end_plp); + /* resume processes that initiated the multi scheduling block... */ + plp = msb[i]->chngq; + while (plp) { + erts_proclist_store_last(&msb[i]->blckrs, + proclist_copy(plp)); + plp = plp->next; + } + if (end_plp) + end_plp->next = resume.msb.chngrs; + resume.msb.chngrs = msb[i]->chngq; + msb[i]->chngq = NULL; + } + } + } + + if (changing & online_flag) { int changed = 0; - if (no > *ss_onlinep && curr_online) { - (*ss_curr_onlinep)--; + Uint32 st_online; + + st_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + sched_type); + if (no > st_online && curr_online) { + schdlr_sspnd_dec_nscheds(&schdlr_sspnd.curr_online, + sched_type); curr_online = 0; changed = 1; } - else if (no <= *ss_onlinep && !curr_online) { - (*ss_curr_onlinep)++; + else if (no <= st_online && !curr_online) { + schdlr_sspnd_inc_nscheds(&schdlr_sspnd.curr_online, + sched_type); curr_online = 1; changed = 1; } if (changed - && *ss_curr_onlinep == *ss_wait_curr_onlinep) - wake = 1; - if (*ss_onlinep == *ss_curr_onlinep) { - changing = erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; + && (schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + sched_type) + == schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + sched_type))) { + ErtsProcList *plp; + changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~online_flag); + changing &= ~online_flag; + if (sched_type == ERTS_SCHED_NORMAL) { + ASSERT(is_internal_pid(schdlr_sspnd.changer) + || schdlr_sspnd.changer == am_init); + /* resume process that initiated this change... */ + resume.onln.chngr = schdlr_sspnd.changer; + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (!plp) + schdlr_sspnd.changer = am_false; + else { + schdlr_sspnd.changer = am_true; /* change right in transit */ + /* resume process that is queued for next change... */ + resume.onln.nxt = plp->pid; + ASSERT(is_internal_pid(resume.onln.nxt)); + } + } } } - if (wake) { - erts_smp_cnd_signal(&schdlr_sspnd.cnd); - wake = 0; - } - - if (curr_online && !ongoing_multi_scheduling_block()) { + if (curr_online + && (sched_type == ERTS_SCHED_NORMAL + ? !(schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) + : !schdlr_sspnd.msb.ongoing)) { flgs = erts_smp_atomic32_read_acqb(&ssi->flags); if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) break; } erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + schdlr_sspnd_resume_procs(sched_type, &resume); + while (1) { + ErtsMonotonicTime current_time; erts_aint32_t qmask; erts_aint32_t flgs; qmask = (ERTS_RUNQ_FLGS_GET(esdp->run_queue) & ERTS_RUNQ_FLGS_QMASK); - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work|qmask) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + + if (sched_type != ERTS_SCHED_NORMAL) { + if (qmask) { + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + erts_smp_runq_lock(esdp->run_queue); + if (schdlr_sspnd.msb.ongoing) + evacuate_run_queue(esdp->run_queue, &sbp); + erts_smp_runq_unlock(esdp->run_queue); + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + } + aux_work = 0; + } + else { + + aux_work = erts_atomic32_read_acqb(&ssi->aux_work); + + if (aux_work|qmask) { if (!thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); @@ -7001,271 +7211,50 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (aux_work && erts_thr_progress_update(esdp)) erts_thr_progress_leader_update(esdp); - } - if (qmask) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_SCHEDULER_IS_DIRTY(esdp)) { - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - erts_smp_runq_lock(esdp->run_queue); - if (ongoing_multi_scheduling_block()) - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - } else -#endif - { + if (qmask) { erts_smp_runq_lock(esdp->run_queue); evacuate_run_queue(esdp->run_queue, &sbp); erts_smp_runq_unlock(esdp->run_queue); } } - } - if (!aux_work) { -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); - sched_wall_time_change(esdp, 0); - } - erts_thr_progress_prepare_wait(esdp); - } - flgs = sched_spin_suspended(ssi, - ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - flgs = sched_set_suspended_sleeptype(ssi); - if (flgs == (ERTS_SSI_FLG_SLEEPING - | ERTS_SSI_FLG_TSE_SLEEPING - | ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)) { - int res; + } - do { - res = erts_tse_twait(ssi->event, -1); - } while (res == EINTR); + if (aux_work) { + ASSERT(sched_type == ERTS_SCHED_NORMAL); + current_time = erts_get_monotonic_time(esdp); + if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); } + erts_bump_timers(esdp->timer_wheel, current_time); } -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - erts_thr_progress_finalize_wait(esdp); } + else { + ErtsMonotonicTime timeout_time; + int do_timeout; - flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING - | ERTS_SSI_FLG_SUSPENDED)); - if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) - break; - changing = erts_smp_atomic32_read_nob(ss_changingp); - if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) - break; - } - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(ss_changingp); - } + if (sched_type == ERTS_SCHED_NORMAL) { + timeout_time = erts_check_next_timeout_time(esdp); + current_time = erts_get_monotonic_time(esdp); + do_timeout = (current_time >= timeout_time); + } + else { + timeout_time = ERTS_MONOTONIC_TIME_MAX; + current_time = 0; + do_timeout = 0; + } - active_schedulers = erts_smp_atomic32_inc_read_nob(ss_activep); - changing = erts_smp_atomic32_read_nob(ss_changingp); - if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) - && *ss_onlinep == active_schedulers) { - erts_smp_atomic32_read_band_nob(ss_changingp, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - } - -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - ASSERT(no <= *ss_onlinep); - ASSERT(!ongoing_multi_scheduling_block()); - - } - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - ASSERT(curr_online); - -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_active); - - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - } - - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - (void) erts_get_monotonic_time(esdp); - erts_smp_runq_lock(esdp->run_queue); - non_empty_runq(esdp->run_queue); - -#ifdef ERTS_DIRTY_SCHEDULERS - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) -#endif - { - schedule_bound_processes(esdp->run_queue, &sbp); - - erts_sched_check_cpu_bind_post_suspend(esdp); - } -} - -#else /* !ERTS_DIRTY_SCHEDULERS */ - -static void -suspend_scheduler(ErtsSchedulerData *esdp) -{ - erts_aint32_t flgs; - erts_aint32_t changing; - long no = (long) esdp->no; - ErtsSchedulerSleepInfo *ssi = esdp->ssi; - long active_schedulers; - int curr_online = 1; - int wake = 0; - erts_aint32_t aux_work; - int thr_prgr_active = 1; - ErtsStuckBoundProcesses sbp = {NULL, NULL}; - - /* - * Schedulers may be suspended in two different ways: - * - A scheduler may be suspended since it is not online. - * All schedulers with scheduler ids greater than - * schdlr_sspnd.online are suspended. - * - Multi scheduling is blocked. All schedulers except the - * scheduler with scheduler id 1 are suspended. - * - * Regardless of why a scheduler is suspended, it ends up here. - */ - - ASSERT(no != 1); - - evacuate_run_queue(esdp->run_queue, &sbp); - - erts_smp_runq_unlock(esdp->run_queue); - - erts_sched_check_cpu_bind_prep_suspend(esdp); - - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_inactive); - - sched_wall_time_change(esdp, 0); - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - flgs = sched_prep_spin_suspended(ssi, ERTS_SSI_FLG_SUSPENDED); - if (flgs & ERTS_SSI_FLG_SUSPENDED) { - - active_schedulers = erts_smp_atomic32_dec_read_nob(&schdlr_sspnd.active); - ASSERT(active_schedulers >= 1); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing & ERTS_SCHDLR_SSPND_CHNG_MSB) { - if (active_schedulers == schdlr_sspnd.msb.wait_active) - wake = 1; - if (active_schedulers == 1) { - changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_MSB); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_MSB; - } - } - - while (1) { - if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { - int changed = 0; - if (no > schdlr_sspnd.online && curr_online) { - schdlr_sspnd.curr_online--; - curr_online = 0; - changed = 1; - } - else if (no <= schdlr_sspnd.online && !curr_online) { - schdlr_sspnd.curr_online++; - curr_online = 1; - changed = 1; - } - if (changed - && schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) - wake = 1; - if (schdlr_sspnd.online == schdlr_sspnd.curr_online) { - changing = erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - changing &= ~ERTS_SCHDLR_SSPND_CHNG_ONLN; - } - } - - if (wake) { - erts_smp_cnd_signal(&schdlr_sspnd.cnd); - wake = 0; - } - - if (curr_online && !ongoing_multi_scheduling_block()) { - flgs = erts_smp_atomic32_read_acqb(&ssi->flags); - if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) - break; - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - while (1) { - ErtsMonotonicTime current_time; - erts_aint32_t qmask; - erts_aint32_t flgs; - - qmask = (ERTS_RUNQ_FLGS_GET(esdp->run_queue) - & ERTS_RUNQ_FLGS_QMASK); - aux_work = erts_atomic32_read_acqb(&ssi->aux_work); - if (aux_work|qmask) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - if (aux_work) - aux_work = handle_aux_work(&esdp->aux_work_data, - aux_work, - 1); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); - } - if (qmask) { - erts_smp_runq_lock(esdp->run_queue); - evacuate_run_queue(esdp->run_queue, &sbp); - erts_smp_runq_unlock(esdp->run_queue); - } - } - - if (aux_work) { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - current_time = erts_get_monotonic_time(esdp); - if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); - } - erts_bump_timers(esdp->timer_wheel, current_time); - } - } - } - else { - ErtsMonotonicTime timeout_time; - int do_timeout = 0; - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { - timeout_time = erts_check_next_timeout_time(esdp); - current_time = erts_get_monotonic_time(esdp); - do_timeout = (current_time >= timeout_time); - } else - timeout_time = ERTS_MONOTONIC_TIME_MAX; if (do_timeout) { + ASSERT(sched_type == ERTS_SCHED_NORMAL); if (!thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } else { - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (sched_type == ERTS_SCHED_NORMAL) { if (thr_prgr_active) { erts_thr_progress_active(esdp, thr_prgr_active = 0); sched_wall_time_change(esdp, 0); @@ -7284,30 +7273,39 @@ suspend_scheduler(ErtsSchedulerData *esdp) | ERTS_SSI_FLG_SUSPENDED)) { int res; - current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : - erts_get_monotonic_time(esdp); + if (sched_type == ERTS_SCHED_NORMAL) + current_time = erts_get_monotonic_time(esdp); + else + current_time = 0; + do { Sint64 timeout; if (current_time >= timeout_time) break; - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (sched_type != ERTS_SCHED_NORMAL) + timeout = -1; + else timeout = ERTS_MONOTONIC_TO_NSEC(timeout_time - current_time - 1) + 1; - } else - timeout = -1; res = erts_tse_twait(ssi->event, timeout); - current_time = ERTS_SCHEDULER_IS_DIRTY(esdp) ? 0 : - erts_get_monotonic_time(esdp); + + if (sched_type == ERTS_SCHED_NORMAL) + current_time = erts_get_monotonic_time(esdp); + else + current_time = 0; + } while (res == EINTR); } } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) + if (sched_type == ERTS_SCHED_NORMAL) erts_thr_progress_finalize_wait(esdp); } - if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) + if (current_time >= timeout_time) { + ASSERT(sched_type == ERTS_SCHED_NORMAL); erts_bump_timers(esdp->timer_wheel, current_time); + } } flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING @@ -7315,7 +7313,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (!(flgs & ERTS_SSI_FLG_SUSPENDED)) break; changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER) + if (changing) break; } @@ -7323,612 +7321,494 @@ suspend_scheduler(ErtsSchedulerData *esdp) changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); } - active_schedulers = erts_smp_atomic32_inc_read_nob(&schdlr_sspnd.active); + schdlr_sspnd_inc_nscheds(&schdlr_sspnd.active, sched_type); changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); if ((changing & ERTS_SCHDLR_SSPND_CHNG_MSB) - && schdlr_sspnd.online == active_schedulers) { + && schdlr_sspnd.online == schdlr_sspnd.active) { erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, ~ERTS_SCHDLR_SSPND_CHNG_MSB); } - ASSERT(no <= schdlr_sspnd.online); - ASSERT(!ongoing_multi_scheduling_block()); - + ASSERT(no <= schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, sched_type)); + ASSERT((sched_type == ERTS_SCHED_NORMAL + ? !(schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) + : !schdlr_sspnd.msb.ongoing)); } erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + ASSERT(!resume.msb.chngrs); + schdlr_sspnd_resume_procs(sched_type, &resume); + ASSERT(curr_online); - if (erts_system_profile_flags.scheduler) - profile_scheduler(make_small(esdp->no), am_active); + if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { + if (erts_system_profile_flags.scheduler) + profile_scheduler(make_small(esdp->no), am_active); - if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); - sched_wall_time_change(esdp, 1); + if (!thr_prgr_active) { + erts_thr_progress_active(esdp, thr_prgr_active = 1); + sched_wall_time_change(esdp, 1); + } } + if (sched_type == ERTS_SCHED_NORMAL) + (void) erts_get_monotonic_time(esdp); erts_smp_runq_lock(esdp->run_queue); non_empty_runq(esdp->run_queue); - schedule_bound_processes(esdp->run_queue, &sbp); + if (sched_type == ERTS_SCHED_NORMAL) { + schedule_bound_processes(esdp->run_queue, &sbp); - erts_sched_check_cpu_bind_post_suspend(esdp); + erts_sched_check_cpu_bind_post_suspend(esdp); + } } -#endif - -ErtsSchedSuspendResult +void erts_schedulers_state(Uint *total, Uint *online, Uint *active, Uint *dirty_cpu, Uint *dirty_cpu_online, + Uint *dirty_cpu_active, Uint *dirty_io, - int yield_allowed) + Uint *dirty_io_active) { - int res = ERTS_SCHDLR_SSPND_EINVAL; - erts_aint32_t changing; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); -#endif - if (yield_allowed && (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)) - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - else { + if (active || online || dirty_cpu_online + || dirty_cpu_active || dirty_io_active) { + erts_smp_mtx_lock(&schdlr_sspnd.mtx); if (active) - *active = schdlr_sspnd.online; + *active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL); if (online) - *online = schdlr_sspnd.online; - if (ongoing_multi_scheduling_block() && active) - *active = 1; -#ifdef ERTS_DIRTY_SCHEDULERS + *online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_NORMAL); + if (dirty_cpu_active) + *dirty_cpu_active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_CPU); if (dirty_cpu_online) - *dirty_cpu_online = schdlr_sspnd.dirty_cpu_online; -#endif - res = ERTS_SCHDLR_SSPND_DONE; + *dirty_cpu_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.curr_online, + ERTS_SCHED_DIRTY_CPU); + if (dirty_io_active) + *dirty_io_active = schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_DIRTY_IO); + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (total) *total = erts_no_schedulers; -#ifdef ERTS_DIRTY_SCHEDULERS if (dirty_cpu) *dirty_cpu = erts_no_dirty_cpu_schedulers; if (dirty_io) *dirty_io = erts_no_dirty_io_schedulers; -#endif - return res; } -#ifdef ERTS_DIRTY_SCHEDULERS - -ErtsSchedSuspendResult -erts_set_schedulers_online(Process *p, - ErtsProcLocks plocks, - Sint new_no, - Sint *old_no -#ifdef ERTS_DIRTY_SCHEDULERS - , int dirty_only -#endif - ) +static void +abort_sched_onln_chng_waitq(Process *p) { - ErtsSchedulerData *esdp; - int ix, res = -1, no, have_unlocked_plocks, end_wait; - erts_aint32_t changing = 0; -#ifdef ERTS_DIRTY_SCHEDULERS - ErtsSchedulerSleepInfo* ssi; - int dirty_no, change_dirty; -#endif - - if (new_no < 1) - return ERTS_SCHDLR_SSPND_EINVAL; -#ifdef ERTS_DIRTY_SCHEDULERS - else if (dirty_only && erts_no_dirty_cpu_schedulers < new_no) - return ERTS_SCHDLR_SSPND_EINVAL; -#endif - else if (erts_no_schedulers < new_no) - return ERTS_SCHDLR_SSPND_EINVAL; - - esdp = ERTS_PROC_GET_SCHDATA(p); - end_wait = 0; + Eterm resume = NIL; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - have_unlocked_plocks = 0; - no = (int) new_no; - -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(schdlr_sspnd.dirty_cpu_online <= erts_no_dirty_cpu_schedulers); - if (dirty_only) { - if (no > schdlr_sspnd.online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - return ERTS_SCHDLR_SSPND_EINVAL; +#ifdef DEBUG + { + int found_it = 0; + ErtsProcList *plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + while (plp) { + if (erts_proclist_same(plp, p)) + found_it++; + plp = erts_proclist_peek_next(schdlr_sspnd.chngq, plp); } - dirty_no = no; - } else { - /* - * Adjust the number of dirty CPU schedulers online relative to the - * adjustment made to the number of normal schedulers online. - */ - int total_pct = erts_no_dirty_cpu_schedulers*100/erts_no_schedulers; - int onln_pct = no*total_pct/schdlr_sspnd.online; - dirty_no = schdlr_sspnd.dirty_cpu_online*onln_pct/100; - if (dirty_no == 0) - dirty_no = 1; - ASSERT(dirty_no <= erts_no_dirty_cpu_schedulers); + ASSERT(found_it == !!(p->flags & F_SCHDLR_ONLN_WAITQ)); } #endif - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing); -#endif - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; - } - else { - int online = *old_no = schdlr_sspnd.online; -#ifdef ERTS_DIRTY_SCHEDULERS - int dirty_online = schdlr_sspnd.dirty_cpu_online; - - if (dirty_only) { - *old_no = schdlr_sspnd.dirty_cpu_online; - if (dirty_no == schdlr_sspnd.dirty_cpu_online) { - res = ERTS_SCHDLR_SSPND_DONE; - } - change_dirty = 1; - } else { -#endif - if (no == schdlr_sspnd.online) { -#ifdef ERTS_DIRTY_SCHEDULERS - dirty_only = 1; - if (dirty_no == schdlr_sspnd.dirty_cpu_online) -#endif - res = ERTS_SCHDLR_SSPND_DONE; -#ifdef ERTS_DIRTY_SCHEDULERS - else - change_dirty = 1; -#endif - } -#ifdef ERTS_DIRTY_SCHEDULERS - else - change_dirty = (dirty_no != schdlr_sspnd.dirty_cpu_online); - } -#endif - if (res == -1) - { - int increase = (no > online); -#ifdef ERTS_DIRTY_SCHEDULERS - if (!dirty_only) { -#endif - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.online = no; -#ifdef ERTS_DIRTY_SCHEDULERS - } else - increase = (dirty_no > dirty_online); - if (change_dirty) { - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.dirty_cpu_online = dirty_no; - } -#endif - if (increase) { - int ix; -#ifdef ERTS_DIRTY_SCHEDULERS - if (!dirty_only) { -#endif - schdlr_sspnd.wait_curr_online = no; - if (ongoing_multi_scheduling_block()) { - for (ix = online; ix < no; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - change_no_used_runqs(no); - for (ix = online; ix < no; ix++) - resume_run_queue(ERTS_RUNQ_IX(ix)); + if (p->flags & F_SCHDLR_ONLN_WAITQ) { + ErtsProcList *plp = NULL; - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - } -#ifdef ERTS_DIRTY_SCHEDULERS - } - if (change_dirty) { - schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; - ASSERT(schdlr_sspnd.dirty_cpu_curr_online != - schdlr_sspnd.dirty_cpu_wait_curr_online); - if (ongoing_multi_scheduling_block()) { - for (ix = dirty_online; ix < dirty_no; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_sched_poke(ssi); - } - } else { - for (ix = dirty_online; ix < dirty_no; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - } - } -#endif - res = ERTS_SCHDLR_SSPND_DONE; - } - else /* if (no < online) */ { -#ifdef ERTS_DIRTY_SCHEDULERS - if (change_dirty) { - schdlr_sspnd.dirty_cpu_wait_curr_online = dirty_no; - ASSERT(schdlr_sspnd.dirty_cpu_curr_online != - schdlr_sspnd.dirty_cpu_wait_curr_online); - if (ongoing_multi_scheduling_block()) { - for (ix = dirty_no; ix < dirty_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_sched_poke(ssi); - } - } else { - for (ix = dirty_no; ix < dirty_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - } - } - if (dirty_only) { - res = ERTS_SCHDLR_SSPND_DONE; - } + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (plp) { + if (erts_proclist_same(plp, p) + && schdlr_sspnd.changer == am_true) { + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + /* + * Change right was in transit to us; + * transfer it to the next process by + * resuming it... + */ + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (plp) + resume = plp->pid; else -#endif - { - if (p->scheduler_data->no <= no) { - res = ERTS_SCHDLR_SSPND_DONE; - schdlr_sspnd.wait_curr_online = no; - } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE; - schdlr_sspnd.wait_curr_online = no+1; - } - - if (ongoing_multi_scheduling_block()) { - for (ix = no; ix < online; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - - change_no_used_runqs(no); - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - - for (ix = no; ix < online; ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq); - } - } - } + schdlr_sspnd.changer = am_false; } - -#ifdef ERTS_DIRTY_SCHEDULERS - if (change_dirty) { - while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - if (!dirty_only) -#endif - { - if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (plocks && !have_unlocked_plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); + else { + do { + if (erts_proclist_same(plp, p)) { + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + break; } - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - end_wait = 1; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - } - - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - - ASSERT(res != ERTS_SCHDLR_SSPND_DONE - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + plp = erts_proclist_peek_next(schdlr_sspnd.chngq, plp); + } while (plp); } } } erts_smp_mtx_unlock(&schdlr_sspnd.mtx); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(schdlr_sspnd.dirty_cpu_online <= schdlr_sspnd.online); - if (!dirty_only) -#endif - { - if (end_wait) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); - } - if (have_unlocked_plocks) - erts_smp_proc_lock(p, plocks); - } - return res; + if (is_internal_pid(resume)) + schdlr_sspnd_resume_proc(resume); } -#else /* !ERTS_DIRTY_SCHEDULERS */ - ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, Sint new_no, - Sint *old_no) + Sint *old_no, + int dirty_only) { - ErtsSchedulerData *esdp; - int ix, res, no, have_unlocked_plocks, end_wait; - erts_aint32_t changing; + int resume_proc, ix, res = -1, no, have_unlocked_plocks; + erts_aint32_t changing = 0, change_flags; + int online, increase; + ErtsProcList *plp; +#ifdef ERTS_DIRTY_SCHEDULERS + int dirty_no, change_dirty, dirty_online; +#else + ASSERT(!dirty_only); +#endif - if (new_no < 1 || erts_no_schedulers < new_no) + if (new_no < 1) + return ERTS_SCHDLR_SSPND_EINVAL; + else if (dirty_only && erts_no_dirty_cpu_schedulers < new_no) + return ERTS_SCHDLR_SSPND_EINVAL; + else if (erts_no_schedulers < new_no) return ERTS_SCHDLR_SSPND_EINVAL; - esdp = ERTS_PROC_GET_SCHDATA(p); - end_wait = 0; +#ifdef ERTS_DIRTY_SCHEDULERS + if (dirty_only) + resume_proc = 0; + else +#endif + { + resume_proc = 1; + /* + * If we suspend current process we need to suspend before + * requesting the change; otherwise, we got a resume/suspend + * race... + */ + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + suspend_process(p, p); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } erts_smp_mtx_lock(&schdlr_sspnd.mtx); + change_flags = 0; have_unlocked_plocks = 0; no = (int) new_no; - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; +#ifdef ERTS_DIRTY_SCHEDULERS + if (!dirty_only) +#endif + { + changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); + if (changing & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + enqueue_wait: + p->flags |= F_SCHDLR_ONLN_WAITQ; + plp = proclist_create(p); + erts_proclist_store_last(&schdlr_sspnd.chngq, plp); + resume_proc = 0; + res = ERTS_SCHDLR_SSPND_YIELD_RESTART; + goto done; + } + plp = erts_proclist_peek_first(schdlr_sspnd.chngq); + if (!plp) { + ASSERT(schdlr_sspnd.changer == am_false); + } + else { + ASSERT(schdlr_sspnd.changer == am_true); + if (!erts_proclist_same(plp, p)) + goto enqueue_wait; + p->flags &= ~F_SCHDLR_ONLN_WAITQ; + erts_proclist_remove(&schdlr_sspnd.chngq, plp); + proclist_destroy(plp); + } + } + + *old_no = online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); +#ifndef ERTS_DIRTY_SCHEDULERS + if (no == online) { + res = ERTS_SCHDLR_SSPND_DONE; + goto done; } - else { - int online = *old_no = schdlr_sspnd.online; - if (no == schdlr_sspnd.online) { +#else /* ERTS_DIRTY_SCHEDULERS */ + dirty_online = schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU); + if (dirty_only) + *old_no = dirty_online; + + ASSERT(dirty_online <= erts_no_dirty_cpu_schedulers); + + if (dirty_only) { + if (no > online) { + res = ERTS_SCHDLR_SSPND_EINVAL; + goto done; + } + dirty_no = no; + if (dirty_no == dirty_online) { res = ERTS_SCHDLR_SSPND_DONE; + goto done; } - else { - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_ONLN - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - schdlr_sspnd.online = no; - if (no > online) { - int ix; - schdlr_sspnd.wait_curr_online = no; - if (ongoing_multi_scheduling_block()) { - for (ix = online; ix < no; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); - } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } - change_no_used_runqs(no); - - for (ix = online; ix < no; ix++) - resume_run_queue(ERTS_RUNQ_IX(ix)); + change_dirty = 1; + } else { + /* + * Adjust the number of dirty CPU schedulers online relative to the + * adjustment made to the number of normal schedulers online. + */ + int total_pct = erts_no_dirty_cpu_schedulers*100/erts_no_schedulers; + int onln_pct = no*total_pct/online; + dirty_no = dirty_online*onln_pct/100; + if (dirty_no == 0) + dirty_no = 1; + ASSERT(dirty_no <= erts_no_dirty_cpu_schedulers); - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); - } + if (no != online) + change_dirty = (dirty_no != dirty_online); + else { + dirty_only = 1; + if (dirty_no == dirty_online) { res = ERTS_SCHDLR_SSPND_DONE; + goto done; } - else /* if (no < online) */ { - if (p->scheduler_data->no <= no) { - res = ERTS_SCHDLR_SSPND_DONE; - schdlr_sspnd.wait_curr_online = no; + change_dirty = 1; + } + } + if (change_dirty) { + change_flags |= ERTS_SCHDLR_SSPND_CHNG_DCPU_ONLN; + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU, + dirty_no); + } + + if (dirty_only) + increase = (dirty_no > dirty_online); + else +#endif /* ERTS_DIRTY_SCHEDULERS */ + { + change_flags |= ERTS_SCHDLR_SSPND_CHNG_ONLN; + schdlr_sspnd_set_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL, + no); + increase = (no > online); + } + + erts_smp_atomic32_read_bor_nob(&schdlr_sspnd.changing, change_flags); + + res = ERTS_SCHDLR_SSPND_DONE; + if (increase) { + int ix; +#ifdef ERTS_DIRTY_SCHEDULERS + if (change_dirty) { + ErtsSchedulerSleepInfo* ssi; + if (schdlr_sspnd.msb.ongoing) { + for (ix = dirty_online; ix < dirty_no; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_sched_poke(ssi); } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE; - schdlr_sspnd.wait_curr_online = no+1; + } else { + for (ix = dirty_online; ix < dirty_no; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); } - - if (ongoing_multi_scheduling_block()) { - for (ix = no; ix < online; ix++) - erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + } + if (!dirty_only) +#endif + { + if (schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) { + for (ix = online; ix < no; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + else { + if (plocks) { + have_unlocked_plocks = 1; + erts_smp_proc_unlock(p, plocks); } - else { - if (plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); - } + change_no_used_runqs(no); - change_no_used_runqs(no); - for (ix = no; ix < erts_no_run_queues; ix++) - suspend_run_queue(ERTS_RUNQ_IX(ix)); + for (ix = online; ix < no; ix++) + resume_run_queue(ERTS_RUNQ_IX(ix)); - for (ix = no; ix < online; ix++) { - ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); - wake_scheduler(rq); - } + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); + } + } + } + else /* if decrease */ { +#ifdef ERTS_DIRTY_SCHEDULERS + if (change_dirty) { + ErtsSchedulerSleepInfo* ssi; + if (schdlr_sspnd.msb.ongoing) { + for (ix = dirty_no; ix < dirty_online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_sched_poke(ssi); } + } else { + for (ix = dirty_no; ix < dirty_online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); } - - if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) { - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (plocks && !have_unlocked_plocks) { + } + if (!dirty_only) +#endif + { + if (schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing) { + for (ix = no; ix < online; ix++) + erts_sched_poke(ERTS_SCHED_SLEEP_INFO_IX(ix)); + } + else { + if (plocks) { have_unlocked_plocks = 1; erts_smp_proc_unlock(p, plocks); } - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - end_wait = 1; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - } - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - - ASSERT(res != ERTS_SCHDLR_SSPND_DONE - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + change_no_used_runqs(no); + for (ix = no; ix < erts_no_run_queues; ix++) + suspend_run_queue(ERTS_RUNQ_IX(ix)); + for (ix = no; ix < online; ix++) { + ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); + wake_scheduler(rq); + } + } } } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - if (end_wait) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); + if (change_flags & ERTS_SCHDLR_SSPND_CHNG_ONLN) { + /* Suspend and wait for requested change to complete... */ + schdlr_sspnd.changer = p->common.id; + resume_proc = 0; + res = ERTS_SCHDLR_SSPND_YIELD_DONE; } + +done: + + ASSERT(schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU) + <= schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL)); + + erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (have_unlocked_plocks) erts_smp_proc_lock(p, plocks); + if (resume_proc) { + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + resume_process(p, plocks|ERTS_PROC_LOCK_STATUS); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } + return res; } -#endif - ErtsSchedSuspendResult -erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) +erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int normal, int all) { - int ix, res, have_unlocked_plocks = 0, online; - erts_aint32_t changing; + int resume_proc, ix, res, have_unlocked_plocks = 0; ErtsProcList *plp; #ifdef ERTS_DIRTY_SCHEDULERS ErtsSchedulerSleepInfo* ssi; #endif + ErtsMultiSchedulingBlock *msbp; + erts_aint32_t chng_flg; + int have_blckd_flg; - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing); -#ifdef ERTS_DIRTY_SCHEDULERS - changing |= (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - | erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing)); -#endif - if (changing) { - res = ERTS_SCHDLR_SSPND_YIELD_RESTART; /* Yield */ + if (normal) { + chng_flg = ERTS_SCHDLR_SSPND_CHNG_NMSB; + have_blckd_flg = F_HAVE_BLCKD_NMSCHED; + msbp = &schdlr_sspnd.nmsb; + } + else { + chng_flg = ERTS_SCHDLR_SSPND_CHNG_MSB; + have_blckd_flg = F_HAVE_BLCKD_MSCHED; + msbp = &schdlr_sspnd.msb; + } + + /* + * If we suspend current process we need to suspend before + * requesting the change; otherwise, we got a resume/suspend + * race... + */ + if (!on) { + /* We never suspend current process when unblocking... */ + resume_proc = 0; + } + else { + resume_proc = 1; + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + suspend_process(p, p); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } - else if (on) { /* ------ BLOCK ------ */ - if (schdlr_sspnd.msb.procs) { + + erts_smp_mtx_lock(&schdlr_sspnd.mtx); + if (on) { /* ------ BLOCK ------ */ + if (msbp->chngq) { + ASSERT(msbp->ongoing); + p->flags |= have_blckd_flg; + goto wait_until_msb; + } + else if (msbp->blckrs) { + ASSERT(msbp->ongoing); plp = proclist_create(p); - erts_proclist_store_last(&schdlr_sspnd.msb.procs, plp); - p->flags |= F_HAVE_BLCKD_MSCHED; - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 0); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) == 0); -#endif + erts_proclist_store_last(&msbp->blckrs, plp); + p->flags |= have_blckd_flg; + ASSERT(schdlr_sspnd.active == ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0)); ASSERT(p->scheduler_data->no == 1); - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - } else { - int online = schdlr_sspnd.online; - p->flags |= F_HAVE_BLCKD_MSCHED; + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED; + } + else { + int online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); + ASSERT(!msbp->ongoing); + p->flags |= have_blckd_flg; if (plocks) { have_unlocked_plocks = 1; erts_smp_proc_unlock(p, plocks); } - ASSERT(!ongoing_multi_scheduling_block()); - schdlr_sspnd.msb.ongoing = 1; - if (online == 1) { - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); -#ifdef ERTS_DIRTY_SCHEDULERS - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) == 1); - ASSERT(!(erts_smp_atomic32_read_nob(&ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0)->flags) - & ERTS_SSI_FLG_SUSPENDED)); - schdlr_sspnd.msb.dirty_cpu_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(0); - erts_smp_atomic32_read_bor_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED); - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) - != schdlr_sspnd.msb.dirty_cpu_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - - schdlr_sspnd.msb.dirty_io_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) - != schdlr_sspnd.msb.dirty_io_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); -#endif + ASSERT(!msbp->ongoing); + msbp->ongoing = 1; + if (schdlr_sspnd.active == ERTS_SCHDLR_SSPND_MAKE_NSCHEDS_VAL(1, 0, 0) + || (normal && schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) == 1)) { ASSERT(p->scheduler_data->no == 1); + plp = proclist_create(p); + erts_proclist_store_last(&msbp->blckrs, plp); + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED; } else { - ERTS_SCHDLR_SSPND_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - if (p->scheduler_data->no == 1) { - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - schdlr_sspnd.msb.wait_active = 1; - } - else { - /* - * Yield! Current process needs to migrate - * before bif returns. - */ - res = ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED; - schdlr_sspnd.msb.wait_active = 2; - } - -#ifdef ERTS_DIRTY_SCHEDULERS - schdlr_sspnd.msb.dirty_cpu_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_active) - != schdlr_sspnd.msb.dirty_cpu_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - ASSERT(schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_online); - - schdlr_sspnd.msb.dirty_io_wait_active = 0; - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET((ERTS_SCHDLR_SSPND_CHNG_MSB - | ERTS_SCHDLR_SSPND_CHNG_WAITER), 0); - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - erts_smp_atomic32_read_bor_nob(&ssi->flags, - ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_active) - != schdlr_sspnd.msb.dirty_io_wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - ASSERT(schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_online); -#endif + erts_smp_atomic32_read_bor_nob(&schdlr_sspnd.changing, + chng_flg); change_no_used_runqs(1); for (ix = 1; ix < erts_no_run_queues; ix++) suspend_run_queue(ERTS_RUNQ_IX(ix)); @@ -7938,84 +7818,84 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) wake_scheduler(rq); } - if (erts_smp_atomic32_read_nob(&schdlr_sspnd.active) - != schdlr_sspnd.msb.wait_active) { - ErtsSchedulerData *esdp; - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - if (plocks && !have_unlocked_plocks) { - have_unlocked_plocks = 1; - erts_smp_proc_unlock(p, plocks); +#ifdef ERTS_DIRTY_SCHEDULERS + if (!normal) { + for (ix = 0; ix < erts_no_dirty_cpu_schedulers; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); } + wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - esdp = ERTS_PROC_GET_SCHDATA(p); - - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - while (erts_smp_atomic32_read_nob(&schdlr_sspnd.active) - != schdlr_sspnd.msb.wait_active) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, - &schdlr_sspnd.mtx); - - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - erts_thr_progress_active(esdp, 1); - erts_thr_progress_finalize_wait(esdp); + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + erts_smp_atomic32_read_bor_nob(&ssi->flags, + ERTS_SSI_FLG_SUSPENDED); + } + wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); + } +#endif - erts_smp_mtx_lock(&schdlr_sspnd.mtx); + wait_until_msb: - } + ASSERT(chng_flg & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)); - ASSERT(res != ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED - ? (ERTS_SCHDLR_SSPND_CHNG_WAITER - & erts_smp_atomic32_read_nob(&schdlr_sspnd.changing)) - : (ERTS_SCHDLR_SSPND_CHNG_WAITER - == erts_smp_atomic32_read_nob(&schdlr_sspnd.changing))); - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_WAITER); + plp = proclist_create(p); + erts_proclist_store_last(&msbp->chngq, plp); + resume_proc = 0; + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED; } - plp = proclist_create(p); - erts_proclist_store_last(&schdlr_sspnd.msb.procs, plp); ASSERT(p->scheduler_data); } } - else if (!ongoing_multi_scheduling_block()) { - /* unblock not ongoing */ - ASSERT(!schdlr_sspnd.msb.procs); - res = ERTS_SCHDLR_SSPND_DONE; + else if (!msbp->ongoing) { + ASSERT(!msbp->blckrs); + goto unblock_res; } else { /* ------ UNBLOCK ------ */ - if (p->flags & F_HAVE_BLCKD_MSCHED) { - ErtsProcList *plp = erts_proclist_peek_first(schdlr_sspnd.msb.procs); - - while (plp) { - ErtsProcList *tmp_plp = plp; - plp = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp); - if (erts_proclist_same(tmp_plp, p)) { - erts_proclist_remove(&schdlr_sspnd.msb.procs, tmp_plp); - proclist_destroy(tmp_plp); - if (!all) - break; + if (p->flags & have_blckd_flg) { + ErtsProcList *plps[2]; + ErtsProcList *plp; + int limit = 0; + + plps[limit++] = erts_proclist_peek_first(msbp->blckrs); + if (all) + plps[limit++] = erts_proclist_peek_first(msbp->chngq); + + for (ix = 0; ix < limit; ix++) { + plp = plps[ix]; + while (plp) { + ErtsProcList *tmp_plp = plp; + plp = erts_proclist_peek_next(msbp->blckrs, plp); + if (erts_proclist_same(tmp_plp, p)) { + erts_proclist_remove(&msbp->blckrs, tmp_plp); + proclist_destroy(tmp_plp); + if (!all) + break; + } } } } - if (schdlr_sspnd.msb.procs) - res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; - else { - ERTS_SCHDLR_SSPND_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - p->flags &= ~F_HAVE_BLCKD_MSCHED; - schdlr_sspnd.msb.ongoing = 0; - if (schdlr_sspnd.online == 1) { + if (!msbp->blckrs && !msbp->chngq) { + int online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_NORMAL); + erts_smp_atomic32_read_bor_nob(&schdlr_sspnd.changing, + chng_flg); + p->flags &= ~have_blckd_flg; + msbp->ongoing = 0; + if (online == 1) { /* No normal schedulers to resume */ - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.active) == 1); - ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_MSB); + ASSERT(schdlr_sspnd_get_nscheds(&schdlr_sspnd.active, + ERTS_SCHED_NORMAL) == 1); +#ifndef ERTS_DIRTY_SCHEDULERS + erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, + ~chng_flg); +#endif } - else { - online = schdlr_sspnd.online; + else if (!(schdlr_sspnd.msb.ongoing|schdlr_sspnd.nmsb.ongoing)) { if (plocks) { have_unlocked_plocks = 1; erts_smp_proc_unlock(p, plocks); @@ -8031,83 +7911,91 @@ erts_block_multi_scheduling(Process *p, ErtsProcLocks plocks, int on, int all) suspend_run_queue(ERTS_RUNQ_IX(ix)); } #ifdef ERTS_DIRTY_SCHEDULERS - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - schdlr_sspnd.msb.dirty_cpu_wait_active = schdlr_sspnd.dirty_cpu_online; - for (ix = 0; ix < schdlr_sspnd.dirty_cpu_online; ix++) { - ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_CPU_RUNQ, 0); - - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(ERTS_SCHDLR_SSPND_CHNG_MSB, 0); - schdlr_sspnd.msb.dirty_io_wait_active = erts_no_dirty_io_schedulers; - for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { - ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); - scheduler_ssi_resume_wake(ssi); - erts_smp_atomic32_read_band_nob(&ssi->flags, - ~ERTS_SSI_FLG_SUSPENDED); - } - wake_dirty_schedulers(ERTS_DIRTY_IO_RUNQ, 0); + if (!normal) { + ASSERT(!schdlr_sspnd.msb.ongoing); + online = (int) schdlr_sspnd_get_nscheds(&schdlr_sspnd.online, + ERTS_SCHED_DIRTY_CPU); + for (ix = 0; ix < online; ix++) { + ssi = ERTS_DIRTY_CPU_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); + } + + for (ix = 0; ix < erts_no_dirty_io_schedulers; ix++) { + ssi = ERTS_DIRTY_IO_SCHED_SLEEP_INFO_IX(ix); + scheduler_ssi_resume_wake(ssi); + } + } #endif - res = ERTS_SCHDLR_SSPND_DONE; } + + unblock_res: + if (schdlr_sspnd.msb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED; + else if (schdlr_sspnd.nmsb.ongoing) + res = ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED; + else + res = ERTS_SCHDLR_SSPND_DONE; } erts_smp_mtx_unlock(&schdlr_sspnd.mtx); + if (have_unlocked_plocks) erts_smp_proc_lock(p, plocks); - return res; -} -#ifdef DEBUG -void -erts_dbg_multi_scheduling_return_trap(Process *p, Eterm return_value) -{ - if (return_value == am_blocked) { - erts_aint32_t active = erts_smp_atomic32_read_nob(&schdlr_sspnd.active); - ASSERT(1 <= active && active <= 2); - ASSERT(ERTS_PROC_GET_SCHDATA(p)->no == 1); + if (resume_proc) { + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + resume_process(p, plocks|ERTS_PROC_LOCK_STATUS); + if (!(plocks & ERTS_PROC_LOCK_STATUS)) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } + + return res; } -#endif int erts_is_multi_scheduling_blocked(void) { int res; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - res = schdlr_sspnd.msb.procs != NULL; + if (schdlr_sspnd.msb.blckrs) + res = 1; + else if (schdlr_sspnd.nmsb.blckrs) + res = -1; + else + res = 0; erts_smp_mtx_unlock(&schdlr_sspnd.mtx); return res; } Eterm -erts_multi_scheduling_blockers(Process *p) +erts_multi_scheduling_blockers(Process *p, int normal) { Eterm res = NIL; + ErtsMultiSchedulingBlock *msbp; + + msbp = normal ? &schdlr_sspnd.nmsb : &schdlr_sspnd.msb; erts_smp_mtx_lock(&schdlr_sspnd.mtx); - if (!erts_proclist_is_empty(schdlr_sspnd.msb.procs)) { + if (!erts_proclist_is_empty(msbp->blckrs)) { Eterm *hp, *hp_end; ErtsProcList *plp1, *plp2; Uint max_size = 0; - for (plp1 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + for (plp1 = erts_proclist_peek_first(msbp->blckrs); plp1; - plp1 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp1)) { + plp1 = erts_proclist_peek_next(msbp->blckrs, plp1)) { max_size += 2; } ASSERT(max_size); hp = HAlloc(p, max_size); hp_end = hp + max_size; - for (plp1 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + for (plp1 = erts_proclist_peek_first(msbp->blckrs); plp1; - plp1 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp1)) { - for (plp2 = erts_proclist_peek_first(schdlr_sspnd.msb.procs); + plp1 = erts_proclist_peek_next(msbp->blckrs, plp1)) { + for (plp2 = erts_proclist_peek_first(msbp->blckrs); plp2->pid != plp1->pid; - plp2 = erts_proclist_peek_next(schdlr_sspnd.msb.procs, plp2)); + plp2 = erts_proclist_peek_next(msbp->blckrs, plp2)); if (plp2 == plp1) { res = CONS(hp, plp1->pid, res); hp += 2; @@ -8176,39 +8064,6 @@ sched_thread_func(void *vesdp) #endif erts_thread_init_float(); - if (no == 1) { - erts_thr_progress_active(esdp, 0); - erts_thr_progress_prepare_wait(esdp); - } - - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) -#ifdef ERTS_DIRTY_SCHEDULERS - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); -#else - erts_smp_cnd_signal(&schdlr_sspnd.cnd); -#endif - } - - if (no == 1) { - while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - - if (no == 1) { - erts_thr_progress_finalize_wait(esdp); - erts_thr_progress_active(esdp, 1); - } - #ifdef ERTS_DO_VERIFY_UNUSED_TEMP_ALLOC esdp->verify_unused_temp_alloc = erts_alloc_get_verify_unused_temp_alloc( @@ -8263,24 +8118,6 @@ sched_dirty_cpu_thread_func(void *vesdp) #endif erts_thread_init_float(); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_cpu_changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.dirty_cpu_curr_online == schdlr_sspnd.dirty_cpu_wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_cpu_changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - } - - if (no == 1) { - while (schdlr_sspnd.dirty_cpu_curr_online != schdlr_sspnd.dirty_cpu_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_CPU_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - process_main(); /* No schedulers should *ever* terminate */ erts_exit(ERTS_ABORT_EXIT, @@ -8326,24 +8163,6 @@ sched_dirty_io_thread_func(void *vesdp) #endif erts_thread_init_float(); - erts_smp_mtx_lock(&schdlr_sspnd.mtx); - ASSERT(erts_smp_atomic32_read_nob(&schdlr_sspnd.dirty_io_changing) - & ERTS_SCHDLR_SSPND_CHNG_ONLN); - - if (--schdlr_sspnd.dirty_io_curr_online == schdlr_sspnd.dirty_io_wait_curr_online) { - erts_smp_atomic32_read_band_nob(&schdlr_sspnd.dirty_io_changing, - ~ERTS_SCHDLR_SSPND_CHNG_ONLN); - if (no != 1) - erts_smp_cnd_broadcast(&schdlr_sspnd.cnd); - } - - if (no == 1) { - while (schdlr_sspnd.dirty_io_curr_online != schdlr_sspnd.dirty_io_wait_curr_online) - erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx); - ERTS_SCHDLR_SSPND_DIRTY_IO_CHNG_SET(0, ERTS_SCHDLR_SSPND_CHNG_WAITER); - } - erts_smp_mtx_unlock(&schdlr_sspnd.mtx); - process_main(); /* No schedulers should *ever* terminate */ erts_exit(ERTS_ABORT_EXIT, @@ -9616,11 +9435,11 @@ Process *schedule(Process *p, int calls) if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { ErtsProcList *pnd_xtrs = rq->procs.pending_exiters; if (erts_proclist_fetch(&pnd_xtrs, NULL)) { - rq->procs.pending_exiters = NULL; - erts_smp_runq_unlock(rq); - handle_pending_exiters(pnd_xtrs); - erts_smp_runq_lock(rq); - } + rq->procs.pending_exiters = NULL; + erts_smp_runq_unlock(rq); + handle_pending_exiters(pnd_xtrs); + erts_smp_runq_lock(rq); + } if (rq->check_balance_reds <= 0) check_balance(rq); @@ -12537,14 +12356,39 @@ erts_continue_exit_process(Process *p) #endif #ifdef ERTS_SMP + if (p->flags & F_SCHDLR_ONLN_WAITQ) + abort_sched_onln_chng_waitq(p); + if (p->flags & F_HAVE_BLCKD_MSCHED) { ErtsSchedSuspendResult ssr; - ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 1); + ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 0, 1); + switch (ssr) { + case ERTS_SCHDLR_SSPND_YIELD_RESTART: + goto yield; + case ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE: + case ERTS_SCHDLR_SSPND_YIELD_DONE: + p->flags &= ~F_HAVE_BLCKD_MSCHED; + break; + case ERTS_SCHDLR_SSPND_EINVAL: + default: + erts_exit(ERTS_ABORT_EXIT, "%s:%d: Internal error: %d\n", + __FILE__, __LINE__, (int) ssr); + } + } + if (p->flags & F_HAVE_BLCKD_NMSCHED) { + ErtsSchedSuspendResult ssr; + ssr = erts_block_multi_scheduling(p, ERTS_PROC_LOCK_MAIN, 0, 1, 1); switch (ssr) { case ERTS_SCHDLR_SSPND_YIELD_RESTART: goto yield; case ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED: case ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED: + case ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED: case ERTS_SCHDLR_SSPND_DONE: case ERTS_SCHDLR_SSPND_YIELD_DONE: p->flags &= ~F_HAVE_BLCKD_MSCHED; diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 6b370e630e..c88bd7056c 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -79,10 +79,8 @@ struct ErtsNodesMonitor_; #define ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT 0 #define ERTS_MAX_NO_OF_SCHEDULERS 1024 -#ifdef ERTS_DIRTY_SCHEDULERS #define ERTS_MAX_NO_OF_DIRTY_CPU_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS #define ERTS_MAX_NO_OF_DIRTY_IO_SCHEDULERS ERTS_MAX_NO_OF_SCHEDULERS -#endif #define ERTS_DEFAULT_MAX_PROCESSES (1 << 18) @@ -246,7 +244,9 @@ extern int erts_sched_thread_suggested_stack_size; typedef enum { ERTS_SCHDLR_SSPND_DONE_MSCHED_BLOCKED, + ERTS_SCHDLR_SSPND_DONE_NMSCHED_BLOCKED, ERTS_SCHDLR_SSPND_YIELD_DONE_MSCHED_BLOCKED, + ERTS_SCHDLR_SSPND_YIELD_DONE_NMSCHED_BLOCKED, ERTS_SCHDLR_SSPND_DONE, ERTS_SCHDLR_SSPND_YIELD_RESTART, ERTS_SCHDLR_SSPND_YIELD_DONE, @@ -1339,6 +1339,8 @@ extern int erts_system_profile_ts_type; #define F_OFF_HEAP_MSGQ_CHNG (1 << 14) /* Off heap msg queue changing */ #define F_ABANDONED_HEAP_USE (1 << 15) /* Have usage of abandoned heap */ #define F_DELAY_GC (1 << 16) /* Similar to disable GC (see below) */ +#define F_SCHDLR_ONLN_WAITQ (1 << 17) /* Process enqueued waiting to change schedulers online */ +#define F_HAVE_BLCKD_NMSCHED (1 << 18) /* Process has blocked normal multi-scheduling */ /* * F_DISABLE_GC and F_DELAY_GC are similar. Both will prevent @@ -1533,6 +1535,7 @@ int erts_setup_nif_gc(Process* proc, Eterm** objv, int* nobj); /* see erl_nif.c void erts_destroy_nif_export(void *); /* see erl_nif.c */ ErtsProcList *erts_proclist_create(Process *); +ErtsProcList *erts_proclist_copy(ErtsProcList *); void erts_proclist_destroy(ErtsProcList *); ERTS_GLB_INLINE int erts_proclist_same(ErtsProcList *, Process *); @@ -1718,28 +1721,21 @@ void erts_schedule_complete_off_heap_message_queue_change(Eterm pid); #if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) int erts_dbg_check_halloc_lock(Process *p); #endif -#ifdef DEBUG -void erts_dbg_multi_scheduling_return_trap(Process *, Eterm); -#endif -int erts_get_max_no_executing_schedulers(void); #if defined(ERTS_SMP) || defined(ERTS_DIRTY_SCHEDULERS) -ErtsSchedSuspendResult -erts_schedulers_state(Uint *, Uint *, Uint *, Uint *, Uint *, Uint *, int); +void +erts_schedulers_state(Uint *, Uint *, Uint *, Uint *, Uint *, Uint *, Uint *, Uint *); #endif #ifdef ERTS_SMP ErtsSchedSuspendResult erts_set_schedulers_online(Process *p, ErtsProcLocks plocks, Sint new_no, - Sint *old_no -#ifdef ERTS_DIRTY_SCHEDULERS - , int dirty_only -#endif - ); + Sint *old_no, + int dirty_only); ErtsSchedSuspendResult -erts_block_multi_scheduling(Process *, ErtsProcLocks, int, int); +erts_block_multi_scheduling(Process *, ErtsProcLocks, int, int, int); int erts_is_multi_scheduling_blocked(void); -Eterm erts_multi_scheduling_blockers(Process *); +Eterm erts_multi_scheduling_blockers(Process *, int); void erts_start_schedulers(void); void erts_alloc_notify_delayed_dealloc(int); void erts_alloc_ensure_handle_delayed_dealloc_call(int); diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index 1f284228db..f998e7d4d2 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -54,6 +54,7 @@ sct_cmd/1, sbt_cmd/1, scheduler_threads/1, + scheduler_suspend_basic/1, scheduler_suspend/1, dirty_scheduler_threads/1, dirty_scheduler_exit/1, @@ -69,8 +70,10 @@ all() -> [equal, few_low, many_low, equal_with_part_time_high, equal_with_part_time_max, equal_and_high_with_part_time_max, equal_with_high, - equal_with_high_max, bound_process, - {group, scheduler_bind}, scheduler_threads, scheduler_suspend, + equal_with_high_max, + bound_process, + {group, scheduler_bind}, scheduler_threads, + scheduler_suspend_basic, scheduler_suspend, dirty_scheduler_threads, dirty_scheduler_exit, reader_groups]. @@ -1131,6 +1134,7 @@ dirty_schedulers_online_test(true) -> dirty_schedulers_online_smp_test(erlang:system_info(schedulers_online)). dirty_schedulers_online_smp_test(SchedOnln) when SchedOnln < 4 -> ok; dirty_schedulers_online_smp_test(SchedOnln) -> + receive after 500 -> ok end, DirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), SchedOnln = DirtyCPUSchedOnln, HalfSchedOnln = SchedOnln div 2, @@ -1139,9 +1143,11 @@ dirty_schedulers_online_smp_test(SchedOnln) -> HalfDirtyCPUSchedOnln = erlang:system_flag(schedulers_online, SchedOnln), DirtyCPUSchedOnln = erlang:system_flag(dirty_cpu_schedulers_online, HalfDirtyCPUSchedOnln), + receive after 500 -> ok end, HalfDirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), QrtrDirtyCPUSchedOnln = HalfDirtyCPUSchedOnln div 2, SchedOnln = erlang:system_flag(schedulers_online, HalfSchedOnln), + receive after 500 -> ok end, QrtrDirtyCPUSchedOnln = erlang:system_info(dirty_cpu_schedulers_online), ok. @@ -1214,6 +1220,113 @@ wait_dse([Pid|Pids]) -> dirty_sleeper() -> erlang:nif_error({error,?MODULE}). +scheduler_suspend_basic(Config) when is_list(Config) -> + case erlang:system_info(multi_scheduling) of + disabled -> + {skip, "Nothing to test"}; + _ -> + Onln = erlang:system_info(schedulers_online), + try + scheduler_suspend_basic_test() + after + erlang:system_flag(schedulers_online, Onln) + end + end. + +scheduler_suspend_basic_test() -> + %% The receives after setting scheduler states are there + %% since the operation is not fully synchronous. For example, + %% we do not wait for dirty cpu schedulers online to complete + %% before returning from erlang:system_flag(schedulers_online, _). + + erlang:system_flag(schedulers_online, + erlang:system_info(schedulers)), + try + erlang:system_flag(dirty_cpu_schedulers_online, + erlang:system_info(dirty_cpu_schedulers)), + receive after 500 -> ok end + catch + _ : _ -> + ok + end, + + S0 = sched_state(), + io:format("~p~n", [S0]), + {{normal,NTot0,NOnln0,NAct0}, + {dirty_cpu,DCTot0,DCOnln0,DCAct0}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = S0, + enabled = erlang:system_info(multi_scheduling), + + DCOne = case DCTot0 of + 0 -> 0; + _ -> 1 + end, + + blocked_normal = erlang:system_flag(multi_scheduling, block_normal), + blocked_normal = erlang:system_info(multi_scheduling), + {{normal,NTot0,NOnln0,1}, + {dirty_cpu,DCTot0,DCOnln0,DCAct0}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + NOnln0 = erlang:system_flag(schedulers_online, 1), + receive after 500 -> ok end, + {{normal,NTot0,1,1}, + {dirty_cpu,DCTot0,DCOne,DCOne}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + 1 = erlang:system_flag(schedulers_online, NOnln0), + receive after 500 -> ok end, + {{normal,NTot0,NOnln0,1}, + {dirty_cpu,DCTot0,DCOnln0,DCAct0}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + blocked = erlang:system_flag(multi_scheduling, block), + blocked = erlang:system_info(multi_scheduling), + receive after 500 -> ok end, + {{normal,NTot0,NOnln0,1}, + {dirty_cpu,DCTot0,DCOnln0,0}, + {dirty_io,DITot0,DIOnln0,0}} = sched_state(), + + NOnln0 = erlang:system_flag(schedulers_online, 1), + receive after 500 -> ok end, + {{normal,NTot0,1,1}, + {dirty_cpu,DCTot0,DCOne,0}, + {dirty_io,DITot0,DIOnln0,0}} = sched_state(), + + 1 = erlang:system_flag(schedulers_online, NOnln0), + receive after 500 -> ok end, + {{normal,NTot0,NOnln0,1}, + {dirty_cpu,DCTot0,DCOnln0,0}, + {dirty_io,DITot0,DIOnln0,0}} = sched_state(), + + blocked = erlang:system_flag(multi_scheduling, unblock_normal), + blocked = erlang:system_info(multi_scheduling), + {{normal,NTot0,NOnln0,1}, + {dirty_cpu,DCTot0,DCOnln0,0}, + {dirty_io,DITot0,DIOnln0,0}} = sched_state(), + + enabled = erlang:system_flag(multi_scheduling, unblock), + enabled = erlang:system_info(multi_scheduling), + receive after 500 -> ok end, + {{normal,NTot0,NOnln0,NAct0}, + {dirty_cpu,DCTot0,DCOnln0,DCAct0}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + NOnln0 = erlang:system_flag(schedulers_online, 1), + receive after 500 -> ok end, + {{normal,NTot0,1,1}, + {dirty_cpu,DCTot0,DCOne,DCOne}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + 1 = erlang:system_flag(schedulers_online, NOnln0), + receive after 500 -> ok end, + {{normal,NTot0,NOnln0,NAct0}, + {dirty_cpu,DCTot0,DCOnln0,DCAct0}, + {dirty_io,DITot0,DIOnln0,DIAct0}} = sched_state(), + + ok. + + scheduler_suspend(Config) when is_list(Config) -> ?line Dog = ?t:timetrap(?t:minutes(5)), ?line lists:foreach(fun (S) -> scheduler_suspend_test(Config, S) end, @@ -1238,12 +1351,17 @@ scheduler_suspend_test(Config, Schedulers) -> ?line [ok] = mcall(Node, [fun () -> sst0_loop(300) end]), ?line [ok] = mcall(Node, [fun () -> sst1_loop(300) end]), ?line [ok] = mcall(Node, [fun () -> sst2_loop(300) end]), - ?line [ok, ok, ok, ok, ok] = mcall(Node, - [fun () -> sst0_loop(200) end, - fun () -> sst1_loop(200) end, - fun () -> sst2_loop(200) end, - fun () -> sst2_loop(200) end, - fun () -> sst3_loop(Sched, 200) end]), + ?line [ok] = mcall(Node, [fun () -> sst4_loop(300) end]), + ?line [ok] = mcall(Node, [fun () -> sst5_loop(300) end]), + ?line [ok, ok, ok, ok, + ok, ok, ok] = mcall(Node, + [fun () -> sst0_loop(200) end, + fun () -> sst1_loop(200) end, + fun () -> sst2_loop(200) end, + fun () -> sst2_loop(200) end, + fun () -> sst3_loop(Sched, 200) end, + fun () -> sst4_loop(200) end, + fun () -> sst5_loop(200) end]), ?line [SState] = mcall(Node, [fun () -> case Sched == SchedOnln of false -> @@ -1320,6 +1438,20 @@ sst3_loop_with_dirty_schedulers(S, DS, N) -> erlang:system_flag(dirty_cpu_schedulers_online, DS), sst3_loop_with_dirty_schedulers(S, DS, N-1). +sst4_loop(0) -> + ok; +sst4_loop(N) -> + erlang:system_flag(multi_scheduling, block_normal), + erlang:system_flag(multi_scheduling, unblock_normal), + sst4_loop(N-1). + +sst5_loop(0) -> + ok; +sst5_loop(N) -> + erlang:system_flag(multi_scheduling, block_normal), + erlang:system_flag(multi_scheduling, unblock_normal), + sst5_loop(N-1). + reader_groups(Config) when is_list(Config) -> %% White box testing. These results are correct, but other results %% could be too... @@ -1598,6 +1730,34 @@ reader_groups_map(CPUT, Groups) -> %% Utils %% +sched_state() -> + sched_state(erlang:system_info(all_schedulers_state), + undefined, + {dirty_cpu,0,0,0}, + {dirty_io,0,0,0}). + + +sched_state([], N, DC, DI) -> + try + chk_basic(N), + chk_basic(DC), + chk_basic(DI), + {N, DC, DI} + catch + _ : _ -> + ?t:fail({inconsisten_scheduler_state, {N, DC, DI}}) + end; +sched_state([{normal, _, _, _} = S | Rest], _S, DC, DI) -> + sched_state(Rest, S, DC, DI); +sched_state([{dirty_cpu, _, _, _} = DC | Rest], S, _DC, DI) -> + sched_state(Rest, S, DC, DI); +sched_state([{dirty_io, _, _, _} = DI | Rest], S, DC, _DI) -> + sched_state(Rest, S, DC, DI). + +chk_basic({_Type, Tot, Onln, Act}) -> + true = Tot >= Onln, + true = Onln >= Act. + l(Id) -> {logical, Id}. -- cgit v1.2.3