diff options
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r-- | erts/emulator/beam/atom.names | 4 | ||||
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 74 | ||||
-rw-r--r-- | erts/emulator/beam/erl_bif_info.c | 34 | ||||
-rw-r--r-- | erts/emulator/beam/erl_init.c | 10 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.c | 202 | ||||
-rw-r--r-- | erts/emulator/beam/erl_process.h | 23 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 128 |
7 files changed, 273 insertions, 202 deletions
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names index 477a7676d6..a44d23b181 100644 --- a/erts/emulator/beam/atom.names +++ b/erts/emulator/beam/atom.names @@ -76,6 +76,7 @@ atom ac atom accessor atom active atom active_tasks +atom active_tasks_all atom alive atom all atom all_but_first @@ -567,6 +568,7 @@ atom return_to atom return_trace atom run_queue atom run_queue_lengths +atom run_queue_lengths_all atom runnable atom runnable_ports atom runnable_procs @@ -656,8 +658,10 @@ atom Times='*' atom timestamp atom total atom total_active_tasks +atom total_active_tasks_all atom total_heap_size atom total_run_queue_lengths +atom total_run_queue_lengths_all atom tpkt atom trace trace_ts traced atom trace_control_word diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index a90e6a0ba8..79d751d13e 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -158,7 +158,9 @@ do { \ /* * Register target (X or Y register). */ -#define REG_TARGET(Target) (*(((Target) & 1) ? &yb(Target-1) : &xb(Target))) + +#define REG_TARGET_PTR(Target) (((Target) & 1) ? &yb(Target-1) : &xb(Target)) +#define REG_TARGET(Target) (*REG_TARGET_PTR(Target)) /* * Store a result into a register given a destination descriptor. @@ -172,8 +174,6 @@ do { \ REG_TARGET(stb_reg) = (Result); \ } while (0) -#define StoreSimpleDest(Src, Dest) Dest = (Src) - /* * Store a result into a register and execute the next instruction. * Dst points to the word with a destination descriptor, which MUST @@ -420,7 +420,7 @@ void** beam_ops; #define TestHeapPutList(Need, Reg) \ do { \ TestHeap((Need), 1); \ - PutList(Reg, r(0), r(0), StoreSimpleDest); \ + PutList(Reg, r(0), r(0)); \ CHECK_TERM(r(0)); \ } while (0) @@ -547,11 +547,11 @@ void** beam_ops; GetR((N)+1, Dst2); \ } while (0) -#define PutList(H, T, Dst, Store) \ - do { \ - HTOP[0] = (H); HTOP[1] = (T); \ - Store(make_list(HTOP), Dst); \ - HTOP += 2; \ +#define PutList(H, T, Dst) \ + do { \ + HTOP[0] = (H); HTOP[1] = (T); \ + Dst = make_list(HTOP); \ + HTOP += 2; \ } while (0) #define Swap(R1, R2) \ @@ -568,11 +568,7 @@ void** beam_ops; R2 = Tmp = V; \ } while (0) -#define Move(Src, Dst, Store) \ - do { \ - Eterm term = (Src); \ - Store(term, Dst); \ - } while (0) +#define Move(Src, Dst) Dst = (Src) #define Move2Par(S1, D1, S2, D2) \ do { \ @@ -911,7 +907,7 @@ do { \ Target = _uint_size * Unit; \ } while (0) -#define BsGetFloat2(Ms, Live, Sz, Flags, Dst, Store, Fail) \ +#define BsGetFloat2(Ms, Live, Sz, Flags, Dst, Fail) \ do { \ ErlBinMatchBuffer *_mb; \ Eterm _result; Sint _size; \ @@ -922,12 +918,12 @@ do { \ LIGHT_SWAPOUT; \ _result = erts_bs_get_float_2(c_p, _size, (Flags), _mb); \ LIGHT_SWAPIN; \ - HEAP_SPACE_VERIFIED(0); \ + HEAP_SPACE_VERIFIED(0); \ if (is_non_value(_result)) { Fail; } \ - else { Store(_result, Dst); } \ + else { Dst = _result; } \ } while (0) -#define BsGetBinaryImm_2(Ms, Live, Sz, Flags, Dst, Store, Fail) \ +#define BsGetBinaryImm_2(Ms, Live, Sz, Flags, Dst, Fail) \ do { \ ErlBinMatchBuffer *_mb; \ Eterm _result; \ @@ -936,12 +932,12 @@ do { \ LIGHT_SWAPOUT; \ _result = erts_bs_get_binary_2(c_p, (Sz), (Flags), _mb); \ LIGHT_SWAPIN; \ - HEAP_SPACE_VERIFIED(0); \ + HEAP_SPACE_VERIFIED(0); \ if (is_non_value(_result)) { Fail; } \ - else { Store(_result, Dst); } \ + else { Dst = _result; } \ } while (0) -#define BsGetBinary_2(Ms, Live, Sz, Flags, Dst, Store, Fail) \ +#define BsGetBinary_2(Ms, Live, Sz, Flags, Dst, Fail) \ do { \ ErlBinMatchBuffer *_mb; \ Eterm _result; Uint _size; \ @@ -951,27 +947,27 @@ do { \ LIGHT_SWAPOUT; \ _result = erts_bs_get_binary_2(c_p, _size, (Flags), _mb); \ LIGHT_SWAPIN; \ - HEAP_SPACE_VERIFIED(0); \ + HEAP_SPACE_VERIFIED(0); \ if (is_non_value(_result)) { Fail; } \ - else { Store(_result, Dst); } \ + else { Dst = _result; } \ } while (0) -#define BsGetBinaryAll_2(Ms, Live, Unit, Dst, Store, Fail) \ - do { \ - ErlBinMatchBuffer *_mb; \ - Eterm _result; \ - TestHeap(ERL_SUB_BIN_SIZE, Live); \ - _mb = ms_matchbuffer(Ms); \ - if (((_mb->size - _mb->offset) % Unit) == 0) { \ - LIGHT_SWAPOUT; \ - _result = erts_bs_get_binary_all_2(c_p, _mb); \ - LIGHT_SWAPIN; \ - HEAP_SPACE_VERIFIED(0); \ - ASSERT(is_value(_result)); \ - Store(_result, Dst); \ - } else { \ - HEAP_SPACE_VERIFIED(0); \ - Fail; } \ +#define BsGetBinaryAll_2(Ms, Live, Unit, Dst, Fail) \ + do { \ + ErlBinMatchBuffer *_mb; \ + Eterm _result; \ + TestHeap(ERL_SUB_BIN_SIZE, Live); \ + _mb = ms_matchbuffer(Ms); \ + if (((_mb->size - _mb->offset) % Unit) == 0) { \ + LIGHT_SWAPOUT; \ + _result = erts_bs_get_binary_all_2(c_p, _mb); \ + LIGHT_SWAPIN; \ + HEAP_SPACE_VERIFIED(0); \ + ASSERT(is_value(_result)); \ + Dst = _result; \ + } else { \ + HEAP_SPACE_VERIFIED(0); \ + Fail; } \ } while (0) #define BsSkipBits2(Ms, Bits, Unit, Fail) \ diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 5fc70dfc02..e2773475b0 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -3450,9 +3450,15 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1) if (is_non_value(res)) BIF_RET(am_undefined); BIF_TRAP1(gather_sched_wall_time_res_trap, BIF_P, res); - } else if (BIF_ARG_1 == am_total_active_tasks - || BIF_ARG_1 == am_total_run_queue_lengths) { - Uint no = erts_run_queues_len(NULL, 0, BIF_ARG_1 == am_total_active_tasks); + } else if ((BIF_ARG_1 == am_total_active_tasks) + | (BIF_ARG_1 == am_total_run_queue_lengths) + | (BIF_ARG_1 == am_total_active_tasks_all) + | (BIF_ARG_1 == am_total_run_queue_lengths_all)) { + Uint no = erts_run_queues_len(NULL, 0, + ((BIF_ARG_1 == am_total_active_tasks) + | (BIF_ARG_1 == am_total_active_tasks_all)), + ((BIF_ARG_1 == am_total_active_tasks_all) + | (BIF_ARG_1 == am_total_run_queue_lengths_all))); if (IS_USMALL(0, no)) res = make_small(no); else { @@ -3460,13 +3466,21 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1) res = uint_to_big(no, hp); } BIF_RET(res); - } else if (BIF_ARG_1 == am_active_tasks - || BIF_ARG_1 == am_run_queue_lengths) { + } else if ((BIF_ARG_1 == am_active_tasks) + | (BIF_ARG_1 == am_run_queue_lengths) + | (BIF_ARG_1 == am_active_tasks_all) + | (BIF_ARG_1 == am_run_queue_lengths_all)) { Eterm res, *hp, **hpp; Uint sz, *szp; - int no_qs = erts_no_run_queues; + int incl_dirty_io = ((BIF_ARG_1 == am_active_tasks_all) + | (BIF_ARG_1 == am_run_queue_lengths_all)); + int no_qs = (erts_no_run_queues + ERTS_NUM_DIRTY_CPU_RUNQS + + (incl_dirty_io ? ERTS_NUM_DIRTY_IO_RUNQS : 0)); Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2); - (void) erts_run_queues_len(qszs, 0, BIF_ARG_1 == am_active_tasks); + (void) erts_run_queues_len(qszs, 0, + ((BIF_ARG_1 == am_active_tasks) + | (BIF_ARG_1 == am_active_tasks_all)), + incl_dirty_io); sz = 0; szp = &sz; hpp = NULL; @@ -3539,7 +3553,7 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1) res = TUPLE2(hp, b1, b2); BIF_RET(res); } else if (BIF_ARG_1 == am_run_queue) { - res = erts_run_queues_len(NULL, 1, 0); + res = erts_run_queues_len(NULL, 1, 0, 0); BIF_RET(make_small(res)); } else if (BIF_ARG_1 == am_wall_clock) { UWord w1, w2; @@ -3557,9 +3571,9 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1) else if (ERTS_IS_ATOM_STR("run_queues", BIF_ARG_1)) { Eterm res, *hp, **hpp; Uint sz, *szp; - int no_qs = erts_no_run_queues; + int no_qs = erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS; Uint *qszs = erts_alloc(ERTS_ALC_T_TMP,sizeof(Uint)*no_qs*2); - (void) erts_run_queues_len(qszs, 0, 0); + (void) erts_run_queues_len(qszs, 0, 0, 1); sz = 0; szp = &sz; hpp = NULL; diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index ac99f043e6..6172595552 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -431,7 +431,7 @@ erl_first_process_otp(char* modname, void* code, unsigned size, int argc, char** } static Eterm -erl_system_process_otp(Eterm parent_pid, char* modname) +erl_system_process_otp(Eterm parent_pid, char* modname, int off_heap_msgq) { Eterm start_mod; Process* parent; @@ -447,6 +447,8 @@ erl_system_process_otp(Eterm parent_pid, char* modname) parent = erts_pid2proc(NULL, 0, parent_pid, ERTS_PROC_LOCK_MAIN); so.flags = erts_default_spo_flags|SPO_SYSTEM_PROC; + if (off_heap_msgq) + so.flags |= SPO_OFF_HEAP_MSGQ; res = erl_create_process(parent, start_mod, am_start, NIL, &so); erts_smp_proc_unlock(parent, ERTS_PROC_LOCK_MAIN); return res; @@ -2326,14 +2328,14 @@ erl_start(int argc, char **argv) */ Eterm pid; - pid = erl_system_process_otp(otp_ring0_pid, "erts_code_purger"); + pid = erl_system_process_otp(otp_ring0_pid, "erts_code_purger", !0); erts_code_purger = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, internal_pid_index(pid)); ASSERT(erts_code_purger && erts_code_purger->common.id == pid); erts_proc_inc_refc(erts_code_purger); - pid = erl_system_process_otp(otp_ring0_pid, "erts_literal_area_collector"); + pid = erl_system_process_otp(otp_ring0_pid, "erts_literal_area_collector", !0); erts_literal_area_collector = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, internal_pid_index(pid)); @@ -2342,7 +2344,7 @@ erl_start(int argc, char **argv) erts_proc_inc_refc(erts_literal_area_collector); #ifdef ERTS_DIRTY_SCHEDULERS - pid = erl_system_process_otp(otp_ring0_pid, "erts_dirty_process_code_checker"); + pid = erl_system_process_otp(otp_ring0_pid, "erts_dirty_process_code_checker", !0); erts_dirty_process_code_checker = (Process *) erts_ptab_pix2intptr_ddrb(&erts_proc, internal_pid_index(pid)); diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index da27c7e7c6..d4385e3987 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -405,13 +405,59 @@ static erts_atomic_t runq_supervisor_sleeping; ErtsSchedulerData *erts_scheduler_data; #endif -ErtsAlignedRunQueue *erts_aligned_run_queues; -Uint erts_no_run_queues; +ErtsAlignedRunQueue * ERTS_WRITE_UNLIKELY(erts_aligned_run_queues); +Uint ERTS_WRITE_UNLIKELY(erts_no_run_queues); -ErtsAlignedSchedulerData *erts_aligned_scheduler_data; #ifdef ERTS_DIRTY_SCHEDULERS -ErtsAlignedSchedulerData *erts_aligned_dirty_cpu_scheduler_data; -ErtsAlignedSchedulerData *erts_aligned_dirty_io_scheduler_data; + +struct { + union { + erts_smp_atomic32_t active; + char align__[ERTS_CACHE_LINE_SIZE]; + } cpu; + union { + erts_smp_atomic32_t active; + char align__[ERTS_CACHE_LINE_SIZE]; + } io; +} dirty_count erts_align_attribute(ERTS_CACHE_LINE_SIZE); + +#endif + +static ERTS_INLINE void +dirty_active(ErtsSchedulerData *esdp, erts_aint32_t add) +{ +#ifdef ERTS_DIRTY_SCHEDULERS + erts_aint32_t val; + erts_smp_atomic32_t *ap; + switch (esdp->type) { + case ERTS_SCHED_DIRTY_CPU: + ap = &dirty_count.cpu.active; + break; + case ERTS_SCHED_DIRTY_IO: + ap = &dirty_count.io.active; + break; + default: + ap = NULL; + ERTS_INTERNAL_ERROR("Not a dirty scheduler"); + break; + } + + /* + * All updates done under run-queue lock, so + * no inc or dec needed... + */ + ERTS_SMP_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue)); + + val = erts_smp_atomic32_read_nob(ap); + val += add; + erts_smp_atomic32_set_nob(ap, val); +#endif +} + +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_scheduler_data); +#ifdef ERTS_DIRTY_SCHEDULERS +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_cpu_scheduler_data); +ErtsAlignedSchedulerData * ERTS_WRITE_UNLIKELY(erts_aligned_dirty_io_scheduler_data); typedef union { Process dsp; char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(Process))]; @@ -539,22 +585,28 @@ do { \ } \ } while (0) -#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, DOX) \ +#define ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, NRQS, DO, DOX) \ do { \ ErtsRunQueue *RQVAR; \ + int nrqs = (NRQS); \ int ix__; \ - for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) { \ + for (ix__ = 0; ix__ < nrqs; ix__++) { \ RQVAR = ERTS_RUNQ_IX(ix__); \ erts_smp_runq_lock(RQVAR); \ { DO; } \ } \ { DOX; } \ - for (ix__ = 0; ix__ < erts_no_run_queues; ix__++) \ + for (ix__ = 0; ix__ < nrqs; ix__++) \ erts_smp_runq_unlock(ERTS_RUNQ_IX(ix__)); \ } while (0) -#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \ - ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, DO, ) +#define ERTS_ATOMIC_FOREACH_RUNQ(RQVAR, DO) \ + ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, DO, ) + +#define ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(RQVAR, DO) \ + ERTS_ATOMIC_FOREACH_RUNQ_X(RQVAR, erts_no_run_queues, DO, ) + + /* * Local functions. */ @@ -2971,7 +3023,7 @@ erts_active_schedulers(void) { Uint as = erts_no_schedulers; - ERTS_ATOMIC_FOREACH_RUNQ(rq, as -= abs(rq->waiting)); + ERTS_ATOMIC_FOREACH_NORMAL_RUNQ(rq, as -= abs(rq->waiting)); return as; } @@ -3383,6 +3435,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) rq->sleepers.list->prev = ssi; rq->sleepers.list = ssi; erts_smp_spin_unlock(&rq->sleepers.lock); + dirty_active(esdp, -1); } #endif @@ -3724,6 +3777,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) sched_active_sys(esdp->no, rq); } + if (ERTS_SCHEDULER_IS_DIRTY(esdp)) + dirty_active(esdp, 1); + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); } @@ -6123,7 +6179,7 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online #endif ) { - int ix, n, no_ssi; + int ix, n, no_ssi, tot_rqs; char *daww_ptr; size_t daww_sz; size_t size_runqs; @@ -6156,26 +6212,19 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online /* Create and initialize run queues */ n = no_schedulers; - size_runqs = sizeof(ErtsAlignedRunQueue) * (n + ERTS_NUM_DIRTY_RUNQS); + tot_rqs = (n + ERTS_NUM_DIRTY_RUNQS); + size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs; erts_aligned_run_queues = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs); #ifdef ERTS_SMP -#ifdef ERTS_DIRTY_SCHEDULERS - erts_aligned_run_queues += ERTS_NUM_DIRTY_RUNQS; -#endif erts_smp_atomic32_init_nob(&no_empty_run_queues, 0); #endif erts_no_run_queues = n; - for (ix = -(ERTS_NUM_DIRTY_RUNQS); ix < n; ix++) { + for (ix = 0; ix < tot_rqs; ix++) { int pix, rix; -#ifdef ERTS_DIRTY_SCHEDULERS - ErtsRunQueue *rq = ERTS_RUNQ_IX_IS_DIRTY(ix) ? - ERTS_DIRTY_RUNQ_IX(ix) : ERTS_RUNQ_IX(ix); -#else ErtsRunQueue *rq = ERTS_RUNQ_IX(ix); -#endif rq->ix = ix; @@ -6448,6 +6497,11 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online ERTS_SCHED_DIRTY_IO, no_dirty_io_schedulers); + erts_smp_atomic32_init_nob(&dirty_count.cpu.active, + (erts_aint32_t) no_dirty_cpu_schedulers); + erts_smp_atomic32_init_nob(&dirty_count.io.active, + (erts_aint32_t) no_dirty_io_schedulers); + #endif if (set_schdlr_sspnd_change_flags) @@ -7822,6 +7876,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) #endif if (sched_type != ERTS_SCHED_NORMAL) { + dirty_active(esdp, -1); erts_smp_runq_unlock(esdp->run_queue); dirty_sched_wall_time_change(esdp, 0); } @@ -8130,7 +8185,9 @@ suspend_scheduler(ErtsSchedulerData *esdp) erts_smp_runq_lock(esdp->run_queue); non_empty_runq(esdp->run_queue); - if (sched_type == ERTS_SCHED_NORMAL) { + if (sched_type != ERTS_SCHED_NORMAL) + dirty_active(esdp, 1); + else { schedule_bound_processes(esdp->run_queue, &sbp); erts_sched_check_cpu_bind_post_suspend(esdp); @@ -9764,38 +9821,69 @@ erts_internal_is_process_executing_dirty_1(BIF_ALIST_1) BIF_RET(am_false); } +static ERTS_INLINE void +run_queues_len_aux(ErtsRunQueue *rq, Uint *tot_len, Uint *qlen, int *ip, int incl_active_sched, int locked) +{ + Sint rq_len; + + if (locked) + rq_len = (Sint) erts_smp_atomic32_read_dirty(&rq->len); + else + rq_len = (Sint) erts_smp_atomic32_read_nob(&rq->len); + ASSERT(rq_len >= 0); + + if (incl_active_sched) { +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_aint32_t dcnt; + if (ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(rq)) { + dcnt = erts_smp_atomic32_read_nob(&dirty_count.cpu.active); + ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_cpu_schedulers); + } + else { + ASSERT(ERTS_RUNQ_IS_DIRTY_IO_RUNQ(rq)); + dcnt = erts_smp_atomic32_read_nob(&dirty_count.io.active); + ASSERT(0 <= dcnt && dcnt <= erts_no_dirty_io_schedulers); + } + rq_len += (Sint) dcnt; + } + else +#endif + { + if (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC) + rq_len++; + } + } + if (qlen) + qlen[(*ip)++] = rq_len; + *tot_len += (Uint) rq_len; +} Uint -erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched) +erts_run_queues_len(Uint *qlen, int atomic_queues_read, int incl_active_sched, + int incl_dirty_io) { - int i = 0; + int i = 0, j = 0; Uint len = 0; - if (atomic_queues_read) - ERTS_ATOMIC_FOREACH_RUNQ(rq, - { - Sint rq_len = (Sint) erts_smp_atomic32_read_dirty(&rq->len); - ASSERT(rq_len >= 0); - if (incl_active_sched - && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) { - rq_len++; - } - if (qlen) - qlen[i++] = rq_len; - len += (Uint) rq_len; - } - ); + int no_rqs = erts_no_run_queues; + +#ifdef ERTS_DIRTY_SCHEDULERS + if (incl_dirty_io) + no_rqs += ERTS_NUM_DIRTY_RUNQS; + else + no_rqs += ERTS_NUM_DIRTY_CPU_RUNQS; +#endif + + if (atomic_queues_read) { + ERTS_ATOMIC_FOREACH_RUNQ_X(rq, no_rqs, + run_queues_len_aux(rq, &len, qlen, &j, + incl_active_sched, 1), + /* Nothing... */); + } else { - for (i = 0; i < erts_no_run_queues; i++) { + for (i = 0; i < no_rqs; i++) { ErtsRunQueue *rq = ERTS_RUNQ_IX(i); - Sint rq_len = (Sint) erts_smp_atomic32_read_nob(&rq->len); - ASSERT(rq_len >= 0); - if (incl_active_sched - && (ERTS_RUNQ_FLGS_GET_NOB(rq) & ERTS_RUNQ_FLG_EXEC)) { - rq_len++; - } - if (qlen) - qlen[i] = rq_len; - len += (Uint) rq_len; + run_queues_len_aux(rq, &len, qlen, &j, incl_active_sched, 0); } } @@ -11243,7 +11331,9 @@ cleanup_sys_tasks(Process *c_p, erts_aint32_t in_state, int in_reds) erts_aint32_t state = in_state; int max_reds = in_reds; int reds = 0; - int qmask = 0; + int qmask = 1; /* Set to 1 to force looping as long as there + * are dirty tasks. + */ ERTS_SMP_LC_ASSERT(erts_proc_lc_my_proc_locks(c_p) == ERTS_PROC_LOCK_MAIN); @@ -12097,6 +12187,8 @@ erts_get_total_reductions(Uint *redsp, Uint *diffp) Uint reds = 0; ERTS_ATOMIC_FOREACH_RUNQ_X(rq, + erts_no_run_queues + ERTS_NUM_DIRTY_RUNQS, + reds += rq->procs.reductions, if (redsp) *redsp = reds; @@ -13845,6 +13937,16 @@ erts_continue_exit_process(Process *p) goto yield; } +#ifdef DEBUG + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + ASSERT(p->sys_task_qs == NULL); + ASSERT(ERTS_PROC_GET_DELAYED_GC_TASK_QS(p) == NULL); +#ifdef ERTS_DIRTY_SCHEDULERS + ASSERT(p->dirty_sys_tasks == NULL); +#endif + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); +#endif + if (p->flags & F_USING_DDLL) { erts_ddll_proc_dead(p, ERTS_PROC_LOCK_MAIN); p->flags &= ~F_USING_DDLL; diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 2b169bb9ce..d44e8c252d 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -1541,24 +1541,29 @@ extern int erts_system_profile_ts_type; } while (0) #if defined(ERTS_DIRTY_SCHEDULERS) && defined(ERTS_SMP) -#define ERTS_NUM_DIRTY_RUNQS 2 +#define ERTS_NUM_DIRTY_CPU_RUNQS 1 +#define ERTS_NUM_DIRTY_IO_RUNQS 1 #else -#define ERTS_NUM_DIRTY_RUNQS 0 +#define ERTS_NUM_DIRTY_CPU_RUNQS 0 +#define ERTS_NUM_DIRTY_IO_RUNQS 0 #endif +#define ERTS_NUM_DIRTY_RUNQS (ERTS_NUM_DIRTY_CPU_RUNQS+ERTS_NUM_DIRTY_IO_RUNQS) + #define ERTS_RUNQ_IX(IX) \ - (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues), \ + (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues+ERTS_NUM_DIRTY_RUNQS), \ &erts_aligned_run_queues[(IX)].runq) #ifdef ERTS_DIRTY_SCHEDULERS #define ERTS_RUNQ_IX_IS_DIRTY(IX) \ - (-(ERTS_NUM_DIRTY_RUNQS) <= (IX) && (IX) < 0) + (ASSERT(0 <= (IX) && (IX) < erts_no_run_queues+ERTS_NUM_DIRTY_RUNQS), \ + (erts_no_run_queues <= (IX))) #define ERTS_DIRTY_RUNQ_IX(IX) \ (ASSERT(ERTS_RUNQ_IX_IS_DIRTY(IX)), \ &erts_aligned_run_queues[(IX)].runq) -#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[-1].runq) -#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[-2].runq) -#define ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(RQ) ((RQ)->ix == -1) -#define ERTS_RUNQ_IS_DIRTY_IO_RUNQ(RQ) ((RQ)->ix == -2) +#define ERTS_DIRTY_CPU_RUNQ (&erts_aligned_run_queues[erts_no_run_queues].runq) +#define ERTS_DIRTY_IO_RUNQ (&erts_aligned_run_queues[erts_no_run_queues+1].runq) +#define ERTS_RUNQ_IS_DIRTY_CPU_RUNQ(RQ) ((RQ) == ERTS_DIRTY_CPU_RUNQ) +#define ERTS_RUNQ_IS_DIRTY_IO_RUNQ(RQ) ((RQ) == ERTS_DIRTY_IO_RUNQ) #else #define ERTS_RUNQ_IX_IS_DIRTY(IX) 0 #endif @@ -1836,7 +1841,7 @@ Uint erts_active_schedulers(void); void erts_init_process(int, int, int); Eterm erts_process_state2status(erts_aint32_t); Eterm erts_process_status(Process *, Eterm); -Uint erts_run_queues_len(Uint *, int, int); +Uint erts_run_queues_len(Uint *, int, int, int); void erts_add_to_runq(Process *); Eterm erts_bound_schedulers_term(Process *c_p); Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which); diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9b5bd7a749..8abe871c14 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -182,15 +182,7 @@ i_jump_on_val x f I I i_jump_on_val y f I I %macro: get_list GetList -pack -get_list x x x -get_list x x y -get_list x y x -get_list x y y - -get_list y x x -get_list y x y -get_list y y x -get_list y y y +get_list xy xy xy # The following get_list instructions using x(0) are frequently used. get_list r x x @@ -218,12 +210,10 @@ set_tuple_element s d P # Get tuple element %macro: i_get_tuple_element GetTupleElement -pack -i_get_tuple_element x P x -i_get_tuple_element y P x +i_get_tuple_element xy P x %cold -i_get_tuple_element x P y -i_get_tuple_element y P y +i_get_tuple_element xy P y %hot %macro: i_get_tuple_element2 GetTupleElement2 -pack @@ -270,11 +260,7 @@ system_limit j move C=cxy x==0 | jump Lbl => move_jump Lbl C %macro: move_jump MoveJump -nonext -move_jump f n -move_jump f c -move_jump f x -move_jump f y - +move_jump f ncxy # Movement to and from the stack is common # Try to pack as much as we can into one instruction @@ -326,12 +312,10 @@ swap_temp R1 R2 Tmp | line Loc | call_ext_last Live Addr D | \ is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_last Live Addr D %macro: swap_temp SwapTemp -pack -swap_temp x x x -swap_temp x y x +swap_temp x xy x %macro: swap Swap -pack -swap x x -swap x y +swap x xy move Src=x D1=x | move Src=x D2=x => move_dup Src D1 D2 move Src=x SD=x | move SD=x D=x => move_dup Src SD D @@ -381,10 +365,7 @@ move_shift x y x move_shift x x y %macro: move_dup MoveDup -pack -move_dup x x x -move_dup x x y -move_dup y x x -move_dup y x y +move_dup xy x xy %macro: move2_par Move2Par -pack @@ -409,7 +390,7 @@ move3 x x x x x x move S=n D=y => init D move S=c D=y => move S x | move x D -%macro:move Move -pack -gen_dest +%macro:move Move -pack move x x move x y move y x @@ -483,8 +464,7 @@ i_is_ne_exact_literal f y c is_eq_exact Lbl Y=y X=x => is_eq_exact Lbl X Y %macro: is_eq_exact EqualExact -fail_action -pack -is_eq_exact f x x -is_eq_exact f x y +is_eq_exact f x xy is_eq_exact f s s %macro: is_lt IsLessThan -fail_action @@ -538,7 +518,7 @@ i_put_tuple y I # put_list Const=c n Dst => move Const x | put_list x n Dst -%macro:put_list PutList -pack -gen_dest +%macro:put_list PutList -pack put_list x n x put_list y n x @@ -630,9 +610,7 @@ is_tagged_tuple Fail=f c Arity Atom => jump Fail %macro:is_tagged_tuple IsTaggedTuple -fail_action -is_tagged_tuple f r A a -is_tagged_tuple f x A a -is_tagged_tuple f y A a +is_tagged_tuple f rxy A a # Test tuple & arity (head) @@ -642,21 +620,16 @@ is_tuple Fail=f S=xy | test_arity Fail=f S=xy Arity => is_tuple_of_arity Fail S %macro:is_tuple_of_arity IsTupleOfArity -fail_action -is_tuple_of_arity f r A -is_tuple_of_arity f x A -is_tuple_of_arity f y A +is_tuple_of_arity f rxy A %macro: is_tuple IsTuple -fail_action -is_tuple f r -is_tuple f x -is_tuple f y +is_tuple f rxy test_arity Fail Literal=q Arity => move Literal x | test_arity Fail x Arity test_arity Fail=f c Arity => jump Fail %macro: test_arity IsArity -fail_action -test_arity f x A -test_arity f y A +test_arity f xy A get_tuple_element Reg=x P1 D1=x | get_tuple_element Reg=x P2 D2=x | \ get_tuple_element Reg=x P3 D3=x | \ @@ -681,8 +654,7 @@ is_integer Fail=f S=x | allocate Need Regs => is_integer_allocate Fail S Need Re is_integer_allocate f x I I %macro: is_integer IsInteger -fail_action -is_integer f x -is_integer f y +is_integer f xy is_list Fail=f n => is_list Fail Literal=q => move Literal x | is_list Fail x @@ -696,8 +668,7 @@ is_list f y is_nonempty_list Fail=f S=x | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs %macro:is_nonempty_list_allocate IsNonemptyListAllocate -fail_action -pack -is_nonempty_list_allocate f r I t -is_nonempty_list_allocate f x I t +is_nonempty_list_allocate f rx I t is_nonempty_list F=f x==0 | test_heap I1 I2 => is_non_empty_list_test_heap F I1 I2 @@ -708,12 +679,10 @@ is_nonempty_list Fail=f S=x | get_list S D1=x D2=x => \ is_nonempty_list_get_list Fail S D1 D2 %macro: is_nonempty_list_get_list IsNonemptyListGetList -fail_action -pack -is_nonempty_list_get_list f r x x -is_nonempty_list_get_list f x x x +is_nonempty_list_get_list f rx x x %macro: is_nonempty_list IsNonemptyList -fail_action -is_nonempty_list f x -is_nonempty_list f y +is_nonempty_list f xy %macro: is_atom IsAtom -fail_action is_atom f x @@ -735,8 +704,7 @@ is_nil Fail=f n => is_nil Fail=f qia => jump Fail %macro: is_nil IsNil -fail_action -is_nil f x -is_nil f y +is_nil f xy is_binary Fail Literal=q => move Literal x | is_binary Fail x is_binary Fail=f c => jump Fail @@ -784,8 +752,7 @@ is_boolean Fail=f ac => jump Fail %cold %macro: is_boolean IsBoolean -fail_action -is_boolean f x -is_boolean f y +is_boolean f xy %hot is_function2 Fail=f acq Arity => jump Fail @@ -1079,8 +1046,7 @@ i_get_hash c I d i_get s d %macro: self Self -self x -self y +self xy %macro: node Node node x @@ -1091,8 +1057,7 @@ node y i_fast_element j x I d i_fast_element j y I d -i_element j x s d -i_element j y s d +i_element j xy s d bif1 f b s d bif1_body b s d @@ -1111,8 +1076,7 @@ i_move_call c f %macro:move_call MoveCall -arg_f -size -nonext move_call/2 -move_call x f -move_call y f +move_call xy f move S=c x==0 | call_last Ar P=f D => i_move_call_last P D S move S x==0 | call_last Ar P=f D => move_call_last S P D @@ -1122,8 +1086,7 @@ i_move_call_last f P c %macro:move_call_last MoveCallLast -arg_f -nonext -pack move_call_last/3 -move_call_last x f Q -move_call_last y f Q +move_call_last xy f Q move S=c x==0 | call_only Ar P=f => i_move_call_only P S move S=x x==0 | call_only Ar P=f => move_call_only S P @@ -1167,8 +1130,7 @@ i_make_fun I t %hot %macro: is_function IsFunction -fail_action -is_function f x -is_function f y +is_function f xy is_function Fail=f c => jump Fail func_info M F A => i_func_info u M F A @@ -1180,8 +1142,7 @@ func_info M F A => i_func_info u M F A %cold bs_start_match2 Fail=f ica X Y D => jump Fail bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D -i_bs_start_match2 x f I I d -i_bs_start_match2 y f I I d +i_bs_start_match2 xy f I I d bs_save2 Reg Index => gen_bs_save(Reg, Index) i_bs_save2 x I @@ -1209,9 +1170,9 @@ i_bs_get_integer_32 x f I d bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \ gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst) -%macro: i_bs_get_binary_imm2 BsGetBinaryImm_2 -fail_action -gen_dest -%macro: i_bs_get_binary2 BsGetBinary_2 -fail_action -gen_dest -%macro: i_bs_get_binary_all2 BsGetBinaryAll_2 -fail_action -gen_dest +%macro: i_bs_get_binary_imm2 BsGetBinaryImm_2 -fail_action +%macro: i_bs_get_binary2 BsGetBinary_2 -fail_action +%macro: i_bs_get_binary_all2 BsGetBinaryAll_2 -fail_action i_bs_get_binary_imm2 f x I I I d i_bs_get_binary2 f x I s I d @@ -1224,7 +1185,7 @@ bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \ bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail -%macro: i_bs_get_float2 BsGetFloat2 -fail_action -gen_dest +%macro: i_bs_get_float2 BsGetFloat2 -fail_action i_bs_get_float2 f x I s I d # Miscellanous @@ -1236,8 +1197,7 @@ bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \ i_bs_skip_bits_imm2 f x I %macro: i_bs_skip_bits2 BsSkipBits2 -fail_action -i_bs_skip_bits2 f x x I -i_bs_skip_bits2 f x y I +i_bs_skip_bits2 f x xy I %macro: i_bs_skip_bits_all2 BsSkipBitsAll2 -fail_action i_bs_skip_bits_all2 f x I @@ -1302,8 +1262,7 @@ bs_init2 Fail Sz Words=u==0 Regs Flags Dst => \ bs_init2 Fail Sz Words Regs Flags Dst => \ i_bs_init_fail_heap Sz Words Fail Regs Dst -i_bs_init_fail x j I d -i_bs_init_fail y j I d +i_bs_init_fail xy j I d i_bs_init_fail_heap s I j I d @@ -1324,8 +1283,7 @@ bs_init_bits Fail Sz Words=u==0 Regs Flags Dst => \ bs_init_bits Fail Sz Words Regs Flags Dst => \ i_bs_init_bits_fail_heap Sz Words Fail Regs Dst -i_bs_init_bits_fail x j I d -i_bs_init_bits_fail y j I d +i_bs_init_bits_fail xy j I d i_bs_init_bits_fail_heap s I j I d @@ -1493,8 +1451,7 @@ is_map Fail Lit=q | literal_is_map(Lit) => is_map Fail cq => jump Fail %macro: is_map IsMap -fail_action -is_map f x -is_map f y +is_map f xy ## Transform has_map_fields #{ K1 := _, K2 := _ } to has_map_elements @@ -1514,16 +1471,10 @@ i_get_map_element Fail Src=xy Key=y Dst => \ move Key x | i_get_map_element Fail Src x Dst %macro: i_get_map_element_hash GetMapElementHash -fail_action -i_get_map_element_hash f x c I x -i_get_map_element_hash f y c I x -i_get_map_element_hash f x c I y -i_get_map_element_hash f y c I y +i_get_map_element_hash f xy c I xy %macro: i_get_map_element GetMapElement -fail_action -i_get_map_element f x x x -i_get_map_element f y x x -i_get_map_element f x x y -i_get_map_element f y x y +i_get_map_element f xy x xy # # Convert the plus operations to a generic plus instruction. @@ -1589,12 +1540,9 @@ gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst -i_increment r I I d -i_increment x I I d -i_increment y I I d +i_increment rxy I I d -i_plus j I x x d -i_plus j I x y d +i_plus j I x xy d i_plus j I s s d i_minus j I x x d |