From 090f9543f66cc0c4ea4d1ca63902c300b103f271 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Tue, 9 Oct 2018 16:11:43 +0200 Subject: erts: Pass thread progress data where possible The poll thread does a lot of waking up and then going back to sleep. A large part of the waking up is managing thread progress and a large part of that was using thread specific data to get the thread progress data pointer. With this refactor the tpd is passed to each of the functions which greatly decreases the number of ethr_get_tsd calls which in turn halves the CPU usage of the poller thread in certain scenarios. --- erts/emulator/beam/erl_async.c | 2 +- erts/emulator/beam/erl_init.c | 4 +- erts/emulator/beam/erl_process.c | 78 ++++++++++++++++++--------------- erts/emulator/beam/erl_thr_progress.c | 28 ++++++------ erts/emulator/beam/erl_thr_progress.h | 29 ++++++++---- erts/emulator/beam/erl_trace.c | 15 ++++--- erts/emulator/sys/common/erl_check_io.c | 8 ++-- erts/emulator/sys/common/erl_check_io.h | 5 ++- erts/emulator/sys/common/erl_poll.c | 9 ++-- erts/emulator/sys/common/erl_poll_api.h | 4 +- erts/emulator/sys/win32/erl_poll.c | 7 +-- 11 files changed, 109 insertions(+), 80 deletions(-) diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c index 605a2b3461..44655ad5df 100644 --- a/erts/emulator/beam/erl_async.c +++ b/erts/emulator/beam/erl_async.c @@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q, case ERTS_THR_Q_NEED_THR_PRGR: { ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q); - erts_thr_progress_wakeup(NULL, prgr); + erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr); /* * We do no dequeue finalizing in hope that a new async * job will arrive before we are woken due to thread diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 57c6c10c7f..ff8b9fd567 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -2358,8 +2358,8 @@ system_cleanup(int flush_async) * The exiting thread might be waiting for * us to block; need to update status... */ - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(erts_thr_prgr_data(NULL), 0); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); } /* Wait forever... 
*/ while (1) diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 0f7f1598fd..919d9f9edf 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -1646,7 +1646,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); } - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1656,7 +1656,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) { awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1670,7 +1670,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val, else { awdp->latest_wakeup = val; awdp->later_op.size = thr_prgr_later_cleanup_op_threshold; - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } } @@ -3066,6 +3066,7 @@ aux_thread(void *unused) ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1); erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; + ErtsThrPrgrData *tpd; int thr_prgr_active = 1; ERTS_MSACC_DECLARE_CACHE(); @@ -3087,12 +3088,12 @@ aux_thread(void *unused) callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; #if ERTS_POLL_USE_FALLBACK - ssi->psi = erts_create_pollset_thread(-1); + ssi->psi = erts_create_pollset_thread(-1, tpd); #endif sched_prep_spin_wait(ssi); @@ -3105,11 +3106,11 @@ aux_thread(void *unused) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 1); + erts_thr_progress_active(tpd, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (aux_work && erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); } if (!aux_work) { @@ -3120,7 +3121,7 @@ aux_thread(void *unused) #endif if (thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 0); + erts_thr_progress_active(tpd, thr_prgr_active = 0); #if ERTS_POLL_USE_FALLBACK @@ -3136,7 +3137,7 @@ aux_thread(void *unused) } } #else - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); flgs = sched_spin_wait(ssi, 0); @@ -3153,7 +3154,7 @@ aux_thread(void *unused) ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); } } - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); #endif } @@ -3171,7 +3172,8 @@ poll_thread(void *arg) erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; int thr_prgr_active = 1; - struct erts_poll_thread *psi = erts_create_pollset_thread(id); + struct erts_poll_thread *psi; + ErtsThrPrgrData *tpd; ERTS_MSACC_DECLARE_CACHE(); #ifdef ERTS_ENABLE_LOCK_CHECK @@ -3192,9 +3194,12 @@ poll_thread(void *arg) callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; 
+ + psi = erts_create_pollset_thread(id, tpd); + ssi->psi = psi; sched_prep_spin_wait(ssi); @@ -3207,16 +3212,16 @@ poll_thread(void *arg) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 1); + erts_thr_progress_active(tpd, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (aux_work && erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); } if (!aux_work) { if (thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 0); + erts_thr_progress_active(tpd, thr_prgr_active = 0); flgs = sched_spin_wait(ssi, 0); @@ -3286,13 +3291,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); } if (aux_work) { @@ -3301,7 +3306,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) current_time = erts_get_monotonic_time(esdp); if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -3321,17 +3326,17 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } if (do_timeout) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } else { if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(esdp); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp)); } flgs = sched_spin_wait(ssi, spincount); @@ -3363,7 +3368,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } } if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - erts_thr_progress_finalize_wait(esdp); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp)); } if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) erts_bump_timers(esdp->timer_wheel, current_time); @@ -3392,7 +3397,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if (ERTS_SCHEDULER_IS_DIRTY(esdp)) dirty_sched_wall_time_change(esdp, working = 1); else if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } @@ -7565,7 +7570,8 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (aux_work|evacuate) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + 
erts_thr_progress_active(erts_thr_prgr_data(esdp), + thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } if (aux_work) @@ -7573,8 +7579,8 @@ suspend_scheduler(ErtsSchedulerData *esdp) aux_work, 1); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (evacuate) { erts_runq_lock(esdp->run_queue); evacuate_run_queue(esdp->run_queue, &sbp); @@ -7593,18 +7599,18 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (!aux_work && current_time < timeout_time) { /* go to sleep... */ if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); suspend_normal_scheduler_sleep(esdp); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL)); current_time = erts_get_monotonic_time(esdp); } if (current_time >= timeout_time) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -7661,7 +7667,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) profile_scheduler(make_small(esdp->no), am_active); if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } @@ -9313,12 +9319,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } } - leader_update = erts_thr_progress_update(esdp); + leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp)); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); if (aux_work | leader_update) { erts_runq_unlock(rq); if (leader_update) - erts_thr_progress_leader_update(esdp); + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (aux_work) handle_aux_work(&esdp->aux_work_data, aux_work, 0); erts_runq_lock(rq); diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c index aa08eb40ec..20b05dc942 100644 --- a/erts/emulator/beam/erl_thr_progress.c +++ b/erts/emulator/beam/erl_thr_progress.c @@ -508,6 +508,10 @@ init_wakeup_request_array(ErtsThrPrgrVal *w) } } +ErtsThrPrgrData *erts_thr_progress_data(void) { + return erts_tsd_get(erts_thr_prgr_data_key__); +} + void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) { @@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) } -void +ErtsThrPrgrData * erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *callbacks, int pref_wakeup) @@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, wakeup_managed(id); } callbacks->finalize_wait(callbacks->arg); + return tpd; } static ERTS_INLINE int @@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd) } int -erts_thr_progress_update(ErtsSchedulerData *esdp) +erts_thr_progress_update(ErtsThrPrgrData *tpd) { - return update(thr_prgr_data(esdp)); + return update(tpd); } int -erts_thr_progress_leader_update(ErtsSchedulerData *esdp) +erts_thr_progress_leader_update(ErtsThrPrgrData *tpd) { - return leader_update(thr_prgr_data(esdp)); + return 
leader_update(tpd); } void -erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) +erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd) { erts_aint32_t lflgs; - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -889,9 +893,8 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) } void -erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) +erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); ErtsThrPrgrVal current, val; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) } void -erts_thr_progress_active(ErtsSchedulerData *esdp, int on) +erts_thr_progress_active(ErtsThrPrgrData *tpd, int on) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) } void -erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +erts_thr_progress_wakeup(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); + ASSERT(!tpd->is_temporary); if (tpd->is_managed) request_wakeup_managed(tpd, value); diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h index 8329995b24..00a9e61407 100644 --- a/erts/emulator/beam/erl_thr_progress.h +++ b/erts/emulator/beam/erl_thr_progress.h @@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__; void erts_thr_progress_pre_init(void); void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged); -void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, - ErtsThrPrgrCallbacks *, - int); +ErtsThrPrgrData *erts_thr_progress_register_managed_thread( + ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int); void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *); -void erts_thr_progress_active(ErtsSchedulerData *esdp, int on); -void erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +void erts_thr_progress_active(ErtsThrPrgrData *, int on); +void erts_thr_progress_wakeup(ErtsThrPrgrData *, ErtsThrPrgrVal value); -int erts_thr_progress_update(ErtsSchedulerData *esdp); -int erts_thr_progress_leader_update(ErtsSchedulerData *esdp); -void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp); -void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp); +int erts_thr_progress_update(ErtsThrPrgrData *); +int erts_thr_progress_leader_update(ErtsThrPrgrData *); +void erts_thr_progress_prepare_wait(ErtsThrPrgrData *); +void erts_thr_progress_finalize_wait(ErtsThrPrgrData *); ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void); void erts_thr_progress_unmanaged_continue__(int umrefc_ix); +ErtsThrPrgrData *erts_thr_progress_data(void); void erts_thr_progress_dbg_print_state(void); +ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp); + ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc); @@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val); #if ERTS_GLB_INLINE_INCL_FUNC_DEF +ERTS_GLB_INLINE ErtsThrPrgrData * +erts_thr_prgr_data(ErtsSchedulerData *esdp) { + if (esdp) { + return &esdp->thr_progress_data; + } else { + return erts_thr_progress_data(); + } +} + ERTS_GLB_INLINE ErtsThrPrgrVal 
erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc) { diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index 53a020e7a5..2350d4c02f 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused) { ErtsThrPrgrCallbacks callbacks; ErtsSysMsgQ *local_sys_message_queue = NULL; + ErtsThrPrgrData *tpd; int wait = 0; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused) callbacks.wait = sys_msg_dispatcher_wait; callbacks.finalize_wait = sys_msg_dispatcher_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); while (1) { int end_wait = 0; @@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused) if (!sys_message_queue) { erts_mtx_unlock(&smq_mtx); end_wait = 1; - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(tpd, 0); + erts_thr_progress_prepare_wait(tpd); erts_mtx_lock(&smq_mtx); } @@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused) erts_mtx_unlock(&smq_mtx); if (end_wait) { - erts_thr_progress_finalize_wait(NULL); - erts_thr_progress_active(NULL, 1); + erts_thr_progress_finalize_wait(tpd); + erts_thr_progress_active(tpd, 1); } /* Send trace messages ... */ @@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused) Process *proc = NULL; Port *port = NULL; - if (erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); #ifdef DEBUG_PRINTOUTS print_msg_type(smqp); diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 9f115706dc..748555165f 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -105,6 +105,7 @@ typedef struct erts_poll_thread { ErtsPollSet *ps; ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; int pollres_len; } ErtsPollThread; @@ -1516,7 +1517,8 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) } ErtsPollThread * -erts_create_pollset_thread(int id) { +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; return psiv+id; } @@ -1538,12 +1540,12 @@ erts_check_io(ErtsPollThread *psi) #if ERTS_POLL_USE_FALLBACK if (psi->ps == get_fallback()) { - poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd); } else #endif { - poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd); } #ifdef ERTS_ENABLE_LOCK_CHECK diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 443ef1264c..16aba8a5f3 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); /** * Create a new poll thread structure that is associated with the number no. * It is the callers responsibility that no is unique. 
+ * + * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler + * @param tpd the thread progress data of the pollset thread */ -struct erts_poll_thread* erts_create_pollset_thread(int no); +struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd); #ifdef ERTS_ENABLE_LOCK_COUNT /** * Toggle lock counting on all check io locks diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index b4d1575ee5..5a5874ddfa 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -75,6 +75,7 @@ # define WANT_NONBLOCKING #endif +#include "erl_thr_progress.h" #include "erl_poll.h" #if ERTS_POLL_USE_KQUEUE # include @@ -95,7 +96,6 @@ # include # endif #endif -#include "erl_thr_progress.h" #include "erl_driver.h" #include "erl_alloc.h" #include "erl_msacc.h" @@ -1629,7 +1629,8 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len) + int *len, + ErtsThrPrgrData *tpd) { int res, no_fds, used_fds = 0; int ebadf = 0; @@ -1659,7 +1660,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait); if (do_wait) { - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); } @@ -1703,7 +1704,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, } if (do_wait) { - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_UPDATE_CACHE(); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); } diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h index 1170a549b9..f35f64a9f3 100644 --- a/erts/emulator/sys/common/erl_poll_api.h +++ b/erts/emulator/sys/common/erl_poll_api.h @@ -72,11 +72,13 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, * @param res an array of fd results that the ready fds are put in. * @param[in] length the length of the res array * @param[out] length the number of ready events returned in res + * @param tpd the thread progress data to note sleep state in * @return 0 on success, else the ERRNO of the error that happened. */ int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd res[], - int *length); + int *length, + ErtsThrPrgrData *tpd); /** * Interrupt the thread waiting in the pollset. 
This function should be called * with set = 0 before any thread calls erts_poll_wait in order to clear any diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c index 39bb4d515e..5d832f4d34 100644 --- a/erts/emulator/sys/win32/erl_poll.c +++ b/erts/emulator/sys/win32/erl_poll.c @@ -1017,7 +1017,8 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps, int erts_poll_wait(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len) + int *len, + ErtsThrPrgrData *tpd) { int no_fds; DWORD timeout = INFINITE; @@ -1056,10 +1057,10 @@ int erts_poll_wait(ErtsPollSet *ps, HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout)); ERTS_POLLSET_UNLOCK(ps); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_POP_STATE(); ERTS_POLLSET_LOCK(ps); HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout)); -- cgit v1.2.3 From 4047f7177835928f2205fb728c65247ea68d5d59 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 11 Oct 2018 16:27:34 +0200 Subject: erts: Make thr prgr wakeup current or sched 1 Before this change, it was always the aux thread that was woken to handle thread progress events scheduled to happen when all schedulers were going to sleep. This was ok in the pre-OTP-21 implementation when the aux thread just slept on a tse. Now that it sleeps in the fallback pollset this uses too much cpu so instead we wake the thread that is doing the request if it is a managed thread, or else we wake scheduler 1. --- erts/emulator/beam/erl_thr_progress.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c index 20b05dc942..bac437efe9 100644 --- a/erts/emulator/beam/erl_thr_progress.c +++ b/erts/emulator/beam/erl_thr_progress.c @@ -801,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd) == ERTS_THR_PRGR_LFLG_NO_LEADER)) && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } } @@ -888,7 +888,7 @@ erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd) == ERTS_THR_PRGR_LFLG_NO_LEADER && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } @@ -975,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle) == (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM) && got_sched_wakeups()) { /* Others waiting for us... 
*/ - wakeup_managed(0); + wakeup_managed(1); } } } -- cgit v1.2.3 From 33324fd140998f36698145d2eea7e8722c044740 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Wed, 10 Oct 2018 10:42:11 +0200 Subject: erts: Add pre-alloc to ALLOC msacc state OTP-15450 --- erts/emulator/beam/erl_sched_spec_pre_alloc.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index b119c59ab3..74cc966cbe 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) erts_sspa_chunk_t *chnk; erts_sspa_chunk_header_t *chdr; erts_sspa_blk_t *res; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC); chnk = erts_sspa_cix2chunk(data, cix); chdr = &chnk->aligned.header; @@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) chdr->local.last = NULL; ERTS_SSPA_DBG_CHK_LCL(chdr); } - if (chdr->local.cnt <= chdr->local.lim) - return (char *) erts_sspa_process_remote_frees(chdr, res); + if (chdr->local.cnt <= chdr->local.lim) { + res = erts_sspa_process_remote_frees(chdr, res); + ERTS_MSACC_POP_STATE_M_X(); + return (char*) res; + } else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS) chdr->head.no_thr_progress_check++; ASSERT(res); + ERTS_MSACC_POP_STATE_M_X(); return (char *) res; } -- cgit v1.2.3 From 19e7675913a0c244231344d4d40db447a0bb7ef1 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 12 Oct 2018 10:17:18 +0200 Subject: erts: Add erts_io_notify_port_task_executed to check_io msacc state OTP-15450 --- erts/emulator/sys/common/erl_check_io.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 748555165f..1444cee805 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -431,6 +431,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + erts_mtx_lock(mtx); state = get_drv_ev_state(fd); @@ -486,6 +488,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, free_drv_select_data(free_select); if (free_nif) free_nif_select_data(free_nif); + + ERTS_MSACC_POP_STATE_M_X(); } static ERTS_INLINE void -- cgit v1.2.3 From f1579990145efb24ea8f2f9a6744c8912561a922 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Fri, 12 Oct 2018 10:08:28 +0200 Subject: erts: Move all inet tcp CONNECTED timers to multi timer --- erts/emulator/drivers/common/inet_drv.c | 133 ++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 51 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 259a27cf57..222b369a3f 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1010,6 +1010,9 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, ErlDrvData data); static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p); +static void cancel_multi_timer(MultiTimerData **first, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller); 
static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port); @@ -9779,15 +9782,14 @@ static void tcp_close_check(tcp_descriptor* desc) driver_demonitor_process(desc->inet.port, &monitor); send_async_error(desc->inet.dport, id, caller, am_closed); } - clean_multi_timers(&(desc->mtd), desc->inet.port); } - else if (desc->inet.state == INET_STATE_CONNECTING) { async_error_am(INETP(desc), am_closed); } else if (desc->inet.state == INET_STATE_CONNECTED) { async_error_am_all(INETP(desc), am_closed); } + clean_multi_timers(&(desc->mtd), desc->inet.port); } /* @@ -9830,6 +9832,15 @@ static void tcp_desc_close(tcp_descriptor* desc) erl_inet_close(INETP(desc)); } +static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(!desc->inet.active); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); + desc->i_remain = 0; + async_error_am(INETP(desc), am_timeout); +} + /* TCP requests from Erlang */ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, ErlDrvSizeT len, @@ -10117,7 +10128,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, async_error_am(INETP(desc), am_timeout); else { if (timeout != INET_INFINITY) - driver_set_timer(desc->inet.port, timeout); + add_multi_timer(&(desc->mtd), INETP(desc)->port, 0, + timeout, &tcp_inet_recv_timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); else @@ -10223,12 +10235,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, } +static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(IS_BUSY(INETP(desc))); + ASSERT(desc->busy_on_send); + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + desc->busy_on_send = 0; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_timeout); + if (desc->send_timeout_close) { + tcp_desc_close(desc); + } +} + /* ** tcp_inet_timeout: ** called when timer expire: ** TCP socket may be: ** -** a) receiving -- deselect +** a) receiving -- send timeout ** b) connecting -- close socket ** c) accepting -- reset listener ** @@ -10244,24 +10271,7 @@ static void tcp_inet_timeout(ErlDrvData e) if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */ fire_multi_timers(&(desc->mtd), desc->inet.port, e); } else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) { - if (desc->busy_on_send) { - ASSERT(IS_BUSY(INETP(desc))); - desc->inet.caller = desc->inet.busy_caller; - desc->inet.state &= ~INET_F_BUSY; - desc->busy_on_send = 0; - set_busy_port(desc->inet.port, 0); - inet_reply_error_am(INETP(desc), am_timeout); - if (desc->send_timeout_close) { - tcp_desc_close(desc); - } - } - else { - /* assume recv timeout */ - ASSERT(!desc->inet.active); - sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); - desc->i_remain = 0; - async_error_am(INETP(desc), am_timeout); - } + fire_multi_timers(&(desc->mtd), desc->inet.port, e); } else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) { /* assume connect timeout */ @@ -10429,7 +10439,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port)); } @@ -10444,9 +10454,10 @@ static int 
tcp_recv_closed(tcp_descriptor* desc) */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; } + if (!desc->inet.active) { - /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + /* We must cancel any timer here ! */ + clean_multi_timers(&(desc->mtd), INETP(desc)->port); /* passive mode do not terminate port ! */ tcp_clear_input(desc); if (desc->inet.exitf) { @@ -10481,7 +10492,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; @@ -10490,7 +10501,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) } if (!desc->inet.active) { /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + clean_multi_timers(&(desc->mtd), INETP(desc)->port); tcp_clear_input(desc); if (desc->inet.exitf) { tcp_desc_close(desc); @@ -10595,13 +10606,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len) if (len == 0) { /* empty buffer or waiting for more input */ if ((desc->i_buf == NULL) || (desc->i_remain > 0)) - return count; + return 0; if ((n = tcp_remain(desc, &len)) != 0) { if (n < 0) /* packet error */ return n; if (len > 0) /* more data pending */ desc->i_remain = len; - return count; + return 0; } } @@ -10653,9 +10664,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len) len = 0; if (!desc->inet.active) { - if (!desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - } + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_recv_timeout); sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); if (desc->i_buf != NULL) tcp_restart_input(desc); @@ -10681,7 +10690,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int len; int nread; - if (desc->i_buf == NULL) { /* allocte a read buffer */ + if (desc->i_buf == NULL) { /* allocate a read buffer */ int sz = (request_len > 0) ? 
request_len : desc->inet.bufsz; if ((desc->i_buf = alloc_buffer(sz)) == NULL) @@ -10754,10 +10763,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start); } else { - if ((nread = tcp_remain(desc, &len)) < 0) + nread = tcp_remain(desc, &len); + if (nread < 0) return tcp_recv_error(desc, EMSGSIZE); else if (nread == 0) - return tcp_deliver(desc, len); + return tcp_deliver(desc, len); else if (len > 0) desc->i_remain = len; /* set remain */ } @@ -11135,8 +11145,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) if (IS_BUSY(INETP(desc))) { desc->inet.caller = desc->inet.busy_caller; if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - desc->busy_on_send = 0; + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); + desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; set_busy_port(desc->inet.port, 0); @@ -11288,7 +11298,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(&(desc->mtd), INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11386,7 +11398,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(&(desc->mtd), INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11490,7 +11504,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) { /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, + &tcp_inet_send_timeout); desc->busy_on_send = 0; } @@ -11822,7 +11837,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) set_busy_port(desc->inet.port, 0); /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } inet_reply_ok(INETP(desc)); @@ -12634,7 +12649,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) udesc->i_buf = NULL; if (!desc->active) { async_error(desc, err); - driver_cancel_timer(desc->port); + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); } else { @@ -12723,7 +12738,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) return count; count++; if (!desc->active) { - driver_cancel_timer(desc->port); /* possibly cancel */ + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); return count; /* passive mode (read one packet only) */ } @@ -12806,26 +12821,28 @@ static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, ErlDrvData data) { ErlDrvTime next_timeout; - if (!*first) { + MultiTimerData *curr = *first; + if (!curr) { ASSERT(0); return; } #ifdef DEBUG { ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC); - ASSERT(chk >= (*first)->when); + ASSERT(chk >= curr->when); } #endif do { - MultiTimerData *save = *first; - *first = save->next; + MultiTimerData *save = curr; + *first = save->next; + curr = *first; (*(save->timeout_function))(data,save->caller); FREE(save); - if 
(*first == NULL) { + if (curr == NULL) { return; } - (*first)->prev = NULL; - next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + curr->prev = NULL; + next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC); } while (next_timeout <= 0); driver_set_timer(port, (unsigned long) next_timeout); } @@ -12862,6 +12879,20 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim FREE(p); } +/* Cancel a timer based on the timeout_fun */ +static void cancel_multi_timer(MultiTimerData **first, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)) +{ + MultiTimerData *timer = *first; + while(timer && timer->timeout_function != timeout_fun) { + timer = timer->next; + } + if (timer) { + remove_multi_timer(first, port, timer); + } +} + static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, ErlDrvTermData caller, unsigned timeout, void (*timeout_fun)(ErlDrvData drv_data, @@ -12869,7 +12900,7 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, { MultiTimerData *mtd, *p, *s; mtd = ALLOC(sizeof(MultiTimerData)); - mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1; + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout); mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; -- cgit v1.2.3 From 4f68b86b22806fb052def64926ae7340facaed53 Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Tue, 16 Oct 2018 17:24:55 +0200 Subject: erts: Optimize the inet driver multi timers for one timer The most common case for any socket is to have zero or one timer, so we optimize for the one case. The only case when we have more than one timer is when the multi accept feature is used. 
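The optimization in the diff below hinges on a one-element free cache: the most recently freed timer is parked in a cache field and reused by the next allocation, so a socket that only ever has a single live timer stops hitting the allocator after its first timer. A minimal sketch of the pattern in isolation; Timer, alloc_timer and free_timer are illustrative names standing in for the driver's real MultiTimerData and ALLOC/FREE machinery:

    #include <stdlib.h>

    typedef struct timer { struct timer *next; } Timer;

    /* Reuse the cached entry when present; fall back to malloc. */
    static Timer *alloc_timer(Timer **cache)
    {
        Timer *t = *cache;
        if (t != NULL) {
            *cache = NULL;
            return t;
        }
        return malloc(sizeof(Timer));
    }

    /* Park one freed entry in the cache; really free the rest. */
    static void free_timer(Timer **cache, Timer *t)
    {
        if (*cache == NULL)
            *cache = t;
        else
            free(t);
    }

In the diff that follows, add_multi_timer, remove_multi_timer and fire_multi_timers apply exactly this policy through the new mtd_cache field of the tcp descriptor.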
--- erts/emulator/drivers/common/inet_drv.c | 146 ++++++++++++++++++++------------ 1 file changed, 90 insertions(+), 56 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 222b369a3f..8a7cb7e6fa 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -38,6 +38,7 @@ #include #include #include +#include #define IDENTITY(c) c #define STRINGIFY_1(b) IDENTITY(#b) @@ -954,6 +955,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #endif #endif +typedef struct _tcp_descriptor tcp_descriptor; #define BIN_REALLOC_MARGIN(x) ((x)/4) /* 25% */ @@ -1003,19 +1005,19 @@ typedef struct _multi_timer_data { struct _multi_timer_data *prev; } MultiTimerData; -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, - ErlDrvTermData caller, unsigned timeout, - void (*timeout_fun)(ErlDrvData drv_data, - ErlDrvTermData caller)); -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + ErlDrvTermData caller, unsigned timeout, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data); -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p); -static void cancel_multi_timer(MultiTimerData **first, ErlDrvPort port, +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p); +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, void (*timeout_fun)(ErlDrvData drv_data, ErlDrvTermData caller)); static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller); -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port); +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port); typedef struct { int id; /* id used to identify reply */ @@ -1274,7 +1276,7 @@ static struct erl_drv_entry sctp_inet_driver_entry = }; #endif -typedef struct { +struct _tcp_descriptor { inet_descriptor inet; /* common data structure (DON'T MOVE) */ int high; /* high watermark */ int low; /* low watermark */ @@ -1290,7 +1292,8 @@ typedef struct { int http_state; /* 0 = response|request 1=headers fields */ inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */ inet_async_multi_op *multi_last; - MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd_cache; /* A cache for timer allocations */ #ifdef HAVE_SENDFILE struct { ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time @@ -1306,7 +1309,7 @@ typedef struct { Uint64 length; } sendfile; #endif -} tcp_descriptor; +}; /* send function */ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len); @@ -9679,6 +9682,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args) desc->tcp_add_flags = 0; desc->http_state = 0; desc->mtd = NULL; + desc->mtd_cache = NULL; desc->multi_first = desc->multi_last = NULL; DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port)); return (ErlDrvData) desc; @@ -9789,7 +9793,7 @@ static void tcp_close_check(tcp_descriptor* desc) else if (desc->inet.state == INET_STATE_CONNECTED) { async_error_am_all(INETP(desc), am_closed); } - clean_multi_timers(&(desc->mtd), desc->inet.port); + clean_multi_timers(desc, desc->inet.port); } /* @@ -10011,12 +10015,12 @@ static 
ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, if (time_left <= 0) { time_left = 1; } - omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller, + omtd = add_multi_timer(desc, desc->inet.port, ocaller, time_left, &tcp_inet_multi_timeout); } enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor); if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10031,7 +10035,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_xerror("noproc", rbuf, rsize); } if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10128,7 +10132,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, async_error_am(INETP(desc), am_timeout); else { if (timeout != INET_INFINITY) - add_multi_timer(&(desc->mtd), INETP(desc)->port, 0, + add_multi_timer(desc, INETP(desc)->port, 0, timeout, &tcp_inet_recv_timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); @@ -10269,9 +10273,9 @@ static void tcp_inet_timeout(ErlDrvData e) DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n", (long)desc->inet.port, desc->inet.s)); if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */ - fire_multi_timers(&(desc->mtd), desc->inet.port, e); + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) { - fire_multi_timers(&(desc->mtd), desc->inet.port, e); + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) { /* assume connect timeout */ @@ -10401,7 +10405,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) return; } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } if (desc->multi_first == NULL) { sock_select(INETP(desc),FD_ACCEPT,0); @@ -10439,7 +10443,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port)); } @@ -10457,7 +10461,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) if (!desc->inet.active) { /* We must cancel any timer here ! */ - clean_multi_timers(&(desc->mtd), INETP(desc)->port); + clean_multi_timers(desc, INETP(desc)->port); /* passive mode do not terminate port ! */ tcp_clear_input(desc); if (desc->inet.exitf) { @@ -10492,7 +10496,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; @@ -10501,7 +10505,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) } if (!desc->inet.active) { /* We must cancel any timer here ! 
*/ - clean_multi_timers(&(desc->mtd), INETP(desc)->port); + clean_multi_timers(desc, INETP(desc)->port); tcp_clear_input(desc); if (desc->inet.exitf) { tcp_desc_close(desc); @@ -10664,7 +10668,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len) len = 0; if (!desc->inet.active) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_recv_timeout); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout); sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); if (desc->i_buf != NULL) tcp_restart_input(desc); @@ -11086,7 +11090,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } driver_demonitor_process(desc->inet.port, &monitor); @@ -11145,7 +11149,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) if (IS_BUSY(INETP(desc))) { desc->inet.caller = desc->inet.busy_caller; if (desc->busy_on_send) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; @@ -11298,7 +11302,7 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - add_multi_timer(&(desc->mtd), INETP(desc)->port, + add_multi_timer(desc, INETP(desc)->port, 0 /* arg */, desc->send_timeout /* timeout */, &tcp_inet_send_timeout); } @@ -11398,7 +11402,7 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - add_multi_timer(&(desc->mtd), INETP(desc)->port, + add_multi_timer(desc, INETP(desc)->port, 0 /* arg */, desc->send_timeout /* timeout */, &tcp_inet_send_timeout); } @@ -11504,7 +11508,7 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) { /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } @@ -11837,7 +11841,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) set_busy_port(desc->inet.port, 0); /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - cancel_multi_timer(&(desc->mtd), INETP(desc)->port, &tcp_inet_send_timeout); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } inet_reply_ok(INETP(desc)); @@ -12817,11 +12821,11 @@ make_noninheritable_handle(SOCKET s) * Multi-timers */ -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data) { ErlDrvTime next_timeout; - MultiTimerData *curr = *first; + MultiTimerData *curr = desc->mtd; if (!curr) { ASSERT(0); return; @@ -12834,40 +12838,52 @@ static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, #endif do { MultiTimerData *save = curr; - *first = save->next; - curr = *first; + (*(save->timeout_function))(data,save->caller); - FREE(save); + + curr = curr->next; + + if (desc->mtd_cache == NULL) + desc->mtd_cache = save; + else + FREE(save); + if (curr == NULL) { + desc->mtd = NULL; return; } curr->prev = NULL; next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC); } while (next_timeout <= 0); + desc->mtd = 
curr; driver_set_timer(port, (unsigned long) next_timeout); } -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port) { - MultiTimerData *p; - if (*first) { + if (desc->mtd) { driver_cancel_timer(port); } - while (*first) { - p = *first; - *first = p->next; - FREE(p); + while (desc->mtd) { + MultiTimerData *p = desc->mtd; + desc->mtd = p->next; + FREE(p); + } + desc->mtd = NULL; + if (desc->mtd_cache) { + FREE(desc->mtd_cache); + desc->mtd_cache = NULL; } } -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p) +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p) { if (p->prev != NULL) { p->prev->next = p->next; } else { driver_cancel_timer(port); - *first = p->next; - if (*first) { - ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + desc->mtd = p->next; + if (desc->mtd) { + ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC); if (ntmo < 0) ntmo = 0; driver_set_timer(port, (unsigned long) ntmo); @@ -12876,50 +12892,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim if (p->next != NULL) { p->next->prev = p->prev; } - FREE(p); + if (desc->mtd_cache == NULL) + desc->mtd_cache = p; + else + FREE(p); } /* Cancel a timer based on the timeout_fun */ -static void cancel_multi_timer(MultiTimerData **first, ErlDrvPort port, +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, void (*timeout_fun)(ErlDrvData drv_data, ErlDrvTermData caller)) { - MultiTimerData *timer = *first; + MultiTimerData *timer = desc->mtd; while(timer && timer->timeout_function != timeout_fun) { timer = timer->next; } if (timer) { - remove_multi_timer(first, port, timer); + remove_multi_timer(desc, port, timer); } } -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, ErlDrvTermData caller, unsigned timeout, void (*timeout_fun)(ErlDrvData drv_data, ErlDrvTermData caller)) { MultiTimerData *mtd, *p, *s; - mtd = ALLOC(sizeof(MultiTimerData)); - mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout); + + /* Use cached timer if available */ + if (desc->mtd_cache != NULL) { + mtd = desc->mtd_cache; + desc->mtd_cache = NULL; + } else + mtd = ALLOC(sizeof(MultiTimerData)); + + if (timeout) + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout); + else + mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */ + mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; - for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { + + /* Find correct slot in timer linked list */ + for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) { if (p->when >= mtd->when) { break; } } + /* Insert in linked list */ if (!p) { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; } } else { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; @@ -12927,6 +12960,7 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, mtd->next = p; p->prev = mtd; } + /* Possibly set new timer */ if (!s) { if (mtd->next) { driver_cancel_timer(port); -- cgit v1.2.3 From d4440ce1793638f8daca35a32936d3005cc3be7a Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 18 Oct 2018 10:25:31 +0200 Subject: erts: Optimize 
driver_set_timer(0) to fire at once

By optimizing driver_set_timer for 0 we can use it instead of select
to do a yield in the driver. Useful for delay_send in the inet driver.

OTP-15472
---
 erts/emulator/beam/erl_hl_timer.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c
index 6ec6f8065e..ef7a55fa38 100644
--- a/erts/emulator/beam/erl_hl_timer.c
+++ b/erts/emulator/beam/erl_hl_timer.c
@@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo)

     check_canceled_queue(esdp, esdp->timer_service);

-    timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
-
-    create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
-                    ? create_tw_timer
-                    : create_hl_timer);
-    tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
-                                (void *) c_prt, c_prt->common.id,
-                                THE_NON_VALUE, NULL, NULL, NULL);
-    erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+    if (tmo == 0) {
+        erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT);
+        erts_port_task_schedule(c_prt->common.id,
+                                &c_prt->timeout_task,
+                                ERTS_PORT_TASK_TIMEOUT);
+    } else {
+
+        timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
+
+        create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
+                        ? create_tw_timer
+                        : create_hl_timer);
+        tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
+                                    (void *) c_prt, c_prt->common.id,
+                                    THE_NON_VALUE, NULL, NULL, NULL);
+        erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+    }
 }

 void
--
cgit v1.2.3


From a2d9a51091578da0ef41fbd91b281091d20aca2b Mon Sep 17 00:00:00 2001
From: Lukas Larsson
Date: Wed, 10 Oct 2018 16:19:02 +0200
Subject: erts: Implement delay_send using timer instead of poll

The previous implementation used a round-trip in the poll-set to
simulate a yield of the port context. With the poll thread
implementation this is no longer a good idea, as it generates a lot
more work for the system. This commit therefore changes the
implementation to use a timer instead.
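Once driver_set_timer(port, 0) is guaranteed to run the timeout callback as the next port task rather than going through the timer wheel, a driver can use it as a cheap yield primitive. A hedged sketch of the pattern this commit builds on; MyDesc, enqueue_data and flush_queue are assumed names, while driver_set_timer and the callback signatures match the erl_driver.h API:

    #include "erl_driver.h"

    typedef struct {
        ErlDrvPort port;
        /* ... queued output would live here ... */
    } MyDesc;

    /* Hypothetical helpers standing in for the driver queue logic. */
    static void enqueue_data(MyDesc *desc, char *buf, ErlDrvSizeT len)
    { (void)desc; (void)buf; (void)len; }
    static void flush_queue(MyDesc *desc) { (void)desc; }

    static void my_output(ErlDrvData d, char *buf, ErlDrvSizeT len)
    {
        MyDesc *desc = (MyDesc *)d;
        enqueue_data(desc, buf, len);     /* buffer instead of sending now */
        driver_set_timer(desc->port, 0);  /* yield: timeout fires "at once" */
    }

    static void my_timeout(ErlDrvData d)
    {
        flush_queue((MyDesc *)d);         /* perform the deferred send */
    }

The inet driver does the same thing one level up, through its multi-timer: in the diff below, tcp_sendv enqueues the data and arms a zero-timeout multi-timer whose callback, tcp_inet_delay_send, calls tcp_inet_output.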
OTP-15471 --- erts/emulator/drivers/common/inet_drv.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 8a7cb7e6fa..627e1c8ec2 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -11250,6 +11250,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err) return tcp_send_or_shutdown_error(desc, err); } +static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy) +{ + tcp_descriptor *desc = (tcp_descriptor*)data; + (void)tcp_inet_output(desc, INETP(desc)->s); +} + /* ** Send non-blocking vector data */ @@ -11319,7 +11325,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) INETP(desc)->is_ignored |= INET_IGNORE_WRITE; n = 0; } else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { - n = 0; + driver_enqv(ix, ev, 0); + add_multi_timer(desc, INETP(desc)->port, 0, + 0, &tcp_inet_delay_send); + return 0; } else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov, vsize, &n, 0))) { if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { @@ -11816,6 +11825,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) #ifdef __WIN32__ desc->inet.send_would_block = 1; #endif + /* If DELAY_SEND is set ready_output may have + been called without doing select so we do + a select in order to get into the correct + state */ + if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) + sock_select(INETP(desc), FD_WRITE, 1); goto done; } else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning 0 when sending packets with @@ -12962,9 +12977,6 @@ static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, } /* Possibly set new timer */ if (!s) { - if (mtd->next) { - driver_cancel_timer(port); - } driver_set_timer(port,timeout); } return mtd; -- cgit v1.2.3 From 3e6890af1d8d5ec9e7ae51bf3ed439906d43905b Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Thu, 18 Oct 2018 12:07:05 +0200 Subject: erts: Allow code_model_small to be set in xcomp setting OTP-15473 --- HOWTO/INSTALL-CROSS.md | 3 +++ erts/configure.in | 6 +++++- xcomp/erl-xcomp-vars.sh | 2 +- xcomp/erl-xcomp.conf.template | 5 +++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/HOWTO/INSTALL-CROSS.md b/HOWTO/INSTALL-CROSS.md index 3796bf8a59..0fb58cd4af 100644 --- a/HOWTO/INSTALL-CROSS.md +++ b/HOWTO/INSTALL-CROSS.md @@ -521,6 +521,9 @@ When a variable has been set, no warning will be issued. `posix_memalign` implementation that accepts larger than page size alignment. +* `erl_xcomp_code_model_small` - `yes|no`. Default to `no`. If `yes`, the target + system must place the beam.smp executable in the lower 2 GB of memory. That is it + should not use position independent executable. 
[$ERL_TOP/HOWTO/INSTALL.md]: INSTALL.md diff --git a/erts/configure.in b/erts/configure.in index bcdc6cd083..1b6d187bcb 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -3156,7 +3156,11 @@ AC_TRY_RUN([ ], erl_code_model_small=yes, erl_code_model_small=no, -erl_code_model_small=no) +[case X$erl_xcomp_code_model_small in + X) erl_code_model_small=no;; + Xyes|Xno) erl_code_model_small=$erl_xcomp_code_model_small;; + *) AC_MSG_ERROR([Bad erl_xcomp_code_model_small value: $erl_xcomp_code_model_small]);; + esac]) AC_MSG_RESULT([$erl_code_model_small]) LDFLAGS="$saved_LDFLAGS" case $erl_code_model_small in diff --git a/xcomp/erl-xcomp-vars.sh b/xcomp/erl-xcomp-vars.sh index e864f7b96b..0f168e1aa4 100644 --- a/xcomp/erl-xcomp-vars.sh +++ b/xcomp/erl-xcomp-vars.sh @@ -27,4 +27,4 @@ # and precious variables in $ERL_TOP/erts/aclocal.m4. # -erl_xcomp_vars="erl_xcomp_sysroot erl_xcomp_isysroot erl_xcomp_bigendian erl_xcomp_double_middle_endian erl_xcomp_linux_clock_gettime_correction erl_xcomp_linux_nptl erl_xcomp_linux_usable_sigusrx erl_xcomp_linux_usable_sigaltstack erl_xcomp_poll erl_xcomp_kqueue erl_xcomp_putenv_copy erl_xcomp_reliable_fpe erl_xcomp_getaddrinfo erl_xcomp_gethrvtime_procfs_ioctl erl_xcomp_clock_gettime_cpu_time erl_xcomp_after_morecore_hook erl_xcomp_dlsym_brk_wrappers erl_xcomp_posix_memalign" +erl_xcomp_vars="erl_xcomp_sysroot erl_xcomp_isysroot erl_xcomp_bigendian erl_xcomp_double_middle_endian erl_xcomp_linux_clock_gettime_correction erl_xcomp_linux_nptl erl_xcomp_linux_usable_sigusrx erl_xcomp_linux_usable_sigaltstack erl_xcomp_poll erl_xcomp_kqueue erl_xcomp_putenv_copy erl_xcomp_reliable_fpe erl_xcomp_getaddrinfo erl_xcomp_gethrvtime_procfs_ioctl erl_xcomp_clock_gettime_cpu_time erl_xcomp_after_morecore_hook erl_xcomp_dlsym_brk_wrappers erl_xcomp_posix_memalign erl_xcomp_code_model_small" diff --git a/xcomp/erl-xcomp.conf.template b/xcomp/erl-xcomp.conf.template index e9bbebb960..8ac187ad69 100644 --- a/xcomp/erl-xcomp.conf.template +++ b/xcomp/erl-xcomp.conf.template @@ -271,4 +271,9 @@ # alignment. #erl_xcomp_posix_memalign= +# * `erl_xcomp_code_model_small` - `yes|no`. Default to `no`. If `yes`, the target +# system must place the beam.smp executable in the lower 2 GB of memory. That is it +# should not use position independent executable. 
+#erl_xcomp_code_model_small=
+
 ## -----------------------------------------------------------------------------
--
cgit v1.2.3


From 790aa06d380b5842fe2fdd984bdde4f160b3adaa Mon Sep 17 00:00:00 2001
From: Lukas Larsson
Date: Wed, 24 Oct 2018 17:08:03 +0200
Subject: erts: Fix lists_member_2 reduction count

OTP-15474
---
 erts/emulator/beam/erl_bif_lists.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 395be67a90..2a2b94c831 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -249,7 +249,8 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
     Eterm list;
     Eterm item;
     int non_immed_key;
-    int max_iter = 10 * CONTEXT_REDS;
+    int reds_left = ERTS_BIF_REDS_LEFT(BIF_P);
+    int max_iter = 16 * reds_left;
 
     if (is_nil(BIF_ARG_2)) {
         BIF_RET(am_false);
@@ -267,14 +268,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
         }
         item = CAR(list_val(list));
         if ((item == term) || (non_immed_key && eq(item, term))) {
-            BIF_RET2(am_true, CONTEXT_REDS - max_iter/10);
+            BIF_RET2(am_true, reds_left - max_iter/16);
         }
         list = CDR(list_val(list));
     }
     if (is_not_nil(list)) {
+        BUMP_REDS(BIF_P, reds_left - max_iter/16);
         BIF_ERROR(BIF_P, BADARG);
     }
-    BIF_RET2(am_false, CONTEXT_REDS - max_iter/10);
+    BIF_RET2(am_false, reds_left - max_iter/16);
 }
 
 static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
--
cgit v1.2.3


From c6498571109b524fb319300e1b177b942e556f1b Mon Sep 17 00:00:00 2001
From: Lukas Larsson
Date: Fri, 12 Oct 2018 18:16:17 +0200
Subject: erts: Move fds with active true behaviour to own pollset

At the start of the VM a poll-set is created that the schedulers will
check, and fds that have triggered many times (at the moment, many
means 10) without being deselected in between are moved into it. In
this scheduler-specific poll-set fds do not use ONESHOT, which means
that the number of syscalls goes down dramatically for such fds.

This pollset is introduced in order to handle fds that are used by the
Erlang distribution and that never change their state from
{active, true}. This pollset only handles ready_input events;
ready_output is still handled by the poll threads.

During overload, polling the scheduler poll-set is done on a 10ms
timer.
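Condensed from the erl_check_io.c hunk further down, the promotion
heuristic after a ready_input task has executed looks roughly like this
(the threshold of 10, the flags, and the poll op come from the patch;
locking and the surrounding bookkeeping are omitted):

    /* After a ready_input task has run, decide where to re-arm the fd. */
    if (state->events & ERTS_POLL_EV_IN) {
        active_events |= ERTS_POLL_EV_IN;
        if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
            /* Hot fd: move it to the scheduler pollset, which does not
             * use ONESHOT and thus needs no re-arming syscall per event. */
            if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
                op = ERTS_POLL_OP_ADD;
            state->flags |= ERTS_EV_FLAG_IN_SCHEDULER | ERTS_EV_FLAG_SCHEDULER;
        }
        state->count++;   /* reset to 0 when the fd is deselected */
    }

Deselecting the fd resets the count and pulls it back out of the
scheduler pollset, and when all schedulers are busy the scheduler
pollset is instead drained from the check_io_timer callback every
ERTS_POLL_SCHEDULER_POLLING_TIMEOUT (10) ms.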
--- erts/emulator/beam/erl_port.h | 2 + erts/emulator/beam/erl_port_task.c | 81 ++++- erts/emulator/beam/erl_port_task.h | 21 +- erts/emulator/beam/erl_process.c | 122 +++++++- erts/emulator/beam/erl_process.h | 4 +- erts/emulator/sys/common/erl_check_io.c | 310 ++++++++++++++------ erts/emulator/sys/common/erl_check_io.h | 16 +- erts/emulator/sys/common/erl_poll.c | 504 ++++++++++++++++++++++++-------- erts/emulator/sys/common/erl_poll.h | 14 +- erts/emulator/sys/common/erl_poll_api.h | 4 +- erts/emulator/sys/win32/erl_poll.c | 5 +- erts/emulator/test/scheduler_SUITE.erl | 27 +- erts/emulator/test/signal_SUITE.erl | 2 +- 13 files changed, 843 insertions(+), 269 deletions(-) diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h index 2be0a5bf74..25976d38cc 100644 --- a/erts/emulator/beam/erl_port.h +++ b/erts/emulator/beam/erl_port.h @@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p); #define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11)) /* Last port to terminate halts the emulator */ #define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12)) +/* Check if the event in ready_input should be cleaned */ +#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13)) #ifdef DEBUG /* Only debug: make sure all flags aren't cleared unintentionally */ #define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31)) diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 4928d80f27..c8f2e88127 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q typedef union { struct { /* I/O tasks */ ErlDrvEvent event; +#if ERTS_POLL_USE_SCHEDULER_POLLING + int is_scheduler_event; +#endif } io; struct { ErtsProc2PortSigCallback callback; @@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ { ErtsPortTaskBusyCaller pre_alloc_busy_caller; }; +#if ERTS_POLL_USE_SCHEDULER_POLLING +erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif static void begin_port_cleanup(Port *pp, ErtsPortTask **execq, @@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp) } static ERTS_INLINE void -reset_executed_io_task_handle(ErtsPortTask *ptp) +reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp) { if (ptp->u.alive.handle) { ASSERT(ptp == handle2task(ptp->u.alive.handle)); - /* The port task handle is reset inside task_executed */ - erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, - reset_port_task_handle); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) { + if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) { + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP); + } else { + reset_port_task_handle(ptp->u.alive.handle); + } + } else +#endif + { + /* The port task handle is reset inside task_executed */ + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + } } } @@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp) res = - 1; /* Task already aborted, executing, or executed */ else { reset_port_task_handle(pthp); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + switch (ptp->type) { + case ERTS_PORT_TASK_INPUT: + case ERTS_PORT_TASK_OUTPUT: + if (ptp->u.alive.td.io.is_scheduler_event) { + ASSERT(erts_atomic_read_nob( + &erts_port_task_outstanding_io_tasks) > 0); + 
erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); + } + break; + default: + break; + } +#endif + res = 0; } } @@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id, va_list argp; va_start(argp, type); ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent); +#if ERTS_POLL_USE_SCHEDULER_POLLING + ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int); +#endif va_end(argp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks); +#endif break; } case ERTS_PORT_TASK_PROC_SIG: { @@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) int processing_busy_q; int vreds = 0; int reds = 0; - erts_aint_t io_tasks_executed = 0; int fpe_was_unmasked; erts_aint32_t state; int active; Uint64 start_time = 0; ErtsSchedulerData *esdp = runq->scheduler; +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_aint_t io_tasks_executed = 0; +#endif ERTS_MSACC_PUSH_STATE_M(); ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); @@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) for input and output */ (*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_OUTPUT: reds = ERTS_PORT_REDS_OUTPUT; @@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) LTTNG_DRIVER(driver_ready_output, pp); (*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_PROC_SIG: { ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data; @@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_unblock_fpe(fpe_was_unmasked); ERTS_MSACC_POP_STATE_M(); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (io_tasks_executed) { + ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) + >= io_tasks_executed); + erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks, + -1*io_tasks_executed); + } +#endif + ASSERT(runq == erts_get_runq_port(pp)); active = finalize_exec(pp, &execq, processing_busy_q); @@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq) void erts_port_task_init(void) { +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks, + (erts_aint_t) 0); +#endif init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads + 1); /* aux_thread */ init_busy_caller_table_alloc(); diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h index ae78a7d8a3..ca5183b305 100644 --- a/erts/emulator/beam/erl_port_task.h +++ b/erts/emulator/beam/erl_port_task.h @@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #ifndef ERL_PORT_TASK_H__ #define ERL_PORT_TASK_H__ +#include "erl_poll.h" + #undef ERTS_INCLUDE_SCHEDULER_INTERNALS #if (defined(ERL_PROCESS_C__) \ || defined(ERL_PORT_TASK_C__) \ @@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #define ERTS_PT_FLG_BAD_OUTPUT (1 << 4) typedef enum { - ERTS_PORT_TASK_INPUT, - ERTS_PORT_TASK_OUTPUT, + ERTS_PORT_TASK_INPUT = 0, + ERTS_PORT_TASK_OUTPUT = 1, ERTS_PORT_TASK_TIMEOUT, 
ERTS_PORT_TASK_DIST_CMD, ERTS_PORT_TASK_PROC_SIG @@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp); +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void); +/* NOTE: Do not access any of the exported variables directly */ +extern erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE void @@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp) erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING); } +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int +erts_port_task_have_outstanding_io_tasks(void) +{ + return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks) + != 0); +} +#endif + #endif #ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 919d9f9edf..2427d87f66 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; typedef struct { int aux_work; int tse; - int sys_schedule; } ErtsBusyWaitParams; static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1]; @@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat; static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key); +#if ERTS_POLL_USE_SCHEDULER_POLLING +static erts_atomic32_t doing_sys_schedule; +#endif static erts_atomic32_t no_empty_run_queues; long erts_runq_supervision_interval = 0; static ethr_event runq_supervision_event; @@ -3093,7 +3095,11 @@ aux_thread(void *unused) awdp->ssi = ssi; #if ERTS_POLL_USE_FALLBACK +#if ERTS_POLL_USE_SCHEDULER_POLLING + ssi->psi = erts_create_pollset_thread(-2, tpd); +#else ssi->psi = erts_create_pollset_thread(-1, tpd); +#endif #endif sched_prep_spin_wait(ssi); @@ -3133,7 +3139,7 @@ aux_thread(void *unused) if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - erts_check_io(ssi->psi); + erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT); } } #else @@ -3231,7 +3237,7 @@ poll_thread(void *arg) if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - erts_check_io(psi); + erts_check_io(psi, ERTS_POLL_INF_TIMEOUT); } } } @@ -3241,6 +3247,59 @@ poll_thread(void *arg) return NULL; } +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ERTS_INLINE void +clear_sys_scheduling(void) +{ + erts_atomic32_set_mb(&doing_sys_schedule, 0); +} + +static ERTS_INLINE int +try_set_sys_scheduling(void) +{ + return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0); +} + + +static ERTS_INLINE int +prepare_for_sys_schedule(void) +{ + while (!erts_port_task_have_outstanding_io_tasks() + && try_set_sys_scheduling()) { + if (!erts_port_task_have_outstanding_io_tasks()) + return 1; + clear_sys_scheduling(); + } + return 0; +} + +static void +check_io_timer(void *null) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + if (prepare_for_sys_schedule()) { + erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT); + clear_sys_scheduling(); + } + + /* The timer is cleared if this schedulers run-queue became empty + or if the CHECKIO flag was cleared. 
The CHECKIO flags is cleared + when a check_balance assigns another scheduler to be the poller in + the overload scenario. */ + if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO)) + == ERTS_RUNQ_FLG_CHECKIO) { + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, + check_io_timer, NULL); + } else { + ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO); + } +} + +#else +#define clear_sys_scheduling() +#define prepare_for_sys_schedule() 0 +#endif + static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { @@ -3330,7 +3389,25 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) sched_wall_time_change(esdp, 1); } } - else { + else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) { + /* We sleep in check_io, only for normal schedulers */ + if (thr_prgr_active) { + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + flgs = sched_spin_wait(ssi, 0); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi, timeout_time); + current_time = erts_get_monotonic_time(esdp); + } + } + clear_sys_scheduling(); + } else { if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (thr_prgr_active) { erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); @@ -3338,7 +3415,6 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp)); } - flgs = sched_spin_wait(ssi, spincount); if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_WAITING); @@ -4585,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq) if (blnc_no_rqs == 1) { c_rq->check_balance_reds = INT_MAX; erts_atomic32_set_nob(&balance_info.checking_balance, 0); +#if ERTS_POLL_USE_SCHEDULER_POLLING + c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO)) + == 0) { + ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO); + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL); + } + ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO); +#endif return; } @@ -5104,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n"); /* Publish new migration paths... 
*/ erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (full_scheds == current_active) { + ERTS_ASSERT(full_scheds <= current_active); + /* All active schedulers ran for full, we need to do active polling, + so we setup a timer that does active polling */ + if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) { + /* Active polling is not running, start it */ + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL); + } + run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO; + } +#endif + /* Reset balance statistics in all online queues */ for (qix = 0; qix < blnc_no_rqs; qix++) { Uint32 flags = run_queue_info[qix].flags; @@ -5113,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n"); ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK)); if (rq->waiting) flags |= ERTS_RUNQ_FLG_OUT_OF_WORK; + if (rq != c_rq) + flags &= ~ERTS_RUNQ_FLG_CHECKIO; rq->full_reds_history_sum = run_queue_info[qix].full_reds_history_sum; @@ -5122,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n"); ERTS_DBG_CHK_FULL_REDS_HISTORY(rq); rq->out_of_work_count = 0; - (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags); - + (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags); rq->max_len = erts_atomic32_read_dirty(&rq->len); for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { ErtsRunQueueInfo *rqi; @@ -5562,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str) return EINVAL; } - params->sys_schedule = sys_sched; params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT; params->aux_work = sys_sched * aux_work_fact; @@ -5773,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs; erts_aligned_run_queues = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs); +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_atomic32_init_nob(&doing_sys_schedule, 0); +#endif erts_atomic32_init_nob(&no_empty_run_queues, 0); erts_no_run_queues = n; @@ -8302,6 +8403,11 @@ sched_thread_func(void *vesdp) erts_msacc_init_thread("scheduler", no, 1); erts_thr_progress_register_managed_thread(esdp, &callbacks, 0); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); +#endif + erts_alloc_register_scheduler(vesdp); #ifdef ERTS_ENABLE_LOCK_CHECK { diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 8d20ccdf90..a1b029adbe 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size; (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9)) #define ERTS_RUNQ_FLG_HALTING \ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10)) +#define ERTS_RUNQ_FLG_CHECKIO \ + (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11)) -#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11) +#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12) #define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \ (ERTS_RUNQ_FLGS_EMIGRATE_QMASK \ diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 1444cee805..c681fa481f 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -46,11 +46,11 @@ #if 0 #define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) #define DEBUG_PRINT_FD(FMT, STATE, ...) 
\ - DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ - (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR) + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) #define DEBUG_PRINT_MODE #else #define DEBUG_PRINT(...) @@ -76,22 +76,40 @@ typedef enum { typedef enum { ERTS_EV_FLAG_CLEAR = 0, ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ -#ifdef ERTS_ENABLE_KERNEL_POLL - ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ +#else + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, +#endif +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd and it was put in the nkp version */ #else ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, #endif /* Combinations */ - ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER } EventStateFlags; #define flag2str(flags) \ ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ - ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR")))) + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) /* How many events that can be handled at once by one erts_poll_wait call */ #define ERTS_CHECK_IO_POLL_RES_LEN 512 @@ -113,10 +131,13 @@ typedef struct erts_poll_thread * Which pollset to use is determined by hashing the fd. */ static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; #if ERTS_POLL_USE_FALLBACK static ErtsPollSet *flbk_pollset; #endif -static ErtsPollThread *psiv; +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; +#endif typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -131,10 +152,12 @@ typedef struct { ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ } stop; } driver; - ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents events; /* The events that have been selected upon */ ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of times this fd has triggered + without being deselected. 
*/ } ErtsDrvEventState; struct drv_ev_state_shared { @@ -371,12 +394,22 @@ get_pollset(ErtsSysFdType fd) #if ERTS_POLL_USE_FALLBACK static ERTS_INLINE ErtsPollSet * -get_fallback(void) +get_fallback_pollset(void) { return flbk_pollset; } #endif +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; +#else + return get_pollset(fd); +#endif +} + /* * Place a fd within a pollset. This will automatically use * the fallback ps if needed. @@ -392,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); if (!(flags & ERTS_EV_FLAG_FALLBACK)) { - res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + + if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + } #if ERTS_POLL_USE_FALLBACK if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { /* When an add fails with NVAL, the poll/kevent operation could not put that fd in the pollset, so we instead put it into a fallback pollset */ state->flags |= ERTS_EV_FLAG_FALLBACK; - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); } } else { ASSERT(op != ERTS_POLL_OP_ADD); - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); #endif } @@ -426,7 +468,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); ErtsSysFdType fd = itp->fd; erts_mtx_t *mtx = fd_mtx(fd); - int active_events; + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; ErtsDrvEventState *state; ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; @@ -436,51 +479,66 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, erts_mtx_lock(mtx); state = get_drv_ev_state(fd); + reset_handle(pthp); + active_events = state->active_events; - switch (type) { - case ERTS_PORT_TASK_INPUT: + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: - DEBUG_PRINT_FD("executed ready_input", state); + DEBUG_PRINT_FD("executed ready_output", state); - ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); - if (state->events & ERTS_POLL_EV_IN) - active_events |= ERTS_POLL_EV_IN; - break; - case ERTS_PORT_TASK_OUTPUT: + ASSERT(!(state->active_events & 
ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } - DEBUG_PRINT_FD("executed ready_output", state); + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } - ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); - if (state->events & ERTS_POLL_EV_OUT) - active_events |= ERTS_POLL_EV_OUT; - break; - default: - erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); - break; + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } - reset_handle(pthp); - - if (active_events) { - /* This is not needed if active_events has not changed */ - if (state->active_events != active_events) { - ErtsPollEvents new_events; - state->active_events = active_events; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events); - - /* We were unable to re-insert the fd into the pollset, signal the callback. */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (active_events & ERTS_POLL_EV_IN) - iready(state->driver.select->inport, state); - if (active_events & ERTS_POLL_EV_OUT) - oready(state->driver.select->outport, state); - state->active_events = 0; - } - } - } else { + if (!active_events) check_fd_cleanup(state, &free_select, &free_nif); - } erts_mtx_unlock(mtx); @@ -760,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { ctl_op = ERTS_POLL_OP_ADD; } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } else { ctl_events &= old_events; state->events &= ~ctl_events; state->active_events &= ~ctl_events; + new_events = state->active_events; + + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } if (!state->events) { if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) @@ -775,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { new_events = erts_io_control_wakeup(state, ctl_op, - state->active_events, + new_events, &wake_poller); ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); @@ -807,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events & ERTS_POLL_EV_IN) { abort_tasks(state, ERL_DRV_READ); state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; } if (ctl_events & ERTS_POLL_EV_OUT) { abort_tasks(state, ERL_DRV_WRITE); @@ -815,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (state->events == 0) { if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); state->flags = 0; } /*else keep it, as fd will 
probably be selected upon again */ @@ -1431,7 +1503,8 @@ iready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); } else { DEBUG_PRINT_FD("schedule ready_input(%T, %d)", @@ -1449,7 +1522,8 @@ oready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + 0) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); } else { DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); @@ -1511,7 +1585,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) { if (psi) { #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { erts_poll_interrupt_flbk(psi->ps, set); return; } @@ -1527,7 +1601,7 @@ erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { } void -erts_check_io(ErtsPollThread *psi) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) { int pollres_len; int poll_ret, i; @@ -1542,14 +1616,14 @@ erts_check_io(ErtsPollThread *psi) pollres_len = psi->pollres_len; #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { - poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } else #endif { - poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd); + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } #ifdef ERTS_ENABLE_LOCK_CHECK @@ -1585,7 +1659,12 @@ erts_check_io(ErtsPollThread *psi) ErtsNifSelectDataState *free_nif = NULL; ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; - ErtsPollEvents revents; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); + + /* The fd will be set to -1 if a pollset internal fd was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; erts_mtx_lock(fd_mtx(fd)); @@ -1596,8 +1675,6 @@ erts_check_io(ErtsPollThread *psi) continue; } - revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); if (revents & ERTS_POLL_EV_ERR) { @@ -1609,25 +1686,39 @@ erts_check_io(ErtsPollThread *psi) */ revents = state->active_events; state->active_events = 0; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } } else { /* Disregard any events that are not active at the moment, for instance this could happen if the driver/nif does select/deselect in rapid succession. 
*/ revents &= state->active_events | ERTS_POLL_EV_NVAL; - state->active_events &= ~revents; - /* Reactivate the poll op if there are still active events */ - if (state->active_events) { - ErtsPollEvents new_events; - DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events)); + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; + + reactive_events = state->active_events; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; - /* Unable to re-enable the fd, signal all callbacks */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - revents |= state->active_events; - state->active_events = 0; + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } } } } @@ -1703,7 +1794,7 @@ erts_check_io(ErtsPollThread *psi) case ERTS_EV_TYPE_STOP_USE: { #if ERTS_POLL_USE_FALLBACK - ASSERT(psi->ps == get_fallback()); + ASSERT(psi->ps == get_fallback_pollset()); #endif drv_ptr = state->driver.stop.drv_ptr; state->type = ERTS_EV_TYPE_NONE; @@ -2041,12 +2132,17 @@ erts_init_check_io(int *argc, char **argv) for (j=0; j < erts_no_pollsets; j++) pollsetv[j] = erts_poll_create_pollset(j); -#if ERTS_POLL_USE_FALLBACK - flbk_pollset = erts_poll_create_pollset_flbk(-1); + no_poll_threads = erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; #endif - no_poll_threads = erts_no_poll_threads; #if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); no_poll_threads++; #endif @@ -2056,7 +2152,15 @@ erts_init_check_io(int *argc, char **argv) psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); - psiv[0].ps = get_fallback(); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); psiv++; #endif @@ -2113,7 +2217,12 @@ erts_check_io_size(void) int i; #if ERTS_POLL_USE_FALLBACK - erts_poll_info(get_fallback(), &pi); + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); res += pi.memory_size; #endif @@ -2145,13 +2254,21 @@ erts_check_io_info(void *proc) Uint sz, *szp, *hp, **hpp; ErtsPollInfo *piv; Sint i, j = 0, len; - int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); #if ERTS_POLL_USE_FALLBACK - 
erts_poll_info_flbk(get_fallback(), &piv[0]); + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); piv[0].poll_threads = 1; piv[0].active_fds = 0; piv++; @@ -2205,6 +2322,7 @@ erts_check_io_info(void *proc) sz = 0; piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; bld_it: @@ -2309,15 +2427,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) static ERTS_INLINE void print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) { - const char* delim = ""; - if(f & ERTS_EV_FLAG_USED) { - erts_dsprintf(dsbufp, "%s","USED"); - delim = "|"; - } - if(f & ERTS_EV_FLAG_FALLBACK) { - erts_dsprintf(dsbufp, "%s%s", delim, "FLBK"); - delim = "|"; - } + erts_dsprintf(dsbufp, "%s", flag2str(f)); } #ifdef DEBUG_PRINT_MODE @@ -2659,13 +2769,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) #if ERTS_POLL_USE_FALLBACK erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); - erts_poll_get_selected_events_flbk(get_fallback(), counters.epep, + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } #endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); for (i = 0; i < erts_no_pollsets; i++) { @@ -2674,8 +2797,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) - && get_pollset_id(fd) == i) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } } for (fd = len ; fd < drv_ev_state.max_fds; fd++) { @@ -2722,7 +2852,7 @@ void erts_lcnt_update_cio_locks(int enable) { #endif #if ERTS_POLL_USE_FALLBACK - erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable); + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); #endif for (i = 0; i < erts_no_pollsets; i++) diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 16aba8a5f3..31182be5ec 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -68,7 +68,7 @@ int erts_check_io_max_files(void); * * @param pt the poll thread structure to use. */ -void erts_check_io(struct erts_poll_thread *pt); +void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time); /** * Initialize the check io framework. 
This function will parse the arguments * and delete any entries that it is interested in. @@ -129,16 +129,6 @@ extern int erts_no_poll_threads; #include "erl_poll.h" #include "erl_port_task.h" -#ifdef __WIN32__ -/* - * Current erts_poll implementation for Windows cannot handle - * active events in the set of events polled. - */ -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#endif - typedef struct { Eterm inport; Eterm outport; @@ -150,10 +140,6 @@ struct erts_nif_select_event { Eterm pid; Eterm immed; Uint32 refn[ERTS_REF_NUMBERS]; - Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress - * 1: Do deselect before next poll - * >1: Countdown of ignored events - */ }; typedef struct { diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index 5a5874ddfa..51d50933ff 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -121,7 +121,8 @@ /* Define to print info about modifications done to each fd */ #define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) /* Define to print entry and exit from erts_poll_wait (can be very spammy) */ -//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0) #else #define ERTS_POLL_DEBUG_PRINT 0 @@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, #define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE) +#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0) #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; + int oneshot; #endif /* ERTS_POLL_USE_KERNEL_POLL */ #if ERTS_POLL_USE_POLL @@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) { ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; erts_atomic32_t have_update_requests; erts_mtx_t mtx; - erts_atomic32_t wakeup_state; +#else + int do_wakeup; #endif -#if ERTS_POLL_USE_WAKEUP_PIPE - int wake_fds[2]; +#if ERTS_POLL_USE_TIMERFD + int timer_fd; #endif + ErtsMonotonicTime timeout_time; + erts_atomic32_t wakeup_state; + int wake_fds[2]; }; void erts_silence_warn_unused_result(long unused); @@ -365,63 +371,47 @@ static void print_misc_debug_info(void); uint32_t epoll_events(int kp_fd, int fd); #endif - #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 -#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void reset_wakeup_state(ErtsPollSet *ps) { erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } -#endif static ERTS_INLINE int is_woken(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; -#else - return 0; -#endif } static ERTS_INLINE int is_interrupted_reset(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); -#else - return 0; -#endif } static ERTS_INLINE void woke_up(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) 
erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); -#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ -#if ERTS_POLL_USE_WAKEUP_PIPE - static ERTS_INLINE void wake_poller(ErtsPollSet *ps, int interrupted) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE int wake; erts_aint32_t wakeup_state; if (!interrupted) @@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted) wake = wakeup_state == ERTS_POLL_NOT_WOKEN; if (wake) -#endif { ssize_t res; + DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted); if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ do { @@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps) fd, erl_errno_id(errno), errno); } -#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (intr) erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); -#endif } static void @@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps) ps->wake_fds[1] = wake_fds[1]; } +/* + * --- timer fd ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_TIMERFD + +/* We use the timerfd when using epoll_wait to get high accuracy + timeouts, i.e. we want to sleep with < ms accuracy. */ + +static void +create_timerfd(ErtsPollSet *ps) +{ + int do_wake = 0; + int timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + ERTS_POLL_EXPORT(erts_poll_control)(ps, + timer_fd, + ERTS_POLL_OP_ADD, + ERTS_POLL_EV_IN, + &do_wake); + if (ps->internal_fd_limit <= timer_fd) + ps->internal_fd_limit = timer_fd + 1; + ps->timer_fd = timer_fd; +} + +static ERTS_INLINE void +timerfd_set(ErtsPollSet *ps, struct itimerspec *its) +{ +#ifdef DEBUG + struct itimerspec old_its; + int res; + res = timerfd_settime(ps->timer_fd, 0, its, &old_its); + ASSERT(res == 0); + ASSERT(old_its.it_interval.tv_sec == 0 && + old_its.it_interval.tv_nsec == 0 && + old_its.it_value.tv_sec == 0 && + old_its.it_value.tv_nsec == 0); + +#else + timerfd_settime(ps->timer_fd, 0, its, NULL); #endif +} + +static ERTS_INLINE int +timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) { + + struct itimerspec its; + /* we always have to clear the timer */ + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 0; + timerfd_settime(ps->timer_fd, 0, &its, NULL); + + /* only timeout fd triggered */ + if (res == 1 && pr[0].data.fd == ps->timer_fd) + return 0; + + return res; +} + +#endif /* ERTS_POLL_USE_TIMERFD */ /* * --- Poll set update requests ---------------------------------------------- @@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) struct epoll_event epe_templ; struct epoll_event epe; - epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT; + epe_templ.events = ERTS_POLL_EV_E2N(events); epe_templ.data.fd = fd; + if (ps->oneshot) + epe_templ.events |= EPOLLONESHOT; + #ifdef VALGRIND /* Silence invalid valgrind warning ... */ memset((void *) &epe.data, 0, sizeof(epoll_data_t)); @@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) int res = 0, len = 0; struct kevent evts[2]; struct timespec ts = {0, 0}; + uint32_t oneshot = 0; if (op == ERTS_POLL_OP_ADD) { /* This is a hack to make the "noshell" option work; kqueue can poll @@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) man page), but it seems to be the way it works... 
*/ + if (ps->oneshot) + oneshot = EV_DISPATCH; + if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); /* We could probably skip this delete, do we want to? */ @@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) uint32_t flags; erts_atomic_inc_nob(&ps->no_of_user_fds); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } else { uint32_t flags; ASSERT(op == ERTS_POLL_OP_MOD); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } #else - uint32_t flags = EV_ADD|EV_ONESHOT; + uint32_t flags = EV_ADD; + + if (ps->oneshot) flags |= EV_ONESHOT; if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); @@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd); for (i = 0; i < len; i++) { const char *flags = "UNKNOWN"; - if (evts[i].flags == EV_DELETE) flags = "EV_DELETE"; + if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE"; if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT"; + if (evts[i].flags == (EV_ADD)) flags = "EV_ADD"; #ifdef EV_DISPATCH if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH"; if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH"; - if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE"; + if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; #endif keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "", @@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op, goto done; } #endif -#if ERTS_POLL_USE_WAKEUP_PIPE if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { new_events = ERTS_POLL_EV_NVAL; goto done; } +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } #endif } @@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, ERTS_POLLSET_UNLOCK(ps); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (*do_wake) { + if (*do_wake) wake_poller(ps, 0); - } -#endif return res; } @@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, static ERTS_INLINE int ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE int n = chk_fds_res < max_res ? 
chk_fds_res : max_res, i; int res = n; -#if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; -#endif - for (i = 0; i < n; i++) { - int fd = ERTS_POLL_RES_GET_FD(&pr[i]); -#ifdef DEBUG_PRINT_MODE - ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); -#endif + if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) { - DEBUG_PRINT_FD("trig %s (%s)", ps, fd, - ev2str(evts), + for (i = 0; i < n; i++) { + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#if ERTS_POLL_DEBUG_PRINT + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); + + if (fd != wake_fd +#if ERTS_POLL_USE_TIMERFD + && fd != ps->timer_fd +#endif + ) + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - "kqueue" + "kqueue" #elif ERTS_POLL_USE_EPOLL - "epoll" + "epoll" #else - "/dev/poll" + "/dev/poll" +#endif + ); #endif - ); -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); - if (n == 1) - return 0; - } + if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } +#if ERTS_POLL_USE_TIMERFD + else if (fd == ps->timer_fd) { + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } #endif #if !ERTS_POLL_USE_CONCURRENT_UPDATE - else { - /* Reset the events to emulate ONESHOT semantics */ - ps->fds_status[fd].events = 0; - enqueue_update_request(ps, fd); - } + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } #endif + } } - return res; -#else - ASSERT(chk_fds_res <= max_res); - return chk_fds_res; -#endif + if (res == 0) + return res; + else + return n; } #else /* !ERTS_POLL_USE_KERNEL_POLL */ @@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, #endif /* !ERTS_POLL_USE_KERNEL_POLL */ +static ERTS_INLINE ErtsMonotonicTime +get_timeout(ErtsPollSet *ps, + int resolution, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout; + + if (timeout_time == ERTS_POLL_NO_TIMEOUT) { + timeout = 0; + } + else if (timeout_time == ERTS_POLL_INF_TIMEOUT) { + timeout = -1; + } + else { + ErtsMonotonicTime diff_time, current_time; + current_time = erts_get_monotonic_time(NULL); + diff_time = timeout_time - current_time; + if (diff_time <= 0) { + timeout = 0; + } + else { + switch (resolution) { + case 1000: + /* Round up to nearest even milli second */ + timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; + if (timeout > (ErtsMonotonicTime) INT_MAX) + timeout = (ErtsMonotonicTime) INT_MAX; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); + break; + case 1000000: + /* Round up to nearest even micro second */ + timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); + break; + case 1000000000: + /* Round up to nearest even nano second */ + timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); + break; + default: + ERTS_INTERNAL_ERROR("Invalid resolution"); + timeout = 0; + break; + } + } + } + return timeout; +} + +#if ERTS_POLL_USE_SELECT + +static ERTS_INLINE int +get_timeout_timeval(ErtsPollSet *ps, + SysTimeval *tvp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000, + timeout_time); + + if (!timeout) { + tvp->tv_sec = 0; + tvp->tv_usec = 0; + + return 
0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000); + tvp->tv_sec = sec; + tvp->tv_usec = timeout - sec*(1000*1000); + + ASSERT(tvp->tv_sec >= 0); + ASSERT(tvp->tv_usec >= 0); + ASSERT(tvp->tv_usec < 1000*1000); + + return 1; + } + +} + +#endif + +#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD + static ERTS_INLINE int -check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) +get_timeout_timespec(ErtsPollSet *ps, + struct timespec *tsp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000*1000, + timeout_time); + + if (!timeout) { + tsp->tv_sec = 0; + tsp->tv_nsec = 0; + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000*1000); + tsp->tv_sec = sec; + tsp->tv_nsec = timeout - sec*(1000*1000*1000); + + ASSERT(tsp->tv_sec >= 0); + ASSERT(tsp->tv_nsec >= 0); + ASSERT(tsp->tv_nsec < 1000*1000*1000); + + return 1; + } +} + +#endif + +#if ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_itimerspec(ErtsPollSet *ps, + struct itimerspec *itsp, + ErtsMonotonicTime timeout_time) +{ + + itsp->it_interval.tv_sec = 0; + itsp->it_interval.tv_nsec = 0; + + return get_timeout_timespec(ps, &itsp->it_value, timeout_time); +} + +#endif + +static ERTS_INLINE int +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time) { int res; - int timeout = do_wait ? -1 : 0; - DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait); + int timeout; + DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time); { #if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#if ERTS_POLL_USE_TIMERFD + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout > 0) { + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, pr, max_res, -1); + res = timerfd_clear(ps, pr, res, max_res); + } else { + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + } +#else /* !ERTS_POLL_USE_TIMERFD */ + timeout = (int) get_timeout(ps, 1000, timeout_time); res = epoll_wait(ps->kp_fd, pr, max_res, timeout); - +#endif /* !ERTS_POLL_USE_TIMERFD */ #elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts = {0, 0}; - struct timespec *tsp = timeout ? NULL : &ts; + struct timespec ts; + struct timespec *tsp; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + tsp = timeout < 0 ? NULL : &ts; res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); #elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ /* @@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; poll_res.dp_nfds = nfds < max_res ? 
@@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
         int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
         poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
         poll_res.dp_fds = pr;
-        poll_res.dp_timeout = timeout;
+        poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
         res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+        struct timespec ts;
+        struct timespec *tsp = &ts;
+        timeout = get_timeout_timespec(ps, &ts, timeout_time);
+        if (timeout < 0) tsp = NULL;
+        res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
 #elif ERTS_POLL_USE_POLL        /* --- poll --------------------------------- */
-
+        timeout = (int) get_timeout(ps, 1000, timeout_time);
         res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
 #elif ERTS_POLL_USE_SELECT        /* --- select ------------------------------ */
-        SysTimeval tv = {0, 0};
-        SysTimeval *tvp = timeout ? NULL : &tv;
+        SysTimeval tv;
+        SysTimeval *tvp;
+        timeout = get_timeout_timeval(ps, &tv, timeout_time);
+        tvp = timeout < 0 ? NULL : &tv;
 
         ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
         ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
@@ -1630,7 +1855,8 @@ int
 ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
                                  ErtsPollResFd pr[],
                                  int *len,
-                                 ErtsThrPrgrData *tpd)
+                                 ErtsThrPrgrData *tpd,
+                                 ErtsMonotonicTime timeout_time)
 {
     int res, no_fds, used_fds = 0;
     int ebadf = 0;
@@ -1655,53 +1881,56 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
     }
 #endif
 
-    do_wait = !is_woken(ps) && used_fds == 0;
+    do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
 
     DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
 
     if (do_wait) {
+        tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
         erts_thr_progress_prepare_wait(tpd);
         ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
-    }
+    } else
+        timeout_time = ERTS_POLL_NO_TIMEOUT;
 
     while (1) {
-        res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+        res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+        if (res != 0)
+            break;
+        if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+            break;
+        if (erts_get_monotonic_time(NULL) >= timeout_time)
+            break;
+    }
 
 #if !ERTS_POLL_USE_CONCURRENT_UPDATE
-        if (res < 0
-            && errno == EBADF
-            && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
-            /*
-             * This may have happened because another thread deselected
-             * a fd in our poll set and then closed it, i.e. the driver
-             * behaved correctly. We wan't to avoid looking for a bad
-             * fd, that may even not exist anymore. Therefore, handle
-             * update requests and try again. This behaviour should only
-             * happen when using SELECT as the polling mechanism.
-             */
-            ERTS_POLLSET_LOCK(ps);
-            used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
-            if (used_fds == no_fds) {
-                *len = used_fds;
-                ERTS_POLLSET_UNLOCK(ps);
-                return 0;
-            }
-            res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
-            /* Keep the lock over the non-blocking poll in order to not
-               get any nasty races happening. */
+    if (res < 0
+        && errno == EBADF
+        && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+        /*
+         * This may have happened because another thread deselected
+         * a fd in our poll set and then closed it, i.e. the driver
+         * behaved correctly. We wan't to avoid looking for a bad
+         * fd, that may even not exist anymore. Therefore, handle
+         * update requests and try again. This behaviour should only
+         * happen when using SELECT as the polling mechanism.
+         */
+        ERTS_POLLSET_LOCK(ps);
+        used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+        if (used_fds == no_fds) {
+            *len = used_fds;
             ERTS_POLLSET_UNLOCK(ps);
-            if (res == 0) {
-                errno = EAGAIN;
-                res = -1;
-            }
+            return 0;
+        }
+        res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT);
+        /* Keep the lock over the non-blocking poll in order to not
+           get any nasty races happening. */
+        ERTS_POLLSET_UNLOCK(ps);
+        if (res == 0) {
+            errno = EAGAIN;
+            res = -1;
         }
-#endif
-
-        if (res != 0)
-            break;
-        if (!do_wait)
-            break;
     }
+#endif
 
     if (do_wait) {
         erts_thr_progress_finalize_wait(tpd);
@@ -1709,7 +1938,8 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
         ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
     }
 
-    woke_up(ps);
+    if (ERTS_POLL_USE_WAKEUP(ps))
+        woke_up(ps);
 
     if (res < 0) {
 #if ERTS_POLL_USE_SELECT
@@ -1720,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
 #endif
         res = errno;
     }
-    else {
+    else if (res == 0) {
+        res = used_fds == 0 ? ETIMEDOUT : 0;
+#ifdef HARD_DEBUG
+        check_poll_result(pr, used_fds);
+#endif
+        *len = used_fds;
+    } else {
 #if ERTS_POLL_USE_SELECT
     save_results:
 #endif
-
         ps_locked = 1;
         ERTS_POLLSET_LOCK(ps);
 
@@ -1754,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
 void
 ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
 {
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
-    if (!set)
-        reset_wakeup_state(ps);
-    else
-        wake_poller(ps, 1);
-#endif
+    DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set);
+    if (ERTS_POLL_USE_WAKEUP(ps)) {
+        if (!set)
+            reset_wakeup_state(ps);
+        else
+            wake_poller(ps, 1);
+    }
 }
 
 int
@@ -1875,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
     if (ps->internal_fd_limit <= kp_fd)
         ps->internal_fd_limit = kp_fd + 1;
     ps->kp_fd = kp_fd;
+    if (ps->id == -1)
+        ps->oneshot = 0;
+    else
+        ps->oneshot = 1;
 #endif
 
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+    erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
     create_wakeup_pipe(ps);
+
+#if ERTS_POLL_USE_TIMERFD
+    create_timerfd(ps);
+#endif
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
     handle_update_requests(ps, NULL, 0);
     cleanup_wakeup_pipe(ps);
 #endif
@@ -1993,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
 
     pip->memory_size = size;
     pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
     pip->poll_set_size++; /* Wakeup pipe */
-#endif
 
     pip->lazy_updates =
 #if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -2178,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
             ASSERT(0);
             return;
         }
+        if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+            continue;
+#if ERTS_POLL_USE_TIMERFD
+        if (fd == ps->timer_fd)
+            continue;
+#endif
         data &= 0xFFFFFFFF;
         ASSERT(fd == data);
         /* Events are the events that are being monitored, which of course include
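The get_timeout() family above turns an absolute monotonic deadline into a backend-specific relative timeout, always rounding up so the poll call never returns before the deadline (waking early would only force a useless extra loop iteration). A minimal standalone sketch of the millisecond case, assuming a fake clock and ignoring the ERTS_PREMATURE_TIMEOUT adjustment:

#include <stdio.h>
#include <stdint.h>
#include <limits.h>

static const int64_t now_ns = 1000000000;  /* pretend monotonic "now" */

static int deadline_to_ms(int64_t deadline_ns)
{
    int64_t diff, ms;
    if (deadline_ns == INT64_MAX)      /* "infinite", like ERTS_POLL_INF_TIMEOUT */
        return -1;                     /* epoll_wait/poll block forever on -1 */
    diff = deadline_ns - now_ns;
    if (diff <= 0)
        return 0;                      /* already expired: one non-blocking poll */
    ms = (diff - 1) / 1000000 + 1;     /* round up, mirroring the (X - 1) + 1 idiom */
    if (ms > INT_MAX)
        ms = INT_MAX;                  /* the timeout argument is an int */
    return (int) ms;
}

int main(void)
{
    printf("%d\n", deadline_to_ms(now_ns + 1500000)); /* 1.5 ms away -> 2, never early */
    printf("%d\n", deadline_to_ms(now_ns - 5));       /* expired -> 0 */
    printf("%d\n", deadline_to_ms(INT64_MAX));        /* infinite -> -1 */
    return 0;
}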
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index e1cea7eb8b..d40dabc529 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -51,6 +51,7 @@
 #include "sys.h"
 
 #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX
 
 #ifdef ERTS_ENABLE_KERNEL_POLL
 #  undef ERTS_ENABLE_KERNEL_POLL
@@ -130,6 +131,9 @@
 #endif
 
 #define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10
+#define ERTS_POLL_USE_TIMERFD 0
 
 typedef Uint32 ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
 
 #include <sys/epoll.h>
 
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
 #define ERTS_POLL_EV_E2N(EV) \
     ((uint32_t) (EV))
 #define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
 
 #endif
 
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
 
 #define ev2str(ev) \
     (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
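As the erl_poll.h hunks show, ERTS_POLL_USE_TIMERFD is only switched on when epoll is in use and sys/timerfd.h is available; check_fd_events() then arms the timerfd and calls epoll_wait() with an infinite timeout instead of truncating the wait to whole milliseconds. A Linux-only sketch of that pattern using plain syscalls (timerfd_set()/timerfd_clear() in the patch are ERTS helpers, not shown here):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/epoll.h>
#include <sys/timerfd.h>

int main(void)
{
    int ep = epoll_create1(0);
    int tfd = timerfd_create(CLOCK_MONOTONIC, 0);
    struct epoll_event ev, res[8];
    struct itimerspec its;
    int n, i;

    ev.events = EPOLLIN;
    ev.data.fd = tfd;
    epoll_ctl(ep, EPOLL_CTL_ADD, tfd, &ev);

    /* One-shot timer (it_interval zeroed), 1.5 ms: finer than the 1 ms
       granularity of epoll_wait's own timeout argument. */
    memset(&its, 0, sizeof(its));
    its.it_value.tv_nsec = 1500000;
    timerfd_settime(tfd, 0, &its, NULL);

    n = epoll_wait(ep, res, 8, -1);   /* block "forever"; the timer wakes us */
    for (i = 0; i < n; i++) {
        if (res[i].data.fd == tfd) {
            uint64_t expirations;
            /* reading clears the expiration count, like timerfd_clear() */
            if (read(tfd, &expirations, sizeof(expirations)) == sizeof(expirations))
                printf("timer fired\n");
        }
    }
    close(tfd);
    close(ep);
    return 0;
}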
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index f35f64a9f3..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -73,12 +73,14 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
  * @param[in] length the length of the res array
  * @param[out] length the number of ready events returned in res
  * @param tpd the thread progress data to note sleep state in
+ * @param timeout_time the time, in native time units, to wake up at
  * @return 0 on success, else the ERRNO of the error that happened.
  */
 int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
                                      ErtsPollResFd res[],
                                      int *length,
-                                     ErtsThrPrgrData *tpd);
+                                     ErtsThrPrgrData *tpd,
+                                     ErtsMonotonicTime timeout_time);
 /**
  * Interrupt the thread waiting in the pollset. This function should be called
  * with set = 0 before any thread calls erts_poll_wait in order to clear any
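With the documented signature above, erts_poll_wait() now owns the deadline and loops internally: a wakeup with no ready events (for instance via the wakeup pipe) re-polls unless the caller passed ERTS_POLL_NO_TIMEOUT or the deadline has passed, and a pure timeout surfaces as ETIMEDOUT. A self-contained sketch of that retry loop, with stand-in stubs (poll_once(), now()) in place of check_fd_events() and erts_get_monotonic_time():

#include <stdio.h>
#include <stdint.h>

#define NO_TIMEOUT  INT64_MIN   /* mirrors ERTS_POLL_NO_TIMEOUT  */
#define INF_TIMEOUT INT64_MAX   /* mirrors ERTS_POLL_INF_TIMEOUT */

static int64_t clock_now = 0;

static int64_t now(void) { return clock_now; }
static int poll_once(void) { clock_now += 3; return 0; } /* nothing ever ready */

static int wait_until(int64_t deadline)
{
    for (;;) {
        int res = poll_once();
        if (res != 0)
            return res;          /* events (or an error): report upward */
        if (deadline == NO_TIMEOUT)
            return 0;            /* pure non-blocking poll: give up now */
        if (now() >= deadline)
            return 0;            /* deadline reached (never for INF_TIMEOUT) */
        /* otherwise a spurious wakeup, e.g. the wakeup pipe: poll again */
    }
}

int main(void)
{
    wait_until(10);              /* retries until the fake clock passes 10 */
    printf("timed out at t=%lld\n", (long long) clock_now);
    return 0;
}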
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 5d832f4d34..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1018,10 +1018,11 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
 int erts_poll_wait(ErtsPollSet *ps,
                    ErtsPollResFd pr[],
                    int *len,
-                   ErtsThrPrgrData *tpd)
+                   ErtsThrPrgrData *tpd,
+                   Sint64 timeout_in)
 {
     int no_fds;
-    DWORD timeout = INFINITE;
+    DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in;
     EventData* ev;
     int res = 0;
     int num = 0;
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl
index f04efb9003..2e0dfa42f3 100644
--- a/erts/emulator/test/scheduler_SUITE.erl
+++ b/erts/emulator/test/scheduler_SUITE.erl
@@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) ->
     {Conc, PollType, KP} = get_ioconfig(Config),
     {Sched, SchedOnln, _} = get_sstate(Config, ""),
 
-    [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
-    [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
-
-    [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
-
     if
         Conc ->
-            [5] = get_ionum(Config,"+IOt 5 +IOp 1"),
-            [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"),
-            [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+            [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+            [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+            [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
 
-            [2] = get_ionum(Config, "+S 2 +IOPt 100"),
-            [4] = get_ionum(Config, "+S 4 +IOPt 100"),
-            [4] = get_ionum(Config, "+S 4:2 +IOPt 100"),
-            [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
+            [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
+            [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
+            [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+
+            [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"),
+            [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"),
+            [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"),
+            [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
 
             fail = get_ionum(Config, "+IOt 1 +IOp 2"),
 
             ok;
         not Conc ->
+            [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+            [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+
+            [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
+
             [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
             [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
             [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index fab2f45f28..4e6baa9e0e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -85,7 +85,7 @@ xm_sig_order_proc() ->
     receive
         may_not_reach -> exit(bad_signal_order);
         may_reach -> ok
-    after 0 -> erlang:yield()
     end,
     xm_sig_order_proc().
-- 
cgit v1.2.3
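A final note on the win32 erl_poll.c hunk above: there the timeout argument is already a millisecond count (Sint64), and -1 maps to INFINITE for the WaitForMultipleObjects-style wait. A sketch of that mapping; the DWORD_/INFINITE_ stand-ins and the overflow clamp are illustrative assumptions, not part of the patch:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t DWORD_;                 /* stand-in for the Windows DWORD */
#define INFINITE_ ((DWORD_)0xFFFFFFFF)   /* stand-in for INFINITE */

static DWORD_ to_win_timeout(int64_t timeout_in)
{
    if (timeout_in == -1)
        return INFINITE_;                /* block until woken, as in the patch */
    if (timeout_in < 0)
        return 0;                        /* treat other negatives as "poll once" */
    if (timeout_in >= (int64_t) INFINITE_)
        return INFINITE_ - 1;            /* clamp below the sentinel (assumption) */
    return (DWORD_) timeout_in;
}

int main(void)
{
    printf("%u\n", (unsigned) to_win_timeout(-1));       /* 4294967295 = INFINITE */
    printf("%u\n", (unsigned) to_win_timeout(250));      /* 250 ms */
    printf("%u\n", (unsigned) to_win_timeout(1LL << 40));/* clamped */
    return 0;
}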