aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r--erts/emulator/beam/erl_port.h2
-rw-r--r--erts/emulator/beam/erl_port_task.c81
-rw-r--r--erts/emulator/beam/erl_port_task.h21
-rw-r--r--erts/emulator/beam/erl_process.c122
-rw-r--r--erts/emulator/beam/erl_process.h4
5 files changed, 210 insertions, 20 deletions
diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h
index 2be0a5bf74..25976d38cc 100644
--- a/erts/emulator/beam/erl_port.h
+++ b/erts/emulator/beam/erl_port.h
@@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p);
#define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11))
/* Last port to terminate halts the emulator */
#define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12))
+/* Check if the event in ready_input should be cleaned */
+#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13))
#ifdef DEBUG
/* Only debug: make sure all flags aren't cleared unintentionally */
#define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31))
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 4928d80f27..c8f2e88127 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q
typedef union {
struct { /* I/O tasks */
ErlDrvEvent event;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ int is_scheduler_event;
+#endif
} io;
struct {
ErtsProc2PortSigCallback callback;
@@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ {
ErtsPortTaskBusyCaller pre_alloc_busy_caller;
};
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
static void begin_port_cleanup(Port *pp,
ErtsPortTask **execq,
@@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
-reset_executed_io_task_handle(ErtsPortTask *ptp)
+reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp)
{
if (ptp->u.alive.handle) {
ASSERT(ptp == handle2task(ptp->u.alive.handle));
- /* The port task handle is reset inside task_executed */
- erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
- reset_port_task_handle);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) {
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
+ } else {
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+ } else
+#endif
+ {
+ /* The port task handle is reset inside task_executed */
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ }
}
}
@@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp)
res = - 1; /* Task already aborted, executing, or executed */
else {
reset_port_task_handle(pthp);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ switch (ptp->type) {
+ case ERTS_PORT_TASK_INPUT:
+ case ERTS_PORT_TASK_OUTPUT:
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ ASSERT(erts_atomic_read_nob(
+ &erts_port_task_outstanding_io_tasks) > 0);
+ erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks);
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+
res = 0;
}
}
@@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id,
va_list argp;
va_start(argp, type);
ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int);
+#endif
va_end(argp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks);
+#endif
break;
}
case ERTS_PORT_TASK_PROC_SIG: {
@@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int processing_busy_q;
int vreds = 0;
int reds = 0;
- erts_aint_t io_tasks_executed = 0;
int fpe_was_unmasked;
erts_aint32_t state;
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_aint_t io_tasks_executed = 0;
+#endif
ERTS_MSACC_PUSH_STATE_M();
ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq));
@@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_OUTPUT:
reds = ERTS_PORT_REDS_OUTPUT;
@@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
LTTNG_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
@@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
erts_unblock_fpe(fpe_was_unmasked);
ERTS_MSACC_POP_STATE_M();
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (io_tasks_executed) {
+ ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks)
+ >= io_tasks_executed);
+ erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks,
+ -1*io_tasks_executed);
+ }
+#endif
+
ASSERT(runq == erts_get_runq_port(pp));
active = finalize_exec(pp, &execq, processing_busy_q);
@@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq)
void
erts_port_task_init(void)
{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks,
+ (erts_aint_t) 0);
+#endif
init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads
+ 1); /* aux_thread */
init_busy_caller_table_alloc();
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index ae78a7d8a3..ca5183b305 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#ifndef ERL_PORT_TASK_H__
#define ERL_PORT_TASK_H__
+#include "erl_poll.h"
+
#undef ERTS_INCLUDE_SCHEDULER_INTERNALS
#if (defined(ERL_PROCESS_C__) \
|| defined(ERL_PORT_TASK_C__) \
@@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#define ERTS_PT_FLG_BAD_OUTPUT (1 << 4)
typedef enum {
- ERTS_PORT_TASK_INPUT,
- ERTS_PORT_TASK_OUTPUT,
+ ERTS_PORT_TASK_INPUT = 0,
+ ERTS_PORT_TASK_OUTPUT = 1,
ERTS_PORT_TASK_TIMEOUT,
ERTS_PORT_TASK_DIST_CMD,
ERTS_PORT_TASK_PROC_SIG
@@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp);
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void);
+/* NOTE: Do not access any of the exported variables directly */
+extern erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp)
erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING);
}
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int
+erts_port_task_have_outstanding_io_tasks(void)
+{
+ return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks)
+ != 0);
+}
+#endif
+
#endif
#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 919d9f9edf..2427d87f66 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
typedef struct {
int aux_work;
int tse;
- int sys_schedule;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
@@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static erts_atomic32_t doing_sys_schedule;
+#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
@@ -3093,8 +3095,12 @@ aux_thread(void *unused)
awdp->ssi = ssi;
#if ERTS_POLL_USE_FALLBACK
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ssi->psi = erts_create_pollset_thread(-2, tpd);
+#else
ssi->psi = erts_create_pollset_thread(-1, tpd);
#endif
+#endif
sched_prep_spin_wait(ssi);
@@ -3133,7 +3139,7 @@ aux_thread(void *unused)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
}
}
#else
@@ -3231,7 +3237,7 @@ poll_thread(void *arg)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
}
}
}
@@ -3241,6 +3247,59 @@ poll_thread(void *arg)
return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this schedulers run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flags is cleared
+ when a check_balance assigns another scheduler to be the poller in
+ the overload scenario. */
+ if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == ERTS_RUNQ_FLG_CHECKIO) {
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT,
+ check_io_timer, NULL);
+ } else {
+ ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO);
+ }
+}
+
+#else
+#define clear_sys_scheduling()
+#define prepare_for_sys_schedule() 0
+#endif
+
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
@@ -3330,7 +3389,25 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sched_wall_time_change(esdp, 1);
}
}
- else {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) {
+ /* We sleep in check_io, only for normal schedulers */
+ if (thr_prgr_active) {
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ flgs = sched_spin_wait(ssi, 0);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ erts_check_io(ssi->psi, timeout_time);
+ current_time = erts_get_monotonic_time(esdp);
+ }
+ }
+ clear_sys_scheduling();
+ } else {
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (thr_prgr_active) {
erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
@@ -3338,7 +3415,6 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp));
}
-
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
@@ -4585,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq)
if (blnc_no_rqs == 1) {
c_rq->check_balance_reds = INT_MAX;
erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
return;
}
@@ -5104,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n");
/* Publish new migration paths... */
erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran for full, we need to do active polling,
+ so we setup a timer that does active polling */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
/* Reset balance statistics in all online queues */
for (qix = 0; qix < blnc_no_rqs; qix++) {
Uint32 flags = run_queue_info[qix].flags;
@@ -5113,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n");
ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
if (rq->waiting)
flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
rq->full_reds_history_sum
= run_queue_info[qix].full_reds_history_sum;
@@ -5122,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n");
ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
rq->max_len = erts_atomic32_read_dirty(&rq->len);
for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
ErtsRunQueueInfo *rqi;
@@ -5562,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
return EINVAL;
}
- params->sys_schedule = sys_sched;
params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
params->aux_work = sys_sched * aux_work_fact;
@@ -5773,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
erts_atomic32_init_nob(&no_empty_run_queues, 0);
erts_no_run_queues = n;
@@ -8302,6 +8403,11 @@ sched_thread_func(void *vesdp)
erts_msacc_init_thread("scheduler", no, 1);
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ esdp->ssi->psi = erts_create_pollset_thread(-1, NULL);
+#endif
+
erts_alloc_register_scheduler(vesdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
{
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 8d20ccdf90..a1b029adbe 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size;
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9))
#define ERTS_RUNQ_FLG_HALTING \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10))
+#define ERTS_RUNQ_FLG_CHECKIO \
+ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11))
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12)
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \