aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--erts/emulator/beam/erl_port.h2
-rw-r--r--erts/emulator/beam/erl_port_task.c81
-rw-r--r--erts/emulator/beam/erl_port_task.h21
-rw-r--r--erts/emulator/beam/erl_process.c122
-rw-r--r--erts/emulator/beam/erl_process.h4
-rw-r--r--erts/emulator/sys/common/erl_check_io.c310
-rw-r--r--erts/emulator/sys/common/erl_check_io.h16
-rw-r--r--erts/emulator/sys/common/erl_poll.c504
-rw-r--r--erts/emulator/sys/common/erl_poll.h14
-rw-r--r--erts/emulator/sys/common/erl_poll_api.h4
-rw-r--r--erts/emulator/sys/win32/erl_poll.c5
-rw-r--r--erts/emulator/test/scheduler_SUITE.erl27
-rw-r--r--erts/emulator/test/signal_SUITE.erl2
13 files changed, 843 insertions, 269 deletions
diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h
index 2be0a5bf74..25976d38cc 100644
--- a/erts/emulator/beam/erl_port.h
+++ b/erts/emulator/beam/erl_port.h
@@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p);
#define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11))
/* Last port to terminate halts the emulator */
#define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12))
+/* Check if the event in ready_input should be cleaned */
+#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13))
#ifdef DEBUG
/* Only debug: make sure all flags aren't cleared unintentionally */
#define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31))
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 4928d80f27..c8f2e88127 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q
typedef union {
struct { /* I/O tasks */
ErlDrvEvent event;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ int is_scheduler_event;
+#endif
} io;
struct {
ErtsProc2PortSigCallback callback;
@@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ {
ErtsPortTaskBusyCaller pre_alloc_busy_caller;
};
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
static void begin_port_cleanup(Port *pp,
ErtsPortTask **execq,
@@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
-reset_executed_io_task_handle(ErtsPortTask *ptp)
+reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp)
{
if (ptp->u.alive.handle) {
ASSERT(ptp == handle2task(ptp->u.alive.handle));
- /* The port task handle is reset inside task_executed */
- erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
- reset_port_task_handle);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) {
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
+ } else {
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+ } else
+#endif
+ {
+ /* The port task handle is reset inside task_executed */
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ }
}
}
@@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp)
res = - 1; /* Task already aborted, executing, or executed */
else {
reset_port_task_handle(pthp);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ switch (ptp->type) {
+ case ERTS_PORT_TASK_INPUT:
+ case ERTS_PORT_TASK_OUTPUT:
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ ASSERT(erts_atomic_read_nob(
+ &erts_port_task_outstanding_io_tasks) > 0);
+ erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks);
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+
res = 0;
}
}
@@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id,
va_list argp;
va_start(argp, type);
ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int);
+#endif
va_end(argp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks);
+#endif
break;
}
case ERTS_PORT_TASK_PROC_SIG: {
@@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int processing_busy_q;
int vreds = 0;
int reds = 0;
- erts_aint_t io_tasks_executed = 0;
int fpe_was_unmasked;
erts_aint32_t state;
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_aint_t io_tasks_executed = 0;
+#endif
ERTS_MSACC_PUSH_STATE_M();
ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq));
@@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_OUTPUT:
reds = ERTS_PORT_REDS_OUTPUT;
@@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
LTTNG_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
@@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
erts_unblock_fpe(fpe_was_unmasked);
ERTS_MSACC_POP_STATE_M();
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (io_tasks_executed) {
+ ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks)
+ >= io_tasks_executed);
+ erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks,
+ -1*io_tasks_executed);
+ }
+#endif
+
ASSERT(runq == erts_get_runq_port(pp));
active = finalize_exec(pp, &execq, processing_busy_q);
@@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq)
void
erts_port_task_init(void)
{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks,
+ (erts_aint_t) 0);
+#endif
init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads
+ 1); /* aux_thread */
init_busy_caller_table_alloc();
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index ae78a7d8a3..ca5183b305 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#ifndef ERL_PORT_TASK_H__
#define ERL_PORT_TASK_H__
+#include "erl_poll.h"
+
#undef ERTS_INCLUDE_SCHEDULER_INTERNALS
#if (defined(ERL_PROCESS_C__) \
|| defined(ERL_PORT_TASK_C__) \
@@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#define ERTS_PT_FLG_BAD_OUTPUT (1 << 4)
typedef enum {
- ERTS_PORT_TASK_INPUT,
- ERTS_PORT_TASK_OUTPUT,
+ ERTS_PORT_TASK_INPUT = 0,
+ ERTS_PORT_TASK_OUTPUT = 1,
ERTS_PORT_TASK_TIMEOUT,
ERTS_PORT_TASK_DIST_CMD,
ERTS_PORT_TASK_PROC_SIG
@@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp);
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void);
+/* NOTE: Do not access any of the exported variables directly */
+extern erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp)
erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING);
}
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int
+erts_port_task_have_outstanding_io_tasks(void)
+{
+ return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks)
+ != 0);
+}
+#endif
+
#endif
#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 919d9f9edf..2427d87f66 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
typedef struct {
int aux_work;
int tse;
- int sys_schedule;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
@@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static erts_atomic32_t doing_sys_schedule;
+#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
@@ -3093,8 +3095,12 @@ aux_thread(void *unused)
awdp->ssi = ssi;
#if ERTS_POLL_USE_FALLBACK
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ssi->psi = erts_create_pollset_thread(-2, tpd);
+#else
ssi->psi = erts_create_pollset_thread(-1, tpd);
#endif
+#endif
sched_prep_spin_wait(ssi);
@@ -3133,7 +3139,7 @@ aux_thread(void *unused)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
}
}
#else
@@ -3231,7 +3237,7 @@ poll_thread(void *arg)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
}
}
}
@@ -3241,6 +3247,59 @@ poll_thread(void *arg)
return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this schedulers run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flags is cleared
+ when a check_balance assigns another scheduler to be the poller in
+ the overload scenario. */
+ if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == ERTS_RUNQ_FLG_CHECKIO) {
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT,
+ check_io_timer, NULL);
+ } else {
+ ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO);
+ }
+}
+
+#else
+#define clear_sys_scheduling()
+#define prepare_for_sys_schedule() 0
+#endif
+
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
@@ -3330,7 +3389,25 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sched_wall_time_change(esdp, 1);
}
}
- else {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) {
+ /* We sleep in check_io, only for normal schedulers */
+ if (thr_prgr_active) {
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ flgs = sched_spin_wait(ssi, 0);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ erts_check_io(ssi->psi, timeout_time);
+ current_time = erts_get_monotonic_time(esdp);
+ }
+ }
+ clear_sys_scheduling();
+ } else {
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (thr_prgr_active) {
erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
@@ -3338,7 +3415,6 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp));
}
-
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
@@ -4585,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq)
if (blnc_no_rqs == 1) {
c_rq->check_balance_reds = INT_MAX;
erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
return;
}
@@ -5104,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n");
/* Publish new migration paths... */
erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran for full, we need to do active polling,
+ so we setup a timer that does active polling */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
/* Reset balance statistics in all online queues */
for (qix = 0; qix < blnc_no_rqs; qix++) {
Uint32 flags = run_queue_info[qix].flags;
@@ -5113,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n");
ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
if (rq->waiting)
flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
rq->full_reds_history_sum
= run_queue_info[qix].full_reds_history_sum;
@@ -5122,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n");
ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
rq->max_len = erts_atomic32_read_dirty(&rq->len);
for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
ErtsRunQueueInfo *rqi;
@@ -5562,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
return EINVAL;
}
- params->sys_schedule = sys_sched;
params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
params->aux_work = sys_sched * aux_work_fact;
@@ -5773,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
erts_atomic32_init_nob(&no_empty_run_queues, 0);
erts_no_run_queues = n;
@@ -8302,6 +8403,11 @@ sched_thread_func(void *vesdp)
erts_msacc_init_thread("scheduler", no, 1);
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ esdp->ssi->psi = erts_create_pollset_thread(-1, NULL);
+#endif
+
erts_alloc_register_scheduler(vesdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
{
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 8d20ccdf90..a1b029adbe 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size;
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9))
#define ERTS_RUNQ_FLG_HALTING \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10))
+#define ERTS_RUNQ_FLG_CHECKIO \
+ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11))
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12)
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 1444cee805..c681fa481f 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -46,11 +46,11 @@
#if 0
#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__)
#define DEBUG_PRINT_FD(FMT, STATE, ...) \
- DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \
+ DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \
(STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \
ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
- (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)
+ (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR)
#define DEBUG_PRINT_MODE
#else
#define DEBUG_PRINT(...)
@@ -76,22 +76,40 @@ typedef enum {
typedef enum {
ERTS_EV_FLAG_CLEAR = 0,
ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */
-#ifdef ERTS_ENABLE_KERNEL_POLL
- ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated
+ to scheduler pollset */
+ ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in
+ scheduler pollset */
+#else
+ ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+ ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+#endif
+#ifdef ERTS_POLL_USE_FALLBACK
+ ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd
and it was put in the nkp version */
#else
ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR,
#endif
/* Combinations */
- ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK
+ ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK,
+ ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER
} EventStateFlags;
#define flag2str(flags) \
((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \
((flags) == ERTS_EV_FLAG_USED ? "USED" : \
((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \
- ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR"))))
+ ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \
+ ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \
+ ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \
+ "ERROR"))))))))
/* How many events that can be handled at once by one erts_poll_wait call */
#define ERTS_CHECK_IO_POLL_RES_LEN 512
@@ -113,10 +131,13 @@ typedef struct erts_poll_thread
* Which pollset to use is determined by hashing the fd.
*/
static ErtsPollSet **pollsetv;
+static ErtsPollThread *psiv;
#if ERTS_POLL_USE_FALLBACK
static ErtsPollSet *flbk_pollset;
#endif
-static ErtsPollThread *psiv;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ErtsPollSet *sched_pollset;
+#endif
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
@@ -131,10 +152,12 @@ typedef struct {
ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */
} stop;
} driver;
- ErtsPollEvents events; /* The events that have been selected upon */
+ ErtsPollEvents events; /* The events that have been selected upon */
ErtsPollEvents active_events; /* The events currently active in the pollset */
EventStateType type;
EventStateFlags flags;
+ int count; /* Number of times this fd has triggered
+ without being deselected. */
} ErtsDrvEventState;
struct drv_ev_state_shared {
@@ -371,12 +394,22 @@ get_pollset(ErtsSysFdType fd)
#if ERTS_POLL_USE_FALLBACK
static ERTS_INLINE ErtsPollSet *
-get_fallback(void)
+get_fallback_pollset(void)
{
return flbk_pollset;
}
#endif
+static ERTS_INLINE ErtsPollSet *
+get_scheduler_pollset(ErtsSysFdType fd)
+{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ return sched_pollset;
+#else
+ return get_pollset(fd);
+#endif
+}
+
/*
* Place a fd within a pollset. This will automatically use
* the fallback ps if needed.
@@ -392,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
if (!(flags & ERTS_EV_FLAG_FALLBACK)) {
- res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+
+ if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) {
+ erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
+ if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) {
+ res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+ } else {
+ res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ }
#if ERTS_POLL_USE_FALLBACK
if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
/* When an add fails with NVAL, the poll/kevent operation could not
put that fd in the pollset, so we instead put it into a fallback pollset */
state->flags |= ERTS_EV_FLAG_FALLBACK;
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
}
} else {
ASSERT(op != ERTS_POLL_OP_ADD);
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
#endif
}
@@ -426,7 +468,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
ErtsSysFdType fd = itp->fd;
erts_mtx_t *mtx = fd_mtx(fd);
- int active_events;
+ ErtsPollOp op = ERTS_POLL_OP_MOD;
+ int active_events, new_events = 0;
ErtsDrvEventState *state;
ErtsDrvSelectDataState *free_select = NULL;
ErtsNifSelectDataState *free_nif = NULL;
@@ -436,51 +479,66 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
erts_mtx_lock(mtx);
state = get_drv_ev_state(fd);
+ reset_handle(pthp);
+
active_events = state->active_events;
- switch (type) {
- case ERTS_PORT_TASK_INPUT:
+ if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) {
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+
+ DEBUG_PRINT_FD("executed ready_input", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
+ if (state->events & ERTS_POLL_EV_IN) {
+ active_events |= ERTS_POLL_EV_IN;
+ if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
+ if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
+ op = ERTS_POLL_OP_ADD;
+ state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER;
+ new_events = ERTS_POLL_EV_IN;
+ DEBUG_PRINT_FD("moving to scheduler ps", state);
+ } else
+ new_events = active_events;
+ if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING)
+ state->count++;
+ }
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
- DEBUG_PRINT_FD("executed ready_input", state);
+ DEBUG_PRINT_FD("executed ready_output", state);
- ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
- if (state->events & ERTS_POLL_EV_IN)
- active_events |= ERTS_POLL_EV_IN;
- break;
- case ERTS_PORT_TASK_OUTPUT:
+ ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
+ if (state->events & ERTS_POLL_EV_OUT) {
+ active_events |= ERTS_POLL_EV_OUT;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN)
+ new_events = ERTS_POLL_EV_OUT;
+ else
+ new_events = active_events;
+ }
+ break;
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
+ break;
+ }
- DEBUG_PRINT_FD("executed ready_output", state);
+ if (state->active_events != active_events && new_events) {
+ state->active_events = active_events;
+ new_events = erts_io_control(state, op, new_events);
+ }
- ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
- if (state->events & ERTS_POLL_EV_OUT)
- active_events |= ERTS_POLL_EV_OUT;
- break;
- default:
- erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
- break;
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (state->active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
}
- reset_handle(pthp);
-
- if (active_events) {
- /* This is not needed if active_events has not changed */
- if (state->active_events != active_events) {
- ErtsPollEvents new_events;
- state->active_events = active_events;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events);
-
- /* We were unable to re-insert the fd into the pollset, signal the callback. */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- if (active_events & ERTS_POLL_EV_IN)
- iready(state->driver.select->inport, state);
- if (active_events & ERTS_POLL_EV_OUT)
- oready(state->driver.select->outport, state);
- state->active_events = 0;
- }
- }
- } else {
+ if (!active_events)
check_fd_cleanup(state, &free_select, &free_nif);
- }
erts_mtx_unlock(mtx);
@@ -760,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
ctl_op = ERTS_POLL_OP_ADD;
}
+ new_events = state->active_events;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ new_events &= ~ERTS_POLL_EV_IN;
}
else {
ctl_events &= old_events;
state->events &= ~ctl_events;
state->active_events &= ~ctl_events;
+ new_events = state->active_events;
+
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ state->count = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ new_events = 0;
+ }
+ }
if (!state->events) {
if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
@@ -775,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
new_events = erts_io_control_wakeup(state, ctl_op,
- state->active_events,
+ new_events,
&wake_poller);
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
@@ -807,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events & ERTS_POLL_EV_IN) {
abort_tasks(state, ERL_DRV_READ);
state->driver.select->inport = NIL;
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
}
if (ctl_events & ERTS_POLL_EV_OUT) {
abort_tasks(state, ERL_DRV_WRITE);
@@ -815,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (state->events == 0) {
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
+ if (state->flags & ERTS_EV_FLAG_SCHEDULER)
+ erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
state->flags = 0;
}
/*else keep it, as fd will probably be selected upon again */
@@ -1431,7 +1503,8 @@ iready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) {
stale_drv_select(id, state, ERL_DRV_READ);
} else {
DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
@@ -1449,7 +1522,8 @@ oready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ 0) != 0) {
stale_drv_select(id, state, ERL_DRV_WRITE);
} else {
DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
@@ -1511,7 +1585,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
{
if (psi) {
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
erts_poll_interrupt_flbk(psi->ps, set);
return;
}
@@ -1527,7 +1601,7 @@ erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) {
}
void
-erts_check_io(ErtsPollThread *psi)
+erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time)
{
int pollres_len;
int poll_ret, i;
@@ -1542,14 +1616,14 @@ erts_check_io(ErtsPollThread *psi)
pollres_len = psi->pollres_len;
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
- poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd);
+ poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
} else
#endif
{
- poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd);
+ poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
}
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -1585,7 +1659,12 @@ erts_check_io(ErtsPollThread *psi)
ErtsNifSelectDataState *free_nif = NULL;
ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
ErtsDrvEventState *state;
- ErtsPollEvents revents;
+ ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
+
+ /* The fd will be set to -1 if a pollset internal fd was triggered
+ that was determined to be too expensive to remove from the result.
+ */
+ if (fd == -1) continue;
erts_mtx_lock(fd_mtx(fd));
@@ -1596,8 +1675,6 @@ erts_check_io(ErtsPollThread *psi)
continue;
}
- revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
-
DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
if (revents & ERTS_POLL_EV_ERR) {
@@ -1609,25 +1686,39 @@ erts_check_io(ErtsPollThread *psi)
*/
revents = state->active_events;
state->active_events = 0;
+
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ erts_io_control(state, ERTS_POLL_OP_MOD, 0);
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
} else {
/* Disregard any events that are not active at the moment,
for instance this could happen if the driver/nif does
select/deselect in rapid succession. */
revents &= state->active_events | ERTS_POLL_EV_NVAL;
- state->active_events &= ~revents;
- /* Reactivate the poll op if there are still active events */
- if (state->active_events) {
- ErtsPollEvents new_events;
- DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events));
+ if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) {
+ ErtsPollEvents reactive_events;
+ state->active_events &= ~revents;
+
+ reactive_events = state->active_events;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ reactive_events &= ~ERTS_POLL_EV_IN;
- /* Unable to re-enable the fd, signal all callbacks */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- revents |= state->active_events;
- state->active_events = 0;
+ /* Reactivate the poll op if there are still active events */
+ if (reactive_events) {
+ ErtsPollEvents new_events;
+ DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events));
+
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events);
+
+ /* Unable to re-enable the fd, signal all callbacks */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ revents |= reactive_events;
+ state->active_events &= ~reactive_events;
+ }
}
}
}
@@ -1703,7 +1794,7 @@ erts_check_io(ErtsPollThread *psi)
case ERTS_EV_TYPE_STOP_USE: {
#if ERTS_POLL_USE_FALLBACK
- ASSERT(psi->ps == get_fallback());
+ ASSERT(psi->ps == get_fallback_pollset());
#endif
drv_ptr = state->driver.stop.drv_ptr;
state->type = ERTS_EV_TYPE_NONE;
@@ -2041,12 +2132,17 @@ erts_init_check_io(int *argc, char **argv)
for (j=0; j < erts_no_pollsets; j++)
pollsetv[j] = erts_poll_create_pollset(j);
-#if ERTS_POLL_USE_FALLBACK
- flbk_pollset = erts_poll_create_pollset_flbk(-1);
+ no_poll_threads = erts_no_poll_threads;
+
+ j = -1;
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ sched_pollset = erts_poll_create_pollset(j--);
+ no_poll_threads++;
#endif
- no_poll_threads = erts_no_poll_threads;
#if ERTS_POLL_USE_FALLBACK
+ flbk_pollset = erts_poll_create_pollset_flbk(j--);
no_poll_threads++;
#endif
@@ -2056,7 +2152,15 @@ erts_init_check_io(int *argc, char **argv)
psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
- psiv[0].ps = get_fallback();
+ psiv[0].ps = get_fallback_pollset();
+ psiv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[0].ps = get_scheduler_pollset(0);
psiv++;
#endif
@@ -2113,7 +2217,12 @@ erts_check_io_size(void)
int i;
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info(get_fallback(), &pi);
+ erts_poll_info(get_fallback_pollset(), &pi);
+ res += pi.memory_size;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &pi);
res += pi.memory_size;
#endif
@@ -2145,13 +2254,21 @@ erts_check_io_info(void *proc)
Uint sz, *szp, *hp, **hpp;
ErtsPollInfo *piv;
Sint i, j = 0, len;
- int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK;
+ int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING;
ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
+ ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1);
piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info_flbk(get_fallback(), &piv[0]);
+ erts_poll_info_flbk(get_fallback_pollset(), &piv[0]);
+ piv[0].poll_threads = 1;
+ piv[0].active_fds = 0;
+ piv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &piv[0]);
piv[0].poll_threads = 1;
piv[0].active_fds = 0;
piv++;
@@ -2205,6 +2322,7 @@ erts_check_io_info(void *proc)
sz = 0;
piv -= ERTS_POLL_USE_FALLBACK;
+ piv -= ERTS_POLL_USE_SCHEDULER_POLLING;
bld_it:
@@ -2309,15 +2427,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
static ERTS_INLINE void
print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
{
- const char* delim = "";
- if(f & ERTS_EV_FLAG_USED) {
- erts_dsprintf(dsbufp, "%s","USED");
- delim = "|";
- }
- if(f & ERTS_EV_FLAG_FALLBACK) {
- erts_dsprintf(dsbufp, "%s%s", delim, "FLBK");
- delim = "|";
- }
+ erts_dsprintf(dsbufp, "%s", flag2str(f));
}
#ifdef DEBUG_PRINT_MODE
@@ -2659,13 +2769,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
#if ERTS_POLL_USE_FALLBACK
erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
- erts_poll_get_selected_events_flbk(get_fallback(), counters.epep,
+ erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep,
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
}
#endif
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n");
+ erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep,
+ drv_ev_state.max_fds);
+ for (fd = 0; fd < len; fd++) {
+ if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) {
+ if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE)
+ counters.epep[fd] &= ~ERTS_POLL_EV_OUT;
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
+ }
+#endif
+
erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
for (i = 0; i < erts_no_pollsets; i++) {
@@ -2674,8 +2797,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
- && get_pollset_id(fd) == i)
+ && get_pollset_id(fd) == i) {
+ if (counters.epep[fd] != ERTS_POLL_EV_NONE &&
+ drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ /* We add the in flag if it is enabled in the scheduler pollset
+ and get_selected_events works on the platform */
+ counters.epep[fd] |= ERTS_POLL_EV_IN;
+ }
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
}
}
for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
@@ -2722,7 +2852,7 @@ void erts_lcnt_update_cio_locks(int enable) {
#endif
#if ERTS_POLL_USE_FALLBACK
- erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable);
+ erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable);
#endif
for (i = 0; i < erts_no_pollsets; i++)
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index 16aba8a5f3..31182be5ec 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -68,7 +68,7 @@ int erts_check_io_max_files(void);
*
* @param pt the poll thread structure to use.
*/
-void erts_check_io(struct erts_poll_thread *pt);
+void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time);
/**
* Initialize the check io framework. This function will parse the arguments
* and delete any entries that it is interested in.
@@ -129,16 +129,6 @@ extern int erts_no_poll_threads;
#include "erl_poll.h"
#include "erl_port_task.h"
-#ifdef __WIN32__
-/*
- * Current erts_poll implementation for Windows cannot handle
- * active events in the set of events polled.
- */
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#else
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#endif
-
typedef struct {
Eterm inport;
Eterm outport;
@@ -150,10 +140,6 @@ struct erts_nif_select_event {
Eterm pid;
Eterm immed;
Uint32 refn[ERTS_REF_NUMBERS];
- Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress
- * 1: Do deselect before next poll
- * >1: Countdown of ignored events
- */
};
typedef struct {
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index 5a5874ddfa..51d50933ff 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -121,7 +121,8 @@
/* Define to print info about modifications done to each fd */
#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__)
/* Define to print entry and exit from erts_poll_wait (can be very spammy) */
-//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0)
#else
#define ERTS_POLL_DEBUG_PRINT 0
@@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE)
-#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE)
+#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0)
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
+ int oneshot;
#endif /* ERTS_POLL_USE_KERNEL_POLL */
#if ERTS_POLL_USE_POLL
@@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
erts_atomic32_t have_update_requests;
erts_mtx_t mtx;
- erts_atomic32_t wakeup_state;
+#else
+ int do_wakeup;
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- int wake_fds[2];
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd;
#endif
+ ErtsMonotonicTime timeout_time;
+ erts_atomic32_t wakeup_state;
+ int wake_fds[2];
};
void erts_silence_warn_unused_result(long unused);
@@ -365,63 +371,47 @@ static void print_misc_debug_info(void);
uint32_t epoll_events(int kp_fd, int fd);
#endif
-
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
reset_wakeup_state(ErtsPollSet *ps)
{
erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
}
-#endif
static ERTS_INLINE int
is_woken(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
-#else
- return 0;
-#endif
}
static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
-#else
- return 0;
-#endif
}
static ERTS_INLINE void
woke_up(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN);
-#endif
}
/*
* --- Wakeup pipe -----------------------------------------------------------
*/
-#if ERTS_POLL_USE_WAKEUP_PIPE
-
static ERTS_INLINE void
wake_poller(ErtsPollSet *ps, int interrupted)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int wake;
erts_aint32_t wakeup_state;
if (!interrupted)
@@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted)
wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
if (wake)
-#endif
{
ssize_t res;
+ DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted);
if (ps->wake_fds[1] < 0)
return; /* Not initialized yet */
do {
@@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps)
fd,
erl_errno_id(errno), errno);
}
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (intr)
erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
-#endif
}
static void
@@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps)
ps->wake_fds[1] = wake_fds[1];
}
+/*
+ * --- timer fd -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_TIMERFD
+
+/* We use the timerfd when using epoll_wait to get high accuracy
+ timeouts, i.e. we want to sleep with < ms accuracy. */
+
+static void
+create_timerfd(ErtsPollSet *ps)
+{
+ int do_wake = 0;
+ int timer_fd = timerfd_create(CLOCK_MONOTONIC,0);
+ ERTS_POLL_EXPORT(erts_poll_control)(ps,
+ timer_fd,
+ ERTS_POLL_OP_ADD,
+ ERTS_POLL_EV_IN,
+ &do_wake);
+ if (ps->internal_fd_limit <= timer_fd)
+ ps->internal_fd_limit = timer_fd + 1;
+ ps->timer_fd = timer_fd;
+}
+
+static ERTS_INLINE void
+timerfd_set(ErtsPollSet *ps, struct itimerspec *its)
+{
+#ifdef DEBUG
+ struct itimerspec old_its;
+ int res;
+ res = timerfd_settime(ps->timer_fd, 0, its, &old_its);
+ ASSERT(res == 0);
+ ASSERT(old_its.it_interval.tv_sec == 0 &&
+ old_its.it_interval.tv_nsec == 0 &&
+ old_its.it_value.tv_sec == 0 &&
+ old_its.it_value.tv_nsec == 0);
+
+#else
+ timerfd_settime(ps->timer_fd, 0, its, NULL);
#endif
+}
+
+static ERTS_INLINE int
+timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) {
+
+ struct itimerspec its;
+ /* we always have to clear the timer */
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 0;
+ timerfd_settime(ps->timer_fd, 0, &its, NULL);
+
+ /* only timeout fd triggered */
+ if (res == 1 && pr[0].data.fd == ps->timer_fd)
+ return 0;
+
+ return res;
+}
+
+#endif /* ERTS_POLL_USE_TIMERFD */
/*
* --- Poll set update requests ----------------------------------------------
@@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
struct epoll_event epe_templ;
struct epoll_event epe;
- epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT;
+ epe_templ.events = ERTS_POLL_EV_E2N(events);
epe_templ.data.fd = fd;
+ if (ps->oneshot)
+ epe_templ.events |= EPOLLONESHOT;
+
#ifdef VALGRIND
/* Silence invalid valgrind warning ... */
memset((void *) &epe.data, 0, sizeof(epoll_data_t));
@@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
int res = 0, len = 0;
struct kevent evts[2];
struct timespec ts = {0, 0};
+ uint32_t oneshot = 0;
if (op == ERTS_POLL_OP_ADD) {
/* This is a hack to make the "noshell" option work; kqueue can poll
@@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
man page), but it seems to be the way it works...
*/
+ if (ps->oneshot)
+ oneshot = EV_DISPATCH;
+
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
/* We could probably skip this delete, do we want to? */
@@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
uint32_t flags;
erts_atomic_inc_nob(&ps->no_of_user_fds);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
} else {
uint32_t flags;
ASSERT(op == ERTS_POLL_OP_MOD);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
}
#else
- uint32_t flags = EV_ADD|EV_ONESHOT;
+ uint32_t flags = EV_ADD;
+
+ if (ps->oneshot) flags |= EV_ONESHOT;
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
@@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
for (i = 0; i < len; i++) {
const char *flags = "UNKNOWN";
- if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+ if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE";
if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+ if (evts[i].flags == (EV_ADD)) flags = "EV_ADD";
#ifdef EV_DISPATCH
if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
- if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+ if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
#endif
keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
@@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
goto done;
}
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
new_events = ERTS_POLL_EV_NVAL;
goto done;
}
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
#endif
}
@@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
ERTS_POLLSET_UNLOCK(ps);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (*do_wake) {
+ if (*do_wake)
wake_poller(ps, 0);
- }
-#endif
return res;
}
@@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
static ERTS_INLINE int
ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
int n = chk_fds_res < max_res ? chk_fds_res : max_res, i;
int res = n;
-#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
-#endif
- for (i = 0; i < n; i++) {
- int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
-#ifdef DEBUG_PRINT_MODE
- ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
-#endif
+ if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) {
- DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
- ev2str(evts),
+ for (i = 0; i < n; i++) {
+ int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
+#if ERTS_POLL_DEBUG_PRINT
+ ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
+
+ if (fd != wake_fd
+#if ERTS_POLL_USE_TIMERFD
+ && fd != ps->timer_fd
+#endif
+ )
+ DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
+ ev2str(evts),
#if ERTS_POLL_USE_KQUEUE
- "kqueue"
+ "kqueue"
#elif ERTS_POLL_USE_EPOLL
- "epoll"
+ "epoll"
#else
- "/dev/poll"
+ "/dev/poll"
+#endif
+ );
#endif
- );
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
- if (n == 1)
- return 0;
- }
+ if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
+#if ERTS_POLL_USE_TIMERFD
+ else if (fd == ps->timer_fd) {
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
#endif
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- else {
- /* Reset the events to emulate ONESHOT semantics */
- ps->fds_status[fd].events = 0;
- enqueue_update_request(ps, fd);
- }
+ else {
+ /* Reset the events to emulate ONESHOT semantics */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
+ }
#endif
+ }
}
- return res;
-#else
- ASSERT(chk_fds_res <= max_res);
- return chk_fds_res;
-#endif
+ if (res == 0)
+ return res;
+ else
+ return n;
}
#else /* !ERTS_POLL_USE_KERNEL_POLL */
@@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res,
#endif /* !ERTS_POLL_USE_KERNEL_POLL */
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout(ErtsPollSet *ps,
+ int resolution,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout;
+
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT) {
+ timeout = 0;
+ }
+ else if (timeout_time == ERTS_POLL_INF_TIMEOUT) {
+ timeout = -1;
+ }
+ else {
+ ErtsMonotonicTime diff_time, current_time;
+ current_time = erts_get_monotonic_time(NULL);
+ diff_time = timeout_time - current_time;
+ if (diff_time <= 0) {
+ timeout = 0;
+ }
+ else {
+ switch (resolution) {
+ case 1000:
+ /* Round up to nearest even milli second */
+ timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1;
+ if (timeout > (ErtsMonotonicTime) INT_MAX)
+ timeout = (ErtsMonotonicTime) INT_MAX;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000);
+ break;
+ case 1000000:
+ /* Round up to nearest even micro second */
+ timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000);
+ break;
+ case 1000000000:
+ /* Round up to nearest even nano second */
+ timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000);
+ break;
+ default:
+ ERTS_INTERNAL_ERROR("Invalid resolution");
+ timeout = 0;
+ break;
+ }
+ }
+ }
+ return timeout;
+}
+
+#if ERTS_POLL_USE_SELECT
+
+static ERTS_INLINE int
+get_timeout_timeval(ErtsPollSet *ps,
+ SysTimeval *tvp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
+
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000);
+ tvp->tv_sec = sec;
+ tvp->tv_usec = timeout - sec*(1000*1000);
+
+ ASSERT(tvp->tv_sec >= 0);
+ ASSERT(tvp->tv_usec >= 0);
+ ASSERT(tvp->tv_usec < 1000*1000);
+
+ return 1;
+ }
+
+}
+
+#endif
+
+#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
+get_timeout_timespec(ErtsPollSet *ps,
+ struct timespec *tsp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000*1000);
+ tsp->tv_sec = sec;
+ tsp->tv_nsec = timeout - sec*(1000*1000*1000);
+
+ ASSERT(tsp->tv_sec >= 0);
+ ASSERT(tsp->tv_nsec >= 0);
+ ASSERT(tsp->tv_nsec < 1000*1000*1000);
+
+ return 1;
+ }
+}
+
+#endif
+
+#if ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_itimerspec(ErtsPollSet *ps,
+ struct itimerspec *itsp,
+ ErtsMonotonicTime timeout_time)
+{
+
+ itsp->it_interval.tv_sec = 0;
+ itsp->it_interval.tv_nsec = 0;
+
+ return get_timeout_timespec(ps, &itsp->it_value, timeout_time);
+}
+
+#endif
+
+static ERTS_INLINE int
+check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time)
{
int res;
- int timeout = do_wait ? -1 : 0;
- DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait);
+ int timeout;
+ DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time);
{
#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+#if ERTS_POLL_USE_TIMERFD
+ struct itimerspec its;
+ timeout = get_timeout_itimerspec(ps, &its, timeout_time);
+ if (timeout > 0) {
+ timerfd_set(ps, &its);
+ res = epoll_wait(ps->kp_fd, pr, max_res, -1);
+ res = timerfd_clear(ps, pr, res, max_res);
+ } else {
+ res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
+ }
+#else /* !ERTS_POLL_USE_TIMERFD */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
-
+#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
- struct timespec ts = {0, 0};
- struct timespec *tsp = timeout ? NULL : &ts;
+ struct timespec ts;
+ struct timespec *tsp;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ tsp = timeout < 0 ? NULL : &ts;
res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp);
#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
/*
@@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
poll_res.dp_fds = pr;
- poll_res.dp_timeout = timeout;
+ poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+ struct timespec ts;
+ struct timespec *tsp = &ts;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout < 0) tsp = NULL;
+ res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
-
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval tv = {0, 0};
- SysTimeval *tvp = timeout ? NULL : &tv;
+ SysTimeval tv;
+ SysTimeval *tvp;
+ timeout = get_timeout_timeval(ps, &tv, timeout_time);
+ tvp = timeout < 0 ? NULL : &tv;
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
@@ -1630,7 +1855,8 @@ int
ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd pr[],
int *len,
- ErtsThrPrgrData *tpd)
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time)
{
int res, no_fds, used_fds = 0;
int ebadf = 0;
@@ -1655,53 +1881,56 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
}
#endif
- do_wait = !is_woken(ps) && used_fds == 0;
+ do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
if (do_wait) {
+ tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
- }
+ } else
+ timeout_time = ERTS_POLL_NO_TIMEOUT;
while (1) {
- res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+ if (res != 0)
+ break;
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+ break;
+ if (erts_get_monotonic_time(NULL) >= timeout_time)
+ break;
+ }
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (res < 0
- && errno == EBADF
- && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- /*
- * This may have happened because another thread deselected
- * a fd in our poll set and then closed it, i.e. the driver
- * behaved correctly. We wan't to avoid looking for a bad
- * fd, that may even not exist anymore. Therefore, handle
- * update requests and try again. This behaviour should only
- * happen when using SELECT as the polling mechanism.
- */
- ERTS_POLLSET_LOCK(ps);
- used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
- if (used_fds == no_fds) {
- *len = used_fds;
- ERTS_POLLSET_UNLOCK(ps);
- return 0;
- }
- res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
- /* Keep the lock over the non-blocking poll in order to not
- get any nasty races happening. */
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ /*
+ * This may have happened because another thread deselected
+ * a fd in our poll set and then closed it, i.e. the driver
+ * behaved correctly. We wan't to avoid looking for a bad
+ * fd, that may even not exist anymore. Therefore, handle
+ * update requests and try again. This behaviour should only
+ * happen when using SELECT as the polling mechanism.
+ */
+ ERTS_POLLSET_LOCK(ps);
+ used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+ if (used_fds == no_fds) {
+ *len = used_fds;
ERTS_POLLSET_UNLOCK(ps);
- if (res == 0) {
- errno = EAGAIN;
- res = -1;
- }
+ return 0;
+ }
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT);
+ /* Keep the lock over the non-blocking poll in order to not
+ get any nasty races happening. */
+ ERTS_POLLSET_UNLOCK(ps);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
}
-#endif
-
- if (res != 0)
- break;
- if (!do_wait)
- break;
}
+#endif
if (do_wait) {
erts_thr_progress_finalize_wait(tpd);
@@ -1709,7 +1938,8 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
}
- woke_up(ps);
+ if (ERTS_POLL_USE_WAKEUP(ps))
+ woke_up(ps);
if (res < 0) {
#if ERTS_POLL_USE_SELECT
@@ -1720,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
#endif
res = errno;
}
- else {
+ else if (res == 0) {
+ res = used_fds == 0 ? ETIMEDOUT : 0;
+#ifdef HARD_DEBUG
+ check_poll_result(pr, used_fds);
+#endif
+ *len = used_fds;
+ } else {
#if ERTS_POLL_USE_SELECT
save_results:
#endif
-
ps_locked = 1;
ERTS_POLLSET_LOCK(ps);
@@ -1754,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
void
ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!set)
- reset_wakeup_state(ps);
- else
- wake_poller(ps, 1);
-#endif
+ DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set);
+ if (ERTS_POLL_USE_WAKEUP(ps)) {
+ if (!set)
+ reset_wakeup_state(ps);
+ else
+ wake_poller(ps, 1);
+ }
}
int
@@ -1875,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
if (ps->internal_fd_limit <= kp_fd)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
+ if (ps->id == -1)
+ ps->oneshot = 0;
+ else
+ ps->oneshot = 1;
#endif
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
create_wakeup_pipe(ps);
+
+#if ERTS_POLL_USE_TIMERFD
+ create_timerfd(ps);
+#endif
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
handle_update_requests(ps, NULL, 0);
cleanup_wakeup_pipe(ps);
#endif
@@ -1993,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
pip->memory_size = size;
pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pip->poll_set_size++; /* Wakeup pipe */
-#endif
pip->lazy_updates =
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -2178,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
ASSERT(0);
return;
}
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+ continue;
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd)
+ continue;
+#endif
data &= 0xFFFFFFFF;
ASSERT(fd == data);
/* Events are the events that are being monitored, which of course include
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index e1cea7eb8b..d40dabc529 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -51,6 +51,7 @@
#include "sys.h"
#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX
#ifdef ERTS_ENABLE_KERNEL_POLL
# undef ERTS_ENABLE_KERNEL_POLL
@@ -130,6 +131,9 @@
#endif
#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10
+#define ERTS_POLL_USE_TIMERFD 0
typedef Uint32 ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
#include <sys/epoll.h>
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
#define ERTS_POLL_EV_E2N(EV) \
((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
#endif
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
#define ev2str(ev) \
(((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index f35f64a9f3..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -73,12 +73,14 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
* @param[in] length the length of the res array
* @param[out] length the number of ready events returned in res
* @param tpd the thread progress data to note sleep state in
+ * @param timeout_time the time in native to wake up at
* @return 0 on success, else the ERRNO of the error that happened.
*/
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd res[],
int *length,
- ErtsThrPrgrData *tpd);
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time);
/**
* Interrupt the thread waiting in the pollset. This function should be called
* with set = 0 before any thread calls erts_poll_wait in order to clear any
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 5d832f4d34..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1018,10 +1018,11 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
int erts_poll_wait(ErtsPollSet *ps,
ErtsPollResFd pr[],
int *len,
- ErtsThrPrgrData *tpd)
+ ErtsThrPrgrData *tpd,
+ Sint64 timeout_in)
{
int no_fds;
- DWORD timeout = INFINITE;
+ DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in;
EventData* ev;
int res = 0;
int num = 0;
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl
index f04efb9003..2e0dfa42f3 100644
--- a/erts/emulator/test/scheduler_SUITE.erl
+++ b/erts/emulator/test/scheduler_SUITE.erl
@@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) ->
{Conc, PollType, KP} = get_ioconfig(Config),
{Sched, SchedOnln, _} = get_sstate(Config, ""),
- [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
- [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
-
- [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
-
if
Conc ->
- [5] = get_ionum(Config,"+IOt 5 +IOp 1"),
- [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"),
- [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+ [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
- [2] = get_ionum(Config, "+S 2 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4:2 +IOPt 100"),
- [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
+ [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
+ [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
+ [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+
+ [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"),
+ [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
fail = get_ionum(Config, "+IOt 1 +IOp 2"),
ok;
not Conc ->
+ [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
+
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index fab2f45f28..4e6baa9e0e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -85,7 +85,7 @@ xm_sig_order_proc() ->
receive
may_not_reach -> exit(bad_signal_order);
may_reach -> ok
- after 0 -> ok
+ after 0 -> erlang:yield()
end,
xm_sig_order_proc().