aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
author Lukas Larsson <[email protected]> 2018-12-06 11:45:13 +0100
committer Lukas Larsson <[email protected]> 2018-12-06 11:45:13 +0100
commit 73e4f5f21bbf492ab61c01dee48fa09fd7309a50 (patch)
tree 5f4b82eda1098a94d5fee94232a2819b77a3315a /erts
parent 63e436ae75584c885e4c66c08d5f33c68f67e69e (diff)
parent c6498571109b524fb319300e1b177b942e556f1b (diff)
downloadotp-73e4f5f21bbf492ab61c01dee48fa09fd7309a50.tar.gz
otp-73e4f5f21bbf492ab61c01dee48fa09fd7309a50.tar.bz2
otp-73e4f5f21bbf492ab61c01dee48fa09fd7309a50.zip
Merge branch 'lukas/OTP-21.1.1/scheduler_pollset/OTP-15475' into maint
* lukas/OTP-21.1.1/scheduler_pollset/OTP-15475:
  erts: Move fds with active true behaviour to own pollset
  erts: Fix lists_member_2 reduction count
  erts: Allow code_model_small to be set in xcomp setting
  erts: Implement delay_send using timer instead of poll
  erts: Optimize driver_set_timer(0) to fire at once
  erts: Optimize the inet driver multi timers for one timer
  erts: Move all inet tcp CONNECTED timers to multi timer
  erts: Add erts_io_notify_port_task_executed to check_io msacc state
  erts: Add pre-alloc to ALLOC msacc state
  erts: Make thr prgr wakeup current or sched 1
  erts: Pass thread progress data where possible
Diffstat (limited to 'erts')
-rw-r--r--erts/configure.in6
-rw-r--r--erts/emulator/beam/erl_async.c2
-rw-r--r--erts/emulator/beam/erl_bif_lists.c8
-rw-r--r--erts/emulator/beam/erl_hl_timer.c26
-rw-r--r--erts/emulator/beam/erl_init.c4
-rw-r--r--erts/emulator/beam/erl_port.h2
-rw-r--r--erts/emulator/beam/erl_port_task.c81
-rw-r--r--erts/emulator/beam/erl_port_task.h21
-rw-r--r--erts/emulator/beam/erl_process.c200
-rw-r--r--erts/emulator/beam/erl_process.h4
-rw-r--r--erts/emulator/beam/erl_sched_spec_pre_alloc.h9
-rw-r--r--erts/emulator/beam/erl_thr_progress.c34
-rw-r--r--erts/emulator/beam/erl_thr_progress.h29
-rw-r--r--erts/emulator/beam/erl_trace.c15
-rw-r--r--erts/emulator/drivers/common/inet_drv.c257
-rw-r--r--erts/emulator/sys/common/erl_check_io.c318
-rw-r--r--erts/emulator/sys/common/erl_check_io.h21
-rw-r--r--erts/emulator/sys/common/erl_poll.c511
-rw-r--r--erts/emulator/sys/common/erl_poll.h14
-rw-r--r--erts/emulator/sys/common/erl_poll_api.h6
-rw-r--r--erts/emulator/sys/win32/erl_poll.c10
-rw-r--r--erts/emulator/test/scheduler_SUITE.erl27
-rw-r--r--erts/emulator/test/signal_SUITE.erl2
23 files changed, 1155 insertions, 452 deletions
diff --git a/erts/configure.in b/erts/configure.in
index 1074aab2c2..9245e4dc90 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -3217,7 +3217,11 @@ AC_TRY_RUN([
],
erl_code_model_small=yes,
erl_code_model_small=no,
-erl_code_model_small=no)
+[case X$erl_xcomp_code_model_small in
+ X) erl_code_model_small=no;;
+ Xyes|Xno) erl_code_model_small=$erl_xcomp_code_model_small;;
+ *) AC_MSG_ERROR([Bad erl_xcomp_code_model_small value: $erl_xcomp_code_model_small]);;
+ esac])
AC_MSG_RESULT([$erl_code_model_small])
LDFLAGS="$saved_LDFLAGS"
case $erl_code_model_small in
diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c
index 605a2b3461..44655ad5df 100644
--- a/erts/emulator/beam/erl_async.c
+++ b/erts/emulator/beam/erl_async.c
@@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q,
case ERTS_THR_Q_NEED_THR_PRGR:
{
ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q);
- erts_thr_progress_wakeup(NULL, prgr);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr);
/*
* We do no dequeue finalizing in hope that a new async
* job will arrive before we are woken due to thread
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index a793b34852..aaf262780f 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -871,7 +871,8 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
Eterm list;
Eterm item;
int non_immed_key;
- int max_iter = 10 * CONTEXT_REDS;
+ int reds_left = ERTS_BIF_REDS_LEFT(BIF_P);
+ int max_iter = 16 * reds_left;
if (is_nil(BIF_ARG_2)) {
BIF_RET(am_false);
@@ -889,14 +890,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
}
item = CAR(list_val(list));
if ((item == term) || (non_immed_key && eq(item, term))) {
- BIF_RET2(am_true, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_true, reds_left - max_iter/16);
}
list = CDR(list_val(list));
}
if (is_not_nil(list)) {
+ BUMP_REDS(BIF_P, reds_left - max_iter/16);
BIF_ERROR(BIF_P, BADARG);
}
- BIF_RET2(am_false, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_false, reds_left - max_iter/16);
}
static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c
index 6ec6f8065e..ef7a55fa38 100644
--- a/erts/emulator/beam/erl_hl_timer.c
+++ b/erts/emulator/beam/erl_hl_timer.c
@@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo)
check_canceled_queue(esdp, esdp->timer_service);
- timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
-
- create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
- ? create_tw_timer
- : create_hl_timer);
- tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
- (void *) c_prt, c_prt->common.id,
- THE_NON_VALUE, NULL, NULL, NULL);
- erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ if (tmo == 0) {
+ erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT);
+ erts_port_task_schedule(c_prt->common.id,
+ &c_prt->timeout_task,
+ ERTS_PORT_TASK_TIMEOUT);
+ } else {
+
+ timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
+
+ create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
+ ? create_tw_timer
+ : create_hl_timer);
+ tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
+ (void *) c_prt, c_prt->common.id,
+ THE_NON_VALUE, NULL, NULL, NULL);
+ erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ }
}
void
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index f687dcf335..99e788c718 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -2359,8 +2359,8 @@ system_cleanup(int flush_async)
* The exiting thread might be waiting for
* us to block; need to update status...
*/
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(erts_thr_prgr_data(NULL), 0);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
}
/* Wait forever... */
while (1)
diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h
index 2be0a5bf74..25976d38cc 100644
--- a/erts/emulator/beam/erl_port.h
+++ b/erts/emulator/beam/erl_port.h
@@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p);
#define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11))
/* Last port to terminate halts the emulator */
#define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12))
+/* Check if the event in ready_input should be cleaned */
+#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13))
#ifdef DEBUG
/* Only debug: make sure all flags aren't cleared unintentionally */
#define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31))
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 4928d80f27..c8f2e88127 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q
typedef union {
struct { /* I/O tasks */
ErlDrvEvent event;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ int is_scheduler_event;
+#endif
} io;
struct {
ErtsProc2PortSigCallback callback;
@@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ {
ErtsPortTaskBusyCaller pre_alloc_busy_caller;
};
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
static void begin_port_cleanup(Port *pp,
ErtsPortTask **execq,
@@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
-reset_executed_io_task_handle(ErtsPortTask *ptp)
+reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp)
{
if (ptp->u.alive.handle) {
ASSERT(ptp == handle2task(ptp->u.alive.handle));
- /* The port task handle is reset inside task_executed */
- erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
- reset_port_task_handle);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) {
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
+ } else {
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+ } else
+#endif
+ {
+ /* The port task handle is reset inside task_executed */
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ }
}
}
@@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp)
res = - 1; /* Task already aborted, executing, or executed */
else {
reset_port_task_handle(pthp);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ switch (ptp->type) {
+ case ERTS_PORT_TASK_INPUT:
+ case ERTS_PORT_TASK_OUTPUT:
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ ASSERT(erts_atomic_read_nob(
+ &erts_port_task_outstanding_io_tasks) > 0);
+ erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks);
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+
res = 0;
}
}
@@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id,
va_list argp;
va_start(argp, type);
ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int);
+#endif
va_end(argp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks);
+#endif
break;
}
case ERTS_PORT_TASK_PROC_SIG: {
@@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int processing_busy_q;
int vreds = 0;
int reds = 0;
- erts_aint_t io_tasks_executed = 0;
int fpe_was_unmasked;
erts_aint32_t state;
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_aint_t io_tasks_executed = 0;
+#endif
ERTS_MSACC_PUSH_STATE_M();
ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq));
@@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_OUTPUT:
reds = ERTS_PORT_REDS_OUTPUT;
@@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
LTTNG_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
@@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
erts_unblock_fpe(fpe_was_unmasked);
ERTS_MSACC_POP_STATE_M();
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (io_tasks_executed) {
+ ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks)
+ >= io_tasks_executed);
+ erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks,
+ -1*io_tasks_executed);
+ }
+#endif
+
ASSERT(runq == erts_get_runq_port(pp));
active = finalize_exec(pp, &execq, processing_busy_q);
@@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq)
void
erts_port_task_init(void)
{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks,
+ (erts_aint_t) 0);
+#endif
init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads
+ 1); /* aux_thread */
init_busy_caller_table_alloc();
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index ae78a7d8a3..ca5183b305 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#ifndef ERL_PORT_TASK_H__
#define ERL_PORT_TASK_H__
+#include "erl_poll.h"
+
#undef ERTS_INCLUDE_SCHEDULER_INTERNALS
#if (defined(ERL_PROCESS_C__) \
|| defined(ERL_PORT_TASK_C__) \
@@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#define ERTS_PT_FLG_BAD_OUTPUT (1 << 4)
typedef enum {
- ERTS_PORT_TASK_INPUT,
- ERTS_PORT_TASK_OUTPUT,
+ ERTS_PORT_TASK_INPUT = 0,
+ ERTS_PORT_TASK_OUTPUT = 1,
ERTS_PORT_TASK_TIMEOUT,
ERTS_PORT_TASK_DIST_CMD,
ERTS_PORT_TASK_PROC_SIG
@@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp);
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void);
+/* NOTE: Do not access any of the exported variables directly */
+extern erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp)
erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING);
}
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int
+erts_port_task_have_outstanding_io_tasks(void)
+{
+ return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks)
+ != 0);
+}
+#endif
+
#endif
#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 0f7f1598fd..2427d87f66 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
typedef struct {
int aux_work;
int tse;
- int sys_schedule;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
@@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static erts_atomic32_t doing_sys_schedule;
+#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
@@ -1646,7 +1648,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
}
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1656,7 +1658,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) {
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1670,7 +1672,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val,
else {
awdp->latest_wakeup = val;
awdp->later_op.size = thr_prgr_later_cleanup_op_threshold;
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
}
@@ -3066,6 +3068,7 @@ aux_thread(void *unused)
ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1);
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
+ ErtsThrPrgrData *tpd;
int thr_prgr_active = 1;
ERTS_MSACC_DECLARE_CACHE();
@@ -3087,12 +3090,16 @@ aux_thread(void *unused)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
#if ERTS_POLL_USE_FALLBACK
- ssi->psi = erts_create_pollset_thread(-1);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ssi->psi = erts_create_pollset_thread(-2, tpd);
+#else
+ ssi->psi = erts_create_pollset_thread(-1, tpd);
+#endif
#endif
sched_prep_spin_wait(ssi);
@@ -3105,11 +3112,11 @@ aux_thread(void *unused)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
@@ -3120,7 +3127,7 @@ aux_thread(void *unused)
#endif
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
#if ERTS_POLL_USE_FALLBACK
@@ -3132,11 +3139,11 @@ aux_thread(void *unused)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
}
}
#else
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
flgs = sched_spin_wait(ssi, 0);
@@ -3153,7 +3160,7 @@ aux_thread(void *unused)
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
}
}
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
#endif
}
@@ -3171,7 +3178,8 @@ poll_thread(void *arg)
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
int thr_prgr_active = 1;
- struct erts_poll_thread *psi = erts_create_pollset_thread(id);
+ struct erts_poll_thread *psi;
+ ErtsThrPrgrData *tpd;
ERTS_MSACC_DECLARE_CACHE();
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -3192,9 +3200,12 @@ poll_thread(void *arg)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
+
+ psi = erts_create_pollset_thread(id, tpd);
+
ssi->psi = psi;
sched_prep_spin_wait(ssi);
@@ -3207,16 +3218,16 @@ poll_thread(void *arg)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
flgs = sched_spin_wait(ssi, 0);
@@ -3226,7 +3237,7 @@ poll_thread(void *arg)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
}
}
}
@@ -3236,6 +3247,59 @@ poll_thread(void *arg)
return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this schedulers run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flags is cleared
+ when a check_balance assigns another scheduler to be the poller in
+ the overload scenario. */
+ if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == ERTS_RUNQ_FLG_CHECKIO) {
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT,
+ check_io_timer, NULL);
+ } else {
+ ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO);
+ }
+}
+
+#else
+#define clear_sys_scheduling()
+#define prepare_for_sys_schedule() 0
+#endif
+
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
@@ -3286,13 +3350,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
}
if (aux_work) {
@@ -3301,7 +3365,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3321,19 +3385,36 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
if (do_timeout) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
- else {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) {
+ /* We sleep in check_io, only for normal schedulers */
+ if (thr_prgr_active) {
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ flgs = sched_spin_wait(ssi, 0);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ erts_check_io(ssi->psi, timeout_time);
+ current_time = erts_get_monotonic_time(esdp);
+ }
+ }
+ clear_sys_scheduling();
+ } else {
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(esdp);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp));
}
-
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
@@ -3363,7 +3444,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp))
- erts_thr_progress_finalize_wait(esdp);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp));
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time)
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3392,7 +3473,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (ERTS_SCHEDULER_IS_DIRTY(esdp))
dirty_sched_wall_time_change(esdp, working = 1);
else if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
@@ -4580,6 +4661,15 @@ check_balance(ErtsRunQueue *c_rq)
if (blnc_no_rqs == 1) {
c_rq->check_balance_reds = INT_MAX;
erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
return;
}
@@ -5099,6 +5189,19 @@ erts_fprintf(stderr, "--------------------------------\n");
/* Publish new migration paths... */
erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran for full, we need to do active polling,
+ so we setup a timer that does active polling */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
/* Reset balance statistics in all online queues */
for (qix = 0; qix < blnc_no_rqs; qix++) {
Uint32 flags = run_queue_info[qix].flags;
@@ -5108,6 +5211,8 @@ erts_fprintf(stderr, "--------------------------------\n");
ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
if (rq->waiting)
flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
rq->full_reds_history_sum
= run_queue_info[qix].full_reds_history_sum;
@@ -5117,8 +5222,7 @@ erts_fprintf(stderr, "--------------------------------\n");
ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
rq->max_len = erts_atomic32_read_dirty(&rq->len);
for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
ErtsRunQueueInfo *rqi;
@@ -5557,7 +5661,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
return EINVAL;
}
- params->sys_schedule = sys_sched;
params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
params->aux_work = sys_sched * aux_work_fact;
@@ -5768,6 +5871,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
erts_atomic32_init_nob(&no_empty_run_queues, 0);
erts_no_run_queues = n;
@@ -7565,7 +7671,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (aux_work|evacuate) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp),
+ thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
if (aux_work)
@@ -7573,8 +7680,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
aux_work,
1);
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (evacuate) {
erts_runq_lock(esdp->run_queue);
evacuate_run_queue(esdp->run_queue, &sbp);
@@ -7593,18 +7700,18 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (!aux_work && current_time < timeout_time) {
/* go to sleep... */
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
suspend_normal_scheduler_sleep(esdp);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL));
current_time = erts_get_monotonic_time(esdp);
}
if (current_time >= timeout_time) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -7661,7 +7768,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
profile_scheduler(make_small(esdp->no), am_active);
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
@@ -8296,6 +8403,11 @@ sched_thread_func(void *vesdp)
erts_msacc_init_thread("scheduler", no, 1);
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ esdp->ssi->psi = erts_create_pollset_thread(-1, NULL);
+#endif
+
erts_alloc_register_scheduler(vesdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
{
@@ -9313,12 +9425,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
}
}
- leader_update = erts_thr_progress_update(esdp);
+ leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp));
aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
if (aux_work | leader_update) {
erts_runq_unlock(rq);
if (leader_update)
- erts_thr_progress_leader_update(esdp);
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (aux_work)
handle_aux_work(&esdp->aux_work_data, aux_work, 0);
erts_runq_lock(rq);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 8d20ccdf90..a1b029adbe 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size;
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9))
#define ERTS_RUNQ_FLG_HALTING \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10))
+#define ERTS_RUNQ_FLG_CHECKIO \
+ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11))
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12)
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index b119c59ab3..74cc966cbe 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
erts_sspa_chunk_t *chnk;
erts_sspa_chunk_header_t *chdr;
erts_sspa_blk_t *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC);
chnk = erts_sspa_cix2chunk(data, cix);
chdr = &chnk->aligned.header;
@@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
chdr->local.last = NULL;
ERTS_SSPA_DBG_CHK_LCL(chdr);
}
- if (chdr->local.cnt <= chdr->local.lim)
- return (char *) erts_sspa_process_remote_frees(chdr, res);
+ if (chdr->local.cnt <= chdr->local.lim) {
+ res = erts_sspa_process_remote_frees(chdr, res);
+ ERTS_MSACC_POP_STATE_M_X();
+ return (char*) res;
+ }
else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
chdr->head.no_thr_progress_check++;
ASSERT(res);
+ ERTS_MSACC_POP_STATE_M_X();
return (char *) res;
}
diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c
index aa08eb40ec..bac437efe9 100644
--- a/erts/emulator/beam/erl_thr_progress.c
+++ b/erts/emulator/beam/erl_thr_progress.c
@@ -508,6 +508,10 @@ init_wakeup_request_array(ErtsThrPrgrVal *w)
}
}
+ErtsThrPrgrData *erts_thr_progress_data(void) {
+ return erts_tsd_get(erts_thr_prgr_data_key__);
+}
+
void
erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
{
@@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
}
-void
+ErtsThrPrgrData *
erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
ErtsThrPrgrCallbacks *callbacks,
int pref_wakeup)
@@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
wakeup_managed(id);
}
callbacks->finalize_wait(callbacks->arg);
+ return tpd;
}
static ERTS_INLINE int
@@ -796,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd)
== ERTS_THR_PRGR_LFLG_NO_LEADER))
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
}
@@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd)
}
int
-erts_thr_progress_update(ErtsSchedulerData *esdp)
+erts_thr_progress_update(ErtsThrPrgrData *tpd) /* Takes the thread progress data directly; callers no longer pass scheduler data. */
{
- return update(thr_prgr_data(esdp));
+ return update(tpd); /* thin wrapper: forwards to update() and returns its result */
}
int
-erts_thr_progress_leader_update(ErtsSchedulerData *esdp)
+erts_thr_progress_leader_update(ErtsThrPrgrData *tpd) /* Takes the thread progress data directly; callers no longer pass scheduler data. */
{
- return leader_update(thr_prgr_data(esdp));
+ return leader_update(tpd); /* thin wrapper: forwards to leader_update() and returns its result */
}
void
-erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd)
{
erts_aint32_t lflgs;
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -884,14 +888,13 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
== ERTS_THR_PRGR_LFLG_NO_LEADER
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
void
-erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
ErtsThrPrgrVal current, val;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
}
void
-erts_thr_progress_active(ErtsSchedulerData *esdp, int on)
+erts_thr_progress_active(ErtsThrPrgrData *tpd, int on)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -973,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle)
== (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM)
&& got_sched_wakeups()) {
/* Others waiting for us... */
- wakeup_managed(0);
+ wakeup_managed(1);
}
}
}
@@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value)
}
void
-erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+erts_thr_progress_wakeup(ErtsThrPrgrData *tpd,
ErtsThrPrgrVal value)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+
ASSERT(!tpd->is_temporary);
if (tpd->is_managed)
request_wakeup_managed(tpd, value);
diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h
index 8329995b24..00a9e61407 100644
--- a/erts/emulator/beam/erl_thr_progress.h
+++ b/erts/emulator/beam/erl_thr_progress.h
@@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__;
void erts_thr_progress_pre_init(void);
void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged);
-void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
- ErtsThrPrgrCallbacks *,
- int);
+ErtsThrPrgrData *erts_thr_progress_register_managed_thread(
+ ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int);
void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *);
-void erts_thr_progress_active(ErtsSchedulerData *esdp, int on);
-void erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+void erts_thr_progress_active(ErtsThrPrgrData *, int on);
+void erts_thr_progress_wakeup(ErtsThrPrgrData *,
ErtsThrPrgrVal value);
-int erts_thr_progress_update(ErtsSchedulerData *esdp);
-int erts_thr_progress_leader_update(ErtsSchedulerData *esdp);
-void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp);
-void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp);
+int erts_thr_progress_update(ErtsThrPrgrData *);
+int erts_thr_progress_leader_update(ErtsThrPrgrData *);
+void erts_thr_progress_prepare_wait(ErtsThrPrgrData *);
+void erts_thr_progress_finalize_wait(ErtsThrPrgrData *);
ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void);
void erts_thr_progress_unmanaged_continue__(int umrefc_ix);
+ErtsThrPrgrData *erts_thr_progress_data(void);
void erts_thr_progress_dbg_print_state(void);
+ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp);
+
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc);
@@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE ErtsThrPrgrData *
+erts_thr_prgr_data(ErtsSchedulerData *esdp) { /* Resolve the thread progress data for esdp; esdp may be NULL. */
+ if (esdp) {
+ return &esdp->thr_progress_data; /* stored inside the scheduler data itself */
+ } else {
+ return erts_thr_progress_data(); /* no scheduler data given: use the out-of-line lookup */
+ }
+}
+
ERTS_GLB_INLINE ErtsThrPrgrVal
erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc)
{
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index 53a020e7a5..2350d4c02f 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused)
{
ErtsThrPrgrCallbacks callbacks;
ErtsSysMsgQ *local_sys_message_queue = NULL;
+ ErtsThrPrgrData *tpd;
int wait = 0;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused)
callbacks.wait = sys_msg_dispatcher_wait;
callbacks.finalize_wait = sys_msg_dispatcher_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
while (1) {
int end_wait = 0;
@@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused)
if (!sys_message_queue) {
erts_mtx_unlock(&smq_mtx);
end_wait = 1;
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(tpd, 0);
+ erts_thr_progress_prepare_wait(tpd);
erts_mtx_lock(&smq_mtx);
}
@@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused)
erts_mtx_unlock(&smq_mtx);
if (end_wait) {
- erts_thr_progress_finalize_wait(NULL);
- erts_thr_progress_active(NULL, 1);
+ erts_thr_progress_finalize_wait(tpd);
+ erts_thr_progress_active(tpd, 1);
}
/* Send trace messages ... */
@@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused)
Process *proc = NULL;
Port *port = NULL;
- if (erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
#ifdef DEBUG_PRINTOUTS
print_msg_type(smqp);
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 4c1ab90c01..47eb5df7dd 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -38,6 +38,7 @@
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
+#include <stdint.h>
#define IDENTITY(c) c
#define STRINGIFY_1(b) IDENTITY(#b)
@@ -955,6 +956,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#endif
#endif
+typedef struct _tcp_descriptor tcp_descriptor;
#if defined(TCP_CORK)
#define INET_TCP_NOPUSH TCP_CORK
@@ -1010,16 +1012,19 @@ typedef struct _multi_timer_data {
struct _multi_timer_data *prev;
} MultiTimerData;
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
- ErlDrvTermData caller, unsigned timeout,
- void (*timeout_fun)(ErlDrvData drv_data,
- ErlDrvTermData caller));
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ ErlDrvTermData caller, unsigned timeout,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data);
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p);
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port);
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
typedef struct {
int id; /* id used to identify reply */
@@ -1278,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
};
#endif
-typedef struct {
+struct _tcp_descriptor {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
int high; /* high watermark */
int low; /* low watermark */
@@ -1294,7 +1299,8 @@ typedef struct {
int http_state; /* 0 = response|request 1=headers fields */
inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
inet_async_multi_op *multi_last;
- MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd_cache; /* A cache for timer allocations */
#ifdef HAVE_SENDFILE
struct {
ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time
@@ -1310,7 +1316,7 @@ typedef struct {
Uint64 length;
} sendfile;
#endif
-} tcp_descriptor;
+};
/* send function */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
@@ -9775,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
desc->tcp_add_flags = 0;
desc->http_state = 0;
desc->mtd = NULL;
+ desc->mtd_cache = NULL;
desc->multi_first = desc->multi_last = NULL;
DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
return (ErlDrvData) desc;
@@ -9878,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc)
driver_demonitor_process(desc->inet.port, &monitor);
send_async_error(desc->inet.dport, id, caller, am_closed);
}
- clean_multi_timers(&(desc->mtd), desc->inet.port);
}
-
else if (desc->inet.state == INET_STATE_CONNECTING) {
async_error_am(INETP(desc), am_closed);
}
else if (desc->inet.state == INET_STATE_CONNECTED) {
async_error_am_all(INETP(desc), am_closed);
}
+ clean_multi_timers(desc, desc->inet.port);
}
/*
@@ -9929,6 +9935,15 @@ static void tcp_desc_close(tcp_descriptor* desc)
erl_inet_close(INETP(desc));
}
+static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy) /* Multi-timer callback registered by the recv request in tcp_inet_ctl; `dummy` is unused. */
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(!desc->inet.active); /* the recv timer is only expected on a non-active socket */
+ sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); /* last argument 0: stop selecting on read/close */
+ desc->i_remain = 0; /* drop the count of bytes still awaited */
+ async_error_am(INETP(desc), am_timeout); /* answer the pending async request with am_timeout */
+}
+
/* TCP requests from Erlang */
static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
char* buf, ErlDrvSizeT len,
@@ -10099,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (time_left <= 0) {
time_left = 1;
}
- omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller,
+ omtd = add_multi_timer(desc, desc->inet.port, ocaller,
time_left, &tcp_inet_multi_timeout);
}
enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10119,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_xerror("noproc", rbuf, rsize);
}
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10216,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
async_error_am(INETP(desc), am_timeout);
else {
if (timeout != INET_INFINITY)
- driver_set_timer(desc->inet.port, timeout);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ timeout, &tcp_inet_recv_timeout);
if (!INETP(desc)->is_ignored)
sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
else
@@ -10321,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
}
+static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy) /* Multi-timer callback registered by tcp_send/tcp_sendv when busy_on_send is set; `dummy` is unused. */
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(IS_BUSY(INETP(desc)));
+ ASSERT(desc->busy_on_send); /* this timer is only added together with busy_on_send = 1 */
+ desc->inet.caller = desc->inet.busy_caller; /* direct the error reply below at the busy caller */
+ desc->inet.state &= ~INET_F_BUSY;
+ desc->busy_on_send = 0;
+ set_busy_port(desc->inet.port, 0); /* clear the busy-port condition */
+ inet_reply_error_am(INETP(desc), am_timeout);
+ if (desc->send_timeout_close) { /* optionally close the descriptor after a send timeout */
+ tcp_desc_close(desc);
+ }
+}
+
/*
** tcp_inet_timeout:
** called when timer expire:
** TCP socket may be:
**
-** a) receiving -- deselect
+** a) receiving -- send timeout
** b) connecting -- close socket
** c) accepting -- reset listener
**
@@ -10340,26 +10371,9 @@ static void tcp_inet_timeout(ErlDrvData e)
DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */
- fire_multi_timers(&(desc->mtd), desc->inet.port, e);
+ fire_multi_timers(desc, desc->inet.port, e);
} else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) {
- if (desc->busy_on_send) {
- ASSERT(IS_BUSY(INETP(desc)));
- desc->inet.caller = desc->inet.busy_caller;
- desc->inet.state &= ~INET_F_BUSY;
- desc->busy_on_send = 0;
- set_busy_port(desc->inet.port, 0);
- inet_reply_error_am(INETP(desc), am_timeout);
- if (desc->send_timeout_close) {
- tcp_desc_close(desc);
- }
- }
- else {
- /* assume recv timeout */
- ASSERT(!desc->inet.active);
- sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
- desc->i_remain = 0;
- async_error_am(INETP(desc), am_timeout);
- }
+ fire_multi_timers(desc, desc->inet.port, e);
}
else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
/* assume connect timeout */
@@ -10489,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
return;
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
if (desc->multi_first == NULL) {
sock_select(INETP(desc),FD_ACCEPT,0);
@@ -10528,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
}
@@ -10551,9 +10565,10 @@ static int tcp_recv_closed(tcp_descriptor* desc)
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+
if (!desc->inet.active) {
- /* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ /* We must cancel any timer here ! */
+ clean_multi_timers(desc, INETP(desc)->port);
/* passive mode do not terminate port ! */
tcp_clear_input(desc);
if (desc->inet.exitf) {
@@ -10588,7 +10603,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
@@ -10602,7 +10617,7 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
#endif
if (!desc->inet.active) {
/* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ clean_multi_timers(desc, INETP(desc)->port);
tcp_clear_input(desc);
if (desc->inet.exitf) {
tcp_desc_close(desc);
@@ -10707,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
if (len == 0) {
/* empty buffer or waiting for more input */
if ((desc->i_buf == NULL) || (desc->i_remain > 0))
- return count;
+ return 0;
if ((n = tcp_remain(desc, &len)) != 0) {
if (n < 0) /* packet error */
return n;
if (len > 0) /* more data pending */
desc->i_remain = len;
- return count;
+ return 0;
}
}
@@ -10765,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
len = 0;
if (!desc->inet.active) {
- if (!desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- }
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
if (desc->i_buf != NULL)
tcp_restart_input(desc);
@@ -10793,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
int len;
int nread;
- if (desc->i_buf == NULL) { /* allocte a read buffer */
+ if (desc->i_buf == NULL) { /* allocate a read buffer */
int sz = (request_len > 0) ? request_len : desc->inet.bufsz;
if ((desc->i_buf = alloc_buffer(sz)) == NULL)
@@ -10866,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
}
else {
- if ((nread = tcp_remain(desc, &len)) < 0)
+ nread = tcp_remain(desc, &len);
+ if (nread < 0)
return tcp_recv_error(desc, EMSGSIZE);
else if (nread == 0)
- return tcp_deliver(desc, len);
+ return tcp_deliver(desc, len);
else if (len > 0)
desc->i_remain = len; /* set remain */
}
@@ -11188,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
driver_demonitor_process(desc->inet.port, &monitor);
@@ -11247,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
if (IS_BUSY(INETP(desc))) {
desc->inet.caller = desc->inet.busy_caller;
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- desc->busy_on_send = 0;
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
@@ -11353,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err)
return tcp_send_or_shutdown_error(desc, err);
}
+static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy) /* Multi-timer callback that tcp_sendv registers with a 0 ms timeout when TCP_ADDF_DELAY_SEND is set; `dummy` is unused. */
+{
+ tcp_descriptor *desc = (tcp_descriptor*)data;
+ (void)tcp_inet_output(desc, INETP(desc)->s); /* run the output path on the socket; its result is deliberately ignored */
+}
+
/*
** Send non-blocking vector data
*/
@@ -11405,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11420,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
- n = 0;
+ driver_enqv(ix, ev, 0);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ 0, &tcp_inet_delay_send);
+ return 0;
} else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
vsize, &n, 0))) {
if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
@@ -11503,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11607,7 +11634,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) {
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port,
+ &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
@@ -11914,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifdef __WIN32__
desc->inet.send_would_block = 1;
#endif
+ /* If DELAY_SEND is set, ready_output may have
+ been called without a prior select, so we do
+ a select here in order to get into the correct
+ state */
+ if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
+ sock_select(INETP(desc), FD_WRITE, 1);
goto done;
} else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
0 when sending packets with
@@ -11939,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
set_busy_port(desc->inet.port, 0);
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
inet_reply_ok(INETP(desc));
@@ -12751,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = NULL;
if (!desc->active) {
async_error(desc, err);
- driver_cancel_timer(desc->port);
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
}
else {
@@ -12840,7 +12874,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
return count;
count++;
if (!desc->active) {
- driver_cancel_timer(desc->port); /* possibly cancel */
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
return count; /* passive mode (read one packet only) */
}
@@ -12919,55 +12953,69 @@ make_noninheritable_handle(SOCKET s)
* Multi-timers
*/
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, /* Fire every due timer at the head of desc->mtd, then re-arm the port timer for the next pending one. */
ErlDrvData data)
{
ErlDrvTime next_timeout;
- if (!*first) {
+ MultiTimerData *curr = desc->mtd; /* desc->mtd itself is only rewritten once firing is finished */
+ if (!curr) {
ASSERT(0);
return;
}
#ifdef DEBUG
{
ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC);
- ASSERT(chk >= (*first)->when);
+ ASSERT(chk >= curr->when);
}
#endif
do {
- MultiTimerData *save = *first;
- *first = save->next;
+ MultiTimerData *save = curr;
+
(*(save->timeout_function))(data,save->caller);
- FREE(save);
- if (*first == NULL) {
+
+ curr = curr->next; /* advance only after the callback has run */
+
+ if (desc->mtd_cache == NULL) /* keep one fired entry so add_multi_timer can reuse it without allocating */
+ desc->mtd_cache = save;
+ else
+ FREE(save);
+
+ if (curr == NULL) {
+ desc->mtd = NULL;
return;
}
- (*first)->prev = NULL;
- next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ curr->prev = NULL;
+ next_timeout = (curr->when == INT64_MIN) ? 0 : curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC); /* INT64_MIN is the 0 ms sentinel set by add_multi_timer: treat it as due now, since subtracting a positive time from it would overflow */
} while (next_timeout <= 0);
+ desc->mtd = curr;
driver_set_timer(port, (unsigned long) next_timeout);
}
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port)
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port) /* Cancel the port timer and free every pending multi-timer entry plus the cached spare. */
{
- MultiTimerData *p;
- if (*first) {
+ if (desc->mtd) { /* presumably the port timer is only armed while the list is non-empty -- confirm */
driver_cancel_timer(port);
}
- while (*first) {
- p = *first;
- *first = p->next;
- FREE(p);
+ while (desc->mtd) {
+ MultiTimerData *p = desc->mtd;
+ desc->mtd = p->next;
+ FREE(p);
+ }
+ desc->mtd = NULL; /* already NULL once the loop exits; kept as an explicit reset */
+ if (desc->mtd_cache) { /* the cached spare entry is released as well */
+ FREE(desc->mtd_cache);
+ desc->mtd_cache = NULL;
}
}
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p)
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
if (p->prev != NULL) {
p->prev->next = p->next;
} else {
driver_cancel_timer(port);
- *first = p->next;
- if (*first) {
- ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ desc->mtd = p->next;
+ if (desc->mtd) {
+ ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
if (ntmo < 0)
ntmo = 0;
driver_set_timer(port, (unsigned long) ntmo);
@@ -12976,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim
if (p->next != NULL) {
p->next->prev = p->prev;
}
- FREE(p);
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = p;
+ else
+ FREE(p);
}
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
+/* Cancel the first pending timer whose callback is timeout_fun; does nothing if no entry matches */
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller))
+{
+ MultiTimerData *timer = desc->mtd;
+ while(timer && timer->timeout_function != timeout_fun) { /* linear scan from the head of desc->mtd */
+ timer = timer->next;
+ }
+ if (timer) {
+ remove_multi_timer(desc, port, timer); /* unlinks the entry; re-arms the port timer when the head was removed */
+ }
+}
+
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvTermData caller, unsigned timeout,
void (*timeout_fun)(ErlDrvData drv_data,
ErlDrvTermData caller))
{
MultiTimerData *mtd, *p, *s;
- mtd = ALLOC(sizeof(MultiTimerData));
- mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1;
+
+ /* Use cached timer if available */
+ if (desc->mtd_cache != NULL) {
+ mtd = desc->mtd_cache;
+ desc->mtd_cache = NULL;
+ } else
+ mtd = ALLOC(sizeof(MultiTimerData));
+
+ if (timeout)
+ mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
+ else
+ mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
+
mtd->timeout_function = timeout_fun;
mtd->caller = caller;
mtd->next = mtd->prev = NULL;
- for(p = *first,s = NULL; p != NULL; s = p, p = p->next) {
+
+ /* Find correct slot in timer linked list */
+ for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
if (p->when >= mtd->when) {
break;
}
}
+ /* Insert in linked list */
if (!p) {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
}
} else {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
@@ -13013,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
mtd->next = p;
p->prev = mtd;
}
+ /* Possibly set new timer */
if (!s) {
- if (mtd->next) {
- driver_cancel_timer(port);
- }
driver_set_timer(port,timeout);
}
return mtd;
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 9f115706dc..c681fa481f 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -46,11 +46,11 @@
#if 0
#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__)
#define DEBUG_PRINT_FD(FMT, STATE, ...) \
- DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \
+ DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \
(STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \
ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
- (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)
+ flag2str((STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)) /* conditional moved inside flag2str so %s always gets a string ("CLEAR" when STATE is NULL), never the integer 0 */
#define DEBUG_PRINT_MODE
#else
#define DEBUG_PRINT(...)
@@ -76,22 +76,40 @@ typedef enum {
typedef enum {
ERTS_EV_FLAG_CLEAR = 0,
ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */
-#ifdef ERTS_ENABLE_KERNEL_POLL
- ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated
+ to scheduler pollset */
+ ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in
+ scheduler pollset */
+#else /* scheduler polling compiled out: both flags equal CLEAR, so `flags & FLAG` tests are always false */
+ ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+ ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+#endif
+#ifdef ERTS_POLL_USE_FALLBACK /* NOTE(review): other uses of this macro in the file test it with #if, not #ifdef -- confirm which is intended */
+ ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd
and it was put in the nkp version */
#else
ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR,
#endif
/* Combinations */
- ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK
+ ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK,
+ ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, /* used, migrated, but not currently in the scheduler pollset */
+ ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, /* same as above without the USED bit */
+ ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER
} EventStateFlags;
#define flag2str(flags) \
((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \
((flags) == ERTS_EV_FLAG_USED ? "USED" : \
((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \
- ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR"))))
+ ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \
+ ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \
+ ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \
+ "ERROR")))))))) /* exact-match lookup, first hit wins: any value not equal to one of the listed constants prints "ERROR" */
/* How many events that can be handled at once by one erts_poll_wait call */
#define ERTS_CHECK_IO_POLL_RES_LEN 512
@@ -105,6 +123,7 @@ typedef struct erts_poll_thread
{
ErtsPollSet *ps;
ErtsPollResFd *pollres;
+ ErtsThrPrgrData *tpd;
int pollres_len;
} ErtsPollThread;
@@ -112,10 +131,13 @@ typedef struct erts_poll_thread
* Which pollset to use is determined by hashing the fd.
*/
static ErtsPollSet **pollsetv;
+static ErtsPollThread *psiv;
#if ERTS_POLL_USE_FALLBACK
static ErtsPollSet *flbk_pollset;
#endif
-static ErtsPollThread *psiv;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ErtsPollSet *sched_pollset;
+#endif
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
@@ -130,10 +152,12 @@ typedef struct {
ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */
} stop;
} driver;
- ErtsPollEvents events; /* The events that have been selected upon */
+ ErtsPollEvents events; /* The events that have been selected upon */
ErtsPollEvents active_events; /* The events currently active in the pollset */
EventStateType type;
EventStateFlags flags;
+ int count; /* Number of times this fd has triggered
+ without being deselected. */
} ErtsDrvEventState;
struct drv_ev_state_shared {
@@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd)
#if ERTS_POLL_USE_FALLBACK
static ERTS_INLINE ErtsPollSet *
-get_fallback(void)
+get_fallback_pollset(void)
{
return flbk_pollset;
}
#endif
+static ERTS_INLINE ErtsPollSet *
+get_scheduler_pollset(ErtsSysFdType fd) /* Pollset for fds moved to scheduler polling; fd is only consulted when that feature is compiled out. */
+{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ return sched_pollset; /* the one scheduler pollset, regardless of fd */
+#else
+ return get_pollset(fd); /* no scheduler pollset: same result as get_pollset(fd) */
+#endif
+}
+
/*
* Place a fd within a pollset. This will automatically use
* the fallback ps if needed.
@@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
if (!(flags & ERTS_EV_FLAG_FALLBACK)) {
- res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+
+ if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) {
+ erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
+ if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) {
+ res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+ } else {
+ res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ }
#if ERTS_POLL_USE_FALLBACK
if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
/* When an add fails with NVAL, the poll/kevent operation could not
put that fd in the pollset, so we instead put it into a fallback pollset */
state->flags |= ERTS_EV_FLAG_FALLBACK;
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
}
} else {
ASSERT(op != ERTS_POLL_OP_ADD);
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
#endif
}
@@ -425,59 +468,77 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
ErtsSysFdType fd = itp->fd;
erts_mtx_t *mtx = fd_mtx(fd);
- int active_events;
+ ErtsPollOp op = ERTS_POLL_OP_MOD;
+ int active_events, new_events = 0;
ErtsDrvEventState *state;
ErtsDrvSelectDataState *free_select = NULL;
ErtsNifSelectDataState *free_nif = NULL;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO);
+
erts_mtx_lock(mtx);
state = get_drv_ev_state(fd);
+ reset_handle(pthp);
+
active_events = state->active_events;
- switch (type) {
- case ERTS_PORT_TASK_INPUT:
+ if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) {
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+
+ DEBUG_PRINT_FD("executed ready_input", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
+ if (state->events & ERTS_POLL_EV_IN) {
+ active_events |= ERTS_POLL_EV_IN;
+ if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
+ if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
+ op = ERTS_POLL_OP_ADD;
+ state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER;
+ new_events = ERTS_POLL_EV_IN;
+ DEBUG_PRINT_FD("moving to scheduler ps", state);
+ } else
+ new_events = active_events;
+ if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING)
+ state->count++;
+ }
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
- DEBUG_PRINT_FD("executed ready_input", state);
+ DEBUG_PRINT_FD("executed ready_output", state);
- ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
- if (state->events & ERTS_POLL_EV_IN)
- active_events |= ERTS_POLL_EV_IN;
- break;
- case ERTS_PORT_TASK_OUTPUT:
+ ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
+ if (state->events & ERTS_POLL_EV_OUT) {
+ active_events |= ERTS_POLL_EV_OUT;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN)
+ new_events = ERTS_POLL_EV_OUT;
+ else
+ new_events = active_events;
+ }
+ break;
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
+ break;
+ }
- DEBUG_PRINT_FD("executed ready_output", state);
+ if (state->active_events != active_events && new_events) {
+ state->active_events = active_events;
+ new_events = erts_io_control(state, op, new_events);
+ }
- ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
- if (state->events & ERTS_POLL_EV_OUT)
- active_events |= ERTS_POLL_EV_OUT;
- break;
- default:
- erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
- break;
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (state->active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
}
- reset_handle(pthp);
-
- if (active_events) {
- /* This is not needed if active_events has not changed */
- if (state->active_events != active_events) {
- ErtsPollEvents new_events;
- state->active_events = active_events;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events);
-
- /* We were unable to re-insert the fd into the pollset, signal the callback. */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- if (active_events & ERTS_POLL_EV_IN)
- iready(state->driver.select->inport, state);
- if (active_events & ERTS_POLL_EV_OUT)
- oready(state->driver.select->outport, state);
- state->active_events = 0;
- }
- }
- } else {
+ if (!active_events)
check_fd_cleanup(state, &free_select, &free_nif);
- }
erts_mtx_unlock(mtx);
@@ -485,6 +546,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
free_drv_select_data(free_select);
if (free_nif)
free_nif_select_data(free_nif);
+
+ ERTS_MSACC_POP_STATE_M_X();
}
static ERTS_INLINE void
@@ -755,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
ctl_op = ERTS_POLL_OP_ADD;
}
+ new_events = state->active_events;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ new_events &= ~ERTS_POLL_EV_IN;
}
else {
ctl_events &= old_events;
state->events &= ~ctl_events;
state->active_events &= ~ctl_events;
+ new_events = state->active_events;
+
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ state->count = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ new_events = 0;
+ }
+ }
if (!state->events) {
if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
@@ -770,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
new_events = erts_io_control_wakeup(state, ctl_op,
- state->active_events,
+ new_events,
&wake_poller);
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
@@ -802,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events & ERTS_POLL_EV_IN) {
abort_tasks(state, ERL_DRV_READ);
state->driver.select->inport = NIL;
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
}
if (ctl_events & ERTS_POLL_EV_OUT) {
abort_tasks(state, ERL_DRV_WRITE);
@@ -810,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (state->events == 0) {
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
+ if (state->flags & ERTS_EV_FLAG_SCHEDULER)
+ erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
state->flags = 0;
}
/*else keep it, as fd will probably be selected upon again */
@@ -1426,7 +1503,8 @@ iready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) {
stale_drv_select(id, state, ERL_DRV_READ);
} else {
DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
@@ -1444,7 +1522,8 @@ oready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ 0) != 0) {
stale_drv_select(id, state, ERL_DRV_WRITE);
} else {
DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
@@ -1506,7 +1585,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
{
if (psi) {
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
erts_poll_interrupt_flbk(psi->ps, set);
return;
}
@@ -1516,12 +1595,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
}
ErtsPollThread *
-erts_create_pollset_thread(int id) {
+erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) {
+ psiv[id].tpd = tpd;
return psiv+id;
}
void
-erts_check_io(ErtsPollThread *psi)
+erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time)
{
int pollres_len;
int poll_ret, i;
@@ -1536,14 +1616,14 @@ erts_check_io(ErtsPollThread *psi)
pollres_len = psi->pollres_len;
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
- poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
} else
#endif
{
- poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
}
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -1579,7 +1659,12 @@ erts_check_io(ErtsPollThread *psi)
ErtsNifSelectDataState *free_nif = NULL;
ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
ErtsDrvEventState *state;
- ErtsPollEvents revents;
+ ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
+
+ /* The fd will be set to -1 if a pollset internal fd was triggered
+ that was determined to be too expensive to remove from the result.
+ */
+ if (fd == -1) continue;
erts_mtx_lock(fd_mtx(fd));
@@ -1590,8 +1675,6 @@ erts_check_io(ErtsPollThread *psi)
continue;
}
- revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
-
DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
if (revents & ERTS_POLL_EV_ERR) {
@@ -1603,25 +1686,39 @@ erts_check_io(ErtsPollThread *psi)
*/
revents = state->active_events;
state->active_events = 0;
+
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ erts_io_control(state, ERTS_POLL_OP_MOD, 0);
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
} else {
/* Disregard any events that are not active at the moment,
for instance this could happen if the driver/nif does
select/deselect in rapid succession. */
revents &= state->active_events | ERTS_POLL_EV_NVAL;
- state->active_events &= ~revents;
- /* Reactivate the poll op if there are still active events */
- if (state->active_events) {
- ErtsPollEvents new_events;
- DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events));
+ if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) {
+ ErtsPollEvents reactive_events;
+ state->active_events &= ~revents;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+ reactive_events = state->active_events;
- /* Unable to re-enable the fd, signal all callbacks */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- revents |= state->active_events;
- state->active_events = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ reactive_events &= ~ERTS_POLL_EV_IN;
+
+ /* Reactivate the poll op if there are still active events */
+ if (reactive_events) {
+ ErtsPollEvents new_events;
+ DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events));
+
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events);
+
+ /* Unable to re-enable the fd, signal all callbacks */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ revents |= reactive_events;
+ state->active_events &= ~reactive_events;
+ }
}
}
}
@@ -1697,7 +1794,7 @@ erts_check_io(ErtsPollThread *psi)
case ERTS_EV_TYPE_STOP_USE: {
#if ERTS_POLL_USE_FALLBACK
- ASSERT(psi->ps == get_fallback());
+ ASSERT(psi->ps == get_fallback_pollset());
#endif
drv_ptr = state->driver.stop.drv_ptr;
state->type = ERTS_EV_TYPE_NONE;
@@ -2035,12 +2132,17 @@ erts_init_check_io(int *argc, char **argv)
for (j=0; j < erts_no_pollsets; j++)
pollsetv[j] = erts_poll_create_pollset(j);
-#if ERTS_POLL_USE_FALLBACK
- flbk_pollset = erts_poll_create_pollset_flbk(-1);
+ no_poll_threads = erts_no_poll_threads;
+
+ j = -1;
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ sched_pollset = erts_poll_create_pollset(j--);
+ no_poll_threads++;
#endif
- no_poll_threads = erts_no_poll_threads;
#if ERTS_POLL_USE_FALLBACK
+ flbk_pollset = erts_poll_create_pollset_flbk(j--);
no_poll_threads++;
#endif
@@ -2050,7 +2152,15 @@ erts_init_check_io(int *argc, char **argv)
psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
- psiv[0].ps = get_fallback();
+ psiv[0].ps = get_fallback_pollset();
+ psiv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[0].ps = get_scheduler_pollset(0);
psiv++;
#endif
@@ -2107,7 +2217,12 @@ erts_check_io_size(void)
int i;
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info(get_fallback(), &pi);
+ erts_poll_info(get_fallback_pollset(), &pi);
+ res += pi.memory_size;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &pi);
res += pi.memory_size;
#endif
@@ -2139,13 +2254,21 @@ erts_check_io_info(void *proc)
Uint sz, *szp, *hp, **hpp;
ErtsPollInfo *piv;
Sint i, j = 0, len;
- int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK;
+ int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING;
ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
+ ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1);
piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info_flbk(get_fallback(), &piv[0]);
+ erts_poll_info_flbk(get_fallback_pollset(), &piv[0]);
+ piv[0].poll_threads = 1;
+ piv[0].active_fds = 0;
+ piv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &piv[0]);
piv[0].poll_threads = 1;
piv[0].active_fds = 0;
piv++;
@@ -2199,6 +2322,7 @@ erts_check_io_info(void *proc)
sz = 0;
piv -= ERTS_POLL_USE_FALLBACK;
+ piv -= ERTS_POLL_USE_SCHEDULER_POLLING;
bld_it:
@@ -2303,15 +2427,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
static ERTS_INLINE void
print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
{
- const char* delim = "";
- if(f & ERTS_EV_FLAG_USED) {
- erts_dsprintf(dsbufp, "%s","USED");
- delim = "|";
- }
- if(f & ERTS_EV_FLAG_FALLBACK) {
- erts_dsprintf(dsbufp, "%s%s", delim, "FLBK");
- delim = "|";
- }
+ erts_dsprintf(dsbufp, "%s", flag2str(f));
}
#ifdef DEBUG_PRINT_MODE
@@ -2653,13 +2769,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
#if ERTS_POLL_USE_FALLBACK
erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
- erts_poll_get_selected_events_flbk(get_fallback(), counters.epep,
+ erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep,
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
}
#endif
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n");
+ erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep,
+ drv_ev_state.max_fds);
+ for (fd = 0; fd < len; fd++) {
+ if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) {
+ if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE)
+ counters.epep[fd] &= ~ERTS_POLL_EV_OUT;
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
+ }
+#endif
+
erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
for (i = 0; i < erts_no_pollsets; i++) {
@@ -2668,8 +2797,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
- && get_pollset_id(fd) == i)
+ && get_pollset_id(fd) == i) {
+ if (counters.epep[fd] != ERTS_POLL_EV_NONE &&
+ drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ /* We add the in flag if it is enabled in the scheduler pollset
+ and get_selected_events works on the platform */
+ counters.epep[fd] |= ERTS_POLL_EV_IN;
+ }
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
}
}
for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
@@ -2716,7 +2852,7 @@ void erts_lcnt_update_cio_locks(int enable) {
#endif
#if ERTS_POLL_USE_FALLBACK
- erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable);
+ erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable);
#endif
for (i = 0; i < erts_no_pollsets; i++)
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index 443ef1264c..31182be5ec 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -68,7 +68,7 @@ int erts_check_io_max_files(void);
*
* @param pt the poll thread structure to use.
*/
-void erts_check_io(struct erts_poll_thread *pt);
+void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time);
/**
* Initialize the check io framework. This function will parse the arguments
* and delete any entries that it is interested in.
@@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set);
/**
* Create a new poll thread structure that is associated with the number no.
* It is the callers responsibility that no is unique.
+ *
+ * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler
+ * @param tpd the thread progress data of the pollset thread
*/
-struct erts_poll_thread* erts_create_pollset_thread(int no);
+struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd);
#ifdef ERTS_ENABLE_LOCK_COUNT
/**
* Toggle lock counting on all check io locks
@@ -126,16 +129,6 @@ extern int erts_no_poll_threads;
#include "erl_poll.h"
#include "erl_port_task.h"
-#ifdef __WIN32__
-/*
- * Current erts_poll implementation for Windows cannot handle
- * active events in the set of events polled.
- */
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#else
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#endif
-
typedef struct {
Eterm inport;
Eterm outport;
@@ -147,10 +140,6 @@ struct erts_nif_select_event {
Eterm pid;
Eterm immed;
Uint32 refn[ERTS_REF_NUMBERS];
- Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress
- * 1: Do deselect before next poll
- * >1: Countdown of ignored events
- */
};
typedef struct {
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index b4d1575ee5..51d50933ff 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -75,6 +75,7 @@
# define WANT_NONBLOCKING
#endif
+#include "erl_thr_progress.h"
#include "erl_poll.h"
#if ERTS_POLL_USE_KQUEUE
# include <sys/types.h>
@@ -95,7 +96,6 @@
# include <limits.h>
# endif
#endif
-#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
#include "erl_msacc.h"
@@ -121,7 +121,8 @@
/* Define to print info about modifications done to each fd */
#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__)
/* Define to print entry and exit from erts_poll_wait (can be very spammy) */
-//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0)
#else
#define ERTS_POLL_DEBUG_PRINT 0
@@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE)
-#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE)
+#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0)
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
+ int oneshot;
#endif /* ERTS_POLL_USE_KERNEL_POLL */
#if ERTS_POLL_USE_POLL
@@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
erts_atomic32_t have_update_requests;
erts_mtx_t mtx;
- erts_atomic32_t wakeup_state;
+#else
+ int do_wakeup;
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- int wake_fds[2];
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd;
#endif
+ ErtsMonotonicTime timeout_time;
+ erts_atomic32_t wakeup_state;
+ int wake_fds[2];
};
void erts_silence_warn_unused_result(long unused);
@@ -365,63 +371,47 @@ static void print_misc_debug_info(void);
uint32_t epoll_events(int kp_fd, int fd);
#endif
-
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
reset_wakeup_state(ErtsPollSet *ps)
{
erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
}
-#endif
static ERTS_INLINE int
is_woken(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
-#else
- return 0;
-#endif
}
static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
-#else
- return 0;
-#endif
}
static ERTS_INLINE void
woke_up(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN);
-#endif
}
/*
* --- Wakeup pipe -----------------------------------------------------------
*/
-#if ERTS_POLL_USE_WAKEUP_PIPE
-
static ERTS_INLINE void
wake_poller(ErtsPollSet *ps, int interrupted)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int wake;
erts_aint32_t wakeup_state;
if (!interrupted)
@@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted)
wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
if (wake)
-#endif
{
ssize_t res;
+ DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted);
if (ps->wake_fds[1] < 0)
return; /* Not initialized yet */
do {
@@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps)
fd,
erl_errno_id(errno), errno);
}
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (intr)
erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
-#endif
}
static void
@@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps)
ps->wake_fds[1] = wake_fds[1];
}
+/*
+ * --- timer fd -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_TIMERFD
+
+/* We use the timerfd when using epoll_wait to get high accuracy
+ timeouts, i.e. we want to sleep with < ms accuracy. */
+
+/*
+ * Create a CLOCK_MONOTONIC timerfd for the pollset ps, register it for
+ * input events in ps, and remember it in ps->timer_fd.
+ *
+ * ps->internal_fd_limit is raised, if needed, so that it is strictly
+ * greater than the new descriptor.
+ *
+ * Failing to create the timerfd aborts the emulator: continuing would
+ * store -1 in ps->timer_fd, every later timerfd_settime() on it would
+ * fail, and a wait that relies on the timer would have nothing to wake
+ * it up.
+ */
+static void
+create_timerfd(ErtsPollSet *ps)
+{
+    int do_wake = 0;
+    int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0);
+
+    if (timer_fd < 0) {
+        erts_exit(ERTS_ABORT_EXIT,
+                  "%s:%d:create_timerfd(): "
+                  "Failed to create timerfd: %s (%d)\n",
+                  __FILE__,
+                  __LINE__,
+                  erl_errno_id(errno),
+                  errno);
+    }
+
+    /*
+     * The fd is added to the pollset before it is recorded in
+     * ps->timer_fd; this order is kept from the original code.
+     * NOTE(review): poll_control() answers ERTS_POLL_EV_NVAL for
+     * fd == ps->timer_fd in at least one path, so swapping the two
+     * steps would presumably make this ADD be rejected -- confirm.
+     */
+    ERTS_POLL_EXPORT(erts_poll_control)(ps,
+                                        timer_fd,
+                                        ERTS_POLL_OP_ADD,
+                                        ERTS_POLL_EV_IN,
+                                        &do_wake);
+
+    if (ps->internal_fd_limit <= timer_fd)
+        ps->internal_fd_limit = timer_fd + 1;
+    ps->timer_fd = timer_fd;
+}
+
+/*
+ * Program the pollset's timerfd with the setting in its (flags 0).
+ *
+ * In DEBUG builds the call is additionally checked: it must succeed,
+ * and the setting it replaces must be all zeroes, i.e. no earlier
+ * timeout may still be programmed when a new one is set. In other
+ * builds the return value is ignored.
+ */
+static ERTS_INLINE void
+timerfd_set(ErtsPollSet *ps, struct itimerspec *its)
+{
+#ifndef DEBUG
+    timerfd_settime(ps->timer_fd, 0, its, NULL);
+#else
+    struct itimerspec old_its;
+    int res = timerfd_settime(ps->timer_fd, 0, its, &old_its);
+
+    ASSERT(res == 0);
+    /* The previous setting has to have been cleared already */
+    ASSERT(old_its.it_interval.tv_sec == 0 &&
+           old_its.it_interval.tv_nsec == 0 &&
+           old_its.it_value.tv_sec == 0 &&
+           old_its.it_value.tv_nsec == 0);
+#endif
+}
+
+/*
+ * Reset the pollset's timerfd after a wait and translate the wait
+ * result.
+ *
+ * The timer is always reprogrammed with an all-zero setting, whether
+ * or not it fired. If the wait reported exactly one event and that
+ * event belongs to the timerfd, 0 is returned; in every other case
+ * res is passed through unchanged. The entries in pr are never
+ * modified here, and max_res is not used.
+ */
+static ERTS_INLINE int
+timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) {
+    struct itimerspec cleared;
+    int only_timer_fired;
+
+    /* The timer has to be cleared no matter what was triggered */
+    memset(&cleared, 0, sizeof(cleared));
+    timerfd_settime(ps->timer_fd, 0, &cleared, NULL);
+
+    only_timer_fired = (res == 1 && pr[0].data.fd == ps->timer_fd);
+
+    return only_timer_fired ? 0 : res;
+}
+
+#endif /* ERTS_POLL_USE_TIMERFD */
/*
* --- Poll set update requests ----------------------------------------------
@@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
struct epoll_event epe_templ;
struct epoll_event epe;
- epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT;
+ epe_templ.events = ERTS_POLL_EV_E2N(events);
epe_templ.data.fd = fd;
+ if (ps->oneshot)
+ epe_templ.events |= EPOLLONESHOT;
+
#ifdef VALGRIND
/* Silence invalid valgrind warning ... */
memset((void *) &epe.data, 0, sizeof(epoll_data_t));
@@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
int res = 0, len = 0;
struct kevent evts[2];
struct timespec ts = {0, 0};
+ uint32_t oneshot = 0;
if (op == ERTS_POLL_OP_ADD) {
/* This is a hack to make the "noshell" option work; kqueue can poll
@@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
man page), but it seems to be the way it works...
*/
+ if (ps->oneshot)
+ oneshot = EV_DISPATCH;
+
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
/* We could probably skip this delete, do we want to? */
@@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
uint32_t flags;
erts_atomic_inc_nob(&ps->no_of_user_fds);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
} else {
uint32_t flags;
ASSERT(op == ERTS_POLL_OP_MOD);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
}
#else
- uint32_t flags = EV_ADD|EV_ONESHOT;
+ uint32_t flags = EV_ADD;
+
+ if (ps->oneshot) flags |= EV_ONESHOT;
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
@@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
for (i = 0; i < len; i++) {
const char *flags = "UNKNOWN";
- if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+ if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE";
if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+ if (evts[i].flags == (EV_ADD)) flags = "EV_ADD";
#ifdef EV_DISPATCH
if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
- if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+ if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
#endif
keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
@@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
goto done;
}
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
new_events = ERTS_POLL_EV_NVAL;
goto done;
}
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
#endif
}
@@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
ERTS_POLLSET_UNLOCK(ps);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (*do_wake) {
+ if (*do_wake)
wake_poller(ps, 0);
- }
-#endif
return res;
}
@@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
static ERTS_INLINE int
ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
int n = chk_fds_res < max_res ? chk_fds_res : max_res, i;
int res = n;
-#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
-#endif
- for (i = 0; i < n; i++) {
- int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
-#ifdef DEBUG_PRINT_MODE
- ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
-#endif
+ if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) {
+
+ for (i = 0; i < n; i++) {
+ int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
+#if ERTS_POLL_DEBUG_PRINT
+ ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
- DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
- ev2str(evts),
+ if (fd != wake_fd
+#if ERTS_POLL_USE_TIMERFD
+ && fd != ps->timer_fd
+#endif
+ )
+ DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
+ ev2str(evts),
#if ERTS_POLL_USE_KQUEUE
- "kqueue"
+ "kqueue"
#elif ERTS_POLL_USE_EPOLL
- "epoll"
+ "epoll"
#else
- "/dev/poll"
+ "/dev/poll"
+#endif
+ );
#endif
- );
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
- if (n == 1)
- return 0;
- }
+ if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
+#if ERTS_POLL_USE_TIMERFD
+ else if (fd == ps->timer_fd) {
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
#endif
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- else {
- /* Reset the events to emulate ONESHOT semantics */
- ps->fds_status[fd].events = 0;
- enqueue_update_request(ps, fd);
- }
+ else {
+ /* Reset the events to emulate ONESHOT semantics */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
+ }
#endif
+ }
}
- return res;
-#else
- ASSERT(chk_fds_res <= max_res);
- return chk_fds_res;
-#endif
+ if (res == 0)
+ return res;
+ else
+ return n;
}
#else /* !ERTS_POLL_USE_KERNEL_POLL */
@@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res,
#endif /* !ERTS_POLL_USE_KERNEL_POLL */
+/*
+ * Convert the absolute deadline timeout_time into a relative timeout.
+ *
+ * resolution selects the unit of the result: 1000 gives milliseconds,
+ * 1000000 microseconds and 1000000000 nanoseconds.
+ *
+ * Returns
+ *    0   when timeout_time is ERTS_POLL_NO_TIMEOUT or the deadline is
+ *        not in the future,
+ *   -1   when timeout_time is ERTS_POLL_INF_TIMEOUT,
+ *   otherwise the remaining time in the requested unit, reduced by
+ *   ERTS_PREMATURE_TIMEOUT() for that unit. The millisecond value is
+ *   capped at INT_MAX before that reduction.
+ *
+ * Any other resolution is reported through ERTS_INTERNAL_ERROR() and
+ * yields 0. The current time is only read when an actual deadline was
+ * given. ps is not used.
+ */
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout(ErtsPollSet *ps,
+            int resolution,
+            ErtsMonotonicTime timeout_time)
+{
+    ErtsMonotonicTime remaining, timeout;
+
+    if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+        return 0;
+
+    if (timeout_time == ERTS_POLL_INF_TIMEOUT)
+        return -1;
+
+    remaining = timeout_time - erts_get_monotonic_time(NULL);
+    if (remaining <= 0)
+        return 0;
+
+    /*
+     * In each unit the conversion is applied to (remaining - 1) and 1
+     * is added afterwards, so that a partial unit counts as a whole
+     * one (presumably the conversion macros truncate -- confirm).
+     */
+    switch (resolution) {
+    case 1000:
+        timeout = ERTS_MONOTONIC_TO_MSEC(remaining - 1) + 1;
+        if (timeout > (ErtsMonotonicTime) INT_MAX)
+            timeout = (ErtsMonotonicTime) INT_MAX;
+        timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000);
+        return timeout;
+    case 1000000:
+        timeout = ERTS_MONOTONIC_TO_USEC(remaining - 1) + 1;
+        timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000);
+        return timeout;
+    case 1000000000:
+        timeout = ERTS_MONOTONIC_TO_NSEC(remaining - 1) + 1;
+        timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000);
+        return timeout;
+    default:
+        ERTS_INTERNAL_ERROR("Invalid resolution");
+        return 0;
+    }
+}
+
+#if ERTS_POLL_USE_SELECT
+
+/*
+ * Express the deadline timeout_time as a relative SysTimeval, using
+ * get_timeout() with microsecond resolution.
+ *
+ * Returns
+ *    0   and sets *tvp to zero when get_timeout() yields 0,
+ *   -1   and leaves *tvp untouched when get_timeout() yields -1,
+ *    1   and stores the timeout split into seconds and microseconds
+ *        in *tvp otherwise.
+ */
+static ERTS_INLINE int
+get_timeout_timeval(ErtsPollSet *ps,
+                    SysTimeval *tvp,
+                    ErtsMonotonicTime timeout_time)
+{
+    ErtsMonotonicTime usecs = get_timeout(ps, 1000*1000, timeout_time);
+
+    if (usecs == -1)
+        return -1;
+
+    if (usecs == 0) {
+        tvp->tv_sec = 0;
+        tvp->tv_usec = 0;
+        return 0;
+    }
+
+    /* Whole seconds go to tv_sec, the remainder to tv_usec */
+    tvp->tv_sec = usecs / (1000*1000);
+    tvp->tv_usec = usecs % (1000*1000);
+
+    ASSERT(tvp->tv_sec >= 0);
+    ASSERT(tvp->tv_usec >= 0);
+    ASSERT(tvp->tv_usec < 1000*1000);
+
+    return 1;
+}
+
+#endif
+
+#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD
+
+/*
+ * Express the deadline timeout_time as a relative struct timespec,
+ * using get_timeout() with nanosecond resolution.
+ *
+ * Returns
+ *    0   and sets *tsp to zero when get_timeout() yields 0,
+ *   -1   and leaves *tsp untouched when get_timeout() yields -1,
+ *    1   and stores the timeout split into seconds and nanoseconds
+ *        in *tsp otherwise.
+ */
+static ERTS_INLINE int
+get_timeout_timespec(ErtsPollSet *ps,
+                     struct timespec *tsp,
+                     ErtsMonotonicTime timeout_time)
+{
+    ErtsMonotonicTime nsecs = get_timeout(ps, 1000*1000*1000, timeout_time);
+
+    if (nsecs == -1)
+        return -1;
+
+    if (nsecs == 0) {
+        tsp->tv_sec = 0;
+        tsp->tv_nsec = 0;
+        return 0;
+    }
+
+    /* Whole seconds go to tv_sec, the remainder to tv_nsec */
+    tsp->tv_sec = nsecs / (1000*1000*1000);
+    tsp->tv_nsec = nsecs % (1000*1000*1000);
+
+    ASSERT(tsp->tv_sec >= 0);
+    ASSERT(tsp->tv_nsec >= 0);
+    ASSERT(tsp->tv_nsec < 1000*1000*1000);
+
+    return 1;
+}
+
+#endif
+
+#if ERTS_POLL_USE_TIMERFD
+
+/*
+ * Fill in *itsp for the deadline timeout_time.
+ *
+ * it_interval is always zeroed. it_value is filled in by
+ * get_timeout_timespec(), whose return value (-1, 0 or 1) is passed
+ * on to the caller unchanged.
+ */
+static ERTS_INLINE int
+get_timeout_itimerspec(ErtsPollSet *ps,
+                       struct itimerspec *itsp,
+                       ErtsMonotonicTime timeout_time)
+{
+    struct timespec *interval = &itsp->it_interval;
+    struct timespec *value = &itsp->it_value;
+
+    interval->tv_sec = 0;
+    interval->tv_nsec = 0;
+
+    return get_timeout_timespec(ps, value, timeout_time);
+}
+
+#endif
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
+check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time)
{
int res;
- int timeout = do_wait ? -1 : 0;
- DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait);
+ int timeout;
+ DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time);
{
#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+#if ERTS_POLL_USE_TIMERFD
+ struct itimerspec its;
+ timeout = get_timeout_itimerspec(ps, &its, timeout_time);
+ if (timeout > 0) {
+ timerfd_set(ps, &its);
+ res = epoll_wait(ps->kp_fd, pr, max_res, -1);
+ res = timerfd_clear(ps, pr, res, max_res);
+ } else {
+ res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
+ }
+#else /* !ERTS_POLL_USE_TIMERFD */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
-
+#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
- struct timespec ts = {0, 0};
- struct timespec *tsp = timeout ? NULL : &ts;
+ struct timespec ts;
+ struct timespec *tsp;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ tsp = timeout < 0 ? NULL : &ts;
res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp);
#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
/*
@@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
poll_res.dp_fds = pr;
- poll_res.dp_timeout = timeout;
+ poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+ struct timespec ts;
+ struct timespec *tsp = &ts;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout < 0) tsp = NULL;
+ res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
-
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval tv = {0, 0};
- SysTimeval *tvp = timeout ? NULL : &tv;
+ SysTimeval tv;
+ SysTimeval *tvp;
+ timeout = get_timeout_timeval(ps, &tv, timeout_time);
+ tvp = timeout < 0 ? NULL : &tv;
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
@@ -1629,7 +1854,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int
ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time)
{
int res, no_fds, used_fds = 0;
int ebadf = 0;
@@ -1654,61 +1881,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
}
#endif
- do_wait = !is_woken(ps) && used_fds == 0;
+ do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
if (do_wait) {
- erts_thr_progress_prepare_wait(NULL);
+ tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
- }
+ } else
+ timeout_time = ERTS_POLL_NO_TIMEOUT;
while (1) {
- res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+ if (res != 0)
+ break;
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+ break;
+ if (erts_get_monotonic_time(NULL) >= timeout_time)
+ break;
+ }
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (res < 0
- && errno == EBADF
- && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- /*
- * This may have happened because another thread deselected
- * a fd in our poll set and then closed it, i.e. the driver
- * behaved correctly. We wan't to avoid looking for a bad
- * fd, that may even not exist anymore. Therefore, handle
- * update requests and try again. This behaviour should only
- * happen when using SELECT as the polling mechanism.
- */
- ERTS_POLLSET_LOCK(ps);
- used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
- if (used_fds == no_fds) {
- *len = used_fds;
- ERTS_POLLSET_UNLOCK(ps);
- return 0;
- }
- res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
- /* Keep the lock over the non-blocking poll in order to not
- get any nasty races happening. */
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ /*
+ * This may have happened because another thread deselected
+ * a fd in our poll set and then closed it, i.e. the driver
+ * behaved correctly. We want to avoid looking for a bad
+ * fd, that may not even exist anymore. Therefore, handle
+ * update requests and try again. This behaviour should only
+ * happen when using SELECT as the polling mechanism.
+ */
+ ERTS_POLLSET_LOCK(ps);
+ used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+ if (used_fds == no_fds) {
+ *len = used_fds;
ERTS_POLLSET_UNLOCK(ps);
- if (res == 0) {
- errno = EAGAIN;
- res = -1;
- }
+ return 0;
+ }
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT);
+ /* Keep the lock over the non-blocking poll in order to not
+ get any nasty races happening. */
+ ERTS_POLLSET_UNLOCK(ps);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
}
-#endif
-
- if (res != 0)
- break;
- if (!do_wait)
- break;
}
+#endif
if (do_wait) {
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_UPDATE_CACHE();
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
}
- woke_up(ps);
+ if (ERTS_POLL_USE_WAKEUP(ps))
+ woke_up(ps);
if (res < 0) {
#if ERTS_POLL_USE_SELECT
@@ -1719,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
#endif
res = errno;
}
- else {
+ else if (res == 0) {
+ res = used_fds == 0 ? ETIMEDOUT : 0;
+#ifdef HARD_DEBUG
+ check_poll_result(pr, used_fds);
+#endif
+ *len = used_fds;
+ } else {
#if ERTS_POLL_USE_SELECT
save_results:
#endif
-
ps_locked = 1;
ERTS_POLLSET_LOCK(ps);
@@ -1753,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
void
ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!set)
- reset_wakeup_state(ps);
- else
- wake_poller(ps, 1);
-#endif
+ DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set);
+ if (ERTS_POLL_USE_WAKEUP(ps)) {
+ if (!set)
+ reset_wakeup_state(ps);
+ else
+ wake_poller(ps, 1);
+ }
}
int
@@ -1874,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
if (ps->internal_fd_limit <= kp_fd)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
+ if (ps->id == -1)
+ ps->oneshot = 0;
+ else
+ ps->oneshot = 1;
#endif
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
create_wakeup_pipe(ps);
+
+#if ERTS_POLL_USE_TIMERFD
+ create_timerfd(ps);
+#endif
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
handle_update_requests(ps, NULL, 0);
cleanup_wakeup_pipe(ps);
#endif
@@ -1992,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
pip->memory_size = size;
pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pip->poll_set_size++; /* Wakeup pipe */
-#endif
pip->lazy_updates =
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -2177,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
ASSERT(0);
return;
}
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+ continue;
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd)
+ continue;
+#endif
data &= 0xFFFFFFFF;
ASSERT(fd == data);
/* Events are the events that are being monitored, which of course include
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index e1cea7eb8b..d40dabc529 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -51,6 +51,7 @@
#include "sys.h"
#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX
#ifdef ERTS_ENABLE_KERNEL_POLL
# undef ERTS_ENABLE_KERNEL_POLL
@@ -130,6 +131,9 @@
#endif
#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10
+#define ERTS_POLL_USE_TIMERFD 0
typedef Uint32 ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
#include <sys/epoll.h>
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
#define ERTS_POLL_EV_E2N(EV) \
((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
#endif
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
#define ev2str(ev) \
(((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index 1170a549b9..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
* @param res an array of fd results that the ready fds are put in.
* @param[in] length the length of the res array
* @param[out] length the number of ready events returned in res
+ * @param tpd the thread progress data to note sleep state in
+ * @param timeout_time the monotonic time, in native time units, to wake up at
* @return 0 on success, else the ERRNO of the error that happened.
*/
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd res[],
- int *length);
+ int *length,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time);
/**
* Interrupt the thread waiting in the pollset. This function should be called
* with set = 0 before any thread calls erts_poll_wait in order to clear any
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 39bb4d515e..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1017,10 +1017,12 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
int erts_poll_wait(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ Sint64 timeout_in)
{
int no_fds;
- DWORD timeout = INFINITE;
+ DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in;
EventData* ev;
int res = 0;
int num = 0;
@@ -1056,10 +1058,10 @@ int erts_poll_wait(ErtsPollSet *ps,
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_POP_STATE();
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl
index f04efb9003..2e0dfa42f3 100644
--- a/erts/emulator/test/scheduler_SUITE.erl
+++ b/erts/emulator/test/scheduler_SUITE.erl
@@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) ->
{Conc, PollType, KP} = get_ioconfig(Config),
{Sched, SchedOnln, _} = get_sstate(Config, ""),
- [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
- [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
-
- [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
-
if
Conc ->
- [5] = get_ionum(Config,"+IOt 5 +IOp 1"),
- [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"),
- [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+ [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
- [2] = get_ionum(Config, "+S 2 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4:2 +IOPt 100"),
- [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
+ [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
+ [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
+ [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+
+ [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"),
+ [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
fail = get_ionum(Config, "+IOt 1 +IOp 2"),
ok;
not Conc ->
+ [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
+
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index fab2f45f28..4e6baa9e0e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -85,7 +85,7 @@ xm_sig_order_proc() ->
receive
may_not_reach -> exit(bad_signal_order);
may_reach -> ok
- after 0 -> ok
+ after 0 -> erlang:yield()
end,
xm_sig_order_proc().