From e6d3da55a2fe06730f3b92098ff8c13e16e3254b Mon Sep 17 00:00:00 2001 From: Lukas Larsson Date: Mon, 3 Nov 2014 11:32:01 +0100 Subject: erts: Add high accuracy poll timeouts Different poll/select implementations have different ways to handle timeouts of < ms accuracy. Most have extended API like pselect or such, while others rely on using timerfds (epoll_wait). If no high accuracy timeout is available, we simply round up to nearest ms. If we do not roundup we will spin the last ms when waiting for a timeout which is not desirable. --- erts/configure.in | 6 +- erts/emulator/sys/common/erl_poll.c | 162 ++++++++++++++++++++++++++++++++++-- erts/emulator/sys/common/erl_poll.h | 8 ++ 3 files changed, 167 insertions(+), 9 deletions(-) diff --git a/erts/configure.in b/erts/configure.in index 481dfe405e..2699ab6f29 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -1768,6 +1768,10 @@ AC_CHECK_HEADER(sys/event.h, have_kernel_poll=kqueue) AC_CHECK_HEADER(sys/epoll.h, have_kernel_poll=epoll) AC_CHECK_HEADER(sys/devpoll.h, have_kernel_poll=/dev/poll) +dnl Check if we have timerfds to be used for high accuracy +dnl epoll_wait timeouts +AC_CHECK_HEADERS([sys/timerfd.h]) + dnl Check for kernel SCTP support AC_SUBST(LIBSCTP) if test "x$enable_sctp" != "xno" ; then @@ -2113,7 +2117,7 @@ AC_CHECK_FUNCS([ieee_handler fpsetmask finite isnan isinf res_gethostbyname dlop gethrtime localtime_r gmtime_r inet_pton \ memcpy mallopt sbrk _sbrk __sbrk brk _brk __brk \ flockfile fstat strlcpy strlcat setsid posix2time time2posix \ - setlocale nl_langinfo poll mlockall]) + setlocale nl_langinfo poll mlockall ppoll]) AC_MSG_CHECKING([for isfinite]) AC_TRY_LINK([#include ], diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index f4d4a85ca4..950717881a 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -314,6 +314,9 @@ struct ErtsPollSet_ { #if ERTS_POLL_USE_WAKEUP_PIPE int wake_fds[2]; #endif +#if ERTS_POLL_USE_TIMERFD + int timer_fd; +#endif #if ERTS_POLL_USE_FALLBACK int fallback_used; #endif @@ -579,6 +582,75 @@ create_wakeup_pipe(ErtsPollSet ps) #endif /* ERTS_POLL_USE_WAKEUP_PIPE */ +/* + * --- timer fd ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_TIMERFD + +/* We use the timerfd when using epoll_wait to get high accuracy + timeouts, i.e. we want to sleep with < ms accuracy. */ + +static void +create_timerfd(ErtsPollSet ps) +{ + int do_wake = 0; + int timer_fd; + timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + ERTS_POLL_EXPORT(erts_poll_control)(ps, + timer_fd, + ERTS_POLL_EV_IN, + 1, &do_wake); +#if ERTS_POLL_USE_FALLBACK + /* We depend on the wakeup pipe being handled by kernel poll */ + if (ps->fds_status[timer_fd].flags & ERTS_POLL_FD_FLG_INFLBCK) + fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", + __FILE__, __LINE__); +#endif + if (ps->internal_fd_limit <= timer_fd) + ps->internal_fd_limit = timer_fd + 1; + ps->timer_fd = timer_fd; +} + +static ERTS_INLINE void +timerfd_set(ErtsPollSet ps, struct itimerspec *its) +{ +#ifdef DEBUG + struct itimerspec old_its; + int res; + res = timerfd_settime(ps->timer_fd, 0, its, &old_its); + ASSERT(res == 0); + ASSERT(old_its.it_interval.tv_sec == 0 && + old_its.it_interval.tv_nsec == 0 && + old_its.it_value.tv_sec == 0 && + old_its.it_value.tv_nsec == 0); + +#else + timerfd_settime(ps->timer_fd, 0, its, NULL); +#endif +} + +static ERTS_INLINE int +timerfd_clear(ErtsPollSet ps, int res, int max_res) { + + struct itimerspec its; + /* we always have to clear the timer */ + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 0; + timerfd_settime(ps->timer_fd, 0, &its, NULL); + + /* only timeout fd triggered */ + if (res == 1 && ps->res_events[0].data.fd == ps->timer_fd) + return 0; + + return res; +} + +#endif /* ERTS_POLL_USE_TIMERFD */ + + /* * --- Poll set update requests ---------------------------------------------- */ @@ -1516,6 +1588,12 @@ poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake new_events = ERTS_POLL_EV_NVAL; goto done; } +#endif +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } #endif } @@ -1664,6 +1742,9 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) #if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; #endif +#if ERTS_POLL_USE_TIMERFD + int timer_fd = ps->timer_fd; +#endif for (i = 0; i < n; i++) { @@ -1678,6 +1759,11 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) cleanup_wakeup_pipe(ps); continue; } +#endif +#if ERTS_POLL_USE_TIMERFD + if (fd == timer_fd) { + continue; + } #endif ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); /* epoll_wait() can repeat the same fd in result array... */ @@ -1752,6 +1838,11 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) cleanup_wakeup_pipe(ps); continue; } +#endif +#if ERTS_POLL_USE_TIMERFD + if (fd == timer_fd) { + continue; + } #endif revents = ERTS_POLL_EV_N2E(ps->res_events[i].events); pr[res].fd = fd; @@ -2097,7 +2188,7 @@ get_timeout_timeval(ErtsPollSet ps, #endif -#if ERTS_POLL_USE_KQUEUE +#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD static ERTS_INLINE int get_timeout_timespec(ErtsPollSet ps, @@ -2120,7 +2211,7 @@ get_timeout_timespec(ErtsPollSet ps, ASSERT(tsp->tv_sec >= 0); ASSERT(tsp->tv_nsec >= 0); - ASSERT(tsp->tv_nsec < 1000*1000); + ASSERT(tsp->tv_nsec < 1000*1000*1000); return !0; } @@ -2128,6 +2219,22 @@ get_timeout_timespec(ErtsPollSet ps, #endif +#if ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_itimerspec(ErtsPollSet ps, + struct itimerspec *itsp, + ErtsMonotonicTime timeout_time) +{ + + itsp->it_interval.tv_sec = 0; + itsp->it_interval.tv_nsec = 0; + + return get_timeout_timespec(ps, &itsp->it_value, timeout_time); +} + +#endif + static ERTS_INLINE int check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) { @@ -2145,12 +2252,29 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) #if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ if (max_res > ps->res_events_len) grow_res_events(ps, max_res); +#if ERTS_POLL_USE_TIMERFD + { + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout) { +#ifdef ERTS_SMP + erts_thr_progress_prepare_wait(NULL); +#endif + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, ps->res_events, max_res, -1); + res = timerfd_clear(ps, res, max_res); + } else { + res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0); + } + } +#else /* !ERTS_POLL_USE_TIMERFD */ timeout = (int) get_timeout(ps, 1000, timeout_time); #ifdef ERTS_SMP if (timeout) erts_thr_progress_prepare_wait(NULL); #endif res = epoll_wait(ps->kp_fd, ps->res_events, max_res, timeout); +#endif /* !ERTS_POLL_USE_TIMERFD */ #elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ struct timespec ts; if (max_res > ps->res_events_len) @@ -2189,7 +2313,15 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) #endif poll_res.dp_timeout = timeout; res = ioctl(ps->kp_fd, DP_POLL, &poll_res); -#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ +#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */ + struct timespec ts; + timeout = get_timeout_timespec(ps, &ts, timeout_time); +#ifdef ERTS_SMP + if (timeout) + erts_thr_progress_prepare_wait(NULL); +#endif + res = ppoll(ps->poll_fds, ps->no_poll_fds, &ts, NULL); +#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */ timeout = (int) get_timeout(ps, 1000, timeout_time); #ifdef ERTS_SMP if (timeout) @@ -2202,7 +2334,7 @@ check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); - + #ifdef ERTS_SMP if (timeout) erts_thr_progress_prepare_wait(NULL); @@ -2535,6 +2667,9 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) #if ERTS_POLL_USE_WAKEUP_PIPE create_wakeup_pipe(ps); #endif +#if ERTS_POLL_USE_TIMERFD + create_timerfd(ps); +#endif #if ERTS_POLL_USE_FALLBACK if (kp_fd >= ps->fds_status_len) grow_fds_status(ps, kp_fd); @@ -2625,6 +2760,10 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) if (ps->wake_fds[1] >= 0) close(ps->wake_fds[1]); #endif +#if ERTS_POLL_USE_TIMERFD + if (ps->timer_fd >= 0) + close(ps->timer_fd); +#endif erts_smp_spin_lock(&pollsets_lock); if (ps == pollsets) @@ -2729,6 +2868,9 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #if ERTS_POLL_USE_WAKEUP_PIPE pip->poll_set_size++; /* Wakeup pipe */ #endif +#if ERTS_POLL_USE_TIMERFD + pip->poll_set_size++; /* timerfd */ +#endif pip->fallback_poll_set_size = #if !ERTS_POLL_USE_FALLBACK @@ -2857,14 +2999,18 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, ev[fd] = 0; else { ev[fd] = ps->fds_status[fd].events; + if ( #if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) - ev[fd] |= ERTS_POLL_EV_NVAL; + fd == ps->wake_fds[0] || fd == ps->wake_fds[1] || +#endif +#if ERTS_POLL_USE_TIMERFD + fd == ps->timer_fd || #endif #if ERTS_POLL_USE_KERNEL_POLL - if (fd == ps->kp_fd) - ev[fd] |= ERTS_POLL_EV_NVAL; + fd == ps->kp_fd || #endif + 0) + ev[fd] |= ERTS_POLL_EV_NVAL; } } ERTS_POLLSET_UNLOCK(ps); diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index d02ed2396b..ae2d063805 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -98,6 +98,8 @@ # endif #endif +#define ERTS_POLL_USE_TIMERFD 0 + typedef Uint32 ErtsPollEvents; #undef ERTS_POLL_EV_E2N @@ -130,6 +132,12 @@ struct erts_sys_fd_type { #include +#ifdef HAVE_SYS_TIMERFD_H +#include +#undef ERTS_POLL_USE_TIMERFD +#define ERTS_POLL_USE_TIMERFD 1 +#endif + #define ERTS_POLL_EV_E2N(EV) \ ((__uint32_t) (EV)) #define ERTS_POLL_EV_N2E(EV) \ -- cgit v1.2.3