author    Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
committer Erlang/OTP <[email protected]> 2009-11-20 14:54:40 +0000
commit    84adefa331c4159d432d22840663c38f155cd4c1 (patch)
tree      bff9a9c66adda4df2106dfd0e5c053ab182a12bd /erts/emulator/sys/common
The R13B03 release. (tag: OTP_R13B03)
Diffstat (limited to 'erts/emulator/sys/common')
-rw-r--r--  erts/emulator/sys/common/erl_check_io.c         1912
-rw-r--r--  erts/emulator/sys/common/erl_check_io.h           96
-rw-r--r--  erts/emulator/sys/common/erl_mseg.c             1452
-rw-r--r--  erts/emulator/sys/common/erl_mseg.h               97
-rw-r--r--  erts/emulator/sys/common/erl_mtrace_sys_wrap.c   245
-rw-r--r--  erts/emulator/sys/common/erl_poll.c             2693
-rw-r--r--  erts/emulator/sys/common/erl_poll.h              246
7 files changed, 6741 insertions, 0 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
new file mode 100644
index 0000000000..218bd79584
--- /dev/null
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -0,0 +1,1912 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Check I/O
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#define ERL_CHECK_IO_C__
+#define ERTS_WANT_BREAK_HANDLING
+#ifndef WANT_NONBLOCKING
+# define WANT_NONBLOCKING
+#endif
+#include "sys.h"
+#include "global.h"
+#include "erl_check_io.h"
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+# define ERTS_DRV_EV_STATE_EXTRA_SIZE 128
+#else
+# include "safe_hash.h"
+# define DRV_EV_STATE_HTAB_SIZE 1024
+#endif
+
+typedef char EventStateType;
+#define ERTS_EV_TYPE_NONE ((EventStateType) 0)
+#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */
+#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */
+#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */
+
+typedef char EventStateFlags;
+#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+
+
+#if defined(ERTS_KERNEL_POLL_VERSION)
+# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp
+#elif defined(ERTS_NO_KERNEL_POLL_VERSION)
+# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp
+#else
+# define ERTS_CIO_EXPORT(FUNC) FUNC
+#endif
+
+#define ERTS_CIO_HAVE_DRV_EVENT \
+ (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL)
+
+#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control)
+#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait)
+#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt)
+#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed)
+#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset)
+#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset)
+#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds)
+#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init)
+#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info)
+
+static struct pollset_info
+{
+ ErtsPollSet ps;
+ erts_smp_atomic_t in_poll_wait; /* set while doing poll */
+#ifdef ERTS_SMP
+    struct removed_fd* removed_list; /* list of deselected fd's */
+ erts_smp_spinlock_t removed_list_lock;
+#endif
+} pollset;
+#define NUM_OF_POLLSETS 1
+
+typedef struct {
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ SafeHashBucket hb;
+#endif
+ ErtsSysFdType fd;
+ union {
+ ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+ ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */
+ erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */
+ } driver;
+ ErtsPollEvents events;
+ unsigned short remove_cnt; /* number of removed_fd's referring to this fd */
+ EventStateType type;
+ EventStateFlags flags;
+} ErtsDrvEventState;
+
+#ifdef ERTS_SMP
+struct removed_fd {
+ struct removed_fd *next;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ ErtsSysFdType fd;
+#else
+ ErtsDrvEventState* state;
+ #ifdef DEBUG
+ ErtsSysFdType fd;
+ #endif
+#endif
+
+};
+#endif
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static int max_fds = -1;
+#endif
+#define DRV_EV_STATE_LOCK_CNT 16
+static union {
+ erts_smp_mtx_t lck;
+ byte _cache_line_alignment[64];
+} drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT];
+
+#ifdef ERTS_SMP
+static ERTS_INLINE erts_smp_mtx_t* fd_mtx(ErtsSysFdType fd)
+{
+ int hash = (int)fd;
+# ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ hash ^= (hash >> 9);
+# endif
+ return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck;
+}
+#else
+# define fd_mtx(fd) NULL
+#endif
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+
+static erts_smp_atomic_t drv_ev_state_len;
+static ErtsDrvEventState *drv_ev_state;
+static erts_smp_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */
+
+#else
+static SafeHash drv_ev_state_tab;
+static int num_state_prealloc;
+static ErtsDrvEventState *state_prealloc_first;
+erts_smp_spinlock_t state_prealloc_lock;
+
+static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd)
+{
+ ErtsDrvEventState tmpl;
+ tmpl.fd = fd;
+ return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl);
+}
+
+static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd)
+{
+ ErtsDrvEventState tmpl;
+ tmpl.fd = fd;
+ tmpl.driver.select = NULL;
+ tmpl.events = 0;
+ tmpl.remove_cnt = 0;
+ tmpl.type = ERTS_EV_TYPE_NONE;
+ tmpl.flags = 0;
+ return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl);
+}
+
+static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state)
+{
+ ASSERT(state->remove_cnt == 0);
+ safe_hash_erase(&drv_ev_state_tab, (void *) state);
+}
+
+#endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */
+
+static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode);
+static void select_steal(ErlDrvPort ix, ErtsDrvEventState *state,
+ int mode, int on);
+static void print_select_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort ix, ErtsSysFdType fd, int mode, int on);
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static void select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int);
+#endif
+#if ERTS_CIO_HAVE_DRV_EVENT
+static void event_steal(ErlDrvPort ix, ErtsDrvEventState *state,
+ ErlDrvEventData event_data);
+static void print_event_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort, ErtsSysFdType, ErlDrvEventData);
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData);
+#endif
+#endif
+static void steal_pending_stop_select(erts_dsprintf_buf_t*, ErlDrvPort,
+ ErtsDrvEventState*, int mode, int on);
+static ERTS_INLINE Eterm
+drvport2id(ErlDrvPort dp)
+{
+ Port *pp = erts_drvport2port(dp);
+ if (pp)
+ return pp->id;
+ else {
+ ASSERT(0);
+ return am_undefined;
+ }
+}
+
+#ifdef ERTS_SMP
+ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST)
+#endif
+
+static ERTS_INLINE void
+remember_removed(ErtsDrvEventState *state, struct pollset_info* psi)
+{
+#ifdef ERTS_SMP
+ struct removed_fd *fdlp;
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
+ if (erts_smp_atomic_read(&psi->in_poll_wait)) {
+ state->remove_cnt++;
+ ASSERT(state->remove_cnt > 0);
+ fdlp = removed_fd_alloc();
+ #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || defined(DEBUG)
+ fdlp->fd = state->fd;
+ #endif
+ #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ fdlp->state = state;
+ #endif
+ erts_smp_spin_lock(&psi->removed_list_lock);
+ fdlp->next = psi->removed_list;
+ psi->removed_list = fdlp;
+ erts_smp_spin_unlock(&psi->removed_list_lock);
+ }
+#endif
+}
+
+
+static ERTS_INLINE int
+is_removed(ErtsDrvEventState *state)
+{
+#ifdef ERTS_SMP
+    /* Note that there is a possible race here, where an fd is removed
+       (increasing remove_cnt) and then added again just before erts_poll_wait
+       is called by erts_check_io. Any polled event on the re-added fd will
+       then be falsely ignored. But that does not matter, as the event will
+       trigger again the next time erts_check_io is called. */
+ return state->remove_cnt > 0;
+#else
+ return 0;
+#endif
+}
+
+static void
+forget_removed(struct pollset_info* psi)
+{
+#ifdef ERTS_SMP
+ struct removed_fd* fdlp;
+ struct removed_fd* tofree;
+
+ /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */
+
+ erts_smp_spin_lock(&psi->removed_list_lock);
+ fdlp = psi->removed_list;
+ psi->removed_list = NULL;
+ erts_smp_spin_unlock(&psi->removed_list_lock);
+
+ while (fdlp) {
+ erts_driver_t* drv_ptr = NULL;
+ erts_smp_mtx_t* mtx;
+ ErtsSysFdType fd;
+ ErtsDrvEventState *state;
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ fd = fdlp->fd;
+ mtx = fd_mtx(fd);
+ erts_smp_mtx_lock(mtx);
+ state = &drv_ev_state[(int) fd];
+#else
+ state = fdlp->state;
+ fd = state->fd;
+ ASSERT(fd == fdlp->fd);
+ mtx = fd_mtx(fd);
+ erts_smp_mtx_lock(mtx);
+#endif
+ ASSERT(state->remove_cnt > 0);
+ if (--state->remove_cnt == 0) {
+ switch (state->type) {
+ case ERTS_EV_TYPE_STOP_USE:
+ /* Now we can call stop_select */
+ drv_ptr = state->driver.drv_ptr;
+ ASSERT(drv_ptr);
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ state->driver.drv_ptr = NULL;
+ /* Fall through */
+ case ERTS_EV_TYPE_NONE:
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ hash_erase_drv_ev_state(state);
+#endif
+ break;
+ case ERTS_EV_TYPE_DRV_SEL:
+ case ERTS_EV_TYPE_DRV_EV:
+ break;
+ default:
+ ASSERT(0);
+ }
+ }
+ erts_smp_mtx_unlock(mtx);
+ if (drv_ptr) {
+ int was_unmasked = erts_block_fpe();
+ (*drv_ptr->stop_select) (fd, NULL);
+ erts_unblock_fpe(was_unmasked);
+ if (drv_ptr->handle) {
+ erts_ddll_dereference_driver(drv_ptr->handle);
+ }
+ }
+ tofree = fdlp;
+ fdlp = fdlp->next;
+ removed_fd_free(tofree);
+ }
+#endif /* ERTS_SMP */
+}
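+
The "fast track" comment above sketches an early-out that this commit does not implement. A hedged illustration of that idea, assuming removed_list were turned into a C11 atomic pointer (in this commit it is a plain pointer protected by removed_list_lock, so the sketch below is hypothetical):

    #include <stdatomic.h>

    struct removed_fd;  /* as declared earlier in this file */

    struct pollset_info_sketch {
        _Atomic(struct removed_fd *) removed_list;  /* hypothetical atomic head */
        /* ... */
    };

    /* Skip taking the spinlock when the list is observably empty. */
    static int removed_list_empty(struct pollset_info_sketch *psi)
    {
        return atomic_load_explicit(&psi->removed_list,
                                    memory_order_acquire) == NULL;
    }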
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static void
+grow_drv_ev_state(int min_ix)
+{
+ int i;
+ int new_len = min_ix + 1 + ERTS_DRV_EV_STATE_EXTRA_SIZE;
+ if (new_len > max_fds)
+ new_len = max_fds;
+
+ erts_smp_mtx_lock(&drv_ev_state_grow_lock);
+ if (erts_smp_atomic_read(&drv_ev_state_len) <= min_ix) {
+ for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */
+ erts_smp_mtx_lock(&drv_ev_state_locks[i].lck);
+ }
+ drv_ev_state = (drv_ev_state
+ ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE,
+ drv_ev_state,
+ sizeof(ErtsDrvEventState)*new_len)
+ : erts_alloc(ERTS_ALC_T_DRV_EV_STATE,
+ sizeof(ErtsDrvEventState)*new_len));
+ for (i = erts_smp_atomic_read(&drv_ev_state_len); i < new_len; i++) {
+ drv_ev_state[i].fd = (ErtsSysFdType) i;
+ drv_ev_state[i].driver.select = NULL;
+ drv_ev_state[i].events = 0;
+ drv_ev_state[i].remove_cnt = 0;
+ drv_ev_state[i].type = ERTS_EV_TYPE_NONE;
+ drv_ev_state[i].flags = 0;
+ }
+ erts_smp_atomic_set(&drv_ev_state_len, new_len);
+ for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) {
+ erts_smp_mtx_unlock(&drv_ev_state_locks[i].lck);
+ }
+ }
+    /* else: already grown by a racing thread */
+
+ erts_smp_mtx_unlock(&drv_ev_state_grow_lock);
+}
+#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */
+
+
+static ERTS_INLINE void
+abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type)
+{
+ if (is_nil(id)) {
+ ASSERT(type == ERTS_EV_TYPE_NONE
+ || !erts_port_task_is_scheduled(pthp));
+ }
+ else if (erts_port_task_is_scheduled(pthp)) {
+ erts_port_task_abort(id, pthp);
+ ASSERT(erts_is_port_alive(id));
+ }
+}
+
+static ERTS_INLINE void
+abort_tasks(ErtsDrvEventState *state, int mode)
+{
+ switch (mode) {
+ case 0: check_type:
+ switch (state->type) {
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_EV_TYPE_DRV_EV:
+ abort_task(state->driver.event->port,
+ &state->driver.event->task,
+ ERTS_EV_TYPE_DRV_EV);
+ return;
+#endif
+ case ERTS_EV_TYPE_NONE:
+ return;
+ default:
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
+ /* Fall through */
+ }
+ case ERL_DRV_READ|ERL_DRV_WRITE:
+ case ERL_DRV_WRITE:
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
+ abort_task(state->driver.select->outport,
+ &state->driver.select->outtask,
+ state->type);
+ if (mode == ERL_DRV_WRITE)
+ break;
+ case ERL_DRV_READ:
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
+ abort_task(state->driver.select->inport,
+ &state->driver.select->intask,
+ state->type);
+ break;
+ default:
+ goto check_type;
+ }
+}
+
+static void
+deselect(ErtsDrvEventState *state, int mode)
+{
+ int do_wake = 0;
+ ErtsPollEvents rm_events;
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
+ ASSERT(state->events);
+
+ abort_tasks(state, mode);
+
+ if (!mode)
+ rm_events = state->events;
+ else {
+ rm_events = 0;
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
+ if (mode & ERL_DRV_READ) {
+ state->driver.select->inport = NIL;
+ rm_events |= ERTS_POLL_EV_IN;
+ }
+ if (mode & ERL_DRV_WRITE) {
+ state->driver.select->outport = NIL;
+ rm_events |= ERTS_POLL_EV_OUT;
+ }
+ }
+
+ state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake);
+
+ if (!(state->events)) {
+ switch (state->type) {
+ case ERTS_EV_TYPE_DRV_SEL:
+ ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
+ ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
+ state->driver.select);
+ break;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_EV_TYPE_DRV_EV:
+ ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task));
+ erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
+ state->driver.event);
+ break;
+#endif
+ case ERTS_EV_TYPE_NONE:
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+
+ state->driver.select = NULL;
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ remember_removed(state, &pollset);
+ }
+}
+
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0)
+#else
+# define IS_FD_UNKNOWN(state) ((state) == NULL)
+#endif
+
+
+int
+ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
+ ErlDrvEvent e,
+ int mode,
+ int on)
+{
+ void (*stop_select_fn)(ErlDrvEvent, void*) = NULL;
+ Eterm id = drvport2id(ix);
+ ErtsSysFdType fd = (ErtsSysFdType) e;
+ ErtsPollEvents ctl_events = (ErtsPollEvents) 0;
+ ErtsPollEvents new_events, old_events;
+ ErtsDrvEventState *state;
+ int wake_poller;
+ int ret;
+
+ ERTS_SMP_LC_ASSERT(erts_drvport2port(ix)
+ && erts_lc_is_port_locked(erts_drvport2port(ix)));
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if ((unsigned)fd >= (unsigned)erts_smp_atomic_read(&drv_ev_state_len)) {
+ if (fd < 0) {
+ return -1;
+ }
+ if (fd >= max_fds) {
+ select_large_fd_error(ix, fd, mode, on);
+ return -1;
+ }
+ grow_drv_ev_state(fd);
+ }
+#endif
+
+ erts_smp_mtx_lock(fd_mtx(fd));
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ state = &drv_ev_state[(int) fd];
+#else
+ state = hash_get_drv_ev_state(fd); /* may be NULL! */
+#endif
+
+ if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ if (IS_FD_UNKNOWN(state)) {
+ /* fast track to stop_select callback */
+ stop_select_fn = erts_drvport2port(ix)->drv_ptr->stop_select;
+ ret = 0;
+ goto done_unknown;
+ }
+ mode |= (ERL_DRV_READ | ERL_DRV_WRITE);
+ wake_poller = 1; /* to eject fd from pollset (if needed) */
+ }
+ else wake_poller = 0;
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (state == NULL) {
+ state = hash_new_drv_ev_state(fd);
+ }
+#endif
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->type == ERTS_EV_TYPE_DRV_EV)
+ select_steal(ix, state, mode, on);
+#endif
+ if (state->type == ERTS_EV_TYPE_STOP_USE) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_select_op(dsbufp, ix, state->fd, mode, on);
+ steal_pending_stop_select(dsbufp, ix, state, mode, on);
+ if (state->type == ERTS_EV_TYPE_STOP_USE) {
+ ret = 0;
+ goto done; /* stop_select still pending */
+ }
+ ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ }
+
+ if (mode & ERL_DRV_READ) {
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ Eterm owner = state->driver.select->inport;
+ if (owner != id && is_not_nil(owner))
+ select_steal(ix, state, mode, on);
+ }
+ ctl_events |= ERTS_POLL_EV_IN;
+ }
+ if (mode & ERL_DRV_WRITE) {
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ Eterm owner = state->driver.select->outport;
+ if (owner != id && is_not_nil(owner))
+ select_steal(ix, state, mode, on);
+ }
+ ctl_events |= ERTS_POLL_EV_OUT;
+ }
+
+ ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) ||
+ (state->type == ERTS_EV_TYPE_NONE && !state->events));
+
+ if (!on && !(state->flags & ERTS_EV_FLAG_USED)
+ && state->events && !(state->events & ~ctl_events)) {
+        /* An old driver (not using ERL_DRV_USE) is removing all events.
+           At least wake the poller; this will not make close() 100% safe,
+           but it prevents actions from being delayed by the poll timeout. */
+ wake_poller = 1;
+ }
+
+ new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller);
+
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) {
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select);
+ state->driver.select = NULL;
+ }
+ ret = -1;
+ goto done;
+ }
+
+ old_events = state->events;
+
+ ASSERT(on
+ ? (new_events == (state->events | ctl_events))
+ : (new_events == (state->events & ~ctl_events)));
+
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL
+ || state->type == ERTS_EV_TYPE_NONE);
+
+ state->events = new_events;
+ if (ctl_events) {
+ if (on) {
+ if (state->type == ERTS_EV_TYPE_NONE) {
+ ErtsDrvSelectDataState *dsdsp
+ = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
+ sizeof(ErtsDrvSelectDataState));
+ dsdsp->inport = NIL;
+ dsdsp->outport = NIL;
+ erts_port_task_handle_init(&dsdsp->intask);
+ erts_port_task_handle_init(&dsdsp->outtask);
+ ASSERT(state->driver.select == NULL);
+ state->driver.select = dsdsp;
+ state->type = ERTS_EV_TYPE_DRV_SEL;
+ }
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
+ if (ctl_events & ERTS_POLL_EV_IN)
+ state->driver.select->inport = id;
+ if (ctl_events & ERTS_POLL_EV_OUT)
+ state->driver.select->outport = id;
+ if (mode & ERL_DRV_USE) {
+ state->flags |= ERTS_EV_FLAG_USED;
+ }
+ }
+ else { /* off */
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ abort_tasks(state, ERL_DRV_READ);
+ state->driver.select->inport = NIL;
+ }
+ if (ctl_events & ERTS_POLL_EV_OUT) {
+ abort_tasks(state, ERL_DRV_WRITE);
+ state->driver.select->outport = NIL;
+ }
+ if (new_events == 0) {
+ ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
+ ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
+ if (old_events != 0) {
+ remember_removed(state, &pollset);
+ }
+ if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
+ state->driver.select);
+ state->driver.select = NULL;
+ }
+                /* else keep it, as the fd will probably be selected again */
+ }
+ }
+ if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ erts_driver_t* drv_ptr = erts_drvport2port(ix)->drv_ptr;
+ ASSERT(new_events==0);
+ if (state->remove_cnt == 0 || !wake_poller) {
+ /* Safe to close fd now as it is not in pollset
+ or there was no need to eject fd (kernel poll) */
+ stop_select_fn = drv_ptr->stop_select;
+ }
+ else {
+ /* Not safe to close fd, postpone stop_select callback. */
+ state->type = ERTS_EV_TYPE_STOP_USE;
+ state->driver.drv_ptr = drv_ptr;
+ if (drv_ptr->handle) {
+ erts_ddll_reference_referenced_driver(drv_ptr->handle);
+ }
+ }
+ }
+ }
+ }
+
+ ret = 0;
+
+done:;
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
+ hash_erase_drv_ev_state(state);
+ }
+#endif
+done_unknown:
+ erts_smp_mtx_unlock(fd_mtx(fd));
+ if (stop_select_fn) {
+ int was_unmasked = erts_block_fpe();
+ (*stop_select_fn)(e, NULL);
+ erts_unblock_fpe(was_unmasked);
+ }
+ return ret;
+}
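+
For readers unfamiliar with the ERL_DRV_USE protocol handled above, a minimal sketch of how a driver would typically drive it (hypothetical driver code based on the documented erl_driver API; not part of this commit):

    #include <unistd.h>      /* close() */
    #include "erl_driver.h"  /* driver_select(), ERL_DRV_* */

    /* Select for input and declare "use" of the fd, so the emulator can
       tell us via the stop_select callback when closing it is safe. */
    static void start_reading(ErlDrvPort port, int fd)
    {
        driver_select(port, (ErlDrvEvent)(long) fd,
                      ERL_DRV_READ | ERL_DRV_USE, 1);
    }

    /* Deselect everything; do NOT close(fd) here -- wait for stop_select. */
    static void stop_reading(ErlDrvPort port, int fd)
    {
        driver_select(port, (ErlDrvEvent)(long) fd,
                      ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE, 0);
    }

    static void my_stop_select(ErlDrvEvent e, void *reserved)
    {
        close((int)(long) e);  /* fd is guaranteed to be out of the pollset */
    }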
+
+int
+ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
+ ErlDrvEvent e,
+ ErlDrvEventData event_data)
+{
+#if !ERTS_CIO_HAVE_DRV_EVENT
+ return -1;
+#else
+ ErtsSysFdType fd = (ErtsSysFdType) e;
+ ErtsPollEvents events;
+ ErtsPollEvents add_events;
+ ErtsPollEvents remove_events;
+ Eterm id = drvport2id(ix);
+ ErtsDrvEventState *state;
+ int do_wake = 0;
+ int ret;
+
+ ERTS_SMP_LC_ASSERT(erts_drvport2port(ix)
+ && erts_lc_is_port_locked(erts_drvport2port(ix)));
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if ((unsigned)fd >= (unsigned)erts_smp_atomic_read(&drv_ev_state_len)) {
+ if (fd < 0)
+ return -1;
+ if (fd >= max_fds) {
+ event_large_fd_error(ix, fd, event_data);
+ return -1;
+ }
+ grow_drv_ev_state(fd);
+ }
+#endif
+
+ erts_smp_mtx_lock(fd_mtx(fd));
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ state = &drv_ev_state[(int) fd];
+#else
+ /* Could use hash_new directly, but want to keep the normal case fast */
+ state = hash_get_drv_ev_state(fd);
+ if (state == NULL) {
+ state = hash_new_drv_ev_state(fd);
+ }
+#endif
+
+ switch (state->type) {
+ case ERTS_EV_TYPE_DRV_EV:
+ if (state->driver.event->port == id) break;
+ /*fall through*/
+ case ERTS_EV_TYPE_DRV_SEL:
+ event_steal(ix, state, event_data);
+ break;
+ case ERTS_EV_TYPE_STOP_USE: {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_event_op(dsbufp, ix, fd, event_data);
+ steal_pending_stop_select(dsbufp, ix, state, 0, 1);
+ break;
+ }
+ }
+
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_EV
+ || state->type == ERTS_EV_TYPE_NONE);
+
+ events = state->events;
+
+ if (!event_data) {
+ remove_events = events;
+ add_events = 0;
+ }
+ else {
+ remove_events = ~event_data->events & events;
+ add_events = ~events & event_data->events;
+ }
+
+ if (add_events) {
+ events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake);
+ if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ ret = -1;
+ goto done;
+ }
+ }
+ if (remove_events) {
+ events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake);
+ if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ ret = -1;
+ goto done;
+ }
+ }
+ if (event_data && event_data->events != 0) {
+ if (state->type == ERTS_EV_TYPE_DRV_EV) {
+ state->driver.event->removed_events &= ~add_events;
+ state->driver.event->removed_events |= remove_events;
+ }
+ else {
+ state->driver.event
+ = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
+ sizeof(ErtsDrvEventDataState));
+ erts_port_task_handle_init(&state->driver.event->task);
+ state->driver.event->port = id;
+ state->driver.event->removed_events = (ErtsPollEvents) 0;
+ state->type = ERTS_EV_TYPE_DRV_EV;
+ }
+ state->driver.event->data = event_data;
+ }
+ else {
+ if (state->type == ERTS_EV_TYPE_DRV_EV) {
+ abort_tasks(state, 0);
+ erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
+ state->driver.event);
+ }
+ state->driver.select = NULL;
+ state->type = ERTS_EV_TYPE_NONE;
+ remember_removed(state, &pollset);
+ }
+ state->events = events;
+ ASSERT(event_data ? events == event_data->events : events == 0);
+
+ ret = 0;
+
+done:
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
+ hash_erase_drv_ev_state(state);
+ }
+#endif
+ erts_smp_mtx_unlock(fd_mtx(fd));
+ return ret;
+#endif
+}
+
+static ERTS_INLINE int
+chk_stale(Eterm id, ErtsDrvEventState *state, int mode)
+{
+ if (is_nil(id))
+ return 0;
+ if (erts_is_port_alive(id))
+ return 1; /* Steal */
+ stale_drv_select(id, state, mode);
+ return 0;
+}
+
+static int
+need2steal(ErtsDrvEventState *state, int mode)
+{
+ int do_steal = 0;
+ switch (state->type) {
+ case ERTS_EV_TYPE_DRV_SEL:
+ if (mode & ERL_DRV_READ)
+ do_steal |= chk_stale(state->driver.select->inport,
+ state,
+ ERL_DRV_READ);
+ if (mode & ERL_DRV_WRITE)
+ do_steal |= chk_stale(state->driver.select->outport,
+ state,
+ ERL_DRV_WRITE);
+ break;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_EV_TYPE_DRV_EV:
+ do_steal |= chk_stale(state->driver.event->port, state, 0);
+ break;
+#endif
+ case ERTS_EV_TYPE_STOP_USE:
+ ASSERT(0);
+ break;
+ default:
+ break;
+ }
+ return do_steal;
+}
+
+static void
+print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id)
+{
+ ErtsPortNames *pnp = erts_get_port_names(id);
+ if (!pnp->name && !pnp->driver_name)
+ erts_dsprintf(dsbufp, "%s ", "<unknown>");
+ else {
+ if (pnp->name) {
+ if (!pnp->driver_name || strcmp(pnp->driver_name, pnp->name) == 0)
+ erts_dsprintf(dsbufp, "%s ", pnp->name);
+ else
+ erts_dsprintf(dsbufp, "%s (%s) ", pnp->driver_name, pnp->name);
+ }
+ else if (pnp->driver_name) {
+ erts_dsprintf(dsbufp, "%s ", pnp->driver_name);
+ }
+ }
+ erts_free_port_names(pnp);
+}
+
+static void
+steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
+{
+ erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd);
+ switch (state->type) {
+ case ERTS_EV_TYPE_DRV_SEL: {
+ int deselect_mode = 0;
+ Eterm iid = state->driver.select->inport;
+ Eterm oid = state->driver.select->outport;
+ if ((mode & ERL_DRV_READ) && (is_not_nil(iid))) {
+ erts_dsprintf(dsbufp, "input driver ");
+ print_driver_name(dsbufp, iid);
+ erts_dsprintf(dsbufp, "%T ", iid);
+ deselect_mode |= ERL_DRV_READ;
+ }
+ if ((mode & ERL_DRV_WRITE) && is_not_nil(oid)) {
+ if (deselect_mode) {
+ erts_dsprintf(dsbufp, "and ");
+ }
+ erts_dsprintf(dsbufp, "output driver ");
+ print_driver_name(dsbufp, oid);
+ erts_dsprintf(dsbufp, "%T ", oid);
+ deselect_mode |= ERL_DRV_WRITE;
+ }
+ if (deselect_mode)
+ deselect(state, deselect_mode);
+ else {
+	    erts_dsprintf(dsbufp, "no one");
+ ASSERT(0);
+ }
+ erts_dsprintf(dsbufp, "\n");
+ break;
+ }
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_EV_TYPE_DRV_EV: {
+ Eterm eid = state->driver.event->port;
+ if (is_nil(eid)) {
+	    erts_dsprintf(dsbufp, "no one");
+ ASSERT(0);
+ }
+ else {
+ erts_dsprintf(dsbufp, "event driver ");
+ print_driver_name(dsbufp, eid);
+ erts_dsprintf(dsbufp, "%T ", eid);
+ }
+ erts_dsprintf(dsbufp, "\n");
+ deselect(state, 0);
+ break;
+ }
+#endif
+ case ERTS_EV_TYPE_STOP_USE: {
+ ASSERT(0);
+ break;
+ }
+ default:
+	erts_dsprintf(dsbufp, "no one\n");
+ ASSERT(0);
+ }
+}
+
+static void
+print_select_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
+{
+ Port *pp = erts_drvport2port(ix);
+ erts_dsprintf(dsbufp,
+ "driver_select(%p, %d,%s%s%s%s, %d) "
+ "by ",
+ ix,
+ (int) fd,
+ mode & ERL_DRV_READ ? " ERL_DRV_READ" : "",
+ mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "",
+ mode & ERL_DRV_USE ? " ERL_DRV_USE" : "",
+ mode & (ERL_DRV_USE_NO_CALLBACK & ~ERL_DRV_USE) ? "_NO_CALLBACK" : "",
+ on);
+ print_driver_name(dsbufp, pp->id);
+ erts_dsprintf(dsbufp, "driver %T ", pp ? pp->id : NIL);
+}
+
+static void
+select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on)
+{
+ if (need2steal(state, mode)) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_select_op(dsbufp, ix, state->fd, mode, on);
+ steal(dsbufp, state, mode);
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+}
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static void
+large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd)
+{
+ erts_dsprintf(dsbufp,
+ "fd=%d is larger than the largest allowed fd=%d\n",
+ (int) fd, max_fds - 1);
+}
+
+static void
+select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
+{
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_select_op(dsbufp, ix, fd, mode, on);
+ erts_dsprintf(dsbufp, "failed: ");
+ large_fd_error_common(dsbufp, fd);
+ erts_send_error_to_logger_nogl(dsbufp);
+}
+#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */
+
+
+
+static void
+steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
+ ErtsDrvEventState *state, int mode, int on)
+{
+ ASSERT(state->type == ERTS_EV_TYPE_STOP_USE);
+ erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select "
+ "was called for driver %s\n",
+ (int) state->fd, state->driver.drv_ptr->name);
+ erts_send_error_to_logger_nogl(dsbufp);
+
+ if (on) {
+ /* Either fd-owner changed its mind about closing
+ * or closed fd before stop_select callback and fd is now reused.
+ * In either case stop_select should not be called.
+ */
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ if (state->driver.drv_ptr->handle) {
+ erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
+ }
+ state->driver.drv_ptr = NULL;
+ }
+ else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ erts_driver_t* drv_ptr = erts_drvport2port(ix)->drv_ptr;
+ if (drv_ptr != state->driver.drv_ptr) {
+ /* Some other driver wants the stop_select callback */
+ if (state->driver.drv_ptr->handle) {
+ erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
+ }
+ if (drv_ptr->handle) {
+ erts_ddll_reference_referenced_driver(drv_ptr->handle);
+ }
+ state->driver.drv_ptr = drv_ptr;
+ }
+ }
+
+}
+
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+
+static void
+print_event_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data)
+{
+ Port *pp = erts_drvport2port(ix);
+ erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd);
+ if (!event_data)
+ erts_dsprintf(dsbufp, "NULL");
+ else
+ erts_dsprintf(dsbufp, "{0x%x, 0x%x}",
+ (unsigned int) event_data->events,
+ (unsigned int) event_data->revents);
+ erts_dsprintf(dsbufp, ") by ");
+ print_driver_name(dsbufp, pp->id);
+ erts_dsprintf(dsbufp, "driver %T ", pp ? pp->id : NIL);
+}
+
+static void
+event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data)
+{
+ if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_event_op(dsbufp, ix, state->fd, event_data);
+ steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE);
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+ else if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ ASSERT(state->flags & ERTS_EV_FLAG_USED);
+ deselect(state, 0);
+ }
+}
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static void
+event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data)
+{
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_event_op(dsbufp, ix, fd, event_data);
+ erts_dsprintf(dsbufp, "failed: ");
+ large_fd_error_common(dsbufp, fd);
+ erts_send_error_to_logger_nogl(dsbufp);
+}
+#endif
+#endif
+
+static ERTS_INLINE void
+iready(Eterm id, ErtsDrvEventState *state)
+{
+ if (erts_port_task_schedule(id,
+ &state->driver.select->intask,
+ ERTS_PORT_TASK_INPUT,
+ (ErlDrvEvent) state->fd,
+ NULL) != 0) {
+ stale_drv_select(id, state, ERL_DRV_READ);
+ }
+}
+
+static ERTS_INLINE void
+oready(Eterm id, ErtsDrvEventState *state)
+{
+ if (erts_port_task_schedule(id,
+ &state->driver.select->outtask,
+ ERTS_PORT_TASK_OUTPUT,
+ (ErlDrvEvent) state->fd,
+ NULL) != 0) {
+ stale_drv_select(id, state, ERL_DRV_WRITE);
+ }
+}
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+static ERTS_INLINE void
+eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data)
+{
+ if (erts_port_task_schedule(id,
+ &state->driver.event->task,
+ ERTS_PORT_TASK_EVENT,
+ (ErlDrvEvent) state->fd,
+ event_data) != 0) {
+ stale_drv_select(id, state, 0);
+ }
+}
+#endif
+
+static void bad_fd_in_pollset( ErtsDrvEventState *, Eterm, Eterm, ErtsPollEvents);
+
+void
+ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set)
+{
+ ERTS_CIO_POLL_INTR(pollset.ps, set);
+}
+
+void
+ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, long msec)
+{
+ ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, msec);
+}
+
+void
+ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
+{
+ ErtsPollResFd pollres[256];
+ int pollres_len;
+ SysTimeval wait_time;
+ int poll_ret, i;
+
+ restart:
+
+ /* Figure out timeout value */
+ if (do_wait) {
+ erts_time_remaining(&wait_time);
+ } else { /* poll only */
+ wait_time.tv_sec = 0;
+ wait_time.tv_usec = 0;
+ }
+
+#ifdef ERTS_ENABLE_LOCK_CHECK
+ erts_lc_check_exact(NULL, 0); /* No locks should be locked */
+#endif
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+ pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd);
+
+ erts_smp_atomic_set(&pollset.in_poll_wait, 1);
+
+ poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, &wait_time);
+
+#ifdef ERTS_ENABLE_LOCK_CHECK
+ erts_lc_check_exact(NULL, 0); /* No locks should be locked */
+#endif
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ erts_deliver_time(); /* sync the machine's idea of time */
+
+#ifdef ERTS_BREAK_REQUESTED
+ if (ERTS_BREAK_REQUESTED)
+ erts_do_break_handling();
+#endif
+
+ if (poll_ret != 0) {
+ erts_smp_atomic_set(&pollset.in_poll_wait, 0);
+ forget_removed(&pollset);
+ if (poll_ret == EAGAIN) {
+ goto restart;
+ }
+
+ if (poll_ret != ETIMEDOUT
+ && poll_ret != EINTR
+#ifdef ERRNO_BLOCK
+ && poll_ret != ERRNO_BLOCK
+#endif
+ ) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ erts_dsprintf(dsbufp, "erts_poll_wait() failed: %s (%d)\n",
+ erl_errno_id(poll_ret), poll_ret);
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+ return;
+ }
+
+ for (i = 0; i < pollres_len; i++) {
+
+ ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd;
+ ErtsDrvEventState *state;
+
+ erts_smp_mtx_lock(fd_mtx(fd));
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ state = &drv_ev_state[ (int) fd];
+#else
+ state = hash_get_drv_ev_state(fd);
+ if (!state) {
+ goto next_pollres;
+ }
+#endif
+
+ /* Skip this fd if it was removed from pollset */
+ if (is_removed(state)) {
+ goto next_pollres;
+ }
+
+ switch (state->type) {
+ case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */
+ ErtsPollEvents revents;
+ ErtsPollEvents revent_mask;
+
+ revent_mask = ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT);
+ revent_mask |= state->events;
+ revents = pollres[i].events & revent_mask;
+
+ if (revents & ERTS_POLL_EV_ERR) {
+ /*
+ * Let the driver handle the error condition. Only input,
+ * only output, or nothing might have been selected.
+ * We *do not* want to call a callback that corresponds
+ * to an event not selected. revents might give us a clue
+ * on which one to call.
+ */
+ if ((revents & ERTS_POLL_EV_IN)
+ || (!(revents & ERTS_POLL_EV_OUT)
+ && state->events & ERTS_POLL_EV_IN)) {
+ iready(state->driver.select->inport, state);
+ }
+ else if (state->events & ERTS_POLL_EV_OUT) {
+ oready(state->driver.select->outport, state);
+ }
+ }
+ else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
+ if (revents & ERTS_POLL_EV_OUT) {
+ oready(state->driver.select->outport, state);
+ }
+ /* Someone might have deselected input since revents
+ was read (true also on the non-smp emulator since
+ oready() may have been called); therefore, update
+ revents... */
+ revents &= ~(~state->events & ERTS_POLL_EV_IN);
+ if (revents & ERTS_POLL_EV_IN) {
+ iready(state->driver.select->inport, state);
+ }
+ }
+ else if (revents & ERTS_POLL_EV_NVAL) {
+ bad_fd_in_pollset(state,
+ state->driver.select->inport,
+ state->driver.select->outport,
+ state->events);
+ }
+ break;
+ }
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */
+ ErlDrvEventData event_data;
+ ErtsPollEvents revents;
+ ASSERT(state->driver.event);
+ ASSERT(state->driver.event->data);
+ event_data = state->driver.event->data;
+ revents = pollres[i].events;
+ revents &= ~state->driver.event->removed_events;
+
+ if (revents) {
+ event_data->events = state->events;
+ event_data->revents = revents;
+
+ eready(state->driver.event->port, state, event_data);
+ }
+ break;
+ }
+#endif
+
+ case ERTS_EV_TYPE_NONE: /* Deselected ... */
+ break;
+
+ default: { /* Error */
+ erts_dsprintf_buf_t *dsbufp;
+ dsbufp = erts_create_logger_dsbuf();
+	    erts_dsprintf(dsbufp,
+			  "Invalid event request type for fd in erts_poll()! "
+			  "fd=%d, event request type=%d\n", (int) state->fd,
+			  (int) state->type);
+	    erts_send_error_to_logger_nogl(dsbufp);
+ ASSERT(0);
+ deselect(state, 0);
+ break;
+ }
+ }
+
+ next_pollres:;
+#ifdef ERTS_SMP
+ erts_smp_mtx_unlock(fd_mtx(fd));
+#endif
+ }
+
+ erts_smp_atomic_set(&pollset.in_poll_wait, 0);
+ forget_removed(&pollset);
+}
+
+static void
+bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport,
+ Eterm outport, ErtsPollEvents events)
+{
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+
+ if (events & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
+ char *io_str;
+ Eterm port = NIL;
+ if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) {
+ io_str = "input/output";
+ if (inport == outport)
+ port = inport;
+ }
+ else {
+ if (events & ERTS_POLL_EV_IN) {
+ io_str = "input";
+ port = inport;
+ }
+ else {
+ io_str = "output";
+ port = outport;
+ }
+ }
+ erts_dsprintf(dsbufp,
+ "Bad %s fd in erts_poll()! fd=%d, ",
+ io_str, (int) state->fd);
+ if (is_nil(port)) {
+ ErtsPortNames *ipnp = erts_get_port_names(inport);
+ ErtsPortNames *opnp = erts_get_port_names(outport);
+ erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n",
+ is_nil(inport) ? am_undefined : inport,
+ is_nil(outport) ? am_undefined : outport,
+ ipnp->driver_name ? ipnp->driver_name : "<unknown>",
+ opnp->driver_name ? opnp->driver_name : "<unknown>",
+ ipnp->name ? ipnp->name : "<unknown>",
+ opnp->name ? opnp->name : "<unknown>");
+ erts_free_port_names(ipnp);
+ erts_free_port_names(opnp);
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(port);
+ erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n",
+ is_nil(port) ? am_undefined : port,
+ pnp->driver_name ? pnp->driver_name : "<unknown>",
+ pnp->name ? pnp->name : "<unknown>");
+ erts_free_port_names(pnp);
+ }
+ }
+ else {
+ erts_dsprintf(dsbufp, "Bad fd in erts_poll()! fd=%d\n", (int) state->fd);
+ }
+ erts_send_error_to_logger_nogl(dsbufp);
+
+ /* unmap entry */
+ deselect(state, 0);
+}
+
+static void
+stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode)
+{
+ erts_stale_drv_select(id, (ErlDrvEvent) state->fd, mode, 0);
+ deselect(state, mode);
+}
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+static SafeHashValue drv_ev_state_hash(void *des)
+{
+ SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd;
+ return val ^ (val >> 8); /* Good enough for aligned pointer values? */
+}
+
+static int drv_ev_state_cmp(void *des1, void *des2)
+{
+ return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd
+ ? 0 : 1);
+}
+
+static void *drv_ev_state_alloc(void *des_tmpl)
+{
+ ErtsDrvEventState *evstate;
+ erts_smp_spin_lock(&state_prealloc_lock);
+ if (state_prealloc_first == NULL) {
+ erts_smp_spin_unlock(&state_prealloc_lock);
+ evstate = (ErtsDrvEventState *)
+ erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState));
+ } else {
+ evstate = state_prealloc_first;
+ state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next;
+ --num_state_prealloc;
+ erts_smp_spin_unlock(&state_prealloc_lock);
+ }
+ /* XXX: Already valid data if prealloced, could ignore template! */
+ *evstate = *((ErtsDrvEventState *) des_tmpl);
+
+ return (void *) evstate;
+}
+
+static void drv_ev_state_free(void *des)
+{
+ erts_smp_spin_lock(&state_prealloc_lock);
+ ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb;
+ state_prealloc_first = (ErtsDrvEventState *) des;
+ ++num_state_prealloc;
+ erts_smp_spin_unlock(&state_prealloc_lock);
+}
+#endif
+
+void
+ERTS_CIO_EXPORT(erts_init_check_io)(void)
+{
+ erts_smp_atomic_init(&pollset.in_poll_wait, 0);
+ ERTS_CIO_POLL_INIT();
+ pollset.ps = ERTS_CIO_NEW_POLLSET();
+
+#ifdef ERTS_SMP
+ init_removed_fd_alloc();
+ pollset.removed_list = NULL;
+ erts_smp_spinlock_init(&pollset.removed_list_lock,
+ "pollset_rm_list");
+ {
+ int i;
+ for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) {
+#ifdef ERTS_ENABLE_LOCK_COUNT
+ erts_smp_mtx_init_x(&drv_ev_state_locks[i].lck, "drv_ev_state", make_small(i));
+#else
+ erts_smp_mtx_init(&drv_ev_state_locks[i].lck, "drv_ev_state");
+#endif
+ }
+ }
+#endif
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ max_fds = ERTS_CIO_POLL_MAX_FDS();
+ erts_smp_atomic_init(&drv_ev_state_len, 0);
+ drv_ev_state = NULL;
+ erts_smp_mtx_init(&drv_ev_state_grow_lock, "drv_ev_state_grow");
+#else
+ {
+ SafeHashFunctions hf;
+ hf.hash = &drv_ev_state_hash;
+ hf.cmp = &drv_ev_state_cmp;
+ hf.alloc = &drv_ev_state_alloc;
+ hf.free = &drv_ev_state_free;
+ num_state_prealloc = 0;
+ state_prealloc_first = NULL;
+ erts_smp_spinlock_init(&state_prealloc_lock,"state_prealloc");
+
+ safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state_tab, "drv_ev_state_tab",
+ DRV_EV_STATE_HTAB_SIZE, hf);
+ }
+#endif
+}
+
+int
+ERTS_CIO_EXPORT(erts_check_io_max_files)(void)
+{
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ return max_fds;
+#else
+ return ERTS_POLL_EXPORT(erts_poll_max_fds)();
+#endif
+}
+
+Uint
+ERTS_CIO_EXPORT(erts_check_io_size)(void)
+{
+ Uint res;
+ ErtsPollInfo pi;
+ ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+ res = pi.memory_size;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ res += sizeof(ErtsDrvEventState) * erts_smp_atomic_read(&drv_ev_state_len);
+#else
+ res += safe_hash_table_sz(&drv_ev_state_tab);
+ {
+ SafeHashInfo hi;
+ safe_hash_get_info(&hi, &drv_ev_state_tab);
+ res += hi.objs * sizeof(ErtsDrvEventState);
+ }
+ erts_smp_spin_lock(&state_prealloc_lock);
+ res += num_state_prealloc * sizeof(ErtsDrvEventState);
+ erts_smp_spin_unlock(&state_prealloc_lock);
+#endif
+ return res;
+}
+
+Eterm
+ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
+{
+ Process *p = (Process *) proc;
+ Eterm tags[15], values[15], res;
+ Uint sz, *szp, *hp, **hpp, memory_size;
+ Sint i;
+ ErtsPollInfo pi;
+
+ ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+ memory_size = pi.memory_size;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read(&drv_ev_state_len);
+#else
+ memory_size += safe_hash_table_sz(&drv_ev_state_tab);
+ {
+ SafeHashInfo hi;
+ safe_hash_get_info(&hi, &drv_ev_state_tab);
+ memory_size += hi.objs * sizeof(ErtsDrvEventState);
+ }
+ erts_smp_spin_lock(&state_prealloc_lock);
+ memory_size += num_state_prealloc * sizeof(ErtsDrvEventState);
+ erts_smp_spin_unlock(&state_prealloc_lock);
+#endif
+
+ hpp = NULL;
+ szp = &sz;
+ sz = 0;
+
+ bld_it:
+ i = 0;
+
+ tags[i] = erts_bld_atom(hpp, szp, "name");
+ values[i++] = erts_bld_atom(hpp, szp, "erts_poll");
+
+ tags[i] = erts_bld_atom(hpp, szp, "primary");
+ values[i++] = erts_bld_atom(hpp, szp, pi.primary);
+
+ tags[i] = erts_bld_atom(hpp, szp, "fallback");
+ values[i++] = erts_bld_atom(hpp, szp, pi.fallback ? pi.fallback : "false");
+
+ tags[i] = erts_bld_atom(hpp, szp, "kernel_poll");
+ values[i++] = erts_bld_atom(hpp, szp,
+ pi.kernel_poll ? pi.kernel_poll : "false");
+
+ tags[i] = erts_bld_atom(hpp, szp, "memory_size");
+ values[i++] = erts_bld_uint(hpp, szp, memory_size);
+
+ tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.poll_set_size);
+
+ if (pi.fallback) {
+ tags[i] = erts_bld_atom(hpp, szp, "fallback_poll_set_size");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.fallback_poll_set_size);
+ }
+
+ tags[i] = erts_bld_atom(hpp, szp, "lazy_updates");
+ values[i++] = pi.lazy_updates ? am_true : am_false;
+
+ if (pi.lazy_updates) {
+ tags[i] = erts_bld_atom(hpp, szp, "pending_updates");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.pending_updates);
+ }
+
+ tags[i] = erts_bld_atom(hpp, szp, "batch_updates");
+ values[i++] = pi.batch_updates ? am_true : am_false;
+
+ tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates");
+ values[i++] = pi.concurrent_updates ? am_true : am_false;
+
+ tags[i] = erts_bld_atom(hpp, szp, "max_fds");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds);
+
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups);
+
+ tags[i] = erts_bld_atom(hpp, szp, "no_avoided_interrupts");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_interrupts);
+
+ tags[i] = erts_bld_atom(hpp, szp, "no_interrupt_timed");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_interrupt_timed);
+#endif
+
+ res = erts_bld_2tup_list(hpp, szp, i, tags, values);
+
+ if (!hpp) {
+ hp = HAlloc(p, sz);
+ hpp = &hp;
+ szp = NULL;
+ goto bld_it;
+ }
+
+ return res;
+}
+
+static ERTS_INLINE ErtsPollEvents
+print_events(ErtsPollEvents ev)
+{
+ int first = 1;
+ if(ev & ERTS_POLL_EV_IN) {
+ ev &= ~ERTS_POLL_EV_IN;
+ erts_printf("%s%s", first ? "" : "|", "IN");
+ first = 0;
+ }
+ if(ev & ERTS_POLL_EV_OUT) {
+ ev &= ~ERTS_POLL_EV_OUT;
+ erts_printf("%s%s", first ? "" : "|", "OUT");
+ first = 0;
+ }
+ /* The following should not appear... */
+ if(ev & ERTS_POLL_EV_NVAL) {
+ erts_printf("%s%s", first ? "" : "|", "NVAL");
+ first = 0;
+ }
+ if(ev & ERTS_POLL_EV_ERR) {
+ erts_printf("%s%s", first ? "" : "|", "ERR");
+ first = 0;
+ }
+ if (ev)
+ erts_printf("%s0x%b32x", first ? "" : "|", (Uint32) ev);
+ return ev;
+}
+
+typedef struct {
+ int used_fds;
+ int num_errors;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ int internal_fds;
+ ErtsPollEvents *epep;
+#endif
+} IterDebugCounters;
+
+static void doit_erts_check_io_debug(void *vstate, void *vcounters)
+{
+ ErtsDrvEventState *state = (ErtsDrvEventState *) vstate;
+ IterDebugCounters *counters = (IterDebugCounters *) vcounters;
+ ErtsPollEvents cio_events = state->events;
+ ErtsSysFdType fd = state->fd;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ int internal = 0;
+ ErtsPollEvents ep_events = counters->epep[(int) fd];
+#endif
+ int err = 0;
+
+#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
+ struct stat stat_buf;
+#endif
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (state->events || ep_events) {
+ if (ep_events & ERTS_POLL_EV_NVAL) {
+ ep_events &= ~ERTS_POLL_EV_NVAL;
+ internal = 1;
+ counters->internal_fds++;
+ }
+ else
+ counters->used_fds++;
+#else
+ if (state->events) {
+ counters->used_fds++;
+#endif
+
+ erts_printf("fd=%d ", (int) fd);
+
+#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
+ if (fstat((int) fd, &stat_buf) < 0)
+ erts_printf("type=unknown ");
+ else {
+ erts_printf("type=");
+#ifdef S_ISSOCK
+ if (S_ISSOCK(stat_buf.st_mode))
+ erts_printf("sock ");
+ else
+#endif
+#ifdef S_ISFIFO
+ if (S_ISFIFO(stat_buf.st_mode))
+ erts_printf("fifo ");
+ else
+#endif
+#ifdef S_ISCHR
+ if (S_ISCHR(stat_buf.st_mode))
+ erts_printf("chr ");
+ else
+#endif
+#ifdef S_ISDIR
+ if (S_ISDIR(stat_buf.st_mode))
+ erts_printf("dir ");
+ else
+#endif
+#ifdef S_ISBLK
+ if (S_ISBLK(stat_buf.st_mode))
+ erts_printf("blk ");
+ else
+#endif
+#ifdef S_ISREG
+ if (S_ISREG(stat_buf.st_mode))
+ erts_printf("reg ");
+ else
+#endif
+#ifdef S_ISLNK
+ if (S_ISLNK(stat_buf.st_mode))
+ erts_printf("lnk ");
+ else
+#endif
+#ifdef S_ISDOOR
+ if (S_ISDOOR(stat_buf.st_mode))
+ erts_printf("door ");
+ else
+#endif
+#ifdef S_ISWHT
+ if (S_ISWHT(stat_buf.st_mode))
+ erts_printf("wht ");
+ else
+#endif
+#ifdef S_ISXATTR
+ if (S_ISXATTR(stat_buf.st_mode))
+ erts_printf("xattr ");
+ else
+#endif
+ erts_printf("unknown ");
+ }
+#else
+ erts_printf("type=unknown ");
+#endif
+
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ erts_printf("driver_select ");
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (internal) {
+ erts_printf("internal ");
+ err = 1;
+ }
+
+ if (cio_events == ep_events) {
+ erts_printf("ev=");
+ if (print_events(cio_events) != 0)
+ err = 1;
+ }
+ else {
+ err = 1;
+ erts_printf("cio_ev=");
+ print_events(cio_events);
+ erts_printf(" ep_ev=");
+ print_events(ep_events);
+ }
+#else
+ if (print_events(cio_events) != 0)
+ err = 1;
+#endif
+ erts_printf(" ");
+ if (cio_events & ERTS_POLL_EV_IN) {
+ Eterm id = state->driver.select->inport;
+ if (is_nil(id)) {
+ erts_printf("inport=none inname=none indrv=none ");
+ err = 1;
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(id);
+ erts_printf(" inport=%T inname=%s indrv=%s ",
+ id,
+ pnp->name ? pnp->name : "unknown",
+ (pnp->driver_name
+ ? pnp->driver_name
+ : "unknown"));
+ erts_free_port_names(pnp);
+ }
+ }
+ if (cio_events & ERTS_POLL_EV_OUT) {
+ Eterm id = state->driver.select->outport;
+ if (is_nil(id)) {
+ erts_printf("outport=none outname=none outdrv=none ");
+ err = 1;
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(id);
+ erts_printf(" outport=%T outname=%s outdrv=%s ",
+ id,
+ pnp->name ? pnp->name : "unknown",
+ (pnp->driver_name
+ ? pnp->driver_name
+ : "unknown"));
+ erts_free_port_names(pnp);
+ }
+ }
+ }
+ else if (state->type == ERTS_EV_TYPE_DRV_EV) {
+ Eterm id;
+ erts_printf("driver_event ");
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (internal) {
+ erts_printf("internal ");
+ err = 1;
+ }
+ if (cio_events == ep_events) {
+ erts_printf("ev=0x%b32x", (Uint32) cio_events);
+ }
+ else {
+ err = 1;
+ erts_printf("cio_ev=0x%b32x", (Uint32) cio_events);
+ erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events);
+ }
+#else
+ erts_printf("ev=0x%b32x", (Uint32) cio_events);
+#endif
+ id = state->driver.event->port;
+ if (is_nil(id)) {
+ erts_printf(" port=none name=none drv=none ");
+ err = 1;
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(id);
+ erts_printf(" port=%T name=%s drv=%s ",
+ id,
+ pnp->name ? pnp->name : "unknown",
+ (pnp->driver_name
+ ? pnp->driver_name
+ : "unknown"));
+ erts_free_port_names(pnp);
+ }
+ }
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ else if (internal) {
+ erts_printf("internal ");
+ if (cio_events) {
+ err = 1;
+ erts_printf("cio_ev=");
+ print_events(cio_events);
+ }
+ if (ep_events) {
+ erts_printf("ep_ev=");
+ print_events(ep_events);
+ }
+ }
+#endif
+ else {
+ err = 1;
+ erts_printf("control_type=%d ", (int)state->type);
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (cio_events == ep_events) {
+ erts_printf("ev=0x%b32x", (Uint32) cio_events);
+ }
+ else {
+ erts_printf("cio_ev=0x%b32x", (Uint32) cio_events);
+ erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events);
+ }
+#else
+ erts_printf("ev=0x%b32x", (Uint32) cio_events);
+#endif
+ }
+
+ if (err) {
+ counters->num_errors++;
+ erts_printf(" ERROR");
+ }
+ erts_printf("\n");
+ }
+}
+
+int
+ERTS_CIO_EXPORT(erts_check_io_debug)(void)
+{
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ int fd, len;
+#endif
+ IterDebugCounters counters;
+ ErtsDrvEventState null_des;
+
+ null_des.driver.select = NULL;
+ null_des.events = 0;
+ null_des.remove_cnt = 0;
+ null_des.type = ERTS_EV_TYPE_NONE;
+
+ erts_printf("--- fds in pollset --------------------------------------\n");
+
+#ifdef ERTS_SMP
+# ifdef ERTS_ENABLE_LOCK_CHECK
+ erts_lc_check_exact(NULL, 0); /* No locks should be locked */
+# endif
+ erts_block_system(0); /* stop the world to avoid messy locking */
+#endif
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds);
+ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds);
+ counters.internal_fds = 0;
+#endif
+ counters.used_fds = 0;
+ counters.num_errors = 0;
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ len = erts_smp_atomic_read(&drv_ev_state_len);
+ for (fd = 0; fd < len; fd++) {
+ doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters);
+ }
+ for ( ; fd < max_fds; fd++) {
+ null_des.fd = fd;
+ doit_erts_check_io_debug((void *) &null_des, (void *) &counters);
+ }
+#else
+ safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters);
+#endif
+
+#ifdef ERTS_SMP
+ erts_release_system();
+#endif
+
+ erts_printf("\n");
+ erts_printf("used fds=%d\n", counters.used_fds);
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ erts_printf("internal fds=%d\n", counters.internal_fds);
+#endif
+ erts_printf("---------------------------------------------------------\n");
+ fflush(stdout);
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ erts_free(ERTS_ALC_T_TMP, (void *) counters.epep);
+#endif
+ return counters.num_errors;
+}
+
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
new file mode 100644
index 0000000000..9b45a63913
--- /dev/null
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -0,0 +1,96 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Check I/O
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERL_CHECK_IO_H__
+#define ERL_CHECK_IO_H__
+
+#include "erl_sys_driver.h"
+
+#ifdef ERTS_ENABLE_KERNEL_POLL
+
+int driver_select_kp(ErlDrvPort, ErlDrvEvent, int, int);
+int driver_select_nkp(ErlDrvPort, ErlDrvEvent, int, int);
+int driver_event_kp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
+int driver_event_nkp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
+Uint erts_check_io_size_kp(void);
+Uint erts_check_io_size_nkp(void);
+Eterm erts_check_io_info_kp(void *);
+Eterm erts_check_io_info_nkp(void *);
+int erts_check_io_max_files_kp(void);
+int erts_check_io_max_files_nkp(void);
+void erts_check_io_interrupt_kp(int);
+void erts_check_io_interrupt_nkp(int);
+void erts_check_io_interrupt_timed_kp(int, long);
+void erts_check_io_interrupt_timed_nkp(int, long);
+void erts_check_io_kp(int);
+void erts_check_io_nkp(int);
+void erts_init_check_io_kp(void);
+void erts_init_check_io_nkp(void);
+int erts_check_io_debug_kp(void);
+int erts_check_io_debug_nkp(void);
+
+#else /* !ERTS_ENABLE_KERNEL_POLL */
+
+Uint erts_check_io_size(void);
+Eterm erts_check_io_info(void *);
+int erts_check_io_max_files(void);
+void erts_check_io_interrupt(int);
+void erts_check_io_interrupt_timed(int, long);
+void erts_check_io(int);
+void erts_init_check_io(void);
+
+#endif
+
+#endif /* ERL_CHECK_IO_H__ */
+
+#if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__)
+#define ERL_CHECK_IO_INTERNAL__
+#endif
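+
(Note on the layout above: the block following the ERL_CHECK_IO_H__ guard's #endif is deliberately outside the guard, so it is re-evaluated on every inclusion. Every file except erl_check_io.c, which defines ERL_CHECK_IO_C__, and erts_alloc.c gets ERL_CHECK_IO_INTERNAL__ defined and therefore skips the internal type definitions below.)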
+
+#ifndef ERL_CHECK_IO_INTERNAL__
+#define ERL_CHECK_IO_INTERNAL__
+#include "erl_poll.h"
+#include "erl_port_task.h"
+
+/*
+ * ErtsDrvEventDataState is used by driver_event(), which is almost never
+ * used. We allocate ErtsDrvEventDataState separately since we don't want
+ * the size of ErtsDrvEventState to increase due to driver_event()
+ * information.
+ */
+typedef struct {
+ Eterm port;
+ ErlDrvEventData data;
+ ErtsPollEvents removed_events;
+ ErtsPortTaskHandle task;
+} ErtsDrvEventDataState;
+
+typedef struct {
+ Eterm inport;
+ Eterm outport;
+ ErtsPortTaskHandle intask;
+ ErtsPortTaskHandle outtask;
+} ErtsDrvSelectDataState;
+#endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */
diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c
new file mode 100644
index 0000000000..f4e21bc05f
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mseg.c
@@ -0,0 +1,1452 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: A memory segment allocator. Segments that are deallocated
+ * are kept for a while in a segment "cache" before they are
+ * destroyed. When segments are allocated, cached segments
+ * are used if possible instead of creating new segments.
+ *
+ * Author: Rickard Green
+ */
+
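As an illustrative aside, the cache idea described above in its most generic form (a sketch only; the real implementation below uses a fixed array of cache_desc_t entries, fit heuristics, and an aging check, and create_new_segment here is a hypothetical stand-in for the mmap-based allocation):

    #include <stddef.h>
    #include <stdlib.h>

    void *create_new_segment(size_t want);  /* hypothetical fallback */

    typedef struct seg { void *mem; size_t size; struct seg *next; } seg_t;
    static seg_t *seg_cache;  /* recently deallocated segments */

    static void *seg_alloc_sketch(size_t want)
    {
        seg_t **p;
        for (p = &seg_cache; *p; p = &(*p)->next) {
            if ((*p)->size >= want) {   /* reuse a cached segment */
                seg_t *hit = *p;
                void *mem = hit->mem;
                *p = hit->next;
                free(hit);
                return mem;
            }
        }
        return create_new_segment(want);
    }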
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_mseg.h"
+#include "global.h"
+#include "erl_threads.h"
+#include "erl_mtrace.h"
+#include "big.h"
+
+#if HAVE_ERTS_MSEG
+
+#if defined(USE_THREADS) && !defined(ERTS_SMP)
+# define ERTS_THREADS_NO_SMP
+#endif
+
+#define SEGTYPE ERTS_MTRACE_SEGMENT_ID
+
+#ifndef HAVE_GETPAGESIZE
+#define HAVE_GETPAGESIZE 0
+#endif
+
+#ifdef _SC_PAGESIZE
+# define GET_PAGE_SIZE sysconf(_SC_PAGESIZE)
+#elif HAVE_GETPAGESIZE
+# define GET_PAGE_SIZE getpagesize()
+#else
+# error "Page size unknown"
+ /* Implement some other way to get the real page size if needed! */
+#endif
+
+#define MAX_CACHE_SIZE 30
+
+#undef MIN
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#undef MAX
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+
+#undef PAGE_MASK
+#define INV_PAGE_MASK ((Uint) (page_size - 1))
+#define PAGE_MASK (~INV_PAGE_MASK)
+#define PAGE_FLOOR(X) ((X) & PAGE_MASK)
+#define PAGE_CEILING(X) PAGE_FLOOR((X) + INV_PAGE_MASK)
+#define PAGES(X) ((X) >> page_shift)
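+
A worked example of the page-rounding macros above, assuming page_size = 4096 (so page_shift = 12):

    /* INV_PAGE_MASK = 0x0fff, PAGE_MASK = ~0x0fff
     * PAGE_FLOOR(8193)   = 8193 & PAGE_MASK            = 8192
     * PAGE_CEILING(8193) = PAGE_FLOOR(8193 + 0x0fff)   = 12288
     * PAGES(12288)       = 12288 >> 12                 = 3
     */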
+
+static int atoms_initialized;
+
+static Uint cache_check_interval;
+
+static void check_cache(void *unused);
+static void mseg_clear_cache(void);
+static int is_cache_check_scheduled;
+#ifdef ERTS_THREADS_NO_SMP
+static int is_cache_check_requested;
+#endif
+
+#if HAVE_MMAP
+/* Mmap ... */
+
+#define MMAP_PROT (PROT_READ|PROT_WRITE)
+#ifdef MAP_ANON
+# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
+# define MMAP_FD (-1)
+#else
+# define MMAP_FLAGS (MAP_PRIVATE)
+# define MMAP_FD mmap_fd
+static int mmap_fd;
+#endif
+
+#if HAVE_MREMAP
+# define HAVE_MSEG_RECREATE 1
+#else
+# define HAVE_MSEG_RECREATE 0
+#endif
+
+#define CAN_PARTLY_DESTROY 1
+#else /* #if HAVE_MMAP */
+#define CAN_PARTLY_DESTROY 0
+#error "Not supported"
+#endif /* #if HAVE_MMAP */
+
+
+#if defined(ERTS_MSEG_FAKE_SEGMENTS)
+#undef CAN_PARTLY_DESTROY
+#define CAN_PARTLY_DESTROY 0
+#endif
+
+static const ErtsMsegOpt_t default_opt = ERTS_MSEG_DEFAULT_OPT_INITIALIZER;
+
+typedef struct cache_desc_t_ {
+ void *seg;
+ Uint size;
+ struct cache_desc_t_ *next;
+ struct cache_desc_t_ *prev;
+} cache_desc_t;
+
+typedef struct {
+ Uint32 giga_no;
+ Uint32 no;
+} CallCounter;
+
+static int is_init_done;
+static Uint page_size;
+static Uint page_shift;
+
+static struct {
+ CallCounter alloc;
+ CallCounter dealloc;
+ CallCounter realloc;
+ CallCounter create;
+ CallCounter destroy;
+#if HAVE_MSEG_RECREATE
+ CallCounter recreate;
+#endif
+ CallCounter clear_cache;
+ CallCounter check_cache;
+} calls;
+
+static cache_desc_t cache_descs[MAX_CACHE_SIZE];
+static cache_desc_t *free_cache_descs;
+static cache_desc_t *cache;
+static cache_desc_t *cache_end;
+static Uint cache_hits;
+static Uint cache_size;
+static Uint min_cached_seg_size;
+static Uint max_cached_seg_size;
+
+static Uint max_cache_size;
+static Uint abs_max_cache_bad_fit;
+static Uint rel_max_cache_bad_fit;
+
+#if CAN_PARTLY_DESTROY
+static Uint min_seg_size;
+#endif
+
+struct {
+ struct {
+ Uint watermark;
+ Uint no;
+ Uint sz;
+ } current;
+ struct {
+ Uint no;
+ Uint sz;
+ } max;
+ struct {
+ Uint no;
+ Uint sz;
+ } max_ever;
+} segments;
+
+#define ERTS_MSEG_ALLOC_STAT(SZ) \
+do { \
+ segments.current.no++; \
+ if (segments.max.no < segments.current.no) \
+ segments.max.no = segments.current.no; \
+ if (segments.current.watermark < segments.current.no) \
+ segments.current.watermark = segments.current.no; \
+ segments.current.sz += (SZ); \
+ if (segments.max.sz < segments.current.sz) \
+ segments.max.sz = segments.current.sz; \
+} while (0)
+
+#define ERTS_MSEG_DEALLOC_STAT(SZ) \
+do { \
+ ASSERT(segments.current.no > 0); \
+ segments.current.no--; \
+ ASSERT(segments.current.sz >= (SZ)); \
+ segments.current.sz -= (SZ); \
+} while (0)
+
+#define ERTS_MSEG_REALLOC_STAT(OSZ, NSZ) \
+do { \
+ ASSERT(segments.current.sz >= (OSZ)); \
+ segments.current.sz -= (OSZ); \
+ segments.current.sz += (NSZ); \
+} while (0)
+
+#define ONE_GIGA (1000000000)
+
+#define ZERO_CC(CC) (calls.CC.no = 0, calls.CC.giga_no = 0)
+
+#define INC_CC(CC) (calls.CC.no == ONE_GIGA - 1 \
+ ? (calls.CC.giga_no++, calls.CC.no = 0) \
+ : calls.CC.no++)
+
+#define DEC_CC(CC) (calls.CC.no == 0 \
+ ? (calls.CC.giga_no--, \
+ calls.CC.no = ONE_GIGA - 1) \
+ : calls.CC.no--)
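+/*
+ * Example: a call counter is split into giga_no/no so that it never
+ * overflows a 32-bit Uint32. The logical count is giga_no*ONE_GIGA + no;
+ * e.g. after 2500000000 calls, giga_no == 2 and no == 500000000, printed
+ * as "2500000000" via the "%bpu%09bpu" format in info_calls() below.
+ */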
+
+
+static erts_mtx_t mseg_mutex; /* Also needed when !USE_THREADS */
+static erts_mtx_t init_atoms_mutex; /* Also needed when !USE_THREADS */
+
+#ifdef USE_THREADS
+#ifdef ERTS_THREADS_NO_SMP
+static erts_tid_t main_tid;
+static int async_handle = -1;
+#endif
+
+static void thread_safe_init(void)
+{
+ erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
+ erts_mtx_init(&mseg_mutex, "mseg");
+#ifdef ERTS_THREADS_NO_SMP
+ main_tid = erts_thr_self();
+#endif
+}
+
+#endif
+
+static ErlTimer cache_check_timer;
+
+static ERTS_INLINE void
+schedule_cache_check(void)
+{
+ if (!is_cache_check_scheduled && is_init_done) {
+#ifdef ERTS_THREADS_NO_SMP
+ if (!erts_equal_tids(erts_thr_self(), main_tid)) {
+ if (!is_cache_check_requested) {
+ is_cache_check_requested = 1;
+ sys_async_ready(async_handle);
+ }
+ }
+ else
+#endif
+ {
+ cache_check_timer.active = 0;
+ erl_set_timer(&cache_check_timer,
+ check_cache,
+ NULL,
+ NULL,
+ cache_check_interval);
+ is_cache_check_scheduled = 1;
+#ifdef ERTS_THREADS_NO_SMP
+ is_cache_check_requested = 0;
+#endif
+ }
+ }
+}
+
+#ifdef ERTS_THREADS_NO_SMP
+
+static void
+check_schedule_cache_check(void)
+{
+ erts_mtx_lock(&mseg_mutex);
+ if (is_cache_check_requested
+ && !is_cache_check_scheduled) {
+ schedule_cache_check();
+ }
+ erts_mtx_unlock(&mseg_mutex);
+}
+
+#endif
+
+static void
+mseg_shutdown(void)
+{
+#ifdef ERTS_SMP
+ erts_mtx_lock(&mseg_mutex);
+#endif
+ mseg_clear_cache();
+#ifdef ERTS_SMP
+ erts_mtx_unlock(&mseg_mutex);
+#endif
+}
+
+static ERTS_INLINE void *
+mseg_create(Uint size)
+{
+ void *seg;
+
+ ASSERT(size % page_size == 0);
+
+#if defined(ERTS_MSEG_FAKE_SEGMENTS)
+ seg = erts_sys_alloc(ERTS_ALC_N_INVALID, NULL, size);
+#elif HAVE_MMAP
+ seg = (void *) mmap((void *) 0, (size_t) size,
+ MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0);
+ if (seg == (void *) MAP_FAILED)
+ seg = NULL;
+#else
+#error "Missing mseg_create() implementation"
+#endif
+
+ INC_CC(create);
+
+ return seg;
+}
+
+static ERTS_INLINE void
+mseg_destroy(void *seg, Uint size)
+{
+#if defined(ERTS_MSEG_FAKE_SEGMENTS)
+ erts_sys_free(ERTS_ALC_N_INVALID, NULL, seg);
+#elif HAVE_MMAP
+
+#ifdef DEBUG
+ int res =
+#endif
+
+ munmap((void *) seg, size);
+
+ ASSERT(size % page_size == 0);
+ ASSERT(res == 0);
+#else
+#error "Missing mseg_destroy() implementation"
+#endif
+
+ INC_CC(destroy);
+
+}
+
+#if HAVE_MSEG_RECREATE
+
+static ERTS_INLINE void *
+mseg_recreate(void *old_seg, Uint old_size, Uint new_size)
+{
+ void *new_seg;
+
+ ASSERT(old_size % page_size == 0);
+ ASSERT(new_size % page_size == 0);
+
+#if defined(ERTS_MSEG_FAKE_SEGMENTS)
+ new_seg = erts_sys_realloc(ERTS_ALC_N_INVALID, NULL, old_seg, new_size);
+#elif HAVE_MREMAP
+ new_seg = (void *) mremap((void *) old_seg,
+ (size_t) old_size,
+ (size_t) new_size,
+ MREMAP_MAYMOVE);
+ if (new_seg == (void *) MAP_FAILED)
+ new_seg = NULL;
+#else
+#error "Missing mseg_recreate() implementation"
+#endif
+
+ INC_CC(recreate);
+
+ return new_seg;
+}
+
+#endif /* #if HAVE_MSEG_RECREATE */
+
+
+static ERTS_INLINE cache_desc_t *
+alloc_cd(void)
+{
+ cache_desc_t *cd = free_cache_descs;
+ if (cd)
+ free_cache_descs = cd->next;
+ return cd;
+}
+
+static ERTS_INLINE void
+free_cd(cache_desc_t *cd)
+{
+ cd->next = free_cache_descs;
+ free_cache_descs = cd;
+}
+
+
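+/*
+ * The cache is a doubly linked list: mseg_dealloc() inserts whole
+ * segments at the head via link_cd(), mseg_realloc() appends shrink
+ * leftovers at the tail via end_link_cd(), and adjust_cache_size()
+ * always evicts from cache_end, i.e. the tail.
+ */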
+static ERTS_INLINE void
+link_cd(cache_desc_t *cd)
+{
+ if (cache)
+ cache->prev = cd;
+ cd->next = cache;
+ cd->prev = NULL;
+ cache = cd;
+
+ if (!cache_end) {
+ ASSERT(!cd->next);
+ cache_end = cd;
+ }
+
+ cache_size++;
+}
+
+static ERTS_INLINE void
+end_link_cd(cache_desc_t *cd)
+{
+ if (cache_end)
+ cache_end->next = cd;
+ cd->next = NULL;
+ cd->prev = cache_end;
+ cache_end = cd;
+
+ if (!cache) {
+ ASSERT(!cd->prev);
+ cache = cd;
+ }
+
+ cache_size++;
+}
+
+static ERTS_INLINE void
+unlink_cd(cache_desc_t *cd)
+{
+
+ if (cd->next)
+ cd->next->prev = cd->prev;
+ else
+ cache_end = cd->prev;
+
+ if (cd->prev)
+ cd->prev->next = cd->next;
+ else
+ cache = cd->next;
+ ASSERT(cache_size > 0);
+ cache_size--;
+}
+
+static ERTS_INLINE void
+check_cache_limits(void)
+{
+ cache_desc_t *cd;
+ max_cached_seg_size = 0;
+ min_cached_seg_size = ~((Uint) 0);
+ for (cd = cache; cd; cd = cd->next) {
+ if (cd->size < min_cached_seg_size)
+ min_cached_seg_size = cd->size;
+ if (cd->size > max_cached_seg_size)
+ max_cached_seg_size = cd->size;
+ }
+
+}
+
+static ERTS_INLINE void
+adjust_cache_size(int force_check_limits)
+{
+ cache_desc_t *cd;
+ int check_limits = force_check_limits;
+ Sint max_cached = ((Sint) segments.current.watermark
+ - (Sint) segments.current.no);
+
+ while (((Sint) cache_size) > max_cached && ((Sint) cache_size) > 0) {
+ ASSERT(cache_end);
+ cd = cache_end;
+ if (!check_limits &&
+ !(min_cached_seg_size < cd->size
+ && cd->size < max_cached_seg_size)) {
+ check_limits = 1;
+ }
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg);
+ mseg_destroy(cd->seg, cd->size);
+ unlink_cd(cd);
+ free_cd(cd);
+ }
+
+ if (check_limits)
+ check_cache_limits();
+
+}
+
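+/*
+ * check_cache() runs once per cache check interval: the watermark decays
+ * one step toward the current number of segments in use, which shrinks
+ * the cache size allowed by adjust_cache_size() by one. A cache that is
+ * no longer needed thus drains gradually instead of being freed at once.
+ */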
+static void
+check_cache(void *unused)
+{
+#ifdef ERTS_SMP
+ erts_mtx_lock(&mseg_mutex);
+#endif
+
+ is_cache_check_scheduled = 0;
+
+ if (segments.current.watermark > segments.current.no)
+ segments.current.watermark--;
+ adjust_cache_size(0);
+
+ if (cache_size)
+ schedule_cache_check();
+
+ INC_CC(check_cache);
+
+#ifdef ERTS_SMP
+ erts_mtx_unlock(&mseg_mutex);
+#endif
+
+}
+
+static void
+mseg_clear_cache(void)
+{
+ segments.current.watermark = 0;
+
+ adjust_cache_size(1);
+
+ ASSERT(!cache);
+ ASSERT(!cache_end);
+ ASSERT(!cache_size);
+
+ segments.current.watermark = segments.current.no;
+
+ INC_CC(clear_cache);
+}
+
+static void *
+mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
+{
+
+ Uint max, min, diff_size, size;
+ cache_desc_t *cd, *cand_cd;
+ void *seg;
+
+ INC_CC(alloc);
+
+ size = PAGE_CEILING(*size_p);
+
+#if CAN_PARTLY_DESTROY
+ if (size < min_seg_size)
+ min_seg_size = size;
+#endif
+
+ if (!opt->cache) {
+ create_seg:
+ adjust_cache_size(0);
+ seg = mseg_create(size);
+ if (!seg) {
+ mseg_clear_cache();
+ seg = mseg_create(size);
+ if (!seg)
+ size = 0;
+ }
+
+ *size_p = size;
+ if (seg) {
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size);
+ ERTS_MSEG_ALLOC_STAT(size);
+ }
+ return seg;
+ }
+
+ if (size > max_cached_seg_size)
+ goto create_seg;
+
+ if (size < min_cached_seg_size) {
+
+ diff_size = min_cached_seg_size - size;
+
+ if (diff_size > abs_max_cache_bad_fit)
+ goto create_seg;
+
+ if (100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size))
+ goto create_seg;
+
+ }
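+    /*
+     * Worked example of the bad-fit formula, with the default settings
+     * from ERTS_MSEG_INIT_DEFAULT_INITIALIZER in erl_mseg.h (amcbf =
+     * 4 MB, rmcbf = 20): a request for 10 pages may reuse a cached
+     * 12 page segment (waste 2 pages; 100*2 <= 20*10) but not a cached
+     * 13 page one (100*3 > 20*10), even though both waste far less
+     * than the 4 MB absolute limit.
+     */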
+
+ max = 0;
+ min = ~((Uint) 0);
+ cand_cd = NULL;
+
+ for (cd = cache; cd; cd = cd->next) {
+ if (cd->size >= size) {
+ if (!cand_cd) {
+ cand_cd = cd;
+ continue;
+ }
+ else if (cd->size < cand_cd->size) {
+ if (max < cand_cd->size)
+ max = cand_cd->size;
+ if (min > cand_cd->size)
+ min = cand_cd->size;
+ cand_cd = cd;
+ continue;
+ }
+ }
+ if (max < cd->size)
+ max = cd->size;
+ if (min > cd->size)
+ min = cd->size;
+ }
+
+ min_cached_seg_size = min;
+ max_cached_seg_size = max;
+
+ if (!cand_cd)
+ goto create_seg;
+
+ diff_size = cand_cd->size - size;
+
+ if (diff_size > abs_max_cache_bad_fit
+ || 100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size)) {
+ if (max_cached_seg_size < cand_cd->size)
+ max_cached_seg_size = cand_cd->size;
+ if (min_cached_seg_size > cand_cd->size)
+ min_cached_seg_size = cand_cd->size;
+ goto create_seg;
+ }
+
+ cache_hits++;
+
+ size = cand_cd->size;
+ seg = cand_cd->seg;
+
+ unlink_cd(cand_cd);
+ free_cd(cand_cd);
+
+ *size_p = size;
+
+ if (erts_mtrace_enabled) {
+ erts_mtrace_crr_free(SEGTYPE, SEGTYPE, seg);
+ erts_mtrace_crr_alloc(seg, atype, SEGTYPE, size);
+ }
+
+ if (seg)
+ ERTS_MSEG_ALLOC_STAT(size);
+ return seg;
+}
+
+
+static void
+mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size,
+ const ErtsMsegOpt_t *opt)
+{
+ cache_desc_t *cd;
+
+ ERTS_MSEG_DEALLOC_STAT(size);
+
+ if (!opt->cache || max_cache_size == 0) {
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_free(atype, SEGTYPE, seg);
+ mseg_destroy(seg, size);
+ }
+ else {
+ int check_limits = 0;
+
+ if (size < min_cached_seg_size)
+ min_cached_seg_size = size;
+ if (size > max_cached_seg_size)
+ max_cached_seg_size = size;
+
+ if (!free_cache_descs) {
+ cd = cache_end;
+ if (!(min_cached_seg_size < cd->size
+ && cd->size < max_cached_seg_size)) {
+ check_limits = 1;
+ }
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg);
+ mseg_destroy(cd->seg, cd->size);
+ unlink_cd(cd);
+ free_cd(cd);
+ }
+
+ cd = alloc_cd();
+ ASSERT(cd);
+ cd->seg = seg;
+ cd->size = size;
+ link_cd(cd);
+
+ if (erts_mtrace_enabled) {
+ erts_mtrace_crr_free(atype, SEGTYPE, seg);
+ erts_mtrace_crr_alloc(seg, SEGTYPE, SEGTYPE, size);
+ }
+
+ /* ASSERT(segments.current.watermark >= segments.current.no + cache_size); */
+
+ if (check_limits)
+ check_cache_limits();
+
+ schedule_cache_check();
+
+ }
+
+ INC_CC(dealloc);
+}
+
+static void *
+mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
+ const ErtsMsegOpt_t *opt)
+{
+ void *new_seg;
+ Uint new_size;
+
+ if (!seg || !old_size) {
+ new_seg = mseg_alloc(atype, new_size_p, opt);
+ DEC_CC(alloc);
+ return new_seg;
+ }
+
+ if (!(*new_size_p)) {
+ mseg_dealloc(atype, seg, old_size, opt);
+ DEC_CC(dealloc);
+ return NULL;
+ }
+
+ new_seg = seg;
+ new_size = PAGE_CEILING(*new_size_p);
+
+ if (new_size == old_size)
+ ;
+ else if (new_size < old_size) {
+ Uint shrink_sz = old_size - new_size;
+
+#if CAN_PARTLY_DESTROY
+ if (new_size < min_seg_size)
+ min_seg_size = new_size;
+#endif
+
+ if (shrink_sz < opt->abs_shrink_th
+ && 100*PAGES(shrink_sz) < opt->rel_shrink_th*PAGES(old_size)) {
+ new_size = old_size;
+ }
+ else {
+
+#if CAN_PARTLY_DESTROY
+
+ if (shrink_sz > min_seg_size
+ && free_cache_descs
+ && opt->cache) {
+ cache_desc_t *cd;
+
+ cd = alloc_cd();
+ ASSERT(cd);
+ cd->seg = ((char *) seg) + new_size;
+ cd->size = shrink_sz;
+ end_link_cd(cd);
+
+ if (erts_mtrace_enabled) {
+ erts_mtrace_crr_realloc(new_seg,
+ atype,
+ SEGTYPE,
+ seg,
+ new_size);
+ erts_mtrace_crr_alloc(cd->seg, SEGTYPE, SEGTYPE, cd->size);
+ }
+ schedule_cache_check();
+ }
+ else {
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_realloc(new_seg,
+ atype,
+ SEGTYPE,
+ seg,
+ new_size);
+ mseg_destroy(((char *) seg) + new_size, shrink_sz);
+ }
+
+#elif HAVE_MSEG_RECREATE
+
+ goto do_recreate;
+
+#else
+
+ new_seg = mseg_alloc(atype, &new_size, opt);
+ if (!new_seg)
+ new_size = old_size;
+ else {
+ sys_memcpy(((char *) new_seg),
+ ((char *) seg),
+ MIN(new_size, old_size));
+ mseg_dealloc(atype, seg, old_size, opt);
+ }
+
+#endif
+
+ }
+ }
+ else {
+
+ if (!opt->preserv) {
+ mseg_dealloc(atype, seg, old_size, opt);
+ new_seg = mseg_alloc(atype, &new_size, opt);
+ }
+ else {
+#if HAVE_MSEG_RECREATE
+#if !CAN_PARTLY_DESTROY
+ do_recreate:
+#endif
+ new_seg = mseg_recreate((void *) seg, old_size, new_size);
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
+ if (!new_seg)
+ new_size = old_size;
+#else
+ new_seg = mseg_alloc(atype, &new_size, opt);
+ if (!new_seg)
+ new_size = old_size;
+ else {
+ sys_memcpy(((char *) new_seg),
+ ((char *) seg),
+ MIN(new_size, old_size));
+ mseg_dealloc(atype, seg, old_size, opt);
+ }
+#endif
+ }
+ }
+
+ INC_CC(realloc);
+
+ *new_size_p = new_size;
+
+ ERTS_MSEG_REALLOC_STAT(old_size, new_size);
+
+ return new_seg;
+}
+
+/* --- Info stuff ---------------------------------------------------------- */
+
+static struct {
+ Eterm version;
+
+ Eterm options;
+ Eterm amcbf;
+ Eterm rmcbf;
+ Eterm mcs;
+ Eterm cci;
+
+ Eterm status;
+ Eterm cached_segments;
+ Eterm cache_hits;
+ Eterm segments;
+ Eterm segments_size;
+ Eterm segments_watermark;
+
+
+ Eterm calls;
+ Eterm mseg_alloc;
+ Eterm mseg_dealloc;
+ Eterm mseg_realloc;
+ Eterm mseg_create;
+ Eterm mseg_destroy;
+#if HAVE_MSEG_RECREATE
+ Eterm mseg_recreate;
+#endif
+ Eterm mseg_clear_cache;
+ Eterm mseg_check_cache;
+
+#ifdef DEBUG
+ Eterm end_of_atoms;
+#endif
+} am;
+
+static void ERTS_INLINE atom_init(Eterm *atom, char *name)
+{
+ *atom = am_atom_put(name, strlen(name));
+}
+#define AM_INIT(AM) atom_init(&am.AM, #AM)
+
+static void
+init_atoms(void)
+{
+#ifdef DEBUG
+ Eterm *atom;
+#endif
+
+ erts_mtx_unlock(&mseg_mutex);
+ erts_mtx_lock(&init_atoms_mutex);
+
+ if (!atoms_initialized) {
+#ifdef DEBUG
+ for (atom = (Eterm *) &am; atom <= &am.end_of_atoms; atom++) {
+ *atom = THE_NON_VALUE;
+ }
+#endif
+
+ AM_INIT(version);
+
+ AM_INIT(options);
+ AM_INIT(amcbf);
+ AM_INIT(rmcbf);
+ AM_INIT(mcs);
+ AM_INIT(cci);
+
+ AM_INIT(status);
+ AM_INIT(cached_segments);
+ AM_INIT(cache_hits);
+ AM_INIT(segments);
+ AM_INIT(segments_size);
+ AM_INIT(segments_watermark);
+
+ AM_INIT(calls);
+ AM_INIT(mseg_alloc);
+ AM_INIT(mseg_dealloc);
+ AM_INIT(mseg_realloc);
+ AM_INIT(mseg_create);
+ AM_INIT(mseg_destroy);
+#if HAVE_MSEG_RECREATE
+ AM_INIT(mseg_recreate);
+#endif
+ AM_INIT(mseg_clear_cache);
+ AM_INIT(mseg_check_cache);
+
+#ifdef DEBUG
+ for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) {
+ ASSERT(*atom != THE_NON_VALUE);
+ }
+#endif
+ }
+
+ erts_mtx_lock(&mseg_mutex);
+ atoms_initialized = 1;
+ erts_mtx_unlock(&init_atoms_mutex);
+}
+
+
+#define bld_uint erts_bld_uint
+#define bld_cons erts_bld_cons
+#define bld_tuple erts_bld_tuple
+#define bld_string erts_bld_string
+#define bld_2tup_list erts_bld_2tup_list
+
+
+/*
+ * bld_unstable_uint() (instead of bld_uint()) is used when values may
+ * change between the size check and the actual build. This is because
+ * a value that would fit in a small when the size check is done may
+ * need to be built as a big when the actual build is performed. The
+ * caller is required to HRelease after the build.
+ */
+static ERTS_INLINE Eterm
+bld_unstable_uint(Uint **hpp, Uint *szp, Uint ui)
+{
+ Eterm res = THE_NON_VALUE;
+ if (szp)
+ *szp += BIG_UINT_HEAP_SIZE;
+ if (hpp) {
+ if (IS_USMALL(0, ui))
+ res = make_small(ui);
+ else {
+ res = uint_to_big(ui, *hpp);
+ *hpp += BIG_UINT_HEAP_SIZE;
+ }
+ }
+ return res;
+}
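+/*
+ * Rough sketch of the usual two-pass build idiom (a hedged illustration,
+ * not code from this file): callers first pass only szp to compute the
+ * worst-case heap need, then allocate and pass hpp to do the actual
+ * build, giving back any unused words afterwards:
+ *
+ *   Uint sz = 0;
+ *   Uint *hp, *hp_start;
+ *   (void) bld_unstable_uint(NULL, &sz, val);    -- pass 1: size only
+ *   hp_start = hp = HAlloc(p, sz);               -- allocate worst case
+ *   res = bld_unstable_uint(&hp, NULL, val);     -- pass 2: build
+ *   HRelease(p, hp_start + sz, hp);              -- return unused words
+ */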
+
+static ERTS_INLINE void
+add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
+{
+ *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 2, el1, el2), *lp);
+}
+
+static ERTS_INLINE void
+add_3tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2, Eterm el3)
+{
+ *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 3, el1, el2, el3), *lp);
+}
+
+static ERTS_INLINE void
+add_4tup(Uint **hpp, Uint *szp, Eterm *lp,
+ Eterm el1, Eterm el2, Eterm el3, Eterm el4)
+{
+ *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 4, el1, el2, el3, el4), *lp);
+}
+
+static Eterm
+info_options(char *prefix,
+ int *print_to_p,
+ void *print_to_arg,
+ Uint **hpp,
+ Uint *szp)
+{
+ Eterm res = THE_NON_VALUE;
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ erts_print(to, arg, "%samcbf: %bpu\n", prefix, abs_max_cache_bad_fit);
+ erts_print(to, arg, "%srmcbf: %bpu\n", prefix, rel_max_cache_bad_fit);
+ erts_print(to, arg, "%smcs: %bpu\n", prefix, max_cache_size);
+ erts_print(to, arg, "%scci: %bpu\n", prefix, cache_check_interval);
+ }
+
+ if (hpp || szp) {
+
+ if (!atoms_initialized)
+ init_atoms();
+
+ res = NIL;
+ add_2tup(hpp, szp, &res,
+ am.cci,
+ bld_uint(hpp, szp, cache_check_interval));
+ add_2tup(hpp, szp, &res,
+ am.mcs,
+ bld_uint(hpp, szp, max_cache_size));
+ add_2tup(hpp, szp, &res,
+ am.rmcbf,
+ bld_uint(hpp, szp, rel_max_cache_bad_fit));
+ add_2tup(hpp, szp, &res,
+ am.amcbf,
+ bld_uint(hpp, szp, abs_max_cache_bad_fit));
+
+ }
+
+ return res;
+}
+
+static Eterm
+info_calls(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+{
+ Eterm res = THE_NON_VALUE;
+
+ if (print_to_p) {
+
+#define PRINT_CC(TO, TOA, CC) \
+ if (calls.CC.giga_no == 0) \
+ erts_print(TO, TOA, "mseg_%s calls: %bpu\n", #CC, calls.CC.no); \
+ else \
+ erts_print(TO, TOA, "mseg_%s calls: %bpu%09bpu\n", #CC, \
+ calls.CC.giga_no, calls.CC.no)
+
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+
+ PRINT_CC(to, arg, alloc);
+ PRINT_CC(to, arg, dealloc);
+ PRINT_CC(to, arg, realloc);
+ PRINT_CC(to, arg, create);
+ PRINT_CC(to, arg, destroy);
+#if HAVE_MSEG_RECREATE
+ PRINT_CC(to, arg, recreate);
+#endif
+ PRINT_CC(to, arg, clear_cache);
+ PRINT_CC(to, arg, check_cache);
+
+#undef PRINT_CC
+
+ }
+
+ if (hpp || szp) {
+
+ res = NIL;
+
+ add_3tup(hpp, szp, &res,
+ am.mseg_check_cache,
+ bld_unstable_uint(hpp, szp, calls.check_cache.giga_no),
+ bld_unstable_uint(hpp, szp, calls.check_cache.no));
+ add_3tup(hpp, szp, &res,
+ am.mseg_clear_cache,
+ bld_unstable_uint(hpp, szp, calls.clear_cache.giga_no),
+ bld_unstable_uint(hpp, szp, calls.clear_cache.no));
+
+#if HAVE_MSEG_RECREATE
+ add_3tup(hpp, szp, &res,
+ am.mseg_recreate,
+ bld_unstable_uint(hpp, szp, calls.recreate.giga_no),
+ bld_unstable_uint(hpp, szp, calls.recreate.no));
+#endif
+ add_3tup(hpp, szp, &res,
+ am.mseg_destroy,
+ bld_unstable_uint(hpp, szp, calls.destroy.giga_no),
+ bld_unstable_uint(hpp, szp, calls.destroy.no));
+ add_3tup(hpp, szp, &res,
+ am.mseg_create,
+ bld_unstable_uint(hpp, szp, calls.create.giga_no),
+ bld_unstable_uint(hpp, szp, calls.create.no));
+
+
+ add_3tup(hpp, szp, &res,
+ am.mseg_realloc,
+ bld_unstable_uint(hpp, szp, calls.realloc.giga_no),
+ bld_unstable_uint(hpp, szp, calls.realloc.no));
+ add_3tup(hpp, szp, &res,
+ am.mseg_dealloc,
+ bld_unstable_uint(hpp, szp, calls.dealloc.giga_no),
+ bld_unstable_uint(hpp, szp, calls.dealloc.no));
+ add_3tup(hpp, szp, &res,
+ am.mseg_alloc,
+ bld_unstable_uint(hpp, szp, calls.alloc.giga_no),
+ bld_unstable_uint(hpp, szp, calls.alloc.no));
+ }
+
+ return res;
+}
+
+static Eterm
+info_status(int *print_to_p,
+ void *print_to_arg,
+ int begin_new_max_period,
+ Uint **hpp,
+ Uint *szp)
+{
+ Eterm res = THE_NON_VALUE;
+
+ if (segments.max_ever.no < segments.max.no)
+ segments.max_ever.no = segments.max.no;
+ if (segments.max_ever.sz < segments.max.sz)
+ segments.max_ever.sz = segments.max.sz;
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+
+ erts_print(to, arg, "cached_segments: %bpu\n", cache_size);
+ erts_print(to, arg, "cache_hits: %bpu\n", cache_hits);
+ erts_print(to, arg, "segments: %bpu %bpu %bpu\n",
+ segments.current.no, segments.max.no, segments.max_ever.no);
+ erts_print(to, arg, "segments_size: %bpu %bpu %bpu\n",
+ segments.current.sz, segments.max.sz, segments.max_ever.sz);
+ erts_print(to, arg, "segments_watermark: %bpu\n",
+ segments.current.watermark);
+ }
+
+ if (hpp || szp) {
+ res = NIL;
+ add_2tup(hpp, szp, &res,
+ am.segments_watermark,
+ bld_unstable_uint(hpp, szp, segments.current.watermark));
+ add_4tup(hpp, szp, &res,
+ am.segments_size,
+ bld_unstable_uint(hpp, szp, segments.current.sz),
+ bld_unstable_uint(hpp, szp, segments.max.sz),
+ bld_unstable_uint(hpp, szp, segments.max_ever.sz));
+ add_4tup(hpp, szp, &res,
+ am.segments,
+ bld_unstable_uint(hpp, szp, segments.current.no),
+ bld_unstable_uint(hpp, szp, segments.max.no),
+ bld_unstable_uint(hpp, szp, segments.max_ever.no));
+ add_2tup(hpp, szp, &res,
+ am.cache_hits,
+ bld_unstable_uint(hpp, szp, cache_hits));
+ add_2tup(hpp, szp, &res,
+ am.cached_segments,
+ bld_unstable_uint(hpp, szp, cache_size));
+
+ }
+
+ if (begin_new_max_period) {
+ segments.max.no = segments.current.no;
+ segments.max.sz = segments.current.sz;
+ }
+
+ return res;
+}
+
+static Eterm
+info_version(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+{
+ Eterm res = THE_NON_VALUE;
+
+ if (print_to_p) {
+ erts_print(*print_to_p, print_to_arg, "version: %s\n",
+ ERTS_MSEG_VSN_STR);
+ }
+
+ if (hpp || szp) {
+ res = bld_string(hpp, szp, ERTS_MSEG_VSN_STR);
+ }
+
+ return res;
+}
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ *                             Exported functions                            *
+\* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
+
+Eterm
+erts_mseg_info_options(int *print_to_p, void *print_to_arg,
+ Uint **hpp, Uint *szp)
+{
+ Eterm res;
+
+ erts_mtx_lock(&mseg_mutex);
+
+ res = info_options("option ", print_to_p, print_to_arg, hpp, szp);
+
+ erts_mtx_unlock(&mseg_mutex);
+
+ return res;
+}
+
+Eterm
+erts_mseg_info(int *print_to_p,
+ void *print_to_arg,
+ int begin_max_per,
+ Uint **hpp,
+ Uint *szp)
+{
+ Eterm res = THE_NON_VALUE;
+ Eterm atoms[4];
+ Eterm values[4];
+
+ erts_mtx_lock(&mseg_mutex);
+
+ if (hpp || szp) {
+
+ if (!atoms_initialized)
+ init_atoms();
+
+ atoms[0] = am.version;
+ atoms[1] = am.options;
+ atoms[2] = am.status;
+ atoms[3] = am.calls;
+ }
+
+ values[0] = info_version(print_to_p, print_to_arg, hpp, szp);
+ values[1] = info_options("option ", print_to_p, print_to_arg, hpp, szp);
+ values[2] = info_status(print_to_p, print_to_arg, begin_max_per, hpp, szp);
+ values[3] = info_calls(print_to_p, print_to_arg, hpp, szp);
+
+ if (hpp || szp)
+ res = bld_2tup_list(hpp, szp, 4, atoms, values);
+
+ erts_mtx_unlock(&mseg_mutex);
+
+ return res;
+}
+
+void *
+erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
+{
+ void *seg;
+ erts_mtx_lock(&mseg_mutex);
+ seg = mseg_alloc(atype, size_p, opt);
+ erts_mtx_unlock(&mseg_mutex);
+ return seg;
+}
+
+void *
+erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p)
+{
+ return erts_mseg_alloc_opt(atype, size_p, &default_opt);
+}
+
+void
+erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, Uint size,
+ const ErtsMsegOpt_t *opt)
+{
+ erts_mtx_lock(&mseg_mutex);
+ mseg_dealloc(atype, seg, size, opt);
+ erts_mtx_unlock(&mseg_mutex);
+}
+
+void
+erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size)
+{
+ erts_mseg_dealloc_opt(atype, seg, size, &default_opt);
+}
+
+void *
+erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg, Uint old_size,
+ Uint *new_size_p, const ErtsMsegOpt_t *opt)
+{
+ void *new_seg;
+ erts_mtx_lock(&mseg_mutex);
+ new_seg = mseg_realloc(atype, seg, old_size, new_size_p, opt);
+ erts_mtx_unlock(&mseg_mutex);
+ return new_seg;
+}
+
+void *
+erts_mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size,
+ Uint *new_size_p)
+{
+ return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p, &default_opt);
+}
+
+void
+erts_mseg_clear_cache(void)
+{
+ erts_mtx_lock(&mseg_mutex);
+ mseg_clear_cache();
+ erts_mtx_unlock(&mseg_mutex);
+}
+
+Uint
+erts_mseg_no(void)
+{
+ Uint n;
+ erts_mtx_lock(&mseg_mutex);
+ n = segments.current.no;
+ erts_mtx_unlock(&mseg_mutex);
+ return n;
+}
+
+Uint
+erts_mseg_unit_size(void)
+{
+ return page_size;
+}
+
+void
+erts_mseg_init(ErtsMsegInit_t *init)
+{
+ unsigned i;
+
+ atoms_initialized = 0;
+ is_init_done = 0;
+
+ /* Options ... */
+
+ abs_max_cache_bad_fit = init->amcbf;
+ rel_max_cache_bad_fit = init->rmcbf;
+ max_cache_size = init->mcs;
+ cache_check_interval = init->cci;
+
+ /* */
+
+#ifdef USE_THREADS
+ thread_safe_init();
+#endif
+
+#if HAVE_MMAP && !defined(MAP_ANON)
+ mmap_fd = open("/dev/zero", O_RDWR);
+ if (mmap_fd < 0)
+ erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n");
+#endif
+
+ page_size = GET_PAGE_SIZE;
+
+ page_shift = 1;
+ while ((page_size >> page_shift) != 1) {
+ if ((page_size & (1 << (page_shift - 1))) != 0)
+ erl_exit(ERTS_ABORT_EXIT,
+ "erts_mseg: Unexpected page_size %bpu\n", page_size);
+ page_shift++;
+ }
+
+ sys_memzero((void *) &calls, sizeof(calls));
+
+#if CAN_PARTLY_DESTROY
+ min_seg_size = ~((Uint) 0);
+#endif
+
+ cache = NULL;
+ cache_end = NULL;
+ cache_hits = 0;
+ max_cached_seg_size = 0;
+ min_cached_seg_size = ~((Uint) 0);
+ cache_size = 0;
+
+ is_cache_check_scheduled = 0;
+#ifdef ERTS_THREADS_NO_SMP
+ is_cache_check_requested = 0;
+#endif
+
+ if (max_cache_size > MAX_CACHE_SIZE)
+ max_cache_size = MAX_CACHE_SIZE;
+
+ if (max_cache_size > 0) {
+ for (i = 0; i < max_cache_size - 1; i++)
+ cache_descs[i].next = &cache_descs[i + 1];
+ cache_descs[max_cache_size - 1].next = NULL;
+ free_cache_descs = &cache_descs[0];
+ }
+ else
+ free_cache_descs = NULL;
+
+ segments.current.watermark = 0;
+ segments.current.no = 0;
+ segments.current.sz = 0;
+ segments.max.no = 0;
+ segments.max.sz = 0;
+ segments.max_ever.no = 0;
+ segments.max_ever.sz = 0;
+}
+
+
+/*
+ * erts_mseg_late_init() has to be called after all allocators,
+ * threads, and timers have been initialized.
+ */
+void
+erts_mseg_late_init(void)
+{
+#ifdef ERTS_THREADS_NO_SMP
+ int handle =
+ erts_register_async_ready_callback(
+ check_schedule_cache_check);
+#endif
+ erts_mtx_lock(&mseg_mutex);
+ is_init_done = 1;
+#ifdef ERTS_THREADS_NO_SMP
+ async_handle = handle;
+#endif
+ if (cache_size)
+ schedule_cache_check();
+ erts_mtx_unlock(&mseg_mutex);
+}
+
+void
+erts_mseg_exit(void)
+{
+ mseg_shutdown();
+}
+
+#endif /* #if HAVE_ERTS_MSEG */
+
+unsigned long
+erts_mseg_test(unsigned long op,
+ unsigned long a1,
+ unsigned long a2,
+ unsigned long a3)
+{
+ switch (op) {
+#if HAVE_ERTS_MSEG
+ case 0x400: /* Have erts_mseg */
+ return (unsigned long) 1;
+ case 0x401:
+ return (unsigned long) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1);
+ case 0x402:
+ erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2);
+ return (unsigned long) 0;
+ case 0x403:
+ return (unsigned long) erts_mseg_realloc(ERTS_ALC_A_INVALID,
+ (void *) a1,
+ (Uint) a2,
+ (Uint *) a3);
+ case 0x404:
+ erts_mseg_clear_cache();
+ return (unsigned long) 0;
+ case 0x405:
+ return (unsigned long) erts_mseg_no();
+ case 0x406: {
+ unsigned long res;
+ erts_mtx_lock(&mseg_mutex);
+ res = (unsigned long) cache_size;
+ erts_mtx_unlock(&mseg_mutex);
+ return res;
+ }
+#else /* #if HAVE_ERTS_MSEG */
+ case 0x400: /* Have erts_mseg */
+ return (unsigned long) 0;
+#endif /* #if HAVE_ERTS_MSEG */
+ default: ASSERT(0); return ~((unsigned long) 0);
+ }
+
+}
+
+
diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h
new file mode 100644
index 0000000000..1c5aa63e90
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mseg.h
@@ -0,0 +1,97 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_MSEG_H_
+#define ERL_MSEG_H_
+
+#include "sys.h"
+#include "erl_alloc_types.h"
+
+#ifndef HAVE_MMAP
+# define HAVE_MMAP 0
+#endif
+#ifndef HAVE_MREMAP
+# define HAVE_MREMAP 0
+#endif
+
+#if HAVE_MMAP
+# define HAVE_ERTS_MSEG 1
+#else
+# define HAVE_ERTS_MSEG 0
+#endif
+
+#if HAVE_ERTS_MSEG
+
+#define ERTS_MSEG_VSN_STR "0.9"
+
+typedef struct {
+ Uint amcbf;
+ Uint rmcbf;
+ Uint mcs;
+ Uint cci;
+} ErtsMsegInit_t;
+
+#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \
+{ \
+ 4*1024*1024, /* amcbf: Absolute max cache bad fit */ \
+ 20, /* rmcbf: Relative max cache bad fit */ \
+ 5, /* mcs: Max cache size */ \
+ 1000 /* cci: Cache check interval */ \
+}
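+
+/*
+ * Usage sketch (hypothetical caller): start from the defaults above and
+ * override selected fields before handing the struct to erts_mseg_init():
+ *
+ *   ErtsMsegInit_t init = ERTS_MSEG_INIT_DEFAULT_INITIALIZER;
+ *   init.mcs = 10;                 -- allow up to 10 cached segments
+ *   erts_mseg_init(&init);
+ */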
+
+typedef struct {
+ int cache;
+ int preserv;
+ Uint abs_shrink_th;
+ Uint rel_shrink_th;
+} ErtsMsegOpt_t;
+
+#define ERTS_MSEG_DEFAULT_OPT_INITIALIZER \
+{ \
+ 1, /* Use cache */ \
+ 1, /* Preserve data */ \
+ 0, /* Absolute shrink threshold */ \
+ 0 /* Relative shrink threshold */ \
+}
+
+void *erts_mseg_alloc(ErtsAlcType_t, Uint *);
+void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, const ErtsMsegOpt_t *);
+void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint);
+void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, const ErtsMsegOpt_t *);
+void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *);
+void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *,
+ const ErtsMsegOpt_t *);
+void erts_mseg_clear_cache(void);
+Uint erts_mseg_no(void);
+Uint erts_mseg_unit_size(void);
+void erts_mseg_init(ErtsMsegInit_t *init);
+void erts_mseg_late_init(void); /* Has to be called after all allocators,
+ threads, and timers have been initialized. */
+void erts_mseg_exit(void);
+Eterm erts_mseg_info_options(int *, void*, Uint **, Uint *);
+Eterm erts_mseg_info(int *, void*, int, Uint **, Uint *);
+
+#endif /* #if HAVE_ERTS_MSEG */
+
+unsigned long erts_mseg_test(unsigned long,
+ unsigned long,
+ unsigned long,
+ unsigned long);
+
+#endif /* #ifndef ERL_MSEG_H_ */
diff --git a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
new file mode 100644
index 0000000000..408aa7e016
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
@@ -0,0 +1,245 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2004-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "sys.h"
+#include "erl_mtrace.h"
+
+#ifdef ERTS_CAN_TRACK_MALLOC
+#if defined(HAVE_END_SYMBOL)
+extern char end;
+#elif defined(HAVE__END_SYMBOL)
+extern char _end;
+#endif
+
+static int inited = 0;
+static int init(void);
+
+static volatile char *heap_start = NULL;
+static volatile char *heap_end = NULL;
+
+#if defined(ERTS___AFTER_MORECORE_HOOK_CAN_TRACK_MALLOC) /* ----------------- */
+
+#ifdef HAVE_MALLOC_H
+# include <malloc.h>
+#endif
+
+#undef SBRK_0
+#define SBRK_0 sbrk(0)
+
+static void
+init_hook(void)
+{
+ __after_morecore_hook = erts_mtrace_update_heap_size;
+ if (inited)
+ return;
+ heap_end = NULL;
+#if defined(HAVE_END_SYMBOL)
+ heap_start = &end;
+#elif defined(HAVE__END_SYMBOL)
+ heap_start = &_end;
+#else
+ heap_start = SBRK_0;
+ if (heap_start == (SBRK_RET_TYPE) -1) {
+ heap_start = NULL;
+ return;
+ }
+#endif
+ inited = 1;
+}
+
+static int
+init(void)
+{
+ init_hook();
+ return inited;
+}
+
+void (*__malloc_initialize_hook)(void) = init_hook;
+
+#elif defined(ERTS_BRK_WRAPPERS_CAN_TRACK_MALLOC) /* ------------------------ */
+#ifdef HAVE_DLFCN_H
+# include <dlfcn.h>
+#endif
+
+#undef SBRK_0
+#define SBRK_0 (*real_sbrk)(0)
+
+#ifndef HAVE_SBRK
+# error no sbrk()
+#endif
+#if !defined(HAVE_END_SYMBOL) && !defined(HAVE__END_SYMBOL)
+# error no 'end' nor '_end'
+#endif
+
+static void update_heap_size(char *new_end);
+
+#define SBRK_IMPL(RET_TYPE, FUNC, ARG_TYPE) \
+RET_TYPE FUNC (ARG_TYPE); \
+static RET_TYPE (*real_ ## FUNC)(ARG_TYPE) = NULL; \
+RET_TYPE FUNC (ARG_TYPE arg) \
+{ \
+ RET_TYPE res; \
+ if (!inited && !init()) \
+ return (RET_TYPE) -1; \
+ res = (*real_ ## FUNC)(arg); \
+ if (erts_mtrace_enabled && res != ((RET_TYPE) -1)) \
+ update_heap_size((char *) (*real_ ## FUNC)(0)); \
+ return res; \
+}
+
+#define BRK_IMPL(RET_TYPE, FUNC, ARG_TYPE) \
+RET_TYPE FUNC (ARG_TYPE); \
+static RET_TYPE (*real_ ## FUNC)(ARG_TYPE) = NULL; \
+RET_TYPE FUNC (ARG_TYPE arg) \
+{ \
+ RET_TYPE res; \
+ if (!inited && !init()) \
+ return (RET_TYPE) -1; \
+ res = (*real_ ## FUNC)(arg); \
+ if (erts_mtrace_enabled && res != ((RET_TYPE) -1)) \
+ update_heap_size((char *) arg); \
+ return res; \
+}
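+
+/*
+ * For reference, SBRK_IMPL(SBRK_RET_TYPE, sbrk, SBRK_ARG_TYPE) expands to
+ * (roughly) the following interposing wrapper; real_sbrk is resolved to
+ * the libc implementation via dlsym(RTLD_NEXT, "sbrk") in init() below:
+ *
+ *   SBRK_RET_TYPE sbrk (SBRK_ARG_TYPE arg)
+ *   {
+ *       SBRK_RET_TYPE res;
+ *       if (!inited && !init())
+ *           return (SBRK_RET_TYPE) -1;
+ *       res = (*real_sbrk)(arg);
+ *       if (erts_mtrace_enabled && res != ((SBRK_RET_TYPE) -1))
+ *           update_heap_size((char *) (*real_sbrk)(0));
+ *       return res;
+ *   }
+ */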
+
+SBRK_IMPL(SBRK_RET_TYPE, sbrk, SBRK_ARG_TYPE)
+#ifdef HAVE_BRK
+ BRK_IMPL(BRK_RET_TYPE, brk, BRK_ARG_TYPE)
+#endif
+
+#ifdef HAVE__SBRK
+ SBRK_IMPL(SBRK_RET_TYPE, _sbrk, SBRK_ARG_TYPE)
+#endif
+#ifdef HAVE__BRK
+ BRK_IMPL(BRK_RET_TYPE, _brk, BRK_ARG_TYPE)
+#endif
+
+#ifdef HAVE___SBRK
+ SBRK_IMPL(SBRK_RET_TYPE, __sbrk, SBRK_ARG_TYPE)
+#endif
+#ifdef HAVE___BRK
+ BRK_IMPL(BRK_RET_TYPE, __brk, BRK_ARG_TYPE)
+#endif
+
+static int
+init(void)
+{
+ if (inited)
+ return 1;
+
+#define INIT_XBRK_SYM(SYM) \
+do { \
+ if (!real_ ## SYM) { \
+ real_ ## SYM = dlsym(RTLD_NEXT, #SYM); \
+ if (!real_ ## SYM) { \
+ errno = ENOMEM; \
+ return 0; \
+ } \
+ } \
+} while (0)
+
+ heap_end = NULL;
+#if defined(HAVE_END_SYMBOL)
+ heap_start = &end;
+#elif defined(HAVE__END_SYMBOL)
+ heap_start = &_end;
+#endif
+
+ INIT_XBRK_SYM(sbrk);
+#ifdef HAVE_BRK
+ INIT_XBRK_SYM(brk);
+#endif
+#ifdef HAVE__SBRK
+ INIT_XBRK_SYM(_sbrk);
+#endif
+#ifdef HAVE__BRK
+ INIT_XBRK_SYM(_brk);
+#endif
+#ifdef HAVE___SBRK
+ INIT_XBRK_SYM(__sbrk);
+#endif
+#ifdef HAVE___BRK
+ INIT_XBRK_SYM(__brk);
+#endif
+
+ return inited = 1;
+#undef INIT_XBRK_SYM
+}
+
+#endif /* #elif defined(ERTS_BRK_WRAPPERS_CAN_TRACK_MALLOC) */ /* ----------- */
+
+static void
+update_heap_size(char *new_end)
+{
+ volatile char *new_start, *old_start, *old_end;
+ Uint size;
+
+ if (new_end == ((char *) -1))
+ return;
+
+ new_start = (old_start = heap_start);
+ old_end = heap_end;
+ heap_end = new_end;
+ if (new_end < old_start || !old_start)
+ heap_start = (new_start = new_end);
+
+ size = (Uint) (new_end - new_start);
+
+ if (!old_end) {
+ if (size)
+ erts_mtrace_crr_alloc((void *) new_start,
+ ERTS_ALC_A_SYSTEM,
+ ERTS_MTRACE_SEGMENT_ID,
+ size);
+ else
+ heap_end = NULL;
+ }
+ else {
+ if (old_end != new_end || old_start != new_start) {
+
+ if (size)
+ erts_mtrace_crr_realloc((void *) new_start,
+ ERTS_ALC_A_SYSTEM,
+ ERTS_MTRACE_SEGMENT_ID,
+ (void *) old_start,
+ size);
+ else {
+ if (old_start)
+ erts_mtrace_crr_free(ERTS_ALC_A_SYSTEM,
+ ERTS_MTRACE_SEGMENT_ID,
+ (void *) old_start);
+ heap_end = NULL;
+ }
+ }
+ }
+}
+
+#endif /* #ifdef ERTS_CAN_TRACK_MALLOC */
+
+void
+erts_mtrace_update_heap_size(void)
+{
+#ifdef ERTS_CAN_TRACK_MALLOC
+ if (erts_mtrace_enabled && (inited || init()))
+ update_heap_size((char *) SBRK_0);
+#endif
+}
+
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
new file mode 100644
index 0000000000..169d4579a2
--- /dev/null
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -0,0 +1,2693 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Poll interface suitable for ERTS with or without
+ * SMP support.
+ *
+ * The interface is currently implemented using:
+ * - select
+ * - poll
+ * - /dev/poll
+ * - epoll with poll or select as fallback
+ * - kqueue with poll or select as fallback
+ *
+ * Some time in the future it will also be
+ * implemented using Solaris ports.
+ *
+ *
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifndef WANT_NONBLOCKING
+# define WANT_NONBLOCKING
+#endif
+#define ERTS_WANT_GOT_SIGUSR1
+
+#include "erl_poll.h"
+#if ERTS_POLL_USE_KQUEUE
+# include <sys/types.h>
+# include <sys/event.h>
+# include <sys/time.h>
+#endif
+#if ERTS_POLL_USE_SELECT
+# ifdef SYS_SELECT_H
+# include <sys/select.h>
+# endif
+# ifdef VXWORKS
+# include <selectLib.h>
+# endif
+#endif
+#ifndef VXWORKS
+# ifdef NO_SYSCONF
+# if ERTS_POLL_USE_SELECT
+# include <sys/param.h>
+# else
+# include <limits.h>
+# endif
+# endif
+#endif
+#include "erl_driver.h"
+#include "erl_alloc.h"
+
+#if !defined(ERTS_POLL_USE_EPOLL) \
+ && !defined(ERTS_POLL_USE_DEVPOLL) \
+ && !defined(ERTS_POLL_USE_POLL) \
+ && !defined(ERTS_POLL_USE_SELECT)
+#error "Missing implementation of erts_poll()"
+#endif
+
+#if defined(ERTS_KERNEL_POLL_VERSION) && !ERTS_POLL_USE_KERNEL_POLL
+#error "Missing kernel poll implementation of erts_poll()"
+#endif
+
+#if defined(ERTS_NO_KERNEL_POLL_VERSION) && ERTS_POLL_USE_KERNEL_POLL
+#error "Kernel poll used when it shouldn't be used"
+#endif
+
+#if 0
+#define ERTS_POLL_DEBUG_PRINT
+#endif
+
+#if defined(DEBUG) && 0
+#define HARD_DEBUG
+#endif
+
+#define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \
+ || ERTS_POLL_USE_KQUEUE)
+#define ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE \
+ (defined(ERTS_SMP) || ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_POLL)
+
+#define ERTS_POLL_USE_CONCURRENT_UPDATE \
+ (defined(ERTS_SMP) && ERTS_POLL_USE_EPOLL)
+
+#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+
+#define FDS_STATUS_EXTRA_FREE_SIZE 128
+#define POLL_FDS_EXTRA_FREE_SIZE 128
+
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 1
+#else
+# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 0
+#endif
+
+#define ERTS_POLL_USE_WAKEUP_PIPE \
+ (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP))
+
+#ifdef ERTS_SMP
+
+#define ERTS_POLLSET_LOCK(PS) \
+ erts_smp_mtx_lock(&(PS)->mtx)
+#define ERTS_POLLSET_UNLOCK(PS) \
+ erts_smp_mtx_unlock(&(PS)->mtx)
+
+#define ERTS_POLLSET_SET_POLLED_CHK(PS) \
+ ((int) erts_smp_atomic_xchg(&(PS)->polled, (long) 1))
+#define ERTS_POLLSET_UNSET_POLLED(PS) \
+ erts_smp_atomic_set(&(PS)->polled, (long) 0)
+#define ERTS_POLLSET_IS_POLLED(PS) \
+ ((int) erts_smp_atomic_read(&(PS)->polled))
+
+#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) \
+ ((int) erts_smp_atomic_xchg(&(PS)->woken, (long) 1))
+#define ERTS_POLLSET_SET_POLLER_WOKEN(PS) \
+ erts_smp_atomic_set(&(PS)->woken, (long) 1)
+#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS) \
+ erts_smp_atomic_set(&(PS)->woken, (long) 0)
+#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) \
+ ((int) erts_smp_atomic_read(&(PS)->woken))
+
+#else
+
+#define ERTS_POLLSET_LOCK(PS)
+#define ERTS_POLLSET_UNLOCK(PS)
+#define ERTS_POLLSET_SET_POLLED_CHK(PS) 0
+#define ERTS_POLLSET_UNSET_POLLED(PS)
+#define ERTS_POLLSET_IS_POLLED(PS) 0
+
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+
+/*
+ * Ideally, the ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) operation would
+ * be atomic. This operation isn't, but we will do okay anyway. The
+ * "woken check" is only an optimization. The only requirement we have:
+ * if (PS)->woken is set to a value != 0 when interrupting, we have to
+ * write to the wakeup pipe at least once. Multiple writes are okay.
+ */
+#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) ((PS)->woken++)
+#define ERTS_POLLSET_SET_POLLER_WOKEN(PS) ((PS)->woken = 1, (void) 0)
+#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS) ((PS)->woken = 0, (void) 0)
+#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) ((PS)->woken)
+
+#else
+
+#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) 1
+#define ERTS_POLLSET_SET_POLLER_WOKEN(PS)
+#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS)
+#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) 1
+
+#endif
+
+#endif
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \
+ erts_smp_atomic_set(&(PS)->have_update_requests, (long) 1)
+#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \
+ erts_smp_atomic_set(&(PS)->have_update_requests, (long) 0)
+#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \
+ ((int) erts_smp_atomic_read(&(PS)->have_update_requests))
+#else
+#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS)
+#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS)
+#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) 0
+#endif
+
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP)
+
+#define ERTS_POLLSET_UNSET_INTERRUPTED_CHK(PS) unset_interrupted_chk((PS))
+#define ERTS_POLLSET_UNSET_INTERRUPTED(PS) ((PS)->interrupt = 0, (void) 0)
+#define ERTS_POLLSET_SET_INTERRUPTED(PS) ((PS)->interrupt = 1, (void) 0)
+#define ERTS_POLLSET_IS_INTERRUPTED(PS) ((PS)->interrupt)
+
+#else
+
+#define ERTS_POLLSET_UNSET_INTERRUPTED_CHK(PS) \
+ ((int) erts_smp_atomic_xchg(&(PS)->interrupt, (long) 0))
+#define ERTS_POLLSET_UNSET_INTERRUPTED(PS) \
+ erts_smp_atomic_set(&(PS)->interrupt, (long) 0)
+#define ERTS_POLLSET_SET_INTERRUPTED(PS) \
+ erts_smp_atomic_set(&(PS)->interrupt, (long) 1)
+#define ERTS_POLLSET_IS_INTERRUPTED(PS) \
+ ((int) erts_smp_atomic_read(&(PS)->interrupt))
+
+#endif
+
+#if ERTS_POLL_USE_FALLBACK
+# if ERTS_POLL_USE_POLL
+# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_poll_fds > 1)
+# elif ERTS_POLL_USE_SELECT
+# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_select_fds > 1)
+# endif
+#endif
+/*
+ * --- Data types ------------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#define ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE 128
+
+typedef struct ErtsPollSetUpdateRequestsBlock_ ErtsPollSetUpdateRequestsBlock;
+struct ErtsPollSetUpdateRequestsBlock_ {
+ ErtsPollSetUpdateRequestsBlock *next;
+ int len;
+ int fds[ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE];
+};
+
+#endif
+
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+# define ERTS_POLL_FD_FLG_INURQ (((unsigned short) 1) << 0)
+#endif
+#if ERTS_POLL_USE_FALLBACK
+# define ERTS_POLL_FD_FLG_INFLBCK (((unsigned short) 1) << 1)
+# define ERTS_POLL_FD_FLG_USEFLBCK (((unsigned short) 1) << 2)
+#endif
+#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
+# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 3)
+#endif
+typedef struct {
+#if ERTS_POLL_USE_POLL
+ int pix;
+#endif
+ ErtsPollEvents used_events;
+ ErtsPollEvents events;
+#if ERTS_POLL_COALESCE_KP_RES
+ unsigned short res_ev_ix;
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK
+ unsigned short flags;
+#endif
+
+} ErtsFdStatus;
+
+
+#if ERTS_POLL_COALESCE_KP_RES
+/* res_ev_ix max value */
+#define ERTS_POLL_MAX_RES ((1 << sizeof(unsigned short)*8) - 1)
+#endif
+
+#if ERTS_POLL_USE_KQUEUE
+
+#define ERTS_POLL_KQ_OP_HANDLED 1
+#define ERTS_POLL_KQ_OP_DEL_R 2
+#define ERTS_POLL_KQ_OP_DEL_W 3
+#define ERTS_POLL_KQ_OP_ADD_R 4
+#define ERTS_POLL_KQ_OP_ADD_W 5
+#define ERTS_POLL_KQ_OP_ADD2_R 6
+#define ERTS_POLL_KQ_OP_ADD2_W 7
+
+#endif
+
+struct ErtsPollSet_ {
+ ErtsPollSet next;
+ int internal_fd_limit;
+ ErtsFdStatus *fds_status;
+ int no_of_user_fds;
+ int fds_status_len;
+#if ERTS_POLL_USE_KERNEL_POLL
+ int kp_fd;
+ int res_events_len;
+#if ERTS_POLL_USE_EPOLL
+ struct epoll_event *res_events;
+#elif ERTS_POLL_USE_KQUEUE
+ struct kevent *res_events;
+#elif ERTS_POLL_USE_DEVPOLL
+ struct pollfd *res_events;
+#endif
+#endif /* ERTS_POLL_USE_KERNEL_POLL */
+#if ERTS_POLL_USE_POLL
+ int next_poll_fds_ix;
+ int no_poll_fds;
+ int poll_fds_len;
+ struct pollfd *poll_fds;
+#elif ERTS_POLL_USE_SELECT
+ int next_sel_fd;
+ int max_fd;
+#if ERTS_POLL_USE_FALLBACK
+ int no_select_fds;
+#endif
+ fd_set input_fds;
+ fd_set res_input_fds;
+ fd_set output_fds;
+ fd_set res_output_fds;
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ ErtsPollSetUpdateRequestsBlock update_requests;
+ ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
+ erts_smp_atomic_t have_update_requests;
+#endif
+#ifdef ERTS_SMP
+ erts_smp_atomic_t polled;
+ erts_smp_atomic_t woken;
+ erts_smp_mtx_t mtx;
+#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ volatile int woken;
+#endif
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ int wake_fds[2];
+#endif
+#if ERTS_POLL_USE_FALLBACK
+ int fallback_used;
+#endif
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP)
+ volatile int interrupt;
+#else
+ erts_smp_atomic_t interrupt;
+#endif
+ erts_smp_atomic_t timeout;
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ erts_smp_atomic_t no_avoided_wakeups;
+ erts_smp_atomic_t no_avoided_interrupts;
+ erts_smp_atomic_t no_interrupt_timed;
+#endif
+};
+
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP)
+
+static ERTS_INLINE int
+unset_interrupted_chk(ErtsPollSet ps)
+{
+ /* This operation isn't atomic, but we have no need at all for an
+ atomic operation here... */
+ int res = ps->interrupt;
+ ps->interrupt = 0;
+ return res;
+}
+
+#endif
+
+static void fatal_error(char *format, ...);
+static void fatal_error_async_signal_safe(char *error_str);
+
+static int max_fds = -1;
+static ErtsPollSet pollsets;
+static erts_smp_spinlock_t pollsets_lock;
+
+#if ERTS_POLL_USE_POLL
+
+static ERTS_INLINE short
+ev2pollev(ErtsPollEvents ev)
+{
+#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE
+ return ERTS_POLL_EV_E2N(ev);
+#else /* Note, we only map events we are interested in */
+ short res_ev = (short) 0;
+ if (ev & ERTS_POLL_EV_IN)
+ res_ev |= ERTS_POLL_EV_NKP_IN;
+ if (ev & ERTS_POLL_EV_OUT)
+ res_ev |= ERTS_POLL_EV_NKP_OUT;
+ return res_ev;
+#endif
+}
+
+static ERTS_INLINE ErtsPollEvents
+pollev2ev(short ev)
+{
+#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE
+ return ERTS_POLL_EV_N2E(ev);
+#else /* Note, we only map events we are interested in */
+ ErtsPollEvents res_ev = (ErtsPollEvents) 0;
+ if (ev & ERTS_POLL_EV_NKP_IN)
+ res_ev |= ERTS_POLL_EV_IN;
+ if (ev & ERTS_POLL_EV_NKP_OUT)
+ res_ev |= ERTS_POLL_EV_OUT;
+ if (ev & ERTS_POLL_EV_NKP_ERR)
+ res_ev |= ERTS_POLL_EV_ERR;
+ if (ev & ERTS_POLL_EV_NKP_NVAL)
+ res_ev |= ERTS_POLL_EV_NVAL;
+ return res_ev;
+#endif
+}
+
+#endif
+
+#ifdef HARD_DEBUG
+static void check_poll_result(ErtsPollResFd pr[], int len);
+#if ERTS_POLL_USE_DEVPOLL
+static void check_poll_status(ErtsPollSet ps);
+#endif /* ERTS_POLL_USE_DEVPOLL */
+#endif /* HARD_DEBUG */
+#ifdef ERTS_POLL_DEBUG_PRINT
+static void print_misc_debug_info(void);
+#endif
+
+/*
+ * --- Wakeup pipe -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_WAKEUP_PIPE
+
+static ERTS_INLINE void
+wake_poller(ErtsPollSet ps)
+{
+ /*
+ * NOTE: This function might be called from signal handlers in the
+ * non-smp case; therefore, it has to be async-signal safe in
+ * the non-smp case.
+ */
+ if (!ERTS_POLLSET_SET_POLLER_WOKEN_CHK(ps)) {
+ ssize_t res;
+ if (ps->wake_fds[1] < 0)
+ return; /* Not initialized yet */
+ do {
+ /* write() is async-signal safe (according to posix) */
+ res = write(ps->wake_fds[1], "!", 1);
+ } while (res < 0 && errno == EINTR);
+ if (res <= 0 && errno != ERRNO_BLOCK) {
+ fatal_error_async_signal_safe(__FILE__
+ ":XXX:wake_poller(): "
+ "Failed to write on wakeup pipe\n");
+ }
+ }
+}
+
+static ERTS_INLINE void
+cleanup_wakeup_pipe(ErtsPollSet ps)
+{
+ int fd = ps->wake_fds[0];
+ int res;
+ do {
+ char buf[32];
+ res = read(fd, buf, sizeof(buf));
+ } while (res > 0 || (res < 0 && errno == EINTR));
+ if (res < 0 && errno != ERRNO_BLOCK) {
+ fatal_error("%s:%d:cleanup_wakeup_pipe(): "
+ "Failed to read on wakeup pipe fd=%d: "
+ "%s (%d)\n",
+ __FILE__, __LINE__,
+ fd,
+ erl_errno_id(errno), errno);
+ }
+}
+
+static void
+create_wakeup_pipe(ErtsPollSet ps)
+{
+ int do_wake = 0;
+ int wake_fds[2];
+ ps->wake_fds[0] = -1;
+ ps->wake_fds[1] = -1;
+ if (pipe(wake_fds) < 0) {
+ fatal_error("%s:%d:create_wakeup_pipe(): "
+ "Failed to create pipe: %s (%d)\n",
+ __FILE__,
+ __LINE__,
+ erl_errno_id(errno),
+ errno);
+ }
+ SET_NONBLOCKING(wake_fds[0]);
+ SET_NONBLOCKING(wake_fds[1]);
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+ erts_printf("wakeup fds = {%d, %d}\n", wake_fds[0], wake_fds[1]);
+#endif
+
+ ERTS_POLL_EXPORT(erts_poll_control)(ps,
+ wake_fds[0],
+ ERTS_POLL_EV_IN,
+ 1, &do_wake);
+#if ERTS_POLL_USE_FALLBACK
+ /* We depend on the wakeup pipe being handled by kernel poll */
+ if (ps->fds_status[wake_fds[0]].flags & ERTS_POLL_FD_FLG_INFLBCK)
+ fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n",
+ __FILE__, __LINE__);
+#endif
+ if (ps->internal_fd_limit <= wake_fds[1])
+ ps->internal_fd_limit = wake_fds[1] + 1;
+ if (ps->internal_fd_limit <= wake_fds[0])
+ ps->internal_fd_limit = wake_fds[0] + 1;
+ ps->wake_fds[0] = wake_fds[0];
+ ps->wake_fds[1] = wake_fds[1];
+}
+
+#endif /* ERTS_POLL_USE_WAKEUP_PIPE */
+
+/*
+ * --- Poll set update requests ----------------------------------------------
+ */
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+
+static ERTS_INLINE void
+enqueue_update_request(ErtsPollSet ps, int fd)
+{
+ ErtsPollSetUpdateRequestsBlock *urqbp;
+
+ ASSERT(fd < ps->fds_status_len);
+
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ)
+ return;
+
+ if (ps->update_requests.len == 0)
+ ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(ps);
+
+ urqbp = ps->curr_upd_req_block;
+
+ if (urqbp->len == ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE) {
+ ASSERT(!urqbp->next);
+ urqbp = erts_alloc(ERTS_ALC_T_POLLSET_UPDREQ,
+ sizeof(ErtsPollSetUpdateRequestsBlock));
+ ps->curr_upd_req_block->next = urqbp;
+ ps->curr_upd_req_block = urqbp;
+ urqbp->next = NULL;
+ urqbp->len = 0;
+ }
+
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INURQ;
+ urqbp->fds[urqbp->len++] = fd;
+}
+
+static ERTS_INLINE void
+free_update_requests_block(ErtsPollSet ps,
+ ErtsPollSetUpdateRequestsBlock *urqbp)
+{
+ if (urqbp != &ps->update_requests)
+ erts_free(ERTS_ALC_T_POLLSET_UPDREQ, (void *) urqbp);
+ else {
+ urqbp->next = NULL;
+ urqbp->len = 0;
+ }
+}
+
+#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+
+/*
+ * --- Growing poll set structures -------------------------------------------
+ */
+
+#if ERTS_POLL_USE_KERNEL_POLL
+static void
+grow_res_events(ErtsPollSet ps, int new_len)
+{
+ size_t new_size = sizeof(
+#if ERTS_POLL_USE_EPOLL
+ struct epoll_event
+#elif ERTS_POLL_USE_DEVPOLL
+ struct pollfd
+#elif ERTS_POLL_USE_KQUEUE
+ struct kevent
+#endif
+ )*new_len;
+ /* We do not need to save previously stored data */
+ if (ps->res_events)
+ erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events);
+ ps->res_events = erts_alloc(ERTS_ALC_T_POLL_RES_EVS, new_size);
+ ps->res_events_len = new_len;
+}
+#endif /* ERTS_POLL_USE_KERNEL_POLL */
+
+#if ERTS_POLL_USE_POLL
+static void
+grow_poll_fds(ErtsPollSet ps, int min_ix)
+{
+ int i;
+ int new_len = min_ix + 1 + POLL_FDS_EXTRA_FREE_SIZE;
+ if (new_len > max_fds)
+ new_len = max_fds;
+ ps->poll_fds = (ps->poll_fds_len
+ ? erts_realloc(ERTS_ALC_T_POLL_FDS,
+ ps->poll_fds,
+ sizeof(struct pollfd)*new_len)
+ : erts_alloc(ERTS_ALC_T_POLL_FDS,
+ sizeof(struct pollfd)*new_len));
+ for (i = ps->poll_fds_len; i < new_len; i++) {
+ ps->poll_fds[i].fd = -1;
+ ps->poll_fds[i].events = (short) 0;
+ ps->poll_fds[i].revents = (short) 0;
+ }
+ ps->poll_fds_len = new_len;
+}
+#endif
+
+static void
+grow_fds_status(ErtsPollSet ps, int min_fd)
+{
+ int i;
+ int new_len = min_fd + 1 + FDS_STATUS_EXTRA_FREE_SIZE;
+ ASSERT(min_fd < max_fds);
+ if (new_len > max_fds)
+ new_len = max_fds;
+ ps->fds_status = (ps->fds_status_len
+ ? erts_realloc(ERTS_ALC_T_FD_STATUS,
+ ps->fds_status,
+ sizeof(ErtsFdStatus)*new_len)
+ : erts_alloc(ERTS_ALC_T_FD_STATUS,
+ sizeof(ErtsFdStatus)*new_len));
+ for (i = ps->fds_status_len; i < new_len; i++) {
+#if ERTS_POLL_USE_POLL
+ ps->fds_status[i].pix = -1;
+#endif
+ ps->fds_status[i].used_events = (ErtsPollEvents) 0;
+ ps->fds_status[i].events = (ErtsPollEvents) 0;
+#if ERTS_POLL_COALESCE_KP_RES
+ ps->fds_status[i].res_ev_ix = (unsigned short) ERTS_POLL_MAX_RES;
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK
+ ps->fds_status[i].flags = (unsigned short) 0;
+#endif
+ }
+ ps->fds_status_len = new_len;
+}
+
+/*
+ * --- Selecting fd to poll on -----------------------------------------------
+ */
+
+#if ERTS_POLL_USE_FALLBACK
+static int update_fallback_pollset(ErtsPollSet ps, int fd);
+#endif
+
+static ERTS_INLINE int
+need_update(ErtsPollSet ps, int fd)
+{
+#if ERTS_POLL_USE_KERNEL_POLL
+ int reset;
+#endif
+
+ ASSERT(fd < ps->fds_status_len);
+
+#if ERTS_POLL_USE_KERNEL_POLL
+ reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST);
+ if (reset && !ps->fds_status[fd].used_events) {
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
+ reset = 0;
+ }
+#elif defined(ERTS_SMP)
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
+#endif
+
+ if (ps->fds_status[fd].used_events != ps->fds_status[fd].events)
+ return 1;
+
+#if ERTS_POLL_USE_KERNEL_POLL
+ return reset;
+#else
+ return 0;
+#endif
+}
+
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+
+#if ERTS_POLL_USE_KQUEUE
+#define ERTS_POLL_MIN_BATCH_BUF_SIZE 128
+#else
+#define ERTS_POLL_MIN_BATCH_BUF_SIZE 64
+#endif
+
+typedef struct {
+ int len;
+ int size;
+#if ERTS_POLL_USE_DEVPOLL
+ struct pollfd *buf;
+#elif ERTS_POLL_USE_KQUEUE
+ struct kevent *buf;
+ struct kevent *ebuf;
+#endif
+} ErtsPollBatchBuf;
+
+
+static ERTS_INLINE void
+setup_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
+{
+ bbp->len = 0;
+#if ERTS_POLL_USE_DEVPOLL
+ bbp->size = ps->res_events_len;
+ bbp->buf = ps->res_events;
+#elif ERTS_POLL_USE_KQUEUE
+ bbp->size = ps->res_events_len/2;
+ bbp->buf = ps->res_events;
+ bbp->ebuf = bbp->buf + bbp->size;
+#endif
+}
+
+
+#if ERTS_POLL_USE_DEVPOLL
+
+static void
+write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
+{
+ ssize_t wres;
+ char *buf = (char *) bbp->buf;
+ ssize_t buf_size = sizeof(struct pollfd)*bbp->len; /* signed for the sanity check below */
+
+ while (1) {
+ wres = write(ps->kp_fd, (void *) buf, buf_size);
+ if (wres < 0) {
+ if (errno == EINTR)
+ continue;
+ fatal_error("%s:%d:write_batch_buf(): "
+ "Failed to write to /dev/poll: "
+ "%s (%d)\n",
+ __FILE__, __LINE__,
+ erl_errno_id(errno), errno);
+ }
+ buf_size -= wres;
+ if (buf_size <= 0)
+ break;
+ buf += wres;
+ }
+
+ if (buf_size < 0) {
+ fatal_error("%s:%d:write_batch_buf(): Internal error\n",
+ __FILE__, __LINE__);
+ }
+ bbp->len = 0;
+}
+
+#elif ERTS_POLL_USE_KQUEUE
+
+static void
+write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
+{
+ int res;
+ int len = bbp->len;
+ struct kevent *buf = bbp->buf;
+ struct timespec ts = {0, 0};
+
+ do {
+ res = kevent(ps->kp_fd, buf, len, NULL, 0, &ts);
+ } while (res < 0 && errno == EINTR);
+ if (res < 0) {
+ int i;
+ struct kevent *ebuf = bbp->ebuf;
+ do {
+ res = kevent(ps->kp_fd, buf, len, ebuf, len, &ts);
+ } while (res < 0 && errno == EINTR);
+ if (res < 0) {
+ fatal_error("%s:%d: kevent() failed: %s (%d)\n",
+ __FILE__, __LINE__, erl_errno_id(errno), errno);
+ }
+ for (i = 0; i < res; i++) {
+ if (ebuf[i].flags & EV_ERROR) {
+ short filter;
+ int fd = (int) ebuf[i].ident;
+
+ switch ((int) ebuf[i].udata) {
+
+		    /*
+		     * Since we use a lazy update approach, EV_DELETE will
+		     * frequently fail, because kqueue automatically removes
+		     * a file descriptor from the poll set when it is
+		     * closed.
+		     */
+ case ERTS_POLL_KQ_OP_DEL_R:
+ case ERTS_POLL_KQ_OP_DEL_W:
+ case ERTS_POLL_KQ_OP_HANDLED:
+ break;
+
+		    /*
+		     * According to the kqueue man page, EVFILT_READ support
+		     * does not imply EVFILT_WRITE support; therefore,
+		     * if an EV_ADD fails, we may have to remove other
+		     * events on this fd from the kqueue pollset before
+		     * adding the fd to the fallback pollset.
+		     */
+ case ERTS_POLL_KQ_OP_ADD_W:
+ if (ps->fds_status[fd].used_events & ERTS_POLL_EV_IN) {
+ filter = EVFILT_READ;
+ goto rm_add_fb;
+ }
+ goto add_fb;
+ case ERTS_POLL_KQ_OP_ADD_R:
+ if (ps->fds_status[fd].used_events & ERTS_POLL_EV_OUT) {
+ filter = EVFILT_WRITE;
+ goto rm_add_fb;
+ }
+ goto add_fb;
+ case ERTS_POLL_KQ_OP_ADD2_W:
+ case ERTS_POLL_KQ_OP_ADD2_R: {
+ int j;
+ for (j = i+1; j < res; j++) {
+			    if (fd == (int) ebuf[j].ident) {
+				/* Save the op before marking the entry as
+				   handled; the switch below must inspect
+				   the original op, not
+				   ERTS_POLL_KQ_OP_HANDLED. */
+				int other_op = (int) ebuf[j].udata;
+				ebuf[j].udata = (void *) ERTS_POLL_KQ_OP_HANDLED;
+				if (!(ebuf[j].flags & EV_ERROR)) {
+				    switch (other_op) {
+ case ERTS_POLL_KQ_OP_ADD2_W:
+ filter = EVFILT_WRITE;
+ goto rm_add_fb;
+ case ERTS_POLL_KQ_OP_ADD2_R:
+ filter = EVFILT_READ;
+ goto rm_add_fb;
+ default:
+ fatal_error("%s:%d:write_batch_buf(): "
+ "Internal error",
+ __FILE__, __LINE__);
+ break;
+ }
+ }
+ goto add_fb;
+ }
+ }
+			/* The other add succeeded... */
+ filter = (((int) ebuf[i].udata == ERTS_POLL_KQ_OP_ADD2_W)
+ ? EVFILT_READ
+ : EVFILT_WRITE);
+ rm_add_fb:
+ {
+ struct kevent kev;
+ struct timespec ts = {0, 0};
+ EV_SET(&kev, fd, filter, EV_DELETE, 0, 0, 0);
+ (void) kevent(ps->kp_fd, &kev, 1, NULL, 0, &ts);
+ }
+
+ add_fb:
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK;
+ ASSERT(ps->fds_status[fd].used_events);
+ ps->fds_status[fd].used_events = 0;
+ ps->no_of_user_fds--;
+ update_fallback_pollset(ps, fd);
+ ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
+ break;
+ }
+ default:
+ fatal_error("%s:%d:write_batch_buf(): Internal error",
+ __FILE__, __LINE__);
+ break;
+ }
+ }
+ }
+ }
+ bbp->len = 0;
+}
+
+#endif /* ERTS_POLL_USE_KQUEUE */
+
+static ERTS_INLINE void
+batch_update_pollset(ErtsPollSet ps, int fd, ErtsPollBatchBuf *bbp)
+{
+ int buf_len;
+#if ERTS_POLL_USE_DEVPOLL
+ short events;
+ struct pollfd *buf;
+#elif ERTS_POLL_USE_KQUEUE
+ struct kevent *buf;
+#endif
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+ erts_printf("Doing lazy update on fd=%d\n", fd);
+#endif
+
+ if (!need_update(ps, fd))
+ return;
+
+    /* Make sure we have room for at least the maximum number of
+       entries per fd */
+ if (bbp->size - bbp->len < 2)
+ write_batch_buf(ps, bbp);
+
+ buf_len = bbp->len;
+ buf = bbp->buf;
+
+ ASSERT(fd < ps->fds_status_len);
+
+#if ERTS_POLL_USE_DEVPOLL
+ events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
+ if (!events) {
+ buf[buf_len].events = POLLREMOVE;
+ ps->no_of_user_fds--;
+ }
+ else if (!ps->fds_status[fd].used_events) {
+ buf[buf_len].events = events;
+ ps->no_of_user_fds++;
+ }
+ else {
+ if ((ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)
+ || (ps->fds_status[fd].used_events & ~events)) {
+ /* Reset or removed events... */
+ buf[buf_len].fd = fd;
+ buf[buf_len].events = POLLREMOVE;
+ buf[buf_len++].revents = 0;
+ }
+ buf[buf_len].events = events;
+ }
+ buf[buf_len].fd = fd;
+ buf[buf_len++].revents = 0;
+
+#elif ERTS_POLL_USE_KQUEUE
+
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) {
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK)
+ update_fallback_pollset(ps, fd);
+ else { /* Remove from fallback and try kqueue */
+ ErtsPollEvents events = ps->fds_status[fd].events;
+ ps->fds_status[fd].events = (ErtsPollEvents) 0;
+ update_fallback_pollset(ps, fd);
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+ if (events) {
+ ps->fds_status[fd].events = events;
+ goto try_kqueue;
+ }
+ }
+ }
+ else {
+ ErtsPollEvents events, used_events;
+ int mod_w, mod_r;
+ try_kqueue:
+ events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
+ used_events = ERTS_POLL_EV_E2N(ps->fds_status[fd].used_events);
+ if (!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) {
+ if (!used_events &&
+ (events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT))
+ goto do_add_rw;
+ mod_r = ((events & ERTS_POLL_EV_IN)
+ != (used_events & ERTS_POLL_EV_IN));
+ mod_w = ((events & ERTS_POLL_EV_OUT)
+ != (used_events & ERTS_POLL_EV_OUT));
+ goto do_mod;
+ }
+ else { /* Reset */
+ if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) {
+ do_add_rw:
+ EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_R);
+ buf_len++;
+ EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_W);
+ buf_len++;
+
+ }
+ else {
+ mod_r = 1;
+ mod_w = 1;
+ do_mod:
+ if (mod_r) {
+ if (events & ERTS_POLL_EV_IN) {
+ EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_R);
+ buf_len++;
+ }
+ else if (used_events & ERTS_POLL_EV_IN) {
+ EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_DELETE,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_R);
+ buf_len++;
+ }
+ }
+ if (mod_w) {
+ if (events & ERTS_POLL_EV_OUT) {
+ EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_W);
+ buf_len++;
+ }
+ else if (used_events & ERTS_POLL_EV_OUT) {
+ EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_DELETE,
+ 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_W);
+ buf_len++;
+ }
+ }
+ }
+ }
+ if (used_events) {
+ if (!events) {
+ ps->no_of_user_fds--;
+ }
+ }
+ else {
+ if (events)
+ ps->no_of_user_fds++;
+ }
+ ASSERT((events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0);
+ ASSERT((used_events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0);
+ }
+
+#endif
+
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
+ ps->fds_status[fd].used_events = ps->fds_status[fd].events;
+
+ bbp->len = buf_len;
+}
+
+#else /* !ERTS_POLL_USE_BATCH_UPDATE_POLLSET */
+
+#if ERTS_POLL_USE_EPOLL
+static int
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+conc_update_pollset(ErtsPollSet ps, int fd, int *update_fallback)
+#else
+update_pollset(ErtsPollSet ps, int fd)
+#endif
+{
+ int res;
+ int op;
+ struct epoll_event epe_templ;
+ struct epoll_event epe;
+
+ ASSERT(fd < ps->fds_status_len);
+
+ if (!need_update(ps, fd))
+ return 0;
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+ erts_printf("Doing update on fd=%d\n", fd);
+#endif
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) {
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+ if (!*update_fallback) {
+ *update_fallback = 1;
+ return 0;
+ }
+#endif
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) {
+ return update_fallback_pollset(ps, fd);
+ }
+ else { /* Remove from fallback and try epoll */
+ ErtsPollEvents events = ps->fds_status[fd].events;
+ ps->fds_status[fd].events = (ErtsPollEvents) 0;
+ res = update_fallback_pollset(ps, fd);
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+ if (!events)
+ return res;
+ ps->fds_status[fd].events = events;
+ }
+ }
+
+ epe_templ.events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
+ epe_templ.data.fd = fd;
+
+#ifdef VALGRIND
+ /* Silence invalid valgrind warning ... */
+ memset((void *) &epe.data, 0, sizeof(epoll_data_t));
+#endif
+
+ if (epe_templ.events && ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) {
+ do {
+ /* We init 'epe' every time since epoll_ctl() may modify it
+ (not declared const and not documented as const). */
+ epe.events = epe_templ.events;
+ epe.data.fd = epe_templ.data.fd;
+ res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe);
+ } while (res != 0 && errno == EINTR);
+ ps->no_of_user_fds--;
+ ps->fds_status[fd].used_events = 0;
+ }
+
+ if (!epe_templ.events) {
+	/* A note on EPOLL_CTL_DEL: Linux kernel versions before 2.6.9
+	   need a non-NULL event pointer even though it is ignored... */
+ op = EPOLL_CTL_DEL;
+ ps->no_of_user_fds--;
+ }
+ else if (!ps->fds_status[fd].used_events) {
+ op = EPOLL_CTL_ADD;
+ ps->no_of_user_fds++;
+ }
+ else {
+ op = EPOLL_CTL_MOD;
+ }
+
+ do {
+ /* We init 'epe' every time since epoll_ctl() may modify it
+ (not declared const and not documented as const). */
+ epe.events = epe_templ.events;
+ epe.data.fd = epe_templ.data.fd;
+ res = epoll_ctl(ps->kp_fd, op, fd, &epe);
+ } while (res != 0 && errno == EINTR);
+
+#if defined(ERTS_POLL_DEBUG_PRINT)
+ {
+ int saved_errno = errno;
+	erts_printf("%s = epoll_ctl(%d, %s, %d, {0x%x, %d})\n",
+ res == 0 ? "0" : erl_errno_id(errno),
+ ps->kp_fd,
+ (op == EPOLL_CTL_ADD
+ ? "EPOLL_CTL_ADD"
+ : (op == EPOLL_CTL_MOD
+ ? "EPOLL_CTL_MOD"
+ : (op == EPOLL_CTL_DEL
+ ? "EPOLL_CTL_DEL"
+ : "UNKNOWN"))),
+ fd,
+ epe_templ.events,
+ fd);
+ errno = saved_errno;
+ }
+#endif
+ if (res == 0)
+ ps->fds_status[fd].used_events = ps->fds_status[fd].events;
+ else {
+ switch (op) {
+ case EPOLL_CTL_MOD:
+ do {
+ /* We init 'epe' every time since epoll_ctl() may modify it
+ (not declared const and not documented as const). */
+ epe.events = 0;
+ epe.data.fd = fd;
+ res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe);
+ } while (res != 0 && errno == EINTR);
+ ps->fds_status[fd].used_events = 0;
+ /* Fall through ... */
+ case EPOLL_CTL_ADD: {
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK;
+ ps->no_of_user_fds--;
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+ if (!*update_fallback) {
+ *update_fallback = 1;
+ return 0;
+ }
+#endif
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+ res = update_fallback_pollset(ps, fd);
+ ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
+ break;
+ }
+ case EPOLL_CTL_DEL: {
+	    /*
+	     * Since we use a lazy update approach, EPOLL_CTL_DEL will
+	     * frequently fail, because epoll automatically removes a
+	     * file descriptor from the poll set when it is closed.
+	     */
+ ps->fds_status[fd].used_events = 0;
+ res = 0;
+ break;
+ }
+ default:
+ fatal_error("%s:%d:update_pollset(): Internal error\n",
+ __FILE__, __LINE__);
+ break;
+ }
+ }
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
+ return res;
+}
+
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+static int
+update_pollset(ErtsPollSet ps, int fd)
+{
+ int update_fallback = 1;
+ return conc_update_pollset(ps, fd, &update_fallback);
+}
+#endif
+
+#endif /* ERTS_POLL_USE_EPOLL */
+
+#endif /* ERTS_POLL_USE_BATCH_UPDATE_POLLSET */
+
+#if ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK
+
+#if ERTS_POLL_USE_FALLBACK
+static int update_fallback_pollset(ErtsPollSet ps, int fd)
+#else
+static int update_pollset(ErtsPollSet ps, int fd)
+#endif
+{
+#ifdef ERTS_POLL_DEBUG_PRINT
+#if ERTS_POLL_USE_FALLBACK
+ erts_printf("Doing fallback update on fd=%d\n", fd);
+#else
+ erts_printf("Doing update on fd=%d\n", fd);
+#endif
+#endif
+
+ ASSERT(fd < ps->fds_status_len);
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(ps->fds_status[fd].used_events
+ ? (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)
+ : (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK));
+#endif
+
+ if (!need_update(ps, fd))
+ return 0;
+
+#if ERTS_POLL_USE_FALLBACK
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
+#endif
+
+#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+ if (!ps->fds_status[fd].events) {
+ int pix = ps->fds_status[fd].pix;
+ int last_pix;
+ if (pix < 0) {
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+#endif
+ return -1;
+ }
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
+#endif
+ ps->no_of_user_fds--;
+ last_pix = --ps->no_poll_fds;
+ if (pix != last_pix) {
+ /* Move last pix to this pix */
+ ps->poll_fds[pix].fd = ps->poll_fds[last_pix].fd;
+ ps->poll_fds[pix].events = ps->poll_fds[last_pix].events;
+ ps->poll_fds[pix].revents = ps->poll_fds[last_pix].revents;
+ ps->fds_status[ps->poll_fds[pix].fd].pix = pix;
+ }
+ /* Clear last pix */
+ ps->poll_fds[last_pix].fd = -1;
+ ps->poll_fds[last_pix].events = (short) 0;
+ ps->poll_fds[last_pix].revents = (short) 0;
+ /* Clear this fd status */
+ ps->fds_status[fd].pix = -1;
+ ps->fds_status[fd].used_events = (ErtsPollEvents) 0;
+#if ERTS_POLL_USE_FALLBACK
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK;
+#endif
+ }
+ else {
+ int pix = ps->fds_status[fd].pix;
+ if (pix < 0) {
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)
+ || fd == ps->kp_fd);
+#endif
+ ps->no_of_user_fds++;
+ ps->fds_status[fd].pix = pix = ps->no_poll_fds++;
+ if (pix >= ps->poll_fds_len)
+ grow_poll_fds(ps, pix);
+ ps->poll_fds[pix].fd = fd;
+ ps->fds_status[fd].pix = pix;
+#if ERTS_POLL_USE_FALLBACK
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK;
+#endif
+ }
+
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
+#endif
+
+ /* Events to be used in next poll */
+ ps->poll_fds[pix].events = ev2pollev(ps->fds_status[fd].events);
+ if (ps->poll_fds[pix].revents) {
+ /* Remove result events that we should not poll for anymore */
+ ps->poll_fds[pix].revents
+ &= ev2pollev(~(~ps->fds_status[fd].used_events
+ & ps->fds_status[fd].events));
+ }
+ /* Save events to be used in next poll */
+ ps->fds_status[fd].used_events = ps->fds_status[fd].events;
+ }
+ return 0;
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+ {
+ ErtsPollEvents events = ps->fds_status[fd].events;
+ if ((ERTS_POLL_EV_IN & events)
+ != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) {
+ if (ERTS_POLL_EV_IN & events) {
+ FD_SET(fd, &ps->input_fds);
+ }
+ else {
+ FD_CLR(fd, &ps->input_fds);
+ }
+ }
+ if ((ERTS_POLL_EV_OUT & events)
+ != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) {
+ if (ERTS_POLL_EV_OUT & events) {
+ FD_SET(fd, &ps->output_fds);
+ }
+ else {
+ FD_CLR(fd, &ps->output_fds);
+ }
+ }
+
+ if (!ps->fds_status[fd].used_events) {
+ ASSERT(events);
+ ps->no_of_user_fds++;
+#if ERTS_POLL_USE_FALLBACK
+ ps->no_select_fds++;
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK;
+#endif
+ }
+ else if (!events) {
+ ASSERT(ps->fds_status[fd].used_events);
+ ps->no_of_user_fds--;
+ ps->fds_status[fd].events = events;
+#if ERTS_POLL_USE_FALLBACK
+ ps->no_select_fds--;
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK;
+#endif
+ }
+
+ ps->fds_status[fd].used_events = events;
+
+ if (events && fd > ps->max_fd)
+ ps->max_fd = fd;
+ else if (!events && fd == ps->max_fd) {
+ int max = ps->max_fd;
+ for (max = ps->max_fd; max >= 0; max--)
+ if (ps->fds_status[max].used_events)
+ break;
+ ps->max_fd = max;
+ }
+ }
+ return 0;
+#endif
+}
+
+#endif /* ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK */
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+
+static void
+handle_update_requests(ErtsPollSet ps)
+{
+ ErtsPollSetUpdateRequestsBlock *urqbp = &ps->update_requests;
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ ErtsPollBatchBuf bb;
+ setup_batch_buf(ps, &bb);
+#endif
+
+ while (urqbp) {
+ ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp;
+ int i;
+ int len = urqbp->len;
+ for (i = 0; i < len; i++) {
+ int fd = urqbp->fds[i];
+ ASSERT(fd < ps->fds_status_len);
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ;
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ batch_update_pollset(ps, fd, &bb);
+#else
+ update_pollset(ps, fd);
+#endif
+ }
+
+	urqbp = urqbp->next;
+
+ free_update_requests_block(ps, free_urqbp);
+
+ }
+
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ if (bb.len)
+ write_batch_buf(ps, &bb);
+#endif
+
+ ps->curr_upd_req_block = &ps->update_requests;
+
+#if ERTS_POLL_USE_DEVPOLL && defined(HARD_DEBUG)
+ check_poll_status(ps);
+#endif
+
+ ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(ps);
+}
+
+#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+
+static ERTS_INLINE ErtsPollEvents
+poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on,
+ int *have_set_have_update_requests,
+ int *do_wake)
+{
+ ErtsPollEvents new_events;
+
+ if (fd < ps->internal_fd_limit || fd >= max_fds) {
+ if (fd < 0) {
+ new_events = ERTS_POLL_EV_ERR;
+ goto done;
+ }
+#if ERTS_POLL_USE_KERNEL_POLL
+ if (fd == ps->kp_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
+#endif
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
+#endif
+ }
+
+ if (fd >= ps->fds_status_len)
+ grow_fds_status(ps, fd);
+
+ ASSERT(fd < ps->fds_status_len);
+
+ new_events = ps->fds_status[fd].events;
+
+ if (events == 0) {
+ *do_wake = 0;
+ goto done;
+ }
+
+ if (on)
+ new_events |= events;
+ else
+ new_events &= ~events;
+
+ if (new_events == (ErtsPollEvents) 0) {
+#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST;
+#endif
+#if ERTS_POLL_USE_FALLBACK
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_USEFLBCK;
+#endif
+ }
+
+ ps->fds_status[fd].events = new_events;
+
+ if (new_events == ps->fds_status[fd].used_events
+#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
+ && !(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)
+#endif
+ ) {
+ *do_wake = 0;
+ goto done;
+ }
+
+#if !ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ if (update_pollset(ps, fd) != 0)
+ new_events = ERTS_POLL_EV_ERR;
+#else /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+ if (ERTS_POLLSET_IS_POLLED(ps)) {
+ int update_fallback = 0;
+ conc_update_pollset(ps, fd, &update_fallback);
+ if (!update_fallback) {
+ *do_wake = 0; /* no need to wake kernel poller */
+ goto done;
+ }
+ }
+#endif
+
+ enqueue_update_request(ps, fd);
+
+#ifdef ERTS_SMP
+ /*
+ * If new events have been added, we need to wake up the
+ * polling thread, but if events have been removed we don't.
+ */
+ if ((new_events && (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST))
+ || (~ps->fds_status[fd].used_events & new_events))
+ *do_wake = 1;
+#endif /* ERTS_SMP */
+
+#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+
+ done:
+#ifdef ERTS_POLL_DEBUG_PRINT
+ erts_printf("0x%x = poll_control(ps, %d, 0x%x, %s) do_wake=%d\n",
+ (int) new_events, fd, (int) events, (on ? "on" : "off"), *do_wake);
+#endif
+ return new_events;
+}
+
+void
+ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps,
+ ErtsPollControlEntry pcev[],
+ int len)
+{
+ int i;
+ int hshur = 0;
+ int do_wake;
+ int final_do_wake = 0;
+
+ ERTS_POLLSET_LOCK(ps);
+
+ for (i = 0; i < len; i++) {
+ do_wake = 0;
+ pcev[i].events = poll_control(ps,
+ pcev[i].fd,
+ pcev[i].events,
+ pcev[i].on,
+ &hshur,
+ &do_wake);
+ final_do_wake |= do_wake;
+ }
+
+#ifdef ERTS_SMP
+ if (final_do_wake)
+ wake_poller(ps);
+#endif /* ERTS_SMP */
+
+ ERTS_POLLSET_UNLOCK(ps);
+}
+
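+/*
+ * Illustrative usage sketch (hypothetical caller; fd1/fd2 are assumed
+ * to be valid descriptors). Batching control operations into a single
+ * erts_poll_controlv() call takes the pollset lock once and wakes the
+ * polling thread at most once:
+ *
+ *   ErtsPollControlEntry pce[2];
+ *   pce[0].fd = fd1; pce[0].events = ERTS_POLL_EV_IN;  pce[0].on = 1;
+ *   pce[1].fd = fd2; pce[1].events = ERTS_POLL_EV_OUT; pce[1].on = 0;
+ *   erts_poll_controlv(ps, pce, 2);
+ *   // On return each pce[i].events holds the resulting event mask,
+ *   // or ERTS_POLL_EV_ERR/ERTS_POLL_EV_NVAL on failure, as assigned
+ *   // by poll_control() above.
+ */
+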
+ErtsPollEvents
+ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps,
+ ErtsSysFdType fd,
+ ErtsPollEvents events,
+ int on,
+ int* do_wake) /* In: Wake up polling thread */
+ /* Out: Poller is woken */
+{
+ int hshur = 0;
+ ErtsPollEvents res;
+
+ ERTS_POLLSET_LOCK(ps);
+
+ res = poll_control(ps, fd, events, on, &hshur, do_wake);
+
+#ifdef ERTS_SMP
+ if (*do_wake) {
+ wake_poller(ps);
+ }
+#endif /* ERTS_SMP */
+
+ ERTS_POLLSET_UNLOCK(ps);
+ return res;
+}
+
+/*
+ * --- Wait on poll set ------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_KERNEL_POLL
+
+static ERTS_INLINE int
+save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res)
+{
+ int res = 0;
+ int i;
+ int n = chk_fds_res < max_res ? chk_fds_res : max_res;
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ int wake_fd = ps->wake_fds[0];
+#endif
+
+ for (i = 0; i < n; i++) {
+
+#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+
+ if (ps->res_events[i].events) {
+ int fd = ps->res_events[i].data.fd;
+ int ix;
+ ErtsPollEvents revents;
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ continue;
+ }
+#endif
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+ /* epoll_wait() can repeat the same fd in result array... */
+ ix = (int) ps->fds_status[fd].res_ev_ix;
+ ASSERT(ix >= 0);
+ if (ix >= res || pr[ix].fd != fd) {
+ ix = res;
+ pr[ix].fd = fd;
+ pr[ix].events = (ErtsPollEvents) 0;
+ }
+
+ revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
+ pr[ix].events |= revents;
+ if (revents) {
+ if (res == ix) {
+ ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
+ res++;
+ }
+ }
+ }
+
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+
+ struct kevent *ev;
+ int fd;
+ int ix;
+
+ ev = &ps->res_events[i];
+ fd = (int) ev->ident;
+ ASSERT(fd < ps->fds_status_len);
+ ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+ ix = (int) ps->fds_status[fd].res_ev_ix;
+
+ ASSERT(ix >= 0);
+ if (ix >= res || pr[ix].fd != fd) {
+ ix = res;
+ pr[ix].fd = (int) ev->ident;
+ pr[ix].events = (ErtsPollEvents) 0;
+ }
+
+ if (ev->filter == EVFILT_READ) {
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ continue;
+ }
+#endif
+ pr[ix].events |= ERTS_POLL_EV_IN;
+ }
+ else if (ev->filter == EVFILT_WRITE)
+ pr[ix].events |= ERTS_POLL_EV_OUT;
+ if (ev->flags & (EV_ERROR|EV_EOF)) {
+ if ((ev->flags & EV_ERROR) && (((int) ev->data) == EBADF))
+ pr[ix].events |= ERTS_POLL_EV_NVAL;
+ else
+ pr[ix].events |= ERTS_POLL_EV_ERR;
+ }
+ if (pr[ix].events) {
+ if (res == ix) {
+ ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
+ res++;
+ }
+ }
+
+#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+
+ if (ps->res_events[i].revents) {
+ int fd = ps->res_events[i].fd;
+ ErtsPollEvents revents;
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ continue;
+ }
+#endif
+ revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
+ pr[res].fd = fd;
+ pr[res].events = revents;
+ res++;
+ }
+
+#endif
+
+ }
+
+ return res;
+}
+
+#endif /* ERTS_POLL_USE_KERNEL_POLL */
+
+#if ERTS_POLL_USE_FALLBACK
+
+static int
+get_kp_results(ErtsPollSet ps, ErtsPollResFd pr[], int max_res)
+{
+ int res;
+#if ERTS_POLL_USE_KQUEUE
+ struct timespec ts = {0, 0};
+#endif
+
+ if (max_res > ps->res_events_len)
+ grow_res_events(ps, max_res);
+
+ do {
+#if ERTS_POLL_USE_EPOLL
+ res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0);
+#elif ERTS_POLL_USE_KQUEUE
+ res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
+#endif
+ } while (res < 0 && errno == EINTR);
+
+ if (res < 0) {
+ fatal_error("%s:%d: %s() failed: %s (%d)\n",
+ __FILE__, __LINE__,
+#if ERTS_POLL_USE_EPOLL
+ "epoll_wait",
+#elif ERTS_POLL_USE_KQUEUE
+ "kevent",
+#endif
+ erl_errno_id(errno), errno);
+ }
+
+ return save_kp_result(ps, pr, max_res, res);
+}
+
+#endif /* ERTS_POLL_USE_FALLBACK */
+
+
+
+static ERTS_INLINE int
+save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
+ int chk_fds_res, int ebadf)
+{
+#if ERTS_POLL_USE_DEVPOLL
+ return save_kp_result(ps, pr, max_res, chk_fds_res);
+#elif ERTS_POLL_USE_FALLBACK
+ if (!ps->fallback_used)
+ return save_kp_result(ps, pr, max_res, chk_fds_res);
+ else
+#endif /* ERTS_POLL_USE_FALLBACK */
+ {
+
+#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+ int res = 0;
+#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK
+ int wake_fd = ps->wake_fds[0];
+#endif
+ int i, first_ix, end_ix;
+
+ /*
+ * In order to be somewhat fair, we continue on the poll_fds
+ * index where we stopped last time.
+ */
+ first_ix = i = ((ps->next_poll_fds_ix < ps->no_poll_fds)
+ ? ps->next_poll_fds_ix
+ : 0);
+ end_ix = ps->no_poll_fds;
+
+ while (1) {
+ while (i < end_ix && res < max_res) {
+ if (ps->poll_fds[i].revents != (short) 0) {
+ int fd = ps->poll_fds[i].fd;
+ ErtsPollEvents revents;
+#if ERTS_POLL_USE_FALLBACK
+ if (fd == ps->kp_fd) {
+ res += get_kp_results(ps, &pr[res], max_res-res);
+ i++;
+ continue;
+ }
+#elif ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ i++;
+ continue;
+ }
+#endif
+ revents = pollev2ev(ps->poll_fds[i].revents);
+ pr[res].fd = fd;
+ pr[res].events = revents;
+ res++;
+ }
+ i++;
+ }
+ if (res == max_res || i == first_ix)
+ break;
+ ASSERT(i == ps->no_poll_fds);
+ i = 0;
+ end_ix = first_ix;
+ }
+
+ ps->next_poll_fds_ix = i;
+ return res;
+
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+ int res = 0;
+#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK
+ int wake_fd = ps->wake_fds[0];
+#endif
+ int fd, first_fd, end_fd;
+
+ /*
+ * In order to be fair, we continue on the fd where we stopped
+ * last time.
+ */
+ first_fd = fd = ps->next_sel_fd <= ps->max_fd ? ps->next_sel_fd : 0;
+ end_fd = ps->max_fd + 1;
+
+ if (!ebadf) {
+ while (1) {
+ while (fd < end_fd && res < max_res) {
+
+ pr[res].events = (ErtsPollEvents) 0;
+ if (FD_ISSET(fd, &ps->res_input_fds)) {
+#if ERTS_POLL_USE_FALLBACK
+ if (fd == ps->kp_fd) {
+ res += get_kp_results(ps, &pr[res], max_res-res);
+ fd++;
+ continue;
+ }
+#elif ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ fd++;
+ continue;
+ }
+#endif
+ pr[res].events |= ERTS_POLL_EV_IN;
+ }
+ if (FD_ISSET(fd, &ps->res_output_fds))
+ pr[res].events |= ERTS_POLL_EV_OUT;
+ if (pr[res].events) {
+ pr[res].fd = fd;
+ res++;
+ }
+ fd++;
+ }
+ if (res == max_res || fd == first_fd)
+ break;
+ ASSERT(fd == ps->max_fd + 1);
+ fd = 0;
+ end_fd = first_fd;
+ }
+ }
+ else {
+ /*
+ * Bad file descriptors in poll set.
+ *
+ * This only happens when running poorly written
+ * drivers. This code could be optimized, but we
+ * don't bother since it should never happen...
+ */
+ while (1) {
+ while (fd < end_fd && res < max_res) {
+ if (ps->fds_status[fd].events) {
+ int sres;
+ fd_set *iset = NULL;
+ fd_set *oset = NULL;
+ if (ps->fds_status[fd].events & ERTS_POLL_EV_IN) {
+ iset = &ps->res_input_fds;
+ FD_ZERO(iset);
+ FD_SET(fd, iset);
+ }
+ if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) {
+ oset = &ps->res_output_fds;
+ FD_ZERO(oset);
+ FD_SET(fd, oset);
+
+ }
+ do {
+			/* Reinitialize 'tv' each time;
+			   select() may modify it */
+ SysTimeval tv = {0, 0};
+ sres = select(ps->max_fd+1, iset, oset, NULL, &tv);
+ } while (sres < 0 && errno == EINTR);
+ if (sres < 0) {
+#if ERTS_POLL_USE_FALLBACK
+ if (fd == ps->kp_fd) {
+ res += get_kp_results(ps,
+ &pr[res],
+ max_res-res);
+ fd++;
+ continue;
+ }
+#elif ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ fd++;
+ continue;
+ }
+#endif
+ pr[res].fd = fd;
+ pr[res].events = ERTS_POLL_EV_NVAL;
+ res++;
+ }
+ else if (sres > 0) {
+ pr[res].fd = fd;
+ if (iset && FD_ISSET(fd, iset)) {
+#if ERTS_POLL_USE_FALLBACK
+ if (fd == ps->kp_fd) {
+ res += get_kp_results(ps,
+ &pr[res],
+ max_res-res);
+ fd++;
+ continue;
+ }
+#elif ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ fd++;
+ continue;
+ }
+#endif
+ pr[res].events |= ERTS_POLL_EV_IN;
+ }
+ if (oset && FD_ISSET(fd, oset)) {
+ pr[res].events |= ERTS_POLL_EV_OUT;
+ }
+ ASSERT(pr[res].events);
+ res++;
+ }
+ }
+ fd++;
+ }
+ if (res == max_res || fd == first_fd)
+ break;
+ ASSERT(fd == ps->max_fd + 1);
+ fd = 0;
+ end_fd = first_fd;
+ }
+ }
+ ps->next_sel_fd = fd;
+ return res;
+#endif
+ }
+}
+
+static ERTS_INLINE int
+check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
+{
+ ASSERT(!*ps_locked);
+ if (ps->no_of_user_fds == 0 && tv->tv_usec == 0 && tv->tv_sec == 0) {
+ /* Nothing to poll and zero timeout; done... */
+ return 0;
+ }
+ else {
+ long timeout = tv->tv_sec*1000 + tv->tv_usec/1000;
+ ASSERT(timeout >= 0);
+ erts_smp_atomic_set(&ps->timeout, timeout);
+#if ERTS_POLL_USE_FALLBACK
+ if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) {
+
+#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ if (max_res > ps->res_events_len)
+ grow_res_events(ps, max_res);
+ return epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+ struct timespec ts;
+ ts.tv_sec = tv->tv_sec;
+ ts.tv_nsec = tv->tv_usec*1000;
+ if (max_res > ps->res_events_len)
+ grow_res_events(ps, max_res);
+ return kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
+#endif /* ----------------------------------------- */
+
+ }
+ else /* use fallback (i.e. poll() or select()) */
+#endif /* ERTS_POLL_USE_FALLBACK */
+ {
+
+#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+ /*
+ * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds
+ * is set too high. dp_nfds should not be set greater than
+ * the maximum number of file descriptors in the poll set.
+ */
+ struct dvpoll poll_res;
+ int nfds = ps->no_of_user_fds;
+#ifdef ERTS_SMP
+ nfds++; /* Wakeup pipe */
+#endif
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
+ if (poll_res.dp_nfds > ps->res_events_len)
+ grow_res_events(ps, poll_res.dp_nfds);
+ poll_res.dp_fds = ps->res_events;
+ poll_res.dp_timeout = (int) timeout;
+ return ioctl(ps->kp_fd, DP_POLL, &poll_res);
+#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+ if (timeout > INT_MAX)
+ timeout = INT_MAX;
+ return poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+ int res;
+ ps->res_input_fds = ps->input_fds;
+ ps->res_output_fds = ps->output_fds;
+ res = select(ps->max_fd + 1,
+ &ps->res_input_fds,
+ &ps->res_output_fds,
+ NULL,
+ tv);
+#ifdef ERTS_SMP
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+	    /*
+	     * This may have happened because another thread deselected
+	     * a fd in our poll set and then closed it, i.e. the driver
+	     * behaved correctly. We want to avoid looking for a bad
+	     * fd that may not even exist anymore. Therefore, handle
+	     * update requests and try again.
+	     *
+	     * We don't know how much of the timeout is left; therefore,
+	     * we use a zero timeout. If no error occurs and no events
+	     * have triggered, we fake an EAGAIN error and let the caller
+	     * restart us.
+	     */
+ SysTimeval zero_tv = {0, 0};
+ *ps_locked = 1;
+ ERTS_POLLSET_LOCK(ps);
+ handle_update_requests(ps);
+ res = select(ps->max_fd + 1,
+ &ps->res_input_fds,
+ &ps->res_output_fds,
+ NULL,
+ &zero_tv);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
+ }
+ }
+#endif /* ERTS_SMP */
+ return res;
+#endif /* ----------------------------------------- */
+ }
+ }
+}
+
+int
+ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
+ ErtsPollResFd pr[],
+ int *len,
+ SysTimeval *utvp)
+{
+ int res, no_fds;
+ int ebadf = 0;
+ int ps_locked;
+ SysTimeval *tvp;
+ SysTimeval itv;
+
+ no_fds = *len;
+#ifdef ERTS_POLL_MAX_RES
+ if (no_fds >= ERTS_POLL_MAX_RES)
+ no_fds = ERTS_POLL_MAX_RES;
+#endif
+
+ *len = 0;
+
+ ASSERT(utvp);
+
+ tvp = utvp;
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+    erts_printf("Entering erts_poll_wait(), timeout=%d\n",
+		(int) (tvp->tv_sec*1000 + tvp->tv_usec/1000));
+#endif
+
+ ERTS_POLLSET_UNSET_POLLER_WOKEN(ps);
+ if (ERTS_POLLSET_SET_POLLED_CHK(ps)) {
+ res = EINVAL; /* Another thread is in erts_poll_wait()
+ on this pollset... */
+ goto done;
+ }
+
+ if (ERTS_POLLSET_IS_INTERRUPTED(ps)) {
+	/* Interrupted; use a zero timeout */
+ itv.tv_sec = 0;
+ itv.tv_usec = 0;
+ tvp = &itv;
+ }
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ ERTS_POLLSET_LOCK(ps);
+ handle_update_requests(ps);
+ ERTS_POLLSET_UNLOCK(ps);
+ }
+#endif
+
+ ps_locked = 0;
+ res = check_fd_events(ps, tvp, no_fds, &ps_locked);
+
+ ERTS_POLLSET_SET_POLLER_WOKEN(ps);
+
+ if (res == 0) {
+ res = ETIMEDOUT;
+ }
+ else if (res < 0) {
+#if ERTS_POLL_USE_SELECT
+ if (errno == EBADF) {
+ ebadf = 1;
+ goto save_results;
+ }
+#endif
+ res = errno;
+ }
+ else {
+#if ERTS_POLL_USE_SELECT
+ save_results:
+#endif
+
+#ifdef ERTS_SMP
+ if (!ps_locked) {
+ ps_locked = 1;
+ ERTS_POLLSET_LOCK(ps);
+ }
+#endif
+
+ no_fds = save_poll_result(ps, pr, no_fds, res, ebadf);
+
+#ifdef HARD_DEBUG
+ check_poll_result(pr, no_fds);
+#endif
+
+ res = (no_fds == 0
+ ? (ERTS_POLLSET_UNSET_INTERRUPTED_CHK(ps) ? EINTR : EAGAIN)
+ : 0);
+ *len = no_fds;
+ }
+
+#ifdef ERTS_SMP
+ if (ps_locked)
+ ERTS_POLLSET_UNLOCK(ps);
+ ERTS_POLLSET_UNSET_POLLED(ps);
+#endif
+
+ done:
+ erts_smp_atomic_set(&ps->timeout, LONG_MAX);
+#ifdef ERTS_POLL_DEBUG_PRINT
+ erts_printf("Leaving %s = erts_poll_wait()\n",
+ res == 0 ? "0" : erl_errno_id(res));
+#endif
+
+ return res;
+}
+
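+/*
+ * Illustrative caller sketch (hypothetical; handle_event() is not a
+ * real function). Note the return value convention of erts_poll_wait():
+ * 0 means *len results were stored in pr[]; ETIMEDOUT, EINTR, and
+ * EAGAIN are expected non-fatal outcomes, and for EAGAIN the caller
+ * should simply poll again:
+ *
+ *   ErtsPollResFd pr[256];
+ *   int i, len = sizeof(pr)/sizeof(pr[0]);
+ *   SysTimeval tv = {5, 0}; // five second timeout
+ *   int res = erts_poll_wait(ps, pr, &len, &tv);
+ *   if (res == 0)
+ *       for (i = 0; i < len; i++)
+ *           handle_event(pr[i].fd, pr[i].events);
+ */
+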
+/*
+ * --- Interrupt a thread doing erts_poll_wait() -----------------------------
+ */
+
+void
+ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set)
+{
+ /*
+ * NOTE: This function might be called from signal handlers in the
+ * non-smp case; therefore, it has to be async-signal safe in
+ * the non-smp case.
+ */
+ if (set) {
+ ERTS_POLLSET_SET_INTERRUPTED(ps);
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP)
+ wake_poller(ps);
+#endif
+ }
+ else {
+ ERTS_POLLSET_UNSET_INTERRUPTED(ps);
+ }
+}
+
+/*
+ * erts_poll_interrupt_timed():
+ *   If 'set' != 0, interrupt a thread blocked in erts_poll_wait() if it
+ *   is not guaranteed to time out within 'msec' milliseconds.
+ */
+void
+ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps, int set, long msec)
+{
+ if (set) {
+ if (erts_smp_atomic_read(&ps->timeout) > msec) {
+ ERTS_POLLSET_SET_INTERRUPTED(ps);
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP)
+ wake_poller(ps);
+#endif
+ }
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ else {
+ if (ERTS_POLLSET_IS_POLLED(ps))
+ erts_smp_atomic_inc(&ps->no_avoided_wakeups);
+ erts_smp_atomic_inc(&ps->no_avoided_interrupts);
+ }
+ erts_smp_atomic_inc(&ps->no_interrupt_timed);
+#endif
+ }
+ else {
+ ERTS_POLLSET_UNSET_INTERRUPTED(ps);
+ }
+}
+
+int
+ERTS_POLL_EXPORT(erts_poll_max_fds)(void)
+{
+ return max_fds;
+}
+
+/*
+ * --- Initialization --------------------------------------------------------
+ */
+
+#ifdef VXWORKS
+extern int erts_vxworks_max_files;
+#endif
+
+void
+ERTS_POLL_EXPORT(erts_poll_init)(void)
+{
+ erts_smp_spinlock_init(&pollsets_lock, "pollsets_lock");
+ pollsets = NULL;
+
+ errno = 0;
+
+#if defined(VXWORKS)
+ max_fds = erts_vxworks_max_files;
+#elif !defined(NO_SYSCONF)
+ max_fds = sysconf(_SC_OPEN_MAX);
+#elif ERTS_POLL_USE_SELECT
+ max_fds = NOFILE;
+#else
+ max_fds = OPEN_MAX;
+#endif
+
+#if ERTS_POLL_USE_SELECT && defined(FD_SETSIZE)
+ if (max_fds > FD_SETSIZE)
+ max_fds = FD_SETSIZE;
+#endif
+
+ if (max_fds < 0)
+ fatal_error("erts_poll_init(): Failed to get max number of files: %s\n",
+ erl_errno_id(errno));
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+ print_misc_debug_info();
+#endif
+}
+
+ErtsPollSet
+ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
+{
+#if ERTS_POLL_USE_KERNEL_POLL
+ int kp_fd;
+#endif
+ ErtsPollSet ps = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(struct ErtsPollSet_));
+ ps->internal_fd_limit = 0;
+ ps->fds_status = NULL;
+ ps->fds_status_len = 0;
+ ps->no_of_user_fds = 0;
+#if ERTS_POLL_USE_KERNEL_POLL
+ ps->kp_fd = -1;
+#if ERTS_POLL_USE_EPOLL
+ kp_fd = epoll_create(256);
+ ps->res_events_len = 0;
+ ps->res_events = NULL;
+#elif ERTS_POLL_USE_DEVPOLL
+ kp_fd = open("/dev/poll", O_RDWR);
+ ps->res_events_len = 0;
+ ps->res_events = NULL;
+#elif ERTS_POLL_USE_KQUEUE
+ kp_fd = kqueue();
+ ps->res_events_len = 0;
+ ps->res_events = NULL;
+#endif
+ if (kp_fd < 0)
+ fatal_error("erts_poll_create_pollset(): Failed to "
+#if ERTS_POLL_USE_EPOLL
+ "create epoll set"
+#elif ERTS_POLL_USE_DEVPOLL
+		    "open /dev/poll"
+#elif ERTS_POLL_USE_KQUEUE
+ "create kqueue"
+#endif
+ ": %s (%d)\n",
+ erl_errno_id(errno), errno);
+#endif /* ERTS_POLL_USE_KERNEL_POLL */
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ /* res_events is also used as write buffer */
+ grow_res_events(ps, ERTS_POLL_MIN_BATCH_BUF_SIZE);
+#endif
+#if ERTS_POLL_USE_POLL
+ ps->next_poll_fds_ix = 0;
+ ps->no_poll_fds = 0;
+ ps->poll_fds_len = 0;
+ ps->poll_fds = NULL;
+#elif ERTS_POLL_USE_SELECT
+ ps->next_sel_fd = 0;
+ ps->max_fd = -1;
+#if ERTS_POLL_USE_FALLBACK
+ ps->no_select_fds = 0;
+#endif
+ FD_ZERO(&ps->input_fds);
+ FD_ZERO(&ps->res_input_fds);
+ FD_ZERO(&ps->output_fds);
+ FD_ZERO(&ps->res_output_fds);
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ ps->update_requests.next = NULL;
+ ps->update_requests.len = 0;
+ ps->curr_upd_req_block = &ps->update_requests;
+ erts_smp_atomic_init(&ps->have_update_requests, 0);
+#endif
+#ifdef ERTS_SMP
+ erts_smp_atomic_init(&ps->polled, 0);
+ erts_smp_atomic_init(&ps->woken, 0);
+ erts_smp_mtx_init(&ps->mtx, "pollset");
+#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ ps->woken = 0;
+#endif
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ create_wakeup_pipe(ps);
+#endif
+#if ERTS_POLL_USE_FALLBACK
+ if (kp_fd >= ps->fds_status_len)
+ grow_fds_status(ps, kp_fd);
+ /* Force kernel poll fd into fallback (poll/select) set */
+ ps->fds_status[kp_fd].flags
+ |= ERTS_POLL_FD_FLG_INFLBCK|ERTS_POLL_FD_FLG_USEFLBCK;
+ {
+ int do_wake = 0;
+ ERTS_POLL_EXPORT(erts_poll_control)(ps, kp_fd, ERTS_POLL_EV_IN, 1,
+ &do_wake);
+ }
+#endif
+#if ERTS_POLL_USE_KERNEL_POLL
+ if (ps->internal_fd_limit <= kp_fd)
+ ps->internal_fd_limit = kp_fd + 1;
+ ps->kp_fd = kp_fd;
+#endif
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP)
+ ps->interrupt = 0;
+#else
+ erts_smp_atomic_init(&ps->interrupt, 0);
+#endif
+ erts_smp_atomic_init(&ps->timeout, LONG_MAX);
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ erts_smp_atomic_init(&ps->no_avoided_wakeups, 0);
+ erts_smp_atomic_init(&ps->no_avoided_interrupts, 0);
+ erts_smp_atomic_init(&ps->no_interrupt_timed, 0);
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ handle_update_requests(ps);
+#endif
+#if ERTS_POLL_USE_FALLBACK
+ ps->fallback_used = 0;
+#endif
+ ps->no_of_user_fds = 0; /* Don't count wakeup pipe and fallback fd */
+
+ erts_smp_spin_lock(&pollsets_lock);
+ ps->next = pollsets;
+ pollsets = ps;
+ erts_smp_spin_unlock(&pollsets_lock);
+
+ return ps;
+}
+
+void
+ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps)
+{
+
+ if (ps->fds_status)
+ erts_free(ERTS_ALC_T_FD_STATUS, (void *) ps->fds_status);
+
+#if ERTS_POLL_USE_EPOLL
+ if (ps->kp_fd >= 0)
+ close(ps->kp_fd);
+ if (ps->res_events)
+ erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events);
+#elif ERTS_POLL_USE_DEVPOLL
+ if (ps->kp_fd >= 0)
+ close(ps->kp_fd);
+ if (ps->res_events)
+ erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events);
+#elif ERTS_POLL_USE_POLL
+ if (ps->poll_fds)
+ erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds);
+#elif ERTS_POLL_USE_SELECT
+#endif
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ {
+ ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next;
+ while (urqbp) {
+ ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp;
+ urqbp = urqbp->next;
+ free_update_requests_block(ps, free_urqbp);
+ }
+ }
+#endif
+#ifdef ERTS_SMP
+ erts_smp_mtx_destroy(&ps->mtx);
+#endif
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (ps->wake_fds[0] >= 0)
+ close(ps->wake_fds[0]);
+ if (ps->wake_fds[1] >= 0)
+ close(ps->wake_fds[1]);
+#endif
+
+ erts_smp_spin_lock(&pollsets_lock);
+ if (ps == pollsets)
+ pollsets = pollsets->next;
+ else {
+ ErtsPollSet prev_ps;
+ for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next);
+ ASSERT(ps == prev_ps->next);
+ prev_ps->next = ps->next;
+ }
+ erts_smp_spin_unlock(&pollsets_lock);
+
+ erts_free(ERTS_ALC_T_POLLSET, (void *) ps);
+}
+
+/*
+ * --- Info ------------------------------------------------------------------
+ */
+
+void
+ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
+{
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ int pending_updates;
+#endif
+ Uint size = 0;
+
+ ERTS_POLLSET_LOCK(ps);
+
+ size += sizeof(struct ErtsPollSet_);
+ size += ps->fds_status_len*sizeof(ErtsFdStatus);
+
+#if ERTS_POLL_USE_EPOLL
+ size += ps->res_events_len*sizeof(struct epoll_event);
+#elif ERTS_POLL_USE_DEVPOLL
+ size += ps->res_events_len*sizeof(struct pollfd);
+#elif ERTS_POLL_USE_KQUEUE
+ size += ps->res_events_len*sizeof(struct kevent);
+#endif
+
+#if ERTS_POLL_USE_POLL
+ size += ps->poll_fds_len*sizeof(struct pollfd);
+#elif ERTS_POLL_USE_SELECT
+#endif
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ {
+ ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next;
+ pending_updates = ps->update_requests.len;
+	while (urqbp) {
+	    size += sizeof(ErtsPollSetUpdateRequestsBlock);
+	    pending_updates += urqbp->len;
+	    urqbp = urqbp->next; /* advance, or this loop never terminates */
+	}
+ }
+#endif
+
+ pip->primary =
+#if ERTS_POLL_USE_KQUEUE
+ "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+ "epoll"
+#elif ERTS_POLL_USE_DEVPOLL
+ "/dev/poll"
+#elif ERTS_POLL_USE_POLL
+ "poll"
+#elif ERTS_POLL_USE_SELECT
+ "select"
+#endif
+ ;
+
+ pip->fallback =
+#if !ERTS_POLL_USE_FALLBACK
+ NULL
+#elif ERTS_POLL_USE_POLL
+ "poll"
+#elif ERTS_POLL_USE_SELECT
+ "select"
+#endif
+ ;
+
+ pip->kernel_poll =
+#if !ERTS_POLL_USE_KERNEL_POLL
+ NULL
+#elif ERTS_POLL_USE_KQUEUE
+ "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+ "epoll"
+#elif ERTS_POLL_USE_DEVPOLL
+ "/dev/poll"
+#endif
+ ;
+
+ pip->memory_size = size;
+
+ pip->poll_set_size = ps->no_of_user_fds;
+#ifdef ERTS_SMP
+ pip->poll_set_size++; /* Wakeup pipe */
+#endif
+
+ pip->fallback_poll_set_size =
+#if !ERTS_POLL_USE_FALLBACK
+ 0
+#elif ERTS_POLL_USE_POLL
+ ps->no_poll_fds
+#elif ERTS_POLL_USE_SELECT
+ ps->no_select_fds
+#endif
+ ;
+
+#if ERTS_POLL_USE_FALLBACK
+ /* If only kp_fd is in fallback poll set we don't use fallback... */
+ if (pip->fallback_poll_set_size == 1)
+ pip->fallback_poll_set_size = 0;
+ else
+ pip->poll_set_size++; /* kp_fd */
+#endif
+
+ pip->lazy_updates =
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ 1
+#else
+ 0
+#endif
+ ;
+
+ pip->pending_updates =
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ pending_updates
+#else
+ 0
+#endif
+ ;
+
+ pip->batch_updates =
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ 1
+#else
+ 0
+#endif
+ ;
+
+ pip->concurrent_updates =
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+ 1
+#else
+ 0
+#endif
+ ;
+
+ pip->max_fds = max_fds;
+
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ pip->no_avoided_wakeups = erts_smp_atomic_read(&ps->no_avoided_wakeups);
+ pip->no_avoided_interrupts = erts_smp_atomic_read(&ps->no_avoided_interrupts);
+ pip->no_interrupt_timed = erts_smp_atomic_read(&ps->no_interrupt_timed);
+#endif
+
+ ERTS_POLLSET_UNLOCK(ps);
+
+}
+
+/*
+ * Fatal error...
+ */
+
+#ifndef ERTS_GOT_SIGUSR1
+# define ERTS_GOT_SIGUSR1 0
+#endif
+
+static void
+fatal_error(char *format, ...)
+{
+ va_list ap;
+
+ if (ERTS_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) {
+ /*
+ * Crash dump writing and reception of sigusr1 (which will
+	 * Crash dump writing and reception of sigusr1 (which will
+	 * result in a crash dump) close all file descriptors. This
+ * pipes and kernel poll fds are closed).
+ *
+ * We ignore the error and let the crash dump writing continue...
+ */
+ return;
+ }
+ va_start(ap, format);
+ erts_vfprintf(stderr, format, ap);
+ va_end(ap);
+ abort();
+}
+
+static void
+fatal_error_async_signal_safe(char *error_str)
+{
+ if (ERTS_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) {
+ /* See comment above in fatal_error() */
+ return;
+ }
+ if (error_str) {
+ int len = 0;
+ while (error_str[len])
+ len++;
+ if (len)
+ (void) write(2, error_str, len); /* async signal safe */
+ }
+ abort();
+}
+
+/*
+ * --- Debug -----------------------------------------------------------------
+ */
+
+void
+ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps,
+ ErtsPollEvents ev[],
+ int len)
+{
+ int fd;
+ ERTS_POLLSET_LOCK(ps);
+ for (fd = 0; fd < len; fd++) {
+ if (fd >= ps->fds_status_len)
+ ev[fd] = 0;
+ else {
+ ev[fd] = ps->fds_status[fd].events;
+#if ERTS_POLL_USE_WAKEUP_PIPE
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+ ev[fd] |= ERTS_POLL_EV_NVAL;
+#endif
+#if ERTS_POLL_USE_KERNEL_POLL
+ if (fd == ps->kp_fd)
+ ev[fd] |= ERTS_POLL_EV_NVAL;
+#endif
+ }
+ }
+ ERTS_POLLSET_UNLOCK(ps);
+
+}
+
+#ifdef HARD_DEBUG
+
+static void
+check_poll_result(ErtsPollResFd pr[], int len)
+{
+ int i, j;
+
+ for (i = 0; i < len; i++) {
+ ASSERT(pr[i].fd >= 0);
+ ASSERT(pr[i].fd < max_fds);
+ for (j = 0; j < len; j++) {
+ ASSERT(i == j || pr[i].fd != pr[j].fd);
+ }
+ }
+}
+
+
+#if ERTS_POLL_USE_DEVPOLL
+
+static void
+check_poll_status(ErtsPollSet ps)
+{
+ int i;
+ for (i = 0; i < ps->fds_status_len; i++) {
+ int ires;
+ struct pollfd dp_fd;
+ short events = ERTS_POLL_EV_E2N(ps->fds_status[i].events);
+
+ dp_fd.fd = i;
+ dp_fd.events = (short) 0;
+ dp_fd.revents = (short) 0;
+
+ ires = ioctl(ps->kp_fd, DP_ISPOLLED, &dp_fd);
+
+ if (ires == 0) {
+ ASSERT(!events);
+ }
+ else if (ires == 1) {
+ ASSERT(events);
+ ASSERT(events == dp_fd.revents);
+ }
+ else {
+ ASSERT(0);
+ }
+ ASSERT(dp_fd.fd == i);
+ ASSERT(ps->fds_status[i].events == ps->fds_status[i].used_events);
+ }
+}
+
+#endif /* ERTS_POLL_USE_DEVPOLL */
+#endif /* HARD_DEBUG */
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+static void
+print_misc_debug_info(void)
+{
+ erts_printf("erts_poll using: %s lazy_updates:%s batch_updates:%s\n",
+#if ERTS_POLL_USE_KQUEUE
+ "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+ "epoll"
+#elif ERTS_POLL_USE_DEVPOLL
+ "/dev/poll"
+#endif
+#if ERTS_POLL_USE_FALLBACK
+ "-"
+#endif
+#if ERTS_POLL_USE_POLL
+ "poll"
+#elif ERTS_POLL_USE_SELECT
+ "select"
+#endif
+ ,
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ "true"
+#else
+ "false"
+#endif
+ ,
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+ "true"
+#else
+ "false"
+#endif
+ );
+
+ erts_printf("ERTS_POLL_EV_IN=0x%x\n"
+ "ERTS_POLL_EV_OUT=0x%x\n"
+ "ERTS_POLL_EV_NVAL=0x%x\n"
+ "ERTS_POLL_EV_ERR=0x%x\n",
+ ERTS_POLL_EV_IN,
+ ERTS_POLL_EV_OUT,
+ ERTS_POLL_EV_NVAL,
+ ERTS_POLL_EV_ERR);
+
+#ifdef FD_SETSIZE
+ erts_printf("FD_SETSIZE=%d\n", FD_SETSIZE);
+#endif
+}
+
+#endif
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
new file mode 100644
index 0000000000..725a77a152
--- /dev/null
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -0,0 +1,246 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Poll interface suitable for ERTS with or without
+ * SMP support.
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERL_POLL_H__
+#define ERL_POLL_H__
+
+#include "sys.h"
+
+#if 0
+#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+#endif
+
+#ifdef ERTS_ENABLE_KERNEL_POLL
+# if defined(ERTS_KERNEL_POLL_VERSION)
+# define ERTS_POLL_EXPORT(FUNC) FUNC ## _kp
+# else
+# define ERTS_POLL_EXPORT(FUNC) FUNC ## _nkp
+# undef ERTS_POLL_DISABLE_KERNEL_POLL
+# define ERTS_POLL_DISABLE_KERNEL_POLL
+# endif
+#else
+# define ERTS_POLL_EXPORT(FUNC) FUNC
+# undef ERTS_POLL_DISABLE_KERNEL_POLL
+# define ERTS_POLL_DISABLE_KERNEL_POLL
+#endif
+
+#ifdef ERTS_POLL_DISABLE_KERNEL_POLL
+# undef HAVE_SYS_EPOLL_H
+# undef HAVE_SYS_EVENT_H
+# undef HAVE_SYS_DEVPOLL_H
+#endif
+
+#undef ERTS_POLL_USE_KERNEL_POLL
+#define ERTS_POLL_USE_KERNEL_POLL 0
+
+#undef ERTS_POLL_USE_KQUEUE
+#define ERTS_POLL_USE_KQUEUE 0
+#undef ERTS_POLL_USE_EPOLL
+#define ERTS_POLL_USE_EPOLL 0
+#undef ERTS_POLL_USE_DEVPOLL
+#define ERTS_POLL_USE_DEVPOLL 0
+#undef ERTS_POLL_USE_POLL
+#define ERTS_POLL_USE_POLL 0
+#undef ERTS_POLL_USE_SELECT
+#define ERTS_POLL_USE_SELECT 0
+
+#if defined(HAVE_SYS_EVENT_H)
+# undef ERTS_POLL_USE_KQUEUE
+# define ERTS_POLL_USE_KQUEUE 1
+# undef ERTS_POLL_USE_KERNEL_POLL
+# define ERTS_POLL_USE_KERNEL_POLL 1
+#elif defined(HAVE_SYS_EPOLL_H)
+# undef ERTS_POLL_USE_EPOLL
+# define ERTS_POLL_USE_EPOLL 1
+# undef ERTS_POLL_USE_KERNEL_POLL
+# define ERTS_POLL_USE_KERNEL_POLL 1
+#elif defined(HAVE_SYS_DEVPOLL_H)
+# undef ERTS_POLL_USE_DEVPOLL
+# define ERTS_POLL_USE_DEVPOLL 1
+# undef ERTS_POLL_USE_KERNEL_POLL
+# define ERTS_POLL_USE_KERNEL_POLL 1
+#endif
+
+#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+
+#if !ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_FALLBACK
+# if defined(ERTS_USE_POLL)
+# undef ERTS_POLL_USE_POLL
+# define ERTS_POLL_USE_POLL 1
+# elif !defined(__WIN32__)
+# undef ERTS_POLL_USE_SELECT
+# define ERTS_POLL_USE_SELECT 1
+# endif
+#endif
+
+typedef Uint32 ErtsPollEvents;
+#undef ERTS_POLL_EV_E2N
+
+#if defined(__WIN32__) /* --- win32 ------------------------------- */
+
+#define ERTS_POLL_EV_IN 1
+#define ERTS_POLL_EV_OUT 2
+#define ERTS_POLL_EV_ERR 4
+#define ERTS_POLL_EV_NVAL 8
+
+#elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+
+#include <sys/epoll.h>
+
+#define ERTS_POLL_EV_E2N(EV) \
+ ((__uint32_t) (EV))
+#define ERTS_POLL_EV_N2E(EV) \
+ ((ErtsPollEvents) (EV))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(EPOLLIN)
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(EPOLLOUT)
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(EPOLLET)
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(EPOLLERR|EPOLLHUP)
+
+#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+
+#include <sys/devpoll.h>
+
+#define ERTS_POLL_EV_E2N(EV) \
+ ((short) ((EV) & ~((~((ErtsPollEvents) 0)) << 8*SIZEOF_SHORT)))
+#define ERTS_POLL_EV_N2E(EV) \
+ ((ErtsPollEvents) ((unsigned short) (EV)))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(POLLIN)
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(POLLOUT)
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
+
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+/* Kqueue uses the fallback defines (poll() or select()) */
+#endif
+
+#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+
+#include <poll.h>
+
+#define ERTS_POLL_EV_NKP_E2N(EV) \
+ ((short) ((EV) & ~((~((ErtsPollEvents) 0)) << 8*SIZEOF_SHORT)))
+#define ERTS_POLL_EV_NKP_N2E(EV) \
+ ((ErtsPollEvents) ((unsigned short) (EV)))
+
+/* At least on FreeBSD, we need POLLRDNORM for normal files, not POLLIN. */
+/* Whether this is a bug in FreeBSD, I don't know. */
+#ifdef POLLRDNORM
+#define ERTS_POLL_EV_NKP_IN ERTS_POLL_EV_N2E(POLLIN|POLLRDNORM)
+#else
+#define ERTS_POLL_EV_NKP_IN ERTS_POLL_EV_N2E(POLLIN)
+#endif
+#define ERTS_POLL_EV_NKP_OUT ERTS_POLL_EV_N2E(POLLOUT)
+#define ERTS_POLL_EV_NKP_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
+#define ERTS_POLL_EV_NKP_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
+
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+
+#define ERTS_POLL_EV_NKP_E2N(EV) (EV)
+#define ERTS_POLL_EV_NKP_N2E(EV) (EV)
+
+#define ERTS_POLL_EV_NKP_IN (((ErtsPollEvents) 1) << 0)
+#define ERTS_POLL_EV_NKP_OUT (((ErtsPollEvents) 1) << 1)
+#define ERTS_POLL_EV_NKP_NVAL (((ErtsPollEvents) 1) << 2)
+#define ERTS_POLL_EV_NKP_ERR (((ErtsPollEvents) 1) << 3)
+
+#endif /* ----------------------------------------- */
+
+
+#if !defined(ERTS_POLL_EV_E2N) && defined(ERTS_POLL_EV_NKP_E2N)
+/* poll(), select(), and kqueue() */
+
+#define ERTS_POLL_EV_E2N(EV) ERTS_POLL_EV_NKP_E2N((EV))
+#define ERTS_POLL_EV_N2E(EV) ERTS_POLL_EV_NKP_N2E((EV))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_NKP_IN
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_NKP_OUT
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_NKP_NVAL
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_NKP_ERR
+
+#endif
+
+typedef struct ErtsPollSet_ *ErtsPollSet;
+
+typedef struct {
+ ErtsSysFdType fd;
+ ErtsPollEvents events;
+ int on;
+} ErtsPollControlEntry;
+
+typedef struct {
+ ErtsSysFdType fd;
+ ErtsPollEvents events;
+} ErtsPollResFd;
+
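+/*
+ * Example (illustrative; handle_readable() etc. are hypothetical):
+ * whichever backend was selected above, result masks are tested with
+ * the same platform-independent names:
+ *
+ *   ErtsPollEvents revents = pr[i].events;
+ *   if (revents & ERTS_POLL_EV_IN)   handle_readable(pr[i].fd);
+ *   if (revents & ERTS_POLL_EV_OUT)  handle_writable(pr[i].fd);
+ *   if (revents & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL))
+ *       handle_error(pr[i].fd);
+ *
+ * ERTS_POLL_EV_E2N()/ERTS_POLL_EV_N2E() translate between this
+ * representation and the native kernel one, and are used internally
+ * by the poll implementation.
+ */
+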
+typedef struct {
+ char *primary;
+ char *fallback;
+ char *kernel_poll;
+ Uint memory_size;
+ int poll_set_size;
+ int fallback_poll_set_size;
+ int lazy_updates;
+ int pending_updates;
+ int batch_updates;
+ int concurrent_updates;
+ int max_fds;
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+ long no_avoided_wakeups;
+ long no_avoided_interrupts;
+ long no_interrupt_timed;
+#endif
+} ErtsPollInfo;
+
+void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet,
+ int);
+void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet,
+ int,
+ long);
+ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet,
+ ErtsSysFdType,
+ ErtsPollEvents,
+ int on,
+ int* wake_poller);
+void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet,
+ ErtsPollControlEntry [],
+					  int len);
+int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet,
+ ErtsPollResFd [],
+ int *,
+ SysTimeval *);
+int ERTS_POLL_EXPORT(erts_poll_max_fds)(void);
+void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet,
+ ErtsPollInfo *);
+ErtsPollSet ERTS_POLL_EXPORT(erts_poll_create_pollset)(void);
+void ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet);
+void ERTS_POLL_EXPORT(erts_poll_init)(void);
+void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet,
+ ErtsPollEvents [],
+ int);
+
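+/*
+ * Typical call sequence (illustrative sketch; the real callers live in
+ * erl_check_io.c and sys-specific code, and always go through the
+ * ERTS_POLL_EXPORT() name mapping above):
+ *
+ *   erts_poll_init();                        // once, at emulator start
+ *   ErtsPollSet ps = erts_poll_create_pollset();
+ *
+ *   int wake = 0;
+ *   erts_poll_control(ps, fd, ERTS_POLL_EV_IN, 1, &wake);
+ *
+ *   ErtsPollResFd pr[16];
+ *   int len = sizeof(pr)/sizeof(pr[0]);
+ *   SysTimeval tv = {1, 0};
+ *   (void) erts_poll_wait(ps, pr, &len, &tv);
+ *
+ *   erts_poll_destroy_pollset(ps);           // at shutdown
+ */
+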
+#endif /* #ifndef ERL_POLL_H__ */