From 84adefa331c4159d432d22840663c38f155cd4c1 Mon Sep 17 00:00:00 2001
From: Erlang/OTP
Date: Fri, 20 Nov 2009 14:54:40 +0000
Subject: The R13B03 release.

---
 erts/emulator/sys/common/erl_check_io.c        | 1912 +++++++++++++++++
 erts/emulator/sys/common/erl_check_io.h        |   96 +
 erts/emulator/sys/common/erl_mseg.c            | 1452 +++++++++++++
 erts/emulator/sys/common/erl_mseg.h            |   97 +
 erts/emulator/sys/common/erl_mtrace_sys_wrap.c |  245 +++
 erts/emulator/sys/common/erl_poll.c            | 2693 ++++++++++++++++++++++++
 erts/emulator/sys/common/erl_poll.h            |  246 +++
 7 files changed, 6741 insertions(+)
 create mode 100644 erts/emulator/sys/common/erl_check_io.c
 create mode 100644 erts/emulator/sys/common/erl_check_io.h
 create mode 100644 erts/emulator/sys/common/erl_mseg.c
 create mode 100644 erts/emulator/sys/common/erl_mseg.h
 create mode 100644 erts/emulator/sys/common/erl_mtrace_sys_wrap.c
 create mode 100644 erts/emulator/sys/common/erl_poll.c
 create mode 100644 erts/emulator/sys/common/erl_poll.h

diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
new file mode 100644
index 0000000000..218bd79584
--- /dev/null
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -0,0 +1,1912 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ * + * %CopyrightEnd% + */ + +/* + * Description: Check I/O + * + * Author: Rickard Green + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#define ERL_CHECK_IO_C__ +#define ERTS_WANT_BREAK_HANDLING +#ifndef WANT_NONBLOCKING +# define WANT_NONBLOCKING +#endif +#include "sys.h" +#include "global.h" +#include "erl_check_io.h" + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +# define ERTS_DRV_EV_STATE_EXTRA_SIZE 128 +#else +# include "safe_hash.h" +# define DRV_EV_STATE_HTAB_SIZE 1024 +#endif + +typedef char EventStateType; +#define ERTS_EV_TYPE_NONE ((EventStateType) 0) +#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */ +#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */ +#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */ + +typedef char EventStateFlags; +#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ + + +#if defined(ERTS_KERNEL_POLL_VERSION) +# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp +#elif defined(ERTS_NO_KERNEL_POLL_VERSION) +# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp +#else +# define ERTS_CIO_EXPORT(FUNC) FUNC +#endif + +#define ERTS_CIO_HAVE_DRV_EVENT \ + (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL) + +#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control) +#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait) +#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt) +#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed) +#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset) +#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset) +#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds) +#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init) +#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info) + +static struct pollset_info +{ + ErtsPollSet ps; + erts_smp_atomic_t in_poll_wait; /* set while doing poll */ +#ifdef ERTS_SMP + struct removed_fd* removed_list; /* list of deselected fd's*/ + erts_smp_spinlock_t removed_list_lock; +#endif +}pollset; +#define NUM_OF_POLLSETS 1 + +typedef struct { +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + SafeHashBucket hb; +#endif + ErtsSysFdType fd; + union { + ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ + ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */ + erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ + } driver; + ErtsPollEvents events; + unsigned short remove_cnt; /* number of removed_fd's referring to this fd */ + EventStateType type; + EventStateFlags flags; +} ErtsDrvEventState; + +#ifdef ERTS_SMP +struct removed_fd { + struct removed_fd *next; +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + ErtsSysFdType fd; +#else + ErtsDrvEventState* state; + #ifdef DEBUG + ErtsSysFdType fd; + #endif +#endif + +}; +#endif + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static int max_fds = -1; +#endif +#define DRV_EV_STATE_LOCK_CNT 16 +static union { + erts_smp_mtx_t lck; + byte _cache_line_alignment[64]; +}drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT]; + +#ifdef ERTS_SMP +static ERTS_INLINE erts_smp_mtx_t* fd_mtx(ErtsSysFdType fd) +{ + int hash = (int)fd; +# ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + hash ^= (hash >> 9); +# endif + return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck; +} +#else +# define fd_mtx(fd) NULL +#endif + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + +static erts_smp_atomic_t drv_ev_state_len; +static ErtsDrvEventState *drv_ev_state; +static erts_smp_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */ + 
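/*
 * Note: with kernel poll enabled this file is built in two variants,
 * and ERTS_CIO_EXPORT() above suffixes each exported function
 * accordingly; e.g. ERTS_CIO_EXPORT(driver_select) expands to
 * driver_select_kp in the kernel-poll variant and to driver_select_nkp
 * in the fallback variant (erl_check_io.h below declares both sets).
 * Per-fd state lives in the flat drv_ev_state array above when fds are
 * contiguous small integers, and otherwise in a SafeHash keyed on the
 * fd (see the #else branch that follows).
 */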
+#else +static SafeHash drv_ev_state_tab; +static int num_state_prealloc; +static ErtsDrvEventState *state_prealloc_first; +erts_smp_spinlock_t state_prealloc_lock; + +static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd) +{ + ErtsDrvEventState tmpl; + tmpl.fd = fd; + return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl); +} + +static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd) +{ + ErtsDrvEventState tmpl; + tmpl.fd = fd; + tmpl.driver.select = NULL; + tmpl.events = 0; + tmpl.remove_cnt = 0; + tmpl.type = ERTS_EV_TYPE_NONE; + tmpl.flags = 0; + return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl); +} + +static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state) +{ + ASSERT(state->remove_cnt == 0); + safe_hash_erase(&drv_ev_state_tab, (void *) state); +} + +#endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ + +static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode); +static void select_steal(ErlDrvPort ix, ErtsDrvEventState *state, + int mode, int on); +static void print_select_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort ix, ErtsSysFdType fd, int mode, int on); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static void select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int); +#endif +#if ERTS_CIO_HAVE_DRV_EVENT +static void event_steal(ErlDrvPort ix, ErtsDrvEventState *state, + ErlDrvEventData event_data); +static void print_event_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort, ErtsSysFdType, ErlDrvEventData); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData); +#endif +#endif +static void steal_pending_stop_select(erts_dsprintf_buf_t*, ErlDrvPort, + ErtsDrvEventState*, int mode, int on); +static ERTS_INLINE Eterm +drvport2id(ErlDrvPort dp) +{ + Port *pp = erts_drvport2port(dp); + if (pp) + return pp->id; + else { + ASSERT(0); + return am_undefined; + } +} + +#ifdef ERTS_SMP +ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST) +#endif + +static ERTS_INLINE void +remember_removed(ErtsDrvEventState *state, struct pollset_info* psi) +{ +#ifdef ERTS_SMP + struct removed_fd *fdlp; + ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); + if (erts_smp_atomic_read(&psi->in_poll_wait)) { + state->remove_cnt++; + ASSERT(state->remove_cnt > 0); + fdlp = removed_fd_alloc(); + #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || defined(DEBUG) + fdlp->fd = state->fd; + #endif + #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + fdlp->state = state; + #endif + erts_smp_spin_lock(&psi->removed_list_lock); + fdlp->next = psi->removed_list; + psi->removed_list = fdlp; + erts_smp_spin_unlock(&psi->removed_list_lock); + } +#endif +} + + +static ERTS_INLINE int +is_removed(ErtsDrvEventState *state) +{ +#ifdef ERTS_SMP + /* Note that there is a possible race here, where an fd is removed + (increasing remove_cnt) and then added again just before erts_poll_wait + is called by erts_check_io. Any polled event on the re-added fd will then + be falsely ignored. But that does not matter, as the event will trigger + again next time erl_check_io is called. 
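       A concrete interleaving: the poller enters erts_poll_wait() watching
       fd F; another thread deselects F (remove_cnt becomes 1) and a driver
       then re-selects F; if the pending poll still reports F, is_removed()
       makes erts_check_io drop that event, and the re-added F is simply
       picked up on the next erts_check_io pass.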
*/ + return state->remove_cnt > 0; +#else + return 0; +#endif +} + +static void +forget_removed(struct pollset_info* psi) +{ +#ifdef ERTS_SMP + struct removed_fd* fdlp; + struct removed_fd* tofree; + + /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */ + + erts_smp_spin_lock(&psi->removed_list_lock); + fdlp = psi->removed_list; + psi->removed_list = NULL; + erts_smp_spin_unlock(&psi->removed_list_lock); + + while (fdlp) { + erts_driver_t* drv_ptr = NULL; + erts_smp_mtx_t* mtx; + ErtsSysFdType fd; + ErtsDrvEventState *state; + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + fd = fdlp->fd; + mtx = fd_mtx(fd); + erts_smp_mtx_lock(mtx); + state = &drv_ev_state[(int) fd]; +#else + state = fdlp->state; + fd = state->fd; + ASSERT(fd == fdlp->fd); + mtx = fd_mtx(fd); + erts_smp_mtx_lock(mtx); +#endif + ASSERT(state->remove_cnt > 0); + if (--state->remove_cnt == 0) { + switch (state->type) { + case ERTS_EV_TYPE_STOP_USE: + /* Now we can call stop_select */ + drv_ptr = state->driver.drv_ptr; + ASSERT(drv_ptr); + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.drv_ptr = NULL; + /* Fall through */ + case ERTS_EV_TYPE_NONE: +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + hash_erase_drv_ev_state(state); +#endif + break; + case ERTS_EV_TYPE_DRV_SEL: + case ERTS_EV_TYPE_DRV_EV: + break; + default: + ASSERT(0); + } + } + erts_smp_mtx_unlock(mtx); + if (drv_ptr) { + int was_unmasked = erts_block_fpe(); + (*drv_ptr->stop_select) (fd, NULL); + erts_unblock_fpe(was_unmasked); + if (drv_ptr->handle) { + erts_ddll_dereference_driver(drv_ptr->handle); + } + } + tofree = fdlp; + fdlp = fdlp->next; + removed_fd_free(tofree); + } +#endif /* ERTS_SMP */ +} + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static void +grow_drv_ev_state(int min_ix) +{ + int i; + int new_len = min_ix + 1 + ERTS_DRV_EV_STATE_EXTRA_SIZE; + if (new_len > max_fds) + new_len = max_fds; + + erts_smp_mtx_lock(&drv_ev_state_grow_lock); + if (erts_smp_atomic_read(&drv_ev_state_len) <= min_ix) { + for (i=0; itype) { +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_EV_TYPE_DRV_EV: + abort_task(state->driver.event->port, + &state->driver.event->task, + ERTS_EV_TYPE_DRV_EV); + return; +#endif + case ERTS_EV_TYPE_NONE: + return; + default: + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); + /* Fall through */ + } + case ERL_DRV_READ|ERL_DRV_WRITE: + case ERL_DRV_WRITE: + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); + abort_task(state->driver.select->outport, + &state->driver.select->outtask, + state->type); + if (mode == ERL_DRV_WRITE) + break; + case ERL_DRV_READ: + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); + abort_task(state->driver.select->inport, + &state->driver.select->intask, + state->type); + break; + default: + goto check_type; + } +} + +static void +deselect(ErtsDrvEventState *state, int mode) +{ + int do_wake = 0; + ErtsPollEvents rm_events; + ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); + ASSERT(state->events); + + abort_tasks(state, mode); + + if (!mode) + rm_events = state->events; + else { + rm_events = 0; + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); + if (mode & ERL_DRV_READ) { + state->driver.select->inport = NIL; + rm_events |= ERTS_POLL_EV_IN; + } + if (mode & ERL_DRV_WRITE) { + state->driver.select->outport = NIL; + rm_events |= ERTS_POLL_EV_OUT; + } + } + + state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake); + + if (!(state->events)) { + switch (state->type) { + case ERTS_EV_TYPE_DRV_SEL: + ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask)); + 
ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask)); + erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, + state->driver.select); + break; +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_EV_TYPE_DRV_EV: + ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task)); + erts_free(ERTS_ALC_T_DRV_EV_D_STATE, + state->driver.event); + break; +#endif + case ERTS_EV_TYPE_NONE: + break; + default: + ASSERT(0); + break; + } + + state->driver.select = NULL; + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + remember_removed(state, &pollset); + } +} + + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0) +#else +# define IS_FD_UNKNOWN(state) ((state) == NULL) +#endif + + +int +ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, + ErlDrvEvent e, + int mode, + int on) +{ + void (*stop_select_fn)(ErlDrvEvent, void*) = NULL; + Eterm id = drvport2id(ix); + ErtsSysFdType fd = (ErtsSysFdType) e; + ErtsPollEvents ctl_events = (ErtsPollEvents) 0; + ErtsPollEvents new_events, old_events; + ErtsDrvEventState *state; + int wake_poller; + int ret; + + ERTS_SMP_LC_ASSERT(erts_drvport2port(ix) + && erts_lc_is_port_locked(erts_drvport2port(ix))); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if ((unsigned)fd >= (unsigned)erts_smp_atomic_read(&drv_ev_state_len)) { + if (fd < 0) { + return -1; + } + if (fd >= max_fds) { + select_large_fd_error(ix, fd, mode, on); + return -1; + } + grow_drv_ev_state(fd); + } +#endif + + erts_smp_mtx_lock(fd_mtx(fd)); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + state = &drv_ev_state[(int) fd]; +#else + state = hash_get_drv_ev_state(fd); /* may be NULL! */ +#endif + + if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + if (IS_FD_UNKNOWN(state)) { + /* fast track to stop_select callback */ + stop_select_fn = erts_drvport2port(ix)->drv_ptr->stop_select; + ret = 0; + goto done_unknown; + } + mode |= (ERL_DRV_READ | ERL_DRV_WRITE); + wake_poller = 1; /* to eject fd from pollset (if needed) */ + } + else wake_poller = 0; + +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state == NULL) { + state = hash_new_drv_ev_state(fd); + } +#endif + +#if ERTS_CIO_HAVE_DRV_EVENT + if (state->type == ERTS_EV_TYPE_DRV_EV) + select_steal(ix, state, mode, on); +#endif + if (state->type == ERTS_EV_TYPE_STOP_USE) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_select_op(dsbufp, ix, state->fd, mode, on); + steal_pending_stop_select(dsbufp, ix, state, mode, on); + if (state->type == ERTS_EV_TYPE_STOP_USE) { + ret = 0; + goto done; /* stop_select still pending */ + } + ASSERT(state->type == ERTS_EV_TYPE_NONE); + } + + if (mode & ERL_DRV_READ) { + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + Eterm owner = state->driver.select->inport; + if (owner != id && is_not_nil(owner)) + select_steal(ix, state, mode, on); + } + ctl_events |= ERTS_POLL_EV_IN; + } + if (mode & ERL_DRV_WRITE) { + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + Eterm owner = state->driver.select->outport; + if (owner != id && is_not_nil(owner)) + select_steal(ix, state, mode, on); + } + ctl_events |= ERTS_POLL_EV_OUT; + } + + ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) || + (state->type == ERTS_EV_TYPE_NONE && !state->events)); + + if (!on && !(state->flags & ERTS_EV_FLAG_USED) + && state->events && !(state->events & ~ctl_events)) { + /* Old driver removing all events. At least wake poller. + It will not make close() 100% safe but it will prevent + actions delayed by poll timeout. 
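           ("Old driver" here means one selecting without ERL_DRV_USE, so
           ERTS_EV_FLAG_USED was never set; such a driver may close() the
           fd right after deselecting, which is why prompt ejection from
           the pollset matters.)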
*/ + wake_poller = 1; + } + + new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select); + state->driver.select = NULL; + } + ret = -1; + goto done; + } + + old_events = state->events; + + ASSERT(on + ? (new_events == (state->events | ctl_events)) + : (new_events == (state->events & ~ctl_events))); + + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL + || state->type == ERTS_EV_TYPE_NONE); + + state->events = new_events; + if (ctl_events) { + if (on) { + if (state->type == ERTS_EV_TYPE_NONE) { + ErtsDrvSelectDataState *dsdsp + = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, + sizeof(ErtsDrvSelectDataState)); + dsdsp->inport = NIL; + dsdsp->outport = NIL; + erts_port_task_handle_init(&dsdsp->intask); + erts_port_task_handle_init(&dsdsp->outtask); + ASSERT(state->driver.select == NULL); + state->driver.select = dsdsp; + state->type = ERTS_EV_TYPE_DRV_SEL; + } + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); + if (ctl_events & ERTS_POLL_EV_IN) + state->driver.select->inport = id; + if (ctl_events & ERTS_POLL_EV_OUT) + state->driver.select->outport = id; + if (mode & ERL_DRV_USE) { + state->flags |= ERTS_EV_FLAG_USED; + } + } + else { /* off */ + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + if (ctl_events & ERTS_POLL_EV_IN) { + abort_tasks(state, ERL_DRV_READ); + state->driver.select->inport = NIL; + } + if (ctl_events & ERTS_POLL_EV_OUT) { + abort_tasks(state, ERL_DRV_WRITE); + state->driver.select->outport = NIL; + } + if (new_events == 0) { + ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask)); + ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask)); + if (old_events != 0) { + remember_removed(state, &pollset); + } + if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, + state->driver.select); + state->driver.select = NULL; + } + /*else keep it, as fd will probably be selected upon again */ + } + } + if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + erts_driver_t* drv_ptr = erts_drvport2port(ix)->drv_ptr; + ASSERT(new_events==0); + if (state->remove_cnt == 0 || !wake_poller) { + /* Safe to close fd now as it is not in pollset + or there was no need to eject fd (kernel poll) */ + stop_select_fn = drv_ptr->stop_select; + } + else { + /* Not safe to close fd, postpone stop_select callback. 
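               The fd is still referenced by the pollset (remove_cnt > 0
               and the poller had to be woken); forget_removed() will run
               the stop_select callback once remove_cnt drops back to zero.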
*/ + state->type = ERTS_EV_TYPE_STOP_USE; + state->driver.drv_ptr = drv_ptr; + if (drv_ptr->handle) { + erts_ddll_reference_referenced_driver(drv_ptr->handle); + } + } + } + } + } + + ret = 0; + +done:; +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) { + hash_erase_drv_ev_state(state); + } +#endif +done_unknown: + erts_smp_mtx_unlock(fd_mtx(fd)); + if (stop_select_fn) { + int was_unmasked = erts_block_fpe(); + (*stop_select_fn)(e, NULL); + erts_unblock_fpe(was_unmasked); + } + return ret; +} + +int +ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, + ErlDrvEvent e, + ErlDrvEventData event_data) +{ +#if !ERTS_CIO_HAVE_DRV_EVENT + return -1; +#else + ErtsSysFdType fd = (ErtsSysFdType) e; + ErtsPollEvents events; + ErtsPollEvents add_events; + ErtsPollEvents remove_events; + Eterm id = drvport2id(ix); + ErtsDrvEventState *state; + int do_wake = 0; + int ret; + + ERTS_SMP_LC_ASSERT(erts_drvport2port(ix) + && erts_lc_is_port_locked(erts_drvport2port(ix))); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if ((unsigned)fd >= (unsigned)erts_smp_atomic_read(&drv_ev_state_len)) { + if (fd < 0) + return -1; + if (fd >= max_fds) { + event_large_fd_error(ix, fd, event_data); + return -1; + } + grow_drv_ev_state(fd); + } +#endif + + erts_smp_mtx_lock(fd_mtx(fd)); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + state = &drv_ev_state[(int) fd]; +#else + /* Could use hash_new directly, but want to keep the normal case fast */ + state = hash_get_drv_ev_state(fd); + if (state == NULL) { + state = hash_new_drv_ev_state(fd); + } +#endif + + switch (state->type) { + case ERTS_EV_TYPE_DRV_EV: + if (state->driver.event->port == id) break; + /*fall through*/ + case ERTS_EV_TYPE_DRV_SEL: + event_steal(ix, state, event_data); + break; + case ERTS_EV_TYPE_STOP_USE: { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_event_op(dsbufp, ix, fd, event_data); + steal_pending_stop_select(dsbufp, ix, state, 0, 1); + break; + } + } + + ASSERT(state->type == ERTS_EV_TYPE_DRV_EV + || state->type == ERTS_EV_TYPE_NONE); + + events = state->events; + + if (!event_data) { + remove_events = events; + add_events = 0; + } + else { + remove_events = ~event_data->events & events; + add_events = ~events & event_data->events; + } + + if (add_events) { + events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake); + if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + ret = -1; + goto done; + } + } + if (remove_events) { + events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake); + if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + ret = -1; + goto done; + } + } + if (event_data && event_data->events != 0) { + if (state->type == ERTS_EV_TYPE_DRV_EV) { + state->driver.event->removed_events &= ~add_events; + state->driver.event->removed_events |= remove_events; + } + else { + state->driver.event + = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, + sizeof(ErtsDrvEventDataState)); + erts_port_task_handle_init(&state->driver.event->task); + state->driver.event->port = id; + state->driver.event->removed_events = (ErtsPollEvents) 0; + state->type = ERTS_EV_TYPE_DRV_EV; + } + state->driver.event->data = event_data; + } + else { + if (state->type == ERTS_EV_TYPE_DRV_EV) { + abort_tasks(state, 0); + erts_free(ERTS_ALC_T_DRV_EV_D_STATE, + state->driver.event); + } + state->driver.select = NULL; + state->type = ERTS_EV_TYPE_NONE; + remember_removed(state, &pollset); + } + state->events = events; + ASSERT(event_data ? 
events == event_data->events : events == 0); + + ret = 0; + +done: +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) { + hash_erase_drv_ev_state(state); + } +#endif + erts_smp_mtx_unlock(fd_mtx(fd)); + return ret; +#endif +} + +static ERTS_INLINE int +chk_stale(Eterm id, ErtsDrvEventState *state, int mode) +{ + if (is_nil(id)) + return 0; + if (erts_is_port_alive(id)) + return 1; /* Steal */ + stale_drv_select(id, state, mode); + return 0; +} + +static int +need2steal(ErtsDrvEventState *state, int mode) +{ + int do_steal = 0; + switch (state->type) { + case ERTS_EV_TYPE_DRV_SEL: + if (mode & ERL_DRV_READ) + do_steal |= chk_stale(state->driver.select->inport, + state, + ERL_DRV_READ); + if (mode & ERL_DRV_WRITE) + do_steal |= chk_stale(state->driver.select->outport, + state, + ERL_DRV_WRITE); + break; +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_EV_TYPE_DRV_EV: + do_steal |= chk_stale(state->driver.event->port, state, 0); + break; +#endif + case ERTS_EV_TYPE_STOP_USE: + ASSERT(0); + break; + default: + break; + } + return do_steal; +} + +static void +print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) +{ + ErtsPortNames *pnp = erts_get_port_names(id); + if (!pnp->name && !pnp->driver_name) + erts_dsprintf(dsbufp, "%s ", ""); + else { + if (pnp->name) { + if (!pnp->driver_name || strcmp(pnp->driver_name, pnp->name) == 0) + erts_dsprintf(dsbufp, "%s ", pnp->name); + else + erts_dsprintf(dsbufp, "%s (%s) ", pnp->driver_name, pnp->name); + } + else if (pnp->driver_name) { + erts_dsprintf(dsbufp, "%s ", pnp->driver_name); + } + } + erts_free_port_names(pnp); +} + +static void +steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) +{ + erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd); + switch (state->type) { + case ERTS_EV_TYPE_DRV_SEL: { + int deselect_mode = 0; + Eterm iid = state->driver.select->inport; + Eterm oid = state->driver.select->outport; + if ((mode & ERL_DRV_READ) && (is_not_nil(iid))) { + erts_dsprintf(dsbufp, "input driver "); + print_driver_name(dsbufp, iid); + erts_dsprintf(dsbufp, "%T ", iid); + deselect_mode |= ERL_DRV_READ; + } + if ((mode & ERL_DRV_WRITE) && is_not_nil(oid)) { + if (deselect_mode) { + erts_dsprintf(dsbufp, "and "); + } + erts_dsprintf(dsbufp, "output driver "); + print_driver_name(dsbufp, oid); + erts_dsprintf(dsbufp, "%T ", oid); + deselect_mode |= ERL_DRV_WRITE; + } + if (deselect_mode) + deselect(state, deselect_mode); + else { + erts_dsprintf(dsbufp, "no one", (int) state->fd); + ASSERT(0); + } + erts_dsprintf(dsbufp, "\n"); + break; + } +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_EV_TYPE_DRV_EV: { + Eterm eid = state->driver.event->port; + if (is_nil(eid)) { + erts_dsprintf(dsbufp, "no one", (int) state->fd); + ASSERT(0); + } + else { + erts_dsprintf(dsbufp, "event driver "); + print_driver_name(dsbufp, eid); + erts_dsprintf(dsbufp, "%T ", eid); + } + erts_dsprintf(dsbufp, "\n"); + deselect(state, 0); + break; + } +#endif + case ERTS_EV_TYPE_STOP_USE: { + ASSERT(0); + break; + } + default: + erts_dsprintf(dsbufp, "no one\n", (int) state->fd); + ASSERT(0); + } +} + +static void +print_select_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) +{ + Port *pp = erts_drvport2port(ix); + erts_dsprintf(dsbufp, + "driver_select(%p, %d,%s%s%s%s, %d) " + "by ", + ix, + (int) fd, + mode & ERL_DRV_READ ? " ERL_DRV_READ" : "", + mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "", + mode & ERL_DRV_USE ? 
" ERL_DRV_USE" : "", + mode & (ERL_DRV_USE_NO_CALLBACK & ~ERL_DRV_USE) ? "_NO_CALLBACK" : "", + on); + print_driver_name(dsbufp, pp->id); + erts_dsprintf(dsbufp, "driver %T ", pp ? pp->id : NIL); +} + +static void +select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on) +{ + if (need2steal(state, mode)) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_select_op(dsbufp, ix, state->fd, mode, on); + steal(dsbufp, state, mode); + erts_send_error_to_logger_nogl(dsbufp); + } +} + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static void +large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd) +{ + erts_dsprintf(dsbufp, + "fd=%d is larger than the largest allowed fd=%d\n", + (int) fd, max_fds - 1); +} + +static void +select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) +{ + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_select_op(dsbufp, ix, fd, mode, on); + erts_dsprintf(dsbufp, "failed: "); + large_fd_error_common(dsbufp, fd); + erts_send_error_to_logger_nogl(dsbufp); +} +#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */ + + + +static void +steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, + ErtsDrvEventState *state, int mode, int on) +{ + ASSERT(state->type == ERTS_EV_TYPE_STOP_USE); + erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select " + "was called for driver %s\n", + (int) state->fd, state->driver.drv_ptr->name); + erts_send_error_to_logger_nogl(dsbufp); + + if (on) { + /* Either fd-owner changed its mind about closing + * or closed fd before stop_select callback and fd is now reused. + * In either case stop_select should not be called. + */ + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + if (state->driver.drv_ptr->handle) { + erts_ddll_dereference_driver(state->driver.drv_ptr->handle); + } + state->driver.drv_ptr = NULL; + } + else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + erts_driver_t* drv_ptr = erts_drvport2port(ix)->drv_ptr; + if (drv_ptr != state->driver.drv_ptr) { + /* Some other driver wants the stop_select callback */ + if (state->driver.drv_ptr->handle) { + erts_ddll_dereference_driver(state->driver.drv_ptr->handle); + } + if (drv_ptr->handle) { + erts_ddll_reference_referenced_driver(drv_ptr->handle); + } + state->driver.drv_ptr = drv_ptr; + } + } + +} + + +#if ERTS_CIO_HAVE_DRV_EVENT + +static void +print_event_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) +{ + Port *pp = erts_drvport2port(ix); + erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd); + if (!event_data) + erts_dsprintf(dsbufp, "NULL"); + else + erts_dsprintf(dsbufp, "{0x%x, 0x%x}", + (unsigned int) event_data->events, + (unsigned int) event_data->revents); + erts_dsprintf(dsbufp, ") by "); + print_driver_name(dsbufp, pp->id); + erts_dsprintf(dsbufp, "driver %T ", pp ? 
pp->id : NIL); +} + +static void +event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data) +{ + if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_event_op(dsbufp, ix, state->fd, event_data); + steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE); + erts_send_error_to_logger_nogl(dsbufp); + } + else if (state->type == ERTS_EV_TYPE_DRV_SEL) { + ASSERT(state->flags & ERTS_EV_FLAG_USED); + deselect(state, 0); + } +} + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS +static void +event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) +{ + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_event_op(dsbufp, ix, fd, event_data); + erts_dsprintf(dsbufp, "failed: "); + large_fd_error_common(dsbufp, fd); + erts_send_error_to_logger_nogl(dsbufp); +} +#endif +#endif + +static ERTS_INLINE void +iready(Eterm id, ErtsDrvEventState *state) +{ + if (erts_port_task_schedule(id, + &state->driver.select->intask, + ERTS_PORT_TASK_INPUT, + (ErlDrvEvent) state->fd, + NULL) != 0) { + stale_drv_select(id, state, ERL_DRV_READ); + } +} + +static ERTS_INLINE void +oready(Eterm id, ErtsDrvEventState *state) +{ + if (erts_port_task_schedule(id, + &state->driver.select->outtask, + ERTS_PORT_TASK_OUTPUT, + (ErlDrvEvent) state->fd, + NULL) != 0) { + stale_drv_select(id, state, ERL_DRV_WRITE); + } +} + +#if ERTS_CIO_HAVE_DRV_EVENT +static ERTS_INLINE void +eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data) +{ + if (erts_port_task_schedule(id, + &state->driver.event->task, + ERTS_PORT_TASK_EVENT, + (ErlDrvEvent) state->fd, + event_data) != 0) { + stale_drv_select(id, state, 0); + } +} +#endif + +static void bad_fd_in_pollset( ErtsDrvEventState *, Eterm, Eterm, ErtsPollEvents); + +void +ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set) +{ + ERTS_CIO_POLL_INTR(pollset.ps, set); +} + +void +ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, long msec) +{ + ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, msec); +} + +void +ERTS_CIO_EXPORT(erts_check_io)(int do_wait) +{ + ErtsPollResFd pollres[256]; + int pollres_len; + SysTimeval wait_time; + int poll_ret, i; + + restart: + + /* Figure out timeout value */ + if (do_wait) { + erts_time_remaining(&wait_time); + } else { /* poll only */ + wait_time.tv_sec = 0; + wait_time.tv_usec = 0; + } + +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_check_exact(NULL, 0); /* No locks should be locked */ +#endif + erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd); + + erts_smp_atomic_set(&pollset.in_poll_wait, 1); + + poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, &wait_time); + +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_check_exact(NULL, 0); /* No locks should be locked */ +#endif + erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL); + + erts_deliver_time(); /* sync the machine's idea of time */ + +#ifdef ERTS_BREAK_REQUESTED + if (ERTS_BREAK_REQUESTED) + erts_do_break_handling(); +#endif + + if (poll_ret != 0) { + erts_smp_atomic_set(&pollset.in_poll_wait, 0); + forget_removed(&pollset); + if (poll_ret == EAGAIN) { + goto restart; + } + + if (poll_ret != ETIMEDOUT + && poll_ret != EINTR +#ifdef ERRNO_BLOCK + && poll_ret != ERRNO_BLOCK +#endif + ) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "erts_poll_wait() failed: %s (%d)\n", + erl_errno_id(poll_ret), poll_ret); + erts_send_error_to_logger_nogl(dsbufp); + } + return; + } + + for 
(i = 0; i < pollres_len; i++) { + + ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd; + ErtsDrvEventState *state; + + erts_smp_mtx_lock(fd_mtx(fd)); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + state = &drv_ev_state[ (int) fd]; +#else + state = hash_get_drv_ev_state(fd); + if (!state) { + goto next_pollres; + } +#endif + + /* Skip this fd if it was removed from pollset */ + if (is_removed(state)) { + goto next_pollres; + } + + switch (state->type) { + case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */ + ErtsPollEvents revents; + ErtsPollEvents revent_mask; + + revent_mask = ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT); + revent_mask |= state->events; + revents = pollres[i].events & revent_mask; + + if (revents & ERTS_POLL_EV_ERR) { + /* + * Let the driver handle the error condition. Only input, + * only output, or nothing might have been selected. + * We *do not* want to call a callback that corresponds + * to an event not selected. revents might give us a clue + * on which one to call. + */ + if ((revents & ERTS_POLL_EV_IN) + || (!(revents & ERTS_POLL_EV_OUT) + && state->events & ERTS_POLL_EV_IN)) { + iready(state->driver.select->inport, state); + } + else if (state->events & ERTS_POLL_EV_OUT) { + oready(state->driver.select->outport, state); + } + } + else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { + if (revents & ERTS_POLL_EV_OUT) { + oready(state->driver.select->outport, state); + } + /* Someone might have deselected input since revents + was read (true also on the non-smp emulator since + oready() may have been called); therefore, update + revents... */ + revents &= ~(~state->events & ERTS_POLL_EV_IN); + if (revents & ERTS_POLL_EV_IN) { + iready(state->driver.select->inport, state); + } + } + else if (revents & ERTS_POLL_EV_NVAL) { + bad_fd_in_pollset(state, + state->driver.select->inport, + state->driver.select->outport, + state->events); + } + break; + } + +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */ + ErlDrvEventData event_data; + ErtsPollEvents revents; + ASSERT(state->driver.event); + ASSERT(state->driver.event->data); + event_data = state->driver.event->data; + revents = pollres[i].events; + revents &= ~state->driver.event->removed_events; + + if (revents) { + event_data->events = state->events; + event_data->revents = revents; + + eready(state->driver.event->port, state, event_data); + } + break; + } +#endif + + case ERTS_EV_TYPE_NONE: /* Deselected ... */ + break; + + default: { /* Error */ + erts_dsprintf_buf_t *dsbufp; + dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, + "Invalid event request type for fd in erts_poll()! 
" + "fd=%d, event request type=%sd\n", (int) state->fd, + (int) state->type); + ASSERT(0); + deselect(state, 0); + break; + } + } + + next_pollres:; +#ifdef ERTS_SMP + erts_smp_mtx_unlock(fd_mtx(fd)); +#endif + } + + erts_smp_atomic_set(&pollset.in_poll_wait, 0); + forget_removed(&pollset); +} + +static void +bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, + Eterm outport, ErtsPollEvents events) +{ + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + + if (events & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { + char *io_str; + Eterm port = NIL; + if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) { + io_str = "input/output"; + if (inport == outport) + port = inport; + } + else { + if (events & ERTS_POLL_EV_IN) { + io_str = "input"; + port = inport; + } + else { + io_str = "output"; + port = outport; + } + } + erts_dsprintf(dsbufp, + "Bad %s fd in erts_poll()! fd=%d, ", + io_str, (int) state->fd); + if (is_nil(port)) { + ErtsPortNames *ipnp = erts_get_port_names(inport); + ErtsPortNames *opnp = erts_get_port_names(outport); + erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n", + is_nil(inport) ? am_undefined : inport, + is_nil(outport) ? am_undefined : outport, + ipnp->driver_name ? ipnp->driver_name : "", + opnp->driver_name ? opnp->driver_name : "", + ipnp->name ? ipnp->name : "", + opnp->name ? opnp->name : ""); + erts_free_port_names(ipnp); + erts_free_port_names(opnp); + } + else { + ErtsPortNames *pnp = erts_get_port_names(port); + erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n", + is_nil(port) ? am_undefined : port, + pnp->driver_name ? pnp->driver_name : "", + pnp->name ? pnp->name : ""); + erts_free_port_names(pnp); + } + } + else { + erts_dsprintf(dsbufp, "Bad fd in erts_poll()! fd=%d\n", (int) state->fd); + } + erts_send_error_to_logger_nogl(dsbufp); + + /* unmap entry */ + deselect(state, 0); +} + +static void +stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode) +{ + erts_stale_drv_select(id, (ErlDrvEvent) state->fd, mode, 0); + deselect(state, mode); +} + +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS +static SafeHashValue drv_ev_state_hash(void *des) +{ + SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd; + return val ^ (val >> 8); /* Good enough for aligned pointer values? */ +} + +static int drv_ev_state_cmp(void *des1, void *des2) +{ + return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd + ? 0 : 1); +} + +static void *drv_ev_state_alloc(void *des_tmpl) +{ + ErtsDrvEventState *evstate; + erts_smp_spin_lock(&state_prealloc_lock); + if (state_prealloc_first == NULL) { + erts_smp_spin_unlock(&state_prealloc_lock); + evstate = (ErtsDrvEventState *) + erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState)); + } else { + evstate = state_prealloc_first; + state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next; + --num_state_prealloc; + erts_smp_spin_unlock(&state_prealloc_lock); + } + /* XXX: Already valid data if prealloced, could ignore template! 
*/ + *evstate = *((ErtsDrvEventState *) des_tmpl); + + return (void *) evstate; +} + +static void drv_ev_state_free(void *des) +{ + erts_smp_spin_lock(&state_prealloc_lock); + ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb; + state_prealloc_first = (ErtsDrvEventState *) des; + ++num_state_prealloc; + erts_smp_spin_unlock(&state_prealloc_lock); +} +#endif + +void +ERTS_CIO_EXPORT(erts_init_check_io)(void) +{ + erts_smp_atomic_init(&pollset.in_poll_wait, 0); + ERTS_CIO_POLL_INIT(); + pollset.ps = ERTS_CIO_NEW_POLLSET(); + +#ifdef ERTS_SMP + init_removed_fd_alloc(); + pollset.removed_list = NULL; + erts_smp_spinlock_init(&pollset.removed_list_lock, + "pollset_rm_list"); + { + int i; + for (i=0; ievents; + ErtsSysFdType fd = state->fd; +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + int internal = 0; + ErtsPollEvents ep_events = counters->epep[(int) fd]; +#endif + int err = 0; + +#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) + struct stat stat_buf; +#endif + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->events || ep_events) { + if (ep_events & ERTS_POLL_EV_NVAL) { + ep_events &= ~ERTS_POLL_EV_NVAL; + internal = 1; + counters->internal_fds++; + } + else + counters->used_fds++; +#else + if (state->events) { + counters->used_fds++; +#endif + + erts_printf("fd=%d ", (int) fd); + +#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) + if (fstat((int) fd, &stat_buf) < 0) + erts_printf("type=unknown "); + else { + erts_printf("type="); +#ifdef S_ISSOCK + if (S_ISSOCK(stat_buf.st_mode)) + erts_printf("sock "); + else +#endif +#ifdef S_ISFIFO + if (S_ISFIFO(stat_buf.st_mode)) + erts_printf("fifo "); + else +#endif +#ifdef S_ISCHR + if (S_ISCHR(stat_buf.st_mode)) + erts_printf("chr "); + else +#endif +#ifdef S_ISDIR + if (S_ISDIR(stat_buf.st_mode)) + erts_printf("dir "); + else +#endif +#ifdef S_ISBLK + if (S_ISBLK(stat_buf.st_mode)) + erts_printf("blk "); + else +#endif +#ifdef S_ISREG + if (S_ISREG(stat_buf.st_mode)) + erts_printf("reg "); + else +#endif +#ifdef S_ISLNK + if (S_ISLNK(stat_buf.st_mode)) + erts_printf("lnk "); + else +#endif +#ifdef S_ISDOOR + if (S_ISDOOR(stat_buf.st_mode)) + erts_printf("door "); + else +#endif +#ifdef S_ISWHT + if (S_ISWHT(stat_buf.st_mode)) + erts_printf("wht "); + else +#endif +#ifdef S_ISXATTR + if (S_ISXATTR(stat_buf.st_mode)) + erts_printf("xattr "); + else +#endif + erts_printf("unknown "); + } +#else + erts_printf("type=unknown "); +#endif + + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + erts_printf("driver_select "); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (internal) { + erts_printf("internal "); + err = 1; + } + + if (cio_events == ep_events) { + erts_printf("ev="); + if (print_events(cio_events) != 0) + err = 1; + } + else { + err = 1; + erts_printf("cio_ev="); + print_events(cio_events); + erts_printf(" ep_ev="); + print_events(ep_events); + } +#else + if (print_events(cio_events) != 0) + err = 1; +#endif + erts_printf(" "); + if (cio_events & ERTS_POLL_EV_IN) { + Eterm id = state->driver.select->inport; + if (is_nil(id)) { + erts_printf("inport=none inname=none indrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id); + erts_printf(" inport=%T inname=%s indrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? 
pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + if (cio_events & ERTS_POLL_EV_OUT) { + Eterm id = state->driver.select->outport; + if (is_nil(id)) { + erts_printf("outport=none outname=none outdrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id); + erts_printf(" outport=%T outname=%s outdrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + } + else if (state->type == ERTS_EV_TYPE_DRV_EV) { + Eterm id; + erts_printf("driver_event "); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (internal) { + erts_printf("internal "); + err = 1; + } + if (cio_events == ep_events) { + erts_printf("ev=0x%b32x", (Uint32) cio_events); + } + else { + err = 1; + erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); + erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); + } +#else + erts_printf("ev=0x%b32x", (Uint32) cio_events); +#endif + id = state->driver.event->port; + if (is_nil(id)) { + erts_printf(" port=none name=none drv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id); + erts_printf(" port=%T name=%s drv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + else if (internal) { + erts_printf("internal "); + if (cio_events) { + err = 1; + erts_printf("cio_ev="); + print_events(cio_events); + } + if (ep_events) { + erts_printf("ep_ev="); + print_events(ep_events); + } + } +#endif + else { + err = 1; + erts_printf("control_type=%d ", (int)state->type); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (cio_events == ep_events) { + erts_printf("ev=0x%b32x", (Uint32) cio_events); + } + else { + erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); + erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); + } +#else + erts_printf("ev=0x%b32x", (Uint32) cio_events); +#endif + } + + if (err) { + counters->num_errors++; + erts_printf(" ERROR"); + } + erts_printf("\n"); + } +} + +int +ERTS_CIO_EXPORT(erts_check_io_debug)(void) +{ +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + int fd, len; +#endif + IterDebugCounters counters; + ErtsDrvEventState null_des; + + null_des.driver.select = NULL; + null_des.events = 0; + null_des.remove_cnt = 0; + null_des.type = ERTS_EV_TYPE_NONE; + + erts_printf("--- fds in pollset --------------------------------------\n"); + +#ifdef ERTS_SMP +# ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_check_exact(NULL, 0); /* No locks should be locked */ +# endif + erts_block_system(0); /* stop the world to avoid messy locking */ +#endif + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds); + ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds); + counters.internal_fds = 0; +#endif + counters.used_fds = 0; + counters.num_errors = 0; + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + len = erts_smp_atomic_read(&drv_ev_state_len); + for (fd = 0; fd < len; fd++) { + doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters); + } + for ( ; fd < max_fds; fd++) { + null_des.fd = fd; + doit_erts_check_io_debug((void *) &null_des, (void *) &counters); + } +#else + safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters); +#endif + +#ifdef ERTS_SMP + erts_release_system(); +#endif + + erts_printf("\n"); + erts_printf("used fds=%d\n", counters.used_fds); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + 
erts_printf("internal fds=%d\n", counters.internal_fds); +#endif + erts_printf("---------------------------------------------------------\n"); + fflush(stdout); +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + erts_free(ERTS_ALC_T_TMP, (void *) counters.epep); +#endif + return counters.num_errors; +} + diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h new file mode 100644 index 0000000000..9b45a63913 --- /dev/null +++ b/erts/emulator/sys/common/erl_check_io.h @@ -0,0 +1,96 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Check I/O + * + * Author: Rickard Green + */ + +#ifndef ERL_CHECK_IO_H__ +#define ERL_CHECK_IO_H__ + +#include "erl_sys_driver.h" + +#ifdef ERTS_ENABLE_KERNEL_POLL + +int driver_select_kp(ErlDrvPort, ErlDrvEvent, int, int); +int driver_select_nkp(ErlDrvPort, ErlDrvEvent, int, int); +int driver_event_kp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); +int driver_event_nkp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); +Uint erts_check_io_size_kp(void); +Uint erts_check_io_size_nkp(void); +Eterm erts_check_io_info_kp(void *); +Eterm erts_check_io_info_nkp(void *); +int erts_check_io_max_files_kp(void); +int erts_check_io_max_files_nkp(void); +void erts_check_io_interrupt_kp(int); +void erts_check_io_interrupt_nkp(int); +void erts_check_io_interrupt_timed_kp(int, long); +void erts_check_io_interrupt_timed_nkp(int, long); +void erts_check_io_kp(int); +void erts_check_io_nkp(int); +void erts_init_check_io_kp(void); +void erts_init_check_io_nkp(void); +int erts_check_io_debug_kp(void); +int erts_check_io_debug_nkp(void); + +#else /* !ERTS_ENABLE_KERNEL_POLL */ + +Uint erts_check_io_size(void); +Eterm erts_check_io_info(void *); +int erts_check_io_max_files(void); +void erts_check_io_interrupt(int); +void erts_check_io_interrupt_timed(int, long); +void erts_check_io(int); +void erts_init_check_io(void); + +#endif + +#endif /* ERL_CHECK_IO_H__ */ + +#if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__) +#define ERL_CHECK_IO_INTERNAL__ +#endif + +#ifndef ERL_CHECK_IO_INTERNAL__ +#define ERL_CHECK_IO_INTERNAL__ +#include "erl_poll.h" +#include "erl_port_task.h" + +/* + * ErtsDrvEventDataState is used by driver_event() which is almost never + * used. We allocate ErtsDrvEventDataState separate since we dont wan't + * the size of ErtsDrvEventState to increase due to driver_event() + * information. 
+ */ +typedef struct { + Eterm port; + ErlDrvEventData data; + ErtsPollEvents removed_events; + ErtsPortTaskHandle task; +} ErtsDrvEventDataState; + +typedef struct { + Eterm inport; + Eterm outport; + ErtsPortTaskHandle intask; + ErtsPortTaskHandle outtask; +} ErtsDrvSelectDataState; +#endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c new file mode 100644 index 0000000000..f4e21bc05f --- /dev/null +++ b/erts/emulator/sys/common/erl_mseg.c @@ -0,0 +1,1452 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: A memory segment allocator. Segments that are deallocated + * are kept for a while in a segment "cache" before they are + * destroyed. When segments are allocated, cached segments + * are used if possible instead of creating new segments. + * + * Author: Rickard Green + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_mseg.h" +#include "global.h" +#include "erl_threads.h" +#include "erl_mtrace.h" +#include "big.h" + +#if HAVE_ERTS_MSEG + +#if defined(USE_THREADS) && !defined(ERTS_SMP) +# define ERTS_THREADS_NO_SMP +#endif + +#define SEGTYPE ERTS_MTRACE_SEGMENT_ID + +#ifndef HAVE_GETPAGESIZE +#define HAVE_GETPAGESIZE 0 +#endif + +#ifdef _SC_PAGESIZE +# define GET_PAGE_SIZE sysconf(_SC_PAGESIZE) +#elif HAVE_GETPAGESIZE +# define GET_PAGE_SIZE getpagesize() +#else +# error "Page size unknown" + /* Implement some other way to get the real page size if needed! */ +#endif + +#define MAX_CACHE_SIZE 30 + +#undef MIN +#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#undef MAX +#define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) + +#undef PAGE_MASK +#define INV_PAGE_MASK ((Uint) (page_size - 1)) +#define PAGE_MASK (~INV_PAGE_MASK) +#define PAGE_FLOOR(X) ((X) & PAGE_MASK) +#define PAGE_CEILING(X) PAGE_FLOOR((X) + INV_PAGE_MASK) +#define PAGES(X) ((X) >> page_shift) + +static int atoms_initialized; + +static Uint cache_check_interval; + +static void check_cache(void *unused); +static void mseg_clear_cache(void); +static int is_cache_check_scheduled; +#ifdef ERTS_THREADS_NO_SMP +static int is_cache_check_requested; +#endif + +#if HAVE_MMAP +/* Mmap ... 
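   (Sizes handed to mmap() are page aligned via the macros above: with a
   4096 byte page, PAGE_CEILING(5000) is 8192, PAGE_FLOOR(5000) is 4096
   and PAGES(8192) is 2.)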
*/ + +#define MMAP_PROT (PROT_READ|PROT_WRITE) +#ifdef MAP_ANON +# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) +# define MMAP_FD (-1) +#else +# define MMAP_FLAGS (MAP_PRIVATE) +# define MMAP_FD mmap_fd +static int mmap_fd; +#endif + +#if HAVE_MREMAP +# define HAVE_MSEG_RECREATE 1 +#else +# define HAVE_MSEG_RECREATE 0 +#endif + +#define CAN_PARTLY_DESTROY 1 +#else /* #if HAVE_MMAP */ +#define CAN_PARTLY_DESTROY 0 +#error "Not supported" +#endif /* #if HAVE_MMAP */ + + +#if defined(ERTS_MSEG_FAKE_SEGMENTS) +#undef CAN_PARTLY_DESTROY +#define CAN_PARTLY_DESTROY 0 +#endif + +static const ErtsMsegOpt_t default_opt = ERTS_MSEG_DEFAULT_OPT_INITIALIZER; + +typedef struct cache_desc_t_ { + void *seg; + Uint size; + struct cache_desc_t_ *next; + struct cache_desc_t_ *prev; +} cache_desc_t; + +typedef struct { + Uint32 giga_no; + Uint32 no; +} CallCounter; + +static int is_init_done; +static Uint page_size; +static Uint page_shift; + +static struct { + CallCounter alloc; + CallCounter dealloc; + CallCounter realloc; + CallCounter create; + CallCounter destroy; +#if HAVE_MSEG_RECREATE + CallCounter recreate; +#endif + CallCounter clear_cache; + CallCounter check_cache; +} calls; + +static cache_desc_t cache_descs[MAX_CACHE_SIZE]; +static cache_desc_t *free_cache_descs; +static cache_desc_t *cache; +static cache_desc_t *cache_end; +static Uint cache_hits; +static Uint cache_size; +static Uint min_cached_seg_size; +static Uint max_cached_seg_size; + +static Uint max_cache_size; +static Uint abs_max_cache_bad_fit; +static Uint rel_max_cache_bad_fit; + +#if CAN_PARTLY_DESTROY +static Uint min_seg_size; +#endif + +struct { + struct { + Uint watermark; + Uint no; + Uint sz; + } current; + struct { + Uint no; + Uint sz; + } max; + struct { + Uint no; + Uint sz; + } max_ever; +} segments; + +#define ERTS_MSEG_ALLOC_STAT(SZ) \ +do { \ + segments.current.no++; \ + if (segments.max.no < segments.current.no) \ + segments.max.no = segments.current.no; \ + if (segments.current.watermark < segments.current.no) \ + segments.current.watermark = segments.current.no; \ + segments.current.sz += (SZ); \ + if (segments.max.sz < segments.current.sz) \ + segments.max.sz = segments.current.sz; \ +} while (0) + +#define ERTS_MSEG_DEALLOC_STAT(SZ) \ +do { \ + ASSERT(segments.current.no > 0); \ + segments.current.no--; \ + ASSERT(segments.current.sz >= (SZ)); \ + segments.current.sz -= (SZ); \ +} while (0) + +#define ERTS_MSEG_REALLOC_STAT(OSZ, NSZ) \ +do { \ + ASSERT(segments.current.sz >= (OSZ)); \ + segments.current.sz -= (OSZ); \ + segments.current.sz += (NSZ); \ +} while (0) + +#define ONE_GIGA (1000000000) + +#define ZERO_CC(CC) (calls.CC.no = 0, calls.CC.giga_no = 0) + +#define INC_CC(CC) (calls.CC.no == ONE_GIGA - 1 \ + ? (calls.CC.giga_no++, calls.CC.no = 0) \ + : calls.CC.no++) + +#define DEC_CC(CC) (calls.CC.no == 0 \ + ? 
(calls.CC.giga_no--, \ + calls.CC.no = ONE_GIGA - 1) \ + : calls.CC.no--) + + +static erts_mtx_t mseg_mutex; /* Also needed when !USE_THREADS */ +static erts_mtx_t init_atoms_mutex; /* Also needed when !USE_THREADS */ + +#ifdef USE_THREADS +#ifdef ERTS_THREADS_NO_SMP +static erts_tid_t main_tid; +static int async_handle = -1; +#endif + +static void thread_safe_init(void) +{ + erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms"); + erts_mtx_init(&mseg_mutex, "mseg"); +#ifdef ERTS_THREADS_NO_SMP + main_tid = erts_thr_self(); +#endif +} + +#endif + +static ErlTimer cache_check_timer; + +static ERTS_INLINE void +schedule_cache_check(void) +{ + if (!is_cache_check_scheduled && is_init_done) { +#ifdef ERTS_THREADS_NO_SMP + if (!erts_equal_tids(erts_thr_self(), main_tid)) { + if (!is_cache_check_requested) { + is_cache_check_requested = 1; + sys_async_ready(async_handle); + } + } + else +#endif + { + cache_check_timer.active = 0; + erl_set_timer(&cache_check_timer, + check_cache, + NULL, + NULL, + cache_check_interval); + is_cache_check_scheduled = 1; +#ifdef ERTS_THREADS_NO_SMP + is_cache_check_requested = 0; +#endif + } + } +} + +#ifdef ERTS_THREADS_NO_SMP + +static void +check_schedule_cache_check(void) +{ + erts_mtx_lock(&mseg_mutex); + if (is_cache_check_requested + && !is_cache_check_scheduled) { + schedule_cache_check(); + } + erts_mtx_unlock(&mseg_mutex); +} + +#endif + +static void +mseg_shutdown(void) +{ +#ifdef ERTS_SMP + erts_mtx_lock(&mseg_mutex); +#endif + mseg_clear_cache(); +#ifdef ERTS_SMP + erts_mtx_unlock(&mseg_mutex); +#endif +} + +static ERTS_INLINE void * +mseg_create(Uint size) +{ + void *seg; + + ASSERT(size % page_size == 0); + +#if defined(ERTS_MSEG_FAKE_SEGMENTS) + seg = erts_sys_alloc(ERTS_ALC_N_INVALID, NULL, size); +#elif HAVE_MMAP + seg = (void *) mmap((void *) 0, (size_t) size, + MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0); + if (seg == (void *) MAP_FAILED) + seg = NULL; +#else +#error "Missing mseg_create() implementation" +#endif + + INC_CC(create); + + return seg; +} + +static ERTS_INLINE void +mseg_destroy(void *seg, Uint size) +{ +#if defined(ERTS_MSEG_FAKE_SEGMENTS) + erts_sys_free(ERTS_ALC_N_INVALID, NULL, seg); +#elif HAVE_MMAP + +#ifdef DEBUG + int res = +#endif + + munmap((void *) seg, size); + + ASSERT(size % page_size == 0); + ASSERT(res == 0); +#else +#error "Missing mseg_destroy() implementation" +#endif + + INC_CC(destroy); + +} + +#if HAVE_MSEG_RECREATE + +static ERTS_INLINE void * +mseg_recreate(void *old_seg, Uint old_size, Uint new_size) +{ + void *new_seg; + + ASSERT(old_size % page_size == 0); + ASSERT(new_size % page_size == 0); + +#if defined(ERTS_MSEG_FAKE_SEGMENTS) + new_seg = erts_sys_realloc(ERTS_ALC_N_INVALID, NULL, old_seg, new_size); +#elif HAVE_MREMAP + new_seg = (void *) mremap((void *) old_seg, + (size_t) old_size, + (size_t) new_size, + MREMAP_MAYMOVE); + if (new_seg == (void *) MAP_FAILED) + new_seg = NULL; +#else +#error "Missing mseg_recreate() implementation" +#endif + + INC_CC(recreate); + + return new_seg; +} + +#endif /* #if HAVE_MSEG_RECREATE */ + + +static ERTS_INLINE cache_desc_t * +alloc_cd(void) +{ + cache_desc_t *cd = free_cache_descs; + if (cd) + free_cache_descs = cd->next; + return cd; +} + +static ERTS_INLINE void +free_cd(cache_desc_t *cd) +{ + cd->next = free_cache_descs; + free_cache_descs = cd; +} + + +static ERTS_INLINE void +link_cd(cache_desc_t *cd) +{ + if (cache) + cache->prev = cd; + cd->next = cache; + cd->prev = NULL; + cache = cd; + + if (!cache_end) { + ASSERT(!cd->next); + cache_end = cd; + } + + cache_size++; 
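    /* New entries are linked at the head; end_link_cd() below appends at
       the tail, and eviction (adjust_cache_size(), and mseg_dealloc() when
       free_cache_descs is empty) always takes cache_end, so the cache is
       trimmed from its oldest end. */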
+} + +static ERTS_INLINE void +end_link_cd(cache_desc_t *cd) +{ + if (cache_end) + cache_end->next = cd; + cd->next = NULL; + cd->prev = cache_end; + cache_end = cd; + + if (!cache) { + ASSERT(!cd->prev); + cache = cd; + } + + cache_size++; +} + +static ERTS_INLINE void +unlink_cd(cache_desc_t *cd) +{ + + if (cd->next) + cd->next->prev = cd->prev; + else + cache_end = cd->prev; + + if (cd->prev) + cd->prev->next = cd->next; + else + cache = cd->next; + ASSERT(cache_size > 0); + cache_size--; +} + +static ERTS_INLINE void +check_cache_limits(void) +{ + cache_desc_t *cd; + max_cached_seg_size = 0; + min_cached_seg_size = ~((Uint) 0); + for (cd = cache; cd; cd = cd->next) { + if (cd->size < min_cached_seg_size) + min_cached_seg_size = cd->size; + if (cd->size > max_cached_seg_size) + max_cached_seg_size = cd->size; + } + +} + +static ERTS_INLINE void +adjust_cache_size(int force_check_limits) +{ + cache_desc_t *cd; + int check_limits = force_check_limits; + Sint max_cached = ((Sint) segments.current.watermark + - (Sint) segments.current.no); + + while (((Sint) cache_size) > max_cached && ((Sint) cache_size) > 0) { + ASSERT(cache_end); + cd = cache_end; + if (!check_limits && + !(min_cached_seg_size < cd->size + && cd->size < max_cached_seg_size)) { + check_limits = 1; + } + if (erts_mtrace_enabled) + erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg); + mseg_destroy(cd->seg, cd->size); + unlink_cd(cd); + free_cd(cd); + } + + if (check_limits) + check_cache_limits(); + +} + +static void +check_cache(void *unused) +{ +#ifdef ERTS_SMP + erts_mtx_lock(&mseg_mutex); +#endif + + is_cache_check_scheduled = 0; + + if (segments.current.watermark > segments.current.no) + segments.current.watermark--; + adjust_cache_size(0); + + if (cache_size) + schedule_cache_check(); + + INC_CC(check_cache); + +#ifdef ERTS_SMP + erts_mtx_unlock(&mseg_mutex); +#endif + +} + +static void +mseg_clear_cache(void) +{ + segments.current.watermark = 0; + + adjust_cache_size(1); + + ASSERT(!cache); + ASSERT(!cache_end); + ASSERT(!cache_size); + + segments.current.watermark = segments.current.no; + + INC_CC(clear_cache); +} + +static void * +mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt) +{ + + Uint max, min, diff_size, size; + cache_desc_t *cd, *cand_cd; + void *seg; + + INC_CC(alloc); + + size = PAGE_CEILING(*size_p); + +#if CAN_PARTLY_DESTROY + if (size < min_seg_size) + min_seg_size = size; +#endif + + if (!opt->cache) { + create_seg: + adjust_cache_size(0); + seg = mseg_create(size); + if (!seg) { + mseg_clear_cache(); + seg = mseg_create(size); + if (!seg) + size = 0; + } + + *size_p = size; + if (seg) { + if (erts_mtrace_enabled) + erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size); + ERTS_MSEG_ALLOC_STAT(size); + } + return seg; + } + + if (size > max_cached_seg_size) + goto create_seg; + + if (size < min_cached_seg_size) { + + diff_size = min_cached_seg_size - size; + + if (diff_size > abs_max_cache_bad_fit) + goto create_seg; + + if (100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size)) + goto create_seg; + + } + + max = 0; + min = ~((Uint) 0); + cand_cd = NULL; + + for (cd = cache; cd; cd = cd->next) { + if (cd->size >= size) { + if (!cand_cd) { + cand_cd = cd; + continue; + } + else if (cd->size < cand_cd->size) { + if (max < cand_cd->size) + max = cand_cd->size; + if (min > cand_cd->size) + min = cand_cd->size; + cand_cd = cd; + continue; + } + } + if (max < cd->size) + max = cd->size; + if (min > cd->size) + min = cd->size; + } + + min_cached_seg_size = min; + 
max_cached_seg_size = max; + + if (!cand_cd) + goto create_seg; + + diff_size = cand_cd->size - size; + + if (diff_size > abs_max_cache_bad_fit + || 100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size)) { + if (max_cached_seg_size < cand_cd->size) + max_cached_seg_size = cand_cd->size; + if (min_cached_seg_size > cand_cd->size) + min_cached_seg_size = cand_cd->size; + goto create_seg; + } + + cache_hits++; + + size = cand_cd->size; + seg = cand_cd->seg; + + unlink_cd(cand_cd); + free_cd(cand_cd); + + *size_p = size; + + if (erts_mtrace_enabled) { + erts_mtrace_crr_free(SEGTYPE, SEGTYPE, seg); + erts_mtrace_crr_alloc(seg, atype, SEGTYPE, size); + } + + if (seg) + ERTS_MSEG_ALLOC_STAT(size); + return seg; +} + + +static void +mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size, + const ErtsMsegOpt_t *opt) +{ + cache_desc_t *cd; + + ERTS_MSEG_DEALLOC_STAT(size); + + if (!opt->cache || max_cache_size == 0) { + if (erts_mtrace_enabled) + erts_mtrace_crr_free(atype, SEGTYPE, seg); + mseg_destroy(seg, size); + } + else { + int check_limits = 0; + + if (size < min_cached_seg_size) + min_cached_seg_size = size; + if (size > max_cached_seg_size) + max_cached_seg_size = size; + + if (!free_cache_descs) { + cd = cache_end; + if (!(min_cached_seg_size < cd->size + && cd->size < max_cached_seg_size)) { + check_limits = 1; + } + if (erts_mtrace_enabled) + erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg); + mseg_destroy(cd->seg, cd->size); + unlink_cd(cd); + free_cd(cd); + } + + cd = alloc_cd(); + ASSERT(cd); + cd->seg = seg; + cd->size = size; + link_cd(cd); + + if (erts_mtrace_enabled) { + erts_mtrace_crr_free(atype, SEGTYPE, seg); + erts_mtrace_crr_alloc(seg, SEGTYPE, SEGTYPE, size); + } + + /* ASSERT(segments.current.watermark >= segments.current.no + cache_size); */ + + if (check_limits) + check_cache_limits(); + + schedule_cache_check(); + + } + + INC_CC(dealloc); +} + +static void * +mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p, + const ErtsMsegOpt_t *opt) +{ + void *new_seg; + Uint new_size; + + if (!seg || !old_size) { + new_seg = mseg_alloc(atype, new_size_p, opt); + DEC_CC(alloc); + return new_seg; + } + + if (!(*new_size_p)) { + mseg_dealloc(atype, seg, old_size, opt); + DEC_CC(dealloc); + return NULL; + } + + new_seg = seg; + new_size = PAGE_CEILING(*new_size_p); + + if (new_size == old_size) + ; + else if (new_size < old_size) { + Uint shrink_sz = old_size - new_size; + +#if CAN_PARTLY_DESTROY + if (new_size < min_seg_size) + min_seg_size = new_size; +#endif + + if (shrink_sz < opt->abs_shrink_th + && 100*PAGES(shrink_sz) < opt->rel_shrink_th*PAGES(old_size)) { + new_size = old_size; + } + else { + +#if CAN_PARTLY_DESTROY + + if (shrink_sz > min_seg_size + && free_cache_descs + && opt->cache) { + cache_desc_t *cd; + + cd = alloc_cd(); + ASSERT(cd); + cd->seg = ((char *) seg) + new_size; + cd->size = shrink_sz; + end_link_cd(cd); + + if (erts_mtrace_enabled) { + erts_mtrace_crr_realloc(new_seg, + atype, + SEGTYPE, + seg, + new_size); + erts_mtrace_crr_alloc(cd->seg, SEGTYPE, SEGTYPE, cd->size); + } + schedule_cache_check(); + } + else { + if (erts_mtrace_enabled) + erts_mtrace_crr_realloc(new_seg, + atype, + SEGTYPE, + seg, + new_size); + mseg_destroy(((char *) seg) + new_size, shrink_sz); + } + +#elif HAVE_MSEG_RECREATE + + goto do_recreate; + +#else + + new_seg = mseg_alloc(atype, &new_size, opt); + if (!new_seg) + new_size = old_size; + else { + sys_memcpy(((char *) new_seg), + ((char *) seg), + MIN(new_size, old_size)); + mseg_dealloc(atype, seg, 
old_size, opt); + } + +#endif + + } + } + else { + + if (!opt->preserv) { + mseg_dealloc(atype, seg, old_size, opt); + new_seg = mseg_alloc(atype, &new_size, opt); + } + else { +#if HAVE_MSEG_RECREATE +#if !CAN_PARTLY_DESTROY + do_recreate: +#endif + new_seg = mseg_recreate((void *) seg, old_size, new_size); + if (erts_mtrace_enabled) + erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); + if (!new_seg) + new_size = old_size; +#else + new_seg = mseg_alloc(atype, &new_size, opt); + if (!new_seg) + new_size = old_size; + else { + sys_memcpy(((char *) new_seg), + ((char *) seg), + MIN(new_size, old_size)); + mseg_dealloc(atype, seg, old_size, opt); + } +#endif + } + } + + INC_CC(realloc); + + *new_size_p = new_size; + + ERTS_MSEG_REALLOC_STAT(old_size, new_size); + + return new_seg; +} + +/* --- Info stuff ---------------------------------------------------------- */ + +static struct { + Eterm version; + + Eterm options; + Eterm amcbf; + Eterm rmcbf; + Eterm mcs; + Eterm cci; + + Eterm status; + Eterm cached_segments; + Eterm cache_hits; + Eterm segments; + Eterm segments_size; + Eterm segments_watermark; + + + Eterm calls; + Eterm mseg_alloc; + Eterm mseg_dealloc; + Eterm mseg_realloc; + Eterm mseg_create; + Eterm mseg_destroy; +#if HAVE_MSEG_RECREATE + Eterm mseg_recreate; +#endif + Eterm mseg_clear_cache; + Eterm mseg_check_cache; + +#ifdef DEBUG + Eterm end_of_atoms; +#endif +} am; + +static void ERTS_INLINE atom_init(Eterm *atom, char *name) +{ + *atom = am_atom_put(name, strlen(name)); +} +#define AM_INIT(AM) atom_init(&am.AM, #AM) + +static void +init_atoms(void) +{ +#ifdef DEBUG + Eterm *atom; +#endif + + erts_mtx_unlock(&mseg_mutex); + erts_mtx_lock(&init_atoms_mutex); + + if (!atoms_initialized) { +#ifdef DEBUG + for (atom = (Eterm *) &am; atom <= &am.end_of_atoms; atom++) { + *atom = THE_NON_VALUE; + } +#endif + + AM_INIT(version); + + AM_INIT(options); + AM_INIT(amcbf); + AM_INIT(rmcbf); + AM_INIT(mcs); + AM_INIT(cci); + + AM_INIT(status); + AM_INIT(cached_segments); + AM_INIT(cache_hits); + AM_INIT(segments); + AM_INIT(segments_size); + AM_INIT(segments_watermark); + + AM_INIT(calls); + AM_INIT(mseg_alloc); + AM_INIT(mseg_dealloc); + AM_INIT(mseg_realloc); + AM_INIT(mseg_create); + AM_INIT(mseg_destroy); +#if HAVE_MSEG_RECREATE + AM_INIT(mseg_recreate); +#endif + AM_INIT(mseg_clear_cache); + AM_INIT(mseg_check_cache); + +#ifdef DEBUG + for (atom = (Eterm *) &am; atom < &am.end_of_atoms; atom++) { + ASSERT(*atom != THE_NON_VALUE); + } +#endif + } + + erts_mtx_lock(&mseg_mutex); + atoms_initialized = 1; + erts_mtx_unlock(&init_atoms_mutex); +} + + +#define bld_uint erts_bld_uint +#define bld_cons erts_bld_cons +#define bld_tuple erts_bld_tuple +#define bld_string erts_bld_string +#define bld_2tup_list erts_bld_2tup_list + + +/* + * bld_unstable_uint() (instead of bld_uint()) is used when values may + * change between size check and actual build. This because a value + * that would fit a small when size check is done may need to be built + * as a big when the actual build is performed. Caller is required to + * HRelease after build. 
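+ *
+ * Sketch of the intended two-pass call pattern (illustrative only, not
+ * from the original source; 'p' is the calling process):
+ *
+ *   Uint sz = 0;
+ *   Uint *hp_start, *hp;
+ *   (void) bld_unstable_uint(NULL, &sz, val);  -- size pass (worst case)
+ *   hp = hp_start = HAlloc(p, sz);
+ *   res = bld_unstable_uint(&hp, NULL, val);   -- build pass
+ *   HRelease(p, hp_start + sz, hp);            -- give back unused words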
+ */ +static ERTS_INLINE Eterm +bld_unstable_uint(Uint **hpp, Uint *szp, Uint ui) +{ + Eterm res = THE_NON_VALUE; + if (szp) + *szp += BIG_UINT_HEAP_SIZE; + if (hpp) { + if (IS_USMALL(0, ui)) + res = make_small(ui); + else { + res = uint_to_big(ui, *hpp); + *hpp += BIG_UINT_HEAP_SIZE; + } + } + return res; +} + +static ERTS_INLINE void +add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2) +{ + *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 2, el1, el2), *lp); +} + +static ERTS_INLINE void +add_3tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2, Eterm el3) +{ + *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 3, el1, el2, el3), *lp); +} + +static ERTS_INLINE void +add_4tup(Uint **hpp, Uint *szp, Eterm *lp, + Eterm el1, Eterm el2, Eterm el3, Eterm el4) +{ + *lp = bld_cons(hpp, szp, bld_tuple(hpp, szp, 4, el1, el2, el3, el4), *lp); +} + +static Eterm +info_options(char *prefix, + int *print_to_p, + void *print_to_arg, + Uint **hpp, + Uint *szp) +{ + Eterm res = THE_NON_VALUE; + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + erts_print(to, arg, "%samcbf: %bpu\n", prefix, abs_max_cache_bad_fit); + erts_print(to, arg, "%srmcbf: %bpu\n", prefix, rel_max_cache_bad_fit); + erts_print(to, arg, "%smcs: %bpu\n", prefix, max_cache_size); + erts_print(to, arg, "%scci: %bpu\n", prefix, cache_check_interval); + } + + if (hpp || szp) { + + if (!atoms_initialized) + init_atoms(); + + res = NIL; + add_2tup(hpp, szp, &res, + am.cci, + bld_uint(hpp, szp, cache_check_interval)); + add_2tup(hpp, szp, &res, + am.mcs, + bld_uint(hpp, szp, max_cache_size)); + add_2tup(hpp, szp, &res, + am.rmcbf, + bld_uint(hpp, szp, rel_max_cache_bad_fit)); + add_2tup(hpp, szp, &res, + am.amcbf, + bld_uint(hpp, szp, abs_max_cache_bad_fit)); + + } + + return res; +} + +static Eterm +info_calls(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) +{ + Eterm res = THE_NON_VALUE; + + if (print_to_p) { + +#define PRINT_CC(TO, TOA, CC) \ + if (calls.CC.giga_no == 0) \ + erts_print(TO, TOA, "mseg_%s calls: %bpu\n", #CC, calls.CC.no); \ + else \ + erts_print(TO, TOA, "mseg_%s calls: %bpu%09bpu\n", #CC, \ + calls.CC.giga_no, calls.CC.no) + + int to = *print_to_p; + void *arg = print_to_arg; + + PRINT_CC(to, arg, alloc); + PRINT_CC(to, arg, dealloc); + PRINT_CC(to, arg, realloc); + PRINT_CC(to, arg, create); + PRINT_CC(to, arg, destroy); +#if HAVE_MSEG_RECREATE + PRINT_CC(to, arg, recreate); +#endif + PRINT_CC(to, arg, clear_cache); + PRINT_CC(to, arg, check_cache); + +#undef PRINT_CC + + } + + if (hpp || szp) { + + res = NIL; + + add_3tup(hpp, szp, &res, + am.mseg_check_cache, + bld_unstable_uint(hpp, szp, calls.check_cache.giga_no), + bld_unstable_uint(hpp, szp, calls.check_cache.no)); + add_3tup(hpp, szp, &res, + am.mseg_clear_cache, + bld_unstable_uint(hpp, szp, calls.clear_cache.giga_no), + bld_unstable_uint(hpp, szp, calls.clear_cache.no)); + +#if HAVE_MSEG_RECREATE + add_3tup(hpp, szp, &res, + am.mseg_recreate, + bld_unstable_uint(hpp, szp, calls.recreate.giga_no), + bld_unstable_uint(hpp, szp, calls.recreate.no)); +#endif + add_3tup(hpp, szp, &res, + am.mseg_destroy, + bld_unstable_uint(hpp, szp, calls.destroy.giga_no), + bld_unstable_uint(hpp, szp, calls.destroy.no)); + add_3tup(hpp, szp, &res, + am.mseg_create, + bld_unstable_uint(hpp, szp, calls.create.giga_no), + bld_unstable_uint(hpp, szp, calls.create.no)); + + + add_3tup(hpp, szp, &res, + am.mseg_realloc, + bld_unstable_uint(hpp, szp, calls.realloc.giga_no), + bld_unstable_uint(hpp, szp, calls.realloc.no)); + add_3tup(hpp, szp, 
&res, + am.mseg_dealloc, + bld_unstable_uint(hpp, szp, calls.dealloc.giga_no), + bld_unstable_uint(hpp, szp, calls.dealloc.no)); + add_3tup(hpp, szp, &res, + am.mseg_alloc, + bld_unstable_uint(hpp, szp, calls.alloc.giga_no), + bld_unstable_uint(hpp, szp, calls.alloc.no)); + } + + return res; +} + +static Eterm +info_status(int *print_to_p, + void *print_to_arg, + int begin_new_max_period, + Uint **hpp, + Uint *szp) +{ + Eterm res = THE_NON_VALUE; + + if (segments.max_ever.no < segments.max.no) + segments.max_ever.no = segments.max.no; + if (segments.max_ever.sz < segments.max.sz) + segments.max_ever.sz = segments.max.sz; + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + + erts_print(to, arg, "cached_segments: %bpu\n", cache_size); + erts_print(to, arg, "cache_hits: %bpu\n", cache_hits); + erts_print(to, arg, "segments: %bpu %bpu %bpu\n", + segments.current.no, segments.max.no, segments.max_ever.no); + erts_print(to, arg, "segments_size: %bpu %bpu %bpu\n", + segments.current.sz, segments.max.sz, segments.max_ever.sz); + erts_print(to, arg, "segments_watermark: %bpu\n", + segments.current.watermark); + } + + if (hpp || szp) { + res = NIL; + add_2tup(hpp, szp, &res, + am.segments_watermark, + bld_unstable_uint(hpp, szp, segments.current.watermark)); + add_4tup(hpp, szp, &res, + am.segments_size, + bld_unstable_uint(hpp, szp, segments.current.sz), + bld_unstable_uint(hpp, szp, segments.max.sz), + bld_unstable_uint(hpp, szp, segments.max_ever.sz)); + add_4tup(hpp, szp, &res, + am.segments, + bld_unstable_uint(hpp, szp, segments.current.no), + bld_unstable_uint(hpp, szp, segments.max.no), + bld_unstable_uint(hpp, szp, segments.max_ever.no)); + add_2tup(hpp, szp, &res, + am.cache_hits, + bld_unstable_uint(hpp, szp, cache_hits)); + add_2tup(hpp, szp, &res, + am.cached_segments, + bld_unstable_uint(hpp, szp, cache_size)); + + } + + if (begin_new_max_period) { + segments.max.no = segments.current.no; + segments.max.sz = segments.current.sz; + } + + return res; +} + +static Eterm +info_version(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) +{ + Eterm res = THE_NON_VALUE; + + if (print_to_p) { + erts_print(*print_to_p, print_to_arg, "version: %s\n", + ERTS_MSEG_VSN_STR); + } + + if (hpp || szp) { + res = bld_string(hpp, szp, ERTS_MSEG_VSN_STR); + } + + return res; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * Exported functions * +\* */ + +Eterm +erts_mseg_info_options(int *print_to_p, void *print_to_arg, + Uint **hpp, Uint *szp) +{ + Eterm res; + + erts_mtx_lock(&mseg_mutex); + + res = info_options("option ", print_to_p, print_to_arg, hpp, szp); + + erts_mtx_unlock(&mseg_mutex); + + return res; +} + +Eterm +erts_mseg_info(int *print_to_p, + void *print_to_arg, + int begin_max_per, + Uint **hpp, + Uint *szp) +{ + Eterm res = THE_NON_VALUE; + Eterm atoms[4]; + Eterm values[4]; + + erts_mtx_lock(&mseg_mutex); + + if (hpp || szp) { + + if (!atoms_initialized) + init_atoms(); + + atoms[0] = am.version; + atoms[1] = am.options; + atoms[2] = am.status; + atoms[3] = am.calls; + } + + values[0] = info_version(print_to_p, print_to_arg, hpp, szp); + values[1] = info_options("option ", print_to_p, print_to_arg, hpp, szp); + values[2] = info_status(print_to_p, print_to_arg, begin_max_per, hpp, szp); + values[3] = info_calls(print_to_p, print_to_arg, hpp, szp); + + if (hpp || szp) + res = bld_2tup_list(hpp, szp, 4, atoms, values); + + erts_mtx_unlock(&mseg_mutex); + + return res; +} + +void * +erts_mseg_alloc_opt(ErtsAlcType_t 
atype, Uint *size_p, const ErtsMsegOpt_t *opt) +{ + void *seg; + erts_mtx_lock(&mseg_mutex); + seg = mseg_alloc(atype, size_p, opt); + erts_mtx_unlock(&mseg_mutex); + return seg; +} + +void * +erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p) +{ + return erts_mseg_alloc_opt(atype, size_p, &default_opt); +} + +void +erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, Uint size, + const ErtsMsegOpt_t *opt) +{ + erts_mtx_lock(&mseg_mutex); + mseg_dealloc(atype, seg, size, opt); + erts_mtx_unlock(&mseg_mutex); +} + +void +erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size) +{ + erts_mseg_dealloc_opt(atype, seg, size, &default_opt); +} + +void * +erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg, Uint old_size, + Uint *new_size_p, const ErtsMsegOpt_t *opt) +{ + void *new_seg; + erts_mtx_lock(&mseg_mutex); + new_seg = mseg_realloc(atype, seg, old_size, new_size_p, opt); + erts_mtx_unlock(&mseg_mutex); + return new_seg; +} + +void * +erts_mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, + Uint *new_size_p) +{ + return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p, &default_opt); +} + +void +erts_mseg_clear_cache(void) +{ + erts_mtx_lock(&mseg_mutex); + mseg_clear_cache(); + erts_mtx_unlock(&mseg_mutex); +} + +Uint +erts_mseg_no(void) +{ + Uint n; + erts_mtx_lock(&mseg_mutex); + n = segments.current.no; + erts_mtx_unlock(&mseg_mutex); + return n; +} + +Uint +erts_mseg_unit_size(void) +{ + return page_size; +} + +void +erts_mseg_init(ErtsMsegInit_t *init) +{ + unsigned i; + + atoms_initialized = 0; + is_init_done = 0; + + /* Options ... */ + + abs_max_cache_bad_fit = init->amcbf; + rel_max_cache_bad_fit = init->rmcbf; + max_cache_size = init->mcs; + cache_check_interval = init->cci; + + /* */ + +#ifdef USE_THREADS + thread_safe_init(); +#endif + +#if HAVE_MMAP && !defined(MAP_ANON) + mmap_fd = open("/dev/zero", O_RDWR); + if (mmap_fd < 0) + erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n"); +#endif + + page_size = GET_PAGE_SIZE; + + page_shift = 1; + while ((page_size >> page_shift) != 1) { + if ((page_size & (1 << (page_shift - 1))) != 0) + erl_exit(ERTS_ABORT_EXIT, + "erts_mseg: Unexpected page_size %bpu\n", page_size); + page_shift++; + } + + sys_memzero((void *) &calls, sizeof(calls)); + +#if CAN_PARTLY_DESTROY + min_seg_size = ~((Uint) 0); +#endif + + cache = NULL; + cache_end = NULL; + cache_hits = 0; + max_cached_seg_size = 0; + min_cached_seg_size = ~((Uint) 0); + cache_size = 0; + + is_cache_check_scheduled = 0; +#ifdef ERTS_THREADS_NO_SMP + is_cache_check_requested = 0; +#endif + + if (max_cache_size > MAX_CACHE_SIZE) + max_cache_size = MAX_CACHE_SIZE; + + if (max_cache_size > 0) { + for (i = 0; i < max_cache_size - 1; i++) + cache_descs[i].next = &cache_descs[i + 1]; + cache_descs[max_cache_size - 1].next = NULL; + free_cache_descs = &cache_descs[0]; + } + else + free_cache_descs = NULL; + + segments.current.watermark = 0; + segments.current.no = 0; + segments.current.sz = 0; + segments.max.no = 0; + segments.max.sz = 0; + segments.max_ever.no = 0; + segments.max_ever.sz = 0; +} + + +/* + * erts_mseg_late_init() have to be called after all allocators, + * threads and timers have been initialized. 
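+ *
+ * Sketch of the expected call order (illustrative; the real sequence
+ * is in the emulator start-up code):
+ *
+ *   erts_mseg_init(&init);   -- early; no timers or threads needed yet
+ *   ...allocators, threads and timers are initialized...
+ *   erts_mseg_late_init();   -- from here on cache checks get scheduled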
+ */ +void +erts_mseg_late_init(void) +{ +#ifdef ERTS_THREADS_NO_SMP + int handle = + erts_register_async_ready_callback( + check_schedule_cache_check); +#endif + erts_mtx_lock(&mseg_mutex); + is_init_done = 1; +#ifdef ERTS_THREADS_NO_SMP + async_handle = handle; +#endif + if (cache_size) + schedule_cache_check(); + erts_mtx_unlock(&mseg_mutex); +} + +void +erts_mseg_exit(void) +{ + mseg_shutdown(); +} + +#endif /* #if HAVE_ERTS_MSEG */ + +unsigned long +erts_mseg_test(unsigned long op, + unsigned long a1, + unsigned long a2, + unsigned long a3) +{ + switch (op) { +#if HAVE_ERTS_MSEG + case 0x400: /* Have erts_mseg */ + return (unsigned long) 1; + case 0x401: + return (unsigned long) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1); + case 0x402: + erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2); + return (unsigned long) 0; + case 0x403: + return (unsigned long) erts_mseg_realloc(ERTS_ALC_A_INVALID, + (void *) a1, + (Uint) a2, + (Uint *) a3); + case 0x404: + erts_mseg_clear_cache(); + return (unsigned long) 0; + case 0x405: + return (unsigned long) erts_mseg_no(); + case 0x406: { + unsigned long res; + erts_mtx_lock(&mseg_mutex); + res = (unsigned long) cache_size; + erts_mtx_unlock(&mseg_mutex); + return res; + } +#else /* #if HAVE_ERTS_MSEG */ + case 0x400: /* Have erts_mseg */ + return (unsigned long) 0; +#endif /* #if HAVE_ERTS_MSEG */ + default: ASSERT(0); return ~((unsigned long) 0); + } + +} + + diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h new file mode 100644 index 0000000000..1c5aa63e90 --- /dev/null +++ b/erts/emulator/sys/common/erl_mseg.h @@ -0,0 +1,97 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ * + * %CopyrightEnd% + */ + +#ifndef ERL_MSEG_H_ +#define ERL_MSEG_H_ + +#include "sys.h" +#include "erl_alloc_types.h" + +#ifndef HAVE_MMAP +# define HAVE_MMAP 0 +#endif +#ifndef HAVE_MREMAP +# define HAVE_MREMAP 0 +#endif + +#if HAVE_MMAP +# define HAVE_ERTS_MSEG 1 +#else +# define HAVE_ERTS_MSEG 0 +#endif + +#if HAVE_ERTS_MSEG + +#define ERTS_MSEG_VSN_STR "0.9" + +typedef struct { + Uint amcbf; + Uint rmcbf; + Uint mcs; + Uint cci; +} ErtsMsegInit_t; + +#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \ +{ \ + 4*1024*1024, /* amcbf: Absolute max cache bad fit */ \ + 20, /* rmcbf: Relative max cache bad fit */ \ + 5, /* mcs: Max cache size */ \ + 1000 /* cci: Cache check interval */ \ +} + +typedef struct { + int cache; + int preserv; + Uint abs_shrink_th; + Uint rel_shrink_th; +} ErtsMsegOpt_t; + +#define ERTS_MSEG_DEFAULT_OPT_INITIALIZER \ +{ \ + 1, /* Use cache */ \ + 1, /* Preserv data */ \ + 0, /* Absolute shrink threshold */ \ + 0 /* Relative shrink threshold */ \ +} + +void *erts_mseg_alloc(ErtsAlcType_t, Uint *); +void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, const ErtsMsegOpt_t *); +void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint); +void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, const ErtsMsegOpt_t *); +void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *); +void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *, + const ErtsMsegOpt_t *); +void erts_mseg_clear_cache(void); +Uint erts_mseg_no(void); +Uint erts_mseg_unit_size(void); +void erts_mseg_init(ErtsMsegInit_t *init); +void erts_mseg_late_init(void); /* Have to be called after all allocators, + threads and timers have been initialized. */ +void erts_mseg_exit(void); +Eterm erts_mseg_info_options(int *, void*, Uint **, Uint *); +Eterm erts_mseg_info(int *, void*, int, Uint **, Uint *); + +#endif /* #if HAVE_ERTS_MSEG */ + +unsigned long erts_mseg_test(unsigned long, + unsigned long, + unsigned long, + unsigned long); + +#endif /* #ifndef ERL_MSEG_H_ */ diff --git a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c new file mode 100644 index 0000000000..408aa7e016 --- /dev/null +++ b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c @@ -0,0 +1,245 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. 
+ *
+ * %CopyrightEnd%
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "sys.h"
+#include "erl_mtrace.h"
+
+#ifdef ERTS_CAN_TRACK_MALLOC
+#if defined(HAVE_END_SYMBOL)
+extern char end;
+#elif defined(HAVE__END_SYMBOL)
+extern char _end;
+#endif
+
+static int inited = 0;
+static int init(void);
+
+static volatile char *heap_start = NULL;
+static volatile char *heap_end = NULL;
+
+#if defined(ERTS___AFTER_MORECORE_HOOK_CAN_TRACK_MALLOC) /* ----------------- */
+
+#ifdef HAVE_MALLOC_H
+# include <malloc.h>
+#endif
+
+#undef SBRK_0
+#define SBRK_0 sbrk(0)
+
+static void
+init_hook(void)
+{
+    __after_morecore_hook = erts_mtrace_update_heap_size;
+    if (inited)
+	return;
+    heap_end = NULL;
+#if defined(HAVE_END_SYMBOL)
+    heap_start = &end;
+#elif defined(HAVE__END_SYMBOL)
+    heap_start = &_end;
+#else
+    heap_start = SBRK_0;
+    if (heap_start == (SBRK_RET_TYPE) -1) {
+	heap_start = NULL;
+	return;
+    }
+#endif
+    inited = 1;
+}
+
+static int
+init(void)
+{
+    init_hook();
+    return inited;
+}
+
+void (*__malloc_initialize_hook)(void) = init_hook;
+
+#elif defined(ERTS_BRK_WRAPPERS_CAN_TRACK_MALLOC) /* ------------------------ */
+#ifdef HAVE_DLFCN_H
+# include <dlfcn.h>
+#endif
+
+#undef SBRK_0
+#define SBRK_0 (*real_sbrk)(0)
+
+#ifndef HAVE_SBRK
+# error no sbrk()
+#endif
+#if !defined(HAVE_END_SYMBOL) && !defined(HAVE__END_SYMBOL)
+# error no 'end' nor '_end'
+#endif
+
+static void update_heap_size(char *new_end);
+
+#define SBRK_IMPL(RET_TYPE, FUNC, ARG_TYPE) \
+RET_TYPE FUNC (ARG_TYPE); \
+static RET_TYPE (*real_ ## FUNC)(ARG_TYPE) = NULL; \
+RET_TYPE FUNC (ARG_TYPE arg) \
+{ \
+    RET_TYPE res; \
+    if (!inited && !init()) \
+	return (RET_TYPE) -1; \
+    res = (*real_ ## FUNC)(arg); \
+    if (erts_mtrace_enabled && res != ((RET_TYPE) -1)) \
+	update_heap_size((char *) (*real_ ## FUNC)(0)); \
+    return res; \
+}
+
+#define BRK_IMPL(RET_TYPE, FUNC, ARG_TYPE) \
+RET_TYPE FUNC (ARG_TYPE); \
+static RET_TYPE (*real_ ## FUNC)(ARG_TYPE) = NULL; \
+RET_TYPE FUNC (ARG_TYPE arg) \
+{ \
+    RET_TYPE res; \
+    if (!inited && !init()) \
+	return (RET_TYPE) -1; \
+    res = (*real_ ## FUNC)(arg); \
+    if (erts_mtrace_enabled && res != ((RET_TYPE) -1)) \
+	update_heap_size((char *) arg); \
+    return res; \
+}
+
+SBRK_IMPL(SBRK_RET_TYPE, sbrk, SBRK_ARG_TYPE)
+#ifdef HAVE_BRK
+  BRK_IMPL(BRK_RET_TYPE, brk, BRK_ARG_TYPE)
+#endif
+
+#ifdef HAVE__SBRK
+  SBRK_IMPL(SBRK_RET_TYPE, _sbrk, SBRK_ARG_TYPE)
+#endif
+#ifdef HAVE__BRK
+  BRK_IMPL(BRK_RET_TYPE, _brk, BRK_ARG_TYPE)
+#endif
+
+#ifdef HAVE___SBRK
+  SBRK_IMPL(SBRK_RET_TYPE, __sbrk, SBRK_ARG_TYPE)
+#endif
+#ifdef HAVE___BRK
+  BRK_IMPL(BRK_RET_TYPE, __brk, BRK_ARG_TYPE)
+#endif
+
+static int
+init(void)
+{
+    if (inited)
+	return 1;
+
+#define INIT_XBRK_SYM(SYM) \
+do { \
+    if (!real_ ## SYM) { \
+	real_ ## SYM = dlsym(RTLD_NEXT, #SYM); \
+	if (!real_ ## SYM) { \
+	    errno = ENOMEM; \
+	    return 0; \
+	} \
+    } \
+} while (0)
+
+    heap_end = NULL;
+#if defined(HAVE_END_SYMBOL)
+    heap_start = &end;
+#elif defined(HAVE__END_SYMBOL)
+    heap_start = &_end;
+#endif
+
+    INIT_XBRK_SYM(sbrk);
+#ifdef HAVE_BRK
+    INIT_XBRK_SYM(brk);
+#endif
+#ifdef HAVE__SBRK
+    INIT_XBRK_SYM(_sbrk);
+#endif
+#ifdef HAVE__BRK
+    INIT_XBRK_SYM(_brk);
+#endif
+#ifdef HAVE___SBRK
+    INIT_XBRK_SYM(__sbrk);
+#endif
+#ifdef HAVE___BRK
+    INIT_XBRK_SYM(__brk);
+#endif
+
+    return inited = 1;
+#undef INIT_XBRK_SYM
+}
+
+#endif /* #elif defined(ERTS_BRK_WRAPPERS_CAN_TRACK_MALLOC) */ /* ----------- */
+
+static void
+update_heap_size(char *new_end)
+{
+    volatile char *new_start, *old_start, *old_end;
+    Uint size;
+
+    if (new_end ==
((char *) -1)) + return; + + new_start = (old_start = heap_start); + old_end = heap_end; + heap_end = new_end; + if (new_end < old_start || !old_start) + heap_start = (new_start = new_end); + + size = (Uint) (new_end - new_start); + + if (!old_end) { + if (size) + erts_mtrace_crr_alloc((void *) new_start, + ERTS_ALC_A_SYSTEM, + ERTS_MTRACE_SEGMENT_ID, + size); + else + heap_end = NULL; + } + else { + if (old_end != new_end || old_start != new_start) { + + if (size) + erts_mtrace_crr_realloc((void *) new_start, + ERTS_ALC_A_SYSTEM, + ERTS_MTRACE_SEGMENT_ID, + (void *) old_start, + size); + else { + if (old_start) + erts_mtrace_crr_free(ERTS_ALC_A_SYSTEM, + ERTS_MTRACE_SEGMENT_ID, + (void *) old_start); + heap_end = NULL; + } + } + } +} + +#endif /* #ifdef ERTS_CAN_TRACK_MALLOC */ + +void +erts_mtrace_update_heap_size(void) +{ +#ifdef ERTS_CAN_TRACK_MALLOC + if (erts_mtrace_enabled && (inited || init())) + update_heap_size((char *) SBRK_0); +#endif +} + diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c new file mode 100644 index 0000000000..169d4579a2 --- /dev/null +++ b/erts/emulator/sys/common/erl_poll.c @@ -0,0 +1,2693 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Poll interface suitable for ERTS with or without + * SMP support. + * + * The interface is currently implemented using: + * - select + * - poll + * - /dev/poll + * - epoll with poll or select as fallback + * - kqueue with poll or select as fallback + * + * Some time in the future it will also be + * implemented using Solaris ports. 
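+ *
+ * Rough usage sketch (illustrative only; exact prototypes are in
+ * erl_poll.h, and the exported names get a _kp/_nkp suffix when both
+ * kernel-poll and non-kernel-poll versions are built):
+ *
+ *   ErtsPollSet ps = erts_poll_create_pollset();
+ *   int wake = 0;
+ *   erts_poll_control(ps, fd, ERTS_POLL_EV_IN, 1, &wake);
+ *   ...a later erts_poll_wait() then reports ERTS_POLL_EV_IN on fd...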
+ *
+ *
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifndef WANT_NONBLOCKING
+# define WANT_NONBLOCKING
+#endif
+#define ERTS_WANT_GOT_SIGUSR1
+
+#include "erl_poll.h"
+#if ERTS_POLL_USE_KQUEUE
+# include <sys/types.h>
+# include <sys/event.h>
+# include <sys/time.h>
+#endif
+#if ERTS_POLL_USE_SELECT
+# ifdef SYS_SELECT_H
+# include <sys/select.h>
+# endif
+# ifdef VXWORKS
+# include <selectLib.h>
+# endif
+#endif
+#ifndef VXWORKS
+# ifdef NO_SYSCONF
+# if ERTS_POLL_USE_SELECT
+# include <sys/param.h>
+# else
+# include <limits.h>
+# endif
+# endif
+#endif
+#include "erl_driver.h"
+#include "erl_alloc.h"
+
+#if !defined(ERTS_POLL_USE_EPOLL) \
+    && !defined(ERTS_POLL_USE_DEVPOLL) \
+    && !defined(ERTS_POLL_USE_POLL) \
+    && !defined(ERTS_POLL_USE_SELECT)
+#error "Missing implementation of erts_poll()"
+#endif
+
+#if defined(ERTS_KERNEL_POLL_VERSION) && !ERTS_POLL_USE_KERNEL_POLL
+#error "Missing kernel poll implementation of erts_poll()"
+#endif
+
+#if defined(ERTS_NO_KERNEL_POLL_VERSION) && ERTS_POLL_USE_KERNEL_POLL
+#error "Kernel poll used when it shouldn't be used"
+#endif
+
+#if 0
+#define ERTS_POLL_DEBUG_PRINT
+#endif
+
+#if defined(DEBUG) && 0
+#define HARD_DEBUG
+#endif
+
+#define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \
+					    || ERTS_POLL_USE_KQUEUE)
+#define ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE \
+    (defined(ERTS_SMP) || ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_POLL)
+
+#define ERTS_POLL_USE_CONCURRENT_UPDATE \
+    (defined(ERTS_SMP) && ERTS_POLL_USE_EPOLL)
+
+#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+
+#define FDS_STATUS_EXTRA_FREE_SIZE 128
+#define POLL_FDS_EXTRA_FREE_SIZE 128
+
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 1
+#else
+# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 0
+#endif
+
+#define ERTS_POLL_USE_WAKEUP_PIPE \
+    (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP))
+
+#ifdef ERTS_SMP
+
+#define ERTS_POLLSET_LOCK(PS) \
+    erts_smp_mtx_lock(&(PS)->mtx)
+#define ERTS_POLLSET_UNLOCK(PS) \
+    erts_smp_mtx_unlock(&(PS)->mtx)
+
+#define ERTS_POLLSET_SET_POLLED_CHK(PS) \
+    ((int) erts_smp_atomic_xchg(&(PS)->polled, (long) 1))
+#define ERTS_POLLSET_UNSET_POLLED(PS) \
+    erts_smp_atomic_set(&(PS)->polled, (long) 0)
+#define ERTS_POLLSET_IS_POLLED(PS) \
+    ((int) erts_smp_atomic_read(&(PS)->polled))
+
+#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) \
+    ((int) erts_smp_atomic_xchg(&(PS)->woken, (long) 1))
+#define ERTS_POLLSET_SET_POLLER_WOKEN(PS) \
+    erts_smp_atomic_set(&(PS)->woken, (long) 1)
+#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS) \
+    erts_smp_atomic_set(&(PS)->woken, (long) 0)
+#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) \
+    ((int) erts_smp_atomic_read(&(PS)->woken))
+
+#else
+
+#define ERTS_POLLSET_LOCK(PS)
+#define ERTS_POLLSET_UNLOCK(PS)
+#define ERTS_POLLSET_SET_POLLED_CHK(PS) 0
+#define ERTS_POLLSET_UNSET_POLLED(PS)
+#define ERTS_POLLSET_IS_POLLED(PS) 0
+
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+
+/*
+ * Ideally, the ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) operation would
+ * be atomic. This operation isn't, but we will do okay anyway. The
+ * "woken check" is only an optimization. The only requirement we have:
+ * If (PS)->woken is set to a value != 0 when interrupting, we have to
+ * write on the wakeup pipe at least once. Multiple writes are okay.
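+ *
+ * Example of the benign race: two interrupters can both read
+ * (PS)->woken as 0 and both write to the wakeup pipe; the poller then
+ * merely reads and discards two bytes instead of one.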
+ */ +#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) ((PS)->woken++) +#define ERTS_POLLSET_SET_POLLER_WOKEN(PS) ((PS)->woken = 1, (void) 0) +#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS) ((PS)->woken = 0, (void) 0) +#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) ((PS)->woken) + +#else + +#define ERTS_POLLSET_SET_POLLER_WOKEN_CHK(PS) 1 +#define ERTS_POLLSET_SET_POLLER_WOKEN(PS) +#define ERTS_POLLSET_UNSET_POLLER_WOKEN(PS) +#define ERTS_POLLSET_IS_POLLER_WOKEN(PS) 1 + +#endif + +#endif + +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \ + erts_smp_atomic_set(&(PS)->have_update_requests, (long) 1) +#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \ + erts_smp_atomic_set(&(PS)->have_update_requests, (long) 0) +#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \ + ((int) erts_smp_atomic_read(&(PS)->have_update_requests)) +#else +#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) +#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) +#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) 0 +#endif + +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP) + +#define ERTS_POLLSET_UNSET_INTERRUPTED_CHK(PS) unset_interrupted_chk((PS)) +#define ERTS_POLLSET_UNSET_INTERRUPTED(PS) ((PS)->interrupt = 0, (void) 0) +#define ERTS_POLLSET_SET_INTERRUPTED(PS) ((PS)->interrupt = 1, (void) 0) +#define ERTS_POLLSET_IS_INTERRUPTED(PS) ((PS)->interrupt) + +#else + +#define ERTS_POLLSET_UNSET_INTERRUPTED_CHK(PS) \ + ((int) erts_smp_atomic_xchg(&(PS)->interrupt, (long) 0)) +#define ERTS_POLLSET_UNSET_INTERRUPTED(PS) \ + erts_smp_atomic_set(&(PS)->interrupt, (long) 0) +#define ERTS_POLLSET_SET_INTERRUPTED(PS) \ + erts_smp_atomic_set(&(PS)->interrupt, (long) 1) +#define ERTS_POLLSET_IS_INTERRUPTED(PS) \ + ((int) erts_smp_atomic_read(&(PS)->interrupt)) + +#endif + +#if ERTS_POLL_USE_FALLBACK +# if ERTS_POLL_USE_POLL +# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_poll_fds > 1) +# elif ERTS_POLL_USE_SELECT +# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_select_fds > 1) +# endif +#endif +/* + * --- Data types ------------------------------------------------------------ + */ + +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#define ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE 128 + +typedef struct ErtsPollSetUpdateRequestsBlock_ ErtsPollSetUpdateRequestsBlock; +struct ErtsPollSetUpdateRequestsBlock_ { + ErtsPollSetUpdateRequestsBlock *next; + int len; + int fds[ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE]; +}; + +#endif + + +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +# define ERTS_POLL_FD_FLG_INURQ (((unsigned short) 1) << 0) +#endif +#if ERTS_POLL_USE_FALLBACK +# define ERTS_POLL_FD_FLG_INFLBCK (((unsigned short) 1) << 1) +# define ERTS_POLL_FD_FLG_USEFLBCK (((unsigned short) 1) << 2) +#endif +#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) +# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 3) +#endif +typedef struct { +#if ERTS_POLL_USE_POLL + int pix; +#endif + ErtsPollEvents used_events; + ErtsPollEvents events; +#if ERTS_POLL_COALESCE_KP_RES + unsigned short res_ev_ix; +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK + unsigned short flags; +#endif + +} ErtsFdStatus; + + +#if ERTS_POLL_COALESCE_KP_RES +/* res_ev_ix max value */ +#define ERTS_POLL_MAX_RES ((1 << sizeof(unsigned short)*8) - 1) +#endif + +#if ERTS_POLL_USE_KQUEUE + +#define ERTS_POLL_KQ_OP_HANDLED 1 +#define ERTS_POLL_KQ_OP_DEL_R 2 +#define ERTS_POLL_KQ_OP_DEL_W 3 +#define ERTS_POLL_KQ_OP_ADD_R 4 +#define ERTS_POLL_KQ_OP_ADD_W 5 +#define ERTS_POLL_KQ_OP_ADD2_R 6 +#define ERTS_POLL_KQ_OP_ADD2_W 7 + +#endif + +struct 
ErtsPollSet_ { + ErtsPollSet next; + int internal_fd_limit; + ErtsFdStatus *fds_status; + int no_of_user_fds; + int fds_status_len; +#if ERTS_POLL_USE_KERNEL_POLL + int kp_fd; + int res_events_len; +#if ERTS_POLL_USE_EPOLL + struct epoll_event *res_events; +#elif ERTS_POLL_USE_KQUEUE + struct kevent *res_events; +#elif ERTS_POLL_USE_DEVPOLL + struct pollfd *res_events; +#endif +#endif /* ERTS_POLL_USE_KERNEL_POLL */ +#if ERTS_POLL_USE_POLL + int next_poll_fds_ix; + int no_poll_fds; + int poll_fds_len; + struct pollfd*poll_fds; +#elif ERTS_POLL_USE_SELECT + int next_sel_fd; + int max_fd; +#if ERTS_POLL_USE_FALLBACK + int no_select_fds; +#endif + fd_set input_fds; + fd_set res_input_fds; + fd_set output_fds; + fd_set res_output_fds; +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + ErtsPollSetUpdateRequestsBlock update_requests; + ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; + erts_smp_atomic_t have_update_requests; +#endif +#ifdef ERTS_SMP + erts_smp_atomic_t polled; + erts_smp_atomic_t woken; + erts_smp_mtx_t mtx; +#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT + volatile int woken; +#endif +#if ERTS_POLL_USE_WAKEUP_PIPE + int wake_fds[2]; +#endif +#if ERTS_POLL_USE_FALLBACK + int fallback_used; +#endif +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP) + volatile int interrupt; +#else + erts_smp_atomic_t interrupt; +#endif + erts_smp_atomic_t timeout; +#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS + erts_smp_atomic_t no_avoided_wakeups; + erts_smp_atomic_t no_avoided_interrupts; + erts_smp_atomic_t no_interrupt_timed; +#endif +}; + +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP) + +static ERTS_INLINE int +unset_interrupted_chk(ErtsPollSet ps) +{ + /* This operation isn't atomic, but we have no need at all for an + atomic operation here... 
*/ + int res = ps->interrupt; + ps->interrupt = 0; + return res; +} + +#endif + +static void fatal_error(char *format, ...); +static void fatal_error_async_signal_safe(char *error_str); + +static int max_fds = -1; +static ErtsPollSet pollsets; +static erts_smp_spinlock_t pollsets_lock; + +#if ERTS_POLL_USE_POLL + +static ERTS_INLINE short +ev2pollev(ErtsPollEvents ev) +{ +#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE + return ERTS_POLL_EV_E2N(ev); +#else /* Note, we only map events we are interested in */ + short res_ev = (short) 0; + if (ev & ERTS_POLL_EV_IN) + res_ev |= ERTS_POLL_EV_NKP_IN; + if (ev & ERTS_POLL_EV_OUT) + res_ev |= ERTS_POLL_EV_NKP_OUT; + return res_ev; +#endif +} + +static ERTS_INLINE ErtsPollEvents +pollev2ev(short ev) +{ +#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE + return ERTS_POLL_EV_N2E(ev); +#else /* Note, we only map events we are interested in */ + ErtsPollEvents res_ev = (ErtsPollEvents) 0; + if (ev & ERTS_POLL_EV_NKP_IN) + res_ev |= ERTS_POLL_EV_IN; + if (ev & ERTS_POLL_EV_NKP_OUT) + res_ev |= ERTS_POLL_EV_OUT; + if (ev & ERTS_POLL_EV_NKP_ERR) + res_ev |= ERTS_POLL_EV_ERR; + if (ev & ERTS_POLL_EV_NKP_NVAL) + res_ev |= ERTS_POLL_EV_NVAL; + return res_ev; +#endif +} + +#endif + +#ifdef HARD_DEBUG +static void check_poll_result(ErtsPollResFd pr[], int len); +#if ERTS_POLL_USE_DEVPOLL +static void check_poll_status(ErtsPollSet ps); +#endif /* ERTS_POLL_USE_DEVPOLL */ +#endif /* HARD_DEBUG */ +#ifdef ERTS_POLL_DEBUG_PRINT +static void print_misc_debug_info(void); +#endif + +/* + * --- Wakeup pipe ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_WAKEUP_PIPE + +static ERTS_INLINE void +wake_poller(ErtsPollSet ps) +{ + /* + * NOTE: This function might be called from signal handlers in the + * non-smp case; therefore, it has to be async-signal safe in + * the non-smp case. 
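+ *
+ * This is why the code below sticks to write(), which POSIX lists as
+ * async-signal safe, and reports failure through
+ * fatal_error_async_signal_safe() rather than anything that might
+ * lock or allocate.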
+ */ + if (!ERTS_POLLSET_SET_POLLER_WOKEN_CHK(ps)) { + ssize_t res; + if (ps->wake_fds[1] < 0) + return; /* Not initialized yet */ + do { + /* write() is async-signal safe (according to posix) */ + res = write(ps->wake_fds[1], "!", 1); + } while (res < 0 && errno == EINTR); + if (res <= 0 && errno != ERRNO_BLOCK) { + fatal_error_async_signal_safe(__FILE__ + ":XXX:wake_poller(): " + "Failed to write on wakeup pipe\n"); + } + } +} + +static ERTS_INLINE void +cleanup_wakeup_pipe(ErtsPollSet ps) +{ + int fd = ps->wake_fds[0]; + int res; + do { + char buf[32]; + res = read(fd, buf, sizeof(buf)); + } while (res > 0 || (res < 0 && errno == EINTR)); + if (res < 0 && errno != ERRNO_BLOCK) { + fatal_error("%s:%d:cleanup_wakeup_pipe(): " + "Failed to read on wakeup pipe fd=%d: " + "%s (%d)\n", + __FILE__, __LINE__, + fd, + erl_errno_id(errno), errno); + } +} + +static void +create_wakeup_pipe(ErtsPollSet ps) +{ + int do_wake = 0; + int wake_fds[2]; + ps->wake_fds[0] = -1; + ps->wake_fds[1] = -1; + if (pipe(wake_fds) < 0) { + fatal_error("%s:%d:create_wakeup_pipe(): " + "Failed to create pipe: %s (%d)\n", + __FILE__, + __LINE__, + erl_errno_id(errno), + errno); + } + SET_NONBLOCKING(wake_fds[0]); + SET_NONBLOCKING(wake_fds[1]); + +#ifdef ERTS_POLL_DEBUG_PRINT + erts_printf("wakeup fds = {%d, %d}\n", wake_fds[0], wake_fds[1]); +#endif + + ERTS_POLL_EXPORT(erts_poll_control)(ps, + wake_fds[0], + ERTS_POLL_EV_IN, + 1, &do_wake); +#if ERTS_POLL_USE_FALLBACK + /* We depend on the wakeup pipe being handled by kernel poll */ + if (ps->fds_status[wake_fds[0]].flags & ERTS_POLL_FD_FLG_INFLBCK) + fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", + __FILE__, __LINE__); +#endif + if (ps->internal_fd_limit <= wake_fds[1]) + ps->internal_fd_limit = wake_fds[1] + 1; + if (ps->internal_fd_limit <= wake_fds[0]) + ps->internal_fd_limit = wake_fds[0] + 1; + ps->wake_fds[0] = wake_fds[0]; + ps->wake_fds[1] = wake_fds[1]; +} + +#endif /* ERTS_POLL_USE_WAKEUP_PIPE */ + +/* + * --- Poll set update requests ---------------------------------------------- + */ +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + +static ERTS_INLINE void +enqueue_update_request(ErtsPollSet ps, int fd) +{ + ErtsPollSetUpdateRequestsBlock *urqbp; + + ASSERT(fd < ps->fds_status_len); + + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ) + return; + + if (ps->update_requests.len == 0) + ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(ps); + + urqbp = ps->curr_upd_req_block; + + if (urqbp->len == ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE) { + ASSERT(!urqbp->next); + urqbp = erts_alloc(ERTS_ALC_T_POLLSET_UPDREQ, + sizeof(ErtsPollSetUpdateRequestsBlock)); + ps->curr_upd_req_block->next = urqbp; + ps->curr_upd_req_block = urqbp; + urqbp->next = NULL; + urqbp->len = 0; + } + + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INURQ; + urqbp->fds[urqbp->len++] = fd; +} + +static ERTS_INLINE void +free_update_requests_block(ErtsPollSet ps, + ErtsPollSetUpdateRequestsBlock *urqbp) +{ + if (urqbp != &ps->update_requests) + erts_free(ERTS_ALC_T_POLLSET_UPDREQ, (void *) urqbp); + else { + urqbp->next = NULL; + urqbp->len = 0; + } +} + +#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ + +/* + * --- Growing poll set structures ------------------------------------------- + */ + +#if ERTS_POLL_USE_KERNEL_POLL +static void +grow_res_events(ErtsPollSet ps, int new_len) +{ + size_t new_size = sizeof( +#if ERTS_POLL_USE_EPOLL + struct epoll_event +#elif ERTS_POLL_USE_DEVPOLL + struct pollfd +#elif ERTS_POLL_USE_KQUEUE + struct kevent +#endif + )*new_len; + /* We do not need to save 
previously stored data */ + if (ps->res_events) + erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events); + ps->res_events = erts_alloc(ERTS_ALC_T_POLL_RES_EVS, new_size); + ps->res_events_len = new_len; +} +#endif /* ERTS_POLL_USE_KERNEL_POLL */ + +#if ERTS_POLL_USE_POLL +static void +grow_poll_fds(ErtsPollSet ps, int min_ix) +{ + int i; + int new_len = min_ix + 1 + POLL_FDS_EXTRA_FREE_SIZE; + if (new_len > max_fds) + new_len = max_fds; + ps->poll_fds = (ps->poll_fds_len + ? erts_realloc(ERTS_ALC_T_POLL_FDS, + ps->poll_fds, + sizeof(struct pollfd)*new_len) + : erts_alloc(ERTS_ALC_T_POLL_FDS, + sizeof(struct pollfd)*new_len)); + for (i = ps->poll_fds_len; i < new_len; i++) { + ps->poll_fds[i].fd = -1; + ps->poll_fds[i].events = (short) 0; + ps->poll_fds[i].revents = (short) 0; + } + ps->poll_fds_len = new_len; +} +#endif + +static void +grow_fds_status(ErtsPollSet ps, int min_fd) +{ + int i; + int new_len = min_fd + 1 + FDS_STATUS_EXTRA_FREE_SIZE; + ASSERT(min_fd < max_fds); + if (new_len > max_fds) + new_len = max_fds; + ps->fds_status = (ps->fds_status_len + ? erts_realloc(ERTS_ALC_T_FD_STATUS, + ps->fds_status, + sizeof(ErtsFdStatus)*new_len) + : erts_alloc(ERTS_ALC_T_FD_STATUS, + sizeof(ErtsFdStatus)*new_len)); + for (i = ps->fds_status_len; i < new_len; i++) { +#if ERTS_POLL_USE_POLL + ps->fds_status[i].pix = -1; +#endif + ps->fds_status[i].used_events = (ErtsPollEvents) 0; + ps->fds_status[i].events = (ErtsPollEvents) 0; +#if ERTS_POLL_COALESCE_KP_RES + ps->fds_status[i].res_ev_ix = (unsigned short) ERTS_POLL_MAX_RES; +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK + ps->fds_status[i].flags = (unsigned short) 0; +#endif + } + ps->fds_status_len = new_len; +} + +/* + * --- Selecting fd to poll on ----------------------------------------------- + */ + +#if ERTS_POLL_USE_FALLBACK +static int update_fallback_pollset(ErtsPollSet ps, int fd); +#endif + +static ERTS_INLINE int +need_update(ErtsPollSet ps, int fd) +{ +#if ERTS_POLL_USE_KERNEL_POLL + int reset; +#endif + + ASSERT(fd < ps->fds_status_len); + +#if ERTS_POLL_USE_KERNEL_POLL + reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST); + if (reset && !ps->fds_status[fd].used_events) { + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; + reset = 0; + } +#elif defined(ERTS_SMP) + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; +#endif + + if (ps->fds_status[fd].used_events != ps->fds_status[fd].events) + return 1; + +#if ERTS_POLL_USE_KERNEL_POLL + return reset; +#else + return 0; +#endif +} + +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + +#if ERTS_POLL_USE_KQUEUE +#define ERTS_POLL_MIN_BATCH_BUF_SIZE 128 +#else +#define ERTS_POLL_MIN_BATCH_BUF_SIZE 64 +#endif + +typedef struct { + int len; + int size; +#if ERTS_POLL_USE_DEVPOLL + struct pollfd *buf; +#elif ERTS_POLL_USE_KQUEUE + struct kevent *buf; + struct kevent *ebuf; +#endif +} ErtsPollBatchBuf; + + +static ERTS_INLINE void +setup_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) +{ + bbp->len = 0; +#if ERTS_POLL_USE_DEVPOLL + bbp->size = ps->res_events_len; + bbp->buf = ps->res_events; +#elif ERTS_POLL_USE_KQUEUE + bbp->size = ps->res_events_len/2; + bbp->buf = ps->res_events; + bbp->ebuf = bbp->buf + bbp->size; +#endif +} + + +#if ERTS_POLL_USE_DEVPOLL + +static void +write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) +{ + ssize_t wres; + char *buf = (char *) bbp->buf; + size_t buf_size = sizeof(struct pollfd)*bbp->len; + + while (1) { + wres = write(ps->kp_fd, (void *) buf, buf_size); + if (wres < 0) { + if (errno == EINTR) + continue; + 
fatal_error("%s:%d:write_batch_buf(): " + "Failed to write to /dev/poll: " + "%s (%d)\n", + __FILE__, __LINE__, + erl_errno_id(errno), errno); + } + buf_size -= wres; + if (buf_size <= 0) + break; + buf += wres; + } + + if (buf_size < 0) { + fatal_error("%s:%d:write_devpoll_buf(): Internal error\n", + __FILE__, __LINE__); + } + bbp->len = 0; +} + +#elif ERTS_POLL_USE_KQUEUE + +static void +write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) +{ + int res; + int len = bbp->len; + struct kevent *buf = bbp->buf; + struct timespec ts = {0, 0}; + + do { + res = kevent(ps->kp_fd, buf, len, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); + if (res < 0) { + int i; + struct kevent *ebuf = bbp->ebuf; + do { + res = kevent(ps->kp_fd, buf, len, ebuf, len, &ts); + } while (res < 0 && errno == EINTR); + if (res < 0) { + fatal_error("%s:%d: kevent() failed: %s (%d)\n", + __FILE__, __LINE__, erl_errno_id(errno), errno); + } + for (i = 0; i < res; i++) { + if (ebuf[i].flags & EV_ERROR) { + short filter; + int fd = (int) ebuf[i].ident; + + switch ((int) ebuf[i].udata) { + + /* + * Since we use a lazy update approach EV_DELETE will + * frequently fail. This since kqueue automatically + * removes a file descriptor that is closed from the + * poll set. + */ + case ERTS_POLL_KQ_OP_DEL_R: + case ERTS_POLL_KQ_OP_DEL_W: + case ERTS_POLL_KQ_OP_HANDLED: + break; + + /* + * According to the kqueue man page EVFILT_READ support + * does not imply EVFILT_WRITE support; therefore, + * if an EV_ADD fail, we may have to remove other + * events on this fd in the kqueue pollset before + * adding fd to the fallback pollset. + */ + case ERTS_POLL_KQ_OP_ADD_W: + if (ps->fds_status[fd].used_events & ERTS_POLL_EV_IN) { + filter = EVFILT_READ; + goto rm_add_fb; + } + goto add_fb; + case ERTS_POLL_KQ_OP_ADD_R: + if (ps->fds_status[fd].used_events & ERTS_POLL_EV_OUT) { + filter = EVFILT_WRITE; + goto rm_add_fb; + } + goto add_fb; + case ERTS_POLL_KQ_OP_ADD2_W: + case ERTS_POLL_KQ_OP_ADD2_R: { + int j; + for (j = i+1; j < res; j++) { + if (fd == (int) ebuf[j].ident) { + ebuf[j].udata = (void *) ERTS_POLL_KQ_OP_HANDLED; + if (!(ebuf[j].flags & EV_ERROR)) { + switch ((int) ebuf[j].udata) { + case ERTS_POLL_KQ_OP_ADD2_W: + filter = EVFILT_WRITE; + goto rm_add_fb; + case ERTS_POLL_KQ_OP_ADD2_R: + filter = EVFILT_READ; + goto rm_add_fb; + default: + fatal_error("%s:%d:write_batch_buf(): " + "Internal error", + __FILE__, __LINE__); + break; + } + } + goto add_fb; + } + } + /* The other add succeded... */ + filter = (((int) ebuf[i].udata == ERTS_POLL_KQ_OP_ADD2_W) + ? 
EVFILT_READ + : EVFILT_WRITE); + rm_add_fb: + { + struct kevent kev; + struct timespec ts = {0, 0}; + EV_SET(&kev, fd, filter, EV_DELETE, 0, 0, 0); + (void) kevent(ps->kp_fd, &kev, 1, NULL, 0, &ts); + } + + add_fb: + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; + ASSERT(ps->fds_status[fd].used_events); + ps->fds_status[fd].used_events = 0; + ps->no_of_user_fds--; + update_fallback_pollset(ps, fd); + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); + break; + } + default: + fatal_error("%s:%d:write_batch_buf(): Internal error", + __FILE__, __LINE__); + break; + } + } + } + } + bbp->len = 0; +} + +#endif /* ERTS_POLL_USE_KQUEUE */ + +static ERTS_INLINE void +batch_update_pollset(ErtsPollSet ps, int fd, ErtsPollBatchBuf *bbp) +{ + int buf_len; +#if ERTS_POLL_USE_DEVPOLL + short events; + struct pollfd *buf; +#elif ERTS_POLL_USE_KQUEUE + struct kevent *buf; +#endif + +#ifdef ERTS_POLL_DEBUG_PRINT + erts_printf("Doing lazy update on fd=%d\n", fd); +#endif + + if (!need_update(ps, fd)) + return; + + /* Make sure we have room for at least maximum no of entries + per fd */ + if (bbp->size - bbp->len < 2) + write_batch_buf(ps, bbp); + + buf_len = bbp->len; + buf = bbp->buf; + + ASSERT(fd < ps->fds_status_len); + +#if ERTS_POLL_USE_DEVPOLL + events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); + if (!events) { + buf[buf_len].events = POLLREMOVE; + ps->no_of_user_fds--; + } + else if (!ps->fds_status[fd].used_events) { + buf[buf_len].events = events; + ps->no_of_user_fds++; + } + else { + if ((ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) + || (ps->fds_status[fd].used_events & ~events)) { + /* Reset or removed events... */ + buf[buf_len].fd = fd; + buf[buf_len].events = POLLREMOVE; + buf[buf_len++].revents = 0; + } + buf[buf_len].events = events; + } + buf[buf_len].fd = fd; + buf[buf_len++].revents = 0; + +#elif ERTS_POLL_USE_KQUEUE + + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) + update_fallback_pollset(ps, fd); + else { /* Remove from fallback and try kqueue */ + ErtsPollEvents events = ps->fds_status[fd].events; + ps->fds_status[fd].events = (ErtsPollEvents) 0; + update_fallback_pollset(ps, fd); + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); + if (events) { + ps->fds_status[fd].events = events; + goto try_kqueue; + } + } + } + else { + ErtsPollEvents events, used_events; + int mod_w, mod_r; + try_kqueue: + events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); + used_events = ERTS_POLL_EV_E2N(ps->fds_status[fd].used_events); + if (!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) { + if (!used_events && + (events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) + goto do_add_rw; + mod_r = ((events & ERTS_POLL_EV_IN) + != (used_events & ERTS_POLL_EV_IN)); + mod_w = ((events & ERTS_POLL_EV_OUT) + != (used_events & ERTS_POLL_EV_OUT)); + goto do_mod; + } + else { /* Reset */ + if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) { + do_add_rw: + EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, + 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_R); + buf_len++; + EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, + 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_W); + buf_len++; + + } + else { + mod_r = 1; + mod_w = 1; + do_mod: + if (mod_r) { + if (events & ERTS_POLL_EV_IN) { + EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, + 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_R); + buf_len++; + } + else if (used_events & ERTS_POLL_EV_IN) { + EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_DELETE, + 0, 0, (void *) 
ERTS_POLL_KQ_OP_DEL_R); + buf_len++; + } + } + if (mod_w) { + if (events & ERTS_POLL_EV_OUT) { + EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, + 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_W); + buf_len++; + } + else if (used_events & ERTS_POLL_EV_OUT) { + EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_DELETE, + 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_W); + buf_len++; + } + } + } + } + if (used_events) { + if (!events) { + ps->no_of_user_fds--; + } + } + else { + if (events) + ps->no_of_user_fds++; + } + ASSERT((events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); + ASSERT((used_events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); + } + +#endif + + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; + ps->fds_status[fd].used_events = ps->fds_status[fd].events; + + bbp->len = buf_len; +} + +#else /* !ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ + +#if ERTS_POLL_USE_EPOLL +static int +#if ERTS_POLL_USE_CONCURRENT_UPDATE +conc_update_pollset(ErtsPollSet ps, int fd, int *update_fallback) +#else +update_pollset(ErtsPollSet ps, int fd) +#endif +{ + int res; + int op; + struct epoll_event epe_templ; + struct epoll_event epe; + + ASSERT(fd < ps->fds_status_len); + + if (!need_update(ps, fd)) + return 0; + +#ifdef ERTS_POLL_DEBUG_PRINT + erts_printf("Doing update on fd=%d\n", fd); +#endif + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { +#if ERTS_POLL_USE_CONCURRENT_UPDATE + if (!*update_fallback) { + *update_fallback = 1; + return 0; + } +#endif + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) { + return update_fallback_pollset(ps, fd); + } + else { /* Remove from fallback and try epoll */ + ErtsPollEvents events = ps->fds_status[fd].events; + ps->fds_status[fd].events = (ErtsPollEvents) 0; + res = update_fallback_pollset(ps, fd); + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); + if (!events) + return res; + ps->fds_status[fd].events = events; + } + } + + epe_templ.events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); + epe_templ.data.fd = fd; + +#ifdef VALGRIND + /* Silence invalid valgrind warning ... */ + memset((void *) &epe.data, 0, sizeof(epoll_data_t)); +#endif + + if (epe_templ.events && ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) { + do { + /* We init 'epe' every time since epoll_ctl() may modify it + (not declared const and not documented as const). */ + epe.events = epe_templ.events; + epe.data.fd = epe_templ.data.fd; + res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); + } while (res != 0 && errno == EINTR); + ps->no_of_user_fds--; + ps->fds_status[fd].used_events = 0; + } + + if (!epe_templ.events) { + /* A note on EPOLL_CTL_DEL: linux kernel versions before 2.6.9 + need a non-NULL event pointer even though it is ignored... */ + op = EPOLL_CTL_DEL; + ps->no_of_user_fds--; + } + else if (!ps->fds_status[fd].used_events) { + op = EPOLL_CTL_ADD; + ps->no_of_user_fds++; + } + else { + op = EPOLL_CTL_MOD; + } + + do { + /* We init 'epe' every time since epoll_ctl() may modify it + (not declared const and not documented as const). */ + epe.events = epe_templ.events; + epe.data.fd = epe_templ.data.fd; + res = epoll_ctl(ps->kp_fd, op, fd, &epe); + } while (res != 0 && errno == EINTR); + +#if defined(ERTS_POLL_DEBUG_PRINT) && 1 + { + int saved_errno = errno; + erts_printf("%s = epoll_ctl(%d, %s, %d, {Ox%x, %d})\n", + res == 0 ? "0" : erl_errno_id(errno), + ps->kp_fd, + (op == EPOLL_CTL_ADD + ? "EPOLL_CTL_ADD" + : (op == EPOLL_CTL_MOD + ? "EPOLL_CTL_MOD" + : (op == EPOLL_CTL_DEL + ? 
"EPOLL_CTL_DEL" + : "UNKNOWN"))), + fd, + epe_templ.events, + fd); + errno = saved_errno; + } +#endif + if (res == 0) + ps->fds_status[fd].used_events = ps->fds_status[fd].events; + else { + switch (op) { + case EPOLL_CTL_MOD: + epe.events = 0; + do { + /* We init 'epe' every time since epoll_ctl() may modify it + (not declared const and not documented as const). */ + epe.events = 0; + epe.data.fd = fd; + res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); + } while (res != 0 && errno == EINTR); + ps->fds_status[fd].used_events = 0; + /* Fall through ... */ + case EPOLL_CTL_ADD: { + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; + ps->no_of_user_fds--; +#if ERTS_POLL_USE_CONCURRENT_UPDATE + if (!*update_fallback) { + *update_fallback = 1; + return 0; + } +#endif + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); + res = update_fallback_pollset(ps, fd); + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); + break; + } + case EPOLL_CTL_DEL: { + /* + * Since we use a lazy update approach EPOLL_CTL_DEL will + * frequently fail. This since epoll automatically removes + * a filedescriptor that is closed from the poll set. + */ + ps->fds_status[fd].used_events = 0; + res = 0; + break; + } + default: + fatal_error("%s:%d:update_pollset(): Internal error\n", + __FILE__, __LINE__); + break; + } + } + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; + return res; +} + +#if ERTS_POLL_USE_CONCURRENT_UPDATE +static int +update_pollset(ErtsPollSet ps, int fd) +{ + int update_fallback = 1; + return conc_update_pollset(ps, fd, &update_fallback); +} +#endif + +#endif /* ERTS_POLL_USE_EPOLL */ + +#endif /* ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ + +#if ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK + +#if ERTS_POLL_USE_FALLBACK +static int update_fallback_pollset(ErtsPollSet ps, int fd) +#else +static int update_pollset(ErtsPollSet ps, int fd) +#endif +{ +#ifdef ERTS_POLL_DEBUG_PRINT +#if ERTS_POLL_USE_FALLBACK + erts_printf("Doing fallback update on fd=%d\n", fd); +#else + erts_printf("Doing update on fd=%d\n", fd); +#endif +#endif + + ASSERT(fd < ps->fds_status_len); +#if ERTS_POLL_USE_FALLBACK + ASSERT(ps->fds_status[fd].used_events + ? 
(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) + : (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK)); +#endif + + if (!need_update(ps, fd)) + return 0; + +#if ERTS_POLL_USE_FALLBACK + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; +#endif + +#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ + if (!ps->fds_status[fd].events) { + int pix = ps->fds_status[fd].pix; + int last_pix; + if (pix < 0) { +#if ERTS_POLL_USE_FALLBACK + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); +#endif + return -1; + } +#if ERTS_POLL_USE_FALLBACK + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); +#endif + ps->no_of_user_fds--; + last_pix = --ps->no_poll_fds; + if (pix != last_pix) { + /* Move last pix to this pix */ + ps->poll_fds[pix].fd = ps->poll_fds[last_pix].fd; + ps->poll_fds[pix].events = ps->poll_fds[last_pix].events; + ps->poll_fds[pix].revents = ps->poll_fds[last_pix].revents; + ps->fds_status[ps->poll_fds[pix].fd].pix = pix; + } + /* Clear last pix */ + ps->poll_fds[last_pix].fd = -1; + ps->poll_fds[last_pix].events = (short) 0; + ps->poll_fds[last_pix].revents = (short) 0; + /* Clear this fd status */ + ps->fds_status[fd].pix = -1; + ps->fds_status[fd].used_events = (ErtsPollEvents) 0; +#if ERTS_POLL_USE_FALLBACK + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; +#endif + } + else { + int pix = ps->fds_status[fd].pix; + if (pix < 0) { +#if ERTS_POLL_USE_FALLBACK + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) + || fd == ps->kp_fd); +#endif + ps->no_of_user_fds++; + ps->fds_status[fd].pix = pix = ps->no_poll_fds++; + if (pix >= ps->poll_fds_len) + grow_poll_fds(ps, pix); + ps->poll_fds[pix].fd = fd; + ps->fds_status[fd].pix = pix; +#if ERTS_POLL_USE_FALLBACK + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; +#endif + } + +#if ERTS_POLL_USE_FALLBACK + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); +#endif + + /* Events to be used in next poll */ + ps->poll_fds[pix].events = ev2pollev(ps->fds_status[fd].events); + if (ps->poll_fds[pix].revents) { + /* Remove result events that we should not poll for anymore */ + ps->poll_fds[pix].revents + &= ev2pollev(~(~ps->fds_status[fd].used_events + & ps->fds_status[fd].events)); + } + /* Save events to be used in next poll */ + ps->fds_status[fd].used_events = ps->fds_status[fd].events; + } + return 0; +#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ + { + ErtsPollEvents events = ps->fds_status[fd].events; + if ((ERTS_POLL_EV_IN & events) + != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) { + if (ERTS_POLL_EV_IN & events) { + FD_SET(fd, &ps->input_fds); + } + else { + FD_CLR(fd, &ps->input_fds); + } + } + if ((ERTS_POLL_EV_OUT & events) + != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) { + if (ERTS_POLL_EV_OUT & events) { + FD_SET(fd, &ps->output_fds); + } + else { + FD_CLR(fd, &ps->output_fds); + } + } + + if (!ps->fds_status[fd].used_events) { + ASSERT(events); + ps->no_of_user_fds++; +#if ERTS_POLL_USE_FALLBACK + ps->no_select_fds++; + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; +#endif + } + else if (!events) { + ASSERT(ps->fds_status[fd].used_events); + ps->no_of_user_fds--; + ps->fds_status[fd].events = events; +#if ERTS_POLL_USE_FALLBACK + ps->no_select_fds--; + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; +#endif + } + + ps->fds_status[fd].used_events = events; + + if (events && fd > ps->max_fd) + ps->max_fd = fd; + else if (!events && fd == ps->max_fd) { + int max = ps->max_fd; + for (max 
= ps->max_fd; max >= 0; max--) + if (ps->fds_status[max].used_events) + break; + ps->max_fd = max; + } + } + return 0; +#endif +} + +#endif /* ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK */ + +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + +static void +handle_update_requests(ErtsPollSet ps) +{ + ErtsPollSetUpdateRequestsBlock *urqbp = &ps->update_requests; +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + ErtsPollBatchBuf bb; + setup_batch_buf(ps, &bb); +#endif + + while (urqbp) { + ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; + int i; + int len = urqbp->len; + for (i = 0; i < len; i++) { + int fd = urqbp->fds[i]; + ASSERT(fd < ps->fds_status_len); + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ; +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + batch_update_pollset(ps, fd, &bb); +#else + update_pollset(ps, fd); +#endif + } + + free_urqbp = urqbp; + urqbp = urqbp->next; + + free_update_requests_block(ps, free_urqbp); + + } + +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + if (bb.len) + write_batch_buf(ps, &bb); +#endif + + ps->curr_upd_req_block = &ps->update_requests; + +#if ERTS_POLL_USE_DEVPOLL && defined(HARD_DEBUG) + check_poll_status(ps); +#endif + + ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(ps); +} + +#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ + +static ERTS_INLINE ErtsPollEvents +poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, + int *have_set_have_update_requests, + int *do_wake) +{ + ErtsPollEvents new_events; + + if (fd < ps->internal_fd_limit || fd >= max_fds) { + if (fd < 0) { + new_events = ERTS_POLL_EV_ERR; + goto done; + } +#if ERTS_POLL_USE_KERNEL_POLL + if (fd == ps->kp_fd) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } +#endif +#if ERTS_POLL_USE_WAKEUP_PIPE + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } +#endif + } + + if (fd >= ps->fds_status_len) + grow_fds_status(ps, fd); + + ASSERT(fd < ps->fds_status_len); + + new_events = ps->fds_status[fd].events; + + if (events == 0) { + *do_wake = 0; + goto done; + } + + if (on) + new_events |= events; + else + new_events &= ~events; + + if (new_events == (ErtsPollEvents) 0) { +#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST; +#endif +#if ERTS_POLL_USE_FALLBACK + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_USEFLBCK; +#endif + } + + ps->fds_status[fd].events = new_events; + + if (new_events == ps->fds_status[fd].used_events +#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) + && !(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) +#endif + ) { + *do_wake = 0; + goto done; + } + +#if !ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + if (update_pollset(ps, fd) != 0) + new_events = ERTS_POLL_EV_ERR; +#else /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ + +#if ERTS_POLL_USE_CONCURRENT_UPDATE + if (ERTS_POLLSET_IS_POLLED(ps)) { + int update_fallback = 0; + conc_update_pollset(ps, fd, &update_fallback); + if (!update_fallback) { + *do_wake = 0; /* no need to wake kernel poller */ + goto done; + } + } +#endif + + enqueue_update_request(ps, fd); + +#ifdef ERTS_SMP + /* + * If new events have been added, we need to wake up the + * polling thread, but if events have been removed we don't. 
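+     * For example (illustrative only): if ERTS_POLL_EV_OUT is added
+     * on top of an fd whose used_events only contain ERTS_POLL_EV_IN,
+     * the poller may be sleeping without OUT in its wait set and has
+     * to be woken; merely removing events can safely wait until the
+     * poller wakes up by itself.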
+ */ + if ((new_events && (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) + || (~ps->fds_status[fd].used_events & new_events)) + *do_wake = 1; +#endif /* ERTS_SMP */ + +#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ + + done: +#ifdef ERTS_POLL_DEBUG_PRINT + erts_printf("0x%x = poll_control(ps, %d, 0x%x, %s) do_wake=%d\n", + (int) new_events, fd, (int) events, (on ? "on" : "off"), *do_wake); +#endif + return new_events; +} + +void +ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps, + ErtsPollControlEntry pcev[], + int len) +{ + int i; + int hshur = 0; + int do_wake; + int final_do_wake = 0; + + ERTS_POLLSET_LOCK(ps); + + for (i = 0; i < len; i++) { + do_wake = 0; + pcev[i].events = poll_control(ps, + pcev[i].fd, + pcev[i].events, + pcev[i].on, + &hshur, + &do_wake); + final_do_wake |= do_wake; + } + +#ifdef ERTS_SMP + if (final_do_wake) + wake_poller(ps); +#endif /* ERTS_SMP */ + + ERTS_POLLSET_UNLOCK(ps); +} + +ErtsPollEvents +ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, + ErtsSysFdType fd, + ErtsPollEvents events, + int on, + int* do_wake) /* In: Wake up polling thread */ + /* Out: Poller is woken */ +{ + int hshur = 0; + ErtsPollEvents res; + + ERTS_POLLSET_LOCK(ps); + + res = poll_control(ps, fd, events, on, &hshur, do_wake); + +#ifdef ERTS_SMP + if (*do_wake) { + wake_poller(ps); + } +#endif /* ERTS_SMP */ + + ERTS_POLLSET_UNLOCK(ps); + return res; +} + +/* + * --- Wait on poll set ------------------------------------------------------ + */ + +#if ERTS_POLL_USE_KERNEL_POLL + +static ERTS_INLINE int +save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) +{ + int res = 0; + int i; + int n = chk_fds_res < max_res ? chk_fds_res : max_res; +#if ERTS_POLL_USE_WAKEUP_PIPE + int wake_fd = ps->wake_fds[0]; +#endif + + for (i = 0; i < n; i++) { + +#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ + + if (ps->res_events[i].events) { + int fd = ps->res_events[i].data.fd; + int ix; + ErtsPollEvents revents; +#if ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + continue; + } +#endif + ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); + /* epoll_wait() can repeat the same fd in result array... 
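+             * The res_ev_ix bookkeeping below merges such duplicates:
+             * if this fd already owns a slot in pr[], its new result
+             * events are OR:ed into that slot instead of occupying a
+             * new one.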
+             */
+            ix = (int) ps->fds_status[fd].res_ev_ix;
+            ASSERT(ix >= 0);
+            if (ix >= res || pr[ix].fd != fd) {
+                ix = res;
+                pr[ix].fd = fd;
+                pr[ix].events = (ErtsPollEvents) 0;
+            }
+
+            revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
+            pr[ix].events |= revents;
+            if (revents) {
+                if (res == ix) {
+                    ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
+                    res++;
+                }
+            }
+        }
+
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+
+        struct kevent *ev;
+        int fd;
+        int ix;
+
+        ev = &ps->res_events[i];
+        fd = (int) ev->ident;
+        ASSERT(fd < ps->fds_status_len);
+        ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
+        ix = (int) ps->fds_status[fd].res_ev_ix;
+
+        ASSERT(ix >= 0);
+        if (ix >= res || pr[ix].fd != fd) {
+            ix = res;
+            pr[ix].fd = (int) ev->ident;
+            pr[ix].events = (ErtsPollEvents) 0;
+        }
+
+        if (ev->filter == EVFILT_READ) {
+#if ERTS_POLL_USE_WAKEUP_PIPE
+            if (fd == wake_fd) {
+                cleanup_wakeup_pipe(ps);
+                continue;
+            }
+#endif
+            pr[ix].events |= ERTS_POLL_EV_IN;
+        }
+        else if (ev->filter == EVFILT_WRITE)
+            pr[ix].events |= ERTS_POLL_EV_OUT;
+        if (ev->flags & (EV_ERROR|EV_EOF)) {
+            if ((ev->flags & EV_ERROR) && (((int) ev->data) == EBADF))
+                pr[ix].events |= ERTS_POLL_EV_NVAL;
+            else
+                pr[ix].events |= ERTS_POLL_EV_ERR;
+        }
+        if (pr[ix].events) {
+            if (res == ix) {
+                ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
+                res++;
+            }
+        }
+
+#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+
+        if (ps->res_events[i].revents) {
+            int fd = ps->res_events[i].fd;
+            ErtsPollEvents revents;
+#if ERTS_POLL_USE_WAKEUP_PIPE
+            if (fd == wake_fd) {
+                cleanup_wakeup_pipe(ps);
+                continue;
+            }
+#endif
+            revents = ERTS_POLL_EV_N2E(ps->res_events[i].revents);
+            pr[res].fd = fd;
+            pr[res].events = revents;
+            res++;
+        }
+
+#endif
+
+    }
+
+    return res;
+}
+
+#endif /* ERTS_POLL_USE_KERNEL_POLL */
+
+#if ERTS_POLL_USE_FALLBACK
+
+static int
+get_kp_results(ErtsPollSet ps, ErtsPollResFd pr[], int max_res)
+{
+    int res;
+#if ERTS_POLL_USE_KQUEUE
+    struct timespec ts = {0, 0};
+#endif
+
+    if (max_res > ps->res_events_len)
+        grow_res_events(ps, max_res);
+
+    do {
+#if ERTS_POLL_USE_EPOLL
+        res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0);
+#elif ERTS_POLL_USE_KQUEUE
+        res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
+#endif
+    } while (res < 0 && errno == EINTR);
+
+    if (res < 0) {
+        fatal_error("%s:%d: %s() failed: %s (%d)\n",
+                    __FILE__, __LINE__,
+#if ERTS_POLL_USE_EPOLL
+                    "epoll_wait",
+#elif ERTS_POLL_USE_KQUEUE
+                    "kevent",
+#endif
+                    erl_errno_id(errno), errno);
+    }
+
+    return save_kp_result(ps, pr, max_res, res);
+}
+
+#endif /* ERTS_POLL_USE_FALLBACK */
+
+
+
+static ERTS_INLINE int
+save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
+                 int chk_fds_res, int ebadf)
+{
+#if ERTS_POLL_USE_DEVPOLL
+    return save_kp_result(ps, pr, max_res, chk_fds_res);
+#elif ERTS_POLL_USE_FALLBACK
+    if (!ps->fallback_used)
+        return save_kp_result(ps, pr, max_res, chk_fds_res);
+    else
+#endif /* ERTS_POLL_USE_FALLBACK */
+    {
+
+#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+        int res = 0;
+#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK
+        int wake_fd = ps->wake_fds[0];
+#endif
+        int i, first_ix, end_ix;
+
+        /*
+         * In order to be somewhat fair, we continue on the poll_fds
+         * index where we stopped last time.
+         */
+        first_ix = i = ((ps->next_poll_fds_ix < ps->no_poll_fds)
+                        ?
ps->next_poll_fds_ix + : 0); + end_ix = ps->no_poll_fds; + + while (1) { + while (i < end_ix && res < max_res) { + if (ps->poll_fds[i].revents != (short) 0) { + int fd = ps->poll_fds[i].fd; + ErtsPollEvents revents; +#if ERTS_POLL_USE_FALLBACK + if (fd == ps->kp_fd) { + res += get_kp_results(ps, &pr[res], max_res-res); + i++; + continue; + } +#elif ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + i++; + continue; + } +#endif + revents = pollev2ev(ps->poll_fds[i].revents); + pr[res].fd = fd; + pr[res].events = revents; + res++; + } + i++; + } + if (res == max_res || i == first_ix) + break; + ASSERT(i == ps->no_poll_fds); + i = 0; + end_ix = first_ix; + } + + ps->next_poll_fds_ix = i; + return res; + +#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ + int res = 0; +#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK + int wake_fd = ps->wake_fds[0]; +#endif + int fd, first_fd, end_fd; + + /* + * In order to be fair, we continue on the fd where we stopped + * last time. + */ + first_fd = fd = ps->next_sel_fd <= ps->max_fd ? ps->next_sel_fd : 0; + end_fd = ps->max_fd + 1; + + if (!ebadf) { + while (1) { + while (fd < end_fd && res < max_res) { + + pr[res].events = (ErtsPollEvents) 0; + if (FD_ISSET(fd, &ps->res_input_fds)) { +#if ERTS_POLL_USE_FALLBACK + if (fd == ps->kp_fd) { + res += get_kp_results(ps, &pr[res], max_res-res); + fd++; + continue; + } +#elif ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + fd++; + continue; + } +#endif + pr[res].events |= ERTS_POLL_EV_IN; + } + if (FD_ISSET(fd, &ps->res_output_fds)) + pr[res].events |= ERTS_POLL_EV_OUT; + if (pr[res].events) { + pr[res].fd = fd; + res++; + } + fd++; + } + if (res == max_res || fd == first_fd) + break; + ASSERT(fd == ps->max_fd + 1); + fd = 0; + end_fd = first_fd; + } + } + else { + /* + * Bad file descriptors in poll set. + * + * This only happens when running poorly written + * drivers. This code could be optimized, but we + * don't bother since it should never happen... 
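+             * Each fd that has events selected is instead probed one
+             * at a time with a zero timeout select(); an fd that makes
+             * select() fail is reported with ERTS_POLL_EV_NVAL set.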
+ */ + while (1) { + while (fd < end_fd && res < max_res) { + if (ps->fds_status[fd].events) { + int sres; + fd_set *iset = NULL; + fd_set *oset = NULL; + if (ps->fds_status[fd].events & ERTS_POLL_EV_IN) { + iset = &ps->res_input_fds; + FD_ZERO(iset); + FD_SET(fd, iset); + } + if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) { + oset = &ps->res_output_fds; + FD_ZERO(oset); + FD_SET(fd, oset); + + } + do { + /* Initiate 'tv' each time; + select() may modify it */ + SysTimeval tv = {0, 0}; + sres = select(ps->max_fd+1, iset, oset, NULL, &tv); + } while (sres < 0 && errno == EINTR); + if (sres < 0) { +#if ERTS_POLL_USE_FALLBACK + if (fd == ps->kp_fd) { + res += get_kp_results(ps, + &pr[res], + max_res-res); + fd++; + continue; + } +#elif ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + fd++; + continue; + } +#endif + pr[res].fd = fd; + pr[res].events = ERTS_POLL_EV_NVAL; + res++; + } + else if (sres > 0) { + pr[res].fd = fd; + if (iset && FD_ISSET(fd, iset)) { +#if ERTS_POLL_USE_FALLBACK + if (fd == ps->kp_fd) { + res += get_kp_results(ps, + &pr[res], + max_res-res); + fd++; + continue; + } +#elif ERTS_POLL_USE_WAKEUP_PIPE + if (fd == wake_fd) { + cleanup_wakeup_pipe(ps); + fd++; + continue; + } +#endif + pr[res].events |= ERTS_POLL_EV_IN; + } + if (oset && FD_ISSET(fd, oset)) { + pr[res].events |= ERTS_POLL_EV_OUT; + } + ASSERT(pr[res].events); + res++; + } + } + fd++; + } + if (res == max_res || fd == first_fd) + break; + ASSERT(fd == ps->max_fd + 1); + fd = 0; + end_fd = first_fd; + } + } + ps->next_sel_fd = fd; + return res; +#endif + } +} + +static ERTS_INLINE int +check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked) +{ + ASSERT(!*ps_locked); + if (ps->no_of_user_fds == 0 && tv->tv_usec == 0 && tv->tv_sec == 0) { + /* Nothing to poll and zero timeout; done... */ + return 0; + } + else { + long timeout = tv->tv_sec*1000 + tv->tv_usec/1000; + ASSERT(timeout >= 0); + erts_smp_atomic_set(&ps->timeout, timeout); +#if ERTS_POLL_USE_FALLBACK + if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) { + +#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ + if (timeout > INT_MAX) + timeout = INT_MAX; + if (max_res > ps->res_events_len) + grow_res_events(ps, max_res); + return epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout); +#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ + struct timespec ts; + ts.tv_sec = tv->tv_sec; + ts.tv_nsec = tv->tv_usec*1000; + if (max_res > ps->res_events_len) + grow_res_events(ps, max_res); + return kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts); +#endif /* ----------------------------------------- */ + + } + else /* use fallback (i.e. poll() or select()) */ +#endif /* ERTS_POLL_USE_FALLBACK */ + { + +#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ + /* + * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds + * is set too high. dp_nfds should not be set greater than + * the maximum number of file descriptors in the poll set. + */ + struct dvpoll poll_res; + int nfds = ps->no_of_user_fds; +#ifdef ERTS_SMP + nfds++; /* Wakeup pipe */ +#endif + if (timeout > INT_MAX) + timeout = INT_MAX; + poll_res.dp_nfds = nfds < max_res ? 
+static ERTS_INLINE int
+check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
+{
+    ASSERT(!*ps_locked);
+    if (ps->no_of_user_fds == 0 && tv->tv_usec == 0 && tv->tv_sec == 0) {
+        /* Nothing to poll and zero timeout; done... */
+        return 0;
+    }
+    else {
+        long timeout = tv->tv_sec*1000 + tv->tv_usec/1000;
+        ASSERT(timeout >= 0);
+        erts_smp_atomic_set(&ps->timeout, timeout);
+#if ERTS_POLL_USE_FALLBACK
+        if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) {
+
+#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+            if (timeout > INT_MAX)
+                timeout = INT_MAX;
+            if (max_res > ps->res_events_len)
+                grow_res_events(ps, max_res);
+            return epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+            struct timespec ts;
+            ts.tv_sec = tv->tv_sec;
+            ts.tv_nsec = tv->tv_usec*1000;
+            if (max_res > ps->res_events_len)
+                grow_res_events(ps, max_res);
+            return kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
+#endif /* ----------------------------------------- */
+
+        }
+        else /* use fallback (i.e. poll() or select()) */
+#endif /* ERTS_POLL_USE_FALLBACK */
+        {
+
+#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+            /*
+             * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds
+             * is set too high. dp_nfds should not be set greater than
+             * the maximum number of file descriptors in the poll set.
+             */
+            struct dvpoll poll_res;
+            int nfds = ps->no_of_user_fds;
+#ifdef ERTS_SMP
+            nfds++; /* Wakeup pipe */
+#endif
+            if (timeout > INT_MAX)
+                timeout = INT_MAX;
+            poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
+            if (poll_res.dp_nfds > ps->res_events_len)
+                grow_res_events(ps, poll_res.dp_nfds);
+            poll_res.dp_fds = ps->res_events;
+            poll_res.dp_timeout = (int) timeout;
+            return ioctl(ps->kp_fd, DP_POLL, &poll_res);
+#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+            if (timeout > INT_MAX)
+                timeout = INT_MAX;
+            return poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+            int res;
+            ps->res_input_fds = ps->input_fds;
+            ps->res_output_fds = ps->output_fds;
+            res = select(ps->max_fd + 1,
+                         &ps->res_input_fds,
+                         &ps->res_output_fds,
+                         NULL,
+                         tv);
+#ifdef ERTS_SMP
+            if (res < 0
+                && errno == EBADF
+                && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+                /*
+                 * This may have happened because another thread deselected
+                 * a fd in our poll set and then closed it, i.e. the driver
+                 * behaved correctly. We want to avoid looking for a bad
+                 * fd that may not even exist anymore. Therefore, handle
+                 * update requests and try again.
+                 *
+                 * We don't know how much of the timeout is left; therefore,
+                 * we use a zero timeout. If no error occurs and no events
+                 * have triggered, we fake an EAGAIN error and let the caller
+                 * restart us.
+                 */
+                SysTimeval zero_tv = {0, 0};
+                *ps_locked = 1;
+                ERTS_POLLSET_LOCK(ps);
+                handle_update_requests(ps);
+                res = select(ps->max_fd + 1,
+                             &ps->res_input_fds,
+                             &ps->res_output_fds,
+                             NULL,
+                             &zero_tv);
+                if (res == 0) {
+                    errno = EAGAIN;
+                    res = -1;
+                }
+            }
+#endif /* ERTS_SMP */
+            return res;
+#endif /* ----------------------------------------- */
+        }
+    }
+}
+
+int
+ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
+                                 ErtsPollResFd pr[],
+                                 int *len,
+                                 SysTimeval *utvp)
+{
+    int res, no_fds;
+    int ebadf = 0;
+    int ps_locked;
+    SysTimeval *tvp;
+    SysTimeval itv;
+
+    no_fds = *len;
+#ifdef ERTS_POLL_MAX_RES
+    if (no_fds >= ERTS_POLL_MAX_RES)
+        no_fds = ERTS_POLL_MAX_RES;
+#endif
+
+    *len = 0;
+
+    ASSERT(utvp);
+
+    tvp = utvp;
+
+#ifdef ERTS_POLL_DEBUG_PRINT
+    erts_printf("Entering erts_poll_wait(), timeout=%d\n",
+                (int) tvp->tv_sec*1000 + tvp->tv_usec/1000);
+#endif
+
+    ERTS_POLLSET_UNSET_POLLER_WOKEN(ps);
+    if (ERTS_POLLSET_SET_POLLED_CHK(ps)) {
+        res = EINVAL; /* Another thread is in erts_poll_wait()
+                         on this pollset... */
+        goto done;
+    }
+
+    if (ERTS_POLLSET_IS_INTERRUPTED(ps)) {
+        /* Interrupted; use a zero timeout */
+        itv.tv_sec = 0;
+        itv.tv_usec = 0;
+        tvp = &itv;
+    }
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+    if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+        ERTS_POLLSET_LOCK(ps);
+        handle_update_requests(ps);
+        ERTS_POLLSET_UNLOCK(ps);
+    }
+#endif
+
+    ps_locked = 0;
+    res = check_fd_events(ps, tvp, no_fds, &ps_locked);
+
+    ERTS_POLLSET_SET_POLLER_WOKEN(ps);
+
+    if (res == 0) {
+        res = ETIMEDOUT;
+    }
+    else if (res < 0) {
+#if ERTS_POLL_USE_SELECT
+        if (errno == EBADF) {
+            ebadf = 1;
+            goto save_results;
+        }
+#endif
+        res = errno;
+    }
+    else {
+#if ERTS_POLL_USE_SELECT
+    save_results:
+#endif
+
+#ifdef ERTS_SMP
+        if (!ps_locked) {
+            ps_locked = 1;
+            ERTS_POLLSET_LOCK(ps);
+        }
+#endif
+
+        no_fds = save_poll_result(ps, pr, no_fds, res, ebadf);
+
+#ifdef HARD_DEBUG
+        check_poll_result(pr, no_fds);
+#endif
+
+        res = (no_fds == 0
+               ? (ERTS_POLLSET_UNSET_INTERRUPTED_CHK(ps) ? EINTR : EAGAIN)
+               : 0);
+        *len = no_fds;
+    }
+
+#ifdef ERTS_SMP
+    if (ps_locked)
+        ERTS_POLLSET_UNLOCK(ps);
+    ERTS_POLLSET_UNSET_POLLED(ps);
+#endif
+
+ done:
+    erts_smp_atomic_set(&ps->timeout, LONG_MAX);
+#ifdef ERTS_POLL_DEBUG_PRINT
+    erts_printf("Leaving %s = erts_poll_wait()\n",
+                res == 0 ? "0" : erl_errno_id(res));
+#endif
+
+    return res;
+}
+
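+/*
+ * Illustrative sketch only: a minimal caller loop for the wait
+ * interface above, assuming an already created pollset and a
+ * hypothetical handle_ready_fd() callback; the ERTS_POLL_EXAMPLE_ONLY
+ * guard is likewise hypothetical and never defined, so the sketch is
+ * compiled out.
+ */
+#ifdef ERTS_POLL_EXAMPLE_ONLY
+static void
+example_wait_loop(ErtsPollSet ps)
+{
+    ErtsPollResFd pr[256];
+    for (;;) {
+        int i;
+        int len = (int) (sizeof(pr)/sizeof(pr[0]));
+        SysTimeval tv;
+        tv.tv_sec = 1; /* wait at most one second per round */
+        tv.tv_usec = 0;
+        switch (ERTS_POLL_EXPORT(erts_poll_wait)(ps, pr, &len, &tv)) {
+        case 0:         /* len entries of pr[] are ready */
+        case EINTR:     /* interrupted; len may still be 0 */
+        case EAGAIN:    /* poll again */
+        case ETIMEDOUT: /* timeout; len is 0 */
+            break;
+        default:        /* unexpected error */
+            return;
+        }
+        for (i = 0; i < len; i++)
+            handle_ready_fd(pr[i].fd, pr[i].events); /* hypothetical */
+    }
+}
+#endif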
"0" : erl_errno_id(res)); +#endif + + return res; +} + +/* + * --- Interrupt a thread doing erts_poll_wait() ----------------------------- + */ + +void +ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set) +{ + /* + * NOTE: This function might be called from signal handlers in the + * non-smp case; therefore, it has to be async-signal safe in + * the non-smp case. + */ + if (set) { + ERTS_POLLSET_SET_INTERRUPTED(ps); +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP) + wake_poller(ps); +#endif + } + else { + ERTS_POLLSET_UNSET_INTERRUPTED(ps); + } +} + +/* + * erts_poll_interrupt_timed(): + * If 'set' != 0, interrupt thread blocked in erts_poll_wait() if it + * is not guaranteed that it will timeout before 'msec' milli seconds. + */ +void +ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps, int set, long msec) +{ + if (set) { + if (erts_smp_atomic_read(&ps->timeout) > msec) { + ERTS_POLLSET_SET_INTERRUPTED(ps); +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP) + wake_poller(ps); +#endif + } +#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS + else { + if (ERTS_POLLSET_IS_POLLED(ps)) + erts_smp_atomic_inc(&ps->no_avoided_wakeups); + erts_smp_atomic_inc(&ps->no_avoided_interrupts); + } + erts_smp_atomic_inc(&ps->no_interrupt_timed); +#endif + } + else { + ERTS_POLLSET_UNSET_INTERRUPTED(ps); + } +} + +int +ERTS_POLL_EXPORT(erts_poll_max_fds)(void) +{ + return max_fds; +} +/* + * --- Initialization -------------------------------------------------------- + */ + +#ifdef VXWORKS +extern int erts_vxworks_max_files; +#endif + +void +ERTS_POLL_EXPORT(erts_poll_init)(void) +{ + erts_smp_spinlock_init(&pollsets_lock, "pollsets_lock"); + pollsets = NULL; + + errno = 0; + +#if defined(VXWORKS) + max_fds = erts_vxworks_max_files; +#elif !defined(NO_SYSCONF) + max_fds = sysconf(_SC_OPEN_MAX); +#elif ERTS_POLL_USE_SELECT + max_fds = NOFILE; +#else + max_fds = OPEN_MAX; +#endif + +#if ERTS_POLL_USE_SELECT && defined(FD_SETSIZE) + if (max_fds > FD_SETSIZE) + max_fds = FD_SETSIZE; +#endif + + if (max_fds < 0) + fatal_error("erts_poll_init(): Failed to get max number of files: %s\n", + erl_errno_id(errno)); + +#ifdef ERTS_POLL_DEBUG_PRINT + print_misc_debug_info(); +#endif +} + +ErtsPollSet +ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) +{ +#if ERTS_POLL_USE_KERNEL_POLL + int kp_fd; +#endif + ErtsPollSet ps = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(struct ErtsPollSet_)); + ps->internal_fd_limit = 0; + ps->fds_status = NULL; + ps->fds_status_len = 0; + ps->no_of_user_fds = 0; +#if ERTS_POLL_USE_KERNEL_POLL + ps->kp_fd = -1; +#if ERTS_POLL_USE_EPOLL + kp_fd = epoll_create(256); + ps->res_events_len = 0; + ps->res_events = NULL; +#elif ERTS_POLL_USE_DEVPOLL + kp_fd = open("/dev/poll", O_RDWR); + ps->res_events_len = 0; + ps->res_events = NULL; +#elif ERTS_POLL_USE_KQUEUE + kp_fd = kqueue(); + ps->res_events_len = 0; + ps->res_events = NULL; +#endif + if (kp_fd < 0) + fatal_error("erts_poll_create_pollset(): Failed to " +#if ERTS_POLL_USE_EPOLL + "create epoll set" +#elif ERTS_POLL_USE_DEVPOLL + "to open /dev/poll" +#elif ERTS_POLL_USE_KQUEUE + "create kqueue" +#endif + ": %s (%d)\n", + erl_errno_id(errno), errno); +#endif /* ERTS_POLL_USE_KERNEL_POLL */ +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + /* res_events is also used as write buffer */ + grow_res_events(ps, ERTS_POLL_MIN_BATCH_BUF_SIZE); +#endif +#if ERTS_POLL_USE_POLL + ps->next_poll_fds_ix = 0; + ps->no_poll_fds = 0; + ps->poll_fds_len = 0; + ps->poll_fds = NULL; +#elif ERTS_POLL_USE_SELECT + ps->next_sel_fd = 0; + ps->max_fd = 
-1; +#if ERTS_POLL_USE_FALLBACK + ps->no_select_fds = 0; +#endif + FD_ZERO(&ps->input_fds); + FD_ZERO(&ps->res_input_fds); + FD_ZERO(&ps->output_fds); + FD_ZERO(&ps->res_output_fds); +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + ps->update_requests.next = NULL; + ps->update_requests.len = 0; + ps->curr_upd_req_block = &ps->update_requests; + erts_smp_atomic_init(&ps->have_update_requests, 0); +#endif +#ifdef ERTS_SMP + erts_smp_atomic_init(&ps->polled, 0); + erts_smp_atomic_init(&ps->woken, 0); + erts_smp_mtx_init(&ps->mtx, "pollset"); +#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT + ps->woken = 0; +#endif +#if ERTS_POLL_USE_WAKEUP_PIPE + create_wakeup_pipe(ps); +#endif +#if ERTS_POLL_USE_FALLBACK + if (kp_fd >= ps->fds_status_len) + grow_fds_status(ps, kp_fd); + /* Force kernel poll fd into fallback (poll/select) set */ + ps->fds_status[kp_fd].flags + |= ERTS_POLL_FD_FLG_INFLBCK|ERTS_POLL_FD_FLG_USEFLBCK; + { + int do_wake = 0; + ERTS_POLL_EXPORT(erts_poll_control)(ps, kp_fd, ERTS_POLL_EV_IN, 1, + &do_wake); + } +#endif +#if ERTS_POLL_USE_KERNEL_POLL + if (ps->internal_fd_limit <= kp_fd) + ps->internal_fd_limit = kp_fd + 1; + ps->kp_fd = kp_fd; +#endif +#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT && !defined(ERTS_SMP) + ps->interrupt = 0; +#else + erts_smp_atomic_init(&ps->interrupt, 0); +#endif + erts_smp_atomic_init(&ps->timeout, LONG_MAX); +#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS + erts_smp_atomic_init(&ps->no_avoided_wakeups, 0); + erts_smp_atomic_init(&ps->no_avoided_interrupts, 0); + erts_smp_atomic_init(&ps->no_interrupt_timed, 0); +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + handle_update_requests(ps); +#endif +#if ERTS_POLL_USE_FALLBACK + ps->fallback_used = 0; +#endif + ps->no_of_user_fds = 0; /* Don't count wakeup pipe and fallback fd */ + + erts_smp_spin_lock(&pollsets_lock); + ps->next = pollsets; + pollsets = ps; + erts_smp_spin_unlock(&pollsets_lock); + + return ps; +} + +void +ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) +{ + + if (ps->fds_status) + erts_free(ERTS_ALC_T_FD_STATUS, (void *) ps->fds_status); + +#if ERTS_POLL_USE_EPOLL + if (ps->kp_fd >= 0) + close(ps->kp_fd); + if (ps->res_events) + erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); +#elif ERTS_POLL_USE_DEVPOLL + if (ps->kp_fd >= 0) + close(ps->kp_fd); + if (ps->res_events) + erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); +#elif ERTS_POLL_USE_POLL + if (ps->poll_fds) + erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds); +#elif ERTS_POLL_USE_SELECT +#endif +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + { + ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next; + while (urqbp) { + ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; + urqbp = urqbp->next; + free_update_requests_block(ps, free_urqbp); + } + } +#endif +#ifdef ERTS_SMP + erts_smp_mtx_destroy(&ps->mtx); +#endif +#if ERTS_POLL_USE_WAKEUP_PIPE + if (ps->wake_fds[0] >= 0) + close(ps->wake_fds[0]); + if (ps->wake_fds[1] >= 0) + close(ps->wake_fds[1]); +#endif + + erts_smp_spin_lock(&pollsets_lock); + if (ps == pollsets) + pollsets = pollsets->next; + else { + ErtsPollSet prev_ps; + for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next); + ASSERT(ps == prev_ps->next); + prev_ps->next = ps->next; + } + erts_smp_spin_unlock(&pollsets_lock); + + erts_free(ERTS_ALC_T_POLLSET, (void *) ps); +} + +/* + * --- Info ------------------------------------------------------------------ + */ + +void +ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) +{ +#if 
ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+    int pending_updates;
+#endif
+    Uint size = 0;
+
+    ERTS_POLLSET_LOCK(ps);
+
+    size += sizeof(struct ErtsPollSet_);
+    size += ps->fds_status_len*sizeof(ErtsFdStatus);
+
+#if ERTS_POLL_USE_EPOLL
+    size += ps->res_events_len*sizeof(struct epoll_event);
+#elif ERTS_POLL_USE_DEVPOLL
+    size += ps->res_events_len*sizeof(struct pollfd);
+#elif ERTS_POLL_USE_KQUEUE
+    size += ps->res_events_len*sizeof(struct kevent);
+#endif
+
+#if ERTS_POLL_USE_POLL
+    size += ps->poll_fds_len*sizeof(struct pollfd);
+#elif ERTS_POLL_USE_SELECT
+#endif
+
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+    {
+        ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next;
+        pending_updates = ps->update_requests.len;
+        while (urqbp) {
+            size += sizeof(ErtsPollSetUpdateRequestsBlock);
+            pending_updates += urqbp->len;
+            urqbp = urqbp->next;
+        }
+    }
+#endif
+
+    pip->primary =
+#if ERTS_POLL_USE_KQUEUE
+        "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+        "epoll"
+#elif ERTS_POLL_USE_DEVPOLL
+        "/dev/poll"
+#elif ERTS_POLL_USE_POLL
+        "poll"
+#elif ERTS_POLL_USE_SELECT
+        "select"
+#endif
+        ;
+
+    pip->fallback =
+#if !ERTS_POLL_USE_FALLBACK
+        NULL
+#elif ERTS_POLL_USE_POLL
+        "poll"
+#elif ERTS_POLL_USE_SELECT
+        "select"
+#endif
+        ;
+
+    pip->kernel_poll =
+#if !ERTS_POLL_USE_KERNEL_POLL
+        NULL
+#elif ERTS_POLL_USE_KQUEUE
+        "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+        "epoll"
+#elif ERTS_POLL_USE_DEVPOLL
+        "/dev/poll"
+#endif
+        ;
+
+    pip->memory_size = size;
+
+    pip->poll_set_size = ps->no_of_user_fds;
+#ifdef ERTS_SMP
+    pip->poll_set_size++; /* Wakeup pipe */
+#endif
+
+    pip->fallback_poll_set_size =
+#if !ERTS_POLL_USE_FALLBACK
+        0
+#elif ERTS_POLL_USE_POLL
+        ps->no_poll_fds
+#elif ERTS_POLL_USE_SELECT
+        ps->no_select_fds
+#endif
+        ;
+
+#if ERTS_POLL_USE_FALLBACK
+    /* If only kp_fd is in fallback poll set we don't use fallback... */
+    if (pip->fallback_poll_set_size == 1)
+        pip->fallback_poll_set_size = 0;
+    else
+        pip->poll_set_size++; /* kp_fd */
+#endif
+
+    pip->lazy_updates =
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+        1
+#else
+        0
+#endif
+        ;
+
+    pip->pending_updates =
+#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+        pending_updates
+#else
+        0
+#endif
+        ;
+
+    pip->batch_updates =
+#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+        1
+#else
+        0
+#endif
+        ;
+
+    pip->concurrent_updates =
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+        1
+#else
+        0
+#endif
+        ;
+
+    pip->max_fds = max_fds;
+
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+    pip->no_avoided_wakeups = erts_smp_atomic_read(&ps->no_avoided_wakeups);
+    pip->no_avoided_interrupts = erts_smp_atomic_read(&ps->no_avoided_interrupts);
+    pip->no_interrupt_timed = erts_smp_atomic_read(&ps->no_interrupt_timed);
+#endif
+
+    ERTS_POLLSET_UNLOCK(ps);
+
+}
+
+/*
+ * Fatal error...
+ */
+
+#ifndef ERTS_GOT_SIGUSR1
+#  define ERTS_GOT_SIGUSR1 0
+#endif
+
+static void
+fatal_error(char *format, ...)
+{
+    va_list ap;
+
+    if (ERTS_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) {
+        /*
+         * Crash dump writing and reception of sigusr1 (which will
+         * result in a crash dump) close all file descriptors. This
+         * typically results in a fatal error for erts_poll() (wakeup
+         * pipes and kernel poll fds are closed).
+         *
+         * We ignore the error and let the crash dump writing continue...
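+         * Calling abort() here would only risk truncating that crash
+         * dump, which is worth more than this error report.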
+ */ + return; + } + va_start(ap, format); + erts_vfprintf(stderr, format, ap); + va_end(ap); + abort(); +} + +static void +fatal_error_async_signal_safe(char *error_str) +{ + if (ERTS_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) { + /* See comment above in fatal_error() */ + return; + } + if (error_str) { + int len = 0; + while (error_str[len]) + len++; + if (len) + (void) write(2, error_str, len); /* async signal safe */ + } + abort(); +} + +/* + * --- Debug ----------------------------------------------------------------- + */ + +void +ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, + ErtsPollEvents ev[], + int len) +{ + int fd; + ERTS_POLLSET_LOCK(ps); + for (fd = 0; fd < len; fd++) { + if (fd >= ps->fds_status_len) + ev[fd] = 0; + else { + ev[fd] = ps->fds_status[fd].events; +#if ERTS_POLL_USE_WAKEUP_PIPE + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) + ev[fd] |= ERTS_POLL_EV_NVAL; +#endif +#if ERTS_POLL_USE_KERNEL_POLL + if (fd == ps->kp_fd) + ev[fd] |= ERTS_POLL_EV_NVAL; +#endif + } + } + ERTS_POLLSET_UNLOCK(ps); + +} + +#ifdef HARD_DEBUG + +static void +check_poll_result(ErtsPollResFd pr[], int len) +{ + int i, j; + + for (i = 0; i < len; i++) { + ASSERT(pr[i].fd >= 0); + ASSERT(pr[i].fd < max_fds); + for (j = 0; j < len; j++) { + ASSERT(i == j || pr[i].fd != pr[j].fd); + } + } +} + + +#if ERTS_POLL_USE_DEVPOLL + +static void +check_poll_status(ErtsPollSet ps) +{ + int i; + for (i = 0; i < ps->fds_status_len; i++) { + int ires; + struct pollfd dp_fd; + short events = ERTS_POLL_EV_E2N(ps->fds_status[i].events); + + dp_fd.fd = i; + dp_fd.events = (short) 0; + dp_fd.revents = (short) 0; + + ires = ioctl(ps->kp_fd, DP_ISPOLLED, &dp_fd); + + if (ires == 0) { + ASSERT(!events); + } + else if (ires == 1) { + ASSERT(events); + ASSERT(events == dp_fd.revents); + } + else { + ASSERT(0); + } + ASSERT(dp_fd.fd == i); + ASSERT(ps->fds_status[i].events == ps->fds_status[i].used_events); + } +} + +#endif /* ERTS_POLL_USE_DEVPOLL */ +#endif /* HARD_DEBUG */ + +#ifdef ERTS_POLL_DEBUG_PRINT +static void +print_misc_debug_info(void) +{ + erts_printf("erts_poll using: %s lazy_updates:%s batch_updates:%s\n", +#if ERTS_POLL_USE_KQUEUE + "kqueue" +#elif ERTS_POLL_USE_EPOLL + "epoll" +#elif ERTS_POLL_USE_DEVPOLL + "/dev/poll" +#endif +#if ERTS_POLL_USE_FALLBACK + "-" +#endif +#if ERTS_POLL_USE_POLL + "poll" +#elif ERTS_POLL_USE_SELECT + "select" +#endif + , +#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + "true" +#else + "false" +#endif + , +#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET + "true" +#else + "false" +#endif + ); + + erts_printf("ERTS_POLL_EV_IN=0x%x\n" + "ERTS_POLL_EV_OUT=0x%x\n" + "ERTS_POLL_EV_NVAL=0x%x\n" + "ERTS_POLL_EV_ERR=0x%x\n", + ERTS_POLL_EV_IN, + ERTS_POLL_EV_OUT, + ERTS_POLL_EV_NVAL, + ERTS_POLL_EV_ERR); + +#ifdef FD_SETSIZE + erts_printf("FD_SETSIZE=%d\n", FD_SETSIZE); +#endif +} + +#endif diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h new file mode 100644 index 0000000000..725a77a152 --- /dev/null +++ b/erts/emulator/sys/common/erl_poll.h @@ -0,0 +1,246 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. 
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description:	Poll interface suitable for ERTS with or without
+ *              SMP support.
+ *
+ * Author: 	Rickard Green
+ */
+
+#ifndef ERL_POLL_H__
+#define ERL_POLL_H__
+
+#include "sys.h"
+
+#if 0
+#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+#endif
+
+#ifdef ERTS_ENABLE_KERNEL_POLL
+#  if defined(ERTS_KERNEL_POLL_VERSION)
+#    define ERTS_POLL_EXPORT(FUNC) FUNC ## _kp
+#  else
+#    define ERTS_POLL_EXPORT(FUNC) FUNC ## _nkp
+#    undef ERTS_POLL_DISABLE_KERNEL_POLL
+#    define ERTS_POLL_DISABLE_KERNEL_POLL
+#  endif
+#else
+#  define ERTS_POLL_EXPORT(FUNC) FUNC
+#  undef ERTS_POLL_DISABLE_KERNEL_POLL
+#  define ERTS_POLL_DISABLE_KERNEL_POLL
+#endif
+
+#ifdef ERTS_POLL_DISABLE_KERNEL_POLL
+#  undef HAVE_SYS_EPOLL_H
+#  undef HAVE_SYS_EVENT_H
+#  undef HAVE_SYS_DEVPOLL_H
+#endif
+
+#undef ERTS_POLL_USE_KERNEL_POLL
+#define ERTS_POLL_USE_KERNEL_POLL 0
+
+#undef ERTS_POLL_USE_KQUEUE
+#define ERTS_POLL_USE_KQUEUE 0
+#undef ERTS_POLL_USE_EPOLL
+#define ERTS_POLL_USE_EPOLL 0
+#undef ERTS_POLL_USE_DEVPOLL
+#define ERTS_POLL_USE_DEVPOLL 0
+#undef ERTS_POLL_USE_POLL
+#define ERTS_POLL_USE_POLL 0
+#undef ERTS_POLL_USE_SELECT
+#define ERTS_POLL_USE_SELECT 0
+
+#if defined(HAVE_SYS_EVENT_H)
+#  undef ERTS_POLL_USE_KQUEUE
+#  define ERTS_POLL_USE_KQUEUE 1
+#  undef ERTS_POLL_USE_KERNEL_POLL
+#  define ERTS_POLL_USE_KERNEL_POLL 1
+#elif defined(HAVE_SYS_EPOLL_H)
+#  undef ERTS_POLL_USE_EPOLL
+#  define ERTS_POLL_USE_EPOLL 1
+#  undef ERTS_POLL_USE_KERNEL_POLL
+#  define ERTS_POLL_USE_KERNEL_POLL 1
+#elif defined(HAVE_SYS_DEVPOLL_H)
+#  undef ERTS_POLL_USE_DEVPOLL
+#  define ERTS_POLL_USE_DEVPOLL 1
+#  undef ERTS_POLL_USE_KERNEL_POLL
+#  define ERTS_POLL_USE_KERNEL_POLL 1
+#endif
+
+#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+
+#if !ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_FALLBACK
+#  if defined(ERTS_USE_POLL)
+#    undef ERTS_POLL_USE_POLL
+#    define ERTS_POLL_USE_POLL 1
+#  elif !defined(__WIN32__)
+#    undef ERTS_POLL_USE_SELECT
+#    define ERTS_POLL_USE_SELECT 1
+#  endif
+#endif
+
+typedef Uint32 ErtsPollEvents;
+#undef ERTS_POLL_EV_E2N
+
+#if defined(__WIN32__) /* --- win32 ------------------------------- */
+
+#define ERTS_POLL_EV_IN 1
+#define ERTS_POLL_EV_OUT 2
+#define ERTS_POLL_EV_ERR 4
+#define ERTS_POLL_EV_NVAL 8
+
+#elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+
+#include <sys/epoll.h>
+
+#define ERTS_POLL_EV_E2N(EV) \
+  ((__uint32_t) (EV))
+#define ERTS_POLL_EV_N2E(EV) \
+  ((ErtsPollEvents) (EV))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(EPOLLIN)
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(EPOLLOUT)
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(EPOLLET)
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(EPOLLERR|EPOLLHUP)
+
+#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+
+#include <sys/devpoll.h>
+
+#define ERTS_POLL_EV_E2N(EV) \
+  ((short) ((EV) & ~((~((ErtsPollEvents) 0)) << 8*SIZEOF_SHORT)))
+#define ERTS_POLL_EV_N2E(EV) \
+  ((ErtsPollEvents) ((unsigned short) (EV)))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(POLLIN)
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(POLLOUT)
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
+
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+/* Kqueue uses the fallback defines (poll() or select()) */
+#endif
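+
+/*
+ * The ERTS_POLL_EV_NKP_* ("no kernel poll") definitions below supply
+ * the event encoding for the fallback interface; when no kernel poll
+ * mechanism has defined its own mapping, they are aliased to the
+ * plain ERTS_POLL_EV_* names further down.
+ */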
+
+#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+
+#include <poll.h>
+
+#define ERTS_POLL_EV_NKP_E2N(EV) \
+  ((short) ((EV) & ~((~((ErtsPollEvents) 0)) << 8*SIZEOF_SHORT)))
+#define ERTS_POLL_EV_NKP_N2E(EV) \
+  ((ErtsPollEvents) ((unsigned short) (EV)))
+
+/* At least on FreeBSD, we need POLLRDNORM for normal files, not POLLIN. */
+/* Whether this is a bug in FreeBSD, I don't know. */
+#ifdef POLLRDNORM
+#define ERTS_POLL_EV_NKP_IN ERTS_POLL_EV_N2E(POLLIN|POLLRDNORM)
+#else
+#define ERTS_POLL_EV_NKP_IN ERTS_POLL_EV_N2E(POLLIN)
+#endif
+#define ERTS_POLL_EV_NKP_OUT ERTS_POLL_EV_N2E(POLLOUT)
+#define ERTS_POLL_EV_NKP_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
+#define ERTS_POLL_EV_NKP_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
+
+#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+
+#define ERTS_POLL_EV_NKP_E2N(EV) (EV)
+#define ERTS_POLL_EV_NKP_N2E(EV) (EV)
+
+#define ERTS_POLL_EV_NKP_IN (((ErtsPollEvents) 1) << 0)
+#define ERTS_POLL_EV_NKP_OUT (((ErtsPollEvents) 1) << 1)
+#define ERTS_POLL_EV_NKP_NVAL (((ErtsPollEvents) 1) << 2)
+#define ERTS_POLL_EV_NKP_ERR (((ErtsPollEvents) 1) << 3)
+
+#endif /* ----------------------------------------- */
+
+
+#if !defined(ERTS_POLL_EV_E2N) && defined(ERTS_POLL_EV_NKP_E2N)
+/* poll(), select(), and kqueue() */
+
+#define ERTS_POLL_EV_E2N(EV) ERTS_POLL_EV_NKP_E2N((EV))
+#define ERTS_POLL_EV_N2E(EV) ERTS_POLL_EV_NKP_N2E((EV))
+
+#define ERTS_POLL_EV_IN ERTS_POLL_EV_NKP_IN
+#define ERTS_POLL_EV_OUT ERTS_POLL_EV_NKP_OUT
+#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_NKP_NVAL
+#define ERTS_POLL_EV_ERR ERTS_POLL_EV_NKP_ERR
+
+#endif
+
+typedef struct ErtsPollSet_ *ErtsPollSet;
+
+typedef struct {
+    ErtsSysFdType fd;
+    ErtsPollEvents events;
+    int on;
+} ErtsPollControlEntry;
+
+typedef struct {
+    ErtsSysFdType fd;
+    ErtsPollEvents events;
+} ErtsPollResFd;
+
+typedef struct {
+    char *primary;
+    char *fallback;
+    char *kernel_poll;
+    Uint memory_size;
+    int poll_set_size;
+    int fallback_poll_set_size;
+    int lazy_updates;
+    int pending_updates;
+    int batch_updates;
+    int concurrent_updates;
+    int max_fds;
+#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
+    long no_avoided_wakeups;
+    long no_avoided_interrupts;
+    long no_interrupt_timed;
+#endif
+} ErtsPollInfo;
+
+void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet,
+                                           int);
+void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet,
+                                                 int,
+                                                 long);
+ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet,
+                                                   ErtsSysFdType,
+                                                   ErtsPollEvents,
+                                                   int on,
+                                                   int* wake_poller);
+void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet,
+                                          ErtsPollControlEntry [],
+                                          int len);
+int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet,
+                                     ErtsPollResFd [],
+                                     int *,
+                                     SysTimeval *);
+int ERTS_POLL_EXPORT(erts_poll_max_fds)(void);
+void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet,
+                                      ErtsPollInfo *);
+ErtsPollSet ERTS_POLL_EXPORT(erts_poll_create_pollset)(void);
+void ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet);
+void ERTS_POLL_EXPORT(erts_poll_init)(void);
+void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet,
+                                                     ErtsPollEvents [],
+                                                     int);
+
+#endif /* #ifndef ERL_POLL_H__ */
-- 
cgit v1.2.3