diff options
Diffstat (limited to 'erts/emulator/sys/common/erl_check_io.c')
-rw-r--r-- | erts/emulator/sys/common/erl_check_io.c | 3400 |
1 files changed, 1593 insertions, 1807 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 799f67fc45..fb18c837ab 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2017. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,6 @@ #endif #define ERL_CHECK_IO_C__ -#define ERTS_WANT_BREAK_HANDLING #ifndef WANT_NONBLOCKING # define WANT_NONBLOCKING #endif @@ -44,76 +43,101 @@ #define ERTS_WANT_TIMER_WHEEL_API #include "erl_time.h" +#if 0 +#define DEBUG_PRINT(FMT, ...) do { erts_printf(FMT "\r\n", ##__VA_ARGS__); fflush(stdout); } while(0) +#define DEBUG_PRINT_FD(FMT, STATE, ...) \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ + (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ + ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ + ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) +#define DEBUG_PRINT_MODE +#else +#define DEBUG_PRINT(...) +#endif + +#ifndef DEBUG_PRINT_FD +#define DEBUG_PRINT_FD(...) +#endif + #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS # include "safe_hash.h" # define DRV_EV_STATE_HTAB_SIZE 1024 #endif -typedef char EventStateType; -#define ERTS_EV_TYPE_NONE ((EventStateType) 0) -#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */ -#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */ -#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */ -#define ERTS_EV_TYPE_NIF ((EventStateType) 4) /* enif_select */ -#define ERTS_EV_TYPE_STOP_NIF ((EventStateType) 5) /* pending nif stop */ - -typedef char EventStateFlags; -#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ -#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2) -#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4) - -#ifdef DEBUG -# define ERTS_ACTIVE_FD_INC 2 +typedef enum { + ERTS_EV_TYPE_NONE = 0, + ERTS_EV_TYPE_DRV_SEL = 1, /* driver_select */ + ERTS_EV_TYPE_STOP_USE = 2, /* pending stop_select */ + ERTS_EV_TYPE_NIF = 3, /* enif_select */ + ERTS_EV_TYPE_STOP_NIF = 4 /* pending nif stop */ +} EventStateType; + +typedef enum { + ERTS_EV_FLAG_CLEAR = 0, + ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ #else -# define ERTS_ACTIVE_FD_INC 128 + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, #endif - -#define ERTS_CHECK_IO_POLL_RES_LEN 512 - -#if defined(ERTS_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp -#elif defined(ERTS_NO_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd + and it was put in the nkp version */ #else -# define ERTS_CIO_EXPORT(FUNC) FUNC -#endif + ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, +#endif + + /* Combinations */ + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER +} EventStateFlags; + +#define flag2str(flags) \ + ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ + ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ + ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) + +/* How many events that can be handled at once by one erts_poll_wait call */ +#define ERTS_CHECK_IO_POLL_RES_LEN 512 -#define ERTS_CIO_HAVE_DRV_EVENT \ - (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL) +/* Each I/O Poll Thread has one ErtsPollThread each. The ps field + can point to either a private ErtsPollSet or a shared one. + At the moment only kqueue and epoll pollsets can be + shared across threads. +*/ +typedef struct erts_poll_thread +{ + ErtsPollSet *ps; + ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; + int pollres_len; +} ErtsPollThread; -#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control) -#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv) -#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait) -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt) +/* pollsetv contains pointers to the ErtsPollSets that are in use. + * Which pollset to use is determined by hashing the fd. + */ +static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; +#if ERTS_POLL_USE_FALLBACK +static ErtsPollSet *flbk_pollset; +#endif +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; #endif -#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt) -#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed) -#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset) -#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset) -#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds) -#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init) -#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info) - -#define GET_FD(fd) fd - -static struct pollset_info -{ - ErtsPollSet ps; - erts_smp_atomic_t in_poll_wait; /* set while doing poll */ - struct { - int six; /* start index */ - int eix; /* end index */ - erts_smp_atomic32_t no; - int size; - ErtsSysFdType *array; - } active_fd; -#ifdef ERTS_SMP - struct removed_fd* removed_list; /* list of deselected fd's*/ - erts_smp_spinlock_t removed_list_lock; -#endif -}pollset; -#define NUM_OF_POLLSETS 1 typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -122,98 +146,153 @@ typedef struct { ErtsSysFdType fd; struct { ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */ -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ -#endif ErtsNifSelectDataState *nif; /* ERTS_EV_TYPE_NIF */ union { erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ - }stop; + } stop; } driver; - ErtsPollEvents events; - unsigned short remove_cnt; /* number of removed_fd's referring to this fd */ + ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of times this fd has triggered + without being deselected. */ } ErtsDrvEventState; -#ifdef ERTS_SMP -struct removed_fd { - struct removed_fd *next; +struct drv_ev_state_shared { + + union { + erts_mtx_t lck; + byte _cache_line_alignment[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_mtx_t))]; + } locks[ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT]; + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - ErtsSysFdType fd; + int max_fds; + erts_atomic_t len; + ErtsDrvEventState *v; + erts_mtx_t grow_lock; /* prevent lock-hogging of racing growers */ #else - ErtsDrvEventState* state; - #ifdef DEBUG - ErtsSysFdType fd; - #endif + SafeHash tab; + int num_prealloc; + ErtsDrvEventState *prealloc_first; + erts_spinlock_t prealloc_lock; #endif - }; -#endif -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static int max_fds = -1; -#endif -#define DRV_EV_STATE_LOCK_CNT 16 -static union { - erts_smp_mtx_t lck; - byte _cache_line_alignment[64]; -}drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT]; +int ERTS_WRITE_UNLIKELY(erts_no_pollsets) = 1; +int ERTS_WRITE_UNLIKELY(erts_no_poll_threads) = 1; +struct drv_ev_state_shared drv_ev_state; -#ifdef ERTS_SMP -static ERTS_INLINE erts_smp_mtx_t* fd_mtx(ErtsSysFdType fd) -{ +static ERTS_INLINE int fd_hash(ErtsSysFdType fd) { int hash = (int)fd; # ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS hash ^= (hash >> 9); # endif - return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck; + return hash; +} + +static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd) +{ + return &drv_ev_state.locks[fd_hash(fd) % ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT].lck; } -#else -# define fd_mtx(fd) NULL -#endif #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static erts_smp_atomic_t drv_ev_state_len; -static ErtsDrvEventState *drv_ev_state; -static erts_smp_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */ +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) +{ + return &drv_ev_state.v[(int) fd]; +} -#else -static SafeHash drv_ev_state_tab; -static int num_state_prealloc; -static ErtsDrvEventState *state_prealloc_first; -erts_smp_spinlock_t state_prealloc_lock; +#define new_drv_ev_state(State, fd) (State) +#define erase_drv_ev_state(State) + +static ERTS_INLINE int grow_drv_ev_state(ErtsSysFdType fd) { + int i; + int old_len; + int new_len; + + if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state.len)) { + + if (fd < 0 || fd >= drv_ev_state.max_fds) + return 0; -static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd) + erts_mtx_lock(&drv_ev_state.grow_lock); + old_len = erts_atomic_read_nob(&drv_ev_state.len); + if (fd >= old_len) { + new_len = erts_poll_new_table_len(old_len, fd + 1); + if (new_len > drv_ev_state.max_fds) + new_len = drv_ev_state.max_fds; + + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ + erts_mtx_lock(&drv_ev_state.locks[i].lck); + } + drv_ev_state.v = (drv_ev_state.v + ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, + drv_ev_state.v, + sizeof(ErtsDrvEventState)*new_len) + : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, + sizeof(ErtsDrvEventState)*new_len)); + ERTS_CT_ASSERT(ERTS_EV_TYPE_NONE == 0); + sys_memzero(drv_ev_state.v+old_len, + sizeof(ErtsDrvEventState) * (new_len - old_len)); + for (i = old_len; i < new_len; i++) { + drv_ev_state.v[i].fd = (ErtsSysFdType) i; + } + erts_atomic_set_nob(&drv_ev_state.len, new_len); + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + } + /*else already grown by racing thread */ + + erts_mtx_unlock(&drv_ev_state.grow_lock); + } + return 1; +} + +static int drv_ev_state_len(void) +{ + return erts_atomic_read_nob(&drv_ev_state.len); +} + +#else /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ + +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) { ErtsDrvEventState tmpl; tmpl.fd = fd; - return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl); + return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state.tab, (void *) &tmpl); } -static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd) +static ERTS_INLINE ErtsDrvEventState* new_drv_ev_state(ErtsDrvEventState *state, + ErtsSysFdType fd) { ErtsDrvEventState tmpl; + + if (state) + return state; + tmpl.fd = fd; tmpl.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - tmpl.driver.event = NULL; -#endif tmpl.driver.nif = NULL; tmpl.driver.stop.drv_ptr = NULL; tmpl.events = 0; - tmpl.remove_cnt = 0; + tmpl.active_events = 0; tmpl.type = ERTS_EV_TYPE_NONE; tmpl.flags = 0; - return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl); + + return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state.tab, (void *) &tmpl); } -static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state) +static ERTS_INLINE void erase_drv_ev_state(ErtsDrvEventState *state) { - ASSERT(state->remove_cnt == 0); - safe_hash_erase(&drv_ev_state_tab, (void *) state); + safe_hash_erase(&drv_ev_state.tab, (void *) state); +} + +static int drv_ev_state_len(void) +{ + return erts_atomic_read_nob(&drv_ev_state.tab.nitems); } #endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ @@ -233,52 +312,47 @@ static void print_nif_select_op(erts_dsprintf_buf_t*, ErtsSysFdType, static void drv_select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int); static void nif_select_large_fd_error(ErtsSysFdType, int, ErtsResource*,Eterm ref); #endif -#if ERTS_CIO_HAVE_DRV_EVENT -static void drv_event_steal(ErlDrvPort ix, ErtsDrvEventState *state, - ErlDrvEventData event_data); -static void print_drv_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort, ErtsSysFdType, ErlDrvEventData); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData); -#endif -#endif static void steal_pending_stop_use(erts_dsprintf_buf_t*, ErlDrvPort, ErtsDrvEventState*, int mode, int on); static void steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource*, ErtsDrvEventState *state, int mode, int on); - -#ifdef ERTS_SMP -ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST) +static ERTS_INLINE void +check_fd_cleanup(ErtsDrvEventState *state, + ErtsDrvSelectDataState **free_select, + ErtsNifSelectDataState **free_nif); +static ERTS_INLINE void iready(Eterm id, ErtsDrvEventState *state); +static ERTS_INLINE void oready(Eterm id, ErtsDrvEventState *state); +#ifdef DEBUG_PRINT_MODE +static char *drvmode2str(int mode); +static char *nifmode2str(enum ErlNifSelectFlags mode); #endif static ERTS_INLINE void -init_iotask(ErtsIoTask *io_task) +init_iotask(ErtsIoTask *io_task, ErtsSysFdType fd) { erts_port_task_handle_init(&io_task->task); - erts_smp_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0)); + io_task->fd = fd; } static ERTS_INLINE int -is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time) -{ +is_iotask_active(ErtsIoTask *io_task) +{ if (erts_port_task_is_scheduled(&io_task->task)) return 1; - if (erts_smp_atomic_read_nob(&io_task->executed_time) == current_cio_time) - return 1; return 0; } static ERTS_INLINE ErtsDrvSelectDataState * -alloc_drv_select_data(void) +alloc_drv_select_data(ErtsSysFdType fd) { ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, sizeof(ErtsDrvSelectDataState)); dsp->inport = NIL; dsp->outport = NIL; - init_iotask(&dsp->iniotask); - init_iotask(&dsp->outiotask); + init_iotask(&dsp->iniotask, fd); + init_iotask(&dsp->outiotask, fd); return dsp; } @@ -289,8 +363,6 @@ alloc_nif_select_data(void) sizeof(ErtsNifSelectDataState)); dsp->in.pid = NIL; dsp->out.pid = NIL; - dsp->in.ddeselect_cnt = 0; - dsp->out.ddeselect_cnt = 0; return dsp; } @@ -308,209 +380,176 @@ free_nif_select_data(ErtsNifSelectDataState *dsp) erts_free(ERTS_ALC_T_NIF_SEL_D_STATE, dsp); } -#if ERTS_CIO_HAVE_DRV_EVENT - -static ERTS_INLINE ErtsDrvEventDataState * -alloc_drv_event_data(void) +static ERTS_INLINE int +get_pollset_id(ErtsSysFdType fd) { - ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, - sizeof(ErtsDrvEventDataState)); - dep->port = NIL; - dep->data = NULL; - dep->removed_events = 0; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - dep->deferred_events = 0; -#endif - init_iotask(&dep->iotask); - return dep; + return fd_hash(fd) % erts_no_pollsets; } -static ERTS_INLINE void -free_drv_event_data(ErtsDrvEventDataState *dep) +static ERTS_INLINE ErtsPollSet * +get_pollset(ErtsSysFdType fd) { - ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task)); - erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep); + return pollsetv[get_pollset_id(fd)]; } -#endif /* ERTS_CIO_HAVE_DRV_EVENT */ - -static ERTS_INLINE void -remember_removed(ErtsDrvEventState *state, struct pollset_info* psi) +#if ERTS_POLL_USE_FALLBACK +static ERTS_INLINE ErtsPollSet * +get_fallback_pollset(void) { -#ifdef ERTS_SMP - struct removed_fd *fdlp; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - if (erts_smp_atomic_read_nob(&psi->in_poll_wait)) { - state->remove_cnt++; - ASSERT(state->remove_cnt > 0); - fdlp = removed_fd_alloc(); - #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || defined(DEBUG) - fdlp->fd = state->fd; - #endif - #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - fdlp->state = state; - #endif - erts_smp_spin_lock(&psi->removed_list_lock); - fdlp->next = psi->removed_list; - psi->removed_list = fdlp; - erts_smp_spin_unlock(&psi->removed_list_lock); - } -#endif + return flbk_pollset; } +#endif - -static ERTS_INLINE int -is_removed(ErtsDrvEventState *state) -{ -#ifdef ERTS_SMP - /* Note that there is a possible race here, where an fd is removed - (increasing remove_cnt) and then added again just before erts_poll_wait - is called by erts_check_io. Any polled event on the re-added fd will then - be falsely ignored. But that does not matter, as the event will trigger - again next time erl_check_io is called. */ - return state->remove_cnt > 0; +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; #else - return 0; + return get_pollset(fd); #endif } -static void -forget_removed(struct pollset_info* psi) +/* + * Place a fd within a pollset. This will automatically use + * the fallback ps if needed. + */ +static ERTS_INLINE ErtsPollEvents +erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, + ErtsPollEvents pe, int *wake_poller) { -#ifdef ERTS_SMP - struct removed_fd* fdlp; - struct removed_fd* tofree; - - /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */ + ErtsSysFdType fd = state->fd; + ErtsPollEvents res = 0; + EventStateFlags flags = state->flags; - erts_smp_spin_lock(&psi->removed_list_lock); - fdlp = psi->removed_list; - psi->removed_list = NULL; - erts_smp_spin_unlock(&psi->removed_list_lock); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); - while (fdlp) { - ErtsResource* resource = NULL; - erts_driver_t* drv_ptr = NULL; - erts_smp_mtx_t* mtx; - ErtsSysFdType fd; - ErtsDrvEventState *state; + if (!(flags & ERTS_EV_FLAG_FALLBACK)) { -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - fd = fdlp->fd; - mtx = fd_mtx(fd); - erts_smp_mtx_lock(mtx); - state = &drv_ev_state[(int) fd]; -#else - state = fdlp->state; - fd = state->fd; - ASSERT(fd == fdlp->fd); - mtx = fd_mtx(fd); - erts_smp_mtx_lock(mtx); -#endif - ASSERT(state->remove_cnt > 0); - if (--state->remove_cnt == 0) { - switch (state->type) { - case ERTS_EV_TYPE_STOP_NIF: - /* Now we can call stop */ - resource = state->driver.stop.resource; - state->driver.stop.resource = NULL; - ASSERT(resource); - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - goto case_ERTS_EV_TYPE_NONE; - - case ERTS_EV_TYPE_STOP_USE: - /* Now we can call stop_select */ - drv_ptr = state->driver.stop.drv_ptr; - ASSERT(drv_ptr); - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.stop.drv_ptr = NULL; - /* Fall through */ - case ERTS_EV_TYPE_NONE: - case_ERTS_EV_TYPE_NONE: -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - hash_erase_drv_ev_state(state); -#endif - break; - case ERTS_EV_TYPE_DRV_SEL: - case ERTS_EV_TYPE_DRV_EV: - break; - default: - ASSERT(0); - } - } - erts_smp_mtx_unlock(mtx); - if (drv_ptr) { - int was_unmasked = erts_block_fpe(); - DTRACE1(driver_stop_select, drv_ptr->name); - LTTNG1(driver_stop_select, drv_ptr->name); - (*drv_ptr->stop_select) ((ErlDrvEvent) fd, NULL); - erts_unblock_fpe(was_unmasked); - if (drv_ptr->handle) { - erts_ddll_dereference_driver(drv_ptr->handle); - } - } - if (resource) { - erts_resource_stop(resource, (ErlNifEvent)fd, 0); - enif_release_resource(resource->data); + if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); } - tofree = fdlp; - fdlp = fdlp->next; - removed_fd_free(tofree); +#if ERTS_POLL_USE_FALLBACK + if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { + /* When an add fails with NVAL, the poll/kevent operation could not + put that fd in the pollset, so we instead put it into a fallback pollset */ + state->flags |= ERTS_EV_FLAG_FALLBACK; + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); + } + } else { + ASSERT(op != ERTS_POLL_OP_ADD); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); +#endif } -#endif /* ERTS_SMP */ + + return res; } -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -grow_drv_ev_state(int min_ix) +static ERTS_INLINE ErtsPollEvents +erts_io_control(ErtsDrvEventState *state, ErtsPollOp op, ErtsPollEvents pe) { - int i; - int old_len; - int new_len; + int wake_poller = 0; + return erts_io_control_wakeup(state, op, pe, &wake_poller); +} - erts_smp_mtx_lock(&drv_ev_state_grow_lock); - old_len = erts_smp_atomic_read_nob(&drv_ev_state_len); - if (min_ix >= old_len) { - new_len = erts_poll_new_table_len(old_len, min_ix + 1); - if (new_len > max_fds) - new_len = max_fds; +/* ToDo: Was inline in erl_check_io.h but now need struct erts_poll_thread */ +void +erts_io_notify_port_task_executed(ErtsPortTaskType type, + ErtsPortTaskHandle *pthp, + void (*reset_handle)(ErtsPortTaskHandle *)) +{ + ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); + ErtsSysFdType fd = itp->fd; + erts_mtx_t *mtx = fd_mtx(fd); + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; + ErtsDrvEventState *state; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ - erts_smp_mtx_lock(&drv_ev_state_locks[i].lck); - } - drv_ev_state = (drv_ev_state - ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, - drv_ev_state, - sizeof(ErtsDrvEventState)*new_len) - : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, - sizeof(ErtsDrvEventState)*new_len)); - for (i = old_len; i < new_len; i++) { - drv_ev_state[i].fd = (ErtsSysFdType) i; - drv_ev_state[i].driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - drv_ev_state[i].driver.event = NULL; -#endif - drv_ev_state[i].driver.stop.drv_ptr = NULL; - drv_ev_state[i].driver.nif = NULL; - drv_ev_state[i].events = 0; - drv_ev_state[i].remove_cnt = 0; - drv_ev_state[i].type = ERTS_EV_TYPE_NONE; - drv_ev_state[i].flags = 0; - } - erts_smp_atomic_set_nob(&drv_ev_state_len, new_len); - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { - erts_smp_mtx_unlock(&drv_ev_state_locks[i].lck); - } + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + + erts_mtx_lock(mtx); + state = get_drv_ev_state(fd); + + reset_handle(pthp); + + active_events = state->active_events; + + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: + + DEBUG_PRINT_FD("executed ready_output", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } + + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } + + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + active_events = 0; + } } - /*else already grown by racing thread */ - erts_smp_mtx_unlock(&drv_ev_state_grow_lock); -} -#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */ + if (!active_events) + check_fd_cleanup(state, &free_select, &free_nif); + + erts_mtx_unlock(mtx); + + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); + ERTS_MSACC_POP_STATE_M_X(); +} static ERTS_INLINE void abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type) @@ -527,13 +566,6 @@ abort_tasks(ErtsDrvEventState *state, int mode) switch (mode) { case 0: check_type: switch (state->type) { -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - abort_task(state->driver.event->port, - &state->driver.event->iotask.task, - ERTS_EV_TYPE_DRV_EV); - return; -#endif case ERTS_EV_TYPE_NIF: case ERTS_EV_TYPE_NONE: return; @@ -560,19 +592,107 @@ abort_tasks(ErtsDrvEventState *state, int mode) } } +static void prepare_select_msg(struct erts_nif_select_event* e, + enum ErlNifSelectFlags mode, + Eterm recipient, + ErtsResource* resource, + Eterm msg, + ErlNifEnv* msg_env, + Eterm event_atom) +{ + ErtsMessage* mp; + Eterm* hp; + Uint hsz; + + if (is_not_nil(e->pid)) { + ASSERT(e->mp); + erts_cleanup_messages(e->mp); + } + + if (mode & ERL_NIF_SELECT_CUSTOM_MSG) { + if (msg_env) { + mp = erts_create_message_from_nif_env(msg_env); + ERL_MESSAGE_TERM(mp) = msg; + } + else { + hsz = size_object(msg); + mp = erts_alloc_message(hsz, &hp); + ERL_MESSAGE_TERM(mp) = copy_struct(msg, hsz, &hp, &mp->hfrag.off_heap); + } + } + else { + ErtsBinary* bin; + Eterm resource_term, ref_term, tuple; + Eterm* hp_start; + + /* {select, Resource, Ref, EventAtom} */ + hsz = 5 + ERTS_MAGIC_REF_THING_SIZE; + if (is_internal_ref(msg)) + hsz += ERTS_REF_THING_SIZE; + else + ASSERT(is_immed(msg)); + + mp = erts_alloc_message(hsz, &hp); + hp_start = hp; + + bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); + resource_term = erts_mk_magic_ref(&hp, &mp->hfrag.off_heap, &bin->binary); + if (is_internal_ref(msg)) { + Uint32* refn = internal_ref_numbers(msg); + write_ref_thing(hp, refn[0], refn[1], refn[2]); + ref_term = make_internal_ref(hp); + hp += ERTS_REF_THING_SIZE; + } + else { + ASSERT(is_immed(msg)); + ref_term = msg; + } + tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom); + hp += 5; + ERL_MESSAGE_TERM(mp) = tuple; + ASSERT(hp == hp_start + hsz); (void)hp_start; + } + + ASSERT(is_not_nil(recipient)); + e->pid = recipient; + e->mp = mp; +} + +static ERTS_INLINE void send_select_msg(struct erts_nif_select_event* e) +{ + Process* rp = erts_proc_lookup(e->pid); + + ASSERT(is_internal_pid(e->pid)); + if (!rp) { + erts_cleanup_messages(e->mp); + return; + } + + erts_queue_message(rp, 0, e->mp, ERL_MESSAGE_TERM(e->mp), am_system); +} + +static void clear_select_event(struct erts_nif_select_event* e) +{ + if (is_not_nil(e->pid)) { + /* Discard unsent message */ + ASSERT(e->mp); + erts_cleanup_messages(e->mp); + e->mp = NULL; + e->pid = NIL; + } +} + static void deselect(ErtsDrvEventState *state, int mode) { - int do_wake = 0; ErtsPollEvents rm_events; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - ASSERT(state->events); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); abort_tasks(state, mode); - if (!mode) + if (!mode) { rm_events = state->events; - else { + } else { rm_events = 0; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); if (mode & ERL_DRV_READ) { @@ -585,67 +705,63 @@ deselect(ErtsDrvEventState *state, int mode) } } - state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake); + state->events &= ~rm_events; + state->active_events &= ~rm_events; if (!(state->events)) { + erts_io_control(state, ERTS_POLL_OP_DEL, 0); switch (state->type) { case ERTS_EV_TYPE_NIF: - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - enif_release_resource(state->driver.stop.resource); + clear_select_event(&state->driver.nif->in); + clear_select_event(&state->driver.nif->out); + enif_release_resource(state->driver.stop.resource->data); state->driver.stop.resource = NULL; break; case ERTS_EV_TYPE_DRV_SEL: state->driver.select->inport = NIL; state->driver.select->outport = NIL; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - break; -#endif case ERTS_EV_TYPE_NONE: break; default: ASSERT(0); break; } - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - remember_removed(state, &pollset); + state->flags = 0; + } else { + ErtsPollEvents new_events = + erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0) +# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE) #else # define IS_FD_UNKNOWN(state) ((state) == NULL) #endif static ERTS_INLINE void check_fd_cleanup(ErtsDrvEventState *state, -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState **free_event, -#endif ErtsDrvSelectDataState **free_select, ErtsNifSelectDataState **free_nif) { - erts_aint_t current_cio_time; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - - current_cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); *free_select = NULL; if (state->driver.select && (state->type != ERTS_EV_TYPE_DRV_SEL) - && !is_iotask_active(&state->driver.select->iniotask, current_cio_time) - && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - + && !is_iotask_active(&state->driver.select->iniotask) + && !is_iotask_active(&state->driver.select->outiotask)) { + *free_select = state->driver.select; state->driver.select = NULL; } @@ -656,382 +772,87 @@ check_fd_cleanup(ErtsDrvEventState *state, state->driver.nif = NULL; } -#if ERTS_CIO_HAVE_DRV_EVENT - *free_event = NULL; - if (state->driver.event - && (state->type != ERTS_EV_TYPE_DRV_EV) - && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) { - - *free_event = state->driver.event; - state->driver.event = NULL; - } -#endif - -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS if (((state->type != ERTS_EV_TYPE_NONE) - | state->remove_cnt -#if ERTS_CIO_HAVE_DRV_EVENT - | (state->driver.event != NULL) -#endif + | (state->driver.nif != NULL) | (state->driver.select != NULL)) == 0) { - hash_erase_drv_ev_state(state); - + erase_drv_ev_state(state); } -#endif } -static ERTS_INLINE int -check_cleanup_active_fd(ErtsSysFdType fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollControlEntry *pce, - int *pce_ix, -#endif - erts_aint_t current_cio_time, - int may_sleep) -{ - ErtsDrvEventState *state; - int active = 0; - erts_smp_mtx_t *mtx = fd_mtx(fd); - void *free_select = NULL; - void *free_nif = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - void *free_event = NULL; -#endif -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evon = 0, evoff = 0; -#endif - - erts_smp_mtx_lock(mtx); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; +#ifdef __WIN32__ +# define MUST_DEFER(MAY_SLEEP) 1 #else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ - if (state) -#endif - { - if (state->driver.select) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_IN) - && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) { - evoff |= ERTS_POLL_EV_IN; - state->flags |= ERTS_EV_FLAG_DEFER_IN_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) { - if (state->events & ERTS_POLL_EV_IN) - evon |= ERTS_POLL_EV_IN; - state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV; - } - if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_OUT) - && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) { - evoff |= ERTS_POLL_EV_OUT; - state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) { - if (state->events & ERTS_POLL_EV_OUT) - evon |= ERTS_POLL_EV_OUT; - state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV; - } - if (active) - (void) 0; - else -#else - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time) - || is_iotask_active(&state->driver.select->outiotask, current_cio_time)) - active = 1; - else -#endif - if (state->type != ERTS_EV_TYPE_DRV_SEL) { - free_select = state->driver.select; - state->driver.select = NULL; - } - } - - if (state->driver.nif) { - ErtsPollEvents rm_events = 0; - if (state->driver.nif->in.ddeselect_cnt) { - ASSERT(state->type == ERTS_EV_TYPE_NIF); - ASSERT(state->events & ERTS_POLL_EV_IN); - ASSERT(is_nil(state->driver.nif->in.pid)); - if (may_sleep || state->driver.nif->in.ddeselect_cnt == 1) { - rm_events = ERTS_POLL_EV_IN; - state->driver.nif->in.ddeselect_cnt = 0; - } - } - if (state->driver.nif->out.ddeselect_cnt) { - ASSERT(state->type == ERTS_EV_TYPE_NIF); - ASSERT(state->events & ERTS_POLL_EV_OUT); - ASSERT(is_nil(state->driver.nif->out.pid)); - if (may_sleep || state->driver.nif->out.ddeselect_cnt == 1) { - rm_events |= ERTS_POLL_EV_OUT; - state->driver.nif->out.ddeselect_cnt = 0; - } - } - if (rm_events) { - int do_wake = 0; - state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, - rm_events, 0, &do_wake); - } - if (state->events) - active = 1; - else if (state->type != ERTS_EV_TYPE_NIF) { - free_nif = state->driver.nif; - state->driver.nif = NULL; - } - } - -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) { - if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events; - if (evs) { - evoff |= evs; - state->driver.event->deferred_events |= evs; - } -#endif - active = 1; - } - else if (state->type != ERTS_EV_TYPE_DRV_EV) { - free_event = state->driver.event; - state->driver.event = NULL; - } -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - else { - ErtsPollEvents evs = state->events & state->driver.event->deferred_events; - if (evs) { - evon |= evs; - state->driver.event->deferred_events = 0; - } - } -#endif - - } -#endif - -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0) - hash_erase_drv_ev_state(state); +# define MUST_DEFER(MAY_SLEEP) (MAY_SLEEP) #endif - } - - erts_smp_mtx_unlock(mtx); - - if (free_select) - free_drv_select_data(free_select); - if (free_nif) - free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (evoff) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evoff; - pcep->on = 0; - } - if (evon) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evon; - pcep->on = 1; - } -#endif - - return active; -} - -static void -check_cleanup_active_fds(erts_aint_t current_cio_time, int may_sleep) -{ - int six = pollset.active_fd.six; - int eix = pollset.active_fd.eix; - erts_aint32_t no = erts_smp_atomic32_read_dirty(&pollset.active_fd.no); - int size = pollset.active_fd.size; - int ix = six; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - /* every fd might add two entries */ - Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no; - ErtsPollControlEntry *pctrl_entries = (pce_sz - ? erts_alloc(ERTS_ALC_T_TMP, pce_sz) - : NULL); - int pctrl_ix = 0; -#endif - - while (ix != eix) { - ErtsSysFdType fd = pollset.active_fd.array[ix]; - int nix = ix + 1; - if (nix >= size) - nix = 0; - ASSERT(fd != ERTS_SYS_FD_INVALID); - if (!check_cleanup_active_fd(fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - pctrl_entries, - &pctrl_ix, -#endif - current_cio_time, - may_sleep)) { - no--; - if (ix == six) { -#ifdef DEBUG - pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID; -#endif - six = nix; - } - else { - pollset.active_fd.array[ix] = pollset.active_fd.array[six]; -#ifdef DEBUG - pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID; -#endif - six++; - if (six >= size) - six = 0; - } - } - ix = nix; - } - -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry)); - if (pctrl_ix) - ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix); - if (pctrl_entries) - erts_free(ERTS_ALC_T_TMP, pctrl_entries); -#endif - - pollset.active_fd.six = six; - pollset.active_fd.eix = eix; - erts_smp_atomic32_set_relb(&pollset.active_fd.no, no); -} - -static void grow_active_fds(void) -{ - ASSERT(pollset.active_fd.six == pollset.active_fd.eix); - pollset.active_fd.six = 0; - pollset.active_fd.eix = pollset.active_fd.size; - pollset.active_fd.size += ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR, - pollset.active_fd.array, - pollset.active_fd.size*sizeof(ErtsSysFdType)); -#ifdef DEBUG - { - int i; - for (i = pollset.active_fd.eix + 1; i < pollset.active_fd.size; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; - } -#endif -} - -static ERTS_INLINE void -add_active_fd(ErtsSysFdType fd) -{ - int eix = pollset.active_fd.eix; - int size = pollset.active_fd.size; - - pollset.active_fd.array[eix] = fd; - - erts_smp_atomic32_set_relb(&pollset.active_fd.no, - (erts_smp_atomic32_read_dirty(&pollset.active_fd.no) - + 1)); - - eix++; - if (eix >= size) - eix = 0; - pollset.active_fd.eix = eix; - - if (pollset.active_fd.six == eix) { - grow_active_fds(); - } -} - int -ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, - ErlDrvEvent e, - int mode, - int on) +driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) { void (*stop_select_fn)(ErlDrvEvent, void*) = NULL; Port *prt = erts_drvport2port(ix); Eterm id = erts_drvport2id(ix); ErtsSysFdType fd = (ErtsSysFdType) e; ErtsPollEvents ctl_events = (ErtsPollEvents) 0; - ErtsPollEvents new_events, old_events; + ErtsPollEvents old_events; + ErtsPollEvents new_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int wake_poller; + int wake_poller = 0; int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event = NULL; -#endif ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; #ifdef USE_VM_PROBES DTRACE_CHARBUF(name, 64); #endif + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); - if (prt == ERTS_INVALID_ERL_DRV_PORT) + if (prt == ERTS_INVALID_ERL_DRV_PORT) { + ERTS_MSACC_POP_STATE(); return -1; + } - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)); + ERTS_LC_ASSERT(erts_lc_is_port_locked(prt)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) { - return -1; - } - if (fd >= max_fds) { - drv_select_large_fd_error(ix, fd, mode, on); - return -1; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) drv_select_large_fd_error(ix, fd, mode, on); + ERTS_MSACC_POP_STATE(); + return -1; } #endif - erts_smp_mtx_lock(fd_mtx(fd)); + erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ -#endif + state = get_drv_ev_state(fd); /* may be NULL! */ - if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - if (IS_FD_UNKNOWN(state)) { - /* fast track to stop_select callback */ - stop_select_fn = prt->drv_ptr->stop_select; -#ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, - sizeof(DTRACE_CHARBUF_NAME(name))-1); - name[sizeof(name)-1] = '\0'; -#endif - ret = 0; - goto done_unknown; - } - mode |= (ERL_DRV_READ | ERL_DRV_WRITE); - wake_poller = 1; /* to eject fd from pollset (if needed) */ - } - else wake_poller = 0; + DEBUG_PRINT_FD("driver_select(%T, %p, %s, %d)", + state, id, fd, drvmode2str(mode), on); -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state == NULL) { - state = hash_new_drv_ev_state(fd); + if (!on) { + if (IS_FD_UNKNOWN(state)) { + if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + /* fast track to stop_select callback */ + stop_select_fn = prt->drv_ptr->stop_select; + #ifdef USE_VM_PROBES + strncpy(name, prt->drv_ptr->name, + sizeof(DTRACE_CHARBUF_NAME(name))-1); + name[sizeof(name)-1] = '\0'; + #endif + } + ret = 0; + goto done_unknown; + } + /* For some reason (don't know why), we do not clean all + events when doing ERL_DRV_USE_NO_CALLBACK. */ + else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + mode |= (ERL_DRV_READ | ERL_DRV_WRITE); + } } -#endif + + state = new_drv_ev_state(state, fd); switch (state->type) { -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: -#endif case ERTS_EV_TYPE_NIF: drv_select_steal(ix, state, mode, on); break; @@ -1053,7 +874,9 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, ASSERT(state->type == ERTS_EV_TYPE_NONE); break; - }} + } + default: break; + } if (mode & ERL_DRV_READ) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { @@ -1061,7 +884,7 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, if (owner != id && is_not_nil(owner)) drv_select_steal(ix, state, mode, on); } - ctl_events |= ERTS_POLL_EV_IN; + ctl_events = ERTS_POLL_EV_IN; } if (mode & ERL_DRV_WRITE) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { @@ -1070,100 +893,119 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, drv_select_steal(ix, state, mode, on); } ctl_events |= ERTS_POLL_EV_OUT; - } + } + ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) || (state->type == ERTS_EV_TYPE_NONE && !state->events)); - if (!on && !(state->flags & ERTS_EV_FLAG_USED) - && state->events && !(state->events & ~ctl_events)) { - /* Old driver removing all events. At least wake poller. - It will not make close() 100% safe but it will prevent - actions delayed by poll timeout. */ - wake_poller = 1; + old_events = state->events; + + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + if (ctl_events & ERTS_POLL_EV_IN && (!state->driver.select || !is_iotask_active(&state->driver.select->iniotask))) + state->active_events |= ERTS_POLL_EV_IN; + if (ctl_events & ERTS_POLL_EV_OUT && (!state->driver.select || !is_iotask_active(&state->driver.select->outiotask))) + state->active_events |= ERTS_POLL_EV_OUT; + if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { + ctl_op = ERTS_POLL_OP_ADD; + } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } + else { + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; + new_events = state->active_events; - new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.select->inport = NIL; - state->driver.select->outport = NIL; - } - ret = -1; - goto done; + if (!state->events) { + if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) + ctl_op = ERTS_POLL_OP_DEL; + } } - old_events = state->events; + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { - ASSERT(on - ? (new_events == (state->events | ctl_events)) - : (new_events == (state->events & ~ctl_events))); + new_events = erts_io_control_wakeup(state, ctl_op, + new_events, + &wake_poller); - ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL - || state->type == ERTS_EV_TYPE_NONE); + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); + } - state->events = new_events; - if (ctl_events) { - if (on) { + if (on) { + if (ctl_events) { if (!state->driver.select) - state->driver.select = alloc_drv_select_data(); + state->driver.select = alloc_drv_select_data(state->fd); if (state->type == ERTS_EV_TYPE_NONE) state->type = ERTS_EV_TYPE_DRV_SEL; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); - if (ctl_events & ERTS_POLL_EV_IN) + if (ctl_events & ERTS_POLL_EV_IN) { state->driver.select->inport = id; - if (ctl_events & ERTS_POLL_EV_OUT) + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + iready(id, state); + } + if (ctl_events & ERTS_POLL_EV_OUT) { state->driver.select->outport = id; - if (mode & ERL_DRV_USE) { + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + oready(id, state); + } + if (mode & ERL_DRV_USE) state->flags |= ERTS_EV_FLAG_USED; - } - } - else { /* off */ - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - if (ctl_events & ERTS_POLL_EV_IN) { - abort_tasks(state, ERL_DRV_READ); - state->driver.select->inport = NIL; - } - if (ctl_events & ERTS_POLL_EV_OUT) { - abort_tasks(state, ERL_DRV_WRITE); - state->driver.select->outport = NIL; - } - if (new_events == 0) { - if (old_events != 0) { - remember_removed(state, &pollset); - } - if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - } - /*else keep it, as fd will probably be selected upon again */ - } - } - if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - erts_driver_t* drv_ptr = prt->drv_ptr; - ASSERT(new_events==0); - if (state->remove_cnt == 0 || !wake_poller) { - /* Safe to close fd now as it is not in pollset - or there was no need to eject fd (kernel poll) */ - stop_select_fn = drv_ptr->stop_select; + } + } + else { /* off */ + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + if (ctl_events & ERTS_POLL_EV_IN) { + abort_tasks(state, ERL_DRV_READ); + state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (ctl_events & ERTS_POLL_EV_OUT) { + abort_tasks(state, ERL_DRV_WRITE); + state->driver.select->outport = NIL; + } + if (state->events == 0) { + if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { + state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); + state->flags = 0; + } + /*else keep it, as fd will probably be selected upon again */ + } + } + if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + erts_driver_t* drv_ptr = prt->drv_ptr; + ASSERT(state->events==0); + if (!wake_poller) { + /* Safe to close fd now as it is not in pollset + or there was no need to eject fd (kernel poll) */ + stop_select_fn = drv_ptr->stop_select; #ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, sizeof(name)-1); - name[sizeof(name)-1] = '\0'; + strncpy(name, prt->drv_ptr->name, sizeof(name)-1); + name[sizeof(name)-1] = '\0'; #endif - } - else { - /* Not safe to close fd, postpone stop_select callback. */ - state->type = ERTS_EV_TYPE_STOP_USE; - state->driver.stop.drv_ptr = drv_ptr; - if (drv_ptr->handle) { - erts_ddll_reference_referenced_driver(drv_ptr->handle); - } - } - } - } + } + else { + /* Not safe to close fd, postpone stop_select callback. */ + state->type = ERTS_EV_TYPE_STOP_USE; + state->driver.stop.drv_ptr = drv_ptr; + if (drv_ptr->handle) { + erts_ddll_reference_referenced_driver(drv_ptr->handle); + } + } + } } ret = 0; @@ -1171,14 +1013,11 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif &free_select, &free_nif); done_unknown: - erts_smp_mtx_unlock(fd_mtx(fd)); + erts_mtx_unlock(fd_mtx(fd)); if (stop_select_fn) { int was_unmasked = erts_block_fpe(); DTRACE1(driver_stop_select, name); @@ -1191,61 +1030,56 @@ done_unknown: if (free_nif) free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif + ERTS_MSACC_POP_STATE(); + return ret; } int -ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, - ErlNifEvent e, - enum ErlNifSelectFlags mode, - void* obj, - const ErlNifPid* pid, - Eterm ref) +enif_select(ErlNifEnv* env, ErlNifEvent e, enum ErlNifSelectFlags mode, + void* obj, const ErlNifPid* pid, Eterm msg) +{ + return enif_select_x(env, e, mode, obj, pid, msg, NULL); +} + + +int +enif_select_x(ErlNifEnv* env, + ErlNifEvent e, + enum ErlNifSelectFlags mode, + void* obj, + const ErlNifPid* pid, + Eterm msg, + ErlNifEnv* msg_env) { int on; ErtsResource* resource = DATA_TO_RESOURCE(obj); ErtsSysFdType fd = (ErtsSysFdType) e; ErtsPollEvents ctl_events = (ErtsPollEvents) 0; - ErtsPollEvents new_events, old_events; + ErtsPollEvents old_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int wake_poller; - int ret; + int ret, wake_poller = 0; enum { NO_STOP=0, CALL_STOP, CALL_STOP_AND_RELEASE } call_stop = NO_STOP; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event = NULL; -#endif ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; -#ifdef USE_VM_PROBES - DTRACE_CHARBUF(name, 64); -#endif - ASSERT(!(resource->monitors && resource->monitors->is_dying)); + ASSERT(!erts_dbg_is_resource_dying(resource)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) { - return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; - } - if (fd >= max_fds) { - nif_select_large_fd_error(fd, mode, resource, ref); - return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) nif_select_large_fd_error(fd, mode, resource, msg); + return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; } #endif - erts_smp_mtx_lock(fd_mtx(fd)); + erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ -#endif + state = get_drv_ev_state(fd); /* may be NULL! */ + + DEBUG_PRINT_FD("enif_select(%T, %d, %s, %p, %T, %T)", + state, env->proc->common.id, fd, nifmode2str(mode), resource, + pid ? pid->pid : THE_NON_VALUE, ref); if (mode & ERL_NIF_SELECT_STOP) { ASSERT(resource->type->stop); @@ -1257,13 +1091,12 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } on = 0; mode = ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE; - wake_poller = 1; /* to eject fd from pollset (if needed) */ ctl_events = ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT; + ctl_op = ERTS_POLL_OP_DEL; } else { - on = 1; + on = !(mode & ERL_NIF_SELECT_CANCEL); ASSERT(mode); - wake_poller = 0; if (mode & ERL_DRV_READ) { ctl_events |= ERTS_POLL_EV_IN; } @@ -1272,11 +1105,7 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } } -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state == NULL) { - state = hash_new_drv_ev_state(fd); - } -#endif + state = new_drv_ev_state(state,fd); switch (state->type) { case ERTS_EV_TYPE_NIF: @@ -1285,24 +1114,21 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, * Changing process and/or ref is ok (I think?). */ if (state->driver.stop.resource != resource) - nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, ref); + nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, msg); break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: -#endif case ERTS_EV_TYPE_DRV_SEL: - nif_select_steal(state, mode, resource, ref); + nif_select_steal(state, mode, resource, msg); break; case ERTS_EV_TYPE_STOP_USE: { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_nif_select_op(dsbufp, fd, mode, resource, ref); + print_nif_select_op(dsbufp, fd, mode, resource, msg); steal_pending_stop_use(dsbufp, ERTS_INVALID_ERL_DRV_PORT, state, mode, on); ASSERT(state->type == ERTS_EV_TYPE_NONE); break; } case ERTS_EV_TYPE_STOP_NIF: { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_nif_select_op(dsbufp, fd, mode, resource, ref); + print_nif_select_op(dsbufp, fd, mode, resource, msg); steal_pending_stop_nif(dsbufp, resource, state, mode, on); if (state->type == ERTS_EV_TYPE_STOP_NIF) { ret = ERL_NIF_SELECT_STOP_SCHEDULED; /* ?? */ @@ -1310,40 +1136,55 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } ASSERT(state->type == ERTS_EV_TYPE_NONE); break; - }} + } + default: break; + } ASSERT((state->type == ERTS_EV_TYPE_NIF) || (state->type == ERTS_EV_TYPE_NONE && !state->events)); - new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + old_events = state->events; - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (state->type == ERTS_EV_TYPE_NIF && !state->events) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - state->driver.stop.resource = NULL; - } - ret = INT_MIN | ERL_NIF_SELECT_FAILED; - goto done; + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + state->active_events |= ctl_events; + if (state->type == ERTS_EV_TYPE_NONE) + ctl_op = ERTS_POLL_OP_ADD; + } + else { + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; } - old_events = state->events; + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { + ErtsPollEvents new_events; - ASSERT(on - ? (new_events == (state->events | ctl_events)) - : (new_events == (state->events & ~ctl_events))); + new_events = erts_io_control_wakeup(state, ctl_op, + state->active_events, + &wake_poller); + + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->type == ERTS_EV_TYPE_NIF && !old_events) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.nif->in.pid = NIL; + state->driver.nif->out.pid = NIL; + state->driver.stop.resource = NULL; + } + ret = INT_MIN | ERL_NIF_SELECT_FAILED; + goto done; + } + ASSERT(new_events == state->events); + } ASSERT(state->type == ERTS_EV_TYPE_NIF || state->type == ERTS_EV_TYPE_NONE); - state->events = new_events; if (on) { const Eterm recipient = pid ? pid->pid : env->proc->common.id; - Uint32* refn; + ASSERT(is_internal_pid(recipient)); if (!state->driver.nif) state->driver.nif = alloc_nif_select_data(); if (state->type == ERTS_EV_TYPE_NONE) { @@ -1353,87 +1194,73 @@ ERTS_CIO_EXPORT(enif_select)(ErlNifEnv* env, } ASSERT(state->type == ERTS_EV_TYPE_NIF); ASSERT(state->driver.stop.resource == resource); - if (ctl_events & ERTS_POLL_EV_IN) { - state->driver.nif->in.pid = recipient; - if (is_immed(ref)) { - state->driver.nif->in.immed = ref; - } else { - ASSERT(is_internal_ref(ref)); - refn = internal_ref_numbers(ref); - state->driver.nif->in.immed = THE_NON_VALUE; - state->driver.nif->in.refn[0] = refn[0]; - state->driver.nif->in.refn[1] = refn[1]; - state->driver.nif->in.refn[2] = refn[2]; - } - state->driver.nif->in.ddeselect_cnt = 0; - } - if (ctl_events & ERTS_POLL_EV_OUT) { - state->driver.nif->out.pid = recipient; - if (is_immed(ref)) { - state->driver.nif->out.immed = ref; - } else { - ASSERT(is_internal_ref(ref)); - refn = internal_ref_numbers(ref); - state->driver.nif->out.immed = THE_NON_VALUE; - state->driver.nif->out.refn[0] = refn[0]; - state->driver.nif->out.refn[1] = refn[1]; - state->driver.nif->out.refn[2] = refn[2]; - } - state->driver.nif->out.ddeselect_cnt = 0; + if (mode & ERL_DRV_READ) { + prepare_select_msg(&state->driver.nif->in, mode, recipient, + resource, msg, msg_env, am_ready_input); + msg_env = NULL; + } + if (mode & ERL_DRV_WRITE) { + prepare_select_msg(&state->driver.nif->out, mode, recipient, + resource, msg, msg_env, am_ready_output); } ret = 0; } else { /* off */ + ret = 0; if (state->type == ERTS_EV_TYPE_NIF) { - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; - state->driver.nif->in.ddeselect_cnt = 0; - state->driver.nif->out.ddeselect_cnt = 0; - if (old_events != 0) { - remember_removed(state, &pollset); + if (mode & ERL_NIF_SELECT_READ + && is_not_nil(state->driver.nif->in.pid)) { + clear_select_event(&state->driver.nif->in); + ret |= ERL_NIF_SELECT_READ_CANCELLED; + } + if (mode & ERL_NIF_SELECT_WRITE + && is_not_nil(state->driver.nif->out.pid)) { + clear_select_event(&state->driver.nif->out); + ret |= ERL_NIF_SELECT_WRITE_CANCELLED; } } - ASSERT(new_events==0); - if (state->remove_cnt == 0 || !wake_poller) { - /* - * Safe to close fd now as it is not in pollset - * or there was no need to eject fd (kernel poll) - */ - if (state->type == ERTS_EV_TYPE_NIF) { - ASSERT(state->driver.stop.resource == resource); - call_stop = CALL_STOP_AND_RELEASE; - state->driver.stop.resource = NULL; + if (mode & ERL_NIF_SELECT_STOP) { + ASSERT(state->events==0); + if (!wake_poller) { + /* + * Safe to close fd now as it is not in pollset + * or there was no need to eject fd (kernel poll) + */ + if (state->type == ERTS_EV_TYPE_NIF) { + ASSERT(state->driver.stop.resource == resource); + call_stop = CALL_STOP_AND_RELEASE; + state->driver.stop.resource = NULL; + } + else { + ASSERT(!state->driver.stop.resource); + call_stop = CALL_STOP; + } + state->type = ERTS_EV_TYPE_NONE; + ret |= ERL_NIF_SELECT_STOP_CALLED; } else { - ASSERT(!state->driver.stop.resource); - call_stop = CALL_STOP; - } - state->type = ERTS_EV_TYPE_NONE; - ret = ERL_NIF_SELECT_STOP_CALLED; - } - else { - /* Not safe to close fd, postpone stop_select callback. */ - if (state->type == ERTS_EV_TYPE_NONE) { - ASSERT(!state->driver.stop.resource); - state->driver.stop.resource = resource; - enif_keep_resource(resource); + /* Not safe to close fd, postpone stop_select callback. */ + if (state->type == ERTS_EV_TYPE_NONE) { + ASSERT(!state->driver.stop.resource); + state->driver.stop.resource = resource; + enif_keep_resource(resource); + } + state->type = ERTS_EV_TYPE_STOP_NIF; + ret |= ERL_NIF_SELECT_STOP_SCHEDULED; } - state->type = ERTS_EV_TYPE_STOP_NIF; - ret = ERL_NIF_SELECT_STOP_SCHEDULED; } + else + ASSERT(mode & ERL_NIF_SELECT_CANCEL); } done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif &free_select, &free_nif); done_unknown: - erts_smp_mtx_unlock(fd_mtx(fd)); + erts_mtx_unlock(fd_mtx(fd)); if (call_stop) { erts_resource_stop(resource, (ErlNifEvent)fd, 1); if (call_stop == CALL_STOP_AND_RELEASE) { @@ -1445,160 +1272,7 @@ done_unknown: if (free_nif) free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - return ret; -} - - -int -ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, - ErlDrvEvent e, - ErlDrvEventData event_data) -{ -#if !ERTS_CIO_HAVE_DRV_EVENT - return -1; -#else - ErtsSysFdType fd = (ErtsSysFdType) e; - ErtsPollEvents events; - ErtsPollEvents add_events; - ErtsPollEvents remove_events; - Eterm id = erts_drvport2id(ix); - ErtsDrvEventState *state; - int do_wake = 0; - int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event; -#endif - ErtsDrvSelectDataState *free_select; - ErtsNifSelectDataState *free_nif; - Port *prt = erts_drvport2port(ix); - - if (prt == ERTS_INVALID_ERL_DRV_PORT) - return -1; - - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) - return -1; - if (fd >= max_fds) { - event_large_fd_error(ix, fd, event_data); - return -1; - } - grow_drv_ev_state(fd); - } -#endif - - erts_smp_mtx_lock(fd_mtx(fd)); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - /* Could use hash_new directly, but want to keep the normal case fast */ - state = hash_get_drv_ev_state(fd); - if (state == NULL) { - state = hash_new_drv_ev_state(fd); - } -#endif - - switch (state->type) { - case ERTS_EV_TYPE_DRV_EV: - if (state->driver.event->port == id) break; - /*fall through*/ - case ERTS_EV_TYPE_DRV_SEL: - drv_event_steal(ix, state, event_data); - break; - case ERTS_EV_TYPE_STOP_USE: { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, fd, event_data); - steal_pending_stop_use(dsbufp, ix, state, 0, 1); - break; - } - } - - ASSERT(state->type == ERTS_EV_TYPE_DRV_EV - || state->type == ERTS_EV_TYPE_NONE); - - events = state->events; - - if (!event_data) { - remove_events = events; - add_events = 0; - } - else { - remove_events = ~event_data->events & events; - add_events = ~events & event_data->events; - } - - if (add_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (remove_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (event_data && event_data->events != 0) { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - state->driver.event->removed_events &= ~add_events; - state->driver.event->removed_events |= remove_events; - } - else { - if (!state->driver.event) - state->driver.event = alloc_drv_event_data(); - state->driver.event->port = id; - state->driver.event->removed_events = (ErtsPollEvents) 0; - state->type = ERTS_EV_TYPE_DRV_EV; - } - state->driver.event->data = event_data; - } - else { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - abort_tasks(state, 0); - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - } - state->type = ERTS_EV_TYPE_NONE; - remember_removed(state, &pollset); - } - state->events = events; - ASSERT(event_data ? events == event_data->events : events == 0); - - ret = 0; - -done: - - check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif - &free_select, - &free_nif); - - erts_smp_mtx_unlock(fd_mtx(fd)); - - if (free_select) - free_drv_select_data(free_select); - if (free_nif) - free_nif_select_data(free_nif); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - return ret; -#endif } static ERTS_INLINE int @@ -1632,11 +1306,6 @@ need2steal(ErtsDrvEventState *state, int mode) do_steal = 1; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - do_steal |= chk_stale(state->driver.event->port, state, 0); - break; -#endif case ERTS_EV_TYPE_STOP_USE: case ERTS_EV_TYPE_STOP_NIF: ASSERT(0); @@ -1670,7 +1339,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) static void steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) { - erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd); switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { int deselect_mode = 0; @@ -1694,7 +1363,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) if (deselect_mode) deselect(state, deselect_mode); else { - erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "no one", (int) state->fd); ASSERT(0); } erts_dsprintf(dsbufp, "\n"); @@ -1719,30 +1388,13 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) erts_dsprintf(dsbufp, "\n"); break; } -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { - Eterm eid = state->driver.event->port; - if (is_nil(eid)) { - erts_dsprintf(dsbufp, "no one", (int) state->fd); - ASSERT(0); - } - else { - erts_dsprintf(dsbufp, "event driver "); - print_driver_name(dsbufp, eid); - erts_dsprintf(dsbufp, "%T ", eid); - } - erts_dsprintf(dsbufp, "\n"); - deselect(state, 0); - break; - } -#endif case ERTS_EV_TYPE_STOP_USE: case ERTS_EV_TYPE_STOP_NIF: { ASSERT(0); break; } default: - erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "no one\n", (int) state->fd); ASSERT(0); } } @@ -1756,7 +1408,7 @@ print_drv_select_op(erts_dsprintf_buf_t *dsbufp, "driver_select(%p, %d,%s%s%s%s, %d) " "by ", ix, - (int) GET_FD(fd), + (int) fd, mode & ERL_DRV_READ ? " ERL_DRV_READ" : "", mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "", mode & ERL_DRV_USE ? " ERL_DRV_USE" : "", @@ -1773,10 +1425,11 @@ print_nif_select_op(erts_dsprintf_buf_t *dsbufp, { erts_dsprintf(dsbufp, "enif_select(_, %d,%s%s%s, %T:%T, %T) ", - (int) GET_FD(fd), + (int) fd, mode & ERL_NIF_SELECT_READ ? " READ" : "", mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "", - mode & ERL_NIF_SELECT_STOP ? " STOP" : "", + (mode & ERL_NIF_SELECT_STOP ? " STOP" + : (mode & ERL_NIF_SELECT_CANCEL ? " CANCEL" : "")), resource->type->module, resource->type->name, ref); @@ -1812,7 +1465,7 @@ large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd) { erts_dsprintf(dsbufp, "fd=%d is larger than the largest allowed fd=%d\n", - (int) fd, max_fds - 1); + (int) fd, drv_ev_state.max_fds - 1); } static void @@ -1868,7 +1521,7 @@ steal_pending_stop_use(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, erts_ddll_dereference_driver(state->driver.stop.drv_ptr->handle); } state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; + state->flags = 0; state->driver.stop.drv_ptr = NULL; } else { @@ -1905,9 +1558,9 @@ steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource, erts_dsprintf(dsbufp, "called before stop was called for NIF resource %T:%T\n", rt->module, rt->name); - enif_release_resource(state->driver.stop.resource); + enif_release_resource(state->driver.stop.resource->data); state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; + state->flags = 0; state->driver.stop.resource = NULL; } else { @@ -1917,59 +1570,9 @@ steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource, } - -#if ERTS_CIO_HAVE_DRV_EVENT - -static void -print_drv_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) -{ - Port *pp = erts_drvport2port(ix); - erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd); - if (!event_data) - erts_dsprintf(dsbufp, "NULL"); - else - erts_dsprintf(dsbufp, "{0x%x, 0x%x}", - (unsigned int) event_data->events, - (unsigned int) event_data->revents); - erts_dsprintf(dsbufp, ") by "); - if (pp != ERTS_INVALID_ERL_DRV_PORT) - print_driver_name(dsbufp, pp->common.id); - erts_dsprintf(dsbufp, "driver %T ", pp != ERTS_INVALID_ERL_DRV_PORT ? pp->common.id : NIL); -} - -static void -drv_event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data) -{ - if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, state->fd, event_data); - steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE); - erts_send_error_to_logger_nogl(dsbufp); - } - else if (state->type == ERTS_EV_TYPE_DRV_SEL) { - ASSERT(state->flags & ERTS_EV_FLAG_USED); - deselect(state, 0); - } -} - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) -{ - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_drv_event_op(dsbufp, ix, fd, event_data); - erts_dsprintf(dsbufp, "failed: "); - large_fd_error_common(dsbufp, fd); - erts_send_error_to_logger_nogl(dsbufp); -} -#endif -#endif - static ERTS_INLINE int io_task_schedule_allowed(ErtsDrvEventState *state, - ErtsPortTaskType type, - erts_aint_t current_cio_time) + ErtsPortTaskType type) { ErtsIoTask *io_task; @@ -1977,254 +1580,112 @@ io_task_schedule_allowed(ErtsDrvEventState *state, case ERTS_PORT_TASK_INPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->iniotask; break; case ERTS_PORT_TASK_OUTPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->outiotask; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_PORT_TASK_EVENT: - if (!state->driver.event) - return 0; - if (state->driver.select) - return 0; - io_task = &state->driver.event->iotask; - break; -#endif default: ERTS_INTERNAL_ERROR("Invalid I/O-task type"); return 0; } - return !is_iotask_active(io_task, current_cio_time); + return !is_iotask_active(io_task); } static ERTS_INLINE void -iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +iready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_INPUT, - current_cio_time)) { + ERTS_PORT_TASK_INPUT)) { ErtsIoTask *iotask = &state->driver.select->iniotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); - } - add_active_fd(state->fd); + } else { + DEBUG_PRINT_FD("schedule ready_input(%T, %d)", + state, id, state->fd); + } } } static ERTS_INLINE void -oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +oready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_OUTPUT, - current_cio_time)) { + ERTS_PORT_TASK_OUTPUT)) { ErtsIoTask *iotask = &state->driver.select->outiotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { - stale_drv_select(id, state, ERL_DRV_WRITE); - } - add_active_fd(state->fd); - } -} - -static ERTS_INLINE void -send_event_tuple(struct erts_nif_select_event* e, ErtsResource* resource, - Eterm event_atom) -{ - Process* rp = erts_proc_lookup(e->pid); - ErtsProcLocks rp_locks = 0; - ErtsMessage* mp; - ErlOffHeap* ohp; - ErtsBinary* bin; - Eterm* hp; - Uint hsz; - Eterm resource_term, ref_term, tuple; - - if (!rp) { - return; - } - - bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); - - /* {select, Resource, Ref, EventAtom} */ - if (is_value(e->immed)) { - hsz = 5 + ERTS_MAGIC_REF_THING_SIZE; - } - else { - hsz = 5 + ERTS_MAGIC_REF_THING_SIZE + ERTS_REF_THING_SIZE; - } - - mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp); - - resource_term = erts_mk_magic_ref(&hp, ohp, &bin->binary); - if (is_value(e->immed)) { - ASSERT(is_immed(e->immed)); - ref_term = e->immed; - } - else { - write_ref_thing(hp, e->refn[0], e->refn[1], e->refn[2]); - ref_term = make_internal_ref(hp); - hp += ERTS_REF_THING_SIZE; - } - tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom); - - ERL_MESSAGE_TOKEN(mp) = am_undefined; - erts_queue_message(rp, rp_locks, mp, tuple, am_system); - - if (rp_locks) - erts_smp_proc_unlock(rp, rp_locks); -} - - -#if ERTS_CIO_HAVE_DRV_EVENT -static ERTS_INLINE void -eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data, - erts_aint_t current_cio_time) -{ - if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_EVENT, - current_cio_time)) { - ErtsIoTask *iotask = &state->driver.event->iotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); - if (erts_port_task_schedule(id, - &iotask->task, - ERTS_PORT_TASK_EVENT, (ErlDrvEvent) state->fd, - event_data) != 0) { - stale_drv_select(id, state, 0); - } - add_active_fd(state->fd); + 0) != 0) { + stale_drv_select(id, state, ERL_DRV_WRITE); + } else { + DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); + } } } -#endif static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport); -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT void -ERTS_CIO_EXPORT(erts_check_io_async_sig_interrupt)(void) +erts_check_io_interrupt(ErtsPollThread *psi, int set) { - ERTS_CIO_POLL_AS_INTR(pollset.ps); -} + if (psi) { +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback_pollset()) { + erts_poll_interrupt_flbk(psi->ps, set); + return; + } #endif - -void -ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set) -{ - ERTS_CIO_POLL_INTR(pollset.ps, set); + erts_poll_interrupt(psi->ps, set); + } } -void -ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, - ErtsMonotonicTime timeout_time) -{ - ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, timeout_time); +ErtsPollThread * +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; + return psiv+id; } -#if !ERTS_CIO_DEFER_ACTIVE_EVENTS -/* - * Number of ignored events, for a lingering fd added by enif_select(), - * until we deselect fd-event from pollset. - */ -# define ERTS_NIF_DELAYED_DESELECT 20 -#else -/* Disable delayed deselect as pollset cannot handle active events */ -# define ERTS_NIF_DELAYED_DESELECT 1 -#endif - void -ERTS_CIO_EXPORT(erts_check_io)(int do_wait) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) { - ErtsPollResFd *pollres; int pollres_len; - ErtsMonotonicTime timeout_time; int poll_ret, i; - erts_aint_t current_cio_time; - ErtsSchedulerData *esdp = erts_get_scheduler_data(); - - ASSERT(esdp); + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); restart: -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - -#ifdef ERTS_SIGNAL_STATE /* ifndef ERTS_SMP */ - if (ERTS_SIGNAL_STATE) { - erts_handle_signal_state(); - } -#endif - - /* Figure out timeout value */ - timeout_time = (do_wait - ? erts_check_next_timeout_time(esdp) - : ERTS_POLL_NO_TIMEOUT /* poll only */); - - /* - * No need for an atomic inc op when incrementing - * erts_check_io_time, since only one thread can - * check io at a time. - */ - current_cio_time = erts_smp_atomic_read_dirty(&erts_check_io_time); - current_cio_time++; - erts_smp_atomic_set_relb(&erts_check_io_time, current_cio_time); - - check_cleanup_active_fds(current_cio_time, - timeout_time != ERTS_POLL_NO_TIMEOUT); - #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - pollres_len = erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN; + pollres_len = psi->pollres_len; - pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len); +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback_pollset()) { - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); - poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, timeout_time); + } else +#endif + { + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); + } #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - - -#ifdef ERTS_SIGNAL_STATE /* ifndef ERTS_SMP */ - if (ERTS_SIGNAL_STATE) { - erts_handle_signal_state(); - } -#endif - - if (poll_ret != 0) { - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); - forget_removed(&pollset); - erts_free(ERTS_ALC_T_TMP, pollres); + if (poll_ret == EAGAIN) { goto restart; } @@ -2240,65 +1701,103 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) erl_errno_id(poll_ret), poll_ret); erts_send_error_to_logger_nogl(dsbufp); } + ERTS_MSACC_POP_STATE(); return; } for (i = 0; i < pollres_len; i++) { - ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd; + erts_driver_t* drv_ptr = NULL; + ErtsResource* resource = NULL; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; + ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - erts_smp_mtx_lock(fd_mtx(fd)); + /* The fd will be set to -1 if a pollset internal fd was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; + + erts_mtx_lock(fd_mtx(fd)); + + state = get_drv_ev_state(fd); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[ (int) fd]; -#else - state = hash_get_drv_ev_state(fd); if (!state) { - goto next_pollres; + erts_mtx_unlock(fd_mtx(fd)); + continue; } -#endif - /* Skip this fd if it was removed from pollset */ - if (is_removed(state)) { - goto next_pollres; - } + DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); - switch (state->type) { - case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */ - ErtsPollEvents revents = pollres[i].events; + if (revents & ERTS_POLL_EV_ERR) { + /* + * Handle error events by triggering all in/out events + * that has been selected on. + * We *do not* want to call a callback that corresponds + * to an event not selected. + */ + revents = state->active_events; + state->active_events = 0; - if (revents & ERTS_POLL_EV_ERR) { - /* - * Handle error events by triggering all in/out events - * that the driver has selected. - * We *do not* want to call a callback that corresponds - * to an event not selected. - */ - revents = state->events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; } - else { - revents &= (state->events | ERTS_POLL_EV_NVAL); + } else { + + /* Disregard any events that are not active at the moment, + for instance this could happen if the driver/nif does + select/deselect in rapid succession. */ + revents &= state->active_events | ERTS_POLL_EV_NVAL; + + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; + + reactive_events = state->active_events; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; + + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } + } } + } + + switch (state->type) { + case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */ if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state, current_cio_time); + oready(state->driver.select->outport, state); } /* Someone might have deselected input since revents was read (true also on the non-smp emulator since oready() may have been called); therefore, update revents... */ - revents &= state->events; + revents &= state->events; if (revents & ERTS_POLL_EV_IN) { - iready(state->driver.select->inport, state, current_cio_time); + iready(state->driver.select->inport, state); } } else if (revents & ERTS_POLL_EV_NVAL) { bad_fd_in_pollset(state, state->driver.select->inport, state->driver.select->outport); - add_active_fd(state->fd); + check_fd_cleanup(state, &free_select, &free_nif); } break; } @@ -2306,113 +1805,118 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) case ERTS_EV_TYPE_NIF: { /* Requested via enif_select()... */ struct erts_nif_select_event in = {NIL}; struct erts_nif_select_event out = {NIL}; - ErtsResource* resource = NULL; - ErtsPollEvents revents = pollres[i].events; - - if (revents & ERTS_POLL_EV_ERR) { - /* - * Handle error events by triggering all in/out events - * that the NIF has selected. - * We *do not* want to send a message that corresponds - * to an event not selected. - */ - revents = state->events; - } - else { - revents &= (state->events | ERTS_POLL_EV_NVAL); - } if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { if (is_not_nil(state->driver.nif->out.pid)) { out = state->driver.nif->out; resource = state->driver.stop.resource; - state->driver.nif->out.ddeselect_cnt = ERTS_NIF_DELAYED_DESELECT; state->driver.nif->out.pid = NIL; - add_active_fd(state->fd); - } - else { - ASSERT(state->driver.nif->out.ddeselect_cnt >= 2); - state->driver.nif->out.ddeselect_cnt--; + state->driver.nif->out.mp = NULL; } } if (revents & ERTS_POLL_EV_IN) { if (is_not_nil(state->driver.nif->in.pid)) { in = state->driver.nif->in; resource = state->driver.stop.resource; - state->driver.nif->in.ddeselect_cnt = ERTS_NIF_DELAYED_DESELECT; state->driver.nif->in.pid = NIL; - add_active_fd(state->fd); - } - else { - ASSERT(state->driver.nif->in.ddeselect_cnt >= 2); - state->driver.nif->in.ddeselect_cnt--; + state->driver.nif->in.mp = NULL; } } + state->events &= ~revents; } else if (revents & ERTS_POLL_EV_NVAL) { bad_fd_in_pollset(state, NIL, NIL); - add_active_fd(state->fd); + check_fd_cleanup(state, &free_select, &free_nif); } -#ifdef ERTS_SMP - erts_smp_mtx_unlock(fd_mtx(fd)); -#endif + erts_mtx_unlock(fd_mtx(fd)); + if (is_not_nil(in.pid)) { - send_event_tuple(&in, resource, am_ready_input); + send_select_msg(&in); } if (is_not_nil(out.pid)) { - send_event_tuple(&out, resource, am_ready_output); + send_select_msg(&out); } - goto next_pollres_unlocked; - } - -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */ - ErlDrvEventData event_data; - ErtsPollEvents revents; - ASSERT(state->driver.event); - ASSERT(state->driver.event->data); - event_data = state->driver.event->data; - revents = pollres[i].events; - revents &= ~state->driver.event->removed_events; - - if (revents) { - event_data->events = state->events; - event_data->revents = revents; - eready(state->driver.event->port, state, event_data, current_cio_time); - } - break; - } -#endif + continue; + } + case ERTS_EV_TYPE_STOP_NIF: { + resource = state->driver.stop.resource; + state->type = ERTS_EV_TYPE_NONE; + goto case_ERTS_EV_TYPE_NONE; + } + + case ERTS_EV_TYPE_STOP_USE: { +#if ERTS_POLL_USE_FALLBACK + ASSERT(psi->ps == get_fallback_pollset()); +#endif + drv_ptr = state->driver.stop.drv_ptr; + state->type = ERTS_EV_TYPE_NONE; + /* fallthrough */ case ERTS_EV_TYPE_NONE: /* Deselected ... */ + case_ERTS_EV_TYPE_NONE: + ASSERT(!state->events && !state->active_events && !state->flags); + check_fd_cleanup(state, &free_select, &free_nif); break; + } default: { /* Error */ erts_dsprintf_buf_t *dsbufp; dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Invalid event request type for fd in erts_poll()! " - "fd=%d, event request type=%sd\n", (int) state->fd, + "fd=%d, event request type=%d\n", (int) state->fd, (int) state->type); ASSERT(0); deselect(state, 0); - add_active_fd(state->fd); break; } } - next_pollres:; -#ifdef ERTS_SMP - erts_smp_mtx_unlock(fd_mtx(fd)); -#endif - next_pollres_unlocked:; - } + erts_mtx_unlock(fd_mtx(fd)); - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); - erts_free(ERTS_ALC_T_TMP, pollres); - forget_removed(&pollset); + if (drv_ptr) { + int was_unmasked = erts_block_fpe(); + DTRACE1(driver_stop_select, drv_ptr->name); + LTTNG1(driver_stop_select, drv_ptr->name); + (*drv_ptr->stop_select)((ErlDrvEvent) fd, NULL); + erts_unblock_fpe(was_unmasked); + if (drv_ptr->handle) { + erts_ddll_dereference_driver(drv_ptr->handle); + } + } + if (resource) { + erts_resource_stop(resource, (ErlNifEvent)fd, 0); + enif_release_resource(resource->data); + } + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); + } + + /* The entire pollres array was filled with events, + * grow it for the next call. We do this for two reasons: + * 1. Pulling out more events in on go will increase throughput + * 2. If the polling implementation is not fair, this will make + * sure that we get all fds that we can. i.e. if 12 fds are + * constantly active, but we only have a pollres_len of 10, + * two of the fds may never be triggered depending on what the + * kernel decides to do. + **/ + if (pollres_len == psi->pollres_len) { + int ev_state_len = drv_ev_state_len(); + erts_free(ERTS_ALC_T_POLLSET, psi->pollres); + psi->pollres_len *= 2; + /* Never grow it larger than the current drv_ev_state.len size */ + if (psi->pollres_len > ev_state_len) + psi->pollres_len = ev_state_len; + psi->pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * psi->pollres_len); + } + + ERTS_MSACC_POP_STATE(); } static void @@ -2506,16 +2010,16 @@ static int drv_ev_state_cmp(void *des1, void *des2) static void *drv_ev_state_alloc(void *des_tmpl) { ErtsDrvEventState *evstate; - erts_smp_spin_lock(&state_prealloc_lock); - if (state_prealloc_first == NULL) { - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + if (drv_ev_state.prealloc_first == NULL) { + erts_spin_unlock(&drv_ev_state.prealloc_lock); evstate = (ErtsDrvEventState *) erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState)); } else { - evstate = state_prealloc_first; - state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next; - --num_state_prealloc; - erts_smp_spin_unlock(&state_prealloc_lock); + evstate = drv_ev_state.prealloc_first; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) evstate->hb.next; + --drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } /* XXX: Already valid data if prealloced, could ignore template! */ *evstate = *((ErtsDrvEventState *) des_tmpl); @@ -2525,60 +2029,213 @@ static void *drv_ev_state_alloc(void *des_tmpl) static void drv_ev_state_free(void *des) { - erts_smp_spin_lock(&state_prealloc_lock); - ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb; - state_prealloc_first = (ErtsDrvEventState *) des; - ++num_state_prealloc; - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + ((ErtsDrvEventState *) des)->hb.next = &drv_ev_state.prealloc_first->hb; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) des; + ++drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } #endif +#define ERTS_MAX_NO_OF_POLL_THREADS ERTS_MAX_NO_OF_SCHEDULERS + +static char * +get_arg(char* rest, char** argv, int* ip) +{ + int i = *ip; + if (*rest == '\0') { + if (argv[i+1] == NULL) { + erts_fprintf(stderr, "too few arguments\n"); + erts_usage(); + } + argv[i++] = NULL; + rest = argv[i]; + } + argv[i] = NULL; + *ip = i; + return rest; +} + +static void +parse_args(int *argc, char **argv, int concurrent_waiters) +{ + int i = 0, j; + int no_pollsets = 0, no_poll_threads = 0, + no_pollsets_percentage = 0, + no_poll_threads_percentage = 0; + ASSERT(argc && argv); + while (i < *argc) { + if(argv[i][0] == '-') { + switch (argv[i][1]) { + case 'I': { + if (strncmp(argv[i]+2, "Ot", 2) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads) != 1 || + no_poll_threads < 1 || + ERTS_MAX_NO_OF_POLL_THREADS < no_poll_threads) { + erts_fprintf(stderr,"bad I/O poll threads number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "Op", 3) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_pollsets) != 1 || + no_pollsets < 1) { + erts_fprintf(stderr,"bad I/O pollset number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPt", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads_percentage) != 1 || + no_poll_threads_percentage < 0 || + no_poll_threads_percentage > 100) { + erts_fprintf(stderr,"bad I/O poll thread percentage number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPp", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_pollsets_percentage) != 1 || + no_pollsets_percentage < 0 || + no_pollsets_percentage > 100) { + erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg); + erts_usage(); + } + } else { + break; + } + break; + } + case 'K': + (void)get_arg(argv[i]+2, argv, &i); + break; + case '-': + goto args_parsed; + default: + break; + } + } + i++; + } + +args_parsed: + + if (!concurrent_waiters) { + no_pollsets = no_poll_threads; + no_pollsets_percentage = 100; + } + + if (no_poll_threads == 0) { + if (no_poll_threads_percentage == 0) + no_poll_threads = 1; /* This is the default */ + else { + no_poll_threads = erts_no_schedulers * no_poll_threads_percentage / 100; + if (no_poll_threads < 1) + no_poll_threads = 1; + } + } + + if (no_pollsets == 0) { + if (no_pollsets_percentage == 0) + no_pollsets = 1; /* This is the default */ + else { + no_pollsets = no_poll_threads * no_pollsets_percentage / 100; + if (no_pollsets < 1) + no_pollsets = 1; + } + } + + if (no_poll_threads < no_pollsets) { + erts_fprintf(stderr, + "number of IO poll threads has to be greater or equal to " + "the number of \nIO pollsets. Current values are set to: \n" + " -IOt %d -IOp %d\n", + no_poll_threads, no_pollsets); + erts_usage(); + } + + /* Handled arguments have been marked with NULL. Slide arguments + not handled towards the beginning of argv. */ + for (i = 0, j = 0; i < *argc; i++) { + if (argv[i]) + argv[j++] = argv[i]; + } + *argc = j; + + erts_no_pollsets = no_pollsets; + erts_no_poll_threads = no_poll_threads; +} + void -ERTS_CIO_EXPORT(erts_init_check_io)(void) +erts_init_check_io(int *argc, char **argv) { + int j, concurrent_waiters, no_poll_threads; ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED | ERL_NIF_SELECT_STOP_SCHEDULED | ERL_NIF_SELECT_INVALID_EVENT | ERL_NIF_SELECT_FAILED)) == 0); - erts_smp_atomic_init_nob(&erts_check_io_time, 0); - erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0); - - ERTS_CIO_POLL_INIT(); - pollset.ps = ERTS_CIO_NEW_POLLSET(); - - pollset.active_fd.six = 0; - pollset.active_fd.eix = 0; - erts_smp_atomic32_init_nob(&pollset.active_fd.no, 0); - pollset.active_fd.size = ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR, - sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC); -#ifdef DEBUG - { - int i; - for (i = 0; i < ERTS_ACTIVE_FD_INC; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; - } + + + erts_poll_init(&concurrent_waiters); +#if ERTS_POLL_USE_FALLBACK + erts_poll_init_flbk(NULL); #endif + parse_args(argc, argv, concurrent_waiters); -#ifdef ERTS_SMP - init_removed_fd_alloc(); - pollset.removed_list = NULL; - erts_smp_spinlock_init(&pollset.removed_list_lock, "pollset_rm_list", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - { - int i; - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { - erts_smp_mtx_init(&drv_ev_state_locks[i].lck, "drv_ev_state", make_small(i), - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - } - } + /* Create the actual pollsets */ + pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets); + + for (j=0; j < erts_no_pollsets; j++) + pollsetv[j] = erts_poll_create_pollset(j); + + no_poll_threads = erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; +#endif + +#if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); + no_poll_threads++; #endif + + psiv = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollThread) * no_poll_threads); + +#if ERTS_POLL_USE_FALLBACK + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); + psiv++; +#endif + + for (j = 0; j < erts_no_poll_threads; j++) { + psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[j].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[j].ps = pollsetv[j % erts_no_pollsets]; + } + + for (j=0; j < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; j++) { + erts_mtx_init(&drv_ev_state.locks[j].lck, "drv_ev_state", make_small(j), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + } + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - max_fds = ERTS_CIO_POLL_MAX_FDS(); - erts_smp_atomic_init_nob(&drv_ev_state_len, 0); - drv_ev_state = NULL; - erts_smp_mtx_init(&drv_ev_state_grow_lock, "drv_ev_state_grow", NIL, + drv_ev_state.max_fds = erts_poll_max_fds(); + erts_atomic_init_nob(&drv_ev_state.len, 0); + drv_ev_state.v = NULL; + erts_mtx_init(&drv_ev_state.grow_lock, "drv_ev_state_grow", NIL, ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); #else { @@ -2587,153 +2244,196 @@ ERTS_CIO_EXPORT(erts_init_check_io)(void) hf.cmp = &drv_ev_state_cmp; hf.alloc = &drv_ev_state_alloc; hf.free = &drv_ev_state_free; - num_state_prealloc = 0; - state_prealloc_first = NULL; - erts_smp_spinlock_init(&state_prealloc_lock,"state_prealloc", NIL, - ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); - - safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state_tab, "drv_ev_state_tab", + drv_ev_state.num_prealloc = 0; + drv_ev_state.prealloc_first = NULL; + erts_spinlock_init(&drv_ev_state.prealloc_lock, "state_prealloc", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state.tab, "drv_ev_state_tab", ERTS_LOCK_FLAGS_CATEGORY_IO, DRV_EV_STATE_HTAB_SIZE, hf); } #endif } int -ERTS_CIO_EXPORT(erts_check_io_max_files)(void) +erts_check_io_max_files(void) { #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - return max_fds; + return drv_ev_state.max_fds; #else - return ERTS_POLL_EXPORT(erts_poll_max_fds)(); + return erts_poll_max_fds(); #endif } Uint -ERTS_CIO_EXPORT(erts_check_io_size)(void) +erts_check_io_size(void) { - Uint res; + Uint res = 0; ErtsPollInfo pi; - ERTS_CIO_POLL_INFO(pollset.ps, &pi); - res = pi.memory_size; + int i; + +#if ERTS_POLL_USE_FALLBACK + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); + res += pi.memory_size; +#endif + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_info(pollsetv[i], &pi); + res += pi.memory_size; + } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - res += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len); + res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - res += safe_hash_table_sz(&drv_ev_state_tab); + res += safe_hash_table_sz(&drv_ev_state.tab); { SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); + safe_hash_get_info(&hi, &drv_ev_state.tab); res += hi.objs * sizeof(ErtsDrvEventState); } - erts_smp_spin_lock(&state_prealloc_lock); - res += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + res += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif return res; } Eterm -ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) +erts_check_io_info(void *proc) { Process *p = (Process *) proc; - Eterm tags[16], values[16], res; - Uint sz, *szp, *hp, **hpp, memory_size; - Sint i; - ErtsPollInfo pi; - erts_aint_t cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); - int active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); + Eterm tags[16], values[16], res, list = NIL; + Uint sz, *szp, *hp, **hpp; + ErtsPollInfo *piv; + Sint i, j = 0, len; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; + ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); - while (1) { - erts_aint_t post_cio_time; - int post_active_fds; + piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); - ERTS_CIO_POLL_INFO(pollset.ps, &pi); +#if ERTS_POLL_USE_FALLBACK + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 0; + piv[0].active_fds = 0; + piv++; +#endif - post_cio_time = erts_smp_atomic_read_mb(&erts_check_io_time); - post_active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); - if (cio_time == post_cio_time && active_fds == post_active_fds) - break; - cio_time = post_cio_time; - active_fds = post_active_fds; +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); + piv[0].poll_threads = 0; + piv[0].active_fds = 0; + piv++; +#endif + + for (j = 0; j < erts_no_pollsets; j++) { + erts_poll_info(pollsetv[j], &piv[j]); + piv[j].active_fds = 0; + piv[j].poll_threads = erts_no_poll_threads / erts_no_pollsets; + if (erts_no_poll_threads % erts_no_pollsets > j) + piv[j].poll_threads++; } - memory_size = pi.memory_size; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len); + i = 0; + erts_mtx_lock(&drv_ev_state.grow_lock); + len = erts_atomic_read_nob(&drv_ev_state.len); + for (i = 0; i < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_lock(&drv_ev_state.locks[i].lck); + for (j = i; j < len; j+=ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT) { + ErtsDrvEventState *state = get_drv_ev_state(j); + int pollsetid = get_pollset_id(j); + ASSERT(fd_mtx(j) == &drv_ev_state.locks[i].lck); + if (state->flags & ERTS_EV_FLAG_FALLBACK) + pollsetid = -1; + if (state->driver.select + && (state->type == ERTS_EV_TYPE_DRV_SEL) + && (is_iotask_active(&state->driver.select->iniotask) + || is_iotask_active(&state->driver.select->outiotask))) + piv[pollsetid].active_fds++; + } + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + erts_mtx_unlock(&drv_ev_state.grow_lock); + + piv[0].memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - memory_size += safe_hash_table_sz(&drv_ev_state_tab); + piv[0].memory_size += safe_hash_table_sz(&drv_ev_state.tab); { - SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); - memory_size += hi.objs * sizeof(ErtsDrvEventState); + SafeHashInfo hi; + safe_hash_get_info(&hi, &drv_ev_state.tab); + piv[0].memory_size += hi.objs * sizeof(ErtsDrvEventState); } - erts_smp_spin_lock(&state_prealloc_lock); - memory_size += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + piv[0].memory_size += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif hpp = NULL; szp = &sz; sz = 0; - bld_it: - i = 0; + piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; - tags[i] = erts_bld_atom(hpp, szp, "name"); - values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); + bld_it: - tags[i] = erts_bld_atom(hpp, szp, "primary"); - values[i++] = erts_bld_atom(hpp, szp, pi.primary); + for (j = no_pollsets-1; j >= 0; j--) { + i = 0; - tags[i] = erts_bld_atom(hpp, szp, "fallback"); - values[i++] = erts_bld_atom(hpp, szp, pi.fallback ? pi.fallback : "false"); + tags[i] = erts_bld_atom(hpp, szp, "name"); + values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); - tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); - values[i++] = erts_bld_atom(hpp, szp, - pi.kernel_poll ? pi.kernel_poll : "false"); + tags[i] = erts_bld_atom(hpp, szp, "primary"); + values[i++] = erts_bld_atom(hpp, szp, piv[j].primary); - tags[i] = erts_bld_atom(hpp, szp, "memory_size"); - values[i++] = erts_bld_uint(hpp, szp, memory_size); + tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); + values[i++] = erts_bld_atom(hpp, szp, + piv[j].kernel_poll ? piv[j].kernel_poll : "false"); - tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.poll_set_size); + tags[i] = erts_bld_atom(hpp, szp, "memory_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].memory_size); - if (pi.fallback) { - tags[i] = erts_bld_atom(hpp, szp, "fallback_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.fallback_poll_set_size); - } + tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_set_size); - tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); - values[i++] = pi.lazy_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); + values[i++] = piv[j].lazy_updates ? am_true : am_false; - if (pi.lazy_updates) { - tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.pending_updates); - } + tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].pending_updates); - tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); - values[i++] = pi.batch_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); + values[i++] = piv[j].batch_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); - values[i++] = pi.concurrent_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); + values[i++] = piv[j].concurrent_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "max_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds); + tags[i] = erts_bld_atom(hpp, szp, "fallback"); + values[i++] = piv[j].is_fallback ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "active_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds); + tags[i] = erts_bld_atom(hpp, szp, "max_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].max_fds); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups); + tags[i] = erts_bld_atom(hpp, szp, "active_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].active_fds); - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_interrupts"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_interrupts); + tags[i] = erts_bld_atom(hpp, szp, "poll_threads"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_threads); - tags[i] = erts_bld_atom(hpp, szp, "no_interrupt_timed"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_interrupt_timed); -#endif + res = erts_bld_2tup_list(hpp, szp, i, tags, values); - res = erts_bld_2tup_list(hpp, szp, i, tags, values); + if (!hpp) { + *szp += 2; + } + else { + list = CONS(*hpp, res, list); + *hpp += 2; + } + } if (!hpp) { hp = HAlloc(p, sz); @@ -2742,386 +2442,464 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) goto bld_it; } - return res; + erts_free(ERTS_ALC_T_TMP, piv); + + return list; } static ERTS_INLINE ErtsPollEvents -print_events(ErtsPollEvents ev) +print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) { int first = 1; + if(ev == ERTS_POLL_EV_NONE) { + erts_dsprintf(dsbufp, "N/A"); + return 0; + } if(ev & ERTS_POLL_EV_IN) { ev &= ~ERTS_POLL_EV_IN; - erts_printf("%s%s", first ? "" : "|", "IN"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "IN"); first = 0; } if(ev & ERTS_POLL_EV_OUT) { ev &= ~ERTS_POLL_EV_OUT; - erts_printf("%s%s", first ? "" : "|", "OUT"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "OUT"); first = 0; } /* The following should not appear... */ if(ev & ERTS_POLL_EV_NVAL) { - erts_printf("%s%s", first ? "" : "|", "NVAL"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "NVAL"); first = 0; } if(ev & ERTS_POLL_EV_ERR) { - erts_printf("%s%s", first ? "" : "|", "ERR"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "ERR"); first = 0; } if (ev) - erts_printf("%s0x%b32x", first ? "" : "|", (Uint32) ev); + erts_dsprintf(dsbufp, "%s0x%b32x", first ? "" : "|", (Uint32) ev); return ev; } +static ERTS_INLINE void +print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) +{ + erts_dsprintf(dsbufp, "%s", flag2str(f)); +} + +#ifdef DEBUG_PRINT_MODE + +static ERTS_INLINE char * +drvmode2str(int mode) { + switch (mode) { + case ERL_DRV_READ|ERL_DRV_USE: return "READ|USE"; + case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE"; + case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE"; + case ERL_DRV_USE: return "USE"; + case ERL_DRV_READ|ERL_DRV_USE_NO_CALLBACK: return "READ|USE_NO_CB"; + case ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "WRITE|USE_NO_CB"; + case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "READ|WRITE|USE_NO_CB"; + case ERL_DRV_USE_NO_CALLBACK: return "USE_NO_CB"; + case ERL_DRV_READ: return "READ"; + case ERL_DRV_WRITE: return "WRITE"; + case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE"; + default: return "UNKNOWN"; + } +} + +static ERTS_INLINE char * +nifmode2str(enum ErlNifSelectFlags mode) { + if (mode & ERL_NIF_SELECT_STOP) + return "STOP"; + switch (mode) { + case ERL_NIF_SELECT_READ: return "READ"; + case ERL_NIF_SELECT_WRITE: return "WRITE"; + case ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: return "READ|WRITE"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ: return "CANCEL|READ"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_WRITE: return "CANCEL|WRITE"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: + return "CANCEL|READ|WRITE"; + default: return "UNKNOWN"; + } +} + +#endif + typedef struct { int used_fds; int num_errors; int no_driver_select_structs; - int no_driver_event_structs; + int no_enif_select_structs; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS int internal_fds; ErtsPollEvents *epep; #endif } IterDebugCounters; -static void doit_erts_check_io_debug(void *vstate, void *vcounters) +static int erts_debug_print_checkio_state(erts_dsprintf_buf_t *dsbufp, + ErtsDrvEventState *state, + ErtsPollEvents ep_events, + int internal) { - ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; - IterDebugCounters *counters = (IterDebugCounters *) vcounters; - ErtsPollEvents cio_events = state->events; - ErtsSysFdType fd = state->fd; -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int internal = 0; - ErtsPollEvents ep_events = counters->epep[(int) fd]; -#endif - int err = 0; - #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) struct stat stat_buf; #endif - - if (state->driver.select) - counters->no_driver_select_structs++; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - counters->no_driver_event_structs++; -#endif - + ErtsSysFdType fd = state->fd; + ErtsPollEvents cio_events = state->events; + int err = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state->events || ep_events) { - if (ep_events & ERTS_POLL_EV_NVAL) { - ep_events &= ~ERTS_POLL_EV_NVAL; - internal = 1; - counters->internal_fds++; - } - else - counters->used_fds++; -#else - if (state->events) { - counters->used_fds++; + ErtsPollEvents aio_events = state->active_events; #endif - - erts_printf("fd=%d ", (int) fd); - + erts_dsprintf(dsbufp, "pollset=%d fd=%d ", + state->flags & ERTS_EV_FLAG_FALLBACK ? -1 : get_pollset_id(fd), (int) fd); + #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) - if (fstat((int) fd, &stat_buf) < 0) - erts_printf("type=unknown "); - else { - erts_printf("type="); + if (fstat((int) fd, &stat_buf) < 0) + erts_dsprintf(dsbufp, "type=unknown "); + else { + erts_dsprintf(dsbufp, "type="); #ifdef S_ISSOCK - if (S_ISSOCK(stat_buf.st_mode)) - erts_printf("sock "); - else + if (S_ISSOCK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "sock "); + else #endif #ifdef S_ISFIFO if (S_ISFIFO(stat_buf.st_mode)) - erts_printf("fifo "); + erts_dsprintf(dsbufp, "fifo "); else #endif #ifdef S_ISCHR - if (S_ISCHR(stat_buf.st_mode)) - erts_printf("chr "); - else + if (S_ISCHR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "chr "); + else #endif #ifdef S_ISDIR - if (S_ISDIR(stat_buf.st_mode)) - erts_printf("dir "); - else + if (S_ISDIR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "dir "); + else #endif #ifdef S_ISBLK - if (S_ISBLK(stat_buf.st_mode)) - erts_printf("blk "); - else + if (S_ISBLK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "blk "); + else #endif #ifdef S_ISREG - if (S_ISREG(stat_buf.st_mode)) - erts_printf("reg "); - else + if (S_ISREG(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "reg "); + else #endif #ifdef S_ISLNK - if (S_ISLNK(stat_buf.st_mode)) - erts_printf("lnk "); - else + if (S_ISLNK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "lnk "); + else #endif #ifdef S_ISDOOR - if (S_ISDOOR(stat_buf.st_mode)) - erts_printf("door "); - else + if (S_ISDOOR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "door "); + else #endif #ifdef S_ISWHT - if (S_ISWHT(stat_buf.st_mode)) - erts_printf("wht "); - else + if (S_ISWHT(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "wht "); + else #endif #ifdef S_ISXATTR - if (S_ISXATTR(stat_buf.st_mode)) - erts_printf("xattr "); - else + if (S_ISXATTR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "xattr "); + else #endif - erts_printf("unknown "); - } + erts_dsprintf(dsbufp, "unknown "); + } #else - erts_printf("type=unknown "); + erts_dsprintf(dsbufp, "type=unknown "); #endif - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - erts_printf("driver_select "); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - - if (cio_events == ep_events) { - erts_printf("ev="); - if (print_events(cio_events) != 0) - err = 1; - } - else { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - erts_printf(" ep_ev="); - print_events(ep_events); - } -#else - if (print_events(cio_events) != 0) - err = 1; -#endif - erts_printf(" "); - if (cio_events & ERTS_POLL_EV_IN) { - Eterm id = state->driver.select->inport; - if (is_nil(id)) { - erts_printf("inport=none inname=none indrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" inport=%T inname=%s indrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - if (cio_events & ERTS_POLL_EV_OUT) { - Eterm id = state->driver.select->outport; - if (is_nil(id)) { - erts_printf("outport=none outname=none outdrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" outport=%T outname=%s outdrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - } - else if (state->type == ERTS_EV_TYPE_NIF) { - ErtsResource* r; - erts_printf("enif_select "); + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + erts_dsprintf(dsbufp, "driver_select "); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + if (aio_events == cio_events) { if (cio_events == ep_events) { - erts_printf("ev="); - if (print_events(cio_events) != 0) + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) err = 1; } else { + ErtsPollEvents ev = cio_events; + if (ev != ep_events && ep_events != ERTS_POLL_EV_NONE) + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } + } else { + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " aio_ev="); + print_events(dsbufp, aio_events); + if ((aio_events != ep_events && ep_events != ERTS_POLL_EV_NONE) || + (aio_events != 0 && ep_events == ERTS_POLL_EV_NONE)) { + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - erts_printf(" ep_ev="); - print_events(ep_events); } + } #else - if (print_events(cio_events) != 0) + if (print_events(dsbufp, cio_events) != 0) + err = 1; +#endif + erts_dsprintf(dsbufp, " "); + if (cio_events & ERTS_POLL_EV_IN) { + Eterm id = state->driver.select->inport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "inport=none inname=none indrv=none "); err = 1; -#endif - erts_printf(" inpid=%T dd_cnt=%b32d", state->driver.nif->in.pid, - state->driver.nif->in.ddeselect_cnt); - erts_printf(" outpid=%T dd_cnt=%b32d", state->driver.nif->out.pid, - state->driver.nif->out.ddeselect_cnt); - r = state->driver.stop.resource; - erts_printf(" resource=%p(%T:%T)", r, r->type->module, r->type->name); - } -#if ERTS_CIO_HAVE_DRV_EVENT - else if (state->type == ERTS_EV_TYPE_DRV_EV) { - Eterm id; - erts_printf("driver_event "); + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " inport=%T inname=%s indrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + if (cio_events & ERTS_POLL_EV_OUT) { + Eterm id = state->driver.select->outport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "outport=none outname=none outdrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " outport=%T outname=%s outdrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + } + else if (state->type == ERTS_EV_TYPE_NIF) { + ErtsResource* r; + erts_dsprintf(dsbufp, "enif_select "); + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - if (cio_events == ep_events) { - erts_printf("ev=0x%b32x", (Uint32) cio_events); - } - else { - err = 1; - erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); - erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); - } + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) + err = 1; + } + else { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + if (print_events(dsbufp, cio_events) != 0) + err = 1; #endif - id = state->driver.event->port; - if (is_nil(id)) { - erts_printf(" port=none name=none drv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" port=%T name=%s drv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } + erts_dsprintf(dsbufp, " inpid=%T", state->driver.nif->in.pid); + erts_dsprintf(dsbufp, " outpid=%T", state->driver.nif->out.pid); + r = state->driver.stop.resource; + erts_dsprintf(dsbufp, " resource=%p(%T:%T)", r, r->type->module, r->type->name); + } +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + else if (internal) { + erts_dsprintf(dsbufp, "internal "); + if (cio_events) { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + } + if (ep_events) { + erts_dsprintf(dsbufp, "ep_ev="); + print_events(dsbufp, ep_events); + } + } #endif + else { + err = 1; + erts_dsprintf(dsbufp, "control_type=%d ", (int)state->type); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - else if (internal) { - erts_printf("internal "); - if (cio_events) { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - } - if (ep_events) { - erts_printf("ep_ev="); - print_events(ep_events); - } - } + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + print_events(dsbufp, cio_events); + } + else { + erts_dsprintf(dsbufp, "cio_ev="); print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); print_events(dsbufp, ep_events); + } +#else + erts_dsprintf(dsbufp, "ev=0x%b32x", (Uint32) cio_events); #endif - else { - err = 1; - erts_printf("control_type=%d ", (int)state->type); + } + + erts_dsprintf(dsbufp, " flags="); print_flags(dsbufp, state->flags); + if (err) { + erts_dsprintf(dsbufp, " ERROR"); + } + erts_dsprintf(dsbufp, "\r\n"); + return err; +} + +static void doit_erts_check_io_debug(void *vstate, void *vcounters, + erts_dsprintf_buf_t *dsbufp) +{ + ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; + IterDebugCounters *counters = (IterDebugCounters *) vcounters; + int internal = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (cio_events == ep_events) { - erts_printf("ev="); - print_events(cio_events); - } - else { - erts_printf("cio_ev="); print_events(cio_events); - erts_printf(" ep_ev="); print_events(ep_events); - } + ErtsSysFdType fd = state->fd; + ErtsPollEvents ep_events = counters->epep[(int) fd]; #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + ErtsPollEvents ep_events = ERTS_POLL_EV_NONE; #endif + + if (state->driver.select) { + counters->no_driver_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + if (state->driver.nif) { + counters->no_enif_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)) { + if (ep_events & ERTS_POLL_EV_NVAL) { + ep_events &= ~ERTS_POLL_EV_NVAL; + internal = 1; + counters->internal_fds++; } - - if (err) { + else + counters->used_fds++; +#else + if (state->events) { + counters->used_fds++; +#endif + if (erts_debug_print_checkio_state(dsbufp, state, ep_events, internal)) { counters->num_errors++; - erts_printf(" ERROR"); } - erts_printf("\n"); } } - + +/* ciodpi can be NULL when called from etp-commands */ int -ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) +erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int fd, len; + int fd, len, i; #endif - IterDebugCounters counters; + IterDebugCounters counters = {0}; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS ErtsDrvEventState null_des; null_des.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - null_des.driver.event = NULL; -#endif + null_des.driver.nif = NULL; null_des.driver.stop.drv_ptr = NULL; null_des.events = 0; - null_des.remove_cnt = 0; null_des.type = ERTS_EV_TYPE_NONE; + null_des.flags = 0; + + counters.epep = erts_alloc(ERTS_ALC_T_TMP, + sizeof(ErtsPollEvents)*drv_ev_state.max_fds); #endif - erts_printf("--- fds in pollset --------------------------------------\n"); -#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) +#if defined(ERTS_ENABLE_LOCK_CHECK) erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - erts_smp_thr_progress_block(); /* stop the world to avoid messy locking */ + if (ciodip) + erts_thr_progress_block(); /* stop the world to avoid messy locking */ #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds); - ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds); - counters.internal_fds = 0; -#endif - counters.used_fds = 0; - counters.num_errors = 0; - counters.no_driver_select_structs = 0; - counters.no_driver_event_structs = 0; + len = erts_atomic_read_nob(&drv_ev_state.len); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - len = erts_smp_atomic_read_nob(&drv_ev_state_len); +#if ERTS_POLL_USE_FALLBACK + erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, + drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { - doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters); + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } - for ( ; fd < max_fds; fd++) { - null_des.fd = fd; - doit_erts_check_io_debug((void *) &null_des, (void *) &counters); +#endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_get_selected_events(pollsetv[i], + counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } + } + for (fd = len ; fd < drv_ev_state.max_fds; fd++) { + null_des.fd = fd; + doit_erts_check_io_debug(&null_des, &counters, dsbufp); } #else - safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters); + safe_hash_for_each(&drv_ev_state.tab, &doit_erts_check_io_debug, + &counters, dsbufp); #endif - erts_smp_thr_progress_unblock(); + if (ciodip) + erts_thr_progress_unblock(); - ciodip->no_used_fds = counters.used_fds; - ciodip->no_driver_select_structs = counters.no_driver_select_structs; - ciodip->no_driver_event_structs = counters.no_driver_event_structs; + if (ciodip) { + ciodip->no_used_fds = counters.used_fds; + ciodip->no_driver_select_structs = counters.no_driver_select_structs; + ciodip->no_enif_select_structs = counters.no_enif_select_structs; + } - erts_printf("\n"); - erts_printf("used fds=%d\n", counters.used_fds); - erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs); -#if ERTS_CIO_HAVE_DRV_EVENT - erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs); -#endif + erts_dsprintf(dsbufp, "\n"); + erts_dsprintf(dsbufp, "used fds=%d\n", counters.used_fds); + erts_dsprintf(dsbufp, "Number of driver_select() structures=%d\n", counters.no_driver_select_structs); + erts_dsprintf(dsbufp, "Number of enif_select() structures=%d\n", counters.no_enif_select_structs); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - erts_printf("internal fds=%d\n", counters.internal_fds); + erts_dsprintf(dsbufp, "internal fds=%d\n", counters.internal_fds); #endif - erts_printf("---------------------------------------------------------\n"); - fflush(stdout); + erts_dsprintf(dsbufp, "---------------------------------------------------------\n"); + erts_send_error_to_logger_nogl(dsbufp); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS erts_free(ERTS_ALC_T_TMP, (void *) counters.epep); #endif @@ -3130,11 +2908,19 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) } #ifdef ERTS_ENABLE_LOCK_COUNT -void ERTS_CIO_EXPORT(erts_lcnt_update_cio_locks)(int enable) { +void erts_lcnt_update_cio_locks(int enable) { + int i; #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - erts_lcnt_enable_hash_lock_count(&drv_ev_state_tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable); + erts_lcnt_enable_hash_lock_count(&drv_ev_state.tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable); #else (void)enable; #endif + +#if ERTS_POLL_USE_FALLBACK + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); +#endif + + for (i = 0; i < erts_no_pollsets; i++) + erts_lcnt_enable_pollset_lock_count(pollsetv[i], enable); } #endif /* ERTS_ENABLE_LOCK_COUNT */ |