diff options
author | Sverker Eriksson <[email protected]> | 2019-02-06 19:10:26 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2019-02-06 19:10:26 +0100 |
commit | 98cfd6016f8b40fc97e03b31177d14318349040f (patch) | |
tree | c0fcdd768071c36bfbcbf186d369d9ca14c47421 /erts/emulator/sys | |
parent | e2ca71b6e7172b320b5b171359d53a161383fb19 (diff) | |
parent | 3825199794da28d79b21052a2e69e2335921d55e (diff) | |
download | otp-98cfd6016f8b40fc97e03b31177d14318349040f.tar.gz otp-98cfd6016f8b40fc97e03b31177d14318349040f.tar.bz2 otp-98cfd6016f8b40fc97e03b31177d14318349040f.zip |
Merge tag 'OTP-21.2' into sverker/map-from-ks-vs-bug
Diffstat (limited to 'erts/emulator/sys')
32 files changed, 5243 insertions, 5350 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 44a77f3ea5..c681fa481f 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,7 +29,6 @@ #endif #define ERL_CHECK_IO_C__ -#define ERTS_WANT_BREAK_HANDLING #ifndef WANT_NONBLOCKING # define WANT_NONBLOCKING #endif @@ -38,79 +37,107 @@ #include "erl_port.h" #include "erl_check_io.h" #include "erl_thr_progress.h" +#include "erl_bif_unique.h" #include "dtrace-wrapper.h" #include "lttng-wrapper.h" #define ERTS_WANT_TIMER_WHEEL_API #include "erl_time.h" +#if 0 +#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) +#define DEBUG_PRINT_FD(FMT, STATE, ...) \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ + (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ + ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ + ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) +#define DEBUG_PRINT_MODE +#else +#define DEBUG_PRINT(...) +#endif + +#ifndef DEBUG_PRINT_FD +#define DEBUG_PRINT_FD(...) 
+#endif + #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS # include "safe_hash.h" # define DRV_EV_STATE_HTAB_SIZE 1024 #endif -typedef char EventStateType; -#define ERTS_EV_TYPE_NONE ((EventStateType) 0) -#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */ -#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */ -#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */ - -typedef char EventStateFlags; -#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ -#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2) -#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4) - -#ifdef DEBUG -# define ERTS_ACTIVE_FD_INC 2 +typedef enum { + ERTS_EV_TYPE_NONE = 0, + ERTS_EV_TYPE_DRV_SEL = 1, /* driver_select */ + ERTS_EV_TYPE_STOP_USE = 2, /* pending stop_select */ + ERTS_EV_TYPE_NIF = 3, /* enif_select */ + ERTS_EV_TYPE_STOP_NIF = 4 /* pending nif stop */ +} EventStateType; + +typedef enum { + ERTS_EV_FLAG_CLEAR = 0, + ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ #else -# define ERTS_ACTIVE_FD_INC 128 + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, #endif - -#define ERTS_CHECK_IO_POLL_RES_LEN 512 - -#if defined(ERTS_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp -#elif defined(ERTS_NO_KERNEL_POLL_VERSION) -# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd + and it was put in the nkp version */ #else -# define ERTS_CIO_EXPORT(FUNC) FUNC -#endif + ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, +#endif + + /* Combinations */ + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER 
= ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER +} EventStateFlags; + +#define flag2str(flags) \ + ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ + ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ + ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) + +/* How many events that can be handled at once by one erts_poll_wait call */ +#define ERTS_CHECK_IO_POLL_RES_LEN 512 -#define ERTS_CIO_HAVE_DRV_EVENT \ - (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL) +/* Each I/O Poll Thread has one ErtsPollThread each. The ps field + can point to either a private ErtsPollSet or a shared one. + At the moment only kqueue and epoll pollsets can be + shared across threads. +*/ +typedef struct erts_poll_thread +{ + ErtsPollSet *ps; + ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; + int pollres_len; +} ErtsPollThread; -#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control) -#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv) -#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait) -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt) +/* pollsetv contains pointers to the ErtsPollSets that are in use. + * Which pollset to use is determined by hashing the fd. 
+ */ +static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; +#if ERTS_POLL_USE_FALLBACK +static ErtsPollSet *flbk_pollset; +#endif +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; #endif -#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt) -#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed) -#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset) -#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset) -#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds) -#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init) -#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info) - -#define GET_FD(fd) fd - -static struct pollset_info -{ - ErtsPollSet ps; - erts_smp_atomic_t in_poll_wait; /* set while doing poll */ - struct { - int six; /* start index */ - int eix; /* end index */ - erts_smp_atomic32_t no; - int size; - ErtsSysFdType *array; - } active_fd; -#ifdef ERTS_SMP - struct removed_fd* removed_list; /* list of deselected fd's*/ - erts_smp_spinlock_t removed_list_lock; -#endif -}pollset; -#define NUM_OF_POLLSETS 1 typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -119,147 +146,223 @@ typedef struct { ErtsSysFdType fd; struct { ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */ -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ -#endif - erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ + ErtsNifSelectDataState *nif; /* ERTS_EV_TYPE_NIF */ + union { + erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ + ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ + } stop; } driver; - ErtsPollEvents events; - unsigned short remove_cnt; /* number of removed_fd's referring to this fd */ + ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of 
times this fd has triggered + without being deselected. */ } ErtsDrvEventState; -#ifdef ERTS_SMP -struct removed_fd { - struct removed_fd *next; +struct drv_ev_state_shared { + + union { + erts_mtx_t lck; + byte _cache_line_alignment[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_mtx_t))]; + } locks[ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT]; + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - ErtsSysFdType fd; + int max_fds; + erts_atomic_t len; + ErtsDrvEventState *v; + erts_mtx_t grow_lock; /* prevent lock-hogging of racing growers */ #else - ErtsDrvEventState* state; - #ifdef DEBUG - ErtsSysFdType fd; - #endif + SafeHash tab; + int num_prealloc; + ErtsDrvEventState *prealloc_first; + erts_spinlock_t prealloc_lock; #endif - }; -#endif -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static int max_fds = -1; -#endif -#define DRV_EV_STATE_LOCK_CNT 16 -static union { - erts_smp_mtx_t lck; - byte _cache_line_alignment[64]; -}drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT]; +int ERTS_WRITE_UNLIKELY(erts_no_pollsets) = 1; +int ERTS_WRITE_UNLIKELY(erts_no_poll_threads) = 1; +struct drv_ev_state_shared drv_ev_state; -#ifdef ERTS_SMP -static ERTS_INLINE erts_smp_mtx_t* fd_mtx(ErtsSysFdType fd) -{ +static ERTS_INLINE int fd_hash(ErtsSysFdType fd) { int hash = (int)fd; # ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS hash ^= (hash >> 9); # endif - return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck; + return hash; +} + +static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd) +{ + return &drv_ev_state.locks[fd_hash(fd) % ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT].lck; } -#else -# define fd_mtx(fd) NULL -#endif #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static erts_smp_atomic_t drv_ev_state_len; -static ErtsDrvEventState *drv_ev_state; -static erts_smp_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */ +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) +{ + return &drv_ev_state.v[(int) fd]; +} -#else -static SafeHash drv_ev_state_tab; -static int num_state_prealloc; -static 
ErtsDrvEventState *state_prealloc_first; -erts_smp_spinlock_t state_prealloc_lock; +#define new_drv_ev_state(State, fd) (State) +#define erase_drv_ev_state(State) + +static ERTS_INLINE int grow_drv_ev_state(ErtsSysFdType fd) { + int i; + int old_len; + int new_len; + + if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state.len)) { + + if (fd < 0 || fd >= drv_ev_state.max_fds) + return 0; + + erts_mtx_lock(&drv_ev_state.grow_lock); + old_len = erts_atomic_read_nob(&drv_ev_state.len); + if (fd >= old_len) { + new_len = erts_poll_new_table_len(old_len, fd + 1); + if (new_len > drv_ev_state.max_fds) + new_len = drv_ev_state.max_fds; + + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ + erts_mtx_lock(&drv_ev_state.locks[i].lck); + } + drv_ev_state.v = (drv_ev_state.v + ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, + drv_ev_state.v, + sizeof(ErtsDrvEventState)*new_len) + : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, + sizeof(ErtsDrvEventState)*new_len)); + ERTS_CT_ASSERT(ERTS_EV_TYPE_NONE == 0); + sys_memzero(drv_ev_state.v+old_len, + sizeof(ErtsDrvEventState) * (new_len - old_len)); + for (i = old_len; i < new_len; i++) { + drv_ev_state.v[i].fd = (ErtsSysFdType) i; + } + erts_atomic_set_nob(&drv_ev_state.len, new_len); + for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + } + /*else already grown by racing thread */ + + erts_mtx_unlock(&drv_ev_state.grow_lock); + } + return 1; +} -static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd) +static int drv_ev_state_len(void) +{ + return erts_atomic_read_nob(&drv_ev_state.len); +} + +#else /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ + +static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd) { ErtsDrvEventState tmpl; tmpl.fd = fd; - return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl); + return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state.tab, (void *) &tmpl); } -static ERTS_INLINE 
ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd) +static ERTS_INLINE ErtsDrvEventState* new_drv_ev_state(ErtsDrvEventState *state, + ErtsSysFdType fd) { ErtsDrvEventState tmpl; + + if (state) + return state; + tmpl.fd = fd; tmpl.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - tmpl.driver.event = NULL; -#endif - tmpl.driver.drv_ptr = NULL; + tmpl.driver.nif = NULL; + tmpl.driver.stop.drv_ptr = NULL; tmpl.events = 0; - tmpl.remove_cnt = 0; + tmpl.active_events = 0; tmpl.type = ERTS_EV_TYPE_NONE; tmpl.flags = 0; - return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl); + + return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state.tab, (void *) &tmpl); +} + +static ERTS_INLINE void erase_drv_ev_state(ErtsDrvEventState *state) +{ + safe_hash_erase(&drv_ev_state.tab, (void *) state); } -static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state) +static int drv_ev_state_len(void) { - ASSERT(state->remove_cnt == 0); - safe_hash_erase(&drv_ev_state_tab, (void *) state); + return erts_atomic_read_nob(&drv_ev_state.tab.nitems); } #endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */ static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode); -static void select_steal(ErlDrvPort ix, ErtsDrvEventState *state, - int mode, int on); -static void print_select_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort ix, ErtsSysFdType fd, int mode, int on); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int); -#endif -#if ERTS_CIO_HAVE_DRV_EVENT -static void event_steal(ErlDrvPort ix, ErtsDrvEventState *state, - ErlDrvEventData event_data); -static void print_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort, ErtsSysFdType, ErlDrvEventData); +static void drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state, + int mode, int on); +static void nif_select_steal(ErtsDrvEventState *state, int mode, + ErtsResource* resource, Eterm ref); + +static void 
print_drv_select_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort ix, ErtsSysFdType fd, int mode, int on); +static void print_nif_select_op(erts_dsprintf_buf_t*, ErtsSysFdType, + int mode, ErtsResource*, Eterm ref); + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData); -#endif +static void drv_select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int); +static void nif_select_large_fd_error(ErtsSysFdType, int, ErtsResource*,Eterm ref); #endif -static void steal_pending_stop_select(erts_dsprintf_buf_t*, ErlDrvPort, - ErtsDrvEventState*, int mode, int on); - -#ifdef ERTS_SMP -ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST) +static void +steal_pending_stop_use(erts_dsprintf_buf_t*, ErlDrvPort, ErtsDrvEventState*, + int mode, int on); +static void +steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource*, + ErtsDrvEventState *state, int mode, int on); +static ERTS_INLINE void +check_fd_cleanup(ErtsDrvEventState *state, + ErtsDrvSelectDataState **free_select, + ErtsNifSelectDataState **free_nif); +static ERTS_INLINE void iready(Eterm id, ErtsDrvEventState *state); +static ERTS_INLINE void oready(Eterm id, ErtsDrvEventState *state); +#ifdef DEBUG_PRINT_MODE +static char *drvmode2str(int mode); +static char *nifmode2str(enum ErlNifSelectFlags mode); #endif static ERTS_INLINE void -init_iotask(ErtsIoTask *io_task) +init_iotask(ErtsIoTask *io_task, ErtsSysFdType fd) { erts_port_task_handle_init(&io_task->task); - erts_smp_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0)); + io_task->fd = fd; } static ERTS_INLINE int -is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time) -{ +is_iotask_active(ErtsIoTask *io_task) +{ if (erts_port_task_is_scheduled(&io_task->task)) return 1; - if (erts_smp_atomic_read_nob(&io_task->executed_time) == current_cio_time) - return 1; return 0; } static ERTS_INLINE ErtsDrvSelectDataState * -alloc_drv_select_data(void) 
+alloc_drv_select_data(ErtsSysFdType fd) { ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, sizeof(ErtsDrvSelectDataState)); dsp->inport = NIL; dsp->outport = NIL; - init_iotask(&dsp->iniotask); - init_iotask(&dsp->outiotask); + init_iotask(&dsp->iniotask, fd); + init_iotask(&dsp->outiotask, fd); + return dsp; +} + +static ERTS_INLINE ErtsNifSelectDataState * +alloc_nif_select_data(void) +{ + ErtsNifSelectDataState *dsp = erts_alloc(ERTS_ALC_T_NIF_SEL_D_STATE, + sizeof(ErtsNifSelectDataState)); + dsp->in.pid = NIL; + dsp->out.pid = NIL; return dsp; } @@ -271,192 +374,181 @@ free_drv_select_data(ErtsDrvSelectDataState *dsp) erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp); } -#if ERTS_CIO_HAVE_DRV_EVENT - -static ERTS_INLINE ErtsDrvEventDataState * -alloc_drv_event_data(void) +static ERTS_INLINE void +free_nif_select_data(ErtsNifSelectDataState *dsp) { - ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, - sizeof(ErtsDrvEventDataState)); - dep->port = NIL; - dep->data = NULL; - dep->removed_events = 0; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - dep->deferred_events = 0; -#endif - init_iotask(&dep->iotask); - return dep; + erts_free(ERTS_ALC_T_NIF_SEL_D_STATE, dsp); } -static ERTS_INLINE void -free_drv_event_data(ErtsDrvEventDataState *dep) +static ERTS_INLINE int +get_pollset_id(ErtsSysFdType fd) { - ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task)); - erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep); + return fd_hash(fd) % erts_no_pollsets; } -#endif /* ERTS_CIO_HAVE_DRV_EVENT */ - -static ERTS_INLINE void -remember_removed(ErtsDrvEventState *state, struct pollset_info* psi) +static ERTS_INLINE ErtsPollSet * +get_pollset(ErtsSysFdType fd) { -#ifdef ERTS_SMP - struct removed_fd *fdlp; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - if (erts_smp_atomic_read_nob(&psi->in_poll_wait)) { - state->remove_cnt++; - ASSERT(state->remove_cnt > 0); - fdlp = removed_fd_alloc(); - #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || 
defined(DEBUG) - fdlp->fd = state->fd; - #endif - #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - fdlp->state = state; - #endif - erts_smp_spin_lock(&psi->removed_list_lock); - fdlp->next = psi->removed_list; - psi->removed_list = fdlp; - erts_smp_spin_unlock(&psi->removed_list_lock); - } -#endif + return pollsetv[get_pollset_id(fd)]; } +#if ERTS_POLL_USE_FALLBACK +static ERTS_INLINE ErtsPollSet * +get_fallback_pollset(void) +{ + return flbk_pollset; +} +#endif -static ERTS_INLINE int -is_removed(ErtsDrvEventState *state) +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) { -#ifdef ERTS_SMP - /* Note that there is a possible race here, where an fd is removed - (increasing remove_cnt) and then added again just before erts_poll_wait - is called by erts_check_io. Any polled event on the re-added fd will then - be falsely ignored. But that does not matter, as the event will trigger - again next time erl_check_io is called. */ - return state->remove_cnt > 0; +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; #else - return 0; + return get_pollset(fd); #endif } -static void -forget_removed(struct pollset_info* psi) +/* + * Place a fd within a pollset. This will automatically use + * the fallback ps if needed. 
+ */ +static ERTS_INLINE ErtsPollEvents +erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, + ErtsPollEvents pe, int *wake_poller) { -#ifdef ERTS_SMP - struct removed_fd* fdlp; - struct removed_fd* tofree; - - /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */ - - erts_smp_spin_lock(&psi->removed_list_lock); - fdlp = psi->removed_list; - psi->removed_list = NULL; - erts_smp_spin_unlock(&psi->removed_list_lock); - - while (fdlp) { - erts_driver_t* drv_ptr = NULL; - erts_smp_mtx_t* mtx; - ErtsSysFdType fd; - ErtsDrvEventState *state; - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - fd = fdlp->fd; - mtx = fd_mtx(fd); - erts_smp_mtx_lock(mtx); - state = &drv_ev_state[(int) fd]; -#else - state = fdlp->state; - fd = state->fd; - ASSERT(fd == fdlp->fd); - mtx = fd_mtx(fd); - erts_smp_mtx_lock(mtx); -#endif - ASSERT(state->remove_cnt > 0); - if (--state->remove_cnt == 0) { - switch (state->type) { - case ERTS_EV_TYPE_STOP_USE: - /* Now we can call stop_select */ - drv_ptr = state->driver.drv_ptr; - ASSERT(drv_ptr); - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.drv_ptr = NULL; - /* Fall through */ - case ERTS_EV_TYPE_NONE: -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - hash_erase_drv_ev_state(state); -#endif - break; - case ERTS_EV_TYPE_DRV_SEL: - case ERTS_EV_TYPE_DRV_EV: - break; - default: - ASSERT(0); - } - } - erts_smp_mtx_unlock(mtx); - if (drv_ptr) { - int was_unmasked = erts_block_fpe(); - DTRACE1(driver_stop_select, drv_ptr->name); - LTTNG1(driver_stop_select, drv_ptr->name); - (*drv_ptr->stop_select) ((ErlDrvEvent) fd, NULL); - erts_unblock_fpe(was_unmasked); - if (drv_ptr->handle) { - erts_ddll_dereference_driver(drv_ptr->handle); - } - } - tofree = fdlp; - fdlp = fdlp->next; - removed_fd_free(tofree); + ErtsSysFdType fd = state->fd; + ErtsPollEvents res = 0; + EventStateFlags flags = state->flags; + + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); + + if (!(flags & ERTS_EV_FLAG_FALLBACK)) { + + if 
(op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + } + +#if ERTS_POLL_USE_FALLBACK + if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { + /* When an add fails with NVAL, the poll/kevent operation could not + put that fd in the pollset, so we instead put it into a fallback pollset */ + state->flags |= ERTS_EV_FLAG_FALLBACK; + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); + } + } else { + ASSERT(op != ERTS_POLL_OP_ADD); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); +#endif } -#endif /* ERTS_SMP */ + + return res; } -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -grow_drv_ev_state(int min_ix) +static ERTS_INLINE ErtsPollEvents +erts_io_control(ErtsDrvEventState *state, ErtsPollOp op, ErtsPollEvents pe) { - int i; - int old_len; - int new_len; + int wake_poller = 0; + return erts_io_control_wakeup(state, op, pe, &wake_poller); +} - erts_smp_mtx_lock(&drv_ev_state_grow_lock); - old_len = erts_smp_atomic_read_nob(&drv_ev_state_len); - if (min_ix >= old_len) { - new_len = erts_poll_new_table_len(old_len, min_ix + 1); - if (new_len > max_fds) - new_len = max_fds; +/* ToDo: Was inline in erl_check_io.h but now need struct erts_poll_thread */ +void +erts_io_notify_port_task_executed(ErtsPortTaskType type, + ErtsPortTaskHandle *pthp, + void (*reset_handle)(ErtsPortTaskHandle *)) +{ + ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); + ErtsSysFdType fd = itp->fd; + erts_mtx_t *mtx = fd_mtx(fd); + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; + ErtsDrvEventState *state; + ErtsDrvSelectDataState *free_select 
= NULL; + ErtsNifSelectDataState *free_nif = NULL; + + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + + erts_mtx_lock(mtx); + state = get_drv_ev_state(fd); + + reset_handle(pthp); + + active_events = state->active_events; + + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: + + DEBUG_PRINT_FD("executed ready_output", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } + + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } + + /* We were unable to re-insert the fd into the pollset, signal the callback. 
*/ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } + } + + if (!active_events) + check_fd_cleanup(state, &free_select, &free_nif); + + erts_mtx_unlock(mtx); - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */ - erts_smp_mtx_lock(&drv_ev_state_locks[i].lck); - } - drv_ev_state = (drv_ev_state - ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE, - drv_ev_state, - sizeof(ErtsDrvEventState)*new_len) - : erts_alloc(ERTS_ALC_T_DRV_EV_STATE, - sizeof(ErtsDrvEventState)*new_len)); - for (i = old_len; i < new_len; i++) { - drv_ev_state[i].fd = (ErtsSysFdType) i; - drv_ev_state[i].driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - drv_ev_state[i].driver.event = NULL; -#endif - drv_ev_state[i].driver.drv_ptr = NULL; - drv_ev_state[i].events = 0; - drv_ev_state[i].remove_cnt = 0; - drv_ev_state[i].type = ERTS_EV_TYPE_NONE; - drv_ev_state[i].flags = 0; - } - erts_smp_atomic_set_nob(&drv_ev_state_len, new_len); - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { - erts_smp_mtx_unlock(&drv_ev_state_locks[i].lck); - } - } - /*else already grown by racing thread */ + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); - erts_smp_mtx_unlock(&drv_ev_state_grow_lock); + ERTS_MSACC_POP_STATE_M_X(); } -#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */ - static ERTS_INLINE void abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type) @@ -473,13 +565,7 @@ abort_tasks(ErtsDrvEventState *state, int mode) switch (mode) { case 0: check_type: switch (state->type) { -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - abort_task(state->driver.event->port, - &state->driver.event->iotask.task, - ERTS_EV_TYPE_DRV_EV); - return; -#endif + case ERTS_EV_TYPE_NIF: case ERTS_EV_TYPE_NONE: return; default: @@ -508,16 
+594,14 @@ abort_tasks(ErtsDrvEventState *state, int mode) static void deselect(ErtsDrvEventState *state, int mode) { - int do_wake = 0; ErtsPollEvents rm_events; - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - ASSERT(state->events); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); abort_tasks(state, mode); - if (!mode) + if (!mode) { rm_events = state->events; - else { + } else { rm_events = 0; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); if (mode & ERL_DRV_READ) { @@ -530,516 +614,305 @@ deselect(ErtsDrvEventState *state, int mode) } } - state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake); + state->events &= ~rm_events; + state->active_events &= ~rm_events; if (!(state->events)) { + erts_io_control(state, ERTS_POLL_OP_DEL, 0); switch (state->type) { + case ERTS_EV_TYPE_NIF: + state->driver.nif->in.pid = NIL; + state->driver.nif->out.pid = NIL; + enif_release_resource(state->driver.stop.resource->data); + state->driver.stop.resource = NULL; + break; case ERTS_EV_TYPE_DRV_SEL: state->driver.select->inport = NIL; state->driver.select->outport = NIL; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - break; -#endif case ERTS_EV_TYPE_NONE: break; default: ASSERT(0); break; } - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - remember_removed(state, &pollset); + state->flags = 0; + } else { + ErtsPollEvents new_events = + erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + + /* We were unable to re-insert the fd into the pollset, signal the callback. 
*/ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0) +# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE) #else # define IS_FD_UNKNOWN(state) ((state) == NULL) #endif static ERTS_INLINE void check_fd_cleanup(ErtsDrvEventState *state, -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState **free_event, -#endif - ErtsDrvSelectDataState **free_select) + ErtsDrvSelectDataState **free_select, + ErtsNifSelectDataState **free_nif) { - erts_aint_t current_cio_time; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); - - current_cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); + ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); *free_select = NULL; if (state->driver.select && (state->type != ERTS_EV_TYPE_DRV_SEL) - && !is_iotask_active(&state->driver.select->iniotask, current_cio_time) - && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - + && !is_iotask_active(&state->driver.select->iniotask) + && !is_iotask_active(&state->driver.select->outiotask)) { + *free_select = state->driver.select; state->driver.select = NULL; } -#if ERTS_CIO_HAVE_DRV_EVENT - *free_event = NULL; - if (state->driver.event - && (state->type != ERTS_EV_TYPE_DRV_EV) - && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) { - - *free_event = state->driver.event; - state->driver.event = NULL; + *free_nif = NULL; + if (state->driver.nif && (state->type != ERTS_EV_TYPE_NIF)) { + *free_nif = state->driver.nif; + state->driver.nif = NULL; } -#endif -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS if (((state->type != ERTS_EV_TYPE_NONE) - | state->remove_cnt -#if 
ERTS_CIO_HAVE_DRV_EVENT - | (state->driver.event != NULL) -#endif + | (state->driver.nif != NULL) | (state->driver.select != NULL)) == 0) { - hash_erase_drv_ev_state(state); - + erase_drv_ev_state(state); } -#endif } -static ERTS_INLINE int -check_cleanup_active_fd(ErtsSysFdType fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollControlEntry *pce, - int *pce_ix, -#endif - erts_aint_t current_cio_time) -{ - ErtsDrvEventState *state; - int active = 0; - erts_smp_mtx_t *mtx = fd_mtx(fd); - void *free_select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - void *free_event = NULL; -#endif -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evon = 0, evoff = 0; -#endif - - erts_smp_mtx_lock(mtx); - -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ - if (state) -#endif - { - if (state->driver.select) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_IN) - && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) { - evoff |= ERTS_POLL_EV_IN; - state->flags |= ERTS_EV_FLAG_DEFER_IN_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) { - if (state->events & ERTS_POLL_EV_IN) - evon |= ERTS_POLL_EV_IN; - state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV; - } - if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { - active = 1; - if ((state->events & ERTS_POLL_EV_OUT) - && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) { - evoff |= ERTS_POLL_EV_OUT; - state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV; - } - } - else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) { - if (state->events & ERTS_POLL_EV_OUT) - evon |= ERTS_POLL_EV_OUT; - state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV; - } - if (active) - (void) 0; - else +#ifdef __WIN32__ +# define MUST_DEFER(MAY_SLEEP) 1 #else - if (is_iotask_active(&state->driver.select->iniotask, current_cio_time) - || 
is_iotask_active(&state->driver.select->outiotask, current_cio_time)) - active = 1; - else -#endif - if (state->type != ERTS_EV_TYPE_DRV_SEL) { - free_select = state->driver.select; - state->driver.select = NULL; - } - } - -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) { - if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) { -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events; - if (evs) { - evoff |= evs; - state->driver.event->deferred_events |= evs; - } -#endif - active = 1; - } - else if (state->type != ERTS_EV_TYPE_DRV_EV) { - free_event = state->driver.event; - state->driver.event = NULL; - } -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - else { - ErtsPollEvents evs = state->events & state->driver.event->deferred_events; - if (evs) { - evon |= evs; - state->driver.event->deferred_events = 0; - } - } -#endif - - } -#endif - -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0) - hash_erase_drv_ev_state(state); -#endif - - } - - erts_smp_mtx_unlock(mtx); - - if (free_select) - free_drv_select_data(free_select); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif - -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - if (evoff) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evoff; - pcep->on = 0; - } - if (evon) { - ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; - pcep->fd = fd; - pcep->events = evon; - pcep->on = 1; - } -#endif - - return active; -} - -static void -check_cleanup_active_fds(erts_aint_t current_cio_time) -{ - int six = pollset.active_fd.six; - int eix = pollset.active_fd.eix; - erts_aint32_t no = erts_smp_atomic32_read_dirty(&pollset.active_fd.no); - int size = pollset.active_fd.size; - int ix = six; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - /* every fd might add two entries */ - Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no; - ErtsPollControlEntry 
*pctrl_entries = (pce_sz - ? erts_alloc(ERTS_ALC_T_TMP, pce_sz) - : NULL); - int pctrl_ix = 0; -#endif - - while (ix != eix) { - ErtsSysFdType fd = pollset.active_fd.array[ix]; - int nix = ix + 1; - if (nix >= size) - nix = 0; - ASSERT(fd != ERTS_SYS_FD_INVALID); - if (!check_cleanup_active_fd(fd, -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - pctrl_entries, - &pctrl_ix, -#endif - current_cio_time)) { - no--; - if (ix == six) { -#ifdef DEBUG - pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID; -#endif - six = nix; - } - else { - pollset.active_fd.array[ix] = pollset.active_fd.array[six]; -#ifdef DEBUG - pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID; -#endif - six++; - if (six >= size) - six = 0; - } - } - ix = nix; - } - -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry)); - if (pctrl_ix) - ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix); - if (pctrl_entries) - erts_free(ERTS_ALC_T_TMP, pctrl_entries); +# define MUST_DEFER(MAY_SLEEP) (MAY_SLEEP) #endif - pollset.active_fd.six = six; - pollset.active_fd.eix = eix; - erts_smp_atomic32_set_relb(&pollset.active_fd.no, no); -} - -static ERTS_INLINE void -add_active_fd(ErtsSysFdType fd) -{ - int eix = pollset.active_fd.eix; - int size = pollset.active_fd.size; - - - pollset.active_fd.array[eix] = fd; - - erts_smp_atomic32_set_relb(&pollset.active_fd.no, - (erts_smp_atomic32_read_dirty(&pollset.active_fd.no) - + 1)); - - eix++; - if (eix >= size) - eix = 0; - if (pollset.active_fd.six == eix) { - pollset.active_fd.six = 0; - eix = size; - size += ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR, - pollset.active_fd.array, - sizeof(ErtsSysFdType)*size); - pollset.active_fd.size = size; -#ifdef DEBUG - { - int i; - for (i = eix + 1; i < size; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; - } -#endif - - } - - pollset.active_fd.eix = eix; -} - int -ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, - ErlDrvEvent e, - int mode, - int on) 
+driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) { void (*stop_select_fn)(ErlDrvEvent, void*) = NULL; Port *prt = erts_drvport2port(ix); Eterm id = erts_drvport2id(ix); ErtsSysFdType fd = (ErtsSysFdType) e; ErtsPollEvents ctl_events = (ErtsPollEvents) 0; - ErtsPollEvents new_events, old_events; + ErtsPollEvents old_events; + ErtsPollEvents new_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int wake_poller; + int wake_poller = 0; int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event = NULL; -#endif ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; #ifdef USE_VM_PROBES DTRACE_CHARBUF(name, 64); #endif + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); - if (prt == ERTS_INVALID_ERL_DRV_PORT) + if (prt == ERTS_INVALID_ERL_DRV_PORT) { + ERTS_MSACC_POP_STATE(); return -1; + } - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)); + ERTS_LC_ASSERT(erts_lc_is_port_locked(prt)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) { - return -1; - } - if (fd >= max_fds) { - select_large_fd_error(ix, fd, mode, on); - return -1; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) drv_select_large_fd_error(ix, fd, mode, on); + ERTS_MSACC_POP_STATE(); + return -1; } #endif - erts_smp_mtx_lock(fd_mtx(fd)); + erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - state = hash_get_drv_ev_state(fd); /* may be NULL! */ -#endif + state = get_drv_ev_state(fd); /* may be NULL! 
*/ - if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - if (IS_FD_UNKNOWN(state)) { - /* fast track to stop_select callback */ - stop_select_fn = prt->drv_ptr->stop_select; -#ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, - sizeof(DTRACE_CHARBUF_NAME(name))-1); - name[sizeof(name)-1] = '\0'; -#endif - ret = 0; - goto done_unknown; - } - mode |= (ERL_DRV_READ | ERL_DRV_WRITE); - wake_poller = 1; /* to eject fd from pollset (if needed) */ + DEBUG_PRINT_FD("driver_select(%T, %p, %s, %d)", + state, id, fd, drvmode2str(mode), on); + + if (!on) { + if (IS_FD_UNKNOWN(state)) { + if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + /* fast track to stop_select callback */ + stop_select_fn = prt->drv_ptr->stop_select; + #ifdef USE_VM_PROBES + strncpy(name, prt->drv_ptr->name, + sizeof(DTRACE_CHARBUF_NAME(name))-1); + name[sizeof(name)-1] = '\0'; + #endif + } + ret = 0; + goto done_unknown; + } + else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + mode |= (ERL_DRV_READ | ERL_DRV_WRITE); + } } - else wake_poller = 0; -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state == NULL) { - state = hash_new_drv_ev_state(fd); + state = new_drv_ev_state(state, fd); + + switch (state->type) { + case ERTS_EV_TYPE_NIF: + drv_select_steal(ix, state, mode, on); + break; + case ERTS_EV_TYPE_STOP_USE: { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_drv_select_op(dsbufp, ix, state->fd, mode, on); + steal_pending_stop_use(dsbufp, ix, state, mode, on); + if (state->type == ERTS_EV_TYPE_STOP_USE) { + ret = 0; + goto done; /* stop_select still pending */ + } + ASSERT(state->type == ERTS_EV_TYPE_NONE); + break; } -#endif + case ERTS_EV_TYPE_STOP_NIF: { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_drv_select_op(dsbufp, ix, state->fd, mode, on); + steal_pending_stop_nif(dsbufp, NULL, state, mode, on); + ASSERT(state->type == ERTS_EV_TYPE_NONE); + break; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->type == ERTS_EV_TYPE_DRV_EV) - 
select_steal(ix, state, mode, on); -#endif - if (state->type == ERTS_EV_TYPE_STOP_USE) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_select_op(dsbufp, ix, state->fd, mode, on); - steal_pending_stop_select(dsbufp, ix, state, mode, on); - if (state->type == ERTS_EV_TYPE_STOP_USE) { - ret = 0; - goto done; /* stop_select still pending */ - } - ASSERT(state->type == ERTS_EV_TYPE_NONE); + } + default: break; } if (mode & ERL_DRV_READ) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { Eterm owner = state->driver.select->inport; if (owner != id && is_not_nil(owner)) - select_steal(ix, state, mode, on); + drv_select_steal(ix, state, mode, on); } - ctl_events |= ERTS_POLL_EV_IN; + ctl_events = ERTS_POLL_EV_IN; } if (mode & ERL_DRV_WRITE) { if (state->type == ERTS_EV_TYPE_DRV_SEL) { Eterm owner = state->driver.select->outport; if (owner != id && is_not_nil(owner)) - select_steal(ix, state, mode, on); + drv_select_steal(ix, state, mode, on); } ctl_events |= ERTS_POLL_EV_OUT; - } + } + ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) || (state->type == ERTS_EV_TYPE_NONE && !state->events)); - if (!on && !(state->flags & ERTS_EV_FLAG_USED) - && state->events && !(state->events & ~ctl_events)) { - /* Old driver removing all events. At least wake poller. - It will not make close() 100% safe but it will prevent - actions delayed by poll timeout. 
*/ - wake_poller = 1; + old_events = state->events; + + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + if (ctl_events & ERTS_POLL_EV_IN && (!state->driver.select || !is_iotask_active(&state->driver.select->iniotask))) + state->active_events |= ERTS_POLL_EV_IN; + if (ctl_events & ERTS_POLL_EV_OUT && (!state->driver.select || !is_iotask_active(&state->driver.select->outiotask))) + state->active_events |= ERTS_POLL_EV_OUT; + if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { + ctl_op = ERTS_POLL_OP_ADD; + } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } + else { + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; + new_events = state->active_events; - new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller); + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - state->driver.select->inport = NIL; - state->driver.select->outport = NIL; - } - ret = -1; - goto done; + if (!state->events) { + if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) + ctl_op = ERTS_POLL_OP_DEL; + } } - old_events = state->events; + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { - ASSERT(on - ? 
(new_events == (state->events | ctl_events)) - : (new_events == (state->events & ~ctl_events))); + new_events = erts_io_control_wakeup(state, ctl_op, + new_events, + &wake_poller); - ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL - || state->type == ERTS_EV_TYPE_NONE); + ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); + } - state->events = new_events; - if (ctl_events) { - if (on) { + if (on) { + if (ctl_events) { if (!state->driver.select) - state->driver.select = alloc_drv_select_data(); + state->driver.select = alloc_drv_select_data(state->fd); if (state->type == ERTS_EV_TYPE_NONE) state->type = ERTS_EV_TYPE_DRV_SEL; ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); - if (ctl_events & ERTS_POLL_EV_IN) + if (ctl_events & ERTS_POLL_EV_IN) { state->driver.select->inport = id; - if (ctl_events & ERTS_POLL_EV_OUT) + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + iready(id, state); + } + if (ctl_events & ERTS_POLL_EV_OUT) { state->driver.select->outport = id; - if (mode & ERL_DRV_USE) { + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) + oready(id, state); + } + if (mode & ERL_DRV_USE) state->flags |= ERTS_EV_FLAG_USED; - } - } - else { /* off */ - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - if (ctl_events & ERTS_POLL_EV_IN) { - abort_tasks(state, ERL_DRV_READ); - state->driver.select->inport = NIL; - } - if (ctl_events & ERTS_POLL_EV_OUT) { - abort_tasks(state, ERL_DRV_WRITE); - state->driver.select->outport = NIL; - } - if (new_events == 0) { - if (old_events != 0) { - remember_removed(state, &pollset); - } - if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - } - /*else keep it, as fd will probably be selected upon again */ - } - } - if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { - erts_driver_t* drv_ptr = prt->drv_ptr; - ASSERT(new_events==0); - if (state->remove_cnt == 0 || !wake_poller) { - /* Safe to close fd now as it is 
not in pollset - or there was no need to eject fd (kernel poll) */ - stop_select_fn = drv_ptr->stop_select; + } + } + else { /* off */ + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + if (ctl_events & ERTS_POLL_EV_IN) { + abort_tasks(state, ERL_DRV_READ); + state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (ctl_events & ERTS_POLL_EV_OUT) { + abort_tasks(state, ERL_DRV_WRITE); + state->driver.select->outport = NIL; + } + if (state->events == 0) { + if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { + state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); + state->flags = 0; + } + /*else keep it, as fd will probably be selected upon again */ + } + } + if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { + erts_driver_t* drv_ptr = prt->drv_ptr; + ASSERT(state->events==0); + if (!wake_poller) { + /* Safe to close fd now as it is not in pollset + or there was no need to eject fd (kernel poll) */ + stop_select_fn = drv_ptr->stop_select; #ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, sizeof(name)-1); - name[sizeof(name)-1] = '\0'; -#endif - } - else { - /* Not safe to close fd, postpone stop_select callback. */ - state->type = ERTS_EV_TYPE_STOP_USE; - state->driver.drv_ptr = drv_ptr; - if (drv_ptr->handle) { - erts_ddll_reference_referenced_driver(drv_ptr->handle); - } - } - } - } + strncpy(name, prt->drv_ptr->name, sizeof(name)-1); + name[sizeof(name)-1] = '\0'; +#endif + } + else { + /* Not safe to close fd, postpone stop_select callback. 
*/ + state->type = ERTS_EV_TYPE_STOP_USE; + state->driver.stop.drv_ptr = drv_ptr; + if (drv_ptr->handle) { + erts_ddll_reference_referenced_driver(drv_ptr->handle); + } + } + } } ret = 0; @@ -1047,13 +920,11 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif - &free_select); + &free_select, + &free_nif); done_unknown: - erts_smp_mtx_unlock(fd_mtx(fd)); + erts_mtx_unlock(fd_mtx(fd)); if (stop_select_fn) { int was_unmasked = erts_block_fpe(); DTRACE1(driver_stop_select, name); @@ -1063,155 +934,245 @@ done_unknown: } if (free_select) free_drv_select_data(free_select); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif + if (free_nif) + free_nif_select_data(free_nif); + + ERTS_MSACC_POP_STATE(); + return ret; } int -ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, - ErlDrvEvent e, - ErlDrvEventData event_data) +enif_select(ErlNifEnv* env, + ErlNifEvent e, + enum ErlNifSelectFlags mode, + void* obj, + const ErlNifPid* pid, + Eterm ref) { -#if !ERTS_CIO_HAVE_DRV_EVENT - return -1; -#else + int on; + ErtsResource* resource = DATA_TO_RESOURCE(obj); ErtsSysFdType fd = (ErtsSysFdType) e; - ErtsPollEvents events; - ErtsPollEvents add_events; - ErtsPollEvents remove_events; - Eterm id = erts_drvport2id(ix); + ErtsPollEvents ctl_events = (ErtsPollEvents) 0; + ErtsPollEvents old_events; + ErtsPollOp ctl_op = ERTS_POLL_OP_MOD; ErtsDrvEventState *state; - int do_wake = 0; - int ret; -#if ERTS_CIO_HAVE_DRV_EVENT - ErtsDrvEventDataState *free_event; -#endif - ErtsDrvSelectDataState *free_select; - Port *prt = erts_drvport2port(ix); - - if (prt == ERTS_INVALID_ERL_DRV_PORT) - return -1; + int ret, wake_poller = 0; + enum { NO_STOP=0, CALL_STOP, CALL_STOP_AND_RELEASE } call_stop = NO_STOP; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; - ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt)); + ASSERT(!resource->monitors); #ifdef 
ERTS_SYS_CONTINOUS_FD_NUMBERS - if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) { - if (fd < 0) - return -1; - if (fd >= max_fds) { - event_large_fd_error(ix, fd, event_data); - return -1; - } - grow_drv_ev_state(fd); + if (!grow_drv_ev_state(fd)) { + if (fd > 0) nif_select_large_fd_error(fd, mode, resource, ref); + return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; } #endif - erts_smp_mtx_lock(fd_mtx(fd)); + erts_mtx_lock(fd_mtx(fd)); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[(int) fd]; -#else - /* Could use hash_new directly, but want to keep the normal case fast */ - state = hash_get_drv_ev_state(fd); - if (state == NULL) { - state = hash_new_drv_ev_state(fd); + state = get_drv_ev_state(fd); /* may be NULL! */ + + DEBUG_PRINT_FD("enif_select(%T, %d, %s, %p, %T, %T)", + state, env->proc->common.id, fd, nifmode2str(mode), resource, + pid ? pid->pid : THE_NON_VALUE, ref); + + if (mode & ERL_NIF_SELECT_STOP) { + ASSERT(resource->type->stop); + if (IS_FD_UNKNOWN(state)) { + /* fast track to stop callback */ + call_stop = CALL_STOP; + ret = ERL_NIF_SELECT_STOP_CALLED; + goto done_unknown; + } + on = 0; + mode = ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE; + ctl_events = ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT; + ctl_op = ERTS_POLL_OP_DEL; } -#endif + else { + on = 1; + ASSERT(mode); + if (mode & ERL_DRV_READ) { + ctl_events |= ERTS_POLL_EV_IN; + } + if (mode & ERL_DRV_WRITE) { + ctl_events |= ERTS_POLL_EV_OUT; + } + } + + state = new_drv_ev_state(state,fd); switch (state->type) { - case ERTS_EV_TYPE_DRV_EV: - if (state->driver.event->port == id) break; - /*fall through*/ + case ERTS_EV_TYPE_NIF: + /* + * Changing resource is considered stealing. + * Changing process and/or ref is ok (I think?). 
+ */ + if (state->driver.stop.resource != resource) + nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, ref); + break; case ERTS_EV_TYPE_DRV_SEL: - event_steal(ix, state, event_data); - break; + nif_select_steal(state, mode, resource, ref); + break; case ERTS_EV_TYPE_STOP_USE: { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_event_op(dsbufp, ix, fd, event_data); - steal_pending_stop_select(dsbufp, ix, state, 0, 1); - break; - } - } - - ASSERT(state->type == ERTS_EV_TYPE_DRV_EV - || state->type == ERTS_EV_TYPE_NONE); + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_nif_select_op(dsbufp, fd, mode, resource, ref); + steal_pending_stop_use(dsbufp, ERTS_INVALID_ERL_DRV_PORT, state, mode, on); + ASSERT(state->type == ERTS_EV_TYPE_NONE); + break; + } + case ERTS_EV_TYPE_STOP_NIF: { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_nif_select_op(dsbufp, fd, mode, resource, ref); + steal_pending_stop_nif(dsbufp, resource, state, mode, on); + if (state->type == ERTS_EV_TYPE_STOP_NIF) { + ret = ERL_NIF_SELECT_STOP_SCHEDULED; /* ?? 
*/ + goto done; + } + ASSERT(state->type == ERTS_EV_TYPE_NONE); + break; + } + default: break; + } + + ASSERT((state->type == ERTS_EV_TYPE_NIF) || + (state->type == ERTS_EV_TYPE_NONE && !state->events)); - events = state->events; + old_events = state->events; - if (!event_data) { - remove_events = events; - add_events = 0; + if (on) { + ctl_events &= ~old_events; + state->events |= ctl_events; + state->active_events |= ctl_events; + if (state->type == ERTS_EV_TYPE_NONE) + ctl_op = ERTS_POLL_OP_ADD; } else { - remove_events = ~event_data->events & events; - add_events = ~events & event_data->events; + ctl_events &= old_events; + state->events &= ~ctl_events; + state->active_events &= ~ctl_events; } - if (add_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (remove_events) { - events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake); - if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - ret = -1; - goto done; - } - } - if (event_data && event_data->events != 0) { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - state->driver.event->removed_events &= ~add_events; - state->driver.event->removed_events |= remove_events; - } - else { - if (!state->driver.event) - state->driver.event = alloc_drv_event_data(); - state->driver.event->port = id; - state->driver.event->removed_events = (ErtsPollEvents) 0; - state->type = ERTS_EV_TYPE_DRV_EV; - } - state->driver.event->data = event_data; - } - else { - if (state->type == ERTS_EV_TYPE_DRV_EV) { - abort_tasks(state, 0); - state->driver.event->port = NIL; - state->driver.event->data = NULL; - state->driver.event->removed_events = (ErtsPollEvents) 0; - } - state->type = ERTS_EV_TYPE_NONE; - remember_removed(state, &pollset); + if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { + ErtsPollEvents new_events; + + new_events = erts_io_control_wakeup(state, ctl_op, + state->active_events, 
+ &wake_poller); + + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->type == ERTS_EV_TYPE_NIF && !old_events) { + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.nif->in.pid = NIL; + state->driver.nif->out.pid = NIL; + state->driver.stop.resource = NULL; + } + ret = INT_MIN | ERL_NIF_SELECT_FAILED; + goto done; + } + ASSERT(new_events == state->events); } - state->events = events; - ASSERT(event_data ? events == event_data->events : events == 0); - ret = 0; + ASSERT(state->type == ERTS_EV_TYPE_NIF + || state->type == ERTS_EV_TYPE_NONE); + + if (on) { + const Eterm recipient = pid ? pid->pid : env->proc->common.id; + Uint32* refn; + if (!state->driver.nif) + state->driver.nif = alloc_nif_select_data(); + if (state->type == ERTS_EV_TYPE_NONE) { + state->type = ERTS_EV_TYPE_NIF; + state->driver.stop.resource = resource; + enif_keep_resource(resource->data); + } + ASSERT(state->type == ERTS_EV_TYPE_NIF); + ASSERT(state->driver.stop.resource == resource); + if (mode & ERL_DRV_READ) { + state->driver.nif->in.pid = recipient; + if (is_immed(ref)) { + state->driver.nif->in.immed = ref; + } else { + ASSERT(is_internal_ref(ref)); + refn = internal_ref_numbers(ref); + state->driver.nif->in.immed = THE_NON_VALUE; + sys_memcpy(state->driver.nif->in.refn, refn, + sizeof(state->driver.nif->in.refn)); + } + } + if (mode & ERL_DRV_WRITE) { + state->driver.nif->out.pid = recipient; + if (is_immed(ref)) { + state->driver.nif->out.immed = ref; + } else { + ASSERT(is_internal_ref(ref)); + refn = internal_ref_numbers(ref); + state->driver.nif->out.immed = THE_NON_VALUE; + sys_memcpy(state->driver.nif->out.refn, refn, + sizeof(state->driver.nif->out.refn)); + } + } + ret = 0; + } + else { /* off */ + if (state->type == ERTS_EV_TYPE_NIF) { + state->driver.nif->in.pid = NIL; + state->driver.nif->out.pid = NIL; + } + ASSERT(state->events==0); + if (!wake_poller) { + /* + * Safe to close fd now as it is not in pollset + * or there was no need to 
eject fd (kernel poll) + */ + if (state->type == ERTS_EV_TYPE_NIF) { + ASSERT(state->driver.stop.resource == resource); + call_stop = CALL_STOP_AND_RELEASE; + state->driver.stop.resource = NULL; + } + else { + ASSERT(!state->driver.stop.resource); + call_stop = CALL_STOP; + } + state->type = ERTS_EV_TYPE_NONE; + ret = ERL_NIF_SELECT_STOP_CALLED; + } + else { + /* Not safe to close fd, postpone stop_select callback. */ + if (state->type == ERTS_EV_TYPE_NONE) { + ASSERT(!state->driver.stop.resource); + state->driver.stop.resource = resource; + enif_keep_resource(resource); + } + state->type = ERTS_EV_TYPE_STOP_NIF; + ret = ERL_NIF_SELECT_STOP_SCHEDULED; + } + } done: check_fd_cleanup(state, -#if ERTS_CIO_HAVE_DRV_EVENT - &free_event, -#endif - &free_select); - - erts_smp_mtx_unlock(fd_mtx(fd)); + &free_select, + &free_nif); +done_unknown: + erts_mtx_unlock(fd_mtx(fd)); + if (call_stop) { + erts_resource_stop(resource, (ErlNifEvent)fd, 1); + if (call_stop == CALL_STOP_AND_RELEASE) { + enif_release_resource(resource->data); + } + } if (free_select) free_drv_select_data(free_select); -#if ERTS_CIO_HAVE_DRV_EVENT - if (free_event) - free_drv_event_data(free_event); -#endif + if (free_nif) + free_nif_select_data(free_nif); return ret; -#endif } static ERTS_INLINE int @@ -1240,13 +1201,14 @@ need2steal(ErtsDrvEventState *state, int mode) state, ERL_DRV_WRITE); break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: - do_steal |= chk_stale(state->driver.event->port, state, 0); - break; -#endif + case ERTS_EV_TYPE_NIF: + ASSERT(state->driver.stop.resource); + do_steal = 1; + break; + case ERTS_EV_TYPE_STOP_USE: - ASSERT(0); + case ERTS_EV_TYPE_STOP_NIF: + ASSERT(0); break; default: break; @@ -1277,7 +1239,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) static void steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) { - erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "stealing 
control of fd=%d from ", (int) state->fd); switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { int deselect_mode = 0; @@ -1301,49 +1263,52 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) if (deselect_mode) deselect(state, deselect_mode); else { - erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd)); - ASSERT(0); - } - erts_dsprintf(dsbufp, "\n"); - break; - } -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { - Eterm eid = state->driver.event->port; - if (is_nil(eid)) { erts_dsprintf(dsbufp, "no one", (int) state->fd); ASSERT(0); } - else { - erts_dsprintf(dsbufp, "event driver "); - print_driver_name(dsbufp, eid); - erts_dsprintf(dsbufp, "%T ", eid); - } erts_dsprintf(dsbufp, "\n"); - deselect(state, 0); break; } -#endif - case ERTS_EV_TYPE_STOP_USE: { + case ERTS_EV_TYPE_NIF: { + Eterm iid = state->driver.nif->in.pid; + Eterm oid = state->driver.nif->out.pid; + const char* with = "with"; + ErlNifResourceType* rt = state->driver.stop.resource->type; + + erts_dsprintf(dsbufp, "resource %T:%T", rt->module, rt->name); + + if (is_not_nil(iid)) { + erts_dsprintf(dsbufp, " %s in-pid %T", with, iid); + with = "and"; + } + if (is_not_nil(oid)) { + erts_dsprintf(dsbufp, " %s out-pid %T", with, oid); + } + deselect(state, 0); + erts_dsprintf(dsbufp, "\n"); + break; + } + case ERTS_EV_TYPE_STOP_USE: + case ERTS_EV_TYPE_STOP_NIF: { ASSERT(0); break; } default: - erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd)); + erts_dsprintf(dsbufp, "no one\n", (int) state->fd); ASSERT(0); } } static void -print_select_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) +print_drv_select_op(erts_dsprintf_buf_t *dsbufp, + ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) { Port *pp = erts_drvport2port(ix); erts_dsprintf(dsbufp, "driver_select(%p, %d,%s%s%s%s, %d) " "by ", ix, - (int) GET_FD(fd), + (int) fd, mode & ERL_DRV_READ ? " ERL_DRV_READ" : "", mode & ERL_DRV_WRITE ? 
" ERL_DRV_WRITE" : "", mode & ERL_DRV_USE ? " ERL_DRV_USE" : "", @@ -1354,11 +1319,40 @@ print_select_op(erts_dsprintf_buf_t *dsbufp, } static void -select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on) +print_nif_select_op(erts_dsprintf_buf_t *dsbufp, + ErtsSysFdType fd, int mode, + ErtsResource* resource, Eterm ref) +{ + erts_dsprintf(dsbufp, + "enif_select(_, %d,%s%s%s, %T:%T, %T) ", + (int) fd, + mode & ERL_NIF_SELECT_READ ? " READ" : "", + mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "", + mode & ERL_NIF_SELECT_STOP ? " STOP" : "", + resource->type->module, + resource->type->name, + ref); +} + + +static void +drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on) { if (need2steal(state, mode)) { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_select_op(dsbufp, ix, state->fd, mode, on); + print_drv_select_op(dsbufp, ix, state->fd, mode, on); + steal(dsbufp, state, mode); + erts_send_error_to_logger_nogl(dsbufp); + } +} + +static void +nif_select_steal(ErtsDrvEventState *state, int mode, + ErtsResource* resource, Eterm ref) +{ + if (need2steal(state, mode)) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_nif_select_op(dsbufp, state->fd, mode, resource, ref); steal(dsbufp, state, mode); erts_send_error_to_logger_nogl(dsbufp); } @@ -1370,14 +1364,24 @@ large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd) { erts_dsprintf(dsbufp, "fd=%d is larger than the largest allowed fd=%d\n", - (int) fd, max_fds - 1); + (int) fd, drv_ev_state.max_fds - 1); } static void -select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) +drv_select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_select_op(dsbufp, ix, fd, mode, on); + print_drv_select_op(dsbufp, ix, fd, mode, on); + erts_dsprintf(dsbufp, "failed: "); + large_fd_error_common(dsbufp, fd); + erts_send_error_to_logger_nogl(dsbufp); +} 
+static void +nif_select_large_fd_error(ErtsSysFdType fd, int mode, + ErtsResource* resource, Eterm ref) +{ + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + print_nif_select_op(dsbufp, fd, mode, resource, ref); erts_dsprintf(dsbufp, "failed: "); large_fd_error_common(dsbufp, fd); erts_send_error_to_logger_nogl(dsbufp); @@ -1387,97 +1391,87 @@ select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on) static void -steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, - ErtsDrvEventState *state, int mode, int on) +steal_pending_stop_use(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, + ErtsDrvEventState *state, int mode, int on) { + int cancel = 0; ASSERT(state->type == ERTS_EV_TYPE_STOP_USE); - erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select " - "was called for driver %s\n", - (int) GET_FD(state->fd), state->driver.drv_ptr->name); - erts_send_error_to_logger_nogl(dsbufp); if (on) { /* Either fd-owner changed its mind about closing * or closed fd before stop_select callback and fd is now reused. * In either case stop_select should not be called. - */ - state->type = ERTS_EV_TYPE_NONE; - state->flags &= ~ERTS_EV_FLAG_USED; - if (state->driver.drv_ptr->handle) { - erts_ddll_dereference_driver(state->driver.drv_ptr->handle); - } - state->driver.drv_ptr = NULL; + */ + cancel = 1; } else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { Port *prt = erts_drvport2port(ix); - erts_driver_t* drv_ptr = prt != ERTS_INVALID_ERL_DRV_PORT ? 
prt->drv_ptr : NULL; - if (drv_ptr && drv_ptr != state->driver.drv_ptr) { - /* Some other driver wants the stop_select callback */ - if (state->driver.drv_ptr->handle) { - erts_ddll_dereference_driver(state->driver.drv_ptr->handle); - } - if (drv_ptr->handle) { - erts_ddll_reference_referenced_driver(drv_ptr->handle); - } - state->driver.drv_ptr = drv_ptr; - } + if (prt == ERTS_INVALID_ERL_DRV_PORT + || prt->drv_ptr != state->driver.stop.drv_ptr) { + /* Some other driver or nif wants the stop_select callback */ + cancel = 1; + } + } + + if (cancel) { + erts_dsprintf(dsbufp, "called before stop_select was called for driver '%s'\n", + state->driver.stop.drv_ptr->name); + if (state->driver.stop.drv_ptr->handle) { + erts_ddll_dereference_driver(state->driver.stop.drv_ptr->handle); + } + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.stop.drv_ptr = NULL; } - + else { + erts_dsprintf(dsbufp, "ignored repeated call\n"); + } + erts_send_error_to_logger_nogl(dsbufp); } - -#if ERTS_CIO_HAVE_DRV_EVENT - static void -print_event_op(erts_dsprintf_buf_t *dsbufp, - ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) +steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource, + ErtsDrvEventState *state, int mode, int on) { - Port *pp = erts_drvport2port(ix); - erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd); - if (!event_data) - erts_dsprintf(dsbufp, "NULL"); - else - erts_dsprintf(dsbufp, "{0x%x, 0x%x}", - (unsigned int) event_data->events, - (unsigned int) event_data->revents); - erts_dsprintf(dsbufp, ") by "); - if (pp != ERTS_INVALID_ERL_DRV_PORT) - print_driver_name(dsbufp, pp->common.id); - erts_dsprintf(dsbufp, "driver %T ", pp != ERTS_INVALID_ERL_DRV_PORT ? 
pp->common.id : NIL); -} + int cancel = 0; -static void -event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data) -{ - if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) { - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_event_op(dsbufp, ix, state->fd, event_data); - steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE); - erts_send_error_to_logger_nogl(dsbufp); + ASSERT(state->type == ERTS_EV_TYPE_STOP_NIF); + ASSERT(state->driver.stop.resource); + + if (on) { + ASSERT(mode & (ERL_NIF_SELECT_READ | ERL_NIF_SELECT_WRITE)); + /* Either fd-owner changed its mind about closing + * or closed fd before stop callback and fd is now reused. + * In either case, stop should not be called. + */ + cancel = 1; } - else if (state->type == ERTS_EV_TYPE_DRV_SEL) { - ASSERT(state->flags & ERTS_EV_FLAG_USED); - deselect(state, 0); + else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE + && resource != state->driver.stop.resource) { + /* Some driver or other resource wants the stop callback */ + cancel = 1; } -} -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS -static void -event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data) -{ - erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_event_op(dsbufp, ix, fd, event_data); - erts_dsprintf(dsbufp, "failed: "); - large_fd_error_common(dsbufp, fd); + if (cancel) { + ErlNifResourceType* rt = state->driver.stop.resource->type; + erts_dsprintf(dsbufp, "called before stop was called for NIF resource %T:%T\n", + rt->module, rt->name); + + enif_release_resource(state->driver.stop.resource->data); + state->type = ERTS_EV_TYPE_NONE; + state->flags = 0; + state->driver.stop.resource = NULL; + } + else { + erts_dsprintf(dsbufp, "ignored repeated call\n"); + } erts_send_error_to_logger_nogl(dsbufp); + } -#endif -#endif static ERTS_INLINE int io_task_schedule_allowed(ErtsDrvEventState *state, - ErtsPortTaskType type, - erts_aint_t current_cio_time) + ErtsPortTaskType type) { 
ErtsIoTask *io_task; @@ -1485,179 +1479,159 @@ io_task_schedule_allowed(ErtsDrvEventState *state, case ERTS_PORT_TASK_INPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->iniotask; break; case ERTS_PORT_TASK_OUTPUT: if (!state->driver.select) return 0; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - return 0; -#endif io_task = &state->driver.select->outiotask; break; -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_PORT_TASK_EVENT: - if (!state->driver.event) - return 0; - if (state->driver.select) - return 0; - io_task = &state->driver.event->iotask; - break; -#endif default: ERTS_INTERNAL_ERROR("Invalid I/O-task type"); return 0; } - return !is_iotask_active(io_task, current_cio_time); + return !is_iotask_active(io_task); } static ERTS_INLINE void -iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +iready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_INPUT, - current_cio_time)) { + ERTS_PORT_TASK_INPUT)) { ErtsIoTask *iotask = &state->driver.select->iniotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); - } - add_active_fd(state->fd); + } else { + DEBUG_PRINT_FD("schedule ready_input(%T, %d)", + state, id, state->fd); + } } } static ERTS_INLINE void -oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) +oready(Eterm id, ErtsDrvEventState *state) { if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_OUTPUT, - current_cio_time)) { + ERTS_PORT_TASK_OUTPUT)) { ErtsIoTask *iotask = &state->driver.select->outiotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); if (erts_port_task_schedule(id, &iotask->task, 
ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + 0) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); - } - add_active_fd(state->fd); + } else { + DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); + } } } -#if ERTS_CIO_HAVE_DRV_EVENT static ERTS_INLINE void -eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data, - erts_aint_t current_cio_time) +send_event_tuple(struct erts_nif_select_event* e, ErtsResource* resource, + Eterm event_atom) { - if (io_task_schedule_allowed(state, - ERTS_PORT_TASK_EVENT, - current_cio_time)) { - ErtsIoTask *iotask = &state->driver.event->iotask; - erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); - if (erts_port_task_schedule(id, - &iotask->task, - ERTS_PORT_TASK_EVENT, - (ErlDrvEvent) state->fd, - event_data) != 0) { - stale_drv_select(id, state, 0); - } - add_active_fd(state->fd); + Process* rp = erts_proc_lookup(e->pid); + ErtsProcLocks rp_locks = 0; + ErtsMessage* mp; + ErlOffHeap* ohp; + ErtsBinary* bin; + Eterm* hp; + Uint hsz; + Eterm resource_term, ref_term, tuple; + + if (!rp) { + return; } -} -#endif -static void bad_fd_in_pollset( ErtsDrvEventState *, Eterm, Eterm, ErtsPollEvents); + bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -void -ERTS_CIO_EXPORT(erts_check_io_async_sig_interrupt)(void) -{ - ERTS_CIO_POLL_AS_INTR(pollset.ps); + /* {select, Resource, Ref, EventAtom} */ + if (is_value(e->immed)) { + hsz = 5 + ERTS_MAGIC_REF_THING_SIZE; + } + else { + hsz = 5 + ERTS_MAGIC_REF_THING_SIZE + ERTS_REF_THING_SIZE; + } + + mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp); + + resource_term = erts_mk_magic_ref(&hp, ohp, &bin->binary); + if (is_value(e->immed)) { + ASSERT(is_immed(e->immed)); + ref_term = e->immed; + } + else { + write_ref_thing(hp, e->refn[0], e->refn[1], e->refn[2]); + ref_term = make_internal_ref(hp); + hp += ERTS_REF_THING_SIZE; + } + tuple = TUPLE4(hp, 
am_select, resource_term, ref_term, event_atom); + + erts_queue_message(rp, rp_locks, mp, tuple, am_system); + + if (rp_locks) + erts_proc_unlock(rp, rp_locks); } -#endif + +static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport); void -ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set) +erts_check_io_interrupt(ErtsPollThread *psi, int set) { - ERTS_CIO_POLL_INTR(pollset.ps, set); + if (psi) { +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback_pollset()) { + erts_poll_interrupt_flbk(psi->ps, set); + return; + } +#endif + erts_poll_interrupt(psi->ps, set); + } } -void -ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, - ErtsMonotonicTime timeout_time) -{ - ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, timeout_time); +ErtsPollThread * +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; + return psiv+id; } void -ERTS_CIO_EXPORT(erts_check_io)(int do_wait) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) { - ErtsPollResFd *pollres; int pollres_len; - ErtsMonotonicTime timeout_time; int poll_ret, i; - erts_aint_t current_cio_time; - ErtsSchedulerData *esdp = erts_get_scheduler_data(); - - ASSERT(esdp); + ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO); restart: -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - - /* Figure out timeout value */ - timeout_time = (do_wait - ? erts_check_next_timeout_time(esdp) - : ERTS_POLL_NO_TIMEOUT /* poll only */); - - /* - * No need for an atomic inc op when incrementing - * erts_check_io_time, since only one thread can - * check io at a time. 
- */ - current_cio_time = erts_smp_atomic_read_dirty(&erts_check_io_time); - current_cio_time++; - erts_smp_atomic_set_relb(&erts_check_io_time, current_cio_time); - - check_cleanup_active_fds(current_cio_time); - #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - pollres_len = erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN; + pollres_len = psi->pollres_len; - pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len); +#if ERTS_POLL_USE_FALLBACK + if (psi->ps == get_fallback_pollset()) { - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); - poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, timeout_time); + } else +#endif + { + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); + } #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif -#ifdef ERTS_BREAK_REQUESTED - if (ERTS_BREAK_REQUESTED) - erts_do_break_handling(); -#endif - if (poll_ret != 0) { - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); - forget_removed(&pollset); - erts_free(ERTS_ALC_T_TMP, pollres); + if (poll_ret == EAGAIN) { goto restart; } @@ -1673,130 +1647,227 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) erl_errno_id(poll_ret), poll_ret); erts_send_error_to_logger_nogl(dsbufp); } + ERTS_MSACC_POP_STATE(); return; } for (i = 0; i < pollres_len; i++) { - ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd; + erts_driver_t* drv_ptr = NULL; + ErtsResource* resource = NULL; + ErtsDrvSelectDataState *free_select = NULL; + ErtsNifSelectDataState *free_nif = NULL; + ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - erts_smp_mtx_lock(fd_mtx(fd)); + /* The fd will be set to -1 if a pollset internal fd 
was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; + + erts_mtx_lock(fd_mtx(fd)); + + state = get_drv_ev_state(fd); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - state = &drv_ev_state[ (int) fd]; -#else - state = hash_get_drv_ev_state(fd); if (!state) { - goto next_pollres; + erts_mtx_unlock(fd_mtx(fd)); + continue; } -#endif - /* Skip this fd if it was removed from pollset */ - if (is_removed(state)) { - goto next_pollres; - } + DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); + + if (revents & ERTS_POLL_EV_ERR) { + /* + * Handle error events by triggering all in/out events + * that has been selected on. + * We *do not* want to call a callback that corresponds + * to an event not selected. + */ + revents = state->active_events; + state->active_events = 0; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + } else { + + /* Disregard any events that are not active at the moment, + for instance this could happen if the driver/nif does + select/deselect in rapid succession. 
*/ + revents &= state->active_events | ERTS_POLL_EV_NVAL; + + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; + + reactive_events = state->active_events; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; + + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } + } + } + } switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */ - ErtsPollEvents revents; - ErtsPollEvents revent_mask; - - revent_mask = ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT); - revent_mask |= state->events; - revents = pollres[i].events & revent_mask; - - if (revents & ERTS_POLL_EV_ERR) { - /* - * Let the driver handle the error condition. Only input, - * only output, or nothing might have been selected. - * We *do not* want to call a callback that corresponds - * to an event not selected. revents might give us a clue - * on which one to call. 
- */ - if ((revents & ERTS_POLL_EV_IN) - || (!(revents & ERTS_POLL_EV_OUT) - && state->events & ERTS_POLL_EV_IN)) { - iready(state->driver.select->inport, state, current_cio_time); - } - else if (state->events & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state, current_cio_time); - } - } - else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { + + if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state, current_cio_time); + oready(state->driver.select->outport, state); } /* Someone might have deselected input since revents was read (true also on the non-smp emulator since oready() may have been called); therefore, update revents... */ - revents &= ~(~state->events & ERTS_POLL_EV_IN); + revents &= state->events; if (revents & ERTS_POLL_EV_IN) { - iready(state->driver.select->inport, state, current_cio_time); + iready(state->driver.select->inport, state); } } else if (revents & ERTS_POLL_EV_NVAL) { bad_fd_in_pollset(state, - state->driver.select->inport, - state->driver.select->outport, - state->events); - add_active_fd(state->fd); - } - break; - } - -#if ERTS_CIO_HAVE_DRV_EVENT - case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */ - ErlDrvEventData event_data; - ErtsPollEvents revents; - ASSERT(state->driver.event); - ASSERT(state->driver.event->data); - event_data = state->driver.event->data; - revents = pollres[i].events; - revents &= ~state->driver.event->removed_events; - - if (revents) { - event_data->events = state->events; - event_data->revents = revents; - eready(state->driver.event->port, state, event_data, current_cio_time); + state->driver.select->inport, + state->driver.select->outport); + check_fd_cleanup(state, &free_select, &free_nif); } break; } -#endif + case ERTS_EV_TYPE_NIF: { /* Requested via enif_select()... 
*/ + struct erts_nif_select_event in = {NIL}; + struct erts_nif_select_event out = {NIL}; + ErtsResource* resource = NULL; + + if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { + if (revents & ERTS_POLL_EV_OUT) { + if (is_not_nil(state->driver.nif->out.pid)) { + out = state->driver.nif->out; + resource = state->driver.stop.resource; + state->driver.nif->out.pid = NIL; + } + } + if (revents & ERTS_POLL_EV_IN) { + if (is_not_nil(state->driver.nif->in.pid)) { + in = state->driver.nif->in; + resource = state->driver.stop.resource; + state->driver.nif->in.pid = NIL; + } + } + state->events &= ~revents; + } + else if (revents & ERTS_POLL_EV_NVAL) { + bad_fd_in_pollset(state, NIL, NIL); + check_fd_cleanup(state, &free_select, &free_nif); + } + + erts_mtx_unlock(fd_mtx(fd)); + + if (is_not_nil(in.pid)) { + send_event_tuple(&in, resource, am_ready_input); + } + if (is_not_nil(out.pid)) { + send_event_tuple(&out, resource, am_ready_output); + } + continue; + } + + case ERTS_EV_TYPE_STOP_NIF: { + resource = state->driver.stop.resource; + state->type = ERTS_EV_TYPE_NONE; + goto case_ERTS_EV_TYPE_NONE; + } + + case ERTS_EV_TYPE_STOP_USE: { +#if ERTS_POLL_USE_FALLBACK + ASSERT(psi->ps == get_fallback_pollset()); +#endif + drv_ptr = state->driver.stop.drv_ptr; + state->type = ERTS_EV_TYPE_NONE; + /* fallthrough */ case ERTS_EV_TYPE_NONE: /* Deselected ... */ + case_ERTS_EV_TYPE_NONE: + ASSERT(!state->events && !state->active_events && !state->flags); + check_fd_cleanup(state, &free_select, &free_nif); break; + } default: { /* Error */ erts_dsprintf_buf_t *dsbufp; dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Invalid event request type for fd in erts_poll()! 
" - "fd=%d, event request type=%sd\n", (int) state->fd, + "fd=%d, event request type=%d\n", (int) state->fd, (int) state->type); ASSERT(0); deselect(state, 0); - add_active_fd(state->fd); break; } } - next_pollres:; -#ifdef ERTS_SMP - erts_smp_mtx_unlock(fd_mtx(fd)); -#endif - } + erts_mtx_unlock(fd_mtx(fd)); - erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); - erts_free(ERTS_ALC_T_TMP, pollres); - forget_removed(&pollset); + if (drv_ptr) { + int was_unmasked = erts_block_fpe(); + DTRACE1(driver_stop_select, drv_ptr->name); + LTTNG1(driver_stop_select, drv_ptr->name); + (*drv_ptr->stop_select)((ErlDrvEvent) fd, NULL); + erts_unblock_fpe(was_unmasked); + if (drv_ptr->handle) { + erts_ddll_dereference_driver(drv_ptr->handle); + } + } + if (resource) { + erts_resource_stop(resource, (ErlNifEvent)fd, 0); + enif_release_resource(resource->data); + } + if (free_select) + free_drv_select_data(free_select); + if (free_nif) + free_nif_select_data(free_nif); + } + + /* The entire pollres array was filled with events, + * grow it for the next call. We do this for two reasons: + * 1. Pulling out more events in on go will increase throughput + * 2. If the polling implementation is not fair, this will make + * sure that we get all fds that we can. i.e. if 12 fds are + * constantly active, but we only have a pollres_len of 10, + * two of the fds may never be triggered depending on what the + * kernel decides to do. 
+ **/ + if (pollres_len == psi->pollres_len) { + int ev_state_len = drv_ev_state_len(); + erts_free(ERTS_ALC_T_POLLSET, psi->pollres); + psi->pollres_len *= 2; + /* Never grow it larger than the current drv_ev_state.len size */ + if (psi->pollres_len > ev_state_len) + psi->pollres_len = ev_state_len; + psi->pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * psi->pollres_len); + } + + ERTS_MSACC_POP_STATE(); } static void -bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, - Eterm outport, ErtsPollEvents events) +bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, Eterm outport) { + ErtsPollEvents events = state->events; erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); if (events & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { @@ -1820,27 +1891,36 @@ bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, erts_dsprintf(dsbufp, "Bad %s fd in erts_poll()! fd=%d, ", io_str, (int) state->fd); - if (is_nil(port)) { - ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT); - ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT); - erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n", - is_nil(inport) ? am_undefined : inport, - is_nil(outport) ? am_undefined : outport, - ipnp->driver_name ? ipnp->driver_name : "<unknown>", - opnp->driver_name ? opnp->driver_name : "<unknown>", - ipnp->name ? ipnp->name : "<unknown>", - opnp->name ? opnp->name : "<unknown>"); - erts_free_port_names(ipnp); - erts_free_port_names(opnp); - } - else { - ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT); - erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n", - is_nil(port) ? am_undefined : port, - pnp->driver_name ? pnp->driver_name : "<unknown>", - pnp->name ? 
pnp->name : "<unknown>"); - erts_free_port_names(pnp); - } + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + if (is_nil(port)) { + ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT); + ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n", + is_nil(inport) ? am_undefined : inport, + is_nil(outport) ? am_undefined : outport, + ipnp->driver_name ? ipnp->driver_name : "<unknown>", + opnp->driver_name ? opnp->driver_name : "<unknown>", + ipnp->name ? ipnp->name : "<unknown>", + opnp->name ? opnp->name : "<unknown>"); + erts_free_port_names(ipnp); + erts_free_port_names(opnp); + } + else { + ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n", + is_nil(port) ? am_undefined : port, + pnp->driver_name ? pnp->driver_name : "<unknown>", + pnp->name ? pnp->name : "<unknown>"); + erts_free_port_names(pnp); + } + } + else { + ErlNifResourceType* rt; + ASSERT(state->type == ERTS_EV_TYPE_NIF); + ASSERT(state->driver.stop.resource); + rt = state->driver.stop.resource->type; + erts_dsprintf(dsbufp, "resource={%T,%T}\n", rt->module, rt->name); + } } else { erts_dsprintf(dsbufp, "Bad fd in erts_poll()! 
fd=%d\n", (int) state->fd); @@ -1875,16 +1955,16 @@ static int drv_ev_state_cmp(void *des1, void *des2) static void *drv_ev_state_alloc(void *des_tmpl) { ErtsDrvEventState *evstate; - erts_smp_spin_lock(&state_prealloc_lock); - if (state_prealloc_first == NULL) { - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + if (drv_ev_state.prealloc_first == NULL) { + erts_spin_unlock(&drv_ev_state.prealloc_lock); evstate = (ErtsDrvEventState *) erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState)); } else { - evstate = state_prealloc_first; - state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next; - --num_state_prealloc; - erts_smp_spin_unlock(&state_prealloc_lock); + evstate = drv_ev_state.prealloc_first; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) evstate->hb.next; + --drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } /* XXX: Already valid data if prealloced, could ignore template! */ *evstate = *((ErtsDrvEventState *) des_tmpl); @@ -1894,59 +1974,214 @@ static void *drv_ev_state_alloc(void *des_tmpl) static void drv_ev_state_free(void *des) { - erts_smp_spin_lock(&state_prealloc_lock); - ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb; - state_prealloc_first = (ErtsDrvEventState *) des; - ++num_state_prealloc; - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + ((ErtsDrvEventState *) des)->hb.next = &drv_ev_state.prealloc_first->hb; + drv_ev_state.prealloc_first = (ErtsDrvEventState *) des; + ++drv_ev_state.num_prealloc; + erts_spin_unlock(&drv_ev_state.prealloc_lock); } #endif -void -ERTS_CIO_EXPORT(erts_init_check_io)(void) +#define ERTS_MAX_NO_OF_POLL_THREADS ERTS_MAX_NO_OF_SCHEDULERS + +static char * +get_arg(char* rest, char** argv, int* ip) { - erts_smp_atomic_init_nob(&erts_check_io_time, 0); - erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0); - - ERTS_CIO_POLL_INIT(); - pollset.ps = ERTS_CIO_NEW_POLLSET(); 
- - pollset.active_fd.six = 0; - pollset.active_fd.eix = 0; - erts_smp_atomic32_init_nob(&pollset.active_fd.no, 0); - pollset.active_fd.size = ERTS_ACTIVE_FD_INC; - pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR, - sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC); -#ifdef DEBUG - { - int i; - for (i = 0; i < ERTS_ACTIVE_FD_INC; i++) - pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; + int i = *ip; + if (*rest == '\0') { + if (argv[i+1] == NULL) { + erts_fprintf(stderr, "too few arguments\n"); + erts_usage(); + } + argv[i++] = NULL; + rest = argv[i]; } + argv[i] = NULL; + *ip = i; + return rest; +} + +static void +parse_args(int *argc, char **argv, int concurrent_waiters) +{ + int i = 0, j; + int no_pollsets = 0, no_poll_threads = 0, + no_pollsets_percentage = 0, + no_poll_threads_percentage = 0; + ASSERT(argc && argv); + while (i < *argc) { + if(argv[i][0] == '-') { + switch (argv[i][1]) { + case 'I': { + if (strncmp(argv[i]+2, "Ot", 2) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads) != 1 || + no_poll_threads < 1 || + ERTS_MAX_NO_OF_POLL_THREADS < no_poll_threads) { + erts_fprintf(stderr,"bad I/O poll threads number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "Op", 3) == 0) { + char *arg = get_arg(argv[i]+4, argv, &i); + if (sscanf(arg, "%d", &no_pollsets) != 1 || + no_pollsets < 1) { + erts_fprintf(stderr,"bad I/O pollset number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPt", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_poll_threads_percentage) != 1 || + no_poll_threads_percentage < 0 || + no_poll_threads_percentage > 100) { + erts_fprintf(stderr,"bad I/O poll thread percentage number: %s\n", arg); + erts_usage(); + } + } else if (strncmp(argv[i]+2, "OPp", 4) == 0) { + char *arg = get_arg(argv[i]+5, argv, &i); + if (sscanf(arg, "%d", &no_pollsets_percentage) != 1 || + no_pollsets_percentage < 0 || + no_pollsets_percentage > 
100) { + erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg); + erts_usage(); + } + } else { + break; + } + break; + } + case 'K': + (void)get_arg(argv[i]+2, argv, &i); + break; + case '-': + goto args_parsed; + default: + break; + } + } + i++; + } + +args_parsed: + + if (!concurrent_waiters) { + no_pollsets = no_poll_threads; + no_pollsets_percentage = 100; + } + + if (no_poll_threads == 0) { + if (no_poll_threads_percentage == 0) + no_poll_threads = 1; /* This is the default */ + else { + no_poll_threads = erts_no_schedulers * no_poll_threads_percentage / 100; + if (no_poll_threads < 1) + no_poll_threads = 1; + } + } + + if (no_pollsets == 0) { + if (no_pollsets_percentage == 0) + no_pollsets = 1; /* This is the default */ + else { + no_pollsets = no_poll_threads * no_pollsets_percentage / 100; + if (no_pollsets < 1) + no_pollsets = 1; + } + } + + if (no_poll_threads < no_pollsets) { + erts_fprintf(stderr, + "number of IO poll threads has to be greater or equal to " + "the number of \nIO pollsets. Current values are set to: \n" + " -IOt %d -IOp %d\n", + no_poll_threads, no_pollsets); + erts_usage(); + } + + /* Handled arguments have been marked with NULL. Slide arguments + not handled towards the beginning of argv. 
*/ + for (i = 0, j = 0; i < *argc; i++) { + if (argv[i]) + argv[j++] = argv[i]; + } + *argc = j; + + erts_no_pollsets = no_pollsets; + erts_no_poll_threads = no_poll_threads; +} + +void +erts_init_check_io(int *argc, char **argv) +{ + int j, concurrent_waiters, no_poll_threads; + ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED | + ERL_NIF_SELECT_STOP_SCHEDULED | + ERL_NIF_SELECT_INVALID_EVENT | + ERL_NIF_SELECT_FAILED)) == 0); + + + erts_poll_init(&concurrent_waiters); +#if ERTS_POLL_USE_FALLBACK + erts_poll_init_flbk(NULL); #endif + parse_args(argc, argv, concurrent_waiters); -#ifdef ERTS_SMP - init_removed_fd_alloc(); - pollset.removed_list = NULL; - erts_smp_spinlock_init(&pollset.removed_list_lock, - "pollset_rm_list"); - { - int i; - for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { -#ifdef ERTS_ENABLE_LOCK_COUNT - erts_smp_mtx_init_x(&drv_ev_state_locks[i].lck, "drv_ev_state", make_small(i)); -#else - erts_smp_mtx_init(&drv_ev_state_locks[i].lck, "drv_ev_state"); + /* Create the actual pollsets */ + pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets); + + for (j=0; j < erts_no_pollsets; j++) + pollsetv[j] = erts_poll_create_pollset(j); + + no_poll_threads = erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; #endif - } - } + +#if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); + no_poll_threads++; +#endif + + psiv = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollThread) * no_poll_threads); + +#if ERTS_POLL_USE_FALLBACK + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * 
ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); + psiv++; #endif + + for (j = 0; j < erts_no_poll_threads; j++) { + psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[j].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[j].ps = pollsetv[j % erts_no_pollsets]; + } + + for (j=0; j < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; j++) { + erts_mtx_init(&drv_ev_state.locks[j].lck, "drv_ev_state", make_small(j), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + } + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - max_fds = ERTS_CIO_POLL_MAX_FDS(); - erts_smp_atomic_init_nob(&drv_ev_state_len, 0); - drv_ev_state = NULL; - erts_smp_mtx_init(&drv_ev_state_grow_lock, "drv_ev_state_grow"); + drv_ev_state.max_fds = erts_poll_max_fds(); + erts_atomic_init_nob(&drv_ev_state.len, 0); + drv_ev_state.v = NULL; + erts_mtx_init(&drv_ev_state.grow_lock, "drv_ev_state_grow", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); #else { SafeHashFunctions hf; @@ -1954,152 +2189,196 @@ ERTS_CIO_EXPORT(erts_init_check_io)(void) hf.cmp = &drv_ev_state_cmp; hf.alloc = &drv_ev_state_alloc; hf.free = &drv_ev_state_free; - num_state_prealloc = 0; - state_prealloc_first = NULL; - erts_smp_spinlock_init(&state_prealloc_lock,"state_prealloc"); - - safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state_tab, "drv_ev_state_tab", - DRV_EV_STATE_HTAB_SIZE, hf); + drv_ev_state.num_prealloc = 0; + drv_ev_state.prealloc_first = NULL; + erts_spinlock_init(&drv_ev_state.prealloc_lock, "state_prealloc", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); + safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state.tab, "drv_ev_state_tab", + ERTS_LOCK_FLAGS_CATEGORY_IO, DRV_EV_STATE_HTAB_SIZE, hf); } #endif } int -ERTS_CIO_EXPORT(erts_check_io_max_files)(void) +erts_check_io_max_files(void) { #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - return max_fds; + return drv_ev_state.max_fds; #else - return 
ERTS_POLL_EXPORT(erts_poll_max_fds)(); + return erts_poll_max_fds(); #endif } Uint -ERTS_CIO_EXPORT(erts_check_io_size)(void) +erts_check_io_size(void) { - Uint res; + Uint res = 0; ErtsPollInfo pi; - ERTS_CIO_POLL_INFO(pollset.ps, &pi); - res = pi.memory_size; + int i; + +#if ERTS_POLL_USE_FALLBACK + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); + res += pi.memory_size; +#endif + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_info(pollsetv[i], &pi); + res += pi.memory_size; + } #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - res += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len); + res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - res += safe_hash_table_sz(&drv_ev_state_tab); + res += safe_hash_table_sz(&drv_ev_state.tab); { SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); + safe_hash_get_info(&hi, &drv_ev_state.tab); res += hi.objs * sizeof(ErtsDrvEventState); } - erts_smp_spin_lock(&state_prealloc_lock); - res += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + res += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif return res; } Eterm -ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) +erts_check_io_info(void *proc) { Process *p = (Process *) proc; - Eterm tags[16], values[16], res; - Uint sz, *szp, *hp, **hpp, memory_size; - Sint i; - ErtsPollInfo pi; - erts_aint_t cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); - int active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); + Eterm tags[16], values[16], res, list = NIL; + Uint sz, *szp, *hp, **hpp; + ErtsPollInfo *piv; + Sint i, j = 0, len; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; + 
ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); - while (1) { - erts_aint_t post_cio_time; - int post_active_fds; + piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); - ERTS_CIO_POLL_INFO(pollset.ps, &pi); +#if ERTS_POLL_USE_FALLBACK + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif - post_cio_time = erts_smp_atomic_read_mb(&erts_check_io_time); - post_active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); - if (cio_time == post_cio_time && active_fds == post_active_fds) - break; - cio_time = post_cio_time; - active_fds = post_active_fds; +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif + + for (j = 0; j < erts_no_pollsets; j++) { + erts_poll_info(pollsetv[j], &piv[j]); + piv[j].active_fds = 0; + piv[j].poll_threads = erts_no_poll_threads / erts_no_pollsets; + if (erts_no_poll_threads % erts_no_pollsets > j) + piv[j].poll_threads++; } - memory_size = pi.memory_size; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len); + i = 0; + erts_mtx_lock(&drv_ev_state.grow_lock); + len = erts_atomic_read_nob(&drv_ev_state.len); + for (i = 0; i < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { + erts_mtx_lock(&drv_ev_state.locks[i].lck); + for (j = i; j < len; j+=ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT) { + ErtsDrvEventState *state = get_drv_ev_state(j); + int pollsetid = get_pollset_id(j); + ASSERT(fd_mtx(j) == &drv_ev_state.locks[i].lck); + if (state->flags & ERTS_EV_FLAG_FALLBACK) + pollsetid = -1; + if (state->driver.select + && (state->type == ERTS_EV_TYPE_DRV_SEL) + && (is_iotask_active(&state->driver.select->iniotask) + || 
is_iotask_active(&state->driver.select->outiotask))) + piv[pollsetid].active_fds++; + } + erts_mtx_unlock(&drv_ev_state.locks[i].lck); + } + erts_mtx_unlock(&drv_ev_state.grow_lock); + + piv[0].memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len); #else - memory_size += safe_hash_table_sz(&drv_ev_state_tab); + piv[0].memory_size += safe_hash_table_sz(&drv_ev_state.tab); { - SafeHashInfo hi; - safe_hash_get_info(&hi, &drv_ev_state_tab); - memory_size += hi.objs * sizeof(ErtsDrvEventState); + SafeHashInfo hi; + safe_hash_get_info(&hi, &drv_ev_state.tab); + piv[0].memory_size += hi.objs * sizeof(ErtsDrvEventState); } - erts_smp_spin_lock(&state_prealloc_lock); - memory_size += num_state_prealloc * sizeof(ErtsDrvEventState); - erts_smp_spin_unlock(&state_prealloc_lock); + erts_spin_lock(&drv_ev_state.prealloc_lock); + piv[0].memory_size += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState); + erts_spin_unlock(&drv_ev_state.prealloc_lock); #endif hpp = NULL; szp = &sz; sz = 0; + piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; + bld_it: - i = 0; - tags[i] = erts_bld_atom(hpp, szp, "name"); - values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); + for (j = no_pollsets-1; j >= 0; j--) { + i = 0; - tags[i] = erts_bld_atom(hpp, szp, "primary"); - values[i++] = erts_bld_atom(hpp, szp, pi.primary); + tags[i] = erts_bld_atom(hpp, szp, "name"); + values[i++] = erts_bld_atom(hpp, szp, "erts_poll"); - tags[i] = erts_bld_atom(hpp, szp, "fallback"); - values[i++] = erts_bld_atom(hpp, szp, pi.fallback ? pi.fallback : "false"); + tags[i] = erts_bld_atom(hpp, szp, "primary"); + values[i++] = erts_bld_atom(hpp, szp, piv[j].primary); - tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); - values[i++] = erts_bld_atom(hpp, szp, - pi.kernel_poll ? pi.kernel_poll : "false"); + tags[i] = erts_bld_atom(hpp, szp, "kernel_poll"); + values[i++] = erts_bld_atom(hpp, szp, + piv[j].kernel_poll ? 
piv[j].kernel_poll : "false"); - tags[i] = erts_bld_atom(hpp, szp, "memory_size"); - values[i++] = erts_bld_uint(hpp, szp, memory_size); + tags[i] = erts_bld_atom(hpp, szp, "memory_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].memory_size); - tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.poll_set_size); + tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_set_size); - if (pi.fallback) { - tags[i] = erts_bld_atom(hpp, szp, "fallback_poll_set_size"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.fallback_poll_set_size); - } + tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); + values[i++] = piv[j].lazy_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "lazy_updates"); - values[i++] = pi.lazy_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].pending_updates); - if (pi.lazy_updates) { - tags[i] = erts_bld_atom(hpp, szp, "pending_updates"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.pending_updates); - } + tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); + values[i++] = piv[j].batch_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "batch_updates"); - values[i++] = pi.batch_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); + values[i++] = piv[j].concurrent_updates ? am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates"); - values[i++] = pi.concurrent_updates ? am_true : am_false; + tags[i] = erts_bld_atom(hpp, szp, "fallback"); + values[i++] = piv[j].is_fallback ? 
am_true : am_false; - tags[i] = erts_bld_atom(hpp, szp, "max_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds); + tags[i] = erts_bld_atom(hpp, szp, "max_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].max_fds); - tags[i] = erts_bld_atom(hpp, szp, "active_fds"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds); + tags[i] = erts_bld_atom(hpp, szp, "active_fds"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].active_fds); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups); + tags[i] = erts_bld_atom(hpp, szp, "poll_threads"); + values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_threads); - tags[i] = erts_bld_atom(hpp, szp, "no_avoided_interrupts"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_interrupts); + res = erts_bld_2tup_list(hpp, szp, i, tags, values); - tags[i] = erts_bld_atom(hpp, szp, "no_interrupt_timed"); - values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_interrupt_timed); -#endif - - res = erts_bld_2tup_list(hpp, szp, i, tags, values); + if (!hpp) { + *szp += 2; + } + else { + list = CONS(*hpp, res, list); + *hpp += 2; + } + } if (!hpp) { hp = HAlloc(p, sz); @@ -2108,352 +2387,454 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) goto bld_it; } - return res; + erts_free(ERTS_ALC_T_TMP, piv); + + return list; } static ERTS_INLINE ErtsPollEvents -print_events(ErtsPollEvents ev) +print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) { int first = 1; + if(ev == ERTS_POLL_EV_NONE) { + erts_dsprintf(dsbufp, "N/A"); + return 0; + } if(ev & ERTS_POLL_EV_IN) { ev &= ~ERTS_POLL_EV_IN; - erts_printf("%s%s", first ? "" : "|", "IN"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "IN"); first = 0; } if(ev & ERTS_POLL_EV_OUT) { ev &= ~ERTS_POLL_EV_OUT; - erts_printf("%s%s", first ? "" : "|", "OUT"); + erts_dsprintf(dsbufp, "%s%s", first ? 
"" : "|", "OUT"); first = 0; } /* The following should not appear... */ if(ev & ERTS_POLL_EV_NVAL) { - erts_printf("%s%s", first ? "" : "|", "NVAL"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "NVAL"); first = 0; } if(ev & ERTS_POLL_EV_ERR) { - erts_printf("%s%s", first ? "" : "|", "ERR"); + erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "ERR"); first = 0; } if (ev) - erts_printf("%s0x%b32x", first ? "" : "|", (Uint32) ev); + erts_dsprintf(dsbufp, "%s0x%b32x", first ? "" : "|", (Uint32) ev); return ev; } +static ERTS_INLINE void +print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) +{ + erts_dsprintf(dsbufp, "%s", flag2str(f)); +} + +#ifdef DEBUG_PRINT_MODE + +static ERTS_INLINE char * +drvmode2str(int mode) { + switch (mode) { + case ERL_DRV_READ|ERL_DRV_USE: return "READ|USE"; + case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE"; + case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE"; + case ERL_DRV_USE: return "USE"; + case ERL_DRV_READ: return "READ"; + case ERL_DRV_WRITE: return "WRITE"; + case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE"; + default: return "UNKNOWN"; + } +} + +static ERTS_INLINE char * +nifmode2str(enum ErlNifSelectFlags mode) { + switch (mode) { + case ERL_NIF_SELECT_READ: return "READ"; + case ERL_NIF_SELECT_WRITE: return "WRITE"; + case ERL_NIF_SELECT_STOP: return "STOP"; + default: return "UNKNOWN"; + } +} + +#endif + typedef struct { int used_fds; int num_errors; int no_driver_select_structs; - int no_driver_event_structs; + int no_enif_select_structs; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS int internal_fds; ErtsPollEvents *epep; #endif } IterDebugCounters; -static void doit_erts_check_io_debug(void *vstate, void *vcounters) +static int erts_debug_print_checkio_state(erts_dsprintf_buf_t *dsbufp, + ErtsDrvEventState *state, + ErtsPollEvents ep_events, + int internal) { - ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; - IterDebugCounters *counters = (IterDebugCounters *) vcounters; - ErtsPollEvents 
cio_events = state->events; - ErtsSysFdType fd = state->fd; -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int internal = 0; - ErtsPollEvents ep_events = counters->epep[(int) fd]; -#endif - int err = 0; - #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) struct stat stat_buf; #endif - - if (state->driver.select) - counters->no_driver_select_structs++; -#if ERTS_CIO_HAVE_DRV_EVENT - if (state->driver.event) - counters->no_driver_event_structs++; -#endif - + ErtsSysFdType fd = state->fd; + ErtsPollEvents cio_events = state->events; + int err = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state->events || ep_events) { - if (ep_events & ERTS_POLL_EV_NVAL) { - ep_events &= ~ERTS_POLL_EV_NVAL; - internal = 1; - counters->internal_fds++; - } - else - counters->used_fds++; -#else - if (state->events) { - counters->used_fds++; + ErtsPollEvents aio_events = state->active_events; #endif - - erts_printf("fd=%d ", (int) fd); - + erts_dsprintf(dsbufp, "pollset=%d fd=%d ", + state->flags & ERTS_EV_FLAG_FALLBACK ? 
-1 : get_pollset_id(fd), (int) fd); + #if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE) - if (fstat((int) fd, &stat_buf) < 0) - erts_printf("type=unknown "); - else { - erts_printf("type="); + if (fstat((int) fd, &stat_buf) < 0) + erts_dsprintf(dsbufp, "type=unknown "); + else { + erts_dsprintf(dsbufp, "type="); #ifdef S_ISSOCK - if (S_ISSOCK(stat_buf.st_mode)) - erts_printf("sock "); - else + if (S_ISSOCK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "sock "); + else #endif #ifdef S_ISFIFO if (S_ISFIFO(stat_buf.st_mode)) - erts_printf("fifo "); + erts_dsprintf(dsbufp, "fifo "); else #endif #ifdef S_ISCHR - if (S_ISCHR(stat_buf.st_mode)) - erts_printf("chr "); - else + if (S_ISCHR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "chr "); + else #endif #ifdef S_ISDIR - if (S_ISDIR(stat_buf.st_mode)) - erts_printf("dir "); - else + if (S_ISDIR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "dir "); + else #endif #ifdef S_ISBLK - if (S_ISBLK(stat_buf.st_mode)) - erts_printf("blk "); - else + if (S_ISBLK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "blk "); + else #endif #ifdef S_ISREG - if (S_ISREG(stat_buf.st_mode)) - erts_printf("reg "); - else + if (S_ISREG(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "reg "); + else #endif #ifdef S_ISLNK - if (S_ISLNK(stat_buf.st_mode)) - erts_printf("lnk "); - else + if (S_ISLNK(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "lnk "); + else #endif #ifdef S_ISDOOR - if (S_ISDOOR(stat_buf.st_mode)) - erts_printf("door "); - else + if (S_ISDOOR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "door "); + else #endif #ifdef S_ISWHT - if (S_ISWHT(stat_buf.st_mode)) - erts_printf("wht "); - else + if (S_ISWHT(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "wht "); + else #endif #ifdef S_ISXATTR - if (S_ISXATTR(stat_buf.st_mode)) - erts_printf("xattr "); - else + if (S_ISXATTR(stat_buf.st_mode)) + erts_dsprintf(dsbufp, "xattr "); + else #endif - erts_printf("unknown "); - } + erts_dsprintf(dsbufp, "unknown "); + } #else - erts_printf("type=unknown "); + 
erts_dsprintf(dsbufp, "type=unknown "); #endif - if (state->type == ERTS_EV_TYPE_DRV_SEL) { - erts_printf("driver_select "); - + if (state->type == ERTS_EV_TYPE_DRV_SEL) { + erts_dsprintf(dsbufp, "driver_select "); + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - - if (cio_events == ep_events) { - erts_printf("ev="); - if (print_events(cio_events) != 0) - err = 1; - } - else { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - erts_printf(" ep_ev="); - print_events(ep_events); - } + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + if (aio_events == cio_events) { + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) + err = 1; + } + else { + ErtsPollEvents ev = cio_events; + if (ev != ep_events && ep_events != ERTS_POLL_EV_NONE) + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } + } else { + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " aio_ev="); + print_events(dsbufp, aio_events); + if ((aio_events != ep_events && ep_events != ERTS_POLL_EV_NONE) || + (aio_events != 0 && ep_events == ERTS_POLL_EV_NONE)) { + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + err = 1; + } + } #else - if (print_events(cio_events) != 0) - err = 1; -#endif - erts_printf(" "); - if (cio_events & ERTS_POLL_EV_IN) { - Eterm id = state->driver.select->inport; - if (is_nil(id)) { - erts_printf("inport=none inname=none indrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" inport=%T inname=%s indrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? 
pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - if (cio_events & ERTS_POLL_EV_OUT) { - Eterm id = state->driver.select->outport; - if (is_nil(id)) { - erts_printf("outport=none outname=none outdrv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" outport=%T outname=%s outdrv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } - } -#if ERTS_CIO_HAVE_DRV_EVENT - else if (state->type == ERTS_EV_TYPE_DRV_EV) { - Eterm id; - erts_printf("driver_event "); + if (print_events(dsbufp, cio_events) != 0) + err = 1; +#endif + erts_dsprintf(dsbufp, " "); + if (cio_events & ERTS_POLL_EV_IN) { + Eterm id = state->driver.select->inport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "inport=none inname=none indrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " inport=%T inname=%s indrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + if (cio_events & ERTS_POLL_EV_OUT) { + Eterm id = state->driver.select->outport; + if (is_nil(id)) { + erts_dsprintf(dsbufp, "outport=none outname=none outdrv=none "); + err = 1; + } + else { + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); + erts_dsprintf(dsbufp, " outport=%T outname=%s outdrv=%s ", + id, + pnp->name ? pnp->name : "unknown", + (pnp->driver_name + ? 
pnp->driver_name + : "unknown")); + erts_free_port_names(pnp); + } + } + } + else if (state->type == ERTS_EV_TYPE_NIF) { + ErtsResource* r; + erts_dsprintf(dsbufp, "enif_select "); + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (internal) { - erts_printf("internal "); - err = 1; - } - if (cio_events == ep_events) { - erts_printf("ev=0x%b32x", (Uint32) cio_events); - } - else { - err = 1; - erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); - erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); - } + if (internal) { + erts_dsprintf(dsbufp, "internal "); + err = 1; + } + + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + if (print_events(dsbufp, cio_events) != 0) + err = 1; + } + else { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); + print_events(dsbufp, ep_events); + } #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + if (print_events(dsbufp, cio_events) != 0) + err = 1; #endif - id = state->driver.event->port; - if (is_nil(id)) { - erts_printf(" port=none name=none drv=none "); - err = 1; - } - else { - ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); - erts_printf(" port=%T name=%s drv=%s ", - id, - pnp->name ? pnp->name : "unknown", - (pnp->driver_name - ? 
pnp->driver_name - : "unknown")); - erts_free_port_names(pnp); - } - } + erts_dsprintf(dsbufp, " inpid=%T", state->driver.nif->in.pid); + erts_dsprintf(dsbufp, " outpid=%T", state->driver.nif->out.pid); + r = state->driver.stop.resource; + erts_dsprintf(dsbufp, " resource=%p(%T:%T)", r, r->type->module, r->type->name); + } +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + else if (internal) { + erts_dsprintf(dsbufp, "internal "); + if (cio_events) { + err = 1; + erts_dsprintf(dsbufp, "cio_ev="); + print_events(dsbufp, cio_events); + } + if (ep_events) { + erts_dsprintf(dsbufp, "ep_ev="); + print_events(dsbufp, ep_events); + } + } #endif + else { + err = 1; + erts_dsprintf(dsbufp, "control_type=%d ", (int)state->type); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - else if (internal) { - erts_printf("internal "); - if (cio_events) { - err = 1; - erts_printf("cio_ev="); - print_events(cio_events); - } - if (ep_events) { - erts_printf("ep_ev="); - print_events(ep_events); - } - } + if (cio_events == ep_events) { + erts_dsprintf(dsbufp, "ev="); + print_events(dsbufp, cio_events); + } + else { + erts_dsprintf(dsbufp, "cio_ev="); print_events(dsbufp, cio_events); + erts_dsprintf(dsbufp, " ep_ev="); print_events(dsbufp, ep_events); + } +#else + erts_dsprintf(dsbufp, "ev=0x%b32x", (Uint32) cio_events); #endif - else { - err = 1; - erts_printf("control_type=%d ", (int)state->type); + } + + erts_dsprintf(dsbufp, " flags="); print_flags(dsbufp, state->flags); + if (err) { + erts_dsprintf(dsbufp, " ERROR"); + } + erts_dsprintf(dsbufp, "\r\n"); + return err; +} + +static void doit_erts_check_io_debug(void *vstate, void *vcounters, + erts_dsprintf_buf_t *dsbufp) +{ + ErtsDrvEventState *state = (ErtsDrvEventState *) vstate; + IterDebugCounters *counters = (IterDebugCounters *) vcounters; + int internal = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (cio_events == ep_events) { - erts_printf("ev=0x%b32x", (Uint32) cio_events); - } - else { - erts_printf("cio_ev=0x%b32x", (Uint32) cio_events); - 
erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events); - } + ErtsSysFdType fd = state->fd; + ErtsPollEvents ep_events = counters->epep[(int) fd]; #else - erts_printf("ev=0x%b32x", (Uint32) cio_events); + ErtsPollEvents ep_events = ERTS_POLL_EV_NONE; #endif + + if (state->driver.select) { + counters->no_driver_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + if (state->driver.nif) { + counters->no_enif_select_structs++; + ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)); + } + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)) { + if (ep_events & ERTS_POLL_EV_NVAL) { + ep_events &= ~ERTS_POLL_EV_NVAL; + internal = 1; + counters->internal_fds++; } - - if (err) { + else + counters->used_fds++; +#else + if (state->events) { + counters->used_fds++; +#endif + if (erts_debug_print_checkio_state(dsbufp, state, ep_events, internal)) { counters->num_errors++; - erts_printf(" ERROR"); } - erts_printf("\n"); } } - + +/* ciodpi can be NULL when called from etp-commands */ int -ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) +erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - int fd, len; + int fd, len, i; #endif - IterDebugCounters counters; + IterDebugCounters counters = {0}; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS ErtsDrvEventState null_des; null_des.driver.select = NULL; -#if ERTS_CIO_HAVE_DRV_EVENT - null_des.driver.event = NULL; -#endif - null_des.driver.drv_ptr = NULL; + null_des.driver.nif = NULL; + null_des.driver.stop.drv_ptr = NULL; null_des.events = 0; - null_des.remove_cnt = 0; null_des.type = ERTS_EV_TYPE_NONE; + null_des.flags = 0; + + counters.epep = erts_alloc(ERTS_ALC_T_TMP, + sizeof(ErtsPollEvents)*drv_ev_state.max_fds); #endif - erts_printf("--- fds in pollset --------------------------------------\n"); -#if 
defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK) +#if defined(ERTS_ENABLE_LOCK_CHECK) erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - erts_smp_thr_progress_block(); /* stop the world to avoid messy locking */ + if (ciodip) + erts_thr_progress_block(); /* stop the world to avoid messy locking */ #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds); - ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds); - counters.internal_fds = 0; -#endif - counters.used_fds = 0; - counters.num_errors = 0; - counters.no_driver_select_structs = 0; - counters.no_driver_event_structs = 0; + len = erts_atomic_read_nob(&drv_ev_state.len); -#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - len = erts_smp_atomic_read_nob(&drv_ev_state_len); +#if ERTS_POLL_USE_FALLBACK + erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, + drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { - doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters); + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } - for ( ; fd < max_fds; fd++) { - null_des.fd = fd; - doit_erts_check_io_debug((void *) &null_des, (void *) &counters); +#endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + + erts_dsprintf(dsbufp, "--- fds in pollset 
--------------------------------------\n"); + + for (i = 0; i < erts_no_pollsets; i++) { + erts_poll_get_selected_events(pollsetv[i], + counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } + } + for (fd = len ; fd < drv_ev_state.max_fds; fd++) { + null_des.fd = fd; + doit_erts_check_io_debug(&null_des, &counters, dsbufp); } #else - safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters); + safe_hash_for_each(&drv_ev_state.tab, &doit_erts_check_io_debug, + &counters, dsbufp); #endif - erts_smp_thr_progress_unblock(); + if (ciodip) + erts_thr_progress_unblock(); - ciodip->no_used_fds = counters.used_fds; - ciodip->no_driver_select_structs = counters.no_driver_select_structs; - ciodip->no_driver_event_structs = counters.no_driver_event_structs; + if (ciodip) { + ciodip->no_used_fds = counters.used_fds; + ciodip->no_driver_select_structs = counters.no_driver_select_structs; + ciodip->no_enif_select_structs = counters.no_enif_select_structs; + } - erts_printf("\n"); - erts_printf("used fds=%d\n", counters.used_fds); - erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs); -#if ERTS_CIO_HAVE_DRV_EVENT - erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs); -#endif + erts_dsprintf(dsbufp, "\n"); + erts_dsprintf(dsbufp, "used fds=%d\n", counters.used_fds); + erts_dsprintf(dsbufp, "Number of driver_select() structures=%d\n", counters.no_driver_select_structs); + erts_dsprintf(dsbufp, "Number of enif_select() structures=%d\n", 
counters.no_enif_select_structs); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS - erts_printf("internal fds=%d\n", counters.internal_fds); + erts_dsprintf(dsbufp, "internal fds=%d\n", counters.internal_fds); #endif - erts_printf("---------------------------------------------------------\n"); - fflush(stdout); + erts_dsprintf(dsbufp, "---------------------------------------------------------\n"); + erts_send_error_to_logger_nogl(dsbufp); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS erts_free(ERTS_ALC_T_TMP, (void *) counters.epep); #endif @@ -2461,3 +2842,20 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) return counters.num_errors; } +#ifdef ERTS_ENABLE_LOCK_COUNT +void erts_lcnt_update_cio_locks(int enable) { + int i; +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + erts_lcnt_enable_hash_lock_count(&drv_ev_state.tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable); +#else + (void)enable; +#endif + +#if ERTS_POLL_USE_FALLBACK + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); +#endif + + for (i = 0; i < erts_no_pollsets; i++) + erts_lcnt_enable_pollset_lock_count(pollsetv[i], enable); +} +#endif /* ERTS_ENABLE_LOCK_COUNT */ diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 14f1ea3f43..31182be5ec 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2017. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,10 +18,11 @@ * %CopyrightEnd% */ -/* - * Description: Check I/O +/** + * @description Check I/O, a cross platform IO polling framework for ERTS * - * Author: Rickard Green + * @author Rickard Green + * @author Lukas Larsson */ #ifndef ERL_CHECK_IO_H__ @@ -30,68 +31,82 @@ #include "sys.h" #include "erl_sys_driver.h" -#ifdef ERTS_ENABLE_KERNEL_POLL - -int driver_select_kp(ErlDrvPort, ErlDrvEvent, int, int); -int driver_select_nkp(ErlDrvPort, ErlDrvEvent, int, int); -int driver_event_kp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); -int driver_event_nkp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); -Uint erts_check_io_size_kp(void); -Uint erts_check_io_size_nkp(void); -Eterm erts_check_io_info_kp(void *); -Eterm erts_check_io_info_nkp(void *); -int erts_check_io_max_files_kp(void); -int erts_check_io_max_files_nkp(void); -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -void erts_check_io_async_sig_interrupt_kp(void); -void erts_check_io_async_sig_interrupt_nkp(void); -#endif -void erts_check_io_interrupt_kp(int); -void erts_check_io_interrupt_nkp(int); -void erts_check_io_interrupt_timed_kp(int, ErtsMonotonicTime); -void erts_check_io_interrupt_timed_nkp(int, ErtsMonotonicTime); -void erts_check_io_kp(int); -void erts_check_io_nkp(int); -void erts_init_check_io_kp(void); -void erts_init_check_io_nkp(void); -int erts_check_io_debug_kp(ErtsCheckIoDebugInfo *); -int erts_check_io_debug_nkp(ErtsCheckIoDebugInfo *); - -#else /* !ERTS_ENABLE_KERNEL_POLL */ +/** @brief a structure that is used by each polling thread */ +struct erts_poll_thread; +/** + * Get the memory size of the check io framework + */ Uint erts_check_io_size(void); -Eterm erts_check_io_info(void *); +/** + * Returns an Eterm with information about all the pollsets active at the + * moment. + * + * @param proc the Process* to allocate the result on. It is passed as + * void * because of header include problems. 
+ */ +Eterm erts_check_io_info(void *proc); +/** + * Should be called when a port IO task has been executed in order to re-enable + * or clear the information about the fd. + * + * @param type The type of event that has been completed. + * @param handle The port task handle of the event. + * @param reset A function pointer to be called when the port task handle + * should be reset. + */ +void erts_io_notify_port_task_executed(ErtsPortTaskType type, + ErtsPortTaskHandle *handle, + void (*reset)(ErtsPortTaskHandle *)); +/** + * Returns the maximum number of fds that the check io framework can handle. + */ int erts_check_io_max_files(void); -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -void erts_check_io_async_sig_interrupt(void); -#endif -void erts_check_io_interrupt(int); -void erts_check_io_interrupt_timed(int, ErtsMonotonicTime); -void erts_check_io(int); -void erts_init_check_io(void); - +/** + * Called by any thread that should check for new IO events. This function will + * not return unless erts_check_io_interrupt(pt, 1) is called by another thread. + * + * @param pt the poll thread structure to use. + */ +void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time); +/** + * Initialize the check io framework. This function will parse the arguments + * and delete any entries that it is interested in. + * + * @param argc the number of arguments + * @param argv an array with the arguments + */ +void erts_init_check_io(int *argc, char **argv); +/** + * Interrupt the poll thread so that it can execute other code. + * + * Should be called with set = 0 by the waiting thread before calling + * erts_check_io. + * + * @param pt the poll thread to wake + * @param set whether to set or clear the interrupt flag + */ +void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); +/** + * Create a new poll thread structure that is associated with the number no. + * It is the callers responsibility that no is unique. 
+ * + * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler + * @param tpd the thread progress data of the pollset thread + */ +struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd); +#ifdef ERTS_ENABLE_LOCK_COUNT +/** + * Toggle lock counting on all check io locks + */ +void erts_lcnt_update_cio_locks(int enable); #endif -extern erts_smp_atomic_t erts_check_io_time; - typedef struct { ErtsPortTaskHandle task; - erts_smp_atomic_t executed_time; + ErtsSysFdType fd; } ErtsIoTask; -ERTS_GLB_INLINE void erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp); - -#if ERTS_GLB_INLINE_INCL_FUNC_DEF - -ERTS_GLB_INLINE void -erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) -{ - ErtsIoTask *itp = (ErtsIoTask *) (((char *) pthp) - offsetof(ErtsIoTask, task)); - erts_aint_t ci_time = erts_smp_atomic_read_acqb(&erts_check_io_time); - erts_smp_atomic_set_relb(&itp->executed_time, ci_time); -} - -#endif #endif /* ERL_CHECK_IO_H__ */ @@ -99,41 +114,37 @@ erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) #define ERL_CHECK_IO_INTERNAL__ #endif +#define ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT 128 + +/* Controls how many pollsets to allocate. Fd's are hashed into + each pollset based on the FD. When doing non-concurrent updates + there will be one pollset per thread. +*/ +extern int erts_no_pollsets; +extern int erts_no_poll_threads; + + #ifndef ERL_CHECK_IO_INTERNAL__ #define ERL_CHECK_IO_INTERNAL__ #include "erl_poll.h" #include "erl_port_task.h" -#ifdef __WIN32__ -/* - * Current erts_poll implementation for Windows cannot handle - * active events in the set of events polled. - */ -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 0 -#endif - -/* - * ErtsDrvEventDataState is used by driver_event() which is almost never - * used. We allocate ErtsDrvEventDataState separate since we dont wan't - * the size of ErtsDrvEventState to increase due to driver_event() - * information. 
- */ -typedef struct { - Eterm port; - ErlDrvEventData data; - ErtsPollEvents removed_events; -#if ERTS_CIO_DEFER_ACTIVE_EVENTS - ErtsPollEvents deferred_events; -#endif - ErtsIoTask iotask; -} ErtsDrvEventDataState; - typedef struct { Eterm inport; Eterm outport; ErtsIoTask iniotask; ErtsIoTask outiotask; } ErtsDrvSelectDataState; + +struct erts_nif_select_event { + Eterm pid; + Eterm immed; + Uint32 refn[ERTS_REF_NUMBERS]; +}; + +typedef struct { + struct erts_nif_select_event in; + struct erts_nif_select_event out; +} ErtsNifSelectDataState; + #endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */ diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c index 7bbb406f29..b0d9fc0776 100644 --- a/erts/emulator/sys/common/erl_mmap.c +++ b/erts/emulator/sys/common/erl_mmap.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,9 +21,9 @@ # include "config.h" #endif +#define ERTS_WANT_MEM_MAPPERS #include "sys.h" #include "erl_process.h" -#include "erl_smp.h" #include "atom.h" #include "erl_mmap.h" #include <stddef.h> @@ -61,11 +61,11 @@ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \ < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot)) #define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \ - (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ + (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \ < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot))) #define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \ - (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ + (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \ (((UWord) (PTR)) - ((UWord) mm->sua.bot) \ < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot))) @@ -198,10 +198,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \ do { \ - erts_smp_mtx_lock(&mm->mtx); \ + erts_mtx_lock(&mm->mtx); \ ERTS_MMAP_OP_START((IN_SZ)); \ ERTS_MMAP_OP_END((RES), (OUT_SZ)); \ - erts_smp_mtx_unlock(&mm->mtx); \ + erts_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MUNMAP_OP(PTR, SZ) \ @@ -220,9 +220,9 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MUNMAP_OP_LCK(PTR, SZ) \ do { \ - erts_smp_mtx_lock(&mm->mtx); \ + erts_mtx_lock(&mm->mtx); \ ERTS_MUNMAP_OP((PTR), (SZ)); \ - erts_smp_mtx_unlock(&mm->mtx); \ + erts_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \ @@ -248,10 +248,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \ do { \ - erts_smp_mtx_lock(&mm->mtx); \ + erts_mtx_lock(&mm->mtx); \ ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \ ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \ - erts_smp_mtx_unlock(&mm->mtx); \ + erts_mtx_unlock(&mm->mtx); \ } while (0) #define ERTS_MMAP_OP_ABORT() \ @@ -296,11 +296,10 @@ typedef struct { }ErtsFreeSegMap; 
struct ErtsMemMapper_ { - int (*reserve_physical)(char *, UWord, int exec); + int (*reserve_physical)(char *, UWord); void (*unreserve_physical)(char *, UWord); int supercarrier; int no_os_mmap; - int executable; /* is client a native code allocator? */ /* * Super unaligned area is located above super aligned * area. That is, `sa.bot` is beginning of the super @@ -320,7 +319,7 @@ struct ErtsMemMapper_ { #if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS)) int mmap_fd; #endif - erts_smp_mtx_t mtx; + erts_mtx_t mtx; struct { char *free_list; char *unused_start; @@ -358,11 +357,6 @@ char* erts_literals_start; UWord erts_literals_size; #endif -#ifdef ERTS_ALC_A_EXEC -ErtsMemMapper erts_exec_mmapper; -#endif - - #define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \ do { \ @@ -1241,7 +1235,6 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) #if HAVE_MMAP # define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE) -# define ERTS_MMAP_PROT_EXEC (PROT_READ|PROT_WRITE|PROT_EXEC) # if defined(MAP_ANONYMOUS) # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) # define ERTS_MMAP_FD (-1) @@ -1255,26 +1248,24 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) #endif static ERTS_INLINE void * -os_mmap(void *hint_ptr, UWord size, int try_superalign, int executable) +os_mmap(void *hint_ptr, UWord size, int try_superalign) { #if HAVE_MMAP - const int prot = executable ? ERTS_MMAP_PROT_EXEC : ERTS_MMAP_PROT; void *res; #ifdef MAP_ALIGN if (try_superalign) - res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, prot, + res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT, ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); else #endif - res = mmap((void *) hint_ptr, size, prot, + res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); if (res == MAP_FAILED) return NULL; return res; #elif HAVE_VIRTUALALLOC - const DWORD prot = executable ? 
PAGE_EXECUTE_READWRITE : PAGE_READWRITE; return (void *) VirtualAlloc(NULL, (SIZE_T) size, - MEM_COMMIT|MEM_RESERVE, prot); + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); #else # error "missing mmap() or similar" #endif @@ -1331,7 +1322,6 @@ os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign) #if HAVE_MMAP #define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT) -#define ERTS_MMAP_RESERVE_PROT_EXEC (ERTS_MMAP_PROT_EXEC) #define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED) #define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE) #if defined(__FreeBSD__) @@ -1347,10 +1337,9 @@ os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign) #endif /* __FreeBSD__ */ static int -os_reserve_physical(char *ptr, UWord size, int exec) +os_reserve_physical(char *ptr, UWord size) { - const int prot = exec ? ERTS_MMAP_RESERVE_PROT_EXEC : ERTS_MMAP_RESERVE_PROT; - void *res = mmap((void *) ptr, (size_t) size, prot, + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT, ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0); if (res == (void *) MAP_FAILED) return 0; @@ -1367,37 +1356,11 @@ os_unreserve_physical(char *ptr, UWord size) } static void * -os_mmap_virtual(char *ptr, UWord size, int exec) +os_mmap_virtual(char *ptr, UWord size) { int flags = ERTS_MMAP_VIRTUAL_FLAGS; void* res; -#ifdef ERTS_ALC_A_EXEC - if (exec) { - ASSERT(!ptr); - /* OTP-19.0: Nice hack below cut-and-pasted from hipe_amd64.c */ - -# ifdef MAP_32BIT - /* If we got MAP_32BIT (Linux), then use that to ask for low memory */ - flags |= MAP_32BIT; -# else - /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect - a plain map_hint (returns high mappings even though the - hint refers to a free area), so we have to use both map_hint - and MAP_FIXED to get addresses below the 2GB boundary. - This is even worse than the Linux/ppc64 case. - Similarly, Solaris 10 doesn't have MAP_32BIT, - and it doesn't respect a plain map_hint. 
*/ - ptr = (char*)(512*1024*1024); /* 0.5GB */ - -# if defined(__FreeBSD__) || defined(__sun__) - flags |= MAP_FIXED; -# endif -# endif /* !MAP_32BIT */ - } -#else /* !ERTS_ALC_A_EXEC */ - ASSERT(!exec); -#endif res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT, flags, ERTS_MMAP_FD, 0); if (res == (void *) MAP_FAILED) @@ -1412,7 +1375,7 @@ os_mmap_virtual(char *ptr, UWord size, int exec) #endif /* ERTS_HAVE_OS_MMAP */ -static int reserve_noop(char *ptr, UWord size, int exec) +static int reserve_noop(char *ptr, UWord size) { #ifdef ERTS_MMAP_DEBUG_FILL_AREAS Uint32 *uip, *end = (Uint32 *) (ptr + size); @@ -1455,7 +1418,7 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, #if ERTS_HAVE_OS_MMAP if (!mm->no_os_mmap) { - ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0, 0); + ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); if (ptr) { mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); @@ -1474,7 +1437,7 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, da_map = &mm->sua.map; desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); if (desc) { - if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0)) + if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE); else desc = NULL; @@ -1484,7 +1447,7 @@ alloc_desc_insert_free_seg(ErtsMemMapper* mm, da_map = &mm->sa.map; desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); if (desc) { - if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0)) + if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE); else desc = NULL; @@ -1536,7 +1499,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) ErtsFreeSegDesc *desc; Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); - erts_smp_mtx_lock(&mm->mtx); + erts_mtx_lock(&mm->mtx); ERTS_MMAP_OP_START(*sizep); @@ -1545,7 +1508,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 
flags, UWord *sizep) if (desc) { seg = desc->start; end = seg+asize; - if (!mm->reserve_physical(seg, asize, mm->executable)) + if (!mm->reserve_physical(seg, asize)) goto supercarrier_reserve_failure; if (desc->end == end) { delete_free_seg(&mm->sua.map, desc); @@ -1560,8 +1523,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) } if (asize <= mm->sua.bot - mm->sa.top) { - if (!mm->reserve_physical(mm->sua.bot - asize, asize, - mm->executable)) + if (!mm->reserve_physical(mm->sua.bot - asize, asize)) goto supercarrier_reserve_failure; mm->sua.bot -= asize; seg = mm->sua.bot; @@ -1577,8 +1539,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) char *start = seg = desc->start; seg = (char *) ERTS_SUPERALIGNED_CEILING(seg); end = seg+asize; - if (!mm->reserve_physical(start, (UWord) (end - start), - mm->executable)) + if (!mm->reserve_physical(start, (UWord) (end - start))) goto supercarrier_reserve_failure; ERTS_MMAP_SIZE_SC_SA_INC(asize); if (desc->end == end) { @@ -1610,8 +1571,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) if (asize + (seg - start) <= mm->sua.bot - start) { end = seg + asize; - if (!mm->reserve_physical(start, (UWord) (end - start), - mm->executable)) + if (!mm->reserve_physical(start, (UWord) (end - start))) goto supercarrier_reserve_failure; mm->sa.top = end; ERTS_MMAP_SIZE_SC_SA_INC(asize); @@ -1633,8 +1593,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start); end = seg + asize; - if (!mm->reserve_physical(seg, (UWord) (org_end - seg), - mm->executable)) + if (!mm->reserve_physical(seg, (UWord) (org_end - seg))) goto supercarrier_reserve_failure; ERTS_MMAP_SIZE_SC_SUA_INC(asize); if (org_start != seg) { @@ -1660,20 +1619,20 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) } ERTS_MMAP_OP_ABORT(); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); } #if ERTS_HAVE_OS_MMAP /* Map using OS primitives */ if 
(!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) { if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { - seg = os_mmap(NULL, asize, 0, mm->executable); + seg = os_mmap(NULL, asize, 0); if (!seg) goto failure; } else { asize = ERTS_SUPERALIGNED_CEILING(*sizep); - seg = os_mmap(NULL, asize, 1, mm->executable); + seg = os_mmap(NULL, asize, 1); if (!seg) goto failure; @@ -1683,8 +1642,7 @@ erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep) os_munmap(seg, asize); - ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1, - mm->executable); + ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1); if (!ptr) goto failure; @@ -1724,13 +1682,13 @@ supercarrier_success: #endif ERTS_MMAP_OP_END(seg, asize); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); *sizep = asize; return (void *) seg; supercarrier_reserve_failure: - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); *sizep = 0; return NULL; } @@ -1760,7 +1718,7 @@ erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size) start = (char *) ptr; end = start + size; - erts_smp_mtx_lock(&mm->mtx); + erts_mtx_lock(&mm->mtx); ERTS_MUNMAP_OP(ptr, size); @@ -1829,7 +1787,7 @@ erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size) if (unres_sz) mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); } } } @@ -1878,7 +1836,7 @@ erts_mremap(ErtsMemMapper* mm, return NULL; } -#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP +#if defined(ERTS_HAVE_OS_MREMAP) || defined(ERTS_HAVE_GENUINE_OS_MMAP) superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); if (superaligned) { @@ -1898,7 +1856,7 @@ erts_mremap(ErtsMemMapper* mm, } } -#if ERTS_HAVE_GENUINE_OS_MMAP +#ifdef ERTS_HAVE_GENUINE_OS_MMAP if (asize < old_size && (!superaligned || ERTS_IS_SUPERALIGNED(ptr))) { @@ -1913,7 +1871,7 @@ erts_mremap(ErtsMemMapper* mm, return ptr; } #endif -#if ERTS_HAVE_OS_MREMAP +#ifdef ERTS_HAVE_OS_MREMAP if (superaligned) return 
remap_move(mm, flags, new_ptr, old_size, sizep); else { @@ -1948,12 +1906,12 @@ erts_mremap(ErtsMemMapper* mm, ? ERTS_SUPERALIGNED_CEILING(*sizep) : ERTS_PAGEALIGNED_CEILING(*sizep)); - erts_smp_mtx_lock(&mm->mtx); + erts_mtx_lock(&mm->mtx); if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr) ? (!superaligned && lookup_free_seg(&mm->sua.map, asize)) : (superaligned && lookup_free_seg(&mm->sa.map, asize))) { - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); /* * Segment currently in wrong area (due to a previous memory * shortage), move it to the right area. @@ -2019,8 +1977,7 @@ erts_mremap(ErtsMemMapper* mm, if (next && new_end <= next->end) { if (!mm->reserve_physical(((char *) ptr) + old_size, - asize - old_size, - mm->executable)) + asize - old_size)) goto supercarrier_reserve_failure; if (new_end < next->end) resize_free_seg(&mm->sua.map, next, new_end, next->end); @@ -2038,8 +1995,7 @@ erts_mremap(ErtsMemMapper* mm, if (end == mm->sa.top) { if (new_end <= mm->sua.bot) { if (!mm->reserve_physical(((char *) ptr) + old_size, - asize - old_size, - mm->executable)) + asize - old_size)) goto supercarrier_reserve_failure; mm->sa.top = new_end; new_ptr = ptr; @@ -2051,8 +2007,7 @@ erts_mremap(ErtsMemMapper* mm, adjacent_free_seg(&mm->sa.map, start, end, &prev, &next); if (next && new_end <= next->end) { if (!mm->reserve_physical(((char *) ptr) + old_size, - asize - old_size, - mm->executable)) + asize - old_size)) goto supercarrier_reserve_failure; if (new_end < next->end) resize_free_seg(&mm->sa.map, next, new_end, next->end); @@ -2068,7 +2023,7 @@ erts_mremap(ErtsMemMapper* mm, } ERTS_MMAP_OP_ABORT(); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); /* Failed to resize... 
*/ } @@ -2090,14 +2045,14 @@ supercarrier_resize_success: #endif ERTS_MREMAP_OP_END(new_ptr, asize); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); *sizep = asize; return new_ptr; supercarrier_reserve_failure: ERTS_MREMAP_OP_END(NULL, old_size); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); *sizep = old_size; return NULL; @@ -2172,7 +2127,7 @@ static void hard_dbg_mseg_init(void); #endif void -erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) +erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init) { static int is_first_call = 1; int virtual_map = 0; @@ -2204,7 +2159,6 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) mm->supercarrier = 0; mm->reserve_physical = reserve_noop; mm->unreserve_physical = unreserve_noop; - mm->executable = executable; #if HAVE_MMAP && !defined(MAP_ANON) mm->mmap_fd = open("/dev/zero", O_RDWR); @@ -2212,9 +2166,11 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) erts_exit(1, "erts_mmap: Failed to open /dev/zero\n"); #endif - erts_smp_mtx_init(&mm->mtx, "erts_mmap"); + erts_mtx_init(&mm->mtx, "erts_mmap", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); if (is_first_call) { - erts_mtx_init(&am.init_mutex, "mmap_init_atoms"); + erts_mtx_init(&am.init_mutex, "mmap_init_atoms", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); } #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION @@ -2224,7 +2180,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start); end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end); sz = end - ptr; - start = os_mmap_virtual(ptr, sz, executable); + start = os_mmap_virtual(ptr, sz); if (!start || start > ptr || start >= end) erts_exit(1, "erts_mmap: Failed to create virtual range for super carrier\n"); @@ -2249,7 +2205,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int 
executable) sz = ERTS_PAGEALIGNED_CEILING(init->scs); #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION if (!init->scrpm) { - start = os_mmap_virtual(NULL, sz, executable); + start = os_mmap_virtual(NULL, sz); mm->reserve_physical = os_reserve_physical; mm->unreserve_physical = os_unreserve_physical; virtual_map = 1; @@ -2261,7 +2217,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) * The whole supercarrier will by physically * reserved all the time. */ - start = os_mmap(NULL, sz, 1, executable); + start = os_mmap(NULL, sz, 1); } if (!start) erts_exit(1, @@ -2335,7 +2291,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) mm->sua.top -= ERTS_PAGEALIGNED_SIZE; mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE; #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION - if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE, 0)) + if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE)) #endif add_free_desc_area(mm, mm->sua.top, end); mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc); @@ -2348,7 +2304,7 @@ erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable) * will be used for free segment descritors. 
*/ #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION - if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start, 0)) + if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start)) erts_exit(1, "erts_mmap: Failed to reserve physical memory for descriptors\n"); #endif mm->desc.unused_start = start; @@ -2390,7 +2346,7 @@ add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2) } Eterm erts_mmap_info(ErtsMemMapper* mm, - int *print_to_p, + fmtfn_t *print_to_p, void *print_to_arg, Eterm** hpp, Uint* szp, struct erts_mmap_info_struct* emis) @@ -2405,7 +2361,7 @@ Eterm erts_mmap_info(ErtsMemMapper* mm, Eterm res = THE_NON_VALUE; if (!hpp) { - erts_smp_mtx_lock(&mm->mtx); + erts_mtx_lock(&mm->mtx); emis->sizes[0] = mm->size.supercarrier.total; emis->sizes[1] = mm->sa.top - mm->sa.bot; emis->sizes[2] = mm->sua.top - mm->sua.bot; @@ -2421,7 +2377,7 @@ Eterm erts_mmap_info(ErtsMemMapper* mm, emis->segs[5] = mm->sua.map.nseg; emis->os_used = mm->size.os.used; - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); } list[lix] = erts_mmap_info_options(mm, "option ", print_to_p, print_to_arg, @@ -2431,7 +2387,7 @@ Eterm erts_mmap_info(ErtsMemMapper* mm, if (print_to_p) { - int to = *print_to_p; + fmtfn_t to = *print_to_p; void *arg = print_to_arg; if (mm->supercarrier) { const char* prefix = "supercarrier "; @@ -2485,7 +2441,7 @@ Eterm erts_mmap_info(ErtsMemMapper* mm, Eterm erts_mmap_info_options(ErtsMemMapper* mm, char *prefix, - int *print_to_p, + fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) @@ -2496,7 +2452,7 @@ Eterm erts_mmap_info_options(ErtsMemMapper* mm, Eterm res = THE_NON_VALUE; if (print_to_p) { - int to = *print_to_p; + fmtfn_t to = *print_to_p; void *arg = print_to_arg; erts_print(to, arg, "%sscs: %bpu\n", prefix, scs); if (mm->supercarrier) { @@ -2541,14 +2497,14 @@ Eterm erts_mmap_debug_info(Process* p) Eterm *hp, *hp_end; Uint may_need; - erts_smp_mtx_lock(&mm->mtx); + erts_mtx_lock(&mm->mtx); values[0] = 
(UWord)mm->sa.bot; values[1] = (UWord)mm->sa.top; values[2] = (UWord)mm->sua.bot; values[3] = (UWord)mm->sua.top; sa_list = build_free_seg_list(p, &mm->sa.map); sua_list = build_free_seg_list(p, &mm->sua.map); - erts_smp_mtx_unlock(&mm->mtx); + erts_mtx_unlock(&mm->mtx); may_need = 4*(2+3+2) + 2*(2+3); hp = HAlloc(p, may_need); @@ -2558,8 +2514,8 @@ Eterm erts_mmap_debug_info(Process* p) sizeof(values)/sizeof(*values), tags, values); - sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3; - sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3; + sa_list = TUPLE2(hp, ERTS_MAKE_AM("sa_free_segs"), sa_list); hp+=3; + sua_list = TUPLE2(hp, ERTS_MAKE_AM("sua_free_segs"), sua_list); hp+=3; list = CONS(hp, sua_list, list); hp+=2; list = CONS(hp, sa_list, list); hp+=2; @@ -2816,7 +2772,8 @@ static void hard_dbg_mseg_init(void) { ErtsFreeSegDesc_fake* p; - erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg"); + erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); hard_dbg_mseg_tree.root = NULL; hard_dbg_mseg_tree.order = ADDR_ORDER; diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index fa51b663fa..539daea419 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2013-2016. All Rights Reserved. + * Copyright Ericsson AB 2013-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -22,6 +22,7 @@ #define ERL_MMAP_H__ #include "sys.h" +#include "erl_printf.h" #define ERTS_MMAP_SUPERALIGNED_BITS (18) /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */ @@ -92,11 +93,6 @@ typedef struct { #define ERTS_MMAP_INIT_LITERAL_INITER \ {{NULL, NULL}, {NULL, NULL}, ERTS_LITERAL_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0} -#define ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(512)*1024*1024) - -#define ERTS_MMAP_INIT_HIPE_EXEC_INITER \ - {{NULL, NULL}, {NULL, NULL}, ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0} - #define ERTS_SUPERALIGNED_SIZE \ (1 << ERTS_MMAP_SUPERALIGNED_BITS) @@ -139,17 +135,17 @@ void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep); void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size); void *erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep); int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr); -void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*, int executable); +void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*); struct erts_mmap_info_struct { UWord sizes[6]; UWord segs[6]; UWord os_used; }; -Eterm erts_mmap_info(ErtsMemMapper*, int *print_to_p, void *print_to_arg, +Eterm erts_mmap_info(ErtsMemMapper*, fmtfn_t *print_to_p, void *print_to_arg, Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*); Eterm erts_mmap_info_options(ErtsMemMapper*, - char *prefix, int *print_to_p, void *print_to_arg, + char *prefix, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp); @@ -157,12 +153,14 @@ Eterm erts_mmap_info_options(ErtsMemMapper*, # include "erl_alloc_types.h" extern ErtsMemMapper erts_dflt_mmapper; -# if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + +# if defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) + +# if defined(ARCH_64) extern ErtsMemMapper erts_literal_mmapper; # endif -# ifdef ERTS_ALC_A_EXEC -extern ErtsMemMapper erts_exec_mmapper; -# endif + +# endif /* ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION */ 
#endif /* ERTS_WANT_MEM_MAPPERS */ /*#define HARD_DEBUG_MSEG*/ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index f3306a888c..030e5b00a7 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -83,11 +83,13 @@ static const int debruijn[32] = { 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 }; -#define LOG2(X) (debruijn[((Uint32)(((X) & -(X)) * 0x077CB531U)) >> 27]) +#define LSB(X) (debruijn[((Uint32)(((X) & -(X)) * 0x077CB531U)) >> 27]) #define CACHE_AREAS (32 - MSEG_ALIGN_BITS) -#define SIZE_TO_CACHE_AREA_IDX(S) (LOG2((S)) - MSEG_ALIGN_BITS) +/* FIXME: segment sizes > 2 GB result in bogus negative indices */ +/* NOTE: using LSB instead of proper log2 only works if S is a power of 2 */ +#define SIZE_TO_CACHE_AREA_IDX(S) (LSB((S)) - MSEG_ALIGN_BITS) #define MAX_CACHE_SIZE (30) #define MSEG_FLG_IS_2POW(X) ((X) & ERTS_MSEG_FLG_2POW) @@ -186,7 +188,6 @@ typedef union { static int no_mseg_allocators; static ErtsAlgndMsegAllctr_t *aligned_mseg_allctr; -#ifdef ERTS_SMP #define ERTS_MSEG_ALLCTR_IX(IX) \ (&aligned_mseg_allctr[(IX)].mseg_alloc) @@ -197,18 +198,6 @@ static ErtsAlgndMsegAllctr_t *aligned_mseg_allctr; #define ERTS_MSEG_ALLCTR_OPT(OPT) \ ((OPT)->sched_spec ? 
ERTS_MSEG_ALLCTR_SS() : ERTS_MSEG_ALLCTR_IX(0)) -#else - -#define ERTS_MSEG_ALLCTR_IX(IX) \ - (&aligned_mseg_allctr[0].mseg_alloc) - -#define ERTS_MSEG_ALLCTR_SS() \ - (&aligned_mseg_allctr[0].mseg_alloc) - -#define ERTS_MSEG_ALLCTR_OPT(OPT) \ - (&aligned_mseg_allctr[0].mseg_alloc) - -#endif #define ERTS_MSEG_LOCK(MA) \ do { \ @@ -350,11 +339,11 @@ mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, U do { \ if ((MA)->is_thread_safe) \ ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&(MA)->mtx) \ - || erts_smp_thr_progress_is_blocking() \ + || erts_thr_progress_is_blocking() \ || ERTS_IS_CRASH_DUMPING); \ else \ ERTS_LC_ASSERT((MA)->ix == (int) erts_get_scheduler_id() \ - || erts_smp_thr_progress_is_blocking() \ + || erts_thr_progress_is_blocking() \ || ERTS_IS_CRASH_DUMPING); \ } while (0) #else @@ -378,7 +367,7 @@ static ERTS_INLINE int cache_bless_segment(ErtsMsegAllctr_t *ma, void *seg, UWor ASSERT(!MSEG_FLG_IS_2POW(flags) || (MSEG_FLG_IS_2POW(flags) && MAP_IS_ALIGNED(seg) && IS_2POW(size))); - /* The idea is that sbc caching is prefered over mbc caching. + /* The idea is that sbc caching is preferred over mbc caching. * Blocks are normally allocated in mb carriers and thus cached there. * Large blocks has no such cache and it is up to mseg to cache them to speed things up. 
*/ @@ -396,6 +385,9 @@ static ERTS_INLINE int cache_bless_segment(ErtsMsegAllctr_t *ma, void *seg, UWor if (MSEG_FLG_IS_2POW(flags)) { int ix = SIZE_TO_CACHE_AREA_IDX(size); + if (ix < 0) + return 0; + ASSERT(ix < CACHE_AREAS); ASSERT((1 << (ix + MSEG_ALIGN_BITS)) == size); @@ -471,6 +463,9 @@ static ERTS_INLINE void *cache_get_segment(ErtsMsegAllctr_t *ma, UWord *size_p, ASSERT(IS_2POW(size)); + if (ix < 0) + return NULL; + for( i = ix; i < CACHE_AREAS; i++) { if (erts_circleq_is_empty(&(ma->cache_powered_node[i]))) @@ -991,7 +986,7 @@ add_4tup(Uint **hpp, Uint *szp, Eterm *lp, static Eterm info_options(ErtsMsegAllctr_t *ma, char *prefix, - int *print_to_p, + fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) @@ -999,7 +994,7 @@ info_options(ErtsMsegAllctr_t *ma, Eterm res = NIL; if (print_to_p) { - int to = *print_to_p; + fmtfn_t to = *print_to_p; void *arg = print_to_arg; erts_print(to, arg, "%samcbf: %beu\n", prefix, ma->abs_max_cache_bad_fit); erts_print(to, arg, "%srmcbf: %beu\n", prefix, ma->rel_max_cache_bad_fit); @@ -1027,7 +1022,7 @@ info_options(ErtsMsegAllctr_t *ma, } static Eterm -info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) +info_calls(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { Eterm res = THE_NON_VALUE; @@ -1040,7 +1035,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp erts_print(TO, TOA, "mseg_%s calls: %b32u%09b32u\n", #CC, \ ma->calls.CC.giga_no, ma->calls.CC.no) - int to = *print_to_p; + fmtfn_t to = *print_to_p; void *arg = print_to_arg; PRINT_CC(to, arg, alloc); @@ -1106,7 +1101,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp } static Eterm -info_status(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, +info_status(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, int begin_new_max_period, int only_sz, Uint **hpp, Uint *szp) { Eterm res = 
THE_NON_VALUE; @@ -1117,7 +1112,7 @@ info_status(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, ma->segments.max_ever.sz = ma->segments.max.sz; if (print_to_p) { - int to = *print_to_p; + fmtfn_t to = *print_to_p; void *arg = print_to_arg; if (!only_sz) { @@ -1165,7 +1160,7 @@ info_status(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, return res; } -static Eterm info_memkind(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, +static Eterm info_memkind(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, int begin_max_per, int only_sz, Uint **hpp, Uint *szp) { Eterm res = THE_NON_VALUE; @@ -1196,7 +1191,7 @@ static Eterm info_memkind(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_ } static Eterm -info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) +info_version(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { Eterm res = THE_NON_VALUE; @@ -1218,7 +1213,7 @@ info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **h Eterm erts_mseg_info_options(int ix, - int *print_to_p, void *print_to_arg, + fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix); @@ -1231,7 +1226,7 @@ erts_mseg_info_options(int ix, Eterm erts_mseg_info(int ix, - int *print_to_p, + fmtfn_t *print_to_p, void *print_to_arg, int begin_max_per, int only_sz, @@ -1396,11 +1391,7 @@ erts_mseg_init(ErtsMsegInit_t *init) int i; UWord x; -#ifdef ERTS_SMP no_mseg_allocators = init->nos + 1; -#else - no_mseg_allocators = 1; -#endif x = (UWord) malloc(sizeof(ErtsAlgndMsegAllctr_t) *no_mseg_allocators @@ -1412,17 +1403,12 @@ erts_mseg_init(ErtsMsegInit_t *init) atoms_initialized = 0; - erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms"); + erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); -#ifdef ERTS_ALC_A_EXEC - /* Initialize 
erts_exec_mapper *FIRST*, to increase probability - * of getting low memory for HiPE AMD64's small code model. - */ - erts_mmap_init(&erts_exec_mmapper, &init->exec_mmap, 1); -#endif - erts_mmap_init(&erts_dflt_mmapper, &init->dflt_mmap, 0); + erts_mmap_init(&erts_dflt_mmapper, &init->dflt_mmap); #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) - erts_mmap_init(&erts_literal_mmapper, &init->literal_mmap, 0); + erts_mmap_init(&erts_literal_mmapper, &init->literal_mmap); #endif if (!IS_2POW(GET_PAGE_SIZE)) @@ -1441,7 +1427,8 @@ erts_mseg_init(ErtsMsegInit_t *init) ma->is_thread_safe = 0; else { ma->is_thread_safe = 1; - erts_mtx_init(&ma->mtx, "mseg"); + erts_mtx_init(&ma->mtx, "mseg", make_small(i), + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR); } ma->is_cache_check_scheduled = 0; diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h index a43b409e94..ea9060ddac 100644 --- a/erts/emulator/sys/common/erl_mseg.h +++ b/erts/emulator/sys/common/erl_mseg.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,6 @@ typedef struct { Uint nos; ErtsMMapInit dflt_mmap; ErtsMMapInit literal_mmap; - ErtsMMapInit exec_mmap; } ErtsMsegInit_t; #define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \ @@ -72,7 +71,6 @@ typedef struct { 1000, /* cci: Cache check interval */ \ ERTS_MMAP_INIT_DEFAULT_INITER, \ ERTS_MMAP_INIT_LITERAL_INITER, \ - ERTS_MMAP_INIT_HIPE_EXEC_INITER \ } typedef struct { @@ -98,8 +96,8 @@ Uint erts_mseg_unit_size(void); void erts_mseg_init(ErtsMsegInit_t *init); void erts_mseg_late_init(void); /* Have to be called after all allocators, threads and timers have been initialized. 
*/ -Eterm erts_mseg_info_options(int, int *, void*, Uint **, Uint *); -Eterm erts_mseg_info(int, int *, void*, int, int, Uint **, Uint *); +Eterm erts_mseg_info_options(int, fmtfn_t*, void*, Uint **, Uint *); +Eterm erts_mseg_info(int, fmtfn_t *, void*, int, int, Uint **, Uint *); #endif /* #if HAVE_ERTS_MSEG */ diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.c b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c index d53190fdd5..5844e7eeb7 100644 --- a/erts/emulator/sys/common/erl_os_monotonic_time_extender.c +++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2015-2016. All Rights Reserved. + * Copyright Ericsson AB 2015-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #endif #include "erl_os_monotonic_time_extender.h" -#ifdef USE_THREADS static void *os_monotonic_time_extender(void *vstatep) { @@ -49,30 +48,22 @@ static void *os_monotonic_time_extender(void *vstatep) } static erts_tid_t os_monotonic_extender_tid; -#endif void erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep, Uint32 (*raw_os_monotonic_time)(void), int check_seconds) { -#ifdef USE_THREADS statep->raw_os_monotonic_time = raw_os_monotonic_time; erts_atomic32_init_nob(&statep->extend[0], (erts_aint32_t) 0); erts_atomic32_init_nob(&statep->extend[1], (erts_aint32_t) 0); statep->check_interval = check_seconds; -#else - statep->extend[0] = (Uint32) 0; - statep->extend[1] = (Uint32) 0; - statep->last_msb = (ErtsMonotonicTime) 0; -#endif } void erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep) { -#ifdef USE_THREADS erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER; thr_opts.detached = 1; thr_opts.suggested_stack_size = 4; @@ -85,5 +76,4 @@ 
erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep os_monotonic_time_extender, (void*) statep, &thr_opts); -#endif } diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.h b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h index 8089c9aed9..f6659fe973 100644 --- a/erts/emulator/sys/common/erl_os_monotonic_time_extender.h +++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2015. All Rights Reserved. + * Copyright Ericsson AB 2015-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,36 +25,16 @@ #include "erl_threads.h" typedef struct { -#ifdef USE_THREADS Uint32 (*raw_os_monotonic_time)(void); erts_atomic32_t extend[2]; int check_interval; -#else - Uint32 extend[2]; - ErtsMonotonicTime last_msb; -#endif } ErtsOsMonotonicTimeExtendState; -#ifdef USE_THREADS -# define ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(S, RT) ((void) 1) # define ERTS_EXTEND_OS_MONOTONIC_TIME(S, RT) \ ((((ErtsMonotonicTime) \ erts_atomic32_read_nob(&((S)->extend[((int) ((RT) >> 31)) & 1]))) \ << 32) \ + (RT)) -#else -# define ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(S, RT) \ - do { \ - Uint32 msb__ = (RT) & (((Uint32) 1) << 31); \ - if (msb__ != (S)->last_msb) { \ - int ix__ = ((int) ((S)->last_msb >> 31)) & 1; \ - (S)->extend[ix__]++; \ - (S)->last_msb = msb; \ - } \ - } while (0) -# define ERTS_EXTEND_OS_MONOTONIC_TIME(S, RT) \ - ((((ErtsMonotonicTime) (S)->extend[((int) ((RT) >> 31)) & 1]) << 32) + (RT)) -#endif void erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep, diff --git a/erts/emulator/sys/common/erl_osenv.c b/erts/emulator/sys/common/erl_osenv.c new file mode 100644 index 0000000000..6a16377736 --- /dev/null +++ b/erts/emulator/sys/common/erl_osenv.c @@ -0,0 +1,404 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 
2017-2018. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#include "erl_osenv.h" + +#include "global.h" +#include "erl_alloc.h" +#include "erl_process.h" + +#define STACKBUF_SIZE (512) + +typedef struct __env_rbtnode_t { + struct __env_rbtnode_t *parent; + struct __env_rbtnode_t *left; + struct __env_rbtnode_t *right; + + int is_red; + + erts_osenv_data_t key; + erts_osenv_data_t value; +} env_rbtnode_t; + +#define ERTS_RBT_PREFIX env +#define ERTS_RBT_T env_rbtnode_t +#define ERTS_RBT_KEY_T erts_osenv_data_t +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + (T)->is_red = 0; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_IS_LT(KX, KY) (compare_env_keys(KX, KY) < 0) +#define ERTS_RBT_IS_EQ(KX, KY) 
(compare_env_keys(KX, KY) == 0) +#define ERTS_RBT_WANT_FOREACH_DESTROY +#define ERTS_RBT_WANT_FOREACH +#define ERTS_RBT_WANT_REPLACE +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_WANT_INSERT +#define ERTS_RBT_WANT_LOOKUP + +static int compare_env_keys(const erts_osenv_data_t a, const erts_osenv_data_t b); + +#include "erl_rbtree.h" + +static int compare_env_keys(const erts_osenv_data_t a, const erts_osenv_data_t b) { + int relation; + +#ifdef __WIN32__ + /* Environment variables are case-insensitive on Windows. */ + relation = _wcsnicmp((const WCHAR*)a.data, (const WCHAR*)b.data, + MIN(a.length, b.length) / sizeof(WCHAR)); +#else + relation = sys_memcmp(a.data, b.data, MIN(a.length, b.length)); +#endif + + if(relation != 0) { + return relation; + } + + if(a.length < b.length) { + return -1; + } else if(a.length == b.length) { + return 0; + } else { + return 1; + } +} + +static void *convert_value_to_native(Eterm term, char *stackbuf, + int stackbuf_size, Sint *length) { + int encoding; + void *result; + + if(is_atom(term)) { + return NULL; + } + + encoding = erts_get_native_filename_encoding(); + *length = erts_native_filename_need(term, encoding); + + if(*length < 0) { + return NULL; + } else if(*length >= stackbuf_size) { + result = erts_alloc(ERTS_ALC_T_TMP, *length); + } else { + result = stackbuf; + } + + erts_native_filename_put(term, encoding, (byte*)result); + + return result; +} + +static void *convert_key_to_native(Eterm term, char *stackbuf, + int stackbuf_size, Sint *length) { + byte *name_iterator, *name_end; + void *result; + int encoding; + + result = convert_value_to_native(term, stackbuf, stackbuf_size, length); + + if(result == NULL || length == 0) { + return NULL; + } + + encoding = erts_get_native_filename_encoding(); + + name_iterator = (byte*)result; + name_end = &name_iterator[*length]; + +#ifdef __WIN32__ + /* Windows stores per-drive working directories as variables starting with + * '=', so we skip the first character to tolerate that. 
*/ + name_iterator = erts_raw_env_next_char(name_iterator, encoding); +#endif + + while(name_iterator < name_end) { + if(erts_raw_env_char_is_7bit_ascii_char('=', name_iterator, encoding)) { + if(result != stackbuf) { + erts_free(ERTS_ALC_T_TMP, result); + } + + return NULL; + } + + name_iterator = erts_raw_env_next_char(name_iterator, encoding); + } + + return result; +} + +void erts_osenv_init(erts_osenv_t *env) { + env->variable_count = 0; + env->content_size = 0; + env->tree = NULL; +} + +static void destroy_foreach(env_rbtnode_t *node, void *_state) { + erts_free(ERTS_ALC_T_ENVIRONMENT, node); + (void)_state; +} + +void erts_osenv_clear(erts_osenv_t *env) { + env_rbt_foreach_destroy(&env->tree, &destroy_foreach, NULL); + erts_osenv_init(env); +} + +struct __env_merge { + int overwrite_existing; + erts_osenv_t *env; +}; + +static void merge_foreach(env_rbtnode_t *node, void *_state) { + struct __env_merge *state = (struct __env_merge*)(_state); + env_rbtnode_t *existing_node; + + existing_node = env_rbt_lookup(state->env->tree, node->key); + + if(existing_node == NULL || state->overwrite_existing) { + erts_osenv_put_native(state->env, &node->key, &node->value); + } +} + +void erts_osenv_merge(erts_osenv_t *env, const erts_osenv_t *with, int overwrite) { + struct __env_merge merge_state; + + merge_state.overwrite_existing = overwrite; + merge_state.env = env; + + env_rbt_foreach(with->tree, merge_foreach, &merge_state); +} + +struct __env_foreach_term { + erts_osenv_foreach_term_cb_t user_callback; + struct process *process; + void *user_state; +}; + +static void foreach_term_wrapper(env_rbtnode_t *node, void *_state) { + struct __env_foreach_term *state = (struct __env_foreach_term*)_state; + Eterm key, value; + + key = erts_convert_native_to_filename(state->process, + node->key.length, (byte*)node->key.data); + value = erts_convert_native_to_filename(state->process, + node->value.length, (byte*)node->value.data); + + state->user_callback(state->process, 
state->user_state, key, value); +} + +void erts_osenv_foreach_term(const erts_osenv_t *env, struct process *process, + void *state, erts_osenv_foreach_term_cb_t callback) { + struct __env_foreach_term wrapper_state; + + wrapper_state.user_callback = callback; + wrapper_state.user_state = state; + wrapper_state.process = process; + + env_rbt_foreach(env->tree, foreach_term_wrapper, &wrapper_state); +} + +int erts_osenv_get_term(const erts_osenv_t *env, Process *process, + Eterm key_term, Eterm *out_term) { + char key_stackbuf[STACKBUF_SIZE]; + erts_osenv_data_t key; + int result; + + key.data = convert_key_to_native(key_term, key_stackbuf, + STACKBUF_SIZE, &key.length); + result = -1; + + if(key.data != NULL) { + env_rbtnode_t *node; + + node = env_rbt_lookup(env->tree, key); + result = 0; + + if(node != NULL) { + (*out_term) = erts_convert_native_to_filename(process, + node->value.length, (byte*)node->value.data); + result = 1; + } + + if(key.data != key_stackbuf) { + erts_free(ERTS_ALC_T_TMP, key.data); + } + } + + return result; +} + +int erts_osenv_put_term(erts_osenv_t *env, Eterm key_term, Eterm value_term) { + char key_stackbuf[STACKBUF_SIZE], value_stackbuf[STACKBUF_SIZE]; + erts_osenv_data_t key, value; + int result; + + key.data = convert_key_to_native(key_term, key_stackbuf, + STACKBUF_SIZE, &key.length); + value.data = convert_value_to_native(value_term, value_stackbuf, + STACKBUF_SIZE, &value.length); + result = -1; + + if(value.data != NULL && key.data != NULL) { + result = erts_osenv_put_native(env, &key, &value); + } + + if(value.data != NULL && value.data != value_stackbuf) { + erts_free(ERTS_ALC_T_TMP, value.data); + } + + if(key.data != NULL && key.data != key_stackbuf) { + erts_free(ERTS_ALC_T_TMP, key.data); + } + + return result; +} + +int erts_osenv_unset_term(erts_osenv_t *env, Eterm key_term) { + char key_stackbuf[STACKBUF_SIZE]; + erts_osenv_data_t key; + int result; + + key.data = convert_key_to_native(key_term, key_stackbuf, + 
STACKBUF_SIZE, &key.length); + result = -1; + + if(key.data != NULL) { + result = erts_osenv_unset_native(env, &key); + + if(key.data != key_stackbuf) { + erts_free(ERTS_ALC_T_TMP, key.data); + } + } + + return result; +} + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +struct __env_foreach_native { + erts_osenv_foreach_native_cb_t user_callback; + void *user_state; +}; + +static void foreach_native_wrapper(env_rbtnode_t *node, void *_state) { + struct __env_foreach_native *state = (struct __env_foreach_native*)_state; + + state->user_callback(state->user_state, &node->key, &node->value); +} + +void erts_osenv_foreach_native(const erts_osenv_t *env, void *state, + erts_osenv_foreach_native_cb_t callback) { + struct __env_foreach_native wrapper_state; + + wrapper_state.user_callback = callback; + wrapper_state.user_state = state; + + env_rbt_foreach(env->tree, foreach_native_wrapper, &wrapper_state); +} + +int erts_osenv_get_native(const erts_osenv_t *env, + const erts_osenv_data_t *key, + erts_osenv_data_t *value) { + env_rbtnode_t *node = env_rbt_lookup(env->tree, *key); + + if(node != NULL) { + if(value != NULL) { + if(node->value.length > value->length) { + return -1; + } + + sys_memcpy(value->data, node->value.data, node->value.length); + value->length = node->value.length; + } + + return 1; + } + + return 0; +} + +int erts_osenv_put_native(erts_osenv_t *env, const erts_osenv_data_t *key, + const erts_osenv_data_t *value) { + env_rbtnode_t *old_node, *new_node; + + new_node = erts_alloc(ERTS_ALC_T_ENVIRONMENT, sizeof(env_rbtnode_t) + + key->length + value->length); + + new_node->key.data = (char*)(&new_node[1]); + new_node->key.length = key->length; + new_node->value.data = &((char*)new_node->key.data)[key->length]; + new_node->value.length = value->length; + + sys_memcpy(new_node->key.data, key->data, key->length); + sys_memcpy(new_node->value.data, value->data, value->length); + + old_node = env_rbt_lookup(env->tree, 
*key); + + if(old_node != NULL) { + env->content_size -= old_node->value.length; + env->content_size -= old_node->key.length; + env_rbt_replace(&env->tree, old_node, new_node); + } else { + env_rbt_insert(&env->tree, new_node); + env->variable_count++; + } + + env->content_size += new_node->value.length; + env->content_size += new_node->key.length; + + if(old_node != NULL) { + erts_free(ERTS_ALC_T_ENVIRONMENT, old_node); + } + + return 1; +} + +int erts_osenv_unset_native(erts_osenv_t *env, const erts_osenv_data_t *key) { + env_rbtnode_t *old_node = env_rbt_lookup(env->tree, *key); + + if(old_node != NULL) { + env->content_size -= old_node->value.length; + env->content_size -= old_node->key.length; + env->variable_count -= 1; + + env_rbt_delete(&env->tree, old_node); + erts_free(ERTS_ALC_T_ENVIRONMENT, old_node); + return 1; + } + + return 0; +} diff --git a/erts/emulator/sys/common/erl_osenv.h b/erts/emulator/sys/common/erl_osenv.h new file mode 100644 index 0000000000..4777f2148a --- /dev/null +++ b/erts/emulator/sys/common/erl_osenv.h @@ -0,0 +1,121 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +/* This is a replacement for getenv(3) and friends, operating on instances so + * we can keep a common implementation for both the global and local (per-port) + * environments. 
+ * + * The instances are not thread-safe on their own but unlike getenv(3) we're + * guaranteed to be the only user, so placing locks around all our accesses + * will suffice. + * + * Use erts_sys_rwlock_global_osenv to access the global environment. */ + +#ifndef __ERL_OSENV_H__ +#define __ERL_OSENV_H__ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +typedef struct __erts_osenv_data_t erts_osenv_data_t; + +typedef struct __erts_osenv_t { + struct __env_rbtnode_t *tree; + int variable_count; + int content_size; +} erts_osenv_t; + +#include "sys.h" + +struct __erts_osenv_data_t { + Sint length; + void *data; +}; + +void erts_osenv_init(erts_osenv_t *env); +void erts_osenv_clear(erts_osenv_t *env); + +/* @brief Merges \c with into \c env + * + * @param overwrite Whether to overwrite existing entries or keep them as they + * are. */ +void erts_osenv_merge(erts_osenv_t *env, const erts_osenv_t *with, int overwrite); + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +/* @brief Copies env[key] into \c value + * + * @return 1 on success, 0 if the key couldn't be found, and -1 if the input + * was invalid. */ +int erts_osenv_get_term(const erts_osenv_t *env, struct process *process, + Eterm key, Eterm *value); + +/* @brief Copies \c value into \c env[key] + * + * @return 1 on success, -1 if the input was invalid. */ +int erts_osenv_put_term(erts_osenv_t *env, Eterm key, Eterm value); + +/* @brief Removes \c env[key] + * + * @return 1 on success, 0 if the key couldn't be found, and -1 if the input + * was invalid. */ +int erts_osenv_unset_term(erts_osenv_t *env, Eterm key); + +/* @brief Copies env[key] into \c value + * + * @param value [in,out] The buffer to copy the value into, may be NULL if you + * only wish to query presence. + * + * @return 1 on success, 0 if the key couldn't be found, and -1 if the value + * didn't fit into the buffer. 
*/ +int erts_osenv_get_native(const erts_osenv_t *env, const erts_osenv_data_t *key, + erts_osenv_data_t *value); + +/* @brief Copies \c value into \c env[key] + * + * @return 1 on success, -1 on failure. */ +int erts_osenv_put_native(erts_osenv_t *env, const erts_osenv_data_t *key, + const erts_osenv_data_t *value); + +/* @brief Removes \c key from the env. + * + * @return 1 on success, 0 if the key couldn't be found. */ +int erts_osenv_unset_native(erts_osenv_t *env, const erts_osenv_data_t *key); + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */ + +typedef void (*erts_osenv_foreach_term_cb_t)(struct process *process, + void *state, Eterm key, Eterm value); + +typedef void (*erts_osenv_foreach_native_cb_t)(void *state, + const erts_osenv_data_t *key, + const erts_osenv_data_t *value); + +/* @brief Walks through all environment variables, calling \c callback for each + * one. It's unsafe to modify \c env within the callback. */ +void erts_osenv_foreach_term(const erts_osenv_t *env, struct process *process, + void *state, erts_osenv_foreach_term_cb_t callback); + +/* @copydoc erts_osenv_foreach_term */ +void erts_osenv_foreach_native(const erts_osenv_t *env, void *state, + erts_osenv_foreach_native_cb_t callback); + +#endif diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index b8a28bcc18..51d50933ff 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,8 @@ * %CopyrightEnd% */ -/* - * Description: Poll interface suitable for ERTS with or without - * SMP support. 
+/** + * @description Poll interface suitable for ERTS * * The interface is currently implemented using: * - select @@ -29,12 +28,36 @@ * - epoll with poll or select as fallback * - kqueue with poll or select as fallback * - * Some time in the future it will also be - * implemented using Solaris ports. * + * @author Rickard Green + * @author Lukas Larsson + * + * There are two major different implementations of IO polling in this + * file. The concurrent and non-concurrent implementations. + * When available epoll/kqueue are used to implement the concurrent + * versions. poll, select and dev/poll use non-concurrent updates. + * + * Concurrent version: + * In the concurrent version erts_poll_control directly modifies + * the kernel pollset without waking the thread that is waiting + * on events. Also the ErtsPollResFd type is directly mapped to + * the native event type, so no extra copying is needed. Note that + * as no locking at all is done, fds can be triggered that have been + * removed from the pollset. The check_io layer has to deal with this. + * + * Non-concurrent version: + * In the non-concurrent version, the pollset has an internal representation + * of the pollset that is updated by erts_poll_control. When an fd is updated, + * its number is placed in the update request queue and then the waiting thread + * is woken in order to see the change. The internal data in the pollset is + * protected by a mutex that has to be taken by both the modifying and waiting + * thread at different times. * + * The non-concurrent pollset cannot have fd's closed in it while a thread is + * waiting on that fd. In order to fix this, when an ERTS_POLL_OP_DEL command + * is issued, the fd is marked as closing and the waiting thread is woken. The + * fd is then returned in the waiting thread's results as ERTS_POLL_EV_NONE.
* - * Author: Rickard Green */ #ifdef HAVE_CONFIG_H @@ -51,8 +74,8 @@ #ifndef WANT_NONBLOCKING # define WANT_NONBLOCKING #endif -#define ERTS_WANT_GOT_SIGUSR1 +#include "erl_thr_progress.h" #include "erl_poll.h" #if ERTS_POLL_USE_KQUEUE # include <sys/types.h> @@ -63,6 +86,8 @@ # ifdef SYS_SELECT_H # include <sys/select.h> # endif +#elif defined(_DARWIN_UNLIMITED_SELECT) +# undef _DARWIN_UNLIMITED_SELECT #endif #ifdef NO_SYSCONF # if ERTS_POLL_USE_SELECT @@ -71,7 +96,6 @@ # include <limits.h> # endif #endif -#include "erl_thr_progress.h" #include "erl_driver.h" #include "erl_alloc.h" #include "erl_msacc.h" @@ -84,33 +108,60 @@ #error "Missing implementation of erts_poll()" #endif -#if defined(ERTS_KERNEL_POLL_VERSION) && !ERTS_POLL_USE_KERNEL_POLL -#error "Missing kernel poll implementation of erts_poll()" -#endif +#if 0 +#define ERTS_POLL_DEBUG_PRINT 1 -#if defined(ERTS_NO_KERNEL_POLL_VERSION) && ERTS_POLL_USE_KERNEL_POLL -#error "Kernel poll used when it shouldn't be used" -#endif +#define DEBUG_PRINT(FMT, PS, ...) \ + do { \ + int myerrno = errno; \ + erts_printf("%d: " FMT "\r\n", (PS)->id, ##__VA_ARGS__); \ + errno = myerrno; \ + } while(0) -#if 0 -#define ERTS_POLL_DEBUG_PRINT +/* Define to print info about modifications done to each fd */ +#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) +/* Define to print entry and exit from erts_poll_wait (can be very spammy) */ +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0) + +#else +#define ERTS_POLL_DEBUG_PRINT 0 +#define DEBUG_PRINT(...) #endif -#if defined(DEBUG) && 0 -#define HARD_DEBUG +#ifndef DEBUG_PRINT_FD +#define DEBUG_PRINT_FD(...) +#endif +#ifndef DEBUG_PRINT_WAIT +#define DEBUG_PRINT_WAIT(...) 
#endif -#ifdef _DARWIN_UNLIMITED_SELECT + +#if defined(_DARWIN_UNLIMITED_SELECT) && ERTS_POLL_USE_SELECT typedef struct { size_t sz; fd_set* ptr; }ERTS_fd_set; -# define ERTS_FD_CLR(fd, fds) FD_CLR((fd), (fds)->ptr) -# define ERTS_FD_SET(fd, fds) FD_SET((fd), (fds)->ptr) -# define ERTS_FD_ISSET(fd,fds) FD_ISSET((fd), (fds)->ptr) + # define ERTS_FD_ZERO(fds) memset((fds)->ptr, 0, (fds)->sz) # define ERTS_FD_SIZE(n) ((((n)+NFDBITS-1)/NFDBITS)*sizeof(fd_mask)) +static ERTS_INLINE void ERTS_FD_CLR(int fd, ERTS_fd_set *fds) +{ + ASSERT(ERTS_FD_SIZE(fd+1) <= fds->sz); + FD_CLR(fd, fds->ptr); +} +static ERTS_INLINE void ERTS_FD_SET(int fd, ERTS_fd_set *fds) +{ + ASSERT(ERTS_FD_SIZE(fd+1) <= fds->sz); + FD_SET(fd, fds->ptr); +} +static ERTS_INLINE int ERTS_FD_ISSET(int fd, ERTS_fd_set *fds) +{ + ASSERT(ERTS_FD_SIZE(fd+1) <= fds->sz); + return FD_ISSET(fd, fds->ptr); +} + static void ERTS_FD_COPY(ERTS_fd_set *src, ERTS_fd_set *dst) { if (dst->sz != src->sz) { @@ -146,74 +197,43 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, # define ERTS_SELECT select #endif -#define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \ - || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE \ - (defined(ERTS_SMP) || ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_POLL) - -#define ERTS_POLL_USE_CONCURRENT_UPDATE \ - (defined(ERTS_SMP) && ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_IS_FALLBACK (ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT) && ERTS_ENABLE_KERNEL_POLL -#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 1 -#else -# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 0 -#endif +#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0) -#define ERTS_POLL_USE_WAKEUP_PIPE \ - (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || 
defined(USE_THREADS)) +#if !ERTS_POLL_USE_CONCURRENT_UPDATE -#ifdef ERTS_SMP +#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \ + erts_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 1) +#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \ + erts_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 0) +#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \ + ((int) erts_atomic32_read_nob(&(PS)->have_update_requests)) #define ERTS_POLLSET_LOCK(PS) \ - erts_smp_mtx_lock(&(PS)->mtx) + erts_mtx_lock(&(PS)->mtx) #define ERTS_POLLSET_UNLOCK(PS) \ - erts_smp_mtx_unlock(&(PS)->mtx) - -#define ERTS_POLLSET_SET_POLLED_CHK(PS) \ - ((int) erts_atomic32_xchg_nob(&(PS)->polled, (erts_aint32_t) 1)) -#define ERTS_POLLSET_UNSET_POLLED(PS) \ - erts_atomic32_set_nob(&(PS)->polled, (erts_aint32_t) 0) -#define ERTS_POLLSET_IS_POLLED(PS) \ - ((int) erts_atomic32_read_nob(&(PS)->polled)) + erts_mtx_unlock(&(PS)->mtx) #else -#define ERTS_POLLSET_LOCK(PS) -#define ERTS_POLLSET_UNLOCK(PS) -#define ERTS_POLLSET_SET_POLLED_CHK(PS) 0 -#define ERTS_POLLSET_UNSET_POLLED(PS) -#define ERTS_POLLSET_IS_POLLED(PS) 0 - -#endif - -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE -#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \ - erts_smp_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 1) -#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \ - erts_smp_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 0) -#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \ - ((int) erts_smp_atomic32_read_nob(&(PS)->have_update_requests)) -#else #define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) #define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) #define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) 0 -#endif -#if ERTS_POLL_USE_FALLBACK -# if ERTS_POLL_USE_POLL -# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_poll_fds > 1) -# elif ERTS_POLL_USE_SELECT -# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_select_fds > 1) -# endif +#define ERTS_POLLSET_LOCK(PS) +#define ERTS_POLLSET_UNLOCK(PS) + #endif 
+ /* * --- Data types ------------------------------------------------------------ */ -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + #define ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE 128 typedef struct ErtsPollSetUpdateRequestsBlock_ ErtsPollSetUpdateRequestsBlock; @@ -223,266 +243,190 @@ struct ErtsPollSetUpdateRequestsBlock_ { int fds[ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE]; }; -#endif - - -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE # define ERTS_POLL_FD_FLG_INURQ (((unsigned short) 1) << 0) -#endif -#if ERTS_POLL_USE_FALLBACK -# define ERTS_POLL_FD_FLG_INFLBCK (((unsigned short) 1) << 1) -# define ERTS_POLL_FD_FLG_USEFLBCK (((unsigned short) 1) << 2) -#endif -#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) -# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 3) -#endif +# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 1) + typedef struct { #if ERTS_POLL_USE_POLL int pix; #endif + ErtsPollEvents used_events; ErtsPollEvents events; -#if ERTS_POLL_COALESCE_KP_RES - unsigned short res_ev_ix; -#endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK unsigned short flags; -#endif } ErtsFdStatus; - -#if ERTS_POLL_COALESCE_KP_RES -/* res_ev_ix max value */ -#define ERTS_POLL_MAX_RES ((1 << sizeof(unsigned short)*8) - 1) -#endif - -#if ERTS_POLL_USE_KQUEUE - -#define ERTS_POLL_KQ_OP_HANDLED 1 -#define ERTS_POLL_KQ_OP_DEL_R 2 -#define ERTS_POLL_KQ_OP_DEL_W 3 -#define ERTS_POLL_KQ_OP_ADD_R 4 -#define ERTS_POLL_KQ_OP_ADD_W 5 -#define ERTS_POLL_KQ_OP_ADD2_R 6 -#define ERTS_POLL_KQ_OP_ADD2_W 7 - #endif -struct ErtsPollSet_ { - ErtsPollSet next; +/* + * This struct is not really exported, but it's nice to + * get unique names in debugger for kp/nkp + */ +struct ERTS_POLL_EXPORT(erts_pollset) { + int id; int internal_fd_limit; - ErtsFdStatus *fds_status; - erts_smp_atomic_t no_of_user_fds; - int fds_status_len; + erts_atomic_t no_of_user_fds; + #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; - int res_events_len; -#if 
ERTS_POLL_USE_EPOLL - struct epoll_event *res_events; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *res_events; -#elif ERTS_POLL_USE_DEVPOLL - struct pollfd *res_events; -#endif + int oneshot; #endif /* ERTS_POLL_USE_KERNEL_POLL */ + #if ERTS_POLL_USE_POLL int next_poll_fds_ix; int no_poll_fds; int poll_fds_len; - struct pollfd*poll_fds; + struct pollfd *poll_fds; #elif ERTS_POLL_USE_SELECT int next_sel_fd; int max_fd; -#if ERTS_POLL_USE_FALLBACK - int no_select_fds; -#endif ERTS_fd_set input_fds; ERTS_fd_set res_input_fds; ERTS_fd_set output_fds; ERTS_fd_set res_output_fds; +#elif ERTS_POLL_USE_DEVPOLL + struct pollfd *poll_fds; + int poll_fds_ix; #endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + ErtsFdStatus *fds_status; + int fds_status_len; ErtsPollSetUpdateRequestsBlock update_requests; ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; - erts_smp_atomic32_t have_update_requests; -#endif -#ifdef ERTS_SMP - erts_atomic32_t polled; - erts_smp_mtx_t mtx; -#endif -#if ERTS_POLL_USE_WAKEUP_PIPE - int wake_fds[2]; + erts_atomic32_t have_update_requests; + erts_mtx_t mtx; +#else + int do_wakeup; #endif + #if ERTS_POLL_USE_TIMERFD int timer_fd; #endif -#if ERTS_POLL_USE_FALLBACK - int fallback_used; -#endif -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT + ErtsMonotonicTime timeout_time; erts_atomic32_t wakeup_state; -#endif - erts_atomic64_t timeout_time; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - erts_smp_atomic_t no_avoided_wakeups; - erts_smp_atomic_t no_avoided_interrupts; - erts_smp_atomic_t no_interrupt_timed; -#endif + int wake_fds[2]; }; void erts_silence_warn_unused_result(long unused); static void fatal_error(char *format, ...); -static void fatal_error_async_signal_safe(char *error_str); static int max_fds = -1; -static ErtsPollSet pollsets; -static erts_smp_spinlock_t pollsets_lock; #if ERTS_POLL_USE_POLL +#if !ERTS_POLL_IS_FALLBACK +static ERTS_INLINE short ev2pollev(ErtsPollEvents ev) +{ + return 
ERTS_POLL_EV_E2N(ev); +} + +static ERTS_INLINE ErtsPollEvents pollev2ev(short ev) +{ + return ERTS_POLL_EV_N2E(ev); +} + +#else /* ERTS_POLL_IS_FALLBACK */ + static ERTS_INLINE short ev2pollev(ErtsPollEvents ev) { -#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE - return ERTS_POLL_EV_E2N(ev); -#else /* Note, we only map events we are interested in */ short res_ev = (short) 0; if (ev & ERTS_POLL_EV_IN) - res_ev |= ERTS_POLL_EV_NKP_IN; + res_ev |= ERTS_POLL_EV_NKP_IN; if (ev & ERTS_POLL_EV_OUT) - res_ev |= ERTS_POLL_EV_NKP_OUT; + res_ev |= ERTS_POLL_EV_NKP_OUT; return res_ev; -#endif } static ERTS_INLINE ErtsPollEvents pollev2ev(short ev) { -#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE - return ERTS_POLL_EV_N2E(ev); -#else /* Note, we only map events we are interested in */ ErtsPollEvents res_ev = (ErtsPollEvents) 0; if (ev & ERTS_POLL_EV_NKP_IN) - res_ev |= ERTS_POLL_EV_IN; + res_ev |= ERTS_POLL_EV_IN; if (ev & ERTS_POLL_EV_NKP_OUT) - res_ev |= ERTS_POLL_EV_OUT; + res_ev |= ERTS_POLL_EV_OUT; if (ev & ERTS_POLL_EV_NKP_ERR) - res_ev |= ERTS_POLL_EV_ERR; + res_ev |= ERTS_POLL_EV_ERR; if (ev & ERTS_POLL_EV_NKP_NVAL) - res_ev |= ERTS_POLL_EV_NVAL; - return res_ev; -#endif + res_ev |= ERTS_POLL_EV_NVAL; + return res_ev; } -#endif +#endif /* !ERTS_POLL_IS_FALLBACK */ + +#endif /* ERTS_POLL_USE_POLL */ + #ifdef HARD_DEBUG static void check_poll_result(ErtsPollResFd pr[], int len); -#if ERTS_POLL_USE_DEVPOLL -static void check_poll_status(ErtsPollSet ps); -#endif /* ERTS_POLL_USE_DEVPOLL */ #endif /* HARD_DEBUG */ -#ifdef ERTS_POLL_DEBUG_PRINT +#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG) +static void check_poll_status(ErtsPollSet *ps); +#endif /* ERTS_POLL_USE_DEVPOLL && DEBUG */ static void print_misc_debug_info(void); +#if ERTS_POLL_USE_EPOLL +uint32_t epoll_events(int kp_fd, int fd); #endif -static ERTS_INLINE void -init_timeout_time(ErtsPollSet ps) -{ - erts_atomic64_init_nob(&ps->timeout_time, - (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX); -} - -static 
ERTS_INLINE void -set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time) -{ - erts_atomic64_set_relb(&ps->timeout_time, - (erts_aint64_t) time); -} - -static ERTS_INLINE ErtsMonotonicTime -get_timeout_time(ErtsPollSet ps) -{ - return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time); -} - #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 static ERTS_INLINE void -reset_wakeup_state(ErtsPollSet ps) +reset_wakeup_state(ErtsPollSet *ps) { -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); -#endif } static ERTS_INLINE int -is_woken(ErtsPollSet ps) +is_woken(ErtsPollSet *ps) { -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; -#else - return 0; -#endif } static ERTS_INLINE int -is_interrupted_reset(ErtsPollSet ps) +is_interrupted_reset(ErtsPollSet *ps) { -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); -#else - return 0; -#endif } static ERTS_INLINE void -woke_up(ErtsPollSet ps) +woke_up(ErtsPollSet *ps) { -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); -#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ -#if ERTS_POLL_USE_WAKEUP_PIPE - static ERTS_INLINE void -wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe) +wake_poller(ErtsPollSet *ps, int interrupted) { int wake; - if (async_signal_safe) - wake = 1; - else { - erts_aint32_t wakeup_state; - if (!interrupted) - wakeup_state = 
erts_atomic32_cmpxchg_relb(&ps->wakeup_state, - ERTS_POLL_WOKEN, - ERTS_POLL_NOT_WOKEN); - else - wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state, - ERTS_POLL_WOKEN_INTR); - wake = wakeup_state == ERTS_POLL_NOT_WOKEN; - } - /* - * NOTE: This function might be called from signal handlers in the - * non-smp case; therefore, it has to be async-signal safe in - * the non-smp case. - */ - if (wake) { + erts_aint32_t wakeup_state; + if (!interrupted) + wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state, + ERTS_POLL_WOKEN, + ERTS_POLL_NOT_WOKEN); + else + wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state, + ERTS_POLL_WOKEN_INTR); + wake = wakeup_state == ERTS_POLL_NOT_WOKEN; + + if (wake) + { ssize_t res; + DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted); if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ do { @@ -490,36 +434,27 @@ wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe) res = write(ps->wake_fds[1], "!", 1); } while (res < 0 && errno == EINTR); if (res <= 0 && errno != ERRNO_BLOCK) { - if (async_signal_safe) - fatal_error_async_signal_safe(__FILE__ - ":XXX:wake_poller(): " - "Failed to write on wakeup pipe\n"); - else - fatal_error("%s:%d:wake_poller(): " - "Failed to write to wakeup pipe fd=%d: " - "%s (%d)\n", - __FILE__, __LINE__, - ps->wake_fds[1], - erl_errno_id(errno), errno); + fatal_error("%s:%d:wake_poller(): " + "Failed to write to wakeup pipe fd=%d: " + "%s (%d)\n", + __FILE__, __LINE__, + ps->wake_fds[1], + erl_errno_id(errno), errno); } } } static ERTS_INLINE void -cleanup_wakeup_pipe(ErtsPollSet ps) +cleanup_wakeup_pipe(ErtsPollSet *ps) { -#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT int intr = 0; -#endif int fd = ps->wake_fds[0]; int res; do { char buf[32]; res = read(fd, buf, sizeof(buf)); -#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT if (res > 0) intr = 1; -#endif } while (res > 0 || (res < 0 && errno == EINTR)); if (res < 0 && errno != ERRNO_BLOCK) { fatal_error("%s:%d:cleanup_wakeup_pipe(): " @@ 
-529,14 +464,12 @@ cleanup_wakeup_pipe(ErtsPollSet ps) fd, erl_errno_id(errno), errno); } -#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT if (intr) erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); -#endif } static void -create_wakeup_pipe(ErtsPollSet ps) +create_wakeup_pipe(ErtsPollSet *ps) { int do_wake = 0; int wake_fds[2]; @@ -553,20 +486,13 @@ create_wakeup_pipe(ErtsPollSet ps) SET_NONBLOCKING(wake_fds[0]); SET_NONBLOCKING(wake_fds[1]); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("wakeup fds = {%d, %d}\n", wake_fds[0], wake_fds[1]); -#endif + DEBUG_PRINT("wakeup fds = {%d, %d}", ps, wake_fds[0], wake_fds[1]); ERTS_POLL_EXPORT(erts_poll_control)(ps, wake_fds[0], + ERTS_POLL_OP_ADD, ERTS_POLL_EV_IN, - 1, &do_wake); -#if ERTS_POLL_USE_FALLBACK - /* We depend on the wakeup pipe being handled by kernel poll */ - if (ps->fds_status[wake_fds[0]].flags & ERTS_POLL_FD_FLG_INFLBCK) - fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", - __FILE__, __LINE__); -#endif + &do_wake); if (ps->internal_fd_limit <= wake_fds[1]) ps->internal_fd_limit = wake_fds[1] + 1; if (ps->internal_fd_limit <= wake_fds[0]) @@ -575,8 +501,6 @@ create_wakeup_pipe(ErtsPollSet ps) ps->wake_fds[1] = wake_fds[1]; } -#endif /* ERTS_POLL_USE_WAKEUP_PIPE */ - /* * --- timer fd ----------------------------------------------------------- */ @@ -587,28 +511,22 @@ create_wakeup_pipe(ErtsPollSet ps) timeouts, i.e. we want to sleep with < ms accuracy. 
*/ static void -create_timerfd(ErtsPollSet ps) +create_timerfd(ErtsPollSet *ps) { int do_wake = 0; - int timer_fd; - timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + int timer_fd = timerfd_create(CLOCK_MONOTONIC,0); ERTS_POLL_EXPORT(erts_poll_control)(ps, timer_fd, + ERTS_POLL_OP_ADD, ERTS_POLL_EV_IN, - 1, &do_wake); -#if ERTS_POLL_USE_FALLBACK - /* We depend on the wakeup pipe being handled by kernel poll */ - if (ps->fds_status[timer_fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n", - __FILE__, __LINE__); -#endif + &do_wake); if (ps->internal_fd_limit <= timer_fd) ps->internal_fd_limit = timer_fd + 1; ps->timer_fd = timer_fd; } static ERTS_INLINE void -timerfd_set(ErtsPollSet ps, struct itimerspec *its) +timerfd_set(ErtsPollSet *ps, struct itimerspec *its) { #ifdef DEBUG struct itimerspec old_its; @@ -626,7 +544,7 @@ timerfd_set(ErtsPollSet ps, struct itimerspec *its) } static ERTS_INLINE int -timerfd_clear(ErtsPollSet ps, int res, int max_res) { +timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) { struct itimerspec its; /* we always have to clear the timer */ @@ -637,7 +555,7 @@ timerfd_clear(ErtsPollSet ps, int res, int max_res) { timerfd_settime(ps->timer_fd, 0, &its, NULL); /* only timeout fd triggered */ - if (res == 1 && ps->res_events[0].data.fd == ps->timer_fd) + if (res == 1 && pr[0].data.fd == ps->timer_fd) return 0; return res; @@ -645,14 +563,14 @@ timerfd_clear(ErtsPollSet ps, int res, int max_res) { #endif /* ERTS_POLL_USE_TIMERFD */ - /* * --- Poll set update requests ---------------------------------------------- */ -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void -enqueue_update_request(ErtsPollSet ps, int fd) +enqueue_update_request(ErtsPollSet *ps, int fd) { ErtsPollSetUpdateRequestsBlock *urqbp; @@ -667,13 +585,11 @@ enqueue_update_request(ErtsPollSet ps, int fd) urqbp = ps->curr_upd_req_block; if (urqbp->len == 
ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE) { - ASSERT(!urqbp->next); urqbp = erts_alloc(ERTS_ALC_T_POLLSET_UPDREQ, sizeof(ErtsPollSetUpdateRequestsBlock)); - ps->curr_upd_req_block->next = urqbp; - ps->curr_upd_req_block = urqbp; - urqbp->next = NULL; + urqbp->next = ps->curr_upd_req_block; urqbp->len = 0; + ps->curr_upd_req_block = urqbp; } ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INURQ; @@ -681,29 +597,29 @@ enqueue_update_request(ErtsPollSet ps, int fd) } static ERTS_INLINE void -free_update_requests_block(ErtsPollSet ps, +free_update_requests_block(ErtsPollSet *ps, ErtsPollSetUpdateRequestsBlock *urqbp) { if (urqbp != &ps->update_requests) erts_free(ERTS_ALC_T_POLLSET_UPDREQ, (void *) urqbp); else { - urqbp->next = NULL; urqbp->len = 0; } } -#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ /* * --- Growing poll set structures ------------------------------------------- */ -#ifndef ERTS_KERNEL_POLL_VERSION /* only one shared implementation */ +#if !ERTS_NO_KERNEL_POLL_VERSION || !ERTS_ENABLE_KERNEL_POLL +/* only one shared implementation */ #define ERTS_FD_TABLE_MIN_LENGTH 1024 #define ERTS_FD_TABLE_EXP_THRESHOLD (2048*1024) -int erts_poll_new_table_len (int old_len, int need_len) +int erts_poll_new_table_len(int old_len, int need_len) { int new_len; @@ -713,7 +629,7 @@ int erts_poll_new_table_len (int old_len, int need_len) } else { new_len = old_len; - do { + do { if (new_len < ERTS_FD_TABLE_EXP_THRESHOLD) new_len *= 2; else @@ -726,30 +642,9 @@ int erts_poll_new_table_len (int old_len, int need_len) } #endif -#if ERTS_POLL_USE_KERNEL_POLL -static void -grow_res_events(ErtsPollSet ps, int new_len) -{ - size_t new_size = sizeof( -#if ERTS_POLL_USE_EPOLL - struct epoll_event -#elif ERTS_POLL_USE_DEVPOLL - struct pollfd -#elif ERTS_POLL_USE_KQUEUE - struct kevent -#endif - ) * erts_poll_new_table_len(ps->res_events_len, new_len); - /* We do not need to save previously stored data */ - if (ps->res_events) - 
erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events); - ps->res_events = erts_alloc(ERTS_ALC_T_POLL_RES_EVS, new_size); - ps->res_events_len = new_len; -} -#endif /* ERTS_POLL_USE_KERNEL_POLL */ - #if ERTS_POLL_USE_POLL static void -grow_poll_fds(ErtsPollSet ps, int min_ix) +grow_poll_fds(ErtsPollSet *ps, int min_ix) { int i; int new_len = erts_poll_new_table_len(ps->poll_fds_len, min_ix + 1); @@ -793,12 +688,20 @@ ensure_select_fds(int fd, ERTS_fd_set* in, ERTS_fd_set* out) grow_select_fds(fd, out); } } +static ERTS_INLINE int +check_select_fds(int fd, ERTS_fd_set* in, ERTS_fd_set* out) +{ + ASSERT(in->sz == out->sz); + return (ERTS_FD_SIZE(fd+1) <= in->sz); +} #else # define ensure_select_fds(fd, in, out) do {} while(0) +# define check_select_fds(fd, in, out) (1) #endif /* _DARWIN_UNLIMITED_SELECT */ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE static void -grow_fds_status(ErtsPollSet ps, int min_fd) +grow_fds_status(ErtsPollSet *ps, int min_fd) { int i; int new_len = erts_poll_new_table_len(ps->fds_status_len, min_fd + 1); @@ -817,461 +720,54 @@ grow_fds_status(ErtsPollSet ps, int min_fd) #endif ps->fds_status[i].used_events = (ErtsPollEvents) 0; ps->fds_status[i].events = (ErtsPollEvents) 0; -#if ERTS_POLL_COALESCE_KP_RES - ps->fds_status[i].res_ev_ix = (unsigned short) ERTS_POLL_MAX_RES; -#endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK ps->fds_status[i].flags = (unsigned short) 0; -#endif } ps->fds_status_len = new_len; } +#endif /* * --- Selecting fd to poll on ----------------------------------------------- */ -#if ERTS_POLL_USE_FALLBACK -static int update_fallback_pollset(ErtsPollSet ps, int fd); -#endif - -static ERTS_INLINE int -need_update(ErtsPollSet ps, int fd) -{ -#if ERTS_POLL_USE_KERNEL_POLL - int reset; -#endif - - ASSERT(fd < ps->fds_status_len); - -#if ERTS_POLL_USE_KERNEL_POLL - reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST); - if (reset && !ps->fds_status[fd].used_events) { - ps->fds_status[fd].flags &= 
~ERTS_POLL_FD_FLG_RST; - reset = 0; - } -#elif defined(ERTS_SMP) - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; -#endif - - if (ps->fds_status[fd].used_events != ps->fds_status[fd].events) - return 1; - -#if ERTS_POLL_USE_KERNEL_POLL - return reset; -#else - return 0; -#endif -} - -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - -#if ERTS_POLL_USE_KQUEUE -#define ERTS_POLL_MIN_BATCH_BUF_SIZE 128 -#else -#define ERTS_POLL_MIN_BATCH_BUF_SIZE 64 -#endif - -typedef struct { - int len; - int size; -#if ERTS_POLL_USE_DEVPOLL - struct pollfd *buf; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *buf; - struct kevent *ebuf; -#endif -} ErtsPollBatchBuf; - - -static ERTS_INLINE void -setup_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - bbp->len = 0; -#if ERTS_POLL_USE_DEVPOLL - bbp->size = ps->res_events_len; - bbp->buf = ps->res_events; -#elif ERTS_POLL_USE_KQUEUE - bbp->size = ps->res_events_len/2; - bbp->buf = ps->res_events; - bbp->ebuf = bbp->buf + bbp->size; -#endif -} - - -#if ERTS_POLL_USE_DEVPOLL - -static void -write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - ssize_t wres; - char *buf = (char *) bbp->buf; - size_t buf_size = sizeof(struct pollfd)*bbp->len; - - while (1) { - wres = write(ps->kp_fd, (void *) buf, buf_size); - if (wres < 0) { - if (errno == EINTR) - continue; - fatal_error("%s:%d:write_batch_buf(): " - "Failed to write to /dev/poll: " - "%s (%d)\n", - __FILE__, __LINE__, - erl_errno_id(errno), errno); - } - buf_size -= wres; - if (buf_size <= 0) - break; - buf += wres; - } - - if (buf_size < 0) { - fatal_error("%s:%d:write_devpoll_buf(): Internal error\n", - __FILE__, __LINE__); - } - bbp->len = 0; -} - -#elif ERTS_POLL_USE_KQUEUE - -static void -write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp) -{ - int res; - int len = bbp->len; - struct kevent *buf = bbp->buf; - struct timespec ts = {0, 0}; - - do { - res = kevent(ps->kp_fd, buf, len, NULL, 0, &ts); - } while (res < 0 && errno == EINTR); - if (res < 0) { - int i; - struct kevent 
*ebuf = bbp->ebuf; - do { - res = kevent(ps->kp_fd, buf, len, ebuf, len, &ts); - } while (res < 0 && errno == EINTR); - if (res < 0) { - fatal_error("%s:%d: kevent() failed: %s (%d)\n", - __FILE__, __LINE__, erl_errno_id(errno), errno); - } - for (i = 0; i < res; i++) { - if (ebuf[i].flags & EV_ERROR) { - short filter; - int fd = (int) ebuf[i].ident; - - switch ((int) (long) ebuf[i].udata) { - - /* - * Since we use a lazy update approach EV_DELETE will - * frequently fail. This since kqueue automatically - * removes a file descriptor that is closed from the - * poll set. - */ - case ERTS_POLL_KQ_OP_DEL_R: - case ERTS_POLL_KQ_OP_DEL_W: - case ERTS_POLL_KQ_OP_HANDLED: - break; - - /* - * According to the kqueue man page EVFILT_READ support - * does not imply EVFILT_WRITE support; therefore, - * if an EV_ADD fail, we may have to remove other - * events on this fd in the kqueue pollset before - * adding fd to the fallback pollset. - */ - case ERTS_POLL_KQ_OP_ADD_W: - if (ps->fds_status[fd].used_events & ERTS_POLL_EV_IN) { - filter = EVFILT_READ; - goto rm_add_fb; - } - goto add_fb; - case ERTS_POLL_KQ_OP_ADD_R: - if (ps->fds_status[fd].used_events & ERTS_POLL_EV_OUT) { - filter = EVFILT_WRITE; - goto rm_add_fb; - } - goto add_fb; - case ERTS_POLL_KQ_OP_ADD2_W: - case ERTS_POLL_KQ_OP_ADD2_R: { - int j; - for (j = i+1; j < res; j++) { - if (fd == (int) ebuf[j].ident) { - ebuf[j].udata = (void *) ERTS_POLL_KQ_OP_HANDLED; - if (!(ebuf[j].flags & EV_ERROR)) { - switch ((int) (long) ebuf[j].udata) { - case ERTS_POLL_KQ_OP_ADD2_W: - filter = EVFILT_WRITE; - goto rm_add_fb; - case ERTS_POLL_KQ_OP_ADD2_R: - filter = EVFILT_READ; - goto rm_add_fb; - default: - fatal_error("%s:%d:write_batch_buf(): " - "Internal error", - __FILE__, __LINE__); - break; - } - } - goto add_fb; - } - } - /* The other add succeded... */ - filter = ((((int) (long) ebuf[i].udata) - == ERTS_POLL_KQ_OP_ADD2_W) - ? 
EVFILT_READ - : EVFILT_WRITE); - rm_add_fb: - { - struct kevent kev; - struct timespec ts = {0, 0}; - EV_SET(&kev, fd, filter, EV_DELETE, 0, 0, 0); - (void) kevent(ps->kp_fd, &kev, 1, NULL, 0, &ts); - } - - add_fb: - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; - ASSERT(ps->fds_status[fd].used_events); - ps->fds_status[fd].used_events = 0; - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - update_fallback_pollset(ps, fd); - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); - break; - } - default: - fatal_error("%s:%d:write_batch_buf(): Internal error", - __FILE__, __LINE__); - break; - } - } - } - } - bbp->len = 0; -} - -#endif /* ERTS_POLL_USE_KQUEUE */ - -static ERTS_INLINE void -batch_update_pollset(ErtsPollSet ps, int fd, ErtsPollBatchBuf *bbp) -{ - int buf_len; -#if ERTS_POLL_USE_DEVPOLL - short events; - struct pollfd *buf; -#elif ERTS_POLL_USE_KQUEUE - struct kevent *buf; -#endif - -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Doing lazy update on fd=%d\n", fd); -#endif - - if (!need_update(ps, fd)) - return; - - /* Make sure we have room for at least maximum no of entries - per fd */ - if (bbp->size - bbp->len < 2) - write_batch_buf(ps, bbp); - - buf_len = bbp->len; - buf = bbp->buf; - - ASSERT(fd < ps->fds_status_len); - -#if ERTS_POLL_USE_DEVPOLL - events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); - if (!events) { - buf[buf_len].events = POLLREMOVE; - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - } - else if (!ps->fds_status[fd].used_events) { - buf[buf_len].events = events; - erts_smp_atomic_inc_nob(&ps->no_of_user_fds); - } - else { - if ((ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) - || (ps->fds_status[fd].used_events & ~events)) { - /* Reset or removed events... 
*/ - buf[buf_len].fd = fd; - buf[buf_len].events = POLLREMOVE; - buf[buf_len++].revents = 0; - } - buf[buf_len].events = events; - } - buf[buf_len].fd = fd; - buf[buf_len++].revents = 0; - -#elif ERTS_POLL_USE_KQUEUE - - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) - update_fallback_pollset(ps, fd); - else { /* Remove from fallback and try kqueue */ - ErtsPollEvents events = ps->fds_status[fd].events; - ps->fds_status[fd].events = (ErtsPollEvents) 0; - update_fallback_pollset(ps, fd); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - if (events) { - ps->fds_status[fd].events = events; - goto try_kqueue; - } - } - } - else { - ErtsPollEvents events, used_events; - int mod_w, mod_r; - try_kqueue: - events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events); - used_events = ERTS_POLL_EV_E2N(ps->fds_status[fd].used_events); - if (!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) { - if (!used_events && - (events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) - goto do_add_rw; - mod_r = ((events & ERTS_POLL_EV_IN) - != (used_events & ERTS_POLL_EV_IN)); - mod_w = ((events & ERTS_POLL_EV_OUT) - != (used_events & ERTS_POLL_EV_OUT)); - goto do_mod; - } - else { /* Reset */ - if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) { - do_add_rw: - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_R); - buf_len++; - EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_W); - buf_len++; - - } - else { - mod_r = 1; - mod_w = 1; - do_mod: - if (mod_r) { - if (events & ERTS_POLL_EV_IN) { - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_R); - buf_len++; - } - else if (used_events & ERTS_POLL_EV_IN) { - EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_DELETE, - 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_R); - buf_len++; - } - } - if (mod_w) { - if (events & ERTS_POLL_EV_OUT) { - 
EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD, - 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_W); - buf_len++; - } - else if (used_events & ERTS_POLL_EV_OUT) { - EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_DELETE, - 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_W); - buf_len++; - } - } - } - } - if (used_events) { - if (!events) { - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - } - } - else { - if (events) - erts_smp_atomic_inc_nob(&ps->no_of_user_fds); - } - ASSERT((events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); - ASSERT((used_events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0); - } - -#endif - - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; - ps->fds_status[fd].used_events = ps->fds_status[fd].events; - - bbp->len = buf_len; -} - -#else /* !ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ - #if ERTS_POLL_USE_EPOLL static int -#if ERTS_POLL_USE_CONCURRENT_UPDATE -conc_update_pollset(ErtsPollSet ps, int fd, int *update_fallback) -#else -update_pollset(ErtsPollSet ps, int fd) -#endif +update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) { int res; - int op; + int epoll_op = EPOLL_CTL_MOD; struct epoll_event epe_templ; struct epoll_event epe; - ASSERT(fd < ps->fds_status_len); - - if (!need_update(ps, fd)) - return 0; - -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Doing update on fd=%d\n", fd); -#endif - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) { -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (!*update_fallback) { - *update_fallback = 1; - return 0; - } -#endif - if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) { - return update_fallback_pollset(ps, fd); - } - else { /* Remove from fallback and try epoll */ - ErtsPollEvents events = ps->fds_status[fd].events; - ps->fds_status[fd].events = (ErtsPollEvents) 0; - res = update_fallback_pollset(ps, fd); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - if (!events) - return res; - ps->fds_status[fd].events = events; - } - } - - epe_templ.events = 
ERTS_POLL_EV_E2N(ps->fds_status[fd].events); + epe_templ.events = ERTS_POLL_EV_E2N(events); epe_templ.data.fd = fd; + if (ps->oneshot) + epe_templ.events |= EPOLLONESHOT; + #ifdef VALGRIND /* Silence invalid valgrind warning ... */ memset((void *) &epe.data, 0, sizeof(epoll_data_t)); #endif - if (epe_templ.events && ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) { - do { - /* We init 'epe' every time since epoll_ctl() may modify it - (not declared const and not documented as const). */ - epe.events = epe_templ.events; - epe.data.fd = epe_templ.data.fd; - res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); - } while (res != 0 && errno == EINTR); - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - ps->fds_status[fd].used_events = 0; - } - - if (!epe_templ.events) { + switch (op) { + case ERTS_POLL_OP_DEL: /* A note on EPOLL_CTL_DEL: linux kernel versions before 2.6.9 need a non-NULL event pointer even though it is ignored... */ - op = EPOLL_CTL_DEL; - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - } - else if (!ps->fds_status[fd].used_events) { - op = EPOLL_CTL_ADD; - erts_smp_atomic_inc_nob(&ps->no_of_user_fds); - } - else { - op = EPOLL_CTL_MOD; + epoll_op = EPOLL_CTL_DEL; + epe_templ.events = 0; + erts_atomic_dec_nob(&ps->no_of_user_fds); + break; + case ERTS_POLL_OP_ADD: + epoll_op = EPOLL_CTL_ADD; + erts_atomic_inc_nob(&ps->no_of_user_fds); + break; + case ERTS_POLL_OP_MOD: + epoll_op = EPOLL_CTL_MOD; + break; + default: + ASSERT(0); + break; } do { @@ -1279,33 +775,32 @@ update_pollset(ErtsPollSet ps, int fd) (not declared const and not documented as const). */ epe.events = epe_templ.events; epe.data.fd = epe_templ.data.fd; - res = epoll_ctl(ps->kp_fd, op, fd, &epe); + res = epoll_ctl(ps->kp_fd, epoll_op, fd, &epe); } while (res != 0 && errno == EINTR); -#if defined(ERTS_POLL_DEBUG_PRINT) && 1 +#if ERTS_POLL_DEBUG_PRINT { int saved_errno = errno; - erts_printf("%s = epoll_ctl(%d, %s, %d, {Ox%x, %d})\n", - res == 0 ? 
"0" : erl_errno_id(errno), - ps->kp_fd, - (op == EPOLL_CTL_ADD - ? "EPOLL_CTL_ADD" - : (op == EPOLL_CTL_MOD - ? "EPOLL_CTL_MOD" - : (op == EPOLL_CTL_DEL - ? "EPOLL_CTL_DEL" - : "UNKNOWN"))), - fd, - epe_templ.events, - fd); + DEBUG_PRINT_FD("%s = epoll_ctl(%d, %s, %d, {0x%x, %d})", + ps, fd, + res == 0 ? "0" : erl_errno_id(errno), + ps->kp_fd, + (epoll_op == EPOLL_CTL_ADD + ? "EPOLL_CTL_ADD" + : (epoll_op == EPOLL_CTL_MOD + ? "EPOLL_CTL_MOD" + : (epoll_op == EPOLL_CTL_DEL + ? "EPOLL_CTL_DEL" + : "UNKNOWN"))), + fd, + epe_templ.events, + fd); errno = saved_errno; } #endif - if (res == 0) - ps->fds_status[fd].used_events = ps->fds_status[fd].events; - else { + if (res != 0) { switch (op) { - case EPOLL_CTL_MOD: + case ERTS_POLL_OP_MOD: epe.events = 0; do { /* We init 'epe' every time since epoll_ctl() may modify it @@ -1314,29 +809,18 @@ update_pollset(ErtsPollSet ps, int fd) epe.data.fd = fd; res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe); } while (res != 0 && errno == EINTR); - ps->fds_status[fd].used_events = 0; /* Fall through ... */ - case EPOLL_CTL_ADD: { - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK; - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (!*update_fallback) { - *update_fallback = 1; - return 0; - } -#endif - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - res = update_fallback_pollset(ps, fd); - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); + case ERTS_POLL_OP_ADD: { + erts_atomic_dec_nob(&ps->no_of_user_fds); + res = ERTS_POLL_EV_NVAL; break; } - case EPOLL_CTL_DEL: { + case ERTS_POLL_OP_DEL: { /* * Since we use a lazy update approach EPOLL_CTL_DEL will * frequently fail. This since epoll automatically removes * a filedescriptor that is closed from the poll set. 
*/ - ps->fds_status[fd].used_events = 0; res = 0; break; } @@ -1345,68 +829,308 @@ update_pollset(ErtsPollSet ps, int fd) __FILE__, __LINE__); break; } + } else { + res = events; } - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; return res; } -#if ERTS_POLL_USE_CONCURRENT_UPDATE +#endif /* ERTS_POLL_USE_EPOLL */ + +#if ERTS_POLL_USE_KQUEUE + +/* Some versions of the EV_SET macro used kevp multiple times, + so we define out own version that make sure that it is safe + to do kevp++ in the argument list. */ +#define ERTS_EV_SET(kevp, a, b, c, f) do { \ + struct kevent *kevp_ = kevp; \ + EV_SET(kevp_, a, b, c, 0, 0, f); \ + } while(0) + static int -update_pollset(ErtsPollSet ps, int fd) +update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) { - int update_fallback = 1; - return conc_update_pollset(ps, fd, &update_fallback); -} -#endif + int res = 0, len = 0; + struct kevent evts[2]; + struct timespec ts = {0, 0}; + uint32_t oneshot = 0; + + if (op == ERTS_POLL_OP_ADD) { + /* This is a hack to make the "noshell" option work; kqueue can poll + * these fds but will not report EV_EOF, so we return NVAL to use the + * fallback instead. + * + * This may be common to all pipes but we have no way to tell whether + * an fd is a pipe or not. */ + switch (fd) { + case STDIN_FILENO: + case STDOUT_FILENO: + case STDERR_FILENO: + return ERTS_POLL_EV_NVAL; + default: + break; + } + } + +#if defined(EV_DISPATCH) && !defined(__OpenBSD__) + /* If we have EV_DISPATCH we use it, unless we are on OpenBSD as the + behavior of EV_EOF seems to be edge triggered there and we need it + to be level triggered. + + The kevent descriptions for both read and write are added on OP_ADD + and removed on OP_DEL. And then after than only EV_ENABLE|EV_DISPATCH + are used. 
+ + It could be possible to not modify the pollset when disabling and/or + deleting events, but that may cause the poll threads to be awoken + a lot more than they should so we take the cost here instead of + in the poll thread. + + Note: We need to have EV_DISPATCH both when the event is enabled and + disabled, as otherwise the event may be triggered twice on each re-arm. + Not sure if this is intended or not (can't find anything about it in the + man page), but it seems to be the way it works... + */ + + if (ps->oneshot) + oneshot = EV_DISPATCH; + + if (op == ERTS_POLL_OP_DEL) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + /* We could probably skip this delete, do we want to? */ + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE, (void *) 0); + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE, (void *) 0); + } else if (op == ERTS_POLL_OP_ADD) { + uint32_t flags; + erts_atomic_inc_nob(&ps->no_of_user_fds); + + flags = EV_ADD|oneshot; + flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE); + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); + + flags = EV_ADD|oneshot; + flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE); + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } else { + uint32_t flags; + ASSERT(op == ERTS_POLL_OP_MOD); + + flags = oneshot; + flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE; + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); + + flags = oneshot; + flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE; + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } +#else + uint32_t flags = EV_ADD; -#endif /* ERTS_POLL_USE_EPOLL */ + if (ps->oneshot) flags |= EV_ONESHOT; -#endif /* ERTS_POLL_USE_BATCH_UPDATE_POLLSET */ + if (op == ERTS_POLL_OP_DEL) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + /* We don't do anything when a delete is issued. 
The fds will be removed + when they are triggered, or when they are closed. */ + events = 0; + } else if (op == ERTS_POLL_OP_ADD) { + erts_atomic_inc_nob(&ps->no_of_user_fds); + } -#if ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK + if (events & ERTS_POLL_EV_IN) { + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); + } + if (events & ERTS_POLL_EV_OUT) { + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); + } -#if ERTS_POLL_USE_FALLBACK -static int update_fallback_pollset(ErtsPollSet ps, int fd) -#else -static int update_pollset(ErtsPollSet ps, int fd) #endif -{ -#ifdef ERTS_POLL_DEBUG_PRINT -#if ERTS_POLL_USE_FALLBACK - erts_printf("Doing fallback update on fd=%d\n", fd); + if (len) + do { + res = kevent(ps->kp_fd, evts, len, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); +#if ERTS_POLL_DEBUG_PRINT + { + int saved_errno = errno, i; + char keventb[255], *keventbp = keventb; + if (res < 0) + keventbp += sprintf(keventbp,"%s = ",erl_errno_id(saved_errno)); + else + keventbp += sprintf(keventbp,"%d = ",res); + keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd); + for (i = 0; i < len; i++) { + const char *flags = "UNKNOWN"; + if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE"; + if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT"; + if (evts[i].flags == (EV_ADD)) flags = "EV_ADD"; +#ifdef EV_DISPATCH + if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH"; + if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; + if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH"; + if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; + if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; +#endif + + keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? 
", " : "", + evts[i].ident, + (evts[i].filter == EVFILT_READ + ? "EVFILT_READ" + : (evts[i].filter == EVFILT_WRITE + ? "EVFILT_WRITE" + : "UNKNOWN")), flags); + } + keventbp += sprintf(keventbp, "}, %d)", len); + DEBUG_PRINT_FD("%s", ps, fd, keventb); + errno = saved_errno; + } +#endif + if (res < 0) { + if (op != ERTS_POLL_OP_DEL) { +#ifdef EV_RECEIPT + struct kevent receipt_evts[2]; + len = 0; + ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE|EV_RECEIPT, (void *) 0); + ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE|EV_RECEIPT, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, len, receipt_evts, 2, &ts); + } while (res < 0 && errno == EINTR); #else - erts_printf("Doing update on fd=%d\n", fd); + ERTS_EV_SET(&evts[0], fd, EVFILT_WRITE, EV_DELETE, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); + ERTS_EV_SET(&evts[0], fd, EVFILT_READ, EV_DELETE, (void *) 0); + do { + res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts); + } while (res < 0 && errno == EINTR); +#endif + if (op == ERTS_POLL_OP_ADD) + erts_atomic_dec_nob(&ps->no_of_user_fds); + events = ERTS_POLL_EV_NVAL; + } else + events = 0; + } + return events; +} + +#endif /* ERTS_POLL_USE_KQUEUE */ + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + +static ERTS_INLINE void +init_batch_update(ErtsPollSet *ps, int len) +{ +#if ERTS_POLL_USE_DEVPOLL + ASSERT(ps->poll_fds == NULL); + ps->poll_fds = erts_alloc(ERTS_ALC_T_TMP, sizeof(struct pollfd) * len); + ps->poll_fds_ix = 0; #endif +} + +static ERTS_INLINE void +write_batch_update(ErtsPollSet *ps) +{ +#if ERTS_POLL_USE_DEVPOLL + ssize_t wres; + char *buf = (char *) ps->poll_fds; + size_t buf_size = sizeof(struct pollfd)*ps->poll_fds_ix; + + while (1) { + wres = write(ps->kp_fd, (void *) buf, buf_size); + if (wres < 0) { + if (errno == EINTR) + continue; + fatal_error("%s:%d:write_batch_buf(): " + "Failed to write to /dev/poll: " + "%s (%d)\n", + __FILE__, __LINE__, + erl_errno_id(errno), errno); + } +#if 
ERTS_POLL_DEBUG_PRINT + { + int saved_errno = errno, i; + char devpollb[2048], *devpollbp = devpollb; + devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd); + for (i = 0; i < wres / sizeof(struct pollfd); i++) { + if (devpollbp == devpollb) + devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd); + devpollbp += sprintf(devpollbp, "%s{fd = %d, events = %s}", + i > 0 ? ", " : "", + ps->poll_fds[i].fd, + ev2str(ps->poll_fds[i].events)); + if (devpollbp - devpollb > 512) { + devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix); + DEBUG_PRINT("%s", ps, devpollb); + devpollbp = devpollb; + } + } + devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix); + DEBUG_PRINT("%s", ps, devpollb); + errno = saved_errno; + } #endif - ASSERT(fd < ps->fds_status_len); -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].used_events - ? (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - : (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK)); + buf_size -= wres; + if (buf_size <= 0) + break; + buf += wres; + } + + if (buf_size < 0) { + fatal_error("%s:%d:write_devpoll_buf(): Internal error\n", + __FILE__, __LINE__); + } + erts_free(ERTS_ALC_T_TMP, ps->poll_fds); + ps->poll_fds = NULL; #endif +} - if (!need_update(ps, fd)) - return 0; +static ERTS_INLINE int +need_update(ErtsPollSet *ps, int fd, int *resetp) +{ + int reset; + ASSERT(fd < ps->fds_status_len); -#if ERTS_POLL_USE_FALLBACK + reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST); ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST; -#endif + + *resetp = reset; + + if (reset || ps->fds_status[fd].used_events != ps->fds_status[fd].events) + return 1; + + return 0; +} + +static int update_pollset(ErtsPollSet *ps, ErtsPollResFd pr[], int fd) +{ + int res = 0, reset = 0; + ErtsPollEvents events = ps->fds_status[fd].events; + ASSERT(fd < ps->fds_status_len); + + if (!need_update(ps, fd, &reset)) + return res; #if ERTS_POLL_USE_POLL /* --- poll 
-------------------------------- */ - if (!ps->fds_status[fd].events) { + if (!events) { int pix = ps->fds_status[fd].pix; int last_pix; + + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (poll)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } + if (pix < 0) { -#if ERTS_POLL_USE_FALLBACK - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); -#endif - return -1; + return res; } -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); -#endif - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); + erts_atomic_dec_nob(&ps->no_of_user_fds); last_pix = --ps->no_poll_fds; if (pix != last_pix) { /* Move last pix to this pix */ @@ -1422,127 +1146,153 @@ static int update_pollset(ErtsPollSet ps, int fd) /* Clear this fd status */ ps->fds_status[fd].pix = -1; ps->fds_status[fd].used_events = (ErtsPollEvents) 0; -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; -#endif + } else { int pix = ps->fds_status[fd].pix; if (pix < 0) { -#if ERTS_POLL_USE_FALLBACK - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) - || fd == ps->kp_fd); -#endif - erts_smp_atomic_inc_nob(&ps->no_of_user_fds); + erts_atomic_inc_nob(&ps->no_of_user_fds); ps->fds_status[fd].pix = pix = ps->no_poll_fds++; if (pix >= ps->poll_fds_len) grow_poll_fds(ps, pix); ps->poll_fds[pix].fd = fd; ps->fds_status[fd].pix = pix; -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; -#endif } -#if ERTS_POLL_USE_FALLBACK - ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK); -#endif - /* Events to be used in next poll */ - ps->poll_fds[pix].events = ev2pollev(ps->fds_status[fd].events); + ps->poll_fds[pix].events = ev2pollev(events); if (ps->poll_fds[pix].revents) { /* Remove result events that we 
should not poll for anymore */ ps->poll_fds[pix].revents &= ev2pollev(~(~ps->fds_status[fd].used_events - & ps->fds_status[fd].events)); + & events)); } /* Save events to be used in next poll */ - ps->fds_status[fd].used_events = ps->fds_status[fd].events; + ps->fds_status[fd].used_events = events; } - return 0; + return res; #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - { - ErtsPollEvents events = ps->fds_status[fd].events; + if (!events) { + + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (select)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } + + if (check_select_fds(fd, &ps->input_fds, &ps->output_fds)) { + ERTS_FD_CLR(fd, &ps->input_fds); + ERTS_FD_CLR(fd, &ps->output_fds); + } + + if (ps->fds_status[fd].used_events) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + ps->fds_status[fd].used_events = (ErtsPollEvents) 0; + } + + if (fd == ps->max_fd) { + int max = ps->max_fd; + for (max = ps->max_fd; max >= 0; max--) + if (ps->fds_status[max].used_events) + break; + ps->max_fd = max; + } + + } else { + ensure_select_fds(fd, &ps->input_fds, &ps->output_fds); - if ((ERTS_POLL_EV_IN & events) - != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) { - if (ERTS_POLL_EV_IN & events) { - ERTS_FD_SET(fd, &ps->input_fds); - } - else { - ERTS_FD_CLR(fd, &ps->input_fds); - } - } - if ((ERTS_POLL_EV_OUT & events) - != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) { - if (ERTS_POLL_EV_OUT & events) { - ERTS_FD_SET(fd, &ps->output_fds); - } - else { - ERTS_FD_CLR(fd, &ps->output_fds); - } - } - if (!ps->fds_status[fd].used_events) { - ASSERT(events); - erts_smp_atomic_inc_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds++; - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK; -#endif - } - else if (!events) { 
- ASSERT(ps->fds_status[fd].used_events); - erts_smp_atomic_dec_nob(&ps->no_of_user_fds); - ps->fds_status[fd].events = events; -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds--; - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK; -#endif - } + if (!ps->fds_status[fd].used_events) + erts_atomic_inc_nob(&ps->no_of_user_fds); + + if (events & ERTS_POLL_EV_IN) + ERTS_FD_SET(fd, &ps->input_fds); + else + ERTS_FD_CLR(fd, &ps->input_fds); + + if (events & ERTS_POLL_EV_OUT) + ERTS_FD_SET(fd, &ps->output_fds); + else + ERTS_FD_CLR(fd, &ps->output_fds); ps->fds_status[fd].used_events = events; - if (events && fd > ps->max_fd) - ps->max_fd = fd; - else if (!events && fd == ps->max_fd) { - int max = ps->max_fd; - for (max = ps->max_fd; max >= 0; max--) - if (ps->fds_status[max].used_events) - break; - ps->max_fd = max; - } + if (fd > ps->max_fd) + ps->max_fd = fd; } - return 0; -#endif -} -#endif /* ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK */ + return res; +#elif ERTS_POLL_USE_DEVPOLL -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE + if (!events) { -static void -handle_update_requests(ErtsPollSet ps) -{ - ErtsPollSetUpdateRequestsBlock *urqbp = &ps->update_requests; -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - ErtsPollBatchBuf bb; - setup_batch_buf(ps, &bb); + if (reset) { + /* When a fd has been reset, we tell the caller of erts_poll_wait + this by setting the fd as ERTS_POLL_EV_NONE */ + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE); + DEBUG_PRINT_FD("trig %s (devpoll)", ps, fd, ev2str(ERTS_POLL_EV_NONE)); + res++; + } + + ps->poll_fds[ps->poll_fds_ix].fd = fd; + ps->poll_fds[ps->poll_fds_ix].revents = 0; + ps->poll_fds[ps->poll_fds_ix++].events = POLLREMOVE; + + if (ps->fds_status[fd].used_events) { + erts_atomic_dec_nob(&ps->no_of_user_fds); + ps->fds_status[fd].used_events = 0; + } + + } else { + if (!ps->fds_status[fd].used_events) { + erts_atomic_inc_nob(&ps->no_of_user_fds); + } + 
ps->poll_fds[ps->poll_fds_ix].fd = fd; + ps->poll_fds[ps->poll_fds_ix].revents = 0; + ps->poll_fds[ps->poll_fds_ix++].events = ERTS_POLL_EV_E2N(events); + ps->fds_status[fd].used_events = ps->fds_status[fd].events; + } + + return res; #endif +} + +static int +handle_update_requests(ErtsPollSet *ps, ErtsPollResFd pr[], int no_fds) +{ + int res = 0; + ErtsPollSetUpdateRequestsBlock *urqbp = ps->curr_upd_req_block; while (urqbp) { ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; int i; int len = urqbp->len; + + init_batch_update(ps, len); + for (i = 0; i < len; i++) { int fd = urqbp->fds[i]; ASSERT(fd < ps->fds_status_len); - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ; -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - batch_update_pollset(ps, fd, &bb); -#else - update_pollset(ps, fd); -#endif + ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ); + + /* We have run out of PollResFd slots to put results in, + so we yield here and return later for more. */ + if (res == no_fds && pr != NULL) { + memmove(urqbp->fds, urqbp->fds+i, sizeof(int) * (len - i)); + urqbp->len -= i; + ps->curr_upd_req_block = urqbp; + write_batch_update(ps); + return res; + } + + if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ) { + ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ; + res += update_pollset(ps, pr + res, fd); + } } free_urqbp = urqbp; @@ -1550,12 +1300,9 @@ handle_update_requests(ErtsPollSet ps) free_update_requests_block(ps, free_urqbp); - } + write_batch_update(ps); -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - if (bb.len) - write_batch_buf(ps, &bb); -#endif + } ps->curr_upd_req_block = &ps->update_requests; @@ -1564,17 +1311,19 @@ handle_update_requests(ErtsPollSet ps) #endif ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(ps); + return res; } -#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ static ERTS_INLINE ErtsPollEvents -poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake) 
+poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op, + ErtsPollEvents events, int *do_wake) { ErtsPollEvents new_events; if (fd < ps->internal_fd_limit || fd >= max_fds) { - if (fd < 0) { + if (fd < 0 || fd >= max_fds) { new_events = ERTS_POLL_EV_ERR; goto done; } @@ -1584,130 +1333,59 @@ poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake goto done; } #endif -#if ERTS_POLL_USE_WAKEUP_PIPE if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { new_events = ERTS_POLL_EV_NVAL; goto done; } -#endif #if ERTS_POLL_USE_TIMERFD - if (fd == ps->timer_fd) { + if (fd == ps->timer_fd) { new_events = ERTS_POLL_EV_NVAL; - goto done; - } + goto done; + } #endif } +#if ERTS_POLL_USE_CONCURRENT_UPDATE + + new_events = update_pollset(ps, fd, op, events); + +#else /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ if (fd >= ps->fds_status_len) grow_fds_status(ps, fd); ASSERT(fd < ps->fds_status_len); - new_events = ps->fds_status[fd].events; - - if (events == 0) { - *do_wake = 0; - goto done; - } - - if (on) - new_events |= events; - else - new_events &= ~events; - - if (new_events == (ErtsPollEvents) 0) { -#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) - ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST; -#endif -#if ERTS_POLL_USE_FALLBACK - ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_USEFLBCK; -#endif - } - - ps->fds_status[fd].events = new_events; - - if (new_events == ps->fds_status[fd].used_events -#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP) - && !(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) -#endif - ) { - *do_wake = 0; - goto done; - } - -#if !ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE - if (update_pollset(ps, fd) != 0) - new_events = ERTS_POLL_EV_ERR; -#else /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ - -#if ERTS_POLL_USE_CONCURRENT_UPDATE - if (ERTS_POLLSET_IS_POLLED(ps)) { - int update_fallback = 0; - conc_update_pollset(ps, fd, &update_fallback); - if (!update_fallback) { - *do_wake = 0; /* no need to wake kernel poller */ - goto done; - } + if 
(op == ERTS_POLL_OP_DEL) { + ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST; + ps->fds_status[fd].events = 0; + *do_wake = 1; + } else if (op == ERTS_POLL_OP_ADD) { + ASSERT(ps->fds_status[fd].events == 0); + ps->fds_status[fd].events = events; + *do_wake = 1; + } else { + ASSERT(op == ERTS_POLL_OP_MOD); + ps->fds_status[fd].events = events; + *do_wake = 1; } -#endif + new_events = ps->fds_status[fd].events; enqueue_update_request(ps, fd); - -#ifdef ERTS_SMP - /* - * If new events have been added, we need to wake up the - * polling thread, but if events have been removed we don't. - */ - if ((new_events && (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) - || (~ps->fds_status[fd].used_events & new_events)) - *do_wake = 1; -#endif /* ERTS_SMP */ - -#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */ + +#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */ done: -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("0x%x = poll_control(ps, %d, 0x%x, %s) do_wake=%d\n", - (int) new_events, fd, (int) events, (on ? 
"on" : "off"), *do_wake); -#endif + DEBUG_PRINT_FD("%s = %s(%p, %d, %s, %s) do_wake=%d", + ps, fd, ev2str(new_events), __FUNCTION__, ps, + fd, op2str(op), ev2str(events), *do_wake); return new_events; } -void -ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps, - ErtsPollControlEntry pcev[], - int len) -{ - int i; - int do_wake; - int final_do_wake = 0; - - ERTS_POLLSET_LOCK(ps); - - for (i = 0; i < len; i++) { - do_wake = 0; - pcev[i].events = poll_control(ps, - pcev[i].fd, - pcev[i].events, - pcev[i].on, - &do_wake); - final_do_wake |= do_wake; - } - - ERTS_POLLSET_UNLOCK(ps); - -#ifdef ERTS_SMP - if (final_do_wake) - wake_poller(ps, 0, 0); -#endif /* ERTS_SMP */ - -} - ErtsPollEvents -ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, ErtsSysFdType fd, + ErtsPollOp op, ErtsPollEvents events, - int on, int* do_wake) /* In: Wake up polling thread */ /* Out: Poller is woken */ { @@ -1715,15 +1393,12 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, ERTS_POLLSET_LOCK(ps); - res = poll_control(ps, fd, events, on, do_wake); + res = poll_control(ps, fd, op, events, do_wake); ERTS_POLLSET_UNLOCK(ps); -#ifdef ERTS_SMP - if (*do_wake) { - wake_poller(ps, 0, 0); - } -#endif /* ERTS_SMP */ + if (*do_wake) + wake_poller(ps, 0); return res; } @@ -1735,188 +1410,73 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps, #if ERTS_POLL_USE_KERNEL_POLL static ERTS_INLINE int -save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res) +ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { - int res = 0; - int i; - int n = chk_fds_res < max_res ? chk_fds_res : max_res; -#if ERTS_POLL_USE_WAKEUP_PIPE + int n = chk_fds_res < max_res ? 
chk_fds_res : max_res, i; + int res = n; int wake_fd = ps->wake_fds[0]; -#endif -#if ERTS_POLL_USE_TIMERFD - int timer_fd = ps->timer_fd; -#endif - for (i = 0; i < n; i++) { + if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) { -#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ + for (i = 0; i < n; i++) { + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#if ERTS_POLL_DEBUG_PRINT + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); - if (ps->res_events[i].events) { - int fd = ps->res_events[i].data.fd; - int ix; - ErtsPollEvents revents; -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } -#endif -#if ERTS_POLL_USE_TIMERFD - if (fd == timer_fd) { - continue; - } -#endif - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - /* epoll_wait() can repeat the same fd in result array... */ - ix = (int) ps->fds_status[fd].res_ev_ix; - ASSERT(ix >= 0); - if (ix >= res || pr[ix].fd != fd) { - ix = res; - pr[ix].fd = fd; - pr[ix].events = (ErtsPollEvents) 0; - } - - revents = ERTS_POLL_EV_N2E(ps->res_events[i].events); - pr[ix].events |= revents; - if (revents) { - if (res == ix) { - ps->fds_status[fd].res_ev_ix = (unsigned short) ix; - res++; - } - } - } - -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - - struct kevent *ev; - int fd; - int ix; - - ev = &ps->res_events[i]; - fd = (int) ev->ident; - ASSERT(fd < ps->fds_status_len); - ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)); - ix = (int) ps->fds_status[fd].res_ev_ix; - - ASSERT(ix >= 0); - if (ix >= res || pr[ix].fd != fd) { - ix = res; - pr[ix].fd = (int) ev->ident; - pr[ix].events = (ErtsPollEvents) 0; - } - - if (ev->filter == EVFILT_READ) { -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } -#endif - pr[ix].events |= ERTS_POLL_EV_IN; - } - else if (ev->filter == EVFILT_WRITE) - pr[ix].events |= ERTS_POLL_EV_OUT; - if 
(ev->flags & (EV_ERROR|EV_EOF)) { - if ((ev->flags & EV_ERROR) && (((int) ev->data) == EBADF)) - pr[ix].events |= ERTS_POLL_EV_NVAL; - else - pr[ix].events |= ERTS_POLL_EV_ERR; - } - if (pr[ix].events) { - if (res == ix) { - ps->fds_status[fd].res_ev_ix = (unsigned short) ix; - res++; - } - } - -#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ - - if (ps->res_events[i].revents) { - int fd = ps->res_events[i].fd; - ErtsPollEvents revents; -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - continue; - } -#endif + if (fd != wake_fd #if ERTS_POLL_USE_TIMERFD - if (fd == timer_fd) { - continue; - } -#endif - revents = ERTS_POLL_EV_N2E(ps->res_events[i].events); - pr[res].fd = fd; - pr[res].events = revents; - res++; - } - + && fd != ps->timer_fd #endif - - } - - return res; -} - -#endif /* ERTS_POLL_USE_KERNEL_POLL */ - -#if ERTS_POLL_USE_FALLBACK - -static int -get_kp_results(ErtsPollSet ps, ErtsPollResFd pr[], int max_res) -{ - int res; + ) + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - struct timespec ts = {0, 0}; + "kqueue" +#elif ERTS_POLL_USE_EPOLL + "epoll" +#else + "/dev/poll" #endif - - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); - - do { -#if ERTS_POLL_USE_EPOLL - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0); -#elif ERTS_POLL_USE_KQUEUE - res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts); + ); #endif - } while (res < 0 && errno == EINTR); - if (res < 0) { - fatal_error("%s:%d: %s() failed: %s (%d)\n", - __FILE__, __LINE__, -#if ERTS_POLL_USE_EPOLL - "epoll_wait", -#elif ERTS_POLL_USE_KQUEUE - "kevent", + if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } +#if ERTS_POLL_USE_TIMERFD + else if (fd == ps->timer_fd) { + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], 
ERTS_POLL_EV_NONE); + res--; + } #endif - erl_errno_id(errno), errno); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } +#endif + } } - return save_kp_result(ps, pr, max_res, res); + if (res == 0) + return res; + else + return n; } -#endif /* ERTS_POLL_USE_FALLBACK */ - - +#else /* !ERTS_POLL_USE_KERNEL_POLL */ static ERTS_INLINE int -save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, - int chk_fds_res, int ebadf) +ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { -#if ERTS_POLL_USE_DEVPOLL - return save_kp_result(ps, pr, max_res, chk_fds_res); -#elif ERTS_POLL_USE_FALLBACK - if (!ps->fallback_used) - return save_kp_result(ps, pr, max_res, chk_fds_res); - else -#endif /* ERTS_POLL_USE_FALLBACK */ - { - #if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ int res = 0; -#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK int wake_fd = ps->wake_fds[0]; -#endif int i, first_ix, end_ix; /* @@ -1933,23 +1493,30 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (ps->poll_fds[i].revents != (short) 0) { int fd = ps->poll_fds[i].fd; ErtsPollEvents revents; -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, &pr[res], max_res-res); - i++; - continue; - } -#elif ERTS_POLL_USE_WAKEUP_PIPE if (fd == wake_fd) { cleanup_wakeup_pipe(ps); i++; continue; } -#endif revents = pollev2ev(ps->poll_fds[i].revents); - pr[res].fd = fd; - pr[res].events = revents; + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], revents); + + /* If an fd returns as error, we may want to check the + update_requests queue to see if it has been reset + before delivering the result?!?! This should allow + the user to do driver_dselect + close without waiting + for stop_select... 
*/ + + DEBUG_PRINT_FD("trig %s (poll)", ps, ERTS_POLL_RES_GET_FD(&pr[res]), + ev2str(ERTS_POLL_RES_GET_EVTS(&pr[res]))); + res++; + + /* Clear the events for this fd in order to mimic + how epoll ONESHOT works */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); } i++; } @@ -1965,9 +1532,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ int res = 0; -#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK int wake_fd = ps->wake_fds[0]; -#endif int fd, first_fd, end_fd; /* @@ -1980,29 +1545,23 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (!ebadf) { while (1) { while (fd < end_fd && res < max_res) { - - pr[res].events = (ErtsPollEvents) 0; + ErtsPollEvents events = 0; if (ERTS_FD_ISSET(fd, &ps->res_input_fds)) { -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, &pr[res], max_res-res); - fd++; - continue; - } -#elif ERTS_POLL_USE_WAKEUP_PIPE if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].events |= ERTS_POLL_EV_IN; + events |= ERTS_POLL_EV_IN; } if (ERTS_FD_ISSET(fd, &ps->res_output_fds)) - pr[res].events |= ERTS_POLL_EV_OUT; - if (pr[res].events) { - pr[res].fd = fd; + events |= ERTS_POLL_EV_OUT; + if (events) { + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], events); res++; + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); } fd++; } @@ -2035,7 +1594,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) { oset = &ps->res_output_fds; ERTS_FD_ZERO(oset); - ERTS_FD_SET(fd, oset); + ERTS_FD_SET(fd, oset); } do { /* Initiate 'tv' each time; @@ -2044,49 +1603,31 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, sres = ERTS_SELECT(ps->max_fd+1, iset, oset, NULL, &tv); } while (sres < 0 && errno == EINTR); if (sres < 0) { -#if 
ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, - &pr[res], - max_res-res); - fd++; - continue; - } -#elif ERTS_POLL_USE_WAKEUP_PIPE if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].fd = fd; - pr[res].events = ERTS_POLL_EV_NVAL; + ERTS_POLL_RES_SET_FD(&pr[res], fd); + ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NVAL); res++; } else if (sres > 0) { - pr[res].fd = fd; + ErtsPollEvents events = 0; + ERTS_POLL_RES_SET_FD(&pr[res], fd); if (iset && ERTS_FD_ISSET(fd, iset)) { -#if ERTS_POLL_USE_FALLBACK - if (fd == ps->kp_fd) { - res += get_kp_results(ps, - &pr[res], - max_res-res); - fd++; - continue; - } -#elif ERTS_POLL_USE_WAKEUP_PIPE if (fd == wake_fd) { cleanup_wakeup_pipe(ps); fd++; continue; } -#endif - pr[res].events |= ERTS_POLL_EV_IN; + events |= ERTS_POLL_EV_IN; } if (oset && ERTS_FD_ISSET(fd, oset)) { - pr[res].events |= ERTS_POLL_EV_OUT; + events |= ERTS_POLL_EV_OUT; } - ASSERT(pr[res].events); + ASSERT(events); + ERTS_POLL_RES_SET_EVTS(&pr[res], events); res++; } } @@ -2101,68 +1642,64 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, } ps->next_sel_fd = fd; return res; -#endif - } +#endif /* ERTS_POLL_USE_SELECT */ } +#endif /* !ERTS_POLL_USE_KERNEL_POLL */ + static ERTS_INLINE ErtsMonotonicTime -get_timeout(ErtsPollSet ps, +get_timeout(ErtsPollSet *ps, int resolution, ErtsMonotonicTime timeout_time) { - ErtsMonotonicTime timeout, save_timeout_time; + ErtsMonotonicTime timeout; if (timeout_time == ERTS_POLL_NO_TIMEOUT) { - save_timeout_time = ERTS_MONOTONIC_TIME_MIN; timeout = 0; } + else if (timeout_time == ERTS_POLL_INF_TIMEOUT) { + timeout = -1; + } else { ErtsMonotonicTime diff_time, current_time; current_time = erts_get_monotonic_time(NULL); diff_time = timeout_time - current_time; if (diff_time <= 0) { - save_timeout_time = ERTS_MONOTONIC_TIME_MIN; timeout = 0; } else { - save_timeout_time = current_time; switch (resolution) { case 1000: /* Round up to nearest even 
milli second */ timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; if (timeout > (ErtsMonotonicTime) INT_MAX) timeout = (ErtsMonotonicTime) INT_MAX; - save_timeout_time += ERTS_MSEC_TO_MONOTONIC(timeout); timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); break; case 1000000: /* Round up to nearest even micro second */ timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; - save_timeout_time += ERTS_USEC_TO_MONOTONIC(timeout); timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); break; case 1000000000: /* Round up to nearest even nano second */ timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; - save_timeout_time += ERTS_NSEC_TO_MONOTONIC(timeout); timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); break; default: ERTS_INTERNAL_ERROR("Invalid resolution"); timeout = 0; - save_timeout_time = 0; break; } } } - set_timeout_time(ps, save_timeout_time); return timeout; } #if ERTS_POLL_USE_SELECT static ERTS_INLINE int -get_timeout_timeval(ErtsPollSet ps, +get_timeout_timeval(ErtsPollSet *ps, SysTimeval *tvp, ErtsMonotonicTime timeout_time) { @@ -2176,6 +1713,9 @@ get_timeout_timeval(ErtsPollSet ps, return 0; } + else if (timeout == -1) { + return -1; + } else { ErtsMonotonicTime sec = timeout/(1000*1000); tvp->tv_sec = sec; @@ -2185,7 +1725,7 @@ get_timeout_timeval(ErtsPollSet ps, ASSERT(tvp->tv_usec >= 0); ASSERT(tvp->tv_usec < 1000*1000); - return !0; + return 1; } } @@ -2195,7 +1735,7 @@ get_timeout_timeval(ErtsPollSet ps, #if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD static ERTS_INLINE int -get_timeout_timespec(ErtsPollSet ps, +get_timeout_timespec(ErtsPollSet *ps, struct timespec *tsp, ErtsMonotonicTime timeout_time) { @@ -2208,6 +1748,9 @@ get_timeout_timespec(ErtsPollSet ps, tsp->tv_nsec = 0; return 0; } + else if (timeout == -1) { + return -1; + } else { ErtsMonotonicTime sec = timeout/(1000*1000*1000); tsp->tv_sec = sec; @@ -2217,7 +1760,7 @@ get_timeout_timespec(ErtsPollSet ps, 
ASSERT(tsp->tv_nsec >= 0); ASSERT(tsp->tv_nsec < 1000*1000*1000); - return !0; + return 1; } } @@ -2226,7 +1769,7 @@ get_timeout_timespec(ErtsPollSet ps, #if ERTS_POLL_USE_TIMERFD static ERTS_INLINE int -get_timeout_itimerspec(ErtsPollSet ps, +get_timeout_itimerspec(ErtsPollSet *ps, struct itimerspec *itsp, ErtsMonotonicTime timeout_time) { @@ -2236,242 +1779,169 @@ get_timeout_itimerspec(ErtsPollSet ps, return get_timeout_timespec(ps, &itsp->it_value, timeout_time); } - + #endif static ERTS_INLINE int -check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res) +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time) { int res; - ERTS_MSACC_PUSH_STATE_M(); - if (erts_smp_atomic_read_nob(&ps->no_of_user_fds) == 0 - && timeout_time == ERTS_POLL_NO_TIMEOUT) { - /* Nothing to poll and zero timeout; done... */ - return 0; - } - else { - int timeout; -#if ERTS_POLL_USE_FALLBACK - if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) { - -#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); + int timeout; + DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time); + { +#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ #if ERTS_POLL_USE_TIMERFD - { - struct itimerspec its; - timeout = get_timeout_itimerspec(ps, &its, timeout_time); - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - timerfd_set(ps, &its); - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, -1); - res = timerfd_clear(ps, res, max_res); - } else { - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0); - } - } + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout > 0) { + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, pr, max_res, -1); + res = timerfd_clear(ps, pr, res, 
max_res); + } else { + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + } #else /* !ERTS_POLL_USE_TIMERFD */ - timeout = (int) get_timeout(ps, 1000, timeout_time); - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = epoll_wait(ps->kp_fd, ps->res_events, max_res, timeout); + timeout = (int) get_timeout(ps, 1000, timeout_time); + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); #endif /* !ERTS_POLL_USE_TIMERFD */ -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts; - if (max_res > ps->res_events_len) - grow_res_events(ps, max_res); - timeout = get_timeout_timespec(ps, &ts, timeout_time); - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts); -#endif /* ----------------------------------------- */ - } - else /* use fallback (i.e. poll() or select()) */ -#endif /* ERTS_POLL_USE_FALLBACK */ - { - -#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ - /* - * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds - * is set too high. dp_nfds should not be set greater than - * the maximum number of file descriptors in the poll set. - */ - struct dvpoll poll_res; - int nfds = (int) erts_smp_atomic_read_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_WAKEUP_PIPE - nfds++; /* Wakeup pipe */ -#endif - timeout = (int) get_timeout(ps, 1000, timeout_time); - poll_res.dp_nfds = nfds < max_res ? 
nfds : max_res; - if (poll_res.dp_nfds > ps->res_events_len) - grow_res_events(ps, poll_res.dp_nfds); - poll_res.dp_fds = ps->res_events; - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - poll_res.dp_timeout = timeout; - res = ioctl(ps->kp_fd, DP_POLL, &poll_res); +#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ + struct timespec ts; + struct timespec *tsp; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + tsp = timeout < 0 ? NULL : &ts; + res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); +#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ + /* + * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds + * is set too high. dp_nfds should not be set greater than + * the maximum number of file descriptors in the poll set. + */ + struct dvpoll poll_res; + int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; + poll_res.dp_nfds = nfds < max_res ? 
nfds : max_res; + poll_res.dp_fds = pr; + poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time); + res = ioctl(ps->kp_fd, DP_POLL, &poll_res); #elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */ - struct timespec ts; - timeout = get_timeout_timespec(ps, &ts, timeout_time); - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = ppoll(ps->poll_fds, ps->no_poll_fds, &ts, NULL); + struct timespec ts; + struct timespec *tsp = &ts; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + if (timeout < 0) tsp = NULL; + res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL); #elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */ - timeout = (int) get_timeout(ps, 1000, timeout_time); - - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = poll(ps->poll_fds, ps->no_poll_fds, timeout); + timeout = (int) get_timeout(ps, 1000, timeout_time); + res = poll(ps->poll_fds, ps->no_poll_fds, timeout); #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - SysTimeval to; - timeout = get_timeout_timeval(ps, &to, timeout_time); - - ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); - ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); - - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - } - res = ERTS_SELECT(ps->max_fd + 1, - &ps->res_input_fds, - &ps->res_output_fds, - NULL, - &to); -#ifdef ERTS_SMP - if (timeout) { - erts_thr_progress_finalize_wait(NULL); - ERTS_MSACC_POP_STATE_M(); - } - if (res < 0 - && errno == EBADF - && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { - /* - * This may have happened because another thread deselected - * a fd in our poll set and then closed it, i.e. the driver - * behaved correctly. 
We wan't to avoid looking for a bad - * fd, that may even not exist anymore. Therefore, handle - * update requests and try again. - * - * We don't know how much of the timeout is left; therfore, - * we use a zero timeout. If no error occur and no events - * have triggered, we fake an EAGAIN error and let the caller - * restart us. - */ - to.tv_sec = 0; - to.tv_usec = 0; - ERTS_POLLSET_LOCK(ps); - handle_update_requests(ps); - ERTS_POLLSET_UNLOCK(ps); - res = ERTS_SELECT(ps->max_fd + 1, - &ps->res_input_fds, - &ps->res_output_fds, - NULL, - &to); - if (res == 0) { - errno = EAGAIN; - res = -1; - } - } -#endif /* ERTS_SMP */ - return res; + SysTimeval tv; + SysTimeval *tvp; + timeout = get_timeout_timeval(ps, &tv, timeout_time); + tvp = timeout < 0 ? NULL : &tv; + + ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); + ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); + + res = ERTS_SELECT(ps->max_fd + 1, + &ps->res_input_fds, + &ps->res_output_fds, + NULL, + tvp); #endif /* ----------------------------------------- */ - } - if (timeout) { -#ifdef ERTS_SMP - erts_thr_progress_finalize_wait(NULL); -#endif - ERTS_MSACC_POP_STATE_M(); - } - return res; } + DEBUG_PRINT_WAIT("Leaving check_fd_events(), res=%d", ps, res); + return res; } int -ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd pr[], int *len, - ErtsMonotonicTime timeout_time) + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time) { - ErtsMonotonicTime to; - int res, no_fds; + int res, no_fds, used_fds = 0; int ebadf = 0; -#ifdef ERTS_SMP + int do_wait; int ps_locked = 0; -#endif + ERTS_MSACC_DECLARE_CACHE(); no_fds = *len; -#ifdef ERTS_POLL_MAX_RES - if (no_fds >= ERTS_POLL_MAX_RES) - no_fds = ERTS_POLL_MAX_RES; -#endif - *len = 0; + ASSERT(no_fds > 0); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Entering erts_poll_wait(), timeout_time=%bps\n", - timeout_time); -#endif - - if (ERTS_POLLSET_SET_POLLED_CHK(ps)) { - res = EINVAL; /* Another 
thread is in erts_poll_wait() - on this pollset... */ - goto done; - } - - to = (is_woken(ps) - ? ERTS_POLL_NO_TIMEOUT /* Use zero timeout */ - : timeout_time); - -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { ERTS_POLLSET_LOCK(ps); - handle_update_requests(ps); + used_fds = handle_update_requests(ps, pr, no_fds); ERTS_POLLSET_UNLOCK(ps); + + if (used_fds == no_fds) { + *len = used_fds; + return 0; + } } #endif + do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT; + + DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait); + + if (do_wait) { + tpd = tpd ? tpd : erts_thr_prgr_data(NULL); + erts_thr_progress_prepare_wait(tpd); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + } else + timeout_time = ERTS_POLL_NO_TIMEOUT; + while (1) { - res = check_fd_events(ps, to, no_fds); - if (res != 0) - break; - if (to == ERTS_POLL_NO_TIMEOUT) - break; - if (erts_get_monotonic_time(NULL) >= timeout_time) + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time); + if (res != 0) + break; + if (timeout_time == ERTS_POLL_NO_TIMEOUT) + break; + if (erts_get_monotonic_time(NULL) >= timeout_time) break; } - woke_up(ps); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + if (res < 0 + && errno == EBADF + && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { + /* + * This may have happened because another thread deselected + * a fd in our poll set and then closed it, i.e. the driver + * behaved correctly. We wan't to avoid looking for a bad + * fd, that may even not exist anymore. Therefore, handle + * update requests and try again. This behaviour should only + * happen when using SELECT as the polling mechanism. 
+ */ + ERTS_POLLSET_LOCK(ps); + used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); + if (used_fds == no_fds) { + *len = used_fds; + ERTS_POLLSET_UNLOCK(ps); + return 0; + } + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT); + /* Keep the lock over the non-blocking poll in order to not + get any nasty races happening. */ + ERTS_POLLSET_UNLOCK(ps); + if (res == 0) { + errno = EAGAIN; + res = -1; + } + } +#endif + + if (do_wait) { + erts_thr_progress_finalize_wait(tpd); + ERTS_MSACC_UPDATE_CACHE(); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); + } + + if (ERTS_POLL_USE_WAKEUP(ps)) + woke_up(ps); - if (res == 0) { - res = ETIMEDOUT; - } - else if (res < 0) { + if (res < 0) { #if ERTS_POLL_USE_SELECT if (errno == EBADF) { ebadf = 1; @@ -2480,38 +1950,34 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, #endif res = errno; } - else { + else if (res == 0) { + res = used_fds == 0 ? ETIMEDOUT : 0; +#ifdef HARD_DEBUG + check_poll_result(pr, used_fds); +#endif + *len = used_fds; + } else { #if ERTS_POLL_USE_SELECT save_results: #endif - -#ifdef ERTS_SMP ps_locked = 1; ERTS_POLLSET_LOCK(ps); -#endif - no_fds = save_poll_result(ps, pr, no_fds, res, ebadf); + used_fds += ERTS_POLL_EXPORT(save_result)(ps, pr + used_fds, no_fds - used_fds, res, ebadf); #ifdef HARD_DEBUG - check_poll_result(pr, no_fds); + check_poll_result(pr, used_fds); #endif - res = (no_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0); - *len = no_fds; + res = (used_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0); + *len = used_fds; } -#ifdef ERTS_SMP if (ps_locked) ERTS_POLLSET_UNLOCK(ps); - ERTS_POLLSET_UNSET_POLLED(ps); -#endif - done: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX); -#ifdef ERTS_POLL_DEBUG_PRINT - erts_printf("Leaving %s = erts_poll_wait()\n", - res == 0 ? "0" : erl_errno_id(res)); -#endif + DEBUG_PRINT_WAIT("Leaving %s = %s(len = %d)", ps, + res == 0 ? 
"0" : erl_errno_id(res), __FUNCTION__, *len); return res; } @@ -2521,55 +1987,15 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, */ void -ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set) -{ -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT - if (!set) - reset_wakeup_state(ps); - else - wake_poller(ps, 1, 0); -#endif -} - -#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT -void -ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet ps) +ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set) { - /* - * NOTE: This function is called from signal handlers, it, - * therefore, it has to be async-signal safe. - */ - wake_poller(ps, 1, 1); -} -#endif - -/* - * erts_poll_interrupt_timed(): - * If 'set' != 0, interrupt thread blocked in erts_poll_wait() if it - * is not guaranteed that it will timeout before 'msec' milli seconds. - */ -void -ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps, - int set, - ErtsMonotonicTime timeout_time) -{ -#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP) - if (!set) - reset_wakeup_state(ps); - else { - ErtsMonotonicTime max_wait_time = get_timeout_time(ps); - if (max_wait_time > timeout_time) - wake_poller(ps, 1, 0); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - else { - if (ERTS_POLLSET_IS_POLLED(ps)) - erts_smp_atomic_inc_nob(&ps->no_avoided_wakeups); - erts_smp_atomic_inc_nob(&ps->no_avoided_interrupts); - } - erts_smp_atomic_inc_nob(&ps->no_interrupt_timed); -#endif + DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set); + if (ERTS_POLL_USE_WAKEUP(ps)) { + if (!set) + reset_wakeup_state(ps); + else + wake_poller(ps, 1); } -#endif } int @@ -2582,13 +2008,19 @@ ERTS_POLL_EXPORT(erts_poll_max_fds)(void) */ void -ERTS_POLL_EXPORT(erts_poll_init)(void) +ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_updates) { - erts_smp_spinlock_init(&pollsets_lock, "pollsets_lock"); - pollsets = NULL; errno = 0; + if (concurrent_updates) { +#if ERTS_POLL_USE_CONCURRENT_UPDATE + *concurrent_updates = 1; +#else + 
*concurrent_updates = 0; +#endif + } + #if !defined(NO_SYSCONF) max_fds = sysconf(_SC_OPEN_MAX); #elif ERTS_POLL_USE_SELECT @@ -2607,37 +2039,28 @@ ERTS_POLL_EXPORT(erts_poll_init)(void) fatal_error("erts_poll_init(): Failed to get max number of files: %s\n", erl_errno_id(errno)); -#ifdef ERTS_POLL_DEBUG_PRINT print_misc_debug_info(); -#endif } -ErtsPollSet -ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) +ErtsPollSet * +ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; #endif - ErtsPollSet ps = erts_alloc(ERTS_ALC_T_POLLSET, - sizeof(struct ErtsPollSet_)); + ErtsPollSet *ps = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(struct ERTS_POLL_EXPORT(erts_pollset))); + ps->id = id; ps->internal_fd_limit = 0; - ps->fds_status = NULL; - ps->fds_status_len = 0; - erts_smp_atomic_init_nob(&ps->no_of_user_fds, 0); + erts_atomic_init_nob(&ps->no_of_user_fds, 0); #if ERTS_POLL_USE_KERNEL_POLL ps->kp_fd = -1; #if ERTS_POLL_USE_EPOLL kp_fd = epoll_create(256); - ps->res_events_len = 0; - ps->res_events = NULL; #elif ERTS_POLL_USE_DEVPOLL kp_fd = open("/dev/poll", O_RDWR); - ps->res_events_len = 0; - ps->res_events = NULL; #elif ERTS_POLL_USE_KQUEUE kp_fd = kqueue(); - ps->res_events_len = 0; - ps->res_events = NULL; #endif if (kp_fd < 0) fatal_error("erts_poll_create_pollset(): Failed to " @@ -2651,10 +2074,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) ": %s (%d)\n", erl_errno_id(errno), errno); #endif /* ERTS_POLL_USE_KERNEL_POLL */ -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - /* res_events is also used as write buffer */ - grow_res_events(ps, ERTS_POLL_MIN_BATCH_BUF_SIZE); -#endif #if ERTS_POLL_USE_POLL ps->next_poll_fds_ix = 0; ps->no_poll_fds = 0; @@ -2663,9 +2082,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) #elif ERTS_POLL_USE_SELECT ps->next_sel_fd = 0; ps->max_fd = -1; -#if ERTS_POLL_USE_FALLBACK - ps->no_select_fds = 0; -#endif #ifdef _DARWIN_UNLIMITED_SELECT ps->input_fds.sz = 0; ps->input_fds.ptr = NULL; @@ -2682,133 
+2098,76 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) ERTS_FD_ZERO(&ps->res_output_fds); #endif #endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + ps->fds_status = NULL; + ps->fds_status_len = 0; ps->update_requests.next = NULL; ps->update_requests.len = 0; ps->curr_upd_req_block = &ps->update_requests; - erts_smp_atomic32_init_nob(&ps->have_update_requests, 0); -#endif -#ifdef ERTS_SMP - erts_atomic32_init_nob(&ps->polled, 0); - erts_smp_mtx_init(&ps->mtx, "pollset"); -#endif -#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT - erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); -#endif -#if ERTS_POLL_USE_WAKEUP_PIPE - create_wakeup_pipe(ps); -#endif -#if ERTS_POLL_USE_TIMERFD - create_timerfd(ps); -#endif -#if ERTS_POLL_USE_FALLBACK - if (kp_fd >= ps->fds_status_len) - grow_fds_status(ps, kp_fd); - /* Force kernel poll fd into fallback (poll/select) set */ - ps->fds_status[kp_fd].flags - |= ERTS_POLL_FD_FLG_INFLBCK|ERTS_POLL_FD_FLG_USEFLBCK; - { - int do_wake = 0; - ERTS_POLL_EXPORT(erts_poll_control)(ps, kp_fd, ERTS_POLL_EV_IN, 1, - &do_wake); - } + erts_atomic32_init_nob(&ps->have_update_requests, 0); + erts_mtx_init(&ps->mtx, "pollset", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO); #endif #if ERTS_POLL_USE_KERNEL_POLL if (ps->internal_fd_limit <= kp_fd) ps->internal_fd_limit = kp_fd + 1; ps->kp_fd = kp_fd; + if (ps->id == -1) + ps->oneshot = 0; + else + ps->oneshot = 1; #endif - init_timeout_time(ps); -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - erts_smp_atomic_init_nob(&ps->no_avoided_wakeups, 0); - erts_smp_atomic_init_nob(&ps->no_avoided_interrupts, 0); - erts_smp_atomic_init_nob(&ps->no_interrupt_timed, 0); -#endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE - handle_update_requests(ps); -#endif -#if ERTS_POLL_USE_FALLBACK - ps->fallback_used = 0; -#endif - erts_smp_atomic_set_nob(&ps->no_of_user_fds, 0); /* Don't count wakeup pipe and fallback fd */ - - erts_smp_spin_lock(&pollsets_lock); - ps->next = 
pollsets; - pollsets = ps; - erts_smp_spin_unlock(&pollsets_lock); - - return ps; -} -void -ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) -{ - - if (ps->fds_status) - erts_free(ERTS_ALC_T_FD_STATUS, (void *) ps->fds_status); + erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); + create_wakeup_pipe(ps); -#if ERTS_POLL_USE_EPOLL - if (ps->kp_fd >= 0) - close(ps->kp_fd); - if (ps->res_events) - erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); -#elif ERTS_POLL_USE_DEVPOLL - if (ps->kp_fd >= 0) - close(ps->kp_fd); - if (ps->res_events) - erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events); -#elif ERTS_POLL_USE_POLL - if (ps->poll_fds) - erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds); -#elif ERTS_POLL_USE_SELECT -#ifdef _DARWIN_UNLIMITED_SELECT - if (ps->input_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->input_fds.ptr); - if (ps->res_input_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_input_fds.ptr); - if (ps->output_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->output_fds.ptr); - if (ps->res_output_fds.ptr) - erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_output_fds.ptr); -#endif -#endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE - { - ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next; - while (urqbp) { - ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp; - urqbp = urqbp->next; - free_update_requests_block(ps, free_urqbp); - } - } -#endif -#ifdef ERTS_SMP - erts_smp_mtx_destroy(&ps->mtx); -#endif -#if ERTS_POLL_USE_WAKEUP_PIPE - if (ps->wake_fds[0] >= 0) - close(ps->wake_fds[0]); - if (ps->wake_fds[1] >= 0) - close(ps->wake_fds[1]); -#endif #if ERTS_POLL_USE_TIMERFD - if (ps->timer_fd >= 0) - close(ps->timer_fd); + create_timerfd(ps); #endif - erts_smp_spin_lock(&pollsets_lock); - if (ps == pollsets) - pollsets = pollsets->next; - else { - ErtsPollSet prev_ps; - for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next) - ; - ASSERT(ps == 
prev_ps->next); - prev_ps->next = ps->next; - } - erts_smp_spin_unlock(&pollsets_lock); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + handle_update_requests(ps, NULL, 0); + cleanup_wakeup_pipe(ps); +#endif +#if ERTS_POLL_USE_KERNEL_POLL && (defined(__DARWIN__) || defined(__APPLE__) && defined(__MACH__)) + { + /* + * Using kqueue on OS X is a mess of brokenness... + * + * On OS X version older than 15.6 (i.e. OS X El Capitan released in July 2015), + * a thread waiting in kevent is not woken if an event is inserted into the kqueue + * by another thread and the event becomes ready. However if a new call to kevent + * is done by the waiting thread, the new event is found. + * + * So on effected OS X versions we could trigger the wakeup pipe so that + * the waiters will be woken and re-issue the kevent. However... + * + * On OS X version older then 16 (i.e. OS X Sierra released in September 2016), + * running the emulator driver_SUITE smp_select testcase consistently causes a + * kernel panic. I don't know why or what events that trigger it. But it seems + * like updates of the pollset while another thread is sleeping in it Creates + * some kind of race that triggers the kernel panic. + * + * So to deal with this, the erts configure check what OS X version is run + * and only enabled kernel poll on OS X 16 or newer. In addition, if someone + * attempts to compile Erlang on OS X 16 and then run it on OS X 15, we do the + * run-time check below to disallow this. + */ + int major, minor, build; + os_version(&major,&minor,&build); + if (major < 16) { + erts_fprintf(stderr,"BROKEN KQUEUE!\n" + "Erlang has been compiled with kernel-poll support,\n" + "but this OS X version is known to have kernel bugs\n" + "when using kernel-poll. 
You have two options:\n" + " 1) update to a newer OS X version (OS X Sierra or newer)\n" + " 2) recompile erlang without kernel-poll support\n"); + erts_exit(1, ""); + } + } +#endif + erts_atomic_set_nob(&ps->no_of_user_fds, 0); /* Don't count wakeup pipe and fallback fd */ - erts_free(ERTS_ALC_T_POLLSET, (void *) ps); + return ps; } /* @@ -2816,24 +2175,18 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) */ void -ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) +ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip) { -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE int pending_updates; #endif Uint size = 0; ERTS_POLLSET_LOCK(ps); - size += sizeof(struct ErtsPollSet_); + size += sizeof(struct ERTS_POLL_EXPORT(erts_pollset)); +#if !ERTS_POLL_USE_CONCURRENT_UPDATE size += ps->fds_status_len*sizeof(ErtsFdStatus); - -#if ERTS_POLL_USE_EPOLL - size += ps->res_events_len*sizeof(struct epoll_event); -#elif ERTS_POLL_USE_DEVPOLL - size += ps->res_events_len*sizeof(struct pollfd); -#elif ERTS_POLL_USE_KQUEUE - size += ps->res_events_len*sizeof(struct kevent); #endif #if ERTS_POLL_USE_POLL @@ -2845,7 +2198,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif #endif -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE { ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next; pending_updates = ps->update_requests.len; @@ -2857,7 +2210,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) } #endif - pip->primary = + pip->primary = #if ERTS_POLL_USE_KQUEUE "kqueue" #elif ERTS_POLL_USE_EPOLL @@ -2871,17 +2224,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif ; - pip->fallback = -#if !ERTS_POLL_USE_FALLBACK - NULL -#elif ERTS_POLL_USE_POLL - "poll" -#elif ERTS_POLL_USE_SELECT - "select" -#endif - ; - - pip->kernel_poll = + pip->kernel_poll = #if !ERTS_POLL_USE_KERNEL_POLL NULL #elif 
ERTS_POLL_USE_KQUEUE @@ -2895,34 +2238,11 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) pip->memory_size = size; - pip->poll_set_size = (int) erts_smp_atomic_read_nob(&ps->no_of_user_fds); -#if ERTS_POLL_USE_WAKEUP_PIPE + pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds); pip->poll_set_size++; /* Wakeup pipe */ -#endif -#if ERTS_POLL_USE_TIMERFD - pip->poll_set_size++; /* timerfd */ -#endif - - pip->fallback_poll_set_size = -#if !ERTS_POLL_USE_FALLBACK - 0 -#elif ERTS_POLL_USE_POLL - ps->no_poll_fds -#elif ERTS_POLL_USE_SELECT - ps->no_select_fds -#endif - ; - -#if ERTS_POLL_USE_FALLBACK - /* If only kp_fd is in fallback poll set we don't use fallback... */ - if (pip->fallback_poll_set_size == 1) - pip->fallback_poll_set_size = 0; - else - pip->poll_set_size++; /* kp_fd */ -#endif pip->lazy_updates = -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE 1 #else 0 @@ -2930,21 +2250,13 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) ; pip->pending_updates = -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE +#if !ERTS_POLL_USE_CONCURRENT_UPDATE pending_updates #else 0 #endif ; - pip->batch_updates = -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET - 1 -#else - 0 -#endif - ; - pip->concurrent_updates = #if ERTS_POLL_USE_CONCURRENT_UPDATE 1 @@ -2953,13 +2265,23 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #endif ; - pip->max_fds = max_fds; + pip->is_fallback = +#if ERTS_POLL_IS_FALLBACK + 1 +#else + 0 +#endif + ; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - pip->no_avoided_wakeups = erts_smp_atomic_read_nob(&ps->no_avoided_wakeups); - pip->no_avoided_interrupts = erts_smp_atomic_read_nob(&ps->no_avoided_interrupts); - pip->no_interrupt_timed = erts_smp_atomic_read_nob(&ps->no_interrupt_timed); + pip->batch_updates = +#if ERTS_POLL_USE_DEVPOLL + 1 +#else + 0 #endif + ; + + pip->max_fds = max_fds; ERTS_POLLSET_UNLOCK(ps); @@ -2995,35 +2317,61 @@ fatal_error(char *format, ...) 
abort(); } -static void -fatal_error_async_signal_safe(char *error_str) +/* + * --- Debug ----------------------------------------------------------------- + */ + +#if ERTS_POLL_USE_EPOLL +uint32_t epoll_events(int kp_fd, int fd) { - if (ERTS_SOMEONE_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) { - /* See comment above in fatal_error() */ - return; + /* For epoll we read the information about what is selected upon from the proc fs.*/ + char fname[30]; + FILE *f; + unsigned int pos, flags, mnt_id; + int line = 0; + sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), kp_fd); + f = fopen(fname,"r"); + if (!f) { + fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno); + ASSERT(0); + return 0; } - if (error_str) { - int len = 0; - while (error_str[len]) - len++; - if (len) { - /* async signal safe */ - erts_silence_warn_unused_result(write(2, error_str, len)); - } + if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) { + fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno); + ASSERT(0); + return 0; } - abort(); + if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); + line += 3; + while (!feof(f)) { + /* tfd: 10 events: 40000019 data: 180000000a */ + int ev_fd; + uint32_t events; + uint64_t data; + if (fscanf(f,"tfd:%d events:%x data:%llx\n", &ev_fd, &events, + (unsigned long long*)&data) != 3) { + fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", fname, + line, + errno); + return 0; + } + if (fd == ev_fd) { + fclose(f); + return events; + } + } + fclose(f); + return 0; } - -/* - * --- Debug ----------------------------------------------------------------- - */ +#endif void -ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, +ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, ErtsPollEvents ev[], int len) { int fd; +#if !ERTS_POLL_USE_CONCURRENT_UPDATE ERTS_POLLSET_LOCK(ps); for (fd = 0; fd < len; fd++) { if (fd >= ps->fds_status_len) @@ -3031,12 +2379,7 @@ 
ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, else { ev[fd] = ps->fds_status[fd].events; if ( -#if ERTS_POLL_USE_WAKEUP_PIPE fd == ps->wake_fds[0] || fd == ps->wake_fds[1] || -#endif -#if ERTS_POLL_USE_TIMERFD - fd == ps->timer_fd || -#endif #if ERTS_POLL_USE_KERNEL_POLL fd == ps->kp_fd || #endif @@ -3045,7 +2388,57 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps, } } ERTS_POLLSET_UNLOCK(ps); +#elif ERTS_POLL_USE_EPOLL + /* For epoll we read the information about what is selected upon from the proc fs.*/ + char fname[30]; + FILE *f; + unsigned int pos, flags, mnt_id; + int line = 0; + sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), ps->kp_fd); + for (fd = 0; fd < len; fd++) + ev[fd] = ERTS_POLL_EV_NONE; + f = fopen(fname,"r"); + if (!f) { + fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno); + return; + } + if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) { + fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno); + ASSERT(0); + return; + } + if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); + line += 3; + while (!feof(f)) { + /* tfd: 10 events: 40000019 data: 180000000a */ + int fd; + uint32_t events; + uint64_t data; + if (fscanf(f,"tfd:%d events:%x data:%llx\n", &fd, &events, + (unsigned long long*)&data) != 3) { + fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", + fname, line, errno); + ASSERT(0); + return; + } + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) + continue; +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) + continue; +#endif + data &= 0xFFFFFFFF; + ASSERT(fd == data); + /* Events are the events that are being monitored, which of course include + error and hup events, but we are only interested in IN/OUT events */ + ev[fd] = (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) & ERTS_POLL_EV_N2E(events); + line++; + } +#else + for (fd = 0; fd < len; fd++) + ev[fd] = ERTS_POLL_EV_NONE; +#endif } #ifdef HARD_DEBUG @@ -3065,10 +2458,10 @@ check_poll_result(ErtsPollResFd pr[], 
int len) } -#if ERTS_POLL_USE_DEVPOLL +#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG) static void -check_poll_status(ErtsPollSet ps) +check_poll_status(ErtsPollSet *ps) { int i; for (i = 0; i < ps->fds_status_len; i++) { @@ -3100,34 +2493,24 @@ check_poll_status(ErtsPollSet ps) #endif /* ERTS_POLL_USE_DEVPOLL */ #endif /* HARD_DEBUG */ -#ifdef ERTS_POLL_DEBUG_PRINT static void print_misc_debug_info(void) { - erts_printf("erts_poll using: %s lazy_updates:%s batch_updates:%s\n", +#if ERTS_POLL_DEBUG_PRINT + erts_printf("erts_poll using: %s lazy_updates:%s\n", #if ERTS_POLL_USE_KQUEUE "kqueue" #elif ERTS_POLL_USE_EPOLL "epoll" #elif ERTS_POLL_USE_DEVPOLL "/dev/poll" -#endif -#if ERTS_POLL_USE_FALLBACK - "-" -#endif -#if ERTS_POLL_USE_POLL +#elif ERTS_POLL_USE_POLL "poll" #elif ERTS_POLL_USE_SELECT "select" #endif , -#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE - "true" -#else - "false" -#endif - , -#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET +#if !ERTS_POLL_USE_CONCURRENT_UPDATE "true" #else "false" @@ -3146,6 +2529,20 @@ print_misc_debug_info(void) #ifdef FD_SETSIZE erts_printf("FD_SETSIZE=%d\n", FD_SETSIZE); #endif +#endif +} + +#ifdef ERTS_ENABLE_LOCK_COUNT +void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *pollset, int enable) +{ +#if !ERTS_POLL_USE_CONCURRENT_UPDATE + if(enable) { + erts_lcnt_install_new_lock_info(&pollset->mtx.lcnt, "pollset_rm", NIL, + ERTS_LOCK_TYPE_MUTEX | ERTS_LOCK_FLAGS_CATEGORY_IO); + } else { + erts_lcnt_uninstall(&pollset->mtx.lcnt); + } +#endif + return; } - #endif diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index c16122610d..d40dabc529 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,11 +18,31 @@ * %CopyrightEnd% */ -/* - * Description: Poll interface suitable for ERTS with or without - * SMP support. +/** + * @description: Poll interface suitable for ERTS with SMP support. + * + * @author: Rickard Green + * @author: Lukas Larsson + * + * This header file exports macros and functions that are used to + * react to I/O polling events from file descriptors or wait-able + * objects. The API exported is the following: * - * Author: Rickard Green + * defines: + * ERTS_POLL_EV_NONE - No events have been set. This is not the same as 0. + * ERTS_POLL_EV_IN - Represent an IN event + * ERTS_POLL_EV_OUT - Represent an OUT event + * ERTS_POLL_EV_ERR - Represent an error event + * ERTS_POLL_EV_NVAL - Represent an invalid event + * + * macro functions: + * ErtsSysFdType ERTS_POLL_RES_GET_FD(ErtsPollResFd *evt); + * void ERTS_POLL_RES_SET_FD(ErtsPollResFd *evt, ErtsSysFdType fd); + * ErtsPollEvents ERTS_POLL_RES_GET_EVTS(ErtsPollResFd *evt) + * void ERTS_POLL_RES_SET_EVTS(ErtsPollResFd *evt, ErtsPollEvents fd); + * + * functions: + * See erl_poll_api.h */ #ifndef ERL_POLL_H__ @@ -31,34 +51,29 @@ #include "sys.h" #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN - -#if 0 -#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS -#endif +#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX #ifdef ERTS_ENABLE_KERNEL_POLL -# if defined(ERTS_KERNEL_POLL_VERSION) -# define ERTS_POLL_EXPORT(FUNC) FUNC ## _kp +# undef ERTS_ENABLE_KERNEL_POLL +# define ERTS_ENABLE_KERNEL_POLL 1 +# if defined(ERTS_NO_KERNEL_POLL_VERSION) +# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk +# undef ERTS_NO_KERNEL_POLL_VERSION +# define ERTS_NO_KERNEL_POLL_VERSION 1 +# define ERTS_KERNEL_POLL_VERSION 0 # else -# define ERTS_POLL_EXPORT(FUNC) FUNC ## _nkp -# undef ERTS_POLL_DISABLE_KERNEL_POLL -# define ERTS_POLL_DISABLE_KERNEL_POLL +# undef ERTS_KERNEL_POLL_VERSION 
+# define ERTS_KERNEL_POLL_VERSION 1 +# define ERTS_NO_KERNEL_POLL_VERSION 0 +# define ERTS_POLL_EXPORT(FUNC) FUNC # endif #else # define ERTS_POLL_EXPORT(FUNC) FUNC -# undef ERTS_POLL_DISABLE_KERNEL_POLL -# define ERTS_POLL_DISABLE_KERNEL_POLL +# define ERTS_ENABLE_KERNEL_POLL 0 +# define ERTS_NO_KERNEL_POLL_VERSION 1 +# define ERTS_KERNEL_POLL_VERSION 0 #endif -#ifdef ERTS_POLL_DISABLE_KERNEL_POLL -# undef HAVE_SYS_EPOLL_H -# undef HAVE_SYS_EVENT_H -# undef HAVE_SYS_DEVPOLL_H -#endif - -#undef ERTS_POLL_USE_KERNEL_POLL -#define ERTS_POLL_USE_KERNEL_POLL 0 - #undef ERTS_POLL_USE_KQUEUE #define ERTS_POLL_USE_KQUEUE 0 #undef ERTS_POLL_USE_EPOLL @@ -70,68 +85,107 @@ #undef ERTS_POLL_USE_SELECT #define ERTS_POLL_USE_SELECT 0 -#if defined(HAVE_SYS_EVENT_H) -# undef ERTS_POLL_USE_KQUEUE -# define ERTS_POLL_USE_KQUEUE 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 -#elif defined(HAVE_SYS_EPOLL_H) -# undef ERTS_POLL_USE_EPOLL -# define ERTS_POLL_USE_EPOLL 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 -#elif defined(HAVE_SYS_DEVPOLL_H) -# undef ERTS_POLL_USE_DEVPOLL -# define ERTS_POLL_USE_DEVPOLL 1 -# undef ERTS_POLL_USE_KERNEL_POLL -# define ERTS_POLL_USE_KERNEL_POLL 1 +/* Defines which structure that erts_poll_wait should use to wait with + and how events should be represented */ +#define ERTS_POLL_USE_EPOLL_EVS 0 +#define ERTS_POLL_USE_KQUEUE_EVS 0 +#define ERTS_POLL_USE_DEVPOLL_EVS 0 +#define ERTS_POLL_USE_POLL_EVS 0 +#define ERTS_POLL_USE_SELECT_EVS 0 + +#define ERTS_POLL_USE_KERNEL_POLL ERTS_KERNEL_POLL_VERSION + +#if ERTS_ENABLE_KERNEL_POLL +# if defined(HAVE_SYS_EVENT_H) +# undef ERTS_POLL_USE_KQUEUE_EVS +# define ERTS_POLL_USE_KQUEUE_EVS 1 +# undef ERTS_POLL_USE_KQUEUE +# define ERTS_POLL_USE_KQUEUE ERTS_KERNEL_POLL_VERSION +# elif defined(HAVE_SYS_EPOLL_H) +# undef ERTS_POLL_USE_EPOLL_EVS +# define ERTS_POLL_USE_EPOLL_EVS 1 +# undef ERTS_POLL_USE_EPOLL +# define ERTS_POLL_USE_EPOLL ERTS_KERNEL_POLL_VERSION 
+# elif defined(HAVE_SYS_DEVPOLL_H) +# undef ERTS_POLL_USE_DEVPOLL_EVS +# define ERTS_POLL_USE_DEVPOLL_EVS 1 +# undef ERTS_POLL_USE_DEVPOLL +# define ERTS_POLL_USE_DEVPOLL ERTS_KERNEL_POLL_VERSION +# else +# error "Missing kernel poll implementation of erts_poll()" +# endif #endif -#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) - -#if !ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_FALLBACK +#if ERTS_NO_KERNEL_POLL_VERSION # if defined(ERTS_USE_POLL) +# undef ERTS_POLL_USE_POLL_EVS +# define ERTS_POLL_USE_POLL_EVS 1 # undef ERTS_POLL_USE_POLL # define ERTS_POLL_USE_POLL 1 # elif !defined(__WIN32__) +# undef ERTS_POLL_USE_SELECT_EVS +# define ERTS_POLL_USE_SELECT_EVS 1 # undef ERTS_POLL_USE_SELECT # define ERTS_POLL_USE_SELECT 1 # endif #endif +#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10 #define ERTS_POLL_USE_TIMERFD 0 typedef Uint32 ErtsPollEvents; -#undef ERTS_POLL_EV_E2N + +typedef enum { + ERTS_POLL_OP_ADD = 0, /* Add the FD to the pollset */ + ERTS_POLL_OP_MOD = 1, /* Modify the FD in the pollset */ + ERTS_POLL_OP_DEL = 2 /* Delete the FD from the pollset */ +} ErtsPollOp; + +#define op2str(op) (op == ERTS_POLL_OP_ADD ? "add" : \ + (op == ERTS_POLL_OP_MOD ? 
"mod" : "del")) #if defined(__WIN32__) /* --- win32 --------------------------------------- */ -#define ERTS_POLL_EV_IN 1 -#define ERTS_POLL_EV_OUT 2 -#define ERTS_POLL_EV_ERR 4 -#define ERTS_POLL_EV_NVAL 8 +#define ERTS_POLL_EV_IN 1 +#define ERTS_POLL_EV_OUT 2 +#define ERTS_POLL_EV_ERR 4 +#define ERTS_POLL_EV_NVAL 8 + +#define ERTS_POLL_EV_E2N(EV) (EV) +#define ERTS_POLL_EV_N2E(EV) (EV) -#elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#elif ERTS_POLL_USE_EPOLL_EVS /* --- epoll ------------------------------- */ #include <sys/epoll.h> +#if ERTS_POLL_USE_EPOLL #ifdef HAVE_SYS_TIMERFD_H #include <sys/timerfd.h> #undef ERTS_POLL_USE_TIMERFD #define ERTS_POLL_USE_TIMERFD 1 #endif +#endif #define ERTS_POLL_EV_E2N(EV) \ ((uint32_t) (EV)) #define ERTS_POLL_EV_N2E(EV) \ - ((ErtsPollEvents) (EV)) + ((ErtsPollEvents) (EV) & ~EPOLLONESHOT) #define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(EPOLLIN) #define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(EPOLLOUT) #define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(EPOLLET) #define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(EPOLLERR|EPOLLHUP) -#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ +typedef struct epoll_event ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->data.fd)) +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->data.fd = ident +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts) + +#elif ERTS_POLL_USE_DEVPOLL_EVS /* --- devpoll ----------------------------- */ #include <sys/devpoll.h> @@ -145,12 +199,37 @@ typedef Uint32 ErtsPollEvents; #define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(POLLNVAL) #define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP) -#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ +typedef struct pollfd ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->fd)) +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = 
ident +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->revents) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->revents = ERTS_POLL_EV_E2N(evts) + +#elif ERTS_POLL_USE_KQUEUE_EVS /* --- kqueue ------------------------------ */ /* Kqueue use fallback defines (poll() or select()) */ + +#include <sys/event.h> + +#ifdef ERTS_USE_POLL +# undef ERTS_POLL_USE_POLL_EVS +# define ERTS_POLL_USE_POLL_EVS 1 +#elif !defined(__WIN32__) +# undef ERTS_POLL_USE_SELECT_EVS +# define ERTS_POLL_USE_SELECT_EVS 1 #endif -#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */ +typedef struct kevent ErtsPollResFd; + +#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->ident)) +#define ERTS_POLL_RES_SET_FD(evt, fd) (evt)->ident = fd +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((ErtsPollEvents)(evt)->udata) +#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->udata = (void*)(UWord)(ERTS_POLL_EV_E2N(evts)) +#endif + +#if ERTS_POLL_USE_POLL_EVS + /* --- poll -------------------------------- */ #include <poll.h> #define ERTS_POLL_EV_NKP_E2N(EV) \ @@ -169,7 +248,7 @@ typedef Uint32 ErtsPollEvents; #define ERTS_POLL_EV_NKP_NVAL ERTS_POLL_EV_N2E(POLLNVAL) #define ERTS_POLL_EV_NKP_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP) -#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ +#elif ERTS_POLL_USE_SELECT_EVS /* --- select ------------------------------ */ #define ERTS_POLL_EV_NKP_E2N(EV) (EV) #define ERTS_POLL_EV_NKP_N2E(EV) (EV) @@ -195,69 +274,65 @@ typedef Uint32 ErtsPollEvents; #endif -typedef struct ErtsPollSet_ *ErtsPollSet; - -typedef struct { - ErtsSysFdType fd; - ErtsPollEvents events; - int on; -} ErtsPollControlEntry; +#if !ERTS_ENABLE_KERNEL_POLL -typedef struct { +typedef struct _ErtsPollResFd { ErtsSysFdType fd; ErtsPollEvents events; } ErtsPollResFd; +#define ERTS_POLL_RES_GET_FD(evt) (evt)->fd +#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = (ident) +#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events) 
+#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts) + +#endif + +#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))) + +#define ev2str(ev) \ + (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \ + ((ev) == ERTS_POLL_EV_IN ? "IN" : \ + ((ev) == ERTS_POLL_EV_OUT ? "OUT" : \ + ((ev) == (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) ? "IN|OUT" : \ + ((ev) & ERTS_POLL_EV_ERR ? "ERR" : \ + ((ev) & ERTS_POLL_EV_NVAL ? "NVAL" : "OTHER")))))) + + +typedef struct ERTS_POLL_EXPORT(erts_pollset) ErtsPollSet; + typedef struct { char *primary; - char *fallback; char *kernel_poll; Uint memory_size; - int poll_set_size; - int fallback_poll_set_size; + Uint poll_set_size; int lazy_updates; - int pending_updates; + Uint pending_updates; int batch_updates; int concurrent_updates; - int max_fds; -#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS - long no_avoided_wakeups; - long no_avoided_interrupts; - long no_interrupt_timed; -#endif + int is_fallback; + Uint max_fds; + Uint active_fds; + Uint poll_threads; } ErtsPollInfo; -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -void ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet); +#if defined(ERTS_POLL_USE_FALLBACK) && ERTS_KERNEL_POLL_VERSION +# undef ERTS_POLL_EXPORT +# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk +# include "erl_poll_api.h" +# undef ERTS_POLL_EXPORT +# define ERTS_POLL_EXPORT(FUNC) FUNC +#elif !defined(ERTS_POLL_USE_FALLBACK) +# define ERTS_POLL_USE_FALLBACK 0 #endif -void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet, - int); -void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet, - int, - ErtsMonotonicTime); -ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet, - ErtsSysFdType, - ErtsPollEvents, - int on, - int* wake_poller - ); -void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet, - ErtsPollControlEntry [], - int on); -int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet, - ErtsPollResFd [], - int *, 
- ErtsMonotonicTime); -int ERTS_POLL_EXPORT(erts_poll_max_fds)(void); -void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet, - ErtsPollInfo *); -ErtsPollSet ERTS_POLL_EXPORT(erts_poll_create_pollset)(void); -void ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet); -void ERTS_POLL_EXPORT(erts_poll_init)(void); -void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet, - ErtsPollEvents [], - int); +#include "erl_poll_api.h" + +/** + * Get the next size of the array that holds the file descriptors. + * This function is used in order for the check io array and the + * pollset array to be of the same size. + */ int erts_poll_new_table_len(int old_len, int need_len); #endif /* #ifndef ERL_POLL_H__ */ diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h new file mode 100644 index 0000000000..f3a91e54f7 --- /dev/null +++ b/erts/emulator/sys/common/erl_poll_api.h @@ -0,0 +1,126 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2018. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ +/** + * @description: Poll interface functions + * @author Lukas Larsson + * + * The functions in the header are used to interact with the poll + * implementation. Iff the kernel-poll implementation needs a fallback + * pollset, then all functions are exported twice. Once with a _flbk + * suffix and once without any suffix. 
If no fallback is needed, then + * only the non-suffix version is exported. + */ + +/** + * Initialize the poll implementation. Has to be called before any other function. + * @param[out] concurrent_waiters if not NULL, set to 1 if more then one thread + * is allowed to wait in the pollsets at the same time. + */ +void ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_waiters); +/** + * @brief Create a new pollset. + * @param id The unique debug id of this pollset. + */ +ErtsPollSet *ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id); + +/** + * Modify the contents of a pollset. This function can be called while one + * (or possibly more) thread is waiting in the pollset. + * + * @param ps the pollset to modify + * @param fd the file descriptor to modify + * @param op the type of operation to do. Normal usage is ADD,MOD...MOD,DEL. + * @param evts the events that we are changing interest to. Ignored if op is DEL. + * @param[in] wake_poller if set to 1 any thread waiting in the pollset will be woken. + * This parameter is ignored if the pollset supports concurrent waiters. + * @param[out] wake_poller set to 1 if the waiting thread was woken. + * @return The events set, or ERTS_POLL_EV_NVAL if it was not possible to add the + * fd to the pollset. + */ +ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, + ErtsSysFdType fd, + ErtsPollOp op, + ErtsPollEvents evts, + int *wake_poller); + +/** + * Wait for events to be ready in the pollset. If the erts_poll_init call + * set concurrent_waiters to 1, then multiple threads are allowed to call + * this function at the same time. + * + * When an event has been triggered on a fd, that event is disabled. To + * re-enable it the implementation has to call erts_poll_control again. + * + * @param ps the pollset to wait for events in + * @param res an array of fd results that the ready fds are put in. 
+ * @param[in] length the length of the res array + * @param[out] length the number of ready events returned in res + * @param tpd the thread progress data to note sleep state in + * @param timeout_time the time in native to wake up at + * @return 0 on success, else the ERRNO of the error that happened. + */ +int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, + ErtsPollResFd res[], + int *length, + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time); +/** + * Interrupt the thread waiting in the pollset. This function should be called + * with set = 0 before any thread calls erts_poll_wait in order to clear any + * interrupts that have happened while the thread was awake. + * + * This function has no effect on pollsets that support concurrent waiters. + * + * @param ps the pollset to wake + * @param set if 1, interrupt the pollset, if 0 clear the interrupt flag. + */ +void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set); + +/* Debug functions */ + +/** + * Get the maximum number of fds supported by the pollset + */ +int ERTS_POLL_EXPORT(erts_poll_max_fds)(void); +/** + * Get information about the given pollset + */ +void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, + ErtsPollInfo *info); +/** + * Get information about which events are currently selected. + * + * The unix fd is used to index into the array, so naturally this function does + * not work on windows. If the pollset cannot figure out what the selected + * events for a given fd is, it is set to ERTS_POLL_EV_NONE. + * + * @param ps the pollset to get events from + * @param evts an array of which events are selected on. + */ +void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, + ErtsPollEvents evts[], + int length); + +#ifdef ERTS_ENABLE_LOCK_COUNT +/** + * Enable lock counting of any locks within the pollset. 
+ */ +void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *, int enable); +#endif diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 79f87eb3a9..d34e1a9ec0 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,14 +45,6 @@ #endif #endif -/* - * erts_check_io_time is used by the erl_check_io implementation. The - * global erts_check_io_time variable is declared here since there - * (often) exist two versions of erl_check_io (kernel-poll and - * non-kernel-poll), and we dont want two versions of this variable. - */ -erts_smp_atomic_t erts_check_io_time; - /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; @@ -150,7 +142,16 @@ sys_double_to_chars(double fp, char *buffer, size_t buffer_size) return sys_double_to_chars_ext(fp, buffer, buffer_size, SYS_DEFAULT_FLOAT_DECIMALS); } -/* Convert float to string using fixed point notation. + +#if SIZEOF_LONG == 8 +# define round_int64 lround +#elif SIZEOF_LONG_LONG == 8 +# define round_int64 llround +#else +# error "No 64-bit integer type?" 
+#endif + +/* Convert float to string * decimals must be >= 0 * if compact != 0, the trailing 0's will be truncated */ @@ -158,93 +159,40 @@ int sys_double_to_chars_fast(double f, char *buffer, int buffer_size, int decimals, int compact) { - /* Note that some C compilers don't support "static const" propagation - * so we use a defines */ - #define SYS_DOUBLE_RND_CONST 0.55555555555555555 + #define SYS_DOUBLE_RND_CONST 0.5 #define FRAC_SIZE 52 #define EXP_SIZE 11 - #define EXP_MASK ((1ll << EXP_SIZE) - 1) - #define MAX_DECIMALS (sizeof(cs_sys_double_pow10) \ - / sizeof(cs_sys_double_pow10[0])) - #define FRAC_MASK ((1ll << FRAC_SIZE) - 1) - #define FRAC_MASK2 ((1ll << (FRAC_SIZE + 1)) - 1) - #define MAX_FLOAT (1ll << (FRAC_SIZE+1)) - - static const double cs_sys_double_pow10[] = { - SYS_DOUBLE_RND_CONST / 1ll, - SYS_DOUBLE_RND_CONST / 10ll, - SYS_DOUBLE_RND_CONST / 100ll, - SYS_DOUBLE_RND_CONST / 1000ll, - SYS_DOUBLE_RND_CONST / 10000ll, - SYS_DOUBLE_RND_CONST / 100000ll, - SYS_DOUBLE_RND_CONST / 1000000ll, - SYS_DOUBLE_RND_CONST / 10000000ll, - SYS_DOUBLE_RND_CONST / 100000000ll, - SYS_DOUBLE_RND_CONST / 1000000000ll, - SYS_DOUBLE_RND_CONST / 10000000000ll, - SYS_DOUBLE_RND_CONST / 100000000000ll, - SYS_DOUBLE_RND_CONST / 1000000000000ll, - SYS_DOUBLE_RND_CONST / 10000000000000ll, - SYS_DOUBLE_RND_CONST / 100000000000000ll, - SYS_DOUBLE_RND_CONST / 1000000000000000ll, - SYS_DOUBLE_RND_CONST / 10000000000000000ll, - SYS_DOUBLE_RND_CONST / 100000000000000000ll, - SYS_DOUBLE_RND_CONST / 1000000000000000000ll + #define EXP_MASK (((Uint64)1 << EXP_SIZE) - 1) + #define MAX_DECIMALS (sizeof(pow10v) / sizeof(pow10v[0])) + #define FRAC_MASK (((Uint64)1 << FRAC_SIZE) - 1) + #define FRAC_MASK2 (((Uint64)1 << (FRAC_SIZE + 1)) - 1) + #define MAX_FLOAT ((Uint64)1 << (FRAC_SIZE+1)) + + static const double pow10v[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18 }; - long long mantissa, int_part = 0, frac_part = 0; - 
short exp; - int max; + double af; + Uint64 int_part, frac_part; int neg; - double fr; - union { long long L; double F; } x; + int has_decimals = decimals != 0; char *p = buffer; if (decimals < 0) return -1; - /* Round the number to given decimal places. The number of 5's in the - * SYS_DOUBLE_RND_CONST constant is chosen such that adding any more 5's doesn't - * change the double precision of the number, i.e.: - * 1> term_to_binary(0.55555555555555555, [{minor_version, 1}]). - * <<131,70,63,225,199,28,113,199,28,114>> - * 2> term_to_binary(0.5555555555555555555, [{minor_version, 1}]). - * <<131,70,63,225,199,28,113,199,28,114>> - */ - if (f >= 0) { - neg = 0; - fr = decimals < MAX_DECIMALS ? (f + cs_sys_double_pow10[decimals]) : f; - x.F = fr; - } else { + if (f < 0) { neg = 1; - fr = decimals < MAX_DECIMALS ? (f - cs_sys_double_pow10[decimals]) : f; - x.F = -fr; + af = -f; } - - exp = (x.L >> FRAC_SIZE) & EXP_MASK; - mantissa = x.L & FRAC_MASK; - - if (exp == EXP_MASK) { - if (mantissa == 0) { - if (neg) - *p++ = '-'; - *p++ = 'i'; - *p++ = 'n'; - *p++ = 'f'; - } else { - *p++ = 'n'; - *p++ = 'a'; - *p++ = 'n'; - } - *p = '\0'; - return p - buffer; + else { + neg = 0; + af = f; } - exp -= EXP_MASK >> 1; - mantissa |= (1ll << FRAC_SIZE); - /* Don't bother with optimizing too large numbers or too large precision */ - if (x.F > MAX_FLOAT || decimals >= MAX_DECIMALS) { + if (af > MAX_FLOAT || decimals >= MAX_DECIMALS) { int len = erts_snprintf(buffer, buffer_size, "%.*f", decimals, f); char* p = buffer + len; if (len >= buffer_size) @@ -254,62 +202,64 @@ sys_double_to_chars_fast(double f, char *buffer, int buffer_size, int decimals, p = find_first_trailing_zero(p); *p = '\0'; return p - buffer; - } else if (exp >= FRAC_SIZE) { - int_part = mantissa << (exp - FRAC_SIZE); - } else if (exp >= 0) { - int_part = mantissa >> (FRAC_SIZE - exp); - frac_part = (mantissa << (exp + 1)) & FRAC_MASK2; - } else /* if (exp < 0) */ { - frac_part = (mantissa & FRAC_MASK2) >> -(exp + 
1); - } - - if (!int_part) { - if (neg) - *p++ = '-'; - *p++ = '0'; - } else { - int ret, i, n; - while (int_part != 0) { - long long j = int_part / 10; - *p++ = (char)(int_part - ((j << 3) + (j << 1)) + '0'); - int_part = j; - } - if (neg) - *p++ = '-'; - /* Reverse string */ - ret = p - buffer; - for (i = 0, n = ret/2; i < n; i++) { - int j = ret - i - 1; - char c = buffer[i]; - buffer[i] = buffer[j]; - buffer[j] = c; - } } - if (decimals > 0) { - int i; - *p++ = '.'; + if (decimals) { + double int_f = floor(af); + double frac_f = round((af - int_f) * pow10v[decimals]); - max = buffer_size - (p - buffer) - 1 /* leave room for trailing '\0' */; + int_part = (Uint64)int_f; + frac_part = (Uint64)frac_f; - if (decimals > max) - return -1; /* the number is not large enough to fit in the buffer */ + if (frac_f >= pow10v[decimals]) { + /* rounding overflow carry into int_part */ + int_part++; + frac_part = 0; + } - max = decimals; + do { + Uint64 n; + if (!frac_part) { + do { + *p++ = '0'; + } while (--decimals); + break; + } + n = frac_part / 10; + *p++ = (char)((frac_part - n*10) + '0'); + frac_part = n; + } while (--decimals); - for (i = 0; i < max; i++) { - /* frac_part *= 10; */ - frac_part = (frac_part << 3) + (frac_part << 1); + *p++ = '.'; + } + else + int_part = (Uint64)round_int64(af); - *p++ = (char)((frac_part >> (FRAC_SIZE + 1)) + '0'); - frac_part &= FRAC_MASK2; + if (!int_part) { + *p++ = '0'; + } else { + do { + Uint64 n = int_part / 10; + *p++ = (char)((int_part - n*10) + '0'); + int_part = n; + } while (int_part); + } + if (neg) + *p++ = '-'; + + {/* Reverse string */ + int i = 0; + int j = p - buffer - 1; + for ( ; i < j; i++, j--) { + char tmp = buffer[i]; + buffer[i] = buffer[j]; + buffer[j] = tmp; } - - /* Delete trailing zeroes */ - if (compact) - p = find_first_trailing_zero(p); } + /* Delete trailing zeroes */ + if (compact && has_decimals) + p = find_first_trailing_zero(p); *p = '\0'; return p - buffer; } diff --git 
a/erts/emulator/sys/unix/erl_child_setup.c b/erts/emulator/sys/unix/erl_child_setup.c index 69fc6c2879..129861ebd5 100644 --- a/erts/emulator/sys/unix/erl_child_setup.c +++ b/erts/emulator/sys/unix/erl_child_setup.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,6 +56,8 @@ #include <stdio.h> #include <stdarg.h> #include <sys/wait.h> +#include <sys/types.h> +#include <sys/socket.h> #define WANT_NONBLOCKING @@ -131,6 +133,7 @@ static int sigchld_pipe[2]; static int start_new_child(int pipes[]) { + struct sigaction sa; int errln = -1; int size, res, i, pos = 0; char *buff, *o_buff; @@ -141,6 +144,16 @@ start_new_child(int pipes[]) /* only child executes here */ + /* Restore default handling of sigterm... */ + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + + if (sigaction(SIGTERM, &sa, 0) == -1) { + perror(NULL); + exit(1); + } + do { res = read(pipes[0], (char*)&size, sizeof(size)); } while(res < 0 && (errno == EINTR || errno == ERRNO_BLOCK)); @@ -437,6 +450,21 @@ main(int argc, char *argv[]) exit(1); } + /* Ignore SIGTERM. + Some container environments send SIGTERM to all processes + when terminating. We don't want erl_child_setup to terminate + in these cases as that will prevent beam from properly + cleaning up. 
+ */ + sa.sa_handler = SIG_IGN; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + + if (sigaction(SIGTERM, &sa, 0) == -1) { + perror(NULL); + exit(1); + } + forker_hash_init(); SET_CLOEXEC(uds_fd); diff --git a/erts/emulator/sys/unix/erl_child_setup.h b/erts/emulator/sys/unix/erl_child_setup.h index a28b136bfc..0058b92344 100644 --- a/erts/emulator/sys/unix/erl_child_setup.h +++ b/erts/emulator/sys/unix/erl_child_setup.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2015-2015. All Rights Reserved. + * Copyright Ericsson AB 2015-2017. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ * * %CopyrightEnd% * - * This file defines the interface inbetween erts and child_setup. + * This file defines the interface between erts and child_setup. */ #ifndef _ERL_UNIX_FORKER_H diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index b64b0d87f6..ae7a3ea23e 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. + * Copyright Ericsson AB 1997-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,10 +46,10 @@ #include <signal.h> #include <setjmp.h> -#if HAVE_SYS_SOCKETIO_H +#ifdef HAVE_SYS_SOCKETIO_H # include <sys/socketio.h> #endif -#if HAVE_SYS_SOCKIO_H +#ifdef HAVE_SYS_SOCKIO_H # include <sys/sockio.h> #endif @@ -86,6 +86,10 @@ #include <sys/times.h> +#ifdef HAVE_SYS_RESOURCE_H +# include <sys/resource.h> +#endif + #ifdef HAVE_IEEEFP_H #include <ieeefp.h> #endif @@ -128,12 +132,8 @@ /* File descriptors are numbers anc consecutively allocated on Unix */ #define ERTS_SYS_CONTINOUS_FD_NUMBERS -#ifndef ERTS_SMP -# undef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -# define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#endif -typedef void *GETENV_STATE; +void erts_sys_env_init(void); /* ** For the erl_timer_sup module. @@ -264,7 +264,7 @@ erts_os_monotonic_time(void) ERTS_GLB_INLINE void erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) { - return (*erts_sys_time_data__.r.o.os_times)(mtimep, stimep); + (*erts_sys_time_data__.r.o.os_times)(mtimep, stimep); } #endif /* ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__ */ @@ -292,6 +292,8 @@ erts_sys_perf_counter() /* * Functions for measuring CPU time + * + * Note that gethrvtime is time per process and clock_gettime is per thread. 
*/ #if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME)) @@ -300,15 +302,15 @@ typedef struct timespec SysTimespec; #if defined(HAVE_GETHRVTIME) #define sys_gethrvtime() gethrvtime() -#define sys_get_proc_cputime(t,tp) (t) = sys_gethrvtime(), \ - (tp).tv_sec = (time_t)((t)/1000000000LL), \ - (tp).tv_nsec = (long)((t)%1000000000LL) +#define sys_get_cputime(t,tp) (t) = sys_gethrvtime(), \ + (tp).tv_sec = (time_t)((t)/1000000000LL), \ + (tp).tv_nsec = (long)((t)%1000000000LL) int sys_start_hrvtime(void); int sys_stop_hrvtime(void); #elif defined(HAVE_CLOCK_GETTIME_CPU_TIME) #define sys_clock_gettime(cid,tp) clock_gettime((cid),&(tp)) -#define sys_get_proc_cputime(t,tp) sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID,(tp)) +#define sys_get_cputime(t,tp) sys_clock_gettime(CLOCK_THREAD_CPUTIME_ID,(tp)) #endif #endif @@ -322,6 +324,7 @@ extern SIGFUNC sys_signal(int, SIGFUNC); extern void sys_sigrelease(int); extern void sys_sigblock(int); extern void sys_init_suspend_handler(void); +extern void erts_sys_unix_later_init(void); /* * Handling of floating point exceptions. 
@@ -353,9 +356,7 @@ extern void sys_init_suspend_handler(void); #ifdef NO_FPE_SIGNALS #define erts_get_current_fp_exception() NULL -#ifdef ERTS_SMP #define erts_thread_init_fp_exception() do{}while(0) -#endif # define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0) # define __ERTS_FP_ERROR(fpexnp, f, Action) if (!isfinite(f)) { Action; } else {} # define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action) @@ -368,9 +369,7 @@ extern void sys_init_suspend_handler(void); #else /* !NO_FPE_SIGNALS */ extern volatile unsigned long *erts_get_current_fp_exception(void); -#ifdef ERTS_SMP extern void erts_thread_init_fp_exception(void); -#endif # if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) # define erts_fwait(fpexnp,f) \ __asm__ __volatile__("fwait" : "=m"(*(fpexnp)) : "m"(f)) @@ -437,10 +436,8 @@ void erts_sys_unblock_fpe(int); /* Threads */ -#ifdef USE_THREADS extern int init_async(int); extern int exit_async(void); -#endif #define ERTS_EXIT_AFTER_DUMP _exit diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index 089efec3e8..4823e549ea 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2016. All Rights Reserved. + * Copyright Ericsson AB 1996-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,7 +50,6 @@ #endif #define ERTS_WANT_BREAK_HANDLING -#define ERTS_WANT_GOT_SIGUSR1 #define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */ #include "sys.h" #include "erl_thr_progress.h" @@ -59,15 +58,10 @@ #define __DARWIN__ 1 #endif -#ifdef USE_THREADS #include "erl_threads.h" -#endif #include "erl_mseg.h" -extern char **environ; -erts_smp_rwmtx_t environ_rwmtx; - #define MAX_VSIZE 16 /* Max number of entries allowed in an I/O * vector sock_sendv(). 
*/ @@ -80,7 +74,7 @@ erts_smp_rwmtx_t environ_rwmtx; #include "erl_check_io.h" #include "erl_cpu_topology.h" - +#include "erl_osenv.h" extern int driver_interrupt(int, int); extern void do_break(void); @@ -90,43 +84,24 @@ extern void erl_sys_args(int*, char**); extern void erts_sys_init_float(void); -extern void erl_crash_dump(char* file, int line, char* fmt, ...); - #ifdef DEBUG static int debug_log = 0; #endif -#ifdef ERTS_SMP -erts_smp_atomic32_t erts_got_sigusr1; -#define ERTS_SET_GOT_SIGUSR1 \ - erts_smp_atomic32_set_mb(&erts_got_sigusr1, 1) -#define ERTS_UNSET_GOT_SIGUSR1 \ - erts_smp_atomic32_set_mb(&erts_got_sigusr1, 0) -static erts_smp_atomic32_t have_prepared_crash_dump; +static erts_atomic32_t have_prepared_crash_dump; #define ERTS_PREPARED_CRASH_DUMP \ - ((int) erts_smp_atomic32_xchg_nob(&have_prepared_crash_dump, 1)) -#else -volatile int erts_got_sigusr1; -#define ERTS_SET_GOT_SIGUSR1 (erts_got_sigusr1 = 1) -#define ERTS_UNSET_GOT_SIGUSR1 (erts_got_sigusr1 = 0) -static volatile int have_prepared_crash_dump; -#define ERTS_PREPARED_CRASH_DUMP \ - (have_prepared_crash_dump++) -#endif + ((int) erts_atomic32_xchg_nob(&have_prepared_crash_dump, 1)) -erts_smp_atomic_t sys_misc_mem_sz; +erts_atomic_t sys_misc_mem_sz; -#if defined(ERTS_SMP) -static void smp_sig_notify(char c); +static void smp_sig_notify(int signum); static int sig_notify_fds[2] = {-1, -1}; -#if !defined(ETHR_UNUSABLE_SIGUSRX) && defined(ERTS_THR_HAVE_SIG_FUNCS) +#ifdef ERTS_SYS_SUSPEND_SIGNAL static int sig_suspend_fds[2] = {-1, -1}; -#define ERTS_SYS_SUSPEND_SIGNAL SIGUSR2 #endif -#endif jmp_buf erts_sys_sigsegv_jmp; @@ -140,149 +115,36 @@ static int max_files = -1; /* * a few variables used by the break handler */ -#ifdef ERTS_SMP -erts_smp_atomic32_t erts_break_requested; +erts_atomic32_t erts_break_requested; #define ERTS_SET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) #define 
ERTS_UNSET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) -#else -volatile int erts_break_requested = 0; -#define ERTS_SET_BREAK_REQUESTED (erts_break_requested = 1) -#define ERTS_UNSET_BREAK_REQUESTED (erts_break_requested = 0) -#endif + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) + + /* set early so the break handler has access to initial mode */ static struct termios initial_tty_mode; static int replace_intr = 0; /* assume yes initially, ttsl_init will clear it */ -int using_oldshell = 1; - -#ifdef ERTS_ENABLE_KERNEL_POLL - -int erts_use_kernel_poll = 0; - -struct { - int (*select)(ErlDrvPort, ErlDrvEvent, int, int); - int (*event)(ErlDrvPort, ErlDrvEvent, ErlDrvEventData); - void (*check_io_as_interrupt)(void); - void (*check_io_interrupt)(int); - void (*check_io_interrupt_tmd)(int, ErtsMonotonicTime); - void (*check_io)(int); - Uint (*size)(void); - Eterm (*info)(void *); - int (*check_io_debug)(ErtsCheckIoDebugInfo *); -} io_func = {0}; +int using_oldshell = 1; - -int -driver_select(ErlDrvPort port, ErlDrvEvent event, int mode, int on) -{ - return (*io_func.select)(port, event, mode, on); -} - -int -driver_event(ErlDrvPort port, ErlDrvEvent event, ErlDrvEventData event_data) -{ - return (*io_func.event)(port, event, event_data); -} - -Eterm erts_check_io_info(void *p) -{ - return (*io_func.info)(p); -} - -int -erts_check_io_debug(ErtsCheckIoDebugInfo *ip) +UWord +erts_sys_get_page_size(void) { - return (*io_func.check_io_debug)(ip); -} - - -static void -init_check_io(void) -{ - if (erts_use_kernel_poll) { - io_func.select = driver_select_kp; - io_func.event = driver_event_kp; -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT - io_func.check_io_as_interrupt = erts_check_io_async_sig_interrupt_kp; -#endif - io_func.check_io_interrupt = erts_check_io_interrupt_kp; - io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_kp; - io_func.check_io = erts_check_io_kp; - io_func.size = 
erts_check_io_size_kp; - io_func.info = erts_check_io_info_kp; - io_func.check_io_debug = erts_check_io_debug_kp; - erts_init_check_io_kp(); - max_files = erts_check_io_max_files_kp(); - } - else { - io_func.select = driver_select_nkp; - io_func.event = driver_event_nkp; -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT - io_func.check_io_as_interrupt = erts_check_io_async_sig_interrupt_nkp; -#endif - io_func.check_io_interrupt = erts_check_io_interrupt_nkp; - io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_nkp; - io_func.check_io = erts_check_io_nkp; - io_func.size = erts_check_io_size_nkp; - io_func.info = erts_check_io_info_nkp; - io_func.check_io_debug = erts_check_io_debug_nkp; - erts_init_check_io_nkp(); - max_files = erts_check_io_max_files_nkp(); - } -} - -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#define ERTS_CHK_IO_AS_INTR() (*io_func.check_io_as_interrupt)() +#if defined(_SC_PAGESIZE) + return (UWord) sysconf(_SC_PAGESIZE); +#elif defined(HAVE_GETPAGESIZE) + return (UWord) getpagesize(); #else -#define ERTS_CHK_IO_AS_INTR() (*io_func.check_io_interrupt)(1) + return (UWord) 4*1024; /* Guess 4 KB */ #endif -#define ERTS_CHK_IO_INTR (*io_func.check_io_interrupt) -#define ERTS_CHK_IO_INTR_TMD (*io_func.check_io_interrupt_tmd) -#define ERTS_CHK_IO (*io_func.check_io) -#define ERTS_CHK_IO_SZ (*io_func.size) - -#else /* !ERTS_ENABLE_KERNEL_POLL */ - -static void -init_check_io(void) -{ - erts_init_check_io(); - max_files = erts_check_io_max_files(); } -#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT -#define ERTS_CHK_IO_AS_INTR() erts_check_io_async_sig_interrupt() -#else -#define ERTS_CHK_IO_AS_INTR() erts_check_io_interrupt(1) -#endif -#define ERTS_CHK_IO_INTR erts_check_io_interrupt -#define ERTS_CHK_IO_INTR_TMD erts_check_io_interrupt_timed -#define ERTS_CHK_IO erts_check_io -#define ERTS_CHK_IO_SZ erts_check_io_size - -#endif - -void -erts_sys_schedule_interrupt(int set) -{ - ERTS_CHK_IO_INTR(set); -} - -#ifdef ERTS_SMP -void 
-erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time) -{ - ERTS_CHK_IO_INTR_TMD(set, timeout_time); -} -#endif - Uint erts_sys_misc_mem_sz(void) { - Uint res = ERTS_CHK_IO_SZ(); - res += erts_smp_atomic_read_mb(&sys_misc_mem_sz); + Uint res = erts_check_io_size(); + res += erts_atomic_read_mb(&sys_misc_mem_sz); return res; } @@ -307,7 +169,6 @@ MALLOC_USE_HASH(1); #endif #endif -#ifdef USE_THREADS #ifdef ERTS_THR_HAVE_SIG_FUNCS @@ -386,19 +247,15 @@ thr_create_prepare_child(void *vtcdp) erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); } -#endif /* #ifdef USE_THREADS */ void erts_sys_pre_init(void) { -#ifdef USE_THREADS erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER; -#endif erts_printf_add_cr_to_stdout = 1; erts_printf_add_cr_to_stderr = 1; -#ifdef USE_THREADS eid.thread_create_child_func = thr_create_prepare_child; /* Before creation in parent */ @@ -406,37 +263,29 @@ erts_sys_pre_init(void) /* After creation in parent */ eid.thread_create_parent_func = thr_create_cleanup, -#ifdef ERTS_THR_HAVE_SIG_FUNCS - sigemptyset(&thr_create_sigmask); - sigaddset(&thr_create_sigmask, SIGINT); /* block interrupt */ - sigaddset(&thr_create_sigmask, SIGUSR1); /* block user defined signal */ +#ifdef ERTS_ENABLE_LOCK_COUNT + erts_lcnt_pre_thr_init(); #endif erts_thr_init(&eid); #ifdef ERTS_ENABLE_LOCK_COUNT - erts_lcnt_init(); + erts_lcnt_post_thr_init(); +#endif + +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_init(); #endif -#endif /* USE_THREADS */ erts_init_sys_time_sup(); -#ifdef USE_THREADS -#ifdef ERTS_SMP - erts_smp_atomic32_init_nob(&erts_break_requested, 0); - erts_smp_atomic32_init_nob(&erts_got_sigusr1, 0); - erts_smp_atomic32_init_nob(&have_prepared_crash_dump, 0); -#else - erts_break_requested = 0; - erts_got_sigusr1 = 0; - have_prepared_crash_dump = 0; -#endif + erts_atomic32_init_nob(&erts_break_requested, 0); + erts_atomic32_init_nob(&have_prepared_crash_dump, 0); -#endif /* USE_THREADS */ - 
erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0); + erts_atomic_init_nob(&sys_misc_mem_sz, 0); { /* @@ -499,10 +348,8 @@ SIGFUNC sys_signal(int sig, SIGFUNC func) return(oact.sa_handler); } -#ifdef USE_THREADS #undef sigprocmask #define sigprocmask erts_thr_sigmask -#endif void sys_sigblock(int sig) { @@ -522,11 +369,13 @@ void sys_sigrelease(int sig) sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)NULL); } +#ifdef ERTS_HAVE_TRY_CATCH void erts_sys_sigsegv_handler(int signo) { if (signo == SIGSEGV) { longjmp(erts_sys_sigsegv_jmp, 1); } } +#endif /* * Function returns 1 if we can read from all values in between @@ -602,10 +451,10 @@ prepare_crash_dump(int secs) close(crashdump_companion_cube_fd); envsz = sizeof(env); - i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); - if (i >= 0) { + i = erts_sys_explicit_8bit_getenv("ERL_CRASH_DUMP_NICE", env, &envsz); + if (i != 0) { int nice_val; - nice_val = i != 0 ? 0 : atoi(env); + nice_val = (i != 1) ? 0 : atoi(env); if (nice_val > 39) { nice_val = 39; } @@ -622,6 +471,28 @@ int erts_sys_prepare_crash_dump(int secs) return prepare_crash_dump(secs); } +static void signal_notify_requested(Eterm type) { + Process* p = NULL; + Eterm msg, *hp; + ErtsProcLocks locks = 0; + ErlOffHeap *ohp; + + Eterm id = erts_whereis_name_to_id(NULL, am_erl_signal_server); + + if ((p = (erts_pid2proc_opt(NULL, 0, id, 0, ERTS_P2P_FLG_INC_REFC))) != NULL) { + ErtsMessage *msgp = erts_alloc_message_heap(p, &locks, 3, &hp, &ohp); + + /* erl_signal_server ! {notify, sighup} */ + msg = TUPLE2(hp, am_notify, type); + erts_queue_message(p, locks, msgp, msg, am_system); + + if (locks) + erts_proc_unlock(p, locks); + erts_proc_dec_refc(p); + } +} + + static ERTS_INLINE void break_requested(void) { @@ -629,50 +500,17 @@ break_requested(void) * just set a flag - checked for and handled by * scheduler threads erts_check_io() (not signal handler). 
*/ -#ifdef DEBUG - fprintf(stderr,"break!\n"); -#endif if (ERTS_BREAK_REQUESTED) erts_exit(ERTS_INTR_EXIT, ""); ERTS_SET_BREAK_REQUESTED; - ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ + /* Wake aux thread to get handle break */ + erts_aux_thread_poke(); } -/* set up signal handlers for break and quit */ -#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) -static RETSIGTYPE request_break(void) -#else static RETSIGTYPE request_break(int signum) -#endif -{ -#ifdef ERTS_SMP - smp_sig_notify('I'); -#else - break_requested(); -#endif -} - -static ERTS_INLINE void -sigusr1_exit(void) { - char env[21]; /* enough to hold any 64-bit integer */ - size_t envsz; - int i, secs = -1; - - /* We do this at interrupt level, since the main reason for - * wanting to generate a crash dump in this way is that the emulator - * is hung somewhere, so it won't be able to poll any flag we set here. - */ - ERTS_SET_GOT_SIGUSR1; - - envsz = sizeof(env); - if ((i = erts_sys_getenv_raw("ERL_CRASH_DUMP_SECONDS", env, &envsz)) >= 0) { - secs = i != 0 ? 
0 : atoi(env); - } - - prepare_crash_dump(secs); - erts_exit(ERTS_ERROR_EXIT, "Received SIGUSR1\n"); + smp_sig_notify(signum); } #ifdef ETHR_UNUSABLE_SIGUSRX @@ -695,19 +533,6 @@ sys_thr_resume(erts_tid_t tid) { } #endif -#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) -static RETSIGTYPE user_signal1(void) -#else -static RETSIGTYPE user_signal1(int signum) -#endif -{ -#ifdef ERTS_SMP - smp_sig_notify('1'); -#else - sigusr1_exit(); -#endif -} - #ifdef ERTS_SYS_SUSPEND_SIGNAL #if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) static RETSIGTYPE suspend_signal(void) @@ -715,42 +540,115 @@ static RETSIGTYPE suspend_signal(void) static RETSIGTYPE suspend_signal(int signum) #endif { - int res, buf[1], __errno = errno; + int res, buf[1], tmp_errno = errno; do { res = read(sig_suspend_fds[0], buf, sizeof(int)); } while (res < 0 && errno == EINTR); /* restore previous errno in case read changed it */ - errno = __errno; + errno = tmp_errno; } #endif /* #ifdef ERTS_SYS_SUSPEND_SIGNAL */ #endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ -static void -quit_requested(void) +/* + Signal Action Comment + ───────────────────────────────────────────────────────────── + SIGHUP Term Hangup detected on controlling terminal or death of controlling process + !SIGINT Term Interrupt from keyboard + SIGQUIT Core Quit from keyboard + !SIGILL Core Illegal Instruction + SIGABRT Core Abort signal from abort(3) + !SIGFPE Core Floating point exception + !SIGKILL Term Kill signal + !SIGSEGV Core Invalid memory reference + !SIGPIPE Term Broken pipe: write to pipe with no readers + SIGALRM Term Timer signal from alarm(2) + SIGTERM Term Termination signal + SIGUSR1 Term User-defined signal 1 + SIGUSR2 Term User-defined signal 2 + !SIGCHLD Ign Child stopped or terminated + !SIGCONT Cont Continue if stopped + SIGSTOP Stop Stop process + SIGTSTP Stop Stop typed at terminal + !SIGTTIN Stop Terminal input for background process + !SIGTTOU Stop Terminal output for background process +*/ + + +static ERTS_INLINE int 
+signalterm_to_signum(Eterm signal) { - erts_exit(ERTS_INTR_EXIT, ""); + switch (signal) { + case am_sighup: return SIGHUP; + /* case am_sigint: return SIGINT; */ + case am_sigquit: return SIGQUIT; + /* case am_sigill: return SIGILL; */ + case am_sigabrt: return SIGABRT; + /* case am_sigsegv: return SIGSEGV; */ + case am_sigalrm: return SIGALRM; + case am_sigterm: return SIGTERM; + case am_sigusr1: return SIGUSR1; + case am_sigusr2: return SIGUSR2; + case am_sigchld: return SIGCHLD; + case am_sigstop: return SIGSTOP; + case am_sigtstp: return SIGTSTP; + default: return 0; + } } -#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL)) -static RETSIGTYPE do_quit(void) -#else -static RETSIGTYPE do_quit(int signum) -#endif +static ERTS_INLINE Eterm +signum_to_signalterm(int signum) { -#ifdef ERTS_SMP - smp_sig_notify('Q'); -#else - quit_requested(); -#endif + switch (signum) { + case SIGHUP: return am_sighup; + /* case SIGINT: return am_sigint; */ /* ^c */ + case SIGQUIT: return am_sigquit; /* ^\ */ + /* case SIGILL: return am_sigill; */ + case SIGABRT: return am_sigabrt; + /* case SIGSEGV: return am_sigsegv; */ + case SIGALRM: return am_sigalrm; + case SIGTERM: return am_sigterm; + case SIGUSR1: return am_sigusr1; + case SIGUSR2: return am_sigusr2; + case SIGCHLD: return am_sigchld; + case SIGSTOP: return am_sigstop; + case SIGTSTP: return am_sigtstp; /* ^z */ + default: return am_error; + } +} + +static RETSIGTYPE generic_signal_handler(int signum) +{ + smp_sig_notify(signum); +} + +int erts_set_signal(Eterm signal, Eterm type) { + int signum; + if ((signum = signalterm_to_signum(signal)) > 0) { + if (type == am_ignore) { + sys_signal(signum, SIG_IGN); + } else if (type == am_default) { + sys_signal(signum, SIG_DFL); + } else { + sys_signal(signum, generic_signal_handler); + } + return 1; + } + return 0; } /* Disable break */ void erts_set_ignore_break(void) { - sys_signal(SIGINT, SIG_IGN); - sys_signal(SIGQUIT, SIG_IGN); - sys_signal(SIGTSTP, SIG_IGN); + /* + * Ignore 
signals that can be sent to the VM by + * typing certain key combinations at the + * controlling terminal... + */ + sys_signal(SIGINT, SIG_IGN); /* Ctrl-C */ + sys_signal(SIGQUIT, SIG_IGN); /* Ctrl-\ */ + sys_signal(SIGTSTP, SIG_IGN); /* Ctrl-Z */ } /* Don't use ctrl-c for break handler but let it be @@ -760,11 +658,11 @@ void erts_replace_intr(void) { if (isatty(0)) { tcgetattr(0, &mode); - + /* here's an example of how to replace ctrl-c with ctrl-u */ /* mode.c_cc[VKILL] = 0; mode.c_cc[VINTR] = CKILL; */ - + mode.c_cc[VINTR] = 0; /* disable ctrl-c */ tcsetattr(0, TCSANOW, &mode); replace_intr = 1; @@ -773,11 +671,11 @@ void erts_replace_intr(void) { void init_break_handler(void) { - sys_signal(SIGINT, request_break); + sys_signal(SIGINT, request_break); #ifndef ETHR_UNUSABLE_SIGUSRX - sys_signal(SIGUSR1, user_signal1); + sys_signal(SIGUSR1, generic_signal_handler); #endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */ - sys_signal(SIGQUIT, do_quit); + sys_signal(SIGQUIT, generic_signal_handler); } void sys_init_suspend_handler(void) @@ -787,9 +685,15 @@ void sys_init_suspend_handler(void) #endif } +void +erts_sys_unix_later_init(void) +{ + sys_signal(SIGTERM, generic_signal_handler); +} + int sys_max_files(void) { - return(max_files); + return max_files; } /************************** OS info *******************************/ @@ -817,10 +721,7 @@ get_number(char **str_ptr) } } -void -os_flavor(char* namebuf, /* Where to return the name. */ - unsigned size) /* Size of name buffer. */ -{ +void os_flavor(char* namebuf, unsigned size) { struct utsname uts; /* Information about the system. */ char* s; @@ -833,63 +734,29 @@ os_flavor(char* namebuf, /* Where to return the name. */ strcpy(namebuf, uts.sysname); } -void -os_version(pMajor, pMinor, pBuild) -int* pMajor; /* Pointer to major version. */ -int* pMinor; /* Pointer to minor version. */ -int* pBuild; /* Pointer to build number. 
*/ -{ +void os_version(int *pMajor, int *pMinor, int *pBuild) { struct utsname uts; /* Information about the system. */ char* release; /* Pointer to the release string: - * X.Y or X.Y.Z. - */ + * X.Y or X.Y.Z. */ (void) uname(&uts); release = uts.release; - *pMajor = get_number(&release); - *pMinor = get_number(&release); - *pBuild = get_number(&release); -} - -void init_getenv_state(GETENV_STATE *state) -{ - erts_smp_rwmtx_rlock(&environ_rwmtx); - *state = NULL; -} - -char *getenv_string(GETENV_STATE *state0) -{ - char **state = (char **) *state0; - char *cp; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); - - if (state == NULL) - state = environ; - - cp = *state++; - *state0 = (GETENV_STATE) state; - - return cp; -} - -void fini_getenv_state(GETENV_STATE *state) -{ - *state = NULL; - erts_smp_rwmtx_runlock(&environ_rwmtx); + *pMajor = get_number(&release); /* Pointer to major version. */ + *pMinor = get_number(&release); /* Pointer to minor version. */ + *pBuild = get_number(&release); /* Pointer to build number. */ } void erts_do_break_handling(void) { struct termios temp_mode; int saved = 0; - + /* * Most functions that do_break() calls are intentionally not thread safe; * therefore, make sure that all threads but this one are blocked before * proceeding! 
*/ - erts_smp_thr_progress_block(); + erts_thr_progress_block(); /* during break we revert to initial settings */ /* this is done differently for oldshell */ @@ -901,14 +768,14 @@ void erts_do_break_handling(void) tcsetattr(0,TCSANOW,&initial_tty_mode); saved = 1; } - + /* call the break handling function, reset the flag */ do_break(); ERTS_UNSET_BREAK_REQUESTED; fflush(stdout); - + /* after break we go back to saved settings */ if (using_oldshell && !replace_intr) { SET_NONBLOCKING(1); @@ -917,9 +784,10 @@ void erts_do_break_handling(void) tcsetattr(0,TCSANOW,&temp_mode); } - erts_smp_thr_progress_unblock(); + erts_thr_progress_unblock(); } + /* Fills in the systems representation of the jam/beam process identifier. ** The Pid is put in STRING representation in the supplied buffer, ** no interpretatione of this should be done by the rest of the @@ -931,122 +799,13 @@ void sys_get_pid(char *buffer, size_t buffer_size){ erts_snprintf(buffer, buffer_size, "%lu",(unsigned long) p); } -int -erts_sys_putenv_raw(char *key, char *value) { - return erts_sys_putenv(key, value); -} -int -erts_sys_putenv(char *key, char *value) -{ - int res; - char *env; - Uint need = strlen(key) + strlen(value) + 2; - -#ifdef HAVE_COPYING_PUTENV - env = erts_alloc(ERTS_ALC_T_TMP, need); -#else - env = erts_alloc(ERTS_ALC_T_PUTENV_STR, need); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, need); -#endif - strcpy(env,key); - strcat(env,"="); - strcat(env,value); - erts_smp_rwmtx_rwlock(&environ_rwmtx); - res = putenv(env); - erts_smp_rwmtx_rwunlock(&environ_rwmtx); -#ifdef HAVE_COPYING_PUTENV - erts_free(ERTS_ALC_T_TMP, env); -#endif - return res; -} -int -erts_sys_getenv__(char *key, char *value, size_t *size) -{ - int res; - char *orig_value = getenv(key); - if (!orig_value) - res = -1; - else { - size_t len = sys_strlen(orig_value); - if (len >= *size) { - *size = len + 1; - res = 1; - } - else { - *size = len; - sys_memcpy((void *) value, (void *) orig_value, len+1); - res = 0; - } - } - 
return res; -} - -int -erts_sys_getenv_raw(char *key, char *value, size_t *size) { - return erts_sys_getenv(key, value, size); -} - -/* - * erts_sys_getenv - * returns: - * -1, if environment key is not set with a value - * 0, if environment key is set and value fits into buffer size - * 1, if environment key is set but does not fit into buffer size - * size is set with the needed buffer size value - */ - -int -erts_sys_getenv(char *key, char *value, size_t *size) -{ - int res; - erts_smp_rwmtx_rlock(&environ_rwmtx); - res = erts_sys_getenv__(key, value, size); - erts_smp_rwmtx_runlock(&environ_rwmtx); - return res; -} - -int -erts_sys_unsetenv(char *key) -{ - int res; - erts_smp_rwmtx_rwlock(&environ_rwmtx); - res = unsetenv(key); - erts_smp_rwmtx_rwunlock(&environ_rwmtx); - return res; -} - -void -sys_init_io(void) -{ -} - -#if (0) /* unused? */ -static int write_fill(fd, buf, len) -int fd, len; -char *buf; -{ - int i, done = 0; - - do { - if ((i = write(fd, buf+done, len-done)) < 0) { - if (errno != EINTR) - return (i); - i = 0; - } - done += i; - } while (done < len); - return (len); -} -#endif +void sys_init_io(void) { } +void erts_sys_alloc_init(void) { } extern const char pre_loaded_code[]; extern Preload pre_loaded[]; -void erts_sys_alloc_init(void) -{ -} - #if ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC void *erts_sys_aligned_alloc(UWord alignment, UWord size) { @@ -1147,9 +906,7 @@ void sys_preload_end(Preload* p) Here we assume that all schedulers are stopped so that erl_poll does not interfere with the select below. 
*/ -int sys_get_key(fd) -int fd; -{ +int sys_get_key(int fd) { int c, ret; unsigned char rbuf[64]; fd_set fds; @@ -1168,29 +925,18 @@ int fd; if (c <= 0) return c; } - - return rbuf[0]; + return rbuf[0]; } extern int erts_initialized; void erl_assert_error(const char* expr, const char* func, const char* file, int line) -{ +{ fflush(stdout); fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n", file, line, func, expr); fflush(stderr); -#if !defined(ERTS_SMP) && 0 - /* Writing a crashdump from a failed assertion when smp support - * is enabled almost a guaranteed deadlocking, don't even bother. - * - * It could maybe be useful (but I'm not convinced) to write the - * crashdump if smp support is disabled... - */ - if (erts_initialized) - erl_crash_dump(file, line, "Assertion failed: %s\n", expr); -#endif abort(); } @@ -1201,7 +947,7 @@ erl_debug(char* fmt, ...) { char sbuf[1024]; /* Temporary buffer. */ va_list va; - + if (debug_log) { va_start(va, fmt); vsprintf(sbuf, fmt, va); @@ -1212,32 +958,17 @@ erl_debug(char* fmt, ...) #endif /* DEBUG */ -/* - * Called from schedule() when it runs out of runnable processes, - * or when Erlang code has performed INPUT_REDUCTIONS reduction - * steps. runnable == 0 iff there are no runnable Erlang processes. 
- */ -void -erl_sys_schedule(int runnable) -{ - ERTS_CHK_IO(!runnable); - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); -} - - -#ifdef ERTS_SMP - -static erts_smp_tid_t sig_dispatcher_tid; +static erts_tid_t sig_dispatcher_tid; static void -smp_sig_notify(char c) +smp_sig_notify(int signum) { int res; do { /* write() is async-signal safe (according to posix) */ - res = write(sig_notify_fds[1], &c, 1); + res = write(sig_notify_fds[1], &signum, sizeof(int)); } while (res < 0 && errno == EINTR); - if (res != 1) { + if (res != sizeof(int)) { char msg[] = "smp_sig_notify(): Failed to notify signal-dispatcher thread " "about received signal"; @@ -1253,57 +984,55 @@ signal_dispatcher_thread_func(void *unused) erts_lc_set_thread_name("signal_dispatcher"); #endif while (1) { - char buf[32]; - int res, i; + union {int signum; char buf[4];} sb; + Eterm signal; + int res, i = 0; /* Block on read() waiting for a signal notification to arrive... */ - res = read(sig_notify_fds[0], (void *) &buf[0], 32); + + do { + res = read(sig_notify_fds[0], (void *) &sb.buf[i], sizeof(int) - i); + i += res > 0 ? res : 0; + } while ((i < sizeof(int) && res >= 0) || (res < 0 && errno == EINTR)); + if (res < 0) { - if (errno == EINTR) - continue; erts_exit(ERTS_ABORT_EXIT, "signal-dispatcher thread got unexpected error: %s (%d)\n", erl_errno_id(errno), errno); } - for (i = 0; i < res; i++) { - /* - * NOTE 1: The signal dispatcher thread should not do work - * that takes a substantial amount of time (except - * perhaps in test and debug builds). It needs to - * be responsive, i.e, it should only dispatch work - * to other threads. - * - * NOTE 2: The signal dispatcher thread is not a blockable - * thread (i.e., not a thread managed by the - * erl_thr_progress module). This is intentional. - * We want to be able to interrupt writing of a crash - * dump by hitting C-c twice. 
Since it isn't a - * blockable thread it is important that it doesn't - * change the state of any data that a blocking thread - * expects to have exclusive access to (unless the - * signal dispatcher itself explicitly is blocking all - * blockable threads). - */ - switch (buf[i]) { - case 0: /* Emulator initialized */ + /* + * NOTE 1: The signal dispatcher thread should not do work + * that takes a substantial amount of time (except + * perhaps in test and debug builds). It needs to + * be responsive, i.e, it should only dispatch work + * to other threads. + * + * NOTE 2: The signal dispatcher thread is not a blockable + * thread (i.e., not a thread managed by the + * erl_thr_progress module). This is intentional. + * We want to be able to interrupt writing of a crash + * dump by hitting C-c twice. Since it isn't a + * blockable thread it is important that it doesn't + * change the state of any data that a blocking thread + * expects to have exclusive access to (unless the + * signal dispatcher itself explicitly is blocking all + * blockable threads). 
+ */ + switch (sb.signum) { + case 0: continue; + case SIGINT: + break_requested(); break; - case 'I': /* SIGINT */ - break_requested(); - break; - case 'Q': /* SIGQUIT */ - quit_requested(); - break; - case '1': /* SIGUSR1 */ - sigusr1_exit(); - break; - default: - erts_exit(ERTS_ABORT_EXIT, - "signal-dispatcher thread received unknown " - "signal notification: '%c'\n", - buf[i]); - } - } - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); + default: + if ((signal = signum_to_signalterm(sb.signum)) == am_error) { + erts_exit(ERTS_ABORT_EXIT, + "signal-dispatcher thread received unknown " + "signal notification: '%d'\n", + sb.signum); + } + signal_notify_requested(signal); + } + ERTS_LC_ASSERT(!erts_thr_progress_is_blocking()); } return NULL; } @@ -1311,7 +1040,7 @@ signal_dispatcher_thread_func(void *unused) static void init_smp_sig_notify(void) { - erts_smp_thr_opts_t thr_opts = ERTS_SMP_THR_OPTS_DEFAULT_INITER; + erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER; thr_opts.detached = 1; thr_opts.name = "sys_sig_dispatcher"; @@ -1323,7 +1052,7 @@ init_smp_sig_notify(void) } /* Start signal handler thread */ - erts_smp_thr_create(&sig_dispatcher_tid, + erts_thr_create(&sig_dispatcher_tid, signal_dispatcher_thread_func, NULL, &thr_opts); @@ -1346,9 +1075,9 @@ init_smp_sig_suspend(void) { int erts_darwin_main_thread_pipe[2]; int erts_darwin_main_thread_result_pipe[2]; -static void initialize_darwin_main_thread_pipes(void) +static void initialize_darwin_main_thread_pipes(void) { - if (pipe(erts_darwin_main_thread_pipe) < 0 || + if (pipe(erts_darwin_main_thread_pipe) < 0 || pipe(erts_darwin_main_thread_result_pipe) < 0) { erts_exit(ERTS_ERROR_EXIT,"Fatal error initializing Darwin main thread stealing"); } @@ -1361,12 +1090,12 @@ erts_sys_main_thread(void) erts_thread_disable_fpe(); #ifdef __DARWIN__ initialize_darwin_main_thread_pipes(); -#endif +#else /* Become signal receiver thread... 
*/ #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_set_thread_name("signal_receiver"); #endif - +#endif smp_sig_notify(0); /* Notify initialized */ /* Wait for a signal to arrive... */ @@ -1416,103 +1145,15 @@ erts_sys_main_thread(void) } } -#endif /* ERTS_SMP */ - -#ifdef ERTS_ENABLE_KERNEL_POLL /* get_value() is currently only used when - kernel-poll is enabled */ - -/* Get arg marks argument as handled by - putting NULL in argv */ -static char * -get_value(char* rest, char** argv, int* ip) -{ - char *param = argv[*ip]+1; - argv[*ip] = NULL; - if (*rest == '\0') { - char *next = argv[*ip + 1]; - if (next[0] == '-' - && next[1] == '-' - && next[2] == '\0') { - erts_fprintf(stderr, "bad \"%s\" value: \n", param); - erts_usage(); - } - (*ip)++; - argv[*ip] = NULL; - return next; - } - return rest; -} - -#endif /* ERTS_ENABLE_KERNEL_POLL */ - void erl_sys_args(int* argc, char** argv) { - int i, j; - - erts_smp_rwmtx_init(&environ_rwmtx, "environ"); - - i = 1; - ASSERT(argc && argv); - while (i < *argc) { - if(argv[i][0] == '-') { - switch (argv[i][1]) { -#ifdef ERTS_ENABLE_KERNEL_POLL - case 'K': { - char *arg = get_value(argv[i] + 2, argv, &i); - if (strcmp("true", arg) == 0) { - erts_use_kernel_poll = 1; - } - else if (strcmp("false", arg) == 0) { - erts_use_kernel_poll = 0; - } - else { - erts_fprintf(stderr, "bad \"K\" value: %s\n", arg); - erts_usage(); - } - break; - } -#endif - case '-': - goto done_parsing; - default: - break; - } - } - i++; - } - - done_parsing: - -#ifdef ERTS_ENABLE_KERNEL_POLL - if (erts_use_kernel_poll) { - char no_kp[10]; - size_t no_kp_sz = sizeof(no_kp); - int res = erts_sys_getenv_raw("ERL_NO_KERNEL_POLL", no_kp, &no_kp_sz); - if (res > 0 - || (res == 0 - && sys_strcmp("false", no_kp) != 0 - && sys_strcmp("FALSE", no_kp) != 0)) { - erts_use_kernel_poll = 0; - } - } -#endif - - init_check_io(); + max_files = erts_check_io_max_files(); -#ifdef ERTS_SMP init_smp_sig_notify(); init_smp_sig_suspend(); -#endif - - /* Handled arguments have been 
marked with NULL. Slide arguments - not handled towards the beginning of argv. */ - for (i = 0, j = 0; i < *argc; i++) { - if (argv[i]) - argv[j++] = argv[i]; - } - *argc = j; + erts_sys_env_init(); } diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c index 400f163652..816bdea9c5 100644 --- a/erts/emulator/sys/unix/sys_drivers.c +++ b/erts/emulator/sys/unix/sys_drivers.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2016. All Rights Reserved. + * Copyright Ericsson AB 1996-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -50,17 +50,15 @@ #include <sys/ioctl.h> #endif +#include <sys/types.h> +#include <sys/socket.h> + #define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */ #include "sys.h" -#ifdef USE_THREADS #include "erl_threads.h" -#endif - -extern char **environ; -extern erts_smp_rwmtx_t environ_rwmtx; -extern erts_smp_atomic_t sys_misc_mem_sz; +extern erts_atomic_t sys_misc_mem_sz; static Eterm forker_port; @@ -86,12 +84,6 @@ static Eterm forker_port; #define MAXIOV 16 #endif -#ifdef USE_THREADS -# define FDBLOCK 1 -#else -# define FDBLOCK 0 -#endif - /* Used by the fd driver iff the fd could not be set to non-blocking */ typedef struct ErtsSysBlocking_ { ErlDrvPDL pdl; @@ -178,9 +170,7 @@ void erl_sys_late_init(void) { SysDriverOpts opts; -#ifdef ERTS_SMP Port *port; -#endif sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */ @@ -190,20 +180,17 @@ erl_sys_late_init(void) opts.read_write = 0; opts.hide_window = 0; opts.wd = NULL; - opts.envir = NULL; + erts_osenv_init(&opts.envir); opts.exit_status = 0; opts.overlapped_io = 0; opts.spawn_type = ERTS_SPAWN_ANY; opts.argv = NULL; opts.parallelism = erts_port_parallelism; -#ifdef ERTS_SMP port = -#endif erts_open_driver(&forker_driver, make_internal_pid(0), "forker", &opts, NULL, NULL); -#ifdef 
ERTS_SMP erts_mtx_unlock(port->lock); -#endif + erts_sys_unix_later_init(); /* Need to be called after forker has been started */ } /* II. Prototypes */ @@ -219,10 +206,8 @@ static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*); /* II.III FD prototypes */ static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); -#if FDBLOCK static void fd_async(void *); static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data); -#endif static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, char **, ErlDrvSizeT); static void fd_stop(ErlDrvData); @@ -286,11 +271,7 @@ struct erl_drv_entry fd_driver_entry = { fd_control, NULL, outputv, -#if FDBLOCK fd_ready_async, /* ready_async */ -#else - NULL, -#endif fd_flush, /* flush */ NULL, /* call */ NULL, /* event */ @@ -362,7 +343,7 @@ static int set_blocking_data(ErtsSysDriverData *dd) { dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); + erts_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking)); dd->blocking->pdl = driver_pdl_create(dd->port_num); dd->blocking->res = 0; @@ -405,7 +386,7 @@ create_driver_data(ErlDrvPort port_num, size += sizeof(ErtsSysFdData); data = erts_alloc(ERTS_ALC_T_DRV_TAB,size); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, size); + erts_atomic_add_nob(&sys_misc_mem_sz, size); driver_data = (ErtsSysDriverData*)data; data += sizeof(*driver_data); @@ -440,7 +421,7 @@ create_driver_data(ErlDrvPort port_num, data += sizeof(*driver_data->ofd); init_fd_data(driver_data->ofd, ofd); } - if (is_blocking && FDBLOCK) + if (is_blocking) if (!set_blocking_data(driver_data)) { erts_free(ERTS_ALC_T_DRV_TAB, driver_data); return NULL; @@ -462,85 +443,55 @@ static void close_pipes(int ifd[2], int ofd[2]) close(ofd[1]); } -static char **build_unix_environment(char *block) +struct __add_spawn_env_state { + struct iovec *iov; + int *iov_index; + + Sint32 *payload_size; + char 
*env_block; +}; + +static void add_spawn_env_block_foreach(void *_state, + const erts_osenv_data_t *key, + const erts_osenv_data_t *value) { - int i; - int j; - int len; - char *cp; - char **cpp; - char** old_env; - - ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); - - cp = block; - len = 0; - while (*cp != '\0') { - cp += strlen(cp) + 1; - len++; - } - old_env = environ; - while (*old_env++ != NULL) { - len++; - } - - cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT, - sizeof(char *) * (len+1)); - if (cpp == NULL) { - return NULL; - } + struct __add_spawn_env_state *state; + struct iovec *iov; - cp = block; - len = 0; - while (*cp != '\0') { - cpp[len] = cp; - cp += strlen(cp) + 1; - len++; - } - - i = len; - for (old_env = environ; *old_env; old_env++) { - char* old = *old_env; - - for (j = 0; j < len; j++) { - char *s, *t; - - /* check if cpp[j] equals old - before the = sign, - i.e. - "TMPDIR=/tmp/" */ - s = cpp[j]; - t = old; - while (*s == *t && *s != '=') { - s++, t++; - } - if (*s == '=' && *t == '=') { - break; - } - } + state = (struct __add_spawn_env_state*)(_state); + iov = &state->iov[*state->iov_index]; - if (j == len) { /* New version not found */ - cpp[len++] = old; - } - } + iov->iov_base = state->env_block; - for (j = 0; j < i; ) { - size_t last = strlen(cpp[j])-1; - if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) { - cpp[j] = cpp[--len]; - if (len < i) { - i--; - } else { - j++; - } - } - else { - j++; - } - } + sys_memcpy(state->env_block, key->data, key->length); + state->env_block += key->length; + *state->env_block++ = '='; + sys_memcpy(state->env_block, value->data, value->length); + state->env_block += value->length; + *state->env_block++ = '\0'; + + iov->iov_len = state->env_block - (char*)iov->iov_base; - cpp[len] = NULL; - return cpp; + (*state->payload_size) += iov->iov_len; + (*state->iov_index)++; +} + +static void *add_spawn_env_block(const erts_osenv_t *env, struct iovec *iov, + int *iov_index, 
Sint32 *payload_size) { + struct __add_spawn_env_state add_state; + char *env_block; + + env_block = erts_alloc(ERTS_ALC_T_TMP, env->content_size + + env->variable_count * sizeof("=\0")); + + add_state.iov = iov; + add_state.iov_index = iov_index; + add_state.env_block = env_block; + add_state.payload_size = payload_size; + + erts_osenv_foreach_native(env, &add_state, add_spawn_env_block_foreach); + + return env_block; } static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, @@ -550,7 +501,6 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, #define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1) int len; - char **new_environ; ErtsSysDriverData *dd; char *cmd_line; char wd_buff[MAXPATHLEN+1]; @@ -617,19 +567,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len); cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0'; len = CMD_LINE_PREFIX_STR_SZ + len + 1; - } - - erts_smp_rwmtx_rlock(&environ_rwmtx); - - if (opts->envir == NULL) { - new_environ = environ; - } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) { - erts_smp_rwmtx_runlock(&environ_rwmtx); - close_pipes(ifd, ofd); - erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - errno = ENOMEM; - return ERL_DRV_ERROR_ERRNO; - } +} if ((cwd = getcwd(wd_buff, MAXPATHLEN+1)) == NULL) { /* on some OSs this call opens a fd in the @@ -638,9 +576,6 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, int err = errno; close_pipes(ifd, ofd); erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); - erts_smp_rwmtx_runlock(&environ_rwmtx); errno = err; return ERL_DRV_ERROR_ERRNO; } @@ -648,6 +583,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, wd = opts->wd; { + void *environment_block; struct iovec *io_vector; int iov_len = 5; char nullbuff[] = "\0"; @@ -660,10 +596,8 @@ static ErlDrvData 
spawn_start(ErlDrvPort port_num, char* name, if (wd) iov_len++; - /* count number of elements in environment */ - while(new_environ[env_len] != NULL) - env_len++; - iov_len += 1 + env_len; /* num envs including size int */ + /* num envs including size int */ + iov_len += 1 + opts->envir.variable_count; /* count number of element in argument list */ if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { @@ -680,10 +614,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, if (!io_vector) { close_pipes(ifd, ofd); - erts_smp_rwmtx_runlock(&environ_rwmtx); erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); errno = ENOMEM; return ERL_DRV_ERROR_ERRNO; } @@ -718,16 +649,13 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, io_vector[i++].iov_len = 1; buffsz += io_vector[i-1].iov_len; + env_len = htonl(opts->envir.variable_count); io_vector[i].iov_base = (void*)&env_len; - env_len = htonl(env_len); io_vector[i++].iov_len = sizeof(env_len); buffsz += io_vector[i-1].iov_len; - for (j = 0; new_environ[j] != NULL; j++) { - io_vector[i].iov_base = new_environ[j]; - io_vector[i++].iov_len = strlen(new_environ[j]) + 1; - buffsz += io_vector[i-1].iov_len; - } + environment_block = add_spawn_env_block(&opts->envir, io_vector, &i, + &buffsz); /* only append arguments if this was a spawn_executable */ if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) { @@ -757,15 +685,12 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, /* we send the request to do the fork */ if ((res = writev(ofd[1], io_vector, iov_len > MAXIOV ? 
MAXIOV : iov_len)) < 0) { - if (errno == ERRNO_BLOCK) { + if (errno == ERRNO_BLOCK || errno == EINTR) { res = 0; } else { int err = errno; close_pipes(ifd, ofd); erts_free(ERTS_ALC_T_TMP, io_vector); - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); - erts_smp_rwmtx_runlock(&environ_rwmtx); erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); errno = err; return ERL_DRV_ERROR_ERRNO; @@ -786,16 +711,12 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, driver_select(port_num, ofd[1], ERL_DRV_WRITE|ERL_DRV_USE, 1); } + erts_free(ERTS_ALC_T_TMP, environment_block); erts_free(ERTS_ALC_T_TMP, io_vector); } erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); - if (new_environ != environ) - erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ); - - erts_smp_rwmtx_runlock(&environ_rwmtx); - dd = create_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes, DO_WRITE | DO_READ, opts->exit_status, 0, 0); @@ -1067,8 +988,8 @@ static void clear_fd_data(ErtsSysFdData *fdd) { if (fdd->sz > 0) { erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fdd->buf); - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fdd->sz); + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz); + erts_atomic_add_nob(&sys_misc_mem_sz, -1*fdd->sz); } fdd->buf = NULL; fdd->sz = 0; @@ -1079,7 +1000,7 @@ static void clear_fd_data(ErtsSysFdData *fdd) static void nbio_stop_fd(ErlDrvPort prt, ErtsSysFdData *fdd) { - driver_select(prt, abs(fdd->fd), DO_READ|DO_WRITE, 0); + driver_select(prt, abs(fdd->fd), ERL_DRV_USE_NO_CALLBACK|DO_READ|DO_WRITE, 0); clear_fd_data(fdd); SET_BLOCKING(abs(fdd->fd)); @@ -1091,13 +1012,11 @@ static void fd_stop(ErlDrvData ev) /* Does not close the fds */ ErlDrvPort prt = dd->port_num; int sz = sizeof(ErtsSysDriverData); -#if FDBLOCK if (dd->blocking) { erts_free(ERTS_ALC_T_SYS_BLOCKING, dd->blocking); dd->blocking = NULL; sz += sizeof(ErtsSysBlocking); } -#endif if (dd->ifd) { sz 
+= sizeof(ErtsSysFdData); @@ -1109,7 +1028,7 @@ static void fd_stop(ErlDrvData ev) /* Does not close the fds */ } erts_free(ERTS_ALC_T_DRV_TAB, dd); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sz); + erts_atomic_add_nob(&sys_misc_mem_sz, -sz); } static void fd_flush(ErlDrvData ev) @@ -1190,19 +1109,19 @@ static void outputv(ErlDrvData e, ErlIOVec* ev) ev->iov[0].iov_len = pb; ev->size += pb; - if (dd->blocking && FDBLOCK) + if (dd->blocking) driver_pdl_lock(dd->blocking->pdl); if ((sz = driver_sizeq(ix)) > 0) { driver_enqv(ix, ev, 0); - if (dd->blocking && FDBLOCK) + if (dd->blocking) driver_pdl_unlock(dd->blocking->pdl); if (sz + ev->size >= (1 << 13)) set_busy_port(ix, 1); } - else if (!dd->blocking || !FDBLOCK) { + else if (!dd->blocking) { /* We try to write directly if the fd in non-blocking */ int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize; @@ -1219,7 +1138,6 @@ static void outputv(ErlDrvData e, ErlIOVec* ev) driver_enqv(ix, ev, n); /* n is the skip value */ driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1); } -#if FDBLOCK else { if (ev->size != 0) { driver_enqv(ix, ev, 0); @@ -1230,7 +1148,6 @@ static void outputv(ErlDrvData e, ErlIOVec* ev) driver_pdl_unlock(dd->blocking->pdl); } } -#endif /* return 0;*/ } @@ -1302,7 +1219,7 @@ static int port_inp_failure(ErtsSysDriverData *dd, int res) clear_fd_data(dd->ifd); } - if (dd->blocking && FDBLOCK) { + if (dd->blocking) { driver_pdl_lock(dd->blocking->pdl); if (driver_sizeq(dd->port_num) > 0) { driver_pdl_unlock(dd->blocking->pdl); @@ -1340,6 +1257,8 @@ static int port_inp_failure(ErtsSysDriverData *dd, int res) } driver_failure_eof(dd->port_num); } else if (dd->ifd) { + if (dd->alive == -1) + errno = dd->status; erl_drv_init_ack(dd->port_num, ERL_DRV_ERROR_ERRNO); } else { driver_failure_posix(dd->port_num, err); @@ -1370,10 +1289,10 @@ static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) int res; if((res = read(ready_fd, &proto, sizeof(proto))) <= 0) { + if (res < 0 && (errno == 
ERRNO_BLOCK || errno == EINTR)) + return; /* hmm, child setup seems to have closed the pipe too early... we close the port as there is not much else we can do */ - if (res < 0 && errno == ERRNO_BLOCK) - return; driver_select(port_num, ready_fd, ERL_DRV_READ, 0); if (res == 0) errno = EPIPE; @@ -1407,7 +1326,7 @@ static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) if (dd->ifd->fd < 0) { driver_select(port_num, abs(dd->ifd->fd), ERL_DRV_READ|ERL_DRV_USE, 0); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); + erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); dd->ifd = NULL; } @@ -1507,13 +1426,13 @@ static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd) continue; } else { /* The last message we got was split */ - char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); + char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h); if (!buf) { errno = ENOMEM; port_inp_failure(dd, -1); } else { - erts_smp_atomic_add_nob(&sys_misc_mem_sz, h); + erts_atomic_add_nob(&sys_misc_mem_sz, h); sys_memcpy(buf, cpos, bytes_left); dd->ifd->buf = buf; dd->ifd->sz = h; @@ -1548,7 +1467,7 @@ static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd) should close the output fd as soon as the command has been sent. 
*/ driver_select(ix, ready_fd, ERL_DRV_WRITE|ERL_DRV_USE, 0); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); + erts_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData)); dd->ofd = NULL; } if (dd->terminating) @@ -1578,7 +1497,6 @@ static void stop_select(ErlDrvEvent fd, void* _) close((int)fd); } -#if FDBLOCK static void fd_async(void *async_data) @@ -1657,7 +1575,6 @@ void fd_ready_async(ErlDrvData drv_data, return; /* 0; */ } -#endif /* Forker driver */ @@ -1677,15 +1594,13 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, forker_port = erts_drvport2id(port_num); - res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz); - if (res != 0) { - if (res < 0) - erts_exit(1, - "Environment variable BINDIR is not set\n"); - if (res > 0) - erts_exit(1, - "Value of environment variable BINDIR is too large\n"); + res = erts_sys_explicit_8bit_getenv("BINDIR", bindir, &bindirsz); + if (res == 0) { + erts_exit(1, "Environment variable BINDIR is not set\n"); + } else if(res < 0) { + erts_exit(1, "Value of environment variable BINDIR is too large\n"); } + if (bindir[0] != DIR_SEPARATOR_CHAR) erts_exit(1, "Environment variable BINDIR does not contain an" @@ -1748,8 +1663,6 @@ static ErlDrvData forker_start(ErlDrvPort port_num, char* name, SET_NONBLOCKING(forker_fd); - driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1); - return (ErlDrvData)port_num; } @@ -1759,15 +1672,37 @@ static void forker_stop(ErlDrvData e) the port has been closed by the user. 
*/ } +static ErlDrvSizeT forker_deq(ErlDrvPort port_num, ErtsSysForkerProto *proto) +{ + close(proto->u.start.fds[0]); + close(proto->u.start.fds[1]); + if (proto->u.start.fds[1] != proto->u.start.fds[2]) + close(proto->u.start.fds[2]); + + return driver_deq(port_num, sizeof(*proto)); +} + +static void forker_sigchld(Eterm port_id, int error) +{ + ErtsSysForkerProto *proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto)); + proto->action = ErtsSysForkerProtoAction_SigChld; + proto->u.sigchld.error_number = error; + proto->u.sigchld.port_id = port_id; + + /* ideally this would be a port_command call, but as command is + already used by the spawn_driver, we use control instead. + Note that when using erl_drv_port_control it is an asynchronous + control. */ + erl_drv_port_control(port_id, 'S', (char*)proto, sizeof(*proto)); +} + static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd) { int res; - ErtsSysForkerProto *proto; - - proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto)); + ErtsSysForkerProto proto; - if ((res = read(fd, proto, sizeof(*proto))) < 0) { - if (errno == ERRNO_BLOCK) + if ((res = read(fd, &proto, sizeof(proto))) < 0) { + if (errno == ERRNO_BLOCK || errno == EINTR) return; erts_exit(ERTS_DUMP_EXIT, "Failed to read from erl_child_setup: %d\n", errno); } @@ -1775,10 +1710,10 @@ static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd) if (res == 0) erts_exit(ERTS_DUMP_EXIT, "erl_child_setup closed\n"); - ASSERT(res == sizeof(*proto)); + ASSERT(res == sizeof(proto)); #ifdef FORKER_PROTO_START_ACK - if (proto->action == ErtsSysForkerProtoAction_StartAck) { + if (proto.action == ErtsSysForkerProtoAction_StartAck) { /* Ideally we would like to not have to ack each Start command being sent over the uds, but it would seem that some operating systems (only observed on FreeBSD) @@ -1788,28 +1723,15 @@ static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd) ErlDrvPort port_num = (ErlDrvPort)e; int vlen; SysIOVec *iov = 
driver_peekq(port_num, &vlen); - ErtsSysForkerProto *proto = (ErtsSysForkerProto *)iov[0].iov_base; - - close(proto->u.start.fds[0]); - close(proto->u.start.fds[1]); - if (proto->u.start.fds[1] != proto->u.start.fds[2]) - close(proto->u.start.fds[2]); + ErtsSysForkerProto *qproto = (ErtsSysForkerProto *)iov[0].iov_base; - driver_deq(port_num, sizeof(*proto)); - - if (driver_sizeq(port_num) > 0) + if (forker_deq(port_num, qproto)) driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1); } else #endif { - ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld); - - /* ideally this would be a port_command call, but as command is - already used by the spawn_driver, we use control instead. - Note that when using erl_drv_port_control it is an asynchronous - control. */ - erl_drv_port_control(proto->u.sigchld.port_id, 'S', - (char*)proto, sizeof(*proto)); + ASSERT(proto.action == ErtsSysForkerProtoAction_SigChld); + forker_sigchld(proto.u.sigchld.port_id, proto.u.sigchld.error_number); } } @@ -1819,7 +1741,8 @@ static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd) ErlDrvPort port_num = (ErlDrvPort)e; #ifndef FORKER_PROTO_START_ACK - while (driver_sizeq(port_num) > 0) { + int loops = 10; + while (driver_sizeq(port_num) > 0 && --loops) { #endif int vlen; SysIOVec *iov = driver_peekq(port_num, &vlen); @@ -1827,29 +1750,42 @@ static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd) ASSERT(iov[0].iov_len >= (sizeof(*proto))); if (sys_uds_write(forker_fd, (char*)proto, sizeof(*proto), proto->u.start.fds, 3, 0) < 0) { - if (errno == ERRNO_BLOCK) + if (errno == ERRNO_BLOCK || errno == EINTR) { return; - erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); + } else if (errno == EMFILE) { + forker_sigchld(proto->u.start.port_id, errno); + if (forker_deq(port_num, proto) == 0) + driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0); + return; + } else { + erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); + } 
} #ifndef FORKER_PROTO_START_ACK - close(proto->u.start.fds[0]); - close(proto->u.start.fds[1]); - if (proto->u.start.fds[1] != proto->u.start.fds[2]) - close(proto->u.start.fds[2]); - driver_deq(port_num, sizeof(*proto)); + if (forker_deq(port_num, proto) == 0) + driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0); } -#endif - +#else driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0); +#endif } static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf, ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen) { + static int first_call = 1; ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf; ErlDrvPort port_num = (ErlDrvPort)e; int res; + if (first_call) { + /* + * Do driver_select here when schedulers and their pollsets have started. + */ + driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1); + first_call = 0; + } + driver_enq(port_num, buf, len); if (driver_sizeq(port_num) > sizeof(*proto)) { return 0; @@ -1857,20 +1793,21 @@ static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf, if ((res = sys_uds_write(forker_fd, (char*)proto, sizeof(*proto), proto->u.start.fds, 3, 0)) < 0) { - if (errno == ERRNO_BLOCK) { + if (errno == ERRNO_BLOCK || errno == EINTR) { driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1); return 0; + } else if (errno == EMFILE) { + forker_sigchld(proto->u.start.port_id, errno); + forker_deq(port_num, proto); + return 0; + } else { + erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); } - erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno); } #ifndef FORKER_PROTO_START_ACK ASSERT(res == sizeof(*proto)); - close(proto->u.start.fds[0]); - close(proto->u.start.fds[1]); - if (proto->u.start.fds[1] != proto->u.start.fds[2]) - close(proto->u.start.fds[2]); - driver_deq(port_num, sizeof(*proto)); + forker_deq(port_num, proto); #endif return 0; diff --git a/erts/emulator/sys/unix/sys_env.c b/erts/emulator/sys/unix/sys_env.c new file mode 
100644 index 0000000000..4d8301f985 --- /dev/null +++ b/erts/emulator/sys/unix/sys_env.c @@ -0,0 +1,133 @@ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_osenv.h" +#include "erl_alloc.h" + +#include "erl_thr_progress.h" + +static erts_osenv_t sysenv_global_env; +static erts_rwmtx_t sysenv_rwmtx; + +extern char **environ; + +static void import_initial_env(void); + +void erts_sys_env_init() { + erts_rwmtx_init(&sysenv_rwmtx, "environ", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + erts_osenv_init(&sysenv_global_env); + import_initial_env(); +} + +const erts_osenv_t *erts_sys_rlock_global_osenv() { + erts_rwmtx_rlock(&sysenv_rwmtx); + return &sysenv_global_env; +} + +erts_osenv_t *erts_sys_rwlock_global_osenv() { + erts_rwmtx_rwlock(&sysenv_rwmtx); + return &sysenv_global_env; +} + +void erts_sys_rwunlock_global_osenv() { + erts_rwmtx_rwunlock(&sysenv_rwmtx); +} + +void erts_sys_runlock_global_osenv() { + erts_rwmtx_runlock(&sysenv_rwmtx); +} + +int erts_sys_explicit_8bit_putenv(char *key, char *value) { + erts_osenv_data_t env_key, env_value; + int result; + + env_key.length = sys_strlen(key); + env_key.data = key; + + env_value.length = sys_strlen(value); + env_value.data = value; + + { + erts_osenv_t *env = erts_sys_rwlock_global_osenv(); + result = erts_osenv_put_native(env, &env_key, &env_value); + erts_sys_rwunlock_global_osenv(); + } + + return result; +} + +int erts_sys_explicit_8bit_getenv(char *key, char *value, size_t *size) { + erts_osenv_data_t env_key, env_value; + int result; + + env_key.length = sys_strlen(key); + env_key.data = key; + + /* Reserve space for NUL termination. 
*/ + env_value.length = *size - 1; + env_value.data = value; + + { + const erts_osenv_t *env = erts_sys_rlock_global_osenv(); + result = erts_osenv_get_native(env, &env_key, &env_value); + erts_sys_runlock_global_osenv(); + } + + if(result == 1) { + value[env_value.length] = '\0'; + } + + *size = env_value.length; + + return result; +} + +int erts_sys_explicit_host_getenv(char *key, char *value, size_t *size) { + char *orig_value; + size_t length; + + orig_value = getenv(key); + + if(orig_value == NULL) { + return 0; + } + + length = sys_strlen(orig_value); + + if (length >= *size) { + *size = length + 1; + return -1; + } + + sys_memcpy((void*)value, (void*)orig_value, length + 1); + *size = length; + + return 1; +} + +static void import_initial_env(void) { + char **environ_iterator, *environ_variable; + + environ_iterator = environ; + + while ((environ_variable = *(environ_iterator++)) != NULL) { + char *separator_index = strchr(environ_variable, '='); + + if (separator_index != NULL) { + erts_osenv_data_t env_key, env_value; + + env_key.length = separator_index - environ_variable; + env_key.data = environ_variable; + + env_value.length = sys_strlen(separator_index) - 1; + env_value.data = separator_index + 1; + + erts_osenv_put_native(&sysenv_global_env, &env_key, &env_value); + } + } +} diff --git a/erts/emulator/sys/unix/sys_float.c b/erts/emulator/sys/unix/sys_float.c index 6435da086f..832074f679 100644 --- a/erts/emulator/sys/unix/sys_float.c +++ b/erts/emulator/sys/unix/sys_float.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2001-2016. All Rights Reserved. + * Copyright Ericsson AB 2001-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,6 @@ erts_sys_init_float(void) #else /* !NO_FPE_SIGNALS */ -#ifdef ERTS_SMP static erts_tsd_key_t fpe_key; /* once-only initialisation early in the main thread (via erts_sys_init_float()) */ @@ -61,11 +60,6 @@ static ERTS_INLINE volatile unsigned long *erts_thread_get_fp_exception(void) { return (volatile unsigned long*)erts_tsd_get(fpe_key); } -#else /* !SMP */ -#define erts_init_fp_exception() /*empty*/ -static volatile unsigned long fp_exception; -#define erts_thread_get_fp_exception() (&fp_exception) -#endif /* SMP */ volatile unsigned long *erts_get_current_fp_exception(void) { @@ -659,11 +653,9 @@ void erts_sys_init_float(void) void erts_thread_init_float(void) { -#ifdef ERTS_SMP /* This allows Erlang schedulers to leave Erlang-process context and still have working FP exceptions. XXX: is this needed? */ erts_thread_init_fp_exception(); -#endif #ifndef NO_FPE_SIGNALS /* NOTE: diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c index 4f26639703..8ba575b7b6 100644 --- a/erts/emulator/sys/unix/sys_time.c +++ b/erts/emulator/sys/unix/sys_time.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2005-2016. All Rights Reserved. + * Copyright Ericsson AB 2005-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -160,7 +160,7 @@ struct sys_time_internal_state_read_mostly__ { #ifdef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__ struct sys_time_internal_state_write_freq__ { - erts_smp_mtx_t mtx; + erts_mtx_t mtx; #if defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME) ErtsMonotonicTime last_delivered; #endif @@ -304,8 +304,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp) erts_sys_time_data__.r.o.os_times = clock_gettime_times_verified; #endif - erts_smp_mtx_init(&internal_state.w.f.mtx, - "os_monotonic_time"); + erts_mtx_init(&internal_state.w.f.mtx, "os_monotonic_time", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); internal_state.w.f.last_delivered = clock_gettime_monotonic(); init_resp->os_monotonic_time_info.locked_use = 1; @@ -525,12 +525,12 @@ static ErtsMonotonicTime clock_gettime_monotonic_verified(void) mtime = (ErtsMonotonicTime) posix_clock_gettime(MONOTONIC_CLOCK_ID, MONOTONIC_CLOCK_ID_STR); - erts_smp_mtx_lock(&internal_state.w.f.mtx); + erts_mtx_lock(&internal_state.w.f.mtx); if (mtime < internal_state.w.f.last_delivered) mtime = internal_state.w.f.last_delivered; else internal_state.w.f.last_delivered = mtime; - erts_smp_mtx_unlock(&internal_state.w.f.mtx); + erts_mtx_unlock(&internal_state.w.f.mtx); return mtime; } @@ -547,12 +547,12 @@ static void clock_gettime_times_verified(ErtsMonotonicTime *mtimep, WALL_CLOCK_ID_STR, stimep); - erts_smp_mtx_lock(&internal_state.w.f.mtx); + erts_mtx_lock(&internal_state.w.f.mtx); if (*mtimep < internal_state.w.f.last_delivered) *mtimep = internal_state.w.f.last_delivered; else internal_state.w.f.last_delivered = *mtimep; - erts_smp_mtx_unlock(&internal_state.w.f.mtx); + erts_mtx_unlock(&internal_state.w.f.mtx); } #endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */ @@ -878,8 +878,6 @@ ErtsMonotonicTime erts_os_monotonic_time(void) { Uint32 ticks = get_tick_count(); - ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, - ticks); return 
ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, ticks) << internal_state.r.o.times_shift; } diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c index dd0a3b03ff..c9f73622ba 100644 --- a/erts/emulator/sys/unix/sys_uds.c +++ b/erts/emulator/sys/unix/sys_uds.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,42 @@ * %CopyrightEnd% */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#if defined(__sun__) && !defined(_XOPEN_SOURCE) +#define _XOPEN_SOURCE 500 +#endif + +#include <limits.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#ifdef HAVE_SYS_SOCKETIO_H +# include <sys/socketio.h> +#endif +#ifdef HAVE_SYS_SOCKIO_H +# include <sys/sockio.h> +#endif + +#ifdef HAVE_NET_ERRNO_H +#include <net/errno.h> +#endif + +#ifdef HAVE_DIRENT_H +# include <dirent.h> +#endif + +#ifdef HAVE_UNISTD_H +# include <unistd.h> +#endif + +#include <stdlib.h> +#include <string.h> +#include <errno.h> + #include "sys_uds.h" int @@ -52,8 +88,9 @@ sys_uds_readv(int fd, struct iovec *iov, size_t iov_len, if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC) { /* We assume that we have given enough space for any header - that are sent to us. So the only remaining reason to get - this flag set is if the caller has run out of file descriptors. + that are sent to us. So the only remaining reasons to get + this flag set is if the caller has run out of file descriptors + or an SELinux policy prunes the response (eg. O_APPEND on STDERR). 
*/ errno = EMFILE; return -1; @@ -96,7 +133,7 @@ sys_uds_writev(int fd, struct iovec *iov, size_t iov_len, struct msghdr msg; struct cmsghdr *cmsg = NULL; - int res, i; + int res, i, error; /* initialize socket message */ memset(&msg, 0, sizeof(struct msghdr)); @@ -137,11 +174,22 @@ sys_uds_writev(int fd, struct iovec *iov, size_t iov_len, res = sendmsg(fd, &msg, flags); +#ifdef ETOOMANYREFS + /* Linux may give ETOOMANYREFS when there are too many fds in transit. + We map this to EMFILE as bsd and other use this error code and we want + the behaviour to be the same on all OSs */ + if (errno == ETOOMANYREFS) + errno = EMFILE; +#endif + error = errno; + if (iov_len > MAXIOV) free(iov[0].iov_base); free(msg.msg_control); + errno = error; + return res; } diff --git a/erts/emulator/sys/unix/sys_uds.h b/erts/emulator/sys/unix/sys_uds.h index a598102d5c..49a4b39250 100644 --- a/erts/emulator/sys/unix/sys_uds.h +++ b/erts/emulator/sys/unix/sys_uds.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2016. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,18 +21,6 @@ #ifndef _ERL_UNIX_UDS_H #define _ERL_UNIX_UDS_H -#ifdef HAVE_CONFIG_H -# include "config.h" -#endif - -#if defined(__sun__) && !defined(_XOPEN_SOURCE) -#define _XOPEN_SOURCE 500 -#endif - -#include <limits.h> - -#include <sys/types.h> -#include <sys/socket.h> #include <sys/uio.h> #if defined IOV_MAX @@ -43,8 +31,6 @@ #define MAXIOV 16 #endif -#include "sys.h" - int sys_uds_readv(int fd, struct iovec *iov, size_t iov_len, int *fds, int fd_count, int flags); int sys_uds_read(int fd, char *buff, size_t len, diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c index f23c7ab03d..3843a27a6e 100644 --- a/erts/emulator/sys/win32/erl_poll.c +++ b/erts/emulator/sys/win32/erl_poll.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2007-2016. All Rights Reserved. + * Copyright Ericsson AB 2007-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,7 @@ */ /*#define HARDDEBUG */ +/*#define HARDTRACE */ #ifdef HARDDEBUG #ifdef HARDTRACE #define HARDTRACEF(X) my_debug_printf##X @@ -50,7 +51,7 @@ static void my_debug_printf(char *fmt, ...) va_start(args, fmt); erts_vsnprintf(buffer,1024,fmt,args); va_end(args); - erts_fprintf(stderr,"%s\r\n",buffer); + erts_printf("%s\r\n",buffer); } #else #define HARDTRACEF(X) @@ -142,7 +143,8 @@ static erts_mtx_t save_ops_mtx; static void poll_debug_init(void) { - erts_mtx_init(&save_ops_mtx, "save_ops_lock"); + erts_mtx_init(&save_ops_mtx, "save_ops_lock", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG); } void poll_debug_set_active_fd(ErtsSysFdType fd) @@ -273,53 +275,35 @@ typedef struct _Waiter { /* * The structure for a pollset. There can currently be only one... */ -struct ErtsPollSet_ { +struct erts_pollset { Waiter** waiter; int allocated_waiters; /* Size ow waiter array */ int num_waiters; /* Number of waiter threads. 
*/ - int restore_events; /* Tells us to restore waiters events - next time around */ HANDLE event_io_ready; /* To be used when waiting for io */ /* These are used to wait for workers to enter standby */ volatile int standby_wait_counter; /* Number of threads to wait for */ CRITICAL_SECTION standby_crit; /* CS to guard the counter */ - HANDLE standby_wait_event; /* Event signalled when counte == 0 */ + HANDLE standby_wait_event; /* Event signalled when counter == 0 */ erts_atomic32_t wakeup_state; -#ifdef ERTS_SMP - erts_smp_mtx_t mtx; -#endif - erts_atomic64_t timeout_time; + erts_mtx_t mtx; }; -#ifdef ERTS_SMP #define ERTS_POLLSET_LOCK(PS) \ - erts_smp_mtx_lock(&(PS)->mtx) + erts_mtx_lock(&(PS)->mtx) #define ERTS_POLLSET_UNLOCK(PS) \ - erts_smp_mtx_unlock(&(PS)->mtx) - -#else + erts_mtx_unlock(&(PS)->mtx) -#define ERTS_POLLSET_LOCK(PS) -#define ERTS_POLLSET_UNLOCK(PS) - -#endif /* * Communication with sys_interrupt */ -#ifdef ERTS_SMP -extern erts_smp_atomic32_t erts_break_requested; +extern erts_atomic32_t erts_break_requested; #define ERTS_SET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) #define ERTS_UNSET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) -#else -extern volatile int erts_break_requested; -#define ERTS_SET_BREAK_REQUESTED (erts_break_requested = 1) -#define ERTS_UNSET_BREAK_REQUESTED (erts_break_requested = 0) -#endif + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) static erts_mtx_t break_waiter_lock; static HANDLE break_happened_event; @@ -366,43 +350,23 @@ do { \ wait_standby(PS); \ } while(0) -static ERTS_INLINE void -init_timeout_time(ErtsPollSet ps) -{ - erts_atomic64_init_nob(&ps->timeout_time, - (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX); -} - -static ERTS_INLINE void -set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time) -{ - erts_atomic64_set_relb(&ps->timeout_time, - 
(erts_aint64_t) time); -} - -static ERTS_INLINE ErtsMonotonicTime -get_timeout_time(ErtsPollSet ps) -{ - return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time); -} - #define ERTS_POLL_NOT_WOKEN ((erts_aint32_t) 0) #define ERTS_POLL_WOKEN_IO_READY ((erts_aint32_t) 1) #define ERTS_POLL_WOKEN_INTR ((erts_aint32_t) 2) #define ERTS_POLL_WOKEN_TIMEDOUT ((erts_aint32_t) 3) static ERTS_INLINE int -is_io_ready(ErtsPollSet ps) +is_io_ready(ErtsPollSet *ps) { return erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_WOKEN_IO_READY; } static ERTS_INLINE void -woke_up(ErtsPollSet ps) +woke_up(ErtsPollSet *ps, int waketype) { if (erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_NOT_WOKEN) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, - ERTS_POLL_WOKEN_TIMEDOUT, + waketype, ERTS_POLL_NOT_WOKEN); #ifdef DEBUG { @@ -421,7 +385,7 @@ woke_up(ErtsPollSet ps) } static ERTS_INLINE int -wakeup_cause(ErtsPollSet ps) +wakeup_cause(ErtsPollSet *ps) { int res; erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); @@ -444,46 +408,8 @@ wakeup_cause(ErtsPollSet ps) return res; } -static ERTS_INLINE DWORD -poll_wait_timeout(ErtsPollSet ps, ErtsMonotonicTime timeout_time) -{ - ErtsMonotonicTime current_time, diff_time, timeout; - - if (timeout_time == ERTS_POLL_NO_TIMEOUT) { - no_timeout: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MIN); - woke_up(ps); - return (DWORD) 0; - } - - current_time = erts_get_monotonic_time(NULL); - diff_time = timeout_time - current_time; - if (diff_time <= 0) - goto no_timeout; - - /* Round up to nearest milli second */ - timeout = (ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1); - if (timeout > INT_MAX) - timeout = INT_MAX; /* Also prevents DWORD overflow */ - - set_timeout_time(ps, current_time + ERTS_MSEC_TO_MONOTONIC(timeout)); - - ResetEvent(ps->event_io_ready); - /* - * Since we don't know the internals of ResetEvent() we issue - * a memory barrier as a safety precaution ensuring that - * the load of wakeup_state wont be 
reordered with stores made - * by ResetEvent(). - */ - ERTS_THR_MEMORY_BARRIER; - if (erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN) - return (DWORD) 0; - - return (DWORD) timeout; -} - static ERTS_INLINE void -wake_poller(ErtsPollSet ps, int io_ready) +wake_poller(ErtsPollSet *ps, int io_ready) { erts_aint32_t wakeup_state; if (io_ready) { @@ -518,13 +444,13 @@ wake_poller(ErtsPollSet ps, int io_ready) } static ERTS_INLINE void -reset_io_ready(ErtsPollSet ps) +reset_io_ready(ErtsPollSet *ps) { erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } static ERTS_INLINE void -restore_io_ready(ErtsPollSet ps) +restore_io_ready(ErtsPollSet *ps) { erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_IO_READY); } @@ -534,13 +460,13 @@ restore_io_ready(ErtsPollSet ps) * notifying a poller thread about I/O ready. */ static ERTS_INLINE void -notify_io_ready(ErtsPollSet ps) +notify_io_ready(ErtsPollSet *ps) { wake_poller(ps, 1); } static ERTS_INLINE void -reset_interrupt(ErtsPollSet ps) +reset_interrupt(ErtsPollSet *ps) { /* We need to keep io-ready if set */ erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); @@ -557,12 +483,12 @@ reset_interrupt(ErtsPollSet ps) } static ERTS_INLINE void -set_interrupt(ErtsPollSet ps) +set_interrupt(ErtsPollSet *ps) { wake_poller(ps, 0); } -static void setup_standby_wait(ErtsPollSet ps, int num_threads) +static void setup_standby_wait(ErtsPollSet *ps, int num_threads) { EnterCriticalSection(&(ps->standby_crit)); ps->standby_wait_counter = num_threads; @@ -570,7 +496,7 @@ static void setup_standby_wait(ErtsPollSet ps, int num_threads) LeaveCriticalSection(&(ps->standby_crit)); } -static void signal_standby(ErtsPollSet ps) +static void signal_standby(ErtsPollSet *ps) { EnterCriticalSection(&(ps->standby_crit)); --(ps->standby_wait_counter); @@ -584,7 +510,7 @@ static void signal_standby(ErtsPollSet ps) LeaveCriticalSection(&(ps->standby_crit)); } -static void wait_standby(ErtsPollSet ps) 
+static void wait_standby(ErtsPollSet *ps) { WaitForSingleObject(ps->standby_wait_event,INFINITE); } @@ -652,7 +578,7 @@ static void consistency_check(Waiter* w) #endif -static void new_waiter(ErtsPollSet ps) +static void new_waiter(ErtsPollSet *ps) { register Waiter* w; DWORD tid; /* Id for thread. */ @@ -677,7 +603,7 @@ static void new_waiter(ErtsPollSet ps) w->active_events = 1; w->highwater = 1; w->total_events = 1; - erts_mtx_init(&w->mtx, "pollwaiter"); + erts_mtx_init(&w->mtx, "pollwaiter", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO); /* @@ -746,7 +672,7 @@ static void *break_waiter(void *param) static void *threaded_waiter(void *param) { register Waiter* w = (Waiter *) param; - ErtsPollSet ps = (ErtsPollSet) w->xdata; + ErtsPollSet *ps = (ErtsPollSet*) w->xdata; #ifdef HARD_POLL_DEBUG2 HANDLE oold_fired[64]; int num_oold_fired; @@ -842,16 +768,16 @@ event_happened: ASSERT(WAIT_OBJECT_0 < i && i < WAIT_OBJECT_0+w->active_events); notify_io_ready(ps); - /* - * The main thread wont start working on our arrays untill we're - * stopped, so we can work in peace although the main thread runs + /* + * The main thread wont start working on our arrays until we're + * stopped, so we can work in peace although the main thread runs */ ASSERT(i >= WAIT_OBJECT_0+1); i -= WAIT_OBJECT_0; ASSERT(i >= 1); - w->active_events--; HARDDEBUGF(("i = %d, a,h,t = %d,%d,%d",i, w->active_events, w->highwater, w->total_events)); + w->active_events--; #ifdef HARD_POLL_DEBUG2 fired[num_fired++] = w->events[i]; #endif @@ -881,7 +807,7 @@ event_happened: * The actual adding and removing from pollset utilities */ -static int set_driver_select(ErtsPollSet ps, HANDLE event, ErtsPollEvents mode) +static int set_driver_select(ErtsPollSet *ps, HANDLE event, ErtsPollEvents mode) { int i; int best_waiter = -1; /* The waiter with lowest number of events. 
*/ @@ -971,13 +897,13 @@ static int set_driver_select(ErtsPollSet ps, HANDLE event, ErtsPollEvents mode) #endif erts_mtx_unlock(&w->mtx); START_WAITER(ps,w); - HARDDEBUGF(("add select %d %d %d %d",best_waiter, + HARDDEBUGF(("%d: add select %d %d %d %d", event, best_waiter, w->active_events,w->highwater,w->total_events)); return mode; } -static int cancel_driver_select(ErtsPollSet ps, HANDLE event) +static int cancel_driver_select(ErtsPollSet *ps, HANDLE event) { int i; @@ -1032,26 +958,14 @@ static int cancel_driver_select(ErtsPollSet ps, HANDLE event) * Interface functions */ -void erts_poll_interrupt(ErtsPollSet ps, int set /* bool */) +void erts_poll_interrupt(ErtsPollSet *ps, int set /* bool */) { - HARDTRACEF(("In erts_poll_interrupt(%d)",set)); + HARDTRACEF(("In erts_poll_interrupt(%p, %d)",ps,set)); if (!set) reset_interrupt(ps); else set_interrupt(ps); - HARDTRACEF(("Out erts_poll_interrupt(%d)",set)); -} - -void erts_poll_interrupt_timed(ErtsPollSet ps, - int set /* bool */, - ErtsMonotonicTime timeout_time) -{ - HARDTRACEF(("In erts_poll_interrupt_timed(%d,%ld)",set,timeout_time)); - if (!set) - reset_interrupt(ps); - else if (get_timeout_time(ps) > timeout_time) - set_interrupt(ps); - HARDTRACEF(("Out erts_poll_interrupt_timed")); + HARDTRACEF(("Out erts_poll_interrupt(%p, %d)",ps,set)); } @@ -1060,17 +974,17 @@ void erts_poll_interrupt_timed(ErtsPollSet ps, * the only difference between ERTS_POLL_EV_IN and ERTS_POLL_EV_OUT * is which driver callback will eventually be called. 
*/ -static ErtsPollEvents do_poll_control(ErtsPollSet ps, - ErtsSysFdType fd, - ErtsPollEvents pe, - int on /* bool */) +static ErtsPollEvents do_poll_control(ErtsPollSet *ps, + ErtsSysFdType fd, + ErtsPollOp op, + ErtsPollEvents pe) { HANDLE event = (HANDLE) fd; ErtsPollEvents mode; ErtsPollEvents result; ASSERT(event != INVALID_HANDLE_VALUE); - if (on) { + if (op != ERTS_POLL_OP_DEL) { if (pe & ERTS_POLL_EV_IN || !(pe & ERTS_POLL_EV_OUT )) { mode = ERTS_POLL_EV_IN; } else { @@ -1083,51 +997,32 @@ static ErtsPollEvents do_poll_control(ErtsPollSet ps, return result; } -ErtsPollEvents erts_poll_control(ErtsPollSet ps, +ErtsPollEvents erts_poll_control(ErtsPollSet *ps, ErtsSysFdType fd, + ErtsPollOp op, ErtsPollEvents pe, - int on, int* do_wake) /* In: Wake up polling thread */ /* Out: Poller is woken */ { ErtsPollEvents result; - HARDTRACEF(("In erts_poll_control(0x%08X, %u, %d)",(unsigned long) fd, (unsigned) pe, on)); + HARDTRACEF(("In erts_poll_control(0x%08X, %s, %s)", + (unsigned long) fd, op2str(op), ev2str(pe))); ERTS_POLLSET_LOCK(ps); - result=do_poll_control(ps,fd,pe,on); + result=do_poll_control(ps, fd, op, pe); ERTS_POLLSET_UNLOCK(ps); *do_wake = 0; /* Never any need to wake polling threads on windows */ HARDTRACEF(("Out erts_poll_control -> %u",(unsigned) result)); return result; } -void erts_poll_controlv(ErtsPollSet ps, - ErtsPollControlEntry pcev[], - int len) -{ - int i; - int hshur = 0; - int do_wake = 0; - - HARDTRACEF(("In erts_poll_controlv(%d)",len)); - ERTS_POLLSET_LOCK(ps); - - for (i = 0; i < len; i++) { - pcev[i].events = do_poll_control(ps, - pcev[i].fd, - pcev[i].events, - pcev[i].on); - } - ERTS_POLLSET_UNLOCK(ps); - HARDTRACEF(("Out erts_poll_controlv")); -} - -int erts_poll_wait(ErtsPollSet ps, +int erts_poll_wait(ErtsPollSet *ps, ErtsPollResFd pr[], int *len, - ErtsMonotonicTime timeout_time) + ErtsThrPrgrData *tpd, + Sint64 timeout_in) { int no_fds; - DWORD timeout; + DWORD timeout = timeout_in == -1 ? 
INFINITE : timeout_in; EventData* ev; int res = 0; int num = 0; @@ -1138,42 +1033,6 @@ int erts_poll_wait(ErtsPollSet ps, HARDTRACEF(("In erts_poll_wait")); ERTS_POLLSET_LOCK(ps); - if (!is_io_ready(ps) && ps->restore_events) { - HARDDEBUGF(("Restore events: %d",ps->num_waiters)); - ps->restore_events = 0; - for (i = 0; i < ps->num_waiters; ++i) { - Waiter* w = ps->waiter[i]; - erts_mtx_lock(&w->mtx); - HARDDEBUGF(("Maybe reset %d %d %d %d",i, - w->active_events,w->highwater,w->total_events)); - if (w->active_events < w->total_events) { - erts_mtx_unlock(&w->mtx); - STOP_WAITER(ps,w); - HARDDEBUGF(("Need reset %d %d %d %d",i, - w->active_events,w->highwater,w->total_events)); - erts_mtx_lock(&w->mtx); - /* Need reset, just check that it doesn't have got more to tell */ - if (w->highwater != w->active_events) { - HARDDEBUGF(("Oups!")); - /* Oups, got signalled before we took the lock, can't reset */ - if(!is_io_ready(ps)) { - erts_exit(ERTS_ERROR_EXIT,"Internal error: " - "Inconsistent io structures in erl_poll.\n"); - } - START_WAITER(ps,w); - erts_mtx_unlock(&w->mtx); - ps->restore_events = 1; - continue; - } - w->active_events = w->highwater = w->total_events; - START_WAITER(ps,w); - erts_mtx_unlock(&w->mtx); - } else { - erts_mtx_unlock(&w->mtx); - } - } - } - no_fds = *len; #ifdef ERTS_POLL_MAX_RES @@ -1181,29 +1040,33 @@ int erts_poll_wait(ErtsPollSet ps, no_fds = ERTS_POLL_MAX_RES; #endif - timeout = poll_wait_timeout(ps, timeout_time); - - /*HARDDEBUGF(("timeout = %ld",(long) timeout));*/ + ResetEvent(ps->event_io_ready); + /* + * Since we don't know the internals of ResetEvent() we issue + * a memory barrier as a safety precaution ensuring that + * the load of wakeup_state wont be reordered with stores made + * by ResetEvent(). 
+ */ + ERTS_THR_MEMORY_BARRIER; + if (erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN) + timeout = (DWORD) 0; - if (timeout > 0 && !erts_atomic32_read_nob(&break_waiter_state)) { + if (!erts_atomic32_read_nob(&break_waiter_state)) { HANDLE harr[2] = {ps->event_io_ready, break_happened_event}; - int num_h = 2; - ERTS_MSACC_PUSH_STATE_M(); + int num_h = 2, handle; + ERTS_MSACC_PUSH_STATE(); HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout)); ERTS_POLLSET_UNLOCK(ps); -#ifdef ERTS_SMP - erts_thr_progress_prepare_wait(NULL); -#endif - ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP); - WaitForMultipleObjects(num_h, harr, FALSE, timeout); -#ifdef ERTS_SMP - erts_thr_progress_finalize_wait(NULL); -#endif - ERTS_MSACC_POP_STATE_M(); + erts_thr_progress_prepare_wait(tpd); + ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); + handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout); + erts_thr_progress_finalize_wait(tpd); + ERTS_MSACC_POP_STATE(); ERTS_POLLSET_LOCK(ps); HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout)); - woke_up(ps); + if (handle == WAIT_OBJECT_0) + woke_up(ps, ERTS_POLL_WOKEN_TIMEDOUT); } ERTS_UNSET_BREAK_REQUESTED; @@ -1215,7 +1078,10 @@ int erts_poll_wait(ErtsPollSet ps, erts_mtx_unlock(&break_waiter_lock); switch (break_state) { case BREAK_WAITER_GOT_BREAK: + woke_up(ps, ERTS_POLL_WOKEN_INTR); ERTS_SET_BREAK_REQUESTED; + /* Wake aux thread to get handle break */ + erts_aux_thread_poke(); break; case BREAK_WAITER_GOT_HALT: erts_exit(0,""); @@ -1233,7 +1099,7 @@ int erts_poll_wait(ErtsPollSet ps, reset_io_ready(ps); - n = ps->num_waiters; + n = ps->num_waiters; for (i = 0; i < n; i++) { Waiter* w = ps->waiter[i]; @@ -1259,11 +1125,10 @@ int erts_poll_wait(ErtsPollSet ps, HARDDEBUGF(("To many FD's to report!")); goto done; } - HARDDEBUGF(("SET! 
Restore events")); - ps->restore_events = 1; HARDDEBUGF(("Report %d,%d",i,j)); - pr[num].fd = (ErtsSysFdType) w->events[j]; - pr[num].events = w->evdata[j]->mode; + ERTS_POLL_RES_SET_FD(&pr[num], w->events[j]); + ERTS_POLL_RES_SET_EVTS(&pr[num], w->evdata[j]->mode); + remove_event_from_set(w, j); #ifdef HARD_POLL_DEBUG poll_debug_reported(w->events[j],w->highwater | (j << 16)); poll_debug_reported(w->events[j],first | (last << 16)); @@ -1271,13 +1136,14 @@ int erts_poll_wait(ErtsPollSet ps, ++num; } + w->total_events = w->highwater = w->active_events; + #ifdef DEBUG consistency_check(w); #endif erts_mtx_unlock(&w->mtx); } done: - set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX); *len = num; ERTS_POLLSET_UNLOCK(ps); HARDTRACEF(("Out erts_poll_wait")); @@ -1292,7 +1158,7 @@ int erts_poll_max_fds(void) return res; } -void erts_poll_info(ErtsPollSet ps, +void erts_poll_info(ErtsPollSet *ps, ErtsPollInfo *pip) { Uint size = 0; @@ -1302,7 +1168,7 @@ void erts_poll_info(ErtsPollSet ps, HARDTRACEF(("In erts_poll_info")); ERTS_POLLSET_LOCK(ps); - size += sizeof(struct ErtsPollSet_); + size += sizeof(struct erts_pollset); size += sizeof(Waiter *) * ps->allocated_waiters; for (i = 0; i < ps->num_waiters; ++i) { Waiter *w = ps->waiter[i]; @@ -1317,16 +1183,12 @@ void erts_poll_info(ErtsPollSet ps, pip->primary = "WaitForMultipleObjects"; - pip->fallback = NULL; - pip->kernel_poll = NULL; pip->memory_size = size; pip->poll_set_size = num_events; - pip->fallback_poll_set_size = 0; - pip->lazy_updates = 0; pip->pending_updates = 0; @@ -1334,6 +1196,8 @@ void erts_poll_info(ErtsPollSet ps, pip->batch_updates = 0; pip->concurrent_updates = 0; + + pip->is_fallback = 0; ERTS_POLLSET_UNLOCK(ps); pip->max_fds = erts_poll_max_fds(); @@ -1341,10 +1205,10 @@ void erts_poll_info(ErtsPollSet ps, } -ErtsPollSet erts_poll_create_pollset(void) +ErtsPollSet *erts_poll_create_pollset(int no) { - ErtsPollSet ps = SEL_ALLOC(ERTS_ALC_T_POLLSET, - sizeof(struct ErtsPollSet_)); + ErtsPollSet *ps = 
SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(struct erts_pollset)); HARDTRACEF(("In erts_poll_create_pollset")); ps->num_waiters = 0; @@ -1355,19 +1219,15 @@ ErtsPollSet erts_poll_create_pollset(void) ps->standby_wait_counter = 0; ps->event_io_ready = CreateManualEvent(FALSE); ps->standby_wait_event = CreateManualEvent(FALSE); - ps->restore_events = 0; erts_atomic32_init_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); -#ifdef ERTS_SMP - erts_smp_mtx_init(&ps->mtx, "pollset"); -#endif - init_timeout_time(ps); + erts_mtx_init(&ps->mtx, "pollset", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO); HARDTRACEF(("Out erts_poll_create_pollset")); return ps; } -void erts_poll_destroy_pollset(ErtsPollSet ps) +void erts_poll_destroy_pollset(ErtsPollSet *ps) { int i; HARDTRACEF(("In erts_poll_destroy_pollset")); @@ -1390,9 +1250,7 @@ void erts_poll_destroy_pollset(ErtsPollSet ps) CloseHandle(ps->event_io_ready); CloseHandle(ps->standby_wait_event); ERTS_POLLSET_UNLOCK(ps); -#ifdef ERTS_SMP - erts_smp_mtx_destroy(&ps->mtx); -#endif + erts_mtx_destroy(&ps->mtx); SEL_FREE(ERTS_ALC_T_POLLSET, (void *) ps); HARDTRACEF(("Out erts_poll_destroy_pollset")); } @@ -1400,36 +1258,44 @@ void erts_poll_destroy_pollset(ErtsPollSet ps) /* * Actually mostly initializes the friend module sys_interrupt... 
*/ -void erts_poll_init(void) +void erts_poll_init(int *concurrent_updates) { - erts_tid_t thread; #ifdef HARD_POLL_DEBUG poll_debug_init(); #endif + if (concurrent_updates) + *concurrent_updates = 0; + HARDTRACEF(("In erts_poll_init")); erts_sys_break_event = CreateManualEvent(FALSE); - erts_mtx_init(&break_waiter_lock,"break_waiter_lock"); + erts_mtx_init(&break_waiter_lock, "break_waiter_lock", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO); break_happened_event = CreateManualEvent(FALSE); erts_atomic32_init_nob(&break_waiter_state, 0); + HARDTRACEF(("Out erts_poll_init")); +} + +void erts_poll_late_init(void) +{ + erts_tid_t thread; erts_thr_create(&thread, &break_waiter, NULL, NULL); ERTS_UNSET_BREAK_REQUESTED; - HARDTRACEF(("Out erts_poll_init")); } /* * Non windows friendly interface, not used when fd's are not continous */ -void erts_poll_get_selected_events(ErtsPollSet ps, +void erts_poll_get_selected_events(ErtsPollSet *ps, ErtsPollEvents ev[], int len) { int i; HARDTRACEF(("In erts_poll_get_selected_events")); for (i = 0; i < len; ++i) - ev[i] = 0; + ev[i] = ERTS_POLL_EV_NONE; HARDTRACEF(("Out erts_poll_get_selected_events")); } diff --git a/erts/emulator/sys/win32/erl_win32_sys_ddll.c b/erts/emulator/sys/win32/erl_win32_sys_ddll.c index 274133a346..7fe1f5cc78 100644 --- a/erts/emulator/sys/win32/erl_win32_sys_ddll.c +++ b/erts/emulator/sys/win32/erl_win32_sys_ddll.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2016. All Rights Reserved. + * Copyright Ericsson AB 2006-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -46,9 +46,17 @@ static TWinDynDriverCallbacks wddc; static TWinDynNifCallbacks nif_callbacks; void erl_sys_ddll_init(void) { + WCHAR cwd_buffer[MAX_PATH]; + tls_index = TlsAlloc(); ERL_INIT_CALLBACK_STRUCTURE(wddc); + /* LOAD_WITH_ALTERED_SEARCH_PATH removes the startup directory from the + * search path, so we add it separately to be backwards compatible. */ + if (GetCurrentDirectoryW(sizeof(cwd_buffer), cwd_buffer)) { + SetDllDirectoryW(cwd_buffer); + } + #define ERL_NIF_API_FUNC_DECL(RET,NAME,ARGS) nif_callbacks.NAME = NAME #include "erl_nif_api_funcs.h" #undef ERL_NIF_API_FUNC_DECL @@ -81,7 +89,10 @@ int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* e ERTS_ALC_T_TMP, &used, EXT_LEN); wcscpy(&wcp[used/2 - 1], FILE_EXT_WCHAR); - if ((hinstance = LoadLibraryW(wcp)) == NULL) { + /* LOAD_WITH_ALTERED_SEARCH_PATH adds the specified DLL's directory to the + * dependency search path. This also removes the directory we started in, + * but we've explicitly added that in in erl_sys_ddll_init. */ + if ((hinstance = LoadLibraryExW(wcp, NULL, LOAD_WITH_ALTERED_SEARCH_PATH)) == NULL) { code = ERL_DE_DYNAMIC_ERROR_OFFSET - GetLastError(); if (err != NULL) { err->str = erts_sys_ddll_error(code); diff --git a/erts/emulator/sys/win32/erl_win_dyn_driver.h b/erts/emulator/sys/win32/erl_win_dyn_driver.h index 6f28d513c2..c683e8cf49 100644 --- a/erts/emulator/sys/win32/erl_win_dyn_driver.h +++ b/erts/emulator/sys/win32/erl_win_dyn_driver.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2003-2016. All Rights Reserved. + * Copyright Ericsson AB 2003-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,7 +40,6 @@ WDD_TYPEDEF(int, driver_exit, (ErlDrvPort, int)); WDD_TYPEDEF(int, driver_failure_eof, (ErlDrvPort)); WDD_TYPEDEF(void, erl_drv_busy_msgq_limits, (ErlDrvPort, ErlDrvSizeT *, ErlDrvSizeT *)); WDD_TYPEDEF(int, driver_select, (ErlDrvPort, ErlDrvEvent, int, int)); -WDD_TYPEDEF(int, driver_event, (ErlDrvPort, ErlDrvEvent,ErlDrvEventData)); WDD_TYPEDEF(int, driver_output, (ErlDrvPort, char *, ErlDrvSizeT)); WDD_TYPEDEF(int, driver_output2, (ErlDrvPort, char *, ErlDrvSizeT ,char *, ErlDrvSizeT)); WDD_TYPEDEF(int, driver_output_binary, (ErlDrvPort, char *, ErlDrvSizeT, ErlDrvBinary*, ErlDrvSizeT, ErlDrvSizeT)); @@ -162,7 +161,7 @@ typedef struct { WDD_FTYPE(driver_failure_eof) *driver_failure_eof; WDD_FTYPE(erl_drv_busy_msgq_limits) *erl_drv_busy_msgq_limits; WDD_FTYPE(driver_select) *driver_select; - WDD_FTYPE(driver_event) *driver_event; + void *REMOVED_driver_event; WDD_FTYPE(driver_output) *driver_output; WDD_FTYPE(driver_output2) *driver_output2; WDD_FTYPE(driver_output_binary) *driver_output_binary; @@ -276,7 +275,6 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks; #define driver_failure_eof (WinDynDriverCallbacks.driver_failure_eof) #define erl_drv_busy_msgq_limits (WinDynDriverCallbacks.erl_drv_busy_msgq_limits) #define driver_select (WinDynDriverCallbacks.driver_select) -#define driver_event (WinDynDriverCallbacks.driver_event) #define driver_output (WinDynDriverCallbacks.driver_output) #define driver_output2 (WinDynDriverCallbacks.driver_output2) #define driver_output_binary (WinDynDriverCallbacks.driver_output_binary) @@ -414,7 +412,7 @@ do { \ ((W).driver_failure_eof) = driver_failure_eof; \ ((W).erl_drv_busy_msgq_limits) = erl_drv_busy_msgq_limits;\ ((W).driver_select) = driver_select; \ -((W).driver_event) = driver_event; \ +((W).REMOVED_driver_event) = NULL; \ ((W).driver_output) = driver_output; \ ((W).driver_output2) = driver_output2; \ ((W).driver_output_binary) = driver_output_binary; \ diff --git 
a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h index 04fbf23109..b00ba287e2 100644 --- a/erts/emulator/sys/win32/erl_win_sys.h +++ b/erts/emulator/sys/win32/erl_win_sys.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. + * Copyright Ericsson AB 1997-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -182,6 +182,8 @@ typedef LONGLONG ErtsMonotonicTime; typedef LONGLONG ErtsSysHrTime; #endif +#define ErtsStrToSint64 _strtoi64 + typedef ErtsMonotonicTime ErtsSystemTime; typedef ErtsMonotonicTime ErtsSysPerfCounter; @@ -309,10 +311,8 @@ typedef long ssize_t; #endif /* Threads */ -#ifdef USE_THREADS int init_async(int); int exit_async(void); -#endif #define ERTS_HAVE_TRY_CATCH 1 diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index cf821b05cb..a1c630d68a 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2016. All Rights Reserved. + * Copyright Ericsson AB 1996-2017. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,6 @@ void erts_sys_init_float(void); void erl_start(int, char**); void erts_exit(int n, char*, ...); void erl_error(char*, va_list); -void erl_crash_dump(char*, int, char*, ...); /* * Microsoft-specific function to map a WIN32 error code to a Posix errno. 
@@ -78,14 +77,13 @@ static int create_pipe(LPHANDLE, LPHANDLE, BOOL, BOOL); static int application_type(const wchar_t* originalName, wchar_t fullPath[MAX_PATH], BOOL search_in_path, BOOL handle_quotes, int *error_return); +static void *build_env_block(const erts_osenv_t *env); HANDLE erts_service_event; -#ifdef ERTS_SMP -static erts_smp_tsd_key_t win32_errstr_key; -#endif +static erts_tsd_key_t win32_errstr_key; -static erts_smp_atomic_t pipe_creation_counter; +static erts_atomic_t pipe_creation_counter; /* Results from application_type(_w) is one of */ #define APPL_NONE 0 @@ -95,10 +93,8 @@ static erts_smp_atomic_t pipe_creation_counter; static int driver_write(long, HANDLE, byte*, int); static int create_file_thread(struct async_io* aio, int mode); -#ifdef ERTS_SMP static void close_active_handle(DriverData *, HANDLE handle); static DWORD WINAPI threaded_handle_closer(LPVOID param); -#endif static DWORD WINAPI threaded_reader(LPVOID param); static DWORD WINAPI threaded_writer(LPVOID param); static DWORD WINAPI threaded_exiter(LPVOID param); @@ -137,7 +133,7 @@ static OSVERSIONINFO int_os_version; /* Version information for Win32. */ Disabled the use of CancelIoEx as its been seen to cause problem with some drivers. Not sure what to blame; faulty drivers or some form of invalid use. 
*/ -#if defined(ERTS_SMP) && defined(USE_CANCELIOEX) +#if defined(USE_CANCELIOEX) static BOOL (WINAPI *fpCancelIoEx)(HANDLE,LPOVERLAPPED); #endif @@ -146,7 +142,7 @@ static BOOL (WINAPI *fpCancelIoEx)(HANDLE,LPOVERLAPPED); - call erl_start() to parse arguments and do other init */ -static erts_smp_atomic_t sys_misc_mem_sz; +static erts_atomic_t sys_misc_mem_sz; HMODULE beam_module = NULL; @@ -187,11 +183,17 @@ void sys_primitive_init(HMODULE beam) beam_module = (HMODULE) beam; } +UWord +erts_sys_get_page_size(void) +{ + return (UWord) 4*1024; /* Guess 4 KB */ +} + Uint erts_sys_misc_mem_sz(void) { Uint res = (Uint) erts_check_io_size(); - res += (Uint) erts_smp_atomic_read_mb(&sys_misc_mem_sz); + res += (Uint) erts_atomic_read_mb(&sys_misc_mem_sz); return res; } @@ -295,6 +297,10 @@ int erts_sys_prepare_crash_dump(int secs) return 0; } +int erts_set_signal(Eterm signal, Eterm type) { + return 0; +} + static void init_console(void) { @@ -441,9 +447,7 @@ typedef struct async_io { * the console for Windows NT). */ HANDLE fd; /* Handle for file or pipe. */ -#ifdef ERTS_SMP int async_io_active; /* if true, a close of the file will signal the event in ov */ -#endif OVERLAPPED ov; /* Control structure for overlapped reading. * When overlapped reading is simulated with * a thread, the fields are used as follows: @@ -493,7 +497,7 @@ struct driver_data { int outBufSize; /* Size of output buffer. */ byte *outbuf; /* Buffer to use for overlapped write. */ ErlDrvPort port_num; /* The port handle. */ - int packet_bytes; /* 0: continous stream, 1, 2, or 4: the number + int packet_bytes; /* 0: continuous stream, 1, 2, or 4: the number * of bytes in the packet header. */ HANDLE port_pid; /* PID of the port process. 
*/ @@ -656,7 +660,7 @@ new_driver_data(ErlDrvPort port_num, int packet_bytes, int wait_objs_required, i dp->inbuf = DRV_BUF_ALLOC(dp->inBufSize); if (dp->inbuf == NULL) goto buf_alloc_error; - erts_smp_atomic_add_nob(&sys_misc_mem_sz, dp->inBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, dp->inBufSize); dp->outBufSize = 0; dp->outbuf = NULL; dp->port_num = port_num; @@ -682,7 +686,6 @@ buf_alloc_error: static void release_driver_data(DriverData* dp) { -#ifdef ERTS_SMP #ifdef USE_CANCELIOEX if (fpCancelIoEx != NULL) { if (dp->in.thread == (HANDLE) -1 && dp->in.fd != INVALID_HANDLE_VALUE) { @@ -725,18 +728,10 @@ release_driver_data(DriverData* dp) DEBUGF(("...done\n")); } } -#else - if (dp->in.thread == (HANDLE) -1 && dp->in.fd != INVALID_HANDLE_VALUE) { - CancelIo(dp->in.fd); - } - if (dp->out.thread == (HANDLE) -1 && dp->out.fd != INVALID_HANDLE_VALUE) { - CancelIo(dp->out.fd); - } -#endif if (dp->inbuf != NULL) { - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= dp->inBufSize); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*dp->inBufSize); + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= dp->inBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, -1*dp->inBufSize); DRV_BUF_FREE(dp->inbuf); dp->inBufSize = 0; dp->inbuf = NULL; @@ -744,8 +739,8 @@ release_driver_data(DriverData* dp) ASSERT(dp->inBufSize == 0); if (dp->outbuf != NULL) { - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); DRV_BUF_FREE(dp->outbuf); dp->outBufSize = 0; dp->outbuf = NULL; @@ -768,7 +763,6 @@ release_driver_data(DriverData* dp) unrefer_driver_data(dp); } -#ifdef ERTS_SMP struct handles_to_be_closed { HANDLE handles[MAXIMUM_WAIT_OBJECTS]; @@ -861,7 +855,6 @@ threaded_handle_closer(LPVOID param) DEBUGF(("threaded_handle_closer %p terminating\r\n", htbc)); return 0; } -#endif /* 
ERTS_SMP */ /* * Stores input and output file descriptors in the DriverData structure, @@ -937,9 +930,7 @@ init_async_io(DriverData *dp, AsyncIo* aio, int use_threads) aio->flushReplyEvent = NULL; aio->pendingError = 0; aio->bytesTransferred = 0; -#ifdef ERTS_SMP aio->async_io_active = 0; -#endif aio->ov.hEvent = CreateManualEvent(FALSE); if (aio->ov.hEvent == NULL) return -1; @@ -1020,9 +1011,7 @@ async_read_file(AsyncIo* aio, LPVOID buf, DWORD numToRead) ResetEvent(aio->ov.hEvent); SetEvent(aio->ioAllowed); } else { -#ifdef ERTS_SMP aio->async_io_active = 1; /* Will get 0 when the event actually happened */ -#endif if (ReadFile(aio->fd, buf, numToRead, &aio->bytesTransferred, &aio->ov)) { DEBUGF(("async_read_file: ReadFile() suceeded: %d bytes\n", @@ -1070,16 +1059,12 @@ async_write_file(AsyncIo* aio, /* Pointer to async control block. */ ResetEvent(aio->ov.hEvent); SetEvent(aio->ioAllowed); } else { -#ifdef ERTS_SMP aio->async_io_active = 1; /* Will get 0 when the event actually happened */ -#endif if (WriteFile(aio->fd, buf, numToWrite, &aio->bytesTransferred, &aio->ov)) { DEBUGF(("async_write_file: WriteFile() suceeded: %d bytes\n", aio->bytesTransferred)); -#ifdef ERTS_SMP aio->async_io_active = 0; /* The event will not be signalled */ -#endif ResetEvent(aio->ov.hEvent); return TRUE; } else { @@ -1181,7 +1166,7 @@ static int spawn_init(void) { int i; -#if defined(ERTS_SMP) && defined(USE_CANCELIOEX) +#if defined(USE_CANCELIOEX) HMODULE module = GetModuleHandle("kernel32"); fpCancelIoEx = (BOOL (WINAPI *)(HANDLE,LPOVERLAPPED)) ((module != NULL) ? 
GetProcAddress(module,"CancelIoEx") : NULL); @@ -1206,7 +1191,6 @@ spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts) int ok; int neededSelects = 0; SECURITY_ATTRIBUTES sa = {sizeof(SECURITY_ATTRIBUTES), NULL, TRUE}; - char* envir = opts->envir; int errno_return = -1; wchar_t *name; int len; @@ -1281,29 +1265,33 @@ spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts) name[i] = L'\0'; } DEBUGF(("Spawning \"%S\"\n", name)); - envir = win_build_environment(envir); /* Always a unicode environment */ - ok = create_child_process(name, - hChildStdin, - hChildStdout, - hChildStderr, - &dp->port_pid, - &pid, - opts->hide_window, - (LPVOID) envir, - (wchar_t *) opts->wd, - opts->spawn_type, - (wchar_t **) opts->argv, - &errno_return); - CloseHandle(hChildStdin); - CloseHandle(hChildStdout); - if (close_child_stderr && hChildStderr != INVALID_HANDLE_VALUE && - hChildStderr != 0) { - CloseHandle(hChildStderr); - } - erts_free(ERTS_ALC_T_TMP, name); - - if (envir != NULL) { - erts_free(ERTS_ALC_T_ENVIRONMENT, envir); + + { + void *environment_block = build_env_block(&opts->envir); + + ok = create_child_process(name, + hChildStdin, + hChildStdout, + hChildStderr, + &dp->port_pid, + &pid, + opts->hide_window, + environment_block, + (wchar_t *) opts->wd, + opts->spawn_type, + (wchar_t **) opts->argv, + &errno_return); + + CloseHandle(hChildStdin); + CloseHandle(hChildStdout); + + if (close_child_stderr && hChildStderr != INVALID_HANDLE_VALUE && + hChildStderr != 0) { + CloseHandle(hChildStderr); + } + + erts_free(ERTS_ALC_T_TMP, environment_block); + erts_free(ERTS_ALC_T_TMP, name); } if (!ok) { @@ -1354,6 +1342,41 @@ spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts) return retval; } +struct __build_env_state { + WCHAR *next_variable; +}; + +static void build_env_foreach(void *_state, const erts_osenv_data_t *key, + const erts_osenv_data_t *value) +{ + struct __build_env_state *state = (struct __build_env_state*)(_state); 
+ + sys_memcpy(state->next_variable, key->data, key->length); + state->next_variable += (int)key->length / sizeof(WCHAR); + *state->next_variable++ = L'='; + + sys_memcpy(state->next_variable, value->data, value->length); + state->next_variable += (int)value->length / sizeof(WCHAR); + *state->next_variable++ = L'\0'; +} + +/* Builds an environment block suitable for CreateProcessW. */ +static void *build_env_block(const erts_osenv_t *env) { + struct __build_env_state build_state; + WCHAR *env_block; + + env_block = erts_alloc(ERTS_ALC_T_TMP, env->content_size + + (env->variable_count * sizeof(L"=\0") + sizeof(L'\0'))); + + build_state.next_variable = env_block; + + erts_osenv_foreach_native(env, &build_state, build_env_foreach); + + (*build_state.next_variable) = L'\0'; + + return env_block; +} + static int create_file_thread(AsyncIo* aio, int mode) { @@ -1424,7 +1447,7 @@ int parse_command(wchar_t* cmd){ * * If new == NULL we just calculate the length. * - * The reason for having to quote all of the is becasue CreateProcessW removes + * The reason for having to quote all of the is because CreateProcessW removes * one level of escaping since it takes a single long command line rather * than the argument chunks that unix uses. */ @@ -1753,7 +1776,7 @@ static int create_pipe(HANDLE *phRead, HANDLE *phWrite, BOOL inheritRead, BOOL o * Otherwise, create named pipes. */ - calls = (UWord) erts_smp_atomic_inc_read_nob(&pipe_creation_counter); + calls = (UWord) erts_atomic_inc_read_nob(&pipe_creation_counter); erts_snprintf(pipe_name, sizeof(pipe_name), "\\\\.\\pipe\\erlang44_%d_%bpu", getpid(), calls); @@ -2438,7 +2461,7 @@ output(ErlDrvData drv_data, char* buf, ErlDrvSizeT len) } dp->outBufSize = pb+len; - erts_smp_atomic_add_nob(&sys_misc_mem_sz, dp->outBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, dp->outBufSize); /* * Store header bytes (if any). 
@@ -2467,8 +2490,8 @@ output(ErlDrvData drv_data, char* buf, ErlDrvSizeT len) } else { dp->out.ov.Offset += pb+len; /* For vanilla driver. */ /* XXX OffsetHigh should be changed too. */ - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); DRV_BUF_FREE(dp->outbuf); dp->outBufSize = 0; dp->outbuf = NULL; @@ -2483,7 +2506,7 @@ output(ErlDrvData drv_data, char* buf, ErlDrvSizeT len) * event object has been signaled, indicating that there is * something to read on the corresponding file handle. * - * If the port is working in the continous stream mode (packet_bytes == 0), + * If the port is working in the continuous stream mode (packet_bytes == 0), * whatever data read will be sent straight to Erlang. * * Results: @@ -2502,11 +2525,9 @@ ready_input(ErlDrvData drv_data, ErlDrvEvent ready_event) int pb; pb = dp->packet_bytes; -#ifdef ERTS_SMP if(dp->in.thread == (HANDLE) -1) { dp->in.async_io_active = 0; } -#endif DEBUGF(("ready_input: dp %p, event 0x%x\n", dp, ready_event)); /* @@ -2524,7 +2545,7 @@ ready_input(ErlDrvData drv_data, ErlDrvEvent ready_event) #endif if (error == NO_ERROR) { - if (pb == 0) { /* Continous stream. */ + if (pb == 0) { /* Continuous stream. */ #ifdef DEBUG DEBUGF(("ready_input: %d: ", bytesRead)); erl_bin_write(dp->inbuf, 16, bytesRead); @@ -2581,8 +2602,8 @@ ready_input(ErlDrvData drv_data, ErlDrvEvent ready_event) error = ERROR_NOT_ENOUGH_MEMORY; break; /* Break out of loop into error handler. 
*/ } - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= dp->inBufSize); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= dp->inBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, dp->totalNeeded - dp->inBufSize); dp->inBufSize = dp->totalNeeded; dp->inbuf = new_buf; @@ -2671,11 +2692,9 @@ ready_output(ErlDrvData drv_data, ErlDrvEvent ready_event) DriverData *dp = (DriverData *) drv_data; int error; -#ifdef ERTS_SMP if(dp->out.thread == (HANDLE) -1) { dp->out.async_io_active = 0; } -#endif DEBUGF(("ready_output(%p, 0x%x)\n", drv_data, ready_event)); set_busy_port(dp->port_num, 0); if (!(dp->outbuf)) { @@ -2683,8 +2702,8 @@ ready_output(ErlDrvData drv_data, ErlDrvEvent ready_event) write... */ return; } - ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); + ASSERT(erts_atomic_read_nob(&sys_misc_mem_sz) >= dp->outBufSize); + erts_atomic_add_nob(&sys_misc_mem_sz, -1*dp->outBufSize); DRV_BUF_FREE(dp->outbuf); dp->outBufSize = 0; dp->outbuf = NULL; @@ -2734,7 +2753,6 @@ sys_init_io(void) max_files = 2*erts_ptab_max(&erts_port); } -#ifdef ERTS_SMP void erts_sys_main_thread(void) { @@ -2747,7 +2765,6 @@ erts_sys_main_thread(void) WaitForSingleObject(dummy, INFINITE); } } -#endif void erts_sys_alloc_init(void) { @@ -2834,7 +2851,7 @@ Preload* sys_preloaded(void) (num_preloaded+1)*sizeof(Preload)); res_name = erts_alloc(ERTS_ALC_T_PRELOADED, (num_preloaded+1)*sizeof(unsigned)); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, + erts_atomic_add_nob(&sys_misc_mem_sz, (num_preloaded+1)*sizeof(Preload) + (num_preloaded+1)*sizeof(unsigned)); for (i = 0; i < num_preloaded; i++) { @@ -2847,7 +2864,7 @@ Preload* sys_preloaded(void) n = GETWORD(data); data += 2; preloaded[i].name = erts_alloc(ERTS_ALC_T_PRELOADED, n+1); - erts_smp_atomic_add_nob(&sys_misc_mem_sz, n+1); + erts_atomic_add_nob(&sys_misc_mem_sz, n+1); sys_memcpy(preloaded[i].name, data, n); 
preloaded[i].name[n] = '\0'; data += n; @@ -2929,11 +2946,7 @@ sys_get_key(int fd) char* win32_errorstr(int error) { -#ifdef SMP - LPTSTR lpBufPtr = erts_smp_tsd_get(win32_errstr_key); -#else - static LPTSTR lpBufPtr = NULL; -#endif + LPTSTR lpBufPtr = erts_tsd_get(win32_errstr_key); if (lpBufPtr) { LocalFree(lpBufPtr); } @@ -2947,9 +2960,7 @@ char* win32_errorstr(int error) 0, NULL); SetLastError(error); -#ifdef ERTS_SMP - erts_smp_tsd_set(win32_errstr_key,lpBufPtr); -#endif + erts_tsd_set(win32_errstr_key,lpBufPtr); return lpBufPtr; } @@ -3122,7 +3133,6 @@ check_supported_os_version(void) #endif } -#ifdef USE_THREADS typedef struct { int sched_bind_data; @@ -3167,37 +3177,39 @@ thr_create_prepare_child(void *vtcdp) erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); } -#endif /* USE_THREADS */ void erts_sys_pre_init(void) { -#ifdef USE_THREADS erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER; -#endif int_os_version.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); GetVersionEx(&int_os_version); check_supported_os_version(); -#ifdef USE_THREADS eid.thread_create_child_func = thr_create_prepare_child; /* Before creation in parent */ eid.thread_create_prepare_func = thr_create_prepare; /* After creation in parent */ eid.thread_create_parent_func = thr_create_cleanup; - erts_thr_init(&eid); +#ifdef ERTS_ENABLE_LOCK_COUNT + erts_lcnt_pre_thr_init(); #endif - erts_init_sys_time_sup(); + erts_thr_init(&eid); -#ifdef USE_THREADS #ifdef ERTS_ENABLE_LOCK_COUNT - erts_lcnt_init(); + erts_lcnt_post_thr_init(); #endif + +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_init(); #endif - erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0); + + erts_init_sys_time_sup(); + + erts_atomic_init_nob(&sys_misc_mem_sz, 0); } void noinherit_std_handle(DWORD type) @@ -3217,11 +3229,9 @@ void erl_sys_init(void) noinherit_std_handle(STD_INPUT_HANDLE); noinherit_std_handle(STD_ERROR_HANDLE); -#ifdef ERTS_SMP - erts_smp_tsd_key_create(&win32_errstr_key,"win32_errstr_key"); + 
erts_tsd_key_create(&win32_errstr_key,"win32_errstr_key"); InitializeCriticalSection(&htbc_lock); -#endif - erts_smp_atomic_init_nob(&pipe_creation_counter,0); + erts_atomic_init_nob(&pipe_creation_counter,0); /* * Test if we have named pipes or not. */ @@ -3264,42 +3274,16 @@ void erl_sys_init(void) SetStdHandle(STD_ERROR_HANDLE, GetStdHandle(STD_OUTPUT_HANDLE)); } erts_sys_init_float(); - erts_init_check_io(); /* Suppress windows error message popups */ SetErrorMode(SetErrorMode(0) | SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX); } +void erts_poll_late_init(void); void erl_sys_late_init(void) { /* do nothing */ + erts_poll_late_init(); } - -void -erts_sys_schedule_interrupt(int set) -{ - erts_check_io_interrupt(set); -} - -#ifdef ERTS_SMP -void -erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time) -{ - erts_check_io_interrupt_timed(set, timeout_time); -} -#endif - -/* - * Called from schedule() when it runs out of runnable processes, - * or when Erlang code has performed INPUT_REDUCTIONS reduction - * steps. runnable == 0 iff there are no runnable Erlang processes. - */ -void -erl_sys_schedule(int runnable) -{ - erts_check_io(!runnable); - ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); -} - diff --git a/erts/emulator/sys/win32/sys_env.c b/erts/emulator/sys/win32/sys_env.c index 21ef71ad9a..c78161b344 100644 --- a/erts/emulator/sys/win32/sys_env.c +++ b/erts/emulator/sys/win32/sys_env.c @@ -1,318 +1,212 @@ -/*
- * %CopyrightBegin%
- *
- * Copyright Ericsson AB 2002-2016. All Rights Reserved.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- * %CopyrightEnd%
- */
-
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "sys.h"
-#include "erl_sys_driver.h"
-#include "erl_alloc.h"
-
-static WCHAR *merge_environment(WCHAR *current, WCHAR *add);
-static WCHAR *arg_to_env(WCHAR **arg);
-static WCHAR **env_to_arg(WCHAR *env);
-static WCHAR **find_arg(WCHAR **arg, WCHAR *str);
-static int compare(const void *a, const void *b);
-
-static erts_smp_rwmtx_t environ_rwmtx;
-
-void
-erts_sys_env_init(void)
-{
- erts_smp_rwmtx_init(&environ_rwmtx, "environ");
-}
-
-int
-erts_sys_putenv_raw(char *key, char *value)
-{
- int res;
- erts_smp_rwmtx_rwlock(&environ_rwmtx);
- res = (SetEnvironmentVariable((LPCTSTR) key,
- (LPCTSTR) value) ? 0 : 1);
- erts_smp_rwmtx_rwunlock(&environ_rwmtx);
- return res;
-}
-
-int
-erts_sys_putenv(char *key, char *value)
-{
- int res;
- WCHAR *wkey = (WCHAR *) key;
- WCHAR *wvalue = (WCHAR *) value;
- erts_smp_rwmtx_rwlock(&environ_rwmtx);
- res = (SetEnvironmentVariableW(wkey,
- wvalue) ? 0 : 1);
- erts_smp_rwmtx_rwunlock(&environ_rwmtx);
- return res;
-}
-
-int
-erts_sys_getenv(char *key, char *value, size_t *size)
-{
- size_t req_size = 0;
- int res = 0;
- DWORD new_size;
- WCHAR *wkey = (WCHAR *) key;
- WCHAR *wvalue = (WCHAR *) value;
- DWORD wsize = *size / (sizeof(WCHAR) / sizeof(char));
-
- SetLastError(0);
- erts_smp_rwmtx_rlock(&environ_rwmtx);
- new_size = GetEnvironmentVariableW(wkey,
- wvalue,
- (DWORD) wsize);
- res = !new_size && GetLastError() == ERROR_ENVVAR_NOT_FOUND ? -1 : 0;
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- if (res < 0)
- return res;
- res = new_size > wsize ? 1 : 0;
- *size = new_size * (sizeof(WCHAR) / sizeof(char));
- return res;
-}
-int
-erts_sys_getenv__(char *key, char *value, size_t *size)
-{
- size_t req_size = 0;
- int res = 0;
- DWORD new_size;
-
- SetLastError(0);
- new_size = GetEnvironmentVariable((LPCTSTR) key,
- (LPTSTR) value,
- (DWORD) *size);
- res = !new_size && GetLastError() == ERROR_ENVVAR_NOT_FOUND ? -1 : 0;
- if (res < 0)
- return res;
- res = new_size > *size ? 1 : 0;
- *size = new_size;
- return res;
-}
-
-int
-erts_sys_getenv_raw(char *key, char *value, size_t *size)
-{
- int res;
- erts_smp_rwmtx_rlock(&environ_rwmtx);
- res = erts_sys_getenv__(key, value, size);
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- return res;
-}
-
-void init_getenv_state(GETENV_STATE *state)
-{
- erts_smp_rwmtx_rlock(&environ_rwmtx);
- state->environment_strings = GetEnvironmentStringsW();
- state->next_string = state->environment_strings;
-}
-
-char *getenv_string(GETENV_STATE *state)
-{
- ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx));
- if (state->next_string[0] == L'\0') {
- return NULL;
- } else {
- WCHAR *res = state->next_string;
- state->next_string += wcslen(res) + 1;
- return (char *) res;
- }
-}
-
-void fini_getenv_state(GETENV_STATE *state)
-{
- FreeEnvironmentStringsW(state->environment_strings);
- state->environment_strings = state->next_string = NULL;
- erts_smp_rwmtx_runlock(&environ_rwmtx);
-}
-
-int erts_sys_unsetenv(char *key)
-{
- int res = 0;
- WCHAR *wkey = (WCHAR *) key;
-
- SetLastError(0);
- erts_smp_rwmtx_rlock(&environ_rwmtx);
- GetEnvironmentVariableW(wkey,
- NULL,
- 0);
- if (GetLastError() != ERROR_ENVVAR_NOT_FOUND) {
- res = (SetEnvironmentVariableW(wkey,
- NULL) ? 0 : 1);
- }
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- return res;
-}
-
-char*
-win_build_environment(char* new_env)
-{
- if (new_env == NULL) {
- return NULL;
- } else {
- WCHAR *tmp, *merged, *tmp_new;
-
- tmp_new = (WCHAR *) new_env;
-
- erts_smp_rwmtx_rlock(&environ_rwmtx);
- tmp = GetEnvironmentStringsW();
- merged = merge_environment(tmp, tmp_new);
-
- FreeEnvironmentStringsW(tmp);
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- return (char *) merged;
- }
-}
-
-static WCHAR *
-merge_environment(WCHAR *old, WCHAR *add)
-{
- WCHAR **a_arg = env_to_arg(add);
- WCHAR **c_arg = env_to_arg(old);
- WCHAR *ret;
- int i, j;
-
- for(i = 0; c_arg[i] != NULL; ++i)
- ;
-
- for(j = 0; a_arg[j] != NULL; ++j)
- ;
-
- c_arg = erts_realloc(ERTS_ALC_T_TMP,
- c_arg, (i+j+1) * sizeof(WCHAR *));
-
- for(j = 0; a_arg[j] != NULL; ++j){
- WCHAR **tmp;
- WCHAR *current = a_arg[j];
- WCHAR *eq_p = wcschr(current,L'=');
- int unset = (eq_p!=NULL && eq_p[1]==L'\0');
-
- if ((tmp = find_arg(c_arg, current)) != NULL) {
- if (!unset) {
- *tmp = current;
- } else {
- *tmp = c_arg[--i];
- c_arg[i] = NULL;
- }
- } else if (!unset) {
- c_arg[i++] = current;
- c_arg[i] = NULL;
- }
- }
- ret = arg_to_env(c_arg);
- erts_free(ERTS_ALC_T_TMP, c_arg);
- erts_free(ERTS_ALC_T_TMP, a_arg);
- return ret;
-}
-
-static WCHAR**
-find_arg(WCHAR **arg, WCHAR *str)
-{
- WCHAR *tmp;
- int len;
-
- if ((tmp = wcschr(str, L'=')) != NULL) {
- tmp++;
- len = tmp - str;
- while (*arg != NULL){
- if (_wcsnicmp(*arg, str, len) == 0){
- return arg;
- }
- ++arg;
- }
- }
- return NULL;
-}
-
-static int
-compare(const void *a, const void *b)
-{
- WCHAR *s1 = *((WCHAR **) a);
- WCHAR *s2 = *((WCHAR **) b);
- WCHAR *e1 = wcschr(s1,L'=');
- WCHAR *e2 = wcschr(s2,L'=');
- int ret;
- int len;
-
- if(!e1)
- e1 = s1 + wcslen(s1);
- if(!e2)
- e2 = s2 + wcslen(s2);
-
- if((e1 - s1) > (e2 - s2))
- len = (e2 - s2);
- else
- len = (e1 - s1);
-
- ret = _wcsnicmp(s1,s2,len);
- if (ret == 0)
- return ((e1 - s1) - (e2 - s2));
- else
- return ret;
-}
-
-static WCHAR**
-env_to_arg(WCHAR *env)
-{
- WCHAR **ret;
- WCHAR *tmp;
- int i;
- int num_strings = 0;
-
- for(tmp = env; *tmp != '\0'; tmp += wcslen(tmp)+1) {
- ++num_strings;
- }
- ret = erts_alloc(ERTS_ALC_T_TMP, sizeof(WCHAR *) * (num_strings + 1));
- i = 0;
- for(tmp = env; *tmp != '\0'; tmp += wcslen(tmp)+1){
- ret[i++] = tmp;
- }
- ret[i] = NULL;
- return ret;
-}
-
-static WCHAR *
-arg_to_env(WCHAR **arg)
-{
- WCHAR *block;
- WCHAR *ptr;
- int i;
- int totlen = 1; /* extra '\0' */
-
- for(i = 0; arg[i] != NULL; ++i) {
- totlen += wcslen(arg[i])+1;
- }
-
- /* sort the environment vector */
- qsort(arg, i, sizeof(WCHAR *), &compare);
-
- if (totlen == 1){
- block = erts_alloc(ERTS_ALC_T_ENVIRONMENT, 2 * sizeof(WCHAR));
- block[0] = block[1] = '\0';
- } else {
- block = erts_alloc(ERTS_ALC_T_ENVIRONMENT, totlen * sizeof(WCHAR));
- ptr = block;
- for(i=0; arg[i] != NULL; ++i){
- wcscpy(ptr, arg[i]);
- ptr += wcslen(ptr)+1;
- }
- *ptr = '\0';
- }
- return block;
-}
+/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2017. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_sys_driver.h" +#include "erl_alloc.h" + +static erts_osenv_t sysenv_global_env; +static erts_rwmtx_t sysenv_rwmtx; + +static void import_initial_env(void); + +void erts_sys_env_init() { + erts_rwmtx_init(&sysenv_rwmtx, "environ", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); + + erts_osenv_init(&sysenv_global_env); + import_initial_env(); +} + +const erts_osenv_t *erts_sys_rlock_global_osenv() { + erts_rwmtx_rlock(&sysenv_rwmtx); + return &sysenv_global_env; +} + +erts_osenv_t *erts_sys_rwlock_global_osenv() { + erts_rwmtx_rwlock(&sysenv_rwmtx); + return &sysenv_global_env; +} + +void erts_sys_runlock_global_osenv() { + erts_rwmtx_runlock(&sysenv_rwmtx); +} + +void erts_sys_rwunlock_global_osenv() { + erts_rwmtx_rwunlock(&sysenv_rwmtx); +} + +int erts_sys_explicit_host_getenv(char *key, char *value, size_t *size) { + size_t new_size = GetEnvironmentVariableA(key, value, (DWORD)*size); + + if(new_size == 0 && GetLastError() == ERROR_ENVVAR_NOT_FOUND) { + return 0; + } else if(new_size > *size) { + return -1; + } + + *size = new_size; + return 1; +} + +int erts_sys_explicit_8bit_putenv(char *key, char *value) { + WCHAR *wide_key, *wide_value; + int key_length, value_length; + int 
result; + + /* Note that we do *NOT* honor the filename encoding flags (+fnu/+fnl) + * here; the previous implementation used SetEnvironmentVariableA and + * things may break if we step away from that. */ + + key_length = MultiByteToWideChar(CP_ACP, 0, key, -1, NULL, 0); + value_length = MultiByteToWideChar(CP_ACP, 0, value, -1, NULL, 0); + + /* Report "not found" if either string isn't convertible. */ + if(key_length == 0 || value_length == 0) { + return 0; + } + + wide_key = erts_alloc(ERTS_ALC_T_TMP, key_length * sizeof(WCHAR)); + wide_value = erts_alloc(ERTS_ALC_T_TMP, value_length * sizeof(WCHAR)); + + MultiByteToWideChar(CP_ACP, 0, key, -1, wide_key, key_length); + MultiByteToWideChar(CP_ACP, 0, value, -1, wide_value, value_length); + + { + erts_osenv_data_t env_key, env_value; + erts_osenv_t *env; + + env = erts_sys_rwlock_global_osenv(); + + /* -1 to exclude the NUL terminator. */ + env_key.length = (key_length - 1) * sizeof(WCHAR); + env_key.data = wide_key; + + env_value.length = (value_length - 1) * sizeof(WCHAR); + env_value.data = wide_value; + + result = erts_osenv_put_native(env, &env_key, &env_value); + erts_sys_rwunlock_global_osenv(); + } + + erts_free(ERTS_ALC_T_TMP, wide_key); + erts_free(ERTS_ALC_T_TMP, wide_value); + + return result; +} + +int erts_sys_explicit_8bit_getenv(char *key, char *value, size_t *size) { + erts_osenv_data_t env_key, env_value; + int key_length, value_length, result; + WCHAR *wide_key, *wide_value; + + key_length = MultiByteToWideChar(CP_ACP, 0, key, -1, NULL, 0); + + /* Report "not found" if the string isn't convertible. */ + if(key_length == 0) { + return 0; + } + + wide_key = erts_alloc(ERTS_ALC_T_TMP, key_length * sizeof(WCHAR)); + MultiByteToWideChar(CP_ACP, 0, key, -1, wide_key, key_length); + + /* We assume that the worst possible size is twice the output buffer width, + * as we could theoretically be on a code page that requires surrogates. 
*/ + value_length = (*size) * 2; + wide_value = erts_alloc(ERTS_ALC_T_TMP, value_length * sizeof(WCHAR)); + + { + const erts_osenv_t *env = erts_sys_rlock_global_osenv(); + + /* -1 to exclude the NUL terminator. */ + env_key.length = (key_length - 1) * sizeof(WCHAR); + env_key.data = wide_key; + + env_value.length = value_length * sizeof(WCHAR); + env_value.data = wide_value; + + result = erts_osenv_get_native(env, &env_key, &env_value); + erts_sys_runlock_global_osenv(); + } + + if(result == 1 && env_value.length > 0) { + /* This function doesn't NUL-terminate if the provided size is >= 0, + * so we pass (*size - 1) to reserve space for it and then do it + * manually. */ + *size = WideCharToMultiByte(CP_ACP, 0, env_value.data, + env_value.length / sizeof(WCHAR), value, *size - 1, NULL, NULL); + + if(*size == 0) { + if(GetLastError() == ERROR_INSUFFICIENT_BUFFER) { + result = -1; + } else { + result = 0; + } + } + } else { + *size = 0; + } + + if(*size > 0) { + value[*size] = '\0'; + } + + erts_free(ERTS_ALC_T_TMP, wide_key); + erts_free(ERTS_ALC_T_TMP, wide_value); + + return result; +} + +static void import_initial_env(void) { + WCHAR *environment_block, *current_variable; + + environment_block = GetEnvironmentStringsW(); + current_variable = environment_block; + + while(wcslen(current_variable) > 0) { + WCHAR *separator_index = wcschr(current_variable, L'='); + + /* We tolerate environment variables starting with '=' as the per-drive + * working directories are stored this way. 
*/ + if(separator_index == current_variable) { + separator_index = wcschr(separator_index + 1, L'='); + } + + if(separator_index != NULL && separator_index != current_variable) { + erts_osenv_data_t env_key, env_value; + + env_key.length = (separator_index - current_variable) * sizeof(WCHAR); + env_key.data = current_variable; + + env_value.length = (wcslen(separator_index) - 1) * sizeof(WCHAR); + env_value.data = separator_index + 1; + + erts_osenv_put_native(&sysenv_global_env, &env_key, &env_value); + } + + current_variable += wcslen(current_variable) + 1; + } + + FreeEnvironmentStringsW(environment_block); +} diff --git a/erts/emulator/sys/win32/sys_interrupt.c b/erts/emulator/sys/win32/sys_interrupt.c index df838960eb..cee269eed4 100644 --- a/erts/emulator/sys/win32/sys_interrupt.c +++ b/erts/emulator/sys/win32/sys_interrupt.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. + * Copyright Ericsson AB 1997-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -35,17 +35,11 @@ # define WIN_SYS_INLINE __forceinline #endif -#ifdef ERTS_SMP -erts_smp_atomic32_t erts_break_requested; +erts_atomic32_t erts_break_requested; #define ERTS_SET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) #define ERTS_UNSET_BREAK_REQUESTED \ - erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) -#else -volatile int erts_break_requested = 0; -#define ERTS_SET_BREAK_REQUESTED (erts_break_requested = 1) -#define ERTS_UNSET_BREAK_REQUESTED (erts_break_requested = 0) -#endif + erts_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) extern int nohup; HANDLE erts_sys_break_event = NULL; @@ -57,14 +51,14 @@ void erts_do_break_handling(void) * therefore, make sure that all threads but this one are blocked before * proceeding! */ - erts_smp_thr_progress_block(); + erts_thr_progress_block(); /* call the break handling function, reset the flag */ do_break(); ResetEvent(erts_sys_break_event); ERTS_UNSET_BREAK_REQUESTED; - erts_smp_thr_progress_unblock(); + erts_thr_progress_unblock(); } diff --git a/erts/emulator/sys/win32/sys_time.c b/erts/emulator/sys/win32/sys_time.c index e8c67b3928..a1dd14f871 100644 --- a/erts/emulator/sys/win32/sys_time.c +++ b/erts/emulator/sys/win32/sys_time.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2016. All Rights Reserved. + * Copyright Ericsson AB 1997-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -95,7 +95,7 @@ struct sys_time_internal_state_read_mostly__ { }; struct sys_time_internal_state_write_freq__ { - erts_smp_mtx_t mtime_mtx; + erts_mtx_t mtime_mtx; ULONGLONG wrap; ULONGLONG last_tick_count; }; @@ -187,8 +187,6 @@ os_monotonic_time_gtc32(void) { ErtsMonotonicTime mtime; Uint32 ticks = (Uint32) GetTickCount(); - ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, - ticks); mtime = ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, ticks); mtime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT; @@ -205,8 +203,6 @@ os_times_gtc32(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep) ticks = (Uint32) GetTickCount(); GetSystemTime(&st); - ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, - ticks); mtime = ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, ticks); mtime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT; @@ -265,8 +261,6 @@ sys_hrtime_gtc32(void) { ErtsSysHrTime time; Uint32 ticks = (Uint32) GetTickCount(); - ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, - tick_count); time = (ErtsSysHrTime) ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd, ticks); time *= (ErtsSysHrTime) (1000 * 1000); @@ -300,8 +294,8 @@ sys_init_time(ErtsSysInitTimeResult *init_resp) module = GetModuleHandle(kernel_dll_name); if (!module) { get_tick_count: - erts_smp_mtx_init(&internal_state.w.f.mtime_mtx, - "os_monotonic_time"); + erts_mtx_init(&internal_state.w.f.mtime_mtx, "os_monotonic_time", NIL, + ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC); internal_state.w.f.wrap = 0; internal_state.w.f.last_tick_count = 0; |