aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/sys/common
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/sys/common')
-rw-r--r--erts/emulator/sys/common/erl_check_io.c3030
-rw-r--r--erts/emulator/sys/common/erl_check_io.h182
-rw-r--r--erts/emulator/sys/common/erl_mmap.c762
-rw-r--r--erts/emulator/sys/common/erl_mmap.h165
-rw-r--r--erts/emulator/sys/common/erl_mseg.c566
-rw-r--r--erts/emulator/sys/common/erl_mseg.h46
-rw-r--r--erts/emulator/sys/common/erl_mtrace_sys_wrap.c23
-rw-r--r--erts/emulator/sys/common/erl_os_monotonic_time_extender.c79
-rw-r--r--erts/emulator/sys/common/erl_os_monotonic_time_extender.h46
-rw-r--r--erts/emulator/sys/common/erl_poll.c2668
-rw-r--r--erts/emulator/sys/common/erl_poll.h316
-rw-r--r--erts/emulator/sys/common/erl_poll_api.h122
-rw-r--r--erts/emulator/sys/common/erl_sys_common_misc.c23
-rw-r--r--erts/emulator/sys/common/erl_util_queue.h21
14 files changed, 4310 insertions, 3739 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 1db673e7f3..f93d4c1557 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2017. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -28,7 +29,6 @@
#endif
#define ERL_CHECK_IO_C__
-#define ERTS_WANT_BREAK_HANDLING
#ifndef WANT_NONBLOCKING
# define WANT_NONBLOCKING
#endif
@@ -37,339 +37,460 @@
#include "erl_port.h"
#include "erl_check_io.h"
#include "erl_thr_progress.h"
+#include "erl_bif_unique.h"
#include "dtrace-wrapper.h"
-
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+#include "lttng-wrapper.h"
+#define ERTS_WANT_TIMER_WHEEL_API
+#include "erl_time.h"
+
+#if 0
+#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__)
+#define DEBUG_PRINT_FD(FMT, STATE, ...) \
+ DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \
+ (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
+ ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \
+ ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
+ (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)
+#define DEBUG_PRINT_MODE
#else
-# include "safe_hash.h"
-# define DRV_EV_STATE_HTAB_SIZE 1024
+#define DEBUG_PRINT(...)
#endif
-typedef char EventStateType;
-#define ERTS_EV_TYPE_NONE ((EventStateType) 0)
-#define ERTS_EV_TYPE_DRV_SEL ((EventStateType) 1) /* driver_select */
-#define ERTS_EV_TYPE_DRV_EV ((EventStateType) 2) /* driver_event */
-#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */
-
-typedef char EventStateFlags;
-#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+#ifndef DEBUG_PRINT_FD
+#define DEBUG_PRINT_FD(...)
+#endif
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+# include "safe_hash.h"
+# define DRV_EV_STATE_HTAB_SIZE 1024
+#endif
-#if defined(ERTS_KERNEL_POLL_VERSION)
-# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp
-#elif defined(ERTS_NO_KERNEL_POLL_VERSION)
-# define ERTS_CIO_EXPORT(FUNC) FUNC ## _nkp
+typedef enum {
+ ERTS_EV_TYPE_NONE = 0,
+ ERTS_EV_TYPE_DRV_SEL = 1, /* driver_select */
+ ERTS_EV_TYPE_STOP_USE = 2, /* pending stop_select */
+ ERTS_EV_TYPE_NIF = 3, /* enif_select */
+ ERTS_EV_TYPE_STOP_NIF = 4 /* pending nif stop */
+} EventStateType;
+
+typedef enum {
+ ERTS_EV_FLAG_CLEAR = 0,
+ ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */
+#ifdef ERTS_ENABLE_KERNEL_POLL
+ ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd
+ and it was put in the nkp version */
#else
-# define ERTS_CIO_EXPORT(FUNC) FUNC
+ ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR,
#endif
-#define ERTS_CIO_HAVE_DRV_EVENT \
- (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL)
+ /* Combinations */
+ ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK
+} EventStateFlags;
-#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control)
-#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait)
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)
-#endif
-#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt)
-#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed)
-#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset)
-#define ERTS_CIO_FREE_POLLSET ERTS_POLL_EXPORT(erts_poll_destroy_pollset)
-#define ERTS_CIO_POLL_MAX_FDS ERTS_POLL_EXPORT(erts_poll_max_fds)
-#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init)
-#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info)
+#define flag2str(flags) \
+ ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \
+ ((flags) == ERTS_EV_FLAG_USED ? "USED" : \
+ ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \
+ ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR"))))
-#define GET_FD(fd) fd
+/* How many events that can be handled at once by one erts_poll_wait call */
+#define ERTS_CHECK_IO_POLL_RES_LEN 512
-static struct pollset_info
+/* Each I/O Poll Thread has one ErtsPollThread each. The ps field
+ can point to either a private ErtsPollSet or a shared one.
+ At the moment only kqueue and epoll pollsets can be
+ shared across threads.
+*/
+typedef struct erts_poll_thread
{
- ErtsPollSet ps;
- erts_smp_atomic_t in_poll_wait; /* set while doing poll */
-#ifdef ERTS_SMP
- struct removed_fd* removed_list; /* list of deselected fd's*/
- erts_smp_spinlock_t removed_list_lock;
+ ErtsPollSet *ps;
+ ErtsPollResFd *pollres;
+ int pollres_len;
+} ErtsPollThread;
+
+/* pollsetv contains pointers to the ErtsPollSets that are in use.
+ * Which pollset to use is determined by hashing the fd.
+ */
+static ErtsPollSet **pollsetv;
+#if ERTS_POLL_USE_FALLBACK
+static ErtsPollSet *flbk_pollset;
#endif
-}pollset;
-#define NUM_OF_POLLSETS 1
+static ErtsPollThread *psiv;
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
SafeHashBucket hb;
#endif
ErtsSysFdType fd;
- union {
- ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+ struct {
ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */
- erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */
+ ErtsNifSelectDataState *nif; /* ERTS_EV_TYPE_NIF */
+ union {
+ erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */
+ ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */
+ } stop;
} driver;
- ErtsPollEvents events;
- unsigned short remove_cnt; /* number of removed_fd's referring to this fd */
+ ErtsPollEvents events; /* The events that have been selected upon */
+ ErtsPollEvents active_events; /* The events currently active in the pollset */
EventStateType type;
EventStateFlags flags;
} ErtsDrvEventState;
-#ifdef ERTS_SMP
-struct removed_fd {
- struct removed_fd *next;
+struct drv_ev_state_shared {
+
+ union {
+ erts_mtx_t lck;
+ byte _cache_line_alignment[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_mtx_t))];
+ } locks[ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT];
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- ErtsSysFdType fd;
+ int max_fds;
+ erts_atomic_t len;
+ ErtsDrvEventState *v;
+ erts_mtx_t grow_lock; /* prevent lock-hogging of racing growers */
#else
- ErtsDrvEventState* state;
- #ifdef DEBUG
- ErtsSysFdType fd;
- #endif
+ SafeHash tab;
+ int num_prealloc;
+ ErtsDrvEventState *prealloc_first;
+ erts_spinlock_t prealloc_lock;
#endif
-
};
-#endif
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static int max_fds = -1;
-#endif
-#define DRV_EV_STATE_LOCK_CNT 16
-static union {
- erts_smp_mtx_t lck;
- byte _cache_line_alignment[64];
-}drv_ev_state_locks[DRV_EV_STATE_LOCK_CNT];
+int ERTS_WRITE_UNLIKELY(erts_no_pollsets) = 1;
+int ERTS_WRITE_UNLIKELY(erts_no_poll_threads) = 1;
+struct drv_ev_state_shared drv_ev_state;
-#ifdef ERTS_SMP
-static ERTS_INLINE erts_smp_mtx_t* fd_mtx(ErtsSysFdType fd)
-{
+static ERTS_INLINE int fd_hash(ErtsSysFdType fd) {
int hash = (int)fd;
# ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
hash ^= (hash >> 9);
# endif
- return &drv_ev_state_locks[hash % DRV_EV_STATE_LOCK_CNT].lck;
+ return hash;
+}
+
+static ERTS_INLINE erts_mtx_t* fd_mtx(ErtsSysFdType fd)
+{
+ return &drv_ev_state.locks[fd_hash(fd) % ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT].lck;
}
-#else
-# define fd_mtx(fd) NULL
-#endif
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static erts_smp_atomic_t drv_ev_state_len;
-static ErtsDrvEventState *drv_ev_state;
-static erts_smp_mtx_t drv_ev_state_grow_lock; /* prevent lock-hogging of racing growers */
+static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd)
+{
+ return &drv_ev_state.v[(int) fd];
+}
-#else
-static SafeHash drv_ev_state_tab;
-static int num_state_prealloc;
-static ErtsDrvEventState *state_prealloc_first;
-erts_smp_spinlock_t state_prealloc_lock;
+#define new_drv_ev_state(State, fd) (State)
+#define erase_drv_ev_state(State)
+
+static ERTS_INLINE int grow_drv_ev_state(ErtsSysFdType fd) {
+ int i;
+ int old_len;
+ int new_len;
+
+ if ((unsigned)fd >= (unsigned)erts_atomic_read_nob(&drv_ev_state.len)) {
+
+ if (fd < 0 || fd >= drv_ev_state.max_fds)
+ return 0;
+
+ erts_mtx_lock(&drv_ev_state.grow_lock);
+ old_len = erts_atomic_read_nob(&drv_ev_state.len);
+ if (fd >= old_len) {
+ new_len = erts_poll_new_table_len(old_len, fd + 1);
+ if (new_len > drv_ev_state.max_fds)
+ new_len = drv_ev_state.max_fds;
+
+ for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */
+ erts_mtx_lock(&drv_ev_state.locks[i].lck);
+ }
+ drv_ev_state.v = (drv_ev_state.v
+ ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE,
+ drv_ev_state.v,
+ sizeof(ErtsDrvEventState)*new_len)
+ : erts_alloc(ERTS_ALC_T_DRV_EV_STATE,
+ sizeof(ErtsDrvEventState)*new_len));
+ ERTS_CT_ASSERT(ERTS_EV_TYPE_NONE == 0);
+ sys_memzero(drv_ev_state.v+old_len,
+ sizeof(ErtsDrvEventState) * (new_len - old_len));
+ for (i = old_len; i < new_len; i++) {
+ drv_ev_state.v[i].fd = (ErtsSysFdType) i;
+ }
+ erts_atomic_set_nob(&drv_ev_state.len, new_len);
+ for (i=0; i<ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) {
+ erts_mtx_unlock(&drv_ev_state.locks[i].lck);
+ }
+ }
+ /*else already grown by racing thread */
+
+ erts_mtx_unlock(&drv_ev_state.grow_lock);
+ }
+ return 1;
+}
-static ERTS_INLINE ErtsDrvEventState *hash_get_drv_ev_state(ErtsSysFdType fd)
+static int drv_ev_state_len(void)
+{
+ return erts_atomic_read_nob(&drv_ev_state.len);
+}
+
+#else /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */
+
+static ERTS_INLINE ErtsDrvEventState *get_drv_ev_state(ErtsSysFdType fd)
{
ErtsDrvEventState tmpl;
tmpl.fd = fd;
- return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state_tab, (void *) &tmpl);
+ return (ErtsDrvEventState *) safe_hash_get(&drv_ev_state.tab, (void *) &tmpl);
}
-static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd)
+static ERTS_INLINE ErtsDrvEventState* new_drv_ev_state(ErtsDrvEventState *state,
+ ErtsSysFdType fd)
{
ErtsDrvEventState tmpl;
+
+ if (state)
+ return state;
+
tmpl.fd = fd;
tmpl.driver.select = NULL;
+ tmpl.driver.nif = NULL;
+ tmpl.driver.stop.drv_ptr = NULL;
tmpl.events = 0;
- tmpl.remove_cnt = 0;
+ tmpl.active_events = 0;
tmpl.type = ERTS_EV_TYPE_NONE;
tmpl.flags = 0;
- return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state_tab, (void *) &tmpl);
+
+ return (ErtsDrvEventState *) safe_hash_put(&drv_ev_state.tab, (void *) &tmpl);
+}
+
+static ERTS_INLINE void erase_drv_ev_state(ErtsDrvEventState *state)
+{
+ safe_hash_erase(&drv_ev_state.tab, (void *) state);
}
-static ERTS_INLINE void hash_erase_drv_ev_state(ErtsDrvEventState *state)
+static int drv_ev_state_len(void)
{
- ASSERT(state->remove_cnt == 0);
- safe_hash_erase(&drv_ev_state_tab, (void *) state);
+ return erts_atomic_read_nob(&drv_ev_state.tab.nitems);
}
#endif /* !ERTS_SYS_CONTINOUS_FD_NUMBERS */
static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode);
-static void select_steal(ErlDrvPort ix, ErtsDrvEventState *state,
- int mode, int on);
-static void print_select_op(erts_dsprintf_buf_t *dsbufp,
- ErlDrvPort ix, ErtsSysFdType fd, int mode, int on);
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static void select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int);
-#endif
-#if ERTS_CIO_HAVE_DRV_EVENT
-static void event_steal(ErlDrvPort ix, ErtsDrvEventState *state,
- ErlDrvEventData event_data);
-static void print_event_op(erts_dsprintf_buf_t *dsbufp,
- ErlDrvPort, ErtsSysFdType, ErlDrvEventData);
+static void drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state,
+ int mode, int on);
+static void nif_select_steal(ErtsDrvEventState *state, int mode,
+ ErtsResource* resource, Eterm ref);
+
+static void print_drv_select_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort ix, ErtsSysFdType fd, int mode, int on);
+static void print_nif_select_op(erts_dsprintf_buf_t*, ErtsSysFdType,
+ int mode, ErtsResource*, Eterm ref);
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static void event_large_fd_error(ErlDrvPort, ErtsSysFdType, ErlDrvEventData);
+static void drv_select_large_fd_error(ErlDrvPort, ErtsSysFdType, int, int);
+static void nif_select_large_fd_error(ErtsSysFdType, int, ErtsResource*,Eterm ref);
#endif
+static void
+steal_pending_stop_use(erts_dsprintf_buf_t*, ErlDrvPort, ErtsDrvEventState*,
+ int mode, int on);
+static void
+steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource*,
+ ErtsDrvEventState *state, int mode, int on);
+static ERTS_INLINE void
+check_fd_cleanup(ErtsDrvEventState *state,
+ ErtsDrvSelectDataState **free_select,
+ ErtsNifSelectDataState **free_nif);
+static ERTS_INLINE void iready(Eterm id, ErtsDrvEventState *state);
+static ERTS_INLINE void oready(Eterm id, ErtsDrvEventState *state);
+#ifdef DEBUG_PRINT_MODE
+static char *drvmode2str(int mode);
+static char *nifmode2str(enum ErlNifSelectFlags mode);
#endif
-static void steal_pending_stop_select(erts_dsprintf_buf_t*, ErlDrvPort,
- ErtsDrvEventState*, int mode, int on);
-#ifdef ERTS_SMP
-ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_FD_LIST)
-#endif
+static ERTS_INLINE void
+init_iotask(ErtsIoTask *io_task, ErtsSysFdType fd)
+{
+ erts_port_task_handle_init(&io_task->task);
+ io_task->fd = fd;
+}
+
+static ERTS_INLINE int
+is_iotask_active(ErtsIoTask *io_task)
+{
+ if (erts_port_task_is_scheduled(&io_task->task))
+ return 1;
+ return 0;
+}
+
+static ERTS_INLINE ErtsDrvSelectDataState *
+alloc_drv_select_data(ErtsSysFdType fd)
+{
+ ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
+ sizeof(ErtsDrvSelectDataState));
+ dsp->inport = NIL;
+ dsp->outport = NIL;
+ init_iotask(&dsp->iniotask, fd);
+ init_iotask(&dsp->outiotask, fd);
+ return dsp;
+}
+
+static ERTS_INLINE ErtsNifSelectDataState *
+alloc_nif_select_data(void)
+{
+ ErtsNifSelectDataState *dsp = erts_alloc(ERTS_ALC_T_NIF_SEL_D_STATE,
+ sizeof(ErtsNifSelectDataState));
+ dsp->in.pid = NIL;
+ dsp->out.pid = NIL;
+ return dsp;
+}
static ERTS_INLINE void
-remember_removed(ErtsDrvEventState *state, struct pollset_info* psi)
+free_drv_select_data(ErtsDrvSelectDataState *dsp)
{
-#ifdef ERTS_SMP
- struct removed_fd *fdlp;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
- if (erts_smp_atomic_read_nob(&psi->in_poll_wait)) {
- state->remove_cnt++;
- ASSERT(state->remove_cnt > 0);
- fdlp = removed_fd_alloc();
- #if defined(ERTS_SYS_CONTINOUS_FD_NUMBERS) || defined(DEBUG)
- fdlp->fd = state->fd;
- #endif
- #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- fdlp->state = state;
- #endif
- erts_smp_spin_lock(&psi->removed_list_lock);
- fdlp->next = psi->removed_list;
- psi->removed_list = fdlp;
- erts_smp_spin_unlock(&psi->removed_list_lock);
- }
-#endif
+ ASSERT(!erts_port_task_is_scheduled(&dsp->iniotask.task));
+ ASSERT(!erts_port_task_is_scheduled(&dsp->outiotask.task));
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp);
}
+static ERTS_INLINE void
+free_nif_select_data(ErtsNifSelectDataState *dsp)
+{
+ erts_free(ERTS_ALC_T_NIF_SEL_D_STATE, dsp);
+}
static ERTS_INLINE int
-is_removed(ErtsDrvEventState *state)
+get_pollset_id(ErtsSysFdType fd)
{
-#ifdef ERTS_SMP
- /* Note that there is a possible race here, where an fd is removed
- (increasing remove_cnt) and then added again just before erts_poll_wait
- is called by erts_check_io. Any polled event on the re-added fd will then
- be falsely ignored. But that does not matter, as the event will trigger
- again next time erl_check_io is called. */
- return state->remove_cnt > 0;
-#else
- return 0;
-#endif
+ return fd_hash(fd) % erts_no_pollsets;
}
-static void
-forget_removed(struct pollset_info* psi)
+static ERTS_INLINE ErtsPollSet *
+get_pollset(ErtsSysFdType fd)
{
-#ifdef ERTS_SMP
- struct removed_fd* fdlp;
- struct removed_fd* tofree;
+ return pollsetv[get_pollset_id(fd)];
+}
- /* Fast track: if (atomic_ptr(removed_list)==NULL) return; */
+#if ERTS_POLL_USE_FALLBACK
+static ERTS_INLINE ErtsPollSet *
+get_fallback(void)
+{
+ return flbk_pollset;
+}
+#endif
- erts_smp_spin_lock(&psi->removed_list_lock);
- fdlp = psi->removed_list;
- psi->removed_list = NULL;
- erts_smp_spin_unlock(&psi->removed_list_lock);
+/*
+ * Place a fd within a pollset. This will automatically use
+ * the fallback ps if needed.
+ */
+static ERTS_INLINE ErtsPollEvents
+erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
+ ErtsPollEvents pe, int *wake_poller)
+{
+ ErtsSysFdType fd = state->fd;
+ ErtsPollEvents res = 0;
+ EventStateFlags flags = state->flags;
- while (fdlp) {
- erts_driver_t* drv_ptr = NULL;
- erts_smp_mtx_t* mtx;
- ErtsSysFdType fd;
- ErtsDrvEventState *state;
+ ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- fd = fdlp->fd;
- mtx = fd_mtx(fd);
- erts_smp_mtx_lock(mtx);
- state = &drv_ev_state[(int) fd];
-#else
- state = fdlp->state;
- fd = state->fd;
- ASSERT(fd == fdlp->fd);
- mtx = fd_mtx(fd);
- erts_smp_mtx_lock(mtx);
-#endif
- ASSERT(state->remove_cnt > 0);
- if (--state->remove_cnt == 0) {
- switch (state->type) {
- case ERTS_EV_TYPE_STOP_USE:
- /* Now we can call stop_select */
- drv_ptr = state->driver.drv_ptr;
- ASSERT(drv_ptr);
- state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- state->driver.drv_ptr = NULL;
- /* Fall through */
- case ERTS_EV_TYPE_NONE:
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- hash_erase_drv_ev_state(state);
-#endif
- break;
- case ERTS_EV_TYPE_DRV_SEL:
- case ERTS_EV_TYPE_DRV_EV:
- break;
- default:
- ASSERT(0);
- }
- }
- erts_smp_mtx_unlock(mtx);
- if (drv_ptr) {
- int was_unmasked = erts_block_fpe();
- DTRACE1(driver_stop_select, drv_ptr->name);
- (*drv_ptr->stop_select) ((ErlDrvEvent) fd, NULL);
- erts_unblock_fpe(was_unmasked);
- if (drv_ptr->handle) {
- erts_ddll_dereference_driver(drv_ptr->handle);
- }
- }
- tofree = fdlp;
- fdlp = fdlp->next;
- removed_fd_free(tofree);
+ if (!(flags & ERTS_EV_FLAG_FALLBACK)) {
+ res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+
+#if ERTS_POLL_USE_FALLBACK
+ if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
+ /* When an add fails with NVAL, the poll/kevent operation could not
+ put that fd in the pollset, so we instead put it into a fallback pollset */
+ state->flags |= ERTS_EV_FLAG_FALLBACK;
+ res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ }
+ } else {
+ ASSERT(op != ERTS_POLL_OP_ADD);
+ res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+#endif
}
-#endif /* ERTS_SMP */
+
+ return res;
}
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static void
-grow_drv_ev_state(int min_ix)
+static ERTS_INLINE ErtsPollEvents
+erts_io_control(ErtsDrvEventState *state, ErtsPollOp op, ErtsPollEvents pe)
{
- int i;
- int new_len;
+ int wake_poller = 0;
+ return erts_io_control_wakeup(state, op, pe, &wake_poller);
+}
+
+/* ToDo: Was inline in erl_check_io.h but now need struct erts_poll_thread */
+void
+erts_io_notify_port_task_executed(ErtsPortTaskType type,
+ ErtsPortTaskHandle *pthp,
+ void (*reset_handle)(ErtsPortTaskHandle *))
+{
+ ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
+ ErtsSysFdType fd = itp->fd;
+ erts_mtx_t *mtx = fd_mtx(fd);
+ int active_events;
+ ErtsDrvEventState *state;
+ ErtsDrvSelectDataState *free_select = NULL;
+ ErtsNifSelectDataState *free_nif = NULL;
- new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_ix + 1);
- if (new_len > max_fds)
- new_len = max_fds;
+ erts_mtx_lock(mtx);
+ state = get_drv_ev_state(fd);
- erts_smp_mtx_lock(&drv_ev_state_grow_lock);
- if (erts_smp_atomic_read_nob(&drv_ev_state_len) <= min_ix) {
- for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */
- erts_smp_mtx_lock(&drv_ev_state_locks[i].lck);
- }
- drv_ev_state = (drv_ev_state
- ? erts_realloc(ERTS_ALC_T_DRV_EV_STATE,
- drv_ev_state,
- sizeof(ErtsDrvEventState)*new_len)
- : erts_alloc(ERTS_ALC_T_DRV_EV_STATE,
- sizeof(ErtsDrvEventState)*new_len));
- for (i = erts_smp_atomic_read_nob(&drv_ev_state_len); i < new_len; i++) {
- drv_ev_state[i].fd = (ErtsSysFdType) i;
- drv_ev_state[i].driver.select = NULL;
- drv_ev_state[i].events = 0;
- drv_ev_state[i].remove_cnt = 0;
- drv_ev_state[i].type = ERTS_EV_TYPE_NONE;
- drv_ev_state[i].flags = 0;
- }
- erts_smp_atomic_set_nob(&drv_ev_state_len, new_len);
- for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) {
- erts_smp_mtx_unlock(&drv_ev_state_locks[i].lck);
- }
+ active_events = state->active_events;
+
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+
+ DEBUG_PRINT_FD("executed ready_input", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
+ if (state->events & ERTS_POLL_EV_IN)
+ active_events |= ERTS_POLL_EV_IN;
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
+
+ DEBUG_PRINT_FD("executed ready_output", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
+ if (state->events & ERTS_POLL_EV_OUT)
+ active_events |= ERTS_POLL_EV_OUT;
+ break;
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
+ break;
}
- /*else already grown by racing thread */
- erts_smp_mtx_unlock(&drv_ev_state_grow_lock);
-}
-#endif /* ERTS_SYS_CONTINOUS_FD_NUMBERS */
+ reset_handle(pthp);
+
+ if (active_events) {
+ /* This is not needed if active_events has not changed */
+ if (state->active_events != active_events) {
+ ErtsPollEvents new_events;
+ state->active_events = active_events;
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events);
+
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
+ }
+ } else {
+ check_fd_cleanup(state, &free_select, &free_nif);
+ }
+
+ erts_mtx_unlock(mtx);
+ if (free_select)
+ free_drv_select_data(free_select);
+ if (free_nif)
+ free_nif_select_data(free_nif);
+}
static ERTS_INLINE void
abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type)
{
- if (is_nil(id)) {
- ASSERT(type == ERTS_EV_TYPE_NONE
- || !erts_port_task_is_scheduled(pthp));
- }
- else if (erts_port_task_is_scheduled(pthp)) {
+ if (is_not_nil(id) && erts_port_task_is_scheduled(pthp)) {
erts_port_task_abort(pthp);
ASSERT(erts_is_port_alive(id));
}
@@ -381,13 +502,7 @@ abort_tasks(ErtsDrvEventState *state, int mode)
switch (mode) {
case 0: check_type:
switch (state->type) {
-#if ERTS_CIO_HAVE_DRV_EVENT
- case ERTS_EV_TYPE_DRV_EV:
- abort_task(state->driver.event->port,
- &state->driver.event->task,
- ERTS_EV_TYPE_DRV_EV);
- return;
-#endif
+ case ERTS_EV_TYPE_NIF:
case ERTS_EV_TYPE_NONE:
return;
default:
@@ -398,14 +513,14 @@ abort_tasks(ErtsDrvEventState *state, int mode)
case ERL_DRV_WRITE:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->outport,
- &state->driver.select->outtask,
+ &state->driver.select->outiotask.task,
state->type);
if (mode == ERL_DRV_WRITE)
break;
case ERL_DRV_READ:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->inport,
- &state->driver.select->intask,
+ &state->driver.select->iniotask.task,
state->type);
break;
default:
@@ -416,16 +531,14 @@ abort_tasks(ErtsDrvEventState *state, int mode)
static void
deselect(ErtsDrvEventState *state, int mode)
{
- int do_wake = 0;
ErtsPollEvents rm_events;
- ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
- ASSERT(state->events);
+ ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
abort_tasks(state, mode);
- if (!mode)
+ if (!mode) {
rm_events = state->events;
- else {
+ } else {
rm_events = 0;
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
if (mode & ERL_DRV_READ) {
@@ -438,401 +551,551 @@ deselect(ErtsDrvEventState *state, int mode)
}
}
- state->events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, rm_events, 0, &do_wake);
+ state->events &= ~rm_events;
+ state->active_events &= ~rm_events;
if (!(state->events)) {
+ erts_io_control(state, ERTS_POLL_OP_DEL, 0);
switch (state->type) {
+ case ERTS_EV_TYPE_NIF:
+ state->driver.nif->in.pid = NIL;
+ state->driver.nif->out.pid = NIL;
+ enif_release_resource(state->driver.stop.resource->data);
+ state->driver.stop.resource = NULL;
+ break;
case ERTS_EV_TYPE_DRV_SEL:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
- break;
-#if ERTS_CIO_HAVE_DRV_EVENT
- case ERTS_EV_TYPE_DRV_EV:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task));
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
+ state->driver.select->inport = NIL;
+ state->driver.select->outport = NIL;
break;
-#endif
case ERTS_EV_TYPE_NONE:
break;
default:
ASSERT(0);
break;
}
-
- state->driver.select = NULL;
state->type = ERTS_EV_TYPE_NONE;
state->flags = 0;
- remember_removed(state, &pollset);
+ } else {
+ ErtsPollEvents new_events =
+ erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (state->active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
}
}
-
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0)
+# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE)
#else
# define IS_FD_UNKNOWN(state) ((state) == NULL)
#endif
+static ERTS_INLINE void
+check_fd_cleanup(ErtsDrvEventState *state,
+ ErtsDrvSelectDataState **free_select,
+ ErtsNifSelectDataState **free_nif)
+{
+ ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
+ *free_select = NULL;
+ if (state->driver.select
+ && (state->type != ERTS_EV_TYPE_DRV_SEL)
+ && !is_iotask_active(&state->driver.select->iniotask)
+ && !is_iotask_active(&state->driver.select->outiotask)) {
+
+ *free_select = state->driver.select;
+ state->driver.select = NULL;
+ }
+
+ *free_nif = NULL;
+ if (state->driver.nif && (state->type != ERTS_EV_TYPE_NIF)) {
+ *free_nif = state->driver.nif;
+ state->driver.nif = NULL;
+ }
+
+ if (((state->type != ERTS_EV_TYPE_NONE)
+ | (state->driver.nif != NULL)
+ | (state->driver.select != NULL)) == 0) {
+
+ erase_drv_ev_state(state);
+ }
+}
+
+#ifdef __WIN32__
+# define MUST_DEFER(MAY_SLEEP) 1
+#else
+# define MUST_DEFER(MAY_SLEEP) (MAY_SLEEP)
+#endif
int
-ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
- ErlDrvEvent e,
- int mode,
- int on)
+driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
{
void (*stop_select_fn)(ErlDrvEvent, void*) = NULL;
Port *prt = erts_drvport2port(ix);
Eterm id = erts_drvport2id(ix);
ErtsSysFdType fd = (ErtsSysFdType) e;
ErtsPollEvents ctl_events = (ErtsPollEvents) 0;
- ErtsPollEvents new_events, old_events;
+ ErtsPollEvents old_events;
+ ErtsPollEvents new_events;
+ ErtsPollOp ctl_op = ERTS_POLL_OP_MOD;
ErtsDrvEventState *state;
- int wake_poller;
+ int wake_poller = 0;
int ret;
+ ErtsDrvSelectDataState *free_select = NULL;
+ ErtsNifSelectDataState *free_nif = NULL;
#ifdef USE_VM_PROBES
DTRACE_CHARBUF(name, 64);
#endif
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO);
- if (prt == ERTS_INVALID_ERL_DRV_PORT)
+ if (prt == ERTS_INVALID_ERL_DRV_PORT) {
+ ERTS_MSACC_POP_STATE();
return -1;
+ }
- ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt));
+ ERTS_LC_ASSERT(erts_lc_is_port_locked(prt));
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) {
- if (fd < 0) {
- return -1;
- }
- if (fd >= max_fds) {
- select_large_fd_error(ix, fd, mode, on);
- return -1;
- }
- grow_drv_ev_state(fd);
+ if (!grow_drv_ev_state(fd)) {
+ if (fd > 0) drv_select_large_fd_error(ix, fd, mode, on);
+ ERTS_MSACC_POP_STATE();
+ return -1;
}
#endif
- erts_smp_mtx_lock(fd_mtx(fd));
+ erts_mtx_lock(fd_mtx(fd));
+
+ state = get_drv_ev_state(fd); /* may be NULL! */
+
+ DEBUG_PRINT_FD("driver_select(%T, %p, %s, %d)",
+ state, id, fd, drvmode2str(mode), on);
+
+ if (!on) {
+ if (IS_FD_UNKNOWN(state)) {
+ if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ /* fast track to stop_select callback */
+ stop_select_fn = prt->drv_ptr->stop_select;
+ #ifdef USE_VM_PROBES
+ strncpy(name, prt->drv_ptr->name,
+ sizeof(DTRACE_CHARBUF_NAME(name))-1);
+ name[sizeof(name)-1] = '\0';
+ #endif
+ }
+ ret = 0;
+ goto done_unknown;
+ }
+ else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ mode |= (ERL_DRV_READ | ERL_DRV_WRITE);
+ }
+ }
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- state = &drv_ev_state[(int) fd];
-#else
- state = hash_get_drv_ev_state(fd); /* may be NULL! */
-#endif
+ state = new_drv_ev_state(state, fd);
- if (!on && (mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
- if (IS_FD_UNKNOWN(state)) {
- /* fast track to stop_select callback */
- stop_select_fn = prt->drv_ptr->stop_select;
-#ifdef USE_VM_PROBES
- strncpy(name, prt->drv_ptr->name,
- sizeof(DTRACE_CHARBUF_NAME(name))-1);
- name[sizeof(name)-1] = '\0';
-#endif
- ret = 0;
- goto done_unknown;
- }
- mode |= (ERL_DRV_READ | ERL_DRV_WRITE);
- wake_poller = 1; /* to eject fd from pollset (if needed) */
+ switch (state->type) {
+ case ERTS_EV_TYPE_NIF:
+ drv_select_steal(ix, state, mode, on);
+ break;
+ case ERTS_EV_TYPE_STOP_USE: {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_drv_select_op(dsbufp, ix, state->fd, mode, on);
+ steal_pending_stop_use(dsbufp, ix, state, mode, on);
+ if (state->type == ERTS_EV_TYPE_STOP_USE) {
+ ret = 0;
+ goto done; /* stop_select still pending */
+ }
+ ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ break;
}
- else wake_poller = 0;
+ case ERTS_EV_TYPE_STOP_NIF: {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_drv_select_op(dsbufp, ix, state->fd, mode, on);
+ steal_pending_stop_nif(dsbufp, NULL, state, mode, on);
+ ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ break;
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state == NULL) {
- state = hash_new_drv_ev_state(fd);
}
-#endif
-
-#if ERTS_CIO_HAVE_DRV_EVENT
- if (state->type == ERTS_EV_TYPE_DRV_EV)
- select_steal(ix, state, mode, on);
-#endif
- if (state->type == ERTS_EV_TYPE_STOP_USE) {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_select_op(dsbufp, ix, state->fd, mode, on);
- steal_pending_stop_select(dsbufp, ix, state, mode, on);
- if (state->type == ERTS_EV_TYPE_STOP_USE) {
- ret = 0;
- goto done; /* stop_select still pending */
- }
- ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ default: break;
}
if (mode & ERL_DRV_READ) {
if (state->type == ERTS_EV_TYPE_DRV_SEL) {
Eterm owner = state->driver.select->inport;
if (owner != id && is_not_nil(owner))
- select_steal(ix, state, mode, on);
+ drv_select_steal(ix, state, mode, on);
}
- ctl_events |= ERTS_POLL_EV_IN;
+ ctl_events = ERTS_POLL_EV_IN;
}
if (mode & ERL_DRV_WRITE) {
if (state->type == ERTS_EV_TYPE_DRV_SEL) {
Eterm owner = state->driver.select->outport;
if (owner != id && is_not_nil(owner))
- select_steal(ix, state, mode, on);
+ drv_select_steal(ix, state, mode, on);
}
ctl_events |= ERTS_POLL_EV_OUT;
- }
+ }
+
ASSERT((state->type == ERTS_EV_TYPE_DRV_SEL) ||
(state->type == ERTS_EV_TYPE_NONE && !state->events));
- if (!on && !(state->flags & ERTS_EV_FLAG_USED)
- && state->events && !(state->events & ~ctl_events)) {
- /* Old driver removing all events. At least wake poller.
- It will not make close() 100% safe but it will prevent
- actions delayed by poll timeout. */
- wake_poller = 1;
- }
-
- new_events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, ctl_events, on, &wake_poller);
+ old_events = state->events;
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) {
- state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select);
- state->driver.select = NULL;
- }
- ret = -1;
- goto done;
+ if (on) {
+ ctl_events &= ~old_events;
+ state->events |= ctl_events;
+ if (ctl_events & ERTS_POLL_EV_IN && (!state->driver.select || !is_iotask_active(&state->driver.select->iniotask)))
+ state->active_events |= ERTS_POLL_EV_IN;
+ if (ctl_events & ERTS_POLL_EV_OUT && (!state->driver.select || !is_iotask_active(&state->driver.select->outiotask)))
+ state->active_events |= ERTS_POLL_EV_OUT;
+ if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
+ ctl_op = ERTS_POLL_OP_ADD;
+ }
+ }
+ else {
+ ctl_events &= old_events;
+ state->events &= ~ctl_events;
+ state->active_events &= ~ctl_events;
+
+ if (!state->events) {
+ if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
+ ctl_op = ERTS_POLL_OP_DEL;
+ }
}
- old_events = state->events;
+ if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
- ASSERT(on
- ? (new_events == (state->events | ctl_events))
- : (new_events == (state->events & ~ctl_events)));
+ new_events = erts_io_control_wakeup(state, ctl_op,
+ state->active_events,
+ &wake_poller);
- ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL
- || state->type == ERTS_EV_TYPE_NONE);
+ ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
+ }
- state->events = new_events;
- if (ctl_events) {
- if (on) {
- if (state->type == ERTS_EV_TYPE_NONE) {
- ErtsDrvSelectDataState *dsdsp
- = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
- sizeof(ErtsDrvSelectDataState));
- dsdsp->inport = NIL;
- dsdsp->outport = NIL;
- erts_port_task_handle_init(&dsdsp->intask);
- erts_port_task_handle_init(&dsdsp->outtask);
- ASSERT(state->driver.select == NULL);
- state->driver.select = dsdsp;
+ if (on) {
+ if (ctl_events) {
+ if (!state->driver.select)
+ state->driver.select = alloc_drv_select_data(state->fd);
+ if (state->type == ERTS_EV_TYPE_NONE)
state->type = ERTS_EV_TYPE_DRV_SEL;
- }
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
- if (ctl_events & ERTS_POLL_EV_IN)
+ if (ctl_events & ERTS_POLL_EV_IN) {
state->driver.select->inport = id;
- if (ctl_events & ERTS_POLL_EV_OUT)
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL))
+ iready(id, state);
+ }
+ if (ctl_events & ERTS_POLL_EV_OUT) {
state->driver.select->outport = id;
- if (mode & ERL_DRV_USE) {
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL))
+ oready(id, state);
+ }
+ if (mode & ERL_DRV_USE)
state->flags |= ERTS_EV_FLAG_USED;
- }
- }
- else { /* off */
- if (state->type == ERTS_EV_TYPE_DRV_SEL) {
- if (ctl_events & ERTS_POLL_EV_IN) {
- abort_tasks(state, ERL_DRV_READ);
- state->driver.select->inport = NIL;
- }
- if (ctl_events & ERTS_POLL_EV_OUT) {
- abort_tasks(state, ERL_DRV_WRITE);
- state->driver.select->outport = NIL;
- }
- if (new_events == 0) {
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
- if (old_events != 0) {
- remember_removed(state, &pollset);
- }
- if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
- state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
- state->driver.select = NULL;
- }
- /*else keep it, as fd will probably be selected upon again */
- }
- }
- if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
- erts_driver_t* drv_ptr = prt->drv_ptr;
- ASSERT(new_events==0);
- if (state->remove_cnt == 0 || !wake_poller) {
- /* Safe to close fd now as it is not in pollset
- or there was no need to eject fd (kernel poll) */
- stop_select_fn = drv_ptr->stop_select;
+ }
+ }
+ else { /* off */
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ abort_tasks(state, ERL_DRV_READ);
+ state->driver.select->inport = NIL;
+ }
+ if (ctl_events & ERTS_POLL_EV_OUT) {
+ abort_tasks(state, ERL_DRV_WRITE);
+ state->driver.select->outport = NIL;
+ }
+ if (state->events == 0) {
+ if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ }
+ /*else keep it, as fd will probably be selected upon again */
+ }
+ }
+ if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
+ erts_driver_t* drv_ptr = prt->drv_ptr;
+ ASSERT(state->events==0);
+ if (!wake_poller) {
+ /* Safe to close fd now as it is not in pollset
+ or there was no need to eject fd (kernel poll) */
+ stop_select_fn = drv_ptr->stop_select;
#ifdef USE_VM_PROBES
- strncpy(name, prt->drv_ptr->name, sizeof(name)-1);
- name[sizeof(name)-1] = '\0';
-#endif
- }
- else {
- /* Not safe to close fd, postpone stop_select callback. */
- state->type = ERTS_EV_TYPE_STOP_USE;
- state->driver.drv_ptr = drv_ptr;
- if (drv_ptr->handle) {
- erts_ddll_reference_referenced_driver(drv_ptr->handle);
- }
- }
- }
- }
+ strncpy(name, prt->drv_ptr->name, sizeof(name)-1);
+ name[sizeof(name)-1] = '\0';
+#endif
+ }
+ else {
+ /* Not safe to close fd, postpone stop_select callback. */
+ state->type = ERTS_EV_TYPE_STOP_USE;
+ state->driver.stop.drv_ptr = drv_ptr;
+ if (drv_ptr->handle) {
+ erts_ddll_reference_referenced_driver(drv_ptr->handle);
+ }
+ }
+ }
}
ret = 0;
-done:;
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
- }
-#endif
-done_unknown:
- erts_smp_mtx_unlock(fd_mtx(fd));
+done:
+
+ check_fd_cleanup(state,
+ &free_select,
+ &free_nif);
+
+done_unknown:
+ erts_mtx_unlock(fd_mtx(fd));
if (stop_select_fn) {
int was_unmasked = erts_block_fpe();
DTRACE1(driver_stop_select, name);
+ LTTNG1(driver_stop_select, "unknown");
(*stop_select_fn)(e, NULL);
erts_unblock_fpe(was_unmasked);
}
+ if (free_select)
+ free_drv_select_data(free_select);
+ if (free_nif)
+ free_nif_select_data(free_nif);
+
+ ERTS_MSACC_POP_STATE();
+
return ret;
}
int
-ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
- ErlDrvEvent e,
- ErlDrvEventData event_data)
+enif_select(ErlNifEnv* env,
+ ErlNifEvent e,
+ enum ErlNifSelectFlags mode,
+ void* obj,
+ const ErlNifPid* pid,
+ Eterm ref)
{
-#if !ERTS_CIO_HAVE_DRV_EVENT
- return -1;
-#else
+ int on;
+ ErtsResource* resource = DATA_TO_RESOURCE(obj);
ErtsSysFdType fd = (ErtsSysFdType) e;
- ErtsPollEvents events;
- ErtsPollEvents add_events;
- ErtsPollEvents remove_events;
- Eterm id = erts_drvport2id(ix);
+ ErtsPollEvents ctl_events = (ErtsPollEvents) 0;
+ ErtsPollEvents old_events;
+ ErtsPollOp ctl_op = ERTS_POLL_OP_MOD;
ErtsDrvEventState *state;
- int do_wake = 0;
- int ret;
- Port *prt = erts_drvport2port(ix);
+ int ret, wake_poller = 0;
+ enum { NO_STOP=0, CALL_STOP, CALL_STOP_AND_RELEASE } call_stop = NO_STOP;
+ ErtsDrvSelectDataState *free_select = NULL;
+ ErtsNifSelectDataState *free_nif = NULL;
- if (prt == ERTS_INVALID_ERL_DRV_PORT)
- return -1;
-
- ERTS_SMP_LC_ASSERT(erts_lc_is_port_locked(prt));
+ ASSERT(!(resource->monitors && resource->monitors->is_dying));
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if ((unsigned)fd >= (unsigned)erts_smp_atomic_read_nob(&drv_ev_state_len)) {
- if (fd < 0)
- return -1;
- if (fd >= max_fds) {
- event_large_fd_error(ix, fd, event_data);
- return -1;
- }
- grow_drv_ev_state(fd);
+ if (!grow_drv_ev_state(fd)) {
+ if (fd > 0) nif_select_large_fd_error(fd, mode, resource, ref);
+ return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT;
}
#endif
- erts_smp_mtx_lock(fd_mtx(fd));
+ erts_mtx_lock(fd_mtx(fd));
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- state = &drv_ev_state[(int) fd];
-#else
- /* Could use hash_new directly, but want to keep the normal case fast */
- state = hash_get_drv_ev_state(fd);
- if (state == NULL) {
- state = hash_new_drv_ev_state(fd);
+ state = get_drv_ev_state(fd); /* may be NULL! */
+
+ DEBUG_PRINT_FD("enif_select(%T, %d, %s, %p, %T, %T)",
+ state, env->proc->common.id, fd, nifmode2str(mode), resource,
+ pid ? pid->pid : THE_NON_VALUE, ref);
+
+ if (mode & ERL_NIF_SELECT_STOP) {
+ ASSERT(resource->type->stop);
+ if (IS_FD_UNKNOWN(state)) {
+ /* fast track to stop callback */
+ call_stop = CALL_STOP;
+ ret = ERL_NIF_SELECT_STOP_CALLED;
+ goto done_unknown;
+ }
+ on = 0;
+ mode = ERL_DRV_READ | ERL_DRV_WRITE | ERL_DRV_USE;
+ ctl_events = ERTS_POLL_EV_IN | ERTS_POLL_EV_OUT;
+ ctl_op = ERTS_POLL_OP_DEL;
+ }
+ else {
+ on = 1;
+ ASSERT(mode);
+ if (mode & ERL_DRV_READ) {
+ ctl_events |= ERTS_POLL_EV_IN;
+ }
+ if (mode & ERL_DRV_WRITE) {
+ ctl_events |= ERTS_POLL_EV_OUT;
+ }
}
-#endif
+
+ state = new_drv_ev_state(state,fd);
switch (state->type) {
- case ERTS_EV_TYPE_DRV_EV:
- if (state->driver.event->port == id) break;
- /*fall through*/
+ case ERTS_EV_TYPE_NIF:
+ /*
+ * Changing resource is considered stealing.
+ * Changing process and/or ref is ok (I think?).
+ */
+ if (state->driver.stop.resource != resource)
+ nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, ref);
+ break;
case ERTS_EV_TYPE_DRV_SEL:
- event_steal(ix, state, event_data);
- break;
+ nif_select_steal(state, mode, resource, ref);
+ break;
case ERTS_EV_TYPE_STOP_USE: {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_event_op(dsbufp, ix, fd, event_data);
- steal_pending_stop_select(dsbufp, ix, state, 0, 1);
- break;
- }
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_nif_select_op(dsbufp, fd, mode, resource, ref);
+ steal_pending_stop_use(dsbufp, ERTS_INVALID_ERL_DRV_PORT, state, mode, on);
+ ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ break;
+ }
+ case ERTS_EV_TYPE_STOP_NIF: {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_nif_select_op(dsbufp, fd, mode, resource, ref);
+ steal_pending_stop_nif(dsbufp, resource, state, mode, on);
+ if (state->type == ERTS_EV_TYPE_STOP_NIF) {
+ ret = ERL_NIF_SELECT_STOP_SCHEDULED; /* ?? */
+ goto done;
+ }
+ ASSERT(state->type == ERTS_EV_TYPE_NONE);
+ break;
+ }
+ default: break;
}
- ASSERT(state->type == ERTS_EV_TYPE_DRV_EV
- || state->type == ERTS_EV_TYPE_NONE);
+ ASSERT((state->type == ERTS_EV_TYPE_NIF) ||
+ (state->type == ERTS_EV_TYPE_NONE && !state->events));
- events = state->events;
+ old_events = state->events;
- if (!event_data) {
- remove_events = events;
- add_events = 0;
+ if (on) {
+ ctl_events &= ~old_events;
+ state->events |= ctl_events;
+ state->active_events |= ctl_events;
+ if (state->type == ERTS_EV_TYPE_NONE)
+ ctl_op = ERTS_POLL_OP_ADD;
}
else {
- remove_events = ~event_data->events & events;
- add_events = ~events & event_data->events;
+ ctl_events &= old_events;
+ state->events &= ~ctl_events;
+ state->active_events &= ~ctl_events;
}
- if (add_events) {
- events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, add_events, 1, &do_wake);
- if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- ret = -1;
- goto done;
- }
+ if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
+ ErtsPollEvents new_events;
+
+ new_events = erts_io_control_wakeup(state, ctl_op,
+ state->active_events,
+ &wake_poller);
+
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->type == ERTS_EV_TYPE_NIF && !old_events) {
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ state->driver.nif->in.pid = NIL;
+ state->driver.nif->out.pid = NIL;
+ state->driver.stop.resource = NULL;
+ }
+ ret = INT_MIN | ERL_NIF_SELECT_FAILED;
+ goto done;
+ }
+ ASSERT(new_events == state->events);
}
- if (remove_events) {
- events = ERTS_CIO_POLL_CTL(pollset.ps, state->fd, remove_events, 0, &do_wake);
- if (events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- ret = -1;
- goto done;
- }
- }
- if (event_data && event_data->events != 0) {
- if (state->type == ERTS_EV_TYPE_DRV_EV) {
- state->driver.event->removed_events &= ~add_events;
- state->driver.event->removed_events |= remove_events;
- }
- else {
- state->driver.event
- = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
- sizeof(ErtsDrvEventDataState));
- erts_port_task_handle_init(&state->driver.event->task);
- state->driver.event->port = id;
- state->driver.event->removed_events = (ErtsPollEvents) 0;
- state->type = ERTS_EV_TYPE_DRV_EV;
- }
- state->driver.event->data = event_data;
+
+ ASSERT(state->type == ERTS_EV_TYPE_NIF
+ || state->type == ERTS_EV_TYPE_NONE);
+
+ if (on) {
+ const Eterm recipient = pid ? pid->pid : env->proc->common.id;
+ Uint32* refn;
+ if (!state->driver.nif)
+ state->driver.nif = alloc_nif_select_data();
+ if (state->type == ERTS_EV_TYPE_NONE) {
+ state->type = ERTS_EV_TYPE_NIF;
+ state->driver.stop.resource = resource;
+ enif_keep_resource(resource->data);
+ }
+ ASSERT(state->type == ERTS_EV_TYPE_NIF);
+ ASSERT(state->driver.stop.resource == resource);
+ if (mode & ERL_DRV_READ) {
+ state->driver.nif->in.pid = recipient;
+ if (is_immed(ref)) {
+ state->driver.nif->in.immed = ref;
+ } else {
+ ASSERT(is_internal_ref(ref));
+ refn = internal_ref_numbers(ref);
+ state->driver.nif->in.immed = THE_NON_VALUE;
+ sys_memcpy(state->driver.nif->in.refn, refn,
+ sizeof(state->driver.nif->in.refn));
+ }
+ }
+ if (mode & ERL_DRV_WRITE) {
+ state->driver.nif->out.pid = recipient;
+ if (is_immed(ref)) {
+ state->driver.nif->out.immed = ref;
+ } else {
+ ASSERT(is_internal_ref(ref));
+ refn = internal_ref_numbers(ref);
+ state->driver.nif->out.immed = THE_NON_VALUE;
+ sys_memcpy(state->driver.nif->out.refn, refn,
+ sizeof(state->driver.nif->out.refn));
+ }
+ }
+ ret = 0;
}
- else {
- if (state->type == ERTS_EV_TYPE_DRV_EV) {
- abort_tasks(state, 0);
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
- }
- state->driver.select = NULL;
- state->type = ERTS_EV_TYPE_NONE;
- remember_removed(state, &pollset);
+ else { /* off */
+ if (state->type == ERTS_EV_TYPE_NIF) {
+ state->driver.nif->in.pid = NIL;
+ state->driver.nif->out.pid = NIL;
+ }
+ ASSERT(state->events==0);
+ if (!wake_poller) {
+ /*
+ * Safe to close fd now as it is not in pollset
+ * or there was no need to eject fd (kernel poll)
+ */
+ if (state->type == ERTS_EV_TYPE_NIF) {
+ ASSERT(state->driver.stop.resource == resource);
+ call_stop = CALL_STOP_AND_RELEASE;
+ state->driver.stop.resource = NULL;
+ }
+ else {
+ ASSERT(!state->driver.stop.resource);
+ call_stop = CALL_STOP;
+ }
+ state->type = ERTS_EV_TYPE_NONE;
+ ret = ERL_NIF_SELECT_STOP_CALLED;
+ }
+ else {
+ /* Not safe to close fd, postpone stop_select callback. */
+ if (state->type == ERTS_EV_TYPE_NONE) {
+ ASSERT(!state->driver.stop.resource);
+ state->driver.stop.resource = resource;
+ enif_keep_resource(resource);
+ }
+ state->type = ERTS_EV_TYPE_STOP_NIF;
+ ret = ERL_NIF_SELECT_STOP_SCHEDULED;
+ }
}
- state->events = events;
- ASSERT(event_data ? events == event_data->events : events == 0);
-
- ret = 0;
done:
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
+
+ check_fd_cleanup(state,
+ &free_select,
+ &free_nif);
+
+done_unknown:
+ erts_mtx_unlock(fd_mtx(fd));
+ if (call_stop) {
+ erts_resource_stop(resource, (ErlNifEvent)fd, 1);
+ if (call_stop == CALL_STOP_AND_RELEASE) {
+ enif_release_resource(resource->data);
+ }
}
-#endif
- erts_smp_mtx_unlock(fd_mtx(fd));
+ if (free_select)
+ free_drv_select_data(free_select);
+ if (free_nif)
+ free_nif_select_data(free_nif);
+
return ret;
-#endif
}
static ERTS_INLINE int
@@ -861,13 +1124,14 @@ need2steal(ErtsDrvEventState *state, int mode)
state,
ERL_DRV_WRITE);
break;
-#if ERTS_CIO_HAVE_DRV_EVENT
- case ERTS_EV_TYPE_DRV_EV:
- do_steal |= chk_stale(state->driver.event->port, state, 0);
- break;
-#endif
+ case ERTS_EV_TYPE_NIF:
+ ASSERT(state->driver.stop.resource);
+ do_steal = 1;
+ break;
+
case ERTS_EV_TYPE_STOP_USE:
- ASSERT(0);
+ case ERTS_EV_TYPE_STOP_NIF:
+ ASSERT(0);
break;
default:
break;
@@ -898,7 +1162,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id)
static void
steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
{
- erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd));
+ erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd);
switch (state->type) {
case ERTS_EV_TYPE_DRV_SEL: {
int deselect_mode = 0;
@@ -922,53 +1186,52 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
if (deselect_mode)
deselect(state, deselect_mode);
else {
- erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd));
- ASSERT(0);
- }
- erts_dsprintf(dsbufp, "\n");
- break;
- }
-#if ERTS_CIO_HAVE_DRV_EVENT
- case ERTS_EV_TYPE_DRV_EV: {
- Eterm eid = state->driver.event->port;
- if (is_nil(eid)) {
erts_dsprintf(dsbufp, "no one", (int) state->fd);
ASSERT(0);
}
- else {
- erts_dsprintf(dsbufp, "event driver ");
- print_driver_name(dsbufp, eid);
- erts_dsprintf(dsbufp, "%T ", eid);
- }
erts_dsprintf(dsbufp, "\n");
- deselect(state, 0);
break;
}
-#endif
- case ERTS_EV_TYPE_STOP_USE: {
+ case ERTS_EV_TYPE_NIF: {
+ Eterm iid = state->driver.nif->in.pid;
+ Eterm oid = state->driver.nif->out.pid;
+ const char* with = "with";
+ ErlNifResourceType* rt = state->driver.stop.resource->type;
+
+ erts_dsprintf(dsbufp, "resource %T:%T", rt->module, rt->name);
+
+ if (is_not_nil(iid)) {
+ erts_dsprintf(dsbufp, " %s in-pid %T", with, iid);
+ with = "and";
+ }
+ if (is_not_nil(oid)) {
+ erts_dsprintf(dsbufp, " %s out-pid %T", with, oid);
+ }
+ deselect(state, 0);
+ erts_dsprintf(dsbufp, "\n");
+ break;
+ }
+ case ERTS_EV_TYPE_STOP_USE:
+ case ERTS_EV_TYPE_STOP_NIF: {
ASSERT(0);
break;
}
default:
- erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd));
+ erts_dsprintf(dsbufp, "no one\n", (int) state->fd);
ASSERT(0);
}
}
static void
-print_select_op(erts_dsprintf_buf_t *dsbufp,
- ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
+print_drv_select_op(erts_dsprintf_buf_t *dsbufp,
+ ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
{
Port *pp = erts_drvport2port(ix);
erts_dsprintf(dsbufp,
-#ifdef __OSE__
- "driver_select(%p, %d,%s%s%s%s | %d, %d) "
-#else
"driver_select(%p, %d,%s%s%s%s, %d) "
-#endif
"by ",
ix,
- (int) GET_FD(fd),
+ (int) fd,
mode & ERL_DRV_READ ? " ERL_DRV_READ" : "",
mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "",
mode & ERL_DRV_USE ? " ERL_DRV_USE" : "",
@@ -979,11 +1242,40 @@ print_select_op(erts_dsprintf_buf_t *dsbufp,
}
static void
-select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on)
+print_nif_select_op(erts_dsprintf_buf_t *dsbufp,
+ ErtsSysFdType fd, int mode,
+ ErtsResource* resource, Eterm ref)
+{
+ erts_dsprintf(dsbufp,
+ "enif_select(_, %d,%s%s%s, %T:%T, %T) ",
+ (int) fd,
+ mode & ERL_NIF_SELECT_READ ? " READ" : "",
+ mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "",
+ mode & ERL_NIF_SELECT_STOP ? " STOP" : "",
+ resource->type->module,
+ resource->type->name,
+ ref);
+}
+
+
+static void
+drv_select_steal(ErlDrvPort ix, ErtsDrvEventState *state, int mode, int on)
+{
+ if (need2steal(state, mode)) {
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_drv_select_op(dsbufp, ix, state->fd, mode, on);
+ steal(dsbufp, state, mode);
+ erts_send_error_to_logger_nogl(dsbufp);
+ }
+}
+
+static void
+nif_select_steal(ErtsDrvEventState *state, int mode,
+ ErtsResource* resource, Eterm ref)
{
if (need2steal(state, mode)) {
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_select_op(dsbufp, ix, state->fd, mode, on);
+ print_nif_select_op(dsbufp, state->fd, mode, resource, ref);
steal(dsbufp, state, mode);
erts_send_error_to_logger_nogl(dsbufp);
}
@@ -995,14 +1287,24 @@ large_fd_error_common(erts_dsprintf_buf_t *dsbufp, ErtsSysFdType fd)
{
erts_dsprintf(dsbufp,
"fd=%d is larger than the largest allowed fd=%d\n",
- (int) fd, max_fds - 1);
+ (int) fd, drv_ev_state.max_fds - 1);
}
static void
-select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
+drv_select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
+{
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+ print_drv_select_op(dsbufp, ix, fd, mode, on);
+ erts_dsprintf(dsbufp, "failed: ");
+ large_fd_error_common(dsbufp, fd);
+ erts_send_error_to_logger_nogl(dsbufp);
+}
+static void
+nif_select_large_fd_error(ErtsSysFdType fd, int mode,
+ ErtsResource* resource, Eterm ref)
{
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_select_op(dsbufp, ix, fd, mode, on);
+ print_nif_select_op(dsbufp, fd, mode, resource, ref);
erts_dsprintf(dsbufp, "failed: ");
large_fd_error_common(dsbufp, fd);
erts_send_error_to_logger_nogl(dsbufp);
@@ -1012,198 +1314,245 @@ select_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, int mode, int on)
static void
-steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
- ErtsDrvEventState *state, int mode, int on)
+steal_pending_stop_use(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
+ ErtsDrvEventState *state, int mode, int on)
{
+ int cancel = 0;
ASSERT(state->type == ERTS_EV_TYPE_STOP_USE);
- erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select "
- "was called for driver %s\n",
- (int) GET_FD(state->fd), state->driver.drv_ptr->name);
- erts_send_error_to_logger_nogl(dsbufp);
if (on) {
/* Either fd-owner changed its mind about closing
* or closed fd before stop_select callback and fd is now reused.
* In either case stop_select should not be called.
- */
- state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- if (state->driver.drv_ptr->handle) {
- erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
- }
- state->driver.drv_ptr = NULL;
+ */
+ cancel = 1;
}
else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) {
Port *prt = erts_drvport2port(ix);
- erts_driver_t* drv_ptr = prt != ERTS_INVALID_ERL_DRV_PORT ? prt->drv_ptr : NULL;
- if (drv_ptr && drv_ptr != state->driver.drv_ptr) {
- /* Some other driver wants the stop_select callback */
- if (state->driver.drv_ptr->handle) {
- erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
- }
- if (drv_ptr->handle) {
- erts_ddll_reference_referenced_driver(drv_ptr->handle);
- }
- state->driver.drv_ptr = drv_ptr;
- }
+ if (prt == ERTS_INVALID_ERL_DRV_PORT
+ || prt->drv_ptr != state->driver.stop.drv_ptr) {
+ /* Some other driver or nif wants the stop_select callback */
+ cancel = 1;
+ }
}
+ if (cancel) {
+ erts_dsprintf(dsbufp, "called before stop_select was called for driver '%s'\n",
+ state->driver.stop.drv_ptr->name);
+ if (state->driver.stop.drv_ptr->handle) {
+ erts_ddll_dereference_driver(state->driver.stop.drv_ptr->handle);
+ }
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ state->driver.stop.drv_ptr = NULL;
+ }
+ else {
+ erts_dsprintf(dsbufp, "ignored repeated call\n");
+ }
+ erts_send_error_to_logger_nogl(dsbufp);
}
-
-#if ERTS_CIO_HAVE_DRV_EVENT
-
static void
-print_event_op(erts_dsprintf_buf_t *dsbufp,
- ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data)
+steal_pending_stop_nif(erts_dsprintf_buf_t *dsbufp, ErtsResource* resource,
+ ErtsDrvEventState *state, int mode, int on)
{
- Port *pp = erts_drvport2port(ix);
- erts_dsprintf(dsbufp, "driver_event(%p, %d, ", ix, (int) fd);
- if (!event_data)
- erts_dsprintf(dsbufp, "NULL");
- else
- erts_dsprintf(dsbufp, "{0x%x, 0x%x}",
- (unsigned int) event_data->events,
- (unsigned int) event_data->revents);
- erts_dsprintf(dsbufp, ") by ");
- if (pp != ERTS_INVALID_ERL_DRV_PORT)
- print_driver_name(dsbufp, pp->common.id);
- erts_dsprintf(dsbufp, "driver %T ", pp != ERTS_INVALID_ERL_DRV_PORT ? pp->common.id : NIL);
-}
+ int cancel = 0;
-static void
-event_steal(ErlDrvPort ix, ErtsDrvEventState *state, ErlDrvEventData event_data)
-{
- if (need2steal(state, ERL_DRV_READ|ERL_DRV_WRITE)) {
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_event_op(dsbufp, ix, state->fd, event_data);
- steal(dsbufp, state, ERL_DRV_READ|ERL_DRV_WRITE);
- erts_send_error_to_logger_nogl(dsbufp);
+ ASSERT(state->type == ERTS_EV_TYPE_STOP_NIF);
+ ASSERT(state->driver.stop.resource);
+
+ if (on) {
+ ASSERT(mode & (ERL_NIF_SELECT_READ | ERL_NIF_SELECT_WRITE));
+ /* Either fd-owner changed its mind about closing
+ * or closed fd before stop callback and fd is now reused.
+ * In either case, stop should not be called.
+ */
+ cancel = 1;
}
- else if (state->type == ERTS_EV_TYPE_DRV_SEL) {
- ASSERT(state->flags & ERTS_EV_FLAG_USED);
- deselect(state, 0);
+ else if ((mode & ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE
+ && resource != state->driver.stop.resource) {
+ /* Some driver or other resource wants the stop callback */
+ cancel = 1;
}
+
+ if (cancel) {
+ ErlNifResourceType* rt = state->driver.stop.resource->type;
+ erts_dsprintf(dsbufp, "called before stop was called for NIF resource %T:%T\n",
+ rt->module, rt->name);
+
+ enif_release_resource(state->driver.stop.resource->data);
+ state->type = ERTS_EV_TYPE_NONE;
+ state->flags = 0;
+ state->driver.stop.resource = NULL;
+ }
+ else {
+ erts_dsprintf(dsbufp, "ignored repeated call\n");
+ }
+ erts_send_error_to_logger_nogl(dsbufp);
+
}
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-static void
-event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data)
+static ERTS_INLINE int
+io_task_schedule_allowed(ErtsDrvEventState *state,
+ ErtsPortTaskType type)
{
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- print_event_op(dsbufp, ix, fd, event_data);
- erts_dsprintf(dsbufp, "failed: ");
- large_fd_error_common(dsbufp, fd);
- erts_send_error_to_logger_nogl(dsbufp);
+ ErtsIoTask *io_task;
+
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+ if (!state->driver.select)
+ return 0;
+ io_task = &state->driver.select->iniotask;
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
+ if (!state->driver.select)
+ return 0;
+ io_task = &state->driver.select->outiotask;
+ break;
+ default:
+ ERTS_INTERNAL_ERROR("Invalid I/O-task type");
+ return 0;
+ }
+
+ return !is_iotask_active(io_task);
}
-#endif
-#endif
static ERTS_INLINE void
iready(Eterm id, ErtsDrvEventState *state)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->intask,
- ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_READ);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_INPUT)) {
+ ErtsIoTask *iotask = &state->driver.select->iniotask;
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_INPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_READ);
+ } else {
+ DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
+ state, id, state->fd);
+ }
}
}
static ERTS_INLINE void
oready(Eterm id, ErtsDrvEventState *state)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->outtask,
- ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_WRITE);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_OUTPUT)) {
+ ErtsIoTask *iotask = &state->driver.select->outiotask;
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_OUTPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_WRITE);
+ } else {
+ DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
+ }
}
}
-#if ERTS_CIO_HAVE_DRV_EVENT
static ERTS_INLINE void
-eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data)
+send_event_tuple(struct erts_nif_select_event* e, ErtsResource* resource,
+ Eterm event_atom)
{
- if (erts_port_task_schedule(id,
- &state->driver.event->task,
- ERTS_PORT_TASK_EVENT,
- (ErlDrvEvent) state->fd,
- event_data) != 0) {
- stale_drv_select(id, state, 0);
+ Process* rp = erts_proc_lookup(e->pid);
+ ErtsProcLocks rp_locks = 0;
+ ErtsMessage* mp;
+ ErlOffHeap* ohp;
+ ErtsBinary* bin;
+ Eterm* hp;
+ Uint hsz;
+ Eterm resource_term, ref_term, tuple;
+
+ if (!rp) {
+ return;
}
-}
-#endif
-static void bad_fd_in_pollset( ErtsDrvEventState *, Eterm, Eterm, ErtsPollEvents);
+ bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource);
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-void
-ERTS_CIO_EXPORT(erts_check_io_async_sig_interrupt)(void)
-{
- ERTS_CIO_POLL_AS_INTR(pollset.ps);
+ /* {select, Resource, Ref, EventAtom} */
+ if (is_value(e->immed)) {
+ hsz = 5 + ERTS_MAGIC_REF_THING_SIZE;
+ }
+ else {
+ hsz = 5 + ERTS_MAGIC_REF_THING_SIZE + ERTS_REF_THING_SIZE;
+ }
+
+ mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp);
+
+ resource_term = erts_mk_magic_ref(&hp, ohp, &bin->binary);
+ if (is_value(e->immed)) {
+ ASSERT(is_immed(e->immed));
+ ref_term = e->immed;
+ }
+ else {
+ write_ref_thing(hp, e->refn[0], e->refn[1], e->refn[2]);
+ ref_term = make_internal_ref(hp);
+ hp += ERTS_REF_THING_SIZE;
+ }
+ tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom);
+
+ ERL_MESSAGE_TOKEN(mp) = am_undefined;
+ erts_queue_message(rp, rp_locks, mp, tuple, am_system);
+
+ if (rp_locks)
+ erts_proc_unlock(rp, rp_locks);
}
-#endif
+
+static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport);
void
-ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set)
+erts_check_io_interrupt(ErtsPollThread *psi, int set)
{
- ERTS_CIO_POLL_INTR(pollset.ps, set);
+ if (psi) {
+#if ERTS_POLL_USE_FALLBACK
+ if (psi->ps == get_fallback()) {
+ erts_poll_interrupt_flbk(psi->ps, set);
+ return;
+ }
+#endif
+ erts_poll_interrupt(psi->ps, set);
+ }
}
-void
-ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set,
- erts_short_time_t msec)
-{
- ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, msec);
+ErtsPollThread *
+erts_create_pollset_thread(int id) {
+ return psiv+id;
}
void
-ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
+erts_check_io(ErtsPollThread *psi)
{
- ErtsPollResFd pollres[256];
int pollres_len;
- SysTimeval wait_time;
int poll_ret, i;
+ ERTS_MSACC_PUSH_AND_SET_STATE(ERTS_MSACC_STATE_CHECK_IO);
restart:
-#ifdef ERTS_BREAK_REQUESTED
- if (ERTS_BREAK_REQUESTED)
- erts_do_break_handling();
-#endif
-
- /* Figure out timeout value */
- if (do_wait) {
- erts_time_remaining(&wait_time);
- } else { /* poll only */
- wait_time.tv_sec = 0;
- wait_time.tv_usec = 0;
- }
-
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
- pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd);
- erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1);
+ pollres_len = psi->pollres_len;
- poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, &wait_time);
+#if ERTS_POLL_USE_FALLBACK
+ if (psi->ps == get_fallback()) {
-#ifdef ERTS_ENABLE_LOCK_CHECK
- erts_lc_check_exact(NULL, 0); /* No locks should be locked */
-#endif
+ poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len);
- erts_deliver_time(); /* sync the machine's idea of time */
+ } else
+#endif
+ {
+ poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len);
+ }
-#ifdef ERTS_BREAK_REQUESTED
- if (ERTS_BREAK_REQUESTED)
- erts_do_break_handling();
+#ifdef ERTS_ENABLE_LOCK_CHECK
+ erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
if (poll_ret != 0) {
- erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
- forget_removed(&pollset);
+
if (poll_ret == EAGAIN) {
goto restart;
}
@@ -1219,57 +1568,69 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
erl_errno_id(poll_ret), poll_ret);
erts_send_error_to_logger_nogl(dsbufp);
}
+ ERTS_MSACC_POP_STATE();
return;
}
for (i = 0; i < pollres_len; i++) {
- ErtsSysFdType fd = (ErtsSysFdType) pollres[i].fd;
+ erts_driver_t* drv_ptr = NULL;
+ ErtsResource* resource = NULL;
+ ErtsDrvSelectDataState *free_select = NULL;
+ ErtsNifSelectDataState *free_nif = NULL;
+ ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
ErtsDrvEventState *state;
+ ErtsPollEvents revents;
- erts_smp_mtx_lock(fd_mtx(fd));
+ erts_mtx_lock(fd_mtx(fd));
+
+ state = get_drv_ev_state(fd);
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- state = &drv_ev_state[ (int) fd];
-#else
- state = hash_get_drv_ev_state(fd);
if (!state) {
- goto next_pollres;
+ erts_mtx_unlock(fd_mtx(fd));
+ continue;
}
-#endif
- /* Skip this fd if it was removed from pollset */
- if (is_removed(state)) {
- goto next_pollres;
- }
+ revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
+
+ DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
+
+ if (revents & ERTS_POLL_EV_ERR) {
+ /*
+ * Handle error events by triggering all in/out events
+ * that has been selected on.
+ * We *do not* want to call a callback that corresponds
+ * to an event not selected.
+ */
+ revents = state->active_events;
+ state->active_events = 0;
+ } else {
+
+ /* Disregard any events that are not active at the moment,
+ for instance this could happen if the driver/nif does
+ select/deselect in rapid succession. */
+ revents &= state->active_events | ERTS_POLL_EV_NVAL;
+ state->active_events &= ~revents;
+
+ /* Reactivate the poll op if there are still active events */
+ if (state->active_events) {
+ ErtsPollEvents new_events;
+ DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events));
+
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+
+ /* Unable to re-enable the fd, signal all callbacks */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ revents |= state->active_events;
+ state->active_events = 0;
+ }
+ }
+ }
switch (state->type) {
case ERTS_EV_TYPE_DRV_SEL: { /* Requested via driver_select()... */
- ErtsPollEvents revents;
- ErtsPollEvents revent_mask;
-
- revent_mask = ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT);
- revent_mask |= state->events;
- revents = pollres[i].events & revent_mask;
-
- if (revents & ERTS_POLL_EV_ERR) {
- /*
- * Let the driver handle the error condition. Only input,
- * only output, or nothing might have been selected.
- * We *do not* want to call a callback that corresponds
- * to an event not selected. revents might give us a clue
- * on which one to call.
- */
- if ((revents & ERTS_POLL_EV_IN)
- || (!(revents & ERTS_POLL_EV_OUT)
- && state->events & ERTS_POLL_EV_IN)) {
- iready(state->driver.select->inport, state);
- }
- else if (state->events & ERTS_POLL_EV_OUT) {
- oready(state->driver.select->outport, state);
- }
- }
- else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
+
+ if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
if (revents & ERTS_POLL_EV_OUT) {
oready(state->driver.select->outport, state);
}
@@ -1277,49 +1638,84 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
was read (true also on the non-smp emulator since
oready() may have been called); therefore, update
revents... */
- revents &= ~(~state->events & ERTS_POLL_EV_IN);
+ revents &= state->events;
if (revents & ERTS_POLL_EV_IN) {
iready(state->driver.select->inport, state);
}
}
else if (revents & ERTS_POLL_EV_NVAL) {
bad_fd_in_pollset(state,
- state->driver.select->inport,
- state->driver.select->outport,
- state->events);
- }
- break;
- }
-
-#if ERTS_CIO_HAVE_DRV_EVENT
- case ERTS_EV_TYPE_DRV_EV: { /* Requested via driver_event()... */
- ErlDrvEventData event_data;
- ErtsPollEvents revents;
- ASSERT(state->driver.event);
- ASSERT(state->driver.event->data);
- event_data = state->driver.event->data;
- revents = pollres[i].events;
- revents &= ~state->driver.event->removed_events;
-
- if (revents) {
- event_data->events = state->events;
- event_data->revents = revents;
-
- eready(state->driver.event->port, state, event_data);
+ state->driver.select->inport,
+ state->driver.select->outport);
+ check_fd_cleanup(state, &free_select, &free_nif);
}
break;
}
-#endif
+ case ERTS_EV_TYPE_NIF: { /* Requested via enif_select()... */
+ struct erts_nif_select_event in = {NIL};
+ struct erts_nif_select_event out = {NIL};
+ ErtsResource* resource = NULL;
+
+ if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
+ if (revents & ERTS_POLL_EV_OUT) {
+ if (is_not_nil(state->driver.nif->out.pid)) {
+ out = state->driver.nif->out;
+ resource = state->driver.stop.resource;
+ state->driver.nif->out.pid = NIL;
+ }
+ }
+ if (revents & ERTS_POLL_EV_IN) {
+ if (is_not_nil(state->driver.nif->in.pid)) {
+ in = state->driver.nif->in;
+ resource = state->driver.stop.resource;
+ state->driver.nif->in.pid = NIL;
+ }
+ }
+ state->events &= ~revents;
+ }
+ else if (revents & ERTS_POLL_EV_NVAL) {
+ bad_fd_in_pollset(state, NIL, NIL);
+ check_fd_cleanup(state, &free_select, &free_nif);
+ }
+
+ erts_mtx_unlock(fd_mtx(fd));
+
+ if (is_not_nil(in.pid)) {
+ send_event_tuple(&in, resource, am_ready_input);
+ }
+ if (is_not_nil(out.pid)) {
+ send_event_tuple(&out, resource, am_ready_output);
+ }
+ continue;
+ }
+
+ case ERTS_EV_TYPE_STOP_NIF: {
+ resource = state->driver.stop.resource;
+ state->type = ERTS_EV_TYPE_NONE;
+ goto case_ERTS_EV_TYPE_NONE;
+ }
+
+ case ERTS_EV_TYPE_STOP_USE: {
+#if ERTS_POLL_USE_FALLBACK
+ ASSERT(psi->ps == get_fallback());
+#endif
+ drv_ptr = state->driver.stop.drv_ptr;
+ state->type = ERTS_EV_TYPE_NONE;
+ /* fallthrough */
case ERTS_EV_TYPE_NONE: /* Deselected ... */
+ case_ERTS_EV_TYPE_NONE:
+ ASSERT(!state->events && !state->active_events && !state->flags);
+ check_fd_cleanup(state, &free_select, &free_nif);
break;
+ }
default: { /* Error */
erts_dsprintf_buf_t *dsbufp;
dsbufp = erts_create_logger_dsbuf();
erts_dsprintf(dsbufp,
"Invalid event request type for fd in erts_poll()! "
- "fd=%d, event request type=%sd\n", (int) state->fd,
+ "fd=%d, event request type=%d\n", (int) state->fd,
(int) state->type);
ASSERT(0);
deselect(state, 0);
@@ -1327,20 +1723,55 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
}
}
- next_pollres:;
-#ifdef ERTS_SMP
- erts_smp_mtx_unlock(fd_mtx(fd));
-#endif
+ erts_mtx_unlock(fd_mtx(fd));
+
+ if (drv_ptr) {
+ int was_unmasked = erts_block_fpe();
+ DTRACE1(driver_stop_select, drv_ptr->name);
+ LTTNG1(driver_stop_select, drv_ptr->name);
+ (*drv_ptr->stop_select)((ErlDrvEvent) fd, NULL);
+ erts_unblock_fpe(was_unmasked);
+ if (drv_ptr->handle) {
+ erts_ddll_dereference_driver(drv_ptr->handle);
+ }
+ }
+ if (resource) {
+ erts_resource_stop(resource, (ErlNifEvent)fd, 1);
+ enif_release_resource(resource->data);
+ }
+ if (free_select)
+ free_drv_select_data(free_select);
+ if (free_nif)
+ free_nif_select_data(free_nif);
}
- erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
- forget_removed(&pollset);
+ /* The entire pollres array was filled with events,
+ * grow it for the next call. We do this for two reasons:
+ * 1. Pulling out more events in on go will increase throughput
+ * 2. If the polling implementation is not fair, this will make
+ * sure that we get all fds that we can. i.e. if 12 fds are
+ * constantly active, but we only have a pollres_len of 10,
+ * two of the fds may never be triggered depending on what the
+ * kernel decides to do.
+ **/
+ if (pollres_len == psi->pollres_len) {
+ int ev_state_len = drv_ev_state_len();
+ erts_free(ERTS_ALC_T_POLLSET, psi->pollres);
+ psi->pollres_len *= 2;
+ /* Never grow it larger than the current drv_ev_state.len size */
+ if (psi->pollres_len > ev_state_len)
+ psi->pollres_len = ev_state_len;
+ psi->pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * psi->pollres_len);
+ }
+
+ ERTS_MSACC_POP_STATE();
}
static void
-bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport,
- Eterm outport, ErtsPollEvents events)
+bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, Eterm outport)
{
+ ErtsPollEvents events = state->events;
erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
if (events & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
@@ -1364,27 +1795,36 @@ bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport,
erts_dsprintf(dsbufp,
"Bad %s fd in erts_poll()! fd=%d, ",
io_str, (int) state->fd);
- if (is_nil(port)) {
- ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT);
- ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT);
- erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n",
- is_nil(inport) ? am_undefined : inport,
- is_nil(outport) ? am_undefined : outport,
- ipnp->driver_name ? ipnp->driver_name : "<unknown>",
- opnp->driver_name ? opnp->driver_name : "<unknown>",
- ipnp->name ? ipnp->name : "<unknown>",
- opnp->name ? opnp->name : "<unknown>");
- erts_free_port_names(ipnp);
- erts_free_port_names(opnp);
- }
- else {
- ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT);
- erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n",
- is_nil(port) ? am_undefined : port,
- pnp->driver_name ? pnp->driver_name : "<unknown>",
- pnp->name ? pnp->name : "<unknown>");
- erts_free_port_names(pnp);
- }
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ if (is_nil(port)) {
+ ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT);
+ ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT);
+ erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n",
+ is_nil(inport) ? am_undefined : inport,
+ is_nil(outport) ? am_undefined : outport,
+ ipnp->driver_name ? ipnp->driver_name : "<unknown>",
+ opnp->driver_name ? opnp->driver_name : "<unknown>",
+ ipnp->name ? ipnp->name : "<unknown>",
+ opnp->name ? opnp->name : "<unknown>");
+ erts_free_port_names(ipnp);
+ erts_free_port_names(opnp);
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT);
+ erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n",
+ is_nil(port) ? am_undefined : port,
+ pnp->driver_name ? pnp->driver_name : "<unknown>",
+ pnp->name ? pnp->name : "<unknown>");
+ erts_free_port_names(pnp);
+ }
+ }
+ else {
+ ErlNifResourceType* rt;
+ ASSERT(state->type == ERTS_EV_TYPE_NIF);
+ ASSERT(state->driver.stop.resource);
+ rt = state->driver.stop.resource->type;
+ erts_dsprintf(dsbufp, "resource={%T,%T}\n", rt->module, rt->name);
+ }
}
else {
erts_dsprintf(dsbufp, "Bad fd in erts_poll()! fd=%d\n", (int) state->fd);
@@ -1404,25 +1844,6 @@ stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode)
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
-#ifdef __OSE__
-static SafeHashValue drv_ev_state_hash(void *des)
-{
- ErtsSysFdType fd = ((ErtsDrvEventState *) des)->fd;
- /* We use hash on signo ^ id in order for steal to happen when the
- same signo + fd is selected on by two different ports */
- SafeHashValue val = (SafeHashValue)(fd->signo ^ fd->id);
- return val ^ (val >> 8);
-}
-
-static int drv_ev_state_cmp(void *des1, void *des2)
-{
- ErtsSysFdType fd1 = ((ErtsDrvEventState *) des1)->fd;
- ErtsSysFdType fd2 = ((ErtsDrvEventState *) des2)->fd;
- if (fd1->signo == fd2->signo && fd1->id == fd2->id)
- return 0;
- return 1;
-}
-#else /* !__OSE__ && !ERTS_SYS_CONTINOUS_FD_NUMBERS i.e. probably windows */
static SafeHashValue drv_ev_state_hash(void *des)
{
SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd;
@@ -1434,21 +1855,20 @@ static int drv_ev_state_cmp(void *des1, void *des2)
return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd
? 0 : 1);
}
-#endif
static void *drv_ev_state_alloc(void *des_tmpl)
{
ErtsDrvEventState *evstate;
- erts_smp_spin_lock(&state_prealloc_lock);
- if (state_prealloc_first == NULL) {
- erts_smp_spin_unlock(&state_prealloc_lock);
+ erts_spin_lock(&drv_ev_state.prealloc_lock);
+ if (drv_ev_state.prealloc_first == NULL) {
+ erts_spin_unlock(&drv_ev_state.prealloc_lock);
evstate = (ErtsDrvEventState *)
erts_alloc(ERTS_ALC_T_DRV_EV_STATE, sizeof(ErtsDrvEventState));
} else {
- evstate = state_prealloc_first;
- state_prealloc_first = (ErtsDrvEventState *) evstate->hb.next;
- --num_state_prealloc;
- erts_smp_spin_unlock(&state_prealloc_lock);
+ evstate = drv_ev_state.prealloc_first;
+ drv_ev_state.prealloc_first = (ErtsDrvEventState *) evstate->hb.next;
+ --drv_ev_state.num_prealloc;
+ erts_spin_unlock(&drv_ev_state.prealloc_lock);
}
/* XXX: Already valid data if prealloced, could ignore template! */
*evstate = *((ErtsDrvEventState *) des_tmpl);
@@ -1458,42 +1878,201 @@ static void *drv_ev_state_alloc(void *des_tmpl)
static void drv_ev_state_free(void *des)
{
- erts_smp_spin_lock(&state_prealloc_lock);
- ((ErtsDrvEventState *) des)->hb.next = &state_prealloc_first->hb;
- state_prealloc_first = (ErtsDrvEventState *) des;
- ++num_state_prealloc;
- erts_smp_spin_unlock(&state_prealloc_lock);
+ erts_spin_lock(&drv_ev_state.prealloc_lock);
+ ((ErtsDrvEventState *) des)->hb.next = &drv_ev_state.prealloc_first->hb;
+ drv_ev_state.prealloc_first = (ErtsDrvEventState *) des;
+ ++drv_ev_state.num_prealloc;
+ erts_spin_unlock(&drv_ev_state.prealloc_lock);
}
#endif
-void
-ERTS_CIO_EXPORT(erts_init_check_io)(void)
+#define ERTS_MAX_NO_OF_POLL_THREADS ERTS_MAX_NO_OF_SCHEDULERS
+
+static char *
+get_arg(char* rest, char** argv, int* ip)
{
- erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0);
- ERTS_CIO_POLL_INIT();
- pollset.ps = ERTS_CIO_NEW_POLLSET();
-
-#ifdef ERTS_SMP
- init_removed_fd_alloc();
- pollset.removed_list = NULL;
- erts_smp_spinlock_init(&pollset.removed_list_lock,
- "pollset_rm_list");
- {
- int i;
- for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) {
-#ifdef ERTS_ENABLE_LOCK_COUNT
- erts_smp_mtx_init_x(&drv_ev_state_locks[i].lck, "drv_ev_state", make_small(i));
-#else
- erts_smp_mtx_init(&drv_ev_state_locks[i].lck, "drv_ev_state");
-#endif
+ int i = *ip;
+ if (*rest == '\0') {
+ if (argv[i+1] == NULL) {
+ erts_fprintf(stderr, "too few arguments\n");
+ erts_usage();
}
+ argv[i++] = NULL;
+ rest = argv[i];
+ }
+ argv[i] = NULL;
+ *ip = i;
+ return rest;
+}
+
+static void
+parse_args(int *argc, char **argv, int concurrent_waiters)
+{
+ int i = 0, j;
+ int no_pollsets = 0, no_poll_threads = 0,
+ no_pollsets_percentage = 0,
+ no_poll_threads_percentage = 0;
+ ASSERT(argc && argv);
+ while (i < *argc) {
+ if(argv[i][0] == '-') {
+ switch (argv[i][1]) {
+ case 'I': {
+ if (strncmp(argv[i]+2, "Ot", 2) == 0) {
+ char *arg = get_arg(argv[i]+4, argv, &i);
+ if (sscanf(arg, "%d", &no_poll_threads) != 1 ||
+ no_poll_threads < 1 ||
+ ERTS_MAX_NO_OF_POLL_THREADS < no_poll_threads) {
+ erts_fprintf(stderr,"bad I/O poll threads number: %s\n", arg);
+ erts_usage();
+ }
+ } else if (strncmp(argv[i]+2, "Op", 3) == 0) {
+ char *arg = get_arg(argv[i]+4, argv, &i);
+ if (sscanf(arg, "%d", &no_pollsets) != 1 ||
+ no_pollsets < 1) {
+ erts_fprintf(stderr,"bad I/O pollset number: %s\n", arg);
+ erts_usage();
+ }
+ } else if (strncmp(argv[i]+2, "OPt", 4) == 0) {
+ char *arg = get_arg(argv[i]+5, argv, &i);
+ if (sscanf(arg, "%d", &no_poll_threads_percentage) != 1 ||
+ no_poll_threads_percentage < 0 ||
+ no_poll_threads_percentage > 100) {
+ erts_fprintf(stderr,"bad I/O poll thread percentage number: %s\n", arg);
+ erts_usage();
+ }
+ } else if (strncmp(argv[i]+2, "OPp", 4) == 0) {
+ char *arg = get_arg(argv[i]+5, argv, &i);
+ if (sscanf(arg, "%d", &no_pollsets_percentage) != 1 ||
+ no_pollsets_percentage < 0 ||
+ no_pollsets_percentage > 100) {
+ erts_fprintf(stderr,"bad I/O pollset percentage number: %s\n", arg);
+ erts_usage();
+ }
+ } else {
+ break;
+ }
+ break;
+ }
+ case 'K':
+ (void)get_arg(argv[i]+2, argv, &i);
+ break;
+ case '-':
+ goto args_parsed;
+ default:
+ break;
+ }
+ }
+ i++;
+ }
+
+args_parsed:
+
+ if (!concurrent_waiters) {
+ no_pollsets = no_poll_threads;
+ no_pollsets_percentage = 100;
}
+
+ if (no_poll_threads == 0) {
+ if (no_poll_threads_percentage == 0)
+ no_poll_threads = 1; /* This is the default */
+ else {
+ no_poll_threads = erts_no_schedulers * no_poll_threads_percentage / 100;
+ if (no_poll_threads < 1)
+ no_poll_threads = 1;
+ }
+ }
+
+ if (no_pollsets == 0) {
+ if (no_pollsets_percentage == 0)
+ no_pollsets = 1; /* This is the default */
+ else {
+ no_pollsets = no_poll_threads * no_pollsets_percentage / 100;
+ if (no_pollsets < 1)
+ no_pollsets = 1;
+ }
+ }
+
+ if (no_poll_threads < no_pollsets) {
+ erts_fprintf(stderr,
+ "number of IO poll threads has to be greater or equal to "
+ "the number of \nIO pollsets. Current values are set to: \n"
+ " -IOt %d -IOp %d\n",
+ no_poll_threads, no_pollsets);
+ erts_usage();
+ }
+
+ /* Handled arguments have been marked with NULL. Slide arguments
+ not handled towards the beginning of argv. */
+ for (i = 0, j = 0; i < *argc; i++) {
+ if (argv[i])
+ argv[j++] = argv[i];
+ }
+ *argc = j;
+
+ erts_no_pollsets = no_pollsets;
+ erts_no_poll_threads = no_poll_threads;
+}
+
+void
+erts_init_check_io(int *argc, char **argv)
+{
+ int j, concurrent_waiters, no_poll_threads;
+ ERTS_CT_ASSERT((INT_MIN & (ERL_NIF_SELECT_STOP_CALLED |
+ ERL_NIF_SELECT_STOP_SCHEDULED |
+ ERL_NIF_SELECT_INVALID_EVENT |
+ ERL_NIF_SELECT_FAILED)) == 0);
+
+
+ erts_poll_init(&concurrent_waiters);
+#if ERTS_POLL_USE_FALLBACK
+ erts_poll_init_flbk(NULL);
+#endif
+
+ parse_args(argc, argv, concurrent_waiters);
+
+ /* Create the actual pollsets */
+ pollsetv = erts_alloc(ERTS_ALC_T_POLLSET,sizeof(ErtsPollSet *) * erts_no_pollsets);
+
+ for (j=0; j < erts_no_pollsets; j++)
+ pollsetv[j] = erts_poll_create_pollset(j);
+
+#if ERTS_POLL_USE_FALLBACK
+ flbk_pollset = erts_poll_create_pollset_flbk(-1);
+#endif
+
+ no_poll_threads = erts_no_poll_threads;
+#if ERTS_POLL_USE_FALLBACK
+ no_poll_threads++;
+#endif
+
+ psiv = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollThread) * no_poll_threads);
+
+#if ERTS_POLL_USE_FALLBACK
+ psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[0].ps = get_fallback();
+ psiv++;
#endif
+
+ for (j = 0; j < erts_no_poll_threads; j++) {
+ psiv[j].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[j].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[j].ps = pollsetv[j % erts_no_pollsets];
+ }
+
+ for (j=0; j < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; j++) {
+ erts_mtx_init(&drv_ev_state.locks[j].lck, "drv_ev_state", make_small(j),
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
+ }
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- max_fds = ERTS_CIO_POLL_MAX_FDS();
- erts_smp_atomic_init_nob(&drv_ev_state_len, 0);
- drv_ev_state = NULL;
- erts_smp_mtx_init(&drv_ev_state_grow_lock, "drv_ev_state_grow");
+ drv_ev_state.max_fds = erts_poll_max_fds();
+ erts_atomic_init_nob(&drv_ev_state.len, 0);
+ drv_ev_state.v = NULL;
+ erts_mtx_init(&drv_ev_state.grow_lock, "drv_ev_state_grow", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
#else
{
SafeHashFunctions hf;
@@ -1501,134 +2080,182 @@ ERTS_CIO_EXPORT(erts_init_check_io)(void)
hf.cmp = &drv_ev_state_cmp;
hf.alloc = &drv_ev_state_alloc;
hf.free = &drv_ev_state_free;
- num_state_prealloc = 0;
- state_prealloc_first = NULL;
- erts_smp_spinlock_init(&state_prealloc_lock,"state_prealloc");
-
- safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state_tab, "drv_ev_state_tab",
- DRV_EV_STATE_HTAB_SIZE, hf);
+ drv_ev_state.num_prealloc = 0;
+ drv_ev_state.prealloc_first = NULL;
+ erts_spinlock_init(&drv_ev_state.prealloc_lock, "state_prealloc", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_IO);
+ safe_hash_init(ERTS_ALC_T_DRV_EV_STATE, &drv_ev_state.tab, "drv_ev_state_tab",
+ ERTS_LOCK_FLAGS_CATEGORY_IO, DRV_EV_STATE_HTAB_SIZE, hf);
}
#endif
}
int
-ERTS_CIO_EXPORT(erts_check_io_max_files)(void)
+erts_check_io_max_files(void)
{
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- return max_fds;
+ return drv_ev_state.max_fds;
#else
- return ERTS_POLL_EXPORT(erts_poll_max_fds)();
+ return erts_poll_max_fds();
#endif
}
Uint
-ERTS_CIO_EXPORT(erts_check_io_size)(void)
+erts_check_io_size(void)
{
- Uint res;
+ Uint res = 0;
ErtsPollInfo pi;
- ERTS_CIO_POLL_INFO(pollset.ps, &pi);
- res = pi.memory_size;
+ int i;
+
+#if ERTS_POLL_USE_FALLBACK
+ erts_poll_info(get_fallback(), &pi);
+ res += pi.memory_size;
+#endif
+
+ for (i = 0; i < erts_no_pollsets; i++) {
+ erts_poll_info(pollsetv[i], &pi);
+ res += pi.memory_size;
+ }
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- res += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len);
+ res += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len);
#else
- res += safe_hash_table_sz(&drv_ev_state_tab);
+ res += safe_hash_table_sz(&drv_ev_state.tab);
{
SafeHashInfo hi;
- safe_hash_get_info(&hi, &drv_ev_state_tab);
+ safe_hash_get_info(&hi, &drv_ev_state.tab);
res += hi.objs * sizeof(ErtsDrvEventState);
}
- erts_smp_spin_lock(&state_prealloc_lock);
- res += num_state_prealloc * sizeof(ErtsDrvEventState);
- erts_smp_spin_unlock(&state_prealloc_lock);
+ erts_spin_lock(&drv_ev_state.prealloc_lock);
+ res += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState);
+ erts_spin_unlock(&drv_ev_state.prealloc_lock);
#endif
return res;
}
Eterm
-ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
+erts_check_io_info(void *proc)
{
Process *p = (Process *) proc;
- Eterm tags[15], values[15], res;
- Uint sz, *szp, *hp, **hpp, memory_size;
- Sint i;
- ErtsPollInfo pi;
-
- ERTS_CIO_POLL_INFO(pollset.ps, &pi);
- memory_size = pi.memory_size;
+ Eterm tags[16], values[16], res, list = NIL;
+ Uint sz, *szp, *hp, **hpp;
+ ErtsPollInfo *piv;
+ Sint i, j = 0, len;
+ int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK;
+ ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
+
+ piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);
+
+#if ERTS_POLL_USE_FALLBACK
+ erts_poll_info_flbk(get_fallback(), &piv[0]);
+ piv[0].poll_threads = 1;
+ piv[0].active_fds = 0;
+ piv++;
+#endif
+
+ for (j = 0; j < erts_no_pollsets; j++) {
+ erts_poll_info(pollsetv[j], &piv[j]);
+ piv[j].active_fds = 0;
+ piv[j].poll_threads = erts_no_poll_threads / erts_no_pollsets;
+ if (erts_no_poll_threads % erts_no_pollsets > j)
+ piv[j].poll_threads++;
+ }
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len);
+ i = 0;
+ erts_mtx_lock(&drv_ev_state.grow_lock);
+ len = erts_atomic_read_nob(&drv_ev_state.len);
+ for (i = 0; i < ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT; i++) {
+ erts_mtx_lock(&drv_ev_state.locks[i].lck);
+ for (j = i; j < len; j+=ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT) {
+ ErtsDrvEventState *state = get_drv_ev_state(j);
+ int pollsetid = get_pollset_id(j);
+ ASSERT(fd_mtx(j) == &drv_ev_state.locks[i].lck);
+ if (state->flags & ERTS_EV_FLAG_FALLBACK)
+ pollsetid = -1;
+ if (state->driver.select
+ && (state->type == ERTS_EV_TYPE_DRV_SEL)
+ && (is_iotask_active(&state->driver.select->iniotask)
+ || is_iotask_active(&state->driver.select->outiotask)))
+ piv[pollsetid].active_fds++;
+ }
+ erts_mtx_unlock(&drv_ev_state.locks[i].lck);
+ }
+ erts_mtx_unlock(&drv_ev_state.grow_lock);
+
+ piv[0].memory_size += sizeof(ErtsDrvEventState) * erts_atomic_read_nob(&drv_ev_state.len);
#else
- memory_size += safe_hash_table_sz(&drv_ev_state_tab);
+ piv[0].memory_size += safe_hash_table_sz(&drv_ev_state.tab);
{
- SafeHashInfo hi;
- safe_hash_get_info(&hi, &drv_ev_state_tab);
- memory_size += hi.objs * sizeof(ErtsDrvEventState);
+ SafeHashInfo hi;
+ safe_hash_get_info(&hi, &drv_ev_state.tab);
+ piv[0].memory_size += hi.objs * sizeof(ErtsDrvEventState);
}
- erts_smp_spin_lock(&state_prealloc_lock);
- memory_size += num_state_prealloc * sizeof(ErtsDrvEventState);
- erts_smp_spin_unlock(&state_prealloc_lock);
+ erts_spin_lock(&drv_ev_state.prealloc_lock);
+ piv[0].memory_size += drv_ev_state.num_prealloc * sizeof(ErtsDrvEventState);
+ erts_spin_unlock(&drv_ev_state.prealloc_lock);
#endif
hpp = NULL;
szp = &sz;
sz = 0;
+ piv -= ERTS_POLL_USE_FALLBACK;
+
bld_it:
- i = 0;
- tags[i] = erts_bld_atom(hpp, szp, "name");
- values[i++] = erts_bld_atom(hpp, szp, "erts_poll");
+ for (j = no_pollsets-1; j >= 0; j--) {
+ i = 0;
- tags[i] = erts_bld_atom(hpp, szp, "primary");
- values[i++] = erts_bld_atom(hpp, szp, pi.primary);
+ tags[i] = erts_bld_atom(hpp, szp, "name");
+ values[i++] = erts_bld_atom(hpp, szp, "erts_poll");
- tags[i] = erts_bld_atom(hpp, szp, "fallback");
- values[i++] = erts_bld_atom(hpp, szp, pi.fallback ? pi.fallback : "false");
+ tags[i] = erts_bld_atom(hpp, szp, "primary");
+ values[i++] = erts_bld_atom(hpp, szp, piv[j].primary);
- tags[i] = erts_bld_atom(hpp, szp, "kernel_poll");
- values[i++] = erts_bld_atom(hpp, szp,
- pi.kernel_poll ? pi.kernel_poll : "false");
+ tags[i] = erts_bld_atom(hpp, szp, "kernel_poll");
+ values[i++] = erts_bld_atom(hpp, szp,
+ piv[j].kernel_poll ? piv[j].kernel_poll : "false");
- tags[i] = erts_bld_atom(hpp, szp, "memory_size");
- values[i++] = erts_bld_uint(hpp, szp, memory_size);
+ tags[i] = erts_bld_atom(hpp, szp, "memory_size");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].memory_size);
- tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.poll_set_size);
+ tags[i] = erts_bld_atom(hpp, szp, "total_poll_set_size");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_set_size);
- if (pi.fallback) {
- tags[i] = erts_bld_atom(hpp, szp, "fallback_poll_set_size");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.fallback_poll_set_size);
- }
+ tags[i] = erts_bld_atom(hpp, szp, "lazy_updates");
+ values[i++] = piv[j].lazy_updates ? am_true : am_false;
- tags[i] = erts_bld_atom(hpp, szp, "lazy_updates");
- values[i++] = pi.lazy_updates ? am_true : am_false;
+ tags[i] = erts_bld_atom(hpp, szp, "pending_updates");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].pending_updates);
- if (pi.lazy_updates) {
- tags[i] = erts_bld_atom(hpp, szp, "pending_updates");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.pending_updates);
- }
+ tags[i] = erts_bld_atom(hpp, szp, "batch_updates");
+ values[i++] = piv[j].batch_updates ? am_true : am_false;
- tags[i] = erts_bld_atom(hpp, szp, "batch_updates");
- values[i++] = pi.batch_updates ? am_true : am_false;
+ tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates");
+ values[i++] = piv[j].concurrent_updates ? am_true : am_false;
- tags[i] = erts_bld_atom(hpp, szp, "concurrent_updates");
- values[i++] = pi.concurrent_updates ? am_true : am_false;
+ tags[i] = erts_bld_atom(hpp, szp, "fallback");
+ values[i++] = piv[j].is_fallback ? am_true : am_false;
- tags[i] = erts_bld_atom(hpp, szp, "max_fds");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds);
+ tags[i] = erts_bld_atom(hpp, szp, "max_fds");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].max_fds);
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups);
+ tags[i] = erts_bld_atom(hpp, szp, "active_fds");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].active_fds);
- tags[i] = erts_bld_atom(hpp, szp, "no_avoided_interrupts");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_interrupts);
+ tags[i] = erts_bld_atom(hpp, szp, "poll_threads");
+ values[i++] = erts_bld_uint(hpp, szp, piv[j].poll_threads);
- tags[i] = erts_bld_atom(hpp, szp, "no_interrupt_timed");
- values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_interrupt_timed);
-#endif
+ res = erts_bld_2tup_list(hpp, szp, i, tags, values);
- res = erts_bld_2tup_list(hpp, szp, i, tags, values);
+ if (!hpp) {
+ *szp += 2;
+ }
+ else {
+ list = CONS(*hpp, res, list);
+ *hpp += 2;
+ }
+ }
if (!hpp) {
hp = HAlloc(p, sz);
@@ -1637,330 +2264,463 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
goto bld_it;
}
- return res;
+ erts_free(ERTS_ALC_T_TMP, piv);
+
+ return list;
}
static ERTS_INLINE ErtsPollEvents
-print_events(ErtsPollEvents ev)
+print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
{
int first = 1;
+ if(ev == ERTS_POLL_EV_NONE) {
+ erts_dsprintf(dsbufp, "N/A");
+ return 0;
+ }
if(ev & ERTS_POLL_EV_IN) {
ev &= ~ERTS_POLL_EV_IN;
- erts_printf("%s%s", first ? "" : "|", "IN");
+ erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "IN");
first = 0;
}
if(ev & ERTS_POLL_EV_OUT) {
ev &= ~ERTS_POLL_EV_OUT;
- erts_printf("%s%s", first ? "" : "|", "OUT");
+ erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "OUT");
first = 0;
}
/* The following should not appear... */
if(ev & ERTS_POLL_EV_NVAL) {
- erts_printf("%s%s", first ? "" : "|", "NVAL");
+ erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "NVAL");
first = 0;
}
if(ev & ERTS_POLL_EV_ERR) {
- erts_printf("%s%s", first ? "" : "|", "ERR");
+ erts_dsprintf(dsbufp, "%s%s", first ? "" : "|", "ERR");
first = 0;
}
if (ev)
- erts_printf("%s0x%b32x", first ? "" : "|", (Uint32) ev);
+ erts_dsprintf(dsbufp, "%s0x%b32x", first ? "" : "|", (Uint32) ev);
return ev;
}
+static ERTS_INLINE void
+print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
+{
+ const char* delim = "";
+ if(f & ERTS_EV_FLAG_USED) {
+ erts_dsprintf(dsbufp, "%s","USED");
+ delim = "|";
+ }
+ if(f & ERTS_EV_FLAG_FALLBACK) {
+ erts_dsprintf(dsbufp, "%s%s", delim, "FLBK");
+ delim = "|";
+ }
+}
+
+#ifdef DEBUG_PRINT_MODE
+
+static ERTS_INLINE char *
+drvmode2str(int mode) {
+ switch (mode) {
+ case ERL_DRV_READ|ERL_DRV_USE: return "READ|USE";
+ case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE";
+ case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE";
+ case ERL_DRV_USE: return "USE";
+ case ERL_DRV_READ: return "READ";
+ case ERL_DRV_WRITE: return "WRITE";
+ case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE";
+ default: return "UNKNOWN";
+ }
+}
+
+static ERTS_INLINE char *
+nifmode2str(enum ErlNifSelectFlags mode) {
+ switch (mode) {
+ case ERL_NIF_SELECT_READ: return "READ";
+ case ERL_NIF_SELECT_WRITE: return "WRITE";
+ case ERL_NIF_SELECT_STOP: return "STOP";
+ default: return "UNKNOWN";
+ }
+}
+
+#endif
+
typedef struct {
int used_fds;
int num_errors;
+ int no_driver_select_structs;
+ int no_enif_select_structs;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
int internal_fds;
ErtsPollEvents *epep;
#endif
} IterDebugCounters;
-static void doit_erts_check_io_debug(void *vstate, void *vcounters)
+static int erts_debug_print_checkio_state(erts_dsprintf_buf_t *dsbufp,
+ ErtsDrvEventState *state,
+ ErtsPollEvents ep_events,
+ int internal)
{
- ErtsDrvEventState *state = (ErtsDrvEventState *) vstate;
- IterDebugCounters *counters = (IterDebugCounters *) vcounters;
- ErtsPollEvents cio_events = state->events;
- ErtsSysFdType fd = state->fd;
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- int internal = 0;
- ErtsPollEvents ep_events = counters->epep[(int) fd];
-#endif
- int err = 0;
-
#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
struct stat stat_buf;
#endif
-
+ ErtsSysFdType fd = state->fd;
+ ErtsPollEvents cio_events = state->events;
+ int err = 0;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->events || ep_events) {
- if (ep_events & ERTS_POLL_EV_NVAL) {
- ep_events &= ~ERTS_POLL_EV_NVAL;
- internal = 1;
- counters->internal_fds++;
- }
- else
- counters->used_fds++;
-#else
- if (state->events) {
- counters->used_fds++;
+ ErtsPollEvents aio_events = state->active_events;
#endif
-
- erts_printf("fd=%d ", (int) fd);
-
+ erts_dsprintf(dsbufp, "pollset=%d fd=%d ",
+ state->flags & ERTS_EV_FLAG_FALLBACK ? -1 : get_pollset_id(fd), (int) fd);
+
#if defined(HAVE_FSTAT) && !defined(NO_FSTAT_ON_SYS_FD_TYPE)
- if (fstat((int) fd, &stat_buf) < 0)
- erts_printf("type=unknown ");
- else {
- erts_printf("type=");
+ if (fstat((int) fd, &stat_buf) < 0)
+ erts_dsprintf(dsbufp, "type=unknown ");
+ else {
+ erts_dsprintf(dsbufp, "type=");
#ifdef S_ISSOCK
- if (S_ISSOCK(stat_buf.st_mode))
- erts_printf("sock ");
- else
+ if (S_ISSOCK(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "sock ");
+ else
#endif
#ifdef S_ISFIFO
if (S_ISFIFO(stat_buf.st_mode))
- erts_printf("fifo ");
+ erts_dsprintf(dsbufp, "fifo ");
else
#endif
#ifdef S_ISCHR
- if (S_ISCHR(stat_buf.st_mode))
- erts_printf("chr ");
- else
+ if (S_ISCHR(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "chr ");
+ else
#endif
#ifdef S_ISDIR
- if (S_ISDIR(stat_buf.st_mode))
- erts_printf("dir ");
- else
+ if (S_ISDIR(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "dir ");
+ else
#endif
#ifdef S_ISBLK
- if (S_ISBLK(stat_buf.st_mode))
- erts_printf("blk ");
- else
+ if (S_ISBLK(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "blk ");
+ else
#endif
#ifdef S_ISREG
- if (S_ISREG(stat_buf.st_mode))
- erts_printf("reg ");
- else
+ if (S_ISREG(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "reg ");
+ else
#endif
#ifdef S_ISLNK
- if (S_ISLNK(stat_buf.st_mode))
- erts_printf("lnk ");
- else
+ if (S_ISLNK(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "lnk ");
+ else
#endif
#ifdef S_ISDOOR
- if (S_ISDOOR(stat_buf.st_mode))
- erts_printf("door ");
- else
+ if (S_ISDOOR(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "door ");
+ else
#endif
#ifdef S_ISWHT
- if (S_ISWHT(stat_buf.st_mode))
- erts_printf("wht ");
- else
+ if (S_ISWHT(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "wht ");
+ else
#endif
#ifdef S_ISXATTR
- if (S_ISXATTR(stat_buf.st_mode))
- erts_printf("xattr ");
- else
+ if (S_ISXATTR(stat_buf.st_mode))
+ erts_dsprintf(dsbufp, "xattr ");
+ else
#endif
- erts_printf("unknown ");
- }
+ erts_dsprintf(dsbufp, "unknown ");
+ }
#else
- erts_printf("type=unknown ");
+ erts_dsprintf(dsbufp, "type=unknown ");
#endif
- if (state->type == ERTS_EV_TYPE_DRV_SEL) {
- erts_printf("driver_select ");
-
+ if (state->type == ERTS_EV_TYPE_DRV_SEL) {
+ erts_dsprintf(dsbufp, "driver_select ");
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (internal) {
- erts_printf("internal ");
- err = 1;
- }
-
- if (cio_events == ep_events) {
- erts_printf("ev=");
- if (print_events(cio_events) != 0)
- err = 1;
- }
- else {
- err = 1;
- erts_printf("cio_ev=");
- print_events(cio_events);
- erts_printf(" ep_ev=");
- print_events(ep_events);
- }
+ if (internal) {
+ erts_dsprintf(dsbufp, "internal ");
+ err = 1;
+ }
+ if (aio_events == cio_events) {
+ if (cio_events == ep_events) {
+ erts_dsprintf(dsbufp, "ev=");
+ if (print_events(dsbufp, cio_events) != 0)
+ err = 1;
+ }
+ else {
+ ErtsPollEvents ev = cio_events;
+ if (ev != ep_events && ep_events != ERTS_POLL_EV_NONE)
+ err = 1;
+ erts_dsprintf(dsbufp, "cio_ev=");
+ print_events(dsbufp, cio_events);
+ erts_dsprintf(dsbufp, " ep_ev=");
+ print_events(dsbufp, ep_events);
+ }
+ } else {
+ erts_dsprintf(dsbufp, "cio_ev=");
+ print_events(dsbufp, cio_events);
+ erts_dsprintf(dsbufp, " aio_ev=");
+ print_events(dsbufp, aio_events);
+ if ((aio_events != ep_events && ep_events != ERTS_POLL_EV_NONE) ||
+ (aio_events != 0 && ep_events == ERTS_POLL_EV_NONE)) {
+ erts_dsprintf(dsbufp, " ep_ev=");
+ print_events(dsbufp, ep_events);
+ err = 1;
+ }
+ }
#else
- if (print_events(cio_events) != 0)
- err = 1;
-#endif
- erts_printf(" ");
- if (cio_events & ERTS_POLL_EV_IN) {
- Eterm id = state->driver.select->inport;
- if (is_nil(id)) {
- erts_printf("inport=none inname=none indrv=none ");
- err = 1;
- }
- else {
- ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
- erts_printf(" inport=%T inname=%s indrv=%s ",
- id,
- pnp->name ? pnp->name : "unknown",
- (pnp->driver_name
- ? pnp->driver_name
- : "unknown"));
- erts_free_port_names(pnp);
- }
- }
- if (cio_events & ERTS_POLL_EV_OUT) {
- Eterm id = state->driver.select->outport;
- if (is_nil(id)) {
- erts_printf("outport=none outname=none outdrv=none ");
- err = 1;
- }
- else {
- ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
- erts_printf(" outport=%T outname=%s outdrv=%s ",
- id,
- pnp->name ? pnp->name : "unknown",
- (pnp->driver_name
- ? pnp->driver_name
- : "unknown"));
- erts_free_port_names(pnp);
- }
- }
- }
- else if (state->type == ERTS_EV_TYPE_DRV_EV) {
- Eterm id;
- erts_printf("driver_event ");
+ if (print_events(dsbufp, cio_events) != 0)
+ err = 1;
+#endif
+ erts_dsprintf(dsbufp, " ");
+ if (cio_events & ERTS_POLL_EV_IN) {
+ Eterm id = state->driver.select->inport;
+ if (is_nil(id)) {
+ erts_dsprintf(dsbufp, "inport=none inname=none indrv=none ");
+ err = 1;
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
+ erts_dsprintf(dsbufp, " inport=%T inname=%s indrv=%s ",
+ id,
+ pnp->name ? pnp->name : "unknown",
+ (pnp->driver_name
+ ? pnp->driver_name
+ : "unknown"));
+ erts_free_port_names(pnp);
+ }
+ }
+ if (cio_events & ERTS_POLL_EV_OUT) {
+ Eterm id = state->driver.select->outport;
+ if (is_nil(id)) {
+ erts_dsprintf(dsbufp, "outport=none outname=none outdrv=none ");
+ err = 1;
+ }
+ else {
+ ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
+ erts_dsprintf(dsbufp, " outport=%T outname=%s outdrv=%s ",
+ id,
+ pnp->name ? pnp->name : "unknown",
+ (pnp->driver_name
+ ? pnp->driver_name
+ : "unknown"));
+ erts_free_port_names(pnp);
+ }
+ }
+ }
+ else if (state->type == ERTS_EV_TYPE_NIF) {
+ ErtsResource* r;
+ erts_dsprintf(dsbufp, "enif_select ");
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (internal) {
- erts_printf("internal ");
- err = 1;
- }
- if (cio_events == ep_events) {
- erts_printf("ev=0x%b32x", (Uint32) cio_events);
- }
- else {
- err = 1;
- erts_printf("cio_ev=0x%b32x", (Uint32) cio_events);
- erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events);
- }
+ if (internal) {
+ erts_dsprintf(dsbufp, "internal ");
+ err = 1;
+ }
+
+ if (cio_events == ep_events) {
+ erts_dsprintf(dsbufp, "ev=");
+ if (print_events(dsbufp, cio_events) != 0)
+ err = 1;
+ }
+ else {
+ err = 1;
+ erts_dsprintf(dsbufp, "cio_ev=");
+ print_events(dsbufp, cio_events);
+ erts_dsprintf(dsbufp, " ep_ev=");
+ print_events(dsbufp, ep_events);
+ }
#else
- erts_printf("ev=0x%b32x", (Uint32) cio_events);
+ if (print_events(dsbufp, cio_events) != 0)
+ err = 1;
#endif
- id = state->driver.event->port;
- if (is_nil(id)) {
- erts_printf(" port=none name=none drv=none ");
- err = 1;
- }
- else {
- ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT);
- erts_printf(" port=%T name=%s drv=%s ",
- id,
- pnp->name ? pnp->name : "unknown",
- (pnp->driver_name
- ? pnp->driver_name
- : "unknown"));
- erts_free_port_names(pnp);
- }
- }
+ erts_dsprintf(dsbufp, " inpid=%T", state->driver.nif->in.pid);
+ erts_dsprintf(dsbufp, " outpid=%T", state->driver.nif->out.pid);
+ r = state->driver.stop.resource;
+ erts_dsprintf(dsbufp, " resource=%p(%T:%T)", r, r->type->module, r->type->name);
+ }
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- else if (internal) {
- erts_printf("internal ");
- if (cio_events) {
- err = 1;
- erts_printf("cio_ev=");
- print_events(cio_events);
- }
- if (ep_events) {
- erts_printf("ep_ev=");
- print_events(ep_events);
- }
- }
+ else if (internal) {
+ erts_dsprintf(dsbufp, "internal ");
+ if (cio_events) {
+ err = 1;
+ erts_dsprintf(dsbufp, "cio_ev=");
+ print_events(dsbufp, cio_events);
+ }
+ if (ep_events) {
+ erts_dsprintf(dsbufp, "ep_ev=");
+ print_events(dsbufp, ep_events);
+ }
+ }
#endif
- else {
- err = 1;
- erts_printf("control_type=%d ", (int)state->type);
+ else {
+ err = 1;
+ erts_dsprintf(dsbufp, "control_type=%d ", (int)state->type);
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (cio_events == ep_events) {
- erts_printf("ev=0x%b32x", (Uint32) cio_events);
- }
- else {
- erts_printf("cio_ev=0x%b32x", (Uint32) cio_events);
- erts_printf(" ep_ev=0x%b32x", (Uint32) ep_events);
- }
+ if (cio_events == ep_events) {
+ erts_dsprintf(dsbufp, "ev=");
+ print_events(dsbufp, cio_events);
+ }
+ else {
+ erts_dsprintf(dsbufp, "cio_ev="); print_events(dsbufp, cio_events);
+ erts_dsprintf(dsbufp, " ep_ev="); print_events(dsbufp, ep_events);
+ }
#else
- erts_printf("ev=0x%b32x", (Uint32) cio_events);
+ erts_dsprintf(dsbufp, "ev=0x%b32x", (Uint32) cio_events);
#endif
+ }
+
+ erts_dsprintf(dsbufp, " flags="); print_flags(dsbufp, state->flags);
+ if (err) {
+ erts_dsprintf(dsbufp, " ERROR");
+ }
+ erts_dsprintf(dsbufp, "\r\n");
+ return err;
+}
+
+static void doit_erts_check_io_debug(void *vstate, void *vcounters,
+ erts_dsprintf_buf_t *dsbufp)
+{
+ ErtsDrvEventState *state = (ErtsDrvEventState *) vstate;
+ IterDebugCounters *counters = (IterDebugCounters *) vcounters;
+ int internal = 0;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ ErtsSysFdType fd = state->fd;
+ ErtsPollEvents ep_events = counters->epep[(int) fd];
+#else
+ ErtsPollEvents ep_events = ERTS_POLL_EV_NONE;
+#endif
+
+ if (state->driver.select) {
+ counters->no_driver_select_structs++;
+ ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE));
+ }
+ if (state->driver.nif) {
+ counters->no_enif_select_structs++;
+ ASSERT(state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE));
+ }
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (state->events || (ep_events != 0 && ep_events != ERTS_POLL_EV_NONE)) {
+ if (ep_events & ERTS_POLL_EV_NVAL) {
+ ep_events &= ~ERTS_POLL_EV_NVAL;
+ internal = 1;
+ counters->internal_fds++;
}
-
- if (err) {
+ else
+ counters->used_fds++;
+#else
+ if (state->events) {
+ counters->used_fds++;
+#endif
+ if (erts_debug_print_checkio_state(dsbufp, state, ep_events, internal)) {
counters->num_errors++;
- erts_printf(" ERROR");
}
- erts_printf("\n");
}
}
-
+
+/* ciodpi can be NULL when called from etp-commands */
int
-ERTS_CIO_EXPORT(erts_check_io_debug)(void)
+erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
{
+ erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- int fd, len;
+ int fd, len, i;
#endif
- IterDebugCounters counters;
+ IterDebugCounters counters = {0};
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
ErtsDrvEventState null_des;
null_des.driver.select = NULL;
+ null_des.driver.nif = NULL;
+ null_des.driver.stop.drv_ptr = NULL;
null_des.events = 0;
- null_des.remove_cnt = 0;
null_des.type = ERTS_EV_TYPE_NONE;
+ null_des.flags = 0;
+
+ counters.epep = erts_alloc(ERTS_ALC_T_TMP,
+ sizeof(ErtsPollEvents)*drv_ev_state.max_fds);
#endif
- erts_printf("--- fds in pollset --------------------------------------\n");
-#if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK)
+#if defined(ERTS_ENABLE_LOCK_CHECK)
erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
- erts_smp_thr_progress_block(); /* stop the world to avoid messy locking */
+ if (ciodip)
+ erts_thr_progress_block(); /* stop the world to avoid messy locking */
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- counters.epep = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollEvents)*max_fds);
- ERTS_POLL_EXPORT(erts_poll_get_selected_events)(pollset.ps, counters.epep, max_fds);
- counters.internal_fds = 0;
-#endif
- counters.used_fds = 0;
- counters.num_errors = 0;
+ len = erts_atomic_read_nob(&drv_ev_state.len);
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- len = erts_smp_atomic_read_nob(&drv_ev_state_len);
+#if ERTS_POLL_USE_FALLBACK
+ erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
+ erts_poll_get_selected_events_flbk(get_fallback(), counters.epep,
+ drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
- doit_erts_check_io_debug((void *) &drv_ev_state[fd], (void *) &counters);
+ if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
+#endif
+ erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
+
+ for (i = 0; i < erts_no_pollsets; i++) {
+ erts_poll_get_selected_events(pollsetv[i],
+ counters.epep,
+ drv_ev_state.max_fds);
+ for (fd = 0; fd < len; fd++) {
+ if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
+ && get_pollset_id(fd) == i)
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
}
- for ( ; fd < max_fds; fd++) {
- null_des.fd = fd;
- doit_erts_check_io_debug((void *) &null_des, (void *) &counters);
+ for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
+ null_des.fd = fd;
+ doit_erts_check_io_debug(&null_des, &counters, dsbufp);
}
#else
- safe_hash_for_each(&drv_ev_state_tab, &doit_erts_check_io_debug, (void *) &counters);
+ safe_hash_for_each(&drv_ev_state.tab, &doit_erts_check_io_debug,
+ &counters, dsbufp);
#endif
- erts_smp_thr_progress_unblock();
+ if (ciodip)
+ erts_thr_progress_unblock();
- erts_printf("\n");
- erts_printf("used fds=%d\n", counters.used_fds);
+ if (ciodip) {
+ ciodip->no_used_fds = counters.used_fds;
+ ciodip->no_driver_select_structs = counters.no_driver_select_structs;
+ ciodip->no_enif_select_structs = counters.no_enif_select_structs;
+ }
+
+ erts_dsprintf(dsbufp, "\n");
+ erts_dsprintf(dsbufp, "used fds=%d\n", counters.used_fds);
+ erts_dsprintf(dsbufp, "Number of driver_select() structures=%d\n", counters.no_driver_select_structs);
+ erts_dsprintf(dsbufp, "Number of enif_select() structures=%d\n", counters.no_enif_select_structs);
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
- erts_printf("internal fds=%d\n", counters.internal_fds);
+ erts_dsprintf(dsbufp, "internal fds=%d\n", counters.internal_fds);
#endif
- erts_printf("---------------------------------------------------------\n");
- fflush(stdout);
+ erts_dsprintf(dsbufp, "---------------------------------------------------------\n");
+ erts_send_error_to_logger_nogl(dsbufp);
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
erts_free(ERTS_ALC_T_TMP, (void *) counters.epep);
#endif
+
return counters.num_errors;
}
+#ifdef ERTS_ENABLE_LOCK_COUNT
+void erts_lcnt_update_cio_locks(int enable) {
+ int i;
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ erts_lcnt_enable_hash_lock_count(&drv_ev_state.tab, ERTS_LOCK_FLAGS_CATEGORY_IO, enable);
+#else
+ (void)enable;
+#endif
+
+#if ERTS_POLL_USE_FALLBACK
+ erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable);
+#endif
+
+ for (i = 0; i < erts_no_pollsets; i++)
+ erts_lcnt_enable_pollset_lock_count(pollsetv[i], enable);
+}
+#endif /* ERTS_ENABLE_LOCK_COUNT */
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index edab7947ba..443ef1264c 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -1,74 +1,109 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2017. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
-/*
- * Description: Check I/O
+/**
+ * @description Check I/O, a cross platform IO polling framework for ERTS
*
- * Author: Rickard Green
+ * @author Rickard Green
+ * @author Lukas Larsson
*/
#ifndef ERL_CHECK_IO_H__
#define ERL_CHECK_IO_H__
+#include "sys.h"
#include "erl_sys_driver.h"
-#ifdef ERTS_ENABLE_KERNEL_POLL
-
-int driver_select_kp(ErlDrvPort, ErlDrvEvent, int, int);
-int driver_select_nkp(ErlDrvPort, ErlDrvEvent, int, int);
-int driver_event_kp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
-int driver_event_nkp(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
-Uint erts_check_io_size_kp(void);
-Uint erts_check_io_size_nkp(void);
-Eterm erts_check_io_info_kp(void *);
-Eterm erts_check_io_info_nkp(void *);
-int erts_check_io_max_files_kp(void);
-int erts_check_io_max_files_nkp(void);
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-void erts_check_io_async_sig_interrupt_kp(void);
-void erts_check_io_async_sig_interrupt_nkp(void);
-#endif
-void erts_check_io_interrupt_kp(int);
-void erts_check_io_interrupt_nkp(int);
-void erts_check_io_interrupt_timed_kp(int, erts_short_time_t);
-void erts_check_io_interrupt_timed_nkp(int, erts_short_time_t);
-void erts_check_io_kp(int);
-void erts_check_io_nkp(int);
-void erts_init_check_io_kp(void);
-void erts_init_check_io_nkp(void);
-int erts_check_io_debug_kp(void);
-int erts_check_io_debug_nkp(void);
-
-#else /* !ERTS_ENABLE_KERNEL_POLL */
+/** @brief a structure that is used by each polling thread */
+struct erts_poll_thread;
+/**
+ * Get the memory size of the check io framework
+ */
Uint erts_check_io_size(void);
-Eterm erts_check_io_info(void *);
+/**
+ * Returns an Eterm with information about all the pollsets active at the
+ * moment.
+ *
+ * @param proc the Process* to allocate the result on. It is passed as
+ * void * because of header include problems.
+ */
+Eterm erts_check_io_info(void *proc);
+/**
+ * Should be called when a port IO task has been executed in order to re-enable
+ * or clear the information about the fd.
+ *
+ * @param type The type of event that has been completed.
+ * @param handle The port task handle of the event.
+ * @param reset A function pointer to be called when the port task handle
+ * should be reset.
+ */
+void erts_io_notify_port_task_executed(ErtsPortTaskType type,
+ ErtsPortTaskHandle *handle,
+ void (*reset)(ErtsPortTaskHandle *));
+/**
+ * Returns the maximum number of fds that the check io framework can handle.
+ */
int erts_check_io_max_files(void);
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-void erts_check_io_async_sig_interrupt(void);
+/**
+ * Called by any thread that should check for new IO events. This function will
+ * not return unless erts_check_io_interrupt(pt, 1) is called by another thread.
+ *
+ * @param pt the poll thread structure to use.
+ */
+void erts_check_io(struct erts_poll_thread *pt);
+/**
+ * Initialize the check io framework. This function will parse the arguments
+ * and delete any entries that it is interested in.
+ *
+ * @param argc the number of arguments
+ * @param argv an array with the arguments
+ */
+void erts_init_check_io(int *argc, char **argv);
+/**
+ * Interrupt the poll thread so that it can execute other code.
+ *
+ * Should be called with set = 0 by the waiting thread before calling
+ * erts_check_io.
+ *
+ * @param pt the poll thread to wake
+ * @param set whether to set or clear the interrupt flag
+ */
+void erts_check_io_interrupt(struct erts_poll_thread *pt, int set);
+/**
+ * Create a new poll thread structure that is associated with the number no.
+ * It is the callers responsibility that no is unique.
+ */
+struct erts_poll_thread* erts_create_pollset_thread(int no);
+#ifdef ERTS_ENABLE_LOCK_COUNT
+/**
+ * Toggle lock counting on all check io locks
+ */
+void erts_lcnt_update_cio_locks(int enable);
#endif
-void erts_check_io_interrupt(int);
-void erts_check_io_interrupt_timed(int, erts_short_time_t);
-void erts_check_io(int);
-void erts_init_check_io(void);
-#endif
+typedef struct {
+ ErtsPortTaskHandle task;
+ ErtsSysFdType fd;
+} ErtsIoTask;
+
#endif /* ERL_CHECK_IO_H__ */
@@ -76,28 +111,51 @@ void erts_init_check_io(void);
#define ERL_CHECK_IO_INTERNAL__
#endif
+#define ERTS_CHECK_IO_DRV_EV_STATE_LOCK_CNT 128
+
+/* Controls how many pollsets to allocate. Fd's are hashed into
+ each pollset based on the FD. When doing non-concurrent updates
+ there will be one pollset per thread.
+*/
+extern int erts_no_pollsets;
+extern int erts_no_poll_threads;
+
+
#ifndef ERL_CHECK_IO_INTERNAL__
#define ERL_CHECK_IO_INTERNAL__
#include "erl_poll.h"
#include "erl_port_task.h"
+#ifdef __WIN32__
/*
- * ErtsDrvEventDataState is used by driver_event() which is almost never
- * used. We allocate ErtsDrvEventDataState separate since we dont wan't
- * the size of ErtsDrvEventState to increase due to driver_event()
- * information.
+ * Current erts_poll implementation for Windows cannot handle
+ * active events in the set of events polled.
*/
-typedef struct {
- Eterm port;
- ErlDrvEventData data;
- ErtsPollEvents removed_events;
- ErtsPortTaskHandle task;
-} ErtsDrvEventDataState;
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
+#else
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
+#endif
typedef struct {
Eterm inport;
Eterm outport;
- ErtsPortTaskHandle intask;
- ErtsPortTaskHandle outtask;
+ ErtsIoTask iniotask;
+ ErtsIoTask outiotask;
} ErtsDrvSelectDataState;
+
+struct erts_nif_select_event {
+ Eterm pid;
+ Eterm immed;
+ Uint32 refn[ERTS_REF_NUMBERS];
+ Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress
+ * 1: Do deselect before next poll
+ * >1: Countdown of ignored events
+ */
+};
+
+typedef struct {
+ struct erts_nif_select_event in;
+ struct erts_nif_select_event out;
+} ErtsNifSelectDataState;
+
#endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */
diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c
index 3f6813e1a5..a9c6e72c5f 100644
--- a/erts/emulator/sys/common/erl_mmap.c
+++ b/erts/emulator/sys/common/erl_mmap.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -20,13 +21,15 @@
# include "config.h"
#endif
+#define ERTS_WANT_MEM_MAPPERS
#include "sys.h"
#include "erl_process.h"
-#include "erl_smp.h"
#include "atom.h"
#include "erl_mmap.h"
#include <stddef.h>
+#if HAVE_ERTS_MMAP
+
/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
#if defined(DEBUG) || 0
@@ -50,23 +53,22 @@
#endif
/*
- * `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after
+ * `mm->sa.bot` and `mm->sua.top` are read only after
* initialization, but the other pointers are not; i.e., only
* ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
*/
#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \
- (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
- < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot))
+ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
+ < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot))
#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \
- (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
- (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
- < ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot)))
+ (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
+ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
+ < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot)))
#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \
- (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
- (((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \
- < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot)))
+ (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
+ (((UWord) (PTR)) - ((UWord) mm->sua.bot) \
+ < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot)))
-int erts_have_erts_mmap;
UWord erts_page_inv_mask;
#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
@@ -196,10 +198,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
do { \
- erts_smp_mtx_lock(&mmap_state.mtx); \
+ erts_mtx_lock(&mm->mtx); \
ERTS_MMAP_OP_START((IN_SZ)); \
ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
- erts_smp_mtx_unlock(&mmap_state.mtx); \
+ erts_mtx_unlock(&mm->mtx); \
} while (0)
#define ERTS_MUNMAP_OP(PTR, SZ) \
@@ -218,9 +220,9 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
do { \
- erts_smp_mtx_lock(&mmap_state.mtx); \
+ erts_mtx_lock(&mm->mtx); \
ERTS_MUNMAP_OP((PTR), (SZ)); \
- erts_smp_mtx_unlock(&mmap_state.mtx); \
+ erts_mtx_unlock(&mm->mtx); \
} while (0)
#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
@@ -246,10 +248,10 @@ static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
do { \
- erts_smp_mtx_lock(&mmap_state.mtx); \
+ erts_mtx_lock(&mm->mtx); \
ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
- erts_smp_mtx_unlock(&mmap_state.mtx); \
+ erts_mtx_unlock(&mm->mtx); \
} while (0)
#define ERTS_MMAP_OP_ABORT() \
@@ -293,13 +295,14 @@ typedef struct {
Uint nseg;
}ErtsFreeSegMap;
-static struct {
- int (*reserve_physical)(char *, UWord);
+struct ErtsMemMapper_ {
+ int (*reserve_physical)(char *, UWord, int exec);
void (*unreserve_physical)(char *, UWord);
int supercarrier;
int no_os_mmap;
+ int executable; /* is client a native code allocator? */
/*
- * Super unaligend area is located above super aligned
+ * Super unaligned area is located above super aligned
* area. That is, `sa.bot` is beginning of the super
* carrier, `sua.top` is the end of the super carrier,
* and sa.top and sua.bot moves towards eachother.
@@ -317,7 +320,7 @@ static struct {
#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
int mmap_fd;
#endif
- erts_smp_mtx_t mtx;
+ erts_mtx_t mtx;
struct {
char *free_list;
char *unused_start;
@@ -345,54 +348,67 @@ static struct {
UWord used;
} os;
} size;
-} mmap_state;
+};
+
+ErtsMemMapper erts_dflt_mmapper;
+
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ErtsMemMapper erts_literal_mmapper;
+char* erts_literals_start;
+UWord erts_literals_size;
+#endif
+
+#ifdef ERTS_HAVE_EXEC_MMAPPER
+ErtsMemMapper erts_exec_mmapper;
+#endif
+
#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \
do { \
- mmap_state.size.supercarrier.used.total += (SZ); \
- mmap_state.size.supercarrier.used.sa += (SZ); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
- <= mmap_state.size.supercarrier.total); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \
- <= mmap_state.size.supercarrier.used.total); \
+ mm->size.supercarrier.used.total += (SZ); \
+ mm->size.supercarrier.used.sa += (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
+ <= mm->size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa \
+ <= mm->size.supercarrier.used.total); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \
do { \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
- mmap_state.size.supercarrier.used.total -= (SZ); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \
- mmap_state.size.supercarrier.used.sa -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
+ mm->size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa >= (SZ)); \
+ mm->size.supercarrier.used.sa -= (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \
do { \
- mmap_state.size.supercarrier.used.total += (SZ); \
- mmap_state.size.supercarrier.used.sua += (SZ); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
- <= mmap_state.size.supercarrier.total); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \
- <= mmap_state.size.supercarrier.used.total); \
+ mm->size.supercarrier.used.total += (SZ); \
+ mm->size.supercarrier.used.sua += (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
+ <= mm->size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua \
+ <= mm->size.supercarrier.used.total); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \
do { \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
- mmap_state.size.supercarrier.used.total -= (SZ); \
- ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \
- mmap_state.size.supercarrier.used.sua -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
+ mm->size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua >= (SZ)); \
+ mm->size.supercarrier.used.sua -= (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_OS_INC(SZ) \
do { \
- ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \
- mmap_state.size.os.used += (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.os.used + (SZ) >= (SZ)); \
+ mm->size.os.used += (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_OS_DEC(SZ) \
do { \
- ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \
- mmap_state.size.os.used -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.os.used >= (SZ)); \
+ mm->size.os.used -= (SZ); \
} while (0)
static void
-add_free_desc_area(char *start, char *end)
+add_free_desc_area(ErtsMemMapper* mm, char *start, char *end)
{
ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
@@ -402,7 +418,7 @@ add_free_desc_area(char *start, char *end)
no = 1;
prev_desc = (ErtsFreeSegDesc *) start;
- prev_desc->start = mmap_state.desc.free_list;
+ prev_desc->start = mm->desc.free_list;
desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
desc_end = start + 2*sizeof(ErtsFreeSegDesc);
@@ -413,59 +429,59 @@ add_free_desc_area(char *start, char *end)
desc_end += sizeof(ErtsFreeSegDesc);
no++;
}
- mmap_state.desc.free_list = (char *) prev_desc;
- mmap_state.no.free_seg_descs += no;
+ mm->desc.free_list = (char *) prev_desc;
+ mm->no.free_seg_descs += no;
}
}
static ErtsFreeSegDesc *
-add_unused_free_desc_area(void)
+add_unused_free_desc_area(ErtsMemMapper* mm)
{
char *ptr;
- if (!mmap_state.desc.unused_start)
+ if (!mm->desc.unused_start)
return NULL;
- ERTS_MMAP_ASSERT(mmap_state.desc.unused_end);
+ ERTS_MMAP_ASSERT(mm->desc.unused_end);
ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
- <= mmap_state.desc.unused_end - mmap_state.desc.unused_start);
+ <= mm->desc.unused_end - mm->desc.unused_start);
- ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE;
- add_free_desc_area(mmap_state.desc.unused_start, ptr);
+ ptr = mm->desc.unused_start + ERTS_PAGEALIGNED_SIZE;
+ add_free_desc_area(mm, mm->desc.unused_start, ptr);
- if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
- mmap_state.desc.unused_start = ptr;
+ if ((mm->desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
+ mm->desc.unused_start = ptr;
else
- mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL;
+ mm->desc.unused_end = mm->desc.unused_start = NULL;
- ERTS_MMAP_ASSERT(mmap_state.desc.free_list);
- return (ErtsFreeSegDesc *) mmap_state.desc.free_list;
+ ERTS_MMAP_ASSERT(mm->desc.free_list);
+ return (ErtsFreeSegDesc *) mm->desc.free_list;
}
static ERTS_INLINE ErtsFreeSegDesc *
-alloc_desc(void)
+alloc_desc(ErtsMemMapper* mm)
{
ErtsFreeSegDesc *res;
- res = (ErtsFreeSegDesc *) mmap_state.desc.free_list;
+ res = (ErtsFreeSegDesc *) mm->desc.free_list;
if (!res) {
- res = add_unused_free_desc_area();
+ res = add_unused_free_desc_area(mm);
if (!res)
return NULL;
}
- mmap_state.desc.free_list = res->start;
- ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs);
- mmap_state.no.free_segs.curr++;
- if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr)
- mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr;
+ mm->desc.free_list = res->start;
+ ASSERT(mm->no.free_segs.curr < mm->no.free_seg_descs);
+ mm->no.free_segs.curr++;
+ if (mm->no.free_segs.max < mm->no.free_segs.curr)
+ mm->no.free_segs.max = mm->no.free_segs.curr;
return res;
}
static ERTS_INLINE void
-free_desc(ErtsFreeSegDesc *desc)
+free_desc(ErtsMemMapper* mm, ErtsFreeSegDesc *desc)
{
- desc->start = mmap_state.desc.free_list;
- mmap_state.desc.free_list = (char *) desc;
- ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0);
- mmap_state.no.free_segs.curr--;
+ desc->start = mm->desc.free_list;
+ mm->desc.free_list = (char *) desc;
+ ERTS_MMAP_ASSERT(mm->no.free_segs.curr > 0);
+ mm->no.free_segs.curr--;
}
static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
@@ -1224,6 +1240,7 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
#if HAVE_MMAP
# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
+# define ERTS_MMAP_PROT_EXEC (PROT_READ|PROT_WRITE|PROT_EXEC)
# if defined(MAP_ANONYMOUS)
# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
# define ERTS_MMAP_FD (-1)
@@ -1232,29 +1249,31 @@ Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
# define ERTS_MMAP_FD (-1)
# else
# define ERTS_MMAP_FLAGS (MAP_PRIVATE)
-# define ERTS_MMAP_FD mmap_state.mmap_fd
+# define ERTS_MMAP_FD mm->mmap_fd
# endif
#endif
static ERTS_INLINE void *
-os_mmap(void *hint_ptr, UWord size, int try_superalign)
+os_mmap(void *hint_ptr, UWord size, int try_superalign, int executable)
{
#if HAVE_MMAP
+ const int prot = executable ? ERTS_MMAP_PROT_EXEC : ERTS_MMAP_PROT;
void *res;
#ifdef MAP_ALIGN
if (try_superalign)
- res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT,
+ res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, prot,
ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
else
#endif
- res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT,
+ res = mmap((void *) hint_ptr, size, prot,
ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
if (res == MAP_FAILED)
return NULL;
return res;
#elif HAVE_VIRTUALALLOC
+ const DWORD prot = executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
return (void *) VirtualAlloc(NULL, (SIZE_T) size,
- MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
+ MEM_COMMIT|MEM_RESERVE, prot);
#else
# error "missing mmap() or similar"
#endif
@@ -1311,16 +1330,26 @@ os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
#if HAVE_MMAP
#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
+#define ERTS_MMAP_RESERVE_PROT_EXEC (ERTS_MMAP_PROT_EXEC)
#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
+#if defined(__FreeBSD__)
+#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
+#else
#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
+#endif /* __FreeBSD__ */
#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
+#if defined(__FreeBSD__)
+#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS)
+#else
#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
+#endif /* __FreeBSD__ */
static int
-os_reserve_physical(char *ptr, UWord size)
+os_reserve_physical(char *ptr, UWord size, int exec)
{
- void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT,
+ const int prot = exec ? ERTS_MMAP_RESERVE_PROT_EXEC : ERTS_MMAP_RESERVE_PROT;
+ void *res = mmap((void *) ptr, (size_t) size, prot,
ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
return 0;
@@ -1333,14 +1362,43 @@ os_unreserve_physical(char *ptr, UWord size)
void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
- erl_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
+ erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
}
static void *
-os_mmap_virtual(char *ptr, UWord size)
+os_mmap_virtual(char *ptr, UWord size, int exec)
{
- void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
- ERTS_MMAP_VIRTUAL_FLAGS, ERTS_MMAP_FD, 0);
+ int flags = ERTS_MMAP_VIRTUAL_FLAGS;
+ void* res;
+
+#ifdef ERTS_ALC_A_EXEC
+ if (exec) {
+ ASSERT(!ptr);
+ /* OTP-19.0: Nice hack below cut-and-pasted from hipe_amd64.c */
+
+# ifdef MAP_32BIT
+ /* If we got MAP_32BIT (Linux), then use that to ask for low memory */
+ flags |= MAP_32BIT;
+# else
+ /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect
+ a plain map_hint (returns high mappings even though the
+ hint refers to a free area), so we have to use both map_hint
+ and MAP_FIXED to get addresses below the 2GB boundary.
+ This is even worse than the Linux/ppc64 case.
+ Similarly, Solaris 10 doesn't have MAP_32BIT,
+ and it doesn't respect a plain map_hint. */
+ ptr = (char*)(512*1024*1024); /* 0.5GB */
+
+# if defined(__FreeBSD__) || defined(__sun__)
+ flags |= MAP_FIXED;
+# endif
+# endif /* !MAP_32BIT */
+ }
+#else /* !ERTS_ALC_A_EXEC */
+ ASSERT(!exec);
+#endif
+ res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
+ flags, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
return NULL;
return res;
@@ -1353,7 +1411,7 @@ os_mmap_virtual(char *ptr, UWord size)
#endif /* ERTS_HAVE_OS_MMAP */
-static int reserve_noop(char *ptr, UWord size)
+static int reserve_noop(char *ptr, UWord size, int exec)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
Uint32 *uip, *end = (Uint32 *) (ptr + size);
@@ -1377,11 +1435,12 @@ static void unreserve_noop(char *ptr, UWord size)
}
static UWord
-alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
+alloc_desc_insert_free_seg(ErtsMemMapper* mm,
+ ErtsFreeSegMap *map, char* start, char* end)
{
char *ptr;
ErtsFreeSegMap *da_map;
- ErtsFreeSegDesc *desc = alloc_desc();
+ ErtsFreeSegDesc *desc = alloc_desc(mm);
if (desc) {
insert_free_seg(map, desc, start, end);
return 0;
@@ -1394,13 +1453,13 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
*/
#if ERTS_HAVE_OS_MMAP
- if (!mmap_state.no_os_mmap) {
- ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0);
+ if (!mm->no_os_mmap) {
+ ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0, 0);
if (ptr) {
- mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
+ mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
- add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE);
- desc = alloc_desc();
+ add_free_desc_area(mm, ptr, ptr+ERTS_PAGEALIGNED_SIZE);
+ desc = alloc_desc(mm);
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, start, end);
return 0;
@@ -1411,20 +1470,20 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
/*
* ...then try to find a good place in the supercarrier...
*/
- da_map = &mmap_state.sua.map;
+ da_map = &mm->sua.map;
desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
if (desc) {
- if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
+ if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0))
ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
else
desc = NULL;
}
else {
- da_map = &mmap_state.sa.map;
+ da_map = &mm->sa.map;
desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
if (desc) {
- if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
+ if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0))
ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
else
desc = NULL;
@@ -1432,15 +1491,15 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
}
if (desc) {
char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
- add_free_desc_area(desc->start, da_end);
+ add_free_desc_area(mm, desc->start, da_end);
if (da_end != desc->end)
resize_free_seg(da_map, desc, da_end, desc->end);
else {
delete_free_seg(da_map, desc);
- free_desc(desc);
+ free_desc(mm, desc);
}
- desc = alloc_desc();
+ desc = alloc_desc(mm);
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, start, end);
return 0;
@@ -1453,10 +1512,10 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
ptr = start + ERTS_PAGEALIGNED_SIZE;
ERTS_MMAP_ASSERT(ptr <= end);
- add_free_desc_area(start, ptr);
+ add_free_desc_area(mm, start, ptr);
if (ptr != end) {
- desc = alloc_desc();
+ desc = alloc_desc(mm);
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, ptr, end);
}
@@ -1465,46 +1524,46 @@ alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
}
void *
-erts_mmap(Uint32 flags, UWord *sizep)
+erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep)
{
char *seg;
UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);
/* Map in premapped supercarrier */
- if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
+ if (mm->supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
char *end;
ErtsFreeSegDesc *desc;
Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
- erts_smp_mtx_lock(&mmap_state.mtx);
+ erts_mtx_lock(&mm->mtx);
ERTS_MMAP_OP_START(*sizep);
if (!superaligned) {
- desc = lookup_free_seg(&mmap_state.sua.map, asize);
+ desc = lookup_free_seg(&mm->sua.map, asize);
if (desc) {
seg = desc->start;
end = seg+asize;
- if (!mmap_state.reserve_physical(seg, asize))
+ if (!mm->reserve_physical(seg, asize, mm->executable))
goto supercarrier_reserve_failure;
if (desc->end == end) {
- delete_free_seg(&mmap_state.sua.map, desc);
- free_desc(desc);
+ delete_free_seg(&mm->sua.map, desc);
+ free_desc(mm, desc);
}
else {
ERTS_MMAP_ASSERT(end < desc->end);
- resize_free_seg(&mmap_state.sua.map, desc, end, desc->end);
+ resize_free_seg(&mm->sua.map, desc, end, desc->end);
}
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
goto supercarrier_success;
}
- if (asize <= mmap_state.sua.bot - mmap_state.sa.top) {
- if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize,
- asize))
+ if (asize <= mm->sua.bot - mm->sa.top) {
+ if (!mm->reserve_physical(mm->sua.bot - asize, asize,
+ mm->executable))
goto supercarrier_reserve_failure;
- mmap_state.sua.bot -= asize;
- seg = mmap_state.sua.bot;
+ mm->sua.bot -= asize;
+ seg = mm->sua.bot;
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
goto supercarrier_success;
}
@@ -1512,84 +1571,87 @@ erts_mmap(Uint32 flags, UWord *sizep)
asize = ERTS_SUPERALIGNED_CEILING(asize);
- desc = lookup_free_seg(&mmap_state.sa.map, asize);
+ desc = lookup_free_seg(&mm->sa.map, asize);
if (desc) {
char *start = seg = desc->start;
seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
end = seg+asize;
- if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
+ if (!mm->reserve_physical(start, (UWord) (end - start),
+ mm->executable))
goto supercarrier_reserve_failure;
ERTS_MMAP_SIZE_SC_SA_INC(asize);
if (desc->end == end) {
if (start != seg)
- resize_free_seg(&mmap_state.sa.map, desc, start, seg);
+ resize_free_seg(&mm->sa.map, desc, start, seg);
else {
- delete_free_seg(&mmap_state.sa.map, desc);
- free_desc(desc);
+ delete_free_seg(&mm->sa.map, desc);
+ free_desc(mm, desc);
}
}
else {
ERTS_MMAP_ASSERT(end < desc->end);
- resize_free_seg(&mmap_state.sa.map, desc, end, desc->end);
+ resize_free_seg(&mm->sa.map, desc, end, desc->end);
if (start != seg) {
UWord ad_sz;
- ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
start, seg);
start += ad_sz;
if (start != seg)
- mmap_state.unreserve_physical(start, (UWord) (seg - start));
+ mm->unreserve_physical(start, (UWord) (seg - start));
}
}
goto supercarrier_success;
}
if (superaligned) {
- char *start = mmap_state.sa.top;
+ char *start = mm->sa.top;
seg = (char *) ERTS_SUPERALIGNED_CEILING(start);
- if (asize + (seg - start) <= mmap_state.sua.bot - start) {
+ if (asize + (seg - start) <= mm->sua.bot - start) {
end = seg + asize;
- if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
+ if (!mm->reserve_physical(start, (UWord) (end - start),
+ mm->executable))
goto supercarrier_reserve_failure;
- mmap_state.sa.top = end;
+ mm->sa.top = end;
ERTS_MMAP_SIZE_SC_SA_INC(asize);
if (start != seg) {
UWord ad_sz;
- ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
start, seg);
start += ad_sz;
if (start != seg)
- mmap_state.unreserve_physical(start, (UWord) (seg - start));
+ mm->unreserve_physical(start, (UWord) (seg - start));
}
goto supercarrier_success;
}
- desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE);
+ desc = lookup_free_seg(&mm->sua.map, asize + ERTS_SUPERALIGNED_SIZE);
if (desc) {
char *org_start = desc->start;
char *org_end = desc->end;
seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
end = seg + asize;
- if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg)))
+ if (!mm->reserve_physical(seg, (UWord) (org_end - seg),
+ mm->executable))
goto supercarrier_reserve_failure;
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
if (org_start != seg) {
ERTS_MMAP_ASSERT(org_start < seg);
- resize_free_seg(&mmap_state.sua.map, desc, org_start, seg);
+ resize_free_seg(&mm->sua.map, desc, org_start, seg);
desc = NULL;
}
if (end != org_end) {
UWord ad_sz = 0;
ERTS_MMAP_ASSERT(end < org_end);
if (desc)
- resize_free_seg(&mmap_state.sua.map, desc, end, org_end);
+ resize_free_seg(&mm->sua.map, desc, end, org_end);
else
- ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
end, org_end);
end += ad_sz;
if (end != org_end)
- mmap_state.unreserve_physical(end,
+ mm->unreserve_physical(end,
(UWord) (org_end - end));
}
goto supercarrier_success;
@@ -1597,20 +1659,20 @@ erts_mmap(Uint32 flags, UWord *sizep)
}
ERTS_MMAP_OP_ABORT();
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
}
#if ERTS_HAVE_OS_MMAP
/* Map using OS primitives */
- if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) {
+ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) {
if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
- seg = os_mmap(NULL, asize, 0);
+ seg = os_mmap(NULL, asize, 0, mm->executable);
if (!seg)
goto failure;
}
else {
asize = ERTS_SUPERALIGNED_CEILING(*sizep);
- seg = os_mmap(NULL, asize, 1);
+ seg = os_mmap(NULL, asize, 1, mm->executable);
if (!seg)
goto failure;
@@ -1620,7 +1682,8 @@ erts_mmap(Uint32 flags, UWord *sizep)
os_munmap(seg, asize);
- ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1);
+ ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1,
+ mm->executable);
if (!ptr)
goto failure;
@@ -1660,25 +1723,25 @@ supercarrier_success:
#endif
ERTS_MMAP_OP_END(seg, asize);
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
*sizep = asize;
return (void *) seg;
supercarrier_reserve_failure:
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
*sizep = 0;
return NULL;
}
void
-erts_munmap(Uint32 flags, void *ptr, UWord size)
+erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size)
{
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));
if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
- ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
+ ERTS_MMAP_ASSERT(!mm->no_os_mmap);
#if ERTS_HAVE_OS_MMAP
ERTS_MUNMAP_OP_LCK(ptr, size);
ERTS_MMAP_SIZE_OS_DEC(size);
@@ -1691,45 +1754,45 @@ erts_munmap(Uint32 flags, void *ptr, UWord size)
ErtsFreeSegDesc *prev, *next, *desc;
UWord ad_sz = 0;
- ERTS_MMAP_ASSERT(mmap_state.supercarrier);
+ ERTS_MMAP_ASSERT(mm->supercarrier);
start = (char *) ptr;
end = start + size;
- erts_smp_mtx_lock(&mmap_state.mtx);
+ erts_mtx_lock(&mm->mtx);
ERTS_MUNMAP_OP(ptr, size);
if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
- map = &mmap_state.sa.map;
+ map = &mm->sa.map;
adjacent_free_seg(map, start, end, &prev, &next);
ERTS_MMAP_SIZE_SC_SA_DEC(size);
- if (end == mmap_state.sa.top) {
+ if (end == mm->sa.top) {
ERTS_MMAP_ASSERT(!next);
if (prev) {
start = prev->start;
delete_free_seg(map, prev);
- free_desc(prev);
+ free_desc(mm, prev);
}
- mmap_state.sa.top = start;
+ mm->sa.top = start;
goto supercarrier_success;
}
}
else {
- map = &mmap_state.sua.map;
+ map = &mm->sua.map;
adjacent_free_seg(map, start, end, &prev, &next);
ERTS_MMAP_SIZE_SC_SUA_DEC(size);
- if (start == mmap_state.sua.bot) {
+ if (start == mm->sua.bot) {
ERTS_MMAP_ASSERT(!prev);
if (next) {
end = next->end;
delete_free_seg(map, next);
- free_desc(next);
+ free_desc(mm, next);
}
- mmap_state.sua.bot = end;
+ mm->sua.bot = end;
goto supercarrier_success;
}
}
@@ -1741,7 +1804,7 @@ erts_munmap(Uint32 flags, void *ptr, UWord size)
end = next->end;
if (prev) {
delete_free_seg(map, next);
- free_desc(next);
+ free_desc(mm, next);
goto save_prev;
}
desc = next;
@@ -1755,7 +1818,7 @@ erts_munmap(Uint32 flags, void *ptr, UWord size)
if (desc)
resize_free_seg(map, desc, start, end);
else
- ad_sz = alloc_desc_insert_free_seg(map, start, end);
+ ad_sz = alloc_desc_insert_free_seg(mm, map, start, end);
supercarrier_success: {
UWord unres_sz;
@@ -1763,30 +1826,32 @@ erts_munmap(Uint32 flags, void *ptr, UWord size)
ERTS_MMAP_ASSERT(size >= ad_sz);
unres_sz = size - ad_sz;
if (unres_sz)
- mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz);
+ mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz);
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
}
}
}
static void *
-remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+remap_move(ErtsMemMapper* mm,
+ Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
{
UWord size = *sizep;
- void *new_ptr = erts_mmap(flags, &size);
+ void *new_ptr = erts_mmap(mm, flags, &size);
if (!new_ptr)
return NULL;
*sizep = size;
if (old_size < size)
size = old_size;
sys_memcpy(new_ptr, ptr, (size_t) size);
- erts_munmap(flags, ptr, old_size);
+ erts_munmap(mm, flags, ptr, old_size);
return new_ptr;
}
void *
-erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+erts_mremap(ErtsMemMapper* mm,
+ Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
{
void *new_ptr;
Uint32 superaligned;
@@ -1798,11 +1863,11 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
- ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
+ ERTS_MMAP_ASSERT(!mm->no_os_mmap);
- if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) {
- new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
- old_size, sizep);
+ if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mm->supercarrier) {
+ new_ptr = remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
+ ptr, old_size, sizep);
if (new_ptr)
return new_ptr;
}
@@ -1812,7 +1877,7 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
return NULL;
}
-#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP
+#if defined(ERTS_HAVE_OS_MREMAP) || defined(ERTS_HAVE_GENUINE_OS_MMAP)
superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
if (superaligned) {
@@ -1832,7 +1897,7 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
}
}
-#if ERTS_HAVE_GENUINE_OS_MMAP
+#ifdef ERTS_HAVE_GENUINE_OS_MMAP
if (asize < old_size
&& (!superaligned
|| ERTS_IS_SUPERALIGNED(ptr))) {
@@ -1847,9 +1912,9 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
return ptr;
}
#endif
-#if ERTS_HAVE_OS_MREMAP
+#ifdef ERTS_HAVE_OS_MREMAP
if (superaligned)
- return remap_move(flags, new_ptr, old_size, sizep);
+ return remap_move(mm, flags, new_ptr, old_size, sizep);
else {
new_ptr = os_mremap(ptr, old_size, asize, 0);
if (!new_ptr)
@@ -1871,10 +1936,10 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
ErtsFreeSegDesc *prev, *next;
UWord ad_sz = 0;
- ERTS_MMAP_ASSERT(mmap_state.supercarrier);
+ ERTS_MMAP_ASSERT(mm->supercarrier);
if (ERTS_MMAPFLG_OS_ONLY & flags)
- return remap_move(flags, ptr, old_size, sizep);
+ return remap_move(mm, flags, ptr, old_size, sizep);
superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
@@ -1882,19 +1947,19 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
? ERTS_SUPERALIGNED_CEILING(*sizep)
: ERTS_PAGEALIGNED_CEILING(*sizep));
- erts_smp_mtx_lock(&mmap_state.mtx);
+ erts_mtx_lock(&mm->mtx);
if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
- ? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize))
- : (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) {
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ ? (!superaligned && lookup_free_seg(&mm->sua.map, asize))
+ : (superaligned && lookup_free_seg(&mm->sa.map, asize))) {
+ erts_mtx_unlock(&mm->mtx);
/*
* Segment currently in wrong area (due to a previous memory
* shortage), move it to the right area.
* (remap_move() will succeed)
*/
- return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
- old_size, sizep);
+ return remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
+ ptr, old_size, sizep);
}
ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
@@ -1916,18 +1981,18 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
UWord unres_sz;
new_ptr = ptr;
if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
- map = &mmap_state.sua.map;
+ map = &mm->sua.map;
ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
}
else {
- if (end == mmap_state.sa.top) {
- mmap_state.sa.top = new_end;
- mmap_state.unreserve_physical(((char *) ptr) + asize,
+ if (end == mm->sa.top) {
+ mm->sa.top = new_end;
+ mm->unreserve_physical(((char *) ptr) + asize,
old_size - asize);
goto supercarrier_resize_success;
}
ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
- map = &mmap_state.sa.map;
+ map = &mm->sa.map;
}
adjacent_free_seg(map, start, end, &prev, &next);
@@ -1935,11 +2000,11 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
if (next)
resize_free_seg(map, next, new_end, next->end);
else
- ad_sz = alloc_desc_insert_free_seg(map, new_end, end);
+ ad_sz = alloc_desc_insert_free_seg(mm, map, new_end, end);
ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
unres_sz = old_size - asize - ad_sz;
if (unres_sz)
- mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz,
+ mm->unreserve_physical(((char *) ptr) + asize + ad_sz,
unres_sz);
goto supercarrier_resize_success;
}
@@ -1949,17 +2014,18 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
- adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next);
+ adjacent_free_seg(&mm->sua.map, start, end, &prev, &next);
if (next && new_end <= next->end) {
- if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
- asize - old_size))
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
goto supercarrier_reserve_failure;
if (new_end < next->end)
- resize_free_seg(&mmap_state.sua.map, next, new_end, next->end);
+ resize_free_seg(&mm->sua.map, next, new_end, next->end);
else {
- delete_free_seg(&mmap_state.sua.map, next);
- free_desc(next);
+ delete_free_seg(&mm->sua.map, next);
+ free_desc(mm, next);
}
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
@@ -1968,28 +2034,30 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
}
else { /* Superaligned area */
- if (end == mmap_state.sa.top) {
- if (new_end <= mmap_state.sua.bot) {
- if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
- asize - old_size))
+ if (end == mm->sa.top) {
+ if (new_end <= mm->sua.bot) {
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
goto supercarrier_reserve_failure;
- mmap_state.sa.top = new_end;
+ mm->sa.top = new_end;
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
goto supercarrier_resize_success;
}
}
else {
- adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next);
+ adjacent_free_seg(&mm->sa.map, start, end, &prev, &next);
if (next && new_end <= next->end) {
- if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
- asize - old_size))
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
goto supercarrier_reserve_failure;
if (new_end < next->end)
- resize_free_seg(&mmap_state.sa.map, next, new_end, next->end);
+ resize_free_seg(&mm->sa.map, next, new_end, next->end);
else {
- delete_free_seg(&mmap_state.sa.map, next);
- free_desc(next);
+ delete_free_seg(&mm->sa.map, next);
+ free_desc(mm, next);
}
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
@@ -1999,12 +2067,12 @@ erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
}
ERTS_MMAP_OP_ABORT();
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
/* Failed to resize... */
}
- return remap_move(flags, ptr, old_size, sizep);
+ return remap_move(mm, flags, ptr, old_size, sizep);
supercarrier_resize_success:
@@ -2021,26 +2089,26 @@ supercarrier_resize_success:
#endif
ERTS_MREMAP_OP_END(new_ptr, asize);
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
*sizep = asize;
return new_ptr;
supercarrier_reserve_failure:
ERTS_MREMAP_OP_END(NULL, old_size);
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ erts_mtx_unlock(&mm->mtx);
*sizep = old_size;
return NULL;
}
-int erts_mmap_in_supercarrier(void *ptr)
+int erts_mmap_in_supercarrier(ErtsMemMapper* mm, void *ptr)
{
return ERTS_MMAP_IN_SUPERCARRIER(ptr);
}
-
static struct {
+ Eterm options;
Eterm total;
Eterm total_sa;
Eterm total_sua;
@@ -2074,6 +2142,7 @@ static void init_atoms(void)
erts_mtx_lock(&am.init_mutex);
if (!am.is_initialized) {
+ AM_INIT(options);
AM_INIT(total);
AM_INIT(total_sa);
AM_INIT(total_sua);
@@ -2102,8 +2171,9 @@ static void hard_dbg_mseg_init(void);
#endif
void
-erts_mmap_init(ErtsMMapInit *init)
+erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable)
{
+ static int is_first_call = 1;
int virtual_map = 0;
char *start = NULL, *end = NULL;
UWord pagesize;
@@ -2125,25 +2195,28 @@ erts_mmap_init(ErtsMMapInit *init)
#endif
erts_page_inv_mask = pagesize - 1;
if (pagesize & erts_page_inv_mask)
- erl_exit(-1, "erts_mmap: Invalid pagesize: %bpu\n",
+ erts_exit(1, "erts_mmap: Invalid pagesize: %bpu\n",
pagesize);
ERTS_MMAP_OP_RINGBUF_INIT();
- erts_have_erts_mmap = 0;
-
- mmap_state.supercarrier = 0;
- mmap_state.reserve_physical = reserve_noop;
- mmap_state.unreserve_physical = unreserve_noop;
+ mm->supercarrier = 0;
+ mm->reserve_physical = reserve_noop;
+ mm->unreserve_physical = unreserve_noop;
+ mm->executable = executable;
#if HAVE_MMAP && !defined(MAP_ANON)
- mmap_state.mmap_fd = open("/dev/zero", O_RDWR);
- if (mmap_state.mmap_fd < 0)
- erl_exit(-1, "erts_mmap: Failed to open /dev/zero\n");
+ mm->mmap_fd = open("/dev/zero", O_RDWR);
+ if (mm->mmap_fd < 0)
+ erts_exit(1, "erts_mmap: Failed to open /dev/zero\n");
#endif
- erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap");
- erts_mtx_init(&am.init_mutex, "mmap_init_atoms");
+ erts_mtx_init(&mm->mtx, "erts_mmap", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+ if (is_first_call) {
+ erts_mtx_init(&am.init_mutex, "mmap_init_atoms", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
+ }
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (init->virtual_range.start) {
@@ -2152,15 +2225,15 @@ erts_mmap_init(ErtsMMapInit *init)
ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
sz = end - ptr;
- start = os_mmap_virtual(ptr, sz);
+ start = os_mmap_virtual(ptr, sz, executable);
if (!start || start > ptr || start >= end)
- erl_exit(-1,
+ erts_exit(1,
"erts_mmap: Failed to create virtual range for super carrier\n");
sz = start - ptr;
if (sz)
os_munmap(end, sz);
- mmap_state.reserve_physical = os_reserve_physical;
- mmap_state.unreserve_physical = os_unreserve_physical;
+ mm->reserve_physical = os_reserve_physical;
+ mm->unreserve_physical = os_unreserve_physical;
virtual_map = 1;
}
else
@@ -2177,9 +2250,9 @@ erts_mmap_init(ErtsMMapInit *init)
sz = ERTS_PAGEALIGNED_CEILING(init->scs);
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (!init->scrpm) {
- start = os_mmap_virtual(NULL, sz);
- mmap_state.reserve_physical = os_reserve_physical;
- mmap_state.unreserve_physical = os_unreserve_physical;
+ start = os_mmap_virtual(NULL, sz, executable);
+ mm->reserve_physical = os_reserve_physical;
+ mm->unreserve_physical = os_unreserve_physical;
virtual_map = 1;
}
else
@@ -2189,10 +2262,10 @@ erts_mmap_init(ErtsMMapInit *init)
* The whole supercarrier will by physically
* reserved all the time.
*/
- start = os_mmap(NULL, sz, 1);
+ start = os_mmap(NULL, sz, 1, executable);
}
if (!start)
- erl_exit(-1,
+ erts_exit(1,
"erts_mmap: Failed to create super carrier of size %bpu MB\n",
init->scs/1024/1024);
end = start + sz;
@@ -2205,34 +2278,32 @@ erts_mmap_init(ErtsMMapInit *init)
}
#endif
}
- if (!mmap_state.no_os_mmap)
- erts_have_erts_mmap |= ERTS_HAVE_ERTS_OS_MMAP;
#endif
- mmap_state.no.free_seg_descs = 0;
- mmap_state.no.free_segs.curr = 0;
- mmap_state.no.free_segs.max = 0;
+ mm->no.free_seg_descs = 0;
+ mm->no.free_segs.curr = 0;
+ mm->no.free_segs.max = 0;
- mmap_state.size.supercarrier.total = 0;
- mmap_state.size.supercarrier.used.total = 0;
- mmap_state.size.supercarrier.used.sa = 0;
- mmap_state.size.supercarrier.used.sua = 0;
- mmap_state.size.os.used = 0;
+ mm->size.supercarrier.total = 0;
+ mm->size.supercarrier.used.total = 0;
+ mm->size.supercarrier.used.sa = 0;
+ mm->size.supercarrier.used.sua = 0;
+ mm->size.os.used = 0;
- mmap_state.desc.new_area_hint = NULL;
+ mm->desc.new_area_hint = NULL;
if (!start) {
- mmap_state.sa.bot = NULL;
- mmap_state.sua.top = NULL;
- mmap_state.sa.bot = NULL;
- mmap_state.sua.top = NULL;
- mmap_state.no_os_mmap = 0;
- mmap_state.supercarrier = 0;
+ mm->sa.bot = NULL;
+ mm->sua.top = NULL;
+ mm->sa.bot = NULL;
+ mm->sua.top = NULL;
+ mm->no_os_mmap = 0;
+ mm->supercarrier = 0;
}
else {
size_t desc_size;
- mmap_state.no_os_mmap = init->sco;
+ mm->no_os_mmap = init->sco;
desc_size = init->scrfsd;
if (desc_size < 100)
@@ -2241,68 +2312,75 @@ erts_mmap_init(ErtsMMapInit *init)
if ((desc_size
+ ERTS_SUPERALIGNED_SIZE
+ ERTS_PAGEALIGNED_SIZE) > end - start)
- erl_exit(-1, "erts_mmap: No space for segments in super carrier\n");
+ erts_exit(1, "erts_mmap: No space for segments in super carrier\n");
- mmap_state.sa.bot = start;
- mmap_state.sa.bot += desc_size;
- mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot);
- mmap_state.sa.top = mmap_state.sa.bot;
- mmap_state.sua.top = end;
- mmap_state.sua.bot = mmap_state.sua.top;
+ mm->sa.bot = start;
+ mm->sa.bot += desc_size;
+ mm->sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mm->sa.bot);
+ mm->sa.top = mm->sa.bot;
+ mm->sua.top = end;
+ mm->sua.bot = mm->sua.top;
- mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start);
+ mm->size.supercarrier.used.total += (UWord) (mm->sa.bot - start);
- mmap_state.desc.free_list = NULL;
- mmap_state.desc.reserved = 0;
+ mm->desc.free_list = NULL;
+ mm->desc.reserved = 0;
if (end == (void *) 0) {
/*
* Very unlikely, but we need a guarantee
- * that `mmap_state.sua.top` always will
+ * that `mm->sua.top` always will
* compare as larger than all segment pointers
* into the super carrier...
*/
- mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE;
- mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
+ mm->sua.top -= ERTS_PAGEALIGNED_SIZE;
+ mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
- if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE))
+ if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE, 0))
#endif
- add_free_desc_area(mmap_state.sua.top, end);
- mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc);
+ add_free_desc_area(mm, mm->sua.top, end);
+ mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc);
}
- mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start);
+ mm->size.supercarrier.total = (UWord) (mm->sua.top - start);
/*
* Area before (and after) super carrier
* will be used for free segment descritors.
*/
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
- if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start))
- erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
+ if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start, 0))
+ erts_exit(1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
#endif
- mmap_state.desc.unused_start = start;
- mmap_state.desc.unused_end = mmap_state.sa.bot;
- mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start)
+ mm->desc.unused_start = start;
+ mm->desc.unused_end = mm->sa.bot;
+ mm->desc.reserved += ((mm->desc.unused_end - start)
/ sizeof(ErtsFreeSegDesc));
- init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER);
- init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER);
+ init_free_seg_map(&mm->sa.map, SA_SZ_ADDR_ORDER);
+ init_free_seg_map(&mm->sua.map, SZ_REVERSE_ADDR_ORDER);
- mmap_state.supercarrier = 1;
- erts_have_erts_mmap |= ERTS_HAVE_ERTS_SUPERCARRIER_MMAP;
+ mm->supercarrier = 1;
- mmap_state.desc.new_area_hint = end;
+ mm->desc.new_area_hint = end;
}
#if !ERTS_HAVE_OS_MMAP
- mmap_state.no_os_mmap = 1;
+ mm->no_os_mmap = 1;
#endif
#ifdef HARD_DEBUG_MSEG
hard_dbg_mseg_init();
#endif
+
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ if (mm == &erts_literal_mmapper) {
+ erts_literals_start = erts_literal_mmapper.sa.bot;
+ erts_literals_size = erts_literal_mmapper.sua.top - erts_literals_start;
+ }
+#endif
+ is_first_call = 0;
}
@@ -2312,7 +2390,8 @@ add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
*lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
}
-Eterm erts_mmap_info(int *print_to_p,
+Eterm erts_mmap_info(ErtsMemMapper* mm,
+ fmtfn_t *print_to_p,
void *print_to_arg,
Eterm** hpp, Uint* szp,
struct erts_mmap_info_struct* emis)
@@ -2321,35 +2400,41 @@ Eterm erts_mmap_info(int *print_to_p,
Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
Eterm group[2];
Eterm group_tags[] = { am.sizes, am.free_segs };
- Eterm list[2];
- Eterm list_tags[2]; /* { am.supercarrier, am.os } */
- int lix;
+ Eterm list[3];
+ Eterm list_tags[3]; /* { am.options, am.supercarrier, am.os } */
+ int lix = 0;
Eterm res = THE_NON_VALUE;
if (!hpp) {
- erts_smp_mtx_lock(&mmap_state.mtx);
- emis->sizes[0] = mmap_state.size.supercarrier.total;
- emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot;
- emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot;
- emis->sizes[3] = mmap_state.size.supercarrier.used.total;
- emis->sizes[4] = mmap_state.size.supercarrier.used.sa;
- emis->sizes[5] = mmap_state.size.supercarrier.used.sua;
+ erts_mtx_lock(&mm->mtx);
+ emis->sizes[0] = mm->size.supercarrier.total;
+ emis->sizes[1] = mm->sa.top - mm->sa.bot;
+ emis->sizes[2] = mm->sua.top - mm->sua.bot;
+ emis->sizes[3] = mm->size.supercarrier.used.total;
+ emis->sizes[4] = mm->size.supercarrier.used.sa;
+ emis->sizes[5] = mm->size.supercarrier.used.sua;
- emis->segs[0] = mmap_state.no.free_segs.curr;
- emis->segs[1] = mmap_state.no.free_segs.max;
- emis->segs[2] = mmap_state.no.free_seg_descs;
- emis->segs[3] = mmap_state.desc.reserved;
- emis->segs[4] = mmap_state.sa.map.nseg;
- emis->segs[5] = mmap_state.sua.map.nseg;
+ emis->segs[0] = mm->no.free_segs.curr;
+ emis->segs[1] = mm->no.free_segs.max;
+ emis->segs[2] = mm->no.free_seg_descs;
+ emis->segs[3] = mm->desc.reserved;
+ emis->segs[4] = mm->sa.map.nseg;
+ emis->segs[5] = mm->sua.map.nseg;
- emis->os_used = mmap_state.size.os.used;
- erts_smp_mtx_unlock(&mmap_state.mtx);
+ emis->os_used = mm->size.os.used;
+ erts_mtx_unlock(&mm->mtx);
}
+ list[lix] = erts_mmap_info_options(mm, "option ", print_to_p, print_to_arg,
+ hpp, szp);
+ list_tags[lix] = am.options;
+ lix++;
+
+
if (print_to_p) {
- int to = *print_to_p;
+ fmtfn_t to = *print_to_p;
void *arg = print_to_arg;
- if (mmap_state.supercarrier) {
+ if (mm->supercarrier) {
const char* prefix = "supercarrier ";
erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
@@ -2364,7 +2449,7 @@ Eterm erts_mmap_info(int *print_to_p,
erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
}
- if (!mmap_state.no_os_mmap) {
+ if (!mm->no_os_mmap) {
erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
}
}
@@ -2375,8 +2460,7 @@ Eterm erts_mmap_info(int *print_to_p,
init_atoms();
}
- lix = 0;
- if (mmap_state.supercarrier) {
+ if (mm->supercarrier) {
group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
sizeof(size_tags)/sizeof(Eterm),
size_tags, emis->sizes);
@@ -2388,7 +2472,7 @@ Eterm erts_mmap_info(int *print_to_p,
lix++;
}
- if (!mmap_state.no_os_mmap) {
+ if (!mm->no_os_mmap) {
group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
1, &am.used, &emis->os_used);
list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
@@ -2400,25 +2484,26 @@ Eterm erts_mmap_info(int *print_to_p,
return res;
}
-Eterm erts_mmap_info_options(char *prefix,
- int *print_to_p,
+Eterm erts_mmap_info_options(ErtsMemMapper* mm,
+ char *prefix,
+ fmtfn_t *print_to_p,
void *print_to_arg,
Uint **hpp,
Uint *szp)
{
- const UWord scs = mmap_state.sua.top - mmap_state.sa.bot;
- const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false;
- const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false;
+ const UWord scs = mm->sua.top - mm->sa.bot;
+ const Eterm sco = mm->no_os_mmap ? am_true : am_false;
+ const Eterm scrpm = (mm->reserve_physical == reserve_noop) ? am_true : am_false;
Eterm res = THE_NON_VALUE;
if (print_to_p) {
- int to = *print_to_p;
+ fmtfn_t to = *print_to_p;
void *arg = print_to_arg;
erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
- if (mmap_state.supercarrier) {
+ if (mm->supercarrier) {
erts_print(to, arg, "%ssco: %T\n", prefix, sco);
erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
- erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mmap_state.desc.reserved);
+ erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mm->desc.reserved);
}
}
@@ -2428,9 +2513,9 @@ Eterm erts_mmap_info_options(char *prefix,
}
res = NIL;
- if (mmap_state.supercarrier) {
+ if (mm->supercarrier) {
add_2tup(hpp, szp, &res, am.scrfsd,
- erts_bld_uint(hpp,szp, mmap_state.desc.reserved));
+ erts_bld_uint(hpp,szp, mm->desc.reserved));
add_2tup(hpp, szp, &res, am.scrpm, scrpm);
add_2tup(hpp, szp, &res, am.sco, sco);
}
@@ -2439,10 +2524,14 @@ Eterm erts_mmap_info_options(char *prefix,
return res;
}
+#endif /* HAVE_ERTS_MMAP */
Eterm erts_mmap_debug_info(Process* p)
{
- if (mmap_state.supercarrier) {
+#if HAVE_ERTS_MMAP
+ ErtsMemMapper* mm = &erts_dflt_mmapper;
+
+ if (mm->supercarrier) {
ERTS_DECL_AM(sabot);
ERTS_DECL_AM(satop);
ERTS_DECL_AM(suabot);
@@ -2452,18 +2541,17 @@ Eterm erts_mmap_debug_info(Process* p)
UWord values[4];
Eterm *hp, *hp_end;
Uint may_need;
- const Uint PTR_BIG_SZ = HALFWORD_HEAP ? 3 : 2;
-
- erts_smp_mtx_lock(&mmap_state.mtx);
- values[0] = (UWord)mmap_state.sa.bot;
- values[1] = (UWord)mmap_state.sa.top;
- values[2] = (UWord)mmap_state.sua.bot;
- values[3] = (UWord)mmap_state.sua.top;
- sa_list = build_free_seg_list(p, &mmap_state.sa.map);
- sua_list = build_free_seg_list(p, &mmap_state.sua.map);
- erts_smp_mtx_unlock(&mmap_state.mtx);
-
- may_need = 4*(2+3+PTR_BIG_SZ) + 2*(2+3);
+
+ erts_mtx_lock(&mm->mtx);
+ values[0] = (UWord)mm->sa.bot;
+ values[1] = (UWord)mm->sa.top;
+ values[2] = (UWord)mm->sua.bot;
+ values[3] = (UWord)mm->sua.top;
+ sa_list = build_free_seg_list(p, &mm->sa.map);
+ sua_list = build_free_seg_list(p, &mm->sua.map);
+ erts_mtx_unlock(&mm->mtx);
+
+ may_need = 4*(2+3+2) + 2*(2+3);
hp = HAlloc(p, may_need);
hp_end = hp + may_need;
@@ -2480,9 +2568,8 @@ Eterm erts_mmap_debug_info(Process* p)
HRelease(p, hp_end, hp);
return list;
}
- else {
- return am_undefined;
- }
+#endif
+ return am_undefined;
}
@@ -2730,7 +2817,8 @@ static void hard_dbg_mseg_init(void)
{
ErtsFreeSegDesc_fake* p;
- erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg");
+ erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG);
hard_dbg_mseg_tree.root = NULL;
hard_dbg_mseg_tree.order = ADDR_ORDER;
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
index 778a8e0e80..2a07d93c8c 100644
--- a/erts/emulator/sys/common/erl_mmap.h
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2013. All Rights Reserved.
+ * Copyright Ericsson AB 2013-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -21,17 +22,52 @@
#define ERL_MMAP_H__
#include "sys.h"
+#include "erl_printf.h"
#define ERTS_MMAP_SUPERALIGNED_BITS (18)
/* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
-#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0)
-#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1)
-#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2)
+#ifndef HAVE_MMAP
+# define HAVE_MMAP 0
+#endif
+#ifndef HAVE_MREMAP
+# define HAVE_MREMAP 0
+#endif
+#if HAVE_MMAP
+# define ERTS_HAVE_OS_MMAP 1
+# define ERTS_HAVE_GENUINE_OS_MMAP 1
+# if HAVE_MREMAP
+# define ERTS_HAVE_OS_MREMAP 1
+# endif
+/*
+ * MAP_NORESERVE is undefined in FreeBSD 10.x and later.
+ * This is to enable 64bit HiPE experimentally on FreeBSD.
+ * Note that on FreeBSD MAP_NORESERVE was "never implemented"
+ * even before 11.x (and the flag does not exist in /usr/src/sys/vm/mmap.c
+ * of 10.3-STABLE r301478 either), and HiPE was working on OTP 18.3.3,
+ * so mandating MAP_NORESERVE on FreeBSD might not be needed.
+ * See the following message on how MAP_NORESERVE was treated on FreeBSD:
+ * <http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20150202/122958.html>
+ */
+# if defined(MAP_FIXED) && (defined(MAP_NORESERVE) || defined(__FreeBSD__))
+# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1
+# endif
+#endif
+
+#ifndef HAVE_VIRTUALALLOC
+# define HAVE_VIRTUALALLOC 0
+#endif
+#if HAVE_VIRTUALALLOC
+# define ERTS_HAVE_OS_MMAP 1
+#endif
+
+#ifdef ERTS_HAVE_GENUINE_OS_MMAP
+# define HAVE_ERTS_MMAP 1
+#else
+# define HAVE_ERTS_MMAP 0
+#endif
+
-#define ERTS_HAVE_ERTS_OS_MMAP (1 << 0)
-#define ERTS_HAVE_ERTS_SUPERCARRIER_MMAP (1 << 1)
-extern int erts_have_erts_mmap;
extern UWord erts_page_inv_mask;
typedef struct {
@@ -52,23 +88,16 @@ typedef struct {
#define ERTS_MMAP_INIT_DEFAULT_INITER \
{{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1}
-void *erts_mmap(Uint32 flags, UWord *sizep);
-void erts_munmap(Uint32 flags, void *ptr, UWord size);
-void *erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep);
-int erts_mmap_in_supercarrier(void *ptr);
-void erts_mmap_init(ErtsMMapInit*);
-struct erts_mmap_info_struct
-{
- UWord sizes[6];
- UWord segs[6];
- UWord os_used;
-};
-Eterm erts_mmap_info(int *print_to_p, void *print_to_arg,
- Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*);
-Eterm erts_mmap_info_options(char *prefix, int *print_to_p, void *print_to_arg,
- Uint **hpp, Uint *szp);
-struct process;
-Eterm erts_mmap_debug_info(struct process*);
+#define ERTS_LITERAL_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(1)*1024*1024*1024)
+
+#define ERTS_MMAP_INIT_LITERAL_INITER \
+ {{NULL, NULL}, {NULL, NULL}, ERTS_LITERAL_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0}
+
+#define ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(512)*1024*1024)
+
+#define ERTS_MMAP_INIT_HIPE_EXEC_INITER \
+ {{NULL, NULL}, {NULL, NULL}, ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0}
+
#define ERTS_SUPERALIGNED_SIZE \
(1 << ERTS_MMAP_SUPERALIGNED_BITS)
@@ -96,29 +125,57 @@ Eterm erts_mmap_debug_info(struct process*);
#define ERTS_PAGEALIGNED_SIZE \
(ERTS_INV_PAGEALIGNED_MASK + 1)
-#ifndef HAVE_MMAP
-# define HAVE_MMAP 0
-#endif
-#ifndef HAVE_MREMAP
-# define HAVE_MREMAP 0
-#endif
-#if HAVE_MMAP
-# define ERTS_HAVE_OS_MMAP 1
-# define ERTS_HAVE_GENUINE_OS_MMAP 1
-# if HAVE_MREMAP
-# define ERTS_HAVE_OS_MREMAP 1
+struct process;
+Eterm erts_mmap_debug_info(struct process*);
+
+#if HAVE_ERTS_MMAP
+
+typedef struct ErtsMemMapper_ ErtsMemMapper;
+
+#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0)
+#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1)
+#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2)
+
+void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep);
+void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size);
+void *erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep);
+int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr);
+void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*, int executable);
+struct erts_mmap_info_struct
+{
+ UWord sizes[6];
+ UWord segs[6];
+ UWord os_used;
+};
+Eterm erts_mmap_info(ErtsMemMapper*, fmtfn_t *print_to_p, void *print_to_arg,
+ Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*);
+Eterm erts_mmap_info_options(ErtsMemMapper*,
+ char *prefix, fmtfn_t *print_to_p, void *print_to_arg,
+ Uint **hpp, Uint *szp);
+
+
+#ifdef ERTS_WANT_MEM_MAPPERS
+# include "erl_alloc_types.h"
+
+extern ErtsMemMapper erts_dflt_mmapper;
+
+# if defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+
+# if defined(ARCH_64)
+extern ErtsMemMapper erts_literal_mmapper;
# endif
-# if defined(MAP_FIXED) && defined(MAP_NORESERVE)
-# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1
+
+# if defined(ERTS_ALC_A_EXEC) && defined(__x86_64__)
+ /*
+ * On x86_64, exec_alloc employs its own super carrier 'erts_exec_mmaper'
+ * to ensure low memory for HiPE AMD64 small code model.
+ */
+# define ERTS_HAVE_EXEC_MMAPPER
+extern ErtsMemMapper erts_exec_mmapper;
# endif
-#endif
-#ifndef HAVE_VIRTUALALLOC
-# define HAVE_VIRTUALALLOC 0
-#endif
-#if HAVE_VIRTUALALLOC
-# define ERTS_HAVE_OS_MMAP 1
-#endif
+# endif /* ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION */
+#endif /* ERTS_WANT_MEM_MAPPERS */
/*#define HARD_DEBUG_MSEG*/
#ifdef HARD_DEBUG_MSEG
@@ -131,4 +188,6 @@ void hard_dbg_remove_mseg(void* seg, UWord sz);
# define HARD_DBG_REMOVE_MSEG(SEG,SZ)
#endif
+#endif /* HAVE_ERTS_MMAP */
+
#endif /* ERL_MMAP_H__ */
diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c
index 94a381e168..bf6de9b13a 100644
--- a/erts/emulator/sys/common/erl_mseg.c
+++ b/erts/emulator/sys/common/erl_mseg.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2017. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -30,6 +31,7 @@
# include "config.h"
#endif
+#define ERTS_WANT_MEM_MAPPERS
#include "sys.h"
#include "erl_mseg.h"
#include "global.h"
@@ -81,11 +83,13 @@ static const int debruijn[32] = {
31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
};
-#define LOG2(X) (debruijn[((Uint32)(((X) & -(X)) * 0x077CB531U)) >> 27])
+#define LSB(X) (debruijn[((Uint32)(((X) & -(X)) * 0x077CB531U)) >> 27])
#define CACHE_AREAS (32 - MSEG_ALIGN_BITS)
-#define SIZE_TO_CACHE_AREA_IDX(S) (LOG2((S)) - MSEG_ALIGN_BITS)
+/* FIXME: segment sizes > 2 GB result in bogus negative indices */
+/* NOTE: using LSB instead of proper log2 only works if S is a power of 2 */
+#define SIZE_TO_CACHE_AREA_IDX(S) (LSB((S)) - MSEG_ALIGN_BITS)
#define MAX_CACHE_SIZE (30)
#define MSEG_FLG_IS_2POW(X) ((X) & ERTS_MSEG_FLG_2POW)
@@ -98,17 +102,12 @@ static const int debruijn[32] = {
static int atoms_initialized;
-typedef struct mem_kind_t MemKind;
-
const ErtsMsegOpt_t erts_mseg_default_opt = {
1, /* Use cache */
1, /* Preserv data */
0, /* Absolute shrink threshold */
0, /* Relative shrink threshold */
0 /* Scheduler specific */
-#if HALFWORD_HEAP
- ,0 /* need low memory */
-#endif
};
@@ -141,7 +140,14 @@ struct cache_t_ {
typedef struct ErtsMsegAllctr_t_ ErtsMsegAllctr_t;
-struct mem_kind_t {
+struct ErtsMsegAllctr_t_ {
+ int ix;
+
+ int is_init_done;
+ int is_thread_safe;
+ erts_mtx_t mtx;
+
+ int is_cache_check_scheduled;
cache_t cache[MAX_CACHE_SIZE];
cache_t cache_unpowered_node;
@@ -167,29 +173,6 @@ struct mem_kind_t {
} max_ever;
} segments;
- ErtsMsegAllctr_t *ma;
- const char* name;
- MemKind* next;
-};/*MemKind*/
-
-struct ErtsMsegAllctr_t_ {
- int ix;
-
- int is_init_done;
- int is_thread_safe;
- erts_mtx_t mtx;
-
- int is_cache_check_scheduled;
-
- MemKind* mk_list;
-
-#if HALFWORD_HEAP
- MemKind low_mem;
- MemKind hi_mem;
-#else
- MemKind the_mem;
-#endif
-
Uint max_cache_size;
Uint abs_max_cache_bad_fit;
Uint rel_max_cache_bad_fit;
@@ -205,7 +188,6 @@ typedef union {
static int no_mseg_allocators;
static ErtsAlgndMsegAllctr_t *aligned_mseg_allctr;
-#ifdef ERTS_SMP
#define ERTS_MSEG_ALLCTR_IX(IX) \
(&aligned_mseg_allctr[(IX)].mseg_alloc)
@@ -216,18 +198,6 @@ static ErtsAlgndMsegAllctr_t *aligned_mseg_allctr;
#define ERTS_MSEG_ALLCTR_OPT(OPT) \
((OPT)->sched_spec ? ERTS_MSEG_ALLCTR_SS() : ERTS_MSEG_ALLCTR_IX(0))
-#else
-
-#define ERTS_MSEG_ALLCTR_IX(IX) \
- (&aligned_mseg_allctr[0].mseg_alloc)
-
-#define ERTS_MSEG_ALLCTR_SS() \
- (&aligned_mseg_allctr[0].mseg_alloc)
-
-#define ERTS_MSEG_ALLCTR_OPT(OPT) \
- (&aligned_mseg_allctr[0].mseg_alloc)
-
-#endif
#define ERTS_MSEG_LOCK(MA) \
do { \
@@ -301,22 +271,17 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) {
/* #define ERTS_PRINT_ERTS_MMAP */
static ERTS_INLINE void *
-mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep)
+mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep)
{
#ifdef ERTS_PRINT_ERTS_MMAP
UWord req_size = *sizep;
#endif
void *seg;
Uint32 mmap_flags = 0;
-#if HALFWORD_HEAP
- mmap_flags |= ((mk == &ma->low_mem)
- ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
- : ERTS_MMAPFLG_OS_ONLY);
-#endif
if (MSEG_FLG_IS_2POW(flags))
mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
- seg = erts_mmap(mmap_flags, sizep);
+ seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep);
#ifdef ERTS_PRINT_ERTS_MMAP
erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg,
@@ -330,18 +295,13 @@ mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep)
}
static ERTS_INLINE void
-mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord size) {
+mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) {
Uint32 mmap_flags = 0;
-#if HALFWORD_HEAP
- mmap_flags |= ((mk == &ma->low_mem)
- ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
- : ERTS_MMAPFLG_OS_ONLY);
-#endif
if (MSEG_FLG_IS_2POW(flags))
mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
- erts_munmap(mmap_flags, seg_p, size);
+ erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size);
#ifdef ERTS_PRINT_ERTS_MMAP
erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n",
(mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
@@ -352,22 +312,17 @@ mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord s
}
static ERTS_INLINE void *
-mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWord old_size, UWord *sizep)
+mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, UWord *sizep)
{
#ifdef ERTS_PRINT_ERTS_MMAP
UWord req_size = *sizep;
#endif
void *new_seg;
Uint32 mmap_flags = 0;
-#if HALFWORD_HEAP
- mmap_flags |= ((mk == &ma->low_mem)
- ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
- : ERTS_MMAPFLG_OS_ONLY);
-#endif
if (MSEG_FLG_IS_2POW(flags))
mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
- new_seg = erts_mremap(mmap_flags, old_seg, old_size, sizep);
+ new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep);
#ifdef ERTS_PRINT_ERTS_MMAP
erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n",
@@ -384,18 +339,15 @@ mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWor
do { \
if ((MA)->is_thread_safe) \
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&(MA)->mtx) \
- || erts_smp_thr_progress_is_blocking() \
+ || erts_thr_progress_is_blocking() \
|| ERTS_IS_CRASH_DUMPING); \
else \
ERTS_LC_ASSERT((MA)->ix == (int) erts_get_scheduler_id() \
- || erts_smp_thr_progress_is_blocking() \
+ || erts_thr_progress_is_blocking() \
|| ERTS_IS_CRASH_DUMPING); \
} while (0)
-#define ERTS_DBG_MK_CHK_THR_ACCESS(MK) \
- ERTS_DBG_MA_CHK_THR_ACCESS((MK)->ma)
#else
#define ERTS_DBG_MA_CHK_THR_ACCESS(MA)
-#define ERTS_DBG_MK_CHK_THR_ACCESS(MK)
#endif
/* Cache interface */
@@ -408,23 +360,23 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) {
c->prev = c;
}
-static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, Uint flags) {
+static ERTS_INLINE int cache_bless_segment(ErtsMsegAllctr_t *ma, void *seg, UWord size, Uint flags) {
cache_t *c;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
ASSERT(!MSEG_FLG_IS_2POW(flags) || (MSEG_FLG_IS_2POW(flags) && MAP_IS_ALIGNED(seg) && IS_2POW(size)));
- /* The idea is that sbc caching is prefered over mbc caching.
+ /* The idea is that sbc caching is preferred over mbc caching.
* Blocks are normally allocated in mb carriers and thus cached there.
* Large blocks has no such cache and it is up to mseg to cache them to speed things up.
*/
- if (!erts_circleq_is_empty(&(mk->cache_free))) {
+ if (!erts_circleq_is_empty(&(ma->cache_free))) {
/* We have free slots, use one to cache the segment */
- c = erts_circleq_head(&(mk->cache_free));
+ c = erts_circleq_head(&(ma->cache_free));
erts_circleq_remove(c);
c->seg = seg;
@@ -433,32 +385,34 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U
if (MSEG_FLG_IS_2POW(flags)) {
int ix = SIZE_TO_CACHE_AREA_IDX(size);
+ if (ix < 0)
+ return 0;
+
ASSERT(ix < CACHE_AREAS);
ASSERT((1 << (ix + MSEG_ALIGN_BITS)) == size);
- erts_circleq_push_head(&(mk->cache_powered_node[ix]), c);
+ erts_circleq_push_head(&(ma->cache_powered_node[ix]), c);
} else
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
- mk->cache_size++;
- ASSERT(mk->cache_size <= mk->ma->max_cache_size);
+ ma->cache_size++;
return 1;
- } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) {
+ } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(ma->cache_unpowered_node))) {
/* No free slots.
* Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */
- c = erts_circleq_tail(&(mk->cache_unpowered_node));
+ c = erts_circleq_tail(&(ma->cache_unpowered_node));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, ERTS_MSEG_FLG_NONE, mk, c->seg, c->size);
+ mseg_destroy(ma, ERTS_MSEG_FLG_NONE, c->seg, c->size);
mseg_cache_clear_node(c);
c->seg = seg;
c->size = size;
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
return 1;
} else if (!MSEG_FLG_IS_2POW(flags)) {
@@ -472,20 +426,20 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U
int i;
for( i = 0; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- c = erts_circleq_tail(&(mk->cache_powered_node[i]));
+ c = erts_circleq_tail(&(ma->cache_powered_node[i]));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, c->seg, c->size);
+ mseg_destroy(ma, ERTS_MSEG_FLG_2POW, c->seg, c->size);
mseg_cache_clear_node(c);
c->seg = seg;
c->size = size;
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
return 1;
}
@@ -494,11 +448,11 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, U
return 0;
}
-static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flags) {
+static ERTS_INLINE void *cache_get_segment(ErtsMsegAllctr_t *ma, UWord *size_p, Uint flags) {
UWord size = *size_p;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
if (MSEG_FLG_IS_2POW(flags)) {
@@ -509,12 +463,15 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag
ASSERT(IS_2POW(size));
+ if (ix < 0)
+ return NULL;
+
for( i = ix; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- c = erts_circleq_head(&(mk->cache_powered_node[i]));
+ c = erts_circleq_head(&(ma->cache_powered_node[i]));
erts_circleq_remove(c);
ASSERT(IS_2POW(c->size));
@@ -523,31 +480,31 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag
csize = c->size;
seg = (char*) c->seg;
- mk->cache_size--;
- mk->cache_hits++;
+ ma->cache_size--;
+ ma->cache_hits++;
/* link to free cache list */
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
- ASSERT(!(mk->cache_size < 0));
+ ASSERT(!(ma->cache_size < 0));
if (csize != size)
- mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, seg + size, csize - size);
+ mseg_destroy(ma, ERTS_MSEG_FLG_2POW, seg + size, csize - size);
return seg;
}
}
- else if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) {
+ else if (!erts_circleq_is_empty(&(ma->cache_unpowered_node))) {
void *seg;
cache_t *c;
cache_t *best = NULL;
UWord bdiff = 0;
UWord csize;
- UWord bad_max_abs = mk->ma->abs_max_cache_bad_fit;
- UWord bad_max_rel = mk->ma->rel_max_cache_bad_fit;
+ UWord bad_max_abs = ma->abs_max_cache_bad_fit;
+ UWord bad_max_rel = ma->rel_max_cache_bad_fit;
- erts_circleq_foreach(c, &(mk->cache_unpowered_node)) {
+ erts_circleq_foreach(c, &(ma->cache_unpowered_node)) {
csize = c->size;
if (csize >= size) {
if (((csize - size)*100 < bad_max_rel*size) && (csize - size) < bad_max_abs ) {
@@ -556,11 +513,11 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag
erts_circleq_remove(c);
- mk->cache_size--;
- mk->cache_hits++;
+ ma->cache_size--;
+ ma->cache_hits++;
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
*size_p = csize;
@@ -583,7 +540,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag
ASSERT(best->seg);
ASSERT(best->size > 0);
- mk->cache_hits++;
+ ma->cache_hits++;
/* Use current cache placement for remaining segment space */
@@ -607,7 +564,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flag
* using callbacks from aux-work in the scheduler.
*/
-static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_one_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) {
cache_t *c = NULL;
c = erts_circleq_tail(head);
@@ -616,19 +573,19 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, flags, mk, c->seg, c->size);
+ mseg_destroy(ma, flags, c->seg, c->size);
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
- mk->segments.current.watermark--;
- mk->cache_size--;
+ ma->segments.current.watermark--;
+ ma->cache_size--;
- ASSERT( mk->cache_size >= 0 );
+ ASSERT(ma->cache_size >= 0);
- return mk->cache_size;
+ return ma->cache_size;
}
-static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) {
cache_t *c = NULL;
while (!erts_circleq_is_empty(head)) {
@@ -639,58 +596,52 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, ca
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, flags, mk, c->seg, c->size);
+ mseg_destroy(ma, flags, c->seg, c->size);
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
-
- mk->segments.current.watermark--;
- mk->cache_size--;
+ erts_circleq_push_head(&(ma->cache_free), c);
+ ma->segments.current.watermark--;
+ ma->cache_size--;
}
- ASSERT( mk->cache_size >= 0 );
+ ASSERT(ma->cache_size >= 0);
- return mk->cache_size;
+ return ma->cache_size;
}
-/* mseg_check_memkind_cache
- * - Check if we can empty some cached segments in this
- * MemKind.
+/* mseg_check_cache
+ * - Check if we can empty some cached segments in this allocator
*/
-static Uint mseg_check_memkind_cache(MemKind *mk) {
+static Uint mseg_check_cache(ErtsMsegAllctr_t *ma) {
int i;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
for (i = 0; i < CACHE_AREAS; i++) {
- if (!erts_circleq_is_empty(&(mk->cache_powered_node[i])))
- return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i]));
+ if (!erts_circleq_is_empty(&(ma->cache_powered_node[i])))
+ return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i]));
}
- if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node));
+ if (!erts_circleq_is_empty(&(ma->cache_unpowered_node)))
+ return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node));
return 0;
}
/* mseg_cache_check
* - Check if we have some cache we can purge
- * in any of the memkinds.
*/
static void mseg_cache_check(ErtsMsegAllctr_t *ma) {
- MemKind* mk;
Uint empty_cache = 1;
ERTS_MSEG_LOCK(ma);
- for (mk = ma->mk_list; mk; mk = mk->next) {
- if (mseg_check_memkind_cache(mk))
- empty_cache = 0;
- }
+ if (mseg_check_cache(ma))
+ empty_cache = 0;
/* If all MemKinds caches are empty,
* remove aux-work callback
@@ -708,7 +659,7 @@ static void mseg_cache_check(ErtsMsegAllctr_t *ma) {
/* erts_mseg_cache_check
* - This is a callback that is scheduled as aux-work from
* schedulers and is called at some interval if we have a cache
- * on this mseg-allocator and memkind.
+ * on this mseg-allocator.
* - Purpose: Empty cache slowly so we don't collect mapped areas
* and bloat memory.
*/
@@ -718,42 +669,32 @@ void erts_mseg_cache_check(void) {
}
-/* *_mseg_clear_*_cache
+/* mseg_clear_cache
* Remove cached segments from the allocator completely
*/
-static void mseg_clear_memkind_cache(MemKind *mk) {
+
+static void mseg_clear_cache(ErtsMsegAllctr_t *ma) {
int i;
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
/* drop pow2 caches */
for (i = 0; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i]));
- ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i])));
+ mseg_drop_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i]));
+ ASSERT(erts_circleq_is_empty(&(ma->cache_powered_node[i])));
}
/* drop varied caches */
- if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node));
-
- ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node)));
- ASSERT(mk->cache_size == 0);
-}
+ if (!erts_circleq_is_empty(&(ma->cache_unpowered_node)))
+ mseg_drop_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node));
-static void mseg_clear_cache(ErtsMsegAllctr_t *ma) {
- MemKind* mk;
-
- ERTS_MSEG_LOCK(ma);
- ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-
-
- for (mk = ma->mk_list; mk; mk = mk->next) {
- mseg_clear_memkind_cache(mk);
- }
+ ASSERT(erts_circleq_is_empty(&(ma->cache_unpowered_node)));
+ ASSERT(ma->cache_size == 0);
INC_CC(ma, clear_cache);
-
ERTS_MSEG_UNLOCK(ma);
}
@@ -762,25 +703,12 @@ void erts_mseg_clear_cache(void) {
mseg_clear_cache(ERTS_MSEG_ALLCTR_IX(0));
}
-
-
-static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma,
- const ErtsMsegOpt_t *opt)
-{
-#if HALFWORD_HEAP
- return opt->low_mem ? &ma->low_mem : &ma->hi_mem;
-#else
- return &ma->the_mem;
-#endif
-}
-
static void *
mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p,
Uint flags, const ErtsMsegOpt_t *opt)
{
UWord size;
void *seg;
- MemKind* mk = memkind(ma, opt);
INC_CC(ma, alloc);
@@ -794,10 +722,10 @@ mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p,
}
}
- if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL)
+ if (opt->cache && ma->cache_size > 0 && (seg = cache_get_segment(ma, &size, flags)) != NULL)
goto done;
- seg = mseg_create(ma, flags, mk, &size);
+ seg = mseg_create(ma, flags, &size);
if (!seg)
*size_p = 0;
@@ -807,7 +735,7 @@ done:
if (erts_mtrace_enabled)
erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size);
- ERTS_MSEG_ALLOC_STAT(mk,size);
+ ERTS_MSEG_ALLOC_STAT(ma,size);
}
return seg;
@@ -818,11 +746,9 @@ static void
mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size,
Uint flags, const ErtsMsegOpt_t *opt)
{
- MemKind* mk = memkind(ma, opt);
+ ERTS_MSEG_DEALLOC_STAT(ma,size);
- ERTS_MSEG_DEALLOC_STAT(mk,size);
-
- if (opt->cache && cache_bless_segment(mk, seg, size, flags)) {
+ if (opt->cache && cache_bless_segment(ma, seg, size, flags)) {
schedule_cache_check(ma);
goto done;
}
@@ -830,7 +756,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size,
if (erts_mtrace_enabled)
erts_mtrace_crr_free(atype, SEGTYPE, seg);
- mseg_destroy(ma, flags, mk, seg, size);
+ mseg_destroy(ma, flags, seg, size);
done:
@@ -841,7 +767,6 @@ static void *
mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt)
{
- MemKind* mk;
void *new_seg;
UWord new_size;
@@ -860,7 +785,6 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
return NULL;
}
- mk = memkind(ma, opt);
new_seg = seg;
if (!MSEG_FLG_IS_2POW(flags))
@@ -875,7 +799,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
if (new_size > old_size) {
if (opt->preserv) {
- new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size);
+ new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size);
if (!new_seg)
new_size = old_size;
}
@@ -895,7 +819,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
new_size = old_size;
}
else {
- new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size);
+ new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size);
if (!new_seg)
new_size = old_size;
}
@@ -909,7 +833,7 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size));
*new_size_p = new_size;
- ERTS_MSEG_REALLOC_STAT(mk, old_size, new_size);
+ ERTS_MSEG_REALLOC_STAT(ma, old_size, new_size);
return new_seg;
}
@@ -1062,17 +986,15 @@ add_4tup(Uint **hpp, Uint *szp, Eterm *lp,
static Eterm
info_options(ErtsMsegAllctr_t *ma,
char *prefix,
- int *print_to_p,
+ fmtfn_t *print_to_p,
void *print_to_arg,
Uint **hpp,
Uint *szp)
{
- Eterm res;
-
- res = erts_mmap_info_options(prefix, print_to_p, print_to_arg, hpp, szp);
+ Eterm res = NIL;
if (print_to_p) {
- int to = *print_to_p;
+ fmtfn_t to = *print_to_p;
void *arg = print_to_arg;
erts_print(to, arg, "%samcbf: %beu\n", prefix, ma->abs_max_cache_bad_fit);
erts_print(to, arg, "%srmcbf: %beu\n", prefix, ma->rel_max_cache_bad_fit);
@@ -1100,7 +1022,7 @@ info_options(ErtsMsegAllctr_t *ma,
}
static Eterm
-info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+info_calls(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
@@ -1113,7 +1035,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
erts_print(TO, TOA, "mseg_%s calls: %b32u%09b32u\n", #CC, \
ma->calls.CC.giga_no, ma->calls.CC.no)
- int to = *print_to_p;
+ fmtfn_t to = *print_to_p;
void *arg = print_to_arg;
PRINT_CC(to, arg, alloc);
@@ -1179,90 +1101,97 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
}
static Eterm
-info_status(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
- int begin_new_max_period, Uint **hpp, Uint *szp)
+info_status(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg,
+ int begin_new_max_period, int only_sz, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
- if (mk->segments.max_ever.no < mk->segments.max.no)
- mk->segments.max_ever.no = mk->segments.max.no;
- if (mk->segments.max_ever.sz < mk->segments.max.sz)
- mk->segments.max_ever.sz = mk->segments.max.sz;
+ if (ma->segments.max_ever.no < ma->segments.max.no)
+ ma->segments.max_ever.no = ma->segments.max.no;
+ if (ma->segments.max_ever.sz < ma->segments.max.sz)
+ ma->segments.max_ever.sz = ma->segments.max.sz;
if (print_to_p) {
- int to = *print_to_p;
+ fmtfn_t to = *print_to_p;
void *arg = print_to_arg;
- erts_print(to, arg, "cached_segments: %beu\n", mk->cache_size);
- erts_print(to, arg, "cache_hits: %beu\n", mk->cache_hits);
- erts_print(to, arg, "segments: %beu %beu %beu\n",
- mk->segments.current.no, mk->segments.max.no, mk->segments.max_ever.no);
- erts_print(to, arg, "segments_size: %beu %beu %beu\n",
- mk->segments.current.sz, mk->segments.max.sz, mk->segments.max_ever.sz);
- erts_print(to, arg, "segments_watermark: %beu\n",
- mk->segments.current.watermark);
+ if (!only_sz) {
+ erts_print(to, arg, "cached_segments: %beu\n", ma->cache_size);
+ erts_print(to, arg, "cache_hits: %beu\n", ma->cache_hits);
+ erts_print(to, arg, "segments: %beu %beu %beu\n",
+ ma->segments.current.no, ma->segments.max.no, ma->segments.max_ever.no);
+ erts_print(to, arg, "segments_watermark: %beu\n",
+ ma->segments.current.watermark);
+ }
+ erts_print(to, arg, "segments_size: %beu %beu %beu\n",
+ ma->segments.current.sz, ma->segments.max.sz, ma->segments.max_ever.sz);
}
if (hpp || szp) {
res = NIL;
- add_2tup(hpp, szp, &res,
- am.segments_watermark,
- bld_unstable_uint(hpp, szp, mk->segments.current.watermark));
- add_4tup(hpp, szp, &res,
- am.segments_size,
- bld_unstable_uint(hpp, szp, mk->segments.current.sz),
- bld_unstable_uint(hpp, szp, mk->segments.max.sz),
- bld_unstable_uint(hpp, szp, mk->segments.max_ever.sz));
- add_4tup(hpp, szp, &res,
- am.segments,
- bld_unstable_uint(hpp, szp, mk->segments.current.no),
- bld_unstable_uint(hpp, szp, mk->segments.max.no),
- bld_unstable_uint(hpp, szp, mk->segments.max_ever.no));
- add_2tup(hpp, szp, &res,
- am.cache_hits,
- bld_unstable_uint(hpp, szp, mk->cache_hits));
- add_2tup(hpp, szp, &res,
- am.cached_segments,
- bld_unstable_uint(hpp, szp, mk->cache_size));
-
+ add_4tup(hpp, szp, &res,
+ am.segments_size,
+ bld_unstable_uint(hpp, szp, ma->segments.current.sz),
+ bld_unstable_uint(hpp, szp, ma->segments.max.sz),
+ bld_unstable_uint(hpp, szp, ma->segments.max_ever.sz));
+ if (!only_sz) {
+ add_2tup(hpp, szp, &res,
+ am.segments_watermark,
+ bld_unstable_uint(hpp, szp, ma->segments.current.watermark));
+ add_4tup(hpp, szp, &res,
+ am.segments,
+ bld_unstable_uint(hpp, szp, ma->segments.current.no),
+ bld_unstable_uint(hpp, szp, ma->segments.max.no),
+ bld_unstable_uint(hpp, szp, ma->segments.max_ever.no));
+ add_2tup(hpp, szp, &res,
+ am.cache_hits,
+ bld_unstable_uint(hpp, szp, ma->cache_hits));
+ add_2tup(hpp, szp, &res,
+ am.cached_segments,
+ bld_unstable_uint(hpp, szp, ma->cache_size));
+ }
}
if (begin_new_max_period) {
- mk->segments.max.no = mk->segments.current.no;
- mk->segments.max.sz = mk->segments.current.sz;
+ ma->segments.max.no = ma->segments.current.no;
+ ma->segments.max.sz = ma->segments.current.sz;
}
return res;
}
-static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
- int begin_max_per, Uint **hpp, Uint *szp)
+static Eterm info_memkind(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg,
+ int begin_max_per, int only_sz, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
Eterm atoms[3];
Eterm values[3];
- if (print_to_p) {
- erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", mk->name);
+ if (!only_sz) {
+ if (print_to_p) {
+ erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", "all memory");
+ }
+ if (hpp || szp) {
+ atoms[0] = am.name;
+ atoms[1] = am.status;
+ atoms[2] = am.calls;
+ values[0] = erts_bld_string(hpp, szp, "all memory");
+ }
}
- if (hpp || szp) {
- atoms[0] = am.name;
- atoms[1] = am.status;
- atoms[2] = am.calls;
- values[0] = erts_bld_string(hpp, szp, mk->name);
- }
- values[1] = info_status(ma, mk, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp);
+ res = info_status(ma, print_to_p, print_to_arg, begin_max_per, only_sz, hpp, szp);
+ if (!only_sz) {
+ values[1] = res;
+ values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp);
- if (hpp || szp)
- res = bld_2tup_list(hpp, szp, 3, atoms, values);
+ if (hpp || szp)
+ res = bld_2tup_list(hpp, szp, 3, atoms, values);
+ }
return res;
}
-
static Eterm
-info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+info_version(ErtsMsegAllctr_t *ma, fmtfn_t *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
@@ -1284,7 +1213,7 @@ info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **h
Eterm
erts_mseg_info_options(int ix,
- int *print_to_p, void *print_to_arg,
+ fmtfn_t *print_to_p, void *print_to_arg,
Uint **hpp, Uint *szp)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix);
@@ -1297,9 +1226,10 @@ erts_mseg_info_options(int ix,
Eterm
erts_mseg_info(int ix,
- int *print_to_p,
+ fmtfn_t *print_to_p,
void *print_to_arg,
int begin_max_per,
+ int only_sz,
Uint **hpp,
Uint *szp)
{
@@ -1310,29 +1240,29 @@ erts_mseg_info(int ix,
Uint n = 0;
if (hpp || szp) {
-
- if (!atoms_initialized)
- init_atoms(ma);
-
- atoms[0] = am.version;
- atoms[1] = am.options;
- atoms[2] = am.memkind;
- atoms[3] = am.memkind;
+ if (!atoms_initialized)
+ init_atoms(ma);
+ }
+ if (!only_sz) {
+ if (hpp || szp) {
+ atoms[0] = am.version;
+ atoms[1] = am.options;
+ atoms[2] = am.memkind;
+ }
+ values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
+ values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
}
- values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
- values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-#if HALFWORD_HEAP
- values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
-#else
- values[n++] = info_memkind(ma, &ma->the_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
-#endif
- if (hpp || szp)
- res = bld_2tup_list(hpp, szp, n, atoms, values);
+ res = info_memkind(ma, print_to_p, print_to_arg, begin_max_per, only_sz, hpp, szp);
+
+ if (!only_sz) {
+ values[n++] = res;
+ if (hpp || szp)
+ res = bld_2tup_list(hpp, szp, n, atoms, values);
+ }
ERTS_MSEG_UNLOCK(ma);
@@ -1407,13 +1337,10 @@ Uint
erts_mseg_no(const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
- MemKind* mk;
- Uint n = 0;
+ Uint n;
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- for (mk=ma->mk_list; mk; mk=mk->next) {
- n += mk->segments.current.no;
- }
+ n = ma->segments.current.no;
ERTS_MSEG_UNLOCK(ma);
return n;
}
@@ -1425,16 +1352,16 @@ erts_mseg_unit_size(void)
}
-static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name)
+static void mem_cache_init(ErtsMsegAllctr_t *ma)
{
int i;
/* Clear all cache headers */
- mseg_cache_clear_node(&(mk->cache_free));
- mseg_cache_clear_node(&(mk->cache_unpowered_node));
+ mseg_cache_clear_node(&(ma->cache_free));
+ mseg_cache_clear_node(&(ma->cache_unpowered_node));
for (i = 0; i < CACHE_AREAS; i++) {
- mseg_cache_clear_node(&(mk->cache_powered_node[i]));
+ mseg_cache_clear_node(&(ma->cache_powered_node[i]));
}
/* Populate cache free list */
@@ -1442,25 +1369,20 @@ static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name)
ASSERT(ma->max_cache_size <= MAX_CACHE_SIZE);
for (i = 0; i < ma->max_cache_size; i++) {
- mseg_cache_clear_node(&(mk->cache[i]));
- erts_circleq_push_head(&(mk->cache_free), &(mk->cache[i]));
+ mseg_cache_clear_node(&(ma->cache[i]));
+ erts_circleq_push_head(&(ma->cache_free), &(ma->cache[i]));
}
- mk->cache_size = 0;
- mk->cache_hits = 0;
-
- mk->segments.current.watermark = 0;
- mk->segments.current.no = 0;
- mk->segments.current.sz = 0;
- mk->segments.max.no = 0;
- mk->segments.max.sz = 0;
- mk->segments.max_ever.no = 0;
- mk->segments.max_ever.sz = 0;
-
- mk->ma = ma;
- mk->name = name;
- mk->next = ma->mk_list;
- ma->mk_list = mk;
+ ma->cache_size = 0;
+ ma->cache_hits = 0;
+
+ ma->segments.current.watermark = 0;
+ ma->segments.current.no = 0;
+ ma->segments.current.sz = 0;
+ ma->segments.max.no = 0;
+ ma->segments.max.sz = 0;
+ ma->segments.max_ever.no = 0;
+ ma->segments.max_ever.sz = 0;
}
void
@@ -1469,11 +1391,7 @@ erts_mseg_init(ErtsMsegInit_t *init)
int i;
UWord x;
-#ifdef ERTS_SMP
no_mseg_allocators = init->nos + 1;
-#else
- no_mseg_allocators = 1;
-#endif
x = (UWord) malloc(sizeof(ErtsAlgndMsegAllctr_t)
*no_mseg_allocators
@@ -1485,21 +1403,22 @@ erts_mseg_init(ErtsMsegInit_t *init)
atoms_initialized = 0;
- erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
-
-#if HALFWORD_HEAP
- if (sizeof(void *) != 8)
- erl_exit(-1,"Halfword emulator cannot be run in 32bit mode");
+ erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms", NIL,
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
- init->mmap.virtual_range.start = (char *) sbrk(0);
- init->mmap.virtual_range.end = (char *) 0x100000000UL;
- init->mmap.sco = 0;
+#ifdef ERTS_HAVE_EXEC_MMAPPER
+ /* Initialize erts_exec_mapper *FIRST*, to increase probability
+ * of getting low memory for HiPE AMD64's small code model.
+ */
+ erts_mmap_init(&erts_exec_mmapper, &init->exec_mmap, 1);
+#endif
+ erts_mmap_init(&erts_dflt_mmapper, &init->dflt_mmap, 0);
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ erts_mmap_init(&erts_literal_mmapper, &init->literal_mmap, 0);
#endif
-
- erts_mmap_init(&init->mmap);
if (!IS_2POW(GET_PAGE_SIZE))
- erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE);
+ erts_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE);
ASSERT((MSEG_ALIGNED_SIZE % GET_PAGE_SIZE) == 0);
@@ -1514,7 +1433,8 @@ erts_mseg_init(ErtsMsegInit_t *init)
ma->is_thread_safe = 0;
else {
ma->is_thread_safe = 1;
- erts_mtx_init(&ma->mtx, "mseg");
+ erts_mtx_init(&ma->mtx, "mseg", make_small(i),
+ ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_ALLOCATOR);
}
ma->is_cache_check_scheduled = 0;
@@ -1528,14 +1448,7 @@ erts_mseg_init(ErtsMsegInit_t *init)
if (ma->max_cache_size > MAX_CACHE_SIZE)
ma->max_cache_size = MAX_CACHE_SIZE;
- ma->mk_list = NULL;
-
-#if HALFWORD_HEAP
- mem_kind_init(ma, &ma->low_mem, "low memory");
- mem_kind_init(ma, &ma->hi_mem, "high memory");
-#else
- mem_kind_init(ma, &ma->the_mem, "all memory");
-#endif
+ mem_cache_init(ma);
sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls));
}
@@ -1544,13 +1457,8 @@ erts_mseg_init(ErtsMsegInit_t *init)
static ERTS_INLINE Uint tot_cache_size(ErtsMsegAllctr_t *ma)
{
- MemKind* mk;
- Uint sz = 0;
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- for (mk=ma->mk_list; mk; mk=mk->next) {
- sz += mk->cache_size;
- }
- return sz;
+ return ma->cache_size;
}
/*
diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h
index 2284b3f8f1..bba0dec499 100644
--- a/erts/emulator/sys/common/erl_mseg.h
+++ b/erts/emulator/sys/common/erl_mseg.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -41,16 +42,6 @@
#if ERTS_HAVE_MSEG_SUPER_ALIGNED
# define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS
-#else
-/* If we don't use super aligned multiblock carriers
- * we will mmap with page size alignment (and thus use corresponding
- * align bits).
- *
- * Current implementation needs this to be a constant and
- * only uses this for user dev testing so setting page size
- * to 4096 (12 bits) is fine.
- */
-# define MSEG_ALIGN_BITS (12)
#endif
#if HAVE_ERTS_MSEG
@@ -68,7 +59,9 @@ typedef struct {
Uint rmcbf;
Uint mcs;
Uint nos;
- ErtsMMapInit mmap;
+ ErtsMMapInit dflt_mmap;
+ ErtsMMapInit literal_mmap;
+ ErtsMMapInit exec_mmap;
} ErtsMsegInit_t;
#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \
@@ -77,7 +70,9 @@ typedef struct {
20, /* rmcbf: Relative max cache bad fit */ \
10, /* mcs: Max cache size */ \
1000, /* cci: Cache check interval */ \
- ERTS_MMAP_INIT_DEFAULT_INITER \
+ ERTS_MMAP_INIT_DEFAULT_INITER, \
+ ERTS_MMAP_INIT_LITERAL_INITER, \
+ ERTS_MMAP_INIT_HIPE_EXEC_INITER \
}
typedef struct {
@@ -86,9 +81,6 @@ typedef struct {
UWord abs_shrink_th;
UWord rel_shrink_th;
int sched_spec;
-#if HALFWORD_HEAP
- int low_mem;
-#endif
} ErtsMsegOpt_t;
extern const ErtsMsegOpt_t erts_mseg_default_opt;
@@ -106,8 +98,8 @@ Uint erts_mseg_unit_size(void);
void erts_mseg_init(ErtsMsegInit_t *init);
void erts_mseg_late_init(void); /* Have to be called after all allocators,
threads and timers have been initialized. */
-Eterm erts_mseg_info_options(int, int *, void*, Uint **, Uint *);
-Eterm erts_mseg_info(int, int *, void*, int, Uint **, Uint *);
+Eterm erts_mseg_info_options(int, fmtfn_t*, void*, Uint **, Uint *);
+Eterm erts_mseg_info(int, fmtfn_t *, void*, int, int, Uint **, Uint *);
#endif /* #if HAVE_ERTS_MSEG */
diff --git a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
index 408aa7e016..fc871f94f1 100644
--- a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
+++ b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2004-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2004-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.c b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c
new file mode 100644
index 0000000000..341845cc2a
--- /dev/null
+++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c
@@ -0,0 +1,79 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2015-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "erl_os_monotonic_time_extender.h"
+
+
+static void *os_monotonic_time_extender(void *vstatep)
+{
+ ErtsOsMonotonicTimeExtendState *state = (ErtsOsMonotonicTimeExtendState *) vstatep;
+ long sleep_time = state->check_interval*1000;
+ Uint32 (*raw_os_mtime)(void) = state->raw_os_monotonic_time;
+ Uint32 last_msb = 0;
+
+ while (1) {
+ Uint32 msb = (*raw_os_mtime)() & (((Uint32) 1) << 31);
+
+ if (msb != last_msb) {
+ int ix = ((int) (last_msb >> 31)) & 1;
+ Uint32 xtnd = (Uint32) erts_atomic32_read_nob(&state->extend[ix]);
+ erts_atomic32_set_nob(&state->extend[ix], (erts_aint32_t) (xtnd + 1));
+ last_msb = msb;
+ }
+ erts_milli_sleep(sleep_time);
+ }
+
+ erts_exit(ERTS_ABORT_EXIT, "os_monotonic_time_extender thread terminating");
+ return NULL;
+}
+
+static erts_tid_t os_monotonic_extender_tid;
+
+void
+erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep,
+ Uint32 (*raw_os_monotonic_time)(void),
+ int check_seconds)
+{
+ statep->raw_os_monotonic_time = raw_os_monotonic_time;
+ erts_atomic32_init_nob(&statep->extend[0], (erts_aint32_t) 0);
+ erts_atomic32_init_nob(&statep->extend[1], (erts_aint32_t) 0);
+ statep->check_interval = check_seconds;
+
+}
+
+void
+erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep)
+{
+ erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER;
+ thr_opts.detached = 1;
+ thr_opts.suggested_stack_size = 4;
+
+#if 0
+ thr_opts.name = "os_monotonic_time_extender";
+#endif
+
+ erts_thr_create(&os_monotonic_extender_tid,
+ os_monotonic_time_extender,
+ (void*) statep,
+ &thr_opts);
+}
diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.h b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h
new file mode 100644
index 0000000000..53c32579d5
--- /dev/null
+++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h
@@ -0,0 +1,46 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2015. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_OS_MONOTONIC_TIME_EXTENDER_H__
+#define ERL_OS_MONOTONIC_TIME_EXTENDER_H__
+
+#include "sys.h"
+#include "erl_threads.h"
+
+typedef struct {
+ Uint32 (*raw_os_monotonic_time)(void);
+ erts_atomic32_t extend[2];
+ int check_interval;
+} ErtsOsMonotonicTimeExtendState;
+
+# define ERTS_EXTEND_OS_MONOTONIC_TIME(S, RT) \
+ ((((ErtsMonotonicTime) \
+ erts_atomic32_read_nob(&((S)->extend[((int) ((RT) >> 31)) & 1]))) \
+ << 32) \
+ + (RT))
+
+void
+erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep,
+ Uint32 (*raw_os_monotonic_time)(void),
+ int check_seconds);
+void
+erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep);
+
+#endif
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index 0a58a625b2..7aa53e8f36 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -1,25 +1,25 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
-/*
- * Description: Poll interface suitable for ERTS with or without
- * SMP support.
+/**
+ * @description Poll interface suitable for ERTS
*
* The interface is currently implemented using:
* - select
@@ -28,12 +28,36 @@
* - epoll with poll or select as fallback
* - kqueue with poll or select as fallback
*
- * Some time in the future it will also be
- * implemented using Solaris ports.
*
+ * @author Rickard Green
+ * @author Lukas Larsson
+ *
+ * There are two major different implementations off IO polling in this
+ * file. The concurrent and non-concurrent implementations.
+ * When available epoll/kqueue are used to implement the concurrent
+ * versions. poll, select and dev/poll use non-concurrent updates.
+ *
+ * Concurrent version:
+ * In the concurrent version erts_poll_control directly modifies
+ * the kernel pollset without waking the thread that is waiting
+ * on events. Also the ErtsPollResFd type is directly mapped to
+ * the native event type, so no extra copying is needed. Note that
+ * as no locking at all is done, fds can be triggered that have been
+ * removed from the pollset. The check_io layer has to deal with this.
*
+ * Non-concurrent version:
+ * In the non-concurrent version, the pollset has an internal representation
+ * of the pollset that is updated by erts_poll_control. When an fd is updated,
+ * its number is placed in the update request queue and then the waiting thread
+ * is woken in order to see the change. The internal data in the pollset is
+ * protected by a mutex that has to be taken by both the modifying and waiting
+ * thread at different times.
+ *
+ * The non-concurrent pollset cannot have fd's closed in it while a thread is
+ * waiting on that fd. In order to fix this, when an ERTS_POLL_OP_DEL command
+ * is issued, the fd is marked as closing and the waiting thread is woken. The
+ * fd is then returned in the waiting threads results as ERTS_POLL_EV_NONE.
*
- * Author: Rickard Green
*/
#ifdef HAVE_CONFIG_H
@@ -50,7 +74,6 @@
#ifndef WANT_NONBLOCKING
# define WANT_NONBLOCKING
#endif
-#define ERTS_WANT_GOT_SIGUSR1
#include "erl_poll.h"
#if ERTS_POLL_USE_KQUEUE
@@ -62,6 +85,8 @@
# ifdef SYS_SELECT_H
# include <sys/select.h>
# endif
+#elif defined(_DARWIN_UNLIMITED_SELECT)
+# undef _DARWIN_UNLIMITED_SELECT
#endif
#ifdef NO_SYSCONF
# if ERTS_POLL_USE_SELECT
@@ -73,6 +98,8 @@
#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
+#include "erl_msacc.h"
+#include "erl_misc_utils.h"
#if !defined(ERTS_POLL_USE_EPOLL) \
&& !defined(ERTS_POLL_USE_DEVPOLL) \
@@ -81,23 +108,35 @@
#error "Missing implementation of erts_poll()"
#endif
-#if defined(ERTS_KERNEL_POLL_VERSION) && !ERTS_POLL_USE_KERNEL_POLL
-#error "Missing kernel poll implementation of erts_poll()"
-#endif
+#if 0
+#define ERTS_POLL_DEBUG_PRINT 1
-#if defined(ERTS_NO_KERNEL_POLL_VERSION) && ERTS_POLL_USE_KERNEL_POLL
-#error "Kernel poll used when it shouldn't be used"
-#endif
+#define DEBUG_PRINT(FMT, PS, ...) \
+ do { \
+ int myerrno = errno; \
+ erts_printf("%d: " FMT "\r\n", (PS)->id, ##__VA_ARGS__); \
+ errno = myerrno; \
+ } while(0)
-#if 0
-#define ERTS_POLL_DEBUG_PRINT
+/* Define to print info about modifications done to each fd */
+#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__)
+/* Define to print entry and exit from erts_poll_wait (can be very spammy) */
+//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+
+#else
+#define ERTS_POLL_DEBUG_PRINT 0
+#define DEBUG_PRINT(...)
#endif
-#if defined(DEBUG) && 0
-#define HARD_DEBUG
+#ifndef DEBUG_PRINT_FD
+#define DEBUG_PRINT_FD(...)
+#endif
+#ifndef DEBUG_PRINT_WAIT
+#define DEBUG_PRINT_WAIT(...)
#endif
-#ifdef _DARWIN_UNLIMITED_SELECT
+
+#if defined(_DARWIN_UNLIMITED_SELECT) && ERTS_POLL_USE_SELECT
typedef struct {
size_t sz;
fd_set* ptr;
@@ -143,77 +182,43 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
# define ERTS_SELECT select
#endif
-#define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \
- || ERTS_POLL_USE_KQUEUE)
-#define ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE \
- (defined(ERTS_SMP) || ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_POLL)
-
-#define ERTS_POLL_USE_CONCURRENT_UPDATE \
- (defined(ERTS_SMP) && ERTS_POLL_USE_EPOLL)
-
-#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_IS_FALLBACK (ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT) && ERTS_ENABLE_KERNEL_POLL
-#define ERTS_EV_TABLE_MIN_LENGTH 1024
-#define ERTS_EV_TABLE_EXP_THRESHOLD (2048*1024)
+#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE)
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 1
-#else
-# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 0
-#endif
+#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE)
-#define ERTS_POLL_USE_WAKEUP_PIPE \
- (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(USE_THREADS))
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
-#ifdef ERTS_SMP
+#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \
+ erts_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 1)
+#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \
+ erts_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 0)
+#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \
+ ((int) erts_atomic32_read_nob(&(PS)->have_update_requests))
#define ERTS_POLLSET_LOCK(PS) \
- erts_smp_mtx_lock(&(PS)->mtx)
+ erts_mtx_lock(&(PS)->mtx)
#define ERTS_POLLSET_UNLOCK(PS) \
- erts_smp_mtx_unlock(&(PS)->mtx)
-
-#define ERTS_POLLSET_SET_POLLED_CHK(PS) \
- ((int) erts_atomic32_xchg_nob(&(PS)->polled, (erts_aint32_t) 1))
-#define ERTS_POLLSET_UNSET_POLLED(PS) \
- erts_atomic32_set_nob(&(PS)->polled, (erts_aint32_t) 0)
-#define ERTS_POLLSET_IS_POLLED(PS) \
- ((int) erts_atomic32_read_nob(&(PS)->polled))
+ erts_mtx_unlock(&(PS)->mtx)
#else
-#define ERTS_POLLSET_LOCK(PS)
-#define ERTS_POLLSET_UNLOCK(PS)
-#define ERTS_POLLSET_SET_POLLED_CHK(PS) 0
-#define ERTS_POLLSET_UNSET_POLLED(PS)
-#define ERTS_POLLSET_IS_POLLED(PS) 0
-
-#endif
-
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
-#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS) \
- erts_smp_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 1)
-#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS) \
- erts_smp_atomic32_set_nob(&(PS)->have_update_requests, (erts_aint32_t) 0)
-#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) \
- ((int) erts_smp_atomic32_read_nob(&(PS)->have_update_requests))
-#else
#define ERTS_POLLSET_SET_HAVE_UPDATE_REQUESTS(PS)
#define ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(PS)
#define ERTS_POLLSET_HAVE_UPDATE_REQUESTS(PS) 0
-#endif
-#if ERTS_POLL_USE_FALLBACK
-# if ERTS_POLL_USE_POLL
-# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_poll_fds > 1)
-# elif ERTS_POLL_USE_SELECT
-# define ERTS_POLL_NEED_FALLBACK(PS) ((PS)->no_select_fds > 1)
-# endif
+#define ERTS_POLLSET_LOCK(PS)
+#define ERTS_POLLSET_UNLOCK(PS)
+
#endif
+
/*
* --- Data types ------------------------------------------------------------
*/
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
#define ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE 128
typedef struct ErtsPollSetUpdateRequestsBlock_ ErtsPollSetUpdateRequestsBlock;
@@ -223,183 +228,146 @@ struct ErtsPollSetUpdateRequestsBlock_ {
int fds[ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE];
};
-#endif
-
-
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
# define ERTS_POLL_FD_FLG_INURQ (((unsigned short) 1) << 0)
-#endif
-#if ERTS_POLL_USE_FALLBACK
-# define ERTS_POLL_FD_FLG_INFLBCK (((unsigned short) 1) << 1)
-# define ERTS_POLL_FD_FLG_USEFLBCK (((unsigned short) 1) << 2)
-#endif
-#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
-# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 3)
-#endif
+# define ERTS_POLL_FD_FLG_RST (((unsigned short) 1) << 1)
+
typedef struct {
#if ERTS_POLL_USE_POLL
int pix;
#endif
+
ErtsPollEvents used_events;
ErtsPollEvents events;
-#if ERTS_POLL_COALESCE_KP_RES
- unsigned short res_ev_ix;
-#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK
unsigned short flags;
-#endif
} ErtsFdStatus;
-
-#if ERTS_POLL_COALESCE_KP_RES
-/* res_ev_ix max value */
-#define ERTS_POLL_MAX_RES ((1 << sizeof(unsigned short)*8) - 1)
#endif
-#if ERTS_POLL_USE_KQUEUE
-
-#define ERTS_POLL_KQ_OP_HANDLED 1
-#define ERTS_POLL_KQ_OP_DEL_R 2
-#define ERTS_POLL_KQ_OP_DEL_W 3
-#define ERTS_POLL_KQ_OP_ADD_R 4
-#define ERTS_POLL_KQ_OP_ADD_W 5
-#define ERTS_POLL_KQ_OP_ADD2_R 6
-#define ERTS_POLL_KQ_OP_ADD2_W 7
-
-#endif
-
-struct ErtsPollSet_ {
- ErtsPollSet next;
+/*
+ * This struct is not really exported, but it's nice to
+ * get unique names in debugger for kp/nkp
+ */
+struct ERTS_POLL_EXPORT(erts_pollset) {
+ int id;
int internal_fd_limit;
- ErtsFdStatus *fds_status;
- erts_smp_atomic_t no_of_user_fds;
- int fds_status_len;
+ erts_atomic_t no_of_user_fds;
+
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
- int res_events_len;
-#if ERTS_POLL_USE_EPOLL
- struct epoll_event *res_events;
-#elif ERTS_POLL_USE_KQUEUE
- struct kevent *res_events;
-#elif ERTS_POLL_USE_DEVPOLL
- struct pollfd *res_events;
-#endif
#endif /* ERTS_POLL_USE_KERNEL_POLL */
+
#if ERTS_POLL_USE_POLL
int next_poll_fds_ix;
int no_poll_fds;
int poll_fds_len;
- struct pollfd*poll_fds;
+ struct pollfd *poll_fds;
#elif ERTS_POLL_USE_SELECT
int next_sel_fd;
int max_fd;
-#if ERTS_POLL_USE_FALLBACK
- int no_select_fds;
-#endif
ERTS_fd_set input_fds;
ERTS_fd_set res_input_fds;
ERTS_fd_set output_fds;
ERTS_fd_set res_output_fds;
+#elif ERTS_POLL_USE_DEVPOLL
+ struct pollfd *poll_fds;
+ int poll_fds_ix;
#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ ErtsFdStatus *fds_status;
+ int fds_status_len;
ErtsPollSetUpdateRequestsBlock update_requests;
ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
- erts_smp_atomic32_t have_update_requests;
-#endif
-#ifdef ERTS_SMP
- erts_atomic32_t polled;
- erts_smp_mtx_t mtx;
+ erts_atomic32_t have_update_requests;
+ erts_mtx_t mtx;
+ erts_atomic32_t wakeup_state;
#endif
+
#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fds[2];
#endif
-#if ERTS_POLL_USE_FALLBACK
- int fallback_used;
-#endif
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- erts_atomic32_t wakeup_state;
-#endif
- erts_smp_atomic32_t timeout;
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- erts_smp_atomic_t no_avoided_wakeups;
- erts_smp_atomic_t no_avoided_interrupts;
- erts_smp_atomic_t no_interrupt_timed;
-#endif
};
void erts_silence_warn_unused_result(long unused);
static void fatal_error(char *format, ...);
-static void fatal_error_async_signal_safe(char *error_str);
static int max_fds = -1;
-static ErtsPollSet pollsets;
-static erts_smp_spinlock_t pollsets_lock;
#if ERTS_POLL_USE_POLL
+#if !ERTS_POLL_IS_FALLBACK
+static ERTS_INLINE short ev2pollev(ErtsPollEvents ev)
+{
+ return ERTS_POLL_EV_E2N(ev);
+}
+
+static ERTS_INLINE ErtsPollEvents pollev2ev(short ev)
+{
+ return ERTS_POLL_EV_N2E(ev);
+}
+
+#else /* ERTS_POLL_IS_FALLBACK */
+
static ERTS_INLINE short
ev2pollev(ErtsPollEvents ev)
{
-#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE
- return ERTS_POLL_EV_E2N(ev);
-#else /* Note, we only map events we are interested in */
short res_ev = (short) 0;
if (ev & ERTS_POLL_EV_IN)
- res_ev |= ERTS_POLL_EV_NKP_IN;
+ res_ev |= ERTS_POLL_EV_NKP_IN;
if (ev & ERTS_POLL_EV_OUT)
- res_ev |= ERTS_POLL_EV_NKP_OUT;
+ res_ev |= ERTS_POLL_EV_NKP_OUT;
return res_ev;
-#endif
}
static ERTS_INLINE ErtsPollEvents
pollev2ev(short ev)
{
-#if !ERTS_POLL_USE_FALLBACK || ERTS_POLL_USE_KQUEUE
- return ERTS_POLL_EV_N2E(ev);
-#else /* Note, we only map events we are interested in */
ErtsPollEvents res_ev = (ErtsPollEvents) 0;
if (ev & ERTS_POLL_EV_NKP_IN)
- res_ev |= ERTS_POLL_EV_IN;
+ res_ev |= ERTS_POLL_EV_IN;
if (ev & ERTS_POLL_EV_NKP_OUT)
- res_ev |= ERTS_POLL_EV_OUT;
+ res_ev |= ERTS_POLL_EV_OUT;
if (ev & ERTS_POLL_EV_NKP_ERR)
- res_ev |= ERTS_POLL_EV_ERR;
+ res_ev |= ERTS_POLL_EV_ERR;
if (ev & ERTS_POLL_EV_NKP_NVAL)
- res_ev |= ERTS_POLL_EV_NVAL;
- return res_ev;
-#endif
+ res_ev |= ERTS_POLL_EV_NVAL;
+ return res_ev;
}
-#endif
+#endif /* !ERTS_POLL_IS_FALLBACK */
+
+#endif /* ERTS_POLL_USE_POLL */
+
#ifdef HARD_DEBUG
static void check_poll_result(ErtsPollResFd pr[], int len);
-#if ERTS_POLL_USE_DEVPOLL
-static void check_poll_status(ErtsPollSet ps);
-#endif /* ERTS_POLL_USE_DEVPOLL */
#endif /* HARD_DEBUG */
-#ifdef ERTS_POLL_DEBUG_PRINT
+#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG)
+static void check_poll_status(ErtsPollSet *ps);
+#endif /* ERTS_POLL_USE_DEVPOLL && DEBUG */
static void print_misc_debug_info(void);
+#if ERTS_POLL_USE_EPOLL
+uint32_t epoll_events(int kp_fd, int fd);
#endif
+
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
-reset_wakeup_state(ErtsPollSet ps)
+reset_wakeup_state(ErtsPollSet *ps)
{
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
-#endif
}
+#endif
static ERTS_INLINE int
-is_woken(ErtsPollSet ps)
+is_woken(ErtsPollSet *ps)
{
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
#else
return 0;
@@ -407,10 +375,10 @@ is_woken(ErtsPollSet ps)
}
static ERTS_INLINE int
-is_interrupted_reset(ErtsPollSet ps)
+is_interrupted_reset(ErtsPollSet *ps)
{
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- return (erts_atomic32_xchg_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
#else
return 0;
@@ -418,10 +386,10 @@ is_interrupted_reset(ErtsPollSet ps)
}
static ERTS_INLINE void
-woke_up(ErtsPollSet ps)
+woke_up(ErtsPollSet *ps)
{
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
@@ -437,33 +405,23 @@ woke_up(ErtsPollSet ps)
#if ERTS_POLL_USE_WAKEUP_PIPE
static ERTS_INLINE void
-wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe)
+wake_poller(ErtsPollSet *ps, int interrupted)
{
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int wake;
- if (async_signal_safe)
- wake = 1;
- else {
- erts_aint32_t wakeup_state;
- if (!interrupted)
- wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state,
- ERTS_POLL_WOKEN,
- ERTS_POLL_NOT_WOKEN);
- else {
- /*
- * We might unnecessarily write to the pipe, however,
- * that isn't problematic.
- */
- wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
- erts_atomic32_set_relb(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
- }
- wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
- }
- /*
- * NOTE: This function might be called from signal handlers in the
- * non-smp case; therefore, it has to be async-signal safe in
- * the non-smp case.
- */
- if (wake) {
+ erts_aint32_t wakeup_state;
+ if (!interrupted)
+ wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state,
+ ERTS_POLL_WOKEN,
+ ERTS_POLL_NOT_WOKEN);
+ else
+ wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state,
+ ERTS_POLL_WOKEN_INTR);
+ wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
+
+ if (wake)
+#endif
+ {
ssize_t res;
if (ps->wake_fds[1] < 0)
return; /* Not initialized yet */
@@ -472,36 +430,27 @@ wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe)
res = write(ps->wake_fds[1], "!", 1);
} while (res < 0 && errno == EINTR);
if (res <= 0 && errno != ERRNO_BLOCK) {
- if (async_signal_safe)
- fatal_error_async_signal_safe(__FILE__
- ":XXX:wake_poller(): "
- "Failed to write on wakeup pipe\n");
- else
- fatal_error("%s:%d:wake_poller(): "
- "Failed to write to wakeup pipe fd=%d: "
- "%s (%d)\n",
- __FILE__, __LINE__,
- ps->wake_fds[1],
- erl_errno_id(errno), errno);
+ fatal_error("%s:%d:wake_poller(): "
+ "Failed to write to wakeup pipe fd=%d: "
+ "%s (%d)\n",
+ __FILE__, __LINE__,
+ ps->wake_fds[1],
+ erl_errno_id(errno), errno);
}
}
}
static ERTS_INLINE void
-cleanup_wakeup_pipe(ErtsPollSet ps)
+cleanup_wakeup_pipe(ErtsPollSet *ps)
{
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
int intr = 0;
-#endif
int fd = ps->wake_fds[0];
int res;
do {
char buf[32];
res = read(fd, buf, sizeof(buf));
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
if (res > 0)
intr = 1;
-#endif
} while (res > 0 || (res < 0 && errno == EINTR));
if (res < 0 && errno != ERRNO_BLOCK) {
fatal_error("%s:%d:cleanup_wakeup_pipe(): "
@@ -511,14 +460,14 @@ cleanup_wakeup_pipe(ErtsPollSet ps)
fd,
erl_errno_id(errno), errno);
}
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (intr)
erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
#endif
}
static void
-create_wakeup_pipe(ErtsPollSet ps)
+create_wakeup_pipe(ErtsPollSet *ps)
{
int do_wake = 0;
int wake_fds[2];
@@ -535,20 +484,13 @@ create_wakeup_pipe(ErtsPollSet ps)
SET_NONBLOCKING(wake_fds[0]);
SET_NONBLOCKING(wake_fds[1]);
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("wakeup fds = {%d, %d}\n", wake_fds[0], wake_fds[1]);
-#endif
+ DEBUG_PRINT("wakeup fds = {%d, %d}", ps, wake_fds[0], wake_fds[1]);
ERTS_POLL_EXPORT(erts_poll_control)(ps,
wake_fds[0],
+ ERTS_POLL_OP_ADD,
ERTS_POLL_EV_IN,
- 1, &do_wake);
-#if ERTS_POLL_USE_FALLBACK
- /* We depend on the wakeup pipe being handled by kernel poll */
- if (ps->fds_status[wake_fds[0]].flags & ERTS_POLL_FD_FLG_INFLBCK)
- fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n",
- __FILE__, __LINE__);
-#endif
+ &do_wake);
if (ps->internal_fd_limit <= wake_fds[1])
ps->internal_fd_limit = wake_fds[1] + 1;
if (ps->internal_fd_limit <= wake_fds[0])
@@ -557,15 +499,16 @@ create_wakeup_pipe(ErtsPollSet ps)
ps->wake_fds[1] = wake_fds[1];
}
-#endif /* ERTS_POLL_USE_WAKEUP_PIPE */
+#endif
/*
* --- Poll set update requests ----------------------------------------------
*/
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
-enqueue_update_request(ErtsPollSet ps, int fd)
+enqueue_update_request(ErtsPollSet *ps, int fd)
{
ErtsPollSetUpdateRequestsBlock *urqbp;
@@ -580,13 +523,11 @@ enqueue_update_request(ErtsPollSet ps, int fd)
urqbp = ps->curr_upd_req_block;
if (urqbp->len == ERTS_POLLSET_UPDATE_REQ_BLOCK_SIZE) {
- ASSERT(!urqbp->next);
urqbp = erts_alloc(ERTS_ALC_T_POLLSET_UPDREQ,
sizeof(ErtsPollSetUpdateRequestsBlock));
- ps->curr_upd_req_block->next = urqbp;
- ps->curr_upd_req_block = urqbp;
- urqbp->next = NULL;
+ urqbp->next = ps->curr_upd_req_block;
urqbp->len = 0;
+ ps->curr_upd_req_block = urqbp;
}
ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INURQ;
@@ -594,72 +535,57 @@ enqueue_update_request(ErtsPollSet ps, int fd)
}
static ERTS_INLINE void
-free_update_requests_block(ErtsPollSet ps,
+free_update_requests_block(ErtsPollSet *ps,
ErtsPollSetUpdateRequestsBlock *urqbp)
{
if (urqbp != &ps->update_requests)
erts_free(ERTS_ALC_T_POLLSET_UPDREQ, (void *) urqbp);
else {
- urqbp->next = NULL;
urqbp->len = 0;
}
}
-#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */
/*
* --- Growing poll set structures -------------------------------------------
*/
-int
-ERTS_POLL_EXPORT(erts_poll_get_table_len) (int new_len)
-{
- if (new_len < ERTS_EV_TABLE_MIN_LENGTH) {
- new_len = ERTS_EV_TABLE_MIN_LENGTH;
- } else if (new_len < ERTS_EV_TABLE_EXP_THRESHOLD) {
- /* find next power of 2 */
- --new_len;
- new_len |= new_len >> 1;
- new_len |= new_len >> 2;
- new_len |= new_len >> 4;
- new_len |= new_len >> 8;
- new_len |= new_len >> 16;
- ++new_len;
- } else {
- /* grow incrementally */
- new_len += ERTS_EV_TABLE_EXP_THRESHOLD;
- }
- return new_len;
-}
+#if !ERTS_NO_KERNEL_POLL_VERSION || !ERTS_ENABLE_KERNEL_POLL
+/* only one shared implementation */
+#define ERTS_FD_TABLE_MIN_LENGTH 1024
+#define ERTS_FD_TABLE_EXP_THRESHOLD (2048*1024)
-#if ERTS_POLL_USE_KERNEL_POLL
-static void
-grow_res_events(ErtsPollSet ps, int new_len)
+int erts_poll_new_table_len(int old_len, int need_len)
{
- size_t new_size = sizeof(
-#if ERTS_POLL_USE_EPOLL
- struct epoll_event
-#elif ERTS_POLL_USE_DEVPOLL
- struct pollfd
-#elif ERTS_POLL_USE_KQUEUE
- struct kevent
-#endif
- ) * ERTS_POLL_EXPORT(erts_poll_get_table_len)(new_len);
- /* We do not need to save previously stored data */
- if (ps->res_events)
- erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events);
- ps->res_events = erts_alloc(ERTS_ALC_T_POLL_RES_EVS, new_size);
- ps->res_events_len = new_len;
+ int new_len;
+
+ ASSERT(need_len > old_len);
+ if (need_len < ERTS_FD_TABLE_MIN_LENGTH) {
+ new_len = ERTS_FD_TABLE_MIN_LENGTH;
+ }
+ else {
+ new_len = old_len;
+ do {
+ if (new_len < ERTS_FD_TABLE_EXP_THRESHOLD)
+ new_len *= 2;
+ else
+ new_len += ERTS_FD_TABLE_EXP_THRESHOLD;
+
+ } while (new_len < need_len);
+ }
+ ASSERT(new_len >= need_len);
+ return new_len;
}
-#endif /* ERTS_POLL_USE_KERNEL_POLL */
+#endif
#if ERTS_POLL_USE_POLL
static void
-grow_poll_fds(ErtsPollSet ps, int min_ix)
+grow_poll_fds(ErtsPollSet *ps, int min_ix)
{
int i;
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_ix + 1);
+ int new_len = erts_poll_new_table_len(ps->poll_fds_len, min_ix + 1);
if (new_len > max_fds)
new_len = max_fds;
ps->poll_fds = (ps->poll_fds_len
@@ -681,7 +607,7 @@ grow_poll_fds(ErtsPollSet ps, int min_ix)
static void
grow_select_fds(int fd, ERTS_fd_set* fds)
{
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(fd + 1);
+ int new_len = erts_poll_new_table_len(fds->sz, fd + 1);
if (new_len > max_fds)
new_len = max_fds;
new_len = ERTS_FD_SIZE(new_len);
@@ -704,11 +630,12 @@ ensure_select_fds(int fd, ERTS_fd_set* in, ERTS_fd_set* out)
# define ensure_select_fds(fd, in, out) do {} while(0)
#endif /* _DARWIN_UNLIMITED_SELECT */
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static void
-grow_fds_status(ErtsPollSet ps, int min_fd)
+grow_fds_status(ErtsPollSet *ps, int min_fd)
{
int i;
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_fd + 1);
+ int new_len = erts_poll_new_table_len(ps->fds_status_len, min_fd + 1);
ASSERT(min_fd < max_fds);
if (new_len > max_fds)
new_len = max_fds;
@@ -724,430 +651,26 @@ grow_fds_status(ErtsPollSet ps, int min_fd)
#endif
ps->fds_status[i].used_events = (ErtsPollEvents) 0;
ps->fds_status[i].events = (ErtsPollEvents) 0;
-#if ERTS_POLL_COALESCE_KP_RES
- ps->fds_status[i].res_ev_ix = (unsigned short) ERTS_POLL_MAX_RES;
-#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE || ERTS_POLL_USE_FALLBACK
ps->fds_status[i].flags = (unsigned short) 0;
-#endif
}
ps->fds_status_len = new_len;
}
+#endif
/*
* --- Selecting fd to poll on -----------------------------------------------
*/
-#if ERTS_POLL_USE_FALLBACK
-static int update_fallback_pollset(ErtsPollSet ps, int fd);
-#endif
-
-static ERTS_INLINE int
-need_update(ErtsPollSet ps, int fd)
-{
-#if ERTS_POLL_USE_KERNEL_POLL
- int reset;
-#endif
-
- ASSERT(fd < ps->fds_status_len);
-
-#if ERTS_POLL_USE_KERNEL_POLL
- reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST);
- if (reset && !ps->fds_status[fd].used_events) {
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
- reset = 0;
- }
-#elif defined(ERTS_SMP)
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
-#endif
-
- if (ps->fds_status[fd].used_events != ps->fds_status[fd].events)
- return 1;
-
-#if ERTS_POLL_USE_KERNEL_POLL
- return reset;
-#else
- return 0;
-#endif
-}
-
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
-
-#if ERTS_POLL_USE_KQUEUE
-#define ERTS_POLL_MIN_BATCH_BUF_SIZE 128
-#else
-#define ERTS_POLL_MIN_BATCH_BUF_SIZE 64
-#endif
-
-typedef struct {
- int len;
- int size;
-#if ERTS_POLL_USE_DEVPOLL
- struct pollfd *buf;
-#elif ERTS_POLL_USE_KQUEUE
- struct kevent *buf;
- struct kevent *ebuf;
-#endif
-} ErtsPollBatchBuf;
-
-
-static ERTS_INLINE void
-setup_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
-{
- bbp->len = 0;
-#if ERTS_POLL_USE_DEVPOLL
- bbp->size = ps->res_events_len;
- bbp->buf = ps->res_events;
-#elif ERTS_POLL_USE_KQUEUE
- bbp->size = ps->res_events_len/2;
- bbp->buf = ps->res_events;
- bbp->ebuf = bbp->buf + bbp->size;
-#endif
-}
-
-
-#if ERTS_POLL_USE_DEVPOLL
-
-static void
-write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
-{
- ssize_t wres;
- char *buf = (char *) bbp->buf;
- size_t buf_size = sizeof(struct pollfd)*bbp->len;
-
- while (1) {
- wres = write(ps->kp_fd, (void *) buf, buf_size);
- if (wres < 0) {
- if (errno == EINTR)
- continue;
- fatal_error("%s:%d:write_batch_buf(): "
- "Failed to write to /dev/poll: "
- "%s (%d)\n",
- __FILE__, __LINE__,
- erl_errno_id(errno), errno);
- }
- buf_size -= wres;
- if (buf_size <= 0)
- break;
- buf += wres;
- }
-
- if (buf_size < 0) {
- fatal_error("%s:%d:write_devpoll_buf(): Internal error\n",
- __FILE__, __LINE__);
- }
- bbp->len = 0;
-}
-
-#elif ERTS_POLL_USE_KQUEUE
-
-static void
-write_batch_buf(ErtsPollSet ps, ErtsPollBatchBuf *bbp)
-{
- int res;
- int len = bbp->len;
- struct kevent *buf = bbp->buf;
- struct timespec ts = {0, 0};
-
- do {
- res = kevent(ps->kp_fd, buf, len, NULL, 0, &ts);
- } while (res < 0 && errno == EINTR);
- if (res < 0) {
- int i;
- struct kevent *ebuf = bbp->ebuf;
- do {
- res = kevent(ps->kp_fd, buf, len, ebuf, len, &ts);
- } while (res < 0 && errno == EINTR);
- if (res < 0) {
- fatal_error("%s:%d: kevent() failed: %s (%d)\n",
- __FILE__, __LINE__, erl_errno_id(errno), errno);
- }
- for (i = 0; i < res; i++) {
- if (ebuf[i].flags & EV_ERROR) {
- short filter;
- int fd = (int) ebuf[i].ident;
-
- switch ((int) (long) ebuf[i].udata) {
-
- /*
- * Since we use a lazy update approach EV_DELETE will
- * frequently fail. This since kqueue automatically
- * removes a file descriptor that is closed from the
- * poll set.
- */
- case ERTS_POLL_KQ_OP_DEL_R:
- case ERTS_POLL_KQ_OP_DEL_W:
- case ERTS_POLL_KQ_OP_HANDLED:
- break;
-
- /*
- * According to the kqueue man page EVFILT_READ support
- * does not imply EVFILT_WRITE support; therefore,
- * if an EV_ADD fail, we may have to remove other
- * events on this fd in the kqueue pollset before
- * adding fd to the fallback pollset.
- */
- case ERTS_POLL_KQ_OP_ADD_W:
- if (ps->fds_status[fd].used_events & ERTS_POLL_EV_IN) {
- filter = EVFILT_READ;
- goto rm_add_fb;
- }
- goto add_fb;
- case ERTS_POLL_KQ_OP_ADD_R:
- if (ps->fds_status[fd].used_events & ERTS_POLL_EV_OUT) {
- filter = EVFILT_WRITE;
- goto rm_add_fb;
- }
- goto add_fb;
- case ERTS_POLL_KQ_OP_ADD2_W:
- case ERTS_POLL_KQ_OP_ADD2_R: {
- int j;
- for (j = i+1; j < res; j++) {
- if (fd == (int) ebuf[j].ident) {
- ebuf[j].udata = (void *) ERTS_POLL_KQ_OP_HANDLED;
- if (!(ebuf[j].flags & EV_ERROR)) {
- switch ((int) (long) ebuf[j].udata) {
- case ERTS_POLL_KQ_OP_ADD2_W:
- filter = EVFILT_WRITE;
- goto rm_add_fb;
- case ERTS_POLL_KQ_OP_ADD2_R:
- filter = EVFILT_READ;
- goto rm_add_fb;
- default:
- fatal_error("%s:%d:write_batch_buf(): "
- "Internal error",
- __FILE__, __LINE__);
- break;
- }
- }
- goto add_fb;
- }
- }
- /* The other add succeded... */
- filter = ((((int) (long) ebuf[i].udata)
- == ERTS_POLL_KQ_OP_ADD2_W)
- ? EVFILT_READ
- : EVFILT_WRITE);
- rm_add_fb:
- {
- struct kevent kev;
- struct timespec ts = {0, 0};
- EV_SET(&kev, fd, filter, EV_DELETE, 0, 0, 0);
- (void) kevent(ps->kp_fd, &kev, 1, NULL, 0, &ts);
- }
-
- add_fb:
- ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK;
- ASSERT(ps->fds_status[fd].used_events);
- ps->fds_status[fd].used_events = 0;
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- update_fallback_pollset(ps, fd);
- ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
- break;
- }
- default:
- fatal_error("%s:%d:write_batch_buf(): Internal error",
- __FILE__, __LINE__);
- break;
- }
- }
- }
- }
- bbp->len = 0;
-}
-
-#endif /* ERTS_POLL_USE_KQUEUE */
-
-static ERTS_INLINE void
-batch_update_pollset(ErtsPollSet ps, int fd, ErtsPollBatchBuf *bbp)
-{
- int buf_len;
-#if ERTS_POLL_USE_DEVPOLL
- short events;
- struct pollfd *buf;
-#elif ERTS_POLL_USE_KQUEUE
- struct kevent *buf;
-#endif
-
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("Doing lazy update on fd=%d\n", fd);
-#endif
-
- if (!need_update(ps, fd))
- return;
-
- /* Make sure we have room for at least maximum no of entries
- per fd */
- if (bbp->size - bbp->len < 2)
- write_batch_buf(ps, bbp);
-
- buf_len = bbp->len;
- buf = bbp->buf;
-
- ASSERT(fd < ps->fds_status_len);
-
-#if ERTS_POLL_USE_DEVPOLL
- events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
- if (!events) {
- buf[buf_len].events = POLLREMOVE;
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- }
- else if (!ps->fds_status[fd].used_events) {
- buf[buf_len].events = events;
- erts_smp_atomic_inc_nob(&ps->no_of_user_fds);
- }
- else {
- if ((ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)
- || (ps->fds_status[fd].used_events & ~events)) {
- /* Reset or removed events... */
- buf[buf_len].fd = fd;
- buf[buf_len].events = POLLREMOVE;
- buf[buf_len++].revents = 0;
- }
- buf[buf_len].events = events;
- }
- buf[buf_len].fd = fd;
- buf[buf_len++].revents = 0;
-
-#elif ERTS_POLL_USE_KQUEUE
-
- if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) {
- if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK)
- update_fallback_pollset(ps, fd);
- else { /* Remove from fallback and try kqueue */
- ErtsPollEvents events = ps->fds_status[fd].events;
- ps->fds_status[fd].events = (ErtsPollEvents) 0;
- update_fallback_pollset(ps, fd);
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
- if (events) {
- ps->fds_status[fd].events = events;
- goto try_kqueue;
- }
- }
- }
- else {
- ErtsPollEvents events, used_events;
- int mod_w, mod_r;
- try_kqueue:
- events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
- used_events = ERTS_POLL_EV_E2N(ps->fds_status[fd].used_events);
- if (!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)) {
- if (!used_events &&
- (events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT))
- goto do_add_rw;
- mod_r = ((events & ERTS_POLL_EV_IN)
- != (used_events & ERTS_POLL_EV_IN));
- mod_w = ((events & ERTS_POLL_EV_OUT)
- != (used_events & ERTS_POLL_EV_OUT));
- goto do_mod;
- }
- else { /* Reset */
- if ((events & ERTS_POLL_EV_IN) && (events & ERTS_POLL_EV_OUT)) {
- do_add_rw:
- EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD,
- 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_R);
- buf_len++;
- EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD,
- 0, 0, (void *) ERTS_POLL_KQ_OP_ADD2_W);
- buf_len++;
-
- }
- else {
- mod_r = 1;
- mod_w = 1;
- do_mod:
- if (mod_r) {
- if (events & ERTS_POLL_EV_IN) {
- EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_ADD,
- 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_R);
- buf_len++;
- }
- else if (used_events & ERTS_POLL_EV_IN) {
- EV_SET(&buf[buf_len], fd, EVFILT_READ, EV_DELETE,
- 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_R);
- buf_len++;
- }
- }
- if (mod_w) {
- if (events & ERTS_POLL_EV_OUT) {
- EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_ADD,
- 0, 0, (void *) ERTS_POLL_KQ_OP_ADD_W);
- buf_len++;
- }
- else if (used_events & ERTS_POLL_EV_OUT) {
- EV_SET(&buf[buf_len], fd, EVFILT_WRITE, EV_DELETE,
- 0, 0, (void *) ERTS_POLL_KQ_OP_DEL_W);
- buf_len++;
- }
- }
- }
- }
- if (used_events) {
- if (!events) {
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- }
- }
- else {
- if (events)
- erts_smp_atomic_inc_nob(&ps->no_of_user_fds);
- }
- ASSERT((events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0);
- ASSERT((used_events & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) == 0);
- }
-
-#endif
-
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
- ps->fds_status[fd].used_events = ps->fds_status[fd].events;
-
- bbp->len = buf_len;
-}
-
-#else /* !ERTS_POLL_USE_BATCH_UPDATE_POLLSET */
-
#if ERTS_POLL_USE_EPOLL
static int
-#if ERTS_POLL_USE_CONCURRENT_UPDATE
-conc_update_pollset(ErtsPollSet ps, int fd, int *update_fallback)
-#else
-update_pollset(ErtsPollSet ps, int fd)
-#endif
+update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
{
int res;
- int op;
+ int epoll_op = EPOLL_CTL_MOD;
struct epoll_event epe_templ;
struct epoll_event epe;
- ASSERT(fd < ps->fds_status_len);
-
- if (!need_update(ps, fd))
- return 0;
-
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("Doing update on fd=%d\n", fd);
-#endif
- if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK) {
-#if ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!*update_fallback) {
- *update_fallback = 1;
- return 0;
- }
-#endif
- if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK) {
- return update_fallback_pollset(ps, fd);
- }
- else { /* Remove from fallback and try epoll */
- ErtsPollEvents events = ps->fds_status[fd].events;
- ps->fds_status[fd].events = (ErtsPollEvents) 0;
- res = update_fallback_pollset(ps, fd);
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
- if (!events)
- return res;
- ps->fds_status[fd].events = events;
- }
- }
-
- epe_templ.events = ERTS_POLL_EV_E2N(ps->fds_status[fd].events);
+ epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT;
epe_templ.data.fd = fd;
#ifdef VALGRIND
@@ -1155,30 +678,24 @@ update_pollset(ErtsPollSet ps, int fd)
memset((void *) &epe.data, 0, sizeof(epoll_data_t));
#endif
- if (epe_templ.events && ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST) {
- do {
- /* We init 'epe' every time since epoll_ctl() may modify it
- (not declared const and not documented as const). */
- epe.events = epe_templ.events;
- epe.data.fd = epe_templ.data.fd;
- res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe);
- } while (res != 0 && errno == EINTR);
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- ps->fds_status[fd].used_events = 0;
- }
-
- if (!epe_templ.events) {
+ switch (op) {
+ case ERTS_POLL_OP_DEL:
/* A note on EPOLL_CTL_DEL: linux kernel versions before 2.6.9
need a non-NULL event pointer even though it is ignored... */
- op = EPOLL_CTL_DEL;
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- }
- else if (!ps->fds_status[fd].used_events) {
- op = EPOLL_CTL_ADD;
- erts_smp_atomic_inc_nob(&ps->no_of_user_fds);
- }
- else {
- op = EPOLL_CTL_MOD;
+ epoll_op = EPOLL_CTL_DEL;
+ epe_templ.events = 0;
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ break;
+ case ERTS_POLL_OP_ADD:
+ epoll_op = EPOLL_CTL_ADD;
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
+ break;
+ case ERTS_POLL_OP_MOD:
+ epoll_op = EPOLL_CTL_MOD;
+ break;
+ default:
+ ASSERT(0);
+ break;
}
do {
@@ -1186,33 +703,32 @@ update_pollset(ErtsPollSet ps, int fd)
(not declared const and not documented as const). */
epe.events = epe_templ.events;
epe.data.fd = epe_templ.data.fd;
- res = epoll_ctl(ps->kp_fd, op, fd, &epe);
+ res = epoll_ctl(ps->kp_fd, epoll_op, fd, &epe);
} while (res != 0 && errno == EINTR);
-#if defined(ERTS_POLL_DEBUG_PRINT) && 1
+#if ERTS_POLL_DEBUG_PRINT
{
int saved_errno = errno;
- erts_printf("%s = epoll_ctl(%d, %s, %d, {Ox%x, %d})\n",
- res == 0 ? "0" : erl_errno_id(errno),
- ps->kp_fd,
- (op == EPOLL_CTL_ADD
- ? "EPOLL_CTL_ADD"
- : (op == EPOLL_CTL_MOD
- ? "EPOLL_CTL_MOD"
- : (op == EPOLL_CTL_DEL
- ? "EPOLL_CTL_DEL"
- : "UNKNOWN"))),
- fd,
- epe_templ.events,
- fd);
+ DEBUG_PRINT_FD("%s = epoll_ctl(%d, %s, %d, {0x%x, %d})",
+ ps, fd,
+ res == 0 ? "0" : erl_errno_id(errno),
+ ps->kp_fd,
+ (epoll_op == EPOLL_CTL_ADD
+ ? "EPOLL_CTL_ADD"
+ : (epoll_op == EPOLL_CTL_MOD
+ ? "EPOLL_CTL_MOD"
+ : (epoll_op == EPOLL_CTL_DEL
+ ? "EPOLL_CTL_DEL"
+ : "UNKNOWN"))),
+ fd,
+ epe_templ.events,
+ fd);
errno = saved_errno;
}
#endif
- if (res == 0)
- ps->fds_status[fd].used_events = ps->fds_status[fd].events;
- else {
+ if (res != 0) {
switch (op) {
- case EPOLL_CTL_MOD:
+ case ERTS_POLL_OP_MOD:
epe.events = 0;
do {
/* We init 'epe' every time since epoll_ctl() may modify it
@@ -1221,29 +737,18 @@ update_pollset(ErtsPollSet ps, int fd)
epe.data.fd = fd;
res = epoll_ctl(ps->kp_fd, EPOLL_CTL_DEL, fd, &epe);
} while (res != 0 && errno == EINTR);
- ps->fds_status[fd].used_events = 0;
/* Fall through ... */
- case EPOLL_CTL_ADD: {
- ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_USEFLBCK;
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
-#if ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!*update_fallback) {
- *update_fallback = 1;
- return 0;
- }
-#endif
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
- res = update_fallback_pollset(ps, fd);
- ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
+ case ERTS_POLL_OP_ADD: {
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ res = ERTS_POLL_EV_NVAL;
break;
}
- case EPOLL_CTL_DEL: {
+ case ERTS_POLL_OP_DEL: {
/*
* Since we use a lazy update approach EPOLL_CTL_DEL will
* frequently fail. This since epoll automatically removes
* a filedescriptor that is closed from the poll set.
*/
- ps->fds_status[fd].used_events = 0;
res = 0;
break;
}
@@ -1252,68 +757,278 @@ update_pollset(ErtsPollSet ps, int fd)
__FILE__, __LINE__);
break;
}
+ } else {
+ res = events;
}
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
return res;
}
-#if ERTS_POLL_USE_CONCURRENT_UPDATE
+#endif /* ERTS_POLL_USE_EPOLL */
+
+#if ERTS_POLL_USE_KQUEUE
+
+/* Some versions of the EV_SET macro used kevp multiple times,
+ so we define out own version that make sure that it is safe
+ to do kevp++ in the argument list. */
+#define ERTS_EV_SET(kevp, a, b, c, f) do { \
+ struct kevent *kevp_ = kevp; \
+ EV_SET(kevp_, a, b, c, 0, 0, f); \
+ } while(0)
+
static int
-update_pollset(ErtsPollSet ps, int fd)
+update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
{
- int update_fallback = 1;
- return conc_update_pollset(ps, fd, &update_fallback);
-}
-#endif
-
-#endif /* ERTS_POLL_USE_EPOLL */
+ int res = 0, len = 0;
+ struct kevent evts[2];
+ struct timespec ts = {0, 0};
-#endif /* ERTS_POLL_USE_BATCH_UPDATE_POLLSET */
+#ifdef EV_DISPATCH
+ /* If we have EV_DISPATCH we use it. The kevent descriptions for both
+ read and write are added on OP_ADD and removed on OP_DEL. And then
+ after than only EV_ENABLE|EV_DISPATCH are used.
+
+ It could be possible to not modify the pollset when disabling and/or
+ deleting events, but that may cause the poll threads to be awoken
+ a lot more than they should so we take the cost here instead of
+ in the poll thread.
+
+ Note: We need to have EV_DISPATCH both when the event is enabled and
+ disabled, as otherwise the event may be triggered twice on each re-arm.
+ Not sure if this is intended or not (can't find anything about it in the
+ man page), but it seems to be the way it works...
+ */
+
+ if (op == ERTS_POLL_OP_DEL) {
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ /* We could probably skip this delete, do we want to? */
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE, (void *) 0);
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE, (void *) 0);
+ } else if (op == ERTS_POLL_OP_ADD) {
+ uint32_t flags;
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
+
+ flags = EV_ADD|EV_DISPATCH;
+ flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
+
+ flags = EV_ADD|EV_DISPATCH;
+ flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
+ } else {
+ uint32_t flags;
+ ASSERT(op == ERTS_POLL_OP_MOD);
-#if ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK
+ flags = EV_DISPATCH;
+ flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
-#if ERTS_POLL_USE_FALLBACK
-static int update_fallback_pollset(ErtsPollSet ps, int fd)
+ flags = EV_DISPATCH;
+ flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
+ }
#else
-static int update_pollset(ErtsPollSet ps, int fd)
+ uint32_t flags = EV_ADD|EV_ONESHOT;
+
+ if (op == ERTS_POLL_OP_DEL) {
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ /* We don't do anything when a delete is issued. The fds will be removed
+ when they are triggered, or when they are closed. */
+ events = 0;
+ } else if (op == ERTS_POLL_OP_ADD) {
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
+ }
+
+ if (events & ERTS_POLL_EV_IN) {
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
+ }
+ if (events & ERTS_POLL_EV_OUT) {
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
+ }
+
#endif
-{
-#ifdef ERTS_POLL_DEBUG_PRINT
-#if ERTS_POLL_USE_FALLBACK
- erts_printf("Doing fallback update on fd=%d\n", fd);
+ if (len)
+ do {
+ res = kevent(ps->kp_fd, evts, len, NULL, 0, &ts);
+ } while (res < 0 && errno == EINTR);
+#if ERTS_POLL_DEBUG_PRINT
+ {
+ int saved_errno = errno, i;
+ char keventb[255], *keventbp = keventb;
+ if (res < 0)
+ keventbp += sprintf(keventbp,"%s = ",erl_errno_id(saved_errno));
+ else
+ keventbp += sprintf(keventbp,"%d = ",res);
+ keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
+ for (i = 0; i < len; i++) {
+ const char *flags = "UNKNOWN";
+ if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+ if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+#ifdef EV_DISPATCH
+ if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
+ if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
+ if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
+ if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+ if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE";
+#endif
+
+ keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
+ evts[i].ident,
+ (evts[i].filter == EVFILT_READ
+ ? "EVFILT_READ"
+ : (evts[i].filter == EVFILT_WRITE
+ ? "EVFILT_WRITE"
+ : "UNKNOWN")), flags);
+ }
+ keventbp += sprintf(keventbp, "}, %d)", len);
+ DEBUG_PRINT_FD("%s", ps, fd, keventb);
+ errno = saved_errno;
+ }
+#endif
+ if (res < 0) {
+ if (op != ERTS_POLL_OP_DEL) {
+#ifdef EV_RECEIPT
+ struct kevent receipt_evts[2];
+ len = 0;
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, EV_DELETE|EV_RECEIPT, (void *) 0);
+ ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, EV_DELETE|EV_RECEIPT, (void *) 0);
+ do {
+ res = kevent(ps->kp_fd, evts, len, receipt_evts, 2, &ts);
+ } while (res < 0 && errno == EINTR);
#else
- erts_printf("Doing update on fd=%d\n", fd);
+ ERTS_EV_SET(&evts[0], fd, EVFILT_WRITE, EV_DELETE, (void *) 0);
+ do {
+ res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts);
+ } while (res < 0 && errno == EINTR);
+ ERTS_EV_SET(&evts[0], fd, EVFILT_READ, EV_DELETE, (void *) 0);
+ do {
+ res = kevent(ps->kp_fd, evts, 1, NULL, 0, &ts);
+ } while (res < 0 && errno == EINTR);
+#endif
+ if (op == ERTS_POLL_OP_ADD)
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ events = ERTS_POLL_EV_NVAL;
+ } else
+ events = 0;
+ }
+ return events;
+}
+
+#endif /* ERTS_POLL_USE_KQUEUE */
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
+static ERTS_INLINE void
+init_batch_update(ErtsPollSet *ps, int len)
+{
+#if ERTS_POLL_USE_DEVPOLL
+ ASSERT(ps->poll_fds == NULL);
+ ps->poll_fds = erts_alloc(ERTS_ALC_T_TMP, sizeof(struct pollfd) * len);
+ ps->poll_fds_ix = 0;
#endif
+}
+
+static ERTS_INLINE void
+write_batch_update(ErtsPollSet *ps)
+{
+#if ERTS_POLL_USE_DEVPOLL
+ ssize_t wres;
+ char *buf = (char *) ps->poll_fds;
+ size_t buf_size = sizeof(struct pollfd)*ps->poll_fds_ix;
+
+ while (1) {
+ wres = write(ps->kp_fd, (void *) buf, buf_size);
+ if (wres < 0) {
+ if (errno == EINTR)
+ continue;
+ fatal_error("%s:%d:write_batch_buf(): "
+ "Failed to write to /dev/poll: "
+ "%s (%d)\n",
+ __FILE__, __LINE__,
+ erl_errno_id(errno), errno);
+ }
+#if ERTS_POLL_DEBUG_PRINT
+ {
+ int saved_errno = errno, i;
+ char devpollb[2048], *devpollbp = devpollb;
+ devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd);
+ for (i = 0; i < wres / sizeof(struct pollfd); i++) {
+ if (devpollbp == devpollb)
+ devpollbp += sprintf(devpollbp, "%d = devpoll(%d, {", wres, ps->kp_fd);
+ devpollbp += sprintf(devpollbp, "%s{fd = %d, events = %s}",
+ i > 0 ? ", " : "",
+ ps->poll_fds[i].fd,
+ ev2str(ps->poll_fds[i].events));
+ if (devpollbp - devpollb > 512) {
+ devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix);
+ DEBUG_PRINT("%s", ps, devpollb);
+ devpollbp = devpollb;
+ }
+ }
+ devpollbp += sprintf(devpollbp, "}, %d)", ps->poll_fds_ix);
+ DEBUG_PRINT("%s", ps, devpollb);
+ errno = saved_errno;
+ }
#endif
- ASSERT(fd < ps->fds_status_len);
-#if ERTS_POLL_USE_FALLBACK
- ASSERT(ps->fds_status[fd].used_events
- ? (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)
- : (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_USEFLBCK));
+ buf_size -= wres;
+ if (buf_size <= 0)
+ break;
+ buf += wres;
+ }
+
+ if (buf_size < 0) {
+ fatal_error("%s:%d:write_devpoll_buf(): Internal error\n",
+ __FILE__, __LINE__);
+ }
+ erts_free(ERTS_ALC_T_TMP, ps->poll_fds);
+ ps->poll_fds = NULL;
#endif
+}
- if (!need_update(ps, fd))
- return 0;
+static ERTS_INLINE int
+need_update(ErtsPollSet *ps, int fd, int *resetp)
+{
+ int reset;
+ ASSERT(fd < ps->fds_status_len);
-#if ERTS_POLL_USE_FALLBACK
+ reset = (int) (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST);
ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_RST;
-#endif
+
+ *resetp = reset;
+
+ if (reset || ps->fds_status[fd].used_events != ps->fds_status[fd].events)
+ return 1;
+
+ return 0;
+}
+
+static int update_pollset(ErtsPollSet *ps, ErtsPollResFd pr[], int fd)
+{
+ int res = 0, reset = 0;
+ ErtsPollEvents events = ps->fds_status[fd].events;
+ ASSERT(fd < ps->fds_status_len);
+
+ if (!need_update(ps, fd, &reset))
+ return res;
#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
- if (!ps->fds_status[fd].events) {
+ if (!events) {
int pix = ps->fds_status[fd].pix;
int last_pix;
+
+ if (reset) {
+ /* When a fd has been reset, we tell the caller of erts_poll_wait
+ this by setting the fd as ERTS_POLL_EV_NONE */
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE);
+ DEBUG_PRINT_FD("trig %s (poll)", ps, fd, ev2str(ERTS_POLL_EV_NONE));
+ res++;
+ }
+
if (pix < 0) {
-#if ERTS_POLL_USE_FALLBACK
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
-#endif
- return -1;
+ return res;
}
-#if ERTS_POLL_USE_FALLBACK
- ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
-#endif
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
last_pix = --ps->no_poll_fds;
if (pix != last_pix) {
/* Move last pix to this pix */
@@ -1329,127 +1044,151 @@ static int update_pollset(ErtsPollSet ps, int fd)
/* Clear this fd status */
ps->fds_status[fd].pix = -1;
ps->fds_status[fd].used_events = (ErtsPollEvents) 0;
-#if ERTS_POLL_USE_FALLBACK
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK;
-#endif
+
}
else {
int pix = ps->fds_status[fd].pix;
if (pix < 0) {
-#if ERTS_POLL_USE_FALLBACK
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK)
- || fd == ps->kp_fd);
-#endif
- erts_smp_atomic_inc_nob(&ps->no_of_user_fds);
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
ps->fds_status[fd].pix = pix = ps->no_poll_fds++;
if (pix >= ps->poll_fds_len)
grow_poll_fds(ps, pix);
ps->poll_fds[pix].fd = fd;
ps->fds_status[fd].pix = pix;
-#if ERTS_POLL_USE_FALLBACK
- ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK;
-#endif
}
-#if ERTS_POLL_USE_FALLBACK
- ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK);
-#endif
-
/* Events to be used in next poll */
- ps->poll_fds[pix].events = ev2pollev(ps->fds_status[fd].events);
+ ps->poll_fds[pix].events = ev2pollev(events);
if (ps->poll_fds[pix].revents) {
/* Remove result events that we should not poll for anymore */
ps->poll_fds[pix].revents
&= ev2pollev(~(~ps->fds_status[fd].used_events
- & ps->fds_status[fd].events));
+ & events));
}
/* Save events to be used in next poll */
- ps->fds_status[fd].used_events = ps->fds_status[fd].events;
+ ps->fds_status[fd].used_events = events;
}
- return 0;
+ return res;
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- {
- ErtsPollEvents events = ps->fds_status[fd].events;
+ if (!events) {
+
+ if (reset) {
+ /* When a fd has been reset, we tell the caller of erts_poll_wait
+ this by setting the fd as ERTS_POLL_EV_NONE */
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE);
+ DEBUG_PRINT_FD("trig %s (select)", ps, fd, ev2str(ERTS_POLL_EV_NONE));
+ res++;
+ }
+
+ ERTS_FD_CLR(fd, &ps->input_fds);
+ ERTS_FD_CLR(fd, &ps->output_fds);
+
+ if (ps->fds_status[fd].used_events) {
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ ps->fds_status[fd].used_events = (ErtsPollEvents) 0;
+ }
+
+ if (fd == ps->max_fd) {
+ int max = ps->max_fd;
+ for (max = ps->max_fd; max >= 0; max--)
+ if (ps->fds_status[max].used_events)
+ break;
+ ps->max_fd = max;
+ }
+
+ } else {
+
ensure_select_fds(fd, &ps->input_fds, &ps->output_fds);
- if ((ERTS_POLL_EV_IN & events)
- != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) {
- if (ERTS_POLL_EV_IN & events) {
- ERTS_FD_SET(fd, &ps->input_fds);
- }
- else {
- ERTS_FD_CLR(fd, &ps->input_fds);
- }
- }
- if ((ERTS_POLL_EV_OUT & events)
- != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) {
- if (ERTS_POLL_EV_OUT & events) {
- ERTS_FD_SET(fd, &ps->output_fds);
- }
- else {
- ERTS_FD_CLR(fd, &ps->output_fds);
- }
- }
- if (!ps->fds_status[fd].used_events) {
- ASSERT(events);
- erts_smp_atomic_inc_nob(&ps->no_of_user_fds);
-#if ERTS_POLL_USE_FALLBACK
- ps->no_select_fds++;
- ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_INFLBCK;
-#endif
- }
- else if (!events) {
- ASSERT(ps->fds_status[fd].used_events);
- erts_smp_atomic_dec_nob(&ps->no_of_user_fds);
- ps->fds_status[fd].events = events;
-#if ERTS_POLL_USE_FALLBACK
- ps->no_select_fds--;
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INFLBCK;
-#endif
- }
+ if (!ps->fds_status[fd].used_events)
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
+
+ if (events & ERTS_POLL_EV_IN)
+ ERTS_FD_SET(fd, &ps->input_fds);
+ else
+ ERTS_FD_CLR(fd, &ps->input_fds);
+
+ if (events & ERTS_POLL_EV_OUT)
+ ERTS_FD_SET(fd, &ps->output_fds);
+ else
+ ERTS_FD_CLR(fd, &ps->output_fds);
ps->fds_status[fd].used_events = events;
- if (events && fd > ps->max_fd)
- ps->max_fd = fd;
- else if (!events && fd == ps->max_fd) {
- int max = ps->max_fd;
- for (max = ps->max_fd; max >= 0; max--)
- if (ps->fds_status[max].used_events)
- break;
- ps->max_fd = max;
- }
+ if (fd > ps->max_fd)
+ ps->max_fd = fd;
}
- return 0;
-#endif
-}
-#endif /* ERTS_POLL_USE_POLL || ERTS_POLL_USE_SELECT || ERTS_POLL_USE_FALLBACK */
+ return res;
+#elif ERTS_POLL_USE_DEVPOLL
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+ if (!events) {
-static void
-handle_update_requests(ErtsPollSet ps)
-{
- ErtsPollSetUpdateRequestsBlock *urqbp = &ps->update_requests;
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
- ErtsPollBatchBuf bb;
- setup_batch_buf(ps, &bb);
+ if (reset) {
+ /* When a fd has been reset, we tell the caller of erts_poll_wait
+ this by setting the fd as ERTS_POLL_EV_NONE */
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NONE);
+ DEBUG_PRINT_FD("trig %s (devpoll)", ps, fd, ev2str(ERTS_POLL_EV_NONE));
+ res++;
+ }
+
+ ps->poll_fds[ps->poll_fds_ix].fd = fd;
+ ps->poll_fds[ps->poll_fds_ix].revents = 0;
+ ps->poll_fds[ps->poll_fds_ix++].events = POLLREMOVE;
+
+ if (ps->fds_status[fd].used_events) {
+ erts_atomic_dec_nob(&ps->no_of_user_fds);
+ ps->fds_status[fd].used_events = 0;
+ }
+
+ } else {
+ if (!ps->fds_status[fd].used_events) {
+ erts_atomic_inc_nob(&ps->no_of_user_fds);
+ }
+ ps->poll_fds[ps->poll_fds_ix].fd = fd;
+ ps->poll_fds[ps->poll_fds_ix].revents = 0;
+ ps->poll_fds[ps->poll_fds_ix++].events = ERTS_POLL_EV_E2N(events);
+ ps->fds_status[fd].used_events = ps->fds_status[fd].events;
+ }
+
+ return res;
#endif
+}
+
+static int
+handle_update_requests(ErtsPollSet *ps, ErtsPollResFd pr[], int no_fds)
+{
+ int res = 0;
+ ErtsPollSetUpdateRequestsBlock *urqbp = ps->curr_upd_req_block;
while (urqbp) {
ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp;
int i;
int len = urqbp->len;
+
+ init_batch_update(ps, len);
+
for (i = 0; i < len; i++) {
int fd = urqbp->fds[i];
ASSERT(fd < ps->fds_status_len);
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ;
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
- batch_update_pollset(ps, fd, &bb);
-#else
- update_pollset(ps, fd);
-#endif
+ ASSERT(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ);
+
+ /* We have run out of PollResFd slots to put results in,
+ so we yield here and return later for more. */
+ if (res == no_fds && pr != NULL) {
+ memmove(urqbp->fds, urqbp->fds+i, sizeof(int) * (len - i));
+ urqbp->len -= i;
+ ps->curr_upd_req_block = urqbp;
+ write_batch_update(ps);
+ return res;
+ }
+
+ if (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INURQ) {
+ ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_INURQ;
+ res += update_pollset(ps, pr + res, fd);
+ }
}
free_urqbp = urqbp;
@@ -1457,12 +1196,9 @@ handle_update_requests(ErtsPollSet ps)
free_update_requests_block(ps, free_urqbp);
- }
+ write_batch_update(ps);
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
- if (bb.len)
- write_batch_buf(ps, &bb);
-#endif
+ }
ps->curr_upd_req_block = &ps->update_requests;
@@ -1471,17 +1207,19 @@ handle_update_requests(ErtsPollSet ps)
#endif
ERTS_POLLSET_UNSET_HAVE_UPDATE_REQUESTS(ps);
+ return res;
}
-#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */
static ERTS_INLINE ErtsPollEvents
-poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake)
+poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
+ ErtsPollEvents events, int *do_wake)
{
ErtsPollEvents new_events;
if (fd < ps->internal_fd_limit || fd >= max_fds) {
- if (fd < 0) {
+ if (fd < 0 || fd >= max_fds) {
new_events = ERTS_POLL_EV_ERR;
goto done;
}
@@ -1499,116 +1237,47 @@ poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake
#endif
}
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+
+ new_events = update_pollset(ps, fd, op, events);
+
+#else /* !ERTS_POLL_USE_CONCURRENT_UPDATE */
if (fd >= ps->fds_status_len)
grow_fds_status(ps, fd);
ASSERT(fd < ps->fds_status_len);
- new_events = ps->fds_status[fd].events;
-
- if (events == 0) {
- *do_wake = 0;
- goto done;
- }
-
- if (on)
- new_events |= events;
- else
- new_events &= ~events;
-
- if (new_events == (ErtsPollEvents) 0) {
-#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
- ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST;
-#endif
-#if ERTS_POLL_USE_FALLBACK
- ps->fds_status[fd].flags &= ~ERTS_POLL_FD_FLG_USEFLBCK;
-#endif
- }
-
- ps->fds_status[fd].events = new_events;
-
- if (new_events == ps->fds_status[fd].used_events
-#if ERTS_POLL_USE_KERNEL_POLL || defined(ERTS_SMP)
- && !(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST)
-#endif
- ) {
- *do_wake = 0;
- goto done;
- }
-
-#if !ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
- if (update_pollset(ps, fd) != 0)
- new_events = ERTS_POLL_EV_ERR;
-#else /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
-
-#if ERTS_POLL_USE_CONCURRENT_UPDATE
- if (ERTS_POLLSET_IS_POLLED(ps)) {
- int update_fallback = 0;
- conc_update_pollset(ps, fd, &update_fallback);
- if (!update_fallback) {
- *do_wake = 0; /* no need to wake kernel poller */
- goto done;
- }
+ if (op == ERTS_POLL_OP_DEL) {
+ ps->fds_status[fd].flags |= ERTS_POLL_FD_FLG_RST;
+ ps->fds_status[fd].events = 0;
+ *do_wake = 1;
+ } else if (op == ERTS_POLL_OP_ADD) {
+ ASSERT(ps->fds_status[fd].events == 0);
+ ps->fds_status[fd].events = events;
+ *do_wake = 1;
+ } else {
+ ASSERT(op == ERTS_POLL_OP_MOD);
+ ps->fds_status[fd].events = events;
+ *do_wake = 1;
}
-#endif
+ new_events = ps->fds_status[fd].events;
enqueue_update_request(ps, fd);
-
-#ifdef ERTS_SMP
- /*
- * If new events have been added, we need to wake up the
- * polling thread, but if events have been removed we don't.
- */
- if ((new_events && (ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_RST))
- || (~ps->fds_status[fd].used_events & new_events))
- *do_wake = 1;
-#endif /* ERTS_SMP */
-
-#endif /* ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE */
+
+#endif /* !ERTS_POLL_USE_CONCURRENT_UPDATE */
done:
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("0x%x = poll_control(ps, %d, 0x%x, %s) do_wake=%d\n",
- (int) new_events, fd, (int) events, (on ? "on" : "off"), *do_wake);
-#endif
+ DEBUG_PRINT_FD("%s = %s(%p, %d, %s, %s) do_wake=%d",
+ ps, fd, ev2str(new_events), __FUNCTION__, ps,
+ fd, op2str(op), ev2str(events), *do_wake);
return new_events;
}
-void
-ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps,
- ErtsPollControlEntry pcev[],
- int len)
-{
- int i;
- int do_wake;
- int final_do_wake = 0;
-
- ERTS_POLLSET_LOCK(ps);
-
- for (i = 0; i < len; i++) {
- do_wake = 0;
- pcev[i].events = poll_control(ps,
- pcev[i].fd,
- pcev[i].events,
- pcev[i].on,
- &do_wake);
- final_do_wake |= do_wake;
- }
-
- ERTS_POLLSET_UNLOCK(ps);
-
-#ifdef ERTS_SMP
- if (final_do_wake)
- wake_poller(ps, 0, 0);
-#endif /* ERTS_SMP */
-
-}
-
ErtsPollEvents
-ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps,
+ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
ErtsSysFdType fd,
+ ErtsPollOp op,
ErtsPollEvents events,
- int on,
int* do_wake) /* In: Wake up polling thread */
/* Out: Poller is woken */
{
@@ -1616,15 +1285,15 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps,
ERTS_POLLSET_LOCK(ps);
- res = poll_control(ps, fd, events, on, do_wake);
+ res = poll_control(ps, fd, op, events, do_wake);
ERTS_POLLSET_UNLOCK(ps);
-#ifdef ERTS_SMP
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (*do_wake) {
- wake_poller(ps, 0, 0);
+ wake_poller(ps, 0);
}
-#endif /* ERTS_SMP */
+#endif
return res;
}
@@ -1636,175 +1305,64 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps,
#if ERTS_POLL_USE_KERNEL_POLL
static ERTS_INLINE int
-save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res)
+ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
- int res = 0;
- int i;
- int n = chk_fds_res < max_res ? chk_fds_res : max_res;
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
+ int n = chk_fds_res < max_res ? chk_fds_res : max_res, i;
+ int res = n;
#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
#endif
for (i = 0; i < n; i++) {
-
-#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
-
- if (ps->res_events[i].events) {
- int fd = ps->res_events[i].data.fd;
- int ix;
- ErtsPollEvents revents;
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- continue;
- }
+ int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
+#ifdef DEBUG_PRINT_MODE
+ ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
#endif
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
- /* epoll_wait() can repeat the same fd in result array... */
- ix = (int) ps->fds_status[fd].res_ev_ix;
- ASSERT(ix >= 0);
- if (ix >= res || pr[ix].fd != fd) {
- ix = res;
- pr[ix].fd = fd;
- pr[ix].events = (ErtsPollEvents) 0;
- }
-
- revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
- pr[ix].events |= revents;
- if (revents) {
- if (res == ix) {
- ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
- res++;
- }
- }
- }
-
-#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
-
- struct kevent *ev;
- int fd;
- int ix;
-
- ev = &ps->res_events[i];
- fd = (int) ev->ident;
- ASSERT(fd < ps->fds_status_len);
- ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
- ix = (int) ps->fds_status[fd].res_ev_ix;
-
- ASSERT(ix >= 0);
- if (ix >= res || pr[ix].fd != fd) {
- ix = res;
- pr[ix].fd = (int) ev->ident;
- pr[ix].events = (ErtsPollEvents) 0;
- }
- if (ev->filter == EVFILT_READ) {
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- continue;
- }
+ DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
+ ev2str(evts),
+#if ERTS_POLL_USE_KQUEUE
+ "kqueue"
+#elif ERTS_POLL_USE_EPOLL
+ "epoll"
+#else
+ "/dev/poll"
#endif
- pr[ix].events |= ERTS_POLL_EV_IN;
- }
- else if (ev->filter == EVFILT_WRITE)
- pr[ix].events |= ERTS_POLL_EV_OUT;
- if (ev->flags & (EV_ERROR|EV_EOF)) {
- if ((ev->flags & EV_ERROR) && (((int) ev->data) == EBADF))
- pr[ix].events |= ERTS_POLL_EV_NVAL;
- else
- pr[ix].events |= ERTS_POLL_EV_ERR;
- }
- if (pr[ix].events) {
- if (res == ix) {
- ps->fds_status[fd].res_ev_ix = (unsigned short) ix;
- res++;
- }
- }
-
-#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+ );
- if (ps->res_events[i].revents) {
- int fd = ps->res_events[i].fd;
- ErtsPollEvents revents;
#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- continue;
- }
-#endif
- revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
- pr[res].fd = fd;
- pr[res].events = revents;
- res++;
- }
-
+ if (fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ if (n == 1)
+ return 0;
+ }
+#endif
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ else {
+ /* Reset the events to emulate ONESHOT semantics */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
+ }
#endif
-
}
return res;
-}
-
-#endif /* ERTS_POLL_USE_KERNEL_POLL */
-
-#if ERTS_POLL_USE_FALLBACK
-
-static int
-get_kp_results(ErtsPollSet ps, ErtsPollResFd pr[], int max_res)
-{
- int res;
-#if ERTS_POLL_USE_KQUEUE
- struct timespec ts = {0, 0};
-#endif
-
- if (max_res > ps->res_events_len)
- grow_res_events(ps, max_res);
-
- do {
-#if ERTS_POLL_USE_EPOLL
- res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0);
-#elif ERTS_POLL_USE_KQUEUE
- res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
-#endif
- } while (res < 0 && errno == EINTR);
-
- if (res < 0) {
- fatal_error("%s:%d: %s() failed: %s (%d)\n",
- __FILE__, __LINE__,
-#if ERTS_POLL_USE_EPOLL
- "epoll_wait",
-#elif ERTS_POLL_USE_KQUEUE
- "kevent",
+#else
+ ASSERT(chk_fds_res <= max_res);
+ return chk_fds_res;
#endif
- erl_errno_id(errno), errno);
- }
-
- return save_kp_result(ps, pr, max_res, res);
}
-#endif /* ERTS_POLL_USE_FALLBACK */
-
-
+#else /* !ERTS_POLL_USE_KERNEL_POLL */
static ERTS_INLINE int
-save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
- int chk_fds_res, int ebadf)
+ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
-#if ERTS_POLL_USE_DEVPOLL
- return save_kp_result(ps, pr, max_res, chk_fds_res);
-#elif ERTS_POLL_USE_FALLBACK
- if (!ps->fallback_used)
- return save_kp_result(ps, pr, max_res, chk_fds_res);
- else
-#endif /* ERTS_POLL_USE_FALLBACK */
- {
-
#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
int res = 0;
-#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK
int wake_fd = ps->wake_fds[0];
-#endif
int i, first_ix, end_ix;
/*
@@ -1821,23 +1379,30 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
if (ps->poll_fds[i].revents != (short) 0) {
int fd = ps->poll_fds[i].fd;
ErtsPollEvents revents;
-#if ERTS_POLL_USE_FALLBACK
- if (fd == ps->kp_fd) {
- res += get_kp_results(ps, &pr[res], max_res-res);
- i++;
- continue;
- }
-#elif ERTS_POLL_USE_WAKEUP_PIPE
if (fd == wake_fd) {
cleanup_wakeup_pipe(ps);
i++;
continue;
}
-#endif
revents = pollev2ev(ps->poll_fds[i].revents);
- pr[res].fd = fd;
- pr[res].events = revents;
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], revents);
+
+ /* If an fd returns as error, we may want to check the
+ update_requests queue to see if it has been reset
+ before delivering the result?!?! This should allow
+ the user to do driver_dselect + close without waiting
+ for stop_select... */
+
+ DEBUG_PRINT_FD("trig %s (poll)", ps, ERTS_POLL_RES_GET_FD(&pr[res]),
+ ev2str(ERTS_POLL_RES_GET_EVTS(&pr[res])));
+
res++;
+
+ /* Clear the events for this fd in order to mimic
+ how epoll ONESHOT works */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
}
i++;
}
@@ -1853,9 +1418,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
int res = 0;
-#if ERTS_POLL_USE_WAKEUP_PIPE && !ERTS_POLL_USE_FALLBACK
int wake_fd = ps->wake_fds[0];
-#endif
int fd, first_fd, end_fd;
/*
@@ -1868,29 +1431,23 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
if (!ebadf) {
while (1) {
while (fd < end_fd && res < max_res) {
-
- pr[res].events = (ErtsPollEvents) 0;
+ ErtsPollEvents events = 0;
if (ERTS_FD_ISSET(fd, &ps->res_input_fds)) {
-#if ERTS_POLL_USE_FALLBACK
- if (fd == ps->kp_fd) {
- res += get_kp_results(ps, &pr[res], max_res-res);
- fd++;
- continue;
- }
-#elif ERTS_POLL_USE_WAKEUP_PIPE
if (fd == wake_fd) {
cleanup_wakeup_pipe(ps);
fd++;
continue;
}
-#endif
- pr[res].events |= ERTS_POLL_EV_IN;
+ events |= ERTS_POLL_EV_IN;
}
if (ERTS_FD_ISSET(fd, &ps->res_output_fds))
- pr[res].events |= ERTS_POLL_EV_OUT;
- if (pr[res].events) {
- pr[res].fd = fd;
+ events |= ERTS_POLL_EV_OUT;
+ if (events) {
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], events);
res++;
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
}
fd++;
}
@@ -1923,7 +1480,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) {
oset = &ps->res_output_fds;
ERTS_FD_ZERO(oset);
- ERTS_FD_SET(fd, oset);
+ ERTS_FD_SET(fd, oset);
}
do {
/* Initiate 'tv' each time;
@@ -1932,49 +1489,31 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
sres = ERTS_SELECT(ps->max_fd+1, iset, oset, NULL, &tv);
} while (sres < 0 && errno == EINTR);
if (sres < 0) {
-#if ERTS_POLL_USE_FALLBACK
- if (fd == ps->kp_fd) {
- res += get_kp_results(ps,
- &pr[res],
- max_res-res);
- fd++;
- continue;
- }
-#elif ERTS_POLL_USE_WAKEUP_PIPE
if (fd == wake_fd) {
cleanup_wakeup_pipe(ps);
fd++;
continue;
}
-#endif
- pr[res].fd = fd;
- pr[res].events = ERTS_POLL_EV_NVAL;
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], ERTS_POLL_EV_NVAL);
res++;
}
else if (sres > 0) {
- pr[res].fd = fd;
+ ErtsPollEvents events = 0;
+ ERTS_POLL_RES_SET_FD(&pr[res], fd);
if (iset && ERTS_FD_ISSET(fd, iset)) {
-#if ERTS_POLL_USE_FALLBACK
- if (fd == ps->kp_fd) {
- res += get_kp_results(ps,
- &pr[res],
- max_res-res);
- fd++;
- continue;
- }
-#elif ERTS_POLL_USE_WAKEUP_PIPE
if (fd == wake_fd) {
cleanup_wakeup_pipe(ps);
fd++;
continue;
}
-#endif
- pr[res].events |= ERTS_POLL_EV_IN;
+ events |= ERTS_POLL_EV_IN;
}
if (oset && ERTS_FD_ISSET(fd, oset)) {
- pr[res].events |= ERTS_POLL_EV_OUT;
+ events |= ERTS_POLL_EV_OUT;
}
- ASSERT(pr[res].events);
+ ASSERT(events);
+ ERTS_POLL_RES_SET_EVTS(&pr[res], events);
res++;
}
}
@@ -1989,206 +1528,145 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
}
ps->next_sel_fd = fd;
return res;
-#endif
- }
+#endif /* ERTS_POLL_USE_SELECT */
}
+#endif /* !ERTS_POLL_USE_KERNEL_POLL */
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
+check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
{
int res;
- if (erts_smp_atomic_read_nob(&ps->no_of_user_fds) == 0
- && tv->tv_usec == 0 && tv->tv_sec == 0) {
- /* Nothing to poll and zero timeout; done... */
- return 0;
- }
- else {
- long timeout = tv->tv_sec*1000 + tv->tv_usec/1000;
- if (timeout > ERTS_AINT32_T_MAX)
- timeout = ERTS_AINT32_T_MAX;
- ASSERT(timeout >= 0);
- erts_smp_atomic32_set_relb(&ps->timeout, (erts_aint32_t) timeout);
-#if ERTS_POLL_USE_FALLBACK
- if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) {
-
-#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- if (max_res > ps->res_events_len)
- grow_res_events(ps, max_res);
-#ifdef ERTS_SMP
- if (timeout)
- erts_thr_progress_prepare_wait(NULL);
-#endif
- res = epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
-#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
- struct timespec ts;
- if (max_res > ps->res_events_len)
- grow_res_events(ps, max_res);
-#ifdef ERTS_SMP
- if (timeout)
- erts_thr_progress_prepare_wait(NULL);
-#endif
- ts.tv_sec = tv->tv_sec;
- ts.tv_nsec = tv->tv_usec*1000;
- res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
-#endif /* ----------------------------------------- */
- }
- else /* use fallback (i.e. poll() or select()) */
-#endif /* ERTS_POLL_USE_FALLBACK */
- {
+ int timeout = do_wait ? -1 : 0;
+ DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait);
+ {
+#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+ res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
+
+#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+ struct timespec ts = {0, 0};
+ struct timespec *tsp = timeout ? NULL : &ts;
+ res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp);
+#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+ /*
+ * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds
+ * is set too high. dp_nfds should not be set greater than
+ * the maximum number of file descriptors in the poll set.
+ */
+ struct dvpoll poll_res;
+ int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
+ poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
+ poll_res.dp_fds = pr;
+ poll_res.dp_timeout = timeout;
+ res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
+
+#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
+
+ res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-#if ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
- /*
- * The ioctl() will fail with EINVAL on Solaris 10 if dp_nfds
- * is set too high. dp_nfds should not be set greater than
- * the maximum number of file descriptors in the poll set.
- */
- struct dvpoll poll_res;
- int nfds = (int) erts_smp_atomic_read_nob(&ps->no_of_user_fds);
-#if ERTS_POLL_USE_WAKEUP_PIPE
- nfds++; /* Wakeup pipe */
-#endif
- if (timeout > INT_MAX)
- timeout = INT_MAX;
- poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
- if (poll_res.dp_nfds > ps->res_events_len)
- grow_res_events(ps, poll_res.dp_nfds);
- poll_res.dp_fds = ps->res_events;
-#ifdef ERTS_SMP
- if (timeout)
- erts_thr_progress_prepare_wait(NULL);
-#endif
- poll_res.dp_timeout = (int) timeout;
- res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
- if (timeout > INT_MAX)
- timeout = INT_MAX;
-#ifdef ERTS_SMP
- if (timeout)
- erts_thr_progress_prepare_wait(NULL);
-#endif
- res = poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval to = *tv;
-
- ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
- ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
-
-#ifdef ERTS_SMP
- if (to.tv_sec || to.tv_usec)
- erts_thr_progress_prepare_wait(NULL);
-#endif
- res = ERTS_SELECT(ps->max_fd + 1,
- &ps->res_input_fds,
- &ps->res_output_fds,
- NULL,
- &to);
-#ifdef ERTS_SMP
- if (to.tv_sec || to.tv_usec)
- erts_thr_progress_finalize_wait(NULL);
- if (res < 0
- && errno == EBADF
- && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- /*
- * This may have happened because another thread deselected
- * a fd in our poll set and then closed it, i.e. the driver
- * behaved correctly. We wan't to avoid looking for a bad
- * fd, that may even not exist anymore. Therefore, handle
- * update requests and try again.
- *
- * We don't know how much of the timeout is left; therfore,
- * we use a zero timeout. If no error occur and no events
- * have triggered, we fake an EAGAIN error and let the caller
- * restart us.
- */
- to.tv_sec = 0;
- to.tv_usec = 0;
- ERTS_POLLSET_LOCK(ps);
- handle_update_requests(ps);
- ERTS_POLLSET_UNLOCK(ps);
- res = ERTS_SELECT(ps->max_fd + 1,
- &ps->res_input_fds,
- &ps->res_output_fds,
- NULL,
- &to);
- if (res == 0) {
- errno = EAGAIN;
- res = -1;
- }
- }
-#endif /* ERTS_SMP */
- return res;
+ SysTimeval tv = {0, 0};
+ SysTimeval *tvp = timeout ? NULL : &tv;
+
+ ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
+ ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
+
+ res = ERTS_SELECT(ps->max_fd + 1,
+ &ps->res_input_fds,
+ &ps->res_output_fds,
+ NULL,
+ tvp);
#endif /* ----------------------------------------- */
- }
-#ifdef ERTS_SMP
- if (timeout)
- erts_thr_progress_finalize_wait(NULL);
-#endif
- return res;
}
+ DEBUG_PRINT_WAIT("Leaving check_fd_events(), res=%d", ps, res);
+ return res;
}
int
-ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
+ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len,
- SysTimeval *utvp)
+ int *len)
{
- int res, no_fds;
+ int res, no_fds, used_fds = 0;
int ebadf = 0;
-#ifdef ERTS_SMP
+ int do_wait;
int ps_locked = 0;
-#endif
- SysTimeval *tvp;
- SysTimeval itv;
+ ERTS_MSACC_DECLARE_CACHE();
no_fds = *len;
-#ifdef ERTS_POLL_MAX_RES
- if (no_fds >= ERTS_POLL_MAX_RES)
- no_fds = ERTS_POLL_MAX_RES;
-#endif
-
*len = 0;
+ ASSERT(no_fds > 0);
- ASSERT(utvp);
-
- tvp = utvp;
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ ERTS_POLLSET_LOCK(ps);
+ used_fds = handle_update_requests(ps, pr, no_fds);
+ ERTS_POLLSET_UNLOCK(ps);
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("Entering erts_poll_wait(), timeout=%d\n",
- (int) tv->tv_sec*1000 + tv->tv_usec/1000);
+ if (used_fds == no_fds) {
+ *len = used_fds;
+ return 0;
+ }
+ }
#endif
- if (ERTS_POLLSET_SET_POLLED_CHK(ps)) {
- res = EINVAL; /* Another thread is in erts_poll_wait()
- on this pollset... */
- goto done;
- }
+ do_wait = !is_woken(ps) && used_fds == 0;
+
+ DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
- if (is_woken(ps)) {
- /* Use zero timeout */
- itv.tv_sec = 0;
- itv.tv_usec = 0;
- tvp = &itv;
+ if (do_wait) {
+ erts_thr_progress_prepare_wait(NULL);
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
}
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
- if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- ERTS_POLLSET_LOCK(ps);
- handle_update_requests(ps);
- ERTS_POLLSET_UNLOCK(ps);
+ while (1) {
+ res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ /*
+ * This may have happened because another thread deselected
+ * a fd in our poll set and then closed it, i.e. the driver
+ * behaved correctly. We wan't to avoid looking for a bad
+ * fd, that may even not exist anymore. Therefore, handle
+ * update requests and try again. This behaviour should only
+ * happen when using SELECT as the polling mechanism.
+ */
+ ERTS_POLLSET_LOCK(ps);
+ used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+ if (used_fds == no_fds) {
+ *len = used_fds;
+ ERTS_POLLSET_UNLOCK(ps);
+ return 0;
+ }
+ res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
+ /* Keep the lock over the non-blocking poll in order to not
+ get any nasty races happening. */
+ ERTS_POLLSET_UNLOCK(ps);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
+ }
+ }
+#endif
+
+ if (res != 0)
+ break;
+ if (!do_wait)
+ break;
}
-#endif
- res = check_fd_events(ps, tvp, no_fds);
+ if (do_wait) {
+ erts_thr_progress_finalize_wait(NULL);
+ ERTS_MSACC_UPDATE_CACHE();
+ ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
+ }
woke_up(ps);
- if (res == 0) {
- res = ETIMEDOUT;
- }
- else if (res < 0) {
+ if (res < 0) {
#if ERTS_POLL_USE_SELECT
if (errno == EBADF) {
ebadf = 1;
@@ -2202,33 +1680,24 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
save_results:
#endif
-#ifdef ERTS_SMP
ps_locked = 1;
ERTS_POLLSET_LOCK(ps);
-#endif
- no_fds = save_poll_result(ps, pr, no_fds, res, ebadf);
+ used_fds += ERTS_POLL_EXPORT(save_result)(ps, pr + used_fds, no_fds - used_fds, res, ebadf);
#ifdef HARD_DEBUG
- check_poll_result(pr, no_fds);
+ check_poll_result(pr, used_fds);
#endif
- res = (no_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0);
- *len = no_fds;
+ res = (used_fds == 0 ? (is_interrupted_reset(ps) ? EINTR : EAGAIN) : 0);
+ *len = used_fds;
}
-#ifdef ERTS_SMP
if (ps_locked)
ERTS_POLLSET_UNLOCK(ps);
- ERTS_POLLSET_UNSET_POLLED(ps);
-#endif
- done:
- erts_smp_atomic32_set_relb(&ps->timeout, ERTS_AINT32_T_MAX);
-#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("Leaving %s = erts_poll_wait()\n",
- res == 0 ? "0" : erl_errno_id(res));
-#endif
+ DEBUG_PRINT_WAIT("Leaving %s = %s(len = %d)", ps,
+ res == 0 ? "0" : erl_errno_id(res), __FUNCTION__, *len);
return res;
}
@@ -2238,53 +1707,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
*/
void
-ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set)
+ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
{
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (!set)
reset_wakeup_state(ps);
else
- wake_poller(ps, 1, 0);
-#endif
-}
-
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
-void
-ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet ps)
-{
- /*
- * NOTE: This function is called from signal handlers, it,
- * therefore, it has to be async-signal safe.
- */
- wake_poller(ps, 1, 1);
-}
-#endif
-
-/*
- * erts_poll_interrupt_timed():
- * If 'set' != 0, interrupt thread blocked in erts_poll_wait() if it
- * is not guaranteed that it will timeout before 'msec' milli seconds.
- */
-void
-ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps,
- int set,
- erts_short_time_t msec)
-{
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP)
- if (!set)
- reset_wakeup_state(ps);
- else {
- if (erts_smp_atomic32_read_acqb(&ps->timeout) > (erts_aint32_t) msec)
- wake_poller(ps, 1, 0);
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- else {
- if (ERTS_POLLSET_IS_POLLED(ps))
- erts_smp_atomic_inc_nob(&ps->no_avoided_wakeups);
- erts_smp_atomic_inc_nob(&ps->no_avoided_interrupts);
- }
- erts_smp_atomic_inc_nob(&ps->no_interrupt_timed);
-#endif
- }
+ wake_poller(ps, 1);
#endif
}
@@ -2298,13 +1727,19 @@ ERTS_POLL_EXPORT(erts_poll_max_fds)(void)
*/
void
-ERTS_POLL_EXPORT(erts_poll_init)(void)
+ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_updates)
{
- erts_smp_spinlock_init(&pollsets_lock, "pollsets_lock");
- pollsets = NULL;
errno = 0;
+ if (concurrent_updates) {
+#if ERTS_POLL_USE_CONCURRENT_UPDATE
+ *concurrent_updates = 1;
+#else
+ *concurrent_updates = 0;
+#endif
+ }
+
#if !defined(NO_SYSCONF)
max_fds = sysconf(_SC_OPEN_MAX);
#elif ERTS_POLL_USE_SELECT
@@ -2323,37 +1758,28 @@ ERTS_POLL_EXPORT(erts_poll_init)(void)
fatal_error("erts_poll_init(): Failed to get max number of files: %s\n",
erl_errno_id(errno));
-#ifdef ERTS_POLL_DEBUG_PRINT
print_misc_debug_info();
-#endif
}
-ErtsPollSet
-ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
+ErtsPollSet *
+ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
{
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
#endif
- ErtsPollSet ps = erts_alloc(ERTS_ALC_T_POLLSET,
- sizeof(struct ErtsPollSet_));
+ ErtsPollSet *ps = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(struct ERTS_POLL_EXPORT(erts_pollset)));
+ ps->id = id;
ps->internal_fd_limit = 0;
- ps->fds_status = NULL;
- ps->fds_status_len = 0;
- erts_smp_atomic_init_nob(&ps->no_of_user_fds, 0);
+ erts_atomic_init_nob(&ps->no_of_user_fds, 0);
#if ERTS_POLL_USE_KERNEL_POLL
ps->kp_fd = -1;
#if ERTS_POLL_USE_EPOLL
kp_fd = epoll_create(256);
- ps->res_events_len = 0;
- ps->res_events = NULL;
#elif ERTS_POLL_USE_DEVPOLL
kp_fd = open("/dev/poll", O_RDWR);
- ps->res_events_len = 0;
- ps->res_events = NULL;
#elif ERTS_POLL_USE_KQUEUE
kp_fd = kqueue();
- ps->res_events_len = 0;
- ps->res_events = NULL;
#endif
if (kp_fd < 0)
fatal_error("erts_poll_create_pollset(): Failed to "
@@ -2367,10 +1793,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
": %s (%d)\n",
erl_errno_id(errno), errno);
#endif /* ERTS_POLL_USE_KERNEL_POLL */
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
- /* res_events is also used as write buffer */
- grow_res_events(ps, ERTS_POLL_MIN_BATCH_BUF_SIZE);
-#endif
#if ERTS_POLL_USE_POLL
ps->next_poll_fds_ix = 0;
ps->no_poll_fds = 0;
@@ -2379,9 +1801,6 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
#elif ERTS_POLL_USE_SELECT
ps->next_sel_fd = 0;
ps->max_fd = -1;
-#if ERTS_POLL_USE_FALLBACK
- ps->no_select_fds = 0;
-#endif
#ifdef _DARWIN_UNLIMITED_SELECT
ps->input_fds.sz = 0;
ps->input_fds.ptr = NULL;
@@ -2398,126 +1817,66 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
ERTS_FD_ZERO(&ps->res_output_fds);
#endif
#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ ps->fds_status = NULL;
+ ps->fds_status_len = 0;
ps->update_requests.next = NULL;
ps->update_requests.len = 0;
ps->curr_upd_req_block = &ps->update_requests;
- erts_smp_atomic32_init_nob(&ps->have_update_requests, 0);
-#endif
-#ifdef ERTS_SMP
- erts_atomic32_init_nob(&ps->polled, 0);
- erts_smp_mtx_init(&ps->mtx, "pollset");
-#endif
-#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
-#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- create_wakeup_pipe(ps);
-#endif
-#if ERTS_POLL_USE_FALLBACK
- if (kp_fd >= ps->fds_status_len)
- grow_fds_status(ps, kp_fd);
- /* Force kernel poll fd into fallback (poll/select) set */
- ps->fds_status[kp_fd].flags
- |= ERTS_POLL_FD_FLG_INFLBCK|ERTS_POLL_FD_FLG_USEFLBCK;
- {
- int do_wake = 0;
- ERTS_POLL_EXPORT(erts_poll_control)(ps, kp_fd, ERTS_POLL_EV_IN, 1,
- &do_wake);
- }
+ erts_atomic32_init_nob(&ps->have_update_requests, 0);
+ erts_mtx_init(&ps->mtx, "pollset", NIL, ERTS_LOCK_FLAGS_CATEGORY_IO);
#endif
#if ERTS_POLL_USE_KERNEL_POLL
if (ps->internal_fd_limit <= kp_fd)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
#endif
- erts_smp_atomic32_init_nob(&ps->timeout, ERTS_AINT32_T_MAX);
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- erts_smp_atomic_init_nob(&ps->no_avoided_wakeups, 0);
- erts_smp_atomic_init_nob(&ps->no_avoided_interrupts, 0);
- erts_smp_atomic_init_nob(&ps->no_interrupt_timed, 0);
-#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
- handle_update_requests(ps);
-#endif
-#if ERTS_POLL_USE_FALLBACK
- ps->fallback_used = 0;
-#endif
- erts_smp_atomic_set_nob(&ps->no_of_user_fds, 0); /* Don't count wakeup pipe and fallback fd */
-
- erts_smp_spin_lock(&pollsets_lock);
- ps->next = pollsets;
- pollsets = ps;
- erts_smp_spin_unlock(&pollsets_lock);
-
- return ps;
-}
-
-void
-ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps)
-{
-
- if (ps->fds_status)
- erts_free(ERTS_ALC_T_FD_STATUS, (void *) ps->fds_status);
-
-#if ERTS_POLL_USE_EPOLL
- if (ps->kp_fd >= 0)
- close(ps->kp_fd);
- if (ps->res_events)
- erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events);
-#elif ERTS_POLL_USE_DEVPOLL
- if (ps->kp_fd >= 0)
- close(ps->kp_fd);
- if (ps->res_events)
- erts_free(ERTS_ALC_T_POLL_RES_EVS, (void *) ps->res_events);
-#elif ERTS_POLL_USE_POLL
- if (ps->poll_fds)
- erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds);
-#elif ERTS_POLL_USE_SELECT
-#ifdef _DARWIN_UNLIMITED_SELECT
- if (ps->input_fds.ptr)
- erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->input_fds.ptr);
- if (ps->res_input_fds.ptr)
- erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_input_fds.ptr);
- if (ps->output_fds.ptr)
- erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->output_fds.ptr);
- if (ps->res_output_fds.ptr)
- erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_output_fds.ptr);
-#endif
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
+ create_wakeup_pipe(ps);
+ handle_update_requests(ps, NULL, 0);
+ cleanup_wakeup_pipe(ps);
#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if ERTS_POLL_USE_KERNEL_POLL && (defined(__DARWIN__) || defined(__APPLE__) && defined(__MACH__))
{
- ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next;
- while (urqbp) {
- ErtsPollSetUpdateRequestsBlock *free_urqbp = urqbp;
- urqbp = urqbp->next;
- free_update_requests_block(ps, free_urqbp);
- }
+ /*
+ * Using kqueue on OS X is a mess of brokenness...
+ *
+ * On OS X version older than 15.6 (i.e. OS X El Capitan released in July 2015),
+ * a thread waiting in kevent is not woken if an event is inserted into the kqueue
+ * by another thread and the event becomes ready. However if a new call to kevent
+ * is done by the waiting thread, the new event is found.
+ *
+ * So on effected OS X versions we could trigger the wakeup pipe so that
+ * the waiters will be woken and re-issue the kevent. However...
+ *
+ * On OS X version older then 16 (i.e. OS X Sierra released in September 2016),
+ * running the emulator driver_SUITE smp_select testcase consistently causes a
+ * kernel panic. I don't know why or what events that trigger it. But it seems
+ * like updates of the pollset while another thread is sleeping in it Creates
+ * some kind of race that triggers the kernel panic.
+ *
+ * So to deal with this, the erts configure check what OS X version is run
+ * and only enabled kernel poll on OS X 16 or newer. In addition, if someone
+ * attempts to compile Erlang on OS X 16 and then run it on OS X 15, we do the
+ * run-time check below to disallow this.
+ */
+ int major, minor, build;
+ os_version(&major,&minor,&build);
+ if (major < 16) {
+ erts_fprintf(stderr,"BROKEN KQUEUE!\n"
+ "Erlang has been compiled with kernel-poll support,\n"
+ "but this OS X version is known to have kernel bugs\n"
+ "when using kernel-poll. You have two options:\n"
+ " 1) update to a newer OS X version (OS X Sierra or newer)\n"
+ " 2) recompile erlang without kernel-poll support\n");
+ erts_exit(1, "");
+ }
}
#endif
-#ifdef ERTS_SMP
- erts_smp_mtx_destroy(&ps->mtx);
-#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (ps->wake_fds[0] >= 0)
- close(ps->wake_fds[0]);
- if (ps->wake_fds[1] >= 0)
- close(ps->wake_fds[1]);
-#endif
-
- erts_smp_spin_lock(&pollsets_lock);
- if (ps == pollsets)
- pollsets = pollsets->next;
- else {
- ErtsPollSet prev_ps;
- for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next)
- ;
- ASSERT(ps == prev_ps->next);
- prev_ps->next = ps->next;
- }
- erts_smp_spin_unlock(&pollsets_lock);
+ erts_atomic_set_nob(&ps->no_of_user_fds, 0); /* Don't count wakeup pipe and fallback fd */
- erts_free(ERTS_ALC_T_POLLSET, (void *) ps);
+ return ps;
}
/*
@@ -2525,24 +1884,18 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps)
*/
void
-ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
+ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
{
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int pending_updates;
#endif
Uint size = 0;
ERTS_POLLSET_LOCK(ps);
- size += sizeof(struct ErtsPollSet_);
+ size += sizeof(struct ERTS_POLL_EXPORT(erts_pollset));
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
size += ps->fds_status_len*sizeof(ErtsFdStatus);
-
-#if ERTS_POLL_USE_EPOLL
- size += ps->res_events_len*sizeof(struct epoll_event);
-#elif ERTS_POLL_USE_DEVPOLL
- size += ps->res_events_len*sizeof(struct pollfd);
-#elif ERTS_POLL_USE_KQUEUE
- size += ps->res_events_len*sizeof(struct kevent);
#endif
#if ERTS_POLL_USE_POLL
@@ -2554,7 +1907,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
#endif
#endif
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
{
ErtsPollSetUpdateRequestsBlock *urqbp = ps->update_requests.next;
pending_updates = ps->update_requests.len;
@@ -2566,7 +1919,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
}
#endif
- pip->primary =
+ pip->primary =
#if ERTS_POLL_USE_KQUEUE
"kqueue"
#elif ERTS_POLL_USE_EPOLL
@@ -2580,17 +1933,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
#endif
;
- pip->fallback =
-#if !ERTS_POLL_USE_FALLBACK
- NULL
-#elif ERTS_POLL_USE_POLL
- "poll"
-#elif ERTS_POLL_USE_SELECT
- "select"
-#endif
- ;
-
- pip->kernel_poll =
+ pip->kernel_poll =
#if !ERTS_POLL_USE_KERNEL_POLL
NULL
#elif ERTS_POLL_USE_KQUEUE
@@ -2604,31 +1947,13 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
pip->memory_size = size;
- pip->poll_set_size = (int) erts_smp_atomic_read_nob(&ps->no_of_user_fds);
-#if ERTS_POLL_USE_WAKEUP_PIPE
+ pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pip->poll_set_size++; /* Wakeup pipe */
#endif
- pip->fallback_poll_set_size =
-#if !ERTS_POLL_USE_FALLBACK
- 0
-#elif ERTS_POLL_USE_POLL
- ps->no_poll_fds
-#elif ERTS_POLL_USE_SELECT
- ps->no_select_fds
-#endif
- ;
-
-#if ERTS_POLL_USE_FALLBACK
- /* If only kp_fd is in fallback poll set we don't use fallback... */
- if (pip->fallback_poll_set_size == 1)
- pip->fallback_poll_set_size = 0;
- else
- pip->poll_set_size++; /* kp_fd */
-#endif
-
pip->lazy_updates =
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
1
#else
0
@@ -2636,21 +1961,13 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
;
pip->pending_updates =
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pending_updates
#else
0
#endif
;
- pip->batch_updates =
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
- 1
-#else
- 0
-#endif
- ;
-
pip->concurrent_updates =
#if ERTS_POLL_USE_CONCURRENT_UPDATE
1
@@ -2659,13 +1976,23 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
#endif
;
- pip->max_fds = max_fds;
+ pip->is_fallback =
+#if ERTS_POLL_IS_FALLBACK
+ 1
+#else
+ 0
+#endif
+ ;
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- pip->no_avoided_wakeups = erts_smp_atomic_read_nob(&ps->no_avoided_wakeups);
- pip->no_avoided_interrupts = erts_smp_atomic_read_nob(&ps->no_avoided_interrupts);
- pip->no_interrupt_timed = erts_smp_atomic_read_nob(&ps->no_interrupt_timed);
+ pip->batch_updates =
+#if ERTS_POLL_USE_DEVPOLL
+ 1
+#else
+ 0
#endif
+ ;
+
+ pip->max_fds = max_fds;
ERTS_POLLSET_UNLOCK(ps);
@@ -2701,53 +2028,122 @@ fatal_error(char *format, ...)
abort();
}
-static void
-fatal_error_async_signal_safe(char *error_str)
+/*
+ * --- Debug -----------------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_EPOLL
+uint32_t epoll_events(int kp_fd, int fd)
{
- if (ERTS_SOMEONE_IS_CRASH_DUMPING || ERTS_GOT_SIGUSR1) {
- /* See comment above in fatal_error() */
- return;
+ /* For epoll we read the information about what is selected upon from the proc fs.*/
+ char fname[30];
+ FILE *f;
+ unsigned int pos, flags, mnt_id;
+ int line = 0;
+ sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), kp_fd);
+ f = fopen(fname,"r");
+ if (!f) {
+ fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno);
+ ASSERT(0);
+ return 0;
}
- if (error_str) {
- int len = 0;
- while (error_str[len])
- len++;
- if (len) {
- /* async signal safe */
- erts_silence_warn_unused_result(write(2, error_str, len));
- }
+ if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) {
+ fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno);
+ ASSERT(0);
+ return 0;
}
- abort();
+ if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id));
+ line += 3;
+ while (!feof(f)) {
+ /* tfd: 10 events: 40000019 data: 180000000a */
+ int ev_fd;
+ uint32_t events;
+ uint64_t data;
+ if (fscanf(f,"tfd:%d events:%x data:%llx\n", &ev_fd, &events,
+ (unsigned long long*)&data) != 3) {
+ fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", fname,
+ line,
+ errno);
+ return 0;
+ }
+ if (fd == ev_fd) {
+ fclose(f);
+ return events;
+ }
+ }
+ fclose(f);
+ return 0;
}
-
-/*
- * --- Debug -----------------------------------------------------------------
- */
+#endif
void
-ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps,
+ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
ErtsPollEvents ev[],
int len)
{
int fd;
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
ERTS_POLLSET_LOCK(ps);
for (fd = 0; fd < len; fd++) {
if (fd >= ps->fds_status_len)
ev[fd] = 0;
else {
ev[fd] = ps->fds_status[fd].events;
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
- ev[fd] |= ERTS_POLL_EV_NVAL;
-#endif
+ if (
+ fd == ps->wake_fds[0] || fd == ps->wake_fds[1] ||
#if ERTS_POLL_USE_KERNEL_POLL
- if (fd == ps->kp_fd)
- ev[fd] |= ERTS_POLL_EV_NVAL;
+ fd == ps->kp_fd ||
#endif
+ 0)
+ ev[fd] |= ERTS_POLL_EV_NVAL;
}
}
ERTS_POLLSET_UNLOCK(ps);
+#elif ERTS_POLL_USE_EPOLL
+ /* For epoll we read the information about what is selected upon from the proc fs.*/
+ char fname[30];
+ FILE *f;
+ unsigned int pos, flags, mnt_id;
+ int line = 0;
+ sprintf(fname,"/proc/%d/fdinfo/%d",getpid(), ps->kp_fd);
+ for (fd = 0; fd < len; fd++)
+ ev[fd] = ERTS_POLL_EV_NONE;
+ f = fopen(fname,"r");
+ if (!f) {
+ fprintf(stderr,"failed to open file %s, errno = %d\n", fname, errno);
+ return;
+ }
+ if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) {
+ fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno);
+ ASSERT(0);
+ return;
+ }
+ if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id));
+ line += 3;
+ while (!feof(f)) {
+ /* tfd: 10 events: 40000019 data: 180000000a */
+ int fd;
+ uint32_t events;
+ uint64_t data;
+ if (fscanf(f,"tfd:%d events:%x data:%llx\n", &fd, &events,
+ (unsigned long long*)&data) != 3) {
+ fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n",
+ fname, line, errno);
+ ASSERT(0);
+ return;
+ }
+ data &= 0xFFFFFFFF;
+ ASSERT(fd == data);
+ /* Events are the events that are being monitored, which of course include
+ error and hup events, but we are only interested in IN/OUT events */
+ ev[fd] = (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) & ERTS_POLL_EV_N2E(events);
+ line++;
+ }
+#else
+ for (fd = 0; fd < len; fd++)
+ ev[fd] = ERTS_POLL_EV_NONE;
+#endif
}
#ifdef HARD_DEBUG
@@ -2767,10 +2163,10 @@ check_poll_result(ErtsPollResFd pr[], int len)
}
-#if ERTS_POLL_USE_DEVPOLL
+#if ERTS_POLL_USE_DEVPOLL && defined(DEBUG)
static void
-check_poll_status(ErtsPollSet ps)
+check_poll_status(ErtsPollSet *ps)
{
int i;
for (i = 0; i < ps->fds_status_len; i++) {
@@ -2802,34 +2198,24 @@ check_poll_status(ErtsPollSet ps)
#endif /* ERTS_POLL_USE_DEVPOLL */
#endif /* HARD_DEBUG */
-#ifdef ERTS_POLL_DEBUG_PRINT
static void
print_misc_debug_info(void)
{
- erts_printf("erts_poll using: %s lazy_updates:%s batch_updates:%s\n",
+#if ERTS_POLL_DEBUG_PRINT
+ erts_printf("erts_poll using: %s lazy_updates:%s\n",
#if ERTS_POLL_USE_KQUEUE
"kqueue"
#elif ERTS_POLL_USE_EPOLL
"epoll"
#elif ERTS_POLL_USE_DEVPOLL
"/dev/poll"
-#endif
-#if ERTS_POLL_USE_FALLBACK
- "-"
-#endif
-#if ERTS_POLL_USE_POLL
+#elif ERTS_POLL_USE_POLL
"poll"
#elif ERTS_POLL_USE_SELECT
"select"
#endif
,
-#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
- "true"
-#else
- "false"
-#endif
- ,
-#if ERTS_POLL_USE_BATCH_UPDATE_POLLSET
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
"true"
#else
"false"
@@ -2848,6 +2234,20 @@ print_misc_debug_info(void)
#ifdef FD_SETSIZE
erts_printf("FD_SETSIZE=%d\n", FD_SETSIZE);
#endif
+#endif
+}
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
+void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *pollset, int enable)
+{
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+ if(enable) {
+ erts_lcnt_install_new_lock_info(&pollset->mtx.lcnt, "pollset_rm", NIL,
+ ERTS_LOCK_TYPE_MUTEX | ERTS_LOCK_FLAGS_CATEGORY_IO);
+ } else {
+ erts_lcnt_uninstall(&pollset->mtx.lcnt);
+ }
+#endif
+ return;
}
-
#endif
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index 2f1c05f401..e9a667cac1 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -1,27 +1,48 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
-/*
- * Description: Poll interface suitable for ERTS with or without
- * SMP support.
+/**
+ * @description: Poll interface suitable for ERTS with SMP support.
+ *
+ * @author: Rickard Green
+ * @author: Lukas Larsson
+ *
+ * This header file exports macros and functions that are used to
+ * react to I/O polling events from file descriptors or wait-able
+ * objects. The API exported is the following:
*
- * Author: Rickard Green
+ * defines:
+ * ERTS_POLL_EV_NONE - No events have been set. This is not the same as 0.
+ * ERTS_POLL_EV_IN - Represent an IN event
+ * ERTS_POLL_EV_OUT - Represent an OUT event
+ * ERTS_POLL_EV_ERR - Represent an error event
+ * ERTS_POLL_EV_NVAL - Represent an invalid event
+ *
+ * macro functions:
+ * ErtsSysFdType ERTS_POLL_RES_GET_FD(ErtsPollResFd *evt);
+ * void ERTS_POLL_RES_SET_FD(ErtsPollResFd *evt, ErtsSysFdType fd);
+ * ErtsPollEvents ERTS_POLL_RES_GET_EVTS(ErtsPollResFd *evt)
+ * void ERTS_POLL_RES_SET_EVTS(ErtsPollResFd *evt, ErtsPollEvents fd);
+ *
+ * functions:
+ * See erl_poll_api.h
*/
#ifndef ERL_POLL_H__
@@ -29,33 +50,29 @@
#include "sys.h"
-#if 0
-#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS
-#endif
+#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
#ifdef ERTS_ENABLE_KERNEL_POLL
-# if defined(ERTS_KERNEL_POLL_VERSION)
-# define ERTS_POLL_EXPORT(FUNC) FUNC ## _kp
+# undef ERTS_ENABLE_KERNEL_POLL
+# define ERTS_ENABLE_KERNEL_POLL 1
+# if defined(ERTS_NO_KERNEL_POLL_VERSION)
+# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk
+# undef ERTS_NO_KERNEL_POLL_VERSION
+# define ERTS_NO_KERNEL_POLL_VERSION 1
+# define ERTS_KERNEL_POLL_VERSION 0
# else
-# define ERTS_POLL_EXPORT(FUNC) FUNC ## _nkp
-# undef ERTS_POLL_DISABLE_KERNEL_POLL
-# define ERTS_POLL_DISABLE_KERNEL_POLL
+# undef ERTS_KERNEL_POLL_VERSION
+# define ERTS_KERNEL_POLL_VERSION 1
+# define ERTS_NO_KERNEL_POLL_VERSION 0
+# define ERTS_POLL_EXPORT(FUNC) FUNC
# endif
#else
# define ERTS_POLL_EXPORT(FUNC) FUNC
-# undef ERTS_POLL_DISABLE_KERNEL_POLL
-# define ERTS_POLL_DISABLE_KERNEL_POLL
-#endif
-
-#ifdef ERTS_POLL_DISABLE_KERNEL_POLL
-# undef HAVE_SYS_EPOLL_H
-# undef HAVE_SYS_EVENT_H
-# undef HAVE_SYS_DEVPOLL_H
+# define ERTS_ENABLE_KERNEL_POLL 0
+# define ERTS_NO_KERNEL_POLL_VERSION 1
+# define ERTS_KERNEL_POLL_VERSION 0
#endif
-#undef ERTS_POLL_USE_KERNEL_POLL
-#define ERTS_POLL_USE_KERNEL_POLL 0
-
#undef ERTS_POLL_USE_KQUEUE
#define ERTS_POLL_USE_KQUEUE 0
#undef ERTS_POLL_USE_EPOLL
@@ -67,78 +84,96 @@
#undef ERTS_POLL_USE_SELECT
#define ERTS_POLL_USE_SELECT 0
-#if defined(HAVE_SYS_EVENT_H)
-# undef ERTS_POLL_USE_KQUEUE
-# define ERTS_POLL_USE_KQUEUE 1
-# undef ERTS_POLL_USE_KERNEL_POLL
-# define ERTS_POLL_USE_KERNEL_POLL 1
-#elif defined(HAVE_SYS_EPOLL_H)
-# undef ERTS_POLL_USE_EPOLL
-# define ERTS_POLL_USE_EPOLL 1
-# undef ERTS_POLL_USE_KERNEL_POLL
-# define ERTS_POLL_USE_KERNEL_POLL 1
-#elif defined(HAVE_SYS_DEVPOLL_H)
-# undef ERTS_POLL_USE_DEVPOLL
-# define ERTS_POLL_USE_DEVPOLL 1
-# undef ERTS_POLL_USE_KERNEL_POLL
-# define ERTS_POLL_USE_KERNEL_POLL 1
+/* Defines which structure that erts_poll_wait should use to wait with
+ and how events should be represented */
+#define ERTS_POLL_USE_EPOLL_EVS 0
+#define ERTS_POLL_USE_KQUEUE_EVS 0
+#define ERTS_POLL_USE_DEVPOLL_EVS 0
+#define ERTS_POLL_USE_POLL_EVS 0
+#define ERTS_POLL_USE_SELECT_EVS 0
+
+#define ERTS_POLL_USE_KERNEL_POLL ERTS_KERNEL_POLL_VERSION
+
+#if ERTS_ENABLE_KERNEL_POLL
+# if defined(HAVE_SYS_EVENT_H)
+# undef ERTS_POLL_USE_KQUEUE_EVS
+# define ERTS_POLL_USE_KQUEUE_EVS 1
+# undef ERTS_POLL_USE_KQUEUE
+# define ERTS_POLL_USE_KQUEUE ERTS_KERNEL_POLL_VERSION
+# elif defined(HAVE_SYS_EPOLL_H)
+# undef ERTS_POLL_USE_EPOLL_EVS
+# define ERTS_POLL_USE_EPOLL_EVS 1
+# undef ERTS_POLL_USE_EPOLL
+# define ERTS_POLL_USE_EPOLL ERTS_KERNEL_POLL_VERSION
+# elif defined(HAVE_SYS_DEVPOLL_H)
+# undef ERTS_POLL_USE_DEVPOLL_EVS
+# define ERTS_POLL_USE_DEVPOLL_EVS 1
+# undef ERTS_POLL_USE_DEVPOLL
+# define ERTS_POLL_USE_DEVPOLL ERTS_KERNEL_POLL_VERSION
+# else
+# error "Missing kernel poll implementation of erts_poll()"
+# endif
#endif
-#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
-
-#if !ERTS_POLL_USE_KERNEL_POLL || ERTS_POLL_USE_FALLBACK
+#if ERTS_NO_KERNEL_POLL_VERSION
# if defined(ERTS_USE_POLL)
+# undef ERTS_POLL_USE_POLL_EVS
+# define ERTS_POLL_USE_POLL_EVS 1
# undef ERTS_POLL_USE_POLL
# define ERTS_POLL_USE_POLL 1
-# elif !defined(__WIN32__) && !defined(__OSE__)
+# elif !defined(__WIN32__)
+# undef ERTS_POLL_USE_SELECT_EVS
+# define ERTS_POLL_USE_SELECT_EVS 1
# undef ERTS_POLL_USE_SELECT
# define ERTS_POLL_USE_SELECT 1
# endif
#endif
-typedef Uint32 ErtsPollEvents;
-#undef ERTS_POLL_EV_E2N
+#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
-#if defined(__WIN32__) || defined(__OSE__) /* --- win32 or ose -------- */
+typedef Uint32 ErtsPollEvents;
-#define ERTS_POLL_EV_IN 1
-#define ERTS_POLL_EV_OUT 2
-#define ERTS_POLL_EV_ERR 4
-#define ERTS_POLL_EV_NVAL 8
+typedef enum {
+ ERTS_POLL_OP_ADD = 0, /* Add the FD to the pollset */
+ ERTS_POLL_OP_MOD = 1, /* Modify the FD in the pollset */
+ ERTS_POLL_OP_DEL = 2 /* Delete the FD from the pollset */
+} ErtsPollOp;
-#ifdef __OSE__
+#define op2str(op) (op == ERTS_POLL_OP_ADD ? "add" : \
+ (op == ERTS_POLL_OP_MOD ? "mod" : "del"))
-typedef struct ErtsPollOseMsgList_ {
- struct ErtsPollOseMsgList_ *next;
- union SIGNAL *data;
-} ErtsPollOseMsgList;
+#if defined(__WIN32__) /* --- win32 --------------------------------------- */
-struct erts_sys_fd_type {
- SIGSELECT signo;
- ErlDrvOseEventId id;
- ErtsPollOseMsgList *msgs;
- ErlDrvOseEventId (*resolve_signal)(union SIGNAL *sig);
- ethr_mutex mtx;
- void *extra;
-};
+#define ERTS_POLL_EV_IN 1
+#define ERTS_POLL_EV_OUT 2
+#define ERTS_POLL_EV_ERR 4
+#define ERTS_POLL_EV_NVAL 8
-#endif
+#define ERTS_POLL_EV_E2N(EV) (EV)
+#define ERTS_POLL_EV_N2E(EV) (EV)
-#elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+#elif ERTS_POLL_USE_EPOLL_EVS /* --- epoll ------------------------------- */
#include <sys/epoll.h>
#define ERTS_POLL_EV_E2N(EV) \
- ((__uint32_t) (EV))
+ ((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
- ((ErtsPollEvents) (EV))
+ ((ErtsPollEvents) (EV) & ~EPOLLONESHOT)
#define ERTS_POLL_EV_IN ERTS_POLL_EV_N2E(EPOLLIN)
#define ERTS_POLL_EV_OUT ERTS_POLL_EV_N2E(EPOLLOUT)
#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(EPOLLET)
#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(EPOLLERR|EPOLLHUP)
-#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
+typedef struct epoll_event ErtsPollResFd;
+
+#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->data.fd))
+#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->data.fd = ident
+#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events)
+#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts)
+
+#elif ERTS_POLL_USE_DEVPOLL_EVS /* --- devpoll ----------------------------- */
#include <sys/devpoll.h>
@@ -152,12 +187,37 @@ struct erts_sys_fd_type {
#define ERTS_POLL_EV_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
#define ERTS_POLL_EV_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
-#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
+typedef struct pollfd ErtsPollResFd;
+
+#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->fd))
+#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = ident
+#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->revents)
+#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->revents = ERTS_POLL_EV_E2N(evts)
+
+#elif ERTS_POLL_USE_KQUEUE_EVS /* --- kqueue ------------------------------ */
/* Kqueue use fallback defines (poll() or select()) */
+
+#include <sys/event.h>
+
+#ifdef ERTS_USE_POLL
+# undef ERTS_POLL_USE_POLL_EVS
+# define ERTS_POLL_USE_POLL_EVS 1
+#elif !defined(__WIN32__)
+# undef ERTS_POLL_USE_SELECT_EVS
+# define ERTS_POLL_USE_SELECT_EVS 1
#endif
-#if ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
+typedef struct kevent ErtsPollResFd;
+
+#define ERTS_POLL_RES_GET_FD(evt) ((ErtsSysFdType)((evt)->ident))
+#define ERTS_POLL_RES_SET_FD(evt, fd) (evt)->ident = fd
+#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((ErtsPollEvents)(evt)->udata)
+#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->udata = (void*)(UWord)(ERTS_POLL_EV_E2N(evts))
+
+#endif
+#if ERTS_POLL_USE_POLL_EVS
+ /* --- poll -------------------------------- */
#include <poll.h>
#define ERTS_POLL_EV_NKP_E2N(EV) \
@@ -176,7 +236,7 @@ struct erts_sys_fd_type {
#define ERTS_POLL_EV_NKP_NVAL ERTS_POLL_EV_N2E(POLLNVAL)
#define ERTS_POLL_EV_NKP_ERR ERTS_POLL_EV_N2E(POLLERR|POLLHUP)
-#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
+#elif ERTS_POLL_USE_SELECT_EVS /* --- select ------------------------------ */
#define ERTS_POLL_EV_NKP_E2N(EV) (EV)
#define ERTS_POLL_EV_NKP_N2E(EV) (EV)
@@ -202,69 +262,65 @@ struct erts_sys_fd_type {
#endif
-typedef struct ErtsPollSet_ *ErtsPollSet;
-
-typedef struct {
- ErtsSysFdType fd;
- ErtsPollEvents events;
- int on;
-} ErtsPollControlEntry;
+#if !ERTS_ENABLE_KERNEL_POLL
-typedef struct {
+typedef struct _ErtsPollResFd {
ErtsSysFdType fd;
ErtsPollEvents events;
} ErtsPollResFd;
+#define ERTS_POLL_RES_GET_FD(evt) (evt)->fd
+#define ERTS_POLL_RES_SET_FD(evt, ident) (evt)->fd = (ident)
+#define ERTS_POLL_RES_GET_EVTS(evt) ERTS_POLL_EV_N2E((evt)->events)
+#define ERTS_POLL_RES_SET_EVTS(evt, evts) (evt)->events = ERTS_POLL_EV_E2N(evts)
+
+#endif
+
+#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+
+#define ev2str(ev) \
+ (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
+ ((ev) == ERTS_POLL_EV_IN ? "IN" : \
+ ((ev) == ERTS_POLL_EV_OUT ? "OUT" : \
+ ((ev) == (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) ? "IN|OUT" : \
+ ((ev) & ERTS_POLL_EV_ERR ? "ERR" : \
+ ((ev) & ERTS_POLL_EV_NVAL ? "NVAL" : "OTHER"))))))
+
+
+typedef struct ERTS_POLL_EXPORT(erts_pollset) ErtsPollSet;
+
typedef struct {
char *primary;
- char *fallback;
char *kernel_poll;
Uint memory_size;
- int poll_set_size;
- int fallback_poll_set_size;
+ Uint poll_set_size;
int lazy_updates;
- int pending_updates;
+ Uint pending_updates;
int batch_updates;
int concurrent_updates;
- int max_fds;
-#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
- long no_avoided_wakeups;
- long no_avoided_interrupts;
- long no_interrupt_timed;
-#endif
+ int is_fallback;
+ Uint max_fds;
+ Uint active_fds;
+ Uint poll_threads;
} ErtsPollInfo;
-#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
-void ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet);
+#if defined(ERTS_POLL_USE_FALLBACK) && ERTS_KERNEL_POLL_VERSION
+# undef ERTS_POLL_EXPORT
+# define ERTS_POLL_EXPORT(FUNC) FUNC ## _flbk
+# include "erl_poll_api.h"
+# undef ERTS_POLL_EXPORT
+# define ERTS_POLL_EXPORT(FUNC) FUNC
+#elif !defined(ERTS_POLL_USE_FALLBACK)
+# define ERTS_POLL_USE_FALLBACK 0
#endif
-void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet,
- int);
-void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet,
- int,
- erts_short_time_t);
-ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet,
- ErtsSysFdType,
- ErtsPollEvents,
- int on,
- int* wake_poller
- );
-void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet,
- ErtsPollControlEntry [],
- int on);
-int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet,
- ErtsPollResFd [],
- int *,
- SysTimeval *);
-int ERTS_POLL_EXPORT(erts_poll_max_fds)(void);
-void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet,
- ErtsPollInfo *);
-ErtsPollSet ERTS_POLL_EXPORT(erts_poll_create_pollset)(void);
-void ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet);
-void ERTS_POLL_EXPORT(erts_poll_init)(void);
-void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet,
- ErtsPollEvents [],
- int);
-
-int ERTS_POLL_EXPORT(erts_poll_get_table_len)(int);
+
+#include "erl_poll_api.h"
+
+/**
+ * Get the next size of the array that holds the file descriptors.
+ * This function is used in order for the check io array and the
+ * pollset array to be of the same size.
+ */
+int erts_poll_new_table_len(int old_len, int need_len);
#endif /* #ifndef ERL_POLL_H__ */
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
new file mode 100644
index 0000000000..04beb37d1c
--- /dev/null
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -0,0 +1,122 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+/**
+ * @description: Poll interface functions
+ * @author Lukas Larsson
+ *
+ * The functions in the header are used to interact with the poll
+ * implementation. Iff the kernel-poll implementation needs a fallback
+ * pollset, then all functions are exported twice. Once with a _flbk
+ * suffix and once without any suffix. If no fallback is needed, then
+ * only the non-suffix version is exported.
+ */
+
+/**
+ * Initialize the poll implementation. Has to be called before any other function.
+ * @param[out] concurrent_waiters if not NULL, set to 1 if more then one thread
+ * is allowed to wait in the pollsets at the same time.
+ */
+void ERTS_POLL_EXPORT(erts_poll_init)(int *concurrent_waiters);
+/**
+ * @brief Create a new pollset.
+ * @param id The unique debug id of this pollset.
+ */
+ErtsPollSet *ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id);
+
+/**
+ * Modify the contents of a pollset. This function can be called while one
+ * (or possibly more) thread is waiting in the pollset.
+ *
+ * @param ps the pollset to modify
+ * @param fd the file descriptor to modify
+ * @param op the type of operation to do. Normal usage is ADD,MOD...MOD,DEL.
+ * @param evts the events that we are changing interest to. Ignored if op is DEL.
+ * @param[in] wake_poller if set to 1 any thread waiting in the pollset will be woken.
+ * This parameter is ignored if the pollset supports concurrent waiters.
+ * @param[out] wake_poller set to 1 if the waiting thread was woken.
+ * @return The events set, or ERTS_POLL_EV_NVAL if it was not possible to add the
+ * fd to the pollset.
+ */
+ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
+ ErtsSysFdType fd,
+ ErtsPollOp op,
+ ErtsPollEvents evts,
+ int *wake_poller);
+
+/**
+ * Wait for events to be ready in the pollset. If the erts_poll_init call
+ * set concurrent_waiters to 1, then multiple threads are allowed to call
+ * this function at the same time.
+ *
+ * When an event has been triggered on a fd, that event is disabled. To
+ * re-enable it the implementation has to call erts_poll_control again.
+ *
+ * @param ps the pollset to wait for events in
+ * @param res an array of fd results that the ready fds are put in.
+ * @param[in] length the length of the res array
+ * @param[out] length the number of ready events returned in res
+ * @return 0 on success, else the ERRNO of the error that happened.
+ */
+int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
+ ErtsPollResFd res[],
+ int *length);
+/**
+ * Interrupt the thread waiting in the pollset. This function should be called
+ * with set = 0 before any thread calls erts_poll_wait in order to clear any
+ * interrupts that have happened while the thread was awake.
+ *
+ * This function has no effect on pollsets that support concurrent waiters.
+ *
+ * @param ps the pollset to wake
+ * @param set if 1, interrupt the pollset, if 0 clear the interrupt flag.
+ */
+void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set);
+
+/* Debug functions */
+
+/**
+ * Get the maximum number of fds supported by the pollset
+ */
+int ERTS_POLL_EXPORT(erts_poll_max_fds)(void);
+/**
+ * Get information about the given pollset
+ */
+void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps,
+ ErtsPollInfo *info);
+/**
+ * Get information about which events are currently selected.
+ *
+ * The unix fd is used to index into the array, so naturally this function does
+ * not work on windows. If the pollset cannot figure out what the selected
+ * events for a given fd is, it is set to ERTS_POLL_EV_NONE.
+ *
+ * @param ps the pollset to get events from
+ * @param evts an array of which events are selected on.
+ */
+void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
+ ErtsPollEvents evts[],
+ int length);
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
+/**
+ * Enable lock counting of any locks within the pollset.
+ */
+void ERTS_POLL_EXPORT(erts_lcnt_enable_pollset_lock_count)(ErtsPollSet *, int enable);
+#endif
diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c
index e3ba741058..420138ff0a 100644
--- a/erts/emulator/sys/common/erl_sys_common_misc.c
+++ b/erts/emulator/sys/common/erl_sys_common_misc.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/common/erl_util_queue.h b/erts/emulator/sys/common/erl_util_queue.h
index 47925e2264..73293e0225 100644
--- a/erts/emulator/sys/common/erl_util_queue.h
+++ b/erts/emulator/sys/common/erl_util_queue.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2013. All Rights Reserved.
+ * Copyright Ericsson AB 2013-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/