diff options
Diffstat (limited to 'erts/emulator/sys/common')
-rw-r--r-- | erts/emulator/sys/common/erl_check_io.c | 616 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_check_io.h | 24 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_mmap.h | 57 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_osenv.c | 24 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_poll.c | 567 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_poll.h | 14 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_poll_api.h | 6 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_sys_common_misc.c | 3 |
8 files changed, 938 insertions, 373 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 3e77dce1cd..fb18c837ab 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -44,13 +44,13 @@ #include "erl_time.h" #if 0 -#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) +#define DEBUG_PRINT(FMT, ...) do { erts_printf(FMT "\r\n", ##__VA_ARGS__); fflush(stdout); } while(0) #define DEBUG_PRINT_FD(FMT, STATE, ...) \ - DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ - (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR) + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) #define DEBUG_PRINT_MODE #else #define DEBUG_PRINT(...) @@ -76,22 +76,40 @@ typedef enum { typedef enum { ERTS_EV_FLAG_CLEAR = 0, ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ -#ifdef ERTS_ENABLE_KERNEL_POLL - ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ +#else + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, +#endif +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd and it was put in the nkp version */ #else ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, #endif /* Combinations */ - ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER } EventStateFlags; #define flag2str(flags) \ ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ - ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR")))) + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) /* How many events that can be handled at once by one erts_poll_wait call */ #define ERTS_CHECK_IO_POLL_RES_LEN 512 @@ -105,6 +123,7 @@ typedef struct erts_poll_thread { ErtsPollSet *ps; ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; int pollres_len; } ErtsPollThread; @@ -112,10 +131,13 @@ typedef struct erts_poll_thread * Which pollset to use is determined by hashing the fd. */ static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; #if ERTS_POLL_USE_FALLBACK static ErtsPollSet *flbk_pollset; #endif -static ErtsPollThread *psiv; +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; +#endif typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -130,10 +152,12 @@ typedef struct { ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ } stop; } driver; - ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents events; /* The events that have been selected upon */ ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of times this fd has triggered + without being deselected. */ } ErtsDrvEventState; struct drv_ev_state_shared { @@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd) #if ERTS_POLL_USE_FALLBACK static ERTS_INLINE ErtsPollSet * -get_fallback(void) +get_fallback_pollset(void) { return flbk_pollset; } #endif +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; +#else + return get_pollset(fd); +#endif +} + /* * Place a fd within a pollset. This will automatically use * the fallback ps if needed. @@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); if (!(flags & ERTS_EV_FLAG_FALLBACK)) { - res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + + if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + } #if ERTS_POLL_USE_FALLBACK if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { /* When an add fails with NVAL, the poll/kevent operation could not put that fd in the pollset, so we instead put it into a fallback pollset */ state->flags |= ERTS_EV_FLAG_FALLBACK; - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); } } else { ASSERT(op != ERTS_POLL_OP_ADD); - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); #endif } @@ -425,59 +468,78 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); ErtsSysFdType fd = itp->fd; erts_mtx_t *mtx = fd_mtx(fd); - int active_events; + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; ErtsDrvEventState *state; ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + erts_mtx_lock(mtx); state = get_drv_ev_state(fd); + reset_handle(pthp); + active_events = state->active_events; - switch (type) { - case ERTS_PORT_TASK_INPUT: + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: - DEBUG_PRINT_FD("executed ready_input", state); + DEBUG_PRINT_FD("executed ready_output", state); - ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); - if (state->events & ERTS_POLL_EV_IN) - active_events |= ERTS_POLL_EV_IN; - break; - case ERTS_PORT_TASK_OUTPUT: + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } - DEBUG_PRINT_FD("executed ready_output", state); + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } - ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); - if (state->events & ERTS_POLL_EV_OUT) - active_events |= ERTS_POLL_EV_OUT; - break; - default: - erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); - break; + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + active_events = 0; + } } - reset_handle(pthp); - - if (active_events) { - /* This is not needed if active_events has not changed */ - if (state->active_events != active_events) { - ErtsPollEvents new_events; - state->active_events = active_events; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events); - - /* We were unable to re-insert the fd into the pollset, signal the callback. */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (active_events & ERTS_POLL_EV_IN) - iready(state->driver.select->inport, state); - if (active_events & ERTS_POLL_EV_OUT) - oready(state->driver.select->outport, state); - state->active_events = 0; - } - } - } else { + if (!active_events) check_fd_cleanup(state, &free_select, &free_nif); - } erts_mtx_unlock(mtx); @@ -485,6 +547,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, free_drv_select_data(free_select); if (free_nif) free_nif_select_data(free_nif); + + ERTS_MSACC_POP_STATE_M_X(); } static ERTS_INLINE void @@ -528,6 +592,96 @@ abort_tasks(ErtsDrvEventState *state, int mode) } } +static void prepare_select_msg(struct erts_nif_select_event* e, + enum ErlNifSelectFlags mode, + Eterm recipient, + ErtsResource* resource, + Eterm msg, + ErlNifEnv* msg_env, + Eterm event_atom) +{ + ErtsMessage* mp; + Eterm* hp; + Uint hsz; + + if (is_not_nil(e->pid)) { + ASSERT(e->mp); + erts_cleanup_messages(e->mp); + } + + if (mode & ERL_NIF_SELECT_CUSTOM_MSG) { + if (msg_env) { + mp = erts_create_message_from_nif_env(msg_env); + ERL_MESSAGE_TERM(mp) = msg; + } + else { + hsz = size_object(msg); + mp = erts_alloc_message(hsz, &hp); + ERL_MESSAGE_TERM(mp) = copy_struct(msg, hsz, &hp, &mp->hfrag.off_heap); + } + } + else { + ErtsBinary* bin; + Eterm resource_term, ref_term, tuple; + Eterm* hp_start; + + /* {select, Resource, Ref, EventAtom} */ + hsz = 5 + ERTS_MAGIC_REF_THING_SIZE; + if (is_internal_ref(msg)) + hsz += ERTS_REF_THING_SIZE; + else + ASSERT(is_immed(msg)); + + mp = erts_alloc_message(hsz, &hp); + hp_start = hp; + + bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); + resource_term = erts_mk_magic_ref(&hp, &mp->hfrag.off_heap, &bin->binary); + if (is_internal_ref(msg)) { + Uint32* refn = internal_ref_numbers(msg); + write_ref_thing(hp, refn[0], refn[1], refn[2]); + ref_term = make_internal_ref(hp); + hp += ERTS_REF_THING_SIZE; + } + else { + ASSERT(is_immed(msg)); + ref_term = msg; + } + tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom); + hp += 5; + ERL_MESSAGE_TERM(mp) = tuple; + ASSERT(hp == hp_start + hsz); (void)hp_start; + } + + ASSERT(is_not_nil(recipient)); + e->pid = recipient; + e->mp = mp; +} + +static ERTS_INLINE void send_select_msg(struct erts_nif_select_event* e) +{ + Process* rp = erts_proc_lookup(e->pid); + + ASSERT(is_internal_pid(e->pid)); + if (!rp) { + erts_cleanup_messages(e->mp); + return; + } + + erts_queue_message(rp, 0, e->mp, ERL_MESSAGE_TERM(e->mp), am_system); +} + +static void clear_select_event(struct erts_nif_select_event* e) +{ + if (is_not_nil(e->pid)) { + /* Discard unsent message */ + ASSERT(e->mp); + erts_cleanup_messages(e->mp); + e->mp = NULL; + e->pid = NIL; + } +} + static void deselect(ErtsDrvEventState *state, int mode) { @@ -558,8 +712,8 @@ deselect(ErtsDrvEventState *state, int mode) erts_io_control(state, ERTS_POLL_OP_DEL, 0); switch (state->type) { case ERTS_EV_TYPE_NIF: - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; + clear_select_event(&state->driver.nif->in); + clear_select_event(&state->driver.nif->out); enif_release_resource(state->driver.stop.resource->data); state->driver.stop.resource = NULL; break; @@ -689,6 +843,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) ret = 0; goto done_unknown; } + /* For some reason (don't know why), we do not clean all + events when doing ERL_DRV_USE_NO_CALLBACK. */ else if ((mode&ERL_DRV_USE_NO_CALLBACK) == ERL_DRV_USE) { mode |= (ERL_DRV_READ | ERL_DRV_WRITE); } @@ -755,11 +911,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { ctl_op = ERTS_POLL_OP_ADD; } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } else { ctl_events &= old_events; state->events &= ~ctl_events; state->active_events &= ~ctl_events; + new_events = state->active_events; + + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } if (!state->events) { if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) @@ -770,7 +937,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { new_events = erts_io_control_wakeup(state, ctl_op, - state->active_events, + new_events, &wake_poller); ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); @@ -802,6 +969,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events & ERTS_POLL_EV_IN) { abort_tasks(state, ERL_DRV_READ); state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; } if (ctl_events & ERTS_POLL_EV_OUT) { abort_tasks(state, ERL_DRV_WRITE); @@ -810,6 +978,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (state->events == 0) { if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); state->flags = 0; } /*else keep it, as fd will probably be selected upon again */ @@ -866,12 +1036,21 @@ done_unknown: } int -enif_select(ErlNifEnv* env, - ErlNifEvent e, - enum ErlNifSelectFlags mode, - void* obj, - const ErlNifPid* pid, - Eterm ref) +enif_select(ErlNifEnv* env, ErlNifEvent e, enum ErlNifSelectFlags mode, + void* obj, const ErlNifPid* pid, Eterm msg) +{ + return enif_select_x(env, e, mode, obj, pid, msg, NULL); +} + + +int +enif_select_x(ErlNifEnv* env, + ErlNifEvent e, + enum ErlNifSelectFlags mode, + void* obj, + const ErlNifPid* pid, + Eterm msg, + ErlNifEnv* msg_env) { int on; ErtsResource* resource = DATA_TO_RESOURCE(obj); @@ -885,11 +1064,11 @@ enif_select(ErlNifEnv* env, ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; - ASSERT(!resource->monitors); + ASSERT(!erts_dbg_is_resource_dying(resource)); #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS if (!grow_drv_ev_state(fd)) { - if (fd > 0) nif_select_large_fd_error(fd, mode, resource, ref); + if (fd > 0) nif_select_large_fd_error(fd, mode, resource, msg); return INT_MIN | ERL_NIF_SELECT_INVALID_EVENT; } #endif @@ -916,7 +1095,7 @@ enif_select(ErlNifEnv* env, ctl_op = ERTS_POLL_OP_DEL; } else { - on = 1; + on = !(mode & ERL_NIF_SELECT_CANCEL); ASSERT(mode); if (mode & ERL_DRV_READ) { ctl_events |= ERTS_POLL_EV_IN; @@ -935,21 +1114,21 @@ enif_select(ErlNifEnv* env, * Changing process and/or ref is ok (I think?). */ if (state->driver.stop.resource != resource) - nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, ref); + nif_select_steal(state, ERL_DRV_READ | ERL_DRV_WRITE, resource, msg); break; case ERTS_EV_TYPE_DRV_SEL: - nif_select_steal(state, mode, resource, ref); + nif_select_steal(state, mode, resource, msg); break; case ERTS_EV_TYPE_STOP_USE: { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_nif_select_op(dsbufp, fd, mode, resource, ref); + print_nif_select_op(dsbufp, fd, mode, resource, msg); steal_pending_stop_use(dsbufp, ERTS_INVALID_ERL_DRV_PORT, state, mode, on); ASSERT(state->type == ERTS_EV_TYPE_NONE); break; } case ERTS_EV_TYPE_STOP_NIF: { erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); - print_nif_select_op(dsbufp, fd, mode, resource, ref); + print_nif_select_op(dsbufp, fd, mode, resource, msg); steal_pending_stop_nif(dsbufp, resource, state, mode, on); if (state->type == ERTS_EV_TYPE_STOP_NIF) { ret = ERL_NIF_SELECT_STOP_SCHEDULED; /* ?? */ @@ -1005,7 +1184,7 @@ enif_select(ErlNifEnv* env, if (on) { const Eterm recipient = pid ? pid->pid : env->proc->common.id; - Uint32* refn; + ASSERT(is_internal_pid(recipient)); if (!state->driver.nif) state->driver.nif = alloc_nif_select_data(); if (state->type == ERTS_EV_TYPE_NONE) { @@ -1016,64 +1195,62 @@ enif_select(ErlNifEnv* env, ASSERT(state->type == ERTS_EV_TYPE_NIF); ASSERT(state->driver.stop.resource == resource); if (mode & ERL_DRV_READ) { - state->driver.nif->in.pid = recipient; - if (is_immed(ref)) { - state->driver.nif->in.immed = ref; - } else { - ASSERT(is_internal_ref(ref)); - refn = internal_ref_numbers(ref); - state->driver.nif->in.immed = THE_NON_VALUE; - sys_memcpy(state->driver.nif->in.refn, refn, - sizeof(state->driver.nif->in.refn)); - } + prepare_select_msg(&state->driver.nif->in, mode, recipient, + resource, msg, msg_env, am_ready_input); + msg_env = NULL; } if (mode & ERL_DRV_WRITE) { - state->driver.nif->out.pid = recipient; - if (is_immed(ref)) { - state->driver.nif->out.immed = ref; - } else { - ASSERT(is_internal_ref(ref)); - refn = internal_ref_numbers(ref); - state->driver.nif->out.immed = THE_NON_VALUE; - sys_memcpy(state->driver.nif->out.refn, refn, - sizeof(state->driver.nif->out.refn)); - } + prepare_select_msg(&state->driver.nif->out, mode, recipient, + resource, msg, msg_env, am_ready_output); } ret = 0; } else { /* off */ + ret = 0; if (state->type == ERTS_EV_TYPE_NIF) { - state->driver.nif->in.pid = NIL; - state->driver.nif->out.pid = NIL; - } - ASSERT(state->events==0); - if (!wake_poller) { - /* - * Safe to close fd now as it is not in pollset - * or there was no need to eject fd (kernel poll) - */ - if (state->type == ERTS_EV_TYPE_NIF) { - ASSERT(state->driver.stop.resource == resource); - call_stop = CALL_STOP_AND_RELEASE; - state->driver.stop.resource = NULL; + if (mode & ERL_NIF_SELECT_READ + && is_not_nil(state->driver.nif->in.pid)) { + clear_select_event(&state->driver.nif->in); + ret |= ERL_NIF_SELECT_READ_CANCELLED; } - else { - ASSERT(!state->driver.stop.resource); - call_stop = CALL_STOP; + if (mode & ERL_NIF_SELECT_WRITE + && is_not_nil(state->driver.nif->out.pid)) { + clear_select_event(&state->driver.nif->out); + ret |= ERL_NIF_SELECT_WRITE_CANCELLED; } - state->type = ERTS_EV_TYPE_NONE; - ret = ERL_NIF_SELECT_STOP_CALLED; } - else { - /* Not safe to close fd, postpone stop_select callback. */ - if (state->type == ERTS_EV_TYPE_NONE) { - ASSERT(!state->driver.stop.resource); - state->driver.stop.resource = resource; - enif_keep_resource(resource); + if (mode & ERL_NIF_SELECT_STOP) { + ASSERT(state->events==0); + if (!wake_poller) { + /* + * Safe to close fd now as it is not in pollset + * or there was no need to eject fd (kernel poll) + */ + if (state->type == ERTS_EV_TYPE_NIF) { + ASSERT(state->driver.stop.resource == resource); + call_stop = CALL_STOP_AND_RELEASE; + state->driver.stop.resource = NULL; + } + else { + ASSERT(!state->driver.stop.resource); + call_stop = CALL_STOP; + } + state->type = ERTS_EV_TYPE_NONE; + ret |= ERL_NIF_SELECT_STOP_CALLED; + } + else { + /* Not safe to close fd, postpone stop_select callback. */ + if (state->type == ERTS_EV_TYPE_NONE) { + ASSERT(!state->driver.stop.resource); + state->driver.stop.resource = resource; + enif_keep_resource(resource); + } + state->type = ERTS_EV_TYPE_STOP_NIF; + ret |= ERL_NIF_SELECT_STOP_SCHEDULED; } - state->type = ERTS_EV_TYPE_STOP_NIF; - ret = ERL_NIF_SELECT_STOP_SCHEDULED; } + else + ASSERT(mode & ERL_NIF_SELECT_CANCEL); } done: @@ -1251,7 +1428,8 @@ print_nif_select_op(erts_dsprintf_buf_t *dsbufp, (int) fd, mode & ERL_NIF_SELECT_READ ? " READ" : "", mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "", - mode & ERL_NIF_SELECT_STOP ? " STOP" : "", + (mode & ERL_NIF_SELECT_STOP ? " STOP" + : (mode & ERL_NIF_SELECT_CANCEL ? " CANCEL" : "")), resource->type->module, resource->type->name, ref); @@ -1426,7 +1604,8 @@ iready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); } else { DEBUG_PRINT_FD("schedule ready_input(%T, %d)", @@ -1444,7 +1623,8 @@ oready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + 0) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); } else { DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); @@ -1452,54 +1632,6 @@ oready(Eterm id, ErtsDrvEventState *state) } } -static ERTS_INLINE void -send_event_tuple(struct erts_nif_select_event* e, ErtsResource* resource, - Eterm event_atom) -{ - Process* rp = erts_proc_lookup(e->pid); - ErtsProcLocks rp_locks = 0; - ErtsMessage* mp; - ErlOffHeap* ohp; - ErtsBinary* bin; - Eterm* hp; - Uint hsz; - Eterm resource_term, ref_term, tuple; - - if (!rp) { - return; - } - - bin = ERTS_MAGIC_BIN_FROM_UNALIGNED_DATA(resource); - - /* {select, Resource, Ref, EventAtom} */ - if (is_value(e->immed)) { - hsz = 5 + ERTS_MAGIC_REF_THING_SIZE; - } - else { - hsz = 5 + ERTS_MAGIC_REF_THING_SIZE + ERTS_REF_THING_SIZE; - } - - mp = erts_alloc_message_heap(rp, &rp_locks, hsz, &hp, &ohp); - - resource_term = erts_mk_magic_ref(&hp, ohp, &bin->binary); - if (is_value(e->immed)) { - ASSERT(is_immed(e->immed)); - ref_term = e->immed; - } - else { - write_ref_thing(hp, e->refn[0], e->refn[1], e->refn[2]); - ref_term = make_internal_ref(hp); - hp += ERTS_REF_THING_SIZE; - } - tuple = TUPLE4(hp, am_select, resource_term, ref_term, event_atom); - - ERL_MESSAGE_TOKEN(mp) = am_undefined; - erts_queue_message(rp, rp_locks, mp, tuple, am_system); - - if (rp_locks) - erts_proc_unlock(rp, rp_locks); -} - static void bad_fd_in_pollset(ErtsDrvEventState *, Eterm inport, Eterm outport); void @@ -1507,7 +1639,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) { if (psi) { #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { erts_poll_interrupt_flbk(psi->ps, set); return; } @@ -1517,12 +1649,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) } ErtsPollThread * -erts_create_pollset_thread(int id) { +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; return psiv+id; } void -erts_check_io(ErtsPollThread *psi) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) { int pollres_len; int poll_ret, i; @@ -1537,14 +1670,14 @@ erts_check_io(ErtsPollThread *psi) pollres_len = psi->pollres_len; #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { - poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } else #endif { - poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } #ifdef ERTS_ENABLE_LOCK_CHECK @@ -1580,7 +1713,12 @@ erts_check_io(ErtsPollThread *psi) ErtsNifSelectDataState *free_nif = NULL; ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; - ErtsPollEvents revents; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); + + /* The fd will be set to -1 if a pollset internal fd was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; erts_mtx_lock(fd_mtx(fd)); @@ -1591,8 +1729,6 @@ erts_check_io(ErtsPollThread *psi) continue; } - revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); if (revents & ERTS_POLL_EV_ERR) { @@ -1604,25 +1740,39 @@ erts_check_io(ErtsPollThread *psi) */ revents = state->active_events; state->active_events = 0; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } } else { /* Disregard any events that are not active at the moment, for instance this could happen if the driver/nif does select/deselect in rapid succession. */ revents &= state->active_events | ERTS_POLL_EV_NVAL; - state->active_events &= ~revents; - /* Reactivate the poll op if there are still active events */ - if (state->active_events) { - ErtsPollEvents new_events; - DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events)); + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + reactive_events = state->active_events; - /* Unable to re-enable the fd, signal all callbacks */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - revents |= state->active_events; - state->active_events = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; + + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } } } } @@ -1655,7 +1805,6 @@ erts_check_io(ErtsPollThread *psi) case ERTS_EV_TYPE_NIF: { /* Requested via enif_select()... */ struct erts_nif_select_event in = {NIL}; struct erts_nif_select_event out = {NIL}; - ErtsResource* resource = NULL; if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { @@ -1663,6 +1812,7 @@ erts_check_io(ErtsPollThread *psi) out = state->driver.nif->out; resource = state->driver.stop.resource; state->driver.nif->out.pid = NIL; + state->driver.nif->out.mp = NULL; } } if (revents & ERTS_POLL_EV_IN) { @@ -1670,6 +1820,7 @@ erts_check_io(ErtsPollThread *psi) in = state->driver.nif->in; resource = state->driver.stop.resource; state->driver.nif->in.pid = NIL; + state->driver.nif->in.mp = NULL; } } state->events &= ~revents; @@ -1682,10 +1833,10 @@ erts_check_io(ErtsPollThread *psi) erts_mtx_unlock(fd_mtx(fd)); if (is_not_nil(in.pid)) { - send_event_tuple(&in, resource, am_ready_input); + send_select_msg(&in); } if (is_not_nil(out.pid)) { - send_event_tuple(&out, resource, am_ready_output); + send_select_msg(&out); } continue; } @@ -1698,7 +1849,7 @@ erts_check_io(ErtsPollThread *psi) case ERTS_EV_TYPE_STOP_USE: { #if ERTS_POLL_USE_FALLBACK - ASSERT(psi->ps == get_fallback()); + ASSERT(psi->ps == get_fallback_pollset()); #endif drv_ptr = state->driver.stop.drv_ptr; state->type = ERTS_EV_TYPE_NONE; @@ -2036,12 +2187,17 @@ erts_init_check_io(int *argc, char **argv) for (j=0; j < erts_no_pollsets; j++) pollsetv[j] = erts_poll_create_pollset(j); -#if ERTS_POLL_USE_FALLBACK - flbk_pollset = erts_poll_create_pollset_flbk(-1); + no_poll_threads = erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; #endif - no_poll_threads = erts_no_poll_threads; #if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); no_poll_threads++; #endif @@ -2051,7 +2207,15 @@ erts_init_check_io(int *argc, char **argv) psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); - psiv[0].ps = get_fallback(); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); psiv++; #endif @@ -2108,7 +2272,12 @@ erts_check_io_size(void) int i; #if ERTS_POLL_USE_FALLBACK - erts_poll_info(get_fallback(), &pi); + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); res += pi.memory_size; #endif @@ -2140,14 +2309,22 @@ erts_check_io_info(void *proc) Uint sz, *szp, *hp, **hpp; ErtsPollInfo *piv; Sint i, j = 0, len; - int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); #if ERTS_POLL_USE_FALLBACK - erts_poll_info_flbk(get_fallback(), &piv[0]); - piv[0].poll_threads = 1; + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 0; + piv[0].active_fds = 0; + piv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); + piv[0].poll_threads = 0; piv[0].active_fds = 0; piv++; #endif @@ -2200,6 +2377,7 @@ erts_check_io_info(void *proc) sz = 0; piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; bld_it: @@ -2304,15 +2482,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) static ERTS_INLINE void print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) { - const char* delim = ""; - if(f & ERTS_EV_FLAG_USED) { - erts_dsprintf(dsbufp, "%s","USED"); - delim = "|"; - } - if(f & ERTS_EV_FLAG_FALLBACK) { - erts_dsprintf(dsbufp, "%s%s", delim, "FLBK"); - delim = "|"; - } + erts_dsprintf(dsbufp, "%s", flag2str(f)); } #ifdef DEBUG_PRINT_MODE @@ -2324,6 +2494,10 @@ drvmode2str(int mode) { case ERL_DRV_WRITE|ERL_DRV_USE: return "WRITE|USE"; case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE: return "READ|WRITE|USE"; case ERL_DRV_USE: return "USE"; + case ERL_DRV_READ|ERL_DRV_USE_NO_CALLBACK: return "READ|USE_NO_CB"; + case ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "WRITE|USE_NO_CB"; + case ERL_DRV_READ|ERL_DRV_WRITE|ERL_DRV_USE_NO_CALLBACK: return "READ|WRITE|USE_NO_CB"; + case ERL_DRV_USE_NO_CALLBACK: return "USE_NO_CB"; case ERL_DRV_READ: return "READ"; case ERL_DRV_WRITE: return "WRITE"; case ERL_DRV_READ|ERL_DRV_WRITE: return "READ|WRITE"; @@ -2333,10 +2507,16 @@ drvmode2str(int mode) { static ERTS_INLINE char * nifmode2str(enum ErlNifSelectFlags mode) { + if (mode & ERL_NIF_SELECT_STOP) + return "STOP"; switch (mode) { case ERL_NIF_SELECT_READ: return "READ"; case ERL_NIF_SELECT_WRITE: return "WRITE"; - case ERL_NIF_SELECT_STOP: return "STOP"; + case ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: return "READ|WRITE"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ: return "CANCEL|READ"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_WRITE: return "CANCEL|WRITE"; + case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: + return "CANCEL|READ|WRITE"; default: return "UNKNOWN"; } } @@ -2654,13 +2834,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) #if ERTS_POLL_USE_FALLBACK erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); - erts_poll_get_selected_events_flbk(get_fallback(), counters.epep, + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } #endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); for (i = 0; i < erts_no_pollsets; i++) { @@ -2669,8 +2862,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) - && get_pollset_id(fd) == i) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } } for (fd = len ; fd < drv_ev_state.max_fds; fd++) { @@ -2717,7 +2917,7 @@ void erts_lcnt_update_cio_locks(int enable) { #endif #if ERTS_POLL_USE_FALLBACK - erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable); + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); #endif for (i = 0; i < erts_no_pollsets; i++) diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 443ef1264c..0f3fc4f7a2 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -68,7 +68,7 @@ int erts_check_io_max_files(void); * * @param pt the poll thread structure to use. */ -void erts_check_io(struct erts_poll_thread *pt); +void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time); /** * Initialize the check io framework. This function will parse the arguments * and delete any entries that it is interested in. @@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); /** * Create a new poll thread structure that is associated with the number no. * It is the callers responsibility that no is unique. + * + * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler + * @param tpd the thread progress data of the pollset thread */ -struct erts_poll_thread* erts_create_pollset_thread(int no); +struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd); #ifdef ERTS_ENABLE_LOCK_COUNT /** * Toggle lock counting on all check io locks @@ -126,16 +129,6 @@ extern int erts_no_poll_threads; #include "erl_poll.h" #include "erl_port_task.h" -#ifdef __WIN32__ -/* - * Current erts_poll implementation for Windows cannot handle - * active events in the set of events polled. - */ -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#endif - typedef struct { Eterm inport; Eterm outport; @@ -145,12 +138,7 @@ typedef struct { struct erts_nif_select_event { Eterm pid; - Eterm immed; - Uint32 refn[ERTS_REF_NUMBERS]; - Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress - * 1: Do deselect before next poll - * >1: Countdown of ignored events - */ + ErtsMessage *mp; }; typedef struct { diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 539daea419..3085bf7e19 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -176,4 +176,61 @@ void hard_dbg_remove_mseg(void* seg, UWord sz); #endif /* HAVE_ERTS_MMAP */ +/* Marks the given memory region as unused without freeing it, letting the OS + * reclaim its physical memory with the promise that we'll get it back (without + * its contents) the next time it's accessed. */ +ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size); + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +#ifdef VALGRIND + #include <valgrind/memcheck.h> + + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); + } +#elif defined(DEBUG) + /* Try to provoke crashes by filling the discard region with garbage. It's + * extremely hard to find bugs where we've discarded too much, as the + * region often retains its old contents if it's accessed before the OS + * reclaims it. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + static const char pattern[] = "DISCARDED"; + char *data; + int i; + + for(i = 0, data = ptr; i < size; i++) { + data[i] = pattern[i % sizeof(pattern)]; + } + } +#elif defined(HAVE_SYS_MMAN_H) && !(defined(__sun) || defined(__sun__)) + #include <sys/mman.h> + + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + #ifdef MADV_FREE + /* This is preferred as it doesn't necessarily free the pages right + * away, which is a bit faster than MADV_DONTNEED. */ + madvise(ptr, size, MADV_FREE); + #else + madvise(ptr, size, MADV_DONTNEED); + #endif + } +#elif defined(_WIN32) + #include <winbase.h> + + /* MEM_RESET is defined on all supported versions of Windows, and has the + * same semantics as MADV_FREE. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE); + } +#else + /* Dummy implementation. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + (void)ptr; + (void)size; + } +#endif + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif /* ERL_MMAP_H__ */ diff --git a/erts/emulator/sys/common/erl_osenv.c b/erts/emulator/sys/common/erl_osenv.c index 9f54d1dff0..f055c5f854 100644 --- a/erts/emulator/sys/common/erl_osenv.c +++ b/erts/emulator/sys/common/erl_osenv.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2017. All Rights Reserved. + * Copyright Ericsson AB 2017-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -75,7 +75,15 @@ static int compare_env_keys(const erts_osenv_data_t a, const erts_osenv_data_t b #include "erl_rbtree.h" static int compare_env_keys(const erts_osenv_data_t a, const erts_osenv_data_t b) { - int relation = sys_memcmp(a.data, b.data, MIN(a.length, b.length)); + int relation; + +#ifdef __WIN32__ + /* Environment variables are case-insensitive on Windows. */ + relation = _wcsnicmp((const WCHAR*)a.data, (const WCHAR*)b.data, + MIN(a.length, b.length) / sizeof(WCHAR)); +#else + relation = sys_memcmp(a.data, b.data, MIN(a.length, b.length)); +#endif if(relation != 0) { return relation; @@ -159,9 +167,10 @@ void erts_osenv_init(erts_osenv_t *env) { env->tree = NULL; } -static void destroy_foreach(env_rbtnode_t *node, void *_state) { +static int destroy_foreach(env_rbtnode_t *node, void *_state, Sint reds) { erts_free(ERTS_ALC_T_ENVIRONMENT, node); (void)_state; + return 1; } void erts_osenv_clear(erts_osenv_t *env) { @@ -174,7 +183,7 @@ struct __env_merge { erts_osenv_t *env; }; -static void merge_foreach(env_rbtnode_t *node, void *_state) { +static int merge_foreach(env_rbtnode_t *node, void *_state, Sint reds) { struct __env_merge *state = (struct __env_merge*)(_state); env_rbtnode_t *existing_node; @@ -183,6 +192,7 @@ static void merge_foreach(env_rbtnode_t *node, void *_state) { if(existing_node == NULL || state->overwrite_existing) { erts_osenv_put_native(state->env, &node->key, &node->value); } + return 1; } void erts_osenv_merge(erts_osenv_t *env, const erts_osenv_t *with, int overwrite) { @@ -200,7 +210,7 @@ struct __env_foreach_term { void *user_state; }; -static void foreach_term_wrapper(env_rbtnode_t *node, void *_state) { +static int foreach_term_wrapper(env_rbtnode_t *node, void *_state, Sint reds) { struct __env_foreach_term *state = (struct __env_foreach_term*)_state; Eterm key, value; @@ -210,6 +220,7 @@ static void foreach_term_wrapper(env_rbtnode_t *node, void *_state) { node->value.length, (byte*)node->value.data); state->user_callback(state->process, state->user_state, key, value); + return 1; } void erts_osenv_foreach_term(const erts_osenv_t *env, struct process *process, @@ -306,10 +317,11 @@ struct __env_foreach_native { void *user_state; }; -static void foreach_native_wrapper(env_rbtnode_t *node, void *_state) { +static int foreach_native_wrapper(env_rbtnode_t *node, void *_state, Sint reds) { struct __env_foreach_native *state = (struct __env_foreach_native*)_state; state->user_callback(state->user_state, &node->key, &node->value); + return 1; } void erts_osenv_foreach_native(const erts_osenv_t *env, void *state, diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index 70b5532af9..1b125056f5 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -75,6 +75,7 @@ # define WANT_NONBLOCKING #endif +#include "erl_thr_progress.h" #include "erl_poll.h" #if ERTS_POLL_USE_KQUEUE # include <sys/types.h> @@ -95,7 +96,6 @@ # include <limits.h> # endif #endif -#include "erl_thr_progress.h" #include "erl_driver.h" #include "erl_alloc.h" #include "erl_msacc.h" @@ -121,7 +121,8 @@ /* Define to print info about modifications done to each fd */ #define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) /* Define to print entry and exit from erts_poll_wait (can be very spammy) */ -//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0) #else #define ERTS_POLL_DEBUG_PRINT 0 @@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, #define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE) +#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0) #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; + int oneshot; #endif /* ERTS_POLL_USE_KERNEL_POLL */ #if ERTS_POLL_USE_POLL @@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) { ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; erts_atomic32_t have_update_requests; erts_mtx_t mtx; - erts_atomic32_t wakeup_state; +#else + int do_wakeup; #endif -#if ERTS_POLL_USE_WAKEUP_PIPE - int wake_fds[2]; +#if ERTS_POLL_USE_TIMERFD + int timer_fd; #endif + ErtsMonotonicTime timeout_time; + erts_atomic32_t wakeup_state; + int wake_fds[2]; }; void erts_silence_warn_unused_result(long unused); @@ -365,63 +371,47 @@ static void print_misc_debug_info(void); uint32_t epoll_events(int kp_fd, int fd); #endif - #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 -#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void reset_wakeup_state(ErtsPollSet *ps) { erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } -#endif static ERTS_INLINE int is_woken(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; -#else - return 0; -#endif } static ERTS_INLINE int is_interrupted_reset(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); -#else - return 0; -#endif } static ERTS_INLINE void woke_up(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); -#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ -#if ERTS_POLL_USE_WAKEUP_PIPE - static ERTS_INLINE void wake_poller(ErtsPollSet *ps, int interrupted) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE int wake; erts_aint32_t wakeup_state; if (!interrupted) @@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted) wake = wakeup_state == ERTS_POLL_NOT_WOKEN; if (wake) -#endif { ssize_t res; + DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted); if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ do { @@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps) fd, erl_errno_id(errno), errno); } -#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (intr) erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); -#endif } static void @@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps) ps->wake_fds[1] = wake_fds[1]; } +/* + * --- timer fd ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_TIMERFD + +/* We use the timerfd when using epoll_wait to get high accuracy + timeouts, i.e. we want to sleep with < ms accuracy. */ + +static void +create_timerfd(ErtsPollSet *ps) +{ + int do_wake = 0; + int timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + ERTS_POLL_EXPORT(erts_poll_control)(ps, + timer_fd, + ERTS_POLL_OP_ADD, + ERTS_POLL_EV_IN, + &do_wake); + if (ps->internal_fd_limit <= timer_fd) + ps->internal_fd_limit = timer_fd + 1; + ps->timer_fd = timer_fd; +} + +static ERTS_INLINE void +timerfd_set(ErtsPollSet *ps, struct itimerspec *its) +{ +#ifdef DEBUG + struct itimerspec old_its; + int res; + res = timerfd_settime(ps->timer_fd, 0, its, &old_its); + ASSERT(res == 0); + ASSERT(old_its.it_interval.tv_sec == 0 && + old_its.it_interval.tv_nsec == 0 && + old_its.it_value.tv_sec == 0 && + old_its.it_value.tv_nsec == 0); + +#else + timerfd_settime(ps->timer_fd, 0, its, NULL); #endif +} + +static ERTS_INLINE int +timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) { + + struct itimerspec its; + /* we always have to clear the timer */ + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 0; + timerfd_settime(ps->timer_fd, 0, &its, NULL); + + /* only timeout fd triggered */ + if (res == 1 && pr[0].data.fd == ps->timer_fd) + return 0; + + return res; +} + +#endif /* ERTS_POLL_USE_TIMERFD */ /* * --- Poll set update requests ---------------------------------------------- @@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) struct epoll_event epe_templ; struct epoll_event epe; - epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT; + epe_templ.events = ERTS_POLL_EV_E2N(events); epe_templ.data.fd = fd; + if (ps->oneshot) + epe_templ.events |= EPOLLONESHOT; + #ifdef VALGRIND /* Silence invalid valgrind warning ... */ memset((void *) &epe.data, 0, sizeof(epoll_data_t)); @@ -802,9 +853,27 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) int res = 0, len = 0; struct kevent evts[2]; struct timespec ts = {0, 0}; + uint32_t oneshot = 0; + + if (op == ERTS_POLL_OP_ADD) { + /* This is a hack to make the "noshell" option work; kqueue can poll + * these fds but will not report EV_EOF, so we return NVAL to use the + * fallback instead. + * + * This may be common to all pipes but we have no way to tell whether + * an fd is a pipe or not. */ + switch (fd) { + case STDIN_FILENO: + case STDOUT_FILENO: + case STDERR_FILENO: + return ERTS_POLL_EV_NVAL; + default: + break; + } + } -#if defined(EV_DISPATCH) && !defined(__OpenBSD__) - /* If we have EV_DISPATCH we use it, unless we are on OpenBSD as the +#if defined(EV_DISPATCH) && !(defined(__OpenBSD__) || defined(__NetBSD__)) + /* If we have EV_DISPATCH we use it, unless we are on OpenBSD/NetBSD as the behavior of EV_EOF seems to be edge triggered there and we need it to be level triggered. @@ -823,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) man page), but it seems to be the way it works... */ + if (ps->oneshot) + oneshot = EV_DISPATCH; + if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); /* We could probably skip this delete, do we want to? */ @@ -832,35 +904,55 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) uint32_t flags; erts_atomic_inc_nob(&ps->no_of_user_fds); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } else { uint32_t flags; ASSERT(op == ERTS_POLL_OP_MOD); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } #else - uint32_t flags = EV_ADD|EV_ONESHOT; + uint32_t flags = EV_ADD|EV_ENABLE; + + if (ps->oneshot) flags |= EV_ONESHOT; if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); /* We don't do anything when a delete is issued. The fds will be removed when they are triggered, or when they are closed. */ - events = 0; + if (ps->oneshot) + events = 0; + else { + flags = EV_DELETE; + events = ERTS_POLL_EV_IN; + } } else if (op == ERTS_POLL_OP_ADD) { erts_atomic_inc_nob(&ps->no_of_user_fds); + /* Only allow EV_IN in non-oneshot poll-sets */ + ASSERT(ps->oneshot || events == ERTS_POLL_EV_IN); + } else if (!ps->oneshot) { + ASSERT(op == ERTS_POLL_OP_MOD); + /* If we are not oneshot and do a mod we should disable the FD. + We assume that it is only the read side that is active as + currently only read is selected upon in the non-oneshot + poll-sets. */ + if (!events) + flags = EV_DISABLE; + else + flags = EV_ENABLE; + events = ERTS_POLL_EV_IN; } if (events & ERTS_POLL_EV_IN) { @@ -886,13 +978,15 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd); for (i = 0; i < len; i++) { const char *flags = "UNKNOWN"; - if (evts[i].flags == EV_DELETE) flags = "EV_DELETE"; + if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE"; + if (evts[i].flags == (EV_ADD)) flags = "EV_ADD"; if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT"; + if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; + if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; #ifdef EV_DISPATCH if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH"; - if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH"; - if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE"; if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE"; #endif @@ -1256,11 +1350,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op, goto done; } #endif -#if ERTS_POLL_USE_WAKEUP_PIPE if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { new_events = ERTS_POLL_EV_NVAL; goto done; } +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } #endif } @@ -1316,11 +1414,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, ERTS_POLLSET_UNLOCK(ps); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (*do_wake) { + if (*do_wake) wake_poller(ps, 0); - } -#endif return res; } @@ -1334,52 +1429,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, static ERTS_INLINE int ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE int n = chk_fds_res < max_res ? chk_fds_res : max_res, i; int res = n; -#if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; -#endif - for (i = 0; i < n; i++) { - int fd = ERTS_POLL_RES_GET_FD(&pr[i]); -#ifdef DEBUG_PRINT_MODE - ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); -#endif + if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) { + + for (i = 0; i < n; i++) { + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#if ERTS_POLL_DEBUG_PRINT + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); - DEBUG_PRINT_FD("trig %s (%s)", ps, fd, - ev2str(evts), + if (fd != wake_fd +#if ERTS_POLL_USE_TIMERFD + && fd != ps->timer_fd +#endif + ) + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - "kqueue" + "kqueue" #elif ERTS_POLL_USE_EPOLL - "epoll" + "epoll" #else - "/dev/poll" + "/dev/poll" +#endif + ); #endif - ); -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); - if (n == 1) - return 0; - } + if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } +#if ERTS_POLL_USE_TIMERFD + else if (fd == ps->timer_fd) { + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } #endif #if !ERTS_POLL_USE_CONCURRENT_UPDATE - else { - /* Reset the events to emulate ONESHOT semantics */ - ps->fds_status[fd].events = 0; - enqueue_update_request(ps, fd); - } + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } #endif + } } - return res; -#else - ASSERT(chk_fds_res <= max_res); - return chk_fds_res; -#endif + if (res == 0) + return res; + else + return n; } #else /* !ERTS_POLL_USE_KERNEL_POLL */ @@ -1560,19 +1664,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, #endif /* !ERTS_POLL_USE_KERNEL_POLL */ +static ERTS_INLINE ErtsMonotonicTime +get_timeout(ErtsPollSet *ps, + int resolution, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout; + + if (timeout_time == ERTS_POLL_NO_TIMEOUT) { + timeout = 0; + } + else if (timeout_time == ERTS_POLL_INF_TIMEOUT) { + timeout = -1; + } + else { + ErtsMonotonicTime diff_time, current_time; + current_time = erts_get_monotonic_time(NULL); + diff_time = timeout_time - current_time; + if (diff_time <= 0) { + timeout = 0; + } + else { + switch (resolution) { + case 1000: + /* Round up to nearest even milli second */ + timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; + if (timeout > (ErtsMonotonicTime) INT_MAX) + timeout = (ErtsMonotonicTime) INT_MAX; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); + break; + case 1000000: + /* Round up to nearest even micro second */ + timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); + break; + case 1000000000: + /* Round up to nearest even nano second */ + timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); + break; + default: + ERTS_INTERNAL_ERROR("Invalid resolution"); + timeout = 0; + break; + } + } + } + return timeout; +} + +#if ERTS_POLL_USE_SELECT + +static ERTS_INLINE int +get_timeout_timeval(ErtsPollSet *ps, + SysTimeval *tvp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000, + timeout_time); + + if (!timeout) { + tvp->tv_sec = 0; + tvp->tv_usec = 0; + + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000); + tvp->tv_sec = sec; + tvp->tv_usec = timeout - sec*(1000*1000); + + ASSERT(tvp->tv_sec >= 0); + ASSERT(tvp->tv_usec >= 0); + ASSERT(tvp->tv_usec < 1000*1000); + + return 1; + } + +} + +#endif + +#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_timespec(ErtsPollSet *ps, + struct timespec *tsp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000*1000, + timeout_time); + + if (!timeout) { + tsp->tv_sec = 0; + tsp->tv_nsec = 0; + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000*1000); + tsp->tv_sec = sec; + tsp->tv_nsec = timeout - sec*(1000*1000*1000); + + ASSERT(tsp->tv_sec >= 0); + ASSERT(tsp->tv_nsec >= 0); + ASSERT(tsp->tv_nsec < 1000*1000*1000); + + return 1; + } +} + +#endif + +#if ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_itimerspec(ErtsPollSet *ps, + struct itimerspec *itsp, + ErtsMonotonicTime timeout_time) +{ + + itsp->it_interval.tv_sec = 0; + itsp->it_interval.tv_nsec = 0; + + return get_timeout_timespec(ps, &itsp->it_value, timeout_time); +} + +#endif + static ERTS_INLINE int -check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time) { int res; - int timeout = do_wait ? -1 : 0; - DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait); + int timeout; + DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time); { #if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#if ERTS_POLL_USE_TIMERFD + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout > 0) { + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, pr, max_res, -1); + res = timerfd_clear(ps, pr, res, max_res); + } else { + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + } +#else /* !ERTS_POLL_USE_TIMERFD */ + timeout = (int) get_timeout(ps, 1000, timeout_time); res = epoll_wait(ps->kp_fd, pr, max_res, timeout); - +#endif /* !ERTS_POLL_USE_TIMERFD */ #elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts = {0, 0}; - struct timespec *tsp = timeout ? NULL : &ts; + struct timespec ts; + struct timespec *tsp; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + tsp = timeout < 0 ? NULL : &ts; res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); #elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ /* @@ -1584,16 +1837,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; poll_res.dp_nfds = nfds < max_res ? nfds : max_res; poll_res.dp_fds = pr; - poll_res.dp_timeout = timeout; + poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time); res = ioctl(ps->kp_fd, DP_POLL, &poll_res); - +#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */ + struct timespec ts; + struct timespec *tsp = &ts; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + if (timeout < 0) tsp = NULL; + res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL); #elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */ - + timeout = (int) get_timeout(ps, 1000, timeout_time); res = poll(ps->poll_fds, ps->no_poll_fds, timeout); - #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - SysTimeval tv = {0, 0}; - SysTimeval *tvp = timeout ? NULL : &tv; + SysTimeval tv; + SysTimeval *tvp; + timeout = get_timeout_timeval(ps, &tv, timeout_time); + tvp = timeout < 0 ? NULL : &tv; ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); @@ -1612,7 +1871,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len) + int *len, + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time) { int res, no_fds, used_fds = 0; int ebadf = 0; @@ -1637,61 +1898,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, } #endif - do_wait = !is_woken(ps) && used_fds == 0; + do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT; DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait); if (do_wait) { - erts_thr_progress_prepare_wait(NULL); + tpd = tpd ? tpd : erts_thr_prgr_data(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); - } + } else + timeout_time = ERTS_POLL_NO_TIMEOUT; while (1) { - res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds); + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time); + if (res != 0) + break; + if (timeout_time == ERTS_POLL_NO_TIMEOUT) + break; + if (erts_get_monotonic_time(NULL) >= timeout_time) + break; + } #if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (res < 0 - && errno == EBADF - && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { - /* - * This may have happened because another thread deselected - * a fd in our poll set and then closed it, i.e. the driver - * behaved correctly. We wan't to avoid looking for a bad - * fd, that may even not exist anymore. Therefore, handle - * update requests and try again. This behaviour should only - * happen when using SELECT as the polling mechanism. - */ - ERTS_POLLSET_LOCK(ps); - used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); - if (used_fds == no_fds) { - *len = used_fds; - ERTS_POLLSET_UNLOCK(ps); - return 0; - } - res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds); - /* Keep the lock over the non-blocking poll in order to not - get any nasty races happening. */ + if (res < 0 + && errno == EBADF + && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { + /* + * This may have happened because another thread deselected + * a fd in our poll set and then closed it, i.e. the driver + * behaved correctly. We wan't to avoid looking for a bad + * fd, that may even not exist anymore. Therefore, handle + * update requests and try again. This behaviour should only + * happen when using SELECT as the polling mechanism. + */ + ERTS_POLLSET_LOCK(ps); + used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); + if (used_fds == no_fds) { + *len = used_fds; ERTS_POLLSET_UNLOCK(ps); - if (res == 0) { - errno = EAGAIN; - res = -1; - } + return 0; + } + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT); + /* Keep the lock over the non-blocking poll in order to not + get any nasty races happening. */ + ERTS_POLLSET_UNLOCK(ps); + if (res == 0) { + errno = EAGAIN; + res = -1; } -#endif - - if (res != 0) - break; - if (!do_wait) - break; } +#endif if (do_wait) { - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_UPDATE_CACHE(); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); } - woke_up(ps); + if (ERTS_POLL_USE_WAKEUP(ps)) + woke_up(ps); if (res < 0) { #if ERTS_POLL_USE_SELECT @@ -1702,11 +1967,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, #endif res = errno; } - else { + else if (res == 0) { + res = used_fds == 0 ? ETIMEDOUT : 0; +#ifdef HARD_DEBUG + check_poll_result(pr, used_fds); +#endif + *len = used_fds; + } else { #if ERTS_POLL_USE_SELECT save_results: #endif - ps_locked = 1; ERTS_POLLSET_LOCK(ps); @@ -1736,12 +2006,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (!set) - reset_wakeup_state(ps); - else - wake_poller(ps, 1); -#endif + DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set); + if (ERTS_POLL_USE_WAKEUP(ps)) { + if (!set) + reset_wakeup_state(ps); + else + wake_poller(ps, 1); + } } int @@ -1857,10 +2128,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id) if (ps->internal_fd_limit <= kp_fd) ps->internal_fd_limit = kp_fd + 1; ps->kp_fd = kp_fd; + if (ps->id == -1) + ps->oneshot = 0; + else + ps->oneshot = 1; #endif -#if !ERTS_POLL_USE_CONCURRENT_UPDATE + erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); create_wakeup_pipe(ps); + +#if ERTS_POLL_USE_TIMERFD + create_timerfd(ps); +#endif + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE handle_update_requests(ps, NULL, 0); cleanup_wakeup_pipe(ps); #endif @@ -1975,9 +2256,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip) pip->memory_size = size; pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE pip->poll_set_size++; /* Wakeup pipe */ -#endif pip->lazy_updates = #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -2064,6 +2343,7 @@ uint32_t epoll_events(int kp_fd, int fd) { /* For epoll we read the information about what is selected upon from the proc fs.*/ char fname[30]; + char s[256]; FILE *f; unsigned int pos, flags, mnt_id; int line = 0; @@ -2081,16 +2361,17 @@ uint32_t epoll_events(int kp_fd, int fd) } if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); line += 3; - while (!feof(f)) { + while (fgets(s, sizeof(s) / sizeof(*s), f)) { /* tfd: 10 events: 40000019 data: 180000000a */ int ev_fd; uint32_t events; uint64_t data; - if (fscanf(f,"tfd:%d events:%x data:%llx\n", &ev_fd, &events, + if (sscanf(s,"tfd:%d events:%x data:%llx", &ev_fd, &events, (unsigned long long*)&data) != 3) { fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", fname, line, errno); + fclose(f); return 0; } if (fd == ev_fd) { @@ -2130,6 +2411,7 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, /* For epoll we read the information about what is selected upon from the proc fs.*/ char fname[30]; + char s[256]; FILE *f; unsigned int pos, flags, mnt_id; int line = 0; @@ -2144,22 +2426,30 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, if (fscanf(f,"pos:\t%x\nflags:\t%x", &pos, &flags) != 2) { fprintf(stderr,"failed to parse file %s, errno = %d\n", fname, errno); ASSERT(0); + fclose(f); return; } if (fscanf(f,"\nmnt_id:\t%x\n", &mnt_id)); line += 3; - while (!feof(f)) { + while (fgets(s, sizeof(s) / sizeof(*s), f)) { /* tfd: 10 events: 40000019 data: 180000000a */ int fd; uint32_t events; uint64_t data; - if (fscanf(f,"tfd:%d events:%x data:%llx\n", &fd, &events, + if (sscanf(s,"tfd:%d events:%x data:%llx", &fd, &events, (unsigned long long*)&data) != 3) { fprintf(stderr,"failed to parse file %s on line %d, errno = %d\n", fname, line, errno); ASSERT(0); + fclose(f); return; } + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) + continue; +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) + continue; +#endif data &= 0xFFFFFFFF; ASSERT(fd == data); /* Events are the events that are being monitored, which of course include @@ -2167,6 +2457,7 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, ev[fd] = (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT) & ERTS_POLL_EV_N2E(events); line++; } + fclose(f); #else for (fd = 0; fd < len; fd++) ev[fd] = ERTS_POLL_EV_NONE; diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index e1cea7eb8b..d40dabc529 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -51,6 +51,7 @@ #include "sys.h" #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN +#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX #ifdef ERTS_ENABLE_KERNEL_POLL # undef ERTS_ENABLE_KERNEL_POLL @@ -130,6 +131,9 @@ #endif #define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10 +#define ERTS_POLL_USE_TIMERFD 0 typedef Uint32 ErtsPollEvents; @@ -156,6 +160,14 @@ typedef enum { #include <sys/epoll.h> +#if ERTS_POLL_USE_EPOLL +#ifdef HAVE_SYS_TIMERFD_H +#include <sys/timerfd.h> +#undef ERTS_POLL_USE_TIMERFD +#define ERTS_POLL_USE_TIMERFD 1 +#endif +#endif + #define ERTS_POLL_EV_E2N(EV) \ ((uint32_t) (EV)) #define ERTS_POLL_EV_N2E(EV) \ @@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd { #endif -#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)) +#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))) #define ev2str(ev) \ (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \ diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h index 1170a549b9..f3a91e54f7 100644 --- a/erts/emulator/sys/common/erl_poll_api.h +++ b/erts/emulator/sys/common/erl_poll_api.h @@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, * @param res an array of fd results that the ready fds are put in. * @param[in] length the length of the res array * @param[out] length the number of ready events returned in res + * @param tpd the thread progress data to note sleep state in + * @param timeout_time the time in native to wake up at * @return 0 on success, else the ERRNO of the error that happened. */ int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd res[], - int *length); + int *length, + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time); /** * Interrupt the thread waiting in the pollset. This function should be called * with set = 0 before any thread calls erts_poll_wait in order to clear any diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 2541ab5d31..d34e1a9ec0 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -176,6 +176,7 @@ sys_double_to_chars_fast(double f, char *buffer, int buffer_size, int decimals, double af; Uint64 int_part, frac_part; int neg; + int has_decimals = decimals != 0; char *p = buffer; if (decimals < 0) @@ -257,7 +258,7 @@ sys_double_to_chars_fast(double f, char *buffer, int buffer_size, int decimals, } /* Delete trailing zeroes */ - if (compact) + if (compact && has_decimals) p = find_first_trailing_zero(p); *p = '\0'; return p - buffer; |