diff options
Diffstat (limited to 'erts/emulator/sys')
34 files changed, 9046 insertions, 1271 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index 853dd90c34..81cb5dc4bb 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -34,6 +34,7 @@ #endif #include "sys.h" #include "global.h" +#include "erl_port.h" #include "erl_check_io.h" #include "erl_thr_progress.h" #include "dtrace-wrapper.h" @@ -51,8 +52,17 @@ typedef char EventStateType; #define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */ typedef char EventStateFlags; -#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ +#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */ +#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2) +#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4) +#ifdef DEBUG +# define ERTS_ACTIVE_FD_INC 2 +#else +# define ERTS_ACTIVE_FD_INC 128 +#endif + +#define ERTS_CHECK_IO_POLL_RES_LEN 512 #if defined(ERTS_KERNEL_POLL_VERSION) # define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp @@ -66,6 +76,7 @@ typedef char EventStateFlags; (ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL) #define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control) +#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv) #define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait) #ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT #define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt) @@ -78,10 +89,19 @@ typedef char EventStateFlags; #define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init) #define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info) +#define GET_FD(fd) fd + static struct pollset_info { ErtsPollSet ps; erts_smp_atomic_t in_poll_wait; /* set while doing poll */ + struct { + int six; /* start index */ + int eix; /* end index */ + erts_smp_atomic32_t no; + int size; + ErtsSysFdType *array; + } active_fd; #ifdef ERTS_SMP struct removed_fd* removed_list; /* list of deselected fd's*/ erts_smp_spinlock_t removed_list_lock; @@ -94,9 +114,11 @@ typedef struct { SafeHashBucket hb; #endif ErtsSysFdType fd; - union { - ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ + struct { ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */ +#if ERTS_CIO_HAVE_DRV_EVENT + ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */ +#endif erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */ } driver; ErtsPollEvents events; @@ -166,6 +188,10 @@ static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd) ErtsDrvEventState tmpl; tmpl.fd = fd; tmpl.driver.select = NULL; +#if ERTS_CIO_HAVE_DRV_EVENT + tmpl.driver.event = NULL; +#endif + tmpl.driver.drv_ptr = NULL; tmpl.events = 0; tmpl.remove_cnt = 0; tmpl.type = ERTS_EV_TYPE_NONE; @@ -206,6 +232,65 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_F #endif static ERTS_INLINE void +init_iotask(ErtsIoTask *io_task) +{ + erts_port_task_handle_init(&io_task->task); + erts_smp_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0)); +} + +static ERTS_INLINE int +is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time) +{ + if (erts_port_task_is_scheduled(&io_task->task)) + return 1; + if (erts_smp_atomic_read_nob(&io_task->executed_time) == current_cio_time) + return 1; + return 0; +} + +static ERTS_INLINE ErtsDrvSelectDataState * +alloc_drv_select_data(void) +{ + ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, + sizeof(ErtsDrvSelectDataState)); + dsp->inport = NIL; + dsp->outport = NIL; + init_iotask(&dsp->iniotask); + init_iotask(&dsp->outiotask); + return dsp; +} + +static ERTS_INLINE void +free_drv_select_data(ErtsDrvSelectDataState *dsp) +{ + ASSERT(!erts_port_task_is_scheduled(&dsp->iniotask.task)); + ASSERT(!erts_port_task_is_scheduled(&dsp->outiotask.task)); + erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp); +} + +static ERTS_INLINE ErtsDrvEventDataState * +alloc_drv_event_data(void) +{ + ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, + sizeof(ErtsDrvEventDataState)); + dep->port = NIL; + dep->data = NULL; + dep->removed_events = 0; +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + dep->deferred_events = 0; +#endif + init_iotask(&dep->iotask); + return dep; +} + +static ERTS_INLINE void +free_drv_event_data(ErtsDrvEventDataState *dep) +{ + ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task)); + erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep); +} + +static ERTS_INLINE void remember_removed(ErtsDrvEventState *state, struct pollset_info* psi) { #ifdef ERTS_SMP @@ -285,7 +370,7 @@ forget_removed(struct pollset_info* psi) drv_ptr = state->driver.drv_ptr; ASSERT(drv_ptr); state->type = ERTS_EV_TYPE_NONE; - state->flags = 0; + state->flags &= ~ERTS_EV_FLAG_USED; state->driver.drv_ptr = NULL; /* Fall through */ case ERTS_EV_TYPE_NONE: @@ -342,6 +427,10 @@ grow_drv_ev_state(int min_ix) for (i = erts_smp_atomic_read_nob(&drv_ev_state_len); i < new_len; i++) { drv_ev_state[i].fd = (ErtsSysFdType) i; drv_ev_state[i].driver.select = NULL; +#if ERTS_CIO_HAVE_DRV_EVENT + drv_ev_state[i].driver.event = NULL; +#endif + drv_ev_state[i].driver.drv_ptr = NULL; drv_ev_state[i].events = 0; drv_ev_state[i].remove_cnt = 0; drv_ev_state[i].type = ERTS_EV_TYPE_NONE; @@ -362,11 +451,7 @@ grow_drv_ev_state(int min_ix) static ERTS_INLINE void abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type) { - if (is_nil(id)) { - ASSERT(type == ERTS_EV_TYPE_NONE - || !erts_port_task_is_scheduled(pthp)); - } - else if (erts_port_task_is_scheduled(pthp)) { + if (is_not_nil(id) && erts_port_task_is_scheduled(pthp)) { erts_port_task_abort(pthp); ASSERT(erts_is_port_alive(id)); } @@ -381,7 +466,7 @@ abort_tasks(ErtsDrvEventState *state, int mode) #if ERTS_CIO_HAVE_DRV_EVENT case ERTS_EV_TYPE_DRV_EV: abort_task(state->driver.event->port, - &state->driver.event->task, + &state->driver.event->iotask.task, ERTS_EV_TYPE_DRV_EV); return; #endif @@ -395,14 +480,14 @@ abort_tasks(ErtsDrvEventState *state, int mode) case ERL_DRV_WRITE: ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); abort_task(state->driver.select->outport, - &state->driver.select->outtask, + &state->driver.select->outiotask.task, state->type); if (mode == ERL_DRV_WRITE) break; case ERL_DRV_READ: ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); abort_task(state->driver.select->inport, - &state->driver.select->intask, + &state->driver.select->iniotask.task, state->type); break; default: @@ -440,16 +525,14 @@ deselect(ErtsDrvEventState *state, int mode) if (!(state->events)) { switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: - ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask)); - ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask)); - erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, - state->driver.select); + state->driver.select->inport = NIL; + state->driver.select->outport = NIL; break; #if ERTS_CIO_HAVE_DRV_EVENT case ERTS_EV_TYPE_DRV_EV: - ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task)); - erts_free(ERTS_ALC_T_DRV_EV_D_STATE, - state->driver.event); + state->driver.event->port = NIL; + state->driver.event->data = NULL; + state->driver.event->removed_events = (ErtsPollEvents) 0; break; #endif case ERTS_EV_TYPE_NONE: @@ -459,20 +542,297 @@ deselect(ErtsDrvEventState *state, int mode) break; } - state->driver.select = NULL; state->type = ERTS_EV_TYPE_NONE; - state->flags = 0; + state->flags &= ~ERTS_EV_FLAG_USED; remember_removed(state, &pollset); } } - #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS # define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0) #else # define IS_FD_UNKNOWN(state) ((state) == NULL) #endif +static ERTS_INLINE void +check_fd_cleanup(ErtsDrvEventState *state, +#if ERTS_CIO_HAVE_DRV_EVENT + ErtsDrvEventDataState **free_event, +#endif + ErtsDrvSelectDataState **free_select) +{ + erts_aint_t current_cio_time; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd))); + + current_cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); + *free_select = NULL; + if (state->driver.select + && (state->type != ERTS_EV_TYPE_DRV_SEL) + && !is_iotask_active(&state->driver.select->iniotask, current_cio_time) + && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { + + *free_select = state->driver.select; + state->driver.select = NULL; + } + +#if ERTS_CIO_HAVE_DRV_EVENT + *free_event = NULL; + if (state->driver.event + && (state->type != ERTS_EV_TYPE_DRV_EV) + && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) { + + *free_event = state->driver.event; + state->driver.event = NULL; + } +#endif + +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (((state->type != ERTS_EV_TYPE_NONE) + | state->remove_cnt +#if ERTS_CIO_HAVE_DRV_EVENT + | (state->driver.event != NULL) +#endif + | (state->driver.select != NULL)) == 0) { + + hash_erase_drv_ev_state(state); + + } +#endif +} + +static ERTS_INLINE int +check_cleanup_active_fd(ErtsSysFdType fd, +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + ErtsPollControlEntry *pce, + int *pce_ix, +#endif + erts_aint_t current_cio_time) +{ + ErtsDrvEventState *state; + int active = 0; + erts_smp_mtx_t *mtx = fd_mtx(fd); + void *free_select = NULL; +#if ERTS_CIO_HAVE_DRV_EVENT + void *free_event = NULL; +#endif +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + ErtsPollEvents evon = 0, evoff = 0; +#endif + + erts_smp_mtx_lock(mtx); + +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS + state = &drv_ev_state[(int) fd]; +#else + state = hash_get_drv_ev_state(fd); /* may be NULL! */ + if (state) +#endif + { + if (state->driver.select) { +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) { + active = 1; + if ((state->events & ERTS_POLL_EV_IN) + && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) { + evoff |= ERTS_POLL_EV_IN; + state->flags |= ERTS_EV_FLAG_DEFER_IN_EV; + } + } + else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) { + if (state->events & ERTS_POLL_EV_IN) + evon |= ERTS_POLL_EV_IN; + state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV; + } + if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) { + active = 1; + if ((state->events & ERTS_POLL_EV_OUT) + && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) { + evoff |= ERTS_POLL_EV_OUT; + state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV; + } + } + else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) { + if (state->events & ERTS_POLL_EV_OUT) + evon |= ERTS_POLL_EV_OUT; + state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV; + } + if (active) + (void) 0; + else +#else + if (is_iotask_active(&state->driver.select->iniotask, current_cio_time) + || is_iotask_active(&state->driver.select->outiotask, current_cio_time)) + active = 1; + else +#endif + if (state->type != ERTS_EV_TYPE_DRV_SEL) { + free_select = state->driver.select; + state->driver.select = NULL; + } + } + +#if ERTS_CIO_HAVE_DRV_EVENT + if (state->driver.event) { + if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) { +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events; + if (evs) { + evoff |= evs; + state->driver.event->deferred_events |= evs; + } +#endif + active = 1; + } + else if (state->type != ERTS_EV_TYPE_DRV_EV) { + free_event = state->driver.event; + state->driver.event = NULL; + } +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + else { + ErtsPollEvents evs = state->events & state->driver.event->deferred_events; + if (evs) { + evon |= evs; + state->driver.event->deferred_events = 0; + } + } +#endif + + } +#endif + +#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0) + hash_erase_drv_ev_state(state); +#endif + + } + + erts_smp_mtx_unlock(mtx); + + if (free_select) + free_drv_select_data(free_select); +#if ERTS_CIO_HAVE_DRV_EVENT + if (free_event) + free_drv_event_data(free_event); +#endif + +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + if (evoff) { + ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; + pcep->fd = fd; + pcep->events = evoff; + pcep->on = 0; + } + if (evon) { + ErtsPollControlEntry *pcep = &pce[(*pce_ix)++]; + pcep->fd = fd; + pcep->events = evon; + pcep->on = 1; + } +#endif + + return active; +} + +static void +check_cleanup_active_fds(erts_aint_t current_cio_time) +{ + int six = pollset.active_fd.six; + int eix = pollset.active_fd.eix; + erts_aint32_t no = erts_smp_atomic32_read_dirty(&pollset.active_fd.no); + int size = pollset.active_fd.size; + int ix = six; +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + /* every fd might add two entries */ + Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no; + ErtsPollControlEntry *pctrl_entries = (pce_sz + ? erts_alloc(ERTS_ALC_T_TMP, pce_sz) + : NULL); + int pctrl_ix = 0; +#endif + + while (ix != eix) { + ErtsSysFdType fd = pollset.active_fd.array[ix]; + int nix = ix + 1; + if (nix >= size) + nix = 0; + ASSERT(fd != ERTS_SYS_FD_INVALID); + if (!check_cleanup_active_fd(fd, +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + pctrl_entries, + &pctrl_ix, +#endif + current_cio_time)) { + no--; + if (ix == six) { +#ifdef DEBUG + pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID; +#endif + six = nix; + } + else { + pollset.active_fd.array[ix] = pollset.active_fd.array[six]; +#ifdef DEBUG + pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID; +#endif + six++; + if (six >= size) + six = 0; + } + } + ix = nix; + } + +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry)); + if (pctrl_ix) + ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix); + if (pctrl_entries) + erts_free(ERTS_ALC_T_TMP, pctrl_entries); +#endif + + pollset.active_fd.six = six; + pollset.active_fd.eix = eix; + erts_smp_atomic32_set_relb(&pollset.active_fd.no, no); +} + +static ERTS_INLINE void +add_active_fd(ErtsSysFdType fd) +{ + int eix = pollset.active_fd.eix; + int size = pollset.active_fd.size; + + + pollset.active_fd.array[eix] = fd; + + erts_smp_atomic32_set_relb(&pollset.active_fd.no, + (erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + + 1)); + + eix++; + if (eix >= size) + eix = 0; + if (pollset.active_fd.six == eix) { + pollset.active_fd.six = 0; + eix = size; + size += ERTS_ACTIVE_FD_INC; + pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR, + pollset.active_fd.array, + sizeof(ErtsSysFdType)*size); + pollset.active_fd.size = size; +#ifdef DEBUG + { + int i; + for (i = eix + 1; i < size; i++) + pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; + } +#endif + + } + + pollset.active_fd.eix = eix; +} int ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, @@ -489,6 +849,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, ErtsDrvEventState *state; int wake_poller; int ret; +#if ERTS_CIO_HAVE_DRV_EVENT + ErtsDrvEventDataState *free_event = NULL; +#endif + ErtsDrvSelectDataState *free_select = NULL; #ifdef USE_VM_PROBES DTRACE_CHARBUF(name, 64); #endif @@ -524,7 +888,8 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, /* fast track to stop_select callback */ stop_select_fn = prt->drv_ptr->stop_select; #ifdef USE_VM_PROBES - strncpy(name, prt->drv_ptr->name, sizeof(name)-1); + strncpy(name, prt->drv_ptr->name, + sizeof(DTRACE_CHARBUF_NAME(name))-1); name[sizeof(name)-1] = '\0'; #endif ret = 0; @@ -589,9 +954,9 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) { state->type = ERTS_EV_TYPE_NONE; - state->flags = 0; - erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select); - state->driver.select = NULL; + state->flags &= ~ERTS_EV_FLAG_USED; + state->driver.select->inport = NIL; + state->driver.select->outport = NIL; } ret = -1; goto done; @@ -609,18 +974,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, state->events = new_events; if (ctl_events) { if (on) { - if (state->type == ERTS_EV_TYPE_NONE) { - ErtsDrvSelectDataState *dsdsp - = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE, - sizeof(ErtsDrvSelectDataState)); - dsdsp->inport = NIL; - dsdsp->outport = NIL; - erts_port_task_handle_init(&dsdsp->intask); - erts_port_task_handle_init(&dsdsp->outtask); - ASSERT(state->driver.select == NULL); - state->driver.select = dsdsp; + if (!state->driver.select) + state->driver.select = alloc_drv_select_data(); + if (state->type == ERTS_EV_TYPE_NONE) state->type = ERTS_EV_TYPE_DRV_SEL; - } ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL); if (ctl_events & ERTS_POLL_EV_IN) state->driver.select->inport = id; @@ -641,17 +998,12 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, state->driver.select->outport = NIL; } if (new_events == 0) { - ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask)); - ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask)); if (old_events != 0) { remember_removed(state, &pollset); } if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { state->type = ERTS_EV_TYPE_NONE; - state->flags = 0; - erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, - state->driver.select); - state->driver.select = NULL; + state->flags &= ~ERTS_EV_FLAG_USED; } /*else keep it, as fd will probably be selected upon again */ } @@ -682,13 +1034,15 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix, ret = 0; -done:; -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) { - hash_erase_drv_ev_state(state); - } +done: + + check_fd_cleanup(state, +#if ERTS_CIO_HAVE_DRV_EVENT + &free_event, #endif -done_unknown: + &free_select); + +done_unknown: erts_smp_mtx_unlock(fd_mtx(fd)); if (stop_select_fn) { int was_unmasked = erts_block_fpe(); @@ -696,6 +1050,12 @@ done_unknown: (*stop_select_fn)(e, NULL); erts_unblock_fpe(was_unmasked); } + if (free_select) + free_drv_select_data(free_select); +#if ERTS_CIO_HAVE_DRV_EVENT + if (free_event) + free_drv_event_data(free_event); +#endif return ret; } @@ -715,6 +1075,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, ErtsDrvEventState *state; int do_wake = 0; int ret; +#if ERTS_CIO_HAVE_DRV_EVENT + ErtsDrvEventDataState *free_event; +#endif + ErtsDrvSelectDataState *free_select; Port *prt = erts_drvport2port(ix); if (prt == ERTS_INVALID_ERL_DRV_PORT) @@ -795,10 +1159,8 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, state->driver.event->removed_events |= remove_events; } else { - state->driver.event - = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE, - sizeof(ErtsDrvEventDataState)); - erts_port_task_handle_init(&state->driver.event->task); + if (!state->driver.event) + state->driver.event = alloc_drv_event_data(); state->driver.event->port = id; state->driver.event->removed_events = (ErtsPollEvents) 0; state->type = ERTS_EV_TYPE_DRV_EV; @@ -808,10 +1170,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, else { if (state->type == ERTS_EV_TYPE_DRV_EV) { abort_tasks(state, 0); - erts_free(ERTS_ALC_T_DRV_EV_D_STATE, - state->driver.event); + state->driver.event->port = NIL; + state->driver.event->data = NULL; + state->driver.event->removed_events = (ErtsPollEvents) 0; } - state->driver.select = NULL; state->type = ERTS_EV_TYPE_NONE; remember_removed(state, &pollset); } @@ -821,12 +1183,22 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix, ret = 0; done: -#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS - if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) { - hash_erase_drv_ev_state(state); - } + + check_fd_cleanup(state, +#if ERTS_CIO_HAVE_DRV_EVENT + &free_event, #endif + &free_select); + erts_smp_mtx_unlock(fd_mtx(fd)); + + if (free_select) + free_drv_select_data(free_select); +#if ERTS_CIO_HAVE_DRV_EVENT + if (free_event) + free_drv_event_data(free_event); +#endif + return ret; #endif } @@ -874,7 +1246,7 @@ need2steal(ErtsDrvEventState *state, int mode) static void print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) { - ErtsPortNames *pnp = erts_get_port_names(id); + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); if (!pnp->name && !pnp->driver_name) erts_dsprintf(dsbufp, "%s ", "<unknown>"); else { @@ -894,7 +1266,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id) static void steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) { - erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd); + erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd)); switch (state->type) { case ERTS_EV_TYPE_DRV_SEL: { int deselect_mode = 0; @@ -918,7 +1290,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) if (deselect_mode) deselect(state, deselect_mode); else { - erts_dsprintf(dsbufp, "no one", (int) state->fd); + erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd)); ASSERT(0); } erts_dsprintf(dsbufp, "\n"); @@ -946,7 +1318,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode) break; } default: - erts_dsprintf(dsbufp, "no one\n", (int) state->fd); + erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd)); ASSERT(0); } } @@ -957,10 +1329,14 @@ print_select_op(erts_dsprintf_buf_t *dsbufp, { Port *pp = erts_drvport2port(ix); erts_dsprintf(dsbufp, +#ifdef __OSE__ + "driver_select(%p, %d,%s%s%s%s | %d, %d) " +#else "driver_select(%p, %d,%s%s%s%s, %d) " +#endif "by ", ix, - (int) fd, + (int) GET_FD(fd), mode & ERL_DRV_READ ? " ERL_DRV_READ" : "", mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "", mode & ERL_DRV_USE ? " ERL_DRV_USE" : "", @@ -1010,7 +1386,7 @@ steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, ASSERT(state->type == ERTS_EV_TYPE_STOP_USE); erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select " "was called for driver %s\n", - (int) state->fd, state->driver.drv_ptr->name); + (int) GET_FD(state->fd), state->driver.drv_ptr->name); erts_send_error_to_logger_nogl(dsbufp); if (on) { @@ -1019,7 +1395,7 @@ steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix, * In either case stop_select should not be called. */ state->type = ERTS_EV_TYPE_NONE; - state->flags = 0; + state->flags &= ~ERTS_EV_FLAG_USED; if (state->driver.drv_ptr->handle) { erts_ddll_dereference_driver(state->driver.drv_ptr->handle); } @@ -1091,38 +1467,103 @@ event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data #endif #endif +static ERTS_INLINE int +io_task_schedule_allowed(ErtsDrvEventState *state, + ErtsPortTaskType type, + erts_aint_t current_cio_time) +{ + ErtsIoTask *io_task; + + switch (type) { + case ERTS_PORT_TASK_INPUT: + if (!state->driver.select) + return 0; +#if ERTS_CIO_HAVE_DRV_EVENT + if (state->driver.event) + return 0; +#endif + io_task = &state->driver.select->iniotask; + break; + case ERTS_PORT_TASK_OUTPUT: + if (!state->driver.select) + return 0; +#if ERTS_CIO_HAVE_DRV_EVENT + if (state->driver.event) + return 0; +#endif + io_task = &state->driver.select->outiotask; + break; +#if ERTS_CIO_HAVE_DRV_EVENT + case ERTS_PORT_TASK_EVENT: + if (!state->driver.event) + return 0; + if (state->driver.select) + return 0; + io_task = &state->driver.event->iotask; + break; +#endif + default: + ERTS_INTERNAL_ERROR("Invalid I/O-task type"); + return 0; + } + + return !is_iotask_active(io_task, current_cio_time); +} + static ERTS_INLINE void -iready(Eterm id, ErtsDrvEventState *state) +iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) { - if (erts_port_task_schedule(id, - &state->driver.select->intask, - ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { - stale_drv_select(id, state, ERL_DRV_READ); + if (io_task_schedule_allowed(state, + ERTS_PORT_TASK_INPUT, + current_cio_time)) { + ErtsIoTask *iotask = &state->driver.select->iniotask; + erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); + if (erts_port_task_schedule(id, + &iotask->task, + ERTS_PORT_TASK_INPUT, + (ErlDrvEvent) state->fd) != 0) { + stale_drv_select(id, state, ERL_DRV_READ); + } + add_active_fd(state->fd); } } static ERTS_INLINE void -oready(Eterm id, ErtsDrvEventState *state) +oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time) { - if (erts_port_task_schedule(id, - &state->driver.select->outtask, - ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { - stale_drv_select(id, state, ERL_DRV_WRITE); + if (io_task_schedule_allowed(state, + ERTS_PORT_TASK_OUTPUT, + current_cio_time)) { + ErtsIoTask *iotask = &state->driver.select->outiotask; + erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); + if (erts_port_task_schedule(id, + &iotask->task, + ERTS_PORT_TASK_OUTPUT, + (ErlDrvEvent) state->fd) != 0) { + stale_drv_select(id, state, ERL_DRV_WRITE); + } + add_active_fd(state->fd); } } #if ERTS_CIO_HAVE_DRV_EVENT static ERTS_INLINE void -eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data) +eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data, + erts_aint_t current_cio_time) { - if (erts_port_task_schedule(id, - &state->driver.event->task, - ERTS_PORT_TASK_EVENT, - (ErlDrvEvent) state->fd, - event_data) != 0) { - stale_drv_select(id, state, 0); + if (io_task_schedule_allowed(state, + ERTS_PORT_TASK_EVENT, + current_cio_time)) { + ErtsIoTask *iotask = &state->driver.event->iotask; + erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time); + if (erts_port_task_schedule(id, + &iotask->task, + ERTS_PORT_TASK_EVENT, + (ErlDrvEvent) state->fd, + event_data) != 0) { + stale_drv_select(id, state, 0); + } + add_active_fd(state->fd); } } #endif @@ -1153,10 +1594,11 @@ ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set, void ERTS_CIO_EXPORT(erts_check_io)(int do_wait) { - ErtsPollResFd pollres[256]; + ErtsPollResFd *pollres; int pollres_len; SysTimeval wait_time; int poll_ret, i; + erts_aint_t current_cio_time; restart: @@ -1173,10 +1615,24 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) wait_time.tv_usec = 0; } + /* + * No need for an atomic inc op when incrementing + * erts_check_io_time, since only one thread can + * check io at a time. + */ + current_cio_time = erts_smp_atomic_read_dirty(&erts_check_io_time); + current_cio_time++; + erts_smp_atomic_set_relb(&erts_check_io_time, current_cio_time); + + check_cleanup_active_fds(current_cio_time); + #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); /* No locks should be locked */ #endif - pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd); + + pollres_len = erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN; + + pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len); erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1); @@ -1196,6 +1652,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) if (poll_ret != 0) { erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); forget_removed(&pollset); + erts_free(ERTS_ALC_T_TMP, pollres); if (poll_ret == EAGAIN) { goto restart; } @@ -1255,15 +1712,15 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) if ((revents & ERTS_POLL_EV_IN) || (!(revents & ERTS_POLL_EV_OUT) && state->events & ERTS_POLL_EV_IN)) { - iready(state->driver.select->inport, state); + iready(state->driver.select->inport, state, current_cio_time); } else if (state->events & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state); + oready(state->driver.select->outport, state, current_cio_time); } } else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) { if (revents & ERTS_POLL_EV_OUT) { - oready(state->driver.select->outport, state); + oready(state->driver.select->outport, state, current_cio_time); } /* Someone might have deselected input since revents was read (true also on the non-smp emulator since @@ -1271,7 +1728,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) revents... */ revents &= ~(~state->events & ERTS_POLL_EV_IN); if (revents & ERTS_POLL_EV_IN) { - iready(state->driver.select->inport, state); + iready(state->driver.select->inport, state, current_cio_time); } } else if (revents & ERTS_POLL_EV_NVAL) { @@ -1279,6 +1736,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) state->driver.select->inport, state->driver.select->outport, state->events); + add_active_fd(state->fd); } break; } @@ -1296,8 +1754,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) if (revents) { event_data->events = state->events; event_data->revents = revents; - - eready(state->driver.event->port, state, event_data); + eready(state->driver.event->port, state, event_data, current_cio_time); } break; } @@ -1315,6 +1772,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) (int) state->type); ASSERT(0); deselect(state, 0); + add_active_fd(state->fd); break; } } @@ -1326,6 +1784,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait) } erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0); + erts_free(ERTS_ALC_T_TMP, pollres); forget_removed(&pollset); } @@ -1357,8 +1816,8 @@ bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, "Bad %s fd in erts_poll()! fd=%d, ", io_str, (int) state->fd); if (is_nil(port)) { - ErtsPortNames *ipnp = erts_get_port_names(inport); - ErtsPortNames *opnp = erts_get_port_names(outport); + ErtsPortNames *ipnp = erts_get_port_names(inport, ERTS_INVALID_ERL_DRV_PORT); + ErtsPortNames *opnp = erts_get_port_names(outport, ERTS_INVALID_ERL_DRV_PORT); erts_dsprintf(dsbufp, "ports=%T/%T, drivers=%s/%s, names=%s/%s\n", is_nil(inport) ? am_undefined : inport, is_nil(outport) ? am_undefined : outport, @@ -1370,7 +1829,7 @@ bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, erts_free_port_names(opnp); } else { - ErtsPortNames *pnp = erts_get_port_names(port); + ErtsPortNames *pnp = erts_get_port_names(port, ERTS_INVALID_ERL_DRV_PORT); erts_dsprintf(dsbufp, "port=%T, driver=%s, name=%s\n", is_nil(port) ? am_undefined : port, pnp->driver_name ? pnp->driver_name : "<unknown>", @@ -1390,11 +1849,31 @@ bad_fd_in_pollset(ErtsDrvEventState *state, Eterm inport, static void stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode) { - erts_stale_drv_select(id, (ErlDrvEvent) state->fd, mode, 0); + erts_stale_drv_select(id, ERTS_INVALID_ERL_DRV_PORT, (ErlDrvEvent) state->fd, mode, 0); deselect(state, mode); } #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS + +#ifdef __OSE__ +static SafeHashValue drv_ev_state_hash(void *des) +{ + ErtsSysFdType fd = ((ErtsDrvEventState *) des)->fd; + /* We use hash on signo ^ id in order for steal to happen when the + same signo + fd is selected on by two different ports */ + SafeHashValue val = (SafeHashValue)(fd->signo ^ fd->id); + return val ^ (val >> 8); +} + +static int drv_ev_state_cmp(void *des1, void *des2) +{ + ErtsSysFdType fd1 = ((ErtsDrvEventState *) des1)->fd; + ErtsSysFdType fd2 = ((ErtsDrvEventState *) des2)->fd; + if (fd1->signo == fd2->signo && fd1->id == fd2->id) + return 0; + return 1; +} +#else /* !__OSE__ && !ERTS_SYS_CONTINOUS_FD_NUMBERS i.e. probably windows */ static SafeHashValue drv_ev_state_hash(void *des) { SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd; @@ -1406,6 +1885,7 @@ static int drv_ev_state_cmp(void *des1, void *des2) return ( ((ErtsDrvEventState *) des1)->fd == ((ErtsDrvEventState *) des2)->fd ? 0 : 1); } +#endif static void *drv_ev_state_alloc(void *des_tmpl) { @@ -1440,10 +1920,27 @@ static void drv_ev_state_free(void *des) void ERTS_CIO_EXPORT(erts_init_check_io)(void) { + erts_smp_atomic_init_nob(&erts_check_io_time, 0); erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0); + ERTS_CIO_POLL_INIT(); pollset.ps = ERTS_CIO_NEW_POLLSET(); + pollset.active_fd.six = 0; + pollset.active_fd.eix = 0; + erts_smp_atomic32_init_nob(&pollset.active_fd.no, 0); + pollset.active_fd.size = ERTS_ACTIVE_FD_INC; + pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR, + sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC); +#ifdef DEBUG + { + int i; + for (i = 0; i < ERTS_ACTIVE_FD_INC; i++) + pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID; + } +#endif + + #ifdef ERTS_SMP init_removed_fd_alloc(); pollset.removed_list = NULL; @@ -1519,12 +2016,27 @@ Eterm ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) { Process *p = (Process *) proc; - Eterm tags[15], values[15], res; + Eterm tags[16], values[16], res; Uint sz, *szp, *hp, **hpp, memory_size; Sint i; ErtsPollInfo pi; - - ERTS_CIO_POLL_INFO(pollset.ps, &pi); + erts_aint_t cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time); + int active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); + + while (1) { + erts_aint_t post_cio_time; + int post_active_fds; + + ERTS_CIO_POLL_INFO(pollset.ps, &pi); + + post_cio_time = erts_smp_atomic_read_mb(&erts_check_io_time); + post_active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no); + if (cio_time == post_cio_time && active_fds == post_active_fds) + break; + cio_time = post_cio_time; + active_fds = post_active_fds; + } + memory_size = pi.memory_size; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len); @@ -1588,6 +2100,9 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc) tags[i] = erts_bld_atom(hpp, szp, "max_fds"); values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds); + tags[i] = erts_bld_atom(hpp, szp, "active_fds"); + values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds); + #ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups"); values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups); @@ -1642,6 +2157,8 @@ print_events(ErtsPollEvents ev) typedef struct { int used_fds; int num_errors; + int no_driver_select_structs; + int no_driver_event_structs; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS int internal_fds; ErtsPollEvents *epep; @@ -1664,6 +2181,13 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) struct stat stat_buf; #endif + if (state->driver.select) + counters->no_driver_select_structs++; +#if ERTS_CIO_HAVE_DRV_EVENT + if (state->driver.event) + counters->no_driver_event_structs++; +#endif + #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS if (state->events || ep_events) { if (ep_events & ERTS_POLL_EV_NVAL) { @@ -1774,7 +2298,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) err = 1; } else { - ErtsPortNames *pnp = erts_get_port_names(id); + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); erts_printf(" inport=%T inname=%s indrv=%s ", id, pnp->name ? pnp->name : "unknown", @@ -1791,7 +2315,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) err = 1; } else { - ErtsPortNames *pnp = erts_get_port_names(id); + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); erts_printf(" outport=%T outname=%s outdrv=%s ", id, pnp->name ? pnp->name : "unknown", @@ -1802,6 +2326,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) } } } +#if ERTS_CIO_HAVE_DRV_EVENT else if (state->type == ERTS_EV_TYPE_DRV_EV) { Eterm id; erts_printf("driver_event "); @@ -1827,7 +2352,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) err = 1; } else { - ErtsPortNames *pnp = erts_get_port_names(id); + ErtsPortNames *pnp = erts_get_port_names(id, ERTS_INVALID_ERL_DRV_PORT); erts_printf(" port=%T name=%s drv=%s ", id, pnp->name ? pnp->name : "unknown", @@ -1837,6 +2362,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) erts_free_port_names(pnp); } } +#endif #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS else if (internal) { erts_printf("internal "); @@ -1876,18 +2402,24 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters) } int -ERTS_CIO_EXPORT(erts_check_io_debug)(void) +ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip) { #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS int fd, len; #endif IterDebugCounters counters; +#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS ErtsDrvEventState null_des; null_des.driver.select = NULL; +#if ERTS_CIO_HAVE_DRV_EVENT + null_des.driver.event = NULL; +#endif + null_des.driver.drv_ptr = NULL; null_des.events = 0; null_des.remove_cnt = 0; null_des.type = ERTS_EV_TYPE_NONE; +#endif erts_printf("--- fds in pollset --------------------------------------\n"); @@ -1904,6 +2436,8 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void) #endif counters.used_fds = 0; counters.num_errors = 0; + counters.no_driver_select_structs = 0; + counters.no_driver_event_structs = 0; #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS len = erts_smp_atomic_read_nob(&drv_ev_state_len); @@ -1920,8 +2454,16 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void) erts_smp_thr_progress_unblock(); + ciodip->no_used_fds = counters.used_fds; + ciodip->no_driver_select_structs = counters.no_driver_select_structs; + ciodip->no_driver_event_structs = counters.no_driver_event_structs; + erts_printf("\n"); erts_printf("used fds=%d\n", counters.used_fds); + erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs); +#if ERTS_CIO_HAVE_DRV_EVENT + erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs); +#endif #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS erts_printf("internal fds=%d\n", counters.internal_fds); #endif @@ -1930,6 +2472,7 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void) #ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS erts_free(ERTS_ALC_T_TMP, (void *) counters.epep); #endif + return counters.num_errors; } diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index edab7947ba..d01297d55c 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -26,6 +26,7 @@ #ifndef ERL_CHECK_IO_H__ #define ERL_CHECK_IO_H__ +#include "sys.h" #include "erl_sys_driver.h" #ifdef ERTS_ENABLE_KERNEL_POLL @@ -52,8 +53,8 @@ void erts_check_io_kp(int); void erts_check_io_nkp(int); void erts_init_check_io_kp(void); void erts_init_check_io_nkp(void); -int erts_check_io_debug_kp(void); -int erts_check_io_debug_nkp(void); +int erts_check_io_debug_kp(ErtsCheckIoDebugInfo *); +int erts_check_io_debug_nkp(ErtsCheckIoDebugInfo *); #else /* !ERTS_ENABLE_KERNEL_POLL */ @@ -70,6 +71,27 @@ void erts_init_check_io(void); #endif +extern erts_smp_atomic_t erts_check_io_time; + +typedef struct { + ErtsPortTaskHandle task; + erts_smp_atomic_t executed_time; +} ErtsIoTask; + +ERTS_GLB_INLINE void erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp); + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +ERTS_GLB_INLINE void +erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp) +{ + ErtsIoTask *itp = (ErtsIoTask *) (((char *) pthp) - offsetof(ErtsIoTask, task)); + erts_aint_t ci_time = erts_smp_atomic_read_acqb(&erts_check_io_time); + erts_smp_atomic_set_relb(&itp->executed_time, ci_time); +} + +#endif + #endif /* ERL_CHECK_IO_H__ */ #if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__) @@ -81,6 +103,16 @@ void erts_init_check_io(void); #include "erl_poll.h" #include "erl_port_task.h" +#ifdef __WIN32__ +/* + * Current erts_poll implementation for Windows cannot handle + * active events in the set of events polled. + */ +# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 +#else +# define ERTS_CIO_DEFER_ACTIVE_EVENTS 0 +#endif + /* * ErtsDrvEventDataState is used by driver_event() which is almost never * used. We allocate ErtsDrvEventDataState separate since we dont wan't @@ -91,13 +123,16 @@ typedef struct { Eterm port; ErlDrvEventData data; ErtsPollEvents removed_events; - ErtsPortTaskHandle task; +#if ERTS_CIO_DEFER_ACTIVE_EVENTS + ErtsPollEvents deferred_events; +#endif + ErtsIoTask iotask; } ErtsDrvEventDataState; typedef struct { Eterm inport; Eterm outport; - ErtsPortTaskHandle intask; - ErtsPortTaskHandle outtask; + ErtsIoTask iniotask; + ErtsIoTask outiotask; } ErtsDrvSelectDataState; #endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */ diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c new file mode 100644 index 0000000000..3f6813e1a5 --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap.c @@ -0,0 +1,2832 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_process.h" +#include "erl_smp.h" +#include "atom.h" +#include "erl_mmap.h" +#include <stddef.h> + +/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */ + +#if defined(DEBUG) || 0 +# undef ERTS_MMAP_DEBUG +# define ERTS_MMAP_DEBUG +# ifndef ERTS_MMAP_OP_RINGBUF_SZ +# define ERTS_MMAP_OP_RINGBUF_SZ 100 +# endif +#endif + +#ifndef ERTS_MMAP_OP_RINGBUF_SZ +# define ERTS_MMAP_OP_RINGBUF_SZ 0 +#endif + +/* #define ERTS_MMAP_DEBUG_FILL_AREAS */ + +#ifdef ERTS_MMAP_DEBUG +# define ERTS_MMAP_ASSERT ERTS_ASSERT +#else +# define ERTS_MMAP_ASSERT(A) ((void) 1) +#endif + +/* + * `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after + * initialization, but the other pointers are not; i.e., only + * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held. + */ +#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \ + (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ + < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot)) +#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \ + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ + (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ + < ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot))) +#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \ + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ + (((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \ + < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot))) + +int erts_have_erts_mmap; +UWord erts_page_inv_mask; + +#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG) +# undef RBT_DEBUG +# define RBT_DEBUG +#endif +#ifdef RBT_DEBUG +# define RBT_ASSERT ERTS_ASSERT +# define IF_RBT_DEBUG(C) C +#else +# define RBT_ASSERT(x) +# define IF_RBT_DEBUG(C) +#endif + +typedef struct RBTNode_ RBTNode; +struct RBTNode_ { + UWord parent_and_color; /* color in bit 0 of parent ptr */ + RBTNode *left; + RBTNode *right; +}; + +#define RED_FLG (1) +#define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG)) +#define IS_BLACK(N) (!IS_RED(N)) +#define SET_RED(N) ((N)->parent_and_color |= RED_FLG) +#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG) + +static ERTS_INLINE RBTNode* parent(RBTNode* node) +{ + return (RBTNode*) (node->parent_and_color & ~RED_FLG); +} + +static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent) +{ + RBT_ASSERT(!((UWord)parent & RED_FLG)); + node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG); +} + +static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color) +{ + RBT_ASSERT(!((UWord)parent & RED_FLG)); + RBT_ASSERT(!(color & ~RED_FLG)); + return ((UWord)parent) | color; +} + + +enum SortOrder { + ADDR_ORDER, /* only address order */ + SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */ + SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */ +}; +#ifdef HARD_DEBUG +static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"}; +#endif + +typedef struct { + RBTNode* root; + enum SortOrder order; +}RBTree; + +#ifdef HARD_DEBUG +# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE) +# define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ) +static int rbt_assert_is_member(RBTNode* root, RBTNode* node); +static RBTNode* check_tree(RBTree* tree, Uint); +#else +# define HARD_CHECK_IS_MEMBER(ROOT,NODE) +# define HARD_CHECK_TREE(TREE,SZ) +#endif + +#if ERTS_MMAP_OP_RINGBUF_SZ + +static int mmap_op_ix; + +typedef enum { + ERTS_OP_TYPE_NONE, + ERTS_OP_TYPE_MMAP, + ERTS_OP_TYPE_MUNMAP, + ERTS_OP_TYPE_MREMAP +} ErtsMMapOpType; + +typedef struct { + ErtsMMapOpType type; + void *result; + UWord in_size; + UWord out_size; + void *old_ptr; + UWord old_size; +} ErtsMMapOp; + +static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; + +#define ERTS_MMAP_OP_RINGBUF_INIT() \ + do { \ + int ix__; \ + for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\ + mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + } \ + mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \ + } while (0) + +#define ERTS_MMAP_OP_START(SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = (SZ); \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + } while (0) + +#define ERTS_MMAP_OP_END(PTR, SZ) \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].result = (PTR); \ + mmap_ops[ix__].out_size = (SZ); \ + } while (0) + +#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MMAP_OP_START((IN_SZ)); \ + ERTS_MMAP_OP_END((RES), (OUT_SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MUNMAP_OP(PTR, SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = (PTR); \ + mmap_ops[ix__].old_size = (SZ); \ + } while (0) + +#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MUNMAP_OP((PTR), (SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = (IN_SZ); \ + mmap_ops[ix__].out_size = (OLD_SZ); \ + mmap_ops[ix__].old_ptr = (OLD_PTR); \ + mmap_ops[ix__].old_size = (OLD_SZ); \ + } while (0) + +#define ERTS_MREMAP_OP_END(PTR, SZ) \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].result = (PTR); \ + mmap_ops[mmap_op_ix].out_size = (SZ); \ + } while (0) + +#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \ + ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MMAP_OP_ABORT() \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + if (--mmap_op_ix < 0) \ + mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \ + } while (0) + +#else + +#define ERTS_MMAP_OP_RINGBUF_INIT() +#define ERTS_MMAP_OP_START(SZ) +#define ERTS_MMAP_OP_END(PTR, SZ) +#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) +#define ERTS_MUNMAP_OP(PTR, SZ) +#define ERTS_MUNMAP_OP_LCK(PTR, SZ) +#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) +#define ERTS_MREMAP_OP_END(PTR, SZ) +#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) +#define ERTS_MMAP_OP_ABORT() + +#endif + +typedef struct { + RBTNode snode; /* node in 'stree' */ + RBTNode anode; /* node in 'atree' */ + char* start; + char* end; +}ErtsFreeSegDesc; + +typedef struct { + RBTree stree; /* size ordered tree */ + RBTree atree; /* address ordered tree */ + Uint nseg; +}ErtsFreeSegMap; + +static struct { + int (*reserve_physical)(char *, UWord); + void (*unreserve_physical)(char *, UWord); + int supercarrier; + int no_os_mmap; + /* + * Super unaligend area is located above super aligned + * area. That is, `sa.bot` is beginning of the super + * carrier, `sua.top` is the end of the super carrier, + * and sa.top and sua.bot moves towards eachother. + */ + struct { + char *top; + char *bot; + ErtsFreeSegMap map; + } sua; + struct { + char *top; + char *bot; + ErtsFreeSegMap map; + } sa; +#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS)) + int mmap_fd; +#endif + erts_smp_mtx_t mtx; + struct { + char *free_list; + char *unused_start; + char *unused_end; + char *new_area_hint; + Uint reserved; + } desc; + struct { + UWord free_seg_descs; + struct { + UWord curr; + UWord max; + } free_segs; + } no; + struct { + struct { + UWord total; + struct { + UWord total; + UWord sa; + UWord sua; + } used; + } supercarrier; + struct { + UWord used; + } os; + } size; +} mmap_state; + +#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \ + do { \ + mmap_state.size.supercarrier.used.total += (SZ); \ + mmap_state.size.supercarrier.used.sa += (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ + <= mmap_state.size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \ + <= mmap_state.size.supercarrier.used.total); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ + mmap_state.size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \ + mmap_state.size.supercarrier.used.sa -= (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \ + do { \ + mmap_state.size.supercarrier.used.total += (SZ); \ + mmap_state.size.supercarrier.used.sua += (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ + <= mmap_state.size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \ + <= mmap_state.size.supercarrier.used.total); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ + mmap_state.size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \ + mmap_state.size.supercarrier.used.sua -= (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_OS_INC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \ + mmap_state.size.os.used += (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_OS_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \ + mmap_state.size.os.used -= (SZ); \ + } while (0) + + +static void +add_free_desc_area(char *start, char *end) +{ + ERTS_MMAP_ASSERT(end == (void *) 0 || end > start); + if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) { + UWord no; + ErtsFreeSegDesc *prev_desc, *desc; + char *desc_end; + + no = 1; + prev_desc = (ErtsFreeSegDesc *) start; + prev_desc->start = mmap_state.desc.free_list; + desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc)); + desc_end = start + 2*sizeof(ErtsFreeSegDesc); + + while (desc_end <= end) { + desc->start = (char *) prev_desc; + prev_desc = desc; + desc = (ErtsFreeSegDesc *) desc_end; + desc_end += sizeof(ErtsFreeSegDesc); + no++; + } + mmap_state.desc.free_list = (char *) prev_desc; + mmap_state.no.free_seg_descs += no; + } +} + +static ErtsFreeSegDesc * +add_unused_free_desc_area(void) +{ + char *ptr; + if (!mmap_state.desc.unused_start) + return NULL; + + ERTS_MMAP_ASSERT(mmap_state.desc.unused_end); + ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE + <= mmap_state.desc.unused_end - mmap_state.desc.unused_start); + + ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE; + add_free_desc_area(mmap_state.desc.unused_start, ptr); + + if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE) + mmap_state.desc.unused_start = ptr; + else + mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL; + + ERTS_MMAP_ASSERT(mmap_state.desc.free_list); + return (ErtsFreeSegDesc *) mmap_state.desc.free_list; +} + +static ERTS_INLINE ErtsFreeSegDesc * +alloc_desc(void) +{ + ErtsFreeSegDesc *res; + res = (ErtsFreeSegDesc *) mmap_state.desc.free_list; + if (!res) { + res = add_unused_free_desc_area(); + if (!res) + return NULL; + } + mmap_state.desc.free_list = res->start; + ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs); + mmap_state.no.free_segs.curr++; + if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr) + mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr; + return res; +} + +static ERTS_INLINE void +free_desc(ErtsFreeSegDesc *desc) +{ + desc->start = mmap_state.desc.free_list; + mmap_state.desc.free_list = (char *) desc; + ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0); + mmap_state.no.free_segs.curr--; +} + +static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode) +{ + return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode)); +} + +static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode) +{ + return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode)); +} + +static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node) +{ + return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node); +} + +static ERTS_INLINE SWord usable_size(enum SortOrder order, + ErtsFreeSegDesc* desc) +{ + return ((order == SA_SZ_ADDR_ORDER) ? + ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start) + : desc->end - desc->start); +} + +#ifdef HARD_DEBUG +static ERTS_INLINE SWord cmp_nodes(enum SortOrder order, + RBTNode* lhs, RBTNode* rhs) +{ + ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs); + ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs); + RBT_ASSERT(lhs != rhs); + if (order != ADDR_ORDER) { + SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc); + if (diff) return diff; + } + if (order != SZ_REVERSE_ADDR_ORDER) { + return (char*)ldesc->start - (char*)rdesc->start; + } + else { + return (char*)rdesc->start - (char*)ldesc->start; + } +} +#endif /* HARD_DEBUG */ + +static ERTS_INLINE SWord cmp_with_node(enum SortOrder order, + SWord sz, char* addr, RBTNode* rhs) +{ + ErtsFreeSegDesc* rdesc; + if (order != ADDR_ORDER) { + SWord diff; + rdesc = snode_to_desc(rhs); + diff = sz - usable_size(order, rdesc); + if (diff) return diff; + } + else + rdesc = anode_to_desc(rhs); + + if (order != SZ_REVERSE_ADDR_ORDER) + return addr - (char*)rdesc->start; + else + return (char*)rdesc->start - addr; +} + + +static ERTS_INLINE void +left_rotate(RBTNode **root, RBTNode *x) +{ + RBTNode *y = x->right; + x->right = y->left; + if (y->left) + set_parent(y->left, x); + set_parent(y, parent(x)); + if (!parent(y)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->left) + parent(x)->left = y; + else { + RBT_ASSERT(x == parent(x)->right); + parent(x)->right = y; + } + y->left = x; + set_parent(x, y); +} + +static ERTS_INLINE void +right_rotate(RBTNode **root, RBTNode *x) +{ + RBTNode *y = x->left; + x->left = y->right; + if (y->right) + set_parent(y->right, x); + set_parent(y, parent(x)); + if (!parent(y)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->right) + parent(x)->right = y; + else { + RBT_ASSERT(x == parent(x)->left); + parent(x)->left = y; + } + y->right = x; + set_parent(x, y); +} + +/* + * Replace node x with node y + * NOTE: segment descriptor of y is not changed + */ +static ERTS_INLINE void +replace(RBTNode **root, RBTNode *x, RBTNode *y) +{ + + if (!parent(x)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->left) + parent(x)->left = y; + else { + RBT_ASSERT(x == parent(x)->right); + parent(x)->right = y; + } + if (x->left) { + RBT_ASSERT(parent(x->left) == x); + set_parent(x->left, y); + } + if (x->right) { + RBT_ASSERT(parent(x->right) == x); + set_parent(x->right, y); + } + + y->parent_and_color = x->parent_and_color; + y->right = x->right; + y->left = x->left; +} + +static void +tree_insert_fixup(RBTNode** root, RBTNode *node) +{ + RBTNode *x = node, *y, *papa_x, *granpa_x; + + /* + * Rearrange the tree so that it satisfies the Red-Black Tree properties + */ + + papa_x = parent(x); + RBT_ASSERT(x != *root && IS_RED(papa_x)); + do { + + /* + * x and its parent are both red. Move the red pair up the tree + * until we get to the root or until we can separate them. + */ + + granpa_x = parent(papa_x); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(granpa_x); + + if (papa_x == granpa_x->left) { + y = granpa_x->right; + if (IS_RED(y)) { + SET_BLACK(y); + SET_BLACK(papa_x); + SET_RED(granpa_x); + x = granpa_x; + } + else { + + if (x == papa_x->right) { + left_rotate(root, papa_x); + papa_x = x; + x = papa_x->left; + } + + RBT_ASSERT(x == granpa_x->left->left); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(papa_x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(IS_BLACK(y)); + + SET_BLACK(papa_x); + SET_RED(granpa_x); + right_rotate(root, granpa_x); + + RBT_ASSERT(x == parent(x)->left); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(parent(x)->right)); + RBT_ASSERT(IS_BLACK(parent(x))); + break; + } + } + else { + RBT_ASSERT(papa_x == granpa_x->right); + y = granpa_x->left; + if (IS_RED(y)) { + SET_BLACK(y); + SET_BLACK(papa_x); + SET_RED(granpa_x); + x = granpa_x; + } + else { + + if (x == papa_x->left) { + right_rotate(root, papa_x); + papa_x = x; + x = papa_x->right; + } + + RBT_ASSERT(x == granpa_x->right->right); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(papa_x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(IS_BLACK(y)); + + SET_BLACK(papa_x); + SET_RED(granpa_x); + left_rotate(root, granpa_x); + + RBT_ASSERT(x == parent(x)->right); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(parent(x)->left)); + RBT_ASSERT(IS_BLACK(parent(x))); + break; + } + } + } while (x != *root && (papa_x=parent(x), IS_RED(papa_x))); + + SET_BLACK(*root); +} + +static void +rbt_delete(RBTree* tree, RBTNode* del) +{ + Uint spliced_is_black; + RBTNode *x, *y, *z = del, *papa_y; + RBTNode null_x; /* null_x is used to get the fixup started when we + splice out a node without children. */ + + HARD_CHECK_IS_MEMBER(tree->root, del); + HARD_CHECK_TREE(tree, 0); + + null_x.parent_and_color = parent_and_color(NULL, !RED_FLG); + + /* Remove node from tree... */ + + /* Find node to splice out */ + if (!z->left || !z->right) + y = z; + else + /* Set y to z:s successor */ + for(y = z->right; y->left; y = y->left); + /* splice out y */ + x = y->left ? y->left : y->right; + spliced_is_black = IS_BLACK(y); + papa_y = parent(y); + if (x) { + set_parent(x, papa_y); + } + else if (spliced_is_black) { + x = &null_x; + x->right = x->left = NULL; + x->parent_and_color = parent_and_color(papa_y, !RED_FLG); + y->left = x; + } + + if (!papa_y) { + RBT_ASSERT(tree->root == y); + tree->root = x; + } + else { + if (y == papa_y->left) { + papa_y->left = x; + } + else { + RBT_ASSERT(y == papa_y->right); + papa_y->right = x; + } + } + if (y != z) { + /* We spliced out the successor of z; replace z by the successor */ + RBT_ASSERT(z != &null_x); + replace(&tree->root, z, y); + } + + if (spliced_is_black) { + RBTNode* papa_x; + /* We removed a black node which makes the resulting tree + violate the Red-Black Tree properties. Fixup tree... */ + + papa_x = parent(x); + while (IS_BLACK(x) && papa_x) { + + /* + * x has an "extra black" which we move up the tree + * until we reach the root or until we can get rid of it. + * + * y is the sibbling of x + */ + + if (x == papa_x->left) { + y = papa_x->right; + RBT_ASSERT(y); + if (IS_RED(y)) { + RBT_ASSERT(y->right); + RBT_ASSERT(y->left); + SET_BLACK(y); + RBT_ASSERT(IS_BLACK(papa_x)); + SET_RED(papa_x); + left_rotate(&tree->root, papa_x); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->right; + } + RBT_ASSERT(y); + RBT_ASSERT(IS_BLACK(y)); + if (IS_BLACK(y->left) && IS_BLACK(y->right)) { + SET_RED(y); + } + else { + if (IS_BLACK(y->right)) { + SET_BLACK(y->left); + SET_RED(y); + right_rotate(&tree->root, y); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->right; + } + RBT_ASSERT(y); + if (IS_RED(papa_x)) { + + SET_BLACK(papa_x); + SET_RED(y); + } + RBT_ASSERT(y->right); + SET_BLACK(y->right); + left_rotate(&tree->root, papa_x); + x = tree->root; + break; + } + } + else { + RBT_ASSERT(x == papa_x->right); + y = papa_x->left; + RBT_ASSERT(y); + if (IS_RED(y)) { + RBT_ASSERT(y->right); + RBT_ASSERT(y->left); + SET_BLACK(y); + RBT_ASSERT(IS_BLACK(papa_x)); + SET_RED(papa_x); + right_rotate(&tree->root, papa_x); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->left; + } + RBT_ASSERT(y); + RBT_ASSERT(IS_BLACK(y)); + if (IS_BLACK(y->right) && IS_BLACK(y->left)) { + SET_RED(y); + } + else { + if (IS_BLACK(y->left)) { + SET_BLACK(y->right); + SET_RED(y); + left_rotate(&tree->root, y); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->left; + } + RBT_ASSERT(y); + if (IS_RED(papa_x)) { + SET_BLACK(papa_x); + SET_RED(y); + } + RBT_ASSERT(y->left); + SET_BLACK(y->left); + right_rotate(&tree->root, papa_x); + x = tree->root; + break; + } + } + x = papa_x; + papa_x = parent(x); + } + SET_BLACK(x); + + papa_x = parent(&null_x); + if (papa_x) { + if (papa_x->left == &null_x) + papa_x->left = NULL; + else { + RBT_ASSERT(papa_x->right == &null_x); + papa_x->right = NULL; + } + RBT_ASSERT(!null_x.left); + RBT_ASSERT(!null_x.right); + } + else if (tree->root == &null_x) { + tree->root = NULL; + RBT_ASSERT(!null_x.left); + RBT_ASSERT(!null_x.right); + } + } + HARD_CHECK_TREE(tree, 0); +} + + +static void +rbt_insert(RBTree* tree, RBTNode* node) +{ +#ifdef RBT_DEBUG + ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL; +#endif + ErtsFreeSegDesc* desc = node_to_desc(tree->order, node); + char* seg_addr = desc->start; + SWord seg_sz = desc->end - desc->start; + + HARD_CHECK_TREE(tree, 0); + + node->left = NULL; + node->right = NULL; + + if (!tree->root) { + node->parent_and_color = parent_and_color(NULL, !RED_FLG); + tree->root = node; + } + else { + RBTNode *x = tree->root; + while (1) { + SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x); + if (diff < 0) { + IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x)); + if (!x->left) { + node->parent_and_color = parent_and_color(x, RED_FLG); + x->left = node; + break; + } + x = x->left; + } + else { + RBT_ASSERT(diff > 0); + IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x)); + if (!x->right) { + node->parent_and_color = parent_and_color(x, RED_FLG); + x->right = node; + break; + } + x = x->right; + } + } + + RBT_ASSERT(parent(node)); +#ifdef RBT_DEBUG + if (tree->order == ADDR_ORDER) { + RBT_ASSERT(!dbg_under || dbg_under->end < desc->start); + RBT_ASSERT(!dbg_over || dbg_over->start > desc->end); + } +#endif + RBT_ASSERT(IS_RED(node)); + if (IS_RED(parent(node))) + tree_insert_fixup(&tree->root, node); + } + HARD_CHECK_TREE(tree, 0); +} + +/* + * Traverse tree in (reverse) sorting order + */ +static void +rbt_foreach_node(RBTree* tree, + void (*fn)(RBTNode*,void*), + void* arg, int reverse) +{ +#ifdef HARD_DEBUG + Sint blacks = -1; + Sint curr_blacks = 1; + Uint depth = 1; + Uint max_depth = 0; + Uint node_cnt = 0; +#endif + enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state; + RBTNode *x = tree->root; + + RBT_ASSERT(!x || !parent(x)); + + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + while (x) { + switch (state) { + case RECURSE_LEFT: + if (x->left) { + RBT_ASSERT(parent(x->left) == x); + #ifdef HARD_DEBUG + ++depth; + if (IS_BLACK(x->left)) + curr_blacks++; + #endif + x = x->left; + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + } + else { + #ifdef HARD_DEBUG + if (blacks < 0) + blacks = curr_blacks; + RBT_ASSERT(blacks == curr_blacks); + #endif + state = reverse ? RETURN_TO_PARENT : DO_NODE; + } + break; + + case DO_NODE: + #ifdef HARD_DEBUG + ++node_cnt; + if (depth > max_depth) + max_depth = depth; + #endif + (*fn) (x, arg); /* Do it! */ + state = reverse ? RECURSE_LEFT : RECURSE_RIGHT; + break; + + case RECURSE_RIGHT: + if (x->right) { + RBT_ASSERT(parent(x->right) == x); + #ifdef HARD_DEBUG + ++depth; + if (IS_BLACK(x->right)) + curr_blacks++; + #endif + x = x->right; + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + } + else { + #ifdef HARD_DEBUG + if (blacks < 0) + blacks = curr_blacks; + RBT_ASSERT(blacks == curr_blacks); + #endif + state = reverse ? DO_NODE : RETURN_TO_PARENT; + } + break; + + case RETURN_TO_PARENT: + #ifdef HARD_DEBUG + if (IS_BLACK(x)) + curr_blacks--; + --depth; + #endif + if (parent(x)) { + if (x == parent(x)->left) { + state = reverse ? RETURN_TO_PARENT : DO_NODE; + } + else { + RBT_ASSERT(x == parent(x)->right); + state = reverse ? DO_NODE : RETURN_TO_PARENT; + } + } + x = parent(x); + break; + } + } +#ifdef HARD_DEBUG + RBT_ASSERT(depth == 0 || (!tree->root && depth==1)); + RBT_ASSERT(curr_blacks == 0); + RBT_ASSERT((1 << (max_depth/2)) <= node_cnt); +#endif +} + +#if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG) +static RBTNode* rbt_prev_node(RBTNode* node) +{ + RBTNode* x; + if (node->left) { + for (x=node->left; x->right; x=x->right) + ; + return x; + } + for (x=node; parent(x); x=parent(x)) { + if (parent(x)->right == x) + return parent(x); + } + return NULL; +} +static RBTNode* rbt_next_node(RBTNode* node) +{ + RBTNode* x; + if (node->right) { + for (x=node->right; x->left; x=x->left) + ; + return x; + } + for (x=node; parent(x); x=parent(x)) { + if (parent(x)->left == x) + return parent(x); + } + return NULL; +} +#endif /* RBT_DEBUG || HARD_DEBUG_MSEG */ + + +/* The API to keep track of a bunch of separated (free) segments + (non-overlapping and non-adjacent). + */ +static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder); +static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end, + ErtsFreeSegDesc** under, ErtsFreeSegDesc** over); +static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end); +static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end); +static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*); +static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz); + + +static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order) +{ + map->atree.root = NULL; + map->atree.order = ADDR_ORDER; + map->stree.root = NULL; + map->stree.order = order; + map->nseg = 0; +} + +/* Lookup directly adjacent free segments to the given area [start->end]. + * The given area must not contain any free segments. + */ +static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end, + ErtsFreeSegDesc** under, ErtsFreeSegDesc** over) +{ + RBTNode* x = map->atree.root; + + *under = NULL; + *over = NULL; + while (x) { + if (start < anode_to_desc(x)->start) { + RBT_ASSERT(end <= anode_to_desc(x)->start); + if (end == anode_to_desc(x)->start) { + RBT_ASSERT(!*over); + *over = anode_to_desc(x); + } + x = x->left; + } + else { + RBT_ASSERT(start >= anode_to_desc(x)->end); + if (start == anode_to_desc(x)->end) { + RBT_ASSERT(!*under); + *under = anode_to_desc(x); + } + x = x->right; + } + } +} + +/* Initialize 'desc' and insert as new free segment [start->end]. + * The new segment must not contain or be adjacent to any free segment in 'map'. + */ +static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc, + char* start, char* end) +{ + desc->start = start; + desc->end = end; + rbt_insert(&map->atree, &desc->anode); + rbt_insert(&map->stree, &desc->snode); + map->nseg++; +} + +/* Resize existing free segment 'desc' to [start->end]. + * The new segment location must overlap the old location and + * it must not contain or be adjacent to any other free segment in 'map'. + */ +static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc, + char* start, char* end) +{ +#ifdef RBT_DEBUG + RBTNode* prev = rbt_prev_node(&desc->anode); + RBTNode* next = rbt_next_node(&desc->anode); + RBT_ASSERT(!prev || anode_to_desc(prev)->end < start); + RBT_ASSERT(!next || anode_to_desc(next)->start > end); +#endif + rbt_delete(&map->stree, &desc->snode); + desc->start = start; + desc->end = end; + rbt_insert(&map->stree, &desc->snode); +} + +/* Delete existing free segment 'desc' from 'map'. + */ +static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc) +{ + rbt_delete(&map->atree, &desc->anode); + rbt_delete(&map->stree, &desc->snode); + map->nseg--; +} + +/* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes. + */ +static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz) +{ + RBTNode* x = map->stree.root; + ErtsFreeSegDesc* best_desc = NULL; + const enum SortOrder order = map->stree.order; + + while (x) { + ErtsFreeSegDesc* desc = snode_to_desc(x); + SWord seg_sz = usable_size(order, desc); + + if (seg_sz < need_sz) { + x = x->right; + } + else { + best_desc = desc; + x = x->left; + } + } + return best_desc; +} + +struct build_arg_t +{ + Process* p; + Eterm* hp; + Eterm acc; +}; + +static void build_free_seg_tuple(RBTNode* node, void* arg) +{ + struct build_arg_t* a = (struct build_arg_t*)arg; + ErtsFreeSegDesc* desc = anode_to_desc(node); + Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start); + Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end); + Eterm tpl = TUPLE2(a->hp, start, end); + + a->hp += 3; + a->acc = CONS(a->hp, tpl, a->acc); + a->hp += 2; +} + +static +Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) +{ + struct build_arg_t barg; + Eterm* hp_end; + const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */ + + barg.p = p; + barg.hp = HAlloc(p, may_need); + hp_end = barg.hp + may_need; + barg.acc = NIL; + rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1); + + RBT_ASSERT(barg.hp <= hp_end); + HRelease(p, hp_end, barg.hp); + return barg.acc; +} + +#if ERTS_HAVE_OS_MMAP +/* Implementation of os_mmap()/os_munmap()/os_mremap()... */ + +#if HAVE_MMAP +# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE) +# if defined(MAP_ANONYMOUS) +# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) +# define ERTS_MMAP_FD (-1) +# elif defined(MAP_ANON) +# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) +# define ERTS_MMAP_FD (-1) +# else +# define ERTS_MMAP_FLAGS (MAP_PRIVATE) +# define ERTS_MMAP_FD mmap_state.mmap_fd +# endif +#endif + +static ERTS_INLINE void * +os_mmap(void *hint_ptr, UWord size, int try_superalign) +{ +#if HAVE_MMAP + void *res; +#ifdef MAP_ALIGN + if (try_superalign) + res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); + else +#endif + res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); + if (res == MAP_FAILED) + return NULL; + return res; +#elif HAVE_VIRTUALALLOC + return (void *) VirtualAlloc(NULL, (SIZE_T) size, + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); +#else +# error "missing mmap() or similar" +#endif +} + +static ERTS_INLINE void +os_munmap(void *ptr, UWord size) +{ +#if HAVE_MMAP +#ifdef ERTS_MMAP_DEBUG + int res = +#endif + munmap(ptr, size); + ERTS_MMAP_ASSERT(res == 0); +#elif HAVE_VIRTUALALLOC +#ifdef DEBUG + BOOL res = +#endif + VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE); + ERTS_MMAP_ASSERT(res != 0); +#else +# error "missing munmap() or similar" +#endif +} + +#ifdef ERTS_HAVE_OS_MREMAP +# if HAVE_MREMAP +# if defined(__NetBSD__) +# define ERTS_MREMAP_FLAGS (0) +# else +# define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE) +# endif +# endif +static ERTS_INLINE void * +os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign) +{ + void *new_seg; +#if HAVE_MREMAP + new_seg = mremap(ptr, (size_t) old_size, +# if defined(__NetBSD__) + NULL, +# endif + (size_t) new_size, ERTS_MREMAP_FLAGS); + if (new_seg == (void *) MAP_FAILED) + return NULL; + return new_seg; +#else +# error "missing mremap() or similar" +#endif +} +#endif + +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION +#if HAVE_MMAP + +#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT) +#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED) +#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE) +#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED) +#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE) +#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE) + +static int +os_reserve_physical(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT, + ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + return 0; + return 1; +} + +static void +os_unreserve_physical(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT, + ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + erl_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory"); +} + +static void * +os_mmap_virtual(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT, + ERTS_MMAP_VIRTUAL_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + return NULL; + return res; +} + +#else +#error "Missing reserve/unreserve physical memory implementation" +#endif +#endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */ + +#endif /* ERTS_HAVE_OS_MMAP */ + +static int reserve_noop(char *ptr, UWord size) +{ +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + Uint32 *uip, *end = (Uint32 *) (ptr + size); + + for (uip = (Uint32 *) ptr; uip < end; uip++) + ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef); + for (uip = (Uint32 *) ptr; uip < end; uip++) + *uip = (Uint32) 0xfeedfeed; +#endif + return 1; +} + +static void unreserve_noop(char *ptr, UWord size) +{ +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + Uint32 *uip, *end = (Uint32 *) (ptr + size); + + for (uip = (Uint32 *) ptr; uip < end; uip++) + *uip = (Uint32) 0xdeadbeef; +#endif +} + +static UWord +alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) +{ + char *ptr; + ErtsFreeSegMap *da_map; + ErtsFreeSegDesc *desc = alloc_desc(); + if (desc) { + insert_free_seg(map, desc, start, end); + return 0; + } + + /* + * Ahh; ran out of free segment descriptors. + * + * First try to map a new page... + */ + +#if ERTS_HAVE_OS_MMAP + if (!mmap_state.no_os_mmap) { + ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); + if (ptr) { + mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; + ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); + add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE); + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, start, end); + return 0; + } + } +#endif + + /* + * ...then try to find a good place in the supercarrier... + */ + da_map = &mmap_state.sua.map; + desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); + if (desc) { + if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE); + else + desc = NULL; + + } + else { + da_map = &mmap_state.sa.map; + desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); + if (desc) { + if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE); + else + desc = NULL; + } + } + if (desc) { + char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE; + add_free_desc_area(desc->start, da_end); + if (da_end != desc->end) + resize_free_seg(da_map, desc, da_end, desc->end); + else { + delete_free_seg(da_map, desc); + free_desc(desc); + } + + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, start, end); + return 0; + } + + /* + * ... and then as last resort use the first page of the + * free segment we are trying to insert for free descriptors. + */ + ptr = start + ERTS_PAGEALIGNED_SIZE; + ERTS_MMAP_ASSERT(ptr <= end); + + add_free_desc_area(start, ptr); + + if (ptr != end) { + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, ptr, end); + } + + return ERTS_PAGEALIGNED_SIZE; +} + +void * +erts_mmap(Uint32 flags, UWord *sizep) +{ + char *seg; + UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep); + + /* Map in premapped supercarrier */ + if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) { + char *end; + ErtsFreeSegDesc *desc; + Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + erts_smp_mtx_lock(&mmap_state.mtx); + + ERTS_MMAP_OP_START(*sizep); + + if (!superaligned) { + desc = lookup_free_seg(&mmap_state.sua.map, asize); + if (desc) { + seg = desc->start; + end = seg+asize; + if (!mmap_state.reserve_physical(seg, asize)) + goto supercarrier_reserve_failure; + if (desc->end == end) { + delete_free_seg(&mmap_state.sua.map, desc); + free_desc(desc); + } + else { + ERTS_MMAP_ASSERT(end < desc->end); + resize_free_seg(&mmap_state.sua.map, desc, end, desc->end); + } + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + goto supercarrier_success; + } + + if (asize <= mmap_state.sua.bot - mmap_state.sa.top) { + if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize, + asize)) + goto supercarrier_reserve_failure; + mmap_state.sua.bot -= asize; + seg = mmap_state.sua.bot; + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + goto supercarrier_success; + } + } + + asize = ERTS_SUPERALIGNED_CEILING(asize); + + desc = lookup_free_seg(&mmap_state.sa.map, asize); + if (desc) { + char *start = seg = desc->start; + seg = (char *) ERTS_SUPERALIGNED_CEILING(seg); + end = seg+asize; + if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + goto supercarrier_reserve_failure; + ERTS_MMAP_SIZE_SC_SA_INC(asize); + if (desc->end == end) { + if (start != seg) + resize_free_seg(&mmap_state.sa.map, desc, start, seg); + else { + delete_free_seg(&mmap_state.sa.map, desc); + free_desc(desc); + } + } + else { + ERTS_MMAP_ASSERT(end < desc->end); + resize_free_seg(&mmap_state.sa.map, desc, end, desc->end); + if (start != seg) { + UWord ad_sz; + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + start, seg); + start += ad_sz; + if (start != seg) + mmap_state.unreserve_physical(start, (UWord) (seg - start)); + } + } + goto supercarrier_success; + } + + if (superaligned) { + char *start = mmap_state.sa.top; + seg = (char *) ERTS_SUPERALIGNED_CEILING(start); + + if (asize + (seg - start) <= mmap_state.sua.bot - start) { + end = seg + asize; + if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + goto supercarrier_reserve_failure; + mmap_state.sa.top = end; + ERTS_MMAP_SIZE_SC_SA_INC(asize); + if (start != seg) { + UWord ad_sz; + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + start, seg); + start += ad_sz; + if (start != seg) + mmap_state.unreserve_physical(start, (UWord) (seg - start)); + } + goto supercarrier_success; + } + + desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE); + if (desc) { + char *org_start = desc->start; + char *org_end = desc->end; + + seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start); + end = seg + asize; + if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg))) + goto supercarrier_reserve_failure; + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + if (org_start != seg) { + ERTS_MMAP_ASSERT(org_start < seg); + resize_free_seg(&mmap_state.sua.map, desc, org_start, seg); + desc = NULL; + } + if (end != org_end) { + UWord ad_sz = 0; + ERTS_MMAP_ASSERT(end < org_end); + if (desc) + resize_free_seg(&mmap_state.sua.map, desc, end, org_end); + else + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + end, org_end); + end += ad_sz; + if (end != org_end) + mmap_state.unreserve_physical(end, + (UWord) (org_end - end)); + } + goto supercarrier_success; + } + } + + ERTS_MMAP_OP_ABORT(); + erts_smp_mtx_unlock(&mmap_state.mtx); + } + +#if ERTS_HAVE_OS_MMAP + /* Map using OS primitives */ + if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) { + if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { + seg = os_mmap(NULL, asize, 0); + if (!seg) + goto failure; + } + else { + asize = ERTS_SUPERALIGNED_CEILING(*sizep); + seg = os_mmap(NULL, asize, 1); + if (!seg) + goto failure; + + if (!ERTS_IS_SUPERALIGNED(seg)) { + char *ptr; + UWord sz; + + os_munmap(seg, asize); + + ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1); + if (!ptr) + goto failure; + + seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr); + sz = (UWord) (seg - ptr); + ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE); + if (sz) + os_munmap(ptr, sz); + sz = ERTS_SUPERALIGNED_SIZE - sz; + if (sz) + os_munmap(seg+asize, sz); + } + } + + ERTS_MMAP_OP_LCK(seg, *sizep, asize); + ERTS_MMAP_SIZE_OS_INC(asize); + *sizep = asize; + return (void *) seg; + } +failure: +#endif + ERTS_MMAP_OP_LCK(NULL, *sizep, 0); + *sizep = 0; + return NULL; + +supercarrier_success: + +#ifdef ERTS_MMAP_DEBUG + if (ERTS_MMAPFLG_SUPERALIGNED & flags) { + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg)); + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize)); + } + else { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + } +#endif + + ERTS_MMAP_OP_END(seg, asize); + erts_smp_mtx_unlock(&mmap_state.mtx); + + *sizep = asize; + return (void *) seg; + +supercarrier_reserve_failure: + erts_smp_mtx_unlock(&mmap_state.mtx); + *sizep = 0; + return NULL; +} + +void +erts_munmap(Uint32 flags, void *ptr, UWord size) +{ + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size)); + + if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { + ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); +#if ERTS_HAVE_OS_MMAP + ERTS_MUNMAP_OP_LCK(ptr, size); + ERTS_MMAP_SIZE_OS_DEC(size); + os_munmap(ptr, size); +#endif + } + else { + char *start, *end; + ErtsFreeSegMap *map; + ErtsFreeSegDesc *prev, *next, *desc; + UWord ad_sz = 0; + + ERTS_MMAP_ASSERT(mmap_state.supercarrier); + + start = (char *) ptr; + end = start + size; + + erts_smp_mtx_lock(&mmap_state.mtx); + + ERTS_MUNMAP_OP(ptr, size); + + if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + + map = &mmap_state.sa.map; + adjacent_free_seg(map, start, end, &prev, &next); + + ERTS_MMAP_SIZE_SC_SA_DEC(size); + if (end == mmap_state.sa.top) { + ERTS_MMAP_ASSERT(!next); + if (prev) { + start = prev->start; + delete_free_seg(map, prev); + free_desc(prev); + } + mmap_state.sa.top = start; + goto supercarrier_success; + } + } + else { + map = &mmap_state.sua.map; + adjacent_free_seg(map, start, end, &prev, &next); + + ERTS_MMAP_SIZE_SC_SUA_DEC(size); + if (start == mmap_state.sua.bot) { + ERTS_MMAP_ASSERT(!prev); + if (next) { + end = next->end; + delete_free_seg(map, next); + free_desc(next); + } + mmap_state.sua.bot = end; + goto supercarrier_success; + } + } + + desc = NULL; + + if (next) { + ERTS_MMAP_ASSERT(end < next->end); + end = next->end; + if (prev) { + delete_free_seg(map, next); + free_desc(next); + goto save_prev; + } + desc = next; + } else if (prev) { + save_prev: + ERTS_MMAP_ASSERT(prev->start < start); + start = prev->start; + desc = prev; + } + + if (desc) + resize_free_seg(map, desc, start, end); + else + ad_sz = alloc_desc_insert_free_seg(map, start, end); + + supercarrier_success: { + UWord unres_sz; + + ERTS_MMAP_ASSERT(size >= ad_sz); + unres_sz = size - ad_sz; + if (unres_sz) + mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz); + + erts_smp_mtx_unlock(&mmap_state.mtx); + } + } +} + +static void * +remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +{ + UWord size = *sizep; + void *new_ptr = erts_mmap(flags, &size); + if (!new_ptr) + return NULL; + *sizep = size; + if (old_size < size) + size = old_size; + sys_memcpy(new_ptr, ptr, (size_t) size); + erts_munmap(flags, ptr, old_size); + return new_ptr; +} + +void * +erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +{ + void *new_ptr; + Uint32 superaligned; + UWord asize; + + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep)); + + if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { + + ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); + + if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) { + new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, + old_size, sizep); + if (new_ptr) + return new_ptr; + } + + if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) { + ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size); + return NULL; + } + +#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP + superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + if (superaligned) { + asize = ERTS_SUPERALIGNED_CEILING(*sizep); + if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) { + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } + } + else { + asize = ERTS_PAGEALIGNED_CEILING(*sizep); + if (asize == old_size) { + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } + } + +#if ERTS_HAVE_GENUINE_OS_MMAP + if (asize < old_size + && (!superaligned + || ERTS_IS_SUPERALIGNED(ptr))) { + UWord um_sz; + new_ptr = ((char *) ptr) + asize; + ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr); + um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr); + ERTS_MMAP_SIZE_OS_DEC(um_sz); + os_munmap(new_ptr, um_sz); + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } +#endif +#if ERTS_HAVE_OS_MREMAP + if (superaligned) + return remap_move(flags, new_ptr, old_size, sizep); + else { + new_ptr = os_mremap(ptr, old_size, asize, 0); + if (!new_ptr) + return NULL; + if (asize > old_size) + ERTS_MMAP_SIZE_OS_INC(asize - old_size); + else + ERTS_MMAP_SIZE_OS_DEC(old_size - asize); + ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return new_ptr; + } +#endif +#endif + } + else { /* In super carrier */ + char *start, *end, *new_end; + ErtsFreeSegMap *map; + ErtsFreeSegDesc *prev, *next; + UWord ad_sz = 0; + + ERTS_MMAP_ASSERT(mmap_state.supercarrier); + + if (ERTS_MMAPFLG_OS_ONLY & flags) + return remap_move(flags, ptr, old_size, sizep); + + superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + asize = (superaligned + ? ERTS_SUPERALIGNED_CEILING(*sizep) + : ERTS_PAGEALIGNED_CEILING(*sizep)); + + erts_smp_mtx_lock(&mmap_state.mtx); + + if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr) + ? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize)) + : (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) { + erts_smp_mtx_unlock(&mmap_state.mtx); + /* + * Segment currently in wrong area (due to a previous memory + * shortage), move it to the right area. + * (remap_move() will succeed) + */ + return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, + old_size, sizep); + } + + ERTS_MREMAP_OP_START(ptr, old_size, *sizep); + + if (asize == old_size) { + new_ptr = ptr; + goto supercarrier_resize_success; + } + + start = (char *) ptr; + end = start + old_size; + new_end = start+asize; + + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + + if (asize < old_size) { + UWord unres_sz; + new_ptr = ptr; + if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + map = &mmap_state.sua.map; + ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize); + } + else { + if (end == mmap_state.sa.top) { + mmap_state.sa.top = new_end; + mmap_state.unreserve_physical(((char *) ptr) + asize, + old_size - asize); + goto supercarrier_resize_success; + } + ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize); + map = &mmap_state.sa.map; + } + + adjacent_free_seg(map, start, end, &prev, &next); + + if (next) + resize_free_seg(map, next, new_end, next->end); + else + ad_sz = alloc_desc_insert_free_seg(map, new_end, end); + ERTS_MMAP_ASSERT(old_size - asize >= ad_sz); + unres_sz = old_size - asize - ad_sz; + if (unres_sz) + mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz, + unres_sz); + goto supercarrier_resize_success; + } + + if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + + adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next); + + if (next && new_end <= next->end) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + if (new_end < next->end) + resize_free_seg(&mmap_state.sua.map, next, new_end, next->end); + else { + delete_free_seg(&mmap_state.sua.map, next); + free_desc(next); + } + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + else { /* Superaligned area */ + + if (end == mmap_state.sa.top) { + if (new_end <= mmap_state.sua.bot) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + mmap_state.sa.top = new_end; + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + else { + adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next); + if (next && new_end <= next->end) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + if (new_end < next->end) + resize_free_seg(&mmap_state.sa.map, next, new_end, next->end); + else { + delete_free_seg(&mmap_state.sa.map, next); + free_desc(next); + } + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + } + + ERTS_MMAP_OP_ABORT(); + erts_smp_mtx_unlock(&mmap_state.mtx); + + /* Failed to resize... */ + } + + return remap_move(flags, ptr, old_size, sizep); + +supercarrier_resize_success: + +#ifdef ERTS_MMAP_DEBUG + if ((ERTS_MMAPFLG_SUPERALIGNED & flags) + || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) { + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize)); + } + else { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + } +#endif + + ERTS_MREMAP_OP_END(new_ptr, asize); + erts_smp_mtx_unlock(&mmap_state.mtx); + + *sizep = asize; + return new_ptr; + +supercarrier_reserve_failure: + ERTS_MREMAP_OP_END(NULL, old_size); + erts_smp_mtx_unlock(&mmap_state.mtx); + *sizep = old_size; + return NULL; + +} + +int erts_mmap_in_supercarrier(void *ptr) +{ + return ERTS_MMAP_IN_SUPERCARRIER(ptr); +} + + +static struct { + Eterm total; + Eterm total_sa; + Eterm total_sua; + Eterm used; + Eterm used_sa; + Eterm used_sua; + Eterm max; + Eterm allocated; + Eterm reserved; + Eterm sizes; + Eterm free_segs; + Eterm supercarrier; + Eterm os; + Eterm scs; + Eterm sco; + Eterm scrpm; + Eterm scrfsd; + + int is_initialized; + erts_mtx_t init_mutex; +}am; + +static void ERTS_INLINE atom_init(Eterm *atom, char *name) +{ + *atom = am_atom_put(name, strlen(name)); +} +#define AM_INIT(AM) atom_init(&am.AM, #AM) + +static void init_atoms(void) +{ + erts_mtx_lock(&am.init_mutex); + + if (!am.is_initialized) { + AM_INIT(total); + AM_INIT(total_sa); + AM_INIT(total_sua); + AM_INIT(used); + AM_INIT(used_sa); + AM_INIT(used_sua); + AM_INIT(max); + AM_INIT(allocated); + AM_INIT(reserved); + AM_INIT(sizes); + AM_INIT(free_segs); + AM_INIT(supercarrier); + AM_INIT(os); + AM_INIT(scs); + AM_INIT(sco); + AM_INIT(scrpm); + AM_INIT(scrfsd); + am.is_initialized = 1; + } + erts_mtx_unlock(&am.init_mutex); +}; + + +#ifdef HARD_DEBUG_MSEG +static void hard_dbg_mseg_init(void); +#endif + +void +erts_mmap_init(ErtsMMapInit *init) +{ + int virtual_map = 0; + char *start = NULL, *end = NULL; + UWord pagesize; +#if defined(__WIN32__) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + pagesize = (UWord) sysinfo.dwPageSize; +#elif defined(_SC_PAGESIZE) + pagesize = (UWord) sysconf(_SC_PAGESIZE); +#elif defined(HAVE_GETPAGESIZE) + pagesize = (UWord) getpagesize(); +#else +# error "Do not know how to get page size" +#endif +#if defined(HARD_DEBUG) || 0 + erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs); + erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco); + erts_fprintf(stderr, "erts_mmap: scrfsd = %i\n", init->scrfsd); +#endif + erts_page_inv_mask = pagesize - 1; + if (pagesize & erts_page_inv_mask) + erl_exit(-1, "erts_mmap: Invalid pagesize: %bpu\n", + pagesize); + + ERTS_MMAP_OP_RINGBUF_INIT(); + + erts_have_erts_mmap = 0; + + mmap_state.supercarrier = 0; + mmap_state.reserve_physical = reserve_noop; + mmap_state.unreserve_physical = unreserve_noop; + +#if HAVE_MMAP && !defined(MAP_ANON) + mmap_state.mmap_fd = open("/dev/zero", O_RDWR); + if (mmap_state.mmap_fd < 0) + erl_exit(-1, "erts_mmap: Failed to open /dev/zero\n"); +#endif + + erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap"); + erts_mtx_init(&am.init_mutex, "mmap_init_atoms"); + +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (init->virtual_range.start) { + char *ptr; + UWord sz; + ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start); + end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end); + sz = end - ptr; + start = os_mmap_virtual(ptr, sz); + if (!start || start > ptr || start >= end) + erl_exit(-1, + "erts_mmap: Failed to create virtual range for super carrier\n"); + sz = start - ptr; + if (sz) + os_munmap(end, sz); + mmap_state.reserve_physical = os_reserve_physical; + mmap_state.unreserve_physical = os_unreserve_physical; + virtual_map = 1; + } + else +#endif + if (init->predefined_area.start) { + start = init->predefined_area.start; + end = init->predefined_area.end; + if (end != (void *) 0 && end < start) + end = start; + } +#if ERTS_HAVE_OS_MMAP + else if (init->scs) { + UWord sz; + sz = ERTS_PAGEALIGNED_CEILING(init->scs); +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (!init->scrpm) { + start = os_mmap_virtual(NULL, sz); + mmap_state.reserve_physical = os_reserve_physical; + mmap_state.unreserve_physical = os_unreserve_physical; + virtual_map = 1; + } + else +#endif + { + /* + * The whole supercarrier will by physically + * reserved all the time. + */ + start = os_mmap(NULL, sz, 1); + } + if (!start) + erl_exit(-1, + "erts_mmap: Failed to create super carrier of size %bpu MB\n", + init->scs/1024/1024); + end = start + sz; +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + if (!virtual_map) { + Uint32 *uip; + + for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++) + *uip = (Uint32) 0xdeadbeef; + } +#endif + } + if (!mmap_state.no_os_mmap) + erts_have_erts_mmap |= ERTS_HAVE_ERTS_OS_MMAP; +#endif + + mmap_state.no.free_seg_descs = 0; + mmap_state.no.free_segs.curr = 0; + mmap_state.no.free_segs.max = 0; + + mmap_state.size.supercarrier.total = 0; + mmap_state.size.supercarrier.used.total = 0; + mmap_state.size.supercarrier.used.sa = 0; + mmap_state.size.supercarrier.used.sua = 0; + mmap_state.size.os.used = 0; + + mmap_state.desc.new_area_hint = NULL; + + if (!start) { + mmap_state.sa.bot = NULL; + mmap_state.sua.top = NULL; + mmap_state.sa.bot = NULL; + mmap_state.sua.top = NULL; + mmap_state.no_os_mmap = 0; + mmap_state.supercarrier = 0; + } + else { + size_t desc_size; + + mmap_state.no_os_mmap = init->sco; + + desc_size = init->scrfsd; + if (desc_size < 100) + desc_size = 100; + desc_size *= sizeof(ErtsFreeSegDesc); + if ((desc_size + + ERTS_SUPERALIGNED_SIZE + + ERTS_PAGEALIGNED_SIZE) > end - start) + erl_exit(-1, "erts_mmap: No space for segments in super carrier\n"); + + mmap_state.sa.bot = start; + mmap_state.sa.bot += desc_size; + mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot); + mmap_state.sa.top = mmap_state.sa.bot; + mmap_state.sua.top = end; + mmap_state.sua.bot = mmap_state.sua.top; + + mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start); + + mmap_state.desc.free_list = NULL; + mmap_state.desc.reserved = 0; + + if (end == (void *) 0) { + /* + * Very unlikely, but we need a guarantee + * that `mmap_state.sua.top` always will + * compare as larger than all segment pointers + * into the super carrier... + */ + mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE; + mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE; +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE)) +#endif + add_free_desc_area(mmap_state.sua.top, end); + mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc); + } + + mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start); + + /* + * Area before (and after) super carrier + * will be used for free segment descritors. + */ +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start)) + erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n"); +#endif + mmap_state.desc.unused_start = start; + mmap_state.desc.unused_end = mmap_state.sa.bot; + mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start) + / sizeof(ErtsFreeSegDesc)); + + init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER); + init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER); + + mmap_state.supercarrier = 1; + erts_have_erts_mmap |= ERTS_HAVE_ERTS_SUPERCARRIER_MMAP; + + mmap_state.desc.new_area_hint = end; + + } + +#if !ERTS_HAVE_OS_MMAP + mmap_state.no_os_mmap = 1; +#endif + +#ifdef HARD_DEBUG_MSEG + hard_dbg_mseg_init(); +#endif +} + + +static ERTS_INLINE void +add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2) +{ + *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp); +} + +Eterm erts_mmap_info(int *print_to_p, + void *print_to_arg, + Eterm** hpp, Uint* szp, + struct erts_mmap_info_struct* emis) +{ + Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua }; + Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua }; + Eterm group[2]; + Eterm group_tags[] = { am.sizes, am.free_segs }; + Eterm list[2]; + Eterm list_tags[2]; /* { am.supercarrier, am.os } */ + int lix; + Eterm res = THE_NON_VALUE; + + if (!hpp) { + erts_smp_mtx_lock(&mmap_state.mtx); + emis->sizes[0] = mmap_state.size.supercarrier.total; + emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot; + emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot; + emis->sizes[3] = mmap_state.size.supercarrier.used.total; + emis->sizes[4] = mmap_state.size.supercarrier.used.sa; + emis->sizes[5] = mmap_state.size.supercarrier.used.sua; + + emis->segs[0] = mmap_state.no.free_segs.curr; + emis->segs[1] = mmap_state.no.free_segs.max; + emis->segs[2] = mmap_state.no.free_seg_descs; + emis->segs[3] = mmap_state.desc.reserved; + emis->segs[4] = mmap_state.sa.map.nseg; + emis->segs[5] = mmap_state.sua.map.nseg; + + emis->os_used = mmap_state.size.os.used; + erts_smp_mtx_unlock(&mmap_state.mtx); + } + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + if (mmap_state.supercarrier) { + const char* prefix = "supercarrier "; + erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]); + erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]); + erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]); + erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]); + erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]); + erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]); + erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]); + erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]); + erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]); + erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]); + erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]); + erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]); + } + if (!mmap_state.no_os_mmap) { + erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used); + } + } + + + if (hpp || szp) { + if (!am.is_initialized) { + init_atoms(); + } + + lix = 0; + if (mmap_state.supercarrier) { + group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, + sizeof(size_tags)/sizeof(Eterm), + size_tags, emis->sizes); + group[1] = erts_bld_atom_uword_2tup_list(hpp, szp, + sizeof(seg_tags)/sizeof(Eterm), + seg_tags, emis->segs); + list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group); + list_tags[lix] = am.supercarrier; + lix++; + } + + if (!mmap_state.no_os_mmap) { + group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, + 1, &am.used, &emis->os_used); + list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group); + list_tags[lix] = am.os; + lix++; + } + res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list); + } + return res; +} + +Eterm erts_mmap_info_options(char *prefix, + int *print_to_p, + void *print_to_arg, + Uint **hpp, + Uint *szp) +{ + const UWord scs = mmap_state.sua.top - mmap_state.sa.bot; + const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false; + const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false; + Eterm res = THE_NON_VALUE; + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + erts_print(to, arg, "%sscs: %bpu\n", prefix, scs); + if (mmap_state.supercarrier) { + erts_print(to, arg, "%ssco: %T\n", prefix, sco); + erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm); + erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mmap_state.desc.reserved); + } + } + + if (hpp || szp) { + if (!am.is_initialized) { + init_atoms(); + } + + res = NIL; + if (mmap_state.supercarrier) { + add_2tup(hpp, szp, &res, am.scrfsd, + erts_bld_uint(hpp,szp, mmap_state.desc.reserved)); + add_2tup(hpp, szp, &res, am.scrpm, scrpm); + add_2tup(hpp, szp, &res, am.sco, sco); + } + add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs)); + } + return res; +} + + +Eterm erts_mmap_debug_info(Process* p) +{ + if (mmap_state.supercarrier) { + ERTS_DECL_AM(sabot); + ERTS_DECL_AM(satop); + ERTS_DECL_AM(suabot); + ERTS_DECL_AM(suatop); + Eterm sa_list, sua_list, list; + Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop }; + UWord values[4]; + Eterm *hp, *hp_end; + Uint may_need; + const Uint PTR_BIG_SZ = HALFWORD_HEAP ? 3 : 2; + + erts_smp_mtx_lock(&mmap_state.mtx); + values[0] = (UWord)mmap_state.sa.bot; + values[1] = (UWord)mmap_state.sa.top; + values[2] = (UWord)mmap_state.sua.bot; + values[3] = (UWord)mmap_state.sua.top; + sa_list = build_free_seg_list(p, &mmap_state.sa.map); + sua_list = build_free_seg_list(p, &mmap_state.sua.map); + erts_smp_mtx_unlock(&mmap_state.mtx); + + may_need = 4*(2+3+PTR_BIG_SZ) + 2*(2+3); + hp = HAlloc(p, may_need); + hp_end = hp + may_need; + + list = erts_bld_atom_uword_2tup_list(&hp, NULL, + sizeof(values)/sizeof(*values), + tags, values); + + sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3; + sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3; + list = CONS(hp, sua_list, list); hp+=2; + list = CONS(hp, sa_list, list); hp+=2; + + ASSERT(hp <= hp_end); + HRelease(p, hp_end, hp); + return list; + } + else { + return am_undefined; + } +} + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * Debug functions * +\* */ + + +#ifdef HARD_DEBUG + +static int rbt_assert_is_member(RBTNode* root, RBTNode* node) +{ + while (node != root) { + RBT_ASSERT(parent(node)); + RBT_ASSERT(parent(node)->left == node || parent(node)->right == node); + node = parent(node); + } + return 1; +} + + +#if 0 +# define PRINT_TREE +#else +# undef PRINT_TREE +#endif + +#ifdef PRINT_TREE +static void print_tree(enum SortOrder order, RBTNode*); +#endif + +/* + * Checks that the order between parent and children are correct, + * and that the Red-Black Tree properies are satisfied. if size > 0, + * check_tree() returns the node that satisfies "address order first fit" + * + * The Red-Black Tree properies are: + * 1. Every node is either red or black. + * 2. Every leaf (NIL) is black. + * 3. If a node is red, then both its children are black. + * 4. Every simple path from a node to a descendant leaf + * contains the same number of black nodes. + * + */ + +struct check_arg_t { + RBTree* tree; + ErtsFreeSegDesc* prev_seg; + Uint size; + RBTNode *res; +}; +static void check_node_callback(RBTNode* x, void* arg); + + +static RBTNode * +check_tree(RBTree* tree, Uint size) +{ + struct check_arg_t carg; + carg.tree = tree; + carg.prev_seg = NULL; + carg.size = size; + carg.res = NULL; + +#ifdef PRINT_TREE + print_tree(tree->order, tree->root); +#endif + + if (!tree->root) + return NULL; + + RBT_ASSERT(IS_BLACK(tree->root)); + RBT_ASSERT(!parent(tree->root)); + + rbt_foreach_node(tree, check_node_callback, &carg, 0); + + return carg.res; +} + +static void check_node_callback(RBTNode* x, void* arg) +{ + struct check_arg_t* a = (struct check_arg_t*) arg; + ErtsFreeSegDesc* seg; + + if (IS_RED(x)) { + RBT_ASSERT(IS_BLACK(x->right)); + RBT_ASSERT(IS_BLACK(x->left)); + } + + RBT_ASSERT(parent(x) || x == a->tree->root); + + if (x->left) { + RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0); + } + if (x->right) { + RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0); + } + + seg = node_to_desc(a->tree->order, x); + RBT_ASSERT(seg->start < seg->end); + if (a->size && (seg->end - seg->start) >= a->size) { + if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) { + a->res = x; + } + } + if (a->tree->order == ADDR_ORDER) { + RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start); + a->prev_seg = seg; + } +} + +#endif /* HARD_DEBUG */ + + +#ifdef PRINT_TREE +#define INDENT_STEP 2 + +#include <stdio.h> + +static void +print_tree_aux(enum SortOrder order, RBTNode *x, int indent) +{ + int i; + + if (x) { + ErtsFreeSegDesc* desc = node_to_desc(order, x); + print_tree_aux(order, x->right, indent + INDENT_STEP); + for (i = 0; i < indent; i++) { + putc(' ', stderr); + } + fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n", + IS_BLACK(x) ? "BLACK" : "RED", + desc->end - desc->start, desc->start, desc->end, desc); + print_tree_aux(order, x->left, indent + INDENT_STEP); + } +} + + +static void +print_tree(enum SortOrder order, RBTNode* root) +{ + fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]); + print_tree_aux(order, root, 0); + fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]); +} + +#endif /* PRINT_TREE */ + + +#ifdef FREE_SEG_API_SMOKE_TEST + +void test_it(void) +{ + ErtsFreeSegMap map; + ErtsFreeSegDesc *desc, *under, *over, *d1, *d2; + const int i = 1; /* reverse addr order */ + + { + init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER); + + insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + desc = lookup_free_seg(&map, 0x500); + ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L)); + + desc = lookup_free_seg(&map, 0x1500); + ERTS_ASSERT(desc->start == (char*)0x15000); + + adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over); + ERTS_ASSERT(!under && !over); + + adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over); + ERTS_ASSERT(!under && over->start == (char*)0x8000); + + adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over); + ERTS_ASSERT(under->end == (char*)0x10000 && !over); + + adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over); + ERTS_ASSERT(!under && !over); + + adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over); + ERTS_ASSERT(!under && over && over->start == (char*)0x11000); + + adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over); + ERTS_ASSERT(under && under->end == (char*)0x12000 && !over); + + adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over); + ERTS_ASSERT(under && under->end == (char*)0x12000 && + over && over->start == (char*)0x13000); + + adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over); + ERTS_ASSERT(!under && over && over->start == (char*)0x13000); + + d1 = lookup_free_seg(&map, 0x500); + ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L)); + + resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + d2 = lookup_free_seg(&map, 0x1200); + ERTS_ASSERT(d2 == d1); + + delete_free_seg(&map, d1); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + d1 = lookup_free_seg(&map, 0x1200); + ERTS_ASSERT(d1->start == (char*)0x15000); + } +} + +#endif /* FREE_SEG_API_SMOKE_TEST */ + + +#ifdef HARD_DEBUG_MSEG + +/* + * Debug stuff used by erl_mseg to check that it does the right thing. + * The reason for keeping it here is that we (ab)use the rb-tree code + * for keeping track of *allocated* segments. + */ + +typedef struct ErtsFreeSegDesc_fake_ { + /*RBTNode snode; Save memory by skipping unused size tree node */ + RBTNode anode; /* node in 'atree' */ + union { + char* start; + struct ErtsFreeSegDesc_fake_* next_free; + }u; + char* end; +}ErtsFreeSegDesc_fake; + +static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000]; +static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first; +RBTree hard_dbg_mseg_tree; + +static erts_mtx_t hard_dbg_mseg_mtx; + +static void hard_dbg_mseg_init(void) +{ + ErtsFreeSegDesc_fake* p; + + erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg"); + hard_dbg_mseg_tree.root = NULL; + hard_dbg_mseg_tree.order = ADDR_ORDER; + + p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) / + sizeof(*hard_dbg_mseg_desc_pool)) - 1]; + p->u.next_free = NULL; + while (--p >= hard_dbg_mseg_desc_pool) { + p->u.next_free = (p+1); + } + hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0]; +} + +static ErtsFreeSegDesc* hard_dbg_alloc_desc(void) +{ + ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first; + ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors"); + hard_dbg_mseg_desc_first = p->u.next_free; + + /* Creative pointer arithmetic to return something that looks like + * a ErtsFreeSegDesc as long as we don't use the absent 'snode'. + */ + return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode)); +} + +static void hard_dbg_free_desc(ErtsFreeSegDesc* desc) +{ + ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode; + memset(p, 0xfe, sizeof(*p)); + p->u.next_free = hard_dbg_mseg_desc_first; + hard_dbg_mseg_desc_first = p; +} + +static void check_seg_writable(void* seg, UWord sz) +{ + UWord* seg_end = (UWord*)((char*)seg + sz); + volatile UWord* p; + ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg)); + ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz)); + for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) { + UWord write_back = *p; + *p = 0xfade2b1acc; + *p = write_back; + } +} + +void hard_dbg_insert_mseg(void* seg, UWord sz) +{ + check_seg_writable(seg, sz); + erts_mtx_lock(&hard_dbg_mseg_mtx); + { + ErtsFreeSegDesc *desc = hard_dbg_alloc_desc(); + RBTNode *prev, *next; + desc->start = (char*)seg; + desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */ + rbt_insert(&hard_dbg_mseg_tree, &desc->anode); + prev = rbt_prev_node(&desc->anode); + next = rbt_next_node(&desc->anode); + ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start); + ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end); + } + erts_mtx_unlock(&hard_dbg_mseg_mtx); +} + +static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start) +{ + RBTNode* x = tree->root; + + while (x) { + ErtsFreeSegDesc* desc = anode_to_desc(x); + if (start < desc->start) { + x = x->left; + } + else if (start > desc->start) { + ERTS_ASSERT(start > desc->end); + x = x->right; + } + else + return desc; + } + return NULL; +} + +void hard_dbg_remove_mseg(void* seg, UWord sz) +{ + check_seg_writable(seg, sz); + erts_mtx_lock(&hard_dbg_mseg_mtx); + { + ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg); + ERTS_ASSERT(desc); + ERTS_ASSERT(desc->start == (char*)seg); + ERTS_ASSERT(desc->end == (char*)seg + sz - 1); + + rbt_delete(&hard_dbg_mseg_tree, &desc->anode); + hard_dbg_free_desc(desc); + } + erts_mtx_unlock(&hard_dbg_mseg_mtx); +} + +#endif /* HARD_DEBUG_MSEG */ diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h new file mode 100644 index 0000000000..778a8e0e80 --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap.h @@ -0,0 +1,134 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifndef ERL_MMAP_H__ +#define ERL_MMAP_H__ + +#include "sys.h" + +#define ERTS_MMAP_SUPERALIGNED_BITS (18) +/* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */ + +#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0) +#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1) +#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2) + +#define ERTS_HAVE_ERTS_OS_MMAP (1 << 0) +#define ERTS_HAVE_ERTS_SUPERCARRIER_MMAP (1 << 1) +extern int erts_have_erts_mmap; +extern UWord erts_page_inv_mask; + +typedef struct { + struct { + char *start; + char *end; + } virtual_range; + struct { + char *start; + char *end; + } predefined_area; + UWord scs; /* super carrier size */ + int sco; /* super carrier only? */ + UWord scrfsd; /* super carrier reserved free segment descriptors */ + int scrpm; /* super carrier reserve physical memory */ +}ErtsMMapInit; + +#define ERTS_MMAP_INIT_DEFAULT_INITER \ + {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1} + +void *erts_mmap(Uint32 flags, UWord *sizep); +void erts_munmap(Uint32 flags, void *ptr, UWord size); +void *erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep); +int erts_mmap_in_supercarrier(void *ptr); +void erts_mmap_init(ErtsMMapInit*); +struct erts_mmap_info_struct +{ + UWord sizes[6]; + UWord segs[6]; + UWord os_used; +}; +Eterm erts_mmap_info(int *print_to_p, void *print_to_arg, + Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*); +Eterm erts_mmap_info_options(char *prefix, int *print_to_p, void *print_to_arg, + Uint **hpp, Uint *szp); +struct process; +Eterm erts_mmap_debug_info(struct process*); + +#define ERTS_SUPERALIGNED_SIZE \ + (1 << ERTS_MMAP_SUPERALIGNED_BITS) +#define ERTS_INV_SUPERALIGNED_MASK \ + ((UWord) (ERTS_SUPERALIGNED_SIZE - 1)) +#define ERTS_SUPERALIGNED_MASK \ + (~ERTS_INV_SUPERALIGNED_MASK) +#define ERTS_SUPERALIGNED_FLOOR(X) \ + (((UWord) (X)) & ERTS_SUPERALIGNED_MASK) +#define ERTS_SUPERALIGNED_CEILING(X) \ + ERTS_SUPERALIGNED_FLOOR((X) + ERTS_INV_SUPERALIGNED_MASK) +#define ERTS_IS_SUPERALIGNED(X) \ + (((UWord) (X) & ERTS_INV_SUPERALIGNED_MASK) == 0) + +#define ERTS_INV_PAGEALIGNED_MASK \ + (erts_page_inv_mask) +#define ERTS_PAGEALIGNED_MASK \ + (~ERTS_INV_PAGEALIGNED_MASK) +#define ERTS_PAGEALIGNED_FLOOR(X) \ + (((UWord) (X)) & ERTS_PAGEALIGNED_MASK) +#define ERTS_PAGEALIGNED_CEILING(X) \ + ERTS_PAGEALIGNED_FLOOR((X) + ERTS_INV_PAGEALIGNED_MASK) +#define ERTS_IS_PAGEALIGNED(X) \ + (((UWord) (X) & ERTS_INV_PAGEALIGNED_MASK) == 0) +#define ERTS_PAGEALIGNED_SIZE \ + (ERTS_INV_PAGEALIGNED_MASK + 1) + +#ifndef HAVE_MMAP +# define HAVE_MMAP 0 +#endif +#ifndef HAVE_MREMAP +# define HAVE_MREMAP 0 +#endif +#if HAVE_MMAP +# define ERTS_HAVE_OS_MMAP 1 +# define ERTS_HAVE_GENUINE_OS_MMAP 1 +# if HAVE_MREMAP +# define ERTS_HAVE_OS_MREMAP 1 +# endif +# if defined(MAP_FIXED) && defined(MAP_NORESERVE) +# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1 +# endif +#endif + +#ifndef HAVE_VIRTUALALLOC +# define HAVE_VIRTUALALLOC 0 +#endif +#if HAVE_VIRTUALALLOC +# define ERTS_HAVE_OS_MMAP 1 +#endif + +/*#define HARD_DEBUG_MSEG*/ +#ifdef HARD_DEBUG_MSEG +# define HARD_DBG_INSERT_MSEG hard_dbg_insert_mseg +# define HARD_DBG_REMOVE_MSEG hard_dbg_remove_mseg +void hard_dbg_insert_mseg(void* seg, UWord sz); +void hard_dbg_remove_mseg(void* seg, UWord sz); +#else +# define HARD_DBG_INSERT_MSEG(SEG,SZ) +# define HARD_DBG_REMOVE_MSEG(SEG,SZ) +#endif + +#endif /* ERL_MMAP_H__ */ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index bd8ba82a5f..94a381e168 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -100,45 +100,6 @@ static int atoms_initialized; typedef struct mem_kind_t MemKind; -#if HALFWORD_HEAP -static int initialize_pmmap(void); -static void *pmmap(size_t size); -static int pmunmap(void *p, size_t size); -static void *pmremap(void *old_address, size_t old_size, - size_t new_size); -#endif - -#if HAVE_MMAP -/* Mmap ... */ - -#define MMAP_PROT (PROT_READ|PROT_WRITE) - - -#ifdef MAP_ANON -# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define MMAP_FD (-1) -#else -# define MMAP_FLAGS (MAP_PRIVATE) -# define MMAP_FD mmap_fd -static int mmap_fd; -#endif - -#if HAVE_MREMAP -# define HAVE_MSEG_RECREATE 1 -#else -# define HAVE_MSEG_RECREATE 0 -#endif - -#if HALFWORD_HEAP -#define CAN_PARTLY_DESTROY 0 -#else -#define CAN_PARTLY_DESTROY 1 -#endif -#else /* #if HAVE_MMAP */ -#define CAN_PARTLY_DESTROY 0 -#error "Not supported" -#endif /* #if HAVE_MMAP */ - const ErtsMsegOpt_t erts_mseg_default_opt = { 1, /* Use cache */ 1, /* Preserv data */ @@ -163,9 +124,7 @@ typedef struct { CallCounter create; CallCounter create_resize; CallCounter destroy; -#if HAVE_MSEG_RECREATE CallCounter recreate; -#endif CallCounter clear_cache; CallCounter check_cache; } ErtsMsegCalls; @@ -173,7 +132,7 @@ typedef struct { typedef struct cache_t_ cache_t; struct cache_t_ { - Uint size; + UWord size; void *seg; cache_t *next; cache_t *prev; @@ -236,11 +195,6 @@ struct ErtsMsegAllctr_t_ { Uint rel_max_cache_bad_fit; ErtsMsegCalls calls; - -#if CAN_PARTLY_DESTROY - Uint min_seg_size; -#endif - }; typedef union { @@ -344,69 +298,31 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) { } } -/* remove ErtsMsegAllctr_t from arguments? - * only used for statistics - */ -static ERTS_INLINE void * -mmap_align(ErtsMsegAllctr_t *ma, void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - - void *p, *q; - UWord d; - - p = mmap(addr, length, prot, flags, fd, offset); - - if (MAP_IS_ALIGNED(p) || p == MAP_FAILED) - return p; - - if (ma) - INC_CC(ma, create_resize); - - munmap(p, length); - - if ((p = mmap(addr, length + MSEG_ALIGNED_SIZE, prot, flags, fd, offset)) == MAP_FAILED) - return MAP_FAILED; - - q = (void *)ALIGNED_CEILING(p); - d = q - p; - - if (d > 0) - munmap(p, d); - - if (MSEG_ALIGNED_SIZE - d > 0) - munmap((void *) (q + length), MSEG_ALIGNED_SIZE - d); - - return q; -} +/* #define ERTS_PRINT_ERTS_MMAP */ static ERTS_INLINE void * -mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size) +mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep) { +#ifdef ERTS_PRINT_ERTS_MMAP + UWord req_size = *sizep; +#endif void *seg; - ASSERT(size % MSEG_ALIGNED_SIZE == 0); - + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - seg = pmmap(size); - if ((unsigned long) seg & CHECK_POINTER_MASK) { - erts_fprintf(stderr,"Pointer mask failure (0x%08lx)\n",(unsigned long) seg); - return NULL; - } - } else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#if HAVE_MMAP - { - seg = (void *) mmap_align(ma, (void *) 0, (size_t) size, - MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0); - if (seg == (void *) MAP_FAILED) - seg = NULL; - - ASSERT(MAP_IS_ALIGNED(seg) || !seg); - } -#else -# error "Missing mseg_create() implementation" + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + + seg = erts_mmap(mmap_flags, sizep); + +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg, + (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + req_size, *sizep); #endif - } INC_CC(ma, create); @@ -414,91 +330,55 @@ mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size) } static ERTS_INLINE void -mseg_destroy(ErtsMsegAllctr_t *ma, MemKind* mk, void *seg, Uint size) { - ERTS_DECLARE_DUMMY(int res); - +mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord size) { + + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - res = pmunmap((void *) seg, size); - } - else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#ifdef HAVE_MMAP - res = munmap((void *) seg, size); -#else -# error "Missing mseg_destroy() implementation" + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + + erts_munmap(mmap_flags, seg_p, size); +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n", + (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + seg_p, *size); #endif - } - - ASSERT(size % MSEG_ALIGNED_SIZE == 0); - ASSERT(res == 0); - INC_CC(ma, destroy); } -#if HAVE_MSEG_RECREATE -#if defined(__NetBsd__) -#define MREMAP_FLAGS (0) -#else -#define MREMAP_FLAGS (MREMAP_MAYMOVE) -#endif - - -/* mseg_recreate - * May return *unaligned* segments as in address not aligned to MSEG_ALIGNMENT - * it is still page aligned - * - * This is fine for single block carriers as long as we don't cache misaligned - * segments (since multiblock carriers may use them) - * - * For multiblock carriers we *need* MSEG_ALIGNMENT but mbc's will never be - * reallocated. - * - * This should probably be fixed the following way: - * 1) Use an option to segment allocation - NEED_ALIGNMENT - * 2) Add mremap_align which takes care of aligning a new a mremaped area - * 3) Fix the cache to handle of aligned and unaligned segments - */ - static ERTS_INLINE void * -mseg_recreate(ErtsMsegAllctr_t *ma, MemKind* mk, void *old_seg, Uint old_size, Uint new_size) +mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWord old_size, UWord *sizep) { +#ifdef ERTS_PRINT_ERTS_MMAP + UWord req_size = *sizep; +#endif void *new_seg; - - ASSERT(old_size % MSEG_ALIGNED_SIZE == 0); - ASSERT(new_size % MSEG_ALIGNED_SIZE == 0); - + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - new_seg = (void *) pmremap((void *) old_seg, - (size_t) old_size, - (size_t) new_size); - } - else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#if HAVE_MREMAP -#if defined(__NetBSD__) - new_seg = mremap(old_seg, (size_t)old_size, NULL, new_size, MREMAP_FLAGS); -#else - new_seg = mremap(old_seg, (size_t)old_size, (size_t)new_size, MREMAP_FLAGS); -#endif - if (new_seg == (void *) MAP_FAILED) - new_seg = NULL; -#else -#error "Missing mseg_recreate() implementation" -#endif - } + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + new_seg = erts_mremap(mmap_flags, old_seg, old_size, sizep); + +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n", + new_seg, (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + old_seg, old_size, req_size, *sizep); +#endif INC_CC(ma, recreate); return new_seg; } -#endif /* #if HAVE_MSEG_RECREATE */ - #ifdef DEBUG #define ERTS_DBG_MA_CHK_THR_ACCESS(MA) \ do { \ @@ -528,7 +408,7 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) { c->prev = c; } -static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Uint flags) { +static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, Uint flags) { cache_t *c; ERTS_DBG_MK_CHK_THR_ACCESS(mk); @@ -566,14 +446,13 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui return 1; } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) { - /* No free slots. * Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */ c = erts_circleq_tail(&(mk->cache_unpowered_node)); erts_circleq_remove(c); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, ERTS_MSEG_FLG_NONE, mk, c->seg, c->size); mseg_cache_clear_node(c); c->seg = seg; @@ -599,7 +478,8 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui c = erts_circleq_tail(&(mk->cache_powered_node[i])); erts_circleq_remove(c); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, c->seg, c->size); + mseg_cache_clear_node(c); c->seg = seg; @@ -614,18 +494,18 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui return 0; } -static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags) { +static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flags) { - Uint size = *size_p; + UWord size = *size_p; ERTS_DBG_MK_CHK_THR_ACCESS(mk); if (MSEG_FLG_IS_2POW(flags)) { int i, ix = SIZE_TO_CACHE_AREA_IDX(size); - void *seg; + char *seg; cache_t *c; - Uint csize; + UWord csize; ASSERT(IS_2POW(size)); @@ -641,7 +521,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags ASSERT(MAP_IS_ALIGNED(c->seg)); csize = c->size; - seg = c->seg; + seg = (char*) c->seg; mk->cache_size--; mk->cache_hits++; @@ -652,9 +532,8 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags ASSERT(!(mk->cache_size < 0)); - if (csize != size) { - mseg_destroy(mk->ma, mk, (char *)seg + size, csize - size); - } + if (csize != size) + mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, seg + size, csize - size); return seg; } @@ -663,10 +542,10 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags void *seg; cache_t *c; cache_t *best = NULL; - Uint bdiff = 0; - Uint csize; - Uint bad_max_abs = mk->ma->abs_max_cache_bad_fit; - Uint bad_max_rel = mk->ma->rel_max_cache_bad_fit; + UWord bdiff = 0; + UWord csize; + UWord bad_max_abs = mk->ma->abs_max_cache_bad_fit; + UWord bad_max_rel = mk->ma->rel_max_cache_bad_fit; erts_circleq_foreach(c, &(mk->cache_unpowered_node)) { csize = c->size; @@ -708,7 +587,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags /* Use current cache placement for remaining segment space */ - best->seg = seg + size; + best->seg = ((char *)seg) + size; best->size = csize - size; ASSERT((size % GET_PAGE_SIZE) == 0); @@ -728,7 +607,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags * using callbacks from aux-work in the scheduler. */ -static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { cache_t *c = NULL; c = erts_circleq_tail(head); @@ -737,7 +616,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, flags, mk, c->seg, c->size); mseg_cache_clear_node(c); erts_circleq_push_head(&(mk->cache_free), c); @@ -749,7 +628,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h return mk->cache_size; } -static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { cache_t *c = NULL; while (!erts_circleq_is_empty(head)) { @@ -760,7 +639,7 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, flags, mk, c->seg, c->size); mseg_cache_clear_node(c); erts_circleq_push_head(&(mk->cache_free), c); @@ -788,11 +667,11 @@ static Uint mseg_check_memkind_cache(MemKind *mk) { for (i = 0; i < CACHE_AREAS; i++) { if (!erts_circleq_is_empty(&(mk->cache_powered_node[i]))) - return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_powered_node[i])); + return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); } if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_unpowered_node)); + return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); return 0; } @@ -851,12 +730,12 @@ static void mseg_clear_memkind_cache(MemKind *mk) { if (erts_circleq_is_empty(&(mk->cache_powered_node[i]))) continue; - mseg_drop_memkind_cache_size(mk, &(mk->cache_powered_node[i])); + mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i]))); } /* drop varied caches */ if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - mseg_drop_memkind_cache_size(mk, &(mk->cache_unpowered_node)); + mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node))); ASSERT(mk->cache_size == 0); @@ -896,36 +775,35 @@ static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma, } static void * -mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, Uint *size_p, +mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt) { - Uint size; + UWord size; void *seg; MemKind* mk = memkind(ma, opt); INC_CC(ma, alloc); - /* Carrier align */ - size = ALIGNED_CEILING(*size_p); - - /* Cache optim (if applicable) */ - if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(size)) - size = ceil_2pow(size); + if (!MSEG_FLG_IS_2POW(flags)) + size = ERTS_PAGEALIGNED_CEILING(*size_p); + else { + size = ALIGNED_CEILING(*size_p); + if (!IS_2POW(size)) { + /* Cache optim (if applicable) */ + size = ceil_2pow(size); + } + } -#if CAN_PARTLY_DESTROY - if (size < ma->min_seg_size) - ma->min_seg_size = size; -#endif - if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL) goto done; - if ((seg = mseg_create(ma, mk, size)) == NULL) - size = 0; + seg = mseg_create(ma, flags, mk, &size); + if (!seg) + *size_p = 0; + else { done: - *size_p = size; - if (seg) { + *size_p = size; if (erts_mtrace_enabled) erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size); @@ -937,7 +815,7 @@ done: static void -mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size, +mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size, Uint flags, const ErtsMsegOpt_t *opt) { MemKind* mk = memkind(ma, opt); @@ -952,7 +830,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size, if (erts_mtrace_enabled) erts_mtrace_crr_free(atype, SEGTYPE, seg); - mseg_destroy(ma, mk, seg, size); + mseg_destroy(ma, flags, mk, seg, size); done: @@ -961,11 +839,11 @@ done: static void * mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) + UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) { MemKind* mk; void *new_seg; - Uint new_size; + UWord new_size; /* Just allocate a new segment if we didn't have one before */ if (!seg || !old_size) { @@ -985,90 +863,47 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, mk = memkind(ma, opt); new_seg = seg; - /* Carrier align */ - new_size = ALIGNED_CEILING(*new_size_p); - - /* Cache optim (if applicable) */ - if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(new_size)) - new_size = ceil_2pow(new_size); - - if (new_size == old_size) - ; - else if (new_size < old_size) { - Uint shrink_sz = old_size - new_size; + if (!MSEG_FLG_IS_2POW(flags)) + new_size = ERTS_PAGEALIGNED_CEILING(*new_size_p); + else { + new_size = ALIGNED_CEILING(*new_size_p); + if (!IS_2POW(new_size)) { + /* Cache optim (if applicable) */ + new_size = ceil_2pow(new_size); + } + } -#if CAN_PARTLY_DESTROY - if (new_size < ma->min_seg_size) - ma->min_seg_size = new_size; -#endif - /* +M<S>rsbcst <ratio> */ - if (shrink_sz < opt->abs_shrink_th - && 100*shrink_sz < opt->rel_shrink_th*old_size) { - new_size = old_size; + if (new_size > old_size) { + if (opt->preserv) { + new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); + if (!new_seg) + new_size = old_size; } else { - -#if CAN_PARTLY_DESTROY - - if (erts_mtrace_enabled) - erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); - - mseg_destroy(ma, mk, ((char *) seg) + new_size, shrink_sz); - -#elif HAVE_MSEG_RECREATE - goto do_recreate; -#else + mseg_dealloc(ma, atype, seg, old_size, flags, opt); new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - if (!new_seg) - new_size = old_size; - else { - sys_memcpy(((char *) new_seg), - ((char *) seg), - MIN(new_size, old_size)); - mseg_dealloc(ma, atype, seg, old_size, opt); - } -#endif + new_size = 0; } } - else { + else if (new_size < old_size) { + UWord shrink_sz = old_size - new_size; - if (!opt->preserv) { - mseg_dealloc(ma, atype, seg, old_size, flags, opt); - new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); + /* +M<S>rsbcst <ratio> */ + if (shrink_sz < opt->abs_shrink_th + && 100*shrink_sz < opt->rel_shrink_th*old_size) { + new_size = old_size; } else { -#if HAVE_MSEG_RECREATE -#if !CAN_PARTLY_DESTROY - do_recreate: -#endif - new_seg = mseg_recreate(ma, mk, (void *) seg, old_size, new_size); - /* ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - * will not always be aligned and it ok for now - */ - - if (erts_mtrace_enabled) - erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); + new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); if (!new_seg) new_size = old_size; -#else - new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - - if (!new_seg) - new_size = old_size; - else { - sys_memcpy(((char *) new_seg), ((char *) seg), MIN(new_size, old_size)); - mseg_dealloc(ma, atype, seg, old_size, flags, opt); - } -#endif } } + if (erts_mtrace_enabled) + erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); + INC_CC(ma, realloc); ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size)); @@ -1106,9 +941,7 @@ static struct { Eterm mseg_create; Eterm mseg_create_resize; Eterm mseg_destroy; -#if HAVE_MSEG_RECREATE Eterm mseg_recreate; -#endif Eterm mseg_clear_cache; Eterm mseg_check_cache; @@ -1129,8 +962,6 @@ init_atoms(ErtsMsegAllctr_t *ma) #ifdef DEBUG Eterm *atom; #endif - - ERTS_MSEG_UNLOCK(ma); erts_mtx_lock(&init_atoms_mutex); if (!atoms_initialized) { @@ -1163,9 +994,7 @@ init_atoms(ErtsMsegAllctr_t *ma) AM_INIT(mseg_create); AM_INIT(mseg_create_resize); AM_INIT(mseg_destroy); -#if HAVE_MSEG_RECREATE AM_INIT(mseg_recreate); -#endif AM_INIT(mseg_clear_cache); AM_INIT(mseg_check_cache); @@ -1176,7 +1005,6 @@ init_atoms(ErtsMsegAllctr_t *ma) #endif } - ERTS_MSEG_LOCK(ma); atoms_initialized = 1; erts_mtx_unlock(&init_atoms_mutex); } @@ -1239,7 +1067,9 @@ info_options(ErtsMsegAllctr_t *ma, Uint **hpp, Uint *szp) { - Eterm res = THE_NON_VALUE; + Eterm res; + + res = erts_mmap_info_options(prefix, print_to_p, print_to_arg, hpp, szp); if (print_to_p) { int to = *print_to_p; @@ -1254,7 +1084,6 @@ info_options(ErtsMsegAllctr_t *ma, if (!atoms_initialized) init_atoms(ma); - res = NIL; add_2tup(hpp, szp, &res, am.mcs, bld_uint(hpp, szp, ma->max_cache_size)); @@ -1293,9 +1122,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp PRINT_CC(to, arg, create); PRINT_CC(to, arg, create_resize); PRINT_CC(to, arg, destroy); -#if HAVE_MSEG_RECREATE PRINT_CC(to, arg, recreate); -#endif PRINT_CC(to, arg, clear_cache); PRINT_CC(to, arg, check_cache); @@ -1316,12 +1143,10 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp bld_unstable_uint(hpp, szp, ma->calls.clear_cache.giga_no), bld_unstable_uint(hpp, szp, ma->calls.clear_cache.no)); -#if HAVE_MSEG_RECREATE add_3tup(hpp, szp, &res, am.mseg_recreate, bld_unstable_uint(hpp, szp, ma->calls.recreate.giga_no), bld_unstable_uint(hpp, szp, ma->calls.recreate.no)); -#endif add_3tup(hpp, szp, &res, am.mseg_destroy, bld_unstable_uint(hpp, szp, ma->calls.destroy.giga_no), @@ -1465,14 +1290,8 @@ erts_mseg_info_options(int ix, ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix); Eterm res; - ERTS_MSEG_LOCK(ma); - - ERTS_DBG_MA_CHK_THR_ACCESS(ma); - res = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp); - ERTS_MSEG_UNLOCK(ma); - return res; } @@ -1490,10 +1309,6 @@ erts_mseg_info(int ix, Eterm values[4]; Uint n = 0; - ERTS_MSEG_LOCK(ma); - - ERTS_DBG_MA_CHK_THR_ACCESS(ma); - if (hpp || szp) { if (!atoms_initialized) @@ -1506,6 +1321,10 @@ erts_mseg_info(int ix, } values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp); values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp); + + ERTS_MSEG_LOCK(ma); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); + #if HALFWORD_HEAP values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); @@ -1521,7 +1340,7 @@ erts_mseg_info(int ix, } void * -erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMsegOpt_t *opt) +erts_mseg_alloc_opt(ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); void *seg; @@ -1529,20 +1348,23 @@ erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMse ERTS_DBG_MA_CHK_THR_ACCESS(ma); seg = mseg_alloc(ma, atype, size_p, flags, opt); ERTS_MSEG_UNLOCK(ma); + HARD_DBG_INSERT_MSEG(seg, *size_p); return seg; } void * -erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p, Uint flags) +erts_mseg_alloc(ErtsAlcType_t atype, UWord *size_p, Uint flags) { return erts_mseg_alloc_opt(atype, size_p, flags, &erts_mseg_default_opt); } void erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, - Uint size, Uint flags, const ErtsMsegOpt_t *opt) + UWord size, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); + + HARD_DBG_REMOVE_MSEG(seg, size); ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); mseg_dealloc(ma, atype, seg, size, flags, opt); @@ -1550,29 +1372,32 @@ erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, } void -erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size, Uint flags) +erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, UWord size, Uint flags) { erts_mseg_dealloc_opt(atype, seg, size, flags, &erts_mseg_default_opt); } void * erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, + UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); void *new_seg; + + HARD_DBG_REMOVE_MSEG(seg, old_size); ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); new_seg = mseg_realloc(ma, atype, seg, old_size, new_size_p, flags, opt); ERTS_MSEG_UNLOCK(ma); + HARD_DBG_INSERT_MSEG(new_seg, *new_size_p); return new_seg; } void * erts_mseg_realloc(ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, Uint flags) + UWord old_size, UWord *new_size_p, Uint flags) { return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p, flags, &erts_mseg_default_opt); @@ -1662,16 +1487,17 @@ erts_mseg_init(ErtsMsegInit_t *init) erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms"); -#if HAVE_MMAP && !defined(MAP_ANON) - mmap_fd = open("/dev/zero", O_RDWR); - if (mmap_fd < 0) - erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n"); -#endif +#if HALFWORD_HEAP + if (sizeof(void *) != 8) + erl_exit(-1,"Halfword emulator cannot be run in 32bit mode"); -#if HAVE_MMAP && HALFWORD_HEAP - initialize_pmmap(); + init->mmap.virtual_range.start = (char *) sbrk(0); + init->mmap.virtual_range.end = (char *) 0x100000000UL; + init->mmap.sco = 0; #endif + erts_mmap_init(&init->mmap); + if (!IS_2POW(GET_PAGE_SIZE)) erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE); @@ -1712,10 +1538,6 @@ erts_mseg_init(ErtsMsegInit_t *init) #endif sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls)); - -#if CAN_PARTLY_DESTROY - ma->min_seg_size = ~((Uint) 0); -#endif } } @@ -1749,447 +1571,42 @@ erts_mseg_late_init(void) #endif /* #if HAVE_ERTS_MSEG */ -unsigned long -erts_mseg_test(unsigned long op, - unsigned long a1, - unsigned long a2, - unsigned long a3) +UWord +erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3) { switch (op) { #if HAVE_ERTS_MSEG case 0x400: /* Have erts_mseg */ - return (unsigned long) 1; + return (UWord) 1; case 0x401: - return (unsigned long) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1, (Uint) 0); + return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (UWord *) a1, (Uint) 0); case 0x402: erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, (Uint) 0); - return (unsigned long) 0; + return (UWord) 0; case 0x403: - return (unsigned long) erts_mseg_realloc(ERTS_ALC_A_INVALID, + return (UWord) erts_mseg_realloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, - (Uint *) a3, + (UWord *) a3, (Uint) 0); case 0x404: erts_mseg_clear_cache(); - return (unsigned long) 0; + return (UWord) 0; case 0x405: - return (unsigned long) erts_mseg_no(&erts_mseg_default_opt); + return (UWord) erts_mseg_no(&erts_mseg_default_opt); case 0x406: { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(0); - unsigned long res; + UWord res; ERTS_MSEG_LOCK(ma); - res = (unsigned long) tot_cache_size(ma); + res = (UWord) tot_cache_size(ma); ERTS_MSEG_UNLOCK(ma); return res; } #else /* #if HAVE_ERTS_MSEG */ case 0x400: /* Have erts_mseg */ - return (unsigned long) 0; + return (UWord) 0; #endif /* #if HAVE_ERTS_MSEG */ - default: ASSERT(0); return ~((unsigned long) 0); - } - -} - - -#if HALFWORD_HEAP -/* - * Very simple page oriented mmap replacer. Works in the lower - * 32 bit address range of a 64bit program. - * Implements anonymous mmap mremap and munmap with address order first fit. - * The free list is expected to be very short... - * To be used for compressed pointers in Erlang halfword emulator - * implementation. The MacOS X version is more of a toy, it's not really - * for production as the halfword erlang VM relies on Linux specific memory - * mapping tricks. - */ - -/* #define HARDDEBUG 1 */ - -#ifdef HARDDEBUG -static void dump_freelist(void) -{ - FreeBlock *p = first; - - while (p) { - fprintf(stderr, "p = %p\r\np->num = %ld\r\np->next = %p\r\n\r\n", - (void *) p, (unsigned long) p->num, (void *) p->next); - p = p->next; - } -} - -#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) \ - fprintf(stderr,"Mapping of address %p with size %ld " \ - "does not map complete pages (%s:%d)\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) - -#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) \ - fprintf(stderr,"Mapping of address %p with size %ld " \ - "is not page aligned (%s:%d)\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) - -#define HARDDEBUG_MAP_FAILED(PTR, SZ) \ - fprintf(stderr, "Could not actually map memory " \ - "at address %p with size %ld (%s:%d) ..\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) -#else -#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) do{}while(0) -#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) do{}while(0) -#define HARDDEBUG_MAP_FAILED(PTR, SZ) do{}while(0) -#endif - - -#ifdef __APPLE__ -#define MAP_ANONYMOUS MAP_ANON -#endif - -#define INIT_LOCK() do {erts_mtx_init(&pmmap_mutex, "pmmap");} while(0) - -#define TAKE_LOCK() do {erts_mtx_lock(&pmmap_mutex);} while(0) - -#define RELEASE_LOCK() do {erts_mtx_unlock(&pmmap_mutex);} while(0) - -static erts_mtx_t pmmap_mutex; /* Also needed when !USE_THREADS */ - -typedef struct _free_block { - unsigned long num; /*pages*/ - struct _free_block *next; -} FreeBlock; - -/* Protect with lock */ -static FreeBlock *first; - -static void *do_map(void *ptr, size_t sz) -{ - void *res; - - if (ALIGNED_CEILING(sz) != sz) { - HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz); - return NULL; - } - - if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) { - HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz); - return NULL; - } - -#if HAVE_MMAP - res = mmap(ptr, sz, - PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS | MAP_FIXED, - -1 , 0); -#else -# error "Missing mmap support" -#endif - - if (res == MAP_FAILED) { - HARDDEBUG_MAP_FAILED(ptr, sz); - return NULL; - } - - return res; -} - -static int do_unmap(void *ptr, size_t sz) -{ - void *res; - - if (ALIGNED_CEILING(sz) != sz) { - HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz); - return 1; - } - - if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) { - HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz); - return 1; + default: ASSERT(0); return ~((UWord) 0); } - res = mmap(ptr, sz, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, - -1 , 0); - - if (res == MAP_FAILED) { - HARDDEBUG_MAP_FAILED(ptr, sz); - return 1; - } - - return 0; -} - -#ifdef __APPLE__ -/* - * The first 4 gig's are protected on Macos X for 64bit processes :( - * The range 0x1000000000 - 0x10FFFFFFFF is selected as an arbitrary - * value of a normally unused range... Real MMAP's will avoid - * it and all 32bit compressed pointers can be in that range... - * More expensive than on Linux where expansion of compressed - * poiters involves no masking (as they are in the first 4 gig's). - * It's also very uncertain if the MAP_NORESERVE flag really has - * any effect in MacOS X. Swap space may always be allocated... - */ -#define SET_RANGE_MIN() /* nothing */ -#define RANGE_MIN 0x1000000000UL -#define RANGE_MAX 0x1100000000UL -#define RANGE_MASK (RANGE_MIN) -#define EXTRA_MAP_FLAGS (MAP_FIXED) -#else -static size_t range_min; -#define SET_RANGE_MIN() do { range_min = (size_t) sbrk(0); } while (0) -#define RANGE_MIN range_min -#define RANGE_MAX 0x100000000UL -#define RANGE_MASK 0UL -#define EXTRA_MAP_FLAGS (0) -#endif - -static int initialize_pmmap(void) -{ - char *p,*q,*rptr; - size_t rsz; - FreeBlock *initial; - - SET_RANGE_MIN(); - if (sizeof(void *) != 8) { - erl_exit(1,"Halfword emulator cannot be run in 32bit mode"); - } - - p = (char *) RANGE_MIN; - q = (char *) RANGE_MAX; - - rsz = ALIGNED_FLOOR(q - p); - - rptr = mmap_align(NULL, (void *) p, rsz, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | - MAP_NORESERVE | EXTRA_MAP_FLAGS, - -1 , 0); -#ifdef HARDDEBUG - printf("p=%p, rsz = %ld, pages = %ld, got range = %p -> %p\r\n", - p, (unsigned long) rsz, (unsigned long) (rsz / MSEG_ALIGNED_SIZE), - (void *) rptr, (void*)(rptr + rsz)); -#endif - if ((UWord)(rptr + rsz) > RANGE_MAX) { - size_t rsz_trunc = RANGE_MAX - (UWord)rptr; -#ifdef HARDDEBUG - printf("Reducing mmap'ed memory from %lu to %lu Mb, reduced range = %p -> %p\r\n", - rsz/(1024*1024), rsz_trunc/(1024*1024), rptr, rptr+rsz_trunc); -#endif - munmap((void*)RANGE_MAX, rsz - rsz_trunc); - rsz = rsz_trunc; - } - if (!do_map(rptr, MSEG_ALIGNED_SIZE)) { - erl_exit(1,"Could not actually mmap first page for halfword emulator...\n"); - } - initial = (FreeBlock *) rptr; - initial->num = (rsz / MSEG_ALIGNED_SIZE); - initial->next = NULL; - first = initial; - INIT_LOCK(); - return 0; -} - -static void *pmmap(size_t size) -{ - size_t real_size = ALIGNED_CEILING(size); - size_t num_pages = real_size / MSEG_ALIGNED_SIZE; - FreeBlock **block; - FreeBlock *tail; - FreeBlock *res; - - TAKE_LOCK(); - - for (block = &first; - *block != NULL && (*block)->num < num_pages; - block = &((*block)->next)) - ; - if (!(*block)) { - RELEASE_LOCK(); - return NULL; - } - if ((*block)->num == num_pages) { - /* nice, perfect fit */ - res = *block; - *block = (*block)->next; - } else { - tail = (FreeBlock *) (((char *) ((void *) (*block))) + real_size); - if (!do_map(tail, MSEG_ALIGNED_SIZE)) { - HARDDEBUG_MAP_FAILED(tail, MSEG_ALIGNED_SIZE); - RELEASE_LOCK(); - return NULL; - } - tail->num = (*block)->num - num_pages; - tail->next = (*block)->next; - res = *block; - *block = tail; - } - - RELEASE_LOCK(); - - if (!do_map(res, real_size)) { - HARDDEBUG_MAP_FAILED(res, real_size); - return NULL; - } - - return (void *) res; -} - -static int pmunmap(void *p, size_t size) -{ - size_t real_size = ALIGNED_CEILING(size); - size_t num_pages = real_size / MSEG_ALIGNED_SIZE; - - FreeBlock *block; - FreeBlock *last; - FreeBlock *nb = (FreeBlock *) p; - - ASSERT(((unsigned long)p & CHECK_POINTER_MASK)==0); - - if (real_size > MSEG_ALIGNED_SIZE) { - if (do_unmap(((char *) p) + MSEG_ALIGNED_SIZE, real_size - MSEG_ALIGNED_SIZE)) { - return 1; - } - } - - TAKE_LOCK(); - - last = NULL; - block = first; - while(block != NULL && ((void *) block) < p) { - last = block; - block = block->next; - } - - if (block != NULL && - ((void *) block) == ((void *) (((char *) p) + real_size))) { - /* Merge new free block with following */ - nb->num = block->num + num_pages; - nb->next = block->next; - if (do_unmap(block, MSEG_ALIGNED_SIZE)) { - RELEASE_LOCK(); - return 1; - } - } else { - /* just link in */ - nb->num = num_pages; - nb->next = block; - } - if (last != NULL) { - if (p == ((void *) (((char *) last) + (last->num * MSEG_ALIGNED_SIZE)))) { - /* Merge with previous */ - last->num += nb->num; - last->next = nb->next; - if (do_unmap(nb, MSEG_ALIGNED_SIZE)) { - RELEASE_LOCK(); - return 1; - } - } else { - last->next = nb; - } - } else { - first = nb; - } - RELEASE_LOCK(); - return 0; -} - -static void *pmremap(void *old_address, size_t old_size, - size_t new_size) -{ - size_t new_real_size = ALIGNED_CEILING(new_size); - size_t new_num_pages = new_real_size / MSEG_ALIGNED_SIZE; - size_t old_real_size = ALIGNED_CEILING(old_size); - size_t old_num_pages = old_real_size / MSEG_ALIGNED_SIZE; - if (new_num_pages == old_num_pages) { - return old_address; - } else if (new_num_pages < old_num_pages) { /* Shrink */ - size_t nfb_pages = old_num_pages - new_num_pages; - size_t nfb_real_size = old_real_size - new_real_size; - void *vnfb = (void *) (((char *)old_address) + new_real_size); - FreeBlock *nfb = (FreeBlock *) vnfb; - FreeBlock **block; - TAKE_LOCK(); - for (block = &first; - *block != NULL && (*block) < nfb; - block = &((*block)->next)) - ; - if (!(*block) || - (*block) > ((FreeBlock *)(((char *) vnfb) + nfb_real_size))) { - /* Normal link in */ - if (nfb_pages > 1) { - if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE), - (nfb_pages - 1)*MSEG_ALIGNED_SIZE)) { - return NULL; - } - } - nfb->next = (*block); - nfb->num = nfb_pages; - (*block) = nfb; - } else { /* block merge */ - nfb->next = (*block)->next; - nfb->num = nfb_pages + (*block)->num; - /* unmap also the first page of the next freeblock */ - (*block) = nfb; - if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE), - nfb_pages*MSEG_ALIGNED_SIZE)) { - return NULL; - } - } - RELEASE_LOCK(); - return old_address; - } else { /* Enlarge */ - FreeBlock **block; - void *old_end = (void *) (((char *)old_address) + old_real_size); - TAKE_LOCK(); - for (block = &first; - *block != NULL && (*block) < (FreeBlock *) old_address; - block = &((*block)->next)) - ; - if ((*block) == NULL || old_end > ((void *) RANGE_MAX) || - (*block) != old_end || - (*block)->num < (new_num_pages - old_num_pages)) { - /* cannot extend */ - void *result; - RELEASE_LOCK(); - result = pmmap(new_size); - if (result == NULL) { - return NULL; - } - memcpy(result,old_address,old_size); - if (pmunmap(old_address,old_size)) { - /* Oups... */ - pmunmap(result,new_size); - return NULL; - } - return result; - } else { /* extend */ - size_t remaining_pages = (*block)->num - - (new_num_pages - old_num_pages); - if (!remaining_pages) { - void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE); - void *n = (*block)->next; - size_t x = ((*block)->num - 1) * MSEG_ALIGNED_SIZE; - if (x > 0) { - if (do_map(p,x) == NULL) { - RELEASE_LOCK(); - return NULL; - } - } - (*block) = n; - } else { - FreeBlock *nfb = (FreeBlock *) ((void *) - (((char *) old_address) + - new_real_size)); - void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE); - if (do_map(p,new_real_size - old_real_size) == NULL) { - RELEASE_LOCK(); - return NULL; - } - nfb->num = remaining_pages; - nfb->next = (*block)->next; - (*block) = nfb; - } - RELEASE_LOCK(); - return old_address; - } - } } -#endif /* HALFWORD_HEAP */ diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h index 3cab9e18da..2284b3f8f1 100644 --- a/erts/emulator/sys/common/erl_mseg.h +++ b/erts/emulator/sys/common/erl_mseg.h @@ -22,25 +22,25 @@ #include "sys.h" #include "erl_alloc_types.h" +#include "erl_mmap.h" -#ifndef HAVE_MMAP -# define HAVE_MMAP 0 -#endif -#ifndef HAVE_MREMAP -# define HAVE_MREMAP 0 -#endif - -#if HAVE_MMAP +/* + * We currently only enable mseg_alloc if we got + * a genuine mmap()/munmap() primitive. It is possible + * to utilize erts_mmap() withiout a mmap support but + * alloc_util needs to be prepared before we can do + * that. + */ +#ifdef ERTS_HAVE_GENUINE_OS_MMAP # define HAVE_ERTS_MSEG 1 -# define HAVE_SUPER_ALIGNED_MB_CARRIERS 1 +# define ERTS_HAVE_MSEG_SUPER_ALIGNED 1 #else # define HAVE_ERTS_MSEG 0 -# define HAVE_SUPER_ALIGNED_MB_CARRIERS 0 +# define ERTS_HAVE_MSEG_SUPER_ALIGNED 0 #endif -#if HAVE_SUPER_ALIGNED_MB_CARRIERS -# define MSEG_ALIGN_BITS (18) - /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */ +#if ERTS_HAVE_MSEG_SUPER_ALIGNED +# define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS #else /* If we don't use super aligned multiblock carriers * we will mmap with page size alignment (and thus use corresponding @@ -68,6 +68,7 @@ typedef struct { Uint rmcbf; Uint mcs; Uint nos; + ErtsMMapInit mmap; } ErtsMsegInit_t; #define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \ @@ -75,7 +76,8 @@ typedef struct { 4*1024*1024, /* amcbf: Absolute max cache bad fit */ \ 20, /* rmcbf: Relative max cache bad fit */ \ 10, /* mcs: Max cache size */ \ - 1000 /* cci: Cache check interval */ \ + 1000, /* cci: Cache check interval */ \ + ERTS_MMAP_INIT_DEFAULT_INITER \ } typedef struct { @@ -91,12 +93,12 @@ typedef struct { extern const ErtsMsegOpt_t erts_mseg_default_opt; -void *erts_mseg_alloc(ErtsAlcType_t, Uint *, Uint); -void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, Uint, const ErtsMsegOpt_t *); -void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint, Uint); -void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, Uint, const ErtsMsegOpt_t *); -void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *, Uint); -void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *, Uint, const ErtsMsegOpt_t *); +void *erts_mseg_alloc(ErtsAlcType_t, UWord *, Uint); +void *erts_mseg_alloc_opt(ErtsAlcType_t, UWord *, Uint, const ErtsMsegOpt_t *); +void erts_mseg_dealloc(ErtsAlcType_t, void *, UWord, Uint); +void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, UWord, Uint, const ErtsMsegOpt_t *); +void *erts_mseg_realloc(ErtsAlcType_t, void *, UWord, UWord *, Uint); +void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, UWord, UWord *, Uint, const ErtsMsegOpt_t *); void erts_mseg_clear_cache(void); void erts_mseg_cache_check(void); Uint erts_mseg_no( const ErtsMsegOpt_t *); @@ -109,9 +111,6 @@ Eterm erts_mseg_info(int, int *, void*, int, Uint **, Uint *); #endif /* #if HAVE_ERTS_MSEG */ -unsigned long erts_mseg_test(unsigned long, - unsigned long, - unsigned long, - unsigned long); +UWord erts_mseg_test(UWord, UWord, UWord, UWord); #endif /* #ifndef ERL_MSEG_H_ */ diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index a523d67158..aa412a20c8 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2006-2012. All Rights Reserved. + * Copyright Ericsson AB 2006-2013. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -40,6 +40,13 @@ # include "config.h" #endif +#if defined(__DARWIN__) || defined(__APPLE__) && defined(__MACH__) +/* Setting _DARWIN_UNLIMITED_SELECT before including sys/select.h enables + * the version of select() that does not place a limit on the fd_set. + */ +# define _DARWIN_UNLIMITED_SELECT +#endif + #ifndef WANT_NONBLOCKING # define WANT_NONBLOCKING #endif @@ -90,6 +97,52 @@ #define HARD_DEBUG #endif +#ifdef _DARWIN_UNLIMITED_SELECT +typedef struct { + size_t sz; + fd_set* ptr; +}ERTS_fd_set; +# define ERTS_FD_CLR(fd, fds) FD_CLR((fd), (fds)->ptr) +# define ERTS_FD_SET(fd, fds) FD_SET((fd), (fds)->ptr) +# define ERTS_FD_ISSET(fd,fds) FD_ISSET((fd), (fds)->ptr) +# define ERTS_FD_ZERO(fds) memset((fds)->ptr, 0, (fds)->sz) +# define ERTS_FD_SIZE(n) ((((n)+NFDBITS-1)/NFDBITS)*sizeof(fd_mask)) + +static void ERTS_FD_COPY(ERTS_fd_set *src, ERTS_fd_set *dst) +{ + if (dst->sz != src->sz) { + dst->ptr = dst->ptr + ? erts_realloc(ERTS_ALC_T_SELECT_FDS, dst->ptr, src->sz) + : erts_alloc(ERTS_ALC_T_SELECT_FDS, src->sz); + dst->sz = src->sz; + } + memcpy(dst->ptr, src->ptr, src->sz); +} + +static ERTS_INLINE +int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, + ERTS_fd_set *exceptfds, struct timeval *timeout) +{ + ASSERT(!readfds || readfds->sz >= ERTS_FD_SIZE(nfds)); + ASSERT(!writefds || writefds->sz >= ERTS_FD_SIZE(nfds)); + ASSERT(!exceptfds); + return select(nfds, + (readfds ? readfds->ptr : NULL ), + (writefds ? writefds->ptr : NULL), + NULL, + timeout); +} + +#else /* !_DARWIN_UNLIMITED_SELECT */ +# define ERTS_fd_set fd_set +# define ERTS_FD_CLR FD_CLR +# define ERTS_FD_ISSET FD_ISSET +# define ERTS_FD_SET FD_SET +# define ERTS_FD_ZERO FD_ZERO +# define ERTS_FD_COPY(src,dst) (*(dst) = *(src)) +# define ERTS_SELECT select +#endif + #define ERTS_POLL_USE_BATCH_UPDATE_POLLSET (ERTS_POLL_USE_DEVPOLL \ || ERTS_POLL_USE_KQUEUE) #define ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE \ @@ -244,10 +297,10 @@ struct ErtsPollSet_ { #if ERTS_POLL_USE_FALLBACK int no_select_fds; #endif - fd_set input_fds; - fd_set res_input_fds; - fd_set output_fds; - fd_set res_output_fds; + ERTS_fd_set input_fds; + ERTS_fd_set res_input_fds; + ERTS_fd_set output_fds; + ERTS_fd_set res_output_fds; #endif #if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE ErtsPollSetUpdateRequestsBlock update_requests; @@ -624,6 +677,33 @@ grow_poll_fds(ErtsPollSet ps, int min_ix) } #endif +#ifdef _DARWIN_UNLIMITED_SELECT +static void +grow_select_fds(int fd, ERTS_fd_set* fds) +{ + int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(fd + 1); + if (new_len > max_fds) + new_len = max_fds; + new_len = ERTS_FD_SIZE(new_len); + fds->ptr = fds->sz + ? erts_realloc(ERTS_ALC_T_SELECT_FDS, fds->ptr, new_len) + : erts_alloc(ERTS_ALC_T_SELECT_FDS, new_len); + memset((char*)fds->ptr + fds->sz, 0, new_len - fds->sz); + fds->sz = new_len; +} +static ERTS_INLINE void +ensure_select_fds(int fd, ERTS_fd_set* in, ERTS_fd_set* out) +{ + ASSERT(in->sz == out->sz); + if (ERTS_FD_SIZE(fd+1) > in->sz) { + grow_select_fds(fd, in); + grow_select_fds(fd, out); + } +} +#else +# define ensure_select_fds(fd, in, out) do {} while(0) +#endif /* _DARWIN_UNLIMITED_SELECT */ + static void grow_fds_status(ErtsPollSet ps, int min_fd) { @@ -1290,22 +1370,23 @@ static int update_pollset(ErtsPollSet ps, int fd) #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ { ErtsPollEvents events = ps->fds_status[fd].events; + ensure_select_fds(fd, &ps->input_fds, &ps->output_fds); if ((ERTS_POLL_EV_IN & events) != (ERTS_POLL_EV_IN & ps->fds_status[fd].used_events)) { if (ERTS_POLL_EV_IN & events) { - FD_SET(fd, &ps->input_fds); + ERTS_FD_SET(fd, &ps->input_fds); } else { - FD_CLR(fd, &ps->input_fds); + ERTS_FD_CLR(fd, &ps->input_fds); } } if ((ERTS_POLL_EV_OUT & events) != (ERTS_POLL_EV_OUT & ps->fds_status[fd].used_events)) { if (ERTS_POLL_EV_OUT & events) { - FD_SET(fd, &ps->output_fds); + ERTS_FD_SET(fd, &ps->output_fds); } else { - FD_CLR(fd, &ps->output_fds); + ERTS_FD_CLR(fd, &ps->output_fds); } } @@ -1789,7 +1870,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, while (fd < end_fd && res < max_res) { pr[res].events = (ErtsPollEvents) 0; - if (FD_ISSET(fd, &ps->res_input_fds)) { + if (ERTS_FD_ISSET(fd, &ps->res_input_fds)) { #if ERTS_POLL_USE_FALLBACK if (fd == ps->kp_fd) { res += get_kp_results(ps, &pr[res], max_res-res); @@ -1805,7 +1886,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, #endif pr[res].events |= ERTS_POLL_EV_IN; } - if (FD_ISSET(fd, &ps->res_output_fds)) + if (ERTS_FD_ISSET(fd, &ps->res_output_fds)) pr[res].events |= ERTS_POLL_EV_OUT; if (pr[res].events) { pr[res].fd = fd; @@ -1832,24 +1913,23 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, while (fd < end_fd && res < max_res) { if (ps->fds_status[fd].events) { int sres; - fd_set *iset = NULL; - fd_set *oset = NULL; + ERTS_fd_set *iset = NULL; + ERTS_fd_set *oset = NULL; if (ps->fds_status[fd].events & ERTS_POLL_EV_IN) { iset = &ps->res_input_fds; - FD_ZERO(iset); - FD_SET(fd, iset); + ERTS_FD_ZERO(iset); + ERTS_FD_SET(fd, iset); } if (ps->fds_status[fd].events & ERTS_POLL_EV_OUT) { oset = &ps->res_output_fds; - FD_ZERO(oset); - FD_SET(fd, oset); - + ERTS_FD_ZERO(oset); + ERTS_FD_SET(fd, oset); } do { /* Initiate 'tv' each time; select() may modify it */ SysTimeval tv = {0, 0}; - sres = select(ps->max_fd+1, iset, oset, NULL, &tv); + sres = ERTS_SELECT(ps->max_fd+1, iset, oset, NULL, &tv); } while (sres < 0 && errno == EINTR); if (sres < 0) { #if ERTS_POLL_USE_FALLBACK @@ -1873,7 +1953,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, } else if (sres > 0) { pr[res].fd = fd; - if (iset && FD_ISSET(fd, iset)) { + if (iset && ERTS_FD_ISSET(fd, iset)) { #if ERTS_POLL_USE_FALLBACK if (fd == ps->kp_fd) { res += get_kp_results(ps, @@ -1891,7 +1971,7 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, #endif pr[res].events |= ERTS_POLL_EV_IN; } - if (oset && FD_ISSET(fd, oset)) { + if (oset && ERTS_FD_ISSET(fd, oset)) { pr[res].events |= ERTS_POLL_EV_OUT; } ASSERT(pr[res].events); @@ -1992,14 +2072,14 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res) #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ SysTimeval to = *tv; - ps->res_input_fds = ps->input_fds; - ps->res_output_fds = ps->output_fds; - + ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); + ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); + #ifdef ERTS_SMP if (to.tv_sec || to.tv_usec) erts_thr_progress_prepare_wait(NULL); #endif - res = select(ps->max_fd + 1, + res = ERTS_SELECT(ps->max_fd + 1, &ps->res_input_fds, &ps->res_output_fds, NULL, @@ -2027,7 +2107,7 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res) ERTS_POLLSET_LOCK(ps); handle_update_requests(ps); ERTS_POLLSET_UNLOCK(ps); - res = select(ps->max_fd + 1, + res = ERTS_SELECT(ps->max_fd + 1, &ps->res_input_fds, &ps->res_output_fds, NULL, @@ -2077,7 +2157,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, #ifdef ERTS_POLL_DEBUG_PRINT erts_printf("Entering erts_poll_wait(), timeout=%d\n", - (int) tv->tv_sec*1000 + tv->tv_usec/1000); + (int) tvp->tv_sec*1000 + tvp->tv_usec/1000); #endif if (ERTS_POLLSET_SET_POLLED_CHK(ps)) { @@ -2233,7 +2313,8 @@ ERTS_POLL_EXPORT(erts_poll_init)(void) max_fds = OPEN_MAX; #endif -#if ERTS_POLL_USE_SELECT && defined(FD_SETSIZE) +#if ERTS_POLL_USE_SELECT && defined(FD_SETSIZE) && \ + !defined(_DARWIN_UNLIMITED_SELECT) if (max_fds > FD_SETSIZE) max_fds = FD_SETSIZE; #endif @@ -2301,10 +2382,21 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void) #if ERTS_POLL_USE_FALLBACK ps->no_select_fds = 0; #endif - FD_ZERO(&ps->input_fds); - FD_ZERO(&ps->res_input_fds); - FD_ZERO(&ps->output_fds); - FD_ZERO(&ps->res_output_fds); +#ifdef _DARWIN_UNLIMITED_SELECT + ps->input_fds.sz = 0; + ps->input_fds.ptr = NULL; + ps->res_input_fds.sz = 0; + ps->res_input_fds.ptr = NULL; + ps->output_fds.sz = 0; + ps->output_fds.ptr = NULL; + ps->res_output_fds.sz = 0; + ps->res_output_fds.ptr = NULL; +#else + ERTS_FD_ZERO(&ps->input_fds); + ERTS_FD_ZERO(&ps->res_input_fds); + ERTS_FD_ZERO(&ps->output_fds); + ERTS_FD_ZERO(&ps->res_output_fds); +#endif #endif #if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE ps->update_requests.next = NULL; @@ -2382,6 +2474,16 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) if (ps->poll_fds) erts_free(ERTS_ALC_T_POLL_FDS, (void *) ps->poll_fds); #elif ERTS_POLL_USE_SELECT +#ifdef _DARWIN_UNLIMITED_SELECT + if (ps->input_fds.ptr) + erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->input_fds.ptr); + if (ps->res_input_fds.ptr) + erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_input_fds.ptr); + if (ps->output_fds.ptr) + erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->output_fds.ptr); + if (ps->res_output_fds.ptr) + erts_free(ERTS_ALC_T_SELECT_FDS, (void *) ps->res_output_fds.ptr); +#endif #endif #if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE { @@ -2408,7 +2510,8 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps) pollsets = pollsets->next; else { ErtsPollSet prev_ps; - for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next); + for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next) + ; ASSERT(ps == prev_ps->next); prev_ps->next = ps->next; } @@ -2445,6 +2548,10 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip) #if ERTS_POLL_USE_POLL size += ps->poll_fds_len*sizeof(struct pollfd); #elif ERTS_POLL_USE_SELECT +#ifdef _DARWIN_UNLIMITED_SELECT + size += ps->input_fds.sz + ps->res_input_fds.sz + + ps->output_fds.sz + ps->res_output_fds.sz; +#endif #endif #if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index 09ed9f41af..2f1c05f401 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -90,7 +90,7 @@ # if defined(ERTS_USE_POLL) # undef ERTS_POLL_USE_POLL # define ERTS_POLL_USE_POLL 1 -# elif !defined(__WIN32__) +# elif !defined(__WIN32__) && !defined(__OSE__) # undef ERTS_POLL_USE_SELECT # define ERTS_POLL_USE_SELECT 1 # endif @@ -99,13 +99,31 @@ typedef Uint32 ErtsPollEvents; #undef ERTS_POLL_EV_E2N -#if defined(__WIN32__) /* --- win32 ------------------------------- */ +#if defined(__WIN32__) || defined(__OSE__) /* --- win32 or ose -------- */ #define ERTS_POLL_EV_IN 1 #define ERTS_POLL_EV_OUT 2 #define ERTS_POLL_EV_ERR 4 #define ERTS_POLL_EV_NVAL 8 +#ifdef __OSE__ + +typedef struct ErtsPollOseMsgList_ { + struct ErtsPollOseMsgList_ *next; + union SIGNAL *data; +} ErtsPollOseMsgList; + +struct erts_sys_fd_type { + SIGSELECT signo; + ErlDrvOseEventId id; + ErtsPollOseMsgList *msgs; + ErlDrvOseEventId (*resolve_signal)(union SIGNAL *sig); + ethr_mutex mtx; + void *extra; +}; + +#endif + #elif ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ #include <sys/epoll.h> @@ -228,7 +246,8 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet, ErtsSysFdType, ErtsPollEvents, int on, - int* wake_poller); + int* wake_poller + ); void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet, ErtsPollControlEntry [], int on); diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c index 31ad3b82d5..e63f0bda54 100644 --- a/erts/emulator/sys/common/erl_sys_common_misc.c +++ b/erts/emulator/sys/common/erl_sys_common_misc.c @@ -44,6 +44,14 @@ #endif #endif +/* + * erts_check_io_time is used by the erl_check_io implementation. The + * global erts_check_io_time variable is declared here since there + * (often) exist two versions of erl_check_io (kernel-poll and + * non-kernel-poll), and we dont want two versions of this variable. + */ +erts_smp_atomic_t erts_check_io_time; + /* Written once and only once */ static int filename_encoding = ERL_FILENAME_UNKNOWN; @@ -52,7 +60,7 @@ static int filename_warning = ERL_FILENAME_WARNING_WARNING; /* Default unicode on windows and MacOS X */ static int user_filename_encoding = ERL_FILENAME_UTF8; #else -static int user_filename_encoding = ERL_FILENAME_LATIN1; +static int user_filename_encoding = ERL_FILENAME_UNKNOWN; #endif /* This controls the heuristic in printing characters in shell and w/ io:format("~tp", ...) etc. */ diff --git a/erts/emulator/sys/ose/beam.lmconf b/erts/emulator/sys/ose/beam.lmconf new file mode 100644 index 0000000000..4ad46b01d9 --- /dev/null +++ b/erts/emulator/sys/ose/beam.lmconf @@ -0,0 +1,26 @@ +OSE_LM_STACK_SIZES=256,512,1024,2048,4096,8192,16384,65536 +OSE_LM_SIGNAL_SIZES=31,63,127,255,1023,4095,16383,65535 +OSE_LM_POOL_SIZE=0x200000 +OSE_LM_MAIN_NAME=main +OSE_LM_MAIN_STACK_SIZE=0xF000 +OSE_LM_MAIN_PRIORITY=20 +## Has to be of a type that allows MAM +OSE_LM_PROGRAM_TYPE=APP_RAM +OSE_LM_DATA_INIT=YES +OSE_LM_BSS_INIT=YES +OSE_LM_EXEC_MODEL=SHARED +HEAP_MAX_SIZE=1000000000 +HEAP_SMALL_BUF_INIT_SIZE=20971520 +HEAP_LARGE_BUF_THRESHOLD=16000000 +HEAP_LOCK_TYPE=2 + +ERTS_DEFAULT_PRIO=24 +ERTS_SCHEDULER_PRIO=24 +ERTS_ASYNC_PRIO=22 +ERTS_AUX_PRIO=24 +ERTS_SYS_MSG_DISPATCHER_PRIO=21 + +# Setting the environment variable EFS_RESOLVE_TMO on the block to 0. +# This will eliminiate delays when trying to open files on not mounted +# volumes. +EFS_RESOLVE_TMO=0 diff --git a/erts/emulator/sys/ose/driver_int.h b/erts/emulator/sys/ose/driver_int.h new file mode 100644 index 0000000000..2c9ac955d8 --- /dev/null +++ b/erts/emulator/sys/ose/driver_int.h @@ -0,0 +1,41 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1997-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +/* + * System dependant driver declarations + */ + +#ifndef __DRIVER_INT_H__ +#define __DRIVER_INT_H__ + +#ifdef HAVE_SYS_UIO_H +#include <sys/types.h> +#include <sys/uio.h> + +typedef struct iovec SysIOVec; + +#else + +typedef struct { + char* iov_base; + int iov_len; +} SysIOVec; + +#endif + +#endif diff --git a/erts/emulator/sys/ose/erl_main.c b/erts/emulator/sys/ose/erl_main.c new file mode 100644 index 0000000000..23a9bc93a4 --- /dev/null +++ b/erts/emulator/sys/ose/erl_main.c @@ -0,0 +1,53 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2000-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include <stdlib.h> + +#include "sys.h" +#include "erl_vm.h" +#include "global.h" +#include "ose.h" + +int +main(int argc, char **argv) { + + (void)stdin;(void)stdout;(void)stderr; + + /* When starting using pm_create -c ARGV="-- -root ..", argv[0] is the first + part of ARGV and not the name of the executable. So we shuffle some + pointers here to make erl_start happy. */ + if (argv[0][0] == '-') { + int i; + char **tmp_argv = malloc(sizeof(char*)*(argc+1)); + for (i = 0; i < argc; i++) + tmp_argv[i+1] = argv[i]; + tmp_argv[0] = "beam"; + erl_start(argc+1,tmp_argv); + free(tmp_argv); + } else { + erl_start(argc,argv); + } + + stop(current_process()); + + return 0; +} diff --git a/erts/emulator/sys/ose/erl_ose_sys.h b/erts/emulator/sys/ose/erl_ose_sys.h new file mode 100644 index 0000000000..cd66d95c26 --- /dev/null +++ b/erts/emulator/sys/ose/erl_ose_sys.h @@ -0,0 +1,353 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1997-2011. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + * + * This file handles differences between different Unix systems. + * This should be the only place with conditional compilation + * depending on the type of OS. + */ + +#ifndef _ERL_OSE_SYS_H +#define _ERL_OSE_SYS_H + +#include "ose.h" +#undef NIL +#include "ramlog.h" +#include "erts.sig" + +#include "fcntl.h" +#include "math.h" +#include "stdio.h" +#include "stdlib.h" +#include "string.h" +#include "sys/param.h" +#include "sys/time.h" +#include "time.h" +#include "dirent.h" +#include "ethread.h" + +/* FIXME: configuration options */ +#define ERTS_SCHED_MIN_SPIN 1 +#define ERTS_SCHED_ONLY_POLL_SCHED_1 1 +#define ERTS_SCHED_FAIR 1 +#define NO_SYSCONF 1 +#define OPEN_MAX FOPEN_MAX + +#define MAP_ANON MAP_ANONYMOUS + +#ifndef HAVE_MMAP +# define HAVE_MMAP 0 +#endif + +#if HAVE_MMAP +# include "sys/mman.h" +#endif + +/* + * Min number of async threads + */ +#define ERTS_MIN_NO_OF_ASYNC_THREADS 1 + +/* + * Our own type of "FD's" + */ +#define ERTS_SYS_FD_TYPE struct erts_sys_fd_type* +#define NO_FSTAT_ON_SYS_FD_TYPE 1 /* They are signals, not files */ + +#include "sys/stat.h" + +/* FIXME mremap is not defined in OSE - POSIX issue */ +extern void *mremap (void *__addr, size_t __old_len, size_t __new_len, + int __flags, ...); + +/* FIXME: mremap constants */ +#define MREMAP_MAYMOVE 1 +#define MREMAP_FIXED 2 + +typedef void *GETENV_STATE; + +/* +** For the erl_timer_sup module. +*/ +#define HAVE_GETHRTIME + +typedef long long SysHrTime; +extern SysHrTime sys_gethrtime(void); + +void sys_init_hrtime(void); + +typedef time_t erts_time_t; + +typedef struct timeval SysTimeval; + +#define sys_gettimeofday(Arg) ((void) gettimeofday((Arg), NULL)) + +typedef struct { + clock_t tms_utime; + clock_t tms_stime; + clock_t tms_cutime; + clock_t tms_cstime; +} SysTimes; + +extern int erts_ticks_per_sec; + +#define SYS_CLK_TCK (erts_ticks_per_sec) + +extern clock_t sys_times(SysTimes *buffer); + +/* No use in having other resolutions than 1 Ms. */ +#define SYS_CLOCK_RESOLUTION 1 + +#ifdef NO_FPE_SIGNALS + +#define erts_get_current_fp_exception() NULL +#ifdef ERTS_SMP +#define erts_thread_init_fp_exception() do{}while(0) +#endif +# define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0) +# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!finite(f)) { Action; } else {} +# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action) +# define __ERTS_SAVE_FP_EXCEPTION(fpexnp) +# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp) + +#define erts_sys_block_fpe() 0 +#define erts_sys_unblock_fpe(x) do{}while(0) + +#else /* !NO_FPE_SIGNALS */ + +extern volatile unsigned long *erts_get_current_fp_exception(void); +#ifdef ERTS_SMP +extern void erts_thread_init_fp_exception(void); +#endif +# if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) +# define erts_fwait(fpexnp,f) \ + __asm__ __volatile__("fwait" : "=m"(*(fpexnp)) : "m"(f)) +# elif (defined(__powerpc__) || defined(__ppc__)) && defined(__GNUC__) +# define erts_fwait(fpexnp,f) \ + __asm__ __volatile__("" : "=m"(*(fpexnp)) : "fm"(f)) +# elif defined(__sparc__) && defined(__linux__) && defined(__GNUC__) +# define erts_fwait(fpexnp,f) \ + __asm__ __volatile__("" : "=m"(*(fpexnp)) : "em"(f)) +# else +# define erts_fwait(fpexnp,f) \ + __asm__ __volatile__("" : "=m"(*(fpexnp)) : "g"(f)) +# endif +# if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) + extern void erts_restore_fpu(void); +# else +# define erts_restore_fpu() /*empty*/ +# endif +# if (!defined(__GNUC__) || \ + (__GNUC__ < 2) || \ + (__GNUC__ == 2 && __GNUC_MINOR < 96)) && \ + !defined(__builtin_expect) +# define __builtin_expect(x, expected_value) (x) +# endif +static __inline__ int erts_check_fpe(volatile unsigned long *fp_exception, double f) +{ + erts_fwait(fp_exception, f); + if (__builtin_expect(*fp_exception == 0, 1)) + return 0; + *fp_exception = 0; + erts_restore_fpu(); + return 1; +} +# undef erts_fwait +# undef erts_restore_fpu +extern void erts_fp_check_init_error(volatile unsigned long *fp_exception); +static __inline__ void __ERTS_FP_CHECK_INIT(volatile unsigned long *fp_exception) +{ + if (__builtin_expect(*fp_exception == 0, 1)) + return; + erts_fp_check_init_error(fp_exception); +} +# define __ERTS_FP_ERROR(fpexnp, f, Action) do { if (erts_check_fpe((fpexnp),(f))) { Action; } } while (0) +# define __ERTS_SAVE_FP_EXCEPTION(fpexnp) unsigned long old_erl_fp_exception = *(fpexnp) +# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp) \ + do { *(fpexnp) = old_erl_fp_exception; } while (0) + /* This is for library calls where we don't trust the external + code to always throw floating-point exceptions on errors. */ +static __inline__ int erts_check_fpe_thorough(volatile unsigned long *fp_exception, double f) +{ + return erts_check_fpe(fp_exception, f) || !finite(f); +} +# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) \ + do { if (erts_check_fpe_thorough((fpexnp),(f))) { Action; } } while (0) + +int erts_sys_block_fpe(void); +void erts_sys_unblock_fpe(int); + +#endif /* !NO_FPE_SIGNALS */ + +#define ERTS_FP_CHECK_INIT(p) __ERTS_FP_CHECK_INIT(&(p)->fp_exception) +#define ERTS_FP_ERROR(p, f, A) __ERTS_FP_ERROR(&(p)->fp_exception, f, A) +#define ERTS_FP_ERROR_THOROUGH(p, f, A) __ERTS_FP_ERROR_THOROUGH(&(p)->fp_exception, f, A) + +/* FIXME: force HAVE_GETPAGESIZE and stub getpagesize */ +#ifndef HAVE_GETPAGESIZE +#define HAVE_GETPAGESIZE 1 +#endif + +extern int getpagesize(void); + +#ifndef HZ +#define HZ 60 +#endif + +/* OSE5 doesn't provide limits.h so a number of macros should be + * added manually */ + +#ifndef CHAR_BIT +#define CHAR_BIT 8 +#endif + +/* Minimum and maximum values a `signed int' can hold. */ +#ifndef INT_MAX +#define INT_MAX 2147483647 +#endif + +#ifndef INT_MIN +#define INT_MIN (-INT_MAX - 1) +#endif + +#ifndef UINT_MAX +# define UINT_MAX 4294967295U +#endif + +/* +static void erts_ose_sys_send(union SIGNAL **signal,PROCESS dst, + char* file,int line) { + SIGSELECT **ziggy = (SIGSELECT**)signal; + printf("%s:%d 0x%x Send signal 0x%x(0x%x) to 0x%x\r\n", + file,line,current_process(),ziggy[0][0],*ziggy,dst); + send(signal,dst); +} +#define send(signal,dst) erts_ose_sys_send(signal,dst,__FILE__,__LINE__) + +static void erts_ose_sys_send_w_sender(union SIGNAL **signal, + PROCESS sender,PROCESS dst, + char* file,int line) { + SIGSELECT **ziggy = (SIGSELECT**)signal; + printf("%s:%d 0x%x Send signal 0x%x(0x%x) to 0x%x as 0x%x\r\n", + file,line,current_process(),ziggy[0][0],*ziggy,dst,sender); + send_w_sender(signal,sender,dst); +} +#define send_w_sender(signal,sender,dst) \ + erts_ose_sys_send_w_sender(signal,sender,dst,__FILE__,__LINE__) + + +static union SIGNAL *erts_ose_sys_receive(SIGSELECT *sigsel, + char *file, + int line) { + SIGSELECT *sig; + int i; + + printf("%s:%d 0x%x receive({%d,",file,line,current_process(),sigsel[0]); + for (i = 1; i < sigsel[0]; i++) + printf("0x%x, ",sigsel[i]); + if (sigsel[0] != 0) + printf("0x%x",sigsel[i]); + printf("})\n"); + sig = (SIGSELECT*)receive(sigsel); + printf("%s:%d 0x%x got 0x%x from 0x%x\n",file,line,current_process(), + *sig,sender((union SIGNAL**)(&sig))); + return (union SIGNAL*)sig; +} +#define receive(SIGSEL) erts_ose_sys_receive(SIGSEL,__FILE__,__LINE__) + +static union SIGNAL *erts_ose_sys_receive_w_tmo(OSTIME tmo,SIGSELECT *sigsel, + char *file,int line) { + SIGSELECT *sig; + int i; + if (tmo == 0) { + sig = (SIGSELECT*)receive_w_tmo(tmo,sigsel); + if (sig != NULL) { + printf("%s:%d 0x%x receive_w_tmo(0,{%d,",file,line,current_process(), + sigsel[0]); + for (i = 1; i < sigsel[0]; i++) + printf("0x%x, ",sigsel[i]); + if (sigsel[0] != 0) + printf("0x%x",sigsel[i]); + printf("})\n"); + printf("%s:%d 0x%x got 0x%x from 0x%x\n",file,line,current_process(), + *sig,sender((union SIGNAL**)(&sig))); + } + } else { + printf("%s:%d 0x%x receive_w_tmo(%u,{%d,",file,line,current_process(),tmo, + sigsel[0]); + for (i = 1; i < sigsel[0]; i++) + printf("0x%x, ",sigsel[i]); + if (sigsel[0] != 0) + printf("0x%x",sigsel[i]); + printf("})\n"); + sig = (SIGSELECT*)receive_w_tmo(tmo,sigsel); + printf("%s:%d 0x%x got ",file,line,current_process()); + if (sig == NULL) + printf("TIMEOUT\n"); + else + printf("0x%x from 0x%x\n",*sig,sender((union SIGNAL**)(&sig))); + } + + return (union SIGNAL*)sig; +} + +#define receive_w_tmo(tmo,sigsel) erts_ose_sys_receive_w_tmo(tmo,sigsel, \ + __FILE__,__LINE__) + +static union SIGNAL *erts_ose_sys_receive_fsem(OSTIME tmo,SIGSELECT *sigsel, + OSFSEMVAL fsem, + char *file,int line) { + SIGSELECT *sig; + int i; + if (tmo == 0) { + sig = (SIGSELECT*)receive_fsem(tmo,sigsel,fsem); + if (sig != NULL && sig != OS_RCV_FSEM) { + printf("%s:%d 0x%x receive_fsem(0,{%d,",file,line,current_process(), + sigsel[0]); + for (i = 1; i < sigsel[0]; i++) + printf("0x%x, ",sigsel[i]); + if (sigsel[0] != 0) + printf("0x%x",sigsel[i]); + printf("},%d)\n",fsem); + printf("%s:%d 0x%x got 0x%x from 0x%x\n",file,line,current_process(), + *sig,sender((union SIGNAL**)(&sig))); + } + } else { + printf("%s:%d 0x%x receive_fsem(%u,{%d,",file,line,current_process(),tmo, + sigsel[0]); + for (i = 1; i < sigsel[0]; i++) + printf("0x%x, ",sigsel[i]); + if (sigsel[0] != 0) + printf("0x%x",sigsel[i]); + printf("},%d)\n",fsem); + sig = (SIGSELECT*)receive_fsem(tmo,sigsel,fsem); + printf("%s:%d 0x%x got ",file,line,current_process()); + if (sig == NULL) + printf("TIMEOUT\n"); + else if (sig == OS_RCV_FSEM) + printf("FSEM\n"); + else + printf("0x%x from 0x%x\n",*sig,sender((union SIGNAL**)(&sig))); + } + + return (union SIGNAL*)sig; +} + +#define receive_fsem(tmo,sigsel,fsem) \ + erts_ose_sys_receive_fsem(tmo,sigsel,fsem,__FILE__,__LINE__) +*/ +#endif /* _ERL_OSE_SYS_H */ diff --git a/erts/emulator/sys/ose/erl_ose_sys_ddll.c b/erts/emulator/sys/ose/erl_ose_sys_ddll.c new file mode 100644 index 0000000000..ebd80deeaf --- /dev/null +++ b/erts/emulator/sys/ose/erl_ose_sys_ddll.c @@ -0,0 +1,126 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Interface functions to the dynamic linker using dl* functions. + * (No support in OSE, we use static linkage instead) + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_vm.h" +#include "global.h" + + +void erl_sys_ddll_init(void) { +} + +/* + * Open a shared object + */ +int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err) +{ + return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; +} + +int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err) +{ + return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; +} + +/* + * Find a symbol in the shared object + */ +int erts_sys_ddll_sym2(void *handle, const char *func_name, void **function, + ErtsSysDdllError* err) +{ + return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; +} + +/* XXX:PaN These two will be changed with new driver interface! */ + +/* + * Load the driver init function, might appear under different names depending on object arch... + */ + +int erts_sys_ddll_load_driver_init(void *handle, void **function) +{ + void *fn; + int res; + if ((res = erts_sys_ddll_sym2(handle, "driver_init", &fn, NULL)) != ERL_DE_NO_ERROR) { + res = erts_sys_ddll_sym2(handle, "_driver_init", &fn, NULL); + } + if (res == ERL_DE_NO_ERROR) { + *function = fn; + } + return res; +} + +int erts_sys_ddll_load_nif_init(void *handle, void **function, ErtsSysDdllError* err) +{ + void *fn; + int res; + if ((res = erts_sys_ddll_sym2(handle, "nif_init", &fn, err)) != ERL_DE_NO_ERROR) { + res = erts_sys_ddll_sym2(handle, "_nif_init", &fn, err); + } + if (res == ERL_DE_NO_ERROR) { + *function = fn; + } + return res; +} + +/* + * Call the driver_init function, whatever it's really called, simple on unix... +*/ +void *erts_sys_ddll_call_init(void *function) { + void *(*initfn)(void) = function; + return (*initfn)(); +} +void *erts_sys_ddll_call_nif_init(void *function) { + return erts_sys_ddll_call_init(function); +} + + + +/* + * Close a chared object + */ +int erts_sys_ddll_close2(void *handle, ErtsSysDdllError* err) +{ + return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; +} + + +/* + * Return string that describes the (current) error + */ +char *erts_sys_ddll_error(int code) +{ + return "Unspecified error"; +} + +void erts_sys_ddll_free_error(ErtsSysDdllError* err) +{ + if (err->str != NULL) { + erts_free(ERTS_ALC_T_DDLL_TMP_BUF, err->str); + } +} diff --git a/erts/emulator/sys/ose/erl_poll.c b/erts/emulator/sys/ose/erl_poll.c new file mode 100644 index 0000000000..7d2a3d1e0b --- /dev/null +++ b/erts/emulator/sys/ose/erl_poll.c @@ -0,0 +1,780 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2006-2012. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +/* + * Description: Poll interface suitable for ERTS on OSE with or without + * SMP support. + * + * The interface is currently implemented using: + * - receive + receive_fsem + * + * Author: Lukas Larsson + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "erl_thr_progress.h" +#include "erl_driver.h" +#include "erl_alloc.h" +#include "erl_poll.h" + +#define NOFILE 4096 + +/* + * Some debug macros + */ + +/* #define HARDDEBUG +#define HARDTRACE*/ +#ifdef HARDDEBUG +#ifdef HARDTRACE +#define HARDTRACEF(X, ...) { fprintf(stderr, X, __VA_ARGS__); fprintf(stderr,"\r\n"); } +#else +#define HARDTRACEF(...) +#endif + +#else +#define HARDTRACEF(X,...) +#define HARDDEBUGF(...) +#endif + +#if 0 +#define ERTS_POLL_DEBUG_PRINT +#endif + +#if defined(DEBUG) && 0 +#define HARD_DEBUG +#endif + +# define SEL_ALLOC erts_alloc +# define SEL_REALLOC realloc_wrap +# define SEL_FREE erts_free + +#ifdef ERTS_SMP + +#define ERTS_POLLSET_LOCK(PS) \ + erts_smp_mtx_lock(&(PS)->mtx) +#define ERTS_POLLSET_UNLOCK(PS) \ + erts_smp_mtx_unlock(&(PS)->mtx) + +#else + +#define ERTS_POLLSET_LOCK(PS) +#define ERTS_POLLSET_UNLOCK(PS) + +#endif + +/* + * --- Data types ------------------------------------------------------------ + */ + +union SIGNAL { + SIGSELECT sig_no; +}; + +typedef struct erts_sigsel_item_ ErtsSigSelItem; + +struct erts_sigsel_item_ { + ErtsSigSelItem *next; + ErtsSysFdType fd; + ErtsPollEvents events; +}; + +typedef struct erts_sigsel_info_ ErtsSigSelInfo; + +struct erts_sigsel_info_ { + ErtsSigSelInfo *next; + SIGSELECT signo; + ErlDrvOseEventId (*decode)(union SIGNAL* sig); + ErtsSigSelItem *fds; +}; + +struct ErtsPollSet_ { + SIGSELECT *sigs; + ErtsSigSelInfo *info; + Uint sig_count; + Uint item_count; + PROCESS interrupt; + erts_atomic32_t wakeup_state; + erts_smp_atomic32_t timeout; +#ifdef ERTS_SMP + erts_smp_mtx_t mtx; +#endif +}; + +static int max_fds = -1; + +#define ERTS_POLL_NOT_WOKEN ((erts_aint32_t) (1 << 0)) +#define ERTS_POLL_WOKEN_INTR ((erts_aint32_t) (1 << 1)) +#define ERTS_POLL_WOKEN_TIMEDOUT ((erts_aint32_t) (1 << 2)) +#define ERTS_POLL_WOKEN_IO_READY ((erts_aint32_t) (1 << 3)) +#define ERTS_POLL_SLEEPING ((erts_aint32_t) (1 << 4)) + +/* signal list prototypes */ +static ErtsSigSelInfo *get_sigsel_info(ErtsPollSet ps, SIGSELECT signo); +static ErtsSigSelItem *get_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd); +static ErtsSigSelInfo *add_sigsel_info(ErtsPollSet ps, ErtsSysFdType fd, + ErlDrvOseEventId (*decode)(union SIGNAL* sig)); +static ErtsSigSelItem *add_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd, + ErlDrvOseEventId (*decode)(union SIGNAL* sig)); +static int del_sigsel_info(ErtsPollSet ps, ErtsSigSelInfo *info); +static int del_sigsel_item(ErtsPollSet ps, ErtsSigSelItem *item); +static int update_sigsel(ErtsPollSet ps); + +static ErtsSigSelInfo * +get_sigsel_info(ErtsPollSet ps, SIGSELECT signo) { + ErtsSigSelInfo *curr = ps->info; + while (curr != NULL) { + if (curr->signo == signo) + return curr; + curr = curr->next; + } + return NULL; +} + +static ErtsSigSelItem * +get_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd) { + ErtsSigSelInfo *info = get_sigsel_info(ps,fd->signo); + ErtsSigSelItem *curr; + + if (info == NULL) + return NULL; + + curr = info->fds; + + while (curr != NULL) { + if (curr->fd->id == fd->id) { + ASSERT(curr->fd->signo == fd->signo); + return curr; + } + curr = curr->next; + } + return NULL; +} + +static ErtsSigSelInfo * +add_sigsel_info(ErtsPollSet ps, ErtsSysFdType fd, + ErlDrvOseEventId (*decode)(union SIGNAL* sig)) { + ErtsSigSelInfo *info = SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(ErtsSigSelInfo)); + info->next = ps->info; + info->fds = NULL; + info->signo = fd->signo; + info->decode = decode; + ps->info = info; + ps->sig_count++; + return info; +} + +static ErtsSigSelItem * +add_sigsel_item(ErtsPollSet ps, ErtsSysFdType fd, + ErlDrvOseEventId (*decode)(union SIGNAL* sig)) { + ErtsSigSelInfo *info = get_sigsel_info(ps,fd->signo); + ErtsSigSelItem *item = SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(ErtsSigSelItem)); + if (info == NULL) + info = add_sigsel_info(ps, fd, decode); + if (info->decode != decode) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "erts_poll_control() inconsistency: multiple resolve_signal functions for same signal (%d)\n", + fd->signo); + erts_send_error_to_logger_nogl(dsbufp); + } + ASSERT(info->decode == decode); + item->next = info->fds; + item->fd = fd; + item->events = 0; + info->fds = item; + ps->item_count++; + return item; +} + +static int del_sigsel_info(ErtsPollSet ps, ErtsSigSelInfo *info) { + ErtsSigSelInfo *curr, *prev; + + if (ps->info == info) { + ps->info = ps->info->next; + } else { + curr = ps->info->next; + prev = ps->info; + + while (curr != info) { + if (curr == NULL) + return 1; + prev = curr; + curr = curr->next; + } + prev->next = curr->next; + } + + ps->sig_count--; + SEL_FREE(ERTS_ALC_T_POLLSET, info); + return 0; +} + +static int del_sigsel_item(ErtsPollSet ps, ErtsSigSelItem *item) { + ErtsSigSelInfo *info = get_sigsel_info(ps,item->fd->signo); + ErtsSigSelItem *curr, *prev; + + ps->item_count--; + ASSERT(ps->item_count >= 0); + + if (info->fds == item) { + info->fds = info->fds->next; + SEL_FREE(ERTS_ALC_T_POLLSET,item); + if (info->fds == NULL) + return del_sigsel_info(ps,info); + return 0; + } + + curr = info->fds->next; + prev = info->fds; + + while (curr != item) { + if (curr == NULL) { + /* We did not find an item to delete so we have to + * increment item count again. + */ + ps->item_count++; + return 1; + } + prev = curr; + curr = curr->next; + } + prev->next = curr->next; + SEL_FREE(ERTS_ALC_T_POLLSET,item); + return 0; +} + +#ifdef ERTS_SMP + +static void update_redir_tables(ErtsPollSet ps) { + struct OS_redir_entry *redir_table; + PROCESS sched_1 = ERTS_SCHEDULER_IX(0)->tid.id; + int i; + redir_table = SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(struct OS_redir_entry)*(ps->sig_count+1)); + + redir_table[0].sig = ps->sig_count+1; + redir_table[0].pid = 0; + + for (i = 1; i < ps->sig_count+1; i++) { + redir_table[i].sig = ps->sigs[i]; + redir_table[i].pid = sched_1; + } + + for (i = 1; i < erts_no_schedulers; i++) { + ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(i); + set_redirection(esdp->tid.id,redir_table); + } + + SEL_FREE(ERTS_ALC_T_POLLSET,redir_table); +} + +#endif + +static int update_sigsel(ErtsPollSet ps) { + ErtsSigSelInfo *info = ps->info; + + int i; + + if (ps->sigs != NULL) + SEL_FREE(ERTS_ALC_T_POLLSET,ps->sigs); + + if (ps->sig_count == 0) { + /* If there are no signals we place a non-valid signal to make sure that + * we do not trigger on a any unrelated signals which are sent to the + * process. + */ + ps->sigs = SEL_ALLOC(ERTS_ALC_T_POLLSET,sizeof(SIGSELECT)*(2)); + ps->sigs[0] = 1; + ps->sigs[1] = ERTS_SIGNAL_INVALID; + return 0; + } + + ps->sigs = SEL_ALLOC(ERTS_ALC_T_POLLSET,sizeof(SIGSELECT)*(ps->sig_count+1)); + ps->sigs[0] = ps->sig_count; + + for (i = 1; info != NULL; i++, info = info->next) + ps->sigs[i] = info->signo; + +#ifdef ERTS_SMP + update_redir_tables(ps); +#endif + + return 0; +} + +static ERTS_INLINE void +wake_poller(ErtsPollSet ps) +{ + erts_aint32_t wakeup_state; + + ERTS_THR_MEMORY_BARRIER; + wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); + while (wakeup_state != ERTS_POLL_WOKEN_IO_READY + && wakeup_state != ERTS_POLL_WOKEN_INTR) { + erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_INTR, + wakeup_state); + if (act == wakeup_state) { + wakeup_state = act; + break; + } + wakeup_state = act; + } + if (wakeup_state == ERTS_POLL_SLEEPING) { + /* + * Since we don't know the internals of signal_fsem() we issue + * a memory barrier as a safety precaution ensuring that + * the store we just made to wakeup_state wont be reordered + * with loads in signal_fsem(). + */ + ERTS_THR_MEMORY_BARRIER; + signal_fsem(ps->interrupt); + } +} + +static ERTS_INLINE void +reset_interrupt(ErtsPollSet ps) +{ + /* We need to keep io-ready if set */ + erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); + while (wakeup_state != ERTS_POLL_NOT_WOKEN && + wakeup_state != ERTS_POLL_SLEEPING) { + erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_NOT_WOKEN, + wakeup_state); + if (wakeup_state == act) + break; + wakeup_state = act; + } + ERTS_THR_MEMORY_BARRIER; +} + +static ERTS_INLINE void +set_interrupt(ErtsPollSet ps) +{ + wake_poller(ps); +} + +void erts_poll_interrupt(ErtsPollSet ps,int set) { + HARDTRACEF("erts_poll_interrupt called!\n"); + + if (!set) + reset_interrupt(ps); + else + set_interrupt(ps); + +} + +void erts_poll_interrupt_timed(ErtsPollSet ps,int set,erts_short_time_t msec) { + HARDTRACEF("erts_poll_interrupt_timed called!\n"); + + if (!set) + reset_interrupt(ps); + else if (erts_smp_atomic32_read_acqb(&ps->timeout) > (erts_aint32_t) msec) + set_interrupt(ps); +} + +ErtsPollEvents erts_poll_control(ErtsPollSet ps, ErtsSysFdType fd, + ErtsPollEvents pe, int on, int* do_wake) { + ErtsSigSelItem *curr; + ErtsPollEvents new_events; + int old_sig_count; + + HARDTRACEF( + "%ux: In erts_poll_control, fd = %d, pe = %d, on = %d, *do_wake = %d, curr = 0x%xu", + ps, fd, pe, on, do_wake, curr); + + ERTS_POLLSET_LOCK(ps); + + if (on && (pe & ERTS_POLL_EV_IN) && (pe & ERTS_POLL_EV_OUT)) { + /* Check to make sure both in and out are not used at the same time */ + new_events = ERTS_POLL_EV_NVAL; + goto done; + } + + curr = get_sigsel_item(ps, fd); + old_sig_count = ps->sig_count; + + if (curr == NULL && on) { + curr = add_sigsel_item(ps, fd, fd->resolve_signal); + } else if (curr == NULL && !on) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } + + new_events = curr->events; + + if (pe == 0) { + *do_wake = 0; + goto done; + } + + if (on) { + new_events |= pe; + curr->events = new_events; + } else { + new_events &= ~pe; + curr->events = new_events; + if (new_events == 0 && del_sigsel_item(ps, curr)) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } + } + + if (ps->sig_count != old_sig_count) { + if (update_sigsel(ps)) + new_events = ERTS_POLL_EV_NVAL; + } +done: + ERTS_POLLSET_UNLOCK(ps); + HARDTRACEF("%ux: Out erts_poll_control", ps); + return new_events; +} + +int erts_poll_wait(ErtsPollSet ps, + ErtsPollResFd pr[], + int *len, + SysTimeval *utvp) { + int res = ETIMEDOUT, no_fds, currid = 0; + OSTIME timeout; + union SIGNAL *sig; + // HARDTRACEF("%ux: In erts_poll_wait",ps); + if (ps->interrupt == (PROCESS)0) + ps->interrupt = current_process(); + + ASSERT(current_process() == ps->interrupt); + ASSERT(get_fsem(current_process()) == 0); + ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) & + (ERTS_POLL_NOT_WOKEN | ERTS_POLL_WOKEN_INTR)); + /* Max no of spots avable in pr */ + no_fds = *len; + + *len = 0; + + ASSERT(utvp); + + /* erts_printf("Entering erts_poll_wait(), timeout=%d\n", + (int) utvp->tv_sec*1000 + utvp->tv_usec/1000); */ + + timeout = utvp->tv_sec*1000 + utvp->tv_usec/1000; + + if (timeout > ((time_t) ERTS_AINT32_T_MAX)) + timeout = ERTS_AINT32_T_MAX; + erts_smp_atomic32_set_relb(&ps->timeout, (erts_aint32_t) timeout); + + while (currid < no_fds) { + if (timeout > 0) { + erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_SLEEPING, + ERTS_POLL_NOT_WOKEN); + if (act == ERTS_POLL_NOT_WOKEN) { +#ifdef ERTS_SMP + erts_thr_progress_prepare_wait(NULL); +#endif + sig = receive_fsem(timeout, ps->sigs, 1); +#ifdef ERTS_SMP + erts_thr_progress_finalize_wait(NULL); +#endif + } else { + ASSERT(act == ERTS_POLL_WOKEN_INTR); + sig = OS_RCV_FSEM; + } + } else + sig = receive_w_tmo(0, ps->sigs); + + if (sig == NULL) { + if (timeout > 0) { + erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_TIMEDOUT, + ERTS_POLL_SLEEPING); + if (act == ERTS_POLL_WOKEN_INTR) + /* Restore fsem as it was signaled but we got a timeout */ + wait_fsem(1); + } else + erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_TIMEDOUT, + ERTS_POLL_NOT_WOKEN); + break; + } else if (sig == OS_RCV_FSEM) { + ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) == ERTS_POLL_WOKEN_INTR); + break; + } + { + ErtsSigSelInfo *info = get_sigsel_info(ps, sig->sig_no); + struct erts_sys_fd_type fd = { sig->sig_no, info->decode(sig) }; + ErtsSigSelItem *item = get_sigsel_item(ps, &fd); + + ASSERT(sig); + if (currid == 0 && timeout > 0) { + erts_aint32_t act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_IO_READY, + ERTS_POLL_SLEEPING); + if (act == ERTS_POLL_WOKEN_INTR) { + /* Restore fsem as it was signaled but we got a msg */ + wait_fsem(1); + act = erts_atomic32_cmpxchg_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_IO_READY, + ERTS_POLL_WOKEN_INTR); + } + } else if (currid == 0) { + erts_atomic32_set_nob(&ps->wakeup_state, + ERTS_POLL_WOKEN_IO_READY); + } + + if (item == NULL) { + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf( + dsbufp, + "erts_poll_wait() failed: found unkown signal id %d (signo %u) " + "(curr_proc 0x%x)\n", + fd.id, fd.signo, current_process()); + erts_send_error_to_logger_nogl(dsbufp); + timeout = 0; + /* Under normal circumstances the signal is deallocated by the + * driver that issued the select operation. But in this case + * there's no driver waiting for such signal so we have to + * deallocate it here */ + if (sig) + free_buf(&sig); + } else { + int i; + struct erts_sys_fd_type *fd = NULL; + ErtsPollOseMsgList *tl,*new; + + /* Check if this fd has already been triggered by a previous signal */ + for (i = 0; i < currid;i++) { + if (pr[i].fd == item->fd) { + fd = pr[i].fd; + pr[i].events |= item->events; + break; + } + } + + /* First time this fd is triggered */ + if (fd == NULL) { + pr[currid].fd = item->fd; + pr[currid].events = item->events; + fd = item->fd; + timeout = 0; + currid++; + } + + /* Insert new signal in approriate list */ + new = erts_alloc(ERTS_ALC_T_FD_SIG_LIST,sizeof(ErtsPollOseMsgList)); + new->next = NULL; + new->data = sig; + + ethr_mutex_lock(&fd->mtx); + tl = fd->msgs; + + if (tl == NULL) { + fd->msgs = new; + } else { + while (tl->next != NULL) + tl = tl->next; + tl->next = new; + } + ethr_mutex_unlock(&fd->mtx); + } + + } + } + + { + erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state); + + switch (wakeup_state) { + case ERTS_POLL_WOKEN_IO_READY: + res = 0; + break; + case ERTS_POLL_WOKEN_INTR: + res = EINTR; + break; + case ERTS_POLL_WOKEN_TIMEDOUT: + res = ETIMEDOUT; + break; + case ERTS_POLL_NOT_WOKEN: + /* This happens when we get an invalid signal only */ + res = EINVAL; + break; + default: + res = 0; + erl_exit(ERTS_ABORT_EXIT, + "%s:%d: Internal error: Invalid wakeup_state=%d\n", + __FILE__, __LINE__, (int) wakeup_state); + } + } + + erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); + erts_smp_atomic32_set_nob(&ps->timeout, ERTS_AINT32_T_MAX); + + *len = currid; + + // HARDTRACEF("%ux: Out erts_poll_wait",ps); + return res; +} + +int erts_poll_max_fds(void) +{ + + HARDTRACEF("In/Out erts_poll_max_fds -> %d",max_fds); + return max_fds; +} + +void erts_poll_info(ErtsPollSet ps, + ErtsPollInfo *pip) +{ + Uint size = 0; + Uint num_events = 0; + + size += sizeof(struct ErtsPollSet_); + size += sizeof(ErtsSigSelInfo)*ps->sig_count; + size += sizeof(ErtsSigSelItem)*ps->item_count; + size += sizeof(SIGSELECT)*(ps->sig_count+1); + + pip->primary = "receive_fsem"; + + pip->fallback = NULL; + + pip->kernel_poll = NULL; + + pip->memory_size = size; + + pip->poll_set_size = num_events; + + pip->fallback_poll_set_size = 0; + + pip->lazy_updates = 0; + + pip->pending_updates = 0; + + pip->batch_updates = 0; + + pip->concurrent_updates = 0; + + + pip->max_fds = erts_poll_max_fds(); + HARDTRACEF("%ux: Out erts_poll_info",ps); + +} + +ErtsPollSet erts_poll_create_pollset(void) +{ + ErtsPollSet ps = SEL_ALLOC(ERTS_ALC_T_POLLSET, + sizeof(struct ErtsPollSet_)); + + ps->sigs = NULL; + ps->sig_count = 0; + ps->item_count = 0; + ps->info = NULL; + ps->interrupt = (PROCESS)0; + erts_atomic32_init_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); + erts_smp_atomic32_init_nob(&ps->timeout, ERTS_AINT32_T_MAX); +#ifdef ERTS_SMP + erts_smp_mtx_init(&ps->mtx, "pollset"); +#endif + update_sigsel(ps); + HARDTRACEF("%ux: Out erts_poll_create_pollset",ps); + return ps; +} + +void erts_poll_destroy_pollset(ErtsPollSet ps) +{ + ErtsSigSelInfo *info; + for (info = ps->info; ps->info != NULL; info = ps->info, ps->info = ps->info->next) { + ErtsSigSelItem *item; + for (item = info->fds; info->fds != NULL; item = info->fds, info->fds = info->fds->next) + SEL_FREE(ERTS_ALC_T_POLLSET, item); + SEL_FREE(ERTS_ALC_T_POLLSET, info); + } + + SEL_FREE(ERTS_ALC_T_POLLSET,ps->sigs); + +#ifdef ERTS_SMP + erts_smp_mtx_destroy(&ps->mtx); +#endif + + SEL_FREE(ERTS_ALC_T_POLLSET,ps); +} + +void erts_poll_init(void) +{ + HARDTRACEF("In %s", __FUNCTION__); + max_fds = 256; + + HARDTRACEF("Out %s", __FUNCTION__); +} + + +/* OSE driver functions */ + +union SIGNAL *erl_drv_ose_get_signal(ErlDrvEvent drv_ev) { + struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; + ethr_mutex_lock(&ev->mtx); + if (ev->msgs == NULL) { + ethr_mutex_unlock(&ev->mtx); + return NULL; + } else { + ErtsPollOseMsgList *msg = ev->msgs; + union SIGNAL *sig = (union SIGNAL*)msg->data; + ASSERT(msg->data); + ev->msgs = msg->next; + ethr_mutex_unlock(&ev->mtx); + erts_free(ERTS_ALC_T_FD_SIG_LIST,msg); + restore(sig); + return sig; + } +} + +ErlDrvEvent +erl_drv_ose_event_alloc(SIGSELECT signo, ErlDrvOseEventId id, + ErlDrvOseEventId (*resolve_signal)(union SIGNAL *sig), void *extra) { + struct erts_sys_fd_type *ev = erts_alloc(ERTS_ALC_T_DRV_EV, + sizeof(struct erts_sys_fd_type)); + ev->signo = signo; + ev->extra = extra; + ev->id = id; + ev->msgs = NULL; + ev->resolve_signal = resolve_signal; + ethr_mutex_init(&ev->mtx); + return (ErlDrvEvent)ev; +} + +void erl_drv_ose_event_free(ErlDrvEvent drv_ev) { + struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; + ASSERT(ev->msgs == NULL); + ethr_mutex_destroy(&ev->mtx); + erts_free(ERTS_ALC_T_DRV_EV,ev); +} + +void erl_drv_ose_event_fetch(ErlDrvEvent drv_ev, SIGSELECT *signo, + ErlDrvOseEventId *id, void **extra) { + struct erts_sys_fd_type *ev = (struct erts_sys_fd_type *)drv_ev; + if (signo) + *signo = ev->signo; + if (extra) + *extra = ev->extra; + if (id) + *id = ev->id; +} diff --git a/erts/emulator/sys/ose/erts.sig b/erts/emulator/sys/ose/erts.sig new file mode 100644 index 0000000000..78b883ee6c --- /dev/null +++ b/erts/emulator/sys/ose/erts.sig @@ -0,0 +1,17 @@ +#ifndef ERTS_OSE_SIGNALS +#define ERTS_OSE_SIGNALS + +#ifndef ERTS_OSE_SIGNAL_BASE +#define ERTS_OSE_SIGNAL_BASE 0x01900280 +#endif + +#define ERTS_SIGNAL_INVALID ERTS_OSE_SIGNAL_BASE +#define ERTS_SIGNAL_FD_DRV_CONFIG ERTS_OSE_SIGNAL_BASE+1 +#define ERTS_SIGNAL_FD_DRV_ASYNC ERTS_OSE_SIGNAL_BASE+2 +#define ERTS_SIGNAL_OSE_DRV_ATTACH ERTS_OSE_SIGNAL_BASE+3 +#define ERTS_SIGNAL_OSE_DRV_HUNT ERTS_OSE_SIGNAL_BASE+4 + +#define ERTS_SIGNAL_RUN_ERL_SETUP ERTS_OSE_SIGNAL_BASE+100 +#define ERTS_SIGNAL_RUN_ERL_DAEMON ERTS_OSE_SIGNAL_BASE+101 + +#endif diff --git a/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf b/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf new file mode 100644 index 0000000000..a19d23facf --- /dev/null +++ b/erts/emulator/sys/ose/gcc_4.4.3_lm_ppc.lcf @@ -0,0 +1,182 @@ +/******************************************************************************* + * Copyright (C) 2013-2014 by Enea Software AB, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +OUTPUT_ARCH("powerpc") +ENTRY("crt0_lm") +MEMORY +{ + rom : ORIGIN = 0x01000000, LENGTH = 0x01000000 + ram : ORIGIN = 0x02000000, LENGTH = 0x01000000 +} +PHDRS +{ + ph_conf PT_LOAD ; + ph_rom PT_LOAD ; + ph_ram PT_LOAD ; +} +SECTIONS +{ + .text : + { + *(.text_first) + *(.text) + *(.text.*) + *(.stub) + *(oscode) + *(.init*) + *(.fini*) + *(.gnu.warning) + *(.gnu.linkonce.t.*) + *(.glue_7t) + *(.glue_7) + } > rom :ph_rom = 0 + .ose_sfk_biosentry : + { + *(.ose_sfk_biosentry) + } > rom :ph_rom + .ctors : + { + __CTOR_LIST__ = .; + *(.ctors) + *(SORT(.ctors.*)) + __CTOR_END__ = .; + } > rom :ph_rom + .dtors : + { + __DTOR_LIST__ = .; + *(.dtors) + *(SORT(.dtors.*)) + __DTOR_END__ = .; + } > rom :ph_rom + OSESYMS : + { + *(.osesyms) + } > rom :ph_rom + .rodata : + { + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + } > rom :ph_rom + .eh_frame_hdr : + { + *(.eh_frame_hdr) + } > rom :ph_rom + .eh_frame : + { + __EH_FRAME_BEGIN__ = .; + *(.eh_frame) + LONG(0) + __EH_FRAME_END__ = .; + } > rom :ph_rom + .gcc_except_table : + { + *(.gcc_except_table .gcc_except_table.*) + } > rom :ph_rom + .sdata2 : + { + PROVIDE (_SDA2_BASE_ = .); + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + } > rom :ph_rom + .sbss2 : + { + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + } > rom :ph_rom + LMCONF : + { + obj/?*?/ose_confd.o(.rodata) + *(LMCONF) + } > rom :ph_conf + .data : + { + LONG(0xDEADBABE) + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + . = ALIGN(0x10); + } > ram :ph_ram = 0 + .sdata2 : + { + _SDA2_BASE_ = .; + *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) + }> ram :ph_ram + .sdata : + { + PROVIDE (_SDA_BASE_ = .); + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + } > ram :ph_ram + .sbss : + { + *(.sbss) + *(.sbss.*) + *(.scommon) + *(.gnu.linkonce.sb.*) + } > ram :ph_ram + .bss (NOLOAD) : + { + *(.bss) + *(.bss.*) + *(COMMON) + *(.gnu.linkonce.b.*) + *(.osvars) + } > ram :ph_ram + .ignore (NOLOAD) : + { + *(.rel.dyn) + } > ram :ph_ram + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + .debug_info 0 : { *(.debug_info) *(.gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } +} +__OSESYMS_START = ADDR(OSESYMS); +__OSESYMS_END = ADDR(OSESYMS) + SIZEOF(OSESYMS); diff --git a/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf b/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf new file mode 100644 index 0000000000..3440c2961b --- /dev/null +++ b/erts/emulator/sys/ose/gcc_4.6.3_lm_ppc.lcf @@ -0,0 +1,242 @@ +/******************************************************************************* + * Copyright (C) 2013-2014 by Enea Software AB, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + ******************************************************************************/ + +OUTPUT_FORMAT("elf32-powerpc", "elf32-powerpc", "elf32-powerpc") +OUTPUT_ARCH("powerpc") + +ENTRY("crt0_lm") + +/* Note: + * You may have to increase the length of the "rom" memory region and the + * origin and length of the "ram" memory region below depending on the size + * of the code and data in your load module. + */ + +MEMORY +{ + conf : ORIGIN = 0x00100000, LENGTH = 0x00030000 + rom : ORIGIN = 0x01000000, LENGTH = 0x01000000 + ram : ORIGIN = 0x03000000, LENGTH = 0x01000000 +} + +PHDRS +{ + ph_conf PT_LOAD ; + ph_rom PT_LOAD ; + ph_ram PT_LOAD ; +} + +SECTIONS +{ +/*--------------------------------------------------------------------------- + * Load module configuration area + *-------------------------------------------------------------------------*/ + + /* Load module configuration section. */ + LMCONF : + { + obj/?*?/ose_confd.o(.rodata) + *(LMCONF) + } > conf :ph_conf + +/*--------------------------------------------------------------------------- + * Read-only area + *-------------------------------------------------------------------------*/ + + /* Code section. */ + .text : + { + *(.text) + *(.text.*) + *(.stub) + *(oscode) + *(.init*) + *(.fini*) + *(.gnu.warning) + *(.gnu.linkonce.t.*) + } > rom :ph_rom = 0 + + /* OSE symbols section. */ + OSESYMS : + { + *(.osesyms) + } > rom :ph_rom + + /* Read-only data section. */ + .rodata : + { + *(.rodata) + *(.rodata.*) + *(.gnu.linkonce.r.*) + } > rom :ph_rom + + /* C++ exception handling section. */ + .eh_frame : + { + __EH_FRAME_BEGIN__ = .; + *(.eh_frame) + LONG(0) + __EH_FRAME_END__ = .; + } > rom :ph_rom + + /* C++ exception handling section. */ + .gcc_except_table : + { + *(.gcc_except_table .gcc_except_table.*) + } > rom :ph_rom + + /* PowerPC EABI initialized read-only data section. */ + .sdata2 : + { + PROVIDE (_SDA2_BASE_ = .); + *(.sdata2) + *(.sdata2.*) + *(.gnu.linkonce.s2.*) + } > rom :ph_rom + + /* PowerPC EABI uninitialized read-only data section. */ + .sbss2 : + { + *(.sbss2) + *(.sbss2.*) + *(.gnu.linkonce.sb2.*) + } > rom :ph_rom + +/*--------------------------------------------------------------------------- + * Read-write area + *-------------------------------------------------------------------------*/ + + /*------------------------------------------------------------------- + * Initialized data (copied by PM) + *-----------------------------------------------------------------*/ + + /* Data section. */ + .data : + { + *(.data) + *(.data.*) + *(.gnu.linkonce.d.*) + SORT(CONSTRUCTORS) + } > ram :ph_ram + + /* C++ constructor section. */ + .ctors : + { + __CTOR_LIST__ = .; + *(.ctors) + *(SORT(.ctors.*)) + __CTOR_END__ = .; + } > ram :ph_ram + + /* C++ destructor section. */ + .dtors : + { + __DTOR_LIST__ = .; + *(.dtors) + *(SORT(.dtors.*)) + __DTOR_END__ = .; + } > ram :ph_ram + + + /* Small data section. */ + .sdata ALIGN(0x10) : + { + PROVIDE (_SDA_BASE_ = .); + *(.sdata) + *(.sdata.*) + *(.gnu.linkonce.s.*) + } > ram :ph_ram + + /*------------------------------------------------------------------- + * Uninitialized data (cleared by PM) + *-----------------------------------------------------------------*/ + + /* Small bss section. */ + .sbss : + { + *(.sbss) + *(.sbss.*) + *(.scommon) + *(.gnu.linkonce.sb.*) + } > ram :ph_ram + + /* Bss section. */ + .bss : + { + *(.bss) + *(.bss.*) + *(COMMON) + *(.gnu.linkonce.b.*) + } > ram :ph_ram + +/*--------------------------------------------------------------------------- + * Debug information + *-------------------------------------------------------------------------*/ + + /* + * Stabs debug sections. + */ + + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + + /* + * DWARF debug sections. + */ + + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info) *(.gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } +} diff --git a/erts/emulator/sys/ose/sys.c b/erts/emulator/sys/ose/sys.c new file mode 100644 index 0000000000..5b950a7dae --- /dev/null +++ b/erts/emulator/sys/ose/sys.c @@ -0,0 +1,1846 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif +#include "sys/time.h" +#include "time.h" +#include "sys/uio.h" +#include "termios.h" +#include "ctype.h" +#include "termios.h" + +#ifdef HAVE_FCNTL_H +#include "fcntl.h" +#endif + +#ifdef HAVE_SYS_IOCTL_H +#include "sys/ioctl.h" +#endif + +#define ERTS_WANT_BREAK_HANDLING +#define WANT_NONBLOCKING +#include "sys.h" +#include "erl_thr_progress.h" + +#ifdef USE_THREADS +#include "erl_threads.h" +#endif + +#include "erl_mseg.h" + +#include "unistd.h" +#include "efs.h" +#include "erl_printf.h" +#include "aio.h" +#include "pm.h" +#include "fcntl.h" + +/* Set the define to 1 to get some logging */ +#if 0 +#include "ramlog.h" +#define LOG(output) ramlog_printf output +#else +#define LOG(output) +#endif + +extern char **environ; +static erts_smp_rwmtx_t environ_rwmtx; +static PROCESS sig_proxy_pid = 0; + +#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O + * vector sock_sendv(). + */ +/* + * Don't need global.h, but bif_table.h (included by bif.h), + * won't compile otherwise + */ +#include "global.h" +#include "bif.h" + +#include "erl_sys_driver.h" +#include "erl_check_io.h" +#include "erl_cpu_topology.h" + +/* The priority for reader/writer processes */ +#define FD_PROC_PRI get_pri(current_process()) + +typedef struct ErtsSysReportExit_ ErtsSysReportExit; +struct ErtsSysReportExit_ { + ErtsSysReportExit *next; + Eterm port; + int pid; + int ifd; + int ofd; + ErlDrvEvent attach_event; + ErlDrvEvent input_event; + ErlDrvEvent output_event; +}; + +/* This data is shared by these drivers - initialized by spawn_init() */ +static struct driver_data { + ErlDrvPort port_num; + int ofd; + int ifd; + int packet_bytes; + ErtsSysReportExit *report_exit; + int pid; + int alive; + int status; + ErlDrvEvent input_event; + ErlDrvEvent output_event; + struct aiocb aiocb; + FmHandle handle; + char *install_handle; +} *driver_data; /* indexed by fd */ + +struct async { + SIGSELECT signo; + ErlDrvTermData port; + ErlDrvTermData proc; + PROCESS spid; + PROCESS target; + Uint32 ref; +}; + +static ErtsSysReportExit *report_exit_list; +static ERTS_INLINE void report_exit_status(ErtsSysReportExit *rep, int status); + +extern int driver_interrupt(int, int); +extern void do_break(void); + +extern void erl_sys_args(int*, char**); + +/* The following two defs should probably be moved somewhere else */ + +extern void erts_sys_init_float(void); + +extern void erl_crash_dump(char* file, int line, char* fmt, ...); + +#define DIR_SEPARATOR_CHAR '/' + +#if defined(DEBUG) +#define ERL_BUILD_TYPE_MARKER ".debug" +#else /* opt */ +#define ERL_BUILD_TYPE_MARKER +#endif + +#define CHILD_SETUP_PROG_NAME "child_setup" ERL_BUILD_TYPE_MARKER + +#ifdef DEBUG +static int debug_log = 0; +#endif + +#ifdef ERTS_SMP +static erts_smp_atomic32_t have_prepared_crash_dump; +#define ERTS_PREPARED_CRASH_DUMP \ + ((int) erts_smp_atomic32_xchg_nob(&have_prepared_crash_dump, 1)) +#else +static volatile int have_prepared_crash_dump; +#define ERTS_PREPARED_CRASH_DUMP \ + (have_prepared_crash_dump++) +#endif + +static erts_smp_atomic_t sys_misc_mem_sz; + +#if defined(ERTS_SMP) +erts_mtx_t chld_stat_mtx; +#endif + +#if defined(ERTS_SMP) /* ------------------------------------------------- */ +#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx) +#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx) + +#else /* ------------------------------------------------------------------- */ +#define CHLD_STAT_LOCK +#define CHLD_STAT_UNLOCK +static volatile int children_died; +#endif + +#define SET_AIO(REQ,FD,SIZE,BUFF) \ + memset(&(REQ),0,sizeof(REQ)); \ + (REQ).aio_fildes = FD; \ + (REQ).aio_offset = FM_POSITION_CURRENT; \ + (REQ).aio_nbytes = SIZE; \ + (REQ).aio_buf = BUFF; \ + (REQ).aio_sigevent.sigev_notify = SIGEV_NONE + +/* the first sizeof(struct aiocb *) bytes of the write buffer + * will contain the pointer to the aiocb struct, this needs + * to be freed between asynchronous writes. + * A write of 0 bytes is ignored. */ +#define WRITE_AIO(FD,SIZE,BUFF) do { \ + if (SIZE > 0) { \ + struct aiocb *write_req = driver_alloc(sizeof(struct aiocb)); \ + char *write_buff = driver_alloc((sizeof(char)*SIZE)+1+ \ + (sizeof(struct aiocb *))); \ + *(struct aiocb **)write_buff = (struct aiocb *)write_req; \ + write_buff += sizeof(struct aiocb *); \ + memcpy(write_buff,BUFF,SIZE+1); \ + SET_AIO(*write_req,FD,SIZE,write_buff); \ + if (aio_write(write_req)) \ + ramlog_printf("%s:%d: write failed with %d\n", \ + __FILE__,__LINE__,errno); \ + } \ +} while(0) + +/* free the write_buffer and write_req + * created in the WRITE_AIO() request macro */ +#define FREE_AIO(ptr) do { \ + struct aiocb *aiocb_ptr; \ + char *buffer_ptr; \ + aiocb_ptr = *(struct aiocb **)((ptr)-sizeof(struct aiocb *)); \ + buffer_ptr = (((char*)ptr)-sizeof(struct aiocb *)); \ + driver_free(aiocb_ptr); \ + driver_free(buffer_ptr); \ +} while(0) + +#define DISPATCH_AIO(sig) do { \ + if (aio_dispatch(sig)) \ + ramlog_printf("%s:%d: dispatch failed with %d\n", \ + __FILE__,__LINE__,errno); \ + } while(0) + +#define AIO_PIPE_SIZE 1024 + +/* debug print macros */ +#define DEBUG_RES 0 + +#ifdef DEBUG_RES +#define DEBUG_CHECK_RES(actual, expected) \ + do { \ + if (actual != expected ) { \ + ramlog_printf("Result check failed" \ + " got: 0x%08x expected:0x%08x\nat: %s:%d\n", \ + actual, expected, __FILE__, __LINE__); \ + abort(); /* This might perhaps be too harsh? */ \ + } \ + } while(0) +#else +#define DEBUG_CHECK_RES +#endif + +static struct fd_data { + char pbuf[4]; /* hold partial packet bytes */ + int psz; /* size of pbuf */ + char *buf; + char *cpos; + int sz; + int remain; /* for input on fd */ +} *fd_data; /* indexed by fd */ + +/********************* General functions ****************************/ + +/* This is used by both the drivers and general I/O, must be set early */ +static int max_files = -1; + +/* + * a few variables used by the break handler + */ +#ifdef ERTS_SMP +erts_smp_atomic32_t erts_break_requested; +#define ERTS_SET_BREAK_REQUESTED \ + erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 1) +#define ERTS_UNSET_BREAK_REQUESTED \ + erts_smp_atomic32_set_nob(&erts_break_requested, (erts_aint32_t) 0) +#else +volatile int erts_break_requested = 0; +#define ERTS_SET_BREAK_REQUESTED (erts_break_requested = 1) +#define ERTS_UNSET_BREAK_REQUESTED (erts_break_requested = 0) +#endif +/* set early so the break handler has access to initial mode */ +static struct termios initial_tty_mode; +static int replace_intr = 0; +/* assume yes initially, ttsl_init will clear it */ +int using_oldshell = 1; +static PROCESS get_signal_proxy_pid(void); + +static void +init_check_io(void) +{ + erts_init_check_io(); + max_files = erts_check_io_max_files(); +} + +#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT +#define ERTS_CHK_IO_AS_INTR() erts_check_io_async_sig_interrupt() +#else +#define ERTS_CHK_IO_AS_INTR() erts_check_io_interrupt(1) +#endif +#define ERTS_CHK_IO_INTR erts_check_io_interrupt +#define ERTS_CHK_IO_INTR_TMD erts_check_io_interrupt_timed +#define ERTS_CHK_IO erts_check_io +#define ERTS_CHK_IO_SZ erts_check_io_size + + +void +erts_sys_schedule_interrupt(int set) +{ + ERTS_CHK_IO_INTR(set); +} + +#ifdef ERTS_SMP +void +erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec) +{ + ERTS_CHK_IO_INTR_TMD(set, msec); +} +#endif + +Uint +erts_sys_misc_mem_sz(void) +{ + Uint res = ERTS_CHK_IO_SZ(); + res += erts_smp_atomic_read_mb(&sys_misc_mem_sz); + return res; +} + +/* + * reset the terminal to the original settings on exit + */ +void sys_tty_reset(int exit_code) +{ + if (using_oldshell && !replace_intr) { + SET_BLOCKING(0); + } + else if (isatty(0)) { + tcsetattr(0,TCSANOW,&initial_tty_mode); + } +} + +#ifdef USE_THREADS + +typedef struct { + int sched_bind_data; +} erts_thr_create_data_t; + +/* + * thr_create_prepare() is called in parent thread before thread creation. + * Returned value is passed as argument to thr_create_cleanup(). + */ +static void * +thr_create_prepare(void) +{ + erts_thr_create_data_t *tcdp; + + tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t)); + + tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare(); + + return (void *) tcdp; +} + + +/* thr_create_cleanup() is called in parent thread after thread creation. */ +static void +thr_create_cleanup(void *vtcdp) +{ + erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; + + erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data); + + erts_free(ERTS_ALC_T_TMP, tcdp); +} + +static void +thr_create_prepare_child(void *vtcdp) +{ + erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp; + +#ifdef ERTS_ENABLE_LOCK_COUNT + erts_lcnt_thread_setup(); +#endif + + erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data); +} + +#endif /* #ifdef USE_THREADS */ + +/* The two functions below are stolen from win_con.c + They have to use malloc/free/realloc directly becasue + we want to do able to do erts_printf very early on. + */ +#define VPRINTF_BUF_INC_SIZE 128 +static erts_dsprintf_buf_t * +grow_vprintf_buf(erts_dsprintf_buf_t *dsbufp, size_t need) +{ + char *buf; + size_t size; + + ASSERT(dsbufp); + + if (!dsbufp->str) { + size = (((need + VPRINTF_BUF_INC_SIZE - 1) + / VPRINTF_BUF_INC_SIZE) + * VPRINTF_BUF_INC_SIZE); + buf = (char *) malloc(size * sizeof(char)); + } + else { + size_t free_size = dsbufp->size - dsbufp->str_len; + + if (need <= free_size) + return dsbufp; + + size = need - free_size + VPRINTF_BUF_INC_SIZE; + size = (((size + VPRINTF_BUF_INC_SIZE - 1) + / VPRINTF_BUF_INC_SIZE) + * VPRINTF_BUF_INC_SIZE); + size += dsbufp->size; + buf = (char *) realloc((void *) dsbufp->str, + size * sizeof(char)); + } + if (!buf) + return NULL; + if (buf != dsbufp->str) + dsbufp->str = buf; + dsbufp->size = size; + return dsbufp; +} + +static int erts_sys_ramlog_printf(char *format, va_list arg_list) +{ + int res,i; + erts_dsprintf_buf_t dsbuf = ERTS_DSPRINTF_BUF_INITER(grow_vprintf_buf); + res = erts_vdsprintf(&dsbuf, format, arg_list); + if (res >= 0) { + for (i = 0; i < dsbuf.str_len; i+= 50) + /* We print 50 characters at a time because otherwise + the ramlog looks broken */ + ramlog_printf("%.*s",dsbuf.str_len-50 < 0?dsbuf.str_len:50,dsbuf.str+i); + } + if (dsbuf.str) + free((void *) dsbuf.str); + return res; +} + +void +erts_sys_pre_init(void) +{ + erts_printf_add_cr_to_stdout = 1; + erts_printf_add_cr_to_stderr = 1; +#ifdef USE_THREADS + { + erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER; + + eid.thread_create_child_func = thr_create_prepare_child; + /* Before creation in parent */ + eid.thread_create_prepare_func = thr_create_prepare; + /* After creation in parent */ + eid.thread_create_parent_func = thr_create_cleanup, + + erts_thr_init(&eid); + + report_exit_list = NULL; + +#ifdef ERTS_ENABLE_LOCK_COUNT + erts_lcnt_init(); +#endif + +#if defined(ERTS_SMP) + erts_mtx_init(&chld_stat_mtx, "child_status"); +#endif + } +#ifdef ERTS_SMP + erts_smp_atomic32_init_nob(&erts_break_requested, 0); + erts_smp_atomic32_init_nob(&have_prepared_crash_dump, 0); +#else + erts_break_requested = 0; + have_prepared_crash_dump = 0; +#endif +#if !defined(ERTS_SMP) + children_died = 0; +#endif +#endif /* USE_THREADS */ + + erts_printf_stdout_func = erts_sys_ramlog_printf; + + erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0); +} + +void +erl_sys_init(void) +{ + +#ifdef USE_SETLINEBUF + setlinebuf(stdout); +#else + setvbuf(stdout, (char *)NULL, _IOLBF, BUFSIZ); +#endif + + erts_sys_init_float(); + + /* we save this so the break handler can set and reset it properly */ + /* also so that we can reset on exit (break handler or not) */ + if (isatty(0)) { + tcgetattr(0,&initial_tty_mode); + } + tzset(); /* Required at least for NetBSD with localtime_r() */ +} + +static ERTS_INLINE int +prepare_crash_dump(int secs) +{ +#define NUFBUF (3) + int i, max; + char env[21]; /* enough to hold any 64-bit integer */ + size_t envsz; + /*DeclareTmpHeapNoproc(heap,NUFBUF);*/ + /*Eterm *hp = heap;*/ + /*Eterm list = NIL;*/ + int has_heart = 0; + + UseTmpHeapNoproc(NUFBUF); + + if (ERTS_PREPARED_CRASH_DUMP) + return 0; /* We have already been called */ + + + /* Positive secs means an alarm must be set + * 0 or negative means no alarm + * + * Set alarm before we try to write to a port + * we don't want to hang on a port write with + * no alarm. + * + */ + +#if 0 /*ose TBD!!!*/ + if (secs >= 0) { + alarm((unsigned int)secs); + } +#endif + + /* Make sure we unregister at epmd (unknown fd) and get at least + one free filedescriptor (for erl_crash.dump) */ + + max = max_files; + if (max < 1024) + max = 1024; + for (i = 3; i < max; i++) { + close(i); + } + + envsz = sizeof(env); + i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz); + if (i >= 0) { + int nice_val; + nice_val = i != 0 ? 0 : atoi(env); + if (nice_val > 39) { + nice_val = 39; + } + set_pri(nice_val); + } + + UnUseTmpHeapNoproc(NUFBUF); +#undef NUFBUF + return has_heart; +} + +int erts_sys_prepare_crash_dump(int secs) +{ + return prepare_crash_dump(secs); +} + +static ERTS_INLINE void +break_requested(void) +{ + /* + * just set a flag - checked for and handled by + * scheduler threads erts_check_io() (not signal handler). + */ +#ifdef DEBUG + fprintf(stderr,"break!\n"); +#endif + if (ERTS_BREAK_REQUESTED) + erl_exit(ERTS_INTR_EXIT, ""); + + ERTS_SET_BREAK_REQUESTED; + ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */ +} + +/* Disable break */ +void erts_set_ignore_break(void) { + +} + +/* Don't use ctrl-c for break handler but let it be + used by the shell instead (see user_drv.erl) */ +void erts_replace_intr(void) { + struct termios mode; + + if (isatty(0)) { + tcgetattr(0, &mode); + + /* here's an example of how to replace ctrl-c with ctrl-u */ + /* mode.c_cc[VKILL] = 0; + mode.c_cc[VINTR] = CKILL; */ + + mode.c_cc[VINTR] = 0; /* disable ctrl-c */ + tcsetattr(0, TCSANOW, &mode); + replace_intr = 1; + } +} + +void init_break_handler(void) +{ + +} + +int sys_max_files(void) +{ + return(max_files); +} + + +/************************** OS info *******************************/ + +/* Used by erlang:info/1. */ +/* (This code was formerly in drv.XXX/XXX_os_drv.c) */ + +char os_type[] = "ose"; + +void +os_flavor(char* namebuf, /* Where to return the name. */ + unsigned size) /* Size of name buffer. */ +{ +#if 0 + struct utsname uts; /* Information about the system. */ + char* s; + + (void) uname(&uts); + for (s = uts.sysname; *s; s++) { + if (isupper((int) *s)) { + *s = tolower((int) *s); + } + } + strcpy(namebuf, uts.sysname); +#else + strncpy(namebuf, "release", size); +#endif +} + +void +os_version(pMajor, pMinor, pBuild) +int* pMajor; /* Pointer to major version. */ +int* pMinor; /* Pointer to minor version. */ +int* pBuild; /* Pointer to build number. */ +{ + *pMajor = 5; + *pMinor = 7; + *pBuild = 0; +} + +void init_getenv_state(GETENV_STATE *state) +{ + erts_smp_rwmtx_rlock(&environ_rwmtx); + *state = NULL; +} + +char **environ; /*ose - needs replacement*/ + +char *getenv_string(GETENV_STATE *state0) +{ + char **state = (char **) *state0; + char *cp; + + ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx)); + + if (state == NULL) + state = environ; + + cp = *state++; + *state0 = (GETENV_STATE) state; + + return cp; +} + +void fini_getenv_state(GETENV_STATE *state) +{ + *state = NULL; + erts_smp_rwmtx_runlock(&environ_rwmtx); +} + + +/************************** Port I/O *******************************/ + +/* I. Common stuff */ + +union SIGNAL { + SIGSELECT sig_no; + struct FmReadPtr fm_read_reply; + struct FmWritePtr fm_write_reply; + struct async async; +}; + +/* II. The spawn/fd drivers */ + +/* + * Decreasing the size of it below 16384 is not allowed. + */ +#define ERTS_SYS_READ_BUF_SZ (64*1024) + +/* Driver interfaces */ +static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*); +static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*); +static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT, + char **, ErlDrvSizeT); +static int spawn_init(void); +static void fd_stop(ErlDrvData); +static void erl_stop(ErlDrvData); +static void ready_input(ErlDrvData, ErlDrvEvent); +static void ready_output(ErlDrvData, ErlDrvEvent); +static void output(ErlDrvData, char*, ErlDrvSizeT); +static void stop_select(ErlDrvEvent, void*); + +static PROCESS +get_signal_proxy_pid(void) { + union SIGNAL *sig; + SIGSELECT any_sig[] = {1,ERTS_SIGNAL_OSE_DRV_ATTACH}; + + if (!sig_proxy_pid) { + sig = alloc(sizeof(union SIGNAL), ERTS_SIGNAL_OSE_DRV_ATTACH); + hunt("ose_signal_driver_proxy", 0, NULL, &sig); + sig = receive(any_sig); + sig_proxy_pid = sender(&sig); + free_buf(&sig); + } + ASSERT(sig_proxy_pid); + return sig_proxy_pid; +} + +static ErlDrvOseEventId +resolve_signal(union SIGNAL* sig) { + switch(sig->sig_no) { + + case FM_READ_PTR_REPLY: + return (ErlDrvOseEventId)sig->fm_read_reply.handle; + + case FM_WRITE_PTR_REPLY: + return (ErlDrvOseEventId)sig->fm_write_reply.handle; + + case ERTS_SIGNAL_OSE_DRV_ATTACH: + return (ErlDrvOseEventId)sig->async.target; + + default: + break; + } + return (ErlDrvOseEventId)-1; +} + +struct erl_drv_entry spawn_driver_entry = { + spawn_init, + spawn_start, + NULL, /* erl_stop, */ + output, + ready_input, + ready_output, + "spawn", + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + ERL_DRV_FLAG_USE_PORT_LOCKING, + NULL, NULL, + stop_select +}; +struct erl_drv_entry fd_driver_entry = { + NULL, + fd_start, + fd_stop, + output, + ready_input, + ready_output, + "fd", + NULL, + NULL, + fd_control, + NULL, + NULL, + NULL, /* ready_async */ + NULL, /* flush */ + NULL, /* call */ + NULL, /* event */ + ERL_DRV_EXTENDED_MARKER, + ERL_DRV_EXTENDED_MAJOR_VERSION, + ERL_DRV_EXTENDED_MINOR_VERSION, + 0, /* ERL_DRV_FLAGs */ + NULL, /* handle2 */ + NULL, /* process_exit */ + stop_select +}; + +static void +set_spawn_fd(int local_fd, int remote_fd, PROCESS remote_pid) { + PROCESS vm_pid; + FmHandle handle; + char env_val[55]; + char env_name[10]; + EfsStatus efs_res; + + /* get pid of pipevm and handle of chosen fd */ + efs_res = efs_examine_fd(local_fd, FLIB_FD_VMPID, &vm_pid, 0); + DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); + + /* setup the file descriptor to buffer per line */ + efs_res = efs_config_fd(local_fd, FLIB_FD_BUFMODE, FM_BUFF_LINE, + FLIB_FD_BUFSIZE, 80, 0); + DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); + + /* duplicate handle and set spawn pid owner */ + efs_res = efs_dup_to(local_fd, remote_pid, &handle); + DEBUG_CHECK_RES(efs_res, EFS_SUCCESS); + + sprintf(env_name, "FD%d", remote_fd); + + /* Syntax of the environment variable: + * "FD#" "<pid of pipevm>,<handle>,<buffer mode>,<buff size>,<omode>" */ + sprintf(env_val, "0x%lx,0x%lx,%lu,%lu,0x%x", + vm_pid, handle, + FM_BUFF_LINE, 80, + O_APPEND); + + set_env(remote_pid, env_name, env_val); +} + +static ErlDrvData +set_driver_data(ErlDrvPort port_num, + int ifd, + int ofd, + int packet_bytes, + int read_write, + int exit_status, + PROCESS pid) +{ + Port *prt; + ErtsSysReportExit *report_exit; + + prt = erts_drvport2port(port_num); + if (prt != ERTS_INVALID_ERL_DRV_PORT) { + prt->os_pid = pid; + } + + /* READ */ + if (read_write & DO_READ) { + EfsStatus res = efs_examine_fd(ifd, FLIB_FD_HANDLE, + &driver_data[ifd].handle, 0); + if (res != EFS_SUCCESS) + ramlog_printf("%s:%d: efs_examine_fd(%d) failed with %d\n", + __FILE__,__LINE__,ifd,errno); + driver_data[ifd].ifd = ifd; + driver_data[ifd].packet_bytes = packet_bytes; + driver_data[ifd].port_num = port_num; + driver_data[ifd].pid = pid; + + /* async read struct */ + memset(&driver_data[ifd].aiocb, 0, sizeof(struct aiocb)); + driver_data[ifd].aiocb.aio_buf = driver_alloc(AIO_PIPE_SIZE); + driver_data[ifd].aiocb.aio_fildes = ifd; + driver_data[ifd].aiocb.aio_nbytes = (packet_bytes?packet_bytes:AIO_PIPE_SIZE); + driver_data[ifd].alive = 1; + driver_data[ifd].status = 0; + driver_data[ifd].input_event = + erl_drv_ose_event_alloc(FM_READ_PTR_REPLY, + driver_data[ifd].handle, resolve_signal, + &driver_data[ifd].ifd); + + /* READ & WRITE */ + if (read_write & DO_WRITE) { + driver_data[ifd].ofd = ofd; + efs_examine_fd(ofd, FLIB_FD_HANDLE, &driver_data[ofd].handle, 0); + + driver_data[ifd].output_event = + erl_drv_ose_event_alloc(FM_WRITE_PTR_REPLY, + driver_data[ofd].handle, resolve_signal, + &driver_data[ofd].ofd); + driver_data[ofd].pid = pid; + if (ifd != ofd) { + driver_data[ofd] = driver_data[ifd]; + driver_data[ofd].aiocb.aio_buf = NULL; + } + } + else { /* READ ONLY */ + driver_data[ifd].ofd = -1; + } + + /* enable input event */ + (void) driver_select(port_num, driver_data[ifd].input_event, + (ERL_DRV_READ | ERL_DRV_USE), 1); + + if (aio_read(&driver_data[ifd].aiocb)) + ramlog_printf("%s:%d: aio_read(%d) failed with %d\n", + __FILE__,__LINE__,ifd,errno); + } + else { /* WRITE ONLY */ + efs_examine_fd(ofd, FLIB_FD_HANDLE, &driver_data[ofd].handle, 0); + driver_data[ofd].packet_bytes = packet_bytes; + driver_data[ofd].port_num = port_num; + driver_data[ofd].ofd = ofd; + driver_data[ofd].pid = pid; + driver_data[ofd].alive = 1; + driver_data[ofd].status = 0; + driver_data[ofd].output_event = + erl_drv_ose_event_alloc(FM_WRITE_PTR_REPLY, driver_data[ofd].handle, + resolve_signal, &driver_data[ofd].ofd); + driver_data[ofd].input_event = driver_data[ofd].output_event; + } + + /* this is used for spawned load modules, and is needed + * to properly uninstall them */ + if (exit_status) { + struct PmProgramInfo *info; + int install_handle_size; + union SIGNAL *sig; + PmStatus pm_status; + report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT, + sizeof(ErtsSysReportExit)); + report_exit->next = report_exit_list; + report_exit->port = erts_drvport2id(port_num); + report_exit->pid = pid; + report_exit->ifd = (read_write & DO_READ) ? ifd : -1; + report_exit->ofd = (read_write & DO_WRITE) ? ofd : -1; + report_exit_list = report_exit; + report_exit->attach_event = + erl_drv_ose_event_alloc(ERTS_SIGNAL_OSE_DRV_ATTACH, pid, + resolve_signal, &driver_data[ifd].ifd); + + /* setup ifd and ofd report exit */ + driver_data[ifd].report_exit = report_exit; + driver_data[ofd].report_exit = report_exit; + + pm_status = ose_pm_program_info(pid, &info); + DEBUG_CHECK_RES(pm_status, PM_SUCCESS); + + install_handle_size = strlen(info->install_handle)+1; + driver_data[ifd].install_handle = driver_alloc(install_handle_size); + strcpy(driver_data[ifd].install_handle, + info->install_handle); + + free_buf((union SIGNAL **)&info); + + sig = alloc(sizeof(struct async), ERTS_SIGNAL_OSE_DRV_ATTACH); + sig->async.target = pid; + send(&sig, get_signal_proxy_pid()); + + /* this event will trigger when we receive an attach signal + * from the recently dead load module */ + (void)driver_select(port_num,report_exit->attach_event, DO_READ, 1); + } + else { + report_exit = NULL; + } + + /* the return value is the pointer to the driver_data struct we created + * in this function, it will be used in the drivers input + * and output functions */ + return (ErlDrvData)((!(read_write & DO_READ) && read_write & DO_WRITE) + ? &driver_data[ofd] + : &driver_data[ifd]); +} + +static int spawn_init() +{ + int i; + + driver_data = (struct driver_data *) + erts_alloc(ERTS_ALC_T_DRV_TAB, max_files * sizeof(struct driver_data)); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, + max_files * sizeof(struct driver_data)); + + for (i = 0; i < max_files; i++) + driver_data[i].pid = -1; + + return 1; +} + +static void +init_fd_data(int fd, ErlDrvPort port_num) +{ + fd_data[fd].buf = NULL; + fd_data[fd].cpos = NULL; + fd_data[fd].remain = 0; + fd_data[fd].sz = 0; + fd_data[fd].psz = 0; +} + +/* FIXME write a decent text on pipes on ose */ +static ErlDrvData +spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) +{ + int ifd[2]; + int ofd[2]; + static uint32_t ticker = 1; + PmStatus pm_status; + OSDOMAIN domain = PM_NEW_DOMAIN; + PROCESS progpid, mainbid, mainpid; + char *handle = NULL; + struct PmProgramInfo *info; + char *args = NULL; + char *tmp_handle; + ErlDrvData res = (ErlDrvData)-1; + int handle_size; + char *ptr; + + + args = driver_alloc(strlen(name)+1); + strcpy(args, name); + /* We need to handle name in three parts + * - install handle (must be unique) + * - install binary (needed for ose_pm_install_load_module()) + * - full path (as argument to the spawned applications env.var + */ + + /* full path including arguments */ + args = driver_alloc(strlen(name)+1); + strcpy(args, name); + + /* handle path */ + tmp_handle = strrchr(name, '/'); + if (tmp_handle == NULL) { + tmp_handle = name; + } + else { + tmp_handle++; + } + + /* handle args */ + ptr = strchr(tmp_handle, ' '); + if (ptr != NULL) { + *ptr = '\0'; + handle_size = ptr - tmp_handle; + } + else { + handle_size = strlen(name)+1; + } + + /* make room for ticker */ + handle_size += (ticker<10)?3:((ticker<100)?4:5); + handle = driver_alloc(handle_size); + + do { + snprintf(handle, handle_size, "%s_%d", tmp_handle, ticker); + pm_status = ose_pm_install_load_module(0, "ELF", name, handle, + 0, 0, NULL); + ticker++; + } while (pm_status == PM_EINSTALL_HANDLE_ALREADY_INSTALLED); + + if (pm_status != PM_SUCCESS) { + errno = ENOSYS; /* FIXME add comment */ + return ERL_DRV_ERROR_ERRNO; + } + + /* Create Program */ + pm_status = ose_pm_create_program(&domain, handle, 0, 0, + NULL, &progpid, &mainbid); + DEBUG_CHECK_RES(pm_status, PM_SUCCESS); + + /* Get the mainpid from the newly created program */ + pm_status = ose_pm_program_info(progpid, &info); + DEBUG_CHECK_RES(pm_status, PM_SUCCESS); + + mainpid = info->main_process; + free_buf ((union SIGNAL **)&info); + + /* pipevm needs to be started + * pipe will return 0 if success, -1 if not, + * errno will be set */ + if (pipe(ifd) != 0 || pipe(ofd) != 0) { + DEBUG_CHECK_RES(0, -1); + ASSERT(0); + } + + /* setup driver data */ + res = set_driver_data(port_num, ofd[0], ifd[1], opts->packet_bytes, + opts->read_write, 1 /* opts->exit_status */, progpid); + + /* init the fd_data array for read/write */ + init_fd_data(ofd[0], port_num); + init_fd_data(ifd[1], port_num); + + /* setup additional configurations + * for the spawned applications environment */ + if (args != NULL) { + set_env(progpid, "ARGV", args); + } + set_env(mainbid, "EFS_RESOLVE_TMO", 0); + set_spawn_fd(ifd[0], 0, mainpid); + set_spawn_fd(ofd[1], 1, mainpid); + set_spawn_fd(ofd[1], 2, mainpid); + + /* start the spawned program */ + pm_status = ose_pm_start_program(mainbid); + DEBUG_CHECK_RES(pm_status, PM_SUCCESS); + + /* close unused fd's */ + close(ifd[0]); + close(ofd[1]); + + if (handle) { + driver_free(handle); + } + + return (ErlDrvData)res; +} + +#define FD_DEF_HEIGHT 24 +#define FD_DEF_WIDTH 80 +/* Control op */ +#define FD_CTRL_OP_GET_WINSIZE 100 + +static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height) +{ +#ifdef TIOCGWINSZ + struct winsize ws; + if (ioctl(fd,TIOCGWINSZ,&ws) == 0) { + *width = (Uint32) ws.ws_col; + *height = (Uint32) ws.ws_row; + return 0; + } +#endif + return -1; +} + +static ErlDrvSSizeT fd_control(ErlDrvData drv_data, + unsigned int command, + char *buf, ErlDrvSizeT len, + char **rbuf, ErlDrvSizeT rlen) +{ + struct driver_data *data = (struct driver_data *)drv_data; + char resbuff[2*sizeof(Uint32)]; + switch (command) { + case FD_CTRL_OP_GET_WINSIZE: + { + Uint32 w,h; + if (fd_get_window_size(data->ifd,&w,&h)) + return 0; + memcpy(resbuff,&w,sizeof(Uint32)); + memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32)); + } + break; + default: + return 0; + } + if (rlen < 2*sizeof(Uint32)) { + *rbuf = driver_alloc(2*sizeof(Uint32)); + } + memcpy(*rbuf,resbuff,2*sizeof(Uint32)); + return 2*sizeof(Uint32); +} + +static ErlDrvData fd_start(ErlDrvPort port_num, char* name, + SysDriverOpts* opts) +{ + ErlDrvData res; + + CHLD_STAT_LOCK; + if (opts->read_write & DO_READ) { + init_fd_data(opts->ifd, port_num); + } + if (opts->read_write & DO_WRITE) { + init_fd_data(opts->ofd, port_num); + } + res = set_driver_data(port_num, opts->ifd, opts->ofd, + opts->packet_bytes, + opts->read_write, 0, -1); + CHLD_STAT_UNLOCK; + return res; +} + +static void clear_fd_data(int fd) +{ + if (fd_data[fd].sz > 0) { + erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf); + ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz); + } + fd_data[fd].buf = NULL; + fd_data[fd].sz = 0; + fd_data[fd].remain = 0; + fd_data[fd].cpos = NULL; + fd_data[fd].psz = 0; +} + +static void nbio_stop_fd(ErlDrvPort prt, ErlDrvEvent ev) +{ + int *fd; + driver_select(prt,ev,DO_READ|DO_WRITE,0); + erl_drv_ose_event_fetch(ev, NULL, NULL, (void **)&fd); + clear_fd_data(*fd); + SET_BLOCKING(*fd); +} + +static void fd_stop(ErlDrvData drv_data) /* Does not close the fds */ +{ + struct driver_data *data = (struct driver_data *)drv_data; + + if (data->ofd != -1) { + if (data->ifd != data->ofd) { /* read and write */ + nbio_stop_fd(data->port_num, data->input_event); + nbio_stop_fd(data->port_num, data->output_event); + } + else { /* write only */ + nbio_stop_fd(data->port_num, data->output_event); + } + } + else { /* read only */ + nbio_stop_fd(data->port_num, data->input_event); + } +} + + +static void erl_stop(ErlDrvData drv_data) +{ + struct driver_data *data = (struct driver_data *)drv_data; + + CHLD_STAT_LOCK; + data->pid = -1; + CHLD_STAT_UNLOCK; + + if (data->ofd != -1) { + if (data->ifd != data->ofd) { /* read and write */ + nbio_stop_fd(data->port_num, data->input_event); + nbio_stop_fd(data->port_num, data->output_event); + } + else { /* write only */ + nbio_stop_fd(data->port_num, data->output_event); + } + } + else { /* read only */ + nbio_stop_fd(data->port_num, data->input_event); + } + close(data->ifd); + close(data->ofd); +} + +/* The parameter e is a pointer to the driver_data structure + * related to the fd to be used as output */ +static void output(ErlDrvData drv_data, char* buf, ErlDrvSizeT len) +{ + ErlDrvSizeT sz; + char lb[4]; + char* lbp; + struct driver_data *data = (struct driver_data *)drv_data; + + if (((data->packet_bytes == 2) && + (len > 0xffff)) || (data->packet_bytes == 1 && len > 0xff)) { + driver_failure_posix(data->port_num, EINVAL); + return; /* -1; */ + } + put_int32(len, lb); + lbp = lb + (4-(data->packet_bytes)); + + if ((sz = driver_sizeq(data->port_num)) > 0) { + if (data->packet_bytes != 0) { + driver_enq(data->port_num, lbp, data->packet_bytes); + } + driver_enq(data->port_num, buf, len); + + if (sz + len + data->packet_bytes >= (1 << 13)) + set_busy_port(data->port_num, 1); + } + else { + char *pbbuf; + if (data->packet_bytes != 0) { + pbbuf = malloc(len + data->packet_bytes); + int i; + for (i = 0; i < data->packet_bytes; i++) { + *pbbuf++ = *lbp++; + } + strncpy(pbbuf, buf, len); + pbbuf -= data->packet_bytes; + } + driver_select(data->port_num, data->output_event, + ERL_DRV_WRITE|ERL_DRV_USE, 1); + WRITE_AIO(data->ofd, + (data->packet_bytes ? len+data->packet_bytes : len), + (data->packet_bytes ? pbbuf : buf)); + if (data->packet_bytes != 0) free(pbbuf); + } + return; /* 0; */ +} + +/* This function is being run when we in recieve + * either a read of 0 bytes, or the attach signal from a dying + * spawned load module */ +static int port_inp_failure(ErlDrvPort port_num, ErlDrvEvent ready_fd, int res) + /* Result: 0 (eof) or -1 (error) */ +{ + int *fd; + SIGSELECT sig_no; + ASSERT(res <= 0); + + erl_drv_ose_event_fetch(ready_fd,&sig_no, NULL, (void **)&fd); + /* As we need to handle two signals, we do this in two steps */ + if (driver_data[*fd].alive) { + report_exit_status(driver_data[*fd].report_exit, 0); /* status? */ + } + else { + driver_select(port_num,ready_fd,DO_READ|DO_WRITE,0); + clear_fd_data(*fd); + driver_report_exit(driver_data[*fd].port_num, driver_data[*fd].status); + /* As we do not really know if the spawn has crashed or exited nicely + * we do not check the result status of the following call.. FIXME + * can we handle this in a better way? */ + ose_pm_uninstall_load_module(driver_data[*fd].install_handle); + driver_free(driver_data[*fd].install_handle); + driver_free((void *)driver_data[*fd].aiocb.aio_buf); + + close(*fd); + } + + return 0; +} + +/* The parameter e is a pointer to the driver_data structure + * related to the fd to be used as output. + * ready_fd is the event that triggered this call to ready_input */ +static void ready_input(ErlDrvData drv_data, ErlDrvEvent ready_fd) +{ + int res; + Uint h; + char *buf; + union SIGNAL *sig; + struct driver_data *data = (struct driver_data *)drv_data; + + sig = erl_drv_ose_get_signal(ready_fd); + ASSERT(sig); + + + while (sig) { + /* If we've recieved an attach signal, we need to handle + * it in port_inp_failure */ + if (sig->sig_no == ERTS_SIGNAL_OSE_DRV_ATTACH) { + port_inp_failure(data->port_num, ready_fd, 0); + } + else { + res = sig->fm_read_reply.actual; + if (res == 0) { + port_inp_failure(data->port_num, ready_fd, res); + break; + } + + if (data->packet_bytes == 0) { + if (res < 0) { + if ((errno != EINTR) && (errno != ERRNO_BLOCK)) { + port_inp_failure(data->port_num, ready_fd, res); + } + } + else if (res == 0) { + /* read of 0 bytes, eof, otherside of pipe is assumed dead */ + port_inp_failure(data->port_num, ready_fd, res); + break; + } + else { + buf = driver_alloc(res); + memcpy(buf, (void *)data->aiocb.aio_buf, res); + driver_select(data->port_num, data->output_event, + ERL_DRV_WRITE|ERL_DRV_USE, 1); + driver_output(data->port_num, (char*) buf, res); + driver_free(buf); + } + /* clear the previous read */ + memset(data->aiocb.aio_buf, 0, res); + + /* issue a new read */ + DISPATCH_AIO(sig); + aio_read(&data->aiocb); + } + else if (data->packet_bytes && fd_data[data->ifd].remain > 0) { + /* we've read a partial package, or a header */ + + if (res == fd_data[data->ifd].remain) { /* we are done! */ + char *buf = data->aiocb.aio_buf; + int i; + + /* do we have anything buffered? */ + if (fd_data[data->ifd].buf != NULL) { + memcpy(fd_data[data->ifd].buf + fd_data[data->ifd].sz, + buf, res); + buf = fd_data[data->ifd].buf; + } + + fd_data[data->ifd].sz += res; + driver_output(data->port_num, buf, (fd_data[data->ifd].sz>0?fd_data[data->ifd].sz:res)); + clear_fd_data(data->ifd); + + /* clear the previous read */ + memset(data->aiocb.aio_buf, 0, res); + + /* issue a new read */ + DISPATCH_AIO(sig); + data->aiocb.aio_nbytes = data->packet_bytes; + + if (data->aiocb.aio_buf == NULL) { + port_inp_failure(data->port_num, ready_fd, -1); + } + aio_read(&data->aiocb); + } + else if(res < fd_data[data->ifd].remain) { /* received part of a package */ + if (fd_data[data->ifd].sz == 0) { + + fd_data[data->ifd].sz += res; + memcpy(fd_data[data->ifd].buf, data->aiocb.aio_buf, res); + fd_data[data->ifd].remain -= res; + } + else { + memcpy(fd_data[data->ifd].buf + fd_data[data->ifd].sz, + data->aiocb.aio_buf, res); + fd_data[data->ifd].sz += res; + fd_data[data->ifd].remain -= res; + } + /* clear the previous read */ + memset(data->aiocb.aio_buf, 0, res); + + /* issue a new read */ + DISPATCH_AIO(sig); + data->aiocb.aio_nbytes = fd_data[data->ifd].remain; + + if (data->aiocb.aio_buf == NULL) { + port_inp_failure(data->port_num, ready_fd, -1); + } + aio_read(&data->aiocb); + } + } + else if (data->packet_bytes && fd_data[data->ifd].remain == 0) { /* we've recieved a header */ + + /* analyze the header FIXME */ + switch (data->packet_bytes) { + case 1: h = get_int8(data->aiocb.aio_buf); break; + case 2: h = get_int16(data->aiocb.aio_buf); break; + case 4: h = get_int32(data->aiocb.aio_buf); break; + } + + fd_data[data->ifd].buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h + data->packet_bytes); + fd_data[data->ifd].remain = ((h + data->packet_bytes) - res); + + /* clear the previous read */ + memset(data->aiocb.aio_buf, 0, data->packet_bytes); + + /* issue a new read */ + DISPATCH_AIO(sig); + data->aiocb.aio_nbytes = h; + + if (data->aiocb.aio_buf == NULL) { + port_inp_failure(data->port_num, ready_fd, -1); + } + aio_read(&data->aiocb); + } + } + sig = erl_drv_ose_get_signal(ready_fd); + } +} + + +/* The parameter e is a pointer to the driver_data structure + * related to the fd to be used as output. + * ready_fd is the event that triggered this call to ready_input */ +static void ready_output(ErlDrvData drv_data, ErlDrvEvent ready_fd) +{ + SysIOVec *iov; + int vlen; + int res; + union SIGNAL *sig; + struct driver_data *data = (struct driver_data *)drv_data; + + sig = erl_drv_ose_get_signal(ready_fd); + ASSERT(sig); + + while (sig != NULL) { + if (sig->fm_write_reply.actual <= 0) { + int status; + + status = efs_status_to_errno(sig->fm_write_reply.status); + driver_select(data->port_num, ready_fd, ERL_DRV_WRITE, 0); + DISPATCH_AIO(sig); + FREE_AIO(sig->fm_write_reply.buffer); + + driver_failure_posix(data->port_num, status); + } + else { /* written bytes > 0 */ + iov = driver_peekq(data->port_num, &vlen); + if (vlen > 0) { + DISPATCH_AIO(sig); + FREE_AIO(sig->fm_write_reply.buffer); + res = driver_deq(data->port_num, iov[0].iov_len); + if (res > 0) { + iov = driver_peekq(data->port_num, &vlen); + WRITE_AIO(data->ofd, iov[0].iov_len, iov[0].iov_base); + } + } + else if (vlen == 0) { + DISPATCH_AIO(sig); + FREE_AIO(sig->fm_write_reply.buffer); + } + + } + sig = erl_drv_ose_get_signal(ready_fd); + } +} + +static void stop_select(ErlDrvEvent ready_fd, void* _) +{ + int *fd; + erl_drv_ose_event_fetch(ready_fd, NULL, NULL, (void **)&fd); + erl_drv_ose_event_free(ready_fd); + close(*fd); +} + + +void erts_do_break_handling(void) +{ + struct termios temp_mode; + int saved = 0; + + /* + * Most functions that do_break() calls are intentionally not thread safe; + * therefore, make sure that all threads but this one are blocked before + * proceeding! + */ + erts_smp_thr_progress_block(); + + /* during break we revert to initial settings */ + /* this is done differently for oldshell */ + if (using_oldshell && !replace_intr) { + SET_BLOCKING(1); + } + else if (isatty(0)) { + tcgetattr(0,&temp_mode); + tcsetattr(0,TCSANOW,&initial_tty_mode); + saved = 1; + } + + /* call the break handling function, reset the flag */ + do_break(); + + fflush(stdout); + + /* after break we go back to saved settings */ + if (using_oldshell && !replace_intr) { + SET_NONBLOCKING(1); + } + else if (saved) { + tcsetattr(0,TCSANOW,&temp_mode); + } + + erts_smp_thr_progress_unblock(); +} + +static pid_t +getpid(void) +{ + return get_bid(current_process()); +} + +int getpagesize(void) +{ + return 1024; +} + + +/* Fills in the systems representation of the jam/beam process identifier. +** The Pid is put in STRING representation in the supplied buffer, +** no interpretatione of this should be done by the rest of the +** emulator. The buffer should be at least 21 bytes long. +*/ +void sys_get_pid(char *buffer, size_t buffer_size){ + pid_t p = getpid(); + /* Assume the pid is scalar and can rest in an unsigned long... */ + erts_snprintf(buffer, buffer_size, "%lu",(unsigned long) p); +} + +int +erts_sys_putenv_raw(char *key, char *value) { + return erts_sys_putenv(key, value); +} +int +erts_sys_putenv(char *key, char *value) +{ + int res; + + erts_smp_rwmtx_rwlock(&environ_rwmtx); + res = set_env(get_bid(current_process()), key, + value); + erts_smp_rwmtx_rwunlock(&environ_rwmtx); + return res; +} + + +int +erts_sys_unsetenv(char *key) +{ + int res; + + erts_smp_rwmtx_rwlock(&environ_rwmtx); + res = set_env(get_bid(current_process()),key,NULL); + erts_smp_rwmtx_rwunlock(&environ_rwmtx); + + return res; +} + +int +erts_sys_getenv__(char *key, char *value, size_t *size) +{ + int res; + char *orig_value = get_env(get_bid(current_process()), key); + if (!orig_value) + res = -1; + else { + size_t len = sys_strlen(orig_value); + if (len >= *size) { + *size = len + 1; + res = 1; + } + else { + *size = len; + sys_memcpy((void *) value, (void *) orig_value, len+1); + res = 0; + } + free_buf((union SIGNAL **)&orig_value); + } + return res; +} + +int +erts_sys_getenv_raw(char *key, char *value, size_t *size) { + return erts_sys_getenv(key, value, size); +} + +/* + * erts_sys_getenv + * returns: + * -1, if environment key is not set with a value + * 0, if environment key is set and value fits into buffer res + * 1, if environment key is set but does not fit into buffer res + * res is set with the needed buffer res value + */ + +int +erts_sys_getenv(char *key, char *value, size_t *size) +{ + int res; + erts_smp_rwmtx_rlock(&environ_rwmtx); + res = erts_sys_getenv__(key, value, size); + erts_smp_rwmtx_runlock(&environ_rwmtx); + return res; +} + +void +sys_init_io(void) +{ + fd_data = (struct fd_data *) + erts_alloc(ERTS_ALC_T_FD_TAB, max_files * sizeof(struct fd_data)); + erts_smp_atomic_add_nob(&sys_misc_mem_sz, + max_files * sizeof(struct fd_data)); +} + +extern const char pre_loaded_code[]; +extern Preload pre_loaded[]; + +void erts_sys_alloc_init(void) +{ +} + +void *erts_sys_alloc(ErtsAlcType_t t, void *x, Uint sz) +{ + void *res = malloc((size_t) sz); +#if HAVE_ERTS_MSEG + if (!res) { + erts_mseg_clear_cache(); + return malloc((size_t) sz); + } +#endif + return res; +} + +void *erts_sys_realloc(ErtsAlcType_t t, void *x, void *p, Uint sz) +{ + void *res = realloc(p, (size_t) sz); +#if HAVE_ERTS_MSEG + if (!res) { + erts_mseg_clear_cache(); + return realloc(p, (size_t) sz); + } +#endif + return res; +} + +void erts_sys_free(ErtsAlcType_t t, void *x, void *p) +{ + free(p); +} + +/* Return a pointer to a vector of names of preloaded modules */ + +Preload* +sys_preloaded(void) +{ + return pre_loaded; +} + +/* Return a pointer to preloaded code for module "module" */ +unsigned char* +sys_preload_begin(Preload* p) +{ + return p->code; +} + +/* Clean up if allocated */ +void sys_preload_end(Preload* p) +{ + /* Nothing */ +} + +/* Read a key from console (?) */ + +int sys_get_key(fd) +int fd; +{ + int c; + unsigned char rbuf[64]; + + fflush(stdout); /* Flush query ??? */ + + if ((c = read(fd,rbuf,64)) <= 0) { + return c; + } + + return rbuf[0]; +} + + +#ifdef DEBUG + +extern int erts_initialized; +void +erl_assert_error(const char* expr, const char* func, + const char* file, int line) +{ + fflush(stdout); + fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n", + file, line, func, expr); + fflush(stderr); + ramlog_printf("%s:%d:%s() Assertion failed: %s\n", + file, line, func, expr); + + abort(); +} + +void +erl_debug(char* fmt, ...) +{ + char sbuf[1024]; /* Temporary buffer. */ + va_list va; + + if (debug_log) { + va_start(va, fmt); + vsprintf(sbuf, fmt, va); + va_end(va); + fprintf(stderr, "%s", sbuf); + } +} + +#endif /* DEBUG */ + +static ERTS_INLINE void +report_exit_status(ErtsSysReportExit *rep, int status) +{ + if (rep->ifd >= 0) { + driver_data[rep->ifd].alive = 0; + driver_data[rep->ifd].status = status; + } + if (rep->ofd >= 0) { + driver_data[rep->ofd].alive = 0; + driver_data[rep->ofd].status = status; + } + + erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep); +} + +#define ERTS_REPORT_EXIT_STATUS report_exit_status + +/* + * Called from schedule() when it runs out of runnable processes, + * or when Erlang code has performed INPUT_REDUCTIONS reduction + * steps. runnable == 0 iff there are no runnable Erlang processes. + */ +void +erl_sys_schedule(int runnable) +{ + ASSERT(get_fsem(current_process()) == 0); +#ifdef ERTS_SMP + ASSERT(erts_get_scheduler_data()->no == 1); + ERTS_CHK_IO(!runnable); +#else + ERTS_CHK_IO( 1 ); +#endif + ASSERT(get_fsem(current_process()) == 0); + ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking()); +} + + +#ifdef ERTS_SMP + +void +erts_sys_main_thread(void) +{ + erts_thread_disable_fpe(); + + /* Become signal receiver thread... */ +#ifdef ERTS_ENABLE_LOCK_CHECK + erts_lc_set_thread_name("signal_receiver"); +#endif + + while (1) { + static const SIGSELECT sigsel[] = {0}; + union SIGNAL *msg = receive(sigsel); + + fprintf(stderr,"Main thread got message %d from 0x%x!!\r\n", + msg->sig_no, sender(&msg)); + free_buf(&msg); + } +} + +#endif /* ERTS_SMP */ + +void +erl_sys_args(int* argc, char** argv) +{ + int i, j; + + erts_smp_rwmtx_init(&environ_rwmtx, "environ"); + + init_check_io(); + + /* Handled arguments have been marked with NULL. Slide arguments + not handled towards the beginning of argv. */ + for (i = 0, j = 0; i < *argc; i++) { + if (argv[i]) + argv[j++] = argv[i]; + } + *argc = j; + +} diff --git a/erts/emulator/sys/ose/sys_float.c b/erts/emulator/sys/ose/sys_float.c new file mode 100644 index 0000000000..d9d6bb7c04 --- /dev/null +++ b/erts/emulator/sys/ose/sys_float.c @@ -0,0 +1,844 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2001-2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "global.h" +#include "erl_process.h" + + +#ifdef NO_FPE_SIGNALS + +void +erts_sys_init_float(void) +{ +# ifdef SIGFPE + sys_sigset(SIGFPE, SIG_IGN); /* Ignore so we can test for NaN and Inf */ +# endif +} + +#else /* !NO_FPE_SIGNALS */ + +#ifdef ERTS_SMP +static erts_tsd_key_t fpe_key; + +/* once-only initialisation early in the main thread (via erts_sys_init_float()) */ +static void erts_init_fp_exception(void) +{ + /* XXX: the wrappers prevent using a pthread destructor to + deallocate the key's value; so when/where do we do that? */ + erts_tsd_key_create(&fpe_key); +} + +void erts_thread_init_fp_exception(void) +{ + unsigned long *fpe = erts_alloc(ERTS_ALC_T_FP_EXCEPTION, sizeof(*fpe)); + *fpe = 0L; + erts_tsd_set(fpe_key, fpe); +} + +static ERTS_INLINE volatile unsigned long *erts_thread_get_fp_exception(void) +{ + return (volatile unsigned long*)erts_tsd_get(fpe_key); +} +#else /* !SMP */ +#define erts_init_fp_exception() /*empty*/ +static volatile unsigned long fp_exception; +#define erts_thread_get_fp_exception() (&fp_exception) +#endif /* SMP */ + +volatile unsigned long *erts_get_current_fp_exception(void) +{ + Process *c_p; + + c_p = erts_get_current_process(); + if (c_p) + return &c_p->fp_exception; + return erts_thread_get_fp_exception(); +} + +static void set_current_fp_exception(unsigned long pc) +{ + volatile unsigned long *fpexnp = erts_get_current_fp_exception(); + ASSERT(fpexnp != NULL); + *fpexnp = pc; +} + +void erts_fp_check_init_error(volatile unsigned long *fpexnp) +{ + char buf[64]; + snprintf(buf, sizeof buf, "ERTS_FP_CHECK_INIT at %p: detected unhandled FPE at %p\r\n", + __builtin_return_address(0), (void*)*fpexnp); + if (write(2, buf, strlen(buf)) <= 0) + erl_exit(ERTS_ABORT_EXIT, "%s", buf); + *fpexnp = 0; +#if defined(__i386__) || defined(__x86_64__) + erts_restore_fpu(); +#endif +} + +/* Is there no standard identifier for Darwin/MacOSX ? */ +#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__) +#define __DARWIN__ 1 +#endif + +#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__) + +static void unmask_x87(void) +{ + unsigned short cw; + + __asm__ __volatile__("fstcw %0" : "=m"(cw)); + cw &= ~(0x01|0x04|0x08); /* unmask IM, ZM, OM */ + __asm__ __volatile__("fldcw %0" : : "m"(cw)); +} + +/* mask x87 FPE, return true if the previous state was unmasked */ +static int mask_x87(void) +{ + unsigned short cw; + int unmasked; + + __asm__ __volatile__("fstcw %0" : "=m"(cw)); + unmasked = (cw & (0x01|0x04|0x08)) == 0; + /* or just set cw = 0x37f */ + cw |= (0x01|0x04|0x08); /* mask IM, ZM, OM */ + __asm__ __volatile__("fldcw %0" : : "m"(cw)); + return unmasked; +} + +static void unmask_sse2(void) +{ + unsigned int mxcsr; + + __asm__ __volatile__("stmxcsr %0" : "=m"(mxcsr)); + mxcsr &= ~(0x003F|0x0680); /* clear exn flags, unmask OM, ZM, IM (not PM, UM, DM) */ + __asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr)); +} + +/* mask SSE2 FPE, return true if the previous state was unmasked */ +static int mask_sse2(void) +{ + unsigned int mxcsr; + int unmasked; + + __asm__ __volatile__("stmxcsr %0" : "=m"(mxcsr)); + unmasked = (mxcsr & 0x0680) == 0; + /* or just set mxcsr = 0x1f80 */ + mxcsr &= ~0x003F; /* clear exn flags */ + mxcsr |= 0x0680; /* mask OM, ZM, IM (not PM, UM, DM) */ + __asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr)); + return unmasked; +} + +#if defined(__x86_64__) + +static inline int cpu_has_sse2(void) { return 1; } + +#else /* !__x86_64__ */ + +/* + * Check if an x86-32 processor has SSE2. + */ +static unsigned int xor_eflags(unsigned int mask) +{ + unsigned int eax, edx; + + eax = mask; /* eax = mask */ + __asm__("pushfl\n\t" + "popl %0\n\t" /* edx = original EFLAGS */ + "xorl %0, %1\n\t" /* eax = mask ^ EFLAGS */ + "pushl %1\n\t" + "popfl\n\t" /* new EFLAGS = mask ^ original EFLAGS */ + "pushfl\n\t" + "popl %1\n\t" /* eax = new EFLAGS */ + "xorl %0, %1\n\t" /* eax = new EFLAGS ^ old EFLAGS */ + "pushl %0\n\t" + "popfl" /* restore original EFLAGS */ + : "=d"(edx), "=a"(eax) + : "1"(eax)); + return eax; +} + +static __inline__ unsigned int cpuid_eax(unsigned int op) +{ + unsigned int eax, save_ebx; + + /* In PIC mode i386 reserves EBX. So we must save + and restore it ourselves to not upset gcc. */ + __asm__( + "movl %%ebx, %1\n\t" + "cpuid\n\t" + "movl %1, %%ebx" + : "=a"(eax), "=m"(save_ebx) + : "0"(op) + : "cx", "dx"); + return eax; +} + +static __inline__ unsigned int cpuid_edx(unsigned int op) +{ + unsigned int eax, edx, save_ebx; + + /* In PIC mode i386 reserves EBX. So we must save + and restore it ourselves to not upset gcc. */ + __asm__( + "movl %%ebx, %2\n\t" + "cpuid\n\t" + "movl %2, %%ebx" + : "=a"(eax), "=d"(edx), "=m"(save_ebx) + : "0"(op) + : "cx"); + return edx; +} + +/* The AC bit, bit #18, is a new bit introduced in the EFLAGS + * register on the Intel486 processor to generate alignment + * faults. This bit cannot be set on the Intel386 processor. + */ +static __inline__ int is_386(void) +{ + return ((xor_eflags(1<<18) >> 18) & 1) == 0; +} + +/* Newer x86 processors have a CPUID instruction, as indicated by + * the ID bit (#21) in EFLAGS being modifiable. + */ +static __inline__ int has_CPUID(void) +{ + return (xor_eflags(1<<21) >> 21) & 1; +} + +static int cpu_has_sse2(void) +{ + unsigned int maxlev, features; + static int has_sse2 = -1; + + if (has_sse2 >= 0) + return has_sse2; + has_sse2 = 0; + + if (is_386()) + return 0; + if (!has_CPUID()) + return 0; + maxlev = cpuid_eax(0); + /* Intel A-step Pentium had a preliminary version of CPUID. + It also didn't have SSE2. */ + if ((maxlev & 0xFFFFFF00) == 0x0500) + return 0; + /* If max level is zero then CPUID cannot report any features. */ + if (maxlev == 0) + return 0; + features = cpuid_edx(1); + has_sse2 = (features & (1 << 26)) != 0; + + return has_sse2; +} +#endif /* !__x86_64__ */ + +static void unmask_fpe(void) +{ + __asm__ __volatile__("fnclex"); + unmask_x87(); + if (cpu_has_sse2()) + unmask_sse2(); +} + +static void unmask_fpe_conditional(int unmasked) +{ + if (unmasked) + unmask_fpe(); +} + +/* mask x86 FPE, return true if the previous state was unmasked */ +static int mask_fpe(void) +{ + int unmasked; + + unmasked = mask_x87(); + if (cpu_has_sse2()) + unmasked |= mask_sse2(); + return unmasked; +} + +void erts_restore_fpu(void) +{ + __asm__ __volatile__("fninit"); + unmask_x87(); + if (cpu_has_sse2()) + unmask_sse2(); +} + +#elif defined(__sparc__) && defined(__linux__) + +#if defined(__arch64__) +#define LDX "ldx" +#define STX "stx" +#else +#define LDX "ld" +#define STX "st" +#endif + +static void unmask_fpe(void) +{ + unsigned long fsr; + + __asm__(STX " %%fsr, %0" : "=m"(fsr)); + fsr &= ~(0x1FUL << 23); /* clear FSR[TEM] field */ + fsr |= (0x1AUL << 23); /* enable NV, OF, DZ exceptions */ + __asm__ __volatile__(LDX " %0, %%fsr" : : "m"(fsr)); +} + +static void unmask_fpe_conditional(int unmasked) +{ + if (unmasked) + unmask_fpe(); +} + +/* mask SPARC FPE, return true if the previous state was unmasked */ +static int mask_fpe(void) +{ + unsigned long fsr; + int unmasked; + + __asm__(STX " %%fsr, %0" : "=m"(fsr)); + unmasked = ((fsr >> 23) & 0x1A) == 0x1A; + fsr &= ~(0x1FUL << 23); /* clear FSR[TEM] field */ + __asm__ __volatile__(LDX " %0, %%fsr" : : "m"(fsr)); + return unmasked; +} + +#elif (defined(__powerpc__) && defined(__linux__)) || (defined(__ppc__) && defined(__DARWIN__)) + +#if defined(__linux__) +#include <sys/prctl.h> + +static void set_fpexc_precise(void) +{ + if (prctl(PR_SET_FPEXC, PR_FP_EXC_PRECISE) < 0) { + perror("PR_SET_FPEXC"); + exit(1); + } +} + +#elif defined(__DARWIN__) + +#include <mach/mach.h> +#include <pthread.h> + +/* + * FE0 FE1 MSR bits + * 0 0 floating-point exceptions disabled + * 0 1 floating-point imprecise nonrecoverable + * 1 0 floating-point imprecise recoverable + * 1 1 floating-point precise mode + * + * Apparently: + * - Darwin 5.5 (MacOS X <= 10.1) starts with FE0 == FE1 == 0, + * and resets FE0 and FE1 to 0 after each SIGFPE. + * - Darwin 6.0 (MacOS X 10.2) starts with FE0 == FE1 == 1, + * and does not reset FE0 or FE1 after a SIGFPE. + */ +#define FE0_MASK (1<<11) +#define FE1_MASK (1<<8) + +/* a thread cannot get or set its own MSR bits */ +static void *fpu_fpe_enable(void *arg) +{ + thread_t t = *(thread_t*)arg; + struct ppc_thread_state state; + unsigned int state_size = PPC_THREAD_STATE_COUNT; + + if (thread_get_state(t, PPC_THREAD_STATE, (natural_t*)&state, &state_size) != KERN_SUCCESS) { + perror("thread_get_state"); + exit(1); + } + if ((state.srr1 & (FE1_MASK|FE0_MASK)) != (FE1_MASK|FE0_MASK)) { +#if 1 + /* This would also have to be performed in the SIGFPE handler + to work around the MSR reset older Darwin releases do. */ + state.srr1 |= (FE1_MASK|FE0_MASK); + thread_set_state(t, PPC_THREAD_STATE, (natural_t*)&state, state_size); +#else + fprintf(stderr, "srr1 == 0x%08x, your Darwin is too old\n", state.srr1); + exit(1); +#endif + } + return NULL; /* Ok, we appear to be on Darwin 6.0 or later */ +} + +static void set_fpexc_precise(void) +{ + thread_t self = mach_thread_self(); + pthread_t enabler; + + if (pthread_create(&enabler, NULL, fpu_fpe_enable, &self)) { + perror("pthread_create"); + } else if (pthread_join(enabler, NULL)) { + perror("pthread_join"); + } +} + +#endif + +static void set_fpscr(unsigned int fpscr) +{ + union { + double d; + unsigned int fpscr[2]; + } u; + + u.fpscr[0] = 0xFFF80000; + u.fpscr[1] = fpscr; + __asm__ __volatile__("mtfsf 255,%0" : : "f"(u.d)); +} + +static unsigned int get_fpscr(void) +{ + union { + double d; + unsigned int fpscr[2]; + } u; + + __asm__("mffs %0" : "=f"(u.d)); + return u.fpscr[1]; +} + +static void unmask_fpe(void) +{ + set_fpexc_precise(); + set_fpscr(0x80|0x40|0x10); /* VE, OE, ZE; not UE or XE */ +} + +static void unmask_fpe_conditional(int unmasked) +{ + if (unmasked) + unmask_fpe(); +} + +/* mask PowerPC FPE, return true if the previous state was unmasked */ +static int mask_fpe(void) +{ + int unmasked; + + unmasked = (get_fpscr() & (0x80|0x40|0x10)) == (0x80|0x40|0x10); + set_fpscr(0x00); + return unmasked; +} + +#else + +static void unmask_fpe(void) +{ + fpsetmask(FP_X_INV | FP_X_OFL | FP_X_DZ); +} + +static void unmask_fpe_conditional(int unmasked) +{ + if (unmasked) + unmask_fpe(); +} + +/* mask IEEE FPE, return true if previous state was unmasked */ +static int mask_fpe(void) +{ + const fp_except unmasked_mask = FP_X_INV | FP_X_OFL | FP_X_DZ; + fp_except old_mask; + + old_mask = fpsetmask(0); + return (old_mask & unmasked_mask) == unmasked_mask; +} + +#endif + +#if (defined(__linux__) && (defined(__i386__) || defined(__x86_64__) || defined(__sparc__) || defined(__powerpc__))) || (defined(__DARWIN__) && (defined(__i386__) || defined(__x86_64__) || defined(__ppc__))) || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__))) || ((defined(__NetBSD__) || defined(__OpenBSD__)) && defined(__x86_64__)) || (defined(__sun__) && defined(__x86_64__)) + +#if defined(__linux__) && defined(__i386__) +#if !defined(X86_FXSR_MAGIC) +#define X86_FXSR_MAGIC 0x0000 +#endif +#elif defined(__FreeBSD__) && defined(__x86_64__) +#include <sys/types.h> +#include <machine/fpu.h> +#elif defined(__FreeBSD__) && defined(__i386__) +#include <sys/types.h> +#include <machine/npx.h> +#elif defined(__DARWIN__) +#include <machine/signal.h> +#elif defined(__OpenBSD__) && defined(__x86_64__) +#include <sys/types.h> +#include <machine/fpu.h> +#endif +#if !(defined(__OpenBSD__) && defined(__x86_64__)) +#include <ucontext.h> +#endif +#include <string.h> + +#if defined(__linux__) && defined(__x86_64__) +#define mc_pc(mc) ((mc)->gregs[REG_RIP]) +#elif defined(__linux__) && defined(__i386__) +#define mc_pc(mc) ((mc)->gregs[REG_EIP]) +#elif defined(__DARWIN__) && defined(__i386__) +#ifdef DARWIN_MODERN_MCONTEXT +#define mc_pc(mc) ((mc)->__ss.__eip) +#else +#define mc_pc(mc) ((mc)->ss.eip) +#endif +#elif defined(__DARWIN__) && defined(__x86_64__) +#ifdef DARWIN_MODERN_MCONTEXT +#define mc_pc(mc) ((mc)->__ss.__rip) +#else +#define mc_pc(mc) ((mc)->ss.rip) +#endif +#elif defined(__FreeBSD__) && defined(__x86_64__) +#define mc_pc(mc) ((mc)->mc_rip) +#elif defined(__FreeBSD__) && defined(__i386__) +#define mc_pc(mc) ((mc)->mc_eip) +#elif defined(__NetBSD__) && defined(__x86_64__) +#define mc_pc(mc) ((mc)->__gregs[_REG_RIP]) +#elif defined(__NetBSD__) && defined(__i386__) +#define mc_pc(mc) ((mc)->__gregs[_REG_EIP]) +#elif defined(__OpenBSD__) && defined(__x86_64__) +#define mc_pc(mc) ((mc)->sc_rip) +#elif defined(__sun__) && defined(__x86_64__) +#define mc_pc(mc) ((mc)->gregs[REG_RIP]) +#endif + +static void fpe_sig_action(int sig, siginfo_t *si, void *puc) +{ + ucontext_t *uc = puc; + unsigned long pc; + +#if defined(__linux__) +#if defined(__x86_64__) + mcontext_t *mc = &uc->uc_mcontext; + fpregset_t fpstate = mc->fpregs; + pc = mc_pc(mc); + /* A failed SSE2 instruction will restart. To avoid + looping we mask SSE2 exceptions now and unmask them + again later in erts_check_fpe()/erts_restore_fpu(). + On RISCs we update PC to skip the failed instruction, + but the ever increasing complexity of the x86 instruction + set encoding makes that a poor solution here. */ + fpstate->mxcsr = 0x1F80; + fpstate->swd &= ~0xFF; +#elif defined(__i386__) + mcontext_t *mc = &uc->uc_mcontext; + fpregset_t fpstate = mc->fpregs; + pc = mc_pc(mc); + if ((fpstate->status >> 16) == X86_FXSR_MAGIC) + ((struct _fpstate*)fpstate)->mxcsr = 0x1F80; + fpstate->sw &= ~0xFF; +#elif defined(__sparc__) && defined(__arch64__) + /* on SPARC the 3rd parameter points to a sigcontext not a ucontext */ + struct sigcontext *sc = (struct sigcontext*)puc; + pc = sc->sigc_regs.tpc; + sc->sigc_regs.tpc = sc->sigc_regs.tnpc; + sc->sigc_regs.tnpc += 4; +#elif defined(__sparc__) + /* on SPARC the 3rd parameter points to a sigcontext not a ucontext */ + struct sigcontext *sc = (struct sigcontext*)puc; + pc = sc->si_regs.pc; + sc->si_regs.pc = sc->si_regs.npc; + sc->si_regs.npc = (unsigned long)sc->si_regs.npc + 4; +#elif defined(__powerpc__) +#if defined(__powerpc64__) + mcontext_t *mc = &uc->uc_mcontext; + unsigned long *regs = &mc->gp_regs[0]; +#else + mcontext_t *mc = uc->uc_mcontext.uc_regs; + unsigned long *regs = &mc->gregs[0]; +#endif + pc = regs[PT_NIP]; + regs[PT_NIP] += 4; + regs[PT_FPSCR] = 0x80|0x40|0x10; /* VE, OE, ZE; not UE or XE */ +#endif +#elif defined(__DARWIN__) && (defined(__i386__) || defined(__x86_64__)) +#ifdef DARWIN_MODERN_MCONTEXT + mcontext_t mc = uc->uc_mcontext; + pc = mc_pc(mc); + mc->__fs.__fpu_mxcsr = 0x1F80; + *(unsigned short *)&mc->__fs.__fpu_fsw &= ~0xFF; +#else + mcontext_t mc = uc->uc_mcontext; + pc = mc_pc(mc); + mc->fs.fpu_mxcsr = 0x1F80; + *(unsigned short *)&mc->fs.fpu_fsw &= ~0xFF; +#endif /* DARWIN_MODERN_MCONTEXT */ +#elif defined(__DARWIN__) && defined(__ppc__) + mcontext_t mc = uc->uc_mcontext; + pc = mc->ss.srr0; + mc->ss.srr0 += 4; + mc->fs.fpscr = 0x80|0x40|0x10; +#elif defined(__FreeBSD__) && defined(__x86_64__) + mcontext_t *mc = &uc->uc_mcontext; + struct savefpu *savefpu = (struct savefpu*)&mc->mc_fpstate; + struct envxmm *envxmm = &savefpu->sv_env; + pc = mc_pc(mc); + envxmm->en_mxcsr = 0x1F80; + envxmm->en_sw &= ~0xFF; +#elif defined(__FreeBSD__) && defined(__i386__) + mcontext_t *mc = &uc->uc_mcontext; + union savefpu *savefpu = (union savefpu*)&mc->mc_fpstate; + pc = mc_pc(mc); + if (mc->mc_fpformat == _MC_FPFMT_XMM) { + struct envxmm *envxmm = &savefpu->sv_xmm.sv_env; + envxmm->en_mxcsr = 0x1F80; + envxmm->en_sw &= ~0xFF; + } else { + struct env87 *env87 = &savefpu->sv_87.sv_env; + env87->en_sw &= ~0xFF; + } +#elif defined(__NetBSD__) && defined(__x86_64__) + mcontext_t *mc = &uc->uc_mcontext; + struct fxsave64 *fxsave = (struct fxsave64 *)&mc->__fpregs; + pc = mc_pc(mc); + fxsave->fx_mxcsr = 0x1F80; + fxsave->fx_fsw &= ~0xFF; +#elif defined(__NetBSD__) && defined(__i386__) + mcontext_t *mc = &uc->uc_mcontext; + pc = mc_pc(mc); + if (uc->uc_flags & _UC_FXSAVE) { + struct envxmm *envxmm = (struct envxmm *)&mc->__fpregs; + envxmm->en_mxcsr = 0x1F80; + envxmm->en_sw &= ~0xFF; + } else { + struct env87 *env87 = (struct env87 *)&mc->__fpregs; + env87->en_sw &= ~0xFF; + } +#elif defined(__OpenBSD__) && defined(__x86_64__) + struct fxsave64 *fxsave = uc->sc_fpstate; + pc = mc_pc(uc); + fxsave->fx_mxcsr = 0x1F80; + fxsave->fx_fsw &= ~0xFF; +#elif defined(__sun__) && defined(__x86_64__) + mcontext_t *mc = &uc->uc_mcontext; + struct fpchip_state *fpstate = &mc->fpregs.fp_reg_set.fpchip_state; + pc = mc_pc(mc); + fpstate->mxcsr = 0x1F80; + fpstate->sw &= ~0xFF; +#endif +#if 0 + { + char buf[64]; + snprintf(buf, sizeof buf, "%s: FPE at %p\r\n", __FUNCTION__, (void*)pc); + write(2, buf, strlen(buf)); + } +#endif + set_current_fp_exception(pc); +} + +static void erts_thread_catch_fp_exceptions(void) +{ + struct sigaction act; + memset(&act, 0, sizeof act); + act.sa_sigaction = fpe_sig_action; + act.sa_flags = SA_SIGINFO; + sigaction(SIGFPE, &act, NULL); + unmask_fpe(); +} + +#else /* !((__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __x86_64__ || __ppc__))) */ + +static void fpe_sig_handler(int sig) +{ + set_current_fp_exception(1); /* XXX: convert to sigaction so we can get the trap PC */ +} + +static void erts_thread_catch_fp_exceptions(void) +{ + sys_sigset(SIGFPE, fpe_sig_handler); + unmask_fpe(); +} + +#endif /* (__linux__ && (__i386__ || __x86_64__ || __powerpc__)) || (__DARWIN__ && (__i386__ || __x86_64__ || __ppc__))) */ + +/* once-only initialisation early in the main thread */ +void erts_sys_init_float(void) +{ + erts_init_fp_exception(); + erts_thread_catch_fp_exceptions(); + erts_printf_block_fpe = erts_sys_block_fpe; + erts_printf_unblock_fpe = erts_sys_unblock_fpe; +} + +#endif /* NO_FPE_SIGNALS */ + +void erts_thread_init_float(void) +{ +#ifdef ERTS_SMP + /* This allows Erlang schedulers to leave Erlang-process context + and still have working FP exceptions. XXX: is this needed? */ + erts_thread_init_fp_exception(); +#endif + +#ifndef NO_FPE_SIGNALS + /* NOTE: + * erts_thread_disable_fpe() is called in all threads at + * creation. We at least need to call unmask_fpe() + */ +#if defined(__DARWIN__) || defined(__FreeBSD__) + /* Darwin (7.9.0) does not appear to propagate FP exception settings + to a new thread from its parent. So if we want FP exceptions, we + must manually re-enable them in each new thread. + FreeBSD 6.1 appears to suffer from a similar issue. */ + erts_thread_catch_fp_exceptions(); +#else + unmask_fpe(); +#endif + +#endif +} + +void erts_thread_disable_fpe(void) +{ +#if !defined(NO_FPE_SIGNALS) + (void)mask_fpe(); +#endif +} + +#if !defined(NO_FPE_SIGNALS) +int erts_sys_block_fpe(void) +{ + return mask_fpe(); +} + +void erts_sys_unblock_fpe(int unmasked) +{ + unmask_fpe_conditional(unmasked); +} +#endif + +/* The following check is incorporated from the Vee machine */ + +#define ISDIGIT(d) ((d) >= '0' && (d) <= '9') + +/* + ** Convert a double to ascii format 0.dddde[+|-]ddd + ** return number of characters converted or -1 if error. + ** + ** These two functions should maybe use localeconv() to pick up + ** the current radix character, but since it is uncertain how + ** expensive such a system call is, and since no-one has heard + ** of other radix characters than '.' and ',' an ad-hoc + ** low execution time solution is used instead. + */ + +int +sys_double_to_chars_ext(double fp, char *buffer, size_t buffer_size, size_t decimals) +{ + char *s = buffer; + + if (erts_snprintf(buffer, buffer_size, "%.*e", decimals, fp) >= buffer_size) + return -1; + /* Search upto decimal point */ + if (*s == '+' || *s == '-') s++; + while (ISDIGIT(*s)) s++; + if (*s == ',') *s++ = '.'; /* Replace ',' with '.' */ + /* Scan to end of string */ + while (*s) s++; + return s-buffer; /* i.e strlen(buffer) */ +} + +/* Float conversion */ + +int +sys_chars_to_double(char* buf, double* fp) +{ +#ifndef NO_FPE_SIGNALS + volatile unsigned long *fpexnp = erts_get_current_fp_exception(); +#endif + char *s = buf, *t, *dp; + + /* Robert says that something like this is what he really wanted: + * (The [.,] radix test is NOT what Robert wanted - it was added later) + * + * 7 == sscanf(Tbuf, "%[+-]%[0-9][.,]%[0-9]%[eE]%[+-]%[0-9]%s", ....); + * if (*s2 == 0 || *s3 == 0 || *s4 == 0 || *s6 == 0 || *s7) + * break; + */ + + /* Scan string to check syntax. */ + if (*s == '+' || *s == '-') s++; + if (!ISDIGIT(*s)) /* Leading digits. */ + return -1; + while (ISDIGIT(*s)) s++; + if (*s != '.' && *s != ',') /* Decimal part. */ + return -1; + dp = s++; /* Remember decimal point pos just in case */ + if (!ISDIGIT(*s)) + return -1; + while (ISDIGIT(*s)) s++; + if (*s == 'e' || *s == 'E') { + /* There is an exponent. */ + s++; + if (*s == '+' || *s == '-') s++; + if (!ISDIGIT(*s)) + return -1; + while (ISDIGIT(*s)) s++; + } + if (*s) /* That should be it */ + return -1; + +#ifdef NO_FPE_SIGNALS + errno = 0; +#endif + __ERTS_FP_CHECK_INIT(fpexnp); + *fp = strtod(buf, &t); + __ERTS_FP_ERROR_THOROUGH(fpexnp, *fp, return -1); + if (t != s) { /* Whole string not scanned */ + /* Try again with other radix char */ + *dp = (*dp == '.') ? ',' : '.'; + errno = 0; + __ERTS_FP_CHECK_INIT(fpexnp); + *fp = strtod(buf, &t); + __ERTS_FP_ERROR_THOROUGH(fpexnp, *fp, return -1); + } + +#ifdef NO_FPE_SIGNALS + if (errno == ERANGE) { + if (*fp == HUGE_VAL || *fp == -HUGE_VAL) { + /* overflow, should give error */ + return -1; + } else if (t == s && *fp == 0.0) { + /* This should give 0.0 - OTP-7178 */ + errno = 0; + + } else if (*fp == 0.0) { + return -1; + } + } +#endif + return 0; +} + +int +matherr(struct exception *exc) +{ +#if !defined(NO_FPE_SIGNALS) + volatile unsigned long *fpexnp = erts_get_current_fp_exception(); + if (fpexnp != NULL) + *fpexnp = (unsigned long)__builtin_return_address(0); +#endif + return 1; +} diff --git a/erts/emulator/sys/ose/sys_time.c b/erts/emulator/sys/ose/sys_time.c new file mode 100644 index 0000000000..7e96f68424 --- /dev/null +++ b/erts/emulator/sys/ose/sys_time.c @@ -0,0 +1,56 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2005-2009. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "global.h" + +/******************* Routines for time measurement *********************/ + +int erts_ticks_per_sec = 0; /* Will be SYS_CLK_TCK in erl_unix_sys.h */ + +int sys_init_time(void) +{ + return SYS_CLOCK_RESOLUTION; +} + +clock_t sys_times(SysTimes *now) { + now->tms_utime = now->tms_stime = now->tms_cutime = now->tms_cstime = 0; + return 0; +} + +static OSTICK last_tick_count = 0; +static SysHrTime wrap = 0; +static OSTICK us_per_tick; + +void sys_init_hrtime() { + us_per_tick = system_tick(); +} + +SysHrTime sys_gethrtime() { + OSTICK ticks = get_ticks(); + if (ticks < (SysHrTime) last_tick_count) { + wrap += 1ULL << 32; + } + last_tick_count = ticks; + return ((((SysHrTime) ticks) + wrap) * 1000*us_per_tick); +} diff --git a/erts/emulator/sys/unix/erl_child_setup.c b/erts/emulator/sys/unix/erl_child_setup.c index 7c6e4a2f37..94eb6b1547 100644 --- a/erts/emulator/sys/unix/erl_child_setup.c +++ b/erts/emulator/sys/unix/erl_child_setup.c @@ -54,6 +54,17 @@ void sys_sigrelease(int sig) #endif /* !SIG_SIGNAL */ #endif /* !SIG_SIGSET */ +#if defined(__ANDROID__) +int __system_properties_fd(void); +#endif /* __ANDROID__ */ + +#if defined(__ANDROID__) +#define SHELL "/system/bin/sh" +#else +#define SHELL "/bin/sh" +#endif /* __ANDROID__ */ + + int main(int argc, char *argv[]) { @@ -89,8 +100,23 @@ main(int argc, char *argv[]) if (sscanf(argv[CS_ARGV_FD_CR_IX], "%d:%d", &from, &to) != 2) return 1; + +#if defined(__ANDROID__) + for (i = from; i <= to; i++) { + if (i!=__system_properties_fd) + (void) close(i); + } +#else for (i = from; i <= to; i++) (void) close(i); +#endif /* __ANDROID__ */ + +#if defined(HAVE_CLOSEFROM) + closefrom(from); +#else + for (i = from; i <= to; i++) + (void) close(i); +#endif if (!(argv[CS_ARGV_WD_IX][0] == '.' && argv[CS_ARGV_WD_IX][1] == '\0') && chdir(argv[CS_ARGV_WD_IX]) < 0) @@ -116,7 +142,25 @@ main(int argc, char *argv[]) execv(argv[CS_ARGV_NO_OF_ARGS],&(argv[CS_ARGV_NO_OF_ARGS + 1])); } } else { - execl("/bin/sh", "sh", "-c", argv[CS_ARGV_CMD_IX], (char *) NULL); + execl(SHELL, "sh", "-c", argv[CS_ARGV_CMD_IX], (char *) NULL); } return 1; } + + + +#if defined(__ANDROID__) +int __system_properties_fd(void) +{ + int s, fd; + char *env; + + env = getenv("ANDROID_PROPERTY_WORKSPACE"); + if (!env) { + return -1; + } + fd = atoi(env); + return fd; +} +#endif /* __ANDROID__ */ + diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h index c8fcec8547..f7a6298d5b 100644 --- a/erts/emulator/sys/unix/erl_unix_sys.h +++ b/erts/emulator/sys/unix/erl_unix_sys.h @@ -107,6 +107,10 @@ #endif #include <netdb.h> +#ifdef HAVE_POSIX_MEMALIGN +# define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1 +#endif + /* * Make sure that MAXPATHLEN is defined. */ @@ -123,12 +127,14 @@ # endif #endif +/* + * Min number of async threads + */ +#define ERTS_MIN_NO_OF_ASYNC_THREADS 0 + /* File descriptors are numbers anc consecutively allocated on Unix */ #define ERTS_SYS_CONTINOUS_FD_NUMBERS -#define HAVE_ERTS_CHECK_IO_DEBUG -int erts_check_io_debug(void); - #ifndef ERTS_SMP # undef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT # define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT @@ -221,8 +227,24 @@ extern void sys_stop_cat(void); */ #ifdef USE_ISINF_ISNAN /* simulate finite() */ -# define finite(f) (!isinf(f) && !isnan(f)) -# define HAVE_FINITE +# define isfinite(f) (!isinf(f) && !isnan(f)) +# define HAVE_ISFINITE +#elif defined(__GNUC__) && defined(HAVE_FINITE) +/* We use finite in gcc as it emits assembler instead of + the function call that isfinite emits. The assembler is + significantly faster. */ +# ifdef isfinite +# undef isfinite +# endif +# define isfinite finite +# ifndef HAVE_ISFINITE +# define HAVE_ISFINITE +# endif +#elif defined(isfinite) && !defined(HAVE_ISFINITE) +# define HAVE_ISFINITE +#elif !defined(HAVE_ISFINITE) && defined(HAVE_FINITE) +# define isfinite finite +# define HAVE_ISFINITE #endif #ifdef NO_FPE_SIGNALS @@ -232,7 +254,7 @@ extern void sys_stop_cat(void); #define erts_thread_init_fp_exception() do{}while(0) #endif # define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0) -# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!finite(f)) { Action; } else {} +# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!isfinite(f)) { Action; } else {} # define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action) # define __ERTS_SAVE_FP_EXCEPTION(fpexnp) # define __ERTS_RESTORE_FP_EXCEPTION(fpexnp) @@ -296,7 +318,7 @@ static __inline__ void __ERTS_FP_CHECK_INIT(volatile unsigned long *fp_exception code to always throw floating-point exceptions on errors. */ static __inline__ int erts_check_fpe_thorough(volatile unsigned long *fp_exception, double f) { - return erts_check_fpe(fp_exception, f) || !finite(f); + return erts_check_fpe(fp_exception, f) || !isfinite(f); } # define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) \ do { if (erts_check_fpe_thorough((fpexnp),(f))) { Action; } } while (0) diff --git a/erts/emulator/sys/unix/erl_unix_sys_ddll.c b/erts/emulator/sys/unix/erl_unix_sys_ddll.c index 12c47d0088..2659d623c7 100644 --- a/erts/emulator/sys/unix/erl_unix_sys_ddll.c +++ b/erts/emulator/sys/unix/erl_unix_sys_ddll.c @@ -101,7 +101,7 @@ void erl_sys_ddll_init(void) { /* * Open a shared object */ -int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* err) +int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err) { #if defined(HAVE_DLOPEN) char* dlname; @@ -123,6 +123,7 @@ int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err) { +#if defined(HAVE_DLOPEN) int ret = ERL_DE_NO_ERROR; char *str; dlerror(); @@ -148,6 +149,9 @@ int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err) ret = ERL_DE_DYNAMIC_ERROR_OFFSET - find_errcode(str, err); } return ret; +#else + return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY; +#endif } /* diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c index a7ea4b2490..0d677d5f34 100644 --- a/erts/emulator/sys/unix/sys.c +++ b/erts/emulator/sys/unix/sys.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -149,6 +149,13 @@ extern void erl_crash_dump(char* file, int line, char* fmt, ...); #define DIR_SEPARATOR_CHAR '/' +#if defined(__ANDROID__) +#define SHELL "/system/bin/sh" +#else +#define SHELL "/bin/sh" +#endif /* __ANDROID__ */ + + #if defined(DEBUG) #define ERL_BUILD_TYPE_MARKER ".debug" #elif defined(PURIFY) @@ -277,7 +284,7 @@ struct { void (*check_io)(int); Uint (*size)(void); Eterm (*info)(void *); - int (*check_io_debug)(void); + int (*check_io_debug)(ErtsCheckIoDebugInfo *); } io_func = {0}; @@ -299,9 +306,9 @@ Eterm erts_check_io_info(void *p) } int -erts_check_io_debug(void) +erts_check_io_debug(ErtsCheckIoDebugInfo *ip) { - return (*io_func.check_io_debug)(); + return (*io_func.check_io_debug)(ip); } @@ -409,8 +416,10 @@ void sys_tty_reset(int exit_code) #ifdef __tile__ /* Direct malloc to spread memory around the caches of multiple tiles. */ #include <malloc.h> +#if defined(MALLOC_USE_HASH) MALLOC_USE_HASH(1); #endif +#endif #ifdef USE_THREADS @@ -547,6 +556,25 @@ erts_sys_pre_init(void) #endif #endif /* USE_THREADS */ erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0); + + { + /* + * Unfortunately we depend on fd 0,1,2 in the old shell code. + * So if for some reason we do not have those open when we start + * we have to open them here. Not doing this can cause the emulator + * to deadlock when reaping the fd_driver ports :( + */ + int fd; + /* Make sure fd 0 is open */ + if ((fd = open("/dev/null", O_RDONLY)) != 0) + close(fd); + /* Make sure fds 1 and 2 are open */ + while (fd < 3) { + fd = open("/dev/null", O_WRONLY); + } + close(fd); + } + } void @@ -1575,7 +1603,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op } } } else { - execle("/bin/sh", "sh", "-c", cmd_line, (char *) NULL, new_environ); + execle(SHELL, "sh", "-c", cmd_line, (char *) NULL, new_environ); } child_error: _exit(1); @@ -1696,7 +1724,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op fcntl(i, F_SETFD, 1); qnx_spawn_options.flags = _SPAWN_SETSID; - if ((pid = spawnl(P_NOWAIT, "/bin/sh", "/bin/sh", "-c", cmd_line, + if ((pid = spawnl(P_NOWAIT, SHELL, SHELL, "-c", cmd_line, (char *) 0)) < 0) { erts_free(ERTS_ALC_T_TMP, (void *) cmd_line); reset_qnx_spawn(); @@ -2489,6 +2517,16 @@ erts_sys_getenv(char *key, char *value, size_t *size) return res; } +int +erts_sys_unsetenv(char *key) +{ + int res; + erts_smp_rwmtx_rwlock(&environ_rwmtx); + res = unsetenv(key); + erts_smp_rwmtx_rwunlock(&environ_rwmtx); + return res; +} + void sys_init_io(void) { @@ -2524,6 +2562,52 @@ void erts_sys_alloc_init(void) { } +#if ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC +void *erts_sys_aligned_alloc(UWord alignment, UWord size) +{ +#ifdef HAVE_POSIX_MEMALIGN + void *ptr = NULL; + int error; + ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */ + error = posix_memalign(&ptr, (size_t) alignment, (size_t) size); +#if HAVE_ERTS_MSEG + if (error || !ptr) { + erts_mseg_clear_cache(); + error = posix_memalign(&ptr, (size_t) alignment, (size_t) size); + } +#endif + if (error) { + errno = error; + return NULL; + } + if (!ptr) + errno = ENOMEM; + ASSERT(!ptr || (((UWord) ptr) & (alignment - 1)) == 0); + return ptr; +#else +# error "Missing erts_sys_aligned_alloc() implementation" +#endif +} + +void erts_sys_aligned_free(UWord alignment, void *ptr) +{ + ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */ + free(ptr); +} + +void *erts_sys_aligned_realloc(UWord alignment, void *ptr, UWord size, UWord old_size) +{ + void *new_ptr = erts_sys_aligned_alloc(alignment, size); + if (new_ptr) { + UWord copy_size = old_size < size ? old_size : size; + sys_memcpy(new_ptr, ptr, (size_t) copy_size); + erts_sys_aligned_free(alignment, ptr); + } + return new_ptr; +} + +#endif + void *erts_sys_alloc(ErtsAlcType_t t, void *x, Uint sz) { void *res = malloc((size_t) sz); @@ -2592,15 +2676,13 @@ int fd; } -#ifdef DEBUG - extern int erts_initialized; void -erl_assert_error(char* expr, char* file, int line) +erl_assert_error(const char* expr, const char* func, const char* file, int line) { fflush(stdout); - fprintf(stderr, "Assertion failed: %s in %s, line %d\n", - expr, file, line); + fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n", + file, line, func, expr); fflush(stderr); #if !defined(ERTS_SMP) && 0 /* Writing a crashdump from a failed assertion when smp support @@ -2615,6 +2697,8 @@ erl_assert_error(char* expr, char* file, int line) abort(); } +#ifdef DEBUG + void erl_debug(char* fmt, ...) { diff --git a/erts/emulator/sys/unix/sys_float.c b/erts/emulator/sys/unix/sys_float.c index f2f4de869d..cafeab547e 100644 --- a/erts/emulator/sys/unix/sys_float.c +++ b/erts/emulator/sys/unix/sys_float.c @@ -46,7 +46,7 @@ static void erts_init_fp_exception(void) { /* XXX: the wrappers prevent using a pthread destructor to deallocate the key's value; so when/where do we do that? */ - erts_tsd_key_create(&fpe_key); + erts_tsd_key_create(&fpe_key,"fp_exception"); } void erts_thread_init_fp_exception(void) @@ -152,7 +152,7 @@ static int mask_sse2(void) #if defined(__x86_64__) -static inline int cpu_has_sse2(void) { return 1; } +static ERTS_INLINE int cpu_has_sse2(void) { return 1; } #else /* !__x86_64__ */ @@ -179,7 +179,7 @@ static unsigned int xor_eflags(unsigned int mask) return eax; } -static __inline__ unsigned int cpuid_eax(unsigned int op) +static ERTS_INLINE unsigned int cpuid_eax(unsigned int op) { unsigned int eax, save_ebx; @@ -195,7 +195,7 @@ static __inline__ unsigned int cpuid_eax(unsigned int op) return eax; } -static __inline__ unsigned int cpuid_edx(unsigned int op) +static ERTS_INLINE unsigned int cpuid_edx(unsigned int op) { unsigned int eax, edx, save_ebx; @@ -215,7 +215,7 @@ static __inline__ unsigned int cpuid_edx(unsigned int op) * register on the Intel486 processor to generate alignment * faults. This bit cannot be set on the Intel386 processor. */ -static __inline__ int is_386(void) +static ERTS_INLINE int is_386(void) { return ((xor_eflags(1<<18) >> 18) & 1) == 0; } @@ -223,7 +223,7 @@ static __inline__ int is_386(void) /* Newer x86 processors have a CPUID instruction, as indicated by * the ID bit (#21) in EFLAGS being modifiable. */ -static __inline__ int has_CPUID(void) +static ERTS_INLINE int has_CPUID(void) { return (xor_eflags(1<<21) >> 21) & 1; } @@ -832,6 +832,8 @@ sys_chars_to_double(char* buf, double* fp) return 0; } +#ifdef USE_MATHERR + int matherr(struct exception *exc) { @@ -842,3 +844,5 @@ matherr(struct exception *exc) #endif return 1; } + +#endif diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c index 7a1d129cd5..972170d465 100644 --- a/erts/emulator/sys/win32/erl_poll.c +++ b/erts/emulator/sys/win32/erl_poll.c @@ -1085,7 +1085,7 @@ void erts_poll_controlv(ErtsPollSet ps, pcev[i].events, pcev[i].on); } - ERTS_POLLSET_LOCK(ps); + ERTS_POLLSET_UNLOCK(ps); HARDTRACEF(("Out erts_poll_controlv")); } diff --git a/erts/emulator/sys/win32/erl_win32_sys_ddll.c b/erts/emulator/sys/win32/erl_win32_sys_ddll.c index 4c8d83ab16..338f0d7386 100644 --- a/erts/emulator/sys/win32/erl_win32_sys_ddll.c +++ b/erts/emulator/sys/win32/erl_win32_sys_ddll.c @@ -38,7 +38,7 @@ #include "erl_nif.h" #define EXT_LEN 4 -#define FILE_EXT ".dll" +#define FILE_EXT_WCHAR L".dll" static DWORD tls_index = 0; static TWinDynDriverCallbacks wddc; @@ -57,11 +57,15 @@ void erl_sys_ddll_init(void) { /* * Open a shared object + * Expecting 'full_name' as an UTF-8 string. */ -int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* err) +int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err) { + HINSTANCE hinstance; int len; - char dlname[MAXPATHLEN + 1]; + wchar_t* wcp; + Sint used; + int code; if ((len = sys_strlen(full_name)) >= MAXPATHLEN - EXT_LEN) { if (err != NULL) { @@ -69,10 +73,26 @@ int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* } return ERL_DE_LOAD_ERROR_NAME_TO_LONG; } - sys_strcpy(dlname, full_name); - sys_strcpy(dlname+len, FILE_EXT); - return erts_sys_ddll_open_noext(dlname, handle, err); + + wcp = (wchar_t*)erts_convert_filename_to_wchar((byte*)full_name, len, + NULL, 0, + ERTS_ALC_T_TMP, &used, EXT_LEN); + wcscpy(&wcp[used/2 - 1], FILE_EXT_WCHAR); + + if ((hinstance = LoadLibraryW(wcp)) == NULL) { + code = ERL_DE_DYNAMIC_ERROR_OFFSET - GetLastError(); + if (err != NULL) { + err->str = erts_sys_ddll_error(code); + } + } + else { + code = ERL_DE_NO_ERROR; + *handle = (void *) hinstance; + } + erts_free(ERTS_ALC_T_TMP, wcp); + return code; } + int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err) { HINSTANCE hinstance; diff --git a/erts/emulator/sys/win32/erl_win_dyn_driver.h b/erts/emulator/sys/win32/erl_win_dyn_driver.h index a7c53c904d..4010d939e5 100644 --- a/erts/emulator/sys/win32/erl_win_dyn_driver.h +++ b/erts/emulator/sys/win32/erl_win_dyn_driver.h @@ -80,8 +80,8 @@ WDD_TYPEDEF(int, erl_drv_output_term, (ErlDrvTermData, ErlDrvTermData*, int)); WDD_TYPEDEF(int, driver_output_term, (ErlDrvPort, ErlDrvTermData*, int)); WDD_TYPEDEF(int, erl_drv_send_term, (ErlDrvTermData, ErlDrvTermData, ErlDrvTermData*, int)); WDD_TYPEDEF(int, driver_send_term, (ErlDrvPort, ErlDrvTermData, ErlDrvTermData*, int)); +WDD_TYPEDEF(unsigned int, driver_async_port_key, (ErlDrvPort)); WDD_TYPEDEF(long, driver_async, (ErlDrvPort,unsigned int*,void (*)(void*),void*,void (*)(void*))); -WDD_TYPEDEF(int, driver_async_cancel, (unsigned int)); WDD_TYPEDEF(int, driver_lock_driver, (ErlDrvPort)); WDD_TYPEDEF(void *, driver_dl_open, (char *)); WDD_TYPEDEF(void *, driver_dl_sym, (void *, char *)); @@ -197,8 +197,8 @@ typedef struct { WDD_FTYPE(driver_output_term) *driver_output_term; WDD_FTYPE(erl_drv_send_term) *erl_drv_send_term; WDD_FTYPE(driver_send_term) *driver_send_term; + WDD_FTYPE(driver_async_port_key) *driver_async_port_key; WDD_FTYPE(driver_async) *driver_async; - WDD_FTYPE(driver_async_cancel) *driver_async_cancel; WDD_FTYPE(driver_lock_driver) *driver_lock_driver; WDD_FTYPE(driver_dl_open) *driver_dl_open; WDD_FTYPE(driver_dl_sym) *driver_dl_sym; @@ -308,8 +308,8 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks; #define driver_output_term (WinDynDriverCallbacks.driver_output_term) #define erl_drv_send_term (WinDynDriverCallbacks.erl_drv_send_term) #define driver_send_term (WinDynDriverCallbacks.driver_send_term) +#define driver_async_port_key (WinDynDriverCallbacks.driver_async_port_key) #define driver_async (WinDynDriverCallbacks.driver_async) -#define driver_async_cancel (WinDynDriverCallbacks.driver_async_cancel) #define driver_lock_driver (WinDynDriverCallbacks.driver_lock_driver) #define driver_dl_open (WinDynDriverCallbacks.driver_dl_open) #define driver_dl_sym (WinDynDriverCallbacks.driver_dl_sym) @@ -443,8 +443,8 @@ do { \ ((W).driver_output_term) = driver_output_term; \ ((W).erl_drv_send_term) = erl_drv_send_term; \ ((W).driver_send_term) = driver_send_term; \ +((W).driver_async_port_key) = driver_async_port_key; \ ((W).driver_async) = driver_async; \ -((W).driver_async_cancel) = driver_async_cancel; \ ((W).driver_lock_driver) = driver_lock_driver; \ ((W).driver_dl_open) = driver_dl_open; \ ((W).driver_dl_sym) = driver_dl_sym; \ diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h index 5ce1a61303..838f0c61eb 100644 --- a/erts/emulator/sys/win32/erl_win_sys.h +++ b/erts/emulator/sys/win32/erl_win_sys.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2012. All Rights Reserved. + * Copyright Ericsson AB 1997-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -60,16 +60,18 @@ #include <windows.h> #undef WIN32_LEAN_AND_MEAN -/* - * Define MAXPATHLEN in terms of MAXPATH if available. - */ - -#ifndef MAXPATH -#define MAXPATH MAX_PATH -#endif /* MAXPATH */ #ifndef MAXPATHLEN -#define MAXPATHLEN MAXPATH +#define MAXPATHLEN 4096 +/* + erts-6.0 (OTP 17.0): + We now accept windows paths longer than 260 (MAX_PATH) by conversion to + UNC path format. In order to also return long paths from the driver we + increased MAXPATHLEN from 260 to larger (but arbitrary) value 4096. + It would of course be nicer to instead dynamically allocate large enough + tmp buffers when efile_drv needs to return really long paths, and do that + for unix as well. + */ #endif /* MAXPATHLEN */ /* @@ -82,7 +84,6 @@ #define NO_ERF #define NO_ERFC -#define NO_SYSLOG #define NO_SYSCONF #define NO_DAEMON #define NO_PWD @@ -95,6 +96,8 @@ # define ERTS_I64_LITERAL(X) X##i64 #endif +#define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1 + /* * Practial Windows specific macros. */ @@ -102,16 +105,18 @@ #define CreateAutoEvent(state) CreateEvent(NULL, FALSE, state, NULL) #define CreateManualEvent(state) CreateEvent(NULL, TRUE, state, NULL) +/* + * Min number of async threads + */ +#define ERTS_MIN_NO_OF_ASYNC_THREADS 0 /* * Our own type of "FD's" */ +#define ERTS_SYS_FD_INVALID INVALID_HANDLE_VALUE #define ERTS_SYS_FD_TYPE HANDLE #define NO_FSTAT_ON_SYS_FD_TYPE 1 /* They are events, not files */ -#define HAVE_ERTS_CHECK_IO_DEBUG -int erts_check_io_debug(void); - /* * For erl_time_sup */ diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index e0422de026..0ded6b274e 100755..100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1996-2013. All Rights Reserved. + * Copyright Ericsson AB 1996-2014. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -32,7 +32,7 @@ #include "erl_threads.h" #include "../../drivers/win32/win_con.h" #include "erl_cpu_topology.h" - +#include <malloc.h> void erts_sys_init_float(void); @@ -69,17 +69,14 @@ static void async_read_file(struct async_io* aio, LPVOID buf, DWORD numToRead); static int async_write_file(struct async_io* aio, LPVOID buf, DWORD numToWrite); static int get_overlapped_result(struct async_io* aio, LPDWORD pBytesRead, BOOL wait); -static BOOL create_child_process(char *, HANDLE, HANDLE, +static BOOL create_child_process(wchar_t *, HANDLE, HANDLE, HANDLE, LPHANDLE, LPDWORD, BOOL, - LPVOID, LPTSTR, unsigned, - char **, int *); + LPVOID, wchar_t*, unsigned, + wchar_t **, int *); static int create_pipe(LPHANDLE, LPHANDLE, BOOL, BOOL); -static int application_type(const char* originalName, char fullPath[MAX_PATH], +static int application_type(const wchar_t* originalName, wchar_t fullPath[MAX_PATH], BOOL search_in_path, BOOL handle_quotes, int *error_return); -static int application_type_w(const WCHAR *originalName, WCHAR fullPath[MAX_PATH], - BOOL search_in_path, BOOL handle_quotes, - int *error_return); HANDLE erts_service_event; @@ -399,11 +396,12 @@ int* pBuild; /* Pointer to build number. */ * Definitions for driver flags. */ -#define DF_OVR_READY 1 /* Overlapped result is ready. */ -#define DF_EXIT_THREAD 2 /* The thread should exit. */ -#define DF_XLAT_CR 4 /* The thread should translate CRs. */ -#define DF_DROP_IF_INVH 8 /* Drop packages instead of crash if +#define DF_OVR_READY 1 /* Overlapped result is ready. */ +#define DF_EXIT_THREAD 2 /* The thread should exit. */ +#define DF_XLAT_CR 4 /* The thread should translate CRs. */ +#define DF_DROP_IF_INVH 8 /* Drop packages instead of crash if invalid handle (stderr) */ +#define DF_THREAD_FLUSHED 16 /* The thread should exit. */ #define OV_BUFFER_PTR(dp) ((LPVOID) ((dp)->ov.Internal)) #define OV_NUM_TO_READ(dp) ((dp)->ov.InternalHigh) @@ -1172,7 +1170,7 @@ spawn_init(void) } static ErlDrvData -spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) +spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts) { HANDLE hToChild = INVALID_HANDLE_VALUE; /* Write handle to child. */ HANDLE hFromChild = INVALID_HANDLE_VALUE; /* Read handle from child. */ @@ -1188,7 +1186,9 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) SECURITY_ATTRIBUTES sa = {sizeof(SECURITY_ATTRIBUTES), NULL, TRUE}; char* envir = opts->envir; int errno_return = -1; - + wchar_t *name; + int len; + if (opts->read_write & DO_READ) neededSelects++; if (opts->read_write & DO_WRITE) @@ -1246,26 +1246,40 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts) * Spawn the port program. */ - DEBUGF(("Spawning \"%s\"\n", name)); + if ((len = MultiByteToWideChar(CP_UTF8, 0, utf8_name, -1, NULL, 0)) > 0) { + name = erts_alloc(ERTS_ALC_T_TMP, len*sizeof(wchar_t)); + MultiByteToWideChar(CP_UTF8, 0, utf8_name, -1, name, len); + } else { /* Not valid utf-8, just convert byte to wchar */ + int i; + len = strlen(utf8_name); + name = erts_alloc(ERTS_ALC_T_TMP, (1+len)*sizeof(wchar_t)); + for(i=0; i<len; i++) { + name[i] = (wchar_t) utf8_name[i]; + } + name[i] = L'\0'; + } + DEBUGF(("Spawning \"%S\"\n", name)); envir = win_build_environment(envir); /* Always a unicode environment */ - ok = create_child_process(name, - hChildStdin, - hChildStdout, - hChildStderr, - &dp->port_pid, - &pid, - opts->hide_window, - (LPVOID) envir, - (LPTSTR) opts->wd, - opts->spawn_type, - opts->argv, - &errno_return); + ok = create_child_process(name, + hChildStdin, + hChildStdout, + hChildStderr, + &dp->port_pid, + &pid, + opts->hide_window, + (LPVOID) envir, + (wchar_t *) opts->wd, + opts->spawn_type, + (wchar_t **) opts->argv, + &errno_return); CloseHandle(hChildStdin); CloseHandle(hChildStdout); if (close_child_stderr && hChildStderr != INVALID_HANDLE_VALUE && hChildStderr != 0) { CloseHandle(hChildStderr); } + erts_free(ERTS_ALC_T_TMP, name); + if (envir != NULL) { erts_free(ERTS_ALC_T_ENVIRONMENT, envir); } @@ -1343,9 +1357,9 @@ create_file_thread(AsyncIo* aio, int mode) * Example: input = "\"Program Files\"\\erl arg1 arg2" * gives 19 as result. * The length returned is equivalent with length(argv[0]) if the - * comman line should have been prepared by _setargv for the main function + * command line should have been prepared by _setargv for the main function */ -int parse_command(char* cmd){ +int parse_command(wchar_t* cmd){ #define NORMAL 2 #define STRING 1 #define STOP 0 @@ -1353,17 +1367,17 @@ int parse_command(char* cmd){ int state = NORMAL; while (cmd[i]) { switch (cmd[i]) { - case '"': + case L'"': if (state == NORMAL) state = STRING; else state = NORMAL; break; - case '\\': - if ((state == STRING) && (cmd[i+1]=='"')) + case L'\\': + if ((state == STRING) && (cmd[i+1]==L'"')) i++; break; - case ' ': + case L' ': if (state == NORMAL) state = STOP; break; @@ -1378,7 +1392,7 @@ int parse_command(char* cmd){ return i; } -static BOOL need_quotes(WCHAR *str) +static BOOL need_quotes(wchar_t *str) { int in_quote = 0; int backslashed = 0; @@ -1442,7 +1456,7 @@ static BOOL need_quotes(WCHAR *str) static BOOL create_child_process ( - char *origcmd, /* Command line for child process (including + wchar_t *origcmd, /* Command line for child process (including * name of executable). Or whole executable if st is * ERTS_SPAWN_EXECUTABLE */ @@ -1453,57 +1467,53 @@ create_child_process LPDWORD pdwID, /* Pointer to variable to received Process ID */ BOOL hide, /* Hide the window unconditionally. */ LPVOID env, /* Environment for the child */ - LPTSTR wd, /* Working dir for the child */ + wchar_t *wd, /* Working dir for the child */ unsigned st, /* Flags for spawn, tells us how to interpret origcmd */ - char **argv, /* Argument vector if given. */ + wchar_t **argv, /* Argument vector if given. */ int *errno_return /* Place to put an errno in in case of failure */ ) -{ +{ PROCESS_INFORMATION piProcInfo = {0}; BOOL ok = FALSE; int applType; /* Not to be changed for different types of executables */ int staticCreateFlags = GetPriorityClass(GetCurrentProcess()); int createFlags = DETACHED_PROCESS; - char *newcmdline = NULL; + wchar_t *newcmdline = NULL; int cmdlength; - char* thecommand; - LPTSTR appname = NULL; + wchar_t* thecommand; + wchar_t* appname = NULL; HANDLE hProcess = GetCurrentProcess(); - - *errno_return = -1; + STARTUPINFOW siStartInfo = {0}; + wchar_t execPath[MAX_PATH]; + *errno_return = -1; + siStartInfo.cb = sizeof(STARTUPINFOW); + siStartInfo.dwFlags = STARTF_USESTDHANDLES; + siStartInfo.hStdInput = hStdin; + siStartInfo.hStdOutput = hStdout; + siStartInfo.hStdError = hStderr; if (st != ERTS_SPAWN_EXECUTABLE) { - STARTUPINFO siStartInfo = {0}; - char execPath[MAX_PATH]; - - siStartInfo.cb = sizeof(STARTUPINFO); - siStartInfo.dwFlags = STARTF_USESTDHANDLES; - siStartInfo.hStdInput = hStdin; - siStartInfo.hStdOutput = hStdout; - siStartInfo.hStdError = hStderr; - /* * Parse out the program name from the command line (it can be quoted and * contain spaces). */ - newcmdline = erts_alloc(ERTS_ALC_T_TMP, 2048); + newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, 2048*sizeof(wchar_t)); cmdlength = parse_command(origcmd); - thecommand = (char *) erts_alloc(ERTS_ALC_T_TMP, cmdlength+1); - strncpy(thecommand, origcmd, cmdlength); - thecommand[cmdlength] = '\0'; - DEBUGF(("spawn command: %s\n", thecommand)); - - applType = application_type(thecommand, execPath, TRUE, - TRUE, errno_return); - DEBUGF(("application_type returned for (%s) is %d\n", thecommand, applType)); + thecommand = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (cmdlength+1)*sizeof(wchar_t)); + wcsncpy(thecommand, origcmd, cmdlength); + thecommand[cmdlength] = L'\0'; + DEBUGF(("spawn command: %S\n", thecommand)); + + applType = application_type(thecommand, execPath, TRUE, TRUE, errno_return); + DEBUGF(("application_type returned for (%S) is %d\n", thecommand, applType)); erts_free(ERTS_ALC_T_TMP, (void *) thecommand); if (applType == APPL_NONE) { erts_free(ERTS_ALC_T_TMP,newcmdline); return FALSE; } - newcmdline[0] = '\0'; + newcmdline[0] = L'\0'; if (applType == APPL_DOS) { /* @@ -1512,11 +1522,11 @@ create_child_process * a normal process inside of a hidden console application, * and then run that hidden console as a detached process. */ - + siStartInfo.wShowWindow = SW_HIDE; siStartInfo.dwFlags |= STARTF_USESHOWWINDOW; createFlags = CREATE_NEW_CONSOLE; - strcat(newcmdline, "cmd.exe /c "); + wcscat(newcmdline, L"cmd.exe /c "); } else if (hide) { DEBUGF(("hiding window\n")); siStartInfo.wShowWindow = SW_HIDE; @@ -1524,35 +1534,25 @@ create_child_process createFlags = 0; } - strcat(newcmdline, execPath); - strcat(newcmdline, origcmd+cmdlength); - DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags)); - ok = CreateProcessA(appname, - newcmdline, - NULL, - NULL, - TRUE, - createFlags | staticCreateFlags | - CREATE_UNICODE_ENVIRONMENT, - env, - wd, - &siStartInfo, + wcscat(newcmdline, execPath); + wcscat(newcmdline, origcmd+cmdlength); + DEBUGF(("Creating child process: %S, createFlags = %d\n", newcmdline, createFlags)); + ok = CreateProcessW(appname, + newcmdline, + NULL, + NULL, + TRUE, + createFlags | staticCreateFlags | + CREATE_UNICODE_ENVIRONMENT, + env, + wd, + &siStartInfo, &piProcInfo); } else { /* ERTS_SPAWN_EXECUTABLE, filename and args are in unicode ({utf16,little}) */ int run_cmd = 0; - STARTUPINFOW siStartInfo = {0}; - WCHAR execPath[MAX_PATH]; - - siStartInfo.cb = sizeof(STARTUPINFOW); - siStartInfo.dwFlags = STARTF_USESTDHANDLES; - siStartInfo.hStdInput = hStdin; - siStartInfo.hStdOutput = hStdout; - siStartInfo.hStdError = hStderr; - - applType = application_type_w((WCHAR *) origcmd, execPath, FALSE, FALSE, - errno_return); + applType = application_type(origcmd, execPath, FALSE, FALSE, errno_return); if (applType == APPL_NONE) { return FALSE; } @@ -1572,37 +1572,37 @@ create_child_process createFlags = 0; } if (run_cmd) { - WCHAR cmdPath[MAX_PATH]; + wchar_t cmdPath[MAX_PATH]; int cmdType; - cmdType = application_type_w(L"cmd.exe", cmdPath, TRUE, FALSE, errno_return); + cmdType = application_type(L"cmd.exe", cmdPath, TRUE, FALSE, errno_return); if (cmdType == APPL_NONE || cmdType == APPL_DOS) { return FALSE; } - appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(cmdPath)+1)*sizeof(WCHAR)); - wcscpy((WCHAR *) appname,cmdPath); + appname = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(cmdPath)+1)*sizeof(wchar_t)); + wcscpy(appname,cmdPath); } else { - appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(execPath)+1)*sizeof(WCHAR)); - wcscpy((WCHAR *) appname, execPath); + appname = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(execPath)+1)*sizeof(wchar_t)); + wcscpy(appname, execPath); } if (argv == NULL) { BOOL orig_need_q = need_quotes(execPath); - WCHAR *ptr; + wchar_t *ptr; int ocl = wcslen(execPath); if (run_cmd) { - newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP, - (ocl + ((orig_need_q) ? 3 : 1) - + 11)*sizeof(WCHAR)); - memcpy(newcmdline,L"cmd.exe /c ",11*sizeof(WCHAR)); - ptr = (WCHAR *) (newcmdline + (11*sizeof(WCHAR))); + newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, + (ocl + ((orig_need_q) ? 3 : 1) + + 11)*sizeof(wchar_t)); + memcpy(newcmdline,L"cmd.exe /c ",11*sizeof(wchar_t)); + ptr = newcmdline + 11; } else { - newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP, - (ocl + ((orig_need_q) ? 3 : 1))*sizeof(WCHAR)); - ptr = (WCHAR *) newcmdline; + newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, + (ocl + ((orig_need_q) ? 3 : 1))*sizeof(wchar_t)); + ptr = (wchar_t *) newcmdline; } if (orig_need_q) { *ptr++ = L'"'; } - memcpy(ptr,execPath,ocl*sizeof(WCHAR)); + memcpy(ptr,execPath,ocl*sizeof(wchar_t)); ptr += ocl; if (orig_need_q) { *ptr++ = L'"'; @@ -1610,12 +1610,12 @@ create_child_process *ptr = L'\0'; } else { int sum = 1; /* '\0' */ - WCHAR **ar = (WCHAR **) argv; - WCHAR *n; - char *save_arg0 = NULL; - if (argv[0] == erts_default_arg0 || run_cmd) { + wchar_t **ar = argv; + wchar_t *n; + wchar_t *save_arg0 = NULL; + if (argv[0] == (wchar_t *) erts_default_arg0 || run_cmd) { save_arg0 = argv[0]; - argv[0] = (char *) execPath; + argv[0] = execPath; } if (run_cmd) { sum += 11; /* cmd.exe /c */ @@ -1628,11 +1628,11 @@ create_child_process sum++; /* space */ ++ar; } - ar = (WCHAR **) argv; - newcmdline = erts_alloc(ERTS_ALC_T_TMP, sum*sizeof(WCHAR)); - n = (WCHAR *) newcmdline; + ar = argv; + newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, sum*sizeof(wchar_t)); + n = newcmdline; if (run_cmd) { - memcpy(n,L"cmd.exe /c ",11*sizeof(WCHAR)); + memcpy(n,L"cmd.exe /c ",11*sizeof(wchar_t)); n += 11; } while (*ar != NULL) { @@ -1641,7 +1641,7 @@ create_child_process if (q) { *n++ = L'"'; } - memcpy(n,*ar,sum*sizeof(WCHAR)); + memcpy(n,*ar,sum*sizeof(wchar_t)); n += sum; if (q) { *n++ = L'"'; @@ -1656,16 +1656,16 @@ create_child_process } DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags)); - ok = CreateProcessW((WCHAR *) appname, - (WCHAR *) newcmdline, - NULL, - NULL, - TRUE, - createFlags | staticCreateFlags | - CREATE_UNICODE_ENVIRONMENT, - env, - (WCHAR *) wd, - &siStartInfo, + ok = CreateProcessW((wchar_t *) appname, + (wchar_t *) newcmdline, + NULL, + NULL, + TRUE, + createFlags | staticCreateFlags | + CREATE_UNICODE_ENVIRONMENT, + env, + wd, + &siStartInfo, &piProcInfo); } /* end SPAWN_EXECUTABLE */ @@ -1774,180 +1774,23 @@ static int create_pipe(HANDLE *phRead, HANDLE *phWrite, BOOL inheritRead, BOOL o return TRUE; } - - - -static int application_type -( - const char *originalName, /* Name of the application to find. */ - char fullPath[MAX_PATH], /* Filled with complete path to - * application. */ - BOOL search_in_path, /* If we should search the system wide path */ - BOOL handle_quotes, /* If we should handle quotes around executable */ - int *error_return /* A place to put an error code */ - ) -{ - int applType, i; - HANDLE hFile; - char *ext, *rest; - char buf[2]; - DWORD read; - IMAGE_DOS_HEADER header; - static char extensions[][5] = {"", ".com", ".exe", ".bat"}; - int is_quoted; - int len; - - /* Look for the program as an external program. First try the name - * as it is, then try adding .com, .exe, and .bat, in that order, to - * the name, looking for an executable. - * NOTE! that we does not support execution of .com programs on Windows NT - * - * - * Using the raw SearchPath() procedure doesn't do quite what is - * necessary. If the name of the executable already contains a '.' - * character, it will not try appending the specified extension when - * searching (in other words, SearchPath will not find the program - * "a.b.exe" if the arguments specified "a.b" and ".exe"). - * So, first look for the file as it is named. Then manually append - * the extensions, looking for a match. (') - */ - - len = strlen(originalName); - is_quoted = handle_quotes && len > 0 && originalName[0] == '"' && - originalName[len-1] == '"'; - - applType = APPL_NONE; - *error_return = ENOENT; - for (i = 0; i < (int) (sizeof(extensions) / sizeof(extensions[0])); i++) { - if(is_quoted) { - lstrcpyn(fullPath, originalName+1, MAX_PATH - 7); - len = strlen(fullPath); - if(len > 0) { - fullPath[len-1] = '\0'; - } - } else { - lstrcpyn(fullPath, originalName, MAX_PATH - 5); - } - lstrcat(fullPath, extensions[i]); - SearchPath((search_in_path) ? NULL : ".", fullPath, NULL, MAX_PATH, fullPath, &rest); - - /* - * Ignore matches on directories or data files, return if identified - * a known type. - */ - - if (GetFileAttributes(fullPath) & FILE_ATTRIBUTE_DIRECTORY) { - continue; - } - - ext = strrchr(fullPath, '.'); - if ((ext != NULL) && (strcmpi(ext, ".bat") == 0)) { - *error_return = EACCES; - applType = APPL_DOS; - break; - } - - hFile = CreateFile(fullPath, GENERIC_READ, FILE_SHARE_READ, NULL, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (hFile == INVALID_HANDLE_VALUE) { - continue; - } - - *error_return = EACCES; /* If considered an error, - it's an access error */ - header.e_magic = 0; - ReadFile(hFile, (void *) &header, sizeof(header), &read, NULL); - if (header.e_magic != IMAGE_DOS_SIGNATURE) { - /* - * Doesn't have the magic number for relocatable executables. If - * filename ends with .com, assume it's a DOS application anyhow. - * Note that we didn't make this assumption at first, because some - * supposed .com files are really 32-bit executables with all the - * magic numbers and everything. - */ - - CloseHandle(hFile); - if ((ext != NULL) && (strcmpi(ext, ".com") == 0)) { - applType = APPL_DOS; - break; - } - continue; - } - if (header.e_lfarlc != sizeof(header)) { - /* - * All Windows 3.X and Win32 and some DOS programs have this value - * set here. If it doesn't, assume that since it already had the - * other magic number it was a DOS application. - */ - - CloseHandle(hFile); - applType = APPL_DOS; - break; - } - - /* - * The DWORD at header.e_lfanew points to yet another magic number. - */ - - buf[0] = '\0'; - SetFilePointer(hFile, header.e_lfanew, NULL, FILE_BEGIN); - ReadFile(hFile, (void *) buf, 2, &read, NULL); - CloseHandle(hFile); - - if ((buf[0] == 'L') && (buf[1] == 'E')) { - applType = APPL_DOS; - } else if ((buf[0] == 'N') && (buf[1] == 'E')) { - applType = APPL_WIN3X; - } else if ((buf[0] == 'P') && (buf[1] == 'E')) { - applType = APPL_WIN32; - } else { - continue; - } - break; - } - - if (applType == APPL_NONE) { - return APPL_NONE; - } - - if ((applType == APPL_DOS) || (applType == APPL_WIN3X)) { - /* - * Replace long path name of executable with short path name for - * 16-bit applications. Otherwise the application may not be able - * to correctly parse its own command line to separate off the - * application name from the arguments. - */ - - GetShortPathName(fullPath, fullPath, MAX_PATH); - } - if (is_quoted) { - /* restore quotes on quoted program name */ - len = strlen(fullPath); - memmove(fullPath+1,fullPath,len); - fullPath[0]='"'; - fullPath[len+1]='"'; - fullPath[len+2]='\0'; - } - return applType; -} - -static int application_type_w (const WCHAR *originalName, /* Name of the application to find. */ - WCHAR wfullpath[MAX_PATH],/* Filled with complete path to +static int application_type (const wchar_t *originalName, /* Name of the application to find. */ + wchar_t wfullpath[MAX_PATH],/* Filled with complete path to * application. */ - BOOL search_in_path, /* If we should search the system wide path */ - BOOL handle_quotes, /* If we should handle quotes around executable */ - int *error_return) /* A place to put an error code */ + BOOL search_in_path, /* If we should search the system wide path */ + BOOL handle_quotes, /* If we should handle quotes around executable */ + int *error_return) /* A place to put an error code */ { int applType, i; HANDLE hFile; - WCHAR *ext, *rest; + wchar_t *ext, *rest; char buf[2]; DWORD read; IMAGE_DOS_HEADER header; - static WCHAR extensions[][5] = {L"", L".com", L".exe", L".bat"}; + static wchar_t extensions[][5] = {L"", L".com", L".exe", L".bat"}; int is_quoted; int len; - WCHAR xfullpath[MAX_PATH]; + wchar_t xfullpath[MAX_PATH]; len = wcslen(originalName); is_quoted = handle_quotes && len > 0 && originalName[0] == L'"' && @@ -2062,7 +1905,7 @@ static int application_type_w (const WCHAR *originalName, /* Name of the applica if (is_quoted) { /* restore quotes on quoted program name */ len = wcslen(wfullpath); - memmove(wfullpath+1,wfullpath,len*sizeof(WCHAR)); + memmove(wfullpath+1,wfullpath,len*sizeof(wchar_t)); wfullpath[0]=L'"'; wfullpath[len+1]=L'"'; wfullpath[len+2]=L'\0'; @@ -2141,8 +1984,9 @@ threaded_writer(LPVOID param) for (;;) { handle = WaitForMultipleObjects(2, handles, FALSE, INFINITE); - if (aio->flags & DF_EXIT_THREAD) + if (aio->flags & DF_EXIT_THREAD) { break; + } buf = OV_BUFFER_PTR(aio); numToWrite = OV_NUM_TO_READ(aio); @@ -2150,6 +1994,7 @@ threaded_writer(LPVOID param) if (handle == (WAIT_OBJECT_0 + 1) && numToWrite == 0) { SetEvent(aio->flushReplyEvent); + aio->flags |= DF_THREAD_FLUSHED; continue; } @@ -2197,6 +2042,7 @@ threaded_writer(LPVOID param) if (aio->flags & DF_EXIT_THREAD) break; } + aio->flags |= DF_THREAD_FLUSHED; CloseHandle(aio->fd); aio->fd = INVALID_HANDLE_VALUE; unrefer_driver_data(aio->dp); @@ -2340,9 +2186,11 @@ static void fd_stop(ErlDrvData data) (void) driver_select(dp->port_num, (ErlDrvEvent)dp->out.ov.hEvent, ERL_DRV_WRITE, 0); - ASSERT(dp->out.flushEvent); - SetEvent(dp->out.flushEvent); - WaitForSingleObject(dp->out.flushReplyEvent, INFINITE); + do { + ASSERT(dp->out.flushEvent); + SetEvent(dp->out.flushEvent); + } while (WaitForSingleObject(dp->out.flushReplyEvent, 10) == WAIT_TIMEOUT + || !(dp->out.flags & DF_THREAD_FLUSHED)); } } @@ -2433,12 +2281,12 @@ threaded_exiter(LPVOID param) */ i = 0; if (dp->out.thread != (HANDLE) -1) { - dp->out.flags = DF_EXIT_THREAD; + dp->out.flags |= DF_EXIT_THREAD; SetEvent(dp->out.ioAllowed); handles[i++] = dp->out.thread; } if (dp->in.thread != (HANDLE) -1) { - dp->in.flags = DF_EXIT_THREAD; + dp->in.flags |= DF_EXIT_THREAD; SetEvent(dp->in.ioAllowed); handles[i++] = dp->in.thread; } @@ -2906,6 +2754,30 @@ void erts_sys_free(ErtsAlcType_t t, void *x, void *p) free(p); } +void *erts_sys_aligned_alloc(UWord alignment, UWord size) +{ + void *ptr; + ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */ + ptr = _aligned_malloc((size_t) size, (size_t) alignment); + ASSERT(!ptr || (((UWord) ptr) & (alignment - 1)) == 0); + return ptr; +} + +void erts_sys_aligned_free(UWord alignment, void *ptr) +{ + ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */ + _aligned_free(ptr); +} + +void *erts_sys_aligned_realloc(UWord alignment, void *ptr, UWord size, UWord old_size) +{ + void *new_ptr; + ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */ + new_ptr = _aligned_realloc(ptr, (size_t) size, (size_t) alignment); + ASSERT(!new_ptr || (((UWord) new_ptr) & (alignment - 1)) == 0); + return new_ptr; +} + static Preload* preloaded = NULL; static unsigned* res_name = NULL; static int num_preloaded = 0; @@ -3194,7 +3066,7 @@ erl_bin_write(buf, sz, max) } void -erl_assert_error(char* expr, char* file, int line) +erl_assert_error(const char* expr, const char* func, const char* file, int line) { char message[1024]; @@ -3325,7 +3197,7 @@ void erl_sys_init(void) noinherit_std_handle(STD_ERROR_HANDLE); #ifdef ERTS_SMP - erts_smp_tsd_key_create(&win32_errstr_key); + erts_smp_tsd_key_create(&win32_errstr_key,"win32_errstr_key"); InitializeCriticalSection(&htbc_lock); #endif erts_smp_atomic_init_nob(&pipe_creation_counter,0); diff --git a/erts/emulator/sys/win32/sys_env.c b/erts/emulator/sys/win32/sys_env.c index 754f4c6e4c..9f977ad6c8 100644 --- a/erts/emulator/sys/win32/sys_env.c +++ b/erts/emulator/sys/win32/sys_env.c @@ -141,6 +141,24 @@ void fini_getenv_state(GETENV_STATE *state) erts_smp_rwmtx_runlock(&environ_rwmtx);
}
+int erts_sys_unsetenv(char *key)
+{
+ int res = 0;
+ WCHAR *wkey = (WCHAR *) key;
+
+ SetLastError(0);
+ erts_smp_rwmtx_rlock(&environ_rwmtx);
+ GetEnvironmentVariableW(wkey,
+ NULL,
+ 0);
+ if (GetLastError() != ERROR_ENVVAR_NOT_FOUND) {
+ res = (SetEnvironmentVariableW(wkey,
+ NULL) ? 0 : 1);
+ }
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ return res;
+}
+
char*
win_build_environment(char* new_env)
{
diff --git a/erts/emulator/sys/win32/sys_float.c b/erts/emulator/sys/win32/sys_float.c index fb1ffc3089..0e19746cf5 100644 --- a/erts/emulator/sys/win32/sys_float.c +++ b/erts/emulator/sys/win32/sys_float.c @@ -52,7 +52,7 @@ void erts_thread_disable_fpe(void) int sys_chars_to_double(char *buf, double *fp) { - char *s = buf, *t, *dp; + unsigned char *s = buf, *t, *dp; /* Robert says that something like this is what he really wanted: * (The [.,] radix test is NOT what Robert wanted - it was added later) @@ -120,7 +120,7 @@ sys_chars_to_double(char *buf, double *fp) int sys_double_to_chars_ext(double fp, char *buffer, size_t buffer_size, size_t decimals) { - char *s = buffer; + unsigned char *s = buffer; if (erts_snprintf(buffer, buffer_size, "%.*e", decimals, fp) >= buffer_size) return -1; @@ -133,6 +133,8 @@ sys_double_to_chars_ext(double fp, char *buffer, size_t buffer_size, size_t deci return s-buffer; /* i.e strlen(buffer) */ } +#ifdef USE_MATHERR + int matherr(struct _exception *exc) { @@ -141,6 +143,8 @@ matherr(struct _exception *exc) return 1; } +#endif + static void fpe_exception(int sig) { diff --git a/erts/emulator/sys/win32/sys_time.c b/erts/emulator/sys/win32/sys_time.c index 2f2dfc8197..b84c8f85ce 100644 --- a/erts/emulator/sys/win32/sys_time.c +++ b/erts/emulator/sys/win32/sys_time.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 1997-2011. All Rights Reserved. + * Copyright Ericsson AB 1997-2013. All Rights Reserved. * * The contents of this file are subject to the Erlang Public License, * Version 1.1, (the "License"); you may not use this file except in @@ -63,6 +63,8 @@ static SysHrTime wrap = 0; static DWORD last_tick_count = 0; +static erts_smp_mtx_t wrap_lock; +static ULONGLONG (WINAPI *pGetTickCount64)(void) = NULL; /* Getting timezone information is a heavy operation, so we want to do this only once */ @@ -77,11 +79,23 @@ static int days_in_month[2][13] = { int sys_init_time(void) { + char kernel_dll_name[] = "kernel32"; + HMODULE module; + + module = GetModuleHandle(kernel_dll_name); + pGetTickCount64 = (module != NULL) ? + (ULONGLONG (WINAPI *)(void)) + GetProcAddress(module,"GetTickCount64") : + NULL; + if(GetTimeZoneInformation(&static_tzi) && static_tzi.StandardDate.wMonth != 0 && static_tzi.DaylightDate.wMonth != 0) { have_static_tzi = 1; } + + erts_smp_mtx_init(&wrap_lock, "sys_gethrtime"); + return 1; } @@ -363,15 +377,39 @@ sys_gettimeofday(SysTimeval *tv) EPOCH_JULIAN_DIFF); } +extern int erts_initialized; SysHrTime sys_gethrtime(void) { - DWORD ticks = (SysHrTime) (GetTickCount() & 0x7FFFFFFF); - if (ticks < (SysHrTime) last_tick_count) { - wrap += LL_LITERAL(1) << 31; + if (pGetTickCount64 != NULL) { + return ((SysHrTime) pGetTickCount64()) * LL_LITERAL(1000000); + } else { + DWORD ticks; + SysHrTime res; + erts_smp_mtx_lock(&wrap_lock); + ticks = (SysHrTime) (GetTickCount() & 0x7FFFFFFF); + if (ticks < (SysHrTime) last_tick_count) { + /* Detect a race that should no longer be here... */ + if ((((SysHrTime) last_tick_count) - ((SysHrTime) ticks)) > 1000) { + wrap += LL_LITERAL(1) << 31; + } else { + /* + * XXX Debug: Violates locking order, remove all this, + * after testing! + */ + erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); + erts_dsprintf(dsbufp, "Did not wrap when last_tick %d " + "and tick %d", + last_tick_count, ticks); + erts_send_error_to_logger_nogl(dsbufp); + ticks = last_tick_count; + } + } + last_tick_count = ticks; + res = ((((LONGLONG) ticks) + wrap) * LL_LITERAL(1000000)); + erts_smp_mtx_unlock(&wrap_lock); + return res; } - last_tick_count = ticks; - return ((((LONGLONG) ticks) + wrap) * LL_LITERAL(1000000)); } clock_t |