aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/sys
diff options
context:
space:
mode:
authorSverker Eriksson <[email protected]>2017-08-30 20:55:08 +0200
committerSverker Eriksson <[email protected]>2017-08-30 20:55:08 +0200
commit7c67bbddb53c364086f66260701bc54a61c9659c (patch)
tree92ab0d4b91d5e2f6e7a3f9d61ea25089e8a71fe0 /erts/emulator/sys
parent97dc5e7f396129222419811c173edc7fa767b0f8 (diff)
parent3b7a6ffddc819bf305353a593904cea9e932e7dc (diff)
downloadotp-7c67bbddb53c364086f66260701bc54a61c9659c.tar.gz
otp-7c67bbddb53c364086f66260701bc54a61c9659c.tar.bz2
otp-7c67bbddb53c364086f66260701bc54a61c9659c.zip
Merge tag 'OTP-19.0' into sverker/19/binary_to_atom-utf8-crash/ERL-474/OTP-14590
Diffstat (limited to 'erts/emulator/sys')
-rw-r--r--erts/emulator/sys/common/erl_check_io.c784
-rw-r--r--erts/emulator/sys/common/erl_check_io.h74
-rw-r--r--erts/emulator/sys/common/erl_mmap.c2918
-rw-r--r--erts/emulator/sys/common/erl_mmap.h181
-rw-r--r--erts/emulator/sys/common/erl_mseg.c1283
-rw-r--r--erts/emulator/sys/common/erl_mseg.h82
-rw-r--r--erts/emulator/sys/common/erl_mtrace_sys_wrap.c23
-rw-r--r--erts/emulator/sys/common/erl_os_monotonic_time_extender.c89
-rw-r--r--erts/emulator/sys/common/erl_os_monotonic_time_extender.h66
-rw-r--r--erts/emulator/sys/common/erl_poll.c503
-rw-r--r--erts/emulator/sys/common/erl_poll.h46
-rw-r--r--erts/emulator/sys/common/erl_sys_common_misc.c33
-rw-r--r--erts/emulator/sys/common/erl_util_queue.h21
-rw-r--r--erts/emulator/sys/unix/driver_int.h23
-rw-r--r--erts/emulator/sys/unix/erl_child_setup.c576
-rw-r--r--erts/emulator/sys/unix/erl_child_setup.h77
-rw-r--r--erts/emulator/sys/unix/erl_main.c23
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h294
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys_ddll.c29
-rw-r--r--erts/emulator/sys/unix/sys.c2124
-rw-r--r--erts/emulator/sys/unix/sys_drivers.c1862
-rw-r--r--erts/emulator/sys/unix/sys_float.c141
-rw-r--r--erts/emulator/sys/unix/sys_time.c997
-rw-r--r--erts/emulator/sys/unix/sys_uds.c155
-rw-r--r--erts/emulator/sys/unix/sys_uds.h57
-rw-r--r--erts/emulator/sys/win32/dosmap.c23
-rw-r--r--erts/emulator/sys/win32/driver_int.h23
-rw-r--r--erts/emulator/sys/win32/erl_main.c23
-rw-r--r--erts/emulator/sys/win32/erl_poll.c105
-rw-r--r--erts/emulator/sys/win32/erl_win32_sys_ddll.c58
-rw-r--r--erts/emulator/sys/win32/erl_win_dyn_driver.h49
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h156
-rw-r--r--[-rwxr-xr-x]erts/emulator/sys/win32/sys.c641
-rw-r--r--erts/emulator/sys/win32/sys_env.c39
-rw-r--r--erts/emulator/sys/win32/sys_float.c26
-rw-r--r--erts/emulator/sys/win32/sys_interrupt.c29
-rw-r--r--erts/emulator/sys/win32/sys_time.c421
37 files changed, 9967 insertions, 4087 deletions
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 7035dc77df..44a77f3ea5 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -34,12 +35,15 @@
#endif
#include "sys.h"
#include "global.h"
+#include "erl_port.h"
#include "erl_check_io.h"
#include "erl_thr_progress.h"
#include "dtrace-wrapper.h"
+#include "lttng-wrapper.h"
+#define ERTS_WANT_TIMER_WHEEL_API
+#include "erl_time.h"
-#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
-#else
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
# include "safe_hash.h"
# define DRV_EV_STATE_HTAB_SIZE 1024
#endif
@@ -51,8 +55,17 @@ typedef char EventStateType;
#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */
typedef char EventStateFlags;
-#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2)
+#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4)
+
+#ifdef DEBUG
+# define ERTS_ACTIVE_FD_INC 2
+#else
+# define ERTS_ACTIVE_FD_INC 128
+#endif
+#define ERTS_CHECK_IO_POLL_RES_LEN 512
#if defined(ERTS_KERNEL_POLL_VERSION)
# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp
@@ -66,6 +79,7 @@ typedef char EventStateFlags;
(ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL)
#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control)
+#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv)
#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait)
#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)
@@ -78,10 +92,19 @@ typedef char EventStateFlags;
#define ERTS_CIO_POLL_INIT ERTS_POLL_EXPORT(erts_poll_init)
#define ERTS_CIO_POLL_INFO ERTS_POLL_EXPORT(erts_poll_info)
+#define GET_FD(fd) fd
+
static struct pollset_info
{
ErtsPollSet ps;
erts_smp_atomic_t in_poll_wait; /* set while doing poll */
+ struct {
+ int six; /* start index */
+ int eix; /* end index */
+ erts_smp_atomic32_t no;
+ int size;
+ ErtsSysFdType *array;
+ } active_fd;
#ifdef ERTS_SMP
struct removed_fd* removed_list; /* list of deselected fd's*/
erts_smp_spinlock_t removed_list_lock;
@@ -94,9 +117,11 @@ typedef struct {
SafeHashBucket hb;
#endif
ErtsSysFdType fd;
- union {
- ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+ struct {
ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+#endif
erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */
} driver;
ErtsPollEvents events;
@@ -166,6 +191,10 @@ static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd)
ErtsDrvEventState tmpl;
tmpl.fd = fd;
tmpl.driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ tmpl.driver.event = NULL;
+#endif
+ tmpl.driver.drv_ptr = NULL;
tmpl.events = 0;
tmpl.remove_cnt = 0;
tmpl.type = ERTS_EV_TYPE_NONE;
@@ -206,6 +235,69 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_F
#endif
static ERTS_INLINE void
+init_iotask(ErtsIoTask *io_task)
+{
+ erts_port_task_handle_init(&io_task->task);
+ erts_smp_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0));
+}
+
+static ERTS_INLINE int
+is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time)
+{
+ if (erts_port_task_is_scheduled(&io_task->task))
+ return 1;
+ if (erts_smp_atomic_read_nob(&io_task->executed_time) == current_cio_time)
+ return 1;
+ return 0;
+}
+
+static ERTS_INLINE ErtsDrvSelectDataState *
+alloc_drv_select_data(void)
+{
+ ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
+ sizeof(ErtsDrvSelectDataState));
+ dsp->inport = NIL;
+ dsp->outport = NIL;
+ init_iotask(&dsp->iniotask);
+ init_iotask(&dsp->outiotask);
+ return dsp;
+}
+
+static ERTS_INLINE void
+free_drv_select_data(ErtsDrvSelectDataState *dsp)
+{
+ ASSERT(!erts_port_task_is_scheduled(&dsp->iniotask.task));
+ ASSERT(!erts_port_task_is_scheduled(&dsp->outiotask.task));
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp);
+}
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+
+static ERTS_INLINE ErtsDrvEventDataState *
+alloc_drv_event_data(void)
+{
+ ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
+ sizeof(ErtsDrvEventDataState));
+ dep->port = NIL;
+ dep->data = NULL;
+ dep->removed_events = 0;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ dep->deferred_events = 0;
+#endif
+ init_iotask(&dep->iotask);
+ return dep;
+}
+
+static ERTS_INLINE void
+free_drv_event_data(ErtsDrvEventDataState *dep)
+{
+ ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task));
+ erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep);
+}
+
+#endif /* ERTS_CIO_HAVE_DRV_EVENT */
+
+static ERTS_INLINE void
remember_removed(ErtsDrvEventState *state, struct pollset_info* psi)
{
#ifdef ERTS_SMP
@@ -285,7 +377,7 @@ forget_removed(struct pollset_info* psi)
drv_ptr = state->driver.drv_ptr;
ASSERT(drv_ptr);
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
state->driver.drv_ptr = NULL;
/* Fall through */
case ERTS_EV_TYPE_NONE:
@@ -304,6 +396,7 @@ forget_removed(struct pollset_info* psi)
if (drv_ptr) {
int was_unmasked = erts_block_fpe();
DTRACE1(driver_stop_select, drv_ptr->name);
+ LTTNG1(driver_stop_select, drv_ptr->name);
(*drv_ptr->stop_select) ((ErlDrvEvent) fd, NULL);
erts_unblock_fpe(was_unmasked);
if (drv_ptr->handle) {
@@ -322,14 +415,16 @@ static void
grow_drv_ev_state(int min_ix)
{
int i;
+ int old_len;
int new_len;
- new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_ix + 1);
- if (new_len > max_fds)
- new_len = max_fds;
-
erts_smp_mtx_lock(&drv_ev_state_grow_lock);
- if (erts_smp_atomic_read_nob(&drv_ev_state_len) <= min_ix) {
+ old_len = erts_smp_atomic_read_nob(&drv_ev_state_len);
+ if (min_ix >= old_len) {
+ new_len = erts_poll_new_table_len(old_len, min_ix + 1);
+ if (new_len > max_fds)
+ new_len = max_fds;
+
for (i=0; i<DRV_EV_STATE_LOCK_CNT; i++) { /* lock all fd's */
erts_smp_mtx_lock(&drv_ev_state_locks[i].lck);
}
@@ -339,9 +434,13 @@ grow_drv_ev_state(int min_ix)
sizeof(ErtsDrvEventState)*new_len)
: erts_alloc(ERTS_ALC_T_DRV_EV_STATE,
sizeof(ErtsDrvEventState)*new_len));
- for (i = erts_smp_atomic_read_nob(&drv_ev_state_len); i < new_len; i++) {
+ for (i = old_len; i < new_len; i++) {
drv_ev_state[i].fd = (ErtsSysFdType) i;
drv_ev_state[i].driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ drv_ev_state[i].driver.event = NULL;
+#endif
+ drv_ev_state[i].driver.drv_ptr = NULL;
drv_ev_state[i].events = 0;
drv_ev_state[i].remove_cnt = 0;
drv_ev_state[i].type = ERTS_EV_TYPE_NONE;
@@ -362,11 +461,7 @@ grow_drv_ev_state(int min_ix)
static ERTS_INLINE void
abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type)
{
- if (is_nil(id)) {
- ASSERT(type == ERTS_EV_TYPE_NONE
- || !erts_port_task_is_scheduled(pthp));
- }
- else if (erts_port_task_is_scheduled(pthp)) {
+ if (is_not_nil(id) && erts_port_task_is_scheduled(pthp)) {
erts_port_task_abort(pthp);
ASSERT(erts_is_port_alive(id));
}
@@ -381,7 +476,7 @@ abort_tasks(ErtsDrvEventState *state, int mode)
#if ERTS_CIO_HAVE_DRV_EVENT
case ERTS_EV_TYPE_DRV_EV:
abort_task(state->driver.event->port,
- &state->driver.event->task,
+ &state->driver.event->iotask.task,
ERTS_EV_TYPE_DRV_EV);
return;
#endif
@@ -395,14 +490,14 @@ abort_tasks(ErtsDrvEventState *state, int mode)
case ERL_DRV_WRITE:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->outport,
- &state->driver.select->outtask,
+ &state->driver.select->outiotask.task,
state->type);
if (mode == ERL_DRV_WRITE)
break;
case ERL_DRV_READ:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->inport,
- &state->driver.select->intask,
+ &state->driver.select->iniotask.task,
state->type);
break;
default:
@@ -440,16 +535,14 @@ deselect(ErtsDrvEventState *state, int mode)
if (!(state->events)) {
switch (state->type) {
case ERTS_EV_TYPE_DRV_SEL:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
+ state->driver.select->inport = NIL;
+ state->driver.select->outport = NIL;
break;
#if ERTS_CIO_HAVE_DRV_EVENT
case ERTS_EV_TYPE_DRV_EV:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task));
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
+ state->driver.event->port = NIL;
+ state->driver.event->data = NULL;
+ state->driver.event->removed_events = (ErtsPollEvents) 0;
break;
#endif
case ERTS_EV_TYPE_NONE:
@@ -459,20 +552,297 @@ deselect(ErtsDrvEventState *state, int mode)
break;
}
- state->driver.select = NULL;
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
remember_removed(state, &pollset);
}
}
-
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0)
#else
# define IS_FD_UNKNOWN(state) ((state) == NULL)
#endif
+static ERTS_INLINE void
+check_fd_cleanup(ErtsDrvEventState *state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState **free_event,
+#endif
+ ErtsDrvSelectDataState **free_select)
+{
+ erts_aint_t current_cio_time;
+
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
+
+ current_cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ *free_select = NULL;
+ if (state->driver.select
+ && (state->type != ERTS_EV_TYPE_DRV_SEL)
+ && !is_iotask_active(&state->driver.select->iniotask, current_cio_time)
+ && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) {
+
+ *free_select = state->driver.select;
+ state->driver.select = NULL;
+ }
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ *free_event = NULL;
+ if (state->driver.event
+ && (state->type != ERTS_EV_TYPE_DRV_EV)
+ && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) {
+
+ *free_event = state->driver.event;
+ state->driver.event = NULL;
+ }
+#endif
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (((state->type != ERTS_EV_TYPE_NONE)
+ | state->remove_cnt
+#if ERTS_CIO_HAVE_DRV_EVENT
+ | (state->driver.event != NULL)
+#endif
+ | (state->driver.select != NULL)) == 0) {
+
+ hash_erase_drv_ev_state(state);
+
+ }
+#endif
+}
+
+static ERTS_INLINE int
+check_cleanup_active_fd(ErtsSysFdType fd,
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollControlEntry *pce,
+ int *pce_ix,
+#endif
+ erts_aint_t current_cio_time)
+{
+ ErtsDrvEventState *state;
+ int active = 0;
+ erts_smp_mtx_t *mtx = fd_mtx(fd);
+ void *free_select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ void *free_event = NULL;
+#endif
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents evon = 0, evoff = 0;
+#endif
+
+ erts_smp_mtx_lock(mtx);
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ state = &drv_ev_state[(int) fd];
+#else
+ state = hash_get_drv_ev_state(fd); /* may be NULL! */
+ if (state)
+#endif
+ {
+ if (state->driver.select) {
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) {
+ active = 1;
+ if ((state->events & ERTS_POLL_EV_IN)
+ && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) {
+ evoff |= ERTS_POLL_EV_IN;
+ state->flags |= ERTS_EV_FLAG_DEFER_IN_EV;
+ }
+ }
+ else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) {
+ if (state->events & ERTS_POLL_EV_IN)
+ evon |= ERTS_POLL_EV_IN;
+ state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV;
+ }
+ if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) {
+ active = 1;
+ if ((state->events & ERTS_POLL_EV_OUT)
+ && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) {
+ evoff |= ERTS_POLL_EV_OUT;
+ state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV;
+ }
+ }
+ else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) {
+ if (state->events & ERTS_POLL_EV_OUT)
+ evon |= ERTS_POLL_EV_OUT;
+ state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV;
+ }
+ if (active)
+ (void) 0;
+ else
+#else
+ if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)
+ || is_iotask_active(&state->driver.select->outiotask, current_cio_time))
+ active = 1;
+ else
+#endif
+ if (state->type != ERTS_EV_TYPE_DRV_SEL) {
+ free_select = state->driver.select;
+ state->driver.select = NULL;
+ }
+ }
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event) {
+ if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) {
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events;
+ if (evs) {
+ evoff |= evs;
+ state->driver.event->deferred_events |= evs;
+ }
+#endif
+ active = 1;
+ }
+ else if (state->type != ERTS_EV_TYPE_DRV_EV) {
+ free_event = state->driver.event;
+ state->driver.event = NULL;
+ }
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ else {
+ ErtsPollEvents evs = state->events & state->driver.event->deferred_events;
+ if (evs) {
+ evon |= evs;
+ state->driver.event->deferred_events = 0;
+ }
+ }
+#endif
+
+ }
+#endif
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0)
+ hash_erase_drv_ev_state(state);
+#endif
+
+ }
+
+ erts_smp_mtx_unlock(mtx);
+
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
+
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ if (evoff) {
+ ErtsPollControlEntry *pcep = &pce[(*pce_ix)++];
+ pcep->fd = fd;
+ pcep->events = evoff;
+ pcep->on = 0;
+ }
+ if (evon) {
+ ErtsPollControlEntry *pcep = &pce[(*pce_ix)++];
+ pcep->fd = fd;
+ pcep->events = evon;
+ pcep->on = 1;
+ }
+#endif
+
+ return active;
+}
+
+static void
+check_cleanup_active_fds(erts_aint_t current_cio_time)
+{
+ int six = pollset.active_fd.six;
+ int eix = pollset.active_fd.eix;
+ erts_aint32_t no = erts_smp_atomic32_read_dirty(&pollset.active_fd.no);
+ int size = pollset.active_fd.size;
+ int ix = six;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ /* every fd might add two entries */
+ Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no;
+ ErtsPollControlEntry *pctrl_entries = (pce_sz
+ ? erts_alloc(ERTS_ALC_T_TMP, pce_sz)
+ : NULL);
+ int pctrl_ix = 0;
+#endif
+
+ while (ix != eix) {
+ ErtsSysFdType fd = pollset.active_fd.array[ix];
+ int nix = ix + 1;
+ if (nix >= size)
+ nix = 0;
+ ASSERT(fd != ERTS_SYS_FD_INVALID);
+ if (!check_cleanup_active_fd(fd,
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ pctrl_entries,
+ &pctrl_ix,
+#endif
+ current_cio_time)) {
+ no--;
+ if (ix == six) {
+#ifdef DEBUG
+ pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID;
+#endif
+ six = nix;
+ }
+ else {
+ pollset.active_fd.array[ix] = pollset.active_fd.array[six];
+#ifdef DEBUG
+ pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID;
+#endif
+ six++;
+ if (six >= size)
+ six = 0;
+ }
+ }
+ ix = nix;
+ }
+
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry));
+ if (pctrl_ix)
+ ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix);
+ if (pctrl_entries)
+ erts_free(ERTS_ALC_T_TMP, pctrl_entries);
+#endif
+
+ pollset.active_fd.six = six;
+ pollset.active_fd.eix = eix;
+ erts_smp_atomic32_set_relb(&pollset.active_fd.no, no);
+}
+
+static ERTS_INLINE void
+add_active_fd(ErtsSysFdType fd)
+{
+ int eix = pollset.active_fd.eix;
+ int size = pollset.active_fd.size;
+
+
+ pollset.active_fd.array[eix] = fd;
+
+ erts_smp_atomic32_set_relb(&pollset.active_fd.no,
+ (erts_smp_atomic32_read_dirty(&pollset.active_fd.no)
+ + 1));
+
+ eix++;
+ if (eix >= size)
+ eix = 0;
+ if (pollset.active_fd.six == eix) {
+ pollset.active_fd.six = 0;
+ eix = size;
+ size += ERTS_ACTIVE_FD_INC;
+ pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR,
+ pollset.active_fd.array,
+ sizeof(ErtsSysFdType)*size);
+ pollset.active_fd.size = size;
+#ifdef DEBUG
+ {
+ int i;
+ for (i = eix + 1; i < size; i++)
+ pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID;
+ }
+#endif
+
+ }
+
+ pollset.active_fd.eix = eix;
+}
int
ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
@@ -489,6 +859,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
ErtsDrvEventState *state;
int wake_poller;
int ret;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *free_event = NULL;
+#endif
+ ErtsDrvSelectDataState *free_select = NULL;
#ifdef USE_VM_PROBES
DTRACE_CHARBUF(name, 64);
#endif
@@ -524,7 +898,8 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
/* fast track to stop_select callback */
stop_select_fn = prt->drv_ptr->stop_select;
#ifdef USE_VM_PROBES
- strncpy(name, prt->drv_ptr->name, sizeof(name)-1);
+ strncpy(name, prt->drv_ptr->name,
+ sizeof(DTRACE_CHARBUF_NAME(name))-1);
name[sizeof(name)-1] = '\0';
#endif
ret = 0;
@@ -589,9 +964,9 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) {
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select);
- state->driver.select = NULL;
+ state->flags &= ~ERTS_EV_FLAG_USED;
+ state->driver.select->inport = NIL;
+ state->driver.select->outport = NIL;
}
ret = -1;
goto done;
@@ -609,18 +984,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
state->events = new_events;
if (ctl_events) {
if (on) {
- if (state->type == ERTS_EV_TYPE_NONE) {
- ErtsDrvSelectDataState *dsdsp
- = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
- sizeof(ErtsDrvSelectDataState));
- dsdsp->inport = NIL;
- dsdsp->outport = NIL;
- erts_port_task_handle_init(&dsdsp->intask);
- erts_port_task_handle_init(&dsdsp->outtask);
- ASSERT(state->driver.select == NULL);
- state->driver.select = dsdsp;
+ if (!state->driver.select)
+ state->driver.select = alloc_drv_select_data();
+ if (state->type == ERTS_EV_TYPE_NONE)
state->type = ERTS_EV_TYPE_DRV_SEL;
- }
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
if (ctl_events & ERTS_POLL_EV_IN)
state->driver.select->inport = id;
@@ -641,17 +1008,12 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
state->driver.select->outport = NIL;
}
if (new_events == 0) {
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
if (old_events != 0) {
remember_removed(state, &pollset);
}
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
- state->driver.select = NULL;
+ state->flags &= ~ERTS_EV_FLAG_USED;
}
/*else keep it, as fd will probably be selected upon again */
}
@@ -682,20 +1044,29 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
ret = 0;
-done:;
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
- }
+done:
+
+ check_fd_cleanup(state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ &free_event,
#endif
-done_unknown:
+ &free_select);
+
+done_unknown:
erts_smp_mtx_unlock(fd_mtx(fd));
if (stop_select_fn) {
int was_unmasked = erts_block_fpe();
DTRACE1(driver_stop_select, name);
+ LTTNG1(driver_stop_select, "unknown");
(*stop_select_fn)(e, NULL);
erts_unblock_fpe(was_unmasked);
}
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
return ret;
}
@@ -715,6 +1086,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
ErtsDrvEventState *state;
int do_wake = 0;
int ret;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *free_event;
+#endif
+ ErtsDrvSelectDataState *free_select;
Port *prt = erts_drvport2port(ix);
if (prt == ERTS_INVALID_ERL_DRV_PORT)
@@ -795,10 +1170,8 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
state->driver.event->removed_events |= remove_events;
}
else {
- state->driver.event
- = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
- sizeof(ErtsDrvEventDataState));
- erts_port_task_handle_init(&state->driver.event->task);
+ if (!state->driver.event)
+ state->driver.event = alloc_drv_event_data();
state->driver.event->port = id;
state->driver.event->removed_events = (ErtsPollEvents) 0;
state->type = ERTS_EV_TYPE_DRV_EV;
@@ -808,10 +1181,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
else {
if (state->type == ERTS_EV_TYPE_DRV_EV) {
abort_tasks(state, 0);
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
+ state->driver.event->port = NIL;
+ state->driver.event->data = NULL;
+ state->driver.event->removed_events = (ErtsPollEvents) 0;
}
- state->driver.select = NULL;
state->type = ERTS_EV_TYPE_NONE;
remember_removed(state, &pollset);
}
@@ -821,12 +1194,22 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
ret = 0;
done:
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
- }
+
+ check_fd_cleanup(state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ &free_event,
#endif
+ &free_select);
+
erts_smp_mtx_unlock(fd_mtx(fd));
+
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
+
return ret;
#endif
}
@@ -894,7 +1277,7 @@ print_driver_name(erts_dsprintf_buf_t *dsbufp, Eterm id)
static void
steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
{
- erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) state->fd);
+ erts_dsprintf(dsbufp, "stealing control of fd=%d from ", (int) GET_FD(state->fd));
switch (state->type) {
case ERTS_EV_TYPE_DRV_SEL: {
int deselect_mode = 0;
@@ -918,7 +1301,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
if (deselect_mode)
deselect(state, deselect_mode);
else {
- erts_dsprintf(dsbufp, "no one", (int) state->fd);
+ erts_dsprintf(dsbufp, "no one", (int) GET_FD(state->fd));
ASSERT(0);
}
erts_dsprintf(dsbufp, "\n");
@@ -946,7 +1329,7 @@ steal(erts_dsprintf_buf_t *dsbufp, ErtsDrvEventState *state, int mode)
break;
}
default:
- erts_dsprintf(dsbufp, "no one\n", (int) state->fd);
+ erts_dsprintf(dsbufp, "no one\n", (int) GET_FD(state->fd));
ASSERT(0);
}
}
@@ -960,7 +1343,7 @@ print_select_op(erts_dsprintf_buf_t *dsbufp,
"driver_select(%p, %d,%s%s%s%s, %d) "
"by ",
ix,
- (int) fd,
+ (int) GET_FD(fd),
mode & ERL_DRV_READ ? " ERL_DRV_READ" : "",
mode & ERL_DRV_WRITE ? " ERL_DRV_WRITE" : "",
mode & ERL_DRV_USE ? " ERL_DRV_USE" : "",
@@ -1010,7 +1393,7 @@ steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
ASSERT(state->type == ERTS_EV_TYPE_STOP_USE);
erts_dsprintf(dsbufp, "failed: fd=%d (re)selected before stop_select "
"was called for driver %s\n",
- (int) state->fd, state->driver.drv_ptr->name);
+ (int) GET_FD(state->fd), state->driver.drv_ptr->name);
erts_send_error_to_logger_nogl(dsbufp);
if (on) {
@@ -1019,7 +1402,7 @@ steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
* In either case stop_select should not be called.
*/
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
if (state->driver.drv_ptr->handle) {
erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
}
@@ -1091,38 +1474,103 @@ event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data
#endif
#endif
+static ERTS_INLINE int
+io_task_schedule_allowed(ErtsDrvEventState *state,
+ ErtsPortTaskType type,
+ erts_aint_t current_cio_time)
+{
+ ErtsIoTask *io_task;
+
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+ if (!state->driver.select)
+ return 0;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ return 0;
+#endif
+ io_task = &state->driver.select->iniotask;
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
+ if (!state->driver.select)
+ return 0;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ return 0;
+#endif
+ io_task = &state->driver.select->outiotask;
+ break;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_PORT_TASK_EVENT:
+ if (!state->driver.event)
+ return 0;
+ if (state->driver.select)
+ return 0;
+ io_task = &state->driver.event->iotask;
+ break;
+#endif
+ default:
+ ERTS_INTERNAL_ERROR("Invalid I/O-task type");
+ return 0;
+ }
+
+ return !is_iotask_active(io_task, current_cio_time);
+}
+
static ERTS_INLINE void
-iready(Eterm id, ErtsDrvEventState *state)
+iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->intask,
- ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_READ);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_INPUT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.select->iniotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_INPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_READ);
+ }
+ add_active_fd(state->fd);
}
}
static ERTS_INLINE void
-oready(Eterm id, ErtsDrvEventState *state)
+oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->outtask,
- ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_WRITE);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_OUTPUT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.select->outiotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_OUTPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_WRITE);
+ }
+ add_active_fd(state->fd);
}
}
#if ERTS_CIO_HAVE_DRV_EVENT
static ERTS_INLINE void
-eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data)
+eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data,
+ erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.event->task,
- ERTS_PORT_TASK_EVENT,
- (ErlDrvEvent) state->fd,
- event_data) != 0) {
- stale_drv_select(id, state, 0);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_EVENT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.event->iotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_EVENT,
+ (ErlDrvEvent) state->fd,
+ event_data) != 0) {
+ stale_drv_select(id, state, 0);
+ }
+ add_active_fd(state->fd);
}
}
#endif
@@ -1145,18 +1593,22 @@ ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set)
void
ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set,
- erts_short_time_t msec)
+ ErtsMonotonicTime timeout_time)
{
- ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, msec);
+ ERTS_CIO_POLL_INTR_TMD(pollset.ps, set, timeout_time);
}
void
ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
{
- ErtsPollResFd pollres[256];
+ ErtsPollResFd *pollres;
int pollres_len;
- SysTimeval wait_time;
+ ErtsMonotonicTime timeout_time;
int poll_ret, i;
+ erts_aint_t current_cio_time;
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+
+ ASSERT(esdp);
restart:
@@ -1166,28 +1618,37 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
#endif
/* Figure out timeout value */
- if (do_wait) {
- erts_time_remaining(&wait_time);
- } else { /* poll only */
- wait_time.tv_sec = 0;
- wait_time.tv_usec = 0;
- }
+ timeout_time = (do_wait
+ ? erts_check_next_timeout_time(esdp)
+ : ERTS_POLL_NO_TIMEOUT /* poll only */);
+
+ /*
+ * No need for an atomic inc op when incrementing
+ * erts_check_io_time, since only one thread can
+ * check io at a time.
+ */
+ current_cio_time = erts_smp_atomic_read_dirty(&erts_check_io_time);
+ current_cio_time++;
+ erts_smp_atomic_set_relb(&erts_check_io_time, current_cio_time);
+
+ check_cleanup_active_fds(current_cio_time);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
- pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd);
+
+ pollres_len = erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN;
+
+ pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len);
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1);
- poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, &wait_time);
+ poll_ret = ERTS_CIO_POLL_WAIT(pollset.ps, pollres, &pollres_len, timeout_time);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
- erts_deliver_time(); /* sync the machine's idea of time */
-
#ifdef ERTS_BREAK_REQUESTED
if (ERTS_BREAK_REQUESTED)
erts_do_break_handling();
@@ -1196,6 +1657,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if (poll_ret != 0) {
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
forget_removed(&pollset);
+ erts_free(ERTS_ALC_T_TMP, pollres);
if (poll_ret == EAGAIN) {
goto restart;
}
@@ -1255,15 +1717,15 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if ((revents & ERTS_POLL_EV_IN)
|| (!(revents & ERTS_POLL_EV_OUT)
&& state->events & ERTS_POLL_EV_IN)) {
- iready(state->driver.select->inport, state);
+ iready(state->driver.select->inport, state, current_cio_time);
}
else if (state->events & ERTS_POLL_EV_OUT) {
- oready(state->driver.select->outport, state);
+ oready(state->driver.select->outport, state, current_cio_time);
}
}
else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
if (revents & ERTS_POLL_EV_OUT) {
- oready(state->driver.select->outport, state);
+ oready(state->driver.select->outport, state, current_cio_time);
}
/* Someone might have deselected input since revents
was read (true also on the non-smp emulator since
@@ -1271,7 +1733,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
revents... */
revents &= ~(~state->events & ERTS_POLL_EV_IN);
if (revents & ERTS_POLL_EV_IN) {
- iready(state->driver.select->inport, state);
+ iready(state->driver.select->inport, state, current_cio_time);
}
}
else if (revents & ERTS_POLL_EV_NVAL) {
@@ -1279,6 +1741,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
state->driver.select->inport,
state->driver.select->outport,
state->events);
+ add_active_fd(state->fd);
}
break;
}
@@ -1296,8 +1759,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if (revents) {
event_data->events = state->events;
event_data->revents = revents;
-
- eready(state->driver.event->port, state, event_data);
+ eready(state->driver.event->port, state, event_data, current_cio_time);
}
break;
}
@@ -1315,6 +1777,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
(int) state->type);
ASSERT(0);
deselect(state, 0);
+ add_active_fd(state->fd);
break;
}
}
@@ -1326,6 +1789,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
}
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
+ erts_free(ERTS_ALC_T_TMP, pollres);
forget_removed(&pollset);
}
@@ -1395,6 +1859,7 @@ stale_drv_select(Eterm id, ErtsDrvEventState *state, int mode)
}
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+
static SafeHashValue drv_ev_state_hash(void *des)
{
SafeHashValue val = (SafeHashValue) ((ErtsDrvEventState *) des)->fd;
@@ -1440,10 +1905,27 @@ static void drv_ev_state_free(void *des)
void
ERTS_CIO_EXPORT(erts_init_check_io)(void)
{
+ erts_smp_atomic_init_nob(&erts_check_io_time, 0);
erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0);
+
ERTS_CIO_POLL_INIT();
pollset.ps = ERTS_CIO_NEW_POLLSET();
+ pollset.active_fd.six = 0;
+ pollset.active_fd.eix = 0;
+ erts_smp_atomic32_init_nob(&pollset.active_fd.no, 0);
+ pollset.active_fd.size = ERTS_ACTIVE_FD_INC;
+ pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR,
+ sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC);
+#ifdef DEBUG
+ {
+ int i;
+ for (i = 0; i < ERTS_ACTIVE_FD_INC; i++)
+ pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID;
+ }
+#endif
+
+
#ifdef ERTS_SMP
init_removed_fd_alloc();
pollset.removed_list = NULL;
@@ -1519,12 +2001,27 @@ Eterm
ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
{
Process *p = (Process *) proc;
- Eterm tags[15], values[15], res;
+ Eterm tags[16], values[16], res;
Uint sz, *szp, *hp, **hpp, memory_size;
Sint i;
ErtsPollInfo pi;
-
- ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+ erts_aint_t cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ int active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no);
+
+ while (1) {
+ erts_aint_t post_cio_time;
+ int post_active_fds;
+
+ ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+
+ post_cio_time = erts_smp_atomic_read_mb(&erts_check_io_time);
+ post_active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no);
+ if (cio_time == post_cio_time && active_fds == post_active_fds)
+ break;
+ cio_time = post_cio_time;
+ active_fds = post_active_fds;
+ }
+
memory_size = pi.memory_size;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len);
@@ -1588,6 +2085,9 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
tags[i] = erts_bld_atom(hpp, szp, "max_fds");
values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds);
+ tags[i] = erts_bld_atom(hpp, szp, "active_fds");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds);
+
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups");
values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups);
@@ -1642,6 +2142,8 @@ print_events(ErtsPollEvents ev)
typedef struct {
int used_fds;
int num_errors;
+ int no_driver_select_structs;
+ int no_driver_event_structs;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
int internal_fds;
ErtsPollEvents *epep;
@@ -1664,6 +2166,13 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
struct stat stat_buf;
#endif
+ if (state->driver.select)
+ counters->no_driver_select_structs++;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ counters->no_driver_event_structs++;
+#endif
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
if (state->events || ep_events) {
if (ep_events & ERTS_POLL_EV_NVAL) {
@@ -1802,6 +2311,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
}
}
}
+#if ERTS_CIO_HAVE_DRV_EVENT
else if (state->type == ERTS_EV_TYPE_DRV_EV) {
Eterm id;
erts_printf("driver_event ");
@@ -1837,6 +2347,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
erts_free_port_names(pnp);
}
}
+#endif
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
else if (internal) {
erts_printf("internal ");
@@ -1876,18 +2387,24 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
}
int
-ERTS_CIO_EXPORT(erts_check_io_debug)(void)
+ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip)
{
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
int fd, len;
#endif
IterDebugCounters counters;
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
ErtsDrvEventState null_des;
null_des.driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ null_des.driver.event = NULL;
+#endif
+ null_des.driver.drv_ptr = NULL;
null_des.events = 0;
null_des.remove_cnt = 0;
null_des.type = ERTS_EV_TYPE_NONE;
+#endif
erts_printf("--- fds in pollset --------------------------------------\n");
@@ -1904,6 +2421,8 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
#endif
counters.used_fds = 0;
counters.num_errors = 0;
+ counters.no_driver_select_structs = 0;
+ counters.no_driver_event_structs = 0;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
len = erts_smp_atomic_read_nob(&drv_ev_state_len);
@@ -1920,8 +2439,16 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
erts_smp_thr_progress_unblock();
+ ciodip->no_used_fds = counters.used_fds;
+ ciodip->no_driver_select_structs = counters.no_driver_select_structs;
+ ciodip->no_driver_event_structs = counters.no_driver_event_structs;
+
erts_printf("\n");
erts_printf("used fds=%d\n", counters.used_fds);
+ erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs);
+#endif
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
erts_printf("internal fds=%d\n", counters.internal_fds);
#endif
@@ -1930,6 +2457,7 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
erts_free(ERTS_ALC_T_TMP, (void *) counters.epep);
#endif
+
return counters.num_errors;
}
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index edab7947ba..14f1ea3f43 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -26,6 +27,7 @@
#ifndef ERL_CHECK_IO_H__
#define ERL_CHECK_IO_H__
+#include "sys.h"
#include "erl_sys_driver.h"
#ifdef ERTS_ENABLE_KERNEL_POLL
@@ -46,14 +48,14 @@ void erts_check_io_async_sig_interrupt_nkp(void);
#endif
void erts_check_io_interrupt_kp(int);
void erts_check_io_interrupt_nkp(int);
-void erts_check_io_interrupt_timed_kp(int, erts_short_time_t);
-void erts_check_io_interrupt_timed_nkp(int, erts_short_time_t);
+void erts_check_io_interrupt_timed_kp(int, ErtsMonotonicTime);
+void erts_check_io_interrupt_timed_nkp(int, ErtsMonotonicTime);
void erts_check_io_kp(int);
void erts_check_io_nkp(int);
void erts_init_check_io_kp(void);
void erts_init_check_io_nkp(void);
-int erts_check_io_debug_kp(void);
-int erts_check_io_debug_nkp(void);
+int erts_check_io_debug_kp(ErtsCheckIoDebugInfo *);
+int erts_check_io_debug_nkp(ErtsCheckIoDebugInfo *);
#else /* !ERTS_ENABLE_KERNEL_POLL */
@@ -64,12 +66,33 @@ int erts_check_io_max_files(void);
void erts_check_io_async_sig_interrupt(void);
#endif
void erts_check_io_interrupt(int);
-void erts_check_io_interrupt_timed(int, erts_short_time_t);
+void erts_check_io_interrupt_timed(int, ErtsMonotonicTime);
void erts_check_io(int);
void erts_init_check_io(void);
#endif
+extern erts_smp_atomic_t erts_check_io_time;
+
+typedef struct {
+ ErtsPortTaskHandle task;
+ erts_smp_atomic_t executed_time;
+} ErtsIoTask;
+
+ERTS_GLB_INLINE void erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE void
+erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp)
+{
+ ErtsIoTask *itp = (ErtsIoTask *) (((char *) pthp) - offsetof(ErtsIoTask, task));
+ erts_aint_t ci_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ erts_smp_atomic_set_relb(&itp->executed_time, ci_time);
+}
+
+#endif
+
#endif /* ERL_CHECK_IO_H__ */
#if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__)
@@ -81,6 +104,16 @@ void erts_init_check_io(void);
#include "erl_poll.h"
#include "erl_port_task.h"
+#ifdef __WIN32__
+/*
+ * Current erts_poll implementation for Windows cannot handle
+ * active events in the set of events polled.
+ */
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
+#else
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 0
+#endif
+
/*
* ErtsDrvEventDataState is used by driver_event() which is almost never
* used. We allocate ErtsDrvEventDataState separate since we dont wan't
@@ -91,13 +124,16 @@ typedef struct {
Eterm port;
ErlDrvEventData data;
ErtsPollEvents removed_events;
- ErtsPortTaskHandle task;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents deferred_events;
+#endif
+ ErtsIoTask iotask;
} ErtsDrvEventDataState;
typedef struct {
Eterm inport;
Eterm outport;
- ErtsPortTaskHandle intask;
- ErtsPortTaskHandle outtask;
+ ErtsIoTask iniotask;
+ ErtsIoTask outiotask;
} ErtsDrvSelectDataState;
#endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */
diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c
new file mode 100644
index 0000000000..7bbb406f29
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mmap.c
@@ -0,0 +1,2918 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_process.h"
+#include "erl_smp.h"
+#include "atom.h"
+#include "erl_mmap.h"
+#include <stddef.h>
+
+#if HAVE_ERTS_MMAP
+
+/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
+
+#if defined(DEBUG) || 0
+# undef ERTS_MMAP_DEBUG
+# define ERTS_MMAP_DEBUG
+# ifndef ERTS_MMAP_OP_RINGBUF_SZ
+# define ERTS_MMAP_OP_RINGBUF_SZ 100
+# endif
+#endif
+
+#ifndef ERTS_MMAP_OP_RINGBUF_SZ
+# define ERTS_MMAP_OP_RINGBUF_SZ 0
+#endif
+
+/* #define ERTS_MMAP_DEBUG_FILL_AREAS */
+
+#ifdef ERTS_MMAP_DEBUG
+# define ERTS_MMAP_ASSERT ERTS_ASSERT
+#else
+# define ERTS_MMAP_ASSERT(A) ((void) 1)
+#endif
+
+/*
+ * `mm->sa.bot` and `mm->sua.top` are read only after
+ * initialization, but the other pointers are not; i.e., only
+ * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
+ */
+#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \
+ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
+ < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot))
+#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \
+ (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
+ (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
+ < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot)))
+#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \
+ (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
+ (((UWord) (PTR)) - ((UWord) mm->sua.bot) \
+ < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot)))
+
+UWord erts_page_inv_mask;
+
+#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
+# undef RBT_DEBUG
+# define RBT_DEBUG
+#endif
+#ifdef RBT_DEBUG
+# define RBT_ASSERT ERTS_ASSERT
+# define IF_RBT_DEBUG(C) C
+#else
+# define RBT_ASSERT(x)
+# define IF_RBT_DEBUG(C)
+#endif
+
+typedef struct RBTNode_ RBTNode;
+struct RBTNode_ {
+ UWord parent_and_color; /* color in bit 0 of parent ptr */
+ RBTNode *left;
+ RBTNode *right;
+};
+
+#define RED_FLG (1)
+#define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG))
+#define IS_BLACK(N) (!IS_RED(N))
+#define SET_RED(N) ((N)->parent_and_color |= RED_FLG)
+#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG)
+
+static ERTS_INLINE RBTNode* parent(RBTNode* node)
+{
+ return (RBTNode*) (node->parent_and_color & ~RED_FLG);
+}
+
+static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent)
+{
+ RBT_ASSERT(!((UWord)parent & RED_FLG));
+ node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG);
+}
+
+static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color)
+{
+ RBT_ASSERT(!((UWord)parent & RED_FLG));
+ RBT_ASSERT(!(color & ~RED_FLG));
+ return ((UWord)parent) | color;
+}
+
+
+enum SortOrder {
+ ADDR_ORDER, /* only address order */
+ SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */
+ SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */
+};
+#ifdef HARD_DEBUG
+static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"};
+#endif
+
+typedef struct {
+ RBTNode* root;
+ enum SortOrder order;
+}RBTree;
+
+#ifdef HARD_DEBUG
+# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE)
+# define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ)
+static int rbt_assert_is_member(RBTNode* root, RBTNode* node);
+static RBTNode* check_tree(RBTree* tree, Uint);
+#else
+# define HARD_CHECK_IS_MEMBER(ROOT,NODE)
+# define HARD_CHECK_TREE(TREE,SZ)
+#endif
+
+#if ERTS_MMAP_OP_RINGBUF_SZ
+
+static int mmap_op_ix;
+
+typedef enum {
+ ERTS_OP_TYPE_NONE,
+ ERTS_OP_TYPE_MMAP,
+ ERTS_OP_TYPE_MUNMAP,
+ ERTS_OP_TYPE_MREMAP
+} ErtsMMapOpType;
+
+typedef struct {
+ ErtsMMapOpType type;
+ void *result;
+ UWord in_size;
+ UWord out_size;
+ void *old_ptr;
+ UWord old_size;
+} ErtsMMapOp;
+
+static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
+
+#define ERTS_MMAP_OP_RINGBUF_INIT() \
+ do { \
+ int ix__; \
+ for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\
+ mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ } \
+ mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
+ } while (0)
+
+#define ERTS_MMAP_OP_START(SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = (SZ); \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ } while (0)
+
+#define ERTS_MMAP_OP_END(PTR, SZ) \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].result = (PTR); \
+ mmap_ops[ix__].out_size = (SZ); \
+ } while (0)
+
+#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
+ do { \
+ erts_smp_mtx_lock(&mm->mtx); \
+ ERTS_MMAP_OP_START((IN_SZ)); \
+ ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
+ erts_smp_mtx_unlock(&mm->mtx); \
+ } while (0)
+
+#define ERTS_MUNMAP_OP(PTR, SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = (PTR); \
+ mmap_ops[ix__].old_size = (SZ); \
+ } while (0)
+
+#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
+ do { \
+ erts_smp_mtx_lock(&mm->mtx); \
+ ERTS_MUNMAP_OP((PTR), (SZ)); \
+ erts_smp_mtx_unlock(&mm->mtx); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = (IN_SZ); \
+ mmap_ops[ix__].out_size = (OLD_SZ); \
+ mmap_ops[ix__].old_ptr = (OLD_PTR); \
+ mmap_ops[ix__].old_size = (OLD_SZ); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_END(PTR, SZ) \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].result = (PTR); \
+ mmap_ops[mmap_op_ix].out_size = (SZ); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
+ do { \
+ erts_smp_mtx_lock(&mm->mtx); \
+ ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
+ ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
+ erts_smp_mtx_unlock(&mm->mtx); \
+ } while (0)
+
+#define ERTS_MMAP_OP_ABORT() \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ if (--mmap_op_ix < 0) \
+ mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
+ } while (0)
+
+#else
+
+#define ERTS_MMAP_OP_RINGBUF_INIT()
+#define ERTS_MMAP_OP_START(SZ)
+#define ERTS_MMAP_OP_END(PTR, SZ)
+#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ)
+#define ERTS_MUNMAP_OP(PTR, SZ)
+#define ERTS_MUNMAP_OP_LCK(PTR, SZ)
+#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ)
+#define ERTS_MREMAP_OP_END(PTR, SZ)
+#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ)
+#define ERTS_MMAP_OP_ABORT()
+
+#endif
+
+typedef struct {
+ RBTNode snode; /* node in 'stree' */
+ RBTNode anode; /* node in 'atree' */
+ char* start;
+ char* end;
+}ErtsFreeSegDesc;
+
+typedef struct {
+ RBTree stree; /* size ordered tree */
+ RBTree atree; /* address ordered tree */
+ Uint nseg;
+}ErtsFreeSegMap;
+
+struct ErtsMemMapper_ {
+ int (*reserve_physical)(char *, UWord, int exec);
+ void (*unreserve_physical)(char *, UWord);
+ int supercarrier;
+ int no_os_mmap;
+ int executable; /* is client a native code allocator? */
+ /*
+ * Super unaligned area is located above super aligned
+ * area. That is, `sa.bot` is beginning of the super
+ * carrier, `sua.top` is the end of the super carrier,
+ * and sa.top and sua.bot moves towards eachother.
+ */
+ struct {
+ char *top;
+ char *bot;
+ ErtsFreeSegMap map;
+ } sua;
+ struct {
+ char *top;
+ char *bot;
+ ErtsFreeSegMap map;
+ } sa;
+#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
+ int mmap_fd;
+#endif
+ erts_smp_mtx_t mtx;
+ struct {
+ char *free_list;
+ char *unused_start;
+ char *unused_end;
+ char *new_area_hint;
+ Uint reserved;
+ } desc;
+ struct {
+ UWord free_seg_descs;
+ struct {
+ UWord curr;
+ UWord max;
+ } free_segs;
+ } no;
+ struct {
+ struct {
+ UWord total;
+ struct {
+ UWord total;
+ UWord sa;
+ UWord sua;
+ } used;
+ } supercarrier;
+ struct {
+ UWord used;
+ } os;
+ } size;
+};
+
+ErtsMemMapper erts_dflt_mmapper;
+
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ErtsMemMapper erts_literal_mmapper;
+char* erts_literals_start;
+UWord erts_literals_size;
+#endif
+
+#ifdef ERTS_ALC_A_EXEC
+ErtsMemMapper erts_exec_mmapper;
+#endif
+
+
+
+#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \
+ do { \
+ mm->size.supercarrier.used.total += (SZ); \
+ mm->size.supercarrier.used.sa += (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
+ <= mm->size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa \
+ <= mm->size.supercarrier.used.total); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
+ mm->size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa >= (SZ)); \
+ mm->size.supercarrier.used.sa -= (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \
+ do { \
+ mm->size.supercarrier.used.total += (SZ); \
+ mm->size.supercarrier.used.sua += (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
+ <= mm->size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua \
+ <= mm->size.supercarrier.used.total); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
+ mm->size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua >= (SZ)); \
+ mm->size.supercarrier.used.sua -= (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_OS_INC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mm->size.os.used + (SZ) >= (SZ)); \
+ mm->size.os.used += (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_OS_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mm->size.os.used >= (SZ)); \
+ mm->size.os.used -= (SZ); \
+ } while (0)
+
+
+static void
+add_free_desc_area(ErtsMemMapper* mm, char *start, char *end)
+{
+ ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
+ if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
+ UWord no;
+ ErtsFreeSegDesc *prev_desc, *desc;
+ char *desc_end;
+
+ no = 1;
+ prev_desc = (ErtsFreeSegDesc *) start;
+ prev_desc->start = mm->desc.free_list;
+ desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
+ desc_end = start + 2*sizeof(ErtsFreeSegDesc);
+
+ while (desc_end <= end) {
+ desc->start = (char *) prev_desc;
+ prev_desc = desc;
+ desc = (ErtsFreeSegDesc *) desc_end;
+ desc_end += sizeof(ErtsFreeSegDesc);
+ no++;
+ }
+ mm->desc.free_list = (char *) prev_desc;
+ mm->no.free_seg_descs += no;
+ }
+}
+
+static ErtsFreeSegDesc *
+add_unused_free_desc_area(ErtsMemMapper* mm)
+{
+ char *ptr;
+ if (!mm->desc.unused_start)
+ return NULL;
+
+ ERTS_MMAP_ASSERT(mm->desc.unused_end);
+ ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
+ <= mm->desc.unused_end - mm->desc.unused_start);
+
+ ptr = mm->desc.unused_start + ERTS_PAGEALIGNED_SIZE;
+ add_free_desc_area(mm, mm->desc.unused_start, ptr);
+
+ if ((mm->desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
+ mm->desc.unused_start = ptr;
+ else
+ mm->desc.unused_end = mm->desc.unused_start = NULL;
+
+ ERTS_MMAP_ASSERT(mm->desc.free_list);
+ return (ErtsFreeSegDesc *) mm->desc.free_list;
+}
+
+static ERTS_INLINE ErtsFreeSegDesc *
+alloc_desc(ErtsMemMapper* mm)
+{
+ ErtsFreeSegDesc *res;
+ res = (ErtsFreeSegDesc *) mm->desc.free_list;
+ if (!res) {
+ res = add_unused_free_desc_area(mm);
+ if (!res)
+ return NULL;
+ }
+ mm->desc.free_list = res->start;
+ ASSERT(mm->no.free_segs.curr < mm->no.free_seg_descs);
+ mm->no.free_segs.curr++;
+ if (mm->no.free_segs.max < mm->no.free_segs.curr)
+ mm->no.free_segs.max = mm->no.free_segs.curr;
+ return res;
+}
+
+static ERTS_INLINE void
+free_desc(ErtsMemMapper* mm, ErtsFreeSegDesc *desc)
+{
+ desc->start = mm->desc.free_list;
+ mm->desc.free_list = (char *) desc;
+ ERTS_MMAP_ASSERT(mm->no.free_segs.curr > 0);
+ mm->no.free_segs.curr--;
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
+{
+ return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode));
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode)
+{
+ return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode));
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node)
+{
+ return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node);
+}
+
+static ERTS_INLINE SWord usable_size(enum SortOrder order,
+ ErtsFreeSegDesc* desc)
+{
+ return ((order == SA_SZ_ADDR_ORDER) ?
+ ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start)
+ : desc->end - desc->start);
+}
+
+#ifdef HARD_DEBUG
+static ERTS_INLINE SWord cmp_nodes(enum SortOrder order,
+ RBTNode* lhs, RBTNode* rhs)
+{
+ ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs);
+ ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs);
+ RBT_ASSERT(lhs != rhs);
+ if (order != ADDR_ORDER) {
+ SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc);
+ if (diff) return diff;
+ }
+ if (order != SZ_REVERSE_ADDR_ORDER) {
+ return (char*)ldesc->start - (char*)rdesc->start;
+ }
+ else {
+ return (char*)rdesc->start - (char*)ldesc->start;
+ }
+}
+#endif /* HARD_DEBUG */
+
+static ERTS_INLINE SWord cmp_with_node(enum SortOrder order,
+ SWord sz, char* addr, RBTNode* rhs)
+{
+ ErtsFreeSegDesc* rdesc;
+ if (order != ADDR_ORDER) {
+ SWord diff;
+ rdesc = snode_to_desc(rhs);
+ diff = sz - usable_size(order, rdesc);
+ if (diff) return diff;
+ }
+ else
+ rdesc = anode_to_desc(rhs);
+
+ if (order != SZ_REVERSE_ADDR_ORDER)
+ return addr - (char*)rdesc->start;
+ else
+ return (char*)rdesc->start - addr;
+}
+
+
+static ERTS_INLINE void
+left_rotate(RBTNode **root, RBTNode *x)
+{
+ RBTNode *y = x->right;
+ x->right = y->left;
+ if (y->left)
+ set_parent(y->left, x);
+ set_parent(y, parent(x));
+ if (!parent(y)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->left)
+ parent(x)->left = y;
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ parent(x)->right = y;
+ }
+ y->left = x;
+ set_parent(x, y);
+}
+
+static ERTS_INLINE void
+right_rotate(RBTNode **root, RBTNode *x)
+{
+ RBTNode *y = x->left;
+ x->left = y->right;
+ if (y->right)
+ set_parent(y->right, x);
+ set_parent(y, parent(x));
+ if (!parent(y)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->right)
+ parent(x)->right = y;
+ else {
+ RBT_ASSERT(x == parent(x)->left);
+ parent(x)->left = y;
+ }
+ y->right = x;
+ set_parent(x, y);
+}
+
+/*
+ * Replace node x with node y
+ * NOTE: segment descriptor of y is not changed
+ */
+static ERTS_INLINE void
+replace(RBTNode **root, RBTNode *x, RBTNode *y)
+{
+
+ if (!parent(x)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->left)
+ parent(x)->left = y;
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ parent(x)->right = y;
+ }
+ if (x->left) {
+ RBT_ASSERT(parent(x->left) == x);
+ set_parent(x->left, y);
+ }
+ if (x->right) {
+ RBT_ASSERT(parent(x->right) == x);
+ set_parent(x->right, y);
+ }
+
+ y->parent_and_color = x->parent_and_color;
+ y->right = x->right;
+ y->left = x->left;
+}
+
+static void
+tree_insert_fixup(RBTNode** root, RBTNode *node)
+{
+ RBTNode *x = node, *y, *papa_x, *granpa_x;
+
+ /*
+ * Rearrange the tree so that it satisfies the Red-Black Tree properties
+ */
+
+ papa_x = parent(x);
+ RBT_ASSERT(x != *root && IS_RED(papa_x));
+ do {
+
+ /*
+ * x and its parent are both red. Move the red pair up the tree
+ * until we get to the root or until we can separate them.
+ */
+
+ granpa_x = parent(papa_x);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(granpa_x);
+
+ if (papa_x == granpa_x->left) {
+ y = granpa_x->right;
+ if (IS_RED(y)) {
+ SET_BLACK(y);
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ x = granpa_x;
+ }
+ else {
+
+ if (x == papa_x->right) {
+ left_rotate(root, papa_x);
+ papa_x = x;
+ x = papa_x->left;
+ }
+
+ RBT_ASSERT(x == granpa_x->left->left);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(papa_x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(IS_BLACK(y));
+
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ right_rotate(root, granpa_x);
+
+ RBT_ASSERT(x == parent(x)->left);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(parent(x)->right));
+ RBT_ASSERT(IS_BLACK(parent(x)));
+ break;
+ }
+ }
+ else {
+ RBT_ASSERT(papa_x == granpa_x->right);
+ y = granpa_x->left;
+ if (IS_RED(y)) {
+ SET_BLACK(y);
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ x = granpa_x;
+ }
+ else {
+
+ if (x == papa_x->left) {
+ right_rotate(root, papa_x);
+ papa_x = x;
+ x = papa_x->right;
+ }
+
+ RBT_ASSERT(x == granpa_x->right->right);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(papa_x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(IS_BLACK(y));
+
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ left_rotate(root, granpa_x);
+
+ RBT_ASSERT(x == parent(x)->right);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(parent(x)->left));
+ RBT_ASSERT(IS_BLACK(parent(x)));
+ break;
+ }
+ }
+ } while (x != *root && (papa_x=parent(x), IS_RED(papa_x)));
+
+ SET_BLACK(*root);
+}
+
+static void
+rbt_delete(RBTree* tree, RBTNode* del)
+{
+ Uint spliced_is_black;
+ RBTNode *x, *y, *z = del, *papa_y;
+ RBTNode null_x; /* null_x is used to get the fixup started when we
+ splice out a node without children. */
+
+ HARD_CHECK_IS_MEMBER(tree->root, del);
+ HARD_CHECK_TREE(tree, 0);
+
+ null_x.parent_and_color = parent_and_color(NULL, !RED_FLG);
+
+ /* Remove node from tree... */
+
+ /* Find node to splice out */
+ if (!z->left || !z->right)
+ y = z;
+ else
+ /* Set y to z:s successor */
+ for(y = z->right; y->left; y = y->left);
+ /* splice out y */
+ x = y->left ? y->left : y->right;
+ spliced_is_black = IS_BLACK(y);
+ papa_y = parent(y);
+ if (x) {
+ set_parent(x, papa_y);
+ }
+ else if (spliced_is_black) {
+ x = &null_x;
+ x->right = x->left = NULL;
+ x->parent_and_color = parent_and_color(papa_y, !RED_FLG);
+ y->left = x;
+ }
+
+ if (!papa_y) {
+ RBT_ASSERT(tree->root == y);
+ tree->root = x;
+ }
+ else {
+ if (y == papa_y->left) {
+ papa_y->left = x;
+ }
+ else {
+ RBT_ASSERT(y == papa_y->right);
+ papa_y->right = x;
+ }
+ }
+ if (y != z) {
+ /* We spliced out the successor of z; replace z by the successor */
+ RBT_ASSERT(z != &null_x);
+ replace(&tree->root, z, y);
+ }
+
+ if (spliced_is_black) {
+ RBTNode* papa_x;
+ /* We removed a black node which makes the resulting tree
+ violate the Red-Black Tree properties. Fixup tree... */
+
+ papa_x = parent(x);
+ while (IS_BLACK(x) && papa_x) {
+
+ /*
+ * x has an "extra black" which we move up the tree
+ * until we reach the root or until we can get rid of it.
+ *
+ * y is the sibbling of x
+ */
+
+ if (x == papa_x->left) {
+ y = papa_x->right;
+ RBT_ASSERT(y);
+ if (IS_RED(y)) {
+ RBT_ASSERT(y->right);
+ RBT_ASSERT(y->left);
+ SET_BLACK(y);
+ RBT_ASSERT(IS_BLACK(papa_x));
+ SET_RED(papa_x);
+ left_rotate(&tree->root, papa_x);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->right;
+ }
+ RBT_ASSERT(y);
+ RBT_ASSERT(IS_BLACK(y));
+ if (IS_BLACK(y->left) && IS_BLACK(y->right)) {
+ SET_RED(y);
+ }
+ else {
+ if (IS_BLACK(y->right)) {
+ SET_BLACK(y->left);
+ SET_RED(y);
+ right_rotate(&tree->root, y);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->right;
+ }
+ RBT_ASSERT(y);
+ if (IS_RED(papa_x)) {
+
+ SET_BLACK(papa_x);
+ SET_RED(y);
+ }
+ RBT_ASSERT(y->right);
+ SET_BLACK(y->right);
+ left_rotate(&tree->root, papa_x);
+ x = tree->root;
+ break;
+ }
+ }
+ else {
+ RBT_ASSERT(x == papa_x->right);
+ y = papa_x->left;
+ RBT_ASSERT(y);
+ if (IS_RED(y)) {
+ RBT_ASSERT(y->right);
+ RBT_ASSERT(y->left);
+ SET_BLACK(y);
+ RBT_ASSERT(IS_BLACK(papa_x));
+ SET_RED(papa_x);
+ right_rotate(&tree->root, papa_x);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->left;
+ }
+ RBT_ASSERT(y);
+ RBT_ASSERT(IS_BLACK(y));
+ if (IS_BLACK(y->right) && IS_BLACK(y->left)) {
+ SET_RED(y);
+ }
+ else {
+ if (IS_BLACK(y->left)) {
+ SET_BLACK(y->right);
+ SET_RED(y);
+ left_rotate(&tree->root, y);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->left;
+ }
+ RBT_ASSERT(y);
+ if (IS_RED(papa_x)) {
+ SET_BLACK(papa_x);
+ SET_RED(y);
+ }
+ RBT_ASSERT(y->left);
+ SET_BLACK(y->left);
+ right_rotate(&tree->root, papa_x);
+ x = tree->root;
+ break;
+ }
+ }
+ x = papa_x;
+ papa_x = parent(x);
+ }
+ SET_BLACK(x);
+
+ papa_x = parent(&null_x);
+ if (papa_x) {
+ if (papa_x->left == &null_x)
+ papa_x->left = NULL;
+ else {
+ RBT_ASSERT(papa_x->right == &null_x);
+ papa_x->right = NULL;
+ }
+ RBT_ASSERT(!null_x.left);
+ RBT_ASSERT(!null_x.right);
+ }
+ else if (tree->root == &null_x) {
+ tree->root = NULL;
+ RBT_ASSERT(!null_x.left);
+ RBT_ASSERT(!null_x.right);
+ }
+ }
+ HARD_CHECK_TREE(tree, 0);
+}
+
+
+static void
+rbt_insert(RBTree* tree, RBTNode* node)
+{
+#ifdef RBT_DEBUG
+ ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL;
+#endif
+ ErtsFreeSegDesc* desc = node_to_desc(tree->order, node);
+ char* seg_addr = desc->start;
+ SWord seg_sz = desc->end - desc->start;
+
+ HARD_CHECK_TREE(tree, 0);
+
+ node->left = NULL;
+ node->right = NULL;
+
+ if (!tree->root) {
+ node->parent_and_color = parent_and_color(NULL, !RED_FLG);
+ tree->root = node;
+ }
+ else {
+ RBTNode *x = tree->root;
+ while (1) {
+ SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x);
+ if (diff < 0) {
+ IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x));
+ if (!x->left) {
+ node->parent_and_color = parent_and_color(x, RED_FLG);
+ x->left = node;
+ break;
+ }
+ x = x->left;
+ }
+ else {
+ RBT_ASSERT(diff > 0);
+ IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x));
+ if (!x->right) {
+ node->parent_and_color = parent_and_color(x, RED_FLG);
+ x->right = node;
+ break;
+ }
+ x = x->right;
+ }
+ }
+
+ RBT_ASSERT(parent(node));
+#ifdef RBT_DEBUG
+ if (tree->order == ADDR_ORDER) {
+ RBT_ASSERT(!dbg_under || dbg_under->end < desc->start);
+ RBT_ASSERT(!dbg_over || dbg_over->start > desc->end);
+ }
+#endif
+ RBT_ASSERT(IS_RED(node));
+ if (IS_RED(parent(node)))
+ tree_insert_fixup(&tree->root, node);
+ }
+ HARD_CHECK_TREE(tree, 0);
+}
+
+/*
+ * Traverse tree in (reverse) sorting order
+ */
+static void
+rbt_foreach_node(RBTree* tree,
+ void (*fn)(RBTNode*,void*),
+ void* arg, int reverse)
+{
+#ifdef HARD_DEBUG
+ Sint blacks = -1;
+ Sint curr_blacks = 1;
+ Uint depth = 1;
+ Uint max_depth = 0;
+ Uint node_cnt = 0;
+#endif
+ enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state;
+ RBTNode *x = tree->root;
+
+ RBT_ASSERT(!x || !parent(x));
+
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ while (x) {
+ switch (state) {
+ case RECURSE_LEFT:
+ if (x->left) {
+ RBT_ASSERT(parent(x->left) == x);
+ #ifdef HARD_DEBUG
+ ++depth;
+ if (IS_BLACK(x->left))
+ curr_blacks++;
+ #endif
+ x = x->left;
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ }
+ else {
+ #ifdef HARD_DEBUG
+ if (blacks < 0)
+ blacks = curr_blacks;
+ RBT_ASSERT(blacks == curr_blacks);
+ #endif
+ state = reverse ? RETURN_TO_PARENT : DO_NODE;
+ }
+ break;
+
+ case DO_NODE:
+ #ifdef HARD_DEBUG
+ ++node_cnt;
+ if (depth > max_depth)
+ max_depth = depth;
+ #endif
+ (*fn) (x, arg); /* Do it! */
+ state = reverse ? RECURSE_LEFT : RECURSE_RIGHT;
+ break;
+
+ case RECURSE_RIGHT:
+ if (x->right) {
+ RBT_ASSERT(parent(x->right) == x);
+ #ifdef HARD_DEBUG
+ ++depth;
+ if (IS_BLACK(x->right))
+ curr_blacks++;
+ #endif
+ x = x->right;
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ }
+ else {
+ #ifdef HARD_DEBUG
+ if (blacks < 0)
+ blacks = curr_blacks;
+ RBT_ASSERT(blacks == curr_blacks);
+ #endif
+ state = reverse ? DO_NODE : RETURN_TO_PARENT;
+ }
+ break;
+
+ case RETURN_TO_PARENT:
+ #ifdef HARD_DEBUG
+ if (IS_BLACK(x))
+ curr_blacks--;
+ --depth;
+ #endif
+ if (parent(x)) {
+ if (x == parent(x)->left) {
+ state = reverse ? RETURN_TO_PARENT : DO_NODE;
+ }
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ state = reverse ? DO_NODE : RETURN_TO_PARENT;
+ }
+ }
+ x = parent(x);
+ break;
+ }
+ }
+#ifdef HARD_DEBUG
+ RBT_ASSERT(depth == 0 || (!tree->root && depth==1));
+ RBT_ASSERT(curr_blacks == 0);
+ RBT_ASSERT((1 << (max_depth/2)) <= node_cnt);
+#endif
+}
+
+#if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG)
+static RBTNode* rbt_prev_node(RBTNode* node)
+{
+ RBTNode* x;
+ if (node->left) {
+ for (x=node->left; x->right; x=x->right)
+ ;
+ return x;
+ }
+ for (x=node; parent(x); x=parent(x)) {
+ if (parent(x)->right == x)
+ return parent(x);
+ }
+ return NULL;
+}
+static RBTNode* rbt_next_node(RBTNode* node)
+{
+ RBTNode* x;
+ if (node->right) {
+ for (x=node->right; x->left; x=x->left)
+ ;
+ return x;
+ }
+ for (x=node; parent(x); x=parent(x)) {
+ if (parent(x)->left == x)
+ return parent(x);
+ }
+ return NULL;
+}
+#endif /* RBT_DEBUG || HARD_DEBUG_MSEG */
+
+
+/* The API to keep track of a bunch of separated (free) segments
+ (non-overlapping and non-adjacent).
+ */
+static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder);
+static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end,
+ ErtsFreeSegDesc** under, ErtsFreeSegDesc** over);
+static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
+static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
+static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*);
+static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz);
+
+
+static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order)
+{
+ map->atree.root = NULL;
+ map->atree.order = ADDR_ORDER;
+ map->stree.root = NULL;
+ map->stree.order = order;
+ map->nseg = 0;
+}
+
+/* Lookup directly adjacent free segments to the given area [start->end].
+ * The given area must not contain any free segments.
+ */
+static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end,
+ ErtsFreeSegDesc** under, ErtsFreeSegDesc** over)
+{
+ RBTNode* x = map->atree.root;
+
+ *under = NULL;
+ *over = NULL;
+ while (x) {
+ if (start < anode_to_desc(x)->start) {
+ RBT_ASSERT(end <= anode_to_desc(x)->start);
+ if (end == anode_to_desc(x)->start) {
+ RBT_ASSERT(!*over);
+ *over = anode_to_desc(x);
+ }
+ x = x->left;
+ }
+ else {
+ RBT_ASSERT(start >= anode_to_desc(x)->end);
+ if (start == anode_to_desc(x)->end) {
+ RBT_ASSERT(!*under);
+ *under = anode_to_desc(x);
+ }
+ x = x->right;
+ }
+ }
+}
+
+/* Initialize 'desc' and insert as new free segment [start->end].
+ * The new segment must not contain or be adjacent to any free segment in 'map'.
+ */
+static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
+ char* start, char* end)
+{
+ desc->start = start;
+ desc->end = end;
+ rbt_insert(&map->atree, &desc->anode);
+ rbt_insert(&map->stree, &desc->snode);
+ map->nseg++;
+}
+
+/* Resize existing free segment 'desc' to [start->end].
+ * The new segment location must overlap the old location and
+ * it must not contain or be adjacent to any other free segment in 'map'.
+ */
+static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
+ char* start, char* end)
+{
+#ifdef RBT_DEBUG
+ RBTNode* prev = rbt_prev_node(&desc->anode);
+ RBTNode* next = rbt_next_node(&desc->anode);
+ RBT_ASSERT(!prev || anode_to_desc(prev)->end < start);
+ RBT_ASSERT(!next || anode_to_desc(next)->start > end);
+#endif
+ rbt_delete(&map->stree, &desc->snode);
+ desc->start = start;
+ desc->end = end;
+ rbt_insert(&map->stree, &desc->snode);
+}
+
+/* Delete existing free segment 'desc' from 'map'.
+ */
+static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc)
+{
+ rbt_delete(&map->atree, &desc->anode);
+ rbt_delete(&map->stree, &desc->snode);
+ map->nseg--;
+}
+
+/* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes.
+ */
+static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz)
+{
+ RBTNode* x = map->stree.root;
+ ErtsFreeSegDesc* best_desc = NULL;
+ const enum SortOrder order = map->stree.order;
+
+ while (x) {
+ ErtsFreeSegDesc* desc = snode_to_desc(x);
+ SWord seg_sz = usable_size(order, desc);
+
+ if (seg_sz < need_sz) {
+ x = x->right;
+ }
+ else {
+ best_desc = desc;
+ x = x->left;
+ }
+ }
+ return best_desc;
+}
+
+struct build_arg_t
+{
+ Process* p;
+ Eterm* hp;
+ Eterm acc;
+};
+
+static void build_free_seg_tuple(RBTNode* node, void* arg)
+{
+ struct build_arg_t* a = (struct build_arg_t*)arg;
+ ErtsFreeSegDesc* desc = anode_to_desc(node);
+ Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start);
+ Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end);
+ Eterm tpl = TUPLE2(a->hp, start, end);
+
+ a->hp += 3;
+ a->acc = CONS(a->hp, tpl, a->acc);
+ a->hp += 2;
+}
+
+static
+Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
+{
+ struct build_arg_t barg;
+ Eterm* hp_end;
+ const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */
+
+ barg.p = p;
+ barg.hp = HAlloc(p, may_need);
+ hp_end = barg.hp + may_need;
+ barg.acc = NIL;
+ rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1);
+
+ RBT_ASSERT(barg.hp <= hp_end);
+ HRelease(p, hp_end, barg.hp);
+ return barg.acc;
+}
+
+#if ERTS_HAVE_OS_MMAP
+/* Implementation of os_mmap()/os_munmap()/os_mremap()... */
+
+#if HAVE_MMAP
+# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
+# define ERTS_MMAP_PROT_EXEC (PROT_READ|PROT_WRITE|PROT_EXEC)
+# if defined(MAP_ANONYMOUS)
+# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
+# define ERTS_MMAP_FD (-1)
+# elif defined(MAP_ANON)
+# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
+# define ERTS_MMAP_FD (-1)
+# else
+# define ERTS_MMAP_FLAGS (MAP_PRIVATE)
+# define ERTS_MMAP_FD mm->mmap_fd
+# endif
+#endif
+
+static ERTS_INLINE void *
+os_mmap(void *hint_ptr, UWord size, int try_superalign, int executable)
+{
+#if HAVE_MMAP
+ const int prot = executable ? ERTS_MMAP_PROT_EXEC : ERTS_MMAP_PROT;
+ void *res;
+#ifdef MAP_ALIGN
+ if (try_superalign)
+ res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, prot,
+ ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
+ else
+#endif
+ res = mmap((void *) hint_ptr, size, prot,
+ ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == MAP_FAILED)
+ return NULL;
+ return res;
+#elif HAVE_VIRTUALALLOC
+ const DWORD prot = executable ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
+ return (void *) VirtualAlloc(NULL, (SIZE_T) size,
+ MEM_COMMIT|MEM_RESERVE, prot);
+#else
+# error "missing mmap() or similar"
+#endif
+}
+
+static ERTS_INLINE void
+os_munmap(void *ptr, UWord size)
+{
+#if HAVE_MMAP
+#ifdef ERTS_MMAP_DEBUG
+ int res =
+#endif
+ munmap(ptr, size);
+ ERTS_MMAP_ASSERT(res == 0);
+#elif HAVE_VIRTUALALLOC
+#ifdef DEBUG
+ BOOL res =
+#endif
+ VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE);
+ ERTS_MMAP_ASSERT(res != 0);
+#else
+# error "missing munmap() or similar"
+#endif
+}
+
+#ifdef ERTS_HAVE_OS_MREMAP
+# if HAVE_MREMAP
+# if defined(__NetBSD__)
+# define ERTS_MREMAP_FLAGS (0)
+# else
+# define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE)
+# endif
+# endif
+static ERTS_INLINE void *
+os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
+{
+ void *new_seg;
+#if HAVE_MREMAP
+ new_seg = mremap(ptr, (size_t) old_size,
+# if defined(__NetBSD__)
+ NULL,
+# endif
+ (size_t) new_size, ERTS_MREMAP_FLAGS);
+ if (new_seg == (void *) MAP_FAILED)
+ return NULL;
+ return new_seg;
+#else
+# error "missing mremap() or similar"
+#endif
+}
+#endif
+
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+#if HAVE_MMAP
+
+#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
+#define ERTS_MMAP_RESERVE_PROT_EXEC (ERTS_MMAP_PROT_EXEC)
+#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
+#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
+#if defined(__FreeBSD__)
+#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
+#else
+#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
+#endif /* __FreeBSD__ */
+#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
+#if defined(__FreeBSD__)
+#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS)
+#else
+#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
+#endif /* __FreeBSD__ */
+
+static int
+os_reserve_physical(char *ptr, UWord size, int exec)
+{
+ const int prot = exec ? ERTS_MMAP_RESERVE_PROT_EXEC : ERTS_MMAP_RESERVE_PROT;
+ void *res = mmap((void *) ptr, (size_t) size, prot,
+ ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ return 0;
+ return 1;
+}
+
+static void
+os_unreserve_physical(char *ptr, UWord size)
+{
+ void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
+ ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
+}
+
+static void *
+os_mmap_virtual(char *ptr, UWord size, int exec)
+{
+ int flags = ERTS_MMAP_VIRTUAL_FLAGS;
+ void* res;
+
+#ifdef ERTS_ALC_A_EXEC
+ if (exec) {
+ ASSERT(!ptr);
+ /* OTP-19.0: Nice hack below cut-and-pasted from hipe_amd64.c */
+
+# ifdef MAP_32BIT
+ /* If we got MAP_32BIT (Linux), then use that to ask for low memory */
+ flags |= MAP_32BIT;
+# else
+ /* FreeBSD doesn't have MAP_32BIT, and it doesn't respect
+ a plain map_hint (returns high mappings even though the
+ hint refers to a free area), so we have to use both map_hint
+ and MAP_FIXED to get addresses below the 2GB boundary.
+ This is even worse than the Linux/ppc64 case.
+ Similarly, Solaris 10 doesn't have MAP_32BIT,
+ and it doesn't respect a plain map_hint. */
+ ptr = (char*)(512*1024*1024); /* 0.5GB */
+
+# if defined(__FreeBSD__) || defined(__sun__)
+ flags |= MAP_FIXED;
+# endif
+# endif /* !MAP_32BIT */
+ }
+#else /* !ERTS_ALC_A_EXEC */
+ ASSERT(!exec);
+#endif
+ res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
+ flags, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ return NULL;
+ return res;
+}
+
+#else
+#error "Missing reserve/unreserve physical memory implementation"
+#endif
+#endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */
+
+#endif /* ERTS_HAVE_OS_MMAP */
+
+static int reserve_noop(char *ptr, UWord size, int exec)
+{
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ Uint32 *uip, *end = (Uint32 *) (ptr + size);
+
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef);
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ *uip = (Uint32) 0xfeedfeed;
+#endif
+ return 1;
+}
+
+static void unreserve_noop(char *ptr, UWord size)
+{
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ Uint32 *uip, *end = (Uint32 *) (ptr + size);
+
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ *uip = (Uint32) 0xdeadbeef;
+#endif
+}
+
+static UWord
+alloc_desc_insert_free_seg(ErtsMemMapper* mm,
+ ErtsFreeSegMap *map, char* start, char* end)
+{
+ char *ptr;
+ ErtsFreeSegMap *da_map;
+ ErtsFreeSegDesc *desc = alloc_desc(mm);
+ if (desc) {
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+
+ /*
+ * Ahh; ran out of free segment descriptors.
+ *
+ * First try to map a new page...
+ */
+
+#if ERTS_HAVE_OS_MMAP
+ if (!mm->no_os_mmap) {
+ ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0, 0);
+ if (ptr) {
+ mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
+ ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
+ add_free_desc_area(mm, ptr, ptr+ERTS_PAGEALIGNED_SIZE);
+ desc = alloc_desc(mm);
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+ }
+#endif
+
+ /*
+ * ...then try to find a good place in the supercarrier...
+ */
+ da_map = &mm->sua.map;
+ desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
+ if (desc) {
+ if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0))
+ ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
+ else
+ desc = NULL;
+
+ }
+ else {
+ da_map = &mm->sa.map;
+ desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
+ if (desc) {
+ if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE, 0))
+ ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
+ else
+ desc = NULL;
+ }
+ }
+ if (desc) {
+ char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
+ add_free_desc_area(mm, desc->start, da_end);
+ if (da_end != desc->end)
+ resize_free_seg(da_map, desc, da_end, desc->end);
+ else {
+ delete_free_seg(da_map, desc);
+ free_desc(mm, desc);
+ }
+
+ desc = alloc_desc(mm);
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+
+ /*
+ * ... and then as last resort use the first page of the
+ * free segment we are trying to insert for free descriptors.
+ */
+ ptr = start + ERTS_PAGEALIGNED_SIZE;
+ ERTS_MMAP_ASSERT(ptr <= end);
+
+ add_free_desc_area(mm, start, ptr);
+
+ if (ptr != end) {
+ desc = alloc_desc(mm);
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, ptr, end);
+ }
+
+ return ERTS_PAGEALIGNED_SIZE;
+}
+
+void *
+erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep)
+{
+ char *seg;
+ UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);
+
+ /* Map in premapped supercarrier */
+ if (mm->supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
+ char *end;
+ ErtsFreeSegDesc *desc;
+ Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ erts_smp_mtx_lock(&mm->mtx);
+
+ ERTS_MMAP_OP_START(*sizep);
+
+ if (!superaligned) {
+ desc = lookup_free_seg(&mm->sua.map, asize);
+ if (desc) {
+ seg = desc->start;
+ end = seg+asize;
+ if (!mm->reserve_physical(seg, asize, mm->executable))
+ goto supercarrier_reserve_failure;
+ if (desc->end == end) {
+ delete_free_seg(&mm->sua.map, desc);
+ free_desc(mm, desc);
+ }
+ else {
+ ERTS_MMAP_ASSERT(end < desc->end);
+ resize_free_seg(&mm->sua.map, desc, end, desc->end);
+ }
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ goto supercarrier_success;
+ }
+
+ if (asize <= mm->sua.bot - mm->sa.top) {
+ if (!mm->reserve_physical(mm->sua.bot - asize, asize,
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ mm->sua.bot -= asize;
+ seg = mm->sua.bot;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ goto supercarrier_success;
+ }
+ }
+
+ asize = ERTS_SUPERALIGNED_CEILING(asize);
+
+ desc = lookup_free_seg(&mm->sa.map, asize);
+ if (desc) {
+ char *start = seg = desc->start;
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
+ end = seg+asize;
+ if (!mm->reserve_physical(start, (UWord) (end - start),
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize);
+ if (desc->end == end) {
+ if (start != seg)
+ resize_free_seg(&mm->sa.map, desc, start, seg);
+ else {
+ delete_free_seg(&mm->sa.map, desc);
+ free_desc(mm, desc);
+ }
+ }
+ else {
+ ERTS_MMAP_ASSERT(end < desc->end);
+ resize_free_seg(&mm->sa.map, desc, end, desc->end);
+ if (start != seg) {
+ UWord ad_sz;
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
+ start, seg);
+ start += ad_sz;
+ if (start != seg)
+ mm->unreserve_physical(start, (UWord) (seg - start));
+ }
+ }
+ goto supercarrier_success;
+ }
+
+ if (superaligned) {
+ char *start = mm->sa.top;
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(start);
+
+ if (asize + (seg - start) <= mm->sua.bot - start) {
+ end = seg + asize;
+ if (!mm->reserve_physical(start, (UWord) (end - start),
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ mm->sa.top = end;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize);
+ if (start != seg) {
+ UWord ad_sz;
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
+ start, seg);
+ start += ad_sz;
+ if (start != seg)
+ mm->unreserve_physical(start, (UWord) (seg - start));
+ }
+ goto supercarrier_success;
+ }
+
+ desc = lookup_free_seg(&mm->sua.map, asize + ERTS_SUPERALIGNED_SIZE);
+ if (desc) {
+ char *org_start = desc->start;
+ char *org_end = desc->end;
+
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
+ end = seg + asize;
+ if (!mm->reserve_physical(seg, (UWord) (org_end - seg),
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ if (org_start != seg) {
+ ERTS_MMAP_ASSERT(org_start < seg);
+ resize_free_seg(&mm->sua.map, desc, org_start, seg);
+ desc = NULL;
+ }
+ if (end != org_end) {
+ UWord ad_sz = 0;
+ ERTS_MMAP_ASSERT(end < org_end);
+ if (desc)
+ resize_free_seg(&mm->sua.map, desc, end, org_end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
+ end, org_end);
+ end += ad_sz;
+ if (end != org_end)
+ mm->unreserve_physical(end,
+ (UWord) (org_end - end));
+ }
+ goto supercarrier_success;
+ }
+ }
+
+ ERTS_MMAP_OP_ABORT();
+ erts_smp_mtx_unlock(&mm->mtx);
+ }
+
+#if ERTS_HAVE_OS_MMAP
+ /* Map using OS primitives */
+ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) {
+ if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
+ seg = os_mmap(NULL, asize, 0, mm->executable);
+ if (!seg)
+ goto failure;
+ }
+ else {
+ asize = ERTS_SUPERALIGNED_CEILING(*sizep);
+ seg = os_mmap(NULL, asize, 1, mm->executable);
+ if (!seg)
+ goto failure;
+
+ if (!ERTS_IS_SUPERALIGNED(seg)) {
+ char *ptr;
+ UWord sz;
+
+ os_munmap(seg, asize);
+
+ ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1,
+ mm->executable);
+ if (!ptr)
+ goto failure;
+
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr);
+ sz = (UWord) (seg - ptr);
+ ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE);
+ if (sz)
+ os_munmap(ptr, sz);
+ sz = ERTS_SUPERALIGNED_SIZE - sz;
+ if (sz)
+ os_munmap(seg+asize, sz);
+ }
+ }
+
+ ERTS_MMAP_OP_LCK(seg, *sizep, asize);
+ ERTS_MMAP_SIZE_OS_INC(asize);
+ *sizep = asize;
+ return (void *) seg;
+ }
+failure:
+#endif
+ ERTS_MMAP_OP_LCK(NULL, *sizep, 0);
+ *sizep = 0;
+ return NULL;
+
+supercarrier_success:
+
+#ifdef ERTS_MMAP_DEBUG
+ if (ERTS_MMAPFLG_SUPERALIGNED & flags) {
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg));
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
+ }
+ else {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+ }
+#endif
+
+ ERTS_MMAP_OP_END(seg, asize);
+ erts_smp_mtx_unlock(&mm->mtx);
+
+ *sizep = asize;
+ return (void *) seg;
+
+supercarrier_reserve_failure:
+ erts_smp_mtx_unlock(&mm->mtx);
+ *sizep = 0;
+ return NULL;
+}
+
+void
+erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size)
+{
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));
+
+ if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
+ ERTS_MMAP_ASSERT(!mm->no_os_mmap);
+#if ERTS_HAVE_OS_MMAP
+ ERTS_MUNMAP_OP_LCK(ptr, size);
+ ERTS_MMAP_SIZE_OS_DEC(size);
+ os_munmap(ptr, size);
+#endif
+ }
+ else {
+ char *start, *end;
+ ErtsFreeSegMap *map;
+ ErtsFreeSegDesc *prev, *next, *desc;
+ UWord ad_sz = 0;
+
+ ERTS_MMAP_ASSERT(mm->supercarrier);
+
+ start = (char *) ptr;
+ end = start + size;
+
+ erts_smp_mtx_lock(&mm->mtx);
+
+ ERTS_MUNMAP_OP(ptr, size);
+
+ if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+
+ map = &mm->sa.map;
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ ERTS_MMAP_SIZE_SC_SA_DEC(size);
+ if (end == mm->sa.top) {
+ ERTS_MMAP_ASSERT(!next);
+ if (prev) {
+ start = prev->start;
+ delete_free_seg(map, prev);
+ free_desc(mm, prev);
+ }
+ mm->sa.top = start;
+ goto supercarrier_success;
+ }
+ }
+ else {
+ map = &mm->sua.map;
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ ERTS_MMAP_SIZE_SC_SUA_DEC(size);
+ if (start == mm->sua.bot) {
+ ERTS_MMAP_ASSERT(!prev);
+ if (next) {
+ end = next->end;
+ delete_free_seg(map, next);
+ free_desc(mm, next);
+ }
+ mm->sua.bot = end;
+ goto supercarrier_success;
+ }
+ }
+
+ desc = NULL;
+
+ if (next) {
+ ERTS_MMAP_ASSERT(end < next->end);
+ end = next->end;
+ if (prev) {
+ delete_free_seg(map, next);
+ free_desc(mm, next);
+ goto save_prev;
+ }
+ desc = next;
+ } else if (prev) {
+ save_prev:
+ ERTS_MMAP_ASSERT(prev->start < start);
+ start = prev->start;
+ desc = prev;
+ }
+
+ if (desc)
+ resize_free_seg(map, desc, start, end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(mm, map, start, end);
+
+ supercarrier_success: {
+ UWord unres_sz;
+
+ ERTS_MMAP_ASSERT(size >= ad_sz);
+ unres_sz = size - ad_sz;
+ if (unres_sz)
+ mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz);
+
+ erts_smp_mtx_unlock(&mm->mtx);
+ }
+ }
+}
+
+static void *
+remap_move(ErtsMemMapper* mm,
+ Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+{
+ UWord size = *sizep;
+ void *new_ptr = erts_mmap(mm, flags, &size);
+ if (!new_ptr)
+ return NULL;
+ *sizep = size;
+ if (old_size < size)
+ size = old_size;
+ sys_memcpy(new_ptr, ptr, (size_t) size);
+ erts_munmap(mm, flags, ptr, old_size);
+ return new_ptr;
+}
+
+void *
+erts_mremap(ErtsMemMapper* mm,
+ Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+{
+ void *new_ptr;
+ Uint32 superaligned;
+ UWord asize;
+
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep));
+
+ if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
+
+ ERTS_MMAP_ASSERT(!mm->no_os_mmap);
+
+ if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mm->supercarrier) {
+ new_ptr = remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
+ ptr, old_size, sizep);
+ if (new_ptr)
+ return new_ptr;
+ }
+
+ if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) {
+ ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size);
+ return NULL;
+ }
+
+#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP
+ superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ if (superaligned) {
+ asize = ERTS_SUPERALIGNED_CEILING(*sizep);
+ if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) {
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+ }
+ else {
+ asize = ERTS_PAGEALIGNED_CEILING(*sizep);
+ if (asize == old_size) {
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+ }
+
+#if ERTS_HAVE_GENUINE_OS_MMAP
+ if (asize < old_size
+ && (!superaligned
+ || ERTS_IS_SUPERALIGNED(ptr))) {
+ UWord um_sz;
+ new_ptr = ((char *) ptr) + asize;
+ ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr);
+ um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr);
+ ERTS_MMAP_SIZE_OS_DEC(um_sz);
+ os_munmap(new_ptr, um_sz);
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+#endif
+#if ERTS_HAVE_OS_MREMAP
+ if (superaligned)
+ return remap_move(mm, flags, new_ptr, old_size, sizep);
+ else {
+ new_ptr = os_mremap(ptr, old_size, asize, 0);
+ if (!new_ptr)
+ return NULL;
+ if (asize > old_size)
+ ERTS_MMAP_SIZE_OS_INC(asize - old_size);
+ else
+ ERTS_MMAP_SIZE_OS_DEC(old_size - asize);
+ ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return new_ptr;
+ }
+#endif
+#endif
+ }
+ else { /* In super carrier */
+ char *start, *end, *new_end;
+ ErtsFreeSegMap *map;
+ ErtsFreeSegDesc *prev, *next;
+ UWord ad_sz = 0;
+
+ ERTS_MMAP_ASSERT(mm->supercarrier);
+
+ if (ERTS_MMAPFLG_OS_ONLY & flags)
+ return remap_move(mm, flags, ptr, old_size, sizep);
+
+ superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ asize = (superaligned
+ ? ERTS_SUPERALIGNED_CEILING(*sizep)
+ : ERTS_PAGEALIGNED_CEILING(*sizep));
+
+ erts_smp_mtx_lock(&mm->mtx);
+
+ if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
+ ? (!superaligned && lookup_free_seg(&mm->sua.map, asize))
+ : (superaligned && lookup_free_seg(&mm->sa.map, asize))) {
+ erts_smp_mtx_unlock(&mm->mtx);
+ /*
+ * Segment currently in wrong area (due to a previous memory
+ * shortage), move it to the right area.
+ * (remap_move() will succeed)
+ */
+ return remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
+ ptr, old_size, sizep);
+ }
+
+ ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
+
+ if (asize == old_size) {
+ new_ptr = ptr;
+ goto supercarrier_resize_success;
+ }
+
+ start = (char *) ptr;
+ end = start + old_size;
+ new_end = start+asize;
+
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+
+ if (asize < old_size) {
+ UWord unres_sz;
+ new_ptr = ptr;
+ if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+ map = &mm->sua.map;
+ ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
+ }
+ else {
+ if (end == mm->sa.top) {
+ mm->sa.top = new_end;
+ mm->unreserve_physical(((char *) ptr) + asize,
+ old_size - asize);
+ goto supercarrier_resize_success;
+ }
+ ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
+ map = &mm->sa.map;
+ }
+
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ if (next)
+ resize_free_seg(map, next, new_end, next->end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(mm, map, new_end, end);
+ ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
+ unres_sz = old_size - asize - ad_sz;
+ if (unres_sz)
+ mm->unreserve_physical(((char *) ptr) + asize + ad_sz,
+ unres_sz);
+ goto supercarrier_resize_success;
+ }
+
+ if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+
+ adjacent_free_seg(&mm->sua.map, start, end, &prev, &next);
+
+ if (next && new_end <= next->end) {
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ if (new_end < next->end)
+ resize_free_seg(&mm->sua.map, next, new_end, next->end);
+ else {
+ delete_free_seg(&mm->sua.map, next);
+ free_desc(mm, next);
+ }
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ else { /* Superaligned area */
+
+ if (end == mm->sa.top) {
+ if (new_end <= mm->sua.bot) {
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ mm->sa.top = new_end;
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ else {
+ adjacent_free_seg(&mm->sa.map, start, end, &prev, &next);
+ if (next && new_end <= next->end) {
+ if (!mm->reserve_physical(((char *) ptr) + old_size,
+ asize - old_size,
+ mm->executable))
+ goto supercarrier_reserve_failure;
+ if (new_end < next->end)
+ resize_free_seg(&mm->sa.map, next, new_end, next->end);
+ else {
+ delete_free_seg(&mm->sa.map, next);
+ free_desc(mm, next);
+ }
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ }
+
+ ERTS_MMAP_OP_ABORT();
+ erts_smp_mtx_unlock(&mm->mtx);
+
+ /* Failed to resize... */
+ }
+
+ return remap_move(mm, flags, ptr, old_size, sizep);
+
+supercarrier_resize_success:
+
+#ifdef ERTS_MMAP_DEBUG
+ if ((ERTS_MMAPFLG_SUPERALIGNED & flags)
+ || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) {
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
+ }
+ else {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+ }
+#endif
+
+ ERTS_MREMAP_OP_END(new_ptr, asize);
+ erts_smp_mtx_unlock(&mm->mtx);
+
+ *sizep = asize;
+ return new_ptr;
+
+supercarrier_reserve_failure:
+ ERTS_MREMAP_OP_END(NULL, old_size);
+ erts_smp_mtx_unlock(&mm->mtx);
+ *sizep = old_size;
+ return NULL;
+
+}
+
+int erts_mmap_in_supercarrier(ErtsMemMapper* mm, void *ptr)
+{
+ return ERTS_MMAP_IN_SUPERCARRIER(ptr);
+}
+
+static struct {
+ Eterm options;
+ Eterm total;
+ Eterm total_sa;
+ Eterm total_sua;
+ Eterm used;
+ Eterm used_sa;
+ Eterm used_sua;
+ Eterm max;
+ Eterm allocated;
+ Eterm reserved;
+ Eterm sizes;
+ Eterm free_segs;
+ Eterm supercarrier;
+ Eterm os;
+ Eterm scs;
+ Eterm sco;
+ Eterm scrpm;
+ Eterm scrfsd;
+
+ int is_initialized;
+ erts_mtx_t init_mutex;
+}am;
+
+static void ERTS_INLINE atom_init(Eterm *atom, char *name)
+{
+ *atom = am_atom_put(name, strlen(name));
+}
+#define AM_INIT(AM) atom_init(&am.AM, #AM)
+
+static void init_atoms(void)
+{
+ erts_mtx_lock(&am.init_mutex);
+
+ if (!am.is_initialized) {
+ AM_INIT(options);
+ AM_INIT(total);
+ AM_INIT(total_sa);
+ AM_INIT(total_sua);
+ AM_INIT(used);
+ AM_INIT(used_sa);
+ AM_INIT(used_sua);
+ AM_INIT(max);
+ AM_INIT(allocated);
+ AM_INIT(reserved);
+ AM_INIT(sizes);
+ AM_INIT(free_segs);
+ AM_INIT(supercarrier);
+ AM_INIT(os);
+ AM_INIT(scs);
+ AM_INIT(sco);
+ AM_INIT(scrpm);
+ AM_INIT(scrfsd);
+ am.is_initialized = 1;
+ }
+ erts_mtx_unlock(&am.init_mutex);
+};
+
+
+#ifdef HARD_DEBUG_MSEG
+static void hard_dbg_mseg_init(void);
+#endif
+
+void
+erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init, int executable)
+{
+ static int is_first_call = 1;
+ int virtual_map = 0;
+ char *start = NULL, *end = NULL;
+ UWord pagesize;
+#if defined(__WIN32__)
+ SYSTEM_INFO sysinfo;
+ GetSystemInfo(&sysinfo);
+ pagesize = (UWord) sysinfo.dwPageSize;
+#elif defined(_SC_PAGESIZE)
+ pagesize = (UWord) sysconf(_SC_PAGESIZE);
+#elif defined(HAVE_GETPAGESIZE)
+ pagesize = (UWord) getpagesize();
+#else
+# error "Do not know how to get page size"
+#endif
+#if defined(HARD_DEBUG) || 0
+ erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs);
+ erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco);
+ erts_fprintf(stderr, "erts_mmap: scrfsd = %i\n", init->scrfsd);
+#endif
+ erts_page_inv_mask = pagesize - 1;
+ if (pagesize & erts_page_inv_mask)
+ erts_exit(1, "erts_mmap: Invalid pagesize: %bpu\n",
+ pagesize);
+
+ ERTS_MMAP_OP_RINGBUF_INIT();
+
+ mm->supercarrier = 0;
+ mm->reserve_physical = reserve_noop;
+ mm->unreserve_physical = unreserve_noop;
+ mm->executable = executable;
+
+#if HAVE_MMAP && !defined(MAP_ANON)
+ mm->mmap_fd = open("/dev/zero", O_RDWR);
+ if (mm->mmap_fd < 0)
+ erts_exit(1, "erts_mmap: Failed to open /dev/zero\n");
+#endif
+
+ erts_smp_mtx_init(&mm->mtx, "erts_mmap");
+ if (is_first_call) {
+ erts_mtx_init(&am.init_mutex, "mmap_init_atoms");
+ }
+
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (init->virtual_range.start) {
+ char *ptr;
+ UWord sz;
+ ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
+ end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
+ sz = end - ptr;
+ start = os_mmap_virtual(ptr, sz, executable);
+ if (!start || start > ptr || start >= end)
+ erts_exit(1,
+ "erts_mmap: Failed to create virtual range for super carrier\n");
+ sz = start - ptr;
+ if (sz)
+ os_munmap(end, sz);
+ mm->reserve_physical = os_reserve_physical;
+ mm->unreserve_physical = os_unreserve_physical;
+ virtual_map = 1;
+ }
+ else
+#endif
+ if (init->predefined_area.start) {
+ start = init->predefined_area.start;
+ end = init->predefined_area.end;
+ if (end != (void *) 0 && end < start)
+ end = start;
+ }
+#if ERTS_HAVE_OS_MMAP
+ else if (init->scs) {
+ UWord sz;
+ sz = ERTS_PAGEALIGNED_CEILING(init->scs);
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (!init->scrpm) {
+ start = os_mmap_virtual(NULL, sz, executable);
+ mm->reserve_physical = os_reserve_physical;
+ mm->unreserve_physical = os_unreserve_physical;
+ virtual_map = 1;
+ }
+ else
+#endif
+ {
+ /*
+ * The whole supercarrier will by physically
+ * reserved all the time.
+ */
+ start = os_mmap(NULL, sz, 1, executable);
+ }
+ if (!start)
+ erts_exit(1,
+ "erts_mmap: Failed to create super carrier of size %bpu MB\n",
+ init->scs/1024/1024);
+ end = start + sz;
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ if (!virtual_map) {
+ Uint32 *uip;
+
+ for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++)
+ *uip = (Uint32) 0xdeadbeef;
+ }
+#endif
+ }
+#endif
+
+ mm->no.free_seg_descs = 0;
+ mm->no.free_segs.curr = 0;
+ mm->no.free_segs.max = 0;
+
+ mm->size.supercarrier.total = 0;
+ mm->size.supercarrier.used.total = 0;
+ mm->size.supercarrier.used.sa = 0;
+ mm->size.supercarrier.used.sua = 0;
+ mm->size.os.used = 0;
+
+ mm->desc.new_area_hint = NULL;
+
+ if (!start) {
+ mm->sa.bot = NULL;
+ mm->sua.top = NULL;
+ mm->sa.bot = NULL;
+ mm->sua.top = NULL;
+ mm->no_os_mmap = 0;
+ mm->supercarrier = 0;
+ }
+ else {
+ size_t desc_size;
+
+ mm->no_os_mmap = init->sco;
+
+ desc_size = init->scrfsd;
+ if (desc_size < 100)
+ desc_size = 100;
+ desc_size *= sizeof(ErtsFreeSegDesc);
+ if ((desc_size
+ + ERTS_SUPERALIGNED_SIZE
+ + ERTS_PAGEALIGNED_SIZE) > end - start)
+ erts_exit(1, "erts_mmap: No space for segments in super carrier\n");
+
+ mm->sa.bot = start;
+ mm->sa.bot += desc_size;
+ mm->sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mm->sa.bot);
+ mm->sa.top = mm->sa.bot;
+ mm->sua.top = end;
+ mm->sua.bot = mm->sua.top;
+
+ mm->size.supercarrier.used.total += (UWord) (mm->sa.bot - start);
+
+ mm->desc.free_list = NULL;
+ mm->desc.reserved = 0;
+
+ if (end == (void *) 0) {
+ /*
+ * Very unlikely, but we need a guarantee
+ * that `mm->sua.top` always will
+ * compare as larger than all segment pointers
+ * into the super carrier...
+ */
+ mm->sua.top -= ERTS_PAGEALIGNED_SIZE;
+ mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE, 0))
+#endif
+ add_free_desc_area(mm, mm->sua.top, end);
+ mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc);
+ }
+
+ mm->size.supercarrier.total = (UWord) (mm->sua.top - start);
+
+ /*
+ * Area before (and after) super carrier
+ * will be used for free segment descritors.
+ */
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start, 0))
+ erts_exit(1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
+#endif
+ mm->desc.unused_start = start;
+ mm->desc.unused_end = mm->sa.bot;
+ mm->desc.reserved += ((mm->desc.unused_end - start)
+ / sizeof(ErtsFreeSegDesc));
+
+ init_free_seg_map(&mm->sa.map, SA_SZ_ADDR_ORDER);
+ init_free_seg_map(&mm->sua.map, SZ_REVERSE_ADDR_ORDER);
+
+ mm->supercarrier = 1;
+
+ mm->desc.new_area_hint = end;
+
+ }
+
+#if !ERTS_HAVE_OS_MMAP
+ mm->no_os_mmap = 1;
+#endif
+
+#ifdef HARD_DEBUG_MSEG
+ hard_dbg_mseg_init();
+#endif
+
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ if (mm == &erts_literal_mmapper) {
+ erts_literals_start = erts_literal_mmapper.sa.bot;
+ erts_literals_size = erts_literal_mmapper.sua.top - erts_literals_start;
+ }
+#endif
+ is_first_call = 0;
+}
+
+
+static ERTS_INLINE void
+add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
+{
+ *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
+}
+
+Eterm erts_mmap_info(ErtsMemMapper* mm,
+ int *print_to_p,
+ void *print_to_arg,
+ Eterm** hpp, Uint* szp,
+ struct erts_mmap_info_struct* emis)
+{
+ Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua };
+ Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
+ Eterm group[2];
+ Eterm group_tags[] = { am.sizes, am.free_segs };
+ Eterm list[3];
+ Eterm list_tags[3]; /* { am.options, am.supercarrier, am.os } */
+ int lix = 0;
+ Eterm res = THE_NON_VALUE;
+
+ if (!hpp) {
+ erts_smp_mtx_lock(&mm->mtx);
+ emis->sizes[0] = mm->size.supercarrier.total;
+ emis->sizes[1] = mm->sa.top - mm->sa.bot;
+ emis->sizes[2] = mm->sua.top - mm->sua.bot;
+ emis->sizes[3] = mm->size.supercarrier.used.total;
+ emis->sizes[4] = mm->size.supercarrier.used.sa;
+ emis->sizes[5] = mm->size.supercarrier.used.sua;
+
+ emis->segs[0] = mm->no.free_segs.curr;
+ emis->segs[1] = mm->no.free_segs.max;
+ emis->segs[2] = mm->no.free_seg_descs;
+ emis->segs[3] = mm->desc.reserved;
+ emis->segs[4] = mm->sa.map.nseg;
+ emis->segs[5] = mm->sua.map.nseg;
+
+ emis->os_used = mm->size.os.used;
+ erts_smp_mtx_unlock(&mm->mtx);
+ }
+
+ list[lix] = erts_mmap_info_options(mm, "option ", print_to_p, print_to_arg,
+ hpp, szp);
+ list_tags[lix] = am.options;
+ lix++;
+
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ if (mm->supercarrier) {
+ const char* prefix = "supercarrier ";
+ erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
+ erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
+ erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]);
+ erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]);
+ erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]);
+ erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]);
+ erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]);
+ erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]);
+ erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]);
+ erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]);
+ erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
+ erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
+ }
+ if (!mm->no_os_mmap) {
+ erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
+ }
+ }
+
+
+ if (hpp || szp) {
+ if (!am.is_initialized) {
+ init_atoms();
+ }
+
+ if (mm->supercarrier) {
+ group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ sizeof(size_tags)/sizeof(Eterm),
+ size_tags, emis->sizes);
+ group[1] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ sizeof(seg_tags)/sizeof(Eterm),
+ seg_tags, emis->segs);
+ list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group);
+ list_tags[lix] = am.supercarrier;
+ lix++;
+ }
+
+ if (!mm->no_os_mmap) {
+ group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ 1, &am.used, &emis->os_used);
+ list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
+ list_tags[lix] = am.os;
+ lix++;
+ }
+ res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list);
+ }
+ return res;
+}
+
+Eterm erts_mmap_info_options(ErtsMemMapper* mm,
+ char *prefix,
+ int *print_to_p,
+ void *print_to_arg,
+ Uint **hpp,
+ Uint *szp)
+{
+ const UWord scs = mm->sua.top - mm->sa.bot;
+ const Eterm sco = mm->no_os_mmap ? am_true : am_false;
+ const Eterm scrpm = (mm->reserve_physical == reserve_noop) ? am_true : am_false;
+ Eterm res = THE_NON_VALUE;
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
+ if (mm->supercarrier) {
+ erts_print(to, arg, "%ssco: %T\n", prefix, sco);
+ erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
+ erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mm->desc.reserved);
+ }
+ }
+
+ if (hpp || szp) {
+ if (!am.is_initialized) {
+ init_atoms();
+ }
+
+ res = NIL;
+ if (mm->supercarrier) {
+ add_2tup(hpp, szp, &res, am.scrfsd,
+ erts_bld_uint(hpp,szp, mm->desc.reserved));
+ add_2tup(hpp, szp, &res, am.scrpm, scrpm);
+ add_2tup(hpp, szp, &res, am.sco, sco);
+ }
+ add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs));
+ }
+ return res;
+}
+
+#endif /* HAVE_ERTS_MMAP */
+
+Eterm erts_mmap_debug_info(Process* p)
+{
+#if HAVE_ERTS_MMAP
+ ErtsMemMapper* mm = &erts_dflt_mmapper;
+
+ if (mm->supercarrier) {
+ ERTS_DECL_AM(sabot);
+ ERTS_DECL_AM(satop);
+ ERTS_DECL_AM(suabot);
+ ERTS_DECL_AM(suatop);
+ Eterm sa_list, sua_list, list;
+ Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop };
+ UWord values[4];
+ Eterm *hp, *hp_end;
+ Uint may_need;
+
+ erts_smp_mtx_lock(&mm->mtx);
+ values[0] = (UWord)mm->sa.bot;
+ values[1] = (UWord)mm->sa.top;
+ values[2] = (UWord)mm->sua.bot;
+ values[3] = (UWord)mm->sua.top;
+ sa_list = build_free_seg_list(p, &mm->sa.map);
+ sua_list = build_free_seg_list(p, &mm->sua.map);
+ erts_smp_mtx_unlock(&mm->mtx);
+
+ may_need = 4*(2+3+2) + 2*(2+3);
+ hp = HAlloc(p, may_need);
+ hp_end = hp + may_need;
+
+ list = erts_bld_atom_uword_2tup_list(&hp, NULL,
+ sizeof(values)/sizeof(*values),
+ tags, values);
+
+ sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3;
+ sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3;
+ list = CONS(hp, sua_list, list); hp+=2;
+ list = CONS(hp, sa_list, list); hp+=2;
+
+ ASSERT(hp <= hp_end);
+ HRelease(p, hp_end, hp);
+ return list;
+ }
+#endif
+ return am_undefined;
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Debug functions *
+\* */
+
+
+#ifdef HARD_DEBUG
+
+static int rbt_assert_is_member(RBTNode* root, RBTNode* node)
+{
+ while (node != root) {
+ RBT_ASSERT(parent(node));
+ RBT_ASSERT(parent(node)->left == node || parent(node)->right == node);
+ node = parent(node);
+ }
+ return 1;
+}
+
+
+#if 0
+# define PRINT_TREE
+#else
+# undef PRINT_TREE
+#endif
+
+#ifdef PRINT_TREE
+static void print_tree(enum SortOrder order, RBTNode*);
+#endif
+
+/*
+ * Checks that the order between parent and children are correct,
+ * and that the Red-Black Tree properies are satisfied. if size > 0,
+ * check_tree() returns the node that satisfies "address order first fit"
+ *
+ * The Red-Black Tree properies are:
+ * 1. Every node is either red or black.
+ * 2. Every leaf (NIL) is black.
+ * 3. If a node is red, then both its children are black.
+ * 4. Every simple path from a node to a descendant leaf
+ * contains the same number of black nodes.
+ *
+ */
+
+struct check_arg_t {
+ RBTree* tree;
+ ErtsFreeSegDesc* prev_seg;
+ Uint size;
+ RBTNode *res;
+};
+static void check_node_callback(RBTNode* x, void* arg);
+
+
+static RBTNode *
+check_tree(RBTree* tree, Uint size)
+{
+ struct check_arg_t carg;
+ carg.tree = tree;
+ carg.prev_seg = NULL;
+ carg.size = size;
+ carg.res = NULL;
+
+#ifdef PRINT_TREE
+ print_tree(tree->order, tree->root);
+#endif
+
+ if (!tree->root)
+ return NULL;
+
+ RBT_ASSERT(IS_BLACK(tree->root));
+ RBT_ASSERT(!parent(tree->root));
+
+ rbt_foreach_node(tree, check_node_callback, &carg, 0);
+
+ return carg.res;
+}
+
+static void check_node_callback(RBTNode* x, void* arg)
+{
+ struct check_arg_t* a = (struct check_arg_t*) arg;
+ ErtsFreeSegDesc* seg;
+
+ if (IS_RED(x)) {
+ RBT_ASSERT(IS_BLACK(x->right));
+ RBT_ASSERT(IS_BLACK(x->left));
+ }
+
+ RBT_ASSERT(parent(x) || x == a->tree->root);
+
+ if (x->left) {
+ RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0);
+ }
+ if (x->right) {
+ RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0);
+ }
+
+ seg = node_to_desc(a->tree->order, x);
+ RBT_ASSERT(seg->start < seg->end);
+ if (a->size && (seg->end - seg->start) >= a->size) {
+ if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) {
+ a->res = x;
+ }
+ }
+ if (a->tree->order == ADDR_ORDER) {
+ RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start);
+ a->prev_seg = seg;
+ }
+}
+
+#endif /* HARD_DEBUG */
+
+
+#ifdef PRINT_TREE
+#define INDENT_STEP 2
+
+#include <stdio.h>
+
+static void
+print_tree_aux(enum SortOrder order, RBTNode *x, int indent)
+{
+ int i;
+
+ if (x) {
+ ErtsFreeSegDesc* desc = node_to_desc(order, x);
+ print_tree_aux(order, x->right, indent + INDENT_STEP);
+ for (i = 0; i < indent; i++) {
+ putc(' ', stderr);
+ }
+ fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n",
+ IS_BLACK(x) ? "BLACK" : "RED",
+ desc->end - desc->start, desc->start, desc->end, desc);
+ print_tree_aux(order, x->left, indent + INDENT_STEP);
+ }
+}
+
+
+static void
+print_tree(enum SortOrder order, RBTNode* root)
+{
+ fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]);
+ print_tree_aux(order, root, 0);
+ fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]);
+}
+
+#endif /* PRINT_TREE */
+
+
+#ifdef FREE_SEG_API_SMOKE_TEST
+
+void test_it(void)
+{
+ ErtsFreeSegMap map;
+ ErtsFreeSegDesc *desc, *under, *over, *d1, *d2;
+ const int i = 1; /* reverse addr order */
+
+ {
+ init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER);
+
+ insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ desc = lookup_free_seg(&map, 0x500);
+ ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L));
+
+ desc = lookup_free_seg(&map, 0x1500);
+ ERTS_ASSERT(desc->start == (char*)0x15000);
+
+ adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over);
+ ERTS_ASSERT(!under && !over);
+
+ adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over);
+ ERTS_ASSERT(!under && over->start == (char*)0x8000);
+
+ adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over);
+ ERTS_ASSERT(under->end == (char*)0x10000 && !over);
+
+ adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over);
+ ERTS_ASSERT(!under && !over);
+
+ adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over);
+ ERTS_ASSERT(!under && over && over->start == (char*)0x11000);
+
+ adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over);
+ ERTS_ASSERT(under && under->end == (char*)0x12000 && !over);
+
+ adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over);
+ ERTS_ASSERT(under && under->end == (char*)0x12000 &&
+ over && over->start == (char*)0x13000);
+
+ adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over);
+ ERTS_ASSERT(!under && over && over->start == (char*)0x13000);
+
+ d1 = lookup_free_seg(&map, 0x500);
+ ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L));
+
+ resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ d2 = lookup_free_seg(&map, 0x1200);
+ ERTS_ASSERT(d2 == d1);
+
+ delete_free_seg(&map, d1);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ d1 = lookup_free_seg(&map, 0x1200);
+ ERTS_ASSERT(d1->start == (char*)0x15000);
+ }
+}
+
+#endif /* FREE_SEG_API_SMOKE_TEST */
+
+
+#ifdef HARD_DEBUG_MSEG
+
+/*
+ * Debug stuff used by erl_mseg to check that it does the right thing.
+ * The reason for keeping it here is that we (ab)use the rb-tree code
+ * for keeping track of *allocated* segments.
+ */
+
+typedef struct ErtsFreeSegDesc_fake_ {
+ /*RBTNode snode; Save memory by skipping unused size tree node */
+ RBTNode anode; /* node in 'atree' */
+ union {
+ char* start;
+ struct ErtsFreeSegDesc_fake_* next_free;
+ }u;
+ char* end;
+}ErtsFreeSegDesc_fake;
+
+static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000];
+static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first;
+RBTree hard_dbg_mseg_tree;
+
+static erts_mtx_t hard_dbg_mseg_mtx;
+
+static void hard_dbg_mseg_init(void)
+{
+ ErtsFreeSegDesc_fake* p;
+
+ erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg");
+ hard_dbg_mseg_tree.root = NULL;
+ hard_dbg_mseg_tree.order = ADDR_ORDER;
+
+ p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) /
+ sizeof(*hard_dbg_mseg_desc_pool)) - 1];
+ p->u.next_free = NULL;
+ while (--p >= hard_dbg_mseg_desc_pool) {
+ p->u.next_free = (p+1);
+ }
+ hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0];
+}
+
+static ErtsFreeSegDesc* hard_dbg_alloc_desc(void)
+{
+ ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first;
+ ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors");
+ hard_dbg_mseg_desc_first = p->u.next_free;
+
+ /* Creative pointer arithmetic to return something that looks like
+ * a ErtsFreeSegDesc as long as we don't use the absent 'snode'.
+ */
+ return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode));
+}
+
+static void hard_dbg_free_desc(ErtsFreeSegDesc* desc)
+{
+ ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode;
+ memset(p, 0xfe, sizeof(*p));
+ p->u.next_free = hard_dbg_mseg_desc_first;
+ hard_dbg_mseg_desc_first = p;
+}
+
+static void check_seg_writable(void* seg, UWord sz)
+{
+ UWord* seg_end = (UWord*)((char*)seg + sz);
+ volatile UWord* p;
+ ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg));
+ ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz));
+ for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) {
+ UWord write_back = *p;
+ *p = 0xfade2b1acc;
+ *p = write_back;
+ }
+}
+
+void hard_dbg_insert_mseg(void* seg, UWord sz)
+{
+ check_seg_writable(seg, sz);
+ erts_mtx_lock(&hard_dbg_mseg_mtx);
+ {
+ ErtsFreeSegDesc *desc = hard_dbg_alloc_desc();
+ RBTNode *prev, *next;
+ desc->start = (char*)seg;
+ desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */
+ rbt_insert(&hard_dbg_mseg_tree, &desc->anode);
+ prev = rbt_prev_node(&desc->anode);
+ next = rbt_next_node(&desc->anode);
+ ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start);
+ ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end);
+ }
+ erts_mtx_unlock(&hard_dbg_mseg_mtx);
+}
+
+static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start)
+{
+ RBTNode* x = tree->root;
+
+ while (x) {
+ ErtsFreeSegDesc* desc = anode_to_desc(x);
+ if (start < desc->start) {
+ x = x->left;
+ }
+ else if (start > desc->start) {
+ ERTS_ASSERT(start > desc->end);
+ x = x->right;
+ }
+ else
+ return desc;
+ }
+ return NULL;
+}
+
+void hard_dbg_remove_mseg(void* seg, UWord sz)
+{
+ check_seg_writable(seg, sz);
+ erts_mtx_lock(&hard_dbg_mseg_mtx);
+ {
+ ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg);
+ ERTS_ASSERT(desc);
+ ERTS_ASSERT(desc->start == (char*)seg);
+ ERTS_ASSERT(desc->end == (char*)seg + sz - 1);
+
+ rbt_delete(&hard_dbg_mseg_tree, &desc->anode);
+ hard_dbg_free_desc(desc);
+ }
+ erts_mtx_unlock(&hard_dbg_mseg_mtx);
+}
+
+#endif /* HARD_DEBUG_MSEG */
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
new file mode 100644
index 0000000000..fa51b663fa
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -0,0 +1,181 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2013-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_MMAP_H__
+#define ERL_MMAP_H__
+
+#include "sys.h"
+
+#define ERTS_MMAP_SUPERALIGNED_BITS (18)
+/* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
+
+#ifndef HAVE_MMAP
+# define HAVE_MMAP 0
+#endif
+#ifndef HAVE_MREMAP
+# define HAVE_MREMAP 0
+#endif
+#if HAVE_MMAP
+# define ERTS_HAVE_OS_MMAP 1
+# define ERTS_HAVE_GENUINE_OS_MMAP 1
+# if HAVE_MREMAP
+# define ERTS_HAVE_OS_MREMAP 1
+# endif
+/*
+ * MAP_NORESERVE is undefined in FreeBSD 10.x and later.
+ * This is to enable 64bit HiPE experimentally on FreeBSD.
+ * Note that on FreeBSD MAP_NORESERVE was "never implemented"
+ * even before 11.x (and the flag does not exist in /usr/src/sys/vm/mmap.c
+ * of 10.3-STABLE r301478 either), and HiPE was working on OTP 18.3.3,
+ * so mandating MAP_NORESERVE on FreeBSD might not be needed.
+ * See the following message on how MAP_NORESERVE was treated on FreeBSD:
+ * <http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20150202/122958.html>
+ */
+# if defined(MAP_FIXED) && (defined(MAP_NORESERVE) || defined(__FreeBSD__))
+# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1
+# endif
+#endif
+
+#ifndef HAVE_VIRTUALALLOC
+# define HAVE_VIRTUALALLOC 0
+#endif
+#if HAVE_VIRTUALALLOC
+# define ERTS_HAVE_OS_MMAP 1
+#endif
+
+#ifdef ERTS_HAVE_GENUINE_OS_MMAP
+# define HAVE_ERTS_MMAP 1
+#else
+# define HAVE_ERTS_MMAP 0
+#endif
+
+
+extern UWord erts_page_inv_mask;
+
+typedef struct {
+ struct {
+ char *start;
+ char *end;
+ } virtual_range;
+ struct {
+ char *start;
+ char *end;
+ } predefined_area;
+ UWord scs; /* super carrier size */
+ int sco; /* super carrier only? */
+ UWord scrfsd; /* super carrier reserved free segment descriptors */
+ int scrpm; /* super carrier reserve physical memory */
+}ErtsMMapInit;
+
+#define ERTS_MMAP_INIT_DEFAULT_INITER \
+ {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1}
+
+#define ERTS_LITERAL_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(1)*1024*1024*1024)
+
+#define ERTS_MMAP_INIT_LITERAL_INITER \
+ {{NULL, NULL}, {NULL, NULL}, ERTS_LITERAL_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0}
+
+#define ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE (UWORD_CONSTANT(512)*1024*1024)
+
+#define ERTS_MMAP_INIT_HIPE_EXEC_INITER \
+ {{NULL, NULL}, {NULL, NULL}, ERTS_HIPE_EXEC_VIRTUAL_AREA_SIZE, 1, (1 << 10), 0}
+
+
+#define ERTS_SUPERALIGNED_SIZE \
+ (1 << ERTS_MMAP_SUPERALIGNED_BITS)
+#define ERTS_INV_SUPERALIGNED_MASK \
+ ((UWord) (ERTS_SUPERALIGNED_SIZE - 1))
+#define ERTS_SUPERALIGNED_MASK \
+ (~ERTS_INV_SUPERALIGNED_MASK)
+#define ERTS_SUPERALIGNED_FLOOR(X) \
+ (((UWord) (X)) & ERTS_SUPERALIGNED_MASK)
+#define ERTS_SUPERALIGNED_CEILING(X) \
+ ERTS_SUPERALIGNED_FLOOR((X) + ERTS_INV_SUPERALIGNED_MASK)
+#define ERTS_IS_SUPERALIGNED(X) \
+ (((UWord) (X) & ERTS_INV_SUPERALIGNED_MASK) == 0)
+
+#define ERTS_INV_PAGEALIGNED_MASK \
+ (erts_page_inv_mask)
+#define ERTS_PAGEALIGNED_MASK \
+ (~ERTS_INV_PAGEALIGNED_MASK)
+#define ERTS_PAGEALIGNED_FLOOR(X) \
+ (((UWord) (X)) & ERTS_PAGEALIGNED_MASK)
+#define ERTS_PAGEALIGNED_CEILING(X) \
+ ERTS_PAGEALIGNED_FLOOR((X) + ERTS_INV_PAGEALIGNED_MASK)
+#define ERTS_IS_PAGEALIGNED(X) \
+ (((UWord) (X) & ERTS_INV_PAGEALIGNED_MASK) == 0)
+#define ERTS_PAGEALIGNED_SIZE \
+ (ERTS_INV_PAGEALIGNED_MASK + 1)
+
+struct process;
+Eterm erts_mmap_debug_info(struct process*);
+
+#if HAVE_ERTS_MMAP
+
+typedef struct ErtsMemMapper_ ErtsMemMapper;
+
+#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0)
+#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1)
+#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2)
+
+void *erts_mmap(ErtsMemMapper*, Uint32 flags, UWord *sizep);
+void erts_munmap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord size);
+void *erts_mremap(ErtsMemMapper*, Uint32 flags, void *ptr, UWord old_size, UWord *sizep);
+int erts_mmap_in_supercarrier(ErtsMemMapper*, void *ptr);
+void erts_mmap_init(ErtsMemMapper*, ErtsMMapInit*, int executable);
+struct erts_mmap_info_struct
+{
+ UWord sizes[6];
+ UWord segs[6];
+ UWord os_used;
+};
+Eterm erts_mmap_info(ErtsMemMapper*, int *print_to_p, void *print_to_arg,
+ Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*);
+Eterm erts_mmap_info_options(ErtsMemMapper*,
+ char *prefix, int *print_to_p, void *print_to_arg,
+ Uint **hpp, Uint *szp);
+
+
+#ifdef ERTS_WANT_MEM_MAPPERS
+# include "erl_alloc_types.h"
+
+extern ErtsMemMapper erts_dflt_mmapper;
+# if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+extern ErtsMemMapper erts_literal_mmapper;
+# endif
+# ifdef ERTS_ALC_A_EXEC
+extern ErtsMemMapper erts_exec_mmapper;
+# endif
+#endif /* ERTS_WANT_MEM_MAPPERS */
+
+/*#define HARD_DEBUG_MSEG*/
+#ifdef HARD_DEBUG_MSEG
+# define HARD_DBG_INSERT_MSEG hard_dbg_insert_mseg
+# define HARD_DBG_REMOVE_MSEG hard_dbg_remove_mseg
+void hard_dbg_insert_mseg(void* seg, UWord sz);
+void hard_dbg_remove_mseg(void* seg, UWord sz);
+#else
+# define HARD_DBG_INSERT_MSEG(SEG,SZ)
+# define HARD_DBG_REMOVE_MSEG(SEG,SZ)
+#endif
+
+#endif /* HAVE_ERTS_MMAP */
+
+#endif /* ERL_MMAP_H__ */
diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c
index 2748edba02..f3306a888c 100644
--- a/erts/emulator/sys/common/erl_mseg.c
+++ b/erts/emulator/sys/common/erl_mseg.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -30,6 +31,7 @@
# include "config.h"
#endif
+#define ERTS_WANT_MEM_MAPPERS
#include "sys.h"
#include "erl_mseg.h"
#include "global.h"
@@ -98,56 +100,12 @@ static const int debruijn[32] = {
static int atoms_initialized;
-typedef struct mem_kind_t MemKind;
-
-#if HALFWORD_HEAP
-static int initialize_pmmap(void);
-static void *pmmap(size_t size);
-static int pmunmap(void *p, size_t size);
-static void *pmremap(void *old_address, size_t old_size,
- size_t new_size);
-#endif
-
-#if HAVE_MMAP
-/* Mmap ... */
-
-#define MMAP_PROT (PROT_READ|PROT_WRITE)
-
-
-#ifdef MAP_ANON
-# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
-# define MMAP_FD (-1)
-#else
-# define MMAP_FLAGS (MAP_PRIVATE)
-# define MMAP_FD mmap_fd
-static int mmap_fd;
-#endif
-
-#if HAVE_MREMAP
-# define HAVE_MSEG_RECREATE 1
-#else
-# define HAVE_MSEG_RECREATE 0
-#endif
-
-#if HALFWORD_HEAP
-#define CAN_PARTLY_DESTROY 0
-#else
-#define CAN_PARTLY_DESTROY 1
-#endif
-#else /* #if HAVE_MMAP */
-#define CAN_PARTLY_DESTROY 0
-#error "Not supported"
-#endif /* #if HAVE_MMAP */
-
const ErtsMsegOpt_t erts_mseg_default_opt = {
1, /* Use cache */
1, /* Preserv data */
0, /* Absolute shrink threshold */
0, /* Relative shrink threshold */
0 /* Scheduler specific */
-#if HALFWORD_HEAP
- ,0 /* need low memory */
-#endif
};
@@ -163,9 +121,7 @@ typedef struct {
CallCounter create;
CallCounter create_resize;
CallCounter destroy;
-#if HAVE_MSEG_RECREATE
CallCounter recreate;
-#endif
CallCounter clear_cache;
CallCounter check_cache;
} ErtsMsegCalls;
@@ -173,7 +129,7 @@ typedef struct {
typedef struct cache_t_ cache_t;
struct cache_t_ {
- Uint size;
+ UWord size;
void *seg;
cache_t *next;
cache_t *prev;
@@ -182,7 +138,14 @@ struct cache_t_ {
typedef struct ErtsMsegAllctr_t_ ErtsMsegAllctr_t;
-struct mem_kind_t {
+struct ErtsMsegAllctr_t_ {
+ int ix;
+
+ int is_init_done;
+ int is_thread_safe;
+ erts_mtx_t mtx;
+
+ int is_cache_check_scheduled;
cache_t cache[MAX_CACHE_SIZE];
cache_t cache_unpowered_node;
@@ -208,39 +171,11 @@ struct mem_kind_t {
} max_ever;
} segments;
- ErtsMsegAllctr_t *ma;
- const char* name;
- MemKind* next;
-};/*MemKind*/
-
-struct ErtsMsegAllctr_t_ {
- int ix;
-
- int is_init_done;
- int is_thread_safe;
- erts_mtx_t mtx;
-
- int is_cache_check_scheduled;
-
- MemKind* mk_list;
-
-#if HALFWORD_HEAP
- MemKind low_mem;
- MemKind hi_mem;
-#else
- MemKind the_mem;
-#endif
-
Uint max_cache_size;
Uint abs_max_cache_bad_fit;
Uint rel_max_cache_bad_fit;
ErtsMsegCalls calls;
-
-#if CAN_PARTLY_DESTROY
- Uint min_seg_size;
-#endif
-
};
typedef union {
@@ -344,69 +279,26 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) {
}
}
-/* remove ErtsMsegAllctr_t from arguments?
- * only used for statistics
- */
-static ERTS_INLINE void *
-mmap_align(ErtsMsegAllctr_t *ma, void *addr, size_t length, int prot, int flags, int fd, off_t offset) {
-
- char *p, *q;
- UWord d;
-
- p = mmap(addr, length, prot, flags, fd, offset);
-
- if (MAP_IS_ALIGNED(p) || p == MAP_FAILED)
- return p;
-
- if (ma)
- INC_CC(ma, create_resize);
-
- munmap(p, length);
-
- if ((p = mmap(addr, length + MSEG_ALIGNED_SIZE, prot, flags, fd, offset)) == MAP_FAILED)
- return MAP_FAILED;
-
- q = (void *)ALIGNED_CEILING((char *)p);
- d = (UWord)(q - p);
-
- if (d > 0)
- munmap(p, d);
-
- if (MSEG_ALIGNED_SIZE - d > 0)
- munmap((void *)(q + length), MSEG_ALIGNED_SIZE - d);
-
- return q;
-}
+/* #define ERTS_PRINT_ERTS_MMAP */
static ERTS_INLINE void *
-mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size)
+mseg_create(ErtsMsegAllctr_t *ma, Uint flags, UWord *sizep)
{
- void *seg;
- ASSERT(size % MSEG_ALIGNED_SIZE == 0);
-
-#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- seg = pmmap(size);
- if ((unsigned long) seg & CHECK_POINTER_MASK) {
- erts_fprintf(stderr,"Pointer mask failure (0x%08lx)\n",(unsigned long) seg);
- return NULL;
- }
- } else
+#ifdef ERTS_PRINT_ERTS_MMAP
+ UWord req_size = *sizep;
#endif
- {
-#if HAVE_MMAP
- {
- seg = (void *) mmap_align(ma, (void *) 0, (size_t) size,
- MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0);
- if (seg == (void *) MAP_FAILED)
- seg = NULL;
-
- ASSERT(MAP_IS_ALIGNED(seg) || !seg);
- }
-#else
-# error "Missing mseg_create() implementation"
+ void *seg;
+ Uint32 mmap_flags = 0;
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
+
+ seg = erts_mmap(&erts_dflt_mmapper, mmap_flags, sizep);
+
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg,
+ (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ req_size, *sizep);
#endif
- }
INC_CC(ma, create);
@@ -414,91 +306,45 @@ mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size)
}
static ERTS_INLINE void
-mseg_destroy(ErtsMsegAllctr_t *ma, MemKind* mk, void *seg, Uint size) {
- ERTS_DECLARE_DUMMY(int res);
-
-#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- res = pmunmap((void *) seg, size);
- }
- else
-#endif
- {
-#ifdef HAVE_MMAP
- res = munmap((void *) seg, size);
-#else
-# error "Missing mseg_destroy() implementation"
+mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, void *seg_p, UWord size) {
+
+ Uint32 mmap_flags = 0;
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
+
+ erts_munmap(&erts_dflt_mmapper, mmap_flags, seg_p, size);
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n",
+ (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ seg_p, *size);
#endif
- }
-
- ASSERT(size % MSEG_ALIGNED_SIZE == 0);
- ASSERT(res == 0);
-
INC_CC(ma, destroy);
}
-#if HAVE_MSEG_RECREATE
-#if defined(__NetBSD__)
-#define MREMAP_FLAGS (0)
-#else
-#define MREMAP_FLAGS (MREMAP_MAYMOVE)
-#endif
-
-
-/* mseg_recreate
- * May return *unaligned* segments as in address not aligned to MSEG_ALIGNMENT
- * it is still page aligned
- *
- * This is fine for single block carriers as long as we don't cache misaligned
- * segments (since multiblock carriers may use them)
- *
- * For multiblock carriers we *need* MSEG_ALIGNMENT but mbc's will never be
- * reallocated.
- *
- * This should probably be fixed the following way:
- * 1) Use an option to segment allocation - NEED_ALIGNMENT
- * 2) Add mremap_align which takes care of aligning a new a mremaped area
- * 3) Fix the cache to handle of aligned and unaligned segments
- */
-
static ERTS_INLINE void *
-mseg_recreate(ErtsMsegAllctr_t *ma, MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
+mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, void *old_seg, UWord old_size, UWord *sizep)
{
+#ifdef ERTS_PRINT_ERTS_MMAP
+ UWord req_size = *sizep;
+#endif
void *new_seg;
+ Uint32 mmap_flags = 0;
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
- ASSERT(old_size % MSEG_ALIGNED_SIZE == 0);
- ASSERT(new_size % MSEG_ALIGNED_SIZE == 0);
+ new_seg = erts_mremap(&erts_dflt_mmapper, mmap_flags, old_seg, old_size, sizep);
-#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- new_seg = (void *) pmremap((void *) old_seg,
- (size_t) old_size,
- (size_t) new_size);
- }
- else
-#endif
- {
-#if HAVE_MREMAP
-#if defined(__NetBSD__)
- new_seg = mremap(old_seg, (size_t)old_size, NULL, new_size, MREMAP_FLAGS);
-#else
- new_seg = mremap(old_seg, (size_t)old_size, (size_t)new_size, MREMAP_FLAGS);
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n",
+ new_seg, (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ old_seg, old_size, req_size, *sizep);
#endif
- if (new_seg == (void *) MAP_FAILED)
- new_seg = NULL;
-#else
-#error "Missing mseg_recreate() implementation"
-#endif
- }
-
INC_CC(ma, recreate);
return new_seg;
}
-#endif /* #if HAVE_MSEG_RECREATE */
-
#ifdef DEBUG
#define ERTS_DBG_MA_CHK_THR_ACCESS(MA) \
do { \
@@ -511,11 +357,8 @@ do { \
|| erts_smp_thr_progress_is_blocking() \
|| ERTS_IS_CRASH_DUMPING); \
} while (0)
-#define ERTS_DBG_MK_CHK_THR_ACCESS(MK) \
- ERTS_DBG_MA_CHK_THR_ACCESS((MK)->ma)
#else
#define ERTS_DBG_MA_CHK_THR_ACCESS(MA)
-#define ERTS_DBG_MK_CHK_THR_ACCESS(MK)
#endif
/* Cache interface */
@@ -528,10 +371,10 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) {
c->prev = c;
}
-static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Uint flags) {
+static ERTS_INLINE int cache_bless_segment(ErtsMsegAllctr_t *ma, void *seg, UWord size, Uint flags) {
cache_t *c;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
ASSERT(!MSEG_FLG_IS_2POW(flags) || (MSEG_FLG_IS_2POW(flags) && MAP_IS_ALIGNED(seg) && IS_2POW(size)));
@@ -540,11 +383,11 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
* Large blocks has no such cache and it is up to mseg to cache them to speed things up.
*/
- if (!erts_circleq_is_empty(&(mk->cache_free))) {
+ if (!erts_circleq_is_empty(&(ma->cache_free))) {
/* We have free slots, use one to cache the segment */
- c = erts_circleq_head(&(mk->cache_free));
+ c = erts_circleq_head(&(ma->cache_free));
erts_circleq_remove(c);
c->seg = seg;
@@ -556,30 +399,28 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
ASSERT(ix < CACHE_AREAS);
ASSERT((1 << (ix + MSEG_ALIGN_BITS)) == size);
- erts_circleq_push_head(&(mk->cache_powered_node[ix]), c);
+ erts_circleq_push_head(&(ma->cache_powered_node[ix]), c);
} else
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
- mk->cache_size++;
- ASSERT(mk->cache_size <= mk->ma->max_cache_size);
+ ma->cache_size++;
return 1;
- } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) {
-
+ } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(ma->cache_unpowered_node))) {
/* No free slots.
* Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */
- c = erts_circleq_tail(&(mk->cache_unpowered_node));
+ c = erts_circleq_tail(&(ma->cache_unpowered_node));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(ma, ERTS_MSEG_FLG_NONE, c->seg, c->size);
mseg_cache_clear_node(c);
c->seg = seg;
c->size = size;
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
return 1;
} else if (!MSEG_FLG_IS_2POW(flags)) {
@@ -593,19 +434,20 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
int i;
for( i = 0; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- c = erts_circleq_tail(&(mk->cache_powered_node[i]));
+ c = erts_circleq_tail(&(ma->cache_powered_node[i]));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(ma, ERTS_MSEG_FLG_2POW, c->seg, c->size);
+
mseg_cache_clear_node(c);
c->seg = seg;
c->size = size;
- erts_circleq_push_head(&(mk->cache_unpowered_node), c);
+ erts_circleq_push_head(&(ma->cache_unpowered_node), c);
return 1;
}
@@ -614,61 +456,60 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
return 0;
}
-static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags) {
+static ERTS_INLINE void *cache_get_segment(ErtsMsegAllctr_t *ma, UWord *size_p, Uint flags) {
- Uint size = *size_p;
+ UWord size = *size_p;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
if (MSEG_FLG_IS_2POW(flags)) {
int i, ix = SIZE_TO_CACHE_AREA_IDX(size);
- void *seg;
+ char *seg;
cache_t *c;
- Uint csize;
+ UWord csize;
ASSERT(IS_2POW(size));
for( i = ix; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- c = erts_circleq_head(&(mk->cache_powered_node[i]));
+ c = erts_circleq_head(&(ma->cache_powered_node[i]));
erts_circleq_remove(c);
ASSERT(IS_2POW(c->size));
ASSERT(MAP_IS_ALIGNED(c->seg));
csize = c->size;
- seg = c->seg;
+ seg = (char*) c->seg;
- mk->cache_size--;
- mk->cache_hits++;
+ ma->cache_size--;
+ ma->cache_hits++;
/* link to free cache list */
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
- ASSERT(!(mk->cache_size < 0));
+ ASSERT(!(ma->cache_size < 0));
- if (csize != size) {
- mseg_destroy(mk->ma, mk, (char *)seg + size, csize - size);
- }
+ if (csize != size)
+ mseg_destroy(ma, ERTS_MSEG_FLG_2POW, seg + size, csize - size);
return seg;
}
}
- else if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) {
+ else if (!erts_circleq_is_empty(&(ma->cache_unpowered_node))) {
void *seg;
cache_t *c;
cache_t *best = NULL;
- Uint bdiff = 0;
- Uint csize;
- Uint bad_max_abs = mk->ma->abs_max_cache_bad_fit;
- Uint bad_max_rel = mk->ma->rel_max_cache_bad_fit;
+ UWord bdiff = 0;
+ UWord csize;
+ UWord bad_max_abs = ma->abs_max_cache_bad_fit;
+ UWord bad_max_rel = ma->rel_max_cache_bad_fit;
- erts_circleq_foreach(c, &(mk->cache_unpowered_node)) {
+ erts_circleq_foreach(c, &(ma->cache_unpowered_node)) {
csize = c->size;
if (csize >= size) {
if (((csize - size)*100 < bad_max_rel*size) && (csize - size) < bad_max_abs ) {
@@ -677,11 +518,11 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
erts_circleq_remove(c);
- mk->cache_size--;
- mk->cache_hits++;
+ ma->cache_size--;
+ ma->cache_hits++;
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
*size_p = csize;
@@ -704,7 +545,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
ASSERT(best->seg);
ASSERT(best->size > 0);
- mk->cache_hits++;
+ ma->cache_hits++;
/* Use current cache placement for remaining segment space */
@@ -728,7 +569,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
* using callbacks from aux-work in the scheduler.
*/
-static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_one_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) {
cache_t *c = NULL;
c = erts_circleq_tail(head);
@@ -737,19 +578,19 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(ma, flags, c->seg, c->size);
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
+ erts_circleq_push_head(&(ma->cache_free), c);
- mk->segments.current.watermark--;
- mk->cache_size--;
+ ma->segments.current.watermark--;
+ ma->cache_size--;
- ASSERT( mk->cache_size >= 0 );
+ ASSERT(ma->cache_size >= 0);
- return mk->cache_size;
+ return ma->cache_size;
}
-static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_cache_size(ErtsMsegAllctr_t *ma, Uint flags, cache_t *head) {
cache_t *c = NULL;
while (!erts_circleq_is_empty(head)) {
@@ -760,58 +601,52 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head)
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(ma, flags, c->seg, c->size);
mseg_cache_clear_node(c);
- erts_circleq_push_head(&(mk->cache_free), c);
-
- mk->segments.current.watermark--;
- mk->cache_size--;
+ erts_circleq_push_head(&(ma->cache_free), c);
+ ma->segments.current.watermark--;
+ ma->cache_size--;
}
- ASSERT( mk->cache_size >= 0 );
+ ASSERT(ma->cache_size >= 0);
- return mk->cache_size;
+ return ma->cache_size;
}
-/* mseg_check_memkind_cache
- * - Check if we can empty some cached segments in this
- * MemKind.
+/* mseg_check_cache
+ * - Check if we can empty some cached segments in this allocator
*/
-static Uint mseg_check_memkind_cache(MemKind *mk) {
+static Uint mseg_check_cache(ErtsMsegAllctr_t *ma) {
int i;
- ERTS_DBG_MK_CHK_THR_ACCESS(mk);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
for (i = 0; i < CACHE_AREAS; i++) {
- if (!erts_circleq_is_empty(&(mk->cache_powered_node[i])))
- return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_powered_node[i]));
+ if (!erts_circleq_is_empty(&(ma->cache_powered_node[i])))
+ return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i]));
}
- if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_unpowered_node));
+ if (!erts_circleq_is_empty(&(ma->cache_unpowered_node)))
+ return mseg_drop_one_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node));
return 0;
}
/* mseg_cache_check
* - Check if we have some cache we can purge
- * in any of the memkinds.
*/
static void mseg_cache_check(ErtsMsegAllctr_t *ma) {
- MemKind* mk;
Uint empty_cache = 1;
ERTS_MSEG_LOCK(ma);
- for (mk = ma->mk_list; mk; mk = mk->next) {
- if (mseg_check_memkind_cache(mk))
- empty_cache = 0;
- }
+ if (mseg_check_cache(ma))
+ empty_cache = 0;
/* If all MemKinds caches are empty,
* remove aux-work callback
@@ -829,7 +664,7 @@ static void mseg_cache_check(ErtsMsegAllctr_t *ma) {
/* erts_mseg_cache_check
* - This is a callback that is scheduled as aux-work from
* schedulers and is called at some interval if we have a cache
- * on this mseg-allocator and memkind.
+ * on this mseg-allocator.
* - Purpose: Empty cache slowly so we don't collect mapped areas
* and bloat memory.
*/
@@ -839,42 +674,32 @@ void erts_mseg_cache_check(void) {
}
-/* *_mseg_clear_*_cache
+/* mseg_clear_cache
* Remove cached segments from the allocator completely
*/
-static void mseg_clear_memkind_cache(MemKind *mk) {
+
+static void mseg_clear_cache(ErtsMsegAllctr_t *ma) {
int i;
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
/* drop pow2 caches */
for (i = 0; i < CACHE_AREAS; i++) {
- if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
+ if (erts_circleq_is_empty(&(ma->cache_powered_node[i])))
continue;
- mseg_drop_memkind_cache_size(mk, &(mk->cache_powered_node[i]));
- ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i])));
+ mseg_drop_cache_size(ma, ERTS_MSEG_FLG_2POW, &(ma->cache_powered_node[i]));
+ ASSERT(erts_circleq_is_empty(&(ma->cache_powered_node[i])));
}
/* drop varied caches */
- if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- mseg_drop_memkind_cache_size(mk, &(mk->cache_unpowered_node));
+ if (!erts_circleq_is_empty(&(ma->cache_unpowered_node)))
+ mseg_drop_cache_size(ma, ERTS_MSEG_FLG_NONE, &(ma->cache_unpowered_node));
- ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node)));
- ASSERT(mk->cache_size == 0);
-}
-
-static void mseg_clear_cache(ErtsMsegAllctr_t *ma) {
- MemKind* mk;
-
- ERTS_MSEG_LOCK(ma);
- ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-
-
- for (mk = ma->mk_list; mk; mk = mk->next) {
- mseg_clear_memkind_cache(mk);
- }
+ ASSERT(erts_circleq_is_empty(&(ma->cache_unpowered_node)));
+ ASSERT(ma->cache_size == 0);
INC_CC(ma, clear_cache);
-
ERTS_MSEG_UNLOCK(ma);
}
@@ -883,53 +708,39 @@ void erts_mseg_clear_cache(void) {
mseg_clear_cache(ERTS_MSEG_ALLCTR_IX(0));
}
-
-
-static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma,
- const ErtsMsegOpt_t *opt)
-{
-#if HALFWORD_HEAP
- return opt->low_mem ? &ma->low_mem : &ma->hi_mem;
-#else
- return &ma->the_mem;
-#endif
-}
-
static void *
-mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, Uint *size_p,
+mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p,
Uint flags, const ErtsMsegOpt_t *opt)
{
- Uint size;
+ UWord size;
void *seg;
- MemKind* mk = memkind(ma, opt);
INC_CC(ma, alloc);
- /* Carrier align */
- size = ALIGNED_CEILING(*size_p);
-
- /* Cache optim (if applicable) */
- if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(size))
- size = ceil_2pow(size);
+ if (!MSEG_FLG_IS_2POW(flags))
+ size = ERTS_PAGEALIGNED_CEILING(*size_p);
+ else {
+ size = ALIGNED_CEILING(*size_p);
+ if (!IS_2POW(size)) {
+ /* Cache optim (if applicable) */
+ size = ceil_2pow(size);
+ }
+ }
-#if CAN_PARTLY_DESTROY
- if (size < ma->min_seg_size)
- ma->min_seg_size = size;
-#endif
-
- if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL)
+ if (opt->cache && ma->cache_size > 0 && (seg = cache_get_segment(ma, &size, flags)) != NULL)
goto done;
- if ((seg = mseg_create(ma, mk, size)) == NULL)
- size = 0;
+ seg = mseg_create(ma, flags, &size);
+ if (!seg)
+ *size_p = 0;
+ else {
done:
- *size_p = size;
- if (seg) {
+ *size_p = size;
if (erts_mtrace_enabled)
erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size);
- ERTS_MSEG_ALLOC_STAT(mk,size);
+ ERTS_MSEG_ALLOC_STAT(ma,size);
}
return seg;
@@ -937,14 +748,12 @@ done:
static void
-mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size,
+mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size,
Uint flags, const ErtsMsegOpt_t *opt)
{
- MemKind* mk = memkind(ma, opt);
-
- ERTS_MSEG_DEALLOC_STAT(mk,size);
+ ERTS_MSEG_DEALLOC_STAT(ma,size);
- if (opt->cache && cache_bless_segment(mk, seg, size, flags)) {
+ if (opt->cache && cache_bless_segment(ma, seg, size, flags)) {
schedule_cache_check(ma);
goto done;
}
@@ -952,7 +761,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size,
if (erts_mtrace_enabled)
erts_mtrace_crr_free(atype, SEGTYPE, seg);
- mseg_destroy(ma, mk, seg, size);
+ mseg_destroy(ma, flags, seg, size);
done:
@@ -961,11 +770,10 @@ done:
static void *
mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p, Uint flags, const ErtsMsegOpt_t *opt)
+ UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt)
{
- MemKind* mk;
void *new_seg;
- Uint new_size;
+ UWord new_size;
/* Just allocate a new segment if we didn't have one before */
if (!seg || !old_size) {
@@ -982,99 +790,55 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
return NULL;
}
- mk = memkind(ma, opt);
new_seg = seg;
- /* Carrier align */
- new_size = ALIGNED_CEILING(*new_size_p);
-
- /* Cache optim (if applicable) */
- if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(new_size))
- new_size = ceil_2pow(new_size);
-
- if (new_size == old_size)
- ;
- else if (new_size < old_size) {
- Uint shrink_sz = old_size - new_size;
+ if (!MSEG_FLG_IS_2POW(flags))
+ new_size = ERTS_PAGEALIGNED_CEILING(*new_size_p);
+ else {
+ new_size = ALIGNED_CEILING(*new_size_p);
+ if (!IS_2POW(new_size)) {
+ /* Cache optim (if applicable) */
+ new_size = ceil_2pow(new_size);
+ }
+ }
-#if CAN_PARTLY_DESTROY
- if (new_size < ma->min_seg_size)
- ma->min_seg_size = new_size;
-#endif
- /* +M<S>rsbcst <ratio> */
- if (shrink_sz < opt->abs_shrink_th
- && 100*shrink_sz < opt->rel_shrink_th*old_size) {
- new_size = old_size;
+ if (new_size > old_size) {
+ if (opt->preserv) {
+ new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size);
+ if (!new_seg)
+ new_size = old_size;
}
else {
-
-#if CAN_PARTLY_DESTROY
-
- if (erts_mtrace_enabled)
- erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
-
- mseg_destroy(ma, mk, ((char *) seg) + new_size, shrink_sz);
-
-#elif HAVE_MSEG_RECREATE
- goto do_recreate;
-#else
+ mseg_dealloc(ma, atype, seg, old_size, flags, opt);
new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
-
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
-
if (!new_seg)
- new_size = old_size;
- else {
- sys_memcpy(((char *) new_seg),
- ((char *) seg),
- MIN(new_size, old_size));
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- }
-#endif
+ new_size = 0;
}
}
- else {
+ else if (new_size < old_size) {
+ UWord shrink_sz = old_size - new_size;
- if (!opt->preserv) {
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
+ /* +M<S>rsbcst <ratio> */
+ if (shrink_sz < opt->abs_shrink_th
+ && 100*shrink_sz < opt->rel_shrink_th*old_size) {
+ new_size = old_size;
}
else {
-#if HAVE_MSEG_RECREATE
-#if !CAN_PARTLY_DESTROY
- do_recreate:
-#endif
- new_seg = mseg_recreate(ma, mk, (void *) seg, old_size, new_size);
- /* ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
- * will not always be aligned and it ok for now
- */
-
- if (erts_mtrace_enabled)
- erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
- if (!new_seg)
- new_size = old_size;
-#else
- new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
-
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
-
+ new_seg = mseg_recreate(ma, flags, (void *) seg, old_size, &new_size);
if (!new_seg)
new_size = old_size;
- else {
- sys_memcpy(((char *) new_seg), ((char *) seg), MIN(new_size, old_size));
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- }
-#endif
}
}
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
+
INC_CC(ma, realloc);
ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size));
*new_size_p = new_size;
- ERTS_MSEG_REALLOC_STAT(mk, old_size, new_size);
+ ERTS_MSEG_REALLOC_STAT(ma, old_size, new_size);
return new_seg;
}
@@ -1106,9 +870,7 @@ static struct {
Eterm mseg_create;
Eterm mseg_create_resize;
Eterm mseg_destroy;
-#if HAVE_MSEG_RECREATE
Eterm mseg_recreate;
-#endif
Eterm mseg_clear_cache;
Eterm mseg_check_cache;
@@ -1129,8 +891,6 @@ init_atoms(ErtsMsegAllctr_t *ma)
#ifdef DEBUG
Eterm *atom;
#endif
-
- ERTS_MSEG_UNLOCK(ma);
erts_mtx_lock(&init_atoms_mutex);
if (!atoms_initialized) {
@@ -1163,9 +923,7 @@ init_atoms(ErtsMsegAllctr_t *ma)
AM_INIT(mseg_create);
AM_INIT(mseg_create_resize);
AM_INIT(mseg_destroy);
-#if HAVE_MSEG_RECREATE
AM_INIT(mseg_recreate);
-#endif
AM_INIT(mseg_clear_cache);
AM_INIT(mseg_check_cache);
@@ -1176,7 +934,6 @@ init_atoms(ErtsMsegAllctr_t *ma)
#endif
}
- ERTS_MSEG_LOCK(ma);
atoms_initialized = 1;
erts_mtx_unlock(&init_atoms_mutex);
}
@@ -1239,7 +996,7 @@ info_options(ErtsMsegAllctr_t *ma,
Uint **hpp,
Uint *szp)
{
- Eterm res = THE_NON_VALUE;
+ Eterm res = NIL;
if (print_to_p) {
int to = *print_to_p;
@@ -1254,7 +1011,6 @@ info_options(ErtsMsegAllctr_t *ma,
if (!atoms_initialized)
init_atoms(ma);
- res = NIL;
add_2tup(hpp, szp, &res,
am.mcs,
bld_uint(hpp, szp, ma->max_cache_size));
@@ -1293,9 +1049,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
PRINT_CC(to, arg, create);
PRINT_CC(to, arg, create_resize);
PRINT_CC(to, arg, destroy);
-#if HAVE_MSEG_RECREATE
PRINT_CC(to, arg, recreate);
-#endif
PRINT_CC(to, arg, clear_cache);
PRINT_CC(to, arg, check_cache);
@@ -1316,12 +1070,10 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
bld_unstable_uint(hpp, szp, ma->calls.clear_cache.giga_no),
bld_unstable_uint(hpp, szp, ma->calls.clear_cache.no));
-#if HAVE_MSEG_RECREATE
add_3tup(hpp, szp, &res,
am.mseg_recreate,
bld_unstable_uint(hpp, szp, ma->calls.recreate.giga_no),
bld_unstable_uint(hpp, szp, ma->calls.recreate.no));
-#endif
add_3tup(hpp, szp, &res,
am.mseg_destroy,
bld_unstable_uint(hpp, szp, ma->calls.destroy.giga_no),
@@ -1354,88 +1106,95 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
}
static Eterm
-info_status(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
- int begin_new_max_period, Uint **hpp, Uint *szp)
+info_status(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg,
+ int begin_new_max_period, int only_sz, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
- if (mk->segments.max_ever.no < mk->segments.max.no)
- mk->segments.max_ever.no = mk->segments.max.no;
- if (mk->segments.max_ever.sz < mk->segments.max.sz)
- mk->segments.max_ever.sz = mk->segments.max.sz;
+ if (ma->segments.max_ever.no < ma->segments.max.no)
+ ma->segments.max_ever.no = ma->segments.max.no;
+ if (ma->segments.max_ever.sz < ma->segments.max.sz)
+ ma->segments.max_ever.sz = ma->segments.max.sz;
if (print_to_p) {
int to = *print_to_p;
void *arg = print_to_arg;
- erts_print(to, arg, "cached_segments: %beu\n", mk->cache_size);
- erts_print(to, arg, "cache_hits: %beu\n", mk->cache_hits);
- erts_print(to, arg, "segments: %beu %beu %beu\n",
- mk->segments.current.no, mk->segments.max.no, mk->segments.max_ever.no);
- erts_print(to, arg, "segments_size: %beu %beu %beu\n",
- mk->segments.current.sz, mk->segments.max.sz, mk->segments.max_ever.sz);
- erts_print(to, arg, "segments_watermark: %beu\n",
- mk->segments.current.watermark);
+ if (!only_sz) {
+ erts_print(to, arg, "cached_segments: %beu\n", ma->cache_size);
+ erts_print(to, arg, "cache_hits: %beu\n", ma->cache_hits);
+ erts_print(to, arg, "segments: %beu %beu %beu\n",
+ ma->segments.current.no, ma->segments.max.no, ma->segments.max_ever.no);
+ erts_print(to, arg, "segments_watermark: %beu\n",
+ ma->segments.current.watermark);
+ }
+ erts_print(to, arg, "segments_size: %beu %beu %beu\n",
+ ma->segments.current.sz, ma->segments.max.sz, ma->segments.max_ever.sz);
}
if (hpp || szp) {
res = NIL;
- add_2tup(hpp, szp, &res,
- am.segments_watermark,
- bld_unstable_uint(hpp, szp, mk->segments.current.watermark));
- add_4tup(hpp, szp, &res,
- am.segments_size,
- bld_unstable_uint(hpp, szp, mk->segments.current.sz),
- bld_unstable_uint(hpp, szp, mk->segments.max.sz),
- bld_unstable_uint(hpp, szp, mk->segments.max_ever.sz));
- add_4tup(hpp, szp, &res,
- am.segments,
- bld_unstable_uint(hpp, szp, mk->segments.current.no),
- bld_unstable_uint(hpp, szp, mk->segments.max.no),
- bld_unstable_uint(hpp, szp, mk->segments.max_ever.no));
- add_2tup(hpp, szp, &res,
- am.cache_hits,
- bld_unstable_uint(hpp, szp, mk->cache_hits));
- add_2tup(hpp, szp, &res,
- am.cached_segments,
- bld_unstable_uint(hpp, szp, mk->cache_size));
-
+ add_4tup(hpp, szp, &res,
+ am.segments_size,
+ bld_unstable_uint(hpp, szp, ma->segments.current.sz),
+ bld_unstable_uint(hpp, szp, ma->segments.max.sz),
+ bld_unstable_uint(hpp, szp, ma->segments.max_ever.sz));
+ if (!only_sz) {
+ add_2tup(hpp, szp, &res,
+ am.segments_watermark,
+ bld_unstable_uint(hpp, szp, ma->segments.current.watermark));
+ add_4tup(hpp, szp, &res,
+ am.segments,
+ bld_unstable_uint(hpp, szp, ma->segments.current.no),
+ bld_unstable_uint(hpp, szp, ma->segments.max.no),
+ bld_unstable_uint(hpp, szp, ma->segments.max_ever.no));
+ add_2tup(hpp, szp, &res,
+ am.cache_hits,
+ bld_unstable_uint(hpp, szp, ma->cache_hits));
+ add_2tup(hpp, szp, &res,
+ am.cached_segments,
+ bld_unstable_uint(hpp, szp, ma->cache_size));
+ }
}
if (begin_new_max_period) {
- mk->segments.max.no = mk->segments.current.no;
- mk->segments.max.sz = mk->segments.current.sz;
+ ma->segments.max.no = ma->segments.current.no;
+ ma->segments.max.sz = ma->segments.current.sz;
}
return res;
}
-static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
- int begin_max_per, Uint **hpp, Uint *szp)
+static Eterm info_memkind(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg,
+ int begin_max_per, int only_sz, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
Eterm atoms[3];
Eterm values[3];
- if (print_to_p) {
- erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", mk->name);
+ if (!only_sz) {
+ if (print_to_p) {
+ erts_print(*print_to_p, print_to_arg, "memory kind: %s\n", "all memory");
+ }
+ if (hpp || szp) {
+ atoms[0] = am.name;
+ atoms[1] = am.status;
+ atoms[2] = am.calls;
+ values[0] = erts_bld_string(hpp, szp, "all memory");
+ }
}
- if (hpp || szp) {
- atoms[0] = am.name;
- atoms[1] = am.status;
- atoms[2] = am.calls;
- values[0] = erts_bld_string(hpp, szp, mk->name);
- }
- values[1] = info_status(ma, mk, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp);
+ res = info_status(ma, print_to_p, print_to_arg, begin_max_per, only_sz, hpp, szp);
+ if (!only_sz) {
+ values[1] = res;
+ values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp);
- if (hpp || szp)
- res = bld_2tup_list(hpp, szp, 3, atoms, values);
+ if (hpp || szp)
+ res = bld_2tup_list(hpp, szp, 3, atoms, values);
+ }
return res;
}
-
static Eterm
info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
{
@@ -1465,14 +1224,8 @@ erts_mseg_info_options(int ix,
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix);
Eterm res;
- ERTS_MSEG_LOCK(ma);
-
- ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-
res = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
- ERTS_MSEG_UNLOCK(ma);
-
return res;
}
@@ -1481,6 +1234,7 @@ erts_mseg_info(int ix,
int *print_to_p,
void *print_to_arg,
int begin_max_per,
+ int only_sz,
Uint **hpp,
Uint *szp)
{
@@ -1490,30 +1244,30 @@ erts_mseg_info(int ix,
Eterm values[4];
Uint n = 0;
- ERTS_MSEG_LOCK(ma);
+ if (hpp || szp) {
+ if (!atoms_initialized)
+ init_atoms(ma);
+ }
+ if (!only_sz) {
+ if (hpp || szp) {
+ atoms[0] = am.version;
+ atoms[1] = am.options;
+ atoms[2] = am.memkind;
+ }
+ values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
+ values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
+ }
+ ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- if (hpp || szp) {
-
- if (!atoms_initialized)
- init_atoms(ma);
+ res = info_memkind(ma, print_to_p, print_to_arg, begin_max_per, only_sz, hpp, szp);
- atoms[0] = am.version;
- atoms[1] = am.options;
- atoms[2] = am.memkind;
- atoms[3] = am.memkind;
+ if (!only_sz) {
+ values[n++] = res;
+ if (hpp || szp)
+ res = bld_2tup_list(hpp, szp, n, atoms, values);
}
- values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
- values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
-#if HALFWORD_HEAP
- values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
-#else
- values[n++] = info_memkind(ma, &ma->the_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
-#endif
- if (hpp || szp)
- res = bld_2tup_list(hpp, szp, n, atoms, values);
ERTS_MSEG_UNLOCK(ma);
@@ -1521,7 +1275,7 @@ erts_mseg_info(int ix,
}
void *
-erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMsegOpt_t *opt)
+erts_mseg_alloc_opt(ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *seg;
@@ -1529,20 +1283,23 @@ erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMse
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
seg = mseg_alloc(ma, atype, size_p, flags, opt);
ERTS_MSEG_UNLOCK(ma);
+ HARD_DBG_INSERT_MSEG(seg, *size_p);
return seg;
}
void *
-erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p, Uint flags)
+erts_mseg_alloc(ErtsAlcType_t atype, UWord *size_p, Uint flags)
{
return erts_mseg_alloc_opt(atype, size_p, flags, &erts_mseg_default_opt);
}
void
erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg,
- Uint size, Uint flags, const ErtsMsegOpt_t *opt)
+ UWord size, Uint flags, const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
+
+ HARD_DBG_REMOVE_MSEG(seg, size);
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
mseg_dealloc(ma, atype, seg, size, flags, opt);
@@ -1550,29 +1307,32 @@ erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg,
}
void
-erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size, Uint flags)
+erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, UWord size, Uint flags)
{
erts_mseg_dealloc_opt(atype, seg, size, flags, &erts_mseg_default_opt);
}
void *
erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p,
+ UWord old_size, UWord *new_size_p,
Uint flags,
const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *new_seg;
+
+ HARD_DBG_REMOVE_MSEG(seg, old_size);
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
new_seg = mseg_realloc(ma, atype, seg, old_size, new_size_p, flags, opt);
ERTS_MSEG_UNLOCK(ma);
+ HARD_DBG_INSERT_MSEG(new_seg, *new_size_p);
return new_seg;
}
void *
erts_mseg_realloc(ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p, Uint flags)
+ UWord old_size, UWord *new_size_p, Uint flags)
{
return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p,
flags, &erts_mseg_default_opt);
@@ -1582,13 +1342,10 @@ Uint
erts_mseg_no(const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
- MemKind* mk;
- Uint n = 0;
+ Uint n;
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- for (mk=ma->mk_list; mk; mk=mk->next) {
- n += mk->segments.current.no;
- }
+ n = ma->segments.current.no;
ERTS_MSEG_UNLOCK(ma);
return n;
}
@@ -1600,16 +1357,16 @@ erts_mseg_unit_size(void)
}
-static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name)
+static void mem_cache_init(ErtsMsegAllctr_t *ma)
{
int i;
/* Clear all cache headers */
- mseg_cache_clear_node(&(mk->cache_free));
- mseg_cache_clear_node(&(mk->cache_unpowered_node));
+ mseg_cache_clear_node(&(ma->cache_free));
+ mseg_cache_clear_node(&(ma->cache_unpowered_node));
for (i = 0; i < CACHE_AREAS; i++) {
- mseg_cache_clear_node(&(mk->cache_powered_node[i]));
+ mseg_cache_clear_node(&(ma->cache_powered_node[i]));
}
/* Populate cache free list */
@@ -1617,25 +1374,20 @@ static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name)
ASSERT(ma->max_cache_size <= MAX_CACHE_SIZE);
for (i = 0; i < ma->max_cache_size; i++) {
- mseg_cache_clear_node(&(mk->cache[i]));
- erts_circleq_push_head(&(mk->cache_free), &(mk->cache[i]));
+ mseg_cache_clear_node(&(ma->cache[i]));
+ erts_circleq_push_head(&(ma->cache_free), &(ma->cache[i]));
}
- mk->cache_size = 0;
- mk->cache_hits = 0;
-
- mk->segments.current.watermark = 0;
- mk->segments.current.no = 0;
- mk->segments.current.sz = 0;
- mk->segments.max.no = 0;
- mk->segments.max.sz = 0;
- mk->segments.max_ever.no = 0;
- mk->segments.max_ever.sz = 0;
-
- mk->ma = ma;
- mk->name = name;
- mk->next = ma->mk_list;
- ma->mk_list = mk;
+ ma->cache_size = 0;
+ ma->cache_hits = 0;
+
+ ma->segments.current.watermark = 0;
+ ma->segments.current.no = 0;
+ ma->segments.current.sz = 0;
+ ma->segments.max.no = 0;
+ ma->segments.max.sz = 0;
+ ma->segments.max_ever.no = 0;
+ ma->segments.max_ever.sz = 0;
}
void
@@ -1662,18 +1414,19 @@ erts_mseg_init(ErtsMsegInit_t *init)
erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
-#if HAVE_MMAP && !defined(MAP_ANON)
- mmap_fd = open("/dev/zero", O_RDWR);
- if (mmap_fd < 0)
- erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n");
+#ifdef ERTS_ALC_A_EXEC
+ /* Initialize erts_exec_mapper *FIRST*, to increase probability
+ * of getting low memory for HiPE AMD64's small code model.
+ */
+ erts_mmap_init(&erts_exec_mmapper, &init->exec_mmap, 1);
#endif
-
-#if HAVE_MMAP && HALFWORD_HEAP
- initialize_pmmap();
+ erts_mmap_init(&erts_dflt_mmapper, &init->dflt_mmap, 0);
+#if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
+ erts_mmap_init(&erts_literal_mmapper, &init->literal_mmap, 0);
#endif
if (!IS_2POW(GET_PAGE_SIZE))
- erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE);
+ erts_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE);
ASSERT((MSEG_ALIGNED_SIZE % GET_PAGE_SIZE) == 0);
@@ -1702,33 +1455,17 @@ erts_mseg_init(ErtsMsegInit_t *init)
if (ma->max_cache_size > MAX_CACHE_SIZE)
ma->max_cache_size = MAX_CACHE_SIZE;
- ma->mk_list = NULL;
-
-#if HALFWORD_HEAP
- mem_kind_init(ma, &ma->low_mem, "low memory");
- mem_kind_init(ma, &ma->hi_mem, "high memory");
-#else
- mem_kind_init(ma, &ma->the_mem, "all memory");
-#endif
+ mem_cache_init(ma);
sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls));
-
-#if CAN_PARTLY_DESTROY
- ma->min_seg_size = ~((Uint) 0);
-#endif
}
}
static ERTS_INLINE Uint tot_cache_size(ErtsMsegAllctr_t *ma)
{
- MemKind* mk;
- Uint sz = 0;
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- for (mk=ma->mk_list; mk; mk=mk->next) {
- sz += mk->cache_size;
- }
- return sz;
+ return ma->cache_size;
}
/*
@@ -1757,7 +1494,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
case 0x400: /* Have erts_mseg */
return (UWord) 1;
case 0x401:
- return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1, (Uint) 0);
+ return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (UWord *) a1, (Uint) 0);
case 0x402:
erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, (Uint) 0);
return (UWord) 0;
@@ -1765,7 +1502,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
return (UWord) erts_mseg_realloc(ERTS_ALC_A_INVALID,
(void *) a1,
(Uint) a2,
- (Uint *) a3,
+ (UWord *) a3,
(Uint) 0);
case 0x404:
erts_mseg_clear_cache();
@@ -1788,405 +1525,3 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
}
}
-
-
-#if HALFWORD_HEAP
-/*
- * Very simple page oriented mmap replacer. Works in the lower
- * 32 bit address range of a 64bit program.
- * Implements anonymous mmap mremap and munmap with address order first fit.
- * The free list is expected to be very short...
- * To be used for compressed pointers in Erlang halfword emulator
- * implementation. The MacOS X version is more of a toy, it's not really
- * for production as the halfword erlang VM relies on Linux specific memory
- * mapping tricks.
- */
-
-/* #define HARDDEBUG 1 */
-
-#ifdef HARDDEBUG
-static void dump_freelist(void)
-{
- FreeBlock *p = first;
-
- while (p) {
- fprintf(stderr, "p = %p\r\np->num = %ld\r\np->next = %p\r\n\r\n",
- (void *) p, (unsigned long) p->num, (void *) p->next);
- p = p->next;
- }
-}
-
-#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) \
- fprintf(stderr,"Mapping of address %p with size %ld " \
- "does not map complete pages (%s:%d)\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-
-#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) \
- fprintf(stderr,"Mapping of address %p with size %ld " \
- "is not page aligned (%s:%d)\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-
-#define HARDDEBUG_MAP_FAILED(PTR, SZ) \
- fprintf(stderr, "Could not actually map memory " \
- "at address %p with size %ld (%s:%d) ..\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-#else
-#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) do{}while(0)
-#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) do{}while(0)
-#define HARDDEBUG_MAP_FAILED(PTR, SZ) do{}while(0)
-#endif
-
-
-#ifdef __APPLE__
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#define INIT_LOCK() do {erts_mtx_init(&pmmap_mutex, "pmmap");} while(0)
-
-#define TAKE_LOCK() do {erts_mtx_lock(&pmmap_mutex);} while(0)
-
-#define RELEASE_LOCK() do {erts_mtx_unlock(&pmmap_mutex);} while(0)
-
-static erts_mtx_t pmmap_mutex; /* Also needed when !USE_THREADS */
-
-typedef struct _free_block {
- unsigned long num; /*pages*/
- struct _free_block *next;
-} FreeBlock;
-
-/* Protect with lock */
-static FreeBlock *first;
-
-static void *do_map(void *ptr, size_t sz)
-{
- void *res;
-
- if (ALIGNED_CEILING(sz) != sz) {
- HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz);
- return NULL;
- }
-
- if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) {
- HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz);
- return NULL;
- }
-
-#if HAVE_MMAP
- res = mmap(ptr, sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE |
- MAP_ANONYMOUS | MAP_FIXED,
- -1 , 0);
-#else
-# error "Missing mmap support"
-#endif
-
- if (res == MAP_FAILED) {
- HARDDEBUG_MAP_FAILED(ptr, sz);
- return NULL;
- }
-
- return res;
-}
-
-static int do_unmap(void *ptr, size_t sz)
-{
- void *res;
-
- if (ALIGNED_CEILING(sz) != sz) {
- HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz);
- return 1;
- }
-
- if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) {
- HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz);
- return 1;
- }
-
- res = mmap(ptr, sz,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED,
- -1 , 0);
-
- if (res == MAP_FAILED) {
- HARDDEBUG_MAP_FAILED(ptr, sz);
- return 1;
- }
-
- return 0;
-}
-
-#ifdef __APPLE__
-/*
- * The first 4 gig's are protected on Macos X for 64bit processes :(
- * The range 0x1000000000 - 0x10FFFFFFFF is selected as an arbitrary
- * value of a normally unused range... Real MMAP's will avoid
- * it and all 32bit compressed pointers can be in that range...
- * More expensive than on Linux where expansion of compressed
- * poiters involves no masking (as they are in the first 4 gig's).
- * It's also very uncertain if the MAP_NORESERVE flag really has
- * any effect in MacOS X. Swap space may always be allocated...
- */
-#define SET_RANGE_MIN() /* nothing */
-#define RANGE_MIN 0x1000000000UL
-#define RANGE_MAX 0x1100000000UL
-#define RANGE_MASK (RANGE_MIN)
-#define EXTRA_MAP_FLAGS (MAP_FIXED)
-#else
-static size_t range_min;
-#define SET_RANGE_MIN() do { range_min = (size_t) sbrk(0); } while (0)
-#define RANGE_MIN range_min
-#define RANGE_MAX 0x100000000UL
-#define RANGE_MASK 0UL
-#define EXTRA_MAP_FLAGS (0)
-#endif
-
-static int initialize_pmmap(void)
-{
- char *p,*q,*rptr;
- size_t rsz;
- FreeBlock *initial;
-
- SET_RANGE_MIN();
- if (sizeof(void *) != 8) {
- erl_exit(1,"Halfword emulator cannot be run in 32bit mode");
- }
-
- p = (char *) RANGE_MIN;
- q = (char *) RANGE_MAX;
-
- rsz = ALIGNED_FLOOR(q - p);
-
- rptr = mmap_align(NULL, (void *) p, rsz,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS |
- MAP_NORESERVE | EXTRA_MAP_FLAGS,
- -1 , 0);
-#ifdef HARDDEBUG
- printf("p=%p, rsz = %ld, pages = %ld, got range = %p -> %p\r\n",
- p, (unsigned long) rsz, (unsigned long) (rsz / MSEG_ALIGNED_SIZE),
- (void *) rptr, (void*)(rptr + rsz));
-#endif
- if ((UWord)(rptr + rsz) > RANGE_MAX) {
- size_t rsz_trunc = RANGE_MAX - (UWord)rptr;
-#ifdef HARDDEBUG
- printf("Reducing mmap'ed memory from %lu to %lu Mb, reduced range = %p -> %p\r\n",
- rsz/(1024*1024), rsz_trunc/(1024*1024), rptr, rptr+rsz_trunc);
-#endif
- munmap((void*)RANGE_MAX, rsz - rsz_trunc);
- rsz = rsz_trunc;
- }
- if (!do_map(rptr, MSEG_ALIGNED_SIZE)) {
- erl_exit(1,"Could not actually mmap first page for halfword emulator...\n");
- }
- initial = (FreeBlock *) rptr;
- initial->num = (rsz / MSEG_ALIGNED_SIZE);
- initial->next = NULL;
- first = initial;
- INIT_LOCK();
- return 0;
-}
-
-static void *pmmap(size_t size)
-{
- size_t real_size = ALIGNED_CEILING(size);
- size_t num_pages = real_size / MSEG_ALIGNED_SIZE;
- FreeBlock **block;
- FreeBlock *tail;
- FreeBlock *res;
-
- TAKE_LOCK();
-
- for (block = &first;
- *block != NULL && (*block)->num < num_pages;
- block = &((*block)->next))
- ;
- if (!(*block)) {
- RELEASE_LOCK();
- return NULL;
- }
- if ((*block)->num == num_pages) {
- /* nice, perfect fit */
- res = *block;
- *block = (*block)->next;
- } else {
- tail = (FreeBlock *) (((char *) ((void *) (*block))) + real_size);
- if (!do_map(tail, MSEG_ALIGNED_SIZE)) {
- HARDDEBUG_MAP_FAILED(tail, MSEG_ALIGNED_SIZE);
- RELEASE_LOCK();
- return NULL;
- }
- tail->num = (*block)->num - num_pages;
- tail->next = (*block)->next;
- res = *block;
- *block = tail;
- }
-
- RELEASE_LOCK();
-
- if (!do_map(res, real_size)) {
- HARDDEBUG_MAP_FAILED(res, real_size);
- return NULL;
- }
-
- return (void *) res;
-}
-
-static int pmunmap(void *p, size_t size)
-{
- size_t real_size = ALIGNED_CEILING(size);
- size_t num_pages = real_size / MSEG_ALIGNED_SIZE;
-
- FreeBlock *block;
- FreeBlock *last;
- FreeBlock *nb = (FreeBlock *) p;
-
- ASSERT(((unsigned long)p & CHECK_POINTER_MASK)==0);
-
- if (real_size > MSEG_ALIGNED_SIZE) {
- if (do_unmap(((char *) p) + MSEG_ALIGNED_SIZE, real_size - MSEG_ALIGNED_SIZE)) {
- return 1;
- }
- }
-
- TAKE_LOCK();
-
- last = NULL;
- block = first;
- while(block != NULL && ((void *) block) < p) {
- last = block;
- block = block->next;
- }
-
- if (block != NULL &&
- ((void *) block) == ((void *) (((char *) p) + real_size))) {
- /* Merge new free block with following */
- nb->num = block->num + num_pages;
- nb->next = block->next;
- if (do_unmap(block, MSEG_ALIGNED_SIZE)) {
- RELEASE_LOCK();
- return 1;
- }
- } else {
- /* just link in */
- nb->num = num_pages;
- nb->next = block;
- }
- if (last != NULL) {
- if (p == ((void *) (((char *) last) + (last->num * MSEG_ALIGNED_SIZE)))) {
- /* Merge with previous */
- last->num += nb->num;
- last->next = nb->next;
- if (do_unmap(nb, MSEG_ALIGNED_SIZE)) {
- RELEASE_LOCK();
- return 1;
- }
- } else {
- last->next = nb;
- }
- } else {
- first = nb;
- }
- RELEASE_LOCK();
- return 0;
-}
-
-static void *pmremap(void *old_address, size_t old_size,
- size_t new_size)
-{
- size_t new_real_size = ALIGNED_CEILING(new_size);
- size_t new_num_pages = new_real_size / MSEG_ALIGNED_SIZE;
- size_t old_real_size = ALIGNED_CEILING(old_size);
- size_t old_num_pages = old_real_size / MSEG_ALIGNED_SIZE;
- if (new_num_pages == old_num_pages) {
- return old_address;
- } else if (new_num_pages < old_num_pages) { /* Shrink */
- size_t nfb_pages = old_num_pages - new_num_pages;
- size_t nfb_real_size = old_real_size - new_real_size;
- void *vnfb = (void *) (((char *)old_address) + new_real_size);
- FreeBlock *nfb = (FreeBlock *) vnfb;
- FreeBlock **block;
- TAKE_LOCK();
- for (block = &first;
- *block != NULL && (*block) < nfb;
- block = &((*block)->next))
- ;
- if (!(*block) ||
- (*block) > ((FreeBlock *)(((char *) vnfb) + nfb_real_size))) {
- /* Normal link in */
- if (nfb_pages > 1) {
- if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE),
- (nfb_pages - 1)*MSEG_ALIGNED_SIZE)) {
- return NULL;
- }
- }
- nfb->next = (*block);
- nfb->num = nfb_pages;
- (*block) = nfb;
- } else { /* block merge */
- nfb->next = (*block)->next;
- nfb->num = nfb_pages + (*block)->num;
- /* unmap also the first page of the next freeblock */
- (*block) = nfb;
- if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE),
- nfb_pages*MSEG_ALIGNED_SIZE)) {
- return NULL;
- }
- }
- RELEASE_LOCK();
- return old_address;
- } else { /* Enlarge */
- FreeBlock **block;
- void *old_end = (void *) (((char *)old_address) + old_real_size);
- TAKE_LOCK();
- for (block = &first;
- *block != NULL && (*block) < (FreeBlock *) old_address;
- block = &((*block)->next))
- ;
- if ((*block) == NULL || old_end > ((void *) RANGE_MAX) ||
- (*block) != old_end ||
- (*block)->num < (new_num_pages - old_num_pages)) {
- /* cannot extend */
- void *result;
- RELEASE_LOCK();
- result = pmmap(new_size);
- if (result == NULL) {
- return NULL;
- }
- memcpy(result,old_address,old_size);
- if (pmunmap(old_address,old_size)) {
- /* Oups... */
- pmunmap(result,new_size);
- return NULL;
- }
- return result;
- } else { /* extend */
- size_t remaining_pages = (*block)->num -
- (new_num_pages - old_num_pages);
- if (!remaining_pages) {
- void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE);
- void *n = (*block)->next;
- size_t x = ((*block)->num - 1) * MSEG_ALIGNED_SIZE;
- if (x > 0) {
- if (do_map(p,x) == NULL) {
- RELEASE_LOCK();
- return NULL;
- }
- }
- (*block) = n;
- } else {
- FreeBlock *nfb = (FreeBlock *) ((void *)
- (((char *) old_address) +
- new_real_size));
- void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE);
- if (do_map(p,new_real_size - old_real_size) == NULL) {
- RELEASE_LOCK();
- return NULL;
- }
- nfb->num = remaining_pages;
- nfb->next = (*block)->next;
- (*block) = nfb;
- }
- RELEASE_LOCK();
- return old_address;
- }
- }
-}
-#endif /* HALFWORD_HEAP */
diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h
index a4f250ceab..a43b409e94 100644
--- a/erts/emulator/sys/common/erl_mseg.h
+++ b/erts/emulator/sys/common/erl_mseg.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -22,35 +23,25 @@
#include "sys.h"
#include "erl_alloc_types.h"
+#include "erl_mmap.h"
-#ifndef HAVE_MMAP
-# define HAVE_MMAP 0
-#endif
-#ifndef HAVE_MREMAP
-# define HAVE_MREMAP 0
-#endif
-
-#if HAVE_MMAP
+/*
+ * We currently only enable mseg_alloc if we got
+ * a genuine mmap()/munmap() primitive. It is possible
+ * to utilize erts_mmap() withiout a mmap support but
+ * alloc_util needs to be prepared before we can do
+ * that.
+ */
+#ifdef ERTS_HAVE_GENUINE_OS_MMAP
# define HAVE_ERTS_MSEG 1
-# define HAVE_SUPER_ALIGNED_MB_CARRIERS 1
+# define ERTS_HAVE_MSEG_SUPER_ALIGNED 1
#else
# define HAVE_ERTS_MSEG 0
-# define HAVE_SUPER_ALIGNED_MB_CARRIERS 0
+# define ERTS_HAVE_MSEG_SUPER_ALIGNED 0
#endif
-#if HAVE_SUPER_ALIGNED_MB_CARRIERS
-# define MSEG_ALIGN_BITS (18)
- /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
-#else
-/* If we don't use super aligned multiblock carriers
- * we will mmap with page size alignment (and thus use corresponding
- * align bits).
- *
- * Current implementation needs this to be a constant and
- * only uses this for user dev testing so setting page size
- * to 4096 (12 bits) is fine.
- */
-# define MSEG_ALIGN_BITS (12)
+#if ERTS_HAVE_MSEG_SUPER_ALIGNED
+# define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS
#endif
#if HAVE_ERTS_MSEG
@@ -68,6 +59,9 @@ typedef struct {
Uint rmcbf;
Uint mcs;
Uint nos;
+ ErtsMMapInit dflt_mmap;
+ ErtsMMapInit literal_mmap;
+ ErtsMMapInit exec_mmap;
} ErtsMsegInit_t;
#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \
@@ -75,7 +69,10 @@ typedef struct {
4*1024*1024, /* amcbf: Absolute max cache bad fit */ \
20, /* rmcbf: Relative max cache bad fit */ \
10, /* mcs: Max cache size */ \
- 1000 /* cci: Cache check interval */ \
+ 1000, /* cci: Cache check interval */ \
+ ERTS_MMAP_INIT_DEFAULT_INITER, \
+ ERTS_MMAP_INIT_LITERAL_INITER, \
+ ERTS_MMAP_INIT_HIPE_EXEC_INITER \
}
typedef struct {
@@ -84,19 +81,16 @@ typedef struct {
UWord abs_shrink_th;
UWord rel_shrink_th;
int sched_spec;
-#if HALFWORD_HEAP
- int low_mem;
-#endif
} ErtsMsegOpt_t;
extern const ErtsMsegOpt_t erts_mseg_default_opt;
-void *erts_mseg_alloc(ErtsAlcType_t, Uint *, Uint);
-void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, Uint, const ErtsMsegOpt_t *);
-void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint, Uint);
-void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, Uint, const ErtsMsegOpt_t *);
-void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *, Uint);
-void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *, Uint, const ErtsMsegOpt_t *);
+void *erts_mseg_alloc(ErtsAlcType_t, UWord *, Uint);
+void *erts_mseg_alloc_opt(ErtsAlcType_t, UWord *, Uint, const ErtsMsegOpt_t *);
+void erts_mseg_dealloc(ErtsAlcType_t, void *, UWord, Uint);
+void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, UWord, Uint, const ErtsMsegOpt_t *);
+void *erts_mseg_realloc(ErtsAlcType_t, void *, UWord, UWord *, Uint);
+void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, UWord, UWord *, Uint, const ErtsMsegOpt_t *);
void erts_mseg_clear_cache(void);
void erts_mseg_cache_check(void);
Uint erts_mseg_no( const ErtsMsegOpt_t *);
@@ -105,7 +99,7 @@ void erts_mseg_init(ErtsMsegInit_t *init);
void erts_mseg_late_init(void); /* Have to be called after all allocators,
threads and timers have been initialized. */
Eterm erts_mseg_info_options(int, int *, void*, Uint **, Uint *);
-Eterm erts_mseg_info(int, int *, void*, int, Uint **, Uint *);
+Eterm erts_mseg_info(int, int *, void*, int, int, Uint **, Uint *);
#endif /* #if HAVE_ERTS_MSEG */
diff --git a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
index 408aa7e016..fc871f94f1 100644
--- a/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
+++ b/erts/emulator/sys/common/erl_mtrace_sys_wrap.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2004-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2004-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.c b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c
new file mode 100644
index 0000000000..d53190fdd5
--- /dev/null
+++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.c
@@ -0,0 +1,89 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2015-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include "erl_os_monotonic_time_extender.h"
+
+#ifdef USE_THREADS
+
+static void *os_monotonic_time_extender(void *vstatep)
+{
+ ErtsOsMonotonicTimeExtendState *state = (ErtsOsMonotonicTimeExtendState *) vstatep;
+ long sleep_time = state->check_interval*1000;
+ Uint32 (*raw_os_mtime)(void) = state->raw_os_monotonic_time;
+ Uint32 last_msb = 0;
+
+ while (1) {
+ Uint32 msb = (*raw_os_mtime)() & (((Uint32) 1) << 31);
+
+ if (msb != last_msb) {
+ int ix = ((int) (last_msb >> 31)) & 1;
+ Uint32 xtnd = (Uint32) erts_atomic32_read_nob(&state->extend[ix]);
+ erts_atomic32_set_nob(&state->extend[ix], (erts_aint32_t) (xtnd + 1));
+ last_msb = msb;
+ }
+ erts_milli_sleep(sleep_time);
+ }
+
+ erts_exit(ERTS_ABORT_EXIT, "os_monotonic_time_extender thread terminating");
+ return NULL;
+}
+
+static erts_tid_t os_monotonic_extender_tid;
+#endif
+
+void
+erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep,
+ Uint32 (*raw_os_monotonic_time)(void),
+ int check_seconds)
+{
+#ifdef USE_THREADS
+ statep->raw_os_monotonic_time = raw_os_monotonic_time;
+ erts_atomic32_init_nob(&statep->extend[0], (erts_aint32_t) 0);
+ erts_atomic32_init_nob(&statep->extend[1], (erts_aint32_t) 0);
+ statep->check_interval = check_seconds;
+
+#else
+ statep->extend[0] = (Uint32) 0;
+ statep->extend[1] = (Uint32) 0;
+ statep->last_msb = (ErtsMonotonicTime) 0;
+#endif
+}
+
+void
+erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep)
+{
+#ifdef USE_THREADS
+ erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER;
+ thr_opts.detached = 1;
+ thr_opts.suggested_stack_size = 4;
+
+#if 0
+ thr_opts.name = "os_monotonic_time_extender";
+#endif
+
+ erts_thr_create(&os_monotonic_extender_tid,
+ os_monotonic_time_extender,
+ (void*) statep,
+ &thr_opts);
+#endif
+}
diff --git a/erts/emulator/sys/common/erl_os_monotonic_time_extender.h b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h
new file mode 100644
index 0000000000..8089c9aed9
--- /dev/null
+++ b/erts/emulator/sys/common/erl_os_monotonic_time_extender.h
@@ -0,0 +1,66 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2015. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_OS_MONOTONIC_TIME_EXTENDER_H__
+#define ERL_OS_MONOTONIC_TIME_EXTENDER_H__
+
+#include "sys.h"
+#include "erl_threads.h"
+
+typedef struct {
+#ifdef USE_THREADS
+ Uint32 (*raw_os_monotonic_time)(void);
+ erts_atomic32_t extend[2];
+ int check_interval;
+#else
+ Uint32 extend[2];
+ ErtsMonotonicTime last_msb;
+#endif
+} ErtsOsMonotonicTimeExtendState;
+
+#ifdef USE_THREADS
+# define ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(S, RT) ((void) 1)
+# define ERTS_EXTEND_OS_MONOTONIC_TIME(S, RT) \
+ ((((ErtsMonotonicTime) \
+ erts_atomic32_read_nob(&((S)->extend[((int) ((RT) >> 31)) & 1]))) \
+ << 32) \
+ + (RT))
+#else
+# define ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(S, RT) \
+ do { \
+ Uint32 msb__ = (RT) & (((Uint32) 1) << 31); \
+ if (msb__ != (S)->last_msb) { \
+ int ix__ = ((int) ((S)->last_msb >> 31)) & 1; \
+ (S)->extend[ix__]++; \
+ (S)->last_msb = msb; \
+ } \
+ } while (0)
+# define ERTS_EXTEND_OS_MONOTONIC_TIME(S, RT) \
+ ((((ErtsMonotonicTime) (S)->extend[((int) ((RT) >> 31)) & 1]) << 32) + (RT))
+#endif
+
+void
+erts_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep,
+ Uint32 (*raw_os_monotonic_time)(void),
+ int check_seconds);
+void
+erts_late_init_os_monotonic_time_extender(ErtsOsMonotonicTimeExtendState *statep);
+
+#endif
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index 5861b30315..e394d84f73 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -73,6 +74,7 @@
#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
+#include "erl_msacc.h"
#if !defined(ERTS_POLL_USE_EPOLL) \
&& !defined(ERTS_POLL_USE_DEVPOLL) \
@@ -123,8 +125,8 @@ static ERTS_INLINE
int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
ERTS_fd_set *exceptfds, struct timeval *timeout)
{
- ASSERT(!readfds || readfds->sz >= nfds);
- ASSERT(!writefds || writefds->sz >= nfds);
+ ASSERT(!readfds || readfds->sz >= ERTS_FD_SIZE(nfds));
+ ASSERT(!writefds || writefds->sz >= ERTS_FD_SIZE(nfds));
ASSERT(!exceptfds);
return select(nfds,
(readfds ? readfds->ptr : NULL ),
@@ -153,9 +155,6 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
#define ERTS_POLL_COALESCE_KP_RES (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
-#define ERTS_EV_TABLE_MIN_LENGTH 1024
-#define ERTS_EV_TABLE_EXP_THRESHOLD (2048*1024)
-
#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
# define ERTS_POLL_ASYNC_INTERRUPT_SUPPORT 1
#else
@@ -314,13 +313,16 @@ struct ErtsPollSet_ {
#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fds[2];
#endif
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd;
+#endif
#if ERTS_POLL_USE_FALLBACK
int fallback_used;
#endif
#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
erts_atomic32_t wakeup_state;
#endif
- erts_smp_atomic32_t timeout;
+ erts_atomic64_t timeout_time;
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
erts_smp_atomic_t no_avoided_wakeups;
erts_smp_atomic_t no_avoided_interrupts;
@@ -384,6 +386,26 @@ static void check_poll_status(ErtsPollSet ps);
static void print_misc_debug_info(void);
#endif
+static ERTS_INLINE void
+init_timeout_time(ErtsPollSet ps)
+{
+ erts_atomic64_init_nob(&ps->timeout_time,
+ (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX);
+}
+
+static ERTS_INLINE void
+set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time)
+{
+ erts_atomic64_set_relb(&ps->timeout_time,
+ (erts_aint64_t) time);
+}
+
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout_time(ErtsPollSet ps)
+{
+ return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time);
+}
+
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
@@ -410,7 +432,7 @@ static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet ps)
{
#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- return (erts_atomic32_xchg_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
+ return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
#else
return 0;
@@ -421,7 +443,7 @@ static ERTS_INLINE void
woke_up(ErtsPollSet ps)
{
#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
+ erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
@@ -448,14 +470,9 @@ wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe)
wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
- else {
- /*
- * We might unnecessarily write to the pipe, however,
- * that isn't problematic.
- */
- wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
- erts_atomic32_set_relb(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
- }
+ else
+ wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state,
+ ERTS_POLL_WOKEN_INTR);
wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
}
/*
@@ -560,6 +577,75 @@ create_wakeup_pipe(ErtsPollSet ps)
#endif /* ERTS_POLL_USE_WAKEUP_PIPE */
/*
+ * --- timer fd -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_TIMERFD
+
+/* We use the timerfd when using epoll_wait to get high accuracy
+ timeouts, i.e. we want to sleep with < ms accuracy. */
+
+static void
+create_timerfd(ErtsPollSet ps)
+{
+ int do_wake = 0;
+ int timer_fd;
+ timer_fd = timerfd_create(CLOCK_MONOTONIC,0);
+ ERTS_POLL_EXPORT(erts_poll_control)(ps,
+ timer_fd,
+ ERTS_POLL_EV_IN,
+ 1, &do_wake);
+#if ERTS_POLL_USE_FALLBACK
+ /* We depend on the wakeup pipe being handled by kernel poll */
+ if (ps->fds_status[timer_fd].flags & ERTS_POLL_FD_FLG_INFLBCK)
+ fatal_error("%s:%d:create_wakeup_pipe(): Internal error\n",
+ __FILE__, __LINE__);
+#endif
+ if (ps->internal_fd_limit <= timer_fd)
+ ps->internal_fd_limit = timer_fd + 1;
+ ps->timer_fd = timer_fd;
+}
+
+static ERTS_INLINE void
+timerfd_set(ErtsPollSet ps, struct itimerspec *its)
+{
+#ifdef DEBUG
+ struct itimerspec old_its;
+ int res;
+ res = timerfd_settime(ps->timer_fd, 0, its, &old_its);
+ ASSERT(res == 0);
+ ASSERT(old_its.it_interval.tv_sec == 0 &&
+ old_its.it_interval.tv_nsec == 0 &&
+ old_its.it_value.tv_sec == 0 &&
+ old_its.it_value.tv_nsec == 0);
+
+#else
+ timerfd_settime(ps->timer_fd, 0, its, NULL);
+#endif
+}
+
+static ERTS_INLINE int
+timerfd_clear(ErtsPollSet ps, int res, int max_res) {
+
+ struct itimerspec its;
+ /* we always have to clear the timer */
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 0;
+ timerfd_settime(ps->timer_fd, 0, &its, NULL);
+
+ /* only timeout fd triggered */
+ if (res == 1 && ps->res_events[0].data.fd == ps->timer_fd)
+ return 0;
+
+ return res;
+}
+
+#endif /* ERTS_POLL_USE_TIMERFD */
+
+
+/*
* --- Poll set update requests ----------------------------------------------
*/
#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
@@ -611,27 +697,33 @@ free_update_requests_block(ErtsPollSet ps,
* --- Growing poll set structures -------------------------------------------
*/
-int
-ERTS_POLL_EXPORT(erts_poll_get_table_len) (int new_len)
+#ifndef ERTS_KERNEL_POLL_VERSION /* only one shared implementation */
+
+#define ERTS_FD_TABLE_MIN_LENGTH 1024
+#define ERTS_FD_TABLE_EXP_THRESHOLD (2048*1024)
+
+int erts_poll_new_table_len (int old_len, int need_len)
{
- if (new_len < ERTS_EV_TABLE_MIN_LENGTH) {
- new_len = ERTS_EV_TABLE_MIN_LENGTH;
- } else if (new_len < ERTS_EV_TABLE_EXP_THRESHOLD) {
- /* find next power of 2 */
- --new_len;
- new_len |= new_len >> 1;
- new_len |= new_len >> 2;
- new_len |= new_len >> 4;
- new_len |= new_len >> 8;
- new_len |= new_len >> 16;
- ++new_len;
- } else {
- /* grow incrementally */
- new_len += ERTS_EV_TABLE_EXP_THRESHOLD;
+ int new_len;
+
+ ASSERT(need_len > old_len);
+ if (need_len < ERTS_FD_TABLE_MIN_LENGTH) {
+ new_len = ERTS_FD_TABLE_MIN_LENGTH;
+ }
+ else {
+ new_len = old_len;
+ do {
+ if (new_len < ERTS_FD_TABLE_EXP_THRESHOLD)
+ new_len *= 2;
+ else
+ new_len += ERTS_FD_TABLE_EXP_THRESHOLD;
+
+ } while (new_len < need_len);
}
+ ASSERT(new_len >= need_len);
return new_len;
}
-
+#endif
#if ERTS_POLL_USE_KERNEL_POLL
static void
@@ -645,7 +737,7 @@ grow_res_events(ErtsPollSet ps, int new_len)
#elif ERTS_POLL_USE_KQUEUE
struct kevent
#endif
- ) * ERTS_POLL_EXPORT(erts_poll_get_table_len)(new_len);
+ ) * erts_poll_new_table_len(ps->res_events_len, new_len);
/* We do not need to save previously stored data */
if (ps->res_events)
erts_free(ERTS_ALC_T_POLL_RES_EVS, ps->res_events);
@@ -659,7 +751,7 @@ static void
grow_poll_fds(ErtsPollSet ps, int min_ix)
{
int i;
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_ix + 1);
+ int new_len = erts_poll_new_table_len(ps->poll_fds_len, min_ix + 1);
if (new_len > max_fds)
new_len = max_fds;
ps->poll_fds = (ps->poll_fds_len
@@ -681,7 +773,7 @@ grow_poll_fds(ErtsPollSet ps, int min_ix)
static void
grow_select_fds(int fd, ERTS_fd_set* fds)
{
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(fd + 1);
+ int new_len = erts_poll_new_table_len(fds->sz, fd + 1);
if (new_len > max_fds)
new_len = max_fds;
new_len = ERTS_FD_SIZE(new_len);
@@ -708,7 +800,7 @@ static void
grow_fds_status(ErtsPollSet ps, int min_fd)
{
int i;
- int new_len = ERTS_POLL_EXPORT(erts_poll_get_table_len)(min_fd + 1);
+ int new_len = erts_poll_new_table_len(ps->fds_status_len, min_fd + 1);
ASSERT(min_fd < max_fds);
if (new_len > max_fds)
new_len = max_fds;
@@ -1497,6 +1589,12 @@ poll_control(ErtsPollSet ps, int fd, ErtsPollEvents events, int on, int *do_wake
goto done;
}
#endif
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
+#endif
}
if (fd >= ps->fds_status_len)
@@ -1644,6 +1742,9 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res)
#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
#endif
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd = ps->timer_fd;
+#endif
for (i = 0; i < n; i++) {
@@ -1659,6 +1760,11 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res)
continue;
}
#endif
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == timer_fd) {
+ continue;
+ }
+#endif
ASSERT(!(ps->fds_status[fd].flags & ERTS_POLL_FD_FLG_INFLBCK));
/* epoll_wait() can repeat the same fd in result array... */
ix = (int) ps->fds_status[fd].res_ev_ix;
@@ -1733,6 +1839,11 @@ save_kp_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res, int chk_fds_res)
continue;
}
#endif
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == timer_fd) {
+ continue;
+ }
+#endif
revents = ERTS_POLL_EV_N2E(ps->res_events[i].events);
pr[res].fd = fd;
pr[res].events = revents;
@@ -1993,44 +2104,192 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
}
}
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout(ErtsPollSet ps,
+ int resolution,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout, save_timeout_time;
+
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT) {
+ save_timeout_time = ERTS_MONOTONIC_TIME_MIN;
+ timeout = 0;
+ }
+ else {
+ ErtsMonotonicTime diff_time, current_time;
+ current_time = erts_get_monotonic_time(NULL);
+ diff_time = timeout_time - current_time;
+ if (diff_time <= 0) {
+ save_timeout_time = ERTS_MONOTONIC_TIME_MIN;
+ timeout = 0;
+ }
+ else {
+ save_timeout_time = current_time;
+ switch (resolution) {
+ case 1000:
+ /* Round up to nearest even milli second */
+ timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1;
+ if (timeout > (ErtsMonotonicTime) INT_MAX)
+ timeout = (ErtsMonotonicTime) INT_MAX;
+ save_timeout_time += ERTS_MSEC_TO_MONOTONIC(timeout);
+ break;
+ case 1000000:
+ /* Round up to nearest even micro second */
+ timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1;
+ save_timeout_time += ERTS_USEC_TO_MONOTONIC(timeout);
+ break;
+ case 1000000000:
+ /* Round up to nearest even nano second */
+ timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1;
+ save_timeout_time += ERTS_NSEC_TO_MONOTONIC(timeout);
+ break;
+ default:
+ ERTS_INTERNAL_ERROR("Invalid resolution");
+ timeout = 0;
+ save_timeout_time = 0;
+ break;
+ }
+ }
+ }
+ set_timeout_time(ps, save_timeout_time);
+ return timeout;
+}
+
+#if ERTS_POLL_USE_SELECT
+
+static ERTS_INLINE int
+get_timeout_timeval(ErtsPollSet ps,
+ SysTimeval *tvp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
+
+ return 0;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000);
+ tvp->tv_sec = sec;
+ tvp->tv_usec = timeout - sec*(1000*1000);
+
+ ASSERT(tvp->tv_sec >= 0);
+ ASSERT(tvp->tv_usec >= 0);
+ ASSERT(tvp->tv_usec < 1000*1000);
+
+ return !0;
+ }
+
+}
+
+#endif
+
+#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_timespec(ErtsPollSet ps,
+ struct timespec *tsp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+ return 0;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000*1000);
+ tsp->tv_sec = sec;
+ tsp->tv_nsec = timeout - sec*(1000*1000*1000);
+
+ ASSERT(tsp->tv_sec >= 0);
+ ASSERT(tsp->tv_nsec >= 0);
+ ASSERT(tsp->tv_nsec < 1000*1000*1000);
+
+ return !0;
+ }
+}
+
+#endif
+
+#if ERTS_POLL_USE_TIMERFD
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
+get_timeout_itimerspec(ErtsPollSet ps,
+ struct itimerspec *itsp,
+ ErtsMonotonicTime timeout_time)
+{
+
+ itsp->it_interval.tv_sec = 0;
+ itsp->it_interval.tv_nsec = 0;
+
+ return get_timeout_timespec(ps, &itsp->it_value, timeout_time);
+}
+
+#endif
+
+static ERTS_INLINE int
+check_fd_events(ErtsPollSet ps, ErtsMonotonicTime timeout_time, int max_res)
{
int res;
+ ERTS_MSACC_PUSH_STATE_M();
if (erts_smp_atomic_read_nob(&ps->no_of_user_fds) == 0
- && tv->tv_usec == 0 && tv->tv_sec == 0) {
+ && timeout_time == ERTS_POLL_NO_TIMEOUT) {
/* Nothing to poll and zero timeout; done... */
return 0;
}
else {
- long timeout = tv->tv_sec*1000 + tv->tv_usec/1000;
- if (timeout > ERTS_AINT32_T_MAX)
- timeout = ERTS_AINT32_T_MAX;
- ASSERT(timeout >= 0);
- erts_smp_atomic32_set_relb(&ps->timeout, (erts_aint32_t) timeout);
+ int timeout;
#if ERTS_POLL_USE_FALLBACK
if (!(ps->fallback_used = ERTS_POLL_NEED_FALLBACK(ps))) {
#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
- if (timeout > INT_MAX)
- timeout = INT_MAX;
if (max_res > ps->res_events_len)
grow_res_events(ps, max_res);
+#if ERTS_POLL_USE_TIMERFD
+ {
+ struct itimerspec its;
+ timeout = get_timeout_itimerspec(ps, &its, timeout_time);
+ if (timeout) {
+#ifdef ERTS_SMP
+ erts_thr_progress_prepare_wait(NULL);
+#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ timerfd_set(ps, &its);
+ res = epoll_wait(ps->kp_fd, ps->res_events, max_res, -1);
+ res = timerfd_clear(ps, res, max_res);
+ } else {
+ res = epoll_wait(ps->kp_fd, ps->res_events, max_res, 0);
+ }
+ }
+#else /* !ERTS_POLL_USE_TIMERFD */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
- res = epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
+ res = epoll_wait(ps->kp_fd, ps->res_events, max_res, timeout);
+#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
struct timespec ts;
if (max_res > ps->res_events_len)
grow_res_events(ps, max_res);
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
- ts.tv_sec = tv->tv_sec;
- ts.tv_nsec = tv->tv_usec*1000;
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
#endif /* ----------------------------------------- */
}
@@ -2049,44 +2308,62 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
#if ERTS_POLL_USE_WAKEUP_PIPE
nfds++; /* Wakeup pipe */
#endif
- if (timeout > INT_MAX)
- timeout = INT_MAX;
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
if (poll_res.dp_nfds > ps->res_events_len)
grow_res_events(ps, poll_res.dp_nfds);
poll_res.dp_fds = ps->res_events;
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
- poll_res.dp_timeout = (int) timeout;
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
+ poll_res.dp_timeout = timeout;
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
- if (timeout > INT_MAX)
- timeout = INT_MAX;
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+ struct timespec ts;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_prepare_wait(NULL);
#endif
- res = poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
+ res = ppoll(ps->poll_fds, ps->no_poll_fds, &ts, NULL);
+#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
+
+ if (timeout) {
+#ifdef ERTS_SMP
+ erts_thr_progress_prepare_wait(NULL);
+#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
+ res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval to = *tv;
+ SysTimeval to;
+ timeout = get_timeout_timeval(ps, &to, timeout_time);
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
-
+
+ if (timeout) {
#ifdef ERTS_SMP
- if (to.tv_sec || to.tv_usec)
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
+ }
res = ERTS_SELECT(ps->max_fd + 1,
- &ps->res_input_fds,
- &ps->res_output_fds,
- NULL,
- &to);
+ &ps->res_input_fds,
+ &ps->res_output_fds,
+ NULL,
+ &to);
#ifdef ERTS_SMP
- if (to.tv_sec || to.tv_usec)
+ if (timeout) {
erts_thr_progress_finalize_wait(NULL);
+ ERTS_MSACC_POP_STATE_M();
+ }
if (res < 0
&& errno == EBADF
&& ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
@@ -2108,10 +2385,10 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
handle_update_requests(ps);
ERTS_POLLSET_UNLOCK(ps);
res = ERTS_SELECT(ps->max_fd + 1,
- &ps->res_input_fds,
- &ps->res_output_fds,
- NULL,
- &to);
+ &ps->res_input_fds,
+ &ps->res_output_fds,
+ NULL,
+ &to);
if (res == 0) {
errno = EAGAIN;
res = -1;
@@ -2121,10 +2398,12 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
return res;
#endif /* ----------------------------------------- */
}
+ if (timeout) {
#ifdef ERTS_SMP
- if (timeout)
erts_thr_progress_finalize_wait(NULL);
#endif
+ ERTS_MSACC_POP_STATE_M();
+ }
return res;
}
}
@@ -2133,15 +2412,14 @@ int
ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
ErtsPollResFd pr[],
int *len,
- SysTimeval *utvp)
+ ErtsMonotonicTime timeout_time)
{
+ ErtsMonotonicTime to;
int res, no_fds;
int ebadf = 0;
#ifdef ERTS_SMP
int ps_locked = 0;
#endif
- SysTimeval *tvp;
- SysTimeval itv;
no_fds = *len;
#ifdef ERTS_POLL_MAX_RES
@@ -2151,13 +2429,9 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
*len = 0;
- ASSERT(utvp);
-
- tvp = utvp;
-
#ifdef ERTS_POLL_DEBUG_PRINT
- erts_printf("Entering erts_poll_wait(), timeout=%d\n",
- (int) tv->tv_sec*1000 + tv->tv_usec/1000);
+ erts_printf("Entering erts_poll_wait(), timeout_time=%bps\n",
+ timeout_time);
#endif
if (ERTS_POLLSET_SET_POLLED_CHK(ps)) {
@@ -2166,12 +2440,9 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
goto done;
}
- if (is_woken(ps)) {
- /* Use zero timeout */
- itv.tv_sec = 0;
- itv.tv_usec = 0;
- tvp = &itv;
- }
+ to = (is_woken(ps)
+ ? ERTS_POLL_NO_TIMEOUT /* Use zero timeout */
+ : timeout_time);
#if ERTS_POLL_USE_UPDATE_REQUESTS_QUEUE
if (ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
@@ -2181,7 +2452,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
}
#endif
- res = check_fd_events(ps, tvp, no_fds);
+ res = check_fd_events(ps, to, no_fds);
woke_up(ps);
@@ -2224,7 +2495,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
#endif
done:
- erts_smp_atomic32_set_relb(&ps->timeout, ERTS_AINT32_T_MAX);
+ set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX);
#ifdef ERTS_POLL_DEBUG_PRINT
erts_printf("Leaving %s = erts_poll_wait()\n",
res == 0 ? "0" : erl_errno_id(res));
@@ -2268,13 +2539,14 @@ ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet ps)
void
ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps,
int set,
- erts_short_time_t msec)
+ ErtsMonotonicTime timeout_time)
{
#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP)
if (!set)
reset_wakeup_state(ps);
else {
- if (erts_smp_atomic32_read_acqb(&ps->timeout) > (erts_aint32_t) msec)
+ ErtsMonotonicTime max_wait_time = get_timeout_time(ps);
+ if (max_wait_time > timeout_time)
wake_poller(ps, 1, 0);
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
else {
@@ -2414,6 +2686,9 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
#if ERTS_POLL_USE_WAKEUP_PIPE
create_wakeup_pipe(ps);
#endif
+#if ERTS_POLL_USE_TIMERFD
+ create_timerfd(ps);
+#endif
#if ERTS_POLL_USE_FALLBACK
if (kp_fd >= ps->fds_status_len)
grow_fds_status(ps, kp_fd);
@@ -2431,7 +2706,7 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
#endif
- erts_smp_atomic32_init_nob(&ps->timeout, ERTS_AINT32_T_MAX);
+ init_timeout_time(ps);
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
erts_smp_atomic_init_nob(&ps->no_avoided_wakeups, 0);
erts_smp_atomic_init_nob(&ps->no_avoided_interrupts, 0);
@@ -2504,13 +2779,18 @@ ERTS_POLL_EXPORT(erts_poll_destroy_pollset)(ErtsPollSet ps)
if (ps->wake_fds[1] >= 0)
close(ps->wake_fds[1]);
#endif
+#if ERTS_POLL_USE_TIMERFD
+ if (ps->timer_fd >= 0)
+ close(ps->timer_fd);
+#endif
erts_smp_spin_lock(&pollsets_lock);
if (ps == pollsets)
pollsets = pollsets->next;
else {
ErtsPollSet prev_ps;
- for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next);
+ for (prev_ps = pollsets; ps != prev_ps->next; prev_ps = prev_ps->next)
+ ;
ASSERT(ps == prev_ps->next);
prev_ps->next = ps->next;
}
@@ -2607,6 +2887,9 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet ps, ErtsPollInfo *pip)
#if ERTS_POLL_USE_WAKEUP_PIPE
pip->poll_set_size++; /* Wakeup pipe */
#endif
+#if ERTS_POLL_USE_TIMERFD
+ pip->poll_set_size++; /* timerfd */
+#endif
pip->fallback_poll_set_size =
#if !ERTS_POLL_USE_FALLBACK
@@ -2735,14 +3018,18 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet ps,
ev[fd] = 0;
else {
ev[fd] = ps->fds_status[fd].events;
+ if (
#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
- ev[fd] |= ERTS_POLL_EV_NVAL;
+ fd == ps->wake_fds[0] || fd == ps->wake_fds[1] ||
+#endif
+#if ERTS_POLL_USE_TIMERFD
+ fd == ps->timer_fd ||
#endif
#if ERTS_POLL_USE_KERNEL_POLL
- if (fd == ps->kp_fd)
- ev[fd] |= ERTS_POLL_EV_NVAL;
+ fd == ps->kp_fd ||
#endif
+ 0)
+ ev[fd] |= ERTS_POLL_EV_NVAL;
}
}
ERTS_POLLSET_UNLOCK(ps);
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index 09ed9f41af..c16122610d 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -29,6 +30,8 @@
#include "sys.h"
+#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+
#if 0
#define ERTS_POLL_COUNT_AVOIDED_WAKEUPS
#endif
@@ -96,10 +99,12 @@
# endif
#endif
+#define ERTS_POLL_USE_TIMERFD 0
+
typedef Uint32 ErtsPollEvents;
#undef ERTS_POLL_EV_E2N
-#if defined(__WIN32__) /* --- win32 ------------------------------- */
+#if defined(__WIN32__) /* --- win32 --------------------------------------- */
#define ERTS_POLL_EV_IN 1
#define ERTS_POLL_EV_OUT 2
@@ -110,8 +115,14 @@ typedef Uint32 ErtsPollEvents;
#include <sys/epoll.h>
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+
#define ERTS_POLL_EV_E2N(EV) \
- ((__uint32_t) (EV))
+ ((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
((ErtsPollEvents) (EV))
@@ -223,19 +234,20 @@ void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet,
int);
void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet,
int,
- erts_short_time_t);
+ ErtsMonotonicTime);
ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet,
ErtsSysFdType,
ErtsPollEvents,
int on,
- int* wake_poller);
+ int* wake_poller
+ );
void ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet,
ErtsPollControlEntry [],
int on);
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet,
ErtsPollResFd [],
int *,
- SysTimeval *);
+ ErtsMonotonicTime);
int ERTS_POLL_EXPORT(erts_poll_max_fds)(void);
void ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet,
ErtsPollInfo *);
@@ -246,6 +258,6 @@ void ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet,
ErtsPollEvents [],
int);
-int ERTS_POLL_EXPORT(erts_poll_get_table_len)(int);
+int erts_poll_new_table_len(int old_len, int need_len);
#endif /* #ifndef ERL_POLL_H__ */
diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c
index 31ad3b82d5..79f87eb3a9 100644
--- a/erts/emulator/sys/common/erl_sys_common_misc.c
+++ b/erts/emulator/sys/common/erl_sys_common_misc.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -44,6 +45,14 @@
#endif
#endif
+/*
+ * erts_check_io_time is used by the erl_check_io implementation. The
+ * global erts_check_io_time variable is declared here since there
+ * (often) exist two versions of erl_check_io (kernel-poll and
+ * non-kernel-poll), and we dont want two versions of this variable.
+ */
+erts_smp_atomic_t erts_check_io_time;
+
/* Written once and only once */
static int filename_encoding = ERL_FILENAME_UNKNOWN;
@@ -52,7 +61,7 @@ static int filename_warning = ERL_FILENAME_WARNING_WARNING;
/* Default unicode on windows and MacOS X */
static int user_filename_encoding = ERL_FILENAME_UTF8;
#else
-static int user_filename_encoding = ERL_FILENAME_LATIN1;
+static int user_filename_encoding = ERL_FILENAME_UNKNOWN;
#endif
/* This controls the heuristic in printing characters in shell and w/
io:format("~tp", ...) etc. */
diff --git a/erts/emulator/sys/common/erl_util_queue.h b/erts/emulator/sys/common/erl_util_queue.h
index 47925e2264..73293e0225 100644
--- a/erts/emulator/sys/common/erl_util_queue.h
+++ b/erts/emulator/sys/common/erl_util_queue.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2013. All Rights Reserved.
+ * Copyright Ericsson AB 2013-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/unix/driver_int.h b/erts/emulator/sys/unix/driver_int.h
index a7ee8087ab..840b832878 100644
--- a/erts/emulator/sys/unix/driver_int.h
+++ b/erts/emulator/sys/unix/driver_int.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2009. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/unix/erl_child_setup.c b/erts/emulator/sys/unix/erl_child_setup.c
index 7c6e4a2f37..6beb316350 100644
--- a/erts/emulator/sys/unix/erl_child_setup.c
+++ b/erts/emulator/sys/unix/erl_child_setup.c
@@ -1,100 +1,252 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
/*
- * After a vfork() (or fork()) the child exec()s to this program which
- * sets up the child and exec()s to the user program (see spawn_start()
- * in sys.c and ticket OTP-4389).
+ * This program is started at erts startup and all fork's that
+ * have to be done are done in here. This is done for a couple
+ * of reasons:
+ * - Allow usage of fork without a memory explosion.
+ * -- we do not want to use vfork, as it blocks the VM
+ * until the execv is done, and if the program that
+ * is to be executed is on an NFS that is unavailable,
+ * the execv can block for a very long time.
+ * -- we cannot do fork inside the VM as that would temporarily
+ * duplicate the memory usage of the VM per parallel exec.
+ *
+ * Some implementation notes:
+ * - A single Unix Domain Socket is setup in between the VM and
+ * this program. Over that UDS the file descriptors that should
+ * be used to talk to the child program are sent.
+ * The actual command to execute, together with options and the
+ * environment, is sent over the pipe represented by the
+ * file descriptors mentioned above. We don't send the
+ * command over the UDS as that would increase the likely hood
+ * that it's buffer would be full.
+ *
+ * - Since it is this program that execv's, it has to take care of
+ * all the SIGCHLD signals that the child programs generate. The
+ * signals are received and the pid+exit reason is sent as data
+ * on the UDS to the VM. The VM is then able to map the pid to the
+ * port of the child program that just exited and deliver the status
+ * code if requested.
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
-#define NEED_CHILD_SETUP_DEFINES
-#include "sys.h"
-#include "erl_misc_utils.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/wait.h>
-#ifdef SIG_SIGSET /* Old SysV */
-void sys_sigrelease(int sig)
+#define WANT_NONBLOCKING
+
+#include "erl_driver.h"
+#include "sys_uds.h"
+#include "hash.h"
+#include "erl_term.h"
+#include "erl_child_setup.h"
+
+#define SET_CLOEXEC(fd) fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC)
+
+#if defined(__ANDROID__)
+#define SHELL "/system/bin/sh"
+#else
+#define SHELL "/bin/sh"
+#endif /* __ANDROID__ */
+
+//#define HARD_DEBUG
+#ifdef HARD_DEBUG
+#define DEBUG_PRINT(fmt, ...) fprintf(stderr, fmt "\r\n", ##__VA_ARGS__)
+#else
+#define DEBUG_PRINT(fmt, ...)
+#endif
+
+#define ABORT(fmt, ...) do { \
+ fprintf(stderr, "erl_child_setup: " fmt "\r\n", ##__VA_ARGS__); \
+ abort(); \
+ } while(0)
+
+#ifdef DEBUG
+void
+erl_assert_error(const char* expr, const char* func, const char* file, int line)
{
- sigrelse(sig);
+ fflush(stdout);
+ fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n",
+ file, line, func, expr);
+ fflush(stderr);
+ abort();
}
-#else /* !SIG_SIGSET */
-#ifdef SIG_SIGNAL /* Old BSD */
-sys_sigrelease(int sig)
+#endif
+
+void sys_sigblock(int sig)
{
- sigsetmask(sigblock(0) & ~sigmask(sig));
+ sigset_t mask;
+
+ sigemptyset(&mask);
+ sigaddset(&mask, sig);
+ sigprocmask(SIG_BLOCK, &mask, (sigset_t *)NULL);
}
-#else /* !SIG_SIGNAL */ /* The True Way - POSIX!:-) */
+
void sys_sigrelease(int sig)
{
sigset_t mask;
-
sigemptyset(&mask);
sigaddset(&mask, sig);
sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)NULL);
}
-#endif /* !SIG_SIGNAL */
-#endif /* !SIG_SIGSET */
-int
-main(int argc, char *argv[])
+static void add_os_pid_to_port_id_mapping(Eterm, pid_t);
+static Eterm get_port_id(pid_t);
+static int forker_hash_init(void);
+
+static int max_files = -1;
+static int sigchld_pipe[2];
+
+static int
+start_new_child(int pipes[])
{
- int i, from, to;
- int erts_spawn_executable = 0;
+ int size, res, i, pos = 0;
+ char *buff, *o_buff;
+
+ char *cmd, *wd, **new_environ, **args = NULL;
+
+ Sint cnt, flags;
+
+ /* only child executes here */
+
+ do {
+ res = read(pipes[0], (char*)&size, sizeof(size));
+ } while(res < 0 && (errno == EINTR || errno == ERRNO_BLOCK));
+
+ if (res <= 0) {
+ goto child_error;
+ }
+
+ buff = malloc(size);
+
+ DEBUG_PRINT("size = %d", size);
+
+ do {
+ if ((res = read(pipes[0], buff + pos, size - pos)) < 0) {
+ if (errno == ERRNO_BLOCK || errno == EINTR)
+ continue;
+ goto child_error;
+ }
+ if (res == 0) {
+ errno = EPIPE;
+ goto child_error;
+ }
+ pos += res;
+ } while(size - pos != 0);
+
+ o_buff = buff;
- /* OBSERVE!
- * Keep child setup after fork() (implemented in sys.c) up to date
- * if changes are made here.
- */
+ flags = get_int32(buff);
+ buff += sizeof(Sint32);
- if (argc != CS_ARGV_NO_OF_ARGS) {
- if (argc < CS_ARGV_NO_OF_ARGS) {
- return 1;
- } else {
- erts_spawn_executable = 1;
- }
+ DEBUG_PRINT("flags = %d", flags);
+
+ cmd = buff;
+ buff += strlen(buff) + 1;
+ if (*buff == '\0') {
+ wd = NULL;
+ } else {
+ wd = buff;
+ buff += strlen(buff) + 1;
+ }
+ buff++;
+
+ DEBUG_PRINT("wd = %s", wd);
+
+ cnt = get_int32(buff);
+ buff += sizeof(Sint32);
+ new_environ = malloc(sizeof(char*)*(cnt + 1));
+
+ DEBUG_PRINT("env_len = %ld", cnt);
+ for (i = 0; i < cnt; i++, buff++) {
+ new_environ[i] = buff;
+ while(*buff != '\0') buff++;
+ }
+ new_environ[cnt] = NULL;
+
+ if (o_buff + size != buff) {
+ /* This is a spawn executable call */
+ cnt = get_int32(buff);
+ buff += sizeof(Sint32);
+ args = malloc(sizeof(char*)*(cnt + 1));
+ for (i = 0; i < cnt; i++, buff++) {
+ args[i] = buff;
+ while(*buff != '\0') buff++;
+ }
+ args[cnt] = NULL;
}
- if (strcmp("false", argv[CS_ARGV_UNBIND_IX]) != 0)
- if (erts_unbind_from_cpu_str(argv[CS_ARGV_UNBIND_IX]) != 0)
- return 1;
+ if (o_buff + size != buff) {
+ errno = EINVAL;
+ goto child_error;
+ }
+
+ DEBUG_PRINT("read ack");
+ do {
+ ErtsSysForkerProto proto;
+ res = read(pipes[0], &proto, sizeof(proto));
+ if (res > 0) {
+ ASSERT(proto.action == ErtsSysForkerProtoAction_Ack);
+ ASSERT(res == sizeof(proto));
+ }
+ } while(res < 0 && (errno == EINTR || errno == ERRNO_BLOCK));
+ if (res < 1) {
+ errno = EPIPE;
+ goto child_error;
+ }
+
+ DEBUG_PRINT("Do that forking business: '%s'\n",cmd);
- for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++) {
- if (argv[CS_ARGV_DUP2_OP_IX(i)][0] == '-'
- && argv[CS_ARGV_DUP2_OP_IX(i)][1] == '\0')
- break;
- if (sscanf(argv[CS_ARGV_DUP2_OP_IX(i)], "%d:%d", &from, &to) != 2)
- return 1;
- if (dup2(from, to) < 0)
- return 1;
+ /* When the dup2'ing below is done, only
+ fd's 0, 1, 2 and maybe 3, 4 should survive the
+ exec. All other fds (i.e. the unix domain sockets
+ and stray pipe ends) should have CLOEXEC set on them
+ so they will be closed when the exec happens */
+ if (flags & FORKER_FLAG_USE_STDIO) {
+ /* stdin for process */
+ if (flags & FORKER_FLAG_DO_WRITE &&
+ dup2(pipes[0], 0) < 0)
+ goto child_error;
+ /* stdout for process */
+ if (flags & FORKER_FLAG_DO_READ &&
+ dup2(pipes[1], 1) < 0)
+ goto child_error;
+ }
+ else { /* XXX will fail if pipes[0] == 4 (unlikely..) */
+ if (flags & FORKER_FLAG_DO_READ && dup2(pipes[1], 4) < 0)
+ goto child_error;
+ if (flags & FORKER_FLAG_DO_WRITE && dup2(pipes[0], 3) < 0)
+ goto child_error;
}
- if (sscanf(argv[CS_ARGV_FD_CR_IX], "%d:%d", &from, &to) != 2)
- return 1;
- for (i = from; i <= to; i++)
- (void) close(i);
+ if (dup2(pipes[2], 2) < 0)
+ goto child_error;
- if (!(argv[CS_ARGV_WD_IX][0] == '.' && argv[CS_ARGV_WD_IX][1] == '\0')
- && chdir(argv[CS_ARGV_WD_IX]) < 0)
- return 1;
+ if (wd && chdir(wd) < 0)
+ goto child_error;
#if defined(USE_SETPGRP_NOARGS) /* SysV */
(void) setpgrp();
@@ -104,19 +256,301 @@ main(int argc, char *argv[])
(void) setsid();
#endif
+ close(pipes[0]);
+ close(pipes[1]);
+ close(pipes[2]);
+
sys_sigrelease(SIGCHLD);
- sys_sigrelease(SIGINT);
- sys_sigrelease(SIGUSR1);
-
- if (erts_spawn_executable) {
- if (argv[CS_ARGV_NO_OF_ARGS + 1] == NULL) {
- execl(argv[CS_ARGV_NO_OF_ARGS],argv[CS_ARGV_NO_OF_ARGS],
- (char *) NULL);
- } else {
- execv(argv[CS_ARGV_NO_OF_ARGS],&(argv[CS_ARGV_NO_OF_ARGS + 1]));
- }
+
+ if (args) {
+ /* spawn_executable */
+ execve(cmd, args, new_environ);
} else {
- execl("/bin/sh", "sh", "-c", argv[CS_ARGV_CMD_IX], (char *) NULL);
+ execle(SHELL, "sh", "-c", cmd, (char *) NULL, new_environ);
}
+child_error:
+ DEBUG_PRINT("exec error: %d\r\n",errno);
+ _exit(128 + errno);
+}
+
+
+/*
+ * [OTP-3906]
+ * Solaris signal management gets confused when threads are used and a
+ * lot of child processes dies. The confusion results in that SIGCHLD
+ * signals aren't delivered to the emulator which in turn results in
+ * a lot of defunct processes in the system.
+ *
+ * The problem seems to appear when a signal is frequently
+ * blocked/unblocked at the same time as the signal is frequently
+ * propagated. The child waiter thread is a workaround for this problem.
+ * The SIGCHLD signal is always blocked (in all threads), and the child
+ * waiter thread fetches the signal by a call to sigwait(). See
+ * child_waiter().
+ *
+ * This should be a non-issue since the fork:ing was moved outside of
+ * the emulator into erl_child_setup. I'm leaving the comment here
+ * for posterity. */
+
+static void handle_sigchld(int sig) {
+ int buff[2], res;
+
+ sys_sigblock(SIGCHLD);
+
+ while ((buff[0] = waitpid((pid_t)(-1), buff+1, WNOHANG)) > 0) {
+ do {
+ res = write(sigchld_pipe[1], buff, sizeof(buff));
+ } while (res < 0 && errno == EINTR);
+ if (res <= 0)
+ ABORT("Failed to write to sigchld_pipe (%d): %d (%d)", sigchld_pipe[1], res, errno);
+ DEBUG_PRINT("Reap child %d (%d)", buff[0], buff[1]);
+ }
+
+ sys_sigrelease(SIGCHLD);
+}
+
+#if defined(__ANDROID__)
+static int system_properties_fd(void)
+{
+ static int fd = -2;
+ char *env;
+
+ if (fd != -2) return fd;
+ env = getenv("ANDROID_PROPERTY_WORKSPACE");
+ if (!env) {
+ fd = -1;
+ return -1;
+ }
+ fd = atoi(env);
+ return fd;
+}
+#endif /* __ANDROID__ */
+
+int
+main(int argc, char *argv[])
+{
+ /* This fd should be open from beam */
+ int uds_fd = 3, max_fd = 3;
+#ifndef HAVE_CLOSEFROM
+ int i;
+#endif
+ struct sigaction sa;
+
+ if (argc < 1 || sscanf(argv[1],"%d",&max_files) != 1) {
+ ABORT("Invalid arguments to child_setup");
+ }
+
+/* We close all fds except the uds from beam.
+ All other fds from now on will have the
+ CLOEXEC flags set on them. This means that we
+ only have to close a very limited number of fds
+ after we fork before the exec. */
+#if defined(HAVE_CLOSEFROM)
+ closefrom(4);
+#else
+ for (i = 4; i < max_files; i++)
+#if defined(__ANDROID__)
+ if (i != system_properties_fd())
+#endif
+ (void) close(i);
+#endif
+
+ if (pipe(sigchld_pipe) < 0) {
+ ABORT("Failed to setup sigchld pipe (%d)", errno);
+ }
+
+ SET_CLOEXEC(sigchld_pipe[0]);
+ SET_CLOEXEC(sigchld_pipe[1]);
+
+ max_fd = max_fd < sigchld_pipe[0] ? sigchld_pipe[0] : max_fd;
+
+ sa.sa_handler = &handle_sigchld;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_RESTART | SA_NOCLDSTOP;
+ if (sigaction(SIGCHLD, &sa, 0) == -1) {
+ perror(0);
+ exit(1);
+ }
+
+ forker_hash_init();
+
+ SET_CLOEXEC(uds_fd);
+
+ DEBUG_PRINT("Starting forker %d", max_files);
+
+ while (1) {
+ fd_set read_fds;
+ int res;
+ FD_ZERO(&read_fds);
+ FD_SET(uds_fd, &read_fds);
+ FD_SET(sigchld_pipe[0], &read_fds);
+ DEBUG_PRINT("child_setup selecting on %d, %d (%d)",
+ uds_fd, sigchld_pipe[0], max_fd);
+ res = select(max_fd+1, &read_fds, NULL, NULL, NULL);
+
+ if (res < 0) {
+ if (errno == EINTR) continue;
+ ABORT("Select failed: %d (%d)",res, errno);
+ }
+
+ if (FD_ISSET(uds_fd, &read_fds)) {
+ int pipes[3], res, os_pid;
+ ErtsSysForkerProto proto;
+ errno = 0;
+ if ((res = sys_uds_read(uds_fd, (char*)&proto, sizeof(proto),
+ pipes, 3, MSG_DONTWAIT)) < 0) {
+ if (errno == EINTR)
+ continue;
+ DEBUG_PRINT("erl_child_setup failed to read from uds: %d, %d", res, errno);
+ _exit(0);
+ }
+
+ if (res == 0) {
+ DEBUG_PRINT("uds was closed!");
+ _exit(0);
+ }
+ /* Since we use unix domain sockets and send the entire data in
+ one go we *should* get the entire payload at once. */
+ ASSERT(res == sizeof(proto));
+ ASSERT(proto.action == ErtsSysForkerProtoAction_Start);
+
+ sys_sigblock(SIGCHLD);
+
+ errno = 0;
+
+ os_pid = fork();
+ if (os_pid == 0)
+ start_new_child(pipes);
+
+ add_os_pid_to_port_id_mapping(proto.u.start.port_id, os_pid);
+
+ /* We write an ack here, but expect the reply on
+ the pipes[0] inside the fork */
+ proto.action = ErtsSysForkerProtoAction_Go;
+ proto.u.go.os_pid = os_pid;
+ proto.u.go.error_number = errno;
+ while (write(pipes[1], &proto, sizeof(proto)) < 0 && errno == EINTR)
+ ; /* remove gcc warning */
+
+#ifdef FORKER_PROTO_START_ACK
+ proto.action = ErtsSysForkerProtoAction_StartAck;
+ while (write(uds_fd, &proto, sizeof(proto)) < 0 && errno == EINTR)
+ ; /* remove gcc warning */
+#endif
+
+ sys_sigrelease(SIGCHLD);
+ close(pipes[0]);
+ close(pipes[1]);
+ close(pipes[2]);
+ }
+
+ if (FD_ISSET(sigchld_pipe[0], &read_fds)) {
+ int ibuff[2];
+ ErtsSysForkerProto proto;
+ res = read(sigchld_pipe[0], ibuff, sizeof(ibuff));
+ if (res <= 0) {
+ if (errno == EINTR)
+ continue;
+ ABORT("Failed to read from sigchld pipe: %d (%d)", res, errno);
+ }
+
+ proto.u.sigchld.port_id = get_port_id((pid_t)(ibuff[0]));
+
+ if (proto.u.sigchld.port_id == THE_NON_VALUE)
+ continue; /* exit status report not requested */
+
+ proto.action = ErtsSysForkerProtoAction_SigChld;
+ proto.u.sigchld.error_number = ibuff[1];
+ DEBUG_PRINT("send %s to %d", buff, uds_fd);
+ if (write(uds_fd, &proto, sizeof(proto)) < 0) {
+ if (errno == EINTR)
+ continue;
+ /* The uds was close, which most likely means that the VM
+ has exited. This will be detected when we try to read
+ from the uds_fd. */
+ DEBUG_PRINT("Failed to write to uds: %d (%d)", uds_fd, errno);
+ }
+ }
+ }
+ return 1;
+}
+
+typedef struct exit_status {
+ HashBucket hb;
+ pid_t os_pid;
+ Eterm port_id;
+} ErtsSysExitStatus;
+
+static Hash *forker_hash;
+
+static void add_os_pid_to_port_id_mapping(Eterm port_id, pid_t os_pid)
+{
+ if (port_id != THE_NON_VALUE) {
+ /* exit status report requested */
+ ErtsSysExitStatus es;
+ es.os_pid = os_pid;
+ es.port_id = port_id;
+ hash_put(forker_hash, &es);
+ }
+}
+
+static Eterm get_port_id(pid_t os_pid)
+{
+ ErtsSysExitStatus est, *es;
+ Eterm port_id;
+ est.os_pid = os_pid;
+ es = hash_remove(forker_hash, &est);
+ if (!es) return THE_NON_VALUE;
+ port_id = es->port_id;
+ free(es);
+ return port_id;
+}
+
+static int fcmp(void *a, void *b)
+{
+ ErtsSysExitStatus *sa = a;
+ ErtsSysExitStatus *sb = b;
+ return !(sa->os_pid == sb->os_pid);
+}
+
+static HashValue fhash(void *e)
+{
+ ErtsSysExitStatus *se = e;
+ Uint32 val = se->os_pid;
+ val = (val+0x7ed55d16) + (val<<12);
+ val = (val^0xc761c23c) ^ (val>>19);
+ val = (val+0x165667b1) + (val<<5);
+ val = (val+0xd3a2646c) ^ (val<<9);
+ val = (val+0xfd7046c5) + (val<<3);
+ val = (val^0xb55a4f09) ^ (val>>16);
+ return val;
+}
+
+static void *falloc(void *e)
+{
+ ErtsSysExitStatus *se = e;
+ ErtsSysExitStatus *ne = malloc(sizeof(ErtsSysExitStatus));
+ ne->os_pid = se->os_pid;
+ ne->port_id = se->port_id;
+ return ne;
+}
+
+static void *meta_alloc(int type, size_t size) { return malloc(size); }
+static void meta_free(int type, void *p) { free(p); }
+
+static int forker_hash_init(void)
+{
+ HashFunctions forker_hash_functions;
+ forker_hash_functions.hash = fhash;
+ forker_hash_functions.cmp = fcmp;
+ forker_hash_functions.alloc = falloc;
+ forker_hash_functions.free = free;
+ forker_hash_functions.meta_alloc = meta_alloc;
+ forker_hash_functions.meta_free = meta_free;
+ forker_hash_functions.meta_print = NULL;
+
+ forker_hash = hash_new(0, "forker_hash",
+ 16, forker_hash_functions);
+
return 1;
}
diff --git a/erts/emulator/sys/unix/erl_child_setup.h b/erts/emulator/sys/unix/erl_child_setup.h
new file mode 100644
index 0000000000..a28b136bfc
--- /dev/null
+++ b/erts/emulator/sys/unix/erl_child_setup.h
@@ -0,0 +1,77 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2015-2015. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ *
+ * This file defines the interface inbetween erts and child_setup.
+ */
+
+#ifndef _ERL_UNIX_FORKER_H
+#define _ERL_UNIX_FORKER_H
+
+#include "sys.h"
+
+#ifdef __FreeBSD__
+/* The freebsd sendmsg man page explicitly states that
+ you should not close fds before they are known
+ to have reached the other side, so this Ack protects
+ against that. */
+#define FORKER_PROTO_START_ACK 1
+#endif
+
+#define FORKER_ARGV_NO_OF_ARGS 3
+#define FORKER_ARGV_PROGNAME_IX 0 /* Program name */
+#define FORKER_ARGV_MAX_FILES 1 /* max_files */
+
+#define FORKER_FLAG_USE_STDIO (1 << 0) /* dup the pipe to stdin/stderr */
+#define FORKER_FLAG_EXIT_STATUS (1 << 1) /* send the exit status to parent */
+#define FORKER_FLAG_DO_READ (1 << 2) /* dup write fd */
+#define FORKER_FLAG_DO_WRITE (1 << 3) /* dup read fd */
+
+#if SIZEOF_VOID_P == SIZEOF_LONG
+typedef unsigned long ErtsSysPortId;
+#elif SIZEOF_VOID_P == SIZEOF_INT
+typedef unsigned int ErtsSysPortId;
+#elif SIZEOF_VOID_P == SIZEOF_LONG_LONG
+typedef unsigned long long ErtsSysPortId;
+#endif
+
+typedef struct ErtsSysForkerProto_ {
+ enum {
+ ErtsSysForkerProtoAction_Start,
+ ErtsSysForkerProtoAction_StartAck,
+ ErtsSysForkerProtoAction_Go,
+ ErtsSysForkerProtoAction_SigChld,
+ ErtsSysForkerProtoAction_Ack
+ } action;
+ union {
+ struct {
+ ErtsSysPortId port_id;
+ int fds[3];
+ } start;
+ struct {
+ pid_t os_pid;
+ int error_number;
+ } go;
+ struct {
+ ErtsSysPortId port_id;
+ int error_number;
+ } sigchld;
+ } u;
+} ErtsSysForkerProto;
+
+#endif /* #ifndef _ERL_UNIX_FORKER_H */
diff --git a/erts/emulator/sys/unix/erl_main.c b/erts/emulator/sys/unix/erl_main.c
index b26f93f77e..972b93a505 100644
--- a/erts/emulator/sys/unix/erl_main.c
+++ b/erts/emulator/sys/unix/erl_main.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2000-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2000-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index c8fcec8547..3a0d23cd36 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2011. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*
@@ -29,9 +30,7 @@
#include <limits.h>
#include <stdlib.h>
#include <string.h>
-#ifndef QNX
#include <memory.h>
-#endif
#if defined(__sun__) && defined(__SVR4) && !defined(__EXTENSIONS__)
# define __EXTENSIONS__
@@ -45,7 +44,7 @@
#include <fcntl.h>
#include "erl_errno.h"
#include <signal.h>
-
+#include <setjmp.h>
#if HAVE_SYS_SOCKETIO_H
# include <sys/socketio.h>
@@ -91,11 +90,6 @@
#include <ieeefp.h>
#endif
-#ifdef QNX
-#include <process.h>
-#include <sys/qnx_glob.h>
-#endif
-
#include <pwd.h>
#ifndef HZ
@@ -107,14 +101,17 @@
#endif
#include <netdb.h>
+#ifdef HAVE_MACH_ABSOLUTE_TIME
+#include <mach/mach_time.h>
+#endif
+
+#ifdef HAVE_POSIX_MEMALIGN
+# define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1
+#endif
+
/*
* Make sure that MAXPATHLEN is defined.
*/
-#ifdef GETHRTIME_WITH_CLOCK_GETTIME
-#undef HAVE_GETHRTIME
-#define HAVE_GETHRTIME 1
-#endif
-
#ifndef MAXPATHLEN
# ifdef PATH_MAX
# define MAXPATHLEN PATH_MAX
@@ -123,24 +120,19 @@
# endif
#endif
+/*
+ * Min number of async threads
+ */
+#define ERTS_MIN_NO_OF_ASYNC_THREADS 0
+
/* File descriptors are numbers anc consecutively allocated on Unix */
#define ERTS_SYS_CONTINOUS_FD_NUMBERS
-#define HAVE_ERTS_CHECK_IO_DEBUG
-int erts_check_io_debug(void);
-
#ifndef ERTS_SMP
# undef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
# define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
#endif
-#ifndef ENABLE_CHILD_WAITER_THREAD
-# ifdef ERTS_SMP
-# define ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
-void erts_check_children(void);
-# endif
-#endif
-
typedef void *GETENV_STATE;
/*
@@ -154,35 +146,155 @@ typedef struct timeval SysTimeval;
typedef struct tms SysTimes;
-extern int erts_ticks_per_sec;
-
-#define SYS_CLK_TCK (erts_ticks_per_sec)
+#define SYS_CLK_TCK (erts_sys_time_data__.r.o.ticks_per_sec)
#define sys_times(Arg) times(Arg)
-#define ERTS_WRAP_SYS_TIMES 1
-extern int erts_ticks_per_sec_wrap;
-#define SYS_CLK_TCK_WRAP (erts_ticks_per_sec_wrap)
-extern clock_t sys_times_wrap(void);
+#if SIZEOF_LONG == 8
+typedef long ErtsMonotonicTime;
+typedef long ErtsSysHrTime;
+#elif SIZEOF_LONG_LONG == 8
+typedef long long ErtsMonotonicTime;
+typedef long long ErtsSysHrTime;
+#else
+#error No signed 64-bit type found...
+#endif
-#ifdef HAVE_GETHRTIME
-#ifdef GETHRTIME_WITH_CLOCK_GETTIME
-typedef long long SysHrTime;
+typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsSysHrTime ErtsSysPerfCounter;
-extern SysHrTime sys_gethrtime(void);
-#define sys_init_hrtime() /* Nothing */
+#define ERTS_MONOTONIC_TIME_MIN ((ErtsMonotonicTime) (1ULL << 63))
+#define ERTS_MONOTONIC_TIME_MAX (~ERTS_MONOTONIC_TIME_MIN)
-#else /* Real gethrtime (Solaris) */
+/*
+ * OS monotonic time and OS system time
+ */
+#undef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
-typedef hrtime_t SysHrTime;
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) \
+ && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+# if defined(__linux__)
+# define ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__ 1
+# endif
+#endif
-#define sys_gethrtime() gethrtime()
-#define sys_init_hrtime() /* Nothing */
+ErtsSystemTime erts_os_system_time(void);
-#endif /* GETHRTIME_WITH_CLOCK_GETTIME */
-#endif /* HAVE_GETHRTIME */
+#undef ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT
+#undef ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT
+#undef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
-#if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME))
+#if defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+# define ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT 1
+# define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT (1000*1000*1000)
+# if defined(__linux__)
+# define ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__ 1
+# endif
+#elif defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME)
+# define ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT 1
+# define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT (1000*1000*1000)
+#elif defined(OS_MONOTONIC_TIME_USING_GETHRTIME)
+# define ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT 1
+# define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT (1000*1000*1000)
+#elif defined(OS_MONOTONIC_TIME_USING_TIMES)
+# define ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT 1
+/* Time unit determined at runtime... */
+# define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT 0
+#else /* No OS monotonic available... */
+# define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT (1000*1000)
+#endif
+
+/*
+ * erts_sys_hrtime() is the highest resolution
+ * time function found. Time unit is nano-seconds.
+ * It may or may not be monotonic.
+ */
+ErtsSysHrTime erts_sys_hrtime(void);
+#define ERTS_HRTIME_UNIT (1000*1000*1000)
+
+struct erts_sys_time_read_only_data__ {
+#ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
+ ErtsMonotonicTime (*os_monotonic_time)(void);
+#endif
+#ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
+ void (*os_times)(ErtsMonotonicTime *, ErtsSystemTime *);
+#endif
+ ErtsSysPerfCounter (*perf_counter)(void);
+ ErtsSysPerfCounter perf_counter_unit;
+ int ticks_per_sec;
+};
+
+typedef struct {
+ union {
+ struct erts_sys_time_read_only_data__ o;
+ char align__[(((sizeof(struct erts_sys_time_read_only_data__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } r;
+} ErtsSysTimeData__;
+
+extern ErtsSysTimeData__ erts_sys_time_data__;
+
+#ifdef ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT
+
+#ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
+ERTS_GLB_INLINE
+#endif
+ErtsMonotonicTime erts_os_monotonic_time(void);
+
+#ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
+ERTS_GLB_INLINE
+#endif
+void erts_os_times(ErtsMonotonicTime *, ErtsSystemTime *);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__
+
+ERTS_GLB_INLINE ErtsMonotonicTime
+erts_os_monotonic_time(void)
+{
+ return (*erts_sys_time_data__.r.o.os_monotonic_time)();
+}
+
+#endif /* ERTS_OS_MONOTONIC_INLINE_FUNC_PTR_CALL__ */
+
+#ifdef ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__
+
+ERTS_GLB_INLINE void
+erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ return (*erts_sys_time_data__.r.o.os_times)(mtimep, stimep);
+}
+
+#endif /* ERTS_OS_TIMES_INLINE_FUNC_PTR_CALL__ */
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+#endif /* ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT */
+
+/*
+ * Functions for getting the performance counter
+ */
+
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
+#define erts_sys_perf_counter_unit() erts_sys_time_data__.r.o.perf_counter_unit
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter()
+{
+ return (*erts_sys_time_data__.r.o.perf_counter)();
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+/*
+ * Functions for measuring CPU time
+ */
+
+#if (defined(HAVE_GETHRVTIME) || defined(HAVE_CLOCK_GETTIME_CPU_TIME))
typedef long long SysCpuTime;
typedef struct timespec SysTimespec;
@@ -194,7 +306,7 @@ typedef struct timespec SysTimespec;
int sys_start_hrvtime(void);
int sys_stop_hrvtime(void);
-#elif defined(HAVE_CLOCK_GETTIME)
+#elif defined(HAVE_CLOCK_GETTIME_CPU_TIME)
#define sys_clock_gettime(cid,tp) clock_gettime((cid),&(tp))
#define sys_get_proc_cputime(t,tp) sys_clock_gettime(CLOCK_PROCESS_CPUTIME_ID,(tp))
@@ -205,26 +317,39 @@ int sys_stop_hrvtime(void);
#define SYS_CLOCK_RESOLUTION 1
/* These are defined in sys.c */
-#if defined(SIG_SIGSET) /* Old SysV */
-RETSIGTYPE (*sys_sigset())();
-#elif defined(SIG_SIGNAL) /* Old BSD */
-RETSIGTYPE (*sys_sigset())();
-#else
-RETSIGTYPE (*sys_sigset(int, RETSIGTYPE (*func)(int)))(int);
-#endif
+typedef void (*SIGFUNC)(int);
+extern SIGFUNC sys_signal(int, SIGFUNC);
extern void sys_sigrelease(int);
extern void sys_sigblock(int);
-extern void sys_stop_cat(void);
+extern void sys_init_suspend_handler(void);
/*
* Handling of floating point exceptions.
*/
#ifdef USE_ISINF_ISNAN /* simulate finite() */
-# define finite(f) (!isinf(f) && !isnan(f))
-# define HAVE_FINITE
+# define isfinite(f) (!isinf(f) && !isnan(f))
+# define HAVE_ISFINITE
+#elif (defined(__GNUC__) && !defined(__llvm__)) && defined(HAVE_FINITE)
+/* We use finite in gcc as it emits assembler instead of
+ the function call that isfinite emits. The assembler is
+ significantly faster. */
+# ifdef isfinite
+# undef isfinite
+# endif
+# define isfinite finite
+# ifndef HAVE_ISFINITE
+# define HAVE_ISFINITE
+# endif
+#elif defined(isfinite) && !defined(HAVE_ISFINITE)
+# define HAVE_ISFINITE
+#elif !defined(HAVE_ISFINITE) && defined(HAVE_FINITE)
+# define isfinite finite
+# define HAVE_ISFINITE
#endif
+#define erts_isfinite isfinite
+
#ifdef NO_FPE_SIGNALS
#define erts_get_current_fp_exception() NULL
@@ -232,7 +357,7 @@ extern void sys_stop_cat(void);
#define erts_thread_init_fp_exception() do{}while(0)
#endif
# define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0)
-# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!finite(f)) { Action; } else {}
+# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!isfinite(f)) { Action; } else {}
# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action)
# define __ERTS_SAVE_FP_EXCEPTION(fpexnp)
# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp)
@@ -296,7 +421,7 @@ static __inline__ void __ERTS_FP_CHECK_INIT(volatile unsigned long *fp_exception
code to always throw floating-point exceptions on errors. */
static __inline__ int erts_check_fpe_thorough(volatile unsigned long *fp_exception, double f)
{
- return erts_check_fpe(fp_exception, f) || !finite(f);
+ return erts_check_fpe(fp_exception, f) || !isfinite(f);
}
# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) \
do { if (erts_check_fpe_thorough((fpexnp),(f))) { Action; } } while (0)
@@ -311,19 +436,6 @@ void erts_sys_unblock_fpe(int);
#define ERTS_FP_ERROR_THOROUGH(p, f, A) __ERTS_FP_ERROR_THOROUGH(&(p)->fp_exception, f, A)
-#ifdef NEED_CHILD_SETUP_DEFINES
-/* The child setup argv[] */
-#define CS_ARGV_PROGNAME_IX 0 /* Program name */
-#define CS_ARGV_UNBIND_IX 1 /* Unbind from cpu */
-#define CS_ARGV_WD_IX 2 /* Working directory */
-#define CS_ARGV_CMD_IX 3 /* Command */
-#define CS_ARGV_FD_CR_IX 4 /* Fd close range */
-#define CS_ARGV_DUP2_OP_IX(N) ((N) + 5) /* dup2 operations */
-
-#define CS_ARGV_NO_OF_DUP2_OPS 3 /* Number of dup2 ops */
-#define CS_ARGV_NO_OF_ARGS 8 /* Number of arguments */
-#endif /* #ifdef NEED_CHILD_SETUP_DEFINES */
-
/* Threads */
#ifdef USE_THREADS
extern int init_async(int);
@@ -332,4 +444,28 @@ extern int exit_async(void);
#define ERTS_EXIT_AFTER_DUMP _exit
+#if !defined(__APPLE__) && !defined(__MACH__)
+/* Some OS X versions do not allow (ab)using signal handlers like this */
+#define ERTS_HAVE_TRY_CATCH 1
+
+/* We try to simulate a try catch in C with the help of signal handlers.
+ * Only use this as a very last resort, as it is not very portable and
+ * quite unstable. It is also not thread safe, so make sure that only
+ * one thread can call this at a time!
+ */
+extern void erts_sys_sigsegv_handler(int);
+extern jmp_buf erts_sys_sigsegv_jmp;
+#define ERTS_SYS_TRY_CATCH(EXPR,CATCH) \
+ do { \
+ SIGFUNC prev_handler = sys_signal(SIGSEGV, \
+ erts_sys_sigsegv_handler); \
+ if (!setjmp(erts_sys_sigsegv_jmp)) { \
+ EXPR; \
+ } else { \
+ CATCH; \
+ } \
+ sys_signal(SIGSEGV,prev_handler); \
+ } while(0)
+#endif
+
#endif /* #ifndef _ERL_UNIX_SYS_H */
diff --git a/erts/emulator/sys/unix/erl_unix_sys_ddll.c b/erts/emulator/sys/unix/erl_unix_sys_ddll.c
index 12c47d0088..8f1ceac883 100644
--- a/erts/emulator/sys/unix/erl_unix_sys_ddll.c
+++ b/erts/emulator/sys/unix/erl_unix_sys_ddll.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -101,7 +102,7 @@ void erl_sys_ddll_init(void) {
/*
* Open a shared object
*/
-int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* err)
+int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err)
{
#if defined(HAVE_DLOPEN)
char* dlname;
@@ -123,6 +124,7 @@ int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError*
int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err)
{
+#if defined(HAVE_DLOPEN)
int ret = ERL_DE_NO_ERROR;
char *str;
dlerror();
@@ -148,6 +150,9 @@ int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err)
ret = ERL_DE_DYNAMIC_ERROR_OFFSET - find_errcode(str, err);
}
return ret;
+#else
+ return ERL_DE_ERROR_NO_DDLL_FUNCTIONALITY;
+#endif
}
/*
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index a7ea4b2490..6fb86f6dda 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1996-2013. All Rights Reserved.
+ * Copyright Ericsson AB 1996-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -34,6 +35,7 @@
#include <termios.h>
#include <ctype.h>
#include <sys/utsname.h>
+#include <sys/select.h>
#ifdef ISC32
#include <sys/bsdtypes.h>
@@ -47,7 +49,6 @@
#include <sys/ioctl.h>
#endif
-#define NEED_CHILD_SETUP_DEFINES
#define ERTS_WANT_BREAK_HANDLING
#define ERTS_WANT_GOT_SIGUSR1
#define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */
@@ -65,7 +66,7 @@
#include "erl_mseg.h"
extern char **environ;
-static erts_smp_rwmtx_t environ_rwmtx;
+erts_smp_rwmtx_t environ_rwmtx;
#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O
* vector sock_sendv().
@@ -74,68 +75,12 @@ static erts_smp_rwmtx_t environ_rwmtx;
* Don't need global.h, but bif_table.h (included by bif.h),
* won't compile otherwise
*/
-#include "global.h"
+#include "global.h"
#include "bif.h"
-#include "erl_sys_driver.h"
#include "erl_check_io.h"
#include "erl_cpu_topology.h"
-#ifndef DISABLE_VFORK
-#define DISABLE_VFORK 0
-#endif
-
-#ifdef USE_THREADS
-# ifdef ENABLE_CHILD_WAITER_THREAD
-# define CHLDWTHR ENABLE_CHILD_WAITER_THREAD
-# else
-# define CHLDWTHR 0
-# endif
-#else
-# define CHLDWTHR 0
-#endif
-/*
- * [OTP-3906]
- * Solaris signal management gets confused when threads are used and a
- * lot of child processes dies. The confusion results in that SIGCHLD
- * signals aren't delivered to the emulator which in turn results in
- * a lot of defunct processes in the system.
- *
- * The problem seems to appear when a signal is frequently
- * blocked/unblocked at the same time as the signal is frequently
- * propagated. The child waiter thread is a workaround for this problem.
- * The SIGCHLD signal is always blocked (in all threads), and the child
- * waiter thread fetches the signal by a call to sigwait(). See
- * child_waiter().
- */
-
-typedef struct ErtsSysReportExit_ ErtsSysReportExit;
-struct ErtsSysReportExit_ {
- ErtsSysReportExit *next;
- Eterm port;
- int pid;
- int ifd;
- int ofd;
-#if CHLDWTHR && !defined(ERTS_SMP)
- int status;
-#endif
-};
-
-/* This data is shared by these drivers - initialized by spawn_init() */
-static struct driver_data {
- ErlDrvPort port_num;
- int ofd, packet_bytes;
- ErtsSysReportExit *report_exit;
- int pid;
- int alive;
- int status;
-} *driver_data; /* indexed by fd */
-
-static ErtsSysReportExit *report_exit_list;
-#if CHLDWTHR && !defined(ERTS_SMP)
-static ErtsSysReportExit *report_exit_transit_list;
-#endif
-
extern int driver_interrupt(int, int);
extern void do_break(void);
@@ -147,26 +92,6 @@ extern void erts_sys_init_float(void);
extern void erl_crash_dump(char* file, int line, char* fmt, ...);
-#define DIR_SEPARATOR_CHAR '/'
-
-#if defined(DEBUG)
-#define ERL_BUILD_TYPE_MARKER ".debug"
-#elif defined(PURIFY)
-#define ERL_BUILD_TYPE_MARKER ".purify"
-#elif defined(QUANTIFY)
-#define ERL_BUILD_TYPE_MARKER ".quantify"
-#elif defined(PURECOV)
-#define ERL_BUILD_TYPE_MARKER ".purecov"
-#elif defined(VALGRIND)
-#define ERL_BUILD_TYPE_MARKER ".valgrind"
-#else /* opt */
-#define ERL_BUILD_TYPE_MARKER
-#endif
-
-#define CHILD_SETUP_PROG_NAME "child_setup" ERL_BUILD_TYPE_MARKER
-#if !DISABLE_VFORK
-static char *child_setup_prog;
-#endif
#ifdef DEBUG
static int debug_log = 0;
@@ -190,54 +115,22 @@ static volatile int have_prepared_crash_dump;
(have_prepared_crash_dump++)
#endif
-static erts_smp_atomic_t sys_misc_mem_sz;
+erts_smp_atomic_t sys_misc_mem_sz;
#if defined(ERTS_SMP)
static void smp_sig_notify(char c);
static int sig_notify_fds[2] = {-1, -1};
-#elif defined(USE_THREADS)
-static int async_fd[2];
-#endif
-#if CHLDWTHR || defined(ERTS_SMP)
-erts_mtx_t chld_stat_mtx;
-#endif
-#if CHLDWTHR
-static erts_tid_t child_waiter_tid;
-/* chld_stat_mtx is used to protect against concurrent accesses
- of the driver_data fields pid, alive, and status. */
-erts_cnd_t chld_stat_cnd;
-static long children_alive;
-#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx)
-#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx)
-#define CHLD_STAT_WAIT erts_cnd_wait(&chld_stat_cnd, &chld_stat_mtx)
-#define CHLD_STAT_SIGNAL erts_cnd_signal(&chld_stat_cnd)
-#elif defined(ERTS_SMP) /* ------------------------------------------------- */
-#define CHLD_STAT_LOCK erts_mtx_lock(&chld_stat_mtx)
-#define CHLD_STAT_UNLOCK erts_mtx_unlock(&chld_stat_mtx)
-
-#else /* ------------------------------------------------------------------- */
-#define CHLD_STAT_LOCK
-#define CHLD_STAT_UNLOCK
-static volatile int children_died;
+#if !defined(ETHR_UNUSABLE_SIGUSRX) && defined(ERTS_THR_HAVE_SIG_FUNCS)
+static int sig_suspend_fds[2] = {-1, -1};
+#define ERTS_SYS_SUSPEND_SIGNAL SIGUSR2
#endif
+#endif
-static struct fd_data {
- char pbuf[4]; /* hold partial packet bytes */
- int psz; /* size of pbuf */
- char *buf;
- char *cpos;
- int sz;
- int remain; /* for input on fd */
-} *fd_data; /* indexed by fd */
-
-/* static FUNCTION(int, write_fill, (int, char*, int)); unused? */
-static void note_child_death(int, int);
+jmp_buf erts_sys_sigsegv_jmp;
-#if CHLDWTHR
-static void* child_waiter(void *);
-#endif
+static int crashdump_companion_cube_fd = -1;
/********************* General functions ****************************/
@@ -273,11 +166,11 @@ struct {
int (*event)(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
void (*check_io_as_interrupt)(void);
void (*check_io_interrupt)(int);
- void (*check_io_interrupt_tmd)(int, erts_short_time_t);
+ void (*check_io_interrupt_tmd)(int, ErtsMonotonicTime);
void (*check_io)(int);
Uint (*size)(void);
Eterm (*info)(void *);
- int (*check_io_debug)(void);
+ int (*check_io_debug)(ErtsCheckIoDebugInfo *);
} io_func = {0};
@@ -299,9 +192,9 @@ Eterm erts_check_io_info(void *p)
}
int
-erts_check_io_debug(void)
+erts_check_io_debug(ErtsCheckIoDebugInfo *ip)
{
- return (*io_func.check_io_debug)();
+ return (*io_func.check_io_debug)(ip);
}
@@ -379,9 +272,9 @@ erts_sys_schedule_interrupt(int set)
#ifdef ERTS_SMP
void
-erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec)
+erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time)
{
- ERTS_CHK_IO_INTR_TMD(set, msec);
+ ERTS_CHK_IO_INTR_TMD(set, timeout_time);
}
#endif
@@ -409,15 +302,18 @@ void sys_tty_reset(int exit_code)
#ifdef __tile__
/* Direct malloc to spread memory around the caches of multiple tiles. */
#include <malloc.h>
+#if defined(MALLOC_USE_HASH)
MALLOC_USE_HASH(1);
#endif
+#endif
#ifdef USE_THREADS
#ifdef ERTS_THR_HAVE_SIG_FUNCS
+
/*
* Child thread inherits parents signal mask at creation. In order to
- * guarantee that the main thread will receive all SIGINT, SIGCHLD, and
+ * guarantee that the main thread will receive all SIGINT, and
* SIGUSR1 signals sent to the process, we block these signals in the
* parent thread when creating a new thread.
*/
@@ -495,11 +391,14 @@ thr_create_prepare_child(void *vtcdp)
void
erts_sys_pre_init(void)
{
+#ifdef USE_THREADS
+ erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
+#endif
+
erts_printf_add_cr_to_stdout = 1;
erts_printf_add_cr_to_stderr = 1;
+
#ifdef USE_THREADS
- {
- erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
eid.thread_create_child_func = thr_create_prepare_child;
/* Before creation in parent */
@@ -510,29 +409,21 @@ erts_sys_pre_init(void)
#ifdef ERTS_THR_HAVE_SIG_FUNCS
sigemptyset(&thr_create_sigmask);
sigaddset(&thr_create_sigmask, SIGINT); /* block interrupt */
- sigaddset(&thr_create_sigmask, SIGCHLD); /* block child signals */
sigaddset(&thr_create_sigmask, SIGUSR1); /* block user defined signal */
#endif
erts_thr_init(&eid);
- report_exit_list = NULL;
-
#ifdef ERTS_ENABLE_LOCK_COUNT
erts_lcnt_init();
#endif
-#if CHLDWTHR || defined(ERTS_SMP)
- erts_mtx_init(&chld_stat_mtx, "child_status");
-#endif
-#if CHLDWTHR
-#ifndef ERTS_SMP
- report_exit_transit_list = NULL;
-#endif
- erts_cnd_init(&chld_stat_cnd);
- children_alive = 0;
-#endif
- }
+#endif /* USE_THREADS */
+
+ erts_init_sys_time_sup();
+
+#ifdef USE_THREADS
+
#ifdef ERTS_SMP
erts_smp_atomic32_init_nob(&erts_break_requested, 0);
erts_smp_atomic32_init_nob(&erts_got_sigusr1, 0);
@@ -542,49 +433,42 @@ erts_sys_pre_init(void)
erts_got_sigusr1 = 0;
have_prepared_crash_dump = 0;
#endif
-#if !CHLDWTHR && !defined(ERTS_SMP)
- children_died = 0;
-#endif
+
#endif /* USE_THREADS */
+
erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0);
+
+ {
+ /*
+ * Unfortunately we depend on fd 0,1,2 in the old shell code.
+ * So if for some reason we do not have those open when we start
+ * we have to open them here. Not doing this can cause the emulator
+ * to deadlock when reaping the fd_driver ports :(
+ */
+ int fd;
+ /* Make sure fd 0 is open */
+ if ((fd = open("/dev/null", O_RDONLY)) != 0)
+ close(fd);
+ /* Make sure fds 1 and 2 are open */
+ while (fd < 3) {
+ fd = open("/dev/null", O_WRONLY);
+ }
+ close(fd);
+ }
+
+ /* We need a file descriptor to close in the crashdump creation.
+ * We close this one to be sure we can get a fd for our real file ...
+ * so, we create one here ... a stone to carry all the way home.
+ */
+
+ crashdump_companion_cube_fd = open("/dev/null", O_RDONLY);
+
+ /* don't lose it, there will be cake */
}
void
erl_sys_init(void)
{
-#if !DISABLE_VFORK
- {
- int res;
- char bindir[MAXPATHLEN];
- size_t bindirsz = sizeof(bindir);
- Uint csp_path_sz;
-
- res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz);
- if (res != 0) {
- if (res < 0)
- erl_exit(-1,
- "Environment variable BINDIR is not set\n");
- if (res > 0)
- erl_exit(-1,
- "Value of environment variable BINDIR is too large\n");
- }
- if (bindir[0] != DIR_SEPARATOR_CHAR)
- erl_exit(-1,
- "Environment variable BINDIR does not contain an"
- " absolute path\n");
- csp_path_sz = (strlen(bindir)
- + 1 /* DIR_SEPARATOR_CHAR */
- + sizeof(CHILD_SETUP_PROG_NAME)
- + 1);
- child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz);
- erts_smp_atomic_add_nob(&sys_misc_mem_sz, csp_path_sz);
- erts_snprintf(child_setup_prog, csp_path_sz,
- "%s%c%s",
- bindir,
- DIR_SEPARATOR_CHAR,
- CHILD_SETUP_PROG_NAME);
- }
-#endif
#ifdef USE_SETLINEBUF
setlinebuf(stdout);
@@ -604,39 +488,7 @@ erl_sys_init(void)
/* signal handling */
-#ifdef SIG_SIGSET /* Old SysV */
-RETSIGTYPE (*sys_sigset(sig, func))()
-int sig;
-RETSIGTYPE (*func)();
-{
- return(sigset(sig, func));
-}
-void sys_sigblock(int sig)
-{
- sighold(sig);
-}
-void sys_sigrelease(int sig)
-{
- sigrelse(sig);
-}
-#else /* !SIG_SIGSET */
-#ifdef SIG_SIGNAL /* Old BSD */
-RETSIGTYPE (*sys_sigset(sig, func))(int, int)
-int sig;
-RETSIGTYPE (*func)();
-{
- return(signal(sig, func));
-}
-sys_sigblock(int sig)
-{
- sigblock(sig);
-}
-sys_sigrelease(int sig)
-{
- sigsetmask(sigblock(0) & ~sigmask(sig));
-}
-#else /* !SIG_SIGNAL */ /* The True Way - POSIX!:-) */
-RETSIGTYPE (*sys_sigset(int sig, RETSIGTYPE (*func)(int)))(int)
+SIGFUNC sys_signal(int sig, SIGFUNC func)
{
struct sigaction act, oact;
@@ -669,36 +521,47 @@ void sys_sigrelease(int sig)
sigaddset(&mask, sig);
sigprocmask(SIG_UNBLOCK, &mask, (sigset_t *)NULL);
}
-#endif /* !SIG_SIGNAL */
-#endif /* !SIG_SIGSET */
-#if (0) /* not used? -- gordon */
-static void (*break_func)();
-static RETSIGTYPE break_handler(int sig)
-{
-#ifdef QNX
- /* Turn off SIGCHLD during break processing */
- sys_sigblock(SIGCHLD);
-#endif
- (*break_func)();
-#ifdef QNX
- sys_sigrelease(SIGCHLD);
-#endif
+void erts_sys_sigsegv_handler(int signo) {
+ if (signo == SIGSEGV) {
+ longjmp(erts_sys_sigsegv_jmp, 1);
+ }
+}
+
+/*
+ * Function returns 1 if we can read from all values in between
+ * start and stop.
+ */
+int
+erts_sys_is_area_readable(char *start, char *stop) {
+ int fds[2];
+ if (!pipe(fds)) {
+ /* We let write try to figure out if the pointers are readable */
+ int res = write(fds[1], start, (char*)stop - (char*)start);
+ if (res == -1) {
+ close(fds[0]);
+ close(fds[1]);
+ return 0;
+ }
+ close(fds[0]);
+ close(fds[1]);
+ return 1;
+ }
+ return 0;
+
}
-#endif /* 0 */
static ERTS_INLINE int
prepare_crash_dump(int secs)
{
#define NUFBUF (3)
- int i, max;
+ int i;
char env[21]; /* enough to hold any 64-bit integer */
size_t envsz;
DeclareTmpHeapNoproc(heap,NUFBUF);
Port *heart_port;
Eterm *hp = heap;
Eterm list = NIL;
- int heart_fd[2] = {-1,-1};
int has_heart = 0;
UseTmpHeapNoproc(NUFBUF);
@@ -721,43 +584,22 @@ prepare_crash_dump(int secs)
alarm((unsigned int)secs);
}
- if (heart_port) {
- /* hearts input fd
- * We "know" drv_data is the in_fd since the port is started with read|write
- */
- heart_fd[0] = (int)heart_port->drv_data;
- heart_fd[1] = (int)driver_data[heart_fd[0]].ofd;
- has_heart = 1;
+ /* close all viable sockets via emergency close callbacks.
+ * Specifically we want to close epmd sockets.
+ */
- list = CONS(hp, make_small(8), list); hp += 2;
+ erts_emergency_close_ports();
+ if (heart_port) {
+ has_heart = 1;
+ list = CONS(hp, make_small(8), list); hp += 2;
/* send to heart port, CMD = 8, i.e. prepare crash dump =o */
erts_port_output(NULL, ERTS_PORT_SIG_FLG_FORCE_IMM_CALL, heart_port,
heart_port->common.id, list, NULL);
}
- /* Make sure we unregister at epmd (unknown fd) and get at least
- one free filedescriptor (for erl_crash.dump) */
-
- max = max_files;
- if (max < 1024)
- max = 1024;
- for (i = 3; i < max; i++) {
-#if defined(ERTS_SMP)
- /* We don't want to close the signal notification pipe... */
- if (i == sig_notify_fds[0] || i == sig_notify_fds[1])
- continue;
-#elif defined(USE_THREADS)
- /* We don't want to close the async notification pipe... */
- if (i == async_fd[0] || i == async_fd[1])
- continue;
-#endif
- /* We don't want to close our heart yet ... */
- if (i == heart_fd[0] || i == heart_fd[1])
- continue;
-
- close(i);
- }
+ /* Make sure we have a fd for our crashdump file. */
+ close(crashdump_companion_cube_fd);
envsz = sizeof(env);
i = erts_sys_getenv__("ERL_CRASH_DUMP_NICE", env, &envsz);
@@ -791,7 +633,7 @@ break_requested(void)
fprintf(stderr,"break!\n");
#endif
if (ERTS_BREAK_REQUESTED)
- erl_exit(ERTS_INTR_EXIT, "");
+ erts_exit(ERTS_INTR_EXIT, "");
ERTS_SET_BREAK_REQUESTED;
ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */
@@ -830,14 +672,28 @@ sigusr1_exit(void)
}
prepare_crash_dump(secs);
- erl_exit(1, "Received SIGUSR1\n");
+ erts_exit(ERTS_ERROR_EXIT, "Received SIGUSR1\n");
}
#ifdef ETHR_UNUSABLE_SIGUSRX
#warning "Unusable SIGUSR1 & SIGUSR2. Disabling use of these signals"
-#endif
-#ifndef ETHR_UNUSABLE_SIGUSRX
+#else
+
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
+void
+sys_thr_suspend(erts_tid_t tid) {
+ erts_thr_kill(tid, ERTS_SYS_SUSPEND_SIGNAL);
+}
+
+void
+sys_thr_resume(erts_tid_t tid) {
+ int i = 0, res;
+ do {
+ res = write(sig_suspend_fds[1],&i,sizeof(i));
+ } while (res < 0 && errno == EAGAIN);
+}
+#endif
#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL))
static RETSIGTYPE user_signal1(void)
@@ -852,27 +708,27 @@ static RETSIGTYPE user_signal1(int signum)
#endif
}
-#ifdef QUANTIFY
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL))
-static RETSIGTYPE user_signal2(void)
+static RETSIGTYPE suspend_signal(void)
#else
-static RETSIGTYPE user_signal2(int signum)
+static RETSIGTYPE suspend_signal(int signum)
#endif
{
-#ifdef ERTS_SMP
- smp_sig_notify('2');
-#else
- quantify_save_data();
-#endif
+ int res;
+ int buf[1];
+ do {
+ res = read(sig_suspend_fds[0], buf, sizeof(int));
+ } while (res < 0 && errno == EINTR);
}
-#endif
+#endif /* #ifdef ERTS_SYS_SUSPEND_SIGNAL */
#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */
static void
quit_requested(void)
{
- erl_exit(ERTS_INTR_EXIT, "");
+ erts_exit(ERTS_INTR_EXIT, "");
}
#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL))
@@ -890,9 +746,9 @@ static RETSIGTYPE do_quit(int signum)
/* Disable break */
void erts_set_ignore_break(void) {
- sys_sigset(SIGINT, SIG_IGN);
- sys_sigset(SIGQUIT, SIG_IGN);
- sys_sigset(SIGTSTP, SIG_IGN);
+ sys_signal(SIGINT, SIG_IGN);
+ sys_signal(SIGQUIT, SIG_IGN);
+ sys_signal(SIGTSTP, SIG_IGN);
}
/* Don't use ctrl-c for break handler but let it be
@@ -915,65 +771,24 @@ void erts_replace_intr(void) {
void init_break_handler(void)
{
- sys_sigset(SIGINT, request_break);
+ sys_signal(SIGINT, request_break);
#ifndef ETHR_UNUSABLE_SIGUSRX
- sys_sigset(SIGUSR1, user_signal1);
-#ifdef QUANTIFY
- sys_sigset(SIGUSR2, user_signal2);
-#endif
+ sys_signal(SIGUSR1, user_signal1);
#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */
- sys_sigset(SIGQUIT, do_quit);
-}
-
-int sys_max_files(void)
-{
- return(max_files);
+ sys_signal(SIGQUIT, do_quit);
}
-static void block_signals(void)
+void sys_init_suspend_handler(void)
{
-#if !CHLDWTHR
- sys_sigblock(SIGCHLD);
-#endif
-#ifndef ERTS_SMP
- sys_sigblock(SIGINT);
-#ifndef ETHR_UNUSABLE_SIGUSRX
- sys_sigblock(SIGUSR1);
-#endif
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
+ sys_signal(ERTS_SYS_SUSPEND_SIGNAL, suspend_signal);
#endif
}
-static void unblock_signals(void)
-{
- /* Update erl_child_setup.c if changed */
-#if !CHLDWTHR
- sys_sigrelease(SIGCHLD);
-#endif
-#ifndef ERTS_SMP
- sys_sigrelease(SIGINT);
-#ifndef ETHR_UNUSABLE_SIGUSRX
- sys_sigrelease(SIGUSR1);
-#endif /* #ifndef ETHR_UNUSABLE_SIGUSRX */
-#endif
-}
-/************************** Time stuff **************************/
-#ifdef HAVE_GETHRTIME
-#ifdef GETHRTIME_WITH_CLOCK_GETTIME
-
-SysHrTime sys_gethrtime(void)
+int sys_max_files(void)
{
- struct timespec ts;
- long long result;
- if (clock_gettime(CLOCK_MONOTONIC,&ts) != 0) {
- erl_exit(1,"Fatal, could not get clock_monotonic value!, "
- "errno = %d\n", errno);
- }
- result = ((long long) ts.tv_sec) * 1000000000LL +
- ((long long) ts.tv_nsec);
- return (SysHrTime) result;
+ return(max_files);
}
-#endif
-#endif
/************************** OS info *******************************/
@@ -1062,1306 +877,6 @@ void fini_getenv_state(GETENV_STATE *state)
erts_smp_rwmtx_runlock(&environ_rwmtx);
}
-
-/************************** Port I/O *******************************/
-
-
-
-/* I. Common stuff */
-
-/*
- * Decreasing the size of it below 16384 is not allowed.
- */
-
-/* II. The spawn/fd/vanilla drivers */
-
-#define ERTS_SYS_READ_BUF_SZ (64*1024)
-
-/* Driver interfaces */
-static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
-static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
-static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT,
- char **, ErlDrvSizeT);
-static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*);
-static int spawn_init(void);
-static void fd_stop(ErlDrvData);
-static void stop(ErlDrvData);
-static void ready_input(ErlDrvData, ErlDrvEvent);
-static void ready_output(ErlDrvData, ErlDrvEvent);
-static void output(ErlDrvData, char*, ErlDrvSizeT);
-static void outputv(ErlDrvData, ErlIOVec*);
-static void stop_select(ErlDrvEvent, void*);
-
-struct erl_drv_entry spawn_driver_entry = {
- spawn_init,
- spawn_start,
- stop,
- output,
- ready_input,
- ready_output,
- "spawn",
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- ERL_DRV_FLAG_USE_PORT_LOCKING,
- NULL, NULL,
- stop_select
-};
-struct erl_drv_entry fd_driver_entry = {
- NULL,
- fd_start,
- fd_stop,
- output,
- ready_input,
- ready_output,
- "fd",
- NULL,
- NULL,
- fd_control,
- NULL,
- outputv,
- NULL, /* ready_async */
- NULL, /* flush */
- NULL, /* call */
- NULL, /* event */
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- 0, /* ERL_DRV_FLAGs */
- NULL, /* handle2 */
- NULL, /* process_exit */
- stop_select
-};
-struct erl_drv_entry vanilla_driver_entry = {
- NULL,
- vanilla_start,
- stop,
- output,
- ready_input,
- ready_output,
- "vanilla",
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL,
- NULL, /* flush */
- NULL, /* call */
- NULL, /* event */
- ERL_DRV_EXTENDED_MARKER,
- ERL_DRV_EXTENDED_MAJOR_VERSION,
- ERL_DRV_EXTENDED_MINOR_VERSION,
- 0, /* ERL_DRV_FLAGs */
- NULL, /* handle2 */
- NULL, /* process_exit */
- stop_select
-};
-
-/* Handle SIGCHLD signals. */
-#if (defined(SIG_SIGSET) || defined(SIG_SIGNAL))
-static RETSIGTYPE onchld(void)
-#else
-static RETSIGTYPE onchld(int signum)
-#endif
-{
-#if CHLDWTHR
- ASSERT(0); /* We should *never* catch a SIGCHLD signal */
-#elif defined(ERTS_SMP)
- smp_sig_notify('C');
-#else
- children_died = 1;
- ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */
-#endif
-}
-
-static int set_driver_data(ErlDrvPort port_num,
- int ifd,
- int ofd,
- int packet_bytes,
- int read_write,
- int exit_status,
- int pid)
-{
- Port *prt;
- ErtsSysReportExit *report_exit;
-
- if (!exit_status)
- report_exit = NULL;
- else {
- report_exit = erts_alloc(ERTS_ALC_T_PRT_REP_EXIT,
- sizeof(ErtsSysReportExit));
- report_exit->next = report_exit_list;
- report_exit->port = erts_drvport2id(port_num);
- report_exit->pid = pid;
- report_exit->ifd = read_write & DO_READ ? ifd : -1;
- report_exit->ofd = read_write & DO_WRITE ? ofd : -1;
-#if CHLDWTHR && !defined(ERTS_SMP)
- report_exit->status = 0;
-#endif
- report_exit_list = report_exit;
- }
-
- prt = erts_drvport2port(port_num);
- if (prt != ERTS_INVALID_ERL_DRV_PORT)
- prt->os_pid = pid;
-
- if (read_write & DO_READ) {
- driver_data[ifd].packet_bytes = packet_bytes;
- driver_data[ifd].port_num = port_num;
- driver_data[ifd].report_exit = report_exit;
- driver_data[ifd].pid = pid;
- driver_data[ifd].alive = 1;
- driver_data[ifd].status = 0;
- if (read_write & DO_WRITE) {
- driver_data[ifd].ofd = ofd;
- if (ifd != ofd)
- driver_data[ofd] = driver_data[ifd]; /* structure copy */
- } else { /* DO_READ only */
- driver_data[ifd].ofd = -1;
- }
- (void) driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1);
- return(ifd);
- } else { /* DO_WRITE only */
- driver_data[ofd].packet_bytes = packet_bytes;
- driver_data[ofd].port_num = port_num;
- driver_data[ofd].report_exit = report_exit;
- driver_data[ofd].ofd = ofd;
- driver_data[ofd].pid = pid;
- driver_data[ofd].alive = 1;
- driver_data[ofd].status = 0;
- return(ofd);
- }
-}
-
-static int spawn_init()
-{
- int i;
-#if CHLDWTHR
- erts_thr_opts_t thr_opts = ERTS_THR_OPTS_DEFAULT_INITER;
- thr_opts.detached = 0;
- thr_opts.suggested_stack_size = 0; /* Smallest possible */
-#endif
-
- sys_sigset(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */
- driver_data = (struct driver_data *)
- erts_alloc(ERTS_ALC_T_DRV_TAB, max_files * sizeof(struct driver_data));
- erts_smp_atomic_add_nob(&sys_misc_mem_sz,
- max_files * sizeof(struct driver_data));
-
- for (i = 0; i < max_files; i++)
- driver_data[i].pid = -1;
-
-#if CHLDWTHR
- sys_sigblock(SIGCHLD);
-#endif
-
- sys_sigset(SIGCHLD, onchld); /* Reap children */
-
-#if CHLDWTHR
- erts_thr_create(&child_waiter_tid, child_waiter, NULL, &thr_opts);
-#endif
-
- return 1;
-}
-
-static void close_pipes(int ifd[2], int ofd[2], int read_write)
-{
- if (read_write & DO_READ) {
- (void) close(ifd[0]);
- (void) close(ifd[1]);
- }
- if (read_write & DO_WRITE) {
- (void) close(ofd[0]);
- (void) close(ofd[1]);
- }
-}
-
-static void init_fd_data(int fd, ErlDrvPort port_num)
-{
- fd_data[fd].buf = NULL;
- fd_data[fd].cpos = NULL;
- fd_data[fd].remain = 0;
- fd_data[fd].sz = 0;
- fd_data[fd].psz = 0;
-}
-
-static char **build_unix_environment(char *block)
-{
- int i;
- int j;
- int len;
- char *cp;
- char **cpp;
- char** old_env;
-
- ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx));
-
- cp = block;
- len = 0;
- while (*cp != '\0') {
- cp += strlen(cp) + 1;
- len++;
- }
- old_env = environ;
- while (*old_env++ != NULL) {
- len++;
- }
-
- cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT,
- sizeof(char *) * (len+1));
- if (cpp == NULL) {
- return NULL;
- }
-
- cp = block;
- len = 0;
- while (*cp != '\0') {
- cpp[len] = cp;
- cp += strlen(cp) + 1;
- len++;
- }
-
- i = len;
- for (old_env = environ; *old_env; old_env++) {
- char* old = *old_env;
-
- for (j = 0; j < len; j++) {
- char *s, *t;
-
- s = cpp[j];
- t = old;
- while (*s == *t && *s != '=') {
- s++, t++;
- }
- if (*s == '=' && *t == '=') {
- break;
- }
- }
-
- if (j == len) { /* New version not found */
- cpp[len++] = old;
- }
- }
-
- for (j = 0; j < i; ) {
- size_t last = strlen(cpp[j])-1;
- if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) {
- cpp[j] = cpp[--len];
- if (len < i) {
- i--;
- } else {
- j++;
- }
- }
- else {
- j++;
- }
- }
-
- cpp[len] = NULL;
- return cpp;
-}
-
-/*
- [arndt] In most Unix systems, including Solaris 2.5, 'fork' allocates memory
- in swap space for the child of a 'fork', whereas 'vfork' does not do this.
- The natural call to use here is therefore 'vfork'. Due to a bug in
- 'vfork' in Solaris 2.5 (apparently fixed in 2.6), using 'vfork'
- can be dangerous in what seems to be these circumstances:
- If the child code under a vfork sets the signal action to SIG_DFL
- (or SIG_IGN)
- for any signal which was previously set to a signal handler, the
- state of the parent is clobbered, so that the later arrival of
- such a signal yields a sigsegv in the parent. If the signal was
- not set to a signal handler, but ignored, all seems to work.
- If you change the forking code below, beware of this.
- */
-
-static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts)
-{
-#define CMD_LINE_PREFIX_STR "exec "
-#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1)
-
- int ifd[2], ofd[2], len, pid, i;
- char **volatile new_environ; /* volatile since a vfork() then cannot
- cause 'new_environ' to be clobbered
- in the parent process. */
- int saved_errno;
- long res;
- char *cmd_line;
-#ifndef QNX
- int unbind;
-#endif
-#if !DISABLE_VFORK
- int no_vfork;
- size_t no_vfork_sz = sizeof(no_vfork);
-
- no_vfork = (erts_sys_getenv_raw("ERL_NO_VFORK",
- (char *) &no_vfork,
- &no_vfork_sz) >= 0);
-#endif
-
- switch (opts->read_write) {
- case DO_READ:
- if (pipe(ifd) < 0)
- return ERL_DRV_ERROR_ERRNO;
- if (ifd[0] >= max_files) {
- close_pipes(ifd, ofd, opts->read_write);
- errno = EMFILE;
- return ERL_DRV_ERROR_ERRNO;
- }
- ofd[1] = -1; /* keep purify happy */
- break;
- case DO_WRITE:
- if (pipe(ofd) < 0) return ERL_DRV_ERROR_ERRNO;
- if (ofd[1] >= max_files) {
- close_pipes(ifd, ofd, opts->read_write);
- errno = EMFILE;
- return ERL_DRV_ERROR_ERRNO;
- }
- ifd[0] = -1; /* keep purify happy */
- break;
- case DO_READ|DO_WRITE:
- if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO;
- errno = EMFILE; /* default for next two conditions */
- if (ifd[0] >= max_files || pipe(ofd) < 0) {
- close_pipes(ifd, ofd, DO_READ);
- return ERL_DRV_ERROR_ERRNO;
- }
- if (ofd[1] >= max_files) {
- close_pipes(ifd, ofd, opts->read_write);
- errno = EMFILE;
- return ERL_DRV_ERROR_ERRNO;
- }
- break;
- default:
- ASSERT(0);
- return ERL_DRV_ERROR_GENERAL;
- }
-
- if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
- /* started with spawn_executable, not with spawn */
- len = strlen(name);
- cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1);
- if (!cmd_line) {
- close_pipes(ifd, ofd, opts->read_write);
- errno = ENOMEM;
- return ERL_DRV_ERROR_ERRNO;
- }
- memcpy((void *) cmd_line,(void *) name, len);
- cmd_line[len] = '\0';
- if (access(cmd_line,X_OK) != 0) {
- int save_errno = errno;
- erts_free(ERTS_ALC_T_TMP, cmd_line);
- errno = save_errno;
- return ERL_DRV_ERROR_ERRNO;
- }
- } else {
- /* make the string suitable for giving to "sh" */
- len = strlen(name);
- cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP,
- CMD_LINE_PREFIX_STR_SZ + len + 1);
- if (!cmd_line) {
- close_pipes(ifd, ofd, opts->read_write);
- errno = ENOMEM;
- return ERL_DRV_ERROR_ERRNO;
- }
- memcpy((void *) cmd_line,
- (void *) CMD_LINE_PREFIX_STR,
- CMD_LINE_PREFIX_STR_SZ);
- memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len);
- cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0';
- }
-
- erts_smp_rwmtx_rlock(&environ_rwmtx);
-
- if (opts->envir == NULL) {
- new_environ = environ;
- } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) {
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
- errno = ENOMEM;
- return ERL_DRV_ERROR_ERRNO;
- }
-
-#ifndef QNX
- /* Block child from SIGINT and SIGUSR1. Must be before fork()
- to be safe. */
- block_signals();
-
- CHLD_STAT_LOCK;
-
- unbind = erts_sched_bind_atfork_prepare();
-
-#if !DISABLE_VFORK
- /* See fork/vfork discussion before this function. */
- if (no_vfork) {
-#endif
-
- DEBUGF(("Using fork\n"));
- pid = fork();
-
- if (pid == 0) {
- /* The child! Setup child... */
-
- if (erts_sched_bind_atfork_child(unbind) != 0)
- goto child_error;
-
- /* OBSERVE!
- * Keep child setup after vfork() (implemented below and in
- * erl_child_setup.c) up to date if changes are made here.
- */
-
- if (opts->use_stdio) {
- if (opts->read_write & DO_READ) {
- /* stdout for process */
- if (dup2(ifd[1], 1) < 0)
- goto child_error;
- if(opts->redir_stderr)
- /* stderr for process */
- if (dup2(ifd[1], 2) < 0)
- goto child_error;
- }
- if (opts->read_write & DO_WRITE)
- /* stdin for process */
- if (dup2(ofd[0], 0) < 0)
- goto child_error;
- }
- else { /* XXX will fail if ofd[0] == 4 (unlikely..) */
- if (opts->read_write & DO_READ)
- if (dup2(ifd[1], 4) < 0)
- goto child_error;
- if (opts->read_write & DO_WRITE)
- if (dup2(ofd[0], 3) < 0)
- goto child_error;
- }
-
- for (i = opts->use_stdio ? 3 : 5; i < max_files; i++)
- (void) close(i);
-
- if (opts->wd && chdir(opts->wd) < 0)
- goto child_error;
-
-#if defined(USE_SETPGRP_NOARGS) /* SysV */
- (void) setpgrp();
-#elif defined(USE_SETPGRP) /* BSD */
- (void) setpgrp(0, getpid());
-#else /* POSIX */
- (void) setsid();
-#endif
-
- unblock_signals();
-
- if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
- if (opts->argv == NULL) {
- execle(cmd_line,cmd_line,(char *) NULL, new_environ);
- } else {
- if (opts->argv[0] == erts_default_arg0) {
- opts->argv[0] = cmd_line;
- }
- execve(cmd_line, opts->argv, new_environ);
- if (opts->argv[0] == cmd_line) {
- opts->argv[0] = erts_default_arg0;
- }
- }
- } else {
- execle("/bin/sh", "sh", "-c", cmd_line, (char *) NULL, new_environ);
- }
- child_error:
- _exit(1);
- }
-#if !DISABLE_VFORK
- }
-#define ENOUGH_BYTES (44)
- else { /* Use vfork() */
- char **cs_argv= erts_alloc(ERTS_ALC_T_TMP,(CS_ARGV_NO_OF_ARGS + 1)*
- sizeof(char *));
- char fd_close_range[ENOUGH_BYTES]; /* 44 bytes are enough to */
- char dup2_op[CS_ARGV_NO_OF_DUP2_OPS][ENOUGH_BYTES]; /* hold any "%d:%d" string */
- /* on a 64-bit machine. */
-
- /* Setup argv[] for the child setup program (implemented in
- erl_child_setup.c) */
- i = 0;
- if (opts->use_stdio) {
- if (opts->read_write & DO_READ){
- /* stdout for process */
- erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 1);
- if(opts->redir_stderr)
- /* stderr for process */
- erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 2);
- }
- if (opts->read_write & DO_WRITE)
- /* stdin for process */
- erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 0);
- } else { /* XXX will fail if ofd[0] == 4 (unlikely..) */
- if (opts->read_write & DO_READ)
- erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ifd[1], 4);
- if (opts->read_write & DO_WRITE)
- erts_snprintf(&dup2_op[i++][0], ENOUGH_BYTES, "%d:%d", ofd[0], 3);
- }
- for (; i < CS_ARGV_NO_OF_DUP2_OPS; i++)
- strcpy(&dup2_op[i][0], "-");
- erts_snprintf(fd_close_range, ENOUGH_BYTES, "%d:%d", opts->use_stdio ? 3 : 5, max_files-1);
-
- cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog;
- cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : ".";
- cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind);
- cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range;
- for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++)
- cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0];
-
- if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
- int num = 0;
- int j = 0;
- if (opts->argv != NULL) {
- for(; opts->argv[num] != NULL; ++num)
- ;
- }
- cs_argv = erts_realloc(ERTS_ALC_T_TMP,cs_argv, (CS_ARGV_NO_OF_ARGS + 1 + num + 1) * sizeof(char *));
- cs_argv[CS_ARGV_CMD_IX] = "-";
- cs_argv[CS_ARGV_NO_OF_ARGS] = cmd_line;
- if (opts->argv != NULL) {
- for (;opts->argv[j] != NULL; ++j) {
- if (opts->argv[j] == erts_default_arg0) {
- cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = cmd_line;
- } else {
- cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = opts->argv[j];
- }
- }
- }
- cs_argv[CS_ARGV_NO_OF_ARGS + 1 + j] = NULL;
- } else {
- cs_argv[CS_ARGV_CMD_IX] = cmd_line; /* Command */
- cs_argv[CS_ARGV_NO_OF_ARGS] = NULL;
- }
- DEBUGF(("Using vfork\n"));
- pid = vfork();
-
- if (pid == 0) {
- /* The child! */
-
- /* Observe!
- * OTP-4389: The child setup program (implemented in
- * erl_child_setup.c) will perform the necessary setup of the
- * child before it execs to the user program. This because
- * vfork() only allow an *immediate* execve() or _exit() in the
- * child.
- */
- execve(child_setup_prog, cs_argv, new_environ);
- _exit(1);
- }
- erts_free(ERTS_ALC_T_TMP,cs_argv);
- }
-#undef ENOUGH_BYTES
-#endif
-
- erts_sched_bind_atfork_parent(unbind);
-
- if (pid == -1) {
- saved_errno = errno;
- CHLD_STAT_UNLOCK;
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
- unblock_signals();
- close_pipes(ifd, ofd, opts->read_write);
- errno = saved_errno;
- return ERL_DRV_ERROR_ERRNO;
- }
-#else /* QNX */
- if (opts->use_stdio) {
- if (opts->read_write & DO_READ)
- qnx_spawn_options.iov[1] = ifd[1]; /* stdout for process */
- if (opts->read_write & DO_WRITE)
- qnx_spawn_options.iov[0] = ofd[0]; /* stdin for process */
- }
- else {
- if (opts->read_write & DO_READ)
- qnx_spawn_options.iov[4] = ifd[1];
- if (opts->read_write & DO_WRITE)
- qnx_spawn_options.iov[3] = ofd[0];
- }
- /* Close fds on exec */
- for (i = 3; i < max_files; i++)
- fcntl(i, F_SETFD, 1);
-
- qnx_spawn_options.flags = _SPAWN_SETSID;
- if ((pid = spawnl(P_NOWAIT, "/bin/sh", "/bin/sh", "-c", cmd_line,
- (char *) 0)) < 0) {
- erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
- reset_qnx_spawn();
- erts_smp_rwmtx_runlock(&environ_rwmtx);
- close_pipes(ifd, ofd, opts->read_write);
- return ERL_DRV_ERROR_GENERAL;
- }
- reset_qnx_spawn();
-#endif /* QNX */
-
- erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
-
- if (new_environ != environ)
- erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ);
-
- if (opts->read_write & DO_READ)
- (void) close(ifd[1]);
- if (opts->read_write & DO_WRITE)
- (void) close(ofd[0]);
-
- if (opts->read_write & DO_READ) {
- SET_NONBLOCKING(ifd[0]);
- init_fd_data(ifd[0], port_num);
- }
- if (opts->read_write & DO_WRITE) {
- SET_NONBLOCKING(ofd[1]);
- init_fd_data(ofd[1], port_num);
- }
-
- res = set_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes,
- opts->read_write, opts->exit_status, pid);
- /* Don't unblock SIGCHLD until now, since the call above must
- first complete putting away the info about our new subprocess. */
- unblock_signals();
-
-#if CHLDWTHR
- ASSERT(children_alive >= 0);
-
- if (!(children_alive++))
- CHLD_STAT_SIGNAL; /* Wake up child waiter thread if no children
- was alive before we fork()ed ... */
-#endif
- /* Don't unlock chld_stat_mtx until now of the same reason as above */
- CHLD_STAT_UNLOCK;
-
- erts_smp_rwmtx_runlock(&environ_rwmtx);
-
- return (ErlDrvData)res;
-#undef CMD_LINE_PREFIX_STR
-#undef CMD_LINE_PREFIX_STR_SZ
-}
-
-#ifdef QNX
-static reset_qnx_spawn()
-{
- int i;
-
- /* Reset qnx_spawn_options */
- qnx_spawn_options.flags = 0;
- qnx_spawn_options.iov[0] = 0xff;
- qnx_spawn_options.iov[1] = 0xff;
- qnx_spawn_options.iov[2] = 0xff;
- qnx_spawn_options.iov[3] = 0xff;
-}
-#endif
-
-#define FD_DEF_HEIGHT 24
-#define FD_DEF_WIDTH 80
-/* Control op */
-#define FD_CTRL_OP_GET_WINSIZE 100
-
-static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height)
-{
-#ifdef TIOCGWINSZ
- struct winsize ws;
- if (ioctl(fd,TIOCGWINSZ,&ws) == 0) {
- *width = (Uint32) ws.ws_col;
- *height = (Uint32) ws.ws_row;
- return 0;
- }
-#endif
- return -1;
-}
-
-static ErlDrvSSizeT fd_control(ErlDrvData drv_data,
- unsigned int command,
- char *buf, ErlDrvSizeT len,
- char **rbuf, ErlDrvSizeT rlen)
-{
- int fd = (int)(long)drv_data;
- char resbuff[2*sizeof(Uint32)];
- switch (command) {
- case FD_CTRL_OP_GET_WINSIZE:
- {
- Uint32 w,h;
- if (fd_get_window_size(fd,&w,&h))
- return 0;
- memcpy(resbuff,&w,sizeof(Uint32));
- memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32));
- }
- break;
- default:
- return 0;
- }
- if (rlen < 2*sizeof(Uint32)) {
- *rbuf = driver_alloc(2*sizeof(Uint32));
- }
- memcpy(*rbuf,resbuff,2*sizeof(Uint32));
- return 2*sizeof(Uint32);
-}
-
-static ErlDrvData fd_start(ErlDrvPort port_num, char* name,
- SysDriverOpts* opts)
-{
- ErlDrvData res;
-
- if (((opts->read_write & DO_READ) && opts->ifd >= max_files) ||
- ((opts->read_write & DO_WRITE) && opts->ofd >= max_files))
- return ERL_DRV_ERROR_GENERAL;
-
- /*
- * Historical:
- *
- * "Note about nonblocking I/O.
- *
- * At least on Solaris, setting the write end of a TTY to nonblocking,
- * will set the input end to nonblocking as well (and vice-versa).
- * If erl is run in a pipeline like this: cat | erl
- * the input end of the TTY will be the standard input of cat.
- * And cat is not prepared to handle nonblocking I/O."
- *
- * Actually, the reason for this is not that the tty itself gets set
- * in non-blocking mode, but that the "input end" (cat's stdin) and
- * the "output end" (erlang's stdout) are typically the "same" file
- * descriptor, dup()'ed from a single fd by one of this process'
- * ancestors.
- *
- * The workaround for this problem used to be a rather bad kludge,
- * interposing an extra process ("internal cat") between erlang's
- * stdout and the original stdout, allowing erlang to set its stdout
- * in non-blocking mode without affecting the stdin of the preceding
- * process in the pipeline - and being a kludge, it caused all kinds
- * of weird problems.
- *
- * So, this is the current logic:
- *
- * The only reason to set non-blocking mode on the output fd at all is
- * if it's something that can cause a write() to block, of course,
- * i.e. primarily if it points to a tty, socket, pipe, or fifo.
- *
- * If we don't set non-blocking mode when we "should" have, and output
- * becomes blocked, the entire runtime system will be suspended - this
- * is normally bad of course, and can happen fairly "easily" - e.g. user
- * hits ^S on tty - but doesn't necessarily happen.
- *
- * If we do set non-blocking mode when we "shouldn't" have, the runtime
- * system will end up seeing EOF on the input fd (due to the preceding
- * process dying), which typically will cause the entire runtime system
- * to terminate immediately (due to whatever erlang process is seeing
- * the EOF taking it as a signal to halt the system). This is *very* bad.
- *
- * I.e. we should take a conservative approach, and only set non-
- * blocking mode when we a) need to, and b) are reasonably certain
- * that it won't be a problem. And as in the example above, the problem
- * occurs when input fd and output fd point to different "things".
- *
- * However, determining that they are not just the same "type" of
- * "thing", but actually the same instance of that type of thing, is
- * unreasonably complex in many/most cases.
- *
- * Also, with pipes, sockets, and fifos it's far from obvious that the
- * user *wants* non-blocking output: If you're running erlang inside
- * some complex pipeline, you're probably not running a real-time system
- * that must never stop, but rather *want* it to suspend if the output
- * channel is "full".
- *
- * So, the bottom line: We will only set the output fd non-blocking if
- * it points to a tty, and either a) the input fd also points to a tty,
- * or b) we can make sure that setting the output fd non-blocking
- * doesn't interfere with someone else's input, via a somewhat milder
- * kludge than the above.
- *
- * Also keep in mind that while this code is almost exclusively run as
- * a result of an erlang open_port({fd,0,1}, ...), that isn't the only
- * case - it can be called with any old pre-existing file descriptors,
- * the relations between which (if they're even two) we can only guess
- * at - still, we try our best...
- */
-
- if (opts->read_write & DO_READ) {
- init_fd_data(opts->ifd, port_num);
- }
- if (opts->read_write & DO_WRITE) {
- init_fd_data(opts->ofd, port_num);
-
- /* If we don't have a read end, all bets are off - no non-blocking. */
- if (opts->read_write & DO_READ) {
-
- if (isatty(opts->ofd)) { /* output fd is a tty:-) */
-
- if (isatty(opts->ifd)) { /* input fd is also a tty */
-
- /* To really do this "right", we should also check that
- input and output fd point to the *same* tty - but
- this seems like overkill; ttyname() isn't for free,
- and this is a very common case - and it's hard to
- imagine a scenario where setting non-blocking mode
- here would cause problems - go ahead and do it. */
-
- SET_NONBLOCKING(opts->ofd);
-
- } else { /* output fd is a tty, input fd isn't */
-
- /* This is a "problem case", but also common (see the
- example above) - i.e. it makes sense to try a bit
- harder before giving up on non-blocking mode: Try to
- re-open the tty that the output fd points to, and if
- successful replace the original one with the "new" fd
- obtained this way, and set *that* one in non-blocking
- mode. (Yes, this is a kludge.)
-
- However, re-opening the tty may fail in a couple of
- (unusual) cases:
-
- 1) The name of the tty (or an equivalent one, i.e.
- same major/minor number) can't be found, because
- it actually lives somewhere other than /dev (or
- wherever ttyname() looks for it), and isn't
- equivalent to any of those that do live in the
- "standard" place - this should be *very* unusual.
-
- 2) Permissions on the tty don't allow us to open it -
- it's perfectly possible to have an fd open to an
- object whose permissions wouldn't allow us to open
- it. This is not as unusual as it sounds, one case
- is if the user has su'ed to someone else (not
- root) - we have a read/write fd open to the tty
- (because it has been inherited all the way down
- here), but we have neither read nor write
- permission for the tty.
-
- In these cases, we finally give up, and don't set the
- output fd in non-blocking mode. */
-
- char *tty;
- int nfd;
-
- if ((tty = ttyname(opts->ofd)) != NULL &&
- (nfd = open(tty, O_WRONLY)) != -1) {
- dup2(nfd, opts->ofd);
- close(nfd);
- SET_NONBLOCKING(opts->ofd);
- }
- }
- }
- }
- }
- CHLD_STAT_LOCK;
- res = (ErlDrvData)(long)set_driver_data(port_num, opts->ifd, opts->ofd,
- opts->packet_bytes,
- opts->read_write, 0, -1);
- CHLD_STAT_UNLOCK;
- return res;
-}
-
-static void clear_fd_data(int fd)
-{
- if (fd_data[fd].sz > 0) {
- erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fd_data[fd].buf);
- ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fd_data[fd].sz);
- erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fd_data[fd].sz);
- }
- fd_data[fd].buf = NULL;
- fd_data[fd].sz = 0;
- fd_data[fd].remain = 0;
- fd_data[fd].cpos = NULL;
- fd_data[fd].psz = 0;
-}
-
-static void nbio_stop_fd(ErlDrvPort prt, int fd)
-{
- driver_select(prt,fd,DO_READ|DO_WRITE,0);
- clear_fd_data(fd);
- SET_BLOCKING(fd);
-}
-
-static void fd_stop(ErlDrvData fd) /* Does not close the fds */
-{
- int ofd;
-
- nbio_stop_fd(driver_data[(int)(long)fd].port_num, (int)(long)fd);
- ofd = driver_data[(int)(long)fd].ofd;
- if (ofd != (int)(long)fd && ofd != -1)
- nbio_stop_fd(driver_data[(int)(long)fd].port_num, (int)(long)ofd);
-}
-
-static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name,
- SysDriverOpts* opts)
-{
- int flags, fd;
- ErlDrvData res;
-
- flags = (opts->read_write == DO_READ ? O_RDONLY :
- opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC :
- O_RDWR|O_CREAT);
- if ((fd = open(name, flags, 0666)) < 0)
- return ERL_DRV_ERROR_GENERAL;
- if (fd >= max_files) {
- close(fd);
- return ERL_DRV_ERROR_GENERAL;
- }
- SET_NONBLOCKING(fd);
- init_fd_data(fd, port_num);
-
- CHLD_STAT_LOCK;
- res = (ErlDrvData)(long)set_driver_data(port_num, fd, fd,
- opts->packet_bytes,
- opts->read_write, 0, -1);
- CHLD_STAT_UNLOCK;
- return res;
-}
-
-/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */
-/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */
-
-static void stop(ErlDrvData fd)
-{
- ErlDrvPort prt;
- int ofd;
-
- prt = driver_data[(int)(long)fd].port_num;
- nbio_stop_fd(prt, (int)(long)fd);
-
- ofd = driver_data[(int)(long)fd].ofd;
- if (ofd != (int)(long)fd && (int)(long)ofd != -1)
- nbio_stop_fd(prt, ofd);
- else
- ofd = -1;
-
- CHLD_STAT_LOCK;
-
- /* Mark as unused. */
- driver_data[(int)(long)fd].pid = -1;
-
- CHLD_STAT_UNLOCK;
-
- /* SMP note: Close has to be last thing done (open file descriptors work
- as locks on driver_data[] entries) */
- driver_select(prt, (int)(long)fd, ERL_DRV_USE, 0); /* close(fd); */
- if (ofd >= 0) {
- driver_select(prt, (int)(long)ofd, ERL_DRV_USE, 0); /* close(ofd); */
- }
-}
-
-static void outputv(ErlDrvData e, ErlIOVec* ev)
-{
- int fd = (int)(long)e;
- ErlDrvPort ix = driver_data[fd].port_num;
- int pb = driver_data[fd].packet_bytes;
- int ofd = driver_data[fd].ofd;
- ssize_t n;
- ErlDrvSizeT sz;
- char lb[4];
- char* lbp;
- ErlDrvSizeT len = ev->size;
-
- /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
- /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/
- if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) {
- driver_failure_posix(ix, EINVAL);
- return; /* -1; */
- }
- /* Handles 0 <= pb <= 4 only */
- put_int32((Uint32) len, lb);
- lbp = lb + (4-pb);
-
- ev->iov[0].iov_base = lbp;
- ev->iov[0].iov_len = pb;
- ev->size += pb;
- if ((sz = driver_sizeq(ix)) > 0) {
- driver_enqv(ix, ev, 0);
- if (sz + ev->size >= (1 << 13))
- set_busy_port(ix, 1);
- }
- else {
- int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize;
-
- n = writev(ofd, (const void *) (ev->iov), vsize);
- if (n == ev->size)
- return; /* 0;*/
- if (n < 0) {
- if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
- driver_failure_posix(ix, errno);
- return; /* -1;*/
- }
- n = 0;
- }
- driver_enqv(ix, ev, n); /* n is the skip value */
- driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
- }
- /* return 0;*/
-}
-
-
-static void output(ErlDrvData e, char* buf, ErlDrvSizeT len)
-{
- int fd = (int)(long)e;
- ErlDrvPort ix = driver_data[fd].port_num;
- int pb = driver_data[fd].packet_bytes;
- int ofd = driver_data[fd].ofd;
- ssize_t n;
- ErlDrvSizeT sz;
- char lb[4];
- char* lbp;
- struct iovec iv[2];
-
- /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
- if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) {
- driver_failure_posix(ix, EINVAL);
- return; /* -1; */
- }
- put_int32(len, lb);
- lbp = lb + (4-pb);
-
- if ((sz = driver_sizeq(ix)) > 0) {
- driver_enq(ix, lbp, pb);
- driver_enq(ix, buf, len);
- if (sz + len + pb >= (1 << 13))
- set_busy_port(ix, 1);
- }
- else {
- iv[0].iov_base = lbp;
- iv[0].iov_len = pb; /* should work for pb=0 */
- iv[1].iov_base = buf;
- iv[1].iov_len = len;
- n = writev(ofd, iv, 2);
- if (n == pb+len)
- return; /* 0; */
- if (n < 0) {
- if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
- driver_failure_posix(ix, errno);
- return; /* -1; */
- }
- n = 0;
- }
- if (n < pb) {
- driver_enq(ix, lbp+n, pb-n);
- driver_enq(ix, buf, len);
- }
- else {
- n -= pb;
- driver_enq(ix, buf+n, len-n);
- }
- driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
- }
- return; /* 0; */
-}
-
-static int port_inp_failure(ErlDrvPort port_num, int ready_fd, int res)
- /* Result: 0 (eof) or -1 (error) */
-{
- int err = errno;
-
- ASSERT(res <= 0);
- (void) driver_select(port_num, ready_fd, ERL_DRV_READ|ERL_DRV_WRITE, 0);
- clear_fd_data(ready_fd);
- if (res == 0) {
- if (driver_data[ready_fd].report_exit) {
- CHLD_STAT_LOCK;
-
- if (driver_data[ready_fd].alive) {
- /*
- * We have eof and want to report exit status, but the process
- * hasn't exited yet. When it does report_exit_status() will
- * driver_select() this fd which will make sure that we get
- * back here with driver_data[ready_fd].alive == 0 and
- * driver_data[ready_fd].status set.
- */
- CHLD_STAT_UNLOCK;
- return 0;
- }
- else {
- int status = driver_data[ready_fd].status;
- CHLD_STAT_UNLOCK;
-
- /* We need not be prepared for stopped/continued processes. */
- if (WIFSIGNALED(status))
- status = 128 + WTERMSIG(status);
- else
- status = WEXITSTATUS(status);
-
- driver_report_exit(driver_data[ready_fd].port_num, status);
- }
- }
- driver_failure_eof(port_num);
- } else {
- driver_failure_posix(port_num, err);
- }
- return 0;
-}
-
-/* fd is the drv_data that is returned from the */
-/* initial start routine */
-/* ready_fd is the descriptor that is ready to read */
-
-static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd)
-{
- int fd = (int)(long)e;
- ErlDrvPort port_num;
- int packet_bytes;
- int res;
- Uint h;
-
- port_num = driver_data[fd].port_num;
- packet_bytes = driver_data[fd].packet_bytes;
-
- if (packet_bytes == 0) {
- byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
- ERTS_SYS_READ_BUF_SZ);
- res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
- if (res < 0) {
- if ((errno != EINTR) && (errno != ERRNO_BLOCK))
- port_inp_failure(port_num, ready_fd, res);
- }
- else if (res == 0)
- port_inp_failure(port_num, ready_fd, res);
- else
- driver_output(port_num, (char*) read_buf, res);
- erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
- }
- else if (fd_data[ready_fd].remain > 0) { /* We try to read the remainder */
- /* space is allocated in buf */
- res = read(ready_fd, fd_data[ready_fd].cpos,
- fd_data[ready_fd].remain);
- if (res < 0) {
- if ((errno != EINTR) && (errno != ERRNO_BLOCK))
- port_inp_failure(port_num, ready_fd, res);
- }
- else if (res == 0) {
- port_inp_failure(port_num, ready_fd, res);
- }
- else if (res == fd_data[ready_fd].remain) { /* we're done */
- driver_output(port_num, fd_data[ready_fd].buf,
- fd_data[ready_fd].sz);
- clear_fd_data(ready_fd);
- }
- else { /* if (res < fd_data[ready_fd].remain) */
- fd_data[ready_fd].cpos += res;
- fd_data[ready_fd].remain -= res;
- }
- }
- else if (fd_data[ready_fd].remain == 0) { /* clean fd */
- byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
- ERTS_SYS_READ_BUF_SZ);
- /* We make one read attempt and see what happens */
- res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
- if (res < 0) {
- if ((errno != EINTR) && (errno != ERRNO_BLOCK))
- port_inp_failure(port_num, ready_fd, res);
- }
- else if (res == 0) { /* eof */
- port_inp_failure(port_num, ready_fd, res);
- }
- else if (res < packet_bytes - fd_data[ready_fd].psz) {
- memcpy(fd_data[ready_fd].pbuf+fd_data[ready_fd].psz,
- read_buf, res);
- fd_data[ready_fd].psz += res;
- }
- else { /* if (res >= packet_bytes) */
- unsigned char* cpos = read_buf;
- int bytes_left = res;
-
- while (1) {
- int psz = fd_data[ready_fd].psz;
- char* pbp = fd_data[ready_fd].pbuf + psz;
-
- while(bytes_left && (psz < packet_bytes)) {
- *pbp++ = *cpos++;
- bytes_left--;
- psz++;
- }
-
- if (psz < packet_bytes) {
- fd_data[ready_fd].psz = psz;
- break;
- }
- fd_data[ready_fd].psz = 0;
-
- switch (packet_bytes) {
- case 1: h = get_int8(fd_data[ready_fd].pbuf); break;
- case 2: h = get_int16(fd_data[ready_fd].pbuf); break;
- case 4: h = get_int32(fd_data[ready_fd].pbuf); break;
- default: ASSERT(0); return; /* -1; */
- }
-
- if (h <= (bytes_left)) {
- driver_output(port_num, (char*) cpos, h);
- cpos += h;
- bytes_left -= h;
- continue;
- }
- else { /* The last message we got was split */
- char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h);
- if (!buf) {
- errno = ENOMEM;
- port_inp_failure(port_num, ready_fd, -1);
- }
- else {
- erts_smp_atomic_add_nob(&sys_misc_mem_sz, h);
- sys_memcpy(buf, cpos, bytes_left);
- fd_data[ready_fd].buf = buf;
- fd_data[ready_fd].sz = h;
- fd_data[ready_fd].remain = h - bytes_left;
- fd_data[ready_fd].cpos = buf + bytes_left;
- }
- break;
- }
- }
- }
- erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
- }
-}
-
-
-/* fd is the drv_data that is returned from the */
-/* initial start routine */
-/* ready_fd is the descriptor that is ready to read */
-
-static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd)
-{
- int fd = (int)(long)e;
- ErlDrvPort ix = driver_data[fd].port_num;
- int n;
- struct iovec* iv;
- int vsize;
-
-
- if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) {
- driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
- return; /* 0; */
- }
- vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
- if ((n = writev(ready_fd, iv, vsize)) > 0) {
- if (driver_deq(ix, n) == 0)
- set_busy_port(ix, 0);
- }
- else if (n < 0) {
- if (errno == ERRNO_BLOCK || errno == EINTR)
- return; /* 0; */
- else {
- int res = errno;
- driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
- driver_failure_posix(ix, res);
- return; /* -1; */
- }
- }
- return; /* 0; */
-}
-
-static void stop_select(ErlDrvEvent fd, void* _)
-{
- close((int)fd);
-}
-
-
void erts_do_break_handling(void)
{
struct termios temp_mode;
@@ -2489,13 +1004,19 @@ erts_sys_getenv(char *key, char *value, size_t *size)
return res;
}
+int
+erts_sys_unsetenv(char *key)
+{
+ int res;
+ erts_smp_rwmtx_rwlock(&environ_rwmtx);
+ res = unsetenv(key);
+ erts_smp_rwmtx_rwunlock(&environ_rwmtx);
+ return res;
+}
+
void
sys_init_io(void)
{
- fd_data = (struct fd_data *)
- erts_alloc(ERTS_ALC_T_FD_TAB, max_files * sizeof(struct fd_data));
- erts_smp_atomic_add_nob(&sys_misc_mem_sz,
- max_files * sizeof(struct fd_data));
}
#if (0) /* unused? */
@@ -2524,6 +1045,52 @@ void erts_sys_alloc_init(void)
{
}
+#if ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC
+void *erts_sys_aligned_alloc(UWord alignment, UWord size)
+{
+#ifdef HAVE_POSIX_MEMALIGN
+ void *ptr = NULL;
+ int error;
+ ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */
+ error = posix_memalign(&ptr, (size_t) alignment, (size_t) size);
+#if HAVE_ERTS_MSEG
+ if (error || !ptr) {
+ erts_mseg_clear_cache();
+ error = posix_memalign(&ptr, (size_t) alignment, (size_t) size);
+ }
+#endif
+ if (error) {
+ errno = error;
+ return NULL;
+ }
+ if (!ptr)
+ errno = ENOMEM;
+ ASSERT(!ptr || (((UWord) ptr) & (alignment - 1)) == 0);
+ return ptr;
+#else
+# error "Missing erts_sys_aligned_alloc() implementation"
+#endif
+}
+
+void erts_sys_aligned_free(UWord alignment, void *ptr)
+{
+ ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */
+ free(ptr);
+}
+
+void *erts_sys_aligned_realloc(UWord alignment, void *ptr, UWord size, UWord old_size)
+{
+ void *new_ptr = erts_sys_aligned_alloc(alignment, size);
+ if (new_ptr) {
+ UWord copy_size = old_size < size ? old_size : size;
+ sys_memcpy(new_ptr, ptr, (size_t) copy_size);
+ erts_sys_aligned_free(alignment, ptr);
+ }
+ return new_ptr;
+}
+
+#endif
+
void *erts_sys_alloc(ErtsAlcType_t t, void *x, Uint sz)
{
void *res = malloc((size_t) sz);
@@ -2574,33 +1141,43 @@ void sys_preload_end(Preload* p)
/* Nothing */
}
-/* Read a key from console (?) */
-
+/* Read a key from console, used by break.c
+ Here we assume that all schedulers are stopped so that erl_poll
+ does not interfere with the select below.
+*/
int sys_get_key(fd)
int fd;
{
- int c;
+ int c, ret;
unsigned char rbuf[64];
+ fd_set fds;
fflush(stdout); /* Flush query ??? */
- if ((c = read(fd,rbuf,64)) <= 0) {
- return c;
+ FD_ZERO(&fds);
+ FD_SET(fd,&fds);
+
+ ret = select(fd+1, &fds, NULL, NULL, NULL);
+
+ if (ret == 1) {
+ do {
+ c = read(fd,rbuf,64);
+ } while (c < 0 && errno == EAGAIN);
+ if (c <= 0)
+ return c;
}
return rbuf[0];
}
-#ifdef DEBUG
-
extern int erts_initialized;
void
-erl_assert_error(char* expr, char* file, int line)
+erl_assert_error(const char* expr, const char* func, const char* file, int line)
{
fflush(stdout);
- fprintf(stderr, "Assertion failed: %s in %s, line %d\n",
- expr, file, line);
+ fprintf(stderr, "%s:%d:%s() Assertion failed: %s\n",
+ file, line, func, expr);
fflush(stderr);
#if !defined(ERTS_SMP) && 0
/* Writing a crashdump from a failed assertion when smp support
@@ -2615,6 +1192,8 @@ erl_assert_error(char* expr, char* file, int line)
abort();
}
+#ifdef DEBUG
+
void
erl_debug(char* fmt, ...)
{
@@ -2631,179 +1210,6 @@ erl_debug(char* fmt, ...)
#endif /* DEBUG */
-static ERTS_INLINE void
-report_exit_status(ErtsSysReportExit *rep, int status)
-{
- Port *pp;
-#ifdef ERTS_SMP
- CHLD_STAT_UNLOCK;
- pp = erts_thr_id2port_sflgs(rep->port,
- ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP);
- CHLD_STAT_LOCK;
-#else
- pp = erts_id2port_sflgs(rep->port,
- NULL,
- 0,
- ERTS_PORT_SFLGS_INVALID_DRIVER_LOOKUP);
-#endif
- if (pp) {
- if (rep->ifd >= 0) {
- driver_data[rep->ifd].alive = 0;
- driver_data[rep->ifd].status = status;
- (void) driver_select(ERTS_Port2ErlDrvPort(pp),
- rep->ifd,
- (ERL_DRV_READ|ERL_DRV_USE),
- 1);
- }
- if (rep->ofd >= 0) {
- driver_data[rep->ofd].alive = 0;
- driver_data[rep->ofd].status = status;
- (void) driver_select(ERTS_Port2ErlDrvPort(pp),
- rep->ofd,
- (ERL_DRV_WRITE|ERL_DRV_USE),
- 1);
- }
-#ifdef ERTS_SMP
- erts_thr_port_release(pp);
-#else
- erts_port_release(pp);
-#endif
- }
- erts_free(ERTS_ALC_T_PRT_REP_EXIT, rep);
-}
-
-#if !CHLDWTHR /* ---------------------------------------------------------- */
-
-#define ERTS_REPORT_EXIT_STATUS report_exit_status
-
-static int check_children(void)
-{
- int res = 0;
- int pid;
- int status;
-
-#ifndef ERTS_SMP
- if (children_died)
-#endif
- {
- sys_sigblock(SIGCHLD);
- CHLD_STAT_LOCK;
- while ((pid = waitpid(-1, &status, WNOHANG)) > 0)
- note_child_death(pid, status);
-#ifndef ERTS_SMP
- children_died = 0;
-#endif
- CHLD_STAT_UNLOCK;
- sys_sigrelease(SIGCHLD);
- res = 1;
- }
- return res;
-}
-
-#ifdef ERTS_SMP
-
-void
-erts_check_children(void)
-{
- (void) check_children();
-}
-
-#endif
-
-#elif CHLDWTHR && defined(ERTS_SMP) /* ------------------------------------- */
-
-#define ERTS_REPORT_EXIT_STATUS report_exit_status
-
-#define check_children() (0)
-
-
-#else /* CHLDWTHR && !defined(ERTS_SMP) ------------------------------------ */
-
-#define ERTS_REPORT_EXIT_STATUS initiate_report_exit_status
-
-static ERTS_INLINE void
-initiate_report_exit_status(ErtsSysReportExit *rep, int status)
-{
- rep->next = report_exit_transit_list;
- rep->status = status;
- report_exit_transit_list = rep;
- erts_sys_schedule_interrupt(1);
-}
-
-static int check_children(void)
-{
- int res;
- ErtsSysReportExit *rep;
- CHLD_STAT_LOCK;
- rep = report_exit_transit_list;
- res = rep != NULL;
- while (rep) {
- ErtsSysReportExit *curr_rep = rep;
- rep = rep->next;
- report_exit_status(curr_rep, curr_rep->status);
- }
- report_exit_transit_list = NULL;
- CHLD_STAT_UNLOCK;
- return res;
-}
-
-#endif /* ------------------------------------------------------------------ */
-
-static void note_child_death(int pid, int status)
-{
- ErtsSysReportExit **repp = &report_exit_list;
- ErtsSysReportExit *rep = report_exit_list;
-
- while (rep) {
- if (pid == rep->pid) {
- *repp = rep->next;
- ERTS_REPORT_EXIT_STATUS(rep, status);
- break;
- }
- repp = &rep->next;
- rep = rep->next;
- }
-}
-
-#if CHLDWTHR
-
-static void *
-child_waiter(void *unused)
-{
- int pid;
- int status;
-
-#ifdef ERTS_ENABLE_LOCK_CHECK
- erts_lc_set_thread_name("child waiter");
-#endif
-
- while(1) {
-#ifdef DEBUG
- int waitpid_errno;
-#endif
- pid = waitpid(-1, &status, 0);
-#ifdef DEBUG
- waitpid_errno = errno;
-#endif
- CHLD_STAT_LOCK;
- if (pid < 0) {
- ASSERT(waitpid_errno == ECHILD);
- }
- else {
- children_alive--;
- ASSERT(children_alive >= 0);
- note_child_death(pid, status);
- }
- while (!children_alive)
- CHLD_STAT_WAIT; /* Wait for children to wait on... :) */
- CHLD_STAT_UNLOCK;
- }
-
- return NULL;
-}
-
-#endif
-
/*
* Called from schedule() when it runs out of runnable processes,
* or when Erlang code has performed INPUT_REDUCTIONS reduction
@@ -2812,13 +1218,8 @@ child_waiter(void *unused)
void
erl_sys_schedule(int runnable)
{
-#ifdef ERTS_SMP
ERTS_CHK_IO(!runnable);
-#else
- ERTS_CHK_IO(runnable ? 0 : !check_children());
-#endif
ERTS_SMP_LC_ASSERT(!erts_thr_progress_is_blocking());
- (void) check_children();
}
@@ -2846,10 +1247,6 @@ smp_sig_notify(char c)
static void *
signal_dispatcher_thread_func(void *unused)
{
-#if !CHLDWTHR
- int initialized = 0;
- int notify_check_children = 0;
-#endif
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_set_thread_name("signal_dispatcher");
#endif
@@ -2861,7 +1258,7 @@ signal_dispatcher_thread_func(void *unused)
if (res < 0) {
if (errno == EINTR)
continue;
- erl_exit(ERTS_ABORT_EXIT,
+ erts_exit(ERTS_ABORT_EXIT,
"signal-dispatcher thread got unexpected error: %s (%d)\n",
erl_errno_id(errno),
errno);
@@ -2887,19 +1284,7 @@ signal_dispatcher_thread_func(void *unused)
*/
switch (buf[i]) {
case 0: /* Emulator initialized */
-#if !CHLDWTHR
- initialized = 1;
- if (!notify_check_children)
-#endif
- break;
-#if !CHLDWTHR
- case 'C': /* SIGCHLD */
- if (initialized)
- erts_smp_notify_check_children_needed();
- else
- notify_check_children = 1;
- break;
-#endif
+ break;
case 'I': /* SIGINT */
break_requested();
break;
@@ -2909,15 +1294,8 @@ signal_dispatcher_thread_func(void *unused)
case '1': /* SIGUSR1 */
sigusr1_exit();
break;
-#ifdef QUANTIFY
- case '2': /* SIGUSR2 */
- quantify_save_data(); /* Might take a substantial amount of
- time, but this is a test/debug
- build */
- break;
-#endif
default:
- erl_exit(ERTS_ABORT_EXIT,
+ erts_exit(ERTS_ABORT_EXIT,
"signal-dispatcher thread received unknown "
"signal notification: '%c'\n",
buf[i]);
@@ -2933,9 +1311,10 @@ init_smp_sig_notify(void)
{
erts_smp_thr_opts_t thr_opts = ERTS_SMP_THR_OPTS_DEFAULT_INITER;
thr_opts.detached = 1;
+ thr_opts.name = "sys_sig_dispatcher";
if (pipe(sig_notify_fds) < 0) {
- erl_exit(ERTS_ABORT_EXIT,
+ erts_exit(ERTS_ABORT_EXIT,
"Failed to create signal-dispatcher pipe: %s (%d)\n",
erl_errno_id(errno),
errno);
@@ -2947,6 +1326,19 @@ init_smp_sig_notify(void)
NULL,
&thr_opts);
}
+
+static void
+init_smp_sig_suspend(void) {
+#ifdef ERTS_SYS_SUSPEND_SIGNAL
+ if (pipe(sig_suspend_fds) < 0) {
+ erts_exit(ERTS_ABORT_EXIT,
+ "Failed to create sig_suspend pipe: %s (%d)\n",
+ erl_errno_id(errno),
+ errno);
+ }
+#endif
+}
+
#ifdef __DARWIN__
int erts_darwin_main_thread_pipe[2];
@@ -2956,7 +1348,7 @@ static void initialize_darwin_main_thread_pipes(void)
{
if (pipe(erts_darwin_main_thread_pipe) < 0 ||
pipe(erts_darwin_main_thread_result_pipe) < 0) {
- erl_exit(1,"Fatal error initializing Darwin main thread stealing");
+ erts_exit(ERTS_ERROR_EXIT,"Fatal error initializing Darwin main thread stealing");
}
}
@@ -2974,9 +1366,11 @@ erts_sys_main_thread(void)
#endif
smp_sig_notify(0); /* Notify initialized */
- while (1) {
- /* Wait for a signal to arrive... */
+
+ /* Wait for a signal to arrive... */
+
#ifdef __DARWIN__
+ while (1) {
/*
* The wx driver needs to be able to steal the main thread for Cocoa to
* work properly.
@@ -2991,12 +1385,24 @@ erts_sys_main_thread(void)
void* (*func)(void*);
void* arg;
void *resp;
- read(erts_darwin_main_thread_pipe[0],&func,sizeof(void* (*)(void*)));
- read(erts_darwin_main_thread_pipe[0],&arg, sizeof(void*));
+ res = read(erts_darwin_main_thread_pipe[0],&func,sizeof(void* (*)(void*)));
+ if (res != sizeof(void* (*)(void*)))
+ break;
+ res = read(erts_darwin_main_thread_pipe[0],&arg,sizeof(void*));
+ if (res != sizeof(void*))
+ break;
resp = (*func)(arg);
write(erts_darwin_main_thread_result_pipe[1],&resp,sizeof(void *));
}
-#else
+
+ if (res == -1 && errno != EINTR)
+ break;
+ }
+ /* Something broke with the main thread pipe, so we ignore it for now.
+ Most probably erts has closed this pipe and is about to exit. */
+#endif /* #ifdef __DARWIN__ */
+
+ while (1) {
#ifdef DEBUG
int res =
#else
@@ -3005,7 +1411,6 @@ erts_sys_main_thread(void)
select(0, NULL, NULL, NULL, NULL);
ASSERT(res < 0);
ASSERT(errno == EINTR);
-#endif
}
}
@@ -3097,6 +1502,7 @@ erl_sys_args(int* argc, char** argv)
#ifdef ERTS_SMP
init_smp_sig_notify();
+ init_smp_sig_suspend();
#endif
/* Handled arguments have been marked with NULL. Slide arguments
diff --git a/erts/emulator/sys/unix/sys_drivers.c b/erts/emulator/sys/unix/sys_drivers.c
new file mode 100644
index 0000000000..812112fb91
--- /dev/null
+++ b/erts/emulator/sys/unix/sys_drivers.c
@@ -0,0 +1,1862 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 1996-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef ISC32
+#define _POSIX_SOURCE
+#define _XOPEN_SOURCE
+#endif
+
+#include <sys/times.h> /* ! */
+#include <time.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+#include <termios.h>
+#include <ctype.h>
+#include <sys/utsname.h>
+#include <sys/select.h>
+#include <arpa/inet.h>
+
+#ifdef ISC32
+#include <sys/bsdtypes.h>
+#endif
+
+#include <termios.h>
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+#ifdef HAVE_SYS_IOCTL_H
+#include <sys/ioctl.h>
+#endif
+
+#define WANT_NONBLOCKING /* must define this to pull in defs from sys.h */
+#include "sys.h"
+
+#ifdef USE_THREADS
+#include "erl_threads.h"
+#endif
+
+extern char **environ;
+extern erts_smp_rwmtx_t environ_rwmtx;
+
+extern erts_smp_atomic_t sys_misc_mem_sz;
+
+static Eterm forker_port;
+
+#define MAX_VSIZE 16 /* Max number of entries allowed in an I/O
+ * vector sock_sendv().
+ */
+/*
+ * Don't need global.h, but erl_cpu_topology.h won't compile otherwise
+ */
+#include "global.h"
+#include "erl_cpu_topology.h"
+
+#include "erl_sys_driver.h"
+#include "sys_uds.h"
+
+#include "erl_child_setup.h"
+
+#if defined IOV_MAX
+#define MAXIOV IOV_MAX
+#elif defined UIO_MAXIOV
+#define MAXIOV UIO_MAXIOV
+#else
+#define MAXIOV 16
+#endif
+
+#ifdef USE_THREADS
+# define FDBLOCK 1
+#else
+# define FDBLOCK 0
+#endif
+
+/* Used by the fd driver iff the fd could not be set to non-blocking */
+typedef struct ErtsSysBlocking_ {
+ ErlDrvPDL pdl;
+ int res;
+ int err;
+ unsigned int pkey;
+} ErtsSysBlocking;
+
+typedef struct fd_data {
+ int fd;
+ char pbuf[4]; /* hold partial packet bytes */
+ int psz; /* size of pbuf */
+ char *buf;
+ char *cpos;
+ int sz;
+ int remain; /* for input on fd */
+} ErtsSysFdData;
+
+typedef struct driver_data {
+ ErlDrvPort port_num;
+ ErtsSysFdData *ofd;
+ ErtsSysFdData *ifd;
+ int packet_bytes;
+ int pid;
+ int alive;
+ int status;
+ int terminating;
+ ErtsSysBlocking *blocking;
+} ErtsSysDriverData;
+
+#define DIR_SEPARATOR_CHAR '/'
+
+#if defined(__ANDROID__)
+#define SHELL "/system/bin/sh"
+#else
+#define SHELL "/bin/sh"
+#endif /* __ANDROID__ */
+
+#if defined(DEBUG)
+#define ERL_BUILD_TYPE_MARKER ".debug"
+#elif defined(PURIFY)
+#define ERL_BUILD_TYPE_MARKER ".purify"
+#elif defined(QUANTIFY)
+#define ERL_BUILD_TYPE_MARKER ".quantify"
+#elif defined(PURECOV)
+#define ERL_BUILD_TYPE_MARKER ".purecov"
+#elif defined(VALGRIND)
+#define ERL_BUILD_TYPE_MARKER ".valgrind"
+#else /* opt */
+#define ERL_BUILD_TYPE_MARKER
+#endif
+
+#ifdef DEBUG
+#define close(fd) do { int res = close(fd); ASSERT(res > -1); } while(0)
+#endif
+
+#define CHILD_SETUP_PROG_NAME "erl_child_setup" ERL_BUILD_TYPE_MARKER
+
+// #define HARD_DEBUG
+#ifdef HARD_DEBUG
+#define driver_select(port_num, fd, flags, onoff) \
+ do { \
+ if (((flags) & ERL_DRV_READ) && onoff) \
+ fprintf(stderr,"%010d %p: read select %d\r\n", __LINE__, port_num, (int)fd); \
+ if (((flags) & ERL_DRV_WRITE) && onoff) \
+ fprintf(stderr,"%010d %p: writ select %d\r\n", __LINE__, port_num, (int)fd); \
+ if (((flags) & ERL_DRV_READ) && !onoff) \
+ fprintf(stderr,"%010d %p: read unsele %d\r\n", __LINE__, port_num, (int)fd); \
+ if (((flags) & ERL_DRV_WRITE) && !onoff) \
+ fprintf(stderr,"%010d %p: writ unsele %d\r\n", __LINE__, port_num, (int)fd); \
+ driver_select_nkp(port_num, fd, flags, onoff); \
+ } while(0)
+#endif
+
+/*
+ * Decreasing the size of it below 16384 is not allowed.
+ */
+
+#define ERTS_SYS_READ_BUF_SZ (64*1024)
+
+/* I. Initialization */
+
+void
+erl_sys_late_init(void)
+{
+ SysDriverOpts opts;
+#ifdef ERTS_SMP
+ Port *port;
+#endif
+
+ sys_signal(SIGPIPE, SIG_IGN); /* Ignore - we'll handle the write failure */
+
+ opts.packet_bytes = 0;
+ opts.use_stdio = 1;
+ opts.redir_stderr = 0;
+ opts.read_write = 0;
+ opts.hide_window = 0;
+ opts.wd = NULL;
+ opts.envir = NULL;
+ opts.exit_status = 0;
+ opts.overlapped_io = 0;
+ opts.spawn_type = ERTS_SPAWN_ANY;
+ opts.argv = NULL;
+ opts.parallelism = erts_port_parallelism;
+
+#ifdef ERTS_SMP
+ port =
+#endif
+ erts_open_driver(&forker_driver, make_internal_pid(0), "forker", &opts, NULL, NULL);
+#ifdef ERTS_SMP
+ erts_mtx_unlock(port->lock);
+#endif
+}
+
+/* II. Prototypes */
+
+/* II.I Spawn prototypes */
+static ErlDrvData spawn_start(ErlDrvPort, char*, SysDriverOpts*);
+static ErlDrvSSizeT spawn_control(ErlDrvData, unsigned int, char *,
+ ErlDrvSizeT, char **, ErlDrvSizeT);
+
+/* II.II Vanilla prototypes */
+static ErlDrvData vanilla_start(ErlDrvPort, char*, SysDriverOpts*);
+
+
+/* II.III FD prototypes */
+static ErlDrvData fd_start(ErlDrvPort, char*, SysDriverOpts*);
+#if FDBLOCK
+static void fd_async(void *);
+static void fd_ready_async(ErlDrvData drv_data, ErlDrvThreadData thread_data);
+#endif
+static ErlDrvSSizeT fd_control(ErlDrvData, unsigned int, char *, ErlDrvSizeT,
+ char **, ErlDrvSizeT);
+static void fd_stop(ErlDrvData);
+static void fd_flush(ErlDrvData);
+
+/* II.IV Common prototypes */
+static void stop(ErlDrvData);
+static void ready_input(ErlDrvData, ErlDrvEvent);
+static void ready_output(ErlDrvData, ErlDrvEvent);
+static void output(ErlDrvData, char*, ErlDrvSizeT);
+static void outputv(ErlDrvData, ErlIOVec*);
+static void stop_select(ErlDrvEvent, void*);
+
+/* II.V Forker prototypes */
+static ErlDrvData forker_start(ErlDrvPort, char*, SysDriverOpts*);
+static void forker_stop(ErlDrvData);
+static void forker_ready_input(ErlDrvData, ErlDrvEvent);
+static void forker_ready_output(ErlDrvData, ErlDrvEvent);
+static ErlDrvSSizeT forker_control(ErlDrvData, unsigned int, char *,
+ ErlDrvSizeT, char **, ErlDrvSizeT);
+
+/* III Driver entries */
+
+/* III.I The spawn driver */
+struct erl_drv_entry spawn_driver_entry = {
+ NULL,
+ spawn_start,
+ stop,
+ output,
+ ready_input,
+ ready_output,
+ "spawn",
+ NULL,
+ NULL,
+ spawn_control,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ERL_DRV_EXTENDED_MARKER,
+ ERL_DRV_EXTENDED_MAJOR_VERSION,
+ ERL_DRV_EXTENDED_MINOR_VERSION,
+ ERL_DRV_FLAG_USE_PORT_LOCKING | ERL_DRV_FLAG_USE_INIT_ACK,
+ NULL, NULL,
+ stop_select
+};
+
+/* III.II The fd driver */
+struct erl_drv_entry fd_driver_entry = {
+ NULL,
+ fd_start,
+ fd_stop,
+ output,
+ ready_input,
+ ready_output,
+ "fd",
+ NULL,
+ NULL,
+ fd_control,
+ NULL,
+ outputv,
+#if FDBLOCK
+ fd_ready_async, /* ready_async */
+#else
+ NULL,
+#endif
+ fd_flush, /* flush */
+ NULL, /* call */
+ NULL, /* event */
+ ERL_DRV_EXTENDED_MARKER,
+ ERL_DRV_EXTENDED_MAJOR_VERSION,
+ ERL_DRV_EXTENDED_MINOR_VERSION,
+ 0, /* ERL_DRV_FLAGs */
+ NULL, /* handle2 */
+ NULL, /* process_exit */
+ stop_select
+};
+
+/* III.III The vanilla driver */
+struct erl_drv_entry vanilla_driver_entry = {
+ NULL,
+ vanilla_start,
+ stop,
+ output,
+ ready_input,
+ ready_output,
+ "vanilla",
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL, /* flush */
+ NULL, /* call */
+ NULL, /* event */
+ ERL_DRV_EXTENDED_MARKER,
+ ERL_DRV_EXTENDED_MAJOR_VERSION,
+ ERL_DRV_EXTENDED_MINOR_VERSION,
+ 0, /* ERL_DRV_FLAGs */
+ NULL, /* handle2 */
+ NULL, /* process_exit */
+ stop_select
+};
+
+/* III.III The forker driver */
+struct erl_drv_entry forker_driver_entry = {
+ NULL,
+ forker_start,
+ forker_stop,
+ NULL,
+ forker_ready_input,
+ forker_ready_output,
+ "spawn_forker",
+ NULL,
+ NULL,
+ forker_control,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ ERL_DRV_EXTENDED_MARKER,
+ ERL_DRV_EXTENDED_MAJOR_VERSION,
+ ERL_DRV_EXTENDED_MINOR_VERSION,
+ 0,
+ NULL, NULL,
+ stop_select
+};
+
+/* Untility functions */
+
+static int set_blocking_data(ErtsSysDriverData *dd) {
+
+ dd->blocking = erts_alloc(ERTS_ALC_T_SYS_BLOCKING, sizeof(ErtsSysBlocking));
+
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, sizeof(ErtsSysBlocking));
+
+ dd->blocking->pdl = driver_pdl_create(dd->port_num);
+ dd->blocking->res = 0;
+ dd->blocking->err = 0;
+ dd->blocking->pkey = driver_async_port_key(dd->port_num);
+
+ return 1;
+}
+
+static void init_fd_data(ErtsSysFdData *fd_data, int fd)
+{
+ fd_data->fd = fd;
+ fd_data->buf = NULL;
+ fd_data->cpos = NULL;
+ fd_data->remain = 0;
+ fd_data->sz = 0;
+ fd_data->psz = 0;
+}
+
+static ErtsSysDriverData *
+create_driver_data(ErlDrvPort port_num,
+ int ifd,
+ int ofd,
+ int packet_bytes,
+ int read_write,
+ int exit_status,
+ int pid,
+ int is_blocking)
+{
+ Port *prt;
+ ErtsSysDriverData *driver_data;
+ char *data;
+ int size = sizeof(ErtsSysDriverData);
+
+ if (read_write & DO_READ)
+ size += sizeof(ErtsSysFdData);
+
+ if ((read_write & DO_WRITE) &&
+ ((ifd != ofd || ofd == -1) || !(read_write & DO_READ)))
+ size += sizeof(ErtsSysFdData);
+
+ data = erts_alloc(ERTS_ALC_T_DRV_TAB,size);
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, size);
+
+ driver_data = (ErtsSysDriverData*)data;
+ data += sizeof(*driver_data);
+
+ prt = erts_drvport2port(port_num);
+ if (prt != ERTS_INVALID_ERL_DRV_PORT)
+ prt->os_pid = pid;
+
+ driver_data->packet_bytes = packet_bytes;
+ driver_data->port_num = port_num;
+ driver_data->pid = pid;
+ driver_data->alive = exit_status ? 1 : 0;
+ driver_data->status = 0;
+ driver_data->terminating = 0;
+ driver_data->blocking = NULL;
+
+ if (read_write & DO_READ) {
+ driver_data->ifd = (ErtsSysFdData*)data;
+ data += sizeof(*driver_data->ifd);
+ init_fd_data(driver_data->ifd, ifd);
+ driver_select(port_num, ifd, (ERL_DRV_READ|ERL_DRV_USE), 1);
+ } else {
+ driver_data->ifd = NULL;
+ }
+
+ if (read_write & DO_WRITE) {
+ if (ofd != -1 && ifd == ofd && read_write & DO_READ) {
+ /* This is for when ifd and ofd are the same fd */
+ driver_data->ofd = driver_data->ifd;
+ } else {
+ driver_data->ofd = (ErtsSysFdData*)data;
+ data += sizeof(*driver_data->ofd);
+ init_fd_data(driver_data->ofd, ofd);
+ }
+ if (is_blocking && FDBLOCK)
+ if (!set_blocking_data(driver_data)) {
+ erts_free(ERTS_ALC_T_DRV_TAB, driver_data);
+ return NULL;
+ }
+ } else {
+ driver_data->ofd = NULL;
+ }
+
+ return driver_data;
+}
+
+/* Spawn driver */
+
+static void close_pipes(int ifd[2], int ofd[2])
+{
+ close(ifd[0]);
+ close(ifd[1]);
+ close(ofd[0]);
+ close(ofd[1]);
+}
+
+static char **build_unix_environment(char *block)
+{
+ int i;
+ int j;
+ int len;
+ char *cp;
+ char **cpp;
+ char** old_env;
+
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_rwmtx_is_rlocked(&environ_rwmtx));
+
+ cp = block;
+ len = 0;
+ while (*cp != '\0') {
+ cp += strlen(cp) + 1;
+ len++;
+ }
+ old_env = environ;
+ while (*old_env++ != NULL) {
+ len++;
+ }
+
+ cpp = (char **) erts_alloc_fnf(ERTS_ALC_T_ENVIRONMENT,
+ sizeof(char *) * (len+1));
+ if (cpp == NULL) {
+ return NULL;
+ }
+
+ cp = block;
+ len = 0;
+ while (*cp != '\0') {
+ cpp[len] = cp;
+ cp += strlen(cp) + 1;
+ len++;
+ }
+
+ i = len;
+ for (old_env = environ; *old_env; old_env++) {
+ char* old = *old_env;
+
+ for (j = 0; j < len; j++) {
+ char *s, *t;
+
+ /* check if cpp[j] equals old
+ before the = sign,
+ i.e.
+ "TMPDIR=/tmp/" */
+ s = cpp[j];
+ t = old;
+ while (*s == *t && *s != '=') {
+ s++, t++;
+ }
+ if (*s == '=' && *t == '=') {
+ break;
+ }
+ }
+
+ if (j == len) { /* New version not found */
+ cpp[len++] = old;
+ }
+ }
+
+ for (j = 0; j < i; ) {
+ size_t last = strlen(cpp[j])-1;
+ if (cpp[j][last] == '=' && strchr(cpp[j], '=') == cpp[j]+last) {
+ cpp[j] = cpp[--len];
+ if (len < i) {
+ i--;
+ } else {
+ j++;
+ }
+ }
+ else {
+ j++;
+ }
+ }
+
+ cpp[len] = NULL;
+ return cpp;
+}
+
+static ErlDrvData spawn_start(ErlDrvPort port_num, char* name,
+ SysDriverOpts* opts)
+{
+#define CMD_LINE_PREFIX_STR "exec "
+#define CMD_LINE_PREFIX_STR_SZ (sizeof(CMD_LINE_PREFIX_STR) - 1)
+
+ int len;
+ char **new_environ;
+ ErtsSysDriverData *dd;
+ char *cmd_line;
+ char wd_buff[MAXPATHLEN+1];
+ char *wd;
+ int ifd[2], ofd[2], stderrfd;
+
+ if (pipe(ifd) < 0) return ERL_DRV_ERROR_ERRNO;
+ errno = EMFILE; /* default for next three conditions */
+ if (ifd[0] >= sys_max_files() || pipe(ofd) < 0) {
+ close(ifd[0]);
+ close(ifd[1]);
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ if (ofd[1] >= sys_max_files()) {
+ close_pipes(ifd, ofd);
+ errno = EMFILE;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+
+ SET_NONBLOCKING(ifd[0]);
+ SET_NONBLOCKING(ofd[1]);
+
+ stderrfd = opts->redir_stderr ? ifd[1] : dup(2);
+
+ if (stderrfd >= sys_max_files() || stderrfd < 0) {
+ close_pipes(ifd, ofd);
+ if (stderrfd > -1)
+ close(stderrfd);
+ return ERL_DRV_ERROR_ERRNO;
+ }
+
+ if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
+ /* started with spawn_executable, not with spawn */
+ len = strlen(name);
+ cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP, len + 1);
+ if (!cmd_line) {
+ close_pipes(ifd, ofd);
+ errno = ENOMEM;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ memcpy((void *) cmd_line,(void *) name, len);
+ cmd_line[len] = '\0';
+ len = len + 1;
+ if (access(cmd_line,X_OK) != 0) {
+ int save_errno = errno;
+ erts_free(ERTS_ALC_T_TMP, cmd_line);
+ close_pipes(ifd, ofd);
+ errno = save_errno;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ } else {
+ /* make the string suitable for giving to "sh" */
+ len = strlen(name);
+ cmd_line = (char *) erts_alloc_fnf(ERTS_ALC_T_TMP,
+ CMD_LINE_PREFIX_STR_SZ + len + 1);
+ if (!cmd_line) {
+ close_pipes(ifd, ofd);
+ errno = ENOMEM;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ memcpy((void *) cmd_line,
+ (void *) CMD_LINE_PREFIX_STR,
+ CMD_LINE_PREFIX_STR_SZ);
+ memcpy((void *) (cmd_line + CMD_LINE_PREFIX_STR_SZ), (void *) name, len);
+ cmd_line[CMD_LINE_PREFIX_STR_SZ + len] = '\0';
+ len = CMD_LINE_PREFIX_STR_SZ + len + 1;
+ }
+
+ erts_smp_rwmtx_rlock(&environ_rwmtx);
+
+ if (opts->envir == NULL) {
+ new_environ = environ;
+ } else if ((new_environ = build_unix_environment(opts->envir)) == NULL) {
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ close_pipes(ifd, ofd);
+ erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
+ errno = ENOMEM;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+
+ if (opts->wd == NULL) {
+ if ((wd = getcwd(wd_buff, MAXPATHLEN+1)) == NULL) {
+ /* on some OSs this call opens a fd in the
+ background which means that this can
+ return EMFILE */
+ int err = errno;
+ close_pipes(ifd, ofd);
+ erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
+ if (new_environ != environ)
+ erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ);
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ errno = err;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ } else {
+ wd = opts->wd;
+ }
+
+ {
+ struct iovec *io_vector;
+ int iov_len = 5;
+ char nullbuff[] = "\0";
+ int j, i = 0, res;
+ Sint32 buffsz = 0, env_len = 0, argv_len = 0,
+ flags = (opts->use_stdio ? FORKER_FLAG_USE_STDIO : 0)
+ | (opts->exit_status ? FORKER_FLAG_EXIT_STATUS : 0)
+ | (opts->read_write & DO_READ ? FORKER_FLAG_DO_READ : 0)
+ | (opts->read_write & DO_WRITE ? FORKER_FLAG_DO_WRITE : 0);
+
+ /* count number of elements in environment */
+ while(new_environ[env_len] != NULL)
+ env_len++;
+ iov_len += 1 + env_len; /* num envs including size int */
+
+ /* count number of element in argument list */
+ if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
+ if (opts->argv != NULL) {
+ while(opts->argv[argv_len] != NULL)
+ argv_len++;
+ } else {
+ argv_len++;
+ }
+ iov_len += 1 + argv_len; /* num argvs including size int */
+ }
+
+ io_vector = erts_alloc_fnf(ERTS_ALC_T_TMP, sizeof(struct iovec) * iov_len);
+
+ if (!io_vector) {
+ close_pipes(ifd, ofd);
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
+ if (new_environ != environ)
+ erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ);
+ errno = ENOMEM;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+
+ io_vector[i].iov_base = (void*)&buffsz;
+ io_vector[i++].iov_len = sizeof(buffsz);
+
+ io_vector[i].iov_base = (void*)&flags;
+ flags = htonl(flags);
+ io_vector[i++].iov_len = sizeof(flags);
+ buffsz += sizeof(flags);
+
+ io_vector[i].iov_base = cmd_line;
+ io_vector[i++].iov_len = len;
+ buffsz += len;
+
+ io_vector[i].iov_base = wd;
+ io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
+ buffsz += io_vector[i++].iov_len;
+
+ io_vector[i].iov_base = nullbuff;
+ io_vector[i++].iov_len = 1;
+ buffsz += io_vector[i-1].iov_len;
+
+ io_vector[i].iov_base = (void*)&env_len;
+ env_len = htonl(env_len);
+ io_vector[i++].iov_len = sizeof(env_len);
+ buffsz += io_vector[i-1].iov_len;
+
+ for (j = 0; new_environ[j] != NULL; j++) {
+ io_vector[i].iov_base = new_environ[j];
+ io_vector[i++].iov_len = strlen(new_environ[j]) + 1;
+ buffsz += io_vector[i-1].iov_len;
+ }
+
+ /* only append arguments if this was a spawn_executable */
+ if (opts->spawn_type == ERTS_SPAWN_EXECUTABLE) {
+
+ io_vector[i].iov_base = (void*)&argv_len;
+ argv_len = htonl(argv_len);
+ io_vector[i++].iov_len = sizeof(argv_len);
+ buffsz += io_vector[i-1].iov_len;
+
+ if (opts->argv) {
+ /* If there are arguments we copy in the references to
+ them into the iov */
+ for (j = 0; opts->argv[j]; j++) {
+ if (opts->argv[j] == erts_default_arg0)
+ io_vector[i].iov_base = cmd_line;
+ else
+ io_vector[i].iov_base = opts->argv[j];
+ io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
+ buffsz += io_vector[i++].iov_len;
+ }
+ } else {
+ io_vector[i].iov_base = cmd_line;
+ io_vector[i].iov_len = strlen(io_vector[i].iov_base) + 1;
+ buffsz += io_vector[i++].iov_len;
+ }
+ }
+
+ /* we send the request to do the fork */
+ if ((res = writev(ofd[1], io_vector, iov_len > MAXIOV ? MAXIOV : iov_len)) < 0) {
+ if (errno == ERRNO_BLOCK) {
+ res = 0;
+ } else {
+ int err = errno;
+ close_pipes(ifd, ofd);
+ erts_free(ERTS_ALC_T_TMP, io_vector);
+ if (new_environ != environ)
+ erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ);
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
+ errno = err;
+ return ERL_DRV_ERROR_ERRNO;
+ }
+ }
+
+ if (res < (buffsz + sizeof(buffsz))) {
+ /* we only wrote part of the command payload. Enqueue the rest. */
+ for (i = 0; i < iov_len; i++) {
+ driver_enq(port_num, io_vector[i].iov_base, io_vector[i].iov_len);
+ }
+ driver_deq(port_num, res);
+ driver_select(port_num, ofd[1], ERL_DRV_WRITE|ERL_DRV_USE, 1);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, io_vector);
+ }
+
+ erts_free(ERTS_ALC_T_TMP, (void *) cmd_line);
+
+ if (new_environ != environ)
+ erts_free(ERTS_ALC_T_ENVIRONMENT, (void *) new_environ);
+
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+
+ dd = create_driver_data(port_num, ifd[0], ofd[1], opts->packet_bytes,
+ DO_WRITE | DO_READ, opts->exit_status,
+ 0, 0);
+
+ {
+ /* send ofd[0] + ifd[1] + stderrfd to forker port */
+ ErtsSysForkerProto *proto =
+ erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA,
+ sizeof(ErtsSysForkerProto));
+ memset(proto, 0, sizeof(ErtsSysForkerProto));
+ proto->action = ErtsSysForkerProtoAction_Start;
+ proto->u.start.fds[0] = ofd[0];
+ proto->u.start.fds[1] = ifd[1];
+ proto->u.start.fds[2] = stderrfd;
+ proto->u.start.port_id = opts->exit_status ? erts_drvport2id(port_num) : THE_NON_VALUE;
+ if (erl_drv_port_control(forker_port, 'S', (char*)proto, sizeof(*proto))) {
+ /* The forker port has been killed, we close both fd's which will
+ make open_port throw an epipe error */
+ close(ofd[0]);
+ close(ifd[1]);
+ }
+ }
+
+ /* we set these fds to negative to mark if
+ they should be closed after the handshake */
+ if (!(opts->read_write & DO_READ))
+ dd->ifd->fd *= -1;
+
+ if (!(opts->read_write & DO_WRITE))
+ dd->ofd->fd *= -1;
+
+ return (ErlDrvData)dd;
+#undef CMD_LINE_PREFIX_STR
+#undef CMD_LINE_PREFIX_STR_SZ
+}
+
+static ErlDrvSSizeT spawn_control(ErlDrvData e, unsigned int cmd, char *buf,
+ ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
+{
+ ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
+ ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
+
+ ASSERT(len == sizeof(*proto));
+ ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld);
+
+ dd->status = proto->u.sigchld.error_number;
+ dd->alive = -1;
+
+ if (dd->ifd)
+ driver_select(dd->port_num, abs(dd->ifd->fd), ERL_DRV_READ | ERL_DRV_USE, 1);
+
+ if (dd->ofd)
+ driver_select(dd->port_num, abs(dd->ofd->fd), ERL_DRV_WRITE | ERL_DRV_USE, 1);
+
+ return 0;
+}
+
+#define FD_DEF_HEIGHT 24
+#define FD_DEF_WIDTH 80
+/* Control op */
+#define FD_CTRL_OP_GET_WINSIZE 100
+
+static int fd_get_window_size(int fd, Uint32 *width, Uint32 *height)
+{
+#ifdef TIOCGWINSZ
+ struct winsize ws;
+ if (ioctl(fd,TIOCGWINSZ,&ws) == 0) {
+ *width = (Uint32) ws.ws_col;
+ *height = (Uint32) ws.ws_row;
+ return 0;
+ }
+#endif
+ return -1;
+}
+
+static ErlDrvSSizeT fd_control(ErlDrvData drv_data,
+ unsigned int command,
+ char *buf, ErlDrvSizeT len,
+ char **rbuf, ErlDrvSizeT rlen)
+{
+ int fd = (int)(long)drv_data;
+ char resbuff[2*sizeof(Uint32)];
+ switch (command) {
+ case FD_CTRL_OP_GET_WINSIZE:
+ {
+ Uint32 w,h;
+ if (fd_get_window_size(fd,&w,&h))
+ return 0;
+ memcpy(resbuff,&w,sizeof(Uint32));
+ memcpy(resbuff+sizeof(Uint32),&h,sizeof(Uint32));
+ }
+ break;
+ default:
+ return 0;
+ }
+ if (rlen < 2*sizeof(Uint32)) {
+ *rbuf = driver_alloc(2*sizeof(Uint32));
+ }
+ memcpy(*rbuf,resbuff,2*sizeof(Uint32));
+ return 2*sizeof(Uint32);
+}
+
+static ErlDrvData fd_start(ErlDrvPort port_num, char* name,
+ SysDriverOpts* opts)
+{
+ int non_blocking = 0;
+
+ if (((opts->read_write & DO_READ) && opts->ifd >= sys_max_files()) ||
+ ((opts->read_write & DO_WRITE) && opts->ofd >= sys_max_files()))
+ return ERL_DRV_ERROR_GENERAL;
+
+ /*
+ * Historical:
+ *
+ * "Note about nonblocking I/O.
+ *
+ * At least on Solaris, setting the write end of a TTY to nonblocking,
+ * will set the input end to nonblocking as well (and vice-versa).
+ * If erl is run in a pipeline like this: cat | erl
+ * the input end of the TTY will be the standard input of cat.
+ * And cat is not prepared to handle nonblocking I/O."
+ *
+ * Actually, the reason for this is not that the tty itself gets set
+ * in non-blocking mode, but that the "input end" (cat's stdin) and
+ * the "output end" (erlang's stdout) are typically the "same" file
+ * descriptor, dup()'ed from a single fd by one of this process'
+ * ancestors.
+ *
+ * The workaround for this problem used to be a rather bad kludge,
+ * interposing an extra process ("internal cat") between erlang's
+ * stdout and the original stdout, allowing erlang to set its stdout
+ * in non-blocking mode without affecting the stdin of the preceding
+ * process in the pipeline - and being a kludge, it caused all kinds
+ * of weird problems.
+ *
+ * So, this is the current logic:
+ *
+ * The only reason to set non-blocking mode on the output fd at all is
+ * if it's something that can cause a write() to block, of course,
+ * i.e. primarily if it points to a tty, socket, pipe, or fifo.
+ *
+ * If we don't set non-blocking mode when we "should" have, and output
+ * becomes blocked, the entire runtime system will be suspended - this
+ * is normally bad of course, and can happen fairly "easily" - e.g. user
+ * hits ^S on tty - but doesn't necessarily happen.
+ *
+ * If we do set non-blocking mode when we "shouldn't" have, the runtime
+ * system will end up seeing EOF on the input fd (due to the preceding
+ * process dying), which typically will cause the entire runtime system
+ * to terminate immediately (due to whatever erlang process is seeing
+ * the EOF taking it as a signal to halt the system). This is *very* bad.
+ *
+ * I.e. we should take a conservative approach, and only set non-
+ * blocking mode when we a) need to, and b) are reasonably certain
+ * that it won't be a problem. And as in the example above, the problem
+ * occurs when input fd and output fd point to different "things".
+ *
+ * However, determining that they are not just the same "type" of
+ * "thing", but actually the same instance of that type of thing, is
+ * unreasonably complex in many/most cases.
+ *
+ * Also, with pipes, sockets, and fifos it's far from obvious that the
+ * user *wants* non-blocking output: If you're running erlang inside
+ * some complex pipeline, you're probably not running a real-time system
+ * that must never stop, but rather *want* it to suspend if the output
+ * channel is "full".
+ *
+ * So, the bottom line: We will only set the output fd non-blocking if
+ * it points to a tty, and either a) the input fd also points to a tty,
+ * or b) we can make sure that setting the output fd non-blocking
+ * doesn't interfere with someone else's input, via a somewhat milder
+ * kludge than the above.
+ *
+ * Also keep in mind that while this code is almost exclusively run as
+ * a result of an erlang open_port({fd,0,1}, ...), that isn't the only
+ * case - it can be called with any old pre-existing file descriptors,
+ * the relations between which (if they're even two) we can only guess
+ * at - still, we try our best...
+ *
+ * Added note OTP 18: Some systems seem to use stdout/stderr to log data
+ * using unix pipes, so we cannot allow the system to block on a write.
+ * Therefore we use an async thread to write the data to fd's that could
+ * not be set to non-blocking. When no async threads are available we
+ * fall back on the old behaviour.
+ *
+ * Also the guarantee about what is delivered to the OS has changed.
+ * Pre 18 the fd driver did no flushing of data before terminating.
+ * Now it does. This is because we want to be able to guarantee that things
+ * such as escripts and friends really have outputted all data before
+ * terminating. This could potentially block the termination of the system
+ * for a very long time, but if the user wants to terminate fast she should
+ * use erlang:halt with flush=false.
+ */
+
+ /* Try to figure out if we can use non-blocking writes */
+ if (opts->read_write & DO_WRITE) {
+
+ /* If we don't have a read end, all bets are off - no non-blocking. */
+ if (opts->read_write & DO_READ) {
+
+ if (isatty(opts->ofd)) { /* output fd is a tty:-) */
+
+ if (isatty(opts->ifd)) { /* input fd is also a tty */
+
+ /* To really do this "right", we should also check that
+ input and output fd point to the *same* tty - but
+ this seems like overkill; ttyname() isn't for free,
+ and this is a very common case - and it's hard to
+ imagine a scenario where setting non-blocking mode
+ here would cause problems - go ahead and do it. */
+
+ non_blocking = 1;
+ SET_NONBLOCKING(opts->ofd);
+
+ } else { /* output fd is a tty, input fd isn't */
+
+ /* This is a "problem case", but also common (see the
+ example above) - i.e. it makes sense to try a bit
+ harder before giving up on non-blocking mode: Try to
+ re-open the tty that the output fd points to, and if
+ successful replace the original one with the "new" fd
+ obtained this way, and set *that* one in non-blocking
+ mode. (Yes, this is a kludge.)
+
+ However, re-opening the tty may fail in a couple of
+ (unusual) cases:
+
+ 1) The name of the tty (or an equivalent one, i.e.
+ same major/minor number) can't be found, because
+ it actually lives somewhere other than /dev (or
+ wherever ttyname() looks for it), and isn't
+ equivalent to any of those that do live in the
+ "standard" place - this should be *very* unusual.
+
+ 2) Permissions on the tty don't allow us to open it -
+ it's perfectly possible to have an fd open to an
+ object whose permissions wouldn't allow us to open
+ it. This is not as unusual as it sounds, one case
+ is if the user has su'ed to someone else (not
+ root) - we have a read/write fd open to the tty
+ (because it has been inherited all the way down
+ here), but we have neither read nor write
+ permission for the tty.
+
+ In these cases, we finally give up, and don't set the
+ output fd in non-blocking mode. */
+
+ char *tty;
+ int nfd;
+
+ if ((tty = ttyname(opts->ofd)) != NULL &&
+ (nfd = open(tty, O_WRONLY)) != -1) {
+ dup2(nfd, opts->ofd);
+ close(nfd);
+ non_blocking = 1;
+ SET_NONBLOCKING(opts->ofd);
+ }
+ }
+ }
+ }
+ }
+ return (ErlDrvData)create_driver_data(port_num, opts->ifd, opts->ofd,
+ opts->packet_bytes,
+ opts->read_write, 0, -1,
+ !non_blocking);
+}
+
+static void clear_fd_data(ErtsSysFdData *fdd)
+{
+ if (fdd->sz > 0) {
+ erts_free(ERTS_ALC_T_FD_ENTRY_BUF, (void *) fdd->buf);
+ ASSERT(erts_smp_atomic_read_nob(&sys_misc_mem_sz) >= fdd->sz);
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, -1*fdd->sz);
+ }
+ fdd->buf = NULL;
+ fdd->sz = 0;
+ fdd->remain = 0;
+ fdd->cpos = NULL;
+ fdd->psz = 0;
+}
+
+static void nbio_stop_fd(ErlDrvPort prt, ErtsSysFdData *fdd)
+{
+ driver_select(prt, abs(fdd->fd), DO_READ|DO_WRITE, 0);
+ clear_fd_data(fdd);
+ SET_BLOCKING(abs(fdd->fd));
+
+}
+
+static void fd_stop(ErlDrvData ev) /* Does not close the fds */
+{
+ ErtsSysDriverData* dd = (ErtsSysDriverData*)ev;
+ ErlDrvPort prt = dd->port_num;
+ int sz = sizeof(ErtsSysDriverData);
+
+#if FDBLOCK
+ if (dd->blocking) {
+ erts_free(ERTS_ALC_T_SYS_BLOCKING, dd->blocking);
+ dd->blocking = NULL;
+ sz += sizeof(ErtsSysBlocking);
+ }
+#endif
+
+ if (dd->ifd) {
+ sz += sizeof(ErtsSysFdData);
+ nbio_stop_fd(prt, dd->ifd);
+ }
+ if (dd->ofd && dd->ofd != dd->ifd) {
+ sz += sizeof(ErtsSysFdData);
+ nbio_stop_fd(prt, dd->ofd);
+ }
+
+ erts_free(ERTS_ALC_T_DRV_TAB, dd);
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sz);
+}
+
+static void fd_flush(ErlDrvData ev)
+{
+ ErtsSysDriverData* dd = (ErtsSysDriverData*)ev;
+ if (!dd->terminating)
+ dd->terminating = 1;
+}
+
+static ErlDrvData vanilla_start(ErlDrvPort port_num, char* name,
+ SysDriverOpts* opts)
+{
+ int flags, fd;
+ ErlDrvData res;
+
+ flags = (opts->read_write == DO_READ ? O_RDONLY :
+ opts->read_write == DO_WRITE ? O_WRONLY|O_CREAT|O_TRUNC :
+ O_RDWR|O_CREAT);
+ if ((fd = open(name, flags, 0666)) < 0)
+ return ERL_DRV_ERROR_GENERAL;
+ if (fd >= sys_max_files()) {
+ close(fd);
+ return ERL_DRV_ERROR_GENERAL;
+ }
+ SET_NONBLOCKING(fd);
+
+ res = (ErlDrvData)(long)create_driver_data(port_num, fd, fd,
+ opts->packet_bytes,
+ opts->read_write, 0, -1, 0);
+ return res;
+}
+
+/* Note that driver_data[fd].ifd == fd if the port was opened for reading, */
+/* otherwise (i.e. write only) driver_data[fd].ofd = fd. */
+
+static void stop(ErlDrvData ev)
+{
+ ErtsSysDriverData* dd = (ErtsSysDriverData*)ev;
+ ErlDrvPort prt = dd->port_num;
+
+ if (dd->ifd) {
+ nbio_stop_fd(prt, dd->ifd);
+ driver_select(prt, abs(dd->ifd->fd), ERL_DRV_USE, 0); /* close(ifd); */
+ }
+
+ if (dd->ofd && dd->ofd != dd->ifd) {
+ nbio_stop_fd(prt, dd->ofd);
+ driver_select(prt, abs(dd->ofd->fd), ERL_DRV_USE, 0); /* close(ofd); */
+ }
+
+ erts_free(ERTS_ALC_T_DRV_TAB, dd);
+}
+
+/* used by fd_driver */
+static void outputv(ErlDrvData e, ErlIOVec* ev)
+{
+ ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
+ ErlDrvPort ix = dd->port_num;
+ int pb = dd->packet_bytes;
+ int ofd = dd->ofd ? dd->ofd->fd : -1;
+ ssize_t n;
+ ErlDrvSizeT sz;
+ char lb[4];
+ char* lbp;
+ ErlDrvSizeT len = ev->size;
+
+ /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
+ /* if (pb >= 0 && (len & (((ErlDrvSizeT)1 << (pb*8))) - 1) != len) {*/
+ if (((pb == 2) && (len > 0xffff)) || (pb == 1 && len > 0xff)) {
+ driver_failure_posix(ix, EINVAL);
+ return; /* -1; */
+ }
+ /* Handles 0 <= pb <= 4 only */
+ put_int32((Uint32) len, lb);
+ lbp = lb + (4-pb);
+
+ ev->iov[0].iov_base = lbp;
+ ev->iov[0].iov_len = pb;
+ ev->size += pb;
+
+ if (dd->blocking && FDBLOCK)
+ driver_pdl_lock(dd->blocking->pdl);
+
+ if ((sz = driver_sizeq(ix)) > 0) {
+ driver_enqv(ix, ev, 0);
+
+ if (dd->blocking && FDBLOCK)
+ driver_pdl_unlock(dd->blocking->pdl);
+
+ if (sz + ev->size >= (1 << 13))
+ set_busy_port(ix, 1);
+ }
+ else if (!dd->blocking || !FDBLOCK) {
+ /* We try to write directly if the fd in non-blocking */
+ int vsize = ev->vsize > MAX_VSIZE ? MAX_VSIZE : ev->vsize;
+
+ n = writev(ofd, (const void *) (ev->iov), vsize);
+ if (n == ev->size)
+ return; /* 0;*/
+ if (n < 0) {
+ if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
+ driver_failure_posix(ix, errno);
+ return; /* -1;*/
+ }
+ n = 0;
+ }
+ driver_enqv(ix, ev, n); /* n is the skip value */
+ driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
+ }
+#if FDBLOCK
+ else {
+ if (ev->size != 0) {
+ driver_enqv(ix, ev, 0);
+ driver_pdl_unlock(dd->blocking->pdl);
+ driver_async(ix, &dd->blocking->pkey,
+ fd_async, dd, NULL);
+ } else {
+ driver_pdl_unlock(dd->blocking->pdl);
+ }
+ }
+#endif
+ /* return 0;*/
+}
+
+/* Used by spawn_driver and vanilla driver */
+static void output(ErlDrvData e, char* buf, ErlDrvSizeT len)
+{
+ ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
+ ErlDrvPort ix = dd->port_num;
+ int pb = dd->packet_bytes;
+ int ofd = dd->ofd ? dd->ofd->fd : -1;
+ ssize_t n;
+ ErlDrvSizeT sz;
+ char lb[4];
+ char* lbp;
+ struct iovec iv[2];
+
+ /* (len > ((unsigned long)-1 >> (4-pb)*8)) */
+ if (((pb == 2) && (len > 0xffff))
+ || (pb == 1 && len > 0xff)
+ || dd->pid == 0 /* Attempt at output before port is ready */) {
+ driver_failure_posix(ix, EINVAL);
+ return; /* -1; */
+ }
+ put_int32(len, lb);
+ lbp = lb + (4-pb);
+
+ if ((sz = driver_sizeq(ix)) > 0) {
+ driver_enq(ix, lbp, pb);
+ driver_enq(ix, buf, len);
+ if (sz + len + pb >= (1 << 13))
+ set_busy_port(ix, 1);
+ }
+ else {
+ iv[0].iov_base = lbp;
+ iv[0].iov_len = pb; /* should work for pb=0 */
+ iv[1].iov_base = buf;
+ iv[1].iov_len = len;
+ n = writev(ofd, iv, 2);
+ if (n == pb+len)
+ return; /* 0; */
+ if (n < 0) {
+ if ((errno != EINTR) && (errno != ERRNO_BLOCK)) {
+ driver_failure_posix(ix, errno);
+ return; /* -1; */
+ }
+ n = 0;
+ }
+ if (n < pb) {
+ driver_enq(ix, lbp+n, pb-n);
+ driver_enq(ix, buf, len);
+ }
+ else {
+ n -= pb;
+ driver_enq(ix, buf+n, len-n);
+ }
+ driver_select(ix, ofd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
+ }
+ return; /* 0; */
+}
+
+static int port_inp_failure(ErtsSysDriverData *dd, int res)
+ /* Result: 0 (eof) or -1 (error) */
+{
+ int err = errno;
+
+ ASSERT(res <= 0);
+ if (dd->ifd) {
+ driver_select(dd->port_num, dd->ifd->fd, ERL_DRV_READ|ERL_DRV_WRITE, 0);
+ clear_fd_data(dd->ifd);
+ }
+
+ if (dd->blocking && FDBLOCK) {
+ driver_pdl_lock(dd->blocking->pdl);
+ if (driver_sizeq(dd->port_num) > 0) {
+ driver_pdl_unlock(dd->blocking->pdl);
+ /* We have stuff in the output queue, so we just
+ set the state to terminating and wait for fd_async_ready
+ to terminate the port */
+ if (res == 0)
+ dd->terminating = 2;
+ else
+ dd->terminating = -err;
+ return 0;
+ }
+ driver_pdl_unlock(dd->blocking->pdl);
+ }
+
+ if (res == 0) {
+ if (dd->alive == 1) {
+ /*
+ * We have eof and want to report exit status, but the process
+ * hasn't exited yet. When it does ready_input will
+ * driver_select() this fd which will make sure that we get
+ * back here with dd->alive == -1 and dd->status set.
+ */
+ return 0;
+ }
+ else if (dd->alive == -1) {
+ int status = dd->status;
+
+ /* We need not be prepared for stopped/continued processes. */
+ if (WIFSIGNALED(status))
+ status = 128 + WTERMSIG(status);
+ else
+ status = WEXITSTATUS(status);
+ driver_report_exit(dd->port_num, status);
+ }
+ driver_failure_eof(dd->port_num);
+ } else if (dd->ifd) {
+ erl_drv_init_ack(dd->port_num, ERL_DRV_ERROR_ERRNO);
+ } else {
+ driver_failure_posix(dd->port_num, err);
+ }
+ return 0;
+}
+
+/* fd is the drv_data that is returned from the */
+/* initial start routine */
+/* ready_fd is the descriptor that is ready to read */
+
+static void ready_input(ErlDrvData e, ErlDrvEvent ready_fd)
+{
+ ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
+ ErlDrvPort port_num;
+ int packet_bytes;
+ int res;
+ Uint h;
+
+ port_num = dd->port_num;
+ packet_bytes = dd->packet_bytes;
+
+ ASSERT(abs(dd->ifd->fd) == ready_fd);
+
+ if (dd->pid == 0) {
+ /* the pid is sent from erl_child_setup. spawn driver only. */
+ ErtsSysForkerProto proto;
+ int res;
+
+ if((res = read(ready_fd, &proto, sizeof(proto))) <= 0) {
+ /* hmm, child setup seems to have closed the pipe too early...
+ we close the port as there is not much else we can do */
+ if (res < 0 && errno == ERRNO_BLOCK)
+ return;
+ driver_select(port_num, ready_fd, ERL_DRV_READ, 0);
+ if (res == 0)
+ errno = EPIPE;
+ port_inp_failure(dd, -1);
+ return;
+ }
+
+ ASSERT(proto.action == ErtsSysForkerProtoAction_Go);
+ dd->pid = proto.u.go.os_pid;
+
+ if (dd->pid == -1) {
+ /* Setup failed! The only reason why this should happen is if
+ the fork fails. */
+ errno = proto.u.go.error_number;
+ port_inp_failure(dd, -1);
+ return;
+ }
+
+ proto.action = ErtsSysForkerProtoAction_Ack;
+
+ if (driver_sizeq(port_num) > 0) {
+ driver_enq(port_num, (char*)&proto, sizeof(proto));
+ } else {
+ if (write(abs(dd->ofd->fd), &proto, sizeof(proto)) < 0)
+ if (errno == ERRNO_BLOCK || errno == EINTR)
+ driver_enq(port_num, (char*)&proto, sizeof(proto));
+ /* do nothing on failure here. If the ofd is broken, then
+ the ifd will probably also be broken and trigger
+ a port_inp_failure */
+ }
+
+ if (dd->ifd->fd < 0) {
+ driver_select(port_num, abs(dd->ifd->fd), ERL_DRV_READ|ERL_DRV_USE, 0);
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
+ dd->ifd = NULL;
+ }
+
+ if (dd->ofd->fd < 0 || driver_sizeq(port_num) > 0)
+ /* we select in order to close fd or write to queue,
+ child setup will close this fd if fd < 0 */
+ driver_select(port_num, abs(dd->ofd->fd), ERL_DRV_WRITE|ERL_DRV_USE, 1);
+
+ erl_drv_set_os_pid(port_num, dd->pid);
+ erl_drv_init_ack(port_num, e);
+ return;
+ }
+
+ if (packet_bytes == 0) {
+ byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
+ ERTS_SYS_READ_BUF_SZ);
+ res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
+ if (res < 0) {
+ if ((errno != EINTR) && (errno != ERRNO_BLOCK))
+ port_inp_failure(dd, res);
+ }
+ else if (res == 0)
+ port_inp_failure(dd, res);
+ else
+ driver_output(port_num, (char*) read_buf, res);
+ erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
+ }
+ else if (dd->ifd->remain > 0) { /* We try to read the remainder */
+ /* space is allocated in buf */
+ res = read(ready_fd, dd->ifd->cpos,
+ dd->ifd->remain);
+ if (res < 0) {
+ if ((errno != EINTR) && (errno != ERRNO_BLOCK))
+ port_inp_failure(dd, res);
+ }
+ else if (res == 0) {
+ port_inp_failure(dd, res);
+ }
+ else if (res == dd->ifd->remain) { /* we're done */
+ driver_output(port_num, dd->ifd->buf,
+ dd->ifd->sz);
+ clear_fd_data(dd->ifd);
+ }
+ else { /* if (res < dd->ifd->remain) */
+ dd->ifd->cpos += res;
+ dd->ifd->remain -= res;
+ }
+ }
+ else if (dd->ifd->remain == 0) { /* clean fd */
+ byte *read_buf = (byte *) erts_alloc(ERTS_ALC_T_SYS_READ_BUF,
+ ERTS_SYS_READ_BUF_SZ);
+ /* We make one read attempt and see what happens */
+ res = read(ready_fd, read_buf, ERTS_SYS_READ_BUF_SZ);
+ if (res < 0) {
+ if ((errno != EINTR) && (errno != ERRNO_BLOCK))
+ port_inp_failure(dd, res);
+ }
+ else if (res == 0) { /* eof */
+ port_inp_failure(dd, res);
+ }
+ else if (res < packet_bytes - dd->ifd->psz) {
+ memcpy(dd->ifd->pbuf+dd->ifd->psz,
+ read_buf, res);
+ dd->ifd->psz += res;
+ }
+ else { /* if (res >= packet_bytes) */
+ unsigned char* cpos = read_buf;
+ int bytes_left = res;
+
+ while (1) {
+ int psz = dd->ifd->psz;
+ char* pbp = dd->ifd->pbuf + psz;
+
+ while(bytes_left && (psz < packet_bytes)) {
+ *pbp++ = *cpos++;
+ bytes_left--;
+ psz++;
+ }
+
+ if (psz < packet_bytes) {
+ dd->ifd->psz = psz;
+ break;
+ }
+ dd->ifd->psz = 0;
+
+ switch (packet_bytes) {
+ case 1: h = get_int8(dd->ifd->pbuf); break;
+ case 2: h = get_int16(dd->ifd->pbuf); break;
+ case 4: h = get_int32(dd->ifd->pbuf); break;
+ default: ASSERT(0); return; /* -1; */
+ }
+
+ if (h <= (bytes_left)) {
+ driver_output(port_num, (char*) cpos, h);
+ cpos += h;
+ bytes_left -= h;
+ continue;
+ }
+ else { /* The last message we got was split */
+ char *buf = erts_alloc_fnf(ERTS_ALC_T_FD_ENTRY_BUF, h);
+ if (!buf) {
+ errno = ENOMEM;
+ port_inp_failure(dd, -1);
+ }
+ else {
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, h);
+ sys_memcpy(buf, cpos, bytes_left);
+ dd->ifd->buf = buf;
+ dd->ifd->sz = h;
+ dd->ifd->remain = h - bytes_left;
+ dd->ifd->cpos = buf + bytes_left;
+ }
+ break;
+ }
+ }
+ }
+ erts_free(ERTS_ALC_T_SYS_READ_BUF, (void *) read_buf);
+ }
+}
+
+
+/* fd is the drv_data that is returned from the */
+/* initial start routine */
+/* ready_fd is the descriptor that is ready to read */
+
+static void ready_output(ErlDrvData e, ErlDrvEvent ready_fd)
+{
+ ErtsSysDriverData *dd = (ErtsSysDriverData*)e;
+ ErlDrvPort ix = dd->port_num;
+ int n;
+ struct iovec* iv;
+ int vsize;
+
+ if ((iv = (struct iovec*) driver_peekq(ix, &vsize)) == NULL) {
+ driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
+ if (dd->pid > 0 && dd->ofd->fd < 0) {
+ /* The port was opened with 'in' option, which means we
+ should close the output fd as soon as the command has
+ been sent. */
+ driver_select(ix, ready_fd, ERL_DRV_WRITE|ERL_DRV_USE, 0);
+ erts_smp_atomic_add_nob(&sys_misc_mem_sz, -sizeof(ErtsSysFdData));
+ dd->ofd = NULL;
+ }
+ if (dd->terminating)
+ driver_failure_atom(dd->port_num,"normal");
+ return; /* 0; */
+ }
+ vsize = vsize > MAX_VSIZE ? MAX_VSIZE : vsize;
+ if ((n = writev(ready_fd, iv, vsize)) > 0) {
+ if (driver_deq(ix, n) == 0)
+ set_busy_port(ix, 0);
+ }
+ else if (n < 0) {
+ if (errno == ERRNO_BLOCK || errno == EINTR)
+ return; /* 0; */
+ else {
+ int res = errno;
+ driver_select(ix, ready_fd, ERL_DRV_WRITE, 0);
+ driver_failure_posix(ix, res);
+ return; /* -1; */
+ }
+ }
+ return; /* 0; */
+}
+
+static void stop_select(ErlDrvEvent fd, void* _)
+{
+ close((int)fd);
+}
+
+#if FDBLOCK
+
+static void
+fd_async(void *async_data)
+{
+ int res;
+ ErtsSysDriverData *dd = (ErtsSysDriverData *)async_data;
+ SysIOVec *iov0;
+ SysIOVec *iov;
+ int iovlen;
+ int err = 0;
+ /* much of this code is stolen from efile_drv:invoke_writev */
+ driver_pdl_lock(dd->blocking->pdl);
+ iov0 = driver_peekq(dd->port_num, &iovlen);
+ iovlen = iovlen < MAXIOV ? iovlen : MAXIOV;
+ iov = erts_alloc_fnf(ERTS_ALC_T_SYS_WRITE_BUF,
+ sizeof(SysIOVec)*iovlen);
+ if (!iov) {
+ res = -1;
+ err = ENOMEM;
+ driver_pdl_unlock(dd->blocking->pdl);
+ } else {
+ memcpy(iov,iov0,iovlen*sizeof(SysIOVec));
+ driver_pdl_unlock(dd->blocking->pdl);
+
+ do {
+ res = writev(dd->ofd->fd, iov, iovlen);
+ } while (res < 0 && errno == EINTR);
+ if (res < 0)
+ err = errno;
+ err = errno;
+
+ erts_free(ERTS_ALC_T_SYS_WRITE_BUF, iov);
+ }
+ dd->blocking->res = res;
+ dd->blocking->err = err;
+}
+
+void fd_ready_async(ErlDrvData drv_data,
+ ErlDrvThreadData thread_data) {
+ ErtsSysDriverData *dd = (ErtsSysDriverData *)thread_data;
+ ErlDrvPort port_num = dd->port_num;
+
+ ASSERT(dd->blocking);
+
+ if (dd->blocking->res > 0) {
+ driver_pdl_lock(dd->blocking->pdl);
+ if (driver_deq(port_num, dd->blocking->res) == 0) {
+ driver_pdl_unlock(dd->blocking->pdl);
+ set_busy_port(port_num, 0);
+ if (dd->terminating) {
+ /* The port is has been ordered to terminate
+ from either fd_flush or port_inp_failure */
+ if (dd->terminating == 1)
+ driver_failure_atom(port_num, "normal");
+ else if (dd->terminating == 2)
+ driver_failure_eof(port_num);
+ else if (dd->terminating < 0)
+ driver_failure_posix(port_num, -dd->terminating);
+ return; /* -1; */
+ }
+ } else {
+ driver_pdl_unlock(dd->blocking->pdl);
+ /* still data left to write in queue */
+ driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL);
+ return /* 0; */;
+ }
+ } else if (dd->blocking->res < 0) {
+ if (dd->blocking->err == ERRNO_BLOCK) {
+ set_busy_port(port_num, 1);
+ /* still data left to write in queue */
+ driver_async(port_num, &dd->blocking->pkey, fd_async, dd, NULL);
+ } else
+ driver_failure_posix(port_num, dd->blocking->err);
+ return; /* -1; */
+ }
+ return; /* 0; */
+}
+
+#endif
+
+/* Forker driver */
+
+static int forker_fd;
+
+static ErlDrvData forker_start(ErlDrvPort port_num, char* name,
+ SysDriverOpts* opts)
+{
+
+ int i;
+ int fds[2];
+ int res, unbind;
+ char bindir[MAXPATHLEN];
+ size_t bindirsz = sizeof(bindir);
+ Uint csp_path_sz;
+ char *child_setup_prog;
+
+ forker_port = erts_drvport2id(port_num);
+
+ res = erts_sys_getenv_raw("BINDIR", bindir, &bindirsz);
+ if (res != 0) {
+ if (res < 0)
+ erts_exit(1,
+ "Environment variable BINDIR is not set\n");
+ if (res > 0)
+ erts_exit(1,
+ "Value of environment variable BINDIR is too large\n");
+ }
+ if (bindir[0] != DIR_SEPARATOR_CHAR)
+ erts_exit(1,
+ "Environment variable BINDIR does not contain an"
+ " absolute path\n");
+ csp_path_sz = (strlen(bindir)
+ + 1 /* DIR_SEPARATOR_CHAR */
+ + sizeof(CHILD_SETUP_PROG_NAME)
+ + 1);
+ child_setup_prog = erts_alloc(ERTS_ALC_T_CS_PROG_PATH, csp_path_sz);
+ erts_snprintf(child_setup_prog, csp_path_sz,
+ "%s%c%s",
+ bindir,
+ DIR_SEPARATOR_CHAR,
+ CHILD_SETUP_PROG_NAME);
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, fds) < 0) {
+ erts_exit(ERTS_ABORT_EXIT,
+ "Could not open unix domain socket in spawn_init: %d\n",
+ errno);
+ }
+
+ forker_fd = fds[0];
+
+ unbind = erts_sched_bind_atfork_prepare();
+
+ i = fork();
+
+ if (i == 0) {
+ /* The child */
+ char *cs_argv[FORKER_ARGV_NO_OF_ARGS] =
+ {CHILD_SETUP_PROG_NAME, NULL, NULL};
+ char buff[128];
+
+ erts_sched_bind_atfork_child(unbind);
+
+ snprintf(buff, 128, "%d", sys_max_files());
+ cs_argv[FORKER_ARGV_MAX_FILES] = buff;
+
+ /* We preallocate fd 3 for the uds fd */
+ if (fds[1] != 3) {
+ dup2(fds[1], 3);
+ }
+
+#if defined(USE_SETPGRP_NOARGS) /* SysV */
+ (void) setpgrp();
+#elif defined(USE_SETPGRP) /* BSD */
+ (void) setpgrp(0, getpid());
+#else /* POSIX */
+ (void) setsid();
+#endif
+
+ execv(child_setup_prog, cs_argv);
+ _exit(1);
+ }
+
+ erts_sched_bind_atfork_parent(unbind);
+
+ erts_free(ERTS_ALC_T_CS_PROG_PATH, child_setup_prog);
+
+ close(fds[1]);
+
+ SET_NONBLOCKING(forker_fd);
+
+ driver_select(port_num, forker_fd, ERL_DRV_READ|ERL_DRV_USE, 1);
+
+ return (ErlDrvData)port_num;
+}
+
+static void forker_stop(ErlDrvData e)
+{
+ /* we probably should do something here,
+ the port has been closed by the user. */
+}
+
+static void forker_ready_input(ErlDrvData e, ErlDrvEvent fd)
+{
+ int res;
+ ErtsSysForkerProto *proto;
+
+ proto = erts_alloc(ERTS_ALC_T_DRV_CTRL_DATA, sizeof(*proto));
+
+ if ((res = read(fd, proto, sizeof(*proto))) < 0) {
+ if (errno == ERRNO_BLOCK)
+ return;
+ erts_exit(ERTS_DUMP_EXIT, "Failed to read from erl_child_setup: %d\n", errno);
+ }
+
+ if (res == 0)
+ erts_exit(ERTS_DUMP_EXIT, "erl_child_setup closed\n");
+
+ ASSERT(res == sizeof(*proto));
+
+#ifdef FORKER_PROTO_START_ACK
+ if (proto->action == ErtsSysForkerProtoAction_StartAck) {
+ /* Ideally we would like to not have to ack each Start
+ command being sent over the uds, but it would seem
+ that some operating systems (only observed on FreeBSD)
+ throw away data on the uds when the socket becomes full,
+ so we have to.
+ */
+ ErlDrvPort port_num = (ErlDrvPort)e;
+ int vlen;
+ SysIOVec *iov = driver_peekq(port_num, &vlen);
+ ErtsSysForkerProto *proto = (ErtsSysForkerProto *)iov[0].iov_base;
+
+ close(proto->u.start.fds[0]);
+ close(proto->u.start.fds[1]);
+ if (proto->u.start.fds[1] != proto->u.start.fds[2])
+ close(proto->u.start.fds[2]);
+
+ driver_deq(port_num, sizeof(*proto));
+
+ if (driver_sizeq(port_num) > 0)
+ driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
+ } else
+#endif
+ {
+ ASSERT(proto->action == ErtsSysForkerProtoAction_SigChld);
+
+ /* ideally this would be a port_command call, but as command is
+ already used by the spawn_driver, we use control instead.
+ Note that when using erl_drv_port_control it is an asynchronous
+ control. */
+ erl_drv_port_control(proto->u.sigchld.port_id, 'S',
+ (char*)proto, sizeof(*proto));
+ }
+
+}
+
+static void forker_ready_output(ErlDrvData e, ErlDrvEvent fd)
+{
+ ErlDrvPort port_num = (ErlDrvPort)e;
+
+#ifndef FORKER_PROTO_START_ACK
+ while (driver_sizeq(port_num) > 0) {
+#endif
+ int vlen;
+ SysIOVec *iov = driver_peekq(port_num, &vlen);
+ ErtsSysForkerProto *proto = (ErtsSysForkerProto *)iov[0].iov_base;
+ ASSERT(iov[0].iov_len >= (sizeof(*proto)));
+ if (sys_uds_write(forker_fd, (char*)proto, sizeof(*proto),
+ proto->u.start.fds, 3, 0) < 0) {
+ if (errno == ERRNO_BLOCK)
+ return;
+ erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno);
+ }
+#ifndef FORKER_PROTO_START_ACK
+ close(proto->u.start.fds[0]);
+ close(proto->u.start.fds[1]);
+ if (proto->u.start.fds[1] != proto->u.start.fds[2])
+ close(proto->u.start.fds[2]);
+ driver_deq(port_num, sizeof(*proto));
+ }
+#endif
+
+ driver_select(port_num, forker_fd, ERL_DRV_WRITE, 0);
+}
+
+static ErlDrvSSizeT forker_control(ErlDrvData e, unsigned int cmd, char *buf,
+ ErlDrvSizeT len, char **rbuf, ErlDrvSizeT rlen)
+{
+ ErtsSysForkerProto *proto = (ErtsSysForkerProto *)buf;
+ ErlDrvPort port_num = (ErlDrvPort)e;
+ int res;
+
+ driver_enq(port_num, buf, len);
+ if (driver_sizeq(port_num) > sizeof(*proto)) {
+ return 0;
+ }
+
+ if ((res = sys_uds_write(forker_fd, (char*)proto, sizeof(*proto),
+ proto->u.start.fds, 3, 0)) < 0) {
+ if (errno == ERRNO_BLOCK) {
+ driver_select(port_num, forker_fd, ERL_DRV_WRITE|ERL_DRV_USE, 1);
+ return 0;
+ }
+ erts_exit(ERTS_DUMP_EXIT, "Failed to write to erl_child_setup: %d\n", errno);
+ }
+
+#ifndef FORKER_PROTO_START_ACK
+ ASSERT(res == sizeof(*proto));
+ close(proto->u.start.fds[0]);
+ close(proto->u.start.fds[1]);
+ if (proto->u.start.fds[1] != proto->u.start.fds[2])
+ close(proto->u.start.fds[2]);
+ driver_deq(port_num, sizeof(*proto));
+#endif
+
+ return 0;
+}
diff --git a/erts/emulator/sys/unix/sys_float.c b/erts/emulator/sys/unix/sys_float.c
index 689be98969..6435da086f 100644
--- a/erts/emulator/sys/unix/sys_float.c
+++ b/erts/emulator/sys/unix/sys_float.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2001-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2001-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -32,7 +33,7 @@ void
erts_sys_init_float(void)
{
# ifdef SIGFPE
- sys_sigset(SIGFPE, SIG_IGN); /* Ignore so we can test for NaN and Inf */
+ sys_signal(SIGFPE, SIG_IGN); /* Ignore so we can test for NaN and Inf */
# endif
}
@@ -46,13 +47,13 @@ static void erts_init_fp_exception(void)
{
/* XXX: the wrappers prevent using a pthread destructor to
deallocate the key's value; so when/where do we do that? */
- erts_tsd_key_create(&fpe_key);
+ erts_tsd_key_create(&fpe_key,"fp_exception");
}
void erts_thread_init_fp_exception(void)
{
unsigned long *fpe = erts_alloc(ERTS_ALC_T_FP_EXCEPTION, sizeof(*fpe));
- *fpe = 0L;
+ *fpe = 0;
erts_tsd_set(fpe_key, fpe);
}
@@ -85,11 +86,11 @@ static void set_current_fp_exception(unsigned long pc)
void erts_fp_check_init_error(volatile unsigned long *fpexnp)
{
- char buf[64];
+ char buf[128];
snprintf(buf, sizeof buf, "ERTS_FP_CHECK_INIT at %p: detected unhandled FPE at %p\r\n",
__builtin_return_address(0), (void*)*fpexnp);
if (write(2, buf, strlen(buf)) <= 0)
- erl_exit(ERTS_ABORT_EXIT, "%s", buf);
+ erts_exit(ERTS_ABORT_EXIT, "%s", buf);
*fpexnp = 0;
#if defined(__i386__) || defined(__x86_64__)
erts_restore_fpu();
@@ -101,6 +102,17 @@ void erts_fp_check_init_error(volatile unsigned long *fpexnp)
#define __DARWIN__ 1
#endif
+/*
+ * Define two processor and possibly OS-specific primitives:
+ *
+ * static void unmask_fpe(void);
+ * -- unmask invalid, overflow, and divide-by-zero exceptions
+ *
+ * static int mask_fpe(void);
+ * -- mask invalid, overflow, and divide-by-zero exceptions
+ * -- return non-zero if the previous state was unmasked
+ */
+
#if (defined(__i386__) || defined(__x86_64__)) && defined(__GNUC__)
static void unmask_x87(void)
@@ -112,7 +124,6 @@ static void unmask_x87(void)
__asm__ __volatile__("fldcw %0" : : "m"(cw));
}
-/* mask x87 FPE, return true if the previous state was unmasked */
static int mask_x87(void)
{
unsigned short cw;
@@ -135,7 +146,6 @@ static void unmask_sse2(void)
__asm__ __volatile__("ldmxcsr %0" : : "m"(mxcsr));
}
-/* mask SSE2 FPE, return true if the previous state was unmasked */
static int mask_sse2(void)
{
unsigned int mxcsr;
@@ -256,21 +266,19 @@ static int cpu_has_sse2(void)
}
#endif /* !__x86_64__ */
-static void unmask_fpe(void)
+static void unmask_fpe_internal(void)
{
- __asm__ __volatile__("fnclex");
unmask_x87();
if (cpu_has_sse2())
unmask_sse2();
}
-static void unmask_fpe_conditional(int unmasked)
+static void unmask_fpe(void)
{
- if (unmasked)
- unmask_fpe();
+ __asm__ __volatile__("fnclex");
+ unmask_fpe_internal();
}
-/* mask x86 FPE, return true if the previous state was unmasked */
static int mask_fpe(void)
{
int unmasked;
@@ -284,9 +292,7 @@ static int mask_fpe(void)
void erts_restore_fpu(void)
{
__asm__ __volatile__("fninit");
- unmask_x87();
- if (cpu_has_sse2())
- unmask_sse2();
+ unmask_fpe_internal();
}
#elif defined(__sparc__) && defined(__linux__)
@@ -309,13 +315,6 @@ static void unmask_fpe(void)
__asm__ __volatile__(LDX " %0, %%fsr" : : "m"(fsr));
}
-static void unmask_fpe_conditional(int unmasked)
-{
- if (unmasked)
- unmask_fpe();
-}
-
-/* mask SPARC FPE, return true if the previous state was unmasked */
static int mask_fpe(void)
{
unsigned long fsr;
@@ -430,13 +429,6 @@ static void unmask_fpe(void)
set_fpscr(0x80|0x40|0x10); /* VE, OE, ZE; not UE or XE */
}
-static void unmask_fpe_conditional(int unmasked)
-{
- if (unmasked)
- unmask_fpe();
-}
-
-/* mask PowerPC FPE, return true if the previous state was unmasked */
static int mask_fpe(void)
{
int unmasked;
@@ -446,20 +438,13 @@ static int mask_fpe(void)
return unmasked;
}
-#else
+#else /* !(x86 || (sparc && linux) || (powerpc && (linux || darwin))) */
static void unmask_fpe(void)
{
fpsetmask(FP_X_INV | FP_X_OFL | FP_X_DZ);
}
-static void unmask_fpe_conditional(int unmasked)
-{
- if (unmasked)
- unmask_fpe();
-}
-
-/* mask IEEE FPE, return true if previous state was unmasked */
static int mask_fpe(void)
{
const fp_except unmasked_mask = FP_X_INV | FP_X_OFL | FP_X_DZ;
@@ -471,6 +456,16 @@ static int mask_fpe(void)
#endif
+/*
+ * Define a processor and OS-specific SIGFPE handler.
+ *
+ * The effect of receiving a SIGFPE should be:
+ * 1. Update the processor context:
+ * a) on x86: mask FP exceptions, do not skip faulting instruction
+ * b) on SPARC and PowerPC: unmask FP exceptions, skip faulting instruction
+ * 2. call set_current_fp_exception with the PC of the faulting instruction
+ */
+
#if (defined(__linux__) && (defined(__i386__) || defined(__x86_64__) || defined(__sparc__) || defined(__powerpc__))) || (defined(__DARWIN__) && (defined(__i386__) || defined(__x86_64__) || defined(__ppc__))) || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__))) || ((defined(__NetBSD__) || defined(__OpenBSD__)) && defined(__x86_64__)) || (defined(__sun__) && defined(__x86_64__))
#if defined(__linux__) && defined(__i386__)
@@ -498,18 +493,8 @@ static int mask_fpe(void)
#define mc_pc(mc) ((mc)->gregs[REG_RIP])
#elif defined(__linux__) && defined(__i386__)
#define mc_pc(mc) ((mc)->gregs[REG_EIP])
-#elif defined(__DARWIN__) && defined(__i386__)
-#ifdef DARWIN_MODERN_MCONTEXT
-#define mc_pc(mc) ((mc)->__ss.__eip)
-#else
-#define mc_pc(mc) ((mc)->ss.eip)
-#endif
-#elif defined(__DARWIN__) && defined(__x86_64__)
-#ifdef DARWIN_MODERN_MCONTEXT
-#define mc_pc(mc) ((mc)->__ss.__rip)
-#else
-#define mc_pc(mc) ((mc)->ss.rip)
-#endif
+#elif defined(__DARWIN__)
+# error "Floating-point exceptions not supported on MacOS X"
#elif defined(__FreeBSD__) && defined(__x86_64__)
#define mc_pc(mc) ((mc)->mc_rip)
#elif defined(__FreeBSD__) && defined(__i386__)
@@ -529,8 +514,7 @@ static void fpe_sig_action(int sig, siginfo_t *si, void *puc)
ucontext_t *uc = puc;
unsigned long pc;
-#if defined(__linux__)
-#if defined(__x86_64__)
+#if defined(__linux__) && defined(__x86_64__)
mcontext_t *mc = &uc->uc_mcontext;
fpregset_t fpstate = mc->fpregs;
pc = mc_pc(mc);
@@ -542,26 +526,26 @@ static void fpe_sig_action(int sig, siginfo_t *si, void *puc)
set encoding makes that a poor solution here. */
fpstate->mxcsr = 0x1F80;
fpstate->swd &= ~0xFF;
-#elif defined(__i386__)
+#elif defined(__linux__) && defined(__i386__)
mcontext_t *mc = &uc->uc_mcontext;
fpregset_t fpstate = mc->fpregs;
pc = mc_pc(mc);
if ((fpstate->status >> 16) == X86_FXSR_MAGIC)
((struct _fpstate*)fpstate)->mxcsr = 0x1F80;
fpstate->sw &= ~0xFF;
-#elif defined(__sparc__) && defined(__arch64__)
+#elif defined(__linux__) && defined(__sparc__) && defined(__arch64__)
/* on SPARC the 3rd parameter points to a sigcontext not a ucontext */
struct sigcontext *sc = (struct sigcontext*)puc;
pc = sc->sigc_regs.tpc;
sc->sigc_regs.tpc = sc->sigc_regs.tnpc;
sc->sigc_regs.tnpc += 4;
-#elif defined(__sparc__)
+#elif defined(__linux__) && defined(__sparc__)
/* on SPARC the 3rd parameter points to a sigcontext not a ucontext */
struct sigcontext *sc = (struct sigcontext*)puc;
pc = sc->si_regs.pc;
sc->si_regs.pc = sc->si_regs.npc;
sc->si_regs.npc = (unsigned long)sc->si_regs.npc + 4;
-#elif defined(__powerpc__)
+#elif defined(__linux__) && defined(__powerpc__)
#if defined(__powerpc64__)
mcontext_t *mc = &uc->uc_mcontext;
unsigned long *regs = &mc->gp_regs[0];
@@ -572,19 +556,8 @@ static void fpe_sig_action(int sig, siginfo_t *si, void *puc)
pc = regs[PT_NIP];
regs[PT_NIP] += 4;
regs[PT_FPSCR] = 0x80|0x40|0x10; /* VE, OE, ZE; not UE or XE */
-#endif
#elif defined(__DARWIN__) && (defined(__i386__) || defined(__x86_64__))
-#ifdef DARWIN_MODERN_MCONTEXT
- mcontext_t mc = uc->uc_mcontext;
- pc = mc_pc(mc);
- mc->__fs.__fpu_mxcsr = 0x1F80;
- *(unsigned short *)&mc->__fs.__fpu_fsw &= ~0xFF;
-#else
- mcontext_t mc = uc->uc_mcontext;
- pc = mc_pc(mc);
- mc->fs.fpu_mxcsr = 0x1F80;
- *(unsigned short *)&mc->fs.fpu_fsw &= ~0xFF;
-#endif /* DARWIN_MODERN_MCONTEXT */
+# error "Floating-point exceptions not supported on MacOS X"
#elif defined(__DARWIN__) && defined(__ppc__)
mcontext_t mc = uc->uc_mcontext;
pc = mc->ss.srr0;
@@ -640,7 +613,7 @@ static void fpe_sig_action(int sig, siginfo_t *si, void *puc)
#endif
#if 0
{
- char buf[64];
+ char buf[128];
snprintf(buf, sizeof buf, "%s: FPE at %p\r\n", __FUNCTION__, (void*)pc);
write(2, buf, strlen(buf));
}
@@ -667,7 +640,7 @@ static void fpe_sig_handler(int sig)
static void erts_thread_catch_fp_exceptions(void)
{
- sys_sigset(SIGFPE, fpe_sig_handler);
+ sys_signal(SIGFPE, fpe_sig_handler);
unmask_fpe();
}
@@ -725,7 +698,8 @@ int erts_sys_block_fpe(void)
void erts_sys_unblock_fpe(int unmasked)
{
- unmask_fpe_conditional(unmasked);
+ if (unmasked)
+ unmask_fpe();
}
#endif
@@ -837,11 +811,6 @@ sys_chars_to_double(char* buf, double* fp)
int
matherr(struct exception *exc)
{
-#if !defined(NO_FPE_SIGNALS)
- volatile unsigned long *fpexnp = erts_get_current_fp_exception();
- if (fpexnp != NULL)
- *fpexnp = (unsigned long)__builtin_return_address(0);
-#endif
return 1;
}
diff --git a/erts/emulator/sys/unix/sys_time.c b/erts/emulator/sys/unix/sys_time.c
index fcce54a2c4..4f26639703 100644
--- a/erts/emulator/sys/unix/sys_time.c
+++ b/erts/emulator/sys/unix/sys_time.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2005-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2005-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -31,8 +32,21 @@
# undef _FILE_OFFSET_BITS
#endif
+#include <stdlib.h>
#include "sys.h"
#include "global.h"
+#include "erl_os_monotonic_time_extender.h"
+
+#undef ERTS_HAVE_ERTS_OS_TIMES_IMPL__
+#undef ERTS_HAVE_ERTS_SYS_HRTIME_IMPL__
+
+#if defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME) \
+ || defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME) \
+ || defined(SYS_HRTIME_USING_MACH_CLOCK_GET_TIME)
+# include <mach/clock.h>
+# include <mach/mach.h>
+# define ERTS_MACH_CLOCKS
+#endif
#ifdef NO_SYSCONF
# define TICKS_PER_SEC() HZ
@@ -51,11 +65,27 @@
# include <fcntl.h>
#endif
+static void init_perf_counter(void);
+
/******************* Routines for time measurement *********************/
-int erts_ticks_per_sec = 0; /* Will be SYS_CLK_TCK in erl_unix_sys.h */
-int erts_ticks_per_sec_wrap = 0; /* Will be SYS_CLK_TCK_WRAP */
-static int ticks_bsr = 0; /* Shift wrapped tick value this much to the right */
+#undef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
+#undef ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__
+#undef ERTS_SYS_TIME_INTERNAL_STATE_READ_MOSTLY__
+
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+
+static Uint32
+get_tick_count(void)
+{
+ struct tms unused;
+ return (Uint32) times(&unused);
+}
+
+#define ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__
+#define ERTS_SYS_TIME_INTERNAL_STATE_READ_MOSTLY__
+
+#endif
/*
* init timers, chose a tick length, and return it.
@@ -63,40 +93,939 @@ static int ticks_bsr = 0; /* Shift wrapped tick value this much to the right */
* does almost everything. Other platforms have to
* emulate Unix in this sense.
*/
-int sys_init_time(void)
+
+ErtsSysTimeData__ erts_sys_time_data__ erts_align_attribute(ERTS_CACHE_LINE_SIZE);
+
+#if defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+
+#define ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
+
+static ErtsMonotonicTime clock_gettime_monotonic(void);
+static ErtsMonotonicTime clock_gettime_monotonic_verified(void);
+#if defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW)
+static ErtsMonotonicTime clock_gettime_monotonic_raw(void);
+#endif
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+static void clock_gettime_times(ErtsMonotonicTime *, ErtsSystemTime *);
+static void clock_gettime_times_verified(ErtsMonotonicTime *, ErtsSystemTime *);
+#if defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW)
+static void clock_gettime_times_raw(ErtsMonotonicTime *, ErtsSystemTime *);
+#endif
+#endif
+
+#endif /* defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME) */
+
+#ifdef ERTS_MACH_CLOCKS
+# define ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__
+typedef struct {
+ clock_id_t id;
+ clock_serv_t srv;
+ char *name;
+} ErtsMachClock;
+
+typedef struct {
+ host_name_port_t host;
+ struct {
+ ErtsMachClock monotonic;
+ ErtsMachClock wall;
+ } clock;
+} ErtsMachClocks;
+static void mach_clocks_init(void);
+static void mach_clocks_fini(void);
+# ifdef HAVE_CLOCK_GET_ATTRIBUTES
+# define ERTS_HAVE_MACH_CLOCK_GETRES
+static Sint64
+mach_clock_getres(ErtsMachClock *clk);
+# endif
+#endif /* ERTS_MACH_CLOCKS */
+
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__
+struct sys_time_internal_state_read_only__ {
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+ int times_shift;
+#endif
+#ifdef ERTS_MACH_CLOCKS
+ ErtsMachClocks mach;
+#endif
+};
+#endif
+
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_READ_MOSTLY__
+struct sys_time_internal_state_read_mostly__ {
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+ ErtsOsMonotonicTimeExtendState os_mtime_xtnd;
+#endif
+};
+#endif
+
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
+struct sys_time_internal_state_write_freq__ {
+ erts_smp_mtx_t mtx;
+#if defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+ ErtsMonotonicTime last_delivered;
+#endif
+};
+#endif
+
+#if defined(ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__) \
+ || defined(ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__)
+static struct {
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_READ_ONLY__
+ union {
+ struct sys_time_internal_state_read_only__ o;
+ char align__[(((sizeof(struct sys_time_internal_state_read_only__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } r;
+#endif
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_READ_MOSTLY__
+ union {
+ struct sys_time_internal_state_read_mostly__ m;
+ char align__[(((sizeof(struct sys_time_internal_state_read_mostly__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } wr;
+#endif
+#ifdef ERTS_SYS_TIME_INTERNAL_STATE_WRITE_FREQ__
+ union {
+ struct sys_time_internal_state_write_freq__ f;
+ char align__[(((sizeof(struct sys_time_internal_state_write_freq__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } w;
+#endif
+} internal_state erts_align_attribute(ERTS_CACHE_LINE_SIZE);
+#endif
+
+void
+sys_init_time(ErtsSysInitTimeResult *init_resp)
{
+#if defined(ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT)
+ int major, minor, build, vsn;
+#endif
+#if defined(ERTS_MACH_CLOCKS)
+ mach_clocks_init();
+#endif
+#if !defined(ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT)
+
+ init_resp->have_os_monotonic_time = 0;
+
+#else /* defined(ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT) */
+
+#ifdef ERTS_HAVE_CORRECTED_OS_MONOTONIC_TIME
+ init_resp->have_corrected_os_monotonic_time = 1;
+#else
+ init_resp->have_corrected_os_monotonic_time = 0;
+#endif
+
+ init_resp->os_monotonic_time_info.resolution = (Uint64) 1000*1000*1000;
+#if defined(ERTS_HAVE_MACH_CLOCK_GETRES) && defined(MONOTONIC_CLOCK_ID)
+ init_resp->os_monotonic_time_info.resolution
+ = mach_clock_getres(&internal_state.r.o.mach.clock.monotonic);
+#elif defined(HAVE_CLOCK_GETRES) && defined(MONOTONIC_CLOCK_ID)
+ {
+ struct timespec ts;
+ if (clock_getres(MONOTONIC_CLOCK_ID, &ts) == 0) {
+ if (ts.tv_sec == 0 && ts.tv_nsec != 0)
+ init_resp->os_monotonic_time_info.resolution /= ts.tv_nsec;
+ else if (ts.tv_sec >= 1)
+ init_resp->os_monotonic_time_info.resolution = 1;
+ }
+ }
+#endif
+
+#ifdef MONOTONIC_CLOCK_ID_STR
+ init_resp->os_monotonic_time_info.clock_id = MONOTONIC_CLOCK_ID_STR;
+#else
+ init_resp->os_monotonic_time_info.clock_id = NULL;
+#endif
+
+ init_resp->os_monotonic_time_info.locked_use = 0;
+
+#if defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+ init_resp->os_monotonic_time_info.func = "clock_gettime";
+#elif defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME)
+ init_resp->os_monotonic_time_info.func = "clock_get_time";
+#elif defined(OS_MONOTONIC_TIME_USING_GETHRTIME)
+ init_resp->os_monotonic_time_info.func = "gethrtime";
+#elif defined(OS_MONOTONIC_TIME_USING_TIMES)
+ init_resp->os_monotonic_time_info.func = "times";
+#else
+# error Unknown erts_os_monotonic_time() implementation
+#endif
+
+ init_resp->have_os_monotonic_time = 1;
+
+ os_version(&major, &minor, &build);
+
+ vsn = ERTS_MK_VSN_INT(major, minor, build);
+
+
+#if defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+ if (vsn >= ERTS_MK_VSN_INT(2, 6, 33)) {
+ erts_sys_time_data__.r.o.os_monotonic_time =
+ clock_gettime_monotonic;
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+ erts_sys_time_data__.r.o.os_times =
+ clock_gettime_times;
+#endif
+ }
+ else {
+ /*
+ * Linux versions prior to 2.6.33 have a
+ * known bug that sometimes cause the NTP
+ * adjusted monotonic clock to take small
+ * steps backwards. Use raw monotonic clock
+ * if it is present; otherwise, fall back
+ * on locked verification of values.
+ */
+ init_resp->have_corrected_os_monotonic_time = 0;
+#if defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW)
+ /* We know that CLOCK_MONOTONIC_RAW is defined,
+ but we don't know if we got a kernel that
+ supports it. Support for CLOCK_MONOTONIC_RAW
+ appeared in kernel 2.6.28... */
+ if (vsn >= ERTS_MK_VSN_INT(2, 6, 28)) {
+ erts_sys_time_data__.r.o.os_monotonic_time =
+ clock_gettime_monotonic_raw;
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+ erts_sys_time_data__.r.o.os_times =
+ clock_gettime_times_raw;
+#endif
+ init_resp->os_monotonic_time_info.clock_id =
+ "CLOCK_MONOTONIC_RAW";
+ }
+ else
+#endif /* defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW) */
+ {
+ erts_sys_time_data__.r.o.os_monotonic_time =
+ clock_gettime_monotonic_verified;
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+ erts_sys_time_data__.r.o.os_times =
+ clock_gettime_times_verified;
+#endif
+ erts_smp_mtx_init(&internal_state.w.f.mtx,
+ "os_monotonic_time");
+ internal_state.w.f.last_delivered
+ = clock_gettime_monotonic();
+ init_resp->os_monotonic_time_info.locked_use = 1;
+ }
+ }
+#else /* !(defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)) */
+ {
+ char flavor[1024];
+
+ os_flavor(flavor, sizeof(flavor));
+
+ if (sys_strcmp(flavor, "sunos") == 0) {
+ /*
+ * Don't trust hrtime on multi processors
+ * on SunOS prior to SunOS 5.8
+ */
+ if (vsn < ERTS_MK_VSN_INT(5, 8, 0)) {
+#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_CONF)
+ if (sysconf(_SC_NPROCESSORS_CONF) > 1)
+#endif
+ init_resp->have_os_monotonic_time = 0;
+ }
+ }
+ }
+#endif /* !(defined(__linux__) && defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)) */
+
+#endif /* defined(ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT) */
+
+#ifdef ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT
+ init_resp->os_monotonic_time_unit = ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT;
+#endif
+ init_resp->sys_clock_resolution = SYS_CLOCK_RESOLUTION;
+
/*
- * This (erts_ticks_per_sec) is only for times() (CLK_TCK),
- * the resolution is always one millisecond..
+ * This (erts_sys_time_data__.r.o.ticks_per_sec) is only for
+ * times() (CLK_TCK), the resolution is always one millisecond..
*/
- if ((erts_ticks_per_sec = TICKS_PER_SEC()) < 0)
- erl_exit(1, "Can't get clock ticks/sec\n");
- if (erts_ticks_per_sec >= 1000) {
- /* Workaround for beta linux kernels, need to be done in runtime
- to make erlang run on both 2.4 and 2.5 kernels. In the future,
- the kernel ticks might as
- well be used as a high res timer instead, but that's for when the
- majority uses kernels with HZ == 1024 */
- ticks_bsr = 3;
- } else {
- ticks_bsr = 0;
+ if ((erts_sys_time_data__.r.o.ticks_per_sec = TICKS_PER_SEC()) < 0)
+ erts_exit(ERTS_ABORT_EXIT, "Can't get clock ticks/sec\n");
+
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+#if ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT
+# error Time unit is supposed to be determined at runtime...
+#endif
+ {
+ ErtsMonotonicTime resolution = erts_sys_time_data__.r.o.ticks_per_sec;
+ ErtsMonotonicTime time_unit = resolution;
+ int shift = 0;
+
+ while (time_unit < 1000*1000) {
+ time_unit <<= 1;
+ shift++;
+ }
+
+ init_resp->os_monotonic_time_info.resolution = resolution;
+ init_resp->os_monotonic_time_unit = time_unit;
+ init_resp->os_monotonic_time_info.extended = 1;
+ internal_state.r.o.times_shift = shift;
+
+ erts_init_os_monotonic_time_extender(&internal_state.wr.m.os_mtime_xtnd,
+ get_tick_count,
+ (1 << 29) / resolution);
+ }
+#endif /* defined(OS_MONOTONIC_TIME_USING_TIMES) */
+
+#ifdef WALL_CLOCK_ID_STR
+ init_resp->os_system_time_info.clock_id = WALL_CLOCK_ID_STR;
+#else
+ init_resp->os_system_time_info.clock_id = NULL;
+#endif
+
+ init_resp->os_system_time_info.locked_use = 0;
+ init_resp->os_system_time_info.resolution = (Uint64) 1000*1000*1000;
+#if defined(ERTS_HAVE_MACH_CLOCK_GETRES) && defined(WALL_CLOCK_ID)
+ init_resp->os_system_time_info.resolution
+ = mach_clock_getres(&internal_state.r.o.mach.clock.wall);
+#elif defined(HAVE_CLOCK_GETRES) && defined(WALL_CLOCK_ID)
+ {
+ struct timespec ts;
+ if (clock_getres(WALL_CLOCK_ID, &ts) == 0) {
+ if (ts.tv_sec == 0 && ts.tv_nsec != 0)
+ init_resp->os_system_time_info.resolution /= ts.tv_nsec;
+ else if (ts.tv_sec >= 1)
+ init_resp->os_system_time_info.resolution = 1;
+ }
+ }
+#endif
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+ init_resp->os_system_time_info.func = "clock_gettime";
+#elif defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME)
+ init_resp->os_system_time_info.func = "clock_get_time";
+#elif defined(OS_SYSTEM_TIME_GETTIMEOFDAY)
+ init_resp->os_system_time_info.func = "gettimeofday";
+ init_resp->os_system_time_info.resolution = 1000*1000;
+ init_resp->os_system_time_info.clock_id = NULL;
+#else
+# error Missing erts_os_system_time() implementation
+#endif
+
+ init_perf_counter();
+
+}
+
+void
+erts_late_sys_init_time(void)
+{
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+ erts_late_init_os_monotonic_time_extender(&internal_state.wr.m.os_mtime_xtnd);
+#endif
+}
+
+static ERTS_INLINE ErtsSystemTime
+adj_stime_time_unit(ErtsSystemTime stime, Uint32 res)
+{
+ if (res == ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT)
+ return stime;
+ if (res == (Uint32) 1000*1000*1000
+ && ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT == 1000*1000)
+ return stime/1000;
+ if (res == (Uint32) 1000*1000
+ && ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT == 1000*1000*1000)
+ return stime*1000;
+ return ((ErtsSystemTime)
+ erts_time_unit_conversion(stime,
+ (Uint32) res,
+ (Uint32) ERTS_MONOTONIC_TIME_UNIT));
+}
+
+#define ERTS_TimeSpec2Sint64(TS) \
+ ((((Sint64) (TS)->tv_sec) * ((Sint64) 1000*1000*1000)) \
+ + ((Sint64) (TS)->tv_nsec))
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * POSIX clock_gettime() *
+\* */
+
+#if defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME) \
+ || defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+static ERTS_INLINE Sint64
+posix_clock_gettime(clockid_t id, char *name)
+{
+ struct timespec ts;
+
+ if (clock_gettime(id, &ts) != 0) {
+ int err = errno;
+ char *errstr = err ? strerror(err) : "unknown";
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_gettime(%s, _) failed: %s (%d)\n",
+ name, errstr, err);
+ }
+ return ERTS_TimeSpec2Sint64(&ts);
+}
+
+#endif /* defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME) \
+ || defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+ErtsSystemTime
+erts_os_system_time(void)
+{
+ Sint64 stime = posix_clock_gettime(WALL_CLOCK_ID,
+ WALL_CLOCK_ID_STR);
+ return adj_stime_time_unit((ErtsSystemTime) stime,
+ (Uint32) 1000*1000*1000);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+#if defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME)
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+#define ERTS_HAVE_ERTS_OS_TIMES_IMPL__
+
+static ERTS_INLINE void
+posix_clock_gettime_times(clockid_t mid, char *mname,
+ ErtsMonotonicTime *mtimep,
+ clockid_t sid, char *sname,
+ ErtsSystemTime *stimep)
+{
+ struct timespec mts, sts;
+ int mres, sres, merr, serr;
+
+ mres = clock_gettime(mid, &mts);
+ merr = errno;
+ sres = clock_gettime(sid, &sts);
+ serr = errno;
+
+ if (mres != 0) {
+ char *errstr = merr ? strerror(merr) : "unknown";
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_gettime(%s, _) failed: %s (%d)\n",
+ mname, errstr, merr);
+ }
+ if (sres != 0) {
+ char *errstr = serr ? strerror(serr) : "unknown";
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_gettime(%s, _) failed: %s (%d)\n",
+ sname, errstr, serr);
+ }
+
+ *mtimep = (ErtsMonotonicTime) ERTS_TimeSpec2Sint64(&mts);
+ *stimep = (ErtsSystemTime) ERTS_TimeSpec2Sint64(&sts);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+#if defined(__linux__)
+
+static ErtsMonotonicTime clock_gettime_monotonic_verified(void)
+{
+ ErtsMonotonicTime mtime;
+
+ mtime = (ErtsMonotonicTime) posix_clock_gettime(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR);
+
+ erts_smp_mtx_lock(&internal_state.w.f.mtx);
+ if (mtime < internal_state.w.f.last_delivered)
+ mtime = internal_state.w.f.last_delivered;
+ else
+ internal_state.w.f.last_delivered = mtime;
+ erts_smp_mtx_unlock(&internal_state.w.f.mtx);
+
+ return mtime;
+}
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+static void clock_gettime_times_verified(ErtsMonotonicTime *mtimep,
+ ErtsSystemTime *stimep)
+{
+ posix_clock_gettime_times(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR,
+ mtimep,
+ WALL_CLOCK_ID,
+ WALL_CLOCK_ID_STR,
+ stimep);
+
+ erts_smp_mtx_lock(&internal_state.w.f.mtx);
+ if (*mtimep < internal_state.w.f.last_delivered)
+ *mtimep = internal_state.w.f.last_delivered;
+ else
+ internal_state.w.f.last_delivered = *mtimep;
+ erts_smp_mtx_unlock(&internal_state.w.f.mtx);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+static ErtsMonotonicTime clock_gettime_monotonic(void)
+{
+ return (ErtsMonotonicTime) posix_clock_gettime(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR);
+}
+
+#if defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW)
+
+static ErtsMonotonicTime clock_gettime_monotonic_raw(void)
+{
+ return (ErtsMonotonicTime) posix_clock_gettime(CLOCK_MONOTONIC_RAW,
+ "CLOCK_MONOTONIC_RAW");
+}
+
+#endif /* defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW) */
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+static void clock_gettime_times(ErtsMonotonicTime *mtimep,
+ ErtsSystemTime *stimep)
+{
+ posix_clock_gettime_times(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR,
+ mtimep,
+ WALL_CLOCK_ID,
+ WALL_CLOCK_ID_STR,
+ stimep);
+}
+
+#if defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW)
+
+static void clock_gettime_times_raw(ErtsMonotonicTime *mtimep,
+ ErtsSystemTime *stimep)
+{
+ posix_clock_gettime_times(CLOCK_MONOTONIC_RAW,
+ "CLOCK_MONOTONIC_RAW",
+ mtimep,
+ WALL_CLOCK_ID,
+ WALL_CLOCK_ID_STR,
+ stimep);
+}
+
+#endif /* defined(HAVE_CLOCK_GETTIME_MONOTONIC_RAW) */
+
+#endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+#else /* !defined(__linux__) */
+
+ErtsMonotonicTime erts_os_monotonic_time(void)
+{
+ return posix_clock_gettime(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR);
+}
+
+#if defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME)
+
+void erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ posix_clock_gettime_times(MONOTONIC_CLOCK_ID,
+ MONOTONIC_CLOCK_ID_STR,
+ mtimep,
+ WALL_CLOCK_ID,
+ WALL_CLOCK_ID_STR,
+ stimep);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_CLOCK_GETTIME) */
+
+#endif /* !defined(__linux__) */
+
+#endif /* defined(OS_MONOTONIC_TIME_USING_CLOCK_GETTIME) */
+
+#if defined(SYS_HRTIME_USING_CLOCK_GETTIME)
+# define ERTS_HAVE_ERTS_SYS_HRTIME_IMPL__
+
+ErtsSysHrTime
+erts_sys_hrtime(void)
+{
+ return (ErtsSysHrTime) posix_clock_gettime(HRTIME_CLOCK_ID,
+ HRTIME_CLOCK_ID_STR);
+}
+
+#endif /* defined(SYS_HRTIME_USING_CLOCK_GETTIME) */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * MACH clock_get_time() *
+\* */
+
+#if defined(ERTS_MACH_CLOCKS)
+
+static void
+mach_clocks_fini(void)
+{
+ mach_port_t task = mach_task_self();
+ mach_port_deallocate(task, internal_state.r.o.mach.host);
+#if defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME)
+ mach_port_deallocate(task, internal_state.r.o.mach.clock.monotonic.srv);
+#endif
+#if defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME)
+ mach_port_deallocate(task, internal_state.r.o.mach.clock.wall.srv);
+#endif
+}
+
+static void
+mach_clocks_init(void)
+{
+ kern_return_t kret;
+ host_name_port_t host;
+ clock_id_t id;
+ clock_serv_t *clck_srv_p;
+ char *name;
+
+ host = internal_state.r.o.mach.host = mach_host_self();
+
+#if defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME) \
+ || defined(SYS_HRTIME_USING_MACH_CLOCK_GET_TIME)
+ id = internal_state.r.o.mach.clock.monotonic.id = MONOTONIC_CLOCK_ID;
+ name = internal_state.r.o.mach.clock.monotonic.name = MONOTONIC_CLOCK_ID_STR;
+ clck_srv_p = &internal_state.r.o.mach.clock.monotonic.srv;
+ kret = host_get_clock_service(host, id, clck_srv_p);
+ if (kret != KERN_SUCCESS) {
+ erts_exit(ERTS_ABORT_EXIT,
+ "host_get_clock_service(_, %s, _) failed\n",
+ name);
+ }
+#endif
+
+#if defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME)
+ id = internal_state.r.o.mach.clock.wall.id = WALL_CLOCK_ID;
+ name = internal_state.r.o.mach.clock.wall.name = WALL_CLOCK_ID_STR;
+ clck_srv_p = &internal_state.r.o.mach.clock.wall.srv;
+ kret = host_get_clock_service(host, id, clck_srv_p);
+ if (kret != KERN_SUCCESS) {
+ erts_exit(ERTS_ABORT_EXIT,
+ "host_get_clock_service(_, %s, _) failed\n",
+ name);
+ }
+#endif
+
+ if (atexit(mach_clocks_fini) != 0) {
+ int err = errno;
+ char *errstr = err ? strerror(err) : "unknown";
+ erts_exit(ERTS_ABORT_EXIT,
+ "Failed to register mach_clocks_fini() "
+ "for call at exit: %s (%d)\n",
+ errstr, err);
+ }
+}
+
+#ifdef ERTS_HAVE_MACH_CLOCK_GETRES
+
+static Sint64
+mach_clock_getres(ErtsMachClock *clk)
+{
+ kern_return_t kret;
+ natural_t attr[1];
+ mach_msg_type_number_t cnt;
+
+ cnt = sizeof(attr);
+ kret = clock_get_attributes(clk->srv,
+ CLOCK_GET_TIME_RES,
+ (clock_attr_t) attr,
+ &cnt);
+ if (kret != KERN_SUCCESS || cnt != 1) {
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_get_attributes(%s, _) failed\n",
+ clk->name);
+ }
+
+ return (Sint64) attr[0];
+}
+
+#endif /* ERTS_HAVE_MACH_CLOCK_GETRES */
+
+static ERTS_INLINE Sint64
+mach_clock_get_time(ErtsMachClock *clk)
+{
+ kern_return_t kret;
+ mach_timespec_t time_spec;
+
+ kret = clock_get_time(clk->srv, &time_spec);
+ if (kret != KERN_SUCCESS)
+ erts_exit(ERTS_ABORT_EXIT, "clock_get_time(%s, _) failed\n", clk->name);
+
+ return ERTS_TimeSpec2Sint64(&time_spec);
+}
+
+#endif /* defined(ERTS_MACH_CLOCKS) */
+
+#if defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME)
+
+#define ERTS_HAVE_ERTS_OS_TIMES_IMPL__
+
+ErtsSystemTime
+erts_os_system_time(void)
+{
+ Sint64 stime = mach_clock_get_time(&internal_state.r.o.mach.clock.wall);
+ return adj_stime_time_unit((ErtsSystemTime) stime,
+ (Uint32) 1000*1000*1000);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME) */
+
+#if defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME)
+
+ErtsMonotonicTime
+erts_os_monotonic_time(void)
+{
+ return (ErtsMonotonicTime)
+ mach_clock_get_time(&internal_state.r.o.mach.clock.monotonic);
+}
+
+#if defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME)
+
+#define ERTS_HAVE_ERTS_OS_TIMES_IMPL__
+
+void
+erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ kern_return_t mkret, skret;
+ mach_timespec_t mon_time_spec, sys_time_spec;
+
+ mkret = clock_get_time(internal_state.r.o.mach.clock.monotonic.srv,
+ &mon_time_spec);
+ skret = clock_get_time(internal_state.r.o.mach.clock.wall.srv,
+ &sys_time_spec);
+
+ if (mkret != KERN_SUCCESS)
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_get_time(%s, _) failed\n",
+ internal_state.r.o.mach.clock.monotonic.name);
+ if (skret != KERN_SUCCESS)
+ erts_exit(ERTS_ABORT_EXIT,
+ "clock_get_time(%s, _) failed\n",
+ internal_state.r.o.mach.clock.wall.name);
+
+ *mtimep = (ErtsMonotonicTime) ERTS_TimeSpec2Sint64(&mon_time_spec);
+ *stimep = (ErtsSystemTime) ERTS_TimeSpec2Sint64(&sys_time_spec);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_USING_MACH_CLOCK_GET_TIME) */
+
+#endif /* defined(OS_MONOTONIC_TIME_USING_MACH_CLOCK_GET_TIME) */
+
+#if defined(SYS_HRTIME_USING_MACH_CLOCK_GET_TIME)
+
+#define ERTS_HAVE_ERTS_SYS_HRTIME_IMPL__
+
+ErtsSysHrTime
+erts_sys_hrtime(void)
+{
+ return (ErtsSysHrTime)
+ mach_clock_get_time(&internal_state.r.o.mach.clock.monotonic);
+}
+
+#endif /* defined(SYS_HRTIME_USING_MACH_CLOCK_GET_TIME) */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Solaris gethrtime() - OS monotonic time *
+\* */
+
+#if defined(OS_MONOTONIC_TIME_USING_GETHRTIME)
+
+ErtsMonotonicTime erts_os_monotonic_time(void)
+{
+ return (ErtsMonotonicTime) gethrtime();
+}
+
+#endif /* defined(OS_MONOTONIC_TIME_USING_GETHRTIME) */
+
+#if defined(SYS_HRTIME_USING_GETHRTIME)
+
+#define ERTS_HAVE_ERTS_SYS_HRTIME_IMPL__
+
+ErtsSysHrTime
+erts_sys_hrtime(void)
+{
+ return (ErtsSysHrTime) gethrtime();
+}
+
+#endif /* defined(SYS_HRTIME_USING_GETHRTIME) */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * gettimeofday() - OS system time *
+\* */
+
+#if defined(OS_SYSTEM_TIME_GETTIMEOFDAY)
+
+ErtsSystemTime
+erts_os_system_time(void)
+{
+ ErtsSystemTime stime;
+ struct timeval tv;
+
+ if (gettimeofday(&tv, NULL) != 0) {
+ int err = errno;
+ char *errstr = err ? strerror(err) : "unknown";
+ erts_exit(ERTS_ABORT_EXIT,
+ "gettimeofday(_, NULL) failed: %s (%d)\n",
+ errstr, err);
+ }
+
+ stime = (ErtsSystemTime) tv.tv_sec;
+ stime *= (ErtsSystemTime) 1000*1000;
+ stime += (ErtsSystemTime) tv.tv_usec;
+
+ return adj_stime_time_unit(stime, (Uint32) 1000*1000);
+}
+
+#endif /* defined(OS_SYSTEM_TIME_GETTIMEOFDAY) */
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * times() - OS monotonic time *
+\* */
+
+#if defined(OS_MONOTONIC_TIME_USING_TIMES)
+
+ErtsMonotonicTime
+erts_os_monotonic_time(void)
+{
+ Uint32 ticks = get_tick_count();
+ ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ return ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks) << internal_state.r.o.times_shift;
+}
+
+#endif
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Fallbacks *
+\* */
+
+#ifndef ERTS_HAVE_ERTS_SYS_HRTIME_IMPL__
+
+ErtsSysHrTime
+erts_sys_hrtime(void)
+{
+ return (ErtsSysHrTime) ERTS_MONOTONIC_TO_NSEC(erts_os_system_time());
+}
+
+#endif
+
+#if !defined(ERTS_HAVE_ERTS_OS_TIMES_IMPL__) \
+ && defined(ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT)
+
+void
+erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ *mtimep = erts_os_monotonic_time();
+ *stimep = erts_os_system_time();
+}
+
+#endif
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Performance counter functions *
+\* */
+
+
+/* What resolution to spin to in micro seconds */
+#define RESOLUTION 100
+/* How many iterations to spin */
+#define ITERATIONS 1
+/* How many significant figures to round to */
+#define SIGFIGS 3
+
+static ErtsSysPerfCounter calculate_perf_counter_unit(void) {
+ int i;
+ ErtsSysPerfCounter pre, post;
+ double value = 0;
+ double round_factor;
+#if defined(HAVE_GETHRTIME) && defined(GETHRTIME_WITH_CLOCK_GETTIME)
+ struct timespec basetime,comparetime;
+#define __GETTIME(arg) clock_gettime(CLOCK_MONOTONIC,arg)
+#define __GETUSEC(arg) (arg.tv_nsec / 1000)
+#else
+ SysTimeval basetime,comparetime;
+#define __GETTIME(arg) sys_gettimeofday(arg)
+#define __GETUSEC(arg) arg.tv_usec
+#endif
+
+ for (i = 0; i < ITERATIONS; i++) {
+ /* Make sure usec just flipped over at current resolution */
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ pre = erts_sys_perf_counter();
+
+ __GETTIME(&basetime);
+ do {
+ __GETTIME(&comparetime);
+ } while ((__GETUSEC(basetime) / RESOLUTION) == (__GETUSEC(comparetime) / RESOLUTION));
+
+ post = erts_sys_perf_counter();
+
+ value += post - pre;
}
- erts_ticks_per_sec_wrap = (erts_ticks_per_sec >> ticks_bsr);
- return SYS_CLOCK_RESOLUTION;
+ /* After this value is ticks per us */
+ value /= (RESOLUTION*ITERATIONS);
+
+ /* We round to 3 significant figures */
+ round_factor = pow(10.0, SIGFIGS - ceil(log10(value)));
+ value = ((ErtsSysPerfCounter)(value * round_factor + 0.5)) / round_factor;
+
+ /* convert to ticks per second */
+ return 1000000 * value;
}
-clock_t sys_times_wrap(void)
+static int have_rdtscp(void)
{
- SysTimes dummy;
- clock_t result = (sys_times(&dummy) >> ticks_bsr);
- return result;
+#if defined(ETHR_X86_RUNTIME_CONF__)
+ /* On early x86 cpu's the tsc varies with the current speed of the cpu,
+ which means that the time per tick vary depending on the current
+ load of the cpu. We do not want this as it would give very scewed
+ numbers when the cpu is mostly idle.
+ The linux kernel seems to think that checking for constant and
+ reliable is enough to trust the counter so we do the same.
+
+ If this test is not good enough, I don't know what we'll do.
+ Maybe fallback on erts_sys_hrtime always, but that would be a shame as
+ rdtsc is about 3 times faster than hrtime... */
+ return ETHR_X86_RUNTIME_CONF_HAVE_CONSTANT_TSC__ &&
+ ETHR_X86_RUNTIME_CONF_HAVE_TSC_RELIABLE__;
+#else
+ return 0;
+#endif
}
+static ErtsSysPerfCounter rdtsc(void)
+{
+ /* It may have been a good idea to put the cpuid instruction before
+ the rdtsc, but I decided against it because it is not really
+ needed for msacc, and it slows it down by quite a bit (5-7 times slower).
+ As a result though, this timestamp becomes much less
+ accurate as it might be re-ordered to be executed way before or after this
+ function is called.
+ */
+ ErtsSysPerfCounter ts;
+#if defined(__x86_64__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ "shl $32, %%rdx\n\t"
+ "or %%rdx, %0" : "=a" (ts) : : "rdx");
+#elif defined(__i386__)
+ __asm__ __volatile__ ("rdtsc\n\t"
+ : "=A" (ts) );
+#endif
+ return ts;
+}
+static void init_perf_counter(void)
+{
+ if (have_rdtscp()) {
+ erts_sys_time_data__.r.o.perf_counter = rdtsc;
+ erts_sys_time_data__.r.o.perf_counter_unit = calculate_perf_counter_unit();
+ } else {
+ erts_sys_time_data__.r.o.perf_counter = erts_sys_hrtime;
+ erts_sys_time_data__.r.o.perf_counter_unit = ERTS_HRTIME_UNIT;
+ }
+}
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#ifdef HAVE_GETHRVTIME_PROCFS_IOCTL
+/* The code below only has effect on solaris < 10,
+ needed in order for gehhrvtime to work properly */
int sys_start_hrvtime(void)
{
long msacct = PR_MSACCT;
diff --git a/erts/emulator/sys/unix/sys_uds.c b/erts/emulator/sys/unix/sys_uds.c
new file mode 100644
index 0000000000..dd0a3b03ff
--- /dev/null
+++ b/erts/emulator/sys/unix/sys_uds.c
@@ -0,0 +1,155 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#include "sys_uds.h"
+
+int
+sys_uds_readv(int fd, struct iovec *iov, size_t iov_len,
+ int *fds, int fd_count, int flags) {
+ struct msghdr msg;
+ struct cmsghdr *cmsg = NULL;
+ char ancillary_buff[256] = {0};
+ int res, i = 0;
+
+ /* setup a place to fill in message contents */
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_len;
+
+ /* provide space for the ancillary data */
+ msg.msg_control = ancillary_buff;
+ msg.msg_controllen = sizeof(ancillary_buff);
+
+ if((res = recvmsg(fd, &msg, flags)) < 0) {
+#if defined(__APPLE__) && defined(__MACH__) && !defined(__DARWIN__)
+ /* When some OS X versions run out of fd's
+ they give EMSGSIZE instead of EMFILE.
+ We remap this as we want the correct
+ error to appear for the user */
+ if (errno == EMSGSIZE)
+ errno = EMFILE;
+#endif
+ return res;
+ }
+
+ if((msg.msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
+ {
+ /* We assume that we have given enough space for any header
+ that are sent to us. So the only remaining reason to get
+ this flag set is if the caller has run out of file descriptors.
+ */
+ errno = EMFILE;
+ return -1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg) ) {
+ if ((cmsg->cmsg_level == SOL_SOCKET) &&
+ (cmsg->cmsg_type == SCM_RIGHTS)) {
+ int *cmsg_data = (int *)CMSG_DATA(cmsg);
+ while ((char*)cmsg_data < (char*)cmsg + cmsg->cmsg_len) {
+ if (i < fd_count) {
+ fds[i++] = *cmsg_data++;
+ } else {
+ /* for some strange reason, we have received more FD's
+ than we wanted... close them if we are not running
+ debug. */
+ if(i >= fd_count) abort();
+ close(*cmsg_data++);
+ }
+ }
+ }
+ }
+
+ return res;
+}
+
+int
+sys_uds_read(int fd, char *buff, size_t len,
+ int *fds, int fd_count, int flags) {
+ struct iovec iov;
+ iov.iov_base = buff;
+ iov.iov_len = len;
+ return sys_uds_readv(fd, &iov, 1, fds, fd_count, flags);
+}
+
+
+int
+sys_uds_writev(int fd, struct iovec *iov, size_t iov_len,
+ int *fds, int fd_count, int flags) {
+
+ struct msghdr msg;
+ struct cmsghdr *cmsg = NULL;
+ int res, i;
+
+ /* initialize socket message */
+ memset(&msg, 0, sizeof(struct msghdr));
+
+ /* We flatten the iov if it is too long */
+ if (iov_len > MAXIOV) {
+ int size = 0;
+ char *buff;
+ for (i = 0; i < iov_len; i++)
+ size += iov[i].iov_len;
+ buff = malloc(size);
+
+ for (i = 0; i < iov_len; i++) {
+ memcpy(buff, iov[i].iov_base, iov[i].iov_len);
+ buff += iov[i].iov_len;
+ }
+
+ iov[0].iov_base = buff - size;
+ iov[0].iov_len = size;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ } else {
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_len;
+ }
+
+ /* initialize the ancillary data */
+ msg.msg_control = calloc(1, CMSG_SPACE(sizeof(int) * fd_count));
+ msg.msg_controllen = CMSG_SPACE(sizeof(int) * fd_count);
+
+ /* copy the fd array into the ancillary data */
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if(!cmsg) abort();
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(int) * fd_count);
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(int) * fd_count);
+
+ res = sendmsg(fd, &msg, flags);
+
+ if (iov_len > MAXIOV)
+ free(iov[0].iov_base);
+
+ free(msg.msg_control);
+
+ return res;
+}
+
+int
+sys_uds_write(int fd, char *buff, size_t len,
+ int *fds, int fd_count, int flags) {
+ struct iovec iov;
+ iov.iov_base = buff;
+ iov.iov_len = len;
+ return sys_uds_writev(fd, &iov, 1, fds, fd_count, flags);
+}
diff --git a/erts/emulator/sys/unix/sys_uds.h b/erts/emulator/sys/unix/sys_uds.h
new file mode 100644
index 0000000000..a598102d5c
--- /dev/null
+++ b/erts/emulator/sys/unix/sys_uds.h
@@ -0,0 +1,57 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef _ERL_UNIX_UDS_H
+#define _ERL_UNIX_UDS_H
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#if defined(__sun__) && !defined(_XOPEN_SOURCE)
+#define _XOPEN_SOURCE 500
+#endif
+
+#include <limits.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/uio.h>
+
+#if defined IOV_MAX
+#define MAXIOV IOV_MAX
+#elif defined UIO_MAXIOV
+#define MAXIOV UIO_MAXIOV
+#else
+#define MAXIOV 16
+#endif
+
+#include "sys.h"
+
+int sys_uds_readv(int fd, struct iovec *iov, size_t iov_len,
+ int *fds, int fd_count, int flags);
+int sys_uds_read(int fd, char *buff, size_t len,
+ int *fds, int fd_count, int flags);
+int sys_uds_writev(int fd, struct iovec *iov, size_t iov_len,
+ int *fds, int fd_count, int flags);
+int sys_uds_write(int fd, char *buff, size_t len,
+ int *fds, int fd_count, int flags);
+
+#endif /* #ifndef _ERL_UNIX_UDS_H */
diff --git a/erts/emulator/sys/win32/dosmap.c b/erts/emulator/sys/win32/dosmap.c
index 15416a66c5..95fbba384b 100644
--- a/erts/emulator/sys/win32/dosmap.c
+++ b/erts/emulator/sys/win32/dosmap.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1998-2009. All Rights Reserved.
+ * Copyright Ericsson AB 1998-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/win32/driver_int.h b/erts/emulator/sys/win32/driver_int.h
index 97e188816e..50097d3fd2 100644
--- a/erts/emulator/sys/win32/driver_int.h
+++ b/erts/emulator/sys/win32/driver_int.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2009. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/win32/erl_main.c b/erts/emulator/sys/win32/erl_main.c
index 5471bffb52..9eee300f48 100644
--- a/erts/emulator/sys/win32/erl_main.c
+++ b/erts/emulator/sys/win32/erl_main.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2000-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2000-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 7a1d129cd5..f23c7ab03d 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2007-2011. All Rights Reserved.
+ * Copyright Ericsson AB 2007-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -25,6 +26,8 @@
#include "sys.h"
#include "erl_alloc.h"
#include "erl_poll.h"
+#include "erl_time.h"
+#include "erl_msacc.h"
/*
* Some debug macros
@@ -285,7 +288,7 @@ struct ErtsPollSet_ {
#ifdef ERTS_SMP
erts_smp_mtx_t mtx;
#endif
- erts_smp_atomic32_t timeout;
+ erts_atomic64_t timeout_time;
};
#ifdef ERTS_SMP
@@ -363,6 +366,26 @@ do { \
wait_standby(PS); \
} while(0)
+static ERTS_INLINE void
+init_timeout_time(ErtsPollSet ps)
+{
+ erts_atomic64_init_nob(&ps->timeout_time,
+ (erts_aint64_t) ERTS_MONOTONIC_TIME_MAX);
+}
+
+static ERTS_INLINE void
+set_timeout_time(ErtsPollSet ps, ErtsMonotonicTime time)
+{
+ erts_atomic64_set_relb(&ps->timeout_time,
+ (erts_aint64_t) time);
+}
+
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout_time(ErtsPollSet ps)
+{
+ return (ErtsMonotonicTime) erts_atomic64_read_acqb(&ps->timeout_time);
+}
+
#define ERTS_POLL_NOT_WOKEN ((erts_aint32_t) 0)
#define ERTS_POLL_WOKEN_IO_READY ((erts_aint32_t) 1)
#define ERTS_POLL_WOKEN_INTR ((erts_aint32_t) 2)
@@ -401,7 +424,7 @@ static ERTS_INLINE int
wakeup_cause(ErtsPollSet ps)
{
int res;
- erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
+ erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
switch (wakeup_state) {
case ERTS_POLL_WOKEN_IO_READY:
res = 0;
@@ -414,7 +437,7 @@ wakeup_cause(ErtsPollSet ps)
break;
default:
res = 0;
- erl_exit(ERTS_ABORT_EXIT,
+ erts_exit(ERTS_ABORT_EXIT,
"%s:%d: Internal error: Invalid wakeup_state=%d\n",
__FILE__, __LINE__, (int) wakeup_state);
}
@@ -422,15 +445,29 @@ wakeup_cause(ErtsPollSet ps)
}
static ERTS_INLINE DWORD
-poll_wait_timeout(ErtsPollSet ps, SysTimeval *tvp)
+poll_wait_timeout(ErtsPollSet ps, ErtsMonotonicTime timeout_time)
{
- time_t timeout = tvp->tv_sec * 1000 + tvp->tv_usec / 1000;
+ ErtsMonotonicTime current_time, diff_time, timeout;
- if (timeout <= 0) {
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT) {
+ no_timeout:
+ set_timeout_time(ps, ERTS_MONOTONIC_TIME_MIN);
woke_up(ps);
return (DWORD) 0;
}
+ current_time = erts_get_monotonic_time(NULL);
+ diff_time = timeout_time - current_time;
+ if (diff_time <= 0)
+ goto no_timeout;
+
+ /* Round up to nearest milli second */
+ timeout = (ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1);
+ if (timeout > INT_MAX)
+ timeout = INT_MAX; /* Also prevents DWORD overflow */
+
+ set_timeout_time(ps, current_time + ERTS_MSEC_TO_MONOTONIC(timeout));
+
ResetEvent(ps->event_io_ready);
/*
* Since we don't know the internals of ResetEvent() we issue
@@ -442,10 +479,6 @@ poll_wait_timeout(ErtsPollSet ps, SysTimeval *tvp)
if (erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN)
return (DWORD) 0;
- if (timeout > ((time_t) ERTS_AINT32_T_MAX))
- timeout = ERTS_AINT32_T_MAX; /* Also prevents DWORD overflow */
-
- erts_smp_atomic32_set_relb(&ps->timeout, (erts_aint32_t) timeout);
return (DWORD) timeout;
}
@@ -454,9 +487,8 @@ wake_poller(ErtsPollSet ps, int io_ready)
{
erts_aint32_t wakeup_state;
if (io_ready) {
- /* We may set the event multiple times. This is, however, harmless. */
- wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
- erts_atomic32_set_relb(&ps->wakeup_state, ERTS_POLL_WOKEN_IO_READY);
+ wakeup_state = erts_atomic32_xchg_relb(&ps->wakeup_state,
+ ERTS_POLL_WOKEN_IO_READY);
}
else {
ERTS_THR_MEMORY_BARRIER;
@@ -544,7 +576,7 @@ static void signal_standby(ErtsPollSet ps)
--(ps->standby_wait_counter);
if (ps->standby_wait_counter < 0) {
LeaveCriticalSection(&(ps->standby_crit));
- erl_exit(1,"Standby signalled by more threads than expected");
+ erts_exit(ERTS_ERROR_EXIT,"Standby signalled by more threads than expected");
}
if (!(ps->standby_wait_counter)) {
SetEvent(ps->standby_wait_event);
@@ -706,7 +738,7 @@ static void *break_waiter(void *param)
erts_mtx_unlock(&break_waiter_lock);
break;
default:
- erl_exit(1,"Unexpected event in break_waiter");
+ erts_exit(ERTS_ERROR_EXIT,"Unexpected event in break_waiter");
}
}
}
@@ -1012,12 +1044,12 @@ void erts_poll_interrupt(ErtsPollSet ps, int set /* bool */)
void erts_poll_interrupt_timed(ErtsPollSet ps,
int set /* bool */,
- erts_short_time_t msec)
+ ErtsMonotonicTime timeout_time)
{
- HARDTRACEF(("In erts_poll_interrupt_timed(%d,%ld)",set,msec));
+ HARDTRACEF(("In erts_poll_interrupt_timed(%d,%ld)",set,timeout_time));
if (!set)
reset_interrupt(ps);
- else if (erts_smp_atomic32_read_acqb(&ps->timeout) > (erts_aint32_t) msec)
+ else if (get_timeout_time(ps) > timeout_time)
set_interrupt(ps);
HARDTRACEF(("Out erts_poll_interrupt_timed"));
}
@@ -1085,14 +1117,14 @@ void erts_poll_controlv(ErtsPollSet ps,
pcev[i].events,
pcev[i].on);
}
- ERTS_POLLSET_LOCK(ps);
+ ERTS_POLLSET_UNLOCK(ps);
HARDTRACEF(("Out erts_poll_controlv"));
}
int erts_poll_wait(ErtsPollSet ps,
ErtsPollResFd pr[],
int *len,
- SysTimeval *tvp)
+ ErtsMonotonicTime timeout_time)
{
int no_fds;
DWORD timeout;
@@ -1125,7 +1157,7 @@ int erts_poll_wait(ErtsPollSet ps,
HARDDEBUGF(("Oups!"));
/* Oups, got signalled before we took the lock, can't reset */
if(!is_io_ready(ps)) {
- erl_exit(1,"Internal error: "
+ erts_exit(ERTS_ERROR_EXIT,"Internal error: "
"Inconsistent io structures in erl_poll.\n");
}
START_WAITER(ps,w);
@@ -1149,23 +1181,26 @@ int erts_poll_wait(ErtsPollSet ps,
no_fds = ERTS_POLL_MAX_RES;
#endif
- timeout = poll_wait_timeout(ps, tvp);
+ timeout = poll_wait_timeout(ps, timeout_time);
/*HARDDEBUGF(("timeout = %ld",(long) timeout));*/
if (timeout > 0 && !erts_atomic32_read_nob(&break_waiter_state)) {
HANDLE harr[2] = {ps->event_io_ready, break_happened_event};
int num_h = 2;
+ ERTS_MSACC_PUSH_STATE_M();
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
#ifdef ERTS_SMP
erts_thr_progress_prepare_wait(NULL);
#endif
+ ERTS_MSACC_SET_STATE_CACHED_M(ERTS_MSACC_STATE_SLEEP);
WaitForMultipleObjects(num_h, harr, FALSE, timeout);
#ifdef ERTS_SMP
erts_thr_progress_finalize_wait(NULL);
#endif
+ ERTS_MSACC_POP_STATE_M();
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
woke_up(ps);
@@ -1183,7 +1218,7 @@ int erts_poll_wait(ErtsPollSet ps,
ERTS_SET_BREAK_REQUESTED;
break;
case BREAK_WAITER_GOT_HALT:
- erl_exit(0,"");
+ erts_exit(0,"");
break;
default:
break;
@@ -1242,7 +1277,7 @@ int erts_poll_wait(ErtsPollSet ps,
erts_mtx_unlock(&w->mtx);
}
done:
- erts_smp_atomic32_set_nob(&ps->timeout, ERTS_AINT32_T_MAX);
+ set_timeout_time(ps, ERTS_MONOTONIC_TIME_MAX);
*len = num;
ERTS_POLLSET_UNLOCK(ps);
HARDTRACEF(("Out erts_poll_wait"));
@@ -1326,7 +1361,7 @@ ErtsPollSet erts_poll_create_pollset(void)
#ifdef ERTS_SMP
erts_smp_mtx_init(&ps->mtx, "pollset");
#endif
- erts_smp_atomic32_init_nob(&ps->timeout, ERTS_AINT32_T_MAX);
+ init_timeout_time(ps);
HARDTRACEF(("Out erts_poll_create_pollset"));
return ps;
diff --git a/erts/emulator/sys/win32/erl_win32_sys_ddll.c b/erts/emulator/sys/win32/erl_win32_sys_ddll.c
index 4c8d83ab16..274133a346 100644
--- a/erts/emulator/sys/win32/erl_win32_sys_ddll.c
+++ b/erts/emulator/sys/win32/erl_win32_sys_ddll.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -38,7 +39,7 @@
#include "erl_nif.h"
#define EXT_LEN 4
-#define FILE_EXT ".dll"
+#define FILE_EXT_WCHAR L".dll"
static DWORD tls_index = 0;
static TWinDynDriverCallbacks wddc;
@@ -51,17 +52,22 @@ void erl_sys_ddll_init(void) {
#define ERL_NIF_API_FUNC_DECL(RET,NAME,ARGS) nif_callbacks.NAME = NAME
#include "erl_nif_api_funcs.h"
#undef ERL_NIF_API_FUNC_DECL
-
+ nif_callbacks.erts_alc_test = erts_alc_test;
+
return;
}
/*
* Open a shared object
+ * Expecting 'full_name' as an UTF-8 string.
*/
-int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError* err)
+int erts_sys_ddll_open(const char *full_name, void **handle, ErtsSysDdllError* err)
{
+ HINSTANCE hinstance;
int len;
- char dlname[MAXPATHLEN + 1];
+ wchar_t* wcp;
+ Sint used;
+ int code;
if ((len = sys_strlen(full_name)) >= MAXPATHLEN - EXT_LEN) {
if (err != NULL) {
@@ -69,10 +75,26 @@ int erts_sys_ddll_open2(const char *full_name, void **handle, ErtsSysDdllError*
}
return ERL_DE_LOAD_ERROR_NAME_TO_LONG;
}
- sys_strcpy(dlname, full_name);
- sys_strcpy(dlname+len, FILE_EXT);
- return erts_sys_ddll_open_noext(dlname, handle, err);
+
+ wcp = (wchar_t*)erts_convert_filename_to_wchar((byte*)full_name, len,
+ NULL, 0,
+ ERTS_ALC_T_TMP, &used, EXT_LEN);
+ wcscpy(&wcp[used/2 - 1], FILE_EXT_WCHAR);
+
+ if ((hinstance = LoadLibraryW(wcp)) == NULL) {
+ code = ERL_DE_DYNAMIC_ERROR_OFFSET - GetLastError();
+ if (err != NULL) {
+ err->str = erts_sys_ddll_error(code);
+ }
+ }
+ else {
+ code = ERL_DE_NO_ERROR;
+ *handle = (void *) hinstance;
+ }
+ erts_free(ERTS_ALC_T_TMP, wcp);
+ return code;
}
+
int erts_sys_ddll_open_noext(char *dlname, void **handle, ErtsSysDdllError* err)
{
HINSTANCE hinstance;
diff --git a/erts/emulator/sys/win32/erl_win_dyn_driver.h b/erts/emulator/sys/win32/erl_win_dyn_driver.h
index a7c53c904d..6f28d513c2 100644
--- a/erts/emulator/sys/win32/erl_win_dyn_driver.h
+++ b/erts/emulator/sys/win32/erl_win_dyn_driver.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2003-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2003-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -80,8 +81,8 @@ WDD_TYPEDEF(int, erl_drv_output_term, (ErlDrvTermData, ErlDrvTermData*, int));
WDD_TYPEDEF(int, driver_output_term, (ErlDrvPort, ErlDrvTermData*, int));
WDD_TYPEDEF(int, erl_drv_send_term, (ErlDrvTermData, ErlDrvTermData, ErlDrvTermData*, int));
WDD_TYPEDEF(int, driver_send_term, (ErlDrvPort, ErlDrvTermData, ErlDrvTermData*, int));
+WDD_TYPEDEF(unsigned int, driver_async_port_key, (ErlDrvPort));
WDD_TYPEDEF(long, driver_async, (ErlDrvPort,unsigned int*,void (*)(void*),void*,void (*)(void*)));
-WDD_TYPEDEF(int, driver_async_cancel, (unsigned int));
WDD_TYPEDEF(int, driver_lock_driver, (ErlDrvPort));
WDD_TYPEDEF(void *, driver_dl_open, (char *));
WDD_TYPEDEF(void *, driver_dl_sym, (void *, char *));
@@ -102,6 +103,11 @@ WDD_TYPEDEF(ErlDrvSInt, driver_pdl_inc_refc, (ErlDrvPDL));
WDD_TYPEDEF(ErlDrvSInt, driver_pdl_dec_refc, (ErlDrvPDL));
WDD_TYPEDEF(void, driver_system_info, (ErlDrvSysInfo *, size_t));
WDD_TYPEDEF(int, driver_get_now, (ErlDrvNowData *));
+WDD_TYPEDEF(ErlDrvTime, erl_drv_monotonic_time, (ErlDrvTimeUnit));
+WDD_TYPEDEF(ErlDrvTime, erl_drv_time_offset, (ErlDrvTimeUnit));
+WDD_TYPEDEF(ErlDrvTime, erl_drv_convert_time_unit, (ErlDrvTime,
+ ErlDrvTimeUnit,
+ ErlDrvTimeUnit));
WDD_TYPEDEF(int, driver_monitor_process, (ErlDrvPort port,
ErlDrvTermData process,
ErlDrvMonitor *monitor));
@@ -144,8 +150,8 @@ WDD_TYPEDEF(ErlDrvTid, erl_drv_thread_self, (void));
WDD_TYPEDEF(int, erl_drv_equal_tids, (ErlDrvTid tid1, ErlDrvTid tid2));
WDD_TYPEDEF(void, erl_drv_thread_exit, (void *resp));
WDD_TYPEDEF(int, erl_drv_thread_join, (ErlDrvTid, void **respp));
-WDD_TYPEDEF(int, erl_drv_putenv, (char *key, char *value));
-WDD_TYPEDEF(int, erl_drv_getenv, (char *key, char *value, size_t *value_size));
+WDD_TYPEDEF(int, erl_drv_putenv, (const char *key, char *value));
+WDD_TYPEDEF(int, erl_drv_getenv, (const char *key, char *value, size_t *value_size));
typedef struct {
WDD_FTYPE(null_func) *null_func;
@@ -197,8 +203,8 @@ typedef struct {
WDD_FTYPE(driver_output_term) *driver_output_term;
WDD_FTYPE(erl_drv_send_term) *erl_drv_send_term;
WDD_FTYPE(driver_send_term) *driver_send_term;
+ WDD_FTYPE(driver_async_port_key) *driver_async_port_key;
WDD_FTYPE(driver_async) *driver_async;
- WDD_FTYPE(driver_async_cancel) *driver_async_cancel;
WDD_FTYPE(driver_lock_driver) *driver_lock_driver;
WDD_FTYPE(driver_dl_open) *driver_dl_open;
WDD_FTYPE(driver_dl_sym) *driver_dl_sym;
@@ -216,6 +222,9 @@ typedef struct {
WDD_FTYPE(driver_pdl_dec_refc) *driver_pdl_dec_refc;
WDD_FTYPE(driver_system_info) *driver_system_info;
WDD_FTYPE(driver_get_now) *driver_get_now;
+ WDD_FTYPE(erl_drv_monotonic_time) *erl_drv_monotonic_time;
+ WDD_FTYPE(erl_drv_time_offset) *erl_drv_time_offset;
+ WDD_FTYPE(erl_drv_convert_time_unit) *erl_drv_convert_time_unit;
WDD_FTYPE(driver_monitor_process) *driver_monitor_process;
WDD_FTYPE(driver_demonitor_process) *driver_demonitor_process;
WDD_FTYPE(driver_get_monitored_process) *driver_get_monitored_process;
@@ -308,8 +317,8 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks;
#define driver_output_term (WinDynDriverCallbacks.driver_output_term)
#define erl_drv_send_term (WinDynDriverCallbacks.erl_drv_send_term)
#define driver_send_term (WinDynDriverCallbacks.driver_send_term)
+#define driver_async_port_key (WinDynDriverCallbacks.driver_async_port_key)
#define driver_async (WinDynDriverCallbacks.driver_async)
-#define driver_async_cancel (WinDynDriverCallbacks.driver_async_cancel)
#define driver_lock_driver (WinDynDriverCallbacks.driver_lock_driver)
#define driver_dl_open (WinDynDriverCallbacks.driver_dl_open)
#define driver_dl_sym (WinDynDriverCallbacks.driver_dl_sym)
@@ -327,6 +336,9 @@ extern TWinDynDriverCallbacks WinDynDriverCallbacks;
#define driver_pdl_dec_refc (WinDynDriverCallbacks.driver_pdl_dec_refc)
#define driver_system_info (WinDynDriverCallbacks.driver_system_info)
#define driver_get_now (WinDynDriverCallbacks.driver_get_now)
+#define erl_drv_monotonic_time (WinDynDriverCallbacks.erl_drv_monotonic_time)
+#define erl_drv_time_offset (WinDynDriverCallbacks.erl_drv_time_offset)
+#define erl_drv_convert_time_unit (WinDynDriverCallbacks.erl_drv_convert_time_unit)
#define driver_monitor_process \
(WinDynDriverCallbacks.driver_monitor_process)
#define driver_demonitor_process \
@@ -443,8 +455,8 @@ do { \
((W).driver_output_term) = driver_output_term; \
((W).erl_drv_send_term) = erl_drv_send_term; \
((W).driver_send_term) = driver_send_term; \
+((W).driver_async_port_key) = driver_async_port_key; \
((W).driver_async) = driver_async; \
-((W).driver_async_cancel) = driver_async_cancel; \
((W).driver_lock_driver) = driver_lock_driver; \
((W).driver_dl_open) = driver_dl_open; \
((W).driver_dl_sym) = driver_dl_sym; \
@@ -462,6 +474,9 @@ do { \
((W).driver_pdl_dec_refc) = driver_pdl_dec_refc; \
((W).driver_system_info) = driver_system_info; \
((W).driver_get_now) = driver_get_now; \
+((W).erl_drv_monotonic_time) = erl_drv_monotonic_time; \
+((W).erl_drv_time_offset) = erl_drv_time_offset; \
+((W).erl_drv_convert_time_unit) = erl_drv_convert_time_unit; \
((W).driver_monitor_process) = driver_monitor_process; \
((W).driver_demonitor_process) = driver_demonitor_process; \
((W).driver_get_monitored_process) = driver_get_monitored_process; \
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index 5ce1a61303..04fbf23109 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2012. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -60,16 +61,18 @@
#include <windows.h>
#undef WIN32_LEAN_AND_MEAN
-/*
- * Define MAXPATHLEN in terms of MAXPATH if available.
- */
-
-#ifndef MAXPATH
-#define MAXPATH MAX_PATH
-#endif /* MAXPATH */
#ifndef MAXPATHLEN
-#define MAXPATHLEN MAXPATH
+#define MAXPATHLEN 4096
+/*
+ erts-6.0 (OTP 17.0):
+ We now accept windows paths longer than 260 (MAX_PATH) by conversion to
+ UNC path format. In order to also return long paths from the driver we
+ increased MAXPATHLEN from 260 to larger (but arbitrary) value 4096.
+ It would of course be nicer to instead dynamically allocate large enough
+ tmp buffers when efile_drv needs to return really long paths, and do that
+ for unix as well.
+ */
#endif /* MAXPATHLEN */
/*
@@ -82,7 +85,6 @@
#define NO_ERF
#define NO_ERFC
-#define NO_SYSLOG
#define NO_SYSCONF
#define NO_DAEMON
#define NO_PWD
@@ -95,6 +97,8 @@
# define ERTS_I64_LITERAL(X) X##i64
#endif
+#define ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC 1
+
/*
* Practial Windows specific macros.
*/
@@ -102,22 +106,21 @@
#define CreateAutoEvent(state) CreateEvent(NULL, FALSE, state, NULL)
#define CreateManualEvent(state) CreateEvent(NULL, TRUE, state, NULL)
+/*
+ * Min number of async threads
+ */
+#define ERTS_MIN_NO_OF_ASYNC_THREADS 0
/*
* Our own type of "FD's"
*/
+#define ERTS_SYS_FD_INVALID INVALID_HANDLE_VALUE
#define ERTS_SYS_FD_TYPE HANDLE
#define NO_FSTAT_ON_SYS_FD_TYPE 1 /* They are events, not files */
-#define HAVE_ERTS_CHECK_IO_DEBUG
-int erts_check_io_debug(void);
-
/*
* For erl_time_sup
*/
-#define HAVE_GETHRTIME
-
-#define sys_init_hrtime() /* Nothing */
#define SYS_CLK_TCK 1000
#define SYS_CLOCK_RESOLUTION 1
@@ -159,18 +162,95 @@ typedef struct {
#if defined (__GNUC__)
typedef unsigned long long Uint64;
typedef long long Sint64;
-
-typedef long long SysHrTime;
+# ifdef ULLONG_MAX
+# define ERTS_UINT64_MAX ULLONG_MAX
+# endif
+# ifdef LLONG_MAX
+# define ERTS_SINT64_MAX LLONG_MAX
+# endif
+# ifdef LLONG_MIN
+# define ERTS_SINT64_MIN LLONG_MIN
+# endif
+
+typedef long long ErtsMonotonicTime;
+typedef long long ErtsSysHrTime;
#else
typedef ULONGLONG Uint64;
typedef LONGLONG Sint64;
-typedef LONGLONG SysHrTime;
+typedef LONGLONG ErtsMonotonicTime;
+typedef LONGLONG ErtsSysHrTime;
#endif
-extern int sys_init_time(void);
+typedef ErtsMonotonicTime ErtsSystemTime;
+typedef ErtsMonotonicTime ErtsSysPerfCounter;
+
+ErtsSystemTime erts_os_system_time(void);
+
+#define ERTS_MONOTONIC_TIME_MIN ((ErtsMonotonicTime) (1ULL << 63))
+#define ERTS_MONOTONIC_TIME_MAX (~ERTS_MONOTONIC_TIME_MIN)
+
+#define ERTS_HAVE_OS_MONOTONIC_TIME_SUPPORT 1
+#define ERTS_COMPILE_TIME_MONOTONIC_TIME_UNIT 0
+
+struct erts_sys_time_read_only_data__ {
+ ErtsMonotonicTime (*os_monotonic_time)(void);
+ void (*os_times)(ErtsMonotonicTime *, ErtsSystemTime*);
+ ErtsSysHrTime (*sys_hrtime)(void);
+};
+
+typedef struct {
+ union {
+ struct erts_sys_time_read_only_data__ o;
+ char align__[(((sizeof(struct erts_sys_time_read_only_data__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } r;
+} ErtsSysTimeData__;
+
+extern ErtsSysTimeData__ erts_sys_time_data__;
+
+ERTS_GLB_INLINE ErtsMonotonicTime erts_os_monotonic_time(void);
+ERTS_GLB_INLINE void erts_os_times(ErtsMonotonicTime *,
+ ErtsSystemTime *);
+ERTS_GLB_INLINE ErtsSysHrTime erts_sys_hrtime(void);
+ERTS_GLB_INLINE ErtsSysPerfCounter erts_sys_perf_counter(void);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE ErtsMonotonicTime
+erts_os_monotonic_time(void)
+{
+ return (*erts_sys_time_data__.r.o.os_monotonic_time)();
+}
+
+ERTS_GLB_INLINE void
+erts_os_times(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ (*erts_sys_time_data__.r.o.os_times)(mtimep, stimep);
+}
+
+ERTS_GLB_INLINE ErtsSysHrTime
+erts_sys_hrtime(void)
+{
+ return (*erts_sys_time_data__.r.o.sys_hrtime)();
+}
+
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter(void)
+{
+ return (*erts_sys_time_data__.r.o.sys_hrtime)();
+}
+
+ERTS_GLB_INLINE ErtsSysPerfCounter
+erts_sys_perf_counter_unit(void)
+{
+ return 1000 * 1000 * 1000;
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
extern void sys_gettimeofday(SysTimeval *tv);
-extern SysHrTime sys_gethrtime(void);
extern clock_t sys_times(SysTimes *buffer);
extern char *win_build_environment(char *);
@@ -202,6 +282,8 @@ extern volatile int erl_fp_exception;
int _finite(double x);
#endif
+#define erts_isfinite _finite
+
/*#define NO_FPE_SIGNALS*/
#define erts_get_current_fp_exception() NULL
#define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0)
@@ -231,4 +313,16 @@ typedef long ssize_t;
int init_async(int);
int exit_async(void);
#endif
+
+#define ERTS_HAVE_TRY_CATCH 1
+
+#define ERTS_SYS_TRY_CATCH(EXPR,CATCH) \
+ __try { \
+ EXPR; \
+ } \
+ __except(GetExceptionCode() == EXCEPTION_ACCESS_VIOLATION ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) \
+ { \
+ CATCH; \
+ }
+
#endif
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 922967c698..cf821b05cb 100755..100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1996-2013. All Rights Reserved.
+ * Copyright Ericsson AB 1996-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -32,12 +33,12 @@
#include "erl_threads.h"
#include "../../drivers/win32/win_con.h"
#include "erl_cpu_topology.h"
-
+#include <malloc.h>
void erts_sys_init_float(void);
void erl_start(int, char**);
-void erl_exit(int n, char*, ...);
+void erts_exit(int n, char*, ...);
void erl_error(char*, va_list);
void erl_crash_dump(char*, int, char*, ...);
@@ -69,17 +70,14 @@ static void async_read_file(struct async_io* aio, LPVOID buf, DWORD numToRead);
static int async_write_file(struct async_io* aio, LPVOID buf, DWORD numToWrite);
static int get_overlapped_result(struct async_io* aio,
LPDWORD pBytesRead, BOOL wait);
-static BOOL create_child_process(char *, HANDLE, HANDLE,
+static BOOL create_child_process(wchar_t *, HANDLE, HANDLE,
HANDLE, LPHANDLE, LPDWORD, BOOL,
- LPVOID, LPTSTR, unsigned,
- char **, int *);
+ LPVOID, wchar_t*, unsigned,
+ wchar_t **, int *);
static int create_pipe(LPHANDLE, LPHANDLE, BOOL, BOOL);
-static int application_type(const char* originalName, char fullPath[MAX_PATH],
+static int application_type(const wchar_t* originalName, wchar_t fullPath[MAX_PATH],
BOOL search_in_path, BOOL handle_quotes,
int *error_return);
-static int application_type_w(const WCHAR *originalName, WCHAR fullPath[MAX_PATH],
- BOOL search_in_path, BOOL handle_quotes,
- int *error_return);
HANDLE erts_service_event;
@@ -202,7 +200,7 @@ erts_sys_misc_mem_sz(void)
*/
void sys_tty_reset(int exit_code)
{
- if (exit_code > 0)
+ if (exit_code == ERTS_ERROR_EXIT)
ConWaitForExit();
else
ConNormalExit();
@@ -250,6 +248,27 @@ void erl_sys_args(int* argc, char** argv)
#endif
}
+/*
+ * Function returns 1 if we can read from all values in between
+ * start and stop.
+ */
+int
+erts_sys_is_area_readable(char *start, char *stop) {
+ volatile char tmp;
+ __try
+ {
+ while(start < stop) {
+ tmp = *start;
+ start++;
+ }
+ }
+ __except(EXCEPTION_EXECUTE_HANDLER)
+ {
+ return 0;
+ }
+ return 1;
+}
+
int erts_sys_prepare_crash_dump(int secs)
{
Port *heart_port;
@@ -1173,7 +1192,7 @@ spawn_init(void)
}
static ErlDrvData
-spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts)
+spawn_start(ErlDrvPort port_num, char* utf8_name, SysDriverOpts* opts)
{
HANDLE hToChild = INVALID_HANDLE_VALUE; /* Write handle to child. */
HANDLE hFromChild = INVALID_HANDLE_VALUE; /* Read handle from child. */
@@ -1189,7 +1208,9 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts)
SECURITY_ATTRIBUTES sa = {sizeof(SECURITY_ATTRIBUTES), NULL, TRUE};
char* envir = opts->envir;
int errno_return = -1;
-
+ wchar_t *name;
+ int len;
+
if (opts->read_write & DO_READ)
neededSelects++;
if (opts->read_write & DO_WRITE)
@@ -1247,26 +1268,40 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts)
* Spawn the port program.
*/
- DEBUGF(("Spawning \"%s\"\n", name));
+ if ((len = MultiByteToWideChar(CP_UTF8, 0, utf8_name, -1, NULL, 0)) > 0) {
+ name = erts_alloc(ERTS_ALC_T_TMP, len*sizeof(wchar_t));
+ MultiByteToWideChar(CP_UTF8, 0, utf8_name, -1, name, len);
+ } else { /* Not valid utf-8, just convert byte to wchar */
+ int i;
+ len = strlen(utf8_name);
+ name = erts_alloc(ERTS_ALC_T_TMP, (1+len)*sizeof(wchar_t));
+ for(i=0; i<len; i++) {
+ name[i] = (wchar_t) utf8_name[i];
+ }
+ name[i] = L'\0';
+ }
+ DEBUGF(("Spawning \"%S\"\n", name));
envir = win_build_environment(envir); /* Always a unicode environment */
- ok = create_child_process(name,
- hChildStdin,
- hChildStdout,
- hChildStderr,
- &dp->port_pid,
- &pid,
- opts->hide_window,
- (LPVOID) envir,
- (LPTSTR) opts->wd,
- opts->spawn_type,
- opts->argv,
- &errno_return);
+ ok = create_child_process(name,
+ hChildStdin,
+ hChildStdout,
+ hChildStderr,
+ &dp->port_pid,
+ &pid,
+ opts->hide_window,
+ (LPVOID) envir,
+ (wchar_t *) opts->wd,
+ opts->spawn_type,
+ (wchar_t **) opts->argv,
+ &errno_return);
CloseHandle(hChildStdin);
CloseHandle(hChildStdout);
if (close_child_stderr && hChildStderr != INVALID_HANDLE_VALUE &&
hChildStderr != 0) {
CloseHandle(hChildStderr);
}
+ erts_free(ERTS_ALC_T_TMP, name);
+
if (envir != NULL) {
erts_free(ERTS_ALC_T_ENVIRONMENT, envir);
}
@@ -1299,10 +1334,8 @@ spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* opts)
retval = set_driver_data(dp, hFromChild, hToChild, opts->read_write,
opts->exit_status);
if (retval != ERL_DRV_ERROR_GENERAL && retval != ERL_DRV_ERROR_ERRNO) {
- Port *prt = erts_drvport2port(port_num);
- /* We assume that this cannot generate a negative number */
- ASSERT(prt != ERTS_INVALID_ERL_DRV_PORT);
- prt->os_pid = (SWord) pid;
+ /* We assume that this cannot generate a negative number */
+ erl_drv_set_os_pid(port_num, pid);
}
}
@@ -1344,9 +1377,9 @@ create_file_thread(AsyncIo* aio, int mode)
* Example: input = "\"Program Files\"\\erl arg1 arg2"
* gives 19 as result.
* The length returned is equivalent with length(argv[0]) if the
- * comman line should have been prepared by _setargv for the main function
+ * command line should have been prepared by _setargv for the main function
*/
-int parse_command(char* cmd){
+int parse_command(wchar_t* cmd){
#define NORMAL 2
#define STRING 1
#define STOP 0
@@ -1354,17 +1387,17 @@ int parse_command(char* cmd){
int state = NORMAL;
while (cmd[i]) {
switch (cmd[i]) {
- case '"':
+ case L'"':
if (state == NORMAL)
state = STRING;
else
state = NORMAL;
break;
- case '\\':
- if ((state == STRING) && (cmd[i+1]=='"'))
+ case L'\\':
+ if ((state == STRING) && (cmd[i+1]==L'"'))
i++;
break;
- case ' ':
+ case L' ':
if (state == NORMAL)
state = STOP;
break;
@@ -1379,39 +1412,46 @@ int parse_command(char* cmd){
return i;
}
-static BOOL need_quotes(WCHAR *str)
-{
- int in_quote = 0;
- int backslashed = 0;
- int naked_space = 0;
- while (*str != L'\0') {
- switch (*str) {
- case L'\\' :
- backslashed = !backslashed;
- break;
- case L'"':
- if (backslashed) {
- backslashed=0;
- } else {
- in_quote = !in_quote;
- }
- break;
- case L' ':
- backslashed = 0;
- if (!(backslashed || in_quote)) {
- naked_space++;
- }
- break;
- default:
- backslashed = 0;
+/*
+ * Translating of command line arguments to correct format. In the examples
+ * below the '' are not part of the actual string.
+ * 'io:format("hello").' -> 'io:format(\"hello\").'
+ * 'io:format("is anybody in there?").' -> '"io:format(\"is anybody in there?\")."'
+ * 'Just nod if you can hear me.' -> '"Just nod if you can hear me."'
+ * 'Is there ""anyone at home?' -> '"Is there \"\"anyone at home?"'
+ * 'Relax."' -> 'Relax.\"'
+ *
+ * If new == NULL we just calculate the length.
+ *
+ * The reason for having to quote all of the is becasue CreateProcessW removes
+ * one level of escaping since it takes a single long command line rather
+ * than the argument chunks that unix uses.
+ */
+static int escape_and_quote(wchar_t *str, wchar_t *new, BOOL *quoted) {
+ int i, j = 0;
+ if (new == NULL)
+ *quoted = FALSE;
+ else if (*quoted)
+ new[j++] = L'"';
+ for ( i = 0; str[i] != L'\0'; i++,j++) {
+ if (str[i] == L' ' && new == NULL && *quoted == FALSE) {
+ *quoted = TRUE;
+ j++;
+ }
+ /* check if we have to escape quotes */
+ if (str[i] == L'"') {
+ if (new) new[j] = L'\\';
+ j++;
}
- ++str;
+ if (new) new[j] = str[i];
}
- return (naked_space > 0);
+ if (*quoted) {
+ if (new) new[j] = L'"';
+ j++;
+ }
+ return j;
}
-
-
/*
*----------------------------------------------------------------------
@@ -1443,7 +1483,7 @@ static BOOL need_quotes(WCHAR *str)
static BOOL
create_child_process
(
- char *origcmd, /* Command line for child process (including
+ wchar_t *origcmd, /* Command line for child process (including
* name of executable). Or whole executable if st is
* ERTS_SPAWN_EXECUTABLE
*/
@@ -1454,57 +1494,53 @@ create_child_process
LPDWORD pdwID, /* Pointer to variable to received Process ID */
BOOL hide, /* Hide the window unconditionally. */
LPVOID env, /* Environment for the child */
- LPTSTR wd, /* Working dir for the child */
+ wchar_t *wd, /* Working dir for the child */
unsigned st, /* Flags for spawn, tells us how to interpret origcmd */
- char **argv, /* Argument vector if given. */
+ wchar_t **argv, /* Argument vector if given. */
int *errno_return /* Place to put an errno in in case of failure */
)
-{
+{
PROCESS_INFORMATION piProcInfo = {0};
BOOL ok = FALSE;
int applType;
/* Not to be changed for different types of executables */
int staticCreateFlags = GetPriorityClass(GetCurrentProcess());
int createFlags = DETACHED_PROCESS;
- char *newcmdline = NULL;
+ wchar_t *newcmdline = NULL;
int cmdlength;
- char* thecommand;
- LPTSTR appname = NULL;
+ wchar_t* thecommand;
+ wchar_t* appname = NULL;
HANDLE hProcess = GetCurrentProcess();
-
- *errno_return = -1;
+ STARTUPINFOW siStartInfo = {0};
+ wchar_t execPath[MAX_PATH];
+ *errno_return = -1;
+ siStartInfo.cb = sizeof(STARTUPINFOW);
+ siStartInfo.dwFlags = STARTF_USESTDHANDLES;
+ siStartInfo.hStdInput = hStdin;
+ siStartInfo.hStdOutput = hStdout;
+ siStartInfo.hStdError = hStderr;
if (st != ERTS_SPAWN_EXECUTABLE) {
- STARTUPINFO siStartInfo = {0};
- char execPath[MAX_PATH];
-
- siStartInfo.cb = sizeof(STARTUPINFO);
- siStartInfo.dwFlags = STARTF_USESTDHANDLES;
- siStartInfo.hStdInput = hStdin;
- siStartInfo.hStdOutput = hStdout;
- siStartInfo.hStdError = hStderr;
-
/*
* Parse out the program name from the command line (it can be quoted and
* contain spaces).
*/
- newcmdline = erts_alloc(ERTS_ALC_T_TMP, 2048);
cmdlength = parse_command(origcmd);
- thecommand = (char *) erts_alloc(ERTS_ALC_T_TMP, cmdlength+1);
- strncpy(thecommand, origcmd, cmdlength);
- thecommand[cmdlength] = '\0';
- DEBUGF(("spawn command: %s\n", thecommand));
-
- applType = application_type(thecommand, execPath, TRUE,
- TRUE, errno_return);
- DEBUGF(("application_type returned for (%s) is %d\n", thecommand, applType));
+ newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (MAX_PATH+wcslen(origcmd)-cmdlength)*sizeof(wchar_t));
+ thecommand = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (cmdlength+1)*sizeof(wchar_t));
+ wcsncpy(thecommand, origcmd, cmdlength);
+ thecommand[cmdlength] = L'\0';
+ DEBUGF(("spawn command: %S\n", thecommand));
+
+ applType = application_type(thecommand, execPath, TRUE, TRUE, errno_return);
+ DEBUGF(("application_type returned for (%S) is %d\n", thecommand, applType));
erts_free(ERTS_ALC_T_TMP, (void *) thecommand);
if (applType == APPL_NONE) {
erts_free(ERTS_ALC_T_TMP,newcmdline);
return FALSE;
}
- newcmdline[0] = '\0';
+ newcmdline[0] = L'\0';
if (applType == APPL_DOS) {
/*
@@ -1513,11 +1549,11 @@ create_child_process
* a normal process inside of a hidden console application,
* and then run that hidden console as a detached process.
*/
-
+
siStartInfo.wShowWindow = SW_HIDE;
siStartInfo.dwFlags |= STARTF_USESHOWWINDOW;
createFlags = CREATE_NEW_CONSOLE;
- strcat(newcmdline, "cmd.exe /c ");
+ wcscat(newcmdline, L"cmd.exe /c ");
} else if (hide) {
DEBUGF(("hiding window\n"));
siStartInfo.wShowWindow = SW_HIDE;
@@ -1525,35 +1561,25 @@ create_child_process
createFlags = 0;
}
- strcat(newcmdline, execPath);
- strcat(newcmdline, origcmd+cmdlength);
- DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags));
- ok = CreateProcessA(appname,
- newcmdline,
- NULL,
- NULL,
- TRUE,
- createFlags | staticCreateFlags |
- CREATE_UNICODE_ENVIRONMENT,
- env,
- wd,
- &siStartInfo,
+ wcscat(newcmdline, execPath);
+ wcscat(newcmdline, origcmd+cmdlength);
+ DEBUGF(("Creating child process: %S, createFlags = %d\n", newcmdline, createFlags));
+ ok = CreateProcessW(appname,
+ newcmdline,
+ NULL,
+ NULL,
+ TRUE,
+ createFlags | staticCreateFlags |
+ CREATE_UNICODE_ENVIRONMENT,
+ env,
+ wd,
+ &siStartInfo,
&piProcInfo);
} else { /* ERTS_SPAWN_EXECUTABLE, filename and args are in unicode ({utf16,little}) */
int run_cmd = 0;
- STARTUPINFOW siStartInfo = {0};
- WCHAR execPath[MAX_PATH];
-
- siStartInfo.cb = sizeof(STARTUPINFOW);
- siStartInfo.dwFlags = STARTF_USESTDHANDLES;
- siStartInfo.hStdInput = hStdin;
- siStartInfo.hStdOutput = hStdout;
- siStartInfo.hStdError = hStderr;
-
- applType = application_type_w((WCHAR *) origcmd, execPath, FALSE, FALSE,
- errno_return);
+ applType = application_type(origcmd, execPath, FALSE, FALSE, errno_return);
if (applType == APPL_NONE) {
return FALSE;
}
@@ -1573,100 +1599,87 @@ create_child_process
createFlags = 0;
}
if (run_cmd) {
- WCHAR cmdPath[MAX_PATH];
+ wchar_t cmdPath[MAX_PATH];
int cmdType;
- cmdType = application_type_w(L"cmd.exe", cmdPath, TRUE, FALSE, errno_return);
+ cmdType = application_type(L"cmd.exe", cmdPath, TRUE, FALSE, errno_return);
if (cmdType == APPL_NONE || cmdType == APPL_DOS) {
return FALSE;
}
- appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(cmdPath)+1)*sizeof(WCHAR));
- wcscpy((WCHAR *) appname,cmdPath);
+ appname = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(cmdPath)+1)*sizeof(wchar_t));
+ wcscpy(appname,cmdPath);
} else {
- appname = (char *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(execPath)+1)*sizeof(WCHAR));
- wcscpy((WCHAR *) appname, execPath);
+ appname = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, (wcslen(execPath)+1)*sizeof(wchar_t));
+ wcscpy(appname, execPath);
}
if (argv == NULL) {
- BOOL orig_need_q = need_quotes(execPath);
- WCHAR *ptr;
- int ocl = wcslen(execPath);
+ BOOL orig_need_q;
+ wchar_t *ptr;
+ int ocl = escape_and_quote(execPath, NULL, &orig_need_q);
if (run_cmd) {
- newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP,
- (ocl + ((orig_need_q) ? 3 : 1)
- + 11)*sizeof(WCHAR));
- memcpy(newcmdline,L"cmd.exe /c ",11*sizeof(WCHAR));
- ptr = (WCHAR *) (newcmdline + (11*sizeof(WCHAR)));
+ newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP,
+ (ocl + 1 + 11)*sizeof(wchar_t));
+ memcpy(newcmdline,L"cmd.exe /c ",11*sizeof(wchar_t));
+ ptr = newcmdline + 11;
} else {
- newcmdline = (char *) erts_alloc(ERTS_ALC_T_TMP,
- (ocl + ((orig_need_q) ? 3 : 1))*sizeof(WCHAR));
- ptr = (WCHAR *) newcmdline;
- }
- if (orig_need_q) {
- *ptr++ = L'"';
+ newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP,
+ (ocl + 1)*sizeof(wchar_t));
+ ptr = (wchar_t *) newcmdline;
}
- memcpy(ptr,execPath,ocl*sizeof(WCHAR));
- ptr += ocl;
- if (orig_need_q) {
- *ptr++ = L'"';
- }
- *ptr = L'\0';
+ ptr += escape_and_quote(execPath, ptr, &orig_need_q);
+ ptr[0] = L'\0';
} else {
- int sum = 1; /* '\0' */
- WCHAR **ar = (WCHAR **) argv;
- WCHAR *n;
- char *save_arg0 = NULL;
- if (argv[0] == erts_default_arg0 || run_cmd) {
+ int sum = 0;
+ BOOL *qte = NULL;
+ wchar_t **ar = argv;
+ wchar_t *n;
+ wchar_t *save_arg0 = NULL;
+ if (argv[0] == (wchar_t *) erts_default_arg0 || run_cmd) {
save_arg0 = argv[0];
- argv[0] = (char *) execPath;
+ argv[0] = execPath;
}
if (run_cmd) {
sum += 11; /* cmd.exe /c */
}
+
+ while (*ar != NULL) ar++;
+ qte = erts_alloc(ERTS_ALC_T_TMP, (ar - argv)*sizeof(BOOL));
+
+ ar = argv;
while (*ar != NULL) {
- sum += wcslen(*ar);
- if (need_quotes(*ar)) {
- sum += 2; /* quotes */
- }
+ sum += escape_and_quote(*ar,NULL,qte+(ar - argv));
sum++; /* space */
++ar;
}
- ar = (WCHAR **) argv;
- newcmdline = erts_alloc(ERTS_ALC_T_TMP, sum*sizeof(WCHAR));
- n = (WCHAR *) newcmdline;
+ ar = argv;
+ newcmdline = (wchar_t *) erts_alloc(ERTS_ALC_T_TMP, sum*sizeof(wchar_t));
+ n = newcmdline;
if (run_cmd) {
- memcpy(n,L"cmd.exe /c ",11*sizeof(WCHAR));
+ memcpy(n,L"cmd.exe /c ",11*sizeof(wchar_t));
n += 11;
}
while (*ar != NULL) {
- int q = need_quotes(*ar);
- sum = wcslen(*ar);
- if (q) {
- *n++ = L'"';
- }
- memcpy(n,*ar,sum*sizeof(WCHAR));
- n += sum;
- if (q) {
- *n++ = L'"';
- }
+ n += escape_and_quote(*ar,n,qte+(ar - argv));
*n++ = L' ';
++ar;
}
- *(n-1) = L'\0';
+ *(n-1) = L'\0'; /* overwrite last space with '\0' */
if (save_arg0 != NULL) {
argv[0] = save_arg0;
}
+ erts_free(ERTS_ALC_T_TMP, qte);
}
- DEBUGF(("Creating child process: %s, createFlags = %d\n", newcmdline, createFlags));
- ok = CreateProcessW((WCHAR *) appname,
- (WCHAR *) newcmdline,
- NULL,
- NULL,
- TRUE,
- createFlags | staticCreateFlags |
- CREATE_UNICODE_ENVIRONMENT,
- env,
- (WCHAR *) wd,
- &siStartInfo,
+ DEBUGF((stderr,"Creating child process: %S, createFlags = %d\n", newcmdline, createFlags));
+ ok = CreateProcessW((wchar_t *) appname,
+ (wchar_t *) newcmdline,
+ NULL,
+ NULL,
+ TRUE,
+ createFlags | staticCreateFlags |
+ CREATE_UNICODE_ENVIRONMENT,
+ env,
+ wd,
+ &siStartInfo,
&piProcInfo);
} /* end SPAWN_EXECUTABLE */
@@ -1775,180 +1788,23 @@ static int create_pipe(HANDLE *phRead, HANDLE *phWrite, BOOL inheritRead, BOOL o
return TRUE;
}
-
-
-
-static int application_type
-(
- const char *originalName, /* Name of the application to find. */
- char fullPath[MAX_PATH], /* Filled with complete path to
- * application. */
- BOOL search_in_path, /* If we should search the system wide path */
- BOOL handle_quotes, /* If we should handle quotes around executable */
- int *error_return /* A place to put an error code */
- )
-{
- int applType, i;
- HANDLE hFile;
- char *ext, *rest;
- char buf[2];
- DWORD read;
- IMAGE_DOS_HEADER header;
- static char extensions[][5] = {"", ".com", ".exe", ".bat"};
- int is_quoted;
- int len;
-
- /* Look for the program as an external program. First try the name
- * as it is, then try adding .com, .exe, and .bat, in that order, to
- * the name, looking for an executable.
- * NOTE! that we does not support execution of .com programs on Windows NT
- *
- *
- * Using the raw SearchPath() procedure doesn't do quite what is
- * necessary. If the name of the executable already contains a '.'
- * character, it will not try appending the specified extension when
- * searching (in other words, SearchPath will not find the program
- * "a.b.exe" if the arguments specified "a.b" and ".exe").
- * So, first look for the file as it is named. Then manually append
- * the extensions, looking for a match. (')
- */
-
- len = strlen(originalName);
- is_quoted = handle_quotes && len > 0 && originalName[0] == '"' &&
- originalName[len-1] == '"';
-
- applType = APPL_NONE;
- *error_return = ENOENT;
- for (i = 0; i < (int) (sizeof(extensions) / sizeof(extensions[0])); i++) {
- if(is_quoted) {
- lstrcpyn(fullPath, originalName+1, MAX_PATH - 7);
- len = strlen(fullPath);
- if(len > 0) {
- fullPath[len-1] = '\0';
- }
- } else {
- lstrcpyn(fullPath, originalName, MAX_PATH - 5);
- }
- lstrcat(fullPath, extensions[i]);
- SearchPath((search_in_path) ? NULL : ".", fullPath, NULL, MAX_PATH, fullPath, &rest);
-
- /*
- * Ignore matches on directories or data files, return if identified
- * a known type.
- */
-
- if (GetFileAttributes(fullPath) & FILE_ATTRIBUTE_DIRECTORY) {
- continue;
- }
-
- ext = strrchr(fullPath, '.');
- if ((ext != NULL) && (strcmpi(ext, ".bat") == 0)) {
- *error_return = EACCES;
- applType = APPL_DOS;
- break;
- }
-
- hFile = CreateFile(fullPath, GENERIC_READ, FILE_SHARE_READ, NULL,
- OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
- if (hFile == INVALID_HANDLE_VALUE) {
- continue;
- }
-
- *error_return = EACCES; /* If considered an error,
- it's an access error */
- header.e_magic = 0;
- ReadFile(hFile, (void *) &header, sizeof(header), &read, NULL);
- if (header.e_magic != IMAGE_DOS_SIGNATURE) {
- /*
- * Doesn't have the magic number for relocatable executables. If
- * filename ends with .com, assume it's a DOS application anyhow.
- * Note that we didn't make this assumption at first, because some
- * supposed .com files are really 32-bit executables with all the
- * magic numbers and everything.
- */
-
- CloseHandle(hFile);
- if ((ext != NULL) && (strcmpi(ext, ".com") == 0)) {
- applType = APPL_DOS;
- break;
- }
- continue;
- }
- if (header.e_lfarlc != sizeof(header)) {
- /*
- * All Windows 3.X and Win32 and some DOS programs have this value
- * set here. If it doesn't, assume that since it already had the
- * other magic number it was a DOS application.
- */
-
- CloseHandle(hFile);
- applType = APPL_DOS;
- break;
- }
-
- /*
- * The DWORD at header.e_lfanew points to yet another magic number.
- */
-
- buf[0] = '\0';
- SetFilePointer(hFile, header.e_lfanew, NULL, FILE_BEGIN);
- ReadFile(hFile, (void *) buf, 2, &read, NULL);
- CloseHandle(hFile);
-
- if ((buf[0] == 'L') && (buf[1] == 'E')) {
- applType = APPL_DOS;
- } else if ((buf[0] == 'N') && (buf[1] == 'E')) {
- applType = APPL_WIN3X;
- } else if ((buf[0] == 'P') && (buf[1] == 'E')) {
- applType = APPL_WIN32;
- } else {
- continue;
- }
- break;
- }
-
- if (applType == APPL_NONE) {
- return APPL_NONE;
- }
-
- if ((applType == APPL_DOS) || (applType == APPL_WIN3X)) {
- /*
- * Replace long path name of executable with short path name for
- * 16-bit applications. Otherwise the application may not be able
- * to correctly parse its own command line to separate off the
- * application name from the arguments.
- */
-
- GetShortPathName(fullPath, fullPath, MAX_PATH);
- }
- if (is_quoted) {
- /* restore quotes on quoted program name */
- len = strlen(fullPath);
- memmove(fullPath+1,fullPath,len);
- fullPath[0]='"';
- fullPath[len+1]='"';
- fullPath[len+2]='\0';
- }
- return applType;
-}
-
-static int application_type_w (const WCHAR *originalName, /* Name of the application to find. */
- WCHAR wfullpath[MAX_PATH],/* Filled with complete path to
+static int application_type (const wchar_t *originalName, /* Name of the application to find. */
+ wchar_t wfullpath[MAX_PATH],/* Filled with complete path to
* application. */
- BOOL search_in_path, /* If we should search the system wide path */
- BOOL handle_quotes, /* If we should handle quotes around executable */
- int *error_return) /* A place to put an error code */
+ BOOL search_in_path, /* If we should search the system wide path */
+ BOOL handle_quotes, /* If we should handle quotes around executable */
+ int *error_return) /* A place to put an error code */
{
int applType, i;
HANDLE hFile;
- WCHAR *ext, *rest;
+ wchar_t *ext, *rest;
char buf[2];
DWORD read;
IMAGE_DOS_HEADER header;
- static WCHAR extensions[][5] = {L"", L".com", L".exe", L".bat"};
+ static wchar_t extensions[][5] = {L"", L".com", L".exe", L".bat"};
int is_quoted;
int len;
- WCHAR xfullpath[MAX_PATH];
+ wchar_t xfullpath[MAX_PATH];
len = wcslen(originalName);
is_quoted = handle_quotes && len > 0 && originalName[0] == L'"' &&
@@ -2063,7 +1919,7 @@ static int application_type_w (const WCHAR *originalName, /* Name of the applica
if (is_quoted) {
/* restore quotes on quoted program name */
len = wcslen(wfullpath);
- memmove(wfullpath+1,wfullpath,len*sizeof(WCHAR));
+ memmove(wfullpath+1,wfullpath,len*sizeof(wchar_t));
wfullpath[0]=L'"';
wfullpath[len+1]=L'"';
wfullpath[len+2]=L'\0';
@@ -2348,7 +2204,7 @@ static void fd_stop(ErlDrvData data)
ASSERT(dp->out.flushEvent);
SetEvent(dp->out.flushEvent);
} while (WaitForSingleObject(dp->out.flushReplyEvent, 10) == WAIT_TIMEOUT
- || !(dp->out.flags & DF_THREAD_FLUSHED));
+ && !(dp->out.flags & DF_THREAD_FLUSHED));
}
}
@@ -2912,6 +2768,30 @@ void erts_sys_free(ErtsAlcType_t t, void *x, void *p)
free(p);
}
+void *erts_sys_aligned_alloc(UWord alignment, UWord size)
+{
+ void *ptr;
+ ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */
+ ptr = _aligned_malloc((size_t) size, (size_t) alignment);
+ ASSERT(!ptr || (((UWord) ptr) & (alignment - 1)) == 0);
+ return ptr;
+}
+
+void erts_sys_aligned_free(UWord alignment, void *ptr)
+{
+ ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */
+ _aligned_free(ptr);
+}
+
+void *erts_sys_aligned_realloc(UWord alignment, void *ptr, UWord size, UWord old_size)
+{
+ void *new_ptr;
+ ASSERT(alignment && (alignment & (alignment-1)) == 0); /* power of 2 */
+ new_ptr = _aligned_realloc(ptr, (size_t) size, (size_t) alignment);
+ ASSERT(!new_ptr || (((UWord) new_ptr) & (alignment - 1)) == 0);
+ return new_ptr;
+}
+
static Preload* preloaded = NULL;
static unsigned* res_name = NULL;
static int num_preloaded = 0;
@@ -3199,8 +3079,10 @@ erl_bin_write(buf, sz, max)
}
}
+#endif /* DEBUG */
+
void
-erl_assert_error(char* expr, char* file, int line)
+erl_assert_error(const char* expr, const char* func, const char* file, int line)
{
char message[1024];
@@ -3214,7 +3096,6 @@ erl_assert_error(char* expr, char* file, int line)
DebugBreak();
}
-#endif /* DEBUG */
static void
check_supported_os_version(void)
@@ -3228,13 +3109,13 @@ check_supported_os_version(void)
|| int_os_version.dwMajorVersion < major
|| (int_os_version.dwMajorVersion == major
&& int_os_version.dwMinorVersion < minor))
- erl_exit(-1,
+ erts_exit(1,
"Windows version not supported "
"(min required: winnt %d.%d)\n",
major, minor);
}
#else
- erl_exit(-1,
+ erts_exit(1,
"Windows version not supported "
"(min required: win %d.%d)\n",
nt_major, nt_minor);
@@ -3291,25 +3172,31 @@ thr_create_prepare_child(void *vtcdp)
void
erts_sys_pre_init(void)
{
+#ifdef USE_THREADS
+ erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
+#endif
int_os_version.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
GetVersionEx(&int_os_version);
check_supported_os_version();
+
#ifdef USE_THREADS
- {
- erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
+ eid.thread_create_child_func = thr_create_prepare_child;
+ /* Before creation in parent */
+ eid.thread_create_prepare_func = thr_create_prepare;
+ /* After creation in parent */
+ eid.thread_create_parent_func = thr_create_cleanup;
- eid.thread_create_child_func = thr_create_prepare_child;
- /* Before creation in parent */
- eid.thread_create_prepare_func = thr_create_prepare;
- /* After creation in parent */
- eid.thread_create_parent_func = thr_create_cleanup,
+ erts_thr_init(&eid);
+#endif
+
+ erts_init_sys_time_sup();
- erts_thr_init(&eid);
+#ifdef USE_THREADS
#ifdef ERTS_ENABLE_LOCK_COUNT
- erts_lcnt_init();
+ erts_lcnt_init();
#endif
- }
#endif
+
erts_smp_atomic_init_nob(&sys_misc_mem_sz, 0);
}
@@ -3331,7 +3218,7 @@ void erl_sys_init(void)
noinherit_std_handle(STD_ERROR_HANDLE);
#ifdef ERTS_SMP
- erts_smp_tsd_key_create(&win32_errstr_key);
+ erts_smp_tsd_key_create(&win32_errstr_key,"win32_errstr_key");
InitializeCriticalSection(&htbc_lock);
#endif
erts_smp_atomic_init_nob(&pipe_creation_counter,0);
@@ -3385,6 +3272,12 @@ void erl_sys_init(void)
}
void
+erl_sys_late_init(void)
+{
+ /* do nothing */
+}
+
+void
erts_sys_schedule_interrupt(int set)
{
erts_check_io_interrupt(set);
@@ -3392,9 +3285,9 @@ erts_sys_schedule_interrupt(int set)
#ifdef ERTS_SMP
void
-erts_sys_schedule_interrupt_timed(int set, erts_short_time_t msec)
+erts_sys_schedule_interrupt_timed(int set, ErtsMonotonicTime timeout_time)
{
- erts_check_io_interrupt_timed(set, msec);
+ erts_check_io_interrupt_timed(set, timeout_time);
}
#endif
diff --git a/erts/emulator/sys/win32/sys_env.c b/erts/emulator/sys/win32/sys_env.c
index 754f4c6e4c..21ef71ad9a 100644
--- a/erts/emulator/sys/win32/sys_env.c
+++ b/erts/emulator/sys/win32/sys_env.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2002-2012. All Rights Reserved.
+ * Copyright Ericsson AB 2002-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
*
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -141,6 +142,24 @@ void fini_getenv_state(GETENV_STATE *state)
erts_smp_rwmtx_runlock(&environ_rwmtx);
}
+int erts_sys_unsetenv(char *key)
+{
+ int res = 0;
+ WCHAR *wkey = (WCHAR *) key;
+
+ SetLastError(0);
+ erts_smp_rwmtx_rlock(&environ_rwmtx);
+ GetEnvironmentVariableW(wkey,
+ NULL,
+ 0);
+ if (GetLastError() != ERROR_ENVVAR_NOT_FOUND) {
+ res = (SetEnvironmentVariableW(wkey,
+ NULL) ? 0 : 1);
+ }
+ erts_smp_rwmtx_runlock(&environ_rwmtx);
+ return res;
+}
+
char*
win_build_environment(char* new_env)
{
diff --git a/erts/emulator/sys/win32/sys_float.c b/erts/emulator/sys/win32/sys_float.c
index 0e19746cf5..2b2d6ab7d3 100644
--- a/erts/emulator/sys/win32/sys_float.c
+++ b/erts/emulator/sys/win32/sys_float.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2013. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -138,8 +139,7 @@ sys_double_to_chars_ext(double fp, char *buffer, size_t buffer_size, size_t deci
int
matherr(struct _exception *exc)
{
- erl_fp_exception = 1;
- DEBUGF(("FP exception (matherr) (0x%x) (%d)\n", exc->type, erl_fp_exception));
+ DEBUGF(("FP exception (matherr) (0x%x)\n", exc->type));
return 1;
}
diff --git a/erts/emulator/sys/win32/sys_interrupt.c b/erts/emulator/sys/win32/sys_interrupt.c
index 36c43e93da..df838960eb 100644
--- a/erts/emulator/sys/win32/sys_interrupt.c
+++ b/erts/emulator/sys/win32/sys_interrupt.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2012. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -80,7 +81,7 @@ BOOL WINAPI ctrl_handler_ignore_break(DWORD dwCtrlType)
return TRUE;
/* else pour through... */
case CTRL_CLOSE_EVENT:
- erl_exit(0, "");
+ erts_exit(0, "");
break;
}
return TRUE;
@@ -105,7 +106,7 @@ BOOL WINAPI ctrl_handler_replace_intr(DWORD dwCtrlType)
/* else pour through... */
case CTRL_CLOSE_EVENT:
case CTRL_SHUTDOWN_EVENT:
- erl_exit(0, "");
+ erts_exit(0, "");
break;
}
return TRUE;
@@ -132,7 +133,7 @@ BOOL WINAPI ctrl_handler(DWORD dwCtrlType)
return TRUE;
/* else pour through... */
case CTRL_CLOSE_EVENT:
- erl_exit(0, "");
+ erts_exit(0, "");
break;
}
return TRUE;
diff --git a/erts/emulator/sys/win32/sys_time.c b/erts/emulator/sys/win32/sys_time.c
index b84c8f85ce..e8c67b3928 100644
--- a/erts/emulator/sys/win32/sys_time.c
+++ b/erts/emulator/sys/win32/sys_time.c
@@ -1,18 +1,19 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2013. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2016. All Rights Reserved.
*
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*
* %CopyrightEnd%
*/
@@ -25,6 +26,11 @@
#endif
#include "sys.h"
#include "assert.h"
+#include "erl_os_monotonic_time_extender.h"
+#include "erl_time.h"
+
+/* Need to look more closely at qpc before use... */
+#define ERTS_DISABLE_USE_OF_QPC_FOR_MONOTONIC_TIME 1
#define LL_LITERAL(X) ERTS_I64_LITERAL(X)
@@ -61,11 +67,6 @@
(epoch) = ((ull.QuadPart / TICKS_PER_SECOND) - EPOCH_JULIAN_DIFF); \
} while(0)
-static SysHrTime wrap = 0;
-static DWORD last_tick_count = 0;
-static erts_smp_mtx_t wrap_lock;
-static ULONGLONG (WINAPI *pGetTickCount64)(void) = NULL;
-
/* Getting timezone information is a heavy operation, so we want to do this
only once */
@@ -76,27 +77,349 @@ static int days_in_month[2][13] = {
{0,31,28,31,30,31,30,31,31,30,31,30,31},
{0,31,29,31,30,31,30,31,31,30,31,30,31}};
-int
-sys_init_time(void)
+#define ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT 10
+
+/*
+ * erts_os_monotonic_time()
+ */
+
+struct sys_time_internal_state_read_only__ {
+ ULONGLONG (WINAPI *pGetTickCount64)(void);
+ BOOL (WINAPI *pQueryPerformanceCounter)(LARGE_INTEGER *);
+ Sint32 pcf;
+ int using_get_tick_count_time_unit;
+};
+
+struct sys_time_internal_state_read_mostly__ {
+ ErtsOsMonotonicTimeExtendState os_mtime_xtnd;
+};
+
+struct sys_time_internal_state_write_freq__ {
+ erts_smp_mtx_t mtime_mtx;
+ ULONGLONG wrap;
+ ULONGLONG last_tick_count;
+};
+
+__declspec(align(ASSUMED_CACHE_LINE_SIZE)) struct {
+ union {
+ struct sys_time_internal_state_read_only__ o;
+ char align__[(((sizeof(struct sys_time_internal_state_read_only__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } r;
+ union {
+ struct sys_time_internal_state_read_mostly__ m;
+ char align__[(((sizeof(struct sys_time_internal_state_read_mostly__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } wr;
+ union {
+ struct sys_time_internal_state_write_freq__ f;
+ char align__[(((sizeof(struct sys_time_internal_state_write_freq__) - 1)
+ / ASSUMED_CACHE_LINE_SIZE) + 1)
+ * ASSUMED_CACHE_LINE_SIZE];
+ } w;
+} internal_state;
+
+__declspec(align(ASSUMED_CACHE_LINE_SIZE)) ErtsSysTimeData__ erts_sys_time_data__;
+
+
+static ERTS_INLINE ErtsSystemTime
+SystemTime2MilliSec(SYSTEMTIME *stp)
+{
+ ErtsSystemTime stime;
+ FILETIME ft;
+ ULARGE_INTEGER ull;
+
+ SystemTimeToFileTime(stp, &ft);
+ FILETIME_TO_ULI(ull,ft);
+ /* now in 100 ns units */
+ stime = (ErtsSystemTime) ull.QuadPart;
+ stime -= (((ErtsSystemTime) EPOCH_JULIAN_DIFF)
+ * ((ErtsSystemTime) (10*1000*1000)));
+ stime /= (ErtsSystemTime) (10*1000); /* ms */
+ return stime;
+}
+
+static ErtsMonotonicTime
+os_monotonic_time_qpc(void)
{
+ LARGE_INTEGER pc;
+
+ if (!(*internal_state.r.o.pQueryPerformanceCounter)(&pc))
+ erts_exit(ERTS_ABORT_EXIT, "QueryPerformanceCounter() failed\n");
+
+ return (ErtsMonotonicTime) pc.QuadPart;
+}
+
+static void
+os_times_qpc(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ LARGE_INTEGER pc;
+ SYSTEMTIME st;
+ ErtsSystemTime stime;
+ BOOL qpcr;
+
+ qpcr = (*internal_state.r.o.pQueryPerformanceCounter)(&pc);
+ GetSystemTime(&st);
+
+ if (!qpcr)
+ erts_exit(ERTS_ABORT_EXIT, "QueryPerformanceCounter() failed\n");
+
+ *mtimep = (ErtsMonotonicTime) pc.QuadPart;
+
+ stime = SystemTime2MilliSec(&st);
+
+ *stimep = ((ErtsSystemTime)
+ erts_time_unit_conversion((Uint64) stime,
+ (Uint32) 1000,
+ internal_state.r.o.pcf));
+}
+
+static Uint32
+get_tick_count(void)
+{
+ return (Uint32) GetTickCount();
+}
+
+static ErtsMonotonicTime
+os_monotonic_time_gtc32(void)
+{
+ ErtsMonotonicTime mtime;
+ Uint32 ticks = (Uint32) GetTickCount();
+ ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ mtime = ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ mtime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ return mtime;
+}
+
+static void
+os_times_gtc32(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ SYSTEMTIME st;
+ ErtsSystemTime stime, mtime;
+ Uint32 ticks;
+
+ ticks = (Uint32) GetTickCount();
+ GetSystemTime(&st);
+
+ ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ mtime = ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ mtime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ *mtimep = mtime;
+
+ stime = SystemTime2MilliSec(&st);
+ stime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ *stimep = stime;
+
+}
+
+static ErtsMonotonicTime
+os_monotonic_time_gtc64(void)
+{
+ ULONGLONG ticks = (*internal_state.r.o.pGetTickCount64)();
+ ErtsMonotonicTime mtime = (ErtsMonotonicTime) ticks;
+ return mtime << ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+}
+
+static void
+os_times_gtc64(ErtsMonotonicTime *mtimep, ErtsSystemTime *stimep)
+{
+ SYSTEMTIME st;
+ ErtsSystemTime stime, mtime;
+ ULONGLONG ticks;
+
+ ticks = (*internal_state.r.o.pGetTickCount64)();
+ GetSystemTime(&st);
+
+ mtime = (ErtsMonotonicTime) ticks;
+ mtime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ *mtimep = mtime;
+
+ stime = SystemTime2MilliSec(&st);
+ stime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ *stimep = stime;
+}
+
+static ErtsSysHrTime
+sys_hrtime_qpc(void)
+{
+ LARGE_INTEGER pc;
+
+ if (!(*internal_state.r.o.pQueryPerformanceCounter)(&pc))
+ erts_exit(ERTS_ABORT_EXIT, "QueryPerformanceCounter() failed\n");
+
+ ASSERT(pc.QuadPart > 0);
+
+ return (ErtsSysHrTime) erts_time_unit_conversion((Uint64) pc.QuadPart,
+ internal_state.r.o.pcf,
+ (Uint32) 1000*1000*1000);
+}
+
+static ErtsSysHrTime
+sys_hrtime_gtc32(void)
+{
+ ErtsSysHrTime time;
+ Uint32 ticks = (Uint32) GetTickCount();
+ ERTS_CHK_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ tick_count);
+ time = (ErtsSysHrTime) ERTS_EXTEND_OS_MONOTONIC_TIME(&internal_state.wr.m.os_mtime_xtnd,
+ ticks);
+ time *= (ErtsSysHrTime) (1000 * 1000);
+ return time;
+}
+
+static ErtsSysHrTime
+sys_hrtime_gtc64(void)
+{
+ ErtsSysHrTime time = (*internal_state.r.o.pGetTickCount64)();
+ time *= (ErtsSysHrTime) (1000*1000);
+ return time;
+}
+
+/*
+ * Init
+ */
+
+void
+sys_init_time(ErtsSysInitTimeResult *init_resp)
+{
+ ErtsMonotonicTime (*os_mtime_func)(void);
+ void (*os_times_func)(ErtsMonotonicTime *, ErtsSystemTime *);
+ ErtsSysHrTime (*sys_hrtime_func)(void) = NULL;
+ ErtsMonotonicTime time_unit;
char kernel_dll_name[] = "kernel32";
HMODULE module;
+ init_resp->os_monotonic_time_info.clock_id = NULL;
+
module = GetModuleHandle(kernel_dll_name);
- pGetTickCount64 = (module != NULL) ?
- (ULONGLONG (WINAPI *)(void))
- GetProcAddress(module,"GetTickCount64") :
- NULL;
+ if (!module) {
+ get_tick_count:
+ erts_smp_mtx_init(&internal_state.w.f.mtime_mtx,
+ "os_monotonic_time");
+ internal_state.w.f.wrap = 0;
+ internal_state.w.f.last_tick_count = 0;
+
+ init_resp->os_monotonic_time_info.func = "GetTickCount";
+ init_resp->os_monotonic_time_info.locked_use = 0;
+ /* 10-16 ms resolution according to MicroSoft documentation */
+ init_resp->os_monotonic_time_info.resolution = 100; /* 10 ms */
+ time_unit = (ErtsMonotonicTime) 1000;
+ time_unit <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ internal_state.r.o.using_get_tick_count_time_unit = 1;
+ os_mtime_func = os_monotonic_time_gtc32;
+ os_times_func = os_times_gtc32;
+ init_resp->os_monotonic_time_info.extended = 1;
+ erts_init_os_monotonic_time_extender(&internal_state.wr.m.os_mtime_xtnd,
+ get_tick_count,
+ 60*60*24*7); /* Check once a week */
+ if (!sys_hrtime_func)
+ sys_hrtime_func = sys_hrtime_gtc32;
+ }
+ else {
+ int major, minor, build;
+
+ os_version(&major, &minor, &build);
+
+ if (major < 6) {
+
+ get_tick_count64:
+
+ internal_state.r.o.pGetTickCount64
+ = ((ULONGLONG (WINAPI *)(void))
+ GetProcAddress(module, "GetTickCount64"));
+ if (!internal_state.r.o.pGetTickCount64)
+ goto get_tick_count;
+
+ init_resp->os_monotonic_time_info.func = "GetTickCount64";
+ init_resp->os_monotonic_time_info.locked_use = 0;
+ /* 10-16 ms resolution according to MicroSoft documentation */
+ init_resp->os_monotonic_time_info.resolution = 100; /* 10 ms */
+ time_unit = (ErtsMonotonicTime) 1000;
+ time_unit <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ internal_state.r.o.using_get_tick_count_time_unit = 1;
+ os_mtime_func = os_monotonic_time_gtc64;
+ os_times_func = os_times_gtc64;
+ if (!sys_hrtime_func)
+ sys_hrtime_func = sys_hrtime_gtc64;
+ }
+ else { /* Vista or newer... */
+
+ LARGE_INTEGER pf;
+ BOOL (WINAPI *QPF)(LARGE_INTEGER *);
+
+ QPF = ((BOOL (WINAPI *)(LARGE_INTEGER *))
+ GetProcAddress(module, "QueryPerformanceFrequency"));
+ if (!QPF)
+ goto get_tick_count64;
+ if (!(*QPF)(&pf))
+ goto get_tick_count64;
+
+ internal_state.r.o.pQueryPerformanceCounter
+ = ((BOOL (WINAPI *)(LARGE_INTEGER *))
+ GetProcAddress(module, "QueryPerformanceCounter"));
+ if (!internal_state.r.o.pQueryPerformanceCounter)
+ goto get_tick_count64;
+
+ if (pf.QuadPart > (((LONGLONG) 1) << 32))
+ goto get_tick_count64;
+
+ internal_state.r.o.pcf = (Uint32) pf.QuadPart;
+ sys_hrtime_func = sys_hrtime_qpc;
+
+ /*
+ * We only use QueryPerformanceCounter() for
+ * os-monotonic-time if its frequency is equal
+ * to, or larger than GHz in order to ensure
+ * that the user wont be able to observe faulty
+ * order between values retrieved on different threads.
+ */
+ if (pf.QuadPart < (LONGLONG) 1000*1000*1000)
+ goto get_tick_count64;
+
+ if (ERTS_DISABLE_USE_OF_QPC_FOR_MONOTONIC_TIME)
+ goto get_tick_count64;
+
+ init_resp->os_monotonic_time_info.func = "QueryPerformanceCounter";
+ init_resp->os_monotonic_time_info.locked_use = 0;
+ time_unit = (ErtsMonotonicTime) pf.QuadPart;
+ internal_state.r.o.using_get_tick_count_time_unit = 0;
+ init_resp->os_monotonic_time_info.resolution = time_unit;
+ os_mtime_func = os_monotonic_time_qpc;
+ os_times_func = os_times_qpc;
+ }
+ }
+
+ erts_sys_time_data__.r.o.os_monotonic_time = os_mtime_func;
+ erts_sys_time_data__.r.o.os_times = os_times_func;
+ erts_sys_time_data__.r.o.sys_hrtime = sys_hrtime_func;
+ init_resp->os_monotonic_time_unit = time_unit;
+ init_resp->have_os_monotonic_time = 1;
+ init_resp->have_corrected_os_monotonic_time = 0;
+ init_resp->sys_clock_resolution = 1;
+
+ init_resp->os_system_time_info.func = "GetSystemTime";
+ init_resp->os_system_time_info.clock_id = NULL;
+ init_resp->os_system_time_info.resolution = 100;
+ init_resp->os_system_time_info.locked_use = 0;
if(GetTimeZoneInformation(&static_tzi) &&
static_tzi.StandardDate.wMonth != 0 &&
static_tzi.DaylightDate.wMonth != 0) {
have_static_tzi = 1;
}
+}
- erts_smp_mtx_init(&wrap_lock, "sys_gethrtime");
-
- return 1;
+void
+erts_late_sys_init_time(void)
+{
+ if (erts_sys_time_data__.r.o.os_monotonic_time == os_monotonic_time_gtc32)
+ erts_late_init_os_monotonic_time_extender(&internal_state.wr.m.os_mtime_xtnd);
}
/* Returns a switchtimes for DST as UTC filetimes given data from a
@@ -377,41 +700,27 @@ sys_gettimeofday(SysTimeval *tv)
EPOCH_JULIAN_DIFF);
}
-extern int erts_initialized;
-SysHrTime
-sys_gethrtime(void)
+ErtsSystemTime
+erts_os_system_time(void)
{
- if (pGetTickCount64 != NULL) {
- return ((SysHrTime) pGetTickCount64()) * LL_LITERAL(1000000);
- } else {
- DWORD ticks;
- SysHrTime res;
- erts_smp_mtx_lock(&wrap_lock);
- ticks = (SysHrTime) (GetTickCount() & 0x7FFFFFFF);
- if (ticks < (SysHrTime) last_tick_count) {
- /* Detect a race that should no longer be here... */
- if ((((SysHrTime) last_tick_count) - ((SysHrTime) ticks)) > 1000) {
- wrap += LL_LITERAL(1) << 31;
- } else {
- /*
- * XXX Debug: Violates locking order, remove all this,
- * after testing!
- */
- erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
- erts_dsprintf(dsbufp, "Did not wrap when last_tick %d "
- "and tick %d",
- last_tick_count, ticks);
- erts_send_error_to_logger_nogl(dsbufp);
- ticks = last_tick_count;
- }
- }
- last_tick_count = ticks;
- res = ((((LONGLONG) ticks) + wrap) * LL_LITERAL(1000000));
- erts_smp_mtx_unlock(&wrap_lock);
- return res;
+ SYSTEMTIME st;
+ ErtsSystemTime stime;
+
+ GetSystemTime(&st);
+ stime = SystemTime2MilliSec(&st);
+
+ if (internal_state.r.o.using_get_tick_count_time_unit) {
+ stime <<= ERTS_GET_TICK_COUNT_TIME_UNIT_SHIFT;
+ return stime;
}
+
+ return ((ErtsSystemTime)
+ erts_time_unit_conversion((Uint64) stime,
+ (Uint32) 1000,
+ internal_state.r.o.pcf));
}
+
clock_t
sys_times(SysTimes *buffer) {
clock_t kernel_ticks = (GetTickCount() /