aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator')
-rw-r--r--erts/emulator/beam/beam_ranges.c2
-rw-r--r--erts/emulator/beam/bif.c8
-rw-r--r--erts/emulator/beam/big.c7
-rw-r--r--erts/emulator/beam/erl_alloc.types4
-rw-r--r--erts/emulator/beam/erl_alloc_util.c323
-rw-r--r--erts/emulator/beam/erl_alloc_util.h13
-rw-r--r--erts/emulator/beam/erl_bif_info.c46
-rw-r--r--erts/emulator/beam/erl_bif_port.c15
-rw-r--r--erts/emulator/beam/erl_init.c18
-rw-r--r--erts/emulator/beam/erl_nif.c1
-rw-r--r--erts/emulator/beam/erl_nif.h23
-rw-r--r--erts/emulator/beam/erl_port_task.c24
-rw-r--r--erts/emulator/beam/erl_port_task.h2
-rw-r--r--erts/emulator/beam/erl_process.c68
-rw-r--r--erts/emulator/beam/erl_process.h1
-rw-r--r--erts/emulator/beam/sys.h14
-rw-r--r--erts/emulator/beam/utils.c1
-rw-r--r--erts/emulator/drivers/common/inet_drv.c12
-rw-r--r--erts/emulator/drivers/win32/win_efile.c4
-rw-r--r--erts/emulator/hipe/hipe_amd64_bifs.m45
-rw-r--r--erts/emulator/hipe/hipe_bif0.c2
-rw-r--r--erts/emulator/hipe/hipe_mode_switch.c20
-rw-r--r--erts/emulator/hipe/hipe_risc_glue.h8
-rw-r--r--erts/emulator/hipe/hipe_x86_glue.h8
-rw-r--r--erts/emulator/internal_doc/CarrierMigration.md104
-rw-r--r--erts/emulator/internal_doc/SuperCarrier.md191
-rw-r--r--erts/emulator/sys/common/erl_check_io.c694
-rw-r--r--erts/emulator/sys/common/erl_check_io.h45
-rw-r--r--erts/emulator/sys/common/erl_sys_common_misc.c8
-rw-r--r--erts/emulator/sys/unix/erl_unix_sys.h27
-rw-r--r--erts/emulator/sys/unix/sys.c6
-rw-r--r--erts/emulator/sys/win32/erl_poll.c2
-rw-r--r--erts/emulator/sys/win32/erl_win_sys.h4
-rw-r--r--erts/emulator/test/a_SUITE.erl14
-rw-r--r--erts/emulator/test/driver_SUITE.erl66
-rw-r--r--erts/emulator/test/float_SUITE_data/fp_drv.c17
-rw-r--r--erts/emulator/test/nif_SUITE.erl3
-rw-r--r--erts/emulator/test/nif_SUITE_data/nif_SUITE.c13
-rw-r--r--erts/emulator/test/port_SUITE.erl51
-rw-r--r--erts/emulator/test/z_SUITE.erl28
40 files changed, 1611 insertions, 291 deletions
diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c
index 0f2d5d0c2a..cb6470638f 100644
--- a/erts/emulator/beam/beam_ranges.c
+++ b/erts/emulator/beam/beam_ranges.c
@@ -282,7 +282,7 @@ find_range(BeamInstr* pc)
while (low < high) {
if (pc < mid->start) {
high = mid;
- } else if (pc > RANGE_END(mid)) {
+ } else if (pc >= RANGE_END(mid)) {
low = mid + 1;
} else {
erts_smp_atomic_set_nob(&r[active].mid, (erts_aint_t) mid);
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index a5be8e1529..42dd160e38 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -2772,6 +2772,7 @@ static int do_list_to_integer(Process *p, Eterm orig_list,
Eterm *integer, Eterm *rest)
{
Sint i = 0;
+ Uint ui = 0;
int skip = 0;
int neg = 0;
int n = 0;
@@ -2825,8 +2826,8 @@ static int do_list_to_integer(Process *p, Eterm orig_list,
unsigned_val(CAR(list_val(lst))) > '9') {
break;
}
- i = i * 10;
- i = i + unsigned_val(CAR(list_val(lst))) - '0';
+ ui = ui * 10;
+ ui = ui + unsigned_val(CAR(list_val(lst))) - '0';
n++;
lst = CDR(list_val(lst));
if (is_nil(lst)) {
@@ -2850,7 +2851,8 @@ static int do_list_to_integer(Process *p, Eterm orig_list,
*/
if (n <= SMALL_DIGITS) { /* It must be small */
- if (neg) i = -i;
+ if (neg) i = -(Sint)ui;
+ else i = (Sint)ui;
res = make_small(i);
} else {
lg2 = (n+1)*230/69+1;
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index a8710dd910..de7d370938 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -274,10 +274,9 @@
_b = _b << _s; \
_vn1 = _b >> H_EXP; \
_vn0 = _b & LO_MASK; \
- /* Sometimes _s is 0 which triggers undefined behaviour for the \
- (_a0>>(D_EXP-_s)) shift, but this is ok because the \
- & -s will make it all to 0 later anyways. */ \
- _un32 = (_a1 << _s) | ((_a0>>(D_EXP-_s)) & (-_s >> (D_EXP-1))); \
+ /* If needed to avoid undefined behaviour */ \
+ if (_s) _un32 = (_a1 << _s) | ((_a0>>(D_EXP-_s)) & (-_s >> (D_EXP-1))); \
+ else _un32 = _a1; \
_un10 = _a0 << _s; \
_un1 = _un10 >> H_EXP; \
_un0 = _un10 & LO_MASK; \
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 37354b7f8d..21434eb117 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -267,7 +267,6 @@ type CODE_IX_LOCK_Q SHORT_LIVED SYSTEM code_ix_lock_q
type PROC_INTERVAL LONG_LIVED SYSTEM process_interval
type BUSY_CALLER_TAB SHORT_LIVED SYSTEM busy_caller_table
type BUSY_CALLER SHORT_LIVED SYSTEM busy_caller
-type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap
type PROC_SYS_TSK SHORT_LIVED PROCESSES proc_sys_task
type PROC_SYS_TSK_QS SHORT_LIVED PROCESSES proc_sys_task_queues
@@ -364,6 +363,7 @@ type NLINK_SH STANDARD_LOW PROCESSES nlink_sh
type AINFO_REQ STANDARD_LOW SYSTEM alloc_info_request
type SCHED_WTIME_REQ STANDARD_LOW SYSTEM sched_wall_time_request
type GC_INFO_REQ STANDARD_LOW SYSTEM gc_info_request
+type PORT_DATA_HEAP STANDARD_LOW SYSTEM port_data_heap
+else # "fullword"
@@ -383,6 +383,7 @@ type NLINK_SH FIXED_SIZE PROCESSES nlink_sh
type AINFO_REQ SHORT_LIVED SYSTEM alloc_info_request
type SCHED_WTIME_REQ SHORT_LIVED SYSTEM sched_wall_time_request
type GC_INFO_REQ SHORT_LIVED SYSTEM gc_info_request
+type PORT_DATA_HEAP STANDARD SYSTEM port_data_heap
+endif
@@ -397,6 +398,7 @@ type DRV_EV_STATE LONG_LIVED SYSTEM driver_event_state
type DRV_EV_D_STATE FIXED_SIZE SYSTEM driver_event_data_state
type DRV_SEL_D_STATE FIXED_SIZE SYSTEM driver_select_data_state
type FD_LIST SHORT_LIVED SYSTEM fd_list
+type ACTIVE_FD_ARR SHORT_LIVED SYSTEM active_fd_array
type POLLSET LONG_LIVED SYSTEM pollset
type POLLSET_UPDREQ SHORT_LIVED SYSTEM pollset_update_req
type POLL_FDS LONG_LIVED SYSTEM poll_fds
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index a4e164bf51..e3172dc4fb 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -205,7 +205,7 @@ MBC after deallocating first block:
ASSERT(((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
(B)->bhdr = ((Sz) | (F)), \
(B)->u.carrier = (C))
-
+
# define IS_MBC_FIRST_ABLK(AP,B) \
((((UWord)(B) & ~ERTS_SACRR_UNIT_MASK) == MBC_HEADER_SIZE(AP)) \
&& ((B)->bhdr & MBC_ABLK_OFFSET_MASK) == 0)
@@ -378,9 +378,8 @@ do { \
#ifdef ERTS_SMP
#define SBC_HEADER_SIZE \
- (UNIT_CEILING(sizeof(Carrier_t) \
- - sizeof(ErtsAlcCPoolData_t) \
- + ABLK_HDR_SZ) \
+ (UNIT_CEILING(offsetof(Carrier_t, cpool) \
+ + ABLK_HDR_SZ) \
- ABLK_HDR_SZ)
#else
#define SBC_HEADER_SIZE \
@@ -929,6 +928,88 @@ unlink_carrier(CarrierList_t *cl, Carrier_t *crr)
#ifdef ERTS_SMP
+#ifdef DEBUG
+static int is_in_list(ErtsDoubleLink_t* sentinel, ErtsDoubleLink_t* node)
+{
+ ErtsDoubleLink_t* p;
+
+ ASSERT(node != sentinel);
+ for (p = sentinel->next; p != sentinel; p = p->next) {
+ if (p == node)
+ return 1;
+ }
+ return 0;
+}
+#endif /* DEBUG */
+
+static ERTS_INLINE void
+link_edl_after(ErtsDoubleLink_t* after_me, ErtsDoubleLink_t* node)
+{
+ ErtsDoubleLink_t* before_me = after_me->next;
+ ASSERT(node != after_me && node != before_me);
+ node->next = before_me;
+ node->prev = after_me;
+ before_me->prev = node;
+ after_me->next = node;
+}
+
+static ERTS_INLINE void
+link_edl_before(ErtsDoubleLink_t* before_me, ErtsDoubleLink_t* node)
+{
+ ErtsDoubleLink_t* after_me = before_me->prev;
+ ASSERT(node != before_me && node != after_me);
+ node->next = before_me;
+ node->prev = after_me;
+ before_me->prev = node;
+ after_me->next = node;
+}
+
+static ERTS_INLINE void
+unlink_edl(ErtsDoubleLink_t* node)
+{
+ node->next->prev = node->prev;
+ node->prev->next = node->next;
+}
+
+static ERTS_INLINE void
+relink_edl_before(ErtsDoubleLink_t* before_me, ErtsDoubleLink_t* node)
+{
+ if (node != before_me && node != before_me->prev) {
+ unlink_edl(node);
+ link_edl_before(before_me, node);
+ }
+}
+
+static ERTS_INLINE int is_abandoned(Carrier_t *crr)
+{
+ return crr->cpool.abandoned.next != NULL;
+}
+
+static ERTS_INLINE void
+link_abandoned_carrier(ErtsDoubleLink_t* list, Carrier_t *crr)
+{
+ ASSERT(!is_abandoned(crr));
+
+ link_edl_after(list, &crr->cpool.abandoned);
+
+ ASSERT(crr->cpool.abandoned.next != &crr->cpool.abandoned);
+ ASSERT(crr->cpool.abandoned.prev != &crr->cpool.abandoned);
+}
+
+static ERTS_INLINE void
+unlink_abandoned_carrier(Carrier_t *crr)
+{
+ ASSERT(is_in_list(&crr->cpool.orig_allctr->cpool.pooled_list,
+ &crr->cpool.abandoned) ||
+ is_in_list(&crr->cpool.orig_allctr->cpool.traitor_list,
+ &crr->cpool.abandoned));
+
+ unlink_edl(&crr->cpool.abandoned);
+
+ crr->cpool.abandoned.next = NULL;
+ crr->cpool.abandoned.prev = NULL;
+}
+
static ERTS_INLINE void
clear_busy_pool_carrier(Allctr_t *allctr, Carrier_t *crr)
{
@@ -955,7 +1036,7 @@ clear_busy_pool_carrier(Allctr_t *allctr, Carrier_t *crr)
}
}
-#endif
+#endif /* ERTS_SMP */
#if 0
#define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B) \
@@ -1775,6 +1856,18 @@ handle_delayed_dealloc(Allctr_t *allctr,
* data has been overwritten by the queue.
*/
Carrier_t *crr = FIRST_BLK_TO_MBC(allctr, blk);
+
+ /* Restore word overwritten by the dd-queue as it will be read
+ * if this carrier is pulled from dc_list by cpool_fetch()
+ */
+ ERTS_ALC_CPOOL_ASSERT(FBLK_TO_MBC(blk) != crr);
+ ERTS_ALC_CPOOL_ASSERT(sizeof(ErtsAllctrDDBlock_t) == sizeof(void*));
+#ifdef MBC_ABLK_OFFSET_BITS
+ blk->u.carrier = crr;
+#else
+ blk->carrier = crr;
+#endif
+
ERTS_ALC_CPOOL_ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(allctr));
ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr);
ERTS_ALC_CPOOL_ASSERT(((erts_aint_t) allctr)
@@ -2563,10 +2656,9 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
#ifdef ERTS_SMP
#define ERTS_ALC_MAX_DEALLOC_CARRIER 10
-#define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT 10
+#define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT 20
+#define ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT 10
#define ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT 100
-#define ERTS_ALC_CPOOL_MAX_NO_CARRIERS 5
-#define ERTS_ALC_CPOOL_INSERT_ALLOWED_OFFSET 100
#define ERTS_ALC_CPOOL_MAX_FAILED_STAT_READS 3
#define ERTS_ALC_CPOOL_PTR_MOD_MRK (((erts_aint_t) 1) << 0)
@@ -2743,9 +2835,6 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr)
(erts_aint_t) CARRIER_SZ(crr));
erts_atomic_inc_nob(&allctr->cpool.stat.no_carriers);
- erts_smp_atomic_set_nob(&crr->allctr,
- ((erts_aint_t) allctr)|ERTS_CRR_ALCTR_FLG_IN_POOL);
-
/*
* We search in 'next' direction and begin by passing
* one element before trying to insert. This in order to
@@ -2804,6 +2893,9 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr)
cpool_set_mod_marked(&cpd2p->prev,
(erts_aint_t) &crr->cpool,
(erts_aint_t) cpd1p);
+
+ erts_smp_atomic_set_wb(&crr->allctr,
+ ((erts_aint_t) allctr)|ERTS_CRR_ALCTR_FLG_IN_POOL);
}
static void
@@ -2904,59 +2996,163 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr)
static Carrier_t *
cpool_fetch(Allctr_t *allctr, UWord size)
{
- int i;
+ int i, i_stop, has_passed_sentinel;
Carrier_t *crr;
ErtsAlcCPoolData_t *cpdp;
- ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+ ErtsAlcCPoolData_t *cpool_entrance;
+ ErtsAlcCPoolData_t *sentinel;
+ ErtsDoubleLink_t* dl;
+ ErtsDoubleLink_t* first_old_traitor;
ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */
|| erts_thr_progress_is_managed_thread());
- i = 0;
+ i = ERTS_ALC_CPOOL_MAX_FETCH_INSPECT;
+ first_old_traitor = allctr->cpool.traitor_list.next;
+ cpool_entrance = NULL;
- /* First; check our own pending dealloc carrier list... */
- crr = allctr->cpool.dc_list.last;
- while (crr && i < ERTS_ALC_CPOOL_MAX_FETCH_INSPECT) {
- if (erts_atomic_read_nob(&crr->cpool.max_size) >= size) {
- unlink_carrier(&allctr->cpool.dc_list, crr);
-#ifdef ERTS_ALC_CPOOL_DEBUG
- ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_xchg_nob(&crr->allctr,
- ((erts_aint_t) allctr))
- == (((erts_aint_t) allctr) & ~ERTS_CRR_ALCTR_FLG_MASK));
-#else
- erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr));
-#endif
- return crr;
+ /*
+ * Search my own pooled_list,
+ * i.e my abandoned carriers that were in the pool last time I checked.
+ */
+
+ dl = allctr->cpool.pooled_list.next;
+ while(dl != &allctr->cpool.pooled_list) {
+ erts_aint_t exp, act;
+ crr = (Carrier_t *) (((char *) dl) - offsetof(Carrier_t, cpool.abandoned));
+
+ ASSERT(!is_in_list(&allctr->cpool.traitor_list, dl));
+ ASSERT(crr->cpool.orig_allctr == allctr);
+ dl = dl->next;
+ exp = erts_smp_atomic_read_rb(&crr->allctr);
+ if ((exp & ERTS_CRR_ALCTR_FLG_MASK) == ERTS_CRR_ALCTR_FLG_IN_POOL
+ && erts_atomic_read_nob(&crr->cpool.max_size) >= size) {
+ /* Try to fetch it... */
+ act = erts_smp_atomic_cmpxchg_mb(&crr->allctr,
+ (erts_aint_t) allctr,
+ exp);
+ if (act == exp) {
+ cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr);
+ unlink_abandoned_carrier(crr);
+
+ /* Move sentinel to continue next search from here */
+ relink_edl_before(dl, &allctr->cpool.pooled_list);
+ return crr;
+ }
+ exp = act;
+ }
+ if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) {
+ if (!cpool_entrance)
+ cpool_entrance = &crr->cpool;
+ }
+ else { /* Not in pool, move to traitor_list */
+ unlink_abandoned_carrier(crr);
+ link_abandoned_carrier(&allctr->cpool.traitor_list, crr);
+ }
+ if (--i <= 0) {
+ /* Move sentinel to continue next search from here */
+ relink_edl_before(dl, &allctr->cpool.pooled_list);
+ return NULL;
}
- crr = crr->prev;
- i++;
}
- /* ... then the pool ... */
+ /* Now search traitor_list.
+ * i.e carriers employed by other allocators last time I checked.
+ * They might have been abandoned since then.
+ */
+
+ i_stop = (i < ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT ?
+ 0 : i - ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT);
+ dl = first_old_traitor;
+ while(dl != &allctr->cpool.traitor_list) {
+ erts_aint_t exp, act;
+ crr = (Carrier_t *) (((char *) dl) - offsetof(Carrier_t, cpool.abandoned));
+ ASSERT(dl != &allctr->cpool.pooled_list);
+ ASSERT(crr->cpool.orig_allctr == allctr);
+ dl = dl->next;
+ exp = erts_smp_atomic_read_rb(&crr->allctr);
+ if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) {
+ if (!(exp & ERTS_CRR_ALCTR_FLG_BUSY)
+ && erts_atomic_read_nob(&crr->cpool.max_size) >= size) {
+ /* Try to fetch it... */
+ act = erts_smp_atomic_cmpxchg_mb(&crr->allctr,
+ (erts_aint_t) allctr,
+ exp);
+ if (act == exp) {
+ cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr);
+ unlink_abandoned_carrier(crr);
+
+ /* Move sentinel to continue next search from here */
+ relink_edl_before(dl, &allctr->cpool.traitor_list);
+ return crr;
+ }
+ exp = act;
+ }
+ if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) {
+ if (!cpool_entrance)
+ cpool_entrance = &crr->cpool;
+
+ /* Move to pooled_list */
+ unlink_abandoned_carrier(crr);
+ link_abandoned_carrier(&allctr->cpool.pooled_list, crr);
+ }
+ }
+ if (--i <= i_stop) {
+ /* Move sentinel to continue next search from here */
+ relink_edl_before(dl, &allctr->cpool.traitor_list);
+ if (i > 0)
+ break;
+ else
+ return NULL;
+ }
+ }
/*
- * We search in 'prev' direction and begin by passing
- * one element before trying to fetch. This in order to
- * avoid contention with threads inserting elements.
+ * Finally search the shared pool and try employ foreign carriers
*/
- cpdp = cpool_aint2cpd(cpool_read(&sentinel->prev));
- if (cpdp == sentinel)
- return NULL;
+ sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+ if (cpool_entrance) {
+ /* We saw a pooled carried above, use it as entrance into the pool
+ */
+ cpdp = cpool_entrance;
+ }
+ else {
+ /* No pooled carried seen above. Start search at cpool sentinel,
+ * but begin by passing one element before trying to fetch.
+ * This in order to avoid contention with threads inserting elements.
+ */
+ cpool_entrance = sentinel;
+ cpdp = cpool_aint2cpd(cpool_read(&cpool_entrance->prev));
+ if (cpdp == sentinel)
+ return NULL;
+ }
- while (i < ERTS_ALC_CPOOL_MAX_FETCH_INSPECT) {
+ has_passed_sentinel = 0;
+ while (1) {
erts_aint_t exp;
cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
- if (cpdp == sentinel) {
+ if (cpdp == cpool_entrance) {
+ if (cpool_entrance == sentinel) {
+ cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
+ if (cpdp == sentinel)
+ return NULL;
+ }
+ i = 0; /* Last one to inspect */
+ }
+ else if (cpdp == sentinel) {
+ if (has_passed_sentinel) {
+ /* We been here before. cpool_entrance must have been removed */
+ return NULL;
+ }
cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
if (cpdp == sentinel)
return NULL;
- i = ERTS_ALC_CPOOL_MAX_FETCH_INSPECT; /* Last one to inspect */
+ has_passed_sentinel = 1;
}
- crr = (Carrier_t *) (((char *) cpdp) - offsetof(Carrier_t, cpool));
+ crr = (Carrier_t *)(((char *)cpdp) - offsetof(Carrier_t, cpool));
exp = erts_smp_atomic_read_rb(&crr->allctr);
- if (((exp & (ERTS_CRR_ALCTR_FLG_IN_POOL|ERTS_CRR_ALCTR_FLG_BUSY))
- == ERTS_CRR_ALCTR_FLG_IN_POOL)
+ if (((exp & (ERTS_CRR_ALCTR_FLG_MASK)) == ERTS_CRR_ALCTR_FLG_IN_POOL)
&& (erts_atomic_read_nob(&cpdp->max_size) >= size)) {
erts_aint_t act;
/* Try to fetch it... */
@@ -2965,11 +3161,35 @@ cpool_fetch(Allctr_t *allctr, UWord size)
exp);
if (act == exp) {
cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr);
+ if (crr->cpool.orig_allctr == allctr) {
+ unlink_abandoned_carrier(crr);
+ }
return crr;
}
}
- i++;
+ if (--i <= 0)
+ return NULL;
+ }
+
+ /* Last; check our own pending dealloc carrier list... */
+ crr = allctr->cpool.dc_list.last;
+ while (crr) {
+ if (erts_atomic_read_nob(&crr->cpool.max_size) >= size) {
+ unlink_carrier(&allctr->cpool.dc_list, crr);
+#ifdef ERTS_ALC_CPOOL_DEBUG
+ ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_xchg_nob(&crr->allctr,
+ ((erts_aint_t) allctr))
+ == (((erts_aint_t) allctr) & ~ERTS_CRR_ALCTR_FLG_MASK));
+#else
+ erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr));
+#endif
+ return crr;
+ }
+ crr = crr->prev;
+ if (--i <= 0)
+ return NULL;
}
+
return NULL;
}
@@ -3066,6 +3286,9 @@ schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr)
return;
}
+ if (is_abandoned(crr))
+ unlink_abandoned_carrier(crr);
+
if (crr->cpool.thr_prgr == ERTS_THR_PRGR_INVALID
|| erts_thr_progress_has_reached(crr->cpool.thr_prgr)) {
dealloc_carrier(allctr, crr, 1);
@@ -3112,6 +3335,8 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr)
limit = (csz/100)*allctr->cpool.util_limit;
crr->cpool.abandon_limit = limit;
}
+ crr->cpool.abandoned.next = NULL;
+ crr->cpool.abandoned.prev = NULL;
}
static void
@@ -3142,6 +3367,9 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr)
STAT_MBC_CPOOL_INSERT(allctr, crr);
unlink_carrier(&allctr->mbc_list, crr);
+ if (crr->cpool.orig_allctr == allctr) {
+ link_abandoned_carrier(&allctr->cpool.pooled_list, crr);
+ }
allctr->remove_mbc(allctr, crr);
@@ -3649,6 +3877,11 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp)
if (busy_pcrr_pp && *busy_pcrr_pp) {
ERTS_ALC_CPOOL_ASSERT(*busy_pcrr_pp == crr);
*busy_pcrr_pp = NULL;
+ ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_read_nob(&crr->allctr)
+ == (((erts_aint_t) allctr)
+ | ERTS_CRR_ALCTR_FLG_IN_POOL
+ | ERTS_CRR_ALCTR_FLG_BUSY));
+ erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr));
cpool_delete(allctr, allctr, crr);
}
else
@@ -5528,6 +5761,10 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
allctr->min_block_size = sz;
}
+ allctr->cpool.pooled_list.next = &allctr->cpool.pooled_list;
+ allctr->cpool.pooled_list.prev = &allctr->cpool.pooled_list;
+ allctr->cpool.traitor_list.next = &allctr->cpool.traitor_list;
+ allctr->cpool.traitor_list.prev = &allctr->cpool.traitor_list;
allctr->cpool.dc_list.first = NULL;
allctr->cpool.dc_list.last = NULL;
allctr->cpool.abandon_limit = 0;
diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h
index 7be6b1ed9d..eee920e66c 100644
--- a/erts/emulator/beam/erl_alloc_util.h
+++ b/erts/emulator/beam/erl_alloc_util.h
@@ -268,6 +268,11 @@ typedef union {char c[ERTS_ALLOC_ALIGN_BYTES]; long l; double d;} Unit_t;
#ifdef ERTS_SMP
+typedef struct ErtsDoubleLink_t_ {
+ struct ErtsDoubleLink_t_ *next;
+ struct ErtsDoubleLink_t_ *prev;
+}ErtsDoubleLink_t;
+
typedef struct {
erts_atomic_t next;
erts_atomic_t prev;
@@ -277,6 +282,7 @@ typedef struct {
UWord abandon_limit;
UWord blocks;
UWord blocks_size;
+ ErtsDoubleLink_t abandoned; /* node in pooled_list or traitor_list */
} ErtsAlcCPoolData_t;
#endif
@@ -500,7 +506,12 @@ struct Allctr_t_ {
CarrierList_t sbc_list;
#ifdef ERTS_SMP
struct {
- CarrierList_t dc_list;
+ /* pooled_list, traitor list and dc_list contain only
+ carriers _created_ by this allocator */
+ ErtsDoubleLink_t pooled_list;
+ ErtsDoubleLink_t traitor_list;
+ CarrierList_t dc_list;
+
UWord abandon_limit;
int disable_abandon;
int check_limit_count;
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 6efe9d9550..b90362d82c 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -27,6 +27,7 @@
#include "erl_process.h"
#include "error.h"
#include "erl_driver.h"
+#include "erl_nif.h"
#include "bif.h"
#include "big.h"
#include "erl_version.h"
@@ -90,7 +91,7 @@ static char erts_system_version[] = ("Erlang/OTP " ERLANG_OTP_RELEASE
" [smp:%beu:%beu]"
#endif
#ifdef USE_THREADS
-#ifdef ERTS_DIRTY_SCHEDULERS
+#if defined(ERTS_DIRTY_SCHEDULERS) && defined(ERTS_SMP)
" [ds:%beu:%beu:%beu]"
#endif
" [async-threads:%d]"
@@ -2459,6 +2460,13 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
ERL_DRV_EXTENDED_MINOR_VERSION);
hp = HAlloc(BIF_P, 2*n);
BIF_RET(buf_to_intlist(&hp, buf, n, NIL));
+ } else if (ERTS_IS_ATOM_STR("nif_version", BIF_ARG_1)) {
+ char buf[42];
+ int n = erts_snprintf(buf, 42, "%d.%d",
+ ERL_NIF_MAJOR_VERSION,
+ ERL_NIF_MINOR_VERSION);
+ hp = HAlloc(BIF_P, 2*n);
+ BIF_RET(buf_to_intlist(&hp, buf, n, NIL));
} else if (ERTS_IS_ATOM_STR("smp_support", BIF_ARG_1)) {
#ifdef ERTS_SMP
BIF_RET(am_true);
@@ -2696,6 +2704,9 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
? am_disabled
: am_enabled);
}
+ else if (ERTS_IS_ATOM_STR("eager_check_io",BIF_ARG_1)) {
+ BIF_RET(erts_eager_check_io ? am_true : am_false);
+ }
BIF_ERROR(BIF_P, BADARG);
}
@@ -3304,17 +3315,38 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
BIF_RET(make_small((Uint) words));
}
else if (ERTS_IS_ATOM_STR("check_io_debug", BIF_ARG_1)) {
- /* Used by (emulator) */
- int res;
+ /* Used by driver_SUITE (emulator) */
+ Uint sz, *szp;
+ Eterm res, *hp, **hpp;
+ int no_errors;
+ ErtsCheckIoDebugInfo ciodi = {0};
#ifdef HAVE_ERTS_CHECK_IO_DEBUG
erts_smp_proc_unlock(BIF_P,ERTS_PROC_LOCK_MAIN);
- res = erts_check_io_debug();
+ no_errors = erts_check_io_debug(&ciodi);
erts_smp_proc_lock(BIF_P,ERTS_PROC_LOCK_MAIN);
#else
- res = 0;
+ no_errors = 0;
#endif
- ASSERT(res >= 0);
- BIF_RET(erts_make_integer((Uint) res, BIF_P));
+ sz = 0;
+ szp = &sz;
+ hpp = NULL;
+ while (1) {
+ res = erts_bld_tuple(hpp, szp, 4,
+ erts_bld_uint(hpp, szp,
+ (Uint) no_errors),
+ erts_bld_uint(hpp, szp,
+ (Uint) ciodi.no_used_fds),
+ erts_bld_uint(hpp, szp,
+ (Uint) ciodi.no_driver_select_structs),
+ erts_bld_uint(hpp, szp,
+ (Uint) ciodi.no_driver_event_structs));
+ if (hpp)
+ break;
+ hp = HAlloc(BIF_P, sz);
+ szp = NULL;
+ hpp = &hp;
+ }
+ BIF_RET(res);
}
else if (ERTS_IS_ATOM_STR("process_info_args", BIF_ARG_1)) {
/* Used by process_SUITE (emulator) */
diff --git a/erts/emulator/beam/erl_bif_port.c b/erts/emulator/beam/erl_bif_port.c
index afb33c1cdb..64bd598ba6 100644
--- a/erts/emulator/beam/erl_bif_port.c
+++ b/erts/emulator/beam/erl_bif_port.c
@@ -493,8 +493,8 @@ void
erts_cleanup_port_data(Port *prt)
{
ASSERT(erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLGS_INVALID_LOOKUP);
- cleanup_old_port_data(erts_smp_atomic_read_nob(&prt->data));
- erts_smp_atomic_set_nob(&prt->data, (erts_aint_t) THE_NON_VALUE);
+ cleanup_old_port_data(erts_smp_atomic_xchg_nob(&prt->data,
+ (erts_aint_t) NULL));
}
Uint
@@ -554,6 +554,7 @@ BIF_RETTYPE port_set_data_2(BIF_ALIST_2)
hp = &pdhp->heap[0];
pdhp->off_heap.first = NULL;
pdhp->off_heap.overhead = 0;
+ pdhp->hsize = hsize;
pdhp->data = copy_struct(BIF_ARG_2, hsize, &hp, &pdhp->off_heap);
data = (erts_aint_t) pdhp;
ASSERT((data & 0x3) == 0);
@@ -561,8 +562,14 @@ BIF_RETTYPE port_set_data_2(BIF_ALIST_2)
data = erts_smp_atomic_xchg_wb(&prt->data, data);
+ if (data == (erts_aint_t)NULL) {
+ /* Port terminated by racing thread */
+ data = erts_smp_atomic_xchg_wb(&prt->data, data);
+ ASSERT(data != (erts_aint_t)NULL);
+ cleanup_old_port_data(data);
+ BIF_ERROR(BIF_P, BADARG);
+ }
cleanup_old_port_data(data);
-
BIF_RET(am_true);
}
@@ -581,6 +588,8 @@ BIF_RETTYPE port_get_data_1(BIF_ALIST_1)
BIF_ERROR(BIF_P, BADARG);
data = erts_smp_atomic_read_ddrb(&prt->data);
+ if (data == (erts_aint_t)NULL)
+ BIF_ERROR(BIF_P, BADARG); /* Port terminated by racing thread */
if ((data & 0x3) != 0) {
res = (Eterm) (UWord) data;
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 88c4006934..61f8385efc 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -548,6 +548,8 @@ void erts_usage(void)
erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
erts_fprintf(stderr, "-sct cput set cpu topology,\n");
erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
+ erts_fprintf(stderr, "-secio bool enable/disable eager check I/O scheduling,\n");
+ erts_fprintf(stderr, " see the erl(1) documentation for more info.\n");
#if ERTS_HAVE_SCHED_UTIL_BALANCING_SUPPORT_OPT
erts_fprintf(stderr, "-sub bool enable/disable scheduler utilization balancing,\n");
#else
@@ -1674,6 +1676,22 @@ erl_start(int argc, char **argv)
erts_usage();
}
}
+ else if (has_prefix("ecio", sub_param)) {
+ arg = get_arg(sub_param+4, argv[i+1], &i);
+#ifndef __OSE__
+ if (sys_strcmp("true", arg) == 0)
+ erts_eager_check_io = 1;
+ else
+#endif
+ if (sys_strcmp("false", arg) == 0)
+ erts_eager_check_io = 0;
+ else {
+ erts_fprintf(stderr,
+ "bad schedule eager check I/O value '%s'\n",
+ arg);
+ erts_usage();
+ }
+ }
else if (has_prefix("pp", sub_param)) {
arg = get_arg(sub_param+2, argv[i+1], &i);
if (sys_strcmp(arg, "true") == 0)
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index ede5f335dc..adc3520ebb 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -1646,6 +1646,7 @@ init_nif_sched_data(ErlNifEnv* env, NativeFunPtr direct_fp, NativeFunPtr indirec
ep->m = env->mod_nif;
ep->fp = indirect_fp;
proc->freason = TRAP;
+ proc->arity = argc;
return THE_NON_VALUE;
}
diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h
index 226fc199a1..849024453c 100644
--- a/erts/emulator/beam/erl_nif.h
+++ b/erts/emulator/beam/erl_nif.h
@@ -241,21 +241,10 @@ extern TWinDynNifCallbacks WinDynNifCallbacks;
# else
# define ERL_NIF_INIT_DECL(MODNAME) __declspec(dllexport) ErlNifEntry* nif_init(TWinDynNifCallbacks* callbacks)
# endif
-# ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
-# define ERL_NIF_INIT_BODY do { \
- memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks)); \
- entry.options = ERL_NIF_DIRTY_NIF_OPTION; \
- } while(0)
-# else
-# define ERL_NIF_INIT_BODY memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks))
-# endif
+# define ERL_NIF_INIT_BODY memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks))
#else
# define ERL_NIF_INIT_GLOB
-# ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
-# define ERL_NIF_INIT_BODY entry.options = ERL_NIF_DIRTY_NIF_OPTION
-# else
-# define ERL_NIF_INIT_BODY
-# endif
+# define ERL_NIF_INIT_BODY
# ifdef STATIC_ERLANG_NIF
# define ERL_NIF_INIT_DECL(MODNAME) ErlNifEntry* MODNAME ## _nif_init(void)
# else
@@ -263,6 +252,11 @@ extern TWinDynNifCallbacks WinDynNifCallbacks;
# endif
#endif
+#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT
+# define ERL_NIF_ENTRY_OPTIONS ERL_NIF_DIRTY_NIF_OPTION
+#else
+# define ERL_NIF_ENTRY_OPTIONS 0
+#endif
#ifdef __cplusplus
}
@@ -288,7 +282,8 @@ ERL_NIF_INIT_DECL(NAME) \
sizeof(FUNCS) / sizeof(*FUNCS), \
FUNCS, \
LOAD, RELOAD, UPGRADE, UNLOAD, \
- ERL_NIF_VM_VARIANT \
+ ERL_NIF_VM_VARIANT, \
+ ERL_NIF_ENTRY_OPTIONS \
}; \
ERL_NIF_INIT_BODY; \
return &entry; \
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 682f6f8f4b..2aa0a27197 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -32,6 +32,7 @@
#include "global.h"
#include "erl_port_task.h"
#include "dist.h"
+#include "erl_check_io.h"
#include "dtrace-wrapper.h"
#include <stdarg.h>
@@ -550,6 +551,16 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
+reset_executed_io_task_handle(ErtsPortTask *ptp)
+{
+ if (ptp->u.alive.handle) {
+ ASSERT(ptp == handle2task(ptp->u.alive.handle));
+ erts_io_notify_port_task_executed(ptp->u.alive.handle);
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+}
+
+static ERTS_INLINE void
set_handle(ErtsPortTask *ptp, ErtsPortTaskHandle *pthp)
{
ptp->u.alive.handle = pthp;
@@ -1396,10 +1407,7 @@ erts_port_task_schedule(Eterm id,
erts_aint32_t act, add_flags;
unsigned int prof_runnable_ports;
- if (pthp && erts_port_task_is_scheduled(pthp)) {
- ASSERT(0);
- erts_port_task_abort(pthp);
- }
+ ERTS_LC_ASSERT(!pthp || !erts_port_task_is_scheduled(pthp));
ASSERT(is_internal_port(id));
@@ -1699,8 +1707,6 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
goto aborted_port_task;
}
- reset_handle(ptp);
-
if (erts_system_monitor_long_schedule != 0) {
start_time = erts_timestamp_millis();
}
@@ -1711,6 +1717,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
switch (ptp->type) {
case ERTS_PORT_TASK_TIMEOUT:
+ reset_handle(ptp);
reds = ERTS_PORT_REDS_TIMEOUT;
if (!(state & ERTS_PORT_SFLGS_DEAD)) {
DTRACE_DRIVER(driver_timeout, pp);
@@ -1725,6 +1732,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
+ reset_executed_io_task_handle(ptp);
io_tasks_executed++;
break;
case ERTS_PORT_TASK_OUTPUT:
@@ -1733,6 +1741,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
DTRACE_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
+ reset_executed_io_task_handle(ptp);
io_tasks_executed++;
break;
case ERTS_PORT_TASK_EVENT:
@@ -1742,10 +1751,12 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
(*pp->drv_ptr->event)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event,
ptp->u.alive.td.io.event_data);
+ reset_executed_io_task_handle(ptp);
io_tasks_executed++;
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
+ reset_handle(ptp);
ASSERT((state & ERTS_PORT_SFLGS_DEAD) == 0);
if (!pp->sched.taskq.bpq)
reds = ptp->u.alive.td.psig.callback(pp,
@@ -1763,6 +1774,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
break;
}
case ERTS_PORT_TASK_DIST_CMD:
+ reset_handle(ptp);
reds = erts_dist_command(pp, CONTEXT_REDS - pp->reds);
break;
default:
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index 9ef0cfcedc..406cd3c492 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -156,7 +156,7 @@ erts_port_task_handle_init(ErtsPortTaskHandle *pthp)
ERTS_GLB_INLINE int
erts_port_task_is_scheduled(ErtsPortTaskHandle *pthp)
{
- return ((void *) erts_smp_atomic_read_nob(pthp)) != NULL;
+ return ((void *) erts_smp_atomic_read_acqb(pthp)) != NULL;
}
ERTS_GLB_INLINE void erts_port_task_pre_init_sched(ErtsPortTaskSched *ptsp,
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 7b08e1773a..f84677dea4 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -148,6 +148,12 @@ extern BeamInstr beam_apply[];
extern BeamInstr beam_exit[];
extern BeamInstr beam_continue_exit[];
+#ifdef __OSE__
+/* Eager check I/O not supported on OSE yet. */
+int erts_eager_check_io = 0;
+#else
+int erts_eager_check_io = 0;
+#endif
int erts_sched_compact_load;
int erts_sched_balance_util = 0;
Uint erts_no_schedulers;
@@ -2381,29 +2387,47 @@ try_set_sys_scheduling(void)
#endif
static ERTS_INLINE int
-prepare_for_sys_schedule(ErtsSchedulerData *esdp)
+prepare_for_sys_schedule(ErtsSchedulerData *esdp, int non_blocking)
{
+ if (non_blocking && erts_eager_check_io) {
#ifdef ERTS_SMP
- while (!erts_port_task_have_outstanding_io_tasks()
- && try_set_sys_scheduling()) {
#ifdef ERTS_SCHED_ONLY_POLL_SCHED_1
- if (esdp->no != 1) {
- /* If we are not scheduler 1 and ERTS_SCHED_ONLY_POLL_SCHED_1 is used
- then we make sure to wake scheduler 1 */
- ErtsRunQueue *rq = ERTS_RUNQ_IX(0);
- clear_sys_scheduling();
- wake_scheduler(rq);
- return 0;
- }
+ if (esdp->no != 1) {
+ /* If we are not scheduler 1 and ERTS_SCHED_ONLY_POLL_SCHED_1 is used
+ then we make sure to wake scheduler 1 */
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(0);
+ wake_scheduler(rq);
+ return 0;
+ }
#endif
- if (!erts_port_task_have_outstanding_io_tasks())
+ return try_set_sys_scheduling();
+#else
return 1;
- clear_sys_scheduling();
+#endif
}
- return 0;
+ else {
+#ifdef ERTS_SMP
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+#ifdef ERTS_SCHED_ONLY_POLL_SCHED_1
+ if (esdp->no != 1) {
+ /* If we are not scheduler 1 and ERTS_SCHED_ONLY_POLL_SCHED_1 is used
+ then we make sure to wake scheduler 1 */
+ ErtsRunQueue *rq = ERTS_RUNQ_IX(0);
+ clear_sys_scheduling();
+ wake_scheduler(rq);
+ return 0;
+ }
+#endif
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
#else
- return !erts_port_task_have_outstanding_io_tasks();
+ return !erts_port_task_have_outstanding_io_tasks();
#endif
+ }
}
#ifdef ERTS_SMP
@@ -2780,7 +2804,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
* be waiting in erl_sys_schedule()
*/
- if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule(esdp)) {
+ if (ERTS_SCHEDULER_IS_DIRTY(esdp) || !prepare_for_sys_schedule(esdp, 0)) {
sched_waiting(esdp->no, rq);
@@ -2944,7 +2968,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
* Got to check that we still got I/O tasks; otherwise
* we have to continue checking for I/O...
*/
- if (!prepare_for_sys_schedule(esdp)) {
+ if (!prepare_for_sys_schedule(esdp, 0)) {
spincount *= ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
goto tse_wait;
}
@@ -2966,7 +2990,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
* Got to check that we still got I/O tasks; otherwise
* we have to wait in erl_sys_schedule() after all...
*/
- if (!prepare_for_sys_schedule(esdp)) {
+ if (!prepare_for_sys_schedule(esdp, 0)) {
/*
* Not allowed to wait in erl_sys_schedule;
* do tse wait instead...
@@ -9200,7 +9224,7 @@ Process *schedule(Process *p, int calls)
}
else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) &&
(fcalls > input_reductions &&
- prepare_for_sys_schedule(esdp))) {
+ prepare_for_sys_schedule(esdp, !0))) {
/*
* Schedule system-level activities.
*/
@@ -9208,8 +9232,6 @@ Process *schedule(Process *p, int calls)
erts_smp_atomic32_set_relb(&function_calls, 0);
fcalls = 0;
- ASSERT(!erts_port_task_have_outstanding_io_tasks());
-
#if 0 /* Not needed since we wont wait in sys schedule */
erts_sys_schedule_interrupt(0);
#endif
@@ -9241,7 +9263,9 @@ Process *schedule(Process *p, int calls)
if (RUNQ_READ_LEN(&rq->ports.info.len)) {
int have_outstanding_io;
have_outstanding_io = erts_port_task_execute(rq, &esdp->current_port);
- if ((have_outstanding_io && fcalls > 2*input_reductions)
+ if ((!erts_eager_check_io
+ && have_outstanding_io
+ && fcalls > 2*input_reductions)
|| rq->halt_in_progress) {
/*
* If we have performed more than 2*INPUT_REDUCTIONS since
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 3b0798207e..27a3a3553b 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -104,6 +104,7 @@ struct saved_calls {
};
extern Export exp_send, exp_receive, exp_timeout;
+extern int erts_eager_check_io;
extern int erts_sched_compact_load;
extern int erts_sched_balance_util;
extern Uint erts_no_schedulers;
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 3d8dd9c6d0..c29d4b3777 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -66,8 +66,12 @@
*/
#ifndef ERTS_SYS_FD_TYPE
+#define ERTS_SYS_FD_INVALID ((ErtsSysFdType) -1)
typedef int ErtsSysFdType;
#else
+#ifndef ERTS_SYS_FD_INVALID
+# error missing ERTS_SYS_FD_INVALID
+#endif
typedef ERTS_SYS_FD_TYPE ErtsSysFdType;
#endif
@@ -501,7 +505,7 @@ extern volatile int erts_writing_erl_crash_dump;
# define NO_ERF
# define NO_ERFC
/* This definition doesn't take NaN into account, but matherr() gets those */
-# define finite(x) (fabs(x) != HUGE_VAL)
+# define isfinite(x) (fabs(x) != HUGE_VAL)
# define USE_MATHERR
# define HAVE_FINITE
#endif
@@ -744,6 +748,14 @@ void init_getenv_state(GETENV_STATE *);
char * getenv_string(GETENV_STATE *);
void fini_getenv_state(GETENV_STATE *);
+#define HAVE_ERTS_CHECK_IO_DEBUG
+typedef struct {
+ int no_used_fds;
+ int no_driver_select_structs;
+ int no_driver_event_structs;
+} ErtsCheckIoDebugInfo;
+int erts_check_io_debug(ErtsCheckIoDebugInfo *ip);
+
/* xxxP */
#define SYS_DEFAULT_FLOAT_DECIMALS 20
void init_sys_float(void);
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 55f9e68e78..f20e6e5665 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -48,6 +48,7 @@
#include "erl_sched_spec_pre_alloc.h"
#include "beam_bp.h"
#include "erl_ptab.h"
+#include "erl_check_io.h"
#undef M_TRIM_THRESHOLD
#undef M_TOP_PAD
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 891589d1c5..db8a251fdd 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -4542,11 +4542,13 @@ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type,
inet_address name;
unsigned int sz = sizeof(name);
- /* check that it is a socket and that the socket is bound */
- if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz)))
- return ctl_error(sock_errno(), rbuf, rsize);
- if (name.sa.sa_family != domain)
- return ctl_error(EINVAL, rbuf, rsize);
+ if (bound) {
+ /* check that it is a socket and that the socket is bound */
+ if (IS_SOCKET_ERROR(sock_name(s, (struct sockaddr*) &name, &sz)))
+ return ctl_error(sock_errno(), rbuf, rsize);
+ if (name.sa.sa_family != domain)
+ return ctl_error(EINVAL, rbuf, rsize);
+ }
#ifdef __OSE__
/* for fdopen duplicating the sd will allow to uniquely identify
the signal from OSE with erlang port */
diff --git a/erts/emulator/drivers/win32/win_efile.c b/erts/emulator/drivers/win32/win_efile.c
index a321bb9641..7e4043fc1b 100644
--- a/erts/emulator/drivers/win32/win_efile.c
+++ b/erts/emulator/drivers/win32/win_efile.c
@@ -1288,6 +1288,10 @@ do_fileinfo(Efile_call_state* state, Efile_info* pInfo,
{
HANDLE handle; /* Handle returned by CreateFile() */
BY_HANDLE_FILE_INFORMATION fileInfo; /* from CreateFile() */
+
+ /* We initialise nNumberOfLinks as GetFileInformationByHandle
+ does not always initialise this field */
+ fileInfo.nNumberOfLinks = 1;
if (handle = CreateFileW(name, GENERIC_READ, FILE_SHARE_FLAGS, NULL,
OPEN_EXISTING, 0, NULL)) {
GetFileInformationByHandle(handle, &fileInfo);
diff --git a/erts/emulator/hipe/hipe_amd64_bifs.m4 b/erts/emulator/hipe/hipe_amd64_bifs.m4
index 0de69a617f..a3219c7586 100644
--- a/erts/emulator/hipe/hipe_amd64_bifs.m4
+++ b/erts/emulator/hipe/hipe_amd64_bifs.m4
@@ -39,7 +39,10 @@ define(HANDLE_GOT_MBUF,`
jmp 2b')
`#if defined(ERTS_ENABLE_LOCK_CHECK) && defined(ERTS_SMP)
-# define CALL_BIF(F) movq $CSYM(F), P_BIF_CALLEE(P); call CSYM(hipe_debug_bif_wrapper)
+# define CALL_BIF(F) \
+ movq CSYM(F)@GOTPCREL(%rip), %r11; \
+ movq %r11, P_BIF_CALLEE(P); \
+ call CSYM(hipe_debug_bif_wrapper)
#else
# define CALL_BIF(F) call CSYM(F)
#endif'
diff --git a/erts/emulator/hipe/hipe_bif0.c b/erts/emulator/hipe/hipe_bif0.c
index 2497d51df1..c9eee2acf2 100644
--- a/erts/emulator/hipe/hipe_bif0.c
+++ b/erts/emulator/hipe/hipe_bif0.c
@@ -1022,7 +1022,7 @@ BIF_RETTYPE hipe_conv_big_to_float(BIF_ALIST_1)
*/
void hipe_emulate_fpe(Process* p)
{
- if (!finite(p->hipe.float_result)) {
+ if (!isfinite(p->hipe.float_result)) {
p->fp_exception = 1;
}
}
diff --git a/erts/emulator/hipe/hipe_mode_switch.c b/erts/emulator/hipe/hipe_mode_switch.c
index 4ddc2790b1..1ae1d17b7f 100644
--- a/erts/emulator/hipe/hipe_mode_switch.c
+++ b/erts/emulator/hipe/hipe_mode_switch.c
@@ -2,7 +2,7 @@
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2001-2013. All Rights Reserved.
+ * Copyright Ericsson AB 2001-2014. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -187,6 +187,9 @@ void hipe_set_call_trap(Uint *bfun, void *nfun, int is_closure)
void hipe_reserve_beam_trap_frame(Process *p, Eterm reg[], unsigned arity)
{
+ if (!hipe_bifcall_from_native_is_recursive(p))
+ return;
+
/* ensure that at least 2 words are available on the BEAM stack */
if ((p->stop - 2) < p->htop) {
DPRINTF("calling gc to reserve BEAM stack size");
@@ -195,25 +198,26 @@ void hipe_reserve_beam_trap_frame(Process *p, Eterm reg[], unsigned arity)
}
p->stop -= 2;
p->stop[0] = NIL;
- p->stop[1] = NIL;
+ p->stop[1] = hipe_beam_catch_throw;
}
static __inline__ void
hipe_push_beam_trap_frame(Process *p, Eterm reg[], unsigned arity)
{
- if (p->flags & F_DISABLE_GC) {
+ if (&p->stop[1] < p->hend && p->stop[1] == hipe_beam_catch_throw) {
/* Trap frame already reserved */
- ASSERT(p->stop[0] == NIL && p->stop[1] == NIL);
+ ASSERT(p->stop[0] == NIL);
}
else {
+ ASSERT(!(p->flags & F_DISABLE_GC));
if ((p->stop - 2) < p->htop) {
DPRINTF("calling gc to increase BEAM stack size");
p->fcalls -= erts_garbage_collect(p, 2, reg, arity);
ASSERT(!((p->stop - 2) < p->htop));
}
p->stop -= 2;
+ p->stop[1] = hipe_beam_catch_throw;
}
- p->stop[1] = hipe_beam_catch_throw;
p->stop[0] = make_cp(p->cp);
++p->catches;
p->cp = hipe_beam_pc_return;
@@ -221,12 +225,16 @@ hipe_push_beam_trap_frame(Process *p, Eterm reg[], unsigned arity)
void hipe_unreserve_beam_trap_frame(Process *p)
{
- ASSERT(p->stop[0] == NIL && p->stop[1] == NIL);
+ if (!hipe_bifcall_from_native_is_recursive(p))
+ return;
+
+ ASSERT(p->stop[0] == NIL && p->stop[1] == hipe_beam_catch_throw);
p->stop += 2;
}
static __inline__ void hipe_pop_beam_trap_frame(Process *p)
{
+ ASSERT(p->stop[1] == hipe_beam_catch_throw);
p->cp = cp_val(p->stop[0]);
--p->catches;
p->stop += 2;
diff --git a/erts/emulator/hipe/hipe_risc_glue.h b/erts/emulator/hipe/hipe_risc_glue.h
index cc2671c016..dbb7086dae 100644
--- a/erts/emulator/hipe/hipe_risc_glue.h
+++ b/erts/emulator/hipe/hipe_risc_glue.h
@@ -214,6 +214,14 @@ hipe_trap_from_native_is_recursive(Process *p)
return 0;
}
+/* Native called BIF. Is it a recursive call?
+ i.e should we return back to native when BIF is done? */
+static __inline__ int
+hipe_bifcall_from_native_is_recursive(Process *p)
+{
+ return (p->hipe.nra != (void(*)(void))&nbif_return);
+}
+
/* Native makes a call which needs to unload the parameters.
This differs from hipe_call_from_native_is_recursive() in
diff --git a/erts/emulator/hipe/hipe_x86_glue.h b/erts/emulator/hipe/hipe_x86_glue.h
index 63ad250d60..4b6e495b9a 100644
--- a/erts/emulator/hipe/hipe_x86_glue.h
+++ b/erts/emulator/hipe/hipe_x86_glue.h
@@ -207,6 +207,14 @@ hipe_trap_from_native_is_recursive(Process *p)
return 0;
}
+/* Native called BIF. Is it a recursive call?
+ i.e should we return back to native when BIF is done? */
+static __inline__ int
+hipe_bifcall_from_native_is_recursive(Process *p)
+{
+ return (*p->hipe.nsp != (Eterm)nbif_return);
+}
+
/* Native makes a call which needs to unload the parameters.
This differs from hipe_call_from_native_is_recursive() in
diff --git a/erts/emulator/internal_doc/CarrierMigration.md b/erts/emulator/internal_doc/CarrierMigration.md
index b93c11c6ec..7afdb70aef 100644
--- a/erts/emulator/internal_doc/CarrierMigration.md
+++ b/erts/emulator/internal_doc/CarrierMigration.md
@@ -146,28 +146,53 @@ Since the carrier has been unlinked from the data structure of
available free blocks, no more allocations will be made in the
carrier. The allocator instance putting the carrier into the pool,
however, still has the responsibility of performing deallocations in
-it while it remains in the pool.
+it while it remains in the pool. The allocator instance with this
+deallocation responsibility is here called the **employer**.
-Each carrier has a flag field containing information about allocator
-instance owning the carrier, a flag indicating if the carrier is in
+Each carrier has a flag field containing information about the
+employing allocator instance, a flag indicating if the carrier is in
the pool or not, and a flag indicating if it is busy or not. When the
-carrier is in the pool, the owning allocator instance needs to mark it
+carrier is in the pool, the employing allocator instance needs to mark it
as busy while operating on it. If another thread inspects it in order
-to try to fetch it from the pool, it will abort the fetch if it is
-busy. When fetching the carrier from the pool, ownership will changed
-and further deallocations in the carrier will be redirected to the new
-owner using the delayed dealloc functionality.
+to try to fetch it from the pool, it will skip it if it is busy. When
+fetching the carrier from the pool, employment will change and further
+deallocations in the carrier will be redirected to the new
+employer using the delayed dealloc functionality.
If a carrier in the pool becomes empty, it will be withdrawn from the
pool. All carriers that become empty are also always passed to its
-originating allocator instance for deallocation using the delayed
+**owning** allocator instance for deallocation using the delayed
dealloc functionality. Since carriers this way always will be
-deallocated by the allocator instance that allocated the carrier the
+deallocated by the owner, that allocated the carrier, the
underlying functionality of allocating and deallocating carriers can
remain simple and doesn't have to bother about multiple threads. In a
NUMA system we will also not mix carriers originating from multiple
NUMA nodes.
+In short:
+
+* The allocator instance that created a carrier **owns** it.
+* An empty carrier is always deallocated by its **owner**.
+* **Ownership** never changes.
+* The allocator instance that uses a carrier **employs** it.
+* An **employer** can abandon a carrier into the pool.
+* Pooled carriers are not allocated from.
+* Deallocation in a pooled carrier is still performed by its **employer**.
+* **Employment** can only change when a carrier is fetched from the pool.
+
+### Searching the pool ###
+
+To harbor real time characteristics, searching the pool is
+limited. We only inspect a limited number of carriers. If none of
+those carriers had a free block large enough to satisfy the allocation
+request, the search will fail. A carrier in the pool can also be busy,
+if another thread is currently doing block deallocation work on the
+carrier. A busy carrier will also be skipped by the search as it can
+not satisfy the request. The pool is lock free and we do not want to
+block, waiting for the other thread to finish.
+
+#### Before OTP 17.4 ####
+
When an allocator instance needs more carrier space, it always begins
by inspecting its own carriers that are waiting for thread progress
before they can be deallocated. If no such carrier could be found, it
@@ -176,6 +201,65 @@ it will allocate a new carrier. Regardless of where the allocator
instance gets the carrier from it the just links in the carrier into
its data structure of free blocks.
+#### After OTP 17.4 ####
+
+The old search algorithm had a problem as the search always started at
+the same position in the pool, the sentinel. This could lead to
+contention from concurrent searching processes. But even worse, it
+could lead to a "bad" state when searches fail with a high rate
+leading to new carriers instead being allocated. These new carriers
+may later be inserted into the pool due to bad utilization. If the
+frequency of insertions into the pool is higher than successful
+fetching from the pool, memory will eventually get exhausted.
+
+This "bad" state, consist of a cluster of small and/or highly
+fragmented carriers located at the sentinel in the pool. The largest free
+block in such a "bad" carrier is rather small, making it not able to satisfy
+most allocation requests. As the search always started at the
+sentinel, any such "bad" carriers that had been left in the pool would
+eventually cluster together at the sentinel. All searches first
+have to skip past this cluster of "bad" carriers to reach a "good"
+carrier. When the cluster gets to the same size as the search limit,
+all searches will essentially fail.
+
+To counter the "bad cluster" problem and also ease the contention, the
+search will now always start by first looking at the allocators **own**
+carriers. That is, carriers that were initially created by the
+allocator itself and later had been abandoned to the pool. If none of
+our own abandoned carrier would do, then the search continues into the
+pool, as before, to look for carriers created by other
+allocators. However, if we have at least one abandoned carrier of our
+own, that could not satisfy the request, we can use that as entry point
+into the pool.
+
+The result is that we prefer carriers created by the thread itself,
+which is good for NUMA performance. And we get more entry points when
+searching the pool, which will ease contention and clustering.
+
+To do the first search among own carriers, every allocator instance
+has two new lists; `pooled_list` and `traitor_list`. These lists are only
+accessed by the allocator itself and they only contain the allocators
+own carriers. When an owned carrier is abandoned and put in the
+pool, it is also linked into `pooled_list`. When we search our
+`pooled_list` and find a carrier that is no longer in the pool, we
+move that carrier from `pooled_list` to `traitor_list` as it is now
+employed by another allocator. If searching `pooled_list` fails, we
+also do a limited search of `traitor_list`. When finding an abandoned
+carrier in `traitor_list` it is either employed, or moved back to
+`pooled_list` if it could not satisfy the allocation request.
+
+When searching `pooled_list` and `traitor_list` we always start at the
+point where the last search ended. This to avoid clustering
+problems and increase the probability to find a "good" carrier. As
+`pooled_list` and `traitor_list` are only accessed by the owning
+allocator instance, they need no thread synchronization at all.
+
+Furthermore, the search for own carriers that are scheduled
+for deallocation is now done as the last search option. The idea is
+that it is better to reuse a poorly utilized carrier, than to
+resurrect an empty carrier that was just about to be released back to
+the OS.
+
### Result ###
The use of this strategy of abandoning carriers with poor utilization
diff --git a/erts/emulator/internal_doc/SuperCarrier.md b/erts/emulator/internal_doc/SuperCarrier.md
new file mode 100644
index 0000000000..0ad6af41de
--- /dev/null
+++ b/erts/emulator/internal_doc/SuperCarrier.md
@@ -0,0 +1,191 @@
+Super Carrier
+=============
+
+A super carrier is large memory area, allocated at VM start, which can
+be used during runtime to allocate normal carriers from.
+
+The super carrier feature was introduced in OTP R16B03. It is
+enabled with command line option +MMscs <size in Mb>
+and can be configured with other options.
+
+Problem
+-------
+
+The initial motivation for this feature was customers asking for a way
+to pre-allocate physcial memory at VM start for it to use.
+
+Other problems were different experienced limitations of the OS
+implementation of mmap:
+
+* Increasingly bad performance of mmap/munmap as the number of mmap'ed areas grow.
+* Fragmentation problem between mmap'ed areas.
+
+A third problem was management of low memory in the halfword
+emulator. The implementation used a naive linear search structure to
+hold free segments which would lead to poor performance when
+fragmentation increased.
+
+
+Solution
+--------
+
+Allocate one large continious area of address space at VM start and
+then use that area to satisfy our dynamic memory need during
+runtime. In other words: implement our own mmap.
+
+### Use cases ###
+
+If command line option +MMscrpm (Reserve Physical Memory) is set to
+false, only virtual space is allocated for the super carrier from
+start. The super carrier then acts as an "alternative mmap" implementation
+without changing the consumption of physical memory pages. Physical
+pages will be reserved on demand when an allocation is done from the super
+carrier and be unreserved when the memory is released back to the
+super carrier.
+
+If +MMscrpm is set to true, which is default, the initial allocation
+will reserve physical memory for the entire super carrier. This can be
+used by users that want to ensure a certain *minimum* amount of
+physical memory for the VM.
+
+However, what reservation of physical memory actually means highly
+depends on the operating system, and how it is configured. For
+example, different memory overcommit settings on Linux drastically
+change the behaviour.
+
+A third feature is to have the super carrier limit the *maximum*
+amount of memory used by the VM. If +MMsco (Super Carrier Only) is set
+to true, which is default, allocations will only be done from the
+super carrier. When the super carrier gets full, the VM will fail due
+to out of memory.
+If +MMsco is false, allocations will use mmap directly if the super
+carrier is full.
+
+
+
+### Implementation ###
+
+The entire super carrier implementation is kept in erl_mmap.c. The
+name suggest that it can be viewed as our own mmap implementation.
+
+A super carrier needs to satisfy two slightly different kinds of
+allocation requests; multi block carriers (MBC) and single block
+carriers (SBC). They are both rather large blocks of continious
+memory, but MBCs and SBCs have different demands on alignment and
+size.
+
+SBCs can have arbitrary size and do only need minimum 8-byte
+alignment.
+
+MBCs are more restricted. They can only have a number of fixed
+sizes that are powers of 2. The start address need to have a very
+large aligment (currently 256 kb, called "super alignment"). This is a
+design choice that allows very low overhead per allocated block in the
+MBC.
+
+To reduce fragmentation within the super carrier, it is good to keep SBCs
+and MBCs apart. MBCs with their uniform alignment and sizes can be
+packed very efficiently together. SBCs without demand for aligment can
+also be allocated quite efficiently together. But mixing them can lead
+to a lot of memory wasted when we need to create large holes of
+padding to the next alignment limit.
+
+The super carrier thus contains two areas. One area for MBCs growing from
+the bottom and up. And one area for SBCs growing from the top and
+down. Like a process with a heap and a stack growing towards each
+other.
+
+
+### Data structures ###
+
+The MBC area is called **sa** as in super aligned and the SBC area is
+called **sua** as in super un-aligned.
+
+Note that the "super" in super alignment and the "super" in super
+carrier has nothing to do with each other. We could have choosen
+another naming to avoid confusion, such as "meta" carrier or "giant"
+aligment.
+
+ +-------+ <---- sua.top
+ | sua |
+ | |
+ |-------| <---- sua.bot
+ | |
+ | |
+ | |
+ |-------| <---- sa.top
+ | |
+ | sa |
+ | |
+ +-------+ <---- sa.bot
+
+
+When a carrier is deallocated a free memory segment will be created
+inside the corresponding area, unless the carrier was at the very top
+(in `sa`) or bottom (in `sua`) in which case the area will just shrink
+down or up.
+
+We need to keep track of all the free segments in order to reuse them
+for new carrier allocations. One initial idea was to use the same
+mechanism that is used to keep track of free blocks within MBCs
+(alloc_util and the different strategies). However, that would not be
+as straight forward as one can think and can also waste quite a lot of
+memory as it uses prepended block headers. The granularity of the
+super carrier is one memory page (usually 4kb). We want to allocate
+and free entire pages and we don't want to waste an entire page just
+to hold the block header of the following pages.
+
+Instead we store the meta information about all the free segments in a
+dedicated area apart from the `sa` and `sua` areas. Every free segment is
+represented by a descriptor struct (`ErtsFreeSegDesc`).
+
+ typedef struct {
+ RBTNode snode; /* node in 'stree' */
+ RBTNode anode; /* node in 'atree' */
+ char* start;
+ char* end;
+ }ErtsFreeSegDesc;
+
+To find the smallest free segment that will satisfy a carrier allocation
+(best fit), the free segments are organized in a tree sorted by
+size (`stree`). We search in this tree at allocation. If no free segment of
+sufficient size was found, the area (`sa` or `sua`) is instead expanded.
+If two or more free segments with equal size exist, the one at lowest
+address is choosen for `sa` and highest address for `sua`.
+
+At carrier deallocation, we want to coalesce with any adjacent free
+segments, to form one large free segment. To do that, all free
+segments are also organized in a tree sorted in address order (`atree`).
+
+So, in total we keep four trees of free descriptors for the super
+carrier; two for `sa` and two for `sua`. They all use the same
+red-black-tree implementation that support the different sorting
+orders used.
+
+When allocating a new MBC we first search after a free segment in `sa`,
+then try to raise `sa.top`, and then as a fallback try to search after a
+free segment in `sua`. When an MBC is allocated in `sua`, a larger segment
+is allocated which is then trimmed to obtain the right
+alignment. Allocation search for an SBC is done in reverse order. When
+an SBC is allocated in `sa`, the size is aligned up to super aligned
+size.
+
+### The free descriptor area ###
+
+As mentioned above, the descriptors for the free segments are
+allocated in a separate area. This area has a constant configurable
+size (+MMscrfsd) that defaults to 65536 descriptors. This should be
+more than enough in most cases. If the descriptors area should fill up,
+new descriptor areas will be allocated first directly from the OS, and
+then from `sua` and `sa` in the super carrier, and lastly from the memory
+segment itself which is being deallocated. Allocating free descriptor
+areas from the super carrier is only a last resort, and should be
+avoided, as it creates fragmentation.
+
+### Halfword emulator ###
+
+The halfword emulator uses the super carrier implementation to manage
+its low memory mappings thar are needed for all term storage. The
+super carrier can here not be configured by command line options. One
+could imagine a second configurable instance of the super carrier used
+by high memory allocation, but that has not been implemented.
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 1db673e7f3..81cb5dc4bb 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -52,8 +52,17 @@ typedef char EventStateType;
#define ERTS_EV_TYPE_STOP_USE ((EventStateType) 3) /* pending stop_select */
typedef char EventStateFlags;
-#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+#define ERTS_EV_FLAG_USED ((EventStateFlags) 1) /* ERL_DRV_USE has been turned on */
+#define ERTS_EV_FLAG_DEFER_IN_EV ((EventStateFlags) 2)
+#define ERTS_EV_FLAG_DEFER_OUT_EV ((EventStateFlags) 4)
+#ifdef DEBUG
+# define ERTS_ACTIVE_FD_INC 2
+#else
+# define ERTS_ACTIVE_FD_INC 128
+#endif
+
+#define ERTS_CHECK_IO_POLL_RES_LEN 512
#if defined(ERTS_KERNEL_POLL_VERSION)
# define ERTS_CIO_EXPORT(FUNC) FUNC ## _kp
@@ -67,6 +76,7 @@ typedef char EventStateFlags;
(ERTS_POLL_USE_POLL && !ERTS_POLL_USE_KERNEL_POLL)
#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control)
+#define ERTS_CIO_POLL_CTLV ERTS_POLL_EXPORT(erts_poll_controlv)
#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait)
#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)
@@ -85,6 +95,13 @@ static struct pollset_info
{
ErtsPollSet ps;
erts_smp_atomic_t in_poll_wait; /* set while doing poll */
+ struct {
+ int six; /* start index */
+ int eix; /* end index */
+ erts_smp_atomic32_t no;
+ int size;
+ ErtsSysFdType *array;
+ } active_fd;
#ifdef ERTS_SMP
struct removed_fd* removed_list; /* list of deselected fd's*/
erts_smp_spinlock_t removed_list_lock;
@@ -97,9 +114,11 @@ typedef struct {
SafeHashBucket hb;
#endif
ErtsSysFdType fd;
- union {
- ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+ struct {
ErtsDrvSelectDataState *select; /* ERTS_EV_TYPE_DRV_SEL */
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *event; /* ERTS_EV_TYPE_DRV_EV */
+#endif
erts_driver_t* drv_ptr; /* ERTS_EV_TYPE_STOP_USE */
} driver;
ErtsPollEvents events;
@@ -169,6 +188,10 @@ static ERTS_INLINE ErtsDrvEventState* hash_new_drv_ev_state(ErtsSysFdType fd)
ErtsDrvEventState tmpl;
tmpl.fd = fd;
tmpl.driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ tmpl.driver.event = NULL;
+#endif
+ tmpl.driver.drv_ptr = NULL;
tmpl.events = 0;
tmpl.remove_cnt = 0;
tmpl.type = ERTS_EV_TYPE_NONE;
@@ -209,6 +232,65 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(removed_fd, struct removed_fd, 64, ERTS_ALC_T_F
#endif
static ERTS_INLINE void
+init_iotask(ErtsIoTask *io_task)
+{
+ erts_port_task_handle_init(&io_task->task);
+ erts_smp_atomic_init_nob(&io_task->executed_time, ~((erts_aint_t) 0));
+}
+
+static ERTS_INLINE int
+is_iotask_active(ErtsIoTask *io_task, erts_aint_t current_cio_time)
+{
+ if (erts_port_task_is_scheduled(&io_task->task))
+ return 1;
+ if (erts_smp_atomic_read_nob(&io_task->executed_time) == current_cio_time)
+ return 1;
+ return 0;
+}
+
+static ERTS_INLINE ErtsDrvSelectDataState *
+alloc_drv_select_data(void)
+{
+ ErtsDrvSelectDataState *dsp = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
+ sizeof(ErtsDrvSelectDataState));
+ dsp->inport = NIL;
+ dsp->outport = NIL;
+ init_iotask(&dsp->iniotask);
+ init_iotask(&dsp->outiotask);
+ return dsp;
+}
+
+static ERTS_INLINE void
+free_drv_select_data(ErtsDrvSelectDataState *dsp)
+{
+ ASSERT(!erts_port_task_is_scheduled(&dsp->iniotask.task));
+ ASSERT(!erts_port_task_is_scheduled(&dsp->outiotask.task));
+ erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, dsp);
+}
+
+static ERTS_INLINE ErtsDrvEventDataState *
+alloc_drv_event_data(void)
+{
+ ErtsDrvEventDataState *dep = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
+ sizeof(ErtsDrvEventDataState));
+ dep->port = NIL;
+ dep->data = NULL;
+ dep->removed_events = 0;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ dep->deferred_events = 0;
+#endif
+ init_iotask(&dep->iotask);
+ return dep;
+}
+
+static ERTS_INLINE void
+free_drv_event_data(ErtsDrvEventDataState *dep)
+{
+ ASSERT(!erts_port_task_is_scheduled(&dep->iotask.task));
+ erts_free(ERTS_ALC_T_DRV_EV_D_STATE, dep);
+}
+
+static ERTS_INLINE void
remember_removed(ErtsDrvEventState *state, struct pollset_info* psi)
{
#ifdef ERTS_SMP
@@ -288,7 +370,7 @@ forget_removed(struct pollset_info* psi)
drv_ptr = state->driver.drv_ptr;
ASSERT(drv_ptr);
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
state->driver.drv_ptr = NULL;
/* Fall through */
case ERTS_EV_TYPE_NONE:
@@ -345,6 +427,10 @@ grow_drv_ev_state(int min_ix)
for (i = erts_smp_atomic_read_nob(&drv_ev_state_len); i < new_len; i++) {
drv_ev_state[i].fd = (ErtsSysFdType) i;
drv_ev_state[i].driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ drv_ev_state[i].driver.event = NULL;
+#endif
+ drv_ev_state[i].driver.drv_ptr = NULL;
drv_ev_state[i].events = 0;
drv_ev_state[i].remove_cnt = 0;
drv_ev_state[i].type = ERTS_EV_TYPE_NONE;
@@ -365,11 +451,7 @@ grow_drv_ev_state(int min_ix)
static ERTS_INLINE void
abort_task(Eterm id, ErtsPortTaskHandle *pthp, EventStateType type)
{
- if (is_nil(id)) {
- ASSERT(type == ERTS_EV_TYPE_NONE
- || !erts_port_task_is_scheduled(pthp));
- }
- else if (erts_port_task_is_scheduled(pthp)) {
+ if (is_not_nil(id) && erts_port_task_is_scheduled(pthp)) {
erts_port_task_abort(pthp);
ASSERT(erts_is_port_alive(id));
}
@@ -384,7 +466,7 @@ abort_tasks(ErtsDrvEventState *state, int mode)
#if ERTS_CIO_HAVE_DRV_EVENT
case ERTS_EV_TYPE_DRV_EV:
abort_task(state->driver.event->port,
- &state->driver.event->task,
+ &state->driver.event->iotask.task,
ERTS_EV_TYPE_DRV_EV);
return;
#endif
@@ -398,14 +480,14 @@ abort_tasks(ErtsDrvEventState *state, int mode)
case ERL_DRV_WRITE:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->outport,
- &state->driver.select->outtask,
+ &state->driver.select->outiotask.task,
state->type);
if (mode == ERL_DRV_WRITE)
break;
case ERL_DRV_READ:
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
abort_task(state->driver.select->inport,
- &state->driver.select->intask,
+ &state->driver.select->iniotask.task,
state->type);
break;
default:
@@ -443,16 +525,14 @@ deselect(ErtsDrvEventState *state, int mode)
if (!(state->events)) {
switch (state->type) {
case ERTS_EV_TYPE_DRV_SEL:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
+ state->driver.select->inport = NIL;
+ state->driver.select->outport = NIL;
break;
#if ERTS_CIO_HAVE_DRV_EVENT
case ERTS_EV_TYPE_DRV_EV:
- ASSERT(!erts_port_task_is_scheduled(&state->driver.event->task));
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
+ state->driver.event->port = NIL;
+ state->driver.event->data = NULL;
+ state->driver.event->removed_events = (ErtsPollEvents) 0;
break;
#endif
case ERTS_EV_TYPE_NONE:
@@ -462,20 +542,297 @@ deselect(ErtsDrvEventState *state, int mode)
break;
}
- state->driver.select = NULL;
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
remember_removed(state, &pollset);
}
}
-
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
# define IS_FD_UNKNOWN(state) ((state)->type == ERTS_EV_TYPE_NONE && (state)->remove_cnt == 0)
#else
# define IS_FD_UNKNOWN(state) ((state) == NULL)
#endif
+static ERTS_INLINE void
+check_fd_cleanup(ErtsDrvEventState *state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState **free_event,
+#endif
+ ErtsDrvSelectDataState **free_select)
+{
+ erts_aint_t current_cio_time;
+
+ ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(fd_mtx(state->fd)));
+
+ current_cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ *free_select = NULL;
+ if (state->driver.select
+ && (state->type != ERTS_EV_TYPE_DRV_SEL)
+ && !is_iotask_active(&state->driver.select->iniotask, current_cio_time)
+ && !is_iotask_active(&state->driver.select->outiotask, current_cio_time)) {
+
+ *free_select = state->driver.select;
+ state->driver.select = NULL;
+ }
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ *free_event = NULL;
+ if (state->driver.event
+ && (state->type != ERTS_EV_TYPE_DRV_EV)
+ && !is_iotask_active(&state->driver.event->iotask, current_cio_time)) {
+
+ *free_event = state->driver.event;
+ state->driver.event = NULL;
+ }
+#endif
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (((state->type != ERTS_EV_TYPE_NONE)
+ | state->remove_cnt
+#if ERTS_CIO_HAVE_DRV_EVENT
+ | (state->driver.event != NULL)
+#endif
+ | (state->driver.select != NULL)) == 0) {
+
+ hash_erase_drv_ev_state(state);
+
+ }
+#endif
+}
+
+static ERTS_INLINE int
+check_cleanup_active_fd(ErtsSysFdType fd,
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollControlEntry *pce,
+ int *pce_ix,
+#endif
+ erts_aint_t current_cio_time)
+{
+ ErtsDrvEventState *state;
+ int active = 0;
+ erts_smp_mtx_t *mtx = fd_mtx(fd);
+ void *free_select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ void *free_event = NULL;
+#endif
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents evon = 0, evoff = 0;
+#endif
+
+ erts_smp_mtx_lock(mtx);
+
+#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ state = &drv_ev_state[(int) fd];
+#else
+ state = hash_get_drv_ev_state(fd); /* may be NULL! */
+ if (state)
+#endif
+ {
+ if (state->driver.select) {
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)) {
+ active = 1;
+ if ((state->events & ERTS_POLL_EV_IN)
+ && !(state->flags & ERTS_EV_FLAG_DEFER_IN_EV)) {
+ evoff |= ERTS_POLL_EV_IN;
+ state->flags |= ERTS_EV_FLAG_DEFER_IN_EV;
+ }
+ }
+ else if (state->flags & ERTS_EV_FLAG_DEFER_IN_EV) {
+ if (state->events & ERTS_POLL_EV_IN)
+ evon |= ERTS_POLL_EV_IN;
+ state->flags &= ~ERTS_EV_FLAG_DEFER_IN_EV;
+ }
+ if (is_iotask_active(&state->driver.select->outiotask, current_cio_time)) {
+ active = 1;
+ if ((state->events & ERTS_POLL_EV_OUT)
+ && !(state->flags & ERTS_EV_FLAG_DEFER_OUT_EV)) {
+ evoff |= ERTS_POLL_EV_OUT;
+ state->flags |= ERTS_EV_FLAG_DEFER_OUT_EV;
+ }
+ }
+ else if (state->flags & ERTS_EV_FLAG_DEFER_OUT_EV) {
+ if (state->events & ERTS_POLL_EV_OUT)
+ evon |= ERTS_POLL_EV_OUT;
+ state->flags &= ~ERTS_EV_FLAG_DEFER_OUT_EV;
+ }
+ if (active)
+ (void) 0;
+ else
+#else
+ if (is_iotask_active(&state->driver.select->iniotask, current_cio_time)
+ || is_iotask_active(&state->driver.select->outiotask, current_cio_time))
+ active = 1;
+ else
+#endif
+ if (state->type != ERTS_EV_TYPE_DRV_SEL) {
+ free_select = state->driver.select;
+ state->driver.select = NULL;
+ }
+ }
+
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event) {
+ if (is_iotask_active(&state->driver.event->iotask, current_cio_time)) {
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents evs = state->events & ~state->driver.event->deferred_events;
+ if (evs) {
+ evoff |= evs;
+ state->driver.event->deferred_events |= evs;
+ }
+#endif
+ active = 1;
+ }
+ else if (state->type != ERTS_EV_TYPE_DRV_EV) {
+ free_event = state->driver.event;
+ state->driver.event = NULL;
+ }
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ else {
+ ErtsPollEvents evs = state->events & state->driver.event->deferred_events;
+ if (evs) {
+ evon |= evs;
+ state->driver.event->deferred_events = 0;
+ }
+ }
+#endif
+
+ }
+#endif
+
+#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
+ if (((state->type != ERTS_EV_TYPE_NONE) | state->remove_cnt | active) == 0)
+ hash_erase_drv_ev_state(state);
+#endif
+
+ }
+
+ erts_smp_mtx_unlock(mtx);
+
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
+
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ if (evoff) {
+ ErtsPollControlEntry *pcep = &pce[(*pce_ix)++];
+ pcep->fd = fd;
+ pcep->events = evoff;
+ pcep->on = 0;
+ }
+ if (evon) {
+ ErtsPollControlEntry *pcep = &pce[(*pce_ix)++];
+ pcep->fd = fd;
+ pcep->events = evon;
+ pcep->on = 1;
+ }
+#endif
+
+ return active;
+}
+
+static void
+check_cleanup_active_fds(erts_aint_t current_cio_time)
+{
+ int six = pollset.active_fd.six;
+ int eix = pollset.active_fd.eix;
+ erts_aint32_t no = erts_smp_atomic32_read_dirty(&pollset.active_fd.no);
+ int size = pollset.active_fd.size;
+ int ix = six;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ /* every fd might add two entries */
+ Uint pce_sz = 2*sizeof(ErtsPollControlEntry)*no;
+ ErtsPollControlEntry *pctrl_entries = (pce_sz
+ ? erts_alloc(ERTS_ALC_T_TMP, pce_sz)
+ : NULL);
+ int pctrl_ix = 0;
+#endif
+
+ while (ix != eix) {
+ ErtsSysFdType fd = pollset.active_fd.array[ix];
+ int nix = ix + 1;
+ if (nix >= size)
+ nix = 0;
+ ASSERT(fd != ERTS_SYS_FD_INVALID);
+ if (!check_cleanup_active_fd(fd,
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ pctrl_entries,
+ &pctrl_ix,
+#endif
+ current_cio_time)) {
+ no--;
+ if (ix == six) {
+#ifdef DEBUG
+ pollset.active_fd.array[ix] = ERTS_SYS_FD_INVALID;
+#endif
+ six = nix;
+ }
+ else {
+ pollset.active_fd.array[ix] = pollset.active_fd.array[six];
+#ifdef DEBUG
+ pollset.active_fd.array[six] = ERTS_SYS_FD_INVALID;
+#endif
+ six++;
+ if (six >= size)
+ six = 0;
+ }
+ }
+ ix = nix;
+ }
+
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ASSERT(pctrl_ix <= pce_sz/sizeof(ErtsPollControlEntry));
+ if (pctrl_ix)
+ ERTS_CIO_POLL_CTLV(pollset.ps, pctrl_entries, pctrl_ix);
+ if (pctrl_entries)
+ erts_free(ERTS_ALC_T_TMP, pctrl_entries);
+#endif
+
+ pollset.active_fd.six = six;
+ pollset.active_fd.eix = eix;
+ erts_smp_atomic32_set_relb(&pollset.active_fd.no, no);
+}
+
+static ERTS_INLINE void
+add_active_fd(ErtsSysFdType fd)
+{
+ int eix = pollset.active_fd.eix;
+ int size = pollset.active_fd.size;
+
+
+ pollset.active_fd.array[eix] = fd;
+
+ erts_smp_atomic32_set_relb(&pollset.active_fd.no,
+ (erts_smp_atomic32_read_dirty(&pollset.active_fd.no)
+ + 1));
+
+ eix++;
+ if (eix >= size)
+ eix = 0;
+ if (pollset.active_fd.six == eix) {
+ pollset.active_fd.six = 0;
+ eix = size;
+ size += ERTS_ACTIVE_FD_INC;
+ pollset.active_fd.array = erts_realloc(ERTS_ALC_T_ACTIVE_FD_ARR,
+ pollset.active_fd.array,
+ sizeof(ErtsSysFdType)*size);
+ pollset.active_fd.size = size;
+#ifdef DEBUG
+ {
+ int i;
+ for (i = eix + 1; i < size; i++)
+ pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID;
+ }
+#endif
+
+ }
+
+ pollset.active_fd.eix = eix;
+}
int
ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
@@ -492,6 +849,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
ErtsDrvEventState *state;
int wake_poller;
int ret;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *free_event = NULL;
+#endif
+ ErtsDrvSelectDataState *free_select = NULL;
#ifdef USE_VM_PROBES
DTRACE_CHARBUF(name, 64);
#endif
@@ -593,9 +954,9 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
if (state->type == ERTS_EV_TYPE_DRV_SEL && !state->events) {
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE, state->driver.select);
- state->driver.select = NULL;
+ state->flags &= ~ERTS_EV_FLAG_USED;
+ state->driver.select->inport = NIL;
+ state->driver.select->outport = NIL;
}
ret = -1;
goto done;
@@ -613,18 +974,10 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
state->events = new_events;
if (ctl_events) {
if (on) {
- if (state->type == ERTS_EV_TYPE_NONE) {
- ErtsDrvSelectDataState *dsdsp
- = erts_alloc(ERTS_ALC_T_DRV_SEL_D_STATE,
- sizeof(ErtsDrvSelectDataState));
- dsdsp->inport = NIL;
- dsdsp->outport = NIL;
- erts_port_task_handle_init(&dsdsp->intask);
- erts_port_task_handle_init(&dsdsp->outtask);
- ASSERT(state->driver.select == NULL);
- state->driver.select = dsdsp;
+ if (!state->driver.select)
+ state->driver.select = alloc_drv_select_data();
+ if (state->type == ERTS_EV_TYPE_NONE)
state->type = ERTS_EV_TYPE_DRV_SEL;
- }
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL);
if (ctl_events & ERTS_POLL_EV_IN)
state->driver.select->inport = id;
@@ -645,17 +998,12 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
state->driver.select->outport = NIL;
}
if (new_events == 0) {
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->intask));
- ASSERT(!erts_port_task_is_scheduled(&state->driver.select->outtask));
if (old_events != 0) {
remember_removed(state, &pollset);
}
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
- erts_free(ERTS_ALC_T_DRV_SEL_D_STATE,
- state->driver.select);
- state->driver.select = NULL;
+ state->flags &= ~ERTS_EV_FLAG_USED;
}
/*else keep it, as fd will probably be selected upon again */
}
@@ -686,13 +1034,15 @@ ERTS_CIO_EXPORT(driver_select)(ErlDrvPort ix,
ret = 0;
-done:;
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
- }
+done:
+
+ check_fd_cleanup(state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ &free_event,
#endif
-done_unknown:
+ &free_select);
+
+done_unknown:
erts_smp_mtx_unlock(fd_mtx(fd));
if (stop_select_fn) {
int was_unmasked = erts_block_fpe();
@@ -700,6 +1050,12 @@ done_unknown:
(*stop_select_fn)(e, NULL);
erts_unblock_fpe(was_unmasked);
}
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
return ret;
}
@@ -719,6 +1075,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
ErtsDrvEventState *state;
int do_wake = 0;
int ret;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ ErtsDrvEventDataState *free_event;
+#endif
+ ErtsDrvSelectDataState *free_select;
Port *prt = erts_drvport2port(ix);
if (prt == ERTS_INVALID_ERL_DRV_PORT)
@@ -799,10 +1159,8 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
state->driver.event->removed_events |= remove_events;
}
else {
- state->driver.event
- = erts_alloc(ERTS_ALC_T_DRV_EV_D_STATE,
- sizeof(ErtsDrvEventDataState));
- erts_port_task_handle_init(&state->driver.event->task);
+ if (!state->driver.event)
+ state->driver.event = alloc_drv_event_data();
state->driver.event->port = id;
state->driver.event->removed_events = (ErtsPollEvents) 0;
state->type = ERTS_EV_TYPE_DRV_EV;
@@ -812,10 +1170,10 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
else {
if (state->type == ERTS_EV_TYPE_DRV_EV) {
abort_tasks(state, 0);
- erts_free(ERTS_ALC_T_DRV_EV_D_STATE,
- state->driver.event);
+ state->driver.event->port = NIL;
+ state->driver.event->data = NULL;
+ state->driver.event->removed_events = (ErtsPollEvents) 0;
}
- state->driver.select = NULL;
state->type = ERTS_EV_TYPE_NONE;
remember_removed(state, &pollset);
}
@@ -825,12 +1183,22 @@ ERTS_CIO_EXPORT(driver_event)(ErlDrvPort ix,
ret = 0;
done:
-#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
- if (state->type == ERTS_EV_TYPE_NONE && state->remove_cnt == 0) {
- hash_erase_drv_ev_state(state);
- }
+
+ check_fd_cleanup(state,
+#if ERTS_CIO_HAVE_DRV_EVENT
+ &free_event,
#endif
+ &free_select);
+
erts_smp_mtx_unlock(fd_mtx(fd));
+
+ if (free_select)
+ free_drv_select_data(free_select);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (free_event)
+ free_drv_event_data(free_event);
+#endif
+
return ret;
#endif
}
@@ -1027,7 +1395,7 @@ steal_pending_stop_select(erts_dsprintf_buf_t *dsbufp, ErlDrvPort ix,
* In either case stop_select should not be called.
*/
state->type = ERTS_EV_TYPE_NONE;
- state->flags = 0;
+ state->flags &= ~ERTS_EV_FLAG_USED;
if (state->driver.drv_ptr->handle) {
erts_ddll_dereference_driver(state->driver.drv_ptr->handle);
}
@@ -1099,38 +1467,103 @@ event_large_fd_error(ErlDrvPort ix, ErtsSysFdType fd, ErlDrvEventData event_data
#endif
#endif
+static ERTS_INLINE int
+io_task_schedule_allowed(ErtsDrvEventState *state,
+ ErtsPortTaskType type,
+ erts_aint_t current_cio_time)
+{
+ ErtsIoTask *io_task;
+
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+ if (!state->driver.select)
+ return 0;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ return 0;
+#endif
+ io_task = &state->driver.select->iniotask;
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
+ if (!state->driver.select)
+ return 0;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ return 0;
+#endif
+ io_task = &state->driver.select->outiotask;
+ break;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ case ERTS_PORT_TASK_EVENT:
+ if (!state->driver.event)
+ return 0;
+ if (state->driver.select)
+ return 0;
+ io_task = &state->driver.event->iotask;
+ break;
+#endif
+ default:
+ ERTS_INTERNAL_ERROR("Invalid I/O-task type");
+ return 0;
+ }
+
+ return !is_iotask_active(io_task, current_cio_time);
+}
+
static ERTS_INLINE void
-iready(Eterm id, ErtsDrvEventState *state)
+iready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->intask,
- ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_READ);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_INPUT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.select->iniotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_INPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_READ);
+ }
+ add_active_fd(state->fd);
}
}
static ERTS_INLINE void
-oready(Eterm id, ErtsDrvEventState *state)
+oready(Eterm id, ErtsDrvEventState *state, erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.select->outtask,
- ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
- stale_drv_select(id, state, ERL_DRV_WRITE);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_OUTPUT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.select->outiotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_OUTPUT,
+ (ErlDrvEvent) state->fd) != 0) {
+ stale_drv_select(id, state, ERL_DRV_WRITE);
+ }
+ add_active_fd(state->fd);
}
}
#if ERTS_CIO_HAVE_DRV_EVENT
static ERTS_INLINE void
-eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data)
+eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data,
+ erts_aint_t current_cio_time)
{
- if (erts_port_task_schedule(id,
- &state->driver.event->task,
- ERTS_PORT_TASK_EVENT,
- (ErlDrvEvent) state->fd,
- event_data) != 0) {
- stale_drv_select(id, state, 0);
+ if (io_task_schedule_allowed(state,
+ ERTS_PORT_TASK_EVENT,
+ current_cio_time)) {
+ ErtsIoTask *iotask = &state->driver.event->iotask;
+ erts_smp_atomic_set_nob(&iotask->executed_time, current_cio_time);
+ if (erts_port_task_schedule(id,
+ &iotask->task,
+ ERTS_PORT_TASK_EVENT,
+ (ErlDrvEvent) state->fd,
+ event_data) != 0) {
+ stale_drv_select(id, state, 0);
+ }
+ add_active_fd(state->fd);
}
}
#endif
@@ -1161,10 +1594,11 @@ ERTS_CIO_EXPORT(erts_check_io_interrupt_timed)(int set,
void
ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
{
- ErtsPollResFd pollres[256];
+ ErtsPollResFd *pollres;
int pollres_len;
SysTimeval wait_time;
int poll_ret, i;
+ erts_aint_t current_cio_time;
restart:
@@ -1181,10 +1615,24 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
wait_time.tv_usec = 0;
}
+ /*
+ * No need for an atomic inc op when incrementing
+ * erts_check_io_time, since only one thread can
+ * check io at a time.
+ */
+ current_cio_time = erts_smp_atomic_read_dirty(&erts_check_io_time);
+ current_cio_time++;
+ erts_smp_atomic_set_relb(&erts_check_io_time, current_cio_time);
+
+ check_cleanup_active_fds(current_cio_time);
+
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0); /* No locks should be locked */
#endif
- pollres_len = sizeof(pollres)/sizeof(ErtsPollResFd);
+
+ pollres_len = erts_smp_atomic32_read_dirty(&pollset.active_fd.no) + ERTS_CHECK_IO_POLL_RES_LEN;
+
+ pollres = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollResFd)*pollres_len);
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 1);
@@ -1204,6 +1652,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if (poll_ret != 0) {
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
forget_removed(&pollset);
+ erts_free(ERTS_ALC_T_TMP, pollres);
if (poll_ret == EAGAIN) {
goto restart;
}
@@ -1263,15 +1712,15 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if ((revents & ERTS_POLL_EV_IN)
|| (!(revents & ERTS_POLL_EV_OUT)
&& state->events & ERTS_POLL_EV_IN)) {
- iready(state->driver.select->inport, state);
+ iready(state->driver.select->inport, state, current_cio_time);
}
else if (state->events & ERTS_POLL_EV_OUT) {
- oready(state->driver.select->outport, state);
+ oready(state->driver.select->outport, state, current_cio_time);
}
}
else if (revents & (ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT)) {
if (revents & ERTS_POLL_EV_OUT) {
- oready(state->driver.select->outport, state);
+ oready(state->driver.select->outport, state, current_cio_time);
}
/* Someone might have deselected input since revents
was read (true also on the non-smp emulator since
@@ -1279,7 +1728,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
revents... */
revents &= ~(~state->events & ERTS_POLL_EV_IN);
if (revents & ERTS_POLL_EV_IN) {
- iready(state->driver.select->inport, state);
+ iready(state->driver.select->inport, state, current_cio_time);
}
}
else if (revents & ERTS_POLL_EV_NVAL) {
@@ -1287,6 +1736,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
state->driver.select->inport,
state->driver.select->outport,
state->events);
+ add_active_fd(state->fd);
}
break;
}
@@ -1304,8 +1754,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
if (revents) {
event_data->events = state->events;
event_data->revents = revents;
-
- eready(state->driver.event->port, state, event_data);
+ eready(state->driver.event->port, state, event_data, current_cio_time);
}
break;
}
@@ -1323,6 +1772,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
(int) state->type);
ASSERT(0);
deselect(state, 0);
+ add_active_fd(state->fd);
break;
}
}
@@ -1334,6 +1784,7 @@ ERTS_CIO_EXPORT(erts_check_io)(int do_wait)
}
erts_smp_atomic_set_nob(&pollset.in_poll_wait, 0);
+ erts_free(ERTS_ALC_T_TMP, pollres);
forget_removed(&pollset);
}
@@ -1469,10 +1920,27 @@ static void drv_ev_state_free(void *des)
void
ERTS_CIO_EXPORT(erts_init_check_io)(void)
{
+ erts_smp_atomic_init_nob(&erts_check_io_time, 0);
erts_smp_atomic_init_nob(&pollset.in_poll_wait, 0);
+
ERTS_CIO_POLL_INIT();
pollset.ps = ERTS_CIO_NEW_POLLSET();
+ pollset.active_fd.six = 0;
+ pollset.active_fd.eix = 0;
+ erts_smp_atomic32_init_nob(&pollset.active_fd.no, 0);
+ pollset.active_fd.size = ERTS_ACTIVE_FD_INC;
+ pollset.active_fd.array = erts_alloc(ERTS_ALC_T_ACTIVE_FD_ARR,
+ sizeof(ErtsSysFdType)*ERTS_ACTIVE_FD_INC);
+#ifdef DEBUG
+ {
+ int i;
+ for (i = 0; i < ERTS_ACTIVE_FD_INC; i++)
+ pollset.active_fd.array[i] = ERTS_SYS_FD_INVALID;
+ }
+#endif
+
+
#ifdef ERTS_SMP
init_removed_fd_alloc();
pollset.removed_list = NULL;
@@ -1548,12 +2016,27 @@ Eterm
ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
{
Process *p = (Process *) proc;
- Eterm tags[15], values[15], res;
+ Eterm tags[16], values[16], res;
Uint sz, *szp, *hp, **hpp, memory_size;
Sint i;
ErtsPollInfo pi;
-
- ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+ erts_aint_t cio_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ int active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no);
+
+ while (1) {
+ erts_aint_t post_cio_time;
+ int post_active_fds;
+
+ ERTS_CIO_POLL_INFO(pollset.ps, &pi);
+
+ post_cio_time = erts_smp_atomic_read_mb(&erts_check_io_time);
+ post_active_fds = (int) erts_smp_atomic32_read_acqb(&pollset.active_fd.no);
+ if (cio_time == post_cio_time && active_fds == post_active_fds)
+ break;
+ cio_time = post_cio_time;
+ active_fds = post_active_fds;
+ }
+
memory_size = pi.memory_size;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
memory_size += sizeof(ErtsDrvEventState) * erts_smp_atomic_read_nob(&drv_ev_state_len);
@@ -1617,6 +2100,9 @@ ERTS_CIO_EXPORT(erts_check_io_info)(void *proc)
tags[i] = erts_bld_atom(hpp, szp, "max_fds");
values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.max_fds);
+ tags[i] = erts_bld_atom(hpp, szp, "active_fds");
+ values[i++] = erts_bld_uint(hpp, szp, (Uint) active_fds);
+
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
tags[i] = erts_bld_atom(hpp, szp, "no_avoided_wakeups");
values[i++] = erts_bld_uint(hpp, szp, (Uint) pi.no_avoided_wakeups);
@@ -1671,6 +2157,8 @@ print_events(ErtsPollEvents ev)
typedef struct {
int used_fds;
int num_errors;
+ int no_driver_select_structs;
+ int no_driver_event_structs;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
int internal_fds;
ErtsPollEvents *epep;
@@ -1693,6 +2181,13 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
struct stat stat_buf;
#endif
+ if (state->driver.select)
+ counters->no_driver_select_structs++;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ if (state->driver.event)
+ counters->no_driver_event_structs++;
+#endif
+
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
if (state->events || ep_events) {
if (ep_events & ERTS_POLL_EV_NVAL) {
@@ -1831,6 +2326,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
}
}
}
+#if ERTS_CIO_HAVE_DRV_EVENT
else if (state->type == ERTS_EV_TYPE_DRV_EV) {
Eterm id;
erts_printf("driver_event ");
@@ -1866,6 +2362,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
erts_free_port_names(pnp);
}
}
+#endif
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
else if (internal) {
erts_printf("internal ");
@@ -1905,7 +2402,7 @@ static void doit_erts_check_io_debug(void *vstate, void *vcounters)
}
int
-ERTS_CIO_EXPORT(erts_check_io_debug)(void)
+ERTS_CIO_EXPORT(erts_check_io_debug)(ErtsCheckIoDebugInfo *ciodip)
{
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
int fd, len;
@@ -1915,6 +2412,10 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
ErtsDrvEventState null_des;
null_des.driver.select = NULL;
+#if ERTS_CIO_HAVE_DRV_EVENT
+ null_des.driver.event = NULL;
+#endif
+ null_des.driver.drv_ptr = NULL;
null_des.events = 0;
null_des.remove_cnt = 0;
null_des.type = ERTS_EV_TYPE_NONE;
@@ -1935,6 +2436,8 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
#endif
counters.used_fds = 0;
counters.num_errors = 0;
+ counters.no_driver_select_structs = 0;
+ counters.no_driver_event_structs = 0;
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
len = erts_smp_atomic_read_nob(&drv_ev_state_len);
@@ -1951,8 +2454,16 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
erts_smp_thr_progress_unblock();
+ ciodip->no_used_fds = counters.used_fds;
+ ciodip->no_driver_select_structs = counters.no_driver_select_structs;
+ ciodip->no_driver_event_structs = counters.no_driver_event_structs;
+
erts_printf("\n");
erts_printf("used fds=%d\n", counters.used_fds);
+ erts_printf("Number of driver_select() structures=%d\n", counters.no_driver_select_structs);
+#if ERTS_CIO_HAVE_DRV_EVENT
+ erts_printf("Number of driver_event() structures=%d\n", counters.no_driver_event_structs);
+#endif
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
erts_printf("internal fds=%d\n", counters.internal_fds);
#endif
@@ -1961,6 +2472,7 @@ ERTS_CIO_EXPORT(erts_check_io_debug)(void)
#ifdef ERTS_SYS_CONTINOUS_FD_NUMBERS
erts_free(ERTS_ALC_T_TMP, (void *) counters.epep);
#endif
+
return counters.num_errors;
}
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index edab7947ba..d01297d55c 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -26,6 +26,7 @@
#ifndef ERL_CHECK_IO_H__
#define ERL_CHECK_IO_H__
+#include "sys.h"
#include "erl_sys_driver.h"
#ifdef ERTS_ENABLE_KERNEL_POLL
@@ -52,8 +53,8 @@ void erts_check_io_kp(int);
void erts_check_io_nkp(int);
void erts_init_check_io_kp(void);
void erts_init_check_io_nkp(void);
-int erts_check_io_debug_kp(void);
-int erts_check_io_debug_nkp(void);
+int erts_check_io_debug_kp(ErtsCheckIoDebugInfo *);
+int erts_check_io_debug_nkp(ErtsCheckIoDebugInfo *);
#else /* !ERTS_ENABLE_KERNEL_POLL */
@@ -70,6 +71,27 @@ void erts_init_check_io(void);
#endif
+extern erts_smp_atomic_t erts_check_io_time;
+
+typedef struct {
+ ErtsPortTaskHandle task;
+ erts_smp_atomic_t executed_time;
+} ErtsIoTask;
+
+ERTS_GLB_INLINE void erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE void
+erts_io_notify_port_task_executed(ErtsPortTaskHandle *pthp)
+{
+ ErtsIoTask *itp = (ErtsIoTask *) (((char *) pthp) - offsetof(ErtsIoTask, task));
+ erts_aint_t ci_time = erts_smp_atomic_read_acqb(&erts_check_io_time);
+ erts_smp_atomic_set_relb(&itp->executed_time, ci_time);
+}
+
+#endif
+
#endif /* ERL_CHECK_IO_H__ */
#if !defined(ERL_CHECK_IO_C__) && !defined(ERTS_ALLOC_C__)
@@ -81,6 +103,16 @@ void erts_init_check_io(void);
#include "erl_poll.h"
#include "erl_port_task.h"
+#ifdef __WIN32__
+/*
+ * Current erts_poll implementation for Windows cannot handle
+ * active events in the set of events polled.
+ */
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
+#else
+# define ERTS_CIO_DEFER_ACTIVE_EVENTS 0
+#endif
+
/*
* ErtsDrvEventDataState is used by driver_event() which is almost never
* used. We allocate ErtsDrvEventDataState separate since we dont wan't
@@ -91,13 +123,16 @@ typedef struct {
Eterm port;
ErlDrvEventData data;
ErtsPollEvents removed_events;
- ErtsPortTaskHandle task;
+#if ERTS_CIO_DEFER_ACTIVE_EVENTS
+ ErtsPollEvents deferred_events;
+#endif
+ ErtsIoTask iotask;
} ErtsDrvEventDataState;
typedef struct {
Eterm inport;
Eterm outport;
- ErtsPortTaskHandle intask;
- ErtsPortTaskHandle outtask;
+ ErtsIoTask iniotask;
+ ErtsIoTask outiotask;
} ErtsDrvSelectDataState;
#endif /* #ifndef ERL_CHECK_IO_INTERNAL__ */
diff --git a/erts/emulator/sys/common/erl_sys_common_misc.c b/erts/emulator/sys/common/erl_sys_common_misc.c
index e3ba741058..e63f0bda54 100644
--- a/erts/emulator/sys/common/erl_sys_common_misc.c
+++ b/erts/emulator/sys/common/erl_sys_common_misc.c
@@ -44,6 +44,14 @@
#endif
#endif
+/*
+ * erts_check_io_time is used by the erl_check_io implementation. The
+ * global erts_check_io_time variable is declared here since there
+ * (often) exist two versions of erl_check_io (kernel-poll and
+ * non-kernel-poll), and we dont want two versions of this variable.
+ */
+erts_smp_atomic_t erts_check_io_time;
+
/* Written once and only once */
static int filename_encoding = ERL_FILENAME_UNKNOWN;
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index 176fc049a7..f7a6298d5b 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -135,9 +135,6 @@
/* File descriptors are numbers anc consecutively allocated on Unix */
#define ERTS_SYS_CONTINOUS_FD_NUMBERS
-#define HAVE_ERTS_CHECK_IO_DEBUG
-int erts_check_io_debug(void);
-
#ifndef ERTS_SMP
# undef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
# define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
@@ -230,8 +227,24 @@ extern void sys_stop_cat(void);
*/
#ifdef USE_ISINF_ISNAN /* simulate finite() */
-# define finite(f) (!isinf(f) && !isnan(f))
-# define HAVE_FINITE
+# define isfinite(f) (!isinf(f) && !isnan(f))
+# define HAVE_ISFINITE
+#elif defined(__GNUC__) && defined(HAVE_FINITE)
+/* We use finite in gcc as it emits assembler instead of
+ the function call that isfinite emits. The assembler is
+ significantly faster. */
+# ifdef isfinite
+# undef isfinite
+# endif
+# define isfinite finite
+# ifndef HAVE_ISFINITE
+# define HAVE_ISFINITE
+# endif
+#elif defined(isfinite) && !defined(HAVE_ISFINITE)
+# define HAVE_ISFINITE
+#elif !defined(HAVE_ISFINITE) && defined(HAVE_FINITE)
+# define isfinite finite
+# define HAVE_ISFINITE
#endif
#ifdef NO_FPE_SIGNALS
@@ -241,7 +254,7 @@ extern void sys_stop_cat(void);
#define erts_thread_init_fp_exception() do{}while(0)
#endif
# define __ERTS_FP_CHECK_INIT(fpexnp) do {} while (0)
-# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!finite(f)) { Action; } else {}
+# define __ERTS_FP_ERROR(fpexnp, f, Action) if (!isfinite(f)) { Action; } else {}
# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) __ERTS_FP_ERROR(fpexnp, f, Action)
# define __ERTS_SAVE_FP_EXCEPTION(fpexnp)
# define __ERTS_RESTORE_FP_EXCEPTION(fpexnp)
@@ -305,7 +318,7 @@ static __inline__ void __ERTS_FP_CHECK_INIT(volatile unsigned long *fp_exception
code to always throw floating-point exceptions on errors. */
static __inline__ int erts_check_fpe_thorough(volatile unsigned long *fp_exception, double f)
{
- return erts_check_fpe(fp_exception, f) || !finite(f);
+ return erts_check_fpe(fp_exception, f) || !isfinite(f);
}
# define __ERTS_FP_ERROR_THOROUGH(fpexnp, f, Action) \
do { if (erts_check_fpe_thorough((fpexnp),(f))) { Action; } } while (0)
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index c3d7440409..0d677d5f34 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -284,7 +284,7 @@ struct {
void (*check_io)(int);
Uint (*size)(void);
Eterm (*info)(void *);
- int (*check_io_debug)(void);
+ int (*check_io_debug)(ErtsCheckIoDebugInfo *);
} io_func = {0};
@@ -306,9 +306,9 @@ Eterm erts_check_io_info(void *p)
}
int
-erts_check_io_debug(void)
+erts_check_io_debug(ErtsCheckIoDebugInfo *ip)
{
- return (*io_func.check_io_debug)();
+ return (*io_func.check_io_debug)(ip);
}
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 7a1d129cd5..972170d465 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1085,7 +1085,7 @@ void erts_poll_controlv(ErtsPollSet ps,
pcev[i].events,
pcev[i].on);
}
- ERTS_POLLSET_LOCK(ps);
+ ERTS_POLLSET_UNLOCK(ps);
HARDTRACEF(("Out erts_poll_controlv"));
}
diff --git a/erts/emulator/sys/win32/erl_win_sys.h b/erts/emulator/sys/win32/erl_win_sys.h
index a78dbf64af..838f0c61eb 100644
--- a/erts/emulator/sys/win32/erl_win_sys.h
+++ b/erts/emulator/sys/win32/erl_win_sys.h
@@ -113,12 +113,10 @@
/*
* Our own type of "FD's"
*/
+#define ERTS_SYS_FD_INVALID INVALID_HANDLE_VALUE
#define ERTS_SYS_FD_TYPE HANDLE
#define NO_FSTAT_ON_SYS_FD_TYPE 1 /* They are events, not files */
-#define HAVE_ERTS_CHECK_IO_DEBUG
-int erts_check_io_debug(void);
-
/*
* For erl_time_sup
*/
diff --git a/erts/emulator/test/a_SUITE.erl b/erts/emulator/test/a_SUITE.erl
index 195c9c0a5f..17579be416 100644
--- a/erts/emulator/test/a_SUITE.erl
+++ b/erts/emulator/test/a_SUITE.erl
@@ -97,23 +97,13 @@ display_check_io(ChkIo) ->
catch erlang:display('--- CHECK IO INFO ---'),
catch erlang:display(ChkIo),
catch erts_debug:set_internal_state(available_internal_state, true),
- NoOfErrorFds = (catch erts_debug:get_internal_state(check_io_debug)),
+ NoOfErrorFds = (catch element(1, erts_debug:get_internal_state(check_io_debug))),
catch erlang:display({'NoOfErrorFds', NoOfErrorFds}),
catch erts_debug:set_internal_state(available_internal_state, false),
catch erlang:display('--- CHECK IO INFO ---'),
ok.
get_check_io_info() ->
- ChkIo = erlang:system_info(check_io),
- case lists:keysearch(pending_updates, 1, ChkIo) of
- {value, {pending_updates, 0}} ->
- display_check_io(ChkIo),
- ChkIo;
- false ->
- ChkIo;
- _ ->
- receive after 10 -> ok end,
- get_check_io_info()
- end.
+ z_SUITE:get_check_io_info().
diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl
index 336b6188f6..623d62f876 100644
--- a/erts/emulator/test/driver_SUITE.erl
+++ b/erts/emulator/test/driver_SUITE.erl
@@ -31,8 +31,9 @@
end_per_suite/1, init_per_group/2,end_per_group/2,
init_per_testcase/2,
end_per_testcase/2,
+
+ a_test/1,
outputv_echo/1,
-
timer_measure/1,
timer_cancel/1,
timer_change/1,
@@ -79,7 +80,8 @@
thr_free_drv/1,
async_blast/1,
thr_msg_blast/1,
- consume_timeslice/1]).
+ consume_timeslice/1,
+ z_test/1]).
-export([bin_prefix/2]).
@@ -122,19 +124,19 @@ init_per_testcase(Case, Config) when is_atom(Case), is_list(Config) ->
_ -> erts_debug:set_internal_state(available_internal_state, true)
end,
erlang:display({init_per_testcase, Case}),
- ?line 0 = erts_debug:get_internal_state(check_io_debug),
+ ?line 0 = element(1, erts_debug:get_internal_state(check_io_debug)),
[{watchdog, Dog},{testcase, Case}|Config].
end_per_testcase(Case, Config) ->
Dog = ?config(watchdog, Config),
erlang:display({end_per_testcase, Case}),
- ?line 0 = erts_debug:get_internal_state(check_io_debug),
+ ?line 0 = element(1, erts_debug:get_internal_state(check_io_debug)),
?t:timetrap_cancel(Dog).
suite() -> [{ct_hooks,[ts_install_cth]}].
-all() ->
- [outputv_errors, outputv_echo, queue_echo, {group, timer},
+all() -> %% Keep a_test first and z_test last...
+ [a_test, outputv_errors, outputv_echo, queue_echo, {group, timer},
driver_unloaded, io_ready_exit, use_fallback_pollset,
bad_fd_in_pollset, driver_event, fd_change,
steal_control, otp_6602, driver_system_info_base_ver,
@@ -151,7 +153,8 @@ all() ->
thr_free_drv,
async_blast,
thr_msg_blast,
- consume_timeslice].
+ consume_timeslice,
+ z_test].
groups() ->
[{timer, [],
@@ -917,8 +920,7 @@ steal_control_test(Hndl = {erts_poll_info, Before}) ->
end.
chkio_test_init(Config) when is_list(Config) ->
- ?line wait_until_no_pending_updates(),
- ?line ChkIo = erlang:system_info(check_io),
+ ?line ChkIo = get_stable_check_io_info(),
?line case catch lists:keysearch(name, 1, ChkIo) of
{value, {name, erts_poll}} ->
?line ?t:format("Before test: ~p~n", [ChkIo]),
@@ -937,8 +939,7 @@ chkio_test_fini({skipped, _} = Res) ->
chkio_test_fini({chkio_test_result, Res, Before}) ->
?line ok = erl_ddll:unload_driver('chkio_drv'),
?line ok = erl_ddll:stop(),
- ?line wait_until_no_pending_updates(),
- ?line After = erlang:system_info(check_io),
+ ?line After = get_stable_check_io_info(),
?line ?t:format("After test: ~p~n", [After]),
?line verify_chkio_state(Before, After),
?line Res.
@@ -985,7 +986,7 @@ chkio_test({erts_poll_info, Before},
?line Fun(),
?line During = erlang:system_info(check_io),
?line erlang:display(During),
- ?line 0 = erts_debug:get_internal_state(check_io_debug),
+ ?line 0 = element(1, erts_debug:get_internal_state(check_io_debug)),
?line ?t:format("During test: ~p~n", [During]),
?line chk_chkio_port(Port),
?line case erlang:port_control(Port, ?CHKIO_STOP, "") of
@@ -1034,18 +1035,22 @@ verify_chkio_state(Before, After) ->
After)
end,
?line ok.
-
-
-wait_until_no_pending_updates() ->
- case lists:keysearch(pending_updates, 1, erlang:system_info(check_io)) of
- {value, {pending_updates, 0}} ->
- ok;
- false ->
- ok;
+get_stable_check_io_info() ->
+ ChkIo = erlang:system_info(check_io),
+ PendUpdNo = case lists:keysearch(pending_updates, 1, ChkIo) of
+ {value, {pending_updates, PendNo}} ->
+ PendNo;
+ false ->
+ 0
+ end,
+ {value, {active_fds, ActFds}} = lists:keysearch(active_fds, 1, ChkIo),
+ case {PendUpdNo, ActFds} of
+ {0, 0} ->
+ ChkIo;
_ ->
receive after 10 -> ok end,
- wait_until_no_pending_updates()
+ get_stable_check_io_info()
end.
otp_6602(doc) -> ["Missed port lock when stealing control of fd from a "
@@ -1199,8 +1204,8 @@ check_si_res(["sched_thrs", Value]) ->
?line Value = integer_to_list(erlang:system_info(schedulers));
%% Data added in 3rd version of driver_system_info() (driver version 1.5)
-check_si_res(["emu_nif_vsn", _Value]) ->
- true;
+check_si_res(["emu_nif_vsn", Value]) ->
+ ?line Value = erlang:system_info(nif_version);
%% Data added in 4th version of driver_system_info() (driver version 3.1)
check_si_res(["dirty_sched", _Value]) ->
@@ -2387,10 +2392,25 @@ count_proc_sched(Ps, PNs) ->
PNs
end.
+a_test(Config) when is_list(Config) ->
+ check_io_debug().
+
+z_test(Config) when is_list(Config) ->
+ check_io_debug().
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Utilities
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+check_io_debug() ->
+ get_stable_check_io_info(),
+ {NoErrorFds, NoUsedFds, NoDrvSelStructs, NoDrvEvStructs}
+ = erts_debug:get_internal_state(check_io_debug),
+ 0 = NoErrorFds,
+ NoUsedFds = NoDrvSelStructs,
+ 0 = NoDrvEvStructs,
+ ok.
+
%flush_msgs() ->
% receive
% M ->
diff --git a/erts/emulator/test/float_SUITE_data/fp_drv.c b/erts/emulator/test/float_SUITE_data/fp_drv.c
index b80385c3f9..82d18d6440 100644
--- a/erts/emulator/test/float_SUITE_data/fp_drv.c
+++ b/erts/emulator/test/float_SUITE_data/fp_drv.c
@@ -29,9 +29,14 @@
#if defined (__GNUC__)
int _finite(double x);
#endif
-#ifndef finite
-#define finite _finite
+#ifndef isfinite
+#define isfinite _finite
#endif
+#elif !defined(HAVE_ISFINITE) && defined(HAVE_FINITE)
+/* If not windows and we do not have isfinite */
+#define isfinite finite
+#elif !defined(HAVE_ISFINITE)
+# error "No finite function found!"
#endif
#include "erl_driver.h"
@@ -79,21 +84,21 @@ do_test(void *unused)
x = 3.23e133;
y = 3.57e257;
z = x*y;
- if (finite(z))
+ if (isfinite(z))
return "is finite (1)";
x = 5.0;
y = 0.0;
z = x/y;
- if (finite(z))
+ if (isfinite(z))
return "is finite (2)";
z = log(-1.0);
- if (finite(z))
+ if (isfinite(z))
return "is finite (3)";
z = log(0.0);
- if (finite(z))
+ if (isfinite(z))
return "is finite (4)";
return "ok";
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index 14e6585220..4560077a51 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -1564,6 +1564,8 @@ dirty_nif(Config) when is_list(Config) ->
Val2 = "Erlang",
Val3 = list_to_binary([Val2, 0]),
{Val1, Val2, Val3} = call_dirty_nif(Val1, Val2, Val3),
+ LargeArray = lists:duplicate(1000, ok),
+ LargeArray = call_dirty_nif_zero_args(),
ok
catch
error:badarg ->
@@ -1740,6 +1742,7 @@ call_nif_schedule(_,_) -> ?nif_stub.
call_dirty_nif(_,_,_) -> ?nif_stub.
send_from_dirty_nif(_) -> ?nif_stub.
call_dirty_nif_exception() -> ?nif_stub.
+call_dirty_nif_zero_args() -> ?nif_stub.
%% maps
is_map_nif(_) -> ?nif_stub.
diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
index 291c903947..85544db2ab 100644
--- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
+++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c
@@ -1623,6 +1623,18 @@ static ERL_NIF_TERM call_dirty_nif_exception(ErlNifEnv* env, int argc, const ERL
call_dirty_nif_exception, argc-1, argv);
}
}
+
+static ERL_NIF_TERM call_dirty_nif_zero_args(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
+{
+ int i;
+ ERL_NIF_TERM result[1000];
+ ERL_NIF_TERM ok = enif_make_atom(env, "ok");
+ assert(argc == 0);
+ for (i = 0; i < sizeof(result)/sizeof(*result); i++) {
+ result[i] = ok;
+ }
+ return enif_make_list_from_array(env, result, i);
+}
#endif
static ERL_NIF_TERM is_map_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
@@ -1807,6 +1819,7 @@ static ErlNifFunc nif_funcs[] =
{"call_dirty_nif", 3, call_dirty_nif},
{"send_from_dirty_nif", 1, send_from_dirty_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND},
{"call_dirty_nif_exception", 0, call_dirty_nif_exception, ERL_NIF_DIRTY_JOB_IO_BOUND},
+ {"call_dirty_nif_zero_args", 0, call_dirty_nif_zero_args, ERL_NIF_DIRTY_JOB_CPU_BOUND},
#endif
{"is_map_nif", 1, is_map_nif},
{"get_map_size_nif", 1, get_map_size_nif},
diff --git a/erts/emulator/test/port_SUITE.erl b/erts/emulator/test/port_SUITE.erl
index e01b2f253b..8792f25d76 100644
--- a/erts/emulator/test/port_SUITE.erl
+++ b/erts/emulator/test/port_SUITE.erl
@@ -90,6 +90,7 @@
mix_up_ports/1, otp_5112/1, otp_5119/1, otp_6224/1,
exit_status_multi_scheduling_block/1, ports/1,
spawn_driver/1, spawn_executable/1, close_deaf_port/1,
+ port_setget_data/1,
unregister_name/1, parallelism_option/1]).
-export([do_iter_max_ports/2]).
@@ -115,6 +116,7 @@ all() ->
mix_up_ports, otp_5112, otp_5119,
exit_status_multi_scheduling_block, ports, spawn_driver,
spawn_executable, close_deaf_port, unregister_name,
+ port_setget_data,
parallelism_option].
groups() ->
@@ -2333,6 +2335,55 @@ close_deaf_port_1(N, Cmd) ->
{comment, "Could not spawn more than " ++ integer_to_list(N) ++ " OS processes."}
end.
+%% Test undocumented port_set_data/2 and port_get_data/1
+%% Hammer from multiple processes a while
+%% and then abrubtly close the port (OTP-12208).
+port_setget_data(Config) when is_list(Config) ->
+ ok = load_driver(?config(data_dir, Config), "echo_drv"),
+ Port = erlang:open_port({spawn_driver, "echo_drv"}, []),
+
+ NSched = erlang:system_info(schedulers_online),
+ PRs = lists:map(fun(I) ->
+ spawn_opt(fun() -> port_setget_data_hammer(Port,1) end,
+ [monitor, {scheduler, I rem NSched}])
+ end,
+ lists:seq(1,10)),
+ receive after 100 -> ok end,
+ Papa = self(),
+ lists:foreach(fun({Pid,_}) -> Pid ! {Papa,prepare_for_close} end, PRs),
+ lists:foreach(fun({Pid,_}) ->
+ receive {Pid,prepare_for_close} -> ok end
+ end,
+ PRs),
+ port_close(Port),
+ lists:foreach(fun({Pid,Ref}) ->
+ receive {'DOWN', Ref, process, Pid, normal} -> ok end
+ end,
+ PRs),
+ ok.
+
+port_setget_data_hammer(Port, N) ->
+ Rand = random:uniform(3),
+ try case Rand of
+ 1 -> true = erlang:port_set_data(Port, atom);
+ 2 -> true = erlang:port_set_data(Port, {1,2,3});
+ 3 -> erlang:port_get_data(Port)
+ end
+ catch
+ error:badarg ->
+ true = get(prepare_for_close),
+ io:format("~p did ~p rounds before port closed\n", [self(), N]),
+ exit(normal)
+ end,
+ receive {Papa, prepare_for_close} ->
+ put(prepare_for_close, true),
+ Papa ! {self(),prepare_for_close}
+ after 0 ->
+ ok
+ end,
+ port_setget_data_hammer(Port, N+1).
+
+
wait_until(Fun) ->
case catch Fun() of
true ->
diff --git a/erts/emulator/test/z_SUITE.erl b/erts/emulator/test/z_SUITE.erl
index 4b3075a164..b0c6224dfe 100644
--- a/erts/emulator/test/z_SUITE.erl
+++ b/erts/emulator/test/z_SUITE.erl
@@ -38,7 +38,7 @@
-export([schedulers_alive/1, node_container_refc_check/1,
long_timers/1, pollset_size/1,
- check_io_debug/1]).
+ check_io_debug/1, get_check_io_info/0]).
-define(DEFAULT_TIMEOUT, ?t:minutes(5)).
@@ -288,11 +288,14 @@ check_io_debug(Config) when is_list(Config) ->
end.
check_io_debug_test() ->
+ ?line erlang:display(get_check_io_info()),
?line erts_debug:set_internal_state(available_internal_state, true),
- ?line erlang:display(erlang:system_info(check_io)),
- ?line NoOfErrorFds = erts_debug:get_internal_state(check_io_debug),
+ ?line {NoErrorFds, NoUsedFds, NoDrvSelStructs, NoDrvEvStructs}
+ = erts_debug:get_internal_state(check_io_debug),
?line erts_debug:set_internal_state(available_internal_state, false),
- ?line 0 = NoOfErrorFds,
+ ?line 0 = NoErrorFds,
+ ?line NoUsedFds = NoDrvSelStructs,
+ ?line 0 = NoDrvEvStructs,
?line ok.
@@ -305,7 +308,7 @@ display_check_io(ChkIo) ->
catch erlang:display('--- CHECK IO INFO ---'),
catch erlang:display(ChkIo),
catch erts_debug:set_internal_state(available_internal_state, true),
- NoOfErrorFds = (catch erts_debug:get_internal_state(check_io_debug)),
+ NoOfErrorFds = (catch element(1, erts_debug:get_internal_state(check_io_debug))),
catch erlang:display({'NoOfErrorFds', NoOfErrorFds}),
catch erts_debug:set_internal_state(available_internal_state, false),
catch erlang:display('--- CHECK IO INFO ---'),
@@ -313,14 +316,19 @@ display_check_io(ChkIo) ->
get_check_io_info() ->
ChkIo = erlang:system_info(check_io),
- case lists:keysearch(pending_updates, 1, ChkIo) of
- {value, {pending_updates, 0}} ->
+ PendUpdNo = case lists:keysearch(pending_updates, 1, ChkIo) of
+ {value, {pending_updates, PendNo}} ->
+ PendNo;
+ false ->
+ 0
+ end,
+ {value, {active_fds, ActFds}} = lists:keysearch(active_fds, 1, ChkIo),
+ case {PendUpdNo, ActFds} of
+ {0, 0} ->
display_check_io(ChkIo),
ChkIo;
- false ->
- ChkIo;
_ ->
- receive after 10 -> ok end,
+ receive after 100 -> ok end,
get_check_io_info()
end.