Diffstat (limited to 'erts/emulator')
-rw-r--r--  erts/emulator/beam/big.c                          |  12
-rw-r--r--  erts/emulator/beam/erl_afit_alloc.c               |   2
-rw-r--r--  erts/emulator/beam/erl_alloc_util.c               | 200
-rw-r--r--  erts/emulator/beam/erl_alloc_util.h               |   4
-rw-r--r--  erts/emulator/beam/erl_ao_firstfit_alloc.c        |  61
-rw-r--r--  erts/emulator/beam/erl_async.c                    |   2
-rw-r--r--  erts/emulator/beam/erl_bestfit_alloc.c            |   2
-rw-r--r--  erts/emulator/beam/erl_bif_lists.c                |  10
-rw-r--r--  erts/emulator/beam/erl_db_catree.c                |  16
-rw-r--r--  erts/emulator/beam/erl_gc.c                       |   2
-rw-r--r--  erts/emulator/beam/erl_goodfit_alloc.c            |   2
-rw-r--r--  erts/emulator/beam/erl_hl_timer.c                 |  26
-rw-r--r--  erts/emulator/beam/erl_init.c                     |   4
-rw-r--r--  erts/emulator/beam/erl_port.h                     |   2
-rw-r--r--  erts/emulator/beam/erl_port_task.c                |  81
-rw-r--r--  erts/emulator/beam/erl_port_task.h                |  21
-rw-r--r--  erts/emulator/beam/erl_process.c                  | 200
-rw-r--r--  erts/emulator/beam/erl_process.h                  |   4
-rw-r--r--  erts/emulator/beam/erl_sched_spec_pre_alloc.h     |   9
-rw-r--r--  erts/emulator/beam/erl_thr_progress.c             |  34
-rw-r--r--  erts/emulator/beam/erl_thr_progress.h             |  29
-rw-r--r--  erts/emulator/beam/erl_trace.c                    |  15
-rw-r--r--  erts/emulator/drivers/common/inet_drv.c           | 310
-rw-r--r--  erts/emulator/nifs/common/prim_file_nif.c         |   4
-rw-r--r--  erts/emulator/sys/common/erl_check_io.c           | 318
-rw-r--r--  erts/emulator/sys/common/erl_check_io.h           |  21
-rw-r--r--  erts/emulator/sys/common/erl_mmap.h               |  57
-rw-r--r--  erts/emulator/sys/common/erl_poll.c               | 511
-rw-r--r--  erts/emulator/sys/common/erl_poll.h               |  14
-rw-r--r--  erts/emulator/sys/common/erl_poll_api.h           |   6
-rw-r--r--  erts/emulator/sys/win32/erl_poll.c                |  10
-rw-r--r--  erts/emulator/sys/win32/sys.c                     |   4
-rw-r--r--  erts/emulator/test/big_SUITE.erl                  |  18
-rw-r--r--  erts/emulator/test/driver_SUITE.erl               |  17
-rw-r--r--  erts/emulator/test/driver_SUITE_data/chkio_drv.c  |  49
-rw-r--r--  erts/emulator/test/scheduler_SUITE.erl            |  27
-rw-r--r--  erts/emulator/test/signal_SUITE.erl               |   2
37 files changed, 1605 insertions(+), 501 deletions(-)
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index 84338769e0..dac9574fa5 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -668,27 +668,25 @@ static dsize_t I_mul(ErtsDigit* x, dsize_t xl, ErtsDigit* y, dsize_t yl, ErtsDig
static dsize_t I_sqr(ErtsDigit* x, dsize_t xl, ErtsDigit* r)
{
- ErtsDigit d_next = *x;
ErtsDigit d;
ErtsDigit* r0 = r;
ErtsDigit* s = r;
if ((r + xl) == x) /* "Inline" operation */
*x = 0;
- x++;
while(xl--) {
- ErtsDigit* y = x;
+ ErtsDigit* y;
ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0;
ErtsDigit b0, b1;
ErtsDigit z0, z1, z2;
ErtsDigit t;
dsize_t y_l = xl;
-
+
+ d = *x;
+ x++;
+ y = x;
s = r;
- d = d_next;
- d_next = *x;
- x++;
DMUL(d, d, b1, b0);
DSUMc(*s, b0, y_3, t);
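
The fix above removes a one-digit look-ahead: the old loop primed d_next = *x and advanced x before knowing another iteration would run, so the final pass read one digit past the end of the operand. Loading d at the top of the loop body keeps every read in bounds. A minimal standalone sketch of the two shapes, with hypothetical sum_prefetch/sum_in_body standing in for the squaring loop:

    #include <stddef.h>

    /* Buggy shape: fetches xs[i+1] before knowing iteration i+1 will
     * run, so the last pass reads one element past the end. */
    static unsigned sum_prefetch(const unsigned *xs, size_t n)
    {
        unsigned next = *xs++;    /* already out of bounds when n == 0 */
        unsigned acc = 0;
        while (n--) {
            unsigned d = next;
            next = *xs++;         /* out-of-bounds read on the last pass */
            acc += d;
        }
        return acc;
    }

    /* Fixed shape: load the current element inside the body, never ahead. */
    static unsigned sum_in_body(const unsigned *xs, size_t n)
    {
        unsigned acc = 0;
        while (n--)
            acc += *xs++;         /* touches only xs[0..n-1] */
        return acc;
    }
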
diff --git a/erts/emulator/beam/erl_afit_alloc.c b/erts/emulator/beam/erl_afit_alloc.c
index 38289ea78a..f07137c883 100644
--- a/erts/emulator/beam/erl_afit_alloc.c
+++ b/erts/emulator/beam/erl_afit_alloc.c
@@ -102,6 +102,8 @@ erts_afalc_start(AFAllctr_t *afallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index b7a8b9c2d0..d238d38d27 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -42,6 +42,7 @@
#include "global.h"
#include "big.h"
+#include "erl_mmap.h"
#include "erl_mtrace.h"
#define GET_ERL_ALLOC_UTIL_IMPL
#include "erl_alloc_util.h"
@@ -90,6 +91,8 @@ static int initialized = 0;
#define SYS_ALLOC_CARRIER_FLOOR(X) ((X) & SYS_ALLOC_CARRIER_MASK)
#define SYS_ALLOC_CARRIER_CEILING(X) \
SYS_ALLOC_CARRIER_FLOOR((X) + INV_SYS_ALLOC_CARRIER_MASK)
+#define SYS_PAGE_SIZE (sys_page_size)
+#define SYS_PAGE_SZ_MASK ((UWord)(SYS_PAGE_SIZE - 1))
#if 0
/* Can be useful for debugging */
@@ -98,6 +101,8 @@ static int initialized = 0;
/* alloc_util global parameters */
static Uint sys_alloc_carrier_size;
+static Uint sys_page_size;
+
#if HAVE_ERTS_MSEG
static Uint max_mseg_carriers;
#endif
@@ -872,6 +877,8 @@ static ERTS_INLINE void clr_bit(UWord* map, Uint ix)
&= ~((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS));
}
+#ifdef DEBUG
+
static ERTS_INLINE int is_bit_set(UWord* map, Uint ix)
{
ASSERT(ix / ERTS_VSPACE_WORD_BITS < VSPACE_MAP_SZ);
@@ -879,6 +886,8 @@ static ERTS_INLINE int is_bit_set(UWord* map, Uint ix)
& ((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS));
}
+#endif
+
UWord erts_literal_vspace_map[VSPACE_MAP_SZ];
static void set_literal_range(void* start, Uint size)
@@ -2540,9 +2549,155 @@ mbc_alloc(Allctr_t *allctr, Uint size)
return BLK2UMEM(blk);
}
+typedef struct {
+ char *ptr;
+ UWord size;
+} ErtsMemDiscardRegion;
+
+/* Construct a discard region for the user memory of a free block, letting the
+ * OS reclaim its physical memory when required.
+ *
+ * Note that we're ignoring both the footer and everything that comes before
+ * the minimum block size as the allocator uses those areas to manage the
+ * block. */
+static void ERTS_INLINE
+mem_discard_start(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *out)
+{
+ UWord size = BLK_SZ(block);
+
+ ASSERT(size >= allocator->min_block_size);
+
+ if (size > (allocator->min_block_size + FBLK_FTR_SZ)) {
+ out->size = size - allocator->min_block_size - FBLK_FTR_SZ;
+ } else {
+ out->size = 0;
+ }
+
+ out->ptr = (char*)block + allocator->min_block_size;
+}
+
+/* Expands a discard region into a neighboring free block, allowing us to
+ * discard the block header and first page.
+ *
+ * This is very important in small-allocation scenarios where no single block
+ * is large enough to be discarded on its own. */
+static void ERTS_INLINE
+mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor,
+ ErtsMemDiscardRegion *region)
+{
+ char *neighbor_start;
+
+ ASSERT(IS_FREE_BLK(neighbor));
+
+ neighbor_start = (char*)neighbor;
+
+ if (region->ptr >= neighbor_start) {
+ char *region_start_page;
+
+ region_start_page = region->ptr - SYS_PAGE_SIZE;
+ region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK);
+
+ /* Expand if our first page begins within the previous free block's
+ * unused data. */
+ if (region_start_page >= (neighbor_start + allocator->min_block_size)) {
+ region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ;
+ region->ptr = region_start_page;
+ }
+ } else {
+ char *region_end_page;
+ UWord neighbor_size;
+
+ ASSERT(region->ptr <= neighbor_start);
+
+ region_end_page = region->ptr + region->size + SYS_PAGE_SIZE;
+ region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK);
+
+ neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ;
+
+ /* Expand if our last page ends anywhere within the next free block,
+ * sans the footer we'll inherit. */
+ if (region_end_page < neighbor_start + neighbor_size) {
+ region->size += region_end_page - (region->ptr + region->size);
+ }
+ }
+}
+
+static void ERTS_INLINE
+mem_discard_finish(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *region)
+{
+#ifdef DEBUG
+ char *block_start, *block_end;
+ UWord block_size;
+
+ block_size = BLK_SZ(block);
+
+ /* Ensure that the region is completely covered by the legal area of the
+ * free block. This must hold even when the region is too small to be
+ * discarded. */
+ if (region->size > 0) {
+ ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ);
+
+ block_start = (char*)block + allocator->min_block_size;
+ block_end = (char*)block + block_size - FBLK_FTR_SZ;
+
+ ASSERT(region->size == 0 ||
+ (region->ptr + region->size <= block_end &&
+ region->ptr >= block_start &&
+ region->size <= block_size));
+ }
+#else
+ (void)allocator;
+ (void)block;
+#endif
+
+ if (region->size > SYS_PAGE_SIZE) {
+ UWord align_offset, size;
+ char *ptr;
+
+ align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK);
+
+ size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK;
+ ptr = region->ptr + align_offset;
+
+ if (size > 0) {
+ ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK));
+ ASSERT(!(size & SYS_PAGE_SZ_MASK));
+
+ erts_mem_discard(ptr, size);
+ }
+ }
+}
+
+static void
+carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier)
+{
+ static const int MAX_BLOCKS_TO_DISCARD = 100;
+ Block_t *block;
+ int i;
+
+ block = allocator->first_fblk_in_mbc(allocator, carrier);
+ i = 0;
+
+ while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) {
+ ErtsMemDiscardRegion region;
+
+ ASSERT(IS_FREE_BLK(block));
+
+ mem_discard_start(allocator, block, &region);
+ mem_discard_finish(allocator, block, &region);
+
+ block = allocator->next_fblk_in_mbc(allocator, carrier, block);
+ i++;
+ }
+}
+
static void
mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp)
{
+ ErtsMemDiscardRegion discard_region = {0};
+ int discard;
Uint is_first_blk;
Uint is_last_blk;
Uint blk_sz;
@@ -2558,6 +2713,21 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
ASSERT(IS_MBC_BLK(blk));
ASSERT(blk_sz >= allctr->min_block_size);
+#ifndef DEBUG
+ /* We want to mark freed blocks as reclaimable to the OS, but it's a fairly
+ * expensive operation which doesn't do much good if the block is reused
+ * soon after, so we limit it to deallocations on pooled carriers. */
+ discard = busy_pcrr_pp && *busy_pcrr_pp;
+#else
+ /* Always discard in debug mode, regardless of whether we're in the pool or
+ * not. */
+ discard = 1;
+#endif
+
+ if (discard) {
+ mem_discard_start(allctr, blk, &discard_region);
+ }
+
HARD_CHECK_BLK_CARRIER(allctr, blk);
crr = ABLK_TO_MBC(blk);
@@ -2575,6 +2745,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
blk = PREV_BLK(blk);
(*allctr->unlink_free_block)(allctr, blk);
+ if (discard) {
+ mem_discard_coalesce(allctr, blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(blk);
is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2590,6 +2764,11 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
if (IS_FREE_BLK(nxt_blk)) {
/* Coalesce with next block... */
(*allctr->unlink_free_block)(allctr, nxt_blk);
+
+ if (discard) {
+ mem_discard_coalesce(allctr, nxt_blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(nxt_blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2625,10 +2804,16 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
else {
(*allctr->link_free_block)(allctr, blk);
HARD_CHECK_BLK_CARRIER(allctr, blk);
- if (busy_pcrr_pp && *busy_pcrr_pp)
+
+ if (discard) {
+ mem_discard_finish(allctr, blk, &discard_region);
+ }
+
+ if (busy_pcrr_pp && *busy_pcrr_pp) {
update_pooled_tree(allctr, crr, blk_sz);
- else
+ } else {
check_abandon_carrier(allctr, blk, busy_pcrr_pp);
+ }
}
}
@@ -3781,6 +3966,9 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr)
unlink_carrier(&allctr->mbc_list, crr);
allctr->remove_mbc(allctr, crr);
+ /* Mark our free blocks as unused and reclaimable to the OS. */
+ carrier_mem_discard_free_blocks(allctr, crr);
+
cpool_insert(allctr, crr);
@@ -6471,6 +6659,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
erts_atomic_init_nob(&allctr->cpool.stat.carriers_size, 0);
erts_atomic_init_nob(&allctr->cpool.stat.no_carriers, 0);
if (!init->ts && init->acul && init->acnl) {
+ ASSERT(allctr->add_mbc);
+ ASSERT(allctr->remove_mbc);
+ ASSERT(allctr->largest_fblk_in_mbc);
+ ASSERT(allctr->first_fblk_in_mbc);
+ ASSERT(allctr->next_fblk_in_mbc);
+
allctr->cpool.util_limit = init->acul;
allctr->cpool.in_pool_limit = init->acnl;
allctr->cpool.fblk_min_limit = init->acfml;
@@ -6676,6 +6870,8 @@ erts_alcu_init(AlcUInit_t *init)
#endif
allow_sys_alloc_carriers = init->sac;
+ sys_page_size = erts_sys_get_page_size();
+
#ifdef DEBUG
carrier_alignment = sizeof(Unit_t);
#endif
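
mem_discard_finish above only ever releases whole pages: it rounds the region start up to the next page boundary, truncates the remaining length to a page multiple, and only then calls erts_mem_discard. A standalone sketch of that alignment arithmetic, with PAGE_SIZE and discard() as stand-ins for sys_page_size and erts_mem_discard:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    #define PAGE_SIZE ((uintptr_t)4096)
    #define PAGE_MASK (PAGE_SIZE - 1)

    void discard(void *ptr, size_t size);   /* erts_mem_discard stand-in */

    /* Hand the OS only the whole pages contained in [ptr, ptr + size). */
    static void discard_whole_pages(char *ptr, uintptr_t size)
    {
        if (size > PAGE_SIZE) {
            /* Distance from ptr up to the next page boundary; a full
             * page when ptr is already aligned, as in the code above. */
            uintptr_t align_offset = PAGE_SIZE - ((uintptr_t)ptr & PAGE_MASK);
            uintptr_t aligned_size = (size - align_offset) & ~PAGE_MASK;

            if (aligned_size > 0) {
                assert((((uintptr_t)ptr + align_offset) & PAGE_MASK) == 0);
                discard(ptr + align_offset, (size_t)aligned_size);
            }
        }
    }
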
diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h
index 9ab8589bf3..ea1afe8f58 100644
--- a/erts/emulator/beam/erl_alloc_util.h
+++ b/erts/emulator/beam/erl_alloc_util.h
@@ -684,10 +684,12 @@ struct Allctr_t_ {
void (*creating_mbc) (Allctr_t *, Carrier_t *);
void (*destroying_mbc) (Allctr_t *, Carrier_t *);
- /* The three callbacks below are needed to support carrier migration */
+ /* The five callbacks below are needed to support carrier migration. */
void (*add_mbc) (Allctr_t *, Carrier_t *);
void (*remove_mbc) (Allctr_t *, Carrier_t *);
UWord (*largest_fblk_in_mbc) (Allctr_t *, Carrier_t *);
+ Block_t * (*first_fblk_in_mbc) (Allctr_t *, Carrier_t *);
+ Block_t * (*next_fblk_in_mbc) (Allctr_t *, Carrier_t *, Block_t *);
#if HAVE_ERTS_MSEG
void* (*mseg_alloc)(Allctr_t*, Uint *size_p, Uint flags);
diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c
index 0e3e4c890a..f2ad2f6532 100644
--- a/erts/emulator/beam/erl_ao_firstfit_alloc.c
+++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c
@@ -241,6 +241,9 @@ static void aoff_add_mbc(Allctr_t*, Carrier_t*);
static void aoff_remove_mbc(Allctr_t*, Carrier_t*);
static UWord aoff_largest_fblk_in_mbc(Allctr_t*, Carrier_t*);
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *, Carrier_t *);
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *, Carrier_t *, Block_t *);
+
/* Generic tree functions used by both carrier and block trees. */
static void rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del);
static void rbt_insert(enum AOFFSortOrder, AOFF_RBTree_t** root, AOFF_RBTree_t* blk);
@@ -326,6 +329,8 @@ erts_aoffalc_start(AOFFAllctr_t *alc,
allctr->add_mbc = aoff_add_mbc;
allctr->remove_mbc = aoff_remove_mbc;
allctr->largest_fblk_in_mbc = aoff_largest_fblk_in_mbc;
+ allctr->first_fblk_in_mbc = aoff_first_fblk_in_mbc;
+ allctr->next_fblk_in_mbc = aoff_next_fblk_in_mbc;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
@@ -1058,6 +1063,62 @@ static UWord aoff_largest_fblk_in_mbc(Allctr_t* allctr, Carrier_t* carrier)
return crr->rbt_node.hdr.bhdr;
}
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier)
+{
+ AOFF_Carrier_t *crr = (AOFF_Carrier_t*)carrier;
+
+ (void)allctr;
+
+ if (crr->root) {
+ AOFF_RBTree_t *blk;
+
+ /* Descend to the rightmost block of the tree. */
+ for (blk = crr->root; blk->right; blk = blk->right);
+
+ return (Block_t*)blk;
+ }
+
+ return NULL;
+}
+
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier,
+ Block_t *block)
+{
+ AOFF_RBTree_t *parent, *blk;
+
+ (void)allctr;
+ (void)carrier;
+
+ blk = (AOFF_RBTree_t*)block;
+
+ if (blk->left) {
+ /* Descend to the rightmost block of the left subtree. */
+ for (blk = blk->left; blk->right; blk = blk->right);
+
+ return (Block_t*)blk;
+ }
+
+ while (blk->parent) {
+ parent = blk->parent;
+
+ /* If we ascend from the right we know we haven't visited our parent
+ * yet, because we always descend as far as we can to the right when
+ * entering a subtree. */
+ if (parent->right == blk) {
+ ASSERT(parent->left != blk);
+ return (Block_t*)parent;
+ }
+
+ /* If we ascend from the left we know we've already visited our
+ * parent, and will need to keep ascending until we do so from the
+ * right or reach the end of the tree. */
+ ASSERT(parent->left == blk);
+ blk = parent;
+ }
+
+ return NULL;
+}
+
/*
* info_options()
*/
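
The two callbacks above enumerate every free block in a carrier by walking the address-ordered tree in reverse, using only parent pointers and no auxiliary stack: start at the rightmost node, then for each step either take the rightmost node of the left subtree or climb until arriving from a right child. A generic sketch of the same traversal over a hypothetical node_t:

    #include <stddef.h>

    typedef struct node {
        struct node *parent, *left, *right;
    } node_t;

    /* Rightmost node: the first element of a reverse in-order walk. */
    static node_t *tree_last(node_t *root)
    {
        if (!root)
            return NULL;
        while (root->right)
            root = root->right;
        return root;
    }

    /* Predecessor in key order, i.e. the next node of the reverse walk. */
    static node_t *tree_prev(node_t *blk)
    {
        if (blk->left) {
            /* Rightmost node of the left subtree. */
            return tree_last(blk->left);
        }
        while (blk->parent) {
            node_t *parent = blk->parent;
            /* Coming up from the right means the parent is unvisited;
             * coming up from the left means we have already seen it. */
            if (parent->right == blk)
                return parent;
            blk = parent;
        }
        return NULL;    /* climbed past the root: walk is complete */
    }
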
diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c
index 605a2b3461..44655ad5df 100644
--- a/erts/emulator/beam/erl_async.c
+++ b/erts/emulator/beam/erl_async.c
@@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q,
case ERTS_THR_Q_NEED_THR_PRGR:
{
ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q);
- erts_thr_progress_wakeup(NULL, prgr);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr);
/*
* We do no dequeue finalizing in hope that a new async
* job will arrive before we are woken due to thread
diff --git a/erts/emulator/beam/erl_bestfit_alloc.c b/erts/emulator/beam/erl_bestfit_alloc.c
index 9cb1199c2a..ca81c14b96 100644
--- a/erts/emulator/beam/erl_bestfit_alloc.c
+++ b/erts/emulator/beam/erl_bestfit_alloc.c
@@ -209,6 +209,8 @@ erts_bfalc_start(BFAllctr_t *bfallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index 9b6b84d000..b23fa77f5f 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -1040,7 +1040,8 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
Eterm list;
Eterm item;
int non_immed_key;
- int max_iter = 10 * CONTEXT_REDS;
+ int reds_left = ERTS_BIF_REDS_LEFT(BIF_P);
+ int max_iter = 16 * reds_left;
if (is_nil(BIF_ARG_2)) {
BIF_RET(am_false);
@@ -1058,14 +1059,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2)
}
item = CAR(list_val(list));
if ((item == term) || (non_immed_key && eq(item, term))) {
- BIF_RET2(am_true, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_true, reds_left - max_iter/16);
}
list = CDR(list_val(list));
}
if (is_not_nil(list)) {
+ BUMP_REDS(BIF_P, reds_left - max_iter/16);
BIF_ERROR(BIF_P, BADARG);
}
- BIF_RET2(am_false, CONTEXT_REDS - max_iter/10);
+ BIF_RET2(am_false, reds_left - max_iter/16);
}
static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
@@ -1159,8 +1161,6 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
if (is_nil(list)) {
BIF_RET(tail);
} else if (is_list(list)) {
- ASSERT(is_list(tail));
-
if (cells_left > CELLS_PER_RED) {
return lists_reverse_alloc(c_p, list, tail);
}
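
lists:member/2 now sizes its iteration budget from the reductions the process actually has left, at 16 list cells per reduction, and reports what it consumed both on a normal return and (via BUMP_REDS) before raising badarg on an improper list. A sketch of that budgeting pattern over a plain array, with hypothetical names member_budgeted and CELLS_PER_RED:

    #include <stdbool.h>
    #include <stddef.h>

    #define CELLS_PER_RED 16    /* list cells walked per reduction charged */

    /* Search at most reds_left * CELLS_PER_RED elements and report the
     * reductions consumed, so the caller can charge them to the process. */
    static bool member_budgeted(const int *list, size_t len, int key,
                                int reds_left, int *reds_used)
    {
        long budget = (long)reds_left * CELLS_PER_RED;
        bool found = false;
        size_t i;

        for (i = 0; i < len && budget > 0; i++, budget--) {
            if (list[i] == key) {
                found = true;
                break;
            }
        }
        *reds_used = reds_left - (int)(budget / CELLS_PER_RED);
        return found;   /* false: not present, or budget exhausted */
    }
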
diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c
index b642ae009d..75ac1c4a93 100644
--- a/erts/emulator/beam/erl_db_catree.c
+++ b/erts/emulator/beam/erl_db_catree.c
@@ -621,9 +621,9 @@ static TreeDbTerm* join_trees(TreeDbTerm *left_root_param,
}
#ifdef DEBUG
-# define FORCE_RANDOM_SPLIT_JOIN
+# define PROVOKE_RANDOM_SPLIT_JOIN
#endif
-#ifdef FORCE_RANDOM_SPLIT_JOIN
+#ifdef PROVOKE_RANDOM_SPLIT_JOIN
static int dbg_fastrand(void)
{
static int g_seed = 648835;
@@ -631,8 +631,12 @@ static int dbg_fastrand(void)
return (g_seed>>16)&0x7FFF;
}
-static void dbg_maybe_force_splitjoin(DbTableCATreeNode* base_node)
+static void dbg_provoke_random_splitjoin(DbTableCATree* tb,
+ DbTableCATreeNode* base_node)
{
+ if (tb->common.status & DB_CATREE_FORCE_SPLIT)
+ return;
+
switch (dbg_fastrand() % 8) {
case 1:
base_node->u.base.lock_statistics = 1+ERL_DB_CATREE_HIGH_CONTENTION_LIMIT;
@@ -643,8 +647,8 @@ static void dbg_maybe_force_splitjoin(DbTableCATreeNode* base_node)
}
}
#else
-# define dbg_maybe_force_splitjoin(N)
-#endif /* FORCE_RANDOM_SPLIT_JOIN */
+# define dbg_provoke_random_splitjoin(T,N)
+#endif /* PROVOKE_RANDOM_SPLIT_JOIN */
static ERTS_INLINE
int try_wlock_base_node(DbTableCATreeBaseNode *base_node)
@@ -691,7 +695,7 @@ void wunlock_adapt_base_node(DbTableCATree* tb,
DbTableCATreeNode* parent,
int current_level)
{
- dbg_maybe_force_splitjoin(node);
+ dbg_provoke_random_splitjoin(tb,node);
if ((!node->u.base.root && parent && !(tb->common.status
& DB_CATREE_FORCE_SPLIT))
|| node->u.base.lock_statistics < ERL_DB_CATREE_LOW_CONTENTION_LIMIT) {
diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c
index b4df418cd5..d5dfb096b1 100644
--- a/erts/emulator/beam/erl_gc.c
+++ b/erts/emulator/beam/erl_gc.c
@@ -2477,7 +2477,7 @@ erts_copy_one_frag(Eterm** hpp, ErlOffHeap* off_heap,
*hpp = hp;
for (i = 0; i < nrefs; i++) {
- if (is_not_immed(refs[i]))
+ if (is_not_immed(refs[i]) && !erts_is_literal(refs[i],boxed_val(refs[i])))
refs[i] = offset_ptr(refs[i], offs);
}
bp->off_heap.first = NULL;
diff --git a/erts/emulator/beam/erl_goodfit_alloc.c b/erts/emulator/beam/erl_goodfit_alloc.c
index 01d4aa54ff..68b9579433 100644
--- a/erts/emulator/beam/erl_goodfit_alloc.c
+++ b/erts/emulator/beam/erl_goodfit_alloc.c
@@ -226,6 +226,8 @@ erts_gfalc_start(GFAllctr_t *gfallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c
index 6ec6f8065e..ef7a55fa38 100644
--- a/erts/emulator/beam/erl_hl_timer.c
+++ b/erts/emulator/beam/erl_hl_timer.c
@@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo)
check_canceled_queue(esdp, esdp->timer_service);
- timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
-
- create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
- ? create_tw_timer
- : create_hl_timer);
- tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
- (void *) c_prt, c_prt->common.id,
- THE_NON_VALUE, NULL, NULL, NULL);
- erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ if (tmo == 0) {
+ erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT);
+ erts_port_task_schedule(c_prt->common.id,
+ &c_prt->timeout_task,
+ ERTS_PORT_TASK_TIMEOUT);
+ } else {
+
+ timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo);
+
+ create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC
+ ? create_tw_timer
+ : create_hl_timer);
+ tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT,
+ (void *) c_prt, c_prt->common.id,
+ THE_NON_VALUE, NULL, NULL, NULL);
+ erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr);
+ }
}
void
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index dcf9f90c99..08c9125840 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -2320,8 +2320,8 @@ system_cleanup(int flush_async)
* The exiting thread might be waiting for
* us to block; need to update status...
*/
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(erts_thr_prgr_data(NULL), 0);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
}
/* Wait forever... */
while (1)
diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h
index 2be0a5bf74..25976d38cc 100644
--- a/erts/emulator/beam/erl_port.h
+++ b/erts/emulator/beam/erl_port.h
@@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p);
#define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11))
/* Last port to terminate halts the emulator */
#define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12))
+/* Check if the event in ready_input should be cleaned */
+#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13))
#ifdef DEBUG
/* Only debug: make sure all flags aren't cleared unintentionally */
#define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31))
diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c
index 4928d80f27..c8f2e88127 100644
--- a/erts/emulator/beam/erl_port_task.c
+++ b/erts/emulator/beam/erl_port_task.c
@@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q
typedef union {
struct { /* I/O tasks */
ErlDrvEvent event;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ int is_scheduler_event;
+#endif
} io;
struct {
ErtsProc2PortSigCallback callback;
@@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ {
ErtsPortTaskBusyCaller pre_alloc_busy_caller;
};
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
static void begin_port_cleanup(Port *pp,
ErtsPortTask **execq,
@@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp)
}
static ERTS_INLINE void
-reset_executed_io_task_handle(ErtsPortTask *ptp)
+reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp)
{
if (ptp->u.alive.handle) {
ASSERT(ptp == handle2task(ptp->u.alive.handle));
- /* The port task handle is reset inside task_executed */
- erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
- reset_port_task_handle);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) {
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
+ } else {
+ reset_port_task_handle(ptp->u.alive.handle);
+ }
+ } else
+#endif
+ {
+ /* The port task handle is reset inside task_executed */
+ erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle,
+ reset_port_task_handle);
+ }
}
}
@@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp)
res = - 1; /* Task already aborted, executing, or executed */
else {
reset_port_task_handle(pthp);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ switch (ptp->type) {
+ case ERTS_PORT_TASK_INPUT:
+ case ERTS_PORT_TASK_OUTPUT:
+ if (ptp->u.alive.td.io.is_scheduler_event) {
+ ASSERT(erts_atomic_read_nob(
+ &erts_port_task_outstanding_io_tasks) > 0);
+ erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks);
+ }
+ break;
+ default:
+ break;
+ }
+#endif
+
res = 0;
}
}
@@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id,
va_list argp;
va_start(argp, type);
ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int);
+#endif
va_end(argp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks);
+#endif
break;
}
case ERTS_PORT_TASK_PROC_SIG: {
@@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
int processing_busy_q;
int vreds = 0;
int reds = 0;
- erts_aint_t io_tasks_executed = 0;
int fpe_was_unmasked;
erts_aint32_t state;
int active;
Uint64 start_time = 0;
ErtsSchedulerData *esdp = runq->scheduler;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_aint_t io_tasks_executed = 0;
+#endif
ERTS_MSACC_PUSH_STATE_M();
ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq));
@@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
for input and output */
(*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_OUTPUT:
reds = ERTS_PORT_REDS_OUTPUT;
@@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
LTTNG_DRIVER(driver_ready_output, pp);
(*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data,
ptp->u.alive.td.io.event);
- reset_executed_io_task_handle(ptp);
- io_tasks_executed++;
+ reset_executed_io_task_handle(pp, ptp);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (ptp->u.alive.td.io.is_scheduler_event)
+ io_tasks_executed++;
+#endif
break;
case ERTS_PORT_TASK_PROC_SIG: {
ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data;
@@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp)
erts_unblock_fpe(fpe_was_unmasked);
ERTS_MSACC_POP_STATE_M();
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (io_tasks_executed) {
+ ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks)
+ >= io_tasks_executed);
+ erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks,
+ -1*io_tasks_executed);
+ }
+#endif
+
ASSERT(runq == erts_get_runq_port(pp));
active = finalize_exec(pp, &execq, processing_busy_q);
@@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq)
void
erts_port_task_init(void)
{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks,
+ (erts_aint_t) 0);
+#endif
init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads
+ 1); /* aux_thread */
init_busy_caller_table_alloc();
diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h
index ae78a7d8a3..ca5183b305 100644
--- a/erts/emulator/beam/erl_port_task.h
+++ b/erts/emulator/beam/erl_port_task.h
@@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#ifndef ERL_PORT_TASK_H__
#define ERL_PORT_TASK_H__
+#include "erl_poll.h"
+
#undef ERTS_INCLUDE_SCHEDULER_INTERNALS
#if (defined(ERL_PROCESS_C__) \
|| defined(ERL_PORT_TASK_C__) \
@@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle;
#define ERTS_PT_FLG_BAD_OUTPUT (1 << 4)
typedef enum {
- ERTS_PORT_TASK_INPUT,
- ERTS_PORT_TASK_OUTPUT,
+ ERTS_PORT_TASK_INPUT = 0,
+ ERTS_PORT_TASK_OUTPUT = 1,
ERTS_PORT_TASK_TIMEOUT,
ERTS_PORT_TASK_DIST_CMD,
ERTS_PORT_TASK_PROC_SIG
@@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp);
ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp);
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void);
+/* NOTE: Do not access any of the exported variables directly */
+extern erts_atomic_t erts_port_task_outstanding_io_tasks;
+#endif
+
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
ERTS_GLB_INLINE void
@@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp)
erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING);
}
+#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING
+ERTS_GLB_INLINE int
+erts_port_task_have_outstanding_io_tasks(void)
+{
+ return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks)
+ != 0);
+}
+#endif
+
#endif
#ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS
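
The counter declared above backs a simple protocol: scheduling a scheduler-pollset I/O task increments an atomic with release ordering, aborted tasks and executed batches decrement it, and erts_port_task_have_outstanding_io_tasks reads it with acquire ordering so a scheduler never goes to sleep in the poll set while ready events still await processing. A C11-atomics sketch of the same pattern, under hypothetical names:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_long outstanding_io_tasks;   /* zero-initialized */

    static void io_task_scheduled(void)        /* on schedule */
    {
        atomic_fetch_add_explicit(&outstanding_io_tasks, 1,
                                  memory_order_release);
    }

    static void io_tasks_done(long n)          /* on abort / after a batch */
    {
        atomic_fetch_sub_explicit(&outstanding_io_tasks, n,
                                  memory_order_release);
    }

    static bool have_outstanding_io_tasks(void)
    {
        /* Acquire pairs with the releases above: whatever made a task
         * outstanding is visible before we decide it is safe to poll. */
        return atomic_load_explicit(&outstanding_io_tasks,
                                    memory_order_acquire) != 0;
    }
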
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 1dde9800f8..a24f4bc193 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
typedef struct {
int aux_work;
int tse;
- int sys_schedule;
} ErtsBusyWaitParams;
static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1];
@@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat;
static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static erts_atomic32_t doing_sys_schedule;
+#endif
static erts_atomic32_t no_empty_run_queues;
long erts_runq_supervision_interval = 0;
static ethr_event runq_supervision_event;
@@ -1646,7 +1648,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
}
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1656,7 +1658,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val)
if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) {
awdp->latest_wakeup = val;
haw_chk_later_cleanup_op_wakeup(awdp, val);
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
@@ -1670,7 +1672,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val,
else {
awdp->latest_wakeup = val;
awdp->later_op.size = thr_prgr_later_cleanup_op_threshold;
- erts_thr_progress_wakeup(awdp->esdp, val);
+ erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val);
}
}
}
@@ -3066,6 +3068,7 @@ aux_thread(void *unused)
ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1);
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
+ ErtsThrPrgrData *tpd;
int thr_prgr_active = 1;
ERTS_MSACC_DECLARE_CACHE();
@@ -3087,12 +3090,16 @@ aux_thread(void *unused)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
#if ERTS_POLL_USE_FALLBACK
- ssi->psi = erts_create_pollset_thread(-1);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ ssi->psi = erts_create_pollset_thread(-2, tpd);
+#else
+ ssi->psi = erts_create_pollset_thread(-1, tpd);
+#endif
#endif
sched_prep_spin_wait(ssi);
@@ -3105,11 +3112,11 @@ aux_thread(void *unused)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
@@ -3120,7 +3127,7 @@ aux_thread(void *unused)
#endif
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
#if ERTS_POLL_USE_FALLBACK
@@ -3132,11 +3139,11 @@ aux_thread(void *unused)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(ssi->psi);
+ erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT);
}
}
#else
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
flgs = sched_spin_wait(ssi, 0);
@@ -3153,7 +3160,7 @@ aux_thread(void *unused)
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER);
}
}
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
#endif
}
@@ -3171,7 +3178,8 @@ poll_thread(void *arg)
erts_aint32_t aux_work;
ErtsThrPrgrCallbacks callbacks;
int thr_prgr_active = 1;
- struct erts_poll_thread *psi = erts_create_pollset_thread(id);
+ struct erts_poll_thread *psi;
+ ErtsThrPrgrData *tpd;
ERTS_MSACC_DECLARE_CACHE();
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -3192,9 +3200,12 @@ poll_thread(void *arg)
callbacks.wait = thr_prgr_wait;
callbacks.finalize_wait = thr_prgr_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
init_aux_work_data(awdp, NULL, NULL);
awdp->ssi = ssi;
+
+ psi = erts_create_pollset_thread(id, tpd);
+
ssi->psi = psi;
sched_prep_spin_wait(ssi);
@@ -3207,16 +3218,16 @@ poll_thread(void *arg)
aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
if (aux_work) {
if (!thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ erts_thr_progress_active(tpd, thr_prgr_active = 1);
aux_work = handle_aux_work(awdp, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (aux_work && erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
}
if (!aux_work) {
if (thr_prgr_active)
- erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_active(tpd, thr_prgr_active = 0);
flgs = sched_spin_wait(ssi, 0);
@@ -3226,7 +3237,7 @@ poll_thread(void *arg)
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- erts_check_io(psi);
+ erts_check_io(psi, ERTS_POLL_INF_TIMEOUT);
}
}
}
@@ -3236,6 +3247,59 @@ poll_thread(void *arg)
return NULL;
}
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ERTS_INLINE void
+clear_sys_scheduling(void)
+{
+ erts_atomic32_set_mb(&doing_sys_schedule, 0);
+}
+
+static ERTS_INLINE int
+try_set_sys_scheduling(void)
+{
+ return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0);
+}
+
+
+static ERTS_INLINE int
+prepare_for_sys_schedule(void)
+{
+ while (!erts_port_task_have_outstanding_io_tasks()
+ && try_set_sys_scheduling()) {
+ if (!erts_port_task_have_outstanding_io_tasks())
+ return 1;
+ clear_sys_scheduling();
+ }
+ return 0;
+}
+
+static void
+check_io_timer(void *null)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ if (prepare_for_sys_schedule()) {
+ erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT);
+ clear_sys_scheduling();
+ }
+
+ /* The timer is cleared if this scheduler's run-queue became empty
+ or if the CHECKIO flag was cleared. The CHECKIO flag is cleared
+ when check_balance assigns another scheduler to be the poller in
+ the overload scenario. */
+ if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == ERTS_RUNQ_FLG_CHECKIO) {
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT,
+ check_io_timer, NULL);
+ } else {
+ ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO);
+ }
+}
+
+#else
+#define clear_sys_scheduling()
+#define prepare_for_sys_schedule() 0
+#endif
+
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
@@ -3291,13 +3355,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (aux_work) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1);
ERTS_MSACC_UPDATE_CACHE();
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
}
if (aux_work) {
@@ -3305,7 +3369,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
current_time = erts_get_monotonic_time(esdp);
if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3324,19 +3388,36 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
if (do_timeout) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
- else {
+ else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) {
+ /* We sleep in check_io; only normal schedulers do this */
+ if (thr_prgr_active) {
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
+ sched_wall_time_change(esdp, 0);
+ }
+ flgs = sched_spin_wait(ssi, 0);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ erts_check_io(ssi->psi, timeout_time);
+ current_time = erts_get_monotonic_time(esdp);
+ }
+ }
+ clear_sys_scheduling();
+ } else {
if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(esdp);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp));
}
-
flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
@@ -3366,7 +3447,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
}
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp))
- erts_thr_progress_finalize_wait(esdp);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp));
}
if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time)
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -3395,7 +3476,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (ERTS_SCHEDULER_IS_DIRTY(esdp))
dirty_sched_wall_time_change(esdp, working = 1);
else if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
@@ -4576,6 +4657,15 @@ check_balance(ErtsRunQueue *c_rq)
if (blnc_no_rqs == 1) {
c_rq->check_balance_reds = INT_MAX;
erts_atomic32_set_nob(&balance_info.checking_balance, 0);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS;
+ if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO))
+ == 0) {
+ ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO);
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO);
+#endif
return;
}
@@ -5095,6 +5185,19 @@ erts_fprintf(stderr, "--------------------------------\n");
/* Publish new migration paths... */
erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ if (full_scheds == current_active) {
+ ERTS_ASSERT(full_scheds <= current_active);
+ /* All active schedulers ran at full load; we need to do active
+ polling, so we set up a timer that polls on their behalf. */
+ if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) {
+ /* Active polling is not running, start it */
+ erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL);
+ }
+ run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO;
+ }
+#endif
+
/* Reset balance statistics in all online queues */
for (qix = 0; qix < blnc_no_rqs; qix++) {
Uint32 flags = run_queue_info[qix].flags;
@@ -5104,6 +5207,8 @@ erts_fprintf(stderr, "--------------------------------\n");
ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK));
if (rq->waiting)
flags |= ERTS_RUNQ_FLG_OUT_OF_WORK;
+ if (rq != c_rq)
+ flags &= ~ERTS_RUNQ_FLG_CHECKIO;
rq->full_reds_history_sum
= run_queue_info[qix].full_reds_history_sum;
@@ -5113,8 +5218,7 @@ erts_fprintf(stderr, "--------------------------------\n");
ERTS_DBG_CHK_FULL_REDS_HISTORY(rq);
rq->out_of_work_count = 0;
- (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags);
-
+ (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags);
rq->max_len = erts_atomic32_read_dirty(&rq->len);
for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) {
ErtsRunQueueInfo *rqi;
@@ -5553,7 +5657,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str)
return EINVAL;
}
- params->sys_schedule = sys_sched;
params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT;
params->aux_work = sys_sched * aux_work_fact;
@@ -5764,6 +5867,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th
size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs;
erts_aligned_run_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs);
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_atomic32_init_nob(&doing_sys_schedule, 0);
+#endif
erts_atomic32_init_nob(&no_empty_run_queues, 0);
erts_no_run_queues = n;
@@ -7551,7 +7657,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (aux_work|evacuate) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp),
+ thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
if (aux_work)
@@ -7559,8 +7666,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
aux_work,
1);
- if (aux_work && erts_thr_progress_update(esdp))
- erts_thr_progress_leader_update(esdp);
+ if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp)))
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (evacuate) {
erts_runq_lock(esdp->run_queue);
evacuate_run_queue(esdp->run_queue, &sbp);
@@ -7579,18 +7686,18 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (!aux_work && current_time < timeout_time) {
/* go to sleep... */
if (thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0);
sched_wall_time_change(esdp, 0);
}
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL));
suspend_normal_scheduler_sleep(esdp);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL));
current_time = erts_get_monotonic_time(esdp);
}
if (current_time >= timeout_time) {
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
erts_bump_timers(esdp->timer_wheel, current_time);
@@ -7647,7 +7754,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
profile_scheduler(make_small(esdp->no), am_active);
if (!thr_prgr_active) {
- erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
sched_wall_time_change(esdp, 1);
}
}
@@ -8282,6 +8389,11 @@ sched_thread_func(void *vesdp)
erts_msacc_init_thread("scheduler", no, 1);
erts_thr_progress_register_managed_thread(esdp, &callbacks, 0);
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ esdp->ssi->psi = erts_create_pollset_thread(-1, NULL);
+#endif
+
erts_alloc_register_scheduler(vesdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
{
@@ -9296,12 +9408,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls)
}
}
- leader_update = erts_thr_progress_update(esdp);
+ leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp));
aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
if (aux_work | leader_update) {
erts_runq_unlock(rq);
if (leader_update)
- erts_thr_progress_leader_update(esdp);
+ erts_thr_progress_leader_update(erts_thr_prgr_data(esdp));
if (aux_work)
handle_aux_work(&esdp->aux_work_data, aux_work, 0);
erts_runq_lock(rq);
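
prepare_for_sys_schedule above is a claim-then-recheck pattern: a scheduler checks that no I/O tasks are outstanding, claims the single doing_sys_schedule slot with a compare-and-swap, then re-checks the predicate under the claim so a task scheduled in the window between check and claim can never be missed. A standalone C11 sketch of the same idea, assuming the have_outstanding_io_tasks predicate:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int doing_sys_schedule;         /* 0 = free, 1 = claimed */
    extern bool have_outstanding_io_tasks(void);  /* assumed predicate */

    static bool prepare_for_sys_schedule(void)
    {
        while (!have_outstanding_io_tasks()) {
            int expected = 0;
            if (!atomic_compare_exchange_strong(&doing_sys_schedule,
                                                &expected, 1))
                return false;        /* another scheduler is polling */
            if (!have_outstanding_io_tasks())
                return true;         /* claim held, predicate re-verified */
            /* A task appeared between check and claim: undo and retry. */
            atomic_store(&doing_sys_schedule, 0);
        }
        return false;
    }
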
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 8d20ccdf90..a1b029adbe 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size;
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9))
#define ERTS_RUNQ_FLG_HALTING \
(((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10))
+#define ERTS_RUNQ_FLG_CHECKIO \
+ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11))
-#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11)
+#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12)
#define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \
(ERTS_RUNQ_FLGS_EMIGRATE_QMASK \
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index b119c59ab3..74cc966cbe 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
erts_sspa_chunk_t *chnk;
erts_sspa_chunk_header_t *chdr;
erts_sspa_blk_t *res;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC);
chnk = erts_sspa_cix2chunk(data, cix);
chdr = &chnk->aligned.header;
@@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix)
chdr->local.last = NULL;
ERTS_SSPA_DBG_CHK_LCL(chdr);
}
- if (chdr->local.cnt <= chdr->local.lim)
- return (char *) erts_sspa_process_remote_frees(chdr, res);
+ if (chdr->local.cnt <= chdr->local.lim) {
+ res = erts_sspa_process_remote_frees(chdr, res);
+ ERTS_MSACC_POP_STATE_M_X();
+ return (char*) res;
+ }
else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
chdr->head.no_thr_progress_check++;
ASSERT(res);
+ ERTS_MSACC_POP_STATE_M_X();
return (char *) res;
}
diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c
index aa08eb40ec..bac437efe9 100644
--- a/erts/emulator/beam/erl_thr_progress.c
+++ b/erts/emulator/beam/erl_thr_progress.c
@@ -508,6 +508,10 @@ init_wakeup_request_array(ErtsThrPrgrVal *w)
}
}
+ErtsThrPrgrData *erts_thr_progress_data(void) {
+ return erts_tsd_get(erts_thr_prgr_data_key__);
+}
+
void
erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
{
@@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks)
}
-void
+ErtsThrPrgrData *
erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
ErtsThrPrgrCallbacks *callbacks,
int pref_wakeup)
@@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
wakeup_managed(id);
}
callbacks->finalize_wait(callbacks->arg);
+ return tpd;
}
static ERTS_INLINE int
@@ -796,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd)
== ERTS_THR_PRGR_LFLG_NO_LEADER))
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
}
@@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd)
}
int
-erts_thr_progress_update(ErtsSchedulerData *esdp)
+erts_thr_progress_update(ErtsThrPrgrData *tpd)
{
- return update(thr_prgr_data(esdp));
+ return update(tpd);
}
int
-erts_thr_progress_leader_update(ErtsSchedulerData *esdp)
+erts_thr_progress_leader_update(ErtsThrPrgrData *tpd)
{
- return leader_update(thr_prgr_data(esdp));
+ return leader_update(tpd);
}
void
-erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd)
{
erts_aint32_t lflgs;
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -884,14 +888,13 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
== ERTS_THR_PRGR_LFLG_NO_LEADER
&& got_sched_wakeups()) {
/* Someone need to make progress */
- wakeup_managed(0);
+ wakeup_managed(tpd->id);
}
}
void
-erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
+erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
ErtsThrPrgrVal current, val;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
}
void
-erts_thr_progress_active(ErtsSchedulerData *esdp, int on)
+erts_thr_progress_active(ErtsThrPrgrData *tpd, int on)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_check_exact(NULL, 0);
@@ -973,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle)
== (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM)
&& got_sched_wakeups()) {
/* Others waiting for us... */
- wakeup_managed(0);
+ wakeup_managed(1);
}
}
}
@@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value)
}
void
-erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+erts_thr_progress_wakeup(ErtsThrPrgrData *tpd,
ErtsThrPrgrVal value)
{
- ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+
ASSERT(!tpd->is_temporary);
if (tpd->is_managed)
request_wakeup_managed(tpd, value);
diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h
index 8329995b24..00a9e61407 100644
--- a/erts/emulator/beam/erl_thr_progress.h
+++ b/erts/emulator/beam/erl_thr_progress.h
@@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__;
void erts_thr_progress_pre_init(void);
void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged);
-void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
- ErtsThrPrgrCallbacks *,
- int);
+ErtsThrPrgrData *erts_thr_progress_register_managed_thread(
+ ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int);
void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *);
-void erts_thr_progress_active(ErtsSchedulerData *esdp, int on);
-void erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+void erts_thr_progress_active(ErtsThrPrgrData *, int on);
+void erts_thr_progress_wakeup(ErtsThrPrgrData *,
ErtsThrPrgrVal value);
-int erts_thr_progress_update(ErtsSchedulerData *esdp);
-int erts_thr_progress_leader_update(ErtsSchedulerData *esdp);
-void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp);
-void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp);
+int erts_thr_progress_update(ErtsThrPrgrData *);
+int erts_thr_progress_leader_update(ErtsThrPrgrData *);
+void erts_thr_progress_prepare_wait(ErtsThrPrgrData *);
+void erts_thr_progress_finalize_wait(ErtsThrPrgrData *);
ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void);
void erts_thr_progress_unmanaged_continue__(int umrefc_ix);
+ErtsThrPrgrData *erts_thr_progress_data(void);
void erts_thr_progress_dbg_print_state(void);
+ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp);
+
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc);
ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc);
@@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE ErtsThrPrgrData *
+erts_thr_prgr_data(ErtsSchedulerData *esdp) {
+ if (esdp) {
+ return &esdp->thr_progress_data;
+ } else {
+ return erts_thr_progress_data();
+ }
+}
+
ERTS_GLB_INLINE ErtsThrPrgrVal
erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc)
{
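
The signature changes above thread an explicit ErtsThrPrgrData pointer through the thread-progress API; erts_thr_prgr_data resolves it once, from esdp->thr_progress_data for schedulers or from thread-specific data otherwise, rather than on every call. The converted call pattern, as it appears in the callers earlier in this diff:

    /* Before: each call resolved the thread's progress data internally. */
    erts_thr_progress_active(esdp, 0);
    erts_thr_progress_prepare_wait(esdp);

    /* After: resolve once and pass it explicitly; threads without
     * scheduler data keep the ErtsThrPrgrData* returned when they
     * registered as managed (see aux_thread and poll_thread above). */
    ErtsThrPrgrData *tpd = erts_thr_prgr_data(esdp);
    erts_thr_progress_active(tpd, 0);
    erts_thr_progress_prepare_wait(tpd);
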
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index 53a020e7a5..2350d4c02f 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused)
{
ErtsThrPrgrCallbacks callbacks;
ErtsSysMsgQ *local_sys_message_queue = NULL;
+ ErtsThrPrgrData *tpd;
int wait = 0;
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused)
callbacks.wait = sys_msg_dispatcher_wait;
callbacks.finalize_wait = sys_msg_dispatcher_fin_wait;
- erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
+ tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0);
while (1) {
int end_wait = 0;
@@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused)
if (!sys_message_queue) {
erts_mtx_unlock(&smq_mtx);
end_wait = 1;
- erts_thr_progress_active(NULL, 0);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_active(tpd, 0);
+ erts_thr_progress_prepare_wait(tpd);
erts_mtx_lock(&smq_mtx);
}
@@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused)
erts_mtx_unlock(&smq_mtx);
if (end_wait) {
- erts_thr_progress_finalize_wait(NULL);
- erts_thr_progress_active(NULL, 1);
+ erts_thr_progress_finalize_wait(tpd);
+ erts_thr_progress_active(tpd, 1);
}
/* Send trace messages ... */
@@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused)
Process *proc = NULL;
Port *port = NULL;
- if (erts_thr_progress_update(NULL))
- erts_thr_progress_leader_update(NULL);
+ if (erts_thr_progress_update(tpd))
+ erts_thr_progress_leader_update(tpd);
#ifdef DEBUG_PRINTOUTS
print_msg_type(smqp);
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index dbe0201caf..c75b4045f7 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -38,6 +38,7 @@
#include <ctype.h>
#include <sys/types.h>
#include <errno.h>
+#include <stdint.h>
#define IDENTITY(c) c
#define STRINGIFY_1(b) IDENTITY(#b)
@@ -955,6 +956,7 @@ static size_t my_strnlen(const char *s, size_t maxlen)
#endif
#endif
+typedef struct _tcp_descriptor tcp_descriptor;
#if defined(TCP_CORK)
#define INET_TCP_NOPUSH TCP_CORK
@@ -1010,16 +1012,19 @@ typedef struct _multi_timer_data {
struct _multi_timer_data *prev;
} MultiTimerData;
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
- ErlDrvTermData caller, unsigned timeout,
- void (*timeout_fun)(ErlDrvData drv_data,
- ErlDrvTermData caller));
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ ErlDrvTermData caller, unsigned timeout,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data);
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p);
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p);
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller));
static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller);
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port);
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port);
typedef struct {
int id; /* id used to identify reply */
@@ -1278,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry =
};
#endif
-typedef struct {
+struct _tcp_descriptor {
inet_descriptor inet; /* common data structure (DON'T MOVE) */
int high; /* high watermark */
int low; /* low watermark */
@@ -1294,7 +1299,8 @@ typedef struct {
int http_state; /* 0 = response|request 1=headers fields */
inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */
inet_async_multi_op *multi_last;
- MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd; /* Timer structures for multiple accept */
+ MultiTimerData *mtd_cache; /* A cache for timer allocations */
#ifdef HAVE_SENDFILE
struct {
ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time
@@ -1310,7 +1316,7 @@ typedef struct {
Uint64 length;
} sendfile;
#endif
-} tcp_descriptor;
+};
/* send function */
static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len);
@@ -1320,7 +1326,10 @@ static int tcp_deliver(tcp_descriptor* desc, int len);
static int tcp_shutdown_error(tcp_descriptor* desc, int err);
+#ifdef HAVE_SENDFILE
static int tcp_inet_sendfile(tcp_descriptor* desc);
+static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error);
+#endif
static int tcp_inet_output(tcp_descriptor* desc, HANDLE event);
static int tcp_inet_input(tcp_descriptor* desc, HANDLE event);
@@ -9772,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args)
desc->tcp_add_flags = 0;
desc->http_state = 0;
desc->mtd = NULL;
+ desc->mtd_cache = NULL;
desc->multi_first = desc->multi_last = NULL;
DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port));
return (ErlDrvData) desc;
@@ -9875,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc)
driver_demonitor_process(desc->inet.port, &monitor);
send_async_error(desc->inet.dport, id, caller, am_closed);
}
- clean_multi_timers(&(desc->mtd), desc->inet.port);
}
-
else if (desc->inet.state == INET_STATE_CONNECTING) {
async_error_am(INETP(desc), am_closed);
}
else if (desc->inet.state == INET_STATE_CONNECTED) {
async_error_am_all(INETP(desc), am_closed);
}
+ clean_multi_timers(desc, desc->inet.port);
}
/*
@@ -9926,6 +9935,15 @@ static void tcp_desc_close(tcp_descriptor* desc)
erl_inet_close(INETP(desc));
}
+static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(!desc->inet.active);
+ sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
+ desc->i_remain = 0;
+ async_error_am(INETP(desc), am_timeout);
+}
+
/* TCP requests from Erlang */
static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
char* buf, ErlDrvSizeT len,
@@ -10096,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
if (time_left <= 0) {
time_left = 1;
}
- omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller,
+ omtd = add_multi_timer(desc, desc->inet.port, ocaller,
time_left, &tcp_inet_multi_timeout);
}
enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor);
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10116,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
return ctl_xerror("noproc", rbuf, rsize);
}
if (timeout != INET_INFINITY) {
- mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller,
+ mtd = add_multi_timer(desc, desc->inet.port, caller,
timeout, &tcp_inet_multi_timeout);
}
enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor);
@@ -10213,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
async_error_am(INETP(desc), am_timeout);
else {
if (timeout != INET_INFINITY)
- driver_set_timer(desc->inet.port, timeout);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ timeout, &tcp_inet_recv_timeout);
if (!INETP(desc)->is_ignored)
sock_select(INETP(desc),(FD_READ|FD_CLOSE),1);
else
@@ -10300,12 +10319,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
desc->tcp_add_flags |= TCP_ADDF_SENDFILE;
/* See if we can finish sending without selecting & rescheduling. */
- tcp_inet_sendfile(desc);
-
- if(desc->sendfile.length > 0) {
- sock_select(INETP(desc), FD_WRITE, 1);
+ if (tcp_inet_sendfile(desc) == 0) {
+ if(desc->sendfile.length > 0) {
+ sock_select(INETP(desc), FD_WRITE, 1);
+ }
}
-
return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize);
#else
return ctl_error(ENOTSUP, rbuf, rsize);
@@ -10319,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd,
}
+static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy)
+{
+ tcp_descriptor* desc = (tcp_descriptor*)e;
+ ASSERT(IS_BUSY(INETP(desc)));
+ ASSERT(desc->busy_on_send);
+ desc->inet.caller = desc->inet.busy_caller;
+ desc->inet.state &= ~INET_F_BUSY;
+ desc->busy_on_send = 0;
+ set_busy_port(desc->inet.port, 0);
+ inet_reply_error_am(INETP(desc), am_timeout);
+ if (desc->send_timeout_close) {
+ tcp_desc_close(desc);
+ }
+}
+
/*
** tcp_inet_timeout:
** called when a timer expires:
** TCP socket may be:
**
-** a) receiving -- deselect
+** a) receiving -- send timeout
** b) connecting -- close socket
** c) accepting -- reset listener
**
@@ -10338,26 +10371,9 @@ static void tcp_inet_timeout(ErlDrvData e)
DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n",
(long)desc->inet.port, desc->inet.s));
if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */
- fire_multi_timers(&(desc->mtd), desc->inet.port, e);
+ fire_multi_timers(desc, desc->inet.port, e);
} else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) {
- if (desc->busy_on_send) {
- ASSERT(IS_BUSY(INETP(desc)));
- desc->inet.caller = desc->inet.busy_caller;
- desc->inet.state &= ~INET_F_BUSY;
- desc->busy_on_send = 0;
- set_busy_port(desc->inet.port, 0);
- inet_reply_error_am(INETP(desc), am_timeout);
- if (desc->send_timeout_close) {
- tcp_desc_close(desc);
- }
- }
- else {
- /* assume recv timeout */
- ASSERT(!desc->inet.active);
- sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
- desc->i_remain = 0;
- async_error_am(INETP(desc), am_timeout);
- }
+ fire_multi_timers(desc, desc->inet.port, e);
}
else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) {
/* assume connect timeout */
@@ -10487,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp)
return;
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
if (desc->multi_first == NULL) {
sock_select(INETP(desc),FD_ACCEPT,0);
@@ -10518,6 +10534,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
#ifdef DEBUG
long port = (long) desc->inet.port; /* Used after driver_exit() */
#endif
+ int blocking_send = 0;
DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n",
port, desc->inet.s, __FILE__, __LINE__));
if (IS_BUSY(INETP(desc))) {
@@ -10525,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port));
}
@@ -10533,16 +10550,25 @@ static int tcp_recv_closed(tcp_descriptor* desc)
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port));
- } else {
+ blocking_send = 1;
+ }
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, ENOTCONN);
+ blocking_send = 1;
+ }
+#endif
+ if (!blocking_send) {
/* No blocking send op to reply to right now.
* If next op is a send, make sure it returns {error,closed}
* rather than {error,enotconn}.
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+
if (!desc->inet.active) {
- /* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ /* We must cancel any timer here ! */
+ clean_multi_timers(desc, INETP(desc)->port);
/* passive mode do not terminate port ! */
tcp_clear_input(desc);
if (desc->inet.exitf) {
@@ -10577,16 +10603,21 @@ static int tcp_recv_error(tcp_descriptor* desc, int err)
desc->inet.caller = desc->inet.busy_caller;
tcp_clear_output(desc);
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
inet_reply_error_am(INETP(desc), am_closed);
}
+#ifdef HAVE_SENDFILE
+ if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) {
+ tcp_sendfile_aborted(desc, err);
+ }
+#endif
if (!desc->inet.active) {
/* We must cancel any timer here ! */
- driver_cancel_timer(desc->inet.port);
+ clean_multi_timers(desc, INETP(desc)->port);
tcp_clear_input(desc);
if (desc->inet.exitf) {
tcp_desc_close(desc);
@@ -10691,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
if (len == 0) {
/* empty buffer or waiting for more input */
if ((desc->i_buf == NULL) || (desc->i_remain > 0))
- return count;
+ return 0;
if ((n = tcp_remain(desc, &len)) != 0) {
if (n < 0) /* packet error */
return n;
if (len > 0) /* more data pending */
desc->i_remain = len;
- return count;
+ return 0;
}
}
@@ -10749,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len)
len = 0;
if (!desc->inet.active) {
- if (!desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- }
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout);
sock_select(INETP(desc),(FD_READ|FD_CLOSE),0);
if (desc->i_buf != NULL)
tcp_restart_input(desc);
@@ -10777,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
int len;
int nread;
- if (desc->i_buf == NULL) { /* allocte a read buffer */
+ if (desc->i_buf == NULL) { /* allocate a read buffer */
int sz = (request_len > 0) ? request_len : desc->inet.bufsz;
if ((desc->i_buf = alloc_buffer(sz)) == NULL)
@@ -10850,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len)
return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start);
}
else {
- if ((nread = tcp_remain(desc, &len)) < 0)
+ nread = tcp_remain(desc, &len);
+ if (nread < 0)
return tcp_recv_error(desc, EMSGSIZE);
else if (nread == 0)
- return tcp_deliver(desc, len);
+ return tcp_deliver(desc, len);
else if (len > 0)
desc->i_remain = len; /* set remain */
}
@@ -11172,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event)
}
if (timeout != NULL) {
- remove_multi_timer(&(desc->mtd), desc->inet.port, timeout);
+ remove_multi_timer(desc, desc->inet.port, timeout);
}
driver_demonitor_process(desc->inet.port, &monitor);
@@ -11231,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
if (IS_BUSY(INETP(desc))) {
desc->inet.caller = desc->inet.busy_caller;
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
- desc->busy_on_send = 0;
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
+ desc->busy_on_send = 0;
}
desc->inet.state &= ~INET_F_BUSY;
set_busy_port(desc->inet.port, 0);
@@ -11247,27 +11277,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n",
(long)desc->inet.port, __FILE__, __LINE__));
if (desc->inet.active) {
+ ErlDrvTermData err_atom;
if (show_econnreset) {
tcp_error_message(desc, err);
- tcp_closed_message(desc);
- inet_reply_error(INETP(desc), err);
+ err_atom = error_atom(err);
} else {
- tcp_closed_message(desc);
- inet_reply_error_am(INETP(desc), am_closed);
+ err_atom = am_closed;
}
+ tcp_closed_message(desc);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE))
+ inet_reply_error_am(INETP(desc), err_atom);
+
if (desc->inet.exitf)
driver_exit(desc->inet.port, 0);
else
tcp_desc_close(desc);
} else {
tcp_close_check(desc);
- tcp_desc_close(desc);
if (desc->inet.caller) {
- if (show_econnreset)
- inet_reply_error(INETP(desc), err);
- else
- inet_reply_error_am(INETP(desc), am_closed);
+ if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) {
+ if (show_econnreset)
+ inet_reply_error(INETP(desc), err);
+ else
+ inet_reply_error_am(INETP(desc), am_closed);
+ }
}
else {
/* No blocking send op to reply to right now.
@@ -11276,6 +11310,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err)
*/
desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND;
}
+ tcp_desc_close(desc);
/*
* Make sure that the next receive operation gets an {error,closed}
@@ -11332,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err)
return tcp_send_or_shutdown_error(desc, err);
}
+static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy)
+{
+ tcp_descriptor *desc = (tcp_descriptor*)data;
+ (void)tcp_inet_output(desc, INETP(desc)->s);
+}
+
/*
** Send non-blocking vector data
*/
@@ -11384,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11399,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev)
INETP(desc)->is_ignored |= INET_IGNORE_WRITE;
n = 0;
} else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) {
- n = 0;
+ driver_enqv(ix, ev, 0);
+ add_multi_timer(desc, INETP(desc)->port, 0,
+ 0, &tcp_inet_delay_send);
+ return 0;
} else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov,
vsize, &n, 0))) {
if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) {
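
With TCP_ADDF_DELAY_SEND the driver now queues the whole vector untouched and lets a zero-timeout multi timer flush it through tcp_inet_delay_send above, instead of silently reporting a zero-length write. A simplified sketch of that path, with error handling omitted (delayed_send_sketch is an illustrative name, not part of the driver):

    static int delayed_send_sketch(tcp_descriptor *desc, ErlIOVec *ev)
    {
        ErlDrvPort ix = desc->inet.port;

        driver_enqv(ix, ev, 0);                 /* keep all data queued */
        add_multi_timer(desc, ix, 0 /* caller */, 0 /* fire asap */,
                        &tcp_inet_delay_send);  /* flush via tcp_inet_output */
        return 0;                               /* nothing written yet */
    }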
@@ -11482,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len)
set_busy_port(desc->inet.port, 1);
if (desc->send_timeout != INET_INFINITY) {
desc->busy_on_send = 1;
- driver_set_timer(desc->inet.port, desc->send_timeout);
+ add_multi_timer(desc, INETP(desc)->port,
+ 0 /* arg */, desc->send_timeout /* timeout */,
+ &tcp_inet_send_timeout);
}
return 1;
}
@@ -11586,7 +11634,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) {
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port,
+ &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
@@ -11788,8 +11837,8 @@ socket_error: {
DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n",
(long)desc->inet.port, socket_errno, errno));
- result = tcp_send_error(desc, socket_errno);
tcp_sendfile_aborted(desc, socket_errno);
+ result = tcp_send_error(desc, socket_errno);
goto done;
}
@@ -11893,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
#ifdef __WIN32__
desc->inet.send_would_block = 1;
#endif
+            /* If DELAY_SEND is set, ready_output may have
+               been called without doing a select, so we do
+               a select here in order to get into the correct
+               state */
+ if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND)
+ sock_select(INETP(desc), FD_WRITE, 1);
goto done;
} else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning
0 when sending packets with
@@ -11918,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event)
set_busy_port(desc->inet.port, 0);
/* if we have a timer then cancel and send ok to client */
if (desc->busy_on_send) {
- driver_cancel_timer(desc->inet.port);
+ cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout);
desc->busy_on_send = 0;
}
inet_reply_ok(INETP(desc));
@@ -12730,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
udesc->i_buf = NULL;
if (!desc->active) {
async_error(desc, err);
- driver_cancel_timer(desc->port);
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
}
else {
@@ -12819,7 +12874,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event)
return count;
count++;
if (!desc->active) {
- driver_cancel_timer(desc->port); /* possibly cancel */
+ driver_cancel_timer(desc->port);
sock_select(desc,FD_READ,0);
return count; /* passive mode (read one packet only) */
}
@@ -12898,55 +12953,69 @@ make_noninheritable_handle(SOCKET s)
* Multi-timers
*/
-static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port,
+static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvData data)
{
ErlDrvTime next_timeout;
- if (!*first) {
+ MultiTimerData *curr = desc->mtd;
+ if (!curr) {
ASSERT(0);
return;
}
#ifdef DEBUG
{
ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC);
- ASSERT(chk >= (*first)->when);
+ ASSERT(chk >= curr->when);
}
#endif
do {
- MultiTimerData *save = *first;
- *first = save->next;
+ MultiTimerData *save = curr;
+
(*(save->timeout_function))(data,save->caller);
- FREE(save);
- if (*first == NULL) {
+
+ curr = curr->next;
+
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = save;
+ else
+ FREE(save);
+
+ if (curr == NULL) {
+ desc->mtd = NULL;
return;
}
- (*first)->prev = NULL;
- next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ curr->prev = NULL;
+ next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
} while (next_timeout <= 0);
+ desc->mtd = curr;
driver_set_timer(port, (unsigned long) next_timeout);
}
-static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port)
+static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port)
{
- MultiTimerData *p;
- if (*first) {
+ if (desc->mtd) {
driver_cancel_timer(port);
}
- while (*first) {
- p = *first;
- *first = p->next;
- FREE(p);
+ while (desc->mtd) {
+ MultiTimerData *p = desc->mtd;
+ desc->mtd = p->next;
+ FREE(p);
+ }
+ desc->mtd = NULL;
+ if (desc->mtd_cache) {
+ FREE(desc->mtd_cache);
+ desc->mtd_cache = NULL;
}
}
-static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p)
+static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p)
{
if (p->prev != NULL) {
p->prev->next = p->next;
} else {
driver_cancel_timer(port);
- *first = p->next;
- if (*first) {
- ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
+ desc->mtd = p->next;
+ if (desc->mtd) {
+ ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC);
if (ntmo < 0)
ntmo = 0;
driver_set_timer(port, (unsigned long) ntmo);
@@ -12955,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim
if (p->next != NULL) {
p->next->prev = p->prev;
}
- FREE(p);
+ if (desc->mtd_cache == NULL)
+ desc->mtd_cache = p;
+ else
+ FREE(p);
+}
+
+/* Cancel a timer based on the timeout_fun */
+static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
+ void (*timeout_fun)(ErlDrvData drv_data,
+ ErlDrvTermData caller))
+{
+ MultiTimerData *timer = desc->mtd;
+ while(timer && timer->timeout_function != timeout_fun) {
+ timer = timer->next;
+ }
+ if (timer) {
+ remove_multi_timer(desc, port, timer);
+ }
}
-static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
+static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port,
ErlDrvTermData caller, unsigned timeout,
void (*timeout_fun)(ErlDrvData drv_data,
ErlDrvTermData caller))
{
MultiTimerData *mtd, *p, *s;
- mtd = ALLOC(sizeof(MultiTimerData));
- mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1;
+
+ /* Use cached timer if available */
+ if (desc->mtd_cache != NULL) {
+ mtd = desc->mtd_cache;
+ desc->mtd_cache = NULL;
+ } else
+ mtd = ALLOC(sizeof(MultiTimerData));
+
+ if (timeout)
+ mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout);
+ else
+ mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */
+
mtd->timeout_function = timeout_fun;
mtd->caller = caller;
mtd->next = mtd->prev = NULL;
- for(p = *first,s = NULL; p != NULL; s = p, p = p->next) {
+
+ /* Find correct slot in timer linked list */
+ for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) {
if (p->when >= mtd->when) {
break;
}
}
+ /* Insert in linked list */
if (!p) {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
}
} else {
if (!s) {
- *first = mtd;
+ desc->mtd = mtd;
} else {
s->next = mtd;
mtd->prev = s;
@@ -12992,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port,
mtd->next = p;
p->prev = mtd;
}
+ /* Possibly set new timer */
if (!s) {
- if (mtd->next) {
- driver_cancel_timer(port);
- }
driver_set_timer(port,timeout);
}
return mtd;
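
add_multi_timer, remove_multi_timer and fire_multi_timers above all share a one-slot cache (mtd_cache), so the timer set up and torn down around every passive-mode recv/send does not hit the allocator each time. A minimal sketch of the pattern, factored into hypothetical mtd_alloc/mtd_free helpers that are not part of the driver:

    static MultiTimerData *mtd_alloc(tcp_descriptor *desc)
    {
        MultiTimerData *mtd = desc->mtd_cache;
        if (mtd != NULL)
            desc->mtd_cache = NULL;   /* reuse the cached entry */
        else
            mtd = ALLOC(sizeof(MultiTimerData));
        return mtd;
    }

    static void mtd_free(tcp_descriptor *desc, MultiTimerData *mtd)
    {
        if (desc->mtd_cache == NULL)
            desc->mtd_cache = mtd;    /* keep one entry for the next timer */
        else
            FREE(mtd);
    }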
diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c
index 0ada345442..0b5eccbde2 100644
--- a/erts/emulator/nifs/common/prim_file_nif.c
+++ b/erts/emulator/nifs/common/prim_file_nif.c
@@ -936,7 +936,7 @@ static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_
posix_errno_t posix_errno;
efile_path_t path;
- Uint32 permissions;
+ Uint permissions;
ASSERT(argc == 2);
if(!enif_get_uint(env, argv[1], &permissions)) {
@@ -956,7 +956,7 @@ static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
posix_errno_t posix_errno;
efile_path_t path;
- Sint32 uid, gid;
+ Sint uid, gid;
ASSERT(argc == 3);
if(!enif_get_int(env, argv[1], &uid) || !enif_get_int(env, argv[2], &gid)) {
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index f421794f91..b4609007c9 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -46,11 +46,11 @@
#if 0
#define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__)
#define DEBUG_PRINT_FD(FMT, STATE, ...) \
- DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \
+ DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \
(STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \
ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \
ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \
- (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR)
+ (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR)
#define DEBUG_PRINT_MODE
#else
#define DEBUG_PRINT(...)
@@ -76,22 +76,40 @@ typedef enum {
typedef enum {
ERTS_EV_FLAG_CLEAR = 0,
ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */
-#ifdef ERTS_ENABLE_KERNEL_POLL
- ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+    ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated
+                                   to the scheduler pollset */
+    ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in
+                                      the scheduler pollset */
+#else
+ ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+ ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR,
+#endif
+#ifdef ERTS_POLL_USE_FALLBACK
+ ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd
and it was put in the nkp version */
#else
ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR,
#endif
/* Combinations */
- ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK
+ ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK,
+ ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER,
+ ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER
} EventStateFlags;
#define flag2str(flags) \
((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \
((flags) == ERTS_EV_FLAG_USED ? "USED" : \
((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \
- ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR"))))
+ ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \
+ ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \
+ ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \
+ ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \
+ "ERROR"))))))))
/* How many events that can be handled at once by one erts_poll_wait call */
#define ERTS_CHECK_IO_POLL_RES_LEN 512
@@ -105,6 +123,7 @@ typedef struct erts_poll_thread
{
ErtsPollSet *ps;
ErtsPollResFd *pollres;
+ ErtsThrPrgrData *tpd;
int pollres_len;
} ErtsPollThread;
@@ -112,10 +131,13 @@ typedef struct erts_poll_thread
* Which pollset to use is determined by hashing the fd.
*/
static ErtsPollSet **pollsetv;
+static ErtsPollThread *psiv;
#if ERTS_POLL_USE_FALLBACK
static ErtsPollSet *flbk_pollset;
#endif
-static ErtsPollThread *psiv;
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+static ErtsPollSet *sched_pollset;
+#endif
typedef struct {
#ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS
@@ -130,10 +152,12 @@ typedef struct {
ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */
} stop;
} driver;
- ErtsPollEvents events; /* The events that have been selected upon */
+ ErtsPollEvents events; /* The events that have been selected upon */
ErtsPollEvents active_events; /* The events currently active in the pollset */
EventStateType type;
EventStateFlags flags;
+ int count; /* Number of times this fd has triggered
+ without being deselected. */
} ErtsDrvEventState;
struct drv_ev_state_shared {
@@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd)
#if ERTS_POLL_USE_FALLBACK
static ERTS_INLINE ErtsPollSet *
-get_fallback(void)
+get_fallback_pollset(void)
{
return flbk_pollset;
}
#endif
+static ERTS_INLINE ErtsPollSet *
+get_scheduler_pollset(ErtsSysFdType fd)
+{
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ return sched_pollset;
+#else
+ return get_pollset(fd);
+#endif
+}
+
/*
* Place a fd within a pollset. This will automatically use
* the fallback ps if needed.
@@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op,
ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd)));
if (!(flags & ERTS_EV_FLAG_FALLBACK)) {
- res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+
+ if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) {
+ erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
+ if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) {
+ res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller);
+ } else {
+ res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller);
+ }
#if ERTS_POLL_USE_FALLBACK
if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) {
/* When an add fails with NVAL, the poll/kevent operation could not
put that fd in the pollset, so we instead put it into a fallback pollset */
state->flags |= ERTS_EV_FLAG_FALLBACK;
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
}
} else {
ASSERT(op != ERTS_POLL_OP_ADD);
- res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller);
+ res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller);
#endif
}
@@ -425,59 +468,77 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task);
ErtsSysFdType fd = itp->fd;
erts_mtx_t *mtx = fd_mtx(fd);
- int active_events;
+ ErtsPollOp op = ERTS_POLL_OP_MOD;
+ int active_events, new_events = 0;
ErtsDrvEventState *state;
ErtsDrvSelectDataState *free_select = NULL;
ErtsNifSelectDataState *free_nif = NULL;
+ ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO);
+
erts_mtx_lock(mtx);
state = get_drv_ev_state(fd);
+ reset_handle(pthp);
+
active_events = state->active_events;
- switch (type) {
- case ERTS_PORT_TASK_INPUT:
+ if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) {
+ switch (type) {
+ case ERTS_PORT_TASK_INPUT:
+
+ DEBUG_PRINT_FD("executed ready_input", state);
+
+ ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
+ if (state->events & ERTS_POLL_EV_IN) {
+ active_events |= ERTS_POLL_EV_IN;
+ if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
+ if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
+ op = ERTS_POLL_OP_ADD;
+ state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER;
+ new_events = ERTS_POLL_EV_IN;
+ DEBUG_PRINT_FD("moving to scheduler ps", state);
+ } else
+ new_events = active_events;
+ if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING)
+ state->count++;
+ }
+ break;
+ case ERTS_PORT_TASK_OUTPUT:
- DEBUG_PRINT_FD("executed ready_input", state);
+ DEBUG_PRINT_FD("executed ready_output", state);
- ASSERT(!(state->active_events & ERTS_POLL_EV_IN));
- if (state->events & ERTS_POLL_EV_IN)
- active_events |= ERTS_POLL_EV_IN;
- break;
- case ERTS_PORT_TASK_OUTPUT:
+ ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
+ if (state->events & ERTS_POLL_EV_OUT) {
+ active_events |= ERTS_POLL_EV_OUT;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN)
+ new_events = ERTS_POLL_EV_OUT;
+ else
+ new_events = active_events;
+ }
+ break;
+ default:
+ erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
+ break;
+ }
- DEBUG_PRINT_FD("executed ready_output", state);
+ if (state->active_events != active_events && new_events) {
+ state->active_events = active_events;
+ new_events = erts_io_control(state, op, new_events);
+ }
- ASSERT(!(state->active_events & ERTS_POLL_EV_OUT));
- if (state->events & ERTS_POLL_EV_OUT)
- active_events |= ERTS_POLL_EV_OUT;
- break;
- default:
- erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type");
- break;
+ /* We were unable to re-insert the fd into the pollset, signal the callback. */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ if (state->active_events & ERTS_POLL_EV_IN)
+ iready(state->driver.select->inport, state);
+ if (state->active_events & ERTS_POLL_EV_OUT)
+ oready(state->driver.select->outport, state);
+ state->active_events = 0;
+ }
}
- reset_handle(pthp);
-
- if (active_events) {
- /* This is not needed if active_events has not changed */
- if (state->active_events != active_events) {
- ErtsPollEvents new_events;
- state->active_events = active_events;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events);
-
- /* We were unable to re-insert the fd into the pollset, signal the callback. */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- if (active_events & ERTS_POLL_EV_IN)
- iready(state->driver.select->inport, state);
- if (active_events & ERTS_POLL_EV_OUT)
- oready(state->driver.select->outport, state);
- state->active_events = 0;
- }
- }
- } else {
+ if (!active_events)
check_fd_cleanup(state, &free_select, &free_nif);
- }
erts_mtx_unlock(mtx);
@@ -485,6 +546,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type,
free_drv_select_data(free_select);
if (free_nif)
free_nif_select_data(free_nif);
+
+ ERTS_MSACC_POP_STATE_M_X();
}
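
Condensed, the new input path above implements a small migration heuristic: an fd whose input has triggered more than ten times without an intervening deselect is moved to the scheduler pollset, where the schedulers poll it directly instead of a dedicated poll thread. A sketch of just that decision (flag and field names are from the hunk; locking and the output path are omitted):

    if (state->events & ERTS_POLL_EV_IN) {
        active_events |= ERTS_POLL_EV_IN;
        if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) {
            /* Hot fd: add it to the scheduler pollset. */
            if (!(state->flags & ERTS_EV_FLAG_SCHEDULER))
                op = ERTS_POLL_OP_ADD;
            state->flags |= ERTS_EV_FLAG_IN_SCHEDULER | ERTS_EV_FLAG_SCHEDULER;
        }
        if (!(state->flags & ERTS_EV_FLAG_FALLBACK))
            state->count++;   /* one more trigger without a deselect */
    }
    /* ... a later deselect of the input event resets state->count to 0 ... */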
static ERTS_INLINE void
@@ -755,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) {
ctl_op = ERTS_POLL_OP_ADD;
}
+ new_events = state->active_events;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ new_events &= ~ERTS_POLL_EV_IN;
}
else {
ctl_events &= old_events;
state->events &= ~ctl_events;
state->active_events &= ~ctl_events;
+ new_events = state->active_events;
+
+ if (ctl_events & ERTS_POLL_EV_IN) {
+ state->count = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ new_events = 0;
+ }
+ }
if (!state->events) {
if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE)
@@ -770,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) {
new_events = erts_io_control_wakeup(state, ctl_op,
- state->active_events,
+ new_events,
&wake_poller);
ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE);
@@ -802,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (ctl_events & ERTS_POLL_EV_IN) {
abort_tasks(state, ERL_DRV_READ);
state->driver.select->inport = NIL;
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
}
if (ctl_events & ERTS_POLL_EV_OUT) {
abort_tasks(state, ERL_DRV_WRITE);
@@ -810,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on)
if (state->events == 0) {
if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) {
state->type = ERTS_EV_TYPE_NONE;
+ if (state->flags & ERTS_EV_FLAG_SCHEDULER)
+ erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP);
state->flags = 0;
}
/*else keep it, as fd will probably be selected upon again */
@@ -1440,7 +1517,8 @@ iready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_INPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) {
stale_drv_select(id, state, ERL_DRV_READ);
} else {
DEBUG_PRINT_FD("schedule ready_input(%T, %d)",
@@ -1458,7 +1536,8 @@ oready(Eterm id, ErtsDrvEventState *state)
if (erts_port_task_schedule(id,
&iotask->task,
ERTS_PORT_TASK_OUTPUT,
- (ErlDrvEvent) state->fd) != 0) {
+ (ErlDrvEvent) state->fd,
+ 0) != 0) {
stale_drv_select(id, state, ERL_DRV_WRITE);
} else {
DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd);
@@ -1520,7 +1599,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
{
if (psi) {
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
erts_poll_interrupt_flbk(psi->ps, set);
return;
}
@@ -1530,12 +1609,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set)
}
ErtsPollThread *
-erts_create_pollset_thread(int id) {
+erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) {
+ psiv[id].tpd = tpd;
return psiv+id;
}
void
-erts_check_io(ErtsPollThread *psi)
+erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time)
{
int pollres_len;
int poll_ret, i;
@@ -1550,14 +1630,14 @@ erts_check_io(ErtsPollThread *psi)
pollres_len = psi->pollres_len;
#if ERTS_POLL_USE_FALLBACK
- if (psi->ps == get_fallback()) {
+ if (psi->ps == get_fallback_pollset()) {
- poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
} else
#endif
{
- poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len);
+ poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time);
}
#ifdef ERTS_ENABLE_LOCK_CHECK
@@ -1593,7 +1673,12 @@ erts_check_io(ErtsPollThread *psi)
ErtsNifSelectDataState *free_nif = NULL;
ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]);
ErtsDrvEventState *state;
- ErtsPollEvents revents;
+ ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
+
+        /* The fd will be set to -1 if a pollset-internal fd was triggered
+           and it was deemed too expensive to remove it from the result.
+        */
+ if (fd == -1) continue;
erts_mtx_lock(fd_mtx(fd));
@@ -1604,8 +1689,6 @@ erts_check_io(ErtsPollThread *psi)
continue;
}
- revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]);
-
DEBUG_PRINT_FD("triggered %s", state, ev2str(revents));
if (revents & ERTS_POLL_EV_ERR) {
@@ -1617,25 +1700,39 @@ erts_check_io(ErtsPollThread *psi)
*/
revents = state->active_events;
state->active_events = 0;
+
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ erts_io_control(state, ERTS_POLL_OP_MOD, 0);
+ state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER;
+ }
} else {
/* Disregard any events that are not active at the moment,
for instance this could happen if the driver/nif does
select/deselect in rapid succession. */
revents &= state->active_events | ERTS_POLL_EV_NVAL;
- state->active_events &= ~revents;
- /* Reactivate the poll op if there are still active events */
- if (state->active_events) {
- ErtsPollEvents new_events;
- DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events));
+ if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) {
+ ErtsPollEvents reactive_events;
+ state->active_events &= ~revents;
- new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events);
+ reactive_events = state->active_events;
- /* Unable to re-enable the fd, signal all callbacks */
- if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
- revents |= state->active_events;
- state->active_events = 0;
+ if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER)
+ reactive_events &= ~ERTS_POLL_EV_IN;
+
+ /* Reactivate the poll op if there are still active events */
+ if (reactive_events) {
+ ErtsPollEvents new_events;
+ DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events));
+
+ new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events);
+
+ /* Unable to re-enable the fd, signal all callbacks */
+ if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) {
+ revents |= reactive_events;
+ state->active_events &= ~reactive_events;
+ }
}
}
}
@@ -1711,7 +1808,7 @@ erts_check_io(ErtsPollThread *psi)
case ERTS_EV_TYPE_STOP_USE: {
#if ERTS_POLL_USE_FALLBACK
- ASSERT(psi->ps == get_fallback());
+ ASSERT(psi->ps == get_fallback_pollset());
#endif
drv_ptr = state->driver.stop.drv_ptr;
state->type = ERTS_EV_TYPE_NONE;
@@ -2049,12 +2146,17 @@ erts_init_check_io(int *argc, char **argv)
for (j=0; j < erts_no_pollsets; j++)
pollsetv[j] = erts_poll_create_pollset(j);
-#if ERTS_POLL_USE_FALLBACK
- flbk_pollset = erts_poll_create_pollset_flbk(-1);
+ no_poll_threads = erts_no_poll_threads;
+
+ j = -1;
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ sched_pollset = erts_poll_create_pollset(j--);
+ no_poll_threads++;
#endif
- no_poll_threads = erts_no_poll_threads;
#if ERTS_POLL_USE_FALLBACK
+ flbk_pollset = erts_poll_create_pollset_flbk(j--);
no_poll_threads++;
#endif
@@ -2064,7 +2166,15 @@ erts_init_check_io(int *argc, char **argv)
psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
- psiv[0].ps = get_fallback();
+ psiv[0].ps = get_fallback_pollset();
+ psiv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN;
+ psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET,
+ sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN);
+ psiv[0].ps = get_scheduler_pollset(0);
psiv++;
#endif
@@ -2121,7 +2231,12 @@ erts_check_io_size(void)
int i;
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info(get_fallback(), &pi);
+ erts_poll_info(get_fallback_pollset(), &pi);
+ res += pi.memory_size;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &pi);
res += pi.memory_size;
#endif
@@ -2153,13 +2268,21 @@ erts_check_io_info(void *proc)
Uint sz, *szp, *hp, **hpp;
ErtsPollInfo *piv;
Sint i, j = 0, len;
- int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK;
+ int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING;
ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1);
+ ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1);
piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets);
#if ERTS_POLL_USE_FALLBACK
- erts_poll_info_flbk(get_fallback(), &piv[0]);
+ erts_poll_info_flbk(get_fallback_pollset(), &piv[0]);
+ piv[0].poll_threads = 1;
+ piv[0].active_fds = 0;
+ piv++;
+#endif
+
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_poll_info(get_scheduler_pollset(0), &piv[0]);
piv[0].poll_threads = 1;
piv[0].active_fds = 0;
piv++;
@@ -2213,6 +2336,7 @@ erts_check_io_info(void *proc)
sz = 0;
piv -= ERTS_POLL_USE_FALLBACK;
+ piv -= ERTS_POLL_USE_SCHEDULER_POLLING;
bld_it:
@@ -2317,15 +2441,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev)
static ERTS_INLINE void
print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f)
{
- const char* delim = "";
- if(f & ERTS_EV_FLAG_USED) {
- erts_dsprintf(dsbufp, "%s","USED");
- delim = "|";
- }
- if(f & ERTS_EV_FLAG_FALLBACK) {
- erts_dsprintf(dsbufp, "%s%s", delim, "FLBK");
- delim = "|";
- }
+ erts_dsprintf(dsbufp, "%s", flag2str(f));
}
#ifdef DEBUG_PRINT_MODE
@@ -2673,13 +2789,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
#if ERTS_POLL_USE_FALLBACK
erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n");
- erts_poll_get_selected_events_flbk(get_fallback(), counters.epep,
+ erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep,
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
}
#endif
+#if ERTS_POLL_USE_SCHEDULER_POLLING
+ erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n");
+ erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep,
+ drv_ev_state.max_fds);
+ for (fd = 0; fd < len; fd++) {
+ if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) {
+ if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE)
+ counters.epep[fd] &= ~ERTS_POLL_EV_OUT;
+ doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
+ }
+#endif
+
erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n");
for (i = 0; i < erts_no_pollsets; i++) {
@@ -2688,8 +2817,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip)
drv_ev_state.max_fds);
for (fd = 0; fd < len; fd++) {
if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK)
- && get_pollset_id(fd) == i)
+ && get_pollset_id(fd) == i) {
+ if (counters.epep[fd] != ERTS_POLL_EV_NONE &&
+ drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) {
+ /* We add the in flag if it is enabled in the scheduler pollset
+ and get_selected_events works on the platform */
+ counters.epep[fd] |= ERTS_POLL_EV_IN;
+ }
doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp);
+ }
}
}
for (fd = len ; fd < drv_ev_state.max_fds; fd++) {
@@ -2736,7 +2872,7 @@ void erts_lcnt_update_cio_locks(int enable) {
#endif
#if ERTS_POLL_USE_FALLBACK
- erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable);
+ erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable);
#endif
for (i = 0; i < erts_no_pollsets; i++)
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index 443ef1264c..31182be5ec 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -68,7 +68,7 @@ int erts_check_io_max_files(void);
*
* @param pt the poll thread structure to use.
*/
-void erts_check_io(struct erts_poll_thread *pt);
+void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time);
/**
* Initialize the check io framework. This function will parse the arguments
* and delete any entries that it is interested in.
@@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set);
/**
* Create a new poll thread structure that is associated with the number no.
* It is the caller's responsibility to ensure that no is unique.
+ *
+ * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler
+ * @param tpd the thread progress data of the pollset thread
*/
-struct erts_poll_thread* erts_create_pollset_thread(int no);
+struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd);
#ifdef ERTS_ENABLE_LOCK_COUNT
/**
* Toggle lock counting on all check io locks
@@ -126,16 +129,6 @@ extern int erts_no_poll_threads;
#include "erl_poll.h"
#include "erl_port_task.h"
-#ifdef __WIN32__
-/*
- * Current erts_poll implementation for Windows cannot handle
- * active events in the set of events polled.
- */
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#else
-# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1
-#endif
-
typedef struct {
Eterm inport;
Eterm outport;
@@ -147,10 +140,6 @@ struct erts_nif_select_event {
Eterm pid;
Eterm immed;
Uint32 refn[ERTS_REF_NUMBERS];
- Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress
- * 1: Do deselect before next poll
- * >1: Countdown of ignored events
- */
};
typedef struct {
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
index 539daea419..e1ff0fe80a 100644
--- a/erts/emulator/sys/common/erl_mmap.h
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -176,4 +176,61 @@ void hard_dbg_remove_mseg(void* seg, UWord sz);
#endif /* HAVE_ERTS_MMAP */
+/* Marks the given memory region as unused without freeing it, letting the OS
+ * reclaim its physical memory with the promise that we'll get it back (without
+ * its contents) the next time it's accessed. */
+ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef VALGRIND
+ #include <valgrind/memcheck.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+ }
+#elif defined(DEBUG)
+ /* Try to provoke crashes by filling the discard region with garbage. It's
+ * extremely hard to find bugs where we've discarded too much, as the
+ * region often retains its old contents if it's accessed before the OS
+ * reclaims it. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ static const char pattern[] = "DISCARDED";
+ char *data;
+ int i;
+
+ for(i = 0, data = ptr; i < size; i++) {
+ data[i] = pattern[i % sizeof(pattern)];
+ }
+ }
+#elif defined(HAVE_SYS_MMAN_H)
+ #include <sys/mman.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ #ifdef MADV_FREE
+ /* This is preferred as it doesn't necessarily free the pages right
+ * away, which is a bit faster than MADV_DONTNEED. */
+ madvise(ptr, size, MADV_FREE);
+ #else
+ madvise(ptr, size, MADV_DONTNEED);
+ #endif
+ }
+#elif defined(_WIN32)
+ #include <winbase.h>
+
+ /* MEM_RESET is defined on all supported versions of Windows, and has the
+ * same semantics as MADV_FREE. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE);
+ }
+#else
+ /* Dummy implementation. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ (void)ptr;
+ (void)size;
+ }
+#endif
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif /* ERL_MMAP_H__ */
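
A hypothetical usage sketch of erts_mem_discard: after shrinking a block, an allocator can discard the page-aligned unused tail so the OS may reclaim those pages until they are touched again. The helper name, block layout and 4 KiB page-size assumption are illustrative only, not taken from the allocator:

    static void shrink_block_sketch(char *block, UWord used_sz, UWord total_sz)
    {
        UWord page = 4096;                               /* assumed page size */
        UWord tail = (used_sz + page - 1) & ~(page - 1); /* round up to a page */

        if (total_sz > tail)
            erts_mem_discard(block + tail, total_sz - tail);
    }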
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index b4d1575ee5..51d50933ff 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -75,6 +75,7 @@
# define WANT_NONBLOCKING
#endif
+#include "erl_thr_progress.h"
#include "erl_poll.h"
#if ERTS_POLL_USE_KQUEUE
# include <sys/types.h>
@@ -95,7 +96,6 @@
# include <limits.h>
# endif
#endif
-#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
#include "erl_msacc.h"
@@ -121,7 +121,8 @@
/* Define to print info about modifications done to each fd */
#define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__)
/* Define to print entry and exit from erts_poll_wait (can be very spammy) */
-//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__)
+// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0)
#else
#define ERTS_POLL_DEBUG_PRINT 0
@@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds,
#define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE)
-#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE)
+#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0)
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
#if ERTS_POLL_USE_KERNEL_POLL
int kp_fd;
+ int oneshot;
#endif /* ERTS_POLL_USE_KERNEL_POLL */
#if ERTS_POLL_USE_POLL
@@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) {
ErtsPollSetUpdateRequestsBlock *curr_upd_req_block;
erts_atomic32_t have_update_requests;
erts_mtx_t mtx;
- erts_atomic32_t wakeup_state;
+#else
+ int do_wakeup;
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
- int wake_fds[2];
+#if ERTS_POLL_USE_TIMERFD
+ int timer_fd;
#endif
+ ErtsMonotonicTime timeout_time;
+ erts_atomic32_t wakeup_state;
+ int wake_fds[2];
};
void erts_silence_warn_unused_result(long unused);
@@ -365,63 +371,47 @@ static void print_misc_debug_info(void);
uint32_t epoll_events(int kp_fd, int fd);
#endif
-
#define ERTS_POLL_NOT_WOKEN 0
#define ERTS_POLL_WOKEN -1
#define ERTS_POLL_WOKEN_INTR 1
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
static ERTS_INLINE void
reset_wakeup_state(ErtsPollSet *ps)
{
erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
}
-#endif
static ERTS_INLINE int
is_woken(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
-#else
- return 0;
-#endif
}
static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
-#else
- return 0;
-#endif
}
static ERTS_INLINE void
woke_up(ErtsPollSet *ps)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN);
-#endif
}
/*
* --- Wakeup pipe -----------------------------------------------------------
*/
-#if ERTS_POLL_USE_WAKEUP_PIPE
-
static ERTS_INLINE void
wake_poller(ErtsPollSet *ps, int interrupted)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
int wake;
erts_aint32_t wakeup_state;
if (!interrupted)
@@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted)
wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
if (wake)
-#endif
{
ssize_t res;
+ DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted);
if (ps->wake_fds[1] < 0)
return; /* Not initialized yet */
do {
@@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps)
fd,
erl_errno_id(errno), errno);
}
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
if (intr)
erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
-#endif
}
static void
@@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps)
ps->wake_fds[1] = wake_fds[1];
}
+/*
+ * --- timer fd -----------------------------------------------------------
+ */
+
+#if ERTS_POLL_USE_TIMERFD
+
+/* We use the timerfd when using epoll_wait to get high-accuracy
+   timeouts, i.e. when we want to sleep with sub-millisecond accuracy. */
+
+static void
+create_timerfd(ErtsPollSet *ps)
+{
+ int do_wake = 0;
+ int timer_fd = timerfd_create(CLOCK_MONOTONIC,0);
+ ERTS_POLL_EXPORT(erts_poll_control)(ps,
+ timer_fd,
+ ERTS_POLL_OP_ADD,
+ ERTS_POLL_EV_IN,
+ &do_wake);
+ if (ps->internal_fd_limit <= timer_fd)
+ ps->internal_fd_limit = timer_fd + 1;
+ ps->timer_fd = timer_fd;
+}
+
+static ERTS_INLINE void
+timerfd_set(ErtsPollSet *ps, struct itimerspec *its)
+{
+#ifdef DEBUG
+ struct itimerspec old_its;
+ int res;
+ res = timerfd_settime(ps->timer_fd, 0, its, &old_its);
+ ASSERT(res == 0);
+ ASSERT(old_its.it_interval.tv_sec == 0 &&
+ old_its.it_interval.tv_nsec == 0 &&
+ old_its.it_value.tv_sec == 0 &&
+ old_its.it_value.tv_nsec == 0);
+
+#else
+ timerfd_settime(ps->timer_fd, 0, its, NULL);
#endif
+}
+
+static ERTS_INLINE int
+timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) {
+
+ struct itimerspec its;
+ /* we always have to clear the timer */
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 0;
+ timerfd_settime(ps->timer_fd, 0, &its, NULL);
+
+    /* if only the timer fd triggered, report no events */
+ if (res == 1 && pr[0].data.fd == ps->timer_fd)
+ return 0;
+
+ return res;
+}
+
+#endif /* ERTS_POLL_USE_TIMERFD */
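
For reference, the same technique with the plain Linux API, outside the ERTS wrappers: tfd is a one-shot timerfd already registered for input in epfd, so it ends an otherwise infinite epoll_wait() with nanosecond rather than millisecond timeout resolution (wait_with_ns_timeout is an illustrative name):

    #include <sys/epoll.h>
    #include <sys/timerfd.h>

    static int wait_with_ns_timeout(int epfd, int tfd, struct epoll_event *evs,
                                    int max_events, long timeout_ns)
    {
        struct itimerspec its = {{0, 0}, {0, 0}};

        its.it_value.tv_sec  = timeout_ns / 1000000000L;
        its.it_value.tv_nsec = timeout_ns % 1000000000L;
        timerfd_settime(tfd, 0, &its, NULL);    /* arm the one-shot timer */

        return epoll_wait(epfd, evs, max_events, -1); /* timer ends the wait */
    }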
/*
* --- Poll set update requests ----------------------------------------------
@@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
struct epoll_event epe_templ;
struct epoll_event epe;
- epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT;
+ epe_templ.events = ERTS_POLL_EV_E2N(events);
epe_templ.data.fd = fd;
+ if (ps->oneshot)
+ epe_templ.events |= EPOLLONESHOT;
+
#ifdef VALGRIND
/* Silence invalid valgrind warning ... */
memset((void *) &epe.data, 0, sizeof(epoll_data_t));
@@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
int res = 0, len = 0;
struct kevent evts[2];
struct timespec ts = {0, 0};
+ uint32_t oneshot = 0;
if (op == ERTS_POLL_OP_ADD) {
/* This is a hack to make the "noshell" option work; kqueue can poll
@@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
man page), but it seems to be the way it works...
*/
+ if (ps->oneshot)
+ oneshot = EV_DISPATCH;
+
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
/* We could probably skip this delete, do we want to? */
@@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
uint32_t flags;
erts_atomic_inc_nob(&ps->no_of_user_fds);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_ADD|EV_DISPATCH;
+ flags = EV_ADD|oneshot;
flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE);
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
} else {
uint32_t flags;
ASSERT(op == ERTS_POLL_OP_MOD);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN);
- flags = EV_DISPATCH;
+ flags = oneshot;
flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE;
ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT);
}
#else
- uint32_t flags = EV_ADD|EV_ONESHOT;
+ uint32_t flags = EV_ADD;
+
+ if (ps->oneshot) flags |= EV_ONESHOT;
if (op == ERTS_POLL_OP_DEL) {
erts_atomic_dec_nob(&ps->no_of_user_fds);
@@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events)
keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd);
for (i = 0; i < len; i++) {
const char *flags = "UNKNOWN";
- if (evts[i].flags == EV_DELETE) flags = "EV_DELETE";
+ if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE";
if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT";
+ if (evts[i].flags == (EV_ADD)) flags = "EV_ADD";
#ifdef EV_DISPATCH
if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH";
if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE";
if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH";
- if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE";
+ if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE";
+ if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE";
        if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISPATCH";
#endif
keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "",
@@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op,
goto done;
}
#endif
-#if ERTS_POLL_USE_WAKEUP_PIPE
if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) {
new_events = ERTS_POLL_EV_NVAL;
goto done;
}
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd) {
+ new_events = ERTS_POLL_EV_NVAL;
+ goto done;
+ }
#endif
}
@@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
ERTS_POLLSET_UNLOCK(ps);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (*do_wake) {
+ if (*do_wake)
wake_poller(ps, 0);
- }
-#endif
return res;
}
@@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
static ERTS_INLINE int
ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE
int n = chk_fds_res < max_res ? chk_fds_res : max_res, i;
int res = n;
-#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fd = ps->wake_fds[0];
-#endif
- for (i = 0; i < n; i++) {
- int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
-#ifdef DEBUG_PRINT_MODE
- ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
-#endif
+ if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) {
+
+ for (i = 0; i < n; i++) {
+ int fd = ERTS_POLL_RES_GET_FD(&pr[i]);
+#if ERTS_POLL_DEBUG_PRINT
+ ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i);
- DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
- ev2str(evts),
+ if (fd != wake_fd
+#if ERTS_POLL_USE_TIMERFD
+ && fd != ps->timer_fd
+#endif
+ )
+ DEBUG_PRINT_FD("trig %s (%s)", ps, fd,
+ ev2str(evts),
#if ERTS_POLL_USE_KQUEUE
- "kqueue"
+ "kqueue"
#elif ERTS_POLL_USE_EPOLL
- "epoll"
+ "epoll"
#else
- "/dev/poll"
+ "/dev/poll"
+#endif
+ );
#endif
- );
-#if ERTS_POLL_USE_WAKEUP_PIPE
- if (fd == wake_fd) {
- cleanup_wakeup_pipe(ps);
- ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
- if (n == 1)
- return 0;
- }
+ if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) {
+ cleanup_wakeup_pipe(ps);
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
+#if ERTS_POLL_USE_TIMERFD
+ else if (fd == ps->timer_fd) {
+ ERTS_POLL_RES_SET_FD(&pr[i], -1);
+ ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE);
+ res--;
+ }
#endif
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- else {
- /* Reset the events to emulate ONESHOT semantics */
- ps->fds_status[fd].events = 0;
- enqueue_update_request(ps, fd);
- }
+ else {
+ /* Reset the events to emulate ONESHOT semantics */
+ ps->fds_status[fd].events = 0;
+ enqueue_update_request(ps, fd);
+ }
#endif
+ }
}
- return res;
-#else
- ASSERT(chk_fds_res <= max_res);
- return chk_fds_res;
-#endif
+ if (res == 0)
+ return res;
+ else
+ return n;
}
#else /* !ERTS_POLL_USE_KERNEL_POLL */
@@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res,
#endif /* !ERTS_POLL_USE_KERNEL_POLL */
+static ERTS_INLINE ErtsMonotonicTime
+get_timeout(ErtsPollSet *ps,
+ int resolution,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout;
+
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT) {
+ timeout = 0;
+ }
+ else if (timeout_time == ERTS_POLL_INF_TIMEOUT) {
+ timeout = -1;
+ }
+ else {
+ ErtsMonotonicTime diff_time, current_time;
+ current_time = erts_get_monotonic_time(NULL);
+ diff_time = timeout_time - current_time;
+ if (diff_time <= 0) {
+ timeout = 0;
+ }
+ else {
+ switch (resolution) {
+ case 1000:
+ /* Round up to the nearest whole millisecond */
+ timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1;
+ if (timeout > (ErtsMonotonicTime) INT_MAX)
+ timeout = (ErtsMonotonicTime) INT_MAX;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000);
+ break;
+ case 1000000:
+ /* Round up to the nearest whole microsecond */
+ timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000);
+ break;
+ case 1000000000:
+ /* Round up to the nearest whole nanosecond */
+ timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1;
+ timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000);
+ break;
+ default:
+ ERTS_INTERNAL_ERROR("Invalid resolution");
+ timeout = 0;
+ break;
+ }
+ }
+ }
+ return timeout;
+}
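Editor's note: get_timeout() turns an absolute monotonic deadline into a relative timeout in the backend's resolution, rounding up so the poller never wakes before the deadline. A standalone sketch of the round-up step (plain C, not the ERTS macros; assumes a positive difference, as guaranteed by the diff_time <= 0 check above):

    /* Sketch: round a nanosecond difference up to whole milliseconds,
     * mirroring ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1 above.
     * 1 -> 1 ms, 1000000 -> 1 ms, 1000001 -> 2 ms. */
    long long ns_to_msec_round_up(long long diff_ns)
    {
        return (diff_ns - 1) / 1000000 + 1;
    }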
+
+#if ERTS_POLL_USE_SELECT
+
+static ERTS_INLINE int
+get_timeout_timeval(ErtsPollSet *ps,
+ SysTimeval *tvp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tvp->tv_sec = 0;
+ tvp->tv_usec = 0;
+
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000);
+ tvp->tv_sec = sec;
+ tvp->tv_usec = timeout - sec*(1000*1000);
+
+ ASSERT(tvp->tv_sec >= 0);
+ ASSERT(tvp->tv_usec >= 0);
+ ASSERT(tvp->tv_usec < 1000*1000);
+
+ return 1;
+ }
+
+}
+
+#endif
+
+#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_timespec(ErtsPollSet *ps,
+ struct timespec *tsp,
+ ErtsMonotonicTime timeout_time)
+{
+ ErtsMonotonicTime timeout = get_timeout(ps,
+ 1000*1000*1000,
+ timeout_time);
+
+ if (!timeout) {
+ tsp->tv_sec = 0;
+ tsp->tv_nsec = 0;
+ return 0;
+ }
+ else if (timeout == -1) {
+ return -1;
+ }
+ else {
+ ErtsMonotonicTime sec = timeout/(1000*1000*1000);
+ tsp->tv_sec = sec;
+ tsp->tv_nsec = timeout - sec*(1000*1000*1000);
+
+ ASSERT(tsp->tv_sec >= 0);
+ ASSERT(tsp->tv_nsec >= 0);
+ ASSERT(tsp->tv_nsec < 1000*1000*1000);
+
+ return 1;
+ }
+}
+
+#endif
+
+#if ERTS_POLL_USE_TIMERFD
+
+static ERTS_INLINE int
+get_timeout_itimerspec(ErtsPollSet *ps,
+ struct itimerspec *itsp,
+ ErtsMonotonicTime timeout_time)
+{
+
+ itsp->it_interval.tv_sec = 0;
+ itsp->it_interval.tv_nsec = 0;
+
+ return get_timeout_timespec(ps, &itsp->it_value, timeout_time);
+}
+
+#endif
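Editor's note: zeroing it_interval makes the timerfd one-shot — it fires once at it_value and never again. A minimal sketch of arming such a timer with the standard timerfd API (error handling omitted):

    #include <sys/timerfd.h>

    /* Sketch: arm tfd as a one-shot timer expiring after `ts`
     * (relative). A zeroed it_interval means the timer fires once
     * and does not repeat. */
    static int arm_oneshot(int tfd, struct timespec ts)
    {
        struct itimerspec its;
        its.it_interval.tv_sec = 0;
        its.it_interval.tv_nsec = 0;
        its.it_value = ts;
        return timerfd_settime(tfd, 0, &its, NULL);
    }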
+
static ERTS_INLINE int
-check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
+check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time)
{
int res;
- int timeout = do_wait ? -1 : 0;
- DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait);
+ int timeout;
+ DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%ld", ps, (long) timeout_time);
{
#if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */
+#if ERTS_POLL_USE_TIMERFD
+ struct itimerspec its;
+ timeout = get_timeout_itimerspec(ps, &its, timeout_time);
+ if (timeout > 0) {
+ timerfd_set(ps, &its);
+ res = epoll_wait(ps->kp_fd, pr, max_res, -1);
+ res = timerfd_clear(ps, pr, res, max_res);
+ } else {
+ res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
+ }
+#else /* !ERTS_POLL_USE_TIMERFD */
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = epoll_wait(ps->kp_fd, pr, max_res, timeout);
-
+#endif /* !ERTS_POLL_USE_TIMERFD */
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
- struct timespec ts = {0, 0};
- struct timespec *tsp = timeout ? NULL : &ts;
+ struct timespec ts;
+ struct timespec *tsp;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ tsp = timeout < 0 ? NULL : &ts;
res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp);
#elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */
/*
@@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */;
poll_res.dp_nfds = nfds < max_res ? nfds : max_res;
poll_res.dp_fds = pr;
- poll_res.dp_timeout = timeout;
+ poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time);
res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
-
+#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */
+ struct timespec ts;
+ struct timespec *tsp = &ts;
+ timeout = get_timeout_timespec(ps, &ts, timeout_time);
+ if (timeout < 0) tsp = NULL;
+ res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL);
#elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */
-
+ timeout = (int) get_timeout(ps, 1000, timeout_time);
res = poll(ps->poll_fds, ps->no_poll_fds, timeout);
-
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- SysTimeval tv = {0, 0};
- SysTimeval *tvp = timeout ? NULL : &tv;
+ SysTimeval tv;
+ SysTimeval *tvp;
+ timeout = get_timeout_timeval(ps, &tv, timeout_time);
+ tvp = timeout < 0 ? NULL : &tv;
ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds);
ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds);
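Editor's note: with ERTS_POLL_USE_TIMERFD the deadline is never handed to epoll_wait() itself — the timerfd sits in the epoll set, gets armed with the deadline, and epoll_wait() blocks indefinitely until an fd or the timer fires, giving nanosecond rather than millisecond granularity. The timerfd_set()/timerfd_clear() helpers used above are elided here; a condensed sketch of the underlying pattern (illustrative, not the ERTS helpers):

    #include <sys/epoll.h>
    #include <sys/timerfd.h>

    /* Sketch: wait on an epoll set using a timerfd as the timeout
     * source. Assumes tfd was created with timerfd_create() and
     * added to epfd. The caller then filters the timerfd event out
     * of the result set, as save_result() does above. */
    static int wait_with_timerfd(int epfd, int tfd, struct itimerspec *its,
                                 struct epoll_event *evs, int max)
    {
        timerfd_settime(tfd, 0, its, NULL);    /* arm one-shot timer */
        return epoll_wait(epfd, evs, max, -1); /* timer wakes us up  */
    }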
@@ -1629,7 +1854,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res)
int
ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time)
{
int res, no_fds, used_fds = 0;
int ebadf = 0;
@@ -1654,61 +1881,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
}
#endif
- do_wait = !is_woken(ps) && used_fds == 0;
+ do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT;
DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait);
if (do_wait) {
- erts_thr_progress_prepare_wait(NULL);
+ tpd = tpd ? tpd : erts_thr_prgr_data(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
- }
+ } else
+ timeout_time = ERTS_POLL_NO_TIMEOUT;
while (1) {
- res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds);
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time);
+ if (res != 0)
+ break;
+ if (timeout_time == ERTS_POLL_NO_TIMEOUT)
+ break;
+ if (erts_get_monotonic_time(NULL) >= timeout_time)
+ break;
+ }
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (res < 0
- && errno == EBADF
- && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
- /*
- * This may have happened because another thread deselected
- * a fd in our poll set and then closed it, i.e. the driver
- * behaved correctly. We wan't to avoid looking for a bad
- * fd, that may even not exist anymore. Therefore, handle
- * update requests and try again. This behaviour should only
- * happen when using SELECT as the polling mechanism.
- */
- ERTS_POLLSET_LOCK(ps);
- used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
- if (used_fds == no_fds) {
- *len = used_fds;
- ERTS_POLLSET_UNLOCK(ps);
- return 0;
- }
- res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds);
- /* Keep the lock over the non-blocking poll in order to not
- get any nasty races happening. */
+ if (res < 0
+ && errno == EBADF
+ && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
+ /*
+ * This may have happened because another thread deselected
+ * a fd in our poll set and then closed it, i.e. the driver
+ * behaved correctly. We want to avoid looking for a bad
+ * fd that may not even exist anymore. Therefore, handle
+ * update requests and try again. This behaviour should only
+ * happen when using SELECT as the polling mechanism.
+ */
+ ERTS_POLLSET_LOCK(ps);
+ used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds);
+ if (used_fds == no_fds) {
+ *len = used_fds;
ERTS_POLLSET_UNLOCK(ps);
- if (res == 0) {
- errno = EAGAIN;
- res = -1;
- }
+ return 0;
+ }
+ res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT);
+ /* Keep the lock over the non-blocking poll in order to not
+ get any nasty races happening. */
+ ERTS_POLLSET_UNLOCK(ps);
+ if (res == 0) {
+ errno = EAGAIN;
+ res = -1;
}
-#endif
-
- if (res != 0)
- break;
- if (!do_wait)
- break;
}
+#endif
if (do_wait) {
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_UPDATE_CACHE();
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO);
}
- woke_up(ps);
+ if (ERTS_POLL_USE_WAKEUP(ps))
+ woke_up(ps);
if (res < 0) {
#if ERTS_POLL_USE_SELECT
@@ -1719,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
#endif
res = errno;
}
- else {
+ else if (res == 0) {
+ res = used_fds == 0 ? ETIMEDOUT : 0;
+#ifdef HARD_DEBUG
+ check_poll_result(pr, used_fds);
+#endif
+ *len = used_fds;
+ } else {
#if ERTS_POLL_USE_SELECT
save_results:
#endif
-
ps_locked = 1;
ERTS_POLLSET_LOCK(ps);
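Editor's note: the rewritten loop no longer trusts the backend to honour the timeout exactly — a zero result is retried until the deadline has actually passed (a wakeup-pipe or timerfd event filtered out in save_result() produces exactly such a spurious zero). A standalone sketch of the same loop shape using plain poll(2) (helper names illustrative; deadlines assumed to fit in an int once reduced to milliseconds):

    #include <poll.h>
    #include <time.h>

    static long long now_ms(void)
    {
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000LL + ts.tv_nsec / 1000000;
    }

    /* Sketch: poll until fd is readable or an absolute
     * CLOCK_MONOTONIC deadline passes, tolerating spurious
     * zero-result wakeups. */
    static int wait_until(int fd, long long deadline_ms)
    {
        struct pollfd pfd = { fd, POLLIN, 0 };
        for (;;) {
            long long left = deadline_ms - now_ms();
            int res = poll(&pfd, 1, left > 0 ? (int) left : 0);
            if (res != 0)
                return res;           /* ready (>0) or error (-1) */
            if (now_ms() >= deadline_ms)
                return 0;             /* timed out */
        }
    }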
@@ -1753,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
void
ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set)
{
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
- if (!set)
- reset_wakeup_state(ps);
- else
- wake_poller(ps, 1);
-#endif
+ DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set);
+ if (ERTS_POLL_USE_WAKEUP(ps)) {
+ if (!set)
+ reset_wakeup_state(ps);
+ else
+ wake_poller(ps, 1);
+ }
}
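Editor's note: erts_poll_interrupt() is now a no-op for pollsets that do not use wakeup; for the rest, wake_poller() still rests on the classic self-pipe trick. A minimal sketch of that trick (assuming a non-blocking pipe whose read end is registered for input in the poll set):

    #include <errno.h>
    #include <unistd.h>

    /* Sketch: self-pipe wakeup. The pollset keeps wake_fds[0] in
     * its input set; writing a single byte makes a blocking poll
     * call return. */
    static void wake(int wake_fds[2])
    {
        char c = '!';
        while (write(wake_fds[1], &c, 1) < 0 && errno == EINTR)
            ; /* retry if interrupted by a signal */
    }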
int
@@ -1874,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id)
if (ps->internal_fd_limit <= kp_fd)
ps->internal_fd_limit = kp_fd + 1;
ps->kp_fd = kp_fd;
+ if (ps->id == -1)
+ ps->oneshot = 0;
+ else
+ ps->oneshot = 1;
#endif
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
+
erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
create_wakeup_pipe(ps);
+
+#if ERTS_POLL_USE_TIMERFD
+ create_timerfd(ps);
+#endif
+
+#if !ERTS_POLL_USE_CONCURRENT_UPDATE
handle_update_requests(ps, NULL, 0);
cleanup_wakeup_pipe(ps);
#endif
@@ -1992,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip)
pip->memory_size = size;
pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds);
-#if !ERTS_POLL_USE_CONCURRENT_UPDATE
pip->poll_set_size++; /* Wakeup pipe */
-#endif
pip->lazy_updates =
#if !ERTS_POLL_USE_CONCURRENT_UPDATE
@@ -2177,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps,
ASSERT(0);
return;
}
+ if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1])
+ continue;
+#if ERTS_POLL_USE_TIMERFD
+ if (fd == ps->timer_fd)
+ continue;
+#endif
data &= 0xFFFFFFFF;
ASSERT(fd == data);
/* Events are the events that are being monitored, which of course include
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index e1cea7eb8b..d40dabc529 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -51,6 +51,7 @@
#include "sys.h"
#define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN
+#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX
#ifdef ERTS_ENABLE_KERNEL_POLL
# undef ERTS_ENABLE_KERNEL_POLL
@@ -130,6 +131,9 @@
#endif
#define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL)
+#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10
+#define ERTS_POLL_USE_TIMERFD 0
typedef Uint32 ErtsPollEvents;
@@ -156,6 +160,14 @@ typedef enum {
#include <sys/epoll.h>
+#if ERTS_POLL_USE_EPOLL
+#ifdef HAVE_SYS_TIMERFD_H
+#include <sys/timerfd.h>
+#undef ERTS_POLL_USE_TIMERFD
+#define ERTS_POLL_USE_TIMERFD 1
+#endif
+#endif
+
#define ERTS_POLL_EV_E2N(EV) \
((uint32_t) (EV))
#define ERTS_POLL_EV_N2E(EV) \
@@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd {
#endif
-#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))
+#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)))
#define ev2str(ev) \
(((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \
diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h
index 1170a549b9..f3a91e54f7 100644
--- a/erts/emulator/sys/common/erl_poll_api.h
+++ b/erts/emulator/sys/common/erl_poll_api.h
@@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps,
* @param res an array of fd results that the ready fds are put in.
* @param[in] length the length of the res array
* @param[out] length the number of ready events returned in res
+ * @param tpd the thread progress data to register the sleep state in
+ * @param timeout_time the time, in native time units, at which to wake up
* @return 0 on success, else the ERRNO of the error that happened.
*/
int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps,
ErtsPollResFd res[],
- int *length);
+ int *length,
+ ErtsThrPrgrData *tpd,
+ ErtsMonotonicTime timeout_time);
/**
* Interrupt the thread waiting in the pollset. This function should be called
* with set = 0 before any thread calls erts_poll_wait in order to clear any
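Editor's note: for orientation, a hedged sketch of how a caller drives the extended interface with an absolute deadline (the exported symbol name goes through ERTS_POLL_EXPORT, and ERTS_MSEC_TO_MONOTONIC is assumed from the erts time headers; treat this as illustrative, not canonical usage):

    /* Sketch: waiting with a 10 ms absolute deadline. */
    ErtsPollResFd res[256];
    int len = sizeof(res) / sizeof(res[0]);
    ErtsMonotonicTime deadline =
        erts_get_monotonic_time(NULL) + ERTS_MSEC_TO_MONOTONIC(10);
    int err = erts_poll_wait(ps, res, &len, tpd, deadline);
    /* err == 0:         len entries in res are ready;
     * err == ETIMEDOUT: the deadline passed with nothing ready. */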
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 39bb4d515e..3843a27a6e 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1017,10 +1017,12 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps,
int erts_poll_wait(ErtsPollSet *ps,
ErtsPollResFd pr[],
- int *len)
+ int *len,
+ ErtsThrPrgrData *tpd,
+ Sint64 timeout_in)
{
int no_fds;
- DWORD timeout = INFINITE;
+ DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in;
EventData* ev;
int res = 0;
int num = 0;
@@ -1056,10 +1058,10 @@ int erts_poll_wait(ErtsPollSet *ps,
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
- erts_thr_progress_prepare_wait(NULL);
+ erts_thr_progress_prepare_wait(tpd);
ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP);
handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout);
- erts_thr_progress_finalize_wait(NULL);
+ erts_thr_progress_finalize_wait(tpd);
ERTS_MSACC_POP_STATE();
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
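Editor's note: the native wait takes a relative DWORD in milliseconds, so -1 is mapped to INFINITE above. Since INFINITE is just 0xFFFFFFFF, narrowing a 64-bit timeout deserves a clamp rather than a plain cast; a hedged sketch of a safe mapping (illustrative helper, not part of the patch):

    #include <windows.h>

    /* Sketch: map a signed 64-bit millisecond timeout onto the DWORD
     * that WaitForMultipleObjects() expects; -1 waits forever. Clamps
     * instead of truncating, since INFINITE is 0xFFFFFFFF. */
    static DWORD to_dword_timeout(long long timeout_in)
    {
        if (timeout_in < 0)
            return INFINITE;
        if (timeout_in >= (long long) INFINITE)
            return INFINITE - 1;
        return (DWORD) timeout_in;
    }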
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index a1c630d68a..b95aadc9b2 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -186,7 +186,9 @@ void sys_primitive_init(HMODULE beam)
UWord
erts_sys_get_page_size(void)
{
- return (UWord) 4*1024; /* Guess 4 KB */
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return (UWord)info.dwPageSize;
}
Uint
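Editor's note: replacing the hard-coded 4 KB guess with GetSystemInfo() is the right call. Note that dwPageSize (typically 4 KB) is distinct from dwAllocationGranularity (typically 64 KB), which governs where VirtualAlloc() may place reservations. A minimal sketch printing both (standard Win32 API):

    #include <windows.h>
    #include <stdio.h>

    /* Sketch: page size vs. allocation granularity on Windows. The
     * former is what erts_sys_get_page_size() now reports; the latter
     * constrains VirtualAlloc() placement. */
    int main(void)
    {
        SYSTEM_INFO info;
        GetSystemInfo(&info);
        printf("page size: %lu, allocation granularity: %lu\n",
               (unsigned long) info.dwPageSize,
               (unsigned long) info.dwAllocationGranularity);
        return 0;
    }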
diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl
index 0a42b09903..5b602dd4dc 100644
--- a/erts/emulator/test/big_SUITE.erl
+++ b/erts/emulator/test/big_SUITE.erl
@@ -168,7 +168,11 @@ eval({op,_,Op,A0,B0}, LFH) ->
Res = eval_op(Op, A, B),
erlang:garbage_collect(),
Res;
-eval({integer,_,I}, _) -> I;
+eval({integer,_,I}, _) ->
+ %% "Parasitic" ("symbiotic"?) test of squaring all numbers
+ %% found in the test data.
+ test_squaring(I),
+ I;
eval({call,_,{atom,_,Local},Args0}, LFH) ->
Args = eval_list(Args0, LFH),
LFH(Local, Args).
@@ -192,6 +196,18 @@ eval_op('bxor', A, B) -> A bxor B;
eval_op('bsl', A, B) -> A bsl B;
eval_op('bsr', A, B) -> A bsr B.
+test_squaring(I) ->
+ %% Multiplying an integer by itself is specially optimized, so we
+ %% should take special care to test squaring. The optimization
+ %% will kick in when the two operands have the same address.
+ Sqr = I * I,
+
+ %% This expression will be multiplied in the usual way, because
+ %% the two operands for '*' are stored at different addresses.
+ Sqr = I * ((I + id(1)) - id(1)),
+
+ ok.
+
%% Built in test functions
fac(0) -> 1;
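Editor's note: the test leans on the emulator detecting squaring by operand identity — when both operands of '*' reach the bignum multiply at the same address, a dedicated squaring routine takes over, so the second expression forces a same-valued copy at a different address to exercise the generic path. A self-contained C sketch of that dispatch shape (illustrative names, not the big.c code):

    #include <stddef.h>

    typedef unsigned int digit_t;
    typedef unsigned long long ddigit_t;

    /* Schoolbook multiply of little-endian digit arrays into r, which
     * must hold xl+yl digits and be zero-initialized by the caller. */
    static void mul(const digit_t *x, size_t xl,
                    const digit_t *y, size_t yl, digit_t *r)
    {
        size_t i, j;
        for (i = 0; i < xl; i++) {
            ddigit_t carry = 0;
            for (j = 0; j < yl; j++) {
                ddigit_t t = (ddigit_t) x[i] * y[j] + r[i + j] + carry;
                r[i + j] = (digit_t) t;
                carry = t >> 32;
            }
            r[i + yl] = (digit_t) carry;
        }
    }

    /* Dispatch by operand identity: `I * I` hands the multiplier the
     * same digit array twice, the cue for a squaring routine that can
     * skip the duplicated partial products. */
    static void multiply(const digit_t *x, size_t xl,
                         const digit_t *y, size_t yl, digit_t *r)
    {
        if (x == y && xl == yl) {
            /* a dedicated square() would be called here */
        }
        mul(x, xl, y, yl, r); /* generic path, also correct for squares */
    }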
diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl
index 9ffb484eb4..94501dad84 100644
--- a/erts/emulator/test/driver_SUITE.erl
+++ b/erts/emulator/test/driver_SUITE.erl
@@ -1754,7 +1754,7 @@ smp_select0(Config) ->
ProcFun = fun()-> io:format("Worker ~p starting\n",[self()]),
Port = open_port({spawn, DrvName}, []),
smp_select_loop(Port, 100000),
- sleep(1000), % wait for driver to handle pending events
+ smp_select_done(Port),
true = erlang:port_close(Port),
Master ! {ok,self()},
io:format("Worker ~p finished\n",[self()])
@@ -1784,6 +1784,21 @@ smp_select_loop(Port, N) ->
smp_select_loop(Port, N-1)
end.
+smp_select_done(Port) ->
+ case erlang:port_control(Port, ?CHKIO_SMP_SELECT, "done") of
+ "wait" ->
+ receive
+ {Port, done} ->
+ ok
+ after 10*1000 ->
+ %% Seems we have a lost ready_input event.
+ %% Go ahead anyway, port will crash VM when closed.
+ ok
+ end;
+
+ "ok" -> ok
+ end.
+
smp_select_wait([], _) ->
ok;
smp_select_wait(Pids, TimeoutMsg) ->
diff --git a/erts/emulator/test/driver_SUITE_data/chkio_drv.c b/erts/emulator/test/driver_SUITE_data/chkio_drv.c
index ee8f28e8b1..b9ee155b4b 100644
--- a/erts/emulator/test/driver_SUITE_data/chkio_drv.c
+++ b/erts/emulator/test/driver_SUITE_data/chkio_drv.c
@@ -90,7 +90,7 @@ typedef struct chkio_smp_select {
int next_read;
int next_write;
int first_write;
- enum {Closed, Opened, Selected, Waiting} state;
+ enum {Closed, Opened, Selected, Waiting, WaitingDone} state;
int wasSelected;
unsigned rand_state;
}ChkioSmpSelect;
@@ -292,18 +292,20 @@ stop_steal_aux(ChkioDrvData *cddp)
static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port)
{
switch (pip->state) {
+ case WaitingDone:
case Waiting: {
int word;
- fprintf(stderr, "Closing pipe in state Waiting. Event lost?\n");
+ fprintf(stderr, "Closing pipe in state Waiting*. Event lost?\r\n");
for (;;) {
int bytes = read(pip->read_fd, &word, sizeof(word));
if (bytes != sizeof(word)) {
if (bytes != 0) {
- fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\n", bytes, errno);
+ fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\r\n",
+ bytes, errno);
}
break;
}
- fprintf(stderr, "Read from pipe: %d\n", word);
+ fprintf(stderr, "Read from pipe: %d\r\n", word);
}
abort();
}
@@ -318,6 +320,8 @@ static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port)
close(pip->write_fd);
pip->state = Closed;
break;
+ case Closed:
+ break;
}
driver_free(pip);
}
@@ -383,6 +387,9 @@ chkio_drv_start(ErlDrvPort port, char *command)
cddp->id = driver_mk_port(port);
cddp->test = CHKIO_STOP;
cddp->test_data = NULL;
+
+ drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */
+
return (ErlDrvData) cddp;
#endif
}
@@ -526,7 +533,7 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
printf("Read event on uninitialized pipe %d\n", fd);
abort();
}
- if (pip->state != Selected && pip->state != Waiting) {
+ if (pip->state != Selected && pip->state != Waiting && pip->state != WaitingDone) {
printf("Read event on pipe in strange state %d\n", pip->state);
abort();
}
@@ -536,9 +543,9 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
inPipe = (pip->next_write - pip->next_read);
if (inPipe == 0) {
bytes = read(pip->read_fd, &word, sizeof(word));
- printf("Unexpected empty pipe, expected %u -> %u, bytes=%d, word=%d, written=%d\n",
- pip->next_read, pip->next_write-1, bytes, word,
- (pip->next_write - pip->first_write));
+ printf("Unexpected empty pipe: ptr=%p, fds=%d->%d, read bytes=%d, word=%d, written=%d\n",
+ pip, pip->write_fd, pip->read_fd,
+ bytes, word, (pip->next_write - pip->first_write));
/*abort();
Allow unexpected events as it's been seen to be triggered by epoll
on Linux. Most of the time the unwanted events are filtered by
@@ -564,7 +571,20 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event)
TRACEF(("Read %d from fd=%d\n", word, fd));
pip->next_read++;
}
- pip->state = Selected; /* not Waiting anymore */
+ if (pip->state == WaitingDone) {
+ if (pip->next_write == pip->next_read) {
+ /* All data read, send {Port, done} */
+ ErlDrvTermData spec[] = {ERL_DRV_PORT, driver_mk_port(cddp->port),
+ ERL_DRV_ATOM, driver_mk_atom("done"),
+ ERL_DRV_TUPLE, 2};
+ erl_drv_output_term(driver_mk_port(cddp->port),
+ spec, sizeof(spec) / sizeof(spec[0]));
+ pip->state = Selected;
+ }
+ }
+ else {
+ pip->state = Selected; /* not Waiting anymore */
+ }
break;
}
case CHKIO_DRV_USE:
@@ -962,6 +982,16 @@ chkio_drv_control(ErlDrvData drv_data,
}
case CHKIO_SMP_SELECT: {
ChkioSmpSelect* pip = (ChkioSmpSelect*) cddp->test_data;
+ if (len == 4 && memcmp(buf, "done", 4) == 0) {
+ if (pip && pip->state == Waiting) {
+ pip->state = WaitingDone;
+ res_str = "wait";
+ }
+ else
+ res_str = "ok";
+ res_len = -1;
+ break;
+ }
if (pip == NULL) {
erl_drv_mutex_lock(smp_pipes_mtx);
if (smp_pipes) {
@@ -1014,7 +1044,6 @@ chkio_drv_control(ErlDrvData drv_data,
if (pip->wasSelected && (op & 1)) {
TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd,
pip->read_fd));
- drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */
if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd,
DO_READ|ERL_DRV_USE, 0)
|| close(pip->write_fd)) {
diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl
index f04efb9003..2e0dfa42f3 100644
--- a/erts/emulator/test/scheduler_SUITE.erl
+++ b/erts/emulator/test/scheduler_SUITE.erl
@@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) ->
{Conc, PollType, KP} = get_ioconfig(Config),
{Sched, SchedOnln, _} = get_sstate(Config, ""),
- [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
- [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
-
- [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
-
if
Conc ->
- [5] = get_ionum(Config,"+IOt 5 +IOp 1"),
- [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"),
- [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+ [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
- [2] = get_ionum(Config, "+S 2 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4 +IOPt 100"),
- [4] = get_ionum(Config, "+S 4:2 +IOPt 100"),
- [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
+ [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
+ [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
+ [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
+
+ [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"),
+ [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"),
+ [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"),
fail = get_ionum(Config, "+IOt 1 +IOp 2"),
ok;
not Conc ->
+ [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"),
+ [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"),
+ [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"),
+
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"),
[1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"),
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"),
diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl
index fab2f45f28..4e6baa9e0e 100644
--- a/erts/emulator/test/signal_SUITE.erl
+++ b/erts/emulator/test/signal_SUITE.erl
@@ -85,7 +85,7 @@ xm_sig_order_proc() ->
receive
may_not_reach -> exit(bad_signal_order);
may_reach -> ok
- after 0 -> ok
+ after 0 -> erlang:yield()
end,
xm_sig_order_proc().