diff options
Diffstat (limited to 'erts/emulator')
37 files changed, 1605 insertions, 501 deletions
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 84338769e0..dac9574fa5 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -668,27 +668,25 @@ static dsize_t I_mul(ErtsDigit* x, dsize_t xl, ErtsDigit* y, dsize_t yl, ErtsDig static dsize_t I_sqr(ErtsDigit* x, dsize_t xl, ErtsDigit* r) { - ErtsDigit d_next = *x; ErtsDigit d; ErtsDigit* r0 = r; ErtsDigit* s = r; if ((r + xl) == x) /* "Inline" operation */ *x = 0; - x++; while(xl--) { - ErtsDigit* y = x; + ErtsDigit* y; ErtsDigit y_0 = 0, y_1 = 0, y_2 = 0, y_3 = 0; ErtsDigit b0, b1; ErtsDigit z0, z1, z2; ErtsDigit t; dsize_t y_l = xl; - + + d = *x; + x++; + y = x; s = r; - d = d_next; - d_next = *x; - x++; DMUL(d, d, b1, b0); DSUMc(*s, b0, y_3, t); diff --git a/erts/emulator/beam/erl_afit_alloc.c b/erts/emulator/beam/erl_afit_alloc.c index 38289ea78a..f07137c883 100644 --- a/erts/emulator/beam/erl_afit_alloc.c +++ b/erts/emulator/beam/erl_afit_alloc.c @@ -102,6 +102,8 @@ erts_afalc_start(AFAllctr_t *afallctr, allctr->add_mbc = NULL; allctr->remove_mbc = NULL; allctr->largest_fblk_in_mbc = NULL; + allctr->first_fblk_in_mbc = NULL; + allctr->next_fblk_in_mbc = NULL; allctr->init_atoms = init_atoms; #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index b7a8b9c2d0..d238d38d27 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -42,6 +42,7 @@ #include "global.h" #include "big.h" +#include "erl_mmap.h" #include "erl_mtrace.h" #define GET_ERL_ALLOC_UTIL_IMPL #include "erl_alloc_util.h" @@ -90,6 +91,8 @@ static int initialized = 0; #define SYS_ALLOC_CARRIER_FLOOR(X) ((X) & SYS_ALLOC_CARRIER_MASK) #define SYS_ALLOC_CARRIER_CEILING(X) \ SYS_ALLOC_CARRIER_FLOOR((X) + INV_SYS_ALLOC_CARRIER_MASK) +#define SYS_PAGE_SIZE (sys_page_size) +#define SYS_PAGE_SZ_MASK ((UWord)(SYS_PAGE_SIZE - 1)) #if 0 /* Can be useful for debugging */ @@ -98,6 +101,8 @@ static int initialized = 0; /* alloc_util global parameters */ static Uint sys_alloc_carrier_size; +static Uint sys_page_size; + #if HAVE_ERTS_MSEG static Uint max_mseg_carriers; #endif @@ -872,6 +877,8 @@ static ERTS_INLINE void clr_bit(UWord* map, Uint ix) &= ~((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#ifdef DEBUG + static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) { ASSERT(ix / ERTS_VSPACE_WORD_BITS < VSPACE_MAP_SZ); @@ -879,6 +886,8 @@ static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) & ((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#endif + UWord erts_literal_vspace_map[VSPACE_MAP_SZ]; static void set_literal_range(void* start, Uint size) @@ -2540,9 +2549,155 @@ mbc_alloc(Allctr_t *allctr, Uint size) return BLK2UMEM(blk); } +typedef struct { + char *ptr; + UWord size; +} ErtsMemDiscardRegion; + +/* Construct a discard region for the user memory of a free block, letting the + * OS reclaim its physical memory when required. + * + * Note that we're ignoring both the footer and everything that comes before + * the minimum block size as the allocator uses those areas to manage the + * block. */ +static void ERTS_INLINE +mem_discard_start(Allctr_t *allocator, Block_t *block, + ErtsMemDiscardRegion *out) +{ + UWord size = BLK_SZ(block); + + ASSERT(size >= allocator->min_block_size); + + if (size > (allocator->min_block_size + FBLK_FTR_SZ)) { + out->size = size - allocator->min_block_size - FBLK_FTR_SZ; + } else { + out->size = 0; + } + + out->ptr = (char*)block + allocator->min_block_size; +} + +/* Expands a discard region into a neighboring free block, allowing us to + * discard the block header and first page. + * + * This is very important in small-allocation scenarios where no single block + * is large enough to be discarded on its own. */ +static void ERTS_INLINE +mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor, + ErtsMemDiscardRegion *region) +{ + char *neighbor_start; + + ASSERT(IS_FREE_BLK(neighbor)); + + neighbor_start = (char*)neighbor; + + if (region->ptr >= neighbor_start) { + char *region_start_page; + + region_start_page = region->ptr - SYS_PAGE_SIZE; + region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK); + + /* Expand if our first page begins within the previous free block's + * unused data. */ + if (region_start_page >= (neighbor_start + allocator->min_block_size)) { + region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ; + region->ptr = region_start_page; + } + } else { + char *region_end_page; + UWord neighbor_size; + + ASSERT(region->ptr <= neighbor_start); + + region_end_page = region->ptr + region->size + SYS_PAGE_SIZE; + region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK); + + neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ; + + /* Expand if our last page ends anywhere within the next free block, + * sans the footer we'll inherit. */ + if (region_end_page < neighbor_start + neighbor_size) { + region->size += region_end_page - (region->ptr + region->size); + } + } +} + +static void ERTS_INLINE +mem_discard_finish(Allctr_t *allocator, Block_t *block, + ErtsMemDiscardRegion *region) +{ +#ifdef DEBUG + char *block_start, *block_end; + UWord block_size; + + block_size = BLK_SZ(block); + + /* Ensure that the region is completely covered by the legal area of the + * free block. This must hold even when the region is too small to be + * discarded. */ + if (region->size > 0) { + ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ); + + block_start = (char*)block + allocator->min_block_size; + block_end = (char*)block + block_size - FBLK_FTR_SZ; + + ASSERT(region->size == 0 || + (region->ptr + region->size <= block_end && + region->ptr >= block_start && + region->size <= block_size)); + } +#else + (void)allocator; + (void)block; +#endif + + if (region->size > SYS_PAGE_SIZE) { + UWord align_offset, size; + char *ptr; + + align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK); + + size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK; + ptr = region->ptr + align_offset; + + if (size > 0) { + ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK)); + ASSERT(!(size & SYS_PAGE_SZ_MASK)); + + erts_mem_discard(ptr, size); + } + } +} + +static void +carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier) +{ + static const int MAX_BLOCKS_TO_DISCARD = 100; + Block_t *block; + int i; + + block = allocator->first_fblk_in_mbc(allocator, carrier); + i = 0; + + while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) { + ErtsMemDiscardRegion region; + + ASSERT(IS_FREE_BLK(block)); + + mem_discard_start(allocator, block, ®ion); + mem_discard_finish(allocator, block, ®ion); + + block = allocator->next_fblk_in_mbc(allocator, carrier, block); + i++; + } +} + static void mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp) { + ErtsMemDiscardRegion discard_region = {0}; + int discard; Uint is_first_blk; Uint is_last_blk; Uint blk_sz; @@ -2558,6 +2713,21 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp ASSERT(IS_MBC_BLK(blk)); ASSERT(blk_sz >= allctr->min_block_size); +#ifndef DEBUG + /* We want to mark freed blocks as reclaimable to the OS, but it's a fairly + * expensive operation which doesn't do much good if we use it again soon + * after, so we limit it to deallocations on pooled carriers. */ + discard = busy_pcrr_pp && *busy_pcrr_pp; +#else + /* Always discard in debug mode, regardless of whether we're in the pool or + * not. */ + discard = 1; +#endif + + if (discard) { + mem_discard_start(allctr, blk, &discard_region); + } + HARD_CHECK_BLK_CARRIER(allctr, blk); crr = ABLK_TO_MBC(blk); @@ -2575,6 +2745,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp blk = PREV_BLK(blk); (*allctr->unlink_free_block)(allctr, blk); + if (discard) { + mem_discard_coalesce(allctr, blk, &discard_region); + } + blk_sz += MBC_FBLK_SZ(blk); is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk); SET_MBC_FBLK_SZ(blk, blk_sz); @@ -2590,6 +2764,11 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp if (IS_FREE_BLK(nxt_blk)) { /* Coalesce with next block... */ (*allctr->unlink_free_block)(allctr, nxt_blk); + + if (discard) { + mem_discard_coalesce(allctr, nxt_blk, &discard_region); + } + blk_sz += MBC_FBLK_SZ(nxt_blk); SET_MBC_FBLK_SZ(blk, blk_sz); @@ -2625,10 +2804,16 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp else { (*allctr->link_free_block)(allctr, blk); HARD_CHECK_BLK_CARRIER(allctr, blk); - if (busy_pcrr_pp && *busy_pcrr_pp) + + if (discard) { + mem_discard_finish(allctr, blk, &discard_region); + } + + if (busy_pcrr_pp && *busy_pcrr_pp) { update_pooled_tree(allctr, crr, blk_sz); - else + } else { check_abandon_carrier(allctr, blk, busy_pcrr_pp); + } } } @@ -3781,6 +3966,9 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr) unlink_carrier(&allctr->mbc_list, crr); allctr->remove_mbc(allctr, crr); + /* Mark our free blocks as unused and reclaimable to the OS. */ + carrier_mem_discard_free_blocks(allctr, crr); + cpool_insert(allctr, crr); @@ -6471,6 +6659,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) erts_atomic_init_nob(&allctr->cpool.stat.carriers_size, 0); erts_atomic_init_nob(&allctr->cpool.stat.no_carriers, 0); if (!init->ts && init->acul && init->acnl) { + ASSERT(allctr->add_mbc); + ASSERT(allctr->remove_mbc); + ASSERT(allctr->largest_fblk_in_mbc); + ASSERT(allctr->first_fblk_in_mbc); + ASSERT(allctr->next_fblk_in_mbc); + allctr->cpool.util_limit = init->acul; allctr->cpool.in_pool_limit = init->acnl; allctr->cpool.fblk_min_limit = init->acfml; @@ -6676,6 +6870,8 @@ erts_alcu_init(AlcUInit_t *init) #endif allow_sys_alloc_carriers = init->sac; + sys_page_size = erts_sys_get_page_size(); + #ifdef DEBUG carrier_alignment = sizeof(Unit_t); #endif diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index 9ab8589bf3..ea1afe8f58 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -684,10 +684,12 @@ struct Allctr_t_ { void (*creating_mbc) (Allctr_t *, Carrier_t *); void (*destroying_mbc) (Allctr_t *, Carrier_t *); - /* The three callbacks below are needed to support carrier migration */ + /* The five callbacks below are needed to support carrier migration. */ void (*add_mbc) (Allctr_t *, Carrier_t *); void (*remove_mbc) (Allctr_t *, Carrier_t *); UWord (*largest_fblk_in_mbc) (Allctr_t *, Carrier_t *); + Block_t * (*first_fblk_in_mbc) (Allctr_t *, Carrier_t *); + Block_t * (*next_fblk_in_mbc) (Allctr_t *, Carrier_t *, Block_t *); #if HAVE_ERTS_MSEG void* (*mseg_alloc)(Allctr_t*, Uint *size_p, Uint flags); diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c index 0e3e4c890a..f2ad2f6532 100644 --- a/erts/emulator/beam/erl_ao_firstfit_alloc.c +++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c @@ -241,6 +241,9 @@ static void aoff_add_mbc(Allctr_t*, Carrier_t*); static void aoff_remove_mbc(Allctr_t*, Carrier_t*); static UWord aoff_largest_fblk_in_mbc(Allctr_t*, Carrier_t*); +static Block_t *aoff_first_fblk_in_mbc(Allctr_t *, Carrier_t *); +static Block_t *aoff_next_fblk_in_mbc(Allctr_t *, Carrier_t *, Block_t *); + /* Generic tree functions used by both carrier and block trees. */ static void rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del); static void rbt_insert(enum AOFFSortOrder, AOFF_RBTree_t** root, AOFF_RBTree_t* blk); @@ -326,6 +329,8 @@ erts_aoffalc_start(AOFFAllctr_t *alc, allctr->add_mbc = aoff_add_mbc; allctr->remove_mbc = aoff_remove_mbc; allctr->largest_fblk_in_mbc = aoff_largest_fblk_in_mbc; + allctr->first_fblk_in_mbc = aoff_first_fblk_in_mbc; + allctr->next_fblk_in_mbc = aoff_next_fblk_in_mbc; allctr->init_atoms = init_atoms; #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG @@ -1058,6 +1063,62 @@ static UWord aoff_largest_fblk_in_mbc(Allctr_t* allctr, Carrier_t* carrier) return crr->rbt_node.hdr.bhdr; } +static Block_t *aoff_first_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier) +{ + AOFF_Carrier_t *crr = (AOFF_Carrier_t*)carrier; + + (void)allctr; + + if (crr->root) { + AOFF_RBTree_t *blk; + + /* Descend to the rightmost block of the tree. */ + for (blk = crr->root; blk->right; blk = blk->right); + + return (Block_t*)blk; + } + + return NULL; +} + +static Block_t *aoff_next_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier, + Block_t *block) +{ + AOFF_RBTree_t *parent, *blk; + + (void)allctr; + (void)carrier; + + blk = (AOFF_RBTree_t*)block; + + if (blk->left) { + /* Descend to the rightmost block of the left subtree. */ + for (blk = blk->left; blk->right; blk = blk->right); + + return (Block_t*)blk; + } + + while (blk->parent) { + parent = blk->parent; + + /* If we ascend from the right we know we haven't visited our parent + * yet, because we always descend as far as we can to the right when + * entering a subtree. */ + if (parent->right == blk) { + ASSERT(parent->left != blk); + return (Block_t*)parent; + } + + /* If we ascend from the left we know we've already visited our + * parent, and will need to keep ascending until we do so from the + * right or reach the end of the tree. */ + ASSERT(parent->left == blk); + blk = parent; + } + + return NULL; +} + /* * info_options() */ diff --git a/erts/emulator/beam/erl_async.c b/erts/emulator/beam/erl_async.c index 605a2b3461..44655ad5df 100644 --- a/erts/emulator/beam/erl_async.c +++ b/erts/emulator/beam/erl_async.c @@ -336,7 +336,7 @@ static ERTS_INLINE ErtsAsync *async_get(ErtsThrQ_t *q, case ERTS_THR_Q_NEED_THR_PRGR: { ErtsThrPrgrVal prgr = erts_thr_q_need_thr_progress(q); - erts_thr_progress_wakeup(NULL, prgr); + erts_thr_progress_wakeup(erts_thr_prgr_data(NULL), prgr); /* * We do no dequeue finalizing in hope that a new async * job will arrive before we are woken due to thread diff --git a/erts/emulator/beam/erl_bestfit_alloc.c b/erts/emulator/beam/erl_bestfit_alloc.c index 9cb1199c2a..ca81c14b96 100644 --- a/erts/emulator/beam/erl_bestfit_alloc.c +++ b/erts/emulator/beam/erl_bestfit_alloc.c @@ -209,6 +209,8 @@ erts_bfalc_start(BFAllctr_t *bfallctr, allctr->add_mbc = NULL; allctr->remove_mbc = NULL; allctr->largest_fblk_in_mbc = NULL; + allctr->first_fblk_in_mbc = NULL; + allctr->next_fblk_in_mbc = NULL; allctr->init_atoms = init_atoms; #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 9b6b84d000..b23fa77f5f 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -1040,7 +1040,8 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2) Eterm list; Eterm item; int non_immed_key; - int max_iter = 10 * CONTEXT_REDS; + int reds_left = ERTS_BIF_REDS_LEFT(BIF_P); + int max_iter = 16 * reds_left; if (is_nil(BIF_ARG_2)) { BIF_RET(am_false); @@ -1058,14 +1059,15 @@ BIF_RETTYPE lists_member_2(BIF_ALIST_2) } item = CAR(list_val(list)); if ((item == term) || (non_immed_key && eq(item, term))) { - BIF_RET2(am_true, CONTEXT_REDS - max_iter/10); + BIF_RET2(am_true, reds_left - max_iter/16); } list = CDR(list_val(list)); } if (is_not_nil(list)) { + BUMP_REDS(BIF_P, reds_left - max_iter/16); BIF_ERROR(BIF_P, BADARG); } - BIF_RET2(am_false, CONTEXT_REDS - max_iter/10); + BIF_RET2(am_false, reds_left - max_iter/16); } static BIF_RETTYPE lists_reverse_alloc(Process *c_p, @@ -1159,8 +1161,6 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p, if (is_nil(list)) { BIF_RET(tail); } else if (is_list(list)) { - ASSERT(is_list(tail)); - if (cells_left > CELLS_PER_RED) { return lists_reverse_alloc(c_p, list, tail); } diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c index b642ae009d..75ac1c4a93 100644 --- a/erts/emulator/beam/erl_db_catree.c +++ b/erts/emulator/beam/erl_db_catree.c @@ -621,9 +621,9 @@ static TreeDbTerm* join_trees(TreeDbTerm *left_root_param, } #ifdef DEBUG -# define FORCE_RANDOM_SPLIT_JOIN +# define PROVOKE_RANDOM_SPLIT_JOIN #endif -#ifdef FORCE_RANDOM_SPLIT_JOIN +#ifdef PROVOKE_RANDOM_SPLIT_JOIN static int dbg_fastrand(void) { static int g_seed = 648835; @@ -631,8 +631,12 @@ static int dbg_fastrand(void) return (g_seed>>16)&0x7FFF; } -static void dbg_maybe_force_splitjoin(DbTableCATreeNode* base_node) +static void dbg_provoke_random_splitjoin(DbTableCATree* tb, + DbTableCATreeNode* base_node) { + if (tb->common.status & DB_CATREE_FORCE_SPLIT) + return; + switch (dbg_fastrand() % 8) { case 1: base_node->u.base.lock_statistics = 1+ERL_DB_CATREE_HIGH_CONTENTION_LIMIT; @@ -643,8 +647,8 @@ static void dbg_maybe_force_splitjoin(DbTableCATreeNode* base_node) } } #else -# define dbg_maybe_force_splitjoin(N) -#endif /* FORCE_RANDOM_SPLIT_JOIN */ +# define dbg_provoke_random_splitjoin(T,N) +#endif /* PROVOKE_RANDOM_SPLIT_JOIN */ static ERTS_INLINE int try_wlock_base_node(DbTableCATreeBaseNode *base_node) @@ -691,7 +695,7 @@ void wunlock_adapt_base_node(DbTableCATree* tb, DbTableCATreeNode* parent, int current_level) { - dbg_maybe_force_splitjoin(node); + dbg_provoke_random_splitjoin(tb,node); if ((!node->u.base.root && parent && !(tb->common.status & DB_CATREE_FORCE_SPLIT)) || node->u.base.lock_statistics < ERL_DB_CATREE_LOW_CONTENTION_LIMIT) { diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index b4df418cd5..d5dfb096b1 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -2477,7 +2477,7 @@ erts_copy_one_frag(Eterm** hpp, ErlOffHeap* off_heap, *hpp = hp; for (i = 0; i < nrefs; i++) { - if (is_not_immed(refs[i])) + if (is_not_immed(refs[i]) && !erts_is_literal(refs[i],boxed_val(refs[i]))) refs[i] = offset_ptr(refs[i], offs); } bp->off_heap.first = NULL; diff --git a/erts/emulator/beam/erl_goodfit_alloc.c b/erts/emulator/beam/erl_goodfit_alloc.c index 01d4aa54ff..68b9579433 100644 --- a/erts/emulator/beam/erl_goodfit_alloc.c +++ b/erts/emulator/beam/erl_goodfit_alloc.c @@ -226,6 +226,8 @@ erts_gfalc_start(GFAllctr_t *gfallctr, allctr->add_mbc = NULL; allctr->remove_mbc = NULL; allctr->largest_fblk_in_mbc = NULL; + allctr->first_fblk_in_mbc = NULL; + allctr->next_fblk_in_mbc = NULL; allctr->init_atoms = init_atoms; #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG diff --git a/erts/emulator/beam/erl_hl_timer.c b/erts/emulator/beam/erl_hl_timer.c index 6ec6f8065e..ef7a55fa38 100644 --- a/erts/emulator/beam/erl_hl_timer.c +++ b/erts/emulator/beam/erl_hl_timer.c @@ -3041,15 +3041,23 @@ erts_set_port_timer(Port *c_prt, Sint64 tmo) check_canceled_queue(esdp, esdp->timer_service); - timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo); - - create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC - ? create_tw_timer - : create_hl_timer); - tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT, - (void *) c_prt, c_prt->common.id, - THE_NON_VALUE, NULL, NULL, NULL); - erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr); + if (tmo == 0) { + erts_atomic_set_relb(&c_prt->common.timer, ERTS_PTMR_TIMEDOUT); + erts_port_task_schedule(c_prt->common.id, + &c_prt->timeout_task, + ERTS_PORT_TASK_TIMEOUT); + } else { + + timeout_pos = get_timeout_pos(erts_get_monotonic_time(esdp), tmo); + + create_timer = (tmo < ERTS_TIMER_WHEEL_MSEC + ? create_tw_timer + : create_hl_timer); + tmr = (void *) create_timer(esdp, timeout_pos, 0, ERTS_TMR_PORT, + (void *) c_prt, c_prt->common.id, + THE_NON_VALUE, NULL, NULL, NULL); + erts_atomic_set_relb(&c_prt->common.timer, (erts_aint_t) tmr); + } } void diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index dcf9f90c99..08c9125840 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -2320,8 +2320,8 @@ system_cleanup(int flush_async) * The exiting thread might be waiting for * us to block; need to update status... */ - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(erts_thr_prgr_data(NULL), 0); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); } /* Wait forever... */ while (1) diff --git a/erts/emulator/beam/erl_port.h b/erts/emulator/beam/erl_port.h index 2be0a5bf74..25976d38cc 100644 --- a/erts/emulator/beam/erl_port.h +++ b/erts/emulator/beam/erl_port.h @@ -334,6 +334,8 @@ Eterm erts_request_io_bytes(Process *c_p); #define ERTS_PORT_SFLG_INVALID ((Uint32) (1 << 11)) /* Last port to terminate halts the emulator */ #define ERTS_PORT_SFLG_HALT ((Uint32) (1 << 12)) +/* Check if the event in ready_input should be cleaned */ +#define ERTS_PORT_SFLG_CHECK_FD_CLEANUP ((Uint32) (1 << 13)) #ifdef DEBUG /* Only debug: make sure all flags aren't cleared unintentionally */ #define ERTS_PORT_SFLG_PORT_DEBUG ((Uint32) (1 << 31)) diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 4928d80f27..c8f2e88127 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -97,6 +97,9 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q typedef union { struct { /* I/O tasks */ ErlDrvEvent event; +#if ERTS_POLL_USE_SCHEDULER_POLLING + int is_scheduler_event; +#endif } io; struct { ErtsProc2PortSigCallback callback; @@ -141,6 +144,9 @@ struct ErtsPortTaskBusyCallerTable_ { ErtsPortTaskBusyCaller pre_alloc_busy_caller; }; +#if ERTS_POLL_USE_SCHEDULER_POLLING +erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif static void begin_port_cleanup(Port *pp, ErtsPortTask **execq, @@ -578,13 +584,26 @@ reset_handle(ErtsPortTask *ptp) } static ERTS_INLINE void -reset_executed_io_task_handle(ErtsPortTask *ptp) +reset_executed_io_task_handle(Port *prt, ErtsPortTask *ptp) { if (ptp->u.alive.handle) { ASSERT(ptp == handle2task(ptp->u.alive.handle)); - /* The port task handle is reset inside task_executed */ - erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, - reset_port_task_handle); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) { + if ((erts_atomic32_read_nob(&prt->state) & ERTS_PORT_SFLG_CHECK_FD_CLEANUP)) { + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + erts_atomic32_read_band_nob(&prt->state, ~ERTS_PORT_SFLG_CHECK_FD_CLEANUP); + } else { + reset_port_task_handle(ptp->u.alive.handle); + } + } else +#endif + { + /* The port task handle is reset inside task_executed */ + erts_io_notify_port_task_executed(ptp->type, ptp->u.alive.handle, + reset_port_task_handle); + } } } @@ -1307,6 +1326,22 @@ erts_port_task_abort(ErtsPortTaskHandle *pthp) res = - 1; /* Task already aborted, executing, or executed */ else { reset_port_task_handle(pthp); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + switch (ptp->type) { + case ERTS_PORT_TASK_INPUT: + case ERTS_PORT_TASK_OUTPUT: + if (ptp->u.alive.td.io.is_scheduler_event) { + ASSERT(erts_atomic_read_nob( + &erts_port_task_outstanding_io_tasks) > 0); + erts_atomic_dec_relb(&erts_port_task_outstanding_io_tasks); + } + break; + default: + break; + } +#endif + res = 0; } } @@ -1442,7 +1477,14 @@ erts_port_task_schedule(Eterm id, va_list argp; va_start(argp, type); ptp->u.alive.td.io.event = va_arg(argp, ErlDrvEvent); +#if ERTS_POLL_USE_SCHEDULER_POLLING + ptp->u.alive.td.io.is_scheduler_event = va_arg(argp, int); +#endif va_end(argp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + erts_atomic_inc_relb(&erts_port_task_outstanding_io_tasks); +#endif break; } case ERTS_PORT_TASK_PROC_SIG: { @@ -1621,12 +1663,14 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) int processing_busy_q; int vreds = 0; int reds = 0; - erts_aint_t io_tasks_executed = 0; int fpe_was_unmasked; erts_aint32_t state; int active; Uint64 start_time = 0; ErtsSchedulerData *esdp = runq->scheduler; +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_aint_t io_tasks_executed = 0; +#endif ERTS_MSACC_PUSH_STATE_M(); ERTS_LC_ASSERT(erts_lc_runq_is_locked(runq)); @@ -1722,8 +1766,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) for input and output */ (*pp->drv_ptr->ready_input)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_OUTPUT: reds = ERTS_PORT_REDS_OUTPUT; @@ -1732,8 +1779,11 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) LTTNG_DRIVER(driver_ready_output, pp); (*pp->drv_ptr->ready_output)((ErlDrvData) pp->drv_data, ptp->u.alive.td.io.event); - reset_executed_io_task_handle(ptp); - io_tasks_executed++; + reset_executed_io_task_handle(pp, ptp); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (ptp->u.alive.td.io.is_scheduler_event) + io_tasks_executed++; +#endif break; case ERTS_PORT_TASK_PROC_SIG: { ErtsProc2PortSigData *sigdp = &ptp->u.alive.td.psig.data; @@ -1799,6 +1849,15 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_unblock_fpe(fpe_was_unmasked); ERTS_MSACC_POP_STATE_M(); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (io_tasks_executed) { + ASSERT(erts_atomic_read_nob(&erts_port_task_outstanding_io_tasks) + >= io_tasks_executed); + erts_atomic_add_relb(&erts_port_task_outstanding_io_tasks, + -1*io_tasks_executed); + } +#endif + ASSERT(runq == erts_get_runq_port(pp)); active = finalize_exec(pp, &execq, processing_busy_q); @@ -2086,6 +2145,10 @@ erts_dequeue_port(ErtsRunQueue *rq) void erts_port_task_init(void) { +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_atomic_init_nob(&erts_port_task_outstanding_io_tasks, + (erts_aint_t) 0); +#endif init_port_task_alloc(erts_no_schedulers + erts_no_poll_threads + 1); /* aux_thread */ init_busy_caller_table_alloc(); diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h index ae78a7d8a3..ca5183b305 100644 --- a/erts/emulator/beam/erl_port_task.h +++ b/erts/emulator/beam/erl_port_task.h @@ -38,6 +38,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #ifndef ERL_PORT_TASK_H__ #define ERL_PORT_TASK_H__ +#include "erl_poll.h" + #undef ERTS_INCLUDE_SCHEDULER_INTERNALS #if (defined(ERL_PROCESS_C__) \ || defined(ERL_PORT_TASK_C__) \ @@ -54,8 +56,8 @@ typedef erts_atomic_t ErtsPortTaskHandle; #define ERTS_PT_FLG_BAD_OUTPUT (1 << 4) typedef enum { - ERTS_PORT_TASK_INPUT, - ERTS_PORT_TASK_OUTPUT, + ERTS_PORT_TASK_INPUT = 0, + ERTS_PORT_TASK_OUTPUT = 1, ERTS_PORT_TASK_TIMEOUT, ERTS_PORT_TASK_DIST_CMD, ERTS_PORT_TASK_PROC_SIG @@ -134,6 +136,12 @@ ERTS_GLB_INLINE void erts_port_task_sched_unlock(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE int erts_port_task_sched_lock_is_locked(ErtsPortTaskSched *ptsp); ERTS_GLB_INLINE void erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp); +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int erts_port_task_have_outstanding_io_tasks(void); +/* NOTE: Do not access any of the exported variables directly */ +extern erts_atomic_t erts_port_task_outstanding_io_tasks; +#endif + #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE void @@ -211,6 +219,15 @@ erts_port_task_sched_enter_exiting_state(ErtsPortTaskSched *ptsp) erts_atomic32_read_bor_nob(&ptsp->flags, ERTS_PTS_FLG_EXITING); } +#if defined(ERTS_INCLUDE_SCHEDULER_INTERNALS) && ERTS_POLL_USE_SCHEDULER_POLLING +ERTS_GLB_INLINE int +erts_port_task_have_outstanding_io_tasks(void) +{ + return (erts_atomic_read_acqb(&erts_port_task_outstanding_io_tasks) + != 0); +} +#endif + #endif #ifdef ERTS_INCLUDE_SCHEDULER_INTERNALS diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 1dde9800f8..a24f4bc193 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -174,7 +174,6 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE]; typedef struct { int aux_work; int tse; - int sys_schedule; } ErtsBusyWaitParams; static ErtsBusyWaitParams sched_busy_wait_params[ERTS_SCHED_TYPE_LAST + 1]; @@ -344,6 +343,9 @@ erts_sched_stat_t erts_sched_stat; static erts_tsd_key_t ERTS_WRITE_UNLIKELY(sched_data_key); +#if ERTS_POLL_USE_SCHEDULER_POLLING +static erts_atomic32_t doing_sys_schedule; +#endif static erts_atomic32_t no_empty_run_queues; long erts_runq_supervision_interval = 0; static ethr_event runq_supervision_event; @@ -1646,7 +1648,7 @@ haw_thr_prgr_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); } - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1656,7 +1658,7 @@ haw_thr_prgr_soft_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val) if (erts_thr_progress_cmp(val, awdp->latest_wakeup) > 0) { awdp->latest_wakeup = val; haw_chk_later_cleanup_op_wakeup(awdp, val); - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } @@ -1670,7 +1672,7 @@ haw_thr_prgr_later_cleanup_op_wakeup(ErtsAuxWorkData *awdp, ErtsThrPrgrVal val, else { awdp->latest_wakeup = val; awdp->later_op.size = thr_prgr_later_cleanup_op_threshold; - erts_thr_progress_wakeup(awdp->esdp, val); + erts_thr_progress_wakeup(erts_thr_prgr_data(awdp->esdp), val); } } } @@ -3066,6 +3068,7 @@ aux_thread(void *unused) ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1); erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; + ErtsThrPrgrData *tpd; int thr_prgr_active = 1; ERTS_MSACC_DECLARE_CACHE(); @@ -3087,12 +3090,16 @@ aux_thread(void *unused) callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 1); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; #if ERTS_POLL_USE_FALLBACK - ssi->psi = erts_create_pollset_thread(-1); +#if ERTS_POLL_USE_SCHEDULER_POLLING + ssi->psi = erts_create_pollset_thread(-2, tpd); +#else + ssi->psi = erts_create_pollset_thread(-1, tpd); +#endif #endif sched_prep_spin_wait(ssi); @@ -3105,11 +3112,11 @@ aux_thread(void *unused) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 1); + erts_thr_progress_active(tpd, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (aux_work && erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); } if (!aux_work) { @@ -3120,7 +3127,7 @@ aux_thread(void *unused) #endif if (thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 0); + erts_thr_progress_active(tpd, thr_prgr_active = 0); #if ERTS_POLL_USE_FALLBACK @@ -3132,11 +3139,11 @@ aux_thread(void *unused) if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - erts_check_io(ssi->psi); + erts_check_io(ssi->psi, ERTS_POLL_INF_TIMEOUT); } } #else - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); flgs = sched_spin_wait(ssi, 0); @@ -3153,7 +3160,7 @@ aux_thread(void *unused) ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_OTHER); } } - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); #endif } @@ -3171,7 +3178,8 @@ poll_thread(void *arg) erts_aint32_t aux_work; ErtsThrPrgrCallbacks callbacks; int thr_prgr_active = 1; - struct erts_poll_thread *psi = erts_create_pollset_thread(id); + struct erts_poll_thread *psi; + ErtsThrPrgrData *tpd; ERTS_MSACC_DECLARE_CACHE(); #ifdef ERTS_ENABLE_LOCK_CHECK @@ -3192,9 +3200,12 @@ poll_thread(void *arg) callbacks.wait = thr_prgr_wait; callbacks.finalize_wait = thr_prgr_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); init_aux_work_data(awdp, NULL, NULL); awdp->ssi = ssi; + + psi = erts_create_pollset_thread(id, tpd); + ssi->psi = psi; sched_prep_spin_wait(ssi); @@ -3207,16 +3218,16 @@ poll_thread(void *arg) aux_work = erts_atomic32_read_acqb(&ssi->aux_work); if (aux_work) { if (!thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 1); + erts_thr_progress_active(tpd, thr_prgr_active = 1); aux_work = handle_aux_work(awdp, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (aux_work && erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); } if (!aux_work) { if (thr_prgr_active) - erts_thr_progress_active(NULL, thr_prgr_active = 0); + erts_thr_progress_active(tpd, thr_prgr_active = 0); flgs = sched_spin_wait(ssi, 0); @@ -3226,7 +3237,7 @@ poll_thread(void *arg) if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); ASSERT(flgs & ERTS_SSI_FLG_WAITING); - erts_check_io(psi); + erts_check_io(psi, ERTS_POLL_INF_TIMEOUT); } } } @@ -3236,6 +3247,59 @@ poll_thread(void *arg) return NULL; } +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ERTS_INLINE void +clear_sys_scheduling(void) +{ + erts_atomic32_set_mb(&doing_sys_schedule, 0); +} + +static ERTS_INLINE int +try_set_sys_scheduling(void) +{ + return 0 == erts_atomic32_cmpxchg_acqb(&doing_sys_schedule, 1, 0); +} + + +static ERTS_INLINE int +prepare_for_sys_schedule(void) +{ + while (!erts_port_task_have_outstanding_io_tasks() + && try_set_sys_scheduling()) { + if (!erts_port_task_have_outstanding_io_tasks()) + return 1; + clear_sys_scheduling(); + } + return 0; +} + +static void +check_io_timer(void *null) +{ + ErtsSchedulerData *esdp = erts_get_scheduler_data(); + if (prepare_for_sys_schedule()) { + erts_check_io(esdp->ssi->psi, ERTS_POLL_NO_TIMEOUT); + clear_sys_scheduling(); + } + + /* The timer is cleared if this schedulers run-queue became empty + or if the CHECKIO flag was cleared. The CHECKIO flags is cleared + when a check_balance assigns another scheduler to be the poller in + the overload scenario. */ + if ((ERTS_RUNQ_FLGS_GET_NOB(esdp->run_queue) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO)) + == ERTS_RUNQ_FLG_CHECKIO) { + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, + check_io_timer, NULL); + } else { + ERTS_RUNQ_FLGS_UNSET(esdp->run_queue, ERTS_RUNQ_FLG_CHECKIO); + } +} + +#else +#define clear_sys_scheduling() +#define prepare_for_sys_schedule() 0 +#endif + static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { @@ -3291,13 +3355,13 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if (aux_work) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } aux_work = handle_aux_work(&esdp->aux_work_data, aux_work, 1); ERTS_MSACC_UPDATE_CACHE(); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); } if (aux_work) { @@ -3305,7 +3369,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) current_time = erts_get_monotonic_time(esdp); if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -3324,19 +3388,36 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } if (do_timeout) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } - else { + else if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && prepare_for_sys_schedule()) { + /* We sleep in check_io, only for normal schedulers */ + if (thr_prgr_active) { + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); + sched_wall_time_change(esdp, 0); + } + flgs = sched_spin_wait(ssi, 0); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING); + if (flgs & ERTS_SSI_FLG_SLEEPING) { + ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING); + ASSERT(flgs & ERTS_SSI_FLG_WAITING); + erts_check_io(ssi->psi, timeout_time); + current_time = erts_get_monotonic_time(esdp); + } + } + clear_sys_scheduling(); + } else { if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) { if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(esdp); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(esdp)); } - flgs = sched_spin_wait(ssi, spincount); if (flgs & ERTS_SSI_FLG_SLEEPING) { ASSERT(flgs & ERTS_SSI_FLG_WAITING); @@ -3366,7 +3447,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) } } if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) - erts_thr_progress_finalize_wait(esdp); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(esdp)); } if (!ERTS_SCHEDULER_IS_DIRTY(esdp) && current_time >= timeout_time) erts_bump_timers(esdp->timer_wheel, current_time); @@ -3395,7 +3476,7 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) if (ERTS_SCHEDULER_IS_DIRTY(esdp)) dirty_sched_wall_time_change(esdp, working = 1); else if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } @@ -4576,6 +4657,15 @@ check_balance(ErtsRunQueue *c_rq) if (blnc_no_rqs == 1) { c_rq->check_balance_reds = INT_MAX; erts_atomic32_set_nob(&balance_info.checking_balance, 0); +#if ERTS_POLL_USE_SCHEDULER_POLLING + c_rq->check_balance_reds = ERTS_RUNQ_CALL_CHECK_BALANCE_REDS; + if ((ERTS_RUNQ_FLGS_GET_NOB(c_rq) & (ERTS_RUNQ_FLG_OUT_OF_WORK|ERTS_RUNQ_FLG_CHECKIO)) + == 0) { + ERTS_RUNQ_FLGS_SET(c_rq, ERTS_RUNQ_FLG_CHECKIO); + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL); + } + ERTS_RUNQ_FLGS_UNSET(c_rq, ERTS_RUNQ_FLGS_MIGRATION_INFO); +#endif return; } @@ -5095,6 +5185,19 @@ erts_fprintf(stderr, "--------------------------------\n"); /* Publish new migration paths... */ erts_atomic_set_wb(&erts_migration_paths, (erts_aint_t) new_mpaths); +#if ERTS_POLL_USE_SCHEDULER_POLLING + if (full_scheds == current_active) { + ERTS_ASSERT(full_scheds <= current_active); + /* All active schedulers ran for full, we need to do active polling, + so we setup a timer that does active polling */ + if (!(ERTS_RUNQ_FLGS_GET_NOB(c_rq) & ERTS_RUNQ_FLG_CHECKIO)) { + /* Active polling is not running, start it */ + erts_start_timer_callback(ERTS_POLL_SCHEDULER_POLLING_TIMEOUT, check_io_timer, NULL); + } + run_queue_info[c_rq->ix].flags |= ERTS_RUNQ_FLG_CHECKIO; + } +#endif + /* Reset balance statistics in all online queues */ for (qix = 0; qix < blnc_no_rqs; qix++) { Uint32 flags = run_queue_info[qix].flags; @@ -5104,6 +5207,8 @@ erts_fprintf(stderr, "--------------------------------\n"); ASSERT(!(flags & ERTS_RUNQ_FLG_OUT_OF_WORK)); if (rq->waiting) flags |= ERTS_RUNQ_FLG_OUT_OF_WORK; + if (rq != c_rq) + flags &= ~ERTS_RUNQ_FLG_CHECKIO; rq->full_reds_history_sum = run_queue_info[qix].full_reds_history_sum; @@ -5113,8 +5218,7 @@ erts_fprintf(stderr, "--------------------------------\n"); ERTS_DBG_CHK_FULL_REDS_HISTORY(rq); rq->out_of_work_count = 0; - (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO, flags); - + (void) ERTS_RUNQ_FLGS_READ_BSET(rq, ERTS_RUNQ_FLGS_MIGRATION_INFO|ERTS_RUNQ_FLG_CHECKIO, flags); rq->max_len = erts_atomic32_read_dirty(&rq->len); for (pix = 0; pix < ERTS_NO_PRIO_LEVELS; pix++) { ErtsRunQueueInfo *rqi; @@ -5553,7 +5657,6 @@ erts_sched_set_busy_wait_threshold(ErtsSchedType sched_type, char *str) return EINVAL; } - params->sys_schedule = sys_sched; params->tse = sys_sched * ERTS_SCHED_TSE_SLEEP_SPINCOUNT_FACT; params->aux_work = sys_sched * aux_work_fact; @@ -5764,6 +5867,9 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online, int no_poll_th size_runqs = sizeof(ErtsAlignedRunQueue) * tot_rqs; erts_aligned_run_queues = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_RUNQS, size_runqs); +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_atomic32_init_nob(&doing_sys_schedule, 0); +#endif erts_atomic32_init_nob(&no_empty_run_queues, 0); erts_no_run_queues = n; @@ -7551,7 +7657,8 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (aux_work|evacuate) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), + thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } if (aux_work) @@ -7559,8 +7666,8 @@ suspend_scheduler(ErtsSchedulerData *esdp) aux_work, 1); - if (aux_work && erts_thr_progress_update(esdp)) - erts_thr_progress_leader_update(esdp); + if (aux_work && erts_thr_progress_update(erts_thr_prgr_data(esdp))) + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (evacuate) { erts_runq_lock(esdp->run_queue); evacuate_run_queue(esdp->run_queue, &sbp); @@ -7579,18 +7686,18 @@ suspend_scheduler(ErtsSchedulerData *esdp) if (!aux_work && current_time < timeout_time) { /* go to sleep... */ if (thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 0); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 0); sched_wall_time_change(esdp, 0); } - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(erts_thr_prgr_data(NULL)); suspend_normal_scheduler_sleep(esdp); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(erts_thr_prgr_data(NULL)); current_time = erts_get_monotonic_time(esdp); } if (current_time >= timeout_time) { if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } erts_bump_timers(esdp->timer_wheel, current_time); @@ -7647,7 +7754,7 @@ suspend_scheduler(ErtsSchedulerData *esdp) profile_scheduler(make_small(esdp->no), am_active); if (!thr_prgr_active) { - erts_thr_progress_active(esdp, thr_prgr_active = 1); + erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } } @@ -8282,6 +8389,11 @@ sched_thread_func(void *vesdp) erts_msacc_init_thread("scheduler", no, 1); erts_thr_progress_register_managed_thread(esdp, &callbacks, 0); + +#if ERTS_POLL_USE_SCHEDULER_POLLING + esdp->ssi->psi = erts_create_pollset_thread(-1, NULL); +#endif + erts_alloc_register_scheduler(vesdp); #ifdef ERTS_ENABLE_LOCK_CHECK { @@ -9296,12 +9408,12 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } } - leader_update = erts_thr_progress_update(esdp); + leader_update = erts_thr_progress_update(erts_thr_prgr_data(esdp)); aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work); if (aux_work | leader_update) { erts_runq_unlock(rq); if (leader_update) - erts_thr_progress_leader_update(esdp); + erts_thr_progress_leader_update(erts_thr_prgr_data(esdp)); if (aux_work) handle_aux_work(&esdp->aux_work_data, aux_work, 0); erts_runq_lock(rq); diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 8d20ccdf90..a1b029adbe 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -173,8 +173,10 @@ extern int erts_dio_sched_thread_suggested_stack_size; (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 9)) #define ERTS_RUNQ_FLG_HALTING \ (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 10)) +#define ERTS_RUNQ_FLG_CHECKIO \ + (((Uint32) 1) << (ERTS_RUNQ_FLG_BASE2 + 11)) -#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 11) +#define ERTS_RUNQ_FLG_MAX (ERTS_RUNQ_FLG_BASE2 + 12) #define ERTS_RUNQ_FLGS_MIGRATION_QMASKS \ (ERTS_RUNQ_FLGS_EMIGRATE_QMASK \ diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h index b119c59ab3..74cc966cbe 100644 --- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h +++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h @@ -188,6 +188,7 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) erts_sspa_chunk_t *chnk; erts_sspa_chunk_header_t *chdr; erts_sspa_blk_t *res; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_ALLOC); chnk = erts_sspa_cix2chunk(data, cix); chdr = &chnk->aligned.header; @@ -201,11 +202,15 @@ erts_sspa_alloc(erts_sspa_data_t *data, int cix) chdr->local.last = NULL; ERTS_SSPA_DBG_CHK_LCL(chdr); } - if (chdr->local.cnt <= chdr->local.lim) - return (char *) erts_sspa_process_remote_frees(chdr, res); + if (chdr->local.cnt <= chdr->local.lim) { + res = erts_sspa_process_remote_frees(chdr, res); + ERTS_MSACC_POP_STATE_M_X(); + return (char*) res; + } else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS) chdr->head.no_thr_progress_check++; ASSERT(res); + ERTS_MSACC_POP_STATE_M_X(); return (char *) res; } diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c index aa08eb40ec..bac437efe9 100644 --- a/erts/emulator/beam/erl_thr_progress.c +++ b/erts/emulator/beam/erl_thr_progress.c @@ -508,6 +508,10 @@ init_wakeup_request_array(ErtsThrPrgrVal *w) } } +ErtsThrPrgrData *erts_thr_progress_data(void) { + return erts_tsd_get(erts_thr_prgr_data_key__); +} + void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) { @@ -551,7 +555,7 @@ erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *callbacks) } -void +ErtsThrPrgrData * erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *callbacks, int pref_wakeup) @@ -630,6 +634,7 @@ erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, wakeup_managed(id); } callbacks->finalize_wait(callbacks->arg); + return tpd; } static ERTS_INLINE int @@ -796,7 +801,7 @@ leader_update(ErtsThrPrgrData *tpd) == ERTS_THR_PRGR_LFLG_NO_LEADER)) && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } } @@ -849,23 +854,22 @@ update(ErtsThrPrgrData *tpd) } int -erts_thr_progress_update(ErtsSchedulerData *esdp) +erts_thr_progress_update(ErtsThrPrgrData *tpd) { - return update(thr_prgr_data(esdp)); + return update(tpd); } int -erts_thr_progress_leader_update(ErtsSchedulerData *esdp) +erts_thr_progress_leader_update(ErtsThrPrgrData *tpd) { - return leader_update(thr_prgr_data(esdp)); + return leader_update(tpd); } void -erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) +erts_thr_progress_prepare_wait(ErtsThrPrgrData *tpd) { erts_aint32_t lflgs; - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -884,14 +888,13 @@ erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp) == ERTS_THR_PRGR_LFLG_NO_LEADER && got_sched_wakeups()) { /* Someone need to make progress */ - wakeup_managed(0); + wakeup_managed(tpd->id); } } void -erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) +erts_thr_progress_finalize_wait(ErtsThrPrgrData *tpd) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); ErtsThrPrgrVal current, val; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -921,9 +924,8 @@ erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp) } void -erts_thr_progress_active(ErtsSchedulerData *esdp, int on) +erts_thr_progress_active(ErtsThrPrgrData *tpd, int on) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); #ifdef ERTS_ENABLE_LOCK_CHECK erts_lc_check_exact(NULL, 0); @@ -973,7 +975,7 @@ unmanaged_continue(ErtsThrPrgrDelayHandle handle) == (ERTS_THR_PRGR_LFLG_NO_LEADER|ERTS_THR_PRGR_LFLG_WAITING_UM) && got_sched_wakeups()) { /* Others waiting for us... */ - wakeup_managed(0); + wakeup_managed(1); } } } @@ -1182,10 +1184,10 @@ request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) } void -erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +erts_thr_progress_wakeup(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value) { - ErtsThrPrgrData *tpd = thr_prgr_data(esdp); + ASSERT(!tpd->is_temporary); if (tpd->is_managed) request_wakeup_managed(tpd, value); diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h index 8329995b24..00a9e61407 100644 --- a/erts/emulator/beam/erl_thr_progress.h +++ b/erts/emulator/beam/erl_thr_progress.h @@ -123,22 +123,24 @@ extern ErtsThrPrgr erts_thr_prgr__; void erts_thr_progress_pre_init(void); void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged); -void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp, - ErtsThrPrgrCallbacks *, - int); +ErtsThrPrgrData *erts_thr_progress_register_managed_thread( + ErtsSchedulerData *esdp, ErtsThrPrgrCallbacks *, int); void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrCallbacks *); -void erts_thr_progress_active(ErtsSchedulerData *esdp, int on); -void erts_thr_progress_wakeup(ErtsSchedulerData *esdp, +void erts_thr_progress_active(ErtsThrPrgrData *, int on); +void erts_thr_progress_wakeup(ErtsThrPrgrData *, ErtsThrPrgrVal value); -int erts_thr_progress_update(ErtsSchedulerData *esdp); -int erts_thr_progress_leader_update(ErtsSchedulerData *esdp); -void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp); -void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp); +int erts_thr_progress_update(ErtsThrPrgrData *); +int erts_thr_progress_leader_update(ErtsThrPrgrData *); +void erts_thr_progress_prepare_wait(ErtsThrPrgrData *); +void erts_thr_progress_finalize_wait(ErtsThrPrgrData *); ErtsThrPrgrDelayHandle erts_thr_progress_unmanaged_delay__(void); void erts_thr_progress_unmanaged_continue__(int umrefc_ix); +ErtsThrPrgrData *erts_thr_progress_data(void); void erts_thr_progress_dbg_print_state(void); +ERTS_GLB_INLINE ErtsThrPrgrData *erts_thr_prgr_data(ErtsSchedulerData *esdp); + ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc); ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_mb__(ERTS_THR_PRGR_ATOMIC *atmc); @@ -161,6 +163,15 @@ ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val); #if ERTS_GLB_INLINE_INCL_FUNC_DEF +ERTS_GLB_INLINE ErtsThrPrgrData * +erts_thr_prgr_data(ErtsSchedulerData *esdp) { + if (esdp) { + return &esdp->thr_progress_data; + } else { + return erts_thr_progress_data(); + } +} + ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_nob__(ERTS_THR_PRGR_ATOMIC *atmc) { diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c index 53a020e7a5..2350d4c02f 100644 --- a/erts/emulator/beam/erl_trace.c +++ b/erts/emulator/beam/erl_trace.c @@ -2177,6 +2177,7 @@ sys_msg_dispatcher_func(void *unused) { ErtsThrPrgrCallbacks callbacks; ErtsSysMsgQ *local_sys_message_queue = NULL; + ErtsThrPrgrData *tpd; int wait = 0; #ifdef ERTS_ENABLE_LOCK_CHECK @@ -2189,7 +2190,7 @@ sys_msg_dispatcher_func(void *unused) callbacks.wait = sys_msg_dispatcher_wait; callbacks.finalize_wait = sys_msg_dispatcher_fin_wait; - erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); + tpd = erts_thr_progress_register_managed_thread(NULL, &callbacks, 0); while (1) { int end_wait = 0; @@ -2210,8 +2211,8 @@ sys_msg_dispatcher_func(void *unused) if (!sys_message_queue) { erts_mtx_unlock(&smq_mtx); end_wait = 1; - erts_thr_progress_active(NULL, 0); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_active(tpd, 0); + erts_thr_progress_prepare_wait(tpd); erts_mtx_lock(&smq_mtx); } @@ -2225,8 +2226,8 @@ sys_msg_dispatcher_func(void *unused) erts_mtx_unlock(&smq_mtx); if (end_wait) { - erts_thr_progress_finalize_wait(NULL); - erts_thr_progress_active(NULL, 1); + erts_thr_progress_finalize_wait(tpd); + erts_thr_progress_active(tpd, 1); } /* Send trace messages ... */ @@ -2239,8 +2240,8 @@ sys_msg_dispatcher_func(void *unused) Process *proc = NULL; Port *port = NULL; - if (erts_thr_progress_update(NULL)) - erts_thr_progress_leader_update(NULL); + if (erts_thr_progress_update(tpd)) + erts_thr_progress_leader_update(tpd); #ifdef DEBUG_PRINTOUTS print_msg_type(smqp); diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index dbe0201caf..c75b4045f7 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -38,6 +38,7 @@ #include <ctype.h> #include <sys/types.h> #include <errno.h> +#include <stdint.h> #define IDENTITY(c) c #define STRINGIFY_1(b) IDENTITY(#b) @@ -955,6 +956,7 @@ static size_t my_strnlen(const char *s, size_t maxlen) #endif #endif +typedef struct _tcp_descriptor tcp_descriptor; #if defined(TCP_CORK) #define INET_TCP_NOPUSH TCP_CORK @@ -1010,16 +1012,19 @@ typedef struct _multi_timer_data { struct _multi_timer_data *prev; } MultiTimerData; -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, - ErlDrvTermData caller, unsigned timeout, - void (*timeout_fun)(ErlDrvData drv_data, - ErlDrvTermData caller)); -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + ErlDrvTermData caller, unsigned timeout, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data); -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p); +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p); +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)); static void tcp_inet_multi_timeout(ErlDrvData e, ErlDrvTermData caller); -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port); +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port); typedef struct { int id; /* id used to identify reply */ @@ -1278,7 +1283,7 @@ static struct erl_drv_entry sctp_inet_driver_entry = }; #endif -typedef struct { +struct _tcp_descriptor { inet_descriptor inet; /* common data structure (DON'T MOVE) */ int high; /* high watermark */ int low; /* low watermark */ @@ -1294,7 +1299,8 @@ typedef struct { int http_state; /* 0 = response|request 1=headers fields */ inet_async_multi_op *multi_first;/* NULL == no multi-accept-queue, op is in ordinary queue */ inet_async_multi_op *multi_last; - MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd; /* Timer structures for multiple accept */ + MultiTimerData *mtd_cache; /* A cache for timer allocations */ #ifdef HAVE_SENDFILE struct { ErlDrvSizeT ioq_skip; /* The number of bytes in the queue at the time @@ -1310,7 +1316,7 @@ typedef struct { Uint64 length; } sendfile; #endif -} tcp_descriptor; +}; /* send function */ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len); @@ -1320,7 +1326,10 @@ static int tcp_deliver(tcp_descriptor* desc, int len); static int tcp_shutdown_error(tcp_descriptor* desc, int err); +#ifdef HAVE_SENDFILE static int tcp_inet_sendfile(tcp_descriptor* desc); +static int tcp_sendfile_aborted(tcp_descriptor* desc, int socket_error); +#endif static int tcp_inet_output(tcp_descriptor* desc, HANDLE event); static int tcp_inet_input(tcp_descriptor* desc, HANDLE event); @@ -9772,6 +9781,7 @@ static ErlDrvData prep_tcp_inet_start(ErlDrvPort port, char* args) desc->tcp_add_flags = 0; desc->http_state = 0; desc->mtd = NULL; + desc->mtd_cache = NULL; desc->multi_first = desc->multi_last = NULL; DEBUGF(("tcp_inet_start(%ld) }\r\n", (long)port)); return (ErlDrvData) desc; @@ -9875,15 +9885,14 @@ static void tcp_close_check(tcp_descriptor* desc) driver_demonitor_process(desc->inet.port, &monitor); send_async_error(desc->inet.dport, id, caller, am_closed); } - clean_multi_timers(&(desc->mtd), desc->inet.port); } - else if (desc->inet.state == INET_STATE_CONNECTING) { async_error_am(INETP(desc), am_closed); } else if (desc->inet.state == INET_STATE_CONNECTED) { async_error_am_all(INETP(desc), am_closed); } + clean_multi_timers(desc, desc->inet.port); } /* @@ -9926,6 +9935,15 @@ static void tcp_desc_close(tcp_descriptor* desc) erl_inet_close(INETP(desc)); } +static void tcp_inet_recv_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(!desc->inet.active); + sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); + desc->i_remain = 0; + async_error_am(INETP(desc), am_timeout); +} + /* TCP requests from Erlang */ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, ErlDrvSizeT len, @@ -10096,12 +10114,12 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, if (time_left <= 0) { time_left = 1; } - omtd = add_multi_timer(&(desc->mtd), desc->inet.port, ocaller, + omtd = add_multi_timer(desc, desc->inet.port, ocaller, time_left, &tcp_inet_multi_timeout); } enq_old_multi_op(desc, oid, oreq, ocaller, omtd, &omonitor); if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10116,7 +10134,7 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_xerror("noproc", rbuf, rsize); } if (timeout != INET_INFINITY) { - mtd = add_multi_timer(&(desc->mtd), desc->inet.port, caller, + mtd = add_multi_timer(desc, desc->inet.port, caller, timeout, &tcp_inet_multi_timeout); } enq_multi_op(desc, tbuf, INET_REQ_ACCEPT, caller, mtd, &monitor); @@ -10213,7 +10231,8 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, async_error_am(INETP(desc), am_timeout); else { if (timeout != INET_INFINITY) - driver_set_timer(desc->inet.port, timeout); + add_multi_timer(desc, INETP(desc)->port, 0, + timeout, &tcp_inet_recv_timeout); if (!INETP(desc)->is_ignored) sock_select(INETP(desc),(FD_READ|FD_CLOSE),1); else @@ -10300,12 +10319,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, desc->tcp_add_flags |= TCP_ADDF_SENDFILE; /* See if we can finish sending without selecting & rescheduling. */ - tcp_inet_sendfile(desc); - - if(desc->sendfile.length > 0) { - sock_select(INETP(desc), FD_WRITE, 1); + if (tcp_inet_sendfile(desc) == 0) { + if(desc->sendfile.length > 0) { + sock_select(INETP(desc), FD_WRITE, 1); + } } - return ctl_reply(INET_REP_OK, NULL, 0, rbuf, rsize); #else return ctl_error(ENOTSUP, rbuf, rsize); @@ -10319,12 +10337,27 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, } +static void tcp_inet_send_timeout(ErlDrvData e, ErlDrvTermData dummy) +{ + tcp_descriptor* desc = (tcp_descriptor*)e; + ASSERT(IS_BUSY(INETP(desc))); + ASSERT(desc->busy_on_send); + desc->inet.caller = desc->inet.busy_caller; + desc->inet.state &= ~INET_F_BUSY; + desc->busy_on_send = 0; + set_busy_port(desc->inet.port, 0); + inet_reply_error_am(INETP(desc), am_timeout); + if (desc->send_timeout_close) { + tcp_desc_close(desc); + } +} + /* ** tcp_inet_timeout: ** called when timer expire: ** TCP socket may be: ** -** a) receiving -- deselect +** a) receiving -- send timeout ** b) connecting -- close socket ** c) accepting -- reset listener ** @@ -10338,26 +10371,9 @@ static void tcp_inet_timeout(ErlDrvData e) DEBUGF(("tcp_inet_timeout(%ld) {s=%d\r\n", (long)desc->inet.port, desc->inet.s)); if ((state & INET_F_MULTI_CLIENT)) { /* Multi-client always means multi-timers */ - fire_multi_timers(&(desc->mtd), desc->inet.port, e); + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTED) == INET_STATE_CONNECTED) { - if (desc->busy_on_send) { - ASSERT(IS_BUSY(INETP(desc))); - desc->inet.caller = desc->inet.busy_caller; - desc->inet.state &= ~INET_F_BUSY; - desc->busy_on_send = 0; - set_busy_port(desc->inet.port, 0); - inet_reply_error_am(INETP(desc), am_timeout); - if (desc->send_timeout_close) { - tcp_desc_close(desc); - } - } - else { - /* assume recv timeout */ - ASSERT(!desc->inet.active); - sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); - desc->i_remain = 0; - async_error_am(INETP(desc), am_timeout); - } + fire_multi_timers(desc, desc->inet.port, e); } else if ((state & INET_STATE_CONNECTING) == INET_STATE_CONNECTING) { /* assume connect timeout */ @@ -10487,7 +10503,7 @@ static void tcp_inet_process_exit(ErlDrvData e, ErlDrvMonitor *monitorp) return; } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } if (desc->multi_first == NULL) { sock_select(INETP(desc),FD_ACCEPT,0); @@ -10518,6 +10534,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) #ifdef DEBUG long port = (long) desc->inet.port; /* Used after driver_exit() */ #endif + int blocking_send = 0; DEBUGF(("tcp_recv_closed(%ld): s=%d, in %s, line %d\r\n", port, desc->inet.s, __FILE__, __LINE__)); if (IS_BUSY(INETP(desc))) { @@ -10525,7 +10542,7 @@ static int tcp_recv_closed(tcp_descriptor* desc) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; DEBUGF(("tcp_recv_closed(%ld): busy on send\r\n", port)); } @@ -10533,16 +10550,25 @@ static int tcp_recv_closed(tcp_descriptor* desc) set_busy_port(desc->inet.port, 0); inet_reply_error_am(INETP(desc), am_closed); DEBUGF(("tcp_recv_closed(%ld): busy reply 'closed'\r\n", port)); - } else { + blocking_send = 1; + } +#ifdef HAVE_SENDFILE + if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + tcp_sendfile_aborted(desc, ENOTCONN); + blocking_send = 1; + } +#endif + if (!blocking_send) { /* No blocking send op to reply to right now. * If next op is a send, make sure it returns {error,closed} * rather than {error,enotconn}. */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; } + if (!desc->inet.active) { - /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + /* We must cancel any timer here ! */ + clean_multi_timers(desc, INETP(desc)->port); /* passive mode do not terminate port ! */ tcp_clear_input(desc); if (desc->inet.exitf) { @@ -10577,16 +10603,21 @@ static int tcp_recv_error(tcp_descriptor* desc, int err) desc->inet.caller = desc->inet.busy_caller; tcp_clear_output(desc); if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; set_busy_port(desc->inet.port, 0); inet_reply_error_am(INETP(desc), am_closed); } +#ifdef HAVE_SENDFILE + if (desc->tcp_add_flags & TCP_ADDF_SENDFILE) { + tcp_sendfile_aborted(desc, err); + } +#endif if (!desc->inet.active) { /* We must cancel any timer here ! */ - driver_cancel_timer(desc->inet.port); + clean_multi_timers(desc, INETP(desc)->port); tcp_clear_input(desc); if (desc->inet.exitf) { tcp_desc_close(desc); @@ -10691,13 +10722,13 @@ static int tcp_deliver(tcp_descriptor* desc, int len) if (len == 0) { /* empty buffer or waiting for more input */ if ((desc->i_buf == NULL) || (desc->i_remain > 0)) - return count; + return 0; if ((n = tcp_remain(desc, &len)) != 0) { if (n < 0) /* packet error */ return n; if (len > 0) /* more data pending */ desc->i_remain = len; - return count; + return 0; } } @@ -10749,9 +10780,7 @@ static int tcp_deliver(tcp_descriptor* desc, int len) len = 0; if (!desc->inet.active) { - if (!desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - } + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_recv_timeout); sock_select(INETP(desc),(FD_READ|FD_CLOSE),0); if (desc->i_buf != NULL) tcp_restart_input(desc); @@ -10777,7 +10806,7 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) int len; int nread; - if (desc->i_buf == NULL) { /* allocte a read buffer */ + if (desc->i_buf == NULL) { /* allocate a read buffer */ int sz = (request_len > 0) ? request_len : desc->inet.bufsz; if ((desc->i_buf = alloc_buffer(sz)) == NULL) @@ -10850,10 +10879,11 @@ static int tcp_recv(tcp_descriptor* desc, int request_len) return tcp_deliver(desc, desc->i_ptr - desc->i_ptr_start); } else { - if ((nread = tcp_remain(desc, &len)) < 0) + nread = tcp_remain(desc, &len); + if (nread < 0) return tcp_recv_error(desc, EMSGSIZE); else if (nread == 0) - return tcp_deliver(desc, len); + return tcp_deliver(desc, len); else if (len > 0) desc->i_remain = len; /* set remain */ } @@ -11172,7 +11202,7 @@ static int tcp_inet_input(tcp_descriptor* desc, HANDLE event) } if (timeout != NULL) { - remove_multi_timer(&(desc->mtd), desc->inet.port, timeout); + remove_multi_timer(desc, desc->inet.port, timeout); } driver_demonitor_process(desc->inet.port, &monitor); @@ -11231,8 +11261,8 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) if (IS_BUSY(INETP(desc))) { desc->inet.caller = desc->inet.busy_caller; if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); - desc->busy_on_send = 0; + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); + desc->busy_on_send = 0; } desc->inet.state &= ~INET_F_BUSY; set_busy_port(desc->inet.port, 0); @@ -11247,27 +11277,31 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) DEBUGF(("driver_failure_eof(%ld) in %s, line %d\r\n", (long)desc->inet.port, __FILE__, __LINE__)); if (desc->inet.active) { + ErlDrvTermData err_atom; if (show_econnreset) { tcp_error_message(desc, err); - tcp_closed_message(desc); - inet_reply_error(INETP(desc), err); + err_atom = error_atom(err); } else { - tcp_closed_message(desc); - inet_reply_error_am(INETP(desc), am_closed); + err_atom = am_closed; } + tcp_closed_message(desc); + if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) + inet_reply_error_am(INETP(desc), err_atom); + if (desc->inet.exitf) driver_exit(desc->inet.port, 0); else tcp_desc_close(desc); } else { tcp_close_check(desc); - tcp_desc_close(desc); if (desc->inet.caller) { - if (show_econnreset) - inet_reply_error(INETP(desc), err); - else - inet_reply_error_am(INETP(desc), am_closed); + if (!(desc->tcp_add_flags & TCP_ADDF_SENDFILE)) { + if (show_econnreset) + inet_reply_error(INETP(desc), err); + else + inet_reply_error_am(INETP(desc), am_closed); + } } else { /* No blocking send op to reply to right now. @@ -11276,6 +11310,7 @@ static int tcp_send_or_shutdown_error(tcp_descriptor* desc, int err) */ desc->tcp_add_flags |= TCP_ADDF_DELAYED_CLOSE_SEND; } + tcp_desc_close(desc); /* * Make sure that the next receive operation gets an {error,closed} @@ -11332,6 +11367,12 @@ static int tcp_shutdown_error(tcp_descriptor* desc, int err) return tcp_send_or_shutdown_error(desc, err); } +static void tcp_inet_delay_send(ErlDrvData data, ErlDrvTermData dummy) +{ + tcp_descriptor *desc = (tcp_descriptor*)data; + (void)tcp_inet_output(desc, INETP(desc)->s); +} + /* ** Send non-blocking vector data */ @@ -11384,7 +11425,9 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(desc, INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11399,7 +11442,10 @@ static int tcp_sendv(tcp_descriptor* desc, ErlIOVec* ev) INETP(desc)->is_ignored |= INET_IGNORE_WRITE; n = 0; } else if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) { - n = 0; + driver_enqv(ix, ev, 0); + add_multi_timer(desc, INETP(desc)->port, 0, + 0, &tcp_inet_delay_send); + return 0; } else if (IS_SOCKET_ERROR(sock_sendv(desc->inet.s, ev->iov, vsize, &n, 0))) { if ((sock_errno() != ERRNO_BLOCK) && (sock_errno() != EINTR)) { @@ -11482,7 +11528,9 @@ static int tcp_send(tcp_descriptor* desc, char* ptr, ErlDrvSizeT len) set_busy_port(desc->inet.port, 1); if (desc->send_timeout != INET_INFINITY) { desc->busy_on_send = 1; - driver_set_timer(desc->inet.port, desc->send_timeout); + add_multi_timer(desc, INETP(desc)->port, + 0 /* arg */, desc->send_timeout /* timeout */, + &tcp_inet_send_timeout); } return 1; } @@ -11586,7 +11634,8 @@ static int tcp_sendfile_completed(tcp_descriptor* desc) { /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, + &tcp_inet_send_timeout); desc->busy_on_send = 0; } @@ -11788,8 +11837,8 @@ socket_error: { DEBUGF(("tcp_inet_sendfile(%ld): send errno = %d (errno %d)\r\n", (long)desc->inet.port, socket_errno, errno)); - result = tcp_send_error(desc, socket_errno); tcp_sendfile_aborted(desc, socket_errno); + result = tcp_send_error(desc, socket_errno); goto done; } @@ -11893,6 +11942,12 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) #ifdef __WIN32__ desc->inet.send_would_block = 1; #endif + /* If DELAY_SEND is set ready_output may have + been called without doing select so we do + a select in order to get into the correct + state */ + if (desc->tcp_add_flags & TCP_ADDF_DELAY_SEND) + sock_select(INETP(desc), FD_WRITE, 1); goto done; } else if (n == 0) { /* Workaround for redhat/CentOS 6.3 returning 0 when sending packets with @@ -11918,7 +11973,7 @@ static int tcp_inet_output(tcp_descriptor* desc, HANDLE event) set_busy_port(desc->inet.port, 0); /* if we have a timer then cancel and send ok to client */ if (desc->busy_on_send) { - driver_cancel_timer(desc->inet.port); + cancel_multi_timer(desc, INETP(desc)->port, &tcp_inet_send_timeout); desc->busy_on_send = 0; } inet_reply_ok(INETP(desc)); @@ -12730,7 +12785,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) udesc->i_buf = NULL; if (!desc->active) { async_error(desc, err); - driver_cancel_timer(desc->port); + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); } else { @@ -12819,7 +12874,7 @@ static int packet_inet_input(udp_descriptor* udesc, HANDLE event) return count; count++; if (!desc->active) { - driver_cancel_timer(desc->port); /* possibly cancel */ + driver_cancel_timer(desc->port); sock_select(desc,FD_READ,0); return count; /* passive mode (read one packet only) */ } @@ -12898,55 +12953,69 @@ make_noninheritable_handle(SOCKET s) * Multi-timers */ -static void fire_multi_timers(MultiTimerData **first, ErlDrvPort port, +static void fire_multi_timers(tcp_descriptor *desc, ErlDrvPort port, ErlDrvData data) { ErlDrvTime next_timeout; - if (!*first) { + MultiTimerData *curr = desc->mtd; + if (!curr) { ASSERT(0); return; } #ifdef DEBUG { ErlDrvTime chk = erl_drv_monotonic_time(ERL_DRV_MSEC); - ASSERT(chk >= (*first)->when); + ASSERT(chk >= curr->when); } #endif do { - MultiTimerData *save = *first; - *first = save->next; + MultiTimerData *save = curr; + (*(save->timeout_function))(data,save->caller); - FREE(save); - if (*first == NULL) { + + curr = curr->next; + + if (desc->mtd_cache == NULL) + desc->mtd_cache = save; + else + FREE(save); + + if (curr == NULL) { + desc->mtd = NULL; return; } - (*first)->prev = NULL; - next_timeout = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + curr->prev = NULL; + next_timeout = curr->when - erl_drv_monotonic_time(ERL_DRV_MSEC); } while (next_timeout <= 0); + desc->mtd = curr; driver_set_timer(port, (unsigned long) next_timeout); } -static void clean_multi_timers(MultiTimerData **first, ErlDrvPort port) +static void clean_multi_timers(tcp_descriptor *desc, ErlDrvPort port) { - MultiTimerData *p; - if (*first) { + if (desc->mtd) { driver_cancel_timer(port); } - while (*first) { - p = *first; - *first = p->next; - FREE(p); + while (desc->mtd) { + MultiTimerData *p = desc->mtd; + desc->mtd = p->next; + FREE(p); + } + desc->mtd = NULL; + if (desc->mtd_cache) { + FREE(desc->mtd_cache); + desc->mtd_cache = NULL; } } -static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTimerData *p) +static void remove_multi_timer(tcp_descriptor *desc, ErlDrvPort port, MultiTimerData *p) { if (p->prev != NULL) { p->prev->next = p->next; } else { driver_cancel_timer(port); - *first = p->next; - if (*first) { - ErlDrvTime ntmo = (*first)->when - erl_drv_monotonic_time(ERL_DRV_MSEC); + desc->mtd = p->next; + if (desc->mtd) { + ErlDrvTime ntmo = desc->mtd->when - erl_drv_monotonic_time(ERL_DRV_MSEC); if (ntmo < 0) ntmo = 0; driver_set_timer(port, (unsigned long) ntmo); @@ -12955,36 +13024,67 @@ static void remove_multi_timer(MultiTimerData **first, ErlDrvPort port, MultiTim if (p->next != NULL) { p->next->prev = p->prev; } - FREE(p); + if (desc->mtd_cache == NULL) + desc->mtd_cache = p; + else + FREE(p); +} + +/* Cancel a timer based on the timeout_fun */ +static void cancel_multi_timer(tcp_descriptor *desc, ErlDrvPort port, + void (*timeout_fun)(ErlDrvData drv_data, + ErlDrvTermData caller)) +{ + MultiTimerData *timer = desc->mtd; + while(timer && timer->timeout_function != timeout_fun) { + timer = timer->next; + } + if (timer) { + remove_multi_timer(desc, port, timer); + } } -static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, +static MultiTimerData *add_multi_timer(tcp_descriptor *desc, ErlDrvPort port, ErlDrvTermData caller, unsigned timeout, void (*timeout_fun)(ErlDrvData drv_data, ErlDrvTermData caller)) { MultiTimerData *mtd, *p, *s; - mtd = ALLOC(sizeof(MultiTimerData)); - mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout) + 1; + + /* Use cached timer if available */ + if (desc->mtd_cache != NULL) { + mtd = desc->mtd_cache; + desc->mtd_cache = NULL; + } else + mtd = ALLOC(sizeof(MultiTimerData)); + + if (timeout) + mtd->when = erl_drv_monotonic_time(ERL_DRV_MSEC) + ((ErlDrvTime) timeout); + else + mtd->when = INT64_MIN; /* Don't have to get the time for 0 msec timeouts */ + mtd->timeout_function = timeout_fun; mtd->caller = caller; mtd->next = mtd->prev = NULL; - for(p = *first,s = NULL; p != NULL; s = p, p = p->next) { + + /* Find correct slot in timer linked list */ + for(p = desc->mtd,s = NULL; p != NULL; s = p, p = p->next) { if (p->when >= mtd->when) { break; } } + /* Insert in linked list */ if (!p) { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; } } else { if (!s) { - *first = mtd; + desc->mtd = mtd; } else { s->next = mtd; mtd->prev = s; @@ -12992,10 +13092,8 @@ static MultiTimerData *add_multi_timer(MultiTimerData **first, ErlDrvPort port, mtd->next = p; p->prev = mtd; } + /* Possibly set new timer */ if (!s) { - if (mtd->next) { - driver_cancel_timer(port); - } driver_set_timer(port,timeout); } return mtd; diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c index 0ada345442..0b5eccbde2 100644 --- a/erts/emulator/nifs/common/prim_file_nif.c +++ b/erts/emulator/nifs/common/prim_file_nif.c @@ -936,7 +936,7 @@ static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_ posix_errno_t posix_errno; efile_path_t path; - Uint32 permissions; + Uint permissions; ASSERT(argc == 2); if(!enif_get_uint(env, argv[1], &permissions)) { @@ -956,7 +956,7 @@ static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a posix_errno_t posix_errno; efile_path_t path; - Sint32 uid, gid; + Sint uid, gid; ASSERT(argc == 3); if(!enif_get_int(env, argv[1], &uid) || !enif_get_int(env, argv[2], &gid)) { diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c index f421794f91..b4609007c9 100644 --- a/erts/emulator/sys/common/erl_check_io.c +++ b/erts/emulator/sys/common/erl_check_io.c @@ -46,11 +46,11 @@ #if 0 #define DEBUG_PRINT(FMT, ...) erts_printf(FMT "\r\n", ##__VA_ARGS__) #define DEBUG_PRINT_FD(FMT, STATE, ...) \ - DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%d)", \ + DEBUG_PRINT("%d: " FMT " (ev=%s, ac=%s, flg=%s)", \ (STATE) ? (STATE)->fd : (ErtsSysFdType)-1, ##__VA_ARGS__, \ ev2str((STATE) ? (STATE)->events : ERTS_POLL_EV_NONE), \ ev2str((STATE) ? (STATE)->active_events : ERTS_POLL_EV_NONE), \ - (STATE) ? (STATE)->flags : ERTS_EV_FLAG_CLEAR) + (STATE) ? flag2str((STATE)->flags) : ERTS_EV_FLAG_CLEAR) #define DEBUG_PRINT_MODE #else #define DEBUG_PRINT(...) @@ -76,22 +76,40 @@ typedef enum { typedef enum { ERTS_EV_FLAG_CLEAR = 0, ERTS_EV_FLAG_USED = 1, /* ERL_DRV_USE has been turned on */ -#ifdef ERTS_ENABLE_KERNEL_POLL - ERTS_EV_FLAG_FALLBACK = 2, /* Set when kernel poll rejected fd +#if ERTS_POLL_USE_SCHEDULER_POLLING + ERTS_EV_FLAG_SCHEDULER = 2, /* Set when the fd has been migrated + to scheduler pollset */ + ERTS_EV_FLAG_IN_SCHEDULER = 4, /* Set when the fd is currently in + scheduler pollset */ +#else + ERTS_EV_FLAG_SCHEDULER = ERTS_EV_FLAG_CLEAR, + ERTS_EV_FLAG_IN_SCHEDULER = ERTS_EV_FLAG_CLEAR, +#endif +#ifdef ERTS_POLL_USE_FALLBACK + ERTS_EV_FLAG_FALLBACK = 8, /* Set when kernel poll rejected fd and it was put in the nkp version */ #else ERTS_EV_FLAG_FALLBACK = ERTS_EV_FLAG_CLEAR, #endif /* Combinations */ - ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK + ERTS_EV_FLAG_USED_FALLBACK = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_FALLBACK, + ERTS_EV_FLAG_USED_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_USED_IN_SCHEDULER = ERTS_EV_FLAG_USED | ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER, + ERTS_EV_FLAG_UNUSED_SCHEDULER = ERTS_EV_FLAG_SCHEDULER, + ERTS_EV_FLAG_UNUSED_IN_SCHEDULER = ERTS_EV_FLAG_SCHEDULER | ERTS_EV_FLAG_IN_SCHEDULER } EventStateFlags; #define flag2str(flags) \ ((flags) == ERTS_EV_FLAG_CLEAR ? "CLEAR" : \ ((flags) == ERTS_EV_FLAG_USED ? "USED" : \ ((flags) == ERTS_EV_FLAG_FALLBACK ? "FLBK" : \ - ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : "ERROR")))) + ((flags) == ERTS_EV_FLAG_USED_FALLBACK ? "USED|FLBK" : \ + ((flags) == ERTS_EV_FLAG_USED_SCHEDULER ? "USED|SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_SCHEDULER ? "SCHD" : \ + ((flags) == ERTS_EV_FLAG_USED_IN_SCHEDULER ? "USED|IN_SCHD" : \ + ((flags) == ERTS_EV_FLAG_UNUSED_IN_SCHEDULER ? "IN_SCHD" : \ + "ERROR")))))))) /* How many events that can be handled at once by one erts_poll_wait call */ #define ERTS_CHECK_IO_POLL_RES_LEN 512 @@ -105,6 +123,7 @@ typedef struct erts_poll_thread { ErtsPollSet *ps; ErtsPollResFd *pollres; + ErtsThrPrgrData *tpd; int pollres_len; } ErtsPollThread; @@ -112,10 +131,13 @@ typedef struct erts_poll_thread * Which pollset to use is determined by hashing the fd. */ static ErtsPollSet **pollsetv; +static ErtsPollThread *psiv; #if ERTS_POLL_USE_FALLBACK static ErtsPollSet *flbk_pollset; #endif -static ErtsPollThread *psiv; +#if ERTS_POLL_USE_SCHEDULER_POLLING +static ErtsPollSet *sched_pollset; +#endif typedef struct { #ifndef ERTS_SYS_CONTINOUS_FD_NUMBERS @@ -130,10 +152,12 @@ typedef struct { ErtsResource* resource; /* ERTS_EV_TYPE_STOP_NIF */ } stop; } driver; - ErtsPollEvents events; /* The events that have been selected upon */ + ErtsPollEvents events; /* The events that have been selected upon */ ErtsPollEvents active_events; /* The events currently active in the pollset */ EventStateType type; EventStateFlags flags; + int count; /* Number of times this fd has triggered + without being deselected. */ } ErtsDrvEventState; struct drv_ev_state_shared { @@ -370,12 +394,22 @@ get_pollset(ErtsSysFdType fd) #if ERTS_POLL_USE_FALLBACK static ERTS_INLINE ErtsPollSet * -get_fallback(void) +get_fallback_pollset(void) { return flbk_pollset; } #endif +static ERTS_INLINE ErtsPollSet * +get_scheduler_pollset(ErtsSysFdType fd) +{ +#if ERTS_POLL_USE_SCHEDULER_POLLING + return sched_pollset; +#else + return get_pollset(fd); +#endif +} + /* * Place a fd within a pollset. This will automatically use * the fallback ps if needed. @@ -391,18 +425,27 @@ erts_io_control_wakeup(ErtsDrvEventState *state, ErtsPollOp op, ERTS_LC_ASSERT(erts_lc_mtx_is_locked(fd_mtx(state->fd))); if (!(flags & ERTS_EV_FLAG_FALLBACK)) { - res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + + if (op == ERTS_POLL_OP_DEL && (flags & ERTS_EV_FLAG_SCHEDULER)) { + erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } + if (!(flags & ERTS_EV_FLAG_IN_SCHEDULER) || (pe & ERTS_POLL_EV_OUT)) { + res = erts_poll_control(get_pollset(fd), fd, op, pe, wake_poller); + } else { + res = erts_poll_control(get_scheduler_pollset(fd), fd, op, pe, wake_poller); + } #if ERTS_POLL_USE_FALLBACK if (op == ERTS_POLL_OP_ADD && res == ERTS_POLL_EV_NVAL) { /* When an add fails with NVAL, the poll/kevent operation could not put that fd in the pollset, so we instead put it into a fallback pollset */ state->flags |= ERTS_EV_FLAG_FALLBACK; - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); } } else { ASSERT(op != ERTS_POLL_OP_ADD); - res = erts_poll_control_flbk(get_fallback(), fd, op, pe, wake_poller); + res = erts_poll_control_flbk(get_fallback_pollset(), fd, op, pe, wake_poller); #endif } @@ -425,59 +468,77 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, ErtsIoTask *itp = ErtsContainerStruct(pthp, ErtsIoTask, task); ErtsSysFdType fd = itp->fd; erts_mtx_t *mtx = fd_mtx(fd); - int active_events; + ErtsPollOp op = ERTS_POLL_OP_MOD; + int active_events, new_events = 0; ErtsDrvEventState *state; ErtsDrvSelectDataState *free_select = NULL; ErtsNifSelectDataState *free_nif = NULL; + ERTS_MSACC_PUSH_AND_SET_STATE_M_X(ERTS_MSACC_STATE_CHECK_IO); + erts_mtx_lock(mtx); state = get_drv_ev_state(fd); + reset_handle(pthp); + active_events = state->active_events; - switch (type) { - case ERTS_PORT_TASK_INPUT: + if (!(state->flags & ERTS_EV_FLAG_IN_SCHEDULER) || type == ERTS_PORT_TASK_OUTPUT) { + switch (type) { + case ERTS_PORT_TASK_INPUT: + + DEBUG_PRINT_FD("executed ready_input", state); + + ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); + if (state->events & ERTS_POLL_EV_IN) { + active_events |= ERTS_POLL_EV_IN; + if (state->count > 10 && ERTS_POLL_USE_SCHEDULER_POLLING) { + if (!(state->flags & ERTS_EV_FLAG_SCHEDULER)) + op = ERTS_POLL_OP_ADD; + state->flags |= ERTS_EV_FLAG_IN_SCHEDULER|ERTS_EV_FLAG_SCHEDULER; + new_events = ERTS_POLL_EV_IN; + DEBUG_PRINT_FD("moving to scheduler ps", state); + } else + new_events = active_events; + if (!(state->flags & ERTS_EV_FLAG_FALLBACK) && ERTS_POLL_USE_SCHEDULER_POLLING) + state->count++; + } + break; + case ERTS_PORT_TASK_OUTPUT: - DEBUG_PRINT_FD("executed ready_input", state); + DEBUG_PRINT_FD("executed ready_output", state); - ASSERT(!(state->active_events & ERTS_POLL_EV_IN)); - if (state->events & ERTS_POLL_EV_IN) - active_events |= ERTS_POLL_EV_IN; - break; - case ERTS_PORT_TASK_OUTPUT: + ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); + if (state->events & ERTS_POLL_EV_OUT) { + active_events |= ERTS_POLL_EV_OUT; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER && active_events & ERTS_POLL_EV_IN) + new_events = ERTS_POLL_EV_OUT; + else + new_events = active_events; + } + break; + default: + erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); + break; + } - DEBUG_PRINT_FD("executed ready_output", state); + if (state->active_events != active_events && new_events) { + state->active_events = active_events; + new_events = erts_io_control(state, op, new_events); + } - ASSERT(!(state->active_events & ERTS_POLL_EV_OUT)); - if (state->events & ERTS_POLL_EV_OUT) - active_events |= ERTS_POLL_EV_OUT; - break; - default: - erts_exit(ERTS_ABORT_EXIT, "Invalid IO port task type"); - break; + /* We were unable to re-insert the fd into the pollset, signal the callback. */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + if (state->active_events & ERTS_POLL_EV_IN) + iready(state->driver.select->inport, state); + if (state->active_events & ERTS_POLL_EV_OUT) + oready(state->driver.select->outport, state); + state->active_events = 0; + } } - reset_handle(pthp); - - if (active_events) { - /* This is not needed if active_events has not changed */ - if (state->active_events != active_events) { - ErtsPollEvents new_events; - state->active_events = active_events; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, active_events); - - /* We were unable to re-insert the fd into the pollset, signal the callback. */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - if (active_events & ERTS_POLL_EV_IN) - iready(state->driver.select->inport, state); - if (active_events & ERTS_POLL_EV_OUT) - oready(state->driver.select->outport, state); - state->active_events = 0; - } - } - } else { + if (!active_events) check_fd_cleanup(state, &free_select, &free_nif); - } erts_mtx_unlock(mtx); @@ -485,6 +546,8 @@ erts_io_notify_port_task_executed(ErtsPortTaskType type, free_drv_select_data(free_select); if (free_nif) free_nif_select_data(free_nif); + + ERTS_MSACC_POP_STATE_M_X(); } static ERTS_INLINE void @@ -755,11 +818,22 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (old_events == 0 && !(state->flags & ERTS_EV_FLAG_USED)) { ctl_op = ERTS_POLL_OP_ADD; } + new_events = state->active_events; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + new_events &= ~ERTS_POLL_EV_IN; } else { ctl_events &= old_events; state->events &= ~ctl_events; state->active_events &= ~ctl_events; + new_events = state->active_events; + + if (ctl_events & ERTS_POLL_EV_IN) { + state->count = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + new_events = 0; + } + } if (!state->events) { if (!(state->flags & ERTS_EV_FLAG_USED) || mode & ERL_DRV_USE) @@ -770,7 +844,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events || ctl_op == ERTS_POLL_OP_DEL) { new_events = erts_io_control_wakeup(state, ctl_op, - state->active_events, + new_events, &wake_poller); ASSERT(state->type == ERTS_EV_TYPE_DRV_SEL || state->type == ERTS_EV_TYPE_NONE); @@ -802,6 +876,7 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (ctl_events & ERTS_POLL_EV_IN) { abort_tasks(state, ERL_DRV_READ); state->driver.select->inport = NIL; + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; } if (ctl_events & ERTS_POLL_EV_OUT) { abort_tasks(state, ERL_DRV_WRITE); @@ -810,6 +885,8 @@ driver_select(ErlDrvPort ix, ErlDrvEvent e, int mode, int on) if (state->events == 0) { if ((mode & ERL_DRV_USE) || !(state->flags & ERTS_EV_FLAG_USED)) { state->type = ERTS_EV_TYPE_NONE; + if (state->flags & ERTS_EV_FLAG_SCHEDULER) + erts_atomic32_read_bor_nob(&prt->state, ERTS_PORT_SFLG_CHECK_FD_CLEANUP); state->flags = 0; } /*else keep it, as fd will probably be selected upon again */ @@ -1440,7 +1517,8 @@ iready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_INPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + state->flags & ERTS_EV_FLAG_IN_SCHEDULER) != 0) { stale_drv_select(id, state, ERL_DRV_READ); } else { DEBUG_PRINT_FD("schedule ready_input(%T, %d)", @@ -1458,7 +1536,8 @@ oready(Eterm id, ErtsDrvEventState *state) if (erts_port_task_schedule(id, &iotask->task, ERTS_PORT_TASK_OUTPUT, - (ErlDrvEvent) state->fd) != 0) { + (ErlDrvEvent) state->fd, + 0) != 0) { stale_drv_select(id, state, ERL_DRV_WRITE); } else { DEBUG_PRINT_FD("schedule ready_output(%T, %d)", state, id, state->fd); @@ -1520,7 +1599,7 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) { if (psi) { #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { erts_poll_interrupt_flbk(psi->ps, set); return; } @@ -1530,12 +1609,13 @@ erts_check_io_interrupt(ErtsPollThread *psi, int set) } ErtsPollThread * -erts_create_pollset_thread(int id) { +erts_create_pollset_thread(int id, ErtsThrPrgrData *tpd) { + psiv[id].tpd = tpd; return psiv+id; } void -erts_check_io(ErtsPollThread *psi) +erts_check_io(ErtsPollThread *psi, ErtsMonotonicTime timeout_time) { int pollres_len; int poll_ret, i; @@ -1550,14 +1630,14 @@ erts_check_io(ErtsPollThread *psi) pollres_len = psi->pollres_len; #if ERTS_POLL_USE_FALLBACK - if (psi->ps == get_fallback()) { + if (psi->ps == get_fallback_pollset()) { - poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait_flbk(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } else #endif { - poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len); + poll_ret = erts_poll_wait(psi->ps, psi->pollres, &pollres_len, psi->tpd, timeout_time); } #ifdef ERTS_ENABLE_LOCK_CHECK @@ -1593,7 +1673,12 @@ erts_check_io(ErtsPollThread *psi) ErtsNifSelectDataState *free_nif = NULL; ErtsSysFdType fd = (ErtsSysFdType) ERTS_POLL_RES_GET_FD(&psi->pollres[i]); ErtsDrvEventState *state; - ErtsPollEvents revents; + ErtsPollEvents revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); + + /* The fd will be set to -1 if a pollset internal fd was triggered + that was determined to be too expensive to remove from the result. + */ + if (fd == -1) continue; erts_mtx_lock(fd_mtx(fd)); @@ -1604,8 +1689,6 @@ erts_check_io(ErtsPollThread *psi) continue; } - revents = ERTS_POLL_RES_GET_EVTS(&psi->pollres[i]); - DEBUG_PRINT_FD("triggered %s", state, ev2str(revents)); if (revents & ERTS_POLL_EV_ERR) { @@ -1617,25 +1700,39 @@ erts_check_io(ErtsPollThread *psi) */ revents = state->active_events; state->active_events = 0; + + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) { + erts_io_control(state, ERTS_POLL_OP_MOD, 0); + state->flags &= ~ERTS_EV_FLAG_IN_SCHEDULER; + } } else { /* Disregard any events that are not active at the moment, for instance this could happen if the driver/nif does select/deselect in rapid succession. */ revents &= state->active_events | ERTS_POLL_EV_NVAL; - state->active_events &= ~revents; - /* Reactivate the poll op if there are still active events */ - if (state->active_events) { - ErtsPollEvents new_events; - DEBUG_PRINT_FD("re-enable %s", state, ev2str(state->active_events)); + if (psi->ps != get_scheduler_pollset(fd) || !ERTS_POLL_USE_SCHEDULER_POLLING) { + ErtsPollEvents reactive_events; + state->active_events &= ~revents; - new_events = erts_io_control(state, ERTS_POLL_OP_MOD, state->active_events); + reactive_events = state->active_events; - /* Unable to re-enable the fd, signal all callbacks */ - if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { - revents |= state->active_events; - state->active_events = 0; + if (state->flags & ERTS_EV_FLAG_IN_SCHEDULER) + reactive_events &= ~ERTS_POLL_EV_IN; + + /* Reactivate the poll op if there are still active events */ + if (reactive_events) { + ErtsPollEvents new_events; + DEBUG_PRINT_FD("re-enable %s", state, ev2str(reactive_events)); + + new_events = erts_io_control(state, ERTS_POLL_OP_MOD, reactive_events); + + /* Unable to re-enable the fd, signal all callbacks */ + if (new_events & (ERTS_POLL_EV_ERR|ERTS_POLL_EV_NVAL)) { + revents |= reactive_events; + state->active_events &= ~reactive_events; + } } } } @@ -1711,7 +1808,7 @@ erts_check_io(ErtsPollThread *psi) case ERTS_EV_TYPE_STOP_USE: { #if ERTS_POLL_USE_FALLBACK - ASSERT(psi->ps == get_fallback()); + ASSERT(psi->ps == get_fallback_pollset()); #endif drv_ptr = state->driver.stop.drv_ptr; state->type = ERTS_EV_TYPE_NONE; @@ -2049,12 +2146,17 @@ erts_init_check_io(int *argc, char **argv) for (j=0; j < erts_no_pollsets; j++) pollsetv[j] = erts_poll_create_pollset(j); -#if ERTS_POLL_USE_FALLBACK - flbk_pollset = erts_poll_create_pollset_flbk(-1); + no_poll_threads = erts_no_poll_threads; + + j = -1; + +#if ERTS_POLL_USE_SCHEDULER_POLLING + sched_pollset = erts_poll_create_pollset(j--); + no_poll_threads++; #endif - no_poll_threads = erts_no_poll_threads; #if ERTS_POLL_USE_FALLBACK + flbk_pollset = erts_poll_create_pollset_flbk(j--); no_poll_threads++; #endif @@ -2064,7 +2166,15 @@ erts_init_check_io(int *argc, char **argv) psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); - psiv[0].ps = get_fallback(); + psiv[0].ps = get_fallback_pollset(); + psiv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + psiv[0].pollres_len = ERTS_CHECK_IO_POLL_RES_LEN; + psiv[0].pollres = erts_alloc(ERTS_ALC_T_POLLSET, + sizeof(ErtsPollResFd) * ERTS_CHECK_IO_POLL_RES_LEN); + psiv[0].ps = get_scheduler_pollset(0); psiv++; #endif @@ -2121,7 +2231,12 @@ erts_check_io_size(void) int i; #if ERTS_POLL_USE_FALLBACK - erts_poll_info(get_fallback(), &pi); + erts_poll_info(get_fallback_pollset(), &pi); + res += pi.memory_size; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &pi); res += pi.memory_size; #endif @@ -2153,13 +2268,21 @@ erts_check_io_info(void *proc) Uint sz, *szp, *hp, **hpp; ErtsPollInfo *piv; Sint i, j = 0, len; - int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK; + int no_pollsets = erts_no_pollsets + ERTS_POLL_USE_FALLBACK + ERTS_POLL_USE_SCHEDULER_POLLING; ERTS_CT_ASSERT(ERTS_POLL_USE_FALLBACK == 0 || ERTS_POLL_USE_FALLBACK == 1); + ERTS_CT_ASSERT(ERTS_POLL_USE_SCHEDULER_POLLING == 0 || ERTS_POLL_USE_SCHEDULER_POLLING == 1); piv = erts_alloc(ERTS_ALC_T_TMP, sizeof(ErtsPollInfo) * no_pollsets); #if ERTS_POLL_USE_FALLBACK - erts_poll_info_flbk(get_fallback(), &piv[0]); + erts_poll_info_flbk(get_fallback_pollset(), &piv[0]); + piv[0].poll_threads = 1; + piv[0].active_fds = 0; + piv++; +#endif + +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_poll_info(get_scheduler_pollset(0), &piv[0]); piv[0].poll_threads = 1; piv[0].active_fds = 0; piv++; @@ -2213,6 +2336,7 @@ erts_check_io_info(void *proc) sz = 0; piv -= ERTS_POLL_USE_FALLBACK; + piv -= ERTS_POLL_USE_SCHEDULER_POLLING; bld_it: @@ -2317,15 +2441,7 @@ print_events(erts_dsprintf_buf_t *dsbufp, ErtsPollEvents ev) static ERTS_INLINE void print_flags(erts_dsprintf_buf_t *dsbufp, EventStateFlags f) { - const char* delim = ""; - if(f & ERTS_EV_FLAG_USED) { - erts_dsprintf(dsbufp, "%s","USED"); - delim = "|"; - } - if(f & ERTS_EV_FLAG_FALLBACK) { - erts_dsprintf(dsbufp, "%s%s", delim, "FLBK"); - delim = "|"; - } + erts_dsprintf(dsbufp, "%s", flag2str(f)); } #ifdef DEBUG_PRINT_MODE @@ -2673,13 +2789,26 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) #if ERTS_POLL_USE_FALLBACK erts_dsprintf(dsbufp, "--- fds in flbk pollset ---------------------------------\n"); - erts_poll_get_selected_events_flbk(get_fallback(), counters.epep, + erts_poll_get_selected_events_flbk(get_fallback_pollset(), counters.epep, drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); } #endif +#if ERTS_POLL_USE_SCHEDULER_POLLING + erts_dsprintf(dsbufp, "--- fds in scheduler pollset ----------------------------\n"); + erts_poll_get_selected_events(get_scheduler_pollset(0), counters.epep, + drv_ev_state.max_fds); + for (fd = 0; fd < len; fd++) { + if (drv_ev_state.v[fd].flags & ERTS_EV_FLAG_SCHEDULER) { + if (drv_ev_state.v[fd].events && drv_ev_state.v[fd].events != ERTS_POLL_EV_NONE) + counters.epep[fd] &= ~ERTS_POLL_EV_OUT; + doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } + } +#endif + erts_dsprintf(dsbufp, "--- fds in pollset --------------------------------------\n"); for (i = 0; i < erts_no_pollsets; i++) { @@ -2688,8 +2817,15 @@ erts_check_io_debug(ErtsCheckIoDebugInfo *ciodip) drv_ev_state.max_fds); for (fd = 0; fd < len; fd++) { if (!(drv_ev_state.v[fd].flags & ERTS_EV_FLAG_FALLBACK) - && get_pollset_id(fd) == i) + && get_pollset_id(fd) == i) { + if (counters.epep[fd] != ERTS_POLL_EV_NONE && + drv_ev_state.v[fd].flags & ERTS_EV_FLAG_IN_SCHEDULER) { + /* We add the in flag if it is enabled in the scheduler pollset + and get_selected_events works on the platform */ + counters.epep[fd] |= ERTS_POLL_EV_IN; + } doit_erts_check_io_debug(&drv_ev_state.v[fd], &counters, dsbufp); + } } } for (fd = len ; fd < drv_ev_state.max_fds; fd++) { @@ -2736,7 +2872,7 @@ void erts_lcnt_update_cio_locks(int enable) { #endif #if ERTS_POLL_USE_FALLBACK - erts_lcnt_enable_pollset_lock_count_flbk(get_fallback(), enable); + erts_lcnt_enable_pollset_lock_count_flbk(get_fallback_pollset(), enable); #endif for (i = 0; i < erts_no_pollsets; i++) diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h index 443ef1264c..31182be5ec 100644 --- a/erts/emulator/sys/common/erl_check_io.h +++ b/erts/emulator/sys/common/erl_check_io.h @@ -68,7 +68,7 @@ int erts_check_io_max_files(void); * * @param pt the poll thread structure to use. */ -void erts_check_io(struct erts_poll_thread *pt); +void erts_check_io(struct erts_poll_thread *pt, ErtsMonotonicTime timeout_time); /** * Initialize the check io framework. This function will parse the arguments * and delete any entries that it is interested in. @@ -90,8 +90,11 @@ void erts_check_io_interrupt(struct erts_poll_thread *pt, int set); /** * Create a new poll thread structure that is associated with the number no. * It is the callers responsibility that no is unique. + * + * @param no the id of the pollset thread, -2 = aux thread, -1 = scheduler + * @param tpd the thread progress data of the pollset thread */ -struct erts_poll_thread* erts_create_pollset_thread(int no); +struct erts_poll_thread* erts_create_pollset_thread(int no, ErtsThrPrgrData *tpd); #ifdef ERTS_ENABLE_LOCK_COUNT /** * Toggle lock counting on all check io locks @@ -126,16 +129,6 @@ extern int erts_no_poll_threads; #include "erl_poll.h" #include "erl_port_task.h" -#ifdef __WIN32__ -/* - * Current erts_poll implementation for Windows cannot handle - * active events in the set of events polled. - */ -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#else -# define ERTS_CIO_DEFER_ACTIVE_EVENTS 1 -#endif - typedef struct { Eterm inport; Eterm outport; @@ -147,10 +140,6 @@ struct erts_nif_select_event { Eterm pid; Eterm immed; Uint32 refn[ERTS_REF_NUMBERS]; - Sint32 ddeselect_cnt; /* 0: No delayed deselect in progress - * 1: Do deselect before next poll - * >1: Countdown of ignored events - */ }; typedef struct { diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h index 539daea419..e1ff0fe80a 100644 --- a/erts/emulator/sys/common/erl_mmap.h +++ b/erts/emulator/sys/common/erl_mmap.h @@ -176,4 +176,61 @@ void hard_dbg_remove_mseg(void* seg, UWord sz); #endif /* HAVE_ERTS_MMAP */ +/* Marks the given memory region as unused without freeing it, letting the OS + * reclaim its physical memory with the promise that we'll get it back (without + * its contents) the next time it's accessed. */ +ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size); + +#if ERTS_GLB_INLINE_INCL_FUNC_DEF + +#ifdef VALGRIND + #include <valgrind/memcheck.h> + + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + VALGRIND_MAKE_MEM_UNDEFINED(ptr, size); + } +#elif defined(DEBUG) + /* Try to provoke crashes by filling the discard region with garbage. It's + * extremely hard to find bugs where we've discarded too much, as the + * region often retains its old contents if it's accessed before the OS + * reclaims it. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + static const char pattern[] = "DISCARDED"; + char *data; + int i; + + for(i = 0, data = ptr; i < size; i++) { + data[i] = pattern[i % sizeof(pattern)]; + } + } +#elif defined(HAVE_SYS_MMAN_H) + #include <sys/mman.h> + + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + #ifdef MADV_FREE + /* This is preferred as it doesn't necessarily free the pages right + * away, which is a bit faster than MADV_DONTNEED. */ + madvise(ptr, size, MADV_FREE); + #else + madvise(ptr, size, MADV_DONTNEED); + #endif + } +#elif defined(_WIN32) + #include <winbase.h> + + /* MEM_RESET is defined on all supported versions of Windows, and has the + * same semantics as MADV_FREE. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE); + } +#else + /* Dummy implementation. */ + ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) { + (void)ptr; + (void)size; + } +#endif + +#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */ + #endif /* ERL_MMAP_H__ */ diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index b4d1575ee5..51d50933ff 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -75,6 +75,7 @@ # define WANT_NONBLOCKING #endif +#include "erl_thr_progress.h" #include "erl_poll.h" #if ERTS_POLL_USE_KQUEUE # include <sys/types.h> @@ -95,7 +96,6 @@ # include <limits.h> # endif #endif -#include "erl_thr_progress.h" #include "erl_driver.h" #include "erl_alloc.h" #include "erl_msacc.h" @@ -121,7 +121,8 @@ /* Define to print info about modifications done to each fd */ #define DEBUG_PRINT_FD(FMT, PS, FD, ...) DEBUG_PRINT("%d: " FMT, PS, FD, ##__VA_ARGS__) /* Define to print entry and exit from erts_poll_wait (can be very spammy) */ -//#define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__) +// #define DEBUG_PRINT_WAIT(FMT, PS, ...) do { if ((PS)->id != -1) DEBUG_PRINT(FMT, PS, ##__VA_ARGS__); } while(0) #else #define ERTS_POLL_DEBUG_PRINT 0 @@ -200,7 +201,7 @@ int ERTS_SELECT(int nfds, ERTS_fd_set *readfds, ERTS_fd_set *writefds, #define ERTS_POLL_USE_CONCURRENT_UPDATE (ERTS_POLL_USE_EPOLL || ERTS_POLL_USE_KQUEUE) -#define ERTS_POLL_USE_WAKEUP_PIPE (!ERTS_POLL_USE_CONCURRENT_UPDATE) +#define ERTS_POLL_USE_WAKEUP(ps) (!ERTS_POLL_USE_CONCURRENT_UPDATE || (ps)->id < 0) #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -269,6 +270,7 @@ struct ERTS_POLL_EXPORT(erts_pollset) { #if ERTS_POLL_USE_KERNEL_POLL int kp_fd; + int oneshot; #endif /* ERTS_POLL_USE_KERNEL_POLL */ #if ERTS_POLL_USE_POLL @@ -295,12 +297,16 @@ struct ERTS_POLL_EXPORT(erts_pollset) { ErtsPollSetUpdateRequestsBlock *curr_upd_req_block; erts_atomic32_t have_update_requests; erts_mtx_t mtx; - erts_atomic32_t wakeup_state; +#else + int do_wakeup; #endif -#if ERTS_POLL_USE_WAKEUP_PIPE - int wake_fds[2]; +#if ERTS_POLL_USE_TIMERFD + int timer_fd; #endif + ErtsMonotonicTime timeout_time; + erts_atomic32_t wakeup_state; + int wake_fds[2]; }; void erts_silence_warn_unused_result(long unused); @@ -365,63 +371,47 @@ static void print_misc_debug_info(void); uint32_t epoll_events(int kp_fd, int fd); #endif - #define ERTS_POLL_NOT_WOKEN 0 #define ERTS_POLL_WOKEN -1 #define ERTS_POLL_WOKEN_INTR 1 -#if !ERTS_POLL_USE_CONCURRENT_UPDATE static ERTS_INLINE void reset_wakeup_state(ErtsPollSet *ps) { erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN); } -#endif static ERTS_INLINE int is_woken(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN; -#else - return 0; -#endif } static ERTS_INLINE int is_interrupted_reset(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE return (erts_atomic32_xchg_acqb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN) == ERTS_POLL_WOKEN_INTR); -#else - return 0; -#endif } static ERTS_INLINE void woke_up(ErtsPollSet *ps) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE erts_aint32_t wakeup_state = erts_atomic32_read_acqb(&ps->wakeup_state); if (wakeup_state == ERTS_POLL_NOT_WOKEN) (void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state, ERTS_POLL_WOKEN, ERTS_POLL_NOT_WOKEN); ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN); -#endif } /* * --- Wakeup pipe ----------------------------------------------------------- */ -#if ERTS_POLL_USE_WAKEUP_PIPE - static ERTS_INLINE void wake_poller(ErtsPollSet *ps, int interrupted) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE int wake; erts_aint32_t wakeup_state; if (!interrupted) @@ -434,9 +424,9 @@ wake_poller(ErtsPollSet *ps, int interrupted) wake = wakeup_state == ERTS_POLL_NOT_WOKEN; if (wake) -#endif { ssize_t res; + DEBUG_PRINT_WAIT("wake_poller(%d)", ps, interrupted); if (ps->wake_fds[1] < 0) return; /* Not initialized yet */ do { @@ -474,10 +464,8 @@ cleanup_wakeup_pipe(ErtsPollSet *ps) fd, erl_errno_id(errno), errno); } -#if !ERTS_POLL_USE_CONCURRENT_UPDATE if (intr) erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR); -#endif } static void @@ -513,7 +501,67 @@ create_wakeup_pipe(ErtsPollSet *ps) ps->wake_fds[1] = wake_fds[1]; } +/* + * --- timer fd ----------------------------------------------------------- + */ + +#if ERTS_POLL_USE_TIMERFD + +/* We use the timerfd when using epoll_wait to get high accuracy + timeouts, i.e. we want to sleep with < ms accuracy. */ + +static void +create_timerfd(ErtsPollSet *ps) +{ + int do_wake = 0; + int timer_fd = timerfd_create(CLOCK_MONOTONIC,0); + ERTS_POLL_EXPORT(erts_poll_control)(ps, + timer_fd, + ERTS_POLL_OP_ADD, + ERTS_POLL_EV_IN, + &do_wake); + if (ps->internal_fd_limit <= timer_fd) + ps->internal_fd_limit = timer_fd + 1; + ps->timer_fd = timer_fd; +} + +static ERTS_INLINE void +timerfd_set(ErtsPollSet *ps, struct itimerspec *its) +{ +#ifdef DEBUG + struct itimerspec old_its; + int res; + res = timerfd_settime(ps->timer_fd, 0, its, &old_its); + ASSERT(res == 0); + ASSERT(old_its.it_interval.tv_sec == 0 && + old_its.it_interval.tv_nsec == 0 && + old_its.it_value.tv_sec == 0 && + old_its.it_value.tv_nsec == 0); + +#else + timerfd_settime(ps->timer_fd, 0, its, NULL); #endif +} + +static ERTS_INLINE int +timerfd_clear(ErtsPollSet *ps, ErtsPollResFd pr[], int res, int max_res) { + + struct itimerspec its; + /* we always have to clear the timer */ + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 0; + timerfd_settime(ps->timer_fd, 0, &its, NULL); + + /* only timeout fd triggered */ + if (res == 1 && pr[0].data.fd == ps->timer_fd) + return 0; + + return res; +} + +#endif /* ERTS_POLL_USE_TIMERFD */ /* * --- Poll set update requests ---------------------------------------------- @@ -691,9 +739,12 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) struct epoll_event epe_templ; struct epoll_event epe; - epe_templ.events = ERTS_POLL_EV_E2N(events) | EPOLLONESHOT; + epe_templ.events = ERTS_POLL_EV_E2N(events); epe_templ.data.fd = fd; + if (ps->oneshot) + epe_templ.events |= EPOLLONESHOT; + #ifdef VALGRIND /* Silence invalid valgrind warning ... */ memset((void *) &epe.data, 0, sizeof(epoll_data_t)); @@ -802,6 +853,7 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) int res = 0, len = 0; struct kevent evts[2]; struct timespec ts = {0, 0}; + uint32_t oneshot = 0; if (op == ERTS_POLL_OP_ADD) { /* This is a hack to make the "noshell" option work; kqueue can poll @@ -840,6 +892,9 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) man page), but it seems to be the way it works... */ + if (ps->oneshot) + oneshot = EV_DISPATCH; + if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); /* We could probably skip this delete, do we want to? */ @@ -849,27 +904,29 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) uint32_t flags; erts_atomic_inc_nob(&ps->no_of_user_fds); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_IN) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_ADD|EV_DISPATCH; + flags = EV_ADD|oneshot; flags |= ((events & ERTS_POLL_EV_OUT) ? 0 : EV_DISABLE); ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } else { uint32_t flags; ASSERT(op == ERTS_POLL_OP_MOD); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_IN) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_READ, flags, (void *) ERTS_POLL_EV_IN); - flags = EV_DISPATCH; + flags = oneshot; flags |= (events & ERTS_POLL_EV_OUT) ? EV_ENABLE : EV_DISABLE; ERTS_EV_SET(&evts[len++], fd, EVFILT_WRITE, flags, (void *) ERTS_POLL_EV_OUT); } #else - uint32_t flags = EV_ADD|EV_ONESHOT; + uint32_t flags = EV_ADD; + + if (ps->oneshot) flags |= EV_ONESHOT; if (op == ERTS_POLL_OP_DEL) { erts_atomic_dec_nob(&ps->no_of_user_fds); @@ -903,14 +960,17 @@ update_pollset(ErtsPollSet *ps, int fd, ErtsPollOp op, ErtsPollEvents events) keventbp += sprintf(keventbp, "kevent(%d, {",ps->kp_fd); for (i = 0; i < len; i++) { const char *flags = "UNKNOWN"; - if (evts[i].flags == EV_DELETE) flags = "EV_DELETE"; + if (evts[i].flags == (EV_DELETE)) flags = "EV_DELETE"; if (evts[i].flags == (EV_ADD|EV_ONESHOT)) flags = "EV_ADD|EV_ONESHOT"; + if (evts[i].flags == (EV_ADD)) flags = "EV_ADD"; #ifdef EV_DISPATCH if (evts[i].flags == (EV_ADD|EV_DISPATCH)) flags = "EV_ADD|EV_DISPATCH"; if (evts[i].flags == (EV_ADD|EV_DISABLE)) flags = "EV_ADD|EV_DISABLE"; if (evts[i].flags == (EV_ENABLE|EV_DISPATCH)) flags = "EV_ENABLE|EV_DISPATCH"; - if (evts[i].flags == EV_DISABLE) flags = "EV_DISABLE"; + if (evts[i].flags == (EV_ENABLE)) flags = "EV_ENABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; if (evts[i].flags == (EV_DISABLE|EV_DISPATCH)) flags = "EV_DISABLE|EV_DISABLE"; + if (evts[i].flags == (EV_DISABLE)) flags = "EV_DISABLE"; #endif keventbp += sprintf(keventbp, "%s{%lu, %s, %s}",i > 0 ? ", " : "", @@ -1273,11 +1333,15 @@ poll_control(ErtsPollSet *ps, int fd, ErtsPollOp op, goto done; } #endif -#if ERTS_POLL_USE_WAKEUP_PIPE if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) { new_events = ERTS_POLL_EV_NVAL; goto done; } +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) { + new_events = ERTS_POLL_EV_NVAL; + goto done; + } #endif } @@ -1333,11 +1397,8 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, ERTS_POLLSET_UNLOCK(ps); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (*do_wake) { + if (*do_wake) wake_poller(ps, 0); - } -#endif return res; } @@ -1351,52 +1412,61 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, static ERTS_INLINE int ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, int chk_fds_res, int ebadf) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_WAKEUP_PIPE int n = chk_fds_res < max_res ? chk_fds_res : max_res, i; int res = n; -#if ERTS_POLL_USE_WAKEUP_PIPE int wake_fd = ps->wake_fds[0]; -#endif - for (i = 0; i < n; i++) { - int fd = ERTS_POLL_RES_GET_FD(&pr[i]); -#ifdef DEBUG_PRINT_MODE - ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); -#endif + if (ERTS_POLL_USE_WAKEUP(ps) || ERTS_POLL_DEBUG_PRINT || ERTS_POLL_USE_TIMERFD) { + + for (i = 0; i < n; i++) { + int fd = ERTS_POLL_RES_GET_FD(&pr[i]); +#if ERTS_POLL_DEBUG_PRINT + ErtsPollEvents evts = ERTS_POLL_RES_GET_EVTS(pr+i); - DEBUG_PRINT_FD("trig %s (%s)", ps, fd, - ev2str(evts), + if (fd != wake_fd +#if ERTS_POLL_USE_TIMERFD + && fd != ps->timer_fd +#endif + ) + DEBUG_PRINT_FD("trig %s (%s)", ps, fd, + ev2str(evts), #if ERTS_POLL_USE_KQUEUE - "kqueue" + "kqueue" #elif ERTS_POLL_USE_EPOLL - "epoll" + "epoll" #else - "/dev/poll" + "/dev/poll" +#endif + ); #endif - ); -#if ERTS_POLL_USE_WAKEUP_PIPE - if (fd == wake_fd) { - cleanup_wakeup_pipe(ps); - ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); - if (n == 1) - return 0; - } + if (ERTS_POLL_USE_WAKEUP(ps) && fd == wake_fd) { + cleanup_wakeup_pipe(ps); + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } +#if ERTS_POLL_USE_TIMERFD + else if (fd == ps->timer_fd) { + ERTS_POLL_RES_SET_FD(&pr[i], -1); + ERTS_POLL_RES_SET_EVTS(&pr[i], ERTS_POLL_EV_NONE); + res--; + } #endif #if !ERTS_POLL_USE_CONCURRENT_UPDATE - else { - /* Reset the events to emulate ONESHOT semantics */ - ps->fds_status[fd].events = 0; - enqueue_update_request(ps, fd); - } + else { + /* Reset the events to emulate ONESHOT semantics */ + ps->fds_status[fd].events = 0; + enqueue_update_request(ps, fd); + } #endif + } } - return res; -#else - ASSERT(chk_fds_res <= max_res); - return chk_fds_res; -#endif + if (res == 0) + return res; + else + return n; } #else /* !ERTS_POLL_USE_KERNEL_POLL */ @@ -1577,19 +1647,168 @@ ERTS_POLL_EXPORT(save_result)(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, #endif /* !ERTS_POLL_USE_KERNEL_POLL */ +static ERTS_INLINE ErtsMonotonicTime +get_timeout(ErtsPollSet *ps, + int resolution, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout; + + if (timeout_time == ERTS_POLL_NO_TIMEOUT) { + timeout = 0; + } + else if (timeout_time == ERTS_POLL_INF_TIMEOUT) { + timeout = -1; + } + else { + ErtsMonotonicTime diff_time, current_time; + current_time = erts_get_monotonic_time(NULL); + diff_time = timeout_time - current_time; + if (diff_time <= 0) { + timeout = 0; + } + else { + switch (resolution) { + case 1000: + /* Round up to nearest even milli second */ + timeout = ERTS_MONOTONIC_TO_MSEC(diff_time - 1) + 1; + if (timeout > (ErtsMonotonicTime) INT_MAX) + timeout = (ErtsMonotonicTime) INT_MAX; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000); + break; + case 1000000: + /* Round up to nearest even micro second */ + timeout = ERTS_MONOTONIC_TO_USEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000); + break; + case 1000000000: + /* Round up to nearest even nano second */ + timeout = ERTS_MONOTONIC_TO_NSEC(diff_time - 1) + 1; + timeout -= ERTS_PREMATURE_TIMEOUT(timeout, 1000*1000*1000); + break; + default: + ERTS_INTERNAL_ERROR("Invalid resolution"); + timeout = 0; + break; + } + } + } + return timeout; +} + +#if ERTS_POLL_USE_SELECT + +static ERTS_INLINE int +get_timeout_timeval(ErtsPollSet *ps, + SysTimeval *tvp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000, + timeout_time); + + if (!timeout) { + tvp->tv_sec = 0; + tvp->tv_usec = 0; + + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000); + tvp->tv_sec = sec; + tvp->tv_usec = timeout - sec*(1000*1000); + + ASSERT(tvp->tv_sec >= 0); + ASSERT(tvp->tv_usec >= 0); + ASSERT(tvp->tv_usec < 1000*1000); + + return 1; + } + +} + +#endif + +#if ERTS_POLL_USE_KQUEUE || (ERTS_POLL_USE_POLL && defined(HAVE_PPOLL)) || ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_timespec(ErtsPollSet *ps, + struct timespec *tsp, + ErtsMonotonicTime timeout_time) +{ + ErtsMonotonicTime timeout = get_timeout(ps, + 1000*1000*1000, + timeout_time); + + if (!timeout) { + tsp->tv_sec = 0; + tsp->tv_nsec = 0; + return 0; + } + else if (timeout == -1) { + return -1; + } + else { + ErtsMonotonicTime sec = timeout/(1000*1000*1000); + tsp->tv_sec = sec; + tsp->tv_nsec = timeout - sec*(1000*1000*1000); + + ASSERT(tsp->tv_sec >= 0); + ASSERT(tsp->tv_nsec >= 0); + ASSERT(tsp->tv_nsec < 1000*1000*1000); + + return 1; + } +} + +#endif + +#if ERTS_POLL_USE_TIMERFD + +static ERTS_INLINE int +get_timeout_itimerspec(ErtsPollSet *ps, + struct itimerspec *itsp, + ErtsMonotonicTime timeout_time) +{ + + itsp->it_interval.tv_sec = 0; + itsp->it_interval.tv_nsec = 0; + + return get_timeout_timespec(ps, &itsp->it_value, timeout_time); +} + +#endif + static ERTS_INLINE int -check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) +check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int max_res, ErtsMonotonicTime timeout_time) { int res; - int timeout = do_wait ? -1 : 0; - DEBUG_PRINT_WAIT("Entering check_fd_events(), do_wait=%d", ps, do_wait); + int timeout; + DEBUG_PRINT_WAIT("Entering check_fd_events(), timeout=%d", ps, timeout_time); { #if ERTS_POLL_USE_EPOLL /* --- epoll ------------------------------- */ +#if ERTS_POLL_USE_TIMERFD + struct itimerspec its; + timeout = get_timeout_itimerspec(ps, &its, timeout_time); + if (timeout > 0) { + timerfd_set(ps, &its); + res = epoll_wait(ps->kp_fd, pr, max_res, -1); + res = timerfd_clear(ps, pr, res, max_res); + } else { + res = epoll_wait(ps->kp_fd, pr, max_res, timeout); + } +#else /* !ERTS_POLL_USE_TIMERFD */ + timeout = (int) get_timeout(ps, 1000, timeout_time); res = epoll_wait(ps->kp_fd, pr, max_res, timeout); - +#endif /* !ERTS_POLL_USE_TIMERFD */ #elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */ - struct timespec ts = {0, 0}; - struct timespec *tsp = timeout ? NULL : &ts; + struct timespec ts; + struct timespec *tsp; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + tsp = timeout < 0 ? NULL : &ts; res = kevent(ps->kp_fd, NULL, 0, pr, max_res, tsp); #elif ERTS_POLL_USE_DEVPOLL /* --- devpoll ----------------------------- */ /* @@ -1601,16 +1820,22 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int nfds = (int) erts_atomic_read_nob(&ps->no_of_user_fds) + 1 /* wakeup pipe */; poll_res.dp_nfds = nfds < max_res ? nfds : max_res; poll_res.dp_fds = pr; - poll_res.dp_timeout = timeout; + poll_res.dp_timeout = (int) get_timeout(ps, 1000, timeout_time); res = ioctl(ps->kp_fd, DP_POLL, &poll_res); - +#elif ERTS_POLL_USE_POLL && defined(HAVE_PPOLL) /* --- ppoll ---------------- */ + struct timespec ts; + struct timespec *tsp = &ts; + timeout = get_timeout_timespec(ps, &ts, timeout_time); + if (timeout < 0) tsp = NULL; + res = ppoll(ps->poll_fds, ps->no_poll_fds, tsp, NULL); #elif ERTS_POLL_USE_POLL /* --- poll --------------------------------- */ - + timeout = (int) get_timeout(ps, 1000, timeout_time); res = poll(ps->poll_fds, ps->no_poll_fds, timeout); - #elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */ - SysTimeval tv = {0, 0}; - SysTimeval *tvp = timeout ? NULL : &tv; + SysTimeval tv; + SysTimeval *tvp; + timeout = get_timeout_timeval(ps, &tv, timeout_time); + tvp = timeout < 0 ? NULL : &tv; ERTS_FD_COPY(&ps->input_fds, &ps->res_input_fds); ERTS_FD_COPY(&ps->output_fds, &ps->res_output_fds); @@ -1629,7 +1854,9 @@ check_fd_events(ErtsPollSet *ps, ErtsPollResFd pr[], int do_wait, int max_res) int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len) + int *len, + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time) { int res, no_fds, used_fds = 0; int ebadf = 0; @@ -1654,61 +1881,65 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, } #endif - do_wait = !is_woken(ps) && used_fds == 0; + do_wait = !is_woken(ps) && used_fds == 0 && timeout_time != ERTS_POLL_NO_TIMEOUT; DEBUG_PRINT_WAIT("Entering %s(), do_wait=%d", ps, __FUNCTION__, do_wait); if (do_wait) { - erts_thr_progress_prepare_wait(NULL); + tpd = tpd ? tpd : erts_thr_prgr_data(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); - } + } else + timeout_time = ERTS_POLL_NO_TIMEOUT; while (1) { - res = check_fd_events(ps, pr + used_fds, do_wait, no_fds - used_fds); + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, timeout_time); + if (res != 0) + break; + if (timeout_time == ERTS_POLL_NO_TIMEOUT) + break; + if (erts_get_monotonic_time(NULL) >= timeout_time) + break; + } #if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (res < 0 - && errno == EBADF - && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { - /* - * This may have happened because another thread deselected - * a fd in our poll set and then closed it, i.e. the driver - * behaved correctly. We wan't to avoid looking for a bad - * fd, that may even not exist anymore. Therefore, handle - * update requests and try again. This behaviour should only - * happen when using SELECT as the polling mechanism. - */ - ERTS_POLLSET_LOCK(ps); - used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); - if (used_fds == no_fds) { - *len = used_fds; - ERTS_POLLSET_UNLOCK(ps); - return 0; - } - res = check_fd_events(ps, pr + used_fds, 0, no_fds - used_fds); - /* Keep the lock over the non-blocking poll in order to not - get any nasty races happening. */ + if (res < 0 + && errno == EBADF + && ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) { + /* + * This may have happened because another thread deselected + * a fd in our poll set and then closed it, i.e. the driver + * behaved correctly. We wan't to avoid looking for a bad + * fd, that may even not exist anymore. Therefore, handle + * update requests and try again. This behaviour should only + * happen when using SELECT as the polling mechanism. + */ + ERTS_POLLSET_LOCK(ps); + used_fds += handle_update_requests(ps, pr + used_fds, no_fds - used_fds); + if (used_fds == no_fds) { + *len = used_fds; ERTS_POLLSET_UNLOCK(ps); - if (res == 0) { - errno = EAGAIN; - res = -1; - } + return 0; + } + res = check_fd_events(ps, pr + used_fds, no_fds - used_fds, ERTS_POLL_NO_TIMEOUT); + /* Keep the lock over the non-blocking poll in order to not + get any nasty races happening. */ + ERTS_POLLSET_UNLOCK(ps); + if (res == 0) { + errno = EAGAIN; + res = -1; } -#endif - - if (res != 0) - break; - if (!do_wait) - break; } +#endif if (do_wait) { - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_UPDATE_CACHE(); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_CHECK_IO); } - woke_up(ps); + if (ERTS_POLL_USE_WAKEUP(ps)) + woke_up(ps); if (res < 0) { #if ERTS_POLL_USE_SELECT @@ -1719,11 +1950,16 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, #endif res = errno; } - else { + else if (res == 0) { + res = used_fds == 0 ? ETIMEDOUT : 0; +#ifdef HARD_DEBUG + check_poll_result(pr, used_fds); +#endif + *len = used_fds; + } else { #if ERTS_POLL_USE_SELECT save_results: #endif - ps_locked = 1; ERTS_POLLSET_LOCK(ps); @@ -1753,12 +1989,13 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet *ps, int set) { -#if !ERTS_POLL_USE_CONCURRENT_UPDATE - if (!set) - reset_wakeup_state(ps); - else - wake_poller(ps, 1); -#endif + DEBUG_PRINT_WAIT("poll_interrupt(%d)", ps, set); + if (ERTS_POLL_USE_WAKEUP(ps)) { + if (!set) + reset_wakeup_state(ps); + else + wake_poller(ps, 1); + } } int @@ -1874,10 +2111,20 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(int id) if (ps->internal_fd_limit <= kp_fd) ps->internal_fd_limit = kp_fd + 1; ps->kp_fd = kp_fd; + if (ps->id == -1) + ps->oneshot = 0; + else + ps->oneshot = 1; #endif -#if !ERTS_POLL_USE_CONCURRENT_UPDATE + erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0); create_wakeup_pipe(ps); + +#if ERTS_POLL_USE_TIMERFD + create_timerfd(ps); +#endif + +#if !ERTS_POLL_USE_CONCURRENT_UPDATE handle_update_requests(ps, NULL, 0); cleanup_wakeup_pipe(ps); #endif @@ -1992,9 +2239,7 @@ ERTS_POLL_EXPORT(erts_poll_info)(ErtsPollSet *ps, ErtsPollInfo *pip) pip->memory_size = size; pip->poll_set_size = (int) erts_atomic_read_nob(&ps->no_of_user_fds); -#if !ERTS_POLL_USE_CONCURRENT_UPDATE pip->poll_set_size++; /* Wakeup pipe */ -#endif pip->lazy_updates = #if !ERTS_POLL_USE_CONCURRENT_UPDATE @@ -2177,6 +2422,12 @@ ERTS_POLL_EXPORT(erts_poll_get_selected_events)(ErtsPollSet *ps, ASSERT(0); return; } + if (fd == ps->wake_fds[0] || fd == ps->wake_fds[1]) + continue; +#if ERTS_POLL_USE_TIMERFD + if (fd == ps->timer_fd) + continue; +#endif data &= 0xFFFFFFFF; ASSERT(fd == data); /* Events are the events that are being monitored, which of course include diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h index e1cea7eb8b..d40dabc529 100644 --- a/erts/emulator/sys/common/erl_poll.h +++ b/erts/emulator/sys/common/erl_poll.h @@ -51,6 +51,7 @@ #include "sys.h" #define ERTS_POLL_NO_TIMEOUT ERTS_MONOTONIC_TIME_MIN +#define ERTS_POLL_INF_TIMEOUT ERTS_MONOTONIC_TIME_MAX #ifdef ERTS_ENABLE_KERNEL_POLL # undef ERTS_ENABLE_KERNEL_POLL @@ -130,6 +131,9 @@ #endif #define ERTS_POLL_USE_FALLBACK (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_USE_SCHEDULER_POLLING (ERTS_POLL_USE_KQUEUE || ERTS_POLL_USE_EPOLL) +#define ERTS_POLL_SCHEDULER_POLLING_TIMEOUT 10 +#define ERTS_POLL_USE_TIMERFD 0 typedef Uint32 ErtsPollEvents; @@ -156,6 +160,14 @@ typedef enum { #include <sys/epoll.h> +#if ERTS_POLL_USE_EPOLL +#ifdef HAVE_SYS_TIMERFD_H +#include <sys/timerfd.h> +#undef ERTS_POLL_USE_TIMERFD +#define ERTS_POLL_USE_TIMERFD 1 +#endif +#endif + #define ERTS_POLL_EV_E2N(EV) \ ((uint32_t) (EV)) #define ERTS_POLL_EV_N2E(EV) \ @@ -276,7 +288,7 @@ typedef struct _ErtsPollResFd { #endif -#define ERTS_POLL_EV_NONE (UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR)) +#define ERTS_POLL_EV_NONE ERTS_POLL_EV_N2E((UINT_MAX & ~(ERTS_POLL_EV_IN|ERTS_POLL_EV_OUT|ERTS_POLL_EV_NVAL|ERTS_POLL_EV_ERR))) #define ev2str(ev) \ (((ev) == 0 || (ev) == ERTS_POLL_EV_NONE) ? "NONE" : \ diff --git a/erts/emulator/sys/common/erl_poll_api.h b/erts/emulator/sys/common/erl_poll_api.h index 1170a549b9..f3a91e54f7 100644 --- a/erts/emulator/sys/common/erl_poll_api.h +++ b/erts/emulator/sys/common/erl_poll_api.h @@ -72,11 +72,15 @@ ErtsPollEvents ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet *ps, * @param res an array of fd results that the ready fds are put in. * @param[in] length the length of the res array * @param[out] length the number of ready events returned in res + * @param tpd the thread progress data to note sleep state in + * @param timeout_time the time in native to wake up at * @return 0 on success, else the ERRNO of the error that happened. */ int ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet *ps, ErtsPollResFd res[], - int *length); + int *length, + ErtsThrPrgrData *tpd, + ErtsMonotonicTime timeout_time); /** * Interrupt the thread waiting in the pollset. This function should be called * with set = 0 before any thread calls erts_poll_wait in order to clear any diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c index 39bb4d515e..3843a27a6e 100644 --- a/erts/emulator/sys/win32/erl_poll.c +++ b/erts/emulator/sys/win32/erl_poll.c @@ -1017,10 +1017,12 @@ ErtsPollEvents erts_poll_control(ErtsPollSet *ps, int erts_poll_wait(ErtsPollSet *ps, ErtsPollResFd pr[], - int *len) + int *len, + ErtsThrPrgrData *tpd, + Sint64 timeout_in) { int no_fds; - DWORD timeout = INFINITE; + DWORD timeout = timeout_in == -1 ? INFINITE : timeout_in; EventData* ev; int res = 0; int num = 0; @@ -1056,10 +1058,10 @@ int erts_poll_wait(ErtsPollSet *ps, HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout)); ERTS_POLLSET_UNLOCK(ps); - erts_thr_progress_prepare_wait(NULL); + erts_thr_progress_prepare_wait(tpd); ERTS_MSACC_SET_STATE_CACHED(ERTS_MSACC_STATE_SLEEP); handle = WaitForMultipleObjects(num_h, harr, FALSE, timeout); - erts_thr_progress_finalize_wait(NULL); + erts_thr_progress_finalize_wait(tpd); ERTS_MSACC_POP_STATE(); ERTS_POLLSET_LOCK(ps); HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout)); diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c index a1c630d68a..b95aadc9b2 100644 --- a/erts/emulator/sys/win32/sys.c +++ b/erts/emulator/sys/win32/sys.c @@ -186,7 +186,9 @@ void sys_primitive_init(HMODULE beam) UWord erts_sys_get_page_size(void) { - return (UWord) 4*1024; /* Guess 4 KB */ + SYSTEM_INFO info; + GetSystemInfo(&info); + return (UWord)info.dwPageSize; } Uint diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl index 0a42b09903..5b602dd4dc 100644 --- a/erts/emulator/test/big_SUITE.erl +++ b/erts/emulator/test/big_SUITE.erl @@ -168,7 +168,11 @@ eval({op,_,Op,A0,B0}, LFH) -> Res = eval_op(Op, A, B), erlang:garbage_collect(), Res; -eval({integer,_,I}, _) -> I; +eval({integer,_,I}, _) -> + %% "Parasitic" ("symbiotic"?) test of squaring all numbers + %% found in the test data. + test_squaring(I), + I; eval({call,_,{atom,_,Local},Args0}, LFH) -> Args = eval_list(Args0, LFH), LFH(Local, Args). @@ -192,6 +196,18 @@ eval_op('bxor', A, B) -> A bxor B; eval_op('bsl', A, B) -> A bsl B; eval_op('bsr', A, B) -> A bsr B. +test_squaring(I) -> + %% Multiplying an integer by itself is specially optimized, so we + %% should take special care to test squaring. The optimization + %% will kick in when the two operands have the same address. + Sqr = I * I, + + %% This expression will be multiplied in the usual way, because + %% the the two operands for '*' are stored at different addresses. + Sqr = I * ((I + id(1)) - id(1)), + + ok. + %% Built in test functions fac(0) -> 1; diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl index 9ffb484eb4..94501dad84 100644 --- a/erts/emulator/test/driver_SUITE.erl +++ b/erts/emulator/test/driver_SUITE.erl @@ -1754,7 +1754,7 @@ smp_select0(Config) -> ProcFun = fun()-> io:format("Worker ~p starting\n",[self()]), Port = open_port({spawn, DrvName}, []), smp_select_loop(Port, 100000), - sleep(1000), % wait for driver to handle pending events + smp_select_done(Port), true = erlang:port_close(Port), Master ! {ok,self()}, io:format("Worker ~p finished\n",[self()]) @@ -1784,6 +1784,21 @@ smp_select_loop(Port, N) -> smp_select_loop(Port, N-1) end. +smp_select_done(Port) -> + case erlang:port_control(Port, ?CHKIO_SMP_SELECT, "done") of + "wait" -> + receive + {Port, done} -> + ok + after 10*1000 -> + %% Seems we have a lost ready_input event. + %% Go ahead anyway, port will crash VM when closed. + ok + end; + + "ok" -> ok + end. + smp_select_wait([], _) -> ok; smp_select_wait(Pids, TimeoutMsg) -> diff --git a/erts/emulator/test/driver_SUITE_data/chkio_drv.c b/erts/emulator/test/driver_SUITE_data/chkio_drv.c index ee8f28e8b1..b9ee155b4b 100644 --- a/erts/emulator/test/driver_SUITE_data/chkio_drv.c +++ b/erts/emulator/test/driver_SUITE_data/chkio_drv.c @@ -90,7 +90,7 @@ typedef struct chkio_smp_select { int next_read; int next_write; int first_write; - enum {Closed, Opened, Selected, Waiting} state; + enum {Closed, Opened, Selected, Waiting, WaitingDone} state; int wasSelected; unsigned rand_state; }ChkioSmpSelect; @@ -292,18 +292,20 @@ stop_steal_aux(ChkioDrvData *cddp) static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port) { switch (pip->state) { + case WaitingDone: case Waiting: { int word; - fprintf(stderr, "Closing pipe in state Waiting. Event lost?\n"); + fprintf(stderr, "Closing pipe in state Waiting*. Event lost?\r\n"); for (;;) { int bytes = read(pip->read_fd, &word, sizeof(word)); if (bytes != sizeof(word)) { if (bytes != 0) { - fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\n", bytes, errno); + fprintf(stderr, "Failed to read from pipe, bytes=%d, errno=%d\r\n", + bytes, errno); } break; } - fprintf(stderr, "Read from pipe: %d\n", word); + fprintf(stderr, "Read from pipe: %d\r\n", word); } abort(); } @@ -318,6 +320,8 @@ static void free_smp_select(ChkioSmpSelect* pip, ErlDrvPort port) close(pip->write_fd); pip->state = Closed; break; + case Closed: + break; } driver_free(pip); } @@ -383,6 +387,9 @@ chkio_drv_start(ErlDrvPort port, char *command) cddp->id = driver_mk_port(port); cddp->test = CHKIO_STOP; cddp->test_data = NULL; + + drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */ + return (ErlDrvData) cddp; #endif } @@ -526,7 +533,7 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) printf("Read event on uninitiated pipe %d\n", fd); abort(); } - if (pip->state != Selected && pip->state != Waiting) { + if (pip->state != Selected && pip->state != Waiting && pip->state != WaitingDone) { printf("Read event on pipe in strange state %d\n", pip->state); abort(); } @@ -536,9 +543,9 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) inPipe = (pip->next_write - pip->next_read); if (inPipe == 0) { bytes = read(pip->read_fd, &word, sizeof(word)); - printf("Unexpected empty pipe, expected %u -> %u, bytes=%d, word=%d, written=%d\n", - pip->next_read, pip->next_write-1, bytes, word, - (pip->next_write - pip->first_write)); + printf("Unexpected empty pipe: ptr=%p, fds=%d->%d, read bytes=%d, word=%d, written=%d\n", + pip, pip->write_fd, pip->read_fd, + bytes, word, (pip->next_write - pip->first_write)); /*abort(); Allow unexpected events as it's been seen to be triggered by epoll on Linux. Most of the time the unwanted events are filtered by @@ -564,7 +571,20 @@ chkio_drv_ready_input(ErlDrvData drv_data, ErlDrvEvent event) TRACEF(("Read %d from fd=%d\n", word, fd)); pip->next_read++; } - pip->state = Selected; /* not Waiting anymore */ + if (pip->state == WaitingDone) { + if (pip->next_write == pip->next_read) { + /* All data read, send {Port, done} */ + ErlDrvTermData spec[] = {ERL_DRV_PORT, driver_mk_port(cddp->port), + ERL_DRV_ATOM, driver_mk_atom("done"), + ERL_DRV_TUPLE, 2}; + erl_drv_output_term(driver_mk_port(cddp->port), + spec, sizeof(spec) / sizeof(spec[0])); + pip->state = Selected; + } + } + else { + pip->state = Selected; /* not Waiting anymore */ + } break; } case CHKIO_DRV_USE: @@ -962,6 +982,16 @@ chkio_drv_control(ErlDrvData drv_data, } case CHKIO_SMP_SELECT: { ChkioSmpSelect* pip = (ChkioSmpSelect*) cddp->test_data; + if (len == 4 && memcmp(buf, "done", 4) == 0) { + if (pip && pip->state == Waiting) { + pip->state = WaitingDone; + res_str = "wait"; + } + else + res_str = "ok"; + res_len = -1; + break; + } if (pip == NULL) { erl_drv_mutex_lock(smp_pipes_mtx); if (smp_pipes) { @@ -1014,7 +1044,6 @@ chkio_drv_control(ErlDrvData drv_data, if (pip->wasSelected && (op & 1)) { TRACEF(("%T: Close pipe [%d->%d]\n", cddp->id, pip->write_fd, pip->read_fd)); - drv_use_singleton.fd_stop_select = -2; /* disable stop_select asserts */ if (driver_select(cddp->port, (ErlDrvEvent)(ErlDrvSInt)pip->read_fd, DO_READ|ERL_DRV_USE, 0) || close(pip->write_fd)) { diff --git a/erts/emulator/test/scheduler_SUITE.erl b/erts/emulator/test/scheduler_SUITE.erl index f04efb9003..2e0dfa42f3 100644 --- a/erts/emulator/test/scheduler_SUITE.erl +++ b/erts/emulator/test/scheduler_SUITE.erl @@ -1450,26 +1450,29 @@ poll_threads(Config) when is_list(Config) -> {Conc, PollType, KP} = get_ioconfig(Config), {Sched, SchedOnln, _} = get_sstate(Config, ""), - [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), - [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), - - [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), - if Conc -> - [5] = get_ionum(Config,"+IOt 5 +IOp 1"), - [3, 2] = get_ionum(Config,"+IOt 5 +IOp 2"), - [2, 2, 2, 2, 2] = get_ionum(Config,"+IOt 10 +IOPp 50"), + [1, 1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), + [1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), + [1, 1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), - [2] = get_ionum(Config, "+S 2 +IOPt 100"), - [4] = get_ionum(Config, "+S 4 +IOPt 100"), - [4] = get_ionum(Config, "+S 4:2 +IOPt 100"), - [4, 4] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), + [5, 1] = get_ionum(Config,"+IOt 5 +IOp 1"), + [3, 2, 1] = get_ionum(Config,"+IOt 5 +IOp 2"), + [2, 2, 2, 2, 2, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"), + + [2, 1] = get_ionum(Config, "+S 2 +IOPt 100"), + [4, 1] = get_ionum(Config, "+S 4 +IOPt 100"), + [4, 1] = get_ionum(Config, "+S 4:2 +IOPt 100"), + [4, 4, 1] = get_ionum(Config, "+S 8 +IOPt 100 +IOPp 25"), fail = get_ionum(Config, "+IOt 1 +IOp 2"), ok; not Conc -> + [1, 1] = get_ionum(Config,"+IOt 2 +IOp 2"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 5"), + [1, 1] = get_ionum(Config, "+S 2 +IOPt 100 +IOPp 100"), + [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 1"), [1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 5 +IOp 2"), [1, 1, 1, 1, 1, 1, 1, 1, 1, 1] = get_ionum(Config,"+IOt 10 +IOPp 50"), diff --git a/erts/emulator/test/signal_SUITE.erl b/erts/emulator/test/signal_SUITE.erl index fab2f45f28..4e6baa9e0e 100644 --- a/erts/emulator/test/signal_SUITE.erl +++ b/erts/emulator/test/signal_SUITE.erl @@ -85,7 +85,7 @@ xm_sig_order_proc() -> receive may_not_reach -> exit(bad_signal_order); may_reach -> ok - after 0 -> ok + after 0 -> erlang:yield() end, xm_sig_order_proc(). |