diff options
author | Sverker Eriksson <[email protected]> | 2017-12-08 19:02:15 +0100 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2017-12-20 15:19:32 +0100 |
commit | a7f87e104e769cb7fed65076193ef0bc4c9f08fd (patch) | |
tree | 752e96976099443ab94f07258ef53a0aadf1a5b9 | |
parent | 97152092fd4e5fe827a4dac42f3b51ae634ba1ff (diff) | |
download | otp-a7f87e104e769cb7fed65076193ef0bc4c9f08fd.tar.gz otp-a7f87e104e769cb7fed65076193ef0bc4c9f08fd.tar.bz2 otp-a7f87e104e769cb7fed65076193ef0bc4c9f08fd.zip |
erts: Improve carrier pool search
* Give back carrier to owner when put in pool with use of dd-queue.
* Replace pooled_list with pooled_tree for more efficient search
of all owned pooled carriers.
* Remove traitor_list as it does not serve much purpose anymore.
* Add HOMECOMING bit flag in crr->allctr atomic to
(1) avoid double enqueue into dd-enqueue.
(2) trigger read barrier in get_used_allctr for newly received carriers.
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.c | 659 | ||||
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.h | 39 | ||||
-rw-r--r-- | erts/emulator/beam/erl_ao_firstfit_alloc.c | 126 | ||||
-rw-r--r-- | erts/emulator/beam/erl_ao_firstfit_alloc.h | 1 | ||||
-rw-r--r-- | erts/emulator/internal_doc/CarrierMigration.md | 138 |
5 files changed, 539 insertions, 424 deletions
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index f0597e778a..dbf27f1d2f 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -375,8 +375,10 @@ do { \ #define ERTS_CRR_ALCTR_FLG_IN_POOL (((erts_aint_t) 1) << 0) #define ERTS_CRR_ALCTR_FLG_BUSY (((erts_aint_t) 1) << 1) +#define ERTS_CRR_ALCTR_FLG_HOMECOMING (((erts_aint_t) 1) << 2) #define ERTS_CRR_ALCTR_FLG_MASK (ERTS_CRR_ALCTR_FLG_IN_POOL | \ - ERTS_CRR_ALCTR_FLG_BUSY) + ERTS_CRR_ALCTR_FLG_BUSY | \ + ERTS_CRR_ALCTR_FLG_HOMECOMING) #ifdef ERTS_SMP #define SBC_HEADER_SIZE \ @@ -583,7 +585,7 @@ do { \ DEBUG_CHECK_CARRIER_NO_SZ((AP)); \ } while (0) -#define STAT_MBC_CPOOL_INSERT(AP, CRR) \ +#define STAT_MBC_ABANDON(AP, CRR) \ do { \ UWord csz__ = CARRIER_SZ((CRR)); \ if (IS_MSEG_CARRIER((CRR))) \ @@ -1143,90 +1145,25 @@ unlink_carrier(CarrierList_t *cl, Carrier_t *crr) ASSERT(crr->next); crr->next->prev = crr->prev; } -} - -#ifdef ERTS_SMP - #ifdef DEBUG -static int is_in_list(ErtsDoubleLink_t* sentinel, ErtsDoubleLink_t* node) -{ - ErtsDoubleLink_t* p; - - ASSERT(node != sentinel); - for (p = sentinel->next; p != sentinel; p = p->next) { - if (p == node) - return 1; - } - return 0; -} -#endif /* DEBUG */ - -static ERTS_INLINE void -link_edl_after(ErtsDoubleLink_t* after_me, ErtsDoubleLink_t* node) -{ - ErtsDoubleLink_t* before_me = after_me->next; - ASSERT(node != after_me && node != before_me); - node->next = before_me; - node->prev = after_me; - before_me->prev = node; - after_me->next = node; -} - -static ERTS_INLINE void -link_edl_before(ErtsDoubleLink_t* before_me, ErtsDoubleLink_t* node) -{ - ErtsDoubleLink_t* after_me = before_me->prev; - ASSERT(node != before_me && node != after_me); - node->next = before_me; - node->prev = after_me; - before_me->prev = node; - after_me->next = node; -} - -static ERTS_INLINE void -unlink_edl(ErtsDoubleLink_t* node) -{ - node->next->prev = node->prev; - node->prev->next = node->next; + crr->next = crr; + crr->prev = crr; +#endif } -static ERTS_INLINE void -relink_edl_before(ErtsDoubleLink_t* before_me, ErtsDoubleLink_t* node) -{ - if (node != before_me && node != before_me->prev) { - unlink_edl(node); - link_edl_before(before_me, node); - } -} +#ifdef ERTS_SMP static ERTS_INLINE int is_abandoned(Carrier_t *crr) { - return crr->cpool.abandoned.next != NULL; -} - -static ERTS_INLINE void -link_abandoned_carrier(ErtsDoubleLink_t* list, Carrier_t *crr) -{ - ASSERT(!is_abandoned(crr)); - - link_edl_after(list, &crr->cpool.abandoned); - - ASSERT(crr->cpool.abandoned.next != &crr->cpool.abandoned); - ASSERT(crr->cpool.abandoned.prev != &crr->cpool.abandoned); + return crr->cpool.state != ERTS_MBC_IS_HOME; } static ERTS_INLINE void unlink_abandoned_carrier(Carrier_t *crr) { - ASSERT(is_in_list(&crr->cpool.orig_allctr->cpool.pooled_list, - &crr->cpool.abandoned) || - is_in_list(&crr->cpool.orig_allctr->cpool.traitor_list, - &crr->cpool.abandoned)); - - unlink_edl(&crr->cpool.abandoned); - - crr->cpool.abandoned.next = NULL; - crr->cpool.abandoned.prev = NULL; + if (crr->cpool.state == ERTS_MBC_WAS_POOLED) { + aoff_remove_pooled_mbc(crr->cpool.orig_allctr, crr); + } } static ERTS_INLINE void @@ -1234,24 +1171,19 @@ clear_busy_pool_carrier(Allctr_t *allctr, Carrier_t *crr) { if (crr) { erts_aint_t max_size; - erts_aint_t new_val; + erts_aint_t iallctr; max_size = (erts_aint_t) allctr->largest_fblk_in_mbc(allctr, crr); erts_atomic_set_nob(&crr->cpool.max_size, max_size); - new_val = (((erts_aint_t) allctr)|ERTS_CRR_ALCTR_FLG_IN_POOL); - -#ifdef ERTS_ALC_CPOOL_DEBUG - { - erts_aint_t old_val = new_val|ERTS_CRR_ALCTR_FLG_BUSY; + iallctr = erts_smp_atomic_read_nob(&crr->allctr); + ERTS_ALC_CPOOL_ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) + == ((erts_aint_t)allctr | + ERTS_CRR_ALCTR_FLG_IN_POOL | + ERTS_CRR_ALCTR_FLG_BUSY)); - ERTS_ALC_CPOOL_ASSERT(old_val - == erts_smp_atomic_xchg_relb(&crr->allctr, - new_val)); - } -#else - erts_smp_atomic_set_relb(&crr->allctr, new_val); -#endif + iallctr &= ~ERTS_CRR_ALCTR_FLG_BUSY; + erts_smp_atomic_set_relb(&crr->allctr, iallctr); } } @@ -1667,6 +1599,11 @@ dealloc_mbc(Allctr_t *allctr, Carrier_t *crr) #ifdef ERTS_SMP +static void set_new_allctr_abandon_limit(Allctr_t*); +static void abandon_carrier(Allctr_t*, Carrier_t*); +static void poolify_my_carrier(Allctr_t*, Carrier_t*); +static void enqueue_homecoming(Allctr_t*, Carrier_t*); + static ERTS_INLINE Allctr_t* get_pref_allctr(void *extra) { @@ -1733,9 +1670,23 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, erts_aint_t act; ERTS_ALC_CPOOL_ASSERT(!(iallctr & ERTS_CRR_ALCTR_FLG_BUSY)); - act = erts_smp_atomic_cmpxchg_ddrb(&crr->allctr, - iallctr|ERTS_CRR_ALCTR_FLG_BUSY, - iallctr); + if (iallctr & ERTS_CRR_ALCTR_FLG_HOMECOMING) { + /* + * This carrier has just been given back to us by writing + * to crr->allctr with a write barrier (see abandon_carrier). + * + * We need a mathing read barrier to guarantee a correct view + * of the carrier for deallocation work. + */ + act = erts_smp_atomic_cmpxchg_rb(&crr->allctr, + iallctr|ERTS_CRR_ALCTR_FLG_BUSY, + iallctr); + } + else { + act = erts_smp_atomic_cmpxchg_ddrb(&crr->allctr, + iallctr|ERTS_CRR_ALCTR_FLG_BUSY, + iallctr); + } if (act == iallctr) { *busy_pcrr_pp = crr; break; @@ -1751,13 +1702,6 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, erts_mtx_unlock(&pref_allctr->mutex); } } - - ERTS_ALC_CPOOL_ASSERT( - (((iallctr & ~ERTS_CRR_ALCTR_FLG_MASK) == (erts_aint_t) pref_allctr) - ? (((iallctr & ERTS_CRR_ALCTR_FLG_MASK) == ERTS_CRR_ALCTR_FLG_IN_POOL) - || ((iallctr & ERTS_CRR_ALCTR_FLG_MASK) == 0)) - : 1)); - return used_allctr; } } @@ -2009,9 +1953,9 @@ handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr) /* Carrier migrated; need to redirect block to new owner... */ int cinit = used_allctr->dd.ix - allctr->dd.ix; - ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); + ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); - DEC_CC(allctr->calls.this_free); + DEC_CC(allctr->calls.this_free); ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type = type; if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit)) @@ -2020,8 +1964,9 @@ handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr) } } -static void -schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr); +static void schedule_dealloc_carrier(Allctr_t*, Carrier_t*); +static void dealloc_my_carrier(Allctr_t*, Carrier_t*); + static ERTS_INLINE int handle_delayed_dealloc(Allctr_t *allctr, @@ -2086,24 +2031,58 @@ handle_delayed_dealloc(Allctr_t *allctr, if (blk->bhdr == HOMECOMING_MBC_BLK_HDR) { /* * A multiblock carrier that previously has been migrated away - * from us and now is back to be deallocated. For more info - * see schedule_dealloc_carrier(). + * from us, was sent back to us either because + * - it became empty and we need to deallocated it, or + * - it was inserted into the pool and we need to update our pooled_tree */ Carrier_t *crr = ErtsContainerStruct(blk, Carrier_t, cpool.homecoming_dd.blk); + Block_t* first_blk = MBC_TO_FIRST_BLK(allctr, crr); + erts_aint_t iallctr; ERTS_ALC_CPOOL_ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(allctr)); ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); - ERTS_ALC_CPOOL_ASSERT(((erts_aint_t) allctr) - != (erts_smp_atomic_read_nob(&crr->allctr) - & ~ERTS_CRR_ALCTR_FLG_MASK)); - erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr)); - - schedule_dealloc_carrier(allctr, crr); + iallctr = erts_smp_atomic_read_nob(&crr->allctr); + ASSERT(iallctr & ERTS_CRR_ALCTR_FLG_HOMECOMING); + while (1) { + if ((iallctr & (~ERTS_CRR_ALCTR_FLG_MASK | + ERTS_CRR_ALCTR_FLG_IN_POOL)) + == (erts_aint_t)allctr) { + /* + * Carrier is home (mine and not in pool) + */ + ASSERT(!(iallctr & ERTS_CRR_ALCTR_FLG_BUSY)); + erts_smp_atomic_set_nob(&crr->allctr, (erts_aint_t)allctr); + if (IS_FREE_LAST_MBC_BLK(first_blk)) + dealloc_my_carrier(allctr, crr); + else + ASSERT(crr->cpool.state == ERTS_MBC_IS_HOME); + } + else { + erts_aint_t exp = iallctr; + erts_aint_t want = iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING; + + iallctr = erts_smp_atomic_cmpxchg_nob(&crr->allctr, + want, + exp); + if (iallctr != exp) + continue; /* retry */ + + ASSERT(crr->cpool.state != ERTS_MBC_IS_HOME); + unlink_abandoned_carrier(crr); + if (iallctr & ERTS_CRR_ALCTR_FLG_IN_POOL) + poolify_my_carrier(allctr, crr); + else + crr->cpool.state = ERTS_MBC_WAS_TRAITOR; + } + break; + } } else { - ASSERT(IS_SBC_BLK(blk) || !IS_FREE_BLK(blk)); + ASSERT(IS_SBC_BLK(blk) || (ABLK_TO_MBC(blk) != + ErtsContainerStruct(blk, Carrier_t, + cpool.homecoming_dd.blk))); INC_CC(allctr->calls.this_free); @@ -2145,23 +2124,26 @@ enqueue_dealloc_other_instance(ErtsAlcType_t type, erts_alloc_notify_delayed_dealloc(allctr->ix); } -#endif - -#ifdef ERTS_SMP -static void -set_new_allctr_abandon_limit(Allctr_t *allctr); -static void -abandon_carrier(Allctr_t *allctr, Carrier_t *crr); - +static ERTS_INLINE void +update_pooled_tree(Allctr_t *allctr, Carrier_t *crr, Uint blk_sz) +{ + if (allctr == crr->cpool.orig_allctr && crr->cpool.state == ERTS_MBC_WAS_POOLED) { + /* + * Update pooled_tree with a potentially new (larger) max_sz + */ + AOFF_RBTree_t* crr_node = &crr->cpool.pooled; + if (blk_sz > crr_node->hdr.bhdr) { + crr_node->hdr.bhdr = blk_sz; + erts_aoff_larger_max_size(crr_node); + } + } +} static ERTS_INLINE void check_abandon_carrier(Allctr_t *allctr, Block_t *fblk, Carrier_t **busy_pcrr_pp) { Carrier_t *crr; - if (busy_pcrr_pp && *busy_pcrr_pp) - return; - if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr)) return; @@ -2243,6 +2225,7 @@ dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_ else { Carrier_t *busy_pcrr_p; Allctr_t *used_allctr; + used_allctr = get_used_allctr(allctr, ERTS_ALC_TS_PREF_LOCK_NO, ptr, NULL, &busy_pcrr_p); if (used_allctr == allctr) { @@ -2259,10 +2242,10 @@ dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_ /* Carrier migrated; need to redirect block to new owner... */ int cinit = used_allctr->dd.ix - allctr->dd.ix; - ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); + ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); - if (dec_cc_on_redirect) - DEC_CC(allctr->calls.this_free); + if (dec_cc_on_redirect) + DEC_CC(allctr->calls.this_free); if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(used_allctr->ix); } @@ -2507,16 +2490,17 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) ASSERT(blk_sz % sizeof(Unit_t) == 0); ASSERT(IS_MBC_BLK(blk)); - if (is_first_blk - && is_last_blk - && allctr->main_carrier != FIRST_BLK_TO_MBC(allctr, blk)) { - destroy_carrier(allctr, blk, busy_pcrr_pp); + if (is_first_blk && is_last_blk && crr != allctr->main_carrier) { + destroy_carrier(allctr, blk, busy_pcrr_pp); } else { (*allctr->link_free_block)(allctr, blk); HARD_CHECK_BLK_CARRIER(allctr, blk); #ifdef ERTS_SMP - check_abandon_carrier(allctr, blk, busy_pcrr_pp); + if (busy_pcrr_pp && *busy_pcrr_pp) + update_pooled_tree(allctr, crr, blk_sz); + else + check_abandon_carrier(allctr, blk, busy_pcrr_pp); #endif } } @@ -2552,8 +2536,19 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, #else /* !MBC_REALLOC_ALWAYS_MOVES */ #ifdef ERTS_SMP - if (busy_pcrr_pp && *busy_pcrr_pp) - goto realloc_move; /* Don't want to use carrier in pool */ + if (busy_pcrr_pp && *busy_pcrr_pp) { + /* + * Don't want to use carrier in pool + */ + new_p = mbc_alloc(allctr, size); + if (!new_p) + return NULL; + new_blk = UMEM2BLK(new_p); + ASSERT(!(IS_MBC_BLK(new_blk) && ABLK_TO_MBC(new_blk) == *busy_pcrr_pp)); + sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ)); + mbc_free(allctr, p, busy_pcrr_pp); + return new_p; + } #endif get_blk_sz = blk_sz = UMEMSZ2BLKSZ(allctr, size); @@ -2789,9 +2784,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, if (cand_blk_sz < get_blk_sz) { /* We wont fit in cand_blk get a new one */ -#ifdef ERTS_SMP - realloc_move: -#endif + #endif /* !MBC_REALLOC_ALWAYS_MOVES */ new_p = mbc_alloc(allctr, size); @@ -2896,8 +2889,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, #ifdef ERTS_SMP #define ERTS_ALC_MAX_DEALLOC_CARRIER 10 -#define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT 20 -#define ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT 10 +#define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT 100 #define ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT 100 #define ERTS_ALC_CPOOL_MAX_FAILED_STAT_READS 3 @@ -3061,19 +3053,18 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr) ErtsAlcCPoolData_t *cpd1p, *cpd2p; erts_aint_t val; ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel; + Allctr_t *orig_allctr = crr->cpool.orig_allctr; ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */ || erts_thr_progress_is_managed_thread()); - ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_read_nob(&crr->allctr) - == (erts_aint_t) allctr); - erts_atomic_add_nob(&allctr->cpool.stat.blocks_size, + erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size, (erts_aint_t) crr->cpool.blocks_size); - erts_atomic_add_nob(&allctr->cpool.stat.no_blocks, + erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks, (erts_aint_t) crr->cpool.blocks); - erts_atomic_add_nob(&allctr->cpool.stat.carriers_size, + erts_atomic_add_nob(&orig_allctr->cpool.stat.carriers_size, (erts_aint_t) CARRIER_SZ(crr)); - erts_atomic_inc_nob(&allctr->cpool.stat.no_carriers); + erts_atomic_inc_nob(&orig_allctr->cpool.stat.no_carriers); /* * We search in 'next' direction and begin by passing @@ -3134,8 +3125,6 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr) (erts_aint_t) &crr->cpool, (erts_aint_t) cpd1p); - erts_smp_atomic_set_wb(&crr->allctr, - ((erts_aint_t) allctr)|ERTS_CRR_ALCTR_FLG_IN_POOL); LTTNG3(carrier_pool_put, ERTS_ALC_A2AD(allctr->alloc_no), allctr->ix, CARRIER_SZ(crr)); } @@ -3237,130 +3226,119 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr) static Carrier_t * cpool_fetch(Allctr_t *allctr, UWord size) { - int i, i_stop, has_passed_sentinel; + enum { IGNORANT, HAS_SEEN_SENTINEL, THE_LAST_ONE } loop_state; + int i; Carrier_t *crr; + Carrier_t *reinsert_crr = NULL; ErtsAlcCPoolData_t *cpdp; - ErtsAlcCPoolData_t *cpool_entrance; + ErtsAlcCPoolData_t *cpool_entrance = NULL; ErtsAlcCPoolData_t *sentinel; - ErtsDoubleLink_t* dl; - ErtsDoubleLink_t* first_old_traitor; ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */ || erts_thr_progress_is_managed_thread()); i = ERTS_ALC_CPOOL_MAX_FETCH_INSPECT; - first_old_traitor = allctr->cpool.traitor_list.next; - cpool_entrance = NULL; LTTNG3(carrier_pool_get, ERTS_ALC_A2AD(allctr->alloc_no), allctr->ix, (unsigned long)size); /* - * Search my own pooled_list, + * Search my own pooled_tree, * i.e my abandoned carriers that were in the pool last time I checked. */ + do { + erts_aint_t exp, act; + + crr = aoff_lookup_pooled_mbc(allctr, size); + if (!crr) + break; + + ASSERT(crr->cpool.state == ERTS_MBC_WAS_POOLED); + ASSERT(crr->cpool.orig_allctr == allctr); + + aoff_remove_pooled_mbc(allctr, crr); + + exp = erts_smp_atomic_read_nob(&crr->allctr); + if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { + ASSERT((exp & ~ERTS_CRR_ALCTR_FLG_MASK) == (erts_aint_t)allctr); + if (erts_atomic_read_nob(&crr->cpool.max_size) < size) { + /* + * This carrier has been fetched and inserted back again + * by a foreign allocator. That's why it has a stale search size. + */ + ASSERT(exp & ERTS_CRR_ALCTR_FLG_HOMECOMING); + crr->cpool.pooled.hdr.bhdr = erts_atomic_read_nob(&crr->cpool.max_size); + aoff_add_pooled_mbc(allctr, crr); + continue; + } + else if (exp & ERTS_CRR_ALCTR_FLG_BUSY) { + /* + * This must be our own carrier as part of a realloc call. + * Skip it to make things simpler. + * Must wait to re-insert to not be found again by lookup. + */ + ASSERT(!reinsert_crr); + reinsert_crr = crr; + continue; + } + + /* Try to fetch it... */ + act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, + exp & ~ERTS_CRR_ALCTR_FLG_IN_POOL, + exp); + if (act == exp) { + cpool_delete(allctr, allctr, crr); + crr->cpool.state = ERTS_MBC_IS_HOME; + + if (reinsert_crr) + aoff_add_pooled_mbc(allctr, reinsert_crr); + return crr; + } + exp = act; + } - dl = allctr->cpool.pooled_list.next; - while(dl != &allctr->cpool.pooled_list) { - erts_aint_t exp, act; - crr = (Carrier_t *) (((char *) dl) - offsetof(Carrier_t, cpool.abandoned)); + /* Not in pool anymore */ + ASSERT(!(exp & ERTS_CRR_ALCTR_FLG_BUSY)); + crr->cpool.state = ERTS_MBC_WAS_TRAITOR; - ASSERT(!is_in_list(&allctr->cpool.traitor_list, dl)); - ASSERT(crr->cpool.orig_allctr == allctr); - dl = dl->next; - exp = erts_smp_atomic_read_rb(&crr->allctr); - if ((exp & ERTS_CRR_ALCTR_FLG_MASK) == ERTS_CRR_ALCTR_FLG_IN_POOL - && erts_atomic_read_nob(&crr->cpool.max_size) >= size) { - /* Try to fetch it... */ - act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, - (erts_aint_t) allctr, - exp); - if (act == exp) { - cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr); - unlink_abandoned_carrier(crr); + }while (--i > 0); - /* Move sentinel to continue next search from here */ - relink_edl_before(dl, &allctr->cpool.pooled_list); - return crr; - } - exp = act; - } - if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { - if (!cpool_entrance) - cpool_entrance = &crr->cpool; - } - else { /* Not in pool, move to traitor_list */ - unlink_abandoned_carrier(crr); - link_abandoned_carrier(&allctr->cpool.traitor_list, crr); - } - if (--i <= 0) { - /* Move sentinel to continue next search from here */ - relink_edl_before(dl, &allctr->cpool.pooled_list); - return NULL; - } - } + if (reinsert_crr) + aoff_add_pooled_mbc(allctr, reinsert_crr); - /* Now search traitor_list. - * i.e carriers employed by other allocators last time I checked. - * They might have been abandoned since then. + /* + * Try find a nice cpool_entrance */ + while (allctr->cpool.pooled_tree) { + erts_aint_t iallctr; + + crr = ErtsContainerStruct(allctr->cpool.pooled_tree, Carrier_t, cpool.pooled); + iallctr = erts_smp_atomic_read_nob(&crr->allctr); + if (iallctr & ERTS_CRR_ALCTR_FLG_IN_POOL) { + cpool_entrance = &crr->cpool; + break; + } + /* Not in pool anymore */ + ASSERT(!(iallctr & ERTS_CRR_ALCTR_FLG_BUSY)); + aoff_remove_pooled_mbc(allctr, crr); + crr->cpool.state = ERTS_MBC_WAS_TRAITOR; - i_stop = (i < ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT ? - 0 : i - ERTS_ALC_CPOOL_MAX_TRAITOR_INSPECT); - dl = first_old_traitor; - while(dl != &allctr->cpool.traitor_list) { - erts_aint_t exp, act; - crr = (Carrier_t *) (((char *) dl) - offsetof(Carrier_t, cpool.abandoned)); - ASSERT(dl != &allctr->cpool.pooled_list); - ASSERT(crr->cpool.orig_allctr == allctr); - dl = dl->next; - exp = erts_smp_atomic_read_rb(&crr->allctr); - if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { - if (!(exp & ERTS_CRR_ALCTR_FLG_BUSY) - && erts_atomic_read_nob(&crr->cpool.max_size) >= size) { - /* Try to fetch it... */ - act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, - (erts_aint_t) allctr, - exp); - if (act == exp) { - cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr); - unlink_abandoned_carrier(crr); - - /* Move sentinel to continue next search from here */ - relink_edl_before(dl, &allctr->cpool.traitor_list); - return crr; - } - exp = act; - } - if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { - if (!cpool_entrance) - cpool_entrance = &crr->cpool; - - /* Move to pooled_list */ - unlink_abandoned_carrier(crr); - link_abandoned_carrier(&allctr->cpool.pooled_list, crr); - } - } - if (--i <= i_stop) { - /* Move sentinel to continue next search from here */ - relink_edl_before(dl, &allctr->cpool.traitor_list); - if (i > 0) - break; - else - return NULL; - } + if (--i <= 0) + return NULL; } + /* * Finally search the shared pool and try employ foreign carriers */ - sentinel = &carrier_pool[allctr->alloc_no].sentinel; if (cpool_entrance) { - /* We saw a pooled carried above, use it as entrance into the pool + /* + * We saw a pooled carried above, use it as entrance into the pool */ cpdp = cpool_entrance; } else { - /* No pooled carried seen above. Start search at cpool sentinel, + /* + * No pooled carried seen above. Start search at cpool sentinel, * but begin by passing one element before trying to fetch. * This in order to avoid contention with threads inserting elements. */ @@ -3370,8 +3348,8 @@ cpool_fetch(Allctr_t *allctr, UWord size) goto check_dc_list; } - has_passed_sentinel = 0; - while (1) { + loop_state = IGNORANT; + do { erts_aint_t exp; cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev)); if (cpdp == cpool_entrance) { @@ -3380,38 +3358,41 @@ cpool_fetch(Allctr_t *allctr, UWord size) if (cpdp == sentinel) break; } - i = 0; /* Last one to inspect */ + loop_state = THE_LAST_ONE; } else if (cpdp == sentinel) { - if (has_passed_sentinel) { + if (loop_state == HAS_SEEN_SENTINEL) { /* We been here before. cpool_entrance must have been removed */ break; } cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev)); if (cpdp == sentinel) break; - has_passed_sentinel = 1; + loop_state = HAS_SEEN_SENTINEL; } - crr = (Carrier_t *)(((char *)cpdp) - offsetof(Carrier_t, cpool)); + crr = ErtsContainerStruct(cpdp, Carrier_t, cpool); exp = erts_smp_atomic_read_rb(&crr->allctr); - if (((exp & (ERTS_CRR_ALCTR_FLG_MASK)) == ERTS_CRR_ALCTR_FLG_IN_POOL) - && (erts_atomic_read_nob(&cpdp->max_size) >= size)) { + if (((exp & (ERTS_CRR_ALCTR_FLG_IN_POOL | + ERTS_CRR_ALCTR_FLG_BUSY)) == ERTS_CRR_ALCTR_FLG_IN_POOL) + && (erts_atomic_read_nob(&cpdp->max_size) >= size)) { erts_aint_t act; - /* Try to fetch it... */ - act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, - (erts_aint_t) allctr, - exp); + erts_aint_t want = (((erts_aint_t) allctr) + | (exp & ERTS_CRR_ALCTR_FLG_HOMECOMING)); + /* Try to fetch it... */ + act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, want, exp); if (act == exp) { cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr); if (crr->cpool.orig_allctr == allctr) { unlink_abandoned_carrier(crr); - } + crr->cpool.state = ERTS_MBC_IS_HOME; + } return crr; } } + if (--i <= 0) return NULL; - } + }while (loop_state != THE_LAST_ONE); check_dc_list: /* Last; check our own pending dealloc carrier list... */ @@ -3420,13 +3401,8 @@ check_dc_list: if (erts_atomic_read_nob(&crr->cpool.max_size) >= size) { Block_t* blk; unlink_carrier(&allctr->cpool.dc_list, crr); -#ifdef ERTS_ALC_CPOOL_DEBUG - ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_xchg_nob(&crr->allctr, - ((erts_aint_t) allctr)) - == (((erts_aint_t) allctr) & ~ERTS_CRR_ALCTR_FLG_MASK)); -#else - erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr)); -#endif + ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_read_nob(&crr->allctr) + == ((erts_aint_t) allctr)); blk = MBC_TO_FIRST_BLK(allctr, crr); ASSERT(FBLK_TO_MBC(blk) == crr); allctr->link_free_block(allctr, blk); @@ -3491,9 +3467,6 @@ static void schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr) { Allctr_t *orig_allctr; - Block_t *blk; - int check_pending_dealloc; - erts_aint_t max_size; ASSERT(IS_MB_CARRIER(crr)); @@ -3504,9 +3477,17 @@ schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr) orig_allctr = crr->cpool.orig_allctr; - if (allctr != orig_allctr) { - int cinit = orig_allctr->dd.ix - allctr->dd.ix; - + if (allctr == orig_allctr) { + if (!(erts_smp_atomic_read_nob(&crr->allctr) & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { + dealloc_my_carrier(allctr, crr); + } + /*else + * Carrier was abandoned earlier by other thread and + * is still waiting for us in dd-queue. + * handle_delayed_dealloc() will handle it when crr is dequeued. + */ + } + else { /* * We send the carrier to its origin for deallocation. * This in order: @@ -3515,31 +3496,39 @@ schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr) * - to ensure that we always only reuse empty carriers * originating from our own thread specific mseg_alloc * instance which is beneficial on NUMA systems. - * - * The receiver will recognize that this is a carrier to - * deallocate (and not a block which is the common case) - * since the block is an mbc block that is free and last - * in the carrier. */ - blk = MBC_TO_FIRST_BLK(allctr, crr); - ERTS_ALC_CPOOL_ASSERT(IS_FREE_LAST_MBC_BLK(blk)); - - ERTS_ALC_CPOOL_ASSERT(IS_MBC_FIRST_ABLK(allctr, blk)); - ERTS_ALC_CPOOL_ASSERT(crr == FBLK_TO_MBC(blk)); - ERTS_ALC_CPOOL_ASSERT(crr == FIRST_BLK_TO_MBC(allctr, blk)); - ERTS_ALC_CPOOL_ASSERT(((erts_aint_t) allctr) - == (erts_smp_atomic_read_nob(&crr->allctr) - & ~ERTS_CRR_ALCTR_FLG_MASK)); - - blk = &crr->cpool.homecoming_dd.blk; - ERTS_ALC_CPOOL_ASSERT(blk->bhdr == HOMECOMING_MBC_BLK_HDR); - if (ddq_enqueue(&orig_allctr->dd.q, BLK2UMEM(blk), cinit)) - erts_alloc_notify_delayed_dealloc(orig_allctr->ix); - return; + erts_aint_t iallctr; +#ifdef ERTS_ALC_CPOOL_DEBUG + Block_t* first_blk = MBC_TO_FIRST_BLK(allctr, crr); + ERTS_ALC_CPOOL_ASSERT(IS_FREE_LAST_MBC_BLK(first_blk)); + + ERTS_ALC_CPOOL_ASSERT(IS_MBC_FIRST_ABLK(allctr, first_blk)); + ERTS_ALC_CPOOL_ASSERT(crr == FBLK_TO_MBC(first_blk)); + ERTS_ALC_CPOOL_ASSERT(crr == FIRST_BLK_TO_MBC(allctr, first_blk)); + ERTS_ALC_CPOOL_ASSERT((erts_smp_atomic_read_nob(&crr->allctr) + & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) + == (erts_aint_t) allctr); +#endif + + iallctr = (erts_aint_t)orig_allctr | ERTS_CRR_ALCTR_FLG_HOMECOMING; + if (!(erts_smp_atomic_xchg_nob(&crr->allctr, iallctr) + & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { + enqueue_homecoming(allctr, crr); + } } +} - if (is_abandoned(crr)) - unlink_abandoned_carrier(crr); +static void dealloc_my_carrier(Allctr_t *allctr, Carrier_t *crr) +{ + Block_t *blk; + int check_pending_dealloc; + erts_aint_t max_size; + + ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); + if (is_abandoned(crr)) { + unlink_abandoned_carrier(crr); + crr->cpool.state = ERTS_MBC_IS_HOME; + } if (crr->cpool.thr_prgr == ERTS_THR_PRGR_INVALID || erts_thr_progress_has_reached(crr->cpool.thr_prgr)) { @@ -3590,8 +3579,7 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr) limit = (csz/100)*allctr->cpool.util_limit; crr->cpool.abandon_limit = limit; } - crr->cpool.abandoned.next = NULL; - crr->cpool.abandoned.prev = NULL; + crr->cpool.state = ERTS_MBC_IS_HOME; } static void @@ -3618,22 +3606,64 @@ static void abandon_carrier(Allctr_t *allctr, Carrier_t *crr) { erts_aint_t max_size; + erts_aint_t iallctr; - STAT_MBC_CPOOL_INSERT(allctr, crr); + STAT_MBC_ABANDON(allctr, crr); unlink_carrier(&allctr->mbc_list, crr); - if (crr->cpool.orig_allctr == allctr) { - link_abandoned_carrier(&allctr->cpool.pooled_list, crr); - } - allctr->remove_mbc(allctr, crr); + set_new_allctr_abandon_limit(allctr); max_size = (erts_aint_t) allctr->largest_fblk_in_mbc(allctr, crr); erts_atomic_set_nob(&crr->cpool.max_size, max_size); - cpool_insert(allctr, crr); - set_new_allctr_abandon_limit(allctr); + + iallctr = erts_smp_atomic_read_nob(&crr->allctr); + if (allctr == crr->cpool.orig_allctr) { + /* preserve HOMECOMING flag */ + ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == (erts_aint_t)allctr); + erts_smp_atomic_set_wb(&crr->allctr, iallctr | ERTS_CRR_ALCTR_FLG_IN_POOL); + poolify_my_carrier(allctr, crr); + } + else { + ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == (erts_aint_t)allctr); + iallctr = ((erts_aint_t)crr->cpool.orig_allctr | + ERTS_CRR_ALCTR_FLG_HOMECOMING | + ERTS_CRR_ALCTR_FLG_IN_POOL); + if (!(erts_smp_atomic_xchg_wb(&crr->allctr, iallctr) + & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { + + enqueue_homecoming(allctr, crr); + } + } +} + +static void +enqueue_homecoming(Allctr_t* allctr, Carrier_t* crr) +{ + Allctr_t* orig_allctr = crr->cpool.orig_allctr; + const int cinit = orig_allctr->dd.ix - allctr->dd.ix; + Block_t* dd_blk = &crr->cpool.homecoming_dd.blk; + + /* + * The receiver will recognize this as a carrier + * (and not a block which is the common case) + * since the block header is HOMECOMING_MBC_BLK_HDR. + */ + ASSERT(dd_blk->bhdr == HOMECOMING_MBC_BLK_HDR); + if (ddq_enqueue(&orig_allctr->dd.q, BLK2UMEM(dd_blk), cinit)) + erts_alloc_notify_delayed_dealloc(orig_allctr->ix); +} + +static void +poolify_my_carrier(Allctr_t *allctr, Carrier_t *crr) +{ + ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); + + crr->cpool.pooled.hdr.bhdr = erts_atomic_read_nob(&crr->cpool.max_size); + aoff_add_pooled_mbc(allctr, crr); + crr->cpool.state = ERTS_MBC_WAS_POOLED; } static void @@ -4153,16 +4183,21 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp) #ifdef ERTS_SMP if (busy_pcrr_pp && *busy_pcrr_pp) { + erts_aint_t iallctr = erts_smp_atomic_read_nob(&crr->allctr); ERTS_ALC_CPOOL_ASSERT(*busy_pcrr_pp == crr); - *busy_pcrr_pp = NULL; - ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_read_nob(&crr->allctr) - == (((erts_aint_t) allctr) - | ERTS_CRR_ALCTR_FLG_IN_POOL - | ERTS_CRR_ALCTR_FLG_BUSY)); - erts_smp_atomic_set_nob(&crr->allctr, ((erts_aint_t) allctr)); + ERTS_ALC_CPOOL_ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) + == (((erts_aint_t) allctr) + | ERTS_CRR_ALCTR_FLG_IN_POOL + | ERTS_CRR_ALCTR_FLG_BUSY)); + ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); + + *busy_pcrr_pp = NULL; + erts_smp_atomic_set_nob(&crr->allctr, + (iallctr & ~(ERTS_CRR_ALCTR_FLG_IN_POOL | + ERTS_CRR_ALCTR_FLG_BUSY))); cpool_delete(allctr, allctr, crr); } - else + else #endif { unlink_carrier(&allctr->mbc_list, crr); @@ -5565,12 +5600,13 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) pref_allctr = get_pref_allctr(extra); used_allctr = get_used_allctr(pref_allctr, ERTS_ALC_TS_PREF_LOCK_IF_USED, p, NULL, &busy_pcrr_p); - if (pref_allctr != used_allctr) + if (pref_allctr != used_allctr) { enqueue_dealloc_other_instance(type, - used_allctr, - p, - (used_allctr->dd.ix - - pref_allctr->dd.ix)); + used_allctr, + p, + (used_allctr->dd.ix + - pref_allctr->dd.ix)); + } else { ERTS_ALCU_DBG_CHK_THR_ACCESS(used_allctr); do_erts_alcu_free(type, used_allctr, p, &busy_pcrr_p); @@ -6040,10 +6076,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->min_block_size = sz; } - allctr->cpool.pooled_list.next = &allctr->cpool.pooled_list; - allctr->cpool.pooled_list.prev = &allctr->cpool.pooled_list; - allctr->cpool.traitor_list.next = &allctr->cpool.traitor_list; - allctr->cpool.traitor_list.prev = &allctr->cpool.traitor_list; + allctr->cpool.pooled_tree = NULL; allctr->cpool.dc_list.first = NULL; allctr->cpool.dc_list.last = NULL; allctr->cpool.abandon_limit = 0; diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h index 1291a6c10e..e5bf494e97 100644 --- a/erts/emulator/beam/erl_alloc_util.h +++ b/erts/emulator/beam/erl_alloc_util.h @@ -307,6 +307,7 @@ typedef union {char c[ERTS_ALLOC_ALIGN_BYTES]; long l; double d;} Unit_t; typedef struct Carrier_t_ Carrier_t; + typedef struct { UWord bhdr; #if !MBC_ABLK_OFFSET_BITS @@ -373,13 +374,24 @@ typedef struct { typedef UWord FreeBlkFtr_t; /* Footer of a free block */ - +/* This AOFF stuff really belong in erl_ao_firstfit_alloc.h */ +typedef struct AOFF_RBTree_t_ AOFF_RBTree_t; +struct AOFF_RBTree_t_ { + Block_t hdr; + AOFF_RBTree_t *parent; + AOFF_RBTree_t *left; + AOFF_RBTree_t *right; + Uint32 flags; + Uint32 max_sz; /* of all blocks in this sub-tree */ +}; #ifdef ERTS_SMP +void aoff_add_pooled_mbc(Allctr_t*, Carrier_t*); +void aoff_remove_pooled_mbc(Allctr_t*, Carrier_t*); +Carrier_t* aoff_lookup_pooled_mbc(Allctr_t*, Uint size); +void erts_aoff_larger_max_size(AOFF_RBTree_t *node); +#endif -typedef struct ErtsDoubleLink_t_ { - struct ErtsDoubleLink_t_ *next; - struct ErtsDoubleLink_t_ *prev; -}ErtsDoubleLink_t; +#ifdef ERTS_SMP typedef struct { ErtsFakeDDBlock_t homecoming_dd; @@ -391,7 +403,12 @@ typedef struct { UWord abandon_limit; UWord blocks; UWord blocks_size; - ErtsDoubleLink_t abandoned; /* node in pooled_list or traitor_list */ + enum { + ERTS_MBC_IS_HOME, + ERTS_MBC_WAS_POOLED, + ERTS_MBC_WAS_TRAITOR + } state; + AOFF_RBTree_t pooled; /* node in pooled_tree */ } ErtsAlcCPoolData_t; #endif @@ -566,15 +583,14 @@ struct Allctr_t_ { UWord crr_set_flgs; UWord crr_clr_flgs; - /* Carriers */ + /* Carriers *employed* by this allocator */ CarrierList_t mbc_list; CarrierList_t sbc_list; #ifdef ERTS_SMP struct { - /* pooled_list, traitor list and dc_list contain only - carriers _created_ by this allocator */ - ErtsDoubleLink_t pooled_list; - ErtsDoubleLink_t traitor_list; + /* pooled_tree and dc_list contain only + carriers *created* by this allocator */ + AOFF_RBTree_t* pooled_tree; CarrierList_t dc_list; UWord abandon_limit; @@ -686,7 +702,6 @@ void erts_alcu_assert_failed(char* expr, char* file, int line, char *func); int is_sbc_blk(Block_t*); #endif - #endif /* #if defined(GET_ERL_ALLOC_UTIL_IMPL) && !defined(ERL_ALLOC_UTIL_IMPL__) */ diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c index 05ba1f9891..b781db152f 100644 --- a/erts/emulator/beam/erl_ao_firstfit_alloc.c +++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c @@ -92,18 +92,6 @@ #define RBT_ASSERT(x) #endif - -/* Types... */ -typedef struct AOFF_RBTree_t_ AOFF_RBTree_t; - -struct AOFF_RBTree_t_ { - Block_t hdr; - AOFF_RBTree_t *parent; - AOFF_RBTree_t *left; - AOFF_RBTree_t *right; - Uint32 flags; - Uint32 max_sz; /* of all blocks in this sub-tree */ -}; #define AOFF_BLK_SZ(B) MBC_FBLK_SZ(&(B)->hdr) /* BF block nodes keeps list of all with equal size @@ -120,7 +108,7 @@ typedef struct AOFF_Carrier_t_ AOFF_Carrier_t; struct AOFF_Carrier_t_ { Carrier_t crr; - AOFF_RBTree_t rbt_node; /* My node in the carrier tree */ + AOFF_RBTree_t rbt_node; /* My node in the carrier tree */ AOFF_RBTree_t* root; /* Root of my block tree */ }; #define RBT_NODE_TO_MBC(PTR) ErtsContainerStruct((PTR), AOFF_Carrier_t, rbt_node) @@ -136,8 +124,9 @@ struct AOFF_Carrier_t_ { */ #ifdef HARD_DEBUG -# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE) +# define HARD_CHECK_IS_MEMBER(ROOT,NODE) ASSERT(rbt_is_member(ROOT,NODE)) # define HARD_CHECK_TREE(CRR,FLV,ROOT,SZ) check_tree(CRR, FLV, ROOT, SZ) +static int rbt_is_member(AOFF_RBTree_t* root, AOFF_RBTree_t* node); static AOFF_RBTree_t * check_tree(Carrier_t* within_crr, enum AOFF_Flavor flavor, AOFF_RBTree_t* root, Uint); #else # define HARD_CHECK_IS_MEMBER(ROOT,NODE) @@ -179,6 +168,27 @@ static ERTS_INLINE void lower_max_size(AOFF_RBTree_t *node, else ASSERT(new_max == old_max); } +#ifdef ERTS_SMP +/* + * Set possibly new larger 'max_sz' of node and propagate change toward root + */ +void erts_aoff_larger_max_size(AOFF_RBTree_t *node) +{ + AOFF_RBTree_t* x = node; + const Uint new_sz = node->hdr.bhdr; + + ASSERT(!x->left || x->left->max_sz <= x->max_sz); + ASSERT(!x->right || x->right->max_sz <= x->max_sz); + + while (new_sz > x->max_sz) { + x->max_sz = new_sz; + x = x->parent; + if (!x) + break; + } +} +#endif + static ERTS_INLINE SWord cmp_blocks(enum AOFF_Flavor flavor, AOFF_RBTree_t* lhs, AOFF_RBTree_t* rhs) { @@ -220,9 +230,6 @@ static UWord aoff_largest_fblk_in_mbc(Allctr_t*, Carrier_t*); static void rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del); static void rbt_insert(enum AOFF_Flavor flavor, AOFF_RBTree_t** root, AOFF_RBTree_t* blk); static AOFF_RBTree_t* rbt_search(AOFF_RBTree_t* root, Uint size); -#ifdef HARD_DEBUG -static int rbt_assert_is_member(AOFF_RBTree_t* root, AOFF_RBTree_t* node); -#endif static Eterm info_options(Allctr_t *, char *, fmtfn_t *, void *, Uint **, Uint *); static void init_atoms(void); @@ -719,19 +726,20 @@ aoff_link_free_block(Allctr_t *allctr, Block_t *block) rbt_insert(alc->flavor, &blk_crr->root, blk); - /* Update the carrier tree with a potentially new (larger) max_sz - */ + /* + * Update carrier tree with a potentially new (larger) max_sz + */ crr_node = &blk_crr->rbt_node; if (blk_sz > crr_node->hdr.bhdr) { - ASSERT(blk_sz == blk_crr->root->max_sz); - crr_node->hdr.bhdr = blk_sz; - while (blk_sz > crr_node->max_sz) { - crr_node->max_sz = blk_sz; - crr_node = crr_node->parent; - if (!crr_node) break; - } + ASSERT(blk_sz == blk_crr->root->max_sz); + crr_node->hdr.bhdr = blk_sz; + while (blk_sz > crr_node->max_sz) { + crr_node->max_sz = blk_sz; + crr_node = crr_node->parent; + if (!crr_node) break; + } } - HARD_CHECK_TREE(&blk_crr->crr, alc->flavor, blk_crr->root, 0); + HARD_CHECK_TREE(NULL, AOFF_AOFF, alc->mbc_root, 0); } static void @@ -826,6 +834,18 @@ rbt_search(AOFF_RBTree_t* root, Uint size) } } +#ifdef ERTS_SMP +Carrier_t* aoff_lookup_pooled_mbc(Allctr_t* allctr, Uint size) +{ + AOFF_RBTree_t* node; + + if (!allctr->cpool.pooled_tree) + return NULL; + node = rbt_search(allctr->cpool.pooled_tree, size); + return node ? ErtsContainerStruct(node, Carrier_t, cpool.pooled) : NULL; +} +#endif + static Block_t * aoff_get_free_block(Allctr_t *allctr, Uint size, Block_t *cand_blk, Uint cand_size) @@ -920,16 +940,31 @@ static void aoff_add_mbc(Allctr_t *allctr, Carrier_t *carrier) HARD_CHECK_TREE(NULL, 0, *root, 0); } +#ifdef ERTS_SMP +void aoff_add_pooled_mbc(Allctr_t *allctr, Carrier_t *crr) +{ + AOFF_RBTree_t **root = &allctr->cpool.pooled_tree; + + ASSERT(allctr == crr->cpool.orig_allctr); + HARD_CHECK_TREE(NULL, 0, *root, 0); + + /* Link carrier in address order tree + */ + rbt_insert(AOFF_AOFF, root, &crr->cpool.pooled); + + HARD_CHECK_TREE(NULL, 0, *root, 0); +} +#endif + static void aoff_remove_mbc(Allctr_t *allctr, Carrier_t *carrier) { - AOFFAllctr_t *alc = (AOFFAllctr_t *) allctr; - AOFF_Carrier_t *crr = (AOFF_Carrier_t*) carrier; - AOFF_RBTree_t **root = &alc->mbc_root; + AOFF_RBTree_t **root = &((AOFFAllctr_t*)allctr)->mbc_root; + AOFF_Carrier_t *crr = (AOFF_Carrier_t*)carrier; ASSERT(allctr == ERTS_ALC_CARRIER_TO_ALLCTR(carrier)); if (!IS_CRR_IN_TREE(crr,*root)) - return; + return; HARD_CHECK_TREE(NULL, 0, *root, 0); @@ -942,6 +977,26 @@ static void aoff_remove_mbc(Allctr_t *allctr, Carrier_t *carrier) HARD_CHECK_TREE(NULL, 0, *root, 0); } +#ifdef ERTS_SMP +void aoff_remove_pooled_mbc(Allctr_t *allctr, Carrier_t *crr) +{ + ASSERT(allctr == crr->cpool.orig_allctr); + + HARD_CHECK_TREE(NULL, 0, allctr->cpool.pooled_tree, 0); + + rbt_delete(&allctr->cpool.pooled_tree, &crr->cpool.pooled); +#ifdef DEBUG + crr->cpool.pooled.parent = NULL; + crr->cpool.pooled.left = NULL; + crr->cpool.pooled.right = NULL; + crr->cpool.pooled.max_sz = 0; +#endif + HARD_CHECK_TREE(NULL, 0, allctr->cpool.pooled_tree, 0); + +} +#endif + + static UWord aoff_largest_fblk_in_mbc(Allctr_t* allctr, Carrier_t* carrier) { AOFF_Carrier_t *crr = (AOFF_Carrier_t*) carrier; @@ -1085,12 +1140,13 @@ erts_aoffalc_test(UWord op, UWord a1, UWord a2) #ifdef HARD_DEBUG - -static int rbt_assert_is_member(AOFF_RBTree_t* root, AOFF_RBTree_t* node) +static int rbt_is_member(AOFF_RBTree_t* root, AOFF_RBTree_t* node) { while (node != root) { - ASSERT(node->parent); - ASSERT(node->parent->left == node || node->parent->right == node); + if (!node->parent || (node->parent->left != node && + node->parent->right != node)) { + return 0; + } node = node->parent; } return 1; diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.h b/erts/emulator/beam/erl_ao_firstfit_alloc.h index 7349c6ab19..a3fca7d3e3 100644 --- a/erts/emulator/beam/erl_ao_firstfit_alloc.h +++ b/erts/emulator/beam/erl_ao_firstfit_alloc.h @@ -53,7 +53,6 @@ Allctr_t *erts_aoffalc_start(AOFFAllctr_t *, AOFFAllctrInit_t*, AllctrInit_t *); #define GET_ERL_ALLOC_UTIL_IMPL #include "erl_alloc_util.h" - struct AOFFAllctr_t_ { Allctr_t allctr; /* Has to be first! */ diff --git a/erts/emulator/internal_doc/CarrierMigration.md b/erts/emulator/internal_doc/CarrierMigration.md index 2a9594db25..3a796d11b7 100644 --- a/erts/emulator/internal_doc/CarrierMigration.md +++ b/erts/emulator/internal_doc/CarrierMigration.md @@ -3,17 +3,17 @@ Carrier Migration The ERTS memory allocators manage memory blocks in two types of raw memory chunks. We call these chunks of raw memory -*carriers*. Singleblock carriers which only contain one large block, -and multiblock carriers which contain multiple blocks. A carrier is +*carriers*. Single-block carriers which only contain one large block, +and multi-block carriers which contain multiple blocks. A carrier is typically created using `mmap()` on unix systems. However, how a carrier is created is of minor importance. An allocator instance -typically manages a mixture of single- and multiblock carriers. +typically manages a mixture of single- and multi-block carriers. Problem ------- When a carrier is empty, i.e. contains only one large free block, it -is deallocated. Since multiblock carriers can contain both allocated +is deallocated. Since multi-block carriers can contain both allocated blocks and free blocks at the same time, an allocator instance might be stuck with a large amount of poorly utilized carriers if the memory load decreases. After a peak in memory usage it is expected that not @@ -23,9 +23,9 @@ can usually be reused if the memory load increases again. However, since each scheduler thread manages its own set of allocator instances, and memory load is not necessarily correlated to CPU load, we might get into a situation where there are lots of poorly utilized -multiblock carriers on some allocator instances while we need to -allocate new multiblock carriers on other allocator instances. In -scenarios like this, the demand for multiblock carriers in the system +multi-block carriers on some allocator instances while we need to +allocate new multi-block carriers on other allocator instances. In +scenarios like this, the demand for multi-block carriers in the system might increase at the same time as the actual memory demand in the system has decreased which is both unwanted and quite unexpected for the end user. @@ -34,7 +34,7 @@ Solution -------- In order to prevent scenarios like this we've implemented support for -migration of multiblock carriers between allocator instances of the +migration of multi-block carriers between allocator instances of the same type. ### Management of Free Blocks ### @@ -44,7 +44,7 @@ and add it to another we need to be able to move references to the free blocks of the carrier between the allocator instances. The allocator instance specific data structure referring to the free blocks it manages often refers to the same carrier from multiple -places. For example, when the address order bestfit strategy is used +places. For example, when the address order best-fit strategy is used this data structure is a binary search tree spanning all carriers that the allocator instance manages. Free blocks in one specific carrier can be referred to from potentially every other carrier that is @@ -135,7 +135,7 @@ carriers between scheduler specific allocator instances of the same allocator type. Each allocator instance keeps track of the current utilization of its -multiblock carriers. When the total utilization falls below the "abandon +multi-block carriers. When the total utilization falls below the "abandon carrier utilization limit" it starts to inspect the utilization of the current carrier when deallocations are made. If also the utilization of the carrier falls below the "abandon carrier utilization limit" it @@ -144,31 +144,45 @@ and inserts the carrier into the pool. Since the carrier has been unlinked from the data structure of available free blocks, no more allocations will be made in the -carrier. The allocator instance putting the carrier into the pool, -however, still has the responsibility of performing deallocations in -it while it remains in the pool. The allocator instance with this -deallocation responsibility is here called the **employer**. - -Each carrier has a flag field containing information about the -employing allocator instance, a flag indicating if the carrier is in -the pool or not, and a flag indicating if it is busy or not. When the -carrier is in the pool, the employing allocator instance needs to mark it -as busy while operating on it. If another thread inspects it in order -to try to fetch it from the pool, it will skip it if it is busy. When -fetching the carrier from the pool, employment will change and further +carrier. + +The allocator instance that created a carrier is called its **owner**. +Ownership never changes. + +The allocator instance that has the responsibility to perform deallocations in a +carrier is called its **employer**. The employer may also perform allocations if +the carrier is not in the pool. Employment may change when a carrier is fetched from +or inserted into the pool. + +Deallocations in a carrier, while it remains in the pool, is always performed +the owner. That is, all pooled carriers are employed by their owners. + +Each carrier has an atomic word containing a pointer to the employing allocator +instance and three bit flags; IN_POOL, BUSY and HOMECOMING. + +When fetching a carrier from the pool, employment may change and further deallocations in the carrier will be redirected to the new employer using the delayed dealloc functionality. -If a carrier in the pool becomes empty, it will be withdrawn from the -pool. All carriers that become empty are also always passed to its -**owning** allocator instance for deallocation using the delayed -dealloc functionality. Since carriers this way always will be -deallocated by the owner that allocated the carrier, the +When a foreign allocator instance abandons a carrier back into the pool, it will +also pass it back to its **owner** using the delayed dealloc queue. When doing +this it will set the HOMECOMING bit flag to mark it as "enqueued". The owner +will later clear the HOMECOMING bit when the carrier is dequeued. This mechanism +prevents a carrier from being enqueued again before it has been dequeued. + +When a carrier becomes empty, it will be deallocated. Carrier deallocation is +always done by the owner that allocated the carrier. By doing this, the underlying functionality of allocating and deallocating carriers can remain simple and doesn't have to bother about multiple threads. In a NUMA system we will also not mix carriers originating from multiple NUMA nodes. +If a carrier in the pool becomes empty, it will be withdrawn from the +pool and be deallocated by the owner which already employs it. + +If a carrier employed by a foreign allocator becomes empty, it will be passed +back to the owner for deallocation using the delayed dealloc functionality. + In short: * The allocator instance that created a carrier **owns** it. @@ -177,34 +191,31 @@ In short: * The allocator instance that uses a carrier **employs** it. * An **employer** can abandon a carrier into the pool. * Pooled carriers are not allocated from. -* Deallocation in a pooled carrier is still performed by its **employer**. -* **Employment** can only change when a carrier is fetched from the pool. +* Pooled carriers are always **employed** by their **owner**. +* **Employment** can only change from **owner** to a foreign allocator + when a carrier is fetched from the pool. + ### Searching the pool ### +When an allocator instance needs more carrier space, it inspects the pool. If no +carrier could be fetched from the pool, it will allocate a new +carrier. Regardless of where the allocator instance gets the carrier from, it +just links in the carrier into its data structure of free blocks. + To harbor real time characteristics, searching the pool is limited. We only inspect a limited number of carriers. If none of those carriers had a free block large enough to satisfy the allocation -request, the search will fail. A carrier in the pool can also be busy +request, the search will fail. A carrier in the pool can also be BUSY if another thread is currently doing block deallocation work on the -carrier. A busy carrier will also be skipped by the search as it can +carrier. A BUSY carrier will also be skipped by the search as it can not satisfy the request. The pool is lock-free and we do not want to block, waiting for the other thread to finish. -#### Before OTP 17.4 #### +### The bad cluster problem ### -When an allocator instance needs more carrier space, it always begins -by inspecting its own carriers that are waiting for thread progress -before they can be deallocated. If no such carrier could be found, it -then inspects the pool. If no carrier could be fetched from the pool, -it will allocate a new carrier. Regardless of where the allocator -instance gets the carrier from it the just links in the carrier into -its data structure of free blocks. - -#### After OTP 17.4 #### - -The old search algorithm had a problem as the search always started at -the same position in the pool, the sentinel. This could lead to +Before OTP-17.4 the search algorithm had a problem as the search always started +at the same position in the pool, the sentinel. This could lead to contention from concurrent searching processes. But even worse, it could lead to a "bad" state when searches fail with a high rate leading to new carriers instead being allocated. These new carriers @@ -236,26 +247,27 @@ The result is that we prefer carriers created by the thread itself, which is good for NUMA performance. And we get more entry points when searching the pool, which will ease contention and clustering. +### Our own pooled tree ### + To do the first search among own carriers, every allocator instance -has two new lists: `pooled_list` and `traitor_list`. These lists are only -accessed by the allocator itself and they only contain the allocator's -own carriers. When an owned carrier is abandoned and put in the -pool, it is also linked into `pooled_list`. When we search our -`pooled_list` and find a carrier that is no longer in the pool, we -move that carrier from `pooled_list` to `traitor_list` as it is now -employed by another allocator. If searching `pooled_list` fails, we -also do a limited search of `traitor_list`. When finding an abandoned -carrier in `traitor_list` it is either employed or moved back to -`pooled_list` if it could not satisfy the allocation request. - -When searching `pooled_list` and `traitor_list` we always start at the -point where the last search ended. This to avoid clustering -problems and increase the probability to find a "good" carrier. As -`pooled_list` and `traitor_list` are only accessed by the owning -allocator instance, they need no thread synchronization at all. +has a `pooled_tree` of carriers. This tree is only accessed by the allocator +itself and can only contain its own carriers. When a carrier is +abandoned and put in the pool, it is also inserted into `pooled_tree`. This is +either done direct, if the carrier was already employed by its owner, or by +first passing it back to the owner via the delayed dealloc queue. + +When we search our `pooled_tree` and find a carrier that is no longer in the +pool, we remove that carrier from `pooled_tree` and mark it as TRAITOR, as it is +now employed by a foreign allocator. We will not find any carriers in +`pooled_tree` that are marked as BUSY by other threads. + +If no carrier in `pooled_tree` had a large enough free block, we search it again +to find any carrier that may act as an entry point into the shared list of all +pooled carriers. This in order to, if possible, avoid starting at the sentinel +and thereby ease the "bad clustering" problem. Furthermore, the search for own carriers that are scheduled -for deallocation is now done as the last search option. The idea is +for deallocation is done as the last search option. The idea is that it is better to reuse a poorly utilized carrier than to resurrect an empty carrier that was just about to be released back to the OS. @@ -271,14 +283,14 @@ load did not. When using the `aoffcaobf` or `aoff` strategies compared to `gf` or `bf`, we loose some performance since we get more modifications in the data structure of free blocks. This performance penalty is however -reduced using the `aoffcbf` strategy. A tradeoff between memory +reduced using the `aoffcbf` strategy. A trade off between memory consumption and performance is however inevitable, and it is up to the user to decide what is most important. Further work ------------ -It would be quite easy to extend this to allow migration of multiblock +It would be quite easy to extend this to allow migration of multi-block carriers between all allocator types. More or less the only obstacle is maintenance of the statistics information. |