diff options
Diffstat (limited to 'erts/emulator/beam/erl_alloc_util.c')
-rw-r--r-- | erts/emulator/beam/erl_alloc_util.c | 2579 |
1 files changed, 2020 insertions, 559 deletions
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index ae4ff4f453..25ac3bc5af 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2002-2017. All Rights Reserved. + * Copyright Ericsson AB 2002-2018. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,12 +42,15 @@ #include "global.h" #include "big.h" +#include "erl_mmap.h" #include "erl_mtrace.h" #define GET_ERL_ALLOC_UTIL_IMPL #include "erl_alloc_util.h" #include "erl_mseg.h" #include "erl_threads.h" #include "erl_thr_progress.h" +#include "erl_bif_unique.h" +#include "erl_nif.h" #ifdef ERTS_ENABLE_LOCK_COUNT #include "erl_lock_count.h" @@ -88,15 +91,18 @@ static int initialized = 0; #define SYS_ALLOC_CARRIER_FLOOR(X) ((X) & SYS_ALLOC_CARRIER_MASK) #define SYS_ALLOC_CARRIER_CEILING(X) \ SYS_ALLOC_CARRIER_FLOOR((X) + INV_SYS_ALLOC_CARRIER_MASK) +#define SYS_PAGE_SIZE (sys_page_size) +#define SYS_PAGE_SZ_MASK ((UWord)(SYS_PAGE_SIZE - 1)) #if 0 /* Can be useful for debugging */ #define MBC_REALLOC_ALWAYS_MOVES #endif - /* alloc_util global parameters */ static Uint sys_alloc_carrier_size; +static Uint sys_page_size; + #if HAVE_ERTS_MSEG static Uint max_mseg_carriers; #endif @@ -111,34 +117,67 @@ static int allow_sys_alloc_carriers; #define DEC_CC(CC) ((CC)--) -/* Multi block carrier (MBC) memory layout in R16: +/* Multi block carrier (MBC) memory layout in OTP 22: Empty MBC: -[Carrier_t|pad|Block_t L0T|fhdr| free... ] +[Carrier_t|pad|Block_t L0T0|fhdr| free... ] MBC after allocating first block: -[Carrier_t|pad|Block_t 000| udata |pad|Block_t L0T|fhdr| free... ] +[Carrier_t|pad|Block_t 0000| udata |pad|Block_t L0T0|fhdr| free... ] MBC after allocating second block: -[Carrier_t|pad|Block_t 000| udata |pad|Block_t 000| udata |pad|Block_t L0T|fhdr| free... ] +[Carrier_t|pad|Block_t 0000| udata |pad|Block_t 0000| udata |pad|Block_t L0T0|fhdr| free... ] MBC after deallocating first block: -[Carrier_t|pad|Block_t 00T|fhdr| free |FreeBlkFtr_t|Block_t 0P0| udata |pad|Block_t L0T|fhdr| free... ] +[Carrier_t|pad|Block_t 00T0|fhdr| free |FreeBlkFtr_t|Block_t 0P00| udata |pad|Block_t L0T0|fhdr| free... ] +MBC after allocating first block, with allocation tagging enabled: +[Carrier_t|pad|Block_t 000A| udata |atag|pad|Block_t L0T0|fhdr| free... ] udata = Allocated user data + atag = A tag with basic metadata about this allocation pad = Padding to ensure correct alignment for user data fhdr = Allocator specific header to keep track of free block free = Unused free memory T = This block is free (THIS_FREE_BLK_HDR_FLG) P = Previous block is free (PREV_FREE_BLK_HDR_FLG) L = Last block in carrier (LAST_BLK_HDR_FLG) + A = Block has an allocation tag footer, only valid for allocated blocks + (ATAG_BLK_HDR_FLG) */ /* Single block carrier (SBC): -[Carrier_t|pad|Block_t 111| udata... ] +[Carrier_t|pad|Block_t 1110| udata... ] +[Carrier_t|pad|Block_t 111A| udata | atag] */ +/* Allocation tags ... + * + * These are added to the footer of every block when enabled. Currently they + * consist of the allocation type and an atom identifying the allocating + * driver/nif (or 'system' if that can't be determined), but the format is not + * supposed to be set in stone. + * + * The packing scheme requires that the atom values are small enough to fit + * into a word with ERTS_ALC_N_BITS to spare. Users must check for overflow + * before MAKE_ATAG(). */ + +typedef UWord alcu_atag_t; + +#define MAKE_ATAG(IdAtom, TypeNum) \ + (ASSERT((TypeNum) >= ERTS_ALC_N_MIN && (TypeNum) <= ERTS_ALC_N_MAX), \ + ASSERT(atom_val(IdAtom) <= MAX_ATAG_ATOM_ID), \ + (atom_val(IdAtom) << ERTS_ALC_N_BITS) | (TypeNum)) + +#define ATAG_ID(AT) (make_atom((AT) >> ERTS_ALC_N_BITS)) +#define ATAG_TYPE(AT) ((AT) & ERTS_ALC_N_MASK) + +#define MAX_ATAG_ATOM_ID (ERTS_UWORD_MAX >> ERTS_ALC_N_BITS) + +#define DBG_IS_VALID_ATAG(AT) \ + (ATAG_TYPE(AT) >= ERTS_ALC_N_MIN && \ + ATAG_TYPE(AT) <= ERTS_ALC_N_MAX && \ + ATAG_ID(AT) <= MAX_ATAG_ATOM_ID) /* Blocks ... */ @@ -153,10 +192,22 @@ MBC after deallocating first block: #endif #define FBLK_FTR_SZ (sizeof(FreeBlkFtr_t)) +#define BLK_HAS_ATAG(B) \ + (!!((B)->bhdr & ATAG_BLK_HDR_FLG)) + +#define GET_BLK_ATAG(B) \ + (ASSERT(BLK_HAS_ATAG(B)), \ + ((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1]) +#define SET_BLK_ATAG(B, T) \ + ((B)->bhdr |= ATAG_BLK_HDR_FLG, \ + ((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1] = (T)) + +#define BLK_ATAG_SZ(AP) ((AP)->atags ? sizeof(alcu_atag_t) : 0) + #define UMEMSZ2BLKSZ(AP, SZ) \ - (ABLK_HDR_SZ + (SZ) <= (AP)->min_block_size \ + (ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ) <= (AP)->min_block_size \ ? (AP)->min_block_size \ - : UNIT_CEILING(ABLK_HDR_SZ + (SZ))) + : UNIT_CEILING(ABLK_HDR_SZ + BLK_ATAG_SZ(AP) + (SZ))) #define UMEM2BLK(P) ((Block_t *) (((char *) (P)) - ABLK_HDR_SZ)) #define BLK2UMEM(P) ((void *) (((char *) (P)) + ABLK_HDR_SZ)) @@ -167,13 +218,13 @@ MBC after deallocating first block: (((FreeBlkFtr_t *) (((char *) (B)) + (SZ)))[-1] = (SZ)) #define SET_MBC_ABLK_SZ(B, SZ) \ - (ASSERT(((SZ) & FLG_MASK) == 0), \ + (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \ (B)->bhdr = (((B)->bhdr) & ~MBC_ABLK_SZ_MASK) | (SZ)) #define SET_MBC_FBLK_SZ(B, SZ) \ - (ASSERT(((SZ) & FLG_MASK) == 0), \ + (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \ (B)->bhdr = (((B)->bhdr) & ~MBC_FBLK_SZ_MASK) | (SZ)) #define SET_SBC_BLK_SZ(B, SZ) \ - (ASSERT(((SZ) & FLG_MASK) == 0), \ + (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \ (B)->bhdr = (((B)->bhdr) & ~SBC_BLK_SZ_MASK) | (SZ)) #define SET_PREV_BLK_FREE(AP,B) \ (ASSERT(!IS_MBC_FIRST_BLK(AP,B)), \ @@ -199,12 +250,12 @@ MBC after deallocating first block: # define SET_MBC_ABLK_HDR(B, Sz, F, C) \ (ASSERT(((Sz) & ~MBC_ABLK_SZ_MASK) == 0), \ - ASSERT(!((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \ + ASSERT(!((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \ (B)->bhdr = ((Sz) | (F) | (BLK_CARRIER_OFFSET(B,C) << MBC_ABLK_OFFSET_SHIFT))) # define SET_MBC_FBLK_HDR(B, Sz, F, C) \ (ASSERT(((Sz) & ~MBC_FBLK_SZ_MASK) == 0), \ - ASSERT(((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \ + ASSERT(((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \ (B)->bhdr = ((Sz) | (F)), \ (B)->u.carrier = (C)) @@ -221,8 +272,8 @@ MBC after deallocating first block: # define SET_BLK_FREE(B) \ (ASSERT(!IS_PREV_BLK_FREE(B)), \ (B)->u.carrier = ABLK_TO_MBC(B), \ - (B)->bhdr |= THIS_FREE_BLK_HDR_FLG, \ - (B)->bhdr &= (MBC_ABLK_SZ_MASK|FLG_MASK)) + (B)->bhdr &= (MBC_ABLK_SZ_MASK|LAST_BLK_HDR_FLG), \ + (B)->bhdr |= THIS_FREE_BLK_HDR_FLG) # define SET_BLK_ALLOCED(B) \ (ASSERT(((B)->bhdr & (MBC_ABLK_OFFSET_MASK|THIS_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \ @@ -234,15 +285,16 @@ MBC after deallocating first block: # define MBC_SZ_MAX_LIMIT ((UWord)~0) # define SET_MBC_ABLK_HDR(B, Sz, F, C) \ - (ASSERT(((Sz) & FLG_MASK) == 0), \ - ASSERT(!((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \ - ASSERT((UWord)(F) < SBC_BLK_HDR_FLG), \ + (ASSERT(((Sz) & BLK_FLG_MASK) == 0), \ + ASSERT(((F) & ~BLK_FLG_MASK) == 0), \ + ASSERT(!((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \ (B)->bhdr = ((Sz) | (F)), \ (B)->carrier = (C)) # define SET_MBC_FBLK_HDR(B, Sz, F, C) \ - (ASSERT(((Sz) & FLG_MASK) == 0), \ - ASSERT(((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \ + (ASSERT(((Sz) & BLK_FLG_MASK) == 0), \ + ASSERT(((F) & ~BLK_FLG_MASK) == 0), \ + ASSERT(((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \ (B)->bhdr = ((Sz) | (F)), \ (B)->carrier = (C)) @@ -261,7 +313,7 @@ MBC after deallocating first block: #endif /* !MBC_ABLK_OFFSET_BITS */ #define SET_SBC_BLK_HDR(B, Sz) \ - (ASSERT(((Sz) & FLG_MASK) == 0), (B)->bhdr = ((Sz) | (SBC_BLK_HDR_FLG))) + (ASSERT(((Sz) & BLK_FLG_MASK) == 0), (B)->bhdr = ((Sz) | (SBC_BLK_HDR_FLG))) #define BLK_UMEM_SZ(B) \ @@ -284,7 +336,7 @@ MBC after deallocating first block: #define GET_PREV_FREE_BLK_HDR_FLG(B) \ ((B)->bhdr & PREV_FREE_BLK_HDR_FLG) #define GET_BLK_HDR_FLGS(B) \ - ((B)->bhdr & FLG_MASK) + ((B)->bhdr & BLK_FLG_MASK) #define NXT_BLK(B) \ (ASSERT(IS_MBC_BLK(B)), \ @@ -305,9 +357,6 @@ MBC after deallocating first block: # define ERTS_ALC_CPOOL_DEBUG #endif -#ifndef ERTS_SMP -# undef ERTS_ALC_CPOOL_DEBUG -#endif #ifdef ERTS_ALC_CPOOL_DEBUG # define ERTS_ALC_CPOOL_ASSERT(A) \ @@ -322,13 +371,8 @@ MBC after deallocating first block: # define ERTS_ALC_CPOOL_ASSERT(A) ((void) 1) #endif -#ifdef ERTS_SMP #define ERTS_ALC_IS_CPOOL_ENABLED(A) ((A)->cpool.util_limit) -#else -#define ERTS_ALC_IS_CPOOL_ENABLED(A) (0) -#endif -#ifdef ERTS_SMP #define ERTS_ALC_CPOOL_MAX_DISABLE_ABANDON 1000 #define ERTS_ALC_CPOOL_ALLOC_OP_INC 8 @@ -367,11 +411,6 @@ do { \ } \ } while (0) -#else -#define ERTS_ALC_CPOOL_ALLOC_OP(A) -#define ERTS_ALC_CPOOL_REALLOC_OP(A) -#define ERTS_ALC_CPOOL_FREE_OP(A) -#endif #define ERTS_CRR_ALCTR_FLG_IN_POOL (((erts_aint_t) 1) << 0) #define ERTS_CRR_ALCTR_FLG_BUSY (((erts_aint_t) 1) << 1) @@ -380,17 +419,10 @@ do { \ ERTS_CRR_ALCTR_FLG_BUSY | \ ERTS_CRR_ALCTR_FLG_HOMECOMING) -#ifdef ERTS_SMP #define SBC_HEADER_SIZE \ (UNIT_CEILING(offsetof(Carrier_t, cpool) \ + ABLK_HDR_SZ) \ - ABLK_HDR_SZ) -#else -#define SBC_HEADER_SIZE \ - (UNIT_CEILING(sizeof(Carrier_t) \ - + ABLK_HDR_SZ) \ - - ABLK_HDR_SZ) -#endif #define MBC_HEADER_SIZE(AP) ((AP)->mbc_header_size) @@ -403,8 +435,8 @@ do { \ #define SCH_SBC SBC_CARRIER_HDR_FLAG #define SET_CARRIER_HDR(C, Sz, F, AP) \ - (ASSERT(((Sz) & FLG_MASK) == 0), (C)->chdr = ((Sz) | (F)), \ - erts_smp_atomic_init_nob(&(C)->allctr, (erts_aint_t) (AP))) + (ASSERT(((Sz) & CRR_FLG_MASK) == 0), (C)->chdr = ((Sz) | (F)), \ + erts_atomic_init_nob(&(C)->allctr, (erts_aint_t) (AP))) #define BLK_TO_SBC(B) \ ((Carrier_t *) (((char *) (B)) - SBC_HEADER_SIZE)) @@ -428,8 +460,8 @@ do { \ (!IS_SB_CARRIER((C))) #define SET_CARRIER_SZ(C, SZ) \ - (ASSERT(((SZ) & FLG_MASK) == 0), \ - ((C)->chdr = ((C)->chdr & FLG_MASK) | (SZ))) + (ASSERT(((SZ) & CRR_FLG_MASK) == 0), \ + ((C)->chdr = ((C)->chdr & CRR_FLG_MASK) | (SZ))) #define CFLG_SBC (1 << 0) #define CFLG_MBC (1 << 1) @@ -559,10 +591,12 @@ do { \ STAT_MSEG_MBC_ALLOC((AP), csz__); \ else \ STAT_SYS_ALLOC_MBC_ALLOC((AP), csz__); \ - (AP)->mbcs.blocks.curr.no += (CRR)->cpool.blocks; \ + set_new_allctr_abandon_limit(AP); \ + (AP)->mbcs.blocks.curr.no += (CRR)->cpool.blocks[(AP)->alloc_no]; \ if ((AP)->mbcs.blocks.max.no < (AP)->mbcs.blocks.curr.no) \ (AP)->mbcs.blocks.max.no = (AP)->mbcs.blocks.curr.no; \ - (AP)->mbcs.blocks.curr.size += (CRR)->cpool.blocks_size; \ + (AP)->mbcs.blocks.curr.size += \ + (CRR)->cpool.blocks_size[(AP)->alloc_no]; \ if ((AP)->mbcs.blocks.max.size < (AP)->mbcs.blocks.curr.size) \ (AP)->mbcs.blocks.max.size = (AP)->mbcs.blocks.curr.size; \ } while (0) @@ -585,30 +619,34 @@ do { \ DEBUG_CHECK_CARRIER_NO_SZ((AP)); \ } while (0) -#define STAT_MBC_ABANDON(AP, CRR) \ -do { \ - UWord csz__ = CARRIER_SZ((CRR)); \ - if (IS_MSEG_CARRIER((CRR))) \ - STAT_MSEG_MBC_FREE((AP), csz__); \ - else \ - STAT_SYS_ALLOC_MBC_FREE((AP), csz__); \ - ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.no \ - >= (CRR)->cpool.blocks); \ - (AP)->mbcs.blocks.curr.no -= (CRR)->cpool.blocks; \ - ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.size \ - >= (CRR)->cpool.blocks_size); \ - (AP)->mbcs.blocks.curr.size -= (CRR)->cpool.blocks_size; \ +#define STAT_MBC_FREE(AP, CRR) \ +do { \ + UWord csz__ = CARRIER_SZ((CRR)); \ + if (IS_MSEG_CARRIER((CRR))) { \ + STAT_MSEG_MBC_FREE((AP), csz__); \ + } else { \ + STAT_SYS_ALLOC_MBC_FREE((AP), csz__); \ + } \ + set_new_allctr_abandon_limit(AP); \ } while (0) -#ifdef ERTS_SMP -#define STAT_MBC_BLK_ALLOC_CRR(CRR, BSZ) \ +#define STAT_MBC_ABANDON(AP, CRR) \ +do { \ + STAT_MBC_FREE(AP, CRR); \ + ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.no \ + >= (CRR)->cpool.blocks[(AP)->alloc_no]); \ + (AP)->mbcs.blocks.curr.no -= (CRR)->cpool.blocks[(AP)->alloc_no]; \ + ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.size \ + >= (CRR)->cpool.blocks_size[(AP)->alloc_no]); \ + (AP)->mbcs.blocks.curr.size -= (CRR)->cpool.blocks_size[(AP)->alloc_no]; \ +} while (0) + +#define STAT_MBC_BLK_ALLOC_CRR(AP, CRR, BSZ) \ do { \ - (CRR)->cpool.blocks++; \ - (CRR)->cpool.blocks_size += (BSZ); \ + (CRR)->cpool.blocks[(AP)->alloc_no]++; \ + (CRR)->cpool.blocks_size[(AP)->alloc_no] += (BSZ); \ + (CRR)->cpool.total_blocks_size += (BSZ); \ } while (0) -#else -#define STAT_MBC_BLK_ALLOC_CRR(CRR, BSZ) ((void) (CRR)) /* Get rid of warning */ -#endif #define STAT_MBC_BLK_ALLOC(AP, CRR, BSZ, FLGS) \ do { \ @@ -619,54 +657,67 @@ do { \ cstats__->blocks.curr.size += (BSZ); \ if (cstats__->blocks.max.size < cstats__->blocks.curr.size) \ cstats__->blocks.max.size = cstats__->blocks.curr.size; \ - STAT_MBC_BLK_ALLOC_CRR((CRR), (BSZ)); \ + STAT_MBC_BLK_ALLOC_CRR((AP), (CRR), (BSZ)); \ } while (0) static ERTS_INLINE int stat_cpool_mbc_blk_free(Allctr_t *allctr, + ErtsAlcType_t type, Carrier_t *crr, Carrier_t **busy_pcrr_pp, UWord blksz) { -#ifdef ERTS_SMP + Allctr_t *orig_allctr; + int alloc_no; - ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks > 0); - crr->cpool.blocks--; - ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size >= blksz); - crr->cpool.blocks_size -= blksz; + alloc_no = ERTS_ALC_T2A(type); - if (!busy_pcrr_pp || !*busy_pcrr_pp) - return 0; + ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks[alloc_no] > 0); + crr->cpool.blocks[alloc_no]--; + ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size[alloc_no] >= blksz); + crr->cpool.blocks_size[alloc_no] -= blksz; + ERTS_ALC_CPOOL_ASSERT(crr->cpool.total_blocks_size >= blksz); + crr->cpool.total_blocks_size -= blksz; + + if (allctr->alloc_no == alloc_no && (!busy_pcrr_pp || !*busy_pcrr_pp)) { + /* This is a local block, so we should not update the pool + * statistics. */ + return 0; + } - ERTS_ALC_CPOOL_ASSERT(crr == *busy_pcrr_pp); + /* This is either a foreign block that's been fetched from the pool, or any + * block that's in the pool. The carrier's owner keeps the statistics for + * both pooled and foreign blocks. */ + + orig_allctr = crr->cpool.orig_allctr; + + ERTS_ALC_CPOOL_ASSERT(alloc_no != allctr->alloc_no || + (crr == *busy_pcrr_pp && allctr == orig_allctr)); #ifdef ERTS_ALC_CPOOL_DEBUG ERTS_ALC_CPOOL_ASSERT( - erts_atomic_dec_read_nob(&allctr->cpool.stat.no_blocks) >= 0); + erts_atomic_dec_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]) >= 0); ERTS_ALC_CPOOL_ASSERT( - erts_atomic_add_read_nob(&allctr->cpool.stat.blocks_size, + erts_atomic_add_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no], -((erts_aint_t) blksz)) >= 0); #else - erts_atomic_dec_nob(&allctr->cpool.stat.no_blocks); - erts_atomic_add_nob(&allctr->cpool.stat.blocks_size, + erts_atomic_dec_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]); + erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no], -((erts_aint_t) blksz)); #endif return 1; -#else - return 0; -#endif } -#define STAT_MBC_BLK_FREE(AP, CRR, BPCRRPP, BSZ, FLGS) \ -do { \ - if (!stat_cpool_mbc_blk_free((AP), (CRR), (BPCRRPP), (BSZ))) { \ - CarriersStats_t *cstats__ = &(AP)->mbcs; \ - ASSERT(cstats__->blocks.curr.no > 0); \ - cstats__->blocks.curr.no--; \ - ASSERT(cstats__->blocks.curr.size >= (BSZ)); \ - cstats__->blocks.curr.size -= (BSZ); \ - } \ +#define STAT_MBC_BLK_FREE(AP, TYPE, CRR, BPCRRPP, BSZ, FLGS) \ +do { \ + if (!stat_cpool_mbc_blk_free((AP), (TYPE), (CRR), (BPCRRPP), (BSZ))) { \ + CarriersStats_t *cstats__ = &(AP)->mbcs; \ + ASSERT(cstats__->blocks.curr.no > 0); \ + cstats__->blocks.curr.no--; \ + ASSERT(cstats__->blocks.curr.size >= (BSZ)); \ + cstats__->blocks.curr.size -= (BSZ); \ + } \ } while (0) /* Debug stuff... */ @@ -691,12 +742,7 @@ do { \ #endif #ifdef DEBUG -#ifdef USE_THREADS -# ifdef ERTS_SMP # define IS_ACTUALLY_BLOCKING (erts_thr_progress_is_blocking()) -# else -# define IS_ACTUALLY_BLOCKING 0 -# endif #define ERTS_ALCU_DBG_CHK_THR_ACCESS(A) \ do { \ if (!(A)->thread_safe && !IS_ACTUALLY_BLOCKING) { \ @@ -705,7 +751,7 @@ do { \ (A)->debug.saved_tid = 1; \ } \ else { \ - ERTS_SMP_LC_ASSERT( \ + ERTS_LC_ASSERT( \ ethr_equal_tids((A)->debug.tid, erts_thr_self())); \ } \ } \ @@ -713,16 +759,69 @@ do { \ #else #define ERTS_ALCU_DBG_CHK_THR_ACCESS(A) #endif -#else -#define ERTS_ALCU_DBG_CHK_THR_ACCESS(A) -#endif static void make_name_atoms(Allctr_t *allctr); static Block_t *create_carrier(Allctr_t *, Uint, UWord); static void destroy_carrier(Allctr_t *, Block_t *, Carrier_t **); -static void mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp); -static void dealloc_block(Allctr_t *, void *, ErtsAlcFixList_t *, int); +static void mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp); +static void dealloc_block(Allctr_t *, ErtsAlcType_t, Uint32, void *, ErtsAlcFixList_t *); + +static alcu_atag_t determine_alloc_tag(Allctr_t *allocator, ErtsAlcType_t type) +{ + ErtsSchedulerData *esdp; + Eterm id; + + ERTS_CT_ASSERT(_unchecked_atom_val(am_system) <= MAX_ATAG_ATOM_ID); + ASSERT(allocator->atags); + + esdp = erts_get_scheduler_data(); + id = am_system; + + if (esdp) { + if (esdp->current_nif) { + Module *mod = erts_nif_get_module((esdp->current_nif)->mod_nif); + + /* Mod can be NULL if a resource destructor allocates memory after + * the module has been unloaded. */ + if (mod) { + id = make_atom(mod->module); + } + } else if (esdp->current_port) { + Port *p = esdp->current_port; + id = (p->drv_ptr)->name_atom; + } + + /* We fall back to 'system' if we can't pack the driver/NIF name into + * the tag. This may be a bit misleading but we've made no promises + * that the information is complete. + * + * This can only happen on 32-bit emulators when a new driver/NIF has + * been loaded *after* 16 million atoms have been used, and supporting + * that fringe case is not worth an extra word. 64-bit emulators are + * unaffected since the atom cache limits atom indexes to 32 bits. */ + if(MAX_ATOM_TABLE_SIZE > MAX_ATAG_ATOM_ID) { + if (atom_val(id) > MAX_ATAG_ATOM_ID) { + id = am_system; + } + } + } + + return MAKE_ATAG(id, ERTS_ALC_T2N(type)); +} + +static void set_alloc_tag(Allctr_t *allocator, void *p, alcu_atag_t tag) +{ + Block_t *block; + + ASSERT(DBG_IS_VALID_ATAG(tag)); + ASSERT(allocator->atags && p); + (void)allocator; + + block = UMEM2BLK(p); + + SET_BLK_ATAG(block, tag); +} /* internal data... */ @@ -778,6 +877,8 @@ static ERTS_INLINE void clr_bit(UWord* map, Uint ix) &= ~((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#ifdef DEBUG + static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) { ASSERT(ix / ERTS_VSPACE_WORD_BITS < VSPACE_MAP_SZ); @@ -785,6 +886,8 @@ static ERTS_INLINE int is_bit_set(UWord* map, Uint ix) & ((UWord)1 << (ix % ERTS_VSPACE_WORD_BITS)); } +#endif + UWord erts_literal_vspace_map[VSPACE_MAP_SZ]; static void set_literal_range(void* start, Uint size) @@ -864,7 +967,7 @@ erts_alcu_literal_32_mseg_alloc(Allctr_t *allctr, Uint *size_p, Uint flags) Uint sz = ERTS_SUPERALIGNED_CEILING(*size_p); ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); res = erts_alcu_mseg_alloc(allctr, &sz, flags); if (res) { @@ -882,7 +985,7 @@ erts_alcu_literal_32_mseg_realloc(Allctr_t *allctr, void *seg, Uint new_sz = ERTS_SUPERALIGNED_CEILING(*new_size_p); ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); if (seg && old_size) clear_literal_range(seg, old_size); @@ -900,7 +1003,7 @@ erts_alcu_literal_32_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size, { ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); erts_alcu_mseg_dealloc(allctr, seg, size, flags); @@ -910,7 +1013,7 @@ erts_alcu_literal_32_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size, #elif defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION) /* For allocators that have their own mmapper (super carrier), - * like literal_alloc and exec_alloc on amd64 + * like literal_alloc. */ void* erts_alcu_mmapper_mseg_alloc(Allctr_t *allctr, Uint *size_p, Uint flags) @@ -952,10 +1055,10 @@ erts_alcu_mmapper_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size, } #endif /* ARCH_64 && ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION */ -#if defined(ERTS_ALC_A_EXEC) && !defined(ERTS_HAVE_EXEC_MMAPPER) +#if defined(ERTS_ALC_A_EXEC) /* - * For exec_alloc on non-amd64 that just need memory with PROT_EXEC + * For exec_alloc that need memory with PROT_EXEC */ void* erts_alcu_exec_mseg_alloc(Allctr_t *allctr, Uint *size_p, Uint flags) @@ -994,7 +1097,7 @@ erts_alcu_exec_mseg_dealloc(Allctr_t *allctr, void *seg, Uint size, Uint flags) ASSERT(r == 0); (void)r; erts_alcu_mseg_dealloc(allctr, seg, size, flags); } -#endif /* ERTS_ALC_A_EXEC && !ERTS_HAVE_EXEC_MMAPPER */ +#endif /* ERTS_ALC_A_EXEC */ #endif /* HAVE_ERTS_MSEG */ @@ -1060,7 +1163,7 @@ erts_alcu_literal_32_sys_alloc(Allctr_t *allctr, Uint* size_p, int superalign) Uint size = ERTS_SUPERALIGNED_CEILING(*size_p); ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); res = erts_alcu_sys_alloc(allctr, &size, 1); if (res) { @@ -1078,7 +1181,7 @@ erts_alcu_literal_32_sys_realloc(Allctr_t *allctr, void *ptr, Uint* size_p, Uint ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); if (ptr && old_size) clear_literal_range(ptr, old_size); @@ -1095,7 +1198,7 @@ erts_alcu_literal_32_sys_dealloc(Allctr_t *allctr, void *ptr, Uint size, int sup { ERTS_LC_ASSERT(allctr->alloc_no == ERTS_ALC_A_LITERAL && allctr->t == 0); - ERTS_SMP_LC_ASSERT(allctr->thread_safe); + ERTS_LC_ASSERT(allctr->thread_safe); erts_alcu_sys_dealloc(allctr, ptr, size, 1); @@ -1197,8 +1300,6 @@ unlink_carrier(CarrierList_t *cl, Carrier_t *crr) #endif } -#ifdef ERTS_SMP - static ERTS_INLINE int is_abandoned(Carrier_t *crr) { return crr->cpool.state != ERTS_MBC_IS_HOME; @@ -1222,18 +1323,17 @@ clear_busy_pool_carrier(Allctr_t *allctr, Carrier_t *crr) max_size = (erts_aint_t) allctr->largest_fblk_in_mbc(allctr, crr); erts_atomic_set_nob(&crr->cpool.max_size, max_size); - iallctr = erts_smp_atomic_read_nob(&crr->allctr); + iallctr = erts_atomic_read_nob(&crr->allctr); ERTS_ALC_CPOOL_ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == ((erts_aint_t)allctr | ERTS_CRR_ALCTR_FLG_IN_POOL | ERTS_CRR_ALCTR_FLG_BUSY)); iallctr &= ~ERTS_CRR_ALCTR_FLG_BUSY; - erts_smp_atomic_set_relb(&crr->allctr, iallctr); + erts_atomic_set_relb(&crr->allctr, iallctr); } } -#endif /* ERTS_SMP */ #if 0 #define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B) \ @@ -1255,32 +1355,9 @@ chk_fix_list(Allctr_t *allctr, ErtsAlcFixList_t *fix, int ix, int before) #define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B) #endif +static ERTS_INLINE Allctr_t *get_pref_allctr(void *extra); static void *mbc_alloc(Allctr_t *allctr, Uint size); -#ifdef ERTS_SMP -typedef struct { - ErtsAllctrDDBlock_t ddblock__; /* must be first */ - ErtsAlcType_t fix_type; -} ErtsAllctrFixDDBlock_t; -#endif - -#define ERTS_ALC_FIX_NO_UNUSE (((ErtsAlcType_t) 1) << ERTS_ALC_N_BITS) - -static ERTS_INLINE void -dealloc_fix_block(Allctr_t *allctr, - ErtsAlcType_t type, - void *ptr, - ErtsAlcFixList_t *fix, - int dec_cc_on_redirect) -{ -#ifdef ERTS_SMP - /* May be redirected... */ - ASSERT((type & ERTS_ALC_FIX_NO_UNUSE) == 0); - ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type = type | ERTS_ALC_FIX_NO_UNUSE; -#endif - dealloc_block(allctr, ptr, fix, dec_cc_on_redirect); -} - static ERTS_INLINE void sched_fix_shrink(Allctr_t *allctr, int on) { @@ -1311,12 +1388,10 @@ fix_cpool_check_shrink(Allctr_t *allctr, fix->u.cpool.shrink_list = 0; else { void *p; -#ifdef ERTS_SMP if (busy_pcrr_pp) { clear_busy_pool_carrier(allctr, *busy_pcrr_pp); *busy_pcrr_pp = NULL; } -#endif fix->u.cpool.shrink_list--; p = fix->list; fix->list = *((void **) p); @@ -1324,7 +1399,7 @@ fix_cpool_check_shrink(Allctr_t *allctr, if (fix->u.cpool.min_list_size > fix->list_size) fix->u.cpool.min_list_size = fix->list_size; - dealloc_fix_block(allctr, type, p, fix, 0); + dealloc_block(allctr, type, DEALLOC_FLG_FIX_SHRINK, p, fix); } } } @@ -1335,11 +1410,9 @@ fix_cpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size) void *res; ErtsAlcFixList_t *fix; - ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type - && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE); - - fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE]; - ASSERT(size == fix->type_size); + fix = &allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)]; + ASSERT(type == fix->type && size == fix->type_size); + ASSERT(size >= sizeof(ErtsAllctrDDBlock_t)); res = fix->list; if (res) { @@ -1368,21 +1441,39 @@ fix_cpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size) static ERTS_INLINE void fix_cpool_free(Allctr_t *allctr, ErtsAlcType_t type, + Uint32 flags, void *p, - Carrier_t **busy_pcrr_pp, - int unuse) + Carrier_t **busy_pcrr_pp) { ErtsAlcFixList_t *fix; + Allctr_t *fix_allctr; - ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type - && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE); + /* If this isn't a fix allocator we need to update the fix list of our + * neighboring fix_alloc to keep the statistics consistent. */ + if (!allctr->fix) { + ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[ERTS_ALC_A_FIXED_SIZE]; + fix_allctr = get_pref_allctr(tspec); + ASSERT(!fix_allctr->thread_safe); + ASSERT(allctr != fix_allctr); + } + else { + fix_allctr = allctr; + } - fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE]; + ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(fix_allctr)); + ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(allctr)); - if (unuse) - fix->u.cpool.used--; + fix = &fix_allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)]; + ASSERT(type == fix->type); - if ((!busy_pcrr_pp || !*busy_pcrr_pp) + if (!(flags & DEALLOC_FLG_FIX_SHRINK)) { + fix->u.cpool.used--; + } + + /* We don't want foreign blocks to be long-lived, so we skip recycling if + * allctr != fix_allctr. */ + if (allctr == fix_allctr + && (!busy_pcrr_pp || !*busy_pcrr_pp) && !fix->u.cpool.shrink_list && fix->list_size < ERTS_ALCU_FIX_MAX_LIST_SZ) { *((void **) p) = fix->list; @@ -1395,7 +1486,7 @@ fix_cpool_free(Allctr_t *allctr, if (IS_SBC_BLK(blk)) destroy_carrier(allctr, blk, NULL); else - mbc_free(allctr, p, busy_pcrr_pp); + mbc_free(allctr, type, p, busy_pcrr_pp); fix->u.cpool.allocated--; fix_cpool_check_shrink(allctr, type, fix, busy_pcrr_pp); } @@ -1409,10 +1500,8 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) int ix, o; int flush = flgs == 0; -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); -#endif for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) { ErtsAlcFixList_t *fix = &allctr->fix[ix]; @@ -1424,7 +1513,7 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) fix->u.cpool.shrink_list = fix->u.cpool.min_list_size; fix->u.cpool.min_list_size = fix->list_size; } - type = (ErtsAlcType_t) (ix + ERTS_ALC_N_MIN_A_FIXED_SIZE); + type = ERTS_ALC_N2T((ErtsAlcType_t) (ix + ERTS_ALC_N_MIN_A_FIXED_SIZE)); for (o = 0; o < ERTS_ALC_FIX_MAX_SHRINK_OPS || flush; o++) { void *ptr; @@ -1438,7 +1527,7 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) fix->list = *((void **) ptr); fix->list_size--; fix->u.cpool.shrink_list--; - dealloc_fix_block(allctr, type, ptr, fix, 0); + dealloc_block(allctr, type, DEALLOC_FLG_FIX_SHRINK, ptr, fix); } if (fix->u.cpool.min_list_size > fix->list_size) fix->u.cpool.min_list_size = fix->list_size; @@ -1452,10 +1541,8 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) if (all_empty) sched_fix_shrink(allctr, 0); -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); -#endif return res; } @@ -1466,11 +1553,9 @@ fix_nocpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size) ErtsAlcFixList_t *fix; void *res; - ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type - && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE); - - fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE]; - ASSERT(size == fix->type_size); + fix = &allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)]; + ASSERT(type == fix->type && size == fix->type_size); + ASSERT(size >= sizeof(ErtsAllctrDDBlock_t)); ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1); fix->u.nocpool.used++; @@ -1487,7 +1572,7 @@ fix_nocpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size) if (IS_SBC_BLK(blk)) destroy_carrier(allctr, blk, NULL); else - mbc_free(allctr, p, NULL); + mbc_free(allctr, type, p, NULL); fix->u.nocpool.allocated--; } ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0); @@ -1522,10 +1607,8 @@ fix_nocpool_free(Allctr_t *allctr, Block_t *blk; ErtsAlcFixList_t *fix; - ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type - && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE); - - fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE]; + fix = &allctr->fix[ERTS_ALC_T2N(type) - ERTS_ALC_N_MIN_A_FIXED_SIZE]; + ASSERT(fix->type == type); ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1); fix->u.nocpool.used--; @@ -1544,7 +1627,7 @@ fix_nocpool_free(Allctr_t *allctr, if (IS_SBC_BLK(blk)) destroy_carrier(allctr, blk, NULL); else - mbc_free(allctr, p, NULL); + mbc_free(allctr, type, p, NULL); p = fix->list; fix->list = *((void **) p); fix->list_size--; @@ -1555,7 +1638,7 @@ fix_nocpool_free(Allctr_t *allctr, if (IS_SBC_BLK(blk)) destroy_carrier(allctr, blk, NULL); else - mbc_free(allctr, p, NULL); + mbc_free(allctr, type, p, NULL); ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0); } @@ -1567,10 +1650,8 @@ fix_nocpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) int ix, o; int flush = flgs == 0; -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); -#endif for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) { ErtsAlcFixList_t *fix = &allctr->fix[ix]; @@ -1598,7 +1679,7 @@ fix_nocpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) ptr = fix->list; fix->list = *((void **) ptr); fix->list_size--; - dealloc_block(allctr, ptr, NULL, 0); + dealloc_block(allctr, fix->type, 0, ptr, NULL); fix->u.nocpool.allocated--; } if (fix->list_size != 0) { @@ -1612,10 +1693,8 @@ fix_nocpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs) if (all_empty) sched_fix_shrink(allctr, 0); -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); -#endif return res; } @@ -1641,8 +1720,8 @@ dealloc_mbc(Allctr_t *allctr, Carrier_t *crr) dealloc_carrier(allctr, crr, 1); } -#ifdef ERTS_SMP +static UWord allctr_abandon_limit(Allctr_t *allctr); static void set_new_allctr_abandon_limit(Allctr_t*); static void abandon_carrier(Allctr_t*, Carrier_t*); static void poolify_my_carrier(Allctr_t*, Carrier_t*); @@ -1687,7 +1766,7 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, crr = BLK_TO_SBC(blk); if (sizep) *sizep = SBC_BLK_SZ(blk) - ABLK_HDR_SZ; - iallctr = erts_smp_atomic_read_dirty(&crr->allctr); + iallctr = erts_atomic_read_dirty(&crr->allctr); } else { crr = ABLK_TO_MBC(blk); @@ -1695,10 +1774,10 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, if (sizep) *sizep = MBC_ABLK_SZ(blk) - ABLK_HDR_SZ; if (!ERTS_ALC_IS_CPOOL_ENABLED(pref_allctr)) - iallctr = erts_smp_atomic_read_dirty(&crr->allctr); + iallctr = erts_atomic_read_dirty(&crr->allctr); else { int locked_pref_allctr = 0; - iallctr = erts_smp_atomic_read_ddrb(&crr->allctr); + iallctr = erts_atomic_read_ddrb(&crr->allctr); if (ERTS_ALC_TS_PREF_LOCK_IF_USED == pref_lock && pref_allctr->thread_safe) { @@ -1722,14 +1801,14 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, * We need a mathing read barrier to guarantee a correct view * of the carrier for deallocation work. */ - act = erts_smp_atomic_cmpxchg_rb(&crr->allctr, - iallctr|ERTS_CRR_ALCTR_FLG_BUSY, - iallctr); + act = erts_atomic_cmpxchg_rb(&crr->allctr, + iallctr|ERTS_CRR_ALCTR_FLG_BUSY, + iallctr); } else { - act = erts_smp_atomic_cmpxchg_ddrb(&crr->allctr, - iallctr|ERTS_CRR_ALCTR_FLG_BUSY, - iallctr); + act = erts_atomic_cmpxchg_ddrb(&crr->allctr, + iallctr|ERTS_CRR_ALCTR_FLG_BUSY, + iallctr); } if (act == iallctr) { *busy_pcrr_pp = crr; @@ -1764,7 +1843,7 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep, static void init_dd_queue(ErtsAllctrDDQueue_t *ddq) { - erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL); + erts_atomic_init_nob(&ddq->tail.data.marker.u.atmc_next, ERTS_AINT_NULL); erts_atomic_init_nob(&ddq->tail.data.last, (erts_aint_t) &ddq->tail.data.marker); erts_atomic_init_nob(&ddq->tail.data.um_refc[0], 0); @@ -1785,17 +1864,17 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) erts_aint_t itmp; ErtsAllctrDDBlock_t *enq, *this = ptr; - erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL); + erts_atomic_init_nob(&this->u.atmc_next, ERTS_AINT_NULL); /* Enqueue at end of list... */ enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last); - itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next, + itmp = erts_atomic_cmpxchg_relb(&enq->u.atmc_next, (erts_aint_t) this, ERTS_AINT_NULL); if (itmp == ERTS_AINT_NULL) { /* We are required to move last pointer */ #ifdef DEBUG - ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next)); + ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->u.atmc_next)); ASSERT(((erts_aint_t) enq) == erts_atomic_xchg_relb(&ddq->tail.data.last, (erts_aint_t) this)); @@ -1813,8 +1892,8 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) while (1) { erts_aint_t itmp2; - erts_atomic_set_nob(&this->atmc_next, itmp); - itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next, + erts_atomic_set_nob(&this->u.atmc_next, itmp); + itmp2 = erts_atomic_cmpxchg_relb(&enq->u.atmc_next, (erts_aint_t) this, itmp); if (itmp == itmp2) @@ -1823,7 +1902,7 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit) itmp = itmp2; else { enq = (ErtsAllctrDDBlock_t *) itmp2; - itmp = erts_atomic_read_acqb(&enq->atmc_next); + itmp = erts_atomic_read_acqb(&enq->u.atmc_next); ASSERT(itmp != ERTS_AINT_NULL); } i++; @@ -1839,8 +1918,8 @@ check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast) erts_aint_t itmp; ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast; - erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL); - itmp = erts_atomic_cmpxchg_relb(&last->atmc_next, + erts_atomic_init_nob(&ddq->tail.data.marker.u.atmc_next, ERTS_AINT_NULL); + itmp = erts_atomic_cmpxchg_relb(&last->u.atmc_next, (erts_aint_t) &ddq->tail.data.marker, ERTS_AINT_NULL); if (itmp == ERTS_AINT_NULL) { @@ -1891,7 +1970,7 @@ ddq_dequeue(ErtsAllctrDDQueue_t *ddq) ASSERT(ddq->head.used_marker); ddq->head.used_marker = 0; blk = ((ErtsAllctrDDBlock_t *) - erts_atomic_read_nob(&blk->atmc_next)); + erts_atomic_read_nob(&blk->u.atmc_next)); if (blk == ddq->head.unref_end) { ddq->head.first = blk; return NULL; @@ -1899,7 +1978,7 @@ ddq_dequeue(ErtsAllctrDDQueue_t *ddq) } ddq->head.first = ((ErtsAllctrDDBlock_t *) - erts_atomic_read_nob(&blk->atmc_next)); + erts_atomic_read_nob(&blk->u.atmc_next)); ASSERT(ddq->head.first); @@ -1961,19 +2040,13 @@ check_pending_dealloc_carrier(Allctr_t *allctr, int *need_more_work); static void -handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr) +handle_delayed_fix_dealloc(Allctr_t *allctr, ErtsAlcType_t type, Uint32 flags, + void *ptr) { - ErtsAlcType_t type; - - type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type; - - ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE - <= (type & ~ERTS_ALC_FIX_NO_UNUSE)); - ASSERT((type & ~ERTS_ALC_FIX_NO_UNUSE) - <= ERTS_ALC_N_MAX_A_FIXED_SIZE); + ASSERT(ERTS_ALC_IS_FIX_TYPE(type)); if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr)) - fix_nocpool_free(allctr, (type & ~ERTS_ALC_FIX_NO_UNUSE), ptr); + fix_nocpool_free(allctr, type, ptr); else { Block_t *blk = UMEM2BLK(ptr); Carrier_t *busy_pcrr_p; @@ -1988,20 +2061,24 @@ handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr) NULL, &busy_pcrr_p); if (used_allctr == allctr) { doit: - fix_cpool_free(allctr, (type & ~ERTS_ALC_FIX_NO_UNUSE), - ptr, &busy_pcrr_p, - !(type & ERTS_ALC_FIX_NO_UNUSE)); + fix_cpool_free(allctr, type, flags, ptr, &busy_pcrr_p); clear_busy_pool_carrier(allctr, busy_pcrr_p); } else { /* Carrier migrated; need to redirect block to new owner... */ - int cinit = used_allctr->dd.ix - allctr->dd.ix; + ErtsAllctrDDBlock_t *dd_block; + int cinit; + + dd_block = (ErtsAllctrDDBlock_t*)ptr; + dd_block->flags = flags; + dd_block->type = type; ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); DEC_CC(allctr->calls.this_free); - ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type = type; + cinit = used_allctr->dd.ix - allctr->dd.ix; + if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(used_allctr->ix); } @@ -2025,7 +2102,6 @@ handle_delayed_dealloc(Allctr_t *allctr, int need_mr_wrk = 0; int have_checked_incoming = 0; int ops = 0; - ErtsAlcFixList_t *fix; int res; ErtsAllctrDDQueue_t *ddq; @@ -2034,8 +2110,6 @@ handle_delayed_dealloc(Allctr_t *allctr, ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); - fix = allctr->fix; - ddq = &allctr->dd.q; res = 0; @@ -2087,7 +2161,7 @@ handle_delayed_dealloc(Allctr_t *allctr, ERTS_ALC_CPOOL_ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(allctr)); ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); - iallctr = erts_smp_atomic_read_nob(&crr->allctr); + iallctr = erts_atomic_read_nob(&crr->allctr); ASSERT(iallctr & ERTS_CRR_ALCTR_FLG_HOMECOMING); while (1) { if ((iallctr & (~ERTS_CRR_ALCTR_FLG_MASK | @@ -2097,7 +2171,7 @@ handle_delayed_dealloc(Allctr_t *allctr, * Carrier is home (mine and not in pool) */ ASSERT(!(iallctr & ERTS_CRR_ALCTR_FLG_BUSY)); - erts_smp_atomic_set_nob(&crr->allctr, (erts_aint_t)allctr); + erts_atomic_set_nob(&crr->allctr, (erts_aint_t)allctr); if (IS_FREE_LAST_MBC_BLK(first_blk)) dealloc_my_carrier(allctr, crr); else @@ -2107,7 +2181,7 @@ handle_delayed_dealloc(Allctr_t *allctr, erts_aint_t exp = iallctr; erts_aint_t want = iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING; - iallctr = erts_smp_atomic_cmpxchg_nob(&crr->allctr, + iallctr = erts_atomic_cmpxchg_nob(&crr->allctr, want, exp); if (iallctr != exp) @@ -2124,16 +2198,27 @@ handle_delayed_dealloc(Allctr_t *allctr, } } else { + ErtsAllctrDDBlock_t *dd_block; + ErtsAlcType_t type; + Uint32 flags; + + dd_block = (ErtsAllctrDDBlock_t*)ptr; + flags = dd_block->flags; + type = dd_block->type; + + flags |= DEALLOC_FLG_REDIRECTED; + ASSERT(IS_SBC_BLK(blk) || (ABLK_TO_MBC(blk) != ErtsContainerStruct(blk, Carrier_t, cpool.homecoming_dd.blk))); INC_CC(allctr->calls.this_free); - if (fix) - handle_delayed_fix_dealloc(allctr, ptr); - else - dealloc_block(allctr, ptr, NULL, 1); + if (ERTS_ALC_IS_FIX_TYPE(type)) { + handle_delayed_fix_dealloc(allctr, type, flags, ptr); + } else { + dealloc_block(allctr, type, flags, ptr, NULL); + } } } @@ -2161,8 +2246,10 @@ enqueue_dealloc_other_instance(ErtsAlcType_t type, void *ptr, int cinit) { - if (allctr->fix) - ((ErtsAllctrFixDDBlock_t*) ptr)->fix_type = type; + ErtsAllctrDDBlock_t *dd_block = ((ErtsAllctrDDBlock_t*)ptr); + + dd_block->type = type; + dd_block->flags = 0; if (ddq_enqueue(&allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(allctr->ix); @@ -2192,10 +2279,7 @@ check_abandon_carrier(Allctr_t *allctr, Block_t *fblk, Carrier_t **busy_pcrr_pp) if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr)) return; - allctr->cpool.check_limit_count--; - if (--allctr->cpool.check_limit_count <= 0) - set_new_allctr_abandon_limit(allctr); - + ASSERT(allctr->cpool.abandon_limit == allctr_abandon_limit(allctr)); ASSERT(erts_thr_progress_is_managed_thread()); if (allctr->cpool.disable_abandon) @@ -2213,7 +2297,7 @@ check_abandon_carrier(Allctr_t *allctr, Block_t *fblk, Carrier_t **busy_pcrr_pp) if (allctr->main_carrier == crr) return; - if (crr->cpool.blocks_size > crr->cpool.abandon_limit) + if (crr->cpool.total_blocks_size > crr->cpool.abandon_limit) return; if (crr->cpool.thr_prgr != ERTS_THR_PRGR_INVALID @@ -2243,37 +2327,32 @@ erts_alcu_check_delayed_dealloc(Allctr_t *allctr, thr_prgr_p, more_work); } -#endif #define ERTS_ALCU_HANDLE_DD_IN_OP(Allctr, Locked) \ handle_delayed_dealloc((Allctr), (Locked), 1, \ ERTS_ALCU_DD_OPS_LIM_LOW, NULL, NULL, NULL) static void -dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_redirect) +dealloc_block(Allctr_t *allctr, ErtsAlcType_t type, Uint32 flags, void *ptr, + ErtsAlcFixList_t *fix) { Block_t *blk = UMEM2BLK(ptr); - ERTS_SMP_LC_ASSERT(!allctr->thread_safe + ASSERT(!fix || type == fix->type); + + ERTS_LC_ASSERT(!allctr->thread_safe || erts_lc_mtx_is_locked(&allctr->mutex)); if (IS_SBC_BLK(blk)) { destroy_carrier(allctr, blk, NULL); -#ifdef ERTS_SMP if (fix && ERTS_ALC_IS_CPOOL_ENABLED(allctr)) { - ErtsAlcType_t type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type; - if (!(type & ERTS_ALC_FIX_NO_UNUSE)) + if (!(flags & DEALLOC_FLG_FIX_SHRINK)) fix->u.cpool.used--; fix->u.cpool.allocated--; } -#endif } -#ifndef ERTS_SMP - else - mbc_free(allctr, ptr, NULL); -#else else if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr)) - mbc_free(allctr, ptr, NULL); + mbc_free(allctr, type, ptr, NULL); else { Carrier_t *busy_pcrr_p; Allctr_t *used_allctr; @@ -2282,27 +2361,33 @@ dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_ NULL, &busy_pcrr_p); if (used_allctr == allctr) { if (fix) { - ErtsAlcType_t type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type; - if (!(type & ERTS_ALC_FIX_NO_UNUSE)) + if (!(flags & DEALLOC_FLG_FIX_SHRINK)) fix->u.cpool.used--; fix->u.cpool.allocated--; } - mbc_free(allctr, ptr, &busy_pcrr_p); + mbc_free(allctr, type, ptr, &busy_pcrr_p); clear_busy_pool_carrier(allctr, busy_pcrr_p); } else { /* Carrier migrated; need to redirect block to new owner... */ - int cinit = used_allctr->dd.ix - allctr->dd.ix; + ErtsAllctrDDBlock_t *dd_block; + int cinit; + + dd_block = (ErtsAllctrDDBlock_t*)ptr; + dd_block->flags = flags; + dd_block->type = type; ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p); - if (dec_cc_on_redirect) + if (flags & DEALLOC_FLG_REDIRECTED) DEC_CC(allctr->calls.this_free); + + cinit = used_allctr->dd.ix - allctr->dd.ix; + if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit)) erts_alloc_notify_delayed_dealloc(used_allctr->ix); } } -#endif } /* Multi block carrier alloc/realloc/free ... */ @@ -2464,9 +2549,155 @@ mbc_alloc(Allctr_t *allctr, Uint size) return BLK2UMEM(blk); } +typedef struct { + char *ptr; + UWord size; +} ErtsMemDiscardRegion; + +/* Construct a discard region for the user memory of a free block, letting the + * OS reclaim its physical memory when required. + * + * Note that we're ignoring both the footer and everything that comes before + * the minimum block size as the allocator uses those areas to manage the + * block. */ +static void ERTS_INLINE +mem_discard_start(Allctr_t *allocator, Block_t *block, + ErtsMemDiscardRegion *out) +{ + UWord size = BLK_SZ(block); + + ASSERT(size >= allocator->min_block_size); + + if (size > (allocator->min_block_size + FBLK_FTR_SZ)) { + out->size = size - allocator->min_block_size - FBLK_FTR_SZ; + } else { + out->size = 0; + } + + out->ptr = (char*)block + allocator->min_block_size; +} + +/* Expands a discard region into a neighboring free block, allowing us to + * discard the block header and first page. + * + * This is very important in small-allocation scenarios where no single block + * is large enough to be discarded on its own. */ +static void ERTS_INLINE +mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor, + ErtsMemDiscardRegion *region) +{ + char *neighbor_start; + + ASSERT(IS_FREE_BLK(neighbor)); + + neighbor_start = (char*)neighbor; + + if (region->ptr >= neighbor_start) { + char *region_start_page; + + region_start_page = region->ptr - SYS_PAGE_SIZE; + region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK); + + /* Expand if our first page begins within the previous free block's + * unused data. */ + if (region_start_page >= (neighbor_start + allocator->min_block_size)) { + region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ; + region->ptr = region_start_page; + } + } else { + char *region_end_page; + UWord neighbor_size; + + ASSERT(region->ptr <= neighbor_start); + + region_end_page = region->ptr + region->size + SYS_PAGE_SIZE; + region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK); + + neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ; + + /* Expand if our last page ends anywhere within the next free block, + * sans the footer we'll inherit. */ + if (region_end_page < neighbor_start + neighbor_size) { + region->size += region_end_page - (region->ptr + region->size); + } + } +} + +static void ERTS_INLINE +mem_discard_finish(Allctr_t *allocator, Block_t *block, + ErtsMemDiscardRegion *region) +{ +#ifdef DEBUG + char *block_start, *block_end; + UWord block_size; + + block_size = BLK_SZ(block); + + /* Ensure that the region is completely covered by the legal area of the + * free block. This must hold even when the region is too small to be + * discarded. */ + if (region->size > 0) { + ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ); + + block_start = (char*)block + allocator->min_block_size; + block_end = (char*)block + block_size - FBLK_FTR_SZ; + + ASSERT(region->size == 0 || + (region->ptr + region->size <= block_end && + region->ptr >= block_start && + region->size <= block_size)); + } +#else + (void)allocator; + (void)block; +#endif + + if (region->size > SYS_PAGE_SIZE) { + UWord align_offset, size; + char *ptr; + + align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK); + + size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK; + ptr = region->ptr + align_offset; + + if (size > 0) { + ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK)); + ASSERT(!(size & SYS_PAGE_SZ_MASK)); + + erts_mem_discard(ptr, size); + } + } +} + +static void +carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier) +{ + static const int MAX_BLOCKS_TO_DISCARD = 100; + Block_t *block; + int i; + + block = allocator->first_fblk_in_mbc(allocator, carrier); + i = 0; + + while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) { + ErtsMemDiscardRegion region; + + ASSERT(IS_FREE_BLK(block)); + + mem_discard_start(allocator, block, ®ion); + mem_discard_finish(allocator, block, ®ion); + + block = allocator->next_fblk_in_mbc(allocator, carrier, block); + i++; + } +} + static void -mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) +mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp) { + ErtsMemDiscardRegion discard_region = {0}; + int discard; Uint is_first_blk; Uint is_last_blk; Uint blk_sz; @@ -2482,12 +2713,28 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) ASSERT(IS_MBC_BLK(blk)); ASSERT(blk_sz >= allctr->min_block_size); +#ifndef DEBUG + /* We want to mark freed blocks as reclaimable to the OS, but it's a fairly + * expensive operation which doesn't do much good if we use it again soon + * after, so we limit it to deallocations on pooled carriers. */ + discard = busy_pcrr_pp && *busy_pcrr_pp; +#else + /* Always discard in debug mode, regardless of whether we're in the pool or + * not. */ + discard = 1; +#endif + + if (discard) { + mem_discard_start(allctr, blk, &discard_region); + } + HARD_CHECK_BLK_CARRIER(allctr, blk); crr = ABLK_TO_MBC(blk); ERTS_ALC_CPOOL_FREE_OP(allctr); - STAT_MBC_BLK_FREE(allctr, crr, busy_pcrr_pp, blk_sz, alcu_flgs); + + STAT_MBC_BLK_FREE(allctr, type, crr, busy_pcrr_pp, blk_sz, alcu_flgs); is_first_blk = IS_MBC_FIRST_ABLK(allctr, blk); is_last_blk = IS_LAST_BLK(blk); @@ -2498,6 +2745,10 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) blk = PREV_BLK(blk); (*allctr->unlink_free_block)(allctr, blk); + if (discard) { + mem_discard_coalesce(allctr, blk, &discard_region); + } + blk_sz += MBC_FBLK_SZ(blk); is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk); SET_MBC_FBLK_SZ(blk, blk_sz); @@ -2513,6 +2764,11 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) if (IS_FREE_BLK(nxt_blk)) { /* Coalesce with next block... */ (*allctr->unlink_free_block)(allctr, nxt_blk); + + if (discard) { + mem_discard_coalesce(allctr, nxt_blk, &discard_region); + } + blk_sz += MBC_FBLK_SZ(nxt_blk); SET_MBC_FBLK_SZ(blk, blk_sz); @@ -2548,18 +2804,22 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) else { (*allctr->link_free_block)(allctr, blk); HARD_CHECK_BLK_CARRIER(allctr, blk); -#ifdef ERTS_SMP - if (busy_pcrr_pp && *busy_pcrr_pp) + + if (discard) { + mem_discard_finish(allctr, blk, &discard_region); + } + + if (busy_pcrr_pp && *busy_pcrr_pp) { update_pooled_tree(allctr, crr, blk_sz); - else + } else { check_abandon_carrier(allctr, blk, busy_pcrr_pp); -#endif + } } } static void * -mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, - Carrier_t **busy_pcrr_pp) +mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size, + Uint32 alcu_flgs, Carrier_t **busy_pcrr_pp) { void *new_p; Uint old_blk_sz; @@ -2587,7 +2847,6 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, return NULL; #else /* !MBC_REALLOC_ALWAYS_MOVES */ -#ifdef ERTS_SMP if (busy_pcrr_pp && *busy_pcrr_pp) { /* * Don't want to use carrier in pool @@ -2598,10 +2857,9 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, new_blk = UMEM2BLK(new_p); ASSERT(!(IS_MBC_BLK(new_blk) && ABLK_TO_MBC(new_blk) == *busy_pcrr_pp)); sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ)); - mbc_free(allctr, p, busy_pcrr_pp); + mbc_free(allctr, type, p, busy_pcrr_pp); return new_p; } -#endif get_blk_sz = blk_sz = UMEMSZ2BLKSZ(allctr, size); @@ -2676,7 +2934,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, crr = ABLK_TO_MBC(blk); ERTS_ALC_CPOOL_REALLOC_OP(allctr); - STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs); + STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs); STAT_MBC_BLK_ALLOC(allctr, crr, blk_sz, alcu_flgs); ASSERT(MBC_BLK_SZ(blk) >= allctr->min_block_size); @@ -2722,9 +2980,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, HARD_CHECK_BLK_CARRIER(allctr, blk); -#ifdef ERTS_SMP check_abandon_carrier(allctr, nxt_blk, NULL); -#endif return p; } @@ -2782,7 +3038,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, } ERTS_ALC_CPOOL_REALLOC_OP(allctr); - STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs); + STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs); STAT_MBC_BLK_ALLOC(allctr, crr, blk_sz, alcu_flgs); ASSERT(IS_ALLOCED_BLK(blk)); @@ -2843,7 +3099,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, if (!new_p) return NULL; sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ)); - mbc_free(allctr, p, busy_pcrr_pp); + mbc_free(allctr, type, p, busy_pcrr_pp); return new_p; @@ -2873,7 +3129,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, 1); new_p = BLK2UMEM(new_blk); sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ)); - mbc_free(allctr, p, NULL); + mbc_free(allctr, type, p, NULL); return new_p; } else { @@ -2930,7 +3186,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, 0); ERTS_ALC_CPOOL_FREE_OP(allctr); - STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs); + STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs); return new_p; } @@ -2938,11 +3194,9 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, #endif /* !MBC_REALLOC_ALWAYS_MOVES */ } -#ifdef ERTS_SMP #define ERTS_ALC_MAX_DEALLOC_CARRIER 10 #define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT 100 -#define ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT 100 #define ERTS_ALC_CPOOL_MAX_FAILED_STAT_READS 3 #define ERTS_ALC_CPOOL_PTR_MOD_MRK (((erts_aint_t) 1) << 0) @@ -2969,14 +3223,11 @@ typedef union { # error "Carrier pool implementation assumes ERTS_ALC_A_MIN > ERTS_ALC_A_INVALID" #endif -/* - * The pool is only allowed to be manipulated by managed - * threads except in the alloc_SUITE:cpool case. In this - * test case carrier_pool[ERTS_ALC_A_INVALID] will be - * used. - */ +/* The pools are only allowed to be manipulated by managed threads except in + * the alloc_SUITE:cpool test, where only test_carrier_pool is used. */ -static ErtsAlcCrrPool_t carrier_pool[ERTS_ALC_A_MAX+1] erts_align_attribute(ERTS_CACHE_LINE_SIZE); +static ErtsAlcCrrPool_t firstfit_carrier_pool; +static ErtsAlcCrrPool_t test_carrier_pool; #define ERTS_ALC_CPOOL_MAX_BACKOFF (1 << 8) @@ -2997,12 +3248,12 @@ backoff(int n) static int cpool_dbg_is_in_pool(Allctr_t *allctr, Carrier_t *crr) { - ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel; + ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel; ErtsAlcCPoolData_t *cpdp = sentinel; Carrier_t *tmp_crr; while (1) { - cpdp = (ErtsAlcCPoolData_t *) (erts_atomic_read_ddrb(&cpdp->next) & ~FLG_MASK); + cpdp = (ErtsAlcCPoolData_t *) (erts_atomic_read_ddrb(&cpdp->next) & ~CRR_FLG_MASK); if (cpdp == sentinel) return 0; tmp_crr = (Carrier_t *) (((char *) cpdp) - offsetof(Carrier_t, cpool)); @@ -3014,7 +3265,7 @@ cpool_dbg_is_in_pool(Allctr_t *allctr, Carrier_t *crr) static int cpool_is_empty(Allctr_t *allctr) { - ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel; + ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel; return ((erts_atomic_read_rb(&sentinel->next) == (erts_aint_t) sentinel) && (erts_atomic_read_rb(&sentinel->prev) == (erts_aint_t) sentinel)); } @@ -3104,16 +3355,31 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr) { ErtsAlcCPoolData_t *cpd1p, *cpd2p; erts_aint_t val; - ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel; + ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel; Allctr_t *orig_allctr = crr->cpool.orig_allctr; - ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */ + ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */ || erts_thr_progress_is_managed_thread()); - erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size, - (erts_aint_t) crr->cpool.blocks_size); - erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks, - (erts_aint_t) crr->cpool.blocks); + { + int alloc_no = allctr->alloc_no; + + ERTS_ALC_CPOOL_ASSERT( + erts_atomic_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no]) >= 0 && + crr->cpool.blocks_size[alloc_no] >= 0); + + ERTS_ALC_CPOOL_ASSERT( + erts_atomic_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]) >= 0 && + crr->cpool.blocks[alloc_no] >= 0); + + /* We only modify the counter for our current type since the others are + * conceptually still in the pool. */ + erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no], + ((erts_aint_t) crr->cpool.blocks_size[alloc_no])); + erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no], + ((erts_aint_t) crr->cpool.blocks[alloc_no])); + } + erts_atomic_add_nob(&orig_allctr->cpool.stat.carriers_size, (erts_aint_t) CARRIER_SZ(crr)); erts_atomic_inc_nob(&orig_allctr->cpool.stat.no_carriers); @@ -3186,10 +3452,10 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr) ErtsAlcCPoolData_t *cpd1p, *cpd2p; erts_aint_t val; #ifdef ERTS_ALC_CPOOL_DEBUG - ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel; + ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel; #endif - ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */ + ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */ || erts_thr_progress_is_managed_thread()); ERTS_ALC_CPOOL_ASSERT(sentinel != &crr->cpool); @@ -3265,28 +3531,43 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr) crr->cpool.thr_prgr = erts_thr_progress_later(NULL); - erts_atomic_add_nob(&prev_allctr->cpool.stat.blocks_size, - -((erts_aint_t) crr->cpool.blocks_size)); - erts_atomic_add_nob(&prev_allctr->cpool.stat.no_blocks, - -((erts_aint_t) crr->cpool.blocks)); - erts_atomic_add_nob(&prev_allctr->cpool.stat.carriers_size, + { + Allctr_t *orig_allctr = crr->cpool.orig_allctr; + int alloc_no = allctr->alloc_no; + + ERTS_ALC_CPOOL_ASSERT(orig_allctr == prev_allctr); + + ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size[alloc_no] <= + erts_atomic_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no])); + + ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks[alloc_no] <= + erts_atomic_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no])); + + /* We only modify the counters for our current type since the others + * were, conceptually, never taken out of the pool. */ + erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no], + -((erts_aint_t) crr->cpool.blocks_size[alloc_no])); + erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no], + -((erts_aint_t) crr->cpool.blocks[alloc_no])); + + erts_atomic_add_nob(&orig_allctr->cpool.stat.carriers_size, -((erts_aint_t) CARRIER_SZ(crr))); - erts_atomic_dec_wb(&prev_allctr->cpool.stat.no_carriers); + erts_atomic_dec_wb(&orig_allctr->cpool.stat.no_carriers); + } } static Carrier_t * cpool_fetch(Allctr_t *allctr, UWord size) { - enum { IGNORANT, HAS_SEEN_SENTINEL, THE_LAST_ONE } loop_state; - int i; + int i, seen_sentinel; Carrier_t *crr; Carrier_t *reinsert_crr = NULL; ErtsAlcCPoolData_t *cpdp; ErtsAlcCPoolData_t *cpool_entrance = NULL; ErtsAlcCPoolData_t *sentinel; - ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */ + ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */ || erts_thr_progress_is_managed_thread()); i = ERTS_ALC_CPOOL_MAX_FETCH_INSPECT; @@ -3308,7 +3589,7 @@ cpool_fetch(Allctr_t *allctr, UWord size) aoff_remove_pooled_mbc(allctr, crr); - exp = erts_smp_atomic_read_nob(&crr->allctr); + exp = erts_atomic_read_nob(&crr->allctr); if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { ASSERT((exp & ~ERTS_CRR_ALCTR_FLG_MASK) == (erts_aint_t)allctr); if (erts_atomic_read_nob(&crr->cpool.max_size) < size) { @@ -3335,9 +3616,9 @@ cpool_fetch(Allctr_t *allctr, UWord size) } /* Try to fetch it... */ - act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, - exp & ~ERTS_CRR_ALCTR_FLG_IN_POOL, - exp); + act = erts_atomic_cmpxchg_mb(&crr->allctr, + exp & ~ERTS_CRR_ALCTR_FLG_IN_POOL, + exp); if (act == exp) { cpool_delete(allctr, allctr, crr); crr->cpool.state = ERTS_MBC_IS_HOME; @@ -3368,7 +3649,7 @@ cpool_fetch(Allctr_t *allctr, UWord size) erts_aint_t iallctr; crr = ErtsContainerStruct(allctr->cpool.pooled_tree, Carrier_t, cpool.pooled); - iallctr = erts_smp_atomic_read_nob(&crr->allctr); + iallctr = erts_atomic_read_nob(&crr->allctr); if (iallctr & ERTS_CRR_ALCTR_FLG_IN_POOL) { cpool_entrance = &crr->cpool; break; @@ -3388,50 +3669,41 @@ cpool_fetch(Allctr_t *allctr, UWord size) /* * Finally search the shared pool and try employ foreign carriers */ - sentinel = &carrier_pool[allctr->alloc_no].sentinel; + sentinel = allctr->cpool.sentinel; if (cpool_entrance) { /* * We saw a pooled carried above, use it as entrance into the pool */ - cpdp = cpool_entrance; } else { /* - * No pooled carried seen above. Start search at cpool sentinel, + * No pooled carrier seen above. Start search at cpool sentinel, * but begin by passing one element before trying to fetch. * This in order to avoid contention with threads inserting elements. */ - cpool_entrance = sentinel; - cpdp = cpool_aint2cpd(cpool_read(&cpool_entrance->prev)); - if (cpdp == sentinel) + cpool_entrance = cpool_aint2cpd(cpool_read(&sentinel->prev)); + if (cpool_entrance == sentinel) goto check_dc_list; } - loop_state = IGNORANT; + cpdp = cpool_entrance; + seen_sentinel = 0; do { erts_aint_t exp; cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev)); - if (cpdp == cpool_entrance) { - if (cpool_entrance == sentinel) { - cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev)); - if (cpdp == sentinel) - break; - } - loop_state = THE_LAST_ONE; - } - else if (cpdp == sentinel) { - if (loop_state == HAS_SEEN_SENTINEL) { + if (cpdp == sentinel) { + if (seen_sentinel) { /* We been here before. cpool_entrance must have been removed */ INC_CC(allctr->cpool.stat.entrance_removed); break; } - cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev)); - if (cpdp == sentinel) - break; - loop_state = HAS_SEEN_SENTINEL; + seen_sentinel = 1; + continue; } + ASSERT(cpdp != cpool_entrance || seen_sentinel); + crr = ErtsContainerStruct(cpdp, Carrier_t, cpool); - exp = erts_smp_atomic_read_rb(&crr->allctr); + exp = erts_atomic_read_rb(&crr->allctr); if (erts_atomic_read_nob(&cpdp->max_size) < size) { INC_CC(allctr->cpool.stat.skip_size); @@ -3442,7 +3714,7 @@ cpool_fetch(Allctr_t *allctr, UWord size) erts_aint_t want = (((erts_aint_t) allctr) | (exp & ERTS_CRR_ALCTR_FLG_HOMECOMING)); /* Try to fetch it... */ - act = erts_smp_atomic_cmpxchg_mb(&crr->allctr, want, exp); + act = erts_atomic_cmpxchg_mb(&crr->allctr, want, exp); if (act == exp) { cpool_delete(allctr, ((Allctr_t *) (act & ~ERTS_CRR_ALCTR_FLG_MASK)), crr); if (crr->cpool.orig_allctr == allctr) { @@ -3462,7 +3734,7 @@ cpool_fetch(Allctr_t *allctr, UWord size) INC_CC(allctr->cpool.stat.fail_shared); return NULL; } - }while (loop_state != THE_LAST_ONE); + }while (cpdp != cpool_entrance); check_dc_list: /* Last; check our own pending dealloc carrier list... */ @@ -3471,7 +3743,7 @@ check_dc_list: if (erts_atomic_read_nob(&crr->cpool.max_size) >= size) { Block_t* blk; unlink_carrier(&allctr->cpool.dc_list, crr); - ERTS_ALC_CPOOL_ASSERT(erts_smp_atomic_read_nob(&crr->allctr) + ERTS_ALC_CPOOL_ASSERT(erts_atomic_read_nob(&crr->allctr) == ((erts_aint_t) allctr)); blk = MBC_TO_FIRST_BLK(allctr, crr); ASSERT(FBLK_TO_MBC(blk) == crr); @@ -3553,7 +3825,7 @@ schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr) orig_allctr = crr->cpool.orig_allctr; if (allctr == orig_allctr) { - if (!(erts_smp_atomic_read_nob(&crr->allctr) & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { + if (!(erts_atomic_read_nob(&crr->allctr) & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { dealloc_my_carrier(allctr, crr); } /*else @@ -3580,13 +3852,13 @@ schedule_dealloc_carrier(Allctr_t *allctr, Carrier_t *crr) ERTS_ALC_CPOOL_ASSERT(IS_MBC_FIRST_ABLK(allctr, first_blk)); ERTS_ALC_CPOOL_ASSERT(crr == FBLK_TO_MBC(first_blk)); ERTS_ALC_CPOOL_ASSERT(crr == FIRST_BLK_TO_MBC(allctr, first_blk)); - ERTS_ALC_CPOOL_ASSERT((erts_smp_atomic_read_nob(&crr->allctr) + ERTS_ALC_CPOOL_ASSERT((erts_atomic_read_nob(&crr->allctr) & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == (erts_aint_t) allctr); #endif iallctr = (erts_aint_t)orig_allctr | ERTS_CRR_ALCTR_FLG_HOMECOMING; - if (!(erts_smp_atomic_xchg_nob(&crr->allctr, iallctr) + if (!(erts_atomic_xchg_nob(&crr->allctr, iallctr) & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { enqueue_homecoming(allctr, crr); } @@ -3641,8 +3913,9 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr) crr->cpool.orig_allctr = allctr; crr->cpool.thr_prgr = ERTS_THR_PRGR_INVALID; erts_atomic_init_nob(&crr->cpool.max_size, 0); - crr->cpool.blocks = 0; - crr->cpool.blocks_size = 0; + sys_memset(&crr->cpool.blocks_size, 0, sizeof(crr->cpool.blocks_size)); + sys_memset(&crr->cpool.blocks, 0, sizeof(crr->cpool.blocks)); + crr->cpool.total_blocks_size = 0; if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr)) crr->cpool.abandon_limit = 0; else { @@ -3657,14 +3930,14 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr) crr->cpool.state = ERTS_MBC_IS_HOME; } -static void -set_new_allctr_abandon_limit(Allctr_t *allctr) + + +static UWord +allctr_abandon_limit(Allctr_t *allctr) { UWord limit; UWord csz; - allctr->cpool.check_limit_count = ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT; - csz = allctr->mbcs.curr.norm.mseg.size; csz += allctr->mbcs.curr.norm.sys_alloc.size; @@ -3674,7 +3947,13 @@ set_new_allctr_abandon_limit(Allctr_t *allctr) else limit = (csz/100)*allctr->cpool.util_limit; - allctr->cpool.abandon_limit = limit; + return limit; +} + +static void ERTS_INLINE +set_new_allctr_abandon_limit(Allctr_t *allctr) +{ + allctr->cpool.abandon_limit = allctr_abandon_limit(allctr); } static void @@ -3686,16 +3965,18 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr) unlink_carrier(&allctr->mbc_list, crr); allctr->remove_mbc(allctr, crr); - set_new_allctr_abandon_limit(allctr); + + /* Mark our free blocks as unused and reclaimable to the OS. */ + carrier_mem_discard_free_blocks(allctr, crr); cpool_insert(allctr, crr); - iallctr = erts_smp_atomic_read_nob(&crr->allctr); + iallctr = erts_atomic_read_nob(&crr->allctr); if (allctr == crr->cpool.orig_allctr) { /* preserve HOMECOMING flag */ ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == (erts_aint_t)allctr); - erts_smp_atomic_set_wb(&crr->allctr, iallctr | ERTS_CRR_ALCTR_FLG_IN_POOL); + erts_atomic_set_wb(&crr->allctr, iallctr | ERTS_CRR_ALCTR_FLG_IN_POOL); poolify_my_carrier(allctr, crr); } else { @@ -3703,7 +3984,7 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr) iallctr = ((erts_aint_t)crr->cpool.orig_allctr | ERTS_CRR_ALCTR_FLG_HOMECOMING | ERTS_CRR_ALCTR_FLG_IN_POOL); - if (!(erts_smp_atomic_xchg_wb(&crr->allctr, iallctr) + if (!(erts_atomic_xchg_wb(&crr->allctr, iallctr) & ERTS_CRR_ALCTR_FLG_HOMECOMING)) { enqueue_homecoming(allctr, crr); @@ -3739,7 +4020,8 @@ poolify_my_carrier(Allctr_t *allctr, Carrier_t *crr) } static void -cpool_read_stat(Allctr_t *allctr, UWord *nocp, UWord *cszp, UWord *nobp, UWord *bszp) +cpool_read_stat(Allctr_t *allctr, int alloc_no, + UWord *nocp, UWord *cszp, UWord *nobp, UWord *bszp) { int i; UWord noc = 0, csz = 0, nob = 0, bsz = 0; @@ -3759,10 +4041,10 @@ cpool_read_stat(Allctr_t *allctr, UWord *nocp, UWord *cszp, UWord *nobp, UWord * ? erts_atomic_read_nob(&allctr->cpool.stat.carriers_size) : 0); tnob = (UWord) (nobp - ? erts_atomic_read_nob(&allctr->cpool.stat.no_blocks) + ? erts_atomic_read_nob(&allctr->cpool.stat.no_blocks[alloc_no]) : 0); tbsz = (UWord) (bszp - ? erts_atomic_read_nob(&allctr->cpool.stat.blocks_size) + ? erts_atomic_read_nob(&allctr->cpool.stat.blocks_size[alloc_no]) : 0); if (tnoc == noc && tcsz == csz && tnob == nob && tbsz == bsz) break; @@ -3784,7 +4066,6 @@ cpool_read_stat(Allctr_t *allctr, UWord *nocp, UWord *cszp, UWord *nobp, UWord * } -#endif /* ERTS_SMP */ #ifdef DEBUG @@ -3885,7 +4166,6 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) blk_sz = UMEMSZ2BLKSZ(allctr, umem_sz); } -#ifdef ERTS_SMP allctr->cpool.disable_abandon = ERTS_ALC_CPOOL_MAX_DISABLE_ABANDON; if ((flags & (CFLG_MBC|CFLG_NO_CPOOL)) == CFLG_MBC @@ -3902,7 +4182,6 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) return blk; } } -#endif #if HAVE_ERTS_MSEG @@ -4020,6 +4299,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) #if HAVE_ERTS_MSEG mbc_final_touch: #endif + set_new_allctr_abandon_limit(allctr); blk = MBC_TO_FIRST_BLK(allctr, crr); @@ -4032,9 +4312,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) allctr->main_carrier = crr; } -#ifdef ERTS_SMP cpool_init_carrier_data(allctr, crr); -#endif link_carrier(&allctr->mbc_list, crr); @@ -4240,7 +4518,6 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp) else { ASSERT(IS_MBC_FIRST_FBLK(allctr, blk)); crr = FIRST_BLK_TO_MBC(allctr, blk); - crr_sz = CARRIER_SZ(crr); #ifdef DEBUG if (!allctr->stopped) { @@ -4254,9 +4531,8 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp) } #endif -#ifdef ERTS_SMP if (busy_pcrr_pp && *busy_pcrr_pp) { - erts_aint_t iallctr = erts_smp_atomic_read_nob(&crr->allctr); + erts_aint_t iallctr = erts_atomic_read_nob(&crr->allctr); ERTS_ALC_CPOOL_ASSERT(*busy_pcrr_pp == crr); ERTS_ALC_CPOOL_ASSERT((iallctr & ~ERTS_CRR_ALCTR_FLG_HOMECOMING) == (((erts_aint_t) allctr) @@ -4265,24 +4541,15 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp) ERTS_ALC_CPOOL_ASSERT(allctr == crr->cpool.orig_allctr); *busy_pcrr_pp = NULL; - erts_smp_atomic_set_nob(&crr->allctr, - (iallctr & ~(ERTS_CRR_ALCTR_FLG_IN_POOL | - ERTS_CRR_ALCTR_FLG_BUSY))); + erts_atomic_set_nob(&crr->allctr, + (iallctr & ~(ERTS_CRR_ALCTR_FLG_IN_POOL | + ERTS_CRR_ALCTR_FLG_BUSY))); cpool_delete(allctr, allctr, crr); } - else -#endif + else { unlink_carrier(&allctr->mbc_list, crr); -#if HAVE_ERTS_MSEG - if (IS_MSEG_CARRIER(crr)) { - ASSERT(crr_sz % ERTS_SACRR_UNIT_SZ == 0); - STAT_MSEG_MBC_FREE(allctr, crr_sz); - } - else -#endif - STAT_SYS_ALLOC_MBC_FREE(allctr, crr_sz); - + STAT_MBC_FREE(allctr, crr); if (allctr->remove_mbc) allctr->remove_mbc(allctr, crr); } @@ -4296,17 +4563,13 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp) LTTNG5(carrier_destroy, ERTS_ALC_A2AD(allctr->alloc_no), allctr->ix, - crr_sz, + CARRIER_SZ(crr), mbc_stats, sbc_stats); } #endif -#ifdef ERTS_SMP schedule_dealloc_carrier(allctr, crr); -#else - dealloc_mbc(allctr, crr); -#endif } } @@ -4322,6 +4585,7 @@ static struct { Eterm e; Eterm t; Eterm ramv; + Eterm atags; #if HAVE_ERTS_MSEG Eterm asbcst; Eterm rsbcst; @@ -4350,7 +4614,6 @@ static struct { Eterm fix_types; Eterm mbcs; -#ifdef ERTS_SMP Eterm mbcs_pool; Eterm fetch; Eterm fail_pooled; @@ -4363,7 +4626,6 @@ static struct { Eterm skip_homecoming; Eterm skip_race; Eterm entrance_removed; -#endif Eterm sbcs; Eterm sys_alloc_carriers_size; @@ -4379,6 +4641,8 @@ static struct { Eterm blocks_size; Eterm blocks; + Eterm foreign_blocks; + Eterm calls; Eterm sys_alloc; Eterm sys_free; @@ -4393,11 +4657,11 @@ static struct { #endif } am; -static Eterm fix_type_atoms[ERTS_ALC_NO_FIXED_SIZES]; +static Eterm alloc_type_atoms[ERTS_ALC_N_MAX + 1]; static ERTS_INLINE void atom_init(Eterm *atom, char *name) { - *atom = am_atom_put(name, strlen(name)); + *atom = am_atom_put(name, sys_strlen(name)); } #define AM_INIT(AM) atom_init(&am.AM, #AM) @@ -4424,6 +4688,7 @@ init_atoms(Allctr_t *allctr) AM_INIT(e); AM_INIT(t); AM_INIT(ramv); + AM_INIT(atags); #if HAVE_ERTS_MSEG AM_INIT(asbcst); AM_INIT(rsbcst); @@ -4452,7 +4717,6 @@ init_atoms(Allctr_t *allctr) AM_INIT(fix_types); AM_INIT(mbcs); -#ifdef ERTS_SMP AM_INIT(mbcs_pool); AM_INIT(fetch); AM_INIT(fail_pooled); @@ -4465,7 +4729,6 @@ init_atoms(Allctr_t *allctr) AM_INIT(skip_homecoming); AM_INIT(skip_race); AM_INIT(entrance_removed); -#endif AM_INIT(sbcs); AM_INIT(sys_alloc_carriers_size); @@ -4480,6 +4743,7 @@ init_atoms(Allctr_t *allctr) AM_INIT(carriers); AM_INIT(blocks_size); AM_INIT(blocks); + AM_INIT(foreign_blocks); AM_INIT(calls); AM_INIT(sys_alloc); @@ -4497,12 +4761,12 @@ init_atoms(Allctr_t *allctr) } #endif - for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) { - ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix; - char *name = (char *) ERTS_ALC_N2TD(n); - size_t len = strlen(name); - fix_type_atoms[ix] = am_atom_put(name, len); - } + for (ix = ERTS_ALC_N_MIN; ix <= ERTS_ALC_N_MAX; ix++) { + const char *name = ERTS_ALC_N2TD(ix); + size_t len = sys_strlen(name); + + alloc_type_atoms[ix] = am_atom_put(name, len); + } } if (allctr && !allctr->atoms_initialized) { @@ -4622,15 +4886,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type internal: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_T2TD(fix->type), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[ERTS_ALC_T2N(fix->type)], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -4650,15 +4913,14 @@ sz_info_fix(Allctr_t *allctr, erts_print(to, arg, "fix type: %s %bpu %bpu\n", - (char *) ERTS_ALC_N2TD(ERTS_ALC_N_MIN_A_FIXED_SIZE - + ix), + (char *) ERTS_ALC_T2TD(fix->type), alloced, used); } if (hpp || szp) { add_3tup(hpp, szp, &res, - fix_type_atoms[ix], + alloc_type_atoms[ERTS_ALC_T2N(fix->type)], bld_unstable_uint(hpp, szp, alloced), bld_unstable_uint(hpp, szp, used)); } @@ -4715,7 +4977,6 @@ sz_info_carriers(Allctr_t *allctr, return res; } -#ifdef ERTS_SMP static Eterm info_cpool(Allctr_t *allctr, @@ -4732,9 +4993,9 @@ info_cpool(Allctr_t *allctr, noc = csz = nob = bsz = ~0; if (print_to_p || hpp) { if (sz_only) - cpool_read_stat(allctr, NULL, &csz, NULL, &bsz); + cpool_read_stat(allctr, allctr->alloc_no, NULL, &csz, NULL, &bsz); else - cpool_read_stat(allctr, &noc, &csz, &nob, &bsz); + cpool_read_stat(allctr, allctr->alloc_no, &noc, &csz, &nob, &bsz); } if (print_to_p) { @@ -4749,6 +5010,10 @@ info_cpool(Allctr_t *allctr, } if (hpp || szp) { + Eterm foreign_blocks; + int i; + + foreign_blocks = NIL; res = NIL; if (!sz_only) { @@ -4795,28 +5060,66 @@ info_cpool(Allctr_t *allctr, add_3tup(hpp, szp, &res, am.entrance_removed, bld_unstable_uint(hpp, szp, ERTS_ALC_CC_GIGA_VAL(allctr->cpool.stat.entrance_removed)), bld_unstable_uint(hpp, szp, ERTS_ALC_CC_VAL(allctr->cpool.stat.entrance_removed))); + } add_2tup(hpp, szp, &res, am.carriers_size, bld_unstable_uint(hpp, szp, csz)); - } - if (!sz_only) - add_2tup(hpp, szp, &res, - am.carriers, - bld_unstable_uint(hpp, szp, noc)); + + if (!sz_only) { + add_2tup(hpp, szp, &res, + am.carriers, + bld_unstable_uint(hpp, szp, noc)); + } + add_2tup(hpp, szp, &res, am.blocks_size, bld_unstable_uint(hpp, szp, bsz)); - if (!sz_only) + + if (!sz_only) { add_2tup(hpp, szp, &res, am.blocks, bld_unstable_uint(hpp, szp, nob)); + } + + for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { + const char *name_str; + Eterm name, info; + + if (i == allctr->alloc_no) { + continue; + } + + cpool_read_stat(allctr, i, NULL, NULL, &nob, &bsz); + + if (bsz == 0 && (nob == 0 || sz_only)) { + continue; + } + + name_str = ERTS_ALC_A2AD(i); + info = NIL; + + add_2tup(hpp, szp, &info, + am.blocks_size, + bld_unstable_uint(hpp, szp, bsz)); + + if (!sz_only) { + add_2tup(hpp, szp, &info, + am.blocks, + bld_unstable_uint(hpp, szp, nob)); + } + + name = am_atom_put(name_str, sys_strlen(name_str)); + + add_2tup(hpp, szp, &foreign_blocks, name, info); + } + + add_2tup(hpp, szp, &res, am.foreign_blocks, foreign_blocks); } return res; } -#endif /* ERTS_SMP */ static Eterm info_carriers(Allctr_t *allctr, @@ -4940,20 +5243,20 @@ make_name_atoms(Allctr_t *allctr) char realloc[] = "realloc"; char free[] = "free"; char buf[MAX_ATOM_CHARACTERS]; - size_t prefix_len = strlen(allctr->name_prefix); + size_t prefix_len = sys_strlen(allctr->name_prefix); if (prefix_len > MAX_ATOM_CHARACTERS + sizeof(realloc) - 1) erts_exit(ERTS_ERROR_EXIT,"Too long allocator name: %salloc\n",allctr->name_prefix); - memcpy((void *) buf, (void *) allctr->name_prefix, prefix_len); + sys_memcpy((void *) buf, (void *) allctr->name_prefix, prefix_len); - memcpy((void *) &buf[prefix_len], (void *) alloc, sizeof(alloc) - 1); + sys_memcpy((void *) &buf[prefix_len], (void *) alloc, sizeof(alloc) - 1); allctr->name.alloc = am_atom_put(buf, prefix_len + sizeof(alloc) - 1); - memcpy((void *) &buf[prefix_len], (void *) realloc, sizeof(realloc) - 1); + sys_memcpy((void *) &buf[prefix_len], (void *) realloc, sizeof(realloc) - 1); allctr->name.realloc = am_atom_put(buf, prefix_len + sizeof(realloc) - 1); - memcpy((void *) &buf[prefix_len], (void *) free, sizeof(free) - 1); + sys_memcpy((void *) &buf[prefix_len], (void *) free, sizeof(free) - 1); allctr->name.free = am_atom_put(buf, prefix_len + sizeof(free) - 1); } @@ -5071,13 +5374,9 @@ info_options(Allctr_t *allctr, return res; } -#ifdef ERTS_SMP acul = allctr->cpool.util_limit; acnl = allctr->cpool.in_pool_limit; acfml = allctr->cpool.fblk_min_limit; -#else - acul = 0; acnl = 0; acfml = 0; -#endif if (print_to_p) { char topt[21]; /* Enough for any 64-bit integer */ @@ -5090,6 +5389,7 @@ info_options(Allctr_t *allctr, "option e: true\n" "option t: %s\n" "option ramv: %s\n" + "option atags: %s\n" "option sbct: %beu\n" #if HAVE_ERTS_MSEG "option asbcst: %bpu\n" @@ -5108,6 +5408,7 @@ info_options(Allctr_t *allctr, "option acul: %bpu\n", topt, allctr->ramv ? "true" : "false", + allctr->atags ? "true" : "false", allctr->sbc_threshold, #if HAVE_ERTS_MSEG allctr->mseg_opt.abs_shrink_th, @@ -5177,6 +5478,7 @@ info_options(Allctr_t *allctr, am_sbct, bld_uint(hpp, szp, allctr->sbc_threshold)); add_2tup(hpp, szp, &res, am.ramv, allctr->ramv ? am_true : am_false); + add_2tup(hpp, szp, &res, am.atags, allctr->atags ? am_true : am_false); add_2tup(hpp, szp, &res, am.t, (allctr->t ? am_true : am_false)); add_2tup(hpp, szp, &res, am.e, am_true); } @@ -5266,19 +5568,15 @@ erts_alcu_info_options(Allctr_t *allctr, if (hpp || szp) ensure_atoms_initialized(allctr); -#ifdef USE_THREADS if (allctr->thread_safe) { erts_allctr_wrapper_pre_lock(); erts_mtx_lock(&allctr->mutex); } -#endif res = info_options(allctr, print_to_p, print_to_arg, hpp, szp); -#ifdef USE_THREADS if (allctr->thread_safe) { erts_mtx_unlock(&allctr->mutex); erts_allctr_wrapper_pre_unlock(); } -#endif return res; } @@ -5294,9 +5592,7 @@ erts_alcu_sz_info(Allctr_t *allctr, Uint *szp) { Eterm res, mbcs, sbcs, fix = THE_NON_VALUE; -#ifdef ERTS_SMP Eterm mbcs_pool; -#endif res = THE_NON_VALUE; @@ -5311,12 +5607,10 @@ erts_alcu_sz_info(Allctr_t *allctr, if (hpp || szp) ensure_atoms_initialized(allctr); -#ifdef USE_THREADS if (allctr->thread_safe) { erts_allctr_wrapper_pre_lock(); erts_mtx_lock(&allctr->mutex); } -#endif ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); @@ -5332,23 +5626,19 @@ erts_alcu_sz_info(Allctr_t *allctr, fix = sz_info_fix(allctr, internal, print_to_p, print_to_arg, hpp, szp); mbcs = sz_info_carriers(allctr, &allctr->mbcs, "mbcs ", print_to_p, print_to_arg, hpp, szp); -#ifdef ERTS_SMP if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) mbcs_pool = info_cpool(allctr, 1, "mbcs_pool ", print_to_p, print_to_arg, hpp, szp); else mbcs_pool = THE_NON_VALUE; /* shut up annoying warning... */ -#endif sbcs = sz_info_carriers(allctr, &allctr->sbcs, "sbcs ", print_to_p, print_to_arg, hpp, szp); if (hpp || szp) { res = NIL; add_2tup(hpp, szp, &res, am.sbcs, sbcs); -#ifdef ERTS_SMP if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) add_2tup(hpp, szp, &res, am.mbcs_pool, mbcs_pool); -#endif add_2tup(hpp, szp, &res, am.mbcs, mbcs); add_fix_types(allctr, internal, hpp, szp, &res, fix); } @@ -5359,12 +5649,10 @@ erts_alcu_sz_info(Allctr_t *allctr, } -#ifdef USE_THREADS if (allctr->thread_safe) { erts_mtx_unlock(&allctr->mutex); erts_allctr_wrapper_pre_unlock(); } -#endif return res; } @@ -5380,9 +5668,7 @@ erts_alcu_info(Allctr_t *allctr, Uint *szp) { Eterm res, sett, mbcs, sbcs, calls, fix = THE_NON_VALUE; -#ifdef ERTS_SMP Eterm mbcs_pool; -#endif res = THE_NON_VALUE; @@ -5397,12 +5683,10 @@ erts_alcu_info(Allctr_t *allctr, if (hpp || szp) ensure_atoms_initialized(allctr); -#ifdef USE_THREADS if (allctr->thread_safe) { erts_allctr_wrapper_pre_lock(); erts_mtx_lock(&allctr->mutex); } -#endif ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); @@ -5427,13 +5711,11 @@ erts_alcu_info(Allctr_t *allctr, fix = sz_info_fix(allctr, internal, print_to_p, print_to_arg, hpp, szp); mbcs = info_carriers(allctr, &allctr->mbcs, "mbcs ", print_to_p, print_to_arg, hpp, szp); -#ifdef ERTS_SMP if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) mbcs_pool = info_cpool(allctr, 0, "mbcs_pool ", print_to_p, print_to_arg, hpp, szp); else mbcs_pool = THE_NON_VALUE; /* shut up annoying warning... */ -#endif sbcs = info_carriers(allctr, &allctr->sbcs, "sbcs ", print_to_p, print_to_arg, hpp, szp); calls = info_calls(allctr, print_to_p, print_to_arg, hpp, szp); @@ -5443,10 +5725,8 @@ erts_alcu_info(Allctr_t *allctr, add_2tup(hpp, szp, &res, am.calls, calls); add_2tup(hpp, szp, &res, am.sbcs, sbcs); -#ifdef ERTS_SMP if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) add_2tup(hpp, szp, &res, am.mbcs_pool, mbcs_pool); -#endif add_2tup(hpp, szp, &res, am.mbcs, mbcs); add_fix_types(allctr, internal, hpp, szp, &res, fix); add_2tup(hpp, szp, &res, am.options, sett); @@ -5462,25 +5742,34 @@ erts_alcu_info(Allctr_t *allctr, } -#ifdef USE_THREADS if (allctr->thread_safe) { erts_mtx_unlock(&allctr->mutex); erts_allctr_wrapper_pre_unlock(); } -#endif return res; } +void +erts_alcu_foreign_size(Allctr_t *allctr, ErtsAlcType_t alloc_no, AllctrSize_t *size) +{ + if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) { + UWord csz, bsz; + cpool_read_stat(allctr, alloc_no, NULL, &csz, NULL, &bsz); + size->carriers = csz; + size->blocks = bsz; + } else { + size->carriers = 0; + size->blocks = 0; + } +} void erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t *fi, int fisz) { -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); -#endif size->carriers = allctr->mbcs.curr.norm.mseg.size; size->carriers += allctr->mbcs.curr.norm.sys_alloc.size; @@ -5490,14 +5779,12 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t * size->blocks = allctr->mbcs.blocks.curr.size; size->blocks += allctr->sbcs.blocks.curr.size; -#ifdef ERTS_SMP if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) { UWord csz, bsz; - cpool_read_stat(allctr, NULL, &csz, NULL, &bsz); + cpool_read_stat(allctr, allctr->alloc_no, NULL, &csz, NULL, &bsz); size->blocks += bsz; size->carriers += csz; } -#endif if (fi) { int ix; @@ -5519,29 +5806,31 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t * } } -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); -#endif } /* ----------------------------------------------------------------------- */ static ERTS_INLINE void * -do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) +do_erts_alcu_alloc(ErtsAlcType_t type, Allctr_t *allctr, Uint size) { - Allctr_t *allctr = (Allctr_t *) extra; void *res; ASSERT(initialized); ASSERT(allctr); - ERTS_SMP_LC_ASSERT(!allctr->thread_safe + ERTS_LC_ASSERT(!allctr->thread_safe || erts_lc_mtx_is_locked(&allctr->mutex)); ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); + /* Reject sizes that can't fit into the header word. */ + if (size > ~BLK_FLG_MASK) { + return NULL; + } + #if ALLOC_ZERO_EQ_NULL if (!size) return NULL; @@ -5569,41 +5858,57 @@ do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) void *erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size) { + Allctr_t *allctr = (Allctr_t *) extra; void *res; -#ifdef ERTS_SMP + ASSERT(!"This is not thread safe"); -#elif defined(USE_THREADS) - ASSERT(erts_equal_tids(erts_main_thread, erts_thr_self())); -#endif - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + DEBUG_CHECK_ALIGNMENT(res); + return res; } -#ifdef USE_THREADS void * erts_alcu_alloc_ts(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); - DEBUG_CHECK_ALIGNMENT(res); + res = do_erts_alcu_alloc(type, allctr, size); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } erts_mtx_unlock(&allctr->mutex); + + DEBUG_CHECK_ALIGNMENT(res); + return res; } -#ifdef ERTS_SMP void * erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5613,11 +5918,19 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size) allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5630,65 +5943,65 @@ void * erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size) { Allctr_t *pref_allctr; + alcu_atag_t tag = 0; void *res; pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } + if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); -#ifdef ERTS_SMP ASSERT(pref_allctr->dd.use); ERTS_ALCU_HANDLE_DD_IN_OP(pref_allctr, 1); -#endif ERTS_ALCU_DBG_CHK_THR_ACCESS(pref_allctr); res = do_erts_alcu_alloc(type, pref_allctr, size); -#ifdef ERTS_SMP if (!res && ERTS_ALCU_HANDLE_DD_IN_OP(pref_allctr, 1)) { /* Cleaned up a bit more; try one more time... */ res = do_erts_alcu_alloc(type, pref_allctr, size); } -#endif + + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); DEBUG_CHECK_ALIGNMENT(res); - return res; } -#endif -#endif /* ------------------------------------------------------------------------- */ static ERTS_INLINE void -do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, +do_erts_alcu_free(ErtsAlcType_t type, Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; ASSERT(initialized); ASSERT(allctr); - ERTS_SMP_LC_ASSERT(!allctr->thread_safe + ERTS_LC_ASSERT(!allctr->thread_safe || erts_lc_mtx_is_locked(&allctr->mutex)); ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); if (p) { - INC_CC(allctr->calls.this_free); - if (allctr->fix) { + if (ERTS_ALC_IS_FIX_TYPE(type)) { if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) - fix_cpool_free(allctr, type, p, busy_pcrr_pp, 1); + fix_cpool_free(allctr, type, 0, p, busy_pcrr_pp); else fix_nocpool_free(allctr, type, p); } @@ -5697,28 +6010,27 @@ do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p, if (IS_SBC_BLK(blk)) destroy_carrier(allctr, blk, NULL); else - mbc_free(allctr, p, busy_pcrr_pp); + mbc_free(allctr, type, p, busy_pcrr_pp); } } } void erts_alcu_free(ErtsAlcType_t type, void *extra, void *p) { - do_erts_alcu_free(type, extra, p, NULL); + Allctr_t *allctr = (Allctr_t *) extra; + do_erts_alcu_free(type, allctr, p, NULL); } -#ifdef USE_THREADS void erts_alcu_free_ts(ErtsAlcType_t type, void *extra, void *p) { Allctr_t *allctr = (Allctr_t *) extra; erts_mtx_lock(&allctr->mutex); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); erts_mtx_unlock(&allctr->mutex); } -#ifdef ERTS_SMP void erts_alcu_free_thr_spec(ErtsAlcType_t type, void *extra, void *p) @@ -5769,21 +6081,18 @@ erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p) } } -#endif -#endif /* ------------------------------------------------------------------------- */ static ERTS_INLINE void * do_erts_alcu_realloc(ErtsAlcType_t type, - void *extra, + Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs, Carrier_t **busy_pcrr_pp) { - Allctr_t *allctr = (Allctr_t *) extra; Block_t *blk; void *res; @@ -5791,22 +6100,27 @@ do_erts_alcu_realloc(ErtsAlcType_t type, ASSERT(allctr); - ERTS_SMP_LC_ASSERT(!allctr->thread_safe + ERTS_LC_ASSERT(!allctr->thread_safe || erts_lc_mtx_is_locked(&allctr->mutex)); ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr); if (!p) { - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_alloc); return res; } + /* Reject sizes that can't fit into the header word. */ + if (size > ~BLK_FLG_MASK) { + return NULL; + } + #if ALLOC_ZERO_EQ_NULL if (!size) { ASSERT(p); - do_erts_alcu_free(type, extra, p, busy_pcrr_pp); + do_erts_alcu_free(type, allctr, p, busy_pcrr_pp); INC_CC(allctr->calls.this_realloc); DEC_CC(allctr->calls.this_free); return NULL; @@ -5819,7 +6133,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, if (size < allctr->sbc_threshold) { if (IS_MBC_BLK(blk)) - res = mbc_realloc(allctr, p, size, alcu_flgs, busy_pcrr_pp); + res = mbc_realloc(allctr, type, p, size, alcu_flgs, busy_pcrr_pp); else { Uint used_sz = SBC_HEADER_SIZE + ABLK_HDR_SZ + size; Uint crr_sz; @@ -5878,7 +6192,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type, sys_memcpy((void *) res, (void *) p, MIN(MBC_ABLK_SZ(blk) - ABLK_HDR_SZ, size)); - mbc_free(allctr, p, busy_pcrr_pp); + mbc_free(allctr, type, p, busy_pcrr_pp); } else res = NULL; @@ -5891,19 +6205,29 @@ do_erts_alcu_realloc(ErtsAlcType_t type, void * erts_alcu_realloc(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_realloc(type, extra, p, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } void * erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) { + Allctr_t *allctr = (Allctr_t *)extra; void *res; - res = do_erts_alcu_alloc(type, extra, size); + + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = erts_alcu_realloc(type, extra, p, size); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5913,24 +6237,42 @@ erts_alcu_realloc_mv(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + DEBUG_CHECK_ALIGNMENT(res); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, determine_alloc_tag(allctr, type)); + } + return res; } -#ifdef USE_THREADS - void * erts_alcu_realloc_ts(ErtsAlcType_t type, void *extra, void *ptr, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_realloc(type, extra, ptr, size, 0, NULL); + + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } @@ -5938,11 +6280,17 @@ void * erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) { Allctr_t *allctr = (Allctr_t *) extra; + alcu_atag_t tag = 0; void *res; + + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + erts_mtx_lock(&allctr->mutex); - res = do_erts_alcu_alloc(type, extra, size); + res = do_erts_alcu_alloc(type, allctr, size); if (!res) - res = erts_alcu_realloc_ts(type, extra, p, size); + res = do_erts_alcu_realloc(type, allctr, p, size, 0, NULL); else { Block_t *blk; size_t cpy_size; @@ -5952,14 +6300,20 @@ erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size) if (cpy_size > size) cpy_size = size; sys_memcpy(res, p, cpy_size); - do_erts_alcu_free(type, extra, p, NULL); + do_erts_alcu_free(type, allctr, p, NULL); } + + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); + return res; } -#ifdef ERTS_SMP void * erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, @@ -5967,6 +6321,7 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -5976,11 +6331,19 @@ erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + if (allctr->thread_safe) erts_mtx_unlock(&allctr->mutex); @@ -5995,6 +6358,7 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, { ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra; int ix; + alcu_atag_t tag = 0; Allctr_t *allctr; void *res; @@ -6004,14 +6368,16 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, allctr = tspec->allctr[ix]; + if (allctr->atags) { + tag = determine_alloc_tag(allctr, type); + } + if (allctr->thread_safe) erts_mtx_lock(&allctr->mutex); res = do_erts_alcu_alloc(type, allctr, size); if (!res) { - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); - res = erts_alcu_realloc_thr_spec(type, allctr, ptr, size); + res = do_erts_alcu_realloc(type, allctr, ptr, size, 0, NULL); } else { Block_t *blk; @@ -6023,41 +6389,42 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra, cpy_size = size; sys_memcpy(res, ptr, cpy_size); do_erts_alcu_free(type, allctr, ptr, NULL); - if (allctr->thread_safe) - erts_mtx_unlock(&allctr->mutex); } + if (allctr->atags && res) { + set_alloc_tag(allctr, res, tag); + } + + if (allctr->thread_safe) + erts_mtx_unlock(&allctr->mutex); + DEBUG_CHECK_ALIGNMENT(res); return res; } static ERTS_INLINE void * -realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size, +realloc_thr_pref(ErtsAlcType_t type, Allctr_t *pref_allctr, void *p, Uint size, int force_move) { void *res; - Allctr_t *pref_allctr, *used_allctr; + Allctr_t *used_allctr; UWord old_user_size; Carrier_t *busy_pcrr_p; -#ifdef ERTS_SMP + alcu_atag_t tag = 0; int retried; -#endif - - if (!p) - return erts_alcu_alloc_thr_pref(type, extra, size); - pref_allctr = get_pref_allctr(extra); + if (pref_allctr->atags) { + tag = determine_alloc_tag(pref_allctr, type); + } if (pref_allctr->thread_safe) erts_mtx_lock(&pref_allctr->mutex); -#ifdef ERTS_SMP ASSERT(pref_allctr->dd.use); ERTS_ALCU_HANDLE_DD_IN_OP(pref_allctr, 1); retried = 0; restart: -#endif used_allctr = get_used_allctr(pref_allctr, ERTS_ALC_TS_PREF_LOCK_NO, p, &old_user_size, &busy_pcrr_p); @@ -6073,13 +6440,16 @@ restart: 0, &busy_pcrr_p); clear_busy_pool_carrier(used_allctr, busy_pcrr_p); -#ifdef ERTS_SMP if (!res && !retried && ERTS_ALCU_HANDLE_DD_IN_OP(pref_allctr, 1)) { /* Cleaned up a bit more; try one more time... */ retried = 1; goto restart; } -#endif + + if (pref_allctr->atags && res) { + set_alloc_tag(pref_allctr, res, tag); + } + if (pref_allctr->thread_safe) erts_mtx_unlock(&pref_allctr->mutex); } @@ -6088,6 +6458,9 @@ restart: if (!res) goto unlock_ts_return; else { + if (pref_allctr->atags) { + set_alloc_tag(pref_allctr, res, tag); + } DEBUG_CHECK_ALIGNMENT(res); @@ -6118,25 +6491,37 @@ restart: } } + DEBUG_CHECK_ALIGNMENT(res); + return res; } void * erts_alcu_realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 0); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 0); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } void * erts_alcu_realloc_mv_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size) { - return realloc_thr_pref(type, extra, p, size, 1); + if (p) { + Allctr_t *pref_allctr = get_pref_allctr(extra); + + return realloc_thr_pref(type, pref_allctr, p, size, 1); + } + + return erts_alcu_alloc_thr_pref(type, extra, size); } -#endif -#endif static Uint adjust_sbct(Allctr_t* allctr, Uint sbct) { @@ -6175,12 +6560,21 @@ int erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) { /* erts_alcu_start assumes that allctr has been zeroed */ + int i; if (((UWord)allctr & ERTS_CRR_ALCTR_FLG_MASK) != 0) { erts_exit(ERTS_ABORT_EXIT, "%s:%d:erts_alcu_start: Alignment error\n", __FILE__, __LINE__); } + /* The various fields packed into the header word must not overlap */ + ERTS_CT_ASSERT(!(MBC_ABLK_OFFSET_MASK & MBC_ABLK_SZ_MASK)); + ERTS_CT_ASSERT(!(MBC_ABLK_OFFSET_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(MBC_ABLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(MBC_FBLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(SBC_BLK_SZ_MASK & BLK_FLG_MASK)); + ERTS_CT_ASSERT(!(CRR_SZ_MASK & CRR_FLG_MASK)); + if (!initialized) goto error; @@ -6188,10 +6582,8 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) sys_memcpy((void *) &allctr->mseg_opt, (void *) &erts_mseg_default_opt, sizeof(ErtsMsegOpt_t)); -#ifdef ERTS_SMP if (init->tspec || init->tpref) allctr->mseg_opt.sched_spec = 1; -#endif /* ERTS_SMP */ #endif /* HAVE_ERTS_MSEG */ allctr->name_prefix = init->name_prefix; @@ -6200,6 +6592,11 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->ix = init->ix; allctr->alloc_no = init->alloc_no; + allctr->alloc_strat = init->alloc_strat; + + ASSERT(allctr->alloc_no >= ERTS_ALC_A_MIN && + allctr->alloc_no <= ERTS_ALC_A_MAX); + if (allctr->alloc_no < ERTS_ALC_A_MIN || ERTS_ALC_A_MAX < allctr->alloc_no) allctr->alloc_no = ERTS_ALC_A_INVALID; @@ -6219,6 +6616,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->t = 0; allctr->ramv = init->ramv; + allctr->atags = init->atags; allctr->main_carrier_size = init->mmbcs; #if HAVE_ERTS_MSEG @@ -6249,11 +6647,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) goto error; allctr->min_block_size = UNIT_CEILING(allctr->min_block_size + sizeof(FreeBlkFtr_t)); -#ifdef ERTS_SMP if (init->tpref) { Uint sz = ABLK_HDR_SZ; - sz += (init->fix ? - sizeof(ErtsAllctrFixDDBlock_t) : sizeof(ErtsAllctrDDBlock_t)); + sz += sizeof(ErtsAllctrDDBlock_t); sz = UNIT_CEILING(sz); if (sz > allctr->min_block_size) allctr->min_block_size = sz; @@ -6264,22 +6660,41 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->cpool.dc_list.last = NULL; allctr->cpool.abandon_limit = 0; allctr->cpool.disable_abandon = 0; - erts_atomic_init_nob(&allctr->cpool.stat.blocks_size, 0); - erts_atomic_init_nob(&allctr->cpool.stat.no_blocks, 0); + for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) { + erts_atomic_init_nob(&allctr->cpool.stat.blocks_size[i], 0); + erts_atomic_init_nob(&allctr->cpool.stat.no_blocks[i], 0); + } erts_atomic_init_nob(&allctr->cpool.stat.carriers_size, 0); erts_atomic_init_nob(&allctr->cpool.stat.no_carriers, 0); - allctr->cpool.check_limit_count = ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT; if (!init->ts && init->acul && init->acnl) { + ASSERT(allctr->add_mbc); + ASSERT(allctr->remove_mbc); + ASSERT(allctr->largest_fblk_in_mbc); + ASSERT(allctr->first_fblk_in_mbc); + ASSERT(allctr->next_fblk_in_mbc); + allctr->cpool.util_limit = init->acul; allctr->cpool.in_pool_limit = init->acnl; allctr->cpool.fblk_min_limit = init->acfml; + + if (allctr->alloc_strat == ERTS_ALC_S_FIRSTFIT) { + allctr->cpool.sentinel = &firstfit_carrier_pool.sentinel; + } + else if (allctr->alloc_no != ERTS_ALC_A_TEST) { + ERTS_INTERNAL_ERROR("Impossible carrier migration config."); + } } else { allctr->cpool.util_limit = 0; allctr->cpool.in_pool_limit = 0; allctr->cpool.fblk_min_limit = 0; } -#endif + + /* The invasive tests don't really care whether the pool is enabled or not, + * so we need to set this unconditionally for this allocator type. */ + if (allctr->alloc_no == ERTS_ALC_A_TEST) { + allctr->cpool.sentinel = &test_carrier_pool.sentinel; + } allctr->sbc_threshold = adjust_sbct(allctr, init->sbct); @@ -6288,7 +6703,6 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->mseg_opt.abs_shrink_th = ~((UWord) 0) / 100; #endif -#ifdef USE_THREADS if (init->ts) { allctr->thread_safe = 1; @@ -6299,7 +6713,6 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->debug.saved_tid = 0; #endif } -#endif if(!allctr->get_free_block || !allctr->link_free_block @@ -6312,14 +6725,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) if (allctr->mbc_header_size < sizeof(Carrier_t)) goto error; -#ifdef ERTS_SMP allctr->dd.use = 0; if (init->tpref) { allctr->dd.use = 1; init_dd_queue(&allctr->dd.q); allctr->dd.ix = init->ix; } -#endif allctr->mbc_header_size = (UNIT_CEILING(allctr->mbc_header_size + ABLK_HDR_SZ) - ABLK_HDR_SZ); @@ -6377,10 +6788,8 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) | CFLG_NO_CPOOL | CFLG_MAIN_CARRIER); if (!blk) { -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_destroy(&allctr->mutex); -#endif erts_exit(ERTS_ABORT_EXIT, "Failed to create main carrier for %salloc\n", init->name_prefix); @@ -6398,11 +6807,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) allctr->fix_shrink_scheduled = 0; for (i = 0; i < ERTS_ALC_NO_FIXED_SIZES; i++) { allctr->fix[i].type_size = init->fix_type_size[i]; + allctr->fix[i].type = ERTS_ALC_N2T(i + ERTS_ALC_N_MIN_A_FIXED_SIZE); allctr->fix[i].list_size = 0; allctr->fix[i].list = NULL; -#ifdef ERTS_SMP - ASSERT(allctr->fix[i].type_size >= sizeof(ErtsAllctrFixDDBlock_t)); -#endif if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) { allctr->fix[i].u.cpool.min_list_size = 0; allctr->fix[i].u.cpool.shrink_list = 0; @@ -6422,10 +6829,8 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init) error: -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_destroy(&allctr->mutex); -#endif return 0; @@ -6443,10 +6848,8 @@ erts_alcu_stop(Allctr_t *allctr) while (allctr->mbc_list.first) destroy_carrier(allctr, MBC_TO_FIRST_BLK(allctr, allctr->mbc_list.first), NULL); -#ifdef USE_THREADS if (allctr->thread_safe) erts_mtx_destroy(&allctr->mutex); -#endif } @@ -6455,14 +6858,16 @@ erts_alcu_stop(Allctr_t *allctr) void erts_alcu_init(AlcUInit_t *init) { -#ifdef ERTS_SMP - int i; - for (i = 0; i <= ERTS_ALC_A_MAX; i++) { - ErtsAlcCPoolData_t *sentinel = &carrier_pool[i].sentinel; - erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel); - erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel); - } -#endif + ErtsAlcCPoolData_t *sentinel; + + sentinel = &firstfit_carrier_pool.sentinel; + erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel); + erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel); + + sentinel = &test_carrier_pool.sentinel; + erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel); + erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel); + ERTS_CT_ASSERT(SBC_BLK_SZ_MASK == MBC_FBLK_SZ_MASK); /* see BLK_SZ */ #if HAVE_ERTS_MSEG ASSERT(erts_mseg_unit_size() == ERTS_SACRR_UNIT_SZ); @@ -6473,6 +6878,8 @@ erts_alcu_init(AlcUInit_t *init) #endif allow_sys_alloc_carriers = init->sac; + sys_page_size = erts_sys_get_page_size(); + #ifdef DEBUG carrier_alignment = sizeof(Unit_t); #endif @@ -6484,6 +6891,1072 @@ erts_alcu_init(AlcUInit_t *init) initialized = 1; } +/* ------------------------------------------------------------------------- */ + +/* Allocation histograms and carrier information is gathered by walking through + * all carriers associated with each allocator instance. This is done as + * aux_yield_work on the scheduler that owns each instance. + * + * Yielding is implemented by temporarily inserting a "dummy carrier" at the + * last position. It's permanently "busy" so it won't get picked up by someone + * else when in the carrier pool, and we never make the employer aware of it + * through callbacks so we can't accidentally allocate on it. + * + * Plain malloc/free is used to guarantee we won't allocate with the allocator + * we're scanning. */ + +/* Yield between carriers once this many blocks have been processed. Note that + * a single carrier scan may exceed this figure. */ +#ifndef DEBUG + #define BLOCKSCAN_REDUCTIONS (8000) +#else + #define BLOCKSCAN_REDUCTIONS (400) +#endif + +/* Abort a single carrier scan after this many blocks to prevent really large + * MBCs from blocking forever. */ +#define BLOCKSCAN_BAILOUT_THRESHOLD (16000) + +typedef struct alcu_blockscan { + /* A per-scheduler list used when multiple scans have been queued. The + * current scanner will always run until completion/abort before moving on + * to the next. */ + struct alcu_blockscan *scanner_queue; + + Allctr_t *allocator; + Process *process; + + int (*current_op)(struct alcu_blockscan *scanner); + int (*next_op)(struct alcu_blockscan *scanner); + int reductions; + + ErtsAlcCPoolData_t *cpool_cursor; + CarrierList_t *current_clist; + Carrier_t *clist_cursor; + Carrier_t dummy_carrier; + + /* Called if the process that started this job dies before we're done. */ + void (*abort)(void *user_data); + + /* Called on each carrier. The callback must return the number of blocks + * scanned to yield properly between carriers. + * + * Note that it's not possible to "yield back" into a carrier. */ + int (*scan)(Allctr_t *, void *user_data, Carrier_t *); + + /* Called when all carriers have been scanned. The callback may return + * non-zero to yield. */ + int (*finish)(void *user_data); + + void *user_data; +} blockscan_t; + +static Carrier_t *blockscan_restore_clist_cursor(blockscan_t *state) +{ + Carrier_t *cursor = state->clist_cursor; + + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + if (cursor == &state->dummy_carrier) { + cursor = cursor->next; + + unlink_carrier(state->current_clist, state->clist_cursor); + } + + return cursor; +} + +static void blockscan_save_clist_cursor(blockscan_t *state, Carrier_t *after) +{ + ASSERT(state->clist_cursor == (state->current_clist)->first || + state->clist_cursor == &state->dummy_carrier); + + state->clist_cursor = &state->dummy_carrier; + + (state->clist_cursor)->next = after->next; + (state->clist_cursor)->prev = after; + + relink_carrier(state->current_clist, state->clist_cursor); +} + +static int blockscan_clist_yielding(blockscan_t *state) +{ + Carrier_t *cursor = blockscan_restore_clist_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor) { + /* Skip dummy carriers inserted by another (concurrent) block scan. + * This can happen when scanning thread-safe allocators from multiple + * schedulers. */ + if (CARRIER_SZ(cursor) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + cursor); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_clist_cursor(state, cursor); + return 1; + } + } + + cursor = cursor->next; + } + + return 0; +} + +static ErtsAlcCPoolData_t *blockscan_restore_cpool_cursor(blockscan_t *state) +{ + ErtsAlcCPoolData_t *cursor; + + cursor = cpool_aint2cpd(cpool_read(&(state->cpool_cursor)->next)); + + if (state->cpool_cursor == &state->dummy_carrier.cpool) { + cpool_delete(state->allocator, state->allocator, &state->dummy_carrier); + } + + return cursor; +} + +static void blockscan_save_cpool_cursor(blockscan_t *state, + ErtsAlcCPoolData_t *after) +{ + ErtsAlcCPoolData_t *dummy_carrier, *prev_carrier, *next_carrier; + + dummy_carrier = &state->dummy_carrier.cpool; + + next_carrier = cpool_aint2cpd(cpool_mod_mark(&after->next)); + prev_carrier = cpool_aint2cpd(cpool_mod_mark(&next_carrier->prev)); + + cpool_init(&dummy_carrier->next, (erts_aint_t)next_carrier); + cpool_init(&dummy_carrier->prev, (erts_aint_t)prev_carrier); + + cpool_set_mod_marked(&prev_carrier->next, + (erts_aint_t)dummy_carrier, + (erts_aint_t)next_carrier); + cpool_set_mod_marked(&next_carrier->prev, + (erts_aint_t)dummy_carrier, + (erts_aint_t)prev_carrier); + + state->cpool_cursor = dummy_carrier; +} + +static int blockscan_cpool_yielding(blockscan_t *state) +{ + ErtsAlcCPoolData_t *sentinel, *cursor; + + sentinel = (state->allocator)->cpool.sentinel; + cursor = blockscan_restore_cpool_cursor(state); + + if (ERTS_PROC_IS_EXITING(state->process)) { + return 0; + } + + while (cursor != sentinel) { + Carrier_t *carrier; + erts_aint_t exp; + + /* When a deallocation happens on a pooled carrier it will be routed to + * its owner, so the only way to be sure that it isn't modified while + * scanning is to skip all carriers that aren't ours. The deallocations + * deferred to us will get handled when we're done. */ + while (cursor->orig_allctr != state->allocator) { + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + + if (cursor == sentinel) { + return 0; + } + } + + carrier = ErtsContainerStruct(cursor, Carrier_t, cpool); + exp = erts_atomic_read_rb(&carrier->allctr); + + if (exp & ERTS_CRR_ALCTR_FLG_IN_POOL) { + ASSERT(state->allocator == (Allctr_t*)(exp & ~ERTS_CRR_ALCTR_FLG_MASK)); + ASSERT(!(exp & ERTS_CRR_ALCTR_FLG_BUSY)); + + if (erts_atomic_cmpxchg_acqb(&carrier->allctr, + exp | ERTS_CRR_ALCTR_FLG_BUSY, + exp) == exp) { + /* Skip dummy carriers inserted by another (concurrent) block + * scan. This can happen when scanning thread-safe allocators + * from multiple schedulers. */ + if (CARRIER_SZ(carrier) > 0) { + int blocks_scanned = state->scan(state->allocator, + state->user_data, + carrier); + + state->reductions -= blocks_scanned; + + if (state->reductions <= 0) { + blockscan_save_cpool_cursor(state, cursor); + erts_atomic_set_relb(&carrier->allctr, exp); + + return 1; + } + } + + erts_atomic_set_relb(&carrier->allctr, exp); + } + } + + cursor = cpool_aint2cpd(cpool_read(&cursor->next)); + } + + return 0; +} + +static int blockscan_yield_helper(blockscan_t *state, + int (*yielding_op)(blockscan_t*)) +{ + /* Note that we don't check whether to abort here; only yielding_op knows + * whether the carrier is still in the list/pool. */ + + if ((state->allocator)->thread_safe) { + /* Locked scans have to be as short as possible. */ + state->reductions = 1; + + erts_mtx_lock(&(state->allocator)->mutex); + } else { + state->reductions = BLOCKSCAN_REDUCTIONS; + } + + if (yielding_op(state)) { + state->next_op = state->current_op; + } + + if ((state->allocator)->thread_safe) { + erts_mtx_unlock(&(state->allocator)->mutex); + } + + return 1; +} + +/* */ + +static int blockscan_finish(blockscan_t *state) +{ + if (ERTS_PROC_IS_EXITING(state->process)) { + state->abort(state->user_data); + return 0; + } + + state->current_op = blockscan_finish; + + return state->finish(state->user_data); +} + +static int blockscan_sweep_sbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_sbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_SBC, state->allocator); + state->current_clist = &(state->allocator)->sbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_sbcs; + state->next_op = blockscan_finish; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_mbcs(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_mbcs) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + state->current_clist = &(state->allocator)->mbc_list; + state->clist_cursor = (state->current_clist)->first; + } + + state->current_op = blockscan_sweep_mbcs; + state->next_op = blockscan_sweep_sbcs; + + return blockscan_yield_helper(state, blockscan_clist_yielding); +} + +static int blockscan_sweep_cpool(blockscan_t *state) +{ + if (state->current_op != blockscan_sweep_cpool) { + SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator); + state->cpool_cursor = (state->allocator)->cpool.sentinel; + } + + state->current_op = blockscan_sweep_cpool; + state->next_op = blockscan_sweep_mbcs; + + return blockscan_yield_helper(state, blockscan_cpool_yielding); +} + +static int blockscan_get_specific_allocator(int allocator_num, + int sched_id, + Allctr_t **out) +{ + ErtsAllocatorInfo_t *ai; + Allctr_t *allocator; + + ASSERT(allocator_num >= ERTS_ALC_A_MIN && + allocator_num <= ERTS_ALC_A_MAX); + ASSERT(sched_id >= 0 && sched_id <= erts_no_schedulers); + + ai = &erts_allctrs_info[allocator_num]; + + if (!ai->enabled || !ai->alloc_util) { + return 0; + } + + if (!ai->thr_spec) { + if (sched_id != 0) { + /* Only thread-specific allocators can be scanned on a specific + * scheduler. */ + return 0; + } + + allocator = (Allctr_t*)ai->extra; + ASSERT(allocator->thread_safe); + } else { + ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t*)ai->extra; + + ASSERT(sched_id < tspec->size); + + allocator = tspec->allctr[sched_id]; + } + + *out = allocator; + + return 1; +} + +static void blockscan_sched_trampoline(void *arg) +{ + ErtsAlcuBlockscanYieldData *yield; + ErtsSchedulerData *esdp; + blockscan_t *scanner; + + esdp = erts_get_scheduler_data(); + scanner = (blockscan_t*)arg; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (yield->last != NULL) { + blockscan_t *prev_scanner = yield->last; + + ASSERT(prev_scanner->scanner_queue == NULL); + + prev_scanner->scanner_queue = scanner; + } else { + yield->current = scanner; + } + + scanner->scanner_queue = NULL; + yield->last = scanner; + + erts_notify_new_aux_yield_work(esdp); +} + +static void blockscan_dispatch(blockscan_t *scanner, Process *owner, + Allctr_t *allocator, int sched_id) +{ + ASSERT(erts_get_scheduler_id() != 0); + + if (sched_id == 0) { + /* Global instances are always handled on the current scheduler. */ + sched_id = ERTS_ALC_GET_THR_IX(); + ASSERT(allocator->thread_safe); + } + + scanner->allocator = allocator; + scanner->process = owner; + + erts_proc_inc_refc(scanner->process); + + cpool_init_carrier_data(scanner->allocator, &scanner->dummy_carrier); + erts_atomic_init_nob(&(scanner->dummy_carrier).allctr, + (erts_aint_t)allocator | ERTS_CRR_ALCTR_FLG_BUSY); + + if (ERTS_ALC_IS_CPOOL_ENABLED(scanner->allocator)) { + scanner->next_op = blockscan_sweep_cpool; + } else { + scanner->next_op = blockscan_sweep_mbcs; + } + + /* Aux yield jobs can only be set up while running on the scheduler that + * services them, so we move there before continuing. + * + * We can't drive the scan itself through this since the scheduler will + * always finish *all* misc aux work in one go which makes it impossible to + * yield. */ + erts_schedule_misc_aux_work(sched_id, blockscan_sched_trampoline, scanner); +} + +int erts_handle_yielded_alcu_blockscan(ErtsSchedulerData *esdp, + ErtsAlcuBlockscanYieldData *yield) +{ + blockscan_t *scanner = yield->current; + + (void)esdp; + + ASSERT((yield->last == NULL) == (yield->current == NULL)); + + if (scanner) { + if (scanner->next_op(scanner)) { + return 1; + } + + ASSERT(ERTS_PROC_IS_EXITING(scanner->process) || + scanner->current_op == blockscan_finish); + + yield->current = scanner->scanner_queue; + + if (yield->current == NULL) { + ASSERT(scanner == yield->last); + yield->last = NULL; + } + + erts_proc_dec_refc(scanner->process); + + /* Plain free is intentional. */ + free(scanner); + + return yield->current != NULL; + } + + return 0; +} + +void erts_alcu_sched_spec_data_init(ErtsSchedulerData *esdp) +{ + ErtsAlcuBlockscanYieldData *yield; + + yield = ERTS_SCHED_AUX_YIELD_DATA(esdp, alcu_blockscan); + + yield->current = NULL; + yield->last = NULL; +} + +/* ------------------------------------------------------------------------- */ + +static ERTS_INLINE int u64_log2(Uint64 v) +{ + static const int log2_tab64[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return log2_tab64[((Uint64)((v - (v >> 1))*0x07EDD5E59A4E28C2)) >> 58]; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct hist_tree__ { + struct hist_tree__ *parent; + struct hist_tree__ *left; + struct hist_tree__ *right; + + int is_red; + + alcu_atag_t tag; + UWord histogram[1]; +} hist_tree_t; + +#define ERTS_RBT_PREFIX hist_tree +#define ERTS_RBT_T hist_tree_t +#define ERTS_RBT_KEY_T UWord +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) ((void)0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->tag) +#define ERTS_RBT_IS_LT(KX, KY) (KX < KY) +#define ERTS_RBT_IS_EQ(KX, KY) (KX == KY) +#define ERTS_RBT_WANT_FOREACH_DESTROY_YIELDING +#define ERTS_RBT_WANT_FOREACH_DESTROY +#define ERTS_RBT_WANT_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + hist_tree_rbt_yield_state_t hist_tree_yield; + hist_tree_t *hist_tree; + UWord hist_count; + + UWord hist_slot_start; + int hist_slot_count; + + UWord unscanned_size; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_ahist_t; + +static void gather_ahist_update(gather_ahist_t *state, UWord tag, UWord size) +{ + hist_tree_t *hist_node; + UWord size_interval; + int hist_slot; + + hist_node = hist_tree_rbt_lookup(state->hist_tree, tag); + + if (hist_node == NULL) { + /* Plain calloc is intentional. */ + hist_node = (hist_tree_t*)calloc(1, sizeof(hist_tree_t) + + (state->hist_slot_count - 1) * + sizeof(hist_node->histogram[0])); + hist_node->tag = tag; + + hist_tree_rbt_insert(&state->hist_tree, hist_node); + state->hist_count++; + } + + size_interval = (size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + hist_node->histogram[hist_slot]++; +} + +static int gather_ahist_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_ahist_t *state; + int blocks_scanned; + Block_t *block; + + state = (gather_ahist_t*)user_data; + blocks_scanned = 1; + + if (IS_SB_CARRIER(carrier)) { + alcu_atag_t tag; + + block = SBC2BLK(allocator, carrier); + + if (BLK_HAS_ATAG(block)) { + tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(tag)); + + gather_ahist_update(state, tag, SBC_BLK_SZ(block)); + } + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + ASSERT(IS_MB_CARRIER(carrier)); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + if (IS_ALLOCED_BLK(block) && BLK_HAS_ATAG(block)) { + alcu_atag_t tag = GET_BLK_ATAG(block); + + ASSERT(DBG_IS_VALID_ATAG(tag)); + + gather_ahist_update(state, tag, block_size); + } + + scanned_bytes += block_size; + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + state->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + return blocks_scanned; +} + +static int gather_ahist_append_result(hist_tree_t *node, void *arg, Sint reds) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + Eterm histogram_tuple, tag_tuple; + + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + hp = erts_produce_heap(&state->msg_factory, 7 + state->hist_slot_count, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(node->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + hp[0] = make_arityval(3); + hp[1] = ATAG_ID(node->tag); + hp[2] = alloc_type_atoms[ATAG_TYPE(node->tag)]; + hp[3] = histogram_tuple; + + tag_tuple = make_tuple(hp); + hp += 4; + + state->result_list = CONS(hp, tag_tuple, state->result_list); + + /* Plain free is intentional. */ + free(node); + return 1; +} + +static void gather_ahist_send(gather_ahist_t *state) +{ + Eterm result_tuple, unscanned_size, task_ref; + + Uint term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->hist_count > 0)); + ASSERT(state->building_result); + + term_size = 4 + erts_iref_storage_heap_size(&state->iref); + term_size += IS_USMALL(0, state->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + unscanned_size = bld_unstable_uint(&hp, NULL, state->unscanned_size); + + hp[0] = make_arityval(3); + hp[1] = task_ref; + hp[2] = unscanned_size; + hp[3] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_ahist_finish(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, unscanned size, [{Tag, {Histogram}} | Rest]} */ + minimum_size = 4 + erts_iref_storage_heap_size(&state->iref) + + state->hist_count * (7 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + ERTS_RBT_YIELD_STAT_INIT(&state->hist_tree_yield); + + state->result_list = NIL; + state->building_result = 1; + } + + if (!hist_tree_rbt_foreach_destroy_yielding(&state->hist_tree, + &gather_ahist_append_result, + state, + &state->hist_tree_yield, + BLOCKSCAN_REDUCTIONS)) { + return 1; + } + + gather_ahist_send(state); + + return 0; +} + +static int gather_ahist_destroy_result(hist_tree_t *node, void *arg, Sint reds) +{ + (void)arg; + free(node); + return 1; +} + +static void gather_ahist_abort(void *arg) +{ + gather_ahist_t *state = (gather_ahist_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + hist_tree_rbt_foreach_destroy(&state->hist_tree, + &gather_ahist_destroy_result, + NULL); +} + +int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_ahist_t *gather_state; + blockscan_t *scanner; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } + + ensure_atoms_initialized(allocator); + + /* Plain calloc is intentional. */ + gather_state = (gather_ahist_t*)calloc(1, sizeof(gather_ahist_t)); + scanner = &gather_state->common; + + scanner->abort = gather_ahist_abort; + scanner->scan = gather_ahist_scan; + scanner->finish = gather_ahist_finish; + scanner->user_data = gather_state; + + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + +/* ------------------------------------------------------------------------- */ + +typedef struct chist_node__ { + struct chist_node__ *next; + + UWord carrier_size; + UWord unscanned_size; + UWord allocated_size; + + /* BLOCKSCAN_BAILOUT_THRESHOLD guarantees we won't overflow this or the + * counters in the free block histogram. */ + int allocated_count; + int flags; + + int histogram[1]; +} chist_node_t; + +typedef struct { + blockscan_t common; + + ErtsIRefStorage iref; + Process *process; + + Eterm allocator_desc; + + chist_node_t *info_list; + UWord info_count; + + UWord hist_slot_start; + int hist_slot_count; + + ErtsHeapFactory msg_factory; + int building_result; + Eterm result_list; +} gather_cinfo_t; + +static int gather_cinfo_scan(Allctr_t *allocator, + void *user_data, + Carrier_t *carrier) +{ + gather_cinfo_t *state; + chist_node_t *node; + int blocks_scanned; + Block_t *block; + + state = (gather_cinfo_t*)user_data; + node = calloc(1, sizeof(chist_node_t) + + (state->hist_slot_count - 1) * + sizeof(node->histogram[0])); + blocks_scanned = 1; + + /* ERTS_CRR_ALCTR_FLG_BUSY is ignored since we've set it ourselves and it + * would be misleading to include it. */ + node->flags = erts_atomic_read_rb(&carrier->allctr) & + (ERTS_CRR_ALCTR_FLG_MASK & ~ERTS_CRR_ALCTR_FLG_BUSY); + node->carrier_size = CARRIER_SZ(carrier); + + if (IS_SB_CARRIER(carrier)) { + UWord block_size; + + block = SBC2BLK(allocator, carrier); + block_size = SBC_BLK_SZ(block); + + node->allocated_size = block_size; + node->allocated_count = 1; + } else { + UWord scanned_bytes = MBC_HEADER_SIZE(allocator); + + block = MBC_TO_FIRST_BLK(allocator, carrier); + + while (1) { + UWord block_size = MBC_BLK_SZ(block); + + scanned_bytes += block_size; + + if (IS_ALLOCED_BLK(block)) { + node->allocated_size += block_size; + node->allocated_count++; + } else { + UWord size_interval; + int hist_slot; + + size_interval = (block_size / state->hist_slot_start); + size_interval = u64_log2(size_interval + 1); + + hist_slot = MIN(size_interval, state->hist_slot_count - 1); + + node->histogram[hist_slot]++; + } + + if (blocks_scanned >= BLOCKSCAN_BAILOUT_THRESHOLD) { + node->unscanned_size += CARRIER_SZ(carrier) - scanned_bytes; + break; + } else if (IS_LAST_BLK(block)) { + break; + } + + block = NXT_BLK(block); + blocks_scanned++; + } + } + + node->next = state->info_list; + state->info_list = node; + state->info_count++; + + return blocks_scanned; +} + +static void gather_cinfo_append_result(gather_cinfo_t *state, + chist_node_t *info) +{ + Eterm carrier_size, unscanned_size, allocated_size; + Eterm histogram_tuple, carrier_tuple; + + Uint term_size; + Eterm *hp; + int ix; + + ASSERT(state->building_result); + + term_size = 11 + state->hist_slot_count; + term_size += IS_USMALL(0, info->carrier_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->unscanned_size) ? 0 : BIG_UINT_HEAP_SIZE; + term_size += IS_USMALL(0, info->allocated_size) ? 0 : BIG_UINT_HEAP_SIZE; + + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + hp[0] = make_arityval(state->hist_slot_count); + + for (ix = 0; ix < state->hist_slot_count; ix++) { + hp[1 + ix] = make_small(info->histogram[ix]); + } + + histogram_tuple = make_tuple(hp); + hp += 1 + state->hist_slot_count; + + carrier_size = bld_unstable_uint(&hp, NULL, info->carrier_size); + unscanned_size = bld_unstable_uint(&hp, NULL, info->unscanned_size); + allocated_size = bld_unstable_uint(&hp, NULL, info->allocated_size); + + hp[0] = make_arityval(7); + hp[1] = state->allocator_desc; + hp[2] = carrier_size; + hp[3] = unscanned_size; + hp[4] = allocated_size; + hp[5] = make_small(info->allocated_count); + hp[6] = (info->flags & ERTS_CRR_ALCTR_FLG_IN_POOL) ? am_true : am_false; + hp[7] = histogram_tuple; + + carrier_tuple = make_tuple(hp); + hp += 8; + + state->result_list = CONS(hp, carrier_tuple, state->result_list); + + free(info); +} + +static void gather_cinfo_send(gather_cinfo_t *state) +{ + Eterm result_tuple, task_ref; + + int term_size; + Eterm *hp; + + ASSERT((state->result_list == NIL) ^ (state->info_count > 0)); + ASSERT(state->building_result); + + term_size = 3 + erts_iref_storage_heap_size(&state->iref); + hp = erts_produce_heap(&state->msg_factory, term_size, 0); + + task_ref = erts_iref_storage_make_ref(&state->iref, &hp, + &(state->msg_factory.message)->hfrag.off_heap, 0); + + hp[0] = make_arityval(2); + hp[1] = task_ref; + hp[2] = state->result_list; + + result_tuple = make_tuple(hp); + + erts_factory_trim_and_close(&state->msg_factory, &result_tuple, 1); + + erts_queue_message(state->process, 0, state->msg_factory.message, + result_tuple, am_system); +} + +static int gather_cinfo_finish(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + int reductions = BLOCKSCAN_REDUCTIONS; + + if (!state->building_result) { + ErtsMessage *message; + Uint minimum_size; + Eterm *hp; + + /* {Ref, [{Carrier size, unscanned size, allocated size, + * allocated block count, {Free block histogram}} | Rest]} */ + minimum_size = 3 + erts_iref_storage_heap_size(&state->iref) + + state->info_count * (11 + state->hist_slot_count); + + message = erts_alloc_message(minimum_size, &hp); + erts_factory_selfcontained_message_init(&state->msg_factory, + message, hp); + + state->result_list = NIL; + state->building_result = 1; + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + gather_cinfo_append_result(state, current); + + if (reductions-- <= 0) { + return 1; + } + } + + gather_cinfo_send(state); + + return 0; +} + +static void gather_cinfo_abort(void *arg) +{ + gather_cinfo_t *state = (gather_cinfo_t*)arg; + + if (state->building_result) { + erts_factory_undo(&state->msg_factory); + } + + while (state->info_list) { + chist_node_t *current = state->info_list; + state->info_list = current->next; + + free(current); + } +} + +int erts_alcu_gather_carrier_info(struct process *p, int allocator_num, + int sched_id, int hist_width, + UWord hist_start, Eterm ref) +{ + gather_cinfo_t *gather_state; + blockscan_t *scanner; + + const char *allocator_desc; + Allctr_t *allocator; + + ASSERT(is_internal_ref(ref)); + + if (!blockscan_get_specific_allocator(allocator_num, + sched_id, + &allocator)) { + return 0; + } + + allocator_desc = ERTS_ALC_A2AD(allocator_num); + + /* Plain calloc is intentional. */ + gather_state = (gather_cinfo_t*)calloc(1, sizeof(gather_cinfo_t)); + scanner = &gather_state->common; + + scanner->abort = gather_cinfo_abort; + scanner->scan = gather_cinfo_scan; + scanner->finish = gather_cinfo_finish; + scanner->user_data = gather_state; + + gather_state->allocator_desc = erts_atom_put((byte *)allocator_desc, + sys_strlen(allocator_desc), + ERTS_ATOM_ENC_LATIN1, 1); + erts_iref_storage_save(&gather_state->iref, ref); + gather_state->hist_slot_start = hist_start * 2; + gather_state->hist_slot_count = hist_width; + gather_state->process = p; + + blockscan_dispatch(scanner, p, allocator, sched_id); + + return 1; +} + /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ * NOTE: erts_alcu_test() is only supposed to be used for testing. * @@ -6530,7 +8003,6 @@ erts_alcu_test(UWord op, UWord a1, UWord a2) case 0x01c: return (UWord) BLK_TO_MBC((Block_t*) a1); case 0x01d: ((Allctr_t*) a1)->add_mbc((Allctr_t*)a1, (Carrier_t*)a2); break; case 0x01e: ((Allctr_t*) a1)->remove_mbc((Allctr_t*)a1, (Carrier_t*)a2); break; -#ifdef ERTS_SMP case 0x01f: return (UWord) sizeof(ErtsAlcCrrPool_t); case 0x020: SET_CARRIER_HDR((Carrier_t *) a2, 0, SCH_SYS_ALLOC|SCH_MBC, (Allctr_t *) a1); @@ -6544,14 +8016,6 @@ erts_alcu_test(UWord op, UWord a1, UWord a2) return (UWord) a2; case 0x023: return (UWord) cpool_is_empty((Allctr_t *) a1); case 0x024: return (UWord) cpool_dbg_is_in_pool((Allctr_t *) a1, (Carrier_t *) a2); -#else - case 0x01f: return (UWord) 0; - case 0x020: return (UWord) 0; - case 0x021: return (UWord) 0; - case 0x022: return (UWord) 0; - case 0x023: return (UWord) 0; - case 0x024: return (UWord) 0; -#endif case 0x025: /* UMEM2BLK_TEST*/ #ifdef DEBUG # ifdef HARD_DEBUG @@ -6609,15 +8073,12 @@ erts_alcu_verify_unused(Allctr_t *allctr) void erts_alcu_verify_unused_ts(Allctr_t *allctr) { -#ifdef USE_THREADS erts_mtx_lock(&allctr->mutex); -#endif erts_alcu_verify_unused(allctr); -#ifdef USE_THREADS erts_mtx_unlock(&allctr->mutex); -#endif } + #ifdef DEBUG int is_sbc_blk(Block_t* blk) { |