aboutsummaryrefslogtreecommitdiffstats
path: root/erts
diff options
context:
space:
mode:
authorJohn Högberg <[email protected]>2018-08-29 16:21:17 +0200
committerJohn Högberg <[email protected]>2018-12-07 12:28:51 +0100
commit1d549eddbeeccf7d108310466de5f63af04b004c (patch)
tree6c3623605f7f1b80ec732caac2d4adb1b6bb08d3 /erts
parent4b9836a2dada60005e01067af04a0e0d9361fcee (diff)
downloadotp-1d549eddbeeccf7d108310466de5f63af04b004c.tar.gz
otp-1d549eddbeeccf7d108310466de5f63af04b004c.tar.bz2
otp-1d549eddbeeccf7d108310466de5f63af04b004c.zip
Mark free blocks in pooled carriers as unused (MADV_FREE)
This lets the OS reclaim the physical memory associated with these blocks which reduces the impact of long-lived awkward allocations. A small allocated block will still keep a huge carrier alive, but the unused part of the carrier will now be available to the OS. Co-authored-by: Dmytro Lytovchenko <[email protected]>
Diffstat (limited to 'erts')
-rw-r--r--erts/configure.in2
-rw-r--r--erts/emulator/beam/erl_afit_alloc.c2
-rw-r--r--erts/emulator/beam/erl_alloc_util.c196
-rw-r--r--erts/emulator/beam/erl_alloc_util.h4
-rw-r--r--erts/emulator/beam/erl_ao_firstfit_alloc.c61
-rw-r--r--erts/emulator/beam/erl_bestfit_alloc.c2
-rw-r--r--erts/emulator/beam/erl_goodfit_alloc.c2
-rw-r--r--erts/emulator/sys/common/erl_mmap.h57
-rw-r--r--erts/emulator/sys/win32/sys.c4
9 files changed, 325 insertions, 5 deletions
diff --git a/erts/configure.in b/erts/configure.in
index d211b81878..19fadc713e 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -1439,7 +1439,7 @@ dnl Some Linuxes needs <sys/socketio.h> instead of <sys/sockio.h>
dnl
AC_CHECK_HEADERS(fcntl.h limits.h unistd.h syslog.h dlfcn.h ieeefp.h \
sys/types.h sys/stropts.h sys/sysctl.h \
- sys/ioctl.h sys/time.h sys/uio.h \
+ sys/ioctl.h sys/time.h sys/uio.h sys/mman.h \
sys/socket.h sys/sockio.h sys/socketio.h \
net/errno.h malloc.h arpa/nameser.h libdlpi.h \
pty.h util.h libutil.h utmp.h langinfo.h poll.h sdkddkver.h)
diff --git a/erts/emulator/beam/erl_afit_alloc.c b/erts/emulator/beam/erl_afit_alloc.c
index 38289ea78a..f07137c883 100644
--- a/erts/emulator/beam/erl_afit_alloc.c
+++ b/erts/emulator/beam/erl_afit_alloc.c
@@ -102,6 +102,8 @@ erts_afalc_start(AFAllctr_t *afallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index b7a8b9c2d0..ce18a23228 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -42,6 +42,7 @@
#include "global.h"
#include "big.h"
+#include "erl_mmap.h"
#include "erl_mtrace.h"
#define GET_ERL_ALLOC_UTIL_IMPL
#include "erl_alloc_util.h"
@@ -90,6 +91,8 @@ static int initialized = 0;
#define SYS_ALLOC_CARRIER_FLOOR(X) ((X) & SYS_ALLOC_CARRIER_MASK)
#define SYS_ALLOC_CARRIER_CEILING(X) \
SYS_ALLOC_CARRIER_FLOOR((X) + INV_SYS_ALLOC_CARRIER_MASK)
+#define SYS_PAGE_SIZE (sys_page_size)
+#define SYS_PAGE_SZ_MASK ((UWord)(SYS_PAGE_SIZE - 1))
#if 0
/* Can be useful for debugging */
@@ -98,6 +101,8 @@ static int initialized = 0;
/* alloc_util global parameters */
static Uint sys_alloc_carrier_size;
+static Uint sys_page_size;
+
#if HAVE_ERTS_MSEG
static Uint max_mseg_carriers;
#endif
@@ -2540,9 +2545,155 @@ mbc_alloc(Allctr_t *allctr, Uint size)
return BLK2UMEM(blk);
}
+typedef struct {
+ char *ptr;
+ UWord size;
+} ErtsMemDiscardRegion;
+
+/* Construct a discard region for the user memory of a free block, letting the
+ * OS reclaim its physical memory when required.
+ *
+ * Note that we're ignoring both the footer and everything that comes before
+ * the minimum block size as the allocator uses those areas to manage the
+ * block. */
+static void ERTS_INLINE
+mem_discard_start(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *out)
+{
+ UWord size = BLK_SZ(block);
+
+ ASSERT(size >= allocator->min_block_size);
+
+ if (size > (allocator->min_block_size + FBLK_FTR_SZ)) {
+ out->size = size - allocator->min_block_size - FBLK_FTR_SZ;
+ } else {
+ out->size = 0;
+ }
+
+ out->ptr = (char*)block + allocator->min_block_size;
+}
+
+/* Expands a discard region into a neighboring free block, allowing us to
+ * discard the block header and first page.
+ *
+ * This is very important in small-allocation scenarios where no single block
+ * is large enough to be discarded on its own. */
+static void ERTS_INLINE
+mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor,
+ ErtsMemDiscardRegion *region)
+{
+ char *neighbor_start;
+
+ ASSERT(IS_FREE_BLK(neighbor));
+
+ neighbor_start = (char*)neighbor;
+
+ if (region->ptr >= neighbor_start) {
+ char *region_start_page;
+
+ region_start_page = region->ptr - SYS_PAGE_SIZE;
+ region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK);
+
+ /* Expand if our first page begins within the previous free block's
+ * unused data. */
+ if (region_start_page >= (neighbor_start + allocator->min_block_size)) {
+ region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ;
+ region->ptr = region_start_page;
+ }
+ } else {
+ char *region_end_page;
+ UWord neighbor_size;
+
+ ASSERT(region->ptr <= neighbor_start);
+
+ region_end_page = region->ptr + region->size + SYS_PAGE_SIZE;
+ region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK);
+
+ neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ;
+
+ /* Expand if our last page ends anywhere within the next free block,
+ * sans the footer we'll inherit. */
+ if (region_end_page < neighbor_start + neighbor_size) {
+ region->size += region_end_page - (region->ptr + region->size);
+ }
+ }
+}
+
+static void ERTS_INLINE
+mem_discard_finish(Allctr_t *allocator, Block_t *block,
+ ErtsMemDiscardRegion *region)
+{
+#ifdef DEBUG
+ char *block_start, *block_end;
+ UWord block_size;
+
+ block_size = BLK_SZ(block);
+
+ /* Ensure that the region is completely covered by the legal area of the
+ * free block. This must hold even when the region is too small to be
+ * discarded. */
+ if (region->size > 0) {
+ ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ);
+
+ block_start = (char*)block + allocator->min_block_size;
+ block_end = (char*)block + block_size - FBLK_FTR_SZ;
+
+ ASSERT(region->size == 0 ||
+ (region->ptr + region->size <= block_end &&
+ region->ptr >= block_start &&
+ region->size <= block_size));
+ }
+#else
+ (void)allocator;
+ (void)block;
+#endif
+
+ if (region->size > SYS_PAGE_SIZE) {
+ UWord align_offset, size;
+ char *ptr;
+
+ align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK);
+
+ size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK;
+ ptr = region->ptr + align_offset;
+
+ if (size > 0) {
+ ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK));
+ ASSERT(!(size & SYS_PAGE_SZ_MASK));
+
+ erts_mem_discard(ptr, size);
+ }
+ }
+}
+
+static void
+carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier)
+{
+ static const int MAX_BLOCKS_TO_DISCARD = 100;
+ Block_t *block;
+ int i;
+
+ block = allocator->first_fblk_in_mbc(allocator, carrier);
+ i = 0;
+
+ while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) {
+ ErtsMemDiscardRegion region;
+
+ ASSERT(IS_FREE_BLK(block));
+
+ mem_discard_start(allocator, block, &region);
+ mem_discard_finish(allocator, block, &region);
+
+ block = allocator->next_fblk_in_mbc(allocator, carrier, block);
+ i++;
+ }
+}
+
static void
mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp)
{
+ ErtsMemDiscardRegion discard_region = {0};
+ int discard;
Uint is_first_blk;
Uint is_last_blk;
Uint blk_sz;
@@ -2558,6 +2709,21 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
ASSERT(IS_MBC_BLK(blk));
ASSERT(blk_sz >= allctr->min_block_size);
+#ifndef DEBUG
+ /* We want to mark freed blocks as reclaimable to the OS, but it's a fairly
+ * expensive operation which doesn't do much good if we use it again soon
+ * after, so we limit it to deallocations on pooled carriers. */
+ discard = busy_pcrr_pp && *busy_pcrr_pp;
+#else
+ /* Always discard in debug mode, regardless of whether we're in the pool or
+ * not. */
+ discard = 1;
+#endif
+
+ if (discard) {
+ mem_discard_start(allctr, blk, &discard_region);
+ }
+
HARD_CHECK_BLK_CARRIER(allctr, blk);
crr = ABLK_TO_MBC(blk);
@@ -2575,6 +2741,10 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
blk = PREV_BLK(blk);
(*allctr->unlink_free_block)(allctr, blk);
+ if (discard) {
+ mem_discard_coalesce(allctr, blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(blk);
is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2590,6 +2760,11 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
if (IS_FREE_BLK(nxt_blk)) {
/* Coalesce with next block... */
(*allctr->unlink_free_block)(allctr, nxt_blk);
+
+ if (discard) {
+ mem_discard_coalesce(allctr, nxt_blk, &discard_region);
+ }
+
blk_sz += MBC_FBLK_SZ(nxt_blk);
SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2625,10 +2800,16 @@ mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp
else {
(*allctr->link_free_block)(allctr, blk);
HARD_CHECK_BLK_CARRIER(allctr, blk);
- if (busy_pcrr_pp && *busy_pcrr_pp)
+
+ if (discard) {
+ mem_discard_finish(allctr, blk, &discard_region);
+ }
+
+ if (busy_pcrr_pp && *busy_pcrr_pp) {
update_pooled_tree(allctr, crr, blk_sz);
- else
+ } else {
check_abandon_carrier(allctr, blk, busy_pcrr_pp);
+ }
}
}
@@ -3781,6 +3962,9 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr)
unlink_carrier(&allctr->mbc_list, crr);
allctr->remove_mbc(allctr, crr);
+ /* Mark our free blocks as unused and reclaimable to the OS. */
+ carrier_mem_discard_free_blocks(allctr, crr);
+
cpool_insert(allctr, crr);
@@ -6471,6 +6655,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
erts_atomic_init_nob(&allctr->cpool.stat.carriers_size, 0);
erts_atomic_init_nob(&allctr->cpool.stat.no_carriers, 0);
if (!init->ts && init->acul && init->acnl) {
+ ASSERT(allctr->add_mbc);
+ ASSERT(allctr->remove_mbc);
+ ASSERT(allctr->largest_fblk_in_mbc);
+ ASSERT(allctr->first_fblk_in_mbc);
+ ASSERT(allctr->next_fblk_in_mbc);
+
allctr->cpool.util_limit = init->acul;
allctr->cpool.in_pool_limit = init->acnl;
allctr->cpool.fblk_min_limit = init->acfml;
@@ -6676,6 +6866,8 @@ erts_alcu_init(AlcUInit_t *init)
#endif
allow_sys_alloc_carriers = init->sac;
+ sys_page_size = erts_sys_get_page_size();
+
#ifdef DEBUG
carrier_alignment = sizeof(Unit_t);
#endif
diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h
index 9ab8589bf3..ea1afe8f58 100644
--- a/erts/emulator/beam/erl_alloc_util.h
+++ b/erts/emulator/beam/erl_alloc_util.h
@@ -684,10 +684,12 @@ struct Allctr_t_ {
void (*creating_mbc) (Allctr_t *, Carrier_t *);
void (*destroying_mbc) (Allctr_t *, Carrier_t *);
- /* The three callbacks below are needed to support carrier migration */
+ /* The five callbacks below are needed to support carrier migration. */
void (*add_mbc) (Allctr_t *, Carrier_t *);
void (*remove_mbc) (Allctr_t *, Carrier_t *);
UWord (*largest_fblk_in_mbc) (Allctr_t *, Carrier_t *);
+ Block_t * (*first_fblk_in_mbc) (Allctr_t *, Carrier_t *);
+ Block_t * (*next_fblk_in_mbc) (Allctr_t *, Carrier_t *, Block_t *);
#if HAVE_ERTS_MSEG
void* (*mseg_alloc)(Allctr_t*, Uint *size_p, Uint flags);
diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c
index 0e3e4c890a..f2ad2f6532 100644
--- a/erts/emulator/beam/erl_ao_firstfit_alloc.c
+++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c
@@ -241,6 +241,9 @@ static void aoff_add_mbc(Allctr_t*, Carrier_t*);
static void aoff_remove_mbc(Allctr_t*, Carrier_t*);
static UWord aoff_largest_fblk_in_mbc(Allctr_t*, Carrier_t*);
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *, Carrier_t *);
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *, Carrier_t *, Block_t *);
+
/* Generic tree functions used by both carrier and block trees. */
static void rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del);
static void rbt_insert(enum AOFFSortOrder, AOFF_RBTree_t** root, AOFF_RBTree_t* blk);
@@ -326,6 +329,8 @@ erts_aoffalc_start(AOFFAllctr_t *alc,
allctr->add_mbc = aoff_add_mbc;
allctr->remove_mbc = aoff_remove_mbc;
allctr->largest_fblk_in_mbc = aoff_largest_fblk_in_mbc;
+ allctr->first_fblk_in_mbc = aoff_first_fblk_in_mbc;
+ allctr->next_fblk_in_mbc = aoff_next_fblk_in_mbc;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
@@ -1058,6 +1063,62 @@ static UWord aoff_largest_fblk_in_mbc(Allctr_t* allctr, Carrier_t* carrier)
return crr->rbt_node.hdr.bhdr;
}
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier)
+{
+ AOFF_Carrier_t *crr = (AOFF_Carrier_t*)carrier;
+
+ (void)allctr;
+
+ if (crr->root) {
+ AOFF_RBTree_t *blk;
+
+ /* Descend to the rightmost block of the tree. */
+ for (blk = crr->root; blk->right; blk = blk->right);
+
+ return (Block_t*)blk;
+ }
+
+ return NULL;
+}
+
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier,
+ Block_t *block)
+{
+ AOFF_RBTree_t *parent, *blk;
+
+ (void)allctr;
+ (void)carrier;
+
+ blk = (AOFF_RBTree_t*)block;
+
+ if (blk->left) {
+ /* Descend to the rightmost block of the left subtree. */
+ for (blk = blk->left; blk->right; blk = blk->right);
+
+ return (Block_t*)blk;
+ }
+
+ while (blk->parent) {
+ parent = blk->parent;
+
+ /* If we ascend from the right we know we haven't visited our parent
+ * yet, because we always descend as far as we can to the right when
+ * entering a subtree. */
+ if (parent->right == blk) {
+ ASSERT(parent->left != blk);
+ return (Block_t*)parent;
+ }
+
+ /* If we ascend from the left we know we've already visited our
+ * parent, and will need to keep ascending until we do so from the
+ * right or reach the end of the tree. */
+ ASSERT(parent->left == blk);
+ blk = parent;
+ }
+
+ return NULL;
+}
+
/*
* info_options()
*/
diff --git a/erts/emulator/beam/erl_bestfit_alloc.c b/erts/emulator/beam/erl_bestfit_alloc.c
index 9cb1199c2a..ca81c14b96 100644
--- a/erts/emulator/beam/erl_bestfit_alloc.c
+++ b/erts/emulator/beam/erl_bestfit_alloc.c
@@ -209,6 +209,8 @@ erts_bfalc_start(BFAllctr_t *bfallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_goodfit_alloc.c b/erts/emulator/beam/erl_goodfit_alloc.c
index 01d4aa54ff..68b9579433 100644
--- a/erts/emulator/beam/erl_goodfit_alloc.c
+++ b/erts/emulator/beam/erl_goodfit_alloc.c
@@ -226,6 +226,8 @@ erts_gfalc_start(GFAllctr_t *gfallctr,
allctr->add_mbc = NULL;
allctr->remove_mbc = NULL;
allctr->largest_fblk_in_mbc = NULL;
+ allctr->first_fblk_in_mbc = NULL;
+ allctr->next_fblk_in_mbc = NULL;
allctr->init_atoms = init_atoms;
#ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
index 539daea419..e1ff0fe80a 100644
--- a/erts/emulator/sys/common/erl_mmap.h
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -176,4 +176,61 @@ void hard_dbg_remove_mseg(void* seg, UWord sz);
#endif /* HAVE_ERTS_MMAP */
+/* Marks the given memory region as unused without freeing it, letting the OS
+ * reclaim its physical memory with the promise that we'll get it back (without
+ * its contents) the next time it's accessed. */
+ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef VALGRIND
+ #include <valgrind/memcheck.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+ }
+#elif defined(DEBUG)
+ /* Try to provoke crashes by filling the discard region with garbage. It's
+ * extremely hard to find bugs where we've discarded too much, as the
+ * region often retains its old contents if it's accessed before the OS
+ * reclaims it. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ static const char pattern[] = "DISCARDED";
+ char *data;
+ int i;
+
+ for(i = 0, data = ptr; i < size; i++) {
+ data[i] = pattern[i % sizeof(pattern)];
+ }
+ }
+#elif defined(HAVE_SYS_MMAN_H)
+ #include <sys/mman.h>
+
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ #ifdef MADV_FREE
+ /* This is preferred as it doesn't necessarily free the pages right
+ * away, which is a bit faster than MADV_DONTNEED. */
+ madvise(ptr, size, MADV_FREE);
+ #else
+ madvise(ptr, size, MADV_DONTNEED);
+ #endif
+ }
+#elif defined(_WIN32)
+ #include <winbase.h>
+
+ /* MEM_RESET is defined on all supported versions of Windows, and has the
+ * same semantics as MADV_FREE. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE);
+ }
+#else
+ /* Dummy implementation. */
+ ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+ (void)ptr;
+ (void)size;
+ }
+#endif
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
#endif /* ERL_MMAP_H__ */
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index a1c630d68a..b95aadc9b2 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -186,7 +186,9 @@ void sys_primitive_init(HMODULE beam)
UWord
erts_sys_get_page_size(void)
{
- return (UWord) 4*1024; /* Guess 4 KB */
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return (UWord)info.dwPageSize;
}
Uint