diff options
author | Sverker Eriksson <[email protected]> | 2013-10-15 15:19:43 +0200 |
---|---|---|
committer | Sverker Eriksson <[email protected]> | 2013-10-15 15:19:43 +0200 |
commit | 356c1e9312a640ac211b6d5d4110372742acfcde (patch) | |
tree | 1961a33422f0f4bdae6c1bc647b93a2441fe41e0 /erts/emulator/sys/common | |
parent | bbe80d2ad4ef0ca607ba557102212cb7c9a06e2c (diff) | |
parent | 5db2c05af6efaf636b2e41e3a1f305c592a71f34 (diff) | |
download | otp-356c1e9312a640ac211b6d5d4110372742acfcde.tar.gz otp-356c1e9312a640ac211b6d5d4110372742acfcde.tar.bz2 otp-356c1e9312a640ac211b6d5d4110372742acfcde.zip |
Merge branch 'rickard-sverker/supercarrier/OTP-11149' into maint
* rickard-sverker/supercarrier/OTP-11149: (29 commits)
erts: Add test case for erts_mmap
erts: Add mutex to init_atoms in erts_mmap.c
erts: Fix lock violation for init_atoms in erl_mmap.c
erts: Fix misc minor bugs in supercarrier initialization
erts: Add erts_mmap stats
erts: Add erts_bld_tupleX macros
erts: Rename erts_bld_atom_uint_2tup_list to *_uword_*
erts: Fix bug in lookup_free_seg
erts: Fix race bug in erts_munmap
erts: Add HARD_DBG_MSEG
erts: Refactor rbt_insert in erl_mmap
erts: erts_mmap improved free seg desc management
erts: Add documentation for +MMsc* system flags
erts: Allow page aligned erts_munmap()
erts: Sort tree in super aligned sizes (SA_SZ_ADDR_ORDER)
erts: Fix ASSERT bug and void* arithmetics
erts: Add mmap argument to erts_debug:get_internal_state
erts: Improve erts_mmap out of free descriptor management
erts: Cleanup erl_mmap
erts: Add __func__ to ERTS_ASSERT macro
...
Diffstat (limited to 'erts/emulator/sys/common')
-rw-r--r-- | erts/emulator/sys/common/erl_mmap.c | 2832 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_mmap.h | 134 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_mseg.c | 876 | ||||
-rw-r--r-- | erts/emulator/sys/common/erl_mseg.h | 36 |
4 files changed, 3133 insertions, 745 deletions
diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c new file mode 100644 index 0000000000..a9da7430fb --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap.c @@ -0,0 +1,2832 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2002-2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include "sys.h" +#include "erl_process.h" +#include "erl_smp.h" +#include "atom.h" +#include "erl_mmap.h" +#include <stddef.h> + +/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */ + +#if defined(DEBUG) || 0 +# undef ERTS_MMAP_DEBUG +# define ERTS_MMAP_DEBUG +# ifndef ERTS_MMAP_OP_RINGBUF_SZ +# define ERTS_MMAP_OP_RINGBUF_SZ 100 +# endif +#endif + +#ifndef ERTS_MMAP_OP_RINGBUF_SZ +# define ERTS_MMAP_OP_RINGBUF_SZ 0 +#endif + +/* #define ERTS_MMAP_DEBUG_FILL_AREAS */ + +#ifdef ERTS_MMAP_DEBUG +# define ERTS_MMAP_ASSERT ERTS_ASSERT +#else +# define ERTS_MMAP_ASSERT(A) ((void) 1) +#endif + +/* + * `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after + * initialization, but the other pointers are not; i.e., only + * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held. + */ +#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \ + (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ + < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot)) +#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \ + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ + (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \ + < ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot))) +#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \ + (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \ + (((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \ + < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot))) + +int erts_have_erts_mmap; +UWord erts_page_inv_mask; + +#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG) +# undef RBT_DEBUG +# define RBT_DEBUG +#endif +#ifdef RBT_DEBUG +# define RBT_ASSERT ERTS_ASSERT +# define IF_RBT_DEBUG(C) C +#else +# define RBT_ASSERT(x) +# define IF_RBT_DEBUG(C) +#endif + +typedef struct RBTNode_ RBTNode; +struct RBTNode_ { + UWord parent_and_color; /* color in bit 0 of parent ptr */ + RBTNode *left; + RBTNode *right; +}; + +#define RED_FLG (1) +#define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG)) +#define IS_BLACK(N) (!IS_RED(N)) +#define SET_RED(N) ((N)->parent_and_color |= RED_FLG) +#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG) + +static ERTS_INLINE RBTNode* parent(RBTNode* node) +{ + return (RBTNode*) (node->parent_and_color & ~RED_FLG); +} + +static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent) +{ + RBT_ASSERT(!((UWord)parent & RED_FLG)); + node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG); +} + +static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color) +{ + RBT_ASSERT(!((UWord)parent & RED_FLG)); + RBT_ASSERT(!(color & ~RED_FLG)); + return ((UWord)parent) | color; +} + + +enum SortOrder { + ADDR_ORDER, /* only address order */ + SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */ + SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */ +}; +#ifdef HARD_DEBUG +static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"}; +#endif + +typedef struct { + RBTNode* root; + enum SortOrder order; +}RBTree; + +#ifdef HARD_DEBUG +# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE) +# define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ) +static int rbt_assert_is_member(RBTNode* root, RBTNode* node); +static RBTNode* check_tree(RBTree* tree, Uint); +#else +# define HARD_CHECK_IS_MEMBER(ROOT,NODE) +# define HARD_CHECK_TREE(TREE,SZ) +#endif + +#if ERTS_MMAP_OP_RINGBUF_SZ + +static int mmap_op_ix; + +typedef enum { + ERTS_OP_TYPE_NONE, + ERTS_OP_TYPE_MMAP, + ERTS_OP_TYPE_MUNMAP, + ERTS_OP_TYPE_MREMAP +} ErtsMMapOpType; + +typedef struct { + ErtsMMapOpType type; + void *result; + UWord in_size; + UWord out_size; + void *old_ptr; + UWord old_size; +} ErtsMMapOp; + +static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ]; + +#define ERTS_MMAP_OP_RINGBUF_INIT() \ + do { \ + int ix__; \ + for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\ + mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + } \ + mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \ + } while (0) + +#define ERTS_MMAP_OP_START(SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = (SZ); \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + } while (0) + +#define ERTS_MMAP_OP_END(PTR, SZ) \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].result = (PTR); \ + mmap_ops[ix__].out_size = (SZ); \ + } while (0) + +#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MMAP_OP_START((IN_SZ)); \ + ERTS_MMAP_OP_END((RES), (OUT_SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MUNMAP_OP(PTR, SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = (PTR); \ + mmap_ops[ix__].old_size = (SZ); \ + } while (0) + +#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MUNMAP_OP((PTR), (SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \ + do { \ + int ix__; \ + if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \ + mmap_op_ix = 0; \ + ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = (IN_SZ); \ + mmap_ops[ix__].out_size = (OLD_SZ); \ + mmap_ops[ix__].old_ptr = (OLD_PTR); \ + mmap_ops[ix__].old_size = (OLD_SZ); \ + } while (0) + +#define ERTS_MREMAP_OP_END(PTR, SZ) \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].result = (PTR); \ + mmap_ops[mmap_op_ix].out_size = (SZ); \ + } while (0) + +#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \ + do { \ + erts_smp_mtx_lock(&mmap_state.mtx); \ + ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \ + ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \ + erts_smp_mtx_unlock(&mmap_state.mtx); \ + } while (0) + +#define ERTS_MMAP_OP_ABORT() \ + do { \ + int ix__ = mmap_op_ix; \ + mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \ + mmap_ops[ix__].result = NULL; \ + mmap_ops[ix__].in_size = 0; \ + mmap_ops[ix__].out_size = 0; \ + mmap_ops[ix__].old_ptr = NULL; \ + mmap_ops[ix__].old_size = 0; \ + if (--mmap_op_ix < 0) \ + mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \ + } while (0) + +#else + +#define ERTS_MMAP_OP_RINGBUF_INIT() +#define ERTS_MMAP_OP_START(SZ) +#define ERTS_MMAP_OP_END(PTR, SZ) +#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) +#define ERTS_MUNMAP_OP(PTR, SZ) +#define ERTS_MUNMAP_OP_LCK(PTR, SZ) +#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) +#define ERTS_MREMAP_OP_END(PTR, SZ) +#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) +#define ERTS_MMAP_OP_ABORT() + +#endif + +typedef struct { + RBTNode snode; /* node in 'stree' */ + RBTNode anode; /* node in 'atree' */ + char* start; + char* end; +}ErtsFreeSegDesc; + +typedef struct { + RBTree stree; /* size ordered tree */ + RBTree atree; /* address ordered tree */ + Uint nseg; +}ErtsFreeSegMap; + +static struct { + int (*reserve_physical)(char *, UWord); + void (*unreserve_physical)(char *, UWord); + int supercarrier; + int no_os_mmap; + /* + * Super unaligend area is located above super aligned + * area. That is, `sa.bot` is beginning of the super + * carrier, `sua.top` is the end of the super carrier, + * and sa.top and sua.bot moves towards eachother. + */ + struct { + char *top; + char *bot; + ErtsFreeSegMap map; + } sua; + struct { + char *top; + char *bot; + ErtsFreeSegMap map; + } sa; +#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS)) + int mmap_fd; +#endif + erts_smp_mtx_t mtx; + struct { + char *free_list; + char *unused_start; + char *unused_end; + char *new_area_hint; + Uint reserved; + } desc; + struct { + UWord free_seg_descs; + struct { + UWord curr; + UWord max; + } free_segs; + } no; + struct { + struct { + UWord total; + struct { + UWord total; + UWord sa; + UWord sua; + } used; + } supercarrier; + struct { + UWord used; + } os; + } size; +} mmap_state; + +#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \ + do { \ + mmap_state.size.supercarrier.used.total += (SZ); \ + mmap_state.size.supercarrier.used.sa += (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ + <= mmap_state.size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \ + <= mmap_state.size.supercarrier.used.total); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ + mmap_state.size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \ + mmap_state.size.supercarrier.used.sa -= (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \ + do { \ + mmap_state.size.supercarrier.used.total += (SZ); \ + mmap_state.size.supercarrier.used.sua += (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \ + <= mmap_state.size.supercarrier.total); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \ + <= mmap_state.size.supercarrier.used.total); \ + } while (0) +#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \ + mmap_state.size.supercarrier.used.total -= (SZ); \ + ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \ + mmap_state.size.supercarrier.used.sua -= (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_OS_INC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \ + mmap_state.size.os.used += (SZ); \ + } while (0) +#define ERTS_MMAP_SIZE_OS_DEC(SZ) \ + do { \ + ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \ + mmap_state.size.os.used -= (SZ); \ + } while (0) + + +static void +add_free_desc_area(char *start, char *end) +{ + ERTS_MMAP_ASSERT(end == (void *) 0 || end > start); + if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) { + UWord no; + ErtsFreeSegDesc *prev_desc, *desc; + char *desc_end; + + no = 1; + prev_desc = (ErtsFreeSegDesc *) start; + prev_desc->start = mmap_state.desc.free_list; + desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc)); + desc_end = start + 2*sizeof(ErtsFreeSegDesc); + + while (desc_end <= end) { + desc->start = (char *) prev_desc; + prev_desc = desc; + desc = (ErtsFreeSegDesc *) desc_end; + desc_end += sizeof(ErtsFreeSegDesc); + no++; + } + mmap_state.desc.free_list = (char *) prev_desc; + mmap_state.no.free_seg_descs += no; + } +} + +static ErtsFreeSegDesc * +add_unused_free_desc_area(void) +{ + char *ptr; + if (!mmap_state.desc.unused_start) + return NULL; + + ERTS_MMAP_ASSERT(mmap_state.desc.unused_end); + ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE + <= mmap_state.desc.unused_end - mmap_state.desc.unused_start); + + ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE; + add_free_desc_area(mmap_state.desc.unused_start, ptr); + + if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE) + mmap_state.desc.unused_start = ptr; + else + mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL; + + ERTS_MMAP_ASSERT(mmap_state.desc.free_list); + return (ErtsFreeSegDesc *) mmap_state.desc.free_list; +} + +static ERTS_INLINE ErtsFreeSegDesc * +alloc_desc(void) +{ + ErtsFreeSegDesc *res; + res = (ErtsFreeSegDesc *) mmap_state.desc.free_list; + if (!res) { + res = add_unused_free_desc_area(); + if (!res) + return NULL; + } + mmap_state.desc.free_list = res->start; + ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs); + mmap_state.no.free_segs.curr++; + if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr) + mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr; + return res; +} + +static ERTS_INLINE void +free_desc(ErtsFreeSegDesc *desc) +{ + desc->start = mmap_state.desc.free_list; + mmap_state.desc.free_list = (char *) desc; + ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0); + mmap_state.no.free_segs.curr--; +} + +static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode) +{ + return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode)); +} + +static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode) +{ + return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode)); +} + +static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node) +{ + return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node); +} + +static ERTS_INLINE SWord usable_size(enum SortOrder order, + ErtsFreeSegDesc* desc) +{ + return ((order == SA_SZ_ADDR_ORDER) ? + ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start) + : desc->end - desc->start); +} + +#ifdef HARD_DEBUG +static ERTS_INLINE SWord cmp_nodes(enum SortOrder order, + RBTNode* lhs, RBTNode* rhs) +{ + ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs); + ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs); + RBT_ASSERT(lhs != rhs); + if (order != ADDR_ORDER) { + SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc); + if (diff) return diff; + } + if (order != SZ_REVERSE_ADDR_ORDER) { + return (char*)ldesc->start - (char*)rdesc->start; + } + else { + return (char*)rdesc->start - (char*)ldesc->start; + } +} +#endif /* HARD_DEBUG */ + +static ERTS_INLINE SWord cmp_with_node(enum SortOrder order, + SWord sz, char* addr, RBTNode* rhs) +{ + ErtsFreeSegDesc* rdesc; + if (order != ADDR_ORDER) { + SWord diff; + rdesc = snode_to_desc(rhs); + diff = sz - usable_size(order, rdesc); + if (diff) return diff; + } + else + rdesc = anode_to_desc(rhs); + + if (order != SZ_REVERSE_ADDR_ORDER) + return addr - (char*)rdesc->start; + else + return (char*)rdesc->start - addr; +} + + +static ERTS_INLINE void +left_rotate(RBTNode **root, RBTNode *x) +{ + RBTNode *y = x->right; + x->right = y->left; + if (y->left) + set_parent(y->left, x); + set_parent(y, parent(x)); + if (!parent(y)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->left) + parent(x)->left = y; + else { + RBT_ASSERT(x == parent(x)->right); + parent(x)->right = y; + } + y->left = x; + set_parent(x, y); +} + +static ERTS_INLINE void +right_rotate(RBTNode **root, RBTNode *x) +{ + RBTNode *y = x->left; + x->left = y->right; + if (y->right) + set_parent(y->right, x); + set_parent(y, parent(x)); + if (!parent(y)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->right) + parent(x)->right = y; + else { + RBT_ASSERT(x == parent(x)->left); + parent(x)->left = y; + } + y->right = x; + set_parent(x, y); +} + +/* + * Replace node x with node y + * NOTE: segment descriptor of y is not changed + */ +static ERTS_INLINE void +replace(RBTNode **root, RBTNode *x, RBTNode *y) +{ + + if (!parent(x)) { + RBT_ASSERT(*root == x); + *root = y; + } + else if (x == parent(x)->left) + parent(x)->left = y; + else { + RBT_ASSERT(x == parent(x)->right); + parent(x)->right = y; + } + if (x->left) { + RBT_ASSERT(parent(x->left) == x); + set_parent(x->left, y); + } + if (x->right) { + RBT_ASSERT(parent(x->right) == x); + set_parent(x->right, y); + } + + y->parent_and_color = x->parent_and_color; + y->right = x->right; + y->left = x->left; +} + +static void +tree_insert_fixup(RBTNode** root, RBTNode *node) +{ + RBTNode *x = node, *y, *papa_x, *granpa_x; + + /* + * Rearrange the tree so that it satisfies the Red-Black Tree properties + */ + + papa_x = parent(x); + RBT_ASSERT(x != *root && IS_RED(papa_x)); + do { + + /* + * x and its parent are both red. Move the red pair up the tree + * until we get to the root or until we can separate them. + */ + + granpa_x = parent(papa_x); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(granpa_x); + + if (papa_x == granpa_x->left) { + y = granpa_x->right; + if (IS_RED(y)) { + SET_BLACK(y); + SET_BLACK(papa_x); + SET_RED(granpa_x); + x = granpa_x; + } + else { + + if (x == papa_x->right) { + left_rotate(root, papa_x); + papa_x = x; + x = papa_x->left; + } + + RBT_ASSERT(x == granpa_x->left->left); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(papa_x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(IS_BLACK(y)); + + SET_BLACK(papa_x); + SET_RED(granpa_x); + right_rotate(root, granpa_x); + + RBT_ASSERT(x == parent(x)->left); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(parent(x)->right)); + RBT_ASSERT(IS_BLACK(parent(x))); + break; + } + } + else { + RBT_ASSERT(papa_x == granpa_x->right); + y = granpa_x->left; + if (IS_RED(y)) { + SET_BLACK(y); + SET_BLACK(papa_x); + SET_RED(granpa_x); + x = granpa_x; + } + else { + + if (x == papa_x->left) { + right_rotate(root, papa_x); + papa_x = x; + x = papa_x->right; + } + + RBT_ASSERT(x == granpa_x->right->right); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(papa_x)); + RBT_ASSERT(IS_BLACK(granpa_x)); + RBT_ASSERT(IS_BLACK(y)); + + SET_BLACK(papa_x); + SET_RED(granpa_x); + left_rotate(root, granpa_x); + + RBT_ASSERT(x == parent(x)->right); + RBT_ASSERT(IS_RED(x)); + RBT_ASSERT(IS_RED(parent(x)->left)); + RBT_ASSERT(IS_BLACK(parent(x))); + break; + } + } + } while (x != *root && (papa_x=parent(x), IS_RED(papa_x))); + + SET_BLACK(*root); +} + +static void +rbt_delete(RBTree* tree, RBTNode* del) +{ + Uint spliced_is_black; + RBTNode *x, *y, *z = del, *papa_y; + RBTNode null_x; /* null_x is used to get the fixup started when we + splice out a node without children. */ + + HARD_CHECK_IS_MEMBER(tree->root, del); + HARD_CHECK_TREE(tree, 0); + + null_x.parent_and_color = parent_and_color(NULL, !RED_FLG); + + /* Remove node from tree... */ + + /* Find node to splice out */ + if (!z->left || !z->right) + y = z; + else + /* Set y to z:s successor */ + for(y = z->right; y->left; y = y->left); + /* splice out y */ + x = y->left ? y->left : y->right; + spliced_is_black = IS_BLACK(y); + papa_y = parent(y); + if (x) { + set_parent(x, papa_y); + } + else if (spliced_is_black) { + x = &null_x; + x->right = x->left = NULL; + x->parent_and_color = parent_and_color(papa_y, !RED_FLG); + y->left = x; + } + + if (!papa_y) { + RBT_ASSERT(tree->root == y); + tree->root = x; + } + else { + if (y == papa_y->left) { + papa_y->left = x; + } + else { + RBT_ASSERT(y == papa_y->right); + papa_y->right = x; + } + } + if (y != z) { + /* We spliced out the successor of z; replace z by the successor */ + RBT_ASSERT(z != &null_x); + replace(&tree->root, z, y); + } + + if (spliced_is_black) { + RBTNode* papa_x; + /* We removed a black node which makes the resulting tree + violate the Red-Black Tree properties. Fixup tree... */ + + papa_x = parent(x); + while (IS_BLACK(x) && papa_x) { + + /* + * x has an "extra black" which we move up the tree + * until we reach the root or until we can get rid of it. + * + * y is the sibbling of x + */ + + if (x == papa_x->left) { + y = papa_x->right; + RBT_ASSERT(y); + if (IS_RED(y)) { + RBT_ASSERT(y->right); + RBT_ASSERT(y->left); + SET_BLACK(y); + RBT_ASSERT(IS_BLACK(papa_x)); + SET_RED(papa_x); + left_rotate(&tree->root, papa_x); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->right; + } + RBT_ASSERT(y); + RBT_ASSERT(IS_BLACK(y)); + if (IS_BLACK(y->left) && IS_BLACK(y->right)) { + SET_RED(y); + } + else { + if (IS_BLACK(y->right)) { + SET_BLACK(y->left); + SET_RED(y); + right_rotate(&tree->root, y); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->right; + } + RBT_ASSERT(y); + if (IS_RED(papa_x)) { + + SET_BLACK(papa_x); + SET_RED(y); + } + RBT_ASSERT(y->right); + SET_BLACK(y->right); + left_rotate(&tree->root, papa_x); + x = tree->root; + break; + } + } + else { + RBT_ASSERT(x == papa_x->right); + y = papa_x->left; + RBT_ASSERT(y); + if (IS_RED(y)) { + RBT_ASSERT(y->right); + RBT_ASSERT(y->left); + SET_BLACK(y); + RBT_ASSERT(IS_BLACK(papa_x)); + SET_RED(papa_x); + right_rotate(&tree->root, papa_x); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->left; + } + RBT_ASSERT(y); + RBT_ASSERT(IS_BLACK(y)); + if (IS_BLACK(y->right) && IS_BLACK(y->left)) { + SET_RED(y); + } + else { + if (IS_BLACK(y->left)) { + SET_BLACK(y->right); + SET_RED(y); + left_rotate(&tree->root, y); + RBT_ASSERT(papa_x == parent(x)); + y = papa_x->left; + } + RBT_ASSERT(y); + if (IS_RED(papa_x)) { + SET_BLACK(papa_x); + SET_RED(y); + } + RBT_ASSERT(y->left); + SET_BLACK(y->left); + right_rotate(&tree->root, papa_x); + x = tree->root; + break; + } + } + x = papa_x; + papa_x = parent(x); + } + SET_BLACK(x); + + papa_x = parent(&null_x); + if (papa_x) { + if (papa_x->left == &null_x) + papa_x->left = NULL; + else { + RBT_ASSERT(papa_x->right == &null_x); + papa_x->right = NULL; + } + RBT_ASSERT(!null_x.left); + RBT_ASSERT(!null_x.right); + } + else if (tree->root == &null_x) { + tree->root = NULL; + RBT_ASSERT(!null_x.left); + RBT_ASSERT(!null_x.right); + } + } + HARD_CHECK_TREE(tree, 0); +} + + +static void +rbt_insert(RBTree* tree, RBTNode* node) +{ +#ifdef RBT_DEBUG + ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL; +#endif + ErtsFreeSegDesc* desc = node_to_desc(tree->order, node); + char* seg_addr = desc->start; + SWord seg_sz = desc->end - desc->start; + + HARD_CHECK_TREE(tree, 0); + + node->left = NULL; + node->right = NULL; + + if (!tree->root) { + node->parent_and_color = parent_and_color(NULL, !RED_FLG); + tree->root = node; + } + else { + RBTNode *x = tree->root; + while (1) { + SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x); + if (diff < 0) { + IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x)); + if (!x->left) { + node->parent_and_color = parent_and_color(x, RED_FLG); + x->left = node; + break; + } + x = x->left; + } + else { + RBT_ASSERT(diff > 0); + IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x)); + if (!x->right) { + node->parent_and_color = parent_and_color(x, RED_FLG); + x->right = node; + break; + } + x = x->right; + } + } + + RBT_ASSERT(parent(node)); +#ifdef RBT_DEBUG + if (tree->order == ADDR_ORDER) { + RBT_ASSERT(!dbg_under || dbg_under->end < desc->start); + RBT_ASSERT(!dbg_over || dbg_over->start > desc->end); + } +#endif + RBT_ASSERT(IS_RED(node)); + if (IS_RED(parent(node))) + tree_insert_fixup(&tree->root, node); + } + HARD_CHECK_TREE(tree, 0); +} + +/* + * Traverse tree in (reverse) sorting order + */ +static void +rbt_foreach_node(RBTree* tree, + void (*fn)(RBTNode*,void*), + void* arg, int reverse) +{ +#ifdef HARD_DEBUG + Sint blacks = -1; + Sint curr_blacks = 1; + Uint depth = 1; + Uint max_depth = 0; + Uint node_cnt = 0; +#endif + enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state; + RBTNode *x = tree->root; + + RBT_ASSERT(!x || !parent(x)); + + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + while (x) { + switch (state) { + case RECURSE_LEFT: + if (x->left) { + RBT_ASSERT(parent(x->left) == x); + #ifdef HARD_DEBUG + ++depth; + if (IS_BLACK(x->left)) + curr_blacks++; + #endif + x = x->left; + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + } + else { + #ifdef HARD_DEBUG + if (blacks < 0) + blacks = curr_blacks; + RBT_ASSERT(blacks == curr_blacks); + #endif + state = reverse ? RETURN_TO_PARENT : DO_NODE; + } + break; + + case DO_NODE: + #ifdef HARD_DEBUG + ++node_cnt; + if (depth > max_depth) + max_depth = depth; + #endif + (*fn) (x, arg); /* Do it! */ + state = reverse ? RECURSE_LEFT : RECURSE_RIGHT; + break; + + case RECURSE_RIGHT: + if (x->right) { + RBT_ASSERT(parent(x->right) == x); + #ifdef HARD_DEBUG + ++depth; + if (IS_BLACK(x->right)) + curr_blacks++; + #endif + x = x->right; + state = reverse ? RECURSE_RIGHT : RECURSE_LEFT; + } + else { + #ifdef HARD_DEBUG + if (blacks < 0) + blacks = curr_blacks; + RBT_ASSERT(blacks == curr_blacks); + #endif + state = reverse ? DO_NODE : RETURN_TO_PARENT; + } + break; + + case RETURN_TO_PARENT: + #ifdef HARD_DEBUG + if (IS_BLACK(x)) + curr_blacks--; + --depth; + #endif + if (parent(x)) { + if (x == parent(x)->left) { + state = reverse ? RETURN_TO_PARENT : DO_NODE; + } + else { + RBT_ASSERT(x == parent(x)->right); + state = reverse ? DO_NODE : RETURN_TO_PARENT; + } + } + x = parent(x); + break; + } + } +#ifdef HARD_DEBUG + RBT_ASSERT(depth == 0 || (!tree->root && depth==1)); + RBT_ASSERT(curr_blacks == 0); + RBT_ASSERT((1 << (max_depth/2)) <= node_cnt); +#endif +} + +#if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG) +static RBTNode* rbt_prev_node(RBTNode* node) +{ + RBTNode* x; + if (node->left) { + for (x=node->left; x->right; x=x->right) + ; + return x; + } + for (x=node; parent(x); x=parent(x)) { + if (parent(x)->right == x) + return parent(x); + } + return NULL; +} +static RBTNode* rbt_next_node(RBTNode* node) +{ + RBTNode* x; + if (node->right) { + for (x=node->right; x->left; x=x->left) + ; + return x; + } + for (x=node; parent(x); x=parent(x)) { + if (parent(x)->left == x) + return parent(x); + } + return NULL; +} +#endif /* RBT_DEBUG || HARD_DEBUG_MSEG */ + + +/* The API to keep track of a bunch of separated (free) segments + (non-overlapping and non-adjacent). + */ +static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder); +static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end, + ErtsFreeSegDesc** under, ErtsFreeSegDesc** over); +static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end); +static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end); +static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*); +static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz); + + +static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order) +{ + map->atree.root = NULL; + map->atree.order = ADDR_ORDER; + map->stree.root = NULL; + map->stree.order = order; + map->nseg = 0; +} + +/* Lookup directly adjacent free segments to the given area [start->end]. + * The given area must not contain any free segments. + */ +static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end, + ErtsFreeSegDesc** under, ErtsFreeSegDesc** over) +{ + RBTNode* x = map->atree.root; + + *under = NULL; + *over = NULL; + while (x) { + if (start < anode_to_desc(x)->start) { + RBT_ASSERT(end <= anode_to_desc(x)->start); + if (end == anode_to_desc(x)->start) { + RBT_ASSERT(!*over); + *over = anode_to_desc(x); + } + x = x->left; + } + else { + RBT_ASSERT(start >= anode_to_desc(x)->end); + if (start == anode_to_desc(x)->end) { + RBT_ASSERT(!*under); + *under = anode_to_desc(x); + } + x = x->right; + } + } +} + +/* Initialize 'desc' and insert as new free segment [start->end]. + * The new segment must not contain or be adjacent to any free segment in 'map'. + */ +static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc, + char* start, char* end) +{ + desc->start = start; + desc->end = end; + rbt_insert(&map->atree, &desc->anode); + rbt_insert(&map->stree, &desc->snode); + map->nseg++; +} + +/* Resize existing free segment 'desc' to [start->end]. + * The new segment location must overlap the old location and + * it must not contain or be adjacent to any other free segment in 'map'. + */ +static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc, + char* start, char* end) +{ +#ifdef RBT_DEBUG + RBTNode* prev = rbt_prev_node(&desc->anode); + RBTNode* next = rbt_next_node(&desc->anode); + RBT_ASSERT(!prev || anode_to_desc(prev)->end < start); + RBT_ASSERT(!next || anode_to_desc(next)->start > end); +#endif + rbt_delete(&map->stree, &desc->snode); + desc->start = start; + desc->end = end; + rbt_insert(&map->stree, &desc->snode); +} + +/* Delete existing free segment 'desc' from 'map'. + */ +static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc) +{ + rbt_delete(&map->atree, &desc->anode); + rbt_delete(&map->stree, &desc->snode); + map->nseg--; +} + +/* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes. + */ +static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz) +{ + RBTNode* x = map->stree.root; + ErtsFreeSegDesc* best_desc = NULL; + const enum SortOrder order = map->stree.order; + + while (x) { + ErtsFreeSegDesc* desc = snode_to_desc(x); + SWord seg_sz = usable_size(order, desc); + + if (seg_sz < need_sz) { + x = x->right; + } + else { + best_desc = desc; + x = x->left; + } + } + return best_desc; +} + +struct build_arg_t +{ + Process* p; + Eterm* hp; + Eterm acc; +}; + +static void build_free_seg_tuple(RBTNode* node, void* arg) +{ + struct build_arg_t* a = (struct build_arg_t*)arg; + ErtsFreeSegDesc* desc = anode_to_desc(node); + Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start); + Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end); + Eterm tpl = TUPLE2(a->hp, start, end); + + a->hp += 3; + a->acc = CONS(a->hp, tpl, a->acc); + a->hp += 2; +} + +static +Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map) +{ + struct build_arg_t barg; + Eterm* hp_end; + const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */ + + barg.p = p; + barg.hp = HAlloc(p, may_need); + hp_end = barg.hp + may_need; + barg.acc = NIL; + rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1); + + RBT_ASSERT(barg.hp <= hp_end); + HRelease(p, hp_end, barg.hp); + return barg.acc; +} + +#if ERTS_HAVE_OS_MMAP +/* Implementation of os_mmap()/os_munmap()/os_mremap()... */ + +#if HAVE_MMAP +# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE) +# if defined(MAP_ANONYMOUS) +# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) +# define ERTS_MMAP_FD (-1) +# elif defined(MAP_ANON) +# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) +# define ERTS_MMAP_FD (-1) +# else +# define ERTS_MMAP_FLAGS (MAP_PRIVATE) +# define ERTS_MMAP_FD mmap_state.mmap_fd +# endif +#endif + +static ERTS_INLINE void * +os_mmap(void *hint_ptr, UWord size, int try_superalign) +{ +#if HAVE_MMAP + void *res; +#ifdef MAP_ALIGN + if (try_superalign) + res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0); + else +#endif + res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT, + ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0); + if (res == MAP_FAILED) + return NULL; + return res; +#elif HAVE_VIRTUALALLOC + return (void *) VirtualAlloc(NULL, (SIZE_T) size, + MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE); +#else +# error "missing mmap() or similar" +#endif +} + +static ERTS_INLINE void +os_munmap(void *ptr, UWord size) +{ +#if HAVE_MMAP +#ifdef ERTS_MMAP_DEBUG + int res = +#endif + munmap(ptr, size); + ERTS_MMAP_ASSERT(res == 0); +#elif HAVE_VIRTUALALLOC +#ifdef DEBUG + BOOL res = +#endif + VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE); + ERTS_MMAP_ASSERT(res != 0); +#else +# error "missing munmap() or similar" +#endif +} + +#ifdef ERTS_HAVE_OS_MREMAP +# if HAVE_MREMAP +# if defined(__NetBSD__) +# define ERTS_MREMAP_FLAGS (0) +# else +# define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE) +# endif +# endif +static ERTS_INLINE void * +os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign) +{ + void *new_seg; +#if HAVE_MREMAP + new_seg = mremap(ptr, (size_t) old_size, +# if defined(__NetBSD__) + NULL, +# endif + (size_t) new_size, ERTS_MREMAP_FLAGS); + if (new_seg == (void *) MAP_FAILED) + return NULL; + return new_seg; +#else +# error "missing mremap() or similar" +#endif +} +#endif + +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION +#if HAVE_MMAP + +#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT) +#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED) +#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE) +#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED) +#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE) +#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE) + +static int +os_reserve_physical(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT, + ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + return 0; + return 1; +} + +static void +os_unreserve_physical(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT, + ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + erl_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory"); +} + +static void * +os_mmap_virtual(char *ptr, UWord size) +{ + void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT, + ERTS_MMAP_VIRTUAL_FLAGS, ERTS_MMAP_FD, 0); + if (res == (void *) MAP_FAILED) + return NULL; + return res; +} + +#else +#error "Missing reserve/unreserve physical memory implementation" +#endif +#endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */ + +#endif /* ERTS_HAVE_OS_MMAP */ + +static int reserve_noop(char *ptr, UWord size) +{ +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + Uint32 *uip, *end = (Uint32 *) (ptr + size); + + for (uip = (Uint32 *) ptr; uip < end; uip++) + ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef); + for (uip = (Uint32 *) ptr; uip < end; uip++) + *uip = (Uint32) 0xfeedfeed; +#endif + return 1; +} + +static void unreserve_noop(char *ptr, UWord size) +{ +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + Uint32 *uip, *end = (Uint32 *) (ptr + size); + + for (uip = (Uint32 *) ptr; uip < end; uip++) + *uip = (Uint32) 0xdeadbeef; +#endif +} + +static UWord +alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end) +{ + char *ptr; + ErtsFreeSegMap *da_map; + ErtsFreeSegDesc *desc = alloc_desc(); + if (desc) { + insert_free_seg(map, desc, start, end); + return 0; + } + + /* + * Ahh; ran out of free segment descriptors. + * + * First try to map a new page... + */ + +#if ERTS_HAVE_OS_MMAP + if (!mmap_state.no_os_mmap) { + ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0); + if (ptr) { + mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE; + ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE); + add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE); + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, start, end); + return 0; + } + } +#endif + + /* + * ...then try to find a good place in the supercarrier... + */ + da_map = &mmap_state.sua.map; + desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); + if (desc) { + if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE); + else + desc = NULL; + + } + else { + da_map = &mmap_state.sa.map; + desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE); + if (desc) { + if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE)) + ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE); + else + desc = NULL; + } + } + if (desc) { + char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE; + add_free_desc_area(desc->start, da_end); + if (da_end != desc->end) + resize_free_seg(da_map, desc, da_end, desc->end); + else { + delete_free_seg(da_map, desc); + free_desc(desc); + } + + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, start, end); + return 0; + } + + /* + * ... and then as last resort use the first page of the + * free segment we are trying to insert for free descriptors. + */ + ptr = start + ERTS_PAGEALIGNED_SIZE; + ERTS_MMAP_ASSERT(ptr <= end); + + add_free_desc_area(start, ptr); + + if (ptr != end) { + desc = alloc_desc(); + ERTS_MMAP_ASSERT(desc); + insert_free_seg(map, desc, ptr, end); + } + + return ERTS_PAGEALIGNED_SIZE; +} + +void * +erts_mmap(Uint32 flags, UWord *sizep) +{ + char *seg; + UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep); + + /* Map in premapped supercarrier */ + if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) { + char *end; + ErtsFreeSegDesc *desc; + Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + erts_smp_mtx_lock(&mmap_state.mtx); + + ERTS_MMAP_OP_START(*sizep); + + if (!superaligned) { + desc = lookup_free_seg(&mmap_state.sua.map, asize); + if (desc) { + seg = desc->start; + end = seg+asize; + if (!mmap_state.reserve_physical(seg, asize)) + goto supercarrier_reserve_failure; + if (desc->end == end) { + delete_free_seg(&mmap_state.sua.map, desc); + free_desc(desc); + } + else { + ERTS_MMAP_ASSERT(end < desc->end); + resize_free_seg(&mmap_state.sua.map, desc, end, desc->end); + } + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + goto supercarrier_success; + } + + if (asize <= mmap_state.sua.bot - mmap_state.sa.top) { + if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize, + asize)) + goto supercarrier_reserve_failure; + mmap_state.sua.bot -= asize; + seg = mmap_state.sua.bot; + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + goto supercarrier_success; + } + } + + asize = ERTS_SUPERALIGNED_CEILING(asize); + + desc = lookup_free_seg(&mmap_state.sa.map, asize); + if (desc) { + char *start = seg = desc->start; + seg = (char *) ERTS_SUPERALIGNED_CEILING(seg); + end = seg+asize; + if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + goto supercarrier_reserve_failure; + ERTS_MMAP_SIZE_SC_SA_INC(asize); + if (desc->end == end) { + if (start != seg) + resize_free_seg(&mmap_state.sa.map, desc, start, seg); + else { + delete_free_seg(&mmap_state.sa.map, desc); + free_desc(desc); + } + } + else { + ERTS_MMAP_ASSERT(end < desc->end); + resize_free_seg(&mmap_state.sa.map, desc, end, desc->end); + if (start != seg) { + UWord ad_sz; + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + start, seg); + start += ad_sz; + if (start != seg) + mmap_state.unreserve_physical(start, (UWord) (seg - start)); + } + } + goto supercarrier_success; + } + + if (superaligned) { + char *start = mmap_state.sa.top; + seg = (char *) ERTS_SUPERALIGNED_CEILING(start); + + if (asize + (seg - start) <= mmap_state.sua.bot - start) { + end = seg + asize; + if (!mmap_state.reserve_physical(start, (UWord) (end - start))) + goto supercarrier_reserve_failure; + mmap_state.sa.top = end; + ERTS_MMAP_SIZE_SC_SA_INC(asize); + if (start != seg) { + UWord ad_sz; + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + start, seg); + start += ad_sz; + if (start != seg) + mmap_state.unreserve_physical(start, (UWord) (seg - start)); + } + goto supercarrier_success; + } + + desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE); + if (desc) { + char *org_start = desc->start; + char *org_end = desc->end; + + seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start); + end = seg + asize; + if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg))) + goto supercarrier_reserve_failure; + ERTS_MMAP_SIZE_SC_SUA_INC(asize); + if (org_start != seg) { + ERTS_MMAP_ASSERT(org_start < seg); + resize_free_seg(&mmap_state.sua.map, desc, org_start, seg); + desc = NULL; + } + if (end != org_end) { + UWord ad_sz = 0; + ERTS_MMAP_ASSERT(end < org_end); + if (desc) + resize_free_seg(&mmap_state.sua.map, desc, end, org_end); + else + ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map, + end, org_end); + end += ad_sz; + if (end != org_end) + mmap_state.unreserve_physical(end, + (UWord) (org_end - end)); + } + goto supercarrier_success; + } + } + + ERTS_MMAP_OP_ABORT(); + erts_smp_mtx_unlock(&mmap_state.mtx); + } + +#if ERTS_HAVE_OS_MMAP + /* Map using OS primitives */ + if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) { + if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) { + seg = os_mmap(NULL, asize, 0); + if (!seg) + goto failure; + } + else { + asize = ERTS_SUPERALIGNED_CEILING(*sizep); + seg = os_mmap(NULL, asize, 1); + if (!seg) + goto failure; + + if (!ERTS_IS_SUPERALIGNED(seg)) { + char *ptr; + UWord sz; + + os_munmap(seg, asize); + + ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1); + if (!ptr) + goto failure; + + seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr); + sz = (UWord) (seg - ptr); + ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE); + if (sz) + os_munmap(ptr, sz); + sz = ERTS_SUPERALIGNED_SIZE - sz; + if (sz) + os_munmap(seg+asize, sz); + } + } + + ERTS_MMAP_OP_LCK(seg, *sizep, asize); + ERTS_MMAP_SIZE_OS_INC(asize); + *sizep = asize; + return (void *) seg; + } +failure: +#endif + ERTS_MMAP_OP_LCK(NULL, *sizep, 0); + *sizep = 0; + return NULL; + +supercarrier_success: + +#ifdef ERTS_MMAP_DEBUG + if (ERTS_MMAPFLG_SUPERALIGNED & flags) { + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg)); + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize)); + } + else { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + } +#endif + + ERTS_MMAP_OP_END(seg, asize); + erts_smp_mtx_unlock(&mmap_state.mtx); + + *sizep = asize; + return (void *) seg; + +supercarrier_reserve_failure: + erts_smp_mtx_unlock(&mmap_state.mtx); + *sizep = 0; + return NULL; +} + +void +erts_munmap(Uint32 flags, void *ptr, UWord size) +{ + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size)); + + if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { + ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); +#if ERTS_HAVE_OS_MMAP + ERTS_MUNMAP_OP_LCK(ptr, size); + ERTS_MMAP_SIZE_OS_DEC(size); + os_munmap(ptr, size); +#endif + } + else { + char *start, *end; + ErtsFreeSegMap *map; + ErtsFreeSegDesc *prev, *next, *desc; + UWord ad_sz = 0; + + ERTS_MMAP_ASSERT(mmap_state.supercarrier); + + start = (char *) ptr; + end = start + size; + + erts_smp_mtx_lock(&mmap_state.mtx); + + ERTS_MUNMAP_OP(ptr, size); + + if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + + map = &mmap_state.sa.map; + adjacent_free_seg(map, start, end, &prev, &next); + + ERTS_MMAP_SIZE_SC_SA_DEC(size); + if (end == mmap_state.sa.top) { + ERTS_MMAP_ASSERT(!next); + if (prev) { + start = prev->start; + delete_free_seg(map, prev); + free_desc(prev); + } + mmap_state.sa.top = start; + goto supercarrier_success; + } + } + else { + map = &mmap_state.sua.map; + adjacent_free_seg(map, start, end, &prev, &next); + + ERTS_MMAP_SIZE_SC_SUA_DEC(size); + if (start == mmap_state.sua.bot) { + ERTS_MMAP_ASSERT(!prev); + if (next) { + end = next->end; + delete_free_seg(map, next); + free_desc(next); + } + mmap_state.sua.bot = end; + goto supercarrier_success; + } + } + + desc = NULL; + + if (next) { + ERTS_MMAP_ASSERT(end < next->end); + end = next->end; + if (prev) { + delete_free_seg(map, next); + free_desc(next); + goto save_prev; + } + desc = next; + } else if (prev) { + save_prev: + ERTS_MMAP_ASSERT(prev->start < start); + start = prev->start; + desc = prev; + } + + if (desc) + resize_free_seg(map, desc, start, end); + else + ad_sz = alloc_desc_insert_free_seg(map, start, end); + + supercarrier_success: { + UWord unres_sz; + + ERTS_MMAP_ASSERT(size >= ad_sz); + unres_sz = size - ad_sz; + if (unres_sz) + mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz); + + erts_smp_mtx_unlock(&mmap_state.mtx); + } + } +} + +static void * +remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +{ + UWord size = *sizep; + void *new_ptr = erts_mmap(flags, &size); + if (!new_ptr) + return NULL; + *sizep = size; + if (old_size < size) + size = old_size; + sys_memcpy(new_ptr, ptr, (size_t) size); + erts_munmap(flags, ptr, old_size); + return new_ptr; +} + +void * +erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep) +{ + void *new_ptr; + Uint32 superaligned; + UWord asize; + + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep)); + + if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) { + + ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap); + + if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) { + new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, + old_size, sizep); + if (new_ptr) + return new_ptr; + } + + if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) { + ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size); + return NULL; + } + +#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP + superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + if (superaligned) { + asize = ERTS_SUPERALIGNED_CEILING(*sizep); + if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) { + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } + } + else { + asize = ERTS_PAGEALIGNED_CEILING(*sizep); + if (asize == old_size) { + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } + } + +#if ERTS_HAVE_GENUINE_OS_MMAP + if (asize < old_size + && (!superaligned + || ERTS_IS_SUPERALIGNED(ptr))) { + UWord um_sz; + new_ptr = ((char *) ptr) + asize; + ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr); + um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr); + ERTS_MMAP_SIZE_OS_DEC(um_sz); + os_munmap(new_ptr, um_sz); + ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return ptr; + } +#endif +#if ERTS_HAVE_OS_MREMAP + if (superaligned) + return remap_move(flags, new_ptr, old_size, sizep); + else { + new_ptr = os_mremap(ptr, old_size, asize, 0); + if (!new_ptr) + return NULL; + if (asize > old_size) + ERTS_MMAP_SIZE_OS_INC(asize - old_size); + else + ERTS_MMAP_SIZE_OS_DEC(old_size - asize); + ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize); + *sizep = asize; + return new_ptr; + } +#endif +#endif + } + else { /* In super carrier */ + char *start, *end, *new_end; + ErtsFreeSegMap *map; + ErtsFreeSegDesc *prev, *next; + UWord ad_sz = 0; + + ERTS_MMAP_ASSERT(mmap_state.supercarrier); + + if (ERTS_MMAPFLG_OS_ONLY & flags) + return remap_move(flags, ptr, old_size, sizep); + + superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags); + + asize = (superaligned + ? ERTS_SUPERALIGNED_CEILING(*sizep) + : ERTS_PAGEALIGNED_CEILING(*sizep)); + + erts_smp_mtx_lock(&mmap_state.mtx); + + if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr) + ? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize)) + : (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) { + erts_smp_mtx_unlock(&mmap_state.mtx); + /* + * Segment currently in wrong area (due to a previous memory + * shortage), move it to the right area. + * (remap_move() will succeed) + */ + return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr, + old_size, sizep); + } + + ERTS_MREMAP_OP_START(ptr, old_size, *sizep); + + if (asize == old_size) { + new_ptr = ptr; + goto supercarrier_resize_success; + } + + start = (char *) ptr; + end = start + old_size; + new_end = start+asize; + + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + + if (asize < old_size) { + UWord unres_sz; + new_ptr = ptr; + if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + map = &mmap_state.sua.map; + ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize); + } + else { + if (end == mmap_state.sa.top) { + mmap_state.sa.top = new_end; + mmap_state.unreserve_physical(((char *) ptr) + asize, + old_size - asize); + goto supercarrier_resize_success; + } + ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize); + map = &mmap_state.sa.map; + } + + adjacent_free_seg(map, start, end, &prev, &next); + + if (next) + resize_free_seg(map, next, new_end, next->end); + else + ad_sz = alloc_desc_insert_free_seg(map, new_end, end); + ERTS_MMAP_ASSERT(old_size - asize >= ad_sz); + unres_sz = old_size - asize - ad_sz; + if (unres_sz) + mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz, + unres_sz); + goto supercarrier_resize_success; + } + + if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + + adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next); + + if (next && new_end <= next->end) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + if (new_end < next->end) + resize_free_seg(&mmap_state.sua.map, next, new_end, next->end); + else { + delete_free_seg(&mmap_state.sua.map, next); + free_desc(next); + } + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + else { /* Superaligned area */ + + if (end == mmap_state.sa.top) { + if (new_end <= mmap_state.sua.bot) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + mmap_state.sa.top = new_end; + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + else { + adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next); + if (next && new_end <= next->end) { + if (!mmap_state.reserve_physical(((char *) ptr) + old_size, + asize - old_size)) + goto supercarrier_reserve_failure; + if (new_end < next->end) + resize_free_seg(&mmap_state.sa.map, next, new_end, next->end); + else { + delete_free_seg(&mmap_state.sa.map, next); + free_desc(next); + } + new_ptr = ptr; + ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size); + goto supercarrier_resize_success; + } + } + } + + ERTS_MMAP_OP_ABORT(); + erts_smp_mtx_unlock(&mmap_state.mtx); + + /* Failed to resize... */ + } + + return remap_move(flags, ptr, old_size, sizep); + +supercarrier_resize_success: + +#ifdef ERTS_MMAP_DEBUG + if ((ERTS_MMAPFLG_SUPERALIGNED & flags) + || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) { + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize)); + } + else { + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr)); + ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize)); + } +#endif + + ERTS_MREMAP_OP_END(new_ptr, asize); + erts_smp_mtx_unlock(&mmap_state.mtx); + + *sizep = asize; + return new_ptr; + +supercarrier_reserve_failure: + ERTS_MREMAP_OP_END(NULL, old_size); + erts_smp_mtx_unlock(&mmap_state.mtx); + *sizep = old_size; + return NULL; + +} + +int erts_mmap_in_supercarrier(void *ptr) +{ + return ERTS_MMAP_IN_SUPERCARRIER(ptr); +} + + +static struct { + Eterm total; + Eterm total_sa; + Eterm total_sua; + Eterm used; + Eterm used_sa; + Eterm used_sua; + Eterm max; + Eterm allocated; + Eterm reserved; + Eterm sizes; + Eterm free_segs; + Eterm supercarrier; + Eterm os; + Eterm scs; + Eterm sco; + Eterm scrpm; + Eterm scmgc; + + int is_initialized; + erts_mtx_t init_mutex; +}am; + +static void ERTS_INLINE atom_init(Eterm *atom, char *name) +{ + *atom = am_atom_put(name, strlen(name)); +} +#define AM_INIT(AM) atom_init(&am.AM, #AM) + +static void init_atoms(void) +{ + erts_mtx_lock(&am.init_mutex); + + if (!am.is_initialized) { + AM_INIT(total); + AM_INIT(total_sa); + AM_INIT(total_sua); + AM_INIT(used); + AM_INIT(used_sa); + AM_INIT(used_sua); + AM_INIT(max); + AM_INIT(allocated); + AM_INIT(reserved); + AM_INIT(sizes); + AM_INIT(free_segs); + AM_INIT(supercarrier); + AM_INIT(os); + AM_INIT(scs); + AM_INIT(sco); + AM_INIT(scrpm); + AM_INIT(scmgc); + am.is_initialized = 1; + } + erts_mtx_unlock(&am.init_mutex); +}; + + +#ifdef HARD_DEBUG_MSEG +static void hard_dbg_mseg_init(void); +#endif + +void +erts_mmap_init(ErtsMMapInit *init) +{ + int virtual_map = 0; + char *start = NULL, *end = NULL; + UWord pagesize; +#if defined(__WIN32__) + SYSTEM_INFO sysinfo; + GetSystemInfo(&sysinfo); + pagesize = (UWord) sysinfo.dwPageSize; +#elif defined(_SC_PAGESIZE) + pagesize = (UWord) sysconf(_SC_PAGESIZE); +#elif defined(HAVE_GETPAGESIZE) + pagesize = (UWord) getpagesize(); +#else +# error "Do not know how to get page size" +#endif +#if defined(HARD_DEBUG) || 0 + erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs); + erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco); + erts_fprintf(stderr, "erts_mmap: scmgc = %i\n", init->scmgc); +#endif + erts_page_inv_mask = pagesize - 1; + if (pagesize & erts_page_inv_mask) + erl_exit(-1, "erts_mmap: Invalid pagesize: %bpu\n", + pagesize); + + ERTS_MMAP_OP_RINGBUF_INIT(); + + erts_have_erts_mmap = 0; + + mmap_state.supercarrier = 0; + mmap_state.reserve_physical = reserve_noop; + mmap_state.unreserve_physical = unreserve_noop; + +#if HAVE_MMAP && !defined(MAP_ANON) + mmap_state.mmap_fd = open("/dev/zero", O_RDWR); + if (mmap_state.mmap_fd < 0) + erl_exit(-1, "erts_mmap: Failed to open /dev/zero\n"); +#endif + + erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap"); + erts_mtx_init(&am.init_mutex, "mmap_init_atoms"); + +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (init->virtual_range.start) { + char *ptr; + UWord sz; + ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start); + end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end); + sz = end - ptr; + start = os_mmap_virtual(ptr, sz); + if (!start || start > ptr || start >= end) + erl_exit(-1, + "erts_mmap: Failed to create virtual range for super carrier\n"); + sz = start - ptr; + if (sz) + os_munmap(end, sz); + mmap_state.reserve_physical = os_reserve_physical; + mmap_state.unreserve_physical = os_unreserve_physical; + virtual_map = 1; + } + else +#endif + if (init->predefined_area.start) { + start = init->predefined_area.start; + end = init->predefined_area.end; + if (end != (void *) 0 && end < start) + end = start; + } +#if ERTS_HAVE_OS_MMAP + else if (init->scs) { + UWord sz; + sz = ERTS_PAGEALIGNED_CEILING(init->scs); +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (!init->scrpm) { + start = os_mmap_virtual(NULL, sz); + mmap_state.reserve_physical = os_reserve_physical; + mmap_state.unreserve_physical = os_unreserve_physical; + virtual_map = 1; + } + else +#endif + { + /* + * The whole supercarrier will by physically + * reserved all the time. + */ + start = os_mmap(NULL, sz, 1); + } + if (!start) + erl_exit(-1, + "erts_mmap: Failed to create super carrier of size %bpu MB\n", + init->scs/1024/1024); + end = start + sz; +#ifdef ERTS_MMAP_DEBUG_FILL_AREAS + if (!virtual_map) { + Uint32 *uip; + + for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++) + *uip = (Uint32) 0xdeadbeef; + } +#endif + } + if (!mmap_state.no_os_mmap) + erts_have_erts_mmap |= ERTS_HAVE_ERTS_OS_MMAP; +#endif + + mmap_state.no.free_seg_descs = 0; + mmap_state.no.free_segs.curr = 0; + mmap_state.no.free_segs.max = 0; + + mmap_state.size.supercarrier.total = 0; + mmap_state.size.supercarrier.used.total = 0; + mmap_state.size.supercarrier.used.sa = 0; + mmap_state.size.supercarrier.used.sua = 0; + mmap_state.size.os.used = 0; + + mmap_state.desc.new_area_hint = NULL; + + if (!start) { + mmap_state.sa.bot = NULL; + mmap_state.sua.top = NULL; + mmap_state.sa.bot = NULL; + mmap_state.sua.top = NULL; + mmap_state.no_os_mmap = 0; + mmap_state.supercarrier = 0; + } + else { + size_t desc_size; + + mmap_state.no_os_mmap = init->sco; + + desc_size = init->scmgc; + if (desc_size < 100) + desc_size = 100; + desc_size *= sizeof(ErtsFreeSegDesc); + if ((desc_size + + ERTS_SUPERALIGNED_SIZE + + ERTS_PAGEALIGNED_SIZE) > end - start) + erl_exit(-1, "erts_mmap: No space for segments in super carrier\n"); + + mmap_state.sa.bot = start; + mmap_state.sa.bot += desc_size; + mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot); + mmap_state.sa.top = mmap_state.sa.bot; + mmap_state.sua.top = end; + mmap_state.sua.bot = mmap_state.sua.top; + + mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start); + + mmap_state.desc.free_list = NULL; + mmap_state.desc.reserved = 0; + + if (end == (void *) 0) { + /* + * Very unlikely, but we need a guarantee + * that `mmap_state.sua.top` always will + * compare as larger than all segment pointers + * into the super carrier... + */ + mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE; + mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE; +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE)) +#endif + add_free_desc_area(mmap_state.sua.top, end); + mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc); + } + + mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start); + + /* + * Area before (and after) super carrier + * will be used for free segment descritors. + */ +#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION + if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start)) + erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n"); +#endif + mmap_state.desc.unused_start = start; + mmap_state.desc.unused_end = mmap_state.sa.bot; + mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start) + / sizeof(ErtsFreeSegDesc)); + + init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER); + init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER); + + mmap_state.supercarrier = 1; + erts_have_erts_mmap |= ERTS_HAVE_ERTS_SUPERCARRIER_MMAP; + + mmap_state.desc.new_area_hint = end; + + } + +#if !ERTS_HAVE_OS_MMAP + mmap_state.no_os_mmap = 1; +#endif + +#ifdef HARD_DEBUG_MSEG + hard_dbg_mseg_init(); +#endif +} + + +static ERTS_INLINE void +add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2) +{ + *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp); +} + +Eterm erts_mmap_info(int *print_to_p, + void *print_to_arg, + Eterm** hpp, Uint* szp, + struct erts_mmap_info_struct* emis) +{ + Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua }; + Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua }; + Eterm group[2]; + Eterm group_tags[] = { am.sizes, am.free_segs }; + Eterm list[2]; + Eterm list_tags[2]; /* { am.supercarrier, am.os } */ + int lix; + Eterm res = THE_NON_VALUE; + + if (!hpp) { + erts_smp_mtx_lock(&mmap_state.mtx); + emis->sizes[0] = mmap_state.size.supercarrier.total; + emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot; + emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot; + emis->sizes[3] = mmap_state.size.supercarrier.used.total; + emis->sizes[4] = mmap_state.size.supercarrier.used.sa; + emis->sizes[5] = mmap_state.size.supercarrier.used.sua; + + emis->segs[0] = mmap_state.no.free_segs.curr; + emis->segs[1] = mmap_state.no.free_segs.max; + emis->segs[2] = mmap_state.no.free_seg_descs; + emis->segs[3] = mmap_state.desc.reserved; + emis->segs[4] = mmap_state.sa.map.nseg; + emis->segs[5] = mmap_state.sua.map.nseg; + + emis->os_used = mmap_state.size.os.used; + erts_smp_mtx_unlock(&mmap_state.mtx); + } + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + if (mmap_state.supercarrier) { + const char* prefix = "supercarrier "; + erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]); + erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]); + erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]); + erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]); + erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]); + erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]); + erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]); + erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]); + erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]); + erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]); + erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]); + erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]); + } + if (!mmap_state.no_os_mmap) { + erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used); + } + } + + + if (hpp || szp) { + if (!am.is_initialized) { + init_atoms(); + } + + lix = 0; + if (mmap_state.supercarrier) { + group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, + sizeof(size_tags)/sizeof(Eterm), + size_tags, emis->sizes); + group[1] = erts_bld_atom_uword_2tup_list(hpp, szp, + sizeof(seg_tags)/sizeof(Eterm), + seg_tags, emis->segs); + list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group); + list_tags[lix] = am.supercarrier; + lix++; + } + + if (!mmap_state.no_os_mmap) { + group[0] = erts_bld_atom_uword_2tup_list(hpp, szp, + 1, &am.used, &emis->os_used); + list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group); + list_tags[lix] = am.os; + lix++; + } + res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list); + } + return res; +} + +Eterm erts_mmap_info_options(char *prefix, + int *print_to_p, + void *print_to_arg, + Uint **hpp, + Uint *szp) +{ + const UWord scs = mmap_state.sua.top - mmap_state.sa.bot; + const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false; + const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false; + Eterm res = THE_NON_VALUE; + + if (print_to_p) { + int to = *print_to_p; + void *arg = print_to_arg; + erts_print(to, arg, "%sscs: %bpu\n", prefix, scs); + if (mmap_state.supercarrier) { + erts_print(to, arg, "%ssco: %T\n", prefix, sco); + erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm); + erts_print(to, arg, "%sscmgc: %beu\n", prefix, mmap_state.desc.reserved); + } + } + + if (hpp || szp) { + if (!am.is_initialized) { + init_atoms(); + } + + res = NIL; + if (mmap_state.supercarrier) { + add_2tup(hpp, szp, &res, am.scmgc, + erts_bld_uint(hpp,szp, mmap_state.desc.reserved)); + add_2tup(hpp, szp, &res, am.scrpm, scrpm); + add_2tup(hpp, szp, &res, am.sco, sco); + } + add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs)); + } + return res; +} + + +Eterm erts_mmap_debug_info(Process* p) +{ + if (mmap_state.supercarrier) { + ERTS_DECL_AM(sabot); + ERTS_DECL_AM(satop); + ERTS_DECL_AM(suabot); + ERTS_DECL_AM(suatop); + Eterm sa_list, sua_list, list; + Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop }; + UWord values[4]; + Eterm *hp, *hp_end; + Uint may_need; + const Uint PTR_BIG_SZ = HALFWORD_HEAP ? 3 : 2; + + erts_smp_mtx_lock(&mmap_state.mtx); + values[0] = (UWord)mmap_state.sa.bot; + values[1] = (UWord)mmap_state.sa.top; + values[2] = (UWord)mmap_state.sua.bot; + values[3] = (UWord)mmap_state.sua.top; + sa_list = build_free_seg_list(p, &mmap_state.sa.map); + sua_list = build_free_seg_list(p, &mmap_state.sua.map); + erts_smp_mtx_unlock(&mmap_state.mtx); + + may_need = 4*(2+3+PTR_BIG_SZ) + 2*(2+3); + hp = HAlloc(p, may_need); + hp_end = hp + may_need; + + list = erts_bld_atom_uword_2tup_list(&hp, NULL, + sizeof(values)/sizeof(*values), + tags, values); + + sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3; + sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3; + list = CONS(hp, sua_list, list); hp+=2; + list = CONS(hp, sa_list, list); hp+=2; + + ASSERT(hp <= hp_end); + HRelease(p, hp_end, hp); + return list; + } + else { + return am_undefined; + } +} + + +/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\ + * Debug functions * +\* */ + + +#ifdef HARD_DEBUG + +static int rbt_assert_is_member(RBTNode* root, RBTNode* node) +{ + while (node != root) { + RBT_ASSERT(parent(node)); + RBT_ASSERT(parent(node)->left == node || parent(node)->right == node); + node = parent(node); + } + return 1; +} + + +#if 0 +# define PRINT_TREE +#else +# undef PRINT_TREE +#endif + +#ifdef PRINT_TREE +static void print_tree(enum SortOrder order, RBTNode*); +#endif + +/* + * Checks that the order between parent and children are correct, + * and that the Red-Black Tree properies are satisfied. if size > 0, + * check_tree() returns the node that satisfies "address order first fit" + * + * The Red-Black Tree properies are: + * 1. Every node is either red or black. + * 2. Every leaf (NIL) is black. + * 3. If a node is red, then both its children are black. + * 4. Every simple path from a node to a descendant leaf + * contains the same number of black nodes. + * + */ + +struct check_arg_t { + RBTree* tree; + ErtsFreeSegDesc* prev_seg; + Uint size; + RBTNode *res; +}; +static void check_node_callback(RBTNode* x, void* arg); + + +static RBTNode * +check_tree(RBTree* tree, Uint size) +{ + struct check_arg_t carg; + carg.tree = tree; + carg.prev_seg = NULL; + carg.size = size; + carg.res = NULL; + +#ifdef PRINT_TREE + print_tree(tree->order, tree->root); +#endif + + if (!tree->root) + return NULL; + + RBT_ASSERT(IS_BLACK(tree->root)); + RBT_ASSERT(!parent(tree->root)); + + rbt_foreach_node(tree, check_node_callback, &carg, 0); + + return carg.res; +} + +static void check_node_callback(RBTNode* x, void* arg) +{ + struct check_arg_t* a = (struct check_arg_t*) arg; + ErtsFreeSegDesc* seg; + + if (IS_RED(x)) { + RBT_ASSERT(IS_BLACK(x->right)); + RBT_ASSERT(IS_BLACK(x->left)); + } + + RBT_ASSERT(parent(x) || x == a->tree->root); + + if (x->left) { + RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0); + } + if (x->right) { + RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0); + } + + seg = node_to_desc(a->tree->order, x); + RBT_ASSERT(seg->start < seg->end); + if (a->size && (seg->end - seg->start) >= a->size) { + if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) { + a->res = x; + } + } + if (a->tree->order == ADDR_ORDER) { + RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start); + a->prev_seg = seg; + } +} + +#endif /* HARD_DEBUG */ + + +#ifdef PRINT_TREE +#define INDENT_STEP 2 + +#include <stdio.h> + +static void +print_tree_aux(enum SortOrder order, RBTNode *x, int indent) +{ + int i; + + if (x) { + ErtsFreeSegDesc* desc = node_to_desc(order, x); + print_tree_aux(order, x->right, indent + INDENT_STEP); + for (i = 0; i < indent; i++) { + putc(' ', stderr); + } + fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n", + IS_BLACK(x) ? "BLACK" : "RED", + desc->end - desc->start, desc->start, desc->end, desc); + print_tree_aux(order, x->left, indent + INDENT_STEP); + } +} + + +static void +print_tree(enum SortOrder order, RBTNode* root) +{ + fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]); + print_tree_aux(order, root, 0); + fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]); +} + +#endif /* PRINT_TREE */ + + +#ifdef FREE_SEG_API_SMOKE_TEST + +void test_it(void) +{ + ErtsFreeSegMap map; + ErtsFreeSegDesc *desc, *under, *over, *d1, *d2; + const int i = 1; /* reverse addr order */ + + { + init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER); + + insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + desc = lookup_free_seg(&map, 0x500); + ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L)); + + desc = lookup_free_seg(&map, 0x1500); + ERTS_ASSERT(desc->start == (char*)0x15000); + + adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over); + ERTS_ASSERT(!under && !over); + + adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over); + ERTS_ASSERT(!under && over->start == (char*)0x8000); + + adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over); + ERTS_ASSERT(under->end == (char*)0x10000 && !over); + + adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over); + ERTS_ASSERT(!under && !over); + + adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over); + ERTS_ASSERT(!under && over && over->start == (char*)0x11000); + + adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over); + ERTS_ASSERT(under && under->end == (char*)0x12000 && !over); + + adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over); + ERTS_ASSERT(under && under->end == (char*)0x12000 && + over && over->start == (char*)0x13000); + + adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over); + ERTS_ASSERT(!under && over && over->start == (char*)0x13000); + + d1 = lookup_free_seg(&map, 0x500); + ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L)); + + resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + d2 = lookup_free_seg(&map, 0x1200); + ERTS_ASSERT(d2 == d1); + + delete_free_seg(&map, d1); + HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0); + + d1 = lookup_free_seg(&map, 0x1200); + ERTS_ASSERT(d1->start == (char*)0x15000); + } +} + +#endif /* FREE_SEG_API_SMOKE_TEST */ + + +#ifdef HARD_DEBUG_MSEG + +/* + * Debug stuff used by erl_mseg to check that it does the right thing. + * The reason for keeping it here is that we (ab)use the rb-tree code + * for keeping track of *allocated* segments. + */ + +typedef struct ErtsFreeSegDesc_fake_ { + /*RBTNode snode; Save memory by skipping unused size tree node */ + RBTNode anode; /* node in 'atree' */ + union { + char* start; + struct ErtsFreeSegDesc_fake_* next_free; + }u; + char* end; +}ErtsFreeSegDesc_fake; + +static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000]; +static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first; +RBTree hard_dbg_mseg_tree; + +static erts_mtx_t hard_dbg_mseg_mtx; + +static void hard_dbg_mseg_init(void) +{ + ErtsFreeSegDesc_fake* p; + + erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg"); + hard_dbg_mseg_tree.root = NULL; + hard_dbg_mseg_tree.order = ADDR_ORDER; + + p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) / + sizeof(*hard_dbg_mseg_desc_pool)) - 1]; + p->u.next_free = NULL; + while (--p >= hard_dbg_mseg_desc_pool) { + p->u.next_free = (p+1); + } + hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0]; +} + +static ErtsFreeSegDesc* hard_dbg_alloc_desc(void) +{ + ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first; + ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors"); + hard_dbg_mseg_desc_first = p->u.next_free; + + /* Creative pointer arithmetic to return something that looks like + * a ErtsFreeSegDesc as long as we don't use the absent 'snode'. + */ + return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode)); +} + +static void hard_dbg_free_desc(ErtsFreeSegDesc* desc) +{ + ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode; + memset(p, 0xfe, sizeof(*p)); + p->u.next_free = hard_dbg_mseg_desc_first; + hard_dbg_mseg_desc_first = p; +} + +static void check_seg_writable(void* seg, UWord sz) +{ + UWord* seg_end = (UWord*)((char*)seg + sz); + volatile UWord* p; + ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg)); + ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz)); + for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) { + UWord write_back = *p; + *p = 0xfade2b1acc; + *p = write_back; + } +} + +void hard_dbg_insert_mseg(void* seg, UWord sz) +{ + check_seg_writable(seg, sz); + erts_mtx_lock(&hard_dbg_mseg_mtx); + { + ErtsFreeSegDesc *desc = hard_dbg_alloc_desc(); + RBTNode *prev, *next; + desc->start = (char*)seg; + desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */ + rbt_insert(&hard_dbg_mseg_tree, &desc->anode); + prev = rbt_prev_node(&desc->anode); + next = rbt_next_node(&desc->anode); + ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start); + ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end); + } + erts_mtx_unlock(&hard_dbg_mseg_mtx); +} + +static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start) +{ + RBTNode* x = tree->root; + + while (x) { + ErtsFreeSegDesc* desc = anode_to_desc(x); + if (start < desc->start) { + x = x->left; + } + else if (start > desc->start) { + ERTS_ASSERT(start > desc->end); + x = x->right; + } + else + return desc; + } + return NULL; +} + +void hard_dbg_remove_mseg(void* seg, UWord sz) +{ + check_seg_writable(seg, sz); + erts_mtx_lock(&hard_dbg_mseg_mtx); + { + ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg); + ERTS_ASSERT(desc); + ERTS_ASSERT(desc->start == (char*)seg); + ERTS_ASSERT(desc->end == (char*)seg + sz - 1); + + rbt_delete(&hard_dbg_mseg_tree, &desc->anode); + hard_dbg_free_desc(desc); + } + erts_mtx_unlock(&hard_dbg_mseg_mtx); +} + +#endif /* HARD_DEBUG_MSEG */ diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h new file mode 100644 index 0000000000..e6934dbb26 --- /dev/null +++ b/erts/emulator/sys/common/erl_mmap.h @@ -0,0 +1,134 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2013. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifndef ERL_MMAP_H__ +#define ERL_MMAP_H__ + +#include "sys.h" + +#define ERTS_MMAP_SUPERALIGNED_BITS (18) +/* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */ + +#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0) +#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1) +#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2) + +#define ERTS_HAVE_ERTS_OS_MMAP (1 << 0) +#define ERTS_HAVE_ERTS_SUPERCARRIER_MMAP (1 << 1) +extern int erts_have_erts_mmap; +extern UWord erts_page_inv_mask; + +typedef struct { + struct { + char *start; + char *end; + } virtual_range; + struct { + char *start; + char *end; + } predefined_area; + UWord scs; /* super carrier size */ + int sco; /* super carrier only? */ + Uint scmgc; /* super carrier: max guaranteed (number of) carriers */ + int scrpm; +}ErtsMMapInit; + +#define ERTS_MMAP_INIT_DEFAULT_INITER \ + {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1} + +void *erts_mmap(Uint32 flags, UWord *sizep); +void erts_munmap(Uint32 flags, void *ptr, UWord size); +void *erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep); +int erts_mmap_in_supercarrier(void *ptr); +void erts_mmap_init(ErtsMMapInit*); +struct erts_mmap_info_struct +{ + UWord sizes[6]; + UWord segs[6]; + UWord os_used; +}; +Eterm erts_mmap_info(int *print_to_p, void *print_to_arg, + Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*); +Eterm erts_mmap_info_options(char *prefix, int *print_to_p, void *print_to_arg, + Uint **hpp, Uint *szp); +struct process; +Eterm erts_mmap_debug_info(struct process*); + +#define ERTS_SUPERALIGNED_SIZE \ + (1 << ERTS_MMAP_SUPERALIGNED_BITS) +#define ERTS_INV_SUPERALIGNED_MASK \ + ((UWord) (ERTS_SUPERALIGNED_SIZE - 1)) +#define ERTS_SUPERALIGNED_MASK \ + (~ERTS_INV_SUPERALIGNED_MASK) +#define ERTS_SUPERALIGNED_FLOOR(X) \ + (((UWord) (X)) & ERTS_SUPERALIGNED_MASK) +#define ERTS_SUPERALIGNED_CEILING(X) \ + ERTS_SUPERALIGNED_FLOOR((X) + ERTS_INV_SUPERALIGNED_MASK) +#define ERTS_IS_SUPERALIGNED(X) \ + (((UWord) (X) & ERTS_INV_SUPERALIGNED_MASK) == 0) + +#define ERTS_INV_PAGEALIGNED_MASK \ + (erts_page_inv_mask) +#define ERTS_PAGEALIGNED_MASK \ + (~ERTS_INV_PAGEALIGNED_MASK) +#define ERTS_PAGEALIGNED_FLOOR(X) \ + (((UWord) (X)) & ERTS_PAGEALIGNED_MASK) +#define ERTS_PAGEALIGNED_CEILING(X) \ + ERTS_PAGEALIGNED_FLOOR((X) + ERTS_INV_PAGEALIGNED_MASK) +#define ERTS_IS_PAGEALIGNED(X) \ + (((UWord) (X) & ERTS_INV_PAGEALIGNED_MASK) == 0) +#define ERTS_PAGEALIGNED_SIZE \ + (ERTS_INV_PAGEALIGNED_MASK + 1) + +#ifndef HAVE_MMAP +# define HAVE_MMAP 0 +#endif +#ifndef HAVE_MREMAP +# define HAVE_MREMAP 0 +#endif +#if HAVE_MMAP +# define ERTS_HAVE_OS_MMAP 1 +# define ERTS_HAVE_GENUINE_OS_MMAP 1 +# if HAVE_MREMAP +# define ERTS_HAVE_OS_MREMAP 1 +# endif +# if defined(MAP_FIXED) && defined(MAP_NORESERVE) +# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1 +# endif +#endif + +#ifndef HAVE_VIRTUALALLOC +# define HAVE_VIRTUALALLOC 0 +#endif +#if HAVE_VIRTUALALLOC +# define ERTS_HAVE_OS_MMAP 1 +#endif + +/*#define HARD_DEBUG_MSEG*/ +#ifdef HARD_DEBUG_MSEG +# define HARD_DBG_INSERT_MSEG hard_dbg_insert_mseg +# define HARD_DBG_REMOVE_MSEG hard_dbg_remove_mseg +void hard_dbg_insert_mseg(void* seg, UWord sz); +void hard_dbg_remove_mseg(void* seg, UWord sz); +#else +# define HARD_DBG_INSERT_MSEG(SEG,SZ) +# define HARD_DBG_REMOVE_MSEG(SEG,SZ) +#endif + +#endif /* ERL_MMAP_H__ */ diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c index 2748edba02..94a381e168 100644 --- a/erts/emulator/sys/common/erl_mseg.c +++ b/erts/emulator/sys/common/erl_mseg.c @@ -100,45 +100,6 @@ static int atoms_initialized; typedef struct mem_kind_t MemKind; -#if HALFWORD_HEAP -static int initialize_pmmap(void); -static void *pmmap(size_t size); -static int pmunmap(void *p, size_t size); -static void *pmremap(void *old_address, size_t old_size, - size_t new_size); -#endif - -#if HAVE_MMAP -/* Mmap ... */ - -#define MMAP_PROT (PROT_READ|PROT_WRITE) - - -#ifdef MAP_ANON -# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE) -# define MMAP_FD (-1) -#else -# define MMAP_FLAGS (MAP_PRIVATE) -# define MMAP_FD mmap_fd -static int mmap_fd; -#endif - -#if HAVE_MREMAP -# define HAVE_MSEG_RECREATE 1 -#else -# define HAVE_MSEG_RECREATE 0 -#endif - -#if HALFWORD_HEAP -#define CAN_PARTLY_DESTROY 0 -#else -#define CAN_PARTLY_DESTROY 1 -#endif -#else /* #if HAVE_MMAP */ -#define CAN_PARTLY_DESTROY 0 -#error "Not supported" -#endif /* #if HAVE_MMAP */ - const ErtsMsegOpt_t erts_mseg_default_opt = { 1, /* Use cache */ 1, /* Preserv data */ @@ -163,9 +124,7 @@ typedef struct { CallCounter create; CallCounter create_resize; CallCounter destroy; -#if HAVE_MSEG_RECREATE CallCounter recreate; -#endif CallCounter clear_cache; CallCounter check_cache; } ErtsMsegCalls; @@ -173,7 +132,7 @@ typedef struct { typedef struct cache_t_ cache_t; struct cache_t_ { - Uint size; + UWord size; void *seg; cache_t *next; cache_t *prev; @@ -236,11 +195,6 @@ struct ErtsMsegAllctr_t_ { Uint rel_max_cache_bad_fit; ErtsMsegCalls calls; - -#if CAN_PARTLY_DESTROY - Uint min_seg_size; -#endif - }; typedef union { @@ -344,69 +298,31 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) { } } -/* remove ErtsMsegAllctr_t from arguments? - * only used for statistics - */ -static ERTS_INLINE void * -mmap_align(ErtsMsegAllctr_t *ma, void *addr, size_t length, int prot, int flags, int fd, off_t offset) { - - char *p, *q; - UWord d; - - p = mmap(addr, length, prot, flags, fd, offset); - - if (MAP_IS_ALIGNED(p) || p == MAP_FAILED) - return p; - - if (ma) - INC_CC(ma, create_resize); - - munmap(p, length); - - if ((p = mmap(addr, length + MSEG_ALIGNED_SIZE, prot, flags, fd, offset)) == MAP_FAILED) - return MAP_FAILED; - - q = (void *)ALIGNED_CEILING((char *)p); - d = (UWord)(q - p); - - if (d > 0) - munmap(p, d); - - if (MSEG_ALIGNED_SIZE - d > 0) - munmap((void *)(q + length), MSEG_ALIGNED_SIZE - d); - - return q; -} +/* #define ERTS_PRINT_ERTS_MMAP */ static ERTS_INLINE void * -mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size) +mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep) { +#ifdef ERTS_PRINT_ERTS_MMAP + UWord req_size = *sizep; +#endif void *seg; - ASSERT(size % MSEG_ALIGNED_SIZE == 0); - + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - seg = pmmap(size); - if ((unsigned long) seg & CHECK_POINTER_MASK) { - erts_fprintf(stderr,"Pointer mask failure (0x%08lx)\n",(unsigned long) seg); - return NULL; - } - } else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#if HAVE_MMAP - { - seg = (void *) mmap_align(ma, (void *) 0, (size_t) size, - MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0); - if (seg == (void *) MAP_FAILED) - seg = NULL; - - ASSERT(MAP_IS_ALIGNED(seg) || !seg); - } -#else -# error "Missing mseg_create() implementation" + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + + seg = erts_mmap(mmap_flags, sizep); + +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg, + (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + req_size, *sizep); #endif - } INC_CC(ma, create); @@ -414,91 +330,55 @@ mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size) } static ERTS_INLINE void -mseg_destroy(ErtsMsegAllctr_t *ma, MemKind* mk, void *seg, Uint size) { - ERTS_DECLARE_DUMMY(int res); - +mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord size) { + + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - res = pmunmap((void *) seg, size); - } - else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#ifdef HAVE_MMAP - res = munmap((void *) seg, size); -#else -# error "Missing mseg_destroy() implementation" + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + + erts_munmap(mmap_flags, seg_p, size); +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n", + (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + seg_p, *size); #endif - } - - ASSERT(size % MSEG_ALIGNED_SIZE == 0); - ASSERT(res == 0); - INC_CC(ma, destroy); } -#if HAVE_MSEG_RECREATE -#if defined(__NetBSD__) -#define MREMAP_FLAGS (0) -#else -#define MREMAP_FLAGS (MREMAP_MAYMOVE) -#endif - - -/* mseg_recreate - * May return *unaligned* segments as in address not aligned to MSEG_ALIGNMENT - * it is still page aligned - * - * This is fine for single block carriers as long as we don't cache misaligned - * segments (since multiblock carriers may use them) - * - * For multiblock carriers we *need* MSEG_ALIGNMENT but mbc's will never be - * reallocated. - * - * This should probably be fixed the following way: - * 1) Use an option to segment allocation - NEED_ALIGNMENT - * 2) Add mremap_align which takes care of aligning a new a mremaped area - * 3) Fix the cache to handle of aligned and unaligned segments - */ - static ERTS_INLINE void * -mseg_recreate(ErtsMsegAllctr_t *ma, MemKind* mk, void *old_seg, Uint old_size, Uint new_size) +mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWord old_size, UWord *sizep) { +#ifdef ERTS_PRINT_ERTS_MMAP + UWord req_size = *sizep; +#endif void *new_seg; - - ASSERT(old_size % MSEG_ALIGNED_SIZE == 0); - ASSERT(new_size % MSEG_ALIGNED_SIZE == 0); - + Uint32 mmap_flags = 0; #if HALFWORD_HEAP - if (mk == &ma->low_mem) { - new_seg = (void *) pmremap((void *) old_seg, - (size_t) old_size, - (size_t) new_size); - } - else + mmap_flags |= ((mk == &ma->low_mem) + ? ERTS_MMAPFLG_SUPERCARRIER_ONLY + : ERTS_MMAPFLG_OS_ONLY); #endif - { -#if HAVE_MREMAP -#if defined(__NetBSD__) - new_seg = mremap(old_seg, (size_t)old_size, NULL, new_size, MREMAP_FLAGS); -#else - new_seg = mremap(old_seg, (size_t)old_size, (size_t)new_size, MREMAP_FLAGS); -#endif - if (new_seg == (void *) MAP_FAILED) - new_seg = NULL; -#else -#error "Missing mseg_recreate() implementation" -#endif - } + if (MSEG_FLG_IS_2POW(flags)) + mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED; + new_seg = erts_mremap(mmap_flags, old_seg, old_size, sizep); + +#ifdef ERTS_PRINT_ERTS_MMAP + erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n", + new_seg, (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua", + old_seg, old_size, req_size, *sizep); +#endif INC_CC(ma, recreate); return new_seg; } -#endif /* #if HAVE_MSEG_RECREATE */ - #ifdef DEBUG #define ERTS_DBG_MA_CHK_THR_ACCESS(MA) \ do { \ @@ -528,7 +408,7 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) { c->prev = c; } -static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Uint flags) { +static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, Uint flags) { cache_t *c; ERTS_DBG_MK_CHK_THR_ACCESS(mk); @@ -566,14 +446,13 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui return 1; } else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) { - /* No free slots. * Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */ c = erts_circleq_tail(&(mk->cache_unpowered_node)); erts_circleq_remove(c); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, ERTS_MSEG_FLG_NONE, mk, c->seg, c->size); mseg_cache_clear_node(c); c->seg = seg; @@ -599,7 +478,8 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui c = erts_circleq_tail(&(mk->cache_powered_node[i])); erts_circleq_remove(c); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, c->seg, c->size); + mseg_cache_clear_node(c); c->seg = seg; @@ -614,18 +494,18 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui return 0; } -static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags) { +static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flags) { - Uint size = *size_p; + UWord size = *size_p; ERTS_DBG_MK_CHK_THR_ACCESS(mk); if (MSEG_FLG_IS_2POW(flags)) { int i, ix = SIZE_TO_CACHE_AREA_IDX(size); - void *seg; + char *seg; cache_t *c; - Uint csize; + UWord csize; ASSERT(IS_2POW(size)); @@ -641,7 +521,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags ASSERT(MAP_IS_ALIGNED(c->seg)); csize = c->size; - seg = c->seg; + seg = (char*) c->seg; mk->cache_size--; mk->cache_hits++; @@ -652,9 +532,8 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags ASSERT(!(mk->cache_size < 0)); - if (csize != size) { - mseg_destroy(mk->ma, mk, (char *)seg + size, csize - size); - } + if (csize != size) + mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, seg + size, csize - size); return seg; } @@ -663,10 +542,10 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags void *seg; cache_t *c; cache_t *best = NULL; - Uint bdiff = 0; - Uint csize; - Uint bad_max_abs = mk->ma->abs_max_cache_bad_fit; - Uint bad_max_rel = mk->ma->rel_max_cache_bad_fit; + UWord bdiff = 0; + UWord csize; + UWord bad_max_abs = mk->ma->abs_max_cache_bad_fit; + UWord bad_max_rel = mk->ma->rel_max_cache_bad_fit; erts_circleq_foreach(c, &(mk->cache_unpowered_node)) { csize = c->size; @@ -728,7 +607,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags * using callbacks from aux-work in the scheduler. */ -static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { cache_t *c = NULL; c = erts_circleq_tail(head); @@ -737,7 +616,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, flags, mk, c->seg, c->size); mseg_cache_clear_node(c); erts_circleq_push_head(&(mk->cache_free), c); @@ -749,7 +628,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h return mk->cache_size; } -static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) { +static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) { cache_t *c = NULL; while (!erts_circleq_is_empty(head)) { @@ -760,7 +639,7 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) if (erts_mtrace_enabled) erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg); - mseg_destroy(mk->ma, mk, c->seg, c->size); + mseg_destroy(mk->ma, flags, mk, c->seg, c->size); mseg_cache_clear_node(c); erts_circleq_push_head(&(mk->cache_free), c); @@ -788,11 +667,11 @@ static Uint mseg_check_memkind_cache(MemKind *mk) { for (i = 0; i < CACHE_AREAS; i++) { if (!erts_circleq_is_empty(&(mk->cache_powered_node[i]))) - return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_powered_node[i])); + return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); } if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_unpowered_node)); + return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); return 0; } @@ -851,12 +730,12 @@ static void mseg_clear_memkind_cache(MemKind *mk) { if (erts_circleq_is_empty(&(mk->cache_powered_node[i]))) continue; - mseg_drop_memkind_cache_size(mk, &(mk->cache_powered_node[i])); + mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i])); ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i]))); } /* drop varied caches */ if (!erts_circleq_is_empty(&(mk->cache_unpowered_node))) - mseg_drop_memkind_cache_size(mk, &(mk->cache_unpowered_node)); + mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node)); ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node))); ASSERT(mk->cache_size == 0); @@ -896,36 +775,35 @@ static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma, } static void * -mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, Uint *size_p, +mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt) { - Uint size; + UWord size; void *seg; MemKind* mk = memkind(ma, opt); INC_CC(ma, alloc); - /* Carrier align */ - size = ALIGNED_CEILING(*size_p); - - /* Cache optim (if applicable) */ - if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(size)) - size = ceil_2pow(size); + if (!MSEG_FLG_IS_2POW(flags)) + size = ERTS_PAGEALIGNED_CEILING(*size_p); + else { + size = ALIGNED_CEILING(*size_p); + if (!IS_2POW(size)) { + /* Cache optim (if applicable) */ + size = ceil_2pow(size); + } + } -#if CAN_PARTLY_DESTROY - if (size < ma->min_seg_size) - ma->min_seg_size = size; -#endif - if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL) goto done; - if ((seg = mseg_create(ma, mk, size)) == NULL) - size = 0; + seg = mseg_create(ma, flags, mk, &size); + if (!seg) + *size_p = 0; + else { done: - *size_p = size; - if (seg) { + *size_p = size; if (erts_mtrace_enabled) erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size); @@ -937,7 +815,7 @@ done: static void -mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size, +mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size, Uint flags, const ErtsMsegOpt_t *opt) { MemKind* mk = memkind(ma, opt); @@ -952,7 +830,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size, if (erts_mtrace_enabled) erts_mtrace_crr_free(atype, SEGTYPE, seg); - mseg_destroy(ma, mk, seg, size); + mseg_destroy(ma, flags, mk, seg, size); done: @@ -961,11 +839,11 @@ done: static void * mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) + UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) { MemKind* mk; void *new_seg; - Uint new_size; + UWord new_size; /* Just allocate a new segment if we didn't have one before */ if (!seg || !old_size) { @@ -985,90 +863,47 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, mk = memkind(ma, opt); new_seg = seg; - /* Carrier align */ - new_size = ALIGNED_CEILING(*new_size_p); - - /* Cache optim (if applicable) */ - if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(new_size)) - new_size = ceil_2pow(new_size); - - if (new_size == old_size) - ; - else if (new_size < old_size) { - Uint shrink_sz = old_size - new_size; + if (!MSEG_FLG_IS_2POW(flags)) + new_size = ERTS_PAGEALIGNED_CEILING(*new_size_p); + else { + new_size = ALIGNED_CEILING(*new_size_p); + if (!IS_2POW(new_size)) { + /* Cache optim (if applicable) */ + new_size = ceil_2pow(new_size); + } + } -#if CAN_PARTLY_DESTROY - if (new_size < ma->min_seg_size) - ma->min_seg_size = new_size; -#endif - /* +M<S>rsbcst <ratio> */ - if (shrink_sz < opt->abs_shrink_th - && 100*shrink_sz < opt->rel_shrink_th*old_size) { - new_size = old_size; + if (new_size > old_size) { + if (opt->preserv) { + new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); + if (!new_seg) + new_size = old_size; } else { - -#if CAN_PARTLY_DESTROY - - if (erts_mtrace_enabled) - erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); - - mseg_destroy(ma, mk, ((char *) seg) + new_size, shrink_sz); - -#elif HAVE_MSEG_RECREATE - goto do_recreate; -#else + mseg_dealloc(ma, atype, seg, old_size, flags, opt); new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - if (!new_seg) - new_size = old_size; - else { - sys_memcpy(((char *) new_seg), - ((char *) seg), - MIN(new_size, old_size)); - mseg_dealloc(ma, atype, seg, old_size, flags, opt); - } -#endif + new_size = 0; } } - else { + else if (new_size < old_size) { + UWord shrink_sz = old_size - new_size; - if (!opt->preserv) { - mseg_dealloc(ma, atype, seg, old_size, flags, opt); - new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); + /* +M<S>rsbcst <ratio> */ + if (shrink_sz < opt->abs_shrink_th + && 100*shrink_sz < opt->rel_shrink_th*old_size) { + new_size = old_size; } else { -#if HAVE_MSEG_RECREATE -#if !CAN_PARTLY_DESTROY - do_recreate: -#endif - new_seg = mseg_recreate(ma, mk, (void *) seg, old_size, new_size); - /* ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - * will not always be aligned and it ok for now - */ - - if (erts_mtrace_enabled) - erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); + new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size); if (!new_seg) new_size = old_size; -#else - new_seg = mseg_alloc(ma, atype, &new_size, flags, opt); - - ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg); - - if (!new_seg) - new_size = old_size; - else { - sys_memcpy(((char *) new_seg), ((char *) seg), MIN(new_size, old_size)); - mseg_dealloc(ma, atype, seg, old_size, flags, opt); - } -#endif } } + if (erts_mtrace_enabled) + erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size); + INC_CC(ma, realloc); ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size)); @@ -1106,9 +941,7 @@ static struct { Eterm mseg_create; Eterm mseg_create_resize; Eterm mseg_destroy; -#if HAVE_MSEG_RECREATE Eterm mseg_recreate; -#endif Eterm mseg_clear_cache; Eterm mseg_check_cache; @@ -1129,8 +962,6 @@ init_atoms(ErtsMsegAllctr_t *ma) #ifdef DEBUG Eterm *atom; #endif - - ERTS_MSEG_UNLOCK(ma); erts_mtx_lock(&init_atoms_mutex); if (!atoms_initialized) { @@ -1163,9 +994,7 @@ init_atoms(ErtsMsegAllctr_t *ma) AM_INIT(mseg_create); AM_INIT(mseg_create_resize); AM_INIT(mseg_destroy); -#if HAVE_MSEG_RECREATE AM_INIT(mseg_recreate); -#endif AM_INIT(mseg_clear_cache); AM_INIT(mseg_check_cache); @@ -1176,7 +1005,6 @@ init_atoms(ErtsMsegAllctr_t *ma) #endif } - ERTS_MSEG_LOCK(ma); atoms_initialized = 1; erts_mtx_unlock(&init_atoms_mutex); } @@ -1239,7 +1067,9 @@ info_options(ErtsMsegAllctr_t *ma, Uint **hpp, Uint *szp) { - Eterm res = THE_NON_VALUE; + Eterm res; + + res = erts_mmap_info_options(prefix, print_to_p, print_to_arg, hpp, szp); if (print_to_p) { int to = *print_to_p; @@ -1254,7 +1084,6 @@ info_options(ErtsMsegAllctr_t *ma, if (!atoms_initialized) init_atoms(ma); - res = NIL; add_2tup(hpp, szp, &res, am.mcs, bld_uint(hpp, szp, ma->max_cache_size)); @@ -1293,9 +1122,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp PRINT_CC(to, arg, create); PRINT_CC(to, arg, create_resize); PRINT_CC(to, arg, destroy); -#if HAVE_MSEG_RECREATE PRINT_CC(to, arg, recreate); -#endif PRINT_CC(to, arg, clear_cache); PRINT_CC(to, arg, check_cache); @@ -1316,12 +1143,10 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp bld_unstable_uint(hpp, szp, ma->calls.clear_cache.giga_no), bld_unstable_uint(hpp, szp, ma->calls.clear_cache.no)); -#if HAVE_MSEG_RECREATE add_3tup(hpp, szp, &res, am.mseg_recreate, bld_unstable_uint(hpp, szp, ma->calls.recreate.giga_no), bld_unstable_uint(hpp, szp, ma->calls.recreate.no)); -#endif add_3tup(hpp, szp, &res, am.mseg_destroy, bld_unstable_uint(hpp, szp, ma->calls.destroy.giga_no), @@ -1465,14 +1290,8 @@ erts_mseg_info_options(int ix, ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix); Eterm res; - ERTS_MSEG_LOCK(ma); - - ERTS_DBG_MA_CHK_THR_ACCESS(ma); - res = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp); - ERTS_MSEG_UNLOCK(ma); - return res; } @@ -1490,10 +1309,6 @@ erts_mseg_info(int ix, Eterm values[4]; Uint n = 0; - ERTS_MSEG_LOCK(ma); - - ERTS_DBG_MA_CHK_THR_ACCESS(ma); - if (hpp || szp) { if (!atoms_initialized) @@ -1506,6 +1321,10 @@ erts_mseg_info(int ix, } values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp); values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp); + + ERTS_MSEG_LOCK(ma); + ERTS_DBG_MA_CHK_THR_ACCESS(ma); + #if HALFWORD_HEAP values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp); @@ -1521,7 +1340,7 @@ erts_mseg_info(int ix, } void * -erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMsegOpt_t *opt) +erts_mseg_alloc_opt(ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); void *seg; @@ -1529,20 +1348,23 @@ erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMse ERTS_DBG_MA_CHK_THR_ACCESS(ma); seg = mseg_alloc(ma, atype, size_p, flags, opt); ERTS_MSEG_UNLOCK(ma); + HARD_DBG_INSERT_MSEG(seg, *size_p); return seg; } void * -erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p, Uint flags) +erts_mseg_alloc(ErtsAlcType_t atype, UWord *size_p, Uint flags) { return erts_mseg_alloc_opt(atype, size_p, flags, &erts_mseg_default_opt); } void erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, - Uint size, Uint flags, const ErtsMsegOpt_t *opt) + UWord size, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); + + HARD_DBG_REMOVE_MSEG(seg, size); ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); mseg_dealloc(ma, atype, seg, size, flags, opt); @@ -1550,29 +1372,32 @@ erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, } void -erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size, Uint flags) +erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, UWord size, Uint flags) { erts_mseg_dealloc_opt(atype, seg, size, flags, &erts_mseg_default_opt); } void * erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, + UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt) { ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt); void *new_seg; + + HARD_DBG_REMOVE_MSEG(seg, old_size); ERTS_MSEG_LOCK(ma); ERTS_DBG_MA_CHK_THR_ACCESS(ma); new_seg = mseg_realloc(ma, atype, seg, old_size, new_size_p, flags, opt); ERTS_MSEG_UNLOCK(ma); + HARD_DBG_INSERT_MSEG(new_seg, *new_size_p); return new_seg; } void * erts_mseg_realloc(ErtsAlcType_t atype, void *seg, - Uint old_size, Uint *new_size_p, Uint flags) + UWord old_size, UWord *new_size_p, Uint flags) { return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p, flags, &erts_mseg_default_opt); @@ -1662,16 +1487,17 @@ erts_mseg_init(ErtsMsegInit_t *init) erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms"); -#if HAVE_MMAP && !defined(MAP_ANON) - mmap_fd = open("/dev/zero", O_RDWR); - if (mmap_fd < 0) - erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n"); -#endif +#if HALFWORD_HEAP + if (sizeof(void *) != 8) + erl_exit(-1,"Halfword emulator cannot be run in 32bit mode"); -#if HAVE_MMAP && HALFWORD_HEAP - initialize_pmmap(); + init->mmap.virtual_range.start = (char *) sbrk(0); + init->mmap.virtual_range.end = (char *) 0x100000000UL; + init->mmap.sco = 0; #endif + erts_mmap_init(&init->mmap); + if (!IS_2POW(GET_PAGE_SIZE)) erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE); @@ -1712,10 +1538,6 @@ erts_mseg_init(ErtsMsegInit_t *init) #endif sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls)); - -#if CAN_PARTLY_DESTROY - ma->min_seg_size = ~((Uint) 0); -#endif } } @@ -1757,7 +1579,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3) case 0x400: /* Have erts_mseg */ return (UWord) 1; case 0x401: - return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1, (Uint) 0); + return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (UWord *) a1, (Uint) 0); case 0x402: erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, (Uint) 0); return (UWord) 0; @@ -1765,7 +1587,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3) return (UWord) erts_mseg_realloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, - (Uint *) a3, + (UWord *) a3, (Uint) 0); case 0x404: erts_mseg_clear_cache(); @@ -1788,405 +1610,3 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3) } } - - -#if HALFWORD_HEAP -/* - * Very simple page oriented mmap replacer. Works in the lower - * 32 bit address range of a 64bit program. - * Implements anonymous mmap mremap and munmap with address order first fit. - * The free list is expected to be very short... - * To be used for compressed pointers in Erlang halfword emulator - * implementation. The MacOS X version is more of a toy, it's not really - * for production as the halfword erlang VM relies on Linux specific memory - * mapping tricks. - */ - -/* #define HARDDEBUG 1 */ - -#ifdef HARDDEBUG -static void dump_freelist(void) -{ - FreeBlock *p = first; - - while (p) { - fprintf(stderr, "p = %p\r\np->num = %ld\r\np->next = %p\r\n\r\n", - (void *) p, (unsigned long) p->num, (void *) p->next); - p = p->next; - } -} - -#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) \ - fprintf(stderr,"Mapping of address %p with size %ld " \ - "does not map complete pages (%s:%d)\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) - -#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) \ - fprintf(stderr,"Mapping of address %p with size %ld " \ - "is not page aligned (%s:%d)\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) - -#define HARDDEBUG_MAP_FAILED(PTR, SZ) \ - fprintf(stderr, "Could not actually map memory " \ - "at address %p with size %ld (%s:%d) ..\r\n", \ - (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__) -#else -#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) do{}while(0) -#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) do{}while(0) -#define HARDDEBUG_MAP_FAILED(PTR, SZ) do{}while(0) -#endif - - -#ifdef __APPLE__ -#define MAP_ANONYMOUS MAP_ANON -#endif - -#define INIT_LOCK() do {erts_mtx_init(&pmmap_mutex, "pmmap");} while(0) - -#define TAKE_LOCK() do {erts_mtx_lock(&pmmap_mutex);} while(0) - -#define RELEASE_LOCK() do {erts_mtx_unlock(&pmmap_mutex);} while(0) - -static erts_mtx_t pmmap_mutex; /* Also needed when !USE_THREADS */ - -typedef struct _free_block { - unsigned long num; /*pages*/ - struct _free_block *next; -} FreeBlock; - -/* Protect with lock */ -static FreeBlock *first; - -static void *do_map(void *ptr, size_t sz) -{ - void *res; - - if (ALIGNED_CEILING(sz) != sz) { - HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz); - return NULL; - } - - if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) { - HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz); - return NULL; - } - -#if HAVE_MMAP - res = mmap(ptr, sz, - PROT_READ | PROT_WRITE, MAP_PRIVATE | - MAP_ANONYMOUS | MAP_FIXED, - -1 , 0); -#else -# error "Missing mmap support" -#endif - - if (res == MAP_FAILED) { - HARDDEBUG_MAP_FAILED(ptr, sz); - return NULL; - } - - return res; -} - -static int do_unmap(void *ptr, size_t sz) -{ - void *res; - - if (ALIGNED_CEILING(sz) != sz) { - HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz); - return 1; - } - - if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) { - HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz); - return 1; - } - - res = mmap(ptr, sz, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED, - -1 , 0); - - if (res == MAP_FAILED) { - HARDDEBUG_MAP_FAILED(ptr, sz); - return 1; - } - - return 0; -} - -#ifdef __APPLE__ -/* - * The first 4 gig's are protected on Macos X for 64bit processes :( - * The range 0x1000000000 - 0x10FFFFFFFF is selected as an arbitrary - * value of a normally unused range... Real MMAP's will avoid - * it and all 32bit compressed pointers can be in that range... - * More expensive than on Linux where expansion of compressed - * poiters involves no masking (as they are in the first 4 gig's). - * It's also very uncertain if the MAP_NORESERVE flag really has - * any effect in MacOS X. Swap space may always be allocated... - */ -#define SET_RANGE_MIN() /* nothing */ -#define RANGE_MIN 0x1000000000UL -#define RANGE_MAX 0x1100000000UL -#define RANGE_MASK (RANGE_MIN) -#define EXTRA_MAP_FLAGS (MAP_FIXED) -#else -static size_t range_min; -#define SET_RANGE_MIN() do { range_min = (size_t) sbrk(0); } while (0) -#define RANGE_MIN range_min -#define RANGE_MAX 0x100000000UL -#define RANGE_MASK 0UL -#define EXTRA_MAP_FLAGS (0) -#endif - -static int initialize_pmmap(void) -{ - char *p,*q,*rptr; - size_t rsz; - FreeBlock *initial; - - SET_RANGE_MIN(); - if (sizeof(void *) != 8) { - erl_exit(1,"Halfword emulator cannot be run in 32bit mode"); - } - - p = (char *) RANGE_MIN; - q = (char *) RANGE_MAX; - - rsz = ALIGNED_FLOOR(q - p); - - rptr = mmap_align(NULL, (void *) p, rsz, - PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | - MAP_NORESERVE | EXTRA_MAP_FLAGS, - -1 , 0); -#ifdef HARDDEBUG - printf("p=%p, rsz = %ld, pages = %ld, got range = %p -> %p\r\n", - p, (unsigned long) rsz, (unsigned long) (rsz / MSEG_ALIGNED_SIZE), - (void *) rptr, (void*)(rptr + rsz)); -#endif - if ((UWord)(rptr + rsz) > RANGE_MAX) { - size_t rsz_trunc = RANGE_MAX - (UWord)rptr; -#ifdef HARDDEBUG - printf("Reducing mmap'ed memory from %lu to %lu Mb, reduced range = %p -> %p\r\n", - rsz/(1024*1024), rsz_trunc/(1024*1024), rptr, rptr+rsz_trunc); -#endif - munmap((void*)RANGE_MAX, rsz - rsz_trunc); - rsz = rsz_trunc; - } - if (!do_map(rptr, MSEG_ALIGNED_SIZE)) { - erl_exit(1,"Could not actually mmap first page for halfword emulator...\n"); - } - initial = (FreeBlock *) rptr; - initial->num = (rsz / MSEG_ALIGNED_SIZE); - initial->next = NULL; - first = initial; - INIT_LOCK(); - return 0; -} - -static void *pmmap(size_t size) -{ - size_t real_size = ALIGNED_CEILING(size); - size_t num_pages = real_size / MSEG_ALIGNED_SIZE; - FreeBlock **block; - FreeBlock *tail; - FreeBlock *res; - - TAKE_LOCK(); - - for (block = &first; - *block != NULL && (*block)->num < num_pages; - block = &((*block)->next)) - ; - if (!(*block)) { - RELEASE_LOCK(); - return NULL; - } - if ((*block)->num == num_pages) { - /* nice, perfect fit */ - res = *block; - *block = (*block)->next; - } else { - tail = (FreeBlock *) (((char *) ((void *) (*block))) + real_size); - if (!do_map(tail, MSEG_ALIGNED_SIZE)) { - HARDDEBUG_MAP_FAILED(tail, MSEG_ALIGNED_SIZE); - RELEASE_LOCK(); - return NULL; - } - tail->num = (*block)->num - num_pages; - tail->next = (*block)->next; - res = *block; - *block = tail; - } - - RELEASE_LOCK(); - - if (!do_map(res, real_size)) { - HARDDEBUG_MAP_FAILED(res, real_size); - return NULL; - } - - return (void *) res; -} - -static int pmunmap(void *p, size_t size) -{ - size_t real_size = ALIGNED_CEILING(size); - size_t num_pages = real_size / MSEG_ALIGNED_SIZE; - - FreeBlock *block; - FreeBlock *last; - FreeBlock *nb = (FreeBlock *) p; - - ASSERT(((unsigned long)p & CHECK_POINTER_MASK)==0); - - if (real_size > MSEG_ALIGNED_SIZE) { - if (do_unmap(((char *) p) + MSEG_ALIGNED_SIZE, real_size - MSEG_ALIGNED_SIZE)) { - return 1; - } - } - - TAKE_LOCK(); - - last = NULL; - block = first; - while(block != NULL && ((void *) block) < p) { - last = block; - block = block->next; - } - - if (block != NULL && - ((void *) block) == ((void *) (((char *) p) + real_size))) { - /* Merge new free block with following */ - nb->num = block->num + num_pages; - nb->next = block->next; - if (do_unmap(block, MSEG_ALIGNED_SIZE)) { - RELEASE_LOCK(); - return 1; - } - } else { - /* just link in */ - nb->num = num_pages; - nb->next = block; - } - if (last != NULL) { - if (p == ((void *) (((char *) last) + (last->num * MSEG_ALIGNED_SIZE)))) { - /* Merge with previous */ - last->num += nb->num; - last->next = nb->next; - if (do_unmap(nb, MSEG_ALIGNED_SIZE)) { - RELEASE_LOCK(); - return 1; - } - } else { - last->next = nb; - } - } else { - first = nb; - } - RELEASE_LOCK(); - return 0; -} - -static void *pmremap(void *old_address, size_t old_size, - size_t new_size) -{ - size_t new_real_size = ALIGNED_CEILING(new_size); - size_t new_num_pages = new_real_size / MSEG_ALIGNED_SIZE; - size_t old_real_size = ALIGNED_CEILING(old_size); - size_t old_num_pages = old_real_size / MSEG_ALIGNED_SIZE; - if (new_num_pages == old_num_pages) { - return old_address; - } else if (new_num_pages < old_num_pages) { /* Shrink */ - size_t nfb_pages = old_num_pages - new_num_pages; - size_t nfb_real_size = old_real_size - new_real_size; - void *vnfb = (void *) (((char *)old_address) + new_real_size); - FreeBlock *nfb = (FreeBlock *) vnfb; - FreeBlock **block; - TAKE_LOCK(); - for (block = &first; - *block != NULL && (*block) < nfb; - block = &((*block)->next)) - ; - if (!(*block) || - (*block) > ((FreeBlock *)(((char *) vnfb) + nfb_real_size))) { - /* Normal link in */ - if (nfb_pages > 1) { - if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE), - (nfb_pages - 1)*MSEG_ALIGNED_SIZE)) { - return NULL; - } - } - nfb->next = (*block); - nfb->num = nfb_pages; - (*block) = nfb; - } else { /* block merge */ - nfb->next = (*block)->next; - nfb->num = nfb_pages + (*block)->num; - /* unmap also the first page of the next freeblock */ - (*block) = nfb; - if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE), - nfb_pages*MSEG_ALIGNED_SIZE)) { - return NULL; - } - } - RELEASE_LOCK(); - return old_address; - } else { /* Enlarge */ - FreeBlock **block; - void *old_end = (void *) (((char *)old_address) + old_real_size); - TAKE_LOCK(); - for (block = &first; - *block != NULL && (*block) < (FreeBlock *) old_address; - block = &((*block)->next)) - ; - if ((*block) == NULL || old_end > ((void *) RANGE_MAX) || - (*block) != old_end || - (*block)->num < (new_num_pages - old_num_pages)) { - /* cannot extend */ - void *result; - RELEASE_LOCK(); - result = pmmap(new_size); - if (result == NULL) { - return NULL; - } - memcpy(result,old_address,old_size); - if (pmunmap(old_address,old_size)) { - /* Oups... */ - pmunmap(result,new_size); - return NULL; - } - return result; - } else { /* extend */ - size_t remaining_pages = (*block)->num - - (new_num_pages - old_num_pages); - if (!remaining_pages) { - void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE); - void *n = (*block)->next; - size_t x = ((*block)->num - 1) * MSEG_ALIGNED_SIZE; - if (x > 0) { - if (do_map(p,x) == NULL) { - RELEASE_LOCK(); - return NULL; - } - } - (*block) = n; - } else { - FreeBlock *nfb = (FreeBlock *) ((void *) - (((char *) old_address) + - new_real_size)); - void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE); - if (do_map(p,new_real_size - old_real_size) == NULL) { - RELEASE_LOCK(); - return NULL; - } - nfb->num = remaining_pages; - nfb->next = (*block)->next; - (*block) = nfb; - } - RELEASE_LOCK(); - return old_address; - } - } -} -#endif /* HALFWORD_HEAP */ diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h index a1b000f51c..2284b3f8f1 100644 --- a/erts/emulator/sys/common/erl_mseg.h +++ b/erts/emulator/sys/common/erl_mseg.h @@ -22,15 +22,16 @@ #include "sys.h" #include "erl_alloc_types.h" +#include "erl_mmap.h" -#ifndef HAVE_MMAP -# define HAVE_MMAP 0 -#endif -#ifndef HAVE_MREMAP -# define HAVE_MREMAP 0 -#endif - -#if HAVE_MMAP +/* + * We currently only enable mseg_alloc if we got + * a genuine mmap()/munmap() primitive. It is possible + * to utilize erts_mmap() withiout a mmap support but + * alloc_util needs to be prepared before we can do + * that. + */ +#ifdef ERTS_HAVE_GENUINE_OS_MMAP # define HAVE_ERTS_MSEG 1 # define ERTS_HAVE_MSEG_SUPER_ALIGNED 1 #else @@ -39,8 +40,7 @@ #endif #if ERTS_HAVE_MSEG_SUPER_ALIGNED -# define MSEG_ALIGN_BITS (18) - /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */ +# define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS #else /* If we don't use super aligned multiblock carriers * we will mmap with page size alignment (and thus use corresponding @@ -68,6 +68,7 @@ typedef struct { Uint rmcbf; Uint mcs; Uint nos; + ErtsMMapInit mmap; } ErtsMsegInit_t; #define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \ @@ -75,7 +76,8 @@ typedef struct { 4*1024*1024, /* amcbf: Absolute max cache bad fit */ \ 20, /* rmcbf: Relative max cache bad fit */ \ 10, /* mcs: Max cache size */ \ - 1000 /* cci: Cache check interval */ \ + 1000, /* cci: Cache check interval */ \ + ERTS_MMAP_INIT_DEFAULT_INITER \ } typedef struct { @@ -91,12 +93,12 @@ typedef struct { extern const ErtsMsegOpt_t erts_mseg_default_opt; -void *erts_mseg_alloc(ErtsAlcType_t, Uint *, Uint); -void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, Uint, const ErtsMsegOpt_t *); -void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint, Uint); -void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, Uint, const ErtsMsegOpt_t *); -void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *, Uint); -void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *, Uint, const ErtsMsegOpt_t *); +void *erts_mseg_alloc(ErtsAlcType_t, UWord *, Uint); +void *erts_mseg_alloc_opt(ErtsAlcType_t, UWord *, Uint, const ErtsMsegOpt_t *); +void erts_mseg_dealloc(ErtsAlcType_t, void *, UWord, Uint); +void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, UWord, Uint, const ErtsMsegOpt_t *); +void *erts_mseg_realloc(ErtsAlcType_t, void *, UWord, UWord *, Uint); +void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, UWord, UWord *, Uint, const ErtsMsegOpt_t *); void erts_mseg_clear_cache(void); void erts_mseg_cache_check(void); Uint erts_mseg_no( const ErtsMsegOpt_t *); |