aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/sys/common
diff options
context:
space:
mode:
Diffstat (limited to 'erts/emulator/sys/common')
-rw-r--r--erts/emulator/sys/common/erl_mmap.c2832
-rw-r--r--erts/emulator/sys/common/erl_mmap.h134
-rw-r--r--erts/emulator/sys/common/erl_mseg.c876
-rw-r--r--erts/emulator/sys/common/erl_mseg.h36
4 files changed, 3133 insertions, 745 deletions
diff --git a/erts/emulator/sys/common/erl_mmap.c b/erts/emulator/sys/common/erl_mmap.c
new file mode 100644
index 0000000000..a9da7430fb
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mmap.c
@@ -0,0 +1,2832 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2002-2013. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_process.h"
+#include "erl_smp.h"
+#include "atom.h"
+#include "erl_mmap.h"
+#include <stddef.h>
+
+/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
+
+#if defined(DEBUG) || 0
+# undef ERTS_MMAP_DEBUG
+# define ERTS_MMAP_DEBUG
+# ifndef ERTS_MMAP_OP_RINGBUF_SZ
+# define ERTS_MMAP_OP_RINGBUF_SZ 100
+# endif
+#endif
+
+#ifndef ERTS_MMAP_OP_RINGBUF_SZ
+# define ERTS_MMAP_OP_RINGBUF_SZ 0
+#endif
+
+/* #define ERTS_MMAP_DEBUG_FILL_AREAS */
+
+#ifdef ERTS_MMAP_DEBUG
+# define ERTS_MMAP_ASSERT ERTS_ASSERT
+#else
+# define ERTS_MMAP_ASSERT(A) ((void) 1)
+#endif
+
+/*
+ * `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after
+ * initialization, but the other pointers are not; i.e., only
+ * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
+ */
+#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \
+ (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
+ < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot))
+#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \
+ (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
+ (((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
+ < ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot)))
+#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \
+ (ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
+ (((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \
+ < ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot)))
+
+int erts_have_erts_mmap;
+UWord erts_page_inv_mask;
+
+#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
+# undef RBT_DEBUG
+# define RBT_DEBUG
+#endif
+#ifdef RBT_DEBUG
+# define RBT_ASSERT ERTS_ASSERT
+# define IF_RBT_DEBUG(C) C
+#else
+# define RBT_ASSERT(x)
+# define IF_RBT_DEBUG(C)
+#endif
+
+typedef struct RBTNode_ RBTNode;
+struct RBTNode_ {
+ UWord parent_and_color; /* color in bit 0 of parent ptr */
+ RBTNode *left;
+ RBTNode *right;
+};
+
+#define RED_FLG (1)
+#define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG))
+#define IS_BLACK(N) (!IS_RED(N))
+#define SET_RED(N) ((N)->parent_and_color |= RED_FLG)
+#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG)
+
+static ERTS_INLINE RBTNode* parent(RBTNode* node)
+{
+ return (RBTNode*) (node->parent_and_color & ~RED_FLG);
+}
+
+static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent)
+{
+ RBT_ASSERT(!((UWord)parent & RED_FLG));
+ node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG);
+}
+
+static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color)
+{
+ RBT_ASSERT(!((UWord)parent & RED_FLG));
+ RBT_ASSERT(!(color & ~RED_FLG));
+ return ((UWord)parent) | color;
+}
+
+
+enum SortOrder {
+ ADDR_ORDER, /* only address order */
+ SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */
+ SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */
+};
+#ifdef HARD_DEBUG
+static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"};
+#endif
+
+typedef struct {
+ RBTNode* root;
+ enum SortOrder order;
+}RBTree;
+
+#ifdef HARD_DEBUG
+# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE)
+# define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ)
+static int rbt_assert_is_member(RBTNode* root, RBTNode* node);
+static RBTNode* check_tree(RBTree* tree, Uint);
+#else
+# define HARD_CHECK_IS_MEMBER(ROOT,NODE)
+# define HARD_CHECK_TREE(TREE,SZ)
+#endif
+
+#if ERTS_MMAP_OP_RINGBUF_SZ
+
+static int mmap_op_ix;
+
+typedef enum {
+ ERTS_OP_TYPE_NONE,
+ ERTS_OP_TYPE_MMAP,
+ ERTS_OP_TYPE_MUNMAP,
+ ERTS_OP_TYPE_MREMAP
+} ErtsMMapOpType;
+
+typedef struct {
+ ErtsMMapOpType type;
+ void *result;
+ UWord in_size;
+ UWord out_size;
+ void *old_ptr;
+ UWord old_size;
+} ErtsMMapOp;
+
+static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
+
+#define ERTS_MMAP_OP_RINGBUF_INIT() \
+ do { \
+ int ix__; \
+ for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\
+ mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ } \
+ mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
+ } while (0)
+
+#define ERTS_MMAP_OP_START(SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = (SZ); \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ } while (0)
+
+#define ERTS_MMAP_OP_END(PTR, SZ) \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].result = (PTR); \
+ mmap_ops[ix__].out_size = (SZ); \
+ } while (0)
+
+#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
+ do { \
+ erts_smp_mtx_lock(&mmap_state.mtx); \
+ ERTS_MMAP_OP_START((IN_SZ)); \
+ ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
+ erts_smp_mtx_unlock(&mmap_state.mtx); \
+ } while (0)
+
+#define ERTS_MUNMAP_OP(PTR, SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = (PTR); \
+ mmap_ops[ix__].old_size = (SZ); \
+ } while (0)
+
+#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
+ do { \
+ erts_smp_mtx_lock(&mmap_state.mtx); \
+ ERTS_MUNMAP_OP((PTR), (SZ)); \
+ erts_smp_mtx_unlock(&mmap_state.mtx); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
+ do { \
+ int ix__; \
+ if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
+ mmap_op_ix = 0; \
+ ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = (IN_SZ); \
+ mmap_ops[ix__].out_size = (OLD_SZ); \
+ mmap_ops[ix__].old_ptr = (OLD_PTR); \
+ mmap_ops[ix__].old_size = (OLD_SZ); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_END(PTR, SZ) \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].result = (PTR); \
+ mmap_ops[mmap_op_ix].out_size = (SZ); \
+ } while (0)
+
+#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
+ do { \
+ erts_smp_mtx_lock(&mmap_state.mtx); \
+ ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
+ ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
+ erts_smp_mtx_unlock(&mmap_state.mtx); \
+ } while (0)
+
+#define ERTS_MMAP_OP_ABORT() \
+ do { \
+ int ix__ = mmap_op_ix; \
+ mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
+ mmap_ops[ix__].result = NULL; \
+ mmap_ops[ix__].in_size = 0; \
+ mmap_ops[ix__].out_size = 0; \
+ mmap_ops[ix__].old_ptr = NULL; \
+ mmap_ops[ix__].old_size = 0; \
+ if (--mmap_op_ix < 0) \
+ mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
+ } while (0)
+
+#else
+
+#define ERTS_MMAP_OP_RINGBUF_INIT()
+#define ERTS_MMAP_OP_START(SZ)
+#define ERTS_MMAP_OP_END(PTR, SZ)
+#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ)
+#define ERTS_MUNMAP_OP(PTR, SZ)
+#define ERTS_MUNMAP_OP_LCK(PTR, SZ)
+#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ)
+#define ERTS_MREMAP_OP_END(PTR, SZ)
+#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ)
+#define ERTS_MMAP_OP_ABORT()
+
+#endif
+
+typedef struct {
+ RBTNode snode; /* node in 'stree' */
+ RBTNode anode; /* node in 'atree' */
+ char* start;
+ char* end;
+}ErtsFreeSegDesc;
+
+typedef struct {
+ RBTree stree; /* size ordered tree */
+ RBTree atree; /* address ordered tree */
+ Uint nseg;
+}ErtsFreeSegMap;
+
+static struct {
+ int (*reserve_physical)(char *, UWord);
+ void (*unreserve_physical)(char *, UWord);
+ int supercarrier;
+ int no_os_mmap;
+ /*
+ * Super unaligend area is located above super aligned
+ * area. That is, `sa.bot` is beginning of the super
+ * carrier, `sua.top` is the end of the super carrier,
+ * and sa.top and sua.bot moves towards eachother.
+ */
+ struct {
+ char *top;
+ char *bot;
+ ErtsFreeSegMap map;
+ } sua;
+ struct {
+ char *top;
+ char *bot;
+ ErtsFreeSegMap map;
+ } sa;
+#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
+ int mmap_fd;
+#endif
+ erts_smp_mtx_t mtx;
+ struct {
+ char *free_list;
+ char *unused_start;
+ char *unused_end;
+ char *new_area_hint;
+ Uint reserved;
+ } desc;
+ struct {
+ UWord free_seg_descs;
+ struct {
+ UWord curr;
+ UWord max;
+ } free_segs;
+ } no;
+ struct {
+ struct {
+ UWord total;
+ struct {
+ UWord total;
+ UWord sa;
+ UWord sua;
+ } used;
+ } supercarrier;
+ struct {
+ UWord used;
+ } os;
+ } size;
+} mmap_state;
+
+#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \
+ do { \
+ mmap_state.size.supercarrier.used.total += (SZ); \
+ mmap_state.size.supercarrier.used.sa += (SZ); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
+ <= mmap_state.size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \
+ <= mmap_state.size.supercarrier.used.total); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
+ mmap_state.size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \
+ mmap_state.size.supercarrier.used.sa -= (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \
+ do { \
+ mmap_state.size.supercarrier.used.total += (SZ); \
+ mmap_state.size.supercarrier.used.sua += (SZ); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
+ <= mmap_state.size.supercarrier.total); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \
+ <= mmap_state.size.supercarrier.used.total); \
+ } while (0)
+#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
+ mmap_state.size.supercarrier.used.total -= (SZ); \
+ ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \
+ mmap_state.size.supercarrier.used.sua -= (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_OS_INC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \
+ mmap_state.size.os.used += (SZ); \
+ } while (0)
+#define ERTS_MMAP_SIZE_OS_DEC(SZ) \
+ do { \
+ ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \
+ mmap_state.size.os.used -= (SZ); \
+ } while (0)
+
+
+static void
+add_free_desc_area(char *start, char *end)
+{
+ ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
+ if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
+ UWord no;
+ ErtsFreeSegDesc *prev_desc, *desc;
+ char *desc_end;
+
+ no = 1;
+ prev_desc = (ErtsFreeSegDesc *) start;
+ prev_desc->start = mmap_state.desc.free_list;
+ desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
+ desc_end = start + 2*sizeof(ErtsFreeSegDesc);
+
+ while (desc_end <= end) {
+ desc->start = (char *) prev_desc;
+ prev_desc = desc;
+ desc = (ErtsFreeSegDesc *) desc_end;
+ desc_end += sizeof(ErtsFreeSegDesc);
+ no++;
+ }
+ mmap_state.desc.free_list = (char *) prev_desc;
+ mmap_state.no.free_seg_descs += no;
+ }
+}
+
+static ErtsFreeSegDesc *
+add_unused_free_desc_area(void)
+{
+ char *ptr;
+ if (!mmap_state.desc.unused_start)
+ return NULL;
+
+ ERTS_MMAP_ASSERT(mmap_state.desc.unused_end);
+ ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
+ <= mmap_state.desc.unused_end - mmap_state.desc.unused_start);
+
+ ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE;
+ add_free_desc_area(mmap_state.desc.unused_start, ptr);
+
+ if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
+ mmap_state.desc.unused_start = ptr;
+ else
+ mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL;
+
+ ERTS_MMAP_ASSERT(mmap_state.desc.free_list);
+ return (ErtsFreeSegDesc *) mmap_state.desc.free_list;
+}
+
+static ERTS_INLINE ErtsFreeSegDesc *
+alloc_desc(void)
+{
+ ErtsFreeSegDesc *res;
+ res = (ErtsFreeSegDesc *) mmap_state.desc.free_list;
+ if (!res) {
+ res = add_unused_free_desc_area();
+ if (!res)
+ return NULL;
+ }
+ mmap_state.desc.free_list = res->start;
+ ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs);
+ mmap_state.no.free_segs.curr++;
+ if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr)
+ mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr;
+ return res;
+}
+
+static ERTS_INLINE void
+free_desc(ErtsFreeSegDesc *desc)
+{
+ desc->start = mmap_state.desc.free_list;
+ mmap_state.desc.free_list = (char *) desc;
+ ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0);
+ mmap_state.no.free_segs.curr--;
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
+{
+ return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode));
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode)
+{
+ return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode));
+}
+
+static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node)
+{
+ return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node);
+}
+
+static ERTS_INLINE SWord usable_size(enum SortOrder order,
+ ErtsFreeSegDesc* desc)
+{
+ return ((order == SA_SZ_ADDR_ORDER) ?
+ ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start)
+ : desc->end - desc->start);
+}
+
+#ifdef HARD_DEBUG
+static ERTS_INLINE SWord cmp_nodes(enum SortOrder order,
+ RBTNode* lhs, RBTNode* rhs)
+{
+ ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs);
+ ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs);
+ RBT_ASSERT(lhs != rhs);
+ if (order != ADDR_ORDER) {
+ SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc);
+ if (diff) return diff;
+ }
+ if (order != SZ_REVERSE_ADDR_ORDER) {
+ return (char*)ldesc->start - (char*)rdesc->start;
+ }
+ else {
+ return (char*)rdesc->start - (char*)ldesc->start;
+ }
+}
+#endif /* HARD_DEBUG */
+
+static ERTS_INLINE SWord cmp_with_node(enum SortOrder order,
+ SWord sz, char* addr, RBTNode* rhs)
+{
+ ErtsFreeSegDesc* rdesc;
+ if (order != ADDR_ORDER) {
+ SWord diff;
+ rdesc = snode_to_desc(rhs);
+ diff = sz - usable_size(order, rdesc);
+ if (diff) return diff;
+ }
+ else
+ rdesc = anode_to_desc(rhs);
+
+ if (order != SZ_REVERSE_ADDR_ORDER)
+ return addr - (char*)rdesc->start;
+ else
+ return (char*)rdesc->start - addr;
+}
+
+
+static ERTS_INLINE void
+left_rotate(RBTNode **root, RBTNode *x)
+{
+ RBTNode *y = x->right;
+ x->right = y->left;
+ if (y->left)
+ set_parent(y->left, x);
+ set_parent(y, parent(x));
+ if (!parent(y)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->left)
+ parent(x)->left = y;
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ parent(x)->right = y;
+ }
+ y->left = x;
+ set_parent(x, y);
+}
+
+static ERTS_INLINE void
+right_rotate(RBTNode **root, RBTNode *x)
+{
+ RBTNode *y = x->left;
+ x->left = y->right;
+ if (y->right)
+ set_parent(y->right, x);
+ set_parent(y, parent(x));
+ if (!parent(y)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->right)
+ parent(x)->right = y;
+ else {
+ RBT_ASSERT(x == parent(x)->left);
+ parent(x)->left = y;
+ }
+ y->right = x;
+ set_parent(x, y);
+}
+
+/*
+ * Replace node x with node y
+ * NOTE: segment descriptor of y is not changed
+ */
+static ERTS_INLINE void
+replace(RBTNode **root, RBTNode *x, RBTNode *y)
+{
+
+ if (!parent(x)) {
+ RBT_ASSERT(*root == x);
+ *root = y;
+ }
+ else if (x == parent(x)->left)
+ parent(x)->left = y;
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ parent(x)->right = y;
+ }
+ if (x->left) {
+ RBT_ASSERT(parent(x->left) == x);
+ set_parent(x->left, y);
+ }
+ if (x->right) {
+ RBT_ASSERT(parent(x->right) == x);
+ set_parent(x->right, y);
+ }
+
+ y->parent_and_color = x->parent_and_color;
+ y->right = x->right;
+ y->left = x->left;
+}
+
+static void
+tree_insert_fixup(RBTNode** root, RBTNode *node)
+{
+ RBTNode *x = node, *y, *papa_x, *granpa_x;
+
+ /*
+ * Rearrange the tree so that it satisfies the Red-Black Tree properties
+ */
+
+ papa_x = parent(x);
+ RBT_ASSERT(x != *root && IS_RED(papa_x));
+ do {
+
+ /*
+ * x and its parent are both red. Move the red pair up the tree
+ * until we get to the root or until we can separate them.
+ */
+
+ granpa_x = parent(papa_x);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(granpa_x);
+
+ if (papa_x == granpa_x->left) {
+ y = granpa_x->right;
+ if (IS_RED(y)) {
+ SET_BLACK(y);
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ x = granpa_x;
+ }
+ else {
+
+ if (x == papa_x->right) {
+ left_rotate(root, papa_x);
+ papa_x = x;
+ x = papa_x->left;
+ }
+
+ RBT_ASSERT(x == granpa_x->left->left);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(papa_x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(IS_BLACK(y));
+
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ right_rotate(root, granpa_x);
+
+ RBT_ASSERT(x == parent(x)->left);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(parent(x)->right));
+ RBT_ASSERT(IS_BLACK(parent(x)));
+ break;
+ }
+ }
+ else {
+ RBT_ASSERT(papa_x == granpa_x->right);
+ y = granpa_x->left;
+ if (IS_RED(y)) {
+ SET_BLACK(y);
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ x = granpa_x;
+ }
+ else {
+
+ if (x == papa_x->left) {
+ right_rotate(root, papa_x);
+ papa_x = x;
+ x = papa_x->right;
+ }
+
+ RBT_ASSERT(x == granpa_x->right->right);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(papa_x));
+ RBT_ASSERT(IS_BLACK(granpa_x));
+ RBT_ASSERT(IS_BLACK(y));
+
+ SET_BLACK(papa_x);
+ SET_RED(granpa_x);
+ left_rotate(root, granpa_x);
+
+ RBT_ASSERT(x == parent(x)->right);
+ RBT_ASSERT(IS_RED(x));
+ RBT_ASSERT(IS_RED(parent(x)->left));
+ RBT_ASSERT(IS_BLACK(parent(x)));
+ break;
+ }
+ }
+ } while (x != *root && (papa_x=parent(x), IS_RED(papa_x)));
+
+ SET_BLACK(*root);
+}
+
+static void
+rbt_delete(RBTree* tree, RBTNode* del)
+{
+ Uint spliced_is_black;
+ RBTNode *x, *y, *z = del, *papa_y;
+ RBTNode null_x; /* null_x is used to get the fixup started when we
+ splice out a node without children. */
+
+ HARD_CHECK_IS_MEMBER(tree->root, del);
+ HARD_CHECK_TREE(tree, 0);
+
+ null_x.parent_and_color = parent_and_color(NULL, !RED_FLG);
+
+ /* Remove node from tree... */
+
+ /* Find node to splice out */
+ if (!z->left || !z->right)
+ y = z;
+ else
+ /* Set y to z:s successor */
+ for(y = z->right; y->left; y = y->left);
+ /* splice out y */
+ x = y->left ? y->left : y->right;
+ spliced_is_black = IS_BLACK(y);
+ papa_y = parent(y);
+ if (x) {
+ set_parent(x, papa_y);
+ }
+ else if (spliced_is_black) {
+ x = &null_x;
+ x->right = x->left = NULL;
+ x->parent_and_color = parent_and_color(papa_y, !RED_FLG);
+ y->left = x;
+ }
+
+ if (!papa_y) {
+ RBT_ASSERT(tree->root == y);
+ tree->root = x;
+ }
+ else {
+ if (y == papa_y->left) {
+ papa_y->left = x;
+ }
+ else {
+ RBT_ASSERT(y == papa_y->right);
+ papa_y->right = x;
+ }
+ }
+ if (y != z) {
+ /* We spliced out the successor of z; replace z by the successor */
+ RBT_ASSERT(z != &null_x);
+ replace(&tree->root, z, y);
+ }
+
+ if (spliced_is_black) {
+ RBTNode* papa_x;
+ /* We removed a black node which makes the resulting tree
+ violate the Red-Black Tree properties. Fixup tree... */
+
+ papa_x = parent(x);
+ while (IS_BLACK(x) && papa_x) {
+
+ /*
+ * x has an "extra black" which we move up the tree
+ * until we reach the root or until we can get rid of it.
+ *
+ * y is the sibbling of x
+ */
+
+ if (x == papa_x->left) {
+ y = papa_x->right;
+ RBT_ASSERT(y);
+ if (IS_RED(y)) {
+ RBT_ASSERT(y->right);
+ RBT_ASSERT(y->left);
+ SET_BLACK(y);
+ RBT_ASSERT(IS_BLACK(papa_x));
+ SET_RED(papa_x);
+ left_rotate(&tree->root, papa_x);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->right;
+ }
+ RBT_ASSERT(y);
+ RBT_ASSERT(IS_BLACK(y));
+ if (IS_BLACK(y->left) && IS_BLACK(y->right)) {
+ SET_RED(y);
+ }
+ else {
+ if (IS_BLACK(y->right)) {
+ SET_BLACK(y->left);
+ SET_RED(y);
+ right_rotate(&tree->root, y);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->right;
+ }
+ RBT_ASSERT(y);
+ if (IS_RED(papa_x)) {
+
+ SET_BLACK(papa_x);
+ SET_RED(y);
+ }
+ RBT_ASSERT(y->right);
+ SET_BLACK(y->right);
+ left_rotate(&tree->root, papa_x);
+ x = tree->root;
+ break;
+ }
+ }
+ else {
+ RBT_ASSERT(x == papa_x->right);
+ y = papa_x->left;
+ RBT_ASSERT(y);
+ if (IS_RED(y)) {
+ RBT_ASSERT(y->right);
+ RBT_ASSERT(y->left);
+ SET_BLACK(y);
+ RBT_ASSERT(IS_BLACK(papa_x));
+ SET_RED(papa_x);
+ right_rotate(&tree->root, papa_x);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->left;
+ }
+ RBT_ASSERT(y);
+ RBT_ASSERT(IS_BLACK(y));
+ if (IS_BLACK(y->right) && IS_BLACK(y->left)) {
+ SET_RED(y);
+ }
+ else {
+ if (IS_BLACK(y->left)) {
+ SET_BLACK(y->right);
+ SET_RED(y);
+ left_rotate(&tree->root, y);
+ RBT_ASSERT(papa_x == parent(x));
+ y = papa_x->left;
+ }
+ RBT_ASSERT(y);
+ if (IS_RED(papa_x)) {
+ SET_BLACK(papa_x);
+ SET_RED(y);
+ }
+ RBT_ASSERT(y->left);
+ SET_BLACK(y->left);
+ right_rotate(&tree->root, papa_x);
+ x = tree->root;
+ break;
+ }
+ }
+ x = papa_x;
+ papa_x = parent(x);
+ }
+ SET_BLACK(x);
+
+ papa_x = parent(&null_x);
+ if (papa_x) {
+ if (papa_x->left == &null_x)
+ papa_x->left = NULL;
+ else {
+ RBT_ASSERT(papa_x->right == &null_x);
+ papa_x->right = NULL;
+ }
+ RBT_ASSERT(!null_x.left);
+ RBT_ASSERT(!null_x.right);
+ }
+ else if (tree->root == &null_x) {
+ tree->root = NULL;
+ RBT_ASSERT(!null_x.left);
+ RBT_ASSERT(!null_x.right);
+ }
+ }
+ HARD_CHECK_TREE(tree, 0);
+}
+
+
+static void
+rbt_insert(RBTree* tree, RBTNode* node)
+{
+#ifdef RBT_DEBUG
+ ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL;
+#endif
+ ErtsFreeSegDesc* desc = node_to_desc(tree->order, node);
+ char* seg_addr = desc->start;
+ SWord seg_sz = desc->end - desc->start;
+
+ HARD_CHECK_TREE(tree, 0);
+
+ node->left = NULL;
+ node->right = NULL;
+
+ if (!tree->root) {
+ node->parent_and_color = parent_and_color(NULL, !RED_FLG);
+ tree->root = node;
+ }
+ else {
+ RBTNode *x = tree->root;
+ while (1) {
+ SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x);
+ if (diff < 0) {
+ IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x));
+ if (!x->left) {
+ node->parent_and_color = parent_and_color(x, RED_FLG);
+ x->left = node;
+ break;
+ }
+ x = x->left;
+ }
+ else {
+ RBT_ASSERT(diff > 0);
+ IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x));
+ if (!x->right) {
+ node->parent_and_color = parent_and_color(x, RED_FLG);
+ x->right = node;
+ break;
+ }
+ x = x->right;
+ }
+ }
+
+ RBT_ASSERT(parent(node));
+#ifdef RBT_DEBUG
+ if (tree->order == ADDR_ORDER) {
+ RBT_ASSERT(!dbg_under || dbg_under->end < desc->start);
+ RBT_ASSERT(!dbg_over || dbg_over->start > desc->end);
+ }
+#endif
+ RBT_ASSERT(IS_RED(node));
+ if (IS_RED(parent(node)))
+ tree_insert_fixup(&tree->root, node);
+ }
+ HARD_CHECK_TREE(tree, 0);
+}
+
+/*
+ * Traverse tree in (reverse) sorting order
+ */
+static void
+rbt_foreach_node(RBTree* tree,
+ void (*fn)(RBTNode*,void*),
+ void* arg, int reverse)
+{
+#ifdef HARD_DEBUG
+ Sint blacks = -1;
+ Sint curr_blacks = 1;
+ Uint depth = 1;
+ Uint max_depth = 0;
+ Uint node_cnt = 0;
+#endif
+ enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state;
+ RBTNode *x = tree->root;
+
+ RBT_ASSERT(!x || !parent(x));
+
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ while (x) {
+ switch (state) {
+ case RECURSE_LEFT:
+ if (x->left) {
+ RBT_ASSERT(parent(x->left) == x);
+ #ifdef HARD_DEBUG
+ ++depth;
+ if (IS_BLACK(x->left))
+ curr_blacks++;
+ #endif
+ x = x->left;
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ }
+ else {
+ #ifdef HARD_DEBUG
+ if (blacks < 0)
+ blacks = curr_blacks;
+ RBT_ASSERT(blacks == curr_blacks);
+ #endif
+ state = reverse ? RETURN_TO_PARENT : DO_NODE;
+ }
+ break;
+
+ case DO_NODE:
+ #ifdef HARD_DEBUG
+ ++node_cnt;
+ if (depth > max_depth)
+ max_depth = depth;
+ #endif
+ (*fn) (x, arg); /* Do it! */
+ state = reverse ? RECURSE_LEFT : RECURSE_RIGHT;
+ break;
+
+ case RECURSE_RIGHT:
+ if (x->right) {
+ RBT_ASSERT(parent(x->right) == x);
+ #ifdef HARD_DEBUG
+ ++depth;
+ if (IS_BLACK(x->right))
+ curr_blacks++;
+ #endif
+ x = x->right;
+ state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
+ }
+ else {
+ #ifdef HARD_DEBUG
+ if (blacks < 0)
+ blacks = curr_blacks;
+ RBT_ASSERT(blacks == curr_blacks);
+ #endif
+ state = reverse ? DO_NODE : RETURN_TO_PARENT;
+ }
+ break;
+
+ case RETURN_TO_PARENT:
+ #ifdef HARD_DEBUG
+ if (IS_BLACK(x))
+ curr_blacks--;
+ --depth;
+ #endif
+ if (parent(x)) {
+ if (x == parent(x)->left) {
+ state = reverse ? RETURN_TO_PARENT : DO_NODE;
+ }
+ else {
+ RBT_ASSERT(x == parent(x)->right);
+ state = reverse ? DO_NODE : RETURN_TO_PARENT;
+ }
+ }
+ x = parent(x);
+ break;
+ }
+ }
+#ifdef HARD_DEBUG
+ RBT_ASSERT(depth == 0 || (!tree->root && depth==1));
+ RBT_ASSERT(curr_blacks == 0);
+ RBT_ASSERT((1 << (max_depth/2)) <= node_cnt);
+#endif
+}
+
+#if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG)
+static RBTNode* rbt_prev_node(RBTNode* node)
+{
+ RBTNode* x;
+ if (node->left) {
+ for (x=node->left; x->right; x=x->right)
+ ;
+ return x;
+ }
+ for (x=node; parent(x); x=parent(x)) {
+ if (parent(x)->right == x)
+ return parent(x);
+ }
+ return NULL;
+}
+static RBTNode* rbt_next_node(RBTNode* node)
+{
+ RBTNode* x;
+ if (node->right) {
+ for (x=node->right; x->left; x=x->left)
+ ;
+ return x;
+ }
+ for (x=node; parent(x); x=parent(x)) {
+ if (parent(x)->left == x)
+ return parent(x);
+ }
+ return NULL;
+}
+#endif /* RBT_DEBUG || HARD_DEBUG_MSEG */
+
+
+/* The API to keep track of a bunch of separated (free) segments
+ (non-overlapping and non-adjacent).
+ */
+static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder);
+static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end,
+ ErtsFreeSegDesc** under, ErtsFreeSegDesc** over);
+static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
+static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
+static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*);
+static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz);
+
+
+static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order)
+{
+ map->atree.root = NULL;
+ map->atree.order = ADDR_ORDER;
+ map->stree.root = NULL;
+ map->stree.order = order;
+ map->nseg = 0;
+}
+
+/* Lookup directly adjacent free segments to the given area [start->end].
+ * The given area must not contain any free segments.
+ */
+static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end,
+ ErtsFreeSegDesc** under, ErtsFreeSegDesc** over)
+{
+ RBTNode* x = map->atree.root;
+
+ *under = NULL;
+ *over = NULL;
+ while (x) {
+ if (start < anode_to_desc(x)->start) {
+ RBT_ASSERT(end <= anode_to_desc(x)->start);
+ if (end == anode_to_desc(x)->start) {
+ RBT_ASSERT(!*over);
+ *over = anode_to_desc(x);
+ }
+ x = x->left;
+ }
+ else {
+ RBT_ASSERT(start >= anode_to_desc(x)->end);
+ if (start == anode_to_desc(x)->end) {
+ RBT_ASSERT(!*under);
+ *under = anode_to_desc(x);
+ }
+ x = x->right;
+ }
+ }
+}
+
+/* Initialize 'desc' and insert as new free segment [start->end].
+ * The new segment must not contain or be adjacent to any free segment in 'map'.
+ */
+static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
+ char* start, char* end)
+{
+ desc->start = start;
+ desc->end = end;
+ rbt_insert(&map->atree, &desc->anode);
+ rbt_insert(&map->stree, &desc->snode);
+ map->nseg++;
+}
+
+/* Resize existing free segment 'desc' to [start->end].
+ * The new segment location must overlap the old location and
+ * it must not contain or be adjacent to any other free segment in 'map'.
+ */
+static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
+ char* start, char* end)
+{
+#ifdef RBT_DEBUG
+ RBTNode* prev = rbt_prev_node(&desc->anode);
+ RBTNode* next = rbt_next_node(&desc->anode);
+ RBT_ASSERT(!prev || anode_to_desc(prev)->end < start);
+ RBT_ASSERT(!next || anode_to_desc(next)->start > end);
+#endif
+ rbt_delete(&map->stree, &desc->snode);
+ desc->start = start;
+ desc->end = end;
+ rbt_insert(&map->stree, &desc->snode);
+}
+
+/* Delete existing free segment 'desc' from 'map'.
+ */
+static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc)
+{
+ rbt_delete(&map->atree, &desc->anode);
+ rbt_delete(&map->stree, &desc->snode);
+ map->nseg--;
+}
+
+/* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes.
+ */
+static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz)
+{
+ RBTNode* x = map->stree.root;
+ ErtsFreeSegDesc* best_desc = NULL;
+ const enum SortOrder order = map->stree.order;
+
+ while (x) {
+ ErtsFreeSegDesc* desc = snode_to_desc(x);
+ SWord seg_sz = usable_size(order, desc);
+
+ if (seg_sz < need_sz) {
+ x = x->right;
+ }
+ else {
+ best_desc = desc;
+ x = x->left;
+ }
+ }
+ return best_desc;
+}
+
+struct build_arg_t
+{
+ Process* p;
+ Eterm* hp;
+ Eterm acc;
+};
+
+static void build_free_seg_tuple(RBTNode* node, void* arg)
+{
+ struct build_arg_t* a = (struct build_arg_t*)arg;
+ ErtsFreeSegDesc* desc = anode_to_desc(node);
+ Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start);
+ Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end);
+ Eterm tpl = TUPLE2(a->hp, start, end);
+
+ a->hp += 3;
+ a->acc = CONS(a->hp, tpl, a->acc);
+ a->hp += 2;
+}
+
+static
+Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
+{
+ struct build_arg_t barg;
+ Eterm* hp_end;
+ const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */
+
+ barg.p = p;
+ barg.hp = HAlloc(p, may_need);
+ hp_end = barg.hp + may_need;
+ barg.acc = NIL;
+ rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1);
+
+ RBT_ASSERT(barg.hp <= hp_end);
+ HRelease(p, hp_end, barg.hp);
+ return barg.acc;
+}
+
+#if ERTS_HAVE_OS_MMAP
+/* Implementation of os_mmap()/os_munmap()/os_mremap()... */
+
+#if HAVE_MMAP
+# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
+# if defined(MAP_ANONYMOUS)
+# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
+# define ERTS_MMAP_FD (-1)
+# elif defined(MAP_ANON)
+# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
+# define ERTS_MMAP_FD (-1)
+# else
+# define ERTS_MMAP_FLAGS (MAP_PRIVATE)
+# define ERTS_MMAP_FD mmap_state.mmap_fd
+# endif
+#endif
+
+static ERTS_INLINE void *
+os_mmap(void *hint_ptr, UWord size, int try_superalign)
+{
+#if HAVE_MMAP
+ void *res;
+#ifdef MAP_ALIGN
+ if (try_superalign)
+ res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT,
+ ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
+ else
+#endif
+ res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT,
+ ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == MAP_FAILED)
+ return NULL;
+ return res;
+#elif HAVE_VIRTUALALLOC
+ return (void *) VirtualAlloc(NULL, (SIZE_T) size,
+ MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
+#else
+# error "missing mmap() or similar"
+#endif
+}
+
+static ERTS_INLINE void
+os_munmap(void *ptr, UWord size)
+{
+#if HAVE_MMAP
+#ifdef ERTS_MMAP_DEBUG
+ int res =
+#endif
+ munmap(ptr, size);
+ ERTS_MMAP_ASSERT(res == 0);
+#elif HAVE_VIRTUALALLOC
+#ifdef DEBUG
+ BOOL res =
+#endif
+ VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE);
+ ERTS_MMAP_ASSERT(res != 0);
+#else
+# error "missing munmap() or similar"
+#endif
+}
+
+#ifdef ERTS_HAVE_OS_MREMAP
+# if HAVE_MREMAP
+# if defined(__NetBSD__)
+# define ERTS_MREMAP_FLAGS (0)
+# else
+# define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE)
+# endif
+# endif
+static ERTS_INLINE void *
+os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
+{
+ void *new_seg;
+#if HAVE_MREMAP
+ new_seg = mremap(ptr, (size_t) old_size,
+# if defined(__NetBSD__)
+ NULL,
+# endif
+ (size_t) new_size, ERTS_MREMAP_FLAGS);
+ if (new_seg == (void *) MAP_FAILED)
+ return NULL;
+ return new_seg;
+#else
+# error "missing mremap() or similar"
+#endif
+}
+#endif
+
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+#if HAVE_MMAP
+
+#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
+#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
+#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
+#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
+#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
+#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
+
+static int
+os_reserve_physical(char *ptr, UWord size)
+{
+ void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT,
+ ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ return 0;
+ return 1;
+}
+
+static void
+os_unreserve_physical(char *ptr, UWord size)
+{
+ void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
+ ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ erl_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
+}
+
+static void *
+os_mmap_virtual(char *ptr, UWord size)
+{
+ void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
+ ERTS_MMAP_VIRTUAL_FLAGS, ERTS_MMAP_FD, 0);
+ if (res == (void *) MAP_FAILED)
+ return NULL;
+ return res;
+}
+
+#else
+#error "Missing reserve/unreserve physical memory implementation"
+#endif
+#endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */
+
+#endif /* ERTS_HAVE_OS_MMAP */
+
+static int reserve_noop(char *ptr, UWord size)
+{
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ Uint32 *uip, *end = (Uint32 *) (ptr + size);
+
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef);
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ *uip = (Uint32) 0xfeedfeed;
+#endif
+ return 1;
+}
+
+static void unreserve_noop(char *ptr, UWord size)
+{
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ Uint32 *uip, *end = (Uint32 *) (ptr + size);
+
+ for (uip = (Uint32 *) ptr; uip < end; uip++)
+ *uip = (Uint32) 0xdeadbeef;
+#endif
+}
+
+static UWord
+alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
+{
+ char *ptr;
+ ErtsFreeSegMap *da_map;
+ ErtsFreeSegDesc *desc = alloc_desc();
+ if (desc) {
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+
+ /*
+ * Ahh; ran out of free segment descriptors.
+ *
+ * First try to map a new page...
+ */
+
+#if ERTS_HAVE_OS_MMAP
+ if (!mmap_state.no_os_mmap) {
+ ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0);
+ if (ptr) {
+ mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
+ ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
+ add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE);
+ desc = alloc_desc();
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+ }
+#endif
+
+ /*
+ * ...then try to find a good place in the supercarrier...
+ */
+ da_map = &mmap_state.sua.map;
+ desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
+ if (desc) {
+ if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
+ ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
+ else
+ desc = NULL;
+
+ }
+ else {
+ da_map = &mmap_state.sa.map;
+ desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
+ if (desc) {
+ if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
+ ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
+ else
+ desc = NULL;
+ }
+ }
+ if (desc) {
+ char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
+ add_free_desc_area(desc->start, da_end);
+ if (da_end != desc->end)
+ resize_free_seg(da_map, desc, da_end, desc->end);
+ else {
+ delete_free_seg(da_map, desc);
+ free_desc(desc);
+ }
+
+ desc = alloc_desc();
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, start, end);
+ return 0;
+ }
+
+ /*
+ * ... and then as last resort use the first page of the
+ * free segment we are trying to insert for free descriptors.
+ */
+ ptr = start + ERTS_PAGEALIGNED_SIZE;
+ ERTS_MMAP_ASSERT(ptr <= end);
+
+ add_free_desc_area(start, ptr);
+
+ if (ptr != end) {
+ desc = alloc_desc();
+ ERTS_MMAP_ASSERT(desc);
+ insert_free_seg(map, desc, ptr, end);
+ }
+
+ return ERTS_PAGEALIGNED_SIZE;
+}
+
+void *
+erts_mmap(Uint32 flags, UWord *sizep)
+{
+ char *seg;
+ UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);
+
+ /* Map in premapped supercarrier */
+ if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
+ char *end;
+ ErtsFreeSegDesc *desc;
+ Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ erts_smp_mtx_lock(&mmap_state.mtx);
+
+ ERTS_MMAP_OP_START(*sizep);
+
+ if (!superaligned) {
+ desc = lookup_free_seg(&mmap_state.sua.map, asize);
+ if (desc) {
+ seg = desc->start;
+ end = seg+asize;
+ if (!mmap_state.reserve_physical(seg, asize))
+ goto supercarrier_reserve_failure;
+ if (desc->end == end) {
+ delete_free_seg(&mmap_state.sua.map, desc);
+ free_desc(desc);
+ }
+ else {
+ ERTS_MMAP_ASSERT(end < desc->end);
+ resize_free_seg(&mmap_state.sua.map, desc, end, desc->end);
+ }
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ goto supercarrier_success;
+ }
+
+ if (asize <= mmap_state.sua.bot - mmap_state.sa.top) {
+ if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize,
+ asize))
+ goto supercarrier_reserve_failure;
+ mmap_state.sua.bot -= asize;
+ seg = mmap_state.sua.bot;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ goto supercarrier_success;
+ }
+ }
+
+ asize = ERTS_SUPERALIGNED_CEILING(asize);
+
+ desc = lookup_free_seg(&mmap_state.sa.map, asize);
+ if (desc) {
+ char *start = seg = desc->start;
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
+ end = seg+asize;
+ if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
+ goto supercarrier_reserve_failure;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize);
+ if (desc->end == end) {
+ if (start != seg)
+ resize_free_seg(&mmap_state.sa.map, desc, start, seg);
+ else {
+ delete_free_seg(&mmap_state.sa.map, desc);
+ free_desc(desc);
+ }
+ }
+ else {
+ ERTS_MMAP_ASSERT(end < desc->end);
+ resize_free_seg(&mmap_state.sa.map, desc, end, desc->end);
+ if (start != seg) {
+ UWord ad_sz;
+ ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ start, seg);
+ start += ad_sz;
+ if (start != seg)
+ mmap_state.unreserve_physical(start, (UWord) (seg - start));
+ }
+ }
+ goto supercarrier_success;
+ }
+
+ if (superaligned) {
+ char *start = mmap_state.sa.top;
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(start);
+
+ if (asize + (seg - start) <= mmap_state.sua.bot - start) {
+ end = seg + asize;
+ if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
+ goto supercarrier_reserve_failure;
+ mmap_state.sa.top = end;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize);
+ if (start != seg) {
+ UWord ad_sz;
+ ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ start, seg);
+ start += ad_sz;
+ if (start != seg)
+ mmap_state.unreserve_physical(start, (UWord) (seg - start));
+ }
+ goto supercarrier_success;
+ }
+
+ desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE);
+ if (desc) {
+ char *org_start = desc->start;
+ char *org_end = desc->end;
+
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
+ end = seg + asize;
+ if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg)))
+ goto supercarrier_reserve_failure;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize);
+ if (org_start != seg) {
+ ERTS_MMAP_ASSERT(org_start < seg);
+ resize_free_seg(&mmap_state.sua.map, desc, org_start, seg);
+ desc = NULL;
+ }
+ if (end != org_end) {
+ UWord ad_sz = 0;
+ ERTS_MMAP_ASSERT(end < org_end);
+ if (desc)
+ resize_free_seg(&mmap_state.sua.map, desc, end, org_end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
+ end, org_end);
+ end += ad_sz;
+ if (end != org_end)
+ mmap_state.unreserve_physical(end,
+ (UWord) (org_end - end));
+ }
+ goto supercarrier_success;
+ }
+ }
+
+ ERTS_MMAP_OP_ABORT();
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ }
+
+#if ERTS_HAVE_OS_MMAP
+ /* Map using OS primitives */
+ if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) {
+ if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
+ seg = os_mmap(NULL, asize, 0);
+ if (!seg)
+ goto failure;
+ }
+ else {
+ asize = ERTS_SUPERALIGNED_CEILING(*sizep);
+ seg = os_mmap(NULL, asize, 1);
+ if (!seg)
+ goto failure;
+
+ if (!ERTS_IS_SUPERALIGNED(seg)) {
+ char *ptr;
+ UWord sz;
+
+ os_munmap(seg, asize);
+
+ ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1);
+ if (!ptr)
+ goto failure;
+
+ seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr);
+ sz = (UWord) (seg - ptr);
+ ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE);
+ if (sz)
+ os_munmap(ptr, sz);
+ sz = ERTS_SUPERALIGNED_SIZE - sz;
+ if (sz)
+ os_munmap(seg+asize, sz);
+ }
+ }
+
+ ERTS_MMAP_OP_LCK(seg, *sizep, asize);
+ ERTS_MMAP_SIZE_OS_INC(asize);
+ *sizep = asize;
+ return (void *) seg;
+ }
+failure:
+#endif
+ ERTS_MMAP_OP_LCK(NULL, *sizep, 0);
+ *sizep = 0;
+ return NULL;
+
+supercarrier_success:
+
+#ifdef ERTS_MMAP_DEBUG
+ if (ERTS_MMAPFLG_SUPERALIGNED & flags) {
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg));
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
+ }
+ else {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+ }
+#endif
+
+ ERTS_MMAP_OP_END(seg, asize);
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+
+ *sizep = asize;
+ return (void *) seg;
+
+supercarrier_reserve_failure:
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ *sizep = 0;
+ return NULL;
+}
+
+void
+erts_munmap(Uint32 flags, void *ptr, UWord size)
+{
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));
+
+ if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
+ ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
+#if ERTS_HAVE_OS_MMAP
+ ERTS_MUNMAP_OP_LCK(ptr, size);
+ ERTS_MMAP_SIZE_OS_DEC(size);
+ os_munmap(ptr, size);
+#endif
+ }
+ else {
+ char *start, *end;
+ ErtsFreeSegMap *map;
+ ErtsFreeSegDesc *prev, *next, *desc;
+ UWord ad_sz = 0;
+
+ ERTS_MMAP_ASSERT(mmap_state.supercarrier);
+
+ start = (char *) ptr;
+ end = start + size;
+
+ erts_smp_mtx_lock(&mmap_state.mtx);
+
+ ERTS_MUNMAP_OP(ptr, size);
+
+ if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+
+ map = &mmap_state.sa.map;
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ ERTS_MMAP_SIZE_SC_SA_DEC(size);
+ if (end == mmap_state.sa.top) {
+ ERTS_MMAP_ASSERT(!next);
+ if (prev) {
+ start = prev->start;
+ delete_free_seg(map, prev);
+ free_desc(prev);
+ }
+ mmap_state.sa.top = start;
+ goto supercarrier_success;
+ }
+ }
+ else {
+ map = &mmap_state.sua.map;
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ ERTS_MMAP_SIZE_SC_SUA_DEC(size);
+ if (start == mmap_state.sua.bot) {
+ ERTS_MMAP_ASSERT(!prev);
+ if (next) {
+ end = next->end;
+ delete_free_seg(map, next);
+ free_desc(next);
+ }
+ mmap_state.sua.bot = end;
+ goto supercarrier_success;
+ }
+ }
+
+ desc = NULL;
+
+ if (next) {
+ ERTS_MMAP_ASSERT(end < next->end);
+ end = next->end;
+ if (prev) {
+ delete_free_seg(map, next);
+ free_desc(next);
+ goto save_prev;
+ }
+ desc = next;
+ } else if (prev) {
+ save_prev:
+ ERTS_MMAP_ASSERT(prev->start < start);
+ start = prev->start;
+ desc = prev;
+ }
+
+ if (desc)
+ resize_free_seg(map, desc, start, end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(map, start, end);
+
+ supercarrier_success: {
+ UWord unres_sz;
+
+ ERTS_MMAP_ASSERT(size >= ad_sz);
+ unres_sz = size - ad_sz;
+ if (unres_sz)
+ mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz);
+
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ }
+ }
+}
+
+static void *
+remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+{
+ UWord size = *sizep;
+ void *new_ptr = erts_mmap(flags, &size);
+ if (!new_ptr)
+ return NULL;
+ *sizep = size;
+ if (old_size < size)
+ size = old_size;
+ sys_memcpy(new_ptr, ptr, (size_t) size);
+ erts_munmap(flags, ptr, old_size);
+ return new_ptr;
+}
+
+void *
+erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
+{
+ void *new_ptr;
+ Uint32 superaligned;
+ UWord asize;
+
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep));
+
+ if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
+
+ ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
+
+ if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) {
+ new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
+ old_size, sizep);
+ if (new_ptr)
+ return new_ptr;
+ }
+
+ if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) {
+ ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size);
+ return NULL;
+ }
+
+#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP
+ superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ if (superaligned) {
+ asize = ERTS_SUPERALIGNED_CEILING(*sizep);
+ if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) {
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+ }
+ else {
+ asize = ERTS_PAGEALIGNED_CEILING(*sizep);
+ if (asize == old_size) {
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+ }
+
+#if ERTS_HAVE_GENUINE_OS_MMAP
+ if (asize < old_size
+ && (!superaligned
+ || ERTS_IS_SUPERALIGNED(ptr))) {
+ UWord um_sz;
+ new_ptr = ((char *) ptr) + asize;
+ ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr);
+ um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr);
+ ERTS_MMAP_SIZE_OS_DEC(um_sz);
+ os_munmap(new_ptr, um_sz);
+ ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return ptr;
+ }
+#endif
+#if ERTS_HAVE_OS_MREMAP
+ if (superaligned)
+ return remap_move(flags, new_ptr, old_size, sizep);
+ else {
+ new_ptr = os_mremap(ptr, old_size, asize, 0);
+ if (!new_ptr)
+ return NULL;
+ if (asize > old_size)
+ ERTS_MMAP_SIZE_OS_INC(asize - old_size);
+ else
+ ERTS_MMAP_SIZE_OS_DEC(old_size - asize);
+ ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize);
+ *sizep = asize;
+ return new_ptr;
+ }
+#endif
+#endif
+ }
+ else { /* In super carrier */
+ char *start, *end, *new_end;
+ ErtsFreeSegMap *map;
+ ErtsFreeSegDesc *prev, *next;
+ UWord ad_sz = 0;
+
+ ERTS_MMAP_ASSERT(mmap_state.supercarrier);
+
+ if (ERTS_MMAPFLG_OS_ONLY & flags)
+ return remap_move(flags, ptr, old_size, sizep);
+
+ superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
+
+ asize = (superaligned
+ ? ERTS_SUPERALIGNED_CEILING(*sizep)
+ : ERTS_PAGEALIGNED_CEILING(*sizep));
+
+ erts_smp_mtx_lock(&mmap_state.mtx);
+
+ if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
+ ? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize))
+ : (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) {
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ /*
+ * Segment currently in wrong area (due to a previous memory
+ * shortage), move it to the right area.
+ * (remap_move() will succeed)
+ */
+ return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
+ old_size, sizep);
+ }
+
+ ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
+
+ if (asize == old_size) {
+ new_ptr = ptr;
+ goto supercarrier_resize_success;
+ }
+
+ start = (char *) ptr;
+ end = start + old_size;
+ new_end = start+asize;
+
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+
+ if (asize < old_size) {
+ UWord unres_sz;
+ new_ptr = ptr;
+ if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+ map = &mmap_state.sua.map;
+ ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
+ }
+ else {
+ if (end == mmap_state.sa.top) {
+ mmap_state.sa.top = new_end;
+ mmap_state.unreserve_physical(((char *) ptr) + asize,
+ old_size - asize);
+ goto supercarrier_resize_success;
+ }
+ ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
+ map = &mmap_state.sa.map;
+ }
+
+ adjacent_free_seg(map, start, end, &prev, &next);
+
+ if (next)
+ resize_free_seg(map, next, new_end, next->end);
+ else
+ ad_sz = alloc_desc_insert_free_seg(map, new_end, end);
+ ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
+ unres_sz = old_size - asize - ad_sz;
+ if (unres_sz)
+ mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz,
+ unres_sz);
+ goto supercarrier_resize_success;
+ }
+
+ if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+
+ adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next);
+
+ if (next && new_end <= next->end) {
+ if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
+ asize - old_size))
+ goto supercarrier_reserve_failure;
+ if (new_end < next->end)
+ resize_free_seg(&mmap_state.sua.map, next, new_end, next->end);
+ else {
+ delete_free_seg(&mmap_state.sua.map, next);
+ free_desc(next);
+ }
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ else { /* Superaligned area */
+
+ if (end == mmap_state.sa.top) {
+ if (new_end <= mmap_state.sua.bot) {
+ if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
+ asize - old_size))
+ goto supercarrier_reserve_failure;
+ mmap_state.sa.top = new_end;
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ else {
+ adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next);
+ if (next && new_end <= next->end) {
+ if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
+ asize - old_size))
+ goto supercarrier_reserve_failure;
+ if (new_end < next->end)
+ resize_free_seg(&mmap_state.sa.map, next, new_end, next->end);
+ else {
+ delete_free_seg(&mmap_state.sa.map, next);
+ free_desc(next);
+ }
+ new_ptr = ptr;
+ ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
+ goto supercarrier_resize_success;
+ }
+ }
+ }
+
+ ERTS_MMAP_OP_ABORT();
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+
+ /* Failed to resize... */
+ }
+
+ return remap_move(flags, ptr, old_size, sizep);
+
+supercarrier_resize_success:
+
+#ifdef ERTS_MMAP_DEBUG
+ if ((ERTS_MMAPFLG_SUPERALIGNED & flags)
+ || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) {
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
+ }
+ else {
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr));
+ ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
+ }
+#endif
+
+ ERTS_MREMAP_OP_END(new_ptr, asize);
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+
+ *sizep = asize;
+ return new_ptr;
+
+supercarrier_reserve_failure:
+ ERTS_MREMAP_OP_END(NULL, old_size);
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ *sizep = old_size;
+ return NULL;
+
+}
+
+int erts_mmap_in_supercarrier(void *ptr)
+{
+ return ERTS_MMAP_IN_SUPERCARRIER(ptr);
+}
+
+
+static struct {
+ Eterm total;
+ Eterm total_sa;
+ Eterm total_sua;
+ Eterm used;
+ Eterm used_sa;
+ Eterm used_sua;
+ Eterm max;
+ Eterm allocated;
+ Eterm reserved;
+ Eterm sizes;
+ Eterm free_segs;
+ Eterm supercarrier;
+ Eterm os;
+ Eterm scs;
+ Eterm sco;
+ Eterm scrpm;
+ Eterm scmgc;
+
+ int is_initialized;
+ erts_mtx_t init_mutex;
+}am;
+
+static void ERTS_INLINE atom_init(Eterm *atom, char *name)
+{
+ *atom = am_atom_put(name, strlen(name));
+}
+#define AM_INIT(AM) atom_init(&am.AM, #AM)
+
+static void init_atoms(void)
+{
+ erts_mtx_lock(&am.init_mutex);
+
+ if (!am.is_initialized) {
+ AM_INIT(total);
+ AM_INIT(total_sa);
+ AM_INIT(total_sua);
+ AM_INIT(used);
+ AM_INIT(used_sa);
+ AM_INIT(used_sua);
+ AM_INIT(max);
+ AM_INIT(allocated);
+ AM_INIT(reserved);
+ AM_INIT(sizes);
+ AM_INIT(free_segs);
+ AM_INIT(supercarrier);
+ AM_INIT(os);
+ AM_INIT(scs);
+ AM_INIT(sco);
+ AM_INIT(scrpm);
+ AM_INIT(scmgc);
+ am.is_initialized = 1;
+ }
+ erts_mtx_unlock(&am.init_mutex);
+};
+
+
+#ifdef HARD_DEBUG_MSEG
+static void hard_dbg_mseg_init(void);
+#endif
+
+void
+erts_mmap_init(ErtsMMapInit *init)
+{
+ int virtual_map = 0;
+ char *start = NULL, *end = NULL;
+ UWord pagesize;
+#if defined(__WIN32__)
+ SYSTEM_INFO sysinfo;
+ GetSystemInfo(&sysinfo);
+ pagesize = (UWord) sysinfo.dwPageSize;
+#elif defined(_SC_PAGESIZE)
+ pagesize = (UWord) sysconf(_SC_PAGESIZE);
+#elif defined(HAVE_GETPAGESIZE)
+ pagesize = (UWord) getpagesize();
+#else
+# error "Do not know how to get page size"
+#endif
+#if defined(HARD_DEBUG) || 0
+ erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs);
+ erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco);
+ erts_fprintf(stderr, "erts_mmap: scmgc = %i\n", init->scmgc);
+#endif
+ erts_page_inv_mask = pagesize - 1;
+ if (pagesize & erts_page_inv_mask)
+ erl_exit(-1, "erts_mmap: Invalid pagesize: %bpu\n",
+ pagesize);
+
+ ERTS_MMAP_OP_RINGBUF_INIT();
+
+ erts_have_erts_mmap = 0;
+
+ mmap_state.supercarrier = 0;
+ mmap_state.reserve_physical = reserve_noop;
+ mmap_state.unreserve_physical = unreserve_noop;
+
+#if HAVE_MMAP && !defined(MAP_ANON)
+ mmap_state.mmap_fd = open("/dev/zero", O_RDWR);
+ if (mmap_state.mmap_fd < 0)
+ erl_exit(-1, "erts_mmap: Failed to open /dev/zero\n");
+#endif
+
+ erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap");
+ erts_mtx_init(&am.init_mutex, "mmap_init_atoms");
+
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (init->virtual_range.start) {
+ char *ptr;
+ UWord sz;
+ ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
+ end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
+ sz = end - ptr;
+ start = os_mmap_virtual(ptr, sz);
+ if (!start || start > ptr || start >= end)
+ erl_exit(-1,
+ "erts_mmap: Failed to create virtual range for super carrier\n");
+ sz = start - ptr;
+ if (sz)
+ os_munmap(end, sz);
+ mmap_state.reserve_physical = os_reserve_physical;
+ mmap_state.unreserve_physical = os_unreserve_physical;
+ virtual_map = 1;
+ }
+ else
+#endif
+ if (init->predefined_area.start) {
+ start = init->predefined_area.start;
+ end = init->predefined_area.end;
+ if (end != (void *) 0 && end < start)
+ end = start;
+ }
+#if ERTS_HAVE_OS_MMAP
+ else if (init->scs) {
+ UWord sz;
+ sz = ERTS_PAGEALIGNED_CEILING(init->scs);
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (!init->scrpm) {
+ start = os_mmap_virtual(NULL, sz);
+ mmap_state.reserve_physical = os_reserve_physical;
+ mmap_state.unreserve_physical = os_unreserve_physical;
+ virtual_map = 1;
+ }
+ else
+#endif
+ {
+ /*
+ * The whole supercarrier will by physically
+ * reserved all the time.
+ */
+ start = os_mmap(NULL, sz, 1);
+ }
+ if (!start)
+ erl_exit(-1,
+ "erts_mmap: Failed to create super carrier of size %bpu MB\n",
+ init->scs/1024/1024);
+ end = start + sz;
+#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
+ if (!virtual_map) {
+ Uint32 *uip;
+
+ for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++)
+ *uip = (Uint32) 0xdeadbeef;
+ }
+#endif
+ }
+ if (!mmap_state.no_os_mmap)
+ erts_have_erts_mmap |= ERTS_HAVE_ERTS_OS_MMAP;
+#endif
+
+ mmap_state.no.free_seg_descs = 0;
+ mmap_state.no.free_segs.curr = 0;
+ mmap_state.no.free_segs.max = 0;
+
+ mmap_state.size.supercarrier.total = 0;
+ mmap_state.size.supercarrier.used.total = 0;
+ mmap_state.size.supercarrier.used.sa = 0;
+ mmap_state.size.supercarrier.used.sua = 0;
+ mmap_state.size.os.used = 0;
+
+ mmap_state.desc.new_area_hint = NULL;
+
+ if (!start) {
+ mmap_state.sa.bot = NULL;
+ mmap_state.sua.top = NULL;
+ mmap_state.sa.bot = NULL;
+ mmap_state.sua.top = NULL;
+ mmap_state.no_os_mmap = 0;
+ mmap_state.supercarrier = 0;
+ }
+ else {
+ size_t desc_size;
+
+ mmap_state.no_os_mmap = init->sco;
+
+ desc_size = init->scmgc;
+ if (desc_size < 100)
+ desc_size = 100;
+ desc_size *= sizeof(ErtsFreeSegDesc);
+ if ((desc_size
+ + ERTS_SUPERALIGNED_SIZE
+ + ERTS_PAGEALIGNED_SIZE) > end - start)
+ erl_exit(-1, "erts_mmap: No space for segments in super carrier\n");
+
+ mmap_state.sa.bot = start;
+ mmap_state.sa.bot += desc_size;
+ mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot);
+ mmap_state.sa.top = mmap_state.sa.bot;
+ mmap_state.sua.top = end;
+ mmap_state.sua.bot = mmap_state.sua.top;
+
+ mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start);
+
+ mmap_state.desc.free_list = NULL;
+ mmap_state.desc.reserved = 0;
+
+ if (end == (void *) 0) {
+ /*
+ * Very unlikely, but we need a guarantee
+ * that `mmap_state.sua.top` always will
+ * compare as larger than all segment pointers
+ * into the super carrier...
+ */
+ mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE;
+ mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE))
+#endif
+ add_free_desc_area(mmap_state.sua.top, end);
+ mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc);
+ }
+
+ mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start);
+
+ /*
+ * Area before (and after) super carrier
+ * will be used for free segment descritors.
+ */
+#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
+ if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start))
+ erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
+#endif
+ mmap_state.desc.unused_start = start;
+ mmap_state.desc.unused_end = mmap_state.sa.bot;
+ mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start)
+ / sizeof(ErtsFreeSegDesc));
+
+ init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER);
+ init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER);
+
+ mmap_state.supercarrier = 1;
+ erts_have_erts_mmap |= ERTS_HAVE_ERTS_SUPERCARRIER_MMAP;
+
+ mmap_state.desc.new_area_hint = end;
+
+ }
+
+#if !ERTS_HAVE_OS_MMAP
+ mmap_state.no_os_mmap = 1;
+#endif
+
+#ifdef HARD_DEBUG_MSEG
+ hard_dbg_mseg_init();
+#endif
+}
+
+
+static ERTS_INLINE void
+add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
+{
+ *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
+}
+
+Eterm erts_mmap_info(int *print_to_p,
+ void *print_to_arg,
+ Eterm** hpp, Uint* szp,
+ struct erts_mmap_info_struct* emis)
+{
+ Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua };
+ Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
+ Eterm group[2];
+ Eterm group_tags[] = { am.sizes, am.free_segs };
+ Eterm list[2];
+ Eterm list_tags[2]; /* { am.supercarrier, am.os } */
+ int lix;
+ Eterm res = THE_NON_VALUE;
+
+ if (!hpp) {
+ erts_smp_mtx_lock(&mmap_state.mtx);
+ emis->sizes[0] = mmap_state.size.supercarrier.total;
+ emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot;
+ emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot;
+ emis->sizes[3] = mmap_state.size.supercarrier.used.total;
+ emis->sizes[4] = mmap_state.size.supercarrier.used.sa;
+ emis->sizes[5] = mmap_state.size.supercarrier.used.sua;
+
+ emis->segs[0] = mmap_state.no.free_segs.curr;
+ emis->segs[1] = mmap_state.no.free_segs.max;
+ emis->segs[2] = mmap_state.no.free_seg_descs;
+ emis->segs[3] = mmap_state.desc.reserved;
+ emis->segs[4] = mmap_state.sa.map.nseg;
+ emis->segs[5] = mmap_state.sua.map.nseg;
+
+ emis->os_used = mmap_state.size.os.used;
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+ }
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ if (mmap_state.supercarrier) {
+ const char* prefix = "supercarrier ";
+ erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
+ erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
+ erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]);
+ erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]);
+ erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]);
+ erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]);
+ erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]);
+ erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]);
+ erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]);
+ erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]);
+ erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
+ erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
+ }
+ if (!mmap_state.no_os_mmap) {
+ erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
+ }
+ }
+
+
+ if (hpp || szp) {
+ if (!am.is_initialized) {
+ init_atoms();
+ }
+
+ lix = 0;
+ if (mmap_state.supercarrier) {
+ group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ sizeof(size_tags)/sizeof(Eterm),
+ size_tags, emis->sizes);
+ group[1] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ sizeof(seg_tags)/sizeof(Eterm),
+ seg_tags, emis->segs);
+ list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group);
+ list_tags[lix] = am.supercarrier;
+ lix++;
+ }
+
+ if (!mmap_state.no_os_mmap) {
+ group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
+ 1, &am.used, &emis->os_used);
+ list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
+ list_tags[lix] = am.os;
+ lix++;
+ }
+ res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list);
+ }
+ return res;
+}
+
+Eterm erts_mmap_info_options(char *prefix,
+ int *print_to_p,
+ void *print_to_arg,
+ Uint **hpp,
+ Uint *szp)
+{
+ const UWord scs = mmap_state.sua.top - mmap_state.sa.bot;
+ const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false;
+ const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false;
+ Eterm res = THE_NON_VALUE;
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
+ if (mmap_state.supercarrier) {
+ erts_print(to, arg, "%ssco: %T\n", prefix, sco);
+ erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
+ erts_print(to, arg, "%sscmgc: %beu\n", prefix, mmap_state.desc.reserved);
+ }
+ }
+
+ if (hpp || szp) {
+ if (!am.is_initialized) {
+ init_atoms();
+ }
+
+ res = NIL;
+ if (mmap_state.supercarrier) {
+ add_2tup(hpp, szp, &res, am.scmgc,
+ erts_bld_uint(hpp,szp, mmap_state.desc.reserved));
+ add_2tup(hpp, szp, &res, am.scrpm, scrpm);
+ add_2tup(hpp, szp, &res, am.sco, sco);
+ }
+ add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs));
+ }
+ return res;
+}
+
+
+Eterm erts_mmap_debug_info(Process* p)
+{
+ if (mmap_state.supercarrier) {
+ ERTS_DECL_AM(sabot);
+ ERTS_DECL_AM(satop);
+ ERTS_DECL_AM(suabot);
+ ERTS_DECL_AM(suatop);
+ Eterm sa_list, sua_list, list;
+ Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop };
+ UWord values[4];
+ Eterm *hp, *hp_end;
+ Uint may_need;
+ const Uint PTR_BIG_SZ = HALFWORD_HEAP ? 3 : 2;
+
+ erts_smp_mtx_lock(&mmap_state.mtx);
+ values[0] = (UWord)mmap_state.sa.bot;
+ values[1] = (UWord)mmap_state.sa.top;
+ values[2] = (UWord)mmap_state.sua.bot;
+ values[3] = (UWord)mmap_state.sua.top;
+ sa_list = build_free_seg_list(p, &mmap_state.sa.map);
+ sua_list = build_free_seg_list(p, &mmap_state.sua.map);
+ erts_smp_mtx_unlock(&mmap_state.mtx);
+
+ may_need = 4*(2+3+PTR_BIG_SZ) + 2*(2+3);
+ hp = HAlloc(p, may_need);
+ hp_end = hp + may_need;
+
+ list = erts_bld_atom_uword_2tup_list(&hp, NULL,
+ sizeof(values)/sizeof(*values),
+ tags, values);
+
+ sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3;
+ sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3;
+ list = CONS(hp, sua_list, list); hp+=2;
+ list = CONS(hp, sa_list, list); hp+=2;
+
+ ASSERT(hp <= hp_end);
+ HRelease(p, hp_end, hp);
+ return list;
+ }
+ else {
+ return am_undefined;
+ }
+}
+
+
+/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
+ * Debug functions *
+\* */
+
+
+#ifdef HARD_DEBUG
+
+static int rbt_assert_is_member(RBTNode* root, RBTNode* node)
+{
+ while (node != root) {
+ RBT_ASSERT(parent(node));
+ RBT_ASSERT(parent(node)->left == node || parent(node)->right == node);
+ node = parent(node);
+ }
+ return 1;
+}
+
+
+#if 0
+# define PRINT_TREE
+#else
+# undef PRINT_TREE
+#endif
+
+#ifdef PRINT_TREE
+static void print_tree(enum SortOrder order, RBTNode*);
+#endif
+
+/*
+ * Checks that the order between parent and children are correct,
+ * and that the Red-Black Tree properies are satisfied. if size > 0,
+ * check_tree() returns the node that satisfies "address order first fit"
+ *
+ * The Red-Black Tree properies are:
+ * 1. Every node is either red or black.
+ * 2. Every leaf (NIL) is black.
+ * 3. If a node is red, then both its children are black.
+ * 4. Every simple path from a node to a descendant leaf
+ * contains the same number of black nodes.
+ *
+ */
+
+struct check_arg_t {
+ RBTree* tree;
+ ErtsFreeSegDesc* prev_seg;
+ Uint size;
+ RBTNode *res;
+};
+static void check_node_callback(RBTNode* x, void* arg);
+
+
+static RBTNode *
+check_tree(RBTree* tree, Uint size)
+{
+ struct check_arg_t carg;
+ carg.tree = tree;
+ carg.prev_seg = NULL;
+ carg.size = size;
+ carg.res = NULL;
+
+#ifdef PRINT_TREE
+ print_tree(tree->order, tree->root);
+#endif
+
+ if (!tree->root)
+ return NULL;
+
+ RBT_ASSERT(IS_BLACK(tree->root));
+ RBT_ASSERT(!parent(tree->root));
+
+ rbt_foreach_node(tree, check_node_callback, &carg, 0);
+
+ return carg.res;
+}
+
+static void check_node_callback(RBTNode* x, void* arg)
+{
+ struct check_arg_t* a = (struct check_arg_t*) arg;
+ ErtsFreeSegDesc* seg;
+
+ if (IS_RED(x)) {
+ RBT_ASSERT(IS_BLACK(x->right));
+ RBT_ASSERT(IS_BLACK(x->left));
+ }
+
+ RBT_ASSERT(parent(x) || x == a->tree->root);
+
+ if (x->left) {
+ RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0);
+ }
+ if (x->right) {
+ RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0);
+ }
+
+ seg = node_to_desc(a->tree->order, x);
+ RBT_ASSERT(seg->start < seg->end);
+ if (a->size && (seg->end - seg->start) >= a->size) {
+ if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) {
+ a->res = x;
+ }
+ }
+ if (a->tree->order == ADDR_ORDER) {
+ RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start);
+ a->prev_seg = seg;
+ }
+}
+
+#endif /* HARD_DEBUG */
+
+
+#ifdef PRINT_TREE
+#define INDENT_STEP 2
+
+#include <stdio.h>
+
+static void
+print_tree_aux(enum SortOrder order, RBTNode *x, int indent)
+{
+ int i;
+
+ if (x) {
+ ErtsFreeSegDesc* desc = node_to_desc(order, x);
+ print_tree_aux(order, x->right, indent + INDENT_STEP);
+ for (i = 0; i < indent; i++) {
+ putc(' ', stderr);
+ }
+ fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n",
+ IS_BLACK(x) ? "BLACK" : "RED",
+ desc->end - desc->start, desc->start, desc->end, desc);
+ print_tree_aux(order, x->left, indent + INDENT_STEP);
+ }
+}
+
+
+static void
+print_tree(enum SortOrder order, RBTNode* root)
+{
+ fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]);
+ print_tree_aux(order, root, 0);
+ fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]);
+}
+
+#endif /* PRINT_TREE */
+
+
+#ifdef FREE_SEG_API_SMOKE_TEST
+
+void test_it(void)
+{
+ ErtsFreeSegMap map;
+ ErtsFreeSegDesc *desc, *under, *over, *d1, *d2;
+ const int i = 1; /* reverse addr order */
+
+ {
+ init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER);
+
+ insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+ insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ desc = lookup_free_seg(&map, 0x500);
+ ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L));
+
+ desc = lookup_free_seg(&map, 0x1500);
+ ERTS_ASSERT(desc->start == (char*)0x15000);
+
+ adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over);
+ ERTS_ASSERT(!under && !over);
+
+ adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over);
+ ERTS_ASSERT(!under && over->start == (char*)0x8000);
+
+ adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over);
+ ERTS_ASSERT(under->end == (char*)0x10000 && !over);
+
+ adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over);
+ ERTS_ASSERT(!under && !over);
+
+ adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over);
+ ERTS_ASSERT(!under && over && over->start == (char*)0x11000);
+
+ adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over);
+ ERTS_ASSERT(under && under->end == (char*)0x12000 && !over);
+
+ adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over);
+ ERTS_ASSERT(under && under->end == (char*)0x12000 &&
+ over && over->start == (char*)0x13000);
+
+ adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over);
+ ERTS_ASSERT(!under && over && over->start == (char*)0x13000);
+
+ d1 = lookup_free_seg(&map, 0x500);
+ ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L));
+
+ resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ d2 = lookup_free_seg(&map, 0x1200);
+ ERTS_ASSERT(d2 == d1);
+
+ delete_free_seg(&map, d1);
+ HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
+
+ d1 = lookup_free_seg(&map, 0x1200);
+ ERTS_ASSERT(d1->start == (char*)0x15000);
+ }
+}
+
+#endif /* FREE_SEG_API_SMOKE_TEST */
+
+
+#ifdef HARD_DEBUG_MSEG
+
+/*
+ * Debug stuff used by erl_mseg to check that it does the right thing.
+ * The reason for keeping it here is that we (ab)use the rb-tree code
+ * for keeping track of *allocated* segments.
+ */
+
+typedef struct ErtsFreeSegDesc_fake_ {
+ /*RBTNode snode; Save memory by skipping unused size tree node */
+ RBTNode anode; /* node in 'atree' */
+ union {
+ char* start;
+ struct ErtsFreeSegDesc_fake_* next_free;
+ }u;
+ char* end;
+}ErtsFreeSegDesc_fake;
+
+static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000];
+static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first;
+RBTree hard_dbg_mseg_tree;
+
+static erts_mtx_t hard_dbg_mseg_mtx;
+
+static void hard_dbg_mseg_init(void)
+{
+ ErtsFreeSegDesc_fake* p;
+
+ erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg");
+ hard_dbg_mseg_tree.root = NULL;
+ hard_dbg_mseg_tree.order = ADDR_ORDER;
+
+ p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) /
+ sizeof(*hard_dbg_mseg_desc_pool)) - 1];
+ p->u.next_free = NULL;
+ while (--p >= hard_dbg_mseg_desc_pool) {
+ p->u.next_free = (p+1);
+ }
+ hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0];
+}
+
+static ErtsFreeSegDesc* hard_dbg_alloc_desc(void)
+{
+ ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first;
+ ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors");
+ hard_dbg_mseg_desc_first = p->u.next_free;
+
+ /* Creative pointer arithmetic to return something that looks like
+ * a ErtsFreeSegDesc as long as we don't use the absent 'snode'.
+ */
+ return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode));
+}
+
+static void hard_dbg_free_desc(ErtsFreeSegDesc* desc)
+{
+ ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode;
+ memset(p, 0xfe, sizeof(*p));
+ p->u.next_free = hard_dbg_mseg_desc_first;
+ hard_dbg_mseg_desc_first = p;
+}
+
+static void check_seg_writable(void* seg, UWord sz)
+{
+ UWord* seg_end = (UWord*)((char*)seg + sz);
+ volatile UWord* p;
+ ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg));
+ ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz));
+ for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) {
+ UWord write_back = *p;
+ *p = 0xfade2b1acc;
+ *p = write_back;
+ }
+}
+
+void hard_dbg_insert_mseg(void* seg, UWord sz)
+{
+ check_seg_writable(seg, sz);
+ erts_mtx_lock(&hard_dbg_mseg_mtx);
+ {
+ ErtsFreeSegDesc *desc = hard_dbg_alloc_desc();
+ RBTNode *prev, *next;
+ desc->start = (char*)seg;
+ desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */
+ rbt_insert(&hard_dbg_mseg_tree, &desc->anode);
+ prev = rbt_prev_node(&desc->anode);
+ next = rbt_next_node(&desc->anode);
+ ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start);
+ ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end);
+ }
+ erts_mtx_unlock(&hard_dbg_mseg_mtx);
+}
+
+static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start)
+{
+ RBTNode* x = tree->root;
+
+ while (x) {
+ ErtsFreeSegDesc* desc = anode_to_desc(x);
+ if (start < desc->start) {
+ x = x->left;
+ }
+ else if (start > desc->start) {
+ ERTS_ASSERT(start > desc->end);
+ x = x->right;
+ }
+ else
+ return desc;
+ }
+ return NULL;
+}
+
+void hard_dbg_remove_mseg(void* seg, UWord sz)
+{
+ check_seg_writable(seg, sz);
+ erts_mtx_lock(&hard_dbg_mseg_mtx);
+ {
+ ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg);
+ ERTS_ASSERT(desc);
+ ERTS_ASSERT(desc->start == (char*)seg);
+ ERTS_ASSERT(desc->end == (char*)seg + sz - 1);
+
+ rbt_delete(&hard_dbg_mseg_tree, &desc->anode);
+ hard_dbg_free_desc(desc);
+ }
+ erts_mtx_unlock(&hard_dbg_mseg_mtx);
+}
+
+#endif /* HARD_DEBUG_MSEG */
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
new file mode 100644
index 0000000000..e6934dbb26
--- /dev/null
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -0,0 +1,134 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2013. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+#ifndef ERL_MMAP_H__
+#define ERL_MMAP_H__
+
+#include "sys.h"
+
+#define ERTS_MMAP_SUPERALIGNED_BITS (18)
+/* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
+
+#define ERTS_MMAPFLG_OS_ONLY (((Uint32) 1) << 0)
+#define ERTS_MMAPFLG_SUPERCARRIER_ONLY (((Uint32) 1) << 1)
+#define ERTS_MMAPFLG_SUPERALIGNED (((Uint32) 1) << 2)
+
+#define ERTS_HAVE_ERTS_OS_MMAP (1 << 0)
+#define ERTS_HAVE_ERTS_SUPERCARRIER_MMAP (1 << 1)
+extern int erts_have_erts_mmap;
+extern UWord erts_page_inv_mask;
+
+typedef struct {
+ struct {
+ char *start;
+ char *end;
+ } virtual_range;
+ struct {
+ char *start;
+ char *end;
+ } predefined_area;
+ UWord scs; /* super carrier size */
+ int sco; /* super carrier only? */
+ Uint scmgc; /* super carrier: max guaranteed (number of) carriers */
+ int scrpm;
+}ErtsMMapInit;
+
+#define ERTS_MMAP_INIT_DEFAULT_INITER \
+ {{NULL, NULL}, {NULL, NULL}, 0, 1, (1 << 16), 1}
+
+void *erts_mmap(Uint32 flags, UWord *sizep);
+void erts_munmap(Uint32 flags, void *ptr, UWord size);
+void *erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep);
+int erts_mmap_in_supercarrier(void *ptr);
+void erts_mmap_init(ErtsMMapInit*);
+struct erts_mmap_info_struct
+{
+ UWord sizes[6];
+ UWord segs[6];
+ UWord os_used;
+};
+Eterm erts_mmap_info(int *print_to_p, void *print_to_arg,
+ Eterm** hpp, Uint* szp, struct erts_mmap_info_struct*);
+Eterm erts_mmap_info_options(char *prefix, int *print_to_p, void *print_to_arg,
+ Uint **hpp, Uint *szp);
+struct process;
+Eterm erts_mmap_debug_info(struct process*);
+
+#define ERTS_SUPERALIGNED_SIZE \
+ (1 << ERTS_MMAP_SUPERALIGNED_BITS)
+#define ERTS_INV_SUPERALIGNED_MASK \
+ ((UWord) (ERTS_SUPERALIGNED_SIZE - 1))
+#define ERTS_SUPERALIGNED_MASK \
+ (~ERTS_INV_SUPERALIGNED_MASK)
+#define ERTS_SUPERALIGNED_FLOOR(X) \
+ (((UWord) (X)) & ERTS_SUPERALIGNED_MASK)
+#define ERTS_SUPERALIGNED_CEILING(X) \
+ ERTS_SUPERALIGNED_FLOOR((X) + ERTS_INV_SUPERALIGNED_MASK)
+#define ERTS_IS_SUPERALIGNED(X) \
+ (((UWord) (X) & ERTS_INV_SUPERALIGNED_MASK) == 0)
+
+#define ERTS_INV_PAGEALIGNED_MASK \
+ (erts_page_inv_mask)
+#define ERTS_PAGEALIGNED_MASK \
+ (~ERTS_INV_PAGEALIGNED_MASK)
+#define ERTS_PAGEALIGNED_FLOOR(X) \
+ (((UWord) (X)) & ERTS_PAGEALIGNED_MASK)
+#define ERTS_PAGEALIGNED_CEILING(X) \
+ ERTS_PAGEALIGNED_FLOOR((X) + ERTS_INV_PAGEALIGNED_MASK)
+#define ERTS_IS_PAGEALIGNED(X) \
+ (((UWord) (X) & ERTS_INV_PAGEALIGNED_MASK) == 0)
+#define ERTS_PAGEALIGNED_SIZE \
+ (ERTS_INV_PAGEALIGNED_MASK + 1)
+
+#ifndef HAVE_MMAP
+# define HAVE_MMAP 0
+#endif
+#ifndef HAVE_MREMAP
+# define HAVE_MREMAP 0
+#endif
+#if HAVE_MMAP
+# define ERTS_HAVE_OS_MMAP 1
+# define ERTS_HAVE_GENUINE_OS_MMAP 1
+# if HAVE_MREMAP
+# define ERTS_HAVE_OS_MREMAP 1
+# endif
+# if defined(MAP_FIXED) && defined(MAP_NORESERVE)
+# define ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION 1
+# endif
+#endif
+
+#ifndef HAVE_VIRTUALALLOC
+# define HAVE_VIRTUALALLOC 0
+#endif
+#if HAVE_VIRTUALALLOC
+# define ERTS_HAVE_OS_MMAP 1
+#endif
+
+/*#define HARD_DEBUG_MSEG*/
+#ifdef HARD_DEBUG_MSEG
+# define HARD_DBG_INSERT_MSEG hard_dbg_insert_mseg
+# define HARD_DBG_REMOVE_MSEG hard_dbg_remove_mseg
+void hard_dbg_insert_mseg(void* seg, UWord sz);
+void hard_dbg_remove_mseg(void* seg, UWord sz);
+#else
+# define HARD_DBG_INSERT_MSEG(SEG,SZ)
+# define HARD_DBG_REMOVE_MSEG(SEG,SZ)
+#endif
+
+#endif /* ERL_MMAP_H__ */
diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c
index 2748edba02..94a381e168 100644
--- a/erts/emulator/sys/common/erl_mseg.c
+++ b/erts/emulator/sys/common/erl_mseg.c
@@ -100,45 +100,6 @@ static int atoms_initialized;
typedef struct mem_kind_t MemKind;
-#if HALFWORD_HEAP
-static int initialize_pmmap(void);
-static void *pmmap(size_t size);
-static int pmunmap(void *p, size_t size);
-static void *pmremap(void *old_address, size_t old_size,
- size_t new_size);
-#endif
-
-#if HAVE_MMAP
-/* Mmap ... */
-
-#define MMAP_PROT (PROT_READ|PROT_WRITE)
-
-
-#ifdef MAP_ANON
-# define MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
-# define MMAP_FD (-1)
-#else
-# define MMAP_FLAGS (MAP_PRIVATE)
-# define MMAP_FD mmap_fd
-static int mmap_fd;
-#endif
-
-#if HAVE_MREMAP
-# define HAVE_MSEG_RECREATE 1
-#else
-# define HAVE_MSEG_RECREATE 0
-#endif
-
-#if HALFWORD_HEAP
-#define CAN_PARTLY_DESTROY 0
-#else
-#define CAN_PARTLY_DESTROY 1
-#endif
-#else /* #if HAVE_MMAP */
-#define CAN_PARTLY_DESTROY 0
-#error "Not supported"
-#endif /* #if HAVE_MMAP */
-
const ErtsMsegOpt_t erts_mseg_default_opt = {
1, /* Use cache */
1, /* Preserv data */
@@ -163,9 +124,7 @@ typedef struct {
CallCounter create;
CallCounter create_resize;
CallCounter destroy;
-#if HAVE_MSEG_RECREATE
CallCounter recreate;
-#endif
CallCounter clear_cache;
CallCounter check_cache;
} ErtsMsegCalls;
@@ -173,7 +132,7 @@ typedef struct {
typedef struct cache_t_ cache_t;
struct cache_t_ {
- Uint size;
+ UWord size;
void *seg;
cache_t *next;
cache_t *prev;
@@ -236,11 +195,6 @@ struct ErtsMsegAllctr_t_ {
Uint rel_max_cache_bad_fit;
ErtsMsegCalls calls;
-
-#if CAN_PARTLY_DESTROY
- Uint min_seg_size;
-#endif
-
};
typedef union {
@@ -344,69 +298,31 @@ schedule_cache_check(ErtsMsegAllctr_t *ma) {
}
}
-/* remove ErtsMsegAllctr_t from arguments?
- * only used for statistics
- */
-static ERTS_INLINE void *
-mmap_align(ErtsMsegAllctr_t *ma, void *addr, size_t length, int prot, int flags, int fd, off_t offset) {
-
- char *p, *q;
- UWord d;
-
- p = mmap(addr, length, prot, flags, fd, offset);
-
- if (MAP_IS_ALIGNED(p) || p == MAP_FAILED)
- return p;
-
- if (ma)
- INC_CC(ma, create_resize);
-
- munmap(p, length);
-
- if ((p = mmap(addr, length + MSEG_ALIGNED_SIZE, prot, flags, fd, offset)) == MAP_FAILED)
- return MAP_FAILED;
-
- q = (void *)ALIGNED_CEILING((char *)p);
- d = (UWord)(q - p);
-
- if (d > 0)
- munmap(p, d);
-
- if (MSEG_ALIGNED_SIZE - d > 0)
- munmap((void *)(q + length), MSEG_ALIGNED_SIZE - d);
-
- return q;
-}
+/* #define ERTS_PRINT_ERTS_MMAP */
static ERTS_INLINE void *
-mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size)
+mseg_create(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, UWord *sizep)
{
+#ifdef ERTS_PRINT_ERTS_MMAP
+ UWord req_size = *sizep;
+#endif
void *seg;
- ASSERT(size % MSEG_ALIGNED_SIZE == 0);
-
+ Uint32 mmap_flags = 0;
#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- seg = pmmap(size);
- if ((unsigned long) seg & CHECK_POINTER_MASK) {
- erts_fprintf(stderr,"Pointer mask failure (0x%08lx)\n",(unsigned long) seg);
- return NULL;
- }
- } else
+ mmap_flags |= ((mk == &ma->low_mem)
+ ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
+ : ERTS_MMAPFLG_OS_ONLY);
#endif
- {
-#if HAVE_MMAP
- {
- seg = (void *) mmap_align(ma, (void *) 0, (size_t) size,
- MMAP_PROT, MMAP_FLAGS, MMAP_FD, 0);
- if (seg == (void *) MAP_FAILED)
- seg = NULL;
-
- ASSERT(MAP_IS_ALIGNED(seg) || !seg);
- }
-#else
-# error "Missing mseg_create() implementation"
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
+
+ seg = erts_mmap(mmap_flags, sizep);
+
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "%p = erts_mmap(%s, {%bpu, %bpu});\n", seg,
+ (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ req_size, *sizep);
#endif
- }
INC_CC(ma, create);
@@ -414,91 +330,55 @@ mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size)
}
static ERTS_INLINE void
-mseg_destroy(ErtsMsegAllctr_t *ma, MemKind* mk, void *seg, Uint size) {
- ERTS_DECLARE_DUMMY(int res);
-
+mseg_destroy(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *seg_p, UWord size) {
+
+ Uint32 mmap_flags = 0;
#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- res = pmunmap((void *) seg, size);
- }
- else
+ mmap_flags |= ((mk == &ma->low_mem)
+ ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
+ : ERTS_MMAPFLG_OS_ONLY);
#endif
- {
-#ifdef HAVE_MMAP
- res = munmap((void *) seg, size);
-#else
-# error "Missing mseg_destroy() implementation"
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
+
+ erts_munmap(mmap_flags, seg_p, size);
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "erts_munmap(%s, %p, %bpu);\n",
+ (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ seg_p, *size);
#endif
- }
-
- ASSERT(size % MSEG_ALIGNED_SIZE == 0);
- ASSERT(res == 0);
-
INC_CC(ma, destroy);
}
-#if HAVE_MSEG_RECREATE
-#if defined(__NetBSD__)
-#define MREMAP_FLAGS (0)
-#else
-#define MREMAP_FLAGS (MREMAP_MAYMOVE)
-#endif
-
-
-/* mseg_recreate
- * May return *unaligned* segments as in address not aligned to MSEG_ALIGNMENT
- * it is still page aligned
- *
- * This is fine for single block carriers as long as we don't cache misaligned
- * segments (since multiblock carriers may use them)
- *
- * For multiblock carriers we *need* MSEG_ALIGNMENT but mbc's will never be
- * reallocated.
- *
- * This should probably be fixed the following way:
- * 1) Use an option to segment allocation - NEED_ALIGNMENT
- * 2) Add mremap_align which takes care of aligning a new a mremaped area
- * 3) Fix the cache to handle of aligned and unaligned segments
- */
-
static ERTS_INLINE void *
-mseg_recreate(ErtsMsegAllctr_t *ma, MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
+mseg_recreate(ErtsMsegAllctr_t *ma, Uint flags, MemKind* mk, void *old_seg, UWord old_size, UWord *sizep)
{
+#ifdef ERTS_PRINT_ERTS_MMAP
+ UWord req_size = *sizep;
+#endif
void *new_seg;
-
- ASSERT(old_size % MSEG_ALIGNED_SIZE == 0);
- ASSERT(new_size % MSEG_ALIGNED_SIZE == 0);
-
+ Uint32 mmap_flags = 0;
#if HALFWORD_HEAP
- if (mk == &ma->low_mem) {
- new_seg = (void *) pmremap((void *) old_seg,
- (size_t) old_size,
- (size_t) new_size);
- }
- else
+ mmap_flags |= ((mk == &ma->low_mem)
+ ? ERTS_MMAPFLG_SUPERCARRIER_ONLY
+ : ERTS_MMAPFLG_OS_ONLY);
#endif
- {
-#if HAVE_MREMAP
-#if defined(__NetBSD__)
- new_seg = mremap(old_seg, (size_t)old_size, NULL, new_size, MREMAP_FLAGS);
-#else
- new_seg = mremap(old_seg, (size_t)old_size, (size_t)new_size, MREMAP_FLAGS);
-#endif
- if (new_seg == (void *) MAP_FAILED)
- new_seg = NULL;
-#else
-#error "Missing mseg_recreate() implementation"
-#endif
- }
+ if (MSEG_FLG_IS_2POW(flags))
+ mmap_flags |= ERTS_MMAPFLG_SUPERALIGNED;
+ new_seg = erts_mremap(mmap_flags, old_seg, old_size, sizep);
+
+#ifdef ERTS_PRINT_ERTS_MMAP
+ erts_fprintf(stderr, "%p = erts_mremap(%s, %p, %bpu, {%bpu, %bpu});\n",
+ new_seg, (mmap_flags & ERTS_MMAPFLG_SUPERALIGNED) ? "sa" : "sua",
+ old_seg, old_size, req_size, *sizep);
+#endif
INC_CC(ma, recreate);
return new_seg;
}
-#endif /* #if HAVE_MSEG_RECREATE */
-
#ifdef DEBUG
#define ERTS_DBG_MA_CHK_THR_ACCESS(MA) \
do { \
@@ -528,7 +408,7 @@ static ERTS_INLINE void mseg_cache_clear_node(cache_t *c) {
c->prev = c;
}
-static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Uint flags) {
+static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, UWord size, Uint flags) {
cache_t *c;
ERTS_DBG_MK_CHK_THR_ACCESS(mk);
@@ -566,14 +446,13 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
return 1;
} else if (!MSEG_FLG_IS_2POW(flags) && !erts_circleq_is_empty(&(mk->cache_unpowered_node))) {
-
/* No free slots.
* Evict oldest slot from unpowered cache so we can cache an unpowered (sbc) segment */
c = erts_circleq_tail(&(mk->cache_unpowered_node));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(mk->ma, ERTS_MSEG_FLG_NONE, mk, c->seg, c->size);
mseg_cache_clear_node(c);
c->seg = seg;
@@ -599,7 +478,8 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
c = erts_circleq_tail(&(mk->cache_powered_node[i]));
erts_circleq_remove(c);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, c->seg, c->size);
+
mseg_cache_clear_node(c);
c->seg = seg;
@@ -614,18 +494,18 @@ static ERTS_INLINE int cache_bless_segment(MemKind *mk, void *seg, Uint size, Ui
return 0;
}
-static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags) {
+static ERTS_INLINE void *cache_get_segment(MemKind *mk, UWord *size_p, Uint flags) {
- Uint size = *size_p;
+ UWord size = *size_p;
ERTS_DBG_MK_CHK_THR_ACCESS(mk);
if (MSEG_FLG_IS_2POW(flags)) {
int i, ix = SIZE_TO_CACHE_AREA_IDX(size);
- void *seg;
+ char *seg;
cache_t *c;
- Uint csize;
+ UWord csize;
ASSERT(IS_2POW(size));
@@ -641,7 +521,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
ASSERT(MAP_IS_ALIGNED(c->seg));
csize = c->size;
- seg = c->seg;
+ seg = (char*) c->seg;
mk->cache_size--;
mk->cache_hits++;
@@ -652,9 +532,8 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
ASSERT(!(mk->cache_size < 0));
- if (csize != size) {
- mseg_destroy(mk->ma, mk, (char *)seg + size, csize - size);
- }
+ if (csize != size)
+ mseg_destroy(mk->ma, ERTS_MSEG_FLG_2POW, mk, seg + size, csize - size);
return seg;
}
@@ -663,10 +542,10 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
void *seg;
cache_t *c;
cache_t *best = NULL;
- Uint bdiff = 0;
- Uint csize;
- Uint bad_max_abs = mk->ma->abs_max_cache_bad_fit;
- Uint bad_max_rel = mk->ma->rel_max_cache_bad_fit;
+ UWord bdiff = 0;
+ UWord csize;
+ UWord bad_max_abs = mk->ma->abs_max_cache_bad_fit;
+ UWord bad_max_rel = mk->ma->rel_max_cache_bad_fit;
erts_circleq_foreach(c, &(mk->cache_unpowered_node)) {
csize = c->size;
@@ -728,7 +607,7 @@ static ERTS_INLINE void *cache_get_segment(MemKind *mk, Uint *size_p, Uint flags
* using callbacks from aux-work in the scheduler.
*/
-static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) {
cache_t *c = NULL;
c = erts_circleq_tail(head);
@@ -737,7 +616,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(mk->ma, flags, mk, c->seg, c->size);
mseg_cache_clear_node(c);
erts_circleq_push_head(&(mk->cache_free), c);
@@ -749,7 +628,7 @@ static ERTS_INLINE Uint mseg_drop_one_memkind_cache_size(MemKind *mk, cache_t *h
return mk->cache_size;
}
-static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head) {
+static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, Uint flags, cache_t *head) {
cache_t *c = NULL;
while (!erts_circleq_is_empty(head)) {
@@ -760,7 +639,7 @@ static ERTS_INLINE Uint mseg_drop_memkind_cache_size(MemKind *mk, cache_t *head)
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, c->seg);
- mseg_destroy(mk->ma, mk, c->seg, c->size);
+ mseg_destroy(mk->ma, flags, mk, c->seg, c->size);
mseg_cache_clear_node(c);
erts_circleq_push_head(&(mk->cache_free), c);
@@ -788,11 +667,11 @@ static Uint mseg_check_memkind_cache(MemKind *mk) {
for (i = 0; i < CACHE_AREAS; i++) {
if (!erts_circleq_is_empty(&(mk->cache_powered_node[i])))
- return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_powered_node[i]));
+ return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i]));
}
if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- return mseg_drop_one_memkind_cache_size(mk, &(mk->cache_unpowered_node));
+ return mseg_drop_one_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node));
return 0;
}
@@ -851,12 +730,12 @@ static void mseg_clear_memkind_cache(MemKind *mk) {
if (erts_circleq_is_empty(&(mk->cache_powered_node[i])))
continue;
- mseg_drop_memkind_cache_size(mk, &(mk->cache_powered_node[i]));
+ mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_2POW, &(mk->cache_powered_node[i]));
ASSERT(erts_circleq_is_empty(&(mk->cache_powered_node[i])));
}
/* drop varied caches */
if (!erts_circleq_is_empty(&(mk->cache_unpowered_node)))
- mseg_drop_memkind_cache_size(mk, &(mk->cache_unpowered_node));
+ mseg_drop_memkind_cache_size(mk, ERTS_MSEG_FLG_NONE, &(mk->cache_unpowered_node));
ASSERT(erts_circleq_is_empty(&(mk->cache_unpowered_node)));
ASSERT(mk->cache_size == 0);
@@ -896,36 +775,35 @@ static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma,
}
static void *
-mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, Uint *size_p,
+mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, UWord *size_p,
Uint flags, const ErtsMsegOpt_t *opt)
{
- Uint size;
+ UWord size;
void *seg;
MemKind* mk = memkind(ma, opt);
INC_CC(ma, alloc);
- /* Carrier align */
- size = ALIGNED_CEILING(*size_p);
-
- /* Cache optim (if applicable) */
- if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(size))
- size = ceil_2pow(size);
+ if (!MSEG_FLG_IS_2POW(flags))
+ size = ERTS_PAGEALIGNED_CEILING(*size_p);
+ else {
+ size = ALIGNED_CEILING(*size_p);
+ if (!IS_2POW(size)) {
+ /* Cache optim (if applicable) */
+ size = ceil_2pow(size);
+ }
+ }
-#if CAN_PARTLY_DESTROY
- if (size < ma->min_seg_size)
- ma->min_seg_size = size;
-#endif
-
if (opt->cache && mk->cache_size > 0 && (seg = cache_get_segment(mk, &size, flags)) != NULL)
goto done;
- if ((seg = mseg_create(ma, mk, size)) == NULL)
- size = 0;
+ seg = mseg_create(ma, flags, mk, &size);
+ if (!seg)
+ *size_p = 0;
+ else {
done:
- *size_p = size;
- if (seg) {
+ *size_p = size;
if (erts_mtrace_enabled)
erts_mtrace_crr_alloc(seg, atype, ERTS_MTRACE_SEGMENT_ID, size);
@@ -937,7 +815,7 @@ done:
static void
-mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size,
+mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, UWord size,
Uint flags, const ErtsMsegOpt_t *opt)
{
MemKind* mk = memkind(ma, opt);
@@ -952,7 +830,7 @@ mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size,
if (erts_mtrace_enabled)
erts_mtrace_crr_free(atype, SEGTYPE, seg);
- mseg_destroy(ma, mk, seg, size);
+ mseg_destroy(ma, flags, mk, seg, size);
done:
@@ -961,11 +839,11 @@ done:
static void *
mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p, Uint flags, const ErtsMsegOpt_t *opt)
+ UWord old_size, UWord *new_size_p, Uint flags, const ErtsMsegOpt_t *opt)
{
MemKind* mk;
void *new_seg;
- Uint new_size;
+ UWord new_size;
/* Just allocate a new segment if we didn't have one before */
if (!seg || !old_size) {
@@ -985,90 +863,47 @@ mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
mk = memkind(ma, opt);
new_seg = seg;
- /* Carrier align */
- new_size = ALIGNED_CEILING(*new_size_p);
-
- /* Cache optim (if applicable) */
- if (MSEG_FLG_IS_2POW(flags) && !IS_2POW(new_size))
- new_size = ceil_2pow(new_size);
-
- if (new_size == old_size)
- ;
- else if (new_size < old_size) {
- Uint shrink_sz = old_size - new_size;
+ if (!MSEG_FLG_IS_2POW(flags))
+ new_size = ERTS_PAGEALIGNED_CEILING(*new_size_p);
+ else {
+ new_size = ALIGNED_CEILING(*new_size_p);
+ if (!IS_2POW(new_size)) {
+ /* Cache optim (if applicable) */
+ new_size = ceil_2pow(new_size);
+ }
+ }
-#if CAN_PARTLY_DESTROY
- if (new_size < ma->min_seg_size)
- ma->min_seg_size = new_size;
-#endif
- /* +M<S>rsbcst <ratio> */
- if (shrink_sz < opt->abs_shrink_th
- && 100*shrink_sz < opt->rel_shrink_th*old_size) {
- new_size = old_size;
+ if (new_size > old_size) {
+ if (opt->preserv) {
+ new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size);
+ if (!new_seg)
+ new_size = old_size;
}
else {
-
-#if CAN_PARTLY_DESTROY
-
- if (erts_mtrace_enabled)
- erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
-
- mseg_destroy(ma, mk, ((char *) seg) + new_size, shrink_sz);
-
-#elif HAVE_MSEG_RECREATE
- goto do_recreate;
-#else
+ mseg_dealloc(ma, atype, seg, old_size, flags, opt);
new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
-
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
-
if (!new_seg)
- new_size = old_size;
- else {
- sys_memcpy(((char *) new_seg),
- ((char *) seg),
- MIN(new_size, old_size));
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- }
-#endif
+ new_size = 0;
}
}
- else {
+ else if (new_size < old_size) {
+ UWord shrink_sz = old_size - new_size;
- if (!opt->preserv) {
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
+ /* +M<S>rsbcst <ratio> */
+ if (shrink_sz < opt->abs_shrink_th
+ && 100*shrink_sz < opt->rel_shrink_th*old_size) {
+ new_size = old_size;
}
else {
-#if HAVE_MSEG_RECREATE
-#if !CAN_PARTLY_DESTROY
- do_recreate:
-#endif
- new_seg = mseg_recreate(ma, mk, (void *) seg, old_size, new_size);
- /* ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
- * will not always be aligned and it ok for now
- */
-
- if (erts_mtrace_enabled)
- erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
+ new_seg = mseg_recreate(ma, flags, mk, (void *) seg, old_size, &new_size);
if (!new_seg)
new_size = old_size;
-#else
- new_seg = mseg_alloc(ma, atype, &new_size, flags, opt);
-
- ASSERT(MAP_IS_ALIGNED(new_seg) || !new_seg);
-
- if (!new_seg)
- new_size = old_size;
- else {
- sys_memcpy(((char *) new_seg), ((char *) seg), MIN(new_size, old_size));
- mseg_dealloc(ma, atype, seg, old_size, flags, opt);
- }
-#endif
}
}
+ if (erts_mtrace_enabled)
+ erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
+
INC_CC(ma, realloc);
ASSERT(!MSEG_FLG_IS_2POW(flags) || IS_2POW(new_size));
@@ -1106,9 +941,7 @@ static struct {
Eterm mseg_create;
Eterm mseg_create_resize;
Eterm mseg_destroy;
-#if HAVE_MSEG_RECREATE
Eterm mseg_recreate;
-#endif
Eterm mseg_clear_cache;
Eterm mseg_check_cache;
@@ -1129,8 +962,6 @@ init_atoms(ErtsMsegAllctr_t *ma)
#ifdef DEBUG
Eterm *atom;
#endif
-
- ERTS_MSEG_UNLOCK(ma);
erts_mtx_lock(&init_atoms_mutex);
if (!atoms_initialized) {
@@ -1163,9 +994,7 @@ init_atoms(ErtsMsegAllctr_t *ma)
AM_INIT(mseg_create);
AM_INIT(mseg_create_resize);
AM_INIT(mseg_destroy);
-#if HAVE_MSEG_RECREATE
AM_INIT(mseg_recreate);
-#endif
AM_INIT(mseg_clear_cache);
AM_INIT(mseg_check_cache);
@@ -1176,7 +1005,6 @@ init_atoms(ErtsMsegAllctr_t *ma)
#endif
}
- ERTS_MSEG_LOCK(ma);
atoms_initialized = 1;
erts_mtx_unlock(&init_atoms_mutex);
}
@@ -1239,7 +1067,9 @@ info_options(ErtsMsegAllctr_t *ma,
Uint **hpp,
Uint *szp)
{
- Eterm res = THE_NON_VALUE;
+ Eterm res;
+
+ res = erts_mmap_info_options(prefix, print_to_p, print_to_arg, hpp, szp);
if (print_to_p) {
int to = *print_to_p;
@@ -1254,7 +1084,6 @@ info_options(ErtsMsegAllctr_t *ma,
if (!atoms_initialized)
init_atoms(ma);
- res = NIL;
add_2tup(hpp, szp, &res,
am.mcs,
bld_uint(hpp, szp, ma->max_cache_size));
@@ -1293,9 +1122,7 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
PRINT_CC(to, arg, create);
PRINT_CC(to, arg, create_resize);
PRINT_CC(to, arg, destroy);
-#if HAVE_MSEG_RECREATE
PRINT_CC(to, arg, recreate);
-#endif
PRINT_CC(to, arg, clear_cache);
PRINT_CC(to, arg, check_cache);
@@ -1316,12 +1143,10 @@ info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp
bld_unstable_uint(hpp, szp, ma->calls.clear_cache.giga_no),
bld_unstable_uint(hpp, szp, ma->calls.clear_cache.no));
-#if HAVE_MSEG_RECREATE
add_3tup(hpp, szp, &res,
am.mseg_recreate,
bld_unstable_uint(hpp, szp, ma->calls.recreate.giga_no),
bld_unstable_uint(hpp, szp, ma->calls.recreate.no));
-#endif
add_3tup(hpp, szp, &res,
am.mseg_destroy,
bld_unstable_uint(hpp, szp, ma->calls.destroy.giga_no),
@@ -1465,14 +1290,8 @@ erts_mseg_info_options(int ix,
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix);
Eterm res;
- ERTS_MSEG_LOCK(ma);
-
- ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-
res = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
- ERTS_MSEG_UNLOCK(ma);
-
return res;
}
@@ -1490,10 +1309,6 @@ erts_mseg_info(int ix,
Eterm values[4];
Uint n = 0;
- ERTS_MSEG_LOCK(ma);
-
- ERTS_DBG_MA_CHK_THR_ACCESS(ma);
-
if (hpp || szp) {
if (!atoms_initialized)
@@ -1506,6 +1321,10 @@ erts_mseg_info(int ix,
}
values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
+
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+
#if HALFWORD_HEAP
values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
@@ -1521,7 +1340,7 @@ erts_mseg_info(int ix,
}
void *
-erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMsegOpt_t *opt)
+erts_mseg_alloc_opt(ErtsAlcType_t atype, UWord *size_p, Uint flags, const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *seg;
@@ -1529,20 +1348,23 @@ erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, Uint flags, const ErtsMse
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
seg = mseg_alloc(ma, atype, size_p, flags, opt);
ERTS_MSEG_UNLOCK(ma);
+ HARD_DBG_INSERT_MSEG(seg, *size_p);
return seg;
}
void *
-erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p, Uint flags)
+erts_mseg_alloc(ErtsAlcType_t atype, UWord *size_p, Uint flags)
{
return erts_mseg_alloc_opt(atype, size_p, flags, &erts_mseg_default_opt);
}
void
erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg,
- Uint size, Uint flags, const ErtsMsegOpt_t *opt)
+ UWord size, Uint flags, const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
+
+ HARD_DBG_REMOVE_MSEG(seg, size);
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
mseg_dealloc(ma, atype, seg, size, flags, opt);
@@ -1550,29 +1372,32 @@ erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg,
}
void
-erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size, Uint flags)
+erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, UWord size, Uint flags)
{
erts_mseg_dealloc_opt(atype, seg, size, flags, &erts_mseg_default_opt);
}
void *
erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p,
+ UWord old_size, UWord *new_size_p,
Uint flags,
const ErtsMsegOpt_t *opt)
{
ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *new_seg;
+
+ HARD_DBG_REMOVE_MSEG(seg, old_size);
ERTS_MSEG_LOCK(ma);
ERTS_DBG_MA_CHK_THR_ACCESS(ma);
new_seg = mseg_realloc(ma, atype, seg, old_size, new_size_p, flags, opt);
ERTS_MSEG_UNLOCK(ma);
+ HARD_DBG_INSERT_MSEG(new_seg, *new_size_p);
return new_seg;
}
void *
erts_mseg_realloc(ErtsAlcType_t atype, void *seg,
- Uint old_size, Uint *new_size_p, Uint flags)
+ UWord old_size, UWord *new_size_p, Uint flags)
{
return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p,
flags, &erts_mseg_default_opt);
@@ -1662,16 +1487,17 @@ erts_mseg_init(ErtsMsegInit_t *init)
erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
-#if HAVE_MMAP && !defined(MAP_ANON)
- mmap_fd = open("/dev/zero", O_RDWR);
- if (mmap_fd < 0)
- erl_exit(ERTS_ABORT_EXIT, "erts_mseg: unable to open /dev/zero\n");
-#endif
+#if HALFWORD_HEAP
+ if (sizeof(void *) != 8)
+ erl_exit(-1,"Halfword emulator cannot be run in 32bit mode");
-#if HAVE_MMAP && HALFWORD_HEAP
- initialize_pmmap();
+ init->mmap.virtual_range.start = (char *) sbrk(0);
+ init->mmap.virtual_range.end = (char *) 0x100000000UL;
+ init->mmap.sco = 0;
#endif
+ erts_mmap_init(&init->mmap);
+
if (!IS_2POW(GET_PAGE_SIZE))
erl_exit(ERTS_ABORT_EXIT, "erts_mseg: Unexpected page_size %beu\n", GET_PAGE_SIZE);
@@ -1712,10 +1538,6 @@ erts_mseg_init(ErtsMsegInit_t *init)
#endif
sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls));
-
-#if CAN_PARTLY_DESTROY
- ma->min_seg_size = ~((Uint) 0);
-#endif
}
}
@@ -1757,7 +1579,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
case 0x400: /* Have erts_mseg */
return (UWord) 1;
case 0x401:
- return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (Uint *) a1, (Uint) 0);
+ return (UWord) erts_mseg_alloc(ERTS_ALC_A_INVALID, (UWord *) a1, (Uint) 0);
case 0x402:
erts_mseg_dealloc(ERTS_ALC_A_INVALID, (void *) a1, (Uint) a2, (Uint) 0);
return (UWord) 0;
@@ -1765,7 +1587,7 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
return (UWord) erts_mseg_realloc(ERTS_ALC_A_INVALID,
(void *) a1,
(Uint) a2,
- (Uint *) a3,
+ (UWord *) a3,
(Uint) 0);
case 0x404:
erts_mseg_clear_cache();
@@ -1788,405 +1610,3 @@ erts_mseg_test(UWord op, UWord a1, UWord a2, UWord a3)
}
}
-
-
-#if HALFWORD_HEAP
-/*
- * Very simple page oriented mmap replacer. Works in the lower
- * 32 bit address range of a 64bit program.
- * Implements anonymous mmap mremap and munmap with address order first fit.
- * The free list is expected to be very short...
- * To be used for compressed pointers in Erlang halfword emulator
- * implementation. The MacOS X version is more of a toy, it's not really
- * for production as the halfword erlang VM relies on Linux specific memory
- * mapping tricks.
- */
-
-/* #define HARDDEBUG 1 */
-
-#ifdef HARDDEBUG
-static void dump_freelist(void)
-{
- FreeBlock *p = first;
-
- while (p) {
- fprintf(stderr, "p = %p\r\np->num = %ld\r\np->next = %p\r\n\r\n",
- (void *) p, (unsigned long) p->num, (void *) p->next);
- p = p->next;
- }
-}
-
-#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) \
- fprintf(stderr,"Mapping of address %p with size %ld " \
- "does not map complete pages (%s:%d)\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-
-#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) \
- fprintf(stderr,"Mapping of address %p with size %ld " \
- "is not page aligned (%s:%d)\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-
-#define HARDDEBUG_MAP_FAILED(PTR, SZ) \
- fprintf(stderr, "Could not actually map memory " \
- "at address %p with size %ld (%s:%d) ..\r\n", \
- (void *) (PTR), (unsigned long) (SZ),__FILE__, __LINE__)
-#else
-#define HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(PTR, SZ) do{}while(0)
-#define HARDDEBUG_HW_UNALIGNED_ALIGNMENT(PTR, SZ) do{}while(0)
-#define HARDDEBUG_MAP_FAILED(PTR, SZ) do{}while(0)
-#endif
-
-
-#ifdef __APPLE__
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#define INIT_LOCK() do {erts_mtx_init(&pmmap_mutex, "pmmap");} while(0)
-
-#define TAKE_LOCK() do {erts_mtx_lock(&pmmap_mutex);} while(0)
-
-#define RELEASE_LOCK() do {erts_mtx_unlock(&pmmap_mutex);} while(0)
-
-static erts_mtx_t pmmap_mutex; /* Also needed when !USE_THREADS */
-
-typedef struct _free_block {
- unsigned long num; /*pages*/
- struct _free_block *next;
-} FreeBlock;
-
-/* Protect with lock */
-static FreeBlock *first;
-
-static void *do_map(void *ptr, size_t sz)
-{
- void *res;
-
- if (ALIGNED_CEILING(sz) != sz) {
- HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz);
- return NULL;
- }
-
- if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) {
- HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz);
- return NULL;
- }
-
-#if HAVE_MMAP
- res = mmap(ptr, sz,
- PROT_READ | PROT_WRITE, MAP_PRIVATE |
- MAP_ANONYMOUS | MAP_FIXED,
- -1 , 0);
-#else
-# error "Missing mmap support"
-#endif
-
- if (res == MAP_FAILED) {
- HARDDEBUG_MAP_FAILED(ptr, sz);
- return NULL;
- }
-
- return res;
-}
-
-static int do_unmap(void *ptr, size_t sz)
-{
- void *res;
-
- if (ALIGNED_CEILING(sz) != sz) {
- HARDDEBUG_HW_INCOMPLETE_ALIGNMENT(ptr, sz);
- return 1;
- }
-
- if (((unsigned long) ptr) % MSEG_ALIGNED_SIZE) {
- HARDDEBUG_HW_UNALIGNED_ALIGNMENT(ptr, sz);
- return 1;
- }
-
- res = mmap(ptr, sz,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_FIXED,
- -1 , 0);
-
- if (res == MAP_FAILED) {
- HARDDEBUG_MAP_FAILED(ptr, sz);
- return 1;
- }
-
- return 0;
-}
-
-#ifdef __APPLE__
-/*
- * The first 4 gig's are protected on Macos X for 64bit processes :(
- * The range 0x1000000000 - 0x10FFFFFFFF is selected as an arbitrary
- * value of a normally unused range... Real MMAP's will avoid
- * it and all 32bit compressed pointers can be in that range...
- * More expensive than on Linux where expansion of compressed
- * poiters involves no masking (as they are in the first 4 gig's).
- * It's also very uncertain if the MAP_NORESERVE flag really has
- * any effect in MacOS X. Swap space may always be allocated...
- */
-#define SET_RANGE_MIN() /* nothing */
-#define RANGE_MIN 0x1000000000UL
-#define RANGE_MAX 0x1100000000UL
-#define RANGE_MASK (RANGE_MIN)
-#define EXTRA_MAP_FLAGS (MAP_FIXED)
-#else
-static size_t range_min;
-#define SET_RANGE_MIN() do { range_min = (size_t) sbrk(0); } while (0)
-#define RANGE_MIN range_min
-#define RANGE_MAX 0x100000000UL
-#define RANGE_MASK 0UL
-#define EXTRA_MAP_FLAGS (0)
-#endif
-
-static int initialize_pmmap(void)
-{
- char *p,*q,*rptr;
- size_t rsz;
- FreeBlock *initial;
-
- SET_RANGE_MIN();
- if (sizeof(void *) != 8) {
- erl_exit(1,"Halfword emulator cannot be run in 32bit mode");
- }
-
- p = (char *) RANGE_MIN;
- q = (char *) RANGE_MAX;
-
- rsz = ALIGNED_FLOOR(q - p);
-
- rptr = mmap_align(NULL, (void *) p, rsz,
- PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS |
- MAP_NORESERVE | EXTRA_MAP_FLAGS,
- -1 , 0);
-#ifdef HARDDEBUG
- printf("p=%p, rsz = %ld, pages = %ld, got range = %p -> %p\r\n",
- p, (unsigned long) rsz, (unsigned long) (rsz / MSEG_ALIGNED_SIZE),
- (void *) rptr, (void*)(rptr + rsz));
-#endif
- if ((UWord)(rptr + rsz) > RANGE_MAX) {
- size_t rsz_trunc = RANGE_MAX - (UWord)rptr;
-#ifdef HARDDEBUG
- printf("Reducing mmap'ed memory from %lu to %lu Mb, reduced range = %p -> %p\r\n",
- rsz/(1024*1024), rsz_trunc/(1024*1024), rptr, rptr+rsz_trunc);
-#endif
- munmap((void*)RANGE_MAX, rsz - rsz_trunc);
- rsz = rsz_trunc;
- }
- if (!do_map(rptr, MSEG_ALIGNED_SIZE)) {
- erl_exit(1,"Could not actually mmap first page for halfword emulator...\n");
- }
- initial = (FreeBlock *) rptr;
- initial->num = (rsz / MSEG_ALIGNED_SIZE);
- initial->next = NULL;
- first = initial;
- INIT_LOCK();
- return 0;
-}
-
-static void *pmmap(size_t size)
-{
- size_t real_size = ALIGNED_CEILING(size);
- size_t num_pages = real_size / MSEG_ALIGNED_SIZE;
- FreeBlock **block;
- FreeBlock *tail;
- FreeBlock *res;
-
- TAKE_LOCK();
-
- for (block = &first;
- *block != NULL && (*block)->num < num_pages;
- block = &((*block)->next))
- ;
- if (!(*block)) {
- RELEASE_LOCK();
- return NULL;
- }
- if ((*block)->num == num_pages) {
- /* nice, perfect fit */
- res = *block;
- *block = (*block)->next;
- } else {
- tail = (FreeBlock *) (((char *) ((void *) (*block))) + real_size);
- if (!do_map(tail, MSEG_ALIGNED_SIZE)) {
- HARDDEBUG_MAP_FAILED(tail, MSEG_ALIGNED_SIZE);
- RELEASE_LOCK();
- return NULL;
- }
- tail->num = (*block)->num - num_pages;
- tail->next = (*block)->next;
- res = *block;
- *block = tail;
- }
-
- RELEASE_LOCK();
-
- if (!do_map(res, real_size)) {
- HARDDEBUG_MAP_FAILED(res, real_size);
- return NULL;
- }
-
- return (void *) res;
-}
-
-static int pmunmap(void *p, size_t size)
-{
- size_t real_size = ALIGNED_CEILING(size);
- size_t num_pages = real_size / MSEG_ALIGNED_SIZE;
-
- FreeBlock *block;
- FreeBlock *last;
- FreeBlock *nb = (FreeBlock *) p;
-
- ASSERT(((unsigned long)p & CHECK_POINTER_MASK)==0);
-
- if (real_size > MSEG_ALIGNED_SIZE) {
- if (do_unmap(((char *) p) + MSEG_ALIGNED_SIZE, real_size - MSEG_ALIGNED_SIZE)) {
- return 1;
- }
- }
-
- TAKE_LOCK();
-
- last = NULL;
- block = first;
- while(block != NULL && ((void *) block) < p) {
- last = block;
- block = block->next;
- }
-
- if (block != NULL &&
- ((void *) block) == ((void *) (((char *) p) + real_size))) {
- /* Merge new free block with following */
- nb->num = block->num + num_pages;
- nb->next = block->next;
- if (do_unmap(block, MSEG_ALIGNED_SIZE)) {
- RELEASE_LOCK();
- return 1;
- }
- } else {
- /* just link in */
- nb->num = num_pages;
- nb->next = block;
- }
- if (last != NULL) {
- if (p == ((void *) (((char *) last) + (last->num * MSEG_ALIGNED_SIZE)))) {
- /* Merge with previous */
- last->num += nb->num;
- last->next = nb->next;
- if (do_unmap(nb, MSEG_ALIGNED_SIZE)) {
- RELEASE_LOCK();
- return 1;
- }
- } else {
- last->next = nb;
- }
- } else {
- first = nb;
- }
- RELEASE_LOCK();
- return 0;
-}
-
-static void *pmremap(void *old_address, size_t old_size,
- size_t new_size)
-{
- size_t new_real_size = ALIGNED_CEILING(new_size);
- size_t new_num_pages = new_real_size / MSEG_ALIGNED_SIZE;
- size_t old_real_size = ALIGNED_CEILING(old_size);
- size_t old_num_pages = old_real_size / MSEG_ALIGNED_SIZE;
- if (new_num_pages == old_num_pages) {
- return old_address;
- } else if (new_num_pages < old_num_pages) { /* Shrink */
- size_t nfb_pages = old_num_pages - new_num_pages;
- size_t nfb_real_size = old_real_size - new_real_size;
- void *vnfb = (void *) (((char *)old_address) + new_real_size);
- FreeBlock *nfb = (FreeBlock *) vnfb;
- FreeBlock **block;
- TAKE_LOCK();
- for (block = &first;
- *block != NULL && (*block) < nfb;
- block = &((*block)->next))
- ;
- if (!(*block) ||
- (*block) > ((FreeBlock *)(((char *) vnfb) + nfb_real_size))) {
- /* Normal link in */
- if (nfb_pages > 1) {
- if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE),
- (nfb_pages - 1)*MSEG_ALIGNED_SIZE)) {
- return NULL;
- }
- }
- nfb->next = (*block);
- nfb->num = nfb_pages;
- (*block) = nfb;
- } else { /* block merge */
- nfb->next = (*block)->next;
- nfb->num = nfb_pages + (*block)->num;
- /* unmap also the first page of the next freeblock */
- (*block) = nfb;
- if (do_unmap((void *)(((char *) vnfb) + MSEG_ALIGNED_SIZE),
- nfb_pages*MSEG_ALIGNED_SIZE)) {
- return NULL;
- }
- }
- RELEASE_LOCK();
- return old_address;
- } else { /* Enlarge */
- FreeBlock **block;
- void *old_end = (void *) (((char *)old_address) + old_real_size);
- TAKE_LOCK();
- for (block = &first;
- *block != NULL && (*block) < (FreeBlock *) old_address;
- block = &((*block)->next))
- ;
- if ((*block) == NULL || old_end > ((void *) RANGE_MAX) ||
- (*block) != old_end ||
- (*block)->num < (new_num_pages - old_num_pages)) {
- /* cannot extend */
- void *result;
- RELEASE_LOCK();
- result = pmmap(new_size);
- if (result == NULL) {
- return NULL;
- }
- memcpy(result,old_address,old_size);
- if (pmunmap(old_address,old_size)) {
- /* Oups... */
- pmunmap(result,new_size);
- return NULL;
- }
- return result;
- } else { /* extend */
- size_t remaining_pages = (*block)->num -
- (new_num_pages - old_num_pages);
- if (!remaining_pages) {
- void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE);
- void *n = (*block)->next;
- size_t x = ((*block)->num - 1) * MSEG_ALIGNED_SIZE;
- if (x > 0) {
- if (do_map(p,x) == NULL) {
- RELEASE_LOCK();
- return NULL;
- }
- }
- (*block) = n;
- } else {
- FreeBlock *nfb = (FreeBlock *) ((void *)
- (((char *) old_address) +
- new_real_size));
- void *p = (void *) (((char *) (*block)) + MSEG_ALIGNED_SIZE);
- if (do_map(p,new_real_size - old_real_size) == NULL) {
- RELEASE_LOCK();
- return NULL;
- }
- nfb->num = remaining_pages;
- nfb->next = (*block)->next;
- (*block) = nfb;
- }
- RELEASE_LOCK();
- return old_address;
- }
- }
-}
-#endif /* HALFWORD_HEAP */
diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h
index a1b000f51c..2284b3f8f1 100644
--- a/erts/emulator/sys/common/erl_mseg.h
+++ b/erts/emulator/sys/common/erl_mseg.h
@@ -22,15 +22,16 @@
#include "sys.h"
#include "erl_alloc_types.h"
+#include "erl_mmap.h"
-#ifndef HAVE_MMAP
-# define HAVE_MMAP 0
-#endif
-#ifndef HAVE_MREMAP
-# define HAVE_MREMAP 0
-#endif
-
-#if HAVE_MMAP
+/*
+ * We currently only enable mseg_alloc if we got
+ * a genuine mmap()/munmap() primitive. It is possible
+ * to utilize erts_mmap() withiout a mmap support but
+ * alloc_util needs to be prepared before we can do
+ * that.
+ */
+#ifdef ERTS_HAVE_GENUINE_OS_MMAP
# define HAVE_ERTS_MSEG 1
# define ERTS_HAVE_MSEG_SUPER_ALIGNED 1
#else
@@ -39,8 +40,7 @@
#endif
#if ERTS_HAVE_MSEG_SUPER_ALIGNED
-# define MSEG_ALIGN_BITS (18)
- /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
+# define MSEG_ALIGN_BITS ERTS_MMAP_SUPERALIGNED_BITS
#else
/* If we don't use super aligned multiblock carriers
* we will mmap with page size alignment (and thus use corresponding
@@ -68,6 +68,7 @@ typedef struct {
Uint rmcbf;
Uint mcs;
Uint nos;
+ ErtsMMapInit mmap;
} ErtsMsegInit_t;
#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \
@@ -75,7 +76,8 @@ typedef struct {
4*1024*1024, /* amcbf: Absolute max cache bad fit */ \
20, /* rmcbf: Relative max cache bad fit */ \
10, /* mcs: Max cache size */ \
- 1000 /* cci: Cache check interval */ \
+ 1000, /* cci: Cache check interval */ \
+ ERTS_MMAP_INIT_DEFAULT_INITER \
}
typedef struct {
@@ -91,12 +93,12 @@ typedef struct {
extern const ErtsMsegOpt_t erts_mseg_default_opt;
-void *erts_mseg_alloc(ErtsAlcType_t, Uint *, Uint);
-void *erts_mseg_alloc_opt(ErtsAlcType_t, Uint *, Uint, const ErtsMsegOpt_t *);
-void erts_mseg_dealloc(ErtsAlcType_t, void *, Uint, Uint);
-void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, Uint, Uint, const ErtsMsegOpt_t *);
-void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *, Uint);
-void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *, Uint, const ErtsMsegOpt_t *);
+void *erts_mseg_alloc(ErtsAlcType_t, UWord *, Uint);
+void *erts_mseg_alloc_opt(ErtsAlcType_t, UWord *, Uint, const ErtsMsegOpt_t *);
+void erts_mseg_dealloc(ErtsAlcType_t, void *, UWord, Uint);
+void erts_mseg_dealloc_opt(ErtsAlcType_t, void *, UWord, Uint, const ErtsMsegOpt_t *);
+void *erts_mseg_realloc(ErtsAlcType_t, void *, UWord, UWord *, Uint);
+void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, UWord, UWord *, Uint, const ErtsMsegOpt_t *);
void erts_mseg_clear_cache(void);
void erts_mseg_cache_check(void);
Uint erts_mseg_no( const ErtsMsegOpt_t *);