/*
* %CopyrightBegin%
*
* Copyright Ericsson AB 2002-2013. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* %CopyrightEnd%
*/
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include "sys.h"
#include "erl_process.h"
#include "erl_smp.h"
#include "atom.h"
#include "erl_mmap.h"
#include <stddef.h>
/* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
#if defined(DEBUG) || 0
# undef ERTS_MMAP_DEBUG
# define ERTS_MMAP_DEBUG
# ifndef ERTS_MMAP_OP_RINGBUF_SZ
# define ERTS_MMAP_OP_RINGBUF_SZ 100
# endif
#endif
#ifndef ERTS_MMAP_OP_RINGBUF_SZ
# define ERTS_MMAP_OP_RINGBUF_SZ 0
#endif
/* #define ERTS_MMAP_DEBUG_FILL_AREAS */
#ifdef ERTS_MMAP_DEBUG
# define ERTS_MMAP_ASSERT ERTS_ASSERT
#else
# define ERTS_MMAP_ASSERT(A) ((void) 1)
#endif
/*
* `mmap_state.sa.bot` and `mmap_state.sua.top` are read only after
* initialization, but the other pointers are not; i.e., only
* ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
*/
#define ERTS_MMAP_IN_SUPERCARRIER(PTR) \
(((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
< ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sa.bot))
#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \
(ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
(((UWord) (PTR)) - ((UWord) mmap_state.sa.bot) \
< ((UWord) mmap_state.sa.top) - ((UWord) mmap_state.sa.bot)))
#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \
(ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&mmap_state.mtx)), \
(((UWord) (PTR)) - ((UWord) mmap_state.sua.bot) \
< ((UWord) mmap_state.sua.top) - ((UWord) mmap_state.sua.bot)))
UWord erts_page_inv_mask;
#if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
# undef RBT_DEBUG
# define RBT_DEBUG
#endif
#ifdef RBT_DEBUG
# define RBT_ASSERT ERTS_ASSERT
# define IF_RBT_DEBUG(C) C
#else
# define RBT_ASSERT(x)
# define IF_RBT_DEBUG(C)
#endif
typedef struct RBTNode_ RBTNode;
struct RBTNode_ {
UWord parent_and_color; /* color in bit 0 of parent ptr */
RBTNode *left;
RBTNode *right;
};
#define RED_FLG (1)
#define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG))
#define IS_BLACK(N) (!IS_RED(N))
#define SET_RED(N) ((N)->parent_and_color |= RED_FLG)
#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG)
static ERTS_INLINE RBTNode* parent(RBTNode* node)
{
return (RBTNode*) (node->parent_and_color & ~RED_FLG);
}
static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent)
{
RBT_ASSERT(!((UWord)parent & RED_FLG));
node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG);
}
static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color)
{
RBT_ASSERT(!((UWord)parent & RED_FLG));
RBT_ASSERT(!(color & ~RED_FLG));
return ((UWord)parent) | color;
}
enum SortOrder {
ADDR_ORDER, /* only address order */
SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */
SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */
};
#ifdef HARD_DEBUG
static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"};
#endif
typedef struct {
RBTNode* root;
enum SortOrder order;
}RBTree;
#ifdef HARD_DEBUG
# define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE)
# define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ)
static int rbt_assert_is_member(RBTNode* root, RBTNode* node);
static RBTNode* check_tree(RBTree* tree, Uint);
#else
# define HARD_CHECK_IS_MEMBER(ROOT,NODE)
# define HARD_CHECK_TREE(TREE,SZ)
#endif
#if ERTS_MMAP_OP_RINGBUF_SZ
static int mmap_op_ix;
typedef enum {
ERTS_OP_TYPE_NONE,
ERTS_OP_TYPE_MMAP,
ERTS_OP_TYPE_MUNMAP,
ERTS_OP_TYPE_MREMAP
} ErtsMMapOpType;
typedef struct {
ErtsMMapOpType type;
void *result;
UWord in_size;
UWord out_size;
void *old_ptr;
UWord old_size;
} ErtsMMapOp;
static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
#define ERTS_MMAP_OP_RINGBUF_INIT() \
do { \
int ix__; \
for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\
mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
mmap_ops[ix__].result = NULL; \
mmap_ops[ix__].in_size = 0; \
mmap_ops[ix__].out_size = 0; \
mmap_ops[ix__].old_ptr = NULL; \
mmap_ops[ix__].old_size = 0; \
} \
mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
} while (0)
#define ERTS_MMAP_OP_START(SZ) \
do { \
int ix__; \
if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
mmap_op_ix = 0; \
ix__ = mmap_op_ix; \
mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \
mmap_ops[ix__].result = NULL; \
mmap_ops[ix__].in_size = (SZ); \
mmap_ops[ix__].out_size = 0; \
mmap_ops[ix__].old_ptr = NULL; \
mmap_ops[ix__].old_size = 0; \
} while (0)
#define ERTS_MMAP_OP_END(PTR, SZ) \
do { \
int ix__ = mmap_op_ix; \
mmap_ops[ix__].result = (PTR); \
mmap_ops[ix__].out_size = (SZ); \
} while (0)
#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
do { \
erts_smp_mtx_lock(&mmap_state.mtx); \
ERTS_MMAP_OP_START((IN_SZ)); \
ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
erts_smp_mtx_unlock(&mmap_state.mtx); \
} while (0)
#define ERTS_MUNMAP_OP(PTR, SZ) \
do { \
int ix__; \
if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
mmap_op_ix = 0; \
ix__ = mmap_op_ix; \
mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \
mmap_ops[ix__].result = NULL; \
mmap_ops[ix__].in_size = 0; \
mmap_ops[ix__].out_size = 0; \
mmap_ops[ix__].old_ptr = (PTR); \
mmap_ops[ix__].old_size = (SZ); \
} while (0)
#define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
do { \
erts_smp_mtx_lock(&mmap_state.mtx); \
ERTS_MUNMAP_OP((PTR), (SZ)); \
erts_smp_mtx_unlock(&mmap_state.mtx); \
} while (0)
#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
do { \
int ix__; \
if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
mmap_op_ix = 0; \
ix__ = mmap_op_ix; \
mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \
mmap_ops[ix__].result = NULL; \
mmap_ops[ix__].in_size = (IN_SZ); \
mmap_ops[ix__].out_size = (OLD_SZ); \
mmap_ops[ix__].old_ptr = (OLD_PTR); \
mmap_ops[ix__].old_size = (OLD_SZ); \
} while (0)
#define ERTS_MREMAP_OP_END(PTR, SZ) \
do { \
int ix__ = mmap_op_ix; \
mmap_ops[ix__].result = (PTR); \
mmap_ops[mmap_op_ix].out_size = (SZ); \
} while (0)
#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
do { \
erts_smp_mtx_lock(&mmap_state.mtx); \
ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
erts_smp_mtx_unlock(&mmap_state.mtx); \
} while (0)
#define ERTS_MMAP_OP_ABORT() \
do { \
int ix__ = mmap_op_ix; \
mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
mmap_ops[ix__].result = NULL; \
mmap_ops[ix__].in_size = 0; \
mmap_ops[ix__].out_size = 0; \
mmap_ops[ix__].old_ptr = NULL; \
mmap_ops[ix__].old_size = 0; \
if (--mmap_op_ix < 0) \
mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
} while (0)
#else
#define ERTS_MMAP_OP_RINGBUF_INIT()
#define ERTS_MMAP_OP_START(SZ)
#define ERTS_MMAP_OP_END(PTR, SZ)
#define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ)
#define ERTS_MUNMAP_OP(PTR, SZ)
#define ERTS_MUNMAP_OP_LCK(PTR, SZ)
#define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ)
#define ERTS_MREMAP_OP_END(PTR, SZ)
#define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ)
#define ERTS_MMAP_OP_ABORT()
#endif
typedef struct {
RBTNode snode; /* node in 'stree' */
RBTNode anode; /* node in 'atree' */
char* start;
char* end;
}ErtsFreeSegDesc;
typedef struct {
RBTree stree; /* size ordered tree */
RBTree atree; /* address ordered tree */
Uint nseg;
}ErtsFreeSegMap;
static struct {
int (*reserve_physical)(char *, UWord);
void (*unreserve_physical)(char *, UWord);
int supercarrier;
int no_os_mmap;
/*
* Super unaligend area is located above super aligned
* area. That is, `sa.bot` is beginning of the super
* carrier, `sua.top` is the end of the super carrier,
* and sa.top and sua.bot moves towards eachother.
*/
struct {
char *top;
char *bot;
ErtsFreeSegMap map;
} sua;
struct {
char *top;
char *bot;
ErtsFreeSegMap map;
} sa;
#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
int mmap_fd;
#endif
erts_smp_mtx_t mtx;
struct {
char *free_list;
char *unused_start;
char *unused_end;
char *new_area_hint;
Uint reserved;
} desc;
struct {
UWord free_seg_descs;
struct {
UWord curr;
UWord max;
} free_segs;
} no;
struct {
struct {
UWord total;
struct {
UWord total;
UWord sa;
UWord sua;
} used;
} supercarrier;
struct {
UWord used;
} os;
} size;
} mmap_state;
#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \
do { \
mmap_state.size.supercarrier.used.total += (SZ); \
mmap_state.size.supercarrier.used.sa += (SZ); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
<= mmap_state.size.supercarrier.total); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa \
<= mmap_state.size.supercarrier.used.total); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \
do { \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
mmap_state.size.supercarrier.used.total -= (SZ); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sa >= (SZ)); \
mmap_state.size.supercarrier.used.sa -= (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \
do { \
mmap_state.size.supercarrier.used.total += (SZ); \
mmap_state.size.supercarrier.used.sua += (SZ); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total \
<= mmap_state.size.supercarrier.total); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua \
<= mmap_state.size.supercarrier.used.total); \
} while (0)
#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \
do { \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.total >= (SZ)); \
mmap_state.size.supercarrier.used.total -= (SZ); \
ERTS_MMAP_ASSERT(mmap_state.size.supercarrier.used.sua >= (SZ)); \
mmap_state.size.supercarrier.used.sua -= (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_OS_INC(SZ) \
do { \
ERTS_MMAP_ASSERT(mmap_state.size.os.used + (SZ) >= (SZ)); \
mmap_state.size.os.used += (SZ); \
} while (0)
#define ERTS_MMAP_SIZE_OS_DEC(SZ) \
do { \
ERTS_MMAP_ASSERT(mmap_state.size.os.used >= (SZ)); \
mmap_state.size.os.used -= (SZ); \
} while (0)
static void
add_free_desc_area(char *start, char *end)
{
ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
UWord no;
ErtsFreeSegDesc *prev_desc, *desc;
char *desc_end;
no = 1;
prev_desc = (ErtsFreeSegDesc *) start;
prev_desc->start = mmap_state.desc.free_list;
desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
desc_end = start + 2*sizeof(ErtsFreeSegDesc);
while (desc_end <= end) {
desc->start = (char *) prev_desc;
prev_desc = desc;
desc = (ErtsFreeSegDesc *) desc_end;
desc_end += sizeof(ErtsFreeSegDesc);
no++;
}
mmap_state.desc.free_list = (char *) prev_desc;
mmap_state.no.free_seg_descs += no;
}
}
static ErtsFreeSegDesc *
add_unused_free_desc_area(void)
{
char *ptr;
if (!mmap_state.desc.unused_start)
return NULL;
ERTS_MMAP_ASSERT(mmap_state.desc.unused_end);
ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
<= mmap_state.desc.unused_end - mmap_state.desc.unused_start);
ptr = mmap_state.desc.unused_start + ERTS_PAGEALIGNED_SIZE;
add_free_desc_area(mmap_state.desc.unused_start, ptr);
if ((mmap_state.desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
mmap_state.desc.unused_start = ptr;
else
mmap_state.desc.unused_end = mmap_state.desc.unused_start = NULL;
ERTS_MMAP_ASSERT(mmap_state.desc.free_list);
return (ErtsFreeSegDesc *) mmap_state.desc.free_list;
}
static ERTS_INLINE ErtsFreeSegDesc *
alloc_desc(void)
{
ErtsFreeSegDesc *res;
res = (ErtsFreeSegDesc *) mmap_state.desc.free_list;
if (!res) {
res = add_unused_free_desc_area();
if (!res)
return NULL;
}
mmap_state.desc.free_list = res->start;
ASSERT(mmap_state.no.free_segs.curr < mmap_state.no.free_seg_descs);
mmap_state.no.free_segs.curr++;
if (mmap_state.no.free_segs.max < mmap_state.no.free_segs.curr)
mmap_state.no.free_segs.max = mmap_state.no.free_segs.curr;
return res;
}
static ERTS_INLINE void
free_desc(ErtsFreeSegDesc *desc)
{
desc->start = mmap_state.desc.free_list;
mmap_state.desc.free_list = (char *) desc;
ERTS_MMAP_ASSERT(mmap_state.no.free_segs.curr > 0);
mmap_state.no.free_segs.curr--;
}
static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
{
return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode));
}
static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode)
{
return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode));
}
static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node)
{
return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node);
}
static ERTS_INLINE SWord usable_size(enum SortOrder order,
ErtsFreeSegDesc* desc)
{
return ((order == SA_SZ_ADDR_ORDER) ?
ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start)
: desc->end - desc->start);
}
#ifdef HARD_DEBUG
static ERTS_INLINE SWord cmp_nodes(enum SortOrder order,
RBTNode* lhs, RBTNode* rhs)
{
ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs);
ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs);
RBT_ASSERT(lhs != rhs);
if (order != ADDR_ORDER) {
SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc);
if (diff) return diff;
}
if (order != SZ_REVERSE_ADDR_ORDER) {
return (char*)ldesc->start - (char*)rdesc->start;
}
else {
return (char*)rdesc->start - (char*)ldesc->start;
}
}
#endif /* HARD_DEBUG */
static ERTS_INLINE SWord cmp_with_node(enum SortOrder order,
SWord sz, char* addr, RBTNode* rhs)
{
ErtsFreeSegDesc* rdesc;
if (order != ADDR_ORDER) {
SWord diff;
rdesc = snode_to_desc(rhs);
diff = sz - usable_size(order, rdesc);
if (diff) return diff;
}
else
rdesc = anode_to_desc(rhs);
if (order != SZ_REVERSE_ADDR_ORDER)
return addr - (char*)rdesc->start;
else
return (char*)rdesc->start - addr;
}
static ERTS_INLINE void
left_rotate(RBTNode **root, RBTNode *x)
{
RBTNode *y = x->right;
x->right = y->left;
if (y->left)
set_parent(y->left, x);
set_parent(y, parent(x));
if (!parent(y)) {
RBT_ASSERT(*root == x);
*root = y;
}
else if (x == parent(x)->left)
parent(x)->left = y;
else {
RBT_ASSERT(x == parent(x)->right);
parent(x)->right = y;
}
y->left = x;
set_parent(x, y);
}
static ERTS_INLINE void
right_rotate(RBTNode **root, RBTNode *x)
{
RBTNode *y = x->left;
x->left = y->right;
if (y->right)
set_parent(y->right, x);
set_parent(y, parent(x));
if (!parent(y)) {
RBT_ASSERT(*root == x);
*root = y;
}
else if (x == parent(x)->right)
parent(x)->right = y;
else {
RBT_ASSERT(x == parent(x)->left);
parent(x)->left = y;
}
y->right = x;
set_parent(x, y);
}
/*
* Replace node x with node y
* NOTE: segment descriptor of y is not changed
*/
static ERTS_INLINE void
replace(RBTNode **root, RBTNode *x, RBTNode *y)
{
if (!parent(x)) {
RBT_ASSERT(*root == x);
*root = y;
}
else if (x == parent(x)->left)
parent(x)->left = y;
else {
RBT_ASSERT(x == parent(x)->right);
parent(x)->right = y;
}
if (x->left) {
RBT_ASSERT(parent(x->left) == x);
set_parent(x->left, y);
}
if (x->right) {
RBT_ASSERT(parent(x->right) == x);
set_parent(x->right, y);
}
y->parent_and_color = x->parent_and_color;
y->right = x->right;
y->left = x->left;
}
static void
tree_insert_fixup(RBTNode** root, RBTNode *node)
{
RBTNode *x = node, *y, *papa_x, *granpa_x;
/*
* Rearrange the tree so that it satisfies the Red-Black Tree properties
*/
papa_x = parent(x);
RBT_ASSERT(x != *root && IS_RED(papa_x));
do {
/*
* x and its parent are both red. Move the red pair up the tree
* until we get to the root or until we can separate them.
*/
granpa_x = parent(papa_x);
RBT_ASSERT(IS_RED(x));
RBT_ASSERT(IS_BLACK(granpa_x));
RBT_ASSERT(granpa_x);
if (papa_x == granpa_x->left) {
y = granpa_x->right;
if (IS_RED(y)) {
SET_BLACK(y);
SET_BLACK(papa_x);
SET_RED(granpa_x);
x = granpa_x;
}
else {
if (x == papa_x->right) {
left_rotate(root, papa_x);
papa_x = x;
x = papa_x->left;
}
RBT_ASSERT(x == granpa_x->left->left);
RBT_ASSERT(IS_RED(x));
RBT_ASSERT(IS_RED(papa_x));
RBT_ASSERT(IS_BLACK(granpa_x));
RBT_ASSERT(IS_BLACK(y));
SET_BLACK(papa_x);
SET_RED(granpa_x);
right_rotate(root, granpa_x);
RBT_ASSERT(x == parent(x)->left);
RBT_ASSERT(IS_RED(x));
RBT_ASSERT(IS_RED(parent(x)->right));
RBT_ASSERT(IS_BLACK(parent(x)));
break;
}
}
else {
RBT_ASSERT(papa_x == granpa_x->right);
y = granpa_x->left;
if (IS_RED(y)) {
SET_BLACK(y);
SET_BLACK(papa_x);
SET_RED(granpa_x);
x = granpa_x;
}
else {
if (x == papa_x->left) {
right_rotate(root, papa_x);
papa_x = x;
x = papa_x->right;
}
RBT_ASSERT(x == granpa_x->right->right);
RBT_ASSERT(IS_RED(x));
RBT_ASSERT(IS_RED(papa_x));
RBT_ASSERT(IS_BLACK(granpa_x));
RBT_ASSERT(IS_BLACK(y));
SET_BLACK(papa_x);
SET_RED(granpa_x);
left_rotate(root, granpa_x);
RBT_ASSERT(x == parent(x)->right);
RBT_ASSERT(IS_RED(x));
RBT_ASSERT(IS_RED(parent(x)->left));
RBT_ASSERT(IS_BLACK(parent(x)));
break;
}
}
} while (x != *root && (papa_x=parent(x), IS_RED(papa_x)));
SET_BLACK(*root);
}
static void
rbt_delete(RBTree* tree, RBTNode* del)
{
Uint spliced_is_black;
RBTNode *x, *y, *z = del, *papa_y;
RBTNode null_x; /* null_x is used to get the fixup started when we
splice out a node without children. */
HARD_CHECK_IS_MEMBER(tree->root, del);
HARD_CHECK_TREE(tree, 0);
null_x.parent_and_color = parent_and_color(NULL, !RED_FLG);
/* Remove node from tree... */
/* Find node to splice out */
if (!z->left || !z->right)
y = z;
else
/* Set y to z:s successor */
for(y = z->right; y->left; y = y->left);
/* splice out y */
x = y->left ? y->left : y->right;
spliced_is_black = IS_BLACK(y);
papa_y = parent(y);
if (x) {
set_parent(x, papa_y);
}
else if (spliced_is_black) {
x = &null_x;
x->right = x->left = NULL;
x->parent_and_color = parent_and_color(papa_y, !RED_FLG);
y->left = x;
}
if (!papa_y) {
RBT_ASSERT(tree->root == y);
tree->root = x;
}
else {
if (y == papa_y->left) {
papa_y->left = x;
}
else {
RBT_ASSERT(y == papa_y->right);
papa_y->right = x;
}
}
if (y != z) {
/* We spliced out the successor of z; replace z by the successor */
RBT_ASSERT(z != &null_x);
replace(&tree->root, z, y);
}
if (spliced_is_black) {
RBTNode* papa_x;
/* We removed a black node which makes the resulting tree
violate the Red-Black Tree properties. Fixup tree... */
papa_x = parent(x);
while (IS_BLACK(x) && papa_x) {
/*
* x has an "extra black" which we move up the tree
* until we reach the root or until we can get rid of it.
*
* y is the sibbling of x
*/
if (x == papa_x->left) {
y = papa_x->right;
RBT_ASSERT(y);
if (IS_RED(y)) {
RBT_ASSERT(y->right);
RBT_ASSERT(y->left);
SET_BLACK(y);
RBT_ASSERT(IS_BLACK(papa_x));
SET_RED(papa_x);
left_rotate(&tree->root, papa_x);
RBT_ASSERT(papa_x == parent(x));
y = papa_x->right;
}
RBT_ASSERT(y);
RBT_ASSERT(IS_BLACK(y));
if (IS_BLACK(y->left) && IS_BLACK(y->right)) {
SET_RED(y);
}
else {
if (IS_BLACK(y->right)) {
SET_BLACK(y->left);
SET_RED(y);
right_rotate(&tree->root, y);
RBT_ASSERT(papa_x == parent(x));
y = papa_x->right;
}
RBT_ASSERT(y);
if (IS_RED(papa_x)) {
SET_BLACK(papa_x);
SET_RED(y);
}
RBT_ASSERT(y->right);
SET_BLACK(y->right);
left_rotate(&tree->root, papa_x);
x = tree->root;
break;
}
}
else {
RBT_ASSERT(x == papa_x->right);
y = papa_x->left;
RBT_ASSERT(y);
if (IS_RED(y)) {
RBT_ASSERT(y->right);
RBT_ASSERT(y->left);
SET_BLACK(y);
RBT_ASSERT(IS_BLACK(papa_x));
SET_RED(papa_x);
right_rotate(&tree->root, papa_x);
RBT_ASSERT(papa_x == parent(x));
y = papa_x->left;
}
RBT_ASSERT(y);
RBT_ASSERT(IS_BLACK(y));
if (IS_BLACK(y->right) && IS_BLACK(y->left)) {
SET_RED(y);
}
else {
if (IS_BLACK(y->left)) {
SET_BLACK(y->right);
SET_RED(y);
left_rotate(&tree->root, y);
RBT_ASSERT(papa_x == parent(x));
y = papa_x->left;
}
RBT_ASSERT(y);
if (IS_RED(papa_x)) {
SET_BLACK(papa_x);
SET_RED(y);
}
RBT_ASSERT(y->left);
SET_BLACK(y->left);
right_rotate(&tree->root, papa_x);
x = tree->root;
break;
}
}
x = papa_x;
papa_x = parent(x);
}
SET_BLACK(x);
papa_x = parent(&null_x);
if (papa_x) {
if (papa_x->left == &null_x)
papa_x->left = NULL;
else {
RBT_ASSERT(papa_x->right == &null_x);
papa_x->right = NULL;
}
RBT_ASSERT(!null_x.left);
RBT_ASSERT(!null_x.right);
}
else if (tree->root == &null_x) {
tree->root = NULL;
RBT_ASSERT(!null_x.left);
RBT_ASSERT(!null_x.right);
}
}
HARD_CHECK_TREE(tree, 0);
}
static void
rbt_insert(RBTree* tree, RBTNode* node)
{
#ifdef RBT_DEBUG
ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL;
#endif
ErtsFreeSegDesc* desc = node_to_desc(tree->order, node);
char* seg_addr = desc->start;
SWord seg_sz = desc->end - desc->start;
HARD_CHECK_TREE(tree, 0);
node->left = NULL;
node->right = NULL;
if (!tree->root) {
node->parent_and_color = parent_and_color(NULL, !RED_FLG);
tree->root = node;
}
else {
RBTNode *x = tree->root;
while (1) {
SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x);
if (diff < 0) {
IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x));
if (!x->left) {
node->parent_and_color = parent_and_color(x, RED_FLG);
x->left = node;
break;
}
x = x->left;
}
else {
RBT_ASSERT(diff > 0);
IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x));
if (!x->right) {
node->parent_and_color = parent_and_color(x, RED_FLG);
x->right = node;
break;
}
x = x->right;
}
}
RBT_ASSERT(parent(node));
#ifdef RBT_DEBUG
if (tree->order == ADDR_ORDER) {
RBT_ASSERT(!dbg_under || dbg_under->end < desc->start);
RBT_ASSERT(!dbg_over || dbg_over->start > desc->end);
}
#endif
RBT_ASSERT(IS_RED(node));
if (IS_RED(parent(node)))
tree_insert_fixup(&tree->root, node);
}
HARD_CHECK_TREE(tree, 0);
}
/*
* Traverse tree in (reverse) sorting order
*/
static void
rbt_foreach_node(RBTree* tree,
void (*fn)(RBTNode*,void*),
void* arg, int reverse)
{
#ifdef HARD_DEBUG
Sint blacks = -1;
Sint curr_blacks = 1;
Uint depth = 1;
Uint max_depth = 0;
Uint node_cnt = 0;
#endif
enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state;
RBTNode *x = tree->root;
RBT_ASSERT(!x || !parent(x));
state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
while (x) {
switch (state) {
case RECURSE_LEFT:
if (x->left) {
RBT_ASSERT(parent(x->left) == x);
#ifdef HARD_DEBUG
++depth;
if (IS_BLACK(x->left))
curr_blacks++;
#endif
x = x->left;
state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
}
else {
#ifdef HARD_DEBUG
if (blacks < 0)
blacks = curr_blacks;
RBT_ASSERT(blacks == curr_blacks);
#endif
state = reverse ? RETURN_TO_PARENT : DO_NODE;
}
break;
case DO_NODE:
#ifdef HARD_DEBUG
++node_cnt;
if (depth > max_depth)
max_depth = depth;
#endif
(*fn) (x, arg); /* Do it! */
state = reverse ? RECURSE_LEFT : RECURSE_RIGHT;
break;
case RECURSE_RIGHT:
if (x->right) {
RBT_ASSERT(parent(x->right) == x);
#ifdef HARD_DEBUG
++depth;
if (IS_BLACK(x->right))
curr_blacks++;
#endif
x = x->right;
state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
}
else {
#ifdef HARD_DEBUG
if (blacks < 0)
blacks = curr_blacks;
RBT_ASSERT(blacks == curr_blacks);
#endif
state = reverse ? DO_NODE : RETURN_TO_PARENT;
}
break;
case RETURN_TO_PARENT:
#ifdef HARD_DEBUG
if (IS_BLACK(x))
curr_blacks--;
--depth;
#endif
if (parent(x)) {
if (x == parent(x)->left) {
state = reverse ? RETURN_TO_PARENT : DO_NODE;
}
else {
RBT_ASSERT(x == parent(x)->right);
state = reverse ? DO_NODE : RETURN_TO_PARENT;
}
}
x = parent(x);
break;
}
}
#ifdef HARD_DEBUG
RBT_ASSERT(depth == 0 || (!tree->root && depth==1));
RBT_ASSERT(curr_blacks == 0);
RBT_ASSERT((1 << (max_depth/2)) <= node_cnt);
#endif
}
#if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG)
static RBTNode* rbt_prev_node(RBTNode* node)
{
RBTNode* x;
if (node->left) {
for (x=node->left; x->right; x=x->right)
;
return x;
}
for (x=node; parent(x); x=parent(x)) {
if (parent(x)->right == x)
return parent(x);
}
return NULL;
}
static RBTNode* rbt_next_node(RBTNode* node)
{
RBTNode* x;
if (node->right) {
for (x=node->right; x->left; x=x->left)
;
return x;
}
for (x=node; parent(x); x=parent(x)) {
if (parent(x)->left == x)
return parent(x);
}
return NULL;
}
#endif /* RBT_DEBUG || HARD_DEBUG_MSEG */
/* The API to keep track of a bunch of separated (free) segments
(non-overlapping and non-adjacent).
*/
static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder);
static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end,
ErtsFreeSegDesc** under, ErtsFreeSegDesc** over);
static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*);
static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz);
static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order)
{
map->atree.root = NULL;
map->atree.order = ADDR_ORDER;
map->stree.root = NULL;
map->stree.order = order;
map->nseg = 0;
}
/* Lookup directly adjacent free segments to the given area [start->end].
* The given area must not contain any free segments.
*/
static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end,
ErtsFreeSegDesc** under, ErtsFreeSegDesc** over)
{
RBTNode* x = map->atree.root;
*under = NULL;
*over = NULL;
while (x) {
if (start < anode_to_desc(x)->start) {
RBT_ASSERT(end <= anode_to_desc(x)->start);
if (end == anode_to_desc(x)->start) {
RBT_ASSERT(!*over);
*over = anode_to_desc(x);
}
x = x->left;
}
else {
RBT_ASSERT(start >= anode_to_desc(x)->end);
if (start == anode_to_desc(x)->end) {
RBT_ASSERT(!*under);
*under = anode_to_desc(x);
}
x = x->right;
}
}
}
/* Initialize 'desc' and insert as new free segment [start->end].
* The new segment must not contain or be adjacent to any free segment in 'map'.
*/
static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
char* start, char* end)
{
desc->start = start;
desc->end = end;
rbt_insert(&map->atree, &desc->anode);
rbt_insert(&map->stree, &desc->snode);
map->nseg++;
}
/* Resize existing free segment 'desc' to [start->end].
* The new segment location must overlap the old location and
* it must not contain or be adjacent to any other free segment in 'map'.
*/
static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
char* start, char* end)
{
#ifdef RBT_DEBUG
RBTNode* prev = rbt_prev_node(&desc->anode);
RBTNode* next = rbt_next_node(&desc->anode);
RBT_ASSERT(!prev || anode_to_desc(prev)->end < start);
RBT_ASSERT(!next || anode_to_desc(next)->start > end);
#endif
rbt_delete(&map->stree, &desc->snode);
desc->start = start;
desc->end = end;
rbt_insert(&map->stree, &desc->snode);
}
/* Delete existing free segment 'desc' from 'map'.
*/
static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc)
{
rbt_delete(&map->atree, &desc->anode);
rbt_delete(&map->stree, &desc->snode);
map->nseg--;
}
/* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes.
*/
static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz)
{
RBTNode* x = map->stree.root;
ErtsFreeSegDesc* best_desc = NULL;
const enum SortOrder order = map->stree.order;
while (x) {
ErtsFreeSegDesc* desc = snode_to_desc(x);
SWord seg_sz = usable_size(order, desc);
if (seg_sz < need_sz) {
x = x->right;
}
else {
best_desc = desc;
x = x->left;
}
}
return best_desc;
}
struct build_arg_t
{
Process* p;
Eterm* hp;
Eterm acc;
};
static void build_free_seg_tuple(RBTNode* node, void* arg)
{
struct build_arg_t* a = (struct build_arg_t*)arg;
ErtsFreeSegDesc* desc = anode_to_desc(node);
Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start);
Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end);
Eterm tpl = TUPLE2(a->hp, start, end);
a->hp += 3;
a->acc = CONS(a->hp, tpl, a->acc);
a->hp += 2;
}
static
Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
{
struct build_arg_t barg;
Eterm* hp_end;
const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */
barg.p = p;
barg.hp = HAlloc(p, may_need);
hp_end = barg.hp + may_need;
barg.acc = NIL;
rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1);
RBT_ASSERT(barg.hp <= hp_end);
HRelease(p, hp_end, barg.hp);
return barg.acc;
}
#if ERTS_HAVE_OS_MMAP
/* Implementation of os_mmap()/os_munmap()/os_mremap()... */
#if HAVE_MMAP
# define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
# if defined(MAP_ANONYMOUS)
# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
# define ERTS_MMAP_FD (-1)
# elif defined(MAP_ANON)
# define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
# define ERTS_MMAP_FD (-1)
# else
# define ERTS_MMAP_FLAGS (MAP_PRIVATE)
# define ERTS_MMAP_FD mmap_state.mmap_fd
# endif
#endif
static ERTS_INLINE void *
os_mmap(void *hint_ptr, UWord size, int try_superalign)
{
#if HAVE_MMAP
void *res;
#ifdef MAP_ALIGN
if (try_superalign)
res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT,
ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
else
#endif
res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT,
ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
if (res == MAP_FAILED)
return NULL;
return res;
#elif HAVE_VIRTUALALLOC
return (void *) VirtualAlloc(NULL, (SIZE_T) size,
MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
#else
# error "missing mmap() or similar"
#endif
}
static ERTS_INLINE void
os_munmap(void *ptr, UWord size)
{
#if HAVE_MMAP
#ifdef ERTS_MMAP_DEBUG
int res =
#endif
munmap(ptr, size);
ERTS_MMAP_ASSERT(res == 0);
#elif HAVE_VIRTUALALLOC
#ifdef DEBUG
BOOL res =
#endif
VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE);
ERTS_MMAP_ASSERT(res != 0);
#else
# error "missing munmap() or similar"
#endif
}
#ifdef ERTS_HAVE_OS_MREMAP
# if HAVE_MREMAP
# if defined(__NetBSD__)
# define ERTS_MREMAP_FLAGS (0)
# else
# define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE)
# endif
# endif
static ERTS_INLINE void *
os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
{
void *new_seg;
#if HAVE_MREMAP
new_seg = mremap(ptr, (size_t) old_size,
# if defined(__NetBSD__)
NULL,
# endif
(size_t) new_size, ERTS_MREMAP_FLAGS);
if (new_seg == (void *) MAP_FAILED)
return NULL;
return new_seg;
#else
# error "missing mremap() or similar"
#endif
}
#endif
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
#if HAVE_MMAP
#define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
#define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
#define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
#define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
#define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
#define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
static int
os_reserve_physical(char *ptr, UWord size)
{
void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT,
ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
return 0;
return 1;
}
static void
os_unreserve_physical(char *ptr, UWord size)
{
void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
erl_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
}
static void *
os_mmap_virtual(char *ptr, UWord size)
{
void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
ERTS_MMAP_VIRTUAL_FLAGS, ERTS_MMAP_FD, 0);
if (res == (void *) MAP_FAILED)
return NULL;
return res;
}
#else
#error "Missing reserve/unreserve physical memory implementation"
#endif
#endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */
#endif /* ERTS_HAVE_OS_MMAP */
static int reserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
Uint32 *uip, *end = (Uint32 *) (ptr + size);
for (uip = (Uint32 *) ptr; uip < end; uip++)
ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef);
for (uip = (Uint32 *) ptr; uip < end; uip++)
*uip = (Uint32) 0xfeedfeed;
#endif
return 1;
}
static void unreserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
Uint32 *uip, *end = (Uint32 *) (ptr + size);
for (uip = (Uint32 *) ptr; uip < end; uip++)
*uip = (Uint32) 0xdeadbeef;
#endif
}
static UWord
alloc_desc_insert_free_seg(ErtsFreeSegMap *map, char* start, char* end)
{
char *ptr;
ErtsFreeSegMap *da_map;
ErtsFreeSegDesc *desc = alloc_desc();
if (desc) {
insert_free_seg(map, desc, start, end);
return 0;
}
/*
* Ahh; ran out of free segment descriptors.
*
* First try to map a new page...
*/
#if ERTS_HAVE_OS_MMAP
if (!mmap_state.no_os_mmap) {
ptr = os_mmap(mmap_state.desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0);
if (ptr) {
mmap_state.desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
add_free_desc_area(ptr, ptr+ERTS_PAGEALIGNED_SIZE);
desc = alloc_desc();
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, start, end);
return 0;
}
}
#endif
/*
* ...then try to find a good place in the supercarrier...
*/
da_map = &mmap_state.sua.map;
desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
if (desc) {
if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
else
desc = NULL;
}
else {
da_map = &mmap_state.sa.map;
desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
if (desc) {
if (mmap_state.reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
else
desc = NULL;
}
}
if (desc) {
char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
add_free_desc_area(desc->start, da_end);
if (da_end != desc->end)
resize_free_seg(da_map, desc, da_end, desc->end);
else {
delete_free_seg(da_map, desc);
free_desc(desc);
}
desc = alloc_desc();
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, start, end);
return 0;
}
/*
* ... and then as last resort use the first page of the
* free segment we are trying to insert for free descriptors.
*/
ptr = start + ERTS_PAGEALIGNED_SIZE;
ERTS_MMAP_ASSERT(ptr <= end);
add_free_desc_area(start, ptr);
if (ptr != end) {
desc = alloc_desc();
ERTS_MMAP_ASSERT(desc);
insert_free_seg(map, desc, ptr, end);
}
return ERTS_PAGEALIGNED_SIZE;
}
void *
erts_mmap(Uint32 flags, UWord *sizep)
{
char *seg;
UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);
/* Map in premapped supercarrier */
if (mmap_state.supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
char *end;
ErtsFreeSegDesc *desc;
Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
erts_smp_mtx_lock(&mmap_state.mtx);
ERTS_MMAP_OP_START(*sizep);
if (!superaligned) {
desc = lookup_free_seg(&mmap_state.sua.map, asize);
if (desc) {
seg = desc->start;
end = seg+asize;
if (!mmap_state.reserve_physical(seg, asize))
goto supercarrier_reserve_failure;
if (desc->end == end) {
delete_free_seg(&mmap_state.sua.map, desc);
free_desc(desc);
}
else {
ERTS_MMAP_ASSERT(end < desc->end);
resize_free_seg(&mmap_state.sua.map, desc, end, desc->end);
}
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
goto supercarrier_success;
}
if (asize <= mmap_state.sua.bot - mmap_state.sa.top) {
if (!mmap_state.reserve_physical(mmap_state.sua.bot - asize,
asize))
goto supercarrier_reserve_failure;
mmap_state.sua.bot -= asize;
seg = mmap_state.sua.bot;
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
goto supercarrier_success;
}
}
asize = ERTS_SUPERALIGNED_CEILING(asize);
desc = lookup_free_seg(&mmap_state.sa.map, asize);
if (desc) {
char *start = seg = desc->start;
seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
end = seg+asize;
if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
goto supercarrier_reserve_failure;
ERTS_MMAP_SIZE_SC_SA_INC(asize);
if (desc->end == end) {
if (start != seg)
resize_free_seg(&mmap_state.sa.map, desc, start, seg);
else {
delete_free_seg(&mmap_state.sa.map, desc);
free_desc(desc);
}
}
else {
ERTS_MMAP_ASSERT(end < desc->end);
resize_free_seg(&mmap_state.sa.map, desc, end, desc->end);
if (start != seg) {
UWord ad_sz;
ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
start, seg);
start += ad_sz;
if (start != seg)
mmap_state.unreserve_physical(start, (UWord) (seg - start));
}
}
goto supercarrier_success;
}
if (superaligned) {
char *start = mmap_state.sa.top;
seg = (char *) ERTS_SUPERALIGNED_CEILING(start);
if (asize + (seg - start) <= mmap_state.sua.bot - start) {
end = seg + asize;
if (!mmap_state.reserve_physical(start, (UWord) (end - start)))
goto supercarrier_reserve_failure;
mmap_state.sa.top = end;
ERTS_MMAP_SIZE_SC_SA_INC(asize);
if (start != seg) {
UWord ad_sz;
ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
start, seg);
start += ad_sz;
if (start != seg)
mmap_state.unreserve_physical(start, (UWord) (seg - start));
}
goto supercarrier_success;
}
desc = lookup_free_seg(&mmap_state.sua.map, asize + ERTS_SUPERALIGNED_SIZE);
if (desc) {
char *org_start = desc->start;
char *org_end = desc->end;
seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
end = seg + asize;
if (!mmap_state.reserve_physical(seg, (UWord) (org_end - seg)))
goto supercarrier_reserve_failure;
ERTS_MMAP_SIZE_SC_SUA_INC(asize);
if (org_start != seg) {
ERTS_MMAP_ASSERT(org_start < seg);
resize_free_seg(&mmap_state.sua.map, desc, org_start, seg);
desc = NULL;
}
if (end != org_end) {
UWord ad_sz = 0;
ERTS_MMAP_ASSERT(end < org_end);
if (desc)
resize_free_seg(&mmap_state.sua.map, desc, end, org_end);
else
ad_sz = alloc_desc_insert_free_seg(&mmap_state.sua.map,
end, org_end);
end += ad_sz;
if (end != org_end)
mmap_state.unreserve_physical(end,
(UWord) (org_end - end));
}
goto supercarrier_success;
}
}
ERTS_MMAP_OP_ABORT();
erts_smp_mtx_unlock(&mmap_state.mtx);
}
#if ERTS_HAVE_OS_MMAP
/* Map using OS primitives */
if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mmap_state.no_os_mmap) {
if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
seg = os_mmap(NULL, asize, 0);
if (!seg)
goto failure;
}
else {
asize = ERTS_SUPERALIGNED_CEILING(*sizep);
seg = os_mmap(NULL, asize, 1);
if (!seg)
goto failure;
if (!ERTS_IS_SUPERALIGNED(seg)) {
char *ptr;
UWord sz;
os_munmap(seg, asize);
ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1);
if (!ptr)
goto failure;
seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr);
sz = (UWord) (seg - ptr);
ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE);
if (sz)
os_munmap(ptr, sz);
sz = ERTS_SUPERALIGNED_SIZE - sz;
if (sz)
os_munmap(seg+asize, sz);
}
}
ERTS_MMAP_OP_LCK(seg, *sizep, asize);
ERTS_MMAP_SIZE_OS_INC(asize);
*sizep = asize;
return (void *) seg;
}
failure:
#endif
ERTS_MMAP_OP_LCK(NULL, *sizep, 0);
*sizep = 0;
return NULL;
supercarrier_success:
#ifdef ERTS_MMAP_DEBUG
if (ERTS_MMAPFLG_SUPERALIGNED & flags) {
ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg));
ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
}
else {
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
}
#endif
ERTS_MMAP_OP_END(seg, asize);
erts_smp_mtx_unlock(&mmap_state.mtx);
*sizep = asize;
return (void *) seg;
supercarrier_reserve_failure:
erts_smp_mtx_unlock(&mmap_state.mtx);
*sizep = 0;
return NULL;
}
void
erts_munmap(Uint32 flags, void *ptr, UWord size)
{
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));
if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
#if ERTS_HAVE_OS_MMAP
ERTS_MUNMAP_OP_LCK(ptr, size);
ERTS_MMAP_SIZE_OS_DEC(size);
os_munmap(ptr, size);
#endif
}
else {
char *start, *end;
ErtsFreeSegMap *map;
ErtsFreeSegDesc *prev, *next, *desc;
UWord ad_sz = 0;
ERTS_MMAP_ASSERT(mmap_state.supercarrier);
start = (char *) ptr;
end = start + size;
erts_smp_mtx_lock(&mmap_state.mtx);
ERTS_MUNMAP_OP(ptr, size);
if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
map = &mmap_state.sa.map;
adjacent_free_seg(map, start, end, &prev, &next);
ERTS_MMAP_SIZE_SC_SA_DEC(size);
if (end == mmap_state.sa.top) {
ERTS_MMAP_ASSERT(!next);
if (prev) {
start = prev->start;
delete_free_seg(map, prev);
free_desc(prev);
}
mmap_state.sa.top = start;
goto supercarrier_success;
}
}
else {
map = &mmap_state.sua.map;
adjacent_free_seg(map, start, end, &prev, &next);
ERTS_MMAP_SIZE_SC_SUA_DEC(size);
if (start == mmap_state.sua.bot) {
ERTS_MMAP_ASSERT(!prev);
if (next) {
end = next->end;
delete_free_seg(map, next);
free_desc(next);
}
mmap_state.sua.bot = end;
goto supercarrier_success;
}
}
desc = NULL;
if (next) {
ERTS_MMAP_ASSERT(end < next->end);
end = next->end;
if (prev) {
delete_free_seg(map, next);
free_desc(next);
goto save_prev;
}
desc = next;
} else if (prev) {
save_prev:
ERTS_MMAP_ASSERT(prev->start < start);
start = prev->start;
desc = prev;
}
if (desc)
resize_free_seg(map, desc, start, end);
else
ad_sz = alloc_desc_insert_free_seg(map, start, end);
supercarrier_success: {
UWord unres_sz;
ERTS_MMAP_ASSERT(size >= ad_sz);
unres_sz = size - ad_sz;
if (unres_sz)
mmap_state.unreserve_physical(((char *) ptr) + ad_sz, unres_sz);
erts_smp_mtx_unlock(&mmap_state.mtx);
}
}
}
static void *
remap_move(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
{
UWord size = *sizep;
void *new_ptr = erts_mmap(flags, &size);
if (!new_ptr)
return NULL;
*sizep = size;
if (old_size < size)
size = old_size;
sys_memcpy(new_ptr, ptr, (size_t) size);
erts_munmap(flags, ptr, old_size);
return new_ptr;
}
void *
erts_mremap(Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
{
void *new_ptr;
Uint32 superaligned;
UWord asize;
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep));
if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
ERTS_MMAP_ASSERT(!mmap_state.no_os_mmap);
if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mmap_state.supercarrier) {
new_ptr = remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
old_size, sizep);
if (new_ptr)
return new_ptr;
}
if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) {
ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size);
return NULL;
}
#if ERTS_HAVE_OS_MREMAP || ERTS_HAVE_GENUINE_OS_MMAP
superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
if (superaligned) {
asize = ERTS_SUPERALIGNED_CEILING(*sizep);
if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) {
ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
*sizep = asize;
return ptr;
}
}
else {
asize = ERTS_PAGEALIGNED_CEILING(*sizep);
if (asize == old_size) {
ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
*sizep = asize;
return ptr;
}
}
#if ERTS_HAVE_GENUINE_OS_MMAP
if (asize < old_size
&& (!superaligned
|| ERTS_IS_SUPERALIGNED(ptr))) {
UWord um_sz;
new_ptr = ((char *) ptr) + asize;
ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr);
um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr);
ERTS_MMAP_SIZE_OS_DEC(um_sz);
os_munmap(new_ptr, um_sz);
ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
*sizep = asize;
return ptr;
}
#endif
#if ERTS_HAVE_OS_MREMAP
if (superaligned)
return remap_move(flags, new_ptr, old_size, sizep);
else {
new_ptr = os_mremap(ptr, old_size, asize, 0);
if (!new_ptr)
return NULL;
if (asize > old_size)
ERTS_MMAP_SIZE_OS_INC(asize - old_size);
else
ERTS_MMAP_SIZE_OS_DEC(old_size - asize);
ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize);
*sizep = asize;
return new_ptr;
}
#endif
#endif
}
else { /* In super carrier */
char *start, *end, *new_end;
ErtsFreeSegMap *map;
ErtsFreeSegDesc *prev, *next;
UWord ad_sz = 0;
ERTS_MMAP_ASSERT(mmap_state.supercarrier);
if (ERTS_MMAPFLG_OS_ONLY & flags)
return remap_move(flags, ptr, old_size, sizep);
superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
asize = (superaligned
? ERTS_SUPERALIGNED_CEILING(*sizep)
: ERTS_PAGEALIGNED_CEILING(*sizep));
erts_smp_mtx_lock(&mmap_state.mtx);
if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
? (!superaligned && lookup_free_seg(&mmap_state.sua.map, asize))
: (superaligned && lookup_free_seg(&mmap_state.sa.map, asize))) {
erts_smp_mtx_unlock(&mmap_state.mtx);
/*
* Segment currently in wrong area (due to a previous memory
* shortage), move it to the right area.
* (remap_move() will succeed)
*/
return remap_move(ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags, ptr,
old_size, sizep);
}
ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
if (asize == old_size) {
new_ptr = ptr;
goto supercarrier_resize_success;
}
start = (char *) ptr;
end = start + old_size;
new_end = start+asize;
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
if (asize < old_size) {
UWord unres_sz;
new_ptr = ptr;
if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
map = &mmap_state.sua.map;
ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
}
else {
if (end == mmap_state.sa.top) {
mmap_state.sa.top = new_end;
mmap_state.unreserve_physical(((char *) ptr) + asize,
old_size - asize);
goto supercarrier_resize_success;
}
ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
map = &mmap_state.sa.map;
}
adjacent_free_seg(map, start, end, &prev, &next);
if (next)
resize_free_seg(map, next, new_end, next->end);
else
ad_sz = alloc_desc_insert_free_seg(map, new_end, end);
ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
unres_sz = old_size - asize - ad_sz;
if (unres_sz)
mmap_state.unreserve_physical(((char *) ptr) + asize + ad_sz,
unres_sz);
goto supercarrier_resize_success;
}
if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
adjacent_free_seg(&mmap_state.sua.map, start, end, &prev, &next);
if (next && new_end <= next->end) {
if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
asize - old_size))
goto supercarrier_reserve_failure;
if (new_end < next->end)
resize_free_seg(&mmap_state.sua.map, next, new_end, next->end);
else {
delete_free_seg(&mmap_state.sua.map, next);
free_desc(next);
}
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
goto supercarrier_resize_success;
}
}
else { /* Superaligned area */
if (end == mmap_state.sa.top) {
if (new_end <= mmap_state.sua.bot) {
if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
asize - old_size))
goto supercarrier_reserve_failure;
mmap_state.sa.top = new_end;
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
goto supercarrier_resize_success;
}
}
else {
adjacent_free_seg(&mmap_state.sa.map, start, end, &prev, &next);
if (next && new_end <= next->end) {
if (!mmap_state.reserve_physical(((char *) ptr) + old_size,
asize - old_size))
goto supercarrier_reserve_failure;
if (new_end < next->end)
resize_free_seg(&mmap_state.sa.map, next, new_end, next->end);
else {
delete_free_seg(&mmap_state.sa.map, next);
free_desc(next);
}
new_ptr = ptr;
ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
goto supercarrier_resize_success;
}
}
}
ERTS_MMAP_OP_ABORT();
erts_smp_mtx_unlock(&mmap_state.mtx);
/* Failed to resize... */
}
return remap_move(flags, ptr, old_size, sizep);
supercarrier_resize_success:
#ifdef ERTS_MMAP_DEBUG
if ((ERTS_MMAPFLG_SUPERALIGNED & flags)
|| ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) {
ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr));
ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
}
else {
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr));
ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
}
#endif
ERTS_MREMAP_OP_END(new_ptr, asize);
erts_smp_mtx_unlock(&mmap_state.mtx);
*sizep = asize;
return new_ptr;
supercarrier_reserve_failure:
ERTS_MREMAP_OP_END(NULL, old_size);
erts_smp_mtx_unlock(&mmap_state.mtx);
*sizep = old_size;
return NULL;
}
int erts_mmap_in_supercarrier(void *ptr)
{
return ERTS_MMAP_IN_SUPERCARRIER(ptr);
}
static struct {
Eterm total;
Eterm total_sa;
Eterm total_sua;
Eterm used;
Eterm used_sa;
Eterm used_sua;
Eterm max;
Eterm allocated;
Eterm reserved;
Eterm sizes;
Eterm free_segs;
Eterm supercarrier;
Eterm os;
Eterm scs;
Eterm sco;
Eterm scrpm;
Eterm scrfsd;
int is_initialized;
erts_mtx_t init_mutex;
}am;
static void ERTS_INLINE atom_init(Eterm *atom, char *name)
{
*atom = am_atom_put(name, strlen(name));
}
#define AM_INIT(AM) atom_init(&am.AM, #AM)
static void init_atoms(void)
{
erts_mtx_lock(&am.init_mutex);
if (!am.is_initialized) {
AM_INIT(total);
AM_INIT(total_sa);
AM_INIT(total_sua);
AM_INIT(used);
AM_INIT(used_sa);
AM_INIT(used_sua);
AM_INIT(max);
AM_INIT(allocated);
AM_INIT(reserved);
AM_INIT(sizes);
AM_INIT(free_segs);
AM_INIT(supercarrier);
AM_INIT(os);
AM_INIT(scs);
AM_INIT(sco);
AM_INIT(scrpm);
AM_INIT(scrfsd);
am.is_initialized = 1;
}
erts_mtx_unlock(&am.init_mutex);
};
#ifdef HARD_DEBUG_MSEG
static void hard_dbg_mseg_init(void);
#endif
void
erts_mmap_init(ErtsMMapInit *init)
{
int virtual_map = 0;
char *start = NULL, *end = NULL;
UWord pagesize;
#if defined(__WIN32__)
SYSTEM_INFO sysinfo;
GetSystemInfo(&sysinfo);
pagesize = (UWord) sysinfo.dwPageSize;
#elif defined(_SC_PAGESIZE)
pagesize = (UWord) sysconf(_SC_PAGESIZE);
#elif defined(HAVE_GETPAGESIZE)
pagesize = (UWord) getpagesize();
#else
# error "Do not know how to get page size"
#endif
#if defined(HARD_DEBUG) || 0
erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs);
erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco);
erts_fprintf(stderr, "erts_mmap: scrfsd = %i\n", init->scrfsd);
#endif
erts_page_inv_mask = pagesize - 1;
if (pagesize & erts_page_inv_mask)
erl_exit(-1, "erts_mmap: Invalid pagesize: %bpu\n",
pagesize);
ERTS_MMAP_OP_RINGBUF_INIT();
mmap_state.supercarrier = 0;
mmap_state.reserve_physical = reserve_noop;
mmap_state.unreserve_physical = unreserve_noop;
#if HAVE_MMAP && !defined(MAP_ANON)
mmap_state.mmap_fd = open("/dev/zero", O_RDWR);
if (mmap_state.mmap_fd < 0)
erl_exit(-1, "erts_mmap: Failed to open /dev/zero\n");
#endif
erts_smp_mtx_init(&mmap_state.mtx, "erts_mmap");
erts_mtx_init(&am.init_mutex, "mmap_init_atoms");
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (init->virtual_range.start) {
char *ptr;
UWord sz;
ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
sz = end - ptr;
start = os_mmap_virtual(ptr, sz);
if (!start || start > ptr || start >= end)
erl_exit(-1,
"erts_mmap: Failed to create virtual range for super carrier\n");
sz = start - ptr;
if (sz)
os_munmap(end, sz);
mmap_state.reserve_physical = os_reserve_physical;
mmap_state.unreserve_physical = os_unreserve_physical;
virtual_map = 1;
}
else
#endif
if (init->predefined_area.start) {
start = init->predefined_area.start;
end = init->predefined_area.end;
if (end != (void *) 0 && end < start)
end = start;
}
#if ERTS_HAVE_OS_MMAP
else if (init->scs) {
UWord sz;
sz = ERTS_PAGEALIGNED_CEILING(init->scs);
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (!init->scrpm) {
start = os_mmap_virtual(NULL, sz);
mmap_state.reserve_physical = os_reserve_physical;
mmap_state.unreserve_physical = os_unreserve_physical;
virtual_map = 1;
}
else
#endif
{
/*
* The whole supercarrier will by physically
* reserved all the time.
*/
start = os_mmap(NULL, sz, 1);
}
if (!start)
erl_exit(-1,
"erts_mmap: Failed to create super carrier of size %bpu MB\n",
init->scs/1024/1024);
end = start + sz;
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
if (!virtual_map) {
Uint32 *uip;
for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++)
*uip = (Uint32) 0xdeadbeef;
}
#endif
}
#endif
mmap_state.no.free_seg_descs = 0;
mmap_state.no.free_segs.curr = 0;
mmap_state.no.free_segs.max = 0;
mmap_state.size.supercarrier.total = 0;
mmap_state.size.supercarrier.used.total = 0;
mmap_state.size.supercarrier.used.sa = 0;
mmap_state.size.supercarrier.used.sua = 0;
mmap_state.size.os.used = 0;
mmap_state.desc.new_area_hint = NULL;
if (!start) {
mmap_state.sa.bot = NULL;
mmap_state.sua.top = NULL;
mmap_state.sa.bot = NULL;
mmap_state.sua.top = NULL;
mmap_state.no_os_mmap = 0;
mmap_state.supercarrier = 0;
}
else {
size_t desc_size;
mmap_state.no_os_mmap = init->sco;
desc_size = init->scrfsd;
if (desc_size < 100)
desc_size = 100;
desc_size *= sizeof(ErtsFreeSegDesc);
if ((desc_size
+ ERTS_SUPERALIGNED_SIZE
+ ERTS_PAGEALIGNED_SIZE) > end - start)
erl_exit(-1, "erts_mmap: No space for segments in super carrier\n");
mmap_state.sa.bot = start;
mmap_state.sa.bot += desc_size;
mmap_state.sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mmap_state.sa.bot);
mmap_state.sa.top = mmap_state.sa.bot;
mmap_state.sua.top = end;
mmap_state.sua.bot = mmap_state.sua.top;
mmap_state.size.supercarrier.used.total += (UWord) (mmap_state.sa.bot - start);
mmap_state.desc.free_list = NULL;
mmap_state.desc.reserved = 0;
if (end == (void *) 0) {
/*
* Very unlikely, but we need a guarantee
* that `mmap_state.sua.top` always will
* compare as larger than all segment pointers
* into the super carrier...
*/
mmap_state.sua.top -= ERTS_PAGEALIGNED_SIZE;
mmap_state.size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (!virtual_map || os_reserve_physical(mmap_state.sua.top, ERTS_PAGEALIGNED_SIZE))
#endif
add_free_desc_area(mmap_state.sua.top, end);
mmap_state.desc.reserved += (end - mmap_state.sua.top) / sizeof(ErtsFreeSegDesc);
}
mmap_state.size.supercarrier.total = (UWord) (mmap_state.sua.top - start);
/*
* Area before (and after) super carrier
* will be used for free segment descritors.
*/
#ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
if (virtual_map && !os_reserve_physical(start, mmap_state.sa.bot - start))
erl_exit(-1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
#endif
mmap_state.desc.unused_start = start;
mmap_state.desc.unused_end = mmap_state.sa.bot;
mmap_state.desc.reserved += ((mmap_state.desc.unused_end - start)
/ sizeof(ErtsFreeSegDesc));
init_free_seg_map(&mmap_state.sa.map, SA_SZ_ADDR_ORDER);
init_free_seg_map(&mmap_state.sua.map, SZ_REVERSE_ADDR_ORDER);
mmap_state.supercarrier = 1;
mmap_state.desc.new_area_hint = end;
}
#if !ERTS_HAVE_OS_MMAP
mmap_state.no_os_mmap = 1;
#endif
#ifdef HARD_DEBUG_MSEG
hard_dbg_mseg_init();
#endif
}
static ERTS_INLINE void
add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
{
*lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
}
Eterm erts_mmap_info(int *print_to_p,
void *print_to_arg,
Eterm** hpp, Uint* szp,
struct erts_mmap_info_struct* emis)
{
Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua };
Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
Eterm group[2];
Eterm group_tags[] = { am.sizes, am.free_segs };
Eterm list[2];
Eterm list_tags[2]; /* { am.supercarrier, am.os } */
int lix;
Eterm res = THE_NON_VALUE;
if (!hpp) {
erts_smp_mtx_lock(&mmap_state.mtx);
emis->sizes[0] = mmap_state.size.supercarrier.total;
emis->sizes[1] = mmap_state.sa.top - mmap_state.sa.bot;
emis->sizes[2] = mmap_state.sua.top - mmap_state.sua.bot;
emis->sizes[3] = mmap_state.size.supercarrier.used.total;
emis->sizes[4] = mmap_state.size.supercarrier.used.sa;
emis->sizes[5] = mmap_state.size.supercarrier.used.sua;
emis->segs[0] = mmap_state.no.free_segs.curr;
emis->segs[1] = mmap_state.no.free_segs.max;
emis->segs[2] = mmap_state.no.free_seg_descs;
emis->segs[3] = mmap_state.desc.reserved;
emis->segs[4] = mmap_state.sa.map.nseg;
emis->segs[5] = mmap_state.sua.map.nseg;
emis->os_used = mmap_state.size.os.used;
erts_smp_mtx_unlock(&mmap_state.mtx);
}
if (print_to_p) {
int to = *print_to_p;
void *arg = print_to_arg;
if (mmap_state.supercarrier) {
const char* prefix = "supercarrier ";
erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]);
erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]);
erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]);
erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]);
erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]);
erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]);
erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]);
erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]);
erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
}
if (!mmap_state.no_os_mmap) {
erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
}
}
if (hpp || szp) {
if (!am.is_initialized) {
init_atoms();
}
lix = 0;
if (mmap_state.supercarrier) {
group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
sizeof(size_tags)/sizeof(Eterm),
size_tags, emis->sizes);
group[1] = erts_bld_atom_uword_2tup_list(hpp, szp,
sizeof(seg_tags)/sizeof(Eterm),
seg_tags, emis->segs);
list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group);
list_tags[lix] = am.supercarrier;
lix++;
}
if (!mmap_state.no_os_mmap) {
group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
1, &am.used, &emis->os_used);
list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
list_tags[lix] = am.os;
lix++;
}
res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list);
}
return res;
}
Eterm erts_mmap_info_options(char *prefix,
int *print_to_p,
void *print_to_arg,
Uint **hpp,
Uint *szp)
{
const UWord scs = mmap_state.sua.top - mmap_state.sa.bot;
const Eterm sco = mmap_state.no_os_mmap ? am_true : am_false;
const Eterm scrpm = (mmap_state.reserve_physical == reserve_noop) ? am_true : am_false;
Eterm res = THE_NON_VALUE;
if (print_to_p) {
int to = *print_to_p;
void *arg = print_to_arg;
erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
if (mmap_state.supercarrier) {
erts_print(to, arg, "%ssco: %T\n", prefix, sco);
erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mmap_state.desc.reserved);
}
}
if (hpp || szp) {
if (!am.is_initialized) {
init_atoms();
}
res = NIL;
if (mmap_state.supercarrier) {
add_2tup(hpp, szp, &res, am.scrfsd,
erts_bld_uint(hpp,szp, mmap_state.desc.reserved));
add_2tup(hpp, szp, &res, am.scrpm, scrpm);
add_2tup(hpp, szp, &res, am.sco, sco);
}
add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs));
}
return res;
}
Eterm erts_mmap_debug_info(Process* p)
{
if (mmap_state.supercarrier) {
ERTS_DECL_AM(sabot);
ERTS_DECL_AM(satop);
ERTS_DECL_AM(suabot);
ERTS_DECL_AM(suatop);
Eterm sa_list, sua_list, list;
Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop };
UWord values[4];
Eterm *hp, *hp_end;
Uint may_need;
erts_smp_mtx_lock(&mmap_state.mtx);
values[0] = (UWord)mmap_state.sa.bot;
values[1] = (UWord)mmap_state.sa.top;
values[2] = (UWord)mmap_state.sua.bot;
values[3] = (UWord)mmap_state.sua.top;
sa_list = build_free_seg_list(p, &mmap_state.sa.map);
sua_list = build_free_seg_list(p, &mmap_state.sua.map);
erts_smp_mtx_unlock(&mmap_state.mtx);
may_need = 4*(2+3+2) + 2*(2+3);
hp = HAlloc(p, may_need);
hp_end = hp + may_need;
list = erts_bld_atom_uword_2tup_list(&hp, NULL,
sizeof(values)/sizeof(*values),
tags, values);
sa_list = TUPLE2(hp, am_atom_put("sa_free_segs",12), sa_list); hp+=3;
sua_list = TUPLE2(hp, am_atom_put("sua_free_segs",13), sua_list); hp+=3;
list = CONS(hp, sua_list, list); hp+=2;
list = CONS(hp, sa_list, list); hp+=2;
ASSERT(hp <= hp_end);
HRelease(p, hp_end, hp);
return list;
}
else {
return am_undefined;
}
}
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* Debug functions *
\* */
#ifdef HARD_DEBUG
static int rbt_assert_is_member(RBTNode* root, RBTNode* node)
{
while (node != root) {
RBT_ASSERT(parent(node));
RBT_ASSERT(parent(node)->left == node || parent(node)->right == node);
node = parent(node);
}
return 1;
}
#if 0
# define PRINT_TREE
#else
# undef PRINT_TREE
#endif
#ifdef PRINT_TREE
static void print_tree(enum SortOrder order, RBTNode*);
#endif
/*
* Checks that the order between parent and children are correct,
* and that the Red-Black Tree properies are satisfied. if size > 0,
* check_tree() returns the node that satisfies "address order first fit"
*
* The Red-Black Tree properies are:
* 1. Every node is either red or black.
* 2. Every leaf (NIL) is black.
* 3. If a node is red, then both its children are black.
* 4. Every simple path from a node to a descendant leaf
* contains the same number of black nodes.
*
*/
struct check_arg_t {
RBTree* tree;
ErtsFreeSegDesc* prev_seg;
Uint size;
RBTNode *res;
};
static void check_node_callback(RBTNode* x, void* arg);
static RBTNode *
check_tree(RBTree* tree, Uint size)
{
struct check_arg_t carg;
carg.tree = tree;
carg.prev_seg = NULL;
carg.size = size;
carg.res = NULL;
#ifdef PRINT_TREE
print_tree(tree->order, tree->root);
#endif
if (!tree->root)
return NULL;
RBT_ASSERT(IS_BLACK(tree->root));
RBT_ASSERT(!parent(tree->root));
rbt_foreach_node(tree, check_node_callback, &carg, 0);
return carg.res;
}
static void check_node_callback(RBTNode* x, void* arg)
{
struct check_arg_t* a = (struct check_arg_t*) arg;
ErtsFreeSegDesc* seg;
if (IS_RED(x)) {
RBT_ASSERT(IS_BLACK(x->right));
RBT_ASSERT(IS_BLACK(x->left));
}
RBT_ASSERT(parent(x) || x == a->tree->root);
if (x->left) {
RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0);
}
if (x->right) {
RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0);
}
seg = node_to_desc(a->tree->order, x);
RBT_ASSERT(seg->start < seg->end);
if (a->size && (seg->end - seg->start) >= a->size) {
if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) {
a->res = x;
}
}
if (a->tree->order == ADDR_ORDER) {
RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start);
a->prev_seg = seg;
}
}
#endif /* HARD_DEBUG */
#ifdef PRINT_TREE
#define INDENT_STEP 2
#include <stdio.h>
static void
print_tree_aux(enum SortOrder order, RBTNode *x, int indent)
{
int i;
if (x) {
ErtsFreeSegDesc* desc = node_to_desc(order, x);
print_tree_aux(order, x->right, indent + INDENT_STEP);
for (i = 0; i < indent; i++) {
putc(' ', stderr);
}
fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n",
IS_BLACK(x) ? "BLACK" : "RED",
desc->end - desc->start, desc->start, desc->end, desc);
print_tree_aux(order, x->left, indent + INDENT_STEP);
}
}
static void
print_tree(enum SortOrder order, RBTNode* root)
{
fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]);
print_tree_aux(order, root, 0);
fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]);
}
#endif /* PRINT_TREE */
#ifdef FREE_SEG_API_SMOKE_TEST
void test_it(void)
{
ErtsFreeSegMap map;
ErtsFreeSegDesc *desc, *under, *over, *d1, *d2;
const int i = 1; /* reverse addr order */
{
init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER);
insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
desc = lookup_free_seg(&map, 0x500);
ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L));
desc = lookup_free_seg(&map, 0x1500);
ERTS_ASSERT(desc->start == (char*)0x15000);
adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over);
ERTS_ASSERT(!under && !over);
adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over);
ERTS_ASSERT(!under && over->start == (char*)0x8000);
adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over);
ERTS_ASSERT(under->end == (char*)0x10000 && !over);
adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over);
ERTS_ASSERT(!under && !over);
adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over);
ERTS_ASSERT(!under && over && over->start == (char*)0x11000);
adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over);
ERTS_ASSERT(under && under->end == (char*)0x12000 && !over);
adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over);
ERTS_ASSERT(under && under->end == (char*)0x12000 &&
over && over->start == (char*)0x13000);
adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over);
ERTS_ASSERT(!under && over && over->start == (char*)0x13000);
d1 = lookup_free_seg(&map, 0x500);
ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L));
resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
d2 = lookup_free_seg(&map, 0x1200);
ERTS_ASSERT(d2 == d1);
delete_free_seg(&map, d1);
HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
d1 = lookup_free_seg(&map, 0x1200);
ERTS_ASSERT(d1->start == (char*)0x15000);
}
}
#endif /* FREE_SEG_API_SMOKE_TEST */
#ifdef HARD_DEBUG_MSEG
/*
* Debug stuff used by erl_mseg to check that it does the right thing.
* The reason for keeping it here is that we (ab)use the rb-tree code
* for keeping track of *allocated* segments.
*/
typedef struct ErtsFreeSegDesc_fake_ {
/*RBTNode snode; Save memory by skipping unused size tree node */
RBTNode anode; /* node in 'atree' */
union {
char* start;
struct ErtsFreeSegDesc_fake_* next_free;
}u;
char* end;
}ErtsFreeSegDesc_fake;
static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000];
static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first;
RBTree hard_dbg_mseg_tree;
static erts_mtx_t hard_dbg_mseg_mtx;
static void hard_dbg_mseg_init(void)
{
ErtsFreeSegDesc_fake* p;
erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg");
hard_dbg_mseg_tree.root = NULL;
hard_dbg_mseg_tree.order = ADDR_ORDER;
p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) /
sizeof(*hard_dbg_mseg_desc_pool)) - 1];
p->u.next_free = NULL;
while (--p >= hard_dbg_mseg_desc_pool) {
p->u.next_free = (p+1);
}
hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0];
}
static ErtsFreeSegDesc* hard_dbg_alloc_desc(void)
{
ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first;
ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors");
hard_dbg_mseg_desc_first = p->u.next_free;
/* Creative pointer arithmetic to return something that looks like
* a ErtsFreeSegDesc as long as we don't use the absent 'snode'.
*/
return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode));
}
static void hard_dbg_free_desc(ErtsFreeSegDesc* desc)
{
ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode;
memset(p, 0xfe, sizeof(*p));
p->u.next_free = hard_dbg_mseg_desc_first;
hard_dbg_mseg_desc_first = p;
}
static void check_seg_writable(void* seg, UWord sz)
{
UWord* seg_end = (UWord*)((char*)seg + sz);
volatile UWord* p;
ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg));
ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz));
for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) {
UWord write_back = *p;
*p = 0xfade2b1acc;
*p = write_back;
}
}
void hard_dbg_insert_mseg(void* seg, UWord sz)
{
check_seg_writable(seg, sz);
erts_mtx_lock(&hard_dbg_mseg_mtx);
{
ErtsFreeSegDesc *desc = hard_dbg_alloc_desc();
RBTNode *prev, *next;
desc->start = (char*)seg;
desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */
rbt_insert(&hard_dbg_mseg_tree, &desc->anode);
prev = rbt_prev_node(&desc->anode);
next = rbt_next_node(&desc->anode);
ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start);
ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end);
}
erts_mtx_unlock(&hard_dbg_mseg_mtx);
}
static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start)
{
RBTNode* x = tree->root;
while (x) {
ErtsFreeSegDesc* desc = anode_to_desc(x);
if (start < desc->start) {
x = x->left;
}
else if (start > desc->start) {
ERTS_ASSERT(start > desc->end);
x = x->right;
}
else
return desc;
}
return NULL;
}
void hard_dbg_remove_mseg(void* seg, UWord sz)
{
check_seg_writable(seg, sz);
erts_mtx_lock(&hard_dbg_mseg_mtx);
{
ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg);
ERTS_ASSERT(desc);
ERTS_ASSERT(desc->start == (char*)seg);
ERTS_ASSERT(desc->end == (char*)seg + sz - 1);
rbt_delete(&hard_dbg_mseg_tree, &desc->anode);
hard_dbg_free_desc(desc);
}
erts_mtx_unlock(&hard_dbg_mseg_mtx);
}
#endif /* HARD_DEBUG_MSEG */