author     Rickard Green <[email protected]>   2010-09-15 22:14:51 +0200
committer  Rickard Green <[email protected]>   2011-11-13 20:39:30 +0100
commit     a67e91e658bdbba24fcc3c79b06fdf10ff830bc9 (patch)
tree       07f9e6b1fd715d516d2571521307fe1b9d7c3948 /erts/emulator/beam/erl_alloc.h
parent     55358c54778ead444e51f565d00175ba887ef182 (diff)
Optimize memory allocation
A number of memory allocation optimizations have been implemented. Most optimizations reduce contention caused by synchronization between threads during allocation and deallocation of memory. Most notably:

* Synchronization of memory management in scheduler specific allocator instances has been rewritten to use lock-free synchronization.
* Synchronization of memory management in scheduler specific pre-allocators has been rewritten to use lock-free synchronization.
* The 'mseg_alloc' memory segment allocator now uses scheduler specific instances instead of one instance. Apart from reducing contention, this also ensures that memory allocators always create memory segments on the local NUMA node on a NUMA system.
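For context (not part of the patch): the diff below replaces the old spinlock-protected per-scheduler chunks (erts_qa_*) with calls into the erts_sspa_* pre-allocator from erl_sched_spec_pre_alloc.h. The following is only a minimal sketch of the per-scheduler, lock-free pre-allocation idea, using C11 atomics and hypothetical names (pool_t, pool_alloc, pool_free_remote); the real API is erts_sspa_create/erts_sspa_alloc/erts_sspa_free and it uses the ERTS atomic primitives and a delayed-dealloc scheme rather than exactly this layout.

/* Sketch only: one pre-allocated pool per scheduler. The owner works on
 * a plain local freelist; other threads that free a block push it onto
 * the pool's lock-free "remote" stack, which the owner drains in bulk.
 * This removes the per-chunk spinlock used by the old implementation.
 * Assumes blk_size >= sizeof(blk_t) and suitable alignment. */
#include <stdatomic.h>
#include <stdlib.h>

typedef struct blk { struct blk *next; } blk_t;   /* cf. erts_sspa_blk_t */

typedef struct {
    blk_t *local;              /* only the owning scheduler touches this */
    _Atomic(blk_t *) remote;   /* blocks freed by other threads */
    char *mem;                 /* pre-allocated backing storage */
} pool_t;

static void pool_init(pool_t *p, size_t blk_size, int nblks)
{
    p->mem = malloc(blk_size * nblks);
    p->local = NULL;
    atomic_init(&p->remote, NULL);
    for (int i = 0; i < nblks; i++) {      /* thread all blocks onto the local list */
        blk_t *b = (blk_t *) (p->mem + i * blk_size);
        b->next = p->local;
        p->local = b;
    }
}

static void *pool_alloc(pool_t *p)         /* called only by the pool's owner */
{
    if (!p->local)                         /* local list empty: take, in one swap, */
        p->local = atomic_exchange(&p->remote, NULL);  /* all remotely freed blocks */
    blk_t *b = p->local;
    if (b)
        p->local = b->next;
    return b;                              /* NULL => fall back to a normal allocator */
}

static void pool_free_local(pool_t *p, void *ptr)   /* owner frees: no atomics needed */
{
    blk_t *b = ptr;
    b->next = p->local;
    p->local = b;
}

static void pool_free_remote(pool_t *p, void *ptr)  /* any other thread: lock-free push */
{
    blk_t *b = ptr;
    b->next = atomic_load(&p->remote);
    while (!atomic_compare_exchange_weak(&p->remote, &b->next, b))
        ;                                  /* CAS retries until the push succeeds */
}

In the actual patch, the ERTS_SCHED_PREF_PRE_ALLOC_IMPL macro shown in the diff wraps this kind of pool: NAME##_alloc passes the calling scheduler's index (esdp->no - 1) to erts_sspa_alloc, and NAME##_free passes -1 when called from a non-scheduler thread.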
Diffstat (limited to 'erts/emulator/beam/erl_alloc.h')
-rw-r--r--   erts/emulator/beam/erl_alloc.h   |   186
1 file changed, 47 insertions, 139 deletions
diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h
index 80cb82c393..f4133cdb1a 100644
--- a/erts/emulator/beam/erl_alloc.h
+++ b/erts/emulator/beam/erl_alloc.h
@@ -43,43 +43,40 @@
# define ERTS_ALC_INLINE
#endif
-#define ERTS_FIX_CORE_ALLOCATOR ERTS_ALC_A_LONG_LIVED
-extern ErtsAlcType_t erts_fix_core_allocator_ix;
-
-typedef struct {
- Uint total;
- Uint used;
-} ErtsFixInfo;
+#define ERTS_ALC_NO_FIXED_SIZES \
+ (ERTS_ALC_N_MAX_A_FIXED_SIZE - ERTS_ALC_N_MIN_A_FIXED_SIZE + 1)
void erts_sys_alloc_init(void);
void *erts_sys_alloc(ErtsAlcType_t, void *, Uint);
void *erts_sys_realloc(ErtsAlcType_t, void *, void *, Uint);
void erts_sys_free(ErtsAlcType_t, void *, void *);
-
-void erts_init_fix_alloc(Uint, void *(*)(Uint));
-Uint erts_get_fix_size(ErtsAlcType_t);
-void erts_set_fix_size(ErtsAlcType_t, Uint);
-void erts_fix_info(ErtsAlcType_t, ErtsFixInfo *);
-void *erts_fix_alloc(ErtsAlcType_t, void *, Uint);
-void *erts_fix_realloc(ErtsAlcType_t, void *, void*, Uint);
-void erts_fix_free(ErtsAlcType_t, void *, void*);
-
-
Eterm erts_memory(int *, void *, void *, Eterm);
Eterm erts_allocated_areas(int *, void *, void *);
Eterm erts_alloc_util_allocators(void *proc);
void erts_allocator_info(int, void *);
-Eterm erts_allocator_info_term(void *proc, Eterm which_alloc, int only_sz);
Eterm erts_allocator_options(void *proc);
+struct process;
+
+int erts_request_alloc_info(struct process *c_p, Eterm ref, Eterm allocs,
+ int only_sz);
+
#define ERTS_ALLOC_INIT_DEF_OPTS_INITER {0}
typedef struct {
- int dummy;
+ int ncpu;
} ErtsAllocInitOpts;
+typedef struct {
+ Allctr_t *deallctr[ERTS_ALC_A_MAX+1];
+ int pref_ix[ERTS_ALC_A_MAX+1];
+ int flist_ix[ERTS_ALC_A_MAX+1];
+ int pre_alc_ix;
+} ErtsSchedAllocData;
+
void erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop);
+void erts_alloc_late_init(void);
#if defined(GET_ERTS_ALC_TEST) || defined(ERTS_ALC_INTERNAL__)
/* Only for testing */
@@ -126,15 +123,19 @@ extern ErtsAllocatorInfo_t erts_allctrs_info[ERTS_ALC_A_MAX+1];
typedef struct {
int enabled;
- int all_thr_safe;
+ int dd;
+ int aix;
int size;
Allctr_t **allctr;
} ErtsAllocatorThrSpec_t;
extern ErtsAllocatorThrSpec_t erts_allctr_thr_spec[ERTS_ALC_A_MAX+1];
-int erts_alc_get_thr_ix(void);
-void erts_alloc_reg_scheduler_id(Uint id);
+void erts_alloc_register_scheduler(void *vesdp);
+void erts_alloc_scheduler_handle_delayed_dealloc(void *vesdp,
+ int *need_thr_progress,
+ int *more_work);
+erts_aint32_t erts_alloc_fix_alloc_shrink(int ix, erts_aint32_t flgs);
__decl_noreturn void erts_alloc_enomem(ErtsAlcType_t,Uint)
__noreturn;
@@ -252,6 +253,8 @@ void *erts_realloc_fnf(ErtsAlcType_t type, void *ptr, Uint size)
#endif /* #if ERTS_ALC_DO_INLINE || defined(ERTS_ALC_INTERNAL__) */
+#define ERTS_ALC_GET_THR_IX() ((int) erts_get_scheduler_id())
+
typedef void (*erts_alloc_verify_func_t)(Allctr_t *);
erts_alloc_verify_func_t
@@ -436,136 +439,41 @@ NAME##_free(TYPE *p) \
} \
}
-typedef struct {
- void *start;
- void *end;
- int chunks_mem_size;
-} erts_sched_pref_quick_alloc_data_t;
-
-#ifdef DEBUG
-#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P) \
-do { \
- ASSERT((void *) (C) < (void *) (P)); \
- ASSERT((void *) (P) \
- < (void *) (((char *) (C)) + (A)->chunks_mem_size)); \
-} while (0)
-#else
-#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)
-#endif
+#include "erl_sched_spec_pre_alloc.h"
#define ERTS_SCHED_PREF_PRE_ALLOC_IMPL(NAME, TYPE, PASZ) \
-union erts_qa_##NAME##__ { \
+union erts_sspa_##NAME##__ { \
+ erts_sspa_blk_t next; \
TYPE type; \
- union erts_qa_##NAME##__ *next; \
}; \
-typedef struct { \
- erts_smp_spinlock_t lock; \
- union erts_qa_##NAME##__ *freelist; \
- union erts_qa_##NAME##__ pre_alloced[1]; \
-} erts_qa_##NAME##_chunk__; \
-static erts_sched_pref_quick_alloc_data_t *qa_data_##NAME##__; \
-static ERTS_INLINE erts_qa_##NAME##_chunk__ * \
-get_##NAME##_chunk_ix(int cix) \
-{ \
- char *ptr = (char *) qa_data_##NAME##__->start; \
- ptr += cix*qa_data_##NAME##__->chunks_mem_size; \
- return (erts_qa_##NAME##_chunk__ *) ptr; \
-} \
-static ERTS_INLINE erts_qa_##NAME##_chunk__ * \
-get_##NAME##_chunk_ptr(void *ptr) \
-{ \
- int cix; \
- size_t diff; \
- if (ptr < qa_data_##NAME##__->start || qa_data_##NAME##__->end <= ptr)\
- return NULL; \
- diff = ((char *) ptr) - ((char *) qa_data_##NAME##__->start); \
- cix = diff / qa_data_##NAME##__->chunks_mem_size; \
- return get_##NAME##_chunk_ix(cix); \
-} \
+ \
+static erts_sspa_data_t *sspa_data_##NAME##__; \
+ \
static void \
init_##NAME##_alloc(void) \
{ \
- size_t tot_size; \
- size_t chunk_mem_size; \
- char *chunk_start; \
- int cix; \
- int no_blocks = ERTS_PRE_ALLOC_SIZE((PASZ)); \
- int no_blocks_per_chunk = 2*((no_blocks-1)/erts_no_schedulers + 1); \
- no_blocks = no_blocks_per_chunk * erts_no_schedulers; \
- chunk_mem_size = sizeof(erts_qa_##NAME##_chunk__); \
- chunk_mem_size += (sizeof(union erts_qa_##NAME##__) \
- * (no_blocks_per_chunk - 1)); \
- chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size); \
- tot_size = sizeof(erts_sched_pref_quick_alloc_data_t); \
- tot_size += ERTS_CACHE_LINE_SIZE - 1; \
- tot_size += chunk_mem_size*erts_no_schedulers; \
- qa_data_##NAME##__ = erts_alloc(ERTS_ALC_T_PRE_ALLOC_DATA,tot_size);\
- chunk_start = (((char *) qa_data_##NAME##__) \
- + sizeof(erts_sched_pref_quick_alloc_data_t)); \
- if ((((UWord) chunk_start) & ERTS_CACHE_LINE_MASK) != ((UWord) 0)) \
- chunk_start = ((char *) \
- ((((UWord) chunk_start) & ~ERTS_CACHE_LINE_MASK) \
- + ERTS_CACHE_LINE_SIZE)); \
- qa_data_##NAME##__->chunks_mem_size = chunk_mem_size; \
- qa_data_##NAME##__->start = (void *) chunk_start; \
- qa_data_##NAME##__->end = (chunk_start \
- + chunk_mem_size*erts_no_schedulers); \
- for (cix = 0; cix < erts_no_schedulers; cix++) { \
- int i; \
- erts_qa_##NAME##_chunk__ *chunk = get_##NAME##_chunk_ix(cix); \
- erts_smp_spinlock_init(&chunk->lock, #NAME "_alloc_lock"); \
- chunk->freelist = &chunk->pre_alloced[0]; \
- for (i = 1; i < no_blocks_per_chunk; i++) { \
- ERTS_PRE_ALLOC_CLOBBER(&chunk->pre_alloced[i-1], \
- union erts_qa_##NAME##__); \
- chunk->pre_alloced[i-1].next = &chunk->pre_alloced[i]; \
- } \
- ERTS_PRE_ALLOC_CLOBBER(&chunk->pre_alloced[no_blocks_per_chunk-1],\
- union erts_qa_##NAME##__); \
- chunk->pre_alloced[no_blocks_per_chunk-1].next = NULL; \
- } \
+ sspa_data_##NAME##__ = \
+ erts_sspa_create(sizeof(union erts_sspa_##NAME##__), \
+ ERTS_PRE_ALLOC_SIZE((PASZ))); \
} \
-static ERTS_INLINE TYPE * \
+ \
+static TYPE * \
NAME##_alloc(void) \
{ \
- int cix = ((int) erts_get_scheduler_id()) - 1; \
- TYPE *res; \
- if (cix < 0) \
- res = NULL; \
- else { \
- erts_qa_##NAME##_chunk__ *chunk = get_##NAME##_chunk_ix(cix); \
- erts_smp_spin_lock(&chunk->lock); \
- if (!chunk->freelist) \
- res = NULL; \
- else { \
- res = &chunk->freelist->type; \
- chunk->freelist = chunk->freelist->next; \
- ERTS_SPPA_DBG_CHK_IN_CHNK(qa_data_##NAME##__, chunk, res); \
- } \
- erts_smp_spin_unlock(&chunk->lock); \
- } \
- return res; \
+ ErtsSchedulerData *esdp = erts_get_scheduler_data(); \
+ if (!esdp) \
+ return NULL; \
+ return (TYPE *) erts_sspa_alloc(sspa_data_##NAME##__, \
+ (int) esdp->no - 1); \
} \
-static ERTS_INLINE int \
+ \
+static int \
NAME##_free(TYPE *p) \
{ \
- erts_qa_##NAME##_chunk__ *chunk; \
- chunk = get_##NAME##_chunk_ptr((void *) p); \
- if (!chunk) \
- return 0; \
- else { \
- union erts_qa_##NAME##__ *up; \
- ERTS_SPPA_DBG_CHK_IN_CHNK(qa_data_##NAME##__, chunk, p); \
- up = ((union erts_qa_##NAME##__ *) \
- (((char *) p) \
- - ((char *) &((union erts_qa_##NAME##__ *) 0)->type))); \
- erts_smp_spin_lock(&chunk->lock); \
- ERTS_PRE_ALLOC_CLOBBER(up, union erts_qa_##NAME##__); \
- up->next = chunk->freelist; \
- chunk->freelist = up; \
- erts_smp_spin_unlock(&chunk->lock); \
- return 1; \
- } \
+ ErtsSchedulerData *esdp = erts_get_scheduler_data(); \
+ return erts_sspa_free(sspa_data_##NAME##__, \
+ esdp ? (int) esdp->no - 1 : -1, \
+ (char *) p); \
}
#ifdef DEBUG