aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam/erl_sched_spec_pre_alloc.h
diff options
context:
space:
mode:
authorRickard Green <[email protected]>2010-09-15 22:14:51 +0200
committerRickard Green <[email protected]>2011-11-13 20:39:30 +0100
commita67e91e658bdbba24fcc3c79b06fdf10ff830bc9 (patch)
tree07f9e6b1fd715d516d2571521307fe1b9d7c3948 /erts/emulator/beam/erl_sched_spec_pre_alloc.h
parent55358c54778ead444e51f565d00175ba887ef182 (diff)
downloadotp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.tar.gz
otp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.tar.bz2
otp-a67e91e658bdbba24fcc3c79b06fdf10ff830bc9.zip
Optimize memory allocation
A number of memory allocation optimizations have been implemented. Most optimizations reduce contention caused by synchronization between threads during allocation and deallocation of memory. Most notably: * Synchronization of memory management in scheduler specific allocator instances has been rewritten to use lock-free synchronization. * Synchronization of memory management in scheduler specific pre-allocators has been rewritten to use lock-free synchronization. * The 'mseg_alloc' memory segment allocator now use scheduler specific instances instead of one instance. Apart from reducing contention this also ensures that memory allocators always create memory segments on the local NUMA node on a NUMA system.
Diffstat (limited to 'erts/emulator/beam/erl_sched_spec_pre_alloc.h')
-rw-r--r--erts/emulator/beam/erl_sched_spec_pre_alloc.h239
1 files changed, 239 insertions, 0 deletions
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
new file mode 100644
index 0000000000..d36066c399
--- /dev/null
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -0,0 +1,239 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Scheduler specific pre-allocators. Each scheduler
+ * thread allocates memory in its own private chunk of
+ * memory. Memory blocks deallocated by remote
+ * schedulers (or other threads) are passed back to
+ * the chunk owner via a lock-free data structure.
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERTS_SCHED_SPEC_PRE_ALLOC_H__
+#define ERTS_SCHED_SPEC_PRE_ALLOC_H__
+
+#ifdef ERTS_SMP
+
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#define ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#include "erl_thr_progress.h"
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+
+#ifdef DEBUG
+#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P) \
+do { \
+ ASSERT((void *) (C) < (void *) (P)); \
+ ASSERT((void *) (P) \
+ < (void *) (((char *) (C)) + (A)->chunks_mem_size)); \
+} while (0)
+#else
+#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)
+#endif
+
+#ifdef DEBUG
+extern Uint erts_no_schedulers;
+#endif
+
+#define ERTS_SSPA_FORCE_THR_CHECK_PROGRESS 10
+#define ERTS_SSPA_MAX_GET_NEW_LOCAL 5
+
+typedef struct {
+ char *start;
+ char *end;
+ int chunks_mem_size;
+} erts_sspa_data_t;
+
+typedef union erts_sspa_blk_t_ erts_sspa_blk_t;
+union erts_sspa_blk_t_ {
+ erts_atomic_t next_atmc;
+ erts_sspa_blk_t *next_ptr;
+};
+
+typedef struct {
+ erts_sspa_blk_t *first;
+ erts_sspa_blk_t *last;
+ int cnt;
+ int lim;
+} erts_sspa_local_freelist_t;
+
+typedef struct {
+ erts_sspa_blk_t marker;
+ erts_atomic_t last;
+ erts_atomic_t um_refc[2];
+ erts_atomic32_t um_refc_ix;
+} erts_sspa_tail_t;
+
+typedef struct {
+ /*
+ * This structure needs to be cache line aligned for best
+ * performance.
+ */
+ union {
+ /* Modified by threads returning memory to this chunk */
+ erts_sspa_tail_t data;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_tail_t))];
+ } tail;
+ /*
+ * Everything below this point is *only* accessed by the
+ * thread owning this chunk.
+ */
+ struct {
+ int no_thr_progress_check;
+ int used_marker;
+ erts_sspa_blk_t *first;
+ erts_sspa_blk_t *unref_end;
+ struct {
+ ErtsThrPrgrVal thr_progress;
+ int thr_progress_reached;
+ int um_refc_ix;
+ erts_sspa_blk_t *unref_end;
+ } next;
+ } head;
+ erts_sspa_local_freelist_t local;
+} erts_sspa_chunk_header_t;
+
+typedef struct {
+ union {
+ erts_sspa_chunk_header_t header;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(
+ sizeof(erts_sspa_chunk_header_t))];
+ } aligned;
+ char data[1];
+} erts_sspa_chunk_t;
+
+#ifdef DEBUG
+ERTS_GLB_INLINE void
+check_local_list(erts_sspa_chunk_header_t *chdr);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE void
+check_local_list(erts_sspa_chunk_header_t *chdr)
+{
+ erts_sspa_blk_t *blk;
+ int n = 0;
+ for (blk = chdr->local.first; blk; blk = blk->next_ptr)
+ n++;
+ ASSERT(n == chdr->local.cnt);
+}
+#endif
+#define ERTS_SSPA_DBG_CHK_LCL(CHDR) check_local_list((CHDR))
+#else
+#define ERTS_SSPA_DBG_CHK_LCL(CHDR)
+#endif
+
+erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
+ int pa_size);
+void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *blk);
+erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *old_res);
+
+ERTS_GLB_INLINE erts_sspa_chunk_t *erts_sspa_cix2chunk(erts_sspa_data_t *data,
+ int cix);
+ERTS_GLB_INLINE int erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr);
+ERTS_GLB_INLINE char *erts_sspa_alloc(erts_sspa_data_t *data, int cix);
+ERTS_GLB_INLINE int erts_sspa_free(erts_sspa_data_t *data, int cix, char *blk);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE erts_sspa_chunk_t *
+erts_sspa_cix2chunk(erts_sspa_data_t *data, int cix)
+{
+ ASSERT(0 <= cix && cix < erts_no_schedulers);
+ return (erts_sspa_chunk_t *) (data->start + cix*data->chunks_mem_size);
+}
+
+ERTS_GLB_INLINE int
+erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr)
+{
+ int cix;
+ size_t diff;
+ if ((char *) ptr < data->start || data->end <= (char *) ptr)
+ return -1;
+ diff = ((char *) ptr) - data->start;
+ cix = (int) diff / data->chunks_mem_size;
+ ASSERT(0 <= cix && cix < erts_no_schedulers);
+ return cix;
+}
+
+ERTS_GLB_INLINE char *
+erts_sspa_alloc(erts_sspa_data_t *data, int cix)
+{
+ erts_sspa_chunk_t *chnk;
+ erts_sspa_chunk_header_t *chdr;
+ erts_sspa_blk_t *res;
+
+ chnk = erts_sspa_cix2chunk(data, cix);
+ chdr = &chnk->aligned.header;
+ res = chdr->local.first;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ if (res) {
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ chdr->local.first = res->next_ptr;
+ chdr->local.cnt--;
+ if (!chdr->local.first)
+ chdr->local.last = NULL;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+ if (chdr->local.cnt <= chdr->local.lim)
+ return (char *) erts_sspa_process_remote_frees(chdr, res);
+ else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
+ chdr->head.no_thr_progress_check++;
+ ASSERT(res);
+ return (char *) res;
+}
+
+ERTS_GLB_INLINE int
+erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk)
+{
+ erts_sspa_chunk_t *chnk;
+ erts_sspa_chunk_header_t *chdr;
+ erts_sspa_blk_t *blk = (erts_sspa_blk_t *) cblk;
+ int chnk_cix = erts_sspa_ptr2cix(data, blk);
+
+ if (chnk_cix < 0)
+ return 0;
+
+ chnk = erts_sspa_cix2chunk(data, chnk_cix);
+ chdr = &chnk->aligned.header;
+ if (chnk_cix != cix) {
+ /* Remote chunk */
+ erts_sspa_remote_free(chdr, blk);
+ }
+ else {
+ /* Local chunk */
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ blk->next_ptr = chdr->local.first;
+ chdr->local.first = blk;
+ if (!chdr->local.last)
+ chdr->local.last = blk;
+ chdr->local.cnt++;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+
+ return 1;
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+#endif /* ERTS_SMP */
+
+#endif /* ERTS_SCHED_SPEC_PRE_ALLOC_H__ */