From af8380c319c01cfbf6262c3356462823b9e88c1c Mon Sep 17 00:00:00 2001
From: Sverker Eriksson <sverker@erlang.org>
Date: Wed, 28 Jun 2017 20:00:09 +0200
Subject: erts: Add ERTS_THR_PREF_QUICK_ALLOC_IMPL

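Adds a thread-preferred quick-alloc variant usable from any (managed?)
thread. A thread registers itself with the generated
erts_<NAME>_pre_alloc_init_thread(); threads that have not registered
get their blocks from erts_alloc() instead of the pre-allocated pool.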
---
 erts/emulator/beam/erl_alloc.h                | 80 ++++++++++++++++++++++++++-
 erts/emulator/beam/erl_sched_spec_pre_alloc.c | 30 +++++++---
 erts/emulator/beam/erl_sched_spec_pre_alloc.h | 13 ++++-
 3 files changed, 111 insertions(+), 12 deletions(-)

(limited to 'erts')

diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h
index c661d0b226..117f96a4ad 100644
--- a/erts/emulator/beam/erl_alloc.h
+++ b/erts/emulator/beam/erl_alloc.h
@@ -402,6 +402,32 @@ NAME##_free(TYPE *p)							\
 	erts_free(ALCT, (void *) p);					\
 }
 
+#define ERTS_THR_PREF_AUX(NAME, TYPE, PASZ) /* instantiates the pool as NAME_pre */ \
+ERTS_THR_PREF_PRE_ALLOC_IMPL(NAME##_pre, TYPE, PASZ)
+/* Quick alloc for TYPE: try the thread-preferred pool first, else erts_alloc/erts_free with ALCT. */
+#define ERTS_THR_PREF_QUICK_ALLOC_IMPL(NAME, TYPE, PASZ, ALCT)	        \
+ERTS_THR_PREF_AUX(NAME, TYPE, PASZ)					\
+static void								\
+init_##NAME##_alloc(int nthreads)					\
+{									\
+    init_##NAME##_pre_alloc(nthreads);			                \
+}									\
+static ERTS_INLINE TYPE *						\
+NAME##_alloc(void)							\
+{									\
+    TYPE *res = NAME##_pre_alloc();					\
+    if (!res) /* NULL from the pool, e.g. calling thread has no id */	\
+	res = erts_alloc(ALCT, sizeof(TYPE));				\
+    return res;								\
+}									\
+static ERTS_INLINE void							\
+NAME##_free(TYPE *p)							\
+{									\
+    if (!NAME##_pre_free(p)) /* zero: presumably p is not a pool block - TODO confirm */ \
+	erts_free(ALCT, (void *) p);					\
+}
+
+
 #ifdef DEBUG
 #define ERTS_PRE_ALLOC_SIZE(SZ) ((SZ) < 1000 ? (SZ)/10 + 10 : 100)
 #define ERTS_PRE_ALLOC_CLOBBER(P, T) memset((void *) (P), 0xfd, sizeof(T))
@@ -482,7 +508,8 @@ init_##NAME##_alloc(void)						\
 {									\
     sspa_data_##NAME##__ =						\
 	erts_sspa_create(sizeof(union erts_sspa_##NAME##__),		\
-			 ERTS_PRE_ALLOC_SIZE((PASZ)));			\
+			 ERTS_PRE_ALLOC_SIZE((PASZ)), 			\
+                         0, NULL);                                      \
 }									\
 									\
 static TYPE *								\
@@ -504,6 +531,57 @@ NAME##_free(TYPE *p)							\
 			  (char *) p);					\
 }
 
+/* Pre-allocated pool of TYPE blocks with one chunk per registered thread (nthreads chunks). */
+#define ERTS_THR_PREF_PRE_ALLOC_IMPL(NAME, TYPE, PASZ)		        \
+union erts_sspa_##NAME##__ {						\
+    erts_sspa_blk_t next;						\
+    TYPE type;								\
+};									\
+/* Pool descriptor; assigned by init_NAME_alloc(). */			\
+static erts_sspa_data_t *sspa_data_##NAME##__;				\
+/* Create the pool: PASZ sizes it, nthreads is the number of chunks, #NAME names the TSD key. */ \
+static void								\
+init_##NAME##_alloc(int nthreads)					\
+{									\
+    sspa_data_##NAME##__ =						\
+	erts_sspa_create(sizeof(union erts_sspa_##NAME##__),		\
+			 ERTS_PRE_ALLOC_SIZE((PASZ)),			\
+                         nthreads,                                      \
+                         #NAME);                                        \
+}									\
+/* Register the calling thread: take an id from id_generator, store it in TSD; exit if id > nthreads. */ \
+void								        \
+erts_##NAME##_alloc_init_thread(void)				        \
+{									\
+    int id = erts_atomic_inc_read_nob(&sspa_data_##NAME##__->id_generator);\
+    if (id > sspa_data_##NAME##__->nthreads) {                          \
+        erts_exit(ERTS_ABORT_EXIT,                                      \
+                  "%s:%d:%s(): Too many threads for '" #NAME "'\n",     \
+                  __FILE__, __LINE__, __func__);                        \
+    }                                                                   \
+    erts_tsd_set(sspa_data_##NAME##__->tsd_key, (void*)(SWord)id);      \
+}									\
+/* Returns NULL when the calling thread has no id (TSD value 0); else allocates with chunk index id-1. */ \
+static TYPE *								\
+NAME##_alloc(void)							\
+{									\
+    int id = (int)(SWord)erts_tsd_get(sspa_data_##NAME##__->tsd_key);   \
+    if (id == 0)                                                        \
+        return NULL;                                                    \
+    return (TYPE *) erts_sspa_alloc(sspa_data_##NAME##__,		\
+                                    id-1);		                \
+}									\
+/* Returns the result of erts_sspa_free(), called with index id-1 (-1 for a thread without id). */ \
+static int								\
+NAME##_free(TYPE *p)							\
+{									\
+    int id = (int)(SWord)erts_tsd_get(sspa_data_##NAME##__->tsd_key);   \
+    return erts_sspa_free(sspa_data_##NAME##__,				\
+			  id - 1,		                        \
+			  (char *) p);					\
+}
+
+
 #ifdef DEBUG
 #define ERTS_ALC_DBG_BLK_SZ(PTR) (*(((UWord *) (PTR)) - 2))
 #endif /* #ifdef DEBUG */
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
index 6cb7ccab8d..ab204303d7 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.c
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
@@ -37,7 +37,7 @@
 #include "erl_thr_progress.h"
 
 erts_sspa_data_t *
-erts_sspa_create(size_t blk_sz, int pa_size)
+erts_sspa_create(size_t blk_sz, int pa_size, int nthreads, const char* name)
 {
     erts_sspa_data_t *data;
     size_t tot_size;
@@ -48,22 +48,30 @@ erts_sspa_create(size_t blk_sz, int pa_size)
     int no_blocks = pa_size;
     int no_blocks_per_chunk;
 
-    if (erts_no_schedulers == 1)
+    if (!name) { /* schedulers only variant */
+        ASSERT(!nthreads);
+        nthreads = erts_no_schedulers;
+    }
+    else {
+        ASSERT(nthreads > 0);
+    }
+
+    if (nthreads == 1)
 	no_blocks_per_chunk = no_blocks;
     else {
 	int extra = (no_blocks - 1)/4 + 1;
 	if (extra == 0)
 	    extra = 1;
 	no_blocks_per_chunk = no_blocks;
-	no_blocks_per_chunk += extra*erts_no_schedulers;
-	no_blocks_per_chunk /= erts_no_schedulers;
+	no_blocks_per_chunk += extra * nthreads;
+	no_blocks_per_chunk /= nthreads;
     }
-    no_blocks = no_blocks_per_chunk * erts_no_schedulers;
+    no_blocks = no_blocks_per_chunk * nthreads;
     chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_chunk_header_t));
     chunk_mem_size += blk_sz * no_blocks_per_chunk;
     chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size);
     tot_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
-    tot_size += chunk_mem_size*erts_no_schedulers;
+    tot_size += chunk_mem_size * nthreads;
 
     p = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_PRE_ALLOC_DATA, tot_size);
     data = (erts_sspa_data_t *) p;
@@ -72,10 +80,16 @@ erts_sspa_create(size_t blk_sz, int pa_size)
 
     data->chunks_mem_size = chunk_mem_size;
     data->start = chunk_start;
-    data->end = chunk_start + chunk_mem_size*erts_no_schedulers;
+    data->end = chunk_start + chunk_mem_size * nthreads;
+    data->nthreads = nthreads;
+
+    if (name) { /* thread variant */
+        erts_tsd_key_create(&data->tsd_key, (char*)name);
+        erts_atomic_init_nob(&data->id_generator, 0);
+    }
 
     /* Initialize all chunks */
-    for (cix = 0; cix < erts_no_schedulers; cix++) {
+    for (cix = 0; cix < nthreads; cix++) {
 	erts_sspa_chunk_t *chnk = erts_sspa_cix2chunk(data, cix);
 	erts_sspa_chunk_header_t *chdr = &chnk->aligned.header;
 	erts_sspa_blk_t *blk;
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
index 1307e65962..d232db0e69 100644
--- a/erts/emulator/beam/erl_sched_spec_pre_alloc.h
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -59,6 +59,11 @@ typedef struct {
     char *start;
     char *end;
     int chunks_mem_size;
+    int nthreads;
+
+    /* Used only by thread variant: */
+    erts_tsd_key_t tsd_key;
+    erts_atomic_t id_generator;
 } erts_sspa_data_t;
 
 typedef union erts_sspa_blk_t_ erts_sspa_blk_t;
@@ -140,7 +145,9 @@ check_local_list(erts_sspa_chunk_header_t *chdr)
 #endif
 
 erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
-				   int pa_size);
+				   int pa_size,
+                                   int nthreads,
+                                   const char* name);
 void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
 			   erts_sspa_blk_t *blk,
 			   int cinit);
@@ -158,7 +165,7 @@ ERTS_GLB_INLINE int erts_sspa_free(erts_sspa_data_t *data, int cix, char *blk);
 ERTS_GLB_INLINE erts_sspa_chunk_t *
 erts_sspa_cix2chunk(erts_sspa_data_t *data, int cix)
 {
-    ASSERT(0 <= cix && cix < erts_no_schedulers);
+    ASSERT(0 <= cix && cix < data->nthreads);
     return (erts_sspa_chunk_t *) (data->start + cix*data->chunks_mem_size);
 }
 
@@ -171,7 +178,7 @@ erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr)
 	return -1;
     diff = ((char *) ptr) - data->start;
     cix = (int) diff / data->chunks_mem_size;
-    ASSERT(0 <= cix && cix < erts_no_schedulers);
+    ASSERT(0 <= cix && cix < data->nthreads);
     return cix;
 }
 
-- 
cgit v1.2.3