author     Rickard Green <[email protected]>    2010-09-15 22:14:51 +0200
committer  Rickard Green <[email protected]>    2011-11-13 20:39:30 +0100
commit     a67e91e658bdbba24fcc3c79b06fdf10ff830bc9 (patch)
tree       07f9e6b1fd715d516d2571521307fe1b9d7c3948 /erts/emulator
parent     55358c54778ead444e51f565d00175ba887ef182 (diff)
Optimize memory allocation
A number of memory allocation optimizations have been implemented. Most optimizations reduce contention caused by synchronization between threads during allocation and deallocation of memory. Most notably:

* Synchronization of memory management in scheduler specific allocator instances has been rewritten to use lock-free synchronization.

* Synchronization of memory management in scheduler specific pre-allocators has been rewritten to use lock-free synchronization.

* The 'mseg_alloc' memory segment allocator now uses scheduler specific instances instead of one instance. Apart from reducing contention, this also ensures that memory allocators always create memory segments on the local NUMA node on a NUMA system.
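The lock-free rework the message describes boils down to a simple ownership rule: any thread may push a freed block onto the owning instance's delayed-dealloc list, but only the owning scheduler drains it. A minimal sketch of that idea (not the actual erts code; names are hypothetical and C11 atomics stand in for the erts atomic API):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct blk { struct blk *next; } blk_t;
    typedef struct { _Atomic(blk_t *) head; } dd_queue_t;

    /* Any thread: push a block freed on the "wrong" scheduler onto the
       owning instance's delayed-dealloc list with a CAS loop. */
    static void dd_enqueue(dd_queue_t *q, blk_t *b)
    {
        blk_t *h = atomic_load_explicit(&q->head, memory_order_relaxed);
        do {
            b->next = h;
        } while (!atomic_compare_exchange_weak_explicit(
                     &q->head, &h, b,
                     memory_order_release, memory_order_relaxed));
    }

    /* Owning scheduler only: grab the whole list in one shot. */
    static blk_t *dd_drain(dd_queue_t *q)
    {
        return atomic_exchange_explicit(&q->head, NULL,
                                        memory_order_acquire);
    }

Draining the whole list with one atomic exchange is what lets the owner side stay lock-free without running into the ABA problem that popping individual nodes would raise.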
Diffstat (limited to 'erts/emulator')
-rw-r--r--  erts/emulator/Makefile.in                            |    5
-rw-r--r--  erts/emulator/beam/atom.names                        |    3
-rw-r--r--  erts/emulator/beam/bif.tab                           |    4
-rw-r--r--  erts/emulator/beam/erl_afit_alloc.c                  |   18
-rw-r--r--  erts/emulator/beam/erl_alloc.c                       | 1282
-rw-r--r--  erts/emulator/beam/erl_alloc.h                       |  186
-rw-r--r--  erts/emulator/beam/erl_alloc.types                   |   26
-rw-r--r--  erts/emulator/beam/erl_alloc_util.c                  | 1017
-rw-r--r--  erts/emulator/beam/erl_alloc_util.h                  |  109
-rw-r--r--  erts/emulator/beam/erl_ao_firstfit_alloc.c           |   16
-rw-r--r--  erts/emulator/beam/erl_bestfit_alloc.c               |   16
-rw-r--r--  erts/emulator/beam/erl_bif_info.c                    |   81
-rw-r--r--  erts/emulator/beam/erl_goodfit_alloc.c               |   18
-rw-r--r--  erts/emulator/beam/erl_init.c                        |   15
-rw-r--r--  erts/emulator/beam/erl_lock_check.c                  |    5
-rw-r--r--  erts/emulator/beam/erl_mtrace.c                      |    8
-rw-r--r--  erts/emulator/beam/erl_process.c                     | 1157
-rw-r--r--  erts/emulator/beam/erl_process.h                     |   60
-rw-r--r--  erts/emulator/beam/erl_process_lock.h                |   17
-rw-r--r--  erts/emulator/beam/erl_sched_spec_pre_alloc.c        |  305
-rw-r--r--  erts/emulator/beam/erl_sched_spec_pre_alloc.h        |  239
-rw-r--r--  erts/emulator/beam/erl_thr_progress.c                | 1010
-rw-r--r--  erts/emulator/beam/erl_thr_progress.h                |  210
-rw-r--r--  erts/emulator/beam/erl_threads.h                     |    2
-rw-r--r--  erts/emulator/beam/fix_alloc.c                       |  287
-rw-r--r--  erts/emulator/beam/global.h                          |    2
-rw-r--r--  erts/emulator/beam/sys.h                             |   13
-rw-r--r--  erts/emulator/beam/time.c                            |    2
-rw-r--r--  erts/emulator/beam/utils.c                           |   16
-rw-r--r--  erts/emulator/hipe/hipe_bif_list.m4                  |    1
-rw-r--r--  erts/emulator/sys/common/erl_check_io.c              |   11
-rw-r--r--  erts/emulator/sys/common/erl_check_io.h              |    9
-rw-r--r--  erts/emulator/sys/common/erl_mseg.c                  |  669
-rw-r--r--  erts/emulator/sys/common/erl_mseg.h                  |   11
-rw-r--r--  erts/emulator/sys/common/erl_poll.c                  |  193
-rw-r--r--  erts/emulator/sys/common/erl_poll.h                  |    5
-rw-r--r--  erts/emulator/sys/unix/erl_unix_sys.h                |    6
-rw-r--r--  erts/emulator/sys/unix/sys.c                         |   24
-rw-r--r--  erts/emulator/sys/vxworks/sys.c                      |   11
-rw-r--r--  erts/emulator/sys/win32/erl_poll.c                   |    6
-rw-r--r--  erts/emulator/sys/win32/sys.c                        |    3
-rw-r--r--  erts/emulator/test/driver_SUITE.erl                  |   85
-rw-r--r--  erts/emulator/test/driver_SUITE_data/Makefile.src    |    3
-rw-r--r--  erts/emulator/test/driver_SUITE_data/thr_free_drv.c  |  241
-rw-r--r--  erts/emulator/test/mtx_SUITE.erl                     |   13
-rw-r--r--  erts/emulator/test/system_info_SUITE.erl             |  313
46 files changed, 5741 insertions(+), 1992 deletions(-)
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 620402fbfb..6ccad081e5 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -725,7 +725,7 @@ RUN_OBJS = \
$(OBJDIR)/external.o $(OBJDIR)/dist.o \
$(OBJDIR)/binary.o $(OBJDIR)/erl_db.o \
$(OBJDIR)/erl_db_util.o $(OBJDIR)/erl_db_hash.o \
- $(OBJDIR)/erl_db_tree.o $(OBJDIR)/fix_alloc.o \
+ $(OBJDIR)/erl_db_tree.o $(OBJDIR)/erl_thr_progress.o \
$(OBJDIR)/big.o $(OBJDIR)/hash.o \
$(OBJDIR)/index.o $(OBJDIR)/atom.o \
$(OBJDIR)/module.o $(OBJDIR)/export.o \
@@ -742,7 +742,8 @@ RUN_OBJS = \
$(OBJDIR)/erl_bif_re.o $(OBJDIR)/erl_unicode.o \
$(OBJDIR)/packet_parser.o $(OBJDIR)/safe_hash.o \
$(OBJDIR)/erl_zlib.o $(OBJDIR)/erl_nif.o \
- $(OBJDIR)/erl_bif_binary.o $(OBJDIR)/erl_ao_firstfit_alloc.o
+ $(OBJDIR)/erl_bif_binary.o $(OBJDIR)/erl_ao_firstfit_alloc.o \
+ $(OBJDIR)/erl_sched_spec_pre_alloc.o
ifeq ($(TARGET),win32)
DRV_OBJS = \
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index 68d64fb7b0..37738faae3 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -69,6 +69,8 @@ atom ac
atom active
atom all
atom all_but_first
+atom alloc_info
+atom alloc_sizes
atom allocated
atom allocated_areas
atom allocator
@@ -553,5 +555,6 @@ atom warning_msg
atom wordsize
atom write_concurrency
atom xor
+atom x86
atom yes
atom yield
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index ba30fa85b8..987008c937 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -160,10 +160,6 @@ bif erlang:md5_update/2
bif 'erl.util.crypt.md5':update/2 ebif_md5_update_2
bif erlang:md5_final/1
bif 'erl.util.crypt.md5':final/1 ebif_md5_final_1
-bif erlang:memory/0
-bif 'erl.lang':memory/0 ebif_memory_0
-bif erlang:memory/1
-bif 'erl.lang':memory/1 ebif_memory_1
bif erlang:module_loaded/1
bif 'erl.system.code':is_loaded/1 ebif_is_loaded_1 module_loaded_1
bif erlang:function_exported/3
diff --git a/erts/emulator/beam/erl_afit_alloc.c b/erts/emulator/beam/erl_afit_alloc.c
index bcc7ea04ae..570cc59be2 100644
--- a/erts/emulator/beam/erl_afit_alloc.c
+++ b/erts/emulator/beam/erl_afit_alloc.c
@@ -65,16 +65,20 @@ erts_afalc_start(AFAllctr_t *afallctr,
AFAllctrInit_t *afinit,
AllctrInit_t *init)
{
- AFAllctr_t nulled_state = {{0}};
- /* {{0}} is used instead of {0}, in order to avoid (an incorrect) gcc
- warning. gcc warns if {0} is used as initializer of a struct when
- the first member is a struct (not if, for example, the third member
- is a struct). */
+ struct {
+ int dummy;
+ AFAllctr_t allctr;
+ } zero = {0};
+ /* The struct with a dummy element first is used in order to avoid (an
+ incorrect) gcc warning. gcc warns if {0} is used as initializer of
+ a struct when the first member is a struct (not if, for example,
+ the third member is a struct). */
+
Allctr_t *allctr = (Allctr_t *) afallctr;
- init->sbmbct = 0; /* Small mbc not supported by afit */
+ sys_memcpy((void *) afallctr, (void *) &zero.allctr, sizeof(AFAllctr_t));
- sys_memcpy((void *) afallctr, (void *) &nulled_state, sizeof(AFAllctr_t));
+ init->sbmbct = 0; /* Small mbc not supported by afit */
allctr->mbc_header_size = sizeof(Carrier_t);
allctr->min_mbc_size = MIN_MBC_SZ;
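The dummy-member trick in the hunk above is easy to reproduce in isolation. A minimal sketch (hypothetical types; the warning comes from gcc's -Wmissing-braces, which -Wall enables on the affected gcc versions):

    /* Hypothetical types, for illustration only. */
    struct inner { int a, b; };
    struct outer { struct inner in; int c; };

    struct outer o1 = {0};    /* gcc may warn: missing braces around initializer */
    struct outer o2 = {{0}};  /* old workaround, as removed in the hunk above */

    /* New workaround: with a scalar first member, plain {0} is warning-free,
       and the fully zeroed struct inside can be memcpy'd into place. */
    struct { int dummy; struct outer o; } z = {0};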
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 9af80dd7a9..80951c9b50 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -40,6 +40,7 @@
#include "erl_mseg.h"
#include "erl_monitors.h"
#include "erl_bif_timer.h"
+#include "erl_cpu_topology.h"
#if defined(ERTS_ALC_T_DRV_SEL_D_STATE) || defined(ERTS_ALC_T_DRV_EV_D_STATE)
#include "erl_check_io.h"
#endif
@@ -54,7 +55,14 @@
#include "erl_ao_firstfit_alloc.h"
-#define ERTS_ALC_DEFAULT_MAX_THR_PREF 16
+#if ERTS_MAX_NO_OF_SCHEDULERS > ERTS_AU_MAX_PREF_ALLOC_INSTANCES
+# error "Too many schedulers; cannot create that many pref alloc instances"
+#endif
+
+#define ERTS_ALC_FIX_TYPE_IX(T) \
+ (ERTS_ALC_T2N((T)) - ERTS_ALC_N_MIN_A_FIXED_SIZE)
+
+#define ERTS_ALC_DEFAULT_MAX_THR_PREF ERTS_MAX_NO_OF_SCHEDULERS
#if defined(SMALL_MEMORY) || defined(PURIFY) || defined(VALGRIND)
#define AU_ALLOC_DEFAULT_ENABLE(X) 0
@@ -106,24 +114,43 @@ static ErtsAllocatorState_t eheap_alloc_state;
static ErtsAllocatorState_t binary_alloc_state;
static ErtsAllocatorState_t ets_alloc_state;
static ErtsAllocatorState_t driver_alloc_state;
+static ErtsAllocatorState_t fix_alloc_state;
-ErtsAlcType_t erts_fix_core_allocator_ix;
-#ifdef ERTS_ALC_N_MIN_A_FIXED_SIZE
-static void *(*fix_core_allocator)(ErtsAlcType_t, void *, Uint);
-static void *fix_core_extra;
-static void *fix_core_alloc(Uint size)
+typedef struct {
+ erts_smp_atomic32_t refc;
+ int only_sz;
+ Uint req_sched;
+ Process *proc;
+ Eterm ref;
+ Eterm ref_heap[REF_THING_SIZE];
+ int allocs[ERTS_ALC_A_MAX-ERTS_ALC_A_MIN+1+2];
+} ErtsAllocInfoReq;
+
+#define ERTS_ALC_INFO_A_ALLOC_UTIL (ERTS_ALC_A_MAX + 1)
+#define ERTS_ALC_INFO_A_MSEG_ALLOC (ERTS_ALC_A_MAX + 2)
+#define ERTS_ALC_INFO_A_MAX ERTS_ALC_INFO_A_MSEG_ALLOC
+
+#if !HALFWORD_HEAP
+ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(aireq,
+ ErtsAllocInfoReq,
+ 5,
+ ERTS_ALC_T_AINFO_REQ)
+#else
+static ERTS_INLINE ErtsAllocInfoReq *
+aireq_alloc(void)
{
- void *res;
- res = (*fix_core_allocator)(ERTS_ALC_T_UNDEF, fix_core_extra, size);
- if (erts_mtrace_enabled)
- erts_mtrace_crr_alloc(res,
- ERTS_ALC_A_FIXED_SIZE,
- erts_fix_core_allocator_ix,
- size);
- return res;
+ return erts_alloc(ERTS_ALC_T_AINFO_REQ, sizeof(ErtsAllocInfoReq));
+}
+
+static ERTS_INLINE void
+aireq_free(ErtsAllocInfoReq *ptr)
+{
+ erts_free(ERTS_ALC_T_AINFO_REQ, ptr);
}
#endif
+ErtsAlcType_t erts_fix_core_allocator_ix;
+
enum allctr_type {
GOODFIT,
BESTFIT,
@@ -181,6 +208,7 @@ typedef struct {
struct au_init binary_alloc;
struct au_init ets_alloc;
struct au_init driver_alloc;
+ struct au_init fix_alloc;
#if HALFWORD_HEAP
struct au_init sbmbc_low_alloc;
struct au_init std_low_alloc;
@@ -393,46 +421,52 @@ set_default_driver_alloc_opts(struct au_init *ip)
ip->init.util.ts = ERTS_ALC_MTA_DRIVER;
}
+static void
+set_default_fix_alloc_opts(struct au_init *ip,
+ size_t *fix_type_sizes)
+{
+ SET_DEFAULT_ALLOC_OPTS(ip);
+ ip->enable = AU_ALLOC_DEFAULT_ENABLE(1);
+ ip->thr_spec = 1;
+ ip->atype = BESTFIT;
+ ip->init.bf.ao = 1;
+ ip->init.util.name_prefix = "fix_";
+ ip->init.util.fix_type_size = fix_type_sizes;
+ ip->init.util.alloc_no = ERTS_ALC_A_FIXED_SIZE;
+#ifndef SMALL_MEMORY
+ ip->init.util.mmbcs = 128*1024; /* Main carrier size */
+#else
+ ip->init.util.mmbcs = 128*1024; /* Main carrier size */
+#endif
+ ip->init.util.ts = ERTS_ALC_MTA_FIXED_SIZE;
+}
+
#ifdef ERTS_SMP
static void
adjust_tpref(struct au_init *ip, int no_sched)
{
if (ip->thr_spec) {
- Uint allocs;
- if (ip->thr_spec < 0) {/* User specified amount */
- allocs = abs(ip->thr_spec);
- if (allocs > no_sched)
- allocs = no_sched;
- }
- else if (no_sched > ERTS_ALC_DEFAULT_MAX_THR_PREF)
- allocs = ERTS_ALC_DEFAULT_MAX_THR_PREF;
- else
- allocs = no_sched;
- if (allocs <= 1)
- ip->thr_spec = 0;
- else {
- ip->thr_spec = (int) allocs;
- ip->thr_spec *= -1; /* thread preferred */
-
- /* If default ... */
-
- /* ... shrink main multi-block carrier size */
- if (ip->default_.mmbcs)
- ip->init.util.mmbcs /= ERTS_MIN(4, allocs);
- /* ... shrink largest multi-block carrier size */
- if (ip->default_.lmbcs)
- ip->init.util.lmbcs /= ERTS_MIN(2, allocs);
- /* ... shrink smallest multi-block carrier size */
- if (ip->default_.smbcs)
- ip->init.util.smbcs /= ERTS_MIN(4, allocs);
- /* ... and more than three allocators shrink
- max mseg multi-block carriers */
- if (ip->default_.mmmbc && allocs > 2) {
- ip->init.util.mmmbc /= ERTS_MIN(4, allocs - 1);
- if (ip->init.util.mmmbc < 3)
- ip->init.util.mmmbc = 3;
- }
+ ip->thr_spec = no_sched;
+ ip->thr_spec *= -1; /* thread preferred */
+
+ /* If default ... */
+
+ /* ... shrink main multi-block carrier size */
+ if (ip->default_.mmbcs)
+ ip->init.util.mmbcs /= ERTS_MIN(4, no_sched);
+ /* ... shrink largest multi-block carrier size */
+ if (ip->default_.lmbcs)
+ ip->init.util.lmbcs /= ERTS_MIN(2, no_sched);
+ /* ... shrink smallest multi-block carrier size */
+ if (ip->default_.smbcs)
+ ip->init.util.smbcs /= ERTS_MIN(4, no_sched);
+ /* ... and more than three allocators shrink
+ max mseg multi-block carriers */
+ if (ip->default_.mmmbc && no_sched > 2) {
+ ip->init.util.mmmbc /= ERTS_MIN(4, no_sched - 1);
+ if (ip->init.util.mmmbc < 3)
+ ip->init.util.mmmbc = 3;
}
}
}
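For concreteness, the shrink logic above divides the default carrier sizes by a capped scheduler count. A standalone illustration with made-up sizes (illustrative values only, not the real erts defaults):

    #include <stdio.h>

    #define ERTS_MIN(A, B) ((A) < (B) ? (A) : (B))

    int main(void)
    {
        int no_sched = 8;                        /* e.g. 8 schedulers */
        unsigned long mmbcs = 1024UL * 1024;     /* main mbc, 1 MB     */
        unsigned long lmbcs = 5UL * 1024 * 1024; /* largest mbc, 5 MB  */
        unsigned long smbcs = 256UL * 1024;      /* smallest mbc, 256 KB */

        mmbcs /= ERTS_MIN(4, no_sched);          /* 1 MB   -> 256 KB  */
        lmbcs /= ERTS_MIN(2, no_sched);          /* 5 MB   -> 2.5 MB  */
        smbcs /= ERTS_MIN(4, no_sched);          /* 256 KB -> 64 KB   */

        printf("%lu %lu %lu\n", mmbcs, lmbcs, smbcs);
        return 0;
    }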
@@ -442,7 +476,7 @@ adjust_tpref(struct au_init *ip, int no_sched)
static void handle_args(int *, char **, erts_alc_hndl_args_init_t *);
static void
-set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init);
+set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init, int ncpu);
static void
start_au_allocator(ErtsAlcType_t alctr_n,
@@ -456,8 +490,6 @@ refuse_af_strategy(struct au_init *init)
init->atype = GOODFIT;
}
-static void init_thr_ix(int static_ixs);
-
#ifdef HARD_DEBUG
static void hdbg_init(void);
#endif
@@ -466,7 +498,7 @@ void
erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
{
UWord extra_block_size = 0;
- int i;
+ int i, ncpu;
erts_alc_hndl_args_init_t init = {
0,
#if HAVE_ERTS_MSEG
@@ -474,17 +506,34 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
#endif
ERTS_DEFAULT_TRIM_THRESHOLD,
ERTS_DEFAULT_TOP_PAD,
- ERTS_DEFAULT_ALCU_INIT
+ ERTS_DEFAULT_ALCU_INIT,
};
+ size_t fix_type_sizes[ERTS_ALC_NO_FIXED_SIZES] = {0};
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_PROC)]
+ = sizeof(Process);
+#if !HALFWORD_HEAP
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_MONITOR_SH)]
+ = ERTS_MONITOR_SH_SIZE;
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_NLINK_SH)]
+ = ERTS_LINK_SH_SIZE;
+#endif
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_DRV_EV_D_STATE)]
+ = sizeof(ErtsDrvEventDataState);
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_DRV_SEL_D_STATE)]
+ = sizeof(ErtsDrvSelectDataState);
+ fix_type_sizes[ERTS_ALC_FIX_TYPE_IX(ERTS_ALC_T_MSG_REF)]
+ = sizeof(ErlMessage);
#ifdef HARD_DEBUG
hdbg_init();
#endif
erts_have_sbmbc_alloc = 0;
+ ncpu = eaiop->ncpu;
+ if (ncpu < 1)
+ ncpu = 1;
erts_sys_alloc_init();
- init_thr_ix(erts_no_schedulers);
erts_init_utils_mem();
set_default_sbmbc_alloc_opts(&init.sbmbc_alloc);
@@ -496,20 +545,23 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
set_default_binary_alloc_opts(&init.binary_alloc);
set_default_ets_alloc_opts(&init.ets_alloc);
set_default_driver_alloc_opts(&init.driver_alloc);
+ set_default_fix_alloc_opts(&init.fix_alloc,
+ fix_type_sizes);
if (argc && argv)
handle_args(argc, argv, &init);
- if (erts_no_schedulers <= 1) {
- init.sbmbc_alloc.thr_spec = 0;
- init.sl_alloc.thr_spec = 0;
- init.std_alloc.thr_spec = 0;
- init.ll_alloc.thr_spec = 0;
- init.eheap_alloc.thr_spec = 0;
- init.binary_alloc.thr_spec = 0;
- init.ets_alloc.thr_spec = 0;
- init.driver_alloc.thr_spec = 0;
- }
+#ifndef ERTS_SMP
+ init.sbmbc_alloc.thr_spec = 0;
+ init.sl_alloc.thr_spec = 0;
+ init.std_alloc.thr_spec = 0;
+ init.ll_alloc.thr_spec = 0;
+ init.eheap_alloc.thr_spec = 0;
+ init.binary_alloc.thr_spec = 0;
+ init.ets_alloc.thr_spec = 0;
+ init.driver_alloc.thr_spec = 0;
+ init.fix_alloc.thr_spec = 0;
+#endif
if (init.erts_alloc_config) {
/* Adjust flags that erts_alloc_config won't like */
@@ -522,6 +574,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
init.binary_alloc.thr_spec = 0;
init.ets_alloc.thr_spec = 0;
init.driver_alloc.thr_spec = 0;
+ init.fix_alloc.thr_spec = 0;
}
#ifdef ERTS_SMP
@@ -538,6 +591,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
adjust_tpref(&init.binary_alloc, erts_no_schedulers);
adjust_tpref(&init.ets_alloc, erts_no_schedulers);
adjust_tpref(&init.driver_alloc, erts_no_schedulers);
+ adjust_tpref(&init.fix_alloc, erts_no_schedulers);
#else
/* No thread specific if not smp */
@@ -556,6 +610,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
refuse_af_strategy(&init.binary_alloc);
refuse_af_strategy(&init.ets_alloc);
refuse_af_strategy(&init.driver_alloc);
+ refuse_af_strategy(&init.fix_alloc);
#ifdef ERTS_SMP
if (!init.temp_alloc.thr_spec)
@@ -564,6 +619,7 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
erts_mtrace_pre_init();
#if HAVE_ERTS_MSEG
+ init.mseg.nos = erts_no_schedulers;
erts_mseg_init(&init.mseg);
#endif
erts_alcu_init(&init.alloc_util);
@@ -583,20 +639,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
erts_allctrs_info[i].extra = NULL;
}
-#ifdef ERTS_ALC_N_MIN_A_FIXED_SIZE
-#if !defined(PURIFY) && !defined(VALGRIND)
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].alloc = erts_fix_alloc;
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].realloc = erts_fix_realloc;
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].free = erts_fix_free;
- erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].enabled = 1;
-#else
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].alloc = erts_sys_alloc;
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].realloc = erts_sys_realloc;
- erts_allctrs[ERTS_ALC_A_FIXED_SIZE].free = erts_sys_free;
- erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].enabled = 0;
-#endif
-#endif
-
erts_allctrs[ERTS_ALC_A_SYSTEM].alloc = erts_sys_alloc;
erts_allctrs[ERTS_ALC_A_SYSTEM].realloc = erts_sys_realloc;
erts_allctrs[ERTS_ALC_A_SYSTEM].free = erts_sys_free;
@@ -621,20 +663,21 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
init.ll_low_alloc.init.util.force = 1;
init.ll_low_alloc.init.util.low_mem = 1;
- set_au_allocator(ERTS_ALC_A_SBMBC_LOW, &init.sbmbc_low_alloc);
- set_au_allocator(ERTS_ALC_A_STANDARD_LOW, &init.std_low_alloc);
- set_au_allocator(ERTS_ALC_A_LONG_LIVED_LOW, &init.ll_low_alloc);
+ set_au_allocator(ERTS_ALC_A_SBMBC_LOW, &init.sbmbc_low_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_STANDARD_LOW, &init.std_low_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_LONG_LIVED_LOW, &init.ll_low_alloc, ncpu);
#endif /* HALFWORD */
- set_au_allocator(ERTS_ALC_A_TEMPORARY, &init.temp_alloc);
- set_au_allocator(ERTS_ALC_A_SBMBC, &init.sbmbc_alloc);
- set_au_allocator(ERTS_ALC_A_SHORT_LIVED, &init.sl_alloc);
- set_au_allocator(ERTS_ALC_A_STANDARD, &init.std_alloc);
- set_au_allocator(ERTS_ALC_A_LONG_LIVED, &init.ll_alloc);
- set_au_allocator(ERTS_ALC_A_EHEAP, &init.eheap_alloc);
- set_au_allocator(ERTS_ALC_A_BINARY, &init.binary_alloc);
- set_au_allocator(ERTS_ALC_A_ETS, &init.ets_alloc);
- set_au_allocator(ERTS_ALC_A_DRIVER, &init.driver_alloc);
+ set_au_allocator(ERTS_ALC_A_TEMPORARY, &init.temp_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_SBMBC, &init.sbmbc_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_SHORT_LIVED, &init.sl_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_STANDARD, &init.std_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_LONG_LIVED, &init.ll_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_EHEAP, &init.eheap_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_BINARY, &init.binary_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_ETS, &init.ets_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_DRIVER, &init.driver_alloc, ncpu);
+ set_au_allocator(ERTS_ALC_A_FIXED_SIZE, &init.fix_alloc, ncpu);
for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) {
if (!erts_allctrs[i].alloc)
@@ -650,10 +693,6 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
sys_alloc_opt(SYS_ALLOC_OPT_TRIM_THRESHOLD, init.trim_threshold);
sys_alloc_opt(SYS_ALLOC_OPT_TOP_PAD, init.top_pad);
- if (erts_allctrs_info[ERTS_FIX_CORE_ALLOCATOR].enabled)
- erts_fix_core_allocator_ix = ERTS_FIX_CORE_ALLOCATOR;
- else
- erts_fix_core_allocator_ix = ERTS_ALC_A_SYSTEM;
erts_mtrace_init(init.instr.mtrace, init.instr.nodename);
@@ -710,49 +749,40 @@ erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop)
&init.driver_alloc,
&driver_alloc_state);
- fix_core_allocator = erts_allctrs[erts_fix_core_allocator_ix].alloc;
- fix_core_extra = erts_allctrs[erts_fix_core_allocator_ix].extra;
+ start_au_allocator(ERTS_ALC_A_FIXED_SIZE,
+ &init.fix_alloc,
+ &fix_alloc_state);
erts_mtrace_install_wrapper_functions();
extra_block_size += erts_instr_init(init.instr.stat, init.instr.map);
+#if !HALFWORD_HEAP
+ init_aireq_alloc();
+#endif
+
#ifdef DEBUG
extra_block_size += install_debug_functions();
#endif
-#ifdef ERTS_ALC_N_MIN_A_FIXED_SIZE
-
- erts_init_fix_alloc(extra_block_size, fix_core_alloc);
-
+}
-#if !defined(PURIFY) && !defined(VALGRIND)
- erts_set_fix_size(ERTS_ALC_T_PROC, sizeof(Process));
- erts_set_fix_size(ERTS_ALC_T_DB_TABLE, sizeof(DbTable));
- erts_set_fix_size(ERTS_ALC_T_ATOM, sizeof(Atom));
+void
+erts_alloc_late_init(void)
+{
- erts_set_fix_size(ERTS_ALC_T_MODULE, sizeof(Module));
- erts_set_fix_size(ERTS_ALC_T_REG_PROC, sizeof(RegProc));
- erts_set_fix_size(ERTS_ALC_T_FUN_ENTRY, sizeof(ErlFunEntry));
-#ifdef ERTS_ALC_T_DRV_EV_D_STATE
- erts_set_fix_size(ERTS_ALC_T_DRV_EV_D_STATE,
- sizeof(ErtsDrvEventDataState));
-#endif
-#ifdef ERTS_ALC_T_DRV_SEL_D_STATE
- erts_set_fix_size(ERTS_ALC_T_DRV_SEL_D_STATE,
- sizeof(ErtsDrvSelectDataState));
-#endif
-#if !HALFWORD_HEAP
- erts_set_fix_size(ERTS_ALC_T_EXPORT, sizeof(Export));
- erts_set_fix_size(ERTS_ALC_T_MONITOR_SH, ERTS_MONITOR_SH_SIZE*sizeof(Uint));
- erts_set_fix_size(ERTS_ALC_T_NLINK_SH, ERTS_LINK_SH_SIZE*sizeof(Uint));
-#endif
-#endif
-#endif
+}
+static void *
+erts_realloc_fixed_size(ErtsAlcType_t type, void *extra, void *p, Uint size)
+{
+ erl_exit(ERTS_ABORT_EXIT,
+ "Attempt to reallocate a block of the fixed size type %s\n",
+ ERTS_ALC_T2TD(type));
}
+
static void
-set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
+set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init, int ncpu)
{
ErtsAllocatorFunctions_t *af = &erts_allctrs[alctr_n];
ErtsAllocatorInfo_t *ai = &erts_allctrs_info[alctr_n];
@@ -764,6 +794,12 @@ set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
if (init->init.util.force)
init->enable = 1;
+ tspec->enabled = 0;
+ tspec->dd = 0;
+ tspec->aix = alctr_n;
+ tspec->size = 0;
+ ai->thr_spec = 0;
+
if (!init->enable) {
af->alloc = erts_sys_alloc;
af->realloc = erts_sys_realloc;
@@ -775,14 +811,14 @@ set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
return;
}
- tspec->enabled = 0;
- tspec->all_thr_safe = 0;
- ai->thr_spec = 0;
#ifdef USE_THREADS
+#ifdef ERTS_SMP
if (init->thr_spec) {
if (init->thr_spec > 0) {
af->alloc = erts_alcu_alloc_thr_spec;
- if (init->init.util.ramv)
+ if (init->init.util.fix_type_size)
+ af->realloc = erts_realloc_fixed_size;
+ else if (init->init.util.ramv)
af->realloc = erts_alcu_realloc_mv_thr_spec;
else
af->realloc = erts_alcu_realloc_thr_spec;
@@ -790,12 +826,14 @@ set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
}
else {
af->alloc = erts_alcu_alloc_thr_pref;
- if (init->init.util.ramv)
+ if (init->init.util.fix_type_size)
+ af->realloc = erts_realloc_fixed_size;
+ else if (init->init.util.ramv)
af->realloc = erts_alcu_realloc_mv_thr_pref;
else
af->realloc = erts_alcu_realloc_thr_pref;
af->free = erts_alcu_free_thr_pref;
- tspec->all_thr_safe = 1;
+ tspec->dd = 1;
}
tspec->enabled = 1;
@@ -803,9 +841,13 @@ set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
ai->thr_spec = tspec->size;
}
- else if (init->init.util.ts) {
+ else
+#endif
+ if (init->init.util.ts) {
af->alloc = erts_alcu_alloc_ts;
- if (init->init.util.ramv)
+ if (init->init.util.fix_type_size)
+ af->realloc = erts_realloc_fixed_size;
+ else if (init->init.util.ramv)
af->realloc = erts_alcu_realloc_mv_ts;
else
af->realloc = erts_alcu_realloc_ts;
@@ -815,7 +857,9 @@ set_au_allocator(ErtsAlcType_t alctr_n, struct au_init *init)
#endif
{
af->alloc = erts_alcu_alloc;
- if (init->init.util.ramv)
+ if (init->init.util.fix_type_size)
+ af->realloc = erts_realloc_fixed_size;
+ else if (init->init.util.ramv)
af->realloc = erts_alcu_realloc_mv;
else
af->realloc = erts_alcu_realloc;
@@ -838,12 +882,14 @@ start_au_allocator(ErtsAlcType_t alctr_n,
ErtsAllocatorFunctions_t *af = &erts_allctrs[alctr_n];
ErtsAllocatorInfo_t *ai = &erts_allctrs_info[alctr_n];
ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[alctr_n];
+ ErtsAlcFixList_t *fix_lists = NULL;
+ size_t fix_list_size = 0;
if (!init->enable)
return;
if (init->thr_spec) {
- void *states = erts_sys_alloc(0,
+ char *states = erts_sys_alloc(0,
NULL,
((sizeof(Allctr_t *)
* (tspec->size + 1))
@@ -855,18 +901,40 @@ start_au_allocator(ErtsAlcType_t alctr_n,
"Failed to allocate allocator states for %salloc\n",
init->init.util.name_prefix);
tspec->allctr = (Allctr_t **) states;
- states = ((char *) states) + sizeof(Allctr_t *) * (tspec->size + 1);
+ states += sizeof(Allctr_t *) * (tspec->size + 1);
states = ((((UWord) states) & ERTS_CACHE_LINE_MASK)
- ? (void *) ((((UWord) states) & ~ERTS_CACHE_LINE_MASK)
+ ? (char *) ((((UWord) states) & ~ERTS_CACHE_LINE_MASK)
+ ERTS_CACHE_LINE_SIZE)
- : (void *) states);
- tspec->allctr[0] = init->thr_spec > 0 ? (Allctr_t *) state : (Allctr_t *) NULL;
+ : (char *) states);
+ tspec->allctr[0] = (Allctr_t *) state;
size = tspec->size;
for (i = 1; i < size; i++)
tspec->allctr[i] = (Allctr_t *)
&((ErtsAllocatorState_t *) states)[i-1];
}
+ if (init->init.util.fix_type_size) {
+ size_t tot_fix_list_size;
+ fix_list_size = sizeof(ErtsAlcFixList_t)*ERTS_ALC_NO_FIXED_SIZES;
+ fix_list_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(fix_list_size);
+ tot_fix_list_size = fix_list_size;
+ if (init->thr_spec)
+ tot_fix_list_size *= tspec->size;
+ fix_lists = erts_sys_alloc(0,
+ NULL,
+ (tot_fix_list_size
+ + ERTS_CACHE_LINE_SIZE - 1));
+ if (!fix_lists)
+ erl_exit(ERTS_ABORT_EXIT,
+ "Failed to allocate fix lists for %salloc\n",
+ init->init.util.name_prefix);
+
+ if (((UWord) fix_lists) & ERTS_CACHE_LINE_MASK)
+ fix_lists = ((ErtsAlcFixList_t *)
+ ((((UWord) fix_lists) & ~ERTS_CACHE_LINE_MASK)
+ + ERTS_CACHE_LINE_SIZE));
+ }
+
for (i = 0; i < size; i++) {
void *as;
atype = init->atype;
@@ -877,25 +945,32 @@ start_au_allocator(ErtsAlcType_t alctr_n,
as0 = (void *) tspec->allctr[i];
if (!as0)
continue;
- if (i == 0) {
- if (atype == AFIT)
- atype = GOODFIT;
- init->init.util.ts = 1;
+ if (init->thr_spec < 0) {
+ init->init.util.ts = i == 0;
+ init->init.util.tspec = 0;
+ init->init.util.tpref = -1*init->thr_spec + 1;
}
else {
- if (init->thr_spec < 0) {
+ if (i != 0)
+ init->init.util.ts = 0;
+ else {
+ if (atype == AFIT)
+ atype = GOODFIT;
init->init.util.ts = 1;
- init->init.util.tspec = 0;
- init->init.util.tpref = -1*init->thr_spec;
}
- else {
- init->init.util.ts = 0;
- init->init.util.tspec = init->thr_spec + 1;
- init->init.util.tpref = 0;
- }
- }
+ init->init.util.tspec = init->thr_spec + 1;
+ init->init.util.tpref = 0;
+ }
+ }
+
+ if (fix_lists) {
+ init->init.util.fix = fix_lists;
+ fix_lists = ((ErtsAlcFixList_t *)
+ (((char *) fix_lists) + fix_list_size));
}
+ init->init.util.ix = i;
+
switch (atype) {
case GOODFIT:
as = (void *) erts_gfalc_start((GFAllctr_t *) as0,
@@ -931,11 +1006,8 @@ start_au_allocator(ErtsAlcType_t alctr_n,
af->extra = as;
}
- if (init->thr_spec) {
+ if (init->thr_spec)
af->extra = tspec;
- init->init.util.ts = 1;
- }
-
ai->extra = af->extra;
}
@@ -1055,34 +1127,6 @@ get_amount_value(char *param_end, char** argv, int* ip)
return (Uint) tmp;
}
-static int
-get_bool_or_possitive_amount_value(int *bool, Uint *amount,
- char *param_end, char** argv, int* ip)
-{
- char *param = argv[*ip]+1;
- char *value = get_value(param_end, argv, ip);
- if (strcmp(value, "true") == 0) {
- *bool = 1;
- return 1;
- }
- else if (strcmp(value, "false") == 0) {
- *bool = 0;
- return 1;
- }
- else {
- Sint tmp;
- char *rest;
- errno = 0;
- tmp = (Sint) strtol(value, &rest, 10);
- if (errno != 0 || rest == value || tmp <= 0) {
- bad_value(param, param_end, value);
- return -1;
- }
- *amount = (Uint) tmp;
- return 0;
- }
-}
-
static void
handle_au_arg(struct au_init *auip,
char* sub_param,
@@ -1197,25 +1241,16 @@ handle_au_arg(struct au_init *auip,
goto bad_switch;
break;
case 't': {
- Uint no;
- int enable;
- int res = get_bool_or_possitive_amount_value(&enable,
- &no,
- sub_param+1,
- argv,
- ip);
- if (res > 0)
- auip->thr_spec = enable ? 1 : 0;
+ int res = get_bool_value(sub_param+1, argv, ip);
+ if (res > 0) {
+ auip->thr_spec = 1;
+ break;
+ }
else if (res == 0) {
- int allocs = (int) no;
- if (allocs < 0)
- allocs = INT_MIN;
- else {
- allocs *= -1;
- }
- auip->thr_spec = allocs;
+ auip->thr_spec = 0;
+ break;
}
- break;
+ goto bad_switch;
}
default:
bad_switch:
@@ -1234,6 +1269,7 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init)
&init->eheap_alloc,
&init->ll_alloc,
&init->driver_alloc,
+ &init->fix_alloc,
&init->sl_alloc,
&init->temp_alloc
};
@@ -1264,14 +1300,8 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init)
case 'E':
handle_au_arg(&init->ets_alloc, &argv[i][3], argv, &i);
break;
- case 'F': /* fix_alloc */
- if (has_prefix("e", param+2)) {
- arg = get_value(param+3, argv, &i);
- if (strcmp("true", arg) != 0)
- bad_value(param, param+3, arg);
- }
- else
- bad_param(param, param+2);
+ case 'F':
+ handle_au_arg(&init->fix_alloc, &argv[i][3], argv, &i);
break;
case 'H':
handle_au_arg(&init->eheap_alloc, &argv[i][3], argv, &i);
@@ -1298,12 +1328,6 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init)
#endif
get_amount_value(argv[i]+6, argv, &i);
}
- else if (has_prefix("cci", argv[i]+3)) {
-#if HAVE_ERTS_MSEG
- init->mseg.cci =
-#endif
- get_amount_value(argv[i]+6, argv, &i);
- }
else {
bad_param(param, param+2);
}
@@ -1389,6 +1413,7 @@ handle_args(int *argc, char **argv, erts_alc_hndl_args_init_t *init)
set_default_binary_alloc_opts(&init->binary_alloc);
set_default_ets_alloc_opts(&init->ets_alloc);
set_default_driver_alloc_opts(&init->driver_alloc);
+ set_default_driver_alloc_opts(&init->fix_alloc);
init->driver_alloc.enable = 0;
if (strcmp("r9c", arg) == 0) {
@@ -1523,43 +1548,74 @@ static char *type_no_str(ErtsAlcType_t n)
#define type_str(T) type_no_str(ERTS_ALC_T2N((T)))
-erts_tsd_key_t thr_ix_key;
-erts_spinlock_t alloc_thr_ix_lock;
-int last_thr_ix;
-int first_dyn_thr_ix;
-
-static void
-init_thr_ix(int static_ixs)
+void
+erts_alloc_register_scheduler(void *vesdp)
{
- erts_tsd_key_create(&thr_ix_key);
- erts_spinlock_init(&alloc_thr_ix_lock, "alloc_thr_ix_lock");
- last_thr_ix = -4711;
- first_dyn_thr_ix = static_ixs+1;
+ ErtsSchedulerData *esdp = (ErtsSchedulerData *) vesdp;
+ int ix = (int) esdp->no;
+ int aix;
+
+ for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) {
+ ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix];
+ esdp->alloc_data.deallctr[aix] = NULL;
+ esdp->alloc_data.pref_ix[aix] = -1;
+ if (tspec->enabled) {
+ if (!tspec->dd)
+ esdp->alloc_data.pref_ix[aix] = ix;
+ else {
+ Allctr_t *allctr = tspec->allctr[ix];
+ ASSERT(allctr);
+ esdp->alloc_data.deallctr[aix] = allctr;
+ esdp->alloc_data.pref_ix[aix] = ix;
+ }
+ }
+ }
}
-int
-erts_alc_get_thr_ix(void)
+void
+erts_alloc_scheduler_handle_delayed_dealloc(void *vesdp,
+ int *need_thr_progress,
+ int *more_work)
{
- int ix = (int)(long) erts_tsd_get(thr_ix_key);
- if (ix == 0) {
- erts_spin_lock(&alloc_thr_ix_lock);
- last_thr_ix++;
- if (last_thr_ix < 0)
- last_thr_ix = first_dyn_thr_ix;
- ix = last_thr_ix;
- erts_spin_unlock(&alloc_thr_ix_lock);
- erts_tsd_set(thr_ix_key, (void *)(long) ix);
+ ErtsSchedulerData *esdp = (ErtsSchedulerData *) vesdp;
+ int aix;
+ for (aix = ERTS_ALC_A_MIN; aix <= ERTS_ALC_A_MAX; aix++) {
+ Allctr_t *allctr;
+ if (esdp)
+ allctr = esdp->alloc_data.deallctr[aix];
+ else {
+ ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[aix];
+ if (tspec->enabled && tspec->dd)
+ allctr = tspec->allctr[0];
+ else
+ allctr = NULL;
+ }
+ if (allctr) {
+ erts_alcu_check_delayed_dealloc(allctr,
+ 1,
+ need_thr_progress,
+ more_work);
+ }
}
- ASSERT(ix > 0);
- return ix;
}
-void erts_alloc_reg_scheduler_id(Uint id)
+erts_aint32_t
+erts_alloc_fix_alloc_shrink(int ix, erts_aint32_t flgs)
{
- int ix = (int) id;
- ASSERT(0 < ix && ix <= first_dyn_thr_ix);
- ASSERT(0 == (int) (long) erts_tsd_get(thr_ix_key));
- erts_tsd_set(thr_ix_key, (void *)(long) ix);
+#ifdef ERTS_SMP
+ ErtsAllocatorThrSpec_t *tspec;
+ tspec = &erts_allctr_thr_spec[ERTS_ALC_A_FIXED_SIZE];
+ if (erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].thr_spec && tspec->enabled)
+ return erts_alcu_fix_alloc_shrink(tspec->allctr[ix], flgs);
+ if (ix == 0 && erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].extra)
+ return erts_alcu_fix_alloc_shrink(
+ erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].extra, flgs);
+#else
+ if (ix == 1 && erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].extra)
+ return erts_alcu_fix_alloc_shrink(
+ erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].extra, flgs);
+#endif
+ return 0;
}
static void
@@ -1574,14 +1630,12 @@ erts_alloc_get_verify_unused_temp_alloc(Allctr_t **allctr)
if (erts_allctrs_info[ERTS_ALC_A_TEMPORARY].alloc_util
&& erts_allctrs_info[ERTS_ALC_A_TEMPORARY].thr_spec) {
ErtsAllocatorThrSpec_t *tspec;
+ int ix = ERTS_ALC_GET_THR_IX();
tspec = &erts_allctr_thr_spec[ERTS_ALC_A_TEMPORARY];
- if (!tspec->all_thr_safe) {
- int ix = erts_alc_get_thr_ix();
- if (ix < tspec->size) {
- *allctr = tspec->allctr[ix];
- return erts_alcu_verify_unused;
- }
+ if (ix < tspec->size) {
+ *allctr = tspec->allctr[ix];
+ return erts_alcu_verify_unused;
}
}
@@ -1680,7 +1734,7 @@ erts_realloc_n_enomem(ErtsAlcType_t n, void *ptr, Uint size)
}
static ERTS_INLINE UWord
-alcu_size(ErtsAlcType_t ai)
+alcu_size(ErtsAlcType_t ai, ErtsAlcUFixInfo_t *fi, int fisz)
{
UWord res = 0;
@@ -1690,22 +1744,20 @@ alcu_size(ErtsAlcType_t ai)
if (!erts_allctrs_info[ai].thr_spec) {
Allctr_t *allctr = erts_allctrs_info[ai].extra;
AllctrSize_t asize;
- erts_alcu_current_size(allctr, &asize);
+ erts_alcu_current_size(allctr, &asize, fi, fisz);
res += asize.blocks;
}
else {
ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[ai];
int i;
- ASSERT(tspec->all_thr_safe);
-
ASSERT(tspec->enabled);
for (i = tspec->size - 1; i >= 0; i--) {
Allctr_t *allctr = tspec->allctr[i];
AllctrSize_t asize;
if (allctr) {
- erts_alcu_current_size(allctr, &asize);
+ erts_alcu_current_size(allctr, &asize, fi, fisz);
res += asize.blocks;
}
}
@@ -1733,7 +1785,6 @@ alcu_is_low(ErtsAlcType_t ai)
int found_one = 0;
# endif
- ASSERT(tspec->all_thr_safe);
ASSERT(tspec->enabled);
for (i = tspec->size - 1; i >= 0; i--) {
@@ -1757,11 +1808,24 @@ alcu_is_low(ErtsAlcType_t ai)
}
#endif /* HALFWORD */
+static ERTS_INLINE void
+add_fix_values(UWord *ap, UWord *up, ErtsAlcUFixInfo_t *fi, ErtsAlcType_t type)
+{
+ int ix = ERTS_ALC_T2N(type) - ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ ASSERT(0 <= ix && ix < ERTS_ALC_NO_FIXED_SIZES);
+
+ *ap += (UWord) fi[ix].allocated;
+ *up += (UWord) fi[ix].used;
+}
+
Eterm
erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
{
+/*
+ * NOTE! When updating this function, make sure to also update
+ * erlang:memory/[0,1] in $ERL_TOP/erts/preloaded/src/erlang.erl
+ */
#define ERTS_MEM_NEED_ALL_ALCU (!erts_instr_stat && want_tot_or_sys)
- ErtsFixInfo efi;
struct {
int total;
int processes;
@@ -1800,6 +1864,11 @@ erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
Eterm res = THE_NON_VALUE;
ErtsAlcType_t ai;
int only_one_value = 0;
+ ErtsAlcUFixInfo_t fi[ERTS_ALC_NO_FIXED_SIZES] = {{0,0}};
+
+ ERTS_SMP_LC_ASSERT(erts_smp_is_system_blocked(0)
+ || (ERTS_IS_CRASH_DUMPING
+ && erts_smp_is_system_blocked(ERTS_BS_FLG_ALLOW_GC)));
/* Figure out whats wanted... */
@@ -1969,7 +2038,6 @@ erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
for (ai = ERTS_ALC_A_MIN; ai <= ERTS_ALC_A_MAX; ai++) {
switch (ai) {
case ERTS_ALC_A_SYSTEM:
- case ERTS_ALC_A_FIXED_SIZE:
case ERTS_ALC_A_SBMBC:
#if HALFWORD_HEAP
case ERTS_ALC_A_SBMBC_LOW:
@@ -2029,11 +2097,15 @@ erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
case ERTS_ALC_A_BINARY:
save = &size.binary;
break;
+ case ERTS_ALC_A_FIXED_SIZE:
+ asz = alcu_size(ai, fi, ERTS_ALC_NO_FIXED_SIZES);
+ size.total += asz;
+ continue;
default:
save = NULL;
break;
}
- asz = alcu_size(ai);
+ asz = alcu_size(ai, NULL, 0);
if (save)
*save = asz;
size.total += asz;
@@ -2053,8 +2125,11 @@ erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
if (ERTS_MEM_NEED_ALL_ALCU)
tmp = size.processes;
- else
- tmp = alcu_size(ERTS_ALC_A_EHEAP);
+ else {
+ alcu_size(ERTS_ALC_A_FIXED_SIZE,
+ fi, ERTS_ALC_NO_FIXED_SIZES);
+ tmp = alcu_size(ERTS_ALC_A_EHEAP, NULL, 0);
+ }
tmp += erts_max_processes*sizeof(Process*);
#ifdef HYBRID
tmp += erts_max_processes*sizeof(Process*);
@@ -2064,69 +2139,54 @@ erts_memory(int *print_to_p, void *print_to_arg, void *proc, Eterm earg)
size.processes = size.processes_used = tmp;
-#if HALFWORD_HEAP
- /* BUG: We ignore link and monitor memory */
-#else
- erts_fix_info(ERTS_ALC_T_NLINK_SH, &efi);
- size.processes += efi.total;
- size.processes_used += efi.used;
+ add_fix_values(&size.processes,
+ &size.processes_used,
+ fi,
+ ERTS_ALC_T_PROC);
+#if !HALFWORD_HEAP
+ add_fix_values(&size.processes,
+ &size.processes_used,
+ fi,
+ ERTS_ALC_T_MONITOR_SH);
- erts_fix_info(ERTS_ALC_T_MONITOR_SH, &efi);
- size.processes += efi.total;
- size.processes_used += efi.used;
+ add_fix_values(&size.processes,
+ &size.processes_used,
+ fi,
+ ERTS_ALC_T_NLINK_SH);
#endif
-
- erts_fix_info(ERTS_ALC_T_PROC, &efi);
- size.processes += efi.total;
- size.processes_used += efi.used;
-
- erts_fix_info(ERTS_ALC_T_REG_PROC, &efi);
- size.processes += efi.total;
- size.processes_used += efi.used;
-
+ add_fix_values(&size.processes,
+ &size.processes_used,
+ fi,
+ ERTS_ALC_T_MSG_REF);
}
if (want.atom || want.atom_used) {
Uint reserved_atom_space, atom_space;
erts_atom_get_text_space_sizes(&reserved_atom_space, &atom_space);
size.atom = size.atom_used = atom_table_sz();
- erts_fix_info(ERTS_ALC_T_ATOM, &efi);
- if (want.atom) {
+ if (want.atom)
size.atom += reserved_atom_space;
- size.atom += efi.total;
- }
- if (want.atom_used) {
+ if (want.atom_used)
size.atom_used += atom_space;
- size.atom_used += efi.used;
- }
}
if (!ERTS_MEM_NEED_ALL_ALCU && want.binary)
- size.binary = alcu_size(ERTS_ALC_A_BINARY);
+ size.binary = alcu_size(ERTS_ALC_A_BINARY, NULL, 0);
if (want.code) {
size.code = module_table_sz();
- erts_fix_info(ERTS_ALC_T_MODULE, &efi);
- size.code += efi.used;
size.code += export_table_sz();
-#if HALFWORD_HEAP
size.code += export_list_size() * sizeof(Export);
-#else
- erts_fix_info(ERTS_ALC_T_EXPORT, &efi);
- size.code += efi.used;
-#endif
size.code += erts_fun_table_sz();
- erts_fix_info(ERTS_ALC_T_FUN_ENTRY, &efi);
- size.code += efi.used;
size.code += allocated_modules*sizeof(Range);
size.code += erts_total_code_size;
}
if (want.ets) {
if (!ERTS_MEM_NEED_ALL_ALCU)
- size.ets = alcu_size(ERTS_ALC_A_ETS);
+ size.ets = alcu_size(ERTS_ALC_A_ETS, NULL, 0);
size.ets += erts_get_ets_misc_mem_size();
}
@@ -2199,13 +2259,10 @@ struct aa_values {
Eterm
erts_allocated_areas(int *print_to_p, void *print_to_arg, void *proc)
{
-#define MAX_AA_VALUES \
- (20 + (ERTS_ALC_N_MAX_A_FIXED_SIZE - ERTS_ALC_N_MIN_A_FIXED_SIZE + 1))
-
+#define MAX_AA_VALUES (23)
struct aa_values values[MAX_AA_VALUES];
Eterm res = THE_NON_VALUE;
int i, length;
- ErtsFixInfo efi;
Uint reserved_atom_space, atom_space;
if (proc) {
@@ -2270,6 +2327,11 @@ erts_allocated_areas(int *print_to_p, void *print_to_arg, void *proc)
i++;
values[i].arity = 2;
+ values[i].name = "export_list";
+ values[i].ui[0] = export_list_size() * sizeof(Export);
+ i++;
+
+ values[i].arity = 2;
values[i].name = "register_table";
values[i].ui[0] = process_reg_sz();
i++;
@@ -2314,22 +2376,15 @@ erts_allocated_areas(int *print_to_p, void *print_to_arg, void *proc)
values[i].ui[0] = erts_tot_link_lh_size();
i++;
- {
- Uint n;
-
- for (n = ERTS_ALC_N_MIN_A_FIXED_SIZE;
- n <= ERTS_ALC_N_MAX_A_FIXED_SIZE;
- n++) {
- erts_fix_info(ERTS_ALC_N2T(n), &efi);
-
- values[i].arity = 3;
- values[i].name = ERTS_ALC_N2TD(n);
- values[i].ui[0] = efi.total;
- values[i].ui[1] = efi.used;
- i++;
- }
+ values[i].arity = 2;
+ values[i].name = "process_table";
+ values[i].ui[0] = erts_max_processes*sizeof(Process*);
+ i++;
- }
+ values[i].arity = 2;
+ values[i].name = "ets_misc";
+ values[i].ui[0] = erts_get_ets_misc_mem_size();
+ i++;
length = i;
ASSERT(length <= MAX_AA_VALUES);
@@ -2423,17 +2478,16 @@ erts_alloc_util_allocators(void *proc)
Uint sz;
int i;
/*
- * Currently all allocators except sys_alloc and fix_alloc are
+ * Currently all allocators except sys_alloc are
* alloc_util allocators.
*/
- sz = ((ERTS_ALC_A_MAX + 1 - ERTS_ALC_A_MIN) - 2)*2;
+ sz = ((ERTS_ALC_A_MAX + 1 - ERTS_ALC_A_MIN) - 1)*2;
ASSERT(sz > 0);
hp = HAlloc((Process *) proc, sz);
res = NIL;
for (i = ERTS_ALC_A_MAX; i >= ERTS_ALC_A_MIN; i--) {
switch (i) {
case ERTS_ALC_A_SYSTEM:
- case ERTS_ALC_A_FIXED_SIZE:
break;
default: {
char *alc_str = (char *) ERTS_ALC_A2AD(i);
@@ -2447,259 +2501,6 @@ erts_alloc_util_allocators(void *proc)
return res;
}
-Eterm
-erts_allocator_info_term(void *proc, Eterm which_alloc, int only_sz)
-{
-#define ERTS_AIT_RET(R) \
- do { res = (R); goto done; } while (0)
-#define ERTS_AIT_HALLOC(P, S) \
- do { hp = HAlloc((P), (S)); hp_end = hp + (S); } while (0)
-
- ErtsAlcType_t i;
- Uint sz = 0;
- Uint *hp = NULL;
- Uint *hp_end = NULL;
- Eterm res = am_undefined;
-
- if (is_not_atom(which_alloc))
- goto done;
-
- for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) {
- if (erts_is_atom_str((char *) ERTS_ALC_A2AD(i), which_alloc)) {
- if (!erts_allctrs_info[i].enabled)
- ERTS_AIT_RET(am_false);
- else {
- if (erts_allctrs_info[i].alloc_util) {
- Eterm ires, tmp;
- Eterm **hpp;
- Uint *szp;
- Eterm (*info_func)(Allctr_t *,
- int,
- int *,
- void *,
- Uint **,
- Uint *);
-
- info_func = (only_sz
- ? erts_alcu_sz_info
- : erts_alcu_info);
-
- if (erts_allctrs_info[i].thr_spec) {
- ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[i];
- int j;
- int block_system = !tspec->all_thr_safe;
-
- if (block_system) {
- erts_smp_proc_unlock(proc, ERTS_PROC_LOCK_MAIN);
- erts_smp_block_system(0);
- }
- ASSERT(tspec->enabled);
-
- szp = &sz;
- hpp = NULL;
-
- while (1) {
- ires = NIL;
- for (j = tspec->size - 1; j >= 0; j--) {
- Allctr_t *allctr = tspec->allctr[j];
- if (allctr) {
- tmp = erts_bld_tuple(hpp,
- szp,
- 3,
- erts_bld_atom(hpp,
- szp,
- "instance"),
- make_small((Uint) j),
- (*info_func)(allctr,
- hpp != NULL,
- NULL,
- NULL,
- hpp,
- szp));
- ires = erts_bld_cons(hpp, szp, tmp, ires);
- }
- }
- if (hpp)
- break;
- ERTS_AIT_HALLOC((Process *) proc, sz);
- hpp = &hp;
- szp = NULL;
- }
-
- if (block_system) {
- erts_smp_release_system();
- erts_smp_proc_lock(proc, ERTS_PROC_LOCK_MAIN);
- }
- }
- else {
- Allctr_t *allctr = erts_allctrs_info[i].extra;
- szp = &sz;
- hpp = NULL;
- while (1) {
- ires = NIL;
- tmp = erts_bld_tuple(hpp,
- szp,
- 3,
- erts_bld_atom(hpp,
- szp,
- "instance"),
- make_small((Uint) 0),
- (*info_func)(allctr,
- hpp != NULL,
- NULL,
- NULL,
- hpp,
- szp));
- ires = erts_bld_cons(hpp, szp, tmp, ires);
- if (hpp)
- break;
- ERTS_AIT_HALLOC((Process *) proc, sz);
- hpp = &hp;
- szp = NULL;
- }
- }
- ERTS_AIT_RET(ires);
- }
- else {
- Eterm *szp, **hpp;
-
- switch (i) {
- case ERTS_ALC_A_SYSTEM: {
- SysAllocStat sas;
- Eterm opts_am;
- Eterm opts;
- Eterm as[4]; /* Ok even if !HEAP_ON_C_STACK, not really heap data on stack */
- Eterm ts[4]; /* Ok even if !HEAP_ON_C_STACK, not really heap data on stack */
- int l;
-
- if (only_sz)
- ERTS_AIT_RET(NIL);
-
- sys_alloc_stat(&sas);
- opts_am = am_atom_put("options", 7);
-
- szp = &sz;
- hpp = NULL;
-
- restart_sys_alloc:
- l = 0;
- as[l] = am_atom_put("e", 1);
- ts[l++] = am_true;
- as[l] = am_atom_put("m", 1);
- ts[l++] = am_atom_put("libc", 4);
- if(sas.trim_threshold >= 0) {
- as[l] = am_atom_put("tt", 2);
- ts[l++] = erts_bld_uint(hpp, szp,
- (Uint) sas.trim_threshold);
- }
- if(sas.top_pad >= 0) {
- as[l] = am_atom_put("tp", 2);
- ts[l++] = erts_bld_uint(hpp, szp, (Uint) sas.top_pad);
- }
-
- opts = erts_bld_2tup_list(hpp, szp, l, as, ts);
- res = erts_bld_2tup_list(hpp, szp, 1, &opts_am, &opts);
-
- if (szp) {
- ERTS_AIT_HALLOC((Process *) proc, sz);
- szp = NULL;
- hpp = &hp;
- goto restart_sys_alloc;
- }
- ERTS_AIT_RET(res);
- }
- case ERTS_ALC_A_FIXED_SIZE: {
- ErtsAlcType_t n;
- Eterm as[2], vs[2];
-
- if (only_sz)
- ERTS_AIT_RET(NIL);
-
- as[0] = am_atom_put("options", 7);
- as[1] = am_atom_put("pools", 5);
-
- szp = &sz;
- hpp = NULL;
-
- restart_fix_alloc:
-
- vs[0] = erts_bld_cons(hpp, szp,
- erts_bld_tuple(hpp, szp, 2,
- am_atom_put("e",
- 1),
- am_true),
- NIL);
-
- vs[1] = NIL;
- for (n = ERTS_ALC_N_MIN_A_FIXED_SIZE;
- n <= ERTS_ALC_N_MAX_A_FIXED_SIZE;
- n++) {
- ErtsFixInfo efi;
- erts_fix_info(ERTS_ALC_N2T(n), &efi);
-
- vs[1] = erts_bld_cons(
- hpp, szp,
- erts_bld_tuple(
- hpp, szp, 3,
- am_atom_put((char *) ERTS_ALC_N2TD(n),
- strlen(ERTS_ALC_N2TD(n))),
- erts_bld_uint(hpp, szp, efi.total),
- erts_bld_uint(hpp, szp, efi.used)),
- vs[1]);
-
- }
-
- res = erts_bld_2tup_list(hpp, szp, 2, as, vs);
- if (szp) {
- ERTS_AIT_HALLOC((Process *) proc, sz);
- szp = NULL;
- hpp = &hp;
- goto restart_fix_alloc;
- }
- ERTS_AIT_RET(res);
- }
- default:
- ASSERT(0);
- goto done;
- }
- }
- }
- }
- }
-
- if (ERTS_IS_ATOM_STR("mseg_alloc", which_alloc)) {
-#if HAVE_ERTS_MSEG
- if (only_sz)
- ERTS_AIT_RET(NIL);
- erts_mseg_info(NULL, NULL, 0, NULL, &sz);
- if (sz)
- ERTS_AIT_HALLOC((Process *) proc, sz);
- ERTS_AIT_RET(erts_mseg_info(NULL, NULL, 1, &hp, NULL));
-#else
- ERTS_AIT_RET(am_false);
-#endif
-
- }
- else if (ERTS_IS_ATOM_STR("alloc_util", which_alloc)) {
- if (only_sz)
- ERTS_AIT_RET(NIL);
- erts_alcu_au_info_options(NULL, NULL, NULL, &sz);
- if (sz)
- ERTS_AIT_HALLOC((Process *) proc, sz);
- ERTS_AIT_RET(erts_alcu_au_info_options(NULL, NULL, &hp, NULL));
- }
-
- done:
- if (hp) {
- ASSERT(hp_end >= hp);
- HRelease((Process *) proc, hp_end, hp);
- }
- return res;
-
-#undef ERTS_AIT_RET
-#undef ERTS_AIT_HALLOC
-}
-
void
erts_allocator_info(int to, void *arg)
{
@@ -2748,22 +2549,6 @@ erts_allocator_info(int to, void *arg)
erts_print(to, arg, "option tp: %d\n", sas.top_pad);
break;
}
- case ERTS_ALC_A_FIXED_SIZE: {
- ErtsAlcType_t n;
- erts_print(to, arg, "option e: true\n");
-
- for (n = ERTS_ALC_N_MIN_A_FIXED_SIZE;
- n <= ERTS_ALC_N_MAX_A_FIXED_SIZE;
- n++) {
- ErtsFixInfo efi;
- erts_fix_info(ERTS_ALC_N2T(n), &efi);
- erts_print(to, arg, "%s: %lu %lu\n",
- ERTS_ALC_N2TD(n),
- efi.total,
- efi.used);
- }
- break;
- }
default:
ASSERT(0);
break;
@@ -2774,8 +2559,18 @@ erts_allocator_info(int to, void *arg)
}
#if HAVE_ERTS_MSEG
- erts_print(to, arg, "=allocator:mseg_alloc\n");
- erts_mseg_info(&to, arg, 0, NULL, NULL);
+ {
+#ifdef ERTS_SMP
+ int max = (int) erts_no_schedulers;
+#else
+ int max = 0;
+#endif
+ int i;
+ for (i = 0; i <= max; i++) {
+ erts_print(to, arg, "=allocator:mseg_alloc[%d]\n", i);
+ erts_mseg_info(i, &to, arg, 0, NULL, NULL);
+ }
+ }
#endif
erts_print(to, arg, "=allocator:alloc_util\n");
@@ -2829,7 +2624,7 @@ erts_allocator_options(void *proc)
use_mseg++;
#endif
if (erts_allctr_thr_spec[a].enabled)
- allctr = erts_allctr_thr_spec[a].allctr[1];
+ allctr = erts_allctr_thr_spec[a].allctr[0];
else
allctr = erts_allctrs_info[a].extra;
tmp = erts_alcu_info_options(allctr, NULL, NULL, hpp, szp);
@@ -2878,7 +2673,7 @@ erts_allocator_options(void *proc)
#if HAVE_ERTS_MSEG
if (use_mseg) {
atoms[length] = am_atom_put("mseg_alloc", 10);
- terms[length++] = erts_mseg_info_options(NULL, NULL, hpp, szp);
+ terms[length++] = erts_mseg_info_options(0, NULL, NULL, hpp, szp);
}
#endif
@@ -2982,6 +2777,313 @@ void *erts_alloc_permanent_cache_aligned(ErtsAlcType_t type, Uint size)
return (void*)v;
}
+static void
+reply_alloc_info(void *vair)
+{
+ ErtsAllocInfoReq *air = (ErtsAllocInfoReq *) vair;
+ Uint sched_id = erts_get_scheduler_id();
+ int global_instances = air->req_sched == sched_id;
+ ErtsProcLocks rp_locks;
+ Process *rp = air->proc;
+ Eterm ref_copy = NIL, ai_list, msg;
+ Eterm *hp = NULL, *hp_end = NULL, *hp_start = NULL;
+ Eterm **hpp;
+ Uint sz, *szp;
+ ErlOffHeap *ohp = NULL;
+ ErlHeapFragment *bp = NULL;
+ int i;
+ Eterm (*info_func)(Allctr_t *,
+ int,
+ int *,
+ void *,
+ Uint **,
+ Uint *) = (air->only_sz
+ ? erts_alcu_sz_info
+ : erts_alcu_info);
+
+ rp_locks = air->req_sched == sched_id ? ERTS_PROC_LOCK_MAIN : 0;
+
+ sz = 0;
+ hpp = NULL;
+ szp = &sz;
+
+ while (1) {
+
+ if (hpp)
+ ref_copy = STORE_NC(hpp, ohp, air->ref);
+ else
+ *szp += REF_THING_SIZE;
+
+ ai_list = NIL;
+ for (i = 0; air->allocs[i] != ERTS_ALC_A_INVALID; i++);
+ for (i--; i >= 0; i--) {
+ int ai = air->allocs[i];
+ Allctr_t *allctr;
+ Eterm ainfo;
+ Eterm alloc_atom;
+ if (global_instances) {
+ switch (ai) {
+ case ERTS_ALC_A_SYSTEM: {
+ alloc_atom = erts_bld_atom(hpp, szp, "sys_alloc");
+ ainfo = NIL;
+ if (!air->only_sz) {
+ SysAllocStat sas;
+ if (hpp)
+ sys_alloc_stat(&sas);
+ if (szp) {
+ /* ensure enough heap */
+ sas.top_pad = INT_MAX;
+ sas.trim_threshold = INT_MAX;
+ }
+ if (sas.top_pad >= 0) {
+ ainfo = erts_bld_cons(
+ hpp, szp,
+ erts_bld_tuple(
+ hpp, szp, 2,
+ erts_bld_atom(hpp, szp, "tp"),
+ erts_bld_uint(
+ hpp, szp,
+ (Uint) sas.top_pad)),
+ ainfo);
+ }
+ if (sas.trim_threshold >= 0) {
+ ainfo = erts_bld_cons(
+ hpp, szp,
+ erts_bld_tuple(
+ hpp, szp, 2,
+ erts_bld_atom(hpp, szp, "tt"),
+ erts_bld_uint(
+ hpp, szp,
+ (Uint) sas.trim_threshold)),
+ ainfo);
+ }
+ ainfo = erts_bld_cons(hpp, szp,
+ erts_bld_tuple(
+ hpp, szp, 2,
+ erts_bld_atom(hpp, szp,
+ "m"),
+ erts_bld_atom(hpp, szp,
+ "libc")),
+ ainfo);
+ ainfo = erts_bld_cons(hpp, szp,
+ erts_bld_tuple(
+ hpp, szp, 2,
+ erts_bld_atom(hpp, szp,
+ "e"),
+ am_true),
+ ainfo);
+ ainfo = erts_bld_tuple(hpp, szp, 2,
+ erts_bld_atom(hpp, szp,
+ "otps"),
+ ainfo);
+ }
+ ainfo = erts_bld_tuple(hpp, szp, 3,
+ alloc_atom,
+ make_small(0),
+ ainfo);
+ break;
+ }
+ case ERTS_ALC_INFO_A_ALLOC_UTIL:
+ alloc_atom = erts_bld_atom(hpp, szp, "alloc_util");
+ ainfo = (air->only_sz
+ ? NIL
+ : erts_alcu_au_info_options(NULL, NULL,
+ hpp, szp));
+ ainfo = erts_bld_tuple(hpp, szp, 3,
+ alloc_atom,
+ make_small(0),
+ ainfo);
+ break;
+ case ERTS_ALC_INFO_A_MSEG_ALLOC:
+ alloc_atom = erts_bld_atom(hpp, szp, "mseg_alloc");
+#if HAVE_ERTS_MSEG
+ ainfo = (air->only_sz
+ ? NIL
+ : erts_mseg_info(0, NULL, NULL, hpp != NULL,
+ hpp, szp));
+ ainfo = erts_bld_tuple(hpp, szp, 3,
+ alloc_atom,
+ make_small(0),
+ ainfo);
+#else
+ ainfo = erts_bld_tuple(hpp, szp, 2, alloc_atom,
+ am_false);
+#endif
+ break;
+ default:
+ alloc_atom = erts_bld_atom(hpp, szp,
+ (char *) ERTS_ALC_A2AD(ai));
+ if (!erts_allctrs_info[ai].enabled)
+ ainfo = erts_bld_tuple(hpp, szp, 2, alloc_atom,
+ am_false);
+ else if (erts_allctrs_info[ai].alloc_util) {
+ if (erts_allctrs_info[ai].thr_spec)
+ allctr = erts_allctr_thr_spec[ai].allctr[0];
+ else
+ allctr = erts_allctrs_info[ai].extra;
+ ainfo = info_func(allctr, hpp != NULL, NULL,
+ NULL, hpp, szp);
+ ainfo = erts_bld_tuple(hpp, szp, 3, alloc_atom,
+ make_small(0), ainfo);
+ }
+ else {
+ erl_exit(ERTS_ABORT_EXIT, "%s:%d: internal error\n",
+ __FILE__, __LINE__);
+ }
+ }
+ ai_list = erts_bld_cons(hpp, szp,
+ ainfo, ai_list);
+ }
+ switch (ai) {
+ case ERTS_ALC_A_SYSTEM:
+ case ERTS_ALC_INFO_A_ALLOC_UTIL:
+ break;
+ case ERTS_ALC_INFO_A_MSEG_ALLOC:
+#if HAVE_ERTS_MSEG && defined(ERTS_SMP)
+ alloc_atom = erts_bld_atom(hpp, szp, "mseg_alloc");
+ ainfo = (air->only_sz
+ ? NIL
+ : erts_mseg_info(sched_id, NULL, NULL,
+ hpp != NULL, hpp, szp));
+ ainfo = erts_bld_tuple(hpp, szp, 3,
+ alloc_atom,
+ make_small(sched_id),
+ ainfo);
+ ai_list = erts_bld_cons(hpp, szp, ainfo, ai_list);
+#endif
+ break;
+ default:
+ if (erts_allctrs_info[ai].thr_spec) {
+ alloc_atom = erts_bld_atom(hpp, szp,
+ (char *) ERTS_ALC_A2AD(ai));
+ allctr = erts_allctr_thr_spec[ai].allctr[sched_id];
+ ainfo = info_func(allctr, hpp != NULL, NULL,
+ NULL, hpp, szp);
+ ai_list = erts_bld_cons(hpp, szp,
+ erts_bld_tuple(
+ hpp, szp,
+ 3,
+ alloc_atom,
+ make_small(sched_id),
+ ainfo),
+ ai_list);
+ }
+ break;
+ }
+ msg = erts_bld_tuple(hpp, szp,
+ 3,
+ ref_copy,
+ make_small(sched_id),
+ ai_list);
+
+ }
+ if (hpp)
+ break;
+
+ hp = erts_alloc_message_heap(sz, &bp, &ohp, rp, &rp_locks);
+ hp_start = hp;
+ hp_end = hp + sz;
+ szp = NULL;
+ hpp = &hp;
+ }
+ if (bp)
+ bp = erts_resize_message_buffer(bp, hp - hp_start, &msg, 1);
+ else {
+ ASSERT(hp);
+ HRelease(rp, hp_end, hp);
+ }
+
+ erts_queue_message(rp, &rp_locks, bp, msg, NIL);
+
+ if (air->req_sched == sched_id)
+ rp_locks &= ~ERTS_PROC_LOCK_MAIN;
+
+ erts_smp_proc_unlock(rp, rp_locks);
+ erts_smp_proc_dec_refc(rp);
+
+ if (erts_smp_atomic32_dec_read_nob(&air->refc) == 0)
+ aireq_free(air);
+}
+
+int
+erts_request_alloc_info(struct process *c_p,
+ Eterm ref,
+ Eterm allocs,
+ int only_sz)
+{
+ ErtsAllocInfoReq *air = aireq_alloc();
+ Eterm req_ai[ERTS_ALC_A_MAX+1+2] = {0};
+ Eterm alist;
+ Eterm *hp;
+ int airix = 0, ai;
+
+ air->req_sched = erts_get_scheduler_id();
+
+ air->only_sz = only_sz;
+
+ air->proc = c_p;
+
+ if (is_not_internal_ref(ref))
+ return 0;
+
+ hp = &air->ref_heap[0];
+ air->ref = STORE_NC(&hp, NULL, ref);
+
+ if (is_not_list(allocs))
+ return 0;
+
+ alist = allocs;
+
+ while (is_list(alist)) {
+ int saved = 0;
+ Eterm* consp = list_val(alist);
+ Eterm alloc = CAR(consp);
+
+ for (ai = ERTS_ALC_A_MIN; ai <= ERTS_ALC_A_MAX; ai++)
+ if (erts_is_atom_str((char *) erts_alc_a2ad[ai], alloc))
+ goto save_alloc;
+ if (erts_is_atom_str("mseg_alloc", alloc)) {
+ ai = ERTS_ALC_INFO_A_MSEG_ALLOC;
+ goto save_alloc;
+ }
+ if (erts_is_atom_str("alloc_util", alloc)) {
+ ai = ERTS_ALC_INFO_A_ALLOC_UTIL;
+ save_alloc:
+ if (req_ai[ai])
+ return 0;
+ air->allocs[airix++] = ai;
+ req_ai[ai] = 1;
+ saved = 1;
+ }
+
+ if (!saved)
+ return 0;
+
+ alist = CDR(consp);
+ }
+
+ if (is_not_nil(alist))
+ return 0;
+
+ air->allocs[airix] = ERTS_ALC_A_INVALID;
+
+ erts_smp_atomic32_init_nob(&air->refc,
+ (erts_aint32_t) erts_no_schedulers);
+
+ erts_smp_proc_add_refc(c_p, (Sint32) erts_no_schedulers);
+
+#ifdef ERTS_SMP
+ if (erts_no_schedulers > 1)
+ erts_smp_schedule_misc_aux_work(1,
+ erts_no_schedulers,
+ reply_alloc_info,
+ (void *) air);
+#endif
+
+ reply_alloc_info((void *) air);
+
+ return 1;
+}
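The request/reply machinery added above follows a refcounted fan-out: the requesting scheduler initializes a reference count to the number of schedulers, every scheduler builds and sends its own reply, and whichever scheduler drops the count to zero frees the shared request. A minimal sketch of that lifetime rule (hypothetical names; C11 atomics in place of the erts atomic API):

    #include <stdatomic.h>
    #include <stdlib.h>

    typedef struct {
        atomic_int refc;          /* one reference per scheduler */
        /* ... request payload ... */
    } request_t;

    static void handle_on_scheduler(request_t *req)
    {
        /* each scheduler builds and sends its own reply here ... */

        /* the last scheduler out frees the shared request */
        if (atomic_fetch_sub_explicit(&req->refc, 1,
                                      memory_order_acq_rel) == 1)
            free(req);
    }

    static void broadcast(request_t *req, int no_schedulers)
    {
        atomic_init(&req->refc, no_schedulers);
        /* schedule handle_on_scheduler(req) on every other scheduler,
           then run it directly on the requesting one, as the code above
           does with reply_alloc_info(). */
    }

This is why reply_alloc_info() can run concurrently on all schedulers without any lock around the shared ErtsAllocInfoReq.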
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
* Deprecated functions *
diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h
index 80cb82c393..f4133cdb1a 100644
--- a/erts/emulator/beam/erl_alloc.h
+++ b/erts/emulator/beam/erl_alloc.h
@@ -43,43 +43,40 @@
# define ERTS_ALC_INLINE
#endif
-#define ERTS_FIX_CORE_ALLOCATOR ERTS_ALC_A_LONG_LIVED
-extern ErtsAlcType_t erts_fix_core_allocator_ix;
-
-typedef struct {
- Uint total;
- Uint used;
-} ErtsFixInfo;
+#define ERTS_ALC_NO_FIXED_SIZES \
+ (ERTS_ALC_N_MAX_A_FIXED_SIZE - ERTS_ALC_N_MIN_A_FIXED_SIZE + 1)
void erts_sys_alloc_init(void);
void *erts_sys_alloc(ErtsAlcType_t, void *, Uint);
void *erts_sys_realloc(ErtsAlcType_t, void *, void *, Uint);
void erts_sys_free(ErtsAlcType_t, void *, void *);
-
-void erts_init_fix_alloc(Uint, void *(*)(Uint));
-Uint erts_get_fix_size(ErtsAlcType_t);
-void erts_set_fix_size(ErtsAlcType_t, Uint);
-void erts_fix_info(ErtsAlcType_t, ErtsFixInfo *);
-void *erts_fix_alloc(ErtsAlcType_t, void *, Uint);
-void *erts_fix_realloc(ErtsAlcType_t, void *, void*, Uint);
-void erts_fix_free(ErtsAlcType_t, void *, void*);
-
-
Eterm erts_memory(int *, void *, void *, Eterm);
Eterm erts_allocated_areas(int *, void *, void *);
Eterm erts_alloc_util_allocators(void *proc);
void erts_allocator_info(int, void *);
-Eterm erts_allocator_info_term(void *proc, Eterm which_alloc, int only_sz);
Eterm erts_allocator_options(void *proc);
+struct process;
+
+int erts_request_alloc_info(struct process *c_p, Eterm ref, Eterm allocs,
+ int only_sz);
+
#define ERTS_ALLOC_INIT_DEF_OPTS_INITER {0}
typedef struct {
- int dummy;
+ int ncpu;
} ErtsAllocInitOpts;
+typedef struct {
+ Allctr_t *deallctr[ERTS_ALC_A_MAX+1];
+ int pref_ix[ERTS_ALC_A_MAX+1];
+ int flist_ix[ERTS_ALC_A_MAX+1];
+ int pre_alc_ix;
+} ErtsSchedAllocData;
+
void erts_alloc_init(int *argc, char **argv, ErtsAllocInitOpts *eaiop);
+void erts_alloc_late_init(void);
#if defined(GET_ERTS_ALC_TEST) || defined(ERTS_ALC_INTERNAL__)
/* Only for testing */
@@ -126,15 +123,19 @@ extern ErtsAllocatorInfo_t erts_allctrs_info[ERTS_ALC_A_MAX+1];
typedef struct {
int enabled;
- int all_thr_safe;
+ int dd;
+ int aix;
int size;
Allctr_t **allctr;
} ErtsAllocatorThrSpec_t;
extern ErtsAllocatorThrSpec_t erts_allctr_thr_spec[ERTS_ALC_A_MAX+1];
-int erts_alc_get_thr_ix(void);
-void erts_alloc_reg_scheduler_id(Uint id);
+void erts_alloc_register_scheduler(void *vesdp);
+void erts_alloc_scheduler_handle_delayed_dealloc(void *vesdp,
+ int *need_thr_progress,
+ int *more_work);
+erts_aint32_t erts_alloc_fix_alloc_shrink(int ix, erts_aint32_t flgs);
__decl_noreturn void erts_alloc_enomem(ErtsAlcType_t,Uint)
__noreturn;
@@ -252,6 +253,8 @@ void *erts_realloc_fnf(ErtsAlcType_t type, void *ptr, Uint size)
#endif /* #if ERTS_ALC_DO_INLINE || defined(ERTS_ALC_INTERNAL__) */
+#define ERTS_ALC_GET_THR_IX() ((int) erts_get_scheduler_id())
+
typedef void (*erts_alloc_verify_func_t)(Allctr_t *);
erts_alloc_verify_func_t
@@ -436,136 +439,41 @@ NAME##_free(TYPE *p) \
} \
}
-typedef struct {
- void *start;
- void *end;
- int chunks_mem_size;
-} erts_sched_pref_quick_alloc_data_t;
-
-#ifdef DEBUG
-#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P) \
-do { \
- ASSERT((void *) (C) < (void *) (P)); \
- ASSERT((void *) (P) \
- < (void *) (((char *) (C)) + (A)->chunks_mem_size)); \
-} while (0)
-#else
-#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)
-#endif
+#include "erl_sched_spec_pre_alloc.h"
#define ERTS_SCHED_PREF_PRE_ALLOC_IMPL(NAME, TYPE, PASZ) \
-union erts_qa_##NAME##__ { \
+union erts_sspa_##NAME##__ { \
+ erts_sspa_blk_t next; \
TYPE type; \
- union erts_qa_##NAME##__ *next; \
}; \
-typedef struct { \
- erts_smp_spinlock_t lock; \
- union erts_qa_##NAME##__ *freelist; \
- union erts_qa_##NAME##__ pre_alloced[1]; \
-} erts_qa_##NAME##_chunk__; \
-static erts_sched_pref_quick_alloc_data_t *qa_data_##NAME##__; \
-static ERTS_INLINE erts_qa_##NAME##_chunk__ * \
-get_##NAME##_chunk_ix(int cix) \
-{ \
- char *ptr = (char *) qa_data_##NAME##__->start; \
- ptr += cix*qa_data_##NAME##__->chunks_mem_size; \
- return (erts_qa_##NAME##_chunk__ *) ptr; \
-} \
-static ERTS_INLINE erts_qa_##NAME##_chunk__ * \
-get_##NAME##_chunk_ptr(void *ptr) \
-{ \
- int cix; \
- size_t diff; \
- if (ptr < qa_data_##NAME##__->start || qa_data_##NAME##__->end <= ptr)\
- return NULL; \
- diff = ((char *) ptr) - ((char *) qa_data_##NAME##__->start); \
- cix = diff / qa_data_##NAME##__->chunks_mem_size; \
- return get_##NAME##_chunk_ix(cix); \
-} \
+ \
+static erts_sspa_data_t *sspa_data_##NAME##__; \
+ \
static void \
init_##NAME##_alloc(void) \
{ \
- size_t tot_size; \
- size_t chunk_mem_size; \
- char *chunk_start; \
- int cix; \
- int no_blocks = ERTS_PRE_ALLOC_SIZE((PASZ)); \
- int no_blocks_per_chunk = 2*((no_blocks-1)/erts_no_schedulers + 1); \
- no_blocks = no_blocks_per_chunk * erts_no_schedulers; \
- chunk_mem_size = sizeof(erts_qa_##NAME##_chunk__); \
- chunk_mem_size += (sizeof(union erts_qa_##NAME##__) \
- * (no_blocks_per_chunk - 1)); \
- chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size); \
- tot_size = sizeof(erts_sched_pref_quick_alloc_data_t); \
- tot_size += ERTS_CACHE_LINE_SIZE - 1; \
- tot_size += chunk_mem_size*erts_no_schedulers; \
- qa_data_##NAME##__ = erts_alloc(ERTS_ALC_T_PRE_ALLOC_DATA,tot_size);\
- chunk_start = (((char *) qa_data_##NAME##__) \
- + sizeof(erts_sched_pref_quick_alloc_data_t)); \
- if ((((UWord) chunk_start) & ERTS_CACHE_LINE_MASK) != ((UWord) 0)) \
- chunk_start = ((char *) \
- ((((UWord) chunk_start) & ~ERTS_CACHE_LINE_MASK) \
- + ERTS_CACHE_LINE_SIZE)); \
- qa_data_##NAME##__->chunks_mem_size = chunk_mem_size; \
- qa_data_##NAME##__->start = (void *) chunk_start; \
- qa_data_##NAME##__->end = (chunk_start \
- + chunk_mem_size*erts_no_schedulers); \
- for (cix = 0; cix < erts_no_schedulers; cix++) { \
- int i; \
- erts_qa_##NAME##_chunk__ *chunk = get_##NAME##_chunk_ix(cix); \
- erts_smp_spinlock_init(&chunk->lock, #NAME "_alloc_lock"); \
- chunk->freelist = &chunk->pre_alloced[0]; \
- for (i = 1; i < no_blocks_per_chunk; i++) { \
- ERTS_PRE_ALLOC_CLOBBER(&chunk->pre_alloced[i-1], \
- union erts_qa_##NAME##__); \
- chunk->pre_alloced[i-1].next = &chunk->pre_alloced[i]; \
- } \
- ERTS_PRE_ALLOC_CLOBBER(&chunk->pre_alloced[no_blocks_per_chunk-1],\
- union erts_qa_##NAME##__); \
- chunk->pre_alloced[no_blocks_per_chunk-1].next = NULL; \
- } \
+ sspa_data_##NAME##__ = \
+ erts_sspa_create(sizeof(union erts_sspa_##NAME##__), \
+ ERTS_PRE_ALLOC_SIZE((PASZ))); \
} \
-static ERTS_INLINE TYPE * \
+ \
+static TYPE * \
NAME##_alloc(void) \
{ \
- int cix = ((int) erts_get_scheduler_id()) - 1; \
- TYPE *res; \
- if (cix < 0) \
- res = NULL; \
- else { \
- erts_qa_##NAME##_chunk__ *chunk = get_##NAME##_chunk_ix(cix); \
- erts_smp_spin_lock(&chunk->lock); \
- if (!chunk->freelist) \
- res = NULL; \
- else { \
- res = &chunk->freelist->type; \
- chunk->freelist = chunk->freelist->next; \
- ERTS_SPPA_DBG_CHK_IN_CHNK(qa_data_##NAME##__, chunk, res); \
- } \
- erts_smp_spin_unlock(&chunk->lock); \
- } \
- return res; \
+ ErtsSchedulerData *esdp = erts_get_scheduler_data(); \
+ if (!esdp) \
+ return NULL; \
+ return (TYPE *) erts_sspa_alloc(sspa_data_##NAME##__, \
+ (int) esdp->no - 1); \
} \
-static ERTS_INLINE int \
+ \
+static int \
NAME##_free(TYPE *p) \
{ \
- erts_qa_##NAME##_chunk__ *chunk; \
- chunk = get_##NAME##_chunk_ptr((void *) p); \
- if (!chunk) \
- return 0; \
- else { \
- union erts_qa_##NAME##__ *up; \
- ERTS_SPPA_DBG_CHK_IN_CHNK(qa_data_##NAME##__, chunk, p); \
- up = ((union erts_qa_##NAME##__ *) \
- (((char *) p) \
- - ((char *) &((union erts_qa_##NAME##__ *) 0)->type))); \
- erts_smp_spin_lock(&chunk->lock); \
- ERTS_PRE_ALLOC_CLOBBER(up, union erts_qa_##NAME##__); \
- up->next = chunk->freelist; \
- chunk->freelist = up; \
- erts_smp_spin_unlock(&chunk->lock); \
- return 1; \
- } \
+ ErtsSchedulerData *esdp = erts_get_scheduler_data(); \
+ return erts_sspa_free(sspa_data_##NAME##__, \
+ esdp ? (int) esdp->no - 1 : -1, \
+ (char *) p); \
}
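The rewritten macro delegates all chunk bookkeeping to the new erl_sched_spec_pre_alloc layer instead of the per-chunk spinlocks it replaces. As a hedged orientation sketch of an instantiation (the my_blk/MyBlk names are invented, and ERTS_ALC_T_UNDEF merely stands in for whatever fallback type a real caller would use):

    typedef struct { int fd; int mode; } MyBlk;      /* hypothetical */

    /* Defines sspa_data_my_blk__, init_my_blk_alloc(),
       my_blk_alloc() and my_blk_free(), all backed by one
       erts_sspa_create() area holding a chunk per scheduler. */
    ERTS_SCHED_PREF_PRE_ALLOC_IMPL(my_blk, MyBlk, 512)

    static MyBlk *get_blk(void)
    {
        MyBlk *p = my_blk_alloc();   /* NULL off scheduler threads or
                                        when the local chunk is empty */
        if (!p)
            p = erts_alloc(ERTS_ALC_T_UNDEF, sizeof(MyBlk));
        return p;
    }

    static void put_blk(MyBlk *p)
    {
        if (!my_blk_free(p))         /* 0: block was not pre-allocated */
            erts_free(ERTS_ALC_T_UNDEF, p);
    }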
#ifdef DEBUG
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index eda0831441..7a27ee28ec 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -133,29 +133,25 @@ class SYSTEM system_data
# should be deallocated before the emulator starts executing Erlang
# code again.
#
-# NOTE: When adding or removing a type which uses the FIXED_SIZE allocator,
-# also add or remove initialization of the type in erts_alloc_init()
-# (erl_alloc.c).
-#
# <TYPE> <ALLOCATOR> <CLASS> <DESCRIPTION>
type SBMBC SBMBC SYSTEM small_block_mbc
type PROC FIXED_SIZE PROCESSES proc
-type ATOM FIXED_SIZE ATOM atom_entry
-type MODULE FIXED_SIZE CODE module_entry
-type REG_PROC FIXED_SIZE PROCESSES reg_proc
+type ATOM LONG_LIVED ATOM atom_entry
+type MODULE LONG_LIVED CODE module_entry
+type REG_PROC STANDARD PROCESSES reg_proc
type LINK_LH STANDARD PROCESSES link_lh
type SUSPEND_MON STANDARD PROCESSES suspend_monitor
type PEND_SUSPEND SHORT_LIVED PROCESSES pending_suspend
type PROC_LIST SHORT_LIVED PROCESSES proc_list
-type FUN_ENTRY FIXED_SIZE CODE fun_entry
+type FUN_ENTRY LONG_LIVED CODE fun_entry
type ATOM_TXT LONG_LIVED ATOM atom_text
type BEAM_REGISTER EHEAP PROCESSES beam_register
type HEAP EHEAP PROCESSES heap
type OLD_HEAP EHEAP PROCESSES old_heap
type HEAP_FRAG EHEAP PROCESSES heap_frag
type TMP_HEAP TEMPORARY PROCESSES tmp_heap
-type MSG_REF SHORT_LIVED PROCESSES msg_ref
+type MSG_REF FIXED_SIZE PROCESSES msg_ref
type MSG_ROOTS TEMPORARY PROCESSES msg_roots
type ROOTSET TEMPORARY PROCESSES root_set
type LOADER_TMP TEMPORARY CODE loader_tmp
@@ -199,7 +195,7 @@ type TMP_DIST_BUF TEMPORARY SYSTEM tmp_dist_buf
type ASYNC_Q LONG_LIVED SYSTEM async_queue
type ESTACK TEMPORARY SYSTEM estack
type PORT_CALL_BUF TEMPORARY SYSTEM port_call_buf
-type DB_TABLE FIXED_SIZE ETS db_tab
+type DB_TABLE ETS ETS db_tab
type DB_FIXATION SHORT_LIVED ETS db_fixation
type DB_FIX_DEL SHORT_LIVED ETS fixed_del
type DB_TABLES LONG_LIVED ETS db_tabs
@@ -256,6 +252,7 @@ type TMP_CPU_IDS SHORT_LIVED SYSTEM tmp_cpu_ids
type EXT_TERM_DATA SHORT_LIVED PROCESSES external_term_data
type ZLIB STANDARD SYSTEM zlib
type CPU_GRPS_MAP LONG_LIVED SYSTEM cpu_groups_map
+type AUX_WORK_TMO LONG_LIVED SYSTEM aux_work_timeouts
+if smp
type ASYNC SHORT_LIVED SYSTEM async
@@ -273,6 +270,8 @@ type PROC_LCK_QS LONG_LIVED SYSTEM proc_lock_queues
type RUNQ_BLNS LONG_LIVED SYSTEM run_queue_balancing
type MISC_AUX_WORK_Q LONG_LIVED SYSTEM misc_aux_work_q
type MISC_AUX_WORK SHORT_LIVED SYSTEM misc_aux_work
+type THR_PRGR_IDATA LONG_LIVED SYSTEM thr_prgr_internal_data
+type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data
+endif
#
@@ -346,10 +345,10 @@ type DB_MS_PSDO_PROC LONG_LIVED_LOW ETS db_match_pseudo_proc
type SCHDLR_DATA LONG_LIVED_LOW SYSTEM scheduler_data
type LL_TEMP_TERM LONG_LIVED_LOW SYSTEM ll_temp_term
-# no FIXED_SIZE for low memory
-type EXPORT STANDARD_LOW CODE export_entry
+type EXPORT LONG_LIVED_LOW CODE export_entry
type MONITOR_SH STANDARD_LOW PROCESSES monitor_sh
type NLINK_SH STANDARD_LOW PROCESSES nlink_sh
+type AINFO_REQ STANDARD_LOW SYSTEM alloc_info_request
+else # "fullword"
@@ -362,9 +361,10 @@ type DB_MS_PSDO_PROC LONG_LIVED ETS db_match_pseudo_proc
type SCHDLR_DATA LONG_LIVED SYSTEM scheduler_data
type LL_TEMP_TERM LONG_LIVED SYSTEM ll_temp_term
-type EXPORT FIXED_SIZE CODE export_entry
+type EXPORT LONG_LIVED CODE export_entry
type MONITOR_SH FIXED_SIZE PROCESSES monitor_sh
type NLINK_SH FIXED_SIZE PROCESSES nlink_sh
+type AINFO_REQ SHORT_LIVED SYSTEM alloc_info_request
+endif
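The reshuffling above follows from folding fix_alloc into the alloc_util instances: FIXED_SIZE now denotes the per-instance fix lists, which presumably pay off mainly for high-churn types allocated on scheduler threads (hence MSG_REF and PROC), while former fix types with long-lived, low-churn entries (ATOM, MODULE, FUN_ENTRY, EXPORT) migrate to LONG_LIVED and the remainder to STANDARD or ETS. A hypothetical declaration in the documented <TYPE> <ALLOCATOR> <CLASS> <DESCRIPTION> format (MY_THING is not a real type):

    type MY_THING FIXED_SIZE PROCESSES my_thing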
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index d51ed0c36d..daf8822564 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -46,6 +46,7 @@
#include "erl_alloc_util.h"
#include "erl_mseg.h"
#include "erl_threads.h"
+#include "erl_thr_progress.h"
#ifdef ERTS_ENABLE_LOCK_COUNT
#include "erl_lock_count.h"
@@ -61,6 +62,13 @@
#warning "* * * * * * * * * *"
#endif
+#define ERTS_ALCU_DD_OPS_LIM_HIGH 20
+#define ERTS_ALCU_DD_OPS_LIM_LOW 2
+
+/* Fix alloc limit */
+#define ERTS_ALCU_FIX_MAX_LIST_SZ 1000
+#define ERTS_ALC_FIX_MAX_SHRINK_OPS 30
+
#define ALLOC_ZERO_EQ_NULL 0
static int atoms_initialized = 0;
@@ -269,7 +277,6 @@ static void check_blk_carrier(Allctr_t *, Block_t *);
#define HARD_CHECK_BLK_CARRIER(A, B)
#endif
-
/* Statistics updating ... */
#ifdef DEBUG
@@ -465,26 +472,36 @@ do { \
#ifdef DEBUG
#ifdef USE_THREADS
-#define ERTS_ALCU_DBG_CHK_THR_SPEC(A) \
+#define ERTS_ALCU_DBG_CHK_THR_ACCESS(A) \
do { \
if (!(A)->thread_safe) { \
- if (!(A)->debug.saved_tid) \
+ if (!(A)->debug.saved_tid) { \
(A)->debug.tid = erts_thr_self(); \
+ (A)->debug.saved_tid = 1; \
+ } \
else { \
- ASSERT(ethr_equal_tids((A)->debug.tid, erts_thr_self())); \
+ ERTS_SMP_LC_ASSERT( \
+ ethr_equal_tids((A)->debug.tid, erts_thr_self()) \
+ || erts_is_system_blocked(ERTS_BS_FLG_ALLOW_GC) \
+ || (ERTS_IS_CRASH_DUMPING \
+ && erts_is_system_blocked(ERTS_BS_FLG_ALLOW_GC))); \
} \
} \
} while (0)
#else
-#define ERTS_ALCU_DBG_CHK_THR_SPEC(A)
+#define ERTS_ALCU_DBG_CHK_THR_ACCESS(A)
#endif
#else
-#define ERTS_ALCU_DBG_CHK_THR_SPEC(A)
+#define ERTS_ALCU_DBG_CHK_THR_ACCESS(A)
#endif
static void make_name_atoms(Allctr_t *allctr);
+static Block_t *create_carrier(Allctr_t *, Uint, UWord);
+static void destroy_carrier(Allctr_t *, Block_t *);
+static void mbc_free(Allctr_t *allctr, void *p);
+
/* mseg ... */
@@ -651,6 +668,446 @@ static void destroy_sbmbc(Allctr_t *allctr, Block_t *blk);
static Block_t *create_carrier(Allctr_t *, Uint, UWord);
static void destroy_carrier(Allctr_t *, Block_t *);
+#if 0
+#define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B) \
+ do { if ((FIX)) chk_fix_list((A), (FIX), (IX), (B)); } while (0)
+static void
+chk_fix_list(Allctr_t *allctr, ErtsAlcFixList_t *fix, int ix, int before)
+{
+ void *p;
+ int n;
+ for (n = 0, p = fix[ix].list; p; p = *((void **) p))
+ n++;
+ if (n != fix[ix].list_size) {
+ erts_fprintf(stderr, "FOUND IT ts=%d, sched=%d, ix=%d, n=%d, ls=%d %s!\n",
+ allctr->thread_safe, allctr->ix, ix, n, fix[ix].list_size, before ? "before" : "after");
+ abort();
+ }
+}
+#else
+#define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B)
+#endif
+
+erts_aint32_t
+erts_alcu_fix_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs)
+{
+ int all_empty = 1;
+ erts_aint32_t res = 0;
+ int ix, o;
+ ErtsAlcFixList_t *fix = allctr->fix;
+ int flush = flgs == 0;
+
+#ifdef USE_THREADS
+ if (allctr->thread_safe)
+ erts_mtx_lock(&allctr->mutex);
+#endif
+
+ for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) {
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
+ if (flgs & ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM) {
+ fix[ix].limit = fix[ix].max_used;
+ if (fix[ix].limit < fix[ix].used)
+ fix[ix].limit = fix[ix].used;
+ fix[ix].max_used = fix[ix].used;
+ ASSERT(fix[ix].limit >= 0);
+
+ }
+ if (flush) {
+ fix[ix].limit = 0;
+ fix[ix].max_used = fix[ix].used;
+ ASSERT(fix[ix].limit >= 0);
+ }
+ for (o = 0; o < ERTS_ALC_FIX_MAX_SHRINK_OPS || flush; o++) {
+ Block_t *blk;
+ void *ptr;
+
+ if (!flush && fix[ix].limit >= fix[ix].allocated)
+ break;
+ if (fix[ix].list_size == 0)
+ break;
+ ptr = fix[ix].list;
+ fix[ix].list = *((void **) ptr);
+ fix[ix].list_size--;
+
+ blk = UMEM2BLK(ptr);
+
+ if (IS_SBC_BLK(blk))
+ destroy_carrier(allctr, blk);
+ else
+ mbc_free(allctr, ptr);
+
+ fix[ix].allocated--;
+ }
+ if (fix[ix].list_size != 0) {
+ if (fix[ix].limit < fix[ix].allocated)
+ res |= ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC;
+ all_empty = 0;
+ }
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
+ }
+
+ if (all_empty && allctr->fix_shrink_scheduled) {
+ allctr->fix_shrink_scheduled = 0;
+ erts_set_aux_work_timeout(allctr->ix,
+ (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC),
+ 0);
+ }
+
+#ifdef USE_THREADS
+ if (allctr->thread_safe)
+ erts_mtx_unlock(&allctr->mutex);
+#endif
+
+ return res;
+}
+
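The flags argument selects the shrink policy: 0 requests a full flush of every fix list, while ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM lowers each slot's limit to the high-water mark seen since the previous aux-work timeout before releasing at most ERTS_ALC_FIX_MAX_SHRINK_OPS blocks. A hedged sketch of the aux-work side using the wrapper declared in erl_alloc.h above (the real plumbing lives in erl_alloc.c and erl_process.c per the diffstat):

    int ix = allctr->ix;    /* instance index, one per scheduler */
    erts_aint32_t more;

    more = erts_alloc_fix_alloc_shrink(ix,
                                       ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM);
    if (more & ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC) {
        /* some list is still above its limit; keep the aux-work
           flag set so another shrink round runs later */
    }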
+#ifdef ERTS_SMP
+
+#define ERTS_ALCU_DD_FIX_TYPE_OFFS \
+ ((sizeof(ErtsAllctrDDBlock_t)-1)/sizeof(UWord) + 1)
+
+#define ERTS_AU_PREF_ALLOC_IX_MASK \
+ ((((UWord) 1) << ERTS_AU_PREF_ALLOC_BITS) - 1)
+#define ERTS_AU_PREF_ALLOC_SIZE_MASK \
+ ((((UWord) 1) << (sizeof(UWord)*8 - ERTS_AU_PREF_ALLOC_BITS)) - 1)
+
+static ERTS_INLINE int
+get_pref_allctr(void *extra, Allctr_t **allctr)
+{
+ ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
+ int pref_ix;
+
+ pref_ix = ERTS_ALC_GET_THR_IX();
+
+ ASSERT(sizeof(UWord) == sizeof(Allctr_t *));
+ ASSERT(0 <= pref_ix && pref_ix < tspec->size);
+
+ *allctr = tspec->allctr[pref_ix];
+ return pref_ix;
+}
+
+static ERTS_INLINE void *
+get_used_allctr(void *extra, void *p, Allctr_t **allctr, UWord *sizep)
+{
+ ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
+ void *ptr = (void *) (((char *) p) - sizeof(UWord));
+ UWord ainfo = *((UWord *) ptr);
+ int aix = (int) (ainfo & ERTS_AU_PREF_ALLOC_IX_MASK);
+ *allctr = tspec->allctr[aix];
+ if (sizep)
+ *sizep = ((ainfo >> ERTS_AU_PREF_ALLOC_BITS)
+ & ERTS_AU_PREF_ALLOC_SIZE_MASK);
+ return ptr;
+}
+
+static ERTS_INLINE void *
+put_used_allctr(void *p, int ix, UWord size)
+{
+ UWord ainfo = (size >= ERTS_AU_PREF_ALLOC_SIZE_MASK
+ ? ERTS_AU_PREF_ALLOC_SIZE_MASK
+ : size);
+ ainfo <<= ERTS_AU_PREF_ALLOC_BITS;
+ ainfo |= (UWord) ix;
+ *((UWord *) p) = ainfo;
+ return (void *) (((char *) p) + sizeof(UWord));
+}
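Together, put_used_allctr() and get_used_allctr() replace the old scheme of storing a raw Allctr_t pointer in front of each thread-preferred block: the hidden header word now packs the owning instance index into its low ERTS_AU_PREF_ALLOC_BITS and a saturated copy of the user size into the remaining bits, so realloc can usually learn the old size without taking the used instance's lock. A standalone illustration of the layout (demo code, not ERTS source; uintptr_t stands in for UWord):

    #include <assert.h>
    #include <stdint.h>

    #define PREF_BITS 11   /* ERTS_AU_PREF_ALLOC_BITS */
    #define IX_MASK   ((((uintptr_t) 1) << PREF_BITS) - 1)
    #define SZ_MASK   ((((uintptr_t) 1) << (sizeof(uintptr_t)*8 - PREF_BITS)) - 1)

    int main(void)
    {
        uintptr_t size = 1000, ix = 5;
        uintptr_t ainfo = ((size >= SZ_MASK ? SZ_MASK : size) << PREF_BITS) | ix;

        assert((ainfo & IX_MASK) == ix);                  /* instance index */
        assert(((ainfo >> PREF_BITS) & SZ_MASK) == size); /* user size */
        /* Sizes >= SZ_MASK saturate; realloc_thr_pref() below then
           falls back to reading BLK_SZ() under the allocator lock. */
        return 0;
    }

With 11 bits the index field addresses up to 2048 instances, matching ERTS_AU_MAX_PREF_ALLOC_INSTANCES in erl_alloc_util.h further down.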
+
+static void
+init_dd_queue(ErtsAllctrDDQueue_t *ddq)
+{
+ erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL);
+ erts_atomic_init_nob(&ddq->tail.data.last,
+ (erts_aint_t) &ddq->tail.data.marker);
+ erts_atomic_init_nob(&ddq->tail.data.um_refc[0], 0);
+ erts_atomic_init_nob(&ddq->tail.data.um_refc[1], 0);
+ erts_atomic32_init_nob(&ddq->tail.data.um_refc_ix, 0);
+ ddq->head.first = &ddq->tail.data.marker;
+ ddq->head.unref_end = &ddq->tail.data.marker;
+ ddq->head.next.thr_progress = erts_thr_progress_current();
+ ddq->head.next.thr_progress_reached = 1;
+ ddq->head.next.um_refc_ix = 1;
+ ddq->head.next.unref_end = &ddq->tail.data.marker;
+ ddq->head.used_marker = 1;
+}
+
+static ERTS_INLINE erts_aint_t
+ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr)
+{
+ erts_aint_t ilast, itmp;
+ ErtsAllctrDDBlock_t *this = ptr;
+
+ erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL);
+
+ /* Enqueue at end of list... */
+
+ ilast = erts_atomic_read_nob(&ddq->tail.data.last);
+ while (1) {
+ ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
+ itmp = erts_atomic_cmpxchg_mb(&last->atmc_next,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL)
+ break;
+ ilast = itmp;
+ }
+
+ /* Move last pointer forward... */
+ while (1) {
+ if (erts_atomic_read_rb(&this->atmc_next) != ERTS_AINT_NULL) {
+ /* Someone else will move it forward */
+ return erts_atomic_read_rb(&ddq->tail.data.last);
+ }
+ itmp = erts_atomic_cmpxchg_mb(&ddq->tail.data.last,
+ (erts_aint_t) this,
+ ilast);
+ if (ilast == itmp)
+ return (erts_aint_t) this;
+ ilast = itmp;
+ }
+}
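This is a two-phase lock-free enqueue onto an intrusive multi-producer queue; the queue always contains at least the marker element (see init_dd_queue() above), so the last pointer is never invalid. For readers more used to C11 atomics, a generic restatement with invented node_t/tail_t types (memory orders simplified to the seq_cst defaults, whereas the code above uses finer-grained barriers):

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct node { _Atomic(struct node *) next; } node_t;
    typedef struct { _Atomic(node_t *) last; } tail_t;

    static void enqueue(tail_t *q, node_t *n)
    {
        node_t *last = atomic_load(&q->last);

        atomic_store(&n->next, NULL);

        /* Phase 1: link n after the current last element.  On CAS
           failure 'expected' holds the element that beat us, so we
           chase the chain from there. */
        for (;;) {
            node_t *expected = NULL;
            if (atomic_compare_exchange_strong(&last->next, &expected, n))
                break;
            last = expected;
        }

        /* Phase 2: swing q->last forward to n.  If n->next is already
           set, a later enqueuer is past us and will move q->last
           itself, so giving up is safe. */
        for (;;) {
            node_t *expected = last;
            if (atomic_load(&n->next) != NULL)
                return;
            if (atomic_compare_exchange_strong(&q->last, &expected, n))
                return;
            last = expected;
        }
    }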
+
+static ERTS_INLINE int
+ddq_enqueue(ErtsAlcType_t type, ErtsAllctrDDQueue_t *ddq, void *ptr)
+{
+ erts_aint_t ilast;
+ int um_refc_ix = 0;
+ int managed_thread = erts_thr_progress_is_managed_thread();
+ if (!managed_thread) {
+ um_refc_ix = erts_atomic32_read_acqb(&ddq->tail.data.um_refc_ix);
+ while (1) {
+ int tmp_um_refc_ix;
+ erts_atomic_inc_acqb(&ddq->tail.data.um_refc[um_refc_ix]);
+ tmp_um_refc_ix = erts_atomic32_read_acqb(&ddq->tail.data.um_refc_ix);
+ if (tmp_um_refc_ix == um_refc_ix)
+ break;
+ erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]);
+ um_refc_ix = tmp_um_refc_ix;
+ }
+ }
+
+ ilast = ddq_managed_thread_enqueue(ddq, ptr);
+
+ if (!managed_thread)
+ erts_atomic_dec_relb(&ddq->tail.data.um_refc[um_refc_ix]);
+ return ilast == (erts_aint_t) ptr;
+}
+
+static ERTS_INLINE void *
+ddq_dequeue(ErtsAllctrDDQueue_t *ddq)
+{
+ ErtsAllctrDDBlock_t *blk;
+
+ if (ddq->head.first == ddq->head.unref_end)
+ return NULL;
+
+ blk = ddq->head.first;
+ if (blk == &ddq->tail.data.marker) {
+ ASSERT(ddq->head.used_marker);
+ ddq->head.used_marker = 0;
+ blk = ((ErtsAllctrDDBlock_t *)
+ erts_atomic_read_nob(&blk->atmc_next));
+ if (blk == ddq->head.unref_end) {
+ ddq->head.first = blk;
+ return NULL;
+ }
+ }
+
+ ddq->head.first = ((ErtsAllctrDDBlock_t *)
+ erts_atomic_read_nob(&blk->atmc_next));
+
+ ASSERT(ddq->head.first);
+
+ return (void *) blk;
+}
+
+static int
+ddq_check_incoming(ErtsAllctrDDQueue_t *ddq)
+{
+ erts_aint_t ilast = erts_atomic_read_nob(&ddq->tail.data.last);
+ if (((ErtsAllctrDDBlock_t *) ilast) == &ddq->tail.data.marker
+ && ddq->head.first == &ddq->tail.data.marker) {
+ /* Nothing more to do... */
+ return 0;
+ }
+
+ if (ddq->head.next.thr_progress_reached
+ || erts_thr_progress_has_reached(ddq->head.next.thr_progress)) {
+ int um_refc_ix;
+ ddq->head.next.thr_progress_reached = 1;
+ um_refc_ix = ddq->head.next.um_refc_ix;
+ if (erts_atomic_read_acqb(&ddq->tail.data.um_refc[um_refc_ix]) == 0) {
+ /* Move unreferenced end pointer forward... */
+
+ ddq->head.unref_end = ddq->head.next.unref_end;
+
+ if (!ddq->head.used_marker
+ && ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast) {
+ ddq->head.used_marker = 1;
+ ilast = ddq_managed_thread_enqueue(ddq, &ddq->tail.data.marker);
+ }
+
+ if (ddq->head.unref_end == (ErtsAllctrDDBlock_t *) ilast)
+ ERTS_THR_MEMORY_BARRIER;
+ else {
+ ddq->head.next.unref_end = (ErtsAllctrDDBlock_t *) ilast;
+ ERTS_THR_MEMORY_BARRIER;
+ ddq->head.next.thr_progress = erts_thr_progress_later();
+ erts_atomic32_set_relb(&ddq->tail.data.um_refc_ix,
+ um_refc_ix);
+ ddq->head.next.um_refc_ix = um_refc_ix == 0 ? 1 : 0;
+ ddq->head.next.thr_progress_reached = 0;
+ }
+ }
+ }
+ return 1;
+}
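The dequeue side stays synchronization-free because blocks between head.first and head.unref_end are guaranteed unreachable from concurrent enqueuers. Promoting head.next.unref_end requires two independent conditions, which can be stated as a hypothetical predicate (not part of the patch):

    /* May head.unref_end advance to head.next.unref_end?  Mirrors
       the two tests in ddq_check_incoming() above. */
    static int may_advance_unref_end(ErtsAllctrDDQueue_t *ddq)
    {
        int ix = ddq->head.next.um_refc_ix;
        /* (1) all managed threads have passed the recorded
           thread-progress value, and (2) no unmanaged thread still
           holds a reference for the current epoch. */
        return (ddq->head.next.thr_progress_reached
                || erts_thr_progress_has_reached(ddq->head.next.thr_progress))
            && erts_atomic_read_acqb(&ddq->tail.data.um_refc[ix]) == 0;
    }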
+
+static ERTS_INLINE int
+handle_delayed_dealloc(Allctr_t *allctr,
+ int allctr_locked,
+ int use_limit,
+ int ops_limit,
+ int *need_thr_progress,
+ int *need_more_work)
+{
+ int need_thr_prgr = 0;
+ int need_mr_wrk = 0;
+ int have_checked_incoming = 0;
+ int ops = 0;
+ ErtsAlcFixList_t *fix;
+ int res;
+ ErtsAllctrDDQueue_t *ddq;
+
+ if (allctr->thread_safe && !allctr_locked)
+ erts_mtx_lock(&allctr->mutex);
+
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
+
+ fix = allctr->fix;
+
+ ddq = &allctr->dd.q;
+
+ res = 0;
+
+ while (1) {
+ Block_t *blk;
+ void *ptr;
+ int ix;
+
+ if (use_limit && ++ops > ops_limit) {
+ if (ddq->head.first != ddq->head.unref_end) {
+ need_mr_wrk = 1;
+ if (need_more_work)
+ *need_more_work |= 1;
+ }
+ break;
+ }
+
+ dequeue:
+ ptr = ddq_dequeue(ddq);
+ if (!ptr) {
+ if (have_checked_incoming)
+ break;
+ need_thr_prgr = ddq_check_incoming(ddq);
+ if (need_thr_progress)
+ *need_thr_progress |= need_thr_prgr;
+ have_checked_incoming = 1;
+ goto dequeue;
+ }
+
+ res = 1;
+
+ INC_CC(allctr->calls.this_free);
+
+ if (fix) {
+ ErtsAlcType_t type;
+
+ type = (ErtsAlcType_t) ((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS];
+ ix = type - ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
+ fix[ix].used--;
+ if (fix[ix].allocated < fix[ix].limit
+ && fix[ix].list_size < ERTS_ALCU_FIX_MAX_LIST_SZ) {
+ *((void **) ptr) = fix[ix].list;
+ fix[ix].list = ptr;
+ fix[ix].list_size++;
+ if (!allctr->fix_shrink_scheduled) {
+ allctr->fix_shrink_scheduled = 1;
+ erts_set_aux_work_timeout(
+ allctr->ix,
+ (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC),
+ 1);
+ }
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
+ continue;
+ }
+ fix[ix].allocated--;
+ if (fix[ix].list && fix[ix].allocated > fix[ix].limit) {
+ blk = UMEM2BLK(ptr);
+ if (IS_SBC_BLK(blk))
+ destroy_carrier(allctr, blk);
+ else
+ mbc_free(allctr, ptr);
+ ptr = fix[ix].list;
+ fix[ix].list = *((void **) ptr);
+ fix[ix].list_size--;
+ fix[ix].allocated--;
+ }
+ }
+
+ blk = UMEM2BLK(ptr);
+
+ if (IS_SBC_BLK(blk))
+ destroy_carrier(allctr, blk);
+ else
+ mbc_free(allctr, ptr);
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
+ }
+
+ if (need_thr_progress && !(need_thr_prgr | need_mr_wrk)) {
+ need_thr_prgr = ddq_check_incoming(ddq);
+ *need_thr_progress |= need_thr_prgr;
+ }
+
+ if (allctr->thread_safe && !allctr_locked)
+ erts_mtx_unlock(&allctr->mutex);
+ return res;
+}
+
+static ERTS_INLINE void
+enqueue_dealloc_other_instance(ErtsAlcType_t type, Allctr_t *allctr, void *ptr)
+{
+ if (allctr->fix)
+ ((UWord *) ptr)[ERTS_ALCU_DD_FIX_TYPE_OFFS] = (UWord) type;
+
+ if (ddq_enqueue(type, &allctr->dd.q, ptr))
+ erts_alloc_notify_delayed_dealloc(allctr->ix);
+}
+
+#endif
+
+void
+erts_alcu_check_delayed_dealloc(Allctr_t *allctr,
+ int limit,
+ int *need_thr_progress,
+ int *more_work)
+{
+#ifdef ERTS_SMP
+ handle_delayed_dealloc(allctr,
+ 0,
+ limit,
+ ERTS_ALCU_DD_OPS_LIM_HIGH,
+ need_thr_progress,
+ more_work);
+#endif
+}
+
+#define ERTS_ALCU_HANDLE_DD_IN_OP(Allctr, Locked) \
+ handle_delayed_dealloc((Allctr), (Locked), 1, \
+ ERTS_ALCU_DD_OPS_LIM_LOW, NULL, NULL)
+
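Two draining budgets are in play: ERTS_ALCU_DD_OPS_LIM_HIGH (20) when a scheduler processes delayed deallocations as aux work through erts_alcu_check_delayed_dealloc(), and ERTS_ALCU_DD_OPS_LIM_LOW (2) when the queue is drained opportunistically inside an ordinary operation via the macro above. A hedged sketch of the aux-work side (the real caller is in erl_process.c per the diffstat):

    int need_thr_progress = 0;
    int more_work = 0;

    erts_alcu_check_delayed_dealloc(allctr,
                                    1,            /* enforce ops limit */
                                    &need_thr_progress,
                                    &more_work);
    if (more_work) {
        /* not fully drained within 20 ops: keep the
           delayed-dealloc aux-work flag set */
    }
    if (need_thr_progress) {
        /* tail blocks only reclaimable after further thread
           progress: ask the thread-progress subsystem for a wakeup */
    }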
/* Multi block carrier alloc/realloc/free ... */
/* NOTE! mbc_alloc() may in case of memory shortage place the requested
@@ -680,8 +1137,21 @@ mbc_alloc_block(Allctr_t *allctr, Uint size, Uint *blk_szp, Uint32 *alcu_flgsp)
}
}
+#ifdef ERTS_SMP
+ if (allctr->dd.use)
+ ERTS_ALCU_HANDLE_DD_IN_OP(allctr, 1);
+#endif
+
blk = (*allctr->get_free_block)(allctr, get_blk_sz, NULL, 0, *alcu_flgsp);
+#ifdef ERTS_SMP
+ if (!blk && allctr->dd.use) {
+ if (ERTS_ALCU_HANDLE_DD_IN_OP(allctr, 1))
+ blk = (*allctr->get_free_block)(allctr, get_blk_sz, NULL, 0,
+ *alcu_flgsp);
+ }
+#endif
+
if (!blk) {
if ((*alcu_flgsp) & ERTS_ALCU_FLG_SBMBC)
blk = create_sbmbc(allctr, get_blk_sz);
@@ -939,6 +1409,11 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs)
Uint is_last_blk;
#endif /* #ifndef MBC_REALLOC_ALWAYS_MOVES */
+#ifdef ERTS_SMP
+ if (allctr->dd.use)
+ ERTS_ALCU_HANDLE_DD_IN_OP(allctr, 1);
+#endif
+
ASSERT(p);
ASSERT(size);
ASSERT(size < allctr->sbc_threshold);
@@ -1005,7 +1480,6 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs)
cand_blk,
cand_blk_sz,
alcu_flgs);
-
if (new_blk || cand_blk != blk)
goto move_into_new_blk;
}
@@ -1441,7 +1915,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags)
goto try_sys_alloc;
if (flags & CFLG_FORCE_MSEG)
goto try_mseg;
- if (erts_mseg_no() >= max_mseg_carriers)
+ if (erts_mseg_no(&allctr->mseg_opt) >= max_mseg_carriers)
goto try_sys_alloc;
if (flags & CFLG_SBC) {
if (allctr->sbcs.curr.norm.mseg.no >= allctr->max_mseg_sbcs)
@@ -1840,8 +2314,12 @@ static struct {
Eterm ycs;
/* Eterm sbmbcs; */
+
+ Eterm fix_types;
+
Eterm mbcs;
Eterm sbcs;
+
Eterm sys_alloc_carriers_size;
#if HAVE_ERTS_MSEG
Eterm mseg_alloc_carriers_size;
@@ -1871,6 +2349,8 @@ static struct {
#endif
} am;
+static Eterm fix_type_atoms[ERTS_ALC_NO_FIXED_SIZES];
+
static ERTS_INLINE void atom_init(Eterm *atom, char *name)
{
*atom = am_atom_put(name, strlen(name));
@@ -1891,6 +2371,7 @@ init_atoms(Allctr_t *allctr)
erts_mtx_lock(&init_atoms_mtx);
if (!atoms_initialized) {
+ int ix;
#ifdef DEBUG
Eterm *atom;
@@ -1933,8 +2414,12 @@ init_atoms(Allctr_t *allctr)
AM_INIT(ycs);
/*AM_INIT(sbmbcs);*/
+
+ AM_INIT(fix_types);
+
AM_INIT(mbcs);
AM_INIT(sbcs);
+
AM_INIT(sys_alloc_carriers_size);
#if HAVE_ERTS_MSEG
AM_INIT(mseg_alloc_carriers_size);
@@ -1965,6 +2450,13 @@ init_atoms(Allctr_t *allctr)
ASSERT(*atom != THE_NON_VALUE);
}
#endif
+
+ for (ix = 0; ix < ERTS_ALC_NO_FIXED_SIZES; ix++) {
+ ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix;
+ char *name = (char *) ERTS_ALC_N2TD(n);
+ size_t len = strlen(name);
+ fix_type_atoms[ix] = am_atom_put(name, len);
+ }
}
@@ -2043,6 +2535,48 @@ add_4tup(Uint **hpp, Uint *szp, Eterm *lp,
}
static Eterm
+sz_info_fix(Allctr_t *allctr,
+ int *print_to_p,
+ void *print_to_arg,
+ Uint **hpp,
+ Uint *szp)
+{
+ Eterm res;
+ int ix;
+ ErtsAlcFixList_t *fix = allctr->fix;
+
+ ASSERT(fix);
+
+ res = NIL;
+
+ for (ix = ERTS_ALC_NO_FIXED_SIZES-1; ix >= 0; ix--) {
+ ErtsAlcType_t n = ix + ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ Uint alloced = (fix[ix].type_size * fix[ix].allocated);
+ Uint used = fix[ix].type_size*fix[ix].used;
+
+ if (print_to_p) {
+ int to = *print_to_p;
+ void *arg = print_to_arg;
+ erts_print(to,
+ arg,
+ "fix type: %s %bpu %bpu\n",
+ (char *) ERTS_ALC_N2TD(n),
+ alloced,
+ used);
+ }
+
+ if (hpp || szp) {
+ add_3tup(hpp, szp, &res,
+ fix_type_atoms[ix],
+ bld_unstable_uint(hpp, szp, alloced),
+ bld_unstable_uint(hpp, szp, used));
+ }
+ }
+
+ return res;
+}
+
+static Eterm
sz_info_carriers(Allctr_t *allctr,
CarriersStats_t *cs,
char *prefix,
@@ -2590,7 +3124,7 @@ erts_alcu_sz_info(Allctr_t *allctr,
Uint **hpp,
Uint *szp)
{
- Eterm res, sbmbcs, mbcs, sbcs;
+ Eterm res, sbmbcs, mbcs, sbcs, fix = THE_NON_VALUE;
res = THE_NON_VALUE;
@@ -2607,6 +3141,8 @@ erts_alcu_sz_info(Allctr_t *allctr,
erts_mtx_lock(&allctr->mutex);
#endif
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
+
if (hpp || szp)
ensure_atoms_initialized(allctr);
@@ -2619,6 +3155,8 @@ erts_alcu_sz_info(Allctr_t *allctr,
update_max_ever_values(&allctr->mbcs);
update_max_ever_values(&allctr->sbcs);
+ if (allctr->fix)
+ fix = sz_info_fix(allctr, print_to_p, print_to_arg, hpp, szp);
sbmbcs = sz_info_carriers(allctr, &allctr->sbmbcs, "sbmbcs ", print_to_p,
print_to_arg, hpp, szp);
mbcs = sz_info_carriers(allctr, &allctr->mbcs, "mbcs ", print_to_p,
@@ -2631,6 +3169,8 @@ erts_alcu_sz_info(Allctr_t *allctr,
add_2tup(hpp, szp, &res, am.sbcs, sbcs);
add_2tup(hpp, szp, &res, am.mbcs, mbcs);
add_2tup(hpp, szp, &res, am.sbmbcs, sbmbcs);
+ if (allctr->fix)
+ add_2tup(hpp, szp, &res, am.fix_types, fix);
}
if (begin_max_period) {
@@ -2656,7 +3196,7 @@ erts_alcu_info(Allctr_t *allctr,
Uint **hpp,
Uint *szp)
{
- Eterm res, sett, sbmbcs, mbcs, sbcs, calls;
+ Eterm res, sett, sbmbcs, mbcs, sbcs, calls, fix = THE_NON_VALUE;
res = THE_NON_VALUE;
@@ -2673,6 +3213,8 @@ erts_alcu_info(Allctr_t *allctr,
erts_mtx_lock(&allctr->mutex);
#endif
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
+
if (hpp || szp)
ensure_atoms_initialized(allctr);
@@ -2694,6 +3236,8 @@ erts_alcu_info(Allctr_t *allctr,
}
sett = info_options(allctr, print_to_p, print_to_arg, hpp, szp);
+ if (allctr->fix)
+ fix = sz_info_fix(allctr, print_to_p, print_to_arg, hpp, szp);
sbmbcs = info_carriers(allctr, &allctr->sbmbcs, "sbmbcs ", print_to_p,
print_to_arg, hpp, szp);
mbcs = info_carriers(allctr, &allctr->mbcs, "mbcs ", print_to_p,
@@ -2709,6 +3253,8 @@ erts_alcu_info(Allctr_t *allctr,
add_2tup(hpp, szp, &res, am.sbcs, sbcs);
add_2tup(hpp, szp, &res, am.mbcs, mbcs);
add_2tup(hpp, szp, &res, am.sbmbcs, sbmbcs);
+ if (allctr->fix)
+ add_2tup(hpp, szp, &res, am.fix_types, fix);
add_2tup(hpp, szp, &res, am.options, sett);
add_3tup(hpp, szp, &res,
am.versions,
@@ -2733,7 +3279,7 @@ erts_alcu_info(Allctr_t *allctr,
void
-erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size)
+erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t *fi, int fisz)
{
#ifdef USE_THREADS
@@ -2751,6 +3297,18 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size)
size->blocks += allctr->sbmbcs.blocks.curr.size;
size->blocks += allctr->sbcs.blocks.curr.size;
+ if (fi) {
+ int ix;
+ for (ix = 0; ix < fisz; ix++) {
+ if (allctr->fix) {
+ fi[ix].allocated += (allctr->fix[ix].type_size
+ * allctr->fix[ix].allocated);
+ fi[ix].used += (allctr->fix[ix].type_size
+ * allctr->fix[ix].used);
+ }
+ }
+ }
+
#ifdef USE_THREADS
if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
@@ -2764,12 +3322,16 @@ do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size)
{
Allctr_t *allctr = (Allctr_t *) extra;
void *res;
+ ErtsAlcFixList_t *fix;
ASSERT(initialized);
ASSERT(allctr);
- ERTS_ALCU_DBG_CHK_THR_SPEC(allctr);
+ ERTS_SMP_LC_ASSERT(!allctr->thread_safe
+ || erts_lc_mtx_is_locked(&allctr->mutex));
+
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
#if ALLOC_ZERO_EQ_NULL
if (!size)
@@ -2778,18 +3340,61 @@ do_erts_alcu_alloc(ErtsAlcType_t type, void *extra, Uint size)
INC_CC(allctr->calls.this_alloc);
+ fix = allctr->fix;
+ if (fix) {
+ int ix = type - ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
+ fix[ix].used++;
+ res = fix[ix].list;
+ if (res) {
+ fix[ix].list_size--;
+ fix[ix].list = *((void **) res);
+ if (fix[ix].list && fix[ix].allocated > fix[ix].limit) {
+ void *p = fix[ix].list;
+ Block_t *blk;
+ fix[ix].list = *((void **) p);
+ fix[ix].list_size--;
+ blk = UMEM2BLK(p);
+ if (IS_SBC_BLK(blk))
+ destroy_carrier(allctr, blk);
+ else
+ mbc_free(allctr, p);
+ fix[ix].allocated--;
+ }
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
+ return res;
+ }
+ if (size < 2*sizeof(UWord))
+ size += sizeof(UWord);
+ if (fix[ix].limit < fix[ix].used)
+ fix[ix].limit = fix[ix].used;
+ if (fix[ix].max_used < fix[ix].used)
+ fix[ix].max_used = fix[ix].used;
+ fix[ix].allocated++;
+ }
+
if (size >= allctr->sbc_threshold) {
+ Block_t *blk;
+#ifdef ERTS_SMP
+ if (allctr->dd.use)
+ ERTS_ALCU_HANDLE_DD_IN_OP(allctr, 1);
+#endif
#if HALFWORD_HEAP
- Block_t *blk = create_carrier(allctr, size,
- CFLG_SBC | CFLG_FORCE_MSEG);
+ blk = create_carrier(allctr, size,
+ CFLG_SBC | CFLG_FORCE_MSEG);
#else
- Block_t *blk = create_carrier(allctr, size, CFLG_SBC);
+ blk = create_carrier(allctr, size, CFLG_SBC);
#endif
res = blk ? BLK2UMEM(blk) : NULL;
}
else
res = mbc_alloc(allctr, size);
+ if (!res && fix) {
+ int ix = type - ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ fix[ix].allocated--;
+ fix[ix].used--;
+ }
return res;
}
@@ -2818,29 +3423,28 @@ erts_alcu_alloc_ts(ErtsAlcType_t type, void *extra, Uint size)
return res;
}
+#ifdef ERTS_SMP
+
void *
erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size)
{
ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix = erts_alc_get_thr_ix();
+ int ix;
Allctr_t *allctr;
- int unlock;
void *res;
- ASSERT(ix > 0);
- if (ix < tspec->size) {
- allctr = tspec->allctr[ix];
- unlock = 0;
- }
- else {
- allctr = tspec->allctr[0];
- unlock = 1;
+ ix = ERTS_ALC_GET_THR_IX();
+
+ ASSERT(0 <= ix && ix < tspec->size);
+
+ allctr = tspec->allctr[ix];
+
+ if (allctr->thread_safe)
erts_mtx_lock(&allctr->mutex);
- }
res = do_erts_alcu_alloc(type, allctr, size);
- if (unlock)
+ if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
DEBUG_CHECK_ALIGNMENT(res);
@@ -2851,51 +3455,96 @@ erts_alcu_alloc_thr_spec(ErtsAlcType_t type, void *extra, Uint size)
void *
erts_alcu_alloc_thr_pref(ErtsAlcType_t type, void *extra, Uint size)
{
- ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix = erts_alc_get_thr_ix();
- Allctr_t *allctr;
+ int pref_ix;
+ Allctr_t *pref_allctr;
void *res;
- ASSERT(sizeof(UWord) == sizeof(Allctr_t *));
- ASSERT(ix > 0);
- if (ix >= tspec->size)
- ix = (ix % (tspec->size - 1)) + 1;
- allctr = tspec->allctr[ix];
- erts_mtx_lock(&allctr->mutex);
- res = do_erts_alcu_alloc(type, allctr, size + sizeof(UWord));
- if (res) {
- *((Allctr_t **) res) = allctr;
- res = (void *) (((char *) res) + sizeof(UWord));
- }
- erts_mtx_unlock(&allctr->mutex);
+ pref_ix = get_pref_allctr(extra, &pref_allctr);
+
+ if (pref_allctr->thread_safe)
+ erts_mtx_lock(&pref_allctr->mutex);
+
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(pref_allctr);
+
+ res = do_erts_alcu_alloc(type, pref_allctr, size + sizeof(UWord));
+ if (pref_allctr->thread_safe)
+ erts_mtx_unlock(&pref_allctr->mutex);
+
+ if (res)
+ res = put_used_allctr(res, pref_ix, size);
+
DEBUG_CHECK_ALIGNMENT(res);
+
+
return res;
}
#endif
+#endif
+
/* ------------------------------------------------------------------------- */
static ERTS_INLINE void
do_erts_alcu_free(ErtsAlcType_t type, void *extra, void *p)
{
+ int ix;
Allctr_t *allctr = (Allctr_t *) extra;
ASSERT(initialized);
ASSERT(allctr);
- ERTS_ALCU_DBG_CHK_THR_SPEC(allctr);
+ ERTS_SMP_LC_ASSERT(!allctr->thread_safe
+ || erts_lc_mtx_is_locked(&allctr->mutex));
+
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
if (p) {
+ ErtsAlcFixList_t *fix = allctr->fix;
Block_t *blk;
INC_CC(allctr->calls.this_free);
+ if (fix) {
+ ix = type - ERTS_ALC_N_MIN_A_FIXED_SIZE;
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
+ fix[ix].used--;
+ if (fix[ix].allocated < fix[ix].limit
+ && fix[ix].list_size < ERTS_ALCU_FIX_MAX_LIST_SZ) {
+ *((void **) p) = fix[ix].list;
+ fix[ix].list = p;
+ fix[ix].list_size++;
+ if (!allctr->fix_shrink_scheduled) {
+ allctr->fix_shrink_scheduled = 1;
+ erts_set_aux_work_timeout(
+ allctr->ix,
+ (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC),
+ 1);
+ }
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
+ return;
+ }
+ fix[ix].allocated--;
+ if (fix[ix].list && fix[ix].allocated > fix[ix].limit) {
+ blk = UMEM2BLK(p);
+ if (IS_SBC_BLK(blk))
+ destroy_carrier(allctr, blk);
+ else
+ mbc_free(allctr, p);
+ p = fix[ix].list;
+ fix[ix].list = *((void **) p);
+ fix[ix].list_size--;
+ fix[ix].allocated--;
+ }
+ }
+
blk = UMEM2BLK(p);
if (IS_SBC_BLK(blk))
destroy_carrier(allctr, blk);
else
mbc_free(allctr, p);
+ ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
}
}
@@ -2915,44 +3564,56 @@ erts_alcu_free_ts(ErtsAlcType_t type, void *extra, void *p)
erts_mtx_unlock(&allctr->mutex);
}
+#ifdef ERTS_SMP
+
void
erts_alcu_free_thr_spec(ErtsAlcType_t type, void *extra, void *p)
{
ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix = erts_alc_get_thr_ix();
- int unlock;
+ int ix;
Allctr_t *allctr;
- ASSERT(ix > 0);
- if (ix < tspec->size) {
- allctr = tspec->allctr[ix];
- unlock = 0;
- }
- else {
- allctr = tspec->allctr[0];
- unlock = 1;
+ ix = ERTS_ALC_GET_THR_IX();
+
+ ASSERT(0 <= ix && ix < tspec->size);
+
+ allctr = tspec->allctr[ix];
+
+ if (allctr->thread_safe)
erts_mtx_lock(&allctr->mutex);
- }
do_erts_alcu_free(type, allctr, p);
- if (unlock)
+
+ if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
}
void
-erts_alcu_free_thr_pref(ErtsAlcType_t type, void *unused, void *p)
+erts_alcu_free_thr_pref(ErtsAlcType_t type, void *extra, void *p)
{
if (p) {
- void *ptr = (void *) (((char *) p) - sizeof(UWord));
- Allctr_t *allctr = *((Allctr_t **) ptr);
- erts_mtx_lock(&allctr->mutex);
- do_erts_alcu_free(type, allctr, ptr);
- erts_mtx_unlock(&allctr->mutex);
+ Allctr_t *pref_allctr, *used_allctr;
+ void *ptr;
+
+ get_pref_allctr(extra, &pref_allctr);
+ ptr = get_used_allctr(extra, p, &used_allctr, NULL);
+ if (pref_allctr != used_allctr)
+ enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ else {
+ if (used_allctr->thread_safe)
+ erts_mtx_lock(&used_allctr->mutex);
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(used_allctr);
+ do_erts_alcu_free(type, used_allctr, ptr);
+ if (used_allctr->thread_safe)
+ erts_mtx_unlock(&used_allctr->mutex);
+ }
}
}
#endif
+#endif
+
/* ------------------------------------------------------------------------- */
static ERTS_INLINE void *
@@ -2970,7 +3631,10 @@ do_erts_alcu_realloc(ErtsAlcType_t type,
ASSERT(allctr);
- ERTS_ALCU_DBG_CHK_THR_SPEC(allctr);
+ ERTS_SMP_LC_ASSERT(!allctr->thread_safe
+ || erts_lc_mtx_is_locked(&allctr->mutex));
+
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
if (!p) {
res = do_erts_alcu_alloc(type, extra, size);
@@ -3063,6 +3727,10 @@ do_erts_alcu_realloc(ErtsAlcType_t type,
}
else {
Block_t *new_blk;
+#ifdef ERTS_SMP
+ if (allctr->dd.use)
+ ERTS_ALCU_HANDLE_DD_IN_OP(allctr, 1);
+#endif
if(IS_SBC_BLK(blk)) {
do_carrier_resize:
#if HALFWORD_HEAP
@@ -3166,30 +3834,29 @@ erts_alcu_realloc_mv_ts(ErtsAlcType_t type, void *extra, void *p, Uint size)
return res;
}
+#ifdef ERTS_SMP
+
void *
erts_alcu_realloc_thr_spec(ErtsAlcType_t type, void *extra,
void *ptr, Uint size)
{
ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix = erts_alc_get_thr_ix();
+ int ix;
Allctr_t *allctr;
- int unlock;
void *res;
- ASSERT(ix > 0);
- if (ix < tspec->size) {
- allctr = tspec->allctr[ix];
- unlock = 0;
- }
- else {
- allctr = tspec->allctr[0];
- unlock = 1;
+ ix = ERTS_ALC_GET_THR_IX();
+
+ ASSERT(0 <= ix && ix < tspec->size);
+
+ allctr = tspec->allctr[ix];
+
+ if (allctr->thread_safe)
erts_mtx_lock(&allctr->mutex);
- }
res = do_erts_alcu_realloc(type, allctr, ptr, size, 0);
- if (unlock)
+ if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
DEBUG_CHECK_ALIGNMENT(res);
@@ -3202,26 +3869,22 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra,
void *ptr, Uint size)
{
ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix = erts_alc_get_thr_ix();
+ int ix;
Allctr_t *allctr;
- int unlock;
void *res;
- ASSERT(ix > 0);
- if (ix < tspec->size) {
- allctr = tspec->allctr[ix];
- unlock = 0;
- }
- else {
- allctr = tspec->allctr[0];
- unlock = 1;
- erts_mtx_lock(&allctr->mutex);
- }
+ ix = ERTS_ALC_GET_THR_IX();
+ ASSERT(0 <= ix && ix < tspec->size);
+
+ allctr = tspec->allctr[ix];
+
+ if (allctr->thread_safe)
+ erts_mtx_lock(&allctr->mutex);
res = do_erts_alcu_alloc(type, allctr, size);
if (!res) {
- if (unlock)
+ if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
res = erts_alcu_realloc_thr_spec(type, allctr, ptr, size);
}
@@ -3235,7 +3898,7 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra,
cpy_size = size;
sys_memcpy(res, ptr, cpy_size);
do_erts_alcu_free(type, allctr, ptr);
- if (unlock)
+ if (allctr->thread_safe)
erts_mtx_unlock(&allctr->mutex);
}
@@ -3244,129 +3907,101 @@ erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t type, void *extra,
return res;
}
-void *
-erts_alcu_realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size)
+static ERTS_INLINE void *
+realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size,
+ int force_move)
{
- ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix;
+ int pref_ix;
void *ptr, *res;
Allctr_t *pref_allctr, *used_allctr;
+ UWord old_user_size;
if (!p)
return erts_alcu_alloc_thr_pref(type, extra, size);
- ptr = (void *) (((char *) p) - sizeof(UWord));
- used_allctr = *((Allctr_t **) ptr);
+ pref_ix = get_pref_allctr(extra, &pref_allctr);
+ ptr = get_used_allctr(extra, p, &used_allctr, &old_user_size);
- ix = erts_alc_get_thr_ix();
- ASSERT(ix > 0);
- if (ix >= tspec->size)
- ix = (ix % (tspec->size - 1)) + 1;
- pref_allctr = tspec->allctr[ix];
ASSERT(used_allctr && pref_allctr);
- erts_mtx_lock(&used_allctr->mutex);
- res = do_erts_alcu_realloc(type,
- used_allctr,
- ptr,
- size + sizeof(UWord),
- (pref_allctr != used_allctr
- ? ERTS_ALCU_FLG_FAIL_REALLOC_MOVE
- : 0));
- erts_mtx_unlock(&used_allctr->mutex);
- if (res) {
- ASSERT(used_allctr == *((Allctr_t **) res));
- res = (void *) (((char *) res) + sizeof(UWord));
- DEBUG_CHECK_ALIGNMENT(res);
+ if (!force_move && used_allctr == pref_allctr) {
+ if (used_allctr->thread_safe)
+ erts_mtx_lock(&used_allctr->mutex);
+ ERTS_ALCU_DBG_CHK_THR_ACCESS(used_allctr);
+ res = do_erts_alcu_realloc(type,
+ used_allctr,
+ ptr,
+ size + sizeof(UWord),
+ 0);
+ if (used_allctr->thread_safe)
+ erts_mtx_unlock(&used_allctr->mutex);
+ if (res)
+ res = put_used_allctr(res, pref_ix, size);
}
else {
- erts_mtx_lock(&pref_allctr->mutex);
+ if (pref_allctr->thread_safe)
+ erts_mtx_lock(&pref_allctr->mutex);
res = do_erts_alcu_alloc(type, pref_allctr, size + sizeof(UWord));
- erts_mtx_unlock(&pref_allctr->mutex);
+ if (pref_allctr->thread_safe && (!force_move
+ || used_allctr != pref_allctr))
+ erts_mtx_unlock(&pref_allctr->mutex);
if (res) {
Block_t *blk;
size_t cpy_size;
- *((Allctr_t **) res) = pref_allctr;
- res = (void *) (((char *) res) + sizeof(UWord));
+ res = put_used_allctr(res, pref_ix, size);
DEBUG_CHECK_ALIGNMENT(res);
- erts_mtx_lock(&used_allctr->mutex);
blk = UMEM2BLK(ptr);
- cpy_size = BLK_SZ(blk) - ABLK_HDR_SZ - sizeof(UWord);
+ if (old_user_size != ERTS_AU_PREF_ALLOC_SIZE_MASK)
+ cpy_size = old_user_size;
+ else {
+ if (used_allctr->thread_safe && (!force_move
+ || used_allctr != pref_allctr))
+ erts_mtx_lock(&used_allctr->mutex);
+ ERTS_SMP_LC_ASSERT(erts_lc_mtx_is_locked(&used_allctr->mutex));
+ cpy_size = BLK_SZ(blk);
+ if (used_allctr->thread_safe && (!force_move
+ || used_allctr != pref_allctr))
+ erts_mtx_unlock(&used_allctr->mutex);
+ cpy_size -= ABLK_HDR_SZ + sizeof(UWord);
+ }
if (cpy_size > size)
cpy_size = size;
sys_memcpy(res, p, cpy_size);
- do_erts_alcu_free(type, used_allctr, ptr);
- erts_mtx_unlock(&used_allctr->mutex);
+
+ if (!force_move || used_allctr != pref_allctr)
+ enqueue_dealloc_other_instance(type, used_allctr, ptr);
+ else {
+ do_erts_alcu_free(type, used_allctr, ptr);
+ ASSERT(pref_allctr == used_allctr);
+ if (pref_allctr->thread_safe)
+ erts_mtx_unlock(&pref_allctr->mutex);
+ }
}
}
return res;
}
+void *
+erts_alcu_realloc_thr_pref(ErtsAlcType_t type, void *extra, void *p, Uint size)
+{
+ return realloc_thr_pref(type, extra, p, size, 0);
+}
void *
erts_alcu_realloc_mv_thr_pref(ErtsAlcType_t type, void *extra,
void *p, Uint size)
{
- ErtsAllocatorThrSpec_t *tspec = (ErtsAllocatorThrSpec_t *) extra;
- int ix;
- void *ptr, *res;
- Allctr_t *pref_allctr, *used_allctr;
-
- if (!p)
- return erts_alcu_alloc_thr_pref(type, extra, size);
-
- ptr = (void *) (((char *) p) - sizeof(UWord));
- used_allctr = *((Allctr_t **) ptr);
-
- ix = erts_alc_get_thr_ix();
- ASSERT(ix > 0);
- if (ix >= tspec->size)
- ix = (ix % (tspec->size - 1)) + 1;
- pref_allctr = tspec->allctr[ix];
- ASSERT(used_allctr && pref_allctr);
-
- erts_mtx_lock(&pref_allctr->mutex);
- res = do_erts_alcu_alloc(type, pref_allctr, size + sizeof(UWord));
- if (!res) {
- erts_mtx_unlock(&pref_allctr->mutex);
- res = erts_alcu_realloc_thr_pref(type, extra, p, size);
- }
- else {
- Block_t *blk;
- size_t cpy_size;
- Allctr_t *allctr;
-
- *((Allctr_t **) res) = pref_allctr;
- res = (void *) (((char *) res) + sizeof(UWord));
-
- DEBUG_CHECK_ALIGNMENT(res);
-
- if (used_allctr == pref_allctr)
- allctr = pref_allctr;
- else {
- erts_mtx_unlock(&pref_allctr->mutex);
- allctr = used_allctr;
- erts_mtx_lock(&allctr->mutex);
- }
-
- blk = UMEM2BLK(ptr);
- cpy_size = BLK_SZ(blk) - ABLK_HDR_SZ - sizeof(UWord);
- if (cpy_size > size)
- cpy_size = size;
- sys_memcpy(res, p, cpy_size);
- do_erts_alcu_free(type, allctr, ptr);
- erts_mtx_unlock(&allctr->mutex);
- }
-
- return res;
+ return realloc_thr_pref(type, extra, p, size, 1);
}
#endif
+#endif
+
/* ------------------------------------------------------------------------- */
int
@@ -3381,6 +4016,10 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
sys_memcpy((void *) &allctr->mseg_opt,
(void *) &erts_mseg_default_opt,
sizeof(ErtsMsegOpt_t));
+#ifdef ERTS_SMP
+ if (init->tspec || init->tpref)
+ allctr->mseg_opt.sched_spec = 1;
+#endif
# if HALFWORD_HEAP
allctr->mseg_opt.low_mem = init->low_mem;
# endif
@@ -3390,6 +4029,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
if (!allctr->name_prefix)
goto error;
+ allctr->ix = init->ix;
allctr->alloc_no = init->alloc_no;
if (allctr->alloc_no < ERTS_ALC_A_MIN
|| ERTS_ALC_A_MAX < allctr->alloc_no)
@@ -3431,6 +4071,18 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
goto error;
allctr->min_block_size = UNIT_CEILING(allctr->min_block_size
+ sizeof(UWord));
+#ifdef ERTS_SMP
+ if (init->tpref) {
+ Uint sz = sizeof(Block_t);
+ sz += ERTS_ALCU_DD_FIX_TYPE_OFFS*sizeof(UWord);
+ if (init->fix)
+ sz += sizeof(UWord);
+ sz = UNIT_CEILING(sz);
+ if (sz > allctr->min_block_size)
+ allctr->min_block_size = sz;
+ }
+#endif
+
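For thread-preferred instances every block must be able to carry the delayed-dealloc next pointer and, when fix types are enabled, the type word, on top of the normal header. A standalone arithmetic check of the sizing above; the concrete sizes (UWord, Block_t and the DD block union all 8 bytes, an 8-byte allocation unit) are assumptions for a 64-bit build, not quoted from the source:

    #include <assert.h>
    #include <stddef.h>

    int main(void)
    {
        size_t uword = 8, block_hdr = 8, unit = 8;        /* assumed */
        size_t dd_fix_type_offs = (uword - 1)/uword + 1;  /* == 1 */
        size_t sz = block_hdr + dd_fix_type_offs*uword;   /* dd slot */
        sz += uword;                                      /* fix type word */
        sz = ((sz + unit - 1)/unit)*unit;                 /* UNIT_CEILING */
        assert(sz == 24);   /* min_block_size grows to at least this */
        return 0;
    }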
allctr->sbmbc_threshold = init->sbmbct;
@@ -3493,7 +4145,8 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
if (allctr->mbc_header_size < sizeof(Carrier_t))
goto error;
-#ifdef USE_THREADS
+#ifdef ERTS_SMP
+ allctr->dd.use = 0;
if (init->tpref) {
allctr->mbc_header_size = (UNIT_CEILING(allctr->mbc_header_size
+ FBLK_FTR_SZ
@@ -3507,6 +4160,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
+ sizeof(UWord))
- ABLK_HDR_SZ
- sizeof(UWord));
+
+ allctr->dd.use = 1;
+ init_dd_queue(&allctr->dd.q);
}
else
#endif
@@ -3548,6 +4204,21 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
}
+ if (init->fix) {
+ int i;
+ allctr->fix = init->fix;
+ allctr->fix_shrink_scheduled = 0;
+ for (i = 0; i < ERTS_ALC_NO_FIXED_SIZES; i++) {
+ allctr->fix[i].max_used = 0;
+ allctr->fix[i].limit = 0;
+ allctr->fix[i].type_size = init->fix_type_size[i];
+ allctr->fix[i].list_size = 0;
+ allctr->fix[i].list = NULL;
+ allctr->fix[i].allocated = 0;
+ allctr->fix[i].used = 0;
+ }
+ }
+
return 1;
error:
diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h
index fed4d3dbe6..df560a0de2 100644
--- a/erts/emulator/beam/erl_alloc_util.h
+++ b/erts/emulator/beam/erl_alloc_util.h
@@ -20,10 +20,13 @@
#ifndef ERL_ALLOC_UTIL__
#define ERL_ALLOC_UTIL__
-#define ERTS_ALCU_VSN_STR "2.2"
+#define ERTS_ALCU_VSN_STR "3.0"
#include "erl_alloc_types.h"
+#define ERTS_AU_PREF_ALLOC_BITS 11
+#define ERTS_AU_MAX_PREF_ALLOC_INSTANCES (1 << ERTS_AU_PREF_ALLOC_BITS)
+
typedef struct Allctr_t_ Allctr_t;
typedef struct {
@@ -35,6 +38,7 @@ typedef struct {
char *name_prefix;
ErtsAlcType_t alloc_no;
int force;
+ int ix;
int ts;
int tspec;
int tpref;
@@ -53,6 +57,9 @@ typedef struct {
UWord mbcgs;
UWord sbmbct;
UWord sbmbcs;
+
+ void *fix;
+ size_t *fix_type_size;
} AllctrInit_t;
typedef struct {
@@ -60,6 +67,11 @@ typedef struct {
UWord carriers;
} AllctrSize_t;
+typedef struct {
+ UWord allocated;
+ UWord used;
+} ErtsAlcUFixInfo_t;
+
#ifndef SMALL_MEMORY
#define ERTS_DEFAULT_ALCU_INIT { \
@@ -71,6 +83,7 @@ typedef struct {
NULL, \
ERTS_ALC_A_INVALID, /* (number) alloc_no: allocator number */\
0, /* (bool) force: force enabled */\
+ 0, /* (number) ix: instance index */\
1, /* (bool) ts: thread safe */\
0, /* (bool) tspec: thread specific */\
0, /* (bool) tpref: thread preferred */\
@@ -88,7 +101,10 @@ typedef struct {
1024*1024, /* (bytes) smbcs: smallest mbc size */\
10, /* (amount) mbcgs: mbc growth stages */\
256, /* (bytes) sbmbct: small block mbc threshold */\
- 8*1024 /* (bytes) sbmbcs: small block mbc size */\
+ 8*1024, /* (bytes) sbmbcs: small block mbc size */ \
+ /* --- Data not options -------------------------------------------- */\
+ NULL, /* (ptr) fix */\
+ NULL /* (ptr) fix_type_size */\
}
#else /* if SMALL_MEMORY */
@@ -102,6 +118,7 @@ typedef struct {
NULL, \
ERTS_ALC_A_INVALID, /* (number) alloc_no: allocator number */\
0, /* (bool) force: force enabled */\
+ 0, /* (number) ix: instance index */\
1, /* (bool) ts: thread safe */\
0, /* (bool) tspec: thread specific */\
0, /* (bool) tpref: thread preferred */\
@@ -118,7 +135,10 @@ typedef struct {
128*1024, /* (bytes) smbcs: smallest mbc size */\
10, /* (amount) mbcgs: mbc growth stages */\
256, /* (bytes) sbmbct: small block mbc threshold */\
- 8*1024 /* (bytes) sbmbcs: small block mbc size */\
+ 8*1024, /* (bytes) sbmbcs: small block mbc size */ \
+ /* --- Data not options -------------------------------------------- */\
+ NULL, /* (ptr) fix */\
+ NULL /* (ptr) fix_type_size */\
}
#endif
@@ -132,6 +152,7 @@ void * erts_alcu_alloc_ts(ErtsAlcType_t, void *, Uint);
void * erts_alcu_realloc_ts(ErtsAlcType_t, void *, void *, Uint);
void * erts_alcu_realloc_mv_ts(ErtsAlcType_t, void *, void *, Uint);
void erts_alcu_free_ts(ErtsAlcType_t, void *, void *);
+#ifdef ERTS_SMP
void * erts_alcu_alloc_thr_spec(ErtsAlcType_t, void *, Uint);
void * erts_alcu_realloc_thr_spec(ErtsAlcType_t, void *, void *, Uint);
void * erts_alcu_realloc_mv_thr_spec(ErtsAlcType_t, void *, void *, Uint);
@@ -141,12 +162,16 @@ void * erts_alcu_realloc_thr_pref(ErtsAlcType_t, void *, void *, Uint);
void * erts_alcu_realloc_mv_thr_pref(ErtsAlcType_t, void *, void *, Uint);
void erts_alcu_free_thr_pref(ErtsAlcType_t, void *, void *);
#endif
+#endif
Eterm erts_alcu_au_info_options(int *, void *, Uint **, Uint *);
Eterm erts_alcu_info_options(Allctr_t *, int *, void *, Uint **, Uint *);
Eterm erts_alcu_sz_info(Allctr_t *, int, int *, void *, Uint **, Uint *);
Eterm erts_alcu_info(Allctr_t *, int, int *, void *, Uint **, Uint *);
void erts_alcu_init(AlcUInit_t *);
-void erts_alcu_current_size(Allctr_t *, AllctrSize_t *);
+void erts_alcu_current_size(Allctr_t *, AllctrSize_t *,
+ ErtsAlcUFixInfo_t *, int);
+void erts_alcu_check_delayed_dealloc(Allctr_t *, int, int *, int *);
+erts_aint32_t erts_alcu_fix_alloc_shrink(Allctr_t *, erts_aint32_t);
#endif
@@ -246,7 +271,74 @@ typedef struct {
} blocks;
} CarriersStats_t;
+#ifdef ERTS_SMP
+
+typedef union ErtsAllctrDDBlock_t_ ErtsAllctrDDBlock_t;
+
+union ErtsAllctrDDBlock_t_ {
+ erts_atomic_t atmc_next;
+ ErtsAllctrDDBlock_t *ptr_next;
+};
+
+typedef struct {
+ ErtsAllctrDDBlock_t marker;
+ erts_atomic_t last;
+ erts_atomic_t um_refc[2];
+ erts_atomic32_t um_refc_ix;
+} ErtsDDTail_t;
+
+typedef struct {
+ /*
+ * This structure needs to be cache line aligned for best
+ * performance.
+ */
+ union {
+ /* Modified by threads returning memory to this allocator */
+ ErtsDDTail_t data;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsDDTail_t))];
+ } tail;
+ /*
+ * Everything below this point is *only* accessed by the
+ * thread owning the allocator.
+ */
+ struct {
+ ErtsAllctrDDBlock_t *first;
+ ErtsAllctrDDBlock_t *unref_end;
+ struct {
+ ErtsThrPrgrVal thr_progress;
+ int thr_progress_reached;
+ int um_refc_ix;
+ ErtsAllctrDDBlock_t *unref_end;
+ } next;
+ int used_marker;
+ } head;
+} ErtsAllctrDDQueue_t;
+
+#endif
+
+typedef struct {
+ size_t type_size;
+ SWord list_size;
+ void *list;
+ SWord max_used;
+ SWord limit;
+ SWord allocated;
+ SWord used;
+} ErtsAlcFixList_t;
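Across the alloc, free, delayed-dealloc and shrink paths the counters keep one simple relationship, handy when reading the bookkeeping in this file: blocks owned by a slot equal live objects plus blocks parked on its local free list. As a hypothetical consistency check (not in the patch; holds between operations on the owning instance):

    static int fix_slot_consistent(ErtsAlcFixList_t *f)
    {
        return f->allocated == f->used + f->list_size
            && f->list_size <= ERTS_ALCU_FIX_MAX_LIST_SZ
            && ((f->list == NULL) == (f->list_size == 0));
    }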
+
struct Allctr_t_ {
+#ifdef ERTS_SMP
+ struct {
+ /*
+ * We want the queue at the beginning of
+ * the Allctr_t struct, due to cache line
+ * alignment reasons.
+ */
+ ErtsAllctrDDQueue_t q;
+ int use;
+ int ix;
+ } dd;
+#endif
/* Allocator name prefix */
char * name_prefix;
@@ -254,6 +346,9 @@ struct Allctr_t_ {
/* Allocator number */
ErtsAlcType_t alloc_no;
+ /* Instance index */
+ int ix;
+
/* Alloc, realloc and free names as atoms */
struct {
Eterm alloc;
@@ -278,6 +373,7 @@ struct Allctr_t_ {
Uint mbc_growth_stages;
Uint sbmbc_threshold;
Uint sbmbc_size;
+
#if HAVE_ERTS_MSEG
ErtsMsegOpt_t mseg_opt;
#endif
@@ -315,6 +411,10 @@ struct Allctr_t_ {
void (*check_mbc) (Allctr_t *, Carrier_t *);
#endif
+ int fix_n_base;
+ int fix_shrink_scheduled;
+ ErtsAlcFixList_t *fix;
+
#ifdef USE_THREADS
/* Mutex for this allocator */
erts_mtx_t mutex;
@@ -323,6 +423,7 @@ struct Allctr_t_ {
Allctr_t *prev;
Allctr_t *next;
} ts_list;
+
#endif
int atoms_initialized;
diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c
index 90d8ea7300..5bdb752d3a 100644
--- a/erts/emulator/beam/erl_ao_firstfit_alloc.c
+++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c
@@ -170,14 +170,18 @@ erts_aoffalc_start(AOFFAllctr_t *alc,
AOFFAllctrInit_t* aoffinit,
AllctrInit_t *init)
{
- AOFFAllctr_t nulled_state = {{0}};
- /* {{0}} is used instead of {0}, in order to avoid (an incorrect) gcc
- warning. gcc warns if {0} is used as initializer of a struct when
- the first member is a struct (not if, for example, the third member
- is a struct). */
+ struct {
+ int dummy;
+ AOFFAllctr_t allctr;
+ } zero = {0};
+ /* The struct with a dummy element first is used in order to avoid (an
+ incorrect) gcc warning. gcc warns if {0} is used as initializer of
+ a struct when the first member is a struct (not if, for example,
+ the third member is a struct). */
+
Allctr_t *allctr = (Allctr_t *) alc;
- sys_memcpy((void *) alc, (void *) &nulled_state, sizeof(AOFFAllctr_t));
+ sys_memcpy((void *) alc, (void *) &zero.allctr, sizeof(AOFFAllctr_t));
allctr->mbc_header_size = sizeof(Carrier_t);
allctr->min_mbc_size = MIN_MBC_SZ;
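For context on the comment above: the warning being dodged is gcc's -Wmissing-braces, which fires on the well-defined = {0} idiom whenever the first member is itself an aggregate. A minimal reproduction with invented names (the same workaround recurs in erl_bestfit_alloc.c and erl_goodfit_alloc.c below):

    struct inner { int a; int b; };
    struct outer { struct inner i; int c; };

    struct outer w1 = {0};    /* gcc -Wmissing-braces warns here */

    static struct {
        int dummy;            /* scalar first member ... */
        struct outer o;
    } w2 = {0};               /* ... so no warning here */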
diff --git a/erts/emulator/beam/erl_bestfit_alloc.c b/erts/emulator/beam/erl_bestfit_alloc.c
index f2199d41a1..c50fdeb4e8 100644
--- a/erts/emulator/beam/erl_bestfit_alloc.c
+++ b/erts/emulator/beam/erl_bestfit_alloc.c
@@ -161,14 +161,18 @@ erts_bfalc_start(BFAllctr_t *bfallctr,
BFAllctrInit_t *bfinit,
AllctrInit_t *init)
{
- BFAllctr_t nulled_state = {{0}};
- /* {{0}} is used instead of {0}, in order to avoid (an incorrect) gcc
- warning. gcc warns if {0} is used as initializer of a struct when
- the first member is a struct (not if, for example, the third member
- is a struct). */
+ struct {
+ int dummy;
+ BFAllctr_t allctr;
+ } zero = {0};
+ /* The struct with a dummy element first is used in order to avoid (an
+ incorrect) gcc warning. gcc warns if {0} is used as initializer of
+ a struct when the first member is a struct (not if, for example,
+ the third member is a struct). */
+
Allctr_t *allctr = (Allctr_t *) bfallctr;
- sys_memcpy((void *) bfallctr, (void *) &nulled_state, sizeof(BFAllctr_t));
+ sys_memcpy((void *) bfallctr, (void *) &zero.allctr, sizeof(BFAllctr_t));
bfallctr->address_order = bfinit->ao;
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 6a74596f76..70d728340a 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -52,6 +52,9 @@
#include <valgrind/memcheck.h>
#endif
+static Export* alloc_info_trap = NULL;
+static Export* alloc_sizes_trap = NULL;
+
#define DECL_AM(S) Eterm AM_ ## S = am_atom_put(#S, sizeof(#S) - 1)
/* Keep erts_system_version as a global variable for easy access from a core */
@@ -1633,9 +1636,19 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */
sel = *tp++;
- if (sel == am_allocator_sizes && arity == 2) {
- return erts_allocator_info_term(BIF_P, *tp, 1);
- } else if (sel == am_wordsize && arity == 2) {
+ if (sel == am_allocator_sizes) {
+ switch (arity) {
+ case 2:
+ ERTS_BIF_PREP_TRAP1(ret, alloc_sizes_trap, BIF_P, *tp);
+ return ret;
+ case 3:
+ if (erts_request_alloc_info(BIF_P, tp[0], tp[1], 1))
+ return am_true;
+ default:
+ goto badarg;
+ }
+ }
+ else if (sel == am_wordsize && arity == 2) {
if (tp[0] == am_internal) {
return make_small(sizeof(Eterm));
}
@@ -1682,8 +1695,17 @@ info_1_tuple(Process* BIF_P, /* Pointer to current process. */
}
else
goto badarg;
- } else if (sel == am_allocator && arity == 2) {
- return erts_allocator_info_term(BIF_P, *tp, 0);
+ } else if (sel == am_allocator) {
+ switch (arity) {
+ case 2:
+ ERTS_BIF_PREP_TRAP1(ret, alloc_info_trap, BIF_P, *tp);
+ return ret;
+ case 3:
+ if (erts_request_alloc_info(BIF_P, tp[0], tp[1], 0))
+ return am_true;
+ default:
+ goto badarg;
+ }
} else if (ERTS_IS_ATOM_STR("internal_cpu_topology", sel) && arity == 2) {
return erts_get_cpu_topology_term(BIF_P, *tp);
} else if (ERTS_IS_ATOM_STR("cpu_topology", sel) && arity == 2) {
@@ -2546,8 +2568,12 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
res = erts_bld_uint(&hp, NULL, erts_dist_buf_busy_limit);
BIF_RET(res);
} else if (ERTS_IS_ATOM_STR("print_ethread_info", BIF_ARG_1)) {
+#if defined(ETHR_NATIVE_ATOMIC32_IMPL) \
+ || defined(ETHR_NATIVE_ATOMIC64_IMPL) \
+ || defined(ETHR_NATIVE_DW_ATOMIC_IMPL)
int i;
char **str;
+#endif
#ifdef ETHR_NATIVE_ATOMIC32_IMPL
erts_printf("32-bit native atomics: %s\n",
ETHR_NATIVE_ATOMIC32_IMPL);
@@ -2610,6 +2636,12 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
#endif
BIF_RET(am_true);
}
+#ifdef ERTS_SMP
+ else if (ERTS_IS_ATOM_STR("thread_progress", BIF_ARG_1)) {
+ erts_thr_progress_dbg_print_state();
+ BIF_RET(am_true);
+ }
+#endif
BIF_ERROR(BIF_P, BADARG);
}
@@ -3170,26 +3202,6 @@ BIF_RETTYPE statistics_1(BIF_ALIST_1)
BIF_ERROR(BIF_P, BADARG);
}
-BIF_RETTYPE memory_0(BIF_ALIST_0)
-{
- BIF_RETTYPE res = erts_memory(NULL, NULL, BIF_P, THE_NON_VALUE);
- switch (res) {
- case am_badarg: BIF_ERROR(BIF_P, EXC_INTERNAL_ERROR); /* never... */
- case am_notsup: BIF_ERROR(BIF_P, EXC_NOTSUP);
- default: BIF_RET(res);
- }
-}
-
-BIF_RETTYPE memory_1(BIF_ALIST_1)
-{
- BIF_RETTYPE res = erts_memory(NULL, NULL, BIF_P, BIF_ARG_1);
- switch (res) {
- case am_badarg: BIF_ERROR(BIF_P, BADARG);
- case am_notsup: BIF_ERROR(BIF_P, EXC_NOTSUP);
- default: BIF_RET(res);
- }
-}
-
BIF_RETTYPE error_logger_warning_map_0(BIF_ALIST_0)
{
BIF_RET(erts_error_logger_warnings);
@@ -3291,6 +3303,15 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
BIF_RET(am_false);
#endif
}
+ else if (ERTS_IS_ATOM_STR("memory", BIF_ARG_1)) {
+ Eterm res;
+ erts_smp_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN);
+ erts_smp_block_system(0);
+ erts_smp_proc_lock(BIF_P, ERTS_PROC_LOCK_MAIN);
+ res = erts_memory(NULL, NULL, BIF_P, THE_NON_VALUE);
+ erts_smp_release_system();
+ BIF_RET(res);
+ }
}
else if (is_tuple(BIF_ARG_1)) {
Eterm* tp = tuple_val(BIF_ARG_1);
@@ -3493,6 +3514,7 @@ BIF_RETTYPE erts_debug_get_internal_state_1(BIF_ALIST_1)
static erts_smp_atomic_t hipe_test_reschedule_flag;
+
BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
{
/*
@@ -3766,6 +3788,13 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
BIF_ERROR(BIF_P, EXC_NOTSUP);
#endif
}
+ else if (ERTS_IS_ATOM_STR("wait", BIF_ARG_1)) {
+ if (ERTS_IS_ATOM_STR("deallocations", BIF_ARG_2)) {
+ if (erts_debug_wait_deallocations(BIF_P)) {
+ ERTS_BIF_YIELD_RETURN(BIF_P, am_ok);
+ }
+ }
+ }
}
BIF_ERROR(BIF_P, BADARG);
@@ -4018,5 +4047,7 @@ erts_bif_info_init(void)
erts_smp_atomic_init_nob(&available_internal_state, 0);
erts_smp_atomic_init_nob(&hipe_test_reschedule_flag, 0);
+ alloc_info_trap = erts_export_put(am_erlang, am_alloc_info, 1);
+ alloc_sizes_trap = erts_export_put(am_erlang, am_alloc_sizes, 1);
process_info_init();
}
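
Editor's note: the erl_bif_info.c changes above turn the allocator queries of system_info/1 into traps. Instead of computing a potentially large result inline, the BIF yields into erlang:alloc_info/1 or erlang:alloc_sizes/1 (the Export entries installed in erts_bif_info_init() above), and the arity-3 variants go through erts_request_alloc_info(), which gathers data per allocator instance. A standalone analogue of the trap idea, with hypothetical names (editor's sketch, not ERTS code):

    #include <stdio.h>

    /* A "trap" result tells the dispatcher to continue in another
       function with a saved argument, instead of returning an answer. */
    typedef struct {
        int trapped;
        long (*cont)(long);
        long arg;
    } Result;

    static long gather_info(long which) { return which * 2; } /* stand-in */

    static Result builtin(long which)
    {
        Result r = { 1, gather_info, which };  /* cf. ERTS_BIF_PREP_TRAP1 */
        return r;
    }

    int main(void)
    {
        Result r = builtin(21);
        if (r.trapped)
            printf("%ld\n", r.cont(r.arg));    /* dispatcher resumes the trap */
        return 0;
    }

The dispatcher in the sketch plays the role of the emulator loop: a trapped builtin hands back control immediately and is resumed later, which is what lets the real BIF collect per-scheduler allocator data without blocking the other schedulers.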
diff --git a/erts/emulator/beam/erl_goodfit_alloc.c b/erts/emulator/beam/erl_goodfit_alloc.c
index 8322b233ac..e7d4ac2b67 100644
--- a/erts/emulator/beam/erl_goodfit_alloc.c
+++ b/erts/emulator/beam/erl_goodfit_alloc.c
@@ -190,16 +190,20 @@ erts_gfalc_start(GFAllctr_t *gfallctr,
GFAllctrInit_t *gfinit,
AllctrInit_t *init)
{
- GFAllctr_t nulled_state = {{0}};
- /* {{0}} is used instead of {0}, in order to avoid (an incorrect) gcc
- warning. gcc warns if {0} is used as initializer of a struct when
- the first member is a struct (not if, for example, the third member
- is a struct). */
+ struct {
+ int dummy;
+ GFAllctr_t allctr;
+ } zero = {0};
+ /* The struct with a dummy element first is used in order to avoid (an
+ incorrect) gcc warning. gcc warns if {0} is used as initializer of
+ a struct when the first member is a struct (not if, for example,
+ the third member is a struct). */
+
Allctr_t *allctr = (Allctr_t *) gfallctr;
- init->sbmbct = 0; /* Small mbc not yet supported by goodfit */
+ sys_memcpy((void *) gfallctr, (void *) &zero.allctr, sizeof(GFAllctr_t));
- sys_memcpy((void *) gfallctr, (void *) &nulled_state, sizeof(GFAllctr_t));
+ init->sbmbct = 0; /* Small mbc not yet supported by goodfit */
allctr->mbc_header_size = sizeof(Carrier_t);
allctr->min_mbc_size = MIN_MBC_SZ;
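
Editor's note: the nulled_state rewrite above is easy to misread, so here is the same workaround reduced to a standalone example (editor's sketch, not ERTS code):

    #include <string.h>

    struct inner { int a, b; };
    struct outer { struct inner first; int c; }; /* first member is a struct */

    void reset(struct outer *o)
    {
        /* gcc (incorrectly) warns about "{0}" when the first member of
           the initialized struct is itself a struct; hiding the target
           behind a leading scalar member avoids the warning while still
           zero-initializing everything. */
        struct { int dummy; struct outer target; } zero = {0};
        memcpy(o, &zero.target, sizeof(*o));
    }

Copying from the zeroed sub-object yields exactly the all-zero state the plain {0} initializer would have produced, without tripping the spurious warning.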
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index eaa5f9eefc..ef86e6db5e 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -42,6 +42,7 @@
#include "erl_misc_utils.h"
#include "packet_parser.h"
#include "erl_cpu_topology.h"
+#include "erl_thr_progress.h"
#ifdef HIPE
#include "hipe_mode_switch.h" /* for hipe_mode_switch_init() */
@@ -258,6 +259,8 @@ erl_init(int ncpu)
no_schedulers,
no_schedulers_online);
erts_init_cpu_topology(); /* Must be after init_scheduling */
+ erts_alloc_late_init();
+
H_MIN_SIZE = erts_next_heap_size(H_MIN_SIZE, 0);
BIN_VH_MIN_SIZE = erts_next_heap_size(BIN_VH_MIN_SIZE, 0);
@@ -642,6 +645,9 @@ early_init(int *argc, char **argv) /*
erts_use_r9_pids_ports = 0;
erts_sys_pre_init();
+#ifdef ERTS_SMP
+ erts_thr_progress_pre_init();
+#endif
#ifdef ERTS_ENABLE_LOCK_CHECK
erts_lc_init();
@@ -765,11 +771,15 @@ early_init(int *argc, char **argv) /*
erts_no_schedulers = (Uint) no_schedulers;
#endif
+ erts_early_init_scheduling(no_schedulers);
+ alloc_opts.ncpu = ncpu;
erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes)
-M flags. */
/* Require allocators */
- erts_early_init_scheduling();
+#ifdef ERTS_SMP
+ erts_thr_progress_init(no_schedulers, no_schedulers+1, 0);
+#endif
erts_init_utils();
erts_early_init_cpu_topology(no_schedulers,
&max_main_threads,
@@ -1521,9 +1531,6 @@ system_cleanup(int exit_code)
#if defined(USE_THREADS)
exit_async();
#endif
-#if HAVE_ERTS_MSEG
- erts_mseg_exit();
-#endif
/*
* A lot more cleaning could/should have been done...
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 587d82f2bb..02d1407a2d 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -125,7 +125,7 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "drv_ev_state", "address" },
{ "safe_hash", "address" },
{ "pollset_rm_list", NULL },
- { "removed_fd_pre_alloc_lock", NULL },
+ { "removed_fd_pre_alloc_lock", "address" },
{ "state_prealloc", NULL },
{ "schdlr_sspnd", NULL },
{ "run_queue", "address" },
@@ -151,10 +151,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "mtrace_op", NULL },
{ "instr_x", NULL },
{ "instr", NULL },
- { "fix_alloc", "index" },
{ "alcu_allocator", "index" },
{ "sbmbc_alloc", "index" },
- { "alcu_delayed_free", "index" },
{ "mseg", NULL },
#if HALFWORD_HEAP
{ "pmmap", NULL },
@@ -183,7 +181,6 @@ static erts_lc_lock_order_t erts_lock_order[] = {
{ "sched_stat", NULL },
{ "run_queue_sleep_list", "address" },
#endif
- { "alloc_thr_ix_lock", NULL },
#ifdef ERTS_SMP
{ "proc_lck_qs_alloc", NULL },
#endif
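
Editor's note: two things happen in this lock-order table. The classes that no longer exist after the rewrite (fix_alloc, alcu_delayed_free, alloc_thr_ix_lock) are deleted, and removed_fd_pre_alloc_lock gains an "address" qualifier because the pre-allocator now has one instance, and hence one lock, per scheduler; locks within one class must then be ordered by a secondary key such as their address. A minimal sketch of what such a checker enforces (hypothetical code, not the ERTS implementation):

    #include <assert.h>
    #include <stdint.h>

    #define MAX_HELD 16
    static struct { int rank; uintptr_t addr; } held[MAX_HELD];
    static int n_held;

    static void debug_lock(int rank, const void *lck)
    {
        uintptr_t addr = (uintptr_t) lck;
        if (n_held > 0) {
            /* A new lock must come later in the global order; equal
               ranks (several instances of one class) fall back to the
               secondary key, here the lock's address. */
            assert(held[n_held - 1].rank < rank
                   || (held[n_held - 1].rank == rank
                       && held[n_held - 1].addr < addr));
        }
        held[n_held].rank = rank;
        held[n_held].addr = addr;
        n_held++;
    }

    static void debug_unlock(void) { assert(n_held > 0); n_held--; }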
diff --git a/erts/emulator/beam/erl_mtrace.c b/erts/emulator/beam/erl_mtrace.c
index b1478758a1..358c67bf20 100644
--- a/erts/emulator/beam/erl_mtrace.c
+++ b/erts/emulator/beam/erl_mtrace.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2003-2010. All Rights Reserved.
+ * Copyright Ericsson AB 2003-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -503,12 +503,6 @@ write_trace_header(char *nodename, char *pid, char *hostname)
case ERTS_ALC_A_SYSTEM:
PUT_UI16(tracep, ERTS_MTRACE_SEGMENT_ID);
break;
- case ERTS_ALC_A_FIXED_SIZE:
- if (erts_allctrs_info[ERTS_FIX_CORE_ALLOCATOR].enabled)
- PUT_UI16(tracep, ERTS_FIX_CORE_ALLOCATOR);
- else
- PUT_UI16(tracep, ERTS_ALC_A_SYSTEM);
- break;
default:
PUT_UI16(tracep, ERTS_MTRACE_SEGMENT_ID);
break;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 9574435415..4dda17f559 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -39,6 +39,7 @@
#include "erl_binary.h"
#include "beam_bp.h"
#include "erl_cpu_topology.h"
+#include "erl_thr_progress.h"
#define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
#define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -127,6 +128,8 @@ ErtsLcPSDLocks erts_psd_required_locks[ERTS_PSD_SIZE];
int erts_disable_proc_not_running_opt;
+static ErtsAuxWorkData *aux_thread_aux_work_data;
+
#define ERTS_SCHDLR_SSPND_CHNG_WAITER (((erts_aint32_t) 1) << 0)
#define ERTS_SCHDLR_SSPND_CHNG_MSB (((erts_aint32_t) 1) << 1)
#define ERTS_SCHDLR_SSPND_CHNG_ONLN (((erts_aint32_t) 1) << 2)
@@ -152,6 +155,7 @@ do { \
static struct {
erts_smp_mtx_t mtx;
erts_smp_cnd_t cnd;
+ int aux_thread;
int online;
int curr_online;
int wait_curr_online;
@@ -213,8 +217,6 @@ Uint erts_no_run_queues;
ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
-#ifdef ERTS_SMP
-
typedef union {
ErtsSchedulerSleepInfo ssi;
char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerSleepInfo))];
@@ -222,8 +224,6 @@ typedef union {
static ErtsAlignedSchedulerSleepInfo *aligned_sched_sleep_info;
-#endif
-
#ifndef BM_COUNTERS
static int processes_busy;
#endif
@@ -285,8 +285,9 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
ERTS_ALC_T_PROC_LIST)
#define ERTS_SCHED_SLEEP_INFO_IX(IX) \
- (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers), \
- &aligned_sched_sleep_info[(IX)].ssi)
+ (ASSERT_EXPR(-1 <= ((int) (IX)) \
+ && ((int) (IX)) < ((int) erts_no_schedulers)), \
+ &aligned_sched_sleep_info[(IX)].ssi)
#define ERTS_FOREACH_RUNQ(RQVAR, DO) \
do { \
@@ -339,6 +340,57 @@ static void exec_misc_ops(ErtsRunQueue *);
static void print_function_from_pc(int to, void *to_arg, BeamInstr* x);
static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
int yreg);
+
+static void aux_work_timeout(void *unused);
+static void aux_work_timeout_early_init(int no_schedulers);
+static void aux_work_timeout_late_init(void);
+static void setup_aux_work_timer(void);
+
+#if defined(DEBUG) || 0
+#define ERTS_DBG_CHK_AUX_WORK_VAL(V) dbg_chk_aux_work_val((V))
+static void
+dbg_chk_aux_work_val(erts_aint32_t value)
+{
+ erts_aint32_t valid = 0;
+
+#ifdef ERTS_SSI_AUX_WORK_SET_TMO
+ valid |= ERTS_SSI_AUX_WORK_SET_TMO;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_CHECK_CHILDREN
+ valid |= ERTS_SSI_AUX_WORK_CHECK_CHILDREN;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_MISC
+ valid |= ERTS_SSI_AUX_WORK_MISC;
+#endif
+
+#ifdef ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC
+ valid |= ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_DD
+ valid |= ERTS_SSI_AUX_WORK_DD;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_DD_THR_PRGR
+ valid |= ERTS_SSI_AUX_WORK_DD_THR_PRGR;
+#endif
+#ifdef ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK
+ valid |= ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK;
+#endif
+
+ if (~valid & value)
+ erl_exit(ERTS_ABORT_EXIT,
+ "Invalid aux_work value found: 0x%x\n",
+ ~valid & value);
+}
+#define ERTS_DBG_CHK_SSI_AUX_WORK(SSI) \
+ ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&(SSI)->aux_work))
+#else
+#define ERTS_DBG_CHK_AUX_WORK_VAL(V)
+#define ERTS_DBG_CHK_SSI_AUX_WORK(SSI)
+#endif
+
#ifdef ERTS_SMP
static void handle_pending_exiters(ErtsProcList *);
@@ -570,6 +622,18 @@ erts_psd_set_init(Process *p, ErtsProcLocks plocks, int ix, void *data)
#ifdef ERTS_SMP
+static void
+prepare_for_block(void *vrq)
+{
+ erts_smp_runq_unlock((ErtsRunQueue *) vrq);
+}
+
+static void
+resume_after_block(void *vrq)
+{
+ erts_smp_runq_lock((ErtsRunQueue *) vrq);
+}
+
void
erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flags)
{
@@ -589,6 +653,68 @@ erts_sched_finish_poke(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flags)
}
}
+#endif
+
+static ERTS_INLINE void
+set_aux_work_flags_wakeup_nob(ErtsSchedulerSleepInfo *ssi,
+ erts_aint32_t flgs)
+{
+ erts_aint32_t old_flgs;
+
+ ERTS_DBG_CHK_SSI_AUX_WORK(ssi);
+
+ old_flgs = erts_atomic32_read_nob(&ssi->aux_work);
+ if ((old_flgs & flgs) == 0) {
+
+ old_flgs = erts_atomic32_read_bor_nob(&ssi->aux_work, flgs);
+
+ if ((old_flgs & flgs) == 0) {
+#ifdef ERTS_SMP
+ erts_sched_poke(ssi);
+#else
+ erts_sys_schedule_interrupt(1);
+#endif
+ }
+ }
+}
+
+#if 0 /* Currently not used */
+
+static ERTS_INLINE void
+set_aux_work_flags_wakeup_relb(ErtsSchedulerSleepInfo *ssi,
+ erts_aint32_t flgs)
+{
+ erts_aint32_t old_flgs;
+
+ ERTS_DBG_CHK_SSI_AUX_WORK(ssi);
+
+ old_flgs = erts_atomic32_read_bor_relb(&ssi->aux_work, flgs);
+
+ if ((old_flgs & flgs) == 0) {
+#ifdef ERTS_SMP
+ erts_sched_poke(ssi);
+#else
+ erts_sys_schedule_interrupt(1);
+#endif
+ }
+}
+
+#endif
+
+static ERTS_INLINE erts_aint32_t
+set_aux_work_flags(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flgs)
+{
+ return erts_atomic32_read_bor_nob(&ssi->aux_work, flgs);
+}
+
+static ERTS_INLINE erts_aint32_t
+unset_aux_work_flags(ErtsSchedulerSleepInfo *ssi, erts_aint32_t flgs)
+{
+ return erts_atomic32_read_band_nob(&ssi->aux_work, ~flgs);
+}
+
+#ifdef ERTS_SMP
+
typedef struct erts_misc_aux_work_t_ erts_misc_aux_work_t;
struct erts_misc_aux_work_t_ {
erts_misc_aux_work_t *next;
@@ -623,24 +749,27 @@ init_misc_aux_work(void)
misc_aux_work_queues =
erts_alloc_permanent_cache_aligned(ERTS_ALC_T_MISC_AUX_WORK_Q,
- erts_no_schedulers *
- sizeof(erts_algnd_misc_aux_work_q_t));
+ sizeof(erts_algnd_misc_aux_work_q_t)
+ * (erts_no_schedulers+1));
- for (ix = 0; ix < erts_no_schedulers; ix++) {
+ for (ix = 0; ix <= erts_no_schedulers; ix++) {
erts_smp_mtx_init_x(&misc_aux_work_queues[ix].data.mtx,
"misc_aux_work_queue",
- make_small(ix + 1));
+ make_small(ix));
misc_aux_work_queues[ix].data.first = NULL;
misc_aux_work_queues[ix].data.last = NULL;
}
}
-static void
-handle_misc_aux_work(ErtsSchedulerData *esdp)
+static erts_aint32_t
+handle_misc_aux_work(ErtsAuxWorkData *awdp,
+ erts_aint32_t aux_work)
{
- int ix = (int) esdp->no - 1;
+ int ix = (int) awdp->sched_id;
erts_misc_aux_work_t *mawp;
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_MISC);
+
erts_smp_mtx_lock(&misc_aux_work_queues[ix].data.mtx);
mawp = misc_aux_work_queues[ix].data.first;
misc_aux_work_queues[ix].data.first = NULL;
@@ -654,6 +783,35 @@ handle_misc_aux_work(ErtsSchedulerData *esdp)
mawp = mawp->next;
misc_aux_work_free(free_mawp);
}
+
+ return aux_work & ~ERTS_SSI_AUX_WORK_MISC;
+}
+
+static void
+smp_schedule_misc_aux_work(int ix,
+ void (*func)(void *),
+ void *arg)
+{
+ erts_aint32_t aux_work;
+ erts_misc_aux_work_t *mawp;
+ ErtsSchedulerSleepInfo *ssi;
+
+ mawp = misc_aux_work_alloc();
+
+ mawp->func = func;
+ mawp->arg = arg;
+ mawp->next = NULL;
+
+ erts_smp_mtx_lock(&misc_aux_work_queues[ix].data.mtx);
+ if (!misc_aux_work_queues[ix].data.last)
+ misc_aux_work_queues[ix].data.first = mawp;
+ else
+ misc_aux_work_queues[ix].data.last->next = mawp;
+ misc_aux_work_queues[ix].data.last = mawp;
+ erts_smp_mtx_unlock(&misc_aux_work_queues[ix].data.mtx);
+
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
+ ERTS_SSI_AUX_WORK_MISC);
}
void
@@ -667,111 +825,425 @@ erts_smp_schedule_misc_aux_work(int ignore_self,
if (ignore_self) {
ErtsSchedulerData *esdp = erts_get_scheduler_data();
if (esdp)
- ignore_ix = (int) esdp->no - 1;
+ ignore_ix = (int) esdp->no;
}
- ASSERT(0 <= max_sched && max_sched <= erts_no_schedulers);
+ ASSERT(0 < max_sched && max_sched <= erts_no_schedulers);
- for (ix = 0; ix < max_sched; ix++) {
- erts_aint32_t aux_work;
- erts_misc_aux_work_t *mawp;
- ErtsSchedulerSleepInfo *ssi;
+ for (ix = 1; ix <= max_sched; ix++) {
if (ix == ignore_ix)
continue;
+ smp_schedule_misc_aux_work(ix, func, arg);
+ }
+}
- mawp = misc_aux_work_alloc();
+#endif
- mawp->func = func;
- mawp->arg = arg;
- mawp->next = NULL;
+static erts_aint32_t
+handle_fix_alloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ ErtsSchedulerSleepInfo *ssi = awdp->ssi;
+ erts_aint32_t res;
- erts_smp_mtx_lock(&misc_aux_work_queues[ix].data.mtx);
- if (!misc_aux_work_queues[ix].data.last)
- misc_aux_work_queues[ix].data.first = mawp;
- else
- misc_aux_work_queues[ix].data.last->next = mawp;
- misc_aux_work_queues[ix].data.last = mawp;
- erts_smp_mtx_unlock(&misc_aux_work_queues[ix].data.mtx);
-
- ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
- aux_work = erts_smp_atomic32_read_bor_nob(&ssi->aux_work,
- ERTS_SSI_AUX_WORK_MISC);
- if ((aux_work & ERTS_SSI_AUX_WORK_MISC) == 0)
- erts_sched_poke(ssi);
- }
+ unset_aux_work_flags(ssi, (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC));
+ aux_work &= ~(ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC);
+ res = erts_alloc_fix_alloc_shrink(awdp->sched_id, aux_work);
+ if (res) {
+ set_aux_work_flags(ssi, res);
+ aux_work |= res;
+ }
+
+ return aux_work;
+}
+
+#ifdef ERTS_SMP
+
+void
+erts_alloc_notify_delayed_dealloc(int ix)
+{
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(ix-1),
+ ERTS_SSI_AUX_WORK_DD);
+}
+
+static erts_aint32_t
+handle_delayed_dealloc(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ ErtsSchedulerSleepInfo *ssi = awdp->ssi;
+ int need_thr_progress = 0;
+ int more_work = 0;
+
+ unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD);
+ erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp,
+ &need_thr_progress,
+ &more_work);
+ if (more_work) {
+ if (set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD)
+ & ERTS_SSI_AUX_WORK_DD_THR_PRGR) {
+ unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR);
+ aux_work &= ~ERTS_SSI_AUX_WORK_DD_THR_PRGR;
+ }
+ return aux_work;
+ }
+
+ if (need_thr_progress) {
+ set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR);
+ awdp->dd.thr_prgr = erts_thr_progress_later();
+ erts_thr_progress_wakeup(awdp->esdp, awdp->dd.thr_prgr);
+ }
+ else if (awdp->dd.completed_callback) {
+ awdp->dd.completed_callback(awdp->dd.completed_arg);
+ awdp->dd.completed_callback = NULL;
+ awdp->dd.completed_arg = NULL;
+ }
+ return aux_work & ~ERTS_SSI_AUX_WORK_DD;
}
+static erts_aint32_t
+handle_delayed_dealloc_thr_prgr(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ ErtsSchedulerSleepInfo *ssi;
+ int need_thr_progress;
+ int more_work;
+
+ if (!erts_thr_progress_has_reached(awdp->dd.thr_prgr))
+ return aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR;
+
+ ssi = awdp->ssi;
+ need_thr_progress = 0;
+ more_work = 0;
+
+ erts_alloc_scheduler_handle_delayed_dealloc((void *) awdp->esdp,
+ &need_thr_progress,
+ &more_work);
+ if (more_work) {
+ set_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD);
+ unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR);
+ return ((aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR)
+ | ERTS_SSI_AUX_WORK_DD);
+ }
+
+ if (need_thr_progress) {
+ awdp->dd.thr_prgr = erts_thr_progress_later();
+ erts_thr_progress_wakeup(awdp->esdp, awdp->dd.thr_prgr);
+ }
+ else {
+ unset_aux_work_flags(ssi, ERTS_SSI_AUX_WORK_DD_THR_PRGR);
+ if (awdp->dd.completed_callback) {
+ awdp->dd.completed_callback(awdp->dd.completed_arg);
+ awdp->dd.completed_callback = NULL;
+ awdp->dd.completed_arg = NULL;
+ }
+ }
+
+ return aux_work & ~ERTS_SSI_AUX_WORK_DD_THR_PRGR;
+}
+
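+
Editor's note: handle_delayed_dealloc() above encodes a handshake. When the allocator still holds blocks that other threads may reference (need_thr_progress), the handler records a future thread-progress value, raises ERTS_SSI_AUX_WORK_DD_THR_PRGR, and asks to be woken when that value is reached; handle_delayed_dealloc_thr_prgr() then re-checks erts_thr_progress_has_reached() before retrying. The skeleton, reduced to one thread's view (hypothetical names; real thread-progress values wrap and are compared accordingly):

    #include <stdint.h>

    static uint64_t current_progress;  /* advanced by the progress protocol */
    static uint64_t parked_until;
    static int parked;

    static void park_until_later(void)
    {
        parked_until = current_progress + 1; /* cf. erts_thr_progress_later() */
        parked = 1;                          /* cf. raising ..._DD_THR_PRGR  */
    }

    static int may_retry(void)
    {
        /* cf. erts_thr_progress_has_reached() */
        return parked && current_progress >= parked_until;
    }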
+static erts_atomic32_t completed_dealloc_count;
+
+static void
+completed_dealloc(void *vproc)
+{
+ if (erts_atomic32_dec_read_mb(&completed_dealloc_count) == 0) {
+ erts_resume((Process *) vproc, (ErtsProcLocks) 0);
+ erts_smp_proc_dec_refc((Process *) vproc);
+ }
+}
+
+static void
+setup_completed_dealloc(void *vproc)
+{
+ ErtsSchedulerData *esdp = erts_get_scheduler_data();
+ ErtsAuxWorkData *awdp = (esdp
+ ? &esdp->aux_work_data
+ : aux_thread_aux_work_data);
+ erts_alloc_fix_alloc_shrink(awdp->sched_id, 0);
+ set_aux_work_flags_wakeup_nob(awdp->ssi, ERTS_SSI_AUX_WORK_DD);
+ awdp->dd.completed_callback = completed_dealloc;
+ awdp->dd.completed_arg = vproc;
+}
+
+static void
+prep_setup_completed_dealloc(void *vproc)
+{
+ erts_aint32_t count = (erts_aint32_t) (erts_no_schedulers+1);
+ if (erts_atomic32_dec_read_mb(&completed_dealloc_count) == count) {
+ /* scheduler threads */
+ erts_smp_schedule_misc_aux_work(0,
+ erts_no_schedulers,
+ setup_completed_dealloc,
+ vproc);
+ /* aux_thread */
+ smp_schedule_misc_aux_work(0,
+ setup_completed_dealloc,
+ vproc);
+ }
+}
+
+#endif /* ERTS_SMP */
+
+int
+erts_debug_wait_deallocations(Process *c_p)
+{
+#ifndef ERTS_SMP
+ erts_alloc_fix_alloc_shrink(1, 0);
+ return 1;
+#else
+ /* Only one process at a time can do this */
+ erts_aint32_t count = (erts_aint32_t) (2*(erts_no_schedulers+1));
+ if (0 == erts_atomic32_cmpxchg_mb(&completed_dealloc_count,
+ count,
+ 0)) {
+ erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
+ erts_smp_proc_inc_refc(c_p);
+ /* scheduler threads */
+ erts_smp_schedule_misc_aux_work(0,
+ erts_no_schedulers,
+ prep_setup_completed_dealloc,
+ (void *) c_p);
+ /* aux_thread */
+ smp_schedule_misc_aux_work(0,
+ prep_setup_completed_dealloc,
+ (void *) c_p);
+ return 1;
+ }
+ return 0;
+#endif
+}
+
+
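+
Editor's note: the counting in erts_debug_wait_deallocations() above is a two-phase countdown over the N schedulers plus the aux thread. With N = 4 the counter starts at 2*(4+1) = 10: the five "prep" steps take it down to 5, and the last of them fans out setup_completed_dealloc() to every thread; the five completion callbacks then take it to 0, and whoever reaches 0 resumes the suspended process. A serial simulation of the arithmetic (editor's sketch):

    #include <stdio.h>

    int main(void)
    {
        int n = 4;                    /* schedulers */
        int parts = n + 1;            /* plus the aux thread */
        int count = 2 * parts;        /* 10 */
        int t;

        for (t = 0; t < parts; t++)   /* phase 1: prep on every thread */
            if (--count == parts)
                printf("last prepper fans out setup (count=%d)\n", count);

        for (t = 0; t < parts; t++)   /* phase 2: completion callbacks */
            if (--count == 0)
                printf("resume the waiting process (count=%d)\n", count);

        return 0;
    }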
#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
void
erts_smp_notify_check_children_needed(void)
{
int i;
+ for (i = 0; i < erts_no_schedulers; i++)
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(i),
+ ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
+}
- for (i = 0; i < erts_no_schedulers; i++) {
- erts_aint32_t aux_work;
- ErtsSchedulerSleepInfo *ssi;
- ssi = ERTS_SCHED_SLEEP_INFO_IX(i);
- aux_work = erts_smp_atomic32_read_bor_nob(&ssi->aux_work,
- ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
- if (!(aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN))
- erts_sched_poke(ssi);
- }
+static erts_aint32_t
+handle_check_children(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
+ erts_check_children();
+ return aux_work & ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN;
}
+
#endif
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
+#ifdef ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK
+
+static erts_aint32_t
+handle_mseg_cache_check(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK);
+ erts_mseg_cache_check();
+ return aux_work & ~ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK;
+}
+
+#endif
+
+static erts_aint32_t
+handle_setup_aux_work_timer(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
+{
+ unset_aux_work_flags(awdp->ssi, ERTS_SSI_AUX_WORK_SET_TMO);
+ setup_aux_work_timer();
+ return aux_work & ~ERTS_SSI_AUX_WORK_SET_TMO;
+}
+
static ERTS_INLINE erts_aint32_t
-blockable_aux_work(ErtsSchedulerData *esdp,
- ErtsSchedulerSleepInfo *ssi,
- erts_aint32_t aux_work)
+handle_aux_work(ErtsAuxWorkData *awdp, erts_aint32_t aux_work)
{
- if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
- if (aux_work & ERTS_SSI_AUX_WORK_MISC) {
- aux_work = erts_smp_atomic32_read_band_nob(&ssi->aux_work,
- ~ERTS_SSI_AUX_WORK_MISC);
- aux_work &= ~ERTS_SSI_AUX_WORK_MISC;
- handle_misc_aux_work(esdp);
- }
+ /*
+ * Handlers are *only* allowed to modify flags in return value
+     * and ssi flags that are explicitly handled by the handler.
+ * Handlers are, e.g., not allowed to read the ssi flag field and
+ * then unconditionally return that value.
+ */
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ if (aux_work & ERTS_SSI_AUX_WORK_SET_TMO) {
+ aux_work = handle_setup_aux_work_timer(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
+#ifdef ERTS_SMP
+ if (aux_work & ERTS_SSI_AUX_WORK_MISC) {
+ aux_work = handle_misc_aux_work(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
+#endif
#ifdef ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
- if (aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN) {
- aux_work = erts_smp_atomic32_band_nob(&ssi->aux_work,
- ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN);
- aux_work &= ~ERTS_SSI_AUX_WORK_CHECK_CHILDREN;
- erts_check_children();
- }
+ if (aux_work & ERTS_SSI_AUX_WORK_CHECK_CHILDREN) {
+ aux_work = handle_check_children(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
+#endif
+ if (aux_work & (ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM
+ | ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC)) {
+ aux_work = handle_fix_alloc(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
+#ifdef ERTS_SMP
+ if (aux_work & ERTS_SSI_AUX_WORK_DD) {
+ aux_work = handle_delayed_dealloc(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
+ if (aux_work & ERTS_SSI_AUX_WORK_DD_THR_PRGR) {
+ aux_work = handle_delayed_dealloc_thr_prgr(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
+ }
#endif
+#ifdef ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK
+ if (aux_work & ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK) {
+ aux_work = handle_mseg_cache_check(awdp, aux_work);
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
}
+#endif
+ ERTS_DBG_CHK_AUX_WORK_VAL(aux_work);
return aux_work;
}
-#endif
+typedef struct {
+ union {
+ ErlTimer data;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErlTimer))];
+ } timer;
-#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
-static ERTS_INLINE erts_aint32_t
-nonblockable_aux_work(ErtsSchedulerData *esdp,
- ErtsSchedulerSleepInfo *ssi,
- erts_aint32_t aux_work)
+ int initialized;
+ erts_atomic32_t refc;
+ erts_atomic32_t type[1];
+} ErtsAuxWorkTmo;
+
+static ErtsAuxWorkTmo *aux_work_tmo;
+
+static void
+aux_work_timeout_early_init(int no_schedulers)
{
- if (aux_work & ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK) {
+ int i;
+ UWord p;
+
+ /*
+ * This is done really early. Our own allocators have
+ * not been started yet.
+ */
+
+ p = (UWord) malloc((sizeof(ErtsAuxWorkTmo)
+ + sizeof(erts_atomic32_t)*(no_schedulers+1))
+ + ERTS_CACHE_LINE_SIZE-1);
+ if (p & ERTS_CACHE_LINE_MASK)
+ p = (p & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE;
+ ASSERT((p & ERTS_CACHE_LINE_MASK) == 0);
+
+ aux_work_tmo = (ErtsAuxWorkTmo *) p;
+ aux_work_tmo->initialized = 0;
+ erts_atomic32_init_nob(&aux_work_tmo->refc, 0);
+ for (i = 0; i <= no_schedulers; i++)
+ erts_atomic32_init_nob(&aux_work_tmo->type[i], 0);
+}
+void
+aux_work_timeout_late_init(void)
+{
+ aux_work_tmo->initialized = 1;
+ if (erts_atomic32_read_nob(&aux_work_tmo->refc)) {
+ aux_work_tmo->timer.data.active = 0;
+ erts_set_timer(&aux_work_tmo->timer.data,
+ aux_work_timeout,
+ NULL,
+ NULL,
+ 1000);
}
}
-#endif
static void
-prepare_for_block(void *vrq)
+aux_work_timeout(void *unused)
{
- erts_smp_runq_unlock((ErtsRunQueue *) vrq);
+ erts_aint32_t refc;
+ int i;
+#ifdef ERTS_SMP
+ i = 0;
+#else
+ i = 1;
+#endif
+
+ for (; i <= erts_no_schedulers; i++) {
+ erts_aint32_t type;
+ type = erts_atomic32_read_acqb(&aux_work_tmo->type[i]);
+ if (type)
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(i-1),
+ type);
+ }
+
+ refc = erts_atomic32_read_nob(&aux_work_tmo->refc);
+ ASSERT(refc >= 1);
+ if (refc != 1
+ || 1 != erts_atomic32_cmpxchg_relb(&aux_work_tmo->refc, 0, 1)) {
+ /* Setup next timeout... */
+ aux_work_tmo->timer.data.active = 0;
+ erts_set_timer(&aux_work_tmo->timer.data,
+ aux_work_timeout,
+ NULL,
+ NULL,
+ 1000);
+ }
}
static void
-resume_after_block(void *vrq)
+setup_aux_work_timer(void)
{
- erts_smp_runq_lock((ErtsRunQueue *) vrq);
+#ifndef ERTS_SMP
+ if (!erts_get_scheduler_data())
+ set_aux_work_flags_wakeup_nob(ERTS_SCHED_SLEEP_INFO_IX(0),
+ ERTS_SSI_AUX_WORK_SET_TMO);
+ else
+#endif
+ {
+ aux_work_tmo->timer.data.active = 0;
+ erts_set_timer(&aux_work_tmo->timer.data,
+ aux_work_timeout,
+ NULL,
+ NULL,
+ 1000);
+ }
}
+erts_aint32_t
+erts_set_aux_work_timeout(int ix, erts_aint32_t type, int enable)
+{
+ erts_aint32_t old, refc;
+
+#ifndef ERTS_SMP
+ ix = 1;
#endif
+ ERTS_DBG_CHK_AUX_WORK_VAL(type);
+ ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&aux_work_tmo->type[ix]));
+// erts_fprintf(stderr, "t(%d, 0x%x, %d)\n", ix, type, enable);
+
+ if (!enable) {
+ old = erts_atomic32_read_band_mb(&aux_work_tmo->type[ix], ~type);
+ ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&aux_work_tmo->type[ix]));
+ if (old != 0 && (old & ~type) == 0)
+ erts_atomic32_dec_relb(&aux_work_tmo->refc);
+ return old;
+ }
+
+ old = erts_atomic32_read_bor_mb(&aux_work_tmo->type[ix], type);
+ ERTS_DBG_CHK_AUX_WORK_VAL(erts_atomic32_read_nob(&aux_work_tmo->type[ix]));
+ if (old == 0 && type != 0) {
+ refc = erts_atomic32_inc_read_acqb(&aux_work_tmo->refc);
+ if (refc == 1) {
+ erts_atomic32_inc_acqb(&aux_work_tmo->refc);
+ if (aux_work_tmo->initialized)
+ setup_aux_work_timer();
+ }
+ }
+ return old;
+}
+
+
+
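+
Editor's note: erts_set_aux_work_timeout() above maintains an unusual invariant: refc counts the scheduler slots whose type word is nonzero, plus one while the one-second timer is armed. That is why the first enable increments twice, and why aux_work_timeout() only stops re-arming once a cmpxchg from 1 to 0 succeeds, i.e. once the timer's own reference is the last one. A single-threaded model of the invariant (editor's sketch, no atomics):

    #include <stdio.h>

    static int refc;

    static void enable(void)
    {
        if (refc++ == 0)   /* first enabled slot ...                */
            refc++;        /* ... also counts the armed timer       */
    }

    static void disable(void) { refc--; }

    static int timer_fires(void)     /* returns 1 if it re-arms itself */
    {
        if (refc == 1) {             /* only the timer's own reference */
            refc = 0;
            return 0;
        }
        return 1;
    }

    int main(void)
    {
        enable();                       /* refc == 2 */
        printf("%d\n", timer_fires());  /* 1: still enabled, re-arm */
        disable();                      /* refc == 1 */
        printf("%d\n", timer_fires());  /* 0: timer lets go */
        return 0;
    }

The extra timer reference is what lets the timeout handler itself decide, race-free, whether to re-arm.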
static ERTS_INLINE void
sched_waiting_sys(Uint no, ErtsRunQueue *rq)
{
@@ -800,8 +1272,6 @@ sched_active_sys(Uint no, ErtsRunQueue *rq)
Uint
erts_active_schedulers(void)
{
- /* RRRRRRRRR */
-
Uint as = erts_no_schedulers;
ERTS_ATOMIC_FOREACH_RUNQ(rq, as -= abs(rq->waiting));
@@ -988,6 +1458,10 @@ sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type)
if (sleep_type == ERTS_SSI_FLG_TSE_SLEEPING)
erts_tse_reset(ssi->event);
+ else {
+ ASSERT(sleep_type == ERTS_SSI_FLG_POLL_SLEEPING);
+ erts_sys_schedule_interrupt(0);
+ }
while (1) {
oflgs = erts_smp_atomic32_cmpxchg_acqb(&ssi->flags, nflgs, xflgs);
@@ -1006,16 +1480,96 @@ sched_set_sleeptype(ErtsSchedulerSleepInfo *ssi, erts_aint32_t sleep_type)
(((FLGS) & (ERTS_SSI_FLG_WAITING|ERTS_SSI_FLG_SUSPENDED)) \
!= ERTS_SSI_FLG_WAITING)
+
+static void
+poke_ssi(void *vssi)
+{
+ erts_sched_poke((ErtsSchedulerSleepInfo *) vssi);
+}
+
+static void init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp);
+
+static void *
+aux_thread(void *unused)
+{
+ ErtsAuxWorkData *awdp = aux_thread_aux_work_data;
+ ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(-1);
+ erts_aint32_t aux_work;
+ ErtsThrPrgrWakeupCallback wake_me;
+ int thr_prgr_active = 1;
+
+ wake_me.wakeup = poke_ssi;
+ wake_me.arg = (void *) ssi;
+
+ erts_thr_progress_register_managed_thread(NULL, &wake_me, 1);
+ init_aux_work_data(awdp, NULL);
+ awdp->ssi = ssi;
+
+ erts_register_blockable_thread();
+
+ ssi->event = erts_tse_fetch();
+
+ erts_smp_mtx_lock(&schdlr_sspnd.mtx);
+ schdlr_sspnd.aux_thread = 1;
+ erts_smp_cnd_signal(&schdlr_sspnd.cnd);
+ erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
+
+ sched_prep_spin_wait(ssi);
+
+ while (1) {
+ erts_aint32_t flgs;
+
+ aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
+ if (aux_work) {
+ if (!thr_prgr_active)
+ erts_thr_progress_active(NULL, thr_prgr_active = 1);
+ aux_work = handle_aux_work(awdp, aux_work);
+ if (aux_work && erts_thr_progress_update(NULL))
+ erts_thr_progress_leader_update(NULL);
+ }
+
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ if (!aux_work) {
+ if (thr_prgr_active)
+ erts_thr_progress_active(NULL, thr_prgr_active = 0);
+ erts_thr_progress_prepare_wait(NULL);
+
+ flgs = sched_spin_wait(ssi, 0);
+
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ int res;
+ ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
+ }
+ erts_thr_progress_finalize_wait(NULL);
+ }
+
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ flgs = sched_prep_spin_wait(ssi);
+ }
+ return NULL;
+}
+
+#endif /* ERTS_SMP */
+
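+
Editor's note: aux_thread() above and scheduler_wait() below share one discipline: a thread handles its aux work while registered as active in the thread-progress protocol, and must flip itself inactive and bracket any actual sleep with prepare/finalize so that waiters on global progress are never stalled by a sleeping thread. The shape of the loop, with stub names standing in for the ERTS primitives (editor's skeleton):

    #include <stdbool.h>

    /* stubs standing in for the real primitives */
    unsigned read_work_flags(void);
    void handle_work(unsigned);
    void set_active(bool);       /* cf. erts_thr_progress_active()        */
    void prepare_wait(void);     /* cf. erts_thr_progress_prepare_wait()  */
    bool still_no_work(void);
    void block_on_event(void);   /* cf. erts_tse_wait()                   */
    void finalize_wait(void);    /* cf. erts_thr_progress_finalize_wait() */

    void wait_loop(void)
    {
        for (;;) {
            unsigned work = read_work_flags();
            if (work) {
                set_active(true);
                handle_work(work);
                continue;
            }
            set_active(false);
            prepare_wait();      /* others may now ignore this thread */
            if (still_no_work())
                block_on_event();
            finalize_wait();
        }
    }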
static void
scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
{
ErtsSchedulerSleepInfo *ssi = esdp->ssi;
int spincount;
+ erts_aint32_t aux_work = 0;
+#ifdef ERTS_SMP
+ int thr_prgr_active = 1;
erts_aint32_t flgs;
-#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
- || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
- erts_aint32_t aux_work;
-#endif
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
@@ -1049,36 +1603,44 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
tse_wait:
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- tse_blockable_aux_work:
- aux_work = blockable_aux_work(esdp, ssi, aux_work);
-#endif
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
-
while (1) {
-#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
-#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
-#endif
- nonblockable_aux_work(esdp, ssi, aux_work);
-#endif
+ aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
+ if (aux_work) {
+ if (!thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ aux_work = handle_aux_work(&esdp->aux_work_data, aux_work);
+ if (aux_work && erts_thr_progress_update(esdp))
+ erts_thr_progress_leader_update(esdp);
+ }
- flgs = sched_spin_wait(ssi, spincount);
- if (flgs & ERTS_SSI_FLG_SLEEPING) {
- ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ if (aux_work)
+ flgs = erts_smp_atomic32_read_acqb(&ssi->flags);
+ else {
+ if (thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_prepare_wait(esdp);
+
+ flgs = sched_spin_wait(ssi, spincount);
if (flgs & ERTS_SSI_FLG_SLEEPING) {
- int res;
- ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
ASSERT(flgs & ERTS_SSI_FLG_WAITING);
- do {
- res = erts_tse_wait(ssi->event);
- } while (res == EINTR);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_TSE_SLEEPING);
+ if (flgs & ERTS_SSI_FLG_SLEEPING) {
+ int res;
+ ASSERT(flgs & ERTS_SSI_FLG_TSE_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
}
+ erts_thr_progress_finalize_wait(esdp);
}
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
break;
@@ -1092,18 +1654,8 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
break;
}
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- goto tse_blockable_aux_work;
- }
-#endif
-
}
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
-
if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
@@ -1111,7 +1663,9 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
sched_active(esdp->no, rq);
}
- else {
+ else
+#endif
+ {
erts_aint_t dt;
erts_smp_atomic32_set_relb(&function_calls, 0);
@@ -1135,18 +1689,27 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
if (dt) erts_bump_timer(dt);
sys_aux_work:
-
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- aux_work = blockable_aux_work(esdp, ssi, aux_work);
+#ifndef ERTS_SMP
+ erts_sys_schedule_interrupt(0);
#endif
-#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
-#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
+
+ aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
+ if (aux_work) {
+#ifdef ERTS_SMP
+ if (!thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
#endif
- nonblockable_aux_work(esdp, ssi, aux_work);
+ aux_work = handle_aux_work(&esdp->aux_work_data, aux_work);
+#ifdef ERTS_SMP
+ if (aux_work && erts_thr_progress_update(esdp))
+ erts_thr_progress_leader_update(esdp);
#endif
+ }
+#ifndef ERTS_SMP
+ if (rq->len != 0 || rq->misc.start)
+ goto sys_woken;
+#else
flgs = erts_smp_atomic32_read_acqb(&ssi->flags);
if (!(flgs & ERTS_SSI_FLG_WAITING)) {
ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
@@ -1168,10 +1731,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
goto tse_wait;
}
}
+#endif
}
erts_smp_runq_lock(rq);
+#ifdef ERTS_SMP
/*
* If we got new I/O tasks we aren't allowed to
* sleep in erl_sys_schedule().
@@ -1183,64 +1748,81 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
* Got to check that we still got I/O tasks; otherwise
* we have to wait in erl_sys_schedule() after all...
*/
- if (prepare_for_sys_schedule())
- goto do_sys_schedule;
-
- /*
- * Not allowed to wait in erl_sys_schedule;
- * do tse wait instead...
- */
- sched_change_waiting_sys_to_waiting(esdp->no, rq);
+ if (!prepare_for_sys_schedule()) {
+ /*
+ * Not allowed to wait in erl_sys_schedule;
+ * do tse wait instead...
+ */
+ sched_change_waiting_sys_to_waiting(esdp->no, rq);
+ erts_smp_runq_unlock(rq);
+ spincount = 0;
+ goto tse_wait;
+ }
+ }
+#endif
+ if (aux_work) {
erts_smp_runq_unlock(rq);
- spincount = 0;
- goto tse_wait;
+ goto sys_poll_aux_work;
}
- else {
- do_sys_schedule:
- erts_sys_schedule_interrupt(0);
- flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
- if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
- if (!(flgs & ERTS_SSI_FLG_WAITING))
- goto sys_locked_woken;
- erts_smp_runq_unlock(rq);
- flgs = sched_prep_cont_spin_wait(ssi);
- if (!(flgs & ERTS_SSI_FLG_WAITING)) {
- ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
- goto sys_woken;
- }
- ASSERT(!erts_port_task_have_outstanding_io_tasks());
- goto sys_poll_aux_work;
+#ifdef ERTS_SMP
+ if (thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ flgs = sched_set_sleeptype(ssi, ERTS_SSI_FLG_POLL_SLEEPING);
+ if (!(flgs & ERTS_SSI_FLG_SLEEPING)) {
+ if (!(flgs & ERTS_SSI_FLG_WAITING))
+ goto sys_locked_woken;
+ erts_smp_runq_unlock(rq);
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (!(flgs & ERTS_SSI_FLG_WAITING)) {
+ ASSERT(!(flgs & ERTS_SSI_FLG_SLEEPING));
+ goto sys_woken;
}
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
+ goto sys_poll_aux_work;
+ }
- ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
- ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+ ASSERT(flgs & ERTS_SSI_FLG_POLL_SLEEPING);
+ ASSERT(flgs & ERTS_SSI_FLG_WAITING);
+#endif
- erts_smp_runq_unlock(rq);
+ erts_smp_runq_unlock(rq);
- ASSERT(!erts_port_task_have_outstanding_io_tasks());
+ ASSERT(!erts_port_task_have_outstanding_io_tasks());
- erl_sys_schedule(0);
+ erl_sys_schedule(0);
- dt = erts_do_time_read_and_reset();
- if (dt) erts_bump_timer(dt);
+ dt = erts_do_time_read_and_reset();
+ if (dt) erts_bump_timer(dt);
- flgs = sched_prep_cont_spin_wait(ssi);
- if (flgs & ERTS_SSI_FLG_WAITING)
- goto sys_aux_work;
+#ifndef ERTS_SMP
+ if (rq->len == 0 && !rq->misc.start)
+ goto sys_aux_work;
+ sys_woken:
+#else
+ flgs = sched_prep_cont_spin_wait(ssi);
+ if (flgs & ERTS_SSI_FLG_WAITING)
+ goto sys_aux_work;
- sys_woken:
- erts_smp_runq_lock(rq);
- sys_locked_woken:
- clear_sys_scheduling();
- if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
- erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
- sched_active_sys(esdp->no, rq);
- }
+ sys_woken:
+ erts_smp_runq_lock(rq);
+ sys_locked_woken:
+ clear_sys_scheduling();
+ if (flgs & ~ERTS_SSI_FLG_SUSPENDED)
+ erts_smp_atomic32_read_band_nob(&ssi->flags, ERTS_SSI_FLG_SUSPENDED);
+#endif
+ sched_active_sys(esdp->no, rq);
}
+#ifdef ERTS_SMP
+ if (!thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
+
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
+#endif
}
+#ifdef ERTS_SMP
+
static ERTS_INLINE erts_aint32_t
ssi_flags_set_wake(ErtsSchedulerSleepInfo *ssi)
{
@@ -2547,8 +3129,9 @@ erts_debug_nbalance(void)
}
void
-erts_early_init_scheduling(void)
+erts_early_init_scheduling(int no_schedulers)
{
+ aux_work_timeout_early_init(no_schedulers);
wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
}
@@ -2569,12 +3152,24 @@ erts_sched_set_wakeup_limit(char *str)
return EINVAL;
return 0;
}
-
+
+static void
+init_aux_work_data(ErtsAuxWorkData *awdp, ErtsSchedulerData *esdp)
+{
+ awdp->sched_id = esdp ? (int) esdp->no : 0;
+ awdp->esdp = esdp;
+ awdp->ssi = esdp ? esdp->ssi : NULL;
+#ifdef ERTS_SMP
+ awdp->dd.thr_prgr = ERTS_THR_PRGR_VAL_WAITING;
+ awdp->dd.completed_callback = NULL;
+ awdp->dd.completed_arg = NULL;
+#endif
+}
void
erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
{
- int ix, n;
+ int ix, n, no_ssi;
#ifndef ERTS_SMP
mrq = 0;
@@ -2684,23 +3279,31 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
n = (int) no_schedulers;
erts_no_schedulers = n;
-#ifdef ERTS_SMP
/* Create and initialize scheduler sleep info */
-
+#ifdef ERTS_SMP
+ no_ssi = n+1;
+#else
+ no_ssi = 1;
+#endif
aligned_sched_sleep_info =
- erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_SLP_INFO,
- n * sizeof(ErtsAlignedSchedulerSleepInfo));
-
- for (ix = 0; ix < n; ix++) {
- ErtsSchedulerSleepInfo *ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
+ erts_alloc_permanent_cache_aligned(
+ ERTS_ALC_T_SCHDLR_SLP_INFO,
+ no_ssi*sizeof(ErtsAlignedSchedulerSleepInfo));
+ for (ix = 0; ix < no_ssi; ix++) {
+ ErtsSchedulerSleepInfo *ssi = &aligned_sched_sleep_info[ix].ssi;
+#ifdef ERTS_SMP
#if 0 /* no need to initialize these... */
ssi->next = NULL;
ssi->prev = NULL;
#endif
erts_smp_atomic32_init_nob(&ssi->flags, 0);
ssi->event = NULL; /* initialized in sched_thread_func */
- erts_smp_atomic32_init_nob(&ssi->aux_work, 0);
+#endif
+ erts_atomic32_init_nob(&ssi->aux_work, 0);
}
+
+#ifdef ERTS_SMP
+ aligned_sched_sleep_info++;
#endif
/* Create and initialize scheduler specific data */
@@ -2714,7 +3317,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
#ifdef ERTS_SMP
erts_bits_init_state(&esdp->erl_bits_state);
esdp->match_pseudo_process = NULL;
- esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
esdp->free_process = NULL;
#if HALFWORD_HEAP
/* Registers need to be heap allocated (correct memory range) for tracing to work */
@@ -2725,6 +3327,7 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
esdp->num_tmp_heap_used = 0;
#endif
esdp->no = (Uint) ix+1;
+ esdp->ssi = ERTS_SCHED_SLEEP_INFO_IX(ix);
esdp->current_process = NULL;
esdp->current_port = NULL;
@@ -2745,13 +3348,23 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
#ifdef ERTS_SMP
erts_smp_atomic32_init_nob(&esdp->chk_cpu_bind, 0);
#endif
+ init_aux_work_data(&esdp->aux_work_data, esdp);
}
#ifdef ERTS_SMP
+ init_misc_aux_work();
+
+ erts_atomic32_init_nob(&completed_dealloc_count, 0); /* debug only */
+
+ aux_thread_aux_work_data =
+ erts_alloc_permanent_cache_aligned(ERTS_ALC_T_SCHDLR_DATA,
+ sizeof(ErtsAuxWorkData));
+
erts_smp_mtx_init(&schdlr_sspnd.mtx, "schdlr_sspnd");
erts_smp_cnd_init(&schdlr_sspnd.cnd);
erts_smp_atomic32_init_nob(&schdlr_sspnd.changing, 0);
+ schdlr_sspnd.aux_thread = 0;
schdlr_sspnd.online = no_schedulers_online;
schdlr_sspnd.curr_online = no_schedulers;
schdlr_sspnd.msb.ongoing = 0;
@@ -2809,6 +3422,8 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
/* init port tasks */
erts_port_task_init();
+ aux_work_timeout_late_init();
+
#ifndef ERTS_SMP
#ifdef ERTS_DO_VERIFY_UNUSED_TEMP_ALLOC
erts_scheduler_data->verify_unused_temp_alloc
@@ -3058,10 +3673,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
long active_schedulers;
int curr_online = 1;
int wake = 0;
-#if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
- || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
erts_aint32_t aux_work;
-#endif
+ int thr_prgr_active = 1;
/*
* Schedulers may be suspended in two different ways:
@@ -3134,40 +3747,46 @@ suspend_scheduler(ErtsSchedulerData *esdp)
break;
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
-
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- blockable_aux_work:
- blockable_aux_work(esdp, ssi, aux_work);
-#endif
-
- erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
while (1) {
erts_aint32_t flgs;
-#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
-#ifndef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read(&ssi->aux_work);
-#endif
- nonblockable_aux_work(esdp, ssi, aux_work);
-#endif
- flgs = sched_spin_suspended(ssi,
- ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT);
- if (flgs == (ERTS_SSI_FLG_SLEEPING
- | ERTS_SSI_FLG_WAITING
- | ERTS_SSI_FLG_SUSPENDED)) {
- flgs = sched_set_suspended_sleeptype(ssi);
+ aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
+ if (aux_work) {
+ if (!thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
+ aux_work = handle_aux_work(&esdp->aux_work_data, aux_work);
+ if (aux_work && erts_thr_progress_update(esdp))
+ erts_thr_progress_leader_update(esdp);
+ }
+
+ erts_smp_activity_begin(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
+ if (!aux_work) {
+ if (thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 0);
+ erts_thr_progress_prepare_wait(esdp);
+ flgs = sched_spin_suspended(ssi,
+ ERTS_SCHED_SUSPEND_SLEEP_SPINCOUNT);
if (flgs == (ERTS_SSI_FLG_SLEEPING
- | ERTS_SSI_FLG_TSE_SLEEPING
| ERTS_SSI_FLG_WAITING
| ERTS_SSI_FLG_SUSPENDED)) {
- int res;
- do {
- res = erts_tse_wait(ssi->event);
- } while (res == EINTR);
+ flgs = sched_set_suspended_sleeptype(ssi);
+ if (flgs == (ERTS_SSI_FLG_SLEEPING
+ | ERTS_SSI_FLG_TSE_SLEEPING
+ | ERTS_SSI_FLG_WAITING
+ | ERTS_SSI_FLG_SUSPENDED)) {
+ int res;
+
+ do {
+ res = erts_tse_wait(ssi->event);
+ } while (res == EINTR);
+ }
}
+ erts_thr_progress_finalize_wait(esdp);
}
+ erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
+
flgs = sched_prep_spin_suspended(ssi, (ERTS_SSI_FLG_WAITING
| ERTS_SSI_FLG_SUSPENDED));
if (!(flgs & ERTS_SSI_FLG_SUSPENDED))
@@ -3175,19 +3794,8 @@ suspend_scheduler(ErtsSchedulerData *esdp)
changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing);
if (changing & ~ERTS_SCHDLR_SSPND_CHNG_WAITER)
break;
-
-
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- if (aux_work & ERTS_SSI_BLOCKABLE_AUX_WORK_MASK) {
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
- goto blockable_aux_work;
- }
-#endif
-
}
- erts_smp_activity_end(ERTS_ACTIVITY_WAIT, NULL, NULL, NULL);
erts_smp_mtx_lock(&schdlr_sspnd.mtx);
changing = erts_smp_atomic32_read_nob(&schdlr_sspnd.changing);
@@ -3213,6 +3821,9 @@ suspend_scheduler(ErtsSchedulerData *esdp)
if (erts_system_profile_flags.scheduler)
profile_scheduler(make_small(esdp->no), am_active);
+ if (!thr_prgr_active)
+ erts_thr_progress_active(esdp, thr_prgr_active = 1);
+
erts_smp_runq_lock(esdp->run_queue);
non_empty_runq(esdp->run_queue);
@@ -3720,8 +4331,14 @@ erts_multi_scheduling_blockers(Process *p)
static void *
sched_thread_func(void *vesdp)
{
+ ErtsThrPrgrWakeupCallback wake_me;
+ ErtsSchedulerData *esdp = vesdp;
+ Uint no = esdp->no;
#ifdef ERTS_SMP
- Uint no = ((ErtsSchedulerData *) vesdp)->no;
+ wake_me.wakeup = poke_ssi;
+ wake_me.arg = (void *) esdp->ssi;
+ erts_thr_progress_register_managed_thread(esdp, &wake_me, 0);
+ erts_alloc_register_scheduler(vesdp);
#endif
#ifdef ERTS_ENABLE_LOCK_CHECK
{
@@ -3730,11 +4347,13 @@ sched_thread_func(void *vesdp)
erts_lc_set_thread_name(&buf[0]);
}
#endif
- erts_alloc_reg_scheduler_id(no);
erts_tsd_set(sched_data_key, vesdp);
#ifdef ERTS_SMP
+#if HAVE_ERTS_MSEG
+ erts_mseg_late_init();
+#endif
- erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp);
+ erts_sched_init_check_cpu_bind(esdp);
erts_proc_lock_prepare_proc_lock_waiter();
ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
@@ -3754,17 +4373,18 @@ sched_thread_func(void *vesdp)
if (--schdlr_sspnd.curr_online == schdlr_sspnd.wait_curr_online) {
erts_smp_atomic32_read_band_nob(&schdlr_sspnd.changing,
~ERTS_SCHDLR_SSPND_CHNG_ONLN);
- if (((ErtsSchedulerData *) vesdp)->no != 1)
+ if (no != 1)
erts_smp_cnd_signal(&schdlr_sspnd.cnd);
}
- if (((ErtsSchedulerData *) vesdp)->no == 1) {
+ if (no == 1) {
if (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online) {
erts_smp_activity_begin(ERTS_ACTIVITY_WAIT,
susp_sched_prep_block,
susp_sched_resume_block,
NULL);
- while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online)
+ while (schdlr_sspnd.curr_online != schdlr_sspnd.wait_curr_online
+ || !schdlr_sspnd.aux_thread)
erts_smp_cnd_wait(&schdlr_sspnd.cnd, &schdlr_sspnd.mtx);
erts_smp_activity_end(ERTS_ACTIVITY_WAIT,
susp_sched_prep_block,
@@ -3776,19 +4396,22 @@ sched_thread_func(void *vesdp)
erts_smp_mtx_unlock(&schdlr_sspnd.mtx);
#ifdef ERTS_DO_VERIFY_UNUSED_TEMP_ALLOC
- ((ErtsSchedulerData *) vesdp)->verify_unused_temp_alloc
+ esdp->verify_unused_temp_alloc
= erts_alloc_get_verify_unused_temp_alloc(
- &((ErtsSchedulerData *) vesdp)->verify_unused_temp_alloc_data);
+ &esdp->verify_unused_temp_alloc_data);
ERTS_VERIFY_UNUSED_TEMP_ALLOC(NULL);
#endif
process_main();
/* No schedulers should *ever* terminate */
- erl_exit(ERTS_ABORT_EXIT, "Scheduler thread number %beu terminated\n",
- ((ErtsSchedulerData *) vesdp)->no);
+ erl_exit(ERTS_ABORT_EXIT,
+ "Scheduler thread number %beu terminated\n",
+ no);
return NULL;
}
+static ethr_tid aux_tid;
+
void
erts_start_schedulers(void)
{
@@ -3810,6 +4433,10 @@ erts_start_schedulers(void)
erts_block_system(0);
+ res = ethr_thr_create(&aux_tid, aux_thread, NULL, &opts);
+ if (res != 0)
+ erl_exit(1, "Failed to create aux thread\n");
+
while (actual < wanted) {
ErtsSchedulerData *esdp = ERTS_SCHEDULER_IX(actual);
actual++;
@@ -5355,23 +5982,19 @@ Process *schedule(Process *p, int calls)
}
}
-#if defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK) \
- || defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK)
{
- ErtsSchedulerSleepInfo *ssi = esdp->ssi;
- erts_aint32_t aux_work = erts_smp_atomic32_read_nob(&ssi->aux_work);
- if (aux_work) {
+ erts_aint32_t aux_work;
+ int leader_update = erts_thr_progress_update(esdp);
+ aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
+ if (aux_work | leader_update) {
erts_smp_runq_unlock(rq);
-#ifdef ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
- aux_work = blockable_aux_work(esdp, ssi, aux_work);
-#endif
-#ifdef ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK
- nonblockable_aux_work(esdp, ssi, aux_work);
-#endif
+ if (leader_update)
+ erts_thr_progress_leader_update(esdp);
+ if (aux_work)
+ handle_aux_work(&esdp->aux_work_data, aux_work);
erts_smp_runq_lock(rq);
}
}
-#endif
erts_smp_chk_system_block(prepare_for_block,
resume_after_block,
@@ -5380,17 +6003,21 @@ Process *schedule(Process *p, int calls)
ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING);
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
-#endif
+#else /* ERTS_SMP */
+ {
+ erts_aint32_t aux_work;
+ aux_work = erts_atomic32_read_acqb(&esdp->ssi->aux_work);
+ if (aux_work)
+ handle_aux_work(&esdp->aux_work_data, aux_work);
+ }
+#endif /* ERTS_SMP */
ASSERT(rq->len == rq->procs.len + rq->ports.info.len);
-#ifndef ERTS_SMP
+ if (rq->len == 0 && !rq->misc.start) {
- if (rq->len == 0 && !rq->misc.start)
- goto do_sys_schedule;
+#ifdef ERTS_SMP
-#else /* ERTS_SMP */
- if (rq->len == 0 && !rq->misc.start) {
ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq));
rq->wakeup_other = 0;
@@ -5421,26 +6048,17 @@ Process *schedule(Process *p, int calls)
}
}
+#endif
+
scheduler_wait(&fcalls, esdp, rq);
+#ifdef ERTS_SMP
non_empty_runq(rq);
+#endif
goto check_activities_to_run;
}
- else
-#endif /* ERTS_SMP */
- if (fcalls > input_reductions && prepare_for_sys_schedule()) {
- int runnable;
-
-#ifdef ERTS_SMP
- runnable = 1;
-#else
- do_sys_schedule:
- runnable = rq->len != 0;
- if (!runnable)
- sched_waiting_sys(esdp->no, rq);
-#endif
-
+ else if (fcalls > input_reductions && prepare_for_sys_schedule()) {
/*
* Schedule system-level activities.
*/
@@ -5450,11 +6068,11 @@ Process *schedule(Process *p, int calls)
ASSERT(!erts_port_task_have_outstanding_io_tasks());
-#ifdef ERTS_SMP
- /* erts_sys_schedule_interrupt(0); */
+#if 0 /* Not needed since we won't wait in sys schedule */
+ erts_sys_schedule_interrupt(0);
#endif
erts_smp_runq_unlock(rq);
- erl_sys_schedule(runnable);
+ erl_sys_schedule(1);
dt = erts_do_time_read_and_reset();
if (dt) erts_bump_timer(dt);
#ifdef ERTS_SMP
@@ -5462,8 +6080,6 @@ Process *schedule(Process *p, int calls)
clear_sys_scheduling();
goto continue_check_activities_to_run;
#else
- if (!runnable)
- sched_active_sys(esdp->no, rq);
goto check_activities_to_run;
#endif
}
@@ -5778,20 +6394,9 @@ erts_sched_stat_term(Process *p, int total)
void
erts_schedule_misc_op(void (*func)(void *), void *arg)
{
- ErtsRunQueue *rq;
- ErtsMiscOpList *molp = misc_op_list_alloc();
ErtsSchedulerData *esdp = erts_get_scheduler_data();
-
- if (esdp) {
- rq = esdp->run_queue;
- } else {
- /*
- * This can only happen when the sys msg dispatcher
- * thread schedules misc ops (this happens *very*
- * seldom; only when trace drivers are unloaded).
- */
- rq = ERTS_RUNQ_IX(0);
- }
+ ErtsRunQueue *rq = esdp ? esdp->run_queue : ERTS_RUNQ_IX(0);
+ ErtsMiscOpList *molp = misc_op_list_alloc();
erts_smp_runq_lock(rq);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 739aef3130..43593f32d9 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -53,11 +53,17 @@ typedef struct process Process;
#include "erl_time.h"
#include "erl_atom_table.h"
#include "external.h"
+#include "erl_mseg.h"
#ifdef HIPE
#include "hipe_process.h"
#endif
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#define ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#include "erl_thr_progress.h"
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+
struct ErtsNodesMonitor_;
struct port;
@@ -242,16 +248,20 @@ typedef enum {
| ERTS_SSI_FLG_WAITING \
| ERTS_SSI_FLG_SUSPENDED)
-#define ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK
-
-#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 0)
-#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 1)
+#define ERTS_SSI_AUX_WORK_SET_TMO (((erts_aint32_t) 1) << 0)
+#define ERTS_SSI_AUX_WORK_CHECK_CHILDREN (((erts_aint32_t) 1) << 1)
+#define ERTS_SSI_AUX_WORK_MISC (((erts_aint32_t) 1) << 2)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_LOWER_LIM (((erts_aint32_t) 1) << 3)
+#define ERTS_SSI_AUX_WORK_FIX_ALLOC_DEALLOC (((erts_aint32_t) 1) << 4)
+#ifdef ERTS_SMP
+#define ERTS_SSI_AUX_WORK_DD (((erts_aint32_t) 1) << 5)
+#define ERTS_SSI_AUX_WORK_DD_THR_PRGR (((erts_aint32_t) 1) << 6)
+#endif
+#define ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK (((erts_aint32_t) 1) << 7)
-#define ERTS_SSI_BLOCKABLE_AUX_WORK_MASK \
- (ERTS_SSI_AUX_WORK_CHECK_CHILDREN \
- | ERTS_SSI_AUX_WORK_MISC)
-#define ERTS_SSI_NONBLOCKABLE_AUX_WORK_MASK \
- (0)
+#if !HAVE_ERTS_MSEG
+# undef ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK
+#endif
typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo;
@@ -261,11 +271,13 @@ typedef struct {
} ErtsSchedulerSleepList;
struct ErtsSchedulerSleepInfo_ {
+#ifdef ERTS_SMP
ErtsSchedulerSleepInfo *next;
ErtsSchedulerSleepInfo *prev;
erts_smp_atomic32_t flags;
erts_tse_t *event;
- erts_smp_atomic32_t aux_work;
+#endif
+ erts_atomic32_t aux_work;
};
/* times to reschedule low prio process before running */
@@ -386,6 +398,22 @@ do { \
(RQ)->wakeup_other_reds += (REDS); \
} while (0)
+typedef struct {
+ int sched_id;
+ ErtsSchedulerData *esdp;
+ ErtsSchedulerSleepInfo *ssi;
+ struct {
+ int ix;
+ } misc;
+#ifdef ERTS_SMP
+ struct {
+ ErtsThrPrgrVal thr_prgr;
+ void (*completed_callback)(void *);
+	void *completed_arg;
+ } dd;
+#endif
+} ErtsAuxWorkData;
+
struct ErtsSchedulerData_ {
#ifdef ERTS_SMP
@@ -403,8 +431,8 @@ struct ErtsSchedulerData_ {
ethr_tid tid; /* Thread id */
struct erl_bits_state erl_bits_state; /* erl_bits.c state */
void *match_pseudo_process; /* erl_db_util.c:db_prog_match() */
- ErtsSchedulerSleepInfo *ssi;
Process *free_process;
+ ErtsThrPrgrData thr_progress_data;
#endif
#if !HEAP_ON_C_STACK
Eterm tmp_heap[TMP_HEAP_SIZE];
@@ -413,16 +441,19 @@ struct ErtsSchedulerData_ {
Eterm cmp_tmp_heap[CMP_TMP_HEAP_SIZE];
Eterm erl_arith_tmp_heap[ERL_ARITH_TMP_HEAP_SIZE];
#endif
-
+ ErtsSchedulerSleepInfo *ssi;
Process *current_process;
Uint no; /* Scheduler number */
struct port *current_port;
ErtsRunQueue *run_queue;
int virtual_reds;
int cpu_id; /* >= 0 when bound */
+ ErtsAuxWorkData aux_work_data;
ErtsAtomCacheMap atom_cache_map;
+ ErtsSchedAllocData alloc_data;
+
#ifdef ERTS_SMP
/* NOTE: These fields are modified under held mutexes by other threads */
erts_smp_atomic32_t chk_cpu_bind; /* Only used when common run queue */
@@ -1032,7 +1063,7 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags;
void erts_pre_init_process(void);
void erts_late_init_process(void);
-void erts_early_init_scheduling(void);
+void erts_early_init_scheduling(int);
void erts_init_scheduling(int, int, int);
ErtsProcList *erts_proclist_create(Process *);
@@ -1058,6 +1089,7 @@ erts_block_multi_scheduling(Process *, ErtsProcLocks, int, int);
int erts_is_multi_scheduling_blocked(void);
Eterm erts_multi_scheduling_blockers(Process *);
void erts_start_schedulers(void);
+void erts_alloc_notify_delayed_dealloc(int);
void erts_smp_notify_check_children_needed(void);
void
erts_smp_schedule_misc_aux_work(int ignore_self,
@@ -1065,6 +1097,7 @@ erts_smp_schedule_misc_aux_work(int ignore_self,
void (*func)(void *),
void *arg);
#endif
+erts_aint32_t erts_set_aux_work_timeout(int, erts_aint32_t, int);
void erts_sched_notify_check_cpu_bind(void);
Uint erts_active_schedulers(void);
void erts_init_process(int);
@@ -1148,6 +1181,7 @@ Sint erts_test_next_pid(int, Uint);
Eterm erts_debug_processes(Process *c_p);
Eterm erts_debug_processes_bif_info(Process *c_p);
Uint erts_debug_nbalance(void);
+int erts_debug_wait_deallocations(Process *c_p);
#ifdef ERTS_SMP
# define ERTS_GET_SCHEDULER_DATA_FROM_PROC(PROC) ((PROC)->scheduler_data)
diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h
index cd3b2182fd..97f250138e 100644
--- a/erts/emulator/beam/erl_process_lock.h
+++ b/erts/emulator/beam/erl_process_lock.h
@@ -651,7 +651,7 @@ ERTS_GLB_INLINE int erts_smp_proc_trylock(Process *, ErtsProcLocks);
ERTS_GLB_INLINE void erts_smp_proc_inc_refc(Process *);
ERTS_GLB_INLINE void erts_smp_proc_dec_refc(Process *);
-
+ERTS_GLB_INLINE void erts_smp_proc_add_refc(Process *, Sint32);
#if ERTS_GLB_INLINE_INCL_FUNC_DEF
@@ -737,6 +737,21 @@ ERTS_GLB_INLINE void erts_smp_proc_dec_refc(Process *p)
#endif
}
+ERTS_GLB_INLINE void erts_smp_proc_add_refc(Process *p, Sint32 refc)
+{
+#ifdef ERTS_SMP
+ Process *fp;
+ erts_pix_lock_t *pixlck = ERTS_PID2PIXLOCK(p->id);
+ erts_pix_lock(pixlck);
+ ERTS_LC_ASSERT(p->lock.refc > 0);
+ p->lock.refc += refc;
+ fp = p->lock.refc == 0 ? p : NULL;
+ erts_pix_unlock(pixlck);
+ if (fp)
+ erts_free_proc(fp);
+#endif
+}
+
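+
Editor's note: erts_smp_proc_add_refc() generalizes the inc/dec pair above to a signed delta, so a caller can take or release several references with a single pix-lock round trip; like erts_smp_proc_dec_refc(), it frees the process if the count reaches zero. A hypothetical usage sketch (n is however many asynchronous jobs are about to be dispatched):

    /* take one reference per outstanding job, in one shot */
    erts_smp_proc_add_refc(p, (Sint32) n);

    /* ... each job, as it completes, drops its own reference: */
    erts_smp_proc_dec_refc(p);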
#endif /* #if ERTS_GLB_INLINE_INCL_FUNC_DEF */
#ifdef ERTS_SMP
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.c b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
new file mode 100644
index 0000000000..a7ccea7403
--- /dev/null
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.c
@@ -0,0 +1,305 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Scheduler specific pre-allocators. Each scheduler
+ * thread allocates memory in its own private chunk of
+ * memory. Memory blocks deallocated by remote
+ * schedulers (or other threads) are passed back to
+ * the chunk owner via a lock-free data structure.
+ *
+ * Author: Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#ifdef ERTS_SMP
+
+#include "erl_process.h"
+#include "erl_thr_progress.h"
+
+erts_sspa_data_t *
+erts_sspa_create(size_t blk_sz, int pa_size)
+{
+ erts_sspa_data_t *data;
+ size_t tot_size;
+ size_t chunk_mem_size;
+ char *p;
+ char *chunk_start;
+ int cix;
+ int no_blocks = pa_size;
+ int no_blocks_per_chunk;
+
+ if (erts_no_schedulers == 1)
+ no_blocks_per_chunk = no_blocks;
+ else {
+ int extra = (no_blocks - 1)/4 + 1;
+ if (extra == 0)
+ extra = 1;
+ no_blocks_per_chunk = no_blocks;
+ no_blocks_per_chunk += extra*erts_no_schedulers;
+ no_blocks_per_chunk /= erts_no_schedulers;
+ }
+ no_blocks = no_blocks_per_chunk * erts_no_schedulers;
+ chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_chunk_header_t));
+ chunk_mem_size += blk_sz * no_blocks_per_chunk;
+ chunk_mem_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(chunk_mem_size);
+ tot_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
+ tot_size += chunk_mem_size*erts_no_schedulers;
+
+ p = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_PRE_ALLOC_DATA, tot_size);
+ data = (erts_sspa_data_t *) p;
+ p += ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_data_t));
+ chunk_start = p;
+
+ data->chunks_mem_size = chunk_mem_size;
+ data->start = chunk_start;
+ data->end = chunk_start + chunk_mem_size*erts_no_schedulers;
+
+ /* Initialize all chunks */
+ for (cix = 0; cix < erts_no_schedulers; cix++) {
+ erts_sspa_chunk_t *chnk = erts_sspa_cix2chunk(data, cix);
+ erts_sspa_chunk_header_t *chdr = &chnk->aligned.header;
+ erts_sspa_blk_t *blk;
+ int i;
+
+ erts_atomic_init_nob(&chdr->tail.data.last, (erts_aint_t) &chdr->tail.data.marker);
+ erts_atomic_init_nob(&chdr->tail.data.marker.next_atmc, ERTS_AINT_NULL);
+ erts_atomic_init_nob(&chdr->tail.data.um_refc[0], 0);
+ erts_atomic_init_nob(&chdr->tail.data.um_refc[1], 0);
+ erts_atomic32_init_nob(&chdr->tail.data.um_refc_ix, 0);
+
+ chdr->head.no_thr_progress_check = 0;
+ chdr->head.used_marker = 1;
+ chdr->head.first = &chdr->tail.data.marker;
+ chdr->head.unref_end = &chdr->tail.data.marker;
+ chdr->head.next.thr_progress = erts_thr_progress_current();
+ chdr->head.next.thr_progress_reached = 1;
+ chdr->head.next.um_refc_ix = 1;
+ chdr->head.next.unref_end = &chdr->tail.data.marker;
+
+ p = &chnk->data[0];
+ chdr->local.first = (erts_sspa_blk_t *) p;
+ blk = (erts_sspa_blk_t *) p;
+ for (i = 0; i < no_blocks_per_chunk; i++) {
+ blk = (erts_sspa_blk_t *) p;
+ p += blk_sz;
+ blk->next_ptr = (erts_sspa_blk_t *) p;
+ }
+
+ blk->next_ptr = NULL;
+ chdr->local.last = blk;
+ chdr->local.cnt = no_blocks_per_chunk;
+ chdr->local.lim = no_blocks_per_chunk / 3;
+
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+
+ return data;
+}
+
+static ERTS_INLINE erts_aint_t
+enqueue_remote_managed_thread(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *this,
+ int want_last)
+{
+ erts_aint_t ilast, itmp;
+
+ erts_atomic_init_nob(&this->next_atmc, ERTS_AINT_NULL);
+
+ /* Enqueue at end of list... */
+
+ ilast = erts_atomic_read_nob(&chdr->tail.data.last);
+ while (1) {
+ erts_sspa_blk_t *last = (erts_sspa_blk_t *) ilast;
+ itmp = erts_atomic_cmpxchg_mb(&last->next_atmc,
+ (erts_aint_t) this,
+ ERTS_AINT_NULL);
+ if (itmp == ERTS_AINT_NULL)
+ break;
+ ilast = itmp;
+ }
+
+ /* Move last pointer forward... */
+ while (1) {
+ erts_aint_t itmp;
+ if (want_last) {
+ if (erts_atomic_read_rb(&this->next_atmc) != ERTS_AINT_NULL) {
+ /* Someone else will move it forward */
+ return erts_atomic_read_nob(&chdr->tail.data.last);
+ }
+ }
+ else {
+ if (erts_atomic_read_nob(&this->next_atmc) != ERTS_AINT_NULL) {
+ /* Someone else will move it forward */
+ return ERTS_AINT_NULL;
+ }
+ }
+ itmp = erts_atomic_cmpxchg_mb(&chdr->tail.data.last,
+ (erts_aint_t) this,
+ ilast);
+ if (ilast == itmp)
+ return want_last ? (erts_aint_t) this : ERTS_AINT_NULL;
+ ilast = itmp;
+ }
+}
+
+void
+erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr, erts_sspa_blk_t *blk)
+{
+ int um_refc_ix = 0;
+ int managed_thread = erts_thr_progress_is_managed_thread();
+ if (!managed_thread) {
+ um_refc_ix = erts_atomic32_read_acqb(&chdr->tail.data.um_refc_ix);
+ while (1) {
+ int tmp_um_refc_ix;
+ erts_atomic_inc_acqb(&chdr->tail.data.um_refc[um_refc_ix]);
+ tmp_um_refc_ix = erts_atomic32_read_acqb(&chdr->tail.data.um_refc_ix);
+ if (tmp_um_refc_ix == um_refc_ix)
+ break;
+ erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]);
+ um_refc_ix = tmp_um_refc_ix;
+ }
+ }
+
+ (void) enqueue_remote_managed_thread(chdr, blk, 0);
+
+ if (!managed_thread)
+ erts_atomic_dec_relb(&chdr->tail.data.um_refc[um_refc_ix]);
+}
+
+static ERTS_INLINE void
+fetch_remote(erts_sspa_chunk_header_t *chdr, int max)
+{
+ int new_local = 0;
+
+ if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
+ chdr->head.no_thr_progress_check++;
+ else {
+ erts_aint_t ilast;
+
+ chdr->head.no_thr_progress_check = 0;
+
+ ilast = erts_atomic_read_nob(&chdr->tail.data.last);
+ if (((erts_sspa_blk_t *) ilast) == &chdr->tail.data.marker
+ && chdr->head.first == &chdr->tail.data.marker)
+ return;
+
+ if (chdr->head.next.thr_progress_reached
+ || erts_thr_progress_has_reached(chdr->head.next.thr_progress)) {
+ int um_refc_ix;
+ chdr->head.next.thr_progress_reached = 1;
+ um_refc_ix = chdr->head.next.um_refc_ix;
+ if (erts_atomic_read_acqb(&chdr->tail.data.um_refc[um_refc_ix]) == 0) {
+
+ /* Move unreferenced end pointer forward... */
+
+ chdr->head.unref_end = chdr->head.next.unref_end;
+
+ if (!chdr->head.used_marker
+ && chdr->head.unref_end == (erts_sspa_blk_t *) ilast) {
+		    /* Need to enqueue marker */
+ chdr->head.used_marker = 1;
+ ilast = enqueue_remote_managed_thread(chdr,
+ &chdr->tail.data.marker,
+ 1);
+ }
+
+ if (chdr->head.unref_end == (erts_sspa_blk_t *) ilast)
+ ERTS_THR_MEMORY_BARRIER;
+ else {
+ chdr->head.next.unref_end = (erts_sspa_blk_t *) ilast;
+ ERTS_THR_MEMORY_BARRIER;
+ chdr->head.next.thr_progress = erts_thr_progress_later();
+ erts_atomic32_set_relb(&chdr->tail.data.um_refc_ix,
+ um_refc_ix);
+ chdr->head.next.um_refc_ix = um_refc_ix == 0 ? 1 : 0;
+ chdr->head.next.thr_progress_reached = 0;
+ }
+ }
+ }
+ }
+
+ if (new_local < max && chdr->head.first != chdr->head.unref_end) {
+ erts_sspa_blk_t *first, *this, *next, *last;
+ first = chdr->head.first;
+ if (first == &chdr->tail.data.marker) {
+ chdr->head.used_marker = 0;
+ first = ((erts_sspa_blk_t *)
+ erts_atomic_read_nob(&first->next_atmc));
+ chdr->head.first = first;
+ }
+ if (first != chdr->head.unref_end) {
+
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+
+ this = last = first;
+ do {
+ next = (erts_sspa_blk_t *) erts_atomic_read_nob(&this->next_atmc);
+ if (this == &chdr->tail.data.marker)
+ chdr->head.used_marker = 0;
+ else {
+ last->next_ptr = this;
+ last = this;
+ new_local++;
+ }
+ this = next;
+ } while (new_local < max && this != chdr->head.unref_end);
+ chdr->head.first = this;
+ if (!chdr->local.last)
+ chdr->local.first = first;
+ else
+ chdr->local.last->next_ptr = first;
+ chdr->local.last = last;
+ last->next_ptr = NULL;
+ chdr->local.cnt += new_local;
+
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+ }
+
+}
+
+erts_sspa_blk_t *
+erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *old_res)
+{
+ erts_sspa_blk_t *res = old_res;
+
+ fetch_remote(chdr, ERTS_SSPA_MAX_GET_NEW_LOCAL);
+
+ if (!res && chdr->local.first) {
+
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+
+ res = chdr->local.first;
+ chdr->local.first = res->next_ptr;
+ chdr->local.cnt--;
+ if (!chdr->local.first)
+ chdr->local.last = NULL;
+
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+
+ return res;
+}
+
+#endif /* ERTS_SMP */
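enqueue_remote_managed_thread() above is a classic lock-free multi-producer enqueue: a producer links its block after the node it believes is last with a compare-and-swap, then tries to swing the 'last' pointer forward; if a later producer has already linked past it, that producer takes over the swing. A standalone sketch of the same pattern in C11 atomics, not ERTS code, and simplified: it omits the marker block and all of the thread-progress/um_refc accounting that tells the consumer when dequeued blocks are safe to reuse.

    #include <stdatomic.h>
    #include <stddef.h>

    struct node  { _Atomic(struct node *) next; };
    struct queue { _Atomic(struct node *) last; };

    void queue_init(struct queue *q, struct node *dummy)
    {
        atomic_store(&dummy->next, NULL);
        atomic_store(&q->last, dummy);    /* 'last' is never NULL */
    }

    void enqueue(struct queue *q, struct node *n)
    {
        struct node *last, *expect;

        atomic_store_explicit(&n->next, NULL, memory_order_relaxed);

        /* Link 'n' in after the true tail, walking forward on failure. */
        last = atomic_load(&q->last);
        for (;;) {
            expect = NULL;
            if (atomic_compare_exchange_strong(&last->next, &expect, n))
                break;
            last = expect;  /* someone linked first; retry after them */
        }

        /* Swing 'last' forward; if a later producer has already linked
         * past 'n', leave the swing to them. */
        while (atomic_load(&n->next) == NULL) {
            struct node *l = last;
            if (atomic_compare_exchange_strong(&q->last, &l, n))
                break;
            last = l;       /* 'last' moved; retry with the fresh value */
        }
    }
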
diff --git a/erts/emulator/beam/erl_sched_spec_pre_alloc.h b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
new file mode 100644
index 0000000000..d36066c399
--- /dev/null
+++ b/erts/emulator/beam/erl_sched_spec_pre_alloc.h
@@ -0,0 +1,239 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Scheduler specific pre-allocators. Each scheduler
+ * thread allocates memory in its own private chunk of
+ * memory. Memory blocks deallocated by remote
+ * schedulers (or other threads) are passed back to
+ * the chunk owner via a lock-free data structure.
+ *
+ * Author: Rickard Green
+ */
+
+#ifndef ERTS_SCHED_SPEC_PRE_ALLOC_H__
+#define ERTS_SCHED_SPEC_PRE_ALLOC_H__
+
+#ifdef ERTS_SMP
+
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#define ERL_THR_PROGRESS_TSD_TYPE_ONLY
+#include "erl_thr_progress.h"
+#undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
+
+#ifdef DEBUG
+#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P) \
+do { \
+ ASSERT((void *) (C) < (void *) (P)); \
+ ASSERT((void *) (P) \
+ < (void *) (((char *) (C)) + (A)->chunks_mem_size)); \
+} while (0)
+#else
+#define ERTS_SPPA_DBG_CHK_IN_CHNK(A, C, P)
+#endif
+
+#ifdef DEBUG
+extern Uint erts_no_schedulers;
+#endif
+
+#define ERTS_SSPA_FORCE_THR_CHECK_PROGRESS 10
+#define ERTS_SSPA_MAX_GET_NEW_LOCAL 5
+
+typedef struct {
+ char *start;
+ char *end;
+ int chunks_mem_size;
+} erts_sspa_data_t;
+
+typedef union erts_sspa_blk_t_ erts_sspa_blk_t;
+union erts_sspa_blk_t_ {
+ erts_atomic_t next_atmc;
+ erts_sspa_blk_t *next_ptr;
+};
+
+typedef struct {
+ erts_sspa_blk_t *first;
+ erts_sspa_blk_t *last;
+ int cnt;
+ int lim;
+} erts_sspa_local_freelist_t;
+
+typedef struct {
+ erts_sspa_blk_t marker;
+ erts_atomic_t last;
+ erts_atomic_t um_refc[2];
+ erts_atomic32_t um_refc_ix;
+} erts_sspa_tail_t;
+
+typedef struct {
+ /*
+ * This structure needs to be cache line aligned for best
+ * performance.
+ */
+ union {
+ /* Modified by threads returning memory to this chunk */
+ erts_sspa_tail_t data;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(erts_sspa_tail_t))];
+ } tail;
+ /*
+ * Everything below this point is *only* accessed by the
+ * thread owning this chunk.
+ */
+ struct {
+ int no_thr_progress_check;
+ int used_marker;
+ erts_sspa_blk_t *first;
+ erts_sspa_blk_t *unref_end;
+ struct {
+ ErtsThrPrgrVal thr_progress;
+ int thr_progress_reached;
+ int um_refc_ix;
+ erts_sspa_blk_t *unref_end;
+ } next;
+ } head;
+ erts_sspa_local_freelist_t local;
+} erts_sspa_chunk_header_t;
+
+typedef struct {
+ union {
+ erts_sspa_chunk_header_t header;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(
+ sizeof(erts_sspa_chunk_header_t))];
+ } aligned;
+ char data[1];
+} erts_sspa_chunk_t;
+
+#ifdef DEBUG
+ERTS_GLB_INLINE void
+check_local_list(erts_sspa_chunk_header_t *chdr);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+ERTS_GLB_INLINE void
+check_local_list(erts_sspa_chunk_header_t *chdr)
+{
+ erts_sspa_blk_t *blk;
+ int n = 0;
+ for (blk = chdr->local.first; blk; blk = blk->next_ptr)
+ n++;
+ ASSERT(n == chdr->local.cnt);
+}
+#endif
+#define ERTS_SSPA_DBG_CHK_LCL(CHDR) check_local_list((CHDR))
+#else
+#define ERTS_SSPA_DBG_CHK_LCL(CHDR)
+#endif
+
+erts_sspa_data_t *erts_sspa_create(size_t blk_sz,
+ int pa_size);
+void erts_sspa_remote_free(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *blk);
+erts_sspa_blk_t *erts_sspa_process_remote_frees(erts_sspa_chunk_header_t *chdr,
+ erts_sspa_blk_t *old_res);
+
+ERTS_GLB_INLINE erts_sspa_chunk_t *erts_sspa_cix2chunk(erts_sspa_data_t *data,
+ int cix);
+ERTS_GLB_INLINE int erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr);
+ERTS_GLB_INLINE char *erts_sspa_alloc(erts_sspa_data_t *data, int cix);
+ERTS_GLB_INLINE int erts_sspa_free(erts_sspa_data_t *data, int cix, char *blk);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE erts_sspa_chunk_t *
+erts_sspa_cix2chunk(erts_sspa_data_t *data, int cix)
+{
+ ASSERT(0 <= cix && cix < erts_no_schedulers);
+ return (erts_sspa_chunk_t *) (data->start + cix*data->chunks_mem_size);
+}
+
+ERTS_GLB_INLINE int
+erts_sspa_ptr2cix(erts_sspa_data_t *data, void *ptr)
+{
+ int cix;
+ size_t diff;
+ if ((char *) ptr < data->start || data->end <= (char *) ptr)
+ return -1;
+ diff = ((char *) ptr) - data->start;
+ cix = (int) diff / data->chunks_mem_size;
+ ASSERT(0 <= cix && cix < erts_no_schedulers);
+ return cix;
+}
+
+ERTS_GLB_INLINE char *
+erts_sspa_alloc(erts_sspa_data_t *data, int cix)
+{
+ erts_sspa_chunk_t *chnk;
+ erts_sspa_chunk_header_t *chdr;
+ erts_sspa_blk_t *res;
+
+ chnk = erts_sspa_cix2chunk(data, cix);
+ chdr = &chnk->aligned.header;
+ res = chdr->local.first;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ if (res) {
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ chdr->local.first = res->next_ptr;
+ chdr->local.cnt--;
+ if (!chdr->local.first)
+ chdr->local.last = NULL;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+ if (chdr->local.cnt <= chdr->local.lim)
+ return (char *) erts_sspa_process_remote_frees(chdr, res);
+ else if (chdr->head.no_thr_progress_check < ERTS_SSPA_FORCE_THR_CHECK_PROGRESS)
+ chdr->head.no_thr_progress_check++;
+ ASSERT(res);
+ return (char *) res;
+}
+
+ERTS_GLB_INLINE int
+erts_sspa_free(erts_sspa_data_t *data, int cix, char *cblk)
+{
+ erts_sspa_chunk_t *chnk;
+ erts_sspa_chunk_header_t *chdr;
+ erts_sspa_blk_t *blk = (erts_sspa_blk_t *) cblk;
+ int chnk_cix = erts_sspa_ptr2cix(data, blk);
+
+ if (chnk_cix < 0)
+ return 0;
+
+ chnk = erts_sspa_cix2chunk(data, chnk_cix);
+ chdr = &chnk->aligned.header;
+ if (chnk_cix != cix) {
+ /* Remote chunk */
+ erts_sspa_remote_free(chdr, blk);
+ }
+ else {
+ /* Local chunk */
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ blk->next_ptr = chdr->local.first;
+ chdr->local.first = blk;
+ if (!chdr->local.last)
+ chdr->local.last = blk;
+ chdr->local.cnt++;
+ ERTS_SSPA_DBG_CHK_LCL(chdr);
+ }
+
+ return 1;
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
+#endif /* ERTS_SMP */
+
+#endif /* ERTS_SCHED_SPEC_PRE_ALLOC_H__ */
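The header gives the full per-chunk layout: a cache-line-aligned tail written by remote freers, a head section touched only by the owning scheduler, and a plain local free list. For concreteness, erts_sspa_create(blk_sz, 1024) on a 4-scheduler system computes extra = (1024-1)/4 + 1 = 256 and gives each chunk (1024 + 4*256)/4 = 512 blocks: its even share of 256 plus slack of about a quarter of the total. A hypothetical wrapper showing the intended calling pattern, assuming erl_sched_spec_pre_alloc.h is included; the names and the 64-byte payload are illustrative only:

    /* blk_sz must be at least sizeof(erts_sspa_blk_t); the free-list
     * link is overlaid on a block's first word once it is freed. */
    typedef struct { char payload[64]; } my_blk_t;

    static erts_sspa_data_t *my_pa;

    void my_pa_init(void)
    {
        my_pa = erts_sspa_create(sizeof(my_blk_t), 1024);
    }

    /* 'six' is the calling scheduler's 0-based index. */
    my_blk_t *my_pa_alloc(int six)
    {
        /* NULL means the chunk is empty; fall back to a normal allocator. */
        return (my_blk_t *) erts_sspa_alloc(my_pa, six);
    }

    int my_pa_free(int six, my_blk_t *blk)
    {
        /* Returns 0 when blk is outside the pre-allocated chunks, in
         * which case the caller frees it through the ordinary allocator.
         * Local frees go on the local list; remote frees are enqueued
         * to the owning chunk lock-free via erts_sspa_remote_free(). */
        return erts_sspa_free(my_pa, six, (char *) blk);
    }
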
diff --git a/erts/emulator/beam/erl_thr_progress.c b/erts/emulator/beam/erl_thr_progress.c
new file mode 100644
index 0000000000..f96ae4b70d
--- /dev/null
+++ b/erts/emulator/beam/erl_thr_progress.c
@@ -0,0 +1,1010 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Thread progress information. Used by lock free algorithms
+ * to determine when all involved threads are guaranteed to
+ * have passed a specific point of execution.
+ *
+ * Usage instructions below.
+ *
+ * Author: Rickard Green
+ */
+
+/*
+ * ------ Usage instructions -----------------------------------------------
+ *
+ * This module keeps track of the progress of a set of managed threads. Only
+ * threads that behave well can be allowed to be managed. A managed thread
+ * should update its thread progress frequently. Currently only scheduler
+ * threads and the aux_thread are managed threads. We typically do not want
+ * any async threads as managed threads, since they execute user-implemented
+ * driver code and therefore cannot guarantee frequent updates of their
+ * thread progress.
+ *
+ * erts_thr_progress_current() returns the global current thread progress
+ * value of managed threads. I.e., the latest progress value that all
+ * managed threads have reached. Thread progress values are opaque.
+ *
+ * erts_thr_progress_has_reached(VAL) returns a value != 0 if current
+ * global thread progress has reached or passed VAL.
+ *
+ * erts_thr_progress_later() returns a thread progress value in the future
+ * which no managed thread has yet reached.
+ *
+ * All threads issue a full memory barrier when reaching a new thread
+ * progress value. They only reach new thread progress values in specific
+ * controlled states when calling erts_thr_progress_update(). Schedulers
+ * call erts_thr_progress_update() in between execution of processes,
+ * when going to sleep and when waking up.
+ *
+ * Sleeping managed threads are considered to have reached the next thread
+ * progress value immediately. They are not woken and therefore do not
+ * issue any memory barriers when reaching a new thread progress value.
+ * A sleeping thread does, however, issue a memory barrier immediately
+ * upon wakeup.
+ *
+ * Both managed and registered unmanaged threads may request wakeup when
+ * the global thread progress reaches a certain value using
+ * erts_thr_progress_wakeup().
+ *
+ * Note that thread progress values are opaque, and that you are only
+ * allowed to use thread progress values retrieved from this API!
+ *
+ * -------------------------------------------------------------------------
+ */
+
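A sketch, not code from this patch, of the deferred-deallocation pattern this API is built for: stamp a retired object with erts_thr_progress_later(), and reclaim it only once erts_thr_progress_has_reached() reports that every managed thread has passed that point and can no longer hold a live reference. It assumes erl_alloc.h is included; the ERTS_ALC_T_UNDEF type and the list layout are illustrative only.

    typedef struct deferred {
        struct deferred *next;
        ErtsThrPrgrVal until;       /* safe to reclaim once reached */
        void *mem;
    } deferred_t;

    static deferred_t *dq;          /* owned by a single managed thread */

    static void defer_free(void *mem)
    {
        deferred_t *d = erts_alloc(ERTS_ALC_T_UNDEF, sizeof(deferred_t));
        d->mem = mem;
        d->until = erts_thr_progress_later(); /* nobody has reached this yet */
        d->next = dq;
        dq = d;
    }

    static void reclaim_reached(void) /* called now and then by the owner */
    {
        deferred_t **dp = &dq;
        while (*dp) {
            deferred_t *d = *dp;
            if (erts_thr_progress_has_reached(d->until)) {
                *dp = d->next;
                erts_free(ERTS_ALC_T_UNDEF, d->mem);
                erts_free(ERTS_ALC_T_UNDEF, d);
            }
            else
                dp = &d->next;
        }
    }
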
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+
+#include <stddef.h> /* offsetof() */
+#include "erl_thr_progress.h"
+
+#ifdef ERTS_SMP
+
+/*
+ * We use a 64-bit value for thread progress. Because of this, wrapping
+ * of the thread progress value will more or less never occur.
+ *
+ * On 32-bit systems we therefore need a double word atomic.
+ */
+
+#define ERTS_THR_PRGR_PRINT_LEADER 0
+#define ERTS_THR_PRGR_PRINT_VAL 0
+
+#define ERTS_THR_PRGR_LFLG_NO_LEADER (((erts_aint32_t) 1) << 31)
+#define ERTS_THR_PRGR_LFLG_ACTIVE_MASK (~ERTS_THR_PRGR_LFLG_NO_LEADER)
+
+#define ERTS_THR_PRGR_LFLGS_ACTIVE(LFLGS) \
+ ((LFLGS) & ERTS_THR_PRGR_LFLG_ACTIVE_MASK)
+
+#define ERTS_THR_PRGR_LFLGS_ALL_WAITING(LFLGS) \
+ (((LFLGS) & (ERTS_THR_PRGR_LFLG_NO_LEADER \
+ |ERTS_THR_PRGR_LFLG_ACTIVE_MASK)) \
+ == ERTS_THR_PRGR_LFLG_NO_LEADER)
+
+#define read_acqb erts_thr_prgr_read_acqb__
+
+#ifdef ARCH_64
+
+static ERTS_INLINE void
+set_mb(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ erts_atomic_set_mb(atmc, val);
+}
+
+static ERTS_INLINE void
+set_nob(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ erts_atomic_set_nob(atmc, val);
+}
+
+static ERTS_INLINE ErtsThrPrgrVal
+read_nob(ERTS_THR_PRGR_ATOMIC *atmc)
+{
+ return (ErtsThrPrgrVal) erts_atomic_read_nob(atmc);
+}
+
+static ERTS_INLINE void
+init_nob(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ erts_atomic_init_nob(atmc, val);
+}
+
+#else
+
+#undef dw_sint_to_val
+#define dw_sint_to_val erts_thr_prgr_dw_sint_to_val__
+
+static void
+val_to_dw_sint(ethr_dw_sint_t *dw_sint, ErtsThrPrgrVal val)
+{
+#ifdef ETHR_SU_DW_NAINT_T__
+ dw_sint->dw_sint = (ETHR_SU_DW_NAINT_T__) val;
+#else
+ dw_sint->sint[ETHR_DW_SINT_LOW_WORD]
+ = (ethr_sint_t) (val & 0xffffffff);
+ dw_sint->sint[ETHR_DW_SINT_HIGH_WORD]
+ = (ethr_sint_t) ((val >> 32) & 0xffffffff);
+#endif
+}
+
+static ERTS_INLINE void
+set_mb(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ ethr_dw_sint_t dw_sint;
+ val_to_dw_sint(&dw_sint, val);
+ erts_dw_atomic_set_mb(atmc, &dw_sint);
+}
+
+static ERTS_INLINE void
+set_nob(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ ethr_dw_sint_t dw_sint;
+ val_to_dw_sint(&dw_sint, val);
+ erts_dw_atomic_set_nob(atmc, &dw_sint);
+}
+
+static ERTS_INLINE ErtsThrPrgrVal
+read_nob(ERTS_THR_PRGR_ATOMIC *atmc)
+{
+ ethr_dw_sint_t dw_sint;
+ erts_dw_atomic_read_nob(atmc, &dw_sint);
+ return erts_thr_prgr_dw_sint_to_val__(&dw_sint);
+}
+
+static ERTS_INLINE void
+init_nob(ERTS_THR_PRGR_ATOMIC *atmc, ErtsThrPrgrVal val)
+{
+ ethr_dw_sint_t dw_sint;
+ val_to_dw_sint(&dw_sint, val);
+ erts_dw_atomic_init_nob(atmc, &dw_sint);
+}
+
+#endif
+
+/* #define ERTS_THR_PROGRESS_STATE_DEBUG */
+
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+
+#ifdef __GNUC__
+#warning "Thread progress state debug is on"
+#endif
+
+#define ERTS_THR_PROGRESS_STATE_DEBUG_LEADER (((erts_aint32_t) 1) << 0)
+#define ERTS_THR_PROGRESS_STATE_DEBUG_ACTIVE (((erts_aint32_t) 1) << 1)
+
+#define ERTS_THR_PROGRESS_STATE_DEBUG_INIT(ID) \
+ erts_atomic32_init_nob(&intrnl->thr[(ID)].data.state_debug, \
+ ERTS_THR_PROGRESS_STATE_DEBUG_ACTIVE)
+
+#define ERTS_THR_PROGRESS_STATE_DEBUG_SET_ACTIVE(ID, ON) \
+do { \
+ erts_aint32_t state_debug__; \
+ state_debug__ = erts_atomic32_read_nob(&intrnl->thr[(ID)].data.state_debug); \
+ if ((ON)) \
+ state_debug__ |= ERTS_THR_PROGRESS_STATE_DEBUG_ACTIVE; \
+ else \
+ state_debug__ &= ~ERTS_THR_PROGRESS_STATE_DEBUG_ACTIVE; \
+ erts_atomic32_set_nob(&intrnl->thr[(ID)].data.state_debug, state_debug__); \
+} while (0)
+
+#define ERTS_THR_PROGRESS_STATE_DEBUG_SET_LEADER(ID, ON) \
+do { \
+ erts_aint32_t state_debug__; \
+ state_debug__ = erts_atomic32_read_nob(&intrnl->thr[(ID)].data.state_debug); \
+ if ((ON)) \
+ state_debug__ |= ERTS_THR_PROGRESS_STATE_DEBUG_LEADER; \
+ else \
+ state_debug__ &= ~ERTS_THR_PROGRESS_STATE_DEBUG_LEADER; \
+ erts_atomic32_set_nob(&intrnl->thr[(ID)].data.state_debug, state_debug__); \
+} while (0)
+
+#else
+
+#define ERTS_THR_PROGRESS_STATE_DEBUG_INIT(ID)
+#define ERTS_THR_PROGRESS_STATE_DEBUG_SET_ACTIVE(ID, ON)
+#define ERTS_THR_PROGRESS_STATE_DEBUG_SET_LEADER(ID, ON)
+
+#endif /* ERTS_THR_PROGRESS_STATE_DEBUG */
+
+#define ERTS_THR_PRGR_BM_BITS 32
+#define ERTS_THR_PRGR_BM_SHIFT 5
+#define ERTS_THR_PRGR_BM_MASK 0x1f
+
+#define ERTS_THR_PRGR_WAKEUP_DATA_MASK (ERTS_THR_PRGR_WAKEUP_DATA_SIZE - 1)
+
+#define ERTS_THR_PRGR_WAKEUP_IX(V) \
+ ((int) ((V) & ERTS_THR_PRGR_WAKEUP_DATA_MASK))
+
+typedef struct {
+ erts_atomic32_t len;
+ int id[1];
+} ErtsThrPrgrManagedWakeupData;
+
+typedef struct {
+ erts_atomic32_t len;
+ int high_sz;
+ int low_sz;
+ erts_atomic32_t *high;
+ erts_atomic32_t *low;
+} ErtsThrPrgrUnmanagedWakeupData;
+
+typedef struct {
+ erts_atomic32_t lflgs;
+
+ erts_atomic32_t pref_wakeup_used;
+ erts_atomic32_t managed_id;
+ erts_atomic32_t unmanaged_id;
+} ErtsThrPrgrMiscVolatile;
+
+typedef struct {
+ ERTS_THR_PRGR_ATOMIC current;
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ erts_atomic32_t state_debug;
+#endif
+} ErtsThrPrgrElement;
+
+typedef union {
+ ErtsThrPrgrElement data;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsThrPrgrElement))];
+} ErtsThrPrgrArray;
+
+typedef struct {
+ union {
+ ErtsThrPrgrMiscVolatile tile;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(
+ sizeof(ErtsThrPrgrMiscVolatile))];
+ } vola;
+ ErtsThrPrgrArray *thr;
+ struct {
+ int no;
+ ErtsThrPrgrWakeupCallback *callback;
+ ErtsThrPrgrManagedWakeupData *data[ERTS_THR_PRGR_WAKEUP_DATA_SIZE];
+ } managed;
+ struct {
+ int no;
+ ErtsThrPrgrWakeupCallback *callback;
+ ErtsThrPrgrUnmanagedWakeupData *data[ERTS_THR_PRGR_WAKEUP_DATA_SIZE];
+ } unmanaged;
+} ErtsThrPrgrInternalData;
+
+static ErtsThrPrgrInternalData *intrnl;
+
+ErtsThrPrgr erts_thr_prgr__;
+
+erts_tsd_key_t erts_thr_prgr_data_key__;
+
+static void handle_wakeup_requests(ErtsThrPrgrVal current);
+static int got_sched_wakeups(void);
+
+static ERTS_INLINE void
+wakeup_managed(int id)
+{
+ ErtsThrPrgrWakeupCallback *wdp = &intrnl->managed.callback[id];
+ ASSERT(0 <= id && id < intrnl->managed.no);
+ wdp->wakeup(wdp->arg);
+}
+
+
+static ERTS_INLINE void
+wakeup_unmanaged(int id)
+{
+ ErtsThrPrgrWakeupCallback *wdp = &intrnl->unmanaged.callback[id];
+ ASSERT(0 <= id && id < intrnl->unmanaged.no);
+ wdp->wakeup(wdp->arg);
+}
+
+static ERTS_INLINE ErtsThrPrgrData *
+thr_prgr_data(ErtsSchedulerData *esdp)
+{
+ ErtsThrPrgrData *tpd;
+ if (esdp)
+ tpd = &esdp->thr_progress_data;
+ else
+ tpd = erts_tsd_get(erts_thr_prgr_data_key__);
+ ASSERT(tpd);
+ return tpd;
+}
+
+void
+erts_thr_progress_pre_init(void)
+{
+ intrnl = NULL;
+ erts_tsd_key_create(&erts_thr_prgr_data_key__);
+ init_nob(&erts_thr_prgr__.current, 0);
+}
+
+void
+erts_thr_progress_init(int no_schedulers, int managed, int unmanaged)
+{
+ int i, j, um_low, um_high;
+ char *ptr;
+ size_t cb_sz, intrnl_sz, thr_arr_sz, m_wakeup_size, um_wakeup_size,
+ tot_size;
+
+ intrnl_sz = sizeof(ErtsThrPrgrInternalData);
+ intrnl_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(intrnl_sz);
+
+ cb_sz = sizeof(ErtsThrPrgrWakeupCallback)*(managed+unmanaged);
+ cb_sz = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(cb_sz);
+
+ thr_arr_sz = sizeof(ErtsThrPrgrArray)*managed;
+ ASSERT(thr_arr_sz == ERTS_ALC_CACHE_LINE_ALIGN_SIZE(thr_arr_sz));
+
+ m_wakeup_size = sizeof(ErtsThrPrgrManagedWakeupData);
+ m_wakeup_size += (managed - 1)*sizeof(int);
+ m_wakeup_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(m_wakeup_size);
+
+ um_low = (unmanaged - 1)/ERTS_THR_PRGR_BM_BITS + 1;
+ um_high = (um_low - 1)/ERTS_THR_PRGR_BM_BITS + 1;
+
+ um_wakeup_size = sizeof(ErtsThrPrgrUnmanagedWakeupData);
+ um_wakeup_size += (um_high + um_low)*sizeof(erts_atomic32_t);
+ um_wakeup_size = ERTS_ALC_CACHE_LINE_ALIGN_SIZE(um_wakeup_size);
+
+ tot_size = intrnl_sz;
+ tot_size += cb_sz;
+ tot_size += thr_arr_sz;
+ tot_size += m_wakeup_size*ERTS_THR_PRGR_WAKEUP_DATA_SIZE;
+ tot_size += um_wakeup_size*ERTS_THR_PRGR_WAKEUP_DATA_SIZE;
+
+ ptr = erts_alloc_permanent_cache_aligned(ERTS_ALC_T_THR_PRGR_IDATA,
+ tot_size);
+
+ intrnl = (ErtsThrPrgrInternalData *) ptr;
+ ptr += intrnl_sz;
+
+ erts_atomic32_init_nob(&intrnl->vola.tile.lflgs,
+ ERTS_THR_PRGR_LFLG_NO_LEADER);
+ erts_atomic32_init_nob(&intrnl->vola.tile.pref_wakeup_used, 0);
+ erts_atomic32_init_nob(&intrnl->vola.tile.managed_id, no_schedulers);
+ erts_atomic32_init_nob(&intrnl->vola.tile.unmanaged_id, -1);
+
+ intrnl->thr = (ErtsThrPrgrArray *) ptr;
+ ptr += thr_arr_sz;
+ for (i = 0; i < managed; i++)
+ init_nob(&intrnl->thr[i].data.current, 0);
+
+ intrnl->managed.callback = (ErtsThrPrgrWakeupCallback *) ptr;
+ intrnl->unmanaged.callback = &intrnl->managed.callback[managed];
+ ptr += cb_sz;
+
+ intrnl->managed.no = managed;
+ for (i = 0; i < managed; i++) {
+ intrnl->managed.callback[i].arg = NULL;
+ intrnl->managed.callback[i].wakeup = NULL;
+ }
+
+ intrnl->unmanaged.no = unmanaged;
+ for (i = 0; i < unmanaged; i++) {
+ intrnl->unmanaged.callback[i].arg = NULL;
+ intrnl->unmanaged.callback[i].wakeup = NULL;
+ }
+
+ for (i = 0; i < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; i++) {
+ intrnl->managed.data[i] = (ErtsThrPrgrManagedWakeupData *) ptr;
+ erts_atomic32_init_nob(&intrnl->managed.data[i]->len, 0);
+ ptr += m_wakeup_size;
+ }
+
+ for (i = 0; i < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; i++) {
+ erts_atomic32_t *bm;
+ intrnl->unmanaged.data[i] = (ErtsThrPrgrUnmanagedWakeupData *) ptr;
+ erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->len, 0);
+ bm = (erts_atomic32_t *) (ptr + sizeof(ErtsThrPrgrUnmanagedWakeupData));
+ intrnl->unmanaged.data[i]->high = bm;
+ intrnl->unmanaged.data[i]->high_sz = um_high;
+ for (j = 0; j < um_high; j++)
+ erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->high[j], 0);
+ intrnl->unmanaged.data[i]->low
+ = &intrnl->unmanaged.data[i]->high[um_high];
+ intrnl->unmanaged.data[i]->low_sz = um_low;
+ for (j = 0; j < um_low; j++)
+ erts_atomic32_init_nob(&intrnl->unmanaged.data[i]->low[j], 0);
+ ptr += um_wakeup_size;
+ }
+ ERTS_THR_MEMORY_BARRIER;
+}
+
+static void
+init_wakeup_request_array(ErtsThrPrgrVal *w)
+{
+ int i;
+ ErtsThrPrgrVal current;
+
+ current = read_acqb(&erts_thr_prgr__.current);
+ for (i = 0; i < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; i++) {
+ w[i] = current - ((ErtsThrPrgrVal) (ERTS_THR_PRGR_WAKEUP_DATA_SIZE + i));
+ if (w[i] > current)
+ w[i]--;
+ }
+}
+
+void
+erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrWakeupCallback *callback)
+{
+ ErtsThrPrgrData *tpd;
+ if (erts_tsd_get(erts_thr_prgr_data_key__))
+ erl_exit(ERTS_ABORT_EXIT,
+ "%s:%d:%s(): Double register of thread\n",
+ __FILE__, __LINE__, __func__);
+ /*
+     * We only allocate the part up to the leader field,
+     * which is the first field used only by managed threads.
+ */
+ tpd = erts_alloc(ERTS_ALC_T_THR_PRGR_DATA,
+ offsetof(ErtsThrPrgrData, leader));
+ tpd->id = (int) erts_atomic32_inc_read_nob(&intrnl->vola.tile.unmanaged_id);
+ tpd->is_managed = 0;
+ ASSERT(tpd->id >= 0);
+ if (tpd->id >= intrnl->unmanaged.no)
+ erl_exit(ERTS_ABORT_EXIT,
+ "%s:%d:%s(): Too many unmanaged registered threads\n",
+ __FILE__, __LINE__, __func__);
+
+ init_wakeup_request_array(&tpd->wakeup_request[0]);
+ erts_tsd_set(erts_thr_prgr_data_key__, (void *) tpd);
+
+ intrnl->unmanaged.callback[tpd->id] = *callback;
+}
+
+
+void
+erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
+ ErtsThrPrgrWakeupCallback *callback,
+ int pref_wakeup)
+{
+ ErtsThrPrgrData *tpd;
+ if (erts_tsd_get(erts_thr_prgr_data_key__))
+ erl_exit(ERTS_ABORT_EXIT,
+ "%s:%d:%s(): Double register of thread\n",
+ __FILE__, __LINE__, __func__);
+ if (esdp)
+ tpd = &esdp->thr_progress_data;
+ else
+ tpd = erts_alloc(ERTS_ALC_T_THR_PRGR_DATA, sizeof(ErtsThrPrgrData));
+
+ if (pref_wakeup
+ && !erts_atomic32_xchg_nob(&intrnl->vola.tile.pref_wakeup_used, 1))
+ tpd->id = 0;
+ else if (esdp)
+ tpd->id = (int) esdp->no;
+ else
+ tpd->id = erts_atomic32_inc_read_nob(&intrnl->vola.tile.managed_id);
+ ASSERT(tpd->id >= 0);
+ if (tpd->id >= intrnl->managed.no)
+ erl_exit(ERTS_ABORT_EXIT,
+ "%s:%d:%s(): Too many managed registered threads\n",
+ __FILE__, __LINE__, __func__);
+
+ tpd->is_managed = 1;
+
+ init_wakeup_request_array(&tpd->wakeup_request[0]);
+
+ ERTS_THR_PROGRESS_STATE_DEBUG_INIT(tpd->id);
+
+ tpd->leader = 0;
+ tpd->active = 1;
+ tpd->previous.local = 0;
+ tpd->previous.current = ERTS_THR_PRGR_VAL_WAITING;
+ erts_tsd_set(erts_thr_prgr_data_key__, (void *) tpd);
+
+ erts_atomic32_inc_nob(&intrnl->vola.tile.lflgs);
+ intrnl->managed.callback[tpd->id] = *callback;
+}
+
+static ERTS_INLINE int
+leader_update(ErtsThrPrgrData *tpd)
+{
+ if (tpd->leader) {
+ erts_aint32_t lflgs;
+ ErtsThrPrgrVal next;
+ int ix, sz, make_progress;
+
+ if (tpd->previous.current == ERTS_THR_PRGR_VAL_WAITING) {
+ /* Took over as leader from another thread */
+ tpd->previous.current = read_acqb(&erts_thr_prgr__.current);
+ tpd->previous.next = tpd->previous.current;
+ tpd->previous.next++;
+ if (tpd->previous.next == ERTS_THR_PRGR_VAL_WAITING)
+ tpd->previous.next = 0;
+ }
+
+ if (tpd->previous.local == tpd->previous.current) {
+ ErtsThrPrgrVal val = tpd->previous.current + 1;
+ if (val == ERTS_THR_PRGR_VAL_WAITING)
+ val = 0;
+ tpd->previous.local = val;
+ set_mb(&intrnl->thr[tpd->id].data.current, val);
+ }
+
+ next = tpd->previous.next;
+
+ make_progress = 1;
+ sz = intrnl->managed.no;
+ for (ix = 0; ix < sz; ix++) {
+ ErtsThrPrgrVal tmp;
+ tmp = read_nob(&intrnl->thr[ix].data.current);
+ if (tmp != next && tmp != ERTS_THR_PRGR_VAL_WAITING) {
+ make_progress = 0;
+ ASSERT(erts_thr_progress_has_passed__(next, tmp));
+ break;
+ }
+ }
+
+ if (make_progress) {
+ ErtsThrPrgrVal current = next;
+
+ next++;
+ if (next == ERTS_THR_PRGR_VAL_WAITING)
+ next = 0;
+
+ set_nob(&intrnl->thr[tpd->id].data.current, next);
+ set_mb(&erts_thr_prgr__.current, current);
+ tpd->previous.local = next;
+ tpd->previous.next = next;
+ tpd->previous.current = current;
+
+#if ERTS_THR_PRGR_PRINT_VAL
+ if (current % 1000 == 0)
+ erts_fprintf(stderr, "%b64u\n", current);
+#endif
+ handle_wakeup_requests(current);
+ }
+
+ if (!tpd->active) {
+ tpd->leader = 0;
+ tpd->previous.current = ERTS_THR_PRGR_VAL_WAITING;
+#if ERTS_THR_PRGR_PRINT_LEADER
+ erts_fprintf(stderr, "L <- %d\n", tpd->id);
+#endif
+ ERTS_THR_PROGRESS_STATE_DEBUG_SET_LEADER(tpd->id, 0);
+
+ lflgs = erts_atomic32_read_bor_relb(&intrnl->vola.tile.lflgs,
+ ERTS_THR_PRGR_LFLG_NO_LEADER);
+ if (ERTS_THR_PRGR_LFLGS_ACTIVE(lflgs) == 0 && got_sched_wakeups())
+ wakeup_managed(0);
+ }
+ }
+
+ return tpd->leader;
+}
+
+static int
+update(ErtsThrPrgrData *tpd)
+{
+ ErtsThrPrgrVal val;
+
+ if (!tpd->leader) {
+ erts_aint32_t lflgs;
+ val = read_acqb(&erts_thr_prgr__.current);
+ if (tpd->previous.local == val) {
+ val++;
+ if (val == ERTS_THR_PRGR_VAL_WAITING)
+ val = 0;
+ tpd->previous.local = val;
+ set_mb(&intrnl->thr[tpd->id].data.current, val);
+ }
+
+ lflgs = erts_atomic32_read_nob(&intrnl->vola.tile.lflgs);
+ if ((lflgs & ERTS_THR_PRGR_LFLG_NO_LEADER)
+ && (tpd->active || ERTS_THR_PRGR_LFLGS_ACTIVE(lflgs) == 0)) {
+ /* Try to take over leadership... */
+ erts_aint32_t olflgs;
+ olflgs = erts_atomic32_read_band_acqb(
+ &intrnl->vola.tile.lflgs,
+ ~ERTS_THR_PRGR_LFLG_NO_LEADER);
+ if (olflgs & ERTS_THR_PRGR_LFLG_NO_LEADER) {
+ tpd->leader = 1;
+#if ERTS_THR_PRGR_PRINT_LEADER
+ erts_fprintf(stderr, "L -> %d\n", tpd->id);
+#endif
+ ERTS_THR_PROGRESS_STATE_DEBUG_SET_LEADER(tpd->id, 1);
+ }
+ }
+ }
+ return tpd->leader;
+}
+
+int
+erts_thr_progress_update(ErtsSchedulerData *esdp)
+{
+ return update(thr_prgr_data(esdp));
+}
+
+
+int
+erts_thr_progress_leader_update(ErtsSchedulerData *esdp)
+{
+ return leader_update(thr_prgr_data(esdp));
+}
+
+void
+erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp)
+{
+ erts_aint32_t lflgs;
+ ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+
+ tpd->previous.local = ERTS_THR_PRGR_VAL_WAITING;
+ set_mb(&intrnl->thr[tpd->id].data.current, ERTS_THR_PRGR_VAL_WAITING);
+
+ lflgs = erts_atomic32_read_nob(&intrnl->vola.tile.lflgs);
+ if (ERTS_THR_PRGR_LFLGS_ALL_WAITING(lflgs) && got_sched_wakeups())
+	wakeup_managed(0); /* Someone needs to make progress */
+}
+
+void
+erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp)
+{
+ ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+ ErtsThrPrgrVal current, val;
+
+ /*
+ * We aren't allowed to continue until our thread
+ * progress is past global current.
+ */
+ val = current = read_acqb(&erts_thr_prgr__.current);
+ while (1) {
+ val++;
+ if (val == ERTS_THR_PRGR_VAL_WAITING)
+ val = 0;
+ tpd->previous.local = val;
+ set_mb(&intrnl->thr[tpd->id].data.current, val);
+ val = read_acqb(&erts_thr_prgr__.current);
+ if (current == val)
+ break;
+ current = val;
+ }
+ if (update(tpd))
+ leader_update(tpd);
+}
+
+void
+erts_thr_progress_active(ErtsSchedulerData *esdp, int on)
+{
+ ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+
+ ERTS_THR_PROGRESS_STATE_DEBUG_SET_ACTIVE(tpd->id, on);
+
+ if (on) {
+ ASSERT(!tpd->active);
+ tpd->active = 1;
+ erts_atomic32_inc_nob(&intrnl->vola.tile.lflgs);
+ }
+ else {
+ ASSERT(tpd->active);
+ tpd->active = 0;
+ erts_atomic32_dec_nob(&intrnl->vola.tile.lflgs);
+
+ if (update(tpd))
+ leader_update(tpd);
+ }
+
+#ifdef DEBUG
+ {
+ erts_aint32_t n = erts_atomic32_read_nob(&intrnl->vola.tile.lflgs);
+ n &= ERTS_THR_PRGR_LFLG_ACTIVE_MASK;
+ ASSERT(tpd->active <= n && n <= intrnl->managed.no);
+ }
+#endif
+
+}
+
+static ERTS_INLINE int
+has_reached_wakeup(ErtsThrPrgrVal wakeup)
+{
+ /*
+ * Exactly the same as erts_thr_progress_has_reached(), but
+ * also verify valid wakeup requests in debug mode.
+ */
+ ErtsThrPrgrVal current;
+
+ current = read_acqb(&erts_thr_prgr__.current);
+
+#if ERTS_THR_PRGR_DBG_CHK_WAKEUP_REQUEST_VALUE
+ {
+ ErtsThrPrgrVal limit;
+ /*
+ * erts_thr_progress_later() returns values which are
+ * equal to 'current + 2'. That is, users should never
+ * get a hold of values larger than that.
+ *
+ * That is, valid values are values less than 'current + 3'.
+ *
+ * Values larger than this won't work with the wakeup
+ * algorithm.
+ */
+
+ limit = current + 3;
+ if (limit == ERTS_THR_PRGR_VAL_WAITING)
+ limit = 0;
+ else if (limit < current) /* Wrapped */
+	    limit++;
+
+ if (!erts_thr_progress_has_passed__(limit, wakeup))
+ erl_exit(ERTS_ABORT_EXIT,
+ "Invalid wakeup request value found:"
+ " current=%b64u, wakeup=%b64u, limit=%b64u",
+ current, wakeup, limit);
+ }
+#endif
+
+ if (current == wakeup)
+ return 1;
+ return erts_thr_progress_has_passed__(current, wakeup);
+}
+
+static void
+request_wakeup_managed(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value)
+{
+ ErtsThrPrgrManagedWakeupData *mwd;
+ int ix, wix;
+
+ /*
+ * Only managed threads that aren't in waiting state
+ * are allowed to call this function.
+ */
+
+ ASSERT(tpd->is_managed);
+ ASSERT(tpd->previous.local != ERTS_THR_PRGR_VAL_WAITING);
+
+ if (has_reached_wakeup(value))
+ wakeup_managed(tpd->id);
+
+ wix = ERTS_THR_PRGR_WAKEUP_IX(value);
+ if (tpd->wakeup_request[wix] == value)
+ return; /* Already got a request registered */
+
+ ASSERT(erts_thr_progress_has_passed__(value,
+ tpd->wakeup_request[wix]));
+
+
+ if (tpd->previous.local == value) {
+ /*
+ * We have already confirmed this value. We need to request
+ * wakeup for a value later than our latest confirmed value in
+ * order to prevent progress from reaching the requested value
+ * while we are writing the request.
+ *
+ * It is ok to move the wakeup request forward since the only
+ * guarantee we make (and can make) is that the thread will be
+ * woken some time *after* the requested value has been reached.
+ */
+ value++;
+ if (value == ERTS_THR_PRGR_VAL_WAITING)
+ value = 0;
+
+ wix = ERTS_THR_PRGR_WAKEUP_IX(value);
+ if (tpd->wakeup_request[wix] == value)
+ return; /* Already got a request registered */
+
+ ASSERT(erts_thr_progress_has_passed__(value,
+ tpd->wakeup_request[wix]));
+ }
+
+ tpd->wakeup_request[wix] = value;
+
+ mwd = intrnl->managed.data[wix];
+
+ ix = erts_atomic32_inc_read_nob(&mwd->len) - 1;
+ mwd->id[ix] = tpd->id;
+
+ ASSERT(!erts_thr_progress_has_reached(value));
+
+ /*
+     * This thread is guaranteed to issue a full memory barrier:
+     * - after the request has been written, but
+     * - before the global thread progress reaches the (possibly
+ * increased) requested wakeup value.
+ */
+}
+
+static void
+request_wakeup_unmanaged(ErtsThrPrgrData *tpd, ErtsThrPrgrVal value)
+{
+ int wix, ix, id, bit;
+ ErtsThrPrgrUnmanagedWakeupData *umwd;
+
+ ASSERT(!tpd->is_managed);
+
+ /*
+ * Thread progress *can* reach and pass our requested value while
+ * we are writing the request.
+ */
+
+ if (has_reached_wakeup(value))
+ wakeup_unmanaged(tpd->id);
+
+ wix = ERTS_THR_PRGR_WAKEUP_IX(value);
+
+ if (tpd->wakeup_request[wix] == value)
+ return; /* Already got a request registered */
+
+ ASSERT(erts_thr_progress_has_passed__(value,
+ tpd->wakeup_request[wix]));
+
+ umwd = intrnl->unmanaged.data[wix];
+
+ id = tpd->id;
+
+ bit = id & ERTS_THR_PRGR_BM_MASK;
+ ix = id >> ERTS_THR_PRGR_BM_SHIFT;
+ ASSERT(0 <= ix && ix < umwd->low_sz);
+ erts_atomic32_read_bor_nob(&umwd->low[ix], 1 << bit);
+
+ bit = ix & ERTS_THR_PRGR_BM_MASK;
+ ix >>= ERTS_THR_PRGR_BM_SHIFT;
+ ASSERT(0 <= ix && ix < umwd->high_sz);
+ erts_atomic32_read_bor_nob(&umwd->high[ix], 1 << bit);
+
+ erts_atomic32_inc_mb(&umwd->len);
+
+ if (erts_thr_progress_has_reached(value))
+ wakeup_unmanaged(tpd->id);
+ else
+ tpd->wakeup_request[wix] = value;
+}
+
+void
+erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+ ErtsThrPrgrVal value)
+{
+ ErtsThrPrgrData *tpd = thr_prgr_data(esdp);
+ ASSERT(tpd);
+ if (tpd->is_managed)
+ request_wakeup_managed(tpd, value);
+ else
+ request_wakeup_unmanaged(tpd, value);
+}
+
+static void
+wakeup_unmanaged_threads(ErtsThrPrgrUnmanagedWakeupData *umwd)
+{
+ int hix;
+ for (hix = 0; hix < umwd->high_sz; hix++) {
+ erts_aint32_t hmask = erts_atomic32_read_nob(&umwd->high[hix]);
+ if (hmask) {
+ int hbase = hix << ERTS_THR_PRGR_BM_SHIFT;
+ int hbit;
+ for (hbit = 0; hbit < ERTS_THR_PRGR_BM_BITS; hbit++) {
+ if (hmask & (1 << hbit)) {
+ erts_aint_t lmask;
+ int lix = hbase + hbit;
+ ASSERT(0 <= lix && lix < umwd->low_sz);
+ lmask = erts_atomic32_read_nob(&umwd->low[lix]);
+ if (lmask) {
+ int lbase = lix << ERTS_THR_PRGR_BM_SHIFT;
+ int lbit;
+ for (lbit = 0; lbit < ERTS_THR_PRGR_BM_BITS; lbit++) {
+ if (lmask & (1 << lbit)) {
+ int id = lbase + lbit;
+ wakeup_unmanaged(id);
+ }
+ }
+ erts_atomic32_set_nob(&umwd->low[lix], 0);
+ }
+ }
+ }
+ erts_atomic32_set_nob(&umwd->high[hix], 0);
+ }
+ }
+}
+
+
+static void
+handle_wakeup_requests(ErtsThrPrgrVal current)
+{
+ ErtsThrPrgrManagedWakeupData *mwd;
+ ErtsThrPrgrUnmanagedWakeupData *umwd;
+ int wix, len, i;
+
+ wix = ERTS_THR_PRGR_WAKEUP_IX(current);
+
+ mwd = intrnl->managed.data[wix];
+ len = erts_atomic32_read_nob(&mwd->len);
+ ASSERT(len >= 0);
+ if (len) {
+ for (i = 0; i < len; i++)
+ wakeup_managed(mwd->id[i]);
+ erts_atomic32_set_nob(&mwd->len, 0);
+ }
+
+ umwd = intrnl->unmanaged.data[wix];
+ len = erts_atomic32_read_nob(&umwd->len);
+ ASSERT(len >= 0);
+ if (len) {
+ wakeup_unmanaged_threads(umwd);
+ erts_atomic32_set_nob(&umwd->len, 0);
+ }
+
+}
+
+static int
+got_sched_wakeups(void)
+{
+ int wix;
+
+ ERTS_THR_MEMORY_BARRIER;
+
+ for (wix = 0; wix < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; wix++) {
+ ErtsThrPrgrManagedWakeupData **mwd = intrnl->managed.data;
+ if (erts_atomic32_read_nob(&mwd[wix]->len))
+ return 1;
+ }
+ for (wix = 0; wix < ERTS_THR_PRGR_WAKEUP_DATA_SIZE; wix++) {
+ ErtsThrPrgrUnmanagedWakeupData **umwd = intrnl->unmanaged.data;
+ if (erts_atomic32_read_nob(&umwd[wix]->len))
+ return 1;
+ }
+ return 0;
+}
+
+
+void erts_thr_progress_dbg_print_state(void)
+{
+ int id;
+ int sz = intrnl->managed.no;
+
+ erts_fprintf(stderr, "--- thread progress ---\n");
+ erts_fprintf(stderr,"current=%b64u\n", erts_thr_progress_current());
+ for (id = 0; id < sz; id++) {
+ ErtsThrPrgrVal current = read_nob(&intrnl->thr[id].data.current);
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ erts_aint32_t state_debug;
+ char *active, *leader;
+
+ state_debug = erts_atomic32_read_nob(&intrnl->thr[id].data.state_debug);
+ active = (state_debug & ERTS_THR_PROGRESS_STATE_DEBUG_ACTIVE
+ ? "true"
+ : "false");
+ leader = (state_debug & ERTS_THR_PROGRESS_STATE_DEBUG_LEADER
+ ? "true"
+ : "false");
+#endif
+ if (current == ERTS_THR_PRGR_VAL_WAITING)
+ erts_fprintf(stderr,
+ " id=%d, current=WAITING"
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ ", active=%s, leader=%s"
+#endif
+ "\n", id
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ , active, leader
+#endif
+ );
+ else
+ erts_fprintf(stderr,
+ " id=%d, current=%b64u"
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ ", active=%s, leader=%s"
+#endif
+ "\n", id, current
+#ifdef ERTS_THR_PROGRESS_STATE_DEBUG
+ , active, leader
+#endif
+ );
+ }
+ erts_fprintf(stderr, "-----------------------\n");
+
+
+}
+
+#endif
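request_wakeup_unmanaged() above records a sleeper in a two-level bitmap: with ERTS_THR_PRGR_BM_BITS == 32, each low word tracks 32 thread ids and each high word tracks 32 low words, so wakeup_unmanaged_threads() can skip empty regions a word at a time. A standalone sketch of the index arithmetic, not ERTS code; the id is arbitrary:

    #include <stdio.h>

    #define BM_SHIFT 5            /* log2(32) */
    #define BM_MASK  0x1f

    int main(void)
    {
        int id = 1234;                       /* hypothetical unmanaged thread id */
        int low_bit  = id & BM_MASK;         /* bit within the low word  -> 18   */
        int low_ix   = id >> BM_SHIFT;       /* which low word           -> 38   */
        int high_bit = low_ix & BM_MASK;     /* bit within the high word -> 6    */
        int high_ix  = low_ix >> BM_SHIFT;   /* which high word          -> 1    */
        printf("id %d -> low[%d] bit %d, high[%d] bit %d\n",
               id, low_ix, low_bit, high_ix, high_bit);
        return 0;
    }
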
diff --git a/erts/emulator/beam/erl_thr_progress.h b/erts/emulator/beam/erl_thr_progress.h
new file mode 100644
index 0000000000..36880c10f0
--- /dev/null
+++ b/erts/emulator/beam/erl_thr_progress.h
@@ -0,0 +1,210 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Thread progress information. Used by lock free algorithms
+ * to determine when all involved threads are guaranteed to
+ * have passed a specific point of execution.
+ *
+ * Usage instructions can be found in erl_thr_progress.c
+ *
+ * Author: Rickard Green
+ */
+
+#if !defined(ERL_THR_PROGRESS_H__TSD_TYPE__)
+#define ERL_THR_PROGRESS_H__TSD_TYPE__
+
+#include "sys.h"
+
+#ifdef ERTS_SMP
+
+typedef Uint64 ErtsThrPrgrVal;
+
+#define ERTS_THR_PRGR_WAKEUP_DATA_SIZE 4 /* Need to be an even power of 2. */
+
+typedef struct {
+ int id;
+ int is_managed;
+ ErtsThrPrgrVal wakeup_request[ERTS_THR_PRGR_WAKEUP_DATA_SIZE];
+
+ /* --- Part below only for managed threads */
+
+ int leader; /* Needs to be first in the managed threads part */
+ int active;
+ struct {
+ ErtsThrPrgrVal local;
+ ErtsThrPrgrVal next;
+ ErtsThrPrgrVal current;
+ } previous;
+} ErtsThrPrgrData;
+#endif /* ERTS_SMP */
+
+#endif
+
+#if !defined(ERL_THR_PROGRESS_H__) && !defined(ERL_THR_PROGRESS_TSD_TYPE_ONLY)
+#define ERL_THR_PROGRESS_H__
+
+#include "erl_threads.h"
+#include "erl_process.h"
+
+#ifdef ERTS_SMP
+
+#define ERTS_THR_PRGR_VAL_WAITING (~((ErtsThrPrgrVal) 0))
+
+extern erts_tsd_key_t erts_thr_prgr_data_key__;
+
+#ifdef ARCH_64
+# define ERTS_THR_PRGR_ATOMIC erts_atomic_t
+#else /* ARCH_32 */
+# define ERTS_THR_PRGR_ATOMIC erts_dw_atomic_t
+#endif
+
+typedef struct {
+ void *arg;
+ void (*wakeup)(void *arg);
+} ErtsThrPrgrWakeupCallback;
+
+typedef struct {
+ ERTS_THR_PRGR_ATOMIC current;
+} ErtsThrPrgr;
+
+extern ErtsThrPrgr erts_thr_prgr__;
+
+void erts_thr_progress_pre_init(void);
+void erts_thr_progress_init(int no_schedulers, int managed, int unmanaged);
+void erts_thr_progress_register_managed_thread(ErtsSchedulerData *esdp,
+ ErtsThrPrgrWakeupCallback *,
+ int);
+void erts_thr_progress_register_unmanaged_thread(ErtsThrPrgrWakeupCallback *);
+void erts_thr_progress_active(ErtsSchedulerData *esdp, int on);
+void erts_thr_progress_wakeup(ErtsSchedulerData *esdp,
+ ErtsThrPrgrVal value);
+int erts_thr_progress_update(ErtsSchedulerData *esdp);
+int erts_thr_progress_leader_update(ErtsSchedulerData *esdp);
+void erts_thr_progress_prepare_wait(ErtsSchedulerData *esdp);
+void erts_thr_progress_finalize_wait(ErtsSchedulerData *esdp);
+
+void erts_thr_progress_dbg_print_state(void);
+
+#ifdef ARCH_32
+#define ERTS_THR_PRGR_ATOMIC erts_dw_atomic_t
+ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_dw_sint_to_val__(ethr_dw_sint_t *dw_sint);
+#endif
+ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc);
+
+ERTS_GLB_INLINE int erts_thr_progress_is_managed_thread(void);
+ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_progress_later(void);
+ERTS_GLB_INLINE ErtsThrPrgrVal erts_thr_progress_current(void);
+ERTS_GLB_INLINE int erts_thr_progress_has_passed__(ErtsThrPrgrVal val1, ErtsThrPrgrVal val0);
+ERTS_GLB_INLINE int erts_thr_progress_has_reached(ErtsThrPrgrVal val);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef ARCH_64
+
+ERTS_GLB_INLINE ErtsThrPrgrVal
+erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc)
+{
+ return (ErtsThrPrgrVal) erts_atomic_read_acqb(atmc);
+}
+
+#else /* ARCH_32 */
+
+ERTS_GLB_INLINE ErtsThrPrgrVal
+erts_thr_prgr_dw_sint_to_val__(ethr_dw_sint_t *dw_sint)
+{
+#ifdef ETHR_SU_DW_NAINT_T__
+ return (ErtsThrPrgrVal) dw_sint->dw_sint;
+#else
+ ErtsThrPrgrVal res;
+ res = (ErtsThrPrgrVal) ((Uint32) dw_sint->sint[ETHR_DW_SINT_HIGH_WORD]);
+ res <<= 32;
+ res |= (ErtsThrPrgrVal) ((Uint32) dw_sint->sint[ETHR_DW_SINT_LOW_WORD]);
+ return res;
+#endif
+}
+
+ERTS_GLB_INLINE ErtsThrPrgrVal
+erts_thr_prgr_read_acqb__(ERTS_THR_PRGR_ATOMIC *atmc)
+{
+ ethr_dw_sint_t dw_sint;
+ erts_dw_atomic_read_acqb(atmc, &dw_sint);
+ return erts_thr_prgr_dw_sint_to_val__(&dw_sint);
+}
+
+#endif
+
+ERTS_GLB_INLINE int
+erts_thr_progress_is_managed_thread(void)
+{
+ ErtsThrPrgrData *tpd = erts_tsd_get(erts_thr_prgr_data_key__);
+ return tpd && tpd->is_managed;
+}
+
+ERTS_GLB_INLINE ErtsThrPrgrVal
+erts_thr_progress_later(void)
+{
+ ErtsThrPrgrVal val = erts_thr_prgr_read_acqb__(&erts_thr_prgr__.current);
+ if (val == (ERTS_THR_PRGR_VAL_WAITING-((ErtsThrPrgrVal)2)))
+ return ((ErtsThrPrgrVal) 0);
+ else if (val == (ERTS_THR_PRGR_VAL_WAITING-((ErtsThrPrgrVal)1)))
+ return ((ErtsThrPrgrVal) 1);
+ else
+ return val + ((ErtsThrPrgrVal) 2);
+}
+
+ERTS_GLB_INLINE ErtsThrPrgrVal
+erts_thr_progress_current(void)
+{
+ return erts_thr_prgr_read_acqb__(&erts_thr_prgr__.current);
+}
+
+ERTS_GLB_INLINE int
+erts_thr_progress_has_passed__(ErtsThrPrgrVal val1, ErtsThrPrgrVal val0)
+{
+ if ((((((ErtsThrPrgrVal) 1) << 63) & val1)
+ ^ ((((ErtsThrPrgrVal) 1) << 63) & val0)) != 0) {
+ /* May have wrapped... */
+ if (val1 < (((ErtsThrPrgrVal) 1) << 62)
+ && val0 > (((ErtsThrPrgrVal) 3) << 62)) {
+ /*
+ * 'val1' has wrapped but 'val0' has not yet wrapped. While in
+ * these ranges 'current' is considered later than 'val0'.
+	     * these ranges 'val1' is considered later than 'val0'.
+ return 1;
+ }
+ }
+ return val1 > val0;
+}
+
+ERTS_GLB_INLINE int
+erts_thr_progress_has_reached(ErtsThrPrgrVal val)
+{
+ ErtsThrPrgrVal current;
+ current = erts_thr_prgr_read_acqb__(&erts_thr_prgr__.current);
+ if (current == val)
+ return 1;
+ return erts_thr_progress_has_passed__(current, val);
+}
+
+#endif
+
+#endif /* ERTS_SMP */
+
+#endif
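erts_thr_progress_has_passed__() treats the 64-bit counter as effectively monotonic and falls back to a plain '>' comparison, except in the narrow window where val1 sits in the lowest quarter of the range and val0 in the highest, which is read as "val1 has wrapped and is therefore later". A standalone sketch of the same comparison, not ERTS code (it also ignores the reserved ERTS_THR_PRGR_VAL_WAITING value):

    #include <stdint.h>
    #include <stdio.h>

    static int has_passed(uint64_t val1, uint64_t val0)
    {
        if (((val1 ^ val0) >> 63) != 0            /* top bits differ      */
            && val1 < ((uint64_t) 1 << 62)        /* val1 in lowest 1/4   */
            && val0 > ((uint64_t) 3 << 62))       /* val0 in highest 1/4  */
            return 1;                             /* val1 wrapped: later  */
        return val1 > val0;
    }

    int main(void)
    {
        printf("%d\n", has_passed(10, 5));              /* 1          */
        printf("%d\n", has_passed(5, 10));              /* 0          */
        printf("%d\n", has_passed(2, UINT64_MAX - 7));  /* 1: wrapped */
        return 0;
    }
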
diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h
index 12eaf39ec7..b4b6d0dfd5 100644
--- a/erts/emulator/beam/erl_threads.h
+++ b/erts/emulator/beam/erl_threads.h
@@ -193,6 +193,8 @@ typedef struct { int gcc_is_buggy; } erts_rwlock_t;
#endif /* #ifdef USE_THREADS */
+#define ERTS_AINT_NULL ((erts_aint_t) NULL)
+
#define ERTS_AINT_T_MAX (~(((erts_aint_t) 1) << (sizeof(erts_aint_t)*8-1)))
#define ERTS_AINT_T_MIN ((((erts_aint_t) 1) << (sizeof(erts_aint_t)*8-1)))
#define ERTS_AINT32_T_MAX (~(((erts_aint32_t) 1) << (sizeof(erts_aint32_t)*8-1)))
diff --git a/erts/emulator/beam/fix_alloc.c b/erts/emulator/beam/fix_alloc.c
deleted file mode 100644
index 5637281597..0000000000
--- a/erts/emulator/beam/fix_alloc.c
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * %CopyrightBegin%
- *
- * Copyright Ericsson AB 1996-2009. All Rights Reserved.
- *
- * The contents of this file are subject to the Erlang Public License,
- * Version 1.1, (the "License"); you may not use this file except in
- * compliance with the License. You should have received a copy of the
- * Erlang Public License along with this software. If not, it can be
- * retrieved online at http://www.erlang.org/.
- *
- * Software distributed under the License is distributed on an "AS IS"
- * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
- * the License for the specific language governing rights and limitations
- * under the License.
- *
- * %CopyrightEnd%
- */
-/* General purpose Memory allocator for fixed block size objects */
-/* This allocater is at least an order of magnitude faster than malloc() */
-
-
-#define NOPERBLOCK 20
-#ifdef HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#include "sys.h"
-#include "erl_vm.h"
-#include "global.h"
-#include "erl_db.h"
-
-#ifdef ERTS_ALC_N_MIN_A_FIXED_SIZE
-
-#if ERTS_ALC_MTA_FIXED_SIZE
-#include "erl_threads.h"
-#include "erl_smp.h"
-# ifdef ERTS_SMP
-# define FA_LOCK(FA) erts_smp_spin_lock(&(FA)->slck)
-# define FA_UNLOCK(FA) erts_smp_spin_unlock(&(FA)->slck)
-# else
-# define FA_LOCK(FA) erts_mtx_lock(&(FA)->mtx)
-# define FA_UNLOCK(FA) erts_mtx_unlock(&(FA)->mtx)
-# endif
-#else
-# define FA_LOCK(FA)
-# define FA_UNLOCK(FA)
-#endif
-
-typedef union {double d; long l;} align_t;
-
-typedef struct fix_alloc_block {
- struct fix_alloc_block *next;
- align_t mem[1];
-} FixAllocBlock;
-
-typedef struct fix_alloc {
- Uint item_size;
- void *freelist;
- Uint no_free;
- Uint no_blocks;
- FixAllocBlock *blocks;
-#if ERTS_ALC_MTA_FIXED_SIZE
-# ifdef ERTS_SMP
- erts_smp_spinlock_t slck;
-# else
- erts_mtx_t mtx;
-# endif
-#endif
-} FixAlloc;
-
-static void *(*core_alloc)(Uint);
-static Uint xblk_sz;
-
-static FixAlloc **fa;
-#define FA_SZ (1 + ERTS_ALC_N_MAX_A_FIXED_SIZE - ERTS_ALC_N_MIN_A_FIXED_SIZE)
-
-#define FIX_IX(N) ((N) - ERTS_ALC_N_MIN_A_FIXED_SIZE)
-
-#define FIX_POOL_SZ(I_SZ) \
- ((I_SZ)*NOPERBLOCK + sizeof(FixAllocBlock) - sizeof(align_t))
-
-#if defined(DEBUG) && !ERTS_ALC_MTA_FIXED_SIZE
-static int first_time;
-#endif
-
-void erts_init_fix_alloc(Uint extra_block_size,
- void *(*alloc)(Uint))
-{
- int i;
-
- xblk_sz = extra_block_size;
- core_alloc = alloc;
-
- fa = (FixAlloc **) (*core_alloc)(FA_SZ * sizeof(FixAlloc *));
- if (!fa)
- erts_alloc_enomem(ERTS_ALC_T_UNDEF, FA_SZ * sizeof(FixAlloc *));
-
- for (i = 0; i < FA_SZ; i++)
- fa[i] = NULL;
-#if defined(DEBUG) && !ERTS_ALC_MTA_FIXED_SIZE
- first_time = 1;
-#endif
-}
-
-Uint
-erts_get_fix_size(ErtsAlcType_t type)
-{
- Uint i = FIX_IX(ERTS_ALC_T2N(type));
- return i < FA_SZ && fa[i] ? fa[i]->item_size : 0;
-}
-
-void
-erts_set_fix_size(ErtsAlcType_t type, Uint size)
-{
- Uint sz;
- Uint i;
- FixAlloc *fs;
- ErtsAlcType_t t_no = ERTS_ALC_T2N(type);
- sz = xblk_sz + size;
-
-#ifdef DEBUG
- ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= t_no);
- ASSERT(t_no <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-#endif
-
- while (sz % sizeof(align_t) != 0) /* Alignment */
- sz++;
-
- i = FIX_IX(t_no);
- fs = (FixAlloc *) (*core_alloc)(sizeof(FixAlloc));
- if (!fs)
- erts_alloc_n_enomem(t_no, sizeof(FixAlloc));
-
- fs->item_size = sz;
- fs->no_blocks = 0;
- fs->no_free = 0;
- fs->blocks = NULL;
- fs->freelist = NULL;
- if (fa[i])
- erl_exit(-1, "Attempt to overwrite existing fix size (%d)", i);
- fa[i] = fs;
-
-#if ERTS_ALC_MTA_FIXED_SIZE
-#ifdef ERTS_SMP
- erts_smp_spinlock_init_x(&fs->slck, "fix_alloc", make_small(i));
-#else
- erts_mtx_init_x(&fs->mtx, "fix_alloc", make_small(i));
-#endif
-#endif
-
-}
-
-void
-erts_fix_info(ErtsAlcType_t type, ErtsFixInfo *efip)
-{
- Uint i;
- FixAlloc *f;
-#ifdef DEBUG
- FixAllocBlock *b;
- void *fp;
-#endif
- Uint real_item_size;
- ErtsAlcType_t t_no = ERTS_ALC_T2N(type);
-
- ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= t_no);
- ASSERT(t_no <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-
- i = FIX_IX(t_no);
- f = fa[i];
-
- efip->total = sizeof(FixAlloc *);
- efip->used = 0;
- if (!f)
- return;
-
- real_item_size = f->item_size - xblk_sz;
-
- FA_LOCK(f);
-
- efip->total += sizeof(FixAlloc);
- efip->total += f->no_blocks*FIX_POOL_SZ(real_item_size);
- efip->used = efip->total - f->no_free*real_item_size;
-
-#ifdef DEBUG
- ASSERT(efip->total >= efip->used);
- for(i = 0, b = f->blocks; b; i++, b = b->next);
- ASSERT(f->no_blocks == i);
- for (i = 0, fp = f->freelist; fp; i++, fp = *((void **) fp));
- ASSERT(f->no_free == i);
-#endif
-
- FA_UNLOCK(f);
-
-}
-
-void
-erts_fix_free(ErtsAlcType_t t_no, void *extra, void* ptr)
-{
- Uint i;
- FixAlloc *f;
-
- ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= t_no);
- ASSERT(t_no <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-
- i = FIX_IX(t_no);
- f = fa[i];
-
- FA_LOCK(f);
- *((void **) ptr) = f->freelist;
- f->freelist = ptr;
- f->no_free++;
- FA_UNLOCK(f);
-}
-
-
-void *erts_fix_realloc(ErtsAlcType_t t_no, void *extra, void* ptr, Uint size)
-{
- erts_alc_fatal_error(ERTS_ALC_E_NOTSUP, ERTS_ALC_O_REALLOC, t_no);
- return NULL;
-}
-
-void *erts_fix_alloc(ErtsAlcType_t t_no, void *extra, Uint size)
-{
- void *ret;
- int i;
- FixAlloc *f;
-
-#if defined(DEBUG) && !ERTS_ALC_MTA_FIXED_SIZE
- ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= t_no);
- ASSERT(t_no <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
- if (first_time) { /* Check that all sizes have been initialized */
- int i;
- for (i = 0; i < FA_SZ; i++)
- ASSERT(fa[i]);
- first_time = 0;
- }
-#endif
-
-
- i = FIX_IX(t_no);
- f = fa[i];
-
- ASSERT(f);
- ASSERT(f->item_size >= size);
-
- FA_LOCK(f);
- if (f->freelist == NULL) { /* Gotta alloc some more mem */
- char *ptr;
- FixAllocBlock *bl;
- Uint n;
-
-
- FA_UNLOCK(f);
- bl = (*core_alloc)(FIX_POOL_SZ(f->item_size));
- if (!bl)
- return NULL;
-
- FA_LOCK(f);
- bl->next = f->blocks; /* link in first */
- f->blocks = bl;
-
- n = NOPERBLOCK;
- ptr = (char *) &f->blocks->mem[0];
- while(n--) {
- *((void **) ptr) = f->freelist;
- f->freelist = (void *) ptr;
- ptr += f->item_size;
- }
-#if !ERTS_ALC_MTA_FIXED_SIZE
- ASSERT(f->no_free == 0);
-#endif
- f->no_free += NOPERBLOCK;
- f->no_blocks++;
- }
-
- ret = f->freelist;
- f->freelist = *((void **) f->freelist);
- ASSERT(f->no_free > 0);
- f->no_free--;
-
- FA_UNLOCK(f);
-
- return ret;
-}
-
-#endif /* #ifdef ERTS_ALC_N_MIN_A_FIXED_SIZE */
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 249df54015..6687e02485 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -546,7 +546,7 @@ ERTS_GLB_INLINE void erts_may_save_closed_port(Port *prt)
tombstone = (Eterm*) erts_smp_atomic_add_read_nob(&erts_dead_ports_ptr,
-(erts_aint_t)sizeof(Eterm));
ASSERT(tombstone+1 != NULL);
- ASSERT(prt->snapshot == erts_smp_atomic_read_nob(&erts_ports_snapshot) - 1);
+ ASSERT(prt->snapshot == erts_smp_atomic32_read_nob(&erts_ports_snapshot) - 1);
*tombstone = prt->id;
}
/*else no ongoing snapshot or port was already included or created after snapshot */
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 669a601b35..9f5747205d 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -616,13 +616,10 @@ extern char *erts_sys_ddll_error(int code);
* System interfaces for startup.
*/
-
-#ifdef ERTS_SMP
void erts_sys_schedule_interrupt(int set);
+#ifdef ERTS_SMP
void erts_sys_schedule_interrupt_timed(int set, long msec);
void erts_sys_main_thread(void);
-#else
-#define erts_sys_schedule_interrupt(Set)
#endif
extern void erts_sys_prepare_crash_dump(void);
@@ -697,10 +694,10 @@ int erts_write_env(char *key, char *value);
int sys_alloc_opt(int, int);
typedef struct {
- Sint trim_threshold;
- Sint top_pad;
- Sint mmap_threshold;
- Sint mmap_max;
+ int trim_threshold;
+ int top_pad;
+ int mmap_threshold;
+ int mmap_max;
} SysAllocStat;
void sys_alloc_stat(SysAllocStat *);
diff --git a/erts/emulator/beam/time.c b/erts/emulator/beam/time.c
index 8fa8c1cfe0..db9a24e0a3 100644
--- a/erts/emulator/beam/time.c
+++ b/erts/emulator/beam/time.c
@@ -444,7 +444,7 @@ erts_time_left(ErlTimer *p)
}
#ifdef DEBUG
-void erts_p_slpq()
+void erts_p_slpq(void)
{
int i;
ErlTimer* p;
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 3f6accba2d..3fa84bd13c 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -42,6 +42,8 @@
#include "erl_threads.h"
#include "erl_smp.h"
#include "erl_time.h"
+#include "erl_thr_progress.h"
+#include "erl_sched_spec_pre_alloc.h"
#undef M_TRIM_THRESHOLD
#undef M_TOP_PAD
@@ -3250,10 +3252,10 @@ erts_cancel_smp_ptimer(ErtsSmpPTimer *ptimer)
#endif
-static Sint trim_threshold;
-static Sint top_pad;
-static Sint mmap_threshold;
-static Sint mmap_max;
+static int trim_threshold;
+static int top_pad;
+static int mmap_threshold;
+static int mmap_max;
Uint tot_bin_allocated;
@@ -3276,8 +3278,8 @@ int
sys_alloc_opt(int opt, int value)
{
#if HAVE_MALLOPT
- Sint m_opt;
- Sint *curr_val;
+ int m_opt;
+ int *curr_val;
switch(opt) {
case SYS_ALLOC_OPT_TRIM_THRESHOLD:
@@ -3317,7 +3319,7 @@ sys_alloc_opt(int opt, int value)
}
if(mallopt(m_opt, value)) {
- *curr_val = (Sint) value;
+ *curr_val = value;
return 1;
}
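(For context on the Sint-to-int change in the utils.c hunks above: mallopt(3) takes and returns plain int, so caching the current option values in word-sized Sint fields bought nothing. A hedged sketch of the underlying call, assuming a glibc-style malloc.h:)

#include <malloc.h>

/* Ask malloc to release memory back to the OS once the top of the heap
 * holds 'bytes' of free space; mallopt() returns nonzero on success. */
static int set_trim_threshold(int bytes)
{
    return mallopt(M_TRIM_THRESHOLD, bytes);
}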
diff --git a/erts/emulator/hipe/hipe_bif_list.m4 b/erts/emulator/hipe/hipe_bif_list.m4
index 083788997b..9780437847 100644
--- a/erts/emulator/hipe/hipe_bif_list.m4
+++ b/erts/emulator/hipe/hipe_bif_list.m4
@@ -150,7 +150,6 @@
/*
* Zero-arity BIFs that can fail.
*/
-fail_bif_interface_0(nbif_memory_0, memory_0)
fail_bif_interface_0(nbif_processes_0, processes_0)
/*
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index 57321259f9..6d4ad459cc 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -66,6 +66,9 @@ typedef char EventStateFlags;
#define ERTS_CIO_POLL_CTL ERTS_POLL_EXPORT(erts_poll_control)
#define ERTS_CIO_POLL_WAIT ERTS_POLL_EXPORT(erts_poll_wait)
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+#define ERTS_CIO_POLL_AS_INTR ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)
+#endif
#define ERTS_CIO_POLL_INTR ERTS_POLL_EXPORT(erts_poll_interrupt)
#define ERTS_CIO_POLL_INTR_TMD ERTS_POLL_EXPORT(erts_poll_interrupt_timed)
#define ERTS_CIO_NEW_POLLSET ERTS_POLL_EXPORT(erts_poll_create_pollset)
@@ -1115,6 +1118,14 @@ eready(Eterm id, ErtsDrvEventState *state, ErlDrvEventData event_data)
static void bad_fd_in_pollset( ErtsDrvEventState *, Eterm, Eterm, ErtsPollEvents);
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+void
+ERTS_CIO_EXPORT(erts_check_io_async_sig_interrupt)(void)
+{
+ ERTS_CIO_POLL_AS_INTR(pollset.ps);
+}
+#endif
+
void
ERTS_CIO_EXPORT(erts_check_io_interrupt)(int set)
{
diff --git a/erts/emulator/sys/common/erl_check_io.h b/erts/emulator/sys/common/erl_check_io.h
index 9b45a63913..7cc1658062 100644
--- a/erts/emulator/sys/common/erl_check_io.h
+++ b/erts/emulator/sys/common/erl_check_io.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -40,6 +40,10 @@ Eterm erts_check_io_info_kp(void *);
Eterm erts_check_io_info_nkp(void *);
int erts_check_io_max_files_kp(void);
int erts_check_io_max_files_nkp(void);
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+void erts_check_io_async_sig_interrupt_kp(void);
+void erts_check_io_async_sig_interrupt_nkp(void);
+#endif
void erts_check_io_interrupt_kp(int);
void erts_check_io_interrupt_nkp(int);
void erts_check_io_interrupt_timed_kp(int, long);
@@ -56,6 +60,9 @@ int erts_check_io_debug_nkp(void);
Uint erts_check_io_size(void);
Eterm erts_check_io_info(void *);
int erts_check_io_max_files(void);
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+void erts_check_io_async_sig_interrupt(void);
+#endif
void erts_check_io_interrupt(int);
void erts_check_io_interrupt_timed(int, long);
void erts_check_io(int);
diff --git a/erts/emulator/sys/common/erl_mseg.c b/erts/emulator/sys/common/erl_mseg.c
index eaef6680dd..3a90db607b 100644
--- a/erts/emulator/sys/common/erl_mseg.c
+++ b/erts/emulator/sys/common/erl_mseg.c
@@ -36,14 +36,11 @@
#include "erl_threads.h"
#include "erl_mtrace.h"
#include "erl_time.h"
+#include "erl_alloc.h"
#include "big.h"
#if HAVE_ERTS_MSEG
-#if defined(USE_THREADS) && !defined(ERTS_SMP)
-# define ERTS_THREADS_NO_SMP
-#endif
-
#define SEGTYPE ERTS_MTRACE_SEGMENT_ID
#ifndef HAVE_GETPAGESIZE
@@ -75,16 +72,9 @@
static int atoms_initialized;
-static Uint cache_check_interval;
-
typedef struct mem_kind_t MemKind;
-static void check_cache(void *unused);
static void mseg_clear_cache(MemKind*);
-static int is_cache_check_scheduled;
-#ifdef ERTS_THREADS_NO_SMP
-static int is_cache_check_requested;
-#endif
#if HALFWORD_HEAP
static int initialize_pmmap(void);
@@ -138,7 +128,8 @@ const ErtsMsegOpt_t erts_mseg_default_opt = {
1, /* Use cache */
1, /* Preserve data */
0, /* Absolute shrink threshold */
- 0 /* Relative shrink threshold */
+ 0, /* Relative shrink threshold */
+ 0 /* Scheduler specific */
#if HALFWORD_HEAP
,0 /* need low memory */
#endif
@@ -157,11 +148,10 @@ typedef struct {
Uint32 no;
} CallCounter;
-static int is_init_done;
static Uint page_size;
static Uint page_shift;
-static struct {
+typedef struct {
CallCounter alloc;
CallCounter dealloc;
CallCounter realloc;
@@ -172,7 +162,9 @@ static struct {
#endif
CallCounter clear_cache;
CallCounter check_cache;
-} calls;
+} ErtsMsegCalls;
+
+typedef struct ErtsMsegAllctr_t_ ErtsMsegAllctr_t;
struct mem_kind_t {
cache_desc_t cache_descs[MAX_CACHE_SIZE];
@@ -201,25 +193,84 @@ struct mem_kind_t {
} max_ever;
} segments;
+ ErtsMsegAllctr_t *ma;
const char* name;
MemKind* next;
};/*MemKind*/
+struct ErtsMsegAllctr_t_ {
+ int ix;
+
+ int is_init_done;
+ int is_thread_safe;
+ erts_mtx_t mtx;
+
+ int is_cache_check_scheduled;
+
+ MemKind* mk_list;
+
#if HALFWORD_HEAP
-static MemKind low_mem, hi_mem;
+ MemKind low_mem;
+ MemKind hi_mem;
#else
-static MemKind the_mem;
+ MemKind the_mem;
#endif
-static MemKind* mk_list = NULL;
-static Uint max_cache_size;
-static Uint abs_max_cache_bad_fit;
-static Uint rel_max_cache_bad_fit;
+ Uint max_cache_size;
+ Uint abs_max_cache_bad_fit;
+ Uint rel_max_cache_bad_fit;
+
+ ErtsMsegCalls calls;
#if CAN_PARTLY_DESTROY
-static Uint min_seg_size;
+ Uint min_seg_size;
+#endif
+
+};
+
+typedef union {
+ ErtsMsegAllctr_t mseg_alloc;
+ char align__[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsMsegAllctr_t))];
+} ErtsAlgndMsegAllctr_t;
+
+static int no_mseg_allocators;
+static ErtsAlgndMsegAllctr_t *aligned_mseg_allctr;
+
+#ifdef ERTS_SMP
+
+#define ERTS_MSEG_ALLCTR_IX(IX) \
+ (&aligned_mseg_allctr[(IX)].mseg_alloc)
+
+#define ERTS_MSEG_ALLCTR_SS() \
+ ERTS_MSEG_ALLCTR_IX((int) erts_get_scheduler_id())
+
+#define ERTS_MSEG_ALLCTR_OPT(OPT) \
+ ((OPT)->sched_spec ? ERTS_MSEG_ALLCTR_SS() : ERTS_MSEG_ALLCTR_IX(0))
+
+#else
+
+#define ERTS_MSEG_ALLCTR_IX(IX) \
+ (&aligned_mseg_allctr[0].mseg_alloc)
+
+#define ERTS_MSEG_ALLCTR_SS() \
+ (&aligned_mseg_allctr[0].mseg_alloc)
+
+#define ERTS_MSEG_ALLCTR_OPT(OPT) \
+ (&aligned_mseg_allctr[0].mseg_alloc)
+
#endif
+#define ERTS_MSEG_LOCK(MA) \
+do { \
+ if ((MA)->is_thread_safe) \
+ erts_mtx_lock(&(MA)->mtx); \
+} while (0)
+
+#define ERTS_MSEG_UNLOCK(MA) \
+do { \
+ if ((MA)->is_thread_safe) \
+ erts_mtx_unlock(&(MA)->mtx); \
+} while (0)
#define ERTS_MSEG_ALLOC_STAT(C,SZ) \
do { \
@@ -250,104 +301,44 @@ do { \
#define ONE_GIGA (1000000000)
-#define ZERO_CC(CC) (calls.CC.no = 0, calls.CC.giga_no = 0)
+#define ZERO_CC(MA, CC) ((MA)->calls.CC.no = 0, \
+ (MA)->calls.CC.giga_no = 0)
-#define INC_CC(CC) (calls.CC.no == ONE_GIGA - 1 \
- ? (calls.CC.giga_no++, calls.CC.no = 0) \
- : calls.CC.no++)
+#define INC_CC(MA, CC) ((MA)->calls.CC.no == ONE_GIGA - 1 \
+ ? ((MA)->calls.CC.giga_no++, \
+ (MA)->calls.CC.no = 0) \
+ : (MA)->calls.CC.no++)
-#define DEC_CC(CC) (calls.CC.no == 0 \
- ? (calls.CC.giga_no--, \
- calls.CC.no = ONE_GIGA - 1) \
- : calls.CC.no--)
+#define DEC_CC(MA, CC) ((MA)->calls.CC.no == 0 \
+ ? ((MA)->calls.CC.giga_no--, \
+ (MA)->calls.CC.no = ONE_GIGA - 1) \
+ : (MA)->calls.CC.no--)
-static erts_mtx_t mseg_mutex; /* Also needed when !USE_THREADS */
static erts_mtx_t init_atoms_mutex; /* Also needed when !USE_THREADS */
-#ifdef USE_THREADS
-#ifdef ERTS_THREADS_NO_SMP
-static erts_tid_t main_tid;
-static int async_handle = -1;
-#endif
-
-static void thread_safe_init(void)
-{
- erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
- erts_mtx_init(&mseg_mutex, "mseg");
-
-#ifdef ERTS_THREADS_NO_SMP
- main_tid = erts_thr_self();
-#endif
-}
-
-#endif
-
-static ErlTimer cache_check_timer;
static ERTS_INLINE void
-schedule_cache_check(void)
-{
- if (!is_cache_check_scheduled && is_init_done) {
-#ifdef ERTS_THREADS_NO_SMP
- if (!erts_equal_tids(erts_thr_self(), main_tid)) {
- if (!is_cache_check_requested) {
- is_cache_check_requested = 1;
- sys_async_ready(async_handle);
- }
- }
- else
-#endif
- {
- cache_check_timer.active = 0;
- erts_set_timer(&cache_check_timer,
- check_cache,
- NULL,
- NULL,
- cache_check_interval);
- is_cache_check_scheduled = 1;
-#ifdef ERTS_THREADS_NO_SMP
- is_cache_check_requested = 0;
-#endif
- }
- }
-}
-
-#ifdef ERTS_THREADS_NO_SMP
-
-static void
-check_schedule_cache_check(void)
+schedule_cache_check(ErtsMsegAllctr_t *ma)
{
- erts_mtx_lock(&mseg_mutex);
- if (is_cache_check_requested
- && !is_cache_check_scheduled) {
- schedule_cache_check();
- }
- erts_mtx_unlock(&mseg_mutex);
-}
-
-#endif
-static void
-mseg_shutdown(void)
-{
- MemKind* mk;
- erts_mtx_lock(&mseg_mutex);
- for (mk=mk_list; mk; mk=mk->next) {
- mseg_clear_cache(mk);
+ if (!ma->is_cache_check_scheduled && ma->is_init_done) {
+ erts_set_aux_work_timeout(ma->ix,
+ ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK,
+ 1);
+ ma->is_cache_check_scheduled = 1;
}
- erts_mtx_unlock(&mseg_mutex);
}
static ERTS_INLINE void *
-mseg_create(MemKind* mk, Uint size)
+mseg_create(ErtsMsegAllctr_t *ma, MemKind* mk, Uint size)
{
void *seg;
ASSERT(size % page_size == 0);
#if HALFWORD_HEAP
- if (mk == &low_mem) {
+ if (mk == &ma->low_mem) {
seg = pmmap(size);
if ((unsigned long) seg & CHECK_POINTER_MASK) {
erts_fprintf(stderr,"Pointer mask failure (0x%08lx)\n",(unsigned long) seg);
@@ -371,18 +362,18 @@ mseg_create(MemKind* mk, Uint size)
#endif
}
- INC_CC(create);
+ INC_CC(ma, create);
return seg;
}
static ERTS_INLINE void
-mseg_destroy(MemKind* mk, void *seg, Uint size)
+mseg_destroy(ErtsMsegAllctr_t *ma, MemKind* mk, void *seg, Uint size)
{
int res;
#if HALFWORD_HEAP
- if (mk == &low_mem) {
+ if (mk == &ma->low_mem) {
res = pmunmap((void *) seg, size);
}
else
@@ -401,14 +392,14 @@ mseg_destroy(MemKind* mk, void *seg, Uint size)
ASSERT(size % page_size == 0);
ASSERT(res == 0);
- INC_CC(destroy);
+ INC_CC(ma, destroy);
}
#if HAVE_MSEG_RECREATE
static ERTS_INLINE void *
-mseg_recreate(MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
+mseg_recreate(ErtsMsegAllctr_t *ma, MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
{
void *new_seg;
@@ -416,7 +407,7 @@ mseg_recreate(MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
ASSERT(new_size % page_size == 0);
#if HALFWORD_HEAP
- if (mk == &low_mem) {
+ if (mk == &ma->low_mem) {
new_seg = (void *) pmremap((void *) old_seg,
(size_t) old_size,
(size_t) new_size);
@@ -447,19 +438,39 @@ mseg_recreate(MemKind* mk, void *old_seg, Uint old_size, Uint new_size)
#endif
}
- INC_CC(recreate);
+ INC_CC(ma, recreate);
return new_seg;
}
#endif /* #if HAVE_MSEG_RECREATE */
+#ifdef DEBUG
+#define ERTS_DBG_MA_CHK_THR_ACCESS(MA) \
+do { \
+ if ((MA)->is_thread_safe) \
+ ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&(MA)->mtx) \
+ || erts_smp_is_system_blocked(0) \
+ || (ERTS_IS_CRASH_DUMPING \
+ && erts_smp_is_system_blocked(ERTS_BS_FLG_ALLOW_GC)));\
+ else \
+ ERTS_LC_ASSERT((MA)->ix == (int) erts_get_scheduler_id() \
+ || erts_smp_is_system_blocked(0) \
+ || (ERTS_IS_CRASH_DUMPING \
+ && erts_smp_is_system_blocked(ERTS_BS_FLG_ALLOW_GC)));\
+} while (0)
+#define ERTS_DBG_MK_CHK_THR_ACCESS(MK) \
+ ERTS_DBG_MA_CHK_THR_ACCESS((MK)->ma)
+#else
+#define ERTS_DBG_MA_CHK_THR_ACCESS(MA)
+#define ERTS_DBG_MK_CHK_THR_ACCESS(MK)
+#endif
static ERTS_INLINE cache_desc_t *
alloc_cd(MemKind* mk)
{
cache_desc_t *cd = mk->free_cache_descs;
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
if (cd)
mk->free_cache_descs = cd->next;
return cd;
@@ -468,7 +479,7 @@ alloc_cd(MemKind* mk)
static ERTS_INLINE void
free_cd(MemKind* mk, cache_desc_t *cd)
{
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
cd->next = mk->free_cache_descs;
mk->free_cache_descs = cd;
}
@@ -477,7 +488,7 @@ free_cd(MemKind* mk, cache_desc_t *cd)
static ERTS_INLINE void
link_cd(MemKind* mk, cache_desc_t *cd)
{
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
if (mk->cache)
mk->cache->prev = cd;
cd->next = mk->cache;
@@ -496,7 +507,7 @@ link_cd(MemKind* mk, cache_desc_t *cd)
static ERTS_INLINE void
end_link_cd(MemKind* mk, cache_desc_t *cd)
{
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
if (mk->cache_end)
mk->cache_end->next = cd;
cd->next = NULL;
@@ -515,7 +526,7 @@ end_link_cd(MemKind* mk, cache_desc_t *cd)
static ERTS_INLINE void
unlink_cd(MemKind* mk, cache_desc_t *cd)
{
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
if (cd->next)
cd->next->prev = cd->prev;
else
@@ -533,7 +544,7 @@ static ERTS_INLINE void
check_cache_limits(MemKind* mk)
{
cache_desc_t *cd;
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
mk->max_cached_seg_size = 0;
mk->min_cached_seg_size = ~((Uint) 0);
for (cd = mk->cache; cd; cd = cd->next) {
@@ -551,7 +562,7 @@ adjust_cache_size(MemKind* mk, int force_check_limits)
int check_limits = force_check_limits;
Sint max_cached = ((Sint) mk->segments.current.watermark
- (Sint) mk->segments.current.no);
- ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mseg_mutex));
+ ERTS_DBG_MK_CHK_THR_ACCESS(mk);
while (((Sint) mk->cache_size) > max_cached && ((Sint) mk->cache_size) > 0) {
ASSERT(mk->cache_end);
cd = mk->cache_end;
@@ -562,7 +573,7 @@ adjust_cache_size(MemKind* mk, int force_check_limits)
}
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg);
- mseg_destroy(mk, cd->seg, cd->size);
+ mseg_destroy(mk->ma, mk, cd->seg, cd->size);
unlink_cd(mk,cd);
free_cd(mk,cd);
}
@@ -571,7 +582,7 @@ adjust_cache_size(MemKind* mk, int force_check_limits)
check_cache_limits(mk);
}
-static void
+static Uint
check_one_cache(MemKind* mk)
{
if (mk->segments.current.watermark > mk->segments.current.no)
@@ -579,23 +590,37 @@ check_one_cache(MemKind* mk)
adjust_cache_size(mk, 0);
if (mk->cache_size)
- schedule_cache_check();
+ schedule_cache_check(mk->ma);
+ return mk->cache_size;
}
-static void check_cache(void* unused)
+static void do_cache_check(ErtsMsegAllctr_t *ma)
{
+ int empty_cache = 1;
MemKind* mk;
- erts_mtx_lock(&mseg_mutex);
- is_cache_check_scheduled = 0;
+ ERTS_MSEG_LOCK(ma);
- for (mk=mk_list; mk; mk=mk->next) {
- check_one_cache(mk);
+ for (mk=ma->mk_list; mk; mk=mk->next) {
+ if (check_one_cache(mk))
+ empty_cache = 0;
+ }
+
+ if (empty_cache) {
+ ma->is_cache_check_scheduled = 0;
+ erts_set_aux_work_timeout(ma->ix,
+ ERTS_SSI_AUX_WORK_MSEG_CACHE_CHECK,
+ 0);
}
- INC_CC(check_cache);
+ INC_CC(ma, check_cache);
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_UNLOCK(ma);
+}
+
+void erts_mseg_cache_check(void)
+{
+ do_cache_check(ERTS_MSEG_ALLCTR_SS());
}
static void
@@ -611,42 +636,44 @@ mseg_clear_cache(MemKind* mk)
mk->segments.current.watermark = mk->segments.current.no;
- INC_CC(clear_cache);
+ INC_CC(mk->ma, clear_cache);
}
-static ERTS_INLINE MemKind* memkind(const ErtsMsegOpt_t *opt)
+static ERTS_INLINE MemKind* memkind(ErtsMsegAllctr_t *ma,
+ const ErtsMsegOpt_t *opt)
{
#if HALFWORD_HEAP
- return opt->low_mem ? &low_mem : &hi_mem;
+ return opt->low_mem ? &ma->low_mem : &ma->hi_mem;
#else
- return &the_mem;
+ return &ma->the_mem;
#endif
}
static void *
-mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
+mseg_alloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, Uint *size_p,
+ const ErtsMsegOpt_t *opt)
{
Uint max, min, diff_size, size;
cache_desc_t *cd, *cand_cd;
void *seg;
- MemKind* mk = memkind(opt);
+ MemKind* mk = memkind(ma, opt);
- INC_CC(alloc);
+ INC_CC(ma, alloc);
size = PAGE_CEILING(*size_p);
#if CAN_PARTLY_DESTROY
- if (size < min_seg_size)
- min_seg_size = size;
+ if (size < ma->min_seg_size)
+ ma->min_seg_size = size;
#endif
if (!opt->cache) {
create_seg:
adjust_cache_size(mk,0);
- seg = mseg_create(mk, size);
+ seg = mseg_create(ma, mk, size);
if (!seg) {
mseg_clear_cache(mk);
- seg = mseg_create(mk, size);
+ seg = mseg_create(ma, mk, size);
if (!seg)
size = 0;
}
@@ -667,10 +694,10 @@ mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
diff_size = mk->min_cached_seg_size - size;
- if (diff_size > abs_max_cache_bad_fit)
+ if (diff_size > ma->abs_max_cache_bad_fit)
goto create_seg;
- if (100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size))
+ if (100*PAGES(diff_size) > ma->rel_max_cache_bad_fit*PAGES(size))
goto create_seg;
}
@@ -708,8 +735,8 @@ mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
diff_size = cand_cd->size - size;
- if (diff_size > abs_max_cache_bad_fit
- || 100*PAGES(diff_size) > rel_max_cache_bad_fit*PAGES(size)) {
+ if (diff_size > ma->abs_max_cache_bad_fit
+ || 100*PAGES(diff_size) > ma->rel_max_cache_bad_fit*PAGES(size)) {
if (mk->max_cached_seg_size < cand_cd->size)
mk->max_cached_seg_size = cand_cd->size;
if (mk->min_cached_seg_size > cand_cd->size)
@@ -740,18 +767,18 @@ mseg_alloc(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
static void
-mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size,
+mseg_dealloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg, Uint size,
const ErtsMsegOpt_t *opt)
{
- MemKind* mk = memkind(opt);
+ MemKind* mk = memkind(ma, opt);
cache_desc_t *cd;
ERTS_MSEG_DEALLOC_STAT(mk,size);
- if (!opt->cache || max_cache_size == 0) {
+ if (!opt->cache || ma->max_cache_size == 0) {
if (erts_mtrace_enabled)
erts_mtrace_crr_free(atype, SEGTYPE, seg);
- mseg_destroy(mk, seg, size);
+ mseg_destroy(ma, mk, seg, size);
}
else {
int check_limits = 0;
@@ -769,7 +796,7 @@ mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size,
}
if (erts_mtrace_enabled)
erts_mtrace_crr_free(SEGTYPE, SEGTYPE, cd->seg);
- mseg_destroy(mk, cd->seg, cd->size);
+ mseg_destroy(ma, mk, cd->seg, cd->size);
unlink_cd(mk,cd);
free_cd(mk,cd);
}
@@ -790,33 +817,34 @@ mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size,
if (check_limits)
check_cache_limits(mk);
- schedule_cache_check();
+ schedule_cache_check(ma);
}
- INC_CC(dealloc);
+ INC_CC(ma, dealloc);
}
static void *
-mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
- const ErtsMsegOpt_t *opt)
+mseg_realloc(ErtsMsegAllctr_t *ma, ErtsAlcType_t atype, void *seg,
+ Uint old_size, Uint *new_size_p, const ErtsMsegOpt_t *opt)
{
- MemKind* mk = memkind(opt);
+ MemKind* mk;
void *new_seg;
Uint new_size;
if (!seg || !old_size) {
- new_seg = mseg_alloc(atype, new_size_p, opt);
- DEC_CC(alloc);
+ new_seg = mseg_alloc(ma, atype, new_size_p, opt);
+ DEC_CC(ma, alloc);
return new_seg;
}
if (!(*new_size_p)) {
- mseg_dealloc(atype, seg, old_size, opt);
- DEC_CC(dealloc);
+ mseg_dealloc(ma, atype, seg, old_size, opt);
+ DEC_CC(ma, dealloc);
return NULL;
}
+ mk = memkind(ma, opt);
new_seg = seg;
new_size = PAGE_CEILING(*new_size_p);
@@ -826,8 +854,8 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
Uint shrink_sz = old_size - new_size;
#if CAN_PARTLY_DESTROY
- if (new_size < min_seg_size)
- min_seg_size = new_size;
+ if (new_size < ma->min_seg_size)
+ ma->min_seg_size = new_size;
#endif
if (shrink_sz < opt->abs_shrink_th
@@ -838,7 +866,7 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
#if CAN_PARTLY_DESTROY
- if (shrink_sz > min_seg_size
+ if (shrink_sz > ma->min_seg_size
&& mk->free_cache_descs
&& opt->cache) {
cache_desc_t *cd;
@@ -857,7 +885,7 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
new_size);
erts_mtrace_crr_alloc(cd->seg, SEGTYPE, SEGTYPE, cd->size);
}
- schedule_cache_check();
+ schedule_cache_check(ma);
}
else {
if (erts_mtrace_enabled)
@@ -866,7 +894,7 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
SEGTYPE,
seg,
new_size);
- mseg_destroy(mk, ((char *) seg) + new_size, shrink_sz);
+ mseg_destroy(ma, mk, ((char *) seg) + new_size, shrink_sz);
}
#elif HAVE_MSEG_RECREATE
@@ -875,14 +903,14 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
#else
- new_seg = mseg_alloc(atype, &new_size, opt);
+ new_seg = mseg_alloc(ma, atype, &new_size, opt);
if (!new_seg)
new_size = old_size;
else {
sys_memcpy(((char *) new_seg),
((char *) seg),
MIN(new_size, old_size));
- mseg_dealloc(atype, seg, old_size, opt);
+ mseg_dealloc(ma, atype, seg, old_size, opt);
}
#endif
@@ -892,34 +920,34 @@ mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size, Uint *new_size_p,
else {
if (!opt->preserv) {
- mseg_dealloc(atype, seg, old_size, opt);
- new_seg = mseg_alloc(atype, &new_size, opt);
+ mseg_dealloc(ma, atype, seg, old_size, opt);
+ new_seg = mseg_alloc(ma, atype, &new_size, opt);
}
else {
#if HAVE_MSEG_RECREATE
#if !CAN_PARTLY_DESTROY
do_recreate:
#endif
- new_seg = mseg_recreate(mk, (void *) seg, old_size, new_size);
+ new_seg = mseg_recreate(ma, mk, (void *) seg, old_size, new_size);
if (erts_mtrace_enabled)
erts_mtrace_crr_realloc(new_seg, atype, SEGTYPE, seg, new_size);
if (!new_seg)
new_size = old_size;
#else
- new_seg = mseg_alloc(atype, &new_size, opt);
+ new_seg = mseg_alloc(ma, atype, &new_size, opt);
if (!new_seg)
new_size = old_size;
else {
sys_memcpy(((char *) new_seg),
((char *) seg),
MIN(new_size, old_size));
- mseg_dealloc(atype, seg, old_size, opt);
+ mseg_dealloc(ma, atype, seg, old_size, opt);
}
#endif
}
}
- INC_CC(realloc);
+ INC_CC(ma, realloc);
*new_size_p = new_size;
@@ -937,7 +965,6 @@ static struct {
Eterm amcbf;
Eterm rmcbf;
Eterm mcs;
- Eterm cci;
Eterm memkind;
Eterm name;
@@ -973,13 +1000,13 @@ static void ERTS_INLINE atom_init(Eterm *atom, char *name)
#define AM_INIT(AM) atom_init(&am.AM, #AM)
static void
-init_atoms(void)
+init_atoms(ErtsMsegAllctr_t *ma)
{
#ifdef DEBUG
Eterm *atom;
#endif
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_UNLOCK(ma);
erts_mtx_lock(&init_atoms_mutex);
if (!atoms_initialized) {
@@ -997,7 +1024,6 @@ init_atoms(void)
AM_INIT(amcbf);
AM_INIT(rmcbf);
AM_INIT(mcs);
- AM_INIT(cci);
AM_INIT(status);
AM_INIT(cached_segments);
@@ -1025,7 +1051,7 @@ init_atoms(void)
#endif
}
- erts_mtx_lock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
atoms_initialized = 1;
erts_mtx_unlock(&init_atoms_mutex);
}
@@ -1082,7 +1108,8 @@ add_4tup(Uint **hpp, Uint *szp, Eterm *lp,
}
static Eterm
-info_options(char *prefix,
+info_options(ErtsMsegAllctr_t *ma,
+ char *prefix,
int *print_to_p,
void *print_to_arg,
Uint **hpp,
@@ -1093,30 +1120,26 @@ info_options(char *prefix,
if (print_to_p) {
int to = *print_to_p;
void *arg = print_to_arg;
- erts_print(to, arg, "%samcbf: %beu\n", prefix, abs_max_cache_bad_fit);
- erts_print(to, arg, "%srmcbf: %beu\n", prefix, rel_max_cache_bad_fit);
- erts_print(to, arg, "%smcs: %beu\n", prefix, max_cache_size);
- erts_print(to, arg, "%scci: %beu\n", prefix, cache_check_interval);
+ erts_print(to, arg, "%samcbf: %beu\n", prefix, ma->abs_max_cache_bad_fit);
+ erts_print(to, arg, "%srmcbf: %beu\n", prefix, ma->rel_max_cache_bad_fit);
+ erts_print(to, arg, "%smcs: %beu\n", prefix, ma->max_cache_size);
}
if (hpp || szp) {
if (!atoms_initialized)
- init_atoms();
+ init_atoms(ma);
res = NIL;
add_2tup(hpp, szp, &res,
- am.cci,
- bld_uint(hpp, szp, cache_check_interval));
- add_2tup(hpp, szp, &res,
am.mcs,
- bld_uint(hpp, szp, max_cache_size));
+ bld_uint(hpp, szp, ma->max_cache_size));
add_2tup(hpp, szp, &res,
am.rmcbf,
- bld_uint(hpp, szp, rel_max_cache_bad_fit));
+ bld_uint(hpp, szp, ma->rel_max_cache_bad_fit));
add_2tup(hpp, szp, &res,
am.amcbf,
- bld_uint(hpp, szp, abs_max_cache_bad_fit));
+ bld_uint(hpp, szp, ma->abs_max_cache_bad_fit));
}
@@ -1124,18 +1147,18 @@ info_options(char *prefix,
}
static Eterm
-info_calls(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+info_calls(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
if (print_to_p) {
-#define PRINT_CC(TO, TOA, CC) \
- if (calls.CC.giga_no == 0) \
- erts_print(TO, TOA, "mseg_%s calls: %b32u\n", #CC, calls.CC.no); \
- else \
+#define PRINT_CC(TO, TOA, CC) \
+ if (ma->calls.CC.giga_no == 0) \
+ erts_print(TO, TOA, "mseg_%s calls: %b32u\n", #CC, ma->calls.CC.no); \
+ else \
erts_print(TO, TOA, "mseg_%s calls: %b32u%09b32u\n", #CC, \
- calls.CC.giga_no, calls.CC.no)
+ ma->calls.CC.giga_no, ma->calls.CC.no)
int to = *print_to_p;
void *arg = print_to_arg;
@@ -1161,48 +1184,48 @@ info_calls(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
add_3tup(hpp, szp, &res,
am.mseg_check_cache,
- bld_unstable_uint(hpp, szp, calls.check_cache.giga_no),
- bld_unstable_uint(hpp, szp, calls.check_cache.no));
+ bld_unstable_uint(hpp, szp, ma->calls.check_cache.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.check_cache.no));
add_3tup(hpp, szp, &res,
am.mseg_clear_cache,
- bld_unstable_uint(hpp, szp, calls.clear_cache.giga_no),
- bld_unstable_uint(hpp, szp, calls.clear_cache.no));
+ bld_unstable_uint(hpp, szp, ma->calls.clear_cache.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.clear_cache.no));
#if HAVE_MSEG_RECREATE
add_3tup(hpp, szp, &res,
am.mseg_recreate,
- bld_unstable_uint(hpp, szp, calls.recreate.giga_no),
- bld_unstable_uint(hpp, szp, calls.recreate.no));
+ bld_unstable_uint(hpp, szp, ma->calls.recreate.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.recreate.no));
#endif
add_3tup(hpp, szp, &res,
am.mseg_destroy,
- bld_unstable_uint(hpp, szp, calls.destroy.giga_no),
- bld_unstable_uint(hpp, szp, calls.destroy.no));
+ bld_unstable_uint(hpp, szp, ma->calls.destroy.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.destroy.no));
add_3tup(hpp, szp, &res,
am.mseg_create,
- bld_unstable_uint(hpp, szp, calls.create.giga_no),
- bld_unstable_uint(hpp, szp, calls.create.no));
+ bld_unstable_uint(hpp, szp, ma->calls.create.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.create.no));
add_3tup(hpp, szp, &res,
am.mseg_realloc,
- bld_unstable_uint(hpp, szp, calls.realloc.giga_no),
- bld_unstable_uint(hpp, szp, calls.realloc.no));
+ bld_unstable_uint(hpp, szp, ma->calls.realloc.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.realloc.no));
add_3tup(hpp, szp, &res,
am.mseg_dealloc,
- bld_unstable_uint(hpp, szp, calls.dealloc.giga_no),
- bld_unstable_uint(hpp, szp, calls.dealloc.no));
+ bld_unstable_uint(hpp, szp, ma->calls.dealloc.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.dealloc.no));
add_3tup(hpp, szp, &res,
am.mseg_alloc,
- bld_unstable_uint(hpp, szp, calls.alloc.giga_no),
- bld_unstable_uint(hpp, szp, calls.alloc.no));
+ bld_unstable_uint(hpp, szp, ma->calls.alloc.giga_no),
+ bld_unstable_uint(hpp, szp, ma->calls.alloc.no));
}
return res;
}
static Eterm
-info_status(MemKind* mk, int *print_to_p, void *print_to_arg,
+info_status(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
int begin_new_max_period, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
@@ -1258,7 +1281,7 @@ info_status(MemKind* mk, int *print_to_p, void *print_to_arg,
return res;
}
-static Eterm info_memkind(MemKind* mk, int *print_to_p, void *print_to_arg,
+static Eterm info_memkind(ErtsMsegAllctr_t *ma, MemKind* mk, int *print_to_p, void *print_to_arg,
int begin_max_per, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
@@ -1274,8 +1297,8 @@ static Eterm info_memkind(MemKind* mk, int *print_to_p, void *print_to_arg,
atoms[2] = am.calls;
values[0] = erts_bld_string(hpp, szp, mk->name);
}
- values[1] = info_status(mk, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[2] = info_calls(print_to_p, print_to_arg, hpp, szp);
+ values[1] = info_status(ma, mk, print_to_p, print_to_arg, begin_max_per, hpp, szp);
+ values[2] = info_calls(ma, print_to_p, print_to_arg, hpp, szp);
if (hpp || szp)
res = bld_2tup_list(hpp, szp, 3, atoms, values);
@@ -1285,7 +1308,7 @@ static Eterm info_memkind(MemKind* mk, int *print_to_p, void *print_to_arg,
static Eterm
-info_version(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
+info_version(ErtsMsegAllctr_t *ma, int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
{
Eterm res = THE_NON_VALUE;
@@ -1306,56 +1329,64 @@ info_version(int *print_to_p, void *print_to_arg, Uint **hpp, Uint *szp)
\* */
Eterm
-erts_mseg_info_options(int *print_to_p, void *print_to_arg,
+erts_mseg_info_options(int ix,
+ int *print_to_p, void *print_to_arg,
Uint **hpp, Uint *szp)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix);
Eterm res;
- erts_mtx_lock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
- res = info_options("option ", print_to_p, print_to_arg, hpp, szp);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
- erts_mtx_unlock(&mseg_mutex);
+ res = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
+
+ ERTS_MSEG_UNLOCK(ma);
return res;
}
Eterm
-erts_mseg_info(int *print_to_p,
+erts_mseg_info(int ix,
+ int *print_to_p,
void *print_to_arg,
int begin_max_per,
Uint **hpp,
Uint *szp)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(ix);
Eterm res = THE_NON_VALUE;
Eterm atoms[4];
Eterm values[4];
Uint n = 0;
- erts_mtx_lock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
+
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
if (hpp || szp) {
if (!atoms_initialized)
- init_atoms();
+ init_atoms(ma);
atoms[0] = am.version;
atoms[1] = am.options;
atoms[2] = am.memkind;
atoms[3] = am.memkind;
}
- values[n++] = info_version(print_to_p, print_to_arg, hpp, szp);
- values[n++] = info_options("option ", print_to_p, print_to_arg, hpp, szp);
+ values[n++] = info_version(ma, print_to_p, print_to_arg, hpp, szp);
+ values[n++] = info_options(ma, "option ", print_to_p, print_to_arg, hpp, szp);
#if HALFWORD_HEAP
- values[n++] = info_memkind(&low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
- values[n++] = info_memkind(&hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
+ values[n++] = info_memkind(ma, &ma->low_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
+ values[n++] = info_memkind(ma, &ma->hi_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
#else
- values[n++] = info_memkind(&the_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
+ values[n++] = info_memkind(ma, &ma->the_mem, print_to_p, print_to_arg, begin_max_per, hpp, szp);
#endif
if (hpp || szp)
res = bld_2tup_list(hpp, szp, n, atoms, values);
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_UNLOCK(ma);
return res;
}
@@ -1363,10 +1394,12 @@ erts_mseg_info(int *print_to_p,
void *
erts_mseg_alloc_opt(ErtsAlcType_t atype, Uint *size_p, const ErtsMsegOpt_t *opt)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *seg;
- erts_mtx_lock(&mseg_mutex);
- seg = mseg_alloc(atype, size_p, opt);
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ seg = mseg_alloc(ma, atype, size_p, opt);
+ ERTS_MSEG_UNLOCK(ma);
return seg;
}
@@ -1377,12 +1410,14 @@ erts_mseg_alloc(ErtsAlcType_t atype, Uint *size_p)
}
void
-erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg, Uint size,
- const ErtsMsegOpt_t *opt)
+erts_mseg_dealloc_opt(ErtsAlcType_t atype, void *seg,
+ Uint size, const ErtsMsegOpt_t *opt)
{
- erts_mtx_lock(&mseg_mutex);
- mseg_dealloc(atype, seg, size, opt);
- erts_mtx_unlock(&mseg_mutex);
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ mseg_dealloc(ma, atype, seg, size, opt);
+ ERTS_MSEG_UNLOCK(ma);
}
void
@@ -1392,44 +1427,60 @@ erts_mseg_dealloc(ErtsAlcType_t atype, void *seg, Uint size)
}
void *
-erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg, Uint old_size,
- Uint *new_size_p, const ErtsMsegOpt_t *opt)
+erts_mseg_realloc_opt(ErtsAlcType_t atype, void *seg,
+ Uint old_size, Uint *new_size_p,
+ const ErtsMsegOpt_t *opt)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
void *new_seg;
- erts_mtx_lock(&mseg_mutex);
- new_seg = mseg_realloc(atype, seg, old_size, new_size_p, opt);
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ new_seg = mseg_realloc(ma, atype, seg, old_size, new_size_p, opt);
+ ERTS_MSEG_UNLOCK(ma);
return new_seg;
}
void *
-erts_mseg_realloc(ErtsAlcType_t atype, void *seg, Uint old_size,
- Uint *new_size_p)
+erts_mseg_realloc(ErtsAlcType_t atype, void *seg,
+ Uint old_size, Uint *new_size_p)
{
- return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p, &erts_mseg_default_opt);
+ return erts_mseg_realloc_opt(atype, seg, old_size, new_size_p,
+ &erts_mseg_default_opt);
}
void
erts_mseg_clear_cache(void)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_SS();
MemKind* mk;
- erts_mtx_lock(&mseg_mutex);
- for (mk=mk_list; mk; mk=mk->next) {
+
+start:
+
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ for (mk=ma->mk_list; mk; mk=mk->next) {
mseg_clear_cache(mk);
}
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_UNLOCK(ma);
+
+ if (ma->ix != 0) {
+ ma = ERTS_MSEG_ALLCTR_IX(0);
+ goto start;
+ }
}
Uint
-erts_mseg_no(void)
+erts_mseg_no(const ErtsMsegOpt_t *opt)
{
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_OPT(opt);
MemKind* mk;
Uint n = 0;
- erts_mtx_lock(&mseg_mutex);
- for (mk=mk_list; mk; mk=mk->next) {
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ for (mk=ma->mk_list; mk; mk=mk->next) {
n += mk->segments.current.no;
}
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_UNLOCK(ma);
return n;
}
@@ -1439,7 +1490,7 @@ erts_mseg_unit_size(void)
return page_size;
}
-static void mem_kind_init(MemKind* mk, const char* name)
+static void mem_kind_init(ErtsMsegAllctr_t *ma, MemKind* mk, const char* name)
{
unsigned i;
@@ -1450,10 +1501,10 @@ static void mem_kind_init(MemKind* mk, const char* name)
mk->cache_size = 0;
mk->cache_hits = 0;
- if (max_cache_size > 0) {
- for (i = 0; i < max_cache_size - 1; i++)
+ if (ma->max_cache_size > 0) {
+ for (i = 0; i < ma->max_cache_size - 1; i++)
mk->cache_descs[i].next = &mk->cache_descs[i + 1];
- mk->cache_descs[max_cache_size - 1].next = NULL;
+ mk->cache_descs[ma->max_cache_size - 1].next = NULL;
mk->free_cache_descs = &mk->cache_descs[0];
}
else
@@ -1467,30 +1518,38 @@ static void mem_kind_init(MemKind* mk, const char* name)
mk->segments.max_ever.no = 0;
mk->segments.max_ever.sz = 0;
+ mk->ma = ma;
mk->name = name;
- mk->next = mk_list;
- mk_list = mk;
+ mk->next = ma->mk_list;
+ ma->mk_list = mk;
}
+
+
void
erts_mseg_init(ErtsMsegInit_t *init)
{
- atoms_initialized = 0;
- is_init_done = 0;
+ int i;
+ UWord x;
- /* Options ... */
+#ifdef ERTS_SMP
+ no_mseg_allocators = init->nos + 1;
+#else
+ no_mseg_allocators = 1;
+#endif
- abs_max_cache_bad_fit = init->amcbf;
- rel_max_cache_bad_fit = init->rmcbf;
- max_cache_size = init->mcs;
- cache_check_interval = init->cci;
+ x = (UWord) malloc(sizeof(ErtsAlgndMsegAllctr_t)
+ *no_mseg_allocators
+ + (ERTS_CACHE_LINE_SIZE-1));
+ if (x & ERTS_CACHE_LINE_MASK)
+ x = (x & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE;
+ ASSERT((x & ERTS_CACHE_LINE_MASK) == 0);
+ aligned_mseg_allctr = (ErtsAlgndMsegAllctr_t *) x;
- /* */
+ atoms_initialized = 0;
-#ifdef USE_THREADS
- thread_safe_init();
-#endif
+ erts_mtx_init(&init_atoms_mutex, "mseg_init_atoms");
#if HAVE_MMAP && !defined(MAP_ANON)
mmap_fd = open("/dev/zero", O_RDWR);
@@ -1512,34 +1571,55 @@ erts_mseg_init(ErtsMsegInit_t *init)
page_shift++;
}
- sys_memzero((void *) &calls, sizeof(calls));
+ for (i = 0; i < no_mseg_allocators; i++) {
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(i);
-#if CAN_PARTLY_DESTROY
- min_seg_size = ~((Uint) 0);
-#endif
+ ma->ix = i;
+
+ ma->is_init_done = 0;
+
+ if (i != 0)
+ ma->is_thread_safe = 0;
+ else {
+ ma->is_thread_safe = 1;
+ erts_mtx_init(&ma->mtx, "mseg");
+ }
+
+ ma->is_cache_check_scheduled = 0;
+
+ /* Options ... */
+
+ ma->abs_max_cache_bad_fit = init->amcbf;
+ ma->rel_max_cache_bad_fit = init->rmcbf;
+ ma->max_cache_size = init->mcs;
- if (max_cache_size > MAX_CACHE_SIZE)
- max_cache_size = MAX_CACHE_SIZE;
+ if (ma->max_cache_size > MAX_CACHE_SIZE)
+ ma->max_cache_size = MAX_CACHE_SIZE;
+
+ ma->mk_list = NULL;
#if HALFWORD_HEAP
- mem_kind_init(&low_mem, "low memory");
- mem_kind_init(&hi_mem, "high memory");
+ mem_kind_init(ma, &ma->low_mem, "low memory");
+ mem_kind_init(ma, &ma->hi_mem, "high memory");
#else
- mem_kind_init(&the_mem, "all memory");
+ mem_kind_init(ma, &ma->the_mem, "all memory");
#endif
- is_cache_check_scheduled = 0;
-#ifdef ERTS_THREADS_NO_SMP
- is_cache_check_requested = 0;
+ sys_memzero((void *) &ma->calls, sizeof(ErtsMsegCalls));
+
+#if CAN_PARTLY_DESTROY
+ ma->min_seg_size = ~((Uint) 0);
#endif
+ }
}
-static ERTS_INLINE Uint tot_cache_size(void)
+static ERTS_INLINE Uint tot_cache_size(ErtsMsegAllctr_t *ma)
{
MemKind* mk;
Uint sz = 0;
- for (mk=mk_list; mk; mk=mk->next) {
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ for (mk=ma->mk_list; mk; mk=mk->next) {
sz += mk->cache_size;
}
return sz;
@@ -1552,25 +1632,13 @@ static ERTS_INLINE Uint tot_cache_size(void)
void
erts_mseg_late_init(void)
{
-#ifdef ERTS_THREADS_NO_SMP
- int handle =
- erts_register_async_ready_callback(
- check_schedule_cache_check);
-#endif
- erts_mtx_lock(&mseg_mutex);
- is_init_done = 1;
-#ifdef ERTS_THREADS_NO_SMP
- async_handle = handle;
-#endif
- if (tot_cache_size())
- schedule_cache_check();
- erts_mtx_unlock(&mseg_mutex);
-}
-
-void
-erts_mseg_exit(void)
-{
- mseg_shutdown();
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_SS();
+ ERTS_MSEG_LOCK(ma);
+ ERTS_DBG_MA_CHK_THR_ACCESS(ma);
+ ma->is_init_done = 1;
+ if (tot_cache_size(ma))
+ schedule_cache_check(ma);
+ ERTS_MSEG_UNLOCK(ma);
}
#endif /* #if HAVE_ERTS_MSEG */
@@ -1599,12 +1667,13 @@ erts_mseg_test(unsigned long op,
erts_mseg_clear_cache();
return (unsigned long) 0;
case 0x405:
- return (unsigned long) erts_mseg_no();
+ return (unsigned long) erts_mseg_no(&erts_mseg_default_opt);
case 0x406: {
+ ErtsMsegAllctr_t *ma = ERTS_MSEG_ALLCTR_IX(0);
unsigned long res;
- erts_mtx_lock(&mseg_mutex);
- res = (unsigned long) tot_cache_size();
- erts_mtx_unlock(&mseg_mutex);
+ ERTS_MSEG_LOCK(ma);
+ res = (unsigned long) tot_cache_size(ma);
+ ERTS_MSEG_UNLOCK(ma);
return res;
}
#else /* #if HAVE_ERTS_MSEG */
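(The new erts_mseg_init() above over-allocates the array of per-instance structures and rounds the base address up to a cache-line boundary, so each scheduler's ErtsMsegAllctr_t sits on its own cache line and false sharing is avoided. A self-contained sketch of that round-up, assuming a 64-byte cache line; ERTS takes the real value from ERTS_CACHE_LINE_SIZE:)

#include <stdint.h>
#include <stdlib.h>

#define CACHE_LINE_SIZE ((uintptr_t) 64)       /* assumed; power of two */
#define CACHE_LINE_MASK (CACHE_LINE_SIZE - 1)

/* Returns a cache-line-aligned block of at least 'size' bytes. As in
 * erts_mseg_init(), the original malloc() pointer is not kept; the
 * allocator array lives for the lifetime of the VM and is never freed. */
static void *alloc_cache_aligned(size_t size)
{
    uintptr_t x = (uintptr_t) malloc(size + CACHE_LINE_SIZE - 1);
    if (!x)
        return NULL;
    if (x & CACHE_LINE_MASK)
        x = (x & ~CACHE_LINE_MASK) + CACHE_LINE_SIZE;
    return (void *) x;
}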
diff --git a/erts/emulator/sys/common/erl_mseg.h b/erts/emulator/sys/common/erl_mseg.h
index 8f116030a8..741080fb78 100644
--- a/erts/emulator/sys/common/erl_mseg.h
+++ b/erts/emulator/sys/common/erl_mseg.h
@@ -44,7 +44,7 @@ typedef struct {
Uint amcbf;
Uint rmcbf;
Uint mcs;
- Uint cci;
+ Uint nos;
} ErtsMsegInit_t;
#define ERTS_MSEG_INIT_DEFAULT_INITIALIZER \
@@ -60,6 +60,7 @@ typedef struct {
int preserv;
UWord abs_shrink_th;
UWord rel_shrink_th;
+ int sched_spec;
#if HALFWORD_HEAP
int low_mem;
#endif
@@ -75,14 +76,14 @@ void *erts_mseg_realloc(ErtsAlcType_t, void *, Uint, Uint *);
void *erts_mseg_realloc_opt(ErtsAlcType_t, void *, Uint, Uint *,
const ErtsMsegOpt_t *);
void erts_mseg_clear_cache(void);
-Uint erts_mseg_no(void);
+void erts_mseg_cache_check(void);
+Uint erts_mseg_no(const ErtsMsegOpt_t *);
Uint erts_mseg_unit_size(void);
void erts_mseg_init(ErtsMsegInit_t *init);
void erts_mseg_late_init(void); /* Has to be called after all allocators,
threads and timers have been initialized. */
-void erts_mseg_exit(void);
-Eterm erts_mseg_info_options(int *, void*, Uint **, Uint *);
-Eterm erts_mseg_info(int *, void*, int, Uint **, Uint *);
+Eterm erts_mseg_info_options(int, int *, void*, Uint **, Uint *);
+Eterm erts_mseg_info(int, int *, void*, int, Uint **, Uint *);
#endif /* #if HAVE_ERTS_MSEG */
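(The per-instance call counters threaded through the erl_mseg.c hunks above, via INC_CC/DEC_CC, avoid silent 32-bit wrap-around by splitting each counter into a (giga_no, no) pair and spilling into giga_no once no reaches one billion. A standalone sketch of the same scheme:)

#include <stdint.h>

#define ONE_GIGA 1000000000

typedef struct {
    uint32_t giga_no;  /* completed billions  */
    uint32_t no;       /* 0 .. ONE_GIGA - 1   */
} call_counter;

static void cc_inc(call_counter *cc)
{
    if (cc->no == ONE_GIGA - 1) {
        cc->giga_no++;   /* spill into the high word */
        cc->no = 0;
    } else {
        cc->no++;
    }
}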
diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c
index 9bd64f5908..80db2055a2 100644
--- a/erts/emulator/sys/common/erl_poll.c
+++ b/erts/emulator/sys/common/erl_poll.c
@@ -68,6 +68,7 @@
# endif
# endif
#endif
+#include "erl_thr_progress.h"
#include "erl_driver.h"
#include "erl_alloc.h"
@@ -114,7 +115,7 @@
#endif
#define ERTS_POLL_USE_WAKEUP_PIPE \
- (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP))
+ (ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(USE_THREADS))
#ifdef ERTS_SMP
@@ -261,7 +262,6 @@ struct ErtsPollSet_ {
#ifdef ERTS_SMP
erts_atomic32_t polled;
erts_smp_mtx_t mtx;
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
#endif
#if ERTS_POLL_USE_WAKEUP_PIPE
int wake_fds[2];
@@ -269,10 +269,8 @@ struct ErtsPollSet_ {
#if ERTS_POLL_USE_FALLBACK
int fallback_used;
#endif
-#ifdef ERTS_SMP
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
erts_atomic32_t wakeup_state;
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- volatile int wakeup_state;
#endif
erts_smp_atomic32_t timeout;
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
@@ -345,21 +343,16 @@ static void print_misc_debug_info(void);
static ERTS_INLINE void
reset_wakeup_state(ErtsPollSet ps)
{
-#ifdef ERTS_SMP
- erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
- ERTS_THR_MEMORY_BARRIER;
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- ps->wakeup_state = 0;
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ erts_atomic32_set_mb(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN);
#endif
}
static ERTS_INLINE int
is_woken(ErtsPollSet ps)
{
-#ifdef ERTS_SMP
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
return erts_atomic32_read_acqb(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN;
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- return ps->wakeup_state != ERTS_POLL_NOT_WOKEN;
#else
return 0;
#endif
@@ -368,13 +361,9 @@ is_woken(ErtsPollSet ps)
static ERTS_INLINE int
is_interrupted_reset(ErtsPollSet ps)
{
-#ifdef ERTS_SMP
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
return (erts_atomic32_xchg_nob(&ps->wakeup_state, ERTS_POLL_NOT_WOKEN)
== ERTS_POLL_WOKEN_INTR);
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- int res = ps->wakeup_state == ERTS_POLL_WOKEN_INTR;
- ps->wakeup_state = ERTS_POLL_NOT_WOKEN;
- return res;
#else
return 0;
#endif
@@ -383,16 +372,13 @@ is_interrupted_reset(ErtsPollSet ps)
static ERTS_INLINE void
woke_up(ErtsPollSet ps)
{
-#ifdef ERTS_SMP
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
erts_aint32_t wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
if (wakeup_state == ERTS_POLL_NOT_WOKEN)
(void) erts_atomic32_cmpxchg_nob(&ps->wakeup_state,
ERTS_POLL_WOKEN,
ERTS_POLL_NOT_WOKEN);
ASSERT(erts_atomic32_read_nob(&ps->wakeup_state) != ERTS_POLL_NOT_WOKEN);
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- if (ps->wakeup_state == ERTS_POLL_NOT_WOKEN)
- ps->wakeup_state = ERTS_POLL_WOKEN;
#endif
}
@@ -403,28 +389,27 @@ woke_up(ErtsPollSet ps)
#if ERTS_POLL_USE_WAKEUP_PIPE
static ERTS_INLINE void
-wake_poller(ErtsPollSet ps, int interrupted)
+wake_poller(ErtsPollSet ps, int interrupted, int async_signal_safe)
{
- int wake = 0;
-#ifdef ERTS_SMP
- erts_aint32_t wakeup_state;
- if (!interrupted)
- wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state,
- ERTS_POLL_WOKEN,
- ERTS_POLL_NOT_WOKEN);
+ int wake;
+ if (async_signal_safe)
+ wake = 1;
else {
- /*
- * We might unnecessarily write to the pipe, however,
- * that isn't problematic.
- */
- wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
- erts_atomic32_set_relb(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
+ erts_aint32_t wakeup_state;
+ if (!interrupted)
+ wakeup_state = erts_atomic32_cmpxchg_relb(&ps->wakeup_state,
+ ERTS_POLL_WOKEN,
+ ERTS_POLL_NOT_WOKEN);
+ else {
+ /*
+ * We might unnecessarily write to the pipe, however,
+ * that isn't problematic.
+ */
+ wakeup_state = erts_atomic32_read_nob(&ps->wakeup_state);
+ erts_atomic32_set_relb(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
+ }
+ wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
}
- wake = wakeup_state == ERTS_POLL_NOT_WOKEN;
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- wake = ps->wakeup_state == ERTS_POLL_NOT_WOKEN;
- ps->wakeup_state = interrupted ? ERTS_POLL_WOKEN_INTR : ERTS_POLL_NOT_WOKEN;
-#endif
/*
* NOTE: This function might be called from signal handlers in the
* non-smp case; therefore, it has to be async-signal safe in
@@ -439,9 +424,17 @@ wake_poller(ErtsPollSet ps, int interrupted)
res = write(ps->wake_fds[1], "!", 1);
} while (res < 0 && errno == EINTR);
if (res <= 0 && errno != ERRNO_BLOCK) {
- fatal_error_async_signal_safe(__FILE__
- ":XXX:wake_poller(): "
- "Failed to write on wakeup pipe\n");
+ if (async_signal_safe)
+ fatal_error_async_signal_safe(__FILE__
+ ":XXX:wake_poller(): "
+ "Failed to write on wakeup pipe\n");
+ else
+ fatal_error("%s:%d:wake_poller(): "
+ "Failed to write to wakeup pipe fd=%d: "
+ "%s (%d)\n",
+ __FILE__, __LINE__,
+ ps->wake_fds[1],
+ erl_errno_id(errno), errno);
}
}
}
@@ -449,11 +442,18 @@ wake_poller(ErtsPollSet ps, int interrupted)
static ERTS_INLINE void
cleanup_wakeup_pipe(ErtsPollSet ps)
{
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ int intr = 0;
+#endif
int fd = ps->wake_fds[0];
int res;
do {
char buf[32];
res = read(fd, buf, sizeof(buf));
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ if (res > 0)
+ intr = 1;
+#endif
} while (res > 0 || (res < 0 && errno == EINTR));
if (res < 0 && errno != ERRNO_BLOCK) {
fatal_error("%s:%d:cleanup_wakeup_pipe(): "
@@ -463,6 +463,10 @@ cleanup_wakeup_pipe(ErtsPollSet ps)
fd,
erl_errno_id(errno), errno);
}
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+ if (intr)
+ erts_atomic32_set_nob(&ps->wakeup_state, ERTS_POLL_WOKEN_INTR);
+#endif
}
static void
@@ -1497,7 +1501,7 @@ ERTS_POLL_EXPORT(erts_poll_controlv)(ErtsPollSet ps,
#ifdef ERTS_SMP
if (final_do_wake)
- wake_poller(ps, 0);
+ wake_poller(ps, 0, 0);
#endif /* ERTS_SMP */
}
@@ -1520,7 +1524,7 @@ ERTS_POLL_EXPORT(erts_poll_control)(ErtsPollSet ps,
#ifdef ERTS_SMP
if (*do_wake) {
- wake_poller(ps, 0);
+ wake_poller(ps, 0, 0);
}
#endif /* ERTS_SMP */
@@ -1893,9 +1897,9 @@ save_poll_result(ErtsPollSet ps, ErtsPollResFd pr[], int max_res,
}
static ERTS_INLINE int
-check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
+check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res)
{
- ASSERT(!*ps_locked);
+ int res;
if (erts_smp_atomic_read_nob(&ps->no_of_user_fds) == 0
&& tv->tv_usec == 0 && tv->tv_sec == 0) {
/* Nothing to poll and zero timeout; done... */
@@ -1915,16 +1919,23 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
timeout = INT_MAX;
if (max_res > ps->res_events_len)
grow_res_events(ps, max_res);
- return epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
+#ifdef ERTS_SMP
+ if (timeout)
+ erts_thr_progress_prepare_wait(NULL);
+#endif
+ res = epoll_wait(ps->kp_fd, ps->res_events, max_res, (int)timeout);
#elif ERTS_POLL_USE_KQUEUE /* --- kqueue ------------------------------ */
struct timespec ts;
- ts.tv_sec = tv->tv_sec;
- ts.tv_nsec = tv->tv_usec*1000;
if (max_res > ps->res_events_len)
grow_res_events(ps, max_res);
- return kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
+#ifdef ERTS_SMP
+ if (timeout)
+ erts_thr_progress_prepare_wait(NULL);
+#endif
+ ts.tv_sec = tv->tv_sec;
+ ts.tv_nsec = tv->tv_usec*1000;
+ res = kevent(ps->kp_fd, NULL, 0, ps->res_events, max_res, &ts);
#endif /* ----------------------------------------- */
-
}
else /* use fallback (i.e. poll() or select()) */
#endif /* ERTS_POLL_USE_FALLBACK */
@@ -1947,22 +1958,38 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
if (poll_res.dp_nfds > ps->res_events_len)
grow_res_events(ps, poll_res.dp_nfds);
poll_res.dp_fds = ps->res_events;
+#ifdef ERTS_SMP
+ if (timeout)
+ erts_thr_progress_prepare_wait(NULL);
+#endif
poll_res.dp_timeout = (int) timeout;
- return ioctl(ps->kp_fd, DP_POLL, &poll_res);
+ res = ioctl(ps->kp_fd, DP_POLL, &poll_res);
#elif ERTS_POLL_USE_POLL /* --- poll -------------------------------- */
if (timeout > INT_MAX)
timeout = INT_MAX;
- return poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
+#ifdef ERTS_SMP
+ if (timeout)
+ erts_thr_progress_prepare_wait(NULL);
+#endif
+ res = poll(ps->poll_fds, ps->no_poll_fds, (int) timeout);
#elif ERTS_POLL_USE_SELECT /* --- select ------------------------------ */
- int res;
+ SysTimeval to = *tv;
+
ps->res_input_fds = ps->input_fds;
ps->res_output_fds = ps->output_fds;
+
+#ifdef ERTS_SMP
+ if (to.tv_sec || to.tv_usec)
+ erts_thr_progress_prepare_wait(NULL);
+#endif
res = select(ps->max_fd + 1,
&ps->res_input_fds,
&ps->res_output_fds,
NULL,
- tv);
+ &to);
#ifdef ERTS_SMP
+ if (to.tv_sec || to.tv_usec)
+ erts_thr_progress_finalize_wait(NULL);
if (res < 0
&& errno == EBADF
&& ERTS_POLLSET_HAVE_UPDATE_REQUESTS(ps)) {
@@ -1978,15 +2005,16 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
* have triggered, we fake an EAGAIN error and let the caller
* restart us.
*/
- SysTimeval zero_tv = {0, 0};
- *ps_locked = 1;
+ to.tv_sec = 0;
+ to.tv_usec = 0;
ERTS_POLLSET_LOCK(ps);
handle_update_requests(ps);
+ ERTS_POLLSET_UNLOCK(ps);
res = select(ps->max_fd + 1,
&ps->res_input_fds,
&ps->res_output_fds,
NULL,
- &zero_tv);
+ &to);
if (res == 0) {
errno = EAGAIN;
res = -1;
@@ -1996,6 +2024,11 @@ check_fd_events(ErtsPollSet ps, SysTimeval *tv, int max_res, int *ps_locked)
return res;
#endif /* ----------------------------------------- */
}
+#ifdef ERTS_SMP
+ if (timeout)
+ erts_thr_progress_finalize_wait(NULL);
+#endif
+ return res;
}
}
@@ -2007,7 +2040,9 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
{
int res, no_fds;
int ebadf = 0;
- int ps_locked;
+#ifdef ERTS_SMP
+ int ps_locked = 0;
+#endif
SysTimeval *tvp;
SysTimeval itv;
@@ -2049,8 +2084,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
}
#endif
- ps_locked = 0;
- res = check_fd_events(ps, tvp, no_fds, &ps_locked);
+ res = check_fd_events(ps, tvp, no_fds);
woke_up(ps);
@@ -2072,10 +2106,8 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
#endif
#ifdef ERTS_SMP
- if (!ps_locked) {
- ps_locked = 1;
- ERTS_POLLSET_LOCK(ps);
- }
+ ps_locked = 1;
+ ERTS_POLLSET_LOCK(ps);
#endif
no_fds = save_poll_result(ps, pr, no_fds, res, ebadf);
@@ -2111,19 +2143,26 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps,
void
ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet ps, int set)
{
-#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT || defined(ERTS_SMP)
- /*
- * NOTE: This function might be called from signal handlers in the
- * non-smp case; therefore, it has to be async-signal safe in
- * the non-smp case.
- */
+#if defined(USE_THREADS)
if (!set)
reset_wakeup_state(ps);
else
- wake_poller(ps, 1);
+ wake_poller(ps, 1, 0);
#endif
}
+#if ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
+void
+ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet ps)
+{
+ /*
+ * NOTE: This function is called from signal handlers; it
+ * therefore has to be async-signal safe.
+ */
+ wake_poller(ps, 1, 1);
+}
+#endif
+
/*
* erts_poll_interrupt_timed():
* If 'set' != 0, interrupt thread blocked in erts_poll_wait() if it
@@ -2139,7 +2178,7 @@ ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet ps,
reset_wakeup_state(ps);
else {
if (erts_smp_atomic32_read_acqb(&ps->timeout) > (erts_aint32_t) msec)
- wake_poller(ps, 1);
+ wake_poller(ps, 1, 0);
#ifdef ERTS_POLL_COUNT_AVOIDED_WAKEUPS
else {
if (ERTS_POLLSET_IS_POLLED(ps))
@@ -2266,10 +2305,8 @@ ERTS_POLL_EXPORT(erts_poll_create_pollset)(void)
erts_atomic32_init_nob(&ps->polled, 0);
erts_smp_mtx_init(&ps->mtx, "pollset");
#endif
-#ifdef ERTS_SMP
+#if defined(USE_THREADS) || ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
erts_atomic32_init_nob(&ps->wakeup_state, (erts_aint32_t) 0);
-#elif ERTS_POLL_ASYNC_INTERRUPT_SUPPORT
- ps->wakeup_state = 0;
#endif
#if ERTS_POLL_USE_WAKEUP_PIPE
create_wakeup_pipe(ps);
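(The reworked wake_poller() above writes to the wakeup pipe only when it wins the NOT_WOKEN-to-WOKEN transition, so any number of concurrent wakers produce at most one pipe write. A sketch of that gate using C11 atomics; ERTS uses its own erts_atomic32 API rather than stdatomic.h:)

#include <stdatomic.h>

enum { NOT_WOKEN = 0, WOKEN = 1, WOKEN_INTR = 2 };

/* Returns nonzero iff the caller moved the state from NOT_WOKEN to
 * WOKEN and therefore owns the single write to the wakeup pipe. */
static int should_wake(atomic_int *wakeup_state)
{
    int expected = NOT_WOKEN;
    return atomic_compare_exchange_strong(wakeup_state, &expected, WOKEN);
}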
diff --git a/erts/emulator/sys/common/erl_poll.h b/erts/emulator/sys/common/erl_poll.h
index 725a77a152..e0296c6a33 100644
--- a/erts/emulator/sys/common/erl_poll.h
+++ b/erts/emulator/sys/common/erl_poll.h
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 2006-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2006-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -216,6 +216,9 @@ typedef struct {
#endif
} ErtsPollInfo;
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+void ERTS_POLL_EXPORT(erts_poll_async_sig_interrupt)(ErtsPollSet);
+#endif
void ERTS_POLL_EXPORT(erts_poll_interrupt)(ErtsPollSet,
int);
void ERTS_POLL_EXPORT(erts_poll_interrupt_timed)(ErtsPollSet,
diff --git a/erts/emulator/sys/unix/erl_unix_sys.h b/erts/emulator/sys/unix/erl_unix_sys.h
index d8d51b192c..9a5ed9f5bc 100644
--- a/erts/emulator/sys/unix/erl_unix_sys.h
+++ b/erts/emulator/sys/unix/erl_unix_sys.h
@@ -129,10 +129,12 @@
#define HAVE_ERTS_CHECK_IO_DEBUG
int erts_check_io_debug(void);
-
-#ifndef ENABLE_CHILD_WAITER_THREAD
+#ifndef ERTS_SMP
# undef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
# define ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+#endif
+
+#ifndef ENABLE_CHILD_WAITER_THREAD
# ifdef ERTS_SMP
# define ERTS_SMP_SCHEDULERS_NEED_TO_CHECK_CHILDREN
void erts_check_children(void);
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index 82d2c64d81..15e110a6cc 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -263,6 +263,7 @@ int erts_use_kernel_poll = 0;
struct {
int (*select)(ErlDrvPort, ErlDrvEvent, int, int);
int (*event)(ErlDrvPort, ErlDrvEvent, ErlDrvEventData);
+ void (*check_io_as_interrupt)(void);
void (*check_io_interrupt)(int);
void (*check_io_interrupt_tmd)(int, long);
void (*check_io)(int);
@@ -302,6 +303,9 @@ init_check_io(void)
if (erts_use_kernel_poll) {
io_func.select = driver_select_kp;
io_func.event = driver_event_kp;
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+ io_func.check_io_as_interrupt = erts_check_io_async_sig_interrupt_kp;
+#endif
io_func.check_io_interrupt = erts_check_io_interrupt_kp;
io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_kp;
io_func.check_io = erts_check_io_kp;
@@ -314,6 +318,9 @@ init_check_io(void)
else {
io_func.select = driver_select_nkp;
io_func.event = driver_event_nkp;
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+ io_func.check_io_as_interrupt = erts_check_io_async_sig_interrupt_nkp;
+#endif
io_func.check_io_interrupt = erts_check_io_interrupt_nkp;
io_func.check_io_interrupt_tmd = erts_check_io_interrupt_timed_nkp;
io_func.check_io = erts_check_io_nkp;
@@ -325,6 +332,11 @@ init_check_io(void)
}
}
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+#define ERTS_CHK_IO_AS_INTR() (*io_func.check_io_as_interrupt)()
+#else
+#define ERTS_CHK_IO_AS_INTR() (*io_func.check_io_interrupt)(1)
+#endif
#define ERTS_CHK_IO_INTR (*io_func.check_io_interrupt)
#define ERTS_CHK_IO_INTR_TMD (*io_func.check_io_interrupt_tmd)
#define ERTS_CHK_IO (*io_func.check_io)
@@ -339,6 +351,11 @@ init_check_io(void)
max_files = erts_check_io_max_files();
}
+#ifdef ERTS_POLL_NEED_ASYNC_INTERRUPT_SUPPORT
+#define ERTS_CHK_IO_AS_INTR() erts_check_io_async_sig_interrupt()
+#else
+#define ERTS_CHK_IO_AS_INTR() erts_check_io_interrupt(1)
+#endif
#define ERTS_CHK_IO_INTR erts_check_io_interrupt
#define ERTS_CHK_IO_INTR_TMD erts_check_io_interrupt_timed
#define ERTS_CHK_IO erts_check_io
@@ -346,13 +363,13 @@ init_check_io(void)
#endif
-#ifdef ERTS_SMP
void
erts_sys_schedule_interrupt(int set)
{
ERTS_CHK_IO_INTR(set);
}
+#ifdef ERTS_SMP
void
erts_sys_schedule_interrupt_timed(int set, long msec)
{
@@ -731,7 +748,7 @@ break_requested(void)
erl_exit(ERTS_INTR_EXIT, "");
ERTS_SET_BREAK_REQUESTED;
- ERTS_CHK_IO_INTR(1); /* Make sure we don't sleep in poll */
+ ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */
}
/* set up signal handlers for break and quit */
@@ -1145,7 +1162,7 @@ static RETSIGTYPE onchld(int signum)
smp_sig_notify('C');
#else
children_died = 1;
- ERTS_CHK_IO_INTR(1); /* Make sure we don't sleep in poll */
+ ERTS_CHK_IO_AS_INTR(); /* Make sure we don't sleep in poll */
#endif
}
@@ -2853,7 +2870,6 @@ erl_sys_schedule(int runnable)
ERTS_CHK_IO(!runnable);
ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING);
#else
- ERTS_CHK_IO_INTR(0);
if (runnable) {
ERTS_CHK_IO(0); /* Poll for I/O */
check_async_ready(); /* Check async completions */
diff --git a/erts/emulator/sys/vxworks/sys.c b/erts/emulator/sys/vxworks/sys.c
index c6e7b65f32..08c4f3f4e5 100644
--- a/erts/emulator/sys/vxworks/sys.c
+++ b/erts/emulator/sys/vxworks/sys.c
@@ -1,7 +1,7 @@
/*
* %CopyrightBegin%
*
- * Copyright Ericsson AB 1997-2010. All Rights Reserved.
+ * Copyright Ericsson AB 1997-2011. All Rights Reserved.
*
* The contents of this file are subject to the Erlang Public License,
* Version 1.1, (the "License"); you may not use this file except in
@@ -238,6 +238,12 @@ erl_sys_args(int* argc, char** argv)
ASSERT(max_files <= erts_vxworks_max_files);
}
+void
+erts_sys_schedule_interrupt(int set)
+{
+ erts_check_io_interrupt(set);
+}
+
/*
* Called from schedule() when it runs out of runnable processes,
* or when Erlang code has performed INPUT_REDUCTIONS reduction
@@ -246,7 +252,6 @@ erl_sys_args(int* argc, char** argv)
void
erl_sys_schedule(int runnable)
{
- erts_check_io_interrupt(0);
erts_check_io(!runnable);
}
@@ -309,7 +314,7 @@ static void request_break(void)
fprintf(stderr,"break!\n");
#endif
erts_break_requested = 1;
- erts_check_io_interrupt(1); /* Make sure we don't sleep in erts_poll_wait */
+ erts_check_io_async_sig_interrupt(); /* Make sure we don't sleep in erts_poll_wait */
}
static void do_quit(void)
diff --git a/erts/emulator/sys/win32/erl_poll.c b/erts/emulator/sys/win32/erl_poll.c
index 735c420d8e..ab4ef05118 100644
--- a/erts/emulator/sys/win32/erl_poll.c
+++ b/erts/emulator/sys/win32/erl_poll.c
@@ -1159,7 +1159,13 @@ int erts_poll_wait(ErtsPollSet ps,
HARDDEBUGF(("Start waiting %d [%d]",num_h, (int) timeout));
ERTS_POLLSET_UNLOCK(ps);
+#ifdef ERTS_SMP
+ erts_thr_progress_prepare_wait(NULL);
+#endif
WaitForMultipleObjects(num_h, harr, FALSE, timeout);
+#ifdef ERTS_SMP
+ erts_thr_progress_finalize_wait(NULL);
+#endif
ERTS_POLLSET_LOCK(ps);
HARDDEBUGF(("Stop waiting %d [%d]",num_h, (int) timeout));
woke_up(ps);
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 3e151c26d5..a6b1606dd8 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -3360,13 +3360,13 @@ void erl_sys_init(void)
SEM_FAILCRITICALERRORS | SEM_NOOPENFILEERRORBOX);
}
-#ifdef ERTS_SMP
void
erts_sys_schedule_interrupt(int set)
{
erts_check_io_interrupt(set);
}
+#ifdef ERTS_SMP
void
erts_sys_schedule_interrupt_timed(int set, long msec)
{
@@ -3386,7 +3386,6 @@ erl_sys_schedule(int runnable)
erts_check_io(!runnable);
ERTS_SMP_LC_ASSERT(!ERTS_LC_IS_BLOCKING);
#else
- erts_check_io_interrupt(0);
if (runnable) {
erts_check_io(0); /* Poll for I/O */
check_async_ready(); /* Check async completions */
diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl
index a77ea4f3be..bcb0257ed1 100644
--- a/erts/emulator/test/driver_SUITE.erl
+++ b/erts/emulator/test/driver_SUITE.erl
@@ -75,7 +75,8 @@
smp_select/1,
driver_select_use/1,
thread_mseg_alloc_cache_clean/1,
- otp_9302/1]).
+ otp_9302/1,
+ thr_free_drv/1]).
-export([bin_prefix/2]).
@@ -143,7 +144,8 @@ all() ->
otp_6879, caller, many_events, missing_callbacks,
smp_select, driver_select_use,
thread_mseg_alloc_cache_clean,
- otp_9302].
+ otp_9302,
+ thr_free_drv].
groups() ->
[{timer, [],
@@ -1792,7 +1794,7 @@ driver_select_use0(Config) ->
thread_mseg_alloc_cache_clean(Config) when is_list(Config) ->
case {erlang:system_info(threads),
- erlang:system_info({allocator,mseg_alloc}),
+ mseg_inst_info(0),
driver_alloc_sbct()} of
{_, false, _} ->
?line {skipped, "No mseg_alloc"};
@@ -1804,13 +1806,13 @@ thread_mseg_alloc_cache_clean(Config) when is_list(Config) ->
?line {skipped, "driver_alloc() using too large single block threshold"};
{_, _, 0} ->
?line {skipped, "driver_alloc() using too low single block threshold"};
- {true, MsegAllocInfo, SBCT} ->
+ {true, _MsegAllocInfo, SBCT} ->
?line DrvName = 'thr_alloc_drv',
?line Path = ?config(data_dir, Config),
?line erl_ddll:start(),
?line ok = load_driver(Path, DrvName),
?line Port = open_port({spawn, DrvName}, []),
- ?line CCI = mseg_alloc_cci(MsegAllocInfo),
+ ?line CCI = 1000,
?line ?t:format("CCI = ~p~n", [CCI]),
?line CCC = mseg_alloc_ccc(),
?line ?t:format("CCC = ~p~n", [CCC]),
@@ -1831,7 +1833,7 @@ mseg_alloc_cci(MsegAllocInfo) ->
?line CCI.
mseg_alloc_ccc() ->
- mseg_alloc_ccc(erlang:system_info({allocator,mseg_alloc})).
+ mseg_alloc_ccc(mseg_inst_info(0)).
mseg_alloc_ccc(MsegAllocInfo) ->
?line {value,{memkind, MKL}} = lists:keysearch(memkind,1,MsegAllocInfo),
@@ -1841,7 +1843,7 @@ mseg_alloc_ccc(MsegAllocInfo) ->
?line GigaCCC*1000000000 + CCC.
mseg_alloc_cached_segments() ->
- mseg_alloc_cached_segments(erlang:system_info({allocator,mseg_alloc})).
+ mseg_alloc_cached_segments(mseg_inst_info(0)).
mseg_alloc_cached_segments(MsegAllocInfo) ->
MemName = case is_halfword_vm() of
@@ -1859,6 +1861,13 @@ mseg_alloc_cached_segments(MsegAllocInfo) ->
= lists:keysearch(cached_segments, 1, SL),
?line CS.
+mseg_inst_info(I) ->
+ {value, {instance, I, Value}}
+ = lists:keysearch(I,
+ 2,
+ erlang:system_info({allocator,mseg_alloc})),
+ Value.
+
is_halfword_vm() ->
case {erlang:system_info({wordsize, internal}),
erlang:system_info({wordsize, external})} of
@@ -1914,6 +1923,38 @@ otp_9302(Config) when is_list(Config) ->
?line port_close(Port),
?line ok.
+thr_free_drv(Config) when is_list(Config) ->
+ ?line Path = ?config(data_dir, Config),
+ ?line erl_ddll:start(),
+ ?line ok = load_driver(Path, thr_free_drv),
+ ?line MemBefore = driver_alloc_size(),
+% io:format("SID=~p", [erlang:system_info(scheduler_id)]),
+ ?line Port = open_port({spawn, thr_free_drv}, []),
+ ?line MemPeek = driver_alloc_size(),
+ ?line true = is_port(Port),
+ ?line ok = thr_free_drv_control(Port, 0),
+ ?line port_close(Port),
+ ?line MemAfter = driver_alloc_size(),
+ ?line io:format("MemPeek=~p~n", [MemPeek]),
+ ?line io:format("MemBefore=~p, MemAfter=~p~n", [MemBefore, MemAfter]),
+ ?line MemBefore = MemAfter,
+ ?line case MemPeek of
+ undefined -> ok;
+ _ ->
+ ?line true = MemPeek > MemBefore
+ end,
+ ?line ok.
+
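+%% Each port_control() call makes the driver free up to
+%% BLOCKS_PER_CTRL of its own blocks; loop until it reports "done".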
+thr_free_drv_control(Port, N) ->
+ case erlang:port_control(Port, 0, "") of
+ "done" ->
+ ok;
+ "more" ->
+ erlang:yield(),
+% io:format("N=~p, SID=~p", [N, erlang:system_info(scheduler_id)]),
+ thr_free_drv_control(Port, N+1)
+ end.
+
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Utilities
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -2077,3 +2118,33 @@ start_node(Config) when is_list(Config) ->
stop_node(Node) ->
?t:stop_node(Node).
+
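+%% Wait until delayed deallocations have completed; blocks freed by
+%% a thread other than the owning allocator instance are deallocated
+%% asynchronously, so sizes are stable only after this call.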
+wait_deallocations() ->
+ try
+ erts_debug:set_internal_state(wait, deallocations)
+ catch error:undef ->
+ erts_debug:set_internal_state(available_internal_state, true),
+ wait_deallocations()
+ end.
+
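+%% Total size of all live blocks allocated via driver_alloc(), summed
+%% over the sbmbcs, mbcs, and sbcs carriers of every driver_alloc
+%% instance; 'undefined' if the allocator is disabled.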
+driver_alloc_size() ->
+ wait_deallocations(),
+ case erlang:system_info({allocator_sizes, driver_alloc}) of
+ false ->
+ undefined;
+ MemInfo ->
+ CS = lists:foldl(
+ fun ({instance, _, L}, Acc) ->
+ {value,{_,SBMBCS}} = lists:keysearch(sbmbcs, 1, L),
+ {value,{_,MBCS}} = lists:keysearch(mbcs, 1, L),
+ {value,{_,SBCS}} = lists:keysearch(sbcs, 1, L),
+ [SBMBCS,MBCS,SBCS | Acc]
+ end,
+ [],
+ MemInfo),
+ lists:foldl(
+ fun(L, Sz0) ->
+ {value,{_,Sz,_,_}} = lists:keysearch(blocks_size, 1, L),
+ Sz0+Sz
+ end, 0, CS)
+ end.
diff --git a/erts/emulator/test/driver_SUITE_data/Makefile.src b/erts/emulator/test/driver_SUITE_data/Makefile.src
index 5b3ba1557e..62ab5169c0 100644
--- a/erts/emulator/test/driver_SUITE_data/Makefile.src
+++ b/erts/emulator/test/driver_SUITE_data/Makefile.src
@@ -12,7 +12,8 @@ MISC_DRVS = outputv_drv@dll@ \
many_events_drv@dll@ \
missing_callback_drv@dll@ \
thr_alloc_drv@dll@ \
- otp_9302_drv@dll@
+ otp_9302_drv@dll@ \
+ thr_free_drv@dll@
SYS_INFO_DRVS = sys_info_1_0_drv@dll@ \
sys_info_1_1_drv@dll@ \
diff --git a/erts/emulator/test/driver_SUITE_data/thr_free_drv.c b/erts/emulator/test/driver_SUITE_data/thr_free_drv.c
new file mode 100644
index 0000000000..622a62ebea
--- /dev/null
+++ b/erts/emulator/test/driver_SUITE_data/thr_free_drv.c
@@ -0,0 +1,241 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2011. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include "erl_driver.h"
+
+#define BLOCKS_PER_THREAD 100000
+#define NO_THREADS 10
+#define BLOCKS_PER_CTRL 1000
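+
+/*
+ * Test sketch: the port creator thread allocates BLOCKS_PER_THREAD
+ * driver_alloc() blocks for itself and for each of NO_THREADS test
+ * threads. The test threads free blocks allocated by another thread,
+ * exercising deallocation from a thread other than the allocating
+ * one, while the creator's own blocks are freed in chunks of
+ * BLOCKS_PER_CTRL from control().
+ */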
+
+typedef struct {
+ ErlDrvMutex *mtx;
+ ErlDrvCond *cnd;
+ int b;
+ int *go;
+ int *skip;
+ void *blocks[BLOCKS_PER_THREAD];
+} test_thread_data;
+
+typedef struct {
+ ErlDrvPort port;
+ int b;
+ int go;
+ int skip;
+ test_thread_data ttd[NO_THREADS+1];
+ ErlDrvTid tids[NO_THREADS+1];
+} test_data;
+
+static ErlDrvData start(ErlDrvPort port, char *command);
+static void stop(ErlDrvData data);
+static int control(ErlDrvData drv_data, unsigned int command, char *buf,
+ int len, char **rbuf, int rlen);
+
+static ErlDrvEntry thr_free_drv_entry = {
+ NULL /* init */,
+ start,
+ stop,
+ NULL /* output */,
+ NULL /* ready_input */,
+ NULL /* ready_output */,
+ "thr_free_drv",
+ NULL /* finish */,
+ NULL /* handle */,
+ control,
+ NULL /* timeout */,
+ NULL /* outputv */,
+ NULL /* ready_async */,
+ NULL /* flush */,
+ NULL /* call */,
+ NULL /* event */,
+ ERL_DRV_EXTENDED_MARKER,
+ ERL_DRV_EXTENDED_MAJOR_VERSION,
+ ERL_DRV_EXTENDED_MINOR_VERSION,
+ ERL_DRV_FLAG_USE_PORT_LOCKING,
+ NULL /* handle2 */,
+ NULL /* handle_monitor */
+};
+
+DRIVER_INIT(thr_free_drv)
+{
+ return &thr_free_drv_entry;
+}
+
+void *
+test_thread(void *vttd)
+{
+ test_thread_data *ttd = (test_thread_data *) vttd;
+ int i, skip;
+
+ erl_drv_mutex_lock(ttd->mtx);
+
+ while (!*ttd->go)
+ erl_drv_cond_wait(ttd->cnd, ttd->mtx);
+ skip = *ttd->skip;
+ erl_drv_mutex_unlock(ttd->mtx);
+
+ if (!skip) {
+ for (i = 0; i < BLOCKS_PER_THREAD; i++)
+ driver_free(ttd->blocks[i]);
+ }
+ return NULL;
+}
+
+ErlDrvData start(ErlDrvPort port, char *command)
+{
+ int join = 0, t, b, res;
+ test_thread_data *ttd;
+ test_data *td = driver_alloc(sizeof(test_data));
+ if (!td)
+ return ERL_DRV_ERROR_GENERAL;
+ ttd = td->ttd;
+ for (b = 0; b < BLOCKS_PER_THREAD; b++)
+ for (t = 0; t <= NO_THREADS; t++)
+ ttd[t].blocks[b] = NULL;
+ ttd[0].mtx = NULL;
+ ttd[0].cnd = NULL;
+
+ for (b = 0; b < BLOCKS_PER_THREAD; b++) {
+ for (t = 0; t <= NO_THREADS; t++) {
+ ttd[t].blocks[b] = driver_alloc(1);
+ if (ttd[t].blocks[b] == NULL)
+ goto fail;
+ }
+ }
+
+ td->b = -1;
+ td->go = 0;
+ td->skip = 0;
+
+ ttd[0].mtx = erl_drv_mutex_create("test_mutex");
+ if (!ttd[0].mtx)
+ goto fail;
+ ttd[0].cnd = erl_drv_cond_create("test_cnd");
+ if (!ttd[0].cnd)
+ goto fail;
+ ttd[0].go = &td->go;
+ ttd[0].skip = &td->skip;
+
+ for (t = 1; t <= NO_THREADS; t++) {
+ ttd[t].mtx = ttd[0].mtx;
+ ttd[t].cnd = ttd[0].cnd;
+ ttd[t].go = ttd[0].go;
+ ttd[t].skip = ttd[0].skip;
+ res = erl_drv_thread_create("test_thread",
+ &td->tids[t],
+ test_thread,
+ &ttd[t],
+ NULL);
+ if (res != 0)
+ goto fail;
+ join = t;
+ }
+
+ td->port = port;
+
+ return (ErlDrvData) td;
+
+fail:
+
+ if (join) {
+ erl_drv_mutex_lock(ttd[0].mtx);
+ td->go = 1;
+ td->skip = 1;
+ erl_drv_cond_broadcast(ttd[0].cnd);
+ erl_drv_mutex_unlock(ttd[0].mtx);
+ for (t = 1; t <= join; t++)
+ erl_drv_thread_join(td->tids[t], NULL);
+ }
+
+ if (ttd[0].mtx)
+ erl_drv_mutex_destroy(ttd[0].mtx);
+ if (ttd[0].cnd)
+ erl_drv_cond_destroy(ttd[0].cnd);
+
+ for (b = 0; b < BLOCKS_PER_THREAD; b++) {
+ for (t = 0; t <= NO_THREADS; t++) {
+ if (ttd[t].blocks[b] != NULL)
+ driver_free(ttd[t].blocks[b]);
+ }
+ }
+
+ return ERL_DRV_ERROR_GENERAL;
+}
+
+static void stop(ErlDrvData drv_data)
+{
+ test_data *td = (test_data *) drv_data;
+ int t, b;
+ for (t = 1; t <= NO_THREADS; t++)
+ erl_drv_thread_join(td->tids[t], NULL);
+ for (b = 0; b < BLOCKS_PER_THREAD; b++) {
+ if (td->ttd[0].blocks[b])
+ driver_free(td->ttd[0].blocks[b]);
+ }
+ erl_drv_mutex_destroy(td->ttd[0].mtx);
+ erl_drv_cond_destroy(td->ttd[0].cnd);
+ driver_free(td);
+}
+
+static int control(ErlDrvData drv_data, unsigned int command, char *buf,
+ int len, char **rbuf, int rlen)
+{
+ test_data *td = (test_data *) drv_data;
+ char *result = "failure";
+ int i, b;
+ int result_len;
+
+ if (td->b == -1) {
+ erl_drv_mutex_lock(td->ttd[0].mtx);
+ td->go = 1;
+ erl_drv_cond_broadcast(td->ttd[0].cnd);
+ erl_drv_mutex_unlock(td->ttd[0].mtx);
+ td->b = 0;
+ }
+
+ for (i = 0, b = td->b; i < BLOCKS_PER_CTRL && b < BLOCKS_PER_THREAD; i++, b++) {
+ driver_free(td->ttd[0].blocks[b]);
+ td->ttd[0].blocks[b] = NULL;
+ }
+
+ td->b = b;
+ if (b >= BLOCKS_PER_THREAD)
+ result = "done";
+ else
+ result = "more";
+
+ result_len = strlen(result);
+ if (result_len <= rlen) {
+ memcpy(*rbuf, result, result_len);
+ return result_len;
+ }
+ else {
+ *rbuf = driver_alloc(result_len);
+ if (!*rbuf) {
+ driver_failure_posix(td->port, ENOMEM);
+ return 0;
+ }
+ else {
+ memcpy(*rbuf, result, result_len);
+ return result_len;
+ }
+ }
+}
diff --git a/erts/emulator/test/mtx_SUITE.erl b/erts/emulator/test/mtx_SUITE.erl
index e0a7878bd8..879d2f61dd 100644
--- a/erts/emulator/test/mtx_SUITE.erl
+++ b/erts/emulator/test/mtx_SUITE.erl
@@ -62,16 +62,29 @@ init_per_suite(Config) when is_list(Config) ->
Config.
end_per_suite(Config) when is_list(Config) ->
+ catch erts_debug:set_internal_state(available_internal_state, false),
Config.
init_per_testcase(_Case, Config) ->
Dog = ?t:timetrap(?t:minutes(15)),
+ %% Wait for deallocations to complete since we measure
+ %% runtime in test cases.
+ wait_deallocations(),
[{watchdog, Dog}|Config].
end_per_testcase(_Func, Config) ->
Dog = ?config(watchdog, Config),
?t:timetrap_cancel(Dog).
+wait_deallocations() ->
+ try
+ erts_debug:set_internal_state(wait, deallocations)
+ catch
+ error:undef ->
+ erts_debug:set_internal_state(available_internal_state, true),
+ wait_deallocations()
+ end.
+
suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl
index 9b782b35a2..0350eb671d 100644
--- a/erts/emulator/test/system_info_SUITE.erl
+++ b/erts/emulator/test/system_info_SUITE.erl
@@ -37,7 +37,7 @@
init_per_group/2,end_per_group/2,
init_per_testcase/2, end_per_testcase/2]).
--export([process_count/1, system_version/1, misc_smoke_tests/1, heap_size/1, wordsize/1]).
+-export([process_count/1, system_version/1, misc_smoke_tests/1, heap_size/1, wordsize/1, memory/1]).
-define(DEFAULT_TIMEOUT, ?t:minutes(2)).
@@ -45,7 +45,7 @@ suite() -> [{ct_hooks,[ts_install_cth]}].
all() ->
[process_count, system_version, misc_smoke_tests,
- heap_size, wordsize].
+ heap_size, wordsize, memory].
groups() ->
[].
@@ -187,3 +187,312 @@ wordsize(Config) when is_list(Config) ->
Other ->
exit({unexpected_wordsizes,Other})
end.
+
+memory(doc) -> ["Verify that erlang:memory/0 and the memory results produced in a crash dump are similar"];
+memory(Config) when is_list(Config) ->
+ %%
+ %% Verify that erlang:memory/0 and the memory results
+ %% produced in a crash dump are similar.
+ %%
+ %% erlang:memory/0 requests information from each scheduler
+ %% thread and puts the information together in Erlang code
+ %% (erlang.erl).
+ %%
+ %% When a crash dump is written we cannot use the
+ %% erlang:memory/0 implementation. The crashdump implementation
+ %% is a pure C implementation inspecting all allocator instances
+ %% after the system has been blocked (erts_memory() in erl_alloc.c).
+ %%
+ %% Since we got two implementations, modifications can easily
+ %% cause them to produce different results.
+ %%
+ %% erts_debug:get_internal_state(memory) blocks the system and
+ %% executes the same code as the crash dump writer uses.
+ %%
+
+ erts_debug:set_internal_state(available_internal_state, true),
+ %% Use a large heap size on the controlling process in
+ %% order to avoid changes in its heap size during
+ %% comparisons.
+ MinHeapSize = process_flag(min_heap_size, 1024*1024),
+ Prio = process_flag(priority, max),
+ try
+ erlang:memory(), %% first call will init stat atoms
+ garbage_collect(), %% blow up heap
+ memory_test(Config)
+ catch
+ error:notsup -> {skipped, "erlang:memory() not supported"}
+ after
+ process_flag(min_heap_size, MinHeapSize),
+ process_flag(priority, Prio),
+ catch erts_debug:set_internal_state(available_internal_state, false)
+ end.
+
+memory_test(_Config) ->
+
+ MWs = spawn_mem_workers(),
+
+ DPs = mem_workers_call(MWs,
+ fun () ->
+ mapn(fun (_) ->
+ spawn(fun () ->
+ receive
+ after infinity ->
+ ok
+ end
+ end)
+ end,
+ 1000 div erlang:system_info(schedulers_online))
+ end,
+ []),
+ cmp_memory(MWs, "spawn procs"),
+
+ Ps = lists:flatten(DPs),
+
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) -> link(P) end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "link procs"),
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) -> unlink(P) end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "unlink procs"),
+
+ DMs = mem_workers_call(MWs,
+ fun () ->
+ lists:map(fun (P) ->
+ monitor(process, P)
+ end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "monitor procs"),
+ Ms = lists:flatten(DMs),
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (M) ->
+ demonitor(M)
+ end, Ms)
+ end,
+ []),
+ cmp_memory(MWs, "demonitor procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ lists:foreach(fun (P) ->
+ P ! {a, "message", make_ref()}
+ end, Ps)
+ end,
+ []),
+ cmp_memory(MWs, "message procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ Mons = lists:map(fun (P) ->
+ exit(P, kill),
+ monitor(process, P)
+ end,
+ Ps),
+ lists:foreach(fun (Mon) ->
+ receive
+ {'DOWN', Mon, _, _, _} -> ok
+ end
+ end,
+ Mons)
+ end, []),
+ cmp_memory(MWs, "kill procs"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ put(binary_data,
+ mapn(fun (_) -> list_to_binary(lists:duplicate(256,$?)) end, 100))
+ end,
+ []),
+
+ cmp_memory(MWs, "store binary data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ put(binary_data, false),
+ garbage_collect()
+ end,
+ []),
+ cmp_memory(MWs, "release binary data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ list_to_atom("an ugly atom "++integer_to_list(erlang:system_info(scheduler_id))),
+ list_to_atom("another ugly atom "++integer_to_list(erlang:system_info(scheduler_id))),
+ list_to_atom("yet another ugly atom "++integer_to_list(erlang:system_info(scheduler_id)))
+ end,
+ []),
+ cmp_memory(MWs, "new atoms"),
+
+
+ mem_workers_call(MWs,
+ fun () ->
+ T = ets:new(?MODULE, []),
+ ets:insert(T, {gurka, lists:seq(1,10000)}),
+ ets:insert(T, {banan, lists:seq(1,1024)}),
+ ets:insert(T, {appelsin, make_ref()}),
+ put(ets_id, T)
+ end,
+ []),
+ cmp_memory(MWs, "store ets data"),
+
+ mem_workers_call(MWs,
+ fun () ->
+ ets:delete(get(ets_id)),
+ put(ets_id, false)
+ end,
+ []),
+ cmp_memory(MWs, "remove ets data"),
+
+ lists:foreach(fun (MW) ->
+ unlink(MW),
+ Mon = monitor(process, MW),
+ exit(MW, kill),
+ receive
+ {'DOWN', Mon, _, _, _} -> ok
+ end
+ end,
+ MWs),
+ ok.
+
+mem_worker() ->
+ receive
+ {call, From, Fun, Args} ->
+ From ! {reply, self(), apply(Fun, Args)},
+ mem_worker();
+ {cast, _From, Fun, Args} ->
+ apply(Fun, Args),
+ mem_worker()
+ end.
+
+mem_workers_call(MWs, Fun, Args) ->
+ lists:foreach(fun (MW) ->
+ MW ! {call, self(), Fun, Args}
+ end,
+ MWs),
+ lists:map(fun (MW) ->
+ receive
+ {reply, MW, Res} ->
+ Res
+ end
+ end,
+ MWs).
+
+mem_workers_cast(MWs, Fun, Args) ->
+ lists:foreach(fun (MW) ->
+ MW ! {cast, self(), Fun, Args}
+ end,
+ MWs).
+
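+%% Spawn one memory worker per online scheduler, each pinned to a
+%% scheduler with the {scheduler, N} option, so that the test touches
+%% all scheduler specific allocator instances.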
+spawn_mem_workers() ->
+ spawn_mem_workers(erlang:system_info(schedulers_online)).
+
+spawn_mem_workers(0) ->
+ [];
+spawn_mem_workers(N) ->
+ [spawn_opt(fun () -> mem_worker() end,
+ [{scheduler, N rem erlang:system_info(schedulers_online) + 1},
+ link]) | spawn_mem_workers(N-1)].
+
+
+
+mem_get(X, Mem) ->
+ case lists:keyfind(X, 1, Mem) of
+ {X, Val} -> Val;
+ false -> false
+ end.
+
+cmp_memory(What, Mem1, Mem2, 1) ->
+ R1 = mem_get(What, Mem1),
+ R2 = mem_get(What, Mem2),
+ true = R1 == R2;
+cmp_memory(What, Mem1, Mem2, RelDiff) ->
+ %% Allow a relative difference of RelDiff; e.g. RelDiff = 1.05 permits a 5% deviation.
+ R1 = mem_get(What, Mem1),
+ R2 = mem_get(What, Mem2),
+ case R1 == R2 of
+ true ->
+ ok;
+ false ->
+ case R1 > R2 of
+ true ->
+ true = R2*RelDiff > R1;
+ false ->
+ true = R1*RelDiff > R2
+ end
+ end.
+
+pos_int(Val) when Val >= 0 ->
+ Val;
+pos_int(Val) ->
+ exit({not_pos_int, Val}).
+
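+%% Check the documented invariants of erlang:memory/0: total equals
+%% processes plus system, and the used figures never exceed their
+%% totals.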
+check_sane_memory(Mem) ->
+ Tot = pos_int(mem_get(total, Mem)),
+ Proc = pos_int(mem_get(processes, Mem)),
+ ProcUsed = pos_int(mem_get(processes_used, Mem)),
+ Sys = pos_int(mem_get(system, Mem)),
+ Atom = pos_int(mem_get(atom, Mem)),
+ AtomUsed = pos_int(mem_get(atom_used, Mem)),
+ Bin = pos_int(mem_get(binary, Mem)),
+ Code = pos_int(mem_get(code, Mem)),
+ Ets = pos_int(mem_get(ets, Mem)),
+
+ Tot = Proc + Sys,
+ true = Sys > Atom + Bin + Code + Ets,
+ true = Proc >= ProcUsed,
+ true = Atom >= AtomUsed,
+
+ case mem_get(maximum, Mem) of
+ false -> ok;
+ Max -> true = pos_int(Max) >= Tot
+ end,
+ ok.
+
+cmp_memory(MWs, Str) ->
+ erlang:display(Str),
+ lists:foreach(fun (MW) -> garbage_collect(MW) end, MWs),
+ garbage_collect(),
+ erts_debug:set_internal_state(wait, deallocations),
+
+ EDM = erts_debug:get_internal_state(memory),
+ EM = erlang:memory(),
+
+ io:format("~s:~n"
+ "erlang:memory() = ~p~n"
+ "crash dump memory = ~p~n",
+ [Str, EM, EDM]),
+
+ ?line check_sane_memory(EM),
+ ?line check_sane_memory(EDM),
+
+ %% We expect these to always give us exactly the same result
+
+ ?line cmp_memory(atom, EM, EDM, 1),
+ ?line cmp_memory(atom_used, EM, EDM, 1),
+ ?line cmp_memory(binary, EM, EDM, 1),
+ ?line cmp_memory(code, EM, EDM, 1),
+ ?line cmp_memory(ets, EM, EDM, 1),
+
+ %% Total, processes, processes_used, and system will seldom
+ %% give us exactly the same result since the two readings
+ %% aren't taken atomically.
+
+ ?line cmp_memory(total, EM, EDM, 1.05),
+ ?line cmp_memory(processes, EM, EDM, 1.05),
+ ?line cmp_memory(processes_used, EM, EDM, 1.05),
+ ?line cmp_memory(system, EM, EDM, 1.05),
+
+ ok.
+
+mapn(_Fun, 0) ->
+ [];
+mapn(Fun, N) ->
+ [Fun(N) | mapn(Fun, N-1)].