Diffstat (limited to 'erts')
30 files changed, 1785 insertions, 497 deletions
diff --git a/erts/autoconf/vxworks/sed.general b/erts/autoconf/vxworks/sed.general index 96a70e4148..32da809c9d 100644 --- a/erts/autoconf/vxworks/sed.general +++ b/erts/autoconf/vxworks/sed.general @@ -98,7 +98,6 @@ s|@INSTALL_PROGRAM@|${INSTALL}| s|@INSTALL_SCRIPT@|${INSTALL}| s|@INSTALL_DATA@|${INSTALL} -m 644| s|@INSTALL_DIR@|$(INSTALL) -d| -s|@RM@|/bin/rm| s|@MKDIR@|/bin/mkdir| s|@ERLANG_OSTYPE@|vxworks| s|@vxworks_reclaim@|reclaim.h| diff --git a/erts/configure.in b/erts/configure.in index cbebca97a3..4a4f478521 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -422,6 +422,56 @@ if test X"$with_ets_write_concurrency_locks" != X""; then [Define to override the default number of write_concurrency locks]) fi +AC_ARG_WITH(spectre-mitigation, + AS_HELP_STRING([--with-spectre-mitigation={yes|incomplete}], + [enable spectre mitigation, either fully or with mitigations + disabled in a handful places like the interpreter]) + AS_HELP_STRING([--without-spectre-mitigation], + [build without spectre mitigation]), + [],[with_spectre_mitigation=no]) + +case "$with_spectre_mitigation" in + no) ;; + yes) ;; + incomplete) ;; + *) AC_MSG_ERROR([Invalid spectre mitigation setting]) ;; +esac + +i_noretpoline_attr="" + +if test X"$with_spectre_mitigation" != X"no"; then + CFLAGS="$CFLAGS -mindirect-branch=thunk" + + AC_MSG_CHECKING([for spectre mitigation]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([],[return 0;])], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([no])]) + + if test X"$with_spectre_mitigation" = X"incomplete"; then + # gcc and clang support this attribute if they're recent enough. Note + # that we must compile with -Werror to check for actual support as they + # warn rather than error out on unsupported attributes. + + i_noretpoline_attr='__attribute__((__indirect_branch__("keep")))' + i_preserve_cflags="$CFLAGS" + CFLAGS="$CFLAGS -Werror" + + AC_MSG_CHECKING([whether spectre mitigation can be disabled on a per-function basis]) + AC_COMPILE_IFELSE( + [AC_LANG_PROGRAM([$i_noretpoline_attr],[return 0;])], + [AC_MSG_RESULT([yes])], + [AC_MSG_ERROR([no])]) + + CFLAGS="$i_preserve_cflags" + fi +fi + +AC_DEFINE_UNQUOTED(ERTS_NO_RETPOLINE, $i_noretpoline_attr, + [Per-function attribute for disabling retpoline. This is + *only* defined when --with-spectre-mitigation=incomplete + and has no effects otherwise]) + dnl ---------------------------------------------------------------------- dnl Checks for programs. dnl ---------------------------------------------------------------------- @@ -753,11 +803,6 @@ AC_SUBST(LIBCARBON) _search_path=/bin:/usr/bin:/usr/local/bin:$PATH -AC_PATH_PROG(RM, rm, false, $_search_path) -if test "$ac_cv_path_RM" = false; then - AC_MSG_ERROR([No 'rm' command found]) -fi - AC_PATH_PROG(MKDIR, mkdir, false, $_search_path) if test "$ac_cv_path_MKDIR" = false; then AC_MSG_ERROR([No 'mkdir' command found]) @@ -772,9 +817,9 @@ _search_path= # Remove old configuration information. -# Next line should be placed after AC_PATH_PROG(RM, ...), but before -# first output to CONN_INFO. So this is just the right place. -$RM -f "$ERL_TOP/erts/CONF_INFO" +# Next line should be before first output to CONN_INFO. So this is +# just the right place. 
+rm -f "$ERL_TOP/erts/CONF_INFO" dnl Check if we should/can build a sharing-preserving emulator AC_MSG_CHECKING(if we are building a sharing-preserving emulator) @@ -812,7 +857,7 @@ fi ## Delete previous failed configure results if test -f doc/CONF_INFO; then - $RM doc/CONF_INFO + rm -f doc/CONF_INFO fi AC_CHECK_PROGS(XSLTPROC, xsltproc) @@ -4004,14 +4049,14 @@ if test "$enable_dtrace_test" = "yes" ; then AC_MSG_CHECKING([for 2-stage DTrace precompilation]) AC_TRY_COMPILE([ #include "foo-dtrace.h" ], [ERLANG_DIST_PORT_BUSY_ENABLED();], - [$RM -f $DTRACE_2STEP_TEST + [rm -f $DTRACE_2STEP_TEST dtrace -G $DTRACE_CPP $DTRACE_BITS_FLAG -Iemulator/beam -o $DTRACE_2STEP_TEST -s emulator/beam/erlang_dtrace.d conftest.$OBJEXT 2>&AS_MESSAGE_LOG_FD if test -f $DTRACE_2STEP_TEST; then - $RM $DTRACE_2STEP_TEST + rm -f $DTRACE_2STEP_TEST DTRACE_ENABLED_2STEP=yes fi], []) - $RM -f foo-dtrace.h + rm -f foo-dtrace.h AS_IF([test "x$DTRACE_ENABLED_2STEP" = "xyes"], [AC_MSG_RESULT([yes])], [AC_MSG_RESULT([no])]) @@ -4215,7 +4260,7 @@ ssl_done=yes # Default only one run # Remove all SKIP files from previous runs for a in ssl crypto ssh; do - $RM -f $ERL_TOP/lib/$a/SKIP + rm -f $ERL_TOP/lib/$a/SKIP done SSL_DYNAMIC_ONLY=$enable_dynamic_ssl @@ -4836,7 +4881,7 @@ need_java="jinterface ic/java_src" # Remove all SKIP files from previous runs for a in $need_java ; do - $RM -f $ERL_TOP/lib/$a/SKIP + rm -f $ERL_TOP/lib/$a/SKIP done if test "X$with_javac" = "Xno"; then @@ -4887,7 +4932,7 @@ dnl this deliberately does not believe that 'gcc' is a C++ compiler AC_CHECK_TOOLS(CXX, [$CCC c++ g++ CC cxx cc++ cl], false) # Remove SKIP file from previous run -$RM -f $ERL_TOP/lib/orber/SKIP +rm -f $ERL_TOP/lib/orber/SKIP if test "$CXX" = false; then echo "No C++ compiler found" > $ERL_TOP/lib/orber/SKIP @@ -5042,7 +5087,6 @@ AC_CONFIG_FILES([../make/make_emakefile:../make/make_emakefile.in], dnl dnl The ones below should be moved to their respective lib dnl -dnl ../lib/ssl/c_src/$host/Makefile:../lib/ssl/c_src/Makefile.in AC_CONFIG_FILES([ ../lib/ic/c_src/$host/Makefile:../lib/ic/c_src/Makefile.in ../lib/os_mon/c_src/$host/Makefile:../lib/os_mon/c_src/Makefile.in @@ -5052,4 +5096,6 @@ AC_CONFIG_FILES([ ../lib/tools/c_src/$host/Makefile:../lib/tools/c_src/Makefile.in ]) +AC_CONFIG_FILES([../make/install_dir_data.sh:../make/install_dir_data.sh.in], [chmod +x ../make/install_dir_data.sh]) + AC_OUTPUT diff --git a/erts/doc/src/erl_dist_protocol.xml b/erts/doc/src/erl_dist_protocol.xml index 610351db6c..79f703455a 100644 --- a/erts/doc/src/erl_dist_protocol.xml +++ b/erts/doc/src/erl_dist_protocol.xml @@ -829,6 +829,14 @@ DiB == gen_digest(ChA, ICA)? 
<item> <p>The node understand UTF-8 encoded atoms.</p> </item> + <tag><c>-define(DFLAG_MAP_TAG, 16#20000).</c></tag> + <item> + <p>The node understand the map tag.</p> + </item> + <tag><c>-define(DFLAG_BIG_CREATION, 16#40000).</c></tag> + <item> + <p>The node understand big node creation.</p> + </item> </taglist> </section> </section> diff --git a/erts/doc/src/erl_ext_dist.xml b/erts/doc/src/erl_ext_dist.xml index b7090d0472..5813af1d57 100644 --- a/erts/doc/src/erl_ext_dist.xml +++ b/erts/doc/src/erl_ext_dist.xml @@ -386,44 +386,6 @@ </section> <section> - <marker id="REFERENCE_EXT"/> - <title>REFERENCE_EXT</title> - <table align="left"> - <row> - <cell align="center">1</cell> - <cell align="center">N</cell> - <cell align="center">4</cell> - <cell align="center">1</cell> - </row> - <row> - <cell align="center"><c>101</c></cell> - <cell align="center"><c>Node</c></cell> - <cell align="center"><c>ID</c></cell> - <cell align="center"><c>Creation</c></cell> - </row> - <tcaption>REFERENCE_EXT</tcaption></table> - <p> - Encodes a reference object (an object generated with - <seealso marker="erlang:make_ref/0">erlang:make_ref/0</seealso>). - The <c>Node</c> term is an encoded atom, that is, - <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, - <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso>, or - <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>. - The <c>ID</c> field contains a big-endian unsigned integer, - but <em>is to be regarded as uninterpreted data</em>, - as this field is node-specific. - <c>Creation</c> is a byte containing a node serial number, which - makes it possible to separate old (crashed) nodes from a new one. - </p> - <p> - In <c>ID</c>, only 18 bits are significant; the rest are to be 0. - In <c>Creation</c>, only two bits are significant; the rest are to be 0. - See <seealso marker="#NEW_REFERENCE_EXT"> - <c>NEW_REFERENCE_EXT</c></seealso>. - </p> - </section> - - <section> <marker id="PORT_EXT"/> <title>PORT_EXT</title> <table align="left"> @@ -441,13 +403,46 @@ </row> <tcaption>PORT_EXT</tcaption></table> <p> - Encodes a port object (obtained from - <seealso marker="erlang:open_port/2"> - <c>erlang:open_port/2</c></seealso>). - The <c>ID</c> is a node-specific identifier for a local port. + Same as <seealso marker="#NEW_PORT_EXT"><c>NEW_PORT_EXT</c></seealso> + except the <c>Creation</c> field is only one byte and only two + bits are significant, the rest are to be 0. + </p> + </section> + + <section> + <marker id="NEW_PORT_EXT"/> + <title>NEW_PORT_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + </row> + <row> + <cell align="center"><c>89</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>NEW_PORT_EXT</tcaption></table> + <p> + Encodes a port identifier (obtained from + <seealso marker="erlang#open_port/2"><c>erlang:open_port/2</c></seealso>). + <c>Node</c> is an encoded atom, that is, + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>. + <c>ID</c> is a 32-bit big endian unsigned integer. Only 28 bits are + significant; the rest are to be 0. 
The <c>Creation</c> works just like in + <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso>. Port operations are not allowed across node boundaries. - The <c>Creation</c> works just like in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. + </p> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local ports. Planned to supersede <seealso marker="#PORT_EXT"> + <c>PORT_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. </p> </section> @@ -471,12 +466,65 @@ </row> <tcaption>PID_EXT</tcaption></table> <p> - Encodes a process identifier object (obtained from - <seealso marker="erlang:spawn/3"><c>erlang:spawn/3</c></seealso> or - friends). The <c>ID</c> and <c>Creation</c> fields works just like in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>, while - the <c>Serial</c> field is used to improve safety. - In <c>ID</c>, only 15 bits are significant; the rest are to be 0. + Same as <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso> + except the <c>Creation</c> field is only one byte and only two + bits are significant, the rest are to be 0. + </p> + </section> + + <section> + <marker id="NEW_PID_EXT"/> + <title>NEW_PID_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + <cell align="center">4</cell> + </row> + <row> + <cell align="center"><c>88</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Serial</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>NEW_PID_EXT</tcaption></table> + <p> + Encodes an Erlang process identifier object. + </p> + <taglist> + <tag><c>Node</c></tag> + <item><p>The name of the originating node, encoded using + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso + marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>.</p> + </item> + <tag><c>ID</c></tag> + <item><p>A 32-bit big endian unsigned integer. Only 15 bits are + significant; the rest are to be 0.</p> + </item> + <tag><c>Serial</c></tag> + <item><p>A 32-bit big endian unsigned integer. Only 13 bits are + significant; the rest are to be 0.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>A 32-bit big endian unsigned integer. All identifiers + originating from the same node incarnation must have identical <c>Creation</c> + values. This makes it possible to separate identifiers from old + (crashed) nodes from a new one. The value zero should be avoided for + normal operations as it is used as a wild card for debug purpose + (like a pid returned by <seealso marker="erts:erlang#list_to_pid/1"> + erlang:list_to_pid/1</seealso>).</p> + </item> + </taglist> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local processes. Planned to supersede <seealso marker="#PID_EXT"> + <c>PID_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. 
</p> </section> @@ -700,6 +748,30 @@ </section> <section> + <marker id="REFERENCE_EXT"/> + <title>REFERENCE_EXT (deprecated)</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">1</cell> + </row> + <row> + <cell align="center"><c>101</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>ID</c></cell> + <cell align="center"><c>Creation</c></cell> + </row> + <tcaption>REFERENCE_EXT</tcaption></table> + <p> + The same as <seealso marker="#NEW_REFERENCE_EXT"> + <c>NEW_REFERENCE_EXT</c></seealso> except <c>ID</c> is only one word + (<c>Len</c> = 1). + </p> + </section> + + <section> <marker id="NEW_REFERENCE_EXT"/> <title>NEW_REFERENCE_EXT</title> <table align="left"> @@ -719,29 +791,68 @@ </row> <tcaption>NEW_REFERENCE_EXT</tcaption></table> <p> - <c>Node</c> and <c>Creation</c> are as in - <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. - </p> - <p> - <c>ID</c> contains a sequence of big-endian unsigned integers - (4 bytes each, so <c>N'</c> is a multiple of 4), - but is to be regarded as uninterpreted data. - </p> - <p> - <c>N'</c> = 4 * <c>Len</c>. - </p> - <p> - In the first word (4 bytes) of <c>ID</c>, only 18 bits are - significant, the rest are to be 0. - In <c>Creation</c>, only two bits are significant, - the rest are to be 0. + The same as <seealso marker="#NEWER_REFERENCE_EXT"> + <c>NEWER_REFERENCE_EXT</c></seealso> <em>except</em>: </p> + <taglist> + <tag><c>ID</c></tag> + <item><p>In the first word (4 bytes) of <c>ID</c>, only 18 bits are + significant, the rest must be 0.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>Only one byte long and only two bits are significant, the rest must be 0.</p> + </item> + </taglist> + </section> + + <section> + <marker id="NEWER_REFERENCE_EXT"/> + <title>NEWER_REFERENCE_EXT</title> + <table align="left"> + <row> + <cell align="center">1</cell> + <cell align="center">2</cell> + <cell align="center">N</cell> + <cell align="center">4</cell> + <cell align="center">N'</cell> + </row> + <row> + <cell align="center"><c>90</c></cell> + <cell align="center"><c>Len</c></cell> + <cell align="center"><c>Node</c></cell> + <cell align="center"><c>Creation</c></cell> + <cell align="center"><c>ID ...</c></cell> + </row> + <tcaption>NEWER_REFERENCE_EXT</tcaption></table> <p> - <c>NEW_REFERENCE_EXT</c> was introduced with distribution version 4. - In version 4, <c>N'</c> is to be at most 12. + Encodes a reference term generated with + <seealso marker="erts:erlang#make_ref/0">erlang:make_ref/0</seealso>. </p> - <p> - See <seealso marker="#REFERENCE_EXT"><c>REFERENCE_EXT</c></seealso>. 
+ <taglist> + <tag><c>Node</c></tag> + <item><p>The name of the originating node, encoded using + <seealso marker="#ATOM_UTF8_EXT"><c>ATOM_UTF8_EXT</c></seealso>, + <seealso marker="#SMALL_ATOM_UTF8_EXT"><c>SMALL_ATOM_UTF8_EXT</c></seealso> + or <seealso marker="#ATOM_CACHE_REF"><c>ATOM_CACHE_REF</c></seealso>.</p> + </item> + <tag><c>Len</c></tag> + <item><p>A 16-bit big endian unsigned integer not larger than 3.</p> + </item> + <tag><c>ID</c></tag> + <item><p>A sequence of <c>Len</c> big-endian unsigned integers + (4 bytes each, so <c>N'</c> = 4 * <c>Len</c>), + but is to be regarded as uninterpreted data.</p> + </item> + <tag><c>Creation</c></tag> + <item><p>Works just like in + <seealso marker="#NEW_PID_EXT"><c>NEW_PID_EXT</c></seealso>.</p> + </item> + </taglist> + <p>Introduced in OTP 19, but only to be decoded and echoed back. Not + encoded for local references. Planned to supersede <seealso marker="#NEW_REFERENCE_EXT"> + <c>NEW_REFERENCE_EXT</c></seealso> in OTP 23 when + <seealso marker="erl_dist_protocol#dflags"><c>DFLAG_BIG_CREATON</c></seealso> + becomes mandatory. </p> </section> diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml index 21168eee23..63c27ee171 100644 --- a/erts/doc/src/notes.xml +++ b/erts/doc/src/notes.xml @@ -31,6 +31,310 @@ </header> <p>This document describes the changes made to the ERTS application.</p> +<section><title>Erts 9.3.3.10</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixes of install/release phase in build system.</p> + <list> <item>The source tree was modified when + installing/releasing and/or applying a patch.</item> + <item>Some files were installed with wrong access + rights.</item> <item>If applying a patch (using + <c>otp_patch_apply</c>) as another user (except root) + than the user that built the source, the documentation + was not properly updated.</item> </list> + <p> + Own Id: OTP-15551</p> + </item> + <item> + <p> + Minor fixes for <c>make clean</c>.</p> + <p> + Own Id: OTP-15657</p> + </item> + <item> + <p> + Fixed a bug in all <c>ets:select*</c> and + <c>ets:match*</c> functions that could in some rare cases + lead to very poor performance.</p> + <p> + Own Id: OTP-15660 Aux Id: ERL-869 </p> + </item> + <item> + <p> + Fix a possible deadlock when terminating the ERTS caused + by a dirty scheduler not releasing it's run-queue lock + when terminating.</p> + <p> + Own Id: OTP-15690 Aux Id: PR-2172 </p> + </item> + <item> + <p>Add missing documentation of new external tags + <c>NEW_PID</c>, <c>NEW_PORT</c> and + <c>NEWER_REFERENCE</c> introduced in OTP 19.</p> <p>These + new tags are planned to be "activated" in OTP 23 when + distribution capability flag <c>DFLAG_BIG_CREATION</c> + becomes mandatory. Older nodes (>= 19) are able to decode + these new tags and send them back to the new node. 
Nodes + older than OTP 23 will however never encode their own + local pids, ports and references using the new tags.</p> + <p> + Own Id: OTP-15766</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.9</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>Added an optional <c>./configure</c> flag to compile + the emulator with spectre mitigation: + <c>--with-spectre-mitigation</c></p> + <p>Note that this requires a recent version of GCC with + support for spectre mitigation and the + <c>--mindirect-branch=thunk</c> flag, such as + <c>8.1</c>.</p> + <p> + Own Id: OTP-15430 Aux Id: ERIERL-237 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.8</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + A bug that could cause dirty schedulers to become + unresponsive has been fixed.</p> + <p> + Own Id: OTP-15509 Aux Id: PR-2027, PR-2093 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.7</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in operator <c>band</c> of two negative + operands causing erroneous result if the absolute value + of one of the operands have the lowest <c>N*W</c> bits as + zero and the other absolute value is not larger than + <c>N*W</c> bits. <c>N</c> is an integer of 1 or larger + and <c>W</c> is 32 or 64 depending on word size.</p> + <p> + Own Id: OTP-15487 Aux Id: ERL-804 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.6</title> + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>List subtraction (The <c>--</c> operator) will now + yield properly on large inputs.</p> + <p> + Own Id: OTP-15371</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.5</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + ERTS internal trees of monitor structures could get into + an inconsistent state. This could cause <c>'DOWN'</c> + messages not to be delivered when they should, as well as + delivery of <c>'DOWN'</c> messages that should not be + delivered.</p> + <p> + This bug was introduced in ERTS version 9.0 (OTP 20.0) + and was fixed in ERTS version 10.0 (OTP 21.0) due to a + rewrite of the monitor code. That is, this bug only exist + in the OTP 20 release.</p> + <p> + Own Id: OTP-15399 Aux Id: ERL-751, ERIERL-262, OTP-14205 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.4</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in <c>ets:select_replace</c> when called with a + fully bound key could cause a following call to + <c>ets:next</c> or <c>ets:prev</c> to crash the emulator + or return invalid result.</p> + <p> + Own Id: OTP-15346</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.3</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed a bug which caused an emulator crash when + <c>enif_send()</c> was called by a NIF that executed on a + dirty scheduler. The bug was either triggered when the + NIF called <c>enif_send()</c> without a message + environment, or when the process executing the NIF was + <c>send</c> traced.</p> + <p> + Own Id: OTP-15223</p> + </item> + <item> + <p> + Fixed a bug causing some Erlang references to be + inconsistently ordered. 
This could for example cause + failure to look up certain elements with references as + keys in search data structures. This bug was introduced + in R13B02.</p> + <p> + Thanks to Simon Cornish for finding the bug and supplying + a fix.</p> + <p> + *** POTENTIAL INCOMPATIBILITY ***</p> + <p> + Own Id: OTP-15225</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.2</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixed a race condition in the inet driver that could + cause receive to hang when the emulator was compiled with + gcc 8.</p> + <p> + Own Id: OTP-15158 Aux Id: ERL-654 </p> + </item> + <item> + <p> + Fix bug in generation of erl_crash.dump, which could + cause VM to crash.</p> + <p> + Bug exist since erts-9.2 (OTP-20.2).</p> + <p> + Own Id: OTP-15181</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3.1</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixed a rare bug that could cause processes to be + scheduled after they had been freed.</p> + <p> + Own Id: OTP-15067 Aux Id: ERL-573 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.3</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in <c>ets</c> that could cause VM crash if + process A terminates after fixating a table and process B + deletes the table at "the same time". The table fixation + could be done with <c>ets:safe_fixtable</c> or if process + A terminates in the middle of a long running + <c>select</c> or <c>match</c> call.</p> + <p> + Own Id: OTP-15109</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.2</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed bug in <c>enif_binary_to_term</c> which could cause + memory corruption for immediate terms (atoms, small + integers, pids, ports, empty lists).</p> + <p> + Own Id: OTP-15080</p> + </item> + <item> + <p> + Fixed bug in <c>erlang:system_profile/2</c> that could + cause superfluous <c>{profile,_,active,_,_}</c> messages + for terminating processes.</p> + <p> + Own Id: OTP-15085</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 9.3.1</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>Fixed a crash in <c>heart:get_cmd/0</c> when the + stored command was too long.</p> + <p> + Own Id: OTP-15034</p> + </item> + </list> + </section> + +</section> + <section><title>Erts 9.3</title> <section><title>Fixed Bugs and Malfunctions</title> diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in index 1916b97a89..116b87affd 100644 --- a/erts/emulator/Makefile.in +++ b/erts/emulator/Makefile.in @@ -271,7 +271,6 @@ DEXPORT = @DEXPORT@ RANLIB = @RANLIB@ STRIP = strip PERL = @PERL@ -RM = @RM@ MKDIR = @MKDIR@ USING_MINGW=@MIXED_CYGWIN_MINGW@ @@ -461,13 +460,13 @@ $(ERTS_LIB): .PHONY: clean clean: - $(RM) -f $(GENERATE) - $(RM) -rf $(TARGET)/*.c $(TARGET)/*.h $(TARGET)/*-GENERATED - $(RM) -rf $(TARGET)/*/* - $(RM) -rf obj/$(TARGET) - $(RM) -rf pcre/obj/$(TARGET) $(PCRE_GENINC) - $(RM) -rf zlib/obj/$(TARGET) - $(RM) -rf bin/$(TARGET) + $(RM) $(GENERATE) + $(RM) -r $(TARGET)/*.c $(TARGET)/*.h $(TARGET)/*-GENERATED + $(RM) -r $(TARGET)/*/* + $(RM) -r obj/$(TARGET) + $(RM) -r pcre/obj/$(TARGET) $(PCRE_GENINC) + $(RM) -r zlib/obj/$(TARGET) + $(RM) -r bin/$(TARGET) cd $(ERTS_LIB_DIR) && $(MAKE) clean .PHONY: docs diff --git 
a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index b4e6c35579..7db0938b39 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1237,6 +1237,7 @@ init_emulator(void) * the instructions' C labels to the loader. * The second call starts execution of BEAM code. This call never returns. */ +ERTS_NO_RETPOLINE void process_main(Eterm * x_reg_array, FloatDef* f_reg_array) { static int init_done = 0; diff --git a/erts/emulator/beam/beam_ranges.c b/erts/emulator/beam/beam_ranges.c index c7b17d58f3..9570fb34db 100644 --- a/erts/emulator/beam/beam_ranges.c +++ b/erts/emulator/beam/beam_ranges.c @@ -35,10 +35,8 @@ typedef struct { /* * Used for crash dumping of literals. The size of erts_dump_lit_areas is - * always twice the number of active ranges (to allow for literals in both - * current and old code). + * always at least the number of active ranges. */ - ErtsLiteralArea** erts_dump_lit_areas; Uint erts_dump_num_lit_areas; @@ -180,8 +178,8 @@ erts_end_staging_ranges(int commit) (erts_aint_t) (r[dst].modules + r[dst].n / 2)); - if (r[dst].allocated * 2 > erts_dump_num_lit_areas) { - erts_dump_num_lit_areas *= 2; + if (r[dst].allocated > erts_dump_num_lit_areas) { + erts_dump_num_lit_areas = r[dst].allocated * 2; erts_dump_lit_areas = (ErtsLiteralArea **) erts_realloc(ERTS_ALC_T_CRASH_DUMP, (void *) erts_dump_lit_areas, diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index c5cb268f09..60f0ecf42b 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -1159,8 +1159,11 @@ static dsize_t I_band(ErtsDigit* x, dsize_t xl, short xsgn, *r++ = ~c1 & ~c2; x++; y++; } - while(xl--) - *r++ = ~*x++; + while(xl--) { + DSUBb(*x,0,b1,c1); + *r++ = ~c1; + x++; + } } } return I_btrail(r0, r, sign); diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types index 252bf1cc7e..af1133b853 100644 --- a/erts/emulator/beam/erl_alloc.types +++ b/erts/emulator/beam/erl_alloc.types @@ -322,6 +322,7 @@ type THR_PRGR_DATA LONG_LIVED SYSTEM thr_prgr_data type T_THR_PRGR_DATA SHORT_LIVED SYSTEM temp_thr_prgr_data type RELEASE_LAREA SHORT_LIVED SYSTEM release_literal_area +endif +type LIST_TRAP SHORT_LIVED PROCESSES list_bif_trap_state # # Types used for special emulators diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 4050fb6146..43d99db0b2 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -4459,7 +4459,16 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2) refbin)); } } - + else if (ERTS_IS_ATOM_STR("mbuf", BIF_ARG_1)) { + Uint sz = size_object(BIF_ARG_2); + ErlHeapFragment* frag = new_message_buffer(sz); + Eterm *hp = frag->mem; + Eterm copy = copy_struct(BIF_ARG_2, sz, &hp, &frag->off_heap); + frag->next = BIF_P->mbuf; + BIF_P->mbuf = frag; + BIF_P->mbuf_sz += sz; + BIF_RET(copy); + } } BIF_ERROR(BIF_P, BADARG); diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c index 73d327da3e..94a41c285a 100644 --- a/erts/emulator/beam/erl_bif_lists.c +++ b/erts/emulator/beam/erl_bif_lists.c @@ -29,12 +29,13 @@ #include "sys.h" #include "erl_vm.h" #include "global.h" -#include "erl_process.h" -#include "error.h" #include "bif.h" +#include "erl_binary.h" + static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List); + static BIF_RETTYPE append(Process* p, Eterm A, Eterm B) { Eterm list; @@ -146,103 +147,725 @@ BIF_RETTYPE append_2(BIF_ALIST_2) return append(BIF_P, BIF_ARG_1, BIF_ARG_2); } -/* - * 
erlang:'--'/2 - */ +/* erlang:'--'/2 + * + * Subtracts a list from another (LHS -- RHS), removing the first occurrence of + * each element in LHS from RHS. There is no type coercion so the elements must + * match exactly. + * + * The BIF is broken into several stages that can all trap individually, and it + * chooses its algorithm based on input size. If either input is small it will + * use a linear scan tuned to which side it's on, and if both inputs are large + * enough it will convert RHS into a multiset to provide good asymptotic + * behavior. */ -#define SMALL_VEC_SIZE 10 -static Eterm subtract(Process* p, Eterm A, Eterm B) -{ - Eterm list; - Eterm* hp; - Uint need; - Eterm res; - Eterm small_vec[SMALL_VEC_SIZE]; /* Preallocated memory for small lists */ - Eterm* vec_p; - Eterm* vp; - Sint i; - Sint n; - Sint m; - - if ((n = erts_list_length(A)) < 0) { - BIF_ERROR(p, BADARG); +#define SUBTRACT_LHS_THRESHOLD 16 +#define SUBTRACT_RHS_THRESHOLD 16 + +typedef enum { + SUBTRACT_STAGE_START, + SUBTRACT_STAGE_LEN_LHS, + + /* Naive linear scan that's efficient when + * LEN_LHS <= SUBTRACT_LHS_THRESHOLD. */ + SUBTRACT_STAGE_NAIVE_LHS, + + SUBTRACT_STAGE_LEN_RHS, + + /* As SUBTRACT_STAGE_NAIVE_LHS but for RHS. */ + SUBTRACT_STAGE_NAIVE_RHS, + + /* Creates a multiset from RHS for faster lookups before sweeping through + * LHS. The set is implemented as a red-black tree and duplicate elements + * are handled by a counter on each node. */ + SUBTRACT_STAGE_SET_BUILD, + SUBTRACT_STAGE_SET_FINISH +} ErtsSubtractCtxStage; + +typedef struct subtract_node__ { + struct subtract_node__ *parent; + struct subtract_node__ *left; + struct subtract_node__ *right; + int is_red; + + Eterm key; + Uint count; +} subtract_tree_t; + +typedef struct { + ErtsSubtractCtxStage stage; + + Eterm lhs_original; + Eterm rhs_original; + + Uint lhs_remaining; + Uint rhs_remaining; + + Eterm iterator; + + Eterm *result_cdr; + Eterm result; + + union { + Eterm lhs_elements[SUBTRACT_LHS_THRESHOLD]; + Eterm rhs_elements[SUBTRACT_RHS_THRESHOLD]; + + struct { + subtract_tree_t *tree; + + /* A memory area for the tree's nodes, saving us the need to have + * one allocation per node. 
*/ + subtract_tree_t *alloc_start; + subtract_tree_t *alloc; + } rhs_set; + } u; +} ErtsSubtractContext; + +#define ERTS_RBT_PREFIX subtract +#define ERTS_RBT_T subtract_tree_t +#define ERTS_RBT_KEY_T Eterm +#define ERTS_RBT_FLAGS_T int +#define ERTS_RBT_INIT_EMPTY_TNODE(T) \ + do { \ + (T)->parent = NULL; \ + (T)->left = NULL; \ + (T)->right = NULL; \ + } while(0) +#define ERTS_RBT_IS_RED(T) ((T)->is_red) +#define ERTS_RBT_SET_RED(T) ((T)->is_red = 1) +#define ERTS_RBT_IS_BLACK(T) (!ERTS_RBT_IS_RED(T)) +#define ERTS_RBT_SET_BLACK(T) ((T)->is_red = 0) +#define ERTS_RBT_GET_FLAGS(T) ((T)->is_red) +#define ERTS_RBT_SET_FLAGS(T, F) ((T)->is_red = F) +#define ERTS_RBT_GET_PARENT(T) ((T)->parent) +#define ERTS_RBT_SET_PARENT(T, P) ((T)->parent = P) +#define ERTS_RBT_GET_RIGHT(T) ((T)->right) +#define ERTS_RBT_SET_RIGHT(T, R) ((T)->right = (R)) +#define ERTS_RBT_GET_LEFT(T) ((T)->left) +#define ERTS_RBT_SET_LEFT(T, L) ((T)->left = (L)) +#define ERTS_RBT_GET_KEY(T) ((T)->key) +#define ERTS_RBT_IS_LT(KX, KY) (CMP_TERM(KX, KY) < 0) +#define ERTS_RBT_IS_EQ(KX, KY) EQ(KX, KY) +#define ERTS_RBT_WANT_LOOKUP_INSERT +#define ERTS_RBT_WANT_LOOKUP +#define ERTS_RBT_WANT_DELETE +#define ERTS_RBT_UNDEF + +#include "erl_rbtree.h" + +static int subtract_continue(Process *p, ErtsSubtractContext *context); + +static void subtract_ctx_dtor(ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_SET_BUILD: + case SUBTRACT_STAGE_SET_FINISH: + erts_free(ERTS_ALC_T_LIST_TRAP, context->u.rhs_set.alloc_start); + break; + default: + break; } - if ((m = erts_list_length(B)) < 0) { - BIF_ERROR(p, BADARG); +} + +static int subtract_ctx_bin_dtor(Binary *context_bin) { + ErtsSubtractContext *context = ERTS_MAGIC_BIN_DATA(context_bin); + subtract_ctx_dtor(context); + return 1; +} + +static void subtract_ctx_move(ErtsSubtractContext *from, + ErtsSubtractContext *to) { + int uses_result_cdr = 0; + + to->stage = from->stage; + + to->lhs_original = from->lhs_original; + to->rhs_original = from->rhs_original; + + to->lhs_remaining = from->lhs_remaining; + to->rhs_remaining = from->rhs_remaining; + + to->iterator = from->iterator; + to->result = from->result; + + switch (to->stage) { + case SUBTRACT_STAGE_NAIVE_LHS: + sys_memcpy(to->u.lhs_elements, + from->u.lhs_elements, + sizeof(Eterm) * to->lhs_remaining); + break; + case SUBTRACT_STAGE_NAIVE_RHS: + sys_memcpy(to->u.rhs_elements, + from->u.rhs_elements, + sizeof(Eterm) * to->rhs_remaining); + + uses_result_cdr = 1; + break; + case SUBTRACT_STAGE_SET_FINISH: + uses_result_cdr = 1; + /* FALL THROUGH */ + case SUBTRACT_STAGE_SET_BUILD: + to->u.rhs_set.alloc_start = from->u.rhs_set.alloc_start; + to->u.rhs_set.alloc = from->u.rhs_set.alloc; + to->u.rhs_set.tree = from->u.rhs_set.tree; + break; + default: + break; } - - if (n == 0) - BIF_RET(NIL); - if (m == 0) - BIF_RET(A); - - /* allocate element vector */ - if (n <= SMALL_VEC_SIZE) - vec_p = small_vec; - else - vec_p = (Eterm*) erts_alloc(ERTS_ALC_T_TMP, n * sizeof(Eterm)); - - /* PUT ALL ELEMENTS IN VP */ - vp = vec_p; - list = A; - i = n; - while(i--) { - Eterm* listp = list_val(list); - *vp++ = CAR(listp); - list = CDR(listp); + + if (uses_result_cdr) { + if (from->result_cdr == &from->result) { + to->result_cdr = &to->result; + } else { + to->result_cdr = from->result_cdr; + } } - - /* UNMARK ALL DELETED CELLS */ - list = B; - m = 0; /* number of deleted elements */ - while(is_list(list)) { - Eterm* listp = list_val(list); - Eterm elem = CAR(listp); - i = n; - vp = vec_p; - while(i--) { - if 
(is_value(*vp) && eq(*vp, elem)) { - *vp = THE_NON_VALUE; - m++; - break; - } - vp++; - } - list = CDR(listp); +} + +static Eterm subtract_create_trap_state(Process *p, + ErtsSubtractContext *context) { + Binary *state_bin; + Eterm *hp; + + state_bin = erts_create_magic_binary(sizeof(ErtsSubtractContext), + subtract_ctx_bin_dtor); + + subtract_ctx_move(context, ERTS_MAGIC_BIN_DATA(state_bin)); + + hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE); + + return erts_mk_magic_ref(&hp, &MSO(p), state_bin); +} + +static int subtract_enter_len_lhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_LHS; + + context->iterator = context->lhs_original; + context->lhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_enter_len_rhs(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_LEN_RHS; + + context->iterator = context->rhs_original; + context->rhs_remaining = 0; + + return subtract_continue(p, context); +} + +static int subtract_get_length(Process *p, Eterm *iterator_p, Uint *count_p) { + static const Sint ELEMENTS_PER_RED = 32; + + Sint budget, count; + Eterm iterator; + + budget = ELEMENTS_PER_RED * ERTS_BIF_REDS_LEFT(p); + iterator = *iterator_p; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + for (count = 0; count < budget && is_list(iterator); count++) { + iterator = CDR(list_val(iterator)); } - - if (m == n) /* All deleted ? */ - res = NIL; - else if (m == 0) /* None deleted ? */ - res = A; - else { /* REBUILD LIST */ - res = NIL; - need = 2*(n - m); - hp = HAlloc(p, need); - vp = vec_p + n - 1; - while(vp >= vec_p) { - if (is_value(*vp)) { - res = CONS(hp, *vp, res); - hp += 2; - } - vp--; - } + + if (!is_list(iterator) && !is_nil(iterator)) { + return -1; + } + + BUMP_REDS(p, count / ELEMENTS_PER_RED); + + *iterator_p = iterator; + *count_p += count; + + if (is_nil(iterator)) { + return 1; } - if (vec_p != small_vec) - erts_free(ERTS_ALC_T_TMP, (void *) vec_p); - BIF_RET(res); + + return 0; } -BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_enter_naive_lhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_LHS; + + context->iterator = context->rhs_original; + context->result = NIL; + + iterator = context->lhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_LHS_THRESHOLD); + + context->u.lhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->lhs_remaining); + + return subtract_continue(p, context); } -BIF_RETTYPE subtract_2(BIF_ALIST_2) -{ - return subtract(BIF_P, BIF_ARG_1, BIF_ARG_2); +static int subtract_naive_lhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + + value = CAR(cell); + next = CDR(cell); + + for (found_at = 0; found_at < context->lhs_remaining; found_at++) { + if (EQ(value, context->u.lhs_elements[found_at])) { + /* We shift the array one step down as we have to preserve + * order. + * + * Note that we can't exit early as that would suppress errors + * in the right-hand side (this runs prior to determining the + * length of RHS). 
*/ + + context->lhs_remaining--; + sys_memmove(&context->u.lhs_elements[found_at], + &context->u.lhs_elements[found_at + 1], + (context->lhs_remaining - found_at) * sizeof(Eterm)); + break; + } + } + + checks += MAX(1, context->lhs_remaining); + context->iterator = next; + } + + BUMP_REDS(p, MIN(checks, budget) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + return 0; + } else if (!is_nil(context->iterator)) { + return -1; + } + + if (context->lhs_remaining > 0) { + Eterm *hp; + int i; + + hp = HAlloc(p, context->lhs_remaining * 2); + + for (i = context->lhs_remaining - 1; i >= 0; i--) { + Eterm value = context->u.lhs_elements[i]; + + context->result = CONS(hp, value, context->result); + hp += 2; + } + } + + ASSERT(context->lhs_remaining > 0 || context->result == NIL); + + return 1; +} + +static int subtract_enter_naive_rhs(Process *p, ErtsSubtractContext *context) { + Eterm iterator; + int i = 0; + + context->stage = SUBTRACT_STAGE_NAIVE_RHS; + + context->iterator = context->lhs_original; + context->result_cdr = &context->result; + context->result = NIL; + + iterator = context->rhs_original; + + while (is_list(iterator)) { + const Eterm *cell = list_val(iterator); + + ASSERT(i < SUBTRACT_RHS_THRESHOLD); + + context->u.rhs_elements[i++] = CAR(cell); + iterator = CDR(cell); + } + + ASSERT(i == context->rhs_remaining); + + return subtract_continue(p, context); +} + +static int subtract_naive_rhs(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 16; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + const Eterm *cell; + Eterm value, next; + int found_at; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + for (found_at = context->rhs_remaining - 1; found_at >= 0; found_at--) { + if (EQ(value, context->u.rhs_elements[found_at])) { + break; + } + } + + if (found_at < 0) { + /* Destructively add the value to the result. This is safe + * since the GC is disabled and the unfinished term is never + * leaked to the outside world. */ + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else if (found_at >= 0) { + Eterm swap; + + if (context->rhs_remaining-- == 1) { + /* We've run out of items to remove, so the rest of the + * result will be equal to the remainder of the input. We know + * that LHS is well-formed as any errors would've been reported + * during length determination. */ + *context->result_cdr = next; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + return 1; + } + + swap = context->u.rhs_elements[context->rhs_remaining]; + context->u.rhs_elements[found_at] = swap; + } + + checks += context->rhs_remaining; + context->iterator = next; + context->lhs_remaining--; + } + + /* The result only has to be terminated when returning it to the user, but + * we're doing it when trapping as well to prevent headaches when + * debugging. 
*/ + *context->result_cdr = NIL; + + BUMP_REDS(p, MIN(budget, checks) / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_enter_set_build(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_BUILD; + + context->u.rhs_set.alloc_start = + erts_alloc(ERTS_ALC_T_LIST_TRAP, + context->rhs_remaining * sizeof(subtract_tree_t)); + + context->u.rhs_set.alloc = context->u.rhs_set.alloc_start; + context->u.rhs_set.tree = NULL; + + context->iterator = context->rhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_build(Process *p, ErtsSubtractContext *context) { + const static Sint INSERTIONS_PER_RED = 16; + Sint budget, insertions; + + budget = INSERTIONS_PER_RED * ERTS_BIF_REDS_LEFT(p); + insertions = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (insertions < budget && is_list(context->iterator)) { + subtract_tree_t *existing_node, *new_node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + new_node = context->u.rhs_set.alloc; + new_node->key = value; + new_node->count = 1; + + existing_node = subtract_rbt_lookup_insert(&context->u.rhs_set.tree, + new_node); + + if (existing_node != NULL) { + existing_node->count++; + } else { + context->u.rhs_set.alloc++; + } + + context->iterator = next; + insertions++; + } + + BUMP_REDS(p, insertions / INSERTIONS_PER_RED); + + ASSERT(is_list(context->iterator) || is_nil(context->iterator)); + ASSERT(context->u.rhs_set.tree != NULL); + + return is_nil(context->iterator); +} + +static int subtract_enter_set_finish(Process *p, ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_SET_FINISH; + + context->result_cdr = &context->result; + context->result = NIL; + + context->iterator = context->lhs_original; + + return subtract_continue(p, context); +} + +static int subtract_set_finish(Process *p, ErtsSubtractContext *context) { + const Sint CHECKS_PER_RED = 8; + Sint checks, budget; + + budget = CHECKS_PER_RED * ERTS_BIF_REDS_LEFT(p); + checks = 0; + +#ifdef DEBUG + budget = budget / 10 + 1; +#endif + + while (checks < budget && is_list(context->iterator)) { + subtract_tree_t *node; + const Eterm *cell; + Eterm value, next; + + cell = list_val(context->iterator); + value = CAR(cell); + next = CDR(cell); + + ASSERT(context->rhs_remaining > 0); + + node = subtract_rbt_lookup(context->u.rhs_set.tree, value); + + if (node == NULL) { + Eterm *hp = HAllocX(p, 2, context->lhs_remaining * 2); + + *context->result_cdr = make_list(hp); + context->result_cdr = &CDR(hp); + + CAR(hp) = value; + } else { + if (context->rhs_remaining-- == 1) { + *context->result_cdr = next; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + return 1; + } + + if (node->count-- == 1) { + subtract_rbt_delete(&context->u.rhs_set.tree, node); + } + } + + context->iterator = next; + context->lhs_remaining--; + checks++; + } + + *context->result_cdr = NIL; + + BUMP_REDS(p, checks / CHECKS_PER_RED); + + if (is_list(context->iterator)) { + ASSERT(context->lhs_remaining > 0 && context->rhs_remaining > 0); + return 0; + } + + return 1; +} + +static int subtract_continue(Process *p, ErtsSubtractContext *context) { + switch (context->stage) { + case SUBTRACT_STAGE_START: { + return subtract_enter_len_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_LHS: { + int res = subtract_get_length(p, + &context->iterator, + 
&context->lhs_remaining); + + if (res != 1) { + return res; + } + + if (context->lhs_remaining <= SUBTRACT_LHS_THRESHOLD) { + return subtract_enter_naive_lhs(p, context); + } + + return subtract_enter_len_rhs(p, context); + } + + case SUBTRACT_STAGE_NAIVE_LHS: { + return subtract_naive_lhs(p, context); + } + + case SUBTRACT_STAGE_LEN_RHS: { + int res = subtract_get_length(p, + &context->iterator, + &context->rhs_remaining); + + if (res != 1) { + return res; + } + + /* We've walked through both lists fully now so we no longer need + * to check for errors past this point. */ + + if (context->rhs_remaining <= SUBTRACT_RHS_THRESHOLD) { + return subtract_enter_naive_rhs(p, context); + } + + return subtract_enter_set_build(p, context); + } + + case SUBTRACT_STAGE_NAIVE_RHS: { + return subtract_naive_rhs(p, context); + } + + case SUBTRACT_STAGE_SET_BUILD: { + int res = subtract_set_build(p, context); + + if (res != 1) { + return res; + } + + return subtract_enter_set_finish(p, context); + } + + case SUBTRACT_STAGE_SET_FINISH: { + return subtract_set_finish(p, context); + } + + default: + ERTS_ASSERT(!"unreachable"); + } +} + +static int subtract_start(Process *p, Eterm lhs, Eterm rhs, + ErtsSubtractContext *context) { + context->stage = SUBTRACT_STAGE_START; + + context->lhs_original = lhs; + context->rhs_original = rhs; + + return subtract_continue(p, context); } +/* erlang:'--'/2 */ +static Eterm subtract(Export *bif_entry, BIF_ALIST_2) { + Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2; + + if ((is_list(lhs) || is_nil(lhs)) && (is_list(rhs) || is_nil(rhs))) { + /* We start with the context on the stack in the hopes that we won't + * have to trap. */ + ErtsSubtractContext context; + int res; + + res = subtract_start(BIF_P, lhs, rhs, &context); + + if (res == 0) { + Eterm state_mref; + + state_mref = subtract_create_trap_state(BIF_P, &context); + erts_set_gc_state(BIF_P, 0); + + BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL); + } + + subtract_ctx_dtor(&context); + + if (res < 0) { + BIF_ERROR(BIF_P, BADARG); + } + + BIF_RET(context.result); + } else if (is_internal_magic_ref(lhs)) { + ErtsSubtractContext *context; + int (*dtor)(Binary*); + Binary *magic_bin; + + int res; + + magic_bin = erts_magic_ref2bin(lhs); + dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin); + + if (dtor != subtract_ctx_bin_dtor) { + BIF_ERROR(BIF_P, BADARG); + } + + ASSERT(BIF_P->flags & F_DISABLE_GC); + ASSERT(rhs == NIL); + + context = ERTS_MAGIC_BIN_DATA(magic_bin); + res = subtract_continue(BIF_P, context); + + if (res == 0) { + BIF_TRAP2(bif_entry, BIF_P, lhs, NIL); + } + + erts_set_gc_state(BIF_P, 1); + + if (res < 0) { + ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry, + context->lhs_original, + context->rhs_original); + } + + BIF_RET(context->result); + } + + ASSERT(!(BIF_P->flags & F_DISABLE_GC)); + + BIF_ERROR(BIF_P, BADARG); +} + +BIF_RETTYPE ebif_minusminus_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_ebif_minusminus_2], BIF_CALL_ARGS); +} + +BIF_RETTYPE subtract_2(BIF_ALIST_2) { + return subtract(bif_export[BIF_subtract_2], BIF_CALL_ARGS); +} + + BIF_RETTYPE lists_member_2(BIF_ALIST_2) { Eterm term; diff --git a/erts/emulator/beam/erl_bif_unique.h b/erts/emulator/beam/erl_bif_unique.h index 9aa631fde9..e3a633080d 100644 --- a/erts/emulator/beam/erl_bif_unique.h +++ b/erts/emulator/beam/erl_bif_unique.h @@ -242,11 +242,11 @@ erts_internal_ref_number_cmp(Uint32 num1[ERTS_REF_NUMBERS], Uint32 num2[ERTS_REF_NUMBERS]) { if (num1[2] != num2[2]) - return (int) ((Sint64) num1[2] - (Sint64) num2[2]); + return num1[2] > num2[2] ? 
1 : -1; if (num1[1] != num2[1]) - return (int) ((Sint64) num1[1] - (Sint64) num2[1]); + return num1[1] > num2[1] ? 1 : -1; if (num1[0] != num2[0]) - return (int) ((Sint64) num1[0] - (Sint64) num2[0]); + return num1[0] > num2[0] ? 1 : -1; return 0; } diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c index 6d4a895ef6..68d984014f 100644 --- a/erts/emulator/beam/erl_db.c +++ b/erts/emulator/beam/erl_db.c @@ -1948,8 +1948,6 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) save_owned_table(BIF_P, tb); } - tid_clear(BIF_P, tb); - if (is_table_named(tb)) remove_named_tab(tb, 0); @@ -1958,6 +1956,7 @@ BIF_RETTYPE ets_delete_1(BIF_ALIST_1) tb->common.heir = am_none; reds -= free_fixations_locked(BIF_P, tb); + tid_clear(BIF_P, tb); db_unlock(tb, LCK_WRITE); if (free_table_continue(BIF_P, tb, reds) < 0) { @@ -3680,7 +3679,6 @@ erts_db_process_exiting(Process *c_p, ErtsProcLocks c_p_locks) && give_away_to_heir(c_p, tb)) { break; } - tid_clear(c_p, tb); /* Clear all access bits. */ tb->common.status &= ~(DB_PROTECTED | DB_PUBLIC | DB_PRIVATE); tb->common.status |= DB_DELETE; @@ -3690,6 +3688,7 @@ erts_db_process_exiting(Process *c_p, ErtsProcLocks c_p_locks) free_heir_data(tb); reds -= free_fixations_locked(c_p, tb); + tid_clear(c_p, tb); db_unlock(tb, LCK_WRITE); state->op = FREE_OWNED_TABLE; break; @@ -3850,7 +3849,7 @@ static void free_fixations_op(DbFixation* fix, void* vctx) struct free_fixations_ctx* ctx = (struct free_fixations_ctx*) vctx; erts_aint_t diff; - ASSERT(!btid2tab(fix->tabs.btid)); + ASSERT(btid2tab(fix->tabs.btid) == ctx->tb); ASSERT(fix->counter > 0); ASSERT(ctx->tb->common.status & DB_DELETE); diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c index cf928a9035..47d304f860 100644 --- a/erts/emulator/beam/erl_db_hash.c +++ b/erts/emulator/beam/erl_db_hash.c @@ -1328,11 +1328,7 @@ static int match_traverse(Process* p, DbTableHash* tb, unlock_hash_function(lck); break; } - if (iterations_left <= 0 || MBUF(p)) { - /* - * We have either reached our limit, or just created some heap fragments. - * Since many heap fragments will make the GC slower, trap and GC now. - */ + if (iterations_left <= 0) { unlock_hash_function(lck); ret_value = on_trap(context_ptr, slot_ix, got, &mpi.mp, ret); goto done; @@ -1451,11 +1447,7 @@ static int match_traverse_continue(Process* p, DbTableHash* tb, unlock_hash_function(lck); break; } - if (iterations_left <= 0 || MBUF(p)) { - /* - * We have either reached our limit, or just created some heap fragments. - * Since many heap fragments will make the GC slower, trap and GC now. 
- */ + if (iterations_left <= 0) { unlock_hash_function(lck); ret_value = on_trap(context_ptr, slot_ix, got, mpp, ret); goto done; diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c index 7c80e92e50..d2b0bf95bd 100644 --- a/erts/emulator/beam/erl_db_tree.c +++ b/erts/emulator/beam/erl_db_tree.c @@ -1884,22 +1884,14 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid, sc.mp = mpi.mp; sc.all_objects = mpi.all_objects; - stack = get_static_stack(tb); if (!mpi.got_partial && mpi.some_limitation && CMP_EQ(mpi.least,mpi.most)) { - TreeDbTerm* term = *(mpi.save_term); doit_select_replace(tb,mpi.save_term,&sc,0 /* dummy */); - if (stack != NULL) { - if (TOP_NODE(stack) == term) - // throw away potentially invalid reference - REPLACE_TOP_NODE(stack, *(mpi.save_term)); - release_stack(tb, stack); - } + reset_static_stack(tb); /* may refer replaced term */ RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE); } - if (stack == NULL) - stack = get_any_stack(tb); + stack = get_any_stack(tb); if (mpi.some_limitation) { if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) { @@ -3344,13 +3336,6 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr, if (is_value(ret)) { sc->accum = CONS(hp, ret, sc->accum); } - if (MBUF(sc->p)) { - /* - * Force a trap and GC if a heap fragment was created. Many heap fragments - * make the GC slow. - */ - sc->max = 0; - } if (--(sc->max) <= 0) { return 0; } @@ -3407,13 +3392,6 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr, ++(sc->got); sc->accum = CONS(hp, ret, sc->accum); } - if (MBUF(sc->p)) { - /* - * Force a trap and GC if a heap fragment was created. Many heap fragments - * make the GC slow. - */ - sc->max = 0; - } if (--(sc->max) <= 0 || sc->got == sc->chunk_size) { return 0; } diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index b96db5d054..f8e964e2cf 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -409,8 +409,18 @@ static void full_flush_env(ErlNifEnv* env) static void full_cache_env(ErlNifEnv* env) { #ifdef ERTS_DIRTY_SCHEDULERS - if (env->proc->static_flags & ERTS_STC_FLG_SHADOW_PROC) + if (env->proc->static_flags & ERTS_STC_FLG_SHADOW_PROC) { erts_cache_dirty_shadow_proc(env->proc); + /* + * If shadow proc had heap fragments when flushed + * those have now been moved to the real proc. + * Ensure heap pointers do not point into a heap + * fragment on real proc... 
+ */ + ASSERT(!env->proc->mbuf); + env->hp_end = HEAP_LIMIT(env->proc); + env->hp = HEAP_TOP(env->proc); + } #endif cache_env(env); } @@ -1251,8 +1261,10 @@ size_t enif_binary_to_term(ErlNifEnv *dst_env, if (is_non_value(*term)) { return 0; } - erts_factory_close(&factory); - cache_env(dst_env); + if (size > 0) { + erts_factory_close(&factory); + cache_env(dst_env); + } ASSERT(bp > data); return bp - data; diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index 9b22f024b0..be94418755 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -88,10 +88,6 @@ #undef HARDDEBUG #endif -#ifdef HARDDEBUG -#define HARDDEBUG_RUNQS -#endif - #ifdef HIPE #include "hipe_mode_switch.h" /* for hipe_init_process() */ #include "hipe_signal.h" /* for hipe_thread_signal_init() */ @@ -3420,6 +3416,46 @@ static void suspend_scheduler(ErtsSchedulerData *esdp); #endif /* ERTS_SMP */ +#ifdef HARDDEBUG +#define ERTS_HDBG_CHK_SLEEP_LIST(SL, L, F, FN) \ + check_sleepers_list((SL), (L), (F), (FN)) +static void check_sleepers_list(ErtsSchedulerSleepList *sl, + int lock, + ErtsSchedulerSleepInfo *find, + ErtsSchedulerSleepInfo *find_not) +{ + ErtsSchedulerSleepInfo *last_out; + int found = 0; + + if (lock) + erts_smp_spin_lock(&sl->lock); + + ERTS_ASSERT(!find_not || (!find_not->next && !find_not->prev)); + + last_out = sl->list; + if (last_out) { + ErtsSchedulerSleepInfo *tmp = last_out; + do { + ERTS_ASSERT(tmp->next); + ERTS_ASSERT(tmp->prev); + ERTS_ASSERT(tmp->next->prev == tmp); + ERTS_ASSERT(tmp->prev->next == tmp); + ERTS_ASSERT(tmp != find_not); + if (tmp == find) + found = !0; + tmp = tmp->next; + + } while (tmp != last_out); + } + ERTS_ASSERT(!find || found); + + if (lock) + erts_smp_spin_unlock(&sl->lock); +} +#else +#define ERTS_HDBG_CHK_SLEEP_LIST(SL, L, F, FN) ((void) 0) +#endif + static void scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) { @@ -3436,32 +3472,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(rq)); -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_lock(&rq->sleepers.lock); -#endif flgs = sched_prep_spin_wait(ssi); if (flgs & ERTS_SSI_FLG_SUSPENDED) { /* Go suspend instead... 
*/ -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) - erts_smp_spin_unlock(&rq->sleepers.lock); -#endif return; } -#ifdef ERTS_DIRTY_SCHEDULERS - if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { - ssi->prev = NULL; - ssi->next = rq->sleepers.list; - if (rq->sleepers.list) - rq->sleepers.list->prev = ssi; - rq->sleepers.list = ssi; - erts_smp_spin_unlock(&rq->sleepers.lock); - dirty_active(esdp, -1); - } -#endif - /* * If all schedulers are waiting, one of them *should* * be waiting in erl_sys_schedule() @@ -3471,6 +3487,28 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) sched_waiting(esdp->no, rq); +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_smp_spin_lock(&rq->sleepers.lock); + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, ssi); + ASSERT(!ssi->next); /* Not in sleepers list */ + ASSERT(!ssi->prev); + if (!rq->sleepers.list) { + ssi->next = ssi->prev = ssi; + rq->sleepers.list = ssi; + } + else { + ssi->prev = rq->sleepers.list; + ssi->next = rq->sleepers.list->next; + ssi->prev->next = ssi; + ssi->next->prev = ssi; + } + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, ssi, NULL); + erts_smp_spin_unlock(&rq->sleepers.lock); + dirty_active(esdp, -1); + } +#endif + erts_smp_runq_unlock(rq); spincount = sched_busy_wait.tse; @@ -3597,7 +3635,31 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq) erts_thr_progress_active(esdp, thr_prgr_active = 1); sched_wall_time_change(esdp, 1); } - + +#ifdef ERTS_DIRTY_SCHEDULERS + if (ERTS_RUNQ_IX_IS_DIRTY(rq->ix)) { + erts_smp_spin_lock(&rq->sleepers.lock); + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, ssi->next ? ssi : NULL, NULL); + if (ssi->next) { /* Still in list... */ + if (ssi->next == ssi) { + ASSERT(rq->sleepers.list == ssi); + ASSERT(ssi->prev == ssi); + rq->sleepers.list = NULL; + } + else { + ASSERT(ssi->prev != ssi); + if (rq->sleepers.list == ssi) + rq->sleepers.list = ssi->next; + ssi->prev->next = ssi->next; + ssi->next->prev = ssi->prev; + } + ssi->next = ssi->prev = NULL; + } + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, ssi); + erts_smp_spin_unlock(&rq->sleepers.lock); + } +#endif + erts_smp_runq_lock(rq); sched_active(esdp->no, rq); @@ -3883,55 +3945,44 @@ wake_scheduler(ErtsRunQueue *rq) #ifdef ERTS_DIRTY_SCHEDULERS static void -wake_dirty_schedulers(ErtsRunQueue *rq, int one) +wake_dirty_scheduler(ErtsRunQueue *rq) { - ErtsSchedulerSleepInfo *ssi; + ErtsSchedulerSleepInfo *lo_ssi, *fo_ssi; ErtsSchedulerSleepList *sl; ASSERT(ERTS_RUNQ_IX_IS_DIRTY(rq->ix)); sl = &rq->sleepers; erts_smp_spin_lock(&sl->lock); - ssi = sl->list; - if (!ssi) { + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, NULL); + lo_ssi = sl->list; + if (!lo_ssi) { erts_smp_spin_unlock(&sl->lock); - if (one) - wake_scheduler(rq); - } else if (one) { + wake_scheduler(rq); + } + else { erts_aint32_t flgs; - if (ssi->prev) - ssi->prev->next = ssi->next; - else { - ASSERT(sl->list == ssi); - sl->list = ssi->next; + fo_ssi = lo_ssi->next; + ASSERT(fo_ssi->prev == lo_ssi); + if (fo_ssi == lo_ssi) { + ASSERT(lo_ssi->prev == lo_ssi); + sl->list = NULL; + } + else { + ASSERT(lo_ssi->prev != lo_ssi); + lo_ssi->next = fo_ssi->next; + fo_ssi->next->prev = fo_ssi->prev; } - if (ssi->next) - ssi->next->prev = ssi->prev; - - erts_smp_spin_unlock(&sl->lock); - - ERTS_THR_MEMORY_BARRIER; - flgs = ssi_flags_set_wake(ssi); - erts_sched_finish_poke(ssi, flgs); - } else { - sl->list = NULL; + fo_ssi->next = fo_ssi->prev = NULL; + ERTS_HDBG_CHK_SLEEP_LIST(&rq->sleepers, 0, NULL, fo_ssi); 
erts_smp_spin_unlock(&sl->lock); ERTS_THR_MEMORY_BARRIER; - do { - ErtsSchedulerSleepInfo *wake_ssi = ssi; - ssi = ssi->next; - erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi)); - } while (ssi); + flgs = ssi_flags_set_wake(fo_ssi); + erts_sched_finish_poke(fo_ssi, flgs); } } -static void -wake_dirty_scheduler(ErtsRunQueue *rq) -{ - wake_dirty_schedulers(rq, 1); -} - #endif #define ERTS_NO_USED_RUNQS_SHIFT 16 @@ -4176,6 +4227,8 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) ERTS_SMP_DATA_DEPENDENCY_READ_MEMORY_BARRIER; state = erts_smp_atomic32_read_nob(&p->state); + ASSERT(state & ERTS_PSFLG_IN_RUNQ); + if (statep) *statep = state; @@ -4183,8 +4236,7 @@ dequeue_process(ErtsRunQueue *runq, int prio_q, erts_aint32_t *statep) rqi = &runq->procs.prio_info[prio]; - if (p) - unqueue_process(runq, rpq, rqi, prio, NULL, p); + unqueue_process(runq, rpq, rqi, prio, NULL, p); return p; } @@ -4512,7 +4564,7 @@ evacuate_run_queue(ErtsRunQueue *rq, erts_smp_runq_unlock(to_rq); smp_notify_inc_runq(to_rq); - erts_smp_runq_lock(to_rq); + erts_smp_runq_lock(rq); } if (rq->ports.start) { @@ -4593,22 +4645,17 @@ evacuate_run_queue(ErtsRunQueue *rq, free_proxy_proc(proc); else { erts_aint32_t clr_bits; -#ifdef DEBUG - erts_aint32_t old; -#endif clr_bits = ERTS_PSFLG_IN_RUNQ; clr_bits |= qbit << ERTS_PSFLGS_IN_PRQ_MASK_OFFSET; -#ifdef DEBUG - old = -#else - (void) -#endif - erts_smp_atomic32_read_band_mb(&proc->state, - ~clr_bits); - ASSERT((old & clr_bits) == clr_bits); + state = erts_smp_atomic32_read_band_mb(&proc->state, ~clr_bits); + ASSERT((state & clr_bits) == clr_bits); + if (state & ERTS_PSFLG_FREE) { + /* free and not queued by proxy */ + erts_proc_dec_refc(proc); + } } goto handle_next_proc; @@ -6371,6 +6418,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online for (ix = 0; ix < no_dirty_cpu_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_cpu_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->next = NULL; + ssi->prev = NULL; ssi->event = NULL; /* initialized in sched_dirty_cpu_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } @@ -6381,6 +6430,8 @@ erts_init_scheduling(int no_schedulers, int no_schedulers_online for (ix = 0; ix < no_dirty_io_schedulers; ix++) { ErtsSchedulerSleepInfo *ssi = &aligned_dirty_io_sched_sleep_info[ix].ssi; erts_smp_atomic32_init_nob(&ssi->flags, 0); + ssi->next = NULL; + ssi->prev = NULL; ssi->event = NULL; /* initialized in sched_dirty_io_thread_func */ erts_atomic32_init_nob(&ssi->aux_work, 0); } @@ -6726,13 +6777,14 @@ fin_dirty_enq_s_change(Process *p, /* Already enqueue by someone else... */ if (pstruct_reserved) { /* We reserved process struct for enqueue; clear it... 
*/ -#ifdef DEBUG - erts_aint32_t old = -#else - (void) -#endif - erts_smp_atomic32_read_band_nob(&p->state, ~ERTS_PSFLG_IN_RUNQ); - ASSERT(old & ERTS_PSFLG_IN_RUNQ); + erts_aint32_t state; + + state = erts_smp_atomic32_read_band_nob(&p->state, ~ERTS_PSFLG_IN_RUNQ); + ASSERT(state & ERTS_PSFLG_IN_RUNQ); + + if (state & ERTS_PSFLG_FREE) { + erts_proc_dec_refc(p); + } } return 0; } @@ -6903,8 +6955,9 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, enqueue = ERTS_ENQUEUE_NOT; n &= ~running_flgs; - if ((a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) - || (a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE) { + if ((!!(a & (ERTS_PSFLG_ACTIVE_SYS|ERTS_PSFLG_DIRTY_ACTIVE_SYS)) + | ((a & (ERTS_PSFLG_ACTIVE|ERTS_PSFLG_SUSPENDED)) == ERTS_PSFLG_ACTIVE)) + & !(a & ERTS_PSFLG_FREE)) { enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); } a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); @@ -7117,13 +7170,13 @@ change_proc_schedule_state(Process *p, if (((n & (ERTS_PSFLG_SUSPENDED | ERTS_PSFLG_ACTIVE)) == ERTS_PSFLG_ACTIVE) - && (!(a & (ERTS_PSFLG_ACTIVE_SYS - | ERTS_PSFLG_RUNNING - | ERTS_PSFLG_RUNNING_SYS - | ERTS_PSFLG_DIRTY_RUNNING - | ERTS_PSFLG_DIRTY_RUNNING_SYS) - && (!(a & ERTS_PSFLG_ACTIVE) - || (a & ERTS_PSFLG_SUSPENDED))))) { + & ((a & (ERTS_PSFLG_SUSPENDED + | ERTS_PSFLG_ACTIVE)) != ERTS_PSFLG_ACTIVE) + & !(a & (ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_RUNNING + | ERTS_PSFLG_RUNNING_SYS + | ERTS_PSFLG_DIRTY_RUNNING + | ERTS_PSFLG_DIRTY_RUNNING_SYS))) { /* We activated a prevously inactive process */ profile_runnable_proc(p, am_active); } @@ -7158,129 +7211,72 @@ erts_schedule_process(Process *p, erts_aint32_t state, ErtsProcLocks locks) schedule_process(p, state, locks); } -static int -schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, - erts_aint32_t *fail_state_p) -{ - int res; - int locked; - ErtsProcSysTaskQs *stqs, *free_stqs; - erts_aint32_t fail_state, state, a, n, enq_prio; +/* Enqueues the given sys task on the process and schedules it. The task may be + * NULL if only scheduling is desired. 
*/ +static ERTS_INLINE erts_aint32_t +active_sys_enqueue(Process *p, ErtsProcSysTask *sys_task, + erts_aint32_t task_prio, erts_aint32_t enable_flags, + erts_aint32_t state, erts_aint32_t *fail_state_p) +{ + int runnable_procs = erts_system_profile_flags.runnable_procs; + erts_aint32_t n, a, enq_prio, fail_state; + int already_scheduled; + int status_locked; int enqueue; /* < 0 -> use proxy */ - unsigned int prof_runnable_procs; + enable_flags |= ERTS_PSFLG_ACTIVE_SYS; fail_state = *fail_state_p; - - res = 1; /* prepare for success */ - st->next = st->prev = st; /* Prep for empty prio queue */ - state = erts_smp_atomic32_read_nob(&p->state); - prof_runnable_procs = erts_system_profile_flags.runnable_procs; - locked = 0; - free_stqs = NULL; - if (state & ERTS_PSFLG_ACTIVE_SYS) - stqs = NULL; - else { - alloc_qs: - stqs = proc_sys_task_queues_alloc(); - stqs->qmask = 1 << prio; - stqs->ncount = 0; - stqs->q[PRIORITY_MAX] = NULL; - stqs->q[PRIORITY_HIGH] = NULL; - stqs->q[PRIORITY_NORMAL] = NULL; - stqs->q[PRIORITY_LOW] = NULL; - stqs->q[prio] = st; - } - - if (!locked) { - locked = 1; - erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); - - state = erts_smp_atomic32_read_nob(&p->state); - if (state & fail_state) { - *fail_state_p = (state & fail_state); - free_stqs = stqs; - res = 0; - goto cleanup; - } - } - - if (!p->sys_task_qs) { - if (stqs) - p->sys_task_qs = stqs; - else - goto alloc_qs; - } - else { - free_stqs = stqs; - stqs = p->sys_task_qs; - if (!stqs->q[prio]) { - stqs->q[prio] = st; - stqs->qmask |= 1 << prio; - } - else { - st->next = stqs->q[prio]; - st->prev = stqs->q[prio]->prev; - st->next->prev = st; - st->prev->next = st; - ASSERT(stqs->qmask & (1 << prio)); - } - } - - if (ERTS_PSFLGS_GET_ACT_PRIO(state) > prio) { - erts_aint32_t n, a, e; - /* Need to elevate actual prio */ - - a = state; - do { - if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { - n = a; - break; - } - n = e = a; - n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; - n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); - a = erts_smp_atomic32_cmpxchg_nob(&p->state, n, e); - } while (a != e); - state = n; - } - - - a = state; + already_scheduled = 0; + status_locked = 0; enq_prio = -1; + a = state; - /* Status lock prevents out of order "runnable proc" trace msgs */ - ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + ERTS_SMP_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); + ASSERT(fail_state & (ERTS_PSFLG_EXITING | ERTS_PSFLG_FREE)); + ASSERT(!(fail_state & enable_flags)); + ASSERT(!(state & ERTS_PSFLG_PROXY)); - if (!prof_runnable_procs) { - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); - locked = 0; + /* When runnable_procs is enabled, we need to take the status lock to + * prevent trace messages from being sent in the wrong order. The lock must + * be held over the call to add2runq. + * + * Otherwise, we only need to take it when we're enqueuing a task and can + * safely release it before add2runq. */ + if (sys_task || runnable_procs) { + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + status_locked = 1; } - ASSERT(!(state & ERTS_PSFLG_PROXY)); - while (1) { erts_aint32_t e; n = e = a; - if (a & ERTS_PSFLG_FREE) - goto cleanup; /* We don't want to schedule free processes... 
*/ + if (a & fail_state) { + *fail_state_p = a & fail_state; + goto cleanup; + } enqueue = ERTS_ENQUEUE_NOT; - n |= ERTS_PSFLG_ACTIVE_SYS; + n |= enable_flags; + if (!(a & (ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS | ERTS_PSFLG_DIRTY_RUNNING - | ERTS_PSFLG_DIRTY_RUNNING_SYS))) + | ERTS_PSFLG_DIRTY_RUNNING_SYS))) { enqueue = check_enqueue_in_prio_queue(p, &enq_prio, &n, a); + } + a = erts_smp_atomic32_cmpxchg_mb(&p->state, n, e); - if (a == e) + if (a == e) { break; - if (a == n && enqueue == ERTS_ENQUEUE_NOT) - goto cleanup; + } + else if (a == n && enqueue == ERTS_ENQUEUE_NOT) { + already_scheduled = 1; + break; + } } - if (prof_runnable_procs) { - + if (!already_scheduled && runnable_procs) { if (!(a & (ERTS_PSFLG_ACTIVE_SYS | ERTS_PSFLG_RUNNING | ERTS_PSFLG_RUNNING_SYS @@ -7290,24 +7286,89 @@ schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, /* We activated a prevously inactive process */ profile_runnable_proc(p, am_active); } + } - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); - locked = 0; + if (sys_task) { + ErtsProcSysTaskQs *stqs = p->sys_task_qs; + + if (!stqs) { + sys_task->next = sys_task->prev = sys_task; + + stqs = proc_sys_task_queues_alloc(); + + stqs->qmask = 1 << task_prio; + stqs->ncount = 0; + stqs->q[PRIORITY_MAX] = NULL; + stqs->q[PRIORITY_HIGH] = NULL; + stqs->q[PRIORITY_NORMAL] = NULL; + stqs->q[PRIORITY_LOW] = NULL; + stqs->q[task_prio] = sys_task; + + p->sys_task_qs = stqs; + } + else { + if (!stqs->q[task_prio]) { + sys_task->next = sys_task->prev = sys_task; + + stqs->q[task_prio] = sys_task; + stqs->qmask |= 1 << task_prio; + } + else { + sys_task->next = stqs->q[task_prio]; + sys_task->prev = stqs->q[task_prio]->prev; + sys_task->next->prev = sys_task; + sys_task->prev->next = sys_task; + ASSERT(stqs->qmask & (1 << task_prio)); + } + } } - add2runq(enqueue, enq_prio, p, n, NULL); + if (status_locked && !runnable_procs) { + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + status_locked = 0; + } + + if (!already_scheduled) { + add2runq(enqueue, enq_prio, p, n, NULL); + } cleanup: + if (status_locked) { + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + } - if (locked) - erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + return n; +} - if (free_stqs) - proc_sys_task_queues_free(free_stqs); +static int +schedule_process_sys_task(Process *p, erts_aint32_t prio, ErtsProcSysTask *st, + erts_aint32_t *fail_state_p) +{ + erts_aint32_t fail_state, state; - ERTS_SMP_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); + /* Elevate priority if needed. 
*/ + state = erts_smp_atomic32_read_nob(&p->state); + if (ERTS_PSFLGS_GET_ACT_PRIO(state) > prio) { + erts_aint32_t n, a, e; - return res; + a = state; + do { + if (ERTS_PSFLGS_GET_ACT_PRIO(a) <= prio) { + n = a; + break; + } + n = e = a; + n &= ~ERTS_PSFLGS_ACT_PRIO_MASK; + n |= (prio << ERTS_PSFLGS_ACT_PRIO_OFFSET); + a = erts_smp_atomic32_cmpxchg_nob(&p->state, n, e); + } while (a != e); + + state = n; + } + + fail_state = *fail_state_p; + + return !(active_sys_enqueue(p, st, prio, 0, state, fail_state_p) & fail_state); } static ERTS_INLINE int @@ -7895,6 +7956,11 @@ suspend_scheduler(ErtsSchedulerData *esdp) return; } +#ifdef HARDDEBUG + if (sched_type != ERTS_SCHED_NORMAL) + ERTS_HDBG_CHK_SLEEP_LIST(&esdp->run_queue->sleepers, !0, NULL, ssi); +#endif + if (erts_smp_atomic32_read_nob(&ssi->flags) & ERTS_SSI_FLG_MSB_EXEC) { ASSERT(no == 1); if (!msb_scheduler_type_switch(sched_type, esdp, no)) @@ -10484,6 +10550,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) if (!is_normal_sched & !!(flags & ERTS_RUNQ_FLG_HALTING)) { /* Wait for emulator to terminate... */ + erts_smp_runq_unlock(rq); while (1) erts_milli_sleep(1000*1000); } @@ -10758,6 +10825,7 @@ Process *erts_schedule(ErtsSchedulerData *esdp, Process *p, int calls) } else if (state & ERTS_PSFLG_FREE) { /* free and not queued by proxy */ + ASSERT(state & ERTS_PSFLG_IN_RUNQ); erts_proc_dec_refc(p); } if (!is_normal_sched) @@ -14071,7 +14139,9 @@ erts_continue_exit_process(Process *p) n = e = a; ASSERT(a & ERTS_PSFLG_EXITING); n |= ERTS_PSFLG_FREE; - n &= ~ERTS_PSFLG_ACTIVE; + n &= ~(ERTS_PSFLG_ACTIVE + | ERTS_PSFLG_ACTIVE_SYS + | ERTS_PSFLG_DIRTY_ACTIVE_SYS); if ((n & ERTS_PSFLG_IN_RUNQ) && !refc_inced) { erts_proc_inc_refc(p); refc_inced = 1; @@ -14524,12 +14594,12 @@ void erts_halt(int code) if (-1 == erts_smp_atomic32_cmpxchg_acqb(&erts_halt_progress, erts_no_schedulers, -1)) { + notify_reap_ports_relb(); #ifdef ERTS_DIRTY_SCHEDULERS ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_CPU_RUNQ, ERTS_RUNQ_FLG_HALTING); ERTS_RUNQ_FLGS_SET(ERTS_DIRTY_IO_RUNQ, ERTS_RUNQ_FLG_HALTING); #endif erts_halt_code = code; - notify_reap_ports_relb(); } } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index 5cac939771..b3385b780f 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -368,7 +368,7 @@ typedef struct ErtsSchedulerSleepInfo_ ErtsSchedulerSleepInfo; #ifdef ERTS_DIRTY_SCHEDULERS typedef struct { erts_smp_spinlock_t lock; - ErtsSchedulerSleepInfo *list; + ErtsSchedulerSleepInfo *list; /* circular lifo list; points to last out */ } ErtsSchedulerSleepList; #endif diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index d7116bd2c3..5fc42f231a 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -3142,7 +3142,7 @@ tailrecur_ne: ASSERT(alen == blen); for (i = (Sint) alen - 1; i >= 0; i--) if (anum[i] != bnum[i]) - RETURN_NEQ((Sint32) (anum[i] - bnum[i])); + RETURN_NEQ(anum[i] < bnum[i] ? 
-1 : 1); goto pop_next; case (_TAG_HEADER_EXTERNAL_REF >> _TAG_PRIMARY_SIZE): if (is_internal_ref(b)) { diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 554c48059f..f6de343f72 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -1018,6 +1018,7 @@ typedef struct { inet_async_op* oph; /* queue head or NULL */ inet_async_op* opt; /* queue tail or NULL */ inet_async_op op_queue[INET_MAX_ASYNC]; /* call queue */ + int op_ref; /* queue reference generator */ int active; /* 0 = passive, 1 = active, 2 = active once */ Sint16 active_count; /* counter for {active,N} */ @@ -1268,8 +1269,7 @@ static int packet_inet_output(udp_descriptor* udesc, HANDLE event); /* convert descriptor pointer to inet_descriptor pointer */ #define INETP(d) (&(d)->inet) -static int async_ref = 0; /* async reference id generator */ -#define NEW_ASYNC_ID() ((async_ref++) & 0xffff) +#define NEW_ASYNC_ID(desc) ((desc)->op_ref++ & 0xffff) /* check for transition from active to passive */ #define INET_CHECK_ACTIVE_TO_PASSIVE(inet) \ @@ -1921,7 +1921,7 @@ static void enq_multi_op(tcp_descriptor *desc, char *buf, int req, ErlDrvTermData caller, MultiTimerData *timeout, ErlDrvMonitor *monitorp) { - int id = NEW_ASYNC_ID(); + int id = NEW_ASYNC_ID(INETP(desc)); enq_old_multi_op(desc,id,req,caller,timeout,monitorp); if (buf != NULL) put_int16(id, buf); @@ -1990,7 +1990,7 @@ static int remove_multi_op(tcp_descriptor *desc, int *id_p, int *req_p, static int enq_async_w_tmo(inet_descriptor* desc, char* buf, int req, unsigned timeout, ErlDrvMonitor *monitorp) { - int id = NEW_ASYNC_ID(); + int id = NEW_ASYNC_ID(desc); inet_async_op* opp; if ((opp = desc->oph) == NULL) /* queue empty */ @@ -8423,6 +8423,7 @@ static ErlDrvData inet_start(ErlDrvPort port, int size, int protocol) desc->delimiter = '\n'; /* line delimiting char */ desc->oph = NULL; desc->opt = NULL; + desc->op_ref = 0; desc->peer_ptr = NULL; desc->name_ptr = NULL; diff --git a/erts/emulator/test/big_SUITE.erl b/erts/emulator/test/big_SUITE.erl index c308760211..b25868d3cb 100644 --- a/erts/emulator/test/big_SUITE.erl +++ b/erts/emulator/test/big_SUITE.erl @@ -24,7 +24,7 @@ -export([t_div/1, eq_28/1, eq_32/1, eq_big/1, eq_math/1, big_literals/1, borders/1, negative/1, big_float_1/1, big_float_2/1, - bxor_2pow/1, + bxor_2pow/1, band_2pow/1, shift_limit_1/1, powmod/1, system_limit/1, toobig/1, otp_6692/1]). %% Internal exports. @@ -43,7 +43,7 @@ suite() -> all() -> [t_div, eq_28, eq_32, eq_big, eq_math, big_literals, borders, negative, {group, big_float}, shift_limit_1, - bxor_2pow, + bxor_2pow, band_2pow, powmod, system_limit, toobig, otp_6692]. groups() -> @@ -449,3 +449,38 @@ my_bxor(A, B, N, Acc0) -> false -> Acc0 bor (1 bsl N) end, my_bxor(A bsr 1, B bsr 1, N+1, Acc1). + + +%% ERL-804 +band_2pow(_Config) -> + IL = lists:seq(8*3, 8*16, 4), + JL = lists:seq(0, 64), + [band_2pow_1((1 bsl I), (1 bsl J)) + || I <- IL, J <- JL], + ok. + +band_2pow_1(A, B) -> + for(-1,1, fun(Ad) -> + for(-1,1, fun(Bd) -> + band_2pow_2(A+Ad, B+Bd), + band_2pow_2(-A+Ad, B+Bd), + band_2pow_2(A+Ad, -B+Bd), + band_2pow_2(-A+Ad, -B+Bd) + end) + end). + +band_2pow_2(A, B) -> + Correct = my_band(A, B), + case A band B of + Correct -> ok; + Wrong -> + io:format("~.16# band ~.16#\n", [A,B]), + io:format("Expected ~.16#\n", [Correct]), + io:format("Got ~.16#\n", [Wrong]), + ct:fail({failed, 'band'}) + + end. + +%% Implement band without band +my_band(A, B) -> + bnot ((bnot A) bor (bnot B)). 
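The erl_process.c hunks above turn the dirty-scheduler sleepers list into a circular, doubly linked LIFO list: the list pointer stays on the sleeper that will be woken last, new sleepers are linked in right behind it, and wake_dirty_scheduler pops list->next, the most recent sleeper. The following is a minimal, self-contained sketch of that discipline with invented names (sleep_info, sleepers_push, sleepers_pop), not the ERTS code itself; the real code additionally holds rq->sleepers.lock around every operation and runs the HARDDEBUG consistency check shown in the diff.

#include <assert.h>
#include <stddef.h>

/* Illustrative stand-in for ErtsSchedulerSleepInfo. */
typedef struct sleep_info {
    struct sleep_info *next;
    struct sleep_info *prev;
} sleep_info;

/* 'list' points at the last-out element; list->next is first out (LIFO). */
static void sleepers_push(sleep_info **list, sleep_info *ssi)
{
    assert(!ssi->next && !ssi->prev);        /* not already enqueued */
    if (!*list) {
        ssi->next = ssi->prev = ssi;         /* single, self-linked element */
        *list = ssi;
    } else {
        ssi->prev = *list;                   /* insert right after the head */
        ssi->next = (*list)->next;
        ssi->prev->next = ssi;
        ssi->next->prev = ssi;
    }
}

/* Unlink an arbitrary element, e.g. a sleeper that woke up by itself. */
static void sleepers_remove(sleep_info **list, sleep_info *ssi)
{
    if (!ssi->next)
        return;                              /* not in the list */
    if (ssi->next == ssi) {
        *list = NULL;                        /* it was the only element */
    } else {
        if (*list == ssi)
            *list = ssi->next;
        ssi->prev->next = ssi->next;
        ssi->next->prev = ssi->prev;
    }
    ssi->next = ssi->prev = NULL;
}

/* Pop the most recently enqueued sleeper (what wake_dirty_scheduler wakes). */
static sleep_info *sleepers_pop(sleep_info **list)
{
    sleep_info *fo = *list ? (*list)->next : NULL;
    if (fo)
        sleepers_remove(list, fo);
    return fo;
}

int main(void)
{
    sleep_info a = {0}, b = {0}, c = {0};
    sleep_info *list = NULL;

    sleepers_push(&list, &a);
    sleepers_push(&list, &b);
    sleepers_push(&list, &c);

    assert(sleepers_pop(&list) == &c);       /* LIFO: last in, first out */
    sleepers_remove(&list, &a);              /* arbitrary removal also works */
    assert(sleepers_pop(&list) == &b);
    assert(list == NULL);
    return 0;
}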
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index ac2b136a38..4237715c4b 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -2594,16 +2594,23 @@ nif_term_to_binary(Config) -> nif_binary_to_term(Config) -> ensure_lib_loaded(Config), - T = {#{ok => nok}, <<0:8096>>, lists:seq(1,100)}, + BigMap = maps:from_list([{I,-I} || I <- lists:seq(1,100)]), + [nif_binary_to_term_do(T) + || T <- [{#{ok => nok}, <<0:8096>>, lists:seq(1,100)}, + atom, 42, self(), BigMap]], + ok. + +nif_binary_to_term_do(T) -> + Dummy = [true|false], Bin = term_to_binary(T), Len = byte_size(Bin), - {Len,T} = binary_to_term_nif(Bin, undefined, 0), + {Len,T,Dummy} = binary_to_term_nif(Bin, undefined, 0), Len = binary_to_term_nif(Bin, self(), 0), - T = receive M -> M after 1000 -> timeout end, + {T,Dummy} = receive M -> M after 1000 -> timeout end, - {Len, T} = binary_to_term_nif(Bin, undefined, ?ERL_NIF_BIN2TERM_SAFE), + {Len,T,Dummy} = binary_to_term_nif(Bin, undefined, ?ERL_NIF_BIN2TERM_SAFE), false = binary_to_term_nif(<<131,100,0,14,"undefined_atom">>, - undefined, ?ERL_NIF_BIN2TERM_SAFE), + undefined, ?ERL_NIF_BIN2TERM_SAFE), false = binary_to_term_nif(Bin, undefined, 1), ok. diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index b47d013bd2..87cd3650ff 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -2384,7 +2384,7 @@ static ERL_NIF_TERM term_to_binary(ErlNifEnv* env, int argc, const ERL_NIF_TERM static ERL_NIF_TERM binary_to_term(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ErlNifBinary bin; - ERL_NIF_TERM term, ret_term; + ERL_NIF_TERM term, dummy, ret_term; ErlNifPid pid; ErlNifEnv *msg_env = env; unsigned int opts; @@ -2397,6 +2397,9 @@ static ERL_NIF_TERM binary_to_term(ErlNifEnv* env, int argc, const ERL_NIF_TERM || !enif_get_uint(env, argv[2], &opts)) return enif_make_badarg(env); + /* build dummy heap term first to provoke OTP-15080 */ + dummy = enif_make_list_cell(msg_env, atom_true, atom_false); + ret = enif_binary_to_term(msg_env, bin.data, bin.size, &term, (ErlNifBinaryToTerm)opts); if (!ret) @@ -2404,11 +2407,12 @@ static ERL_NIF_TERM binary_to_term(ErlNifEnv* env, int argc, const ERL_NIF_TERM ret_term = enif_make_uint64(env, ret); if (msg_env != env) { - enif_send(env, &pid, msg_env, term); + enif_send(env, &pid, msg_env, + enif_make_tuple2(msg_env, term, dummy)); enif_free_env(msg_env); return ret_term; } else { - return enif_make_tuple2(env, ret_term, term); + return enif_make_tuple3(env, ret_term, term, dummy); } } diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl index a8bcfac84d..899a5c26bd 100644 --- a/erts/emulator/test/process_SUITE.erl +++ b/erts/emulator/test/process_SUITE.erl @@ -58,6 +58,7 @@ no_priority_inversion2/1, system_task_blast/1, system_task_on_suspended/1, + system_task_failed_enqueue/1, gc_request_when_gc_disabled/1, gc_request_blast_when_gc_disabled/1]). -export([prio_server/2, prio_client/2, init/1, handle_event/2]). @@ -104,7 +105,7 @@ groups() -> otp_7738_resume]}, {system_task, [], [no_priority_inversion, no_priority_inversion2, - system_task_blast, system_task_on_suspended, + system_task_blast, system_task_on_suspended, system_task_failed_enqueue, gc_request_when_gc_disabled, gc_request_blast_when_gc_disabled]}]. init_per_suite(Config) -> @@ -2531,6 +2532,57 @@ system_task_on_suspended(Config) when is_list(Config) -> ok end. 
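In nif_SUITE.c above, a dummy heap term is built on the message environment before enif_binary_to_term is called, so that a stale heap pointer (the OTP-15080 regression the test provokes, apparently related to the enif_binary_to_term change near the top of this diff) would corrupt either the dummy or the decoded term. A stripped-down NIF following the same pattern could look roughly like this; decode_with_canary, canary_nif and the atom names are invented for the illustration and are not part of the suite.

#include <erl_nif.h>

/* Decode a binary while keeping an unrelated "canary" term alive on the same
 * environment; if the decode mishandles the env's heap pointers, the canary
 * (or the decoded term) comes back corrupted. */
static ERL_NIF_TERM decode_with_canary(ErlNifEnv* env, int argc,
                                       const ERL_NIF_TERM argv[])
{
    ErlNifBinary bin;
    ERL_NIF_TERM canary, term;
    size_t read;

    if (argc != 1 || !enif_inspect_binary(env, argv[0], &bin))
        return enif_make_badarg(env);

    /* Allocate a heap term *before* the decode, like the test's dummy list. */
    canary = enif_make_list_cell(env,
                                 enif_make_atom(env, "true"),
                                 enif_make_atom(env, "false"));

    read = enif_binary_to_term(env, bin.data, bin.size, &term,
                               ERL_NIF_BIN2TERM_SAFE);
    if (read == 0)
        return enif_make_atom(env, "false");

    /* Return {BytesRead, Term, Canary} so the caller can check all three. */
    return enif_make_tuple3(env,
                            enif_make_uint64(env, read),
                            term, canary);
}

static ErlNifFunc nif_funcs[] = {
    {"decode_with_canary", 1, decode_with_canary}
};

ERL_NIF_INIT(canary_nif, nif_funcs, NULL, NULL, NULL, NULL)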
+%% When a system task couldn't be enqueued due to the process being in an +%% incompatible state, it would linger in the system task list and get executed +%% anyway the next time the process was scheduled. This would result in a +%% double-free at best. +%% +%% This test continuously purges modules while other processes run dirty code, +%% which will provoke this error as ERTS_PSTT_CPC can't be enqueued while a +%% process is running dirty code. +system_task_failed_enqueue(Config) when is_list(Config) -> + case erlang:system_info(dirty_cpu_schedulers) of + N when N > 0 -> + system_task_failed_enqueue_1(Config); + _ -> + {skipped, "No dirty scheduler support"} + end. + +system_task_failed_enqueue_1(Config) -> + Priv = proplists:get_value(priv_dir, Config), + + Purgers = [spawn_link(fun() -> purge_loop(Priv, Id) end) + || Id <- lists:seq(1, erlang:system_info(schedulers))], + Hogs = [spawn_link(fun() -> dirty_loop() end) + || _ <- lists:seq(1, erlang:system_info(dirty_cpu_schedulers))], + + ct:sleep(5000), + + [begin + unlink(Pid), + exit(Pid, kill) + end || Pid <- (Purgers ++ Hogs)], + + ok. + +purge_loop(PrivDir, Id) -> + Mod = "failed_enq_" ++ integer_to_list(Id), + Path = PrivDir ++ "/" ++ Mod, + file:write_file(Path ++ ".erl", + "-module('" ++ Mod ++ "').\n" ++ + "-export([t/0]).\n" ++ + "t() -> ok."), + purge_loop_1(Path). +purge_loop_1(Path) -> + {ok, Mod} = compile:file(Path, []), + erlang:delete_module(Mod), + erts_code_purger:purge(Mod), + purge_loop_1(Path). + +dirty_loop() -> + ok = erts_debug:dirty_cpu(reschedule, 10000), + dirty_loop(). + gc_request_when_gc_disabled(Config) when is_list(Config) -> AIS = erts_debug:set_internal_state(available_internal_state, true), gc_request_when_gc_disabled_do(ref), diff --git a/erts/emulator/test/ref_SUITE.erl b/erts/emulator/test/ref_SUITE.erl index 5f519d522e..74df857c65 100644 --- a/erts/emulator/test/ref_SUITE.erl +++ b/erts/emulator/test/ref_SUITE.erl @@ -22,6 +22,7 @@ -export([all/0, suite/0]). -export([wrap_1/1]). +-export([compare_list/1, compare_ets/1]). -export([loop_ref/1]). @@ -32,7 +33,7 @@ suite() -> {timetrap, {minutes, 2}}]. all() -> - [wrap_1]. + [wrap_1, compare_list, compare_ets]. %% Check that refs don't wrap around easily. wrap_1(Config) when is_list(Config) -> @@ -53,3 +54,28 @@ loop_ref(Parent) -> loop_ref(R, R, _) -> ok; loop_ref(R0, _, N) -> loop_ref(R0, make_ref(), N+1). + +%% Check that ref ordering works +compare_list(Config) when is_list(Config) -> + %% Although this test uses external refs, it would apply the same to plain refs + ExtRef1 = <<131,114,0,3,100,0,3,110,64,98,3, 0,0,173,156, 0,216,0,4, 0,0,0,0>>, + ExtRef2 = <<131,114,0,3,100,0,3,110,64,98,3, 0,1,31,27, 129,4,0,1, 0,0,0,0>>, + + Ref1 = binary_to_term(ExtRef1), %% #Ref<[email protected]> + Ref2 = binary_to_term(ExtRef2), %% #Ref<[email protected]> + OrderedList = [Ref1, Ref2], + OrderedList = lists:sort(OrderedList), + ok. + +%% This is the scarier case since it makes terms "invisible" in ets or Mnesia +%% (the underlying fault cause is the same as compare_list/1) +compare_ets(Config) when is_list(Config) -> + W2s = [610350147,899574699,2994196869,686384822,2397690439, 923302211], + ExtRefBase = <<131,114,0,3,100,0,3,110,64,98,3>>, + ExtRefs = [<<ExtRefBase/binary, 1:32, W2:32, 0:32>> || W2 <- W2s], + Refs = [binary_to_term(Bin) || Bin <- ExtRefs], + + Ets = ets:new(refbug, [ordered_set]), + ets:insert(Ets, [{Ref,Ref} || Ref <- Refs]), + 0 = length([R || R <- ets:tab2list(Ets), ets:lookup(Ets, element(1,R)) == []]), + ok. 
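compare_list/1 and compare_ets/1 above exercise the ordering bug fixed in utils.c earlier in this diff: ref (and bignum) digits are unsigned 32-bit words, and the old RETURN_NEQ((Sint32) (anum[i] - bnum[i])) reports the wrong sign whenever the true difference does not fit in a signed 32-bit value. A small standalone illustration, with word values chosen for the example rather than taken from the test vectors:

#include <stdint.h>
#include <stdio.h>

/* Broken: the unsigned subtraction wraps, and reinterpreting the result as
 * int32_t can flip the sign of the comparison. */
static int cmp_word_broken(uint32_t a, uint32_t b)
{
    return (int32_t)(a - b);
}

/* Fixed: compare explicitly, as the patched utils.c does. */
static int cmp_word_fixed(uint32_t a, uint32_t b)
{
    return a == b ? 0 : (a < b ? -1 : 1);
}

int main(void)
{
    uint32_t a = 0x00000001u;   /* clearly smaller ... */
    uint32_t b = 0x90000000u;   /* ... than this */

    /* 0x00000001 - 0x90000000 wraps to 0x70000001, a positive int32_t,
     * so the broken compare claims a > b. */
    printf("broken says a > b: %d\n", cmp_word_broken(a, b) > 0);
    printf("fixed  says a > b: %d\n", cmp_word_fixed(a, b) > 0);
    return 0;
}

An inconsistent comparison like this is what leaves entries unreachable in an ordered_set table, which is the property compare_ets/1 checks.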
diff --git a/erts/emulator/test/system_profile_SUITE.erl b/erts/emulator/test/system_profile_SUITE.erl index 9b678fcff9..00237c329c 100644 --- a/erts/emulator/test/system_profile_SUITE.erl +++ b/erts/emulator/test/system_profile_SUITE.erl @@ -95,18 +95,20 @@ do_runnable_procs({TsType, TsTypeFlag}) -> % FIXME: Set #laps and #nodes in config file Nodes = 10, Laps = 10, - Master = ring(Nodes), + All = ring(Nodes, [link,monitor]), + [Master | _] = All, undefined = erlang:system_profile(Pid, [runnable_procs]++TsTypeFlag), % loop a message ok = ring_message(Master, message, Laps), + ok = kill_ring(Master), + [receive {'DOWN', _, process, P, _} -> ok end || P <- All], Events = get_profiler_events(), - kill_em_all = kill_ring(Master), erlang:system_profile(undefined, []), put(master, Master), put(laps, Laps), true = has_runnable_event(TsType, Events), Pids = sort_events_by_pid(Events), - ok = check_events(TsType, Pids), + ok = check_events(TsType, Pids, (Laps+1)*2+2, (Laps+1)*2), erase(), exit(Pid,kill), ok. @@ -139,7 +141,7 @@ do_runnable_ports({TsType, TsTypeFlag}, Config) -> erlang:system_profile(undefined, []), true = has_runnable_event(TsType, Events), Pids = sort_events_by_pid(Events), - ok = check_events(TsType, Pids), + ok = check_events(TsType, Pids, Laps*2+2, Laps*2), erase(), exit(Pid,kill), ok. @@ -172,12 +174,12 @@ dont_profile_profiler(Config) when is_list(Config) -> Nodes = 10, Laps = 10, - Master = ring(Nodes), + [Master|_] = ring(Nodes, [link]), undefined = erlang:system_profile(Pid, [runnable_procs]), % loop a message ok = ring_message(Master, message, Laps), erlang:system_profile(undefined, []), - kill_em_all = kill_ring(Master), + ok = kill_ring(Master), Events = get_profiler_events(), false = has_profiler_pid_event(Events, Pid), @@ -249,27 +251,28 @@ check_block_system({TsType, TsTypeFlag}, Nodes) -> %%% Check events -check_events(_TsType, []) -> ok; -check_events(TsType, [Pid | Pids]) -> +check_events(_TsType, [], _, _) -> ok; +check_events(TsType, [Pid | Pids], ExpMaster, ExpMember) -> Master = get(master), - Laps = get(laps), CheckPids = get(pids), {Events, N} = get_pid_events(Pid), ok = check_event_flow(Events), ok = check_event_ts(TsType, Events), IsMember = lists:member(Pid, CheckPids), - case Pid of - Master -> - io:format("Expected ~p and got ~p profile events from ~p: ok~n", [Laps*2+2, N, Pid]), - N = Laps*2 + 2, - check_events(TsType, Pids); - Pid when IsMember == true -> - io:format("Expected ~p and got ~p profile events from ~p: ok~n", [Laps*2, N, Pid]), - N = Laps*2, - check_events(TsType, Pids); - Pid -> - check_events(TsType, Pids) - end. + {Title,Exp} = case Pid of + Master -> {master,ExpMaster}; + Pid when IsMember == true -> {member,ExpMember}; + _ -> {other,N} + end, + ok = case N of + Exp -> ok; + _ -> + io:format("Expected ~p and got ~p profile events from ~p ~p:~n~p~n", + [Exp, N, Title, Pid, Events]), + error + end, + check_events(TsType, Pids, ExpMaster, ExpMember). + %% timestamp consistency check for descending timestamps @@ -297,7 +300,13 @@ check_event_ts(TsType, [{Pid, _, _, TS1}=Event | Events], {Pid,_,_,TS0}) -> %% consistency check for active vs. inactive activity (runnable) check_event_flow(Events) -> - check_event_flow(Events, undefined). + case check_event_flow(Events, undefined) of + ok -> ok; + Error -> + io:format("Events = ~p\n", [Events]), + Error + end. 
+ check_event_flow([], _) -> ok; check_event_flow([Event | PidEvents], undefined) -> check_event_flow(PidEvents, Event); @@ -337,10 +346,11 @@ sort_events_by_pid([Event | Events],Pids) -> %% API % Returns master pid -ring(N) -> - Pids = build_ring(N, []), +ring(N, SpawnOpt) -> + Pids = build_ring(N, [], SpawnOpt), put(pids, Pids), - setup_ring(Pids). + setup_ring(Pids), + Pids. ring_message(Master, Message, Laps) -> Master ! {message, Master, Laps, Message}, @@ -348,13 +358,19 @@ ring_message(Master, Message, Laps) -> {laps_complete, Master} -> ok end. -kill_ring(Master) -> Master ! kill_em_all. +kill_ring(Master) -> + Master ! kill_em_all, + ok. %% Process ring helpers -build_ring(0, Pids) -> Pids; -build_ring(N, Pids) -> - build_ring(N - 1, [spawn_link(?MODULE, ring_loop, [undefined]) | Pids]). +build_ring(0, Pids, _) -> Pids; +build_ring(N, Pids, SpawnOpt) -> + Pid = case spawn_opt(?MODULE, ring_loop, [undefined], SpawnOpt) of + {P,_} -> P; + P -> P + end, + build_ring(N-1, [Pid | Pids], SpawnOpt). setup_ring([Master | Relayers]) -> % Relayers may not include the master pid @@ -383,15 +399,13 @@ ring_loop(RelayTo) -> {message, Master, Lap, Msg}=Message -> case {self(), Lap} of {Master, 0} -> - get(supervisor) ! {laps_complete, self()}, - ring_loop(RelayTo); + get(supervisor) ! {laps_complete, self()}; {Master, Lap} -> - RelayTo ! {message, Master, Lap - 1, Msg}, - ring_loop(RelayTo); + RelayTo ! {message, Master, Lap - 1, Msg}; _ -> - RelayTo ! Message, - ring_loop(RelayTo) - end + RelayTo ! Message + end, + ring_loop(RelayTo) end. %%% diff --git a/erts/etc/common/heart.c b/erts/etc/common/heart.c index bc353e384e..8f1e89b638 100644 --- a/erts/etc/common/heart.c +++ b/erts/etc/common/heart.c @@ -825,11 +825,8 @@ write_message(fd, mp) int fd; struct msg *mp; { - int len; - char* tmp; + int len = ntohs(mp->len); - tmp = (char*) &(mp->len); - len = (*tmp * 256) + *(tmp+1); if ((len == 0) || (len > MSG_BODY_SIZE)) { return MSG_HDR_SIZE; } /* cc68k wants (char *) */ diff --git a/erts/lib_src/Makefile.in b/erts/lib_src/Makefile.in index 601f3917a8..2f7cb5888c 100644 --- a/erts/lib_src/Makefile.in +++ b/erts/lib_src/Makefile.in @@ -29,7 +29,6 @@ CC=@CC@ LD=@LD@ AR=@AR@ RANLIB=@RANLIB@ -RM=@RM@ MKDIR=@MKDIR@ INSTALL=@INSTALL@ INSTALL_DIR=@INSTALL_DIR@ @@ -533,9 +532,9 @@ release_docs_spec: # .PHONY: clean clean: - $(RM) -rf ../lib/internal/$(TARGET)/* - $(RM) -rf ../lib/$(TARGET)/* - $(RM) -rf obj/$(TARGET)/* + $(RM) -r ../lib/internal/$(TARGET)/* + $(RM) -r ../lib/$(TARGET)/* + $(RM) -r obj/$(TARGET)/* # # Make dependencies diff --git a/erts/vsn.mk b/erts/vsn.mk index c3a62a5535..ad7bbc4d87 100644 --- a/erts/vsn.mk +++ b/erts/vsn.mk @@ -18,7 +18,7 @@ # %CopyrightEnd% # -VSN = 9.3 +VSN = 9.3.3.10 # Port number 4365 in 4.2 # Port number 4366 in 4.3 |
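The heart.c hunk above swaps a hand-rolled big-endian parse for ntohs(mp->len). Worth noting: the old code walked the field through a plain char pointer, so wherever char is signed a high byte contributes a negative value and the computed length is wrong; whether or not that was the motivation for the change, ntohs is both shorter and immune to it. A small sketch of the variants follows (the read_len_* names are invented for the example, and a POSIX arpa/inet.h environment is assumed):

#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>   /* htons, ntohs */

/* Error-prone: 'char' may be signed, so a first byte >= 0x80 contributes a
 * negative value and yields a bogus length. */
static int read_len_signed_char(const void *buf)
{
    const char *tmp = buf;
    return (*tmp * 256) + *(tmp + 1);
}

/* Robust: either use unsigned bytes explicitly ... */
static unsigned read_len_bytes(const void *buf)
{
    const unsigned char *p = buf;
    return ((unsigned)p[0] << 8) | p[1];
}

int main(void)
{
    /* A 16-bit length of 0x8001 (32769) in network byte order. */
    uint16_t len_be = htons(0x8001);

    /* ... or convert the whole field at once, as heart.c now does. */
    printf("signed-char parse: %d\n", read_len_signed_char(&len_be));
    printf("byte parse:        %u\n", read_len_bytes(&len_be));
    printf("ntohs:             %u\n", (unsigned)ntohs(len_be));
    return 0;
}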