140 files changed, 8599 insertions, 5872 deletions
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4
index 3d227e462c..0ca2755802 100644
--- a/erts/aclocal.m4
+++ b/erts/aclocal.m4
@@ -122,6 +122,9 @@ dnl
 
 AC_DEFUN(LM_WINDOWS_ENVIRONMENT,
 [
+
+if test "X$windows_environment_" != "Xchecked"; then
+windows_environment_=checked
 MIXED_CYGWIN=no
 MIXED_MSYS=no
 
@@ -197,6 +200,8 @@ else
 fi
 
 AC_SUBST(MIXED_MSYS)
+
+fi
 ])		
 	
 dnl ----------------------------------------------------------------------
@@ -2856,3 +2861,248 @@ AC_DEFUN([LM_HARDWARE_ARCH], [
 
     AC_SUBST(ARCH)
 ])
+
+dnl
+dnl--------------------------------------------------------------------
+dnl Dynamic Erlang Drivers
+dnl
+dnl Linking to produce dynamic Erlang drivers to be loaded by Erlang's
+dnl Dynamic Driver Loader and Linker (DDLL). Below the prefix DED is an
+dnl abbreviation for `Dynamic Erlang Driver'.
+dnl
+dnl For DED we need something quite sloppy, which allows undefined references 
+dnl (notably driver functions) in the resulting shared library. 
+dnl Example of Makefile rule (and settings of macros):
+dnl
+dnl LIBS = @LIBS@
+dnl LD = @DED_LD@
+dnl LDFLAGS = @DED_LDFLAGS@
+dnl soname = @ldsoname@
+dnl
+dnl my_drv.so:   my_drv.o my_utils.o
+dnl              $(LD) $(LDFLAGS) $(soname) $@ -o $@ $^ -lc $(LIBS)
+dnl
+dnl--------------------------------------------------------------------
+dnl
+
+AC_DEFUN(ERL_DED,
+	[
+# Save the user supplied linker settings; used as fallback when no platform linker is chosen.
+USER_LD=$LD
+USER_LDFLAGS="$LDFLAGS"
+
+LM_CHECK_THR_LIB
+
+DED_CC=$CC
+DED_GCC=$GCC
+
+DED_CFLAGS=
+DED_OSTYPE=unix
+case $host_os in
+     linux*)
+	DED_CFLAGS="-D_GNU_SOURCE" ;;
+     win32)
+	DED_CFLAGS="-D_WIN32_WINNT=0x0600 -DWINVER=0x0600"
+        DED_OSTYPE=win32 ;;
+     *)
+        ;;
+esac
+
+# Warning flags shared by loadable and statically linked drivers.
+DED_WARN_FLAGS="-Wall -Wstrict-prototypes"
+case "$host_cpu" in
+  tile*)
+    # tile-gcc is a bit stricter with -Wmissing-prototypes than other gccs,
+    # and too strict for our taste.
+    ;;
+  *)
+    DED_WARN_FLAGS="$DED_WARN_FLAGS -Wmissing-prototypes";;
+esac
+  
+LM_TRY_ENABLE_CFLAG([-Wdeclaration-after-statement], [DED_WARN_FLAGS])
+
+LM_TRY_ENABLE_CFLAG([-Werror=return-type], [DED_WERRORFLAGS])
+LM_TRY_ENABLE_CFLAG([-Werror=implicit], [DED_WERRORFLAGS])
+LM_TRY_ENABLE_CFLAG([-Werror=undef], [DED_WERRORFLAGS])
+
+DED_SYS_INCLUDE="-I${ERL_TOP}/erts/emulator/beam -I${ERL_TOP}/erts/include -I${ERL_TOP}/erts/include/$host -I${ERL_TOP}/erts/include/internal -I${ERL_TOP}/erts/include/internal/$host -I${ERL_TOP}/erts/emulator/sys/$DED_OSTYPE -I${ERL_TOP}/erts/emulator/sys/common"
+DED_INCLUDE=$DED_SYS_INCLUDE
+
+if test "$THR_DEFS" = ""; then
+    DED_THR_DEFS="-D_THREAD_SAFE -D_REENTRANT"
+else
+    DED_THR_DEFS="$THR_DEFS"
+fi
+# DED_EMU_THR_DEFS=$EMU_THR_DEFS
+DED_CFLAGS="$CFLAGS $CPPFLAGS $DED_CFLAGS"
+if test "x$GCC" = xyes; then
+    DED_STATIC_CFLAGS="$DED_CFLAGS"
+    DED_CFLAGS="$DED_CFLAGS -fPIC"
+fi
+
+DED_EXT=so
+case $host_os in
+    win32) DED_EXT=dll;;
+    darwin*)
+	DED_CFLAGS="$DED_CFLAGS -fno-common"
+	DED_STATIC_CFLAGS="$DED_STATIC_CFLAGS -fno-common";;
+    *)
+	;;
+esac
+
+DED_STATIC_CFLAGS="$DED_STATIC_CFLAGS -DSTATIC_ERLANG_NIF -DSTATIC_ERLANG_DRIVER"
+
+if test "$CFLAG_RUNTIME_LIBRARY_PATH" = ""; then
+
+  CFLAG_RUNTIME_LIBRARY_PATH="-Wl,-R"
+  case $host_os in
+    darwin*)
+	CFLAG_RUNTIME_LIBRARY_PATH=
+	;;
+    win32)
+	CFLAG_RUNTIME_LIBRARY_PATH=
+	;;
+    osf*)
+	CFLAG_RUNTIME_LIBRARY_PATH="-Wl,-rpath,"
+	;;
+    *)
+	;;
+  esac
+
+fi
+
+# If DED_LD is set in environment, we expect all DED_LD* variables
+# to be specified (cross compiling)
+if test "x$DED_LD" = "x"; then
+
+DED_LD_FLAG_RUNTIME_LIBRARY_PATH="-R"
+case $host_os in
+	win32)
+		DED_LD="ld.sh"
+		DED_LDFLAGS="-dll"
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH=
+	;;
+	solaris2*|sysv4*)
+		DED_LDFLAGS="-G"
+		if test X${enable_m64_build} = Xyes; then
+			DED_LDFLAGS="-64 $DED_LDFLAGS"
+		fi
+	;;
+	aix4*)
+		DED_LDFLAGS="-G -bnoentry -bexpall"
+	;;
+	freebsd2*)
+		# Non-ELF GNU linker
+		DED_LDFLAGS="-Bshareable"
+	;;
+	darwin*)
+		# Mach-O linker: a shared lib and a loadable
+		# object file are not the same thing.
+		DED_LDFLAGS="-bundle -bundle_loader ${ERL_TOP}/bin/$host/beam.smp"
+		if test X${enable_m64_build} = Xyes; then
+		  DED_LDFLAGS="-m64 $DED_LDFLAGS"
+		else
+		  if test X${enable_m32_build} = Xyes; then
+		    DED_LDFLAGS="-m32 $DED_LDFLAGS"
+		  else
+		    AC_CHECK_SIZEOF(void *)
+		    case "$ac_cv_sizeof_void_p" in
+		      8)
+			DED_LDFLAGS="-m64 $DED_LDFLAGS";;
+		      *)
+		        ;;
+		    esac
+		  fi
+		fi
+		DED_LD="$CC"
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
+	;;
+	linux*)
+		DED_LD="$CC"
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
+		DED_LDFLAGS="-shared -Wl,-Bsymbolic"
+		if test X${enable_m64_build} = Xyes; then
+			DED_LDFLAGS="-m64 $DED_LDFLAGS"
+		fi;
+		if test X${enable_m32_build} = Xyes; then
+			DED_LDFLAGS="-m32 $DED_LDFLAGS"
+		fi
+	;;	
+	freebsd*)
+		DED_LD="$CC"
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
+		DED_LDFLAGS="-shared"
+		if test X${enable_m64_build} = Xyes; then
+			DED_LDFLAGS="-m64 $DED_LDFLAGS"
+		fi;
+		if test X${enable_m32_build} = Xyes; then
+			DED_LDFLAGS="-m32 $DED_LDFLAGS"
+		fi
+	;;	
+	openbsd*)
+		DED_LD="$CC"
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
+		DED_LDFLAGS="-shared"
+	;;
+	osf*)
+		# NOTE! Whitespace after -rpath is important.
+		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="-rpath "
+		DED_LDFLAGS="-shared -expect_unresolved '*'"
+	;;
+	*)
+		# assume GNU linker and ELF
+		DED_LDFLAGS="-shared"
+		# GNU linker has no option for 64bit build, should not propagate -m64
+	;;
+esac
+
+if test "$DED_LD" = "" && test "$USER_LD" != ""; then
+    DED_LD="$USER_LD"
+    DED_LDFLAGS="$USER_LDFLAGS $DED_LDFLAGS"
+fi
+
+DED_LIBS=$LIBS
+
+fi # "x$DED_LD" = "x"
+
+AC_CHECK_TOOL(DED_LD, ld, false)
+test "$DED_LD" != "false" || AC_MSG_ERROR([No linker found])
+# Static flags use DED_WARN_FLAGS, the only warning flag variable assigned in this macro.
+AC_MSG_CHECKING(for static compiler flags)
+DED_STATIC_CFLAGS="$DED_WERRORFLAGS $DED_WARN_FLAGS $DED_THR_DEFS $DED_STATIC_CFLAGS"
+AC_MSG_RESULT([$DED_STATIC_CFLAGS])
+AC_MSG_CHECKING(for basic compiler flags for loadable drivers)
+DED_BASIC_CFLAGS=$DED_CFLAGS
+AC_MSG_RESULT([$DED_CFLAGS])
+AC_MSG_CHECKING(for compiler flags for loadable drivers)
+DED_CFLAGS="$DED_WERRORFLAGS $DED_WARN_FLAGS $DED_THR_DEFS $DED_CFLAGS"
+AC_MSG_RESULT([$DED_CFLAGS])
+AC_MSG_CHECKING(for linker for loadable drivers)
+AC_MSG_RESULT([$DED_LD])
+AC_MSG_CHECKING(for linker flags for loadable drivers)
+AC_MSG_RESULT([$DED_LDFLAGS])
+AC_MSG_CHECKING(for 'runtime library path' linker flag)
+if test "x$DED_LD_FLAG_RUNTIME_LIBRARY_PATH" != "x"; then
+	AC_MSG_RESULT([$DED_LD_FLAG_RUNTIME_LIBRARY_PATH])
+else
+	AC_MSG_RESULT([not found])
+fi
+
+AC_SUBST(DED_CC)
+AC_SUBST(DED_GCC)
+AC_SUBST(DED_EXT)
+AC_SUBST(DED_SYS_INCLUDE)
+AC_SUBST(DED_INCLUDE)
+AC_SUBST(DED_BASIC_CFLAGS)
+AC_SUBST(DED_CFLAGS)
+AC_SUBST(DED_STATIC_CFLAGS)
+AC_SUBST(DED_WARN_FLAGS)
+AC_SUBST(DED_WERRORFLAGS)
+AC_SUBST(DED_LD)
+AC_SUBST(DED_LDFLAGS)
+AC_SUBST(DED_LD_FLAG_RUNTIME_LIBRARY_PATH)
+AC_SUBST(DED_LIBS)
+AC_SUBST(DED_THR_DEFS)
+AC_SUBST(DED_OSTYPE)
+
+])
diff --git a/erts/configure.in b/erts/configure.in
index 9245e4dc90..14c8b50680 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -26,11 +26,6 @@ AC_PREREQ(2.59)
 
 LM_PRECIOUS_VARS
 
-if test "x$no_recursion" != "xyes" -a "x$OVERRIDE_CONFIG_CACHE" = "x"; then
-    # We do not want to use a common cache!
-    cache_file=/dev/null
-fi
-
 dnl How to set srcdir absolute is taken from the GNU Emacs distribution
 #### Make srcdir absolute, if it isn't already.  It's important to
 #### avoid running the path through pwd unnecessary, since pwd can
@@ -470,7 +465,10 @@ dnl
 dnl Make sure we find config.h
 dnl
 
-extra_flags="-I${ERL_TOP}/erts/$host $OTP_EXTRA_FLAGS"
+ERTS_CONFIG_H_IDIR="-I${ERL_TOP}/erts/$host"
+AC_SUBST(ERTS_CONFIG_H_IDIR)
+
+extra_flags="$ERTS_CONFIG_H_IDIR $OTP_EXTRA_FLAGS"
 CFLAGS="$CFLAGS $extra_flags"
 DEBUG_CFLAGS="-g $CPPFLAGS $extra_flags $DEBUG_CFLAGS"
 DEBUG_FLAGS=-g
@@ -502,23 +500,6 @@ case $CFLAGS in
 		;;
 esac
 
-
-
-CFLAG_RUNTIME_LIBRARY_PATH="-Wl,-R"
-case $host_os in
-  darwin*)
-	CFLAG_RUNTIME_LIBRARY_PATH=
-	;;
-  win32)
-	CFLAG_RUNTIME_LIBRARY_PATH=
-	;;
-  osf*)
-	CFLAG_RUNTIME_LIBRARY_PATH="-Wl,-rpath,"
-	;;
-  *)
-	;;
-esac
-
 lfs_conf=ok
 lfs_source=none
 if test "${LFS_CFLAGS+set}" = "set" || \
@@ -605,7 +586,6 @@ AC_SUBST(DEBUG_FLAGS)
 AC_SUBST(DEBUG_CFLAGS)
 AC_SUBST(WFLAGS)
 AC_SUBST(WERRORFLAGS)
-AC_SUBST(CFLAG_RUNTIME_LIBRARY_PATH)
 
 ## Check if we can do profile guided optimization of beam_emu
 LM_CHECK_ENABLE_CFLAG([-fprofile-generate -Werror],[PROFILE_GENERATE])
@@ -949,9 +929,6 @@ esac
 
 AC_SUBST(LD)
 
-LDFLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
-AC_SUBST(LDFLAG_RUNTIME_LIBRARY_PATH)
-
 dnl Check for cygwin and object/exe files extension
 dnl AC_CYGWIN is deprecated
 AC_EXEEXT
@@ -1523,7 +1500,7 @@ dnl Some Linuxes needs <sys/socketio.h> instead of <sys/sockio.h>
 dnl
 AC_CHECK_HEADERS(fcntl.h limits.h unistd.h syslog.h dlfcn.h ieeefp.h \
                  sys/types.h sys/stropts.h sys/sysctl.h \
-                 sys/ioctl.h sys/time.h sys/uio.h \
+                 sys/ioctl.h sys/time.h sys/uio.h sys/mman.h \
                  sys/socket.h sys/sockio.h sys/socketio.h \
                  net/errno.h malloc.h arpa/nameser.h libdlpi.h \
 		 pty.h util.h libutil.h utmp.h langinfo.h poll.h sdkddkver.h)
@@ -2074,29 +2051,6 @@ esac
 
 AC_CHECK_DECLS([posix2time, time2posix],,,[#include <time.h>])
 
-disable_vfork=false
-if test "x$EMU_THR_LIB_NAME" != "x"; then
-	AC_MSG_CHECKING([if vfork is known to hang multithreaded applications])
-	case $host_os in
-		osf*)
-			AC_MSG_RESULT(yes)
-			disable_vfork=true;;
-		*)
-			AC_MSG_RESULT(no);;
-	esac
-fi
-
-if test $disable_vfork = false; then
-	AC_FUNC_VFORK
-	if test $ac_cv_func_vfork_works = no; then
-		disable_vfork=true
-	fi
-fi
-
-if test $disable_vfork = true; then
-	AC_DEFINE(DISABLE_VFORK, 1, [Define if you want to disable vfork.])
-fi
-
 AC_FUNC_VPRINTF
 
 dnl The AC_DEFINEs are necessary for autoheader to work. :-(
@@ -3033,165 +2987,6 @@ dnl ----------------------------------------------------------------------
 dnl Stuff that should be moved into their respective application
 dnl ----------------------------------------------------------------------
 
-dnl crypto
-#--------------------------------------------------------------------
-# Dynamic Erlang Drivers
-#
-# Linking to produce dynamic Erlang drivers to be loaded by Erlang's
-# Dynamic Driver Loader and Linker (DDLL). Below the prefix DED is an
-# abbreviation for `Dynamic Erlang Driver'.
-#
-# For DED we need something quite sloppy, which allows undefined references 
-# (notably driver functions) in the resulting shared library. 
-# Example of Makefile rule (and settings of macros):
-#
-# LIBS = @LIBS@
-# LD = @DED_LD@
-# LDFLAGS = @DED_LDFLAGS@
-# soname = @ldsoname@
-#
-# my_drv.so:   my_drv.o my_utils.o
-#              $(LD) $(LDFLAGS) $(soname) $@ -o $@ $^ -lc $(LIBS)
-#
-#--------------------------------------------------------------------
-
-DED_SYS_INCLUDE="-I${ERL_TOP}/erts/emulator/beam -I${ERL_TOP}/erts/include -I${ERL_TOP}/erts/include/$host -I${ERL_TOP}/erts/include/internal -I${ERL_TOP}/erts/include/internal/$host -I${ERL_TOP}/erts/emulator/sys/$ERLANG_OSTYPE -I${ERL_TOP}/erts/emulator/sys/common"
-
-if test "X$ETHR_DEFS" = "X"; then
-    DED_THR_DEFS="-D_THREAD_SAFE -D_REENTRANT"
-else
-    DED_THR_DEFS="$ETHR_DEFS"
-fi
-DED_EMU_THR_DEFS=$EMU_THR_DEFS
-DED_CFLAGS="$CFLAGS $CPPFLAGS"
-if test "x$GCC" = xyes; then
-    DED_STATIC_CFLAGS="$DED_CFLAGS"
-    DED_CFLAGS="$DED_CFLAGS -fPIC"
-fi
-
-DED_EXT=so
-case $host_os in
-    win32) DED_EXT=dll;;
-    darwin*)
-	DED_CFLAGS="$DED_CFLAGS -fno-common"
-	DED_STATIC_CFLAGS="$DED_STATIC_CFLAGS -fno-common";;
-    *)
-	;;
-esac
-
-DED_STATIC_CFLAGS="$DED_STATIC_CFLAGS -DSTATIC_ERLANG_NIF -DSTATIC_ERLANG_DRIVER"
-
-# If DED_LD is set in environment, we expect all DED_LD* variables
-# to be specified (cross compiling)
-if test "x$DED_LD" = "x"; then
-
-DED_LD_FLAG_RUNTIME_LIBRARY_PATH="-R"
-case $host_os in
-	win32)
-		DED_LD="ld.sh"
-		DED_LDFLAGS="-dll"
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH=
-	;;
-	solaris2*|sysv4*)
-		DED_LDFLAGS="-G"
-		if test X${enable_m64_build} = Xyes; then
-			DED_LDFLAGS="-64 $DED_LDFLAGS"
-		fi
-	;;
-	aix4*)
-		DED_LDFLAGS="-G -bnoentry -bexpall"
-	;;
-	freebsd2*)
-		# Non-ELF GNU linker
-		DED_LDFLAGS="-Bshareable"
-	;;
-	darwin*)
-		# Mach-O linker: a shared lib and a loadable
-		# object file is not the same thing.
-		DED_LDFLAGS="-bundle -bundle_loader ${ERL_TOP}/bin/$host/beam.smp"
-		case $ARCH in
-			amd64)
-				DED_LDFLAGS="-m64 $DED_LDFLAGS"
-				;;
-			*)
-				;;
-		esac
-		DED_LD="$CC"
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
-	;;
-	linux*)
-		DED_LD="$CC"
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
-		DED_LDFLAGS="-shared -Wl,-Bsymbolic"
-		if test X${enable_m64_build} = Xyes; then
-			DED_LDFLAGS="-m64 $DED_LDFLAGS"
-		fi;
-		if test X${enable_m32_build} = Xyes; then
-			DED_LDFLAGS="-m32 $DED_LDFLAGS"
-		fi
-	;;	
-	freebsd*)
-		DED_LD="$CC"
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
-		DED_LDFLAGS="-shared"
-		if test X${enable_m64_build} = Xyes; then
-			DED_LDFLAGS="-m64 $DED_LDFLAGS"
-		fi;
-		if test X${enable_m32_build} = Xyes; then
-			DED_LDFLAGS="-m32 $DED_LDFLAGS"
-		fi
-	;;	
-	openbsd*)
-		DED_LD="$CC"
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="$CFLAG_RUNTIME_LIBRARY_PATH"
-		DED_LDFLAGS="-shared"
-	;;
-	osf*)
-		# NOTE! Whitespace after -rpath is important.
-		DED_LD_FLAG_RUNTIME_LIBRARY_PATH="-rpath "
-		DED_LDFLAGS="-shared -expect_unresolved '*'"
-	;;
-	*)
-		# assume GNU linker and ELF
-		DED_LDFLAGS="-shared"
-		# GNU linker has no option for 64bit build, should not propagate -m64
-	;;
-esac
-
-if test "$DED_LD" = "" && test "$USER_LD" != ""; then
-    DED_LD="$USER_LD"
-    DED_LDFLAGS="$USER_LDFLAGS $DED_LDFLAGS"
-fi
-
-fi # "x$DED_LD" = "x"
-
-AC_CHECK_TOOL(DED_LD, ld, false)
-test "$DED_LD" != "false" || AC_MSG_ERROR([No linker found])
-
-AC_MSG_CHECKING(for compiler flags for loadable drivers)
-AC_MSG_RESULT([$DED_CFLAGS])
-AC_MSG_CHECKING(for linker for loadable drivers)
-AC_MSG_RESULT([$DED_LD])
-AC_MSG_CHECKING(for linker flags for loadable drivers)
-AC_MSG_RESULT([$DED_LDFLAGS])
-AC_MSG_CHECKING(for 'runtime library path' linker flag)
-if test "x$DED_LD_FLAG_RUNTIME_LIBRARY_PATH" != "x"; then
-	AC_MSG_RESULT([$DED_LD_FLAG_RUNTIME_LIBRARY_PATH])
-else
-	AC_MSG_RESULT([not found])
-fi
-
-AC_SUBST(DED_EXT)
-AC_SUBST(DED_SYS_INCLUDE)
-AC_SUBST(DED_CFLAGS)
-AC_SUBST(DED_STATIC_CFLAGS)
-AC_SUBST(DED_LD)
-AC_SUBST(DED_LDFLAGS)
-AC_SUBST(DED_LD_FLAG_RUNTIME_LIBRARY_PATH)
-AC_SUBST(DED_THR_DEFS)
-AC_SUBST(DED_EMU_THR_DEFS)
-AC_SUBST(STATIC_CFLAGS)
-
 dnl
 dnl We should look for a compiler that handles jump tables, for beam_emu 
 dnl to be optimized
@@ -3348,733 +3143,6 @@ if test "$enable_lttng_test" = "yes" ; then
 fi
 
 
-dnl
-dnl SSL, SSH and CRYPTO need the OpenSSL libraries
-dnl
-dnl Check flags --with-ssl, --without-ssl --with-ssl=PATH.
-dnl If no option is given or --with-ssl is set without a path then we
-dnl search for OpenSSL libraries and header files in the standard locations. 
-dnl If set to --without-ssl we disable the use of SSL, SSH and CRYPTO.
-dnl If set to --with-ssl=PATH we use that path as the prefix, i.e. we
-dnl use "PATH/include" and "PATH/lib".
-
-AC_SUBST(SSL_INCLUDE)
-AC_SUBST(SSL_INCDIR)
-AC_SUBST(SSL_LIBDIR)
-AC_SUBST(SSL_FLAGS)
-AC_SUBST(SSL_CRYPTO_LIBNAME)
-AC_SUBST(SSL_SSL_LIBNAME)
-AC_SUBST(SSL_CC_RUNTIME_LIBRARY_PATH)
-AC_SUBST(SSL_LD_RUNTIME_LIBRARY_PATH)
-AC_SUBST(SSL_DED_LD_RUNTIME_LIBRARY_PATH)
-AC_SUBST(SSL_DYNAMIC_ONLY)
-AC_SUBST(SSL_LINK_WITH_KERBEROS)
-AC_SUBST(STATIC_KERBEROS_LIBS)
-AC_SUBST(SSL_LINK_WITH_ZLIB)
-AC_SUBST(STATIC_ZLIB_LIBS)
-
-std_ssl_locations="/usr/local /usr/sfw /usr /opt/local /usr/pkg /usr/local/openssl /usr/lib/openssl /usr/openssl /usr/local/ssl /usr/lib/ssl /usr/ssl /"
-
-AC_ARG_WITH(ssl-zlib,
-AS_HELP_STRING([--with-ssl-zlib=PATH],
-               [specify location of ZLib to be used by OpenSSL])
-AS_HELP_STRING([--with-ssl-zlib],
-               [link SSL with  Zlib (default if found)])
-AS_HELP_STRING([--without-ssl-zlib],
-               [don't link SSL with ZLib]))
-
-
-if  test "x$with_ssl_zlib" = "xno"; then
-	SSL_LINK_WITH_ZLIB=no
-	STATIC_ZLIB_LIBS=
-elif test "x$with_ssl_zlib" = "xyes" || test "x$with_ssl_zlib" = "x"; then
-	if test $erl_xcomp_without_sysroot = yes; then
-		AC_MSG_WARN([Cannot search for zlib; missing cross system root (erl_xcomp_sysroot).])
-		SSL_LINK_WITH_ZLIB=no	
-		STATIC_ZLIB_LIBS=	
-	elif  test "x$MIXED_CYGWIN" = "xyes" -o "x$MIXED_MSYS" = "xyes"; then
-		SSL_LINK_WITH_ZLIB=no	
-		STATIC_ZLIB_LIBS=	
-	else
-		SSL_LINK_WITH_ZLIB=no
-		STATIC_ZLIB_LIBS=
-		AC_MSG_CHECKING(for static ZLib to be used by SSL in standard locations) 
-		for rdir in $std_ssl_locations; do
-			dir="$erl_xcomp_sysroot$rdir"
-			if test "x$ac_cv_sizeof_void_p" = "x8"; then
-				if test -f "$dir/lib64/libz.a"; then
-					SSL_LINK_WITH_ZLIB=yes
-					STATIC_ZLIB_LIBS="$dir/lib64/libz.a"
-					break
-				elif test -f "$dir/lib/64/libz.a"; then
-					SSL_LINK_WITH_ZLIB=yes
-					STATIC_ZLIB_LIBS="$dir/lib/64/libz.a"
-					break
-				fi
-			fi
-			if test -f "$dir/lib/libz.a"; then
-					SSL_LINK_WITH_ZLIB=yes
-				STATIC_ZLIB_LIBS="$dir/lib/libz.a"
-				break
-			fi
-		done
-		if test "x$SSL_LINK_WITH_ZLIB" = "xno"; then
-		       	AC_MSG_RESULT([no])
-		else
-			AC_MSG_RESULT([$STATIC_ZLIB_LIBS])
-		fi
-	fi				
-else
-	SSL_LINK_WITH_ZLIB=no
-	STATIC_ZLIB_LIBS=
-	if test -f "$with_ssl_zlib/libz.a"; then
-		SSL_LINK_WITH_ZLIB=yes
-		STATIC_ZLIB_LIBS=$with_ssl_zlib/libz.a
-	elif test -f "$with_ssl_zlib/lib/libz.a"; then
-		SSL_LINK_WITH_ZLIB=yes
-		STATIC_ZLIB_LIBS=$with_ssl_zlib/lib/libz.a
-	fi
-	if test "x$ac_cv_sizeof_void_p" = "x8"; then
-		if test -f "$with_ssl_zlib/lib64/libz.a"; then
-			SSL_LINK_WITH_ZLIB=yes
-			STATIC_ZLIB_LIBS=$with_ssl_zlib/lib64/libz.a
-		elif test -f "$with_ssl_zlib/lib/64/libz.a"; then
-			SSL_LINK_WITH_ZLIB=yes
-			STATIC_ZLIB_LIBS=$with_ssl_zlib/lib/64/libz.a
-		fi
-	fi
-	if test "x$SSL_LINK_WITH_ZLIB" = "xno"; then
-	       	AC_MSG_ERROR(Invalid path to option --with-ssl-zlib=PATH)
-	fi
-fi
-		
-			
-AC_ARG_WITH(ssl,
-AS_HELP_STRING([--with-ssl=PATH], [specify location of OpenSSL include and lib])
-AS_HELP_STRING([--with-ssl], [use SSL (default)])
-AS_HELP_STRING([--without-ssl], [don't use SSL]))
-
-AC_ARG_WITH(ssl-incl,
-AS_HELP_STRING([--with-ssl-incl=PATH], [location of OpenSSL include dir, if different than specified by --with-ssl=PATH]),
-[
-case X$with_ssl in
-    X | Xyes | Xno) AC_MSG_ERROR([--with-ssl-incl=PATH set without --with-ssl=PATH]);;
-esac
-],
-[with_ssl_incl=$with_ssl]) #default
-
-AC_ARG_WITH(ssl-rpath,
-AS_HELP_STRING([--with-ssl-rpath=yes|no|PATHS],
-               [runtime library path for OpenSSL. Default is "yes", which equates to a
-	       number of standard locations. If "no", then no runtime
-	       library paths will be used. Anything else should be a
-	       comma separated list of paths.]),
-[
-case X$with_ssl in
-    Xno) AC_MSG_ERROR([--with-ssl-rpath set without --with-ssl]);;
-esac
-],
-[with_ssl_rpath=yes]) #default
-
-
-AC_ARG_ENABLE(dynamic-ssl-lib,
-AS_HELP_STRING([--disable-dynamic-ssl-lib],
-               [disable using dynamic openssl libraries]),
-[ case "$enableval" in
-    no) enable_dynamic_ssl=no ;;
-    *)  enable_dynamic_ssl=yes ;;
-  esac ], enable_dynamic_ssl=yes)
-
-#----------------------------------------------------------------------
-# We actually might do the SSL tests twice due to late discovery of 
-# kerberos problems with static linking, in case we redo it all trying
-# dynamic SSL libraries instead.
-#----------------------------------------------------------------------
-
-ssl_done=no
-
-while test "x$ssl_done" != "xyes"; do
-
-ssl_done=yes # Default only one run
-
-# Remove all SKIP files from previous runs
-for a in ssl crypto ssh; do
-  $RM -f $ERL_TOP/lib/$a/SKIP
-done
-
-SSL_DYNAMIC_ONLY=$enable_dynamic_ssl
-SSL_STATIC_ONLY=no
-
-case "$erl_xcomp_without_sysroot-$with_ssl" in
-  yes-* | no-no)
-    SSL_APP=
-    CRYPTO_APP=
-    SSH_APP=
-    if test "$with_ssl" = "no"; then
-	skip="User gave --without-ssl option"
-    else
-	skip="Cannot search for ssl; missing cross system root (erl_xcomp_sysroot)."
-    fi
-    for a in ssl crypto ssh; do
-        echo "$skip" > $ERL_TOP/lib/$a/SKIP
-    done
-    ;;
-  no-yes | no- )
-    # On windows, we could try to find the installation
-    # of Shining Light OpenSSL, which can be found by poking in
-    # the uninstall section in the registry, it's worth a try...
-    extra_dir=""
-    if  test "x$MIXED_CYGWIN" = "xyes"; then
-    	AC_CHECK_PROG(REGTOOL, regtool, regtool, false)
-	if test "$ac_cv_prog_REGTOOL" != false; then
-		wrp="/machine/software/microsoft/windows/currentversion/"
-	   	if test "x$ARCH" = "xamd64"; then
-		   urp="uninstall/openssl (64-bit)_is1/inno setup: app path"
-		   regtool_subsystem=-w
-		else
-		   urp="uninstall/openssl (32-bit)_is1/inno setup: app path"
-		   regtool_subsystem=-W
-		fi	
-		rp="$wrp$urp"
-		if regtool -q $regtool_subsystem get "$rp" > /dev/null; then
-		   true
-		else
-		   # Fallback to unspecified wordlength
-		   urp="uninstall/openssl_is1/inno setup: app path"
-		   rp="$wrp$urp"
-		fi	
-		if regtool -q $regtool_subsystem get "$rp" > /dev/null; then
-			ssl_install_dir=`regtool -q $regtool_subsystem get "$rp"`
-			# Try hard to get rid of spaces...
-			if cygpath -d "$ssl_install_dir" > /dev/null 2>&1; then
-				ssl_install_dir=`cygpath -d "$ssl_install_dir"`
-			fi
-			extra_dir=`cygpath $ssl_install_dir`
-		fi
-	fi
-    elif test "x$MIXED_MSYS" = "xyes"; then
-    	AC_CHECK_PROG(REGTOOL, reg_query.sh, reg_query.sh, false)
-	if test "$ac_cv_prog_REGTOOL" != false; then
-	   	if test "x$ARCH" = "xamd64"; then
-		   rp="HKLM/SOFTWARE/Microsoft/Windows/CurrentVersion/Uninstall/OpenSSL (64-bit)_is1"
-		else
-		   rp="HKLM/SOFTWARE/Microsoft/Windows/CurrentVersion/Uninstall/OpenSSL_is1"
-		fi	
-		key="Inno Setup: App Path"
-		if "$ac_cv_prog_REGTOOL" "$rp" "$key" > /dev/null; then
-			ssl_install_dir=`"$ac_cv_prog_REGTOOL" "$rp" "$key"`
-			extra_dir=`win2msys_path.sh "$ssl_install_dir"`
-		fi
-	fi
-    fi
-    # We search for OpenSSL in the common OS standard locations.
-    SSL_APP=ssl
-    CRYPTO_APP=crypto
-    SSH_APP=ssh
-
-    SSL_CRYPTO_LIBNAME=crypto
-    SSL_SSL_LIBNAME=ssl
-    
-    if  test "x$MIXED_CYGWIN" = "xyes" -o "x$MIXED_MSYS" = "xyes"; then
-    	if test "x$ARCH" = "xamd64"; then
-	  std_win_ssl_locations="/cygdrive/c/OpenSSL-Win64 /c/OpenSSL-Win64 /opt/local64/pgm/OpenSSL"
-	else
-	  std_win_ssl_locations="/cygdrive/c/OpenSSL-Win32 /c/OpenSSL-Win32 /cygdrive/c/OpenSSL /c/OpenSSL /opt/local/pgm/OpenSSL"
-	fi
-    else
-        std_win_ssl_locations=""
-    fi
-
-
-    AC_MSG_CHECKING(for OpenSSL >= 0.9.8c in standard locations)
-    for rdir in $extra_dir $std_win_ssl_locations $std_ssl_locations; do
-	dir="$erl_xcomp_sysroot$rdir"
-	if test -f "$erl_xcomp_isysroot$rdir/include/openssl/opensslv.h"; then
-		is_real_ssl=yes
-		SSL_INCDIR="$dir"
-		if test "x$MIXED_CYGWIN" = "xyes" -o "x$MIXED_MSYS" = "xyes"; then
-			if test -f "$dir/lib/VC/libeay32.lib"; then
-				SSL_RUNTIME_LIBDIR="$rdir/lib/VC"
-				SSL_LIBDIR="$dir/lib/VC"
-				SSL_CRYPTO_LIBNAME=libeay32
-				SSL_SSL_LIBNAME=ssleay32
-			elif test -f "$dir/lib/VC/openssl.lib"; then 
-				SSL_RUNTIME_LIBDIR="$rdir/lib/VC"
-				SSL_LIBDIR="$dir/lib/VC"
-			elif test -f $dir/lib/VC/libeay32MD.lib; then
-		            SSL_CRYPTO_LIBNAME=libeay32MD
-		            SSL_SSL_LIBNAME=ssleay32MD
-			    if test "x$enable_dynamic_ssl" = "xno" && \
-	                       test -f  $dir/lib/VC/static/libeay32MD.lib; then
-				  SSL_RUNTIME_LIBDIR="$rdir/lib/VC/static"
-				  SSL_LIBDIR="$dir/lib/VC/static"
-		            else
-			          SSL_RUNTIME_LIBDIR="$rdir/lib/VC"
-				  SSL_LIBDIR="$dir/lib/VC"
-                            fi 
-			elif test -f "$dir/lib/libeay32.lib"; then
-				SSL_RUNTIME_LIBDIR="$rdir/lib"
-				SSL_LIBDIR="$dir/lib"
-				SSL_CRYPTO_LIBNAME=libeay32
-				SSL_SSL_LIBNAME=ssleay32
-			elif test -f "$dir/lib/openssl.lib"; then
-				SSL_RUNTIME_LIBDIR="$rdir/lib"
-				SSL_LIBDIR="$dir/lib"
-			else
-				is_real_ssl=no
-			fi
-		elif test -f "$dir/lib/powerpc/libsslcrypto.a"; then
-			SSL_CRYPTO_LIBNAME=sslcrypto
-			SSL_LIBDIR="$dir/lib/powerpc/"
-			SSL_RUNTIME_LIBDIR="$rdir/lib/powerpc/"
-		else
-			if test "x$ac_cv_sizeof_void_p" = "x8"; then
-				if test -f "$dir/lib64/libcrypto.a"; then
-					SSL_RUNTIME_LIBDIR="$rdir/lib64"
-					SSL_LIBDIR="$dir/lib64"
-				elif test -f "$dir/lib/64/libcrypto.a"; then
-					SSL_RUNTIME_LIBDIR="$rdir/lib/64"
-					SSL_LIBDIR="$dir/lib/64"
-				elif test -f "$dir/lib64/libcrypto.so"; then
-					SSL_RUNTIME_LIBDIR="$rdir/lib64"
-					SSL_LIBDIR="$dir/lib64"
-				elif test -f "$dir/lib/64/libcrypto.so"; then
-					SSL_RUNTIME_LIBDIR="$rdir/lib/64"
-					SSL_LIBDIR="$dir/lib/64"
-				else
-					SSL_RUNTIME_LIBDIR="$rdir/lib"
-					SSL_LIBDIR="$dir/lib"
-				fi
-			else	 
-				SSL_RUNTIME_LIBDIR="$rdir/lib"
-				SSL_LIBDIR="$dir/lib"
-			fi
-		fi
-		if test '!' -f "$SSL_LIBDIR/lib${SSL_CRYPTO_LIBNAME}.a"; then
-			SSL_DYNAMIC_ONLY=yes
-		elif test '!' -f "$SSL_LIBDIR/lib${SSL_CRYPTO_LIBNAME}.so" -a '!' -f "$SSL_LIBDIR/lib${SSL_CRYPTO_LIBNAME}.dylib"; then
-			SSL_STATIC_ONLY=yes
-		fi
-		SSL_BINDIR="$rdir/bin"
-		if test "x$is_real_ssl" = "xyes" ; then
-			SSL_INCLUDE="-I$dir/include"
-			old_CPPFLAGS=$CPPFLAGS
-			CPPFLAGS=$SSL_INCLUDE
-			AC_EGREP_CPP(^yes$,[
-#include <openssl/opensslv.h>
-#if OPENSSL_VERSION_NUMBER >= 0x0090803fL
-yes
-#endif
-			],[
-			ssl_found=yes
-			],[
-			SSL_APP=
-			ssl_found=no
-         		])
-         		CPPFLAGS=$old_CPPFLAGS
-			if test "x$ssl_found" = "xyes"; then
-			   	if test "x$MIXED_CYGWIN" = "xyes" -o "x$MIXED_MSYS" = "xyes"; then
-				   	ssl_linkable=yes
-				elif test "x${SSL_CRYPTO_LIBNAME}" = "xsslcrypto"; then
-				# This should only be triggered seen OSE
-					ssl_linkable=yes
-				else
-					saveCFLAGS="$CFLAGS"
- 					saveLDFLAGS="$LDFLAGS"
-					saveLIBS="$LIBS"
- 					CFLAGS="$CFLAGS $SSL_INCLUDE"
-					if test "x$SSL_STATIC_ONLY" = "xyes"; then
-						LIBS="${SSL_LIBDIR}/lib${SSL_CRYPTO_LIBNAME}.a"
-					else
-						LDFLAGS="$LDFLAGS -L$SSL_LIBDIR"
-						LIBS="$LIBS -l${SSL_CRYPTO_LIBNAME}"
-					fi
- 					AC_TRY_LINK([
-					#include <stdio.h>
- 					#include <openssl/hmac.h>],
- 					[ 
-					HMAC(0, 0, 0, 0, 0, 0, 0);
- 					],
- 					[ssl_linkable=yes],
- 					[ssl_linkable=no])
- 					CFLAGS="$saveCFLAGS"
- 					LDFLAGS="$saveLDFLAGS"
-					LIBS="$saveLIBS"
-				fi
- 			fi
-	 		if test "x$ssl_found" = "xyes" && test "x$ssl_linkable" = "xyes"; then
-			 	AC_MSG_RESULT([$dir])
-	   			break;
-         		fi
-		fi	
-       fi
-    done
-
-    if test "x$ssl_found" != "xyes" ; then
-	dnl 
-	dnl If no SSL found above, check whether we are running on OpenBSD.
-	dnl
-	case $host_os in
-	openbsd*)
-	       if test -f "$erl_xcomp_isysroot/usr/include/openssl/opensslv.h"; then
-		  # Trust OpenBSD to have everything the in the correct locations.
-		  ssl_found=yes
-		  ssl_linkable=yes
-		  SSL_INCDIR="$erl_xcomp_sysroot/usr"
-	          AC_MSG_RESULT([$SSL_INCDIR])
-		  SSL_RUNTIME_LIB="/usr/lib"
-		  SSL_LIB="$erl_xcomp_sysroot/usr/lib"
-		  SSL_BINDIR="/usr/sbin"
-		  dnl OpenBSD requires us to link with -L and -l
-		  SSL_DYNAMIC_ONLY="yes" 
-		fi
-		;;
-	esac
-    fi
-dnl		Now, certain linuxes have a 64bit libcrypto
-dnl		that cannot build shared libraries (i.e. not PIC)
-dnl		One could argue that this is wrong, but
-dnl		so it is - be adoptable
-    if test "$ssl_found" = "yes" && test "$ssl_linkable" = "yes" && test "$SSL_DYNAMIC_ONLY" != "yes"; then
-	case $host_os in
-		linux*)
-			saveCFLAGS="$CFLAGS"
- 			saveLDFLAGS="$LDFLAGS"
-			saveLIBS="$LIBS"
- 			CFLAGS="$DED_CFLAGS $SSL_INCLUDE"
- 			LDFLAGS="$DED_LDFLAGS"
-			LIBS="$SSL_LIBDIR/libcrypto.a $STATIC_ZLIB_LIBS"
- 			AC_TRY_LINK([
-			#include <stdio.h>
- 			#include <openssl/hmac.h>],
- 			[ 
-			HMAC(0, 0, 0, 0, 0, 0, 0);
- 			],
- 			[ssl_dyn_linkable=yes],
- 			[ssl_dyn_linkable=no])
- 			CFLAGS="$saveCFLAGS"
- 			LDFLAGS="$saveLDFLAGS"
-			LIBS="$saveLIBS"
-			if test "x$ssl_dyn_linkable" != "xyes"; then
-				SSL_DYNAMIC_ONLY=yes
-				AC_MSG_WARN([SSL will be linked against dynamic lib as static lib is not purely relocatable])
-			fi
-			;;
-	esac
-    fi					
-	
-				
-
-
-    if test "x$ssl_found" != "xyes" || test  "x$ssl_linkable" != "xyes"; then
-       if test "x$ssl_found" = "xyes"; then 
-       		AC_MSG_RESULT([found; but not usable])
-       else
-       		AC_MSG_RESULT([no])
-       fi
-       SSL_APP=
-       CRYPTO_APP=
-       SSH_APP=
-       AC_MSG_WARN([No (usable) OpenSSL found, skipping ssl, ssh and crypto applications])
-
-       for a in ssl crypto ssh; do
-           echo "No usable OpenSSL found" > $ERL_TOP/lib/$a/SKIP
-       done
-    fi
-    ;;
-  *)
-    # Option given with PATH to package
-    if test ! -d "$with_ssl" ; then
-       AC_MSG_ERROR(Invalid path to option --with-ssl=PATH)
-    fi
-    if test ! -d "$with_ssl_incl" ; then
-       AC_MSG_ERROR(Invalid path to option --with-ssl-incl=PATH)
-    fi
-    SSL_INCDIR="$with_ssl_incl"
-    SSL_CRYPTO_LIBNAME=crypto
-    SSL_SSL_LIBNAME=ssl
-    if test "x$MIXED_CYGWIN" = "xyes" -o "x$MIXED_MSYS" = "xyes" && test -d "$with_ssl/lib/VC"; then
-	if test -f "$with_ssl/lib/VC/libeay32.lib"; then
-	    SSL_LIBDIR="$with_ssl/lib/VC"
-	    SSL_CRYPTO_LIBNAME=libeay32
-	    SSL_SSL_LIBNAME=ssleay32
-	elif test -f "$with_ssl/lib/VC/openssl.lib"; then 
-	    SSL_LIBDIR="$with_ssl/lib/VC"
-	elif test -f $with_ssl/lib/VC/libeay32MD.lib; then
-	    SSL_CRYPTO_LIBNAME=libeay32MD
-            SSL_SSL_LIBNAME=ssleay32MD
-	    if test "x$enable_dynamic_ssl" = "xno" && \
-	       test -f  $with_ssl/lib/VC/static/libeay32MD.lib; then
-		SSL_LIBDIR="$with_ssl/lib/VC/static"
-	    else
-		SSL_LIBDIR="$with_ssl/lib/VC"
-            fi 
-	elif test -f "$with_ssl/lib/libeay32.lib"; then
-	    SSL_LIBDIR="$with_ssl/lib"
-	    SSL_CRYPTO_LIBNAME=libeay32
-	    SSL_SSL_LIBNAME=ssleay32
-	else
-	    # This probably wont work, but that's what the user said, so...
-	    SSL_LIBDIR="$with_ssl/lib"
-	fi
-    elif test -f "$dir/lib/powerpc/libsslcrypto.a"; then
-	    SSL_CRYPTO_LIBNAME=sslcrypto
-	    SSL_LIBDIR="$with_ssl/lib/powerpc/"
-    elif test "x$ac_cv_sizeof_void_p" = "x8"; then
-	if test -f "$with_ssl/lib64/libcrypto.a"; then
-		SSL_LIBDIR="$with_ssl/lib64"
-	elif test -f "$with_ssl/lib/64/libcrypto.a"; then
-		SSL_LIBDIR="$with_ssl/lib/64"
-	elif test -f "$with_ssl/lib64/libcrypto.so"; then
-		SSL_LIBDIR="$with_ssl/lib64"
-	elif test -f "$with_ssl/lib/64/libcrypto.so"; then
-		SSL_LIBDIR="$with_ssl/lib/64"
-	else
-		SSL_LIBDIR="$with_ssl/lib"
-	fi
-    else	 
-	SSL_LIBDIR="$with_ssl/lib"
-    fi
-    if test '!' -f "${SSL_LIBDIR}/lib${SSL_CRYPTO_LIBNAME}.a"; then
-	SSL_DYNAMIC_ONLY=yes
-    elif test '!' -f ${SSL_LIBDIR}/lib${SSL_CRYPTO_LIBNAME}.so -a '!' -f "$SSL_LIBDIR/lib${SSL_CRYPTO_LIBNAME}.dylib"; then
-	SSL_STATIC_ONLY=yes
-    fi
-    SSL_INCLUDE="-I$with_ssl_incl/include"
-    SSL_APP=ssl
-    CRYPTO_APP=crypto
-    SSH_APP=ssh
-    if test "$cross_compiling" = "yes"; then
-	SSL_RUNTIME_LIBDIR=`echo "$SSL_LIBDIR" | sed -n "s|^$erl_xcomp_sysroot\(/*\)\(.*\)\$|/\2|p"`
-    else
-	SSL_RUNTIME_LIBDIR="$SSL_LIBDIR"
-    fi
-esac
-
-if test "x$SSL_APP" != "x" ; then
-    dnl We found openssl, now check if we use kerberos 5 support
-    dnl FIXME: Do we still support platforms that have Kerberos?
-    AC_MSG_CHECKING(for OpenSSL kerberos 5 support)
-    old_CPPFLAGS=$CPPFLAGS
-    CPPFLAGS=$SSL_INCLUDE
-    AC_EGREP_CPP(^yes$,[
-#include <openssl/opensslv.h>
-#include <openssl/opensslconf.h>
-#if OPENSSL_VERSION_NUMBER < 0x1010000fL && !defined(OPENSSL_NO_KRB5)
-yes
-#endif
-      ],[
-      AC_MSG_RESULT([yes])
-      ssl_krb5_enabled=yes
-      if test "x$SSL_DYNAMIC_ONLY" != "xyes"; then
-          if test -f "$SSL_LIBDIR/libkrb5.a"; then
-              SSL_LINK_WITH_KERBEROS=yes
-	      STATIC_KERBEROS_LIBS="$SSL_LIBDIR/libkrb5.a"
-	      if test -f "$SSL_LIBDIR/libkrb5support.a"; then
-		  STATIC_KERBEROS_LIBS="$STATIC_KERBEROS_LIBS $SSL_LIBDIR/libkrb5support.a"
-	      fi
-	      if test -f "$SSL_LIBDIR/libk5crypto.a"; then
-		  STATIC_KERBEROS_LIBS="$STATIC_KERBEROS_LIBS $SSL_LIBDIR/libk5crypto.a"
-	      fi
-	      if test -f "$SSL_LIBDIR/libresolv.a"; then
-		  STATIC_KERBEROS_LIBS="$STATIC_KERBEROS_LIBS $SSL_LIBDIR/libresolv.a"
-	      fi
-	      if test -f "$SSL_LIBDIR/libcom_err.a"; then
-		  STATIC_KERBEROS_LIBS="$STATIC_KERBEROS_LIBS $SSL_LIBDIR/libcom_err.a"
-	      fi
-          else
-	      AC_MSG_WARN([Kerberos needed but no kerberos static libraries found])
-	      AC_MSG_WARN([Rescanning for dynamic SSL libraries])
-	      enable_dynamic_ssl=yes
-	      ssl_done=no
-	      SSL_LINK_WITH_KERBEROS=no
-	      STATIC_KERBEROS_LIBS=""
-	      ssl_krb5_enabled=no
-	      SSL_WITH_KERBEROS=no
-          fi
-      else 	
-	      SSL_LINK_WITH_KERBEROS=no
-	      STATIC_KERBEROS_LIBS=""
-      fi
-      ],[
-      AC_MSG_RESULT([no])
-      ssl_krb5_enabled=no
-      SSL_WITH_KERBEROS=no
-      ])
-    CPPFLAGS=$old_CPPFLAGS
-    SSL_KRB5_INCLUDE=
-    if test "x$ssl_krb5_enabled" = "xyes" ; then
-        AC_MSG_CHECKING(for krb5.h in standard locations)
-	for dir in $extra_dir "$SSL_INCDIR/include" "$SSL_INCDIR/include/openssl" \
-		"$SSL_INCDIR/include/kerberos" \
-		"$erl_xcomp_isysroot/cygdrive/c/kerberos/include" \
-		"$erl_xcomp_isysroot/usr/local/kerberos/include" \
-		"$erl_xcomp_isysroot/usr/kerberos/include" \
-		"$erl_xcomp_isysroot/usr/include"
-	do
-            if test -f "$dir/krb5.h" ; then
-                SSL_KRB5_INCLUDE="$dir"
-		break
-            fi
-        done
-        if test "x$SSL_KRB5_INCLUDE" = "x" ; then
-	    AC_MSG_RESULT([not found])
-	    SSL_APP=
-	    CRYPTO_APP=
-	    SSH_APP=
-	    AC_MSG_WARN([OpenSSL is configured for kerberos but no krb5.h found])
-	    for a in ssl crypto ssh ; do
-	 	echo "OpenSSL is configured for kerberos but no krb5.h found" > $ERL_TOP/lib/$a/SKIP
-	    done
-        else
-            AC_MSG_RESULT([found in $SSL_KRB5_INCLUDE])
-            SSL_INCLUDE="$SSL_INCLUDE -I$SSL_KRB5_INCLUDE"
-        fi
-    fi
-fi
-
-done # while test ssl_done != yes
-
-SSL_CC_RUNTIME_LIBRARY_PATH=
-SSL_LD_RUNTIME_LIBRARY_PATH=
-SSL_DED_LD_RUNTIME_LIBRARY_PATH=
-cc_rflg="$CFLAG_RUNTIME_LIBRARY_PATH"
-ld_rflg="$LDFLAG_RUNTIME_LIBRARY_PATH"
-ded_ld_rflg="$DED_LD_FLAG_RUNTIME_LIBRARY_PATH"
-
-
-case "$with_ssl_rpath" in
-
-yes)   # Use standard lib locations for ssl runtime library path
-
-  if test "$SSL_APP" != "" && test "$SSL_DYNAMIC_ONLY" = "yes" && \
-   { test "$cc_rflg" != "" || test "$ld_rflg" != "" || test "$ded_ld_rflg" != ""; } ; then
-
-    AC_MSG_CHECKING(for ssl runtime library path to use)
-
-    libdirs="/lib"
-
-    if test "$ac_cv_sizeof_void_p" = "8"; then
-	dir_lib64=no
-	dir_lib_64=no
-
-	case "$SSL_RUNTIME_LIBDIR" in
-	    */lib/64 | */lib/64/ ) dir_lib_64=yes;;
-	    */lib64 | */lib64/ ) dir_lib64=yes;;
-	    *) ;;
-	esac
-
-	for dir in $std_ssl_locations; do
-	    test $dir_lib_64 = no &&
-	    	test -d "$erl_xcomp_sysroot$dir/lib/64" &&
-		    dir_lib_64=yes
-	    test $dir_lib64 = no &&
-		test -d "$erl_xcomp_sysroot$dir/lib64" &&
-		    dir_lib64=yes
-	done
-
-	test $dir_lib_64 = yes && libdirs="/lib/64 $libdirs"
-	test $dir_lib64 = yes && libdirs="/lib64 $libdirs"
-    fi
-
-    for type in std x_std curr; do
-
-        cc_rpath="$cc_rflg$SSL_RUNTIME_LIBDIR"
-        ld_rpath="$ld_rflg$SSL_RUNTIME_LIBDIR"
-        ded_ld_rpath="$ded_ld_rflg$SSL_RUNTIME_LIBDIR"
-	rpath="$SSL_RUNTIME_LIBDIR"
-
-	if test $type != curr; then
-	    for ldir in $libdirs; do
-		for dir in $std_ssl_locations; do
-		    test "$SSL_LIBDIR" != "$dir$ldir" || continue
-		    test $type != x_std || test -d "$dir$ldir" || continue
-		    test "$cc_rflg" = "" ||
-			cc_rpath="$cc_rpath $cc_rflg$dir$ldir"
-		    test "$ld_rflg" = "" ||
-			ld_rpath="$ld_rpath $ld_rflg$dir$ldir"
-		    test "$ded_ld_rflg" = "" ||
-			ded_ld_rpath="$ded_ld_rpath $ded_ld_rflg$dir$ldir"
-		    rpath="$rpath:$dir$ldir"
-                done
-	    done
-	fi
-
-	saveCFLAGS="$CFLAGS"
-	saveLDFLAGS="$LDFLAGS"
-	saveLIBS="$LIBS"
-	CFLAGS="$CFLAGS $SSL_INCLUDE"
-	LDFLAGS="$LDFLAGS $ld_rpath -L$SSL_LIBDIR"
-	LIBS="-lcrypto"
-	AC_TRY_LINK([
-                         #include <stdio.h>
-                         #include <openssl/hmac.h>
-                    ],
-                    [ 
-			 HMAC(0, 0, 0, 0, 0, 0, 0);
-                    ],
-                    [rpath_success=yes],
-                    [rpath_success=no])
-	CFLAGS="$saveCFLAGS"
-	LDFLAGS="$saveLDFLAGS"
-	LIBS="$saveLIBS"
-
-	test "$rpath_success" = "yes" && break
-    done
-
-    test "$rpath_success" = "yes" || { cc_rpath=; ld_rpath=; ded_ld_rpath=; rpath=; }
-
-    SSL_CC_RUNTIME_LIBRARY_PATH="$cc_rpath"
-    SSL_LD_RUNTIME_LIBRARY_PATH="$ld_rpath"
-    SSL_DED_LD_RUNTIME_LIBRARY_PATH="$ded_ld_rpath"
-
-    AC_MSG_RESULT([$rpath])
-    test "$rpath" != "" || AC_MSG_WARN([Cannot set run path during linking])
-  fi
-  ;;
-
-no)   # Use no ssl runtime library path
-  SSL_DED_LD_RUNTIME_LIBRARY_PATH=
-  ;;
-
-*)    # Use ssl runtime library paths set by --with-ssl-rpath (without any check)
-  ded_ld_rpath=
-  delimit=
-  for dir in `echo $with_ssl_rpath | sed "s/,/ /g"`; do
-      ded_ld_rpath="$ded_ld_rpath$delimit$ded_ld_rflg$dir"
-      delimit=" "
-  done
-  SSL_DED_LD_RUNTIME_LIBRARY_PATH="$ded_ld_rpath"
-  ;;
-
-esac
-
-
-AC_ARG_ENABLE(fips,
-AS_HELP_STRING([--enable-fips], [enable OpenSSL FIPS mode support])
-AS_HELP_STRING([--disable-fips], [disable OpenSSL FIPS mode support (default)]),
-[ case "$enableval" in
-    yes) enable_fips_support=yes ;;
-    *)   enable_fips_support=no ;;
-  esac ], enable_fips_support=no)
-
-if test "x$enable_fips_support" = "xyes" && test "$CRYPTO_APP" != ""; then
-   saveCFLAGS="$CFLAGS"
-   saveLDFLAGS="$LDFLAGS"
-   saveLIBS="$LIBS"
-   CFLAGS="$CFLAGS $SSL_INCLUDE"
-   LDFLAGS="$LDFLAGS $SSL_LD_RUNTIME_LIBRARY_PATH -L$SSL_LIBDIR"
-   LIBS="-lcrypto"
-   AC_CHECK_FUNC([FIPS_mode_set],
-   [SSL_FLAGS="-DFIPS_SUPPORT"],
-   [SSL_FLAGS=])
-   CFLAGS="$saveCFLAGS"
-   LDFLAGS="$saveLDFLAGS"
-   LIBS="$saveLIBS"
-else
-   SSL_FLAGS=
-fi
-
 #--------------------------------------------------------------------
 # Os mon stuff.
 #--------------------------------------------------------------------
@@ -4215,6 +3283,8 @@ AC_DEFINE_UNQUOTED(ERTS_EMU_CMDLINE_FLAGS,
 "$STATIC_CFLAGS $CFLAGS $DEBUG_CFLAGS $EMU_THR_DEFS $DEFS $WERRORFLAGS $WFLAGS",
 [The only reason ERTS_EMU_CMDLINE_FLAGS exists is to force modification of config.h when the emulator command line flags are modified by configure])
 
+AC_SUBST(STATIC_CFLAGS)
+
 dnl ----------------------------------------------------------------------
 dnl Directories needed for the build
 dnl ----------------------------------------------------------------------
@@ -4317,7 +3387,6 @@ AC_CONFIG_FILES([
   include/internal/$host/erts_internal.mk:include/internal/erts_internal.mk.in
   lib_src/$host/Makefile:lib_src/Makefile.in
   ../make/$host/otp.mk:../make/otp.mk.in
-  ../make/$host/otp_ded.mk:../make/otp_ded.mk.in
 ])
 
 AC_CONFIG_FILES([../make/make_emakefile:../make/make_emakefile.in],
@@ -4329,7 +3398,6 @@ dnl
 dnl  ../lib/ssl/c_src/$host/Makefile:../lib/ssl/c_src/Makefile.in
 AC_CONFIG_FILES([
   ../lib/os_mon/c_src/$host/Makefile:../lib/os_mon/c_src/Makefile.in
-  ../lib/crypto/c_src/$host/Makefile:../lib/crypto/c_src/Makefile.in
   ../lib/runtime_tools/c_src/$host/Makefile:../lib/runtime_tools/c_src/Makefile.in
   ../lib/tools/c_src/$host/Makefile:../lib/tools/c_src/Makefile.in
   ])
diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml
index 5cbeddabd9..3fe6e00d57 100644
--- a/erts/doc/src/erl_nif.xml
+++ b/erts/doc/src/erl_nif.xml
@@ -3056,7 +3056,8 @@ enif_map_iterator_destroy(env, &amp;iter);</code>
 	<p>Argument <c>mode</c> describes the type of events to wait for. It can be
 	  <c>ERL_NIF_SELECT_READ</c>, <c>ERL_NIF_SELECT_WRITE</c> or a bitwise
 	  OR combination to wait for both. It can also be <c>ERL_NIF_SELECT_STOP</c>
-	  which is described further below. When a read or write event is triggered,
+	  or <c>ERL_NIF_SELECT_CANCEL</c> which are described further
+	  below. When a read or write event is triggered,
 	  a notification message like this is sent to the process identified by
 	  <c>pid</c>:</p>
 	<code type="none">{select, Obj, Ref, ready_input | ready_output}</code>
@@ -3077,13 +3078,21 @@ enif_map_iterator_destroy(env, &amp;iter);</code>
 	<p>The notifications are one-shot only. To receive further notifications of the same
 	  type (read or write), repeated calls to <c>enif_select</c> must be made
 	  after receiving each notification.</p>
+	<p><c>ERL_NIF_SELECT_CANCEL</c> can be used to cancel previously
+	  selected events. It must be used in a bitwise OR combination with
+	  <c>ERL_NIF_SELECT_READ</c> and/or <c>ERL_NIF_SELECT_WRITE</c> to
+	  indicate which type of event to cancel. The return value will
+	  tell if the event was actually cancelled or if a notification may
+	  already have been sent.</p>
 	<p>Use <c>ERL_NIF_SELECT_STOP</c> as <c>mode</c> in order to safely
 	  close an event object that has been passed to <c>enif_select</c>. The
 	  <seealso marker="#ErlNifResourceStop"><c>stop</c></seealso> callback
 	  of the resource <c>obj</c> will be called when it is safe to close
 	  the event object. This safe way of closing event objects must be used
-	  even if all notifications have been received and no further calls to
-	  <c>enif_select</c> have been made.</p>
+	  even if all notifications have been received (or cancelled) and no
+	  further calls to <c>enif_select</c> have been made.
+	  <c>ERL_NIF_SELECT_STOP</c> will first cancel any selected events
+	  before it calls or schedules the <c>stop</c> callback.</p>
 	<p>The first call to <c>enif_select</c> for a specific OS <c>event</c> will establish
 	  a relation between the event object and the containing resource. All subsequent calls
 	  for an <c>event</c> must pass its containing resource as argument
@@ -3105,7 +3114,15 @@ enif_map_iterator_destroy(env, &amp;iter);</code>
           <item>The stop callback was called directly by <c>enif_select</c>.</item>
           <tag><c>ERL_NIF_SELECT_STOP_SCHEDULED</c></tag>
           <item>The stop callback was scheduled to run on some other thread
-	    or later by this thread.</item>
+	  or later by this thread.</item>
+	  <tag><c>ERL_NIF_SELECT_READ_CANCELLED</c></tag>
+          <item>A read event was cancelled by <c>ERL_NIF_SELECT_CANCEL</c> or
+	  <c>ERL_NIF_SELECT_STOP</c> and is guaranteed not to generate a
+	  <c>ready_input</c> notification message.</item>
+	  <tag><c>ERL_NIF_SELECT_WRITE_CANCELLED</c></tag>
+          <item>A write event was cancelled by <c>ERL_NIF_SELECT_CANCEL</c> or
+	  <c>ERL_NIF_SELECT_STOP</c> and is guaranteed not to generate a
+	  <c>ready_output</c> notification message.</item>
 	</taglist>
 	<p>Returns a negative value if the call failed where the following bits can be set:</p>
         <taglist>
@@ -3131,6 +3148,11 @@ if (retval &amp; ERL_NIF_SELECT_STOP_CALLED) {
 }
 </code>
         </note>
+	<note><p>The mode flag <c>ERL_NIF_SELECT_CANCEL</c> and the return flags
+	  <c>ERL_NIF_SELECT_READ_CANCELLED</c> and
+	  <c>ERL_NIF_SELECT_WRITE_CANCELLED</c> were introduced in erts-11.0
+	  (OTP-22.0).</p>
+	</note>
       </desc>
     </func>
 
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index a42323b13d..6f27c0d58d 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -1751,6 +1751,10 @@ true</pre>
           <item>
             <p><c>Pid</c> is the process identifier of the process
               that originally created the fun.</p>
+            <p>It might point to the <c>init</c> process if the
+              <c>Fun</c> was statically allocated when the module was
+              loaded (this optimisation is performed for local
+              functions that do not capture the environment).</p>
           </item>
           <tag><c>{index, Index}</c></tag>
           <item>
diff --git a/erts/doc/src/erts_alloc.xml b/erts/doc/src/erts_alloc.xml
index a094217959..962bc9a244 100644
--- a/erts/doc/src/erts_alloc.xml
+++ b/erts/doc/src/erts_alloc.xml
@@ -487,11 +487,10 @@
             utilization value used. Once a carrier is abandoned, no new
             allocations are made in it. When an allocator instance gets an
             increased multiblock carrier need, it first tries to fetch an
-            abandoned carrier from an allocator instance of the same
-            allocator type. If no abandoned carrier can be fetched, it
-            creates a new empty carrier. When an abandoned carrier has been
-            fetched, it will function as an ordinary carrier. This feature has
-            special requirements on the
+            abandoned carrier from another allocator instance. If no abandoned
+            carrier can be fetched, it creates a new empty carrier. When an
+            abandoned carrier has been fetched, it will function as an ordinary
+            carrier. This feature has special requirements on the
             <seealso marker="#M_as">allocation strategy</seealso> used. Only
             the strategies <c>aoff</c>, <c>aoffcbf</c>, <c>aoffcaobf</c>,
 	    <c>ageffcaoff</c>m, <c>ageffcbf</c> and <c>ageffcaobf</c>
@@ -584,7 +583,7 @@
             carriers are decided in section
             <seealso marker="#mseg_mbc_sizes">
             The alloc_util Framework</seealso>. On
-            32-bit Unix style OS this limit cannot be set &gt; 128 MB.</p>
+            32-bit Unix style OS this limit cannot be set &gt; 64 MB.</p>
         </item>
         <tag><marker id="M_mbcgs"/><c><![CDATA[+M<S>mbcgs <ratio>]]></c></tag>
         <item>
diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml
index 078eced525..2f20b5844e 100644
--- a/erts/doc/src/notes.xml
+++ b/erts/doc/src/notes.xml
@@ -10772,7 +10772,7 @@
 	    you use erlang:halt/2 with an integer first argument and
 	    an option list containing {flush,false} as the second
 	    argument. Note that now is flushing not dependant of the
-	    exit code, and you can not only flush async threads
+	    exit code, and you cannot only flush async threads
 	    operations which we deemed as a strange behaviour anyway.
 	    </p>
 	    <p>Also, erlang:halt/1,2 has gotten a new feature: If the
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 57a9d45887..b74f8371f5 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -633,8 +633,8 @@ GENERATE += $(TTF_DIR)/driver_tab.c
 # This list must be consistent with PRE_LOADED_MODULES in
 # erts/preloaded/src/Makefile.
 
-PRELOAD_BEAM =	$(ERL_TOP)/erts/preloaded/ebin/otp_ring0.beam \
-		$(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
+PRELOAD_BEAM =	$(ERL_TOP)/erts/preloaded/ebin/erts_code_purger.beam \
+		$(ERL_TOP)/erts/preloaded/ebin/erl_init.beam \
 		$(ERL_TOP)/erts/preloaded/ebin/init.beam \
 		$(ERL_TOP)/erts/preloaded/ebin/prim_buffer.beam \
 		$(ERL_TOP)/erts/preloaded/ebin/prim_eval.beam \
@@ -879,7 +879,7 @@ RUN_OBJS += \
 	$(OBJDIR)/erl_thr_queue.o	$(OBJDIR)/erl_sched_spec_pre_alloc.o \
 	$(OBJDIR)/erl_ptab.o		$(OBJDIR)/erl_map.o \
 	$(OBJDIR)/erl_msacc.o		$(OBJDIR)/erl_lock_flags.o \
-	$(OBJDIR)/erl_io_queue.o
+	$(OBJDIR)/erl_io_queue.o	$(OBJDIR)/erl_db_catree.o
 
 LTTNG_OBJS = $(OBJDIR)/erlang_lttng.o
 NIF_OBJS = \
diff --git a/erts/emulator/beam/arith_instrs.tab b/erts/emulator/beam/arith_instrs.tab
index b828e86788..574fceec5b 100644
--- a/erts/emulator/beam/arith_instrs.tab
+++ b/erts/emulator/beam/arith_instrs.tab
@@ -19,21 +19,22 @@
 // %CopyrightEnd%
 //
 
-OUTLINED_ARITH_2(Fail, Live, Name, BIF, Op1, Op2, Dst) {
+OUTLINED_ARITH_2(Fail, Name, BIF, Op1, Op2, Dst) {
     Eterm result;
-    Uint live = $Live;
-    HEAVY_SWAPOUT;
-    reg[live] = $Op1;
-    reg[live+1] = $Op2;
-    result = erts_gc_$Name (c_p, reg, live);
-    HEAVY_SWAPIN;
+#ifdef DEBUG
+    Eterm* orig_htop = HTOP;
+    Eterm* orig_stop = E;
+#endif
+    DEBUG_SWAPOUT;
+    result = erts_$Name (c_p, $Op1, $Op2);
+    DEBUG_SWAPIN;
+    ASSERT(orig_htop == HTOP && orig_stop == E);
     ERTS_HOLE_CHECK(c_p);
     if (ERTS_LIKELY(is_value(result))) {
-        $REFRESH_GEN_DEST();
         $Dst = result;
         $NEXT0();
     }
-    $BIF_ERROR_ARITY_2($Fail, $BIF, reg[live], reg[live+1]);
+    $BIF_ERROR_ARITY_2($Fail, $BIF, $Op1, $Op2);
 }
 
 
@@ -48,7 +49,7 @@ plus.fetch(Op1, Op2) {
     PlusOp2 = $Op2;
 }
 
-plus.execute(Fail, Live, Dst) {
+plus.execute(Fail, Dst) {
     if (ERTS_LIKELY(is_both_small(PlusOp1, PlusOp2))) {
         Sint i = signed_val(PlusOp1) + signed_val(PlusOp2);
         if (ERTS_LIKELY(IS_SSMALL(i))) {
@@ -56,7 +57,7 @@ plus.execute(Fail, Live, Dst) {
             $NEXT0();
         }
     }
-    $OUTLINED_ARITH_2($Fail, $Live, mixed_plus, BIF_splus_2, PlusOp1, PlusOp2, $Dst);
+    $OUTLINED_ARITH_2($Fail, mixed_plus, BIF_splus_2, PlusOp1, PlusOp2, $Dst);
 }
 
 i_minus := minus.fetch.execute;
@@ -70,7 +71,7 @@ minus.fetch(Op1, Op2) {
     MinusOp2 = $Op2;
 }
 
-minus.execute(Fail, Live, Dst) {
+minus.execute(Fail, Dst) {
     if (ERTS_LIKELY(is_both_small(MinusOp1, MinusOp2))) {
         Sint i = signed_val(MinusOp1) - signed_val(MinusOp2);
         if (ERTS_LIKELY(IS_SSMALL(i))) {
@@ -78,7 +79,7 @@ minus.execute(Fail, Live, Dst) {
             $NEXT0();
         }
     }
-    $OUTLINED_ARITH_2($Fail, $Live, mixed_minus, BIF_sminus_2, MinusOp1, MinusOp2, $Dst);
+    $OUTLINED_ARITH_2($Fail, mixed_minus, BIF_sminus_2, MinusOp1, MinusOp2, $Dst);
 }
 
 i_increment := increment.fetch.execute;
@@ -91,9 +92,8 @@ increment.fetch(Src) {
     increment_reg_val = $Src;
 }
 
-increment.execute(IncrementVal, Live, Dst) {
+increment.execute(IncrementVal, Dst) {
     Eterm increment_val = $IncrementVal;
-    Uint live;
     Eterm result;
 
     if (ERTS_LIKELY(is_small(increment_reg_val))) {
@@ -103,15 +103,9 @@ increment.execute(IncrementVal, Live, Dst) {
             $NEXT0();
         }
     }
-    live = $Live;
-    HEAVY_SWAPOUT;
-    reg[live] = increment_reg_val;
-    reg[live+1] = make_small(increment_val);
-    result = erts_gc_mixed_plus(c_p, reg, live);
-    HEAVY_SWAPIN;
+    result = erts_mixed_plus(c_p, increment_reg_val, make_small(increment_val));
     ERTS_HOLE_CHECK(c_p);
     if (ERTS_LIKELY(is_value(result))) {
-        $REFRESH_GEN_DEST();
         $Dst = result;
         $NEXT0();
     }
@@ -119,19 +113,19 @@ increment.execute(IncrementVal, Live, Dst) {
     goto find_func_info;
 }
 
-i_times(Fail, Live, Op1, Op2, Dst) {
+i_times(Fail, Op1, Op2, Dst) {
     Eterm op1 = $Op1;
     Eterm op2 = $Op2;
-    $OUTLINED_ARITH_2($Fail, $Live, mixed_times, BIF_stimes_2, op1, op2, $Dst);
+    $OUTLINED_ARITH_2($Fail, mixed_times, BIF_stimes_2, op1, op2, $Dst);
 }
 
-i_m_div(Fail, Live, Op1, Op2, Dst) {
+i_m_div(Fail, Op1, Op2, Dst) {
     Eterm op1 = $Op1;
     Eterm op2 = $Op2;
-    $OUTLINED_ARITH_2($Fail, $Live, mixed_div, BIF_div_2, op1, op2, $Dst);
+    $OUTLINED_ARITH_2($Fail, mixed_div, BIF_div_2, op1, op2, $Dst);
 }
 
-i_int_div(Fail, Live, Op1, Op2, Dst) {
+i_int_div(Fail, Op1, Op2, Dst) {
     Eterm op1 = $Op1;
     Eterm op2 = $Op2;
     if (ERTS_UNLIKELY(op2 == SMALL_ZERO)) {
@@ -144,7 +138,7 @@ i_int_div(Fail, Live, Op1, Op2, Dst) {
             $NEXT0();
         }
     }
-    $OUTLINED_ARITH_2($Fail, $Live, int_div, BIF_intdiv_2, op1, op2, $Dst);
+    $OUTLINED_ARITH_2($Fail, int_div, BIF_intdiv_2, op1, op2, $Dst);
 }
 
 i_rem := rem.fetch.execute;
@@ -158,7 +152,7 @@ rem.fetch(Src1, Src2) {
     RemOp2 = $Src2;
 }
 
-rem.execute(Fail, Live, Dst) {
+rem.execute(Fail, Dst) {
     if (ERTS_UNLIKELY(RemOp2 == SMALL_ZERO)) {
         c_p->freason = BADARITH;
         $BIF_ERROR_ARITY_2($Fail, BIF_rem_2, RemOp1, RemOp2);
@@ -166,7 +160,7 @@ rem.execute(Fail, Live, Dst) {
         $Dst = make_small(signed_val(RemOp1) % signed_val(RemOp2));
         $NEXT0();
     } else {
-        $OUTLINED_ARITH_2($Fail, $Live, int_rem, BIF_rem_2, RemOp1, RemOp2, $Dst);
+        $OUTLINED_ARITH_2($Fail, int_rem, BIF_rem_2, RemOp1, RemOp2, $Dst);
     }
 }
 
@@ -181,7 +175,7 @@ band.fetch(Src1, Src2) {
     BandOp2 = $Src2;
 }
 
-band.execute(Fail, Live, Dst) {
+band.execute(Fail, Dst) {
     if (ERTS_LIKELY(is_both_small(BandOp1, BandOp2))) {
         /*
          * No need to untag -- TAG & TAG == TAG.
@@ -189,10 +183,10 @@ band.execute(Fail, Live, Dst) {
         $Dst = BandOp1 & BandOp2;
         $NEXT0();
     }
-    $OUTLINED_ARITH_2($Fail, $Live, band, BIF_band_2, BandOp1, BandOp2, $Dst);
+    $OUTLINED_ARITH_2($Fail, band, BIF_band_2, BandOp1, BandOp2, $Dst);
 }
 
-i_bor(Fail, Live, Src1, Src2, Dst) {
+i_bor(Fail, Src1, Src2, Dst) {
     if (ERTS_LIKELY(is_both_small($Src1, $Src2))) {
         /*
          * No need to untag -- TAG | TAG == TAG.
@@ -200,10 +194,10 @@ i_bor(Fail, Live, Src1, Src2, Dst) {
         $Dst = $Src1 | $Src2;
         $NEXT0();
     }
-    $OUTLINED_ARITH_2($Fail, $Live, bor, BIF_bor_2, $Src1, $Src2, $Dst);
+    $OUTLINED_ARITH_2($Fail, bor, BIF_bor_2, $Src1, $Src2, $Dst);
 }
 
-i_bxor(Fail, Live, Src1, Src2, Dst) {
+i_bxor(Fail, Src1, Src2, Dst) {
     if (ERTS_LIKELY(is_both_small($Src1, $Src2))) {
         /*
          * TAG ^ TAG == 0.
@@ -214,7 +208,7 @@ i_bxor(Fail, Live, Src1, Src2, Dst) {
         $Dst = ($Src1 ^ $Src2) | make_small(0);
         $NEXT0();
     }
-    $OUTLINED_ARITH_2($Fail, $Live, bxor, BIF_bxor_2, $Src1, $Src2, $Dst);
+    $OUTLINED_ARITH_2($Fail, bxor, BIF_bxor_2, $Src1, $Src2, $Dst);
 }
 
 i_bsl := shift.setup_bsl.execute;
@@ -265,7 +259,7 @@ shift.setup_bsl(Src1, Src2) {
     }
 }
 
-shift.execute(Fail, Live, Dst) {
+shift.execute(Fail, Dst) {
     Uint big_words_needed;
 
     if (ERTS_LIKELY(is_small(Op1))) {
@@ -320,7 +314,9 @@ shift.execute(Fail, Live, Dst) {
         }
         {
             Eterm tmp_big[2];
-            Sint big_need_size = BIG_NEED_SIZE(big_words_needed+1);
+            Sint big_need_size = 1 + BIG_NEED_SIZE(big_words_needed+1);
+            Eterm* hp;
+            Eterm* hp_end;
 
             /*
              * Slightly conservative check the size to avoid
@@ -331,15 +327,16 @@ shift.execute(Fail, Live, Dst) {
             if (big_need_size-8 > BIG_ARITY_MAX) {
                 $SYSTEM_LIMIT($Fail);
             }
-            $GC_TEST_PRESERVE(big_need_size+1, $Live, Op1);
+            hp = HeapFragOnlyAlloc(c_p, big_need_size);
             if (is_small(Op1)) {
                 Op1 = small_to_big(signed_val(Op1), tmp_big);
             }
-            Op1 = big_lshift(Op1, shift_left_count, HTOP);
+            Op1 = big_lshift(Op1, shift_left_count, hp);
+            hp_end = hp + big_need_size;
             if (is_big(Op1)) {
-                HTOP += bignum_header_arity(*HTOP) + 1;
+                hp += bignum_header_arity(*hp) + 1;
             }
-            HEAP_SPACE_VERIFIED(0);
+            HRelease(c_p, hp_end, hp);
             if (ERTS_UNLIKELY(is_nil(Op1))) {
                 /*
                  * This result must have been only slighty larger
@@ -349,7 +346,6 @@ shift.execute(Fail, Live, Dst) {
                 $SYSTEM_LIMIT($Fail);
             }
             ERTS_HOLE_CHECK(c_p);
-            $REFRESH_GEN_DEST();
             $Dst = Op1;
             $NEXT0();
         }
@@ -366,31 +362,28 @@ shift.execute(Fail, Live, Dst) {
         reg[0] = Op1;
         reg[1] = Op2;
         SWAPOUT;
-        if (IsOpCode(I[0], i_bsl_ssjtd)) {
+        if (IsOpCode(I[0], i_bsl_ssjd)) {
             I = handle_error(c_p, I, reg, &bif_export[BIF_bsl_2]->info.mfa);
         } else {
-            ASSERT(IsOpCode(I[0], i_bsr_ssjtd));
+            ASSERT(IsOpCode(I[0], i_bsr_ssjd));
             I = handle_error(c_p, I, reg, &bif_export[BIF_bsr_2]->info.mfa);
         }
         goto post_error_handling;
     }
 }
 
-i_int_bnot(Fail, Src, Live, Dst) {
+i_int_bnot(Fail, Src, Dst) {
     Eterm bnot_val = $Src;
+    Eterm result;
+
     if (ERTS_LIKELY(is_small(bnot_val))) {
-        bnot_val = make_small(~signed_val(bnot_val));
+        result = make_small(~signed_val(bnot_val));
     } else {
-        Uint live = $Live;
-        HEAVY_SWAPOUT;
-        reg[live] = bnot_val;
-        bnot_val = erts_gc_bnot(c_p, reg, live);
-        HEAVY_SWAPIN;
+        result = erts_bnot(c_p, bnot_val);
         ERTS_HOLE_CHECK(c_p);
-        if (ERTS_UNLIKELY(is_nil(bnot_val))) {
-            $BIF_ERROR_ARITY_1($Fail, BIF_bnot_1, reg[live]);
+        if (ERTS_UNLIKELY(is_nil(result))) {
+            $BIF_ERROR_ARITY_1($Fail, BIF_bnot_1, bnot_val);
         }
-        $REFRESH_GEN_DEST();
     }
-    $Dst = bnot_val;
+    $Dst = result;
 }
diff --git a/erts/emulator/beam/atom.names b/erts/emulator/beam/atom.names
index a14f22b19e..3f7aa0983c 100644
--- a/erts/emulator/beam/atom.names
+++ b/erts/emulator/beam/atom.names
@@ -209,7 +209,6 @@ atom dirty_nif_finalizer
 atom disable_trace
 atom disabled
 atom discard
-atom display_items
 atom dist
 atom dist_cmd
 atom dist_ctrl_put_data
@@ -238,6 +237,7 @@ atom eof
 atom eol
 atom Eq='=:='
 atom Eqeq='=='
+atom erl_init
 atom erl_tracer
 atom erlang
 atom erl_signal_server
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index 9633de2021..f71efd708f 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -558,23 +558,6 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
 	case 'I':
         case 'W':
 	    switch (op) {
-	    case op_i_gc_bif1_jWstd:
-	    case op_i_gc_bif2_jWtssd:
-	    case op_i_gc_bif3_jWtssd:
-		{
-		    const ErtsGcBif* p;
-		    BifFunction gcf = (BifFunction) *ap;
-		    for (p = erts_gc_bifs; p->bif != 0; p++) {
-			if (p->gc_bif == gcf) {
-			    print_bif_name(to, to_arg, p->bif);
-			    break;
-			}
-		    }
-		    if (p->bif == 0) {
-			erts_print(to, to_arg, "%d", (Uint)gcf);
-		    }
-		    break;
-		}
 	    case op_i_make_fun_Wt:
                 if (*sign == 'W') {
                     ErlFunEntry* fe = (ErlFunEntry *) *ap;
@@ -786,8 +769,8 @@ print_op(fmtfn_t to, void *to_arg, int op, int size, BeamInstr* addr)
 	    }
 	}
 	break;
-    case op_i_put_tuple_xI:
-    case op_i_put_tuple_yI:
+    case op_put_tuple2_xI:
+    case op_put_tuple2_yI:
     case op_new_map_dtI:
     case op_update_map_assoc_sdtI:
     case op_update_map_exact_jsdtI:
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index e909a0b4da..6b34024a5a 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -206,8 +206,12 @@ void** beam_ops;
 #ifdef DEBUG
 #  /* The stack pointer is used in an assertion. */
 #  define LIGHT_SWAPOUT SWAPOUT
+#  define DEBUG_SWAPOUT SWAPOUT
+#  define DEBUG_SWAPIN  SWAPIN
 #else
 #  define LIGHT_SWAPOUT HEAP_TOP(c_p) = HTOP
+#  define DEBUG_SWAPOUT
+#  define DEBUG_SWAPIN
 #endif
 
 /*
@@ -386,7 +390,6 @@ do {                                            \
  */
 static void init_emulator_finish(void) NOINLINE;
 static ErtsCodeMFA *ubif2mfa(void* uf) NOINLINE;
-static ErtsCodeMFA *gcbif2mfa(void* gcf) NOINLINE;
 static BeamInstr* handle_error(Process* c_p, BeamInstr* pc,
 			       Eterm* reg, ErtsCodeMFA* bif_mfa) NOINLINE;
 static BeamInstr* call_error_handler(Process* p, ErtsCodeMFA* mfa,
@@ -1301,18 +1304,6 @@ void erts_dirty_process_main(ErtsSchedulerData *esdp)
 }
 
 static ErtsCodeMFA *
-gcbif2mfa(void* gcf)
-{
-    int i;
-    for (i = 0; erts_gc_bifs[i].bif; i++) {
-	if (erts_gc_bifs[i].gc_bif == gcf)
-	    return &bif_export[erts_gc_bifs[i].exp_ix]->info.mfa;
-    }
-    erts_exit(ERTS_ERROR_EXIT, "bad gc bif");
-    return NULL;
-}
-
-static ErtsCodeMFA *
 ubif2mfa(void* uf)
 {
     int i;
@@ -1320,7 +1311,7 @@ ubif2mfa(void* uf)
 	if (erts_u_bifs[i].bif == uf)
 	    return &bif_export[erts_u_bifs[i].exp_ix]->info.mfa;
     }
-    erts_exit(ERTS_ERROR_EXIT, "bad u bif");
+    erts_exit(ERTS_ERROR_EXIT, "bad u bif: %p\n", uf);
     return NULL;
 }
 
@@ -3062,12 +3053,14 @@ erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p
     Uint need;
     flatmap_t *old_mp, *mp;
     Eterm res;
+    Eterm* old_hp;
     Eterm* hp;
     Eterm* E;
     Eterm* old_keys;
     Eterm* old_vals;
     Eterm new_key;
     Eterm map;
+    int changed = 0;
 
     n /= 2;		/* Number of values to be updated */
     ASSERT(n > 0);
@@ -3134,6 +3127,7 @@ erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p
      * Update map, keeping the old key tuple.
      */
 
+    old_hp = p->htop;
     hp = p->htop;
     E = p->stop;
 
@@ -3156,20 +3150,26 @@ erts_gc_update_map_exact(Process* p, Eterm* reg, Uint live, Uint n, Eterm* new_p
 	    /* Not same keys */
 	    *hp++ = *old_vals;
 	} else {
-	    GET_TERM(new_p[1], *hp);
-	    hp++;
-	    n--;
+            GET_TERM(new_p[1], *hp);
+            if(*hp != *old_vals) changed = 1;
+            hp++;
+            n--;
 	    if (n == 0) {
-		/*
-		 * All updates done. Copy remaining values
-		 * and return the result.
-		 */
-		for (i++, old_vals++; i < num_old; i++) {
-		    *hp++ = *old_vals++;
-		}
-		ASSERT(hp == p->htop + need);
-		p->htop = hp;
-		return res;
+                /*
+                * All updates done. If any value changed, copy the
+                * remaining values; otherwise return the original map.
+                */
+                if(changed) {
+		    for (i++, old_vals++; i < num_old; i++) {
+		        *hp++ = *old_vals++;
+		    }
+		    ASSERT(hp == p->htop + need);
+		    p->htop = hp;
+		    return res;
+                } else {
+                    p->htop = old_hp;
+                    return map;
+                }
 	    } else {
 		new_p += 2;
 		GET_TERM(*new_p, new_key);
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index e61199a8fd..400a58a75c 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2868,6 +2868,7 @@ load_code(LoaderState* stp)
 	    break;
 	case op_bs_put_string_WW:
 	case op_i_bs_match_string_xfWW:
+	case op_i_bs_match_string_yfWW:
 	    new_string_patch(stp, ci-1);
 	    break;
 
@@ -3579,38 +3580,36 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms,
 }
 
 static GenOp*
-gen_increment(LoaderState* stp, GenOpArg Reg, GenOpArg Integer,
-	      GenOpArg Live, GenOpArg Dst)
+gen_increment(LoaderState* stp, GenOpArg Reg,
+              GenOpArg Integer, GenOpArg Dst)
 {
     GenOp* op;
 
     NEW_GENOP(stp, op);
-    op->op = genop_i_increment_4;
-    op->arity = 4;
+    op->op = genop_i_increment_3;
+    op->arity = 3;
     op->next = NULL;
     op->a[0] = Reg;
     op->a[1].type = TAG_u;
     op->a[1].val = Integer.val;
-    op->a[2] = Live;
-    op->a[3] = Dst;
+    op->a[2] = Dst;
     return op;
 }
 
 static GenOp*
-gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer,
-			 GenOpArg Live, GenOpArg Dst)
+gen_increment_from_minus(LoaderState* stp, GenOpArg Reg,
+                         GenOpArg Integer, GenOpArg Dst)
 {
     GenOp* op;
 
     NEW_GENOP(stp, op);
-    op->op = genop_i_increment_4;
-    op->arity = 4;
+    op->op = genop_i_increment_3;
+    op->arity = 3;
     op->next = NULL;
     op->a[0] = Reg;
     op->a[1].type = TAG_u;
     op->a[1].val = -Integer.val;
-    op->a[2] = Live;
-    op->a[3] = Dst;
+    op->a[2] = Dst;
     return op;
 }
 
@@ -4245,115 +4244,57 @@ gen_make_fun2(LoaderState* stp, GenOpArg idx)
 {
     ErlFunEntry* fe;
     GenOp* op;
+    Uint arity, num_free;
 
     if (idx.val >= stp->num_lambdas) {
-	stp->lambda_error = "missing or short chunk 'FunT'";
-	fe = 0;
+        stp->lambda_error = "missing or short chunk 'FunT'";
+        fe = 0;
+        num_free = 0;
+        arity = 0;
     } else {
-	fe = stp->lambdas[idx.val].fe;
+        fe = stp->lambdas[idx.val].fe;
+        num_free = stp->lambdas[idx.val].num_free;
+        arity = fe->arity;
     }
 
     NEW_GENOP(stp, op);
-    op->op = genop_i_make_fun_2;
-    op->arity = 2;
-    op->a[0].type = TAG_u;
-    op->a[0].val = (BeamInstr) fe;
-    op->a[1].type = TAG_u;
-    op->a[1].val = stp->lambdas[idx.val].num_free;
-    op->next = NULL;
-    return op;
-}
-
-static GenOp*
-translate_gc_bif(LoaderState* stp, GenOp* op, GenOpArg Bif)
-{
-    const ErtsGcBif* p;
-    BifFunction bf;
 
-    bf = stp->import[Bif.val].bf;
-    for (p = erts_gc_bifs; p->bif != 0; p++) {
-	if (p->bif == bf) {
-	    op->a[1].type = TAG_u;
-	    op->a[1].val = (BeamInstr) p->gc_bif;
-	    return op;
-	}
+    /*
+     * Skip the literal optimisation when the lambda is missing (fe is
+     * NULL) or when called before the init process has been started.
+     */
+    if (fe && num_free == 0 && erts_init_process_id != ERTS_INVALID_PID) {
+        Uint lit;
+        Eterm* hp;
+        ErlFunThing* funp;
+
+        lit = new_literal(stp, &hp, ERL_FUN_SIZE);
+        funp = (ErlFunThing *) hp;
+        erts_refc_inc(&fe->refc, 2);
+        funp->thing_word = HEADER_FUN;
+        funp->next = NULL;
+        funp->fe = fe;
+        funp->num_free = 0;
+        funp->creator = erts_init_process_id;
+        funp->arity = arity;
+
+        op->op = genop_move_2;
+        op->arity = 2;
+        op->a[0].type = TAG_q;
+        op->a[0].val = lit;
+        op->a[1].type = TAG_x;
+        op->a[1].val = 0;
+    } else {
+        op->op = genop_i_make_fun_2;
+        op->arity = 2;
+        op->a[0].type = TAG_u;
+        op->a[0].val = (BeamInstr) fe;
+        op->a[1].type = TAG_u;
+        op->a[1].val = num_free;
     }
 
-    op->op = genop_unsupported_guard_bif_3;
-    op->arity = 3;
-    op->a[0].type = TAG_a;
-    op->a[0].val = stp->import[Bif.val].module;
-    op->a[1].type = TAG_a;
-    op->a[1].val = stp->import[Bif.val].function;
-    op->a[2].type = TAG_u;
-    op->a[2].val = stp->import[Bif.val].arity;
-    return op;
-}
-
-/*
- * Rewrite gc_bifs with one parameter (the common case).
- */
-static GenOp*
-gen_guard_bif1(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif,
-	      GenOpArg Src, GenOpArg Dst)
-{
-    GenOp* op;
-
-    NEW_GENOP(stp, op);
-    op->next = NULL;
-    op->op = genop_i_gc_bif1_5;
-    op->arity = 5;
-    op->a[0] = Fail;
-    /* op->a[1] is set by translate_gc_bif() */
-    op->a[2] = Src;
-    op->a[3] = Live;
-    op->a[4] = Dst;
-    return translate_gc_bif(stp, op, Bif);
-}
-
-/*
- * This is used by the ops.tab rule that rewrites gc_bifs with two parameters.
- */
-static GenOp*
-gen_guard_bif2(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif,
-	      GenOpArg S1, GenOpArg S2, GenOpArg Dst)
-{
-    GenOp* op;
-
-    NEW_GENOP(stp, op);
-    op->next = NULL;
-    op->op = genop_i_gc_bif2_6;
-    op->arity = 6;
-    op->a[0] = Fail;
-    /* op->a[1] is set by translate_gc_bif() */
-    op->a[2] = Live;
-    op->a[3] = S1;
-    op->a[4] = S2;
-    op->a[5] = Dst;
-    return translate_gc_bif(stp, op, Bif);
-}
-
-/*
- * This is used by the ops.tab rule that rewrites gc_bifs with three parameters.
- */
-static GenOp*
-gen_guard_bif3(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif,
-	      GenOpArg S1, GenOpArg S2, GenOpArg S3, GenOpArg Dst)
-{
-    GenOp* op;
-
-    NEW_GENOP(stp, op);
     op->next = NULL;
-    op->op = genop_ii_gc_bif3_7;
-    op->arity = 7;
-    op->a[0] = Fail;
-    /* op->a[1] is set by translate_gc_bif() */
-    op->a[2] = Live;
-    op->a[3] = S1;
-    op->a[4] = S2;
-    op->a[5] = S3;
-    op->a[6] = Dst;
-    return translate_gc_bif(stp, op, Bif);
+    return op;
 }
 
 static GenOp*
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index 015c051cc1..4741d7451d 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -218,11 +218,8 @@ BIF_RETTYPE link_1(BIF_ALIST_1)
              * We have (pending) connection.
              * Setup link and enqueue link signal.
              */
-#ifdef DEBUG
-            int inserted =
-#endif
-                erts_link_dist_insert(&ldp->b, dep->mld);
-            ASSERT(inserted);
+            int inserted = erts_link_dist_insert(&ldp->b, dep->mld);
+            ASSERT(inserted); (void)inserted;
             erts_de_runlock(dep);
 
             code = erts_dsig_send_link(&dsd, BIF_P->common.id, BIF_ARG_1);
@@ -567,12 +564,8 @@ BIF_RETTYPE monitor_2(BIF_ALIST_2)
 
             case ERTS_DSIG_PREP_PENDING:
             case ERTS_DSIG_PREP_CONNECTED: {
-#ifdef DEBUG
-                int inserted =
-#endif
-
-                erts_monitor_dist_insert(&mdp->target, dep->mld);
-                ASSERT(inserted);
+                int inserted = erts_monitor_dist_insert(&mdp->target, dep->mld);
+                ASSERT(inserted); (void)inserted;
                 erts_de_runlock(dep);
 
                 code = erts_dsig_send_monitor(&dsd, BIF_P->common.id, target, ref);
@@ -1803,6 +1796,7 @@ ebif_bang_2(BIF_ALIST_2)
 #define SEND_INTERNAL_ERROR	(-6)
 #define SEND_AWAIT_RESULT	(-7)
 #define SEND_YIELD_CONTINUE     (-8)
+#define SEND_SYSTEM_LIMIT		(-9)
 
 
 static Sint remote_send(Process *p, DistEntry *dep,
@@ -1842,6 +1836,8 @@ static Sint remote_send(Process *p, DistEntry *dep,
 	    res = SEND_YIELD_RETURN;
 	else if (code == ERTS_DSIG_SEND_CONTINUE)
 	    res = SEND_YIELD_CONTINUE;
+	else if (code == ERTS_DSIG_SEND_TOO_LRG)
+	    res = SEND_SYSTEM_LIMIT;
 	else
 	    res = 0;
 	break;
@@ -2162,6 +2158,9 @@ BIF_RETTYPE send_3(BIF_ALIST_3)
     case SEND_BADARG:
 	ERTS_BIF_PREP_ERROR(retval, p, BADARG);
 	break;
+    case SEND_SYSTEM_LIMIT:
+	ERTS_BIF_PREP_ERROR(retval, p, SYSTEM_LIMIT);
+	break;
     case SEND_USER_ERROR:
 	ERTS_BIF_PREP_ERROR(retval, p, EXC_ERROR);
 	break;
@@ -2218,6 +2217,10 @@ static BIF_RETTYPE dsend_continue_trap_1(BIF_ALIST_1)
 	BUMP_ALL_REDS(BIF_P);
 	BIF_TRAP1(&dsend_continue_trap_export, BIF_P, BIF_ARG_1);
     }
+    case ERTS_DSIG_SEND_TOO_LRG: { /*SEND_SYSTEM_LIMIT*/
+	erts_set_gc_state(BIF_P, 1);
+	BIF_ERROR(BIF_P, SYSTEM_LIMIT);
+    }
     default:
 	erts_exit(ERTS_ABORT_EXIT, "dsend_continue_trap invalid result %d\n", (int)result);
 	break;
@@ -2275,6 +2278,9 @@ Eterm erl_send(Process *p, Eterm to, Eterm msg)
     case SEND_BADARG:
 	ERTS_BIF_PREP_ERROR(retval, p, BADARG);
 	break;
+    case SEND_SYSTEM_LIMIT:
+	ERTS_BIF_PREP_ERROR(retval, p, SYSTEM_LIMIT);
+	break;
     case SEND_USER_ERROR:
 	ERTS_BIF_PREP_ERROR(retval, p, EXC_ERROR);
 	break;
@@ -2732,9 +2738,7 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1)
     Uint num_chars, num_built, num_eaten;
     byte* err_pos;
     Eterm res;
-#ifdef DEBUG
     int ares;
-#endif
 
     if (is_not_atom(BIF_ARG_1))
 	BIF_ERROR(BIF_P, BADARG);
@@ -2744,11 +2748,9 @@ BIF_RETTYPE atom_to_list_1(BIF_ALIST_1)
     if (ap->len == 0)
 	BIF_RET(NIL);	/* the empty atom */
 
-#ifdef DEBUG
     ares =
-#endif
 	erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
-    ASSERT(ares == ERTS_UTF8_OK);
+    ASSERT(ares == ERTS_UTF8_OK); (void)ares;
     
     res = erts_utf8_to_list(BIF_P, num_chars, ap->name, ap->len, ap->len,
 			    &num_built, &num_eaten, NIL);
@@ -4442,13 +4444,6 @@ BIF_RETTYPE system_flag_2(BIF_ALIST_2)
         erts_proc_lock(BIF_P, ERTS_PROC_LOCK_MAIN);
 
         BIF_RET(old_value);
-    } else if (BIF_ARG_1 == am_display_items) {
-	int oval = display_items;
-	if (!is_small(BIF_ARG_2) || (n = signed_val(BIF_ARG_2)) < 0) {
-	    goto error;
-	}
-	display_items = n < 32 ? 32 : n;
-	BIF_RET(make_small(oval));
     } else if (BIF_ARG_1 == am_debug_flags) {
 	BIF_RET(am_true);
     } else if (BIF_ARG_1 == am_backtrace_depth) {
@@ -5151,17 +5146,11 @@ BIF_RETTYPE send_to_logger_2(BIF_ALIST_2)
     else if (len == 0)
 	buf = "";
     else {
-#ifdef DEBUG
 	ErlDrvSizeT len2;
-#endif
 	buf = (byte *) erts_alloc(ERTS_ALC_T_TMP, len+1);
-#ifdef DEBUG
 	len2 =
-#else
-	(void)
-#endif
 	    erts_iolist_to_buf(BIF_ARG_2, buf, len);
-	ASSERT(len2 == len);
+	ASSERT(len2 == len); (void)len2;
 	buf[len] = '\0';
 	switch (BIF_ARG_1) {
 	case am_info:
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index d4ba90a61a..f141407c0e 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -41,7 +41,7 @@
 #
 
 
-gcbif erlang:abs/1
+ubif erlang:abs/1
 bif erlang:adler32/1
 bif erlang:adler32/2
 bif erlang:adler32_combine/3
@@ -66,7 +66,7 @@ bif erlang:exit/2
 bif erlang:exit_signal/2
 bif erlang:external_size/1
 bif erlang:external_size/2
-gcbif erlang:float/1
+ubif erlang:float/1
 bif erlang:float_to_list/1
 bif erlang:float_to_list/2
 bif erlang:fun_info/2
@@ -84,7 +84,7 @@ bif erlang:phash2/2
 ubif erlang:hd/1
 bif erlang:integer_to_list/1
 bif erlang:is_alive/0
-gcbif erlang:length/1
+ubif erlang:length/1
 bif erlang:link/1
 bif erlang:list_to_atom/1
 bif erlang:list_to_binary/1
@@ -133,10 +133,10 @@ bif erlang:processes/0
 bif erlang:put/2
 bif erlang:register/2
 bif erlang:registered/0
-gcbif erlang:round/1
+ubif erlang:round/1
 ubif erlang:self/0
 bif erlang:setelement/3
-gcbif erlang:size/1
+ubif erlang:size/1
 bif erlang:spawn/3
 bif erlang:spawn_link/3
 bif erlang:split_binary/2
@@ -146,7 +146,7 @@ bif erlang:term_to_binary/2
 bif erlang:throw/1
 bif erlang:time/0
 ubif erlang:tl/1
-gcbif erlang:trunc/1
+ubif erlang:trunc/1
 bif erlang:tuple_to_list/1
 bif erlang:universaltime/0
 bif erlang:universaltime_to_localtime/1
@@ -481,8 +481,8 @@ bif erlang:list_to_existing_atom/1
 #
 ubif erlang:is_bitstring/1
 ubif erlang:tuple_size/1
-gcbif erlang:byte_size/1
-gcbif erlang:bit_size/1
+ubif erlang:byte_size/1
+ubif erlang:bit_size/1
 bif erlang:list_to_bitstring/1
 bif erlang:bitstring_to_list/1
 
@@ -534,8 +534,8 @@ bif erlang:binary_to_term/2
 #
 # The searching/splitting/substituting thingies
 #
-gcbif erlang:binary_part/2
-gcbif erlang:binary_part/3
+ubif erlang:binary_part/2
+ubif erlang:binary_part/3
 
 bif binary:compile_pattern/1
 bif binary:match/2
@@ -623,14 +623,13 @@ bif io:printable_range/0
 bif re:inspect/2
 
 ubif erlang:is_map/1
-gcbif erlang:map_size/1
+ubif erlang:map_size/1
 bif maps:find/2
 bif maps:get/2
 bif maps:from_list/1
 bif maps:is_key/2
 bif maps:keys/1
 bif maps:merge/2
-bif maps:new/0
 bif maps:put/3
 bif maps:remove/2
 bif maps:update/3
@@ -672,8 +671,8 @@ bif maps:take/2
 # New in 20.0
 #
 
-gcbif erlang:floor/1
-gcbif erlang:ceil/1
+ubif erlang:floor/1
+ubif erlang:ceil/1
 bif math:floor/1
 bif math:ceil/1
 bif math:fmod/2
diff --git a/erts/emulator/beam/bif_instrs.tab b/erts/emulator/beam/bif_instrs.tab
index 00854471a9..ce9e61a838 100644
--- a/erts/emulator/beam/bif_instrs.tab
+++ b/erts/emulator/beam/bif_instrs.tab
@@ -31,13 +31,20 @@
 
 CALL_GUARD_BIF(BF, TmpReg, Dst) {
     Eterm result;
+#ifdef DEBUG
+    Eterm* orig_htop = HTOP;
+    Eterm* orig_stop = E;
+#endif
 
     ERTS_DBG_CHK_REDS(c_p, FCALLS);
     c_p->fcalls = FCALLS;
     PROCESS_MAIN_CHK_LOCKS(c_p);
     ASSERT(!ERTS_PROC_IS_EXITING(c_p));
     ERTS_CHK_MBUF_SZ(c_p);
+    DEBUG_SWAPOUT;
     result = (*$BF)(c_p, $TmpReg, I);
+    DEBUG_SWAPIN;
+    ASSERT(orig_htop == HTOP && orig_stop == E);
     ERTS_CHK_MBUF_SZ(c_p);
     ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result));
     ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
@@ -55,7 +62,7 @@ CALL_GUARD_BIF(BF, TmpReg, Dst) {
 // to the code for the next clause.  We don't support tracing
 // of guard BIFs.
 
-bif1(Fail, Bif, Src, Dst) {
+i_bif1(Fail, Bif, Src, Dst) {
     ErtsBifFunc bf;
     Eterm tmp_reg[1];
 
@@ -70,7 +77,7 @@ bif1(Fail, Bif, Src, Dst) {
 // Guard BIF in body.  It can fail like any BIF.  No trace support.
 //
 
-bif1_body(Bif, Src, Dst) {
+i_bif1_body(Bif, Src, Dst) {
     ErtsBifFunc bf;
     Eterm tmp_reg[1];
 
@@ -118,155 +125,120 @@ i_bif2_body(Bif, Src1, Src2, Dst) {
     goto post_error_handling;
 }
 
-//
-// Garbage-collecting BIF with one argument in either guard or body.
-//
+// Guard BIF in head (binary_part/3).  On failure, ignore the error
+// and jump to the code for the next clause.  We don't support tracing
+// of guard BIFs.
 
-i_gc_bif1(Fail, Bif, Src, Live, Dst) {
-    typedef Eterm (*GcBifFunction)(Process*, Eterm*, Uint);
-    GcBifFunction bf;
-    Eterm result;
-    Uint live = (Uint) $Live;
+i_bif3(Fail, Bif, Src1, Src2, Src3, Dst) {
+    ErtsBifFunc bf;
+    Eterm tmp_reg[3];
 
-    x(live) = $Src;
-    bf = (GcBifFunction) $Bif;
-    ERTS_DBG_CHK_REDS(c_p, FCALLS);
-    c_p->fcalls = FCALLS;
-    SWAPOUT;
-    PROCESS_MAIN_CHK_LOCKS(c_p);
-    ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
-    ERTS_CHK_MBUF_SZ(c_p);
-    result = (*bf)(c_p, reg, live);
-    ERTS_CHK_MBUF_SZ(c_p);
-    ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
-    ERTS_REQ_PROC_MAIN_LOCK(c_p);
-    PROCESS_MAIN_CHK_LOCKS(c_p);
-    SWAPIN;
-    ERTS_HOLE_CHECK(c_p);
-    FCALLS = c_p->fcalls;
-    ERTS_DBG_CHK_REDS(c_p, FCALLS);
-    if (ERTS_LIKELY(is_value(result))) {
-        $REFRESH_GEN_DEST();
-        $Dst = result;
-        $NEXT0();
-    }
-    if (ERTS_LIKELY($Fail != 0)) { /* Handle error in guard. */
-        $JUMP($Fail);
-    }
+    tmp_reg[0] = $Src1;
+    tmp_reg[1] = $Src2;
+    tmp_reg[2] = $Src3;
+    bf = (BifFunction) $Bif;
+    $CALL_GUARD_BIF(bf, tmp_reg, $Dst);
+
+    $FAIL($Fail);
+}
+
+// Guard BIF in body with three arguments (binary_part/3).
+
+i_bif3_body(Bif, Src1, Src2, Src3, Dst) {
+    ErtsBifFunc bf = (BifFunction) $Bif;
+    Eterm tmp_reg[3];
 
-    /* Handle error in body. */
-    x(0) = x(live);
-    I = handle_error(c_p, I, reg, gcbif2mfa((void *) bf));
+    tmp_reg[0] = $Src1;
+    tmp_reg[1] = $Src2;
+    tmp_reg[2] = $Src3;
+    $CALL_GUARD_BIF(bf, tmp_reg, $Dst);
+    reg[0] = tmp_reg[0];
+    reg[1] = tmp_reg[1];
+    reg[2] = tmp_reg[2];    /* third argument of the failed BIF call */
+    SWAPOUT;
+    I = handle_error(c_p, I, reg, ubif2mfa((void *) bf));
     goto post_error_handling;
 }
 
 //
-// Garbage-collecting BIF with two arguments in either guard or body.
+// length/1 is the only guard BIF that does not execute in constant
+// time. Here follow special instructions that allow the calculation of
+// the list length to be broken into several chunks to avoid hogging
+// the scheduler for a long time.
 //
 
-i_gc_bif2(Fail, Bif, Live, Src1, Src2, Dst) {
-    typedef Eterm (*GcBifFunction)(Process*, Eterm*, Uint);
-    GcBifFunction bf;
-    Eterm result;
-    Uint live = (Uint) $Live;
+i_length_setup(Live, Src) {
+    Uint live = $Live;
+    Eterm src = $Src;
 
-    /*
-     * XXX This calling convention does not make sense. 'live'
-     * should point out the first argument, not the second
-     * (i.e. 'live' should not be incremented below).
-     */
-    x(live) = $Src1;
-    x(live+1) = $Src2;
-    live++;
+    reg[live] = src;
+    reg[live+1] = make_small(0);
+    reg[live+2] = src;
 
-    bf = (GcBifFunction) $Bif;
-    ERTS_DBG_CHK_REDS(c_p, FCALLS);
-    c_p->fcalls = FCALLS;
-    SWAPOUT;
-    PROCESS_MAIN_CHK_LOCKS(c_p);
-    ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
-    ERTS_CHK_MBUF_SZ(c_p);
-    result = (*bf)(c_p, reg, live);
-    ERTS_CHK_MBUF_SZ(c_p);
-    ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
-    ERTS_REQ_PROC_MAIN_LOCK(c_p);
-    PROCESS_MAIN_CHK_LOCKS(c_p);
-    SWAPIN;
-    ERTS_HOLE_CHECK(c_p);
-    FCALLS = c_p->fcalls;
-    ERTS_DBG_CHK_REDS(c_p, FCALLS);
-    if (ERTS_LIKELY(is_value(result))) {
-        $REFRESH_GEN_DEST();
-        $Dst = result;
-        $NEXT0();
-    }
-
-    if (ERTS_LIKELY($Fail != 0)) { /* Handle error in guard. */
-        $JUMP($Fail);
-    }
-
-    /* Handle error in body. */
-    live--;
-    x(0) = x(live);
-    x(1) = x(live+1);
-    I = handle_error(c_p, I, reg, gcbif2mfa((void *) bf));
-    goto post_error_handling;
+    /* This instruction is always followed by i_length */
+    SET_I($NEXT_INSTRUCTION);
+    goto i_length_start__;
+    //| -no_next
 }
 
 //
-// Garbage-collecting BIF with three arguments in either guard or body.
+// This instruction can be executed one or more times. When entering
+// this instruction, the X registers have the following contents:
+//
+// reg[live+0] The remainder of the list.
+// reg[live+1] The length so far (tagged integer).
+// reg[live+2] The original list. Only used if an error is generated
+//             (if the final tail of the list is not []).
 //
 
-i_gc_bif3(Fail, Bif, Live, Src2, Src3, Dst) {
-    typedef Eterm (*GcBifFunction)(Process*, Eterm*, Uint);
-    GcBifFunction bf;
-    Eterm result;
-    Uint live = (Uint) $Live;
+i_length := i_length.start.execute;
 
-    /*
-     * XXX This calling convention does not make sense. 'live'
-     * should point out the first argument, not the third
-     * (i.e. 'live' should not be incremented below).
-     */
-    x(live) = x(SCRATCH_X_REG);
-    x(live+1) = $Src2;
-    x(live+2) = $Src3;
-    live += 2;
+i_length.start() {
+ i_length_start__:
+    ;
+}
+
+i_length.execute(Fail, Live, Dst) {
+    Eterm result;
+    Uint live;
 
-    bf = (GcBifFunction) $Bif;
     ERTS_DBG_CHK_REDS(c_p, FCALLS);
     c_p->fcalls = FCALLS;
-    SWAPOUT;
     PROCESS_MAIN_CHK_LOCKS(c_p);
-    ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
+    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
     ERTS_CHK_MBUF_SZ(c_p);
-    result = (*bf)(c_p, reg, live);
+    DEBUG_SWAPOUT;
+
+    live = $Live;
+    result = erts_trapping_length_1(c_p, reg+live);
+
+    DEBUG_SWAPIN;
     ERTS_CHK_MBUF_SZ(c_p);
+    ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(result));
     ERTS_VERIFY_UNUSED_TEMP_ALLOC(c_p);
-    ERTS_REQ_PROC_MAIN_LOCK(c_p);
     PROCESS_MAIN_CHK_LOCKS(c_p);
-    SWAPIN;
     ERTS_HOLE_CHECK(c_p);
     FCALLS = c_p->fcalls;
     ERTS_DBG_CHK_REDS(c_p, FCALLS);
     if (ERTS_LIKELY(is_value(result))) {
+        /* Successful calculation of the list length. */
         $REFRESH_GEN_DEST();
         $Dst = result;
         $NEXT0();
+    } else if (c_p->freason == TRAP) {
+        /*
+         * Good so far, but there is more work to do. Yield.
+         */
+        $SET_CP_I_ABS(I);
+        SWAPOUT;
+        c_p->arity = live + 3;
+        c_p->current = NULL;
+        goto context_switch3;
+    } else {
+        /* Error. */
+        $BIF_ERROR_ARITY_1($Fail, BIF_length_1, reg[live+2]);
     }
-
-    /* Handle error in guard. */
-    if (ERTS_LIKELY($Fail != 0)) {
-        $JUMP($Fail);
-    }
-
-    /* Handle error in body. */
-    live -= 2;
-    x(0) = x(live);
-    x(1) = x(live+1);
-    x(2) = x(live+2);
-    I = handle_error(c_p, I, reg, gcbif2mfa((void *) bf));
-    goto post_error_handling;
+    //| -no_next
 }
 
 //
diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h
index 7556205063..a1ad75708c 100644
--- a/erts/emulator/beam/big.h
+++ b/erts/emulator/beam/big.h
@@ -42,7 +42,7 @@ typedef Uint16   ErtsHalfDigit;
 #undef  BIG_HAVE_DOUBLE_DIGIT
 typedef Uint32   ErtsHalfDigit;
 #else
-#error "can not determine machine size"
+#error "cannot determine machine size"
 #endif
 
 typedef Uint  dsize_t;	 /* Vector size type */
diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c
index 9ff52c92b8..81531f6cc8 100644
--- a/erts/emulator/beam/break.c
+++ b/erts/emulator/beam/break.c
@@ -108,6 +108,7 @@ process_killer(void)
 		    erts_exit(0, "");
 		switch(j) {
 		case 'k':
+                    ASSERT(erts_init_process_id != ERTS_INVALID_PID);
                     /* Send a 'kill' exit signal from init process */
                     erts_proc_sig_send_exit(NULL, erts_init_process_id,
                                             rp->common.id, am_kill, NIL,
diff --git a/erts/emulator/beam/bs_instrs.tab b/erts/emulator/beam/bs_instrs.tab
index 61eb02a7a2..2dde70c2e1 100644
--- a/erts/emulator/beam/bs_instrs.tab
+++ b/erts/emulator/beam/bs_instrs.tab
@@ -102,6 +102,7 @@ i_bs_get_binary_all2(Fail, Ms, Live, Unit, Dst) {
         LIGHT_SWAPIN;
         HEAP_SPACE_VERIFIED(0);
         ASSERT(is_value(_result));
+        $REFRESH_GEN_DEST();
         $Dst = _result;
     } else {
 	HEAP_SPACE_VERIFIED(0);
@@ -123,6 +124,7 @@ i_bs_get_binary2(Fail, Ms, Live, Sz, Flags, Dst) {
     if (is_non_value(_result)) {
         $FAIL($Fail);
     } else {
+        $REFRESH_GEN_DEST();
         $Dst = _result;
     }
 }
@@ -139,6 +141,7 @@ i_bs_get_binary_imm2(Fail, Ms, Live, Sz, Flags, Dst) {
     if (is_non_value(_result)) {
         $FAIL($Fail);
     } else {
+        $REFRESH_GEN_DEST();
         $Dst = _result;
     }
 }
@@ -161,6 +164,7 @@ i_bs_get_float2(Fail, Ms, Live, Sz, Flags, Dst) {
     if (is_non_value(_result)) {
         $FAIL($Fail);
     } else {
+        $REFRESH_GEN_DEST();
         $Dst = _result;
     }
 }
@@ -724,26 +728,34 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
         $FAIL($Fail);
     }
     header = *boxed_val(context);
-    slots = $Slots;
+
+    /* Reserve a slot for the start position. */
+    slots = $Slots + 1;
     live = $Live;
+
     if (header_is_bin_matchstate(header)) {
         ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context);
         Uint actual_slots = HEADER_NUM_SLOTS(header);
+
+        /* We're not compatible with contexts created by bs_start_match3. */
+        ASSERT(actual_slots >= 1);
+
         ms->save_offset[0] = ms->mb.offset;
-        if (actual_slots < slots) {
-            ErlBinMatchState* dst;
+        if (ERTS_UNLIKELY(actual_slots < slots)) {
+            ErlBinMatchState* expanded;
             Uint live = $Live;
             Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
-
             $GC_TEST_PRESERVE(wordsneeded, live, context);
             ms = (ErlBinMatchState *) boxed_val(context);
-            dst = (ErlBinMatchState *) HTOP;
-            *dst = *ms;
+            expanded = (ErlBinMatchState *) HTOP;
+            *expanded = *ms;
             *HTOP = HEADER_BIN_MATCHSTATE(slots);
             HTOP += wordsneeded;
             HEAP_SPACE_VERIFIED(0);
-            $Dst = make_matchstate(dst);
+            context = make_matchstate(expanded);
+            $REFRESH_GEN_DEST();
         }
+        $Dst = context;
     } else if (is_binary_header(header)) {
         Eterm result;
         Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
@@ -758,6 +770,7 @@ bs_start_match.execute(Fail, Live, Slots, Dst) {
         if (is_non_value(result)) {
             $FAIL($Fail);
         }
+        $REFRESH_GEN_DEST();
         $Dst = result;
     } else {
         $FAIL($Fail);
@@ -906,6 +919,7 @@ i_bs_get_integer(Fail, Live, FlagsAndUnit, Ms, Sz, Dst) {
         }
         wordsneeded = 1+WSIZE(NBYTES((Uint) size));
         $GC_TEST_PRESERVE(wordsneeded, $Live, ms);
+        $REFRESH_GEN_DEST();
     }
     mb = ms_matchbuffer(ms);
     LIGHT_SWAPOUT;
@@ -939,6 +953,7 @@ i_bs_get_utf8(Ctx, Fail, Dst) {
     if (is_non_value(result)) {
         $FAIL($Fail);
     }
+    $REFRESH_GEN_DEST();
     $Dst = result;
 }
 
@@ -949,6 +964,7 @@ i_bs_get_utf16(Ctx, Fail, Flags, Dst) {
     if (is_non_value(result)) {
         $FAIL($Fail);
     }
+    $REFRESH_GEN_DEST();
     $Dst = result;
 }
 
@@ -1029,10 +1045,289 @@ i_bs_match_string(Ctx, Fail, Bits, Ptr) {
 
 i_bs_save2(Src, Slot) {
     ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+    ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
     _ms->save_offset[$Slot] = _ms->mb.offset;
 }
 
 i_bs_restore2(Src, Slot) {
     ErlBinMatchState* _ms = (ErlBinMatchState*) boxed_val((Eterm) $Src);
+    ASSERT(HEADER_NUM_SLOTS(_ms->thing_word) > $Slot);
     _ms->mb.offset = _ms->save_offset[$Slot];
 }
+
+bs_get_tail(Src, Dst, Live) {
+    ErlBinMatchBuffer* mb;
+    Uint size, offs;
+    ErlSubBin* sb;
+    Eterm context;
+
+    context = $Src;
+
+    ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+    $GC_TEST_PRESERVE(ERL_SUB_BIN_SIZE, $Live, context);
+
+    mb = ms_matchbuffer(context);
+
+    offs = mb->offset;
+    size = mb->size - offs;
+
+    sb = (ErlSubBin *) HTOP;
+    HTOP += ERL_SUB_BIN_SIZE;
+
+    sb->thing_word = HEADER_SUB_BIN;
+    sb->size = BYTE_OFFSET(size);
+    sb->bitsize = BIT_OFFSET(size);
+    sb->offs = BYTE_OFFSET(offs);
+    sb->bitoffs = BIT_OFFSET(offs);
+    sb->is_writable = 0;
+    sb->orig = mb->orig;
+
+    $REFRESH_GEN_DEST();
+    $Dst = make_binary(sb);
+}
+
+
+%if ARCH_64
+
+i_bs_start_match3_gp(Src, Live, Fail, Dst, Pos) {
+    Eterm context, header;
+    Uint position, live;
+
+    context = $Src;
+    live = $Live;
+
+    if (!is_boxed(context)) {
+        $FAIL($Fail);
+    }
+
+    header = *boxed_val(context);
+
+    if (header_is_bin_matchstate(header)) {
+        ErlBinMatchBuffer *mb;
+
+        ASSERT(HEADER_NUM_SLOTS(header) == 0);
+
+        mb = ms_matchbuffer(context);
+        position = mb->offset;
+
+        $Dst = context;
+    } else if (is_binary_header(header)) {
+        ErlBinMatchState *ms;
+
+        $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+        HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+        c_p->stop = E;	/* Needed for checking in HeapOnlyAlloc(). */
+#endif
+        ms = erts_bs_start_match_3(c_p, context);
+        HTOP = HEAP_TOP(c_p);
+        HEAP_SPACE_VERIFIED(0);
+
+        if (ms == NULL) {
+            $FAIL($Fail);
+        }
+
+        $REFRESH_GEN_DEST();
+        $Dst = make_matchstate(ms);
+        position = ms->mb.offset;
+    } else {
+        $FAIL($Fail);
+    }
+
+    ASSERT(IS_USMALL(0, position));
+    $Pos = make_small(position);
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+    Eterm context, header;
+    Uint live;
+
+    context = $Src;
+    live = $Live;
+
+    if (!is_boxed(context)) {
+        $FAIL($Fail);
+    }
+
+    header = *boxed_val(context);
+
+    if (header_is_bin_matchstate(header)) {
+        ASSERT(HEADER_NUM_SLOTS(header) == 0);
+        $Dst = context;
+    } else if (is_binary_header(header)) {
+        ErlBinMatchState *ms;
+
+        $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(0), live, context);
+        HEAP_TOP(c_p) = HTOP;
+#ifdef DEBUG
+        c_p->stop = E;	/* Needed for checking in HeapOnlyAlloc(). */
+#endif
+        ms = erts_bs_start_match_3(c_p, context);
+        HTOP = HEAP_TOP(c_p);
+        HEAP_SPACE_VERIFIED(0);
+
+        if (ms == NULL) {
+            $FAIL($Fail);
+        }
+
+        $REFRESH_GEN_DEST();
+        $Dst = make_matchstate(ms);
+    } else {
+        $FAIL($Fail);
+    }
+}
+
+bs_set_position(Ctx, Pos) {
+    ErlBinMatchBuffer* mb;
+    Eterm context;
+
+    context = $Ctx;
+    ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+    mb = ms_matchbuffer(context);
+    mb->offset = unsigned_val($Pos);
+}
+
+i_bs_get_position(Ctx, Dst) {
+    ErlBinMatchBuffer* mb;
+    Eterm context;
+
+    context = $Ctx;
+    ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+
+    mb = ms_matchbuffer(context);
+    $Dst = make_small(mb->offset);
+}
+
+%else
+
+#
+# Unlike their 64-bit counterparts, the 32-bit position instructions operate on
+# an offset from the "base position" of the context because storing raw
+# positions would lead to the creation of far too many bigints.
+#
+# When a match context is reused we check whether its position fits into an
+# immediate, and create a new match context if it does not. This means we only
+# have to allocate stuff roughly once every 16MB rather than every time we
+# match at a position beyond 16MB.
+#
+
+bs_set_position(Ctx, Pos) {
+    Eterm context, position;
+    ErlBinMatchState *ms;
+
+    context = $Ctx;
+    position = $Pos;
+
+    ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+    ms = (ErlBinMatchState*)boxed_val(context);
+
+    if (ERTS_LIKELY(is_small(position))) {
+        ms->mb.offset = ms->save_offset[0] + unsigned_val(position);
+    } else {
+        ASSERT(is_big(position));
+        ms->mb.offset = ms->save_offset[0] + *BIG_V(big_val(position));
+    }
+}
+
+bs_get_position(Ctx, Dst, Live) {
+    ErlBinMatchState *ms;
+    Eterm context;
+    Uint position;
+
+    context = $Ctx;
+
+    ASSERT(header_is_bin_matchstate(*boxed_val(context)));
+    ms = (ErlBinMatchState*)boxed_val(context);
+
+    position = ms->mb.offset - ms->save_offset[0];
+
+    if (ERTS_LIKELY(IS_USMALL(0, position))) {
+        $Dst = make_small(position);
+    } else {
+        Eterm *hp;
+
+        $GC_TEST_PRESERVE(BIG_UINT_HEAP_SIZE, $Live, context);
+
+        hp = HTOP;
+        HTOP += BIG_UINT_HEAP_SIZE;
+
+        *hp = make_pos_bignum_header(1);
+        BIG_DIGIT(hp, 0) = position;
+
+        $REFRESH_GEN_DEST();
+        $Dst = make_big(hp);
+    }
+}
+
+i_bs_start_match3(Src, Live, Fail, Dst) {
+    Eterm context, header;
+    Uint live;
+
+    context = $Src;
+    live = $Live;
+
+    if (!is_boxed(context)) {
+        $FAIL($Fail);
+    }
+
+    header = *boxed_val(context);
+
+    if (header_is_bin_matchstate(header)) {
+        ErlBinMatchState *current_ms;
+        Uint position;
+
+        ASSERT(HEADER_NUM_SLOTS(header) == 1);
+
+        current_ms = (ErlBinMatchState*)boxed_val(context);
+        position = current_ms->mb.offset - current_ms->save_offset[0];
+
+        if (ERTS_LIKELY(IS_USMALL(0, position))) {
+            $Dst = context;
+        } else {
+            ErlBinMatchState *new_ms;
+
+            $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+            current_ms = (ErlBinMatchState*)boxed_val(context);
+
+            new_ms = (ErlBinMatchState*)HTOP;
+            HTOP += ERL_BIN_MATCHSTATE_SIZE(1);
+
+            new_ms->thing_word = HEADER_BIN_MATCHSTATE(1);
+            new_ms->save_offset[0] = current_ms->mb.offset;
+            new_ms->mb = current_ms->mb;
+
+            $REFRESH_GEN_DEST();
+            $Dst = make_matchstate(new_ms);
+        }
+    } else if (is_binary_header(header)) {
+        Eterm result;
+
+        $GC_TEST_PRESERVE(ERL_BIN_MATCHSTATE_SIZE(1), live, context);
+        HEAP_TOP(c_p) = HTOP;
+
+#ifdef DEBUG
+        c_p->stop = E;	/* Needed for checking in HeapOnlyAlloc(). */
+#endif
+
+        /* We intentionally use erts_bs_start_match_2 so that we can use
+         * save_offset as a base for all saved positions on this context,
+         * allowing us to avoid bigints for much longer. */
+        result = erts_bs_start_match_2(c_p, context, 1);
+
+        HTOP = HEAP_TOP(c_p);
+        HEAP_SPACE_VERIFIED(0);
+
+        if (is_non_value(result)) {
+            $FAIL($Fail);
+        }
+
+        $REFRESH_GEN_DEST();
+        $Dst = result;
+    } else {
+        $FAIL($Fail);
+    }
+}
+
+%endif
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index 0633bff3c2..15642e1669 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -574,9 +574,7 @@ int erts_do_net_exits(DistEntry *dep, Eterm reason)
         }
 
 	if (dep->state == ERTS_DE_STATE_EXITING) {
-#ifdef DEBUG
 	    ASSERT(erts_atomic32_read_nob(&dep->qflgs) & ERTS_DE_QFLG_EXIT);
-#endif
 	}
 	else {
 	    dep->state = ERTS_DE_STATE_EXITING;
@@ -1343,10 +1341,7 @@ int erts_net_message(Port *prt,
                                                  from, to);
             ASSERT(ldp->a.other.item == to);
             ASSERT(eq(ldp->b.other.item, from));
-#ifdef DEBUG
-            code =
-#endif
-                erts_link_dist_insert(&ldp->a, dep->mld);
+            code = erts_link_dist_insert(&ldp->a, dep->mld);
             ASSERT(code);
 
             if (erts_proc_sig_send_link(NULL, to, &ldp->b))
@@ -1354,10 +1349,7 @@ int erts_net_message(Port *prt,
 
             /* Failed to send signal; cleanup and reply noproc... */
 
-#ifdef DEBUG
-            code =
-#endif
-                erts_link_dist_delete(&ldp->a);
+            code = erts_link_dist_delete(&ldp->a);
             ASSERT(code);
             erts_link_release_both(ldp);
         }
@@ -1904,6 +1896,12 @@ erts_dsig_send(ErtsDSigData *dsdp, struct erts_dsig_send_context* ctx)
 	    ASSERT(ctx->obuf->ext_endp <= &ctx->obuf->data[0] + ctx->data_size);
 
 	    ctx->data_size = ctx->obuf->ext_endp - ctx->obuf->extp;
+	    if (ctx->data_size > (Uint) INT_MAX) {
+		free_dist_obuf(ctx->obuf);
+                ctx->obuf = NULL;
+		retval = ERTS_DSIG_SEND_TOO_LRG;
+		goto done;
+	    }
 
             ctx->obuf->hopefull_flags = ctx->u.ec.hopefull_flags;
 	    /*
@@ -3881,28 +3879,22 @@ monitor_node(Process* p, Eterm Node, Eterm Bool, Eterm Options)
                                                   Node);
             mdep = (ErtsMonitorDataExtended *) erts_monitor_to_data(mon);
             if (created) {
-#ifdef DEBUG
                 int inserted =
-#endif
                     erts_monitor_dist_insert(&mdep->md.target, dep->mld);
-                ASSERT(inserted);
+                ASSERT(inserted); (void)inserted;
                 ASSERT(mdep->dist->connection_id == dep->connection_id);
             }
             else if (mdep->dist->connection_id != dep->connection_id) {
                 ErtsMonitorDataExtended *mdep2;
                 ErtsMonitor *mon2;
-#ifdef DEBUG
                 int inserted;
-#endif
                 mdep2 = ((ErtsMonitorDataExtended *)
                          erts_monitor_create(ERTS_MON_TYPE_NODE, NIL,
                                              p->common.id, Node, NIL));
                 mon2 = &mdep2->md.origin;
-#ifdef DEBUG
                 inserted =
-#endif
                     erts_monitor_dist_insert(&mdep->md.target, dep->mld);
-                ASSERT(inserted);
+                ASSERT(inserted); (void)inserted;
                 ASSERT(mdep2->dist->connection_id == dep->connection_id);
 
                 mdep2->uptr.node_monitors = mdep->uptr.node_monitors;
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index d4d7874a70..845fab229a 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -378,6 +378,7 @@ typedef struct {
 #define ERTS_DSIG_SEND_OK	0
 #define ERTS_DSIG_SEND_YIELD	1
 #define ERTS_DSIG_SEND_CONTINUE 2
+#define ERTS_DSIG_SEND_TOO_LRG  3
 
 extern int erts_dsig_send_link(ErtsDSigData *, Eterm, Eterm);
 extern int erts_dsig_send_msg(Eterm, Eterm, ErtsSendContext*);
diff --git a/erts/emulator/beam/erl_afit_alloc.c b/erts/emulator/beam/erl_afit_alloc.c
index 38289ea78a..f07137c883 100644
--- a/erts/emulator/beam/erl_afit_alloc.c
+++ b/erts/emulator/beam/erl_afit_alloc.c
@@ -102,6 +102,8 @@ erts_afalc_start(AFAllctr_t *afallctr,
     allctr->add_mbc                     = NULL;
     allctr->remove_mbc                  = NULL;
     allctr->largest_fblk_in_mbc         = NULL;
+    allctr->first_fblk_in_mbc           = NULL;
+    allctr->next_fblk_in_mbc            = NULL;
     allctr->init_atoms			= init_atoms;
 
 #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c
index 9e36d5e0d1..e6169ebeaa 100644
--- a/erts/emulator/beam/erl_alloc.c
+++ b/erts/emulator/beam/erl_alloc.c
@@ -64,9 +64,6 @@
 #  error "Too many schedulers; cannot create that many pref alloc instances"
 #endif
 
-#define ERTS_ALC_FIX_TYPE_IX(T) \
-  (ERTS_ALC_T2N((T)) - ERTS_ALC_N_MIN_A_FIXED_SIZE)
-
 #define ERTS_ALC_DEFAULT_MAX_THR_PREF ERTS_MAX_NO_OF_SCHEDULERS
 
 #if defined(SMALL_MEMORY) || defined(PURIFY) || defined(VALGRIND)
@@ -156,20 +153,13 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(aireq,
 
 ErtsAlcType_t erts_fix_core_allocator_ix;
 
-enum allctr_type {
-    GOODFIT,
-    BESTFIT,
-    AFIT,
-    FIRSTFIT
-};
-
 struct au_init {
     int enable;
     int thr_spec;
     int disable_allowed;
     int thr_spec_allowed;
     int carrier_migration_allowed;
-    enum allctr_type	atype;
+    ErtsAlcStrat_t	astrat;
     struct {
 	AllctrInit_t	util;
 	GFAllctrInit_t	gf;
@@ -219,7 +209,9 @@ typedef struct {
     struct au_init test_alloc;
 } erts_alc_hndl_args_init_t;
 
-#define ERTS_AU_INIT__ {0, 0, 1, 1, 1, GOODFIT, DEFAULT_ALLCTR_INIT, {1,1,1,1}}
+#define ERTS_AU_INIT__ {0, 0, 1, 1, 1, \
+                        ERTS_ALC_S_GOODFIT, DEFAULT_ALLCTR_INIT, \
+                        {1,1,1,1}}
 
 #define SET_DEFAULT_ALLOC_OPTS(IP)					\
 do {									\
@@ -233,7 +225,7 @@ set_default_sl_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= GOODFIT;
+    ip->astrat			= ERTS_ALC_S_GOODFIT;
     ip->init.util.name_prefix	= "sl_";
     ip->init.util.alloc_no	= ERTS_ALC_A_SHORT_LIVED;
 #ifndef SMALL_MEMORY
@@ -252,7 +244,7 @@ set_default_std_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.util.name_prefix	= "std_";
     ip->init.util.alloc_no	= ERTS_ALC_A_STANDARD;
 #ifndef SMALL_MEMORY
@@ -270,7 +262,7 @@ set_default_ll_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 0;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.bf.ao		= 1;
     ip->init.util.ramv		= 0;
     ip->init.util.mmsbc		= 0;
@@ -299,7 +291,7 @@ set_default_literal_alloc_opts(struct au_init *ip)
     ip->disable_allowed         = 0;
     ip->thr_spec_allowed        = 0;
     ip->carrier_migration_allowed = 0;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.bf.ao		= 1;
     ip->init.util.ramv		= 0;
     ip->init.util.mmsbc		= 0;
@@ -349,7 +341,7 @@ set_default_exec_alloc_opts(struct au_init *ip)
     ip->disable_allowed         = 0;
     ip->thr_spec_allowed        = 0;
     ip->carrier_migration_allowed = 0;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.bf.ao		= 1;
     ip->init.util.ramv		= 0;
     ip->init.util.mmsbc		= 0;
@@ -378,7 +370,7 @@ set_default_temp_alloc_opts(struct au_init *ip)
     ip->thr_spec		= 1;
     ip->disable_allowed         = 0;
     ip->carrier_migration_allowed = 0;
-    ip->atype			= AFIT;
+    ip->astrat			= ERTS_ALC_S_AFIT;
     ip->init.util.name_prefix	= "temp_";
     ip->init.util.alloc_no	= ERTS_ALC_A_TEMPORARY;
 #ifndef SMALL_MEMORY
@@ -397,7 +389,7 @@ set_default_eheap_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= GOODFIT;
+    ip->astrat			= ERTS_ALC_S_GOODFIT;
     ip->init.util.name_prefix	= "eheap_";
     ip->init.util.alloc_no	= ERTS_ALC_A_EHEAP;
 #ifndef SMALL_MEMORY
@@ -416,7 +408,7 @@ set_default_binary_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.util.name_prefix	= "binary_";
     ip->init.util.alloc_no	= ERTS_ALC_A_BINARY;
 #ifndef SMALL_MEMORY
@@ -435,7 +427,7 @@ set_default_ets_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.util.name_prefix	= "ets_";
     ip->init.util.alloc_no	= ERTS_ALC_A_ETS;
 #ifndef SMALL_MEMORY
@@ -453,7 +445,7 @@ set_default_driver_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.util.name_prefix	= "driver_";
     ip->init.util.alloc_no	= ERTS_ALC_A_DRIVER;
 #ifndef SMALL_MEMORY
@@ -473,7 +465,7 @@ set_default_fix_alloc_opts(struct au_init *ip,
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= AU_ALLOC_DEFAULT_ENABLE(1);
     ip->thr_spec		= 1;
-    ip->atype			= BESTFIT;
+    ip->astrat			= ERTS_ALC_S_BESTFIT;
     ip->init.bf.ao = 1;
     ip->init.util.name_prefix	= "fix_";
     ip->init.util.fix_type_size	= fix_type_sizes;
@@ -493,7 +485,7 @@ set_default_test_alloc_opts(struct au_init *ip)
     SET_DEFAULT_ALLOC_OPTS(ip);
     ip->enable			= 0; /* Disabled by default */
     ip->thr_spec		= -1 * erts_no_schedulers;
-    ip->atype			= FIRSTFIT;
+    ip->astrat			= ERTS_ALC_S_FIRSTFIT;
     ip->init.aoff.crr_order     = FF_AOFF;
     ip->init.aoff.blk_order     = FF_BF;
     ip->init.util.name_prefix	= "test_";
@@ -552,8 +544,8 @@ start_au_allocator(ErtsAlcType_t alctr_n,
 static void
 refuse_af_strategy(struct au_init *init)
 {
-    if (init->atype == AFIT)
-	init->atype = GOODFIT;
+    if (init->astrat == ERTS_ALC_S_AFIT)
+	init->astrat = ERTS_ALC_S_GOODFIT;
 }
 
 #ifdef HARD_DEBUG
@@ -576,7 +568,10 @@ static void adjust_fix_alloc_sizes(UWord extra_block_size)
 	    for (i=0; i < tspec->size; i++) {
 		Allctr_t* allctr = tspec->allctr[i];
 		for (j=0; j < ERTS_ALC_NO_FIXED_SIZES; ++j) {
-		    allctr->fix[j].type_size += extra_block_size;
+                    size_t size = allctr->fix[j].type_size;
+                    size = MAX(size + extra_block_size,
+                               sizeof(ErtsAllctrDDBlock_t));
+		    allctr->fix[j].type_size = size;
 		}
 	    }
 	}
@@ -584,8 +579,11 @@ static void adjust_fix_alloc_sizes(UWord extra_block_size)
 	{
 	    Allctr_t* allctr = erts_allctrs_info[ERTS_ALC_A_FIXED_SIZE].extra;
 	    for (j=0; j < ERTS_ALC_NO_FIXED_SIZES; ++j) {
-		allctr->fix[j].type_size += extra_block_size;
-	    }	
+                size_t size = allctr->fix[j].type_size;
+                size = MAX(size + extra_block_size,
+                           sizeof(ErtsAllctrDDBlock_t));
+                allctr->fix[j].type_size = size;
+	    }
 	}
     }
 }
@@ -597,7 +595,7 @@ strategy_support_carrier_migration(struct au_init *auip)
      * Currently only aoff* and ageff* support carrier
      * migration, i.e, type AOFIRSTFIT.
      */
-    return auip->atype == FIRSTFIT;
+    return auip->astrat == ERTS_ALC_S_FIRSTFIT;
 }
 
 static ERTS_INLINE void
@@ -612,7 +610,7 @@ adjust_carrier_migration_support(struct au_init *auip)
 	 */
 	if (!strategy_support_carrier_migration(auip)) {
 	    /* Default to aoffcbf */
-	    auip->atype = FIRSTFIT;
+	    auip->astrat = ERTS_ALC_S_FIRSTFIT;
 	    auip->init.aoff.crr_order = FF_AOFF;
 	    auip->init.aoff.blk_order = FF_BF;
 	}
@@ -1018,7 +1016,7 @@ start_au_allocator(ErtsAlcType_t alctr_n,
     int i;
     int size = 1;
     void *as0;
-    enum allctr_type atype;
+    ErtsAlcStrat_t astrat;
     ErtsAllocatorFunctions_t *af = &erts_allctrs[alctr_n];
     ErtsAllocatorInfo_t *ai = &erts_allctrs_info[alctr_n];
     ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[alctr_n];
@@ -1077,7 +1075,7 @@ start_au_allocator(ErtsAlcType_t alctr_n,
 
     for (i = 0; i < size; i++) {
 	Allctr_t *as;
-	atype = init->atype;
+	astrat = init->astrat;
 
 	if (!init->thr_spec)
 	    as0 = state;
@@ -1094,8 +1092,8 @@ start_au_allocator(ErtsAlcType_t alctr_n,
 		if (i != 0)
 		    init->init.util.ts = 0;
 		else {
-		    if (atype == AFIT)
-			atype = GOODFIT;
+		    if (astrat == ERTS_ALC_S_AFIT)
+			astrat = ERTS_ALC_S_GOODFIT;
 		    init->init.util.ts = 1;
 		}
 		init->init.util.tspec = init->thr_spec + 1;
@@ -1109,25 +1107,26 @@ start_au_allocator(ErtsAlcType_t alctr_n,
 			 (((char *) fix_lists) + fix_list_size));
 	}
 
+        init->init.util.alloc_strat = astrat;
 	init->init.util.ix = i;
 
-	switch (atype) {
-	case GOODFIT:
+	switch (astrat) {
+	case ERTS_ALC_S_GOODFIT:
 	    as = erts_gfalc_start((GFAllctr_t *) as0,
 					   &init->init.gf,
 					   &init->init.util);
 	    break;
-	case BESTFIT:
+	case ERTS_ALC_S_BESTFIT:
 	    as = erts_bfalc_start((BFAllctr_t *) as0,
 					   &init->init.bf,
 					   &init->init.util);
 	    break;
-	case AFIT:
+	case ERTS_ALC_S_AFIT:
 	    as = erts_afalc_start((AFAllctr_t *) as0,
 					   &init->init.af,
 					   &init->init.util);
 	    break;
-	case FIRSTFIT:
+	case ERTS_ALC_S_FIRSTFIT:
 	    as = erts_aoffalc_start((AOFFAllctr_t *) as0,
 					     &init->init.aoff,
 					     &init->init.util);
@@ -1363,51 +1362,59 @@ handle_au_arg(struct au_init *auip,
 	else if(has_prefix("as", sub_param)) {
 	    char *alg = get_value(sub_param + 2, argv, ip);
 	    if (sys_strcmp("bf", alg) == 0) {
-		auip->atype = BESTFIT;
+		auip->astrat = ERTS_ALC_S_BESTFIT;
 		auip->init.bf.ao = 0;
 	    }
 	    else if (sys_strcmp("aobf", alg) == 0) {
-		auip->atype = BESTFIT;
+		auip->astrat = ERTS_ALC_S_BESTFIT;
 		auip->init.bf.ao = 1;
 	    }
 	    else if (sys_strcmp("gf", alg) == 0) {
-		auip->atype = GOODFIT;
+		auip->astrat = ERTS_ALC_S_GOODFIT;
 	    }
 	    else if (sys_strcmp("af", alg) == 0) {
-		auip->atype = AFIT;
+		auip->astrat = ERTS_ALC_S_AFIT;
 	    }
 	    else if (sys_strcmp("aoff", alg) == 0) {
-		auip->atype = FIRSTFIT;
+		auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AOFF;
 		auip->init.aoff.blk_order = FF_AOFF;
 	    }
 	    else if (sys_strcmp("aoffcbf", alg) == 0) {
-		auip->atype = FIRSTFIT;
+		auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AOFF;
 		auip->init.aoff.blk_order = FF_BF;
 	    }
 	    else if (sys_strcmp("aoffcaobf", alg) == 0) {
-		auip->atype = FIRSTFIT;
+		auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AOFF;
 		auip->init.aoff.blk_order = FF_AOBF;
 	    }
             else if (sys_strcmp("ageffcaoff", alg) == 0) {
-                auip->atype = FIRSTFIT;
+                auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AGEFF;
 		auip->init.aoff.blk_order = FF_AOFF;
             }
             else if (sys_strcmp("ageffcbf", alg) == 0) {
-                auip->atype = FIRSTFIT;
+                auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AGEFF;
 		auip->init.aoff.blk_order = FF_BF;
             }
             else if (sys_strcmp("ageffcaobf", alg) == 0) {
-                auip->atype = FIRSTFIT;
+                auip->astrat = ERTS_ALC_S_FIRSTFIT;
 		auip->init.aoff.crr_order = FF_AGEFF;
 		auip->init.aoff.blk_order = FF_AOBF;
             }
 	    else {
-		bad_value(param, sub_param + 1, alg);
+                if (auip->init.util.alloc_no == ERTS_ALC_A_TEST
+                    && sys_strcmp("chaosff", alg) == 0) {
+                    auip->astrat = ERTS_ALC_S_FIRSTFIT;
+                    auip->init.aoff.crr_order = FF_CHAOS;
+                    auip->init.aoff.blk_order = FF_CHAOS;
+                }
+                else {
+                    bad_value(param, sub_param + 1, alg);
+                }
 	    }
 	    if (!strategy_support_carrier_migration(auip))
 		auip->init.util.acul = 0;
@@ -2030,33 +2037,55 @@ erts_realloc_n_enomem(ErtsAlcType_t n, void *ptr, Uint size)
 }
 
 static ERTS_INLINE UWord
-alcu_size(ErtsAlcType_t ai, ErtsAlcUFixInfo_t *fi, int fisz)
+alcu_size(ErtsAlcType_t alloc_no, ErtsAlcUFixInfo_t *fi, int fisz)
 {
-    UWord res = 0;
+    UWord res;
+    int ai;
 
-    ASSERT(erts_allctrs_info[ai].enabled);
-    ASSERT(erts_allctrs_info[ai].alloc_util);
+    if (!erts_allctrs_info[alloc_no].thr_spec) {
+        AllctrSize_t size;
+        Allctr_t *allctr;
 
-    if (!erts_allctrs_info[ai].thr_spec) {
-	Allctr_t *allctr = erts_allctrs_info[ai].extra;
-	AllctrSize_t asize;
-	erts_alcu_current_size(allctr, &asize, fi, fisz);
-	res += asize.blocks;
+        allctr = erts_allctrs_info[alloc_no].extra;
+        erts_alcu_current_size(allctr, &size, fi, fisz);
+
+        return size.blocks;
     }
-    else {
-	ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[ai];
-	int i;
 
-	ASSERT(tspec->enabled);
+    res = 0;
 
-	for (i = tspec->size - 1; i >= 0; i--) {
-	    Allctr_t *allctr = tspec->allctr[i];
-	    AllctrSize_t asize;
-	    if (allctr) {
-		erts_alcu_current_size(allctr, &asize, fi, fisz);
-		res += asize.blocks;
-	    }
-	}
+    /* Thread-specific allocators can migrate carriers across types, so we have
+     * to visit every allocator type to gather information on blocks that were
+     * allocated by us. */
+    for (ai = ERTS_ALC_A_MIN; ai < ERTS_ALC_A_MAX; ai++) {
+        ErtsAllocatorThrSpec_t *tspec;
+        Allctr_t *allctr;
+        int i;
+
+        if (!erts_allctrs_info[ai].thr_spec) {
+            continue;
+        }
+
+        tspec = &erts_allctr_thr_spec[ai];
+        ASSERT(tspec->enabled);
+
+        for (i = tspec->size - 1; i >= 0; i--) {
+            allctr = tspec->allctr[i];
+
+            if (allctr) {
+                AllctrSize_t size;
+
+                if (ai == alloc_no) {
+                    erts_alcu_current_size(allctr, &size, fi, fisz);
+                } else {
+                    erts_alcu_foreign_size(allctr, alloc_no, &size);
+                }
+
+                ASSERT(((SWord)size.blocks) >= 0);
+
+                res += size.blocks;
+            }
+        }
     }
 
     return res;
@@ -2400,6 +2429,7 @@ erts_memory(fmtfn_t *print_to_p, void *print_to_arg, void *proc, Eterm earg)
     }
 
     if (want_tot_or_sys) {
+        ASSERT(size.total >= size.processes);
 	size.system = size.total - size.processes;
     }
 
@@ -3459,29 +3489,29 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	switch (op) {
 	case 0xf00:
 	    if (((Allctr_t *) a1)->thread_safe)
-		return (UWord) erts_alcu_alloc_ts(ERTS_ALC_T_UNDEF,
+		return (UWord) erts_alcu_alloc_ts(ERTS_ALC_T_TEST,
 							  (void *) a1,
 							  (Uint) a2);
 	    else
-		return (UWord) erts_alcu_alloc(ERTS_ALC_T_UNDEF,
+		return (UWord) erts_alcu_alloc(ERTS_ALC_T_TEST,
 						       (void *) a1,
 						       (Uint) a2);
 	case 0xf01:
 	    if (((Allctr_t *) a1)->thread_safe)
-		return (UWord) erts_alcu_realloc_ts(ERTS_ALC_T_UNDEF,
+		return (UWord) erts_alcu_realloc_ts(ERTS_ALC_T_TEST,
 							    (void *) a1,
 							    (void *) a2,
 							    (Uint) a3);
 	    else
-		return (UWord) erts_alcu_realloc(ERTS_ALC_T_UNDEF,
+		return (UWord) erts_alcu_realloc(ERTS_ALC_T_TEST,
 							 (void *) a1,
 							 (void *) a2,
 							 (Uint) a3);
 	case 0xf02:
 	    if (((Allctr_t *) a1)->thread_safe)
-		erts_alcu_free_ts(ERTS_ALC_T_UNDEF, (void *) a1, (void *) a2);
+		erts_alcu_free_ts(ERTS_ALC_T_TEST, (void *) a1, (void *) a2);
 	    else
-		erts_alcu_free(ERTS_ALC_T_UNDEF, (void *) a1, (void *) a2);
+		erts_alcu_free(ERTS_ALC_T_TEST, (void *) a1, (void *) a2);
 	    return 0;
 	case 0xf03: {
 	    Allctr_t *allctr;
@@ -3489,8 +3519,10 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 
 	    SET_DEFAULT_ALLOC_OPTS(&init);
 	    init.enable = 1;
-	    init.atype = GOODFIT;
+	    init.astrat = ERTS_ALC_S_GOODFIT;
 	    init.init.util.name_prefix = (char *) a1;
+	    init.init.util.alloc_no = ERTS_ALC_A_TEST;
+	    init.init.util.alloc_strat = init.astrat;
 	    init.init.util.ts = 1;
 	    if ((char **) a3) {
 		char **argv = (char **) a3;
@@ -3504,31 +3536,31 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 		}
 	    }
 
-	    switch (init.atype) {
-	    case GOODFIT:
+	    switch (init.astrat) {
+	    case ERTS_ALC_S_GOODFIT:
 		allctr = erts_gfalc_start((GFAllctr_t *)
-					  erts_alloc(ERTS_ALC_T_UNDEF,
+					  erts_alloc(ERTS_ALC_T_TEST,
 						     sizeof(GFAllctr_t)),
 					  &init.init.gf,
 					  &init.init.util);
 		break;
-	    case BESTFIT:
+	    case ERTS_ALC_S_BESTFIT:
 		allctr = erts_bfalc_start((BFAllctr_t *)
-					  erts_alloc(ERTS_ALC_T_UNDEF,
+					  erts_alloc(ERTS_ALC_T_TEST,
 						     sizeof(BFAllctr_t)),
 					  &init.init.bf,
 					  &init.init.util);
 		break;
-	    case AFIT:
+	    case ERTS_ALC_S_AFIT:
 		allctr = erts_afalc_start((AFAllctr_t *)
-					  erts_alloc(ERTS_ALC_T_UNDEF,
+					  erts_alloc(ERTS_ALC_T_TEST,
 							    sizeof(AFAllctr_t)),
 					  &init.init.af,
 					  &init.init.util);
 		break;
-	    case FIRSTFIT:
+	    case ERTS_ALC_S_FIRSTFIT:
 		allctr = erts_aoffalc_start((AOFFAllctr_t *)
-					  erts_alloc(ERTS_ALC_T_UNDEF,
+					  erts_alloc(ERTS_ALC_T_TEST,
 						     sizeof(AOFFAllctr_t)),
 					  &init.init.aoff,
 					  &init.init.util);
@@ -3544,7 +3576,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	}
 	case 0xf04:
 	    erts_alcu_stop((Allctr_t *) a1);
-	    erts_free(ERTS_ALC_T_UNDEF, (void *) a1);
+	    erts_free(ERTS_ALC_T_TEST, (void *) a1);
 	    break;
 	case 0xf05: return (UWord) 1;
 	case 0xf06: return (UWord) ((Allctr_t *) a1)->thread_safe;
@@ -3554,7 +3586,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	case 0xf07: return (UWord) ((Allctr_t *) a1)->thread_safe;
 #endif
 	case 0xf08: {
-	    ethr_mutex *mtx = erts_alloc(ERTS_ALC_T_UNDEF, sizeof(ethr_mutex));
+	    ethr_mutex *mtx = erts_alloc(ERTS_ALC_T_TEST, sizeof(ethr_mutex));
 	    if (ethr_mutex_init(mtx) != 0)
 		ERTS_ALC_TEST_ABORT;
 	    return (UWord) mtx;
@@ -3563,7 +3595,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	    ethr_mutex *mtx = (ethr_mutex *) a1;
 	    if (ethr_mutex_destroy(mtx) != 0)
 		ERTS_ALC_TEST_ABORT;
-	    erts_free(ERTS_ALC_T_UNDEF, (void *) mtx);
+	    erts_free(ERTS_ALC_T_TEST, (void *) mtx);
 	    break;
 	}
 	case 0xf0a:
@@ -3573,7 +3605,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	    ethr_mutex_unlock((ethr_mutex *) a1);
 	    break;
 	case 0xf0c: {
-	    ethr_cond *cnd = erts_alloc(ERTS_ALC_T_UNDEF, sizeof(ethr_cond));
+	    ethr_cond *cnd = erts_alloc(ERTS_ALC_T_TEST, sizeof(ethr_cond));
 	    if (ethr_cond_init(cnd) != 0)
 		ERTS_ALC_TEST_ABORT;
 	    return (UWord) cnd;
@@ -3582,7 +3614,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	    ethr_cond *cnd = (ethr_cond *) a1;
 	    if (ethr_cond_destroy(cnd) != 0)
 		ERTS_ALC_TEST_ABORT;
-	    erts_free(ERTS_ALC_T_UNDEF, (void *) cnd);
+	    erts_free(ERTS_ALC_T_TEST, (void *) cnd);
 	    break;
 	}
 	case 0xf0e:
@@ -3596,7 +3628,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	    break;
 	}
 	case 0xf10: {
-	    ethr_tid *tid = erts_alloc(ERTS_ALC_T_UNDEF, sizeof(ethr_tid));
+	    ethr_tid *tid = erts_alloc(ERTS_ALC_T_TEST, sizeof(ethr_tid));
 	    if (ethr_thr_create(tid,
 				(void * (*)(void *)) a1,
 				(void *) a2,
@@ -3608,7 +3640,7 @@ UWord erts_alc_test(UWord op, UWord a1, UWord a2, UWord a3)
 	    ethr_tid *tid = (ethr_tid *) a1;
 	    if (ethr_thr_join(*tid, NULL) != 0)
 		ERTS_ALC_TEST_ABORT;
-	    erts_free(ERTS_ALC_T_UNDEF, (void *) tid);
+	    erts_free(ERTS_ALC_T_TEST, (void *) tid);
 	    break;
 	}
 	case 0xf12:
@@ -3960,9 +3992,10 @@ check_memory_fence(void *ptr, Uint *size, ErtsAlcType_t n, int func)
 static ErtsAllocatorFunctions_t real_allctrs[ERTS_ALC_A_MAX+1];
 
 static void *
-debug_alloc(ErtsAlcType_t n, void *extra, Uint size)
+debug_alloc(ErtsAlcType_t type, void *extra, Uint size)
 {
     ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra;
+    ErtsAlcType_t n;
     Uint dsize;
     void *res;
 
@@ -3970,9 +4003,11 @@ debug_alloc(ErtsAlcType_t n, void *extra, Uint size)
     erts_hdbg_chk_blks();
 #endif
 
+    n = ERTS_ALC_T2N(type);
+
     ASSERT(ERTS_ALC_N_MIN <= n && n <= ERTS_ALC_N_MAX);
     dsize = size + FENCE_SZ;
-    res = (*real_af->alloc)(n, real_af->extra, dsize);
+    res = (*real_af->alloc)(type, real_af->extra, dsize);
 
     res = set_memory_fence(res, size, n);
 
@@ -3986,14 +4021,17 @@ debug_alloc(ErtsAlcType_t n, void *extra, Uint size)
 
 
 static void *
-debug_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size)
+debug_realloc(ErtsAlcType_t type, void *extra, void *ptr, Uint size)
 {
     ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra;
+    ErtsAlcType_t n;
     Uint dsize;
     Uint old_size;
     void *dptr;
     void *res;
 
+    n = ERTS_ALC_T2N(type);
+
     ASSERT(ERTS_ALC_N_MIN <= n && n <= ERTS_ALC_N_MAX);
 
     dsize = size + FENCE_SZ;
@@ -4003,12 +4041,12 @@ debug_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size)
     erts_hdbg_chk_blks();
 #endif
 
-    if (old_size > size)
+    if (ptr && old_size > size)
 	sys_memset((void *) (((char *) ptr) + size),
 		   0xf,
 		   sizeof(Uint) + old_size - size);
 
-    res = (*real_af->realloc)(n, real_af->extra, dptr, dsize);
+    res = (*real_af->realloc)(type, real_af->extra, dptr, dsize);
 
     res = set_memory_fence(res, size, n);
 
@@ -4021,12 +4059,16 @@ debug_realloc(ErtsAlcType_t n, void *extra, void *ptr, Uint size)
 }
 
 static void
-debug_free(ErtsAlcType_t n, void *extra, void *ptr)
+debug_free(ErtsAlcType_t type, void *extra, void *ptr)
 {
     ErtsAllocatorFunctions_t *real_af = (ErtsAllocatorFunctions_t *) extra;
+    ErtsAlcType_t n;
     void *dptr;
     Uint size;
-    int free_pattern = n;
+    int free_pattern;
+
+    n = ERTS_ALC_T2N(type);
+    free_pattern = n;
 
     ASSERT(ERTS_ALC_N_MIN <= n && n <= ERTS_ALC_N_MAX);
 
@@ -4044,7 +4086,7 @@ debug_free(ErtsAlcType_t n, void *extra, void *ptr)
 #endif
     sys_memset((void *) dptr, free_pattern, size + FENCE_SZ);
 
-    (*real_af->free)(n, real_af->extra, dptr);
+    (*real_af->free)(type, real_af->extra, dptr);
 
 #ifdef PRINT_OPS
     fprintf(stderr, "free(%s, 0x%lx)\r\n", ERTS_ALC_N2TD(n), (Uint) ptr);
diff --git a/erts/emulator/beam/erl_alloc.h b/erts/emulator/beam/erl_alloc.h
index fcb58ff58a..c13cf3f5b0 100644
--- a/erts/emulator/beam/erl_alloc.h
+++ b/erts/emulator/beam/erl_alloc.h
@@ -26,10 +26,23 @@
 #define ERL_THR_PROGRESS_TSD_TYPE_ONLY
 #include "erl_thr_progress.h"
 #undef ERL_THR_PROGRESS_TSD_TYPE_ONLY
-#include "erl_alloc_util.h"
 #include "erl_threads.h"
 #include "erl_mmap.h"
 
+typedef enum { /* Allocation strategy selected for an allocator instance. */
+    ERTS_ALC_S_INVALID = 0, /* Lies outside [ERTS_ALC_S_MIN, ERTS_ALC_S_MAX]. */
+
+    ERTS_ALC_S_GOODFIT,  /* "gf"; instance started by erts_gfalc_start(). */
+    ERTS_ALC_S_BESTFIT,  /* "bf" and "aobf"; started by erts_bfalc_start(). */
+    ERTS_ALC_S_AFIT,     /* "af"; started by erts_afalc_start(). */
+    ERTS_ALC_S_FIRSTFIT, /* "aoff*" and "ageff*"; erts_aoffalc_start(). */
+
+    ERTS_ALC_S_MIN = ERTS_ALC_S_GOODFIT, /* Lowest valid strategy value. */
+    ERTS_ALC_S_MAX = ERTS_ALC_S_FIRSTFIT /* Highest valid strategy value. */
+} ErtsAlcStrat_t;
+
+#include "erl_alloc_util.h"
+
 #ifdef DEBUG
 #  undef ERTS_ALC_WANT_INLINE
 #  define ERTS_ALC_WANT_INLINE 0
@@ -52,6 +65,14 @@
 #define ERTS_ALC_NO_FIXED_SIZES \
   (ERTS_ALC_N_MAX_A_FIXED_SIZE - ERTS_ALC_N_MIN_A_FIXED_SIZE + 1)
 
+#define ERTS_ALC_IS_FIX_TYPE(T) \
+    (ERTS_ALC_T2N(T) >= ERTS_ALC_N_MIN_A_FIXED_SIZE && \
+     ERTS_ALC_T2N(T) <= ERTS_ALC_N_MAX_A_FIXED_SIZE) /* Nonzero when T's type number is in the fixed-size range; T is expanded twice. */
+
+#define ERTS_ALC_FIX_TYPE_IX(T) \
+  (ASSERT(ERTS_ALC_IS_FIX_TYPE(T)), \
+   ERTS_ALC_T2N((T)) - ERTS_ALC_N_MIN_A_FIXED_SIZE) /* Zero-based index of fixed-size type T; asserts that T is a fixed-size type first. */
+
 void erts_sys_alloc_init(void);
 void *erts_sys_alloc(ErtsAlcType_t, void *, Uint);
 void *erts_sys_realloc(ErtsAlcType_t, void *, void *, Uint);
@@ -228,7 +249,7 @@ void *erts_alloc(ErtsAlcType_t type, Uint size)
     void *res;
     ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
     res = (*erts_allctrs[ERTS_ALC_T2A(type)].alloc)(
-            ERTS_ALC_T2N(type),
+            type,
             erts_allctrs[ERTS_ALC_T2A(type)].extra,
             size);
     if (!res)
@@ -243,7 +264,7 @@ void *erts_realloc(ErtsAlcType_t type, void *ptr, Uint size)
     void *res;
     ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
     res = (*erts_allctrs[ERTS_ALC_T2A(type)].realloc)(
-	ERTS_ALC_T2N(type),
+	type,
 	erts_allctrs[ERTS_ALC_T2A(type)].extra,
 	ptr,
 	size);
@@ -258,7 +279,7 @@ void erts_free(ErtsAlcType_t type, void *ptr)
 {
     ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
     (*erts_allctrs[ERTS_ALC_T2A(type)].free)(
-	ERTS_ALC_T2N(type),
+	type,
 	erts_allctrs[ERTS_ALC_T2A(type)].extra,
 	ptr);
     ERTS_MSACC_POP_STATE_X();
@@ -271,7 +292,7 @@ void *erts_alloc_fnf(ErtsAlcType_t type, Uint size)
     void *res;
     ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
     res = (*erts_allctrs[ERTS_ALC_T2A(type)].alloc)(
-	ERTS_ALC_T2N(type),
+	type,
 	erts_allctrs[ERTS_ALC_T2A(type)].extra,
 	size);
     ERTS_MSACC_POP_STATE_X();
@@ -285,7 +306,7 @@ void *erts_realloc_fnf(ErtsAlcType_t type, void *ptr, Uint size)
     void *res;
     ERTS_MSACC_PUSH_AND_SET_STATE_X(ERTS_MSACC_STATE_ALLOC);
     res = (*erts_allctrs[ERTS_ALC_T2A(type)].realloc)(
-	ERTS_ALC_T2N(type),
+	type,
 	erts_allctrs[ERTS_ALC_T2A(type)].extra,
 	ptr,
 	size);
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 4f03a34390..08dcc5ea9a 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -30,10 +30,10 @@
 #   name space).
 # * Types, allocators, classes, and descriptions have different name
 #   spaces.
-# * The type, allocator, and class names INVALID are reserved and can
-#   not be used.
+# * The type, allocator, and class names INVALID are reserved and
+#   cannot be used.
 # * The descriptions invalid_allocator, invalid_class, and invalid_type
-#   are reserved and can not be used.
+#   are reserved and cannot be used.
 # * Declarations can be done conditionally by use of a
 #     +if <boolean_variable>
 #
diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c
index 0be4562785..d238d38d27 100644
--- a/erts/emulator/beam/erl_alloc_util.c
+++ b/erts/emulator/beam/erl_alloc_util.c
@@ -42,6 +42,7 @@
 
 #include "global.h"
 #include "big.h"
+#include "erl_mmap.h"
 #include "erl_mtrace.h"
 #define GET_ERL_ALLOC_UTIL_IMPL
 #include "erl_alloc_util.h"
@@ -90,15 +91,18 @@ static int initialized = 0;
 #define SYS_ALLOC_CARRIER_FLOOR(X)	((X) & SYS_ALLOC_CARRIER_MASK)
 #define SYS_ALLOC_CARRIER_CEILING(X) \
   SYS_ALLOC_CARRIER_FLOOR((X) + INV_SYS_ALLOC_CARRIER_MASK)
+#define SYS_PAGE_SIZE                   (sys_page_size)
+#define SYS_PAGE_SZ_MASK                ((UWord)(SYS_PAGE_SIZE - 1))
 
 #if 0
 /* Can be useful for debugging */
 #define MBC_REALLOC_ALWAYS_MOVES
 #endif
 
-
 /* alloc_util global parameters */
 static Uint sys_alloc_carrier_size;
+static Uint sys_page_size;
+
 #if HAVE_ERTS_MSEG
 static Uint max_mseg_carriers;
 #endif
@@ -113,32 +117,38 @@ static int allow_sys_alloc_carriers;
 
 #define DEC_CC(CC) ((CC)--)
 
-/* Multi block carrier (MBC) memory layout in R16: 
+/* Multi block carrier (MBC) memory layout in OTP 22:
 
 Empty MBC:
-[Carrier_t|pad|Block_t L0T|fhdr| free... ]
+[Carrier_t|pad|Block_t L0T0|fhdr| free... ]
 
 MBC after allocating first block:
-[Carrier_t|pad|Block_t 000|        udata        |pad|Block_t L0T|fhdr| free... ]
+[Carrier_t|pad|Block_t 0000|        udata        |pad|Block_t L0T0|fhdr| free... ]
 
 MBC after allocating second block:
-[Carrier_t|pad|Block_t 000|        udata        |pad|Block_t 000|   udata   |pad|Block_t L0T|fhdr| free... ]
+[Carrier_t|pad|Block_t 0000|        udata        |pad|Block_t 0000|   udata   |pad|Block_t L0T0|fhdr| free... ]
 
 MBC after deallocating first block:
-[Carrier_t|pad|Block_t 00T|fhdr| free  |FreeBlkFtr_t|Block_t 0P0|   udata   |pad|Block_t L0T|fhdr| free... ]
+[Carrier_t|pad|Block_t 00T0|fhdr| free  |FreeBlkFtr_t|Block_t 0P00|   udata   |pad|Block_t L0T0|fhdr| free... ]
 
+MBC after allocating first block, with allocation tagging enabled:
+[Carrier_t|pad|Block_t 000A|        udata        |atag|pad|Block_t L0T0|fhdr| free... ]
 
     udata = Allocated user data
+    atag  = A tag with basic metadata about this allocation
     pad   = Padding to ensure correct alignment for user data
     fhdr  = Allocator specific header to keep track of free block
     free  = Unused free memory
     T     = This block is free (THIS_FREE_BLK_HDR_FLG)
     P     = Previous block is free (PREV_FREE_BLK_HDR_FLG)
     L     = Last block in carrier (LAST_BLK_HDR_FLG)
+    A     = Block has an allocation tag footer, only valid for allocated blocks
+            (ATAG_BLK_HDR_FLG)
 */
 
 /* Single block carrier (SBC):
-[Carrier_t|pad|Block_t 111| udata... ]
+[Carrier_t|pad|Block_t 1110| udata... ]
+[Carrier_t|pad|Block_t 111A| udata | atag]
 */
 
 /* Allocation tags ...
@@ -154,20 +164,20 @@ MBC after deallocating first block:
 
 typedef UWord alcu_atag_t;
 
-#define MAKE_ATAG(IdAtom, Type) \
-    (ASSERT((Type) >= ERTS_ALC_N_MIN && (Type) <= ERTS_ALC_N_MAX), \
+#define MAKE_ATAG(IdAtom, TypeNum) \
+    (ASSERT((TypeNum) >= ERTS_ALC_N_MIN && (TypeNum) <= ERTS_ALC_N_MAX), \
      ASSERT(atom_val(IdAtom) <= MAX_ATAG_ATOM_ID), \
-     (atom_val(IdAtom) << ERTS_ALC_N_BITS) | (Type))
+     (atom_val(IdAtom) << ERTS_ALC_N_BITS) | (TypeNum))
 
 #define ATAG_ID(AT) (make_atom((AT) >> ERTS_ALC_N_BITS))
 #define ATAG_TYPE(AT) ((AT) & ERTS_ALC_N_MASK)
 
 #define MAX_ATAG_ATOM_ID (ERTS_UWORD_MAX >> ERTS_ALC_N_BITS)
 
-#define DBG_IS_VALID_ATAG(Allocator, AT) \
+#define DBG_IS_VALID_ATAG(AT) /* sanity check of an allocation tag, used in ASSERTs */ \
     (ATAG_TYPE(AT) >= ERTS_ALC_N_MIN && \
      ATAG_TYPE(AT) <= ERTS_ALC_N_MAX && \
-     (Allocator)->alloc_no == ERTS_ALC_T2A(ERTS_ALC_N2T(ATAG_TYPE(AT))))
+     atom_val(ATAG_ID(AT)) <= MAX_ATAG_ATOM_ID) /* compare the untagged id, as MAKE_ATAG does */
 
 /* Blocks ... */
 
@@ -182,10 +192,15 @@ typedef UWord alcu_atag_t;
 #endif
 #define FBLK_FTR_SZ (sizeof(FreeBlkFtr_t))
 
+#define BLK_HAS_ATAG(B) \
+    (!!((B)->bhdr & ATAG_BLK_HDR_FLG)) /* 1 if ATAG_BLK_HDR_FLG is set in the block header, else 0 */
+
 #define GET_BLK_ATAG(B) \
-    (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1])
+    (ASSERT(BLK_HAS_ATAG(B)), \
+     ((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1])
 #define SET_BLK_ATAG(B, T) \
-    (((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1] = (T))
+    ((B)->bhdr |= ATAG_BLK_HDR_FLG, \
+     ((alcu_atag_t *) (((char *) (B)) + (BLK_SZ(B))))[-1] = (T))
 
 #define BLK_ATAG_SZ(AP) ((AP)->atags ? sizeof(alcu_atag_t) : 0)
 
@@ -203,13 +218,13 @@ typedef UWord alcu_atag_t;
   (((FreeBlkFtr_t *) (((char *) (B)) + (SZ)))[-1] = (SZ))
 
 #define SET_MBC_ABLK_SZ(B, SZ) \
-  (ASSERT(((SZ) & FLG_MASK) == 0), \
+  (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \
    (B)->bhdr = (((B)->bhdr) & ~MBC_ABLK_SZ_MASK) | (SZ))
 #define SET_MBC_FBLK_SZ(B, SZ) \
-  (ASSERT(((SZ) & FLG_MASK) == 0), \
+  (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \
    (B)->bhdr = (((B)->bhdr) & ~MBC_FBLK_SZ_MASK) | (SZ))
 #define SET_SBC_BLK_SZ(B, SZ) \
-  (ASSERT(((SZ) & FLG_MASK) == 0), \
+  (ASSERT(((SZ) & BLK_FLG_MASK) == 0), \
    (B)->bhdr = (((B)->bhdr) & ~SBC_BLK_SZ_MASK) | (SZ))
 #define SET_PREV_BLK_FREE(AP,B) \
   (ASSERT(!IS_MBC_FIRST_BLK(AP,B)), \
@@ -235,12 +250,12 @@ typedef UWord alcu_atag_t;
 
 #  define SET_MBC_ABLK_HDR(B, Sz, F, C) \
     (ASSERT(((Sz) & ~MBC_ABLK_SZ_MASK) == 0), \
-     ASSERT(!((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \
+     ASSERT(!((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \
      (B)->bhdr = ((Sz) | (F) | (BLK_CARRIER_OFFSET(B,C) << MBC_ABLK_OFFSET_SHIFT)))
 
 #  define SET_MBC_FBLK_HDR(B, Sz, F, C) \
     (ASSERT(((Sz) & ~MBC_FBLK_SZ_MASK) == 0), \
-     ASSERT(((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
+     ASSERT(((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
      (B)->bhdr = ((Sz) | (F)), \
      (B)->u.carrier = (C))
 
@@ -257,8 +272,8 @@ typedef UWord alcu_atag_t;
 #  define SET_BLK_FREE(B) \
   (ASSERT(!IS_PREV_BLK_FREE(B)), \
    (B)->u.carrier = ABLK_TO_MBC(B), \
-   (B)->bhdr |= THIS_FREE_BLK_HDR_FLG, \
-   (B)->bhdr &= (MBC_ABLK_SZ_MASK|FLG_MASK))
+   (B)->bhdr &= (MBC_ABLK_SZ_MASK|LAST_BLK_HDR_FLG), \
+   (B)->bhdr |= THIS_FREE_BLK_HDR_FLG)
 
 #  define SET_BLK_ALLOCED(B) \
   (ASSERT(((B)->bhdr & (MBC_ABLK_OFFSET_MASK|THIS_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
@@ -270,15 +285,16 @@ typedef UWord alcu_atag_t;
 #  define MBC_SZ_MAX_LIMIT ((UWord)~0)
 
 #  define SET_MBC_ABLK_HDR(B, Sz, F, C) \
-    (ASSERT(((Sz) & FLG_MASK) == 0), \
-     ASSERT(!((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \
-     ASSERT((UWord)(F) < SBC_BLK_HDR_FLG), \
+    (ASSERT(((Sz) & BLK_FLG_MASK) == 0), \
+     ASSERT(((F) & ~BLK_FLG_MASK) == 0), \
+     ASSERT(!((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG))), \
      (B)->bhdr = ((Sz) | (F)), \
      (B)->carrier = (C))
 
 #  define SET_MBC_FBLK_HDR(B, Sz, F, C) \
-    (ASSERT(((Sz) & FLG_MASK) == 0), \
-     ASSERT(((UWord)(F) & (~FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
+    (ASSERT(((Sz) & BLK_FLG_MASK) == 0), \
+     ASSERT(((F) & ~BLK_FLG_MASK) == 0), \
+     ASSERT(((UWord)(F) & (~BLK_FLG_MASK|THIS_FREE_BLK_HDR_FLG|PREV_FREE_BLK_HDR_FLG)) == THIS_FREE_BLK_HDR_FLG), \
      (B)->bhdr = ((Sz) | (F)), \
      (B)->carrier = (C))
 
@@ -297,7 +313,7 @@ typedef UWord alcu_atag_t;
 #endif /* !MBC_ABLK_OFFSET_BITS */
 
 #define SET_SBC_BLK_HDR(B, Sz) \
-  (ASSERT(((Sz) & FLG_MASK) == 0), (B)->bhdr = ((Sz) | (SBC_BLK_HDR_FLG)))
+  (ASSERT(((Sz) & BLK_FLG_MASK) == 0), (B)->bhdr = ((Sz) | (SBC_BLK_HDR_FLG)))
 
 
 #define BLK_UMEM_SZ(B) \
@@ -320,7 +336,7 @@ typedef UWord alcu_atag_t;
 #define GET_PREV_FREE_BLK_HDR_FLG(B) \
   ((B)->bhdr & PREV_FREE_BLK_HDR_FLG)
 #define GET_BLK_HDR_FLGS(B) \
-  ((B)->bhdr & FLG_MASK)
+  ((B)->bhdr & BLK_FLG_MASK)
 
 #define NXT_BLK(B) \
   (ASSERT(IS_MBC_BLK(B)), \
@@ -419,7 +435,7 @@ do {										\
 #define SCH_SBC				SBC_CARRIER_HDR_FLAG
 
 #define SET_CARRIER_HDR(C, Sz, F, AP) \
-  (ASSERT(((Sz) & FLG_MASK) == 0), (C)->chdr = ((Sz) | (F)), \
+  (ASSERT(((Sz) & CRR_FLG_MASK) == 0), (C)->chdr = ((Sz) | (F)), \
    erts_atomic_init_nob(&(C)->allctr, (erts_aint_t) (AP)))
 
 #define BLK_TO_SBC(B) \
@@ -444,8 +460,8 @@ do {										\
   (!IS_SB_CARRIER((C)))
 
 #define SET_CARRIER_SZ(C, SZ) \
-  (ASSERT(((SZ) & FLG_MASK) == 0), \
-   ((C)->chdr = ((C)->chdr & FLG_MASK) | (SZ)))
+  (ASSERT(((SZ) & CRR_FLG_MASK) == 0), \
+   ((C)->chdr = ((C)->chdr & CRR_FLG_MASK) | (SZ)))
 
 #define CFLG_SBC				(1 << 0)
 #define CFLG_MBC				(1 << 1)
@@ -575,10 +591,12 @@ do {									\
 	STAT_MSEG_MBC_ALLOC((AP), csz__);				\
     else								\
 	STAT_SYS_ALLOC_MBC_ALLOC((AP), csz__);				\
-    (AP)->mbcs.blocks.curr.no += (CRR)->cpool.blocks;			\
+    set_new_allctr_abandon_limit(AP);                                   \
+    (AP)->mbcs.blocks.curr.no += (CRR)->cpool.blocks[(AP)->alloc_no];   \
     if ((AP)->mbcs.blocks.max.no < (AP)->mbcs.blocks.curr.no)		\
 	(AP)->mbcs.blocks.max.no = (AP)->mbcs.blocks.curr.no;		\
-    (AP)->mbcs.blocks.curr.size += (CRR)->cpool.blocks_size;		\
+    (AP)->mbcs.blocks.curr.size +=                                      \
+       (CRR)->cpool.blocks_size[(AP)->alloc_no];                        \
     if ((AP)->mbcs.blocks.max.size < (AP)->mbcs.blocks.curr.size)	\
 	(AP)->mbcs.blocks.max.size = (AP)->mbcs.blocks.curr.size;	\
 } while (0)
@@ -601,25 +619,33 @@ do {									\
     DEBUG_CHECK_CARRIER_NO_SZ((AP));					\
 } while (0)
 
-#define STAT_MBC_ABANDON(AP, CRR)					\
-do {									\
-    UWord csz__ = CARRIER_SZ((CRR));					\
-    if (IS_MSEG_CARRIER((CRR)))						\
-	STAT_MSEG_MBC_FREE((AP), csz__);				\
-    else								\
-	STAT_SYS_ALLOC_MBC_FREE((AP), csz__);				\
-    ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.no			\
-			  >= (CRR)->cpool.blocks);			\
-    (AP)->mbcs.blocks.curr.no -= (CRR)->cpool.blocks;			\
-    ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.size 			\
-			  >= (CRR)->cpool.blocks_size);			\
-    (AP)->mbcs.blocks.curr.size -= (CRR)->cpool.blocks_size;		\
+#define STAT_MBC_FREE(AP, CRR)                                               \
+do { /* mseg and non-mseg carriers use separate stat macros */               \
+    UWord csz__ = CARRIER_SZ((CRR)); /* size fed to the stat macro below */  \
+    if (IS_MSEG_CARRIER((CRR))) {                                            \
+        STAT_MSEG_MBC_FREE((AP), csz__);                                     \
+    } else {                                                                 \
+        STAT_SYS_ALLOC_MBC_FREE((AP), csz__);                                \
+    }                                                                        \
+    set_new_allctr_abandon_limit(AP); /* presumably refreshes the limit */   \
 } while (0)
 
-#define STAT_MBC_BLK_ALLOC_CRR(CRR, BSZ)				\
+#define STAT_MBC_ABANDON(AP, CRR)                                            \
+do { /* subtract CRR and its blocks under AP's alloc_no from AP */           \
+    STAT_MBC_FREE(AP, CRR); /* carrier-level accounting */                   \
+    ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.no                          \
+                          >= (CRR)->cpool.blocks[(AP)->alloc_no]);           \
+    (AP)->mbcs.blocks.curr.no -= (CRR)->cpool.blocks[(AP)->alloc_no];        \
+    ERTS_ALC_CPOOL_ASSERT((AP)->mbcs.blocks.curr.size                        \
+                          >= (CRR)->cpool.blocks_size[(AP)->alloc_no]);      \
+    (AP)->mbcs.blocks.curr.size -= (CRR)->cpool.blocks_size[(AP)->alloc_no]; \
+} while (0)
+
+#define STAT_MBC_BLK_ALLOC_CRR(AP, CRR, BSZ)				\
 do {									\
-    (CRR)->cpool.blocks++;						\
-    (CRR)->cpool.blocks_size += (BSZ);					\
+    (CRR)->cpool.blocks[(AP)->alloc_no]++;				\
+    (CRR)->cpool.blocks_size[(AP)->alloc_no] += (BSZ);			\
+    (CRR)->cpool.total_blocks_size += (BSZ);				\
 } while (0)
 
 #define STAT_MBC_BLK_ALLOC(AP, CRR, BSZ, FLGS)	       			\
@@ -631,50 +657,67 @@ do {									\
     cstats__->blocks.curr.size += (BSZ);				\
     if (cstats__->blocks.max.size < cstats__->blocks.curr.size)		\
 	cstats__->blocks.max.size = cstats__->blocks.curr.size;		\
-    STAT_MBC_BLK_ALLOC_CRR((CRR), (BSZ));				\
+    STAT_MBC_BLK_ALLOC_CRR((AP), (CRR), (BSZ));				\
 } while (0)
 
 static ERTS_INLINE int
 stat_cpool_mbc_blk_free(Allctr_t *allctr,
+                        ErtsAlcType_t type,
 			Carrier_t *crr,
 			Carrier_t **busy_pcrr_pp,
 			UWord blksz)
 {
+    Allctr_t *orig_allctr;
+    int alloc_no;
 
-    ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks > 0);
-    crr->cpool.blocks--;
-    ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size >= blksz);
-    crr->cpool.blocks_size -= blksz;
+    alloc_no = ERTS_ALC_T2A(type);
 
-    if (!busy_pcrr_pp || !*busy_pcrr_pp)
-	return 0;
+    ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks[alloc_no] > 0);
+    crr->cpool.blocks[alloc_no]--;
+    ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size[alloc_no] >= blksz);
+    crr->cpool.blocks_size[alloc_no] -= blksz;
+    ERTS_ALC_CPOOL_ASSERT(crr->cpool.total_blocks_size >= blksz);
+    crr->cpool.total_blocks_size -= blksz;
+
+    if (allctr->alloc_no == alloc_no && (!busy_pcrr_pp || !*busy_pcrr_pp)) {
+        /* This is a local block, so we should not update the pool
+         * statistics. */
+        return 0;
+    }
 
-    ERTS_ALC_CPOOL_ASSERT(crr == *busy_pcrr_pp);
+    /* This is either a foreign block that's been fetched from the pool, or any
+     * block that's in the pool. The carrier's owner keeps the statistics for
+     * both pooled and foreign blocks. */
+
+    orig_allctr = crr->cpool.orig_allctr;
+
+    ERTS_ALC_CPOOL_ASSERT(alloc_no != allctr->alloc_no ||
+        (crr == *busy_pcrr_pp && allctr == orig_allctr));
 
 #ifdef ERTS_ALC_CPOOL_DEBUG
     ERTS_ALC_CPOOL_ASSERT(
-	erts_atomic_dec_read_nob(&allctr->cpool.stat.no_blocks) >= 0);
+	erts_atomic_dec_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]) >= 0);
     ERTS_ALC_CPOOL_ASSERT(
-	erts_atomic_add_read_nob(&allctr->cpool.stat.blocks_size,
+	erts_atomic_add_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no],
 				 -((erts_aint_t) blksz)) >= 0);
 #else
-    erts_atomic_dec_nob(&allctr->cpool.stat.no_blocks);
-    erts_atomic_add_nob(&allctr->cpool.stat.blocks_size,
+    erts_atomic_dec_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]);
+    erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no],
 			-((erts_aint_t) blksz));
 #endif
 
     return 1;
 }
 
-#define STAT_MBC_BLK_FREE(AP, CRR, BPCRRPP, BSZ, FLGS)			\
-do {									\
-    if (!stat_cpool_mbc_blk_free((AP), (CRR), (BPCRRPP), (BSZ))) {	\
-	CarriersStats_t *cstats__ = &(AP)->mbcs;			\
-	ASSERT(cstats__->blocks.curr.no > 0);				\
-	cstats__->blocks.curr.no--;					\
-	ASSERT(cstats__->blocks.curr.size >= (BSZ));			\
-	cstats__->blocks.curr.size -= (BSZ);				\
-    }									\
+#define STAT_MBC_BLK_FREE(AP, TYPE, CRR, BPCRRPP, BSZ, FLGS)               \
+do {                                                                       \
+    if (!stat_cpool_mbc_blk_free((AP), (TYPE), (CRR), (BPCRRPP), (BSZ))) { \
+        CarriersStats_t *cstats__ = &(AP)->mbcs;                           \
+        ASSERT(cstats__->blocks.curr.no > 0);                              \
+        cstats__->blocks.curr.no--;                                        \
+        ASSERT(cstats__->blocks.curr.size >= (BSZ));                       \
+        cstats__->blocks.curr.size -= (BSZ);                               \
+    }                                                                      \
 } while (0)
 
 /* Debug stuff... */
@@ -721,8 +764,8 @@ static void make_name_atoms(Allctr_t *allctr);
 
 static Block_t *create_carrier(Allctr_t *, Uint, UWord);
 static void destroy_carrier(Allctr_t *, Block_t *, Carrier_t **);
-static void mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp);
-static void dealloc_block(Allctr_t *, void *, ErtsAlcFixList_t *, int);
+static void mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp);
+static void dealloc_block(Allctr_t *, ErtsAlcType_t, Uint32, void *, ErtsAlcFixList_t *);
 
 static alcu_atag_t determine_alloc_tag(Allctr_t *allocator, ErtsAlcType_t type)
 {
@@ -764,14 +807,14 @@ static alcu_atag_t determine_alloc_tag(Allctr_t *allocator, ErtsAlcType_t type)
         }
     }
 
-    return MAKE_ATAG(id, type);
+    return MAKE_ATAG(id, ERTS_ALC_T2N(type));
 }
 
 static void set_alloc_tag(Allctr_t *allocator, void *p, alcu_atag_t tag)
 {
     Block_t *block;
 
-    ASSERT(DBG_IS_VALID_ATAG(allocator, tag));
+    ASSERT(DBG_IS_VALID_ATAG(tag));
     ASSERT(allocator->atags && p);
     (void)allocator;
 
@@ -1312,28 +1355,9 @@ chk_fix_list(Allctr_t *allctr, ErtsAlcFixList_t *fix, int ix, int before)
 #define ERTS_DBG_CHK_FIX_LIST(A, FIX, IX, B)
 #endif
 
+static ERTS_INLINE Allctr_t *get_pref_allctr(void *extra);
 static void *mbc_alloc(Allctr_t *allctr, Uint size);
 
-typedef struct {
-    ErtsAllctrDDBlock_t ddblock__; /* must be first */
-    ErtsAlcType_t fix_type;
-} ErtsAllctrFixDDBlock_t;
-
-#define ERTS_ALC_FIX_NO_UNUSE (((ErtsAlcType_t) 1) << ERTS_ALC_N_BITS)
-
-static ERTS_INLINE void
-dealloc_fix_block(Allctr_t *allctr,
-		  ErtsAlcType_t type,
-		  void *ptr,
-		  ErtsAlcFixList_t *fix,
-		  int dec_cc_on_redirect)
-{
-    /* May be redirected... */
-    ASSERT((type & ERTS_ALC_FIX_NO_UNUSE) == 0);
-    ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type = type | ERTS_ALC_FIX_NO_UNUSE;
-    dealloc_block(allctr, ptr, fix, dec_cc_on_redirect);
-}
-
 static ERTS_INLINE void
 sched_fix_shrink(Allctr_t *allctr, int on)
 {
@@ -1375,7 +1399,7 @@ fix_cpool_check_shrink(Allctr_t *allctr,
 	    if (fix->u.cpool.min_list_size > fix->list_size)
 		fix->u.cpool.min_list_size = fix->list_size;
 
-	    dealloc_fix_block(allctr, type, p, fix, 0);
+	    dealloc_block(allctr, type, DEALLOC_FLG_FIX_SHRINK, p, fix);
 	}
     }
 }
@@ -1386,11 +1410,9 @@ fix_cpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size)
     void *res;
     ErtsAlcFixList_t *fix;
 
-    ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type
-	   && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-
-    fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE];
-    ASSERT(size == fix->type_size);
+    fix = &allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)];
+    ASSERT(type == fix->type && size == fix->type_size);
+    ASSERT(size >= sizeof(ErtsAllctrDDBlock_t));
 
     res = fix->list;
     if (res) {
@@ -1419,21 +1441,39 @@ fix_cpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size)
 static ERTS_INLINE void
 fix_cpool_free(Allctr_t *allctr,
 	       ErtsAlcType_t type,
+               Uint32 flags,
 	       void *p,
-	       Carrier_t **busy_pcrr_pp,
-	       int unuse)
+	       Carrier_t **busy_pcrr_pp)
 {
     ErtsAlcFixList_t *fix;
+    Allctr_t *fix_allctr;
 
-    ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type
-	   && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
+    /* If this isn't a fix allocator we need to update the fix list of our
+     * neighboring fix_alloc to keep the statistics consistent. */
+    if (!allctr->fix) {
+        ErtsAllocatorThrSpec_t *tspec = &erts_allctr_thr_spec[ERTS_ALC_A_FIXED_SIZE];
+        fix_allctr = get_pref_allctr(tspec);
+        ASSERT(!fix_allctr->thread_safe);
+        ASSERT(allctr != fix_allctr);
+    }
+    else {
+        fix_allctr = allctr;
+    }
 
-    fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE];
+    ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(fix_allctr));
+    ASSERT(ERTS_ALC_IS_CPOOL_ENABLED(allctr));
 
-    if (unuse)
-	fix->u.cpool.used--;
+    fix = &fix_allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)];
+    ASSERT(type == fix->type);
+
+    if (!(flags & DEALLOC_FLG_FIX_SHRINK)) {
+        fix->u.cpool.used--;
+    }
 
-    if ((!busy_pcrr_pp || !*busy_pcrr_pp)
+    /* We don't want foreign blocks to be long-lived, so we skip recycling if
+     * allctr != fix_allctr. */
+    if (allctr == fix_allctr
+        && (!busy_pcrr_pp || !*busy_pcrr_pp)
 	&& !fix->u.cpool.shrink_list
 	&& fix->list_size < ERTS_ALCU_FIX_MAX_LIST_SZ) {
 	*((void **) p) = fix->list;
@@ -1446,7 +1486,7 @@ fix_cpool_free(Allctr_t *allctr,
 	if (IS_SBC_BLK(blk))
 	    destroy_carrier(allctr, blk, NULL);
 	else
-	    mbc_free(allctr, p, busy_pcrr_pp);
+	    mbc_free(allctr, type, p, busy_pcrr_pp);
 	fix->u.cpool.allocated--;
 	fix_cpool_check_shrink(allctr, type, fix, busy_pcrr_pp);
     }
@@ -1473,7 +1513,7 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs)
 	    fix->u.cpool.shrink_list = fix->u.cpool.min_list_size;
 	    fix->u.cpool.min_list_size = fix->list_size;
 	}
-	type = (ErtsAlcType_t) (ix + ERTS_ALC_N_MIN_A_FIXED_SIZE);
+	type = ERTS_ALC_N2T((ErtsAlcType_t) (ix + ERTS_ALC_N_MIN_A_FIXED_SIZE));
 	for (o = 0; o < ERTS_ALC_FIX_MAX_SHRINK_OPS || flush; o++) {
 	    void *ptr;
 
@@ -1487,7 +1527,7 @@ fix_cpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs)
 	    fix->list = *((void **) ptr);
 	    fix->list_size--;
 	    fix->u.cpool.shrink_list--;
-	    dealloc_fix_block(allctr, type, ptr, fix, 0);
+	    dealloc_block(allctr, type, DEALLOC_FLG_FIX_SHRINK, ptr, fix);
 	}
 	if (fix->u.cpool.min_list_size > fix->list_size)
 	    fix->u.cpool.min_list_size = fix->list_size;
@@ -1513,11 +1553,9 @@ fix_nocpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size)
     ErtsAlcFixList_t *fix;
     void *res;
 
-    ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type
-	   && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-
-    fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE];
-    ASSERT(size == fix->type_size);
+    fix = &allctr->fix[ERTS_ALC_FIX_TYPE_IX(type)];
+    ASSERT(type == fix->type && size == fix->type_size);
+    ASSERT(size >= sizeof(ErtsAllctrDDBlock_t));
 
     ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
     fix->u.nocpool.used++;
@@ -1534,7 +1572,7 @@ fix_nocpool_alloc(Allctr_t *allctr, ErtsAlcType_t type, Uint size)
 	    if (IS_SBC_BLK(blk))
 		destroy_carrier(allctr, blk, NULL);
 	    else
-		mbc_free(allctr, p, NULL);
+		mbc_free(allctr, type, p, NULL);
 	    fix->u.nocpool.allocated--;
 	}
 	ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
@@ -1569,10 +1607,8 @@ fix_nocpool_free(Allctr_t *allctr,
     Block_t *blk;
     ErtsAlcFixList_t *fix;
 
-    ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE <= type
-	   && type <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
-
-    fix = &allctr->fix[type - ERTS_ALC_N_MIN_A_FIXED_SIZE];
+    fix = &allctr->fix[ERTS_ALC_T2N(type) - ERTS_ALC_N_MIN_A_FIXED_SIZE];
+    ASSERT(fix->type == type);
 
     ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 1);
     fix->u.nocpool.used--;
@@ -1591,7 +1627,7 @@ fix_nocpool_free(Allctr_t *allctr,
 	if (IS_SBC_BLK(blk))
 	    destroy_carrier(allctr, blk, NULL);
 	else
-	    mbc_free(allctr, p, NULL);
+	    mbc_free(allctr, type, p, NULL);
 	p = fix->list;
 	fix->list = *((void **) p);
 	fix->list_size--;
@@ -1602,7 +1638,7 @@ fix_nocpool_free(Allctr_t *allctr,
     if (IS_SBC_BLK(blk))
 	destroy_carrier(allctr, blk, NULL);
     else
-	mbc_free(allctr, p, NULL);
+	mbc_free(allctr, type, p, NULL);
     ERTS_DBG_CHK_FIX_LIST(allctr, fix, ix, 0);
 }
 
@@ -1643,7 +1679,7 @@ fix_nocpool_alloc_shrink(Allctr_t *allctr, erts_aint32_t flgs)
 	    ptr = fix->list;
 	    fix->list = *((void **) ptr);
 	    fix->list_size--;
-	    dealloc_block(allctr, ptr, NULL, 0);
+	    dealloc_block(allctr, fix->type, 0, ptr, NULL);
 	    fix->u.nocpool.allocated--;
 	}
 	if (fix->list_size != 0) {
@@ -1685,6 +1721,7 @@ dealloc_mbc(Allctr_t *allctr, Carrier_t *crr)
 }
 
 
+static UWord allctr_abandon_limit(Allctr_t *allctr);
 static void set_new_allctr_abandon_limit(Allctr_t*);
 static void abandon_carrier(Allctr_t*, Carrier_t*);
 static void poolify_my_carrier(Allctr_t*, Carrier_t*);
@@ -1806,7 +1843,7 @@ get_used_allctr(Allctr_t *pref_allctr, int pref_lock, void *p, UWord *sizep,
 static void
 init_dd_queue(ErtsAllctrDDQueue_t *ddq)
 {
-    erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL);
+    erts_atomic_init_nob(&ddq->tail.data.marker.u.atmc_next, ERTS_AINT_NULL);
     erts_atomic_init_nob(&ddq->tail.data.last,
 			 (erts_aint_t) &ddq->tail.data.marker);
     erts_atomic_init_nob(&ddq->tail.data.um_refc[0], 0);
@@ -1827,17 +1864,17 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
     erts_aint_t itmp;
     ErtsAllctrDDBlock_t *enq, *this = ptr;
 
-    erts_atomic_init_nob(&this->atmc_next, ERTS_AINT_NULL);
+    erts_atomic_init_nob(&this->u.atmc_next, ERTS_AINT_NULL);
     /* Enqueue at end of list... */
 
     enq = (ErtsAllctrDDBlock_t *) erts_atomic_read_nob(&ddq->tail.data.last);
-    itmp = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+    itmp = erts_atomic_cmpxchg_relb(&enq->u.atmc_next,
 				    (erts_aint_t) this,
 				    ERTS_AINT_NULL);
     if (itmp == ERTS_AINT_NULL) {
 	/* We are required to move last pointer */
 #ifdef DEBUG
-	ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->atmc_next));
+	ASSERT(ERTS_AINT_NULL == erts_atomic_read_nob(&this->u.atmc_next));
 	ASSERT(((erts_aint_t) enq)
 	       == erts_atomic_xchg_relb(&ddq->tail.data.last,
 					(erts_aint_t) this));
@@ -1855,8 +1892,8 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
 
 	while (1) {
 	    erts_aint_t itmp2;
-	    erts_atomic_set_nob(&this->atmc_next, itmp);
-	    itmp2 = erts_atomic_cmpxchg_relb(&enq->atmc_next,
+	    erts_atomic_set_nob(&this->u.atmc_next, itmp);
+	    itmp2 = erts_atomic_cmpxchg_relb(&enq->u.atmc_next,
 					     (erts_aint_t) this,
 					     itmp);
 	    if (itmp == itmp2)
@@ -1865,7 +1902,7 @@ ddq_managed_thread_enqueue(ErtsAllctrDDQueue_t *ddq, void *ptr, int cinit)
 		itmp = itmp2;
 	    else {
 		enq = (ErtsAllctrDDBlock_t *) itmp2;
-		itmp = erts_atomic_read_acqb(&enq->atmc_next);
+		itmp = erts_atomic_read_acqb(&enq->u.atmc_next);
 		ASSERT(itmp != ERTS_AINT_NULL);
 	    }
 	    i++;
@@ -1881,8 +1918,8 @@ check_insert_marker(ErtsAllctrDDQueue_t *ddq, erts_aint_t ilast)
 	erts_aint_t itmp;
 	ErtsAllctrDDBlock_t *last = (ErtsAllctrDDBlock_t *) ilast;
 
-	erts_atomic_init_nob(&ddq->tail.data.marker.atmc_next, ERTS_AINT_NULL);
-	itmp = erts_atomic_cmpxchg_relb(&last->atmc_next,
+	erts_atomic_init_nob(&ddq->tail.data.marker.u.atmc_next, ERTS_AINT_NULL);
+	itmp = erts_atomic_cmpxchg_relb(&last->u.atmc_next,
 					(erts_aint_t) &ddq->tail.data.marker,
 					ERTS_AINT_NULL);
 	if (itmp == ERTS_AINT_NULL) {
@@ -1933,7 +1970,7 @@ ddq_dequeue(ErtsAllctrDDQueue_t *ddq)
 	ASSERT(ddq->head.used_marker);
 	ddq->head.used_marker = 0;
 	blk = ((ErtsAllctrDDBlock_t *)
-	       erts_atomic_read_nob(&blk->atmc_next));
+	       erts_atomic_read_nob(&blk->u.atmc_next));
 	if (blk == ddq->head.unref_end) {
 	    ddq->head.first = blk;
 	    return NULL;
@@ -1941,7 +1978,7 @@ ddq_dequeue(ErtsAllctrDDQueue_t *ddq)
     }
 
     ddq->head.first = ((ErtsAllctrDDBlock_t *)
-		       erts_atomic_read_nob(&blk->atmc_next));
+		       erts_atomic_read_nob(&blk->u.atmc_next));
 
     ASSERT(ddq->head.first);
 
@@ -2003,19 +2040,13 @@ check_pending_dealloc_carrier(Allctr_t *allctr,
 			      int *need_more_work);
 
 static void
-handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr)
+handle_delayed_fix_dealloc(Allctr_t *allctr, ErtsAlcType_t type, Uint32 flags,
+                           void *ptr)
 {
-    ErtsAlcType_t type;
-
-    type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type;
-
-    ASSERT(ERTS_ALC_N_MIN_A_FIXED_SIZE
-	   <= (type & ~ERTS_ALC_FIX_NO_UNUSE));
-    ASSERT((type & ~ERTS_ALC_FIX_NO_UNUSE)
-	   <= ERTS_ALC_N_MAX_A_FIXED_SIZE);
+    ASSERT(ERTS_ALC_IS_FIX_TYPE(type));
 
     if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr))
-	fix_nocpool_free(allctr, (type & ~ERTS_ALC_FIX_NO_UNUSE), ptr);
+	fix_nocpool_free(allctr, type, ptr);
     else {
 	Block_t *blk = UMEM2BLK(ptr);
 	Carrier_t *busy_pcrr_p;
@@ -2030,20 +2061,24 @@ handle_delayed_fix_dealloc(Allctr_t *allctr, void *ptr)
 				      NULL, &busy_pcrr_p);
 	if (used_allctr == allctr) {
 	doit:
-	    fix_cpool_free(allctr, (type & ~ERTS_ALC_FIX_NO_UNUSE),
-			   ptr, &busy_pcrr_p,
-			   !(type & ERTS_ALC_FIX_NO_UNUSE));
+	    fix_cpool_free(allctr, type, flags, ptr, &busy_pcrr_p);
 	    clear_busy_pool_carrier(allctr, busy_pcrr_p);
 	}
 	else {
 	    /* Carrier migrated; need to redirect block to new owner... */
-	    int cinit = used_allctr->dd.ix - allctr->dd.ix;
+            ErtsAllctrDDBlock_t *dd_block;
+            int cinit;
+
+            dd_block = (ErtsAllctrDDBlock_t*)ptr;
+            dd_block->flags = flags;
+            dd_block->type = type;
 
             ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p);
 
             DEC_CC(allctr->calls.this_free);
 
-	    ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type = type;
+            cinit = used_allctr->dd.ix - allctr->dd.ix;
+
 	    if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit))
 		erts_alloc_notify_delayed_dealloc(used_allctr->ix);
 	}
@@ -2067,7 +2102,6 @@ handle_delayed_dealloc(Allctr_t *allctr,
     int need_mr_wrk = 0;
     int have_checked_incoming = 0;
     int ops = 0;
-    ErtsAlcFixList_t *fix;
     int res;
     ErtsAllctrDDQueue_t *ddq;
 
@@ -2076,8 +2110,6 @@ handle_delayed_dealloc(Allctr_t *allctr,
 
     ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
 
-    fix = allctr->fix;
-
     ddq = &allctr->dd.q;
 
     res = 0;
@@ -2166,16 +2198,27 @@ handle_delayed_dealloc(Allctr_t *allctr,
             }
 	}
 	else {
+            ErtsAllctrDDBlock_t *dd_block;
+            ErtsAlcType_t type;
+            Uint32 flags;
+        
+            dd_block = (ErtsAllctrDDBlock_t*)ptr;
+            flags = dd_block->flags;
+            type = dd_block->type;
+
+            flags |= DEALLOC_FLG_REDIRECTED;
+
             ASSERT(IS_SBC_BLK(blk) || (ABLK_TO_MBC(blk) !=
                                        ErtsContainerStruct(blk, Carrier_t,
                                                            cpool.homecoming_dd.blk)));
 
 	    INC_CC(allctr->calls.this_free);
 
-	    if (fix)
-		handle_delayed_fix_dealloc(allctr, ptr);
-	    else
-		dealloc_block(allctr, ptr, NULL, 1);
+	    if (ERTS_ALC_IS_FIX_TYPE(type)) {
+		handle_delayed_fix_dealloc(allctr, type, flags, ptr);
+	    } else {
+		dealloc_block(allctr, type, flags, ptr, NULL);
+            }
 	}
     }
 
@@ -2203,8 +2246,10 @@ enqueue_dealloc_other_instance(ErtsAlcType_t type,
 			       void *ptr,
 			       int cinit)
 {
-    if (allctr->fix)
-	((ErtsAllctrFixDDBlock_t*) ptr)->fix_type = type;
+    ErtsAllctrDDBlock_t *dd_block = ((ErtsAllctrDDBlock_t*)ptr);
+
+    dd_block->type = type;
+    dd_block->flags = 0;
 
     if (ddq_enqueue(&allctr->dd.q, ptr, cinit))
 	erts_alloc_notify_delayed_dealloc(allctr->ix);
@@ -2234,10 +2279,7 @@ check_abandon_carrier(Allctr_t *allctr, Block_t *fblk, Carrier_t **busy_pcrr_pp)
     if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr))
 	return;
 
-    allctr->cpool.check_limit_count--;
-    if (--allctr->cpool.check_limit_count <= 0)
-	set_new_allctr_abandon_limit(allctr);
-
+    ASSERT(allctr->cpool.abandon_limit == allctr_abandon_limit(allctr));
     ASSERT(erts_thr_progress_is_managed_thread());
 
     if (allctr->cpool.disable_abandon)
@@ -2255,7 +2297,7 @@ check_abandon_carrier(Allctr_t *allctr, Block_t *fblk, Carrier_t **busy_pcrr_pp)
     if (allctr->main_carrier == crr)
 	return;
 
-    if (crr->cpool.blocks_size > crr->cpool.abandon_limit)
+    if (crr->cpool.total_blocks_size > crr->cpool.abandon_limit)
 	return;
 
     if (crr->cpool.thr_prgr != ERTS_THR_PRGR_INVALID
@@ -2291,24 +2333,26 @@ erts_alcu_check_delayed_dealloc(Allctr_t *allctr,
 			   ERTS_ALCU_DD_OPS_LIM_LOW, NULL, NULL, NULL)
 
 static void
-dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_redirect)
+dealloc_block(Allctr_t *allctr, ErtsAlcType_t type, Uint32 flags, void *ptr,
+              ErtsAlcFixList_t *fix)
 {
     Block_t *blk = UMEM2BLK(ptr);
 
+    ASSERT(!fix || type == fix->type);
+
     ERTS_LC_ASSERT(!allctr->thread_safe
 		       || erts_lc_mtx_is_locked(&allctr->mutex));
 
     if (IS_SBC_BLK(blk)) {
 	destroy_carrier(allctr, blk, NULL);
 	if (fix && ERTS_ALC_IS_CPOOL_ENABLED(allctr)) {
-	    ErtsAlcType_t type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type;
-	    if (!(type & ERTS_ALC_FIX_NO_UNUSE))
+	    if (!(flags & DEALLOC_FLG_FIX_SHRINK))
 		fix->u.cpool.used--;
 	    fix->u.cpool.allocated--;
 	}
     }
     else if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr))
-	mbc_free(allctr, ptr, NULL);
+	mbc_free(allctr, type, ptr, NULL);
     else {
 	Carrier_t *busy_pcrr_p;
 	Allctr_t *used_allctr;
@@ -2317,22 +2361,29 @@ dealloc_block(Allctr_t *allctr, void *ptr, ErtsAlcFixList_t *fix, int dec_cc_on_
 				      NULL, &busy_pcrr_p);
 	if (used_allctr == allctr) {
 	    if (fix) {
-		ErtsAlcType_t type = ((ErtsAllctrFixDDBlock_t *) ptr)->fix_type;
-		if (!(type & ERTS_ALC_FIX_NO_UNUSE))
+	        if (!(flags & DEALLOC_FLG_FIX_SHRINK))
 		    fix->u.cpool.used--;
 		fix->u.cpool.allocated--;
 	    }
-	    mbc_free(allctr, ptr, &busy_pcrr_p);
+	    mbc_free(allctr, type, ptr, &busy_pcrr_p);
 	    clear_busy_pool_carrier(allctr, busy_pcrr_p);
 	}
 	else {
 	    /* Carrier migrated; need to redirect block to new owner... */
-	    int cinit = used_allctr->dd.ix - allctr->dd.ix;
+            ErtsAllctrDDBlock_t *dd_block;
+            int cinit;
+
+            dd_block = (ErtsAllctrDDBlock_t*)ptr;
+            dd_block->flags = flags;
+            dd_block->type = type;
 
             ERTS_ALC_CPOOL_ASSERT(!busy_pcrr_p);
 
-            if (dec_cc_on_redirect)
+            if (flags & DEALLOC_FLG_REDIRECTED)
                 DEC_CC(allctr->calls.this_free);
+
+            cinit = used_allctr->dd.ix - allctr->dd.ix;
+
 	    if (ddq_enqueue(&used_allctr->dd.q, ptr, cinit))
 		erts_alloc_notify_delayed_dealloc(used_allctr->ix);
 	}
@@ -2498,9 +2549,155 @@ mbc_alloc(Allctr_t *allctr, Uint size)
     return BLK2UMEM(blk);
 }
 
+typedef struct {
+    char *ptr;  /* First byte of the region. */
+    UWord size; /* Length in bytes; 0 when there is nothing to discard. */
+} ErtsMemDiscardRegion;
+
+/* Initialize `out` to the part of a free block whose contents may be handed
+ * back to the OS: everything after the first min_block_size bytes and before
+ * the footer, as the allocator uses those two areas to manage the block.
+ *
+ * Nothing is discarded here; the region is only described so that it can be
+ * grown by mem_discard_coalesce() and acted on by mem_discard_finish(). */
+static void ERTS_INLINE
+mem_discard_start(Allctr_t *allocator, Block_t *block,
+                  ErtsMemDiscardRegion *out)
+{
+    const UWord reserved = allocator->min_block_size + FBLK_FTR_SZ;
+    const UWord blk_size = BLK_SZ(block);
+
+    ASSERT(blk_size >= allocator->min_block_size);
+
+    /* A block with nothing between its header area and footer yields an
+     * empty region; the start pointer is filled in either way. */
+    out->size = (blk_size > reserved) ? (blk_size - reserved) : 0;
+
+    /* The region always starts right after the reserved header area. */
+    out->ptr = (char*)block + allocator->min_block_size;
+}
+
+/* Expands a discard region into a neighboring free block, allowing us to
+ * discard the block header and first page. The region grows backwards when
+ * the neighbor starts at or before region->ptr and forwards otherwise.
+ * This is very important in small-allocation scenarios where no single block
+ * is large enough to be discarded on its own. */
+static void ERTS_INLINE
+mem_discard_coalesce(Allctr_t *allocator, Block_t *neighbor,
+                     ErtsMemDiscardRegion *region)
+{
+    char *neighbor_start;
+
+    ASSERT(IS_FREE_BLK(neighbor));
+
+    neighbor_start = (char*)neighbor;
+
+    if (region->ptr >= neighbor_start) {
+        char *region_start_page;
+        /* Step back one page, then mask down to a page boundary. */
+        region_start_page = region->ptr - SYS_PAGE_SIZE;
+        region_start_page = (char*)((UWord)region_start_page & ~SYS_PAGE_SZ_MASK);
+
+        /* Expand only if the new start stays clear of the previous free
+         * block's first min_block_size bytes; otherwise leave the region. */
+        if (region_start_page >= (neighbor_start + allocator->min_block_size)) {
+            region->size += (region->ptr - region_start_page) - FBLK_FTR_SZ;
+            region->ptr = region_start_page;
+        }
+    } else {
+        char *region_end_page;
+        UWord neighbor_size;
+
+        ASSERT(region->ptr <= neighbor_start);
+        /* Step one page past the current end, then mask down to a boundary. */
+        region_end_page = region->ptr + region->size + SYS_PAGE_SIZE;
+        region_end_page = (char*)((UWord)region_end_page & ~SYS_PAGE_SZ_MASK);
+
+        neighbor_size = BLK_SZ(neighbor) - FBLK_FTR_SZ;
+
+        /* Expand if our last page ends anywhere within the next free block,
+         * sans the footer we'll inherit. */
+        if (region_end_page < neighbor_start + neighbor_size) {
+            region->size += region_end_page - (region->ptr + region->size);
+        }
+    }
+}
+/* Passes the whole pages inside `region`, if any, to erts_mem_discard(). */
+static void ERTS_INLINE
+mem_discard_finish(Allctr_t *allocator, Block_t *block,
+                   ErtsMemDiscardRegion *region)
+{
+#ifdef DEBUG
+    char *block_start, *block_end;
+    UWord block_size;
+
+    block_size = BLK_SZ(block);
+
+    /* Ensure that the region is completely covered by the legal area of the
+     * free block. This must hold even when the region is too small to be
+     * discarded. */
+    if (region->size > 0) {
+        ASSERT(block_size > allocator->min_block_size + FBLK_FTR_SZ);
+
+        block_start = (char*)block + allocator->min_block_size;
+        block_end = (char*)block + block_size - FBLK_FTR_SZ;
+
+        ASSERT(region->size == 0 ||
+            (region->ptr + region->size <= block_end &&
+             region->ptr >= block_start &&
+             region->size <= block_size));
+    }
+#else
+    (void)allocator;
+    (void)block;
+#endif
+
+    if (region->size > SYS_PAGE_SIZE) {
+        UWord align_offset, size;
+        char *ptr;
+        /* Distance to the next page boundary; a full page if already aligned. */
+        align_offset = SYS_PAGE_SIZE - ((UWord)region->ptr & SYS_PAGE_SZ_MASK);
+
+        size = (region->size - align_offset) & ~SYS_PAGE_SZ_MASK;
+        ptr = region->ptr + align_offset;
+
+        if (size > 0) {
+            ASSERT(!((UWord)ptr & SYS_PAGE_SZ_MASK));
+            ASSERT(!(size & SYS_PAGE_SZ_MASK));
+
+            erts_mem_discard(ptr, size);
+        }
+    }
+}
+
 static void
-mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp)
+carrier_mem_discard_free_blocks(Allctr_t *allocator, Carrier_t *carrier)
 {
+    static const int MAX_BLOCKS_TO_DISCARD = 100;
+    Block_t *block;
+    int i;
+    /* Process at most MAX_BLOCKS_TO_DISCARD free blocks, without coalescing. */
+    block = allocator->first_fblk_in_mbc(allocator, carrier);
+    i = 0;
+
+    while (block != NULL && i < MAX_BLOCKS_TO_DISCARD) {
+        ErtsMemDiscardRegion region;
+
+        ASSERT(IS_FREE_BLK(block));
+
+        mem_discard_start(allocator, block, &region);
+        mem_discard_finish(allocator, block, &region);
+
+        block = allocator->next_fblk_in_mbc(allocator, carrier, block);
+        i++;
+    }
+}
+
+static void
+mbc_free(Allctr_t *allctr, ErtsAlcType_t type, void *p, Carrier_t **busy_pcrr_pp)
+{
+    ErtsMemDiscardRegion discard_region = {0};
+    int discard;
     Uint is_first_blk;
     Uint is_last_blk;
     Uint blk_sz;
@@ -2516,12 +2713,28 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp)
     ASSERT(IS_MBC_BLK(blk));
     ASSERT(blk_sz >= allctr->min_block_size);
 
+#ifndef DEBUG
+    /* We want to mark freed blocks as reclaimable to the OS, but that is a
+     * fairly expensive operation which does little good if the memory is used
+     * again soon after, so we limit it to deallocations on pooled carriers. */
+    discard = busy_pcrr_pp && *busy_pcrr_pp;
+#else
+    /* Always discard in debug mode, regardless of whether we're in the pool or
+     * not. */
+    discard = 1;
+#endif
+
+    if (discard) {
+        mem_discard_start(allctr, blk, &discard_region);
+    }
+
     HARD_CHECK_BLK_CARRIER(allctr, blk);
 
     crr = ABLK_TO_MBC(blk);
 
     ERTS_ALC_CPOOL_FREE_OP(allctr);
-    STAT_MBC_BLK_FREE(allctr, crr, busy_pcrr_pp, blk_sz, alcu_flgs);
+
+    STAT_MBC_BLK_FREE(allctr, type, crr, busy_pcrr_pp, blk_sz, alcu_flgs);
 
     is_first_blk = IS_MBC_FIRST_ABLK(allctr, blk);
     is_last_blk = IS_LAST_BLK(blk);
@@ -2532,6 +2745,10 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp)
 	blk = PREV_BLK(blk);
 	(*allctr->unlink_free_block)(allctr, blk);
 
+        if (discard) {
+            mem_discard_coalesce(allctr, blk, &discard_region);
+        }
+
 	blk_sz += MBC_FBLK_SZ(blk);
 	is_first_blk = IS_MBC_FIRST_FBLK(allctr, blk);
 	SET_MBC_FBLK_SZ(blk, blk_sz);
@@ -2547,6 +2764,11 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp)
 	if (IS_FREE_BLK(nxt_blk)) {
 	    /* Coalesce with next block... */
 	    (*allctr->unlink_free_block)(allctr, nxt_blk);
+
+            if (discard) {
+                mem_discard_coalesce(allctr, nxt_blk, &discard_region);
+            }
+
 	    blk_sz += MBC_FBLK_SZ(nxt_blk);
 	    SET_MBC_FBLK_SZ(blk, blk_sz);
 
@@ -2582,16 +2804,22 @@ mbc_free(Allctr_t *allctr, void *p, Carrier_t **busy_pcrr_pp)
     else {
 	(*allctr->link_free_block)(allctr, blk);
 	HARD_CHECK_BLK_CARRIER(allctr, blk);
-        if (busy_pcrr_pp && *busy_pcrr_pp)
+
+        if (discard) {
+            mem_discard_finish(allctr, blk, &discard_region);
+        }
+
+        if (busy_pcrr_pp && *busy_pcrr_pp) {
             update_pooled_tree(allctr, crr, blk_sz);
-        else
+        } else {
             check_abandon_carrier(allctr, blk, busy_pcrr_pp);
+        }
     }
 }
 
 static void *
-mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
-	    Carrier_t **busy_pcrr_pp)
+mbc_realloc(Allctr_t *allctr, ErtsAlcType_t type, void *p, Uint size,
+            Uint32 alcu_flgs, Carrier_t **busy_pcrr_pp)
 {
     void *new_p;
     Uint old_blk_sz;
@@ -2629,7 +2857,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
         new_blk = UMEM2BLK(new_p);
         ASSERT(!(IS_MBC_BLK(new_blk) && ABLK_TO_MBC(new_blk) == *busy_pcrr_pp));
         sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ));
-        mbc_free(allctr, p, busy_pcrr_pp);
+        mbc_free(allctr, type, p, busy_pcrr_pp);
         return new_p;
     }
 
@@ -2706,7 +2934,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 	crr = ABLK_TO_MBC(blk);
 
 	ERTS_ALC_CPOOL_REALLOC_OP(allctr);
-	STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs);
+	STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs);
 	STAT_MBC_BLK_ALLOC(allctr, crr, blk_sz, alcu_flgs);
 
 	ASSERT(MBC_BLK_SZ(blk) >= allctr->min_block_size);
@@ -2810,7 +3038,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 	    }
 
 	    ERTS_ALC_CPOOL_REALLOC_OP(allctr);
-	    STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs);
+	    STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs);
 	    STAT_MBC_BLK_ALLOC(allctr, crr, blk_sz, alcu_flgs);
 
 	    ASSERT(IS_ALLOCED_BLK(blk));
@@ -2871,7 +3099,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 	if (!new_p)
 	    return NULL;
 	sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ));
-	mbc_free(allctr, p, busy_pcrr_pp);
+	mbc_free(allctr, type, p, busy_pcrr_pp);
 
 	return new_p;
 
@@ -2901,7 +3129,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 			       1);
 	    new_p = BLK2UMEM(new_blk);
 	    sys_memcpy(new_p, p, MIN(size, old_blk_sz - ABLK_HDR_SZ));
-	    mbc_free(allctr, p, NULL);
+	    mbc_free(allctr, type, p, NULL);
 	    return new_p;
 	}
 	else {
@@ -2958,7 +3186,7 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 			       0);
 
 	    ERTS_ALC_CPOOL_FREE_OP(allctr);
-	    STAT_MBC_BLK_FREE(allctr, crr, NULL, old_blk_sz, alcu_flgs);
+	    STAT_MBC_BLK_FREE(allctr, type, crr, NULL, old_blk_sz, alcu_flgs);
 
 	    return new_p;
 	}
@@ -2969,7 +3197,6 @@ mbc_realloc(Allctr_t *allctr, void *p, Uint size, Uint32 alcu_flgs,
 
 #define ERTS_ALC_MAX_DEALLOC_CARRIER		10
 #define ERTS_ALC_CPOOL_MAX_FETCH_INSPECT	100
-#define ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT	100
 #define ERTS_ALC_CPOOL_MAX_FAILED_STAT_READS	3
 
 #define ERTS_ALC_CPOOL_PTR_MOD_MRK		(((erts_aint_t) 1) << 0)
@@ -2996,14 +3223,11 @@ typedef union {
 #  error "Carrier pool implementation assumes ERTS_ALC_A_MIN > ERTS_ALC_A_INVALID"
 #endif
 
-/*
- * The pool is only allowed to be manipulated by managed
- * threads except in the alloc_SUITE:cpool case. In this
- * test case carrier_pool[ERTS_ALC_A_INVALID] will be
- * used.
- */
+/* The pools are only allowed to be manipulated by managed threads except in
+ * the alloc_SUITE:cpool test, where only test_carrier_pool is used. */
 
-static ErtsAlcCrrPool_t carrier_pool[ERTS_ALC_A_MAX+1] erts_align_attribute(ERTS_CACHE_LINE_SIZE);
+static ErtsAlcCrrPool_t firstfit_carrier_pool;
+static ErtsAlcCrrPool_t test_carrier_pool;
 
 #define ERTS_ALC_CPOOL_MAX_BACKOFF (1 << 8)
 
@@ -3024,12 +3248,12 @@ backoff(int n)
 static int
 cpool_dbg_is_in_pool(Allctr_t *allctr, Carrier_t *crr)
 {
-    ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+    ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel;
     ErtsAlcCPoolData_t *cpdp = sentinel;
     Carrier_t *tmp_crr;
 
     while (1) {
-	cpdp = (ErtsAlcCPoolData_t *) (erts_atomic_read_ddrb(&cpdp->next) & ~FLG_MASK);
+	cpdp = (ErtsAlcCPoolData_t *) (erts_atomic_read_ddrb(&cpdp->next) & ~CRR_FLG_MASK);
 	if (cpdp == sentinel)
 	    return 0;
 	tmp_crr = (Carrier_t *) (((char *) cpdp) - offsetof(Carrier_t, cpool));
@@ -3041,7 +3265,7 @@ cpool_dbg_is_in_pool(Allctr_t *allctr, Carrier_t *crr)
 static int
 cpool_is_empty(Allctr_t *allctr)
 {
-    ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+    ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel;
     return ((erts_atomic_read_rb(&sentinel->next) == (erts_aint_t) sentinel)
 	    && (erts_atomic_read_rb(&sentinel->prev) == (erts_aint_t) sentinel));
 }
@@ -3131,16 +3355,31 @@ cpool_insert(Allctr_t *allctr, Carrier_t *crr)
 {
     ErtsAlcCPoolData_t *cpd1p, *cpd2p;
     erts_aint_t val;
-    ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+    ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel;
     Allctr_t *orig_allctr = crr->cpool.orig_allctr;
 
-    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */
+    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */
 			  || erts_thr_progress_is_managed_thread());
 
-    erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size,
-			(erts_aint_t) crr->cpool.blocks_size);
-    erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks,
-			(erts_aint_t) crr->cpool.blocks);
+    {
+        int alloc_no = allctr->alloc_no;
+
+        ERTS_ALC_CPOOL_ASSERT(
+            erts_atomic_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no]) >= 0 &&
+            crr->cpool.blocks_size[alloc_no] >= 0);
+
+        ERTS_ALC_CPOOL_ASSERT(
+            erts_atomic_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]) >= 0 &&
+            crr->cpool.blocks[alloc_no] >= 0);
+
+        /* We only modify the counter for our current type since the others are
+         * conceptually still in the pool. */
+        erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no],
+                            ((erts_aint_t) crr->cpool.blocks_size[alloc_no]));
+        erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no],
+                            ((erts_aint_t) crr->cpool.blocks[alloc_no]));
+    }
+
     erts_atomic_add_nob(&orig_allctr->cpool.stat.carriers_size,
 			(erts_aint_t) CARRIER_SZ(crr));
     erts_atomic_inc_nob(&orig_allctr->cpool.stat.no_carriers);
@@ -3213,10 +3452,10 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr)
     ErtsAlcCPoolData_t *cpd1p, *cpd2p;
     erts_aint_t val;
 #ifdef ERTS_ALC_CPOOL_DEBUG
-    ErtsAlcCPoolData_t *sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+    ErtsAlcCPoolData_t *sentinel = allctr->cpool.sentinel;
 #endif
 
-    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */
+    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */
 			  || erts_thr_progress_is_managed_thread());
     ERTS_ALC_CPOOL_ASSERT(sentinel != &crr->cpool);
 
@@ -3292,28 +3531,43 @@ cpool_delete(Allctr_t *allctr, Allctr_t *prev_allctr, Carrier_t *crr)
 
     crr->cpool.thr_prgr = erts_thr_progress_later(NULL);
 
-    erts_atomic_add_nob(&prev_allctr->cpool.stat.blocks_size,
-			-((erts_aint_t) crr->cpool.blocks_size));
-    erts_atomic_add_nob(&prev_allctr->cpool.stat.no_blocks,
-			-((erts_aint_t) crr->cpool.blocks));
-    erts_atomic_add_nob(&prev_allctr->cpool.stat.carriers_size,
+    {
+        Allctr_t *orig_allctr = crr->cpool.orig_allctr;
+        int alloc_no = allctr->alloc_no;
+
+        ERTS_ALC_CPOOL_ASSERT(orig_allctr == prev_allctr);
+
+        ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks_size[alloc_no] <=
+            erts_atomic_read_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no]));
+
+        ERTS_ALC_CPOOL_ASSERT(crr->cpool.blocks[alloc_no] <=
+            erts_atomic_read_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no]));
+
+        /* We only modify the counters for our current type since the others
+         * were, conceptually, never taken out of the pool. */
+        erts_atomic_add_nob(&orig_allctr->cpool.stat.blocks_size[alloc_no],
+                            -((erts_aint_t) crr->cpool.blocks_size[alloc_no]));
+        erts_atomic_add_nob(&orig_allctr->cpool.stat.no_blocks[alloc_no],
+                            -((erts_aint_t) crr->cpool.blocks[alloc_no]));
+
+        erts_atomic_add_nob(&orig_allctr->cpool.stat.carriers_size,
 			-((erts_aint_t) CARRIER_SZ(crr)));
-    erts_atomic_dec_wb(&prev_allctr->cpool.stat.no_carriers);
+        erts_atomic_dec_wb(&orig_allctr->cpool.stat.no_carriers);
+    }
 
 }
 
 static Carrier_t *
 cpool_fetch(Allctr_t *allctr, UWord size)
 {
-    enum { IGNORANT, HAS_SEEN_SENTINEL, THE_LAST_ONE } loop_state;
-    int i;
+    int i, seen_sentinel;
     Carrier_t *crr;
     Carrier_t *reinsert_crr = NULL;
     ErtsAlcCPoolData_t *cpdp;
     ErtsAlcCPoolData_t *cpool_entrance = NULL;
     ErtsAlcCPoolData_t *sentinel;
 
-    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_INVALID /* testcase */
+    ERTS_ALC_CPOOL_ASSERT(allctr->alloc_no == ERTS_ALC_A_TEST /* testcase */
 			  || erts_thr_progress_is_managed_thread());
 
     i = ERTS_ALC_CPOOL_MAX_FETCH_INSPECT;
@@ -3415,48 +3669,39 @@ cpool_fetch(Allctr_t *allctr, UWord size)
     /*
      * Finally search the shared pool and try employ foreign carriers
      */
-    sentinel = &carrier_pool[allctr->alloc_no].sentinel;
+    sentinel = allctr->cpool.sentinel;
     if (cpool_entrance) {
         /*
          * We saw a pooled carried above, use it as entrance into the pool
 	 */
-	cpdp = cpool_entrance;
     }
     else {
         /*
-         * No pooled carried seen above. Start search at cpool sentinel,
+         * No pooled carrier seen above. Start search at cpool sentinel,
 	 * but begin by passing one element before trying to fetch.
 	 * This in order to avoid contention with threads inserting elements.
 	 */
-	cpool_entrance = sentinel;
-	cpdp = cpool_aint2cpd(cpool_read(&cpool_entrance->prev));
-	if (cpdp == sentinel)
+        cpool_entrance = cpool_aint2cpd(cpool_read(&sentinel->prev));
+	if (cpool_entrance == sentinel)
 	    goto check_dc_list;
     }
 
-    loop_state = IGNORANT;
+    cpdp = cpool_entrance;
+    seen_sentinel = 0;
     do {
 	erts_aint_t exp;
 	cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
-	if (cpdp == cpool_entrance) {
-	    if (cpool_entrance == sentinel) {
-		cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
-		if (cpdp == sentinel)
-		    break;
-	    }
-            loop_state = THE_LAST_ONE;
-	}
-	else if (cpdp == sentinel) {
-	    if (loop_state == HAS_SEEN_SENTINEL) {
+        if (cpdp == sentinel) {
+	    if (seen_sentinel) {
 		/* We been here before. cpool_entrance must have been removed */
                 INC_CC(allctr->cpool.stat.entrance_removed);
 		break;
 	    }
-	    cpdp = cpool_aint2cpd(cpool_read(&cpdp->prev));
-	    if (cpdp == sentinel)
-                break;
-            loop_state = HAS_SEEN_SENTINEL;
+            seen_sentinel = 1;
+            continue;
 	}
+        ASSERT(cpdp != cpool_entrance || seen_sentinel);
+
 	crr = ErtsContainerStruct(cpdp, Carrier_t, cpool);
 	exp = erts_atomic_read_rb(&crr->allctr);
 
@@ -3489,7 +3734,7 @@ cpool_fetch(Allctr_t *allctr, UWord size)
             INC_CC(allctr->cpool.stat.fail_shared);
 	    return NULL;
         }
-    }while (loop_state != THE_LAST_ONE);
+    }while (cpdp != cpool_entrance);
 
 check_dc_list:
     /* Last; check our own pending dealloc carrier list... */
@@ -3668,8 +3913,9 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr)
     crr->cpool.orig_allctr = allctr;
     crr->cpool.thr_prgr = ERTS_THR_PRGR_INVALID;
     erts_atomic_init_nob(&crr->cpool.max_size, 0);
-    crr->cpool.blocks = 0;
-    crr->cpool.blocks_size = 0;
+    sys_memset(&crr->cpool.blocks_size, 0, sizeof(crr->cpool.blocks_size));
+    sys_memset(&crr->cpool.blocks, 0, sizeof(crr->cpool.blocks));
+    crr->cpool.total_blocks_size = 0;
     if (!ERTS_ALC_IS_CPOOL_ENABLED(allctr))
 	crr->cpool.abandon_limit = 0;
     else {
@@ -3684,14 +3930,14 @@ cpool_init_carrier_data(Allctr_t *allctr, Carrier_t *crr)
     crr->cpool.state = ERTS_MBC_IS_HOME;
 }
 
-static void
-set_new_allctr_abandon_limit(Allctr_t *allctr)
+
+
+static UWord
+allctr_abandon_limit(Allctr_t *allctr)
 {
     UWord limit;
     UWord csz;
 
-    allctr->cpool.check_limit_count = ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT;
-
     csz = allctr->mbcs.curr.norm.mseg.size;
     csz += allctr->mbcs.curr.norm.sys_alloc.size;
 
@@ -3701,7 +3947,13 @@ set_new_allctr_abandon_limit(Allctr_t *allctr)
     else
 	limit = (csz/100)*allctr->cpool.util_limit;
 
-    allctr->cpool.abandon_limit = limit;
+    return limit;
+}
+/* Recomputes this allocator's abandon limit and stores it in cpool. */
+static void ERTS_INLINE
+set_new_allctr_abandon_limit(Allctr_t *allctr)
+{
+    allctr->cpool.abandon_limit = allctr_abandon_limit(allctr);
 }
 
 static void
@@ -3713,7 +3965,9 @@ abandon_carrier(Allctr_t *allctr, Carrier_t *crr)
 
     unlink_carrier(&allctr->mbc_list, crr);
     allctr->remove_mbc(allctr, crr);
-    set_new_allctr_abandon_limit(allctr);
+
+    /* Mark our free blocks as unused and reclaimable to the OS. */
+    carrier_mem_discard_free_blocks(allctr, crr);
 
     cpool_insert(allctr, crr);
 
@@ -3766,7 +4020,8 @@ poolify_my_carrier(Allctr_t *allctr, Carrier_t *crr)
 }
 
 static void
-cpool_read_stat(Allctr_t *allctr, UWord *nocp, UWord *cszp, UWord *nobp, UWord *bszp)
+cpool_read_stat(Allctr_t *allctr, int alloc_no,
+                UWord *nocp, UWord *cszp, UWord *nobp, UWord *bszp)
 {
     int i;
     UWord noc = 0, csz = 0, nob = 0, bsz = 0;
@@ -3786,10 +4041,10 @@ cpool_read_stat(Allctr_t *allctr, UWord *nocp, UWord *cszp, UWord *nobp, UWord *
 			? erts_atomic_read_nob(&allctr->cpool.stat.carriers_size)
 			: 0);
 	tnob = (UWord) (nobp
-			? erts_atomic_read_nob(&allctr->cpool.stat.no_blocks)
+			? erts_atomic_read_nob(&allctr->cpool.stat.no_blocks[alloc_no])
 			: 0);
 	tbsz = (UWord) (bszp
-			? erts_atomic_read_nob(&allctr->cpool.stat.blocks_size)
+			? erts_atomic_read_nob(&allctr->cpool.stat.blocks_size[alloc_no])
 			: 0);
 	if (tnoc == noc && tcsz == csz && tnob == nob && tbsz == bsz)
 	    break;
@@ -4044,6 +4299,7 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags)
 #if HAVE_ERTS_MSEG
     mbc_final_touch:
 #endif
+        set_new_allctr_abandon_limit(allctr);
 
 	blk = MBC_TO_FIRST_BLK(allctr, crr);
 
@@ -4262,7 +4518,6 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp)
     else {
 	ASSERT(IS_MBC_FIRST_FBLK(allctr, blk));
 	crr = FIRST_BLK_TO_MBC(allctr, blk);
-	crr_sz = CARRIER_SZ(crr);
 
 #ifdef DEBUG
 	if (!allctr->stopped) {
@@ -4294,15 +4549,7 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp)
 	else
 	{
 	    unlink_carrier(&allctr->mbc_list, crr);
-#if HAVE_ERTS_MSEG
-	    if (IS_MSEG_CARRIER(crr)) {
-		ASSERT(crr_sz % ERTS_SACRR_UNIT_SZ == 0);
-		STAT_MSEG_MBC_FREE(allctr, crr_sz);
-	    }
-	    else
-#endif
-		STAT_SYS_ALLOC_MBC_FREE(allctr, crr_sz);
-
+            STAT_MBC_FREE(allctr, crr);
             if (allctr->remove_mbc)
                 allctr->remove_mbc(allctr, crr);
 	}
@@ -4316,7 +4563,7 @@ destroy_carrier(Allctr_t *allctr, Block_t *blk, Carrier_t **busy_pcrr_pp)
             LTTNG5(carrier_destroy,
                 ERTS_ALC_A2AD(allctr->alloc_no),
                 allctr->ix,
-                crr_sz,
+                CARRIER_SZ(crr),
                 mbc_stats,
                 sbc_stats);
         }
@@ -4394,6 +4641,8 @@ static struct {
     Eterm blocks_size;
     Eterm blocks;
 
+    Eterm foreign_blocks;
+
     Eterm calls;
     Eterm sys_alloc;
     Eterm sys_free;
@@ -4494,6 +4743,7 @@ init_atoms(Allctr_t *allctr)
 	AM_INIT(carriers);
 	AM_INIT(blocks_size);
 	AM_INIT(blocks);
+	AM_INIT(foreign_blocks);
 
 	AM_INIT(calls);
 	AM_INIT(sys_alloc);
@@ -4629,7 +4879,6 @@ sz_info_fix(Allctr_t *allctr,
 		ErtsAlcFixList_t *fix = &allctr->fix[ix];
 		UWord alloced = fix->type_size * fix->u.cpool.allocated;
 		UWord used = fix->type_size * fix->u.cpool.used;
-                ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix;
 
 		if (print_to_p) {
 		    fmtfn_t to = *print_to_p;
@@ -4637,14 +4886,14 @@ sz_info_fix(Allctr_t *allctr,
 		    erts_print(to,
 			       arg,
 			       "fix type internal: %s %bpu %bpu\n",
-			       (char *) ERTS_ALC_N2TD(n),
+			       (char *) ERTS_ALC_T2TD(fix->type),
 			       alloced,
 			       used);
 		}
 
 		if (hpp || szp) {
 		    add_3tup(hpp, szp, &res,
-			     alloc_type_atoms[n],
+			     alloc_type_atoms[ERTS_ALC_T2N(fix->type)],
 			     bld_unstable_uint(hpp, szp, alloced),
 			     bld_unstable_uint(hpp, szp, used));
 		}
@@ -4657,7 +4906,6 @@ sz_info_fix(Allctr_t *allctr,
 	    ErtsAlcFixList_t *fix = &allctr->fix[ix];
 	    UWord alloced = fix->type_size * fix->u.nocpool.allocated;
 	    UWord used = fix->type_size*fix->u.nocpool.used;
-            ErtsAlcType_t n = ERTS_ALC_N_MIN_A_FIXED_SIZE + ix;
 
 	    if (print_to_p) {
 		fmtfn_t to = *print_to_p;
@@ -4665,14 +4913,14 @@ sz_info_fix(Allctr_t *allctr,
 		erts_print(to,
 			   arg,
 			   "fix type: %s %bpu %bpu\n",
-			   (char *) ERTS_ALC_N2TD(n),
+			   (char *) ERTS_ALC_T2TD(fix->type),
 			   alloced,
 			   used);
 	    }
 
 	    if (hpp || szp) {
 		add_3tup(hpp, szp, &res,
-			 alloc_type_atoms[n],
+			 alloc_type_atoms[ERTS_ALC_T2N(fix->type)],
 			 bld_unstable_uint(hpp, szp, alloced),
 			 bld_unstable_uint(hpp, szp, used));
 	    }
@@ -4745,9 +4993,9 @@ info_cpool(Allctr_t *allctr,
     noc = csz = nob = bsz = ~0;
     if (print_to_p || hpp) {
 	if (sz_only)
-	    cpool_read_stat(allctr, NULL, &csz, NULL, &bsz);
+	    cpool_read_stat(allctr, allctr->alloc_no, NULL, &csz, NULL, &bsz);
 	else
-	    cpool_read_stat(allctr, &noc, &csz, &nob, &bsz);
+	    cpool_read_stat(allctr, allctr->alloc_no, &noc, &csz, &nob, &bsz);
     }
 
     if (print_to_p) {
@@ -4762,6 +5010,10 @@ info_cpool(Allctr_t *allctr,
     }
 
     if (hpp || szp) {
+        Eterm foreign_blocks;
+        int i;
+
+        foreign_blocks = NIL;
 	res = NIL;
 
       if (!sz_only) {
@@ -4808,22 +5060,61 @@ info_cpool(Allctr_t *allctr,
         add_3tup(hpp, szp, &res, am.entrance_removed,
                  bld_unstable_uint(hpp, szp, ERTS_ALC_CC_GIGA_VAL(allctr->cpool.stat.entrance_removed)),
                  bld_unstable_uint(hpp, szp, ERTS_ALC_CC_VAL(allctr->cpool.stat.entrance_removed)));
+       }
 
 	add_2tup(hpp, szp, &res,
 		 am.carriers_size,
 		 bld_unstable_uint(hpp, szp, csz));
-      }
-	if (!sz_only)
-	    add_2tup(hpp, szp, &res,
-		     am.carriers,
-		     bld_unstable_uint(hpp, szp, noc));
+
+        if (!sz_only) {
+            add_2tup(hpp, szp, &res,
+                     am.carriers,
+                     bld_unstable_uint(hpp, szp, noc));
+        }
+
 	add_2tup(hpp, szp, &res,
 		 am.blocks_size,
 		 bld_unstable_uint(hpp, szp, bsz));
-	if (!sz_only)
+
+	if (!sz_only) {
 	    add_2tup(hpp, szp, &res,
 		     am.blocks,
 		     bld_unstable_uint(hpp, szp, nob));
+        }
+
+        for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) {
+            const char *name_str;
+            Eterm name, info;
+
+            if (i == allctr->alloc_no) {
+                continue;
+            }
+
+            cpool_read_stat(allctr, i, NULL, NULL, &nob, &bsz);
+
+            if (bsz == 0 && (nob == 0 || sz_only)) {
+                continue;
+            }
+
+            name_str = ERTS_ALC_A2AD(i);
+            info = NIL;
+
+            add_2tup(hpp, szp, &info,
+                     am.blocks_size,
+                     bld_unstable_uint(hpp, szp, bsz));
+
+            if (!sz_only) {
+                add_2tup(hpp, szp, &info,
+                     am.blocks,
+                     bld_unstable_uint(hpp, szp, nob));
+            }
+
+            name = am_atom_put(name_str, sys_strlen(name_str));
+
+            add_2tup(hpp, szp, &foreign_blocks, name, info);
+        }
+
+        add_2tup(hpp, szp, &res, am.foreign_blocks, foreign_blocks);
     }
 
     return res;
@@ -5459,6 +5750,19 @@ erts_alcu_info(Allctr_t *allctr,
     return res;
 }
 
+void
+erts_alcu_foreign_size(Allctr_t *allctr, ErtsAlcType_t alloc_no, AllctrSize_t *size)
+{
+    /* Without carrier pool support there is nothing to report. */
+    UWord carriers_sz = 0, blocks_sz = 0;
+
+    if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) {
+        cpool_read_stat(allctr, alloc_no, NULL, &carriers_sz, NULL, &blocks_sz);
+    }
+
+    size->carriers = carriers_sz;
+    size->blocks = blocks_sz;
+}
 
 void
 erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t *fi, int fisz)
@@ -5477,7 +5781,7 @@ erts_alcu_current_size(Allctr_t *allctr, AllctrSize_t *size, ErtsAlcUFixInfo_t *
 
     if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) {
 	UWord csz, bsz;
-	cpool_read_stat(allctr, NULL, &csz, NULL, &bsz);
+	cpool_read_stat(allctr, allctr->alloc_no, NULL, &csz, NULL, &bsz);
 	size->blocks += bsz;
 	size->carriers += csz;
     }
@@ -5522,6 +5826,11 @@ do_erts_alcu_alloc(ErtsAlcType_t type, Allctr_t *allctr, Uint size)
 
     ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
 
+    /* Reject sizes that can't fit into the header word. */
+    if (size > ~BLK_FLG_MASK) {
+        return NULL;
+    }
+
 #if ALLOC_ZERO_EQ_NULL
     if (!size)
 	return NULL;
@@ -5688,12 +5997,11 @@ do_erts_alcu_free(ErtsAlcType_t type, Allctr_t *allctr, void *p,
     ERTS_ALCU_DBG_CHK_THR_ACCESS(allctr);
 
     if (p) {
-
 	INC_CC(allctr->calls.this_free);
 
-	if (allctr->fix) {
+        if (ERTS_ALC_IS_FIX_TYPE(type)) {
 	    if (ERTS_ALC_IS_CPOOL_ENABLED(allctr))
-		fix_cpool_free(allctr, type, p, busy_pcrr_pp, 1);
+		fix_cpool_free(allctr, type, 0, p, busy_pcrr_pp);
 	    else
 		fix_nocpool_free(allctr, type, p);
 	}
@@ -5702,7 +6010,7 @@ do_erts_alcu_free(ErtsAlcType_t type, Allctr_t *allctr, void *p,
 	    if (IS_SBC_BLK(blk))
 		destroy_carrier(allctr, blk, NULL);
 	    else
-		mbc_free(allctr, p, busy_pcrr_pp);
+		mbc_free(allctr, type, p, busy_pcrr_pp);
 	}
     }
 }
@@ -5804,6 +6112,11 @@ do_erts_alcu_realloc(ErtsAlcType_t type,
 	return res;
     }
 
+    /* Reject sizes that can't fit into the header word. */
+    if (size > ~BLK_FLG_MASK) {
+        return NULL;
+    }
+
 #if ALLOC_ZERO_EQ_NULL
     if (!size) {
 	ASSERT(p);
@@ -5820,7 +6133,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type,
 
     if (size < allctr->sbc_threshold) {
 	if (IS_MBC_BLK(blk))
-	    res = mbc_realloc(allctr, p, size, alcu_flgs, busy_pcrr_pp);
+	    res = mbc_realloc(allctr, type, p, size, alcu_flgs, busy_pcrr_pp);
 	else {
 	    Uint used_sz = SBC_HEADER_SIZE + ABLK_HDR_SZ + size;
 	    Uint crr_sz;
@@ -5879,7 +6192,7 @@ do_erts_alcu_realloc(ErtsAlcType_t type,
 		sys_memcpy((void *) res,
 			   (void *) p,
 			   MIN(MBC_ABLK_SZ(blk) - ABLK_HDR_SZ, size));
-		mbc_free(allctr, p, busy_pcrr_pp);
+		mbc_free(allctr, type, p, busy_pcrr_pp);
 	    }
 	    else
 		res = NULL;
@@ -6247,6 +6560,7 @@ int
 erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
 {
     /* erts_alcu_start assumes that allctr has been zeroed */
+    int i;
 
     if (((UWord)allctr & ERTS_CRR_ALCTR_FLG_MASK) != 0) {
         erts_exit(ERTS_ABORT_EXIT, "%s:%d:erts_alcu_start: Alignment error\n",
@@ -6270,6 +6584,11 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
 
     allctr->ix				= init->ix;
     allctr->alloc_no			= init->alloc_no;
+    allctr->alloc_strat			= init->alloc_strat;
+
+    ASSERT(allctr->alloc_no >= ERTS_ALC_A_MIN &&
+           allctr->alloc_no <= ERTS_ALC_A_MAX);
+
     if (allctr->alloc_no < ERTS_ALC_A_MIN
 	|| ERTS_ALC_A_MAX < allctr->alloc_no)
 	allctr->alloc_no = ERTS_ALC_A_INVALID;
@@ -6322,8 +6641,7 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
 						       + sizeof(FreeBlkFtr_t));
     if (init->tpref) {
 	Uint sz = ABLK_HDR_SZ;
-	sz += (init->fix ? 
-	       sizeof(ErtsAllctrFixDDBlock_t) : sizeof(ErtsAllctrDDBlock_t));
+	sz += sizeof(ErtsAllctrDDBlock_t);
 	sz = UNIT_CEILING(sz);
 	if (sz > allctr->min_block_size)
 	    allctr->min_block_size = sz;
@@ -6334,15 +6652,29 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
     allctr->cpool.dc_list.last = NULL;
     allctr->cpool.abandon_limit = 0;
     allctr->cpool.disable_abandon = 0;
-    erts_atomic_init_nob(&allctr->cpool.stat.blocks_size, 0);
-    erts_atomic_init_nob(&allctr->cpool.stat.no_blocks, 0);
+    for (i = ERTS_ALC_A_MIN; i <= ERTS_ALC_A_MAX; i++) {
+        erts_atomic_init_nob(&allctr->cpool.stat.blocks_size[i], 0);
+        erts_atomic_init_nob(&allctr->cpool.stat.no_blocks[i], 0);
+    }
     erts_atomic_init_nob(&allctr->cpool.stat.carriers_size, 0);
     erts_atomic_init_nob(&allctr->cpool.stat.no_carriers, 0);
-    allctr->cpool.check_limit_count = ERTS_ALC_CPOOL_CHECK_LIMIT_COUNT;
     if (!init->ts && init->acul && init->acnl) {
+        ASSERT(allctr->add_mbc);
+        ASSERT(allctr->remove_mbc);
+        ASSERT(allctr->largest_fblk_in_mbc);
+        ASSERT(allctr->first_fblk_in_mbc);
+        ASSERT(allctr->next_fblk_in_mbc);
+
         allctr->cpool.util_limit = init->acul;
         allctr->cpool.in_pool_limit = init->acnl;
         allctr->cpool.fblk_min_limit = init->acfml;
+
+        if (allctr->alloc_strat == ERTS_ALC_S_FIRSTFIT) {
+            allctr->cpool.sentinel = &firstfit_carrier_pool.sentinel;
+        }
+        else if (allctr->alloc_no != ERTS_ALC_A_TEST) {
+            ERTS_INTERNAL_ERROR("Impossible carrier migration config.");
+        }
     }
     else {
         allctr->cpool.util_limit = 0;
@@ -6350,6 +6682,12 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
         allctr->cpool.fblk_min_limit = 0;
     }
 
+    /* The invasive tests don't really care whether the pool is enabled or not,
+     * so we need to set this unconditionally for this allocator type. */
+    if (allctr->alloc_no == ERTS_ALC_A_TEST) {
+        allctr->cpool.sentinel = &test_carrier_pool.sentinel;
+    }
+
     allctr->sbc_threshold = adjust_sbct(allctr, init->sbct);
 
 #if HAVE_ERTS_MSEG
@@ -6461,9 +6799,9 @@ erts_alcu_start(Allctr_t *allctr, AllctrInit_t *init)
 	allctr->fix_shrink_scheduled = 0;
 	for (i = 0; i < ERTS_ALC_NO_FIXED_SIZES; i++) {
 	    allctr->fix[i].type_size = init->fix_type_size[i];
+	    allctr->fix[i].type = ERTS_ALC_N2T(i + ERTS_ALC_N_MIN_A_FIXED_SIZE);
 	    allctr->fix[i].list_size = 0;
 	    allctr->fix[i].list = NULL;
-	    ASSERT(allctr->fix[i].type_size >= sizeof(ErtsAllctrFixDDBlock_t));
 	    if (ERTS_ALC_IS_CPOOL_ENABLED(allctr)) {
 		allctr->fix[i].u.cpool.min_list_size = 0;
 		allctr->fix[i].u.cpool.shrink_list = 0;
@@ -6512,12 +6850,16 @@ erts_alcu_stop(Allctr_t *allctr)
 void
 erts_alcu_init(AlcUInit_t *init)
 {
-    int i;
-    for (i = 0; i <= ERTS_ALC_A_MAX; i++) {
-	ErtsAlcCPoolData_t *sentinel = &carrier_pool[i].sentinel;
-	erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel);
-	erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel);
-    }
+    ErtsAlcCPoolData_t *sentinel;
+
+    sentinel = &firstfit_carrier_pool.sentinel;
+    erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel);
+    erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel);
+
+    sentinel = &test_carrier_pool.sentinel;
+    erts_atomic_init_nob(&sentinel->next, (erts_aint_t) sentinel);
+    erts_atomic_init_nob(&sentinel->prev, (erts_aint_t) sentinel);
+
     ERTS_CT_ASSERT(SBC_BLK_SZ_MASK == MBC_FBLK_SZ_MASK); /* see BLK_SZ */
 #if HAVE_ERTS_MSEG
     ASSERT(erts_mseg_unit_size() == ERTS_SACRR_UNIT_SZ);
@@ -6528,6 +6870,8 @@ erts_alcu_init(AlcUInit_t *init)
 #endif
     allow_sys_alloc_carriers = init->sac;
 
+    sys_page_size = erts_sys_get_page_size();
+
 #ifdef DEBUG
     carrier_alignment = sizeof(Unit_t);
 #endif
@@ -6699,7 +7043,7 @@ static int blockscan_cpool_yielding(blockscan_t *state)
 {
     ErtsAlcCPoolData_t *sentinel, *cursor;
 
-    sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel;
+    sentinel = (state->allocator)->cpool.sentinel;
     cursor = blockscan_restore_cpool_cursor(state);
 
     if (ERTS_PROC_IS_EXITING(state->process)) {
@@ -6831,11 +7175,8 @@ static int blockscan_sweep_mbcs(blockscan_t *state)
 static int blockscan_sweep_cpool(blockscan_t *state)
 {
     if (state->current_op != blockscan_sweep_cpool) {
-        ErtsAlcCPoolData_t *sentinel;
-
         SET_CARRIER_HDR(&state->dummy_carrier, 0, SCH_MBC, state->allocator);
-        sentinel = &carrier_pool[(state->allocator)->alloc_no].sentinel;
-        state->cpool_cursor = sentinel;
+        state->cpool_cursor = (state->allocator)->cpool.sentinel;
     }
 
     state->current_op = blockscan_sweep_cpool;
@@ -7119,11 +7460,14 @@ static int gather_ahist_scan(Allctr_t *allocator,
         alcu_atag_t tag;
 
         block = SBC2BLK(allocator, carrier);
-        tag = GET_BLK_ATAG(block);
 
-        ASSERT(DBG_IS_VALID_ATAG(allocator, tag));
+        if (BLK_HAS_ATAG(block)) {
+            tag = GET_BLK_ATAG(block);
+
+            ASSERT(DBG_IS_VALID_ATAG(tag));
 
-        gather_ahist_update(state, tag, SBC_BLK_SZ(block));
+            gather_ahist_update(state, tag, SBC_BLK_SZ(block));
+        }
     } else {
         UWord scanned_bytes = MBC_HEADER_SIZE(allocator);
 
@@ -7134,10 +7478,10 @@ static int gather_ahist_scan(Allctr_t *allocator,
         while (1) {
             UWord block_size = MBC_BLK_SZ(block);
 
-            if (IS_ALLOCED_BLK(block)) {
+            if (IS_ALLOCED_BLK(block) && BLK_HAS_ATAG(block)) {
                 alcu_atag_t tag = GET_BLK_ATAG(block);
 
-                ASSERT(DBG_IS_VALID_ATAG(allocator, tag));
+                ASSERT(DBG_IS_VALID_ATAG(tag));
 
                 gather_ahist_update(state, tag, block_size);
             }
@@ -7297,8 +7641,6 @@ int erts_alcu_gather_alloc_histograms(Process *p, int allocator_num,
                                           sched_id,
                                           &allocator)) {
         return 0;
-    } else if (!allocator->atags) {
-        return 0;
     }
 
     ensure_atoms_initialized(allocator);
diff --git a/erts/emulator/beam/erl_alloc_util.h b/erts/emulator/beam/erl_alloc_util.h
index f26ace1534..ea1afe8f58 100644
--- a/erts/emulator/beam/erl_alloc_util.h
+++ b/erts/emulator/beam/erl_alloc_util.h
@@ -24,6 +24,7 @@
 #define ERTS_ALCU_VSN_STR "3.0"
 
 #include "erl_alloc_types.h"
+#include "erl_alloc.h"
 #define ERL_THREADS_EMU_INTERNAL__
 #include "erl_threads.h"
 
@@ -44,6 +45,7 @@ typedef struct {
 typedef struct {
     char *name_prefix;
     ErtsAlcType_t alloc_no;
+    ErtsAlcStrat_t alloc_strat;
     int force;
     int ix;
     int ts;
@@ -101,6 +103,7 @@ typedef struct {
 #define ERTS_DEFAULT_ALLCTR_INIT {                                         \
     NULL,                                                                  \
     ERTS_ALC_A_INVALID,	/* (number) alloc_no: allocator number           */\
+    ERTS_ALC_S_INVALID,	/* (number) alloc_strat: allocator strategy      */\
     0,			/* (bool)   force:  force enabled                */\
     0,			/* (number) ix: instance index                   */\
     1,			/* (bool)   ts:     thread safe                  */\
@@ -138,6 +141,7 @@ typedef struct {
 #define ERTS_DEFAULT_ALLCTR_INIT {                                         \
     NULL,                                                                  \
     ERTS_ALC_A_INVALID,	/* (number) alloc_no: allocator number           */\
+    ERTS_ALC_S_INVALID,	/* (number) alloc_strat: allocator strategy      */\
     0,			/* (bool)   force:  force enabled                */\
     0,			/* (number) ix: instance index                   */\
     1,			/* (bool)   ts:     thread safe                  */\
@@ -188,6 +192,7 @@ Eterm	erts_alcu_info(Allctr_t *, int, int, fmtfn_t *, void *, Uint **, Uint *);
 void	erts_alcu_init(AlcUInit_t *);
 void    erts_alcu_current_size(Allctr_t *, AllctrSize_t *,
 			       ErtsAlcUFixInfo_t *, int);
+void    erts_alcu_foreign_size(Allctr_t *, ErtsAlcType_t, AllctrSize_t *);
 void    erts_alcu_check_delayed_dealloc(Allctr_t *, int, int *, ErtsThrPrgrVal *, int *);
 erts_aint32_t erts_alcu_fix_alloc_shrink(Allctr_t *, erts_aint32_t);
 
@@ -286,10 +291,18 @@ void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp);
 #define UNIT_FLOOR(X)	((X) & UNIT_MASK)
 #define UNIT_CEILING(X)	UNIT_FLOOR((X) + INV_UNIT_MASK)
 
-#define FLG_MASK		INV_UNIT_MASK
-#define SBC_BLK_SZ_MASK         UNIT_MASK
-#define MBC_FBLK_SZ_MASK        UNIT_MASK
-#define CARRIER_SZ_MASK         UNIT_MASK
+/* We store flags in the bits that no one will ever use. Generally these are
+ * the bits below the alignment size, but for blocks we also steal the highest
+ * bit since the header's a size and no one can expect to be able to allocate
+ * objects that large. */
+#define HIGHEST_WORD_BIT        (((UWord) 1) << (sizeof(UWord) * CHAR_BIT - 1))
+
+#define BLK_FLG_MASK            (INV_UNIT_MASK | HIGHEST_WORD_BIT)
+#define SBC_BLK_SZ_MASK         (~BLK_FLG_MASK)
+#define MBC_FBLK_SZ_MASK        (~BLK_FLG_MASK)
+
+#define CRR_FLG_MASK        INV_UNIT_MASK
+#define CRR_SZ_MASK         UNIT_MASK
 
 #if ERTS_HAVE_MSEG_SUPER_ALIGNED \
     || (!HAVE_ERTS_MSEG && ERTS_HAVE_ERTS_SYS_ALIGNED_ALLOC)
@@ -299,9 +312,9 @@ void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp);
 #    define ERTS_SUPER_ALIGN_BITS 18
 #  endif
 #  ifdef ARCH_64 
-#    define MBC_ABLK_OFFSET_BITS   24
+#    define MBC_ABLK_OFFSET_BITS   23
 #  else
-#    define MBC_ABLK_OFFSET_BITS   9
+#    define MBC_ABLK_OFFSET_BITS   8
      /* Affects hard limits for sbct and lmbcs documented in erts_alloc.xml */
 #  endif
 #  define ERTS_SACRR_UNIT_SHIFT		ERTS_SUPER_ALIGN_BITS
@@ -322,18 +335,17 @@ void erts_alcu_sched_spec_data_init(struct ErtsSchedulerData_ *esdp);
 
 #if MBC_ABLK_OFFSET_BITS
 #  define MBC_ABLK_OFFSET_SHIFT  (sizeof(UWord)*8 - MBC_ABLK_OFFSET_BITS)
-#  define MBC_ABLK_OFFSET_MASK   (~((UWord)0) << MBC_ABLK_OFFSET_SHIFT)
-#  define MBC_ABLK_SZ_MASK	(~MBC_ABLK_OFFSET_MASK & ~FLG_MASK)
+#  define MBC_ABLK_OFFSET_MASK   ((~((UWord)0) << MBC_ABLK_OFFSET_SHIFT) & ~BLK_FLG_MASK)
+#  define MBC_ABLK_SZ_MASK	(~MBC_ABLK_OFFSET_MASK & ~BLK_FLG_MASK)
 #else
-#  define MBC_ABLK_SZ_MASK	(~FLG_MASK)
+#  define MBC_ABLK_SZ_MASK	(~BLK_FLG_MASK)
 #endif
 
 #define MBC_ABLK_SZ(B) (ASSERT(!is_sbc_blk(B)), (B)->bhdr & MBC_ABLK_SZ_MASK)
 #define MBC_FBLK_SZ(B) (ASSERT(!is_sbc_blk(B)), (B)->bhdr & MBC_FBLK_SZ_MASK)
 #define SBC_BLK_SZ(B) (ASSERT(is_sbc_blk(B)), (B)->bhdr & SBC_BLK_SZ_MASK)
 
-#define CARRIER_SZ(C) \
-  ((C)->chdr & CARRIER_SZ_MASK)
+#define CARRIER_SZ(C) ((C)->chdr & CRR_SZ_MASK)
 
 typedef union {char c[ERTS_ALLOC_ALIGN_BYTES]; long l; double d;} Unit_t;
 
@@ -351,12 +363,20 @@ typedef struct {
 #endif
 } Block_t;
 
-typedef union ErtsAllctrDDBlock_t_ ErtsAllctrDDBlock_t;
+typedef struct ErtsAllctrDDBlock__ {
+    union  {
+        struct ErtsAllctrDDBlock__ *ptr_next; /* next link as a plain pointer */
+        erts_atomic_t atmc_next;              /* same link as an atomic word */
+    } u;
+    ErtsAlcType_t type;
+    Uint32 flags; /* NOTE(review): presumably DEALLOC_FLG_* bits -- confirm */
+} ErtsAllctrDDBlock_t;
 
-union ErtsAllctrDDBlock_t_ {
-    erts_atomic_t atmc_next;
-    ErtsAllctrDDBlock_t *ptr_next;
-};
+/* Deallocation was caused by shrinking a fix-list, so usage statistics have
+ * already been updated. */
+#define DEALLOC_FLG_FIX_SHRINK    (1 << 0)
+/* Deallocation was redirected to another instance. */
+#define DEALLOC_FLG_REDIRECTED    (1 << 1)
 
 typedef struct {
     Block_t blk;
@@ -365,11 +385,10 @@ typedef struct {
 #endif
 } ErtsFakeDDBlock_t;
 
-
-
 #define THIS_FREE_BLK_HDR_FLG 	(((UWord) 1) << 0)
 #define PREV_FREE_BLK_HDR_FLG 	(((UWord) 1) << 1)
 #define LAST_BLK_HDR_FLG 	(((UWord) 1) << 2)
+#define ATAG_BLK_HDR_FLG 	HIGHEST_WORD_BIT
 
 #define SBC_BLK_HDR_FLG /* Special flag combo for (allocated) SBC blocks */\
     (THIS_FREE_BLK_HDR_FLG | PREV_FREE_BLK_HDR_FLG | LAST_BLK_HDR_FLG)
@@ -381,9 +400,9 @@ typedef struct {
 #define HOMECOMING_MBC_BLK_HDR (THIS_FREE_BLK_HDR_FLG | LAST_BLK_HDR_FLG)
 
 #define IS_FREE_LAST_MBC_BLK(B) \
-    (((B)->bhdr & FLG_MASK) == (THIS_FREE_BLK_HDR_FLG | LAST_BLK_HDR_FLG))
+    (((B)->bhdr & BLK_FLG_MASK) == (THIS_FREE_BLK_HDR_FLG | LAST_BLK_HDR_FLG))
 
-#define IS_SBC_BLK(B) (((B)->bhdr & FLG_MASK) == SBC_BLK_HDR_FLG)
+#define IS_SBC_BLK(B) (((B)->bhdr & SBC_BLK_HDR_FLG) == SBC_BLK_HDR_FLG)
 #define IS_MBC_BLK(B) (!IS_SBC_BLK((B)))
 #define IS_FREE_BLK(B) (ASSERT(IS_MBC_BLK(B)), \
 			(B)->bhdr & THIS_FREE_BLK_HDR_FLG)
@@ -394,7 +413,8 @@ typedef struct {
 #  define ABLK_TO_MBC(B) \
     (ASSERT(IS_MBC_BLK(B) && !IS_FREE_BLK(B)), \
      (Carrier_t*)((ERTS_SACRR_UNIT_FLOOR((UWord)(B)) - \
-		  (((B)->bhdr >> MBC_ABLK_OFFSET_SHIFT) << ERTS_SACRR_UNIT_SHIFT))))
+		  ((((B)->bhdr & ~BLK_FLG_MASK) >> MBC_ABLK_OFFSET_SHIFT) \
+                      << ERTS_SACRR_UNIT_SHIFT))))
 #  define BLK_TO_MBC(B) (IS_FREE_BLK(B) ? FBLK_TO_MBC(B) : ABLK_TO_MBC(B))
 #else
 #  define FBLK_TO_MBC(B) ((B)->carrier)
@@ -433,8 +453,9 @@ typedef struct {
     ErtsThrPrgrVal thr_prgr;
     erts_atomic_t max_size;
     UWord abandon_limit;
-    UWord blocks;
-    UWord blocks_size;
+    UWord blocks[ERTS_ALC_A_MAX + 1];
+    UWord blocks_size[ERTS_ALC_A_MAX + 1];
+    UWord total_blocks_size;
     enum {
         ERTS_MBC_IS_HOME,
         ERTS_MBC_WAS_POOLED,
@@ -452,7 +473,7 @@ struct Carrier_t_ {
 };
 
 #define ERTS_ALC_CARRIER_TO_ALLCTR(C) \
-  ((Allctr_t *) (erts_atomic_read_nob(&(C)->allctr) & ~FLG_MASK))
+  ((Allctr_t *) (erts_atomic_read_nob(&(C)->allctr) & ~CRR_FLG_MASK))
 
 typedef struct {
     Carrier_t *first;
@@ -530,7 +551,6 @@ typedef struct {
     } head;
 } ErtsAllctrDDQueue_t;
 
-
 typedef struct {
     size_t type_size;
     SWord list_size;
@@ -549,6 +569,7 @@ typedef struct {
 	    UWord used;
 	} cpool;
     } u;
+    ErtsAlcType_t type;
 } ErtsAlcFixList_t;
 
 struct Allctr_t_ {
@@ -569,6 +590,9 @@ struct Allctr_t_ {
     /* Allocator number */
     ErtsAlcType_t	alloc_no;
 
+    /* Allocator strategy */
+    ErtsAlcStrat_t	alloc_strat;
+
     /* Instance index */
     int			ix;
 
@@ -617,6 +641,9 @@ struct Allctr_t_ {
 	AOFF_RBTree_t*   pooled_tree;
 	CarrierList_t	 dc_list;
 
+        /* the sentinel of the cpool we're attached to */
+        ErtsAlcCPoolData_t  *sentinel;
+
 	UWord		abandon_limit;
 	int		disable_abandon;
 	int		check_limit_count;
@@ -624,8 +651,8 @@ struct Allctr_t_ {
         UWord           in_pool_limit;    /* acnl */
         UWord           fblk_min_limit;   /* acmfl */
 	struct {
-	    erts_atomic_t	blocks_size;
-	    erts_atomic_t	no_blocks;
+	    erts_atomic_t	blocks_size[ERTS_ALC_A_MAX + 1];
+	    erts_atomic_t	no_blocks[ERTS_ALC_A_MAX + 1];
 	    erts_atomic_t	carriers_size;
 	    erts_atomic_t	no_carriers;
             CallCounter_t       fail_pooled;
@@ -657,10 +684,12 @@ struct Allctr_t_ {
     void		(*creating_mbc)		(Allctr_t *, Carrier_t *);
     void		(*destroying_mbc)	(Allctr_t *, Carrier_t *);
 
-    /* The three callbacks below are needed to support carrier migration */
+    /* The five callbacks below are needed to support carrier migration. */
     void		(*add_mbc)		(Allctr_t *, Carrier_t *);
     void		(*remove_mbc)	        (Allctr_t *, Carrier_t *);
     UWord		(*largest_fblk_in_mbc)  (Allctr_t *, Carrier_t *);
+    Block_t *           (*first_fblk_in_mbc)     (Allctr_t *, Carrier_t *);
+    Block_t *           (*next_fblk_in_mbc)      (Allctr_t *, Carrier_t *, Block_t *);
 
 #if HAVE_ERTS_MSEG
     void*               (*mseg_alloc)(Allctr_t*, Uint *size_p, Uint flags);
diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.c b/erts/emulator/beam/erl_ao_firstfit_alloc.c
index 3f0ab33597..f2ad2f6532 100644
--- a/erts/emulator/beam/erl_ao_firstfit_alloc.c
+++ b/erts/emulator/beam/erl_ao_firstfit_alloc.c
@@ -107,9 +107,11 @@ typedef struct AOFF_Carrier_t_ AOFF_Carrier_t;
 
 struct AOFF_Carrier_t_ {
     Carrier_t crr;
-    AOFF_RBTree_t rbt_node;     /* My node in the carrier tree */
-    AOFF_RBTree_t* root;        /* Root of my block tree */
+    AOFF_RBTree_t rbt_node;        /* My node in the carrier tree */
+    AOFF_RBTree_t* root;           /* Root of my block tree */
+    enum AOFFSortOrder blk_order;
 };
+
 #define RBT_NODE_TO_MBC(PTR) ErtsContainerStruct((PTR), AOFF_Carrier_t, rbt_node)
 
 /* 
@@ -239,6 +241,9 @@ static void aoff_add_mbc(Allctr_t*, Carrier_t*);
 static void aoff_remove_mbc(Allctr_t*, Carrier_t*);
 static UWord aoff_largest_fblk_in_mbc(Allctr_t*, Carrier_t*);
 
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *, Carrier_t *);
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *, Carrier_t *, Block_t *);
+
 /* Generic tree functions used by both carrier and block trees. */
 static void rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del);
 static void rbt_insert(enum AOFFSortOrder, AOFF_RBTree_t** root, AOFF_RBTree_t* blk);
@@ -281,15 +286,28 @@ erts_aoffalc_start(AOFFAllctr_t *alc,
 
     sys_memcpy((void *) alc, (void *) &zero.allctr, sizeof(AOFFAllctr_t));
 
+    if (aoffinit->blk_order == FF_CHAOS) {
+        const enum AOFFSortOrder orders[3] = {FF_AOFF, FF_AOBF, FF_BF};
+        int index = init->ix % (sizeof(orders) / sizeof(orders[0]));
+
+        ASSERT(init->alloc_no == ERTS_ALC_A_TEST);
+        aoffinit->blk_order = orders[index];
+    }
+
+    if (aoffinit->crr_order == FF_CHAOS) {
+        const enum AOFFSortOrder orders[2] = {FF_AGEFF, FF_AOFF};
+        int index = init->ix % (sizeof(orders) / sizeof(orders[0]));
+
+        ASSERT(init->alloc_no == ERTS_ALC_A_TEST);
+        aoffinit->crr_order = orders[index];
+    }
+
     alc->blk_order                      = aoffinit->blk_order;
     alc->crr_order                      = aoffinit->crr_order;
     allctr->mbc_header_size		= sizeof(AOFF_Carrier_t);
     allctr->min_mbc_size		= MIN_MBC_SZ;
     allctr->min_mbc_first_free_size	= MIN_MBC_FIRST_FREE_SZ;
-    allctr->min_block_size = (aoffinit->blk_order == FF_BF
-                              ? (offsetof(AOFF_RBTree_t, u.next)
-                                 + ErtsSizeofMember(AOFF_RBTree_t, u.next))
-                              : offsetof(AOFF_RBTree_t, u));
+    allctr->min_block_size              = sizeof(AOFF_RBTree_t);
 
     allctr->vsn_str			= ERTS_ALC_AOFF_ALLOC_VSN_STR;
 
@@ -311,6 +329,8 @@ erts_aoffalc_start(AOFFAllctr_t *alc,
     allctr->add_mbc                     = aoff_add_mbc;
     allctr->remove_mbc                  = aoff_remove_mbc;
     allctr->largest_fblk_in_mbc         = aoff_largest_fblk_in_mbc;
+    allctr->first_fblk_in_mbc           = aoff_first_fblk_in_mbc;
+    allctr->next_fblk_in_mbc            = aoff_next_fblk_in_mbc;
     allctr->init_atoms			= init_atoms;
 
 #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
@@ -512,14 +532,15 @@ tree_insert_fixup(AOFF_RBTree_t** root, AOFF_RBTree_t *blk)
 static void
 aoff_unlink_free_block(Allctr_t *allctr, Block_t *blk)
 {
-    AOFFAllctr_t* alc = (AOFFAllctr_t*)allctr;
     AOFF_RBTree_t* del = (AOFF_RBTree_t*)blk;
     AOFF_Carrier_t *crr = (AOFF_Carrier_t*) FBLK_TO_MBC(&del->hdr);
 
+    (void)allctr;
+
     ASSERT(crr->rbt_node.hdr.bhdr == crr->root->max_sz);
-    HARD_CHECK_TREE(&crr->crr, alc->blk_order, crr->root, 0);
+    HARD_CHECK_TREE(&crr->crr, crr->blk_order, crr->root, 0);
 
-    if (alc->blk_order == FF_BF) {
+    if (crr->blk_order == FF_BF) {
 	ASSERT(del->flags & IS_BF_FLG);
 	if (IS_LIST_ELEM(del)) {
 	    /* Remove from list */
@@ -540,14 +561,14 @@ aoff_unlink_free_block(Allctr_t *allctr, Block_t *blk)
 	    
 	    replace(&crr->root, (AOFF_RBTree_t*)del, LIST_NEXT(del));
 	    
-	    HARD_CHECK_TREE(&crr->crr, alc->blk_order, crr->root, 0);
+	    HARD_CHECK_TREE(&crr->crr, crr->blk_order, crr->root, 0);
 	    return;
 	}
     }
 
     rbt_delete(&crr->root, (AOFF_RBTree_t*)del);
 
-    HARD_CHECK_TREE(&crr->crr, alc->blk_order, crr->root, 0);
+    HARD_CHECK_TREE(&crr->crr, crr->blk_order, crr->root, 0);
 
     /* Update the carrier tree with a potentially new (lower) max_sz
      */    
@@ -737,17 +758,18 @@ rbt_delete(AOFF_RBTree_t** root, AOFF_RBTree_t* del)
 static void
 aoff_link_free_block(Allctr_t *allctr, Block_t *block)
 {
-    AOFFAllctr_t* alc = (AOFFAllctr_t*) allctr;
     AOFF_RBTree_t *blk = (AOFF_RBTree_t *) block;
     AOFF_RBTree_t *crr_node;
     AOFF_Carrier_t *blk_crr = (AOFF_Carrier_t*) FBLK_TO_MBC(block);
     Uint blk_sz = AOFF_BLK_SZ(blk);
 
+    (void)allctr;
+
     ASSERT(allctr == ERTS_ALC_CARRIER_TO_ALLCTR(&blk_crr->crr));
     ASSERT(blk_crr->rbt_node.hdr.bhdr == (blk_crr->root ? blk_crr->root->max_sz : 0));
-    HARD_CHECK_TREE(&blk_crr->crr, alc->blk_order, blk_crr->root, 0);
+    HARD_CHECK_TREE(&blk_crr->crr, blk_crr->blk_order, blk_crr->root, 0);
 
-    rbt_insert(alc->blk_order, &blk_crr->root, blk);
+    rbt_insert(blk_crr->blk_order, &blk_crr->root, blk);
 
     /*
      * Update carrier tree with a potentially new (larger) max_sz
@@ -891,7 +913,7 @@ aoff_get_free_block(Allctr_t *allctr, Uint size,
     /* Get block within carrier tree
      */
 #ifdef HARD_DEBUG
-    dbg_blk = HARD_CHECK_TREE(&crr->crr, alc->blk_order, crr->root, size);
+    dbg_blk = HARD_CHECK_TREE(&crr->crr, crr->blk_order, crr->root, size);
 #endif
 
     blk = rbt_search(crr->root, size);
@@ -904,7 +926,7 @@ aoff_get_free_block(Allctr_t *allctr, Uint size,
     if (!blk)
 	return NULL;
 
-    if (cand_blk && cmp_cand_blk(alc->blk_order, cand_blk, blk) < 0) {
+    if (cand_blk && cmp_cand_blk(crr->blk_order, cand_blk, blk) < 0) {
 	return NULL; /* cand_blk was better */
     }
 
@@ -927,21 +949,28 @@ static void aoff_creating_mbc(Allctr_t *allctr, Carrier_t *carrier)
     AOFFAllctr_t *alc = (AOFFAllctr_t *) allctr;
     AOFF_Carrier_t *crr = (AOFF_Carrier_t*) carrier;
     AOFF_RBTree_t **root = &alc->mbc_root;
+    Sint64 bt = get_birth_time();
 
     HARD_CHECK_TREE(NULL, alc->crr_order, *root, 0);
 
     crr->rbt_node.hdr.bhdr = 0;
-    if (alc->crr_order == FF_AGEFF || IS_DEBUG) {
-        Sint64 bt = get_birth_time();
-        crr->rbt_node.u.birth_time = bt;
-        crr->crr.cpool.pooled.u.birth_time = bt;
-    }
+
+    /* While birth time is only used for FF_AGEFF, we have to set it for all
+     * types as the carrier can be migrated to an instance that uses it, and
+     * we don't want to mess its order up. */
+    crr->rbt_node.u.birth_time = bt;
+    crr->crr.cpool.pooled.u.birth_time = bt;
+
     rbt_insert(alc->crr_order, root, &crr->rbt_node);
 
     /* aoff_link_free_block will add free block later */
     crr->root = NULL;
 
     HARD_CHECK_TREE(NULL, alc->crr_order, *root, 0);
+
+    /* When a carrier has been migrated, its block order may differ from that
+     * of the allocator it's been migrated to. */
+    crr->blk_order = alc->blk_order;
 }
 
 #define IS_CRR_IN_TREE(CRR,ROOT) \
@@ -1034,6 +1063,62 @@ static UWord aoff_largest_fblk_in_mbc(Allctr_t* allctr, Carrier_t* carrier)
     return crr->rbt_node.hdr.bhdr;
 }
 
+static Block_t *aoff_first_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier)
+{
+    AOFF_Carrier_t *crr = (AOFF_Carrier_t*)carrier;
+
+    (void)allctr; /* Unused; kept to match the first_fblk_in_mbc callback. */
+
+    if (crr->root) {
+        AOFF_RBTree_t *blk;
+
+        /* Iteration starts at the rightmost block of the carrier's tree. */
+        for (blk = crr->root; blk->right; blk = blk->right);
+
+        return (Block_t*)blk;
+    }
+
+    return NULL; /* Empty block tree: the carrier has no free blocks. */
+}
+
+static Block_t *aoff_next_fblk_in_mbc(Allctr_t *allctr, Carrier_t *carrier,
+                                      Block_t *block)
+{
+    AOFF_RBTree_t *parent, *blk;
+
+    (void)allctr;  /* Unused; kept to match the next_fblk_in_mbc callback. */
+    (void)carrier; /* Unused; the walk only follows the block's own links. */
+
+    blk = (AOFF_RBTree_t*)block;
+
+    if (blk->left) {
+        /* Descend to the rightmost block of the left subtree. */
+        for (blk = blk->left; blk->right; blk = blk->right);
+
+        return (Block_t*)blk;
+    }
+
+    while (blk->parent) {
+        parent = blk->parent;
+
+        /* If we ascend from the right we know we haven't visited our parent
+         * yet, because we always descend as far as we can to the right when
+         * entering a subtree. */
+        if (parent->right == blk) {
+            ASSERT(parent->left != blk);
+            return (Block_t*)parent;
+        }
+
+        /* If we ascend from the left we know we've already visited our
+         * parent, and will need to keep ascending until we do so from the
+         * right or reach the end of the tree. */
+        ASSERT(parent->left == blk);
+        blk = parent;
+    }
+
+    return NULL; /* Ascended past the root: the tree walk is complete. */
+}
+
 /*
  * info_options()
  */
diff --git a/erts/emulator/beam/erl_ao_firstfit_alloc.h b/erts/emulator/beam/erl_ao_firstfit_alloc.h
index 68df9e0a49..9c9b98da86 100644
--- a/erts/emulator/beam/erl_ao_firstfit_alloc.h
+++ b/erts/emulator/beam/erl_ao_firstfit_alloc.h
@@ -32,7 +32,12 @@ enum AOFFSortOrder {
     FF_AGEFF = 0,    /* carrier trees only */
     FF_AOFF  = 1,
     FF_AOBF  = 2,    /* block trees only */
-    FF_BF    = 3     /* block trees only */
+    FF_BF    = 3,    /* block trees only */
+
+    FF_CHAOS = -1    /* A test-specific sort order that picks one of the above
+                      * based on the instance index. Used to test that
+                      * carriers created under one order will work fine after
+                      * being migrated to another. */
 };
 
 typedef struct {
diff --git a/erts/emulator/beam/erl_arith.c b/erts/emulator/beam/erl_arith.c
index 144fb56ea5..68d1cd989e 100644
--- a/erts/emulator/beam/erl_arith.c
+++ b/erts/emulator/beam/erl_arith.c
@@ -52,19 +52,11 @@ static ERTS_INLINE void maybe_shrink(Process* p, Eterm* hp, Eterm res, Uint allo
     Uint actual;
 
     if (is_immed(res)) {
-	if (p->heap <= hp && hp < p->htop) {
-	    p->htop = hp;
-	}
-	else {
-	    erts_heap_frag_shrink(p, hp);
-	}
+        ASSERT(!(p->heap <= hp && hp < p->htop));
+        erts_heap_frag_shrink(p, hp);
     } else if ((actual = bignum_header_arity(*hp)+1) < alloc) {
-	if (p->heap <= hp && hp < p->htop) {
-	    p->htop = hp+actual;
-	}
-	else {
-	    erts_heap_frag_shrink(p, hp+actual);
-	}
+        ASSERT(!(p->heap <= hp && hp < p->htop));
+        erts_heap_frag_shrink(p, hp+actual);
     }
 }
 
@@ -246,7 +238,7 @@ shift(Process* p, Eterm arg1, Eterm arg2, int right)
 		    BIF_ERROR(p, SYSTEM_LIMIT);
 		}
 		need = BIG_NEED_SIZE(ires+1);
-		bigp = HAlloc(p, need);
+		bigp = HeapFragOnlyAlloc(p, need);
 		arg1 = big_lshift(arg1, i, bigp);
 		maybe_shrink(p, bigp, arg1, need);
 		if (is_nil(arg1)) {
@@ -298,7 +290,7 @@ BIF_RETTYPE bnot_1(BIF_ALIST_1)
 	ret = make_small(~signed_val(BIF_ARG_1));
     } else if (is_big(BIF_ARG_1)) {
 	Uint need = BIG_NEED_SIZE(big_size(BIF_ARG_1)+1);
-	Eterm* bigp = HAlloc(BIF_P, need);
+	Eterm* bigp = HeapFragOnlyAlloc(BIF_P, need);
 
 	ret = big_bnot(BIF_ARG_1, bigp);
 	maybe_shrink(BIF_P, bigp, ret, need);
@@ -343,7 +335,7 @@ erts_mixed_plus(Process* p, Eterm arg1, Eterm arg2)
 		    if (IS_SSMALL(ires)) {
 			return make_small(ires);
 		    } else {
-			hp = HAlloc(p, 2);
+			hp = HeapFragOnlyAlloc(p, 2);
 			res = small_to_big(ires, hp);
 			return res;
 		    }
@@ -400,7 +392,7 @@ erts_mixed_plus(Process* p, Eterm arg1, Eterm arg2)
 		    sz2 = big_size(arg2);
 		    sz = MAX(sz1, sz2)+1;
 		    need_heap = BIG_NEED_SIZE(sz);
-		    hp = HAlloc(p, need_heap);
+		    hp = HeapFragOnlyAlloc(p, need_heap);
 		    res = big_plus(arg1, arg2, hp);
 		    maybe_shrink(p, hp, res, need_heap);
 		    if (is_nil(res)) {
@@ -446,7 +438,7 @@ erts_mixed_plus(Process* p, Eterm arg1, Eterm arg2)
 		do_float:
 		    f1.fd = f1.fd + f2.fd;
 		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    hp = HAlloc(p, FLOAT_SIZE_OBJECT);
+		    hp = HeapFragOnlyAlloc(p, FLOAT_SIZE_OBJECT);
 		    res = make_float(hp);
 		    PUT_DOUBLE(f1, hp);
 		    return res;
@@ -488,7 +480,7 @@ erts_mixed_minus(Process* p, Eterm arg1, Eterm arg2)
 		    if (IS_SSMALL(ires)) {
 			return make_small(ires);
 		    } else {
-			hp = HAlloc(p, 2);
+			hp = HeapFragOnlyAlloc(p, 2);
 			res = small_to_big(ires, hp);
 			return res;
 		    }
@@ -534,7 +526,7 @@ erts_mixed_minus(Process* p, Eterm arg1, Eterm arg2)
 		    sz2 = big_size(arg2);
 		    sz = MAX(sz1, sz2)+1;
 		    need_heap = BIG_NEED_SIZE(sz);
-		    hp = HAlloc(p, need_heap);
+		    hp = HeapFragOnlyAlloc(p, need_heap);
 		    res = big_minus(arg1, arg2, hp);
                     maybe_shrink(p, hp, res, need_heap);
 		    if (is_nil(res)) {
@@ -589,7 +581,7 @@ erts_mixed_minus(Process* p, Eterm arg1, Eterm arg2)
 		do_float:
 		    f1.fd = f1.fd - f2.fd;
 		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    hp = HAlloc(p, FLOAT_SIZE_OBJECT);
+		    hp = HeapFragOnlyAlloc(p, FLOAT_SIZE_OBJECT);
 		    res = make_float(hp);
 		    PUT_DOUBLE(f1, hp);
 		    return res;
@@ -657,7 +649,7 @@ erts_mixed_times(Process* p, Eterm arg1, Eterm arg2)
 			    hdr = big_res[0];
 			    arity = bignum_header_arity(hdr);
 			    ASSERT(arity == 1 || arity == 2);
-			    hp = HAlloc(p, arity+1);
+			    hp = HeapFragOnlyAlloc(p, arity+1);
 			    res = make_big(hp);
 			    *hp++ = hdr;
 			    *hp++ = big_res[1];
@@ -726,7 +718,7 @@ erts_mixed_times(Process* p, Eterm arg1, Eterm arg2)
 
 		do_big:
 		    need_heap = BIG_NEED_SIZE(sz);
-                    hp = HAlloc(p, need_heap);
+                    hp = HeapFragOnlyAlloc(p, need_heap);
 		    res = big_times(arg1, arg2, hp);
 
 		    /*
@@ -779,7 +771,7 @@ erts_mixed_times(Process* p, Eterm arg1, Eterm arg2)
 		do_float:
 		    f1.fd = f1.fd * f2.fd;
 		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    hp = HAlloc(p, FLOAT_SIZE_OBJECT);
+		    hp = HeapFragOnlyAlloc(p, FLOAT_SIZE_OBJECT);
 		    res = make_float(hp);
 		    PUT_DOUBLE(f1, hp);
 		    return res;
@@ -905,7 +897,7 @@ erts_mixed_div(Process* p, Eterm arg1, Eterm arg2)
 		do_float:
 		    f1.fd = f1.fd / f2.fd;
 		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    hp = HAlloc(p, FLOAT_SIZE_OBJECT);
+		    hp = HeapFragOnlyAlloc(p, FLOAT_SIZE_OBJECT);
 		    PUT_DOUBLE(f1, hp);
 		    return make_float(hp);
 		default:
@@ -957,7 +949,7 @@ erts_int_div(Process* p, Eterm arg1, Eterm arg2)
 
 	    ires = big_size(arg2);
 	    need = BIG_NEED_SIZE(i-ires+1) + BIG_NEED_SIZE(i);
-	    hp = HAlloc(p, need);
+	    hp = HeapFragOnlyAlloc(p, need);
 	    arg1 = big_div(arg1, arg2, hp);
 	    maybe_shrink(p, hp, arg1, need);
 	    if (is_nil(arg1)) {
@@ -1004,7 +996,7 @@ erts_int_rem(Process* p, Eterm arg1, Eterm arg2)
 	    arg1 = SMALL_ZERO;
 	} else if (ires > 0) {
 	    Uint need = BIG_NEED_SIZE(big_size(arg1));
-	    Eterm* hp = HAlloc(p, need);
+	    Eterm* hp = HeapFragOnlyAlloc(p, need);
 
 	    arg1 = big_rem(arg1, arg2, hp);
 	    maybe_shrink(p, hp, arg1, need);
@@ -1041,7 +1033,7 @@ Eterm erts_band(Process* p, Eterm arg1, Eterm arg2)
 	return THE_NON_VALUE;
     }
     need = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
-    hp = HAlloc(p, need);
+    hp = HeapFragOnlyAlloc(p, need);
     arg1 = big_band(arg1, arg2, hp);
     ASSERT(is_not_nil(arg1));
     maybe_shrink(p, hp, arg1, need);
@@ -1069,7 +1061,7 @@ Eterm erts_bor(Process* p, Eterm arg1, Eterm arg2)
 	return THE_NON_VALUE;
     }
     need = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
-    hp = HAlloc(p, need);
+    hp = HeapFragOnlyAlloc(p, need);
     arg1 = big_bor(arg1, arg2, hp);
     ASSERT(is_not_nil(arg1));
     maybe_shrink(p, hp, arg1, need);
@@ -1097,7 +1089,7 @@ Eterm erts_bxor(Process* p, Eterm arg1, Eterm arg2)
 	return THE_NON_VALUE;
     }
     need = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);
-    hp = HAlloc(p, need);
+    hp = HeapFragOnlyAlloc(p, need);
     arg1 = big_bxor(arg1, arg2, hp);
     ASSERT(is_not_nil(arg1));
     maybe_shrink(p, hp, arg1, need);
@@ -1110,7 +1102,7 @@ Eterm erts_bnot(Process* p, Eterm arg)
 
     if (is_big(arg)) {
 	Uint need = BIG_NEED_SIZE(big_size(arg)+1);
-	Eterm* bigp = HAlloc(p, need);
+	Eterm* bigp = HeapFragOnlyAlloc(p, need);
 
 	ret = big_bnot(arg, bigp);
 	maybe_shrink(p, bigp, ret, need);
@@ -1125,924 +1117,6 @@ Eterm erts_bnot(Process* p, Eterm arg)
     return ret;
 } 
 
-#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need))
-
-static ERTS_INLINE void
-trim_heap(Process* p, Eterm* hp, Eterm res)
-{
-    if (is_immed(res)) {
-	ASSERT(p->heap <= hp && hp <= p->htop);
-	p->htop = hp;
-    } else {
-	Eterm* new_htop;
-	ASSERT(is_big(res));
-	new_htop = hp + bignum_header_arity(*hp) + 1;
-	ASSERT(p->heap <= new_htop && new_htop <= p->htop);
-	p->htop = new_htop;
-    }
-    ASSERT(p->heap <= p->htop && p->htop <= p->stop);
-}
-
-/*
- * The functions that follow are called from the emulator loop.
- * They are not allowed to allocate heap fragments, but must do
- * a garbage collection if there is insufficient heap space.
- */
-
-#define erts_heap_frag_shrink horrible error
-#define maybe_shrink horrible error
-
-Eterm
-erts_gc_mixed_plus(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    DECLARE_TMP(tmp_big1,0,p);
-    DECLARE_TMP(tmp_big2,1,p);
-    Eterm res;
-    Eterm hdr;
-    FloatDef f1, f2;
-    dsize_t sz1, sz2, sz;
-    int need_heap;
-    Eterm* hp;
-    Sint ires;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    ERTS_FP_CHECK_INIT(p);
-    switch (arg1 & _TAG_PRIMARY_MASK) {
-    case TAG_PRIMARY_IMMED1:
-	switch ((arg1 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    ires = signed_val(arg1) + signed_val(arg2);
-		    if (IS_SSMALL(ires)) {
-			return make_small(ires);
-		    } else {
-			if (ERTS_NEED_GC(p, 2)) {
-			    erts_garbage_collect(p, 2, reg, live);
-			}
-			hp = p->htop;
-			p->htop += 2;
-			res = small_to_big(ires, hp);
-			return res;
-		    }
-		default:
-		badarith:
-		    p->freason = BADARITH;
-		    return THE_NON_VALUE;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    if (arg1 == SMALL_ZERO) {
-			return arg2;
-		    }
-		    arg1 = small_to_big(signed_val(arg1), tmp_big1);
-		    goto do_big;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	default:
-	    goto badarith;
-	}
-    case TAG_PRIMARY_BOXED:
-	hdr = *boxed_val(arg1);
-	switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-	case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    if (arg2 == SMALL_ZERO) {
-			return arg1;
-		    }
-		    arg2 = small_to_big(signed_val(arg2), tmp_big2);
-		    goto do_big;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		do_big:
-		    sz1 = big_size(arg1);
-		    sz2 = big_size(arg2);
-		    sz = MAX(sz1, sz2)+1;
-		    need_heap = BIG_NEED_SIZE(sz);
-		    if (ERTS_NEED_GC(p, need_heap)) {
-			erts_garbage_collect(p, need_heap, reg, live+2);
-			if (ARG_IS_NOT_TMP(arg1,tmp_big1)) {
-			    arg1 = reg[live];
-			}
-			if (ARG_IS_NOT_TMP(arg2,tmp_big2)) {
-			    arg2 = reg[live+1];
-			}
-		    }
-		    hp = p->htop;
-		    p->htop += need_heap;
-		    res = big_plus(arg1, arg2, hp);
-		    trim_heap(p, hp, res);
-		    if (is_nil(res)) {
-			p->freason = SYSTEM_LIMIT;
-			return THE_NON_VALUE;
-		    }
-		    return res;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0) {
-			goto badarith;
-		    }
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    if (big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    GET_DOUBLE(arg2, f2);
-
-		do_float:
-		    f1.fd = f1.fd + f2.fd;
-		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-			erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live);
-		    }
-		    hp = p->htop;
-		    p->htop += FLOAT_SIZE_OBJECT;
-		    res = make_float(hp);
-		    PUT_DOUBLE(f1, hp);
-		    return res;
-		default:
-		    goto badarith;
-		}
-	    default:
-		goto badarith;
-	    }
-	}
-    default:
-	goto badarith;
-    }
-}
-
-Eterm
-erts_gc_mixed_minus(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    DECLARE_TMP(tmp_big1,0,p);
-    DECLARE_TMP(tmp_big2,1,p);
-    Eterm hdr;
-    Eterm res;
-    FloatDef f1, f2;
-    dsize_t sz1, sz2, sz;
-    int need_heap;
-    Eterm* hp;
-    Sint ires;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    ERTS_FP_CHECK_INIT(p);
-    switch (arg1 & _TAG_PRIMARY_MASK) {
-    case TAG_PRIMARY_IMMED1:
-	switch ((arg1 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    ires = signed_val(arg1) - signed_val(arg2);
-		    if (IS_SSMALL(ires)) {
-			return make_small(ires);
-		    } else {
-			if (ERTS_NEED_GC(p, 2)) {
-			    erts_garbage_collect(p, 2, reg, live);
-			}
-			hp = p->htop;
-			p->htop += 2;
-			res = small_to_big(ires, hp);
-			return res;
-		    }
-		default:
-		badarith:
-		    p->freason = BADARITH;
-		    return THE_NON_VALUE;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    arg1 = small_to_big(signed_val(arg1), tmp_big1);
-		    goto do_big;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	default:
-	    goto badarith;
-	}
-    case TAG_PRIMARY_BOXED:
-	hdr = *boxed_val(arg1);
-	switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-	case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    if (arg2 == SMALL_ZERO) {
-			return arg1;
-		    }
-		    arg2 = small_to_big(signed_val(arg2), tmp_big2);
-
-		do_big:
-		    sz1 = big_size(arg1);
-		    sz2 = big_size(arg2);
-		    sz = MAX(sz1, sz2)+1;
-		    need_heap = BIG_NEED_SIZE(sz);
-		    if (ERTS_NEED_GC(p, need_heap)) {
-			erts_garbage_collect(p, need_heap, reg, live+2);
-			if (ARG_IS_NOT_TMP(arg1,tmp_big1)) {
-			    arg1 = reg[live];
-			}
-			if (ARG_IS_NOT_TMP(arg2,tmp_big2)) {
-			    arg2 = reg[live+1];
-			}
-		    }
-		    hp = p->htop;
-		    p->htop += need_heap;
-		    res = big_minus(arg1, arg2, hp);
-                    trim_heap(p, hp, res);
-		    if (is_nil(res)) {
-			p->freason = SYSTEM_LIMIT;
-			return THE_NON_VALUE;
-		    }
-		    return res;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    goto do_big;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0) {
-			goto badarith;
-		    }
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    if (big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    GET_DOUBLE(arg2, f2);
-
-		do_float:
-		    f1.fd = f1.fd - f2.fd;
-		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-			erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live);
-		    }
-		    hp = p->htop;
-		    p->htop += FLOAT_SIZE_OBJECT;
-		    res = make_float(hp);
-		    PUT_DOUBLE(f1, hp);
-		    return res;
-		default:
-		    goto badarith;
-		}
-	    default:
-		goto badarith;
-	    }
-	}
-    default:
-	goto badarith;
-    }
-}
-
-Eterm
-erts_gc_mixed_times(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    DECLARE_TMP(tmp_big1,0,p);
-    DECLARE_TMP(tmp_big2,1,p);
-    Eterm hdr;
-    Eterm res;
-    FloatDef f1, f2;
-    dsize_t sz1, sz2, sz;
-    int need_heap;
-    Eterm* hp;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    ERTS_FP_CHECK_INIT(p);
-    switch (arg1 & _TAG_PRIMARY_MASK) {
-    case TAG_PRIMARY_IMMED1:
-	switch ((arg1 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    if ((arg1 == SMALL_ZERO) || (arg2 == SMALL_ZERO)) {
-			return(SMALL_ZERO);
-		    } else if (arg1 == SMALL_ONE) {
-			return(arg2);
-		    } else if (arg2 == SMALL_ONE) {
-			return(arg1);
-		    } else {
-			DeclareTmpHeap(big_res,3,p);
-			UseTmpHeap(3,p);
-
-			/*
-			 * The following code is optimized for the case that
-			 * result is small (which should be the most common case
-			 * in practice).
-			 */
-			res = small_times(signed_val(arg1), signed_val(arg2),
-					  big_res);
-			if (is_small(res)) {
-			    UnUseTmpHeap(3,p);
-			    return res;
-			} else {
-			    /*
-			     * The result is a a big number.
-			     * Allocate a heap fragment and copy the result.
-			     * Be careful to allocate exactly what we need
-			     * to not leave any holes.
-			     */
-			    Uint arity;
-			    Uint need;
-			    
-			    ASSERT(is_big(res));
-			    hdr = big_res[0];
-			    arity = bignum_header_arity(hdr);
-			    ASSERT(arity == 1 || arity == 2);
-			    need = arity + 1;
-			    if (ERTS_NEED_GC(p, need)) {
-				erts_garbage_collect(p, need, reg, live);
-			    }
-			    hp = p->htop;
-			    p->htop += need;
-			    res = make_big(hp);
-			    *hp++ = hdr;
-			    *hp++ = big_res[1];
-			    if (arity > 1) {
-				*hp = big_res[2];
-			    }
-			    UnUseTmpHeap(3,p);
-			    return res;
-			}
-		    }
-		default:
-		badarith:
-		    p->freason = BADARITH;
-		    return THE_NON_VALUE;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    if (arg1 == SMALL_ZERO)
-			return(SMALL_ZERO);
-		    if (arg1 == SMALL_ONE)
-			return(arg2);
-		    arg1 = small_to_big(signed_val(arg1), tmp_big1);
-		    sz = 2 + big_size(arg2);
-		    goto do_big;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	default:
-	    goto badarith;
-	}
-    case TAG_PRIMARY_BOXED:
-	hdr = *boxed_val(arg1);
-	switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-	case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    if (arg2 == SMALL_ZERO)
-			return(SMALL_ZERO);
-		    if (arg2 == SMALL_ONE)
-			return(arg1);
-		    arg2 = small_to_big(signed_val(arg2), tmp_big2);
-		    sz = 2 + big_size(arg1);
-		    goto do_big;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    sz1 = big_size(arg1);
-		    sz2 = big_size(arg2);
-		    sz = sz1 + sz2;
-
-		do_big:
-		    need_heap = BIG_NEED_SIZE(sz);
-		    if (ERTS_NEED_GC(p, need_heap)) {
-			erts_garbage_collect(p, need_heap, reg, live+2);
-			if (ARG_IS_NOT_TMP(arg1,tmp_big1)) {
-			    arg1 = reg[live];
-			}
-			if (ARG_IS_NOT_TMP(arg2,tmp_big2)) {
-			    arg2 = reg[live+1];
-			}
-		    }
-		    hp = p->htop;
-		    p->htop += need_heap;
-		    res = big_times(arg1, arg2, hp);
-		    trim_heap(p, hp, res);
-
-		    /*
-		     * Note that the result must be big in this case, since
-		     * at least one operand was big to begin with, and
-		     * the absolute value of the other is > 1.
-		     */
-
-		    if (is_nil(res)) {
-			p->freason = SYSTEM_LIMIT;
-			return THE_NON_VALUE;
-		    }
-		    return res;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0) {
-			goto badarith;
-		    }
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    if (big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    GET_DOUBLE(arg2, f2);
-
-		do_float:
-		    f1.fd = f1.fd * f2.fd;
-		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-			erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live);
-		    }
-		    hp = p->htop;
-		    p->htop += FLOAT_SIZE_OBJECT;
-		    res = make_float(hp);
-		    PUT_DOUBLE(f1, hp);
-		    return res;
-		default:
-		    goto badarith;
-		}
-	    default:
-		goto badarith;
-	    }
-	}
-    default:
-	goto badarith;
-    }
-}
-
-Eterm
-erts_gc_mixed_div(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    FloatDef f1, f2;
-    Eterm* hp;
-    Eterm hdr;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    ERTS_FP_CHECK_INIT(p);
-    switch (arg1 & _TAG_PRIMARY_MASK) {
-    case TAG_PRIMARY_IMMED1:
-	switch ((arg1 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		badarith:
-		    p->freason = BADARITH;
-		    return THE_NON_VALUE;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    if (big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    f1.fd = signed_val(arg1);
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	default:
-	    goto badarith;
-	}
-    case TAG_PRIMARY_BOXED:
-	hdr = *boxed_val(arg1);
-	switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-	case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-	case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0) {
-			goto badarith;
-		    }
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0 ||
-			big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    if (big_to_double(arg1, &f1.fd) < 0) {
-			goto badarith;
-		    }
-		    GET_DOUBLE(arg2, f2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    }
-	case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-	    switch (arg2 & _TAG_PRIMARY_MASK) {
-	    case TAG_PRIMARY_IMMED1:
-		switch ((arg2 & _TAG_IMMED1_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_IMMED1_SMALL >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    f2.fd = signed_val(arg2);
-		    goto do_float;
-		default:
-		    goto badarith;
-		}
-	    case TAG_PRIMARY_BOXED:
-		hdr = *boxed_val(arg2);
-		switch ((hdr & _TAG_HEADER_MASK) >> _TAG_PRIMARY_SIZE) {
-		case (_TAG_HEADER_POS_BIG >> _TAG_PRIMARY_SIZE):
-		case (_TAG_HEADER_NEG_BIG >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    if (big_to_double(arg2, &f2.fd) < 0) {
-			goto badarith;
-		    }
-		    goto do_float;
-		case (_TAG_HEADER_FLOAT >> _TAG_PRIMARY_SIZE):
-		    GET_DOUBLE(arg1, f1);
-		    GET_DOUBLE(arg2, f2);
-
-		do_float:
-		    f1.fd = f1.fd / f2.fd;
-		    ERTS_FP_ERROR(p, f1.fd, goto badarith);
-		    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-			erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live);
-		    }
-		    hp = p->htop;
-		    p->htop += FLOAT_SIZE_OBJECT;
-		    PUT_DOUBLE(f1, hp);
-		    return make_float(hp);
-		default:
-		    goto badarith;
-		}
-	    default:
-		goto badarith;
-	    }
-	}
-    default:
-	goto badarith;
-    }
-}
-
-Eterm
-erts_gc_int_div(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    DECLARE_TMP(tmp_big1,0,p);
-    DECLARE_TMP(tmp_big2,1,p);
-    int ires;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    switch (NUMBER_CODE(arg1, arg2)) {
-    case SMALL_SMALL:
-	/* This case occurs if the most negative fixnum is divided by -1. */
-	ASSERT(arg2 == make_small(-1));
-	arg1 = small_to_big(signed_val(arg1), tmp_big1);
-	/*FALLTHROUGH*/
-    case BIG_SMALL:
-	arg2 = small_to_big(signed_val(arg2), tmp_big2);
-	goto L_big_div;
-    case SMALL_BIG:
-	if (arg1 != make_small(MIN_SMALL)) {
-	    return SMALL_ZERO;
-	}
-	arg1 = small_to_big(signed_val(arg1), tmp_big1);
-	/*FALLTHROUGH*/
-    case BIG_BIG:
-    L_big_div:
-	ires = big_ucomp(arg1, arg2);
-	if (ires < 0) {
-	    arg1 = SMALL_ZERO;
-	} else if (ires == 0) {
-	    arg1 = (big_sign(arg1) == big_sign(arg2)) ?
-		SMALL_ONE : SMALL_MINUS_ONE;
-	} else {
-	    Eterm* hp;
-	    int i = big_size(arg1);
-	    Uint need;
-
-	    ires = big_size(arg2);
-	    need = BIG_NEED_SIZE(i-ires+1) + BIG_NEED_SIZE(i);
-	    if (ERTS_NEED_GC(p, need)) {
-		erts_garbage_collect(p, need, reg, live+2);
-		if (ARG_IS_NOT_TMP(arg1,tmp_big1)) {
-		    arg1 = reg[live];
-		}
-		if (ARG_IS_NOT_TMP(arg2,tmp_big2)) {
-		    arg2 = reg[live+1];
-		}
-	    }
-	    hp = p->htop;
-	    p->htop += need;
-	    arg1 = big_div(arg1, arg2, hp);
-	    trim_heap(p, hp, arg1);
-	    if (is_nil(arg1)) {
-		p->freason = SYSTEM_LIMIT;
-		return THE_NON_VALUE;
-	    }
-	}
-	return arg1;
-    default:
-	p->freason = BADARITH;
-	return THE_NON_VALUE;
-    }
-}
-
-Eterm
-erts_gc_int_rem(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg1;
-    Eterm arg2;
-    DECLARE_TMP(tmp_big1,0,p);
-    DECLARE_TMP(tmp_big2,1,p);
-    int ires;
-
-    arg1 = reg[live];
-    arg2 = reg[live+1];
-    switch (NUMBER_CODE(arg1, arg2)) {
-    case BIG_SMALL:
-	arg2 = small_to_big(signed_val(arg2), tmp_big2);
-	goto L_big_rem;
-    case SMALL_BIG:
-	if (arg1 != make_small(MIN_SMALL)) {
-	    return arg1;
-	} else {
-	    Eterm tmp;
-	    tmp = small_to_big(signed_val(arg1), tmp_big1);
-	    if ((ires = big_ucomp(tmp, arg2)) == 0) {
-		return SMALL_ZERO;
-	    } else {
-		ASSERT(ires < 0);
-		return arg1;
-	    }
-	}
-	/* All paths returned */
-    case BIG_BIG:
-    L_big_rem:
-	ires = big_ucomp(arg1, arg2);
-	if (ires == 0) {
-	    arg1 = SMALL_ZERO;
-	} else if (ires > 0) {
-	    Eterm* hp;
-	    Uint need = BIG_NEED_SIZE(big_size(arg1));
-
-	    if (ERTS_NEED_GC(p, need)) {
-		erts_garbage_collect(p, need, reg, live+2);
-		if (ARG_IS_NOT_TMP(arg1,tmp_big1)) {
-		    arg1 = reg[live];
-		}
-		if (ARG_IS_NOT_TMP(arg2,tmp_big2)) {
-		    arg2 = reg[live+1];
-		}
-	    }
-	    hp = p->htop;
-	    p->htop += need;
-	    arg1 = big_rem(arg1, arg2, hp);
-	    trim_heap(p, hp, arg1);
-	    if (is_nil(arg1)) {
-		p->freason = SYSTEM_LIMIT;
-		return THE_NON_VALUE;
-	    }
-	}
-	return arg1;
-    default:
-	p->freason = BADARITH;
-	return THE_NON_VALUE;
-    }
-}
-
-#define DEFINE_GC_LOGIC_FUNC(func)						\
-Eterm erts_gc_##func(Process* p, Eterm* reg, Uint live)				\
-{										\
-    Eterm arg1;									\
-    Eterm arg2;									\
-    DECLARE_TMP(tmp_big1,0,p);							\
-    DECLARE_TMP(tmp_big2,1,p);							\
-    Eterm* hp;									\
-    int need;									\
-										\
-    arg1 = reg[live];								\
-    arg2 = reg[live+1];								\
-    switch (NUMBER_CODE(arg1, arg2)) {						\
-    case SMALL_BIG:								\
-	arg1 = small_to_big(signed_val(arg1), tmp_big1);			\
-	need = BIG_NEED_SIZE(big_size(arg2) + 1);				\
-	if (ERTS_NEED_GC(p, need)) {						\
-	    erts_garbage_collect(p, need, reg, live+2);				\
-	    arg2 = reg[live+1];							\
-	}									\
-	break;									\
-    case BIG_SMALL:								\
-	arg2 = small_to_big(signed_val(arg2), tmp_big2);			\
-	need = BIG_NEED_SIZE(big_size(arg1) + 1);				\
-	if (ERTS_NEED_GC(p, need)) {						\
-	    erts_garbage_collect(p, need, reg, live+2);				\
-	    arg1 = reg[live];							\
-	}									\
-	break;									\
-    case BIG_BIG:								\
-	need = BIG_NEED_SIZE(MAX(big_size(arg1), big_size(arg2)) + 1);		\
-	if (ERTS_NEED_GC(p, need)) {						\
-	    erts_garbage_collect(p, need, reg, live+2);				\
-	    arg1 = reg[live];							\
-	    arg2 = reg[live+1];							\
-	}									\
-	break;									\
-    default:									\
-	p->freason = BADARITH;							\
-	return THE_NON_VALUE;							\
-    }										\
-    hp = p->htop;								\
-    p->htop += need;								\
-    arg1 = big_##func(arg1, arg2, hp);						\
-    trim_heap(p, hp, arg1);							\
-    return arg1;								\
-}
-
-DEFINE_GC_LOGIC_FUNC(band)
-DEFINE_GC_LOGIC_FUNC(bor)
-DEFINE_GC_LOGIC_FUNC(bxor)
-
-Eterm erts_gc_bnot(Process* p, Eterm* reg, Uint live)
-{
-    Eterm result;
-    Eterm arg;
-    Uint need;
-    Eterm* bigp;
-
-    arg = reg[live];
-    if (is_not_big(arg)) {
-	p->freason = BADARITH;
-	return NIL;
-    } else {
-	need = BIG_NEED_SIZE(big_size(arg)+1);
-	if (ERTS_NEED_GC(p, need)) {
-	    erts_garbage_collect(p, need, reg, live+1);
-	    arg = reg[live];
-	}
-	bigp = p->htop;
-	p->htop += need;
-	result = big_bnot(arg, bigp);
-	trim_heap(p, bigp, result);
-	if (is_nil(result)) {
-	    p->freason = SYSTEM_LIMIT;
-	    return NIL;
-	}
-    }
-    return result;
-} 
-
 /* Needed to remove compiler optimization */
 double erts_get_positive_zero_float() {
     return 0.0f;
diff --git a/erts/emulator/beam/erl_bestfit_alloc.c b/erts/emulator/beam/erl_bestfit_alloc.c
index 9cb1199c2a..ca81c14b96 100644
--- a/erts/emulator/beam/erl_bestfit_alloc.c
+++ b/erts/emulator/beam/erl_bestfit_alloc.c
@@ -209,6 +209,8 @@ erts_bfalc_start(BFAllctr_t *bfallctr,
     allctr->add_mbc                     = NULL;
     allctr->remove_mbc		        = NULL;
     allctr->largest_fblk_in_mbc         = NULL;
+    allctr->first_fblk_in_mbc           = NULL;
+    allctr->next_fblk_in_mbc            = NULL;
     allctr->init_atoms			= init_atoms;
 
 #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c
index a2610bf2e1..ca1ba55b22 100644
--- a/erts/emulator/beam/erl_bif_binary.c
+++ b/erts/emulator/beam/erl_bif_binary.c
@@ -208,8 +208,8 @@ typedef struct _ac_trie {
 typedef struct _bm_data {
     byte *x;
     Sint len;
+    Sint *badshift; /* ALPHABET_SIZE entries; only allocated when len > 1 */
     Sint *goodshift;
-    Sint badshift[ALPHABET_SIZE];
 } BMData;
 
 typedef struct _ac_find_all_state {
@@ -319,16 +319,104 @@ static void dump_ac_node(ACNode *node, int indent, int ch);
  * The needed size of binary data for a search structure - given the
  * accumulated string lengths.
  */
-#define BM_SIZE(StrLen) 	      /* StrLen: length of searchstring */ \
-((MYALIGN(sizeof(Sint) * (StrLen))) + /* goodshift array */                \
- MYALIGN(StrLen) +                    /* searchstring saved */             \
- (MYALIGN(sizeof(BMData))))           /* Structure */
+#define BM_SIZE_SINGLE()    /* Single byte search string */ \
+(MYALIGN(1) +               /* searchstring saved */        \
+ (MYALIGN(sizeof(BMData)))) /* Structure */
+
+#define BM_SIZE_MULTI(StrLen)              /* StrLen: length of searchstring */ \
+((MYALIGN(sizeof(Sint) * (StrLen))) +      /* goodshift array (Sint each) */    \
+ (MYALIGN(sizeof(Sint) * ALPHABET_SIZE)) + /* badshift array (Sint each) */     \
+ MYALIGN(StrLen) +                         /* searchstring saved */             \
+ (MYALIGN(sizeof(BMData))))                /* Structure */
 
 #define AC_SIZE(StrLens)       /* StrLens: sum of all searchstring lengths */ \
 ((MYALIGN(sizeof(ACNode)) *                                                   \
 ((StrLens)+1)) + 	       /* The actual nodes (including rootnode) */    \
  MYALIGN(sizeof(ACTrie)))      /* Structure */
 
+/*
+ * Boyer Moore - most obviously implemented more or less exactly as
+ * Christian Charras and Thierry Lecroq describe it in "Handbook of
+ * Exact String-Matching Algorithms"
+ * http://www-igm.univ-mlv.fr/~lecroq/string/
+ */
+
+/*
+ * Fill bmd->badshift[0..ALPHABET_SIZE-1] from the pattern bmd->x (bmd->len bytes).
+ */
+static void compute_badshifts(BMData *bmd)
+{
+    Sint i;
+    Sint m = bmd->len;
+
+    for (i = 0; i < ALPHABET_SIZE; ++i) {
+	bmd->badshift[i] = m; /* every entry starts at the pattern length */
+    }
+    for (i = 0; i < m - 1; ++i) { /* x[m - 1] is not visited */
+	bmd->badshift[bmd->x[i]] = m - i - 1; /* a later occurrence overwrites an earlier one */
+    }
+}
+
+/* Helper for "compute_goodshifts": fills suffixes[0..m-1] for pattern x of length m >= 1 */
+static void compute_suffixes(byte *x, Sint m, Sint *suffixes)
+{
+    Sint f, g, i; /* Sint like m, so indices are not truncated for huge patterns */
+
+    suffixes[m - 1] = m; /* the whole pattern matches its own tail */
+
+    f = 0; /* To avoid use before set warning */
+
+    g = m - 1;
+
+    for (i = m - 2; i >= 0; --i) {
+	if (i > g && suffixes[i + m - 1 - f] < i - g) {
+	    suffixes[i] = suffixes[i + m - 1 - f]; /* reuse an entry computed earlier */
+	} else {
+	    if (i < g) {
+		g = i;
+	    }
+	    f = i;
+	    while ( g >= 0 && x[g] == x[g + m - 1 - f] ) { /* walk left while x still matches its tail */
+		--g;
+	    }
+	    suffixes[i] = f - g; /* number of bytes, ending at x[i], equal to the end of x */
+	}
+    }
+}
+
+/*
+ * Fill bmd->goodshift[0..len-1] from bmd->x, using a temporary suffixes table.
+ */
+static void compute_goodshifts(BMData *bmd)
+{
+    Sint m = bmd->len;
+    byte *x = bmd->x;
+    Sint i, j;
+    Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Sint)); /* freed below */
+
+    compute_suffixes(x, m, suffixes);
+
+    for (i = 0; i < m; ++i) {
+	bmd->goodshift[i] = m; /* m doubles as "not yet assigned" in the next loop */
+    }
+
+    j = 0;
+
+    for (i = m - 1; i >= -1; --i) {
+	if (i == -1 || suffixes[i] == i + 1) { /* i == -1 is tested first, so suffixes[-1] is never read */
+	    while (j < m - 1 - i) {
+		if (bmd->goodshift[j] == m) {
+		    bmd->goodshift[j] = m - 1 - i;
+		}
+		++j; /* j is never reset, so each slot is visited at most once here */
+	    }
+	}
+    }
+    for (i = 0; i <= m - 2; ++i) {
+	bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i; /* may overwrite values set above */
+    }
+    erts_free(ERTS_ALC_T_TMP, suffixes);
+}
 
 /*
  * Callback for the magic binary
@@ -377,20 +465,37 @@ static ACTrie *create_acdata(MyAllocator *my, Uint len,
 
 /*
  * The same initialization of allocator and basic data for Boyer-Moore.
+ * For a single-byte needle goodshift/badshift stay NULL; only memchr is used.
  */
 static BMData *create_bmdata(MyAllocator *my, byte *x, Uint len,
 			     Binary **the_bin /* out */)
 {
-    Uint datasize = BM_SIZE(len);
+    Uint datasize; /* bytes reserved in the magic binary */
     BMData *bmd;
-    Binary *mb = erts_create_magic_binary(datasize,cleanup_my_data_bm);
-    byte *data = ERTS_MAGIC_BIN_DATA(mb);
+    Binary *mb;
+    byte *data;
+
+    if(len > 1) {
+	datasize = BM_SIZE_MULTI(len); /* struct + needle + both shift tables */
+    } else {
+	datasize = BM_SIZE_SINGLE(); /* struct + one needle byte, no tables */
+    }
+
+    mb = erts_create_magic_binary(datasize,cleanup_my_data_bm);
+    data = ERTS_MAGIC_BIN_DATA(mb);
     init_my_allocator(my, datasize, data);
     bmd = my_alloc(my, sizeof(BMData));
     bmd->x = my_alloc(my,len);
     sys_memcpy(bmd->x,x,len);
     bmd->len = len;
-    bmd->goodshift = my_alloc(my,sizeof(Uint) * len);
+    bmd->goodshift = bmd->badshift = NULL; /* never leave the table pointers indeterminate */
+    if(len > 1) {
+	bmd->goodshift = my_alloc(my, sizeof(Sint) * len);
+	bmd->badshift = my_alloc(my, sizeof(Sint) * ALPHABET_SIZE);
+	compute_badshifts(bmd); /* tables are built once, at creation */
+	compute_goodshifts(bmd);
+    }
+
     *the_bin = mb;
     return bmd;
 }
@@ -711,91 +816,8 @@ static BFReturn ac_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta
     return (m == 0) ? BF_NOT_FOUND : BF_OK;
 }
 
-/*
- * Boyer Moore - most obviously implemented more or less exactly as
- * Christian Charras and Thierry Lecroq describe it in "Handbook of
- * Exact String-Matching Algorithms"
- * http://www-igm.univ-mlv.fr/~lecroq/string/
- */
-
-/*
- * Call this to compute badshifts array
- */
-static void compute_badshifts(BMData *bmd)
-{
-    Sint i;
-    Sint m = bmd->len;
-
-    for (i = 0; i < ALPHABET_SIZE; ++i) {
-	bmd->badshift[i] = m;
-    }
-    for (i = 0; i < m - 1; ++i) {
-	bmd->badshift[bmd->x[i]] = m - i - 1;
-    }
-}
-
-/* Helper for "compute_goodshifts" */
-static void compute_suffixes(byte *x, Sint m, Sint *suffixes)
-{
-    int f,g,i;
-
-    suffixes[m - 1] = m;
-
-    f = 0; /* To avoid use before set warning */
-
-    g = m - 1;
-
-    for (i = m - 2; i >= 0; --i) {
-	if (i > g && suffixes[i + m - 1 - f] < i - g) {
-	    suffixes[i] = suffixes[i + m - 1 - f];
-	} else {
-	    if (i < g) {
-		g = i;
-	    }
-	    f = i;
-	    while ( g >= 0 && x[g] == x[g + m - 1 - f] ) {
-		--g;
-	    }
-	    suffixes[i] = f - g;
-	}
-    }
-}
-
-/*
- * Call this to compute goodshift array
- */
-static void compute_goodshifts(BMData *bmd)
-{
-    Sint m = bmd->len;
-    byte *x = bmd->x;
-    Sint i, j;
-    Sint *suffixes = erts_alloc(ERTS_ALC_T_TMP, m * sizeof(Sint));
-
-    compute_suffixes(x, m, suffixes);
-
-    for (i = 0; i < m; ++i) {
-	bmd->goodshift[i] = m;
-    }
-
-    j = 0;
-
-    for (i = m - 1; i >= -1; --i) {
-	if (i == -1 || suffixes[i] == i + 1) {
-	    while (j < m - 1 - i) {
-		if (bmd->goodshift[j] == m) {
-		    bmd->goodshift[j] = m - 1 - i;
-		}
-		++j;
-	    }
-	}
-    }
-    for (i = 0; i <= m - 2; ++i) {
-	bmd->goodshift[m - 1 - suffixes[i]] = m - 1 - i;
-    }
-    erts_free(ERTS_ALC_T_TMP, suffixes);
-}
-
 #define BM_LOOP_FACTOR 10 /* Should we have a higher value? */
+#define MC_LOOP_FACTOR 8 /* haystack bytes scanned by memchr per reduction charged */
 
 static void bm_init_find_first_match(BinaryFindContext *ctx)
 {
@@ -819,13 +841,38 @@ static BFReturn bm_find_first_match(BinaryFindContext *ctx, byte *haystack)
     Sint i;
     Sint j = state->pos;
     register Uint reds = *reductions;
+    byte *pos_pointer;
+    Sint needle_last = blen - 1;
+    Sint mem_read = len - needle_last - j;
+
+    if (mem_read <= 0) {
+	return BF_NOT_FOUND;
+    }
+    mem_read = MIN(mem_read, reds * MC_LOOP_FACTOR);
+    ASSERT(mem_read > 0);
 
-    while (j <= len - blen) {
+    pos_pointer = memchr(&haystack[j + needle_last], needle[needle_last], mem_read);
+    if (pos_pointer == NULL) {
+	reds -= mem_read / MC_LOOP_FACTOR;
+	j += mem_read;
+    } else {
+	reds -= (pos_pointer - &haystack[j]) / MC_LOOP_FACTOR;
+	j = pos_pointer - haystack - needle_last;
+    }
+
+    /* The loop pre-decrements reds, so make sure at least one is left. */
+    ++reds;
+
+    for(;;) {
+	if (j > len - blen) {
+	    *reductions = reds;
+	    return BF_NOT_FOUND;
+	}
 	if (--reds == 0) {
 	    state->pos = j;
 	    return BF_RESTART;
 	}
-	for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
+	for (i = needle_last; i >= 0 && needle[i] == haystack[i + j]; --i)
 	    ;
 	if (i < 0) { /* found */
 	    *reductions = reds;
@@ -835,8 +882,6 @@ static BFReturn bm_find_first_match(BinaryFindContext *ctx, byte *haystack)
 	}
 	j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
     }
-    *reductions = reds;
-    return BF_NOT_FOUND;
 }
 
 static void bm_init_find_all(BinaryFindContext *ctx)
@@ -875,14 +920,38 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta
     Sint *gs = bmd->goodshift;
     Sint *bs = bmd->badshift;
     byte *needle = bmd->x;
-    Sint i;
+    Sint i = -1; /* Use memchr on start and on every match */
     Sint j = state->pos;
     Uint m = state->m;
     Uint allocated = state->allocated;
     FindallData *out = state->out;
     register Uint reds = *reductions;
+    byte *pos_pointer;
+    Sint needle_last = blen - 1;
+    Sint mem_read;
 
-    while (j <= len - blen) {
+    for(;;) {
+	if (i < 0) {
+	    mem_read = len - needle_last - j;
+	    if(mem_read <= 0) {
+		goto done;
+	    }
+	    mem_read = MIN(mem_read, reds * MC_LOOP_FACTOR);
+	    ASSERT(mem_read > 0);
+	    pos_pointer = memchr(&haystack[j + needle_last], needle[needle_last], mem_read);
+	    if (pos_pointer == NULL) {
+		reds -= mem_read / MC_LOOP_FACTOR;
+		j += mem_read;
+	    } else {
+		reds -= (pos_pointer - &haystack[j]) / MC_LOOP_FACTOR;
+		j = pos_pointer - haystack - needle_last;
+	    }
+	    /* The --reds check below needs at least one reduction left. */
+	    ++reds;
+	}
+	if (j > len - blen) {
+	    goto done;
+	}
 	if (--reds == 0) {
 	    state->pos = j;
 	    state->m = m;
@@ -890,7 +959,7 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta
 	    state->out = out;
 	    return BF_RESTART;
 	}
-	for (i = blen - 1; i >= 0 && needle[i] == haystack[i + j]; --i)
+	for (i = needle_last; i >= 0 && needle[i] == haystack[i + j]; --i)
 	    ;
 	if (i < 0) { /* found */
 	    if (m >= allocated) {
@@ -912,6 +981,7 @@ static BFReturn bm_find_all_non_overlapping(BinaryFindContext *ctx, byte *haysta
 	    j += MAX(gs[i],bs[haystack[i+j]] - blen + 1 + i);
 	}
     }
+ done:
     state->m = m;
     state->out = out;
     *reductions = reds;
@@ -931,6 +1001,7 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
     Eterm t, b, comp_term = NIL;
     Uint characters;
     Uint words;
+    Uint size;
 
     characters = 0;
     words = 0;
@@ -946,11 +1017,12 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
 	    if (binary_bitsize(b) != 0) {
 		goto badarg;
 	    }
-	    if (binary_size(b) == 0) {
+	    size = binary_size(b);
+	    if (size == 0) {
 		goto badarg;
 	    }
 	    ++words;
-	    characters += binary_size(b);
+	    characters += size;
 	}
 	if (is_not_nil(t)) {
 	    goto badarg;
@@ -979,16 +1051,13 @@ static int do_binary_match_compile(Eterm argument, Eterm *tag, Binary **binp)
 	Uint bitoffs, bitsize;
 	byte *temp_alloc = NULL;
 	MyAllocator my;
-	BMData *bmd;
 	Binary *bin;
 
 	ERTS_GET_BINARY_BYTES(comp_term, bytes, bitoffs, bitsize);
 	if (bitoffs != 0) {
 	    bytes = erts_get_aligned_binary_bytes(comp_term, &temp_alloc);
 	}
-	bmd = create_bmdata(&my, bytes, characters, &bin);
-	compute_badshifts(bmd);
-	compute_goodshifts(bmd);
+        create_bmdata(&my, bytes, characters, &bin);
 	erts_free_aligned_binary_bytes(temp_alloc);
 	CHECK_ALLOCATOR(my);
 	*tag = am_bm;
@@ -1901,9 +1970,7 @@ BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen)
 	goto badarg;
     }
 
-
-
-    hp = HAlloc(p, ERL_SUB_BIN_SIZE);
+    hp = HeapFragOnlyAlloc(p, ERL_SUB_BIN_SIZE);
 
     ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size);
     sb = (ErlSubBin *) hp;
@@ -1921,100 +1988,6 @@ BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen)
     BIF_ERROR(p, BADARG);
 }
 
-#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need))
-
-BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is_tuple)
-{
-    Uint pos;
-    Sint len;
-    size_t orig_size;
-    Eterm orig;
-    Uint offset;
-    Uint bit_offset;
-    Uint bit_size;
-    Eterm* hp;
-    ErlSubBin* sb;
-    Eterm binary;
-    Eterm *tp;
-    Eterm epos, elen;
-    int extra_args;
-
-
-    if (range_is_tuple) {
-	Eterm tpl = reg[live];
-	extra_args = 1;
-	if (is_not_tuple(tpl)) {
-	    goto badarg;
-	}
-	tp = tuple_val(tpl);
-	if (arityval(*tp) != 2) {
-	    goto badarg;
-	}
-
-	epos = tp[1];
-	elen = tp[2];
-    } else {
-	extra_args = 2;
-	epos = reg[live-1];
-	elen = reg[live];
-    }
-    binary = reg[live-extra_args];
-
-    if (is_not_binary(binary)) {
-	goto badarg;
-    }
-    if (!term_to_Uint(epos, &pos)) {
-	goto badarg;
-    }
-    if (!term_to_Sint(elen, &len)) {
-	goto badarg;
-    }
-    if (len < 0) {
-	Uint lentmp = -(Uint)len;
-	/* overflow */
-	if ((Sint)lentmp < 0) {
-	    goto badarg;
-	}
-	len = lentmp;
-	if (len > pos) {
-	    goto badarg;
-	}
-	pos -= len;
-    }
-    /* overflow */
-    if ((pos + len) < pos || (len > 0 && (pos + len) == pos)) {
-	goto badarg;
-    }
-    if ((orig_size = binary_size(binary)) < pos ||
-	orig_size < (pos + len)) {
-	goto badarg;
-    }
-
-    if (ERTS_NEED_GC(p, ERL_SUB_BIN_SIZE)) {
-	erts_garbage_collect(p, ERL_SUB_BIN_SIZE, reg, live+1-extra_args); /* I don't need the tuple
-									      or indices any more */
-	binary = reg[live-extra_args];
-    }
-
-    hp = p->htop;
-    p->htop += ERL_SUB_BIN_SIZE;
-
-    ERTS_GET_REAL_BIN(binary, orig, offset, bit_offset, bit_size);
-
-    sb = (ErlSubBin *) hp;
-    sb->thing_word = HEADER_SUB_BIN;
-    sb->size = len;
-    sb->offs = offset + pos;
-    sb->orig = orig;
-    sb->bitoffs = bit_offset;
-    sb->bitsize = 0;
-    sb->is_writable = 0;
-
-    BIF_RET(make_binary(sb));
-
- badarg:
-    BIF_ERROR(p, BADARG);
-}
 /*************************************************************
  * The actual guard BIFs are in erl_bif_guard.c
  * but the implementation of both the non-gc and the gc
@@ -3012,17 +2985,19 @@ static void dump_bm_data(BMData *bm)
 	}
     }
     erts_printf(">>\n");
-    erts_printf("GoodShift array:\n");
-    for (i = 0; i < bm->len; ++i) {
-	erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]);
-    }
-    erts_printf("BadShift array:\n");
-    j = 0;
-    for (i = 0; i < ALPHABET_SIZE; i += j) {
-	for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) {
-	    erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]);
+    if(bm->len > 1) {
+	erts_printf("GoodShift array:\n");
+	for (i = 0; i < bm->len; ++i) {
+	    erts_printf("GoodShift[%d]: %ld\n", i, bm->goodshift[i]);
+	}
+	erts_printf("BadShift array:\n");
+	j = 0;
+	for (i = 0; i < ALPHABET_SIZE; i += j) {
+	    for (j = 0; i + j < ALPHABET_SIZE && j < 6; ++j) {
+		erts_printf("BS[%03d]:%02ld, ", i+j, bm->badshift[i+j]);
+	    }
+	    erts_printf("\n");
 	}
-	erts_printf("\n");
     }
 }
 
diff --git a/erts/emulator/beam/erl_bif_ddll.c b/erts/emulator/beam/erl_bif_ddll.c
index 4cda0948a0..639aee29dc 100644
--- a/erts/emulator/beam/erl_bif_ddll.c
+++ b/erts/emulator/beam/erl_bif_ddll.c
@@ -829,7 +829,7 @@ BIF_RETTYPE erl_ddll_format_error_int_1(BIF_ALIST_1)
 	    "cannot be loaded/unloaded";
 	break;
     case am_permanent:
-	errstring = "DDLL driver is permanent an can not be unloaded/loaded";
+	errstring = "DDLL driver is permanent and cannot be unloaded/loaded";
 	break;
     case am_not_loaded:
 	errstring = "DDLL driver is not loaded";
diff --git a/erts/emulator/beam/erl_bif_guard.c b/erts/emulator/beam/erl_bif_guard.c
index 8a5c6ada6c..c921b66a7e 100644
--- a/erts/emulator/beam/erl_bif_guard.c
+++ b/erts/emulator/beam/erl_bif_guard.c
@@ -19,7 +19,12 @@
  */
 
 /*
- * Numeric guard BIFs.
+ * This file implements the former GC BIFs. They used to do a GC when
+ * they needed heap space. Because of changes to the implementation of
+ * literals, those BIFs are now allowed to allocate heap fragments
+ * (using HeapFragOnlyAlloc()). Note that they must NOT call HAlloc(),
+ * because the caller does not do any SWAPIN / SWAPOUT (that is,
+ * HEAP_TOP(p) and HEAP_LIMIT(p) contain stale values).
  */
 
 #ifdef HAVE_CONFIG_H
@@ -36,14 +41,16 @@
 #include "erl_binary.h"
 #include "erl_map.h"
 
-static Eterm gc_double_to_integer(Process* p, double x, Eterm* reg, Uint live);
-
 static Eterm double_to_integer(Process* p, double x);
+static BIF_RETTYPE erlang_length_trap(BIF_ALIST_3);
+static Export erlang_length_export;
 
-/*
- * Guard BIFs called using apply/3 and guard BIFs that never build
- * anything on the heap.
- */
+/* Register the export entry that erlang_length_trap() traps through. */
+void erts_init_bif_guard(void)
+{
+    erts_init_trap_export(&erlang_length_export, am_erlang, am_length, 3,
+                          &erlang_length_trap);
+}
 
 BIF_RETTYPE abs_1(BIF_ALIST_1)
 {
@@ -56,7 +63,7 @@ BIF_RETTYPE abs_1(BIF_ALIST_1)
 	i0 = signed_val(BIF_ARG_1);
 	i = ERTS_SMALL_ABS(i0);
 	if (i0 == MIN_SMALL) {
-	    hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
+	    hp = HeapFragOnlyAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
 	    BIF_RET(uint_to_big(i, hp));
 	} else {
 	    BIF_RET(make_small(i));
@@ -68,7 +75,7 @@ BIF_RETTYPE abs_1(BIF_ALIST_1)
 	    int sz = big_arity(BIF_ARG_1) + 1;
 	    Uint* x;
 
-	    hp = HAlloc(BIF_P, sz);	/* See note at beginning of file */
+	    hp = HeapFragOnlyAlloc(BIF_P, sz);	/* See note at beginning of file */
 	    sz--;
 	    res = make_big(hp);
 	    x = big_val(BIF_ARG_1);
@@ -83,7 +90,7 @@ BIF_RETTYPE abs_1(BIF_ALIST_1)
 
 	GET_DOUBLE(BIF_ARG_1, f);
 	if (f.fd < 0.0) {
-	    hp = HAlloc(BIF_P, FLOAT_SIZE_OBJECT);
+	    hp = HeapFragOnlyAlloc(BIF_P, FLOAT_SIZE_OBJECT);
 	    f.fd = fabs(f.fd);
 	    res = make_float(hp);
 	    PUT_DOUBLE(f, hp);
@@ -116,7 +123,7 @@ BIF_RETTYPE float_1(BIF_ALIST_1)
     } else if (big_to_double(BIF_ARG_1, &f.fd) < 0) {
 	goto badarg;
     }
-    hp = HAlloc(BIF_P, FLOAT_SIZE_OBJECT);
+    hp = HeapFragOnlyAlloc(BIF_P, FLOAT_SIZE_OBJECT);
     res = make_float(hp);
     PUT_DOUBLE(f, hp);
     BIF_RET(res);
@@ -194,26 +201,113 @@ BIF_RETTYPE round_1(BIF_ALIST_1)
     BIF_RET(res);
 }
 
+/*
+ * This version of length/1 is called from native code and apply/3.
+ */
+
 BIF_RETTYPE length_1(BIF_ALIST_1)
 {
+    Eterm args[3];
+
+    /*
+     * Set up the argument vector expected by erts_trapping_length_1():
+     * list in args[0], length accumulator in args[1]. The original
+     * argument is kept in args[2] in case an error should be signaled.
+     */
+
+    args[0] = BIF_ARG_1;
+    args[1] = make_small(0);
+    args[2] = BIF_ARG_1;
+    return erlang_length_trap(BIF_P, args, A__I);
+}
+
+static BIF_RETTYPE erlang_length_trap(BIF_ALIST_3)
+{
+    /* A proper value means the whole list was measured. */
+    Eterm result = erts_trapping_length_1(BIF_P, BIF__ARGS);
+
+    if (is_value(result)) {
+        BIF_RET(result);
+    }
+
+    /* No value: either the reductions ran out or an error occurred. */
+    if (BIF_P->freason == TRAP) {
+        /* Reschedule; progress was saved in the argument registers. */
+        BIF_TRAP3(&erlang_length_export, BIF_P, BIF_ARG_1, BIF_ARG_2, BIF_ARG_3);
+    } else {
+        /*
+         * Signal the error. The original argument was stashed in
+         * BIF_ARG_3 by the caller, so that is what gets reported.
+         */
+        ERTS_BIF_ERROR_TRAPPED1(BIF_P, BIF_P->freason,
+                                bif_export[BIF_length_1], BIF_ARG_3);
+    }
+}
+
+/*
+ * Trappable helper function for calculating length/1.
+ *
+ * When calling this function, entries in args[] should be set up as
+ * follows:
+ *
+ *   args[0] = List to calculate length for.
+ *   args[1] = Length accumulator (tagged integer).
+ *
+ * If the return value is a tagged integer, the length was calculated
+ * successfully.
+ *
+ * Otherwise, if return value is THE_NON_VALUE and p->freason is TRAP,
+ * the available reductions were exceeded and this function must be called
+ * again after rescheduling. args[0] and args[1] have been updated to
+ * contain the next part of the list and length so far, respectively.
+ *
+ * Otherwise, if return value is THE_NON_VALUE, the list did not end
+ * in an empty list (and p->freason is BADARG).
+ */
+
+Eterm erts_trapping_length_1(Process* p, Eterm* args)
+{
     Eterm list;
     Uint i;
-     
-    if (is_nil(BIF_ARG_1)) 
-	BIF_RET(SMALL_ZERO);
-    if (is_not_list(BIF_ARG_1)) {
-	BIF_ERROR(BIF_P, BADARG);
-    }
-    list = BIF_ARG_1;
-    i = 0;
-    while (is_list(list)) {
-	i++;
+    Uint max_iter;
+    Uint saved_max_iter;
+
+#if defined(DEBUG) || defined(VALGRIND)
+    max_iter = 50;
+#else
+    max_iter = ERTS_BIF_REDS_LEFT(p) * 16;
+#endif
+    saved_max_iter = max_iter;
+    ASSERT(max_iter > 0);
+
+    list = args[0];
+    i = unsigned_val(args[1]);
+    while (is_list(list) && max_iter != 0) {
 	list = CDR(list_val(list));
+	i++, max_iter--;
+    }
+
+    if (is_list(list)) {
+        /*
+         * We have exceeded the allotted number of iterations.
+         * Save the result so far and signal a trap.
+         */
+        args[0] = list;
+        args[1] = make_small(i);
+        p->freason = TRAP;
+        BUMP_ALL_REDS(p);
+        return THE_NON_VALUE;
+    } else if (is_not_nil(list))  {
+        /* Error. Should be NIL. */
+	BIF_ERROR(p, BADARG);
     }
-    if (is_not_nil(list))  {
-	BIF_ERROR(BIF_P, BADARG);
-    }
-    BIF_RET(make_small(i));
+
+    /*
+     * We reached the end of the list successfully. Charge only for the
+     * cells actually traversed in this call (16 cells per reduction).
+     */
+    BUMP_REDS(p, (saved_max_iter - max_iter) / 16);
+    return make_small(i);
 }
 
 /* returns the size of a tuple or a binary */
@@ -229,7 +323,7 @@ BIF_RETTYPE size_1(BIF_ALIST_1)
 	if (IS_USMALL(0, sz)) {
 	    return make_small(sz);
 	} else {
-	    Eterm* hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
+	    Eterm* hp = HeapFragOnlyAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
 	    BIF_RET(uint_to_big(sz, hp));
 	}
     }
@@ -252,12 +346,12 @@ BIF_RETTYPE bit_size_1(BIF_ALIST_1)
 	    if (IS_USMALL(0,low_bits)) {
 		BIF_RET(make_small(low_bits));
 	    } else {
-		Eterm* hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
+		Eterm* hp = HeapFragOnlyAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
 		BIF_RET(uint_to_big(low_bits, hp));
 	    }
 	} else {
 	    Uint sz = BIG_UINT_HEAP_SIZE+1;
-	    Eterm* hp = HAlloc(BIF_P, sz);
+	    Eterm* hp = HeapFragOnlyAlloc(BIF_P, sz);
 	    hp[0] = make_pos_bignum_header(sz-1);
 	    BIG_DIGIT(hp,0) = low_bits;
 	    BIG_DIGIT(hp,1) = high_bits;
@@ -281,7 +375,7 @@ BIF_RETTYPE byte_size_1(BIF_ALIST_1)
 	if (IS_USMALL(0, bytesize)) {
 	    BIF_RET(make_small(bytesize));
 	} else {
-	    Eterm* hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
+	    Eterm* hp = HeapFragOnlyAlloc(BIF_P, BIG_UINT_HEAP_SIZE);
 	    BIF_RET(uint_to_big(bytesize, hp));
 	}
     } else {
@@ -325,7 +419,7 @@ double_to_integer(Process* p, double x)
     }
     sz = BIG_NEED_SIZE(ds);          /* number of words including arity */
 
-    hp = HAlloc(p, sz);
+    hp = HeapFragOnlyAlloc(p, sz);
     res = make_big(hp);
     xp = (ErtsDigit*) (hp + 1);
 
@@ -371,389 +465,3 @@ BIF_RETTYPE binary_part_2(BIF_ALIST_2)
  badarg:
    BIF_ERROR(BIF_P,BADARG);
 }
-
-
-/*
- * The following code is used when a guard that may build on the
- * heap is called directly. They must not use HAlloc(), but must
- * do a garbage collection if there is insufficient heap space.
- *
- * Important note: All error checking MUST be done before doing
- * a garbage collection. The compiler assumes that all registers
- * are still valid if a guard BIF generates an exception.
- */
-
-#define ERTS_NEED_GC(p, need) ((HEAP_LIMIT((p)) - HEAP_TOP((p))) <= (need))
-
-Eterm erts_gc_length_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm list = reg[live];
-    int i;
-
-    if (is_nil(list)) 
-	return SMALL_ZERO;
-    i = 0;
-    while (is_list(list)) {
-	i++;
-	list = CDR(list_val(list));
-    }
-    if (is_not_nil(list))  {
-	BIF_ERROR(p, BADARG);
-    }
-    return make_small(i);
-}
-
-Eterm erts_gc_size_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg = reg[live];
-    if (is_tuple(arg)) {
-	Eterm* tupleptr = tuple_val(arg);
-	return make_small(arityval(*tupleptr));
-    } else if (is_binary(arg)) {
-	Uint sz = binary_size(arg);
-	if (IS_USMALL(0, sz)) {
-	    return make_small(sz);
-	} else {
-	    Eterm* hp;
-	    if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) {
-		erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live);
-	    }
-	    hp = p->htop;
-	    p->htop += BIG_UINT_HEAP_SIZE;
-	    return uint_to_big(sz, hp);
-	}
-    }
-    BIF_ERROR(p, BADARG);
-}
-
-Eterm erts_gc_bit_size_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg = reg[live];
-    if (is_binary(arg)) {
-	Uint low_bits;
-	Uint bytesize;
-	Uint high_bits;
-	bytesize = binary_size(arg);
-	high_bits = bytesize >>  ((sizeof(Uint) * 8)-3);
-	low_bits = (bytesize << 3) + binary_bitsize(arg);
-	if (high_bits == 0) {
-	    if (IS_USMALL(0,low_bits)) {
-		return make_small(low_bits);
-	    } else {
-		Eterm* hp; 
-		if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) {
-		    erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live);
-		}
-		hp = p->htop;
-		p->htop += BIG_UINT_HEAP_SIZE;
-		return uint_to_big(low_bits, hp);
-	    }
-	} else {
-	    Uint sz = BIG_UINT_HEAP_SIZE+1;
-	    Eterm* hp;
-	    if (ERTS_NEED_GC(p, sz)) {
-		erts_garbage_collect(p, sz, reg, live);
-	    }
-	    hp = p->htop;
-	    p->htop += sz;
-	    hp[0] = make_pos_bignum_header(sz-1);
-	    BIG_DIGIT(hp,0) = low_bits;
-	    BIG_DIGIT(hp,1) = high_bits;
-	    return make_big(hp);
-	}
-    } else {
-	BIF_ERROR(p, BADARG);
-    }
-}
-
-Eterm erts_gc_byte_size_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg = reg[live];
-    if (is_binary(arg)) {
-	Uint bytesize = binary_size(arg);
-	if (binary_bitsize(arg) > 0) {
-	    bytesize++;
-	}
-	if (IS_USMALL(0, bytesize)) {
-	    return make_small(bytesize);
-	} else {
-	    Eterm* hp;
-	    if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) {
-		erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live);
-	    }
-	    hp = p->htop;
-	    p->htop += BIG_UINT_HEAP_SIZE;
-	    return uint_to_big(bytesize, hp);
-	}
-    } else {
-	BIF_ERROR(p, BADARG);
-    }
-}
-
-Eterm erts_gc_map_size_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg = reg[live];
-    if (is_flatmap(arg)) {
-	flatmap_t *mp = (flatmap_t*)flatmap_val(arg);
-        return make_small(flatmap_get_size(mp));
-    } else if (is_hashmap(arg)) {
-        Eterm* hp;
-        Uint size;
-	size = hashmap_size(arg);
-        if (IS_USMALL(0, size)) {
-            return make_small(size);
-        }
-        if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) {
-            erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live);
-        }
-        hp = p->htop;
-        p->htop += BIG_UINT_HEAP_SIZE;
-        return uint_to_big(size, hp);
-    }
-    p->fvalue = arg;
-    BIF_ERROR(p, BADMAP);
-}
-
-Eterm erts_gc_abs_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    Eterm res;
-    Sint i0, i;
-    Eterm* hp;
-
-    arg = reg[live];
-
-    /* integer arguments */
-    if (is_small(arg)) {
-	i0 = signed_val(arg);
-	i = ERTS_SMALL_ABS(i0);
-	if (i0 == MIN_SMALL) {
-	    if (ERTS_NEED_GC(p, BIG_UINT_HEAP_SIZE)) {
-		erts_garbage_collect(p, BIG_UINT_HEAP_SIZE, reg, live+1);
-		arg = reg[live];
-	    }
-	    hp = p->htop;
-	    p->htop += BIG_UINT_HEAP_SIZE;
-	    return uint_to_big(i, hp);
-	} else {
-	    return make_small(i);
-	}
-    } else if (is_big(arg)) {
-	if (!big_sign(arg)) {
-	    return arg;
-	} else {
-	    int sz = big_arity(arg) + 1;
-	    Uint* x;
-
-	    if (ERTS_NEED_GC(p, sz)) {
-		erts_garbage_collect(p, sz, reg, live+1);
-		arg = reg[live];
-	    }
-	    hp = p->htop;
-	    p->htop += sz;
-	    sz--;
-	    res = make_big(hp);
-	    x = big_val(arg);
-	    *hp++ = make_pos_bignum_header(sz);
-	    x++;                          /* skip thing */
-	    while(sz--)
-		*hp++ = *x++;
-	    return res;
-	}
-    } else if (is_float(arg)) {
-	FloatDef f;
-
-	GET_DOUBLE(arg, f);
-	if (f.fd < 0.0) {
-	    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-		erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live+1);
-		arg = reg[live];
-	    }
-	    hp = p->htop;
-	    p->htop += FLOAT_SIZE_OBJECT;
-	    f.fd = fabs(f.fd);
-	    res = make_float(hp);
-	    PUT_DOUBLE(f, hp);
-	    return res;
-	}
-	else
-	    return arg;
-    }
-    BIF_ERROR(p, BADARG);
-}
-
-Eterm erts_gc_float_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    Eterm res;
-    Eterm* hp;
-    FloatDef f;
-     
-    /* check args */
-    arg = reg[live];
-    if (is_not_integer(arg)) {
-	if (is_float(arg))  {
-	    return arg;
-	} else {
-	badarg:
-	    BIF_ERROR(p, BADARG);
-	}
-    }
-    if (is_small(arg)) {
-	Sint i = signed_val(arg);
-	f.fd = i;		/* use "C"'s auto casting */
-    } else if (big_to_double(arg, &f.fd) < 0) {
-	goto badarg;
-    }
-    if (ERTS_NEED_GC(p, FLOAT_SIZE_OBJECT)) {
-	erts_garbage_collect(p, FLOAT_SIZE_OBJECT, reg, live+1);
-	arg = reg[live];
-    }
-    hp = p->htop;
-    p->htop += FLOAT_SIZE_OBJECT;
-    res = make_float(hp);
-    PUT_DOUBLE(f, hp);
-    return res;
-}
-
-Eterm erts_gc_round_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    FloatDef f;
-     
-    arg = reg[live];
-    if (is_not_float(arg)) {
-	if (is_integer(arg))  {
-	    return arg;
-	}
-	BIF_ERROR(p, BADARG);
-    }
-    GET_DOUBLE(arg, f);
-
-    return gc_double_to_integer(p, round(f.fd), reg, live);
-}
-
-Eterm erts_gc_trunc_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    FloatDef f;
-     
-    arg = reg[live];
-    if (is_not_float(arg)) {
-	if (is_integer(arg))  {
-	    return arg;
-	}
-	BIF_ERROR(p, BADARG);
-    }
-    /* get the float */
-    GET_DOUBLE(arg, f);
-
-    /* truncate it and return the resultant integer */
-    return gc_double_to_integer(p, (f.fd >= 0.0) ? floor(f.fd) : ceil(f.fd),
-				reg, live);
-}
-
-Eterm erts_gc_floor_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    FloatDef f;
-
-    arg = reg[live];
-    if (is_not_float(arg)) {
-	if (is_integer(arg))  {
-	    return arg;
-	}
-	BIF_ERROR(p, BADARG);
-    }
-    GET_DOUBLE(arg, f);
-    return gc_double_to_integer(p, floor(f.fd), reg, live);
-}
-
-Eterm erts_gc_ceil_1(Process* p, Eterm* reg, Uint live)
-{
-    Eterm arg;
-    FloatDef f;
-
-    arg = reg[live];
-    if (is_not_float(arg)) {
-	if (is_integer(arg))  {
-	    return arg;
-	}
-	BIF_ERROR(p, BADARG);
-    }
-    GET_DOUBLE(arg, f);
-    return gc_double_to_integer(p, ceil(f.fd), reg, live);
-}
-
-static Eterm
-gc_double_to_integer(Process* p, double x, Eterm* reg, Uint live)
-{
-    int is_negative;
-    int ds;
-    ErtsDigit* xp;
-    int i;
-    Eterm res;
-    size_t sz;
-    Eterm* hp;
-    double dbase;
-
-    if ((x < (double) (MAX_SMALL+1)) && (x > (double) (MIN_SMALL-1))) {
-	Sint xi = x;
-	return make_small(xi);
-    }
-
-    if (x >= 0) {
-	is_negative = 0;
-    } else {
-	is_negative = 1;
-	x = -x;
-    }
-
-    /* Unscale & (calculate exponent) */
-    ds = 0;
-    dbase = ((double)(D_MASK)+1);
-    while(x >= 1.0) {
-	x /= dbase;         /* "shift" right */
-	ds++;
-    }
-    sz = BIG_NEED_SIZE(ds);          /* number of words including arity */
-    if (ERTS_NEED_GC(p, sz)) {
-	erts_garbage_collect(p, sz, reg, live);
-    }
-    hp = p->htop;
-    p->htop += sz;
-    res = make_big(hp);
-    xp = (ErtsDigit*) (hp + 1);
-
-    for (i = ds-1; i >= 0; i--) {
-	ErtsDigit d;
-
-	x *= dbase;      /* "shift" left */
-	d = x;            /* trunc */
-	xp[i] = d;        /* store digit */
-	x -= d;           /* remove integer part */
-    }
-    while ((ds & (BIG_DIGITS_PER_WORD-1)) != 0) {
-	xp[ds++] = 0;
-    }
-
-    if (is_negative) {
-	*hp = make_neg_bignum_header(sz-1);
-    } else {
-	*hp = make_pos_bignum_header(sz-1);
-    }
-    return res;
-}
-
-/********************************************************************************
- * binary_part guards. The actual implementation is in erl_bif_binary.c
- ********************************************************************************/
-Eterm erts_gc_binary_part_3(Process* p, Eterm* reg, Uint live)
-{
-    return erts_gc_binary_part(p,reg,live,0);
-}
-
-Eterm erts_gc_binary_part_2(Process* p, Eterm* reg, Uint live)
-{
-    return erts_gc_binary_part(p,reg,live,1);
-}
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 7fada0d548..3b0f0d33fa 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -2705,9 +2705,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
 	    goto bld_instruction_counts;
 	}
 
-#ifdef DEBUG
 	ASSERT(endp == hp);
-#endif
 
 	BIF_RET(res);
 #endif /* #ifndef ERTS_OPCODE_COUNTER_SUPPORT */
@@ -4671,6 +4669,14 @@ BIF_RETTYPE erts_debug_set_internal_state_2(BIF_ALIST_2)
             BIF_RET(am_notsup);
 #endif
         }
+        else if (ERTS_IS_ATOM_STR("ets_force_split", BIF_ARG_1)) {
+            /* Need a 2-tuple: both tpl[1] and tpl[2] are read below. */
+            if (is_tuple(BIF_ARG_2) && arityval(*tuple_val(BIF_ARG_2)) == 2) {
+                Eterm* tpl = tuple_val(BIF_ARG_2);
+                if (erts_ets_force_split(tpl[1], tpl[2] == am_true))
+                    BIF_RET(am_ok);
+            }
+        }
     }
 
     BIF_ERROR(BIF_P, BADARG);
diff --git a/erts/emulator/beam/erl_bif_lists.c b/erts/emulator/beam/erl_bif_lists.c
index aaf262780f..b23fa77f5f 100644
--- a/erts/emulator/beam/erl_bif_lists.c
+++ b/erts/emulator/beam/erl_bif_lists.c
@@ -35,101 +35,270 @@
 
 static Eterm keyfind(int Bif, Process* p, Eterm Key, Eterm Pos, Eterm List);
 
+/* erlang:'++'/2
+ *
+ * Adds a list to another (LHS ++ RHS). For historical reasons this is
+ * implemented by copying LHS and setting its tail to RHS without checking
+ * that RHS is a proper list. [] ++ 'not_a_list' will therefore result in
+ * 'not_a_list', and [1,2] ++ 3 will result in [1,2|3], and this is a bug that
+ * we have to live with. */
 
-static BIF_RETTYPE append(Process* p, Eterm A, Eterm B)
-{
-    Eterm list;
-    Eterm copy;
-    Eterm last;
-    Eterm* hp = NULL;
-    Sint i;
+typedef struct {
+    Eterm lhs_original;
+    Eterm rhs_original;
 
-    list = A;
+    Eterm iterator;
 
-    if (is_nil(list)) {
-        BIF_RET(B);
-    }
+    Eterm result;
+    Eterm *result_cdr;
+} ErtsAppendContext;
+
+static int append_ctx_bin_dtor(Binary *context_bin) {
+    return 1;
+}
+
+static Eterm append_create_trap_state(Process *p,
+                                      ErtsAppendContext *from_context) {
+    ErtsAppendContext *to_context;
+    Binary *state_bin;
+    Eterm *hp;
+
+    state_bin = erts_create_magic_binary(sizeof(ErtsAppendContext),
+                                         append_ctx_bin_dtor);
+
+    to_context = ERTS_MAGIC_BIN_DATA(state_bin);
+    *to_context = *from_context;
 
-    if (is_not_list(list)) {
-        BIF_ERROR(p, BADARG);
+    if (from_context->result_cdr == &from_context->result) {
+        to_context->result_cdr = &to_context->result;
     }
 
-    /* optimistic append on heap first */
+    hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+    return erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+}
+
+static int lists_append_alloc(Process *p, ErtsAppendContext *context) {
+    static const Uint CELLS_PER_RED = 40;
+
+    Eterm *alloc_top, *alloc_end;
+    Uint cells_left, max_cells;
+    Eterm lookahead;
+
+    cells_left = max_cells = CELLS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+    lookahead = context->iterator;
 
-    if ((i = HeapWordsLeft(p) / 2) < 4) {
-        goto list_tail;
+#ifdef DEBUG
+    cells_left = max_cells = max_cells / 10 + 1;
+#endif
+
+    while (cells_left != 0 && is_list(lookahead)) {
+        lookahead = CDR(list_val(lookahead));
+        cells_left--;
     }
 
-    hp   = HEAP_TOP(p);
-    copy = last = CONS(hp, CAR(list_val(list)), make_list(hp+2));
-    list = CDR(list_val(list));
-    hp  += 2;
-    i   -= 2; /* don't use the last 2 words (extra i--;) */
-
-    while(i-- && is_list(list)) {
-        Eterm* listp = list_val(list);
-        last = CONS(hp, CAR(listp), make_list(hp+2));
-        list = CDR(listp);
-        hp += 2;
+    BUMP_REDS(p, (max_cells - cells_left) / CELLS_PER_RED);
+
+    if (is_not_list(lookahead) && is_not_nil(lookahead)) {
+        /* It's possible that we're erroring out with an incomplete list, so it
+         * must be terminated or we'll leave a hole in the heap. */
+        *context->result_cdr = NIL;
+        return -1;
     }
 
-    /* A is proper and B is NIL return A as-is, don't update HTOP */
+    alloc_top = HAlloc(p, 2 * (max_cells - cells_left));
+    alloc_end = alloc_top + 2 * (max_cells - cells_left);
+
+    while (alloc_top < alloc_end) {
+        Eterm *cell = list_val(context->iterator);
+
+        ASSERT(context->iterator != lookahead);
+
+        *context->result_cdr = make_list(alloc_top);
+        context->result_cdr = &CDR(alloc_top);
+        CAR(alloc_top) = CAR(cell);
 
-    if (is_nil(list) && is_nil(B)) {
-        BIF_RET(A);
+        context->iterator = CDR(cell);
+        alloc_top += 2;
     }
 
-    if (is_nil(list)) {
-        HEAP_TOP(p) = hp;
-        CDR(list_val(last)) = B;
-        BIF_RET(copy);
+    if (is_list(context->iterator)) {
+        /* The result only has to be terminated when returning it to the user,
+         * but we're doing it when trapping as well to prevent headaches when
+         * debugging. */
+        *context->result_cdr = NIL;
+        ASSERT(cells_left == 0);
+        return 0;
     }
 
-list_tail:
+    *context->result_cdr = context->rhs_original;
+    ASSERT(is_nil(context->iterator));
+
+    if (is_nil(context->rhs_original)) {
+        /* The list we created was equal to the original, so we'll return that
+         * in the hopes that the garbage we created can be removed soon. */
+        context->result = context->lhs_original;
+    }
+
+    return 1;
+}
+
+static int lists_append_onheap(Process *p, ErtsAppendContext *context) {
+    static const Uint CELLS_PER_RED = 60;
+
+    Eterm *alloc_start, *alloc_top, *alloc_end;
+    Uint cells_left, max_cells;
+
+    cells_left = max_cells = CELLS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+
+#ifdef DEBUG
+    cells_left = max_cells = max_cells / 10 + 1;
+#endif
+
+    ASSERT(HEAP_LIMIT(p) >= HEAP_TOP(p) + 2);
+    alloc_start = HEAP_TOP(p);
+    alloc_end = HEAP_LIMIT(p) - 2;
+    alloc_top = alloc_start;
+
+    /* Don't process more cells than we have reductions for. */
+    alloc_end = MIN(alloc_top + (cells_left * 2), alloc_end);
+
+    while (alloc_top < alloc_end && is_list(context->iterator)) {
+        Eterm *cell = list_val(context->iterator);
 
-    if ((i = erts_list_length(list)) < 0) {
-        BIF_ERROR(p, BADARG);
+        *context->result_cdr = make_list(alloc_top);
+        context->result_cdr = &CDR(alloc_top);
+        CAR(alloc_top) = CAR(cell);
+
+        context->iterator = CDR(cell);
+        alloc_top += 2;
     }
 
-    /* remaining list was proper and B is NIL */
-    if (is_nil(B)) {
-        BIF_RET(A);
+    cells_left -= (alloc_top - alloc_start) / 2;
+    HEAP_TOP(p) = alloc_top;
+
+    ASSERT(cells_left >= 0 && cells_left <= max_cells);
+    BUMP_REDS(p, (max_cells - cells_left) / CELLS_PER_RED);
+
+    if (is_not_list(context->iterator) && is_not_nil(context->iterator)) {
+        *context->result_cdr = NIL;
+        return -1;
     }
 
-    if (hp) {
-        /* Note: fall through case, already written
-         * on the heap.
-         * The last 2 words of the heap is not written yet
-         */
-        Eterm *hp_save = hp;
-        ASSERT(i != 0);
-        HEAP_TOP(p) = hp + 2;
-        if (i == 1) {
-            hp[0] = CAR(list_val(list));
-            hp[1] = B;
-            BIF_RET(copy);
+    if (is_list(context->iterator)) {
+        if (cells_left > CELLS_PER_RED) {
+            return lists_append_alloc(p, context);
         }
-        hp   = HAlloc(p, 2*(i - 1));
-        last = CONS(hp_save, CAR(list_val(list)), make_list(hp));
-    } else {
-        hp   = HAlloc(p, 2*i);
-        copy = last = CONS(hp, CAR(list_val(list)), make_list(hp+2));
-        hp  += 2;
+
+        *context->result_cdr = NIL;
+        return 0;
+    }
+
+    *context->result_cdr = context->rhs_original;
+    ASSERT(is_nil(context->iterator));
+
+    if (is_nil(context->rhs_original)) {
+        context->result = context->lhs_original;
     }
 
-    list = CDR(list_val(list));
-    i--;
+    return 1;
+}
+
+static int append_continue(Process *p, ErtsAppendContext *context) {
+    /* Writing straight into the unused part of the heap spares us from having
+     * to measure the list first, so that is the preferred path. Only when the
+     * heap is (nearly) full do we hand over to lists_append_alloc. */
+    if (HeapWordsLeft(p) <= 8) {
+        return lists_append_alloc(p, context);
+    }
+
+    return lists_append_onheap(p, context);
+}
+
+static int append_start(Process *p, Eterm lhs, Eterm rhs,
+                        ErtsAppendContext *context) {
+    /* Start from an empty result whose tail slot is the result field itself,
+     * so the first copied cell is stored directly in context->result. */
+    context->result = NIL;
+    context->result_cdr = &context->result;
+
+    /* Remember both operands and begin walking at the head of lhs. */
+    context->iterator = context->lhs_original = lhs;
+    context->rhs_original = rhs;
+    return append_continue(p, context);
+}
+
+/* erlang:'++'/2; bif_entry is the export of the calling BIF, used to trap. */
+static Eterm append(Export *bif_entry, BIF_ALIST_2) {
+    Eterm lhs = BIF_ARG_1, rhs = BIF_ARG_2;
+
+    if (is_nil(lhs)) {
+        /* This is buggy but expected, `[] ++ 'not_a_list'` has always resulted
+         * in 'not_a_list'. */
+        return rhs;
+    } else if (is_list(lhs)) {
+        /* We start with the context on the stack in the hopes that we won't
+         * have to trap. */
+        ErtsAppendContext context;
+        int res;
+
+        res = append_start(BIF_P, lhs, rhs, &context);
+        /* res: 0 = out of budget (trap), < 0 = improper lhs, otherwise done. */
+        if (res == 0) {
+            Eterm state_mref;
+            /* Move the context into a trap state and disable GC until done. */
+            state_mref = append_create_trap_state(BIF_P, &context);
+            erts_set_gc_state(BIF_P, 0);
+
+            BIF_TRAP2(bif_entry, BIF_P, state_mref, NIL);
+        }
+
+        if (res < 0) {
+            ASSERT(is_nil(*context.result_cdr));
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        ASSERT(*context.result_cdr == context.rhs_original);
+        BIF_RET(context.result);
+    } else if (is_internal_magic_ref(lhs)) {
+        ErtsAppendContext *context;
+        int (*dtor)(Binary*);
+        Binary *magic_bin;
+
+        int res;
+        /* A magic ref is only ours if it carries the append context dtor. */
+        magic_bin = erts_magic_ref2bin(lhs);
+        dtor = ERTS_MAGIC_BIN_DESTRUCTOR(magic_bin);
+
+        if (dtor != append_ctx_bin_dtor) {
+            BIF_ERROR(BIF_P, BADARG);
+        }
+
+        ASSERT(BIF_P->flags & F_DISABLE_GC);
+        ASSERT(rhs == NIL);
 
-    ASSERT(i > -1);
-    while(i--) {
-        Eterm* listp = list_val(list);
-        last = CONS(hp, CAR(listp), make_list(hp+2));
-        list = CDR(listp);
-        hp  += 2;
+        context = ERTS_MAGIC_BIN_DATA(magic_bin);
+        res = append_continue(BIF_P, context);
+
+        if (res == 0) {
+            BIF_TRAP2(bif_entry, BIF_P, lhs, NIL);
+        }
+        /* Finished or failed: undo the GC disabling done at the first trap. */
+        erts_set_gc_state(BIF_P, 1);
+
+        if (res < 0) {
+            ASSERT(is_nil(*context->result_cdr));
+            ERTS_BIF_ERROR_TRAPPED2(BIF_P, BADARG, bif_entry,
+                                    context->lhs_original,
+                                    context->rhs_original);
+        }
+
+        ASSERT(*context->result_cdr == context->rhs_original);
+        BIF_RET(context->result);
     }
 
-    CDR(list_val(last)) = B;
-    BIF_RET(copy);
+    ASSERT(!(BIF_P->flags & F_DISABLE_GC));
+    /* lhs is neither [], a cons cell, nor one of our own trap states. */
+    BIF_ERROR(BIF_P, BADARG);
 }
 
 /*
@@ -139,12 +308,12 @@ list_tail:
 Eterm
 ebif_plusplus_2(BIF_ALIST_2)
 {
-    return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
+    return append(bif_export[BIF_ebif_plusplus_2], BIF_CALL_ARGS); /* own export: trap target */
 }
 
 BIF_RETTYPE append_2(BIF_ALIST_2)
 {
-    return append(BIF_P, BIF_ARG_1, BIF_ARG_2);
+    return append(bif_export[BIF_append_2], BIF_CALL_ARGS); /* own export: trap target */
 }
 
 /* erlang:'--'/2
@@ -915,7 +1084,7 @@ static BIF_RETTYPE lists_reverse_alloc(Process *c_p,
     list = list_in;
     tail = tail_in;
 
-    cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p));
+    cells_left = max_cells = CELLS_PER_RED * ERTS_BIF_REDS_LEFT(c_p);
     lookahead = list;
 
     while (cells_left != 0 && is_list(lookahead)) {
@@ -964,7 +1133,7 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
     list = list_in;
     tail = tail_in;
 
-    cells_left = max_cells = CELLS_PER_RED * (1 + ERTS_BIF_REDS_LEFT(c_p));
+    cells_left = max_cells = CELLS_PER_RED * ERTS_BIF_REDS_LEFT(c_p);
 
     ASSERT(HEAP_LIMIT(c_p) >= HEAP_TOP(c_p) + 2);
     alloc_start = HEAP_TOP(c_p);
@@ -992,8 +1161,6 @@ static BIF_RETTYPE lists_reverse_onheap(Process *c_p,
     if (is_nil(list)) {
         BIF_RET(tail);
     } else if (is_list(list)) {
-        ASSERT(is_list(tail));
-
         if (cells_left > CELLS_PER_RED) {
             return lists_reverse_alloc(c_p, list, tail);
         }
diff --git a/erts/emulator/beam/erl_bif_re.c b/erts/emulator/beam/erl_bif_re.c
index bbc64eb9aa..e0b9202fe7 100644
--- a/erts/emulator/beam/erl_bif_re.c
+++ b/erts/emulator/beam/erl_bif_re.c
@@ -532,10 +532,7 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
     int options = 0;
     int pflags = 0;
     int unicode = 0;
-#ifdef DEBUG
     int buffres;
-#endif
-
 
     if (parse_options(arg2,&options,NULL,&pflags,NULL,NULL,NULL,NULL)
 	< 0) {
@@ -556,12 +553,8 @@ re_compile(Process* p, Eterm arg1, Eterm arg2)
         BIF_ERROR(p,BADARG);
     }
     expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
-#ifdef DEBUG
-    buffres =
-#endif
-    erts_iolist_to_buf(arg1, expr, slen);
-
-    ASSERT(buffres >= 0);
+    buffres = erts_iolist_to_buf(arg1, expr, slen);
+    ASSERT(buffres >= 0); (void)buffres;
 
     expr[slen]='\0';
     result = erts_pcre_compile2(expr, options, &errcode, 
@@ -1052,9 +1045,7 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
 			tmpb[ap->len] = '\0';
 		    } else {
 			ErlDrvSizeT slen;
-#ifdef DEBUG
 			int buffres;
-#endif
 
 			if (erts_iolist_size(val, &slen)) {
 			    goto error;
@@ -1068,11 +1059,8 @@ build_capture(Eterm capture_spec[CAPSPEC_SIZE], const pcre *code)
 			    }
 			}
 
-#ifdef DEBUG
-			buffres =
-#endif
-			erts_iolist_to_buf(val, tmpb, slen);
-			ASSERT(buffres >= 0);
+			buffres = erts_iolist_to_buf(val, tmpb, slen);
+			ASSERT(buffres >= 0); (void)buffres;
 			tmpb[slen] = '\0';
 		    }
 		    build_one_capture(code,&ri,&sallocated,has_dupnames,tmpb);
@@ -1145,9 +1133,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
 	    const char *errstr = "";
 	    int errofset = 0;
 	    int capture_count;
-#ifdef DEBUG
 	    int buffres;
-#endif
 
 	    if (pflags & PARSE_FLAG_UNICODE && 
 		(!is_binary(arg2) || !is_binary(arg1) ||
@@ -1161,12 +1147,8 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
 	    
 	    expr = erts_alloc(ERTS_ALC_T_RE_TMP_BUF, slen + 1);
 	    
-#ifdef DEBUG
-	    buffres =
-#endif
-	    erts_iolist_to_buf(arg2, expr, slen);
-
-	    ASSERT(buffres >= 0);
+	    buffres = erts_iolist_to_buf(arg2, expr, slen);
+	    ASSERT(buffres >= 0); (void)buffres;
 
 	    expr[slen]='\0';
 	    result = erts_pcre_compile2(expr, comp_options, &errcode, 
@@ -1317,9 +1299,7 @@ re_run(Process *p, Eterm arg1, Eterm arg2, Eterm arg3)
 	restart.subject = (char *) (pb->bytes+offset);
 	restart.flags |= RESTART_FLAG_SUBJECT_IN_BINARY;
     } else {
-#ifdef DEBUG
 	int buffres;
-#endif
 handle_iolist:
 	if (erts_iolist_size(arg1, &slength)) {
 	    erts_free(ERTS_ALC_T_RE_SUBJECT, restart.ovector);
@@ -1331,11 +1311,8 @@ handle_iolist:
 	}
 	restart.subject = erts_alloc(ERTS_ALC_T_RE_SUBJECT, slength);
 
-#ifdef DEBUG
-	buffres =
-#endif
-	erts_iolist_to_buf(arg1, restart.subject, slength);
-	ASSERT(buffres >= 0);
+	buffres = erts_iolist_to_buf(arg1, restart.subject, slength);
+	ASSERT(buffres >= 0); (void)buffres;
     }
 
     if (pflags & PARSE_FLAG_REPORT_ERRORS) {
@@ -1457,10 +1434,7 @@ re_inspect_2(BIF_ALIST_2)
     Eterm res;
     const pcre *code;
     byte *temp_alloc = NULL;
-#ifdef DEBUG
-    int infores;
-#endif
-    
+    int infores;    
 
     if (is_not_tuple(BIF_ARG_1) || (arityval(*tuple_val(BIF_ARG_1)) != 5)) {
 	goto error;
@@ -1484,12 +1458,8 @@ re_inspect_2(BIF_ALIST_2)
     if (erts_pcre_fullinfo(code, NULL, PCRE_INFO_OPTIONS, &options) != 0)
 	goto error;
 
-#ifdef DEBUG
-    infores =
-#endif
-    erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top);
-
-    ASSERT(infores == 0);
+    infores = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top);
+    ASSERT(infores == 0); (void)infores;
 
     if (top <= 0) {
 	hp = HAlloc(BIF_P, 3);
@@ -1497,18 +1467,10 @@ re_inspect_2(BIF_ALIST_2)
 	erts_free_aligned_binary_bytes(temp_alloc);
 	BIF_RET(res);
     }
-#ifdef DEBUG
-    infores =
-#endif
-    erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize);
-
+    infores = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize);
     ASSERT(infores == 0);
 
-#ifdef DEBUG
-    infores =
-#endif
-    erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable);
-
+    infores = erts_pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable);
     ASSERT(infores == 0);
     
     has_dupnames = ((options & PCRE_DUPNAMES) != 0);
diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h
index 08edb43c49..4bf77988f7 100644
--- a/erts/emulator/beam/erl_binary.h
+++ b/erts/emulator/beam/erl_binary.h
@@ -278,7 +278,6 @@ Eterm erts_bin_bytes_to_list(Eterm previous, Eterm* hp, byte* bytes, Uint size,
  */
 
 BIF_RETTYPE erts_list_to_binary_bif(Process *p, Eterm arg, Export *bif);
-BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is_tuple);
 BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen);
 
 
diff --git a/erts/emulator/beam/erl_bits.c b/erts/emulator/beam/erl_bits.c
index 3a16913473..e82c776e70 100644
--- a/erts/emulator/beam/erl_bits.c
+++ b/erts/emulator/beam/erl_bits.c
@@ -144,6 +144,42 @@ erts_bs_start_match_2(Process *p, Eterm Binary, Uint Max)
     return make_matchstate(ms);
 }
 
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Binary)
+{
+    /* Creates a match state without save slots for Binary, allocated
+     * through HeapOnlyAlloc, and returns a pointer to it. Returns NULL
+     * when the size of the binary cannot be expressed in bits within a
+     * Uint. */
+    ErlBinMatchState *state;
+    ProcBin *proc_bin;
+    Eterm real_bin;
+    Uint byte_offs, bit_offs, tail_bits;
+    Uint total_bytes;
+
+    ASSERT(is_binary(Binary));
+    /* The byte count is multiplied by 8 further down; bail out if any of
+     * its three topmost bits is set. */
+    total_bytes = binary_size(Binary);
+    if ((total_bytes >> (8*sizeof(Uint)-3)) != 0) {
+        return NULL;
+    }
+
+    state = (ErlBinMatchState *) HeapOnlyAlloc(p, ERL_BIN_MATCHSTATE_SIZE(0));
+    ERTS_GET_REAL_BIN(Binary, real_bin, byte_offs, bit_offs, tail_bits);
+
+    proc_bin = (ProcBin *) boxed_val(real_bin);
+    if (proc_bin->thing_word == HEADER_PROC_BIN && proc_bin->flags != 0) {
+        erts_emasculate_writable_binary(proc_bin);
+    }
+
+    state->thing_word = HEADER_BIN_MATCHSTATE(0);
+    state->mb.orig = real_bin;
+    state->mb.base = binary_bytes(real_bin);
+    state->mb.offset = 8 * byte_offs + bit_offs;
+    state->mb.size = total_bytes * 8 + state->mb.offset + tail_bits;
+    return state;
+}
+
 #ifdef DEBUG
 # define CHECK_MATCH_BUFFER(MB) check_match_buffer(MB)
 
diff --git a/erts/emulator/beam/erl_bits.h b/erts/emulator/beam/erl_bits.h
index 7beef5cfda..50d353e1fa 100644
--- a/erts/emulator/beam/erl_bits.h
+++ b/erts/emulator/beam/erl_bits.h
@@ -73,12 +73,16 @@ struct erl_bits_state {
 typedef struct erl_bin_match_struct{
   Eterm thing_word;
   ErlBinMatchBuffer mb;		/* Present match buffer */
-  Eterm save_offset[1];		/* Saved offsets */
+  Eterm save_offset[1];         /* Saved offsets, only valid for contexts
+                                 * created through bs_start_match2. */
 } ErlBinMatchState;
 
-#define ERL_BIN_MATCHSTATE_SIZE(_Max) ((sizeof(ErlBinMatchState) + (_Max)*sizeof(Eterm))/sizeof(Eterm)) 
-#define HEADER_BIN_MATCHSTATE(_Max) _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max))-1, _TAG_HEADER_BIN_MATCHSTATE)
-#define HEADER_NUM_SLOTS(hdr) (header_arity(hdr)-sizeof(ErlBinMatchState)/sizeof(Eterm)+1)
+#define ERL_BIN_MATCHSTATE_SIZE(_Max) \
+    ((offsetof(ErlBinMatchState, save_offset) + (_Max)*sizeof(Eterm))/sizeof(Eterm))
+#define HEADER_BIN_MATCHSTATE(_Max) \
+    _make_header(ERL_BIN_MATCHSTATE_SIZE((_Max)) - 1, _TAG_HEADER_BIN_MATCHSTATE)
+#define HEADER_NUM_SLOTS(hdr) \
+    (header_arity(hdr) - (offsetof(ErlBinMatchState, save_offset) / sizeof(Eterm)) + 1)
 
 #define make_matchstate(_Ms) make_boxed((Eterm*)(_Ms))  
 #define ms_matchbuffer(_Ms) &(((ErlBinMatchState*) boxed_val(_Ms))->mb)
@@ -144,6 +148,7 @@ void erts_bits_destroy_state(ERL_BITS_PROTO_0);
  */
 
 Eterm erts_bs_start_match_2(Process *p, Eterm Bin, Uint Max);
+ErlBinMatchState *erts_bs_start_match_3(Process *p, Eterm Bin);
 Eterm erts_bs_get_integer_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
 Eterm erts_bs_get_binary_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
 Eterm erts_bs_get_float_2(Process *p, Uint num_bits, unsigned flags, ErlBinMatchBuffer* mb);
diff --git a/erts/emulator/beam/erl_db.c b/erts/emulator/beam/erl_db.c
index c009a3bde8..df6f42edd3 100644
--- a/erts/emulator/beam/erl_db.c
+++ b/erts/emulator/beam/erl_db.c
@@ -90,7 +90,8 @@ enum DbIterSafety {
     ITER_SAFE         /* No need to fixate at all */
 };
 #  define ITERATION_SAFETY(Proc,Tab) \
-    ((IS_TREE_TABLE((Tab)->common.status) || ONLY_WRITER(Proc,Tab)) ? ITER_SAFE \
+    ((IS_TREE_TABLE((Tab)->common.status) || IS_CATREE_TABLE((Tab)->common.status) \
+      || ONLY_WRITER(Proc,Tab)) ? ITER_SAFE                             \
      : (((Tab)->common.status & DB_FINE_LOCKED) ? ITER_UNSAFE : ITER_SAFE_LOCKED))
 
 #define DID_TRAP(P,Ret) (!is_value(Ret) && ((P)->freason == TRAP))
@@ -359,6 +360,7 @@ typedef enum {
 
 extern DbTableMethod db_hash;
 extern DbTableMethod db_tree;
+extern DbTableMethod db_catree;
 
 int user_requested_db_max_tabs;
 int erts_ets_realloc_always_moves;
@@ -407,21 +409,17 @@ static void
 free_dbtable(void *vtb)
 {
     DbTable *tb = (DbTable *) vtb;
-#ifdef HARDDEBUG
-	if (erts_atomic_read_nob(&tb->common.memory_size) != sizeof(DbTable)) {
-	    erts_fprintf(stderr, "ets: free_dbtable memory remain=%ld fix=%x\n",
-			 erts_atomic_read_nob(&tb->common.memory_size)-sizeof(DbTable),
-			 tb->common.fixations);
-	}
-#endif
-	erts_rwmtx_destroy(&tb->common.rwlock);
-	erts_mtx_destroy(&tb->common.fixlock);
-	ASSERT(is_immed(tb->common.heir_data));
 
-        if (tb->common.btid)
-            erts_bin_release(tb->common.btid);
+    ASSERT(erts_atomic_read_nob(&tb->common.memory_size) == sizeof(DbTable));
+
+    erts_rwmtx_destroy(&tb->common.rwlock);
+    erts_mtx_destroy(&tb->common.fixlock);
+    ASSERT(is_immed(tb->common.heir_data));
+
+    if (tb->common.btid)
+        erts_bin_release(tb->common.btid);
 
-	erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable));
+    erts_db_free(ERTS_ALC_T_DB_TABLE, tb, (void *) tb, sizeof(DbTable));
 }
 
 static void schedule_free_dbtable(DbTable* tb)
@@ -1076,7 +1074,7 @@ BIF_RETTYPE ets_update_element_3(BIF_ALIST_3)
     DB_BIF_GET_TABLE(tb, DB_WRITE, LCK_WRITE_REC, BIF_ets_update_element_3);
 
     UseTmpHeap(2,BIF_P);
-    if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) {
+    if (!(tb->common.status & (DB_SET | DB_ORDERED_SET | DB_CA_ORDERED_SET))) {
 	goto bail_out;
     }
     if (is_tuple(BIF_ARG_3)) {
@@ -1165,7 +1163,7 @@ do_update_counter(Process *p, DbTable* tb,
 
     UseTmpHeap(5, p);
 
-    if (!(tb->common.status & (DB_SET | DB_ORDERED_SET))) {
+    if (!(tb->common.status & (DB_SET | DB_ORDERED_SET | DB_CA_ORDERED_SET))) {
 	goto bail_out;
     }
     if (is_integer(arg3)) { /* Incr */
@@ -1647,15 +1645,15 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
 	val = CAR(list_val(list));
 	if (val == am_bag) {
 	    status |= DB_BAG;
-	    status &= ~(DB_SET | DB_DUPLICATE_BAG | DB_ORDERED_SET);
+	    status &= ~(DB_SET | DB_DUPLICATE_BAG | DB_ORDERED_SET | DB_CA_ORDERED_SET);
 	}
 	else if (val == am_duplicate_bag) {
 	    status |= DB_DUPLICATE_BAG;
-	    status &= ~(DB_SET | DB_BAG | DB_ORDERED_SET);
+	    status &= ~(DB_SET | DB_BAG | DB_ORDERED_SET | DB_CA_ORDERED_SET);
 	}
 	else if (val == am_ordered_set) {
 	    status |= DB_ORDERED_SET;
-	    status &= ~(DB_SET | DB_BAG | DB_DUPLICATE_BAG);
+	    status &= ~(DB_SET | DB_BAG | DB_DUPLICATE_BAG | DB_CA_ORDERED_SET);
 	}
 	else if (is_tuple(val)) {
 	    Eterm *tp = tuple_val(val);
@@ -1716,7 +1714,13 @@ BIF_RETTYPE ets_new_2(BIF_ALIST_2)
     if (is_not_nil(list)) { /* bad opt or not a well formed list */
 	BIF_ERROR(BIF_P, BADARG);
     }
-    if (IS_HASH_TABLE(status)) {
+    if (IS_TREE_TABLE(status) && is_fine_locked && !(status & DB_PRIVATE)) {
+        meth = &db_catree;
+        status |= DB_CA_ORDERED_SET;
+        status &= ~(DB_SET | DB_BAG | DB_DUPLICATE_BAG | DB_ORDERED_SET);
+        status |= DB_FINE_LOCKED;
+    }
+    else if (IS_HASH_TABLE(status)) {
 	meth = &db_hash;
 	if (is_fine_locked && !(status & DB_PRIVATE)) {
 	    status |= DB_FINE_LOCKED;
@@ -3506,6 +3510,7 @@ void init_db(ErtsDbSpinCount db_spin_count)
 
     db_initialize_hash();
     db_initialize_tree();
+    db_initialize_catree();
 
     /* Non visual BIF to trap to. */
     erts_init_trap_export(&ets_select_delete_continue_exp,
@@ -4114,6 +4119,8 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What)
 	    ret = am_duplicate_bag;
 	} else if (tb->common.status & DB_ORDERED_SET) {
 	    ret = am_ordered_set;
+	} else if (tb->common.status & DB_CA_ORDERED_SET) {
+	    ret = am_ordered_set;
 	} else { /*TT*/
 	    ASSERT(tb->common.status & DB_BAG);
 	    ret = am_bag;
@@ -4240,9 +4247,20 @@ static Eterm table_info(Process* p, DbTable* tb, Eterm What)
 			 make_small(stats.max_chain_len),
 			 make_small(stats.kept_items));
 	}
-	else {
+	else if (IS_CATREE_TABLE(tb->common.status)) {
+            DbCATreeStats stats;
+            Eterm* hp;
+
+            db_calc_stats_catree(&tb->catree, &stats);
+            hp = HAlloc(p, 4);
+            ret = TUPLE3(hp,
+                         make_small(stats.route_nodes),
+                         make_small(stats.base_nodes),
+                         make_small(stats.max_depth));
+
+        }
+        else
 	    ret = am_false;
-	}
     }
     return ret;
 }
@@ -4409,6 +4427,12 @@ void erts_lcnt_enable_db_lock_count(DbTable *tb, int enable) {
 
     if(IS_HASH_TABLE(tb->common.status)) {
         erts_lcnt_enable_db_hash_lock_count(&tb->hash, enable);
+    } else if(IS_CATREE_TABLE(tb->common.status)) {
+        /* erts_lcnt_enable_db_catree_lock_count is not thread safe so
+           the table needs to get locked */
+        db_lock(tb, LCK_WRITE);
+        erts_lcnt_enable_db_catree_lock_count(&tb->catree, enable);
+        db_unlock(tb, LCK_WRITE);
     }
 }
 
@@ -4441,3 +4465,16 @@ void erts_lcnt_update_db_locks(int enable) {
 #ifdef ETS_DBG_FORCE_TRAP
 erts_aint_t erts_ets_dbg_force_trap = 0;
 #endif
+
+int erts_ets_force_split(Eterm tid, int on)
+{
+    DbTable *table = tid2tab(tid);
+    if (table == NULL || !IS_CATREE_TABLE(table->common.type))
+        return 0; /* no such table, or not backed by a CA tree */
+
+    db_lock(table, LCK_WRITE);
+    if ((table->common.status & DB_DELETE) == 0) /* DB_DELETE not set */
+        db_catree_force_split(&table->catree, on);
+    db_unlock(table, LCK_WRITE);
+    return 1;
+}
diff --git a/erts/emulator/beam/erl_db.h b/erts/emulator/beam/erl_db.h
index 23975d208f..5955d42aae 100644
--- a/erts/emulator/beam/erl_db.h
+++ b/erts/emulator/beam/erl_db.h
@@ -66,6 +66,7 @@ typedef struct {
 #include "erl_db_util.h" /* Flags */
 #include "erl_db_hash.h" /* DbTableHash */
 #include "erl_db_tree.h" /* DbTableTree */
+#include "erl_db_catree.h" /* DbTableCATree */
 /*TT*/
 
 Uint erts_get_ets_misc_mem_size(void);
@@ -90,6 +91,7 @@ union db_table {
     DbTableCommon common; /* Any type of db table */
     DbTableHash hash;     /* Linear hash array specific data */
     DbTableTree tree;     /* AVL tree specific data */
+    DbTableCATree catree;     /* CA tree specific data */
     DbTableRelease release;
     /*TT*/
 };
@@ -128,6 +130,7 @@ extern Export ets_select_continue_exp;
 extern erts_atomic_t erts_ets_misc_mem_size;
 
 Eterm erts_ets_colliding_names(Process*, Eterm name, Uint cnt);
+int erts_ets_force_split(Eterm tid, int on);
 Uint erts_db_get_max_tabs(void);
 Eterm erts_db_make_tid(Process *c_p, DbTableCommon *tb);
 
@@ -284,6 +287,12 @@ ERTS_GLB_INLINE void erts_db_free(ErtsAlcType_t type,
 				  void *ptr,
 				  Uint size);
 
+ERTS_GLB_INLINE void erts_schedule_db_free(DbTableCommon* tab,
+                                           void (*free_func)(void *),
+                                           void *ptr,
+                                           ErtsThrPrgrLaterOp *lop,
+                                           Uint size);
+
 ERTS_GLB_INLINE void erts_db_free_nt(ErtsAlcType_t type,
 				     void *ptr,
 				     Uint size);
@@ -304,6 +313,26 @@ erts_db_free(ErtsAlcType_t type, DbTable *tab, void *ptr, Uint size)
 }
 
 ERTS_GLB_INLINE void
+erts_schedule_db_free(DbTableCommon* tab,
+                      void (*free_func)(void *),
+                      void *ptr,
+                      ErtsThrPrgrLaterOp *lop,
+                      Uint size)
+{
+    ASSERT(ptr != 0);
+    ASSERT(((void *) tab) != ptr);
+    ASSERT(size == ERTS_ALC_DBG_BLK_SZ(ptr));
+
+    /*
+     * Update the table memory stats by 'size' right away, because the
+     * table may already be gone when 'free_func' is finally called.
+     */
+    ERTS_DB_ALC_MEM_UPDATE_((DbTable*)tab, size, 0);
+
+    erts_schedule_thr_prgr_later_cleanup_op(free_func, ptr, lop, size);
+}
+
+ERTS_GLB_INLINE void
 erts_db_free_nt(ErtsAlcType_t type, void *ptr, Uint size)
 {
     ASSERT(ptr != 0);
diff --git a/erts/emulator/beam/erl_db_catree.c b/erts/emulator/beam/erl_db_catree.c
new file mode 100644
index 0000000000..75ac1c4a93
--- /dev/null
+++ b/erts/emulator/beam/erl_db_catree.c
@@ -0,0 +1,2250 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB and Kjell Winblad 1998-2018. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Implementation of ETS ordered_set table type with
+ *              fine-grained synchronization.
+ *
+ * Author: 	Kjell Winblad
+ *
+ * This implementation is based on the contention adapting search tree
+ * (CA tree). The CA tree is a concurrent data structure that
+ * dynamically adapts its synchronization granularity based on how
+ * much contention is detected in locks. The following publication
+ * contains a detailed description of CA trees:
+ * 
+ * A Contention Adapting Approach to Concurrent Ordered Sets
+ * Journal of Parallel and Distributed Computing, 2018
+ * Kjell Winblad and Konstantinos Sagonas
+ * https://doi.org/10.1016/j.jpdc.2017.11.007
+ *
+ * The following publication may also be interesting as it discusses
+ * how the CA tree can be used as an ETS ordered_set table type
+ * backend:
+ *
+ * More Scalable Ordered Set for ETS Using Adaptation
+ * In Thirteenth ACM SIGPLAN workshop on Erlang (2014)
+ * Kjell Winblad and Konstantinos Sagonas
+ * https://doi.org/10.1145/2633448.2633455
+ *
+ * This implementation of the ordered_set ETS table type is only
+ * activated when the options {write_concurrency, true}, public and
+ * ordered_set are passed to the ets:new/2 function. This
+ * implementation is expected to scale better than the default
+ * implementation located in "erl_db_tree.c".
+ * 
+ * The default implementation has a static stack optimization (see
+ * get_static_stack in erl_db_tree.c). This implementation does not
+ * have such an optimization as it induces bad scalability when
+ * concurrent read operations are frequent (they all try to get hold
+ * of the same stack). The default implementation may thus perform
+ * better compared to this implementation in scenarios where the
+ * static stack optimization is useful. One such scenario is when only
+ * one process is accessing the table and this process is traversing
+ * the table with a sequence of next/2 calls.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include "sys.h"
+#include "erl_vm.h"
+#include "global.h"
+#include "erl_process.h"
+#include "error.h"
+#define ERTS_WANT_DB_INTERNAL__
+#include "erl_db.h"
+#include "bif.h"
+#include "big.h"
+#include "erl_binary.h"
+
+#include "erl_db_catree.h"
+#include "erl_db_tree.h"
+#include "erl_db_tree_util.h"
+
+#ifdef DEBUG
+#  define IF_DEBUG(X) X
+#else
+#  define IF_DEBUG(X)
+#endif
+
+/*
+** Forward declarations
+*/
+
+static SWord do_free_base_node_cont(DbTableCATree *tb, SWord num_left);
+static SWord do_free_routing_nodes_catree_cont(DbTableCATree *tb, SWord num_left);
+static DbTableCATreeNode *catree_first_base_node_from_free_list(DbTableCATree *tb);
+
+/* Method interface functions */
+static int db_first_catree(Process *p, DbTable *tbl,
+                           Eterm *ret);
+static int db_next_catree(Process *p, DbTable *tbl,
+                          Eterm key, Eterm *ret);
+static int db_last_catree(Process *p, DbTable *tbl,
+                          Eterm *ret);
+static int db_prev_catree(Process *p, DbTable *tbl,
+                          Eterm key,
+                          Eterm *ret);
+static int db_put_catree(DbTable *tbl, Eterm obj, int key_clash_fail);
+static int db_get_catree(Process *p, DbTable *tbl,
+                         Eterm key,  Eterm *ret);
+static int db_member_catree(DbTable *tbl, Eterm key, Eterm *ret);
+static int db_get_element_catree(Process *p, DbTable *tbl,
+                                 Eterm key,int ndex,
+                                 Eterm *ret);
+static int db_erase_catree(DbTable *tbl, Eterm key, Eterm *ret);
+static int db_erase_object_catree(DbTable *tbl, Eterm object,Eterm *ret);
+static int db_slot_catree(Process *p, DbTable *tbl,
+                          Eterm slot_term,  Eterm *ret);
+static int db_select_catree(Process *p, DbTable *tbl, Eterm tid,
+                            Eterm pattern, int reversed, Eterm *ret);
+static int db_select_count_catree(Process *p, DbTable *tbl, Eterm tid,
+                                  Eterm pattern,  Eterm *ret);
+static int db_select_chunk_catree(Process *p, DbTable *tbl, Eterm tid,
+                                  Eterm pattern, Sint chunk_size,
+                                  int reversed, Eterm *ret);
+static int db_select_continue_catree(Process *p, DbTable *tbl,
+                                     Eterm continuation, Eterm *ret);
+static int db_select_count_continue_catree(Process *p, DbTable *tbl,
+                                           Eterm continuation, Eterm *ret);
+static int db_select_delete_catree(Process *p, DbTable *tbl, Eterm tid,
+                                   Eterm pattern,  Eterm *ret);
+static int db_select_delete_continue_catree(Process *p, DbTable *tbl, 
+                                            Eterm continuation, Eterm *ret);
+static int db_select_replace_catree(Process *p, DbTable *tbl, Eterm tid,
+                                    Eterm pattern, Eterm *ret);
+static int db_select_replace_continue_catree(Process *p, DbTable *tbl,
+                                             Eterm continuation, Eterm *ret);
+static int db_take_catree(Process *, DbTable *, Eterm, Eterm *);
+static void db_print_catree(fmtfn_t to, void *to_arg,
+                            int show, DbTable *tbl);
+static int db_free_table_catree(DbTable *tbl);
+static SWord db_free_table_continue_catree(DbTable *tbl, SWord);
+static void db_foreach_offheap_catree(DbTable *,
+                                      void (*)(ErlOffHeap *, void *),
+                                      void *);
+static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds);
+static int
+db_lookup_dbterm_catree(Process *, DbTable *, Eterm key, Eterm obj,
+                        DbUpdateHandle*);
+static void db_finalize_dbterm_catree(int cret, DbUpdateHandle *);
+
+static void split_catree(DbTableCATree *tb,
+                         DbTableCATreeNode* ERTS_RESTRICT base,
+                         DbTableCATreeNode* ERTS_RESTRICT parent);
+static void join_catree(DbTableCATree *tb,
+                        DbTableCATreeNode *thiz,
+                        DbTableCATreeNode *parent);
+
+
+/*
+** External interface: the DbTableMethod callback table for CA tree tables
+*/
+DbTableMethod db_catree =
+{
+    db_create_catree,
+    db_first_catree,
+    db_next_catree,
+    db_last_catree,
+    db_prev_catree,
+    db_put_catree,
+    db_get_catree,
+    db_get_element_catree,
+    db_member_catree,
+    db_erase_catree,
+    db_erase_object_catree,
+    db_slot_catree,
+    db_select_chunk_catree,
+    db_select_catree,
+    db_select_delete_catree,
+    db_select_continue_catree,
+    db_select_delete_continue_catree,
+    db_select_count_catree,
+    db_select_count_continue_catree,
+    db_select_replace_catree,
+    db_select_replace_continue_catree,
+    db_take_catree,
+    db_delete_all_objects_catree,
+    db_free_table_catree,
+    db_free_table_continue_catree,
+    db_print_catree,
+    db_foreach_offheap_catree,
+    db_lookup_dbterm_catree,
+    db_finalize_dbterm_catree
+
+};
+
+/*
+ * Constants
+ */
+
+#define ERL_DB_CATREE_LOCK_FAILURE_CONTRIBUTION 200
+#define ERL_DB_CATREE_LOCK_SUCCESS_CONTRIBUTION (-1)
+#define ERL_DB_CATREE_LOCK_MORE_THAN_ONE_CONTRIBUTION (-10)
+#define ERL_DB_CATREE_HIGH_CONTENTION_LIMIT 1000
+#define ERL_DB_CATREE_LOW_CONTENTION_LIMIT (-1000)
+#define ERL_DB_CATREE_MAX_ROUTE_NODE_LAYER_HEIGHT 14
+
+/*
+ * Internal CA tree related helper functions and macros
+ */
+
+#define GET_ROUTE_NODE_KEY(node) ((node)->u.route.key.term)
+#define GET_BASE_NODE_LOCK(node) (&((node)->u.base.lock))
+#define GET_ROUTE_NODE_LOCK(node) (&((node)->u.route.lock))
+
+/* Helpers for reading and writing shared atomic variables. Every macro
+ * expands to a plain expression, so SET_* calls need their own ';'. */
+
+/* No memory barrier */
+#define GET_ROOT(tb) ((DbTableCATreeNode*)erts_atomic_read_nob(&((tb)->root)))
+#define GET_LEFT(ca_tree_route_node) ((DbTableCATreeNode*)erts_atomic_read_nob(&((ca_tree_route_node)->u.route.left)))
+#define GET_RIGHT(ca_tree_route_node) ((DbTableCATreeNode*)erts_atomic_read_nob(&((ca_tree_route_node)->u.route.right)))
+#define SET_ROOT(tb, v) erts_atomic_set_nob(&((tb)->root), (erts_aint_t)(v))
+#define SET_LEFT(ca_tree_route_node, v) erts_atomic_set_nob(&((ca_tree_route_node)->u.route.left), (erts_aint_t)(v))
+#define SET_RIGHT(ca_tree_route_node, v) erts_atomic_set_nob(&((ca_tree_route_node)->u.route.right), (erts_aint_t)(v))
+
+
+/* Release or acquire barriers */
+#define GET_ROOT_ACQB(tb) ((DbTableCATreeNode*)erts_atomic_read_acqb(&((tb)->root)))
+#define GET_LEFT_ACQB(ca_tree_route_node) ((DbTableCATreeNode*)erts_atomic_read_acqb(&((ca_tree_route_node)->u.route.left)))
+#define GET_RIGHT_ACQB(ca_tree_route_node) ((DbTableCATreeNode*)erts_atomic_read_acqb(&((ca_tree_route_node)->u.route.right)))
+#define SET_ROOT_RELB(tb, v) erts_atomic_set_relb(&((tb)->root), (erts_aint_t)(v))
+#define SET_LEFT_RELB(ca_tree_route_node, v) erts_atomic_set_relb(&((ca_tree_route_node)->u.route.left), (erts_aint_t)(v))
+#define SET_RIGHT_RELB(ca_tree_route_node, v) erts_atomic_set_relb(&((ca_tree_route_node)->u.route.right), (erts_aint_t)(v))
+
+/* Returns CMP(key, K) where K is the routing key stored in route node 'obj' */
+static ERTS_INLINE Sint cmp_key_route(Eterm key, DbTableCATreeNode *obj)
+{
+    Eterm route_key = GET_ROUTE_NODE_KEY(obj);
+    return CMP(key, route_key);
+}
+
+/*
+ * Used by split_catree: true when the tree 'root' is empty or consists
+ * of a single node, i.e. when there is nothing to split.
+ */
+static ERTS_INLINE int less_than_two_elements(TreeDbTerm *root)
+{
+    return !root || (!root->left && !root->right);
+}
+
+/* Inserts value_to_insert into the AVL tree rooted at insert_to_root, ordered
+ * by cmp_key() on GETKEY(tb, ...); returns the new root. A key comparing equal
+ * to an existing one is not rejected: the new node goes to the right of it. */
+static ERTS_INLINE
+TreeDbTerm* insert_TreeDbTerm(DbTableCATree *tb,
+                              TreeDbTerm *insert_to_root,
+                              TreeDbTerm *value_to_insert) {
+    /* Non recursive insertion in AVL tree, building our own stack */
+    TreeDbTerm **tstack[STACK_NEED]; /* links followed on the way down */
+    int tpos = 0;
+    int dstack[STACK_NEED+1]; /* direction taken at each of those links */
+    int dpos = 0;
+    int state = 0; /* non-zero while a height increase must be propagated upwards */
+    TreeDbTerm * base = insert_to_root;
+    TreeDbTerm **this = &base;
+    Sint c;
+    Eterm key;
+    int dir;
+    TreeDbTerm *p1, *p2, *p;
+
+    key = GETKEY(tb, value_to_insert->dbterm.tpl);
+
+    dstack[dpos++] = DIR_END; /* sentinel that stops the rebalancing loop at the root */
+    for (;;)
+	if (!*this) { /* Found our place */
+	    state = 1;
+	    *this = value_to_insert;
+	    (*this)->balance = 0;
+	    (*this)->left = (*this)->right = NULL;
+	    break;
+	} else if ((c = cmp_key(&tb->common, key, *this)) < 0) {
+	    /* go left */
+	    dstack[dpos++] = DIR_LEFT;
+	    tstack[tpos++] = this;
+	    this = &((*this)->left);
+	} else { /* go right */
+	    dstack[dpos++] = DIR_RIGHT;
+	    tstack[tpos++] = this;
+	    this = &((*this)->right);
+	}
+
+    while (state && ( dir = dstack[--dpos] ) != DIR_END) { /* walk back up, fixing balances */
+	this = tstack[--tpos];
+	p = *this;
+	if (dir == DIR_LEFT) {
+	    switch (p->balance) {
+	    case 1: /* was right-heavy: now balanced, height unchanged */
+		p->balance = 0;
+		state = 0;
+		break;
+	    case 0: /* becomes left-heavy: height grew, keep going up */
+		p->balance = -1;
+		break;
+	    case -1: /* The icky case */
+		p1 = p->left;
+		if (p1->balance == -1) { /* Single LL rotation */
+		    p->left = p1->right;
+		    p1->right = p;
+		    p->balance = 0;
+		    (*this) = p1;
+		} else { /* Double LR rotation */
+		    p2 = p1->right;
+		    p1->right = p2->left;
+		    p2->left = p1;
+		    p->left = p2->right;
+		    p2->right = p;
+		    p->balance = (p2->balance == -1) ? +1 : 0;
+		    p1->balance = (p2->balance == 1) ? -1 : 0;
+		    (*this) = p2;
+		}
+		(*this)->balance = 0;
+		state = 0;
+		break;
+	    }
+	} else { /* dir == DIR_RIGHT */
+	    switch (p->balance) {
+	    case -1: /* was left-heavy: now balanced, height unchanged */
+		p->balance = 0;
+		state = 0;
+		break;
+	    case 0: /* becomes right-heavy: height grew, keep going up */
+		p->balance = 1;
+		break;
+	    case 1:
+		p1 = p->right;
+		if (p1->balance == 1) { /* Single RR rotation */
+		    p->right = p1->left;
+		    p1->left = p;
+		    p->balance = 0;
+		    (*this) = p1;
+		} else { /* Double RL rotation */
+		    p2 = p1->left;
+		    p1->left = p2->right;
+		    p2->right = p1;
+		    p->right = p2->left;
+		    p2->left = p;
+		    p->balance = (p2->balance == 1) ? -1 : 0;
+		    p1->balance = (p2->balance == -1) ? 1 : 0;
+		    (*this) = p2;
+		}
+		(*this)->balance = 0;
+		state = 0;
+		break;
+	    }
+	}
+    }
+    return base;
+}
+
+/*
+ * Splits the AVL tree 'root' into two trees. The node containing the
+ * "split key" is stored in *split_key_node_wb, the tree with the keys
+ * smaller than the split key in *left_wb and the tree with the rest of
+ * the keys (including the split key node) in *right_wb. The caller
+ * (split_catree) only calls this for trees with at least two nodes.
+ */
+static void split_tree(DbTableCATree *tb,
+                       TreeDbTerm *root,
+                       TreeDbTerm **split_key_node_wb,
+                       TreeDbTerm **left_wb,
+                       TreeDbTerm **right_wb) {
+    TreeDbTerm *smaller = root->left;
+    TreeDbTerm *larger = root->right;
+
+    if (smaller == NULL) {
+        /* Nothing to the left of the root: let the root alone form the
+         * left tree so that the split does not produce an empty tree */
+        *right_wb = larger;
+        *split_key_node_wb = larger;
+        root->right = NULL;
+        root->balance = 0;
+        *left_wb = root;
+        return;
+    }
+    /* The root becomes the split key node: detach both subtrees and
+     * insert the root node into the right one */
+    root->left = NULL;
+    root->right = NULL;
+    larger = insert_TreeDbTerm(tb, larger, root);
+    *split_key_node_wb = root;
+    *left_wb = smaller;
+    *right_wb = larger;
+}
+
+/*
+ * Used by the join_trees function.
+ *
+ * Returns the height of the AVL tree 'root' (0 for an empty tree). Only
+ * one root-to-leaf path is walked: from each node the walk continues
+ * into the left subtree when the node's balance is -1 and into the right
+ * subtree otherwise, which relies on the balance fields being correct.
+ */
+static ERTS_INLINE int compute_tree_hight(TreeDbTerm * root)
+{
+    TreeDbTerm *node = root;
+    int height;
+
+    if (node == NULL)
+        return 0;
+
+    for (height = 1; node->left != NULL || node->right != NULL; height++)
+        node = (node->balance == -1) ? node->left : node->right;
+
+    return height;
+}
+
+/* Used by the join_trees function: unlinks and returns the node with the
+ * smallest (is_min != 0) or largest (is_min == 0) key of the tree *root,
+ * updating *root as needed. Returns NULL if the tree is empty. */
+static ERTS_INLINE
+TreeDbTerm* linkout_min_or_max_tree_node(TreeDbTerm **root, int is_min)
+{
+    TreeDbTerm **tstack[STACK_NEED]; /* links followed on the way down */
+    int tpos = 0;
+    int dstack[STACK_NEED+1]; /* direction taken at each of those links */
+    int dpos = 0;
+    int state = 0; /* non-zero while the balancing helpers must be called further up */
+    TreeDbTerm **this = root;
+    int dir;
+    TreeDbTerm *q = NULL;
+
+    dstack[dpos++] = DIR_END; /* sentinel that stops the loop below at the root */
+    for (;;) {
+        if (!*this) { /* Failure */
+            return NULL;
+        } else if (is_min && (*this)->left != NULL) {
+            dstack[dpos++] = DIR_LEFT;
+            tstack[tpos++] = this;
+            this = &((*this)->left);
+        } else if (!is_min && (*this)->right != NULL) {
+            dstack[dpos++] = DIR_RIGHT;
+            tstack[tpos++] = this;
+            this = &((*this)->right);
+        } else { /* Min (or max) value, found the one to splice out */
+            q = (*this);
+            if (q->right == NULL) {
+                (*this) = q->left; /* replace q by its only possible child */
+                state = 1;
+            } else if (q->left == NULL) {
+                (*this) = q->right;
+                state = 1;
+            }
+            break;
+        }
+    }
+    while (state && ( dir = dstack[--dpos] ) != DIR_END) { /* walk back up towards the root */
+        this = tstack[--tpos];
+        if (dir == DIR_LEFT) {
+            state = tree_balance_left(this); /* result decides whether to continue upwards */
+        } else {
+            state = tree_balance_right(this);
+        }
+    }
+    return q;
+}
+
+#define LINKOUT_MIN_TREE_NODE(root) linkout_min_or_max_tree_node(root, 1) /* unlink the node with the smallest key */
+#define LINKOUT_MAX_TREE_NODE(root) linkout_min_or_max_tree_node(root, 0) /* unlink the node with the largest key */
+
+/*
+ * Joins two AVL trees where all the keys in the left one are smaller
+ * than the keys in the right one and returns the resulting tree.
+ *
+ * The algorithm is described on page 474 in D. E. Knuth. The Art of
+ * Computer Programming: Sorting and Searching,
+ * vol. 3. Addison-Wesley, 2nd edition, 1998.
+ */
+static TreeDbTerm* join_trees(TreeDbTerm *left_root_param,
+                               TreeDbTerm *right_root_param)
+{
+    TreeDbTerm **tstack[STACK_NEED]; /* links followed on the way down */
+    int tpos = 0;
+    int dstack[STACK_NEED+1]; /* direction taken at each of those links */
+    int dpos = 0;
+    int state = 1; /* non-zero while a height increase must be propagated upwards */
+    TreeDbTerm **this;
+    int dir;
+    TreeDbTerm *p1, *p2, *p;
+    TreeDbTerm *left_root = left_root_param;
+    TreeDbTerm *right_root = right_root_param;
+    int left_height;
+    int right_height;
+    int current_height;
+    dstack[dpos++] = DIR_END; /* sentinel that stops the rebalancing loop at the root */
+    if (left_root == NULL) {
+        return right_root;
+    } else if (right_root == NULL) {
+        return left_root;
+    }
+
+    left_height = compute_tree_hight(left_root);
+    right_height = compute_tree_hight(right_root);
+    if (left_height >= right_height) {
+        /* The smallest node of the right tree becomes the node joining the trees */
+        TreeDbTerm * new_root =
+            LINKOUT_MIN_TREE_NODE(&right_root);
+        int new_right_height = compute_tree_hight(right_root);
+        TreeDbTerm * current_node = left_root;
+        this = &left_root;
+        current_height = left_height;
+        /* Descend the right spine of the left tree to a subtree of suitable height */
+        while(current_height > new_right_height + 1) {
+            if (current_node->balance == -1) {
+                current_height = current_height - 2; /* right subtree is two levels lower */
+            } else {
+                current_height = current_height - 1;
+            }
+            dstack[dpos++] = DIR_RIGHT;
+            tstack[tpos++] = this;
+            this = &((*this)->right);
+            current_node = current_node->right;
+        }
+        new_root->left = current_node;
+        new_root->right = right_root;
+        new_root->balance = new_right_height - current_height;
+        *this = new_root;
+    } else {
+        /* This case is symmetric to the previous case */
+        TreeDbTerm * new_root =
+            LINKOUT_MAX_TREE_NODE(&left_root);
+        int new_left_height = compute_tree_hight(left_root);
+        TreeDbTerm * current_node = right_root;
+        this = &right_root;
+        current_height = right_height;
+        while (current_height > new_left_height + 1) {
+            if (current_node->balance == 1) {
+                current_height = current_height - 2; /* left subtree is two levels lower */
+            } else {
+                current_height = current_height - 1;
+            }
+            dstack[dpos++] = DIR_LEFT;
+            tstack[tpos++] = this;
+            this = &((*this)->left);
+            current_node = current_node->left;
+        }
+        new_root->right = current_node;
+        new_root->left = left_root;
+        new_root->balance = current_height - new_left_height;
+        *this = new_root;
+    }
+    /* Now we need to continue as if this was during the insert */
+    while (state && ( dir = dstack[--dpos] ) != DIR_END) {
+        this = tstack[--tpos];
+        p = *this;
+        if (dir == DIR_LEFT) {
+            switch (p->balance) {
+            case 1:
+                p->balance = 0;
+                state = 0;
+                break;
+            case 0:
+                p->balance = -1;
+                break;
+            case -1: /* The icky case */
+                p1 = p->left;
+                if (p1->balance == -1) { /* Single LL rotation */
+                    p->left = p1->right;
+                    p1->right = p;
+                    p->balance = 0;
+                    (*this) = p1;
+                } else { /* Double LR rotation */
+                    p2 = p1->right;
+                    p1->right = p2->left;
+                    p2->left = p1;
+                    p->left = p2->right;
+                    p2->right = p;
+                    p->balance = (p2->balance == -1) ? +1 : 0;
+                    p1->balance = (p2->balance == 1) ? -1 : 0;
+                    (*this) = p2;
+                }
+                (*this)->balance = 0;
+                state = 0;
+                break;
+            }
+        } else { /* dir == DIR_RIGHT */
+            switch (p->balance) {
+            case -1:
+                p->balance = 0;
+                state = 0;
+                break;
+            case 0:
+                p->balance = 1;
+                break;
+            case 1:
+                p1 = p->right;
+                if (p1->balance == 1) { /* Single RR rotation */
+                    p->right = p1->left;
+                    p1->left = p;
+                    p->balance = 0;
+                    (*this) = p1;
+                } else { /* Double RL rotation */
+                    p2 = p1->left;
+                    p1->left = p2->right;
+                    p2->right = p1;
+                    p->right = p2->left;
+                    p2->left = p;
+                    p->balance = (p2->balance == 1) ? -1 : 0;
+                    p1->balance = (p2->balance == -1) ? 1 : 0;
+                    (*this) = p2;
+                }
+                (*this)->balance = 0;
+                state = 0;
+                break;
+            }
+        }
+    }
+    /* Return the joined tree */
+    if (left_height >= right_height) {
+        return left_root;
+    } else {
+        return right_root;
+    }
+}
+
+#ifdef DEBUG /* DEBUG builds enable the random split/join provocation defined below */
+#  define PROVOKE_RANDOM_SPLIT_JOIN
+#endif
+#ifdef PROVOKE_RANDOM_SPLIT_JOIN
+static int dbg_fastrand(void) /* debug-only pseudo random generator; returns a value in 0..0x7FFF */
+{
+    static unsigned int g_seed = 648835; /* unsigned: the wrap-around below is well defined */
+    g_seed = 214013u*g_seed + 2531011u;
+    return (int)((g_seed >> 16) & 0x7FFF);
+}
+
+static void dbg_provoke_random_splitjoin(DbTableCATree* tb,
+                                         DbTableCATreeNode* base_node)
+{
+    /* Debug aid, called first thing in wunlock_adapt_base_node(): now and
+     * then pushes lock_statistics just past one of the contention limits */
+    int draw;
+
+    if (tb->common.status & DB_CATREE_FORCE_SPLIT)
+        return;
+    draw = dbg_fastrand() % 8;
+    if (draw == 1)
+        base_node->u.base.lock_statistics = ERL_DB_CATREE_HIGH_CONTENTION_LIMIT + 1;
+    else if (draw == 2)
+        base_node->u.base.lock_statistics = ERL_DB_CATREE_LOW_CONTENTION_LIMIT - 1;
+}
+#else
+#  define dbg_provoke_random_splitjoin(T,N) /* expands to nothing when the provocation is disabled */
+#endif /* PROVOKE_RANDOM_SPLIT_JOIN */
+
+/* Tries to write-lock; NOTE: returns non-zero when the lock was busy, i.e. NOT taken */
+static ERTS_INLINE int try_wlock_base_node(DbTableCATreeBaseNode *base_node)
+{
+    return erts_rwmtx_tryrwlock(&base_node->lock) == EBUSY;
+}
+
+/*
+ * Blocking write lock of a base node. The node's lock_statistics
+ * field is left untouched (wlock_base_node is the counting variant).
+ */
+static ERTS_INLINE void wlock_base_node_no_stats(DbTableCATreeNode *base_node)
+{
+    ASSERT(base_node->is_base_node);
+    erts_rwmtx_rwlock(GET_BASE_NODE_LOCK(base_node));
+}
+
+/*
+ * Write-locks a base node and updates its lock statistics: a failed
+ * try-lock counts as contention, an immediately successful one does not.
+ */
+static ERTS_INLINE void wlock_base_node(DbTableCATreeNode *base_node)
+{
+    int contribution = ERL_DB_CATREE_LOCK_SUCCESS_CONTRIBUTION;
+
+    ASSERT(base_node->is_base_node);
+    if (try_wlock_base_node(&base_node->u.base)) {
+        /* Busy: block until the lock is ours and record the contention */
+        wlock_base_node_no_stats(base_node);
+        contribution = ERL_DB_CATREE_LOCK_FAILURE_CONTRIBUTION;
+    }
+    base_node->u.base.lock_statistics += contribution;
+}
+
+/* Releases the write lock of a base node without looking at its statistics */
+static ERTS_INLINE void wunlock_base_node(DbTableCATreeNode *base_node)
+{
+    erts_rwmtx_rwunlock(GET_BASE_NODE_LOCK(base_node));
+}
+
+/* Releases the write lock of 'node' after possibly joining or splitting it */
+static ERTS_INLINE void wunlock_adapt_base_node(DbTableCATree* tb,
+        DbTableCATreeNode* node, DbTableCATreeNode* parent, int current_level)
+{
+    int want_join, want_split;
+    dbg_provoke_random_splitjoin(tb,node);
+    /* Join: empty node below a route node (unless splits are forced), or low contention */
+    want_join = ((node->u.base.root == NULL && parent != NULL
+                  && !(tb->common.status & DB_CATREE_FORCE_SPLIT))
+                 || node->u.base.lock_statistics < ERL_DB_CATREE_LOW_CONTENTION_LIMIT);
+    /* Split: high contention and the node is not yet at the maximum route depth */
+    want_split = (node->u.base.lock_statistics > ERL_DB_CATREE_HIGH_CONTENTION_LIMIT
+                  && current_level < ERL_DB_CATREE_MAX_ROUTE_NODE_LAYER_HEIGHT);
+    if (want_join)
+        join_catree(tb, node, parent);
+    else if (want_split)
+        split_catree(tb, node, parent);
+    else
+        wunlock_base_node(node);
+}
+
+/* Takes the read lock of a base node; the lock statistics are not updated */
+static ERTS_INLINE void rlock_base_node(DbTableCATreeNode *base_node)
+{
+    ASSERT(base_node->is_base_node);
+    erts_rwmtx_rlock(GET_BASE_NODE_LOCK(base_node));
+}
+
+/* Releases a read lock of a base node taken with rlock_base_node */
+static ERTS_INLINE void runlock_base_node(DbTableCATreeNode *base_node)
+{
+    ASSERT(base_node->is_base_node);
+    erts_rwmtx_runlock(GET_BASE_NODE_LOCK(base_node));
+}
+
+/* Locks the mutex of a route node */
+static ERTS_INLINE void lock_route_node(DbTableCATreeNode *route_node)
+{
+    ASSERT(!route_node->is_base_node);
+    erts_mtx_lock(GET_ROUTE_NODE_LOCK(route_node));
+}
+
+/* Unlocks the mutex of a route node */
+static ERTS_INLINE void unlock_route_node(DbTableCATreeNode *route_node)
+{
+    ASSERT(!route_node->is_base_node);
+    erts_mtx_unlock(GET_ROUTE_NODE_LOCK(route_node));
+}
+
+/* Copies 'key' (key_size heap words; 0 means an immediate) into 'dst' and returns the copy */
+static ERTS_INLINE Eterm copy_route_key(DbRouteKey* dst, Eterm key, Uint key_size)
+{
+    ErlOffHeap tmp_offheap;
+    Eterm* hp;
+    dst->size = key_size;
+    if (key_size == 0) {
+        ASSERT(is_immed(key));
+        dst->term = key;
+        dst->oh = NULL;
+        return key;
+    }
+    hp = &dst->heap[0];
+    tmp_offheap.first = NULL;
+    dst->term = copy_struct(key, key_size, &hp, &tmp_offheap);
+    dst->oh = tmp_offheap.first;
+    return dst->term;
+}
+
+/* Cleans up the off-heap data referenced by a route key, if there is any */
+static ERTS_INLINE void destroy_route_key(DbRouteKey* key)
+{
+    ErlOffHeap oh;
+    if (key->oh == NULL)
+        return;
+    oh.first = key->oh;
+    erts_cleanup_offheap(&oh);
+}
+
+/* Prepares 'iter' for iterating over 'tb': nothing locked and no keys stored yet */
+static ERTS_INLINE void init_root_iterator(DbTableCATree* tb,
+                                           CATreeRootIterator* iter, int read_only)
+{
+    iter->search_key = NULL;
+    iter->next_route_key = THE_NON_VALUE;
+    iter->locked_bnode = NULL;
+    iter->read_only = read_only;
+    iter->tb = tb;
+}
+
+/* Locks 'base_node' for the iterator: read lock if read_only, else write lock */
+static ERTS_INLINE void lock_iter_base_node(CATreeRootIterator* iter,
+                                            DbTableCATreeNode *base_node,
+                                            DbTableCATreeNode *parent,
+                                            int current_level)
+{
+    ASSERT(!iter->locked_bnode);
+    if (!iter->read_only) {
+        wlock_base_node(base_node);
+        iter->bnode_parent = parent; /* kept for wunlock_adapt_base_node */
+        iter->bnode_level = current_level;
+    } else {
+        rlock_base_node(base_node);
+    }
+    iter->locked_bnode = base_node;
+}
+
+/* Unlocks the iterator's base node; a valid write-locked node may be split or joined */
+static ERTS_INLINE void unlock_iter_base_node(CATreeRootIterator* iter)
+{
+    DbTableCATreeNode* bnode = iter->locked_bnode;
+    ASSERT(bnode);
+    if (iter->read_only)
+        runlock_base_node(bnode);
+    else if (!bnode->u.base.is_valid)
+        wunlock_base_node(bnode);
+    else
+        wunlock_adapt_base_node(iter->tb, bnode,
+                                iter->bnode_parent, iter->bnode_level);
+    iter->locked_bnode = NULL;
+}
+
+/* Unlocks a still locked base node and frees the iterator's search key, if any */
+static ERTS_INLINE void destroy_root_iterator(CATreeRootIterator* iter)
+{
+    if (iter->locked_bnode != NULL)
+        unlock_iter_base_node(iter);
+    if (iter->search_key == NULL)
+        return;
+    destroy_route_key(iter->search_key);
+    erts_free(ERTS_ALC_T_DB_TMP, iter->search_key);
+}
+
+typedef struct /* optional output of find_base_node() */
+{
+    DbTableCATreeNode *parent; /* last route node passed on the way down, NULL if none */
+    int current_level;         /* number of route nodes passed on the way down */
+} FindBaseNode;
+
+/* Walks from the root to the base node responsible for 'key'; no locks are
+ * taken. A non-NULL 'fbn' receives the last route node passed (NULL if
+ * none) and the number of route nodes passed. */
+static ERTS_INLINE
+DbTableCATreeNode* find_base_node(DbTableCATree* tb, Eterm key,
+                                  FindBaseNode* fbn)
+{
+    DbTableCATreeNode* ERTS_RESTRICT node = GET_ROOT_ACQB(tb);
+    DbTableCATreeNode* last_route = NULL;
+    int depth;
+
+    for (depth = 0; !node->is_base_node; depth++) {
+        last_route = node;
+        node = (cmp_key_route(key, node) < 0
+                ? GET_LEFT_ACQB(node) : GET_RIGHT_ACQB(node));
+    }
+    if (fbn) {
+        fbn->parent = last_route;
+        fbn->current_level = depth;
+    }
+    return node;
+}
+
+/* Returns the read-locked, valid base node responsible for 'key' */
+static ERTS_INLINE
+DbTableCATreeNode* find_rlock_valid_base_node(DbTableCATree* tb, Eterm key)
+{
+    for (;;) {
+        DbTableCATreeNode* base_node = find_base_node(tb, key, NULL);
+
+        rlock_base_node(base_node);
+        if (base_node->u.base.is_valid)
+            return base_node;
+        /* Invalidated (by a split or join) before we got the lock: retry */
+        runlock_base_node(base_node);
+    }
+}
+
+/* Returns the write-locked, valid base node for 'key'; 'fbn' as in find_base_node */
+static ERTS_INLINE
+DbTableCATreeNode* find_wlock_valid_base_node(DbTableCATree* tb, Eterm key,
+                                              FindBaseNode* fbn)
+{
+    for (;;) {
+        DbTableCATreeNode* base_node = find_base_node(tb, key, fbn);
+
+        wlock_base_node(base_node);
+        if (base_node->u.base.is_valid)
+            return base_node;
+        /* Invalidated (by a split or join) before we got the lock: retry */
+        wunlock_base_node(base_node);
+    }
+}
+
+#ifdef ERTS_ENABLE_LOCK_CHECK
+#  define LC_ORDER(ORDER) ORDER /* lock checking enabled: pass the lock order through */
+#else
+#  define LC_ORDER(ORDER) NIL /* lock checking disabled: the argument is dropped */
+#endif
+
+#define sizeof_base_node() \
+          offsetof(DbTableCATreeNode, u.base.end_of_struct__) /* allocation size used for base nodes */
+
+/* Allocates and initializes a valid base node for 'tb' that holds the tree 'root' */
+static DbTableCATreeNode *create_base_node(DbTableCATree *tb,
+                                           TreeDbTerm* root)
+{
+    erts_rwmtx_opt_t rwmtx_opt = ERTS_RWMTX_OPT_DEFAULT_INITER;
+    DbTableCATreeNode *bnode = erts_db_alloc(ERTS_ALC_T_DB_TABLE,
+                                             (DbTable *) tb,
+                                             sizeof_base_node());
+    if (erts_ets_rwmtx_spin_count >= 0)
+        rwmtx_opt.main_spincount = erts_ets_rwmtx_spin_count;
+    if (tb->common.type & DB_FREQ_READ)
+        rwmtx_opt.type = ERTS_RWMTX_TYPE_FREQUENT_READ;
+    erts_rwmtx_init_opt(&bnode->u.base.lock, &rwmtx_opt,
+                        "erl_db_catree_base_node",
+                        NIL,
+                        ERTS_LOCK_FLAGS_CATEGORY_DB);
+    /* With DB_CATREE_FORCE_SPLIT the statistics start at INT_MAX instead of 0 */
+    bnode->u.base.lock_statistics =
+        (tb->common.status & DB_CATREE_FORCE_SPLIT) ? INT_MAX : 0;
+    bnode->u.base.root = root;
+    bnode->u.base.is_valid = 1;
+    bnode->is_base_node = 1;
+    return bnode;
+}
+
+/* Bytes needed for a route node whose key copy occupies key_size heap words */
+static ERTS_INLINE Uint sizeof_route_node(Uint key_size)
+{
+    return key_size*sizeof(Eterm) + offsetof(DbTableCATreeNode, u.route.key.heap);
+}
+
+/* Creates a valid route node with children 'left' and 'right' whose routing key
+ * is a copy of the key of 'keyTerm'; 'lc_parent' only matters to lock checking */
+static DbTableCATreeNode* create_route_node(DbTableCATree *tb,
+                  DbTableCATreeNode *left,
+                  DbTableCATreeNode *right,
+                  DbTerm * keyTerm,
+                  DbTableCATreeNode* lc_parent)
+{
+    Eterm key = GETKEY(tb,keyTerm->tpl);
+    Uint key_size = size_object(key); /* Uint, like the key_size parameters it is passed to */
+    DbTableCATreeNode* p = erts_db_alloc(ERTS_ALC_T_DB_TABLE,
+                                         (DbTable *) tb,
+                                         sizeof_route_node(key_size));
+    copy_route_key(&p->u.route.key, key, key_size);
+    p->is_base_node = 0;
+    p->u.route.is_valid = 1;
+    erts_atomic_init_nob(&p->u.route.left, (erts_aint_t)left);
+    erts_atomic_init_nob(&p->u.route.right, (erts_aint_t)right);
+#ifdef ERTS_ENABLE_LOCK_CHECK
+    /* Route node lock order is inverse tree depth (from leafs toward root) */
+    p->u.route.lc_order = (lc_parent == NULL ? MAX_SMALL :
+                           lc_parent->u.route.lc_order - 1);
+    /*
+     * This assert may eventually fail as we don't increase 'lc_order' in join
+     * operations when route nodes move up in the tree.
+     * Tough luck if you run a lock-checking VM for such a long time on 32-bit.
+     */
+    ERTS_LC_ASSERT(p->u.route.lc_order >= 0);
+#endif
+    erts_mtx_init(&p->u.route.lock, "erl_db_catree_route_node",
+                  LC_ORDER(make_small(p->u.route.lc_order)),
+                  ERTS_LOCK_FLAGS_CATEGORY_DB);
+    return p;
+}
+
+/* Destroys the lock of the base node 'vptr' and frees the node's memory */
+static void do_free_base_node(void* vptr) {
+    DbTableCATreeNode *bnode = vptr;
+    ASSERT(bnode->is_base_node);
+    erts_rwmtx_destroy(GET_BASE_NODE_LOCK(bnode));
+    erts_free(ERTS_ALC_T_DB_TABLE, bnode);
+}
+
+/* Frees base node 'p' right away, after ERTS_DB_ALC_MEM_UPDATE_ for its size on 'tb' */
+static void free_catree_base_node(DbTableCATree* tb, DbTableCATreeNode* p) {
+    ASSERT(p->is_base_node);
+    ERTS_DB_ALC_MEM_UPDATE_(tb, sizeof_base_node(), 0);
+    do_free_base_node(p);
+}
+
+/* Destroys the lock and the key of the route node 'vptr' and frees the node's memory */
+static void do_free_route_node(void *vptr) {
+    DbTableCATreeNode *rnode = vptr;
+    ASSERT(!rnode->is_base_node);
+    erts_mtx_destroy(GET_ROUTE_NODE_LOCK(rnode));
+    destroy_route_key(&rnode->u.route.key);
+    erts_free(ERTS_ALC_T_DB_TABLE, rnode);
+}
+
+/* Frees route node 'p' right away, after ERTS_DB_ALC_MEM_UPDATE_ for its size on 'tb' */
+static void free_catree_route_node(DbTableCATree* tb, DbTableCATreeNode* p) {
+    ASSERT(!p->is_base_node);
+    ERTS_DB_ALC_MEM_UPDATE_(tb, sizeof_route_node(p->u.route.key.size), 0);
+    do_free_route_node(p);
+}
+
+
+/*
+ * Returns the route node whose child link currently leads to the route
+ * node 'child', or NULL if 'child' is attached directly to the root of
+ * 'tb'. The search descends from the root using the routing key of
+ * 'child', so 'child' must still be reachable along that path
+ * (NOTE(review): not checked here -- confirm that callers guarantee it).
+ */
+static ERTS_INLINE DbTableCATreeNode *
+parent_of(DbTableCATree *tb,
+          DbTableCATreeNode *child)
+{
+    Eterm key = GET_ROUTE_NODE_KEY(child);
+    DbTableCATreeNode *parent = NULL;
+    DbTableCATreeNode *node;
+
+    for (node = GET_ROOT_ACQB(tb); node != child; ) {
+        parent = node;
+        node = (cmp_key_route(key, node) < 0
+                ? GET_LEFT_ACQB(node) : GET_RIGHT_ACQB(node));
+    }
+
+    return parent;
+}
+
+
+/* Follows left links from 'root' until a base node is reached and returns it */
+static ERTS_INLINE DbTableCATreeNode *leftmost_base_node(DbTableCATreeNode *root)
+{
+    DbTableCATreeNode *bnode;
+
+    for (bnode = root; !bnode->is_base_node; bnode = GET_LEFT_ACQB(bnode))
+        ;
+    return bnode;
+}
+
+
+/* Follows right links from 'root' until a base node is reached and returns it */
+static ERTS_INLINE DbTableCATreeNode *rightmost_base_node(DbTableCATreeNode *root)
+{
+    DbTableCATreeNode *bnode;
+
+    for (bnode = root; !bnode->is_base_node; bnode = GET_RIGHT_ACQB(bnode))
+        ;
+    return bnode;
+}
+
+
+/* Returns the last route node met when following left links from 'root',
+ * or NULL when 'root' itself is a base node */
+static ERTS_INLINE DbTableCATreeNode *leftmost_route_node(DbTableCATreeNode *root)
+{
+    DbTableCATreeNode *last_route = NULL;
+    DbTableCATreeNode *node;
+
+    for (node = root; !node->is_base_node; node = GET_LEFT_ACQB(node))
+        last_route = node;
+    return last_route;
+}
+
+/* Returns the last route node met when following right links from 'root',
+ * or NULL when 'root' itself is a base node */
+static ERTS_INLINE DbTableCATreeNode* rightmost_route_node(DbTableCATreeNode *root)
+{
+    DbTableCATreeNode *last_route = NULL;
+    DbTableCATreeNode *node;
+
+    for (node = root; !node->is_base_node; node = GET_RIGHT_ACQB(node))
+        last_route = node;
+    return last_route;
+}
+
+/* Makes 'stack' use 'stack_array' as storage, with pos 0 and slot 'init_slot' */
+static ERTS_INLINE void init_tree_stack(DbTreeStack *stack,
+                                        TreeDbTerm **stack_array,
+                                        Uint init_slot)
+{
+    stack->slot = init_slot;
+    stack->pos = 0;
+    stack->array = stack_array;
+}
+
+static void join_catree(DbTableCATree *tb,
+                        DbTableCATreeNode *thiz,
+                        DbTableCATreeNode *parent)
+{
+    DbTableCATreeNode *gparent;
+    DbTableCATreeNode *neighbor;
+    DbTableCATreeNode *new_neighbor;
+    DbTableCATreeNode *neighbor_parent;
+    /* Tries to merge the write-locked base node 'thiz' with a neighbor; always unlocks 'thiz' */
+    ASSERT(thiz->is_base_node);
+    if (parent == NULL) { /* no route node above 'thiz': nothing to join with */
+        thiz->u.base.lock_statistics = 0;
+        wunlock_base_node(thiz);
+        return;
+    }
+    ASSERT(!parent->is_base_node);
+    if (GET_LEFT(parent) == thiz) {
+        neighbor = leftmost_base_node(GET_RIGHT_ACQB(parent)); /* leftmost base node of the sibling subtree */
+        if (try_wlock_base_node(&neighbor->u.base)) {
+            /* Failed to acquire lock */
+            thiz->u.base.lock_statistics = 0;
+            wunlock_base_node(thiz);
+            return;
+        } else if (!neighbor->u.base.is_valid) { /* neighbor already replaced: give up */
+            thiz->u.base.lock_statistics = 0;
+            wunlock_base_node(thiz);
+            wunlock_base_node(neighbor);
+            return;
+        } else {
+            lock_route_node(parent);
+            parent->u.route.is_valid = 0; /* lookups that lock an invalid base node retry */
+            neighbor->u.base.is_valid = 0;
+            thiz->u.base.is_valid = 0;
+            gparent = NULL;
+            do { /* lock the grandparent; look it up again if it was invalidated meanwhile */
+                if (gparent != NULL) {
+                    unlock_route_node(gparent);
+                }
+                gparent = parent_of(tb, parent);
+                if (gparent != NULL)
+                    lock_route_node(gparent);
+            } while (gparent != NULL && !gparent->u.route.is_valid);
+
+            if (gparent == NULL) { /* unlink 'parent': its right child takes its place */
+                SET_ROOT_RELB(tb, GET_RIGHT(parent));
+            } else if (GET_LEFT(gparent) == parent) {
+                SET_LEFT_RELB(gparent, GET_RIGHT(parent));
+            } else {
+                SET_RIGHT_RELB(gparent, GET_RIGHT(parent));
+            }
+            unlock_route_node(parent);
+            if (gparent != NULL) {
+                unlock_route_node(gparent);
+            }
+            {
+                TreeDbTerm* new_root = join_trees(thiz->u.base.root,
+                                                  neighbor->u.base.root);
+                new_neighbor = create_base_node(tb, new_root);
+            }
+            if (GET_RIGHT(parent) == neighbor) {
+                neighbor_parent = gparent;
+            } else {
+                neighbor_parent = leftmost_route_node(GET_RIGHT(parent));
+            }
+        }
+    } else { /* Symmetric case */
+        ASSERT(GET_RIGHT(parent) == thiz);
+        neighbor = rightmost_base_node(GET_LEFT_ACQB(parent)); /* rightmost base node of the sibling subtree */
+        if (try_wlock_base_node(&neighbor->u.base)) {
+            /* Failed to acquire lock */
+            thiz->u.base.lock_statistics = 0;
+            wunlock_base_node(thiz);
+            return;
+        } else if (!neighbor->u.base.is_valid) { /* neighbor already replaced: give up */
+            thiz->u.base.lock_statistics = 0;
+            wunlock_base_node(thiz);
+            wunlock_base_node(neighbor);
+            return;
+        } else {
+            lock_route_node(parent);
+            parent->u.route.is_valid = 0;
+            neighbor->u.base.is_valid = 0;
+            thiz->u.base.is_valid = 0;
+            gparent = NULL;
+            do { /* lock the grandparent; look it up again if it was invalidated meanwhile */
+                if (gparent != NULL) {
+                    unlock_route_node(gparent);
+                }
+                gparent = parent_of(tb, parent);
+                if (gparent != NULL) {
+                    lock_route_node(gparent);
+                } else {
+                    gparent = NULL; /* no effect: gparent is already NULL here */
+                }
+            } while (gparent != NULL && !gparent->u.route.is_valid);
+            if (gparent == NULL) { /* unlink 'parent': its left child takes its place */
+                SET_ROOT_RELB(tb, GET_LEFT(parent));
+            } else if (GET_RIGHT(gparent) == parent) {
+                SET_RIGHT_RELB(gparent, GET_LEFT(parent));
+            } else {
+                SET_LEFT_RELB(gparent, GET_LEFT(parent));
+            }
+            unlock_route_node(parent);
+            if (gparent != NULL) {
+                unlock_route_node(gparent);
+            }
+            {
+                TreeDbTerm* new_root = join_trees(neighbor->u.base.root,
+                                                  thiz->u.base.root);
+                new_neighbor = create_base_node(tb, new_root);
+            }
+            if (GET_LEFT(parent) == neighbor) {
+                neighbor_parent = gparent;
+            } else {
+                neighbor_parent =
+                    rightmost_route_node(GET_LEFT(parent));
+            }
+        }
+    }
+    /* Link in new neighbor and free nodes that are no longer in the tree */
+    if (neighbor_parent == NULL) {
+        SET_ROOT_RELB(tb, new_neighbor);
+    } else if (GET_LEFT(neighbor_parent) == neighbor) {
+        SET_LEFT_RELB(neighbor_parent, new_neighbor);
+    } else {
+        SET_RIGHT_RELB(neighbor_parent, new_neighbor);
+    }
+    wunlock_base_node(thiz);
+    wunlock_base_node(neighbor);
+    /* Free the parent and both old base nodes (via erts_schedule_db_free) */
+    erts_schedule_db_free(&tb->common,
+                          do_free_route_node,
+                          parent,
+                          &parent->u.route.free_item,
+                          sizeof_route_node(parent->u.route.key.size));
+    erts_schedule_db_free(&tb->common,
+                          do_free_base_node,
+                          thiz,
+                          &thiz->u.base.free_item,
+                          sizeof_base_node());
+    erts_schedule_db_free(&tb->common,
+                          do_free_base_node,
+                          neighbor,
+                          &neighbor->u.base.free_item,
+                          sizeof_base_node());
+}
+
+/*
+ * Splits the write-locked base node 'base' in two: a new route node with
+ * two new base nodes below it is published in the place of 'base' (in
+ * 'parent', or as the root when 'parent' is NULL). 'base' is then marked
+ * invalid, unlocked and scheduled to be freed. If 'base' holds fewer than
+ * two elements nothing is split and 'base' is only unlocked.
+ */
+static void split_catree(DbTableCATree *tb,
+                         DbTableCATreeNode* ERTS_RESTRICT base,
+                         DbTableCATreeNode* ERTS_RESTRICT parent)
+{
+    TreeDbTerm *split_key_node;
+    TreeDbTerm *left_tree;
+    TreeDbTerm *right_tree;
+    DbTableCATreeNode* ERTS_RESTRICT new_left;
+    DbTableCATreeNode* ERTS_RESTRICT new_right;
+    DbTableCATreeNode* ERTS_RESTRICT new_route;
+
+    if (less_than_two_elements(base->u.base.root)) {
+        /* Too small to split; reset the statistics unless splits are forced */
+        if (!(tb->common.status & DB_CATREE_FORCE_SPLIT))
+            base->u.base.lock_statistics = 0;
+        wunlock_base_node(base);
+        return;
+    }
+    split_tree(tb, base->u.base.root, &split_key_node,
+               &left_tree, &right_tree);
+    new_left = create_base_node(tb, left_tree);
+    new_right = create_base_node(tb, right_tree);
+    new_route = create_route_node(tb, new_left, new_right,
+                                  &split_key_node->dbterm, parent);
+    /* Publish the new subtree where 'base' used to be linked in */
+    if (parent == NULL) {
+        SET_ROOT_RELB(tb, new_route);
+    } else if (GET_LEFT(parent) == base) {
+        SET_LEFT_RELB(parent, new_route);
+    } else {
+        SET_RIGHT_RELB(parent, new_route);
+    }
+    base->u.base.is_valid = 0;
+    wunlock_base_node(base);
+    erts_schedule_db_free(&tb->common, do_free_base_node, base,
+                          &base->u.base.free_item, sizeof_base_node());
+}
+
+/*
+ * Helper functions for removing the table
+ */
+
+static void catree_add_base_node_to_free_list(
+        DbTableCATree *tb,
+        DbTableCATreeNode *base_node_container)
+{
+    base_node_container->u.base.next =
+        tb->base_nodes_to_free_list;
+    tb->base_nodes_to_free_list = base_node_container;
+}
+
+/* Unlink the head of the to-free list of base nodes; no-op on an empty list. */
+static void catree_deque_base_node_from_free_list(DbTableCATree *tb)
+{
+    DbTableCATreeNode *head = tb->base_nodes_to_free_list;
+
+    if (head != NULL) {
+        tb->base_nodes_to_free_list = head->u.base.next;
+    }
+}
+
+static DbTableCATreeNode *catree_first_base_node_from_free_list(
+        DbTableCATree *tb)
+{
+    return tb->base_nodes_to_free_list;
+}
+
+static SWord do_free_routing_nodes_catree_cont(DbTableCATree *tb, SWord num_left)
+{
+    DbTableCATreeNode *root;
+    DbTableCATreeNode *p;
+    for (;;) {
+        root = POP_NODE(&tb->free_stack_rnodes);
+    	if (root == NULL) break;
+        else if(root->is_base_node) {
+            catree_add_base_node_to_free_list(tb, root);
+            break;
+        }
+    	for (;;) {
+            if ((GET_LEFT(root) != NULL) &&
+                (p = GET_LEFT(root))->is_base_node) {
+                SET_LEFT(root, NULL);
+                catree_add_base_node_to_free_list(tb, p);
+            } else if ((GET_RIGHT(root) != NULL) &&
+                       (p = GET_RIGHT(root))->is_base_node) {
+                SET_RIGHT(root, NULL);
+                catree_add_base_node_to_free_list(tb, p);
+            } else if ((p = GET_LEFT(root)) != NULL) {
+                SET_LEFT(root, NULL);
+                PUSH_NODE(&tb->free_stack_rnodes, root);
+                root = p;
+            } else if ((p = GET_RIGHT(root)) != NULL) {
+                SET_RIGHT(root, NULL);
+                PUSH_NODE(&tb->free_stack_rnodes, root);
+                root = p;
+            } else {
+                free_catree_route_node(tb, root);
+                if (--num_left >= 0) {
+                    break;
+                } else {
+                    return num_left;	/* Done enough for now */
+                }
+            }
+        }
+    }
+    return num_left;
+}
+
+static SWord do_free_base_node_cont(DbTableCATree *tb, SWord num_left)
+{
+    TreeDbTerm *root;
+    TreeDbTerm *p;
+    DbTableCATreeNode *base_node_container =
+        catree_first_base_node_from_free_list(tb);
+    for (;;) {
+        root = POP_NODE(&tb->free_stack_elems);
+        if (root == NULL) break;
+        for (;;) {
+            if ((p = root->left) != NULL) {
+                root->left = NULL;
+                PUSH_NODE(&tb->free_stack_elems, root);
+                root = p;
+            } else if ((p = root->right) != NULL) {
+                root->right = NULL;
+                PUSH_NODE(&tb->free_stack_elems, root);
+                root = p;
+            } else {
+                free_term((DbTable*)tb, root);
+                if (--num_left >= 0) {
+                    break;
+                } else {
+                    return num_left;	/* Done enough for now */
+                }
+            }
+        }
+    }
+    catree_deque_base_node_from_free_list(tb);
+    free_catree_base_node(tb, base_node_container);
+    base_node_container = catree_first_base_node_from_free_list(tb);
+    if (base_node_container != NULL) {
+        PUSH_NODE(&tb->free_stack_elems, base_node_container->u.base.root);
+    }
+    return num_left;
+}
+
+
+/*
+** Initialization function
+*/
+
+void db_initialize_catree(void)
+{
+    /* Intentionally empty: this function performs no initialization work. */
+}
+
+/*
+** Table interface routines (i.e., what's called by the bif's)
+*/
+
+int db_create_catree(Process *p, DbTable *tbl)
+{
+    DbTableCATree *tb = &tbl->catree;
+    DbTableCATreeNode *root;
+
+    root = create_base_node(tb, NULL);
+    tb->deletion = 0;
+    tb->base_nodes_to_free_list = NULL;
+    erts_atomic_init_relb(&(tb->root), (erts_aint_t)root);
+    return DB_ERROR_NONE;
+}
+
+/* Run db_first_tree_common on the first non-empty base node's tree (or NULL). */
+static int db_first_catree(Process *p, DbTable *tbl, Eterm *ret)
+{
+    CATreeRootIterator iter;
+    TreeDbTerm **rootp;
+    int result;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    rootp = catree_find_first_root(&iter);
+    if (*rootp == NULL) {
+        /* Leftmost base node is empty: move on to the next non-empty one. */
+        rootp = catree_find_next_root(&iter, NULL);
+    }
+    result = db_first_tree_common(p, tbl, (rootp ? *rootp : NULL), ret, NULL);
+
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_next_catree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTreeStack stack;
+    TreeDbTerm * stack_array[STACK_NEED];
+    TreeDbTerm **rootp;
+    CATreeRootIterator iter;
+    int result;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    iter.next_route_key = key;
+    rootp = catree_find_next_root(&iter, NULL);
+
+    do {
+        init_tree_stack(&stack, stack_array, 0);
+        result = db_next_tree_common(p, tbl, (rootp ? *rootp : NULL), key, ret, &stack);
+        if (result != DB_ERROR_NONE || *ret != am_EOT)
+            break;
+
+        rootp = catree_find_next_root(&iter, NULL);
+    } while (rootp);
+
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+/* Run db_last_tree_common on the last non-empty base node's tree (or NULL). */
+static int db_last_catree(Process *p, DbTable *tbl, Eterm *ret)
+{
+    CATreeRootIterator iter;
+    TreeDbTerm **rootp;
+    int result;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    rootp = catree_find_last_root(&iter);
+    if (*rootp == NULL) {
+        /* Rightmost base node is empty: step back to a non-empty one, if any. */
+        rootp = catree_find_prev_root(&iter, NULL);
+    }
+    result = db_last_tree_common(p, tbl, (rootp ? *rootp : NULL), ret, NULL);
+
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_prev_catree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTreeStack stack;
+    TreeDbTerm * stack_array[STACK_NEED];
+    TreeDbTerm **rootp;
+    CATreeRootIterator iter;
+    int result;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    iter.next_route_key = key;
+    rootp = catree_find_prev_root(&iter, NULL);
+
+    do {
+        init_tree_stack(&stack, stack_array, 0);
+        result = db_prev_tree_common(p, tbl, (rootp ? *rootp : NULL), key, ret,
+                                     &stack);
+        if (result != DB_ERROR_NONE || *ret != am_EOT)
+            break;
+        rootp = catree_find_prev_root(&iter, NULL);
+    } while (rootp);
+
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_put_catree(DbTable *tbl, Eterm obj, int key_clash_fail)
+{
+    DbTableCATree *tb = &tbl->catree;
+    Eterm key = GETKEY(&tb->common, tuple_val(obj));
+    FindBaseNode fbn;
+    DbTableCATreeNode* node = find_wlock_valid_base_node(tb, key, &fbn);
+    int result = db_put_tree_common(&tb->common, &node->u.base.root, obj,
+                                    key_clash_fail, NULL);
+    wunlock_adapt_base_node(tb, node, fbn.parent, fbn.current_level);
+    return result;
+}
+
+static int db_get_catree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    DbTableCATreeNode* node = find_rlock_valid_base_node(tb, key);
+    int result = db_get_tree_common(p, &tb->common,
+                                    node->u.base.root,
+                                    key, ret, NULL);
+    runlock_base_node(node);
+    return result;
+}
+
+TreeDbTerm** catree_find_root(Eterm key, CATreeRootIterator* iter)
+{
+    FindBaseNode fbn;
+    DbTableCATreeNode* base_node;
+
+    while (1) {
+        base_node = find_base_node(iter->tb, key, &fbn);
+        lock_iter_base_node(iter, base_node, fbn.parent, fbn.current_level);
+        if (base_node->u.base.is_valid)
+            break;
+        unlock_iter_base_node(iter);
+    }
+    return &base_node->u.base.root;
+}
+
+static Eterm save_iter_search_key(CATreeRootIterator* iter, Eterm key)
+{
+    Uint key_size;
+
+    if (is_immed(key))
+        return key;
+
+    if (iter->search_key) {
+        if (key == iter->search_key->term)
+            return key; /* already saved */
+        destroy_route_key(iter->search_key);
+    }
+    key_size = size_object(key);
+    if (!iter->search_key || key_size > iter->search_key->size) {
+        iter->search_key = erts_realloc(ERTS_ALC_T_DB_TMP,
+                                        iter->search_key,
+                                        (offsetof(DbRouteKey, heap)
+                                         + key_size*sizeof(Eterm)));
+    }
+    return copy_route_key(iter->search_key, key, key_size);
+}
+
+TreeDbTerm** catree_find_nextprev_root(CATreeRootIterator *iter,
+                                       int forward,
+                                       Eterm *search_keyp)
+{
+#ifdef DEBUG
+    DbTableCATreeNode *rejected_invalid = NULL;
+    DbTableCATreeNode *rejected_empty = NULL;
+#endif
+    DbTableCATreeNode *node;
+    DbTableCATreeNode *parent;
+    DbTableCATreeNode* next_route_node;
+    Eterm route_key = iter->next_route_key;
+    int current_level;
+
+    if (iter->locked_bnode) {
+        if (search_keyp)
+            *search_keyp = save_iter_search_key(iter, *search_keyp);
+        unlock_iter_base_node(iter);
+    }
+
+    if (is_non_value(route_key))
+        return NULL;
+
+    while (1) {
+        node = GET_ROOT_ACQB(iter->tb);
+        current_level = 0;
+        parent = NULL;
+        next_route_node = NULL;
+        while (!node->is_base_node) {
+            current_level++;
+            parent = node;
+            if (forward) {
+                if (cmp_key_route(route_key,node) < 0) {
+                    next_route_node = node;
+                    node = GET_LEFT_ACQB(node);
+                } else {
+                    node = GET_RIGHT_ACQB(node);
+                }
+            }
+            else {
+                if (cmp_key_route(route_key,node) > 0) {
+                    next_route_node = node;
+                    node = GET_RIGHT_ACQB(node);
+                } else {
+                    node = GET_LEFT_ACQB(node);
+                }
+            }
+        }
+        ASSERT(node != rejected_invalid);
+        lock_iter_base_node(iter, node, parent, current_level);
+        if (node->u.base.is_valid) {
+            ASSERT(node != rejected_empty);
+            if (node->u.base.root) {
+                iter->next_route_key = (next_route_node ?
+                                        next_route_node->u.route.key.term :
+                                        THE_NON_VALUE);
+                iter->locked_bnode = node;
+                return &node->u.base.root;
+            }
+            if (!next_route_node) {
+                unlock_iter_base_node(iter);
+                return NULL;
+            }
+            route_key = next_route_node->u.route.key.term;
+            IF_DEBUG(rejected_empty = node);
+        }
+        else
+            IF_DEBUG(rejected_invalid = node);
+
+        /* Retry */
+        unlock_iter_base_node(iter);
+    }
+}
+
+TreeDbTerm** catree_find_next_root(CATreeRootIterator *iter, Eterm* keyp)
+{
+    return catree_find_nextprev_root(iter, 1, keyp);
+}
+
+TreeDbTerm** catree_find_prev_root(CATreeRootIterator *iter, Eterm* keyp)
+{
+    return catree_find_nextprev_root(iter, 0, keyp);
+}
+
+/* @brief Find root of tree where object with smallest key of all larger than
+ * partially bound key may reside. Can be used as a starting point for
+ * a reverse iteration with pb_key.
+ *
+ * @param pb_key The partially bound key. Example {42, '$1'}
+ * @param iter An initialized root iterator.
+ *
+ * @return Pointer to the found root pointer. Never NULL.
+ */
+TreeDbTerm** catree_find_next_from_pb_key_root(Eterm pb_key,
+                                               CATreeRootIterator* iter)
+{
+#ifdef DEBUG
+    DbTableCATreeNode *rejected_base = NULL;
+#endif
+    DbTableCATreeNode *node;
+    DbTableCATreeNode *parent;
+    DbTableCATreeNode* next_route_node;
+    int current_level;
+
+    ASSERT(!iter->locked_bnode);
+
+    while (1) {
+        node = GET_ROOT_ACQB(iter->tb);
+        current_level = 0;
+        parent = NULL;
+        next_route_node = NULL;
+        while (!node->is_base_node) {
+            current_level++;
+            parent = node;
+            if (cmp_partly_bound(pb_key, GET_ROUTE_NODE_KEY(node)) >= 0) {
+                next_route_node = node;
+                node = GET_RIGHT_ACQB(node);
+            } else {
+                node = GET_LEFT_ACQB(node);
+            }
+        }
+        ASSERT(node != rejected_base);
+        lock_iter_base_node(iter, node, parent, current_level);
+        if (node->u.base.is_valid) {
+            iter->next_route_key = (next_route_node ?
+                                    next_route_node->u.route.key.term :
+                                    THE_NON_VALUE);
+            return &node->u.base.root;
+        }
+        /* Retry */
+        unlock_iter_base_node(iter);
+#ifdef DEBUG
+        rejected_base = node;
+#endif
+    }
+}
+
+/* @brief Find root of tree where object with largest key of all smaller than
+ * partially bound key may reside. Can be used as a starting point for
+ * a forward iteration with the partially bound key.
+ *
+ * @param key The partially bound key. Example {42, '$1'}
+ * @param iter An initialized root iterator.
+ *
+ * @return Pointer to the found root pointer. Never NULL.
+ */
+TreeDbTerm** catree_find_prev_from_pb_key_root(Eterm key,
+                                               CATreeRootIterator* iter)
+{
+#ifdef DEBUG
+    DbTableCATreeNode *rejected_base = NULL;
+#endif
+    DbTableCATreeNode *node;
+    DbTableCATreeNode *parent;
+    DbTableCATreeNode* next_route_node;
+    int current_level;
+
+    ASSERT(!iter->locked_bnode);
+
+    while (1) {
+        node = GET_ROOT_ACQB(iter->tb);
+        current_level = 0;
+        parent = NULL;
+        next_route_node = NULL;
+        while (!node->is_base_node) {
+            current_level++;
+            parent = node;
+            if (cmp_partly_bound(key, GET_ROUTE_NODE_KEY(node)) <= 0) {
+                next_route_node = node;
+                node = GET_LEFT_ACQB(node);
+            } else {
+                node = GET_RIGHT_ACQB(node);
+            }
+        }
+        ASSERT(node != rejected_base);
+        lock_iter_base_node(iter, node, parent, current_level);
+        if (node->u.base.is_valid) {
+            iter->next_route_key = (next_route_node ?
+                                    next_route_node->u.route.key.term :
+                                    THE_NON_VALUE);
+            return &node->u.base.root;
+        }
+        /* Retry */
+        unlock_iter_base_node(iter);
+#ifdef DEBUG
+        rejected_base = node;
+#endif
+    }
+}
+
+static TreeDbTerm** catree_find_firstlast_root(CATreeRootIterator* iter,
+                                               int first)
+{
+#ifdef DEBUG
+    DbTableCATreeNode *rejected_base = NULL;
+#endif
+    DbTableCATreeNode *node;
+    DbTableCATreeNode* next_route_node;
+    int current_level;
+
+    while (1) {
+        node = GET_ROOT_ACQB(iter->tb);
+        current_level = 0;
+        next_route_node = NULL;
+        while (!node->is_base_node) {
+            current_level++;
+            next_route_node = node;
+            node = first ? GET_LEFT_ACQB(node) : GET_RIGHT_ACQB(node);
+        }
+        ASSERT(node != rejected_base);
+        lock_iter_base_node(iter, node, next_route_node, current_level);
+        if (node->u.base.is_valid) {
+            iter->next_route_key = (next_route_node ?
+                                    next_route_node->u.route.key.term :
+                                    THE_NON_VALUE);
+            return &node->u.base.root;
+        }
+        /* Retry */
+        unlock_iter_base_node(iter);
+#ifdef DEBUG
+        rejected_base = node;
+#endif
+    }
+}
+
+TreeDbTerm** catree_find_first_root(CATreeRootIterator* iter)
+{
+    return catree_find_firstlast_root(iter, 1);
+}
+
+TreeDbTerm** catree_find_last_root(CATreeRootIterator* iter)
+{
+    return catree_find_firstlast_root(iter, 0);
+}
+
+static int db_member_catree(DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    DbTableCATreeNode* node = find_rlock_valid_base_node(tb, key);
+    int result = db_member_tree_common(&tb->common,
+                                       node->u.base.root,
+                                       key, ret, NULL);
+    runlock_base_node(node);
+    return result;
+}
+
+static int db_get_element_catree(Process *p, DbTable *tbl,
+			       Eterm key, int ndex, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    DbTableCATreeNode* node = find_rlock_valid_base_node(tb, key);
+    int result = db_get_element_tree_common(p, &tb->common,
+                                            node->u.base.root,
+                                            key, ndex, ret, NULL);
+    runlock_base_node(node);
+    return result;
+}
+
+static int db_erase_catree(DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    FindBaseNode fbn;
+    DbTableCATreeNode* node = find_wlock_valid_base_node(tb, key, &fbn);
+    int result = db_erase_tree_common(tbl, &node->u.base.root, key,
+                                      ret, NULL);
+    wunlock_adapt_base_node(tb, node, fbn.parent, fbn.current_level);
+    return result;
+}
+
+static int db_erase_object_catree(DbTable *tbl, Eterm object, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    Eterm key = GETKEY(&tb->common, tuple_val(object));
+    FindBaseNode fbn;
+    DbTableCATreeNode* node = find_wlock_valid_base_node(tb, key, &fbn);
+    int result = db_erase_object_tree_common(tbl,
+                                             &node->u.base.root,
+                                             object,
+                                             ret,
+                                             NULL);
+    wunlock_adapt_base_node(tb, node, fbn.parent, fbn.current_level);
+    return result;
+}
+
+
+static int db_slot_catree(Process *p, DbTable *tbl,
+                          Eterm slot_term, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_slot_tree_common(p, tbl, *catree_find_first_root(&iter),
+                                 slot_term, ret, NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_continue_catree(Process *p,
+                                     DbTable *tbl,
+                                     Eterm continuation,
+                                     Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_select_continue_tree_common(p, &tbl->common,
+                                            continuation, ret, NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_catree(Process *p, DbTable *tbl, Eterm tid,
+                            Eterm pattern, int reverse, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_select_tree_common(p, tbl, tid, pattern, reverse, ret,
+                                   NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_count_continue_catree(Process *p,
+                                           DbTable *tbl,
+                                           Eterm continuation,
+                                           Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_select_count_continue_tree_common(p, tbl,
+                                                  continuation, ret, NULL,
+                                                  &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_count_catree(Process *p, DbTable *tbl, Eterm tid,
+                                  Eterm pattern, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_select_count_tree_common(p, tbl,
+                                         tid, pattern, ret, NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_chunk_catree(Process *p, DbTable *tbl, Eterm tid,
+                                  Eterm pattern, Sint chunk_size,
+                                  int reversed, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    result = db_select_chunk_tree_common(p, tbl,
+                                         tid, pattern, chunk_size, reversed, ret,
+                                         NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_delete_continue_catree(Process *p,
+                                            DbTable *tbl,
+                                            Eterm continuation,
+                                            Eterm *ret)
+{
+    DbTreeStack stack;
+    TreeDbTerm * stack_array[STACK_NEED];
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 0);
+    init_tree_stack(&stack, stack_array, 0);
+    result = db_select_delete_continue_tree_common(p, tbl, continuation, ret,
+                                                   &stack, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_delete_catree(Process *p, DbTable *tbl, Eterm tid,
+                                   Eterm pattern, Eterm *ret)
+{
+    DbTreeStack stack;
+    TreeDbTerm * stack_array[STACK_NEED];
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 0);
+    init_tree_stack(&stack, stack_array, 0);
+    result = db_select_delete_tree_common(p, tbl,
+                                          tid, pattern, ret, &stack,
+                                          &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_replace_catree(Process *p, DbTable *tbl, Eterm tid,
+                                    Eterm pattern, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 0);
+    result = db_select_replace_tree_common(p, tbl,
+                                           tid, pattern, ret, NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_select_replace_continue_catree(Process *p, DbTable *tbl,
+                                             Eterm continuation, Eterm *ret)
+{
+    int result;
+    CATreeRootIterator iter;
+
+    init_root_iterator(&tbl->catree, &iter, 0);
+    result = db_select_replace_continue_tree_common(p, tbl, continuation, ret,
+                                                    NULL, &iter);
+    destroy_root_iterator(&iter);
+    return result;
+}
+
+static int db_take_catree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableCATree *tb = &tbl->catree;
+    FindBaseNode fbn;
+    DbTableCATreeNode* node = find_wlock_valid_base_node(tb, key, &fbn);
+    int result = db_take_tree_common(p, tbl, &node->u.base.root, key,
+                                     ret, NULL);
+    wunlock_adapt_base_node(tb, node, fbn.parent, fbn.current_level);
+    return result;
+}
+
+/*
+** Other interface routines (not directly coupled to one bif)
+*/
+
+
+/* Display tree contents (for dump) */
+static void db_print_catree(fmtfn_t to, void *to_arg,
+                            int show, DbTable *tbl)
+{
+    CATreeRootIterator iter;
+    TreeDbTerm** root;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    root = catree_find_first_root(&iter);
+    do {
+        db_print_tree_common(to, to_arg, show, *root, tbl);
+        root = catree_find_next_root(&iter, NULL);
+    } while (root);
+    destroy_root_iterator(&iter);
+}
+
+/* Release all memory occupied by a single table */
+static int db_free_table_catree(DbTable *tbl)
+{
+    /* A negative result means more remains to be freed: call again. */
+    while (db_free_table_continue_catree(tbl, ERTS_SWORD_MAX) < 0) { }
+    return 1;
+}
+
+/* Free the table incrementally within a budget of reds. Returns a negative
+ * value if more work remains (call again), else the remaining budget (>= 0). */
+static SWord db_free_table_continue_catree(DbTable *tbl, SWord reds)
+{
+    DbTableCATreeNode *first_base_node;
+    DbTableCATree *tb = &tbl->catree;
+    if (!tb->deletion) {
+        tb->deletion = 1;
+        tb->free_stack_elems.array =
+            erts_db_alloc(ERTS_ALC_T_DB_STK,
+                          (DbTable *) tb,
+                          sizeof(TreeDbTerm *) * STACK_NEED);
+        tb->free_stack_elems.pos = 0;
+        tb->free_stack_elems.slot = 0;
+        tb->free_stack_rnodes.array =
+            erts_db_alloc(ERTS_ALC_T_DB_STK,
+                          (DbTable *) tb,
+                          sizeof(DbTableCATreeNode *) * STACK_NEED);
+        tb->free_stack_rnodes.pos = 0;
+        tb->free_stack_rnodes.size = STACK_NEED;
+        PUSH_NODE(&tb->free_stack_rnodes, GET_ROOT(tb));
+        tb->is_routing_nodes_freed = 0;
+        tb->base_nodes_to_free_list = NULL;
+    }
+    if ( ! tb->is_routing_nodes_freed ) {
+        reds = do_free_routing_nodes_catree_cont(tb, reds);
+        if (reds < 0)
+            return reds; /* Not finished */
+        tb->is_routing_nodes_freed = 1; /* Ready with the routing nodes */
+        first_base_node = catree_first_base_node_from_free_list(tb);
+        PUSH_NODE(&tb->free_stack_elems, first_base_node->u.base.root);
+    }
+    while (catree_first_base_node_from_free_list(tb) != NULL) {
+        reds = do_free_base_node_cont(tb, reds);
+        if (reds < 0) {
+            return reds; /* Continue later */
+        }
+    }
+    /* All nodes are freed; release the two work stacks allocated above */
+    erts_db_free(ERTS_ALC_T_DB_STK,
+                 (DbTable *) tb,
+                 (void *) tb->free_stack_elems.array,
+                 sizeof(TreeDbTerm *) * STACK_NEED);
+    erts_db_free(ERTS_ALC_T_DB_STK,
+                 (DbTable *) tb,
+                 (void *) tb->free_stack_rnodes.array,
+                 sizeof(DbTableCATreeNode *) * STACK_NEED);
+    return reds; /* Done: report what is left of the budget, not a constant */
+}
+
+static SWord db_delete_all_objects_catree(Process* p, DbTable* tbl, SWord reds)
+{
+    reds = db_free_table_continue_catree(tbl, reds);
+    if (reds < 0)
+        return reds;
+    db_create_catree(p, tbl);
+    erts_atomic_set_nob(&tbl->catree.common.nitems, 0);
+    return reds;
+}
+
+
+static void do_for_route_nodes(DbTableCATreeNode* node,
+                               void (*func)(ErlOffHeap *, void *),
+                               void *arg)
+{
+    ErlOffHeap tmp_offheap;
+
+    if (!GET_LEFT(node)->is_base_node)
+        do_for_route_nodes(GET_LEFT(node), func, arg);
+
+    tmp_offheap.first = node->u.route.key.oh;
+    tmp_offheap.overhead = 0;
+    (*func)(&tmp_offheap, arg);
+
+    if (!GET_RIGHT(node)->is_base_node)
+        do_for_route_nodes(GET_RIGHT(node), func, arg);
+}
+
+static void db_foreach_offheap_catree(DbTable *tbl,
+                                      void (*func)(ErlOffHeap *, void *),
+                                      void *arg)
+{
+    CATreeRootIterator iter;
+    TreeDbTerm** root;
+
+    init_root_iterator(&tbl->catree, &iter, 1);
+    root = catree_find_first_root(&iter);
+    do {
+        db_foreach_offheap_tree_common(*root, func, arg);
+        root = catree_find_next_root(&iter, NULL);
+    } while (root);
+    destroy_root_iterator(&iter);
+
+    do_for_route_nodes(GET_ROOT(&tbl->catree), func, arg);
+}
+
+static int db_lookup_dbterm_catree(Process *p, DbTable *tbl, Eterm key, Eterm obj,
+                                   DbUpdateHandle *handle)
+{
+    DbTableCATree *tb = &tbl->catree;
+    FindBaseNode fbn;
+    DbTableCATreeNode* node = find_wlock_valid_base_node(tb, key, &fbn);
+    int res = db_lookup_dbterm_tree_common(p, tbl, &node->u.base.root, key,
+                                           obj, handle, NULL);
+    if (res == 0) {
+        wunlock_adapt_base_node(tb, node, fbn.parent, fbn.current_level);
+    } else {
+        /* db_finalize_dbterm_catree will unlock */
+        handle->u.catree.base_node = node;
+        handle->u.catree.parent = fbn.parent;
+        handle->u.catree.current_level = fbn.current_level;
+    }
+    return res;
+}
+
+static void db_finalize_dbterm_catree(int cret, DbUpdateHandle *handle)
+{
+    DbTableCATree *tb = &(handle->tb->catree);
+    db_finalize_dbterm_tree_common(cret, handle, NULL);
+    wunlock_adapt_base_node(tb, handle->u.catree.base_node,
+                            handle->u.catree.parent,
+                            handle->u.catree.current_level);
+    return;
+}
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
+static void erts_lcnt_enable_db_catree_lock_count_helper(DbTableCATree *tb,
+                                                         DbTableCATreeNode *node,
+                                                         int enable)
+{
+    erts_lcnt_ref_t *lcnt_ref;
+    erts_lock_flags_t lock_type;
+    if (node->is_base_node) {
+        lcnt_ref = &GET_BASE_NODE_LOCK(node)->lcnt;
+        lock_type = ERTS_LOCK_TYPE_RWMUTEX;
+    } else { /* Route node: handle both subtrees first, then its own lock */
+        erts_lcnt_enable_db_catree_lock_count_helper(tb, GET_LEFT(node), enable);
+        erts_lcnt_enable_db_catree_lock_count_helper(tb, GET_RIGHT(node), enable);
+        lcnt_ref = &GET_ROUTE_NODE_LOCK(node)->lcnt;
+        lock_type = ERTS_LOCK_TYPE_MUTEX;
+    }
+    if (enable) { /* NOTE(review): "db_hash_slot" looks copied from erl_db_hash; confirm name */
+        erts_lcnt_install_new_lock_info(lcnt_ref, "db_hash_slot", tb->common.the_name,
+                                        lock_type | ERTS_LOCK_FLAGS_CATEGORY_DB);
+    } else {
+        erts_lcnt_uninstall(lcnt_ref);
+    }
+}
+
+void erts_lcnt_enable_db_catree_lock_count(DbTableCATree *tb, int enable)
+{
+    erts_lcnt_enable_db_catree_lock_count_helper(tb, GET_ROOT(tb), enable);
+}
+#endif /* ERTS_ENABLE_LOCK_COUNT */
+
+void db_catree_force_split(DbTableCATree* tb, int on)
+{
+    CATreeRootIterator iter;
+    TreeDbTerm** root;
+
+    init_root_iterator(tb, &iter, 1);
+    root = catree_find_first_root(&iter);
+    do {
+        iter.locked_bnode->u.base.lock_statistics = (on ? INT_MAX : 0);
+        root = catree_find_next_root(&iter, NULL);
+    } while (root);
+    destroy_root_iterator(&iter);
+
+    if (on)
+        tb->common.status |= DB_CATREE_FORCE_SPLIT;
+    else
+        tb->common.status &= ~DB_CATREE_FORCE_SPLIT;
+}
+
+void db_calc_stats_catree(DbTableCATree* tb, DbCATreeStats* stats)
+{
+    DbTableCATreeNode* stack[ERL_DB_CATREE_MAX_ROUTE_NODE_LAYER_HEIGHT];
+    DbTableCATreeNode* node;
+    Uint depth = 0;
+
+    stats->route_nodes = 0;
+    stats->base_nodes = 0;
+    stats->max_depth = 0;
+
+    node = GET_ROOT(tb);
+    do {
+        while (!node->is_base_node) {
+            stats->route_nodes++;
+            ASSERT(depth < sizeof(stack)/sizeof(*stack));
+            stack[depth++] = node;  /* PUSH parent */
+            if (stats->max_depth < depth)
+                stats->max_depth = depth;
+            node = GET_LEFT(node);
+        }
+        stats->base_nodes++;
+
+        while (depth > 0) {
+            DbTableCATreeNode* parent = stack[depth-1];
+            if (node == GET_LEFT(parent)) {
+                node = GET_RIGHT(parent);
+                break;
+            }
+            else {
+                ASSERT(node == GET_RIGHT(parent));
+                node = parent;
+                depth--; /* POP parent */
+            }
+        }
+    } while (depth > 0);
+}
+
+#ifdef HARDDEBUG
+
+/*
+ * Unused HARDDEBUG helper: returns the height of t, aborting on a bad balance.
+ */
+static inline int my_check_table_tree(TreeDbTerm *t)
+{
+    int lh, rh;
+    if (t == NULL)
+	return 0;
+    lh = my_check_table_tree(t->left);
+    rh = my_check_table_tree(t->right);
+    if ((rh - lh) != t->balance) {
+	erts_fprintf(stderr, "Invalid tree balance for this node:\n");
+	erts_fprintf(stderr,"balance = %d, left = %p, right = %p\n",
+		     t->balance, (void *) t->left, (void *) t->right);
+	erts_fprintf(stderr,"\nDump:\n---------------------------------\n");
+	erts_fprintf(stderr,"\n---------------------------------\n");
+        abort();
+    }
+    return ((rh > lh) ? rh : lh) + 1;
+}
+
+#endif
diff --git a/erts/emulator/beam/erl_db_catree.h b/erts/emulator/beam/erl_db_catree.h
new file mode 100644
index 0000000000..418837be8e
--- /dev/null
+++ b/erts/emulator/beam/erl_db_catree.h
@@ -0,0 +1,133 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 1998-2016. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description: Implementation of ETS ordered_set table type with
+ *              fine-grained synchronization.
+ *
+ * Author: 	Kjell Winblad
+ *
+ * "erl_db_catree.c" contains more details about the implementation.
+ *
+ */
+
+#ifndef _DB_CATREE_H
+#define _DB_CATREE_H
+
+struct DbTableCATreeNode;
+
+typedef struct {  /* Key type embedded in route nodes (DbTableCATreeRouteNode.key) */
+    Eterm term;  /* presumably the key term itself -- TODO confirm */
+    struct erl_off_heap_header* oh;  /* presumably off-heap data of 'term' -- TODO confirm */
+    Uint size;  /* NOTE(review): looks like the size of 'heap' in words -- confirm */
+    Eterm heap[1];  /* NOTE(review): looks like trailing variable-size storage -- confirm */
+} DbRouteKey;
+
+typedef struct {  /* Base node: one sequential tree guarded by its own rwlock */
+    erts_rwmtx_t lock; /* The lock for this base node */
+    Sint lock_statistics; /* presumably contention statistics for 'lock' -- TODO confirm */
+    int is_valid; /* If this base node is still valid */
+    TreeDbTerm *root; /* The root of the sequential tree */
+    ErtsThrPrgrLaterOp free_item; /* Used when freeing using thread progress */
+    struct DbTableCATreeNode * next; /* Used when gradually deleting */
+
+    char end_of_struct__; /* NOTE(review): looks like an end-of-struct marker -- confirm */
+} DbTableCATreeBaseNode;
+
+typedef struct {  /* Route node: carries a route key and left/right child links */
+#ifdef ERTS_ENABLE_LOCK_CHECK
+    Sint lc_order; /* only with lock checking; presumably a lock order -- TODO confirm */
+#endif
+    ErtsThrPrgrLaterOp free_item; /* Used when freeing using thread progress */
+    erts_mtx_t lock; /* Used when joining route nodes */
+    int is_valid; /* If this route node is still valid */
+    erts_atomic_t left;  /* left child; presumably holds a DbTableCATreeNode* -- confirm */
+    erts_atomic_t right; /* right child; presumably holds a DbTableCATreeNode* -- confirm */
+    DbRouteKey key;  /* presumably the key that routes between left and right -- confirm */
+} DbTableCATreeRouteNode;
+
+typedef struct DbTableCATreeNode {  /* A CA tree node: either a route or a base node */
+    int is_base_node;  /* nonzero: base node; zero: route node */
+    union {  /* presumably 'is_base_node' tells which member is in use -- confirm */
+        DbTableCATreeRouteNode route;
+        DbTableCATreeBaseNode base;
+    } u;
+} DbTableCATreeNode;
+
+typedef struct {       /* Array-backed stack of CA tree node pointers */
+    Uint pos;          /* Current position on stack */
+    Uint size;         /* The size of the stack array */
+    DbTableCATreeNode** array; /* The stack */
+} CATreeNodeStack;
+
+typedef struct db_table_catree {  /* Table structure for a CA tree table */
+    DbTableCommon common;  /* fields shared by the table types (see DbTableCommon) */
+
+    /* CA Tree-specific fields */
+    erts_atomic_t root;         /* The tree root (DbTableCATreeNode*) */
+    Uint deletion;		/* Being deleted */
+    DbTreeStack free_stack_elems;/* Used for deletion ...*/
+    CATreeNodeStack free_stack_rnodes; /* presumably route nodes to free -- TODO confirm */
+    DbTableCATreeNode *base_nodes_to_free_list; /* presumably linked via base.next -- confirm */
+    int is_routing_nodes_freed; /* NOTE(review): looks like a deletion-progress flag -- confirm */
+} DbTableCATree;
+
+typedef struct {  /* Iteration state taken by the catree_find_*_root() functions */
+    DbTableCATree* tb;  /* the CA tree table this iterator refers to */
+    Eterm next_route_key;  /* presumably the route key bounding the next step -- confirm */
+    DbTableCATreeNode* locked_bnode;  /* presumably the base node now locked -- confirm */
+    DbTableCATreeNode* bnode_parent;  /* presumably the parent of 'locked_bnode' -- confirm */
+    int bnode_level;  /* presumably the depth of 'locked_bnode' -- TODO confirm */
+    int read_only;  /* NOTE(review): looks like it selects read vs write locking -- confirm */
+    DbRouteKey* search_key;  /* presumably the key used for the current search -- confirm */
+} CATreeRootIterator;
+
+
+void db_initialize_catree(void);
+
+int db_create_catree(Process *p, DbTable *tbl);
+
+
+TreeDbTerm** catree_find_root(Eterm key, CATreeRootIterator*);
+
+TreeDbTerm** catree_find_next_from_pb_key_root(Eterm key, CATreeRootIterator*);
+TreeDbTerm** catree_find_prev_from_pb_key_root(Eterm key, CATreeRootIterator*);
+TreeDbTerm** catree_find_nextprev_root(CATreeRootIterator*, int next, Eterm* keyp);
+TreeDbTerm** catree_find_next_root(CATreeRootIterator*, Eterm* keyp);
+TreeDbTerm** catree_find_prev_root(CATreeRootIterator*, Eterm* keyp);
+TreeDbTerm** catree_find_first_root(CATreeRootIterator*);
+TreeDbTerm** catree_find_last_root(CATreeRootIterator*);
+
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
+void erts_lcnt_enable_db_catree_lock_count(DbTableCATree *tb, int enable);
+#endif
+
+void db_catree_force_split(DbTableCATree*, int on);
+
+typedef struct {       /* Result filled in by db_calc_stats_catree() */
+    Uint route_nodes;  /* number of route nodes in the tree */
+    Uint base_nodes;   /* number of base nodes in the tree */
+    Uint max_depth;    /* largest number of route nodes on a path from the root */
+} DbCATreeStats;
+void db_calc_stats_catree(DbTableCATree*, DbCATreeStats*);
+
+
+#endif /* _DB_CATREE_H */
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index 752d3ae3a8..f05a3b51c9 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -2731,13 +2731,9 @@ static int free_seg(DbTableHash *tb, int free_records)
                  * sure no lingering threads are still hanging in BUCKET macro
                  * with an old segtab pointer.
                  */
-                Uint sz = SIZEOF_EXT_SEGTAB(est->nsegs);
-                ASSERT(sz == ERTS_ALC_DBG_BLK_SZ(est));
-                ERTS_DB_ALC_MEM_UPDATE_(tb, sz, 0);
-                erts_schedule_thr_prgr_later_cleanup_op(dealloc_ext_segtab,
-                                                        est,
-                                                        &est->lop,
-                                                        sz);
+                erts_schedule_db_free(&tb->common, dealloc_ext_segtab,
+                                      est, &est->lop,
+                                      SIZEOF_EXT_SEGTAB(est->nsegs));
             }
             else
                 erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
@@ -3107,7 +3103,7 @@ Ldone:
     handle->dbterm = &b->dbterm;
     handle->flags = flags;
     handle->new_size = b->dbterm.size;
-    handle->lck = lck;
+    handle->u.hash.lck = lck;
     return 1;
 }
 
@@ -3120,7 +3116,7 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle)
     DbTableHash *tb = &tbl->hash;
     HashDbTerm **bp = (HashDbTerm **) handle->bp;
     HashDbTerm *b = *bp;
-    erts_rwmtx_t* lck = (erts_rwmtx_t*) handle->lck;
+    erts_rwmtx_t* lck = handle->u.hash.lck;
     HashDbTerm* free_me = NULL;
 
     ERTS_LC_ASSERT(IS_HASH_WLOCKED(tb, lck));  /* locked by db_lookup_dbterm_hash */
diff --git a/erts/emulator/beam/erl_db_tree.c b/erts/emulator/beam/erl_db_tree.c
index 45e4be2426..02a5934a6e 100644
--- a/erts/emulator/beam/erl_db_tree.c
+++ b/erts/emulator/beam/erl_db_tree.c
@@ -48,34 +48,13 @@
 #include "erl_binary.h"
 
 #include "erl_db_tree.h"
+#include "erl_db_tree_util.h"
 
 #define GETKEY_WITH_POS(Keypos, Tplp) (*((Tplp) + Keypos))
 #define NITEMS(tb) ((int)erts_atomic_read_nob(&(tb)->common.nitems))
 
-/*
-** A stack of this size is enough for an AVL tree with more than
-** 0xFFFFFFFF elements. May be subject to change if
-** the datatype of the element counter is changed to a 64 bit integer.
-** The Maximal height of an AVL tree is calculated as:
-** h(n) <= 1.4404 * log(n + 2) - 0.328
-** Where n denotes the number of nodes, h(n) the height of the tree
-** with n nodes and log is the binary logarithm.
-*/
-
-#define STACK_NEED 50
 #define TREE_MAX_ELEMENTS 0xFFFFFFFFUL
 
-#define PUSH_NODE(Dtt, Tdt)                     \
-    ((Dtt)->array[(Dtt)->pos++] = Tdt)
-
-#define POP_NODE(Dtt)			\
-     (((Dtt)->pos) ? 			\
-      (Dtt)->array[--((Dtt)->pos)] : NULL)
-
-#define TOP_NODE(Dtt)                   \
-     ((Dtt->pos) ? 			\
-      (Dtt)->array[(Dtt)->pos - 1] : NULL)
-
 #define TOPN_NODE(Dtt, Pos)                   \
      (((Pos) < Dtt->pos) ? 			\
       (Dtt)->array[(Dtt)->pos - ((Pos) + 1)] : NULL)
@@ -89,10 +68,12 @@
 /* Obtain table static stack if available. NULL if not.
 ** Must be released with release_stack()
 */
-static DbTreeStack* get_static_stack(DbTableTree* tb)
+static ERTS_INLINE DbTreeStack* get_static_stack(DbTableTree* tb)
 {
-    if (!erts_atomic_xchg_acqb(&tb->is_stack_busy, 1)) {
-	return &tb->static_stack;
+    if (tb != NULL) { /* NULL tb: there is no static stack to hand out */
+        ASSERT(IS_TREE_TABLE(tb->common.type));
+        if (!erts_atomic_xchg_acqb(&tb->is_stack_busy, 1)) /* was free; now ours */
+            return &tb->static_stack;
     }
     return NULL;
 }
@@ -100,13 +81,15 @@ static DbTreeStack* get_static_stack(DbTableTree* tb)
 /* Obtain static stack if available, otherwise empty dynamic stack.
 ** Must be released with release_stack()
 */
-static DbTreeStack* get_any_stack(DbTableTree* tb)
+static DbTreeStack* get_any_stack(DbTable* tb, DbTableTree* stack_container)
 {
     DbTreeStack* stack;
-    if (!erts_atomic_xchg_acqb(&tb->is_stack_busy, 1)) {
-	return &tb->static_stack;
+    if (stack_container != NULL) {
+        ASSERT(IS_TREE_TABLE(stack_container->common.type));
+        if (!erts_atomic_xchg_acqb(&stack_container->is_stack_busy, 1))
+            return &stack_container->static_stack;
     }
-    stack = erts_db_alloc(ERTS_ALC_T_DB_STK, (DbTable *) tb,
+    stack = erts_db_alloc(ERTS_ALC_T_DB_STK, tb,
 			  sizeof(DbTreeStack) + sizeof(TreeDbTerm*) * STACK_NEED);
     stack->pos = 0;
     stack->slot = 0;
@@ -114,62 +97,62 @@ static DbTreeStack* get_any_stack(DbTableTree* tb)
     return stack;
 }
 
-static void release_stack(DbTableTree* tb, DbTreeStack* stack)
+static void release_stack(DbTable* tb, DbTableTree* stack_container, DbTreeStack* stack)
 {
-    if (stack == &tb->static_stack) {
-	ASSERT(erts_atomic_read_nob(&tb->is_stack_busy) == 1);
-	erts_atomic_set_relb(&tb->is_stack_busy, 0);
-    }
-    else {
-	erts_db_free(ERTS_ALC_T_DB_STK, (DbTable *) tb,
-		     (void *) stack, sizeof(DbTreeStack) + sizeof(TreeDbTerm*) * STACK_NEED);
+    if (stack_container != NULL) {
+        ASSERT(IS_TREE_TABLE(stack_container->common.type));
+        if (stack == &stack_container->static_stack) {
+            ASSERT(erts_atomic_read_nob(&stack_container->is_stack_busy) == 1);
+            erts_atomic_set_relb(&stack_container->is_stack_busy, 0);
+            return;
+        }
     }
+    erts_db_free(ERTS_ALC_T_DB_STK, tb,
+                 (void *) stack, sizeof(DbTreeStack) + sizeof(TreeDbTerm*) * STACK_NEED);
 }
 
-static ERTS_INLINE void reset_static_stack(DbTableTree* tb)
+static ERTS_INLINE void reset_stack(DbTreeStack* stack)
 {
-    tb->static_stack.pos = 0;
-    tb->static_stack.slot = 0;
+    if (stack != NULL) {
+        stack->pos = 0;
+        stack->slot = 0;
+    }
 }
 
-static ERTS_INLINE void free_term(DbTableTree *tb, TreeDbTerm* p)
+static ERTS_INLINE void reset_static_stack(DbTableTree* tb)
 {
-    db_free_term((DbTable*)tb, p, offsetof(TreeDbTerm, dbterm));
+    if (tb != NULL) {
+        ASSERT(IS_TREE_TABLE(tb->common.type));
+        reset_stack(&tb->static_stack);
+    }
 }
 
-static ERTS_INLINE TreeDbTerm* new_dbterm(DbTableTree *tb, Eterm obj)
+static ERTS_INLINE TreeDbTerm* new_dbterm(DbTableCommon *tb, Eterm obj)
 {
     TreeDbTerm* p;
-    if (tb->common.compress) {
-	p = db_store_term_comp(&tb->common, NULL, offsetof(TreeDbTerm,dbterm), obj);
+    if (tb->compress) {
+	p = db_store_term_comp(tb, NULL, offsetof(TreeDbTerm,dbterm), obj);
     }
     else {
-	p = db_store_term(&tb->common, NULL, offsetof(TreeDbTerm,dbterm), obj);
+	p = db_store_term(tb, NULL, offsetof(TreeDbTerm,dbterm), obj);
     }
     return p;
 }
-static ERTS_INLINE TreeDbTerm* replace_dbterm(DbTableTree *tb, TreeDbTerm* old,
+static ERTS_INLINE TreeDbTerm* replace_dbterm(DbTableCommon *tb, TreeDbTerm* old,
 					      Eterm obj)
 {
     TreeDbTerm* p;
     ASSERT(old != NULL);
-    if (tb->common.compress) {
-	p = db_store_term_comp(&tb->common, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
+    if (tb->compress) {
+	p = db_store_term_comp(tb, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
     }
     else {
-	p = db_store_term(&tb->common, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
+	p = db_store_term(tb, &(old->dbterm), offsetof(TreeDbTerm,dbterm), obj);
     }
     return p;
 }
 
 /*
-** Some macros for "direction stacks"
-*/
-#define DIR_LEFT 0
-#define DIR_RIGHT 1
-#define DIR_END 2 
-
-/*
  * Number of records to delete before trapping.
  */
 #define DELETE_RECORD_LIMIT 12000
@@ -208,31 +191,37 @@ static void do_dump_tree2(DbTableTree*, int to, void *to_arg, int show,
 ** Datatypes
 */
 
+enum ms_key_boundness {
+    /* Order significant, larger means more "boundness" => less iteration */
+    MS_KEY_UNBOUND           = 0,
+    MS_KEY_PARTIALLY_BOUND   = 1,
+    MS_KEY_BOUND             = 2,
+    MS_KEY_IMPOSSIBLE        = 3
+};
+
 /* 
  * This structure is filled in by analyze_pattern() for the select 
  * functions.
  */
 struct mp_info {
-    int something_can_match;	/* The match_spec is not "impossible" */
-    int some_limitation;	/* There is some limitation on the search
-				 * area, i. e. least and/or most is set.*/
-    int got_partial;		/* The limitation has a partially bound
-				 * key */
+    enum ms_key_boundness key_boundness;
     Eterm least;		/* The lowest matching key (possibly 
 				 * partially bound expression) */
     Eterm most;                 /* The highest matching key (possibly 
 				 * partially bound expression) */
-
-    TreeDbTerm **save_term;      /* If the key is completely bound, this
-	 			  * will be the Tree node we're searching
-				  * for, otherwise it will be useless */
     Binary *mp;                 /* The compiled match program */
 };
 
+struct select_common {
+    TreeDbTerm **root;
+};
+
+
 /*
  * Used by doit_select(_chunk)
  */
 struct select_context {
+    struct select_common common;
     Process *p;
     Eterm accum;
     Binary *mp;
@@ -248,6 +237,7 @@ struct select_context {
  * Used by doit_select_count
  */
 struct select_count_context {
+    struct select_common common;
     Process *p;
     Binary *mp;
     Eterm end_condition;
@@ -261,8 +251,10 @@ struct select_count_context {
  * Used by doit_select_delete
  */
 struct select_delete_context {
+    struct select_common common;
     Process *p;
-    DbTableTree *tb;
+    DbTableCommon *tb;
+    DbTreeStack *stack;
     Uint accum;
     Binary *mp;
     Eterm end_condition;
@@ -276,8 +268,9 @@ struct select_delete_context {
  * Used by doit_select_replace
  */
 struct select_replace_context {
+    struct select_common common;
     Process *p;
-    DbTableTree *tb;
+    DbTableCommon *tb;
     Binary *mp;
     Eterm end_condition;
     Eterm *lastobj;
@@ -292,75 +285,83 @@ typedef int (*extra_match_validator_t)(int keypos, Eterm match, Eterm guard, Ete
 /*
 ** Forward declarations 
 */
-static TreeDbTerm *linkout_tree(DbTableTree *tb, Eterm key);
-static TreeDbTerm *linkout_object_tree(DbTableTree *tb, 
-				       Eterm object);
+static TreeDbTerm *linkout_tree(DbTableCommon *tb, TreeDbTerm **root,
+                                Eterm key, DbTreeStack *stack);
+static TreeDbTerm *linkout_object_tree(DbTableCommon *tb,  TreeDbTerm **root,
+				       Eterm object, DbTableTree *stack);
 static SWord do_free_tree_continue(DbTableTree *tb, SWord reds);
-static void free_term(DbTableTree *tb, TreeDbTerm* p);
-static int balance_left(TreeDbTerm **this); 
-static int balance_right(TreeDbTerm **this); 
+static void free_term(DbTable *tb, TreeDbTerm* p);
+int tree_balance_left(TreeDbTerm **this); 
+int tree_balance_right(TreeDbTerm **this); 
 static int delsub(TreeDbTerm **this); 
-static TreeDbTerm *slot_search(Process *p, DbTableTree *tb, Sint slot);
-static TreeDbTerm *find_node(DbTableTree *tb, Eterm key);
-static TreeDbTerm **find_node2(DbTableTree *tb, Eterm key);
-static TreeDbTerm **find_ptr(DbTableTree *tb, DbTreeStack*, TreeDbTerm *this);
-static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack*, Eterm key);
-static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack*, Eterm key);
-static TreeDbTerm *find_next_from_pb_key(DbTableTree *tb, DbTreeStack*,
-					 Eterm key);
-static TreeDbTerm *find_prev_from_pb_key(DbTableTree *tb, DbTreeStack*,
-					 Eterm key);
-static void traverse_backwards(DbTableTree *tb,
+static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root, Sint slot,
+                               DbTable *tb, DbTableTree *stack_container,
+                               CATreeRootIterator *iter);
+static TreeDbTerm *find_node(DbTableCommon *tb, TreeDbTerm *root,
+                             Eterm key, DbTableTree *stack_container);
+static TreeDbTerm **find_node2(DbTableCommon *tb, TreeDbTerm **root, Eterm key);
+static TreeDbTerm **find_ptr(DbTableCommon *tb, TreeDbTerm **root,
+                             DbTreeStack *stack, TreeDbTerm *this);
+static TreeDbTerm *find_next(DbTableCommon *tb, TreeDbTerm *root,
+                             DbTreeStack* stack, Eterm key);
+static TreeDbTerm *find_prev(DbTableCommon *tb, TreeDbTerm *root,
+                             DbTreeStack* stack, Eterm key);
+static TreeDbTerm *find_next_from_pb_key(DbTable*, TreeDbTerm*** rootpp,
+                                         DbTreeStack* stack, Eterm key,
+                                         CATreeRootIterator*);
+static TreeDbTerm *find_prev_from_pb_key(DbTable*,  TreeDbTerm*** rootpp,
+                                         DbTreeStack* stack, Eterm key,
+                                         CATreeRootIterator*);
+typedef int traverse_doit_funcT(DbTableCommon*, TreeDbTerm*,
+                                struct select_common*, int forward);
+
+static void traverse_backwards(DbTableCommon *tb,
 			       DbTreeStack*,
 			       Eterm lastkey,
-			       int (*doit)(DbTableTree *tb,
-					   TreeDbTerm *,
-					   void *,
-					   int),
-			       void *context); 
-static void traverse_forward(DbTableTree *tb,
+			       traverse_doit_funcT*,
+			       struct select_common *context,
+                               CATreeRootIterator*);
+static void traverse_forward(DbTableCommon *tb,
 			     DbTreeStack*,
 			     Eterm lastkey,
-			     int (*doit)(DbTableTree *tb,
-					 TreeDbTerm *,
-					 void *,
-					 int),
-			     void *context);
-static void traverse_update_backwards(DbTableTree *tb,
+                             traverse_doit_funcT*,
+			     struct select_common *context,
+                             CATreeRootIterator*);
+static void traverse_update_backwards(DbTableCommon *tb,
                                       DbTreeStack*,
                                       Eterm lastkey,
-                                      int (*doit)(DbTableTree *tb,
+                                      int (*doit)(DbTableCommon *tb,
                                                   TreeDbTerm **, // out
-                                                  void *,
+                                                  struct select_common*,
                                                   int),
-                                      void *context);
-static int key_given(DbTableTree *tb, Eterm pattern, TreeDbTerm ***ret,
-		     Eterm *partly_bound_key);
-static Sint cmp_partly_bound(Eterm partly_bound_key, Eterm bound_key);
+                                      struct select_common*,
+                                      CATreeRootIterator*);
+static enum ms_key_boundness key_boundness(DbTableCommon *tb,
+                                           Eterm pattern, Eterm *keyp);
 static Sint do_cmp_partly_bound(Eterm a, Eterm b, int *done);
 
-static int analyze_pattern(DbTableTree *tb, Eterm pattern, 
+static int analyze_pattern(DbTableCommon *tb, Eterm pattern,
                            extra_match_validator_t extra_validator, /* Optional callback */
                            struct mp_info *mpi);
-static int doit_select(DbTableTree *tb,
-		       TreeDbTerm *this,
-		       void *ptr,
+static int doit_select(DbTableCommon *tb,
+                       TreeDbTerm *this,
+                       struct select_common* ptr,
 		       int forward);
-static int doit_select_count(DbTableTree *tb,
+static int doit_select_count(DbTableCommon *tb,
 			     TreeDbTerm *this,
-			     void *ptr,
+                             struct select_common*,
 			     int forward);
-static int doit_select_chunk(DbTableTree *tb,
+static int doit_select_chunk(DbTableCommon *tb,
 			     TreeDbTerm *this,
-			     void *ptr,
+                             struct select_common*,
 			     int forward);
-static int doit_select_delete(DbTableTree *tb,
+static int doit_select_delete(DbTableCommon *tb,
 			      TreeDbTerm *this,
-			      void *ptr,
+			      struct select_common*,
 			      int forward);
-static int doit_select_replace(DbTableTree *tb,
+static int doit_select_replace(DbTableCommon *tb,
                                TreeDbTerm **this_ptr,
-                               void *ptr,
+                               struct select_common*,
                                int forward);
 
 static int partly_bound_can_match_lesser(Eterm partly_bound_1, 
@@ -508,18 +509,18 @@ int db_create_tree(Process *p, DbTable *tbl)
     return DB_ERROR_NONE;
 }
 
-static int db_first_tree(Process *p, DbTable *tbl, Eterm *ret)
+int db_first_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                         Eterm *ret, DbTableTree *stack_container)
 {
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     TreeDbTerm *this;
 
-    if (( this = tb->root ) == NULL) {
+    if (( this = root ) == NULL) {
 	*ret = am_EOT;
 	return DB_ERROR_NONE;
     }
     /* Walk down the tree to the left */
-    if ((stack = get_static_stack(tb)) != NULL) {
+    if ((stack = get_static_stack(stack_container)) != NULL) {
 	stack->pos = stack->slot = 0;
     }
     while (this->left != NULL) {
@@ -529,23 +530,27 @@ static int db_first_tree(Process *p, DbTable *tbl, Eterm *ret)
     if (stack) {
 	PUSH_NODE(stack, this);
 	stack->slot = 1;
-	release_stack(tb,stack);
+	release_stack(tbl,stack_container,stack);
     }
     *ret = db_copy_key(p, tbl, &this->dbterm);
     return DB_ERROR_NONE;
 }
 
-static int db_next_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+static int db_first_tree(Process *p, DbTable *tbl, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
-    DbTreeStack* stack;
+    return db_first_tree_common(p, tbl, tb->root, ret, tb);
+}
+
+int db_next_tree_common(Process *p, DbTable *tbl,
+                        TreeDbTerm *root, Eterm key,
+                        Eterm *ret, DbTreeStack* stack)
+{
     TreeDbTerm *this;
 
-    if (is_atom(key) && key == am_EOT)
+    if (key == am_EOT)
 	return DB_ERROR_BADKEY;
-    stack = get_any_stack(tb);
-    this = find_next(tb, stack, key);
-    release_stack(tb,stack);
+    this = find_next(&tbl->common, root, stack, key);
     if (this == NULL) {
 	*ret = am_EOT;
 	return DB_ERROR_NONE;
@@ -554,18 +559,27 @@ static int db_next_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
     return DB_ERROR_NONE;
 }
 
-static int db_last_tree(Process *p, DbTable *tbl, Eterm *ret)
+static int db_next_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    DbTreeStack* stack = get_any_stack(tbl, tb);
+    int ret_val = db_next_tree_common(p, tbl, tb->root, key, ret, stack);
+    release_stack(tbl,tb,stack);
+    return ret_val;
+}
+
+int db_last_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                        Eterm *ret, DbTableTree *stack_container)
+{
     TreeDbTerm *this;
     DbTreeStack* stack;
 
-    if (( this = tb->root ) == NULL) {
+    if (( this = root ) == NULL) {
 	*ret = am_EOT;
 	return DB_ERROR_NONE;
     }
     /* Walk down the tree to the right */
-    if ((stack = get_static_stack(tb)) != NULL) {
+    if ((stack = get_static_stack(stack_container)) != NULL) {
 	stack->pos = stack->slot = 0;
     }    
     while (this->right != NULL) {
@@ -574,24 +588,27 @@ static int db_last_tree(Process *p, DbTable *tbl, Eterm *ret)
     }
     if (stack) {
 	PUSH_NODE(stack, this);
-	stack->slot = NITEMS(tb);
-	release_stack(tb,stack);
+	stack->slot = NITEMS(tbl);
+	release_stack(tbl,stack_container,stack);
     }
     *ret = db_copy_key(p, tbl, &this->dbterm);
     return DB_ERROR_NONE;
 }
 
-static int db_prev_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+static int db_last_tree(Process *p, DbTable *tbl, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_last_tree_common(p, tbl, tb->root, ret, tb);
+}
+
+int db_prev_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, Eterm key,
+                 Eterm *ret, DbTreeStack* stack)
+{
     TreeDbTerm *this;
-    DbTreeStack* stack;
 
-    if (is_atom(key) && key == am_EOT)
+    if (key == am_EOT)
 	return DB_ERROR_BADKEY;
-    stack = get_any_stack(tb);
-    this = find_prev(tb, stack, key);
-    release_stack(tb,stack);
+    this = find_prev(&tbl->common, root, stack, key);
     if (this == NULL) {
 	*ret = am_EOT;
 	return DB_ERROR_NONE;
@@ -600,25 +617,30 @@ static int db_prev_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
     return DB_ERROR_NONE;
 }
 
-static ERTS_INLINE Sint cmp_key(DbTableTree* tb, Eterm key, TreeDbTerm* obj) {
-    return CMP(key, GETKEY(tb,obj->dbterm.tpl));
+static int db_prev_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableTree *tb = &tbl->tree;
+    DbTreeStack* stack = get_any_stack(tbl, tb);
+    int res = db_prev_tree_common(p, tbl, tb->root, key, ret, stack);
+    release_stack(tbl,tb,stack);
+    return res;
 }
 
-static ERTS_INLINE int cmp_key_eq(DbTableTree* tb, Eterm key, TreeDbTerm* obj) {
+static ERTS_INLINE int cmp_key_eq(DbTableCommon* tb, Eterm key, TreeDbTerm* obj) {
     Eterm obj_key = GETKEY(tb,obj->dbterm.tpl);
     return is_same(key, obj_key) || CMP(key, obj_key) == 0;
 }
 
-static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
+int db_put_tree_common(DbTableCommon *tb, TreeDbTerm **root, Eterm obj,
+                       int key_clash_fail, DbTableTree *stack_container)
 {
-    DbTableTree *tb = &tbl->tree;
     /* Non recursive insertion in AVL tree, building our own stack */
     TreeDbTerm **tstack[STACK_NEED];
     int tpos = 0;
     int dstack[STACK_NEED+1];
     int dpos = 0;
     int state = 0;
-    TreeDbTerm **this = &tb->root;
+    TreeDbTerm **this = root;
     Sint c;
     Eterm key;
     int dir;
@@ -626,14 +648,14 @@ static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
 
     key = GETKEY(tb, tuple_val(obj));
 
-    reset_static_stack(tb);
+    reset_static_stack(stack_container);
 
     dstack[dpos++] = DIR_END;
     for (;;)
 	if (!*this) { /* Found our place */
 	    state = 1;
-	    if (erts_atomic_inc_read_nob(&tb->common.nitems) >= TREE_MAX_ELEMENTS) {
-		erts_atomic_dec_nob(&tb->common.nitems);
+	    if (erts_atomic_inc_read_nob(&tb->nitems) >= TREE_MAX_ELEMENTS) {
+		erts_atomic_dec_nob(&tb->nitems);
 		return DB_ERROR_SYSRES;
 	    }
 	    *this = new_dbterm(tb, obj);
@@ -724,9 +746,15 @@ static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
     return DB_ERROR_NONE;
 }
 
-static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+static int db_put_tree(DbTable *tbl, Eterm obj, int key_clash_fail)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_put_tree_common(&tb->common, &tb->root, obj, key_clash_fail, tb);
+}
+
+int db_get_tree_common(Process *p, DbTableCommon *tb, TreeDbTerm *root, Eterm key,
+                       Eterm *ret, DbTableTree *stack_container)
+{
     Eterm copy;
     Eterm *hp, *hend;
     TreeDbTerm *this;
@@ -737,13 +765,13 @@ static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
      * The list created around it is purely for interface conformance.
      */
     
-    this = find_node(tb,key);
+    this = find_node(tb,root,key,stack_container);
     if (this == NULL) {
 	*ret = NIL;
     } else {
 	hp = HAlloc(p, this->dbterm.size + 2);
 	hend = hp + this->dbterm.size + 2;
-	copy = db_copy_object_from_ets(&tb->common, &this->dbterm, &hp, &MSO(p));
+	copy = db_copy_object_from_ets(tb, &this->dbterm, &hp, &MSO(p));
 	*ret = CONS(hp, copy, NIL);
 	hp += 2;
 	HRelease(p,hend,hp);
@@ -751,18 +779,28 @@ static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
     return DB_ERROR_NONE;
 }
 
-static int db_member_tree(DbTable *tbl, Eterm key, Eterm *ret)
+static int db_get_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_get_tree_common(p, &tb->common, tb->root, key, ret, tb);
+}
 
-    *ret = (find_node(tb,key) == NULL) ? am_false : am_true;
+int db_member_tree_common(DbTableCommon *tb, TreeDbTerm *root, Eterm key, Eterm *ret,
+                          DbTableTree *stack_container)
+{
+    *ret = (find_node(tb,root,key,stack_container) == NULL) ? am_false : am_true;
     return DB_ERROR_NONE;
 }
 
-static int db_get_element_tree(Process *p, DbTable *tbl,
-			       Eterm key, int ndex, Eterm *ret)
+static int db_member_tree(DbTable *tbl, Eterm key, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_member_tree_common(&tb->common, tb->root, key, ret, tb);
+}
+
+int db_get_element_tree_common(Process *p, DbTableCommon *tb, TreeDbTerm *root, Eterm key,
+                               int ndex, Eterm *ret, DbTableTree *stack_container)
+{
     /*
      * Look the node up:
      */
@@ -776,49 +814,69 @@ static int db_get_element_tree(Process *p, DbTable *tbl,
      * around the element here either.
      */
     
-    this = find_node(tb,key);
+    this = find_node(tb,root,key,stack_container);
     if (this == NULL) {
 	return DB_ERROR_BADKEY;
     } else {
 	if (ndex > arityval(this->dbterm.tpl[0])) {
 	    return DB_ERROR_BADPARAM;
 	}
-	*ret = db_copy_element_from_ets(&tb->common, p, &this->dbterm, ndex, &hp, 0);
+	*ret = db_copy_element_from_ets(tb, p, &this->dbterm, ndex, &hp, 0);
     }
     return DB_ERROR_NONE;
 }
 
-static int db_erase_tree(DbTable *tbl, Eterm key, Eterm *ret)
+static int db_get_element_tree(Process *p, DbTable *tbl,
+			       Eterm key, int ndex, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_get_element_tree_common(p, &tb->common, tb->root, key,
+                                      ndex, ret, tb);
+}
+
+int db_erase_tree_common(DbTable *tbl, TreeDbTerm **root, Eterm key, Eterm *ret,
+                         DbTreeStack *stack /* NULL if no static stack */)
+{
     TreeDbTerm *res;
 
     *ret = am_true;
 
-    if ((res = linkout_tree(tb, key)) != NULL) {
-	free_term(tb, res);
+    if ((res = linkout_tree(&tbl->common, root,key, stack)) != NULL) {
+	free_term(tbl, res);
     }
     return DB_ERROR_NONE;
 }
 
-static int db_erase_object_tree(DbTable *tbl, Eterm object, Eterm *ret)
+static int db_erase_tree(DbTable *tbl, Eterm key, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_erase_tree_common(tbl, &tb->root, key, ret, &tb->static_stack);
+}
+
+int db_erase_object_tree_common(DbTable *tbl, TreeDbTerm **root, Eterm object,
+                                Eterm *ret, DbTableTree *stack_container)
+{
     TreeDbTerm *res;
 
     *ret = am_true;
 
-    if ((res = linkout_object_tree(tb, object)) != NULL) {
-	free_term(tb, res);
+    if ((res = linkout_object_tree(&tbl->common, root, object, stack_container)) != NULL) {
+	free_term(tbl, res);
     }
     return DB_ERROR_NONE;
 }
 
-
-static int db_slot_tree(Process *p, DbTable *tbl, 
-			Eterm slot_term, Eterm *ret)
+static int db_erase_object_tree(DbTable *tbl, Eterm object, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return  db_erase_object_tree_common(tbl, &tb->root, object, ret, tb);
+}
+
+int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                        Eterm slot_term, Eterm *ret,
+                        DbTableTree *stack_container,
+                        CATreeRootIterator *iter)
+{
     Sint slot;
     TreeDbTerm *st;
     Eterm *hp, *hend;
@@ -834,10 +892,10 @@ static int db_slot_tree(Process *p, DbTable *tbl,
 
     if (is_not_small(slot_term) ||
 	((slot = signed_val(slot_term)) < 0) ||
-	(slot > NITEMS(tb)))
+	(slot > NITEMS(tbl)))
 	return DB_ERROR_BADPARAM;
 
-    if (slot == NITEMS(tb)) {
+    if (slot == NITEMS(tbl)) {
 	*ret = am_EOT;
 	return DB_ERROR_NONE;
     }
@@ -847,20 +905,27 @@ static int db_slot_tree(Process *p, DbTable *tbl,
      * are counted from 1 and up.
      */
     ++slot;
-    st = slot_search(p, tb, slot); 
+    st = slot_search(p, root, slot, tbl, stack_container, iter);
     if (st == NULL) {
 	*ret = am_false;
 	return DB_ERROR_UNSPEC;
     }
     hp = HAlloc(p, st->dbterm.size + 2);
     hend = hp + st->dbterm.size + 2;
-    copy = db_copy_object_from_ets(&tb->common, &st->dbterm, &hp, &MSO(p));
+    copy = db_copy_object_from_ets(&tbl->common, &st->dbterm, &hp, &MSO(p));
     *ret = CONS(hp, copy, NIL);
     hp += 2;
     HRelease(p,hend,hp);
     return DB_ERROR_NONE;
 }
 
+/* Plain-tree slot lookup: forward the table's own root and stack container; no root iterator. */
+static int db_slot_tree(Process *p, DbTable *tbl, Eterm slot_term, Eterm *ret)
+{
+    return db_slot_tree_common(p, tbl, tbl->tree.root, slot_term, ret,
+                               &tbl->tree, NULL);
+}
+
 
 
 static BIF_RETTYPE ets_select_reverse(BIF_ALIST_3)
@@ -926,19 +991,14 @@ static BIF_RETTYPE bif_trap3(Export *bif,
 {
     BIF_TRAP3(bif, p, p1, p2, p3);
 }
-    
-/*
-** This is called either when the select bif traps or when ets:select/1 
-** is called. It does mostly the same as db_select_tree and may in either case
-** trap to itself again (via the ets:select/1 bif).
-** Note that this is common for db_select_tree and db_select_chunk_tree.
-*/
-static int db_select_continue_tree(Process *p, 
-				   DbTable *tbl,
-				   Eterm continuation,
-				   Eterm *ret)
+
+int db_select_continue_tree_common(Process *p, 
+                                   DbTableCommon *tb,
+                                   Eterm continuation,
+                                   Eterm *ret,
+                                   DbTableTree *stack_container,
+                                   CATreeRootIterator* iter)
 {
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     struct select_context sc;
     unsigned sz;
@@ -951,7 +1011,6 @@ static int db_select_continue_tree(Process *p,
     Sint chunk_size;
     Sint reverse;
 
-
 #define RET_TO_BIF(Term, State) do { *ret = (Term); return State; } while(0);
 
     /* Decode continuation. We know it's a tuple but not the arity or 
@@ -980,28 +1039,37 @@ static int db_select_continue_tree(Process *p,
     sc.end_condition = NIL;
     sc.lastobj = NULL;
     sc.max = 1000;
-    sc.keypos = tb->common.keypos;
+    sc.keypos = tb->keypos;
     sc.chunk_size = chunk_size;
     reverse = unsigned_val(tptr[7]);
     sc.got = signed_val(tptr[8]);
 
-    stack = get_any_stack(tb);
-    if (chunk_size) {
-	if (reverse) {
-	    traverse_backwards(tb, stack, lastkey, &doit_select_chunk, &sc);
-	} else {
-	    traverse_forward(tb, stack, lastkey, &doit_select_chunk, &sc);
-	}
-    } else {
-	if (reverse) {
-	    traverse_forward(tb, stack, lastkey, &doit_select, &sc);
-	} else {
-	    traverse_backwards(tb, stack, lastkey, &doit_select, &sc);
-	}
+    if (iter) {
+        iter->next_route_key = lastkey;
+        sc.common.root = catree_find_nextprev_root(iter, !!reverse != !!chunk_size, NULL);
     }
-    release_stack(tb,stack);
+    else
+        sc.common.root = &((DbTableTree*)tb)->root;
+
+    if (sc.common.root) {
+        stack = get_any_stack((DbTable*)tb, stack_container);
+        if (chunk_size) {
+            if (reverse) {
+                traverse_backwards(tb, stack, lastkey, &doit_select_chunk, &sc.common, iter);
+            } else {
+                traverse_forward(tb, stack, lastkey, &doit_select_chunk, &sc.common, iter);
+            }
+        } else {
+            if (reverse) {
+                traverse_forward(tb, stack, lastkey, &doit_select, &sc.common, iter);
+            } else {
+                traverse_backwards(tb, stack, lastkey, &doit_select, &sc.common, iter);
+            }
+        }
+        release_stack((DbTable*)tb,stack_container,stack);
 
-    BUMP_REDS(p, 1000 - sc.max);
+        BUMP_REDS(p, 1000 - sc.max);
+    }
 
     if (sc.max > 0 || (chunk_size && sc.got == chunk_size)) {
 	if (chunk_size) {
@@ -1082,13 +1150,29 @@ static int db_select_continue_tree(Process *p,
 
 #undef RET_TO_BIF
 }
+    
+/*
+** Continuation entry point for select on a plain tree table. It is called
+** either when the select bif traps or when ets:select/1 is called. It does
+** mostly the same as db_select_tree and may in either case trap to itself
+** again (via the ets:select/1 bif).
+** Note that this is common for db_select_tree and db_select_chunk_tree.
+*/
+static int db_select_continue_tree(Process *p, DbTable *tbl,
+                                   Eterm continuation, Eterm *ret)
+{
+    DbTableTree *tree = &tbl->tree;   /* also serves as the stack container */
+
+    return db_select_continue_tree_common(p, &tree->common, continuation, ret,
+                                          tree, NULL);
+}
 
-
-static int db_select_tree(Process *p, DbTable *tbl, Eterm tid,
-			  Eterm pattern, int reverse, Eterm *ret)
+int db_select_tree_common(Process *p, DbTable *tb,
+                          Eterm tid, Eterm pattern, int reverse, Eterm *ret,
+                          DbTableTree *stack_container,
+                          CATreeRootIterator* iter)
 {
     /* Strategy: Traverse backwards to build resulting list from tail to head */
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     struct select_context sc;
     struct mp_info mpi;
@@ -1121,42 +1205,62 @@ static int db_select_tree(Process *p, DbTable *tbl, Eterm tid,
     sc.got = 0;
     sc.chunk_size = 0;
 
-    if ((errcode = analyze_pattern(tb, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
+    if ((errcode = analyze_pattern(&tb->common, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
 	RET_TO_BIF(NIL,errcode);
     }
 
-    if (!mpi.something_can_match) {
+    if (mpi.key_boundness == MS_KEY_IMPOSSIBLE) {
 	RET_TO_BIF(NIL,DB_ERROR_NONE);  
 	/* can't possibly match anything */
     }
 
     sc.mp = mpi.mp;
 
-    if (!mpi.got_partial && mpi.some_limitation && 
-	CMP_EQ(mpi.least,mpi.most)) {
-	doit_select(tb,*(mpi.save_term),&sc,0 /* direction doesn't matter */);
+    if (mpi.key_boundness == MS_KEY_BOUND) {
+        ASSERT(CMP_EQ(mpi.least, mpi.most));
+        if (iter)
+            sc.common.root = catree_find_root(mpi.least, iter);
+        else
+            sc.common.root = &tb->tree.root;
+        this = find_node(&tb->common, *sc.common.root, mpi.least, NULL);
+        if (this)
+            doit_select(&tb->common, this, &sc.common, 0 /* direction doesn't matter */);
 	RET_TO_BIF(sc.accum,DB_ERROR_NONE);
     }
 
-    stack = get_any_stack(tb);
+    stack = get_any_stack((DbTable*)tb,stack_container);
     if (reverse) {
-	if (mpi.some_limitation) {
-	    if ((this = find_prev_from_pb_key(tb, stack, mpi.least)) != NULL) {
+	if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+            this = find_prev_from_pb_key(tb, &sc.common.root, stack, mpi.least, iter);
+	    if (this)
 		lastkey = GETKEY(tb, this->dbterm.tpl);
-	    }
 	    sc.end_condition = mpi.most;
 	}
-	traverse_forward(tb, stack, lastkey, &doit_select, &sc);
+        else {
+            ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+            if (iter)
+                sc.common.root = catree_find_first_root(iter);
+            else
+                sc.common.root = &tb->tree.root;
+        }
+	traverse_forward(&tb->common, stack, lastkey, &doit_select, &sc.common, iter);
     } else {
-	if (mpi.some_limitation) {
-	    if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) {
-		lastkey = GETKEY(tb, this->dbterm.tpl);
-	    }
+	if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+            this = find_next_from_pb_key(tb, &sc.common.root, stack, mpi.most, iter);
+	    if (this)
+                lastkey = GETKEY(tb, this->dbterm.tpl);
 	    sc.end_condition = mpi.least;
 	}
-	traverse_backwards(tb, stack, lastkey, &doit_select, &sc);
+        else {
+            ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+            if (iter)
+                sc.common.root = catree_find_last_root(iter);
+            else
+                sc.common.root = &tb->tree.root;
+        }
+	traverse_backwards(&tb->common, stack, lastkey, &doit_select, &sc.common, iter);
     }
-    release_stack(tb,stack);
+    release_stack((DbTable*)tb,stack_container,stack);
 #ifdef HARDDEBUG
 	erts_fprintf(stderr,"Least: %T\n", mpi.least);
 	erts_fprintf(stderr,"Most: %T\n", mpi.most);
@@ -1192,16 +1296,20 @@ static int db_select_tree(Process *p, DbTable *tbl, Eterm tid,
 
 }
 
-    
-/*
-** This is called either when the select_count bif traps.
-*/
-static int db_select_count_continue_tree(Process *p, 
-					 DbTable *tbl,
-					 Eterm continuation,
-					 Eterm *ret)
+static int db_select_tree(Process *p, DbTable *tbl, Eterm tid,
+                          Eterm pattern, int reverse, Eterm *ret)
+{
+    /* Plain tree: the table is its own stack container; no root iterator. */
+    return db_select_tree_common(p, tbl, tid, pattern, reverse, ret, &tbl->tree, NULL);
+}
+
+int db_select_count_continue_tree_common(Process *p, 
+                                         DbTable *tb,
+                                         Eterm continuation,
+                                         Eterm *ret,
+                                         DbTableTree *stack_container,
+                                         CATreeRootIterator* iter)
 {
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     struct select_count_context sc;
     unsigned sz;
@@ -1213,7 +1321,6 @@ static int db_select_count_continue_tree(Process *p,
     Eterm *tptr;
     Eterm egot;
 
-
 #define RET_TO_BIF(Term, State) do { *ret = (Term); return State; } while(0);
 
     /* Decode continuation. We know it's a tuple and everything else as
@@ -1245,11 +1352,21 @@ static int db_select_count_continue_tree(Process *p,
 	sc.got = unsigned_val(tptr[5]);
     }
 
-    stack = get_any_stack(tb);
-    traverse_backwards(tb, stack, lastkey, &doit_select_count, &sc);
-    release_stack(tb,stack);
+    if (iter) {
+        iter->next_route_key = lastkey;
+        sc.common.root = catree_find_prev_root(iter, NULL);
+    }
+    else {
+        sc.common.root = &tb->tree.root;
+    }
 
-    BUMP_REDS(p, 1000 - sc.max);
+    if (sc.common.root) {
+        stack = get_any_stack(tb, stack_container);
+        traverse_backwards(&tb->common, stack, lastkey, &doit_select_count, &sc.common, iter);
+        release_stack(tb,stack_container,stack);
+
+        BUMP_REDS(p, 1000 - sc.max);
+    }
 
     if (sc.max > 0) {
 	RET_TO_BIF(erts_make_integer(sc.got,p), DB_ERROR_NONE);
@@ -1285,11 +1402,25 @@ static int db_select_count_continue_tree(Process *p,
 #undef RET_TO_BIF
 }
 
-
-static int db_select_count_tree(Process *p, DbTable *tbl, Eterm tid,
-				Eterm pattern, Eterm *ret)
+/*
+** Called when the select_count bif traps: forwards with tb as stack container, NULL iter.
+*/
+static int db_select_count_continue_tree(Process *p, 
+                                         DbTable *tbl,
+                                         Eterm continuation,
+                                         Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_select_count_continue_tree_common(p, tbl,
+                                                continuation, ret, tb, NULL);
+}
+
+
+int db_select_count_tree_common(Process *p, DbTable *tb,
+                                Eterm tid, Eterm pattern, Eterm *ret,
+                                DbTableTree *stack_container,
+                                CATreeRootIterator* iter)
+{
     DbTreeStack* stack;
     struct select_count_context sc;
     struct mp_info mpi;
@@ -1303,7 +1434,6 @@ static int db_select_count_tree(Process *p, DbTable *tbl, Eterm tid,
     Eterm egot;
     Eterm mpb;
 
-
 #define RET_TO_BIF(Term,RetVal) do { 	       	\
 	if (mpi.mp != NULL) {			\
 	    erts_bin_free(mpi.mp);       	\
@@ -1321,33 +1451,46 @@ static int db_select_count_tree(Process *p, DbTable *tbl, Eterm tid,
     sc.keypos = tb->common.keypos;
     sc.got = 0;
 
-    if ((errcode = analyze_pattern(tb, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
+    if ((errcode = analyze_pattern(&tb->common, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
 	RET_TO_BIF(NIL,errcode);
     }
 
-    if (!mpi.something_can_match) {
+    if (mpi.key_boundness == MS_KEY_IMPOSSIBLE) {
 	RET_TO_BIF(make_small(0),DB_ERROR_NONE);  
 	/* can't possibly match anything */
     }
 
     sc.mp = mpi.mp;
 
-    if (!mpi.got_partial && mpi.some_limitation && 
-	CMP_EQ(mpi.least,mpi.most)) {
-	doit_select_count(tb,*(mpi.save_term),&sc,0 /* dummy */);
+    if (mpi.key_boundness == MS_KEY_BOUND) {
+        ASSERT(CMP_EQ(mpi.least, mpi.most));
+        if (iter)
+            sc.common.root = catree_find_root(mpi.least, iter);
+        else
+            sc.common.root = &((DbTable*)tb)->tree.root;
+        this =  find_node(&tb->common, *sc.common.root, mpi.least, NULL);
+        if (this)
+            doit_select_count(&tb->common, this, &sc.common, 0 /* dummy */);
 	RET_TO_BIF(erts_make_integer(sc.got,p),DB_ERROR_NONE);
     }
 
-    stack = get_any_stack(tb);
-    if (mpi.some_limitation) {
-	if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) {
-	    lastkey = GETKEY(tb, this->dbterm.tpl);
-	}
+    stack = get_any_stack((DbTable*)tb, stack_container);
+    if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+        this = find_next_from_pb_key(tb, &sc.common.root, stack, mpi.most, iter);
+	if (this)
+            lastkey = GETKEY(tb, this->dbterm.tpl);
 	sc.end_condition = mpi.least;
     }
+    else {
+        ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+        if (iter)
+            sc.common.root = catree_find_last_root(iter);
+        else
+            sc.common.root = &tb->tree.root;
+    }
     
-    traverse_backwards(tb, stack, lastkey, &doit_select_count, &sc);
-    release_stack(tb,stack);
+    traverse_backwards(&tb->common, stack, lastkey, &doit_select_count, &sc.common, iter);
+    release_stack((DbTable*)tb,stack_container,stack);
     BUMP_REDS(p, 1000 - sc.max);
     if (sc.max > 0) {
 	RET_TO_BIF(erts_make_integer(sc.got,p),DB_ERROR_NONE);
@@ -1383,12 +1526,21 @@ static int db_select_count_tree(Process *p, DbTable *tbl, Eterm tid,
 
 }
 
-static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
-				Eterm pattern, Sint chunk_size,
-				int reverse,
-				Eterm *ret)
+static int db_select_count_tree(Process *p, DbTable *tbl, Eterm tid,
+                                Eterm pattern, Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_select_count_tree_common(p, tbl,
+                                       tid, pattern, ret, tb, NULL); /* tb: stack container, NULL: no iter */
+}
+
+
+int db_select_chunk_tree_common(Process *p, DbTable *tb,
+                                Eterm tid, Eterm pattern, Sint chunk_size,
+                                int reverse, Eterm *ret,
+                                DbTableTree *stack_container,
+                                CATreeRootIterator* iter)
+{
     DbTreeStack* stack;
     struct select_context sc;
     struct mp_info mpi;
@@ -1401,7 +1553,6 @@ static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
     int errcode;
     Eterm mpb;
 
-
 #define RET_TO_BIF(Term,RetVal) do { 		\
 	if (mpi.mp != NULL) {			\
 	    erts_bin_free(mpi.mp);		\
@@ -1421,20 +1572,26 @@ static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
     sc.got = 0;
     sc.chunk_size = chunk_size;
 
-    if ((errcode = analyze_pattern(tb, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
+    if ((errcode = analyze_pattern(&tb->common, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
 	RET_TO_BIF(NIL,errcode);
     }
 
-    if (!mpi.something_can_match) {
+    if (mpi.key_boundness == MS_KEY_IMPOSSIBLE) {
 	RET_TO_BIF(am_EOT,DB_ERROR_NONE);
 	/* can't possibly match anything */
     }
 
     sc.mp = mpi.mp;
 
-    if (!mpi.got_partial && mpi.some_limitation && 
-	CMP_EQ(mpi.least,mpi.most)) {
-	doit_select(tb,*(mpi.save_term),&sc, 0 /* direction doesn't matter */);
+    if (mpi.key_boundness == MS_KEY_BOUND) {
+        ASSERT(CMP_EQ(mpi.least, mpi.most));
+        if (iter)
+            sc.common.root = catree_find_root(mpi.least, iter);
+        else
+            sc.common.root = &tb->tree.root;
+        this =  find_node(&tb->common, *sc.common.root, mpi.least, NULL);
+        if (this)
+            doit_select(&tb->common, this, &sc.common, 0 /* direction doesn't matter */);
 	if (sc.accum != NIL) {
 	    hp=HAlloc(p, 3);
 	    RET_TO_BIF(TUPLE2(hp,sc.accum,am_EOT),DB_ERROR_NONE);
@@ -1443,25 +1600,39 @@ static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
 	}
     }
 
-    stack = get_any_stack(tb);
+    stack = get_any_stack((DbTable*)tb,stack_container);
     if (reverse) {
-	if (mpi.some_limitation) {
-	    if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) {
-		lastkey = GETKEY(tb, this->dbterm.tpl);
-	    }
+	if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+            this = find_next_from_pb_key(tb, &sc.common.root, stack, mpi.most, iter);
+	    if (this)
+                lastkey = GETKEY(tb, this->dbterm.tpl);
 	    sc.end_condition = mpi.least;
 	}
-	traverse_backwards(tb, stack, lastkey, &doit_select_chunk, &sc);
+        else {
+            ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+            if (iter)
+                sc.common.root = catree_find_last_root(iter);
+            else
+                sc.common.root = &tb->tree.root;
+        }
+	traverse_backwards(&tb->common, stack, lastkey, &doit_select_chunk, &sc.common, iter);
     } else {
-	if (mpi.some_limitation) {
-	    if ((this = find_prev_from_pb_key(tb, stack, mpi.least)) != NULL) {
-		lastkey = GETKEY(tb, this->dbterm.tpl);
-	    }
+	if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+            this = find_prev_from_pb_key(tb, &sc.common.root, stack, mpi.least, iter);
+	    if (this)
+                lastkey = GETKEY(tb, this->dbterm.tpl);
 	    sc.end_condition = mpi.most;
 	}
-	traverse_forward(tb, stack, lastkey, &doit_select_chunk, &sc);
+        else {
+            ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+            if (iter)
+                sc.common.root = catree_find_first_root(iter);
+            else
+                sc.common.root = &tb->tree.root;
+        }
+	traverse_forward(&tb->common, stack, lastkey, &doit_select_chunk, &sc.common, iter);
     }
-    release_stack(tb,stack);
+    release_stack((DbTable*)tb,stack_container,stack);
 
     BUMP_REDS(p, 1000 - sc.max);
     if (sc.max > 0 || sc.got == chunk_size) {
@@ -1530,15 +1701,25 @@ static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
 
 }
 
-/*
-** This is called when select_delete traps
-*/
-static int db_select_delete_continue_tree(Process *p, 
-					  DbTable *tbl,
-					  Eterm continuation,
-					  Eterm *ret)
+static int db_select_chunk_tree(Process *p, DbTable *tbl, Eterm tid,
+                                Eterm pattern, Sint chunk_size,
+                                int reverse,
+                                Eterm *ret)
 {
     DbTableTree *tb = &tbl->tree;
+    return db_select_chunk_tree_common(p, tbl,
+                                       tid, pattern, chunk_size,
+                                       reverse, ret, tb, NULL); /* tb: stack container, NULL: no iter */
+}
+
+
+int db_select_delete_continue_tree_common(Process *p,
+                                          DbTable *tbl,
+                                          Eterm continuation,
+                                          Eterm *ret,
+                                          DbTreeStack* stack,
+                                          CATreeRootIterator* iter)
+{
     struct select_delete_context sc;
     unsigned sz;
     Eterm *hp; 
@@ -1549,10 +1730,9 @@ static int db_select_delete_continue_tree(Process *p,
     Eterm *tptr;
     Eterm eaccsum;
 
-
 #define RET_TO_BIF(Term, State) do { 		\
 	if (sc.erase_lastterm) {		\
-	    free_term(tb, sc.lastterm);		\
+	    free_term(tbl, sc.lastterm);		\
 	}					\
 	*ret = (Term); 				\
 	return State; 				\
@@ -1571,7 +1751,8 @@ static int db_select_delete_continue_tree(Process *p,
 
     mp = erts_db_get_match_prog_binary_unchecked(tptr[4]);
     sc.p = p;
-    sc.tb = tb;
+    sc.tb = &tbl->common;
+    sc.stack = stack;
     if (is_big(tptr[5])) {
 	sc.accum = big_to_uint32(tptr[5]);
     } else {
@@ -1580,17 +1761,26 @@ static int db_select_delete_continue_tree(Process *p,
     sc.mp = mp;
     sc.end_condition = NIL;
     sc.max = 1000;
-    sc.keypos = tb->common.keypos;
+    sc.keypos = tbl->common.keypos;
 
-    ASSERT(!erts_atomic_read_nob(&tb->is_stack_busy));
-    traverse_backwards(tb, &tb->static_stack, lastkey, &doit_select_delete, &sc);
+    if (iter) {
+        iter->next_route_key = lastkey;
+        sc.common.root = catree_find_prev_root(iter, NULL);
+    }
+    else {
+        sc.common.root = &tbl->tree.root;
+    }
 
-    BUMP_REDS(p, 1000 - sc.max);
+    if (sc.common.root) {
+        traverse_backwards(&tbl->common, stack, lastkey, &doit_select_delete, &sc.common, iter);
+
+        BUMP_REDS(p, 1000 - sc.max);
+    }
 
     if (sc.max > 0) {
 	RET_TO_BIF(erts_make_integer(sc.accum, p), DB_ERROR_NONE);
     }	
-    key = GETKEY(tb, (sc.lastterm)->dbterm.tpl);
+    key = GETKEY(&tbl->common, (sc.lastterm)->dbterm.tpl);
     if (end_condition != NIL && 
 	cmp_partly_bound(end_condition,key) > 0) { /* done anyway */
 	RET_TO_BIF(erts_make_integer(sc.accum,p),DB_ERROR_NONE);
@@ -1620,10 +1810,23 @@ static int db_select_delete_continue_tree(Process *p,
 #undef RET_TO_BIF
 }
 
-static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
-				 Eterm pattern, Eterm *ret)
+static int db_select_delete_continue_tree(Process *p, 
+					  DbTable *tbl,
+					  Eterm continuation,
+					  Eterm *ret) /* called when select_delete traps */
 {
     DbTableTree *tb = &tbl->tree;
+    ASSERT(!erts_atomic_read_nob(&tb->is_stack_busy)); /* static_stack is handed over below */
+    return db_select_delete_continue_tree_common(p, tbl, continuation, ret,
+                                                 &tb->static_stack, NULL);
+}
+
+int db_select_delete_tree_common(Process *p, DbTable *tbl,
+                                 Eterm tid, Eterm pattern,
+                                 Eterm *ret,
+                                 DbTreeStack* stack,
+                                 CATreeRootIterator* iter)
+{
     struct select_delete_context sc;
     struct mp_info mpi;
     Eterm lastkey = THE_NON_VALUE;
@@ -1641,7 +1844,7 @@ static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
 	    erts_bin_free(mpi.mp);       	\
 	}					\
 	if (sc.erase_lastterm) {                \
-	    free_term(tb, sc.lastterm);         \
+	    free_term(tbl, sc.lastterm);         \
 	}                                       \
 	*ret = (Term); 				\
 	return RetVal; 			        \
@@ -1655,42 +1858,57 @@ static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
     sc.p = p;
     sc.max = 1000; 
     sc.end_condition = NIL;
-    sc.keypos = tb->common.keypos;
-    sc.tb = tb;
+    sc.keypos = tbl->common.keypos;
+    sc.tb = &tbl->common;
+    sc.stack = stack;
     
-    if ((errcode = analyze_pattern(tb, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
+    if ((errcode = analyze_pattern(&tbl->common, pattern, NULL, &mpi)) != DB_ERROR_NONE) {
 	RET_TO_BIF(0,errcode);
     }
 
-    if (!mpi.something_can_match) {
+    if (mpi.key_boundness == MS_KEY_IMPOSSIBLE) {
 	RET_TO_BIF(make_small(0),DB_ERROR_NONE);  
 	/* can't possibly match anything */
     }
 
     sc.mp = mpi.mp;
 
-    if (!mpi.got_partial && mpi.some_limitation && 
-	CMP_EQ(mpi.least,mpi.most)) {
-	doit_select_delete(tb,*(mpi.save_term),&sc, 0 /* direction doesn't
+    if (mpi.key_boundness == MS_KEY_BOUND) {
+        ASSERT(CMP_EQ(mpi.least, mpi.most));
+        if (iter)
+            sc.common.root = catree_find_root(mpi.least, iter);
+        else
+            sc.common.root = &tbl->tree.root;
+        this =  find_node(&tbl->common, *sc.common.root, mpi.least, NULL);
+        if (this)
+            doit_select_delete(&tbl->common, this, &sc.common, 0 /* direction doesn't
 						      matter */);
 	RET_TO_BIF(erts_make_integer(sc.accum,p),DB_ERROR_NONE);
     }
 
-    if (mpi.some_limitation) {
-	if ((this = find_next_from_pb_key(tb, &tb->static_stack, mpi.most)) != NULL) {
-	    lastkey = GETKEY(tb, this->dbterm.tpl);
-	}
+    if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+        this = find_next_from_pb_key(tbl, &sc.common.root, stack, mpi.most, iter);
+        if (this)
+            lastkey = GETKEY(&tbl->common, this->dbterm.tpl);
 	sc.end_condition = mpi.least;
     }
+    else {
+        ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+        if (iter)
+            sc.common.root = catree_find_last_root(iter);
+        else
+            sc.common.root = &tbl->tree.root;
+    }
 
-    traverse_backwards(tb, &tb->static_stack, lastkey, &doit_select_delete, &sc);
+    traverse_backwards(&tbl->common, stack, lastkey,
+                       &doit_select_delete, &sc.common, iter);
     BUMP_REDS(p, 1000 - sc.max);
 
     if (sc.max > 0) {
 	RET_TO_BIF(erts_make_integer(sc.accum,p), DB_ERROR_NONE);
     }
 
-    key = GETKEY(tb, (sc.lastterm)->dbterm.tpl);
+    key = GETKEY(&tbl->common, (sc.lastterm)->dbterm.tpl);
     sz = size_object(key);
     if (IS_USMALL(0, sc.accum)) {
 	hp = HAlloc(p, sz + ERTS_MAGIC_REF_THING_SIZE + 6);
@@ -1714,7 +1932,7 @@ static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
 
     /* Don't free mpi.mp, so don't use macro */
     if (sc.erase_lastterm) {
-	free_term(tb, sc.lastterm);
+	free_term(tbl, sc.lastterm);
     }
     *ret = bif_trap1(&ets_select_delete_continue_exp, p, continuation); 
     return DB_ERROR_NONE;
@@ -1723,12 +1941,21 @@ static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
 
 }
 
-static int db_select_replace_continue_tree(Process *p,
+/* select_delete on a plain tree table: always runs on the table's static stack. */
+static int db_select_delete_tree(Process *p, DbTable *tbl, Eterm tid,
+                                 Eterm pattern, Eterm *ret)
+{
+    return db_select_delete_tree_common(p, tbl, tid, pattern, ret,
+                                        &tbl->tree.static_stack, NULL);
+}
+
+int db_select_replace_continue_tree_common(Process *p,
                                            DbTable *tbl,
                                            Eterm continuation,
-                                           Eterm *ret)
+                                           Eterm *ret,
+                                           DbTableTree *stack_container,
+                                           CATreeRootIterator* iter)
 {
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     struct select_replace_context sc;
     unsigned sz;
@@ -1764,7 +1991,7 @@ static int db_select_replace_continue_tree(Process *p,
     sc.end_condition = NIL;
     sc.lastobj = NULL;
     sc.max = 1000;
-    sc.keypos = tb->common.keypos;
+    sc.keypos = tbl->common.keypos;
     if (is_big(tptr[5])) {
         sc.replaced = big_to_uint32(tptr[5]);
     } else {
@@ -1772,9 +1999,18 @@ static int db_select_replace_continue_tree(Process *p,
     }
     prev_replaced = sc.replaced;
 
-    stack = get_any_stack(tb);
-    traverse_update_backwards(tb, stack, lastkey, &doit_select_replace, &sc);
-    release_stack(tb,stack);
+    if (iter) {
+        iter->next_route_key = lastkey;
+        sc.common.root = catree_find_prev_root(iter, NULL);
+    }
+    else {
+        sc.common.root = &tbl->tree.root;
+    }
+
+    stack = get_any_stack(tbl, stack_container);
+    traverse_update_backwards(&tbl->common, stack, lastkey, &doit_select_replace,
+                              &sc.common, iter);
+    release_stack(tbl, stack_container,stack);
 
     // the more objects we've replaced, the more reductions we've consumed
     BUMP_REDS(p, MIN(2000, (1000 - sc.max) + (sc.replaced - prev_replaced)));
@@ -1782,7 +2018,7 @@ static int db_select_replace_continue_tree(Process *p,
     if (sc.max > 0) {
         RET_TO_BIF(erts_make_integer(sc.replaced,p), DB_ERROR_NONE);
     }
-    key = GETKEY(tb, sc.lastobj);
+    key = GETKEY(tbl, sc.lastobj);
     if (end_condition != NIL &&
             (cmp_partly_bound(end_condition,key) > 0)) {
         /* done anyway */
@@ -1813,10 +2049,20 @@ static int db_select_replace_continue_tree(Process *p,
 #undef RET_TO_BIF
 }
 
-static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid,
-                                  Eterm pattern, Eterm *ret)
+/* Continuation entry point of select_replace for a plain tree table. */
+static int db_select_replace_continue_tree(Process *p, DbTable *tbl,
+                                           Eterm continuation, Eterm *ret)
+{
+    DbTableTree *tree = &tbl->tree;   /* doubles as stack container */
+    return db_select_replace_continue_tree_common(p, tbl, continuation, ret,
+                                                  tree, NULL);
+}
+
+int db_select_replace_tree_common(Process *p, DbTable *tbl,
+                                  Eterm tid, Eterm pattern, Eterm *ret,
+                                  DbTableTree *stack_container,
+                                  CATreeRootIterator* iter)
 {
-    DbTableTree *tb = &tbl->tree;
     DbTreeStack* stack;
     struct select_replace_context sc;
     struct mp_info mpi;
@@ -1843,48 +2089,64 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid,
 
     sc.lastobj = NULL;
     sc.p = p;
-    sc.tb = tb;
+    sc.tb = &tbl->common;
     sc.max = 1000;
     sc.end_condition = NIL;
-    sc.keypos = tb->common.keypos;
+    sc.keypos = tbl->common.keypos;
     sc.replaced = 0;
 
-    if ((errcode = analyze_pattern(tb, pattern, db_match_keeps_key, &mpi)) != DB_ERROR_NONE) {
+    if ((errcode = analyze_pattern(&tbl->common, pattern, db_match_keeps_key, &mpi)) != DB_ERROR_NONE) {
         RET_TO_BIF(NIL,errcode);
     }
 
-    if (!mpi.something_can_match) {
+    if (mpi.key_boundness == MS_KEY_IMPOSSIBLE) {
         RET_TO_BIF(make_small(0),DB_ERROR_NONE);
         /* can't possibly match anything */
     }
 
     sc.mp = mpi.mp;
 
-    if (!mpi.got_partial && mpi.some_limitation &&
-            CMP_EQ(mpi.least,mpi.most)) {
-        doit_select_replace(tb,mpi.save_term,&sc,0 /* dummy */);
-        reset_static_stack(tb); /* may refer replaced term */
+    if (mpi.key_boundness == MS_KEY_BOUND) {
+        TreeDbTerm** pp;
+        ASSERT(CMP_EQ(mpi.least, mpi.most));
+        if (iter)
+            sc.common.root = catree_find_root(mpi.least, iter);
+        else
+            sc.common.root = &tbl->tree.root;
+        pp = find_node2(&tbl->common, sc.common.root, mpi.least);
+        if (pp) {
+            doit_select_replace(&tbl->common, pp, &sc.common, 0 /* dummy */);
+            reset_static_stack(stack_container); /* may refer replaced term */
+        }
         RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE);
     }
 
-    stack = get_any_stack(tb);
+    stack = get_any_stack(tbl,stack_container);
 
-    if (mpi.some_limitation) {
-        if ((this = find_next_from_pb_key(tb, stack, mpi.most)) != NULL) {
-            lastkey = GETKEY(tb, this->dbterm.tpl);
-        }
+    if (mpi.key_boundness == MS_KEY_PARTIALLY_BOUND) {
+        this = find_next_from_pb_key(tbl, &sc.common.root, stack, mpi.most, iter);
+        if (this)
+            lastkey = GETKEY(tbl, this->dbterm.tpl);
         sc.end_condition = mpi.least;
     }
+    else {
+        ASSERT(mpi.key_boundness == MS_KEY_UNBOUND);
+        if (iter)
+            sc.common.root = catree_find_last_root(iter);
+        else
+            sc.common.root = &tbl->tree.root;
+    }
 
-    traverse_update_backwards(tb, stack, lastkey, &doit_select_replace, &sc);
-    release_stack(tb,stack);
+    traverse_update_backwards(&tbl->common, stack, lastkey, &doit_select_replace,
+                              &sc.common, iter);
+    release_stack(tbl,stack_container,stack);
     // the more objects we've replaced, the more reductions we've consumed
     BUMP_REDS(p, MIN(2000, (1000 - sc.max) + sc.replaced));
     if (sc.max > 0) {
         RET_TO_BIF(erts_make_integer(sc.replaced,p),DB_ERROR_NONE);
     }
 
-    key = GETKEY(tb, sc.lastobj);
+    key = GETKEY(tbl, sc.lastobj);
     sz = size_object(key);
     if (IS_USMALL(0, sc.replaced)) {
         hp = HAlloc(p, sz + ERTS_MAGIC_REF_THING_SIZE + 6);
@@ -1914,52 +2176,72 @@ static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid,
 
 }
 
-static int db_take_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+static int db_select_replace_tree(Process *p, DbTable *tbl, Eterm tid,
+                                  Eterm pattern, Eterm *ret)
+{
+    return db_select_replace_tree_common(p, tbl, tid, pattern, ret,
+                                         &tbl->tree, NULL);
+}
+
+int db_take_tree_common(Process *p, DbTable *tbl, TreeDbTerm **root,
+                        Eterm key, Eterm *ret,
+                        DbTreeStack *stack /* NULL if no static stack */)
 {
-    DbTableTree *tb = &tbl->tree;
     TreeDbTerm *this;
 
     *ret = NIL;
-    this = linkout_tree(tb, key);
+    this = linkout_tree(&tbl->common, root, key, stack);
     if (this) {
         Eterm copy, *hp, *hend;
 
         hp = HAlloc(p, this->dbterm.size + 2);
         hend = hp + this->dbterm.size + 2;
-        copy = db_copy_object_from_ets(&tb->common,
+        copy = db_copy_object_from_ets(&tbl->common,
                                        &this->dbterm, &hp, &MSO(p));
         *ret = CONS(hp, copy, NIL);
         hp += 2;
         HRelease(p, hend, hp);
-        free_term(tb, this);
+        free_term(tbl, this);
     }
     return DB_ERROR_NONE;
 }
 
+static int db_take_tree(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
+{
+    DbTableTree *tb = &tbl->tree; /* plain tree table: use its own root and static stack */
+    return db_take_tree_common(p, tbl, &tb->root,
+                               key, ret, &tb->static_stack); /* stack is reset by linkout_tree */
+}
+
 /*
 ** Other interface routines (not directly coupled to one bif)
 */
 
-
-/* Display tree contents (for dump) */
-static void db_print_tree(fmtfn_t to, void *to_arg,
-			  int show,
-			  DbTable *tbl)
+void db_print_tree_common(fmtfn_t to, void *to_arg,
+                          int show, TreeDbTerm *root, DbTable *tbl)
 {
-    DbTableTree *tb = &tbl->tree;
 #ifdef TREE_DEBUG
     if (show)
 	erts_print(to, to_arg, "\nTree data dump:\n"
 		   "------------------------------------------------\n");
-    do_dump_tree2(&tbl->tree, to, to_arg, show, tb->root, 0);
+    do_dump_tree2(&tbl->common, to, to_arg, show, root, 0);
     if (show)
 	erts_print(to, to_arg, "\n"
 		   "------------------------------------------------\n");
 #else
-    erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", NITEMS(tb));
+    erts_print(to, to_arg, "Ordered set (AVL tree), Elements: %d\n", NITEMS(tbl));
 #endif
 }
 
+/* Display tree contents (for dump) */
+static void db_print_tree(fmtfn_t to, void *to_arg,
+			  int show,
+			  DbTable *tbl)
+{
+    DbTableTree *tb = &tbl->tree;
+    db_print_tree_common(to, to_arg, show, tb->root, tbl);
+}
+
 /* release all memory occupied by a single table */
 static int db_free_empty_table_tree(DbTable *tbl)
 {
@@ -1984,8 +2266,10 @@ static SWord db_free_table_continue_tree(DbTable *tbl, SWord reds)
 		     (DbTable *) tb,
 		     (void *) tb->static_stack.array,
 		     sizeof(TreeDbTerm *) * STACK_NEED);
-	ASSERT(erts_atomic_read_nob(&tb->common.memory_size)
-	       == sizeof(DbTable));
+	ASSERT((erts_atomic_read_nob(&tb->common.memory_size)
+	       == sizeof(DbTable)) ||
+               (erts_atomic_read_nob(&tb->common.memory_size)
+                == (sizeof(DbTable) + sizeof(DbFixation))));
     }
     return reds;
 }
@@ -2004,11 +2288,18 @@ static void do_db_tree_foreach_offheap(TreeDbTerm *,
 				       void (*)(ErlOffHeap *, void *),
 				       void *);
 
+void db_foreach_offheap_tree_common(TreeDbTerm *root,
+                                    void (*func)(ErlOffHeap *, void *),
+                                    void * arg)
+{
+    do_db_tree_foreach_offheap(root, func, arg); /* non-static entry point taking an explicit root */
+}
+
 static void db_foreach_offheap_tree(DbTable *tbl,
 				    void (*func)(ErlOffHeap *, void *),
 				    void * arg)
 {
-    do_db_tree_foreach_offheap(tbl->tree.root, func, arg);
+    db_foreach_offheap_tree_common(tbl->tree.root, func, arg);
 }
 
 
@@ -2033,13 +2324,14 @@ do_db_tree_foreach_offheap(TreeDbTerm *tdbt,
     do_db_tree_foreach_offheap(tdbt->right, func, arg);
 }
 
-static TreeDbTerm *linkout_tree(DbTableTree *tb, Eterm key) {
+static TreeDbTerm *linkout_tree(DbTableCommon *tb, TreeDbTerm **root,
+                                Eterm key, DbTreeStack *stack) {
     TreeDbTerm **tstack[STACK_NEED];
     int tpos = 0;
     int dstack[STACK_NEED+1];
     int dpos = 0;
     int state = 0;
-    TreeDbTerm **this = &tb->root;
+    TreeDbTerm **this = root;
     Sint c;
     int dir;
     TreeDbTerm *q = NULL;
@@ -2050,7 +2342,7 @@ static TreeDbTerm *linkout_tree(DbTableTree *tb, Eterm key) {
      * keep the balance. As in insert, we do the stacking ourselves.
      */
 
-    reset_static_stack(tb);
+    reset_stack(stack);
     dstack[dpos++] = DIR_END;
     for (;;) {
 	if (!*this) { /* Failure */
@@ -2076,30 +2368,30 @@ static TreeDbTerm *linkout_tree(DbTableTree *tb, Eterm key) {
 		tstack[tpos++] = this;
 		state = delsub(this);
 	    }
-	    erts_atomic_dec_nob(&tb->common.nitems);
+	    erts_atomic_dec_nob(&tb->nitems);
 	    break;
 	}
     }
     while (state && ( dir = dstack[--dpos] ) != DIR_END) {
 	this = tstack[--tpos];
 	if (dir == DIR_LEFT) {
-	    state = balance_left(this);
+	    state = tree_balance_left(this);
 	} else {
-	    state = balance_right(this);
+	    state = tree_balance_right(this);
 	}
     }
     return q;
 }
 
-static TreeDbTerm *linkout_object_tree(DbTableTree *tb, 
-				       Eterm object)
+static TreeDbTerm *linkout_object_tree(DbTableCommon *tb,  TreeDbTerm **root,
+				       Eterm object, DbTableTree *stack)
 {
     TreeDbTerm **tstack[STACK_NEED];
     int tpos = 0;
     int dstack[STACK_NEED+1];
     int dpos = 0;
     int state = 0;
-    TreeDbTerm **this = &tb->root;
+    TreeDbTerm **this = root;
     Sint c;
     int dir;
     TreeDbTerm *q = NULL;
@@ -2114,7 +2406,7 @@ static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
     
     key = GETKEY(tb, tuple_val(object));
 
-    reset_static_stack(tb);
+    reset_static_stack(stack);
     dstack[dpos++] = DIR_END;
     for (;;) {
 	if (!*this) { /* Failure */
@@ -2128,7 +2420,7 @@ static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
 	    tstack[tpos++] = this;
 	    this = &((*this)->right);
 	} else { /* Equal key, found the only possible matching object*/
-	    if (!db_eq(&tb->common,object,&(*this)->dbterm)) {
+	    if (!db_eq(tb,object,&(*this)->dbterm)) {
 		return NULL;
 	    }
 	    q = (*this);
@@ -2143,16 +2435,16 @@ static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
 		tstack[tpos++] = this;
 		state = delsub(this);
 	    }
-	    erts_atomic_dec_nob(&tb->common.nitems);
+	    erts_atomic_dec_nob(&tb->nitems);
 	    break;
 	}
     }
     while (state && ( dir = dstack[--dpos] ) != DIR_END) {
 	this = tstack[--tpos];
 	if (dir == DIR_LEFT) {
-	    state = balance_left(this);
+	    state = tree_balance_left(this);
 	} else {
-	    state = balance_right(this);
+	    state = tree_balance_right(this);
 	}
     }
     return q;
@@ -2162,7 +2454,7 @@ static TreeDbTerm *linkout_object_tree(DbTableTree *tb,
 ** For the select functions, analyzes the pattern and determines which
 ** part of the tree should be searched. Also compiles the match program
 */
-static int analyze_pattern(DbTableTree *tb, Eterm pattern,
+static int analyze_pattern(DbTableCommon *tb, Eterm pattern,
                            extra_match_validator_t extra_validator, /* Optional callback */
                            struct mp_info *mpi)
 {
@@ -2173,17 +2465,12 @@ static int analyze_pattern(DbTableTree *tb, Eterm pattern,
     Eterm *ptpl;
     int i;
     int num_heads = 0;
-    Eterm key;
-    Eterm partly_bound;
-    int res;
-    Eterm least = 0;
-    Eterm most = 0;
+    Eterm least = THE_NON_VALUE;
+    Eterm most = THE_NON_VALUE;
+    enum ms_key_boundness boundness;
 
-    mpi->some_limitation = 1;
-    mpi->got_partial = 0;
-    mpi->something_can_match = 0;
+    mpi->key_boundness = MS_KEY_IMPOSSIBLE;
     mpi->mp = NULL;
-    mpi->save_term = NULL;
 
     for (lst = pattern; is_list(lst); lst = CDR(list_val(lst)))
 	++num_heads;
@@ -2204,6 +2491,7 @@ static int analyze_pattern(DbTableTree *tb, Eterm pattern,
         Eterm match;
         Eterm guard;
         Eterm body;
+        Eterm key;
 
 	ttpl = CAR(list_val(lst));
 	if (!is_tuple(ttpl)) {
@@ -2223,7 +2511,7 @@ static int analyze_pattern(DbTableTree *tb, Eterm pattern,
 	guards[i] = guard = ptpl[2];
 	bodies[i] = body = ptpl[3];
 
-        if(extra_validator != NULL && !extra_validator(tb->common.keypos, match, guard, body)) {
+        if(extra_validator != NULL && !extra_validator(tb->keypos, match, guard, body)) {
 	    if (buff != sbuff) {
 		erts_free(ERTS_ALC_T_DB_TMP, buff);
 	    }
@@ -2235,30 +2523,29 @@ static int analyze_pattern(DbTableTree *tb, Eterm pattern,
 	}
 	++i;
 
-	partly_bound = NIL;
-	res = key_given(tb, tpl, &(mpi->save_term), &partly_bound);
-	if ( res >= 0 ) {   /* Can match something */
-	    key = 0;
-	    mpi->something_can_match = 1;
-	    if (res > 0) {
-		key = GETKEY(tb,tuple_val(tpl)); 
-	    } else if (partly_bound != NIL) {
-		mpi->got_partial = 1;
-		key = partly_bound;
-	    } else {
-		mpi->some_limitation = 0;
-	    }
-	    if (key != 0) {
-		if (least == 0 || 
-		    partly_bound_can_match_lesser(key,least)) {
-		    least = key;
-		}
-		if (most == 0 || 
-		    partly_bound_can_match_greater(key,most)) {
-		    most = key;
-		}
-	    }
-	}
+        boundness = key_boundness(tb, tpl, &key);
+	switch (boundness)
+        {
+        case MS_KEY_BOUND:
+        case MS_KEY_PARTIALLY_BOUND:
+            if (is_non_value(least) || partly_bound_can_match_lesser(key,least)) {
+                least = key;
+            }
+            if (is_non_value(most) || partly_bound_can_match_greater(key,most)) {
+                most = key;
+            }
+            break;
+        case MS_KEY_IMPOSSIBLE:
+        case MS_KEY_UNBOUND:
+            break;
+        }
+        if (mpi->key_boundness > boundness)
+            mpi->key_boundness = boundness;
+    }
+
+    if (mpi->key_boundness == MS_KEY_BOUND && !CMP_EQ(least, most)) {
+        /* Several different bound keys */
+        mpi->key_boundness = MS_KEY_PARTIALLY_BOUND;
     }
     mpi->least = least;
     mpi->most = most;
@@ -2300,7 +2587,7 @@ static SWord do_free_tree_continue(DbTableTree *tb, SWord reds)
 		PUSH_NODE(&tb->static_stack, root);
 		root = p;
 	    } else {
-		free_term(tb, root);
+		free_term((DbTable*)tb, root);
 		if (--reds < 0) {
                     return reds;   /* Done enough for now */
                 }
@@ -2314,7 +2601,7 @@ static SWord do_free_tree_continue(DbTableTree *tb, SWord reds)
 /*
  * Deletion helpers
  */
-static int balance_left(TreeDbTerm **this) 
+int tree_balance_left(TreeDbTerm **this) 
 {
     TreeDbTerm *p, *p1, *p2;
     int b1, b2, h = 1;
@@ -2359,7 +2646,7 @@ static int balance_left(TreeDbTerm **this)
     return h;
 }
 
-static int balance_right(TreeDbTerm **this) 
+int tree_balance_right(TreeDbTerm **this) 
 {
     TreeDbTerm *p, *p1, *p2;
     int b1, b2, h = 1;
@@ -2431,7 +2718,7 @@ static int delsub(TreeDbTerm **this)
     h = 1;
     while (tpos && h) {
 	r = tstack[--tpos];
-	h = balance_right(r);
+	h = tree_balance_right(r);
     }
     return h;
 }
@@ -2440,11 +2727,29 @@ static int delsub(TreeDbTerm **this)
  * Helper for db_slot
  */
 
-static TreeDbTerm *slot_search(Process *p, DbTableTree *tb, Sint slot)
+static TreeDbTerm *slot_search(Process *p, TreeDbTerm *root,
+                               Sint slot, DbTable *tb,
+                               DbTableTree *stack_container,
+                               CATreeRootIterator *iter)
 {
     TreeDbTerm *this;
     TreeDbTerm *tmp;
-    DbTreeStack* stack = get_any_stack(tb);
+    TreeDbTerm *lastobj;
+    Eterm lastkey;
+    TreeDbTerm **pp;
+    DbTreeStack* stack;
+
+    if (iter) {
+        /* Find first non-empty tree */
+        while (!root) {
+            TreeDbTerm** pp = catree_find_next_root(iter, NULL);
+            if (!pp)
+                return NULL;
+            root = *pp;
+        }
+    }
+
+    stack = get_any_stack(tb,stack_container);
     ASSERT(stack != NULL);
 
     if (slot == 1) { /* Don't search from where we are if we are 
@@ -2456,57 +2761,84 @@ static TreeDbTerm *slot_search(Process *p, DbTableTree *tb, Sint slot)
 				are not recorded */
 	stack->pos = 0;
     }
-    if (EMPTY_NODE(stack)) {
-	this = tb->root;
-	if (this == NULL)
-	    goto done;
-	while (this->left != NULL){
-	    PUSH_NODE(stack, this);
-	    this = this->left;
-	}
-	PUSH_NODE(stack, this);
-	stack->slot = 1;
-    }
-    this = TOP_NODE(stack);
-    while (stack->slot != slot && this != NULL) {
-	if (slot > stack->slot) {
-	    if (this->right != NULL) {
-		this = this->right;
-		while (this->left != NULL) {
-		    PUSH_NODE(stack, this);
-		    this = this->left;
-		}
-		PUSH_NODE(stack, this);
-	    } else {
-		for (;;) {
-		    tmp = POP_NODE(stack);
-		    this = TOP_NODE(stack);
-		    if (this == NULL || this->left == tmp)
-			break;
-		}
-	    }		
-	    ++(stack->slot);
-	} else {
-	    if (this->left != NULL) {
-		this = this->left;
-		while (this->right != NULL) {
-		    PUSH_NODE(stack, this);
-		    this = this->right;
-		}
-		PUSH_NODE(stack, this);
-	    } else {
-		for (;;) {
-		    tmp = POP_NODE(stack);
-		    this = TOP_NODE(stack);
-		    if (this == NULL || this->right == tmp)
-			break;
-		}
-	    }		
-	    --(stack->slot);
-	}
+    while (1) {
+        if (EMPTY_NODE(stack)) {
+            this = root;
+            if (this == NULL)
+                goto next_root;
+            while (this->left != NULL){
+                PUSH_NODE(stack, this);
+                this = this->left;
+            }
+            PUSH_NODE(stack, this);
+            stack->slot++;
+        }
+        this = TOP_NODE(stack);
+        while (stack->slot != slot) {
+            ASSERT(this);
+            lastobj = this;
+            if (slot > stack->slot) {
+                if (this->right != NULL) {
+                    this = this->right;
+                    while (this->left != NULL) {
+                        PUSH_NODE(stack, this);
+                        this = this->left;
+                    }
+                    PUSH_NODE(stack, this);
+                } else {
+                    for (;;) {
+                        tmp = POP_NODE(stack);
+                        this = TOP_NODE(stack);
+                        if (!this)
+                            goto next_root;
+                        if (this->left == tmp)
+                            break;
+                    }
+                }
+                ++(stack->slot);
+            } else {
+                if (this->left != NULL) {
+                    this = this->left;
+                    while (this->right != NULL) {
+                        PUSH_NODE(stack, this);
+                        this = this->right;
+                    }
+                    PUSH_NODE(stack, this);
+                } else {
+                    for (;;) {
+                        tmp = POP_NODE(stack);
+                        this = TOP_NODE(stack);
+                        if (!this)
+                            goto next_root;
+                        if (this->right == tmp)
+                            break;
+                    }
+                }
+                --(stack->slot);
+            }
+        }
+         /* Found slot */
+        ASSERT(this);
+        break;
+
+next_root:
+        if (!iter)
+            break; /* EOT */
+
+        ASSERT(slot > stack->slot);
+        if (lastobj) {
+            lastkey = GETKEY(tb, lastobj->dbterm.tpl);
+            lastobj = NULL;
+        }
+        pp = catree_find_next_root(iter, &lastkey);
+        if (!pp)
+            break; /* EOT */
+        root = *pp;
+        stack->pos = 0;
+        find_next(&tb->common, root, stack, lastkey);
     }
-done:
-    release_stack(tb,stack);
+
+    release_stack(tb,stack_container,stack);
     return this;
 }
 
@@ -2514,7 +2846,8 @@ done:
  * Find next and previous in sort order
  */
 
-static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
+static TreeDbTerm *find_next(DbTableCommon *tb, TreeDbTerm *root,
+                             DbTreeStack* stack, Eterm key) {
     TreeDbTerm *this;
     TreeDbTerm *tmp;
     Sint c;
@@ -2526,7 +2859,7 @@ static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 	}
     }
     if (EMPTY_NODE(stack)) { /* Have to rebuild the stack */
-	if (( this = tb->root ) == NULL)
+	if (( this = root ) == NULL)
 	    return NULL;
 	for (;;) {
 	    PUSH_NODE(stack, this);
@@ -2539,7 +2872,7 @@ static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 		    this = this->right;
 	    } else if (c < 0) {
 		if (this->left == NULL) /* Done */
-		    return this;
+                    goto found_next;
 		else
 		    this = this->left;
 	    } else
@@ -2554,8 +2887,6 @@ static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 	    this = this->left;
 	    PUSH_NODE(stack, this);
 	}
-	if (stack->slot > 0) 
-	    ++(stack->slot);
     } else {
 	do {
 	    tmp = POP_NODE(stack);
@@ -2564,13 +2895,17 @@ static TreeDbTerm *find_next(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 		return NULL;
 	    }
 	} while (this->right == tmp);
-	if (stack->slot > 0) 
-	    ++(stack->slot);
     }
+
+found_next:
+    if (stack->slot > 0)
+        ++(stack->slot);
+
     return this;
 }
 
-static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
+static TreeDbTerm *find_prev(DbTableCommon *tb, TreeDbTerm *root,
+                             DbTreeStack* stack, Eterm key) {
     TreeDbTerm *this;
     TreeDbTerm *tmp;
     Sint c;
@@ -2582,7 +2917,7 @@ static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 	}
     }
     if (EMPTY_NODE(stack)) { /* Have to rebuild the stack */
-	if (( this = tb->root ) == NULL)
+	if (( this = root ) == NULL)
 	    return NULL;
 	for (;;) {
 	    PUSH_NODE(stack, this);
@@ -2595,7 +2930,7 @@ static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 		    this = this->left;
 	    } else if (c > 0) {
 		if (this->right == NULL) /* Done */
-		    return this;
+                    goto found_prev;
 		else
 		    this = this->right;
 	    } else
@@ -2610,8 +2945,6 @@ static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 	    this = this->right;
 	    PUSH_NODE(stack, this);
 	}
-	if (stack->slot > 0) 
-	    --(stack->slot);
     } else {
 	do {
 	    tmp = POP_NODE(stack);
@@ -2620,74 +2953,112 @@ static TreeDbTerm *find_prev(DbTableTree *tb, DbTreeStack* stack, Eterm key) {
 		return NULL;
 	    }
 	} while (this->left == tmp);
-	if (stack->slot > 0) 
-	    --(stack->slot);
     }
+
+found_prev:
+    if (stack->slot > 0)
+        --(stack->slot);
+
     return this;
 }
 
-static TreeDbTerm *find_next_from_pb_key(DbTableTree *tb, DbTreeStack* stack,
-					 Eterm key)
+
+/* @brief Find the object with the smallest key that is larger than pb_key.
+ * Can be used as a starting point for a reverse iteration with pb_key.
+ *
+ * @param pb_key The partially bound key. Example {42, '$1'}
+ * @param *rootpp Will return pointer to root pointer of tree with found object.
+ * @param iter Root iterator or NULL for plain DbTableTree.
+ * @param stack A stack to use. Will be cleared.
+ *
+ * @return found object or NULL if no such key exists.
+ */
+static TreeDbTerm *find_next_from_pb_key(DbTable *tbl,  TreeDbTerm*** rootpp,
+                                         DbTreeStack* stack, Eterm pb_key,
+                                         CATreeRootIterator* iter)
 {
+    TreeDbTerm* root;
     TreeDbTerm *this;
-    TreeDbTerm *tmp;
+    Uint candidate = 0;
     Sint c;
 
+    if (iter) {
+        *rootpp = catree_find_next_from_pb_key_root(pb_key, iter);
+        ASSERT(*rootpp);
+        root = **rootpp;
+    }
+    else {
+        *rootpp = &tbl->tree.root;
+        root = tbl->tree.root;
+    }
+
     /* spool the stack, we have to "re-search" */
     stack->pos = stack->slot = 0;
-    if (( this = tb->root ) == NULL)
+    if (( this = root ) == NULL)
 	return NULL;
     for (;;) {
 	PUSH_NODE(stack, this);
-	if (( c = cmp_partly_bound(key,GETKEY(tb, this->dbterm.tpl))) >= 0) {
+	if (( c = cmp_partly_bound(pb_key,GETKEY(tbl, this->dbterm.tpl))) >= 0) {
 	    if (this->right == NULL) {
-		do {
-		    tmp = POP_NODE(stack);
-		    if (( this = TOP_NODE(stack)) == NULL) {
-			return NULL;
-		    }
-		} while (this->right == tmp);
-		return this;
-	    } else
-		this = this->right;
+                stack->pos = candidate;
+                return TOP_NODE(stack);
+	    }
+            this = this->right;
 	} else /*if (c < 0)*/ {
 	    if (this->left == NULL) /* Done */
 		return this;
-	    else
-		this = this->left;
+            candidate = stack->pos;
+            this = this->left;
 	} 
     }
 }
 
-static TreeDbTerm *find_prev_from_pb_key(DbTableTree *tb, DbTreeStack* stack,
-					 Eterm key)
+/* @brief Find the object with the largest key that is smaller than pb_key.
+ * Can be used as a starting point for a forward iteration with pb_key.
+ *
+ * @param pb_key The partially bound key. Example {42, '$1'}
+ * @param *rootpp Will return pointer to root pointer of tree with found object.
+ * @param iter Root iterator or NULL for plain DbTableTree.
+ * @param stack A stack to use. Will be cleared.
+ *
+ * @return found object or NULL if no such key exists.
+ */
+static TreeDbTerm *find_prev_from_pb_key(DbTable *tbl, TreeDbTerm*** rootpp,
+                                         DbTreeStack* stack, Eterm pb_key,
+                                         CATreeRootIterator* iter)
 {
+    TreeDbTerm* root;
     TreeDbTerm *this;
-    TreeDbTerm *tmp;
+    Uint candidate = 0;
     Sint c;
 
+    if (iter) {
+        *rootpp = catree_find_prev_from_pb_key_root(pb_key, iter);
+        ASSERT(*rootpp);
+        root = **rootpp;
+    }
+    else {
+        *rootpp = &tbl->tree.root;
+        root = tbl->tree.root;
+    }
+
     /* spool the stack, we have to "re-search" */
     stack->pos = stack->slot = 0;
-    if (( this = tb->root ) == NULL)
+    if (( this = root ) == NULL)
 	return NULL;
     for (;;) {
 	PUSH_NODE(stack, this);
-	if (( c = cmp_partly_bound(key,GETKEY(tb, this->dbterm.tpl))) <= 0) {
+	if (( c = cmp_partly_bound(pb_key,GETKEY(tbl, this->dbterm.tpl))) <= 0) {
 	    if (this->left == NULL) {
-		do {
-		    tmp = POP_NODE(stack);
-		    if (( this = TOP_NODE(stack)) == NULL) {
-			return NULL;
-		    }
-		} while (this->left == tmp);
-		return this;
-	    } else
-		this = this->left;
-	} else /*if (c < 0)*/ {
+                stack->pos = candidate;
+                return TOP_NODE(stack);
+	    }
+            this = this->left;
+	} else /*if (c > 0)*/ {
 	    if (this->right == NULL) /* Done */
 		return this;
-	    else
-		this = this->right;
+            candidate = stack->pos;
+            this = this->right;
 	} 
     }
 }
@@ -2696,16 +3067,17 @@ static TreeDbTerm *find_prev_from_pb_key(DbTableTree *tb, DbTreeStack* stack,
 /*
  * Just lookup a node
  */
-static TreeDbTerm *find_node(DbTableTree *tb, Eterm key)
+static TreeDbTerm *find_node(DbTableCommon *tb, TreeDbTerm *root,
+                             Eterm key, DbTableTree *stack_container)
 {
     TreeDbTerm *this;
     Sint res;
-    DbTreeStack* stack = get_static_stack(tb);
+    DbTreeStack* stack = get_static_stack(stack_container);
 
     if(!stack || EMPTY_NODE(stack)
        || !cmp_key_eq(tb, key, (this=TOP_NODE(stack)))) {
 
-	this = tb->root;
+	this = root;
 	while (this != NULL && (res = cmp_key(tb,key,this)) != 0) {
 	    if (res < 0)
 		this = this->left;
@@ -2714,7 +3086,7 @@ static TreeDbTerm *find_node(DbTableTree *tb, Eterm key)
 	}
     }
     if (stack) {
-	release_stack(tb,stack);
+	release_stack((DbTable*)tb,stack_container,stack);
     }
     return this;
 }
@@ -2722,12 +3094,12 @@ static TreeDbTerm *find_node(DbTableTree *tb, Eterm key)
 /*
  * Lookup a node and return the address of the node pointer in the tree
  */
-static TreeDbTerm **find_node2(DbTableTree *tb, Eterm key)
+static TreeDbTerm **find_node2(DbTableCommon *tb, TreeDbTerm **root, Eterm key)
 {
     TreeDbTerm **this;
     Sint res;
 
-    this = &tb->root;
+    this = root;
     while ((*this) != NULL && (res = cmp_key(tb, key, *this)) != 0) {
 	if (res < 0)
 	    this = &((*this)->left);
@@ -2744,7 +3116,8 @@ static TreeDbTerm **find_node2(DbTableTree *tb, Eterm key)
  * Tries to reuse the existing stack for performance.
  */
 
-static TreeDbTerm **find_ptr(DbTableTree *tb, DbTreeStack *stack, TreeDbTerm *this) {
+static TreeDbTerm **find_ptr(DbTableCommon *tb, TreeDbTerm **root,
+                             DbTreeStack *stack, TreeDbTerm *this) {
     Eterm key = GETKEY(tb, this->dbterm.tpl);
     TreeDbTerm *tmp;
     TreeDbTerm *parent;
@@ -2757,7 +3130,7 @@ static TreeDbTerm **find_ptr(DbTableTree *tb, DbTreeStack *stack, TreeDbTerm *th
 	}
     }
     if (EMPTY_NODE(stack)) { /* Have to rebuild the stack */
-	if (( tmp = tb->root ) == NULL)
+	if (( tmp = *root ) == NULL)
 	    return NULL;
 	for (;;) {
 	    PUSH_NODE(stack, tmp);
@@ -2783,7 +3156,7 @@ static TreeDbTerm **find_ptr(DbTableTree *tb, DbTreeStack *stack, TreeDbTerm *th
 
     parent = TOPN_NODE(stack, 1);
     if (parent == NULL)
-        return ((this != tb->root) ? NULL : &(tb->root));
+        return ((this != *root) ? NULL : root);
     if (parent->left == this)
         return &(parent->left);
     if (parent->right == this)
@@ -2791,12 +3164,11 @@ static TreeDbTerm **find_ptr(DbTableTree *tb, DbTreeStack *stack, TreeDbTerm *th
     return NULL;
 }
 
-static int
-db_lookup_dbterm_tree(Process *p, DbTable *tbl, Eterm key, Eterm obj,
-                      DbUpdateHandle* handle)
+int db_lookup_dbterm_tree_common(Process *p, DbTable *tbl, TreeDbTerm **root,
+                                 Eterm key, Eterm obj, DbUpdateHandle* handle,
+                                 DbTableTree *stack_container)
 {
-    DbTableTree *tb = &tbl->tree;
-    TreeDbTerm **pp = find_node2(tb, key);
+    TreeDbTerm **pp = find_node2(&tbl->common, root, key);
     int flags = 0;
 
     if (pp == NULL) {
@@ -2807,18 +3179,19 @@ db_lookup_dbterm_tree(Process *p, DbTable *tbl, Eterm key, Eterm obj,
             int arity = arityval(*objp);
             Eterm *htop, *hend;
 
-            ASSERT(arity >= tb->common.keypos);
+            ASSERT(arity >= tbl->common.keypos);
             htop = HAlloc(p, arity + 1);
             hend = htop + arity + 1;
             sys_memcpy(htop, objp, sizeof(Eterm) * (arity + 1));
-            htop[tb->common.keypos] = key;
+            htop[tbl->common.keypos] = key;
             obj = make_tuple(htop);
 
-            if (db_put_tree(tbl, obj, 1) != DB_ERROR_NONE) {
+            if (db_put_tree_common(&tbl->common, root,
+                                   obj, 1, stack_container) != DB_ERROR_NONE) {
                 return 0;
             }
 
-            pp = find_node2(tb, key);
+            pp = find_node2(&tbl->common, root, key);
             ASSERT(pp != NULL);
             HRelease(p, hend, htop);
             flags |= DB_NEW_OBJECT;
@@ -2833,21 +3206,28 @@ db_lookup_dbterm_tree(Process *p, DbTable *tbl, Eterm key, Eterm obj,
     return 1;
 }
 
-static void
-db_finalize_dbterm_tree(int cret, DbUpdateHandle *handle)
+static int
+db_lookup_dbterm_tree(Process *p, DbTable *tbl, Eterm key, Eterm obj,
+                      DbUpdateHandle* handle)
 {
-    DbTable *tbl = handle->tb;
     DbTableTree *tb = &tbl->tree;
+    return db_lookup_dbterm_tree_common(p, tbl, &tb->root, key, obj, handle, tb);
+}
+
+void db_finalize_dbterm_tree_common(int cret, DbUpdateHandle *handle,
+                                    DbTableTree *stack_container)
+{
+    DbTable *tbl = handle->tb;
     TreeDbTerm *bp = (TreeDbTerm *) *handle->bp;
 
     if (handle->flags & DB_NEW_OBJECT && cret != DB_ERROR_NONE) {
         Eterm ret;
-        db_erase_tree(tbl, GETKEY(tb, bp->dbterm.tpl), &ret);
+        db_erase_tree(tbl, GETKEY(&tbl->common, bp->dbterm.tpl), &ret);
     } else if (handle->flags & DB_MUST_RESIZE) {
 	db_finalize_resize(handle, offsetof(TreeDbTerm,dbterm));
-        reset_static_stack(tb);
+        reset_static_stack(stack_container);
 
-        free_term(tb, bp);
+        free_term(tbl, bp);
     }
 #ifdef DEBUG
     handle->dbterm = 0;
@@ -2855,156 +3235,207 @@ db_finalize_dbterm_tree(int cret, DbUpdateHandle *handle)
     return;
 }   
 
+static void
+db_finalize_dbterm_tree(int cret, DbUpdateHandle *handle)
+{
+    DbTable *tbl = handle->tb;
+    DbTableTree *tb = &tbl->tree;
+    db_finalize_dbterm_tree_common(cret, handle, tb);
+}
+
 /*
  * Traverse the tree with a callback function, used by db_match_xxx
  */
-static void traverse_backwards(DbTableTree *tb,
+static void traverse_backwards(DbTableCommon *tb,
 			       DbTreeStack* stack,
 			       Eterm lastkey,
-			       int (*doit)(DbTableTree *,
-					   TreeDbTerm *,
-					   void *,
-					   int),
-			       void *context) 
+                               traverse_doit_funcT* doit,
+                               struct select_common *context,
+                               CATreeRootIterator* iter)
 {
     TreeDbTerm *this, *next;
+    TreeDbTerm** root = context->root;
 
     if (lastkey == THE_NON_VALUE) {
-	stack->pos = stack->slot = 0;
-	if (( this = tb->root ) == NULL) {
-	    return;
-	}
-	while (this != NULL) {
-	    PUSH_NODE(stack, this);
-	    this = this->right;
-	}
-	this = TOP_NODE(stack);
-	next = find_prev(tb, stack, GETKEY(tb, this->dbterm.tpl));
-	if (!((*doit)(tb, this, context, 0)))
-	    return;
+        if (iter) {
+            while (*root == NULL) {
+                root = catree_find_prev_root(iter, NULL);
+                if (!root)
+                    return;
+            }
+            context->root = root;
+        }
+        stack->pos = stack->slot = 0;
+        next = *root;
+        while (next != NULL) {
+            PUSH_NODE(stack, next);
+            next = next->right;
+        }
+        next = TOP_NODE(stack);
     } else {
-	next = find_prev(tb, stack, lastkey);
+        next = find_prev(tb, *root, stack, lastkey);
     }
 
-    while ((this = next) != NULL) {
-	next = find_prev(tb, stack, GETKEY(tb, this->dbterm.tpl));
-	if (!((*doit)(tb, this, context, 0)))
-	    return;
+    while (1) {
+        while (next) {
+            this = next;
+            lastkey = GETKEY(tb, this->dbterm.tpl);
+            next = find_prev(tb, *root, stack, lastkey);
+            if (!((*doit)(tb, this, context, 0)))
+                return;
+        }
+
+        if (!iter)
+            return;
+        ASSERT(is_value(lastkey));
+        root = catree_find_prev_root(iter, &lastkey);
+        if (!root)
+            return;
+        context->root = root;
+        stack->pos = stack->slot = 0;
+        next = find_prev(tb, *root, stack, lastkey);
     }
 }
 
 /*
  * Traverse the tree with a callback function, used by db_match_xxx
  */
-static void traverse_forward(DbTableTree *tb,
+static void traverse_forward(DbTableCommon *tb,
 			     DbTreeStack* stack,
 			     Eterm lastkey,
-			     int (*doit)(DbTableTree *,
-					 TreeDbTerm *,
-					 void *,
-					 int),
-			     void *context) 
+                             traverse_doit_funcT* doit,
+                             struct select_common *context,
+                             CATreeRootIterator* iter)
 {
     TreeDbTerm *this, *next;
+    TreeDbTerm **root = context->root;
 
     if (lastkey == THE_NON_VALUE) {
-	stack->pos = stack->slot = 0;
-	if (( this = tb->root ) == NULL) {
-	    return;
-	}
-	while (this != NULL) {
-	    PUSH_NODE(stack, this);
-	    this = this->left;
-	}
-	this = TOP_NODE(stack);
-	next = find_next(tb, stack, GETKEY(tb, this->dbterm.tpl));
-	if (!((*doit)(tb, this, context, 1)))
-	    return;
+        if (iter) {
+            while (*root == NULL) {
+                root = catree_find_next_root(iter, NULL);
+                if (!root)
+                    return;
+            }
+            context->root = root;
+        }
+        stack->pos = stack->slot = 0;
+        next = *root;
+        while (next != NULL) {
+            PUSH_NODE(stack, next);
+            next = next->left;
+        }
+        next = TOP_NODE(stack);
     } else {
-	next = find_next(tb, stack, lastkey);
+        next = find_next(tb, *root, stack, lastkey);
     }
 
-    while ((this = next) != NULL) {
-	next = find_next(tb, stack, GETKEY(tb, this->dbterm.tpl));
-	if (!((*doit)(tb, this, context, 1)))
-	    return;
+    while (1) {
+        while (next) {
+            this = next;
+            lastkey = GETKEY(tb, this->dbterm.tpl);
+            next = find_next(tb, *root, stack, lastkey);
+            if (!((*doit)(tb, this, context, 1)))
+                return;
+        }
+
+        if (!iter)
+            return;
+        ASSERT(is_value(lastkey));
+        root = catree_find_next_root(iter, &lastkey);
+        if (!root)
+            return;
+        context->root = root;
+        stack->pos = stack->slot = 0;
+        next = find_next(tb, *root, stack, lastkey);
     }
 }
 
 /*
  * Traverse the tree with an update callback function, used by db_select_replace
  */
-static void traverse_update_backwards(DbTableTree *tb,
+static void traverse_update_backwards(DbTableCommon *tb,
                                       DbTreeStack* stack,
                                       Eterm lastkey,
-                                      int (*doit)(DbTableTree*,
+                                      int (*doit)(DbTableCommon*,
                                                   TreeDbTerm**,
-                                                  void*,
+                                                  struct select_common*,
                                                   int),
-                                      void* context)
+                                      struct select_common* context,
+                                      CATreeRootIterator* iter)
 {
     int res;
     TreeDbTerm *this, *next, **this_ptr;
+    TreeDbTerm** root = context->root;
 
     if (lastkey == THE_NON_VALUE) {
-        stack->pos = stack->slot = 0;
-        if (( this = tb->root ) == NULL) {
-            return;
+        if (iter) {
+            while (*root == NULL) {
+                root = catree_find_prev_root(iter, NULL);
+                if (!root)
+                    return;
+                context->root = root;
+            }
         }
-        while (this != NULL) {
-            PUSH_NODE(stack, this);
-            this = this->right;
+        stack->pos = stack->slot = 0;
+        next = *root;
+        while (next) {
+            PUSH_NODE(stack, next);
+            next = next->right;
         }
-        this = TOP_NODE(stack);
-        this_ptr = find_ptr(tb, stack, this);
-        ASSERT(this_ptr != NULL);
-        res = (*doit)(tb, this_ptr, context, 0);
-        REPLACE_TOP_NODE(stack, *this_ptr);
-        next = find_prev(tb, stack, GETKEY(tb, (*this_ptr)->dbterm.tpl));
-        if (!res)
-            return;
-    } else {
-        next = find_prev(tb, stack, lastkey);
+        next = TOP_NODE(stack);
     }
+    else
+        next = find_prev(tb, *root, stack, lastkey);
+
+
+    while (1) {
+        while (next) {
+            this = next;
+            this_ptr = find_ptr(tb, root, stack, this);
+            ASSERT(this_ptr != NULL);
+            res = (*doit)(tb, this_ptr, context, 0);
+            this = *this_ptr;
+            REPLACE_TOP_NODE(stack, this);
+            if (!res)
+                return;
+            lastkey = GETKEY(tb, this->dbterm.tpl);
+            next = find_prev(tb, *root, stack, lastkey);
+        }
 
-    while ((this = next) != NULL) {
-        this_ptr = find_ptr(tb, stack, this);
-        ASSERT(this_ptr != NULL);
-        res = (*doit)(tb, this_ptr, context, 0);
-        REPLACE_TOP_NODE(stack, *this_ptr);
-        next = find_prev(tb, stack, GETKEY(tb, (*this_ptr)->dbterm.tpl));
-        if (!res)
+        if (!iter)
+            return;
+        ASSERT(is_value(lastkey));
+        root = catree_find_prev_root(iter, &lastkey);
+        if (!root)
             return;
+        context->root = root;
+        stack->pos = stack->slot = 0;
+        next = find_prev(tb, *root, stack, lastkey);
     }
 }
 
-/*
- * Returns 0 if not given 1 if given and -1 on no possible match
- * if key is given; *ret is set to point to the object concerned.
- */
-static int key_given(DbTableTree *tb, Eterm pattern, TreeDbTerm ***ret,
-		     Eterm *partly_bound)
+static enum ms_key_boundness key_boundness(DbTableCommon *tb,
+                                           Eterm pattern, Eterm *keyp)
 {
-    TreeDbTerm **this;
     Eterm key;
 
-    ASSERT(ret != NULL);
     if (pattern == am_Underscore || db_is_variable(pattern) != -1)
-	return 0;
-    key = db_getkey(tb->common.keypos, pattern);
+	return MS_KEY_UNBOUND;
+    key = db_getkey(tb->keypos, pattern);
     if (is_non_value(key))
-	return -1;  /* can't possibly match anything */
+	return MS_KEY_IMPOSSIBLE;  /* can't possibly match anything */
     if (!db_has_variable(key)) {   /* Bound key */
-	if (( this = find_node2(tb, key) ) == NULL) {
-	    return -1;
-	}
-	*ret = this;
-	return 1;
-    } else if (partly_bound != NULL && key != am_Underscore && 
-	       db_is_variable(key) < 0 && !db_has_map(key))
-	*partly_bound = key;
+        *keyp = key;
+	return MS_KEY_BOUND;
+    } else if (key != am_Underscore &&
+	       db_is_variable(key) < 0 && !db_has_map(key)) {
+
+	*keyp = key;
+        return MS_KEY_PARTIALLY_BOUND;
+    }
 	
-    return 0;
+    return MS_KEY_UNBOUND;
 }
 
 
@@ -3072,7 +3503,8 @@ static Sint do_cmp_partly_bound(Eterm a, Eterm b, int *done)
     }
 }
 
-static Sint cmp_partly_bound(Eterm partly_bound_key, Eterm bound_key) {
+Sint cmp_partly_bound(Eterm partly_bound_key, Eterm bound_key)
+{
     int done = 0;
     Sint ret = do_cmp_partly_bound(partly_bound_key, bound_key, &done);
 #ifdef HARDDEBUG
@@ -3118,7 +3550,7 @@ static int partly_bound_can_match_lesser(Eterm partly_bound_1,
     if (ret)
 	erts_fprintf(stderr," can match lesser than ");
     else
-	erts_fprintf(stderr," can not match lesser than ");
+	erts_fprintf(stderr," cannot match lesser than ");
     erts_fprintf(stderr,"%T\n",partly_bound_2);
 #endif
     return ret;
@@ -3136,7 +3568,7 @@ static int partly_bound_can_match_greater(Eterm partly_bound_1,
     if (ret)
 	erts_fprintf(stderr," can match greater than ");
     else
-	erts_fprintf(stderr," can not match greater than ");
+	erts_fprintf(stderr," cannot match greater than ");
     erts_fprintf(stderr,"%T\n",partly_bound_2);
 #endif
     return ret;
@@ -3288,7 +3720,8 @@ static int do_partly_bound_can_match_greater(Eterm a, Eterm b,
  * Callback functions for the different match functions
  */
 
-static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr,
+static int doit_select(DbTableCommon *tb, TreeDbTerm *this,
+                       struct select_common* ptr,
 		       int forward)
 {
     struct select_context *sc = (struct select_context *) ptr;
@@ -3306,7 +3739,7 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr,
 			   GETKEY_WITH_POS(sc->keypos, this->dbterm.tpl)) > 0))) {
 	return 0;
     }
-    ret = db_match_dbterm(&tb->common, sc->p, sc->mp, &this->dbterm, &hp, 2);
+    ret = db_match_dbterm(tb, sc->p,sc->mp, &this->dbterm, &hp, 2);
     if (is_value(ret)) {
 	sc->accum = CONS(hp, ret, sc->accum);
     }
@@ -3323,7 +3756,8 @@ static int doit_select(DbTableTree *tb, TreeDbTerm *this, void *ptr,
     return 1;
 }
 
-static int doit_select_count(DbTableTree *tb, TreeDbTerm *this, void *ptr,
+static int doit_select_count(DbTableCommon *tb, TreeDbTerm *this,
+                             struct select_common* ptr,
 			     int forward)
 {
     struct select_count_context *sc = (struct select_count_context *) ptr;
@@ -3337,7 +3771,7 @@ static int doit_select_count(DbTableTree *tb, TreeDbTerm *this, void *ptr,
 			  GETKEY_WITH_POS(sc->keypos, this->dbterm.tpl)) > 0)) {
 	return 0;
     }
-    ret = db_match_dbterm(&tb->common, sc->p, sc->mp, &this->dbterm, NULL, 0);
+    ret = db_match_dbterm(tb, sc->p, sc->mp, &this->dbterm, NULL, 0);
     if (ret == am_true) {
 	++(sc->got);
     }
@@ -3347,7 +3781,8 @@ static int doit_select_count(DbTableTree *tb, TreeDbTerm *this, void *ptr,
     return 1;
 }
 
-static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr,
+static int doit_select_chunk(DbTableCommon *tb, TreeDbTerm *this,
+                             struct select_common* ptr,
 			     int forward)
 {
     struct select_context *sc = (struct select_context *) ptr;
@@ -3366,7 +3801,7 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr,
 	return 0;
     }
 
-    ret = db_match_dbterm(&tb->common, sc->p, sc->mp, &this->dbterm, &hp, 2);
+    ret = db_match_dbterm(tb, sc->p, sc->mp, &this->dbterm, &hp, 2);
     if (is_value(ret)) {
 	++(sc->got);
 	sc->accum = CONS(hp, ret, sc->accum);
@@ -3385,7 +3820,8 @@ static int doit_select_chunk(DbTableTree *tb, TreeDbTerm *this, void *ptr,
 }
 
 
-static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
+static int doit_select_delete(DbTableCommon *tb, TreeDbTerm *this,
+                              struct select_common *ptr,
 			      int forward)
 {
     struct select_delete_context *sc = (struct select_delete_context *) ptr;
@@ -3393,7 +3829,7 @@ static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
     Eterm key;
 
     if (sc->erase_lastterm)
-	free_term(tb, sc->lastterm);
+	free_term((DbTable*)tb, sc->lastterm);
     sc->erase_lastterm = 0;
     sc->lastterm = this;
     
@@ -3401,10 +3837,10 @@ static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
 	cmp_partly_bound(sc->end_condition, 
 			 GETKEY_WITH_POS(sc->keypos, this->dbterm.tpl)) > 0)
 	return 0;
-    ret = db_match_dbterm(&tb->common, sc->p, sc->mp, &this->dbterm, NULL, 0);
+    ret = db_match_dbterm(tb, sc->p, sc->mp, &this->dbterm, NULL, 0);
     if (ret == am_true) {
 	key = GETKEY(sc->tb, this->dbterm.tpl);
-	linkout_tree(sc->tb, key);
+	linkout_tree(sc->tb, sc->common.root, key, sc->stack);
 	sc->erase_lastterm = 1;
 	++sc->accum;
     }
@@ -3414,7 +3850,8 @@ static int doit_select_delete(DbTableTree *tb, TreeDbTerm *this, void *ptr,
     return 1;
 }
 
-static int doit_select_replace(DbTableTree *tb, TreeDbTerm **this, void *ptr,
+static int doit_select_replace(DbTableCommon *tb, TreeDbTerm **this,
+                               struct select_common* ptr,
                                int forward)
 {
     struct select_replace_context *sc = (struct select_replace_context *) ptr;
@@ -3428,13 +3865,13 @@ static int doit_select_replace(DbTableTree *tb, TreeDbTerm **this, void *ptr,
 			  GETKEY_WITH_POS(sc->keypos, (*this)->dbterm.tpl)) > 0)) {
 	return 0;
     }
-    ret = db_match_dbterm(&tb->common, sc->p, sc->mp, &(*this)->dbterm, NULL, 0);
+    ret = db_match_dbterm(tb, sc->p, sc->mp, &(*this)->dbterm, NULL, 0);
 
     if (is_value(ret)) {
         TreeDbTerm* new;
         TreeDbTerm* old = *this;
 #ifdef DEBUG
-        Eterm key = db_getkey(tb->common.keypos, ret);
+        Eterm key = db_getkey(tb->keypos, ret);
         ASSERT(is_value(key));
         ASSERT(cmp_key(tb, key, old) == 0);
 #endif
@@ -3444,7 +3881,7 @@ static int doit_select_replace(DbTableTree *tb, TreeDbTerm **this, void *ptr,
         new->balance = old->balance;
         sc->lastobj = new->dbterm.tpl;
         *this = new;
-        free_term(tb, old);
+        free_term((DbTable*)tb, old);
         ++(sc->replaced);
     }
     if (--(sc->max) <= 0) {
@@ -3454,7 +3891,7 @@ static int doit_select_replace(DbTableTree *tb, TreeDbTerm **this, void *ptr,
 }
 
 #ifdef TREE_DEBUG
-static void do_dump_tree2(DbTableTree* tb, int to, void *to_arg, int show,
+static void do_dump_tree2(DbTableCommon* tb, int to, void *to_arg, int show,
 			  TreeDbTerm *t, int offset)
 {
     if (t == NULL)
@@ -3463,7 +3900,7 @@ static void do_dump_tree2(DbTableTree* tb, int to, void *to_arg, int show,
     if (show) {
 	const char* prefix;
 	Eterm term;
-	if (tb->common.compress) {
+	if (tb->compress) {
 	    prefix = "key=";
 	    term = GETKEY(tb, t->dbterm.tpl);
 	}
@@ -3518,7 +3955,7 @@ static void check_slot_pos(DbTableTree *tb)
 		   "element position %d is really 0x%08X, when stack says "
 		   "it's 0x%08X\n", tb->stack.slot, t, 
 		   tb->stack.array[tb->stack.pos - 1]);
-	do_dump_tree2(tb, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
+	do_dump_tree2(&tb->common, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
     }
 }
 	
@@ -3533,14 +3970,14 @@ static void check_saved_stack(DbTableTree *tb)
      if (t != stack->array[0]) {
 	 erts_fprintf(stderr,"tb->stack[0] is 0x%08X, should be 0x%08X\n",
 		      stack->array[0], t);
-	 do_dump_tree2(tb, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
+	 do_dump_tree2(&tb->common, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
 	 return;
      }
      while (n < stack->pos) {
 	 if (t == NULL) {
 	     erts_fprintf(stderr, "NULL pointer in tree when stack not empty,"
 			" stack depth is %d\n", n);
-	     do_dump_tree2(tb, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
+	     do_dump_tree2(&tb->common, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
 	     return;
 	 }
 	 n++;
@@ -3554,7 +3991,7 @@ static void check_saved_stack(DbTableTree *tb)
 			    "represent child pointer in tree!"
 			    "(left == 0x%08X, right == 0x%08X\n", 
 			    n, tb->stack[n], t->left, t->right);
-		 do_dump_tree2(tb, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
+		 do_dump_tree2(&tb->common, ERTS_PRINT_STDERR, NULL, 1, tb->root, 0);
 		 return;
 	     }
 	 }
@@ -3573,7 +4010,7 @@ static int check_table_tree(DbTableTree* tb, TreeDbTerm *t)
 	erts_fprintf(stderr,"balance = %d, left = 0x%08X, right = 0x%08X\n",
 		     t->balance, t->left, t->right);
 	erts_fprintf(stderr,"\nDump:\n---------------------------------\n");
-	do_dump_tree2(tb, ERTS_PRINT_STDERR, NULL, 1, t, 0);
+	do_dump_tree2(&tb->common, ERTS_PRINT_STDERR, NULL, 1, t, 0);
 	erts_fprintf(stderr,"\n---------------------------------\n");
     }
     return ((rh > lh) ? rh : lh) + 1;
diff --git a/erts/emulator/beam/erl_db_tree_util.h b/erts/emulator/beam/erl_db_tree_util.h
new file mode 100644
index 0000000000..02df74678d
--- /dev/null
+++ b/erts/emulator/beam/erl_db_tree_util.h
@@ -0,0 +1,158 @@
+/*
+ * %CopyrightBegin%
+ * 
+ * Copyright Ericsson AB 1998-2016. All Rights Reserved.
+ * 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ * %CopyrightEnd%
+ */
+
+#ifndef _DB_TREE_UTIL_H
+#define _DB_TREE_UTIL_H
+
+/*
+** Internal functions and macros used by both the CA tree and the AVL tree
+*/
+
+/*
+** A stack of this size is enough for an AVL tree with more than
+** 0xFFFFFFFF elements. It may need to grow if the datatype of
+** the element counter is changed to a 64-bit integer.
+** The maximal height of an AVL tree is bounded by:
+** h(n) <= 1.4404 * log(n + 2) - 0.328
+** where n denotes the number of nodes, h(n) the height of the tree
+** with n nodes, and log is the binary logarithm.
+*/
+  
+#define STACK_NEED 50
+
+#define PUSH_NODE(Dtt, Tdt)                     \
+    ((Dtt)->array[(Dtt)->pos++] = Tdt)
+
+#define POP_NODE(Dtt)			\
+     (((Dtt)->pos) ? 			\
+      (Dtt)->array[--((Dtt)->pos)] : NULL)
+
+#define TOP_NODE(Dtt)   /* top node, or NULL if the stack is empty */ \
+     (((Dtt)->pos) ? 			\
+      (Dtt)->array[(Dtt)->pos - 1] : NULL)
+
+#define EMPTY_NODE(Dtt) (TOP_NODE(Dtt) == NULL)
+
+static ERTS_INLINE void free_term(DbTable *tb, TreeDbTerm* p)
+{
+    db_free_term(tb, p, offsetof(TreeDbTerm, dbterm));
+}
+
+/*
+** Some macros for "direction stacks"
+*/
+#define DIR_LEFT 0
+#define DIR_RIGHT 1
+#define DIR_END 2 
+
+static ERTS_INLINE Sint cmp_key(DbTableCommon* tb, Eterm key, TreeDbTerm* obj) {
+    return CMP(key, GETKEY(tb,obj->dbterm.tpl));
+}
+
+int tree_balance_left(TreeDbTerm **this);
+int tree_balance_right(TreeDbTerm **this);
+
+int db_first_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                         Eterm *ret, DbTableTree *stack_container);
+int db_next_tree_common(Process *p, DbTable *tbl,
+                        TreeDbTerm *root, Eterm key,
+                        Eterm *ret, DbTreeStack* stack);
+int db_last_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                        Eterm *ret, DbTableTree *stack_container);
+int db_prev_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root, Eterm key,
+                        Eterm *ret, DbTreeStack* stack);
+int db_put_tree_common(DbTableCommon *tb, TreeDbTerm **root, Eterm obj,
+                       int key_clash_fail, DbTableTree *stack_container);
+int db_get_tree_common(Process *p, DbTableCommon *tb, TreeDbTerm *root, Eterm key,
+                       Eterm *ret, DbTableTree *stack_container);
+int db_get_element_tree_common(Process *p, DbTableCommon *tb, TreeDbTerm *root, Eterm key,
+                               int ndex, Eterm *ret, DbTableTree *stack_container);
+int db_member_tree_common(DbTableCommon *tb, TreeDbTerm *root, Eterm key, Eterm *ret,
+                          DbTableTree *stack_container);
+int db_erase_tree_common(DbTable *tbl, TreeDbTerm **root, Eterm key, Eterm *ret,
+                         DbTreeStack *stack /* NULL if no static stack */);
+int db_erase_object_tree_common(DbTable *tbl, TreeDbTerm **root, Eterm object,
+                                Eterm *ret, DbTableTree *stack_container);
+int db_slot_tree_common(Process *p, DbTable *tbl, TreeDbTerm *root,
+                        Eterm slot_term, Eterm *ret,
+                        DbTableTree *stack_container,
+                        CATreeRootIterator*);
+int db_select_chunk_tree_common(Process *p, DbTable *tb,
+                                Eterm tid, Eterm pattern, Sint chunk_size,
+                                int reverse, Eterm *ret,
+                                DbTableTree *stack_container,
+                                CATreeRootIterator*);
+int db_select_tree_common(Process *p, DbTable *tb,
+                          Eterm tid, Eterm pattern, int reverse, Eterm *ret,
+                          DbTableTree *stack_container,
+                          CATreeRootIterator*);
+int db_select_delete_tree_common(Process *p, DbTable *tbl,
+                                 Eterm tid, Eterm pattern,
+                                 Eterm *ret,
+                                 DbTreeStack* stack,
+                                 CATreeRootIterator* iter);
+int db_select_continue_tree_common(Process *p, 
+                                   DbTableCommon *tb,
+                                   Eterm continuation,
+                                   Eterm *ret,
+                                   DbTableTree *stack_container,
+                                   CATreeRootIterator* iter);
+int db_select_delete_continue_tree_common(Process *p, 
+                                          DbTable *tbl,
+                                          Eterm continuation,
+                                          Eterm *ret,
+                                          DbTreeStack* stack,
+                                          CATreeRootIterator* iter);
+int db_select_count_tree_common(Process *p, DbTable *tb,
+                                Eterm tid, Eterm pattern, Eterm *ret,
+                                DbTableTree *stack_container,
+                                CATreeRootIterator* iter);
+int db_select_count_continue_tree_common(Process *p,
+                                         DbTable *tb,
+                                         Eterm continuation,
+                                         Eterm *ret,
+                                         DbTableTree *stack_container,
+                                         CATreeRootIterator* iter);
+int db_select_replace_tree_common(Process *p, DbTable*,
+                                  Eterm tid, Eterm pattern, Eterm *ret,
+                                  DbTableTree *stack_container,
+                                  CATreeRootIterator* iter);
+int db_select_replace_continue_tree_common(Process *p,
+                                           DbTable*,
+                                           Eterm continuation,
+                                           Eterm *ret,
+                                           DbTableTree *stack_container,
+                                           CATreeRootIterator* iter);
+int db_take_tree_common(Process *p, DbTable *tbl, TreeDbTerm **root,
+                        Eterm key, Eterm *ret,
+                        DbTreeStack *stack /* NULL if no static stack */);
+void db_print_tree_common(fmtfn_t to, void *to_arg,
+                          int show, TreeDbTerm *root, DbTable *tbl);
+void db_foreach_offheap_tree_common(TreeDbTerm *root,
+                                    void (*func)(ErlOffHeap *, void *),
+                                    void * arg);
+int db_lookup_dbterm_tree_common(Process *p, DbTable *tbl, TreeDbTerm **root,
+                                 Eterm key, Eterm obj, DbUpdateHandle* handle,
+                                 DbTableTree *stack_container);
+void db_finalize_dbterm_tree_common(int cret, DbUpdateHandle *handle,
+                                    DbTableTree *stack_container);
+Sint cmp_partly_bound(Eterm partly_bound_key, Eterm bound_key);
+
+#endif /* _DB_TREE_UTIL_H */
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index f1d47326b4..957762d4b0 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -497,6 +497,7 @@ static erts_atomic32_t trace_control_word;
 /* This needs to be here, before the bif table... */
 
 static Eterm db_set_trace_control_word_fake_1(BIF_ALIST_1);
+static Eterm db_length_1(BIF_ALIST_1);
 
 /*
 ** The table of callable bif's, i e guard bif's and 
@@ -603,7 +604,7 @@ static DMCGuardBif guard_tab[] =
     },
     {
 	am_length,
-	&length_1,
+	&db_length_1,
 	1,
 	DBIF_ALL
     },
@@ -971,6 +972,26 @@ BIF_RETTYPE db_set_trace_control_word_1(BIF_ALIST_1)
     BIF_RET(db_set_trace_control_word(BIF_P, BIF_ARG_1));
 }
 
+/*
+ * Implementation of length/1 for match specs (non-trapping).
+ */
+static Eterm db_length_1(BIF_ALIST_1)
+{
+    Eterm list;
+    Uint i;
+
+    list = BIF_ARG_1;
+    i = 0;
+    while (is_list(list)) {
+	i++;
+	list = CDR(list_val(list));
+    }
+    if (is_not_nil(list)) {
+	BIF_ERROR(BIF_P, BADARG);
+    }
+    BIF_RET(make_small(i));
+}
+
 static Eterm db_set_trace_control_word_fake_1(BIF_ALIST_1)
 {
     Process *p = BIF_P;
@@ -3118,9 +3139,7 @@ void* db_store_term_comp(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj)
     Uint new_sz = offset + db_size_dbterm_comp(tb, obj);
     byte* basep;
     DbTerm* newp;
-#ifdef DEBUG
     byte* top;
-#endif
 
     ASSERT(tb->compress);
     if (old != 0) {
@@ -3142,11 +3161,8 @@ void* db_store_term_comp(DbTableCommon *tb, DbTerm* old, Uint offset, Eterm obj)
     }
 
     newp->size = size_object(obj);
-#ifdef DEBUG
-    top = 
-#endif
-	copy_to_comp(tb, obj, newp, new_sz);
-    ASSERT(top <= basep + new_sz);
+    top = copy_to_comp(tb, obj, newp, new_sz);
+    ASSERT(top <= basep + new_sz); (void)top;
 
     /* ToDo: Maybe realloc if ((basep+new_sz) - top) > WASTED_SPACE_LIMIT */
 
diff --git a/erts/emulator/beam/erl_db_util.h b/erts/emulator/beam/erl_db_util.h
index 6ec3b4f98f..e1af9210ea 100644
--- a/erts/emulator/beam/erl_db_util.h
+++ b/erts/emulator/beam/erl_db_util.h
@@ -89,7 +89,16 @@ typedef struct {
     void** bp;         /* {Hash|Tree}DbTerm** */
     Uint new_size;
     int flags;
-    void* lck;
+    union {
+        struct {
+            erts_rwmtx_t* lck;
+        } hash;
+        struct {
+            struct DbTableCATreeNode* base_node;
+            struct DbTableCATreeNode* parent;
+            int current_level;
+        } catree;
+    } u;
 } DbUpdateHandle;
 
 
@@ -274,23 +283,28 @@ typedef struct db_table_common {
 } DbTableCommon;
 
 /* These are status bit patterns */
-#define DB_PRIVATE       (1 << 0)
-#define DB_PROTECTED     (1 << 1)
-#define DB_PUBLIC        (1 << 2)
-#define DB_DELETE        (1 << 3) /* table is being deleted */
-#define DB_SET           (1 << 4)
-#define DB_BAG           (1 << 5)
-#define DB_DUPLICATE_BAG (1 << 6)
-#define DB_ORDERED_SET   (1 << 7)
-#define DB_FINE_LOCKED   (1 << 8) /* write_concurrency */
-#define DB_FREQ_READ     (1 << 9) /* read_concurrency */
-#define DB_NAMED_TABLE   (1 << 10)
-#define DB_BUSY          (1 << 11)
+#define DB_PRIVATE        (1 << 0)
+#define DB_PROTECTED      (1 << 1)
+#define DB_PUBLIC         (1 << 2)
+#define DB_DELETE         (1 << 3) /* table is being deleted */
+#define DB_SET            (1 << 4)
+#define DB_BAG            (1 << 5)
+#define DB_DUPLICATE_BAG  (1 << 6)
+#define DB_ORDERED_SET    (1 << 7)
+#define DB_CA_ORDERED_SET (1 << 8)
+#define DB_FINE_LOCKED    (1 << 9)  /* write_concurrency */
+#define DB_FREQ_READ      (1 << 10) /* read_concurrency */
+#define DB_NAMED_TABLE    (1 << 11)
+#define DB_BUSY           (1 << 12)
+
+#define DB_CATREE_FORCE_SPLIT (1U << 31) /* erts_debug; 1U: bit 31 is the sign bit of int */
 
 #define IS_HASH_TABLE(Status) (!!((Status) & \
 				  (DB_BAG | DB_SET | DB_DUPLICATE_BAG)))
 #define IS_TREE_TABLE(Status) (!!((Status) & \
 				  DB_ORDERED_SET))
+#define IS_CATREE_TABLE(Status) (!!((Status) & \
+                                    DB_CA_ORDERED_SET))
 #define NFIXED(T) (erts_refc_read(&(T)->common.fix_count,0))
 #define IS_FIXED(T) (NFIXED(T) != 0) 
 
diff --git a/erts/emulator/beam/erl_dirty_bif.tab b/erts/emulator/beam/erl_dirty_bif.tab
index 20299ff604..609869ad9f 100644
--- a/erts/emulator/beam/erl_dirty_bif.tab
+++ b/erts/emulator/beam/erl_dirty_bif.tab
@@ -57,8 +57,6 @@ dirty-cpu erts_debug:lcnt_clear/0
 #  and debug purposes only. We really do *not* want to execute these
 #  on dirty schedulers on a real system.
 
-dirty-cpu-test erlang:'++'/2
-dirty-cpu-test erlang:append/2
 dirty-cpu-test erlang:iolist_size/1
 dirty-cpu-test erlang:make_tuple/2
 dirty-cpu-test erlang:make_tuple/3
diff --git a/erts/emulator/beam/erl_drv_nif.h b/erts/emulator/beam/erl_drv_nif.h
index 31b4817fb1..9ef7c39d41 100644
--- a/erts/emulator/beam/erl_drv_nif.h
+++ b/erts/emulator/beam/erl_drv_nif.h
@@ -53,7 +53,8 @@ typedef enum {
 enum ErlNifSelectFlags {
     ERL_NIF_SELECT_READ      = (1 << 0),
     ERL_NIF_SELECT_WRITE     = (1 << 1),
-    ERL_NIF_SELECT_STOP      = (1 << 2)
+    ERL_NIF_SELECT_STOP      = (1 << 2),
+    ERL_NIF_SELECT_CANCEL    = (1 << 3)
 };
 
 /*
diff --git a/erts/emulator/beam/erl_goodfit_alloc.c b/erts/emulator/beam/erl_goodfit_alloc.c
index 01d4aa54ff..68b9579433 100644
--- a/erts/emulator/beam/erl_goodfit_alloc.c
+++ b/erts/emulator/beam/erl_goodfit_alloc.c
@@ -226,6 +226,8 @@ erts_gfalc_start(GFAllctr_t *gfallctr,
     allctr->add_mbc		        = NULL;
     allctr->remove_mbc		        = NULL;
     allctr->largest_fblk_in_mbc         = NULL;
+    allctr->first_fblk_in_mbc           = NULL;
+    allctr->next_fblk_in_mbc            = NULL;
     allctr->init_atoms			= init_atoms;
 
 #ifdef ERTS_ALLOC_UTIL_HARD_DEBUG
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 99e788c718..2b19d2cfd3 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -128,7 +128,7 @@ const Eterm etp_hole_marker = 0;
 
 static int modified_sched_thread_suggested_stack_size = 0;
 
-Eterm erts_init_process_id;
+Eterm erts_init_process_id = ERTS_INVALID_PID;
 
 /*
  * Note about VxWorks: All variables must be initialized by executable code,
@@ -163,7 +163,6 @@ int erts_initialized = 0;
  * Configurable parameters.
  */
 
-Uint display_items;	    	/* no of items to display in traces etc */
 int H_MIN_SIZE;			/* The minimum heap grain */
 int BIN_VH_MIN_SIZE;		/* The minimum binary virtual*/
 int H_MAX_SIZE;			/* The maximum heap size */
@@ -354,6 +353,7 @@ erl_init(int ncpu,
     erts_init_bif();
     erts_init_bif_chksum();
     erts_init_bif_binary();
+    erts_init_bif_guard();
     erts_init_bif_persistent_term();
     erts_init_bif_re();
     erts_init_unicode(); /* after RE to get access to PCRE unicode */
@@ -376,21 +376,19 @@ erl_init(int ncpu,
 }
 
 static Eterm
-erl_first_process_otp(char* modname, void* code, unsigned size, int argc, char** argv)
+erl_first_process_otp(char* mod_name, int argc, char** argv)
 {
     int i;
-    Eterm start_mod;
     Eterm args;
     Eterm res;
     Eterm* hp;
     Process parent;
     ErlSpawnOpts so;
-    Eterm env;
-    
-    start_mod = erts_atom_put((byte *) modname, sys_strlen(modname), ERTS_ATOM_ENC_LATIN1, 1);
-    if (erts_find_function(start_mod, am_start, 2,
+    Eterm boot_mod;
+
+    if (erts_find_function(am_erl_init, am_start, 2,
 			   erts_active_code_ix()) == NULL) {
-	erts_exit(ERTS_ERROR_EXIT, "No function %s:start/2\n", modname);
+	erts_exit(ERTS_ERROR_EXIT, "No function erl_init:start/2\n");
     }
 
     /*
@@ -406,13 +404,13 @@ erl_first_process_otp(char* modname, void* code, unsigned size, int argc, char**
 	args = CONS(hp, new_binary(&parent, (byte*)argv[i], len), args);
 	hp += 2;
     }
-    env = new_binary(&parent, code, size);
+    boot_mod = erts_atom_put((byte *) mod_name, sys_strlen(mod_name), ERTS_ATOM_ENC_LATIN1, 1);
     args = CONS(hp, args, NIL);
     hp += 2;
-    args = CONS(hp, env, args);
+    args = CONS(hp, boot_mod, args);
 
     so.flags = erts_default_spo_flags|SPO_SYSTEM_PROC;
-    res = erl_create_process(&parent, start_mod, am_start, args, &so);
+    res = erl_create_process(&parent, am_erl_init, am_start, args, &so);
     erts_proc_unlock(&parent, ERTS_PROC_LOCK_MAIN);
     erts_cleanup_empty_process(&parent);
     return res;
@@ -482,7 +480,6 @@ erts_preloaded(Process* p)
 /* static variables that must not change (use same values at restart) */
 static char* program;
 static char* init = "init";
-static char* boot = "boot";
 static int    boot_argc;
 static char** boot_argv;
 
@@ -536,9 +533,6 @@ void erts_usage(void)
     int this_rel = this_rel_num();
     erts_fprintf(stderr, "Usage: %s [flags] [ -- [init_args] ]\n", progname(program));
     erts_fprintf(stderr, "The flags are:\n\n");
-
-    /*    erts_fprintf(stderr, "-# number  set the number of items to be used in traces etc\n"); */
-
     erts_fprintf(stderr, "-a size        suggested stack size in kilo words for threads\n");
     erts_fprintf(stderr, "               in the async-thread pool, valid range is [%d-%d]\n",
 		 ERTS_ASYNC_THREAD_MIN_STACK_SIZE,
@@ -546,13 +540,9 @@ void erts_usage(void)
     erts_fprintf(stderr, "-A number      set number of threads in async thread pool,\n");
     erts_fprintf(stderr, "               valid range is [0-%d]\n",
 		 ERTS_MAX_NO_OF_ASYNC_THREADS);
-
     erts_fprintf(stderr, "-B[c|d|i]      c to have Ctrl-c interrupt the Erlang shell,\n");
     erts_fprintf(stderr, "               d (or no extra option) to disable the break\n");
     erts_fprintf(stderr, "               handler, i to ignore break signals\n");
-
-    /*    erts_fprintf(stderr, "-b func    set the boot function (default boot)\n"); */
-
     erts_fprintf(stderr, "-c bool        enable or disable time correction\n");
     erts_fprintf(stderr, "-C mode        set time warp mode; valid modes are:\n");
     erts_fprintf(stderr, "               no_time_warp|single_time_warp|multi_time_warp\n");
@@ -571,7 +561,6 @@ void erts_usage(void)
 	       erts_pd_initial_size);
     erts_fprintf(stderr, "-hmqd  val     set default message queue data flag for processes,\n");
     erts_fprintf(stderr, "               valid values are: off_heap | on_heap\n");
-
     erts_fprintf(stderr, "-IOp number    set number of pollsets to be used to poll for I/O,\n");
     erts_fprintf(stderr, "               This value has to be equal or smaller than the\n");
     erts_fprintf(stderr, "               number of poll threads. If the current platform\n");
@@ -582,9 +571,7 @@ void erts_usage(void)
     erts_fprintf(stderr, "               number of poll threads.");
     erts_fprintf(stderr, "-IOPt number   set number of threads to be used to poll for I/O\n");
     erts_fprintf(stderr, "               as a percentage of the number of schedulers.");
-
-    /*    erts_fprintf(stderr, "-i module  set the boot module (default init)\n"); */
-
+    erts_fprintf(stderr, "-i module      set the boot module (default init)\n");
     erts_fprintf(stderr, "-n[s|a|d]      Control behavior of signals to ports\n");
     erts_fprintf(stderr, "               Note that this flag is deprecated!\n");
     erts_fprintf(stderr, "-M<X> <Y>      memory allocator switches,\n");
@@ -599,7 +586,6 @@ void erts_usage(void)
     erts_fprintf(stderr, "-R number      set compatibility release number,\n");
     erts_fprintf(stderr, "               valid range [%d-%d]\n",
 		 this_rel-2, this_rel);
-
     erts_fprintf(stderr, "-r             force ets memory block to be moved on realloc\n");
     erts_fprintf(stderr, "-rg amount     set reader groups limit\n");
     erts_fprintf(stderr, "-sbt type      set scheduler bind type, valid types are:\n");
@@ -670,9 +656,7 @@ void erts_usage(void)
     erts_fprintf(stderr, "-T number      set modified timing level, valid range is [0-%d]\n",
 		 ERTS_MODIFIED_TIMING_LEVELS-1);
     erts_fprintf(stderr, "-V             print Erlang version\n");
-
     erts_fprintf(stderr, "-v             turn on chatty mode (GCs will be reported etc)\n");
-
     erts_fprintf(stderr, "-W<i|w|e>      set error logger warnings mapping,\n");
     erts_fprintf(stderr, "               see error_logger documentation for details\n");
     erts_fprintf(stderr, "-zdbbl size    set the distribution buffer busy limit in kilobytes\n");
@@ -763,7 +747,6 @@ early_init(int *argc, char **argv) /*
 
     erts_sched_compact_load = 1;
     erts_printf_eterm_func = erts_printf_term;
-    display_items = 200;
     erts_backtrace_depth = DEFAULT_BACKTRACE_SIZE;
     erts_async_max_threads = ERTS_DEFAULT_NO_ASYNC_THREADS;
     erts_async_thread_suggested_stack_size = ERTS_ASYNC_THREAD_MIN_STACK_SIZE;
@@ -1270,25 +1253,9 @@ erl_start(int argc, char **argv)
 
 	    /*
 	     * NOTE: -M flags are handled (and removed from argv) by
-	     * erts_alloc_init(). 
-	     *
-	     * The -d, -m, -S, -t, and -T flags was removed in
-	     * Erlang 5.3/OTP R9C.
-	     *
-	     * -S, and -T has been reused in Erlang 5.5/OTP R11B.
-	     *
-	     * -d has been reused in a patch R12B-4.
+	     * erts_alloc_init().
 	     */
 
-	case '#' :
-	    arg = get_arg(argv[i]+2, argv[i+1], &i);
-	    if ((display_items = atoi(arg)) == 0) {
-		erts_fprintf(stderr, "bad display items%s\n", arg);
-		erts_usage();
-	    }
-	    VERBOSE(DEBUG_SYSTEM,
-                    ("using display items %d\n",display_items));
-	    break;
 	case 'p':
 	    if (!sys_strncmp(argv[i],"-pc",3)) {
 		int printable_chars = ERL_PRINTABLE_CHARACTERS_LATIN1;
@@ -1567,11 +1534,6 @@ erl_start(int argc, char **argv)
 	    init = get_arg(argv[i]+2, argv[i+1], &i);
 	    break;
 
-	case 'b':
-	    /* define name of initial function */
-	    boot = get_arg(argv[i]+2, argv[i+1], &i);
-	    break;
-
 	case 'B':
 	  if (argv[i][2] == 'i')          /* +Bi */
 	    ignore_break = 1;
@@ -2257,8 +2219,8 @@ erl_start(int argc, char **argv)
 
     erts_initialized = 1;
 
-    erts_init_process_id = erl_first_process_otp("otp_ring0", NULL, 0,
-                                                 boot_argc, boot_argv);
+    erts_init_process_id = erl_first_process_otp(init, boot_argc, boot_argv);
+    ASSERT(erts_init_process_id != ERTS_INVALID_PID);
 
     {
 	/*
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index 1416c5f96c..3aab4828cc 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -42,6 +42,7 @@
 #include "erl_term.h"
 #include "erl_threads.h"
 #include "erl_atom_table.h"
+#include "erl_utils.h"
 
 typedef struct {
     char *name;
@@ -91,6 +92,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
     {	"db_tab",				"address"		},
     {	"db_tab_fix",				"address"		},
     {	"db_hash_slot",				"address"		},
+    {	"erl_db_catree_base_node",		NULL		        },
+    {	"erl_db_catree_route_node",		"index"		        },
     {	"resource_monitors",			"address"	        },
     {   "driver_list",                          NULL                    },
     {	"proc_msgq",				"pid"			},
@@ -193,6 +196,12 @@ struct lc_locked_lock_t_ {
     unsigned int line;
     erts_lock_flags_t flags;
     erts_lock_options_t taken_options;
+    /*
+     * Pointer back to the lock instance if it exists or NULL for proc locks.
+     * If set, we use it to allow trylock of another lock instance
+     * but with identical lock order as an already locked lock.
+     */
+    erts_lc_lock_t *lck;
 };
 
 typedef struct {
@@ -406,6 +415,10 @@ new_locked_lock(lc_thread_t* thr,
     ll->line = line;
     ll->flags = lck->flags;
     ll->taken_options = options;
+    if ((lck->flags & ERTS_LOCK_FLAGS_MASK_TYPE) == ERTS_LOCK_FLAGS_TYPE_PROCLOCK)
+        ll->lck = NULL;
+    else
+        ll->lck = lck;
     return ll;
 }
 
@@ -709,6 +722,14 @@ erts_lc_get_lock_order_id(char *name)
     return (Sint16) -1;
 }
 
+static int
+lc_is_term_order(Sint16 id)
+{
+    return erts_lock_order[id].internal_order != NULL
+        && sys_strcmp(erts_lock_order[id].internal_order, "term") == 0;
+}
+
+
 static int compare_locked_by_id(lc_locked_lock_t *locked_lock, erts_lc_lock_t *comparand)
 {
     if(locked_lock->id < comparand->id) {
@@ -720,18 +741,23 @@ static int compare_locked_by_id(lc_locked_lock_t *locked_lock, erts_lc_lock_t *c
     return 0;
 }
 
-static int compare_locked_by_id_extra(lc_locked_lock_t *locked_lock, erts_lc_lock_t *comparand)
+static int compare_locked_by_id_extra(lc_locked_lock_t *ll, erts_lc_lock_t *comparand)
 {
-    int order = compare_locked_by_id(locked_lock, comparand);
+    int order = compare_locked_by_id(ll, comparand);
 
     if(order) {
         return order;
-    } else if(locked_lock->extra < comparand->extra) {
+    }
+    if (ll->flags & ERTS_LOCK_FLAGS_PROPERTY_TERM_ORDER) {
+        ASSERT(!is_header(ll->extra) && !is_header(comparand->extra));
+        return CMP(ll->extra, comparand->extra);
+    }
+
+    if(ll->extra < comparand->extra) {
         return -1;
-    } else if(locked_lock->extra > comparand->extra) {
+    } else if(ll->extra > comparand->extra) {
         return 1;
     }
-
     return 0;
 }
 
@@ -970,7 +996,8 @@ erts_lc_trylock_force_busy_flg(erts_lc_lock_t *lck, erts_lock_options_t options)
 	return 0;
     }
     else {
-	lc_locked_lock_t *tl_lck;
+        lc_locked_lock_t *ll;
+        int order;
 
 	ASSERT(thr->locked.last);
 
@@ -979,25 +1006,25 @@ erts_lc_trylock_force_busy_flg(erts_lc_lock_t *lck, erts_lock_options_t options)
 	    type_order_violation("trylocking ", thr, lck);
 #endif
 
-	if (thr->locked.last->id < lck->id
-	    || (thr->locked.last->id == lck->id
-		&& thr->locked.last->extra < lck->extra))
-	    return 0;
+        ll = thr->locked.last;
+        order = compare_locked_by_id_extra(ll, lck);
+
+	if (order < 0)
+            return 0;
 
 	/*
-	 * Lock order violation
+	 * TryLock order violation
 	 */
 
-
-	/* Check that we are not trying to lock this lock twice */
-	for (tl_lck = thr->locked.last; tl_lck; tl_lck = tl_lck->prev) {
-	    if (tl_lck->id < lck->id
-		|| (tl_lck->id == lck->id && tl_lck->extra <= lck->extra)) {
-		if (tl_lck->id == lck->id && tl_lck->extra == lck->extra)
-		    lock_twice("Trylocking", thr, lck, options);
-		break;
-	    }
-	}
+        /* Check that we are not trying to lock this lock twice */
+        do {
+            if (order == 0 && (ll->lck == lck || !ll->lck))
+                lock_twice("Trylocking", thr, lck, options);
+            ll = ll->prev;
+            if (!ll)
+                break;
+            order = compare_locked_by_id_extra(ll, lck);
+        } while (order >= 0);
 
 #ifndef ERTS_LC_ALLWAYS_FORCE_BUSY_TRYLOCK_ON_LOCK_ORDER_VIOLATION
 	/* We only force busy if a lock order violation would occur
@@ -1044,10 +1071,10 @@ void erts_lc_trylock_flg_x(int locked, erts_lc_lock_t *lck, erts_lock_options_t
 #endif
 
 	for (tl_lck = thr->locked.last; tl_lck; tl_lck = tl_lck->prev) {
-	    if (tl_lck->id < lck->id
-		|| (tl_lck->id == lck->id && tl_lck->extra <= lck->extra)) {
-		if (tl_lck->id == lck->id && tl_lck->extra == lck->extra)
-		    lock_twice("Trylocking", thr, lck, options);
+            int order = compare_locked_by_id_extra(tl_lck, lck);
+	    if (order <= 0) {
+                if (order == 0 && (tl_lck->lck == lck || !tl_lck->lck))
+                    lock_twice("Trylocking", thr, lck, options);
 		if (locked) {
 		    ll->next = tl_lck->next;
 		    ll->prev = tl_lck;
@@ -1089,10 +1116,10 @@ void erts_lc_require_lock_flg(erts_lc_lock_t *lck, erts_lock_options_t options,
 	for (l_lck2 = thr->required.last;
 	     l_lck2;
 	     l_lck2 = l_lck2->prev) {
-	    if (l_lck2->id < lck->id
-		|| (l_lck2->id == lck->id && l_lck2->extra < lck->extra))
+            int order = compare_locked_by_id_extra(l_lck2, lck);
+	    if (order < 0)
 		break;
-	    else if (l_lck2->id == lck->id && l_lck2->extra == lck->extra)
+	    if (order == 0)
 		require_twice(thr, lck);
 	}
 	if (!l_lck2) {
@@ -1150,6 +1177,7 @@ void erts_lc_lock_flg_x(erts_lc_lock_t *lck, erts_lock_options_t options,
 {
     lc_thread_t *thr;
     lc_locked_lock_t *new_ll;
+    int order;
 
     if (lck->inited != ERTS_LC_INITITALIZED)
 	uninitialized_lock();
@@ -1165,10 +1193,10 @@ void erts_lc_lock_flg_x(erts_lc_lock_t *lck, erts_lock_options_t options,
 	thr->locked.last = thr->locked.first = new_ll;
         ASSERT(0 < lck->id && lck->id < ERTS_LOCK_ORDER_SIZE);
         thr->matrix.m[lck->id][0] = 1;
+        return;
     }
-    else if (thr->locked.last->id < lck->id
-	     || (thr->locked.last->id == lck->id
-		 && thr->locked.last->extra < lck->extra)) {
+    order = compare_locked_by_id_extra(thr->locked.last, lck);
+    if (order < 0) {
         lc_locked_lock_t* ll;
 	if (LOCK_IS_TYPE_ORDER_VIOLATION(lck->flags, thr->locked.last->flags)) {
 	    type_order_violation("locking ", thr, lck);
@@ -1186,7 +1214,7 @@ void erts_lc_lock_flg_x(erts_lc_lock_t *lck, erts_lock_options_t options,
 	thr->locked.last->next = new_ll;
 	thr->locked.last = new_ll;
     }
-    else if (thr->locked.last->id == lck->id && thr->locked.last->extra == lck->extra)
+    else if (order == 0)
 	lock_twice("Locking", thr, lck, options);
     else
 	lock_order_violation(thr, lck);
@@ -1298,7 +1326,6 @@ void
 erts_lc_init_lock(erts_lc_lock_t *lck, char *name, erts_lock_flags_t flags)
 {
     lck->id = erts_lc_get_lock_order_id(name);
-
     lck->extra = (UWord) &lck->extra;
     ASSERT(is_not_immed(lck->extra));
     lck->flags = flags;
@@ -1311,8 +1338,13 @@ erts_lc_init_lock_x(erts_lc_lock_t *lck, char *name, erts_lock_flags_t flags, Et
 {
     lck->id = erts_lc_get_lock_order_id(name);
     lck->extra = extra;
-    ASSERT(is_immed(lck->extra));
     lck->flags = flags;
+    if (lc_is_term_order(lck->id)) {
+        lck->flags |= ERTS_LOCK_FLAGS_PROPERTY_TERM_ORDER;
+        ASSERT(!is_header(lck->extra));
+    }
+    else
+        ASSERT(is_immed(lck->extra));
     lck->taken_options = 0;
     lck->inited = ERTS_LC_INITITALIZED;
 }
diff --git a/erts/emulator/beam/erl_lock_check.h b/erts/emulator/beam/erl_lock_check.h
index d10e32985a..b32f27d9f9 100644
--- a/erts/emulator/beam/erl_lock_check.h
+++ b/erts/emulator/beam/erl_lock_check.h
@@ -104,7 +104,7 @@ Eterm erts_lc_dump_graph(void);
 
 #define erts_lc_lock(lck) erts_lc_lock_x(lck,__FILE__,__LINE__)
 #define erts_lc_trylock(res,lck) erts_lc_trylock_x(res,lck,__FILE__,__LINE__)
-#define erts_lc_lock_flg(lck) erts_lc_lock_flg_x(lck,__FILE__,__LINE__)
-#define erts_lc_trylock_flg(res,lck) erts_lc_trylock_flg_x(res,lck,__FILE__,__LINE__)
+#define erts_lc_lock_flg(lck,flg) erts_lc_lock_flg_x(lck,flg,__FILE__,__LINE__)
+#define erts_lc_trylock_flg(res,lck,flg) erts_lc_trylock_flg_x(res,lck,flg,__FILE__,__LINE__)
 
 #endif /* #ifndef ERTS_LOCK_CHECK_H__ */
diff --git a/erts/emulator/beam/erl_lock_count.h b/erts/emulator/beam/erl_lock_count.h
index 89d95a73cf..0d47b16e0b 100644
--- a/erts/emulator/beam/erl_lock_count.h
+++ b/erts/emulator/beam/erl_lock_count.h
@@ -532,7 +532,7 @@ ERTS_GLB_INLINE
 void lcnt_dec_lock_state__(ethr_atomic_t *l_state) {
     ethr_sint_t state = ethr_atomic_dec_read_acqb(l_state);
 
-    /* We can not assume that state is >= -1 here; unlock and unacquire might
+    /* We cannot assume that state is >= -1 here; unlock and unacquire might
      * bring it below -1 and race to increment it back. */
 
     if(state < 0) {
diff --git a/erts/emulator/beam/erl_lock_flags.h b/erts/emulator/beam/erl_lock_flags.h
index d711f69456..2db133b598 100644
--- a/erts/emulator/beam/erl_lock_flags.h
+++ b/erts/emulator/beam/erl_lock_flags.h
@@ -28,15 +28,17 @@
 
 /* Property/category are bitfields to simplify their use in masks. */
 #define ERTS_LOCK_FLAGS_MASK_CATEGORY (0xFFC0)
-#define ERTS_LOCK_FLAGS_MASK_PROPERTY (0x0030)
+#define ERTS_LOCK_FLAGS_MASK_PROPERTY (0x0038)
 
 /* Type is a plain number. */
-#define ERTS_LOCK_FLAGS_MASK_TYPE     (0x000F)
+#define ERTS_LOCK_FLAGS_MASK_TYPE     (0x0007)
 
 #define ERTS_LOCK_FLAGS_TYPE_SPINLOCK (1)
 #define ERTS_LOCK_FLAGS_TYPE_MUTEX    (2)
 #define ERTS_LOCK_FLAGS_TYPE_PROCLOCK (3)
 
+/* Lock checker uses real term order instead of raw word compare */
+#define ERTS_LOCK_FLAGS_PROPERTY_TERM_ORDER (1 << 3)
 /* "Static" guarantees that the lock will never be destroyed once created. */
 #define ERTS_LOCK_FLAGS_PROPERTY_STATIC     (1 << 4)
 #define ERTS_LOCK_FLAGS_PROPERTY_READ_WRITE (1 << 5)
diff --git a/erts/emulator/beam/erl_map.c b/erts/emulator/beam/erl_map.c
index cba17d3e6a..93816542cd 100644
--- a/erts/emulator/beam/erl_map.c
+++ b/erts/emulator/beam/erl_map.c
@@ -125,15 +125,20 @@ BIF_RETTYPE map_size_1(BIF_ALIST_1) {
 	flatmap_t *mp = (flatmap_t*)flatmap_val(BIF_ARG_1);
 	BIF_RET(make_small(flatmap_get_size(mp)));
     } else if (is_hashmap(BIF_ARG_1)) {
-	Eterm *head, *hp, res;
-	Uint size, hsz=0;
+	Eterm *head;
+	Uint size;
 
 	head = hashmap_val(BIF_ARG_1);
 	size = head[1];
-	(void) erts_bld_uint(NULL, &hsz, size);
-	hp = HAlloc(BIF_P, hsz);
-	res = erts_bld_uint(&hp, NULL, size);
-	BIF_RET(res);
+
+        /*
+         * As long as a small has 28 bits (on a 32-bit machine) for
+         * the integer itself, it is impossible to build a map whose
+         * size would not fit in a small. Add an assertion in case we
+         * ever decrease the number of bits in a small.
+         */
+        ASSERT(IS_USMALL(0, size));
+        BIF_RET(make_small(size));
     }
 
     BIF_P->fvalue = BIF_ARG_1;
@@ -1505,25 +1510,6 @@ int hashmap_key_hash_cmp(Eterm* ap, Eterm* bp)
     return ap ? -1 : 1;
 }
 
-/* maps:new/0 */
-
-BIF_RETTYPE maps_new_0(BIF_ALIST_0) {
-    Eterm* hp;
-    Eterm tup;
-    flatmap_t *mp;
-
-    hp    = HAlloc(BIF_P, (MAP_HEADER_FLATMAP_SZ + 1));
-    tup   = make_tuple(hp);
-    *hp++ = make_arityval(0);
-
-    mp    = (flatmap_t*)hp;
-    mp->thing_word = MAP_HEADER_FLATMAP;
-    mp->size = 0;
-    mp->keys = tup;
-
-    BIF_RET(make_flatmap(mp));
-}
-
 /* maps:put/3 */
 
 BIF_RETTYPE maps_put_3(BIF_ALIST_3) {
@@ -1707,11 +1693,16 @@ int erts_maps_update(Process *p, Eterm key, Eterm value, Eterm map, Eterm *res)
 	return 0;
 
 found_key:
-	*hp++ = value;
-	vs++;
-	if (++i < n)
-	    sys_memcpy(hp, vs, (n - i)*sizeof(Eterm));
-	*res = make_flatmap(shp);
+        if(*vs == value) {
+            HRelease(p, shp + MAP_HEADER_FLATMAP_SZ + n, shp);
+            *res = map;
+        } else {
+	    *hp++ = value;
+	    vs++;
+	    if (++i < n)
+	       sys_memcpy(hp, vs, (n - i)*sizeof(Eterm));
+	    *res = make_flatmap(shp);
+        }
 	return 1;
     }
 
@@ -1767,9 +1758,7 @@ Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
 	if (is_immed(key)) {
 	    for( i = 0; i < n; i ++) {
 		if (ks[i] == key) {
-		    *hp++ = value;
-		    vs++;
-		    c = 1;
+                    goto found_key;
 		} else {
 		    *hp++ = *vs++;
 		}
@@ -1777,18 +1766,13 @@ Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
 	} else {
 	    for( i = 0; i < n; i ++) {
 		if (EQ(ks[i], key)) {
-		    *hp++ = value;
-		    vs++;
-		    c = 1;
+		    goto found_key;
 		} else {
 		    *hp++ = *vs++;
 		}
 	    }
 	}
 
-	if (c)
-	    return res;
-
 	/* the map will grow */
 
 	if (n >= MAP_SMALL_MAP_LIMIT) {
@@ -1843,6 +1827,18 @@ Eterm erts_maps_put(Process *p, Eterm key, Eterm value, Eterm map) {
 	 */
 	*shp = make_pos_bignum_header(0);
 	return res;
+
+found_key:
+        if(*vs == value) {
+            HRelease(p, shp + MAP_HEADER_FLATMAP_SZ + n, shp);
+            return map;
+        } else {
+            *hp++ = value;
+            vs++;
+            if (++i < n)
+               sys_memcpy(hp, vs, (n - i)*sizeof(Eterm));
+            return res;
+        }
     }
     ASSERT(is_hashmap(map));
 
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index ee6e6085b6..7339aa8874 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -1040,7 +1040,7 @@ ERL_NIF_TERM enif_make_copy(ErlNifEnv* dst_env, ERL_NIF_TERM src_term)
     Eterm* hp;
     /*
      * No preserved sharing allowed as long as literals are also preserved.
-     * Process independent environment can not be reached by purge.
+     * Process independent environment cannot be reached by purge.
      */
     sz = size_object(src_term);
     hp = alloc_heap(dst_env, sz);
diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h
index 4c09496ef1..58a217c20b 100644
--- a/erts/emulator/beam/erl_nif.h
+++ b/erts/emulator/beam/erl_nif.h
@@ -54,10 +54,16 @@
 ** 2.13: 20.1 add enif_ioq
 ** 2.14: 21.0 add enif_ioq_peek_head, enif_(mutex|cond|rwlock|thread)_name
 **                enif_vfprintf, enif_vsnprintf, enif_make_map_from_arrays
+** 2.15: 22.0 ERL_NIF_SELECT_CANCEL
 */
 #define ERL_NIF_MAJOR_VERSION 2
-#define ERL_NIF_MINOR_VERSION 14
-#define ERL_NIF_MIN_ERTS_VERSION "erts-10.0 (OTP-21)"
+#define ERL_NIF_MINOR_VERSION 15
+/*
+ * WHEN CHANGING INTERFACE VERSION, also replace erts version below
+ * with ticket syntax like "erts-@OTP-12345@", or a temporary placeholder
+ * between two @ like "erts-@MyName@", if you don't know what a ticket is.
+ */
+#define ERL_NIF_MIN_ERTS_VERSION "erts-@OTP-15095@ (OTP-22)"
 
 /*
  * The emulator will refuse to load a nif-lib with a major version
@@ -160,6 +166,8 @@ typedef int ErlNifEvent;
 #define ERL_NIF_SELECT_STOP_SCHEDULED (1 << 1)
 #define ERL_NIF_SELECT_INVALID_EVENT  (1 << 2)
 #define ERL_NIF_SELECT_FAILED         (1 << 3)
+#define ERL_NIF_SELECT_READ_CANCELLED (1 << 4)
+#define ERL_NIF_SELECT_WRITE_CANCELLED (1 << 5)
 
 typedef enum
 {
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 2427d87f66..a24f4bc193 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -3348,7 +3348,12 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
         ErtsMonotonicTime current_time = 0;
 
         aux_work = erts_atomic32_read_acqb(&ssi->aux_work);
-        if (aux_work && !ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+
+        if (aux_work && ERTS_SCHEDULER_IS_DIRTY(esdp)) {
+            ERTS_INTERNAL_ERROR("Executing aux work on a dirty scheduler.");
+        }
+
+        if (aux_work) {
             if (!thr_prgr_active) {
                 erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
                 sched_wall_time_change(esdp, 1);
@@ -3360,16 +3365,14 @@ scheduler_wait(int *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
         }
 
         if (aux_work) {
-            if (!ERTS_SCHEDULER_IS_DIRTY(esdp)) {
-                flgs = erts_atomic32_read_acqb(&ssi->flags);
-                current_time = erts_get_monotonic_time(esdp);
-                if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) {
-                    if (!thr_prgr_active) {
-                        erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
-                        sched_wall_time_change(esdp, 1);
-                    }
-                    erts_bump_timers(esdp->timer_wheel, current_time);
+            flgs = erts_atomic32_read_acqb(&ssi->flags);
+            current_time = erts_get_monotonic_time(esdp);
+            if (current_time >= erts_next_timeout_time(esdp->next_tmo_ref)) {
+                if (!thr_prgr_active) {
+                    erts_thr_progress_active(erts_thr_prgr_data(esdp), thr_prgr_active = 1);
+                    sched_wall_time_change(esdp, 1);
                 }
+                erts_bump_timers(esdp->timer_wheel, current_time);
             }
         }
         else {
@@ -4103,9 +4106,7 @@ schedule_bound_processes(ErtsRunQueue *rq,
 static ERTS_INLINE void
 clear_proc_dirty_queue_bit(Process *p, ErtsRunQueue *rq, int prio_bit)
 {
-#ifdef DEBUG
     erts_aint32_t old;
-#endif
     erts_aint32_t qb = prio_bit;
     if (rq == ERTS_DIRTY_CPU_RUNQ)
 	qb <<= ERTS_PDSFLGS_IN_CPU_PRQ_MASK_OFFSET;
@@ -4113,13 +4114,8 @@ clear_proc_dirty_queue_bit(Process *p, ErtsRunQueue *rq, int prio_bit)
 	ASSERT(rq == ERTS_DIRTY_IO_RUNQ);
 	qb <<= ERTS_PDSFLGS_IN_IO_PRQ_MASK_OFFSET;
     }
-#ifdef DEBUG
-    old = (int)
-#else
-	(void)
-#endif
-	erts_atomic32_read_band_mb(&p->dirty_state, ~qb);
-    ASSERT(old & qb);
+    old = (int) erts_atomic32_read_band_mb(&p->dirty_state, ~qb);
+    ASSERT(old & qb); (void)old;
 }
 
 
@@ -7281,9 +7277,7 @@ msb_scheduler_type_switch(ErtsSchedType sched_type,
     Uint32 nrml_prio, dcpu_prio, dio_prio;
     ErtsSchedType exec_type;
     ErtsRunQueue *exec_rq;
-#ifdef DEBUG
     erts_aint32_t dbg_val;
-#endif
 
     ASSERT(schdlr_sspnd.msb.ongoing);
 
@@ -7398,16 +7392,12 @@ msb_scheduler_type_switch(ErtsSchedType sched_type,
      * Suspend this scheduler and wake up scheduler
      * number one of another type...
      */
-#ifdef DEBUG
     dbg_val =
-#else
-    (void)
-#endif
         erts_atomic32_read_bset_mb(&esdp->ssi->flags,
                                        (ERTS_SSI_FLG_SUSPENDED
                                         | ERTS_SSI_FLG_MSB_EXEC),
                                        ERTS_SSI_FLG_SUSPENDED);
-    ASSERT(dbg_val & ERTS_SSI_FLG_MSB_EXEC);
+    ASSERT(dbg_val & ERTS_SSI_FLG_MSB_EXEC); (void)dbg_val;
 
     switch (exec_type) {
     case ERTS_SCHED_NORMAL:
@@ -7425,11 +7415,7 @@ msb_scheduler_type_switch(ErtsSchedType sched_type,
         break;
     }
 
-#ifdef DEBUG
     dbg_val =
-#else
-    (void)
-#endif
         erts_atomic32_read_bset_mb(&exec_rq->scheduler->ssi->flags,
                                        (ERTS_SSI_FLG_SUSPENDED
                                         | ERTS_SSI_FLG_MSB_EXEC),
@@ -9000,11 +8986,8 @@ erts_suspend(Process* c_p, ErtsProcLocks c_p_locks, Port *busy_port)
 	suspend = 1;
 
     if (suspend) {
-#ifdef DEBUG
-	int res =
-#endif
-	    suspend_process(c_p, c_p);
-	ASSERT(res);
+	int res = suspend_process(c_p, c_p);
+	ASSERT(res); (void)res;
     }
 
     if (!(c_p_locks & ERTS_PROC_LOCK_STATUS))
@@ -12602,9 +12585,7 @@ erts_continue_exit_process(Process *p)
 
  yield:
 
-#ifdef DEBUG
     ASSERT(yield_allowed);
-#endif
 
     ERTS_LC_ASSERT(curr_locks == erts_proc_lc_my_proc_locks(p));
     ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN & curr_locks);
diff --git a/erts/emulator/beam/erl_process_dump.c b/erts/emulator/beam/erl_process_dump.c
index ac5054ea10..0286f6a0d2 100644
--- a/erts/emulator/beam/erl_process_dump.c
+++ b/erts/emulator/beam/erl_process_dump.c
@@ -998,13 +998,16 @@ dump_module_literals(fmtfn_t to, void *to_arg, ErtsLiteralArea* lit_area)
                     }
                     erts_putc(to, to_arg, '\n');
                 }
-            } else {
-                /* Dump everything else in the external format */
+            } else if (is_export_header(w) || is_fun_header(w)) {
                 dump_externally(to, to_arg, term);
                 erts_putc(to, to_arg, '\n');
             }
             size = 1 + header_arity(w);
             switch (w & _HEADER_SUBTAG_MASK) {
+            case FUN_SUBTAG:
+                ASSERT(((ErlFunThing*)(htop))->num_free == 0);
+                size += 1;
+                break;
             case MAP_SUBTAG:
                 if (is_flatmap_header(w)) {
                     size += 1 + flatmap_get_size(htop);
diff --git a/erts/emulator/beam/erl_unicode.c b/erts/emulator/beam/erl_unicode.c
index d225916ac5..1d6869a7cd 100644
--- a/erts/emulator/beam/erl_unicode.c
+++ b/erts/emulator/beam/erl_unicode.c
@@ -1358,11 +1358,9 @@ Uint erts_atom_to_string_length(Eterm atom)
     else {
         byte* err_pos;
         Uint num_chars;
-#ifdef DEBUG
         int ares =
-#endif
             erts_analyze_utf8(ap->name, ap->len, &err_pos, &num_chars, NULL);
-        ASSERT(ares == ERTS_UTF8_OK);
+        ASSERT(ares == ERTS_UTF8_OK); (void)ares;
 
         return num_chars;
     }
diff --git a/erts/emulator/beam/erl_vm.h b/erts/emulator/beam/erl_vm.h
index 4089fac48e..35eae18394 100644
--- a/erts/emulator/beam/erl_vm.h
+++ b/erts/emulator/beam/erl_vm.h
@@ -41,8 +41,8 @@
 #define MAX_REG 1024            /* Max number of x(N) registers used */
 
 /*
- * The new arithmetic operations need some extra X registers in the register array.
- * so does the gc_bif's (i_gc_bif3 need 3 extra).
+ * The new trapping length/1 implementation needs 3 extra registers in the
+ * register array.
  */
 #define ERTS_X_REGS_ALLOCATED (MAX_REG+3)
 
@@ -146,6 +146,21 @@
       (HEAP_TOP(p) = HEAP_TOP(p) + (sz), HEAP_TOP(p) - (sz))))
 #endif
 
+/*
+ * Always allocate in a heap fragment, never on the heap.
+ */
+#if defined(VALGRIND)
+/* Running under valgrind, allocate exactly as much as needed.*/
+#  define HeapFragOnlyAlloc(p, sz)              \
+  (ASSERT((sz) >= 0),                           \
+   ErtsHAllocLockCheck(p),                      \
+   erts_heap_alloc((p),(sz),0))
+#else
+#  define HeapFragOnlyAlloc(p, sz)              \
+  (ASSERT((sz) >= 0),                           \
+   ErtsHAllocLockCheck(p),                      \
+   erts_heap_alloc((p),(sz),512))
+#endif
 
 /*
  * Description for each instruction (defined here because the name and
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index 0631404599..77b5a3ca05 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -292,7 +292,6 @@ union erl_off_heap_ptr {
 /* controls warning mapping in error_logger */
 
 extern Eterm node_cookie;
-extern Uint display_items;	/* no of items to display in traces etc */
 
 extern int erts_backtrace_depth;
 extern erts_atomic32_t erts_max_gen_gcs;
@@ -892,6 +891,11 @@ void erts_init_bif(void);
 Eterm erl_send(Process *p, Eterm to, Eterm msg);
 int erts_set_group_leader(Process *proc, Eterm new_gl);
 
+/* erl_bif_guard.c */
+
+void erts_init_bif_guard(void);
+Eterm erts_trapping_length_1(Process* p, Eterm* args);
+
 /* erl_bif_op.c */
 
 Eterm erl_is_function(Process* p, Eterm arg1, Eterm arg2);
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 42c1168f85..df60e889f3 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -238,6 +238,7 @@ HANDLE_APPLY_FUN_ERROR() {
 }
 
 DISPATCH_FUN(I) {
+    //| -no_next
     SET_I($I);
     Dispatchfun();
 }
@@ -299,6 +300,7 @@ i_call_fun_last(Fun, Deallocate) {
 }
 
 return() {
+    //| -no_next
     SET_I(c_p->cp);
     DTRACE_RETURN_FROM_PC(c_p);
 
@@ -559,17 +561,19 @@ update_list(Hd, Dst) {
     HTOP += 2;
 }
 
-i_put_tuple := i_put_tuple.make.fill;
-
-i_put_tuple.make(Dst) {
-    $Dst = make_tuple(HTOP);
-}
-
-i_put_tuple.fill(Arity) {
+put_tuple2(Dst, Arity) {
     Eterm* hp = HTOP;
     Eterm arity = $Arity;
 
+    /*
+     * If operands are not packed (in the 32-bit VM),
+     * it is not safe to use $Dst directly after I
+     * has been updated.
+     */
+    Eterm* dst_ptr = &($Dst);
+
     //| -no_next
+    ASSERT(arity != 0);
     *hp++ = make_arityval(arity);
     I = $NEXT_INSTRUCTION;
     do {
@@ -586,6 +590,7 @@ i_put_tuple.fill(Arity) {
             break;
         }
     } while (--arity != 0);
+    *dst_ptr = make_tuple(HTOP);
     HTOP = hp;
     ASSERT(VALID_INSTR(* (Eterm *)I));
     Goto(*I);
@@ -948,6 +953,7 @@ build_stacktrace() {
 }
 
 raw_raise() {
+    //| -no_prefetch
     Eterm class = x(0);
     Eterm value = x(1);
     Eterm stacktrace = x(2);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index e76d896ffc..cb414143fc 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -483,9 +483,16 @@ is_eq f? s s
 is_ne f? s s
 
 #
-# Putting things.
+# Putting tuples.
+#
+# Code compiled with OTP 22 and later uses put_tuple2
+# to construct a tuple.
+#
+# Code compiled before OTP 22 uses put_tuple + one put instruction
+# per element. Translate to put_tuple2.
 #
 
+i_put_tuple/2
 put_tuple Arity Dst => i_put_tuple Dst u
 
 i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \
@@ -495,11 +502,13 @@ i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \
 i_put_tuple Dst Arity Puts=* | put S => \
 	    tuple_append_put(Arity, Dst, Puts, S)
 
-i_put_tuple/2
+i_put_tuple Dst Arity Puts=* => put_tuple2 Dst Arity Puts
 
-i_put_tuple xy I
+put_tuple2 xy I
 
 #
+# Putting lists.
+#
 # The instruction "put_list Const [] Dst" were generated in rare
 # circumstances up to and including OTP 18. Starting with OTP 19,
 # AFAIK, it should never be generated.
@@ -993,10 +1002,11 @@ bif1 Fail Bif=u$bif:erlang:get/1 Src=s Dst=d => gen_get(Src, Dst)
 
 bif2 Jump=j u$bif:erlang:element/2 S1=s S2=xy Dst=d => gen_element(Jump, S1, S2, Dst)
 
-bif1 p Bif S1 Dst => bif1_body Bif S1 Dst
+bif1 p Bif S1 Dst         => i_bif1_body Bif S1 Dst
+bif1 Fail=f Bif S1 Dst    => i_bif1 Fail Bif S1 Dst
 
-bif2 p Bif S1 S2 Dst => i_bif2_body Bif S1 S2 Dst
-bif2 Fail Bif S1 S2 Dst => i_bif2 Fail Bif S1 S2 Dst
+bif2 p Bif S1 S2 Dst      => i_bif2_body Bif S1 S2 Dst
+bif2 Fail=f Bif S1 S2 Dst => i_bif2 Fail Bif S1 S2 Dst
 
 i_get_hash c I d
 i_get s d
@@ -1014,10 +1024,12 @@ i_fast_element xy j? I d
 
 i_element xy j? s d
 
-bif1 f? b s d
-bif1_body b s d
+i_bif1 f? b s d
+i_bif1_body b s d
 i_bif2 f? b s s d
 i_bif2_body b s s d
+i_bif3 f? b s s s d
+i_bif3_body b s s s d
 
 #
 # Internal calls.
@@ -1080,67 +1092,73 @@ func_info M F A => i_func_info u M F A
 %warm
 bs_start_match2 Fail=f ica X Y D => jump Fail
 bs_start_match2 Fail Bin X Y D => i_bs_start_match2 Bin Fail X Y D
-i_bs_start_match2 xy f t t x
+i_bs_start_match2 xy f t t d
 
 bs_save2 Reg Index => gen_bs_save(Reg, Index)
-i_bs_save2 x t
+i_bs_save2 xy t
 
 bs_restore2 Reg Index => gen_bs_restore(Reg, Index)
-i_bs_restore2 x t
+i_bs_restore2 xy t
 
 # Matching integers
 bs_match_string Fail Ms Bits Val => i_bs_match_string Ms Fail Bits Val
 
-i_bs_match_string x f W W
+i_bs_match_string xy f W W
 
 # Fetching integers from binaries.
-bs_get_integer2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_integer2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
 			gen_get_integer2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
 
-i_bs_get_integer_small_imm x W f? t x
-i_bs_get_integer_imm x W t f? t x
-i_bs_get_integer f? t t x s x
-i_bs_get_integer_8 x f? x
-i_bs_get_integer_16 x f? x
+i_bs_get_integer_small_imm Ms Bits Fail Flags Y=y => \
+   i_bs_get_integer_small_imm Ms Bits Fail Flags x | move x Y
+
+i_bs_get_integer_imm Ms Bits Live Fail Flags Y=y => \
+   i_bs_get_integer_imm Ms Bits Live Fail Flags x | move x Y
+
+i_bs_get_integer_small_imm xy W f? t x
+i_bs_get_integer_imm xy W t f? t x
+i_bs_get_integer f? t t xy s d
+i_bs_get_integer_8 xy f? d
+i_bs_get_integer_16 xy f? d
 
 %if ARCH_64
-i_bs_get_integer_32 x f? x
+i_bs_get_integer_32 xy f? d
 %endif
 
 # Fetching binaries from binaries.
-bs_get_binary2 Fail=f Ms=x Live=u Sz=sq Unit=u Flags=u Dst=d => \
+bs_get_binary2 Fail=f Ms=xy Live=u Sz=sq Unit=u Flags=u Dst=d => \
 			gen_get_binary2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
 
-i_bs_get_binary_imm2 f? x t W t x
-i_bs_get_binary2 f x t? s t x
-i_bs_get_binary_all2 f? x t t x
-i_bs_get_binary_all_reuse x f? t
+i_bs_get_binary_imm2 f? xy t W t d
+i_bs_get_binary2 f xy t? s t d
+i_bs_get_binary_all2 f? xy t t d
+i_bs_get_binary_all_reuse xy f? t
 
 # Fetching float from binaries.
-bs_get_float2 Fail=f Ms=x Live=u Sz=s Unit=u Flags=u Dst=d => \
+bs_get_float2 Fail=f Ms=xy Live=u Sz=s Unit=u Flags=u Dst=d => \
 		gen_get_float2(Fail, Ms, Live, Sz, Unit, Flags, Dst)
 
 bs_get_float2 Fail=f Ms=x Live=u Sz=q Unit=u Flags=u Dst=d => jump Fail
 
-i_bs_get_float2 f? x t s t x
+i_bs_get_float2 f? xy t s t d
 
 # Miscellanous
 
-bs_skip_bits2 Fail=f Ms=x Sz=sq Unit=u Flags=u => \
+bs_skip_bits2 Fail=f Ms=xy Sz=sq Unit=u Flags=u => \
 			gen_skip_bits2(Fail, Ms, Sz, Unit, Flags)
 
-i_bs_skip_bits_imm2 f? x W
-i_bs_skip_bits2 f? x xy t
-i_bs_skip_bits_all2 f? x t
+i_bs_skip_bits_imm2 f? xy W
+i_bs_skip_bits2 f? xy xy t
+i_bs_skip_bits_all2 f? xy t
 
-bs_test_tail2 Fail=f Ms=x Bits=u==0 => bs_test_zero_tail2 Fail Ms
-bs_test_tail2 Fail=f Ms=x Bits=u => bs_test_tail_imm2 Fail Ms Bits
-bs_test_zero_tail2 f? x
-bs_test_tail_imm2 f? x W
+bs_test_tail2 Fail=f Ms=xy Bits=u==0 => bs_test_zero_tail2 Fail Ms
+bs_test_tail2 Fail=f Ms=xy Bits=u => bs_test_tail_imm2 Fail Ms Bits
+bs_test_zero_tail2 f? xy
+bs_test_tail_imm2 f? xy W
 
 bs_test_unit F Ms Unit=u==8 => bs_test_unit8 F Ms
-bs_test_unit f? x t
-bs_test_unit8 f? x
+bs_test_unit f? xy t
+bs_test_unit8 f? xy
 
 # An y register operand for bs_context_to_binary is rare,
 # but can happen because of inlining.
@@ -1152,23 +1170,55 @@ bs_context_to_binary Y=y => move Y x | bs_context_to_binary x
 
 bs_context_to_binary x
 
+# Gets a bitstring from the tail of a context.
+bs_get_tail xy d t
+
+# New bs_start_match variant for contexts with external position storage.
+#
+# bs_get/set_position is used to save positions into registers instead of
+# "slots" in the context itself, which lets us continue matching even after
+# we've passed it off to another function.
+
+%if ARCH_64
+bs_start_match3 Fail Bin Live Ctx | bs_get_position Ctx Pos=x Ignored => \
+    i_bs_start_match3_gp Bin Live Fail Ctx Pos
+i_bs_start_match3_gp xy t f d x
+%endif
+
+bs_start_match3 Fail=f ica Live Dst => jump Fail
+bs_start_match3 Fail Bin Live Dst => i_bs_start_match3 Bin Live Fail Dst
+
+i_bs_start_match3 xy t f d
+
+# Match context position instructions. 64-bit assumes that all positions can
+# fit into an unsigned small.
+
+%if ARCH_64
+    bs_get_position Src Dst Live => i_bs_get_position Src Dst
+    i_bs_get_position xy xy
+    bs_set_position xy xy
+%else
+    bs_get_position xy d t?
+    bs_set_position xy xy
+%endif
+
 #
 # Utf8/utf16/utf32 support. (R12B-5)
 #
-bs_get_utf8 Fail=f Ms=x u u Dst=d => i_bs_get_utf8 Ms Fail Dst
-i_bs_get_utf8 x f? x
+bs_get_utf8 Fail=f Ms=xy u u Dst=d => i_bs_get_utf8 Ms Fail Dst
+i_bs_get_utf8 xy f? d
 
-bs_skip_utf8 Fail=f Ms=x u u => i_bs_get_utf8 Ms Fail x
+bs_skip_utf8 Fail=f Ms=xy u u => i_bs_get_utf8 Ms Fail x
 
-bs_get_utf16 Fail=f Ms=x u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
-bs_skip_utf16 Fail=f Ms=x u Flags=u => i_bs_get_utf16 Ms Fail Flags x
+bs_get_utf16 Fail=f Ms=xy u Flags=u Dst=d => i_bs_get_utf16 Ms Fail Flags Dst
+bs_skip_utf16 Fail=f Ms=xy u Flags=u => i_bs_get_utf16 Ms Fail Flags x
 
-i_bs_get_utf16 x f? t x
+i_bs_get_utf16 xy f? t d
 
-bs_get_utf32 Fail=f Ms=x Live=u Flags=u Dst=d => \
+bs_get_utf32 Fail=f Ms=xy Live=u Flags=u Dst=d => \
 	bs_get_integer2 Fail Ms Live i=32 u=1 Flags Dst | \
 	i_bs_validate_unicode_retract Fail Dst Ms
-bs_skip_utf32 Fail=f Ms=x Live=u Flags=u => \
+bs_skip_utf32 Fail=f Ms=xy Live=u Flags=u => \
 	bs_get_integer2 Fail Ms Live i=32 u=1 Flags x | \
 	i_bs_validate_unicode_retract Fail x Ms
 
@@ -1182,6 +1232,9 @@ i_bs_validate_unicode_retract j s S
 
 bs_init2 Fail Sz Words Regs Flags Dst | binary_too_big(Sz) => system_limit Fail
 
+bs_init2 Fail Sz Words Regs Flags Dst=y => \
+   bs_init2 Fail Sz Words Regs Flags x | move x Dst
+
 bs_init2 Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init Sz Regs Dst
 
 bs_init2 Fail Sz=u Words Regs Flags Dst => \
@@ -1202,6 +1255,8 @@ i_bs_init_heap W I t? x
 
 
 bs_init_bits Fail Sz=o Words Regs Flags Dst => system_limit Fail
+bs_init_bits Fail Sz Words Regs Flags Dst=y => \
+   bs_init_bits Fail Sz Words Regs Flags x | move x Dst
 
 bs_init_bits Fail Sz=u Words=u==0 Regs Flags Dst => i_bs_init_bits Sz Regs Dst
 bs_init_bits Fail Sz=u Words Regs Flags Dst =>  i_bs_init_bits_heap Sz Words Regs Dst
@@ -1230,7 +1285,7 @@ bs_private_append Fail Size Unit Bin Flags Dst => \
 
 bs_init_writable
 
-i_bs_append j? I t? t s x
+i_bs_append j? I t? t s xy
 i_bs_private_append j? t s S x
 
 #
@@ -1447,80 +1502,80 @@ gc_bif2 Fail Live u$bif:erlang:sminus/2 S1 S2 Dst => \
 
 #
 # Optimize addition and subtraction of small literals using
-# the i_increment/4 instruction (in bodies, not in guards).
+# the i_increment/3 instruction (in bodies, not in guards).
 #
 
 gen_plus p Live Int=i Reg=d Dst => \
-	gen_increment(Reg, Int, Live, Dst)
+	gen_increment(Reg, Int, Dst)
 gen_plus p Live Reg=d Int=i Dst => \
-	gen_increment(Reg, Int, Live, Dst)
+	gen_increment(Reg, Int, Dst)
 
 gen_minus p Live Reg=d Int=i Dst | negation_is_small(Int) => \
-	gen_increment_from_minus(Reg, Int, Live, Dst)
+	gen_increment_from_minus(Reg, Int, Dst)
 
 #
-# GCing arithmetic instructions.
+# Arithmetic instructions.
 #
 
-gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Live Dst
+gen_plus Fail Live S1 S2 Dst => i_plus S1 S2 Fail Dst
 
-gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Live Dst
+gen_minus Fail Live S1 S2 Dst => i_minus S1 S2 Fail Dst
 
 gc_bif2 Fail Live u$bif:erlang:stimes/2 S1 S2 Dst => \
-  i_times Fail Live S1 S2 Dst
+  i_times Fail S1 S2 Dst
 
 gc_bif2 Fail Live u$bif:erlang:div/2 S1 S2 Dst => \
-  i_m_div Fail Live S1 S2 Dst
+  i_m_div Fail S1 S2 Dst
 gc_bif2 Fail Live u$bif:erlang:intdiv/2 S1 S2 Dst => \
-  i_int_div Fail Live S1 S2 Dst
+  i_int_div Fail S1 S2 Dst
 
 gc_bif2 Fail Live u$bif:erlang:rem/2 S1 S2 Dst => \
-  i_rem S1 S2 Fail Live Dst
+  i_rem S1 S2 Fail Dst
 
 gc_bif2 Fail Live u$bif:erlang:bsl/2 S1 S2 Dst => \
-  i_bsl S1 S2 Fail Live Dst
+  i_bsl S1 S2 Fail Dst
 gc_bif2 Fail Live u$bif:erlang:bsr/2 S1 S2 Dst => \
-  i_bsr S1 S2 Fail Live Dst
+  i_bsr S1 S2 Fail Dst
 
 gc_bif2 Fail Live u$bif:erlang:band/2 S1 S2 Dst => \
-  i_band S1 S2 Fail Live Dst
+  i_band S1 S2 Fail Dst
 
 gc_bif2 Fail Live u$bif:erlang:bor/2 S1 S2 Dst => \
-  i_bor Fail Live S1 S2 Dst
+  i_bor Fail S1 S2 Dst
 
 gc_bif2 Fail Live u$bif:erlang:bxor/2 S1 S2 Dst => \
-  i_bxor Fail Live S1 S2 Dst
+  i_bxor Fail S1 S2 Dst
 
-gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst
+gc_bif1 Fail Live u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src Dst
 
-i_increment rxy W t d
+i_increment rxy W d
 
-i_plus x xy j? t d
-i_plus s s  j? t d
+i_plus x xy j? d
+i_plus s s  j? d
 
-i_minus x x j? t d
-i_minus s s j? t d
+i_minus x x j? d
+i_minus s s j? d
 
-i_times j? t s s d
+i_times j? s s d
 
-i_m_div j? t s s d
-i_int_div j? t s s d
+i_m_div j? s s d
+i_int_div j? s s d
 
-i_rem x x j? t d
-i_rem s s j? t d
+i_rem x x j? d
+i_rem s s j? d
 
-i_bsl s s j? t d
-i_bsr s s j? t d
+i_bsl s s j? d
+i_bsr s s j? d
 
-i_band x c j? t d
-i_band s s j? t d
+i_band x c j? d
+i_band s s j? d
 
-i_bor j? I s s d
-i_bxor j? I s s d
+i_bor j? s s d
+i_bxor j? s s d
 
-i_int_bnot Fail Src=c Live Dst => move Src x | i_int_bnot Fail x Live Dst
+i_int_bnot Fail Src=c Dst => move Src x | i_int_bnot Fail x Dst
 
-i_int_bnot j? S t d
+i_int_bnot j? S d
 
 #
 # Old guard BIFs that creates heap fragments are no longer allowed.
@@ -1533,29 +1588,27 @@ bif1 Fail u$bif:erlang:round/1 s d => too_old_compiler
 bif1 Fail u$bif:erlang:trunc/1 s d => too_old_compiler
 
 #
-# Guard BIFs.
+# Handle the length/1 guard BIF specially to make it trappable.
 #
-gc_bif1 Fail I Bif Src Dst => \
-	gen_guard_bif1(Fail, I, Bif, Src, Dst)
-
-gc_bif2 Fail I Bif S1 S2 Dst => \
-	gen_guard_bif2(Fail, I, Bif, S1, S2, Dst)
 
-gc_bif3 Fail I Bif S1 S2 S3 Dst => \
-	gen_guard_bif3(Fail, I, Bif, S1, S2, S3, Dst)
+gc_bif1 Fail=j Live u$bif:erlang:length/1 Src Dst => \
+   i_length_setup Live Src | i_length Fail Live Dst
 
-i_gc_bif1 j? W s t? d
+i_length_setup t xyc
 
-i_gc_bif2 j? W t? s s d
+i_length j? t d
 
-ii_gc_bif3/7
+#
+# Guard BIFs.
+#
+gc_bif1 p Live Bif Src Dst           => i_bif1_body Bif Src Dst
+gc_bif1 Fail=f Live Bif Src Dst      => i_bif1 Fail Bif Src Dst
 
-# A specific instruction can only have 6 operands, so we must
-# pass one of the arguments in an x register.
-ii_gc_bif3 Fail Bif Live S1 S2 S3 Dst => \
-  move S1 x | i_gc_bif3 Fail Bif Live S2 S3 Dst
+gc_bif2 p Live Bif S1 S2 Dst         => i_bif2_body Bif S1 S2 Dst
+gc_bif2 Fail=f Live Bif S1 S2 Dst    => i_bif2 Fail Bif S1 S2 Dst
 
-i_gc_bif3 j? W t? s s d
+gc_bif3 p Live Bif S1 S2 S3 Dst      => i_bif3_body Bif S1 S2 S3 Dst
+gc_bif3 Fail=f Live Bif S1 S2 S3 Dst => i_bif3 Fail Bif S1 S2 S3 Dst
 
 #
 # The following instruction is specially handled in beam_load.c
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index d81bd89a48..f70f5e73d4 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -1569,7 +1569,7 @@ make_hash2(Eterm term)
  * MUST BE USED AS INPUT FOR THE HASH. Two different terms must always have a
  * chance of hashing different when salted: hash([Salt|A]) vs hash([Salt|B]).
  *
- * This is why we can not use cached hash values for atoms for example.
+ * This is why we cannot use cached hash values for atoms for example.
  *
  */
 
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index 47eb5df7dd..c75b4045f7 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -4568,7 +4568,7 @@ static void desc_close_read(inet_descriptor* desc)
 {
     if (desc->s != INVALID_SOCKET) {
 #ifdef __WIN32__
-	/* This call can not be right???
+	/* This call cannot be right???
 	 * We want to turn off read events but keep any write events.
 	 * But on windows driver_select(...,READ,1) is only used as a
 	 * way to hook into the pollset. sock_select is used to control
diff --git a/erts/emulator/drivers/unix/ttsl_drv.c b/erts/emulator/drivers/unix/ttsl_drv.c
index 28c6cc0f94..11bb4373d8 100644
--- a/erts/emulator/drivers/unix/ttsl_drv.c
+++ b/erts/emulator/drivers/unix/ttsl_drv.c
@@ -31,7 +31,7 @@
 static int ttysl_init(void);
 static ErlDrvData ttysl_start(ErlDrvPort, char*);
 
-#ifdef HAVE_TERMCAP  /* else make an empty driver that can not be opened */
+#ifdef HAVE_TERMCAP  /* else make an empty driver that cannot be opened */
 
 #ifndef WANT_NONBLOCKING
 #define WANT_NONBLOCKING
diff --git a/erts/emulator/internal_doc/CarrierMigration.md b/erts/emulator/internal_doc/CarrierMigration.md
index 3a796d11b7..bb3d8aac28 100644
--- a/erts/emulator/internal_doc/CarrierMigration.md
+++ b/erts/emulator/internal_doc/CarrierMigration.md
@@ -34,8 +34,7 @@ Solution
 --------
 
 In order to prevent scenarios like this we've implemented support for
-migration of multi-block carriers between allocator instances of the
-same type.
+migration of multi-block carriers between allocator instances.
 
 ### Management of Free Blocks ###
 
@@ -130,10 +129,6 @@ threads may have references to it via the pool.
 
 ### Migration ###
 
-There exists one pool for each allocator type enabling migration of
-carriers between scheduler specific allocator instances of the same
-allocator type.
-
 Each allocator instance keeps track of the current utilization of its
 multi-block carriers. When the total utilization falls below the "abandon
 carrier utilization limit" it starts to inspect the utilization of the
@@ -208,8 +203,8 @@ limited. We only inspect a limited number of carriers. If none of
 those carriers had a free block large enough to satisfy the allocation
 request, the search will fail. A carrier in the pool can also be BUSY
 if another thread is currently doing block deallocation work on the
-carrier. A BUSY carrier will also be skipped by the search as it can
-not satisfy the request. The pool is lock-free and we do not want to
+carrier. A BUSY carrier will also be skipped by the search as it cannot
+satisfy the request. The pool is lock-free and we do not want to
 block, waiting for the other thread to finish.
 
 ### The bad cluster problem ###
@@ -287,11 +282,3 @@ reduced using the `aoffcbf` strategy. A trade off between memory
 consumption and performance is however inevitable, and it is up to
 the user to decide what is most important. 
 
-Further work
-------------
-
-It would be quite easy to extend this to allow migration of multi-block
-carriers between all allocator types. More or less the only obstacle
-is maintenance of the statistics information.
-
-
diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c
index 5fff55b467..eed2ff8e1b 100644
--- a/erts/emulator/nifs/common/prim_file_nif.c
+++ b/erts/emulator/nifs/common/prim_file_nif.c
@@ -478,7 +478,8 @@ static ERL_NIF_TERM open_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]
     ERL_NIF_TERM result;
     efile_path_t path;
 
-    if(argc != 2 || !enif_is_list(env, argv[1])) {
+    ASSERT(argc == 2);
+    if(!enif_is_list(env, argv[1])) {
         return enif_make_badarg(env);
     }
 
@@ -553,7 +554,8 @@ static ERL_NIF_TERM read_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con
     SysIOVec read_vec[1];
     ErlNifBinary result;
 
-    if(argc != 1 || !enif_is_number(env, argv[0])) {
+    ASSERT(argc == 1);
+    if(!enif_is_number(env, argv[0])) {
         return enif_make_badarg(env);
     }
 
@@ -591,7 +593,8 @@ static ERL_NIF_TERM write_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, co
     Sint64 bytes_written;
     ERL_NIF_TERM tail;
 
-    if(argc != 1 || !enif_inspect_iovec(env, 64, argv[0], &tail, &input)) {
+    ASSERT(argc == 1);
+    if(!enif_inspect_iovec(env, 64, argv[0], &tail, &input)) {
         return enif_make_badarg(env);
     }
 
@@ -614,8 +617,8 @@ static ERL_NIF_TERM pread_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, co
     SysIOVec read_vec[1];
     ErlNifBinary result;
 
-    if(argc != 2 || !enif_is_number(env, argv[0])
-                 || !enif_is_number(env, argv[1])) {
+    ASSERT(argc == 2);
+    if(!enif_is_number(env, argv[0]) || !enif_is_number(env, argv[1])) {
         return enif_make_badarg(env);
     }
 
@@ -654,8 +657,9 @@ static ERL_NIF_TERM pwrite_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, c
     Sint64 bytes_written, offset;
     ERL_NIF_TERM tail;
 
-    if(argc != 2 || !enif_is_number(env, argv[0])
-                 || !enif_inspect_iovec(env, 64, argv[1], &tail, &input)) {
+    ASSERT(argc == 2);
+    if(!enif_is_number(env, argv[0])
+       || !enif_inspect_iovec(env, 64, argv[1], &tail, &input)) {
         return enif_make_badarg(env);
     }
 
@@ -682,7 +686,8 @@ static ERL_NIF_TERM seek_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con
     Sint64 new_position, offset;
     enum efile_seek_t seek;
 
-    if(argc != 2 || !enif_get_int64(env, argv[1], &offset)) {
+    ASSERT(argc == 2);
+    if(!enif_get_int64(env, argv[1], &offset)) {
         return enif_make_badarg(env);
     }
 
@@ -706,7 +711,8 @@ static ERL_NIF_TERM seek_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con
 static ERL_NIF_TERM sync_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     int data_only;
 
-    if(argc != 1 || !enif_get_int(env, argv[0], &data_only)) {
+    ASSERT(argc == 1);
+    if(!enif_get_int(env, argv[0], &data_only)) {
         return enif_make_badarg(env);
     }
 
@@ -718,9 +724,7 @@ static ERL_NIF_TERM sync_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, con
 }
 
 static ERL_NIF_TERM truncate_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
-    if(argc != 0) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 0);
 
     if(!efile_truncate(d)) {
         return posix_error_to_tuple(env, d->posix_errno);
@@ -732,8 +736,8 @@ static ERL_NIF_TERM truncate_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc,
 static ERL_NIF_TERM allocate_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
     Sint64 offset, length;
 
-    if(argc != 2 || !enif_is_number(env, argv[0])
-                 || !enif_is_number(env, argv[1])) {
+    ASSERT(argc == 2);
+    if(!enif_is_number(env, argv[0]) || !enif_is_number(env, argv[1])) {
         return enif_make_badarg(env);
     }
 
@@ -754,8 +758,8 @@ static ERL_NIF_TERM advise_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, c
     enum efile_advise_t advise;
     Sint64 offset, length;
 
-    if(argc != 3 || !enif_is_number(env, argv[0])
-                 || !enif_is_number(env, argv[1])) {
+    ASSERT(argc == 3);
+    if(!enif_is_number(env, argv[0]) || !enif_is_number(env, argv[1])) {
         return enif_make_badarg(env);
     }
 
@@ -818,8 +822,8 @@ static ERL_NIF_TERM ipread_s32bu_p32bu_nif_impl(efile_data_t *d, ErlNifEnv *env,
 
     ErlNifBinary payload;
 
-    if(argc != 2 || !enif_is_number(env, argv[0])
-                 || !enif_is_number(env, argv[1])) {
+    ASSERT(argc == 2);
+    if(!enif_is_number(env, argv[0]) || !enif_is_number(env, argv[1])) {
         return enif_make_badarg(env);
     }
 
@@ -886,9 +890,7 @@ static ERL_NIF_TERM ipread_s32bu_p32bu_nif_impl(efile_data_t *d, ErlNifEnv *env,
 }
 
 static ERL_NIF_TERM get_handle_nif_impl(efile_data_t *d, ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) {
-    if(argc != 0) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 0);
 
     return efile_get_handle(env, d);
 }
@@ -900,7 +902,8 @@ static ERL_NIF_TERM read_info_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
     efile_path_t path;
     int follow_links;
 
-    if(argc != 2 || !enif_get_int(env, argv[1], &follow_links)) {
+    ASSERT(argc == 2);
+    if(!enif_get_int(env, argv[1], &follow_links)) {
         return enif_make_badarg(env);
     }
 
@@ -935,7 +938,8 @@ static ERL_NIF_TERM set_permissions_nif(ErlNifEnv *env, int argc, const ERL_NIF_
     efile_path_t path;
     unsigned int permissions;
 
-    if(argc != 2 || !enif_get_uint(env, argv[1], &permissions)) {
+    ASSERT(argc == 2);
+    if(!enif_get_uint(env, argv[1], &permissions)) {
         return enif_make_badarg(env);
     }
 
@@ -954,8 +958,8 @@ static ERL_NIF_TERM set_owner_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
     efile_path_t path;
     int uid, gid;
 
-    if(argc != 3 || !enif_get_int(env, argv[1], &uid)
-                 || !enif_get_int(env, argv[2], &gid)) {
+    ASSERT(argc == 3);
+    if(!enif_get_int(env, argv[1], &uid) || !enif_get_int(env, argv[2], &gid)) {
         return enif_make_badarg(env);
     }
 
@@ -974,9 +978,10 @@ static ERL_NIF_TERM set_time_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
     Sint64 accessed, modified, created;
     efile_path_t path;
 
-    if(argc != 4 || !enif_get_int64(env, argv[1], &accessed)
-                 || !enif_get_int64(env, argv[2], &modified)
-                 || !enif_get_int64(env, argv[3], &created)) {
+    ASSERT(argc == 4);
+    if(!enif_get_int64(env, argv[1], &accessed)
+       || !enif_get_int64(env, argv[2], &modified)
+       || !enif_get_int64(env, argv[3], &created)) {
         return enif_make_badarg(env);
     }
 
@@ -995,9 +1000,7 @@ static ERL_NIF_TERM read_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
     efile_path_t path;
     ERL_NIF_TERM result;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1014,9 +1017,7 @@ static ERL_NIF_TERM list_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
     efile_path_t path;
     ERL_NIF_TERM result;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1032,9 +1033,7 @@ static ERL_NIF_TERM rename_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv
 
     efile_path_t existing_path, new_path;
 
-    if(argc != 2) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 2);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1052,9 +1051,7 @@ static ERL_NIF_TERM make_hard_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_T
 
     efile_path_t existing_path, new_path;
 
-    if(argc != 2) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 2);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1072,9 +1069,7 @@ static ERL_NIF_TERM make_soft_link_nif(ErlNifEnv *env, int argc, const ERL_NIF_T
 
     efile_path_t existing_path, new_path;
 
-    if(argc != 2) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 2);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &existing_path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1092,9 +1087,7 @@ static ERL_NIF_TERM make_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
 
     efile_path_t path;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1110,9 +1103,7 @@ static ERL_NIF_TERM del_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar
 
     efile_path_t path;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1128,9 +1119,7 @@ static ERL_NIF_TERM del_dir_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
 
     efile_path_t path;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1147,7 +1136,8 @@ static ERL_NIF_TERM get_device_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_T
     ERL_NIF_TERM result;
     int device_index;
 
-    if(argc != 1 || !enif_get_int(env, argv[0], &device_index)) {
+    ASSERT(argc == 1);
+    if(!enif_get_int(env, argv[0], &device_index)) {
         return enif_make_badarg(env);
     }
 
@@ -1162,9 +1152,7 @@ static ERL_NIF_TERM get_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
     posix_errno_t posix_errno;
     ERL_NIF_TERM result;
 
-    if(argc != 0) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 0);
 
     if((posix_errno = efile_get_cwd(env, &result))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1178,9 +1166,7 @@ static ERL_NIF_TERM set_cwd_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
 
     efile_path_t path;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1256,9 +1242,7 @@ static ERL_NIF_TERM read_file_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM a
 
     ErlNifBinary result;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
@@ -1286,9 +1270,7 @@ static ERL_NIF_TERM altname_nif(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
     efile_path_t path;
     ERL_NIF_TERM result;
 
-    if(argc != 1) {
-        return enif_make_badarg(env);
-    }
+    ASSERT(argc == 1);
 
     if((posix_errno = efile_marshal_path(env, argv[0], &path))) {
         return posix_error_to_tuple(env, posix_errno);
diff --git a/erts/emulator/sys/common/erl_check_io.c b/erts/emulator/sys/common/erl_check_io.c
index c681fa481f..b4609007c9 100644
--- a/erts/emulator/sys/common/erl_check_io.c
+++ b/erts/emulator/sys/common/erl_check_io.c
@@ -993,7 +993,7 @@ enif_select(ErlNifEnv* env,
         ctl_op = ERTS_POLL_OP_DEL;
     }
     else {
-        on = 1;
+        on = !(mode & ERL_NIF_SELECT_CANCEL);
         ASSERT(mode);
         if (mode & ERL_DRV_READ) {
             ctl_events |= ERTS_POLL_EV_IN;
@@ -1119,38 +1119,51 @@ enif_select(ErlNifEnv* env,
         ret = 0;
     }
     else { /* off */
+        ret = 0;
         if (state->type == ERTS_EV_TYPE_NIF) {
-            state->driver.nif->in.pid = NIL;
-            state->driver.nif->out.pid = NIL;
-        }
-        ASSERT(state->events==0);
-        if (!wake_poller) {
-            /*
-             * Safe to close fd now as it is not in pollset
-             * or there was no need to eject fd (kernel poll)
-             */
-            if (state->type == ERTS_EV_TYPE_NIF) {
-                ASSERT(state->driver.stop.resource == resource);
-                call_stop = CALL_STOP_AND_RELEASE;
-                state->driver.stop.resource = NULL;
+            if (mode & ERL_NIF_SELECT_READ
+                && is_not_nil(state->driver.nif->in.pid)) {
+                state->driver.nif->in.pid = NIL;
+                ret |= ERL_NIF_SELECT_READ_CANCELLED;
             }
-            else {
-                ASSERT(!state->driver.stop.resource);
-                call_stop = CALL_STOP;
+            if (mode & ERL_NIF_SELECT_WRITE
+                && is_not_nil(state->driver.nif->out.pid)) {
+                state->driver.nif->out.pid = NIL;
+                ret |= ERL_NIF_SELECT_WRITE_CANCELLED;
             }
-            state->type = ERTS_EV_TYPE_NONE;
-            ret = ERL_NIF_SELECT_STOP_CALLED;
         }
-        else {
-            /* Not safe to close fd, postpone stop_select callback. */
-            if (state->type == ERTS_EV_TYPE_NONE) {
-                ASSERT(!state->driver.stop.resource);
-                state->driver.stop.resource = resource;
-                enif_keep_resource(resource);
+        if (mode & ERL_NIF_SELECT_STOP) {
+            ASSERT(state->events==0);
+            if (!wake_poller) {
+                /*
+                 * Safe to close fd now as it is not in pollset
+                 * or there was no need to eject fd (kernel poll)
+                 */
+                if (state->type == ERTS_EV_TYPE_NIF) {
+                    ASSERT(state->driver.stop.resource == resource);
+                    call_stop = CALL_STOP_AND_RELEASE;
+                    state->driver.stop.resource = NULL;
+                }
+                else {
+                    ASSERT(!state->driver.stop.resource);
+                    call_stop = CALL_STOP;
+                }
+                state->type = ERTS_EV_TYPE_NONE;
+                ret |= ERL_NIF_SELECT_STOP_CALLED;
+            }
+            else {
+                /* Not safe to close fd, postpone stop_select callback. */
+                if (state->type == ERTS_EV_TYPE_NONE) {
+                    ASSERT(!state->driver.stop.resource);
+                    state->driver.stop.resource = resource;
+                    enif_keep_resource(resource);
+                }
+                state->type = ERTS_EV_TYPE_STOP_NIF;
+                ret |= ERL_NIF_SELECT_STOP_SCHEDULED;
             }
-            state->type = ERTS_EV_TYPE_STOP_NIF;
-            ret = ERL_NIF_SELECT_STOP_SCHEDULED;
         }
+        else
+            ASSERT(mode & ERL_NIF_SELECT_CANCEL);
     }
 
 done:
@@ -1328,7 +1341,8 @@ print_nif_select_op(erts_dsprintf_buf_t *dsbufp,
 		  (int) fd,
 		  mode & ERL_NIF_SELECT_READ ? " READ" : "",
 		  mode & ERL_NIF_SELECT_WRITE ? " WRITE" : "",
-		  mode & ERL_NIF_SELECT_STOP ? " STOP" : "",
+		  (mode & ERL_NIF_SELECT_STOP ? " STOP"
+                   : (mode & ERL_NIF_SELECT_CANCEL ? " CANCEL" : "")),
 		  resource->type->module,
                   resource->type->name,
                   ref);
@@ -2448,10 +2462,16 @@ drvmode2str(int mode) {
 
 static ERTS_INLINE char *
 nifmode2str(enum ErlNifSelectFlags mode) {
+    if (mode & ERL_NIF_SELECT_STOP)
+        return "STOP";
     switch (mode) {
     case ERL_NIF_SELECT_READ: return "READ";
     case ERL_NIF_SELECT_WRITE: return "WRITE";
-    case ERL_NIF_SELECT_STOP: return "STOP";
+    case ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE: return "READ|WRITE";
+    case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ: return "CANCEL|READ";
+    case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_WRITE: return "CANCEL|WRITE";
+    case ERL_NIF_SELECT_CANCEL|ERL_NIF_SELECT_READ|ERL_NIF_SELECT_WRITE:
+        return "CANCEL|READ|WRITE";
     default: return "UNKNOWN";
     }
 }
diff --git a/erts/emulator/sys/common/erl_mmap.h b/erts/emulator/sys/common/erl_mmap.h
index 539daea419..e1ff0fe80a 100644
--- a/erts/emulator/sys/common/erl_mmap.h
+++ b/erts/emulator/sys/common/erl_mmap.h
@@ -176,4 +176,61 @@ void hard_dbg_remove_mseg(void* seg, UWord sz);
 
 #endif /* HAVE_ERTS_MMAP */
 
+/* Marks the given memory region as unused without freeing it, letting the OS
+ * reclaim its physical memory with the promise that we'll get it back (without
+ * its contents) the next time it's accessed. */
+ERTS_GLB_INLINE void erts_mem_discard(void *p, UWord size);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+#ifdef VALGRIND
+    #include <valgrind/memcheck.h>
+
+    ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+        VALGRIND_MAKE_MEM_UNDEFINED(ptr, size);
+    }
+#elif defined(DEBUG)
+    /* Try to provoke crashes by filling the discard region with garbage. It's
+     * extremely hard to find bugs where we've discarded too much, as the
+     * region often retains its old contents if it's accessed before the OS
+     * reclaims it. */
+    ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+        static const char pattern[] = "DISCARDED"; /* 9 chars + NUL */
+        char *data;
+        UWord i; /* same type as size: an int index overflows past INT_MAX */
+
+        for(i = 0, data = ptr; i < size; i++) {
+            data[i] = pattern[i % sizeof(pattern)]; /* NUL is part of the fill */
+        }
+    }
+#elif defined(HAVE_SYS_MMAN_H)
+    #include <sys/mman.h>
+
+    ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+    #ifdef MADV_FREE
+        /* This is preferred as it doesn't necessarily free the pages right
+         * away, which is a bit faster than MADV_DONTNEED. */
+        madvise(ptr, size, MADV_FREE);
+    #else
+        madvise(ptr, size, MADV_DONTNEED);
+    #endif
+    }
+#elif defined(_WIN32)
+    #include <winbase.h>
+
+    /* MEM_RESET is defined on all supported versions of Windows, and has the
+     * same semantics as MADV_FREE. */
+    ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+        VirtualAlloc(ptr, size, MEM_RESET, PAGE_READWRITE);
+    }
+#else
+    /* Dummy implementation. */
+    ERTS_GLB_INLINE void erts_mem_discard(void *ptr, UWord size) {
+        (void)ptr;
+        (void)size;
+    }
+#endif
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
+
 #endif /* ERL_MMAP_H__ */
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index a1c630d68a..b95aadc9b2 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -186,7 +186,9 @@ void sys_primitive_init(HMODULE beam)
 UWord
 erts_sys_get_page_size(void)
 {
-    return (UWord) 4*1024; /* Guess 4 KB */
+    SYSTEM_INFO info;
+    GetSystemInfo(&info);
+    return (UWord)info.dwPageSize;
 }
 
 Uint
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index 6a064ec8d4..b66cc1b2a3 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -130,6 +130,7 @@ MODULES= \
 	ignore_cores \
 	dgawd_handler \
 	random_iolist \
+	erts_test_utils \
 	crypto_reference
 
 NO_OPT= bs_bincomp \
diff --git a/erts/emulator/test/alloc_SUITE.erl b/erts/emulator/test/alloc_SUITE.erl
index 343afe85e6..4e0243c1cd 100644
--- a/erts/emulator/test/alloc_SUITE.erl
+++ b/erts/emulator/test/alloc_SUITE.erl
@@ -71,7 +71,8 @@ migration(Cfg) ->
     %% Disable driver_alloc to avoid recursive alloc_util calls
     %% through enif_mutex_create() in my_creating_mbc().
     drv_case(Cfg, concurrent, "+MZe true +MRe false"),
-    drv_case(Cfg, concurrent, "+MZe true +MRe false +MZas ageffcbf").
+    drv_case(Cfg, concurrent, "+MZe true +MRe false +MZas ageffcbf"),
+    drv_case(Cfg, concurrent, "+MZe true +MRe false +MZas chaosff").
 
 erts_mmap(Config) when is_list(Config) ->
     case {os:type(), mmsc_flags()} of
diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl
index 9e7bcd5255..3eedf2f6a6 100644
--- a/erts/emulator/test/bif_SUITE.erl
+++ b/erts/emulator/test/bif_SUITE.erl
@@ -37,7 +37,8 @@
          group_leader_prio/1, group_leader_prio_dirty/1,
          is_process_alive/1,
          process_info_blast/1,
-         os_env_case_sensitivity/1]).
+         os_env_case_sensitivity/1,
+         test_length/1]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]},
@@ -52,7 +53,8 @@ all() ->
      erl_crash_dump_bytes, min_max, erlang_halt, is_builtin,
      error_stacktrace, error_stacktrace_during_call_trace,
      group_leader_prio, group_leader_prio_dirty,
-     is_process_alive, process_info_blast, os_env_case_sensitivity].
+     is_process_alive, process_info_blast, os_env_case_sensitivity,
+     test_length].
 
 %% Uses erlang:display to test that erts_printf does not do deep recursion
 display(Config) when is_list(Config) ->
@@ -1181,7 +1183,53 @@ consume_msgs() ->
     after 0 ->
               ok
     end.
-                              
+
+%% Test that length/1 returns the correct result after trapping, and
+%% also that the argument is correct in the stacktrace for a badarg
+%% exception.
+
+test_length(_Config) ->
+    {Start,Inc} = case test_server:timetrap_scale_factor() of
+                      1 -> {16*4000,3977};
+                      _ -> {100,1}
+            end,
+    Good = lists:reverse(lists:seq(1, Start)),
+    Bad = Good ++ [bad|cons],
+    test_length(Start, 10*Start, Inc, Good, Bad),
+
+    %% Test that calling length/1 from a match spec works.
+    MsList = lists:seq(1, 2*Start),
+    MsInput = [{tag,Good},{tag,MsList}],
+    Ms0 = [{{tag,'$1'},[{'>',{length,'$1'},Start}],['$1']}],
+    Ms = ets:match_spec_compile(Ms0),
+    [MsList] = ets:match_spec_run(MsInput, Ms),
+    ok.
+
+test_length(I, N, Inc, Good, Bad) when I < N ->
+    Length = id(length),
+    I = length(Good),
+    I = erlang:Length(Good),
+
+    %% Test length/1 in guards.
+    if
+        length(Good) =:= I ->
+            ok
+    end,
+    if
+        length(Bad) =:= I ->
+            error(should_fail);
+        true ->
+            ok
+    end,
+
+    {'EXIT',{badarg,[{erlang,length,[[I|_]],_}|_]}} = (catch length(Bad)),
+    {'EXIT',{badarg,[{erlang,length,[[I|_]],_}|_]}} = (catch erlang:Length(Bad)),
+    IncSeq = lists:seq(I + 1, I + Inc),
+    test_length(I+Inc, N, Inc,
+                lists:reverse(IncSeq, Good),
+                lists:reverse(IncSeq, Bad));
+test_length(_, _, _, _, _) -> ok.
+
 %% helpers
     
 id(I) -> I.
diff --git a/erts/emulator/test/call_trace_SUITE.erl b/erts/emulator/test/call_trace_SUITE.erl
index d19f7f81ad..742592f88e 100644
--- a/erts/emulator/test/call_trace_SUITE.erl
+++ b/erts/emulator/test/call_trace_SUITE.erl
@@ -1395,7 +1395,7 @@ seq(M, N, R) when M =< N ->
     seq(M, N-1, [N|R]);
 seq(_, _, R) -> R.
 
-%% lists:reverse can not be called since it is traced
+%% lists:reverse cannot be called since it is traced
 reverse(L) ->
     reverse(L, []).
 %%
diff --git a/erts/emulator/test/code_SUITE.erl b/erts/emulator/test/code_SUITE.erl
index 0444ba4f89..493c6ebe99 100644
--- a/erts/emulator/test/code_SUITE.erl
+++ b/erts/emulator/test/code_SUITE.erl
@@ -332,6 +332,7 @@ constant_pools(Config) when is_list(Config) ->
     A = literals:a(),
     B = literals:b(),
     C = literals:huge_bignum(),
+    D = literals:funs(),
     process_flag(trap_exit, true),
     Self = self(),
 
@@ -345,7 +346,7 @@ constant_pools(Config) when is_list(Config) ->
     true = erlang:purge_module(literals),
     NoOldHeap ! done,
     receive
-        {'EXIT',NoOldHeap,{A,B,C}} ->
+        {'EXIT',NoOldHeap,{A,B,C,D}} ->
             ok;
         Other ->
             ct:fail({unexpected,Other})
@@ -362,7 +363,7 @@ constant_pools(Config) when is_list(Config) ->
     erlang:purge_module(literals),
     OldHeap ! done,
     receive
-	{'EXIT',OldHeap,{A,B,C,[1,2,3|_]=Seq}} when length(Seq) =:= 16 ->
+	{'EXIT',OldHeap,{A,B,C,D,[1,2,3|_]=Seq}} when length(Seq) =:= 16 ->
 	    ok
     end,
 
@@ -390,7 +391,7 @@ constant_pools(Config) when is_list(Config) ->
 	{'DOWN', Mon, process, Hib, Reason} ->
 	    {undef, [{no_module,
 		      no_function,
-		      [{A,B,C,[1,2,3|_]=Seq}], _}]} = Reason,
+		      [{A,B,C,D,[1,2,3|_]=Seq}], _}]} = Reason,
 	    16 = length(Seq)
     end,
     HeapSz = TotHeapSz, %% Ensure restored to hibernated state...
@@ -400,7 +401,9 @@ constant_pools(Config) when is_list(Config) ->
 no_old_heap(Parent) ->
     A = literals:a(),
     B = literals:b(),
-    Res = {A,B,literals:huge_bignum()},
+    C = literals:huge_bignum(),
+    D = literals:funs(),
+    Res = {A,B,C,D},
     Parent ! go,
     receive
         done ->
@@ -410,7 +413,9 @@ no_old_heap(Parent) ->
 old_heap(Parent) ->
     A = literals:a(),
     B = literals:b(),
-    Res = {A,B,literals:huge_bignum(),lists:seq(1, 16)},
+    C = literals:huge_bignum(),
+    D = literals:funs(),
+    Res = {A,B,C,D,lists:seq(1, 16)},
     create_old_heap(),
     Parent ! go,
     receive
@@ -421,7 +426,9 @@ old_heap(Parent) ->
 hibernated(Parent) ->
     A = literals:a(),
     B = literals:b(),
-    Res = {A,B,literals:huge_bignum(),lists:seq(1, 16)},
+    C = literals:huge_bignum(),
+    D = literals:funs(),
+    Res = {A,B,C,D,lists:seq(1, 16)},
     Parent ! go,
     erlang:hibernate(no_module, no_function, [Res]).
 
@@ -755,7 +762,8 @@ t_copy_literals_frags(Config) when is_list(Config) ->
                                           0, 1, 2, 3, 4, 5, 6, 7,
                                           8, 9,10,11,12,13,14,15,
                                           0, 1, 2, 3, 4, 5, 6, 7,
-                                          8, 9,10,11,12,13,14,15>>}]),
+                                          8, 9,10,11,12,13,14,15>>},
+                        {f, fun ?MODULE:all/0}]),
 
     {module, ?mod} = erlang:load_module(?mod, Bin),
     N = 6000,
@@ -796,6 +804,7 @@ literal_receiver() ->
             C = ?mod:c(),
             D = ?mod:d(),
             E = ?mod:e(),
+            F = ?mod:f(),
             literal_receiver();
         {Pid, sender_confirm} ->
             io:format("sender confirm ~w~n", [Pid]),
@@ -811,7 +820,8 @@ literal_sender(N, Recv) ->
                           ?mod:b(),
                           ?mod:c(),
                           ?mod:d(),
-                          ?mod:e()]},
+                          ?mod:e(),
+                          ?mod:f()]},
     literal_sender(N - 1, Recv).
 
 literal_switcher() ->
diff --git a/erts/emulator/test/code_SUITE_data/literals.erl b/erts/emulator/test/code_SUITE_data/literals.erl
index 7c3b0ebe73..13c8b412b0 100644
--- a/erts/emulator/test/code_SUITE_data/literals.erl
+++ b/erts/emulator/test/code_SUITE_data/literals.erl
@@ -19,7 +19,8 @@
 %%
 
 -module(literals).
--export([a/0,b/0,huge_bignum/0,binary/0,unused_binaries/0,bits/0]).
+-export([a/0,b/0,huge_bignum/0,funs/0,
+         binary/0,unused_binaries/0,bits/0]).
 -export([msg1/0,msg2/0,msg3/0,msg4/0,msg5/0]).
 
 a() ->
@@ -108,3 +109,8 @@ msg2() -> {"hello","world"}.
 msg3() -> <<"halloj">>.
 msg4() -> #{ 1=> "hello", b => "world"}.
 msg5() -> {1,2,3,4,5,6}.
+
+funs() ->
+    %% Literal funs (in a non-literal list).
+    [fun ?MODULE:a/0,
+     fun() -> ok end].                          %No environment.
diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl
index bd62708aa7..94501dad84 100644
--- a/erts/emulator/test/driver_SUITE.erl
+++ b/erts/emulator/test/driver_SUITE.erl
@@ -2660,24 +2660,7 @@ wait_deallocations() ->
 
 driver_alloc_size() ->
     wait_deallocations(),
-    case erlang:system_info({allocator_sizes, driver_alloc}) of
-        false ->
-            undefined;
-        MemInfo ->
-            CS = lists:foldl(
-                   fun ({instance, _, L}, Acc) ->
-                           {value,{_,MBCS}} = lists:keysearch(mbcs, 1, L),
-                           {value,{_,SBCS}} = lists:keysearch(sbcs, 1, L),
-                           [MBCS,SBCS | Acc]
-                   end,
-                   [],
-                   MemInfo),
-            lists:foldl(
-              fun(L, Sz0) ->
-                      {value,{_,Sz,_,_}} = lists:keysearch(blocks_size, 1, L),
-                      Sz0+Sz
-              end, 0, CS)
-    end.
+    erts_debug:alloc_blocks_size(driver_alloc).
 
 rpc(Config, Fun) ->
     case proplists:get_value(node, Config) of
diff --git a/erts/emulator/test/erts_debug_SUITE.erl b/erts/emulator/test/erts_debug_SUITE.erl
index 6aa7a445b5..f39dbedd8f 100644
--- a/erts/emulator/test/erts_debug_SUITE.erl
+++ b/erts/emulator/test/erts_debug_SUITE.erl
@@ -22,8 +22,10 @@
 -include_lib("common_test/include/ct.hrl").
 
 -export([all/0, suite/0,
-	 test_size/1,flat_size_big/1,df/1,term_type/1,
-	 instructions/1, stack_check/1]).
+         test_size/1,flat_size_big/1,df/1,term_type/1,
+         instructions/1, stack_check/1, alloc_blocks_size/1]).
+
+-export([do_alloc_blocks_size/0]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]},
@@ -31,7 +33,7 @@ suite() ->
 
 all() -> 
     [test_size, flat_size_big, df, instructions, term_type,
-     stack_check].
+     stack_check, alloc_blocks_size].
 
 test_size(Config) when is_list(Config) ->
     ConsCell1 = id([a|b]),
@@ -210,5 +212,28 @@ instructions(Config) when is_list(Config) ->
     _ = [list_to_atom(I) || I <- Is],
     ok.
 
+alloc_blocks_size(Config) when is_list(Config) ->
+    F = fun(Args) ->
+                Node = start_slave(Args),
+                ok = rpc:call(Node, ?MODULE, do_alloc_blocks_size, []),
+                true = test_server:stop_node(Node)
+        end,
+    F("+Meamax"),
+    F("+Meamin"),
+    F(""),
+    ok.
+
+do_alloc_blocks_size() ->
+    _ = erts_debug:alloc_blocks_size(binary_alloc),
+    ok.
+
+start_slave(Args) ->
+    Name = ?MODULE_STRING ++ "_slave",
+    Pa = filename:dirname(code:which(?MODULE)),
+    {ok, Node} = test_server:start_node(list_to_atom(Name),
+                                        slave,
+                                        [{args, "-pa " ++ Pa ++ " " ++ Args}]),
+    Node.
+
 id(I) ->
     I.
diff --git a/erts/emulator/test/erts_test_utils.erl b/erts/emulator/test/erts_test_utils.erl
new file mode 100644
index 0000000000..ac2f2435be
--- /dev/null
+++ b/erts/emulator/test/erts_test_utils.erl
@@ -0,0 +1,250 @@
+%%
+%% %CopyrightBegin%
+%% 
+%% Copyright Ericsson AB 2002-2018. All Rights Reserved.
+%% 
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%% 
+%% %CopyrightEnd%
+%%
+
+-module(erts_test_utils).
+
+%%
+%% THIS MODULE IS ALSO USED BY *OTHER* APPLICATIONS TEST CODE
+%%
+
+-export([mk_ext_pid/3,
+         mk_ext_port/2,
+         mk_ext_ref/2,
+         check_node_dist/0, check_node_dist/1, check_node_dist/3]).
+
+
+
+-define(VERSION_MAGIC,       131).
+
+-define(ATOM_EXT,            100).
+-define(REFERENCE_EXT,       101).
+-define(PORT_EXT,            102).
+-define(PID_EXT,             103).
+-define(NEW_REFERENCE_EXT,   114).
+-define(NEW_PID_EXT,         $X).
+-define(NEW_PORT_EXT,        $Y).
+-define(NEWER_REFERENCE_EXT, $Z).
+
+uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 ->
+    [(Uint bsr 24) band 16#ff,
+     (Uint bsr 16) band 16#ff,
+     (Uint bsr 8) band 16#ff,
+     Uint band 16#ff];
+uint32_be(Uint) ->
+    exit({badarg, uint32_be, [Uint]}).
+
+
+uint16_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 16 ->
+    [(Uint bsr 8) band 16#ff,
+     Uint band 16#ff];
+uint16_be(Uint) ->
+    exit({badarg, uint16_be, [Uint]}).
+
+uint8(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 8 ->
+    Uint band 16#ff;
+uint8(Uint) ->
+    exit({badarg, uint8, [Uint]}).
+
+pid_tag(bad_creation) -> ?PID_EXT;
+pid_tag(Creation) when Creation =< 3 -> ?PID_EXT;
+pid_tag(_Creation) -> ?NEW_PID_EXT.
+
+enc_creation(bad_creation) -> uint8(4);
+enc_creation(Creation) when Creation =< 3 -> uint8(Creation);
+enc_creation(Creation) -> uint32_be(Creation).
+
+mk_ext_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) ->
+    mk_ext_pid({atom_to_list(NodeName), Creation}, Number, Serial);
+mk_ext_pid({NodeName, Creation}, Number, Serial) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+					      pid_tag(Creation),
+					      ?ATOM_EXT,
+					      uint16_be(length(NodeName)),
+					      NodeName,
+					      uint32_be(Number),
+					      uint32_be(Serial),
+					      enc_creation(Creation)])) of
+	Pid when is_pid(Pid) ->
+	    Pid;
+	{'EXIT', {badarg, _}} ->
+	    exit({badarg, mk_pid, [{NodeName, Creation}, Number, Serial]});
+	Other ->
+	    exit({unexpected_binary_to_term_result, Other})
+    end.
+
+port_tag(bad_creation) -> ?PORT_EXT;
+port_tag(Creation) when Creation =< 3 -> ?PORT_EXT;
+port_tag(_Creation) -> ?NEW_PORT_EXT.
+
+mk_ext_port({NodeName, Creation}, Number) when is_atom(NodeName) ->
+    mk_ext_port({atom_to_list(NodeName), Creation}, Number);
+mk_ext_port({NodeName, Creation}, Number) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+					      port_tag(Creation),
+					      ?ATOM_EXT,
+					      uint16_be(length(NodeName)),
+					      NodeName,
+					      uint32_be(Number),
+					      enc_creation(Creation)])) of
+	Port when is_port(Port) ->
+	    Port;
+	{'EXIT', {badarg, _}} ->
+	    exit({badarg, mk_port, [{NodeName, Creation}, Number]});
+	Other ->
+	    exit({unexpected_binary_to_term_result, Other})
+    end.
+
+ref_tag(bad_creation) -> ?NEW_REFERENCE_EXT;
+ref_tag(Creation) when Creation =< 3 -> ?NEW_REFERENCE_EXT;
+ref_tag(_Creation) -> ?NEWER_REFERENCE_EXT.
+
+mk_ext_ref({NodeName, Creation}, Numbers) when is_atom(NodeName),
+					   is_list(Numbers) ->
+    mk_ext_ref({atom_to_list(NodeName), Creation}, Numbers);
+mk_ext_ref({NodeName, Creation}, [Number]) when is_list(NodeName),
+                                                Creation =< 3,
+                                                is_integer(Number) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+                                              ?REFERENCE_EXT,
+                                              ?ATOM_EXT,
+                                              uint16_be(length(NodeName)),
+                                              NodeName,
+                                              uint32_be(Number),
+                                              uint8(Creation)])) of
+        Ref when is_reference(Ref) ->
+            Ref;
+        {'EXIT', {badarg, _}} ->
+            exit({badarg, mk_ref, [{NodeName, Creation}, [Number]]});
+        Other ->
+            exit({unexpected_binary_to_term_result, Other})
+    end;
+mk_ext_ref({NodeName, Creation}, Numbers) when is_list(NodeName),
+                                               is_list(Numbers) ->
+    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
+					      ref_tag(Creation),
+					      uint16_be(length(Numbers)),
+					      ?ATOM_EXT,
+					      uint16_be(length(NodeName)),
+					      NodeName,
+					      enc_creation(Creation),
+					      lists:map(fun (N) ->
+								uint32_be(N)
+							end,
+							Numbers)])) of
+	Ref when is_reference(Ref) ->
+	    Ref;
+	{'EXIT', {badarg, _}} ->
+	    exit({badarg, mk_ref, [{NodeName, Creation}, Numbers]});
+	Other ->
+	    exit({unexpected_binary_to_term_result, Other})
+    end.
+
+
+
+%% Check reference counters for node- and dist entries.
+%% Returns ok, or prints the failure report and returns error.
+%% The report is a flat string, so it is printed with ~ts rather than ~p.
+check_node_dist() ->
+    check_node_dist(fun(ErrMsg) ->
+                            io:format("check_node_dist ERROR:\n~ts\n", [ErrMsg]),
+                            error
+                    end).
+
+check_node_dist(Fail) ->
+    {{node_references, NodeRefs},
+     {dist_references, DistRefs}} =
+        erts_debug:get_internal_state(node_and_dist_references),
+    check_node_dist(Fail, NodeRefs, DistRefs).
+
+
+%% Check already fetched reference lists; Fail(ErrMsg) is called on failure.
+check_node_dist(Fail, NodeRefs, DistRefs) ->
+    check_nd_refc({node(),erlang:system_info(creation)},
+                  NodeRefs, DistRefs, Fail).
+
+%% Run check_refc/4 on both tables; if it exits, pass a flat report to Fail.
+check_nd_refc({ThisNodeName, ThisCreation}, NodeRefs, DistRefs, Fail) ->
+    case catch begin
+                   check_refc(ThisNodeName,ThisCreation,"node table",NodeRefs),
+                   check_refc(ThisNodeName,ThisCreation,"dist table",DistRefs),
+                   ok
+               end of
+        ok ->
+            ok;
+        {'EXIT', Reason} ->
+            {Y,Mo,D} = date(),
+            {H,Mi,S} = time(),
+            ErrMsg = io_lib:format("~n"
+                                   "*** Reference count check of node ~w "
+                                   "failed (~p) at ~w-~w-~w ~w:~w:~w~n"
+                                   "*** Node table references:~n ~p~n"
+                                   "*** Dist table references:~n ~p~n",
+                                   [node(), Reason, Y, Mo, D, H, Mi, S,
+                                    NodeRefs, DistRefs]),
+            Fail(lists:flatten(ErrMsg))
+    end.
+
+
+check_refc(ThisNodeName,ThisCreation,Table,EntryList) when is_list(EntryList) ->
+    lists:foreach(
+      fun ({Entry, Refc, ReferrerList}) ->
+              {DelayedDeleteTimer,
+               FoundRefs} =
+              lists:foldl(
+                fun ({Referrer, ReferencesList}, {DDT, A1}) ->
+                        {case Referrer of
+                             {system,delayed_delete_timer} ->
+                                 true;
+                             {system,thread_progress_delete_timer} ->
+                                 true;
+                             _ ->
+                                 DDT
+                         end,
+                         A1 + lists:foldl(fun ({_T,Rs},A2) ->
+                                                  A2+Rs
+                                          end,
+                                          0,
+                                          ReferencesList)}
+                end,
+                {false, 0},
+                ReferrerList),
+
+              %% Reference count equals found references?
+              case {Refc, FoundRefs, DelayedDeleteTimer} of
+                  {X, X, _} ->
+                      ok;
+                  {0, 1, true} ->
+                      ok;
+                  _ ->
+                      exit({invalid_reference_count, Table, Entry})
+              end,
+
+              %% All entries in table referred to?
+              case {Entry, Refc} of
+                  {ThisNodeName, 0} -> ok;
+                  {{ThisNodeName, ThisCreation}, 0} -> ok;
+                  {_, 0} when DelayedDeleteTimer == false ->
+                      exit({not_referred_entry_in_table, Table, Entry});
+                  {_, _} -> ok 
+              end
+
+      end,
+      EntryList),
+    ok.
diff --git a/erts/emulator/test/fun_SUITE.erl b/erts/emulator/test/fun_SUITE.erl
index 73fe9b0d8f..f8a879182e 100644
--- a/erts/emulator/test/fun_SUITE.erl
+++ b/erts/emulator/test/fun_SUITE.erl
@@ -576,7 +576,7 @@ refc_dist(Config) when is_list(Config) ->
     process_flag(trap_exit, true),
     Pid = spawn_link(Node, fun() -> receive
                                         Fun when is_function(Fun) ->
-                                            2 = fun_refc(Fun),
+                                            3 = fun_refc(Fun),
                                             exit({normal,Fun}) end
                            end),
     F = fun() -> 42 end,
@@ -598,7 +598,7 @@ refc_dist_send(Node, F) ->
     Pid = spawn_link(Node, fun() -> receive
                                         {To,Fun} when is_function(Fun) ->
                                             wait_until(fun () ->
-                                                               2 =:= fun_refc(Fun)
+                                                               3 =:= fun_refc(Fun)
                                                        end),
                                             To ! Fun
                                     end
@@ -626,7 +626,7 @@ refc_dist_reg_send(Node, F) ->
                                    Me ! Ref,
                                    receive
                                        {Me,Fun} when is_function(Fun) ->
-                                           2 = fun_refc(Fun),
+                                           3 = fun_refc(Fun),
                                            Me ! Fun
                                    end
                            end),
@@ -806,11 +806,13 @@ verify_not_undef(Fun, Tag) ->
 	    ct:fail("tag ~w not defined in fun_info", [Tag]);
 	{Tag,_} -> ok
     end.
-	    
+
 id(X) ->
     X.
 
 spawn_call(Node, AFun) ->
+    Parent = self(),
+    Init = erlang:whereis(init),
     Pid = spawn_link(Node,
 		     fun() ->
 			     receive
@@ -821,8 +823,10 @@ spawn_call(Node, AFun) ->
 						_ -> lists:seq(0, Arity-1)
 					    end,
 				     Res = apply(Fun, Args),
-				     {pid,Creator} = erlang:fun_info(Fun, pid),
-				     Creator ! {result,Res}
+                     case erlang:fun_info(Fun, pid) of
+                        {pid,Init} -> Parent ! {result,Res};
+                        {pid,Creator} -> Creator ! {result,Res}
+                     end
 			     end
 		     end),
     Pid ! {AFun,AFun,AFun},
diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl
index a2f3489943..edad62a9fb 100644
--- a/erts/emulator/test/nif_SUITE.erl
+++ b/erts/emulator/test/nif_SUITE.erl
@@ -485,12 +485,14 @@ t_on_load(Config) when is_list(Config) ->
 -define(ERL_NIF_SELECT_READ, (1 bsl 0)).
 -define(ERL_NIF_SELECT_WRITE, (1 bsl 1)).
 -define(ERL_NIF_SELECT_STOP, (1 bsl 2)).
+-define(ERL_NIF_SELECT_CANCEL, (1 bsl 3)).
 
 -define(ERL_NIF_SELECT_STOP_CALLED, (1 bsl 0)).
 -define(ERL_NIF_SELECT_STOP_SCHEDULED, (1 bsl 1)).
 -define(ERL_NIF_SELECT_INVALID_EVENT, (1 bsl 2)).
 -define(ERL_NIF_SELECT_FAILED, (1 bsl 3)).
-
+-define(ERL_NIF_SELECT_READ_CANCELLED, (1 bsl 4)).
+-define(ERL_NIF_SELECT_WRITE_CANCELLED, (1 bsl 5)).
 
 select(Config) when is_list(Config) ->
     ensure_lib_loaded(Config),
@@ -516,7 +518,16 @@ select(Config) when is_list(Config) ->
                      end),
     0 = select_nif(R,?ERL_NIF_SELECT_READ,R,Pid,Ref),
     {Pid, done} = receive_any(1000),
+
+    %% Cancel read
+    0 = select_nif(R,?ERL_NIF_SELECT_READ bor ?ERL_NIF_SELECT_CANCEL,R,null,Ref),
     <<"hej">> = read_nif(R, 3),
+    0 = select_nif(R,?ERL_NIF_SELECT_READ,R,null,Ref),
+    ?ERL_NIF_SELECT_READ_CANCELLED =
+        select_nif(R,?ERL_NIF_SELECT_READ bor ?ERL_NIF_SELECT_CANCEL,R,null,Ref),
+    ok = write_nif(W, <<"hej again">>),
+    [] = flush(0),
+    <<"hej again">> = read_nif(R, 9),
 
     %% Wait for write
     Written = write_full(W, $a),
@@ -525,6 +536,15 @@ select(Config) when is_list(Config) ->
     Written = read_nif(R,byte_size(Written)),
     [{select, W, Ref, ready_output}] = flush(),
 
+    %% Cancel write
+    0 = select_nif(W,?ERL_NIF_SELECT_WRITE bor ?ERL_NIF_SELECT_CANCEL,W,null,Ref),
+    Written2 = write_full(W, $b),
+    0 = select_nif(W,?ERL_NIF_SELECT_WRITE,W,null,Ref),
+    ?ERL_NIF_SELECT_WRITE_CANCELLED =
+        select_nif(W,?ERL_NIF_SELECT_WRITE bor ?ERL_NIF_SELECT_CANCEL,W,null,Ref),
+    Written2 = read_nif(R,byte_size(Written2)),
+    [] = flush(0),
+
     %% Close write and wait for EOF
     eagain = read_nif(R, 1),
     check_stop_ret(select_nif(W,?ERL_NIF_SELECT_STOP,W,null,Ref)),
diff --git a/erts/emulator/test/node_container_SUITE.erl b/erts/emulator/test/node_container_SUITE.erl
index 300b4ed036..b3d8f9584d 100644
--- a/erts/emulator/test/node_container_SUITE.erl
+++ b/erts/emulator/test/node_container_SUITE.erl
@@ -938,15 +938,11 @@ nc_refc_check(Node) when is_atom(Node) ->
     io:format("Starting reference count check of node ~w~n", [Node]),
     spawn_link(Node,
                fun () ->
-                       {{node_references, NodeRefs},
-                        {dist_references, DistRefs}} = ?ND_REFS,
-                       check_nd_refc({node(), erlang:system_info(creation)},
-                                     NodeRefs,
-                                     DistRefs,
-                                     fun (ErrMsg) ->
-                                             Self ! {Ref, ErrMsg, failed},
-                                             exit(normal)
-                                     end),
+                       erts_test_utils:check_node_dist(
+                         fun (ErrMsg) ->
+                                 Self ! {Ref, ErrMsg, failed},
+                                 exit(normal)
+                         end),
                        Self ! {Ref, succeded}
                end),
     receive
@@ -958,98 +954,26 @@ nc_refc_check(Node) when is_atom(Node) ->
             ok
     end.
 
-check_nd_refc({ThisNodeName, ThisCreation}, NodeRefs, DistRefs, Fail) ->
-    case catch begin
-                   check_refc(ThisNodeName,ThisCreation,"node table",NodeRefs),
-                   check_refc(ThisNodeName,ThisCreation,"dist table",DistRefs),
-                   ok
-               end of
-        ok ->
-            ok;
-        {'EXIT', Reason} ->
-            {Y,Mo,D} = date(),
-            {H,Mi,S} = time(),
-            ErrMsg = io_lib:format("~n"
-                                   "*** Reference count check of node ~w "
-                                   "failed (~p) at ~w~w~w ~w:~w:~w~n"
-                                   "*** Node table references:~n ~p~n"
-                                   "*** Dist table references:~n ~p~n",
-                                   [node(), Reason, Y, Mo, D, H, Mi, S,
-                                    NodeRefs, DistRefs]),
-            Fail(lists:flatten(ErrMsg))
-    end.
-
-
-check_refc(ThisNodeName,ThisCreation,Table,EntryList) when is_list(EntryList) ->
-    lists:foreach(
-      fun ({Entry, Refc, ReferrerList}) ->
-              {DelayedDeleteTimer,
-               FoundRefs} =
-              lists:foldl(
-                fun ({Referrer, ReferencesList}, {DDT, A1}) ->
-                        {case Referrer of
-                             {system,delayed_delete_timer} ->
-                                 true;
-                             {system,thread_progress_delete_timer} ->
-                                 true;
-                             _ ->
-                                 DDT
-                         end,
-                         A1 + lists:foldl(fun ({_T,Rs},A2) ->
-                                                  A2+Rs
-                                          end,
-                                          0,
-                                          ReferencesList)}
-                end,
-                {false, 0},
-                ReferrerList),
-
-              %% Reference count equals found references?
-              case {Refc, FoundRefs, DelayedDeleteTimer} of
-                  {X, X, _} ->
-                      ok;
-                  {0, 1, true} ->
-                      ok;
-                  _ ->
-                      exit({invalid_reference_count, Table, Entry})
-              end,
-
-              %% All entries in table referred to?
-              case {Entry, Refc} of
-                  {ThisNodeName, 0} -> ok;
-                  {{ThisNodeName, ThisCreation}, 0} -> ok;
-                  {_, 0} when DelayedDeleteTimer == false ->
-                      exit({not_referred_entry_in_table, Table, Entry});
-                  {_, _} -> ok 
-              end
-
-      end,
-      EntryList),
-    ok.
-
 get_node_references({NodeName, Creation} = Node) when is_atom(NodeName),
                                                       is_integer(Creation) ->
     {{node_references, NodeRefs},
      {dist_references, DistRefs}} = ?ND_REFS,
-    check_nd_refc({node(), erlang:system_info(creation)},
-                  NodeRefs,
-                  DistRefs,
-                  fun (ErrMsg) ->
-                          io:format("~s", [ErrMsg]),
-                          ct:fail(reference_count_check_failed)
-                  end),
+    erts_test_utils:check_node_dist(
+      fun (ErrMsg) ->
+              io:format("~s", [ErrMsg]),
+              ct:fail(reference_count_check_failed)
+      end,
+      NodeRefs, DistRefs),
     find_references(Node, NodeRefs).
 
 get_dist_references(NodeName) when is_atom(NodeName) ->
     {{node_references, NodeRefs},
      {dist_references, DistRefs}} = ?ND_REFS,
-    check_nd_refc({node(), erlang:system_info(creation)},
-                  NodeRefs,
-                  DistRefs,
-                  fun (ErrMsg) ->
-                          io:format("~s", [ErrMsg]),
-                          ct:fail(reference_count_check_failed)
-                  end),
+    erts_test_utils:check_node_dist(fun (ErrMsg) ->
+                                            io:format("~s", [ErrMsg]),
+                                            ct:fail(reference_count_check_failed)
+                                    end,
+                                    NodeRefs, DistRefs),
     find_references(NodeName, DistRefs).
 
 find_references(N, NRefList) ->
@@ -1138,133 +1062,15 @@ get_nodename() ->
                  ++ "@"
                  ++ hostname()).
 
-
-
--define(VERSION_MAGIC,       131).
-
--define(ATOM_EXT,            100).
--define(REFERENCE_EXT,       101).
--define(PORT_EXT,            102).
--define(PID_EXT,             103).
--define(NEW_REFERENCE_EXT,   114).
--define(NEW_PID_EXT,         $X).
--define(NEW_PORT_EXT,        $Y).
--define(NEWER_REFERENCE_EXT, $Z).
-
-uint32_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 32 ->
-    [(Uint bsr 24) band 16#ff,
-     (Uint bsr 16) band 16#ff,
-     (Uint bsr 8) band 16#ff,
-     Uint band 16#ff];
-uint32_be(Uint) ->
-    exit({badarg, uint32_be, [Uint]}).
-
-
-uint16_be(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 16 ->
-    [(Uint bsr 8) band 16#ff,
-     Uint band 16#ff];
-uint16_be(Uint) ->
-    exit({badarg, uint16_be, [Uint]}).
-
-uint8(Uint) when is_integer(Uint), 0 =< Uint, Uint < 1 bsl 8 ->
-    Uint band 16#ff;
-uint8(Uint) ->
-    exit({badarg, uint8, [Uint]}).
-
-
-pid_tag(bad_creation) -> ?PID_EXT;
-pid_tag(Creation) when Creation =< 3 -> ?PID_EXT;
-pid_tag(_Creation) -> ?NEW_PID_EXT.
-
-enc_creation(bad_creation) -> uint8(4);
-enc_creation(Creation) when Creation =< 3 -> uint8(Creation);
-enc_creation(Creation) -> uint32_be(Creation).
-
-mk_pid({NodeName, Creation}, Number, Serial) when is_atom(NodeName) ->
-    mk_pid({atom_to_list(NodeName), Creation}, Number, Serial);
 mk_pid({NodeName, Creation}, Number, Serial) ->
-    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
-					      pid_tag(Creation),
-					      ?ATOM_EXT,
-					      uint16_be(length(NodeName)),
-					      NodeName,
-					      uint32_be(Number),
-					      uint32_be(Serial),
-					      enc_creation(Creation)])) of
-	Pid when is_pid(Pid) ->
-	    Pid;
-	{'EXIT', {badarg, _}} ->
-	    exit({badarg, mk_pid, [{NodeName, Creation}, Number, Serial]});
-	Other ->
-	    exit({unexpected_binary_to_term_result, Other})
-    end.
-
-port_tag(bad_creation) -> ?PORT_EXT;
-port_tag(Creation) when Creation =< 3 -> ?PORT_EXT;
-port_tag(_Creation) -> ?NEW_PORT_EXT.
+    erts_test_utils:mk_ext_pid({NodeName, Creation}, Number, Serial).
 
-mk_port({NodeName, Creation}, Number) when is_atom(NodeName) ->
-    mk_port({atom_to_list(NodeName), Creation}, Number);
 mk_port({NodeName, Creation}, Number) ->
-    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
-					      port_tag(Creation),
-					      ?ATOM_EXT,
-					      uint16_be(length(NodeName)),
-					      NodeName,
-					      uint32_be(Number),
-					      enc_creation(Creation)])) of
-	Port when is_port(Port) ->
-	    Port;
-	{'EXIT', {badarg, _}} ->
-	    exit({badarg, mk_port, [{NodeName, Creation}, Number]});
-	Other ->
-	    exit({unexpected_binary_to_term_result, Other})
-    end.
+    erts_test_utils:mk_ext_port({NodeName, Creation}, Number).
+
+mk_ref({NodeName, Creation}, Numbers) ->
+    erts_test_utils:mk_ext_ref({NodeName, Creation}, Numbers).
 
-ref_tag(bad_creation) -> ?NEW_REFERENCE_EXT;
-ref_tag(Creation) when Creation =< 3 -> ?NEW_REFERENCE_EXT;
-ref_tag(_Creation) -> ?NEWER_REFERENCE_EXT.
-
-mk_ref({NodeName, Creation}, Numbers) when is_atom(NodeName),
-					   is_list(Numbers) ->
-    mk_ref({atom_to_list(NodeName), Creation}, Numbers);
-mk_ref({NodeName, Creation}, [Number]) when is_list(NodeName),
-					    Creation =< 3,
-					    is_integer(Number) ->
-    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
-                                              ?REFERENCE_EXT,
-                                              ?ATOM_EXT,
-                                              uint16_be(length(NodeName)),
-                                              NodeName,
-                                              uint32_be(Number),
-                                              uint8(Creation)])) of
-        Ref when is_reference(Ref) ->
-            Ref;
-        {'EXIT', {badarg, _}} ->
-            exit({badarg, mk_ref, [{NodeName, Creation}, [Number]]});
-        Other ->
-            exit({unexpected_binary_to_term_result, Other})
-    end;
-mk_ref({NodeName, Creation}, Numbers) when is_list(NodeName),
-					   is_list(Numbers) ->
-    case catch binary_to_term(list_to_binary([?VERSION_MAGIC,
-					      ref_tag(Creation),
-					      uint16_be(length(Numbers)),
-					      ?ATOM_EXT,
-					      uint16_be(length(NodeName)),
-					      NodeName,
-					      enc_creation(Creation),
-					      lists:map(fun (N) ->
-								uint32_be(N)
-							end,
-							Numbers)])) of
-	Ref when is_reference(Ref) ->
-	    Ref;
-	{'EXIT', {badarg, _}} ->
-	    exit({badarg, mk_ref, [{NodeName, Creation}, Numbers]});
-	Other ->
-	    exit({unexpected_binary_to_term_result, Other})
-    end.
 
 exec_loop() ->
     receive
diff --git a/erts/emulator/test/process_SUITE.erl b/erts/emulator/test/process_SUITE.erl
index 57eb082d64..f4b1d885fe 100644
--- a/erts/emulator/test/process_SUITE.erl
+++ b/erts/emulator/test/process_SUITE.erl
@@ -2233,8 +2233,8 @@ processes_term_proc_list(Config) when is_list(Config) ->
     %% We have to run this test case with +S1 since instrument:allocations()
     %% will report a free()'d block as present until it's actually deallocated
     %% by its employer.
-    Run("+MSe true +MSatags false +S1"),
-    Run("+MSe true +MSatags true +S1"),
+    Run("+MSe true +Muatags false +S1"),
+    Run("+MSe true +Muatags true +S1"),
 
     ok.
 
@@ -2242,10 +2242,12 @@ processes_term_proc_list(Config) when is_list(Config) ->
 	chk_term_proc_list(?LINE, MC, XB)).
 
 chk_term_proc_list(Line, MustChk, ExpectBlks) ->
-    Allocs = instrument:allocations(#{ allocator_types => [sl_alloc] }),
+    Allocs = instrument:allocations(),
     case {MustChk, Allocs} of
 	{false, {error, not_enabled}} ->
 	    not_enabled;
+	{false, {ok, {_Shift, _Unscanned, ByOrigin}}} when ByOrigin =:= #{} ->
+	    not_enabled;
 	{_, {ok, {_Shift, _Unscanned, ByOrigin}}} ->
             ByType = maps:get(system, ByOrigin, #{}),
             Hist = maps:get(ptab_list_deleted_el, ByType, {}),
diff --git a/erts/emulator/test/smoke_test_SUITE.erl b/erts/emulator/test/smoke_test_SUITE.erl
index 26c610e3a8..5b46342127 100644
--- a/erts/emulator/test/smoke_test_SUITE.erl
+++ b/erts/emulator/test/smoke_test_SUITE.erl
@@ -56,7 +56,7 @@ end_per_testcase(_Case, Config) when is_list(Config) ->
 %%%
 
 boot_combo(Config) when is_list(Config) ->
-    ZFlags = os:getenv("ERL_ZFLAGS"),
+    ZFlags = os:getenv("ERL_ZFLAGS", ""),
     NOOP = fun () -> ok end,
     A42 = fun () ->
 		  case erlang:system_info(threads) of
@@ -87,10 +87,7 @@ boot_combo(Config) when is_list(Config) ->
 	%% A lot more combos could be implemented...
 	ok
     after
-	os:putenv("ERL_ZFLAGS", case ZFlags of
-				    false -> "";
-				    _ -> ZFlags
-				end)
+	os:putenv("ERL_ZFLAGS", ZFlags)
     end.
 
 native_atomics(Config) when is_list(Config) ->
diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl
index 21ab6b378a..8ea2d88ec4 100644
--- a/erts/emulator/test/system_info_SUITE.erl
+++ b/erts/emulator/test/system_info_SUITE.erl
@@ -457,11 +457,16 @@ cmp_memory(MWs, Str) ->
     %% Total, processes, processes_used, and system will seldom
     %% give us exactly the same result since the two readings
     %% aren't taken atomically.
+    %%
+    %% Tolerance is scaled according to the number of schedulers
+    %% to match spawn_mem_workers.
+
+    Tolerance = 1.05 + 0.01 * erlang:system_info(schedulers_online),
 
-    cmp_memory(total, EM, EDM, 1.05),
-    cmp_memory(processes, EM, EDM, 1.05),
-    cmp_memory(processes_used, EM, EDM, 1.05),
-    cmp_memory(system, EM, EDM, 1.05),
+    cmp_memory(total, EM, EDM, Tolerance),
+    cmp_memory(processes, EM, EDM, Tolerance),
+    cmp_memory(processes_used, EM, EDM, Tolerance),
+    cmp_memory(system, EM, EDM, Tolerance),
 
     ok.
     
diff --git a/erts/emulator/test/timer_bif_SUITE.erl b/erts/emulator/test/timer_bif_SUITE.erl
index fc11a04a31..15fe13c8c0 100644
--- a/erts/emulator/test/timer_bif_SUITE.erl
+++ b/erts/emulator/test/timer_bif_SUITE.erl
@@ -361,7 +361,7 @@ evil_timers(Config) when is_list(Config) ->
     %%
     %% 1. A timer started with erlang:start_timer(Time, Receiver, Msg),
     %%    where Msg is a composite term, expires, and the receivers main
-    %%    lock *can not* be acquired immediately (typically when the
+    %%    lock *cannot* be acquired immediately (typically when the
     %%    receiver *is* running).
     %%
     %%    The wrap tuple ({timeout, TRef, Msg}) will in this case
@@ -372,7 +372,7 @@ evil_timers(Config) when is_list(Config) ->
     RecvTimeOutMsgs0 = evil_recv_timeouts(200),
     %% 2. A timer started with erlang:start_timer(Time, Receiver, Msg),
     %%    where Msg is an immediate term, expires, and the receivers main
-    %%    lock *can not* be acquired immediately (typically when the
+    %%    lock *cannot* be acquired immediately (typically when the
     %%    receiver *is* running).
     %%
     %%    The wrap tuple will in this case be allocated in a new
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index da994fae3e..f73e2362bf 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -1840,12 +1840,57 @@ sub do_pack_one {
     }
 
     #
-    # Return if there is nothing to pack.
-    #
-    if ($packable_args == 0) {
-        return (-1);
-    } elsif ($packable_args == 1 and $options == 0) {
-        return (-1);
+    # Check whether any packing can be done.
+    #
+    my $nothing_to_pack = $packable_args == 0 ||
+        $packable_args == 1 && $options == 0;
+    if ($nothing_to_pack) {
+        # The packing engine in the loader processes the operands from
+        # right to left. Rightmost operands that are not packed must
+        # be stacked and then unstacked.
+        #
+        # Because instructions may be broken up into micro
+        # instructions, we might not see all operands at once. So
+        # there could be a micro instruction that packs the operands
+        # to the left of the current micro instruction. If that is the
+        # case, it is essential that we generate stacking and
+        # unstacking instructions even when no packing is
+        # possible. (build_pack_spec() will remove any unnecessary
+        # stacking and unstacking operations.)
+        #
+        # Here is an example. Say that we have this instruction:
+        #
+        #     i_plus x x j d
+        #
+        # that comprises two micro instructions:
+        #
+        #     i_plus.fetch x x
+        #     i_plus.execute j d
+        #
+        # This function (do_pack_one()) will be called twice, once to pack
+        # 'x' and 'x', and once to pack 'j' and 'd'.
+        #
+        # On a 32-bit machine, the 'j' and 'd' operands can't be
+        # packed because 'j' requires a full word. The two 'x'
+        # operands in the i_plus.fetch micro instruction will be
+        # packed, though, so we must generate instructions for stacking
+        # and unstacking the 'j' and 'd' operands.
+        my $down = '';
+        my $up = '';
+        foreach my $arg (@args) {
+            my $push = 'g';
+            if ($type_bit{$arg} & $type_bit{'q'}) {
+                # The operand may be a literal.
+                $push = 'q';
+            } elsif ($type_bit{$arg} & $type_bit{'f'}) {
+                # The operand may be a failure label.
+                $push = 'f';
+            }
+            $down = "$push${down}";
+            $up = "${up}p";
+        }
+        my $pack_spec = "$down:$up";
+        return (1, ['',$pack_spec,@args]);
     }
 
     #
diff --git a/erts/emulator/zlib/adler32.c b/erts/emulator/zlib/adler32.c
index c693a42b7c..d0be4380a3 100644
--- a/erts/emulator/zlib/adler32.c
+++ b/erts/emulator/zlib/adler32.c
@@ -1,20 +1,15 @@
 /* adler32.c -- compute the Adler-32 checksum of a data stream
- * Copyright (C) 1995-2011 Mark Adler
+ * Copyright (C) 1995-2011, 2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "zutil.h"
 
-#define local static
-
 local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 
-#define BASE 65521      /* largest prime smaller than 65536 */
+#define BASE 65521U     /* largest prime smaller than 65536 */
 #define NMAX 5552
 /* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
 
@@ -65,10 +60,10 @@ local uLong adler32_combine_ OF((uLong adler1, uLong adler2, z_off64_t len2));
 #endif
 
 /* ========================================================================= */
-uLong ZEXPORT adler32(adler, buf, len)
+uLong ZEXPORT adler32_z(adler, buf, len)
     uLong adler;
     const Bytef *buf;
-    uInt len;
+    z_size_t len;
 {
     unsigned long sum2;
     unsigned n;
@@ -136,6 +131,15 @@ uLong ZEXPORT adler32(adler, buf, len)
 }
 
 /* ========================================================================= */
+uLong ZEXPORT adler32(adler, buf, len)
+    uLong adler;
+    const Bytef *buf;
+    uInt len;
+{
+    return adler32_z(adler, buf, len);
+}
+
+/* ========================================================================= */
 local uLong adler32_combine_(adler1, adler2, len2)
     uLong adler1;
     uLong adler2;
@@ -159,7 +163,7 @@ local uLong adler32_combine_(adler1, adler2, len2)
     sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem;
     if (sum1 >= BASE) sum1 -= BASE;
     if (sum1 >= BASE) sum1 -= BASE;
-    if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1);
+    if (sum2 >= ((unsigned long)BASE << 1)) sum2 -= ((unsigned long)BASE << 1);
     if (sum2 >= BASE) sum2 -= BASE;
     return sum1 | (sum2 << 16);
 }
diff --git a/erts/emulator/zlib/compress.c b/erts/emulator/zlib/compress.c
index 8ecef0f790..e2db404abf 100644
--- a/erts/emulator/zlib/compress.c
+++ b/erts/emulator/zlib/compress.c
@@ -1,13 +1,10 @@
 /* compress.c -- compress a memory buffer
- * Copyright (C) 1995-2005 Jean-loup Gailly.
+ * Copyright (C) 1995-2005, 2014, 2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #define ZLIB_INTERNAL
 #include "zlib.h"
 
@@ -31,16 +28,11 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
 {
     z_stream stream;
     int err;
+    const uInt max = (uInt)-1;
+    uLong left;
 
-    stream.next_in = (z_const Bytef *)source;
-    stream.avail_in = (uInt)sourceLen;
-#ifdef MAXSEG_64K
-    /* Check for source > 64K on 16-bit machine: */
-    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
-#endif
-    stream.next_out = dest;
-    stream.avail_out = (uInt)*destLen;
-    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+    left = *destLen;
+    *destLen = 0;
 
     stream.zalloc = (alloc_func)0;
     stream.zfree = (free_func)0;
@@ -49,15 +41,26 @@ int ZEXPORT compress2 (dest, destLen, source, sourceLen, level)
     err = deflateInit(&stream, level);
     if (err != Z_OK) return err;
 
-    err = deflate(&stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        deflateEnd(&stream);
-        return err == Z_OK ? Z_BUF_ERROR : err;
-    }
-    *destLen = stream.total_out;
+    stream.next_out = dest;
+    stream.avail_out = 0;
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
+
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = sourceLen > (uLong)max ? max : (uInt)sourceLen;
+            sourceLen -= stream.avail_in;
+        }
+        err = deflate(&stream, sourceLen ? Z_NO_FLUSH : Z_FINISH);
+    } while (err == Z_OK);
 
-    err = deflateEnd(&stream);
-    return err;
+    *destLen = stream.total_out;
+    deflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK : err;
 }
 
 /* ===========================================================================
diff --git a/erts/emulator/zlib/crc32.c b/erts/emulator/zlib/crc32.c
index ba506d8dd3..9580440c0e 100644
--- a/erts/emulator/zlib/crc32.c
+++ b/erts/emulator/zlib/crc32.c
@@ -1,5 +1,5 @@
 /* crc32.c -- compute the CRC-32 of a data stream
- * Copyright (C) 1995-2006, 2010, 2011, 2012 Mark Adler
+ * Copyright (C) 1995-2006, 2010, 2011, 2012, 2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  *
  * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
@@ -28,23 +28,17 @@
 #  endif /* !DYNAMIC_CRC_TABLE */
 #endif /* MAKECRCH */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
-
 #include "zutil.h"      /* for STDC and FAR definitions */
 
-#define local static
-
 /* Definitions for doing the crc four data bytes at a time. */
 #if !defined(NOBYFOUR) && defined(Z_U4)
 #  define BYFOUR
 #endif
 #ifdef BYFOUR
    local unsigned long crc32_little OF((unsigned long,
-                        const unsigned char FAR *, unsigned));
+                        const unsigned char FAR *, z_size_t));
    local unsigned long crc32_big OF((unsigned long,
-                        const unsigned char FAR *, unsigned));
+                        const unsigned char FAR *, z_size_t));
 #  define TBLS 8
 #else
 #  define TBLS 1
@@ -205,10 +199,10 @@ const z_crc_t FAR * ZEXPORT get_crc_table()
 #define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1
 
 /* ========================================================================= */
-unsigned long ZEXPORT crc32(crc, buf, len)
+unsigned long ZEXPORT crc32_z(crc, buf, len)
     unsigned long crc;
     const unsigned char FAR *buf;
-    uInt len;
+    z_size_t len;
 {
     if (buf == Z_NULL) return 0UL;
 
@@ -239,8 +233,29 @@ unsigned long ZEXPORT crc32(crc, buf, len)
     return crc ^ 0xffffffffUL;
 }
 
+/* ========================================================================= */
+unsigned long ZEXPORT crc32(crc, buf, len)
+    unsigned long crc;
+    const unsigned char FAR *buf;
+    uInt len;
+{
+    return crc32_z(crc, buf, len);
+}
+
 #ifdef BYFOUR
 
+/*
+   This BYFOUR code accesses the passed unsigned char * buffer with a 32-bit
+   integer pointer type. This violates the strict aliasing rule, where a
+   compiler can assume, for optimization purposes, that two pointers to
+   fundamentally different types won't ever point to the same memory. This can
+   manifest as a problem only if one of the pointers is written to. This code
+   only reads from those pointers. So long as this code remains isolated in
+   this compilation unit, there won't be a problem. For this reason, this code
+   should not be copied and pasted into a compilation unit in which other code
+   writes to the buffer that is passed to these routines.
+ */
+
 /* ========================================================================= */
 #define DOLIT4 c ^= *buf4++; \
         c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
@@ -251,7 +266,7 @@ unsigned long ZEXPORT crc32(crc, buf, len)
 local unsigned long crc32_little(crc, buf, len)
     unsigned long crc;
     const unsigned char FAR *buf;
-    unsigned len;
+    z_size_t len;
 {
     register z_crc_t c;
     register const z_crc_t FAR *buf4;
@@ -282,7 +297,7 @@ local unsigned long crc32_little(crc, buf, len)
 }
 
 /* ========================================================================= */
-#define DOBIG4 c ^= *++buf4; \
+#define DOBIG4 c ^= *buf4++; \
         c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
             crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
 #define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4
@@ -291,7 +306,7 @@ local unsigned long crc32_little(crc, buf, len)
 local unsigned long crc32_big(crc, buf, len)
     unsigned long crc;
     const unsigned char FAR *buf;
-    unsigned len;
+    z_size_t len;
 {
     register z_crc_t c;
     register const z_crc_t FAR *buf4;
@@ -304,7 +319,6 @@ local unsigned long crc32_big(crc, buf, len)
     }
 
     buf4 = (const z_crc_t FAR *)(const void FAR *)buf;
-    buf4--;
     while (len >= 32) {
         DOBIG32;
         len -= 32;
@@ -313,7 +327,6 @@ local unsigned long crc32_big(crc, buf, len)
         DOBIG4;
         len -= 4;
     }
-    buf4++;
     buf = (const unsigned char FAR *)buf4;
 
     if (len) do {
diff --git a/erts/emulator/zlib/deflate.c b/erts/emulator/zlib/deflate.c
index 943c26dfb2..1ec761448d 100644
--- a/erts/emulator/zlib/deflate.c
+++ b/erts/emulator/zlib/deflate.c
@@ -1,5 +1,5 @@
 /* deflate.c -- compress data using the deflation algorithm
- * Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+ * Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -49,13 +49,10 @@
 
 /* @(#) $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "deflate.h"
 
 const char deflate_copyright[] =
-   " deflate 1.2.8 Copyright 1995-2013 Jean-loup Gailly and Mark Adler ";
+   " deflate 1.2.11 Copyright 1995-2017 Jean-loup Gailly and Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -76,6 +73,8 @@ typedef enum {
 typedef block_state (*compress_func) OF((deflate_state *s, int flush));
 /* Compression function. Returns the block state after the call. */
 
+local int deflateStateCheck      OF((z_streamp strm));
+local void slide_hash     OF((deflate_state *s));
 local void fill_window    OF((deflate_state *s));
 local block_state deflate_stored OF((deflate_state *s, int flush));
 local block_state deflate_fast   OF((deflate_state *s, int flush));
@@ -87,15 +86,16 @@ local block_state deflate_huff   OF((deflate_state *s, int flush));
 local void lm_init        OF((deflate_state *s));
 local void putShortMSB    OF((deflate_state *s, uInt b));
 local void flush_pending  OF((z_streamp strm));
-local int read_buf        OF((z_streamp strm, Bytef *buf, unsigned size));
+local unsigned read_buf   OF((z_streamp strm, Bytef *buf, unsigned size));
 #ifdef ASMV
+#  pragma message("Assembler code may have bugs -- use at your own risk")
       void match_init OF((void)); /* asm code initialization */
       uInt longest_match  OF((deflate_state *s, IPos cur_match));
 #else
 local uInt longest_match  OF((deflate_state *s, IPos cur_match));
 #endif
 
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 local  void check_match OF((deflate_state *s, IPos start, IPos match,
                             int length));
 #endif
@@ -151,21 +151,14 @@ local const config configuration_table[10] = {
  * meaning.
  */
 
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-#ifndef NO_DUMMY_DECL
-struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
-#endif
-
 /* rank Z_BLOCK between Z_NO_FLUSH and Z_PARTIAL_FLUSH */
-#define RANK(f) (((f) << 1) - ((f) > 4 ? 9 : 0))
+#define RANK(f) (((f) * 2) - ((f) > 4 ? 9 : 0))
 
 /* ===========================================================================
  * Update a hash value with the given input byte
- * IN  assertion: all calls to to UPDATE_HASH are made with consecutive
- *    input characters, so that a running hash key can be computed from the
- *    previous key instead of complete recalculation each time.
+ * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
+ *    characters, so that a running hash key can be computed from the previous
+ *    key instead of complete recalculation each time.
  */
 #define UPDATE_HASH(s,h,c) (h = (((h)<<s->hash_shift) ^ (c)) & s->hash_mask)
 
@@ -176,9 +169,9 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
  * the previous length of the hash chain.
  * If this file is compiled with -DFASTEST, the compression level is forced
  * to 1, and no hash chains are maintained.
- * IN  assertion: all calls to to INSERT_STRING are made with consecutive
- *    input characters and the first MIN_MATCH bytes of str are valid
- *    (except for the last MIN_MATCH-1 bytes of the input file).
+ * IN  assertion: all calls to INSERT_STRING are made with consecutive input
+ *    characters and the first MIN_MATCH bytes of str are valid (except for
+ *    the last MIN_MATCH-1 bytes of the input file).
  */
 #ifdef FASTEST
 #define INSERT_STRING(s, str, match_head) \
@@ -200,6 +193,37 @@ struct static_tree_desc_s {int dummy;}; /* for buggy compilers */
     s->head[s->hash_size-1] = NIL; \
     zmemzero((Bytef *)s->head, (unsigned)(s->hash_size-1)*sizeof(*s->head));
 
+/* ===========================================================================
+ * Slide the hash table when sliding the window down (could be avoided with 32
+ * bit values at the expense of memory usage). We slide even when level == 0 to
+ * keep the hash table consistent if we switch back to level > 0 later.
+ */
+local void slide_hash(s)
+    deflate_state *s;
+{
+    unsigned n, m;
+    Posf *p;
+    uInt wsize = s->w_size;
+
+    n = s->hash_size;
+    p = &s->head[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+    } while (--n);
+    n = wsize;
+#ifndef FASTEST
+    p = &s->prev[n];
+    do {
+        m = *--p;
+        *p = (Pos)(m >= wsize ? m - wsize : NIL);
+        /* If n is not on any hash chain, prev[n] is garbage but
+         * its value will never be used.
+         */
+    } while (--n);
+#endif
+}
+
 /* ========================================================================= */
 int ZEXPORT deflateInit_(strm, level, version, stream_size)
     z_streamp strm;
@@ -273,7 +297,7 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
 #endif
     if (memLevel < 1 || memLevel > MAX_MEM_LEVEL || method != Z_DEFLATED ||
         windowBits < 8 || windowBits > 15 || level < 0 || level > 9 ||
-        strategy < 0 || strategy > Z_FIXED) {
+        strategy < 0 || strategy > Z_FIXED || (windowBits == 8 && wrap != 1)) {
         return Z_STREAM_ERROR;
     }
     if (windowBits == 8) windowBits = 9;  /* until 256-byte window bug fixed */
@@ -281,14 +305,15 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     if (s == Z_NULL) return Z_MEM_ERROR;
     strm->state = (struct internal_state FAR *)s;
     s->strm = strm;
+    s->status = INIT_STATE;     /* to pass state test in deflateReset() */
 
     s->wrap = wrap;
     s->gzhead = Z_NULL;
-    s->w_bits = windowBits;
+    s->w_bits = (uInt)windowBits;
     s->w_size = 1 << s->w_bits;
     s->w_mask = s->w_size - 1;
 
-    s->hash_bits = memLevel + 7;
+    s->hash_bits = (uInt)memLevel + 7;
     s->hash_size = 1 << s->hash_bits;
     s->hash_mask = s->hash_size - 1;
     s->hash_shift =  ((s->hash_bits+MIN_MATCH-1)/MIN_MATCH);
@@ -322,6 +347,31 @@ int ZEXPORT deflateInit2_(strm, level, method, windowBits, memLevel, strategy,
     return deflateReset(strm);
 }
 
+/* =========================================================================
+ * Check for a valid deflate stream state. Return 0 if ok, 1 if not.
+ */
+local int deflateStateCheck (strm)
+    z_streamp strm;
+{
+    deflate_state *s;
+    if (strm == Z_NULL ||
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    s = strm->state;
+    if (s == Z_NULL || s->strm != strm || (s->status != INIT_STATE &&
+#ifdef GZIP
+                                           s->status != GZIP_STATE &&
+#endif
+                                           s->status != EXTRA_STATE &&
+                                           s->status != NAME_STATE &&
+                                           s->status != COMMENT_STATE &&
+                                           s->status != HCRC_STATE &&
+                                           s->status != BUSY_STATE &&
+                                           s->status != FINISH_STATE))
+        return 1;
+    return 0;
+}
+
 /* ========================================================================= */
 int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
     z_streamp strm;
@@ -334,7 +384,7 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
     unsigned avail;
     z_const unsigned char *next;
 
-    if (strm == Z_NULL || strm->state == Z_NULL || dictionary == Z_NULL)
+    if (deflateStateCheck(strm) || dictionary == Z_NULL)
         return Z_STREAM_ERROR;
     s = strm->state;
     wrap = s->wrap;
@@ -392,13 +442,34 @@ int ZEXPORT deflateSetDictionary (strm, dictionary, dictLength)
 }
 
 /* ========================================================================= */
+int ZEXPORT deflateGetDictionary (strm, dictionary, dictLength)
+    z_streamp strm;
+    Bytef *dictionary;
+    uInt  *dictLength;
+{
+    deflate_state *s;
+    uInt len;
+
+    if (deflateStateCheck(strm))
+        return Z_STREAM_ERROR;
+    s = strm->state;
+    len = s->strstart + s->lookahead;
+    if (len > s->w_size)
+        len = s->w_size;
+    if (dictionary != Z_NULL && len)
+        zmemcpy(dictionary, s->window + s->strstart + s->lookahead - len, len);
+    if (dictLength != Z_NULL)
+        *dictLength = len;
+    return Z_OK;
+}
+
+/* ========================================================================= */
 int ZEXPORT deflateResetKeep (strm)
     z_streamp strm;
 {
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL ||
-        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0) {
+    if (deflateStateCheck(strm)) {
         return Z_STREAM_ERROR;
     }
 
@@ -413,7 +484,11 @@ int ZEXPORT deflateResetKeep (strm)
     if (s->wrap < 0) {
         s->wrap = -s->wrap; /* was made negative by deflate(..., Z_FINISH); */
     }
-    s->status = s->wrap ? INIT_STATE : BUSY_STATE;
+    s->status =
+#ifdef GZIP
+        s->wrap == 2 ? GZIP_STATE :
+#endif
+        s->wrap ? INIT_STATE : BUSY_STATE;
     strm->adler =
 #ifdef GZIP
         s->wrap == 2 ? crc32(0L, Z_NULL, 0) :
@@ -443,8 +518,8 @@ int ZEXPORT deflateSetHeader (strm, head)
     z_streamp strm;
     gz_headerp head;
 {
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
-    if (strm->state->wrap != 2) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm) || strm->state->wrap != 2)
+        return Z_STREAM_ERROR;
     strm->state->gzhead = head;
     return Z_OK;
 }
@@ -455,7 +530,7 @@ int ZEXPORT deflatePending (strm, pending, bits)
     int *bits;
     z_streamp strm;
 {
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     if (pending != Z_NULL)
         *pending = strm->state->pending;
     if (bits != Z_NULL)
@@ -472,7 +547,7 @@ int ZEXPORT deflatePrime (strm, bits, value)
     deflate_state *s;
     int put;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
     if ((Bytef *)(s->d_buf) < s->pending_out + ((Buf_size + 7) >> 3))
         return Z_BUF_ERROR;
@@ -497,9 +572,8 @@ int ZEXPORT deflateParams(strm, level, strategy)
 {
     deflate_state *s;
     compress_func func;
-    int err = Z_OK;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
 
 #ifdef FASTEST
@@ -513,13 +587,22 @@ int ZEXPORT deflateParams(strm, level, strategy)
     func = configuration_table[s->level].func;
 
     if ((strategy != s->strategy || func != configuration_table[level].func) &&
-        strm->total_in != 0) {
+        s->high_water) {
         /* Flush the last buffer: */
-        err = deflate(strm, Z_BLOCK);
-        if (err == Z_BUF_ERROR && s->pending == 0)
-            err = Z_OK;
+        int err = deflate(strm, Z_BLOCK);
+        if (err == Z_STREAM_ERROR)
+            return err;
+        if (strm->avail_out == 0)
+            return Z_BUF_ERROR;
     }
     if (s->level != level) {
+        if (s->level == 0 && s->matches != 0) {
+            if (s->matches == 1)
+                slide_hash(s);
+            else
+                CLEAR_HASH(s);
+            s->matches = 0;
+        }
         s->level = level;
         s->max_lazy_match   = configuration_table[level].max_lazy;
         s->good_match       = configuration_table[level].good_length;
@@ -527,7 +610,7 @@ int ZEXPORT deflateParams(strm, level, strategy)
         s->max_chain_length = configuration_table[level].max_chain;
     }
     s->strategy = strategy;
-    return err;
+    return Z_OK;
 }
 
 /* ========================================================================= */
@@ -540,12 +623,12 @@ int ZEXPORT deflateTune(strm, good_length, max_lazy, nice_length, max_chain)
 {
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
     s = strm->state;
-    s->good_match = good_length;
-    s->max_lazy_match = max_lazy;
+    s->good_match = (uInt)good_length;
+    s->max_lazy_match = (uInt)max_lazy;
     s->nice_match = nice_length;
-    s->max_chain_length = max_chain;
+    s->max_chain_length = (uInt)max_chain;
     return Z_OK;
 }
 
@@ -572,14 +655,13 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
 {
     deflate_state *s;
     uLong complen, wraplen;
-    Bytef *str;
 
     /* conservative upper bound for compressed data */
     complen = sourceLen +
               ((sourceLen + 7) >> 3) + ((sourceLen + 63) >> 6) + 5;
 
     /* if can't get parameters, return conservative bound plus zlib wrapper */
-    if (strm == Z_NULL || strm->state == Z_NULL)
+    if (deflateStateCheck(strm))
         return complen + 6;
 
     /* compute wrapper length */
@@ -591,9 +673,11 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
     case 1:                                 /* zlib wrapper */
         wraplen = 6 + (s->strstart ? 4 : 0);
         break;
+#ifdef GZIP
     case 2:                                 /* gzip wrapper */
         wraplen = 18;
         if (s->gzhead != Z_NULL) {          /* user-supplied gzip header */
+            Bytef *str;
             if (s->gzhead->extra != Z_NULL)
                 wraplen += 2 + s->gzhead->extra_len;
             str = s->gzhead->name;
@@ -610,6 +694,7 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
                 wraplen += 2;
         }
         break;
+#endif
     default:                                /* for compiler happiness */
         wraplen = 6;
     }
@@ -637,10 +722,10 @@ local void putShortMSB (s, b)
 }
 
 /* =========================================================================
- * Flush as much pending output as possible. All deflate() output goes
- * through this function so some applications may wish to modify it
- * to avoid allocating a large strm->next_out buffer and copying into it.
- * (See also read_buf()).
+ * Flush as much pending output as possible. All deflate() output, except for
+ * some deflate_stored() output, goes through this function so some
+ * applications may wish to modify it to avoid allocating a large
+ * strm->next_out buffer and copying into it. (See also read_buf()).
  */
 local void flush_pending(strm)
     z_streamp strm;
@@ -657,13 +742,23 @@ local void flush_pending(strm)
     strm->next_out  += len;
     s->pending_out  += len;
     strm->total_out += len;
-    strm->avail_out  -= len;
-    s->pending -= len;
+    strm->avail_out -= len;
+    s->pending      -= len;
     if (s->pending == 0) {
         s->pending_out = s->pending_buf;
     }
 }
 
+/* ===========================================================================
+ * Update the header CRC with the bytes s->pending_buf[beg..s->pending - 1].
+ */
+#define HCRC_UPDATE(beg) \
+    do { \
+        if (s->gzhead->hcrc && s->pending > (beg)) \
+            strm->adler = crc32(strm->adler, s->pending_buf + (beg), \
+                                s->pending - (beg)); \
+    } while (0)
+
 /* ========================================================================= */
 int ZEXPORT deflate (strm, flush)
     z_streamp strm;
@@ -672,230 +767,229 @@ int ZEXPORT deflate (strm, flush)
     int old_flush; /* value of flush param for previous deflate call */
     deflate_state *s;
 
-    if (strm == Z_NULL || strm->state == Z_NULL ||
-        flush > Z_BLOCK || flush < 0) {
+    if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
         return Z_STREAM_ERROR;
     }
     s = strm->state;
 
     if (strm->next_out == Z_NULL ||
-        (strm->next_in == Z_NULL && strm->avail_in != 0) ||
+        (strm->avail_in != 0 && strm->next_in == Z_NULL) ||
         (s->status == FINISH_STATE && flush != Z_FINISH)) {
         ERR_RETURN(strm, Z_STREAM_ERROR);
     }
     if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
 
-    s->strm = strm; /* just in case */
     old_flush = s->last_flush;
     s->last_flush = flush;
 
+    /* Flush as much pending output as possible */
+    if (s->pending != 0) {
+        flush_pending(strm);
+        if (strm->avail_out == 0) {
+            /* Since avail_out is 0, deflate will be called again with
+             * more output space, but possibly with both pending and
+             * avail_in equal to zero. There won't be anything to do,
+             * but this is not an error situation so make sure we
+             * return OK instead of BUF_ERROR at next call of deflate:
+             */
+            s->last_flush = -1;
+            return Z_OK;
+        }
+
+    /* Make sure there is something to do and avoid duplicate consecutive
+     * flushes. For repeated and useless calls with Z_FINISH, we keep
+     * returning Z_STREAM_END instead of Z_BUF_ERROR.
+     */
+    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
+               flush != Z_FINISH) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
+    /* User must not provide more input after the first FINISH: */
+    if (s->status == FINISH_STATE && strm->avail_in != 0) {
+        ERR_RETURN(strm, Z_BUF_ERROR);
+    }
+
     /* Write the header */
     if (s->status == INIT_STATE) {
-#ifdef GZIP
-        if (s->wrap == 2) {
-            strm->adler = crc32(0L, Z_NULL, 0);
-            put_byte(s, 31);
-            put_byte(s, 139);
-            put_byte(s, 8);
-            if (s->gzhead == Z_NULL) {
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, 0);
-                put_byte(s, s->level == 9 ? 2 :
-                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                             4 : 0));
-                put_byte(s, OS_CODE);
-                s->status = BUSY_STATE;
-            }
-            else {
-                put_byte(s, (s->gzhead->text ? 1 : 0) +
-                            (s->gzhead->hcrc ? 2 : 0) +
-                            (s->gzhead->extra == Z_NULL ? 0 : 4) +
-                            (s->gzhead->name == Z_NULL ? 0 : 8) +
-                            (s->gzhead->comment == Z_NULL ? 0 : 16)
-                        );
-                put_byte(s, (Byte)(s->gzhead->time & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
-                put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
-                put_byte(s, s->level == 9 ? 2 :
-                            (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
-                             4 : 0));
-                put_byte(s, s->gzhead->os & 0xff);
-                if (s->gzhead->extra != Z_NULL) {
-                    put_byte(s, s->gzhead->extra_len & 0xff);
-                    put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
-                }
-                if (s->gzhead->hcrc)
-                    strm->adler = crc32(strm->adler, s->pending_buf,
-                                        s->pending);
-                s->gzindex = 0;
-                s->status = EXTRA_STATE;
-            }
-        }
+        /* zlib header */
+        uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
+        uInt level_flags;
+
+        if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
+            level_flags = 0;
+        else if (s->level < 6)
+            level_flags = 1;
+        else if (s->level == 6)
+            level_flags = 2;
         else
-#endif
-        {
-            uInt header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
-            uInt level_flags;
-
-            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
-                level_flags = 0;
-            else if (s->level < 6)
-                level_flags = 1;
-            else if (s->level == 6)
-                level_flags = 2;
-            else
-                level_flags = 3;
-            header |= (level_flags << 6);
-            if (s->strstart != 0) header |= PRESET_DICT;
-            header += 31 - (header % 31);
+            level_flags = 3;
+        header |= (level_flags << 6);
+        if (s->strstart != 0) header |= PRESET_DICT;
+        header += 31 - (header % 31);
+
+        putShortMSB(s, header);
+
+        /* Save the adler32 of the preset dictionary: */
+        if (s->strstart != 0) {
+            putShortMSB(s, (uInt)(strm->adler >> 16));
+            putShortMSB(s, (uInt)(strm->adler & 0xffff));
+        }
+        strm->adler = adler32(0L, Z_NULL, 0);
+        s->status = BUSY_STATE;
 
+        /* Compression must start with an empty pending buffer */
+        flush_pending(strm);
+        if (s->pending != 0) {
+            s->last_flush = -1;
+            return Z_OK;
+        }
+    }
+#ifdef GZIP
+    if (s->status == GZIP_STATE) {
+        /* gzip header */
+        strm->adler = crc32(0L, Z_NULL, 0);
+        put_byte(s, 31);
+        put_byte(s, 139);
+        put_byte(s, 8);
+        if (s->gzhead == Z_NULL) {
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, 0);
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, OS_CODE);
             s->status = BUSY_STATE;
-            putShortMSB(s, header);
 
-            /* Save the adler32 of the preset dictionary: */
-            if (s->strstart != 0) {
-                putShortMSB(s, (uInt)(strm->adler >> 16));
-                putShortMSB(s, (uInt)(strm->adler & 0xffff));
+            /* Compression must start with an empty pending buffer */
+            flush_pending(strm);
+            if (s->pending != 0) {
+                s->last_flush = -1;
+                return Z_OK;
+            }
+        }
+        else {
+            put_byte(s, (s->gzhead->text ? 1 : 0) +
+                     (s->gzhead->hcrc ? 2 : 0) +
+                     (s->gzhead->extra == Z_NULL ? 0 : 4) +
+                     (s->gzhead->name == Z_NULL ? 0 : 8) +
+                     (s->gzhead->comment == Z_NULL ? 0 : 16)
+                     );
+            put_byte(s, (Byte)(s->gzhead->time & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 8) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 16) & 0xff));
+            put_byte(s, (Byte)((s->gzhead->time >> 24) & 0xff));
+            put_byte(s, s->level == 9 ? 2 :
+                     (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
+                      4 : 0));
+            put_byte(s, s->gzhead->os & 0xff);
+            if (s->gzhead->extra != Z_NULL) {
+                put_byte(s, s->gzhead->extra_len & 0xff);
+                put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
             }
-            strm->adler = adler32(0L, Z_NULL, 0);
+            if (s->gzhead->hcrc)
+                strm->adler = crc32(strm->adler, s->pending_buf,
+                                    s->pending);
+            s->gzindex = 0;
+            s->status = EXTRA_STATE;
         }
     }
-#ifdef GZIP
     if (s->status == EXTRA_STATE) {
         if (s->gzhead->extra != Z_NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
-
-            while (s->gzindex < (s->gzhead->extra_len & 0xffff)) {
-                if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
-                    flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size)
-                        break;
+            ulg beg = s->pending;   /* start of bytes to update crc */
+            uInt left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
+            while (s->pending + left > s->pending_buf_size) {
+                uInt copy = s->pending_buf_size - s->pending;
+                zmemcpy(s->pending_buf + s->pending,
+                        s->gzhead->extra + s->gzindex, copy);
+                s->pending = s->pending_buf_size;
+                HCRC_UPDATE(beg);
+                s->gzindex += copy;
+                flush_pending(strm);
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
                 }
-                put_byte(s, s->gzhead->extra[s->gzindex]);
-                s->gzindex++;
-            }
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (s->gzindex == s->gzhead->extra_len) {
-                s->gzindex = 0;
-                s->status = NAME_STATE;
+                beg = 0;
+                left -= copy;
             }
+            zmemcpy(s->pending_buf + s->pending,
+                    s->gzhead->extra + s->gzindex, left);
+            s->pending += left;
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
         }
-        else
-            s->status = NAME_STATE;
+        s->status = NAME_STATE;
     }
     if (s->status == NAME_STATE) {
         if (s->gzhead->name != Z_NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
+            ulg beg = s->pending;   /* start of bytes to update crc */
             int val;
-
             do {
                 if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
+                    HCRC_UPDATE(beg);
                     flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size) {
-                        val = 1;
-                        break;
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
                     }
+                    beg = 0;
                 }
                 val = s->gzhead->name[s->gzindex++];
                 put_byte(s, val);
             } while (val != 0);
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (val == 0) {
-                s->gzindex = 0;
-                s->status = COMMENT_STATE;
-            }
+            HCRC_UPDATE(beg);
+            s->gzindex = 0;
         }
-        else
-            s->status = COMMENT_STATE;
+        s->status = COMMENT_STATE;
     }
     if (s->status == COMMENT_STATE) {
         if (s->gzhead->comment != Z_NULL) {
-            uInt beg = s->pending;  /* start of bytes to update crc */
+            ulg beg = s->pending;   /* start of bytes to update crc */
             int val;
-
             do {
                 if (s->pending == s->pending_buf_size) {
-                    if (s->gzhead->hcrc && s->pending > beg)
-                        strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                            s->pending - beg);
+                    HCRC_UPDATE(beg);
                     flush_pending(strm);
-                    beg = s->pending;
-                    if (s->pending == s->pending_buf_size) {
-                        val = 1;
-                        break;
+                    if (s->pending != 0) {
+                        s->last_flush = -1;
+                        return Z_OK;
                     }
+                    beg = 0;
                 }
                 val = s->gzhead->comment[s->gzindex++];
                 put_byte(s, val);
             } while (val != 0);
-            if (s->gzhead->hcrc && s->pending > beg)
-                strm->adler = crc32(strm->adler, s->pending_buf + beg,
-                                    s->pending - beg);
-            if (val == 0)
-                s->status = HCRC_STATE;
+            HCRC_UPDATE(beg);
         }
-        else
-            s->status = HCRC_STATE;
+        s->status = HCRC_STATE;
     }
     if (s->status == HCRC_STATE) {
         if (s->gzhead->hcrc) {
-            if (s->pending + 2 > s->pending_buf_size)
+            if (s->pending + 2 > s->pending_buf_size) {
                 flush_pending(strm);
-            if (s->pending + 2 <= s->pending_buf_size) {
-                put_byte(s, (Byte)(strm->adler & 0xff));
-                put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
-                strm->adler = crc32(0L, Z_NULL, 0);
-                s->status = BUSY_STATE;
+                if (s->pending != 0) {
+                    s->last_flush = -1;
+                    return Z_OK;
+                }
             }
+            put_byte(s, (Byte)(strm->adler & 0xff));
+            put_byte(s, (Byte)((strm->adler >> 8) & 0xff));
+            strm->adler = crc32(0L, Z_NULL, 0);
         }
-        else
-            s->status = BUSY_STATE;
-    }
-#endif
+        s->status = BUSY_STATE;
 
-    /* Flush as much pending output as possible */
-    if (s->pending != 0) {
+        /* Compression must start with an empty pending buffer */
         flush_pending(strm);
-        if (strm->avail_out == 0) {
-            /* Since avail_out is 0, deflate will be called again with
-             * more output space, but possibly with both pending and
-             * avail_in equal to zero. There won't be anything to do,
-             * but this is not an error situation so make sure we
-             * return OK instead of BUF_ERROR at next call of deflate:
-             */
+        if (s->pending != 0) {
             s->last_flush = -1;
             return Z_OK;
         }
-
-    /* Make sure there is something to do and avoid duplicate consecutive
-     * flushes. For repeated and useless calls with Z_FINISH, we keep
-     * returning Z_STREAM_END instead of Z_BUF_ERROR.
-     */
-    } else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
-               flush != Z_FINISH) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
-    }
-
-    /* User must not provide more input after the first FINISH: */
-    if (s->status == FINISH_STATE && strm->avail_in != 0) {
-        ERR_RETURN(strm, Z_BUF_ERROR);
     }
+#endif
 
     /* Start a new block or continue the current one.
      */
@@ -903,9 +997,10 @@ int ZEXPORT deflate (strm, flush)
         (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
         block_state bstate;
 
-        bstate = s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
-                    (s->strategy == Z_RLE ? deflate_rle(s, flush) :
-                        (*(configuration_table[s->level].func))(s, flush));
+        bstate = s->level == 0 ? deflate_stored(s, flush) :
+                 s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
+                 s->strategy == Z_RLE ? deflate_rle(s, flush) :
+                 (*(configuration_table[s->level].func))(s, flush);
 
         if (bstate == finish_started || bstate == finish_done) {
             s->status = FINISH_STATE;
@@ -947,7 +1042,6 @@ int ZEXPORT deflate (strm, flush)
             }
         }
     }
-    Assert(strm->avail_out > 0, "bug2");
 
     if (flush != Z_FINISH) return Z_OK;
     if (s->wrap <= 0) return Z_STREAM_END;
@@ -984,18 +1078,9 @@ int ZEXPORT deflateEnd (strm)
 {
     int status;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (deflateStateCheck(strm)) return Z_STREAM_ERROR;
 
     status = strm->state->status;
-    if (status != INIT_STATE &&
-        status != EXTRA_STATE &&
-        status != NAME_STATE &&
-        status != COMMENT_STATE &&
-        status != HCRC_STATE &&
-        status != BUSY_STATE &&
-        status != FINISH_STATE) {
-      return Z_STREAM_ERROR;
-    }
 
     /* Deallocate in reverse order of allocations: */
     TRY_FREE(strm, strm->state->pending_buf);
@@ -1026,7 +1111,7 @@ int ZEXPORT deflateCopy (dest, source)
     ushf *overlay;
 
 
-    if (source == Z_NULL || dest == Z_NULL || source->state == Z_NULL) {
+    if (deflateStateCheck(source) || dest == Z_NULL) {
         return Z_STREAM_ERROR;
     }
 
@@ -1076,7 +1161,7 @@ int ZEXPORT deflateCopy (dest, source)
  * allocating a large strm->next_in buffer and copying from it.
  * (See also flush_pending()).
  */
-local int read_buf(strm, buf, size)
+local unsigned read_buf(strm, buf, size)
     z_streamp strm;
     Bytef *buf;
     unsigned size;
@@ -1100,7 +1185,7 @@ local int read_buf(strm, buf, size)
     strm->next_in  += len;
     strm->total_in += len;
 
-    return (int)len;
+    return len;
 }
 
 /* ===========================================================================
@@ -1154,9 +1239,9 @@ local uInt longest_match(s, cur_match)
 {
     unsigned chain_length = s->max_chain_length;/* max hash chain length */
     register Bytef *scan = s->window + s->strstart; /* current string */
-    register Bytef *match;                       /* matched string */
+    register Bytef *match;                      /* matched string */
     register int len;                           /* length of current match */
-    int best_len = s->prev_length;              /* best match length so far */
+    int best_len = (int)s->prev_length;         /* best match length so far */
     int nice_match = s->nice_match;             /* stop if match long enough */
     IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
         s->strstart - (IPos)MAX_DIST(s) : NIL;
@@ -1191,7 +1276,7 @@ local uInt longest_match(s, cur_match)
     /* Do not look for matches beyond the end of the input. This is necessary
      * to make deflate deterministic.
      */
-    if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
+    if ((uInt)nice_match > s->lookahead) nice_match = (int)s->lookahead;
 
     Assert((ulg)s->strstart <= s->window_size-MIN_LOOKAHEAD, "need lookahead");
 
@@ -1352,7 +1437,11 @@ local uInt longest_match(s, cur_match)
 
 #endif /* FASTEST */
 
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
+
+#define EQUAL 0
+/* result of memcmp for equal strings */
+
 /* ===========================================================================
  * Check that the match at match_start is indeed a match.
  */
@@ -1378,7 +1467,7 @@ local void check_match(s, start, match, length)
 }
 #else
 #  define check_match(s, start, match, length)
-#endif /* DEBUG */
+#endif /* ZLIB_DEBUG */
 
 /* ===========================================================================
  * Fill the window when the lookahead becomes insufficient.
@@ -1393,8 +1482,7 @@ local void check_match(s, start, match, length)
 local void fill_window(s)
     deflate_state *s;
 {
-    register unsigned n, m;
-    register Posf *p;
+    unsigned n;
     unsigned more;    /* Amount of free space at the end of the window. */
     uInt wsize = s->w_size;
 
@@ -1421,35 +1509,11 @@ local void fill_window(s)
          */
         if (s->strstart >= wsize+MAX_DIST(s)) {
 
-            zmemcpy(s->window, s->window+wsize, (unsigned)wsize);
+            zmemcpy(s->window, s->window+wsize, (unsigned)wsize - more);
             s->match_start -= wsize;
             s->strstart    -= wsize; /* we now have strstart >= MAX_DIST */
             s->block_start -= (long) wsize;
-
-            /* Slide the hash table (could be avoided with 32 bit values
-               at the expense of memory usage). We slide even when level == 0
-               to keep the hash table consistent if we switch back to level > 0
-               later. (Using level 0 permanently is not an optimal usage of
-               zlib, so we don't care about this pathological case.)
-             */
-            n = s->hash_size;
-            p = &s->head[n];
-            do {
-                m = *--p;
-                *p = (Pos)(m >= wsize ? m-wsize : NIL);
-            } while (--n);
-
-            n = wsize;
-#ifndef FASTEST
-            p = &s->prev[n];
-            do {
-                m = *--p;
-                *p = (Pos)(m >= wsize ? m-wsize : NIL);
-                /* If n is not on any hash chain, prev[n] is garbage but
-                 * its value will never be used.
-                 */
-            } while (--n);
-#endif
+            slide_hash(s);
             more += wsize;
         }
         if (s->strm->avail_in == 0) break;
@@ -1555,70 +1619,199 @@ local void fill_window(s)
    if (s->strm->avail_out == 0) return (last) ? finish_started : need_more; \
 }
 
+/* Maximum stored block length in deflate format (not including header). */
+#define MAX_STORED 65535
+
+/* Minimum of a and b. */
+#define MIN(a, b) ((a) > (b) ? (b) : (a))
+
 /* ===========================================================================
  * Copy without compression as much as possible from the input stream, return
  * the current block state.
- * This function does not insert new strings in the dictionary since
- * uncompressible data is probably not useful. This function is used
- * only for the level=0 compression option.
- * NOTE: this function should be optimized to avoid extra copying from
- * window to pending_buf.
+ *
+ * In case deflateParams() is used to later switch to a non-zero compression
+ * level, s->matches (otherwise unused when storing) keeps track of the number
+ * of hash table slides to perform. If s->matches is 1, then one hash table
+ * slide will be done when switching. If s->matches is 2, the maximum value
+ * allowed here, then the hash table will be cleared, since two or more slides
+ * is the same as a clear.
+ *
+ * deflate_stored() is written to minimize the number of times an input byte is
+ * copied. It is most efficient with large input and output buffers, which
+ * maximizes the opportunities to have a single copy from next_in to next_out.
  */
 local block_state deflate_stored(s, flush)
     deflate_state *s;
     int flush;
 {
-    /* Stored blocks are limited to 0xffff bytes, pending_buf is limited
-     * to pending_buf_size, and each stored block has a 5 byte header:
+    /* Smallest worthy block size when not flushing or finishing. By default
+     * this is 32K. This can be as small as 507 bytes for memLevel == 1. For
+     * large input and output buffers, the stored block size will be larger.
      */
-    ulg max_block_size = 0xffff;
-    ulg max_start;
-
-    if (max_block_size > s->pending_buf_size - 5) {
-        max_block_size = s->pending_buf_size - 5;
-    }
-
-    /* Copy as much as possible from input to output: */
-    for (;;) {
-        /* Fill the window as much as possible: */
-        if (s->lookahead <= 1) {
+    unsigned min_block = MIN(s->pending_buf_size - 5, s->w_size);
 
-            Assert(s->strstart < s->w_size+MAX_DIST(s) ||
-                   s->block_start >= (long)s->w_size, "slide too late");
+    /* Copy as many min_block or larger stored blocks directly to next_out as
+     * possible. If flushing, copy the remaining available input to next_out as
+     * stored blocks, if there is enough space.
+     */
+    unsigned len, left, have, last = 0;
+    unsigned used = s->strm->avail_in;
+    do {
+        /* Set len to the maximum size block that we can copy directly with the
+         * available input data and output space. Set left to how much of that
+         * would be copied from what's left in the window.
+         */
+        len = MAX_STORED;       /* maximum deflate stored block length */
+        have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        if (s->strm->avail_out < have)          /* need room for header */
+            break;
+            /* maximum stored block length that will fit in avail_out: */
+        have = s->strm->avail_out - have;
+        left = s->strstart - s->block_start;    /* bytes left in window */
+        if (len > (ulg)left + s->strm->avail_in)
+            len = left + s->strm->avail_in;     /* limit len to the input */
+        if (len > have)
+            len = have;                         /* limit len to the output */
+
+        /* If the stored block would be less than min_block in length, or if
+         * unable to copy all of the available input when flushing, then try
+         * copying to the window and the pending buffer instead. Also don't
+         * write an empty block when flushing -- deflate() does that.
+         */
+        if (len < min_block && ((len == 0 && flush != Z_FINISH) ||
+                                flush == Z_NO_FLUSH ||
+                                len != left + s->strm->avail_in))
+            break;
 
-            fill_window(s);
-            if (s->lookahead == 0 && flush == Z_NO_FLUSH) return need_more;
+        /* Make a dummy stored block in pending to get the header bytes,
+         * including any pending bits. This also updates the debugging counts.
+         */
+        last = flush == Z_FINISH && len == left + s->strm->avail_in ? 1 : 0;
+        _tr_stored_block(s, (char *)0, 0L, last);
+
+        /* Replace the lengths in the dummy stored block with len. */
+        s->pending_buf[s->pending - 4] = len;
+        s->pending_buf[s->pending - 3] = len >> 8;
+        s->pending_buf[s->pending - 2] = ~len;
+        s->pending_buf[s->pending - 1] = ~len >> 8;
+
+        /* Write the stored block header bytes. */
+        flush_pending(s->strm);
+
+#ifdef ZLIB_DEBUG
+        /* Update debugging counts for the data about to be copied. */
+        s->compressed_len += len << 3;
+        s->bits_sent += len << 3;
+#endif
 
-            if (s->lookahead == 0) break; /* flush the current block */
+        /* Copy uncompressed bytes from the window to next_out. */
+        if (left) {
+            if (left > len)
+                left = len;
+            zmemcpy(s->strm->next_out, s->window + s->block_start, left);
+            s->strm->next_out += left;
+            s->strm->avail_out -= left;
+            s->strm->total_out += left;
+            s->block_start += left;
+            len -= left;
         }
-        Assert(s->block_start >= 0L, "block gone");
-
-        s->strstart += s->lookahead;
-        s->lookahead = 0;
-
-        /* Emit a stored block if pending_buf will be full: */
-        max_start = s->block_start + max_block_size;
-        if (s->strstart == 0 || (ulg)s->strstart >= max_start) {
-            /* strstart == 0 is possible when wraparound on 16-bit machine */
-            s->lookahead = (uInt)(s->strstart - max_start);
-            s->strstart = (uInt)max_start;
-            FLUSH_BLOCK(s, 0);
+
+        /* Copy uncompressed bytes directly from next_in to next_out, updating
+         * the check value.
+         */
+        if (len) {
+            read_buf(s->strm, s->strm->next_out, len);
+            s->strm->next_out += len;
+            s->strm->avail_out -= len;
+            s->strm->total_out += len;
         }
-        /* Flush if we may have to slide, otherwise block_start may become
-         * negative and the data will be gone:
+    } while (last == 0);
+
+    /* Update the sliding window with the last s->w_size bytes of the copied
+     * data, or append all of the copied data to the existing window if less
+     * than s->w_size bytes were copied. Also update the number of bytes to
+     * insert in the hash tables, in the event that deflateParams() switches to
+     * a non-zero compression level.
+     */
+    used -= s->strm->avail_in;      /* number of input bytes directly copied */
+    if (used) {
+        /* If any input was used, then no unused input remains in the window,
+         * therefore s->block_start == s->strstart.
          */
-        if (s->strstart - (uInt)s->block_start >= MAX_DIST(s)) {
-            FLUSH_BLOCK(s, 0);
+        if (used >= s->w_size) {    /* supplant the previous history */
+            s->matches = 2;         /* clear hash */
+            zmemcpy(s->window, s->strm->next_in - s->w_size, s->w_size);
+            s->strstart = s->w_size;
         }
+        else {
+            if (s->window_size - s->strstart <= used) {
+                /* Slide the window down. */
+                s->strstart -= s->w_size;
+                zmemcpy(s->window, s->window + s->w_size, s->strstart);
+                if (s->matches < 2)
+                    s->matches++;   /* add a pending slide_hash() */
+            }
+            zmemcpy(s->window + s->strstart, s->strm->next_in - used, used);
+            s->strstart += used;
+        }
+        s->block_start = s->strstart;
+        s->insert += MIN(used, s->w_size - s->insert);
     }
-    s->insert = 0;
-    if (flush == Z_FINISH) {
-        FLUSH_BLOCK(s, 1);
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* If the last block was written to next_out, then done. */
+    if (last)
         return finish_done;
+
+    /* If flushing and all input has been consumed, then done. */
+    if (flush != Z_NO_FLUSH && flush != Z_FINISH &&
+        s->strm->avail_in == 0 && (long)s->strstart == s->block_start)
+        return block_done;
+
+    /* Fill the window with any remaining input. */
+    have = s->window_size - s->strstart - 1;
+    if (s->strm->avail_in > have && s->block_start >= (long)s->w_size) {
+        /* Slide the window down. */
+        s->block_start -= s->w_size;
+        s->strstart -= s->w_size;
+        zmemcpy(s->window, s->window + s->w_size, s->strstart);
+        if (s->matches < 2)
+            s->matches++;           /* add a pending slide_hash() */
+        have += s->w_size;          /* more space now */
     }
-    if ((long)s->strstart > s->block_start)
-        FLUSH_BLOCK(s, 0);
-    return block_done;
+    if (have > s->strm->avail_in)
+        have = s->strm->avail_in;
+    if (have) {
+        read_buf(s->strm, s->window + s->strstart, have);
+        s->strstart += have;
+    }
+    if (s->high_water < s->strstart)
+        s->high_water = s->strstart;
+
+    /* There was not enough avail_out to write a complete worthy or flushed
+     * stored block to next_out. Write a stored block to pending instead, if we
+     * have enough input for a worthy block, or if flushing and there is enough
+     * room for the remaining input as a stored block in the pending buffer.
+     */
+    have = (s->bi_valid + 42) >> 3;         /* number of header bytes */
+        /* maximum stored block length that will fit in pending: */
+    have = MIN(s->pending_buf_size - have, MAX_STORED);
+    min_block = MIN(have, s->w_size);
+    left = s->strstart - s->block_start;
+    if (left >= min_block ||
+        ((left || flush == Z_FINISH) && flush != Z_NO_FLUSH &&
+         s->strm->avail_in == 0 && left <= have)) {
+        len = MIN(left, have);
+        last = flush == Z_FINISH && s->strm->avail_in == 0 &&
+               len == left ? 1 : 0;
+        _tr_stored_block(s, (charf *)s->window + s->block_start, len, last);
+        s->block_start += len;
+        flush_pending(s->strm);
+    }
+
+    /* We've done all we can with the available input and output. */
+    return last ? finish_started : need_more;
 }
 
 /* ===========================================================================
@@ -1895,7 +2088,7 @@ local block_state deflate_rle(s, flush)
                          prev == *++scan && prev == *++scan &&
                          prev == *++scan && prev == *++scan &&
                          scan < strend);
-                s->match_length = MAX_MATCH - (int)(strend - scan);
+                s->match_length = MAX_MATCH - (uInt)(strend - scan);
                 if (s->match_length > s->lookahead)
                     s->match_length = s->lookahead;
             }
diff --git a/erts/emulator/zlib/deflate.h b/erts/emulator/zlib/deflate.h
index ce0299edd1..23ecdd312b 100644
--- a/erts/emulator/zlib/deflate.h
+++ b/erts/emulator/zlib/deflate.h
@@ -1,5 +1,5 @@
 /* deflate.h -- internal compression state
- * Copyright (C) 1995-2012 Jean-loup Gailly
+ * Copyright (C) 1995-2016 Jean-loup Gailly
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -51,13 +51,16 @@
 #define Buf_size 16
 /* size of bit buffer in bi_buf */
 
-#define INIT_STATE    42
-#define EXTRA_STATE   69
-#define NAME_STATE    73
-#define COMMENT_STATE 91
-#define HCRC_STATE   103
-#define BUSY_STATE   113
-#define FINISH_STATE 666
+#define INIT_STATE    42    /* zlib header -> BUSY_STATE */
+#ifdef GZIP
+#  define GZIP_STATE  57    /* gzip header -> BUSY_STATE | EXTRA_STATE */
+#endif
+#define EXTRA_STATE   69    /* gzip extra block -> NAME_STATE */
+#define NAME_STATE    73    /* gzip file name -> COMMENT_STATE */
+#define COMMENT_STATE 91    /* gzip comment -> HCRC_STATE */
+#define HCRC_STATE   103    /* gzip header CRC -> BUSY_STATE */
+#define BUSY_STATE   113    /* deflate -> FINISH_STATE */
+#define FINISH_STATE 666    /* stream complete */
 /* Stream status */
 
 
@@ -83,7 +86,7 @@ typedef struct static_tree_desc_s  static_tree_desc;
 typedef struct tree_desc_s {
     ct_data *dyn_tree;           /* the dynamic tree */
     int     max_code;            /* largest code with non zero frequency */
-    static_tree_desc *stat_desc; /* the corresponding static tree */
+    const static_tree_desc *stat_desc;  /* the corresponding static tree */
 } FAR tree_desc;
 
 typedef ush Pos;
@@ -100,10 +103,10 @@ typedef struct internal_state {
     Bytef *pending_buf;  /* output still pending */
     ulg   pending_buf_size; /* size of pending_buf */
     Bytef *pending_out;  /* next pending byte to output to the stream */
-    uInt   pending;      /* nb of bytes in the pending buffer */
+    ulg   pending;       /* nb of bytes in the pending buffer */
     int   wrap;          /* bit 0 true for zlib, bit 1 true for gzip */
     gz_headerp  gzhead;  /* gzip header information to write */
-    uInt   gzindex;      /* where in extra, name, or comment */
+    ulg   gzindex;       /* where in extra, name, or comment */
     Byte  method;        /* can only be DEFLATED */
     int   last_flush;    /* value of flush param for previous deflate call */
 
@@ -249,7 +252,7 @@ typedef struct internal_state {
     uInt matches;       /* number of string matches in current block */
     uInt insert;        /* bytes at end of window left to insert */
 
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     ulg compressed_len; /* total bit length of compressed file mod 2^32 */
     ulg bits_sent;      /* bit length of compressed data sent mod 2^32 */
 #endif
@@ -275,7 +278,7 @@ typedef struct internal_state {
 /* Output a byte on the stream.
  * IN assertion: there is enough room in pending_buf.
  */
-#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);}
+#define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);}
 
 
 #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)
@@ -309,7 +312,7 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
  * used.
  */
 
-#ifndef DEBUG
+#ifndef ZLIB_DEBUG
 /* Inline versions of _tr_tally for speed: */
 
 #if defined(GEN_TREES_H) || !defined(STDC)
@@ -328,8 +331,8 @@ void ZLIB_INTERNAL _tr_stored_block OF((deflate_state *s, charf *buf,
     flush = (s->last_lit == s->lit_bufsize-1); \
    }
 # define _tr_tally_dist(s, distance, length, flush) \
-  { uch len = (length); \
-    ush dist = (distance); \
+  { uch len = (uch)(length); \
+    ush dist = (ush)(distance); \
     s->d_buf[s->last_lit] = dist; \
     s->l_buf[s->last_lit++] = len; \
     dist--; \
diff --git a/erts/emulator/zlib/gzguts.h b/erts/emulator/zlib/gzguts.h
index d87659d031..990a4d2514 100644
--- a/erts/emulator/zlib/gzguts.h
+++ b/erts/emulator/zlib/gzguts.h
@@ -1,5 +1,5 @@
 /* gzguts.h -- zlib internal header definitions for gz* operations
- * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -25,6 +25,10 @@
 #  include <stdlib.h>
 #  include <limits.h>
 #endif
+
+#ifndef _POSIX_SOURCE
+#  define _POSIX_SOURCE
+#endif
 #include <fcntl.h>
 
 #ifdef _WIN32
@@ -35,6 +39,10 @@
 #  include <io.h>
 #endif
 
+#if defined(_WIN32) || defined(__CYGWIN__)
+#  define WIDECHAR
+#endif
+
 #ifdef WINAPI_FAMILY
 #  define open _open
 #  define read _read
@@ -95,18 +103,19 @@
 #  endif
 #endif
 
-/* unlike snprintf (which is required in C99, yet still not supported by
-   Microsoft more than a decade later!), _snprintf does not guarantee null
-   termination of the result -- however this is only used in gzlib.c where
+/* unlike snprintf (which is required in C99), _snprintf does not guarantee
+   null termination of the result -- however this is only used in gzlib.c where
    the result is assured to fit in the space provided */
-#ifdef _MSC_VER
+#if defined(_MSC_VER) && _MSC_VER < 1900
 #  define snprintf _snprintf
 #endif
 
 #ifndef local
 #  define local static
 #endif
-/* compile with -Dlocal if your debugger can't find static symbols */
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
 
 /* gz* functions always use library allocation functions */
 #ifndef STDC
@@ -170,7 +179,7 @@ typedef struct {
     char *path;             /* path or fd for error messages */
     unsigned size;          /* buffer size, zero if not allocated yet */
     unsigned want;          /* requested buffer size, default is GZBUFSIZE */
-    unsigned char *in;      /* input buffer */
+    unsigned char *in;      /* input buffer (double-sized when writing) */
     unsigned char *out;     /* output buffer (double-sized when reading) */
     int direct;             /* 0 if processing gzip, 1 if transparent */
         /* just for reading */
diff --git a/erts/emulator/zlib/inffast.c b/erts/emulator/zlib/inffast.c
index 5187743fde..0dbd1dbc09 100644
--- a/erts/emulator/zlib/inffast.c
+++ b/erts/emulator/zlib/inffast.c
@@ -1,36 +1,16 @@
 /* inffast.c -- fast decoding
- * Copyright (C) 1995-2008, 2010, 2013 Mark Adler
+ * Copyright (C) 1995-2017 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "zutil.h"
 #include "inftrees.h"
 #include "inflate.h"
 #include "inffast.h"
 
-#ifndef ASMINF
-
-/* Allow machine dependent optimization for post-increment or pre-increment.
-   Based on testing to date,
-   Pre-increment preferred for:
-   - PowerPC G3 (Adler)
-   - MIPS R5000 (Randers-Pehrson)
-   Post-increment preferred for:
-   - none
-   No measurable difference:
-   - Pentium III (Anderson)
-   - M68060 (Nikl)
- */
-#ifdef POSTINC
-#  define OFF 0
-#  define PUP(a) *(a)++
+#ifdef ASMINF
+#  pragma message("Assembler code may have bugs -- use at your own risk")
 #else
-#  define OFF 1
-#  define PUP(a) *++(a)
-#endif
 
 /*
    Decode literal, length, and distance codes and write out the resulting
@@ -99,9 +79,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 
     /* copy state to local variables */
     state = (struct inflate_state FAR *)strm->state;
-    in = strm->next_in - OFF;
+    in = strm->next_in;
     last = in + (strm->avail_in - 5);
-    out = strm->next_out - OFF;
+    out = strm->next_out;
     beg = out - (start - strm->avail_out);
     end = out + (strm->avail_out - 257);
 #ifdef INFLATE_STRICT
@@ -122,9 +102,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
        input data or output space */
     do {
         if (bits < 15) {
-            hold += (unsigned long)(PUP(in)) << bits;
+            hold += (unsigned long)(*in++) << bits;
             bits += 8;
-            hold += (unsigned long)(PUP(in)) << bits;
+            hold += (unsigned long)(*in++) << bits;
             bits += 8;
         }
         here = lcode[hold & lmask];
@@ -137,14 +117,14 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
             Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
                     "inflate:         literal '%c'\n" :
                     "inflate:         literal 0x%02x\n", here.val));
-            PUP(out) = (unsigned char)(here.val);
+            *out++ = (unsigned char)(here.val);
         }
         else if (op & 16) {                     /* length base */
             len = (unsigned)(here.val);
             op &= 15;                           /* number of extra bits */
             if (op) {
                 if (bits < op) {
-                    hold += (unsigned long)(PUP(in)) << bits;
+                    hold += (unsigned long)(*in++) << bits;
                     bits += 8;
                 }
                 len += (unsigned)hold & ((1U << op) - 1);
@@ -153,9 +133,9 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
             }
             Tracevv((stderr, "inflate:         length %u\n", len));
             if (bits < 15) {
-                hold += (unsigned long)(PUP(in)) << bits;
+                hold += (unsigned long)(*in++) << bits;
                 bits += 8;
-                hold += (unsigned long)(PUP(in)) << bits;
+                hold += (unsigned long)(*in++) << bits;
                 bits += 8;
             }
             here = dcode[hold & dmask];
@@ -168,10 +148,10 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                 dist = (unsigned)(here.val);
                 op &= 15;                       /* number of extra bits */
                 if (bits < op) {
-                    hold += (unsigned long)(PUP(in)) << bits;
+                    hold += (unsigned long)(*in++) << bits;
                     bits += 8;
                     if (bits < op) {
-                        hold += (unsigned long)(PUP(in)) << bits;
+                        hold += (unsigned long)(*in++) << bits;
                         bits += 8;
                     }
                 }
@@ -199,30 +179,30 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
                         if (len <= op - whave) {
                             do {
-                                PUP(out) = 0;
+                                *out++ = 0;
                             } while (--len);
                             continue;
                         }
                         len -= op - whave;
                         do {
-                            PUP(out) = 0;
+                            *out++ = 0;
                         } while (--op > whave);
                         if (op == 0) {
                             from = out - dist;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--len);
                             continue;
                         }
 #endif
                     }
-                    from = window - OFF;
+                    from = window;
                     if (wnext == 0) {           /* very common case */
                         from += wsize - op;
                         if (op < len) {         /* some from window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
                             from = out - dist;  /* rest from output */
                         }
@@ -233,14 +213,14 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                         if (op < len) {         /* some from end of window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
-                            from = window - OFF;
+                            from = window;
                             if (wnext < len) {  /* some from start of window */
                                 op = wnext;
                                 len -= op;
                                 do {
-                                    PUP(out) = PUP(from);
+                                    *out++ = *from++;
                                 } while (--op);
                                 from = out - dist;      /* rest from output */
                             }
@@ -251,35 +231,35 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
                         if (op < len) {         /* some from window */
                             len -= op;
                             do {
-                                PUP(out) = PUP(from);
+                                *out++ = *from++;
                             } while (--op);
                             from = out - dist;  /* rest from output */
                         }
                     }
                     while (len > 2) {
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
                         len -= 3;
                     }
                     if (len) {
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
                         if (len > 1)
-                            PUP(out) = PUP(from);
+                            *out++ = *from++;
                     }
                 }
                 else {
                     from = out - dist;          /* copy direct from output */
                     do {                        /* minimum length is three */
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
+                        *out++ = *from++;
+                        *out++ = *from++;
                         len -= 3;
                     } while (len > 2);
                     if (len) {
-                        PUP(out) = PUP(from);
+                        *out++ = *from++;
                         if (len > 1)
-                            PUP(out) = PUP(from);
+                            *out++ = *from++;
                     }
                 }
             }
@@ -316,8 +296,8 @@ unsigned start;         /* inflate()'s starting value for strm->avail_out */
     hold &= (1U << bits) - 1;
 
     /* update state and return */
-    strm->next_in = in + OFF;
-    strm->next_out = out + OFF;
+    strm->next_in = in;
+    strm->next_out = out;
     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
     strm->avail_out = (unsigned)(out < end ?
                                  257 + (end - out) : 257 - (out - end));
diff --git a/erts/emulator/zlib/inflate.c b/erts/emulator/zlib/inflate.c
index 532330b06b..ac333e8c2e 100644
--- a/erts/emulator/zlib/inflate.c
+++ b/erts/emulator/zlib/inflate.c
@@ -1,5 +1,5 @@
 /* inflate.c -- zlib decompression
- * Copyright (C) 1995-2012 Mark Adler
+ * Copyright (C) 1995-2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -80,9 +80,6 @@
  * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
  */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "zutil.h"
 #include "inftrees.h"
 #include "inflate.h"
@@ -95,6 +92,7 @@
 #endif
 
 /* function prototypes */
+local int inflateStateCheck OF((z_streamp strm));
 local void fixedtables OF((struct inflate_state FAR *state));
 local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
                            unsigned copy));
@@ -104,12 +102,26 @@ local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
 local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
                               unsigned len));
 
+local int inflateStateCheck(strm)   /* 1 if a check below fails, else 0 */
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (strm == Z_NULL ||           /* need a stream with both allocators */
+        strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
+        return 1;
+    state = (struct inflate_state FAR *)strm->state;
+    if (state == Z_NULL || state->strm != strm ||  /* state must point back */
+        state->mode < HEAD || state->mode > SYNC)  /* mode within HEAD..SYNC */
+        return 1;
+    return 0;
+}
+
 int ZEXPORT inflateResetKeep(strm)
 z_streamp strm;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     strm->total_in = strm->total_out = state->total = 0;
     strm->msg = Z_NULL;
@@ -134,7 +146,7 @@ z_streamp strm;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     state->wsize = 0;
     state->whave = 0;
@@ -150,7 +162,7 @@ int windowBits;
     struct inflate_state FAR *state;
 
     /* get the state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
 
     /* extract wrap request from windowBits parameter */
@@ -159,7 +171,7 @@ int windowBits;
         windowBits = -windowBits;
     }
     else {
-        wrap = (windowBits >> 4) + 1;
+        wrap = (windowBits >> 4) + 5;
 #ifdef GUNZIP
         if (windowBits < 48)
             windowBits &= 15;
@@ -213,7 +225,9 @@ int stream_size;
     if (state == Z_NULL) return Z_MEM_ERROR;
     Tracev((stderr, "inflate: allocated\n"));
     strm->state = (struct internal_state FAR *)state;
+    state->strm = strm;
     state->window = Z_NULL;
+    state->mode = HEAD;     /* to pass state test in inflateReset2() */
     ret = inflateReset2(strm, windowBits);
     if (ret != Z_OK) {
         ZFREE(strm, state);
@@ -237,17 +251,17 @@ int value;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (bits < 0) {
         state->hold = 0;
         state->bits = 0;
         return Z_OK;
     }
-    if (bits > 16 || state->bits + bits > 32) return Z_STREAM_ERROR;
+    if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
     value &= (1L << bits) - 1;
-    state->hold += value << state->bits;
-    state->bits += bits;
+    state->hold += (unsigned)value << state->bits;
+    state->bits += (uInt)bits;
     return Z_OK;
 }
 
@@ -628,7 +642,7 @@ int flush;
     static const unsigned short order[19] = /* permutation of code lengths */
         {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
 
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->next_out == Z_NULL ||
+    if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
         (strm->next_in == Z_NULL && strm->avail_in != 0))
         return Z_STREAM_ERROR;
 
@@ -648,6 +662,8 @@ int flush;
             NEEDBITS(16);
 #ifdef GUNZIP
             if ((state->wrap & 2) && hold == 0x8b1f) {  /* gzip header */
+                if (state->wbits == 0)
+                    state->wbits = 15;
                 state->check = crc32(0L, Z_NULL, 0);
                 CRC2(state->check, hold);
                 INITBITS();
@@ -675,7 +691,7 @@ int flush;
             len = BITS(4) + 8;
             if (state->wbits == 0)
                 state->wbits = len;
-            else if (len > state->wbits) {
+            if (len > 15 || len > state->wbits) {
                 strm->msg = (char *)"invalid window size";
                 state->mode = BAD;
                 break;
@@ -702,14 +718,16 @@ int flush;
             }
             if (state->head != Z_NULL)
                 state->head->text = (int)((hold >> 8) & 1);
-            if (state->flags & 0x0200) CRC2(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
             INITBITS();
             state->mode = TIME;
         case TIME:
             NEEDBITS(32);
             if (state->head != Z_NULL)
                 state->head->time = hold;
-            if (state->flags & 0x0200) CRC4(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC4(state->check, hold);
             INITBITS();
             state->mode = OS;
         case OS:
@@ -718,7 +736,8 @@ int flush;
                 state->head->xflags = (int)(hold & 0xff);
                 state->head->os = (int)(hold >> 8);
             }
-            if (state->flags & 0x0200) CRC2(state->check, hold);
+            if ((state->flags & 0x0200) && (state->wrap & 4))
+                CRC2(state->check, hold);
             INITBITS();
             state->mode = EXLEN;
         case EXLEN:
@@ -727,7 +746,8 @@ int flush;
                 state->length = (unsigned)(hold);
                 if (state->head != Z_NULL)
                     state->head->extra_len = (unsigned)hold;
-                if (state->flags & 0x0200) CRC2(state->check, hold);
+                if ((state->flags & 0x0200) && (state->wrap & 4))
+                    CRC2(state->check, hold);
                 INITBITS();
             }
             else if (state->head != Z_NULL)
@@ -745,7 +765,7 @@ int flush;
                                 len + copy > state->head->extra_max ?
                                 state->head->extra_max - len : copy);
                     }
-                    if (state->flags & 0x0200)
+                    if ((state->flags & 0x0200) && (state->wrap & 4))
                         state->check = crc32(state->check, next, copy);
                     have -= copy;
                     next += copy;
@@ -764,9 +784,9 @@ int flush;
                     if (state->head != Z_NULL &&
                             state->head->name != Z_NULL &&
                             state->length < state->head->name_max)
-                        state->head->name[state->length++] = len;
+                        state->head->name[state->length++] = (Bytef)len;
                 } while (len && copy < have);
-                if (state->flags & 0x0200)
+                if ((state->flags & 0x0200) && (state->wrap & 4))
                     state->check = crc32(state->check, next, copy);
                 have -= copy;
                 next += copy;
@@ -785,9 +805,9 @@ int flush;
                     if (state->head != Z_NULL &&
                             state->head->comment != Z_NULL &&
                             state->length < state->head->comm_max)
-                        state->head->comment[state->length++] = len;
+                        state->head->comment[state->length++] = (Bytef)len;
                 } while (len && copy < have);
-                if (state->flags & 0x0200)
+                if ((state->flags & 0x0200) && (state->wrap & 4))
                     state->check = crc32(state->check, next, copy);
                 have -= copy;
                 next += copy;
@@ -799,7 +819,7 @@ int flush;
         case HCRC:
             if (state->flags & 0x0200) {
                 NEEDBITS(16);
-                if (hold != (state->check & 0xffff)) {
+                if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
                     strm->msg = (char *)"header crc mismatch";
                     state->mode = BAD;
                     break;
@@ -1180,11 +1200,11 @@ int flush;
                 out -= left;
                 strm->total_out += out;
                 state->total += out;
-                if (out)
+                if ((state->wrap & 4) && out)
                     strm->adler = state->check =
                         UPDATE(state->check, put - out, out);
                 out = left;
-                if ((
+                if ((state->wrap & 4) && (
 #ifdef GUNZIP
                      state->flags ? hold :
 #endif
@@ -1243,10 +1263,10 @@ int flush;
     strm->total_in += in;
     strm->total_out += out;
     state->total += out;
-    if (state->wrap && out)
+    if ((state->wrap & 4) && out)
         strm->adler = state->check =
             UPDATE(state->check, strm->next_out - out, out);
-    strm->data_type = state->bits + (state->last ? 64 : 0) +
+    strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
                       (state->mode == TYPE ? 128 : 0) +
                       (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
     if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
@@ -1258,7 +1278,7 @@ int ZEXPORT inflateEnd(strm)
 z_streamp strm;
 {
     struct inflate_state FAR *state;
-    if (strm == Z_NULL || strm->state == Z_NULL || strm->zfree == (free_func)0)
+    if (inflateStateCheck(strm))
         return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (state->window != Z_NULL) ZFREE(strm, state->window);
@@ -1276,7 +1296,7 @@ uInt *dictLength;
     struct inflate_state FAR *state;
 
     /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
 
     /* copy dictionary */
@@ -1301,7 +1321,7 @@ uInt dictLength;
     int ret;
 
     /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (state->wrap != 0 && state->mode != DICT)
         return Z_STREAM_ERROR;
@@ -1333,7 +1353,7 @@ gz_headerp head;
     struct inflate_state FAR *state;
 
     /* check state */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
 
@@ -1386,7 +1406,7 @@ z_streamp strm;
     struct inflate_state FAR *state;
 
     /* check parameters */
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
 
@@ -1433,7 +1453,7 @@ z_streamp strm;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
     return state->mode == STORED && state->bits == 0;
 }
@@ -1448,8 +1468,7 @@ z_streamp source;
     unsigned wsize;
 
     /* check input */
-    if (dest == Z_NULL || source == Z_NULL || source->state == Z_NULL ||
-        source->zalloc == (alloc_func)0 || source->zfree == (free_func)0)
+    if (inflateStateCheck(source) || dest == Z_NULL)
         return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)source->state;
 
@@ -1470,6 +1489,7 @@ z_streamp source;
     /* copy state */
     zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
     zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
+    copy->strm = dest;
     if (state->lencode >= state->codes &&
         state->lencode <= state->codes + ENOUGH - 1) {
         copy->lencode = copy->codes + (state->lencode - state->codes);
@@ -1491,25 +1511,51 @@ int subvert;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return Z_STREAM_ERROR;
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
     state = (struct inflate_state FAR *)strm->state;
-    state->sane = !subvert;
 #ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
+    state->sane = !subvert;
     return Z_OK;
 #else
+    (void)subvert;
     state->sane = 1;
     return Z_DATA_ERROR;
 #endif
 }
 
+int ZEXPORT inflateValidate(strm, check)   /* switch check-value validation on or off */
+z_streamp strm;
+int check;                                 /* nonzero: validate; zero: do not validate */
+{
+    struct inflate_state FAR *state;
+
+    if (inflateStateCheck(strm)) return Z_STREAM_ERROR;  /* stream failed the state check */
+    state = (struct inflate_state FAR *)strm->state;
+    if (check)
+        state->wrap |= 4;       /* set bit 2 of wrap ("validate check value") */
+    else
+        state->wrap &= ~4;      /* clear bit 2 of wrap; bits 0 and 1 are left untouched */
+    return Z_OK;
+}
+
 long ZEXPORT inflateMark(strm)
 z_streamp strm;
 {
     struct inflate_state FAR *state;
 
-    if (strm == Z_NULL || strm->state == Z_NULL) return -1L << 16;
+    if (inflateStateCheck(strm))
+        return -(1L << 16);
     state = (struct inflate_state FAR *)strm->state;
-    return ((long)(state->back) << 16) +
+    return (long)(((unsigned long)((long)state->back)) << 16) +
         (state->mode == COPY ? state->length :
             (state->mode == MATCH ? state->was - state->length : 0));
 }
+
+unsigned long ZEXPORT inflateCodesUsed(strm)   /* report the offset of state->next within state->codes */
+z_streamp strm;
+{
+    struct inflate_state FAR *state;
+    if (inflateStateCheck(strm)) return (unsigned long)-1;  /* failed state check: all-ones sentinel */
+    state = (struct inflate_state FAR *)strm->state;
+    return (unsigned long)(state->next - state->codes);  /* pointer difference in elements; presumably the code-table entries in use -- confirm against inflate_state */
+}
diff --git a/erts/emulator/zlib/inflate.h b/erts/emulator/zlib/inflate.h
index 95f4986d40..a46cce6b6d 100644
--- a/erts/emulator/zlib/inflate.h
+++ b/erts/emulator/zlib/inflate.h
@@ -1,5 +1,5 @@
 /* inflate.h -- internal inflate state definition
- * Copyright (C) 1995-2009 Mark Adler
+ * Copyright (C) 1995-2016 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -18,7 +18,7 @@
 
 /* Possible inflate modes between inflate() calls */
 typedef enum {
-    HEAD,       /* i: waiting for magic header */
+    HEAD = 16180,   /* i: waiting for magic header */
     FLAGS,      /* i: waiting for method and flags (gzip) */
     TIME,       /* i: waiting for modification time (gzip) */
     OS,         /* i: waiting for extra flags and operating system (gzip) */
@@ -77,11 +77,14 @@ typedef enum {
         CHECK -> LENGTH -> DONE
  */
 
-/* state maintained between inflate() calls.  Approximately 10K bytes. */
+/* State maintained between inflate() calls -- approximately 7K bytes, not
+   including the allocated sliding window, which is up to 32K bytes. */
 struct inflate_state {
+    z_streamp strm;             /* pointer back to this zlib stream */
     inflate_mode mode;          /* current inflate mode */
     int last;                   /* true if processing last block */
-    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip */
+    int wrap;                   /* bit 0 true for zlib, bit 1 true for gzip,
+                                   bit 2 true to validate check value */
     int havedict;               /* true if dictionary provided */
     int flags;                  /* gzip header method and flags (0 if zlib) */
     unsigned dmax;              /* zlib header max distance (INFLATE_STRICT) */
diff --git a/erts/emulator/zlib/inftrees.c b/erts/emulator/zlib/inftrees.c
index 3766fa2646..2ea08fc13e 100644
--- a/erts/emulator/zlib/inftrees.c
+++ b/erts/emulator/zlib/inftrees.c
@@ -1,18 +1,15 @@
 /* inftrees.c -- generate Huffman trees for efficient decoding
- * Copyright (C) 1995-2013 Mark Adler
+ * Copyright (C) 1995-2017 Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "zutil.h"
 #include "inftrees.h"
 
 #define MAXBITS 15
 
 const char inflate_copyright[] =
-   " inflate 1.2.8 Copyright 1995-2013 Mark Adler ";
+   " inflate 1.2.11 Copyright 1995-2017 Mark Adler ";
 /*
   If you use the zlib library in a product, an acknowledgment is welcome
   in the documentation of your product. If for some reason you cannot
@@ -57,7 +54,7 @@ unsigned short FAR *work;
     code FAR *next;             /* next available space in table */
     const unsigned short FAR *base;     /* base value table to use */
     const unsigned short FAR *extra;    /* extra bits table to use */
-    int end;                    /* use base and extra for symbol > end */
+    unsigned match;             /* use base and extra for symbol >= match */
     unsigned short count[MAXBITS+1];    /* number of codes of each length */
     unsigned short offs[MAXBITS+1];     /* offsets in table for each length */
     static const unsigned short lbase[31] = { /* Length codes 257..285 base */
@@ -65,7 +62,7 @@ unsigned short FAR *work;
         35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
     static const unsigned short lext[31] = { /* Length codes 257..285 extra */
         16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 18, 18, 18, 18,
-        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 72, 78};
+        19, 19, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 16, 77, 202};
     static const unsigned short dbase[32] = { /* Distance codes 0..29 base */
         1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
         257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
@@ -184,19 +181,17 @@ unsigned short FAR *work;
     switch (type) {
     case CODES:
         base = extra = work;    /* dummy value--not used */
-        end = 19;
+        match = 20;
         break;
     case LENS:
         base = lbase;
-        base -= 257;
         extra = lext;
-        extra -= 257;
-        end = 256;
+        match = 257;
         break;
-    default:            /* DISTS */
+    default:    /* DISTS */
         base = dbase;
         extra = dext;
-        end = -1;
+        match = 0;
     }
 
     /* initialize state for loop */
@@ -219,13 +214,13 @@ unsigned short FAR *work;
     for (;;) {
         /* create table entry */
         here.bits = (unsigned char)(len - drop);
-        if ((int)(work[sym]) < end) {
+        if (work[sym] + 1U < match) {
             here.op = (unsigned char)0;
             here.val = work[sym];
         }
-        else if ((int)(work[sym]) > end) {
-            here.op = (unsigned char)(extra[work[sym]]);
-            here.val = base[work[sym]];
+        else if (work[sym] >= match) {
+            here.op = (unsigned char)(extra[work[sym] - match]);
+            here.val = base[work[sym] - match];
         }
         else {
             here.op = (unsigned char)(32 + 64);         /* end of block */
diff --git a/erts/emulator/zlib/trees.c b/erts/emulator/zlib/trees.c
index 465e944e5b..50cf4b4571 100644
--- a/erts/emulator/zlib/trees.c
+++ b/erts/emulator/zlib/trees.c
@@ -1,5 +1,5 @@
 /* trees.c -- output deflated data using Huffman coding
- * Copyright (C) 1995-2012 Jean-loup Gailly
+ * Copyright (C) 1995-2017 Jean-loup Gailly
  * detect_data_type() function provided freely by Cosmin Truta, 2006
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
@@ -34,12 +34,9 @@
 
 /* #define GEN_TREES_H */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "deflate.h"
 
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 #  include <ctype.h>
 #endif
 
@@ -125,13 +122,13 @@ struct static_tree_desc_s {
     int     max_length;          /* max bit length for the codes */
 };
 
-local static_tree_desc  static_l_desc =
+local const static_tree_desc  static_l_desc =
 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
 
-local static_tree_desc  static_d_desc =
+local const static_tree_desc  static_d_desc =
 {static_dtree, extra_dbits, 0,          D_CODES, MAX_BITS};
 
-local static_tree_desc  static_bl_desc =
+local const static_tree_desc  static_bl_desc =
 {(const ct_data *)0, extra_blbits, 0,   BL_CODES, MAX_BL_BITS};
 
 /* ===========================================================================
@@ -155,18 +152,16 @@ local int  detect_data_type OF((deflate_state *s));
 local unsigned bi_reverse OF((unsigned value, int length));
 local void bi_windup      OF((deflate_state *s));
 local void bi_flush       OF((deflate_state *s));
-local void copy_block     OF((deflate_state *s, charf *buf, unsigned len,
-                              int header));
 
 #ifdef GEN_TREES_H
 local void gen_trees_header OF((void));
 #endif
 
-#ifndef DEBUG
+#ifndef ZLIB_DEBUG
 #  define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
    /* Send a code of the given tree. c and tree must not have side effects */
 
-#else /* DEBUG */
+#else /* ZLIB_DEBUG */
 #  define send_code(s, c, tree) \
      { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
        send_bits(s, tree[c].Code, tree[c].Len); }
@@ -185,7 +180,7 @@ local void gen_trees_header OF((void));
  * Send a value on a given number of bits.
  * IN assertion: length <= 16 and value fits in length bits.
  */
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 local void send_bits      OF((deflate_state *s, int value, int length));
 
 local void send_bits(s, value, length)
@@ -211,12 +206,12 @@ local void send_bits(s, value, length)
         s->bi_valid += length;
     }
 }
-#else /* !DEBUG */
+#else /* !ZLIB_DEBUG */
 
 #define send_bits(s, value, length) \
 { int len = length;\
   if (s->bi_valid > (int)Buf_size - len) {\
-    int val = value;\
+    int val = (int)value;\
     s->bi_buf |= (ush)val << s->bi_valid;\
     put_short(s, s->bi_buf);\
     s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
@@ -226,7 +221,7 @@ local void send_bits(s, value, length)
     s->bi_valid += len;\
   }\
 }
-#endif /* DEBUG */
+#endif /* ZLIB_DEBUG */
 
 
 /* the arguments must not have side effects */
@@ -320,7 +315,7 @@ local void tr_static_init()
  * Genererate the file trees.h describing the static trees.
  */
 #ifdef GEN_TREES_H
-#  ifndef DEBUG
+#  ifndef ZLIB_DEBUG
 #    include <stdio.h>
 #  endif
 
@@ -397,7 +392,7 @@ void ZLIB_INTERNAL _tr_init(s)
 
     s->bi_buf = 0;
     s->bi_valid = 0;
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     s->compressed_len = 0L;
     s->bits_sent = 0L;
 #endif
@@ -525,12 +520,12 @@ local void gen_bitlen(s, desc)
         xbits = 0;
         if (n >= base) xbits = extra[n-base];
         f = tree[n].Freq;
-        s->opt_len += (ulg)f * (bits + xbits);
-        if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
+        s->opt_len += (ulg)f * (unsigned)(bits + xbits);
+        if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
     }
     if (overflow == 0) return;
 
-    Trace((stderr,"\nbit length overflow\n"));
+    Tracev((stderr,"\nbit length overflow\n"));
     /* This happens for example on obj2 and pic of the Calgary corpus */
 
     /* Find the first bit length which could increase: */
@@ -557,9 +552,8 @@ local void gen_bitlen(s, desc)
             m = s->heap[--h];
             if (m > max_code) continue;
             if ((unsigned) tree[m].Len != (unsigned) bits) {
-                Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
-                s->opt_len += ((long)bits - (long)tree[m].Len)
-                              *(long)tree[m].Freq;
+                Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
+                s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
                 tree[m].Len = (ush)bits;
             }
             n--;
@@ -581,7 +575,7 @@ local void gen_codes (tree, max_code, bl_count)
     ushf *bl_count;            /* number of codes at each bit length */
 {
     ush next_code[MAX_BITS+1]; /* next code value for each bit length */
-    ush code = 0;              /* running code value */
+    unsigned code = 0;         /* running code value */
     int bits;                  /* bit index */
     int n;                     /* code index */
 
@@ -589,7 +583,8 @@ local void gen_codes (tree, max_code, bl_count)
      * without bit reversal.
      */
     for (bits = 1; bits <= MAX_BITS; bits++) {
-        next_code[bits] = code = (code + bl_count[bits-1]) << 1;
+        code = (code + bl_count[bits-1]) << 1;
+        next_code[bits] = (ush)code;
     }
     /* Check that the bit counts in bl_count are consistent. The last code
      * must be all ones.
@@ -602,7 +597,7 @@ local void gen_codes (tree, max_code, bl_count)
         int len = tree[n].Len;
         if (len == 0) continue;
         /* Now reverse the bits */
-        tree[n].Code = bi_reverse(next_code[len]++, len);
+        tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
 
         Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
              n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
@@ -824,7 +819,7 @@ local int build_bl_tree(s)
         if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
     }
     /* Update opt_len to include the bit length tree and counts */
-    s->opt_len += 3*(max_blindex+1) + 5+5+4;
+    s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4;
     Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
             s->opt_len, s->static_len));
 
@@ -872,11 +867,17 @@ void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
     int last;         /* one if this is the last block for a file */
 {
     send_bits(s, (STORED_BLOCK<<1)+last, 3);    /* send block type */
-#ifdef DEBUG
+    bi_windup(s);        /* align on byte boundary */
+    put_short(s, (ush)stored_len);
+    put_short(s, (ush)~stored_len);
+    zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
+    s->pending += stored_len;
+#ifdef ZLIB_DEBUG
     s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
     s->compressed_len += (stored_len + 4) << 3;
+    s->bits_sent += 2*16;
+    s->bits_sent += stored_len<<3;
 #endif
-    copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
 }
 
 /* ===========================================================================
@@ -897,7 +898,7 @@ void ZLIB_INTERNAL _tr_align(s)
 {
     send_bits(s, STATIC_TREES<<1, 3);
     send_code(s, END_BLOCK, static_ltree);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
 #endif
     bi_flush(s);
@@ -905,7 +906,7 @@ void ZLIB_INTERNAL _tr_align(s)
 
 /* ===========================================================================
  * Determine the best encoding for the current block: dynamic trees, static
- * trees or store, and output the encoded block to the zip file.
+ * trees or store, and write out the encoded block.
  */
 void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
     deflate_state *s;
@@ -977,7 +978,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
         send_bits(s, (STATIC_TREES<<1)+last, 3);
         compress_block(s, (const ct_data *)static_ltree,
                        (const ct_data *)static_dtree);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
         s->compressed_len += 3 + s->static_len;
 #endif
     } else {
@@ -986,7 +987,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
                        max_blindex+1);
         compress_block(s, (const ct_data *)s->dyn_ltree,
                        (const ct_data *)s->dyn_dtree);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
         s->compressed_len += 3 + s->opt_len;
 #endif
     }
@@ -998,7 +999,7 @@ void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
 
     if (last) {
         bi_windup(s);
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
         s->compressed_len += 7;  /* align on byte boundary */
 #endif
     }
@@ -1093,7 +1094,7 @@ local void compress_block(s, ltree, dtree)
             send_code(s, code, dtree);       /* send the distance code */
             extra = extra_dbits[code];
             if (extra != 0) {
-                dist -= base_dist[code];
+                dist -= (unsigned)base_dist[code];
                 send_bits(s, dist, extra);   /* send the extra distance bits */
             }
         } /* literal or match pair ? */
@@ -1196,34 +1197,7 @@ local void bi_windup(s)
     }
     s->bi_buf = 0;
     s->bi_valid = 0;
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     s->bits_sent = (s->bits_sent+7) & ~7;
 #endif
 }
-
-/* ===========================================================================
- * Copy a stored block, storing first the length and its
- * one's complement if requested.
- */
-local void copy_block(s, buf, len, header)
-    deflate_state *s;
-    charf    *buf;    /* the input data */
-    unsigned len;     /* its length */
-    int      header;  /* true if block header must be written */
-{
-    bi_windup(s);        /* align on byte boundary */
-
-    if (header) {
-        put_short(s, (ush)len);
-        put_short(s, (ush)~len);
-#ifdef DEBUG
-        s->bits_sent += 2*16;
-#endif
-    }
-#ifdef DEBUG
-    s->bits_sent += (ulg)len<<3;
-#endif
-    while (len--) {
-        put_byte(s, *buf++);
-    }
-}
diff --git a/erts/emulator/zlib/uncompr.c b/erts/emulator/zlib/uncompr.c
index 864d571719..f03a1a865e 100644
--- a/erts/emulator/zlib/uncompr.c
+++ b/erts/emulator/zlib/uncompr.c
@@ -1,62 +1,93 @@
 /* uncompr.c -- decompress a memory buffer
- * Copyright (C) 1995-2003, 2010 Jean-loup Gailly.
+ * Copyright (C) 1995-2003, 2010, 2014, 2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #define ZLIB_INTERNAL
 #include "zlib.h"
 
 /* ===========================================================================
-     Decompresses the source buffer into the destination buffer.  sourceLen is
-   the byte length of the source buffer. Upon entry, destLen is the total
-   size of the destination buffer, which must be large enough to hold the
-   entire uncompressed data. (The size of the uncompressed data must have
-   been saved previously by the compressor and transmitted to the decompressor
-   by some mechanism outside the scope of this compression library.)
-   Upon exit, destLen is the actual size of the compressed buffer.
-
-     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
-   enough memory, Z_BUF_ERROR if there was not enough room in the output
-   buffer, or Z_DATA_ERROR if the input data was corrupted.
+     Decompresses the source buffer into the destination buffer.  *sourceLen is
+   the byte length of the source buffer. Upon entry, *destLen is the total size
+   of the destination buffer, which must be large enough to hold the entire
+   uncompressed data. (The size of the uncompressed data must have been saved
+   previously by the compressor and transmitted to the decompressor by some
+   mechanism outside the scope of this compression library.) Upon exit,
+   *destLen is the size of the decompressed data and *sourceLen is the number
+   of source bytes consumed. Upon return, source + *sourceLen points to the
+   first unused input byte.
+
+     uncompress returns Z_OK if success, Z_MEM_ERROR if there was not enough
+   memory, Z_BUF_ERROR if there was not enough room in the output buffer, or
+   Z_DATA_ERROR if the input data was corrupted, including if the input data is
+   an incomplete zlib stream.
 */
-int ZEXPORT uncompress (dest, destLen, source, sourceLen)
+int ZEXPORT uncompress2 (dest, destLen, source, sourceLen)
     Bytef *dest;
     uLongf *destLen;
     const Bytef *source;
-    uLong sourceLen;
+    uLong *sourceLen;
 {
     z_stream stream;
     int err;
+    const uInt max = (uInt)-1;
+    uLong len, left;
+    Byte buf[1];    /* for detection of incomplete stream when *destLen == 0 */
 
-    stream.next_in = (z_const Bytef *)source;
-    stream.avail_in = (uInt)sourceLen;
-    /* Check for source > 64K on 16-bit machine: */
-    if ((uLong)stream.avail_in != sourceLen) return Z_BUF_ERROR;
-
-    stream.next_out = dest;
-    stream.avail_out = (uInt)*destLen;
-    if ((uLong)stream.avail_out != *destLen) return Z_BUF_ERROR;
+    len = *sourceLen;
+    if (*destLen) {
+        left = *destLen;
+        *destLen = 0;
+    }
+    else {
+        left = 1;
+        dest = buf;
+    }
 
+    stream.next_in = (z_const Bytef *)source;
+    stream.avail_in = 0;
     stream.zalloc = (alloc_func)0;
     stream.zfree = (free_func)0;
+    stream.opaque = (voidpf)0;
 
     err = inflateInit(&stream);
     if (err != Z_OK) return err;
 
-    err = inflate(&stream, Z_FINISH);
-    if (err != Z_STREAM_END) {
-        inflateEnd(&stream);
-        if (err == Z_NEED_DICT || (err == Z_BUF_ERROR && stream.avail_in == 0))
-            return Z_DATA_ERROR;
-        return err;
-    }
-    *destLen = stream.total_out;
+    stream.next_out = dest;
+    stream.avail_out = 0;
 
-    err = inflateEnd(&stream);
-    return err;
+    do {
+        if (stream.avail_out == 0) {
+            stream.avail_out = left > (uLong)max ? max : (uInt)left;
+            left -= stream.avail_out;
+        }
+        if (stream.avail_in == 0) {
+            stream.avail_in = len > (uLong)max ? max : (uInt)len;
+            len -= stream.avail_in;
+        }
+        err = inflate(&stream, Z_NO_FLUSH);
+    } while (err == Z_OK);
+
+    *sourceLen -= len + stream.avail_in;
+    if (dest != buf)
+        *destLen = stream.total_out;
+    else if (stream.total_out && err == Z_BUF_ERROR)
+        left = 1;
+
+    inflateEnd(&stream);
+    return err == Z_STREAM_END ? Z_OK :
+           err == Z_NEED_DICT ? Z_DATA_ERROR  :
+           err == Z_BUF_ERROR && left + stream.avail_out ? Z_DATA_ERROR :
+           err;
+}
+
+int ZEXPORT uncompress (dest, destLen, source, sourceLen)  /* uncompress2() without reporting consumed input */
+    Bytef *dest;
+    uLongf *destLen;
+    const Bytef *source;
+    uLong sourceLen;
+{
+    return uncompress2(dest, destLen, source, &sourceLen);  /* uncompress2 updates *sourceLen; here that is only the local by-value copy */
 }
diff --git a/erts/emulator/zlib/zconf.h b/erts/emulator/zlib/zconf.h
index 9987a77553..5e1d68a004 100644
--- a/erts/emulator/zlib/zconf.h
+++ b/erts/emulator/zlib/zconf.h
@@ -1,5 +1,5 @@
 /* zconf.h -- configuration of the zlib compression library
- * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -17,7 +17,7 @@
 #ifdef Z_PREFIX     /* may be set to #if 1 by ./configure */
 #  define Z_PREFIX_SET
 
-/* all linked symbols */
+/* all linked symbols and init macros */
 #  define _dist_code            z__dist_code
 #  define _length_code          z__length_code
 #  define _tr_align             z__tr_align
@@ -29,6 +29,7 @@
 #  define adler32               z_adler32
 #  define adler32_combine       z_adler32_combine
 #  define adler32_combine64     z_adler32_combine64
+#  define adler32_z             z_adler32_z
 #  ifndef Z_SOLO
 #    define compress              z_compress
 #    define compress2             z_compress2
@@ -37,10 +38,14 @@
 #  define crc32                 z_crc32
 #  define crc32_combine         z_crc32_combine
 #  define crc32_combine64       z_crc32_combine64
+#  define crc32_z               z_crc32_z
 #  define deflate               z_deflate
 #  define deflateBound          z_deflateBound
 #  define deflateCopy           z_deflateCopy
 #  define deflateEnd            z_deflateEnd
+#  define deflateGetDictionary  z_deflateGetDictionary
+#  define deflateInit           z_deflateInit
+#  define deflateInit2          z_deflateInit2
 #  define deflateInit2_         z_deflateInit2_
 #  define deflateInit_          z_deflateInit_
 #  define deflateParams         z_deflateParams
@@ -67,6 +72,8 @@
 #    define gzeof                 z_gzeof
 #    define gzerror               z_gzerror
 #    define gzflush               z_gzflush
+#    define gzfread               z_gzfread
+#    define gzfwrite              z_gzfwrite
 #    define gzgetc                z_gzgetc
 #    define gzgetc_               z_gzgetc_
 #    define gzgets                z_gzgets
@@ -78,7 +85,6 @@
 #      define gzopen_w              z_gzopen_w
 #    endif
 #    define gzprintf              z_gzprintf
-#    define gzvprintf             z_gzvprintf
 #    define gzputc                z_gzputc
 #    define gzputs                z_gzputs
 #    define gzread                z_gzread
@@ -89,32 +95,39 @@
 #    define gztell                z_gztell
 #    define gztell64              z_gztell64
 #    define gzungetc              z_gzungetc
+#    define gzvprintf             z_gzvprintf
 #    define gzwrite               z_gzwrite
 #  endif
 #  define inflate               z_inflate
 #  define inflateBack           z_inflateBack
 #  define inflateBackEnd        z_inflateBackEnd
+#  define inflateBackInit       z_inflateBackInit
 #  define inflateBackInit_      z_inflateBackInit_
+#  define inflateCodesUsed      z_inflateCodesUsed
 #  define inflateCopy           z_inflateCopy
 #  define inflateEnd            z_inflateEnd
+#  define inflateGetDictionary  z_inflateGetDictionary
 #  define inflateGetHeader      z_inflateGetHeader
+#  define inflateInit           z_inflateInit
+#  define inflateInit2          z_inflateInit2
 #  define inflateInit2_         z_inflateInit2_
 #  define inflateInit_          z_inflateInit_
 #  define inflateMark           z_inflateMark
 #  define inflatePrime          z_inflatePrime
 #  define inflateReset          z_inflateReset
 #  define inflateReset2         z_inflateReset2
+#  define inflateResetKeep      z_inflateResetKeep
 #  define inflateSetDictionary  z_inflateSetDictionary
-#  define inflateGetDictionary  z_inflateGetDictionary
 #  define inflateSync           z_inflateSync
 #  define inflateSyncPoint      z_inflateSyncPoint
 #  define inflateUndermine      z_inflateUndermine
-#  define inflateResetKeep      z_inflateResetKeep
+#  define inflateValidate       z_inflateValidate
 #  define inflate_copyright     z_inflate_copyright
 #  define inflate_fast          z_inflate_fast
 #  define inflate_table         z_inflate_table
 #  ifndef Z_SOLO
 #    define uncompress            z_uncompress
+#    define uncompress2           z_uncompress2
 #  endif
 #  define zError                z_zError
 #  ifndef Z_SOLO
@@ -224,9 +237,19 @@
 #  define z_const
 #endif
 
-/* Some Mac compilers merge all .h files incorrectly: */
-#if defined(__MWERKS__)||defined(applec)||defined(THINK_C)||defined(__SC__)
-#  define NO_DUMMY_DECL
+#ifdef Z_SOLO
+   typedef unsigned long z_size_t;
+#else
+#  define z_longlong long long
+#  if defined(NO_SIZE_T)
+     typedef unsigned NO_SIZE_T z_size_t;
+#  elif defined(STDC)
+#    include <stddef.h>
+     typedef size_t z_size_t;
+#  else
+     typedef unsigned long z_size_t;
+#  endif
+#  undef z_longlong
 #endif
 
 /* Maximum value for memLevel in deflateInit2 */
@@ -256,7 +279,7 @@
  Of course this will generally degrade compression (there's no free lunch).
 
    The memory requirements for inflate are (in bytes) 1 << windowBits
- that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ that is, 32K for windowBits=15 (default value) plus about 7 kilobytes
  for small objects.
 */
 
diff --git a/erts/emulator/zlib/zlib.h b/erts/emulator/zlib/zlib.h
index 3e0c7672ac..f09cdaf1e0 100644
--- a/erts/emulator/zlib/zlib.h
+++ b/erts/emulator/zlib/zlib.h
@@ -1,7 +1,7 @@
 /* zlib.h -- interface of the 'zlib' general purpose compression library
-  version 1.2.8, April 28th, 2013
+  version 1.2.11, January 15th, 2017
 
-  Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
+  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
 
   This software is provided 'as-is', without any express or implied
   warranty.  In no event will the authors be held liable for any damages
@@ -37,11 +37,11 @@
 extern "C" {
 #endif
 
-#define ZLIB_VERSION "1.2.8"
-#define ZLIB_VERNUM 0x1280
+#define ZLIB_VERSION "1.2.11"
+#define ZLIB_VERNUM 0x12b0
 #define ZLIB_VER_MAJOR 1
 #define ZLIB_VER_MINOR 2
-#define ZLIB_VER_REVISION 8
+#define ZLIB_VER_REVISION 11
 #define ZLIB_VER_SUBREVISION 0
 
 /*
@@ -65,7 +65,8 @@ extern "C" {
   with "gz".  The gzip format is different from the zlib format.  gzip is a
   gzip wrapper, documented in RFC 1952, wrapped around a deflate stream.
 
-    This library can optionally read and write gzip streams in memory as well.
+    This library can optionally read and write gzip and raw deflate streams in
+  memory as well.
 
     The zlib format was designed to be compact and fast for use in memory
   and on communications channels.  The gzip format was designed for single-
@@ -74,7 +75,7 @@ extern "C" {
 
     The library does not install any signal handler.  The decoder checks
   the consistency of the compressed data, so the library should never crash
-  even in case of corrupted input.
+  even in the case of corrupted input.
 */
 
 typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
@@ -87,7 +88,7 @@ typedef struct z_stream_s {
     uInt     avail_in;  /* number of bytes available at next_in */
     uLong    total_in;  /* total number of input bytes read so far */
 
-    Bytef    *next_out; /* next output byte should be put there */
+    Bytef    *next_out; /* next output byte will go here */
     uInt     avail_out; /* remaining free space at next_out */
     uLong    total_out; /* total number of bytes output so far */
 
@@ -98,8 +99,9 @@ typedef struct z_stream_s {
     free_func  zfree;   /* used to free the internal state */
     voidpf     opaque;  /* private data object passed to zalloc and zfree */
 
-    int     data_type;  /* best guess about the data type: binary or text */
-    uLong   adler;      /* adler32 value of the uncompressed data */
+    int     data_type;  /* best guess about the data type: binary or text
+                           for deflate, or the decoding state for inflate */
+    uLong   adler;      /* Adler-32 or CRC-32 value of the uncompressed data */
     uLong   reserved;   /* reserved for future use */
 } z_stream;
 
@@ -142,7 +144,9 @@ typedef gz_header FAR *gz_headerp;
 
      zalloc must return Z_NULL if there is not enough memory for the object.
    If zlib is used in a multi-threaded application, zalloc and zfree must be
-   thread safe.
+   thread safe.  In that case, zlib is thread-safe.  When zalloc and zfree are
+   Z_NULL on entry to the initialization function, they are set to internal
+   routines that use the standard library functions malloc() and free().
 
      On 16-bit systems, the functions zalloc and zfree must be able to allocate
    exactly 65536 bytes, but will not be required to allocate more than this if
@@ -155,7 +159,7 @@ typedef gz_header FAR *gz_headerp;
 
      The fields total_in and total_out can be used for statistics or progress
    reports.  After compression, total_in holds the total size of the
-   uncompressed data and may be saved for use in the decompressor (particularly
+   uncompressed data and may be saved for use by the decompressor (particularly
    if the decompressor wants to decompress everything in a single step).
 */
 
@@ -200,7 +204,7 @@ typedef gz_header FAR *gz_headerp;
 #define Z_TEXT     1
 #define Z_ASCII    Z_TEXT   /* for compatibility with 1.2.2 and earlier */
 #define Z_UNKNOWN  2
-/* Possible values of the data_type field (though see inflate()) */
+/* Possible values of the data_type field for deflate() */
 
 #define Z_DEFLATED   8
 /* The deflate compression method (the only one supported in this version) */
@@ -258,11 +262,11 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
     enough room in the output buffer), next_in and avail_in are updated and
     processing will resume at this point for the next call of deflate().
 
-  - Provide more output starting at next_out and update next_out and avail_out
+  - Generate more output starting at next_out and update next_out and avail_out
     accordingly.  This action is forced if the parameter flush is non zero.
     Forcing flush frequently degrades the compression ratio, so this parameter
-    should be set only when necessary (in interactive applications).  Some
-    output may be provided even if flush is not set.
+    should be set only when necessary.  Some output may be provided even if
+    flush is zero.
 
     Before the call of deflate(), the application should ensure that at least
   one of the actions is possible, by providing more input and/or consuming more
@@ -271,7 +275,9 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
   output when it wants, for example when the output buffer is full (avail_out
   == 0), or after each call of deflate().  If deflate returns Z_OK and with
   zero avail_out, it must be called again after making room in the output
-  buffer because there might be more output pending.
+  buffer because there might be more output pending. See deflatePending(),
+  which can be used if desired to determine whether or not there is more output
+  in that case.
 
     Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to
   decide how much data to accumulate before producing output, in order to
@@ -292,8 +298,8 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
   input data so far will be available to the decompressor, as for Z_SYNC_FLUSH.
   This completes the current deflate block and follows it with an empty fixed
   codes block that is 10 bits long.  This assures that enough bytes are output
-  in order for the decompressor to finish the block before the empty fixed code
-  block.
+  in order for the decompressor to finish the block before the empty fixed
+  codes block.
 
     If flush is set to Z_BLOCK, a deflate block is completed and emitted, as
   for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to
@@ -319,34 +325,38 @@ ZEXTERN int ZEXPORT deflate OF((z_streamp strm, int flush));
 
     If the parameter flush is set to Z_FINISH, pending input is processed,
   pending output is flushed and deflate returns with Z_STREAM_END if there was
-  enough output space; if deflate returns with Z_OK, this function must be
-  called again with Z_FINISH and more output space (updated avail_out) but no
-  more input data, until it returns with Z_STREAM_END or an error.  After
-  deflate has returned Z_STREAM_END, the only possible operations on the stream
-  are deflateReset or deflateEnd.
-
-    Z_FINISH can be used immediately after deflateInit if all the compression
-  is to be done in a single step.  In this case, avail_out must be at least the
-  value returned by deflateBound (see below).  Then deflate is guaranteed to
-  return Z_STREAM_END.  If not enough output space is provided, deflate will
-  not return Z_STREAM_END, and it must be called again as described above.
-
-    deflate() sets strm->adler to the adler32 checksum of all input read
-  so far (that is, total_in bytes).
+  enough output space.  If deflate returns with Z_OK or Z_BUF_ERROR, this
+  function must be called again with Z_FINISH and more output space (updated
+  avail_out) but no more input data, until it returns with Z_STREAM_END or an
+  error.  After deflate has returned Z_STREAM_END, the only possible operations
+  on the stream are deflateReset or deflateEnd.
+
+    Z_FINISH can be used in the first deflate call after deflateInit if all the
+  compression is to be done in a single step.  In order to complete in one
+  call, avail_out must be at least the value returned by deflateBound (see
+  below).  Then deflate is guaranteed to return Z_STREAM_END.  If not enough
+  output space is provided, deflate will not return Z_STREAM_END, and it must
+  be called again as described above.
+
+    deflate() sets strm->adler to the Adler-32 checksum of all input read
+  so far (that is, total_in bytes).  If a gzip stream is being generated, then
+  strm->adler will be the CRC-32 checksum of the input read so far.  (See
+  deflateInit2 below.)
 
     deflate() may update strm->data_type if it can make a good guess about
-  the input data type (Z_BINARY or Z_TEXT).  In doubt, the data is considered
-  binary.  This field is only for information purposes and does not affect the
-  compression algorithm in any manner.
+  the input data type (Z_BINARY or Z_TEXT).  If in doubt, the data is
+  considered binary.  This field is only for information purposes and does not
+  affect the compression algorithm in any manner.
 
     deflate() returns Z_OK if some progress has been made (more input
   processed or more output produced), Z_STREAM_END if all input has been
   consumed and all output has been produced (only when flush is set to
   Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example
-  if next_in or next_out was Z_NULL), Z_BUF_ERROR if no progress is possible
-  (for example avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not
-  fatal, and deflate() can be called again with more input and more output
-  space to continue compressing.
+  if next_in or next_out was Z_NULL or the state was inadvertently written over
+  by the application), or Z_BUF_ERROR if no progress is possible (for example
+  avail_in or avail_out was zero).  Note that Z_BUF_ERROR is not fatal, and
+  deflate() can be called again with more input and more output space to
+  continue compressing.
 */
 
 
@@ -369,23 +379,21 @@ ZEXTERN int ZEXPORT inflateInit OF((z_streamp strm));
 
      Initializes the internal stream state for decompression.  The fields
    next_in, avail_in, zalloc, zfree and opaque must be initialized before by
-   the caller.  If next_in is not Z_NULL and avail_in is large enough (the
-   exact value depends on the compression method), inflateInit determines the
-   compression method from the zlib header and allocates all data structures
-   accordingly; otherwise the allocation will be deferred to the first call of
-   inflate.  If zalloc and zfree are set to Z_NULL, inflateInit updates them to
-   use default allocation functions.
+   the caller.  In the current version of inflate, the provided input is not
+   read or consumed.  The allocation of a sliding window will be deferred to
+   the first call of inflate (if the decompression does not complete on the
+   first call).  If zalloc and zfree are set to Z_NULL, inflateInit updates
+   them to use default allocation functions.
 
      inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough
    memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
    version assumed by the caller, or Z_STREAM_ERROR if the parameters are
    invalid, such as a null pointer to the structure.  msg is set to null if
-   there is no error message.  inflateInit does not perform any decompression
-   apart from possibly reading the zlib header if present: actual decompression
-   will be done by inflate().  (So next_in and avail_in may be modified, but
-   next_out and avail_out are unused and unchanged.) The current implementation
-   of inflateInit() does not process any header information -- that is deferred
-   until inflate() is called.
+   there is no error message.  inflateInit does not perform any decompression.
+   Actual decompression will be done by inflate().  So next_in, avail_in,
+   next_out, and avail_out are unused and unchanged.  The current
+   implementation of inflateInit() does not process any header information --
+   that is deferred until inflate() is called.
 */
 
 
@@ -401,17 +409,20 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
 
   - Decompress more input starting at next_in and update next_in and avail_in
     accordingly.  If not all input can be processed (because there is not
-    enough room in the output buffer), next_in is updated and processing will
-    resume at this point for the next call of inflate().
+    enough room in the output buffer), then next_in and avail_in are updated
+    accordingly, and processing will resume at this point for the next call of
+    inflate().
 
-  - Provide more output starting at next_out and update next_out and avail_out
+  - Generate more output starting at next_out and update next_out and avail_out
     accordingly.  inflate() provides as much output as possible, until there is
     no more input data or no more space in the output buffer (see below about
     the flush parameter).
 
     Before the call of inflate(), the application should ensure that at least
   one of the actions is possible, by providing more input and/or consuming more
-  output, and updating the next_* and avail_* values accordingly.  The
+  output, and updating the next_* and avail_* values accordingly.  If the
+  caller of inflate() does not provide both available input and available
+  output space, it is possible that there will be no progress made.  The
   application can consume the uncompressed output when it wants, for example
   when the output buffer is full (avail_out == 0), or after each call of
   inflate().  If inflate returns Z_OK and with zero avail_out, it must be
@@ -428,7 +439,7 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
   gets to the end of that block, or when it runs out of data.
 
     The Z_BLOCK option assists in appending to or combining deflate streams.
-  Also to assist in this, on return inflate() will set strm->data_type to the
+  To assist in this, on return inflate() always sets strm->data_type to the
   number of unused bits in the last byte taken from strm->next_in, plus 64 if
   inflate() is currently decoding the last block in the deflate stream, plus
   128 if inflate() returned immediately after decoding an end-of-block code or
@@ -454,7 +465,7 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
   this case all pending input is processed and all pending output is flushed;
   avail_out must be large enough to hold all of the uncompressed data for the
   operation to complete.  (The size of the uncompressed data may have been
-  saved by the compressor for this purpose.) The use of Z_FINISH is not
+  saved by the compressor for this purpose.)  The use of Z_FINISH is not
   required to perform an inflation in one step.  However it may be used to
   inform inflate that a faster approach can be used for the single inflate()
   call.  Z_FINISH also informs inflate to not maintain a sliding window if the
@@ -476,32 +487,33 @@ ZEXTERN int ZEXPORT inflate OF((z_streamp strm, int flush));
   chosen by the compressor and returns Z_NEED_DICT; otherwise it sets
   strm->adler to the Adler-32 checksum of all output produced so far (that is,
   total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described
-  below.  At the end of the stream, inflate() checks that its computed adler32
+  below.  At the end of the stream, inflate() checks that its computed Adler-32
   checksum is equal to that saved by the compressor and returns Z_STREAM_END
   only if the checksum is correct.
 
     inflate() can decompress and check either zlib-wrapped or gzip-wrapped
   deflate data.  The header type is detected automatically, if requested when
   initializing with inflateInit2().  Any information contained in the gzip
-  header is not retained, so applications that need that information should
-  instead use raw inflate, see inflateInit2() below, or inflateBack() and
-  perform their own processing of the gzip header and trailer.  When processing
+  header is not retained unless inflateGetHeader() is used.  When processing
   gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output
-  producted so far.  The CRC-32 is checked against the gzip trailer.
+  produced so far.  The CRC-32 is checked against the gzip trailer, as is the
+  uncompressed length, modulo 2^32.
 
     inflate() returns Z_OK if some progress has been made (more input processed
   or more output produced), Z_STREAM_END if the end of the compressed data has
   been reached and all uncompressed output has been produced, Z_NEED_DICT if a
   preset dictionary is needed at this point, Z_DATA_ERROR if the input data was
   corrupted (input stream not conforming to the zlib format or incorrect check
-  value), Z_STREAM_ERROR if the stream structure was inconsistent (for example
-  next_in or next_out was Z_NULL), Z_MEM_ERROR if there was not enough memory,
-  Z_BUF_ERROR if no progress is possible or if there was not enough room in the
-  output buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
+  value, in which case strm->msg points to a string with a more specific
+  error), Z_STREAM_ERROR if the stream structure was inconsistent (for example
+  next_in or next_out was Z_NULL, or the state was inadvertently written over
+  by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR
+  if no progress was possible or if there was not enough room in the output
+  buffer when Z_FINISH is used.  Note that Z_BUF_ERROR is not fatal, and
   inflate() can be called again with more input and more output space to
   continue decompressing.  If Z_DATA_ERROR is returned, the application may
   then call inflateSync() to look for a good compression block if a partial
-  recovery of the data is desired.
+  recovery of the data is to be attempted.
 */
 
 
@@ -511,9 +523,8 @@ ZEXTERN int ZEXPORT inflateEnd OF((z_streamp strm));
    This function discards any unprocessed input and does not flush any pending
    output.
 
-     inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
-   was inconsistent.  In the error case, msg may be set but then points to a
-   static string (which must not be deallocated).
+     inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state
+   was inconsistent.
 */
 
 
@@ -544,16 +555,29 @@ ZEXTERN int ZEXPORT deflateInit2 OF((z_streamp strm,
    compression at the expense of memory usage.  The default value is 15 if
    deflateInit is used instead.
 
+     For the current implementation of deflate(), a windowBits value of 8 (a
+   window size of 256 bytes) is not supported.  As a result, a request for 8
+   will result in 9 (a 512-byte window).  In that case, providing 8 to
+   inflateInit2() will result in an error when the zlib header with 9 is
+   checked against the initialization of inflate().  The remedy is to not use 8
+   with deflateInit2() with this initialization, or at least in that case use 9
+   with inflateInit2().
+
      windowBits can also be -8..-15 for raw deflate.  In this case, -windowBits
    determines the window size.  deflate() will then generate raw deflate data
-   with no zlib header or trailer, and will not compute an adler32 check value.
+   with no zlib header or trailer, and will not compute a check value.
 
      windowBits can also be greater than 15 for optional gzip encoding.  Add
    16 to windowBits to write a simple gzip header and trailer around the
    compressed data instead of a zlib wrapper.  The gzip header will have no
    file name, no extra data, no comment, no modification time (set to zero), no
-   header crc, and the operating system will be set to 255 (unknown).  If a
-   gzip stream is being written, strm->adler is a crc32 instead of an adler32.
+   header crc, and the operating system will be set to the appropriate value,
+   if the operating system was determined at compile time.  If a gzip stream is
+   being written, strm->adler is a CRC-32 instead of an Adler-32.
+
+     For raw deflate or gzip encoding, a request for a 256-byte window is
+   rejected as invalid, since only the zlib header provides a means of
+   transmitting the window size to the decompressor.
 
      The memLevel parameter specifies how much memory should be allocated
    for the internal compression state.  memLevel=1 uses minimum memory but is
@@ -614,12 +638,12 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
    addition, the current implementation of deflate will use at most the window
    size minus 262 bytes of the provided dictionary.
 
-     Upon return of this function, strm->adler is set to the adler32 value
+     Upon return of this function, strm->adler is set to the Adler-32 value
    of the dictionary; the decompressor may later use this value to determine
-   which dictionary has been used by the compressor.  (The adler32 value
+   which dictionary has been used by the compressor.  (The Adler-32 value
    applies to the whole dictionary even if only a subset of the dictionary is
    actually used by the compressor.) If a raw deflate was requested, then the
-   adler32 value is not computed and strm->adler is not set.
+   Adler-32 value is not computed and strm->adler is not set.
 
      deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a
    parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
@@ -628,6 +652,28 @@ ZEXTERN int ZEXPORT deflateSetDictionary OF((z_streamp strm,
    not perform any compression: this will be done by deflate().
 */
 
+ZEXTERN int ZEXPORT deflateGetDictionary OF((z_streamp strm,
+                                             Bytef *dictionary,
+                                             uInt  *dictLength));
+/*
+     Returns the sliding dictionary being maintained by deflate.  dictLength is
+   set to the number of bytes in the dictionary, and that many bytes are copied
+   to dictionary.  dictionary must have enough space, where 32768 bytes is
+   always enough.  If deflateGetDictionary() is called with dictionary equal to
+   Z_NULL, then only the dictionary length is returned, and nothing is copied.
+   Similarly, if dictLength is Z_NULL, then it is not set.
+
+     deflateGetDictionary() may return a length less than the window size, even
+   when more than the window size in input has been provided. It may return up
+   to 258 bytes less in that case, due to how zlib's implementation of deflate
+   manages the sliding window and lookahead for matches, where matches can be
+   up to 258 bytes long. If the application needs the last window-size bytes of
+   input, then that would need to be saved by the application outside of zlib.
+
+     deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the
+   stream state is inconsistent.
+*/
+
 ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
                                     z_streamp source));
 /*
@@ -648,10 +694,10 @@ ZEXTERN int ZEXPORT deflateCopy OF((z_streamp dest,
 
 ZEXTERN int ZEXPORT deflateReset OF((z_streamp strm));
 /*
-     This function is equivalent to deflateEnd followed by deflateInit,
-   but does not free and reallocate all the internal compression state.  The
-   stream will keep the same compression level and any other attributes that
-   may have been set by deflateInit2.
+     This function is equivalent to deflateEnd followed by deflateInit, but
+   does not free and reallocate the internal compression state.  The stream
+   will leave the compression level and any other attributes that may have been
+   set unchanged.
 
      deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
    stream state was inconsistent (such as zalloc or state being Z_NULL).
@@ -662,20 +708,36 @@ ZEXTERN int ZEXPORT deflateParams OF((z_streamp strm,
                                       int strategy));
 /*
      Dynamically update the compression level and compression strategy.  The
-   interpretation of level and strategy is as in deflateInit2.  This can be
+   interpretation of level and strategy is as in deflateInit2().  This can be
    used to switch between compression and straight copy of the input data, or
    to switch to a different kind of input data requiring a different strategy.
-   If the compression level is changed, the input available so far is
-   compressed with the old level (and may be flushed); the new level will take
-   effect only at the next call of deflate().
-
-     Before the call of deflateParams, the stream state must be set as for
-   a call of deflate(), since the currently available input may have to be
-   compressed and flushed.  In particular, strm->avail_out must be non-zero.
-
-     deflateParams returns Z_OK if success, Z_STREAM_ERROR if the source
-   stream state was inconsistent or if a parameter was invalid, Z_BUF_ERROR if
-   strm->avail_out was zero.
+   If the compression approach (which is a function of the level) or the
+   strategy is changed, and if any input has been consumed in a previous
+   deflate() call, then the input available so far is compressed with the old
+   level and strategy using deflate(strm, Z_BLOCK).  There are three approaches
+   for the compression levels 0, 1..3, and 4..9 respectively.  The new level
+   and strategy will take effect at the next call of deflate().
+
+     If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does
+   not have enough output space to complete, then the parameter change will not
+   take effect.  In this case, deflateParams() can be called again with the
+   same parameters and more output space to try again.
+
+     In order to assure a change in the parameters on the first try, the
+   deflate stream should be flushed using deflate() with Z_BLOCK or other flush
+   request until strm.avail_out is not zero, before calling deflateParams().
+   Then no more input data should be provided before the deflateParams() call.
+   If this is done, the old level and strategy will be applied to the data
+   compressed before deflateParams(), and the new level and strategy will be
+   applied to the data compressed after deflateParams().
+
+     deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream
+   state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if
+   there was not enough output space to complete the compression of the
+   available input data before a change in the strategy or approach.  Note that
+   in the case of a Z_BUF_ERROR, the parameters are not changed.  A return
+   value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be
+   retried with more output space.
 */
 
 ZEXTERN int ZEXPORT deflateTune OF((z_streamp strm,
@@ -793,7 +855,7 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
    is for use with other formats that use the deflate compressed data format
    such as zip.  Those formats provide their own check values.  If a custom
    format is developed using the raw deflate format for compressed data, it is
-   recommended that a check value such as an adler32 or a crc32 be applied to
+   recommended that a check value such as an Adler-32 or a CRC-32 be applied to
    the uncompressed data as is done in the zlib, gzip, and zip formats.  For
    most applications, the zlib format should be used as is.  Note that comments
    above on the use in deflateInit2() applies to the magnitude of windowBits.
@@ -802,7 +864,10 @@ ZEXTERN int ZEXPORT inflateInit2 OF((z_streamp strm,
    32 to windowBits to enable zlib and gzip decoding with automatic header
    detection, or add 16 to decode only the gzip format (the zlib format will
    return a Z_DATA_ERROR).  If a gzip stream is being decoded, strm->adler is a
-   crc32 instead of an adler32.
+   CRC-32 instead of an Adler-32.  Unlike the gunzip utility and gzread() (see
+   below), inflate() will not automatically decode concatenated gzip streams.
+   inflate() will return Z_STREAM_END at the end of the gzip stream.  The state
+   would need to be reset to continue decoding a subsequent gzip stream.
 
      inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
    memory, Z_VERSION_ERROR if the zlib library version is incompatible with the
@@ -823,7 +888,7 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
      Initializes the decompression dictionary from the given uncompressed byte
    sequence.  This function must be called immediately after a call of inflate,
    if that call returned Z_NEED_DICT.  The dictionary chosen by the compressor
-   can be determined from the adler32 value returned by that call of inflate.
+   can be determined from the Adler-32 value returned by that call of inflate.
    The compressor and decompressor must use exactly the same dictionary (see
    deflateSetDictionary).  For raw inflate, this function can be called at any
    time to set the dictionary.  If the provided dictionary is smaller than the
@@ -834,7 +899,7 @@ ZEXTERN int ZEXPORT inflateSetDictionary OF((z_streamp strm,
      inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a
    parameter is invalid (e.g.  dictionary being Z_NULL) or the stream state is
    inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the
-   expected one (incorrect adler32 value).  inflateSetDictionary does not
+   expected one (incorrect Adler-32 value).  inflateSetDictionary does not
    perform any decompression: this will be done by subsequent calls of
    inflate().
 */
@@ -892,7 +957,7 @@ ZEXTERN int ZEXPORT inflateCopy OF((z_streamp dest,
 ZEXTERN int ZEXPORT inflateReset OF((z_streamp strm));
 /*
      This function is equivalent to inflateEnd followed by inflateInit,
-   but does not free and reallocate all the internal decompression state.  The
+   but does not free and reallocate the internal decompression state.  The
    stream will keep attributes that may have been set by inflateInit2.
 
      inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source
@@ -904,7 +969,9 @@ ZEXTERN int ZEXPORT inflateReset2 OF((z_streamp strm,
 /*
      This function is the same as inflateReset, but it also permits changing
    the wrap and window size requests.  The windowBits parameter is interpreted
-   the same as it is for inflateInit2.
+   the same as it is for inflateInit2.  If the window size is changed, then the
+   memory allocated for the window is freed, and the window will be reallocated
+   by inflate() if needed.
 
      inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source
    stream state was inconsistent (such as zalloc or state being Z_NULL), or if
@@ -956,7 +1023,7 @@ ZEXTERN long ZEXPORT inflateMark OF((z_streamp strm));
    location in the input stream can be determined from avail_in and data_type
    as noted in the description for the Z_BLOCK flush parameter for inflate.
 
-     inflateMark returns the value noted above or -1 << 16 if the provided
+     inflateMark returns the value noted above, or -65536 if the provided
    source stream state was inconsistent.
 */
 
@@ -1048,9 +1115,9 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
    This routine would normally be used in a utility that reads zip or gzip
    files and writes out uncompressed files.  The utility would decode the
    header and process the trailer on its own, hence this routine expects only
-   the raw deflate stream to decompress.  This is different from the normal
-   behavior of inflate(), which expects either a zlib or gzip header and
-   trailer around the deflate stream.
+   the raw deflate stream to decompress.  This is different from the default
+   behavior of inflate(), which expects a zlib header and trailer around the
+   deflate stream.
 
      inflateBack() uses two subroutines supplied by the caller that are then
    called by inflateBack() for input and output.  inflateBack() calls those
@@ -1059,12 +1126,12 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
    parameters and return types are defined above in the in_func and out_func
    typedefs.  inflateBack() will call in(in_desc, &buf) which should return the
    number of bytes of provided input, and a pointer to that input in buf.  If
-   there is no input available, in() must return zero--buf is ignored in that
-   case--and inflateBack() will return a buffer error.  inflateBack() will call
-   out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].  out()
-   should return zero on success, or non-zero on failure.  If out() returns
-   non-zero, inflateBack() will return with an error.  Neither in() nor out()
-   are permitted to change the contents of the window provided to
+   there is no input available, in() must return zero -- buf is ignored in that
+   case -- and inflateBack() will return a buffer error.  inflateBack() will
+   call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1].
+   out() should return zero on success, or non-zero on failure.  If out()
+   returns non-zero, inflateBack() will return with an error.  Neither in() nor
+   out() are permitted to change the contents of the window provided to
    inflateBackInit(), which is also the buffer that out() uses to write from.
    The length written by out() will be at most the window size.  Any non-zero
    amount of input may be provided by in().
@@ -1092,7 +1159,7 @@ ZEXTERN int ZEXPORT inflateBack OF((z_streamp strm,
    using strm->next_in which will be Z_NULL only if in() returned an error.  If
    strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning
    non-zero.  (in() will always be called before out(), so strm->next_in is
-   assured to be defined if out() returns non-zero.) Note that inflateBack()
+   assured to be defined if out() returns non-zero.)  Note that inflateBack()
    cannot return Z_OK.
 */
 
@@ -1114,7 +1181,7 @@ ZEXTERN uLong ZEXPORT zlibCompileFlags OF((void));
      7.6: size of z_off_t
 
     Compiler, assembler, and debug options:
-     8: DEBUG
+     8: ZLIB_DEBUG
      9: ASMV or ASMINF -- use ASM code
      10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention
      11: 0 (reserved)
@@ -1164,7 +1231,8 @@ ZEXTERN int ZEXPORT compress OF((Bytef *dest,   uLongf *destLen,
    the byte length of the source buffer.  Upon entry, destLen is the total size
    of the destination buffer, which must be at least the value returned by
    compressBound(sourceLen).  Upon exit, destLen is the actual size of the
-   compressed buffer.
+   compressed data.  compress() is equivalent to compress2() with a level
+   parameter of Z_DEFAULT_COMPRESSION.
 
      compress returns Z_OK if success, Z_MEM_ERROR if there was not
    enough memory, Z_BUF_ERROR if there was not enough room in the output
@@ -1180,7 +1248,7 @@ ZEXTERN int ZEXPORT compress2 OF((Bytef *dest,   uLongf *destLen,
    length of the source buffer.  Upon entry, destLen is the total size of the
    destination buffer, which must be at least the value returned by
    compressBound(sourceLen).  Upon exit, destLen is the actual size of the
-   compressed buffer.
+   compressed data.
 
      compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough
    memory, Z_BUF_ERROR if there was not enough room in the output buffer,
@@ -1203,7 +1271,7 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
    uncompressed data.  (The size of the uncompressed data must have been saved
    previously by the compressor and transmitted to the decompressor by some
    mechanism outside the scope of this compression library.) Upon exit, destLen
-   is the actual size of the uncompressed buffer.
+   is the actual size of the uncompressed data.
 
      uncompress returns Z_OK if success, Z_MEM_ERROR if there was not
    enough memory, Z_BUF_ERROR if there was not enough room in the output
@@ -1212,6 +1280,14 @@ ZEXTERN int ZEXPORT uncompress OF((Bytef *dest,   uLongf *destLen,
    buffer with the uncompressed data up to that point.
 */
 
+ZEXTERN int ZEXPORT uncompress2 OF((Bytef *dest,   uLongf *destLen,
+                                    const Bytef *source, uLong *sourceLen));
+/*
+     Same as uncompress, except that sourceLen is a pointer, where the
+   length of the source is *sourceLen.  On return, *sourceLen is the number of
+   source bytes consumed.
+*/
+
                         /* gzip file access functions */
 
 /*
@@ -1290,10 +1366,9 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
    default buffer size is 8192 bytes.  This function must be called after
    gzopen() or gzdopen(), and before any other calls that read or write the
    file.  The buffer memory allocation is always deferred to the first read or
-   write.  Two buffers are allocated, either both of the specified size when
-   writing, or one of the specified size and the other twice that size when
-   reading.  A larger buffer size of, for example, 64K or 128K bytes will
-   noticeably increase the speed of decompression (reading).
+   write.  Three times that size in buffer space is allocated.  A larger buffer
+   size of, for example, 64K or 128K bytes will noticeably increase the speed
+   of decompression (reading).
 
      The new buffer size also affects the maximum length for gzprintf().
 
@@ -1304,10 +1379,12 @@ ZEXTERN int ZEXPORT gzbuffer OF((gzFile file, unsigned size));
 ZEXTERN int ZEXPORT gzsetparams OF((gzFile file, int level, int strategy));
 /*
      Dynamically update the compression level or strategy.  See the description
-   of deflateInit2 for the meaning of these parameters.
+   of deflateInit2 for the meaning of these parameters.  Previously provided
+   data is flushed before the parameter change.
 
-     gzsetparams returns Z_OK if success, or Z_STREAM_ERROR if the file was not
-   opened for writing.
+     gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not
+   opened for writing, Z_ERRNO if there is an error writing the flushed data,
+   or Z_MEM_ERROR if there is a memory allocation error.
 */
 
 ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
@@ -1335,7 +1412,35 @@ ZEXTERN int ZEXPORT gzread OF((gzFile file, voidp buf, unsigned len));
    case.
 
      gzread returns the number of uncompressed bytes actually read, less than
-   len for end of file, or -1 for error.
+   len for end of file, or -1 for error.  If len is too large to fit in an int,
+   then nothing is read, -1 is returned, and the error state is set to
+   Z_STREAM_ERROR.
+*/
+
+ZEXTERN z_size_t ZEXPORT gzfread OF((voidp buf, z_size_t size, z_size_t nitems,
+                                     gzFile file));
+/*
+     Read up to nitems items of size size from file to buf, otherwise operating
+   as gzread() does.  This duplicates the interface of stdio's fread(), with
+   size_t request and return types.  If the library defines size_t, then
+   z_size_t is identical to size_t.  If not, then z_size_t is an unsigned
+   integer type that can contain a pointer.
+
+     gzfread() returns the number of full items read of size size, or zero if
+   the end of the file was reached and a full item could not be read, or if
+   there was an error.  gzerror() must be consulted if zero is returned in
+   order to determine if there was an error.  If the multiplication of size and
+   nitems overflows, i.e. the product does not fit in a z_size_t, then nothing
+   is read, zero is returned, and the error state is set to Z_STREAM_ERROR.
+
+     In the event that the end of file is reached and only a partial item is
+   available at the end, i.e. the remaining uncompressed data length is not a
+   multiple of size, then the final partial item is nevertheless read into buf
+   and the end-of-file flag is set.  The length of the partial item read is not
+   provided, but could be inferred from the result of gztell().  This behavior
+   is the same as the behavior of fread() implementations in common libraries,
+   but it prevents the direct use of gzfread() to read a concurrently written
+   file, resetting and retrying on end-of-file, when size is not 1.
 */
 
 ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
@@ -1346,19 +1451,33 @@ ZEXTERN int ZEXPORT gzwrite OF((gzFile file,
    error.
 */
 
+ZEXTERN z_size_t ZEXPORT gzfwrite OF((voidpc buf, z_size_t size,
+                                      z_size_t nitems, gzFile file));
+/*
+     gzfwrite() writes nitems items of size size from buf to file, duplicating
+   the interface of stdio's fwrite(), with size_t request and return types.  If
+   the library defines size_t, then z_size_t is identical to size_t.  If not,
+   then z_size_t is an unsigned integer type that can contain a pointer.
+
+     gzfwrite() returns the number of full items written of size size, or zero
+   if there was an error.  If the multiplication of size and nitems overflows,
+   i.e. the product does not fit in a z_size_t, then nothing is written, zero
+   is returned, and the error state is set to Z_STREAM_ERROR.
+*/
+
 ZEXTERN int ZEXPORTVA gzprintf Z_ARG((gzFile file, const char *format, ...));
 /*
      Converts, formats, and writes the arguments to the compressed file under
    control of the format string, as in fprintf.  gzprintf returns the number of
-   uncompressed bytes actually written, or 0 in case of error.  The number of
-   uncompressed bytes written is limited to 8191, or one less than the buffer
-   size given to gzbuffer().  The caller should assure that this limit is not
-   exceeded.  If it is exceeded, then gzprintf() will return an error (0) with
-   nothing written.  In this case, there may also be a buffer overflow with
-   unpredictable consequences, which is possible only if zlib was compiled with
-   the insecure functions sprintf() or vsprintf() because the secure snprintf()
-   or vsnprintf() functions were not available.  This can be determined using
-   zlibCompileFlags().
+   uncompressed bytes actually written, or a negative zlib error code in case
+   of error.  The number of uncompressed bytes written is limited to 8191, or
+   one less than the buffer size given to gzbuffer().  The caller should assure
+   that this limit is not exceeded.  If it is exceeded, then gzprintf() will
+   return an error (0) with nothing written.  In this case, there may also be a
+   buffer overflow with unpredictable consequences, which is possible only if
+   zlib was compiled with the insecure functions sprintf() or vsprintf()
+   because the secure snprintf() or vsnprintf() functions were not available.
+   This can be determined using zlibCompileFlags().
 */
 
 ZEXTERN int ZEXPORT gzputs OF((gzFile file, const char *s));
@@ -1418,7 +1537,7 @@ ZEXTERN int ZEXPORT gzflush OF((gzFile file, int flush));
      If the flush parameter is Z_FINISH, the remaining data is written and the
    gzip stream is completed in the output.  If gzwrite() is called again, a new
    gzip stream will be started in the output.  gzread() is able to read such
-   concatented gzip streams.
+   concatenated gzip streams.
 
      gzflush should be called only when strictly necessary because it will
    degrade compression if called too often.
@@ -1572,7 +1691,7 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
    return the updated checksum.  If buf is Z_NULL, this function returns the
    required initial value for the checksum.
 
-     An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+     An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed
    much faster.
 
    Usage example:
@@ -1585,6 +1704,12 @@ ZEXTERN uLong ZEXPORT adler32 OF((uLong adler, const Bytef *buf, uInt len));
      if (adler != original_adler) error();
 */
 
+ZEXTERN uLong ZEXPORT adler32_z OF((uLong adler, const Bytef *buf,
+                                    z_size_t len));
+/*
+     Same as adler32(), but with a size_t length.
+*/
+
 /*
 ZEXTERN uLong ZEXPORT adler32_combine OF((uLong adler1, uLong adler2,
                                           z_off_t len2));
@@ -1614,6 +1739,12 @@ ZEXTERN uLong ZEXPORT crc32   OF((uLong crc, const Bytef *buf, uInt len));
      if (crc != original_crc) error();
 */
 
+ZEXTERN uLong ZEXPORT crc32_z OF((uLong adler, const Bytef *buf,
+                                  z_size_t len));
+/*
+     Same as crc32(), but with a size_t length.
+*/
+
 /*
 ZEXTERN uLong ZEXPORT crc32_combine OF((uLong crc1, uLong crc2, z_off_t len2));
 
@@ -1644,19 +1775,35 @@ ZEXTERN int ZEXPORT inflateBackInit_ OF((z_streamp strm, int windowBits,
                                          unsigned char FAR *window,
                                          const char *version,
                                          int stream_size));
-#define deflateInit(strm, level) \
-        deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
-#define inflateInit(strm) \
-        inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
-#define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
-        deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
-                      (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
-#define inflateInit2(strm, windowBits) \
-        inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
-                      (int)sizeof(z_stream))
-#define inflateBackInit(strm, windowBits, window) \
-        inflateBackInit_((strm), (windowBits), (window), \
-                      ZLIB_VERSION, (int)sizeof(z_stream))
+#ifdef Z_PREFIX_SET
+#  define z_deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define z_inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define z_inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#else
+#  define deflateInit(strm, level) \
+          deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit(strm) \
+          inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \
+          deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\
+                        (strategy), ZLIB_VERSION, (int)sizeof(z_stream))
+#  define inflateInit2(strm, windowBits) \
+          inflateInit2_((strm), (windowBits), ZLIB_VERSION, \
+                        (int)sizeof(z_stream))
+#  define inflateBackInit(strm, windowBits, window) \
+          inflateBackInit_((strm), (windowBits), (window), \
+                           ZLIB_VERSION, (int)sizeof(z_stream))
+#endif
 
 #ifndef Z_SOLO
 
@@ -1676,10 +1823,10 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
 #ifdef Z_PREFIX_SET
 #  undef z_gzgetc
 #  define z_gzgetc(g) \
-          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
 #else
 #  define gzgetc(g) \
-          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : gzgetc(g))
+          ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g))
 #endif
 
 /* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or
@@ -1737,19 +1884,16 @@ ZEXTERN int ZEXPORT gzgetc_ OF((gzFile file));  /* backward compatibility */
 
 #endif /* !Z_SOLO */
 
-/* hack for buggy compilers */
-#if !defined(ZUTIL_H) && !defined(NO_DUMMY_DECL)
-    struct internal_state {int dummy;};
-#endif
-
 /* undocumented functions */
 ZEXTERN const char   * ZEXPORT zError           OF((int));
 ZEXTERN int            ZEXPORT inflateSyncPoint OF((z_streamp));
 ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table    OF((void));
 ZEXTERN int            ZEXPORT inflateUndermine OF((z_streamp, int));
+ZEXTERN int            ZEXPORT inflateValidate OF((z_streamp, int));
+ZEXTERN unsigned long  ZEXPORT inflateCodesUsed OF ((z_streamp));
 ZEXTERN int            ZEXPORT inflateResetKeep OF((z_streamp));
 ZEXTERN int            ZEXPORT deflateResetKeep OF((z_streamp));
-#if defined(_WIN32) && !defined(Z_SOLO)
+#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(Z_SOLO)
 ZEXTERN gzFile         ZEXPORT gzopen_w OF((const wchar_t *path,
                                             const char *mode));
 #endif
diff --git a/erts/emulator/zlib/zlib.mk b/erts/emulator/zlib/zlib.mk
index 3f0d64d250..b51b4ec8d6 100644
--- a/erts/emulator/zlib/zlib.mk
+++ b/erts/emulator/zlib/zlib.mk
@@ -52,7 +52,7 @@ ifeq ($(TYPE),gcov)
 ZLIB_CFLAGS = -O0 -fprofile-arcs -ftest-coverage $(DEBUG_CFLAGS) $(DEFS) $(THR_DEFS)
 else  # gcov
 ifeq ($(TYPE),debug)
-ZLIB_CFLAGS = $(DEBUG_CFLAGS) $(DEFS) $(THR_DEFS)
+ZLIB_CFLAGS = -DZLIB_DEBUG=1 $(DEBUG_CFLAGS) $(DEFS) $(THR_DEFS)
 else # debug
 ZLIB_CFLAGS = $(subst -O2, -O3, $(CONFIGURE_CFLAGS) $(DEFS) $(THR_DEFS))
 #ZLIB_CFLAGS=-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7
@@ -62,6 +62,9 @@ ZLIB_CFLAGS = $(subst -O2, -O3, $(CONFIGURE_CFLAGS) $(DEFS) $(THR_DEFS))
 endif # debug
 endif # gcov
 
+# Don't fail if _LFS64_LARGEFILE is undefined
+ZLIB_CFLAGS := $(filter-out -Werror=undef,$(ZLIB_CFLAGS))
+
 ifeq ($(TARGET), win32)
 $(ZLIB_LIBRARY): $(ZLIB_OBJS)
 	$(V_AR) -out:$@ $(ZLIB_OBJS)
diff --git a/erts/emulator/zlib/zutil.c b/erts/emulator/zlib/zutil.c
index 27a8af4a2b..a76c6b0c7e 100644
--- a/erts/emulator/zlib/zutil.c
+++ b/erts/emulator/zlib/zutil.c
@@ -1,33 +1,27 @@
 /* zutil.c -- target dependent utility functions for the compression library
- * Copyright (C) 1995-2005, 2010, 2011, 2012 Jean-loup Gailly.
+ * Copyright (C) 1995-2017 Jean-loup Gailly
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
 /* @(#) $Id$ */
 
-#ifdef HAVE_CONFIG_H
-#  include "config.h"
-#endif
 #include "zutil.h"
 #ifndef Z_SOLO
 #  include "gzguts.h"
 #endif
 
-#ifndef NO_DUMMY_DECL
-struct internal_state      {int dummy;}; /* for buggy compilers */
-#endif
-
 z_const char * const z_errmsg[10] = {
-"need dictionary",     /* Z_NEED_DICT       2  */
-"stream end",          /* Z_STREAM_END      1  */
-"",                    /* Z_OK              0  */
-"file error",          /* Z_ERRNO         (-1) */
-"stream error",        /* Z_STREAM_ERROR  (-2) */
-"data error",          /* Z_DATA_ERROR    (-3) */
-"insufficient memory", /* Z_MEM_ERROR     (-4) */
-"buffer error",        /* Z_BUF_ERROR     (-5) */
-"incompatible version",/* Z_VERSION_ERROR (-6) */
-""};
+    (z_const char *)"need dictionary",     /* Z_NEED_DICT       2  */
+    (z_const char *)"stream end",          /* Z_STREAM_END      1  */
+    (z_const char *)"",                    /* Z_OK              0  */
+    (z_const char *)"file error",          /* Z_ERRNO         (-1) */
+    (z_const char *)"stream error",        /* Z_STREAM_ERROR  (-2) */
+    (z_const char *)"data error",          /* Z_DATA_ERROR    (-3) */
+    (z_const char *)"insufficient memory", /* Z_MEM_ERROR     (-4) */
+    (z_const char *)"buffer error",        /* Z_BUF_ERROR     (-5) */
+    (z_const char *)"incompatible version",/* Z_VERSION_ERROR (-6) */
+    (z_const char *)""
+};
 
 
 const char * ZEXPORT zlibVersion()
@@ -64,7 +58,7 @@ uLong ZEXPORT zlibCompileFlags()
     case 8:     flags += 2 << 6;        break;
     default:    flags += 3 << 6;
     }
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
     flags += 1 << 8;
 #endif
 #if defined(ASMV) || defined(ASMINF)
@@ -118,8 +112,8 @@ uLong ZEXPORT zlibCompileFlags()
     return flags;
 }
 
-#ifdef DEBUG
-
+#ifdef ZLIB_DEBUG
+#include <stdlib.h>
 #  ifndef verbose
 #    define verbose 0
 #  endif
@@ -222,9 +216,11 @@ local ptr_table table[MAX_PTR];
 
 voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size)
 {
-    voidpf buf = opaque; /* just to make some compilers happy */
+    voidpf buf;
     ulg bsize = (ulg)items*size;
 
+    (void)opaque;
+
     /* If we allocate less than 65520 bytes, we assume that farmalloc
      * will return a usable pointer which doesn't have to be normalized.
      */
@@ -247,6 +243,9 @@ voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, unsigned items, unsigned size)
 void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
 {
     int n;
+
+    (void)opaque;
+
     if (*(ush*)&ptr != 0) { /* object < 64K */
         farfree(ptr);
         return;
@@ -262,7 +261,6 @@ void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
         next_ptr--;
         return;
     }
-    ptr = opaque; /* just to make some compilers happy */
     Assert(0, "zcfree: ptr not found");
 }
 
@@ -281,13 +279,13 @@ void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
 
 voidpf ZLIB_INTERNAL zcalloc (voidpf opaque, uInt items, uInt size)
 {
-    if (opaque) opaque = 0; /* to make compiler happy */
+    (void)opaque;
     return _halloc((long)items, size);
 }
 
 void ZLIB_INTERNAL zcfree (voidpf opaque, voidpf ptr)
 {
-    if (opaque) opaque = 0; /* to make compiler happy */
+    (void)opaque;
     _hfree(ptr);
 }
 
@@ -309,7 +307,7 @@ voidpf ZLIB_INTERNAL zcalloc (opaque, items, size)
     unsigned items;
     unsigned size;
 {
-    if (opaque) items += size - size; /* make compiler happy */
+    (void)opaque;
     return sizeof(uInt) > 2 ? (voidpf)malloc(items * size) :
                               (voidpf)calloc(items, size);
 }
@@ -318,8 +316,8 @@ void ZLIB_INTERNAL zcfree (opaque, ptr)
     voidpf opaque;
     voidpf ptr;
 {
+    (void)opaque;
     free(ptr);
-    if (opaque) return; /* make compiler happy */
 }
 
 #endif /* MY_ZCALLOC */
diff --git a/erts/emulator/zlib/zutil.h b/erts/emulator/zlib/zutil.h
index 24ab06b1cf..b079ea6a80 100644
--- a/erts/emulator/zlib/zutil.h
+++ b/erts/emulator/zlib/zutil.h
@@ -1,5 +1,5 @@
 /* zutil.h -- internal interface and configuration of the compression library
- * Copyright (C) 1995-2013 Jean-loup Gailly.
+ * Copyright (C) 1995-2016 Jean-loup Gailly, Mark Adler
  * For conditions of distribution and use, see copyright notice in zlib.h
  */
 
@@ -36,7 +36,9 @@
 #ifndef local
 #  define local static
 #endif
-/* compile with -Dlocal if your debugger can't find static symbols */
+/* since "static" is used to mean two completely different things in C, we
+   define "local" for the non-static meaning of "static", for readability
+   (compile with -Dlocal if your debugger can't find static symbols) */
 
 typedef unsigned char  uch;
 typedef uch FAR uchf;
@@ -98,28 +100,38 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif
 
 #ifdef AMIGA
-#  define OS_CODE  0x01
+#  define OS_CODE  1
 #endif
 
 #if defined(VAXC) || defined(VMS)
-#  define OS_CODE  0x02
+#  define OS_CODE  2
 #  define F_OPEN(name, mode) \
      fopen((name), (mode), "mbc=60", "ctx=stm", "rfm=fix", "mrs=512")
 #endif
 
+#ifdef __370__
+#  if __TARGET_LIB__ < 0x20000000
+#    define OS_CODE 4
+#  elif __TARGET_LIB__ < 0x40000000
+#    define OS_CODE 11
+#  else
+#    define OS_CODE 8
+#  endif
+#endif
+
 #if defined(ATARI) || defined(atarist)
-#  define OS_CODE  0x05
+#  define OS_CODE  5
 #endif
 
 #ifdef OS2
-#  define OS_CODE  0x06
+#  define OS_CODE  6
 #  if defined(M_I86) && !defined(Z_SOLO)
 #    include <malloc.h>
 #  endif
 #endif
 
 #if defined(MACOS) || defined(TARGET_OS_MAC)
-#  define OS_CODE  0x07
+#  define OS_CODE  7
 #  ifndef Z_SOLO
 #    if defined(__MWERKS__) && __dest_os != __be_os && __dest_os != __win32_os
 #      include <unix.h> /* for fdopen */
@@ -131,18 +143,24 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #  endif
 #endif
 
-#ifdef TOPS20
-#  define OS_CODE  0x0a
+#ifdef __acorn
+#  define OS_CODE 13
 #endif
 
-#ifdef WIN32
-#  ifndef __CYGWIN__  /* Cygwin is Unix, not Win32 */
-#    define OS_CODE  0x0b
-#  endif
+#if defined(WIN32) && !defined(__CYGWIN__)
+#  define OS_CODE  10
+#endif
+
+#ifdef _BEOS_
+#  define OS_CODE  16
+#endif
+
+#ifdef __TOS_OS400__
+#  define OS_CODE 18
 #endif
 
-#ifdef __50SERIES /* Prime/PRIMOS */
-#  define OS_CODE  0x0f
+#ifdef __APPLE__
+#  define OS_CODE 19
 #endif
 
 #if defined(_BEOS_) || defined(RISCOS)
@@ -177,7 +195,7 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
         /* common defaults */
 
 #ifndef OS_CODE
-#  define OS_CODE  0x03  /* assume Unix */
+#  define OS_CODE  3     /* assume Unix */
 #endif
 
 #ifndef F_OPEN
@@ -216,7 +234,7 @@ extern z_const char * const z_errmsg[10]; /* indexed by 2-zlib_error */
 #endif
 
 /* Diagnostic functions */
-#ifdef DEBUG
+#ifdef ZLIB_DEBUG
 #  include <stdio.h>
    extern int ZLIB_INTERNAL z_verbose;
    extern void ZLIB_INTERNAL z_error OF((char *m));
diff --git a/erts/epmd/src/epmd.c b/erts/epmd/src/epmd.c
index 44e997e609..8313678b5d 100644
--- a/erts/epmd/src/epmd.c
+++ b/erts/epmd/src/epmd.c
@@ -437,6 +437,11 @@ static void usage(EpmdVars *g)
     fprintf(stderr, "        epmd -kill even if there "
 	    "are registered nodes.\n");
     fprintf(stderr, "        Also allows forced unregister (epmd -stop).\n");
+#ifdef HAVE_SYSTEMD_DAEMON
+    fprintf(stderr, "    -systemd\n");
+    fprintf(stderr, "        Wait for socket from systemd. The option makes sense\n");
+    fprintf(stderr, "        when started from .socket unit.\n");
+#endif /* HAVE_SYSTEMD_DAEMON */
     fprintf(stderr, "\nDbgExtra options\n");
     fprintf(stderr, "    -packet_timeout Seconds\n");
     fprintf(stderr, "        Set the number of seconds a connection can be\n");
@@ -462,11 +467,6 @@ static void usage(EpmdVars *g)
     fprintf(stderr, "        Forcibly unregisters a name with epmd\n");
     fprintf(stderr, "        (only allowed if -relaxed_command_check was given when \n");
     fprintf(stderr, "        epmd was started).\n");
-#ifdef HAVE_SYSTEMD_DAEMON
-    fprintf(stderr, "    -systemd\n");
-    fprintf(stderr, "        Wait for socket from systemd. The option makes sense\n");
-    fprintf(stderr, "        when started from .socket unit.\n");
-#endif /* HAVE_SYSTEMD_DAEMON */
     epmd_cleanup_exit(g,1);
 }
 
diff --git a/erts/etc/common/erlexec.c b/erts/etc/common/erlexec.c
index 0cb01fd4ef..ec4a4ead23 100644
--- a/erts/etc/common/erlexec.c
+++ b/erts/etc/common/erlexec.c
@@ -819,10 +819,8 @@ int main(int argc, char **argv)
 
 	      case '+':
 		switch (argv[i][1]) {
-		  case '#':
 		  case 'a':
 		  case 'A':
-		  case 'b':
 		  case 'C':
 		  case 'e':
 		  case 'i':
diff --git a/erts/include/internal/gcc/ethr_dw_atomic.h b/erts/include/internal/gcc/ethr_dw_atomic.h
index 47158b7295..dd116c81ce 100644
--- a/erts/include/internal/gcc/ethr_dw_atomic.h
+++ b/erts/include/internal/gcc/ethr_dw_atomic.h
@@ -78,7 +78,7 @@ typedef volatile ETHR_NATIVE_SU_DW_SINT_T * ethr_native_dw_ptr_t;
  * runtime. We, therefore, need an extra word allocated.
  */
 #define ETHR_DW_NATMC_MEM__(VAR) \
-   (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+   (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
 typedef union {
     volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
     volatile ethr_sint_t sint[3];
diff --git a/erts/include/internal/gcc/ethr_membar.h b/erts/include/internal/gcc/ethr_membar.h
index 07960ce040..5cbda5582d 100644
--- a/erts/include/internal/gcc/ethr_membar.h
+++ b/erts/include/internal/gcc/ethr_membar.h
@@ -96,7 +96,7 @@
  *          issue an aquire memory barrier and an __atomic
  *          builtin memory acess with the __ATOMIC_RELEASE
  *          memory model must at least issue a release memory
- *          barrier. Otherwise the two can not be paired.
+ *          barrier. Otherwise the two cannot be paired.
  *       4. All __atomic builtins accessing memory using the
  *          __ATOMIC_CONSUME builtin can be used for the same
  *          reason __ATOMIC_ACQUIRE can be used. The ethread
diff --git a/erts/include/internal/i386/ethr_dw_atomic.h b/erts/include/internal/i386/ethr_dw_atomic.h
index 91acdb0483..3c47da9758 100644
--- a/erts/include/internal/i386/ethr_dw_atomic.h
+++ b/erts/include/internal/i386/ethr_dw_atomic.h
@@ -73,7 +73,7 @@ typedef volatile ethr_native_sint128_t__ * ethr_native_dw_ptr_t;
  * runtime. We, therefore, need an extra word allocated.
  */
 #define ETHR_DW_NATMC_MEM__(VAR) \
-   (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+   (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
 typedef union {
 #ifdef ETHR_NATIVE_SU_DW_SINT_T
     volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
diff --git a/erts/include/internal/win/ethr_dw_atomic.h b/erts/include/internal/win/ethr_dw_atomic.h
index a6b26ab7bb..f7a1900a82 100644
--- a/erts/include/internal/win/ethr_dw_atomic.h
+++ b/erts/include/internal/win/ethr_dw_atomic.h
@@ -80,7 +80,7 @@ typedef volatile __int64 * ethr_native_dw_ptr_t;
  * runtime. We, therefore, need an extra word allocated.
  */
 #define ETHR_DW_NATMC_MEM__(VAR) \
-   (&var->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
+   (&(VAR)->c[(int) ((ethr_uint_t) &(VAR)->c[0]) & ETHR_DW_NATMC_ALIGN_MASK__])
 typedef union {
 #ifdef ETHR_NATIVE_SU_DW_SINT_T
     volatile ETHR_NATIVE_SU_DW_SINT_T dw_sint;
diff --git a/erts/preloaded/ebin/atomics.beam b/erts/preloaded/ebin/atomics.beam
index a5ac24f0b8..1de97fa668 100644
--- a/erts/preloaded/ebin/atomics.beam
+++ b/erts/preloaded/ebin/atomics.beam
diff --git a/erts/preloaded/ebin/counters.beam b/erts/preloaded/ebin/counters.beam
index a1aa34a415..4e1a3566f7 100644
--- a/erts/preloaded/ebin/counters.beam
+++ b/erts/preloaded/ebin/counters.beam
diff --git a/erts/preloaded/ebin/erl_init.beam b/erts/preloaded/ebin/erl_init.beam
new file mode 100644
index 0000000000..93f23377b5
--- /dev/null
+++ b/erts/preloaded/ebin/erl_init.beam
diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam
index 37903d24b6..42629bbe8e 100644
--- a/erts/preloaded/ebin/erl_prim_loader.beam
+++ b/erts/preloaded/ebin/erl_prim_loader.beam
diff --git a/erts/preloaded/ebin/erl_tracer.beam b/erts/preloaded/ebin/erl_tracer.beam
index 2509f238bf..a74e87d19c 100644
--- a/erts/preloaded/ebin/erl_tracer.beam
+++ b/erts/preloaded/ebin/erl_tracer.beam
diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam
index 7563663807..54557d9d18 100644
--- a/erts/preloaded/ebin/erlang.beam
+++ b/erts/preloaded/ebin/erlang.beam
diff --git a/erts/preloaded/ebin/erts_code_purger.beam b/erts/preloaded/ebin/erts_code_purger.beam
index bc697d11d7..3cbfd94a43 100644
--- a/erts/preloaded/ebin/erts_code_purger.beam
+++ b/erts/preloaded/ebin/erts_code_purger.beam
diff --git a/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam b/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam
index 5b788368af..43604e0480 100644
--- a/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam
+++ b/erts/preloaded/ebin/erts_dirty_process_signal_handler.beam
diff --git a/erts/preloaded/ebin/erts_internal.beam b/erts/preloaded/ebin/erts_internal.beam
index b8415a9833..543104638b 100644
--- a/erts/preloaded/ebin/erts_internal.beam
+++ b/erts/preloaded/ebin/erts_internal.beam
diff --git a/erts/preloaded/ebin/erts_literal_area_collector.beam b/erts/preloaded/ebin/erts_literal_area_collector.beam
index e2a8c65f38..1f7fbef85a 100644
--- a/erts/preloaded/ebin/erts_literal_area_collector.beam
+++ b/erts/preloaded/ebin/erts_literal_area_collector.beam
diff --git a/erts/preloaded/ebin/init.beam b/erts/preloaded/ebin/init.beam
index fee2da33a6..bcbf4a1469 100644
--- a/erts/preloaded/ebin/init.beam
+++ b/erts/preloaded/ebin/init.beam
diff --git a/erts/preloaded/ebin/otp_ring0.beam b/erts/preloaded/ebin/otp_ring0.beam
index 324e111ad1..0d194896c7 100644
--- a/erts/preloaded/ebin/otp_ring0.beam
+++ b/erts/preloaded/ebin/otp_ring0.beam
diff --git a/erts/preloaded/ebin/persistent_term.beam b/erts/preloaded/ebin/persistent_term.beam
index c73da80a98..79ef03b9a6 100644
--- a/erts/preloaded/ebin/persistent_term.beam
+++ b/erts/preloaded/ebin/persistent_term.beam
diff --git a/erts/preloaded/ebin/prim_buffer.beam b/erts/preloaded/ebin/prim_buffer.beam
index 75e5b7c9cb..70675f47ee 100644
--- a/erts/preloaded/ebin/prim_buffer.beam
+++ b/erts/preloaded/ebin/prim_buffer.beam
diff --git a/erts/preloaded/ebin/prim_eval.beam b/erts/preloaded/ebin/prim_eval.beam
index ddda4764e1..5aa5a7f384 100644
--- a/erts/preloaded/ebin/prim_eval.beam
+++ b/erts/preloaded/ebin/prim_eval.beam
diff --git a/erts/preloaded/ebin/prim_file.beam b/erts/preloaded/ebin/prim_file.beam
index 2d1ce7d631..37a1a563a9 100644
--- a/erts/preloaded/ebin/prim_file.beam
+++ b/erts/preloaded/ebin/prim_file.beam
diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam
index 558968b58a..23a8e8f210 100644
--- a/erts/preloaded/ebin/prim_inet.beam
+++ b/erts/preloaded/ebin/prim_inet.beam
diff --git a/erts/preloaded/ebin/prim_zip.beam b/erts/preloaded/ebin/prim_zip.beam
index 51721a27a8..8483d11aed 100644
--- a/erts/preloaded/ebin/prim_zip.beam
+++ b/erts/preloaded/ebin/prim_zip.beam
diff --git a/erts/preloaded/ebin/zlib.beam b/erts/preloaded/ebin/zlib.beam
index 4519b540c4..f3737a524e 100644
--- a/erts/preloaded/ebin/zlib.beam
+++ b/erts/preloaded/ebin/zlib.beam
diff --git a/erts/preloaded/src/Makefile b/erts/preloaded/src/Makefile
index e1bd5bc295..c655bff72e 100644
--- a/erts/preloaded/src/Makefile
+++ b/erts/preloaded/src/Makefile
@@ -41,7 +41,7 @@ PRE_LOADED_ERL_MODULES = \
 	prim_inet \
 	zlib \
 	prim_zip \
-	otp_ring0 \
+	erl_init \
 	erts_code_purger \
 	erlang \
 	erts_internal \
diff --git a/erts/preloaded/src/otp_ring0.erl b/erts/preloaded/src/erl_init.erl
index 62a60fffe2..d681f05398 100644
--- a/erts/preloaded/src/otp_ring0.erl
+++ b/erts/preloaded/src/erl_init.erl
@@ -17,15 +17,26 @@
 %%
 %% %CopyrightEnd%
 %%
--module(otp_ring0).
+-module(erl_init).
 
-%% Purpose : Start up of erlang system.
+%% Initial process of an Erlang system.
 
 -export([start/2]).
 
--spec start(_, term()) -> term().
-start(_Env, Argv) ->
-    run(init, boot, Argv).
+%% This gets the module name given by the +i option (default 'init')
+%% and the list of command line arguments
+
+-spec start(Mod, BootArgs) -> no_return() when
+      Mod :: module(),
+      BootArgs :: [binary()].
+start(Mod, BootArgs) ->
+    %% Load the static nifs
+    zlib:on_load(),
+    erl_tracer:on_load(),
+    prim_buffer:on_load(),
+    prim_file:on_load(),
+    %% Proceed to the specified boot module
+    run(Mod, boot, BootArgs).
 
 run(M, F, A) ->
     case erlang:function_exported(M, F, 1) of
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index 1ed6b6b284..261b731900 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -3654,90 +3654,28 @@ memory() ->
 -spec erlang:memory(Type :: memory_type()) -> non_neg_integer();
                    (TypeList :: [memory_type()]) -> [{memory_type(), non_neg_integer()}].
 memory(Type) when erlang:is_atom(Type) ->
-    {AA, ALCU, ChkSup, BadArgZero} = need_mem_info(Type),
-    case get_mem_data(ChkSup, ALCU, AA) of
-	notsup ->
-	    erlang:error(notsup, [Type]);
-	Mem ->
-	    Value = get_memval(Type, Mem),
-	    case {BadArgZero, Value} of
-		{true, 0} -> erlang:error(badarg, [Type]);
-		_ -> Value
-	    end
+    try
+        case aa_mem_data(au_mem_data(?ALL_NEEDED_ALLOCS)) of
+            notsup -> erlang:error(notsup, [Type]);
+            Mem -> get_memval(Type, Mem)
+        end
+    catch
+        error:badarg -> erlang:error(badarg, [Type]) % re-raise with the argument
     end;
 memory(Types) when erlang:is_list(Types) ->
-    {AA, ALCU, ChkSup, BadArgZeroList} = need_mem_info_list(Types),
-    case get_mem_data(ChkSup, ALCU, AA) of
-	notsup ->
-	    erlang:error(notsup, [Types]);
-	Mem ->
-	    case memory_result_list(Types, BadArgZeroList, Mem) of
-		badarg -> erlang:error(badarg, [Types]);
-		Result -> Result
-	    end
-    end.
-
-memory_result_list([], [], _Mem) ->
-    [];
-memory_result_list([T|Ts], [BAZ|BAZs], Mem) ->
-    case memory_result_list(Ts, BAZs, Mem) of
-	badarg -> badarg;
-	TVs ->
-	    V = get_memval(T, Mem),
-	    case {BAZ, V} of
-		{true, 0} -> badarg;
-		_ -> [{T, V}| TVs]
-	    end
-    end.
-
-get_mem_data(true, AlcUAllocs, NeedAllocatedAreas) ->
-    case memory_is_supported() of
-	false -> notsup;
-	true -> get_mem_data(false, AlcUAllocs, NeedAllocatedAreas)
-    end;
-get_mem_data(false, AlcUAllocs, NeedAllocatedAreas) ->
-    AlcUMem = case AlcUAllocs of
-		  [] -> #memory{};
-		  _ ->
-		      au_mem_data(AlcUAllocs)
-	      end,
-    case NeedAllocatedAreas of
-	true -> aa_mem_data(AlcUMem);
-	false -> AlcUMem
+    try
+        case aa_mem_data(au_mem_data(?ALL_NEEDED_ALLOCS)) of
+            notsup -> erlang:error(notsup, [Types]);
+            Mem -> memory_1(Types, Mem)
+        end
+    catch
+        error:badarg -> erlang:error(badarg, [Types]) % re-raise with the argument
     end.
 
-need_mem_info_list([]) ->
-    {false, [], false, []};
-need_mem_info_list([T|Ts]) ->
-    {MAA, MALCU, MChkSup, MBadArgZero} = need_mem_info_list(Ts),
-    {AA, ALCU, ChkSup, BadArgZero} = need_mem_info(T),
-    {case AA of
-	 true -> true;
-	 _ -> MAA
-     end,
-     ALCU ++ (MALCU -- ALCU),
-     case ChkSup of
-	 true -> true;
-	 _ -> MChkSup
-     end,
-     [BadArgZero|MBadArgZero]}.
-
-need_mem_info(Type) when Type == total;
-			 Type == system ->
-    {true, ?ALL_NEEDED_ALLOCS, false, false};
-need_mem_info(Type) when Type == processes;
-			 Type == processes_used ->
-    {true, [eheap_alloc, fix_alloc], true, false};
-need_mem_info(Type) when Type == atom;
-			 Type == atom_used;
-			 Type == code ->
-    {true, [], true, false};
-need_mem_info(binary) ->
-    {false, [binary_alloc], true, false};
-need_mem_info(ets) ->
-    {true, [ets_alloc], true, false};
-need_mem_info(_) ->
-    {false, [], false, true}.
+%% Pairs each requested type with its value from Mem, in request order.
+memory_1([], _Mem) -> [];
+memory_1([T | Ts], Mem) ->
+    [{T, get_memval(T, Mem)} | memory_1(Ts, Mem)].
 
 get_memval(total, #memory{total = V}) -> V;
 get_memval(processes, #memory{processes = V}) -> V;
@@ -3748,16 +3686,7 @@ get_memval(atom_used, #memory{atom_used = V}) -> V;
 get_memval(binary, #memory{binary = V}) -> V;
 get_memval(code, #memory{code = V}) -> V;
 get_memval(ets, #memory{ets = V}) -> V;
-get_memval(_, #memory{}) -> 0.
-
-memory_is_supported() ->
-    {_, _, FeatureList, _} = erlang:system_info(allocator),
-    case ((erlang:system_info(alloc_util_allocators) 
-	   -- ?CARRIER_ALLOCS)
-	  -- FeatureList) of
-	[] -> true;
-	_ -> false
-    end.
+get_memval(_, #memory{}) -> erlang:error(badarg).
 
 get_blocks_size([{blocks_size, Sz, _, _} | Rest], Acc) ->
     get_blocks_size(Rest, Acc+Sz);
@@ -3768,16 +3697,6 @@ get_blocks_size([_ | Rest], Acc) ->
 get_blocks_size([], Acc) ->
     Acc.
 
-
-blocks_size([{Carriers, SizeList} | Rest], Acc) when Carriers == mbcs;
-						     Carriers == mbcs_pool;
-						     Carriers == sbcs ->
-    blocks_size(Rest, get_blocks_size(SizeList, Acc));
-blocks_size([_ | Rest], Acc) ->
-    blocks_size(Rest, Acc);
-blocks_size([], Acc) ->
-    Acc.
-
 get_fix_proc([{ProcType, A1, U1}| Rest], {A0, U0}) when ProcType == proc;
 							ProcType == monitor;
 							ProcType == link;
@@ -3802,64 +3721,78 @@ fix_proc([_ | Rest], Acc) ->
 fix_proc([], Acc) ->
     Acc.
 
+%% Adds the {A, U} totals from fix_proc/2 to processes/processes_used and
+%% takes A out of system; a returned Mask is band-ed onto each new value.
+au_mem_fix(#memory{processes = P0, processes_used = PU0, system = S0} = Mem,
+           Data) ->
+    {Procs, ProcsUsed, System} =
+        case fix_proc(Data, {0, 0}) of
+            {A, U} ->
+                {P0 + A, PU0 + U, S0 - A};
+            {Mask, A, U} ->
+                {Mask band (P0 + A), Mask band (PU0 + U), Mask band (S0 - A)}
+        end,
+    Mem#memory{processes = Procs, processes_used = ProcsUsed,
+               system = System}.
+
+%% Adds the block sizes found in Data to the counters that allocator Type
+%% contributes to. Every type is counted in 'total'. eheap_alloc is also
+%% counted in 'processes' and 'processes_used'; all other types are counted
+%% in 'system', and ets_alloc and binary_alloc in their own field as well.
+au_mem_acc(#memory{total = Total,
+                   processes = Procs,
+                   processes_used = Used} = Acc,
+           eheap_alloc, Blocks) ->
+    Bytes = get_blocks_size(Blocks, 0),
+    Acc#memory{total = Total + Bytes,
+               processes = Procs + Bytes,
+               processes_used = Used + Bytes};
+au_mem_acc(#memory{total = Total, system = System, ets = Ets} = Acc,
+           ets_alloc, Blocks) ->
+    Bytes = get_blocks_size(Blocks, 0),
+    Acc#memory{total = Total + Bytes,
+               system = System + Bytes,
+               ets = Ets + Bytes};
+au_mem_acc(#memory{total = Total, system = System, binary = Binary} = Acc,
+           binary_alloc, Blocks) ->
+    Bytes = get_blocks_size(Blocks, 0),
+    Acc#memory{total = Total + Bytes,
+               system = System + Bytes,
+               binary = Binary + Bytes};
+%% Any other allocator type only counts as system memory.
+au_mem_acc(#memory{total = Total, system = System} = Acc, _Type, Blocks) ->
+    Bytes = get_blocks_size(Blocks, 0),
+    Acc#memory{total = Total + Bytes,
+               system = System + Bytes}.
+
+%% Credits each {Type, SizeList} entry to the type it is tagged with.
+au_mem_foreign(Acc, []) -> Acc;
+au_mem_foreign(Acc, [{AllocType, Blocks} | Tail]) ->
+    au_mem_foreign(au_mem_acc(Acc, AllocType, Blocks), Tail).
+
+%% Folds the carrier entries of one allocator into Mem. A pool's foreign_blocks
+%% entry goes to au_mem_foreign/2 and is removed before the rest is summed.
+au_mem_current(Mem0, Type, [{mbcs_pool, MBCS} | Rest]) ->
+    [Foreign] = [Foreign || {foreign_blocks, Foreign} <- MBCS],
+    SizeList = MBCS -- [{foreign_blocks, Foreign}],
+    Mem = au_mem_foreign(Mem0, Foreign),
+    au_mem_current(au_mem_acc(Mem, Type, SizeList), Type, Rest);
+au_mem_current(Mem, Type, [{C, SizeList} | Rest]) when C =:= mbcs; C =:= sbcs ->
+    au_mem_current(au_mem_acc(Mem, Type, SizeList), Type, Rest);
+au_mem_current(Mem, Type, [_ | Rest]) ->
+    au_mem_current(Mem, Type, Rest);
+au_mem_current(Mem, _Type, []) ->
+    Mem.
+
 au_mem_data(notsup, _) ->
     notsup;
 au_mem_data(_, [{_, false} | _]) ->
     notsup;
-au_mem_data(#memory{total = Tot,
-		    processes = Proc,
-		    processes_used = ProcU} = Mem,
-	    [{eheap_alloc, _, Data} | Rest]) ->
-    Sz = blocks_size(Data, 0),
-    au_mem_data(Mem#memory{total = Tot+Sz,
-			   processes = Proc+Sz,
-			   processes_used = ProcU+Sz},
-		Rest);
-au_mem_data(#memory{total = Tot,
-		    system = Sys,
-		    ets = Ets} = Mem,
-	    [{ets_alloc, _, Data} | Rest]) ->
-    Sz = blocks_size(Data, 0),
-    au_mem_data(Mem#memory{total = Tot+Sz,
-			   system = Sys+Sz,
-			   ets = Ets+Sz},
-		Rest);
-au_mem_data(#memory{total = Tot,
-		    system = Sys,
-		    binary = Bin} = Mem,
-	    [{binary_alloc, _, Data} | Rest]) ->
-    Sz = blocks_size(Data, 0),
-    au_mem_data(Mem#memory{total = Tot+Sz,
-			   system = Sys+Sz,
-			   binary = Bin+Sz},
-		Rest);
-au_mem_data(#memory{total = Tot,
-		    processes = Proc,
-		    processes_used = ProcU,
-		    system = Sys} = Mem,
-	    [{fix_alloc, _, Data} | Rest]) ->
-    Sz = blocks_size(Data, 0),
-    case fix_proc(Data, {0, 0}) of
-	{A, U} ->
-	    au_mem_data(Mem#memory{total = Tot+Sz,
-				   processes = Proc+A,
-				   processes_used = ProcU+U,
-				   system = Sys+Sz-A},
-			Rest);
-	{Mask, A, U} ->
-	    au_mem_data(Mem#memory{total = Tot+Sz,
-				   processes = Mask band (Proc+A),
-				   processes_used = Mask band (ProcU+U),
-				   system = Mask band (Sys+Sz-A)},
-			Rest)
-    end;
-au_mem_data(#memory{total = Tot,
-		    system = Sys} = Mem,
-	    [{_, _, Data} | Rest]) ->
-    Sz = blocks_size(Data, 0),
-    au_mem_data(Mem#memory{total = Tot+Sz,
-			   system = Sys+Sz},
-		Rest);
+au_mem_data(#memory{} = Mem0, [{fix_alloc, _, Data} | Rest]) ->
+    Mem = au_mem_fix(Mem0, Data),
+    au_mem_data(au_mem_current(Mem, fix_alloc, Data), Rest);
+au_mem_data(#memory{} = Mem, [{Type, _, Data} | Rest]) ->
+    au_mem_data(au_mem_current(Mem, Type, Data), Rest);
 au_mem_data(EMD, []) ->
     EMD.
 
diff --git a/erts/preloaded/src/erts.app.src b/erts/preloaded/src/erts.app.src
index ab0b9494b0..9de81cae27 100644
--- a/erts/preloaded/src/erts.app.src
+++ b/erts/preloaded/src/erts.app.src
@@ -26,7 +26,7 @@
 		erl_prim_loader,
 		erts_internal,
 		init,
-		otp_ring0,
+		erl_init,
 		erts_code_purger,
 		prim_buffer,
 		prim_eval,
diff --git a/erts/preloaded/src/init.erl b/erts/preloaded/src/init.erl
index b4b8b3bf9b..d8f5c9a945 100644
--- a/erts/preloaded/src/init.erl
+++ b/erts/preloaded/src/init.erl
@@ -200,12 +200,6 @@ boot(BootArgs) ->
     register(init, self()),
     process_flag(trap_exit, true),
 
-    %% Load the static nifs
-    zlib:on_load(),
-    erl_tracer:on_load(),
-    prim_buffer:on_load(),
-    prim_file:on_load(),
-
     {Start0,Flags,Args} = parse_boot_args(BootArgs),
     %% We don't get to profile parsing of BootArgs
     case b2a(get_flag(profile_boot, Flags, false)) of
diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl
index f1d938c9a4..cc2711b540 100644
--- a/erts/preloaded/src/prim_inet.erl
+++ b/erts/preloaded/src/prim_inet.erl
@@ -1742,7 +1742,7 @@ type_opt_1(O) when is_atom(O) -> undefined.
 
 %% Get. No supplied value.
 type_value(get, undefined)        -> false; % Undefined type
-%% These two clauses can not happen since they are only used
+%% These two clauses cannot happen since they are only used
 %% in record fields - from record fields they must have a
 %% value though it might be 'undefined', so record fields
 %% calls type_value/3, not type_value/2.
@@ -1908,7 +1908,7 @@ type_value_2(_, _)         -> false.
 
 %% Get. No supplied value.
 %%
-%% These two clauses can not happen since they are only used
+%% These two clauses cannot happen since they are only used
 %% in record fields - from record fields they must have a
 %% value though it might be 'undefined', so record fields
 %% calls enc_value/3, not enc_value/2.
diff --git a/erts/test/upgrade_SUITE.erl b/erts/test/upgrade_SUITE.erl
index c32dbabe8d..f92c25bdb4 100644
--- a/erts/test/upgrade_SUITE.erl
+++ b/erts/test/upgrade_SUITE.erl
@@ -25,7 +25,7 @@
 
 -define(upgr_sname,otp_upgrade).
 
-%% Applications that are excluded from this test because they can not
+%% Applications that are excluded from this test because they cannot
 %% just be started in a new node with out specific configuration.
 -define(start_exclude,
 	[cosEvent,cosEventDomain,cosFileTransfer,cosNotification,