78 files changed, 3716 insertions, 2703 deletions
diff --git a/INSTALL.md b/INSTALL.md
index 2567b791e5..1061c5187a 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -277,7 +277,8 @@ Some of the available `configure` options are:
     x86 processors before pentium 4 (back to 486) in the ethread library. If
     not passed the ethread library (part of the runtime system) will use
     instructions that first appeared on the pentium 4 processor when building
-    for x86.
+    for x86. This option will be automatically enabled if required on the
+    build machine.
 *   `--with-libatomic_ops=PATH` - Use the `libatomic_ops` library for atomic
     memory accesses. If `configure` should inform you about no native atomic
     implementation available, you typically want to try using the
diff --git a/configure.in b/configure.in
index d0879c6291..36b33ec399 100644
--- a/configure.in
+++ b/configure.in
@@ -106,7 +106,8 @@ AC_SUBST(CROSS_COMPILING)
 
 
 AC_ARG_ENABLE(bootstrap-only,
-[  --enable-bootstrap-only enable bootstrap only configuration],
+AS_HELP_STRING([--enable-bootstrap-only],
+               [enable bootstrap only configuration]),
 [ if test "X$enableval" = "Xyes"; then
      BOOTSTRAP_ONLY=yes
   else
@@ -192,53 +193,62 @@ AC_MSG_RESULT([$OTP_REL])
 AC_SUBST(OTP_REL)
 
 AC_ARG_ENABLE(threads,
-[  --enable-threads        enable async thread support
-  --disable-threads       disable async thread support])
+AS_HELP_STRING([--enable-threads], [enable async thread support])
+AS_HELP_STRING([--disable-threads], [disable async thread support]))
 
 AC_ARG_ENABLE(halfword-emulator,
-[  --enable-halfword-emulator        enable halfword emulator (only for 64bit builds)
-  --disable-halfword-emulator        disable halfword emulator (only for 64bit builds)])
+AS_HELP_STRING([--enable-halfword-emulator],
+               [enable halfword emulator (only for 64bit builds)]))
 
 AC_ARG_ENABLE(smp-support,
-[  --enable-smp-support    enable smp support
-  --disable-smp-support   disable smp support])
+AS_HELP_STRING([--enable-smp-support], [enable smp support])
+AS_HELP_STRING([--disable-smp-support], [disable smp support]))
 
 AC_ARG_WITH(termcap,
-[  --with-termcap          use termcap (default)
-  --without-termcap       do not use any termcap libraries (ncurses,curses,termcap,termlib)])
+AS_HELP_STRING([--with-termcap], [use termcap (default)])
+AS_HELP_STRING([--without-termcap],
+               [do not use any termcap libraries (ncurses,curses,termcap,termlib)]))
 
 AC_ARG_ENABLE(kernel-poll,
-[  --enable-kernel-poll    enable kernel poll support])
+AS_HELP_STRING([--enable-kernel-poll], [enable kernel poll support])
+AS_HELP_STRING([--disable-kernel-poll], [disable kernel poll support]))
+
+AC_ARG_ENABLE(sctp,
+AS_HELP_STRING([--enable-sctp], [enable sctp support])
+AS_HELP_STRING([--disable-sctp], [disable sctp support]))
 
 AC_ARG_ENABLE(hipe,
-[  --enable-hipe           enable hipe support
-  --disable-hipe          disable hipe support])
-			
+AS_HELP_STRING([--enable-hipe], [enable hipe support])
+AS_HELP_STRING([--disable-hipe], [disable hipe support]))
+
+AC_ARG_ENABLE(native-libs,
+AS_HELP_STRING([--enable-native-libs],
+               [compile Erlang libraries to native code]))
+
 AC_ARG_WITH(javac,
-[  --with-javac=JAVAC      specify Java compiler to use
-  --with-javac            use a Java compiler if found (default)
-  --without-javac         don't use any Java compiler])
+AS_HELP_STRING([--with-javac=JAVAC], [specify Java compiler to use])
+AS_HELP_STRING([--with-javac], [use a Java compiler if found (default)])
+AS_HELP_STRING([--without-javac], [don't use any Java compiler]))
 
 AC_ARG_ENABLE(megaco_flex_scanner_lineno,
-[  --enable-megaco-flex-scanner-lineno enable megaco flex scanner lineno
-  --disable-megaco-flex-scanner-lineno disable megaco flex scanner lineno])
+AS_HELP_STRING([--disable-megaco-flex-scanner-lineno],
+               [disable megaco flex scanner lineno]))
 
 AC_ARG_ENABLE(megaco_reentrant_flex_scanner,
-[  --enable-megaco-reentrant-flex-scanner enable reentrans megaco flex scanner
-  --disable-megaco-reentrant-flex-scanner disable reentrans megaco flex scanner])
+AS_HELP_STRING([--disable-megaco-reentrant-flex-scanner],
+               [disable reentrant megaco flex scanner]))
 
 AC_ARG_WITH(ssl,
-[  --with-ssl=PATH         specify location of OpenSSL include and lib
-  --with-ssl              use SSL (default)
-  --without-ssl           don't use SSL])
+AS_HELP_STRING([--with-ssl=PATH], [specify location of OpenSSL include and lib])
+AS_HELP_STRING([--with-ssl], [use SSL (default)])
+AS_HELP_STRING([--without-ssl], [don't use SSL]))
 
 AC_ARG_ENABLE(dynamic-ssl-lib,
-[  --enable-dynamic-ssl-lib        force using dynamic openssl libraries
-  --disable-dynamic-ssl-lib       disable using dynamic openssl libraries])
+AS_HELP_STRING([--disable-dynamic-ssl-lib],
+               [disable using dynamic openssl libraries]))
 
 AC_ARG_ENABLE(shared-zlib,
-[  --enable-shared-zlib        enable using shared zlib library
-  --disable-shared-zlib       disable shared zlib, compile own zlib source (default)])
+AS_HELP_STRING([--enable-shared-zlib], [enable using shared zlib library]))
 
 dnl This functionality has been lost along the way... :(
 dnl It could perhaps be nice to reintroduce some day; therefore,
@@ -256,7 +266,8 @@ dnl   esac ], erl_mandir='$(erlang_libdir)/man')
 dnl AC_SUBST(erl_mandir)
 
 AC_ARG_ENABLE(darwin-universal,
-[  --enable-darwin-universal    build universal binaries on darwin i386],
+AS_HELP_STRING([--enable-darwin-universal],
+               [build universal binaries on darwin i386]),
 [ case "$enableval" in
     no) enable_darwin_universal=no ;;
     *)  enable_darwin_univeral=yes ;;
@@ -265,7 +276,7 @@ AC_ARG_ENABLE(darwin-universal,
 
 
 AC_ARG_ENABLE(darwin-64bit,
-[  --enable-darwin-64bit    build 64bit binaries on darwin],
+AS_HELP_STRING([--enable-darwin-64bit], [build 64bit binaries on darwin]),
 [ case "$enableval" in
     no) enable_darwin_64bit=no ;;
     *)  enable_darwin_64bit=yes ;;
@@ -273,7 +284,8 @@ AC_ARG_ENABLE(darwin-64bit,
 ],enable_darwin_64bit=no)
 
 AC_ARG_ENABLE(m64-build,
-[  --enable-m64-build    build 64bit binaries using the -m64 flag to (g)cc],
+AS_HELP_STRING([--enable-m64-build],
+               [build 64bit binaries using the -m64 flag to (g)cc]),
 [ case "$enableval" in
     no) enable_m64_build=no ;;
     *)  enable_m64_build=yes ;;
@@ -281,7 +293,8 @@ AC_ARG_ENABLE(m64-build,
 ],enable_m64_build=no)
 
 AC_ARG_ENABLE(m32-build,
-[  --enable-m32-build    build 32bit binaries using the -m32 flag to (g)cc],
+AS_HELP_STRING([--enable-m32-build],
+               [build 32bit binaries using the -m32 flag to (g)cc]),
 [ case "$enableval" in
     no) enable_m32_build=no ;;
     *)
@@ -293,6 +306,14 @@ AC_ARG_ENABLE(m32-build,
   esac
 ],enable_m32_build=no)
 
+AC_ARG_ENABLE(ethread-pre-pentium4-compatibility,
+	      AS_HELP_STRING([--enable-ethread-pre-pentium4-compatibility],
+			     [enable compatibility with x86 processors before pentium 4 (back to 486) in the ethread library]))
+
+AC_ARG_WITH(libatomic_ops,
+	    AS_HELP_STRING([--with-libatomic_ops=PATH],
+			   [specify and prefer usage of libatomic_ops in the ethread library]))
+
 dnl OK, we might have darwin switches off different kinds, lets 
 dnl check it all before continuing.
 TMPSYS=`uname -s`-`uname -m`
diff --git a/erts/aclocal.m4 b/erts/aclocal.m4
index 0d7914fbb8..443d8622bf 100644
--- a/erts/aclocal.m4
+++ b/erts/aclocal.m4
@@ -1004,8 +1004,8 @@ case "$THR_LIB_NAME" in
 
 	case "$host_cpu" in
 	  sun4u | sparc64 | sun4v)
-		ethr_have_native_atomics=yes;;
-	  i86pc | i386 | i486 | i586 | i686 | x86_64 | amd64)
+		ethr_have_native_atomics=yes;; 
+	  i86pc | i*86 | x86_64 | amd64)
 		ethr_have_native_atomics=yes;;
 	  macppc | ppc | "Power Macintosh")
 		ethr_have_native_atomics=yes;;
@@ -1100,7 +1100,7 @@ test "X$disable_native_ethr_impls" = "Xyes" &&
 
 AC_ARG_ENABLE(prefer-gcc-native-ethr-impls,
 	      AS_HELP_STRING([--enable-prefer-gcc-native-ethr-impls],
-			     [enable prefer gcc native ethread implementations]),
+			     [prefer gcc native ethread implementations]),
 [ case "$enableval" in
     yes) enable_prefer_gcc_native_ethr_impls=yes ;;
     *)  enable_prefer_gcc_native_ethr_impls=no ;;
@@ -1109,21 +1109,60 @@ AC_ARG_ENABLE(prefer-gcc-native-ethr-impls,
 test $enable_prefer_gcc_native_ethr_impls = yes &&
   AC_DEFINE(ETHR_PREFER_GCC_NATIVE_IMPLS, 1, [Define if you prefer gcc native ethread implementations])
 
+AC_ARG_WITH(libatomic_ops,
+	    AS_HELP_STRING([--with-libatomic_ops=PATH],
+			   [specify and prefer usage of libatomic_ops in the ethread library]))
+
 AC_ARG_ENABLE(ethread-pre-pentium4-compatibility,
 	      AS_HELP_STRING([--enable-ethread-pre-pentium4-compatibility],
 			     [enable compatibility with x86 processors before pentium 4 (back to 486) in the ethread library]),
-[ case "$enableval" in
-    yes) enable_ethread_pre_pentium4_compatibility=yes ;;
-    *)  enable_ethread_pre_pentium4_compatibilit=no ;;
-  esac ], enable_ethread_pre_pentium4_compatibilit=no)
+[
+  case "$enable_ethread_pre_pentium4_compatibility" in
+    yes|no) ;;
+    *) enable_ethread_pre_pentium4_compatibility=check;;
+  esac
+],
+[enable_ethread_pre_pentium4_compatibility=check])
+
+# Cross builds cannot run the probe below: default only an undecided check setting to no, so an explicit yes or no from the user is kept.
+test "$cross_compiling-$enable_ethread_pre_pentium4_compatibility" != "yes-check" || enable_ethread_pre_pentium4_compatibility=no
+# Undecided setting on an x86 host: run a full memory barrier, mfence with gcc or AO_nop_full with libatomic_ops; if the program fails, compatibility is enabled.
+case "$enable_ethread_pre_pentium4_compatibility-$host_cpu" in
+  check-i86pc | check-i*86)
+    AC_MSG_CHECKING([whether pre pentium 4 compatibility should be forced])
+    AC_RUN_IFELSE([
+#if defined(__GNUC__)
+#  if defined(ETHR_PREFER_LIBATOMIC_OPS_NATIVE_IMPLS)
+#    define CHECK_LIBATOMIC_OPS__
+#  else
+#    define CHECK_GCC_ASM__
+#  endif
+#elif defined(ETHR_HAVE_LIBATOMIC_OPS)
+#  define CHECK_LIBATOMIC_OPS__
+#endif
+#if defined(CHECK_LIBATOMIC_OPS__)
+#include "atomic_ops.h"
+#endif
+int main(void)
+{
+#if defined(CHECK_GCC_ASM__)
+    __asm__ __volatile__("mfence" : : : "memory");
+#elif defined(CHECK_LIBATOMIC_OPS__)
+    AO_nop_full();
+#endif
+    return 0;
+}
+	],
+	[enable_ethread_pre_pentium4_compatibility=no],
+	[enable_ethread_pre_pentium4_compatibility=yes],
+	[enable_ethread_pre_pentium4_compatibility=no])
+    AC_MSG_RESULT([$enable_ethread_pre_pentium4_compatibility]);;
+  *) ;;
+esac
 
-test $enable_ethread_pre_pentium4_compatibilit = yes &&
+test $enable_ethread_pre_pentium4_compatibility = yes &&
   AC_DEFINE(ETHR_PRE_PENTIUM4_COMPAT, 1, [Define if you want compatibilty with x86 processors before pentium4.])
 
-AC_ARG_WITH(libatomic_ops,
-	    AS_HELP_STRING([--with-libatomic_ops=PATH],
-			   [use libatomic_ops with the ethread library]))
-
 AC_DEFINE(ETHR_HAVE_ETHREAD_DEFINES, 1, \
 [Define if you have all ethread defines])
 
diff --git a/erts/configure.in b/erts/configure.in
index c279161a29..8d629c25ae 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -110,7 +110,8 @@ ENABLE_ALLOC_TYPE_VARS=
 AC_SUBST(ENABLE_ALLOC_TYPE_VARS)
 
 AC_ARG_ENABLE(bootstrap-only,
-[  --enable-bootstrap-only enable bootstrap only configuration],
+AS_HELP_STRING([--enable-bootstrap-only],
+               [enable bootstrap only configuration]),
 [ if test "X$enableval" = "Xyes"; then
 	# Disable stuff not necessary in a bootstrap only system in order
 	# to speed up things by reducing the amount of stuff needing to be
@@ -126,46 +127,46 @@ AC_ARG_ENABLE(bootstrap-only,
 ])
 
 AC_ARG_ENABLE(threads,
-[  --enable-threads        enable async thread support
-  --disable-threads       disable async thread support],
+AS_HELP_STRING([--enable-threads], [enable async thread support])
+AS_HELP_STRING([--disable-threads], [disable async thread support]),
 [ case "$enableval" in
     no) enable_threads=no ;;
     *)  enable_threads=yes ;;
   esac ], enable_threads=unknown)
 
 AC_ARG_ENABLE(halfword-emulator,
-[  --enable-halfword-emulator        enable halfword emulator (only for 64bit builds)
-  --disable-halfword-emulator        disable halfword emulator (only for 64bit builds)],
+AS_HELP_STRING([--enable-halfword-emulator],
+               [enable halfword emulator (only for 64bit builds)]),
 [ case "$enableval" in
     no) enable_halfword_emualtor=no ;;
     *)  enable_halfword_emulator=yes ;;
   esac ], enable_halfword_emulator=unknown)
 
 AC_ARG_ENABLE(smp-support,
-[  --enable-smp-support    enable smp support
-  --disable-smp-support   disable smp support],
+AS_HELP_STRING([--enable-smp-support], [enable smp support])
+AS_HELP_STRING([--disable-smp-support], [disable smp support]),
 [ case "$enableval" in
     no) enable_smp_support=no ;;
     *)  enable_smp_support=yes ;;
   esac ], enable_smp_support=unknown)
 
 AC_ARG_WITH(termcap,
-[  --with-termcap          use termcap (default)
-  --without-termcap       do not use any termcap libraries (ncurses,curses,termcap,termlib)],
+AS_HELP_STRING([--with-termcap], [use termcap (default)])
+AS_HELP_STRING([--without-termcap],
+               [do not use any termcap libraries (ncurses,curses,termcap,termlib)]),
 [],
 [with_termcap=yes])
 
 
 AC_ARG_ENABLE(hybrid-heap,
-[  --enable-hybrid-heap    enable hybrid heap
-  --disable-hybrid-heap   disable hybrid heap],
+AS_HELP_STRING([--enable-hybrid-heap], [enable hybrid heap]),
 [ case "$enableval" in
     no) enable_hybrid_heap=no ;;
     *)  enable_hybrid_heap=yes ;;
   esac ], enable_hybrid_heap=unknown)
 
 AC_ARG_ENABLE(lock-checking,
-[  --enable-lock-checking  enable lock checking],
+AS_HELP_STRING([--enable-lock-checking], [enable lock checking]),
 [ case "$enableval" in
     no) enable_lock_check=no ;;
     *)  enable_lock_check=yes ;;
@@ -174,16 +175,15 @@ AC_ARG_ENABLE(lock-checking,
   enable_lock_check=no)
 
 AC_ARG_ENABLE(lock-counter,
-[  --enable-lock-counter    enable lock counters
-  --disable-lock-counter   disable lock counters],
+AS_HELP_STRING([--enable-lock-counter], [enable lock counters]),
 [ case "$enableval" in
     no) enable_lock_count=no ;;
     *)  enable_lock_count=yes ;;
   esac ], enable_lock_count=no)
 
 AC_ARG_ENABLE(kernel-poll,
-[  --enable-kernel-poll    enable kernel poll support
-  --disable-kernel-poll   disable kernel poll support],
+AS_HELP_STRING([--enable-kernel-poll], [enable kernel poll support])
+AS_HELP_STRING([--disable-kernel-poll], [disable kernel poll support]),
 [ case "$enableval" in
     no) enable_kernel_poll=no ;;
     *)  enable_kernel_poll=yes ;;
@@ -191,25 +191,27 @@ AC_ARG_ENABLE(kernel-poll,
 
 
 AC_ARG_ENABLE(sctp,
-[  --enable-sctp           enable sctp support
-  --disable-sctp          disable sctp support],
+AS_HELP_STRING([--enable-sctp], [enable sctp support])
+AS_HELP_STRING([--disable-sctp], [disable sctp support]),
 [ case "$enableval" in
     no) enable_sctp=no ;;
     *)  enable_sctp=yes ;;
   esac ], enable_sctp=unknown)
 
 AC_ARG_ENABLE(hipe,
-[  --enable-hipe           enable hipe support
-  --disable-hipe          disable hipe support])
+AS_HELP_STRING([--enable-hipe], [enable hipe support])
+AS_HELP_STRING([--disable-hipe], [disable hipe support]))
 
 AC_ARG_ENABLE(native-libs,
-[  --enable-native-libs    compile Erlang libraries to native code])
+AS_HELP_STRING([--enable-native-libs],
+               [compile Erlang libraries to native code]))
 
 AC_ARG_ENABLE(tsp,
-[  --enable-tsp            compile tsp app])
+AS_HELP_STRING([--enable-tsp], [compile tsp app]))
 
 AC_ARG_ENABLE(fp-exceptions,
-[  --enable-fp-exceptions    Use hardware floating point exceptions (default if hipe enabled)],
+AS_HELP_STRING([--enable-fp-exceptions],
+               [use hardware floating point exceptions (default if hipe enabled)]),
 [ case "$enableval" in
     no) enable_fp_exceptions=no ;;
     *)  enable_fp_exceptions=yes ;;
@@ -217,7 +219,8 @@ AC_ARG_ENABLE(fp-exceptions,
 ],enable_fp_exceptions=auto)
 
 AC_ARG_ENABLE(darwin-universal,
-[  --enable-darwin-universal    build universal binaries on darwin i386],
+AS_HELP_STRING([--enable-darwin-universal],
+               [build universal binaries on darwin i386]),
 [ case "$enableval" in
     no) enable_darwin_universal=no ;;
     *)  enable_darwin_univeral=yes ;;
@@ -226,7 +229,7 @@ AC_ARG_ENABLE(darwin-universal,
 
 
 AC_ARG_ENABLE(darwin-64bit,
-[  --enable-darwin-64bit    build 64bit binaries on darwin],
+AS_HELP_STRING([--enable-darwin-64bit], [build 64bit binaries on darwin]),
 [ case "$enableval" in
     no) enable_darwin_64bit=no ;;
     *)  enable_darwin_64bit=yes ;;
@@ -234,7 +237,8 @@ AC_ARG_ENABLE(darwin-64bit,
 ],enable_darwin_64bit=no)
 
 AC_ARG_ENABLE(m64-build,
-[  --enable-m64-build    build 64bit binaries using the -m64 flag to (g)cc],
+AS_HELP_STRING([--enable-m64-build],
+               [build 64bit binaries using the -m64 flag to (g)cc]),
 [ case "$enableval" in
     no) enable_m64_build=no ;;
     *)  enable_m64_build=yes ;;
@@ -242,7 +246,8 @@ AC_ARG_ENABLE(m64-build,
 ],enable_m64_build=no)
 
 AC_ARG_ENABLE(m32-build,
-[  --enable-m32-build    build 32bit binaries using the -m32 flag to (g)cc],
+AS_HELP_STRING([--enable-m32-build],
+               [build 32bit binaries using the -m32 flag to (g)cc]),
 [ case "$enableval" in
     no) enable_m32_build=no ;;
     *)
@@ -255,7 +260,7 @@ AC_ARG_ENABLE(m32-build,
 ],enable_m32_build=no)
 
 AC_ARG_ENABLE(fixalloc,
-[  --disable-fixalloc      disable the use of fix_alloc])
+AS_HELP_STRING([--disable-fixalloc], [disable the use of fix_alloc]))
 if test x${enable_fixalloc} = xno ; then
   AC_DEFINE(NO_FIX_ALLOC,[],
 	    [Define if you don't want the fix allocator in Erlang])
@@ -263,8 +268,9 @@ fi
 
 AC_SUBST(PERFCTR_PATH)
 AC_ARG_WITH(perfctr,
-[  --with-perfctr=PATH     specify location of perfctr include and lib
-  --without-perfctr       don't use perfctr (default)])
+AS_HELP_STRING([--with-perfctr=PATH],
+               [specify location of perfctr include and lib])
+AS_HELP_STRING([--without-perfctr], [don't use perfctr (default)]))
 
 if test "x$with_perfctr" = "xno" -o "x$with_perfctr" = "x" ; then
     PERFCTR_PATH=
@@ -278,7 +284,8 @@ else
 fi
 
 AC_ARG_ENABLE(clock-gettime,
-[  --enable-clock-gettime  Use clock-gettime for time correction],
+AS_HELP_STRING([--enable-clock-gettime],
+               [use clock-gettime for time correction]),
 [ case "$enableval" in
     no) clock_gettime_correction=no ;;
     *) clock_gettime_correction=yes ;;
@@ -1293,8 +1300,7 @@ dnl zlib
 dnl -------------
 
 AC_ARG_ENABLE(shared-zlib,
-[  --enable-shared-zlib        enable using shared zlib library
-  --disable-shared-zlib       disable shared zlib, compile own zlib source (default)],
+AS_HELP_STRING([--enable-shared-zlib], [enable using shared zlib library]),
 [ case "$enableval" in
     no) enable_shared_zlib=no ;;
     *)  enable_shared_zlib=yes ;;
@@ -1852,6 +1858,27 @@ if test $processor_bind_functionality = yes; then
 	AC_DEFINE(HAVE_PROCESSOR_BIND, 1, [Define if you have processor_bind functionality])
 fi
 
+AC_MSG_CHECKING([for cpuset_getaffinity/cpuset_setaffinity])
+AC_TRY_COMPILE([
+#include <sys/param.h>
+#include <sys/cpuset.h>
+],
+[
+	int res;
+	cpuset_t cpuset;
+	CPU_ZERO(&cpuset);
+	CPU_SET(1, &cpuset);
+	res = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset_t), &cpuset);
+	res = cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset_t), &cpuset);
+	res = CPU_ISSET(1, &cpuset);
+	CPU_CLR(1, &cpuset);
+],
+		cpuset_xetaffinity=yes,
+		cpuset_xetaffinity=no)
+AC_MSG_RESULT([$cpuset_xetaffinity])
+if test $cpuset_xetaffinity = yes; then
+	AC_DEFINE(HAVE_CPUSET_xETAFFINITY, 1, [Define if you have cpuset_getaffinity/cpuset_setaffinity])
+fi
 
 AC_CACHE_CHECK([for 'end' symbol],
 		erts_cv_have_end_symbol,
@@ -3431,9 +3458,12 @@ AC_SUBST(STATIC_ZLIB_LIBS)
 std_ssl_locations="/usr/local /usr/sfw /opt/local /usr /usr/pkg /usr/local/openssl /usr/lib/openssl /usr/openssl /usr/local/ssl /usr/lib/ssl /usr/ssl"
 
 AC_ARG_WITH(ssl-zlib,
-[  --with-ssl-zlib=PATH    specify location of ZLib to be used by OpenSSL
-  --with-ssl-zlib         link SSL with  Zlib (default if found)
-  --without-ssl-zlib      don't link SSL with ZLib])
+AS_HELP_STRING([--with-ssl-zlib=PATH],
+               [specify location of ZLib to be used by OpenSSL])
+AS_HELP_STRING([--with-ssl-zlib],
+               [link SSL with  Zlib (default if found)])
+AS_HELP_STRING([--without-ssl-zlib],
+               [don't link SSL with ZLib]))
 
 
 if  test "x$with_ssl_zlib" = "xno"; then
@@ -3502,13 +3532,13 @@ fi
 		
 			
 AC_ARG_WITH(ssl,
-[  --with-ssl=PATH         specify location of OpenSSL include and lib
-  --with-ssl              use SSL (default)
-  --without-ssl           don't use SSL])
+AS_HELP_STRING([--with-ssl=PATH], [specify location of OpenSSL include and lib])
+AS_HELP_STRING([--with-ssl], [use SSL (default)])
+AS_HELP_STRING([--without-ssl], [don't use SSL]))
 
 AC_ARG_ENABLE(dynamic-ssl-lib,
-[  --enable-dynamic-ssl-lib        enable using dynamic openssl libraries
-  --disable-dynamic-ssl-lib       disable using dynamic openssl libraries],
+AS_HELP_STRING([--disable-dynamic-ssl-lib],
+               [disable using dynamic openssl libraries]),
 [ case "$enableval" in
     no) enable_dynamic_ssl=no ;;
     *)  enable_dynamic_ssl=yes ;;
@@ -3971,9 +4001,9 @@ esac
 
 			
 AC_ARG_WITH(javac,
-[  --with-javac=JAVAC      specify Java compiler to use
-  --with-javac            use a Java compiler if found (default)
-  --without-javac         don't use any Java compiler])
+AS_HELP_STRING([--with-javac=JAVAC], [specify Java compiler to use])
+AS_HELP_STRING([--with-javac], [use a Java compiler if found (default)])
+AS_HELP_STRING([--without-javac], [don't use any Java compiler]))
 
 dnl
 dnl Then there are a number of apps which needs a java compiler...
diff --git a/erts/doc/src/epmd.xml b/erts/doc/src/epmd.xml
index f01cf90a36..474230cb38 100644
--- a/erts/doc/src/epmd.xml
+++ b/erts/doc/src/epmd.xml
@@ -119,7 +119,7 @@
     <tag><c><![CDATA[-port No]]></c></tag>
     <item>
       <p>Let this instance of epmd listen to another TCP port than
-      default 4369. This can be also be set using the
+      default 4369. This can also be set using the
       <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the
       section <seealso marker="#environment_variables">Environment
       variables</seealso> below</p>
@@ -186,7 +186,7 @@
     <tag><c><![CDATA[-port No]]></c></tag>
     <item>
       <p>Contacts the <c>epmd</c> listening on the given TCP port number
-      (default 4369). This can be also be set using the
+      (default 4369). This can also be set using the
       <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the
       section <seealso marker="#environment_variables">Environment
       variables</seealso> below</p>
diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml
index 09c9cf6812..a1d73fb698 100644
--- a/erts/doc/src/erl.xml
+++ b/erts/doc/src/erl.xml
@@ -686,7 +686,7 @@
 	      </p></item>
 	    </taglist>
 	    <p>Binding of schedulers is currently only supported on newer
-	       Linux, Solaris, and Windows systems.</p>
+	       Linux, Solaris, FreeBSD, and Windows systems.</p>
 	    <p>If no CPU topology is available when the <c>+sbt</c> flag
 	       is processed and <c>BindType</c> is any other type than
 	       <c>u</c>, the runtime system will fail to start. CPU
@@ -914,7 +914,7 @@
           <item>
             <p>Set the distribution buffer busy limit
 	    (<seealso marker="erlang#system_info_dist_buf_busy_limit">dist_buf_busy_limit</seealso>)
-	    in kilobytes. Valid range is 1-2097151. Default is 128.</p>
+	    in kilobytes. Valid range is 1-2097151. Default is 1024.</p>
             <p>A larger buffer limit will allow processes to buffer
             more outgoing messages over the distribution. When the
             buffer limit has been reached, sending processes will be
diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml
index 25c92bdbb7..638f7eef10 100644
--- a/erts/doc/src/erlang.xml
+++ b/erts/doc/src/erlang.xml
@@ -5178,7 +5178,7 @@ true</pre>
 	       <seealso marker="#system_info_scheduler_bindings">erlang:system_info(scheduler_bindings)</seealso>.
 	    </p>
             <p>Schedulers can currently only be bound on newer Linux,
-	       Solaris, and Windows systems, but more systems will be
+	       Solaris, FreeBSD, and Windows systems, but more systems will be
 	       supported in the future.
 	    </p>
             <p>In order for the runtime system to be able to bind schedulers,
@@ -5559,7 +5559,7 @@ true</pre>
           <item>
             <p>Returns the automatically detected <c>CpuTopology</c>. The
 	       emulator currently only detects the CPU topology on some newer
-	       Linux, Solaris, and Windows systems. On Windows system with
+	       Linux, Solaris, FreeBSD, and Windows systems. On Windows systems with
 	       more than 32 logical processors the CPU topology is not detected.
 	    </p>
             <p>For more information see the documentation of the
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 76d782b159..4ed0ccabc6 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -734,7 +734,7 @@ RUN_OBJS = \
 	$(OBJDIR)/erl_fun.o             $(OBJDIR)/erl_bif_port.o \
 	$(OBJDIR)/erl_term.o 		$(OBJDIR)/erl_node_tables.o \
 	$(OBJDIR)/erl_monitors.o	$(OBJDIR)/erl_process_dump.o \
-	$(OBJDIR)/erl_bif_timer.o \
+	$(OBJDIR)/erl_bif_timer.o	$(OBJDIR)/erl_cpu_topology.o \
 	$(OBJDIR)/erl_drv_thread.o      $(OBJDIR)/erl_bif_chksum.o \
 	$(OBJDIR)/erl_bif_re.o		$(OBJDIR)/erl_unicode.o \
 	$(OBJDIR)/packet_parser.o	$(OBJDIR)/safe_hash.o \
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index 4497e17d79..694460d702 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -162,7 +162,7 @@ Uint erts_dist_cache_size(void)
 static ErtsProcList *
 get_suspended_on_de(DistEntry *dep, Uint32 unset_qflgs)
 {
-    ERTS_SMP_LC_ASSERT(erts_smp_lc_spinlock_is_locked(&dep->qlock));
+    ERTS_SMP_LC_ASSERT(erts_smp_lc_mtx_is_locked(&dep->qlock));
     dep->qflgs &= ~unset_qflgs;
     if (dep->qflgs & ERTS_DE_QFLG_EXIT) {
 	/* No resume when exit has been scheduled */
@@ -455,17 +455,17 @@ int erts_do_net_exits(DistEntry *dep, Eterm reason)
 
 	if (dep->status & ERTS_DE_SFLG_EXITING) {
 #ifdef DEBUG
-	    erts_smp_spin_lock(&dep->qlock);
+	    erts_smp_mtx_lock(&dep->qlock);
 	    ASSERT(dep->qflgs & ERTS_DE_QFLG_EXIT);
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
 #endif
 	}
 	else {
 	    dep->status |= ERTS_DE_SFLG_EXITING;
-	    erts_smp_spin_lock(&dep->qlock);
+	    erts_smp_mtx_lock(&dep->qlock);
 	    ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT));
 	    dep->qflgs |= ERTS_DE_QFLG_EXIT;
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
 	}
 
 	erts_smp_de_links_lock(dep);
@@ -579,7 +579,7 @@ static void clear_dist_entry(DistEntry *dep)
     erts_smp_de_links_unlock(dep);
 #endif
 
-    erts_smp_spin_lock(&dep->qlock);
+    erts_smp_mtx_lock(&dep->qlock);
 
     if (!dep->out_queue.last)
 	obuf = dep->finalized_out_queue.first;
@@ -595,7 +595,7 @@ static void clear_dist_entry(DistEntry *dep)
     dep->status = 0;
     suspendees = get_suspended_on_de(dep, ERTS_DE_QFLGS_ALL);
 
-    erts_smp_spin_unlock(&dep->qlock);
+    erts_smp_mtx_unlock(&dep->qlock);
     erts_smp_atomic_set(&dep->dist_cmd_scheduled, 0);
     dep->send = NULL;
     erts_smp_de_rwunlock(dep);
@@ -613,10 +613,10 @@ static void clear_dist_entry(DistEntry *dep)
     }
 
     if (obufsize) {
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	ASSERT(dep->qsize >= obufsize);
 	dep->qsize -= obufsize;
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
     }
 }
 
@@ -1538,18 +1538,18 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy)
     }
     else {
 	ErtsProcList *plp = NULL;
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	dep->qsize += size_obuf(obuf);
 	if (dep->qsize >= erts_dist_buf_busy_limit)
 	    dep->qflgs |= ERTS_DE_QFLG_BUSY;
 	if (!force_busy && (dep->qflgs & ERTS_DE_QFLG_BUSY)) {
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
 
 	    plp = erts_proclist_create(c_p);
 	    plp->next = NULL;
 	    erts_suspend(c_p, ERTS_PROC_LOCK_MAIN, NULL);
 	    suspended = 1;
-	    erts_smp_spin_lock(&dep->qlock);
+	    erts_smp_mtx_lock(&dep->qlock);
 	}
 
 	/* Enqueue obuf on dist entry */
@@ -1575,7 +1575,7 @@ dsig_send(ErtsDSigData *dsdp, Eterm ctl, Eterm msg, int force_busy)
 	    }
 	}
 
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
 	erts_schedule_dist_command(NULL, dep);
 	erts_smp_de_runlock(dep);
 	
@@ -1708,10 +1708,8 @@ erts_dist_command(Port *prt, int reds_limit)
 {
     Sint reds = ERTS_PORT_REDS_DIST_CMD_START;
     int prt_busy;
-    int de_busy;
     Uint32 status;
     Uint32 flags;
-    Uint32 qflgs;
     Sint obufsize = 0;
     ErtsDistOutputQueue oq, foq;
     DistEntry *dep = prt->dist_entry;
@@ -1746,13 +1744,12 @@ erts_dist_command(Port *prt, int reds_limit)
      * a mess.
      */
 
-    erts_smp_spin_lock(&dep->qlock);
+    erts_smp_mtx_lock(&dep->qlock);
     oq.first = dep->out_queue.first;
     oq.last = dep->out_queue.last;
     dep->out_queue.first = NULL;
     dep->out_queue.last = NULL;
-    qflgs = dep->qflgs;
-    erts_smp_spin_unlock(&dep->qlock);
+    erts_smp_mtx_unlock(&dep->qlock);
 
     foq.first = dep->finalized_out_queue.first;
     foq.last = dep->finalized_out_queue.last;
@@ -1763,17 +1760,8 @@ erts_dist_command(Port *prt, int reds_limit)
 	goto preempted;
 
     prt_busy = (int) (prt->status & ERTS_PORT_SFLG_PORT_BUSY);
-    de_busy = (int) (qflgs & ERTS_DE_QFLG_BUSY);
 
-    if (prt_busy) {
-	if (!de_busy) {
-	    erts_smp_spin_lock(&dep->qlock);
-	    dep->qflgs |= ERTS_DE_QFLG_BUSY;
-	    erts_smp_spin_unlock(&dep->qlock);
-	    de_busy = 1;
-	}
-    }
-    else if (foq.first) {
+    if (!prt_busy && foq.first) {
 	int preempt = 0;
 	do {
 	    Uint size;
@@ -1791,10 +1779,7 @@ erts_dist_command(Port *prt, int reds_limit)
 	    free_dist_obuf(fob);
 	    preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD);
 	    if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) {
-		erts_smp_spin_lock(&dep->qlock);
-		dep->qflgs |= ERTS_DE_QFLG_BUSY;
-		erts_smp_spin_unlock(&dep->qlock);
-		de_busy = prt_busy = 1;
+		prt_busy = 1;
 		break;
 	    }
 	} while (foq.first && !preempt);
@@ -1877,10 +1862,7 @@ erts_dist_command(Port *prt, int reds_limit)
 	    free_dist_obuf(fob);
 	    preempt = reds > reds_limit || (prt->status & ERTS_PORT_SFLGS_DEAD);
 	    if (prt->status & ERTS_PORT_SFLG_PORT_BUSY) {
-		erts_smp_spin_lock(&dep->qlock);
-		dep->qflgs |= ERTS_DE_QFLG_BUSY;
-		erts_smp_spin_unlock(&dep->qlock);
-		de_busy = prt_busy = 1;
+		prt_busy = 1;
 		if (oq.first && !preempt)
 		    goto finalize_only;
 	    }
@@ -1907,22 +1889,23 @@ erts_dist_command(Port *prt, int reds_limit)
 	 * dist entry in a non-busy state and resume suspended
 	 * processes.
 	 */
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	ASSERT(dep->qsize >= obufsize);
 	dep->qsize -= obufsize;
 	obufsize = 0;
-	if (de_busy && !prt_busy && dep->qsize < erts_dist_buf_busy_limit) {
+	if (!prt_busy
+	    && (dep->qflgs & ERTS_DE_QFLG_BUSY)
+	    && dep->qsize < erts_dist_buf_busy_limit) {
 	    ErtsProcList *suspendees;
 	    int resumed;
 	    suspendees = get_suspended_on_de(dep, ERTS_DE_QFLG_BUSY);
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
 
 	    resumed = erts_resume_processes(suspendees);
 	    reds += resumed*ERTS_PORT_REDS_DIST_CMD_RESUMED;
-	    de_busy = 0;
 	}
 	else
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
     }
 
     ASSERT(!oq.first && !oq.last);
@@ -1931,10 +1914,10 @@ erts_dist_command(Port *prt, int reds_limit)
 
     if (obufsize != 0) {
 	ASSERT(obufsize > 0);
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	ASSERT(dep->qsize >= obufsize);
 	dep->qsize -= obufsize;
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
     }
 
     ASSERT(foq.first || !foq.last);
@@ -1984,9 +1967,9 @@ erts_dist_command(Port *prt, int reds_limit)
 	foq.last = NULL;
 
 #ifdef DEBUG
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	ASSERT(dep->qsize == obufsize);
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
 #endif
     }
     else {
@@ -1995,14 +1978,14 @@ erts_dist_command(Port *prt, int reds_limit)
 	     * Unhandle buffers need to be put back first
 	     * in out_queue.
 	     */
-	    erts_smp_spin_lock(&dep->qlock);
+	    erts_smp_mtx_lock(&dep->qlock);
 	    dep->qsize -= obufsize;
 	    obufsize = 0;
 	    oq.last->next = dep->out_queue.first;
 	    dep->out_queue.first = oq.first;
 	    if (!dep->out_queue.last)
 		dep->out_queue.last = oq.last;
-	    erts_smp_spin_unlock(&dep->qlock);
+	    erts_smp_mtx_unlock(&dep->qlock);
 	}
 
 	erts_schedule_dist_command(prt, NULL);
@@ -2026,10 +2009,10 @@ erts_kill_dist_connection(DistEntry *dep, Uint32 connection_id)
 
 	dep->status |= ERTS_DE_SFLG_EXITING;
 
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	ASSERT(!(dep->qflgs & ERTS_DE_QFLG_EXIT));
 	dep->qflgs |= ERTS_DE_QFLG_EXIT;
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
 
 	erts_schedule_dist_command(NULL, dep);
     }
@@ -2400,13 +2383,13 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3)
 	ErtsProcList *plp = erts_proclist_create(BIF_P);
 	plp->next = NULL;
 	erts_suspend(BIF_P, ERTS_PROC_LOCK_MAIN, NULL);
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	if (dep->suspended.last)
 	    dep->suspended.last->next = plp;
 	else
 	    dep->suspended.first = plp;
 	dep->suspended.last = plp;
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
 	goto yield;
     }
 
@@ -2434,9 +2417,9 @@ BIF_RETTYPE setnode_3(BIF_ALIST_3)
     ASSERT(dep->send);
 
 #ifdef DEBUG
-    erts_smp_spin_lock(&dep->qlock);
+    erts_smp_mtx_lock(&dep->qlock);
     ASSERT(dep->qsize == 0);
-    erts_smp_spin_unlock(&dep->qlock);
+    erts_smp_mtx_unlock(&dep->qlock);
 #endif
 
     erts_set_dist_entry_connected(dep, BIF_ARG_2, flags);
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index 28cdd05c3c..64caf34550 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -99,7 +99,7 @@ typedef struct {
 #define ERTS_DE_IS_CONNECTED(DEP) \
   (!ERTS_DE_IS_NOT_CONNECTED((DEP)))
 
-#define ERTS_DE_BUSY_LIMIT (128*1024)
+#define ERTS_DE_BUSY_LIMIT (1024*1024)
 extern int erts_dist_buf_busy_limit;
 extern int erts_is_alive;
 
@@ -154,10 +154,10 @@ erts_dsig_prepare(ErtsDSigData *dsdp,
     }
     if (no_suspend) {
 	failure = ERTS_DSIG_PREP_CONNECTED;
-	erts_smp_spin_lock(&dep->qlock);
+	erts_smp_mtx_lock(&dep->qlock);
 	if (dep->qflgs & ERTS_DE_QFLG_BUSY)
 	    failure = ERTS_DSIG_PREP_WOULD_SUSPEND;
-	erts_smp_spin_unlock(&dep->qlock);
+	erts_smp_mtx_unlock(&dep->qlock);
 	if (failure == ERTS_DSIG_PREP_WOULD_SUSPEND)
 	    goto fail;
     }
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 7df9f19af0..408ffd12f7 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -247,7 +247,7 @@ type	CPUDATA		LONG_LIVED	SYSTEM		cpu_data
 type	TMP_CPU_IDS	SHORT_LIVED	SYSTEM		tmp_cpu_ids
 type	EXT_TERM_DATA	SHORT_LIVED	PROCESSES	external_term_data
 type	ZLIB		STANDARD	SYSTEM		zlib
-type	RDR_GRPS_MAP	LONG_LIVED	SYSTEM		reader_groups_map
+type	CPU_GRPS_MAP	LONG_LIVED	SYSTEM		cpu_groups_map
 
 +if smp
 type	ASYNC		SHORT_LIVED	SYSTEM		async
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 801263ec26..89e3b3209c 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -38,6 +38,7 @@
 #include "erl_instrument.h"
 #include "dist.h"
 #include "erl_gc.h"
+#include "erl_cpu_topology.h"
 #ifdef HIPE
 #include "hipe_arch.h"
 #endif
@@ -1687,6 +1688,8 @@ info_1_tuple(Process* BIF_P,	/* Pointer to current process. */
 	return erts_get_cpu_topology_term(BIF_P, *tp);
     } else if (ERTS_IS_ATOM_STR("cpu_topology", sel) && arity == 2) {
 	Eterm res = erts_get_cpu_topology_term(BIF_P, *tp);
+	if (res == THE_NON_VALUE)
+	    goto badarg;
 	ERTS_BIF_PREP_TRAP1(ret, erts_format_cpu_topology_trap, BIF_P, res);
 	return ret;
 #if defined(PURIFY) || defined(VALGRIND)
@@ -2345,9 +2348,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
     /* Arguments that are unusual follow ... */
     else if (ERTS_IS_ATOM_STR("logical_processors", BIF_ARG_1)) {
 	int no;
-	erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-	no = erts_get_cpu_configured(erts_cpuinfo);
-	erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+	erts_get_logical_processors(&no, NULL, NULL);
 	if (no > 0)
 	    BIF_RET(make_small((Uint) no));
 	else {
@@ -2357,9 +2358,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
     }
     else if (ERTS_IS_ATOM_STR("logical_processors_online", BIF_ARG_1)) {
 	int no;
-	erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-	no = erts_get_cpu_online(erts_cpuinfo);
-	erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+	erts_get_logical_processors(NULL, &no, NULL);
 	if (no > 0)
 	    BIF_RET(make_small((Uint) no));
 	else {
@@ -2369,9 +2368,7 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
     }
     else if (ERTS_IS_ATOM_STR("logical_processors_available", BIF_ARG_1)) {
 	int no;
-	erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-	no = erts_get_cpu_available(erts_cpuinfo);
-	erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+	erts_get_logical_processors(NULL, NULL, &no);
 	if (no > 0)
 	    BIF_RET(make_small((Uint) no));
 	else {
diff --git a/erts/emulator/beam/erl_cpu_topology.c b/erts/emulator/beam/erl_cpu_topology.c
new file mode 100644
index 0000000000..db95c4a5d4
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.c
@@ -0,0 +1,2359 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description:	CPU topology and related functionality
+ *
+ * Author: 	Rickard Green
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include "config.h"
+#endif
+
+#include <ctype.h>
+
+#include "global.h"
+#include "error.h"
+#include "bif.h"
+#include "erl_cpu_topology.h"
+
+#define ERTS_MAX_READER_GROUPS 8
+
+/*
+ * Cpu topology hierarchy.
+ */
+#define ERTS_TOPOLOGY_NODE		0
+#define ERTS_TOPOLOGY_PROCESSOR		1
+#define ERTS_TOPOLOGY_PROCESSOR_NODE	2
+#define ERTS_TOPOLOGY_CORE		3
+#define ERTS_TOPOLOGY_THREAD		4
+#define ERTS_TOPOLOGY_LOGICAL		5
+
+#define ERTS_TOPOLOGY_MAX_DEPTH		6
+
+typedef struct {
+    int bind_id;
+    int bound_id;
+} ErtsCpuBindData;
+
+static erts_cpu_info_t *cpuinfo;
+
+static int max_main_threads;
+static int reader_groups;
+
+static ErtsCpuBindData *scheduler2cpu_map;
+static erts_smp_rwmtx_t cpuinfo_rwmtx;
+
+typedef enum {
+    ERTS_CPU_BIND_UNDEFINED,
+    ERTS_CPU_BIND_SPREAD,
+    ERTS_CPU_BIND_PROCESSOR_SPREAD,
+    ERTS_CPU_BIND_THREAD_SPREAD,
+    ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
+    ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
+    ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
+    ERTS_CPU_BIND_NO_SPREAD,
+    ERTS_CPU_BIND_NONE
+} ErtsCpuBindOrder;
+
+#define ERTS_CPU_BIND_DEFAULT_BIND \
+  ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+
+static int no_cpu_groups_callbacks;
+static ErtsCpuBindOrder cpu_bind_order;
+
+static erts_cpu_topology_t *user_cpudata;
+static int user_cpudata_size;
+static erts_cpu_topology_t *system_cpudata;
+static int system_cpudata_size;
+
+typedef struct {
+    int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
+} erts_avail_cput;
+
+typedef struct {
+    int id;
+    int sub_levels;
+    int cpu_groups;
+} erts_cpu_groups_count_t;
+
+typedef struct {
+    int logical;
+    int cpu_group;
+} erts_cpu_groups_map_array_t;
+
+typedef struct erts_cpu_groups_callback_list_t_ erts_cpu_groups_callback_list_t;
+struct erts_cpu_groups_callback_list_t_ {
+    erts_cpu_groups_callback_list_t *next;
+    erts_cpu_groups_callback_t callback;
+    void *arg;
+};
+
+typedef struct erts_cpu_groups_map_t_ erts_cpu_groups_map_t;
+struct erts_cpu_groups_map_t_ {
+    erts_cpu_groups_map_t *next;
+    int groups;
+    erts_cpu_groups_map_array_t *array;
+    int size;
+    int logical_processors;
+    erts_cpu_groups_callback_list_t *callback_list;
+};
+
+typedef struct {
+    erts_cpu_groups_callback_t callback;
+    int ix;
+    void *arg;
+} erts_cpu_groups_callback_call_t;
+
+static erts_cpu_groups_map_t *cpu_groups_maps;
+
+static erts_cpu_groups_map_t *reader_groups_map;
+
+#define ERTS_TOPOLOGY_CG ERTS_TOPOLOGY_MAX_DEPTH
+
+#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
+
+#ifdef ERTS_SMP
+static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
+				int size,
+				ErtsCpuBindOrder bind_order,
+				int mk_seq);
+static void write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
+#endif
+
+static void reader_groups_callback(int, ErtsSchedulerData *, int, void *);
+static erts_cpu_groups_map_t *add_cpu_groups(int groups,
+					     erts_cpu_groups_callback_t callback,
+					     void *arg);
+static void update_cpu_groups_maps(void);
+static void make_cpu_groups_map(erts_cpu_groups_map_t *map, int test);
+static int cpu_groups_lookup(erts_cpu_groups_map_t *map,
+			     ErtsSchedulerData *esdp);
+
+static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
+					 int *cpudata_size);
+static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
+
+static int
+int_cmp(const void *vx, const void *vy)
+{
+    return *((int *) vx) - *((int *) vy);
+}
+
+static int
+cpu_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->processor_node != y->processor_node)
+	return x->processor_node - y->processor_node;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    if (x->node != y->node)
+	return x->node - y->node;
+    return 0;
+}
+
+static int
+cpu_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->processor_node != y->processor_node)
+	return x->processor_node - y->processor_node;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    return 0;
+}
+
+static int
+cpu_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    if (x->processor_node != y->processor_node)
+	return x->processor_node - y->processor_node;
+    if (x->core != y->core)
+	return x->core - y->core;
+    return 0;
+}
+
+static int
+cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    return 0;
+}
+
+static int
+cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    return 0;
+}
+
+static int
+cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    if (x->core != y->core)
+	return x->core - y->core;
+    return 0;
+}
+
+static int
+cpu_no_spread_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    if (x->processor_node != y->processor_node)
+	return x->processor_node - y->processor_node;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    return 0;
+}
+
+static ERTS_INLINE void
+make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
+{
+    int ix;
+    int node = -1;
+    int processor = -1;
+    int processor_node = -1;
+    int processor_node_node = -1;
+    int core = -1;
+    int thread = -1;
+    int old_node = -1;
+    int old_processor = -1;
+    int old_processor_node = -1;
+    int old_core = -1;
+    int old_thread = -1;
+
+    for (ix = 0; ix < size; ix++) {
+	if (!no_node || cpudata[ix].node >= 0) {
+	    if (old_node == cpudata[ix].node)
+		cpudata[ix].node = node;
+	    else {
+		old_node = cpudata[ix].node;
+		old_processor = processor = -1;
+		if (!no_node)
+		    old_processor_node = processor_node = -1;
+		old_core = core = -1;
+		old_thread = thread = -1;
+		if (no_node || cpudata[ix].node >= 0)
+		    cpudata[ix].node = ++node;
+	    }
+	}
+	if (old_processor == cpudata[ix].processor)
+	    cpudata[ix].processor = processor;
+	else {
+	    old_processor = cpudata[ix].processor;
+	    if (!no_node)
+		processor_node_node = old_processor_node = processor_node = -1;
+	    old_core = core = -1;
+	    old_thread = thread = -1;
+	    cpudata[ix].processor = ++processor;
+	}
+	if (no_node && cpudata[ix].processor_node < 0)
+	    old_processor_node = -1;
+	else {
+	    if (old_processor_node == cpudata[ix].processor_node) {
+		if (no_node)
+		    cpudata[ix].node = cpudata[ix].processor_node = node;
+		else {
+		    if (processor_node_node >= 0)
+			cpudata[ix].node = processor_node_node;
+		    cpudata[ix].processor_node = processor_node;
+		}
+	    }
+	    else {
+		old_processor_node = cpudata[ix].processor_node;
+		old_core = core = -1;
+		old_thread = thread = -1;
+		if (no_node)
+		    cpudata[ix].node = cpudata[ix].processor_node = ++node;
+		else {
+		    cpudata[ix].node = processor_node_node = ++node;
+		    cpudata[ix].processor_node = ++processor_node;
+		}
+	    }
+	}
+	if (!no_node && cpudata[ix].processor_node < 0)
+	    cpudata[ix].processor_node = 0;
+	if (old_core == cpudata[ix].core)
+	    cpudata[ix].core = core;
+	else {
+	    old_core = cpudata[ix].core;
+	    old_thread = thread = -1;
+	    cpudata[ix].core = ++core;
+	}
+	if (old_thread == cpudata[ix].thread)
+	    cpudata[ix].thread = thread;
+	else
+	    old_thread = cpudata[ix].thread = ++thread;
+    }
+}
+
+/* Sort cpudata (size > 1 only) with the comparator for bind_order; when
+ * mk_seq is set, ids are first rewritten by make_cpudata_id_seq(). */
+static void
+cpu_bind_order_sort(erts_cpu_topology_t *cpudata, int size,
+		    ErtsCpuBindOrder bind_order, int mk_seq)
+{
+    if (size > 1) {
+	int no_node = 0;
+	int (*cmp_func)(const void *, const void *);
+	switch (bind_order) {
+	case ERTS_CPU_BIND_SPREAD:
+	    cmp_func = cpu_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_PROCESSOR_SPREAD:
+	    cmp_func = cpu_processor_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_THREAD_SPREAD:
+	    cmp_func = cpu_thread_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
+	    no_node = 1;
+	    cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
+	    no_node = 1;
+	    cmp_func = cpu_no_node_processor_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
+	    no_node = 1;
+	    cmp_func = cpu_no_node_thread_spread_order_cmp;
+	    break;
+	case ERTS_CPU_BIND_NO_SPREAD:
+	    cmp_func = cpu_no_spread_order_cmp;
+	    break;
+	default:
+	    cmp_func = NULL;
+	    erl_exit(ERTS_ABORT_EXIT,
+		     "Bad cpu bind type: %d\n",
+		     (int) bind_order); /* the rejected argument, not the global */
+	    break;
+	}
+
+	if (mk_seq)
+	    make_cpudata_id_seq(cpudata, size, no_node);
+
+	qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
+    }
+}
+
+static int
+processor_order_cmp(const void *vx, const void *vy)
+{
+    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
+    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
+
+    if (x->processor != y->processor)
+	return x->processor - y->processor;
+    if (x->node != y->node)
+	return x->node - y->node;
+    if (x->processor_node != y->processor_node)
+	return x->processor_node - y->processor_node;
+    if (x->core != y->core)
+	return x->core - y->core;
+    if (x->thread != y->thread)
+	return x->thread - y->thread;
+    return 0;
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp)
+{
+    erts_cpu_groups_map_t *cgm;
+    erts_cpu_groups_callback_list_t *cgcl;
+    erts_cpu_groups_callback_call_t *cgcc;
+    int cgcc_ix;
+
+    /* Unbind from cpu */
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    if (scheduler2cpu_map[esdp->no].bound_id >= 0
+	&& erts_unbind_from_cpu(cpuinfo) == 0) {
+	esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+    }
+
+    cgcc = erts_alloc(ERTS_ALC_T_TMP,
+		      (no_cpu_groups_callbacks
+		       * sizeof(erts_cpu_groups_callback_call_t)));
+    cgcc_ix = 0;
+    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+	    cgcc[cgcc_ix].callback = cgcl->callback;
+	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+	    cgcc[cgcc_ix].arg = cgcl->arg;
+	    cgcc_ix++;
+	}
+    }
+    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+	cgcc[cgcc_ix].callback(1,
+			       esdp,
+			       cgcc[cgcc_ix].ix,
+			       cgcc[cgcc_ix].arg);
+
+    erts_free(ERTS_ALC_T_TMP, cgcc);
+
+    if (esdp->no <= max_main_threads)
+	erts_thr_set_main_status(0, 0);
+
+}
+
+void
+erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp)
+{
+    ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(esdp->run_queue));
+
+    if (esdp->no <= max_main_threads)
+	erts_thr_set_main_status(1, (int) esdp->no);
+
+    /* Make sure we check if we should bind to a cpu or not... */
+    if (esdp->run_queue->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
+	erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
+    else
+	esdp->run_queue->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+}
+
+#endif
+
+/* Apply this scheduler's pending cpu (un)binding, then run the cpu-groups callbacks. */
+void
+erts_sched_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+    int res, cpu_id, cgcc_ix;
+    erts_cpu_groups_map_t *cgm;
+    erts_cpu_groups_callback_list_t *cgcl;
+    erts_cpu_groups_callback_call_t *cgcc;
+#ifdef ERTS_SMP
+    if (erts_common_run_queue)
+	erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
+    else
+	esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
+#endif
+    erts_smp_runq_unlock(esdp->run_queue); /* re-acquired before returning */
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    cpu_id = scheduler2cpu_map[esdp->no].bind_id;
+    if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
+	res = erts_bind_to_cpu(cpuinfo, cpu_id);
+	if (res == 0)
+	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
+	else {
+	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+	    erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
+			  (int) esdp->no, cpu_id, erl_errno_id(-res));
+	    erts_send_error_to_logger_nogl(dsbufp);
+	    if (scheduler2cpu_map[esdp->no].bound_id >= 0)
+		goto unbind;
+	}
+    }
+    else if (cpu_id < 0) {
+    unbind:
+	/* Get rid of old binding */
+	res = erts_unbind_from_cpu(cpuinfo);
+	if (res == 0)
+	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
+	else if (res != -ENOTSUP) { /* log the cpu we are still bound to */
+	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
+	    erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
+			  (int) esdp->no, scheduler2cpu_map[esdp->no].bound_id,
+			  erl_errno_id(-res));
+	    erts_send_error_to_logger_nogl(dsbufp);
+	}
+    }
+    /* Collect the callbacks while holding cpuinfo_rwmtx; they run after it is released. */
+    cgcc = erts_alloc(ERTS_ALC_T_TMP,
+		      (no_cpu_groups_callbacks
+		       * sizeof(erts_cpu_groups_callback_call_t)));
+    cgcc_ix = 0;
+    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+	    cgcc[cgcc_ix].callback = cgcl->callback;
+	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+	    cgcc[cgcc_ix].arg = cgcl->arg;
+	    cgcc_ix++;
+	}
+    }
+    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+
+    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+	cgcc[cgcc_ix].callback(0,
+			       esdp,
+			       cgcc[cgcc_ix].ix,
+			       cgcc[cgcc_ix].arg);
+
+    erts_free(ERTS_ALC_T_TMP, cgcc);
+
+    erts_smp_runq_lock(esdp->run_queue);
+}
+
+#ifdef ERTS_SMP
+void
+erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp)
+{
+    int cgcc_ix;
+    erts_cpu_groups_map_t *cgm;
+    erts_cpu_groups_callback_list_t *cgcl;
+    erts_cpu_groups_callback_call_t *cgcc;
+
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+
+    cgcc = erts_alloc(ERTS_ALC_T_TMP,
+		      (no_cpu_groups_callbacks
+		       * sizeof(erts_cpu_groups_callback_call_t)));
+    cgcc_ix = 0;
+    for (cgm = cpu_groups_maps; cgm; cgm = cgm->next) {
+	for (cgcl = cgm->callback_list; cgcl; cgcl = cgcl->next) {
+	    cgcc[cgcc_ix].callback = cgcl->callback;
+	    cgcc[cgcc_ix].ix = cpu_groups_lookup(cgm, esdp);
+	    cgcc[cgcc_ix].arg = cgcl->arg;
+	    cgcc_ix++;
+	}
+    }
+
+    ASSERT(no_cpu_groups_callbacks == cgcc_ix);
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+    for (cgcc_ix = 0; cgcc_ix < no_cpu_groups_callbacks; cgcc_ix++)
+	cgcc[cgcc_ix].callback(0,
+			       esdp,
+			       cgcc[cgcc_ix].ix,
+			       cgcc[cgcc_ix].arg);
+
+    erts_free(ERTS_ALC_T_TMP, cgcc);
+
+    if (esdp->no <= max_main_threads)
+	erts_thr_set_main_status(1, (int) esdp->no);
+}
+#endif
+
+/* Assign scheduler2cpu_map[1..erts_no_schedulers].bind_id from cpudata. */
+static void
+write_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
+{
+    int s_ix = 1;
+    int cpu_ix;
+
+    ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+    if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
+	cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
+	/* Bound by scheduler slots, not cpu index: unavailable cpus use none. */
+	for (cpu_ix = 0; cpu_ix < size && s_ix <= erts_no_schedulers; cpu_ix++)
+	    if (erts_is_cpu_available(cpuinfo, cpudata[cpu_ix].logical))
+		scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
+    }
+
+    /* Schedulers left without a cpu are marked unbound. */
+    for (; s_ix <= erts_no_schedulers; s_ix++)
+	scheduler2cpu_map[s_ix].bind_id = -1;
+}
+
+int
+erts_init_scheduler_bind_type_string(char *how)
+{
+    if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP)
+	return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
+
+    if (!system_cpudata && !user_cpudata)
+	return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
+
+    if (sys_strcmp(how, "db") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+    else if (sys_strcmp(how, "s") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+    else if (sys_strcmp(how, "ps") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+    else if (sys_strcmp(how, "ts") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+    else if (sys_strcmp(how, "tnnps") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+    else if (sys_strcmp(how, "nnps") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+    else if (sys_strcmp(how, "nnts") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+    else if (sys_strcmp(how, "ns") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+    else if (sys_strcmp(how, "u") == 0)
+	cpu_bind_order = ERTS_CPU_BIND_NONE;
+    else
+	return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
+
+    return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
+}
+
+static Eterm
+bound_schedulers_term(ErtsCpuBindOrder order)
+{
+    switch (order) {
+    case ERTS_CPU_BIND_SPREAD: {
+	ERTS_DECL_AM(spread);
+	return AM_spread;
+    }
+    case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
+	ERTS_DECL_AM(processor_spread);
+	return AM_processor_spread;
+    }
+    case ERTS_CPU_BIND_THREAD_SPREAD: {
+	ERTS_DECL_AM(thread_spread);
+	return AM_thread_spread;
+    }
+    case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
+	ERTS_DECL_AM(thread_no_node_processor_spread);
+	return AM_thread_no_node_processor_spread;
+    }
+    case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
+	ERTS_DECL_AM(no_node_processor_spread);
+	return AM_no_node_processor_spread;
+    }
+    case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
+	ERTS_DECL_AM(no_node_thread_spread);
+	return AM_no_node_thread_spread;
+    }
+    case ERTS_CPU_BIND_NO_SPREAD: {
+	ERTS_DECL_AM(no_spread);
+	return AM_no_spread;
+    }
+    case ERTS_CPU_BIND_NONE: {
+	ERTS_DECL_AM(unbound);
+	return AM_unbound;
+    }
+    default:
+	ASSERT(0);
+	return THE_NON_VALUE;
+    }
+}
+
+Eterm
+erts_bound_schedulers_term(Process *c_p)
+{
+    ErtsCpuBindOrder order;
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    order = cpu_bind_order;
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+    return bound_schedulers_term(order);
+}
+
+Eterm
+erts_bind_schedulers(Process *c_p, Eterm how)
+{
+    int notify = 0;
+    Eterm res;
+    erts_cpu_topology_t *cpudata;
+    int cpudata_size;
+    ErtsCpuBindOrder old_cpu_bind_order;
+    /* Select a new scheduler bind order from the atom `how` and recompute bind ids. */
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    /* A -ENOTSUP answer to binding cpu -1 is reported to the caller as EXC_NOTSUP. */
+    if (erts_bind_to_cpu(cpuinfo, -1) == -ENOTSUP) {
+	ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
+    }
+    else {
+	/* Saved so that the order can be restored on the error paths below. */
+	old_cpu_bind_order = cpu_bind_order;
+
+	if (ERTS_IS_ATOM_STR("default_bind", how))
+	    cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+	else if (ERTS_IS_ATOM_STR("spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+	else if (ERTS_IS_ATOM_STR("processor_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+	else if (ERTS_IS_ATOM_STR("thread_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+	else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+	else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+	else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+	else if (ERTS_IS_ATOM_STR("no_spread", how))
+	    cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+	else if (ERTS_IS_ATOM_STR("unbound", how))
+	    cpu_bind_order = ERTS_CPU_BIND_NONE;
+	else {
+	    cpu_bind_order = old_cpu_bind_order;
+	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+	    goto done;
+	}
+
+	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+	/* No topology copy (cpudata == NULL): restore the old order and fail with BADARG. */
+	if (!cpudata) {
+	    cpu_bind_order = old_cpu_bind_order;
+	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
+	    goto done;
+	}
+
+	write_schedulers_bind_change(cpudata, cpudata_size);
+	notify = 1;
+
+	destroy_tmp_cpu_topology_copy(cpudata);
+	/* The result is the order that was in effect before this call. */
+	res = bound_schedulers_term(old_cpu_bind_order);
+    }
+
+ done:
+
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+    /* Notify only after cpuinfo_rwmtx is released, and only if bind ids were rewritten. */
+    if (notify)
+	erts_sched_notify_check_cpu_bind();
+
+    return res;
+}
+
+int
+erts_sched_bind_atthrcreate_prepare(void)
+{
+    ErtsSchedulerData *esdp = erts_get_scheduler_data();
+    return esdp != NULL && erts_is_scheduler_bound(esdp);
+}
+
+int
+erts_sched_bind_atthrcreate_child(int unbind)
+{
+    int res = 0;
+    if (unbind) {
+	erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+	res = erts_unbind_from_cpu(cpuinfo);
+	erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+    }
+    return res;
+}
+
+void
+erts_sched_bind_atthrcreate_parent(int unbind)
+{
+
+}
+
+int
+erts_sched_bind_atfork_prepare(void)
+{   /* Returns nonzero iff called on a scheduler for which erts_is_scheduler_bound() holds. */
+    ErtsSchedulerData *esdp = erts_get_scheduler_data();
+    int unbind = esdp != NULL && erts_is_scheduler_bound(esdp);
+    if (unbind)
+	erts_smp_rwmtx_rlock(&cpuinfo_rwmtx); /* still held on return; erts_sched_bind_atfork_parent(unbind) releases it */
+    return unbind;
+}
+
+int
+erts_sched_bind_atfork_child(int unbind)
+{
+    if (unbind) {
+	ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+			   || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+	return erts_unbind_from_cpu(cpuinfo);
+    }
+    return 0;
+}
+
+char *
+erts_sched_bind_atvfork_child(int unbind)
+{
+    if (unbind) {
+	ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+			   || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+	return erts_get_unbind_from_cpu_str(cpuinfo);
+    }
+    return "false";
+}
+
+void
+erts_sched_bind_atfork_parent(int unbind)
+{   /* `unbind` is expected to be the value returned by erts_sched_bind_atfork_prepare(). */
+    if (unbind)
+	erts_smp_rwmtx_runlock(&cpuinfo_rwmtx); /* pairs with the rlock taken in erts_sched_bind_atfork_prepare() */
+}
+
+Eterm
+erts_fake_scheduler_bindings(Process *p, Eterm how)
+{
+    ErtsCpuBindOrder fake_cpu_bind_order;
+    erts_cpu_topology_t *cpudata;
+    int cpudata_size;
+    Eterm res;
+
+    if (ERTS_IS_ATOM_STR("default_bind", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
+    else if (ERTS_IS_ATOM_STR("spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
+    else if (ERTS_IS_ATOM_STR("processor_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
+    else if (ERTS_IS_ATOM_STR("thread_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
+    else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
+    else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
+    else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
+    else if (ERTS_IS_ATOM_STR("no_spread", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
+    else if (ERTS_IS_ATOM_STR("unbound", how))
+	fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
+    else {
+	ERTS_BIF_PREP_ERROR(res, p, BADARG);
+	return res;
+    }
+
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+    if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
+	ERTS_BIF_PREP_RET(res, am_false);
+    else {
+	int i;
+	Eterm *hp;
+	
+	cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
+
+#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
+
+	erts_fprintf(stderr, "node:          ");
+	for (i = 0; i < cpudata_size; i++)
+	    erts_fprintf(stderr, " %2d", cpudata[i].node);
+	erts_fprintf(stderr, "\n");
+	erts_fprintf(stderr, "processor:     ");
+	for (i = 0; i < cpudata_size; i++)
+	    erts_fprintf(stderr, " %2d", cpudata[i].processor);
+	erts_fprintf(stderr, "\n");
+	if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
+	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
+	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
+	    erts_fprintf(stderr, "processor_node:");
+	    for (i = 0; i < cpudata_size; i++)
+		erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
+	    erts_fprintf(stderr, "\n");
+	}
+	erts_fprintf(stderr, "core:          ");
+	for (i = 0; i < cpudata_size; i++)
+	    erts_fprintf(stderr, " %2d", cpudata[i].core);
+	erts_fprintf(stderr, "\n");
+	erts_fprintf(stderr, "thread:        ");
+	for (i = 0; i < cpudata_size; i++)
+	    erts_fprintf(stderr, " %2d", cpudata[i].thread);
+	erts_fprintf(stderr, "\n");
+	erts_fprintf(stderr, "logical:       ");
+	for (i = 0; i < cpudata_size; i++)
+	    erts_fprintf(stderr, " %2d", cpudata[i].logical);
+	erts_fprintf(stderr, "\n");
+#endif
+
+	hp = HAlloc(p, cpudata_size+1);
+	ERTS_BIF_PREP_RET(res, make_tuple(hp));
+	*hp++ = make_arityval((Uint) cpudata_size);
+	for (i = 0; i < cpudata_size; i++)
+	    *hp++ = make_small((Uint) cpudata[i].logical);
+    }
+
+    destroy_tmp_cpu_topology_copy(cpudata);
+
+    return res;
+}
+
+/*
+ * Build a tuple with one element per scheduler (1..erts_no_schedulers):
+ * the value of scheduler2cpu_map[].bound_id for that scheduler, or the
+ * atom 'unbound' when bound_id is negative. The map is read under the
+ * cpuinfo read lock.
+ */
+Eterm
+erts_get_schedulers_binds(Process *c_p)
+{
+    ERTS_DECL_AM(unbound);
+    Eterm *tuple = HAlloc(c_p, erts_no_schedulers+1);
+    int sched;
+
+    /* Slot 0 is the tuple header; slot N describes scheduler N. */
+    tuple[0] = make_arityval(erts_no_schedulers);
+
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    for (sched = 1; sched <= erts_no_schedulers; sched++) {
+	if (scheduler2cpu_map[sched].bound_id < 0)
+	    tuple[sched] = AM_unbound;
+	else
+	    tuple[sched] = make_small(scheduler2cpu_map[sched].bound_id);
+    }
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+
+    return make_tuple(tuple);
+}
+
+/*
+ * CPU topology
+ */
+
+/* A growable sequence of ids parsed from a topology string. */
+typedef struct {
+    int *id;	/* id buffer; (re)allocated with ERTS_ALC_T_TMP_CPU_IDS */
+    int used;	/* number of ids currently stored in 'id' */
+    int size;	/* number of ids 'id' has room for */
+} ErtsCpuTopIdSeq;
+
+/*
+ * One parsed topology entry: an id sequence for each kind of id that
+ * get_cput_entry() can read.
+ */
+typedef struct {
+    ErtsCpuTopIdSeq logical;
+    ErtsCpuTopIdSeq thread;
+    ErtsCpuTopIdSeq core;
+    ErtsCpuTopIdSeq processor_node;
+    ErtsCpuTopIdSeq processor;
+    ErtsCpuTopIdSeq node;
+} ErtsCpuTopEntry;
+
+/*
+ * Give every id sequence of 'cte' an initial buffer with room for 10 ids.
+ * The 'used' counters are left untouched; get_cput_entry() sets them
+ * before use.
+ */
+static void
+init_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+    ErtsCpuTopIdSeq *seqs[6];
+    int initial_size = 10;
+    int i;
+
+    seqs[0] = &cte->logical;
+    seqs[1] = &cte->thread;
+    seqs[2] = &cte->core;
+    seqs[3] = &cte->processor_node;
+    seqs[4] = &cte->processor;
+    seqs[5] = &cte->node;
+
+    for (i = 0; i < 6; i++) {
+	seqs[i]->id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
+				 sizeof(int)*initial_size);
+	seqs[i]->size = initial_size;
+    }
+}
+
+/*
+ * Release the id buffers of all six sequences in 'cte'
+ * (the counterpart of init_cpu_top_entry()).
+ */
+static void
+destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
+{
+    ErtsCpuTopIdSeq *seqs[6];
+    int i;
+
+    seqs[0] = &cte->logical;
+    seqs[1] = &cte->thread;
+    seqs[2] = &cte->core;
+    seqs[3] = &cte->processor_node;
+    seqs[4] = &cte->processor;
+    seqs[5] = &cte->node;
+
+    for (i = 0; i < 6; i++)
+	erts_free(ERTS_ALC_T_TMP_CPU_IDS, seqs[i]->id);
+}
+
+/*
+ * Scan one decimal id in the range 0..ERTS_MAX_CPU_TOPOLOGY_ID at *cpp.
+ * On success the id is stored in *idp, *cpp is advanced past the digits
+ * and 1 is returned; otherwise 0 is returned and *idp is left untouched.
+ * The caller is expected to have cleared errno beforehand.
+ */
+static int
+scan_cput_id(char **cpp, int *idp)
+{
+    long parsed;
+
+    if (!isdigit((unsigned char) **cpp))
+	return 0;
+    parsed = strtol(*cpp, cpp, 10);
+    if (errno != 0 || parsed < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < parsed)
+	return 0;
+    *idp = (int) parsed;
+    return 1;
+}
+
+/*
+ * Read either a single id ("7") or an id range ("3-7") at *str.
+ *
+ * *v receives the (first) id. *vr is only written when a '-' follows the
+ * first id, in which case it receives the id after the '-'. *str is
+ * advanced past the consumed text on success only.
+ *
+ * Returns ERTS_INIT_CPU_TOPOLOGY_OK, ERTS_INIT_CPU_TOPOLOGY_INVALID_ID
+ * (bad first id) or ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE (bad id after
+ * the '-').
+ */
+static int
+get_cput_value_or_range(int *v, int *vr, char **str)
+{
+    char *cursor = *str;
+
+    errno = 0;
+
+    if (!scan_cput_id(&cursor, v))
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
+
+    if (*cursor == '-') {
+	cursor++;
+	if (!scan_cput_id(&cursor, vr))
+	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+    }
+
+    *str = cursor;
+    return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+/*
+ * Read a comma separated list of ids and id ranges at *str into 'idseq',
+ * replacing whatever the sequence held before. A range "a-b" expands to
+ * every id from a to b inclusive, ascending or descending depending on
+ * which end is larger. The buffer is grown as needed.
+ *
+ * On success *str is advanced past the list and idseq->used is set to the
+ * number of ids stored. On failure the error code from
+ * get_cput_value_or_range() is returned.
+ */
+static int
+get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
+{
+    char *cursor = *str;
+    int stored = 0;
+    int required = 0;
+
+    for (;;) {
+	int status, first, count, step, k;
+	int last = -1; /* stays negative when no range was given */
+
+	status = get_cput_value_or_range(&first, &last, &cursor);
+	if (status != ERTS_INIT_CPU_TOPOLOGY_OK)
+	    return status;
+
+	if (last < 0 || last == first) {
+	    count = 1;
+	    step = 0;
+	}
+	else if (last > first) {
+	    count = last - first + 1;
+	    step = 1;
+	}
+	else {
+	    count = first - last + 1;
+	    step = -1;
+	}
+
+	required += count;
+	if (required > idseq->size) {
+	    idseq->size = required + 10;
+	    idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
+				     idseq->id,
+				     sizeof(int)*idseq->size);
+	}
+
+	for (k = 0; k < count; k++) {
+	    idseq->id[stored++] = first;
+	    first += step;
+	}
+
+	if (*cursor != ',')
+	    break;
+	cursor++;
+    }
+
+    *str = cursor;
+    idseq->used = stored;
+    return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+/*
+ * Parse one topology entry (the text up to the next ':' or the end of the
+ * string) at *str into 'cput'.
+ *
+ * An entry is a series of id lists, each introduced by a type letter:
+ * 'L' (logical), 't'/'T' (thread), 'c'/'C' (core), 'p'/'P' (processor)
+ * and 'n'/'N' (node). 'h' remembers the ERTS_TOPOLOGY_* level of the
+ * previous id list; every new list must be on a strictly lower level,
+ * otherwise ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY is returned.
+ *
+ * Id types that are not given keep the defaults set below (a single id:
+ * 0 for thread, core and processor; -1 for processor_node and node).
+ * Logical ids are mandatory. Every other sequence must contain either
+ * one id or exactly as many ids as the logical sequence.
+ *
+ * On success *str is advanced past the entry, including a terminating
+ * ':' if there is one.
+ */
+static int
+get_cput_entry(ErtsCpuTopEntry *cput, char **str)
+{
+    int h;
+    char *c = *str;
+
+    /* Defaults for id types not mentioned in the entry */
+    cput->logical.used = 0;
+    cput->thread.id[0] = 0;
+    cput->thread.used = 1;
+    cput->core.id[0] = 0;
+    cput->core.used = 1;
+    cput->processor_node.id[0] = -1;
+    cput->processor_node.used = 1;
+    cput->processor.id[0] = 0;
+    cput->processor.used = 1;
+    cput->node.id[0] = -1;
+    cput->node.used = 1;
+
+    h = ERTS_TOPOLOGY_MAX_DEPTH;
+    while (*c != ':' && *c != '\0') {
+	int res;
+	ErtsCpuTopIdSeq *idseqp;
+	switch (*c++) {
+	case 'L':
+	    if (h <= ERTS_TOPOLOGY_LOGICAL)
+		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+	    idseqp = &cput->logical;
+	    h = ERTS_TOPOLOGY_LOGICAL;
+	    break;
+	case 't':
+	case 'T':
+	    if (h <= ERTS_TOPOLOGY_THREAD)
+		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+	    idseqp = &cput->thread;
+	    h = ERTS_TOPOLOGY_THREAD;
+	    break;
+	case 'c':
+	case 'C':
+	    if (h <= ERTS_TOPOLOGY_CORE)
+		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+	    idseqp = &cput->core;
+	    h = ERTS_TOPOLOGY_CORE;
+	    break;
+	case 'p':
+	case 'P':
+	    if (h <= ERTS_TOPOLOGY_PROCESSOR)
+		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+	    idseqp = &cput->processor;
+	    h = ERTS_TOPOLOGY_PROCESSOR;
+	    break;
+	case 'n':
+	case 'N':
+	    /*
+	     * A node id list is stored in 'node' when a processor id list
+	     * has already been read or when no 'p'/'P' follows later in
+	     * this entry; otherwise it is stored in 'processor_node'.
+	     */
+	    if (h <= ERTS_TOPOLOGY_PROCESSOR) {
+	    do_node:
+		if (h <= ERTS_TOPOLOGY_NODE)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+		idseqp = &cput->node;
+		h = ERTS_TOPOLOGY_NODE;
+	    }
+	    else {
+		/* Look ahead in the rest of this entry for 'p'/'P' */
+		int p_node = 0;
+		char *p_chk = c;
+		while (*p_chk != '\0' && *p_chk != ':') {
+		    if (*p_chk == 'p' || *p_chk == 'P') {
+			p_node = 1;
+			break;
+		    }
+		    p_chk++;
+		}
+		if (!p_node)
+		    goto do_node;
+		if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
+		idseqp = &cput->processor_node;
+		h = ERTS_TOPOLOGY_PROCESSOR_NODE;
+	    }
+	    break;
+	default:
+	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
+	}
+	/* Read the id list that belongs to the type letter just consumed */
+	res = get_cput_id_seq(idseqp, &c);
+	if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
+		return res;
+    }
+
+    if (cput->logical.used < 1)
+	return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
+
+    /* Step over the entry separator, if any */
+    if (*c == ':') {
+	c++;
+    }
+
+    /* Each sequence must hold one id or one id per logical id */
+    if (cput->thread.used != 1
+	&& cput->thread.used != cput->logical.used)
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+    if (cput->core.used != 1
+	&& cput->core.used != cput->logical.used)
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+    if (cput->processor_node.used != 1
+	&& cput->processor_node.used != cput->logical.used)
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+    if (cput->processor.used != 1
+	&& cput->processor.used != cput->logical.used)
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+    if (cput->node.used != 1
+	&& cput->node.used != cput->logical.used)
+	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
+
+    *str = c;
+    return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+/*
+ * Check that the 'size' entries of 'cpudata' form a consistent topology.
+ *
+ * NOTE: as a side effect 'cpudata' is sorted in place with
+ * processor_order_cmp.
+ *
+ * Returns ERTS_INIT_CPU_TOPOLOGY_OK (also for size <= 0), or
+ *  - ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS when a logical id occurs
+ *    more than once,
+ *  - ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES when two neighbouring
+ *    entries (after sorting) agree on processor, node, processor_node,
+ *    core and thread,
+ *  - ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES when the node information is
+ *    inconsistent (see the last loop).
+ */
+static int
+verify_topology(erts_cpu_topology_t *cpudata, int size)
+{
+    if (size > 0) {
+	int *logical;
+	int node, processor, no_nodes, i;
+
+	/* Verify logical ids */
+	logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
+
+	for (i = 0; i < size; i++)
+	    logical[i] = cpudata[i].logical;
+
+	/* After sorting, duplicates end up next to each other */
+	qsort(logical, size, sizeof(int), int_cmp);
+	for (i = 0; i < size-1; i++) {
+	    if (logical[i] == logical[i+1]) {
+		erts_free(ERTS_ALC_T_TMP, logical);
+		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
+	    }
+	}
+
+	erts_free(ERTS_ALC_T_TMP, logical);
+
+	qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
+
+	/* Verify unique entities */
+
+	for (i = 1; i < size; i++) {
+	    if (cpudata[i-1].processor == cpudata[i].processor
+		&& cpudata[i-1].node == cpudata[i].node
+		&& (cpudata[i-1].processor_node
+		    == cpudata[i].processor_node)
+		&& cpudata[i-1].core == cpudata[i].core
+		&& cpudata[i-1].thread == cpudata[i].thread) {
+		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
+	    }
+	}
+
+	/* Verify numa nodes */
+	/*
+	 * If the first entry has neither a node nor a processor_node id,
+	 * no other entry may have one. Otherwise every following entry
+	 * must have exactly one of the two set, and entries on the same
+	 * processor as their predecessor must also share its node id.
+	 * NOTE(review): entry 0 itself is not checked for having both
+	 * ids set -- confirm whether that is intended.
+	 */
+	node = cpudata[0].node;
+	processor = cpudata[0].processor;
+	no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
+	for (i = 1; i < size; i++) {
+	    if (no_nodes) {
+		if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+	    }
+	    else {
+		if (cpudata[i].processor == processor && cpudata[i].node != node)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+		node = cpudata[i].node;
+		processor = cpudata[i].processor;
+		if (node >= 0 && cpudata[i].processor_node >= 0)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+		if (node < 0 && cpudata[i].processor_node < 0)
+		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
+	    }
+	}
+    }
+
+    return ERTS_INIT_CPU_TOPOLOGY_OK;
+}
+
+/*
+ * Replace the user defined CPU topology (user_cpudata) with the topology
+ * described by 'topology_str', a sequence of entries in the format read
+ * by get_cput_entry().
+ *
+ * Any previous user defined topology is discarded first. On success
+ * user_cpudata/user_cpudata_size describe the verified topology and
+ * ERTS_INIT_CPU_TOPOLOGY_OK is returned. On failure the error code from
+ * parsing or from verify_topology() is returned and no user defined
+ * topology remains (user_cpudata == NULL, user_cpudata_size == 0).
+ */
+int
+erts_init_cpu_topology_string(char *topology_str)
+{
+    ErtsCpuTopEntry cput;
+    int need_size;
+    char *c;
+    int ix;
+    int error = ERTS_INIT_CPU_TOPOLOGY_OK;
+
+    if (user_cpudata)
+	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+    user_cpudata_size = 10;
+
+    user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+			      (sizeof(erts_cpu_topology_t)
+			       * user_cpudata_size));
+
+    init_cpu_top_entry(&cput);
+
+    ix = 0;
+    need_size = 0;
+
+    c = topology_str;
+    if (*c == '\0') {
+	error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
+	goto fail;
+    }
+    do {
+	int r;
+	error = get_cput_entry(&cput, &c);
+	if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
+	    goto fail;
+
+	/* One topology element is produced per logical id in the entry */
+	need_size += cput.logical.used;
+	if (user_cpudata_size < need_size) {
+	    user_cpudata_size = need_size + 10;
+	    user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+					user_cpudata,
+					(sizeof(erts_cpu_topology_t)
+					 * user_cpudata_size));
+	}
+
+	ASSERT(cput.thread.used == 1
+	       || cput.thread.used == cput.logical.used);
+	ASSERT(cput.core.used == 1
+	       || cput.core.used == cput.logical.used);
+	ASSERT(cput.processor_node.used == 1
+	       || cput.processor_node.used == cput.logical.used);
+	ASSERT(cput.processor.used == 1
+	       || cput.processor.used == cput.logical.used);
+	ASSERT(cput.node.used == 1
+	       || cput.node.used == cput.logical.used);
+
+	/*
+	 * A sequence holding a single id applies to every logical id of
+	 * the entry; otherwise ids are paired up position by position.
+	 */
+	for (r = 0; r < cput.logical.used; r++) {
+	    user_cpudata[ix].logical = cput.logical.id[r];
+	    user_cpudata[ix].thread =
+		cput.thread.id[cput.thread.used == 1 ? 0 : r];
+	    user_cpudata[ix].core =
+		cput.core.id[cput.core.used == 1 ? 0 : r];
+	    user_cpudata[ix].processor_node =
+		cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
+	    user_cpudata[ix].processor =
+		cput.processor.id[cput.processor.used == 1 ? 0 : r];
+	    user_cpudata[ix].node =
+		cput.node.id[cput.node.used == 1 ? 0 : r];
+	    ix++;
+	}
+    } while (*c != '\0');
+
+    /* Shrink the buffer to the number of elements actually stored */
+    if (user_cpudata_size != ix) {
+	user_cpudata_size = ix;
+	user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
+				    user_cpudata,
+				    (sizeof(erts_cpu_topology_t)
+				     * user_cpudata_size));
+    }
+
+    error = verify_topology(user_cpudata, user_cpudata_size);
+    if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
+	destroy_cpu_top_entry(&cput);
+	return ERTS_INIT_CPU_TOPOLOGY_OK;
+    }
+
+ fail:
+    if (user_cpudata)
+	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+    /*
+     * Do not leave a dangling pointer behind: other code tests
+     * user_cpudata for NULL, and a later call of this function would
+     * otherwise free the same buffer a second time.
+     */
+    user_cpudata = NULL;
+    user_cpudata_size = 0;
+    destroy_cpu_top_entry(&cput);
+    return error;
+}
+
+/*
+ * Selectors for get_cpu_topology_term(). ERTS_GET_CPU_TOPOLOGY_ERROR is
+ * used by erts_get_cpu_topology_term() to mark an unrecognized request
+ * and is never passed on to get_cpu_topology_term().
+ */
+#define ERTS_GET_CPU_TOPOLOGY_ERROR		-1
+#define ERTS_GET_USED_CPU_TOPOLOGY		0
+#define ERTS_GET_DETECTED_CPU_TOPOLOGY		1
+#define ERTS_GET_DEFINED_CPU_TOPOLOGY		2
+
+static Eterm get_cpu_topology_term(Process *c_p, int type);
+
+/*
+ * Convert one id element of a {cpu, ...} tuple.
+ *
+ * Accepts only small integers in the range -1..ERTS_MAX_CPU_TOPOLOGY_ID.
+ * On success the id is stored in *idp and 1 is returned; otherwise 0 is
+ * returned and *idp is left untouched. The term is verified to be a
+ * small integer before it is decoded; decoding any other kind of term
+ * with signed_val() would yield a meaningless number that could happen
+ * to fall inside the accepted range.
+ */
+static int
+term_to_cpu_topology_id(Eterm term, int *idp)
+{
+    Sint id;
+
+    if (is_not_small(term))
+	return 0;
+    id = signed_val(term);
+    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
+	return 0;
+    *idp = (int) id;
+    return 1;
+}
+
+/*
+ * Set or clear the user defined CPU topology.
+ *
+ * 'term' is either the atom 'undefined' (remove the user defined
+ * topology) or a proper, non-empty list of
+ * {cpu, Node, Processor, ProcessorNode, Core, Thread, Logical} tuples
+ * that passes verify_topology().
+ *
+ * Returns the topology that was in use before the call (as built by
+ * get_cpu_topology_term()), or THE_NON_VALUE if 'term' is invalid, in
+ * which case nothing is changed. On success the cpu groups maps and the
+ * scheduler bind information are updated and the schedulers are notified.
+ */
+Eterm
+erts_set_cpu_topology(Process *c_p, Eterm term)
+{
+    erts_cpu_topology_t *cpudata = NULL;
+    int cpudata_size = 0;
+    Eterm res;
+
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
+    if (term == am_undefined) {
+	if (user_cpudata)
+	    erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+	user_cpudata = NULL;
+	user_cpudata_size = 0;
+
+	/* Fall back on the detected topology, if binding is in use */
+	if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
+	    cpudata_size = system_cpudata_size;
+	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
+				 (sizeof(erts_cpu_topology_t)
+				  * cpudata_size));
+
+	    sys_memcpy((void *) cpudata,
+		       (void *) system_cpudata,
+		       sizeof(erts_cpu_topology_t)*cpudata_size);
+	}
+    }
+    else if (is_not_list(term)) {
+    error:
+	/* Also the common exit for every validation failure below */
+	erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+	res = THE_NON_VALUE;
+	goto done;
+    }
+    else {
+	Eterm list = term;
+	int ix = 0;
+
+	cpudata_size = 100;
+	cpudata = erts_alloc(ERTS_ALC_T_TMP,
+			     (sizeof(erts_cpu_topology_t)
+			      * cpudata_size));
+
+	while (is_list(list)) {
+	    Eterm *lp = list_val(list);
+	    Eterm cpu = CAR(lp);
+	    Eterm* tp;
+
+	    if (is_not_tuple(cpu))
+		goto error;
+
+	    tp = tuple_val(cpu);
+
+	    if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
+		goto error;
+
+	    if (ix >= cpudata_size) {
+		cpudata_size += 100;
+		cpudata = erts_realloc(ERTS_ALC_T_TMP,
+				       cpudata,
+				       (sizeof(erts_cpu_topology_t)
+					* cpudata_size));
+	    }
+
+	    /* Elements 2..7 are the ids, in tuple order */
+	    if (!term_to_cpu_topology_id(tp[2], &cpudata[ix].node)
+		|| !term_to_cpu_topology_id(tp[3], &cpudata[ix].processor)
+		|| !term_to_cpu_topology_id(tp[4],
+					    &cpudata[ix].processor_node)
+		|| !term_to_cpu_topology_id(tp[5], &cpudata[ix].core)
+		|| !term_to_cpu_topology_id(tp[6], &cpudata[ix].thread)
+		|| !term_to_cpu_topology_id(tp[7], &cpudata[ix].logical))
+		goto error;
+
+	    list = CDR(lp);
+	    ix++;
+	}
+
+	/* Reject improper lists */
+	if (is_not_nil(list))
+	    goto error;
+
+	cpudata_size = ix;
+
+	if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
+	    goto error;
+
+	/* Reuse the existing buffer when the size is unchanged */
+	if (user_cpudata_size != cpudata_size) {
+	    if (user_cpudata)
+		erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
+	    user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+				      sizeof(erts_cpu_topology_t)*cpudata_size);
+	    user_cpudata_size = cpudata_size;
+	}
+
+	sys_memcpy((void *) user_cpudata,
+		   (void *) cpudata,
+		   sizeof(erts_cpu_topology_t)*cpudata_size);
+    }
+
+    update_cpu_groups_maps();
+
+    write_schedulers_bind_change(cpudata, cpudata_size);
+
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+    erts_sched_notify_check_cpu_bind();
+
+ done:
+
+    if (cpudata)
+	erts_free(ERTS_ALC_T_TMP, cpudata);
+
+    return res;
+}
+
+/*
+ * Hand out a temporary (ERTS_ALC_T_TMP) copy of the topology in use:
+ * the user defined topology if there is one, else the detected one.
+ * When neither exists *cpudata is set to NULL and *cpudata_size to 0.
+ * A non-NULL copy is released with destroy_tmp_cpu_topology_copy().
+ */
+static void
+create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
+{
+    erts_cpu_topology_t *src;
+    int src_size;
+
+    if (user_cpudata) {
+	src = user_cpudata;
+	src_size = user_cpudata_size;
+    }
+    else if (system_cpudata) {
+	src = system_cpudata;
+	src_size = system_cpudata_size;
+    }
+    else {
+	*cpudata = NULL;
+	*cpudata_size = 0;
+	return;
+    }
+
+    *cpudata_size = src_size;
+    *cpudata = erts_alloc(ERTS_ALC_T_TMP,
+			  (sizeof(erts_cpu_topology_t)
+			   * src_size));
+    sys_memcpy((void *) *cpudata,
+	       (void *) src,
+	       sizeof(erts_cpu_topology_t)*src_size);
+}
+
+/*
+ * Release a copy obtained from create_tmp_cpu_topology_copy().
+ * Passing NULL is allowed and does nothing.
+ */
+static void
+destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
+{
+    if (!cpudata)
+	return;
+    erts_free(ERTS_ALC_T_TMP, cpudata);
+}
+
+
+/*
+ * Build (or, depending on hpp/hszp, only size) the list of
+ * {cpu, Node, Processor, ProcessorNode, Core, Thread, Logical} tuples
+ * for the 'size' entries of 'cpudata', in array order. Returns the atom
+ * 'undefined' when size is 0. hpp and hszp are passed straight on to
+ * erts_bld_tuple()/erts_bld_cons().
+ */
+static Eterm
+bld_topology_term(Eterm **hpp,
+		  Uint *hszp,
+		  erts_cpu_topology_t *cpudata,
+		  int size)
+{
+    Eterm list = NIL;
+    int ix = size;
+
+    if (size == 0)
+	return am_undefined;
+
+    /* Cons from the back so that the list ends up in array order */
+    while (--ix >= 0) {
+	erts_cpu_topology_t *cpu = &cpudata[ix];
+	Eterm tuple = erts_bld_tuple(hpp,
+				     hszp,
+				     7,
+				     am_cpu,
+				     make_small(cpu->node),
+				     make_small(cpu->processor),
+				     make_small(cpu->processor_node),
+				     make_small(cpu->core),
+				     make_small(cpu->thread),
+				     make_small(cpu->logical));
+	list = erts_bld_cons(hpp, hszp, tuple, list);
+    }
+    return list;
+}
+
+/*
+ * Build the term for the topology selected by 'type' on the heap of
+ * 'c_p':
+ *  - ERTS_GET_USED_CPU_TOPOLOGY: the user defined topology if one is
+ *    set, otherwise the detected one;
+ *  - ERTS_GET_DETECTED_CPU_TOPOLOGY: system_cpudata;
+ *  - ERTS_GET_DEFINED_CPU_TOPOLOGY: user_cpudata.
+ * Returns the atom 'undefined' when the selected topology does not
+ * exist. Any other 'type' aborts the emulator.
+ *
+ * The callers visible in this file hold cpuinfo_rwmtx while calling.
+ */
+static Eterm
+get_cpu_topology_term(Process *c_p, int type)
+{
+#ifdef DEBUG
+    Eterm *hp_end;
+#endif
+    Eterm *hp;
+    Uint hsz;
+    Eterm res = THE_NON_VALUE;
+    erts_cpu_topology_t *cpudata = NULL;
+    int size = 0;
+
+    switch (type) {
+    case ERTS_GET_USED_CPU_TOPOLOGY:
+	if (user_cpudata)
+	    goto defined;
+	else
+	    goto detected;
+    case ERTS_GET_DETECTED_CPU_TOPOLOGY:
+    detected:
+	if (!system_cpudata)
+	    res = am_undefined;
+	else {
+	    /* Work on a temporary copy; it is freed before returning */
+	    size = system_cpudata_size;
+	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
+				 (sizeof(erts_cpu_topology_t)
+				  * size));
+	    sys_memcpy((void *) cpudata,
+		       (void *) system_cpudata,
+		       sizeof(erts_cpu_topology_t)*size);
+	}
+	break;
+    case ERTS_GET_DEFINED_CPU_TOPOLOGY:
+    defined:
+	if (!user_cpudata)
+	    res = am_undefined;
+	else {
+	    /* Used in place; no copy is made */
+	    size = user_cpudata_size;
+	    cpudata = user_cpudata;
+	}
+	break;
+    default:
+	erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
+	break;
+    }
+
+    if (res == am_undefined) {
+	ASSERT(!cpudata);
+	return res;
+    }
+
+    hsz = 0;
+
+    /* First pass: only compute the heap size needed */
+    bld_topology_term(NULL, &hsz,
+		      cpudata, size);
+
+    hp = HAlloc(c_p, hsz);
+
+#ifdef DEBUG
+    hp_end = hp + hsz;
+#endif
+
+    /* Second pass: build the term on the allocated heap */
+    res = bld_topology_term(&hp, NULL,
+			    cpudata, size);
+
+    ASSERT(hp_end == hp);
+
+    /* Only the temporary copy made above is freed here */
+    if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
+	erts_free(ERTS_ALC_T_TMP, cpudata);
+
+    return res;
+}
+
+/*
+ * Return the CPU topology term selected by the atom 'which'
+ * ('used', 'detected' or 'defined'), or THE_NON_VALUE if 'which' is
+ * anything else. The cpuinfo read lock is held during the whole call.
+ */
+Eterm
+erts_get_cpu_topology_term(Process *c_p, Eterm which)
+{
+    int type = ERTS_GET_CPU_TOPOLOGY_ERROR;
+    Eterm res = THE_NON_VALUE;
+
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+
+    if (ERTS_IS_ATOM_STR("used", which))
+	type = ERTS_GET_USED_CPU_TOPOLOGY;
+    else if (ERTS_IS_ATOM_STR("detected", which))
+	type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
+    else if (ERTS_IS_ATOM_STR("defined", which))
+	type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
+
+    if (type != ERTS_GET_CPU_TOPOLOGY_ERROR)
+	res = get_cpu_topology_term(c_p, type);
+
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+    return res;
+}
+
+/*
+ * Fetch the configured, online and available logical processor counts
+ * from 'cpuinfo'. Each output pointer may be NULL, in which case that
+ * value is not fetched. Does no locking of its own.
+ */
+static void
+get_logical_processors(int *conf, int *onln, int *avail)
+{
+    if (conf != NULL) {
+	*conf = erts_get_cpu_configured(cpuinfo);
+    }
+    if (onln != NULL) {
+	*onln = erts_get_cpu_online(cpuinfo);
+    }
+    if (avail != NULL) {
+	*avail = erts_get_cpu_available(cpuinfo);
+    }
+}
+
+/*
+ * Locked variant of get_logical_processors(): reads the requested
+ * counts while holding the cpuinfo read lock.
+ */
+void
+erts_get_logical_processors(int *conf, int *onln, int *avail)
+{
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    get_logical_processors(conf, onln, avail);
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+}
+
+/*
+ * First initialization step: resets the cpu groups bookkeeping, creates
+ * 'cpuinfo' and reports ERTS_MAX_READER_GROUPS through *max_rg_p and the
+ * logical processor counts through conf_p/onln_p/avail_p.
+ * cpuinfo_rwmtx is not taken here (it is initialized later, in
+ * erts_init_cpu_topology()).
+ */
+void
+erts_pre_early_init_cpu_topology(int *max_rg_p,
+				 int *conf_p,
+				 int *onln_p,
+				 int *avail_p)
+{
+    cpu_groups_maps = NULL;
+    no_cpu_groups_callbacks = 0;
+    *max_rg_p = ERTS_MAX_READER_GROUPS;
+    cpuinfo = erts_cpu_info_create();
+    get_logical_processors(conf_p, onln_p, avail_p);
+}
+
+/*
+ * Second initialization step: clears the user defined topology, reads
+ * and verifies the detected topology, and computes the number of main
+ * threads and reader groups.
+ *
+ * no_schedulers      - number of schedulers that will be used
+ * max_main_threads_p - out: min(configured logical processors,
+ *                      no_schedulers)
+ * max_reader_groups  - upper limit for the number of reader groups
+ * reader_groups_p    - out: number of reader groups; 0 when there is at
+ *                      most one main thread or max_reader_groups <= 1
+ */
+void
+erts_early_init_cpu_topology(int no_schedulers,
+			     int *max_main_threads_p,
+			     int max_reader_groups,
+			     int *reader_groups_p)
+{
+    user_cpudata = NULL;
+    user_cpudata_size = 0;
+
+    cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
+
+    system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+    if (system_cpudata_size < 1) {
+	/*
+	 * No topology detected. Handle this the same way as
+	 * erts_update_cpu_info() does instead of requesting a
+	 * zero sized block from the allocator.
+	 */
+	system_cpudata = NULL;
+	system_cpudata_size = 0;
+    }
+    else {
+	system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+				    (sizeof(erts_cpu_topology_t)
+				     * system_cpudata_size));
+
+	/* An unreadable or inconsistent topology counts as none */
+	if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+	    || (ERTS_INIT_CPU_TOPOLOGY_OK
+		!= verify_topology(system_cpudata,
+				   system_cpudata_size))) {
+	    erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+	    system_cpudata = NULL;
+	    system_cpudata_size = 0;
+	}
+    }
+
+    max_main_threads = erts_get_cpu_configured(cpuinfo);
+    if (max_main_threads > no_schedulers)
+	max_main_threads = no_schedulers;
+    *max_main_threads_p = max_main_threads;
+
+    reader_groups = max_main_threads;
+    if (reader_groups <= 1 || max_reader_groups <= 1)
+	reader_groups = 0;
+    if (reader_groups > max_reader_groups)
+	reader_groups = max_reader_groups;
+    *reader_groups_p = reader_groups;
+}
+
+/*
+ * Final initialization step: initializes cpuinfo_rwmtx, allocates the
+ * scheduler-to-cpu map, decides the default bind order if none has been
+ * chosen, registers the reader groups callback and, unless binding is
+ * disabled, writes the initial scheduler bind information.
+ */
+void
+erts_init_cpu_topology(void)
+{
+    int ix;
+
+    erts_smp_rwmtx_init(&cpuinfo_rwmtx, "cpu_info");
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+
+    /* Indexed by scheduler number (1..erts_no_schedulers); slot 0 is
+     * allocated but not initialized here */
+    scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
+				   (sizeof(ErtsCpuBindData)
+				    * (erts_no_schedulers+1)));
+    for (ix = 1; ix <= erts_no_schedulers; ix++) {
+	scheduler2cpu_map[ix].bind_id = -1;
+	scheduler2cpu_map[ix].bound_id = -1;
+    }
+
+    if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
+	/*
+	 * Default: no binding when the number of configured cpus is
+	 * unknown or exceeds the number of schedulers, when no topology
+	 * is known, or when erts_bind_to_cpu() reports -ENOTSUP.
+	 */
+	int ncpus = erts_get_cpu_configured(cpuinfo);
+	if (ncpus < 1 || erts_no_schedulers < ncpus)
+	    cpu_bind_order = ERTS_CPU_BIND_NONE;
+	else
+	    cpu_bind_order = ((system_cpudata || user_cpudata)
+			      && (erts_bind_to_cpu(cpuinfo, -1) != -ENOTSUP)
+			      ? ERTS_CPU_BIND_DEFAULT_BIND
+			      : ERTS_CPU_BIND_NONE);
+    }
+
+    reader_groups_map = add_cpu_groups(reader_groups,
+				       reader_groups_callback,
+				       NULL);
+
+    if (cpu_bind_order == ERTS_CPU_BIND_NONE)
+	erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+    else {
+	erts_cpu_topology_t *cpudata;
+	int cpudata_size;
+	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+	write_schedulers_bind_change(cpudata, cpudata_size);
+	/* Schedulers are notified after the lock has been released */
+	erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+	erts_sched_notify_check_cpu_bind();
+	destroy_tmp_cpu_topology_copy(cpudata);
+    }
+}
+
+/*
+ * Refresh 'cpuinfo'. If erts_cpu_info_update() reports a change, the
+ * detected topology is re-read and verified, the cpu groups maps and the
+ * scheduler bind information are rewritten and, after the lock has been
+ * released, the schedulers are notified.
+ *
+ * Returns the value reported by erts_cpu_info_update().
+ */
+int
+erts_update_cpu_info(void)
+{
+    int changed;
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    changed = erts_cpu_info_update(cpuinfo);
+    if (changed) {
+	erts_cpu_topology_t *cpudata;
+	int cpudata_size;
+
+	/* Throw away the old detected topology */
+	if (system_cpudata)
+	    erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+
+	system_cpudata_size = erts_get_cpu_topology_size(cpuinfo);
+	if (!system_cpudata_size)
+	    system_cpudata = NULL;
+	else {
+	    system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
+					(sizeof(erts_cpu_topology_t)
+					 * system_cpudata_size));
+
+	    /* An unreadable or inconsistent topology counts as none */
+	    if (!erts_get_cpu_topology(cpuinfo, system_cpudata)
+		|| (ERTS_INIT_CPU_TOPOLOGY_OK
+		    != verify_topology(system_cpudata,
+				       system_cpudata_size))) {
+		erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
+		system_cpudata = NULL;
+		system_cpudata_size = 0;
+	    }
+	}
+
+	update_cpu_groups_maps();
+
+	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
+	write_schedulers_bind_change(cpudata, cpudata_size);
+	destroy_tmp_cpu_topology_copy(cpudata);
+    }
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+    if (changed)
+	erts_sched_notify_check_cpu_bind();
+    return changed;
+}
+
+/*
+ * reader groups map
+ */
+
+/*
+ * Callback registered with add_cpu_groups() in erts_init_cpu_topology().
+ * When reader groups are in use and the scheduler number (esdp->no) does
+ * not exceed max_main_threads, the reader group is set to 0 if
+ * 'suspending' is set and to group+1 otherwise. 'unused' is ignored.
+ */
+void
+reader_groups_callback(int suspending,
+		       ErtsSchedulerData *esdp,
+		       int group,
+		       void *unused)
+{
+    if (reader_groups && esdp->no <= max_main_threads)
+	erts_smp_rwmtx_set_reader_group(suspending ? 0 : group+1);
+}
+
+static Eterm get_cpu_groups_map(Process *c_p,
+				erts_cpu_groups_map_t *map,
+				int offset);
+/*
+ * Debug helper: compute the cpu groups map that 'groups' groups would
+ * give with the current topology, in test mode (cpu availability is not
+ * consulted), and return it as a list of {Logical, Group} tuples with
+ * group numbers starting at 1. Returns NIL when no map could be made.
+ * The temporary map is freed before returning.
+ */
+Eterm
+erts_debug_reader_groups_map(Process *c_p, int groups)
+{
+    Eterm res;
+    erts_cpu_groups_map_t test;
+
+    test.array = NULL;
+    test.groups = groups;
+    /*
+     * make_cpu_groups_map() copies the shared topology data, which may
+     * be replaced or freed under the write lock; take the read lock
+     * like the other readers of that data do.
+     */
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    make_cpu_groups_map(&test, 1);
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+    if (!test.array)
+	res = NIL;
+    else {
+	res = get_cpu_groups_map(c_p, &test, 1);
+	erts_free(ERTS_ALC_T_TMP, test.array);
+    }
+    return res;
+}
+
+
+/*
+ * Return the current reader groups map (reader_groups_map) as a term
+ * built by get_cpu_groups_map() with offset 1. The map is read while
+ * holding the cpuinfo read lock.
+ */
+Eterm
+erts_get_reader_groups_map(Process *c_p)
+{
+    Eterm res;
+    erts_smp_rwmtx_rlock(&cpuinfo_rwmtx);
+    res = get_cpu_groups_map(c_p, reader_groups_map, 1);
+    erts_smp_rwmtx_runlock(&cpuinfo_rwmtx);
+    return res;
+}
+
+/*
+ * CPU groups
+ */
+
+/*
+ * Convert a cpu groups map to a list of {Logical, Group + offset}
+ * tuples, built on the heap of 'c_p'. Unused slots of the map
+ * (logical < 0) are skipped.
+ *
+ * 'map' may be NULL: add_cpu_groups() returns NULL when no groups are
+ * used, and that value ends up in reader_groups_map. NIL is returned in
+ * that case.
+ */
+static Eterm
+get_cpu_groups_map(Process *c_p,
+		   erts_cpu_groups_map_t *map,
+		   int offset)
+{
+#ifdef DEBUG
+    Eterm *endp;
+#endif
+    Eterm res = NIL, tuple;
+    Eterm *hp;
+    int i;
+
+    if (!map)
+	return NIL;
+
+    /* Each used slot needs a 2-tuple (3 words) and a cons cell (2 words) */
+    hp = HAlloc(c_p, map->logical_processors*(2+3));
+#ifdef DEBUG
+    endp = hp + map->logical_processors*(2+3);
+#endif
+    for (i = map->size - 1; i >= 0; i--) {
+	if (map->array[i].logical >= 0) {
+	    tuple = TUPLE2(hp,
+			   make_small(map->array[i].logical),
+			   make_small(map->array[i].cpu_group + offset));
+	    hp += 3;
+	    res = CONS(hp, tuple, res);
+	    hp += 2;
+	}
+    }
+    ASSERT(hp == endp);
+    return res;
+}
+
+/*
+ * Translate the '*size' entries of 'cpudata' into 'avail', one element
+ * per cpu that is used, and count the entities on each level in 'no'.
+ *
+ * Unless 'test' is set, cpus for which erts_is_cpu_available() is false
+ * are skipped. For every level below ERTS_TOPOLOGY_LOGICAL the element
+ * gets a running sequence number instead of the original id: when an
+ * entry differs from the previous used entry on some level, the counter
+ * of that level and of every finer level is stepped. The LOGICAL slot
+ * keeps the real logical id and the CG slot is reset to 0.
+ *
+ * On return no->level[] holds the number of distinct entities seen on
+ * each level and *size the number of elements written to 'avail'.
+ *
+ * The caller in this file (make_cpu_groups_map) sorts 'cpudata' with
+ * cpu_bind_order_sort() before calling.
+ */
+static void
+make_available_cpu_topology(erts_avail_cput *no,
+			    erts_avail_cput *avail,
+			    erts_cpu_topology_t *cpudata,
+			    int *size,
+			    int test)
+{
+    int len = *size;
+    erts_cpu_topology_t last;
+    int a, i, j;
+
+    /* Counters hold "last index used"; converted to counts at the end */
+    no->level[ERTS_TOPOLOGY_NODE] = -1;
+    no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
+    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
+    no->level[ERTS_TOPOLOGY_CORE] = -1;
+    no->level[ERTS_TOPOLOGY_THREAD] = -1;
+    no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
+
+    /* INT_MIN makes the first used entry differ on every level */
+    last.node = INT_MIN;
+    last.processor = INT_MIN;
+    last.processor_node = INT_MIN;
+    last.core = INT_MIN;
+    last.thread = INT_MIN;
+    last.logical = INT_MIN;
+
+    a = 0;
+
+    for (i = 0; i < len; i++) {
+
+	if (!test && !erts_is_cpu_available(cpuinfo, cpudata[i].logical))
+	    continue;
+
+	/* Jump to the coarsest level that changed ... */
+	if (last.node != cpudata[i].node)
+	    goto node;
+	if (last.processor != cpudata[i].processor)
+	    goto processor;
+	if (last.processor_node != cpudata[i].processor_node)
+	    goto processor_node;
+	if (last.core != cpudata[i].core)
+	    goto core;
+	ASSERT(last.thread != cpudata[i].thread);
+	goto thread;
+
+	/* ... and fall through, stepping it and all finer levels */
+    node:
+	no->level[ERTS_TOPOLOGY_NODE]++;
+    processor:
+	no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+    processor_node:
+	no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+    core:
+	no->level[ERTS_TOPOLOGY_CORE]++;
+    thread:
+	no->level[ERTS_TOPOLOGY_THREAD]++;
+
+	no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+	for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
+	    avail[a].level[j] = no->level[j];
+
+	avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
+	avail[a].level[ERTS_TOPOLOGY_CG] = 0;
+
+	ASSERT(last.logical != cpudata[i].logical);
+
+	last = cpudata[i];
+	a++;
+    }
+
+    /* Convert "last index used" into "number of entities" */
+    no->level[ERTS_TOPOLOGY_NODE]++;
+    no->level[ERTS_TOPOLOGY_PROCESSOR]++;
+    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
+    no->level[ERTS_TOPOLOGY_CORE]++;
+    no->level[ERTS_TOPOLOGY_THREAD]++;
+    no->level[ERTS_TOPOLOGY_LOGICAL]++;
+
+    *size = a;
+}
+
+/*
+ * Store the pair (logical, cpu_group) in the open addressed table
+ * map->array. Probing starts at slot 'logical % map->size' and moves
+ * forward one slot at a time, wrapping around at the end, until a free
+ * slot (logical < 0) is found. Aborts the emulator if the probe gets
+ * back to where it started without finding one.
+ */
+static void
+cpu_group_insert(erts_cpu_groups_map_t *map,
+		 int logical, int cpu_group)
+{
+    int home = logical % map->size;
+    int slot = home;
+
+    for (;;) {
+	if (map->array[slot].logical < 0) {
+	    map->array[slot].logical = logical;
+	    map->array[slot].cpu_group = cpu_group;
+	    return;
+	}
+	if (++slot == map->size)
+	    slot = 0;
+	if (slot == home)
+	    break;
+    }
+
+    erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
+}
+
+
+/*
+ * Count the entities on level 'level'+1 that belong to the entity
+ * cgc->id on level 'level'.
+ *
+ * Starting at avail[aix], entries are walked for as long as their value
+ * on 'level' equals cgc->id; each change of the value on the sub level
+ * counts as one entity. The entry at 'aix' is always processed (the
+ * caller guarantees that it belongs to cgc->id). The result is stored in
+ * cgc->sub_levels and cgc->cpu_groups is reset to 0.
+ *
+ * Returns the index of the first entry that was not processed.
+ */
+static int
+sub_levels(erts_cpu_groups_count_t *cgc, int level, int aix,
+	   int avail_sz, erts_avail_cput *avail)
+{
+    int sub_level = level+1;
+    int last = -1;
+    cgc->sub_levels = 0;
+
+    do {
+	if (last != avail[aix].level[sub_level]) {
+	    cgc->sub_levels++;
+	    last = avail[aix].level[sub_level];
+	}
+	aix++;
+    }
+    while (aix < avail_sz && cgc->id == avail[aix].level[level]);
+    cgc->cpu_groups = 0;
+    return aix;
+}
+
+/*
+ * Assign cpu group numbers to the entries of 'avail' that belong to the
+ * top level entity described by 'cgcp' (all remaining entries when
+ * level < 0), starting at index 'a'.
+ *
+ * The cgcp->sub_levels entities on level 'level'+1 are divided over
+ * cgcp->cpu_groups consecutive groups numbered from *cgp + 1: every
+ * group gets sub_levels / cpu_groups of them and the last
+ * sub_levels % cpu_groups groups get one more. The group number is
+ * written to the ERTS_TOPOLOGY_CG slot of each entry.
+ *
+ * *cgp is updated to the last group number used. Returns the index of
+ * the first entry that was not processed.
+ */
+static int
+write_cpu_groups(int *cgp, erts_cpu_groups_count_t *cgcp,
+		    int level, int a,
+		    int avail_sz, erts_avail_cput *avail)
+{
+    int cg = *cgp;
+    int sub_level = level+1;
+    int sl_per_gr = cgcp->sub_levels / cgcp->cpu_groups;
+    int xsl = cgcp->sub_levels % cgcp->cpu_groups;
+    int sls = 0;	/* sub levels left to put in the current group */
+    int last = -1;
+    /* First group number that gets one extra sub level */
+    int xsl_cg_lim = (cgcp->cpu_groups - xsl) + cg + 1;
+
+    ASSERT(level < 0 || avail[a].level[level] == cgcp->id);
+
+    do {
+	if (last != avail[a].level[sub_level]) {
+	    /* New sub level entity; open a new group if the current is full */
+	    if (!sls) {
+		sls = sl_per_gr;
+		cg++;
+		if (cg >= xsl_cg_lim)
+		    sls++;
+	    }
+	    last = avail[a].level[sub_level];
+	    sls--;
+	}
+	avail[a].level[ERTS_TOPOLOGY_CG] = cg;
+	a++;
+    } while (a < avail_sz && (level < 0
+			      || avail[a].level[level] == cgcp->id));
+
+    ASSERT(cgcp->cpu_groups == cg - *cgp);
+
+    *cgp = cg;
+
+    return a;
+}
+
+/*
+ * qsort() comparison of erts_cpu_groups_count_t elements: most
+ * sub_levels first, ties ordered by ascending id.
+ */
+static int
+cg_count_sub_levels_compare(const void *vx, const void *vy)
+{
+    const erts_cpu_groups_count_t *lhs = vx;
+    const erts_cpu_groups_count_t *rhs = vy;
+
+    if (lhs->sub_levels == rhs->sub_levels)
+	return lhs->id - rhs->id;
+    return rhs->sub_levels - lhs->sub_levels;
+}
+
+/*
+ * qsort() comparison of erts_cpu_groups_count_t elements by ascending id.
+ */
+static int
+cg_count_id_compare(const void *vx, const void *vy)
+{
+    const erts_cpu_groups_count_t *lhs = vx;
+    const erts_cpu_groups_count_t *rhs = vy;
+
+    return lhs->id - rhs->id;
+}
+
+/*
+ * (Re)build map->array so that it maps logical cpu ids to cpu group
+ * numbers (0 .. map->groups-1 at most) for the topology currently in
+ * use.
+ *
+ * Any previous array is freed. The map is left empty (array == NULL,
+ * size == 0, logical_processors == 0) when map->groups is 0 or when no
+ * topology is known. With 'test' set the array is allocated with
+ * ERTS_ALC_T_TMP and cpu availability is not consulted; otherwise
+ * ERTS_ALC_T_CPU_GRPS_MAP is used.
+ *
+ * The array is an open addressed table of avail_sz*2+1 slots, filled
+ * through cpu_group_insert().
+ */
+static void
+make_cpu_groups_map(erts_cpu_groups_map_t *map, int test)
+{
+    int i, spread_level, avail_sz;
+    erts_avail_cput no, *avail;
+    erts_cpu_topology_t *cpudata;
+    ErtsAlcType_t alc_type = (test
+			      ? ERTS_ALC_T_TMP
+			      : ERTS_ALC_T_CPU_GRPS_MAP);
+
+    if (map->array)
+	erts_free(alc_type, map->array);
+
+    map->array = NULL;
+    map->logical_processors = 0;
+    map->size = 0;
+
+    if (!map->groups)
+	return;
+
+    create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
+
+    if (!cpudata)
+	return;
+
+    cpu_bind_order_sort(cpudata,
+			avail_sz,
+			ERTS_CPU_BIND_NO_SPREAD,
+			1);
+
+    avail = erts_alloc(ERTS_ALC_T_TMP,
+		       sizeof(erts_avail_cput)*avail_sz);
+
+    /* avail_sz is updated to the number of cpus actually used */
+    make_available_cpu_topology(&no, avail, cpudata,
+				&avail_sz, test);
+
+    destroy_tmp_cpu_topology_copy(cpudata);
+
+    map->size = avail_sz*2+1;
+
+    map->array = erts_alloc(alc_type,
+			    (sizeof(erts_cpu_groups_map_array_t)
+			     * map->size));;
+    map->logical_processors = avail_sz;
+
+    /* Mark every slot as free */
+    for (i = 0; i < map->size; i++) {
+	map->array[i].logical = -1;
+	map->array[i].cpu_group = -1;
+    }
+
+    /*
+     * Spread on the first level (searching from NODE towards THREAD)
+     * that has more entities than there are groups; CORE if none has.
+     */
+    spread_level = ERTS_TOPOLOGY_CORE;
+    for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
+	if (no.level[i] > map->groups) {
+	    spread_level = i;
+	    break;
+	}
+    }
+
+    if (no.level[spread_level] <= map->groups) {
+	/* Enough groups: one group per entity on the spread level */
+	int a, cg, last = -1;
+	cg = -1;
+	ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
+	for (a = 0; a < avail_sz; a++) {
+	    if (last != avail[a].level[spread_level]) {
+		cg++;
+		last = avail[a].level[spread_level];
+	    }
+	    cpu_group_insert(map,
+			     avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+			     cg);
+	}
+    }
+    else { /* map->groups < no.level[spread_level] */
+	erts_cpu_groups_count_t *cg_count;
+	int a, cg, tl, toplevels;
+
+	/* Level directly above the spread level */
+	tl = spread_level-1;
+
+	if (spread_level == ERTS_TOPOLOGY_NODE)
+	    toplevels = 1;
+	else
+	    toplevels = no.level[tl];
+
+	cg_count = erts_alloc(ERTS_ALC_T_TMP,
+			      toplevels*sizeof(erts_cpu_groups_count_t));
+
+	if (toplevels == 1) {
+	    /* A single top level entity gets all groups */
+	    cg_count[0].id = 0;
+	    cg_count[0].sub_levels = no.level[spread_level];
+	    cg_count[0].cpu_groups = map->groups;
+	}
+	else {
+	    int cgs_per_tl, cgs;
+	    cgs = map->groups;
+	    cgs_per_tl = cgs / toplevels;
+
+	    /* Count the spread level entities of each top level entity */
+	    a = 0;
+	    for (i = 0; i < toplevels; i++) {
+		cg_count[i].id = avail[a].level[tl];
+		a = sub_levels(&cg_count[i], tl, a, avail_sz, avail);
+	    }
+
+	    /* Most sub levels first */
+	    qsort(cg_count,
+		  toplevels,
+		  sizeof(erts_cpu_groups_count_t),
+		  cg_count_sub_levels_compare);
+
+	    /* Even share, but never more groups than sub levels */
+	    for (i = 0; i < toplevels; i++) {
+		if (cg_count[i].sub_levels < cgs_per_tl) {
+		    cg_count[i].cpu_groups = cg_count[i].sub_levels;
+		    cgs -= cg_count[i].sub_levels;
+		}
+		else {
+		    cg_count[i].cpu_groups = cgs_per_tl;
+		    cgs -= cgs_per_tl;
+		}
+	    }
+
+	    /*
+	     * Hand out the remaining groups one at a time, in order of
+	     * descending sub level count; a pass ends at the first top
+	     * level entity that cannot take another group.
+	     */
+	    while (cgs > 0) {
+		for (i = 0; i < toplevels; i++) {
+		    if (cg_count[i].sub_levels == cg_count[i].cpu_groups)
+			break;
+		    else {
+			cg_count[i].cpu_groups++;
+			if (--cgs == 0)
+			    break;
+		    }
+		}
+	    }
+
+	    /* Back to id order, matching the order of 'avail' */
+	    qsort(cg_count,
+		  toplevels,
+		  sizeof(erts_cpu_groups_count_t),
+		  cg_count_id_compare);
+	}
+
+	/* Write the group number of every cpu into avail[].level[CG] */
+	a = i = 0;
+	cg = -1;
+	while (a < avail_sz) {
+	    a = write_cpu_groups(&cg, &cg_count[i], tl,
+				 a, avail_sz, avail);
+	    i++;
+	}
+
+	ASSERT(map->groups == cg + 1);
+
+	for (a = 0; a < avail_sz; a++)
+	    cpu_group_insert(map,
+			     avail[a].level[ERTS_TOPOLOGY_LOGICAL],
+			     avail[a].level[ERTS_TOPOLOGY_CG]);
+
+	erts_free(ERTS_ALC_T_TMP, cg_count);
+    }
+
+    erts_free(ERTS_ALC_T_TMP, avail);
+}
+
+static erts_cpu_groups_map_t *
+add_cpu_groups(int groups,
+	       erts_cpu_groups_callback_t callback,
+	       void *arg)
+{
+    /* Registers callback/arg on the map with the requested group count
+     * (capped at max_main_threads), creating that map when none exists.
+     * Returns the map, or NULL when the capped count is 0. */
+    erts_cpu_groups_callback_list_t *entry;
+    erts_cpu_groups_map_t *map;
+    int wanted = groups;
+
+    ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+    if (wanted > max_main_threads)
+	wanted = max_main_threads;
+    if (wanted == 0)
+	return NULL;
+
+    no_cpu_groups_callbacks++;
+    entry = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+		       sizeof(erts_cpu_groups_callback_list_t));
+    entry->callback = callback;
+    entry->arg = arg;
+
+    /* Reuse a map that already has this many groups, if there is one. */
+    for (map = cpu_groups_maps; map; map = map->next)
+	if (map->groups == wanted)
+	    break;
+    if (map) {
+	entry->next = map->callback_list;
+	map->callback_list = entry;
+	return map;
+    }
+
+    /* No such map: build one whose only callback is the new entry. */
+    entry->next = NULL;
+    map = erts_alloc(ERTS_ALC_T_CPU_GRPS_MAP,
+		     sizeof(erts_cpu_groups_map_t));
+    map->groups = wanted;
+    map->array = NULL;
+    map->size = 0;
+    map->logical_processors = 0;
+    map->callback_list = entry;
+    map->next = cpu_groups_maps;
+    make_cpu_groups_map(map, 0);
+    cpu_groups_maps = map;
+    return map;
+}
+
+static void
+remove_cpu_groups(erts_cpu_groups_callback_t callback, void *arg)
+{
+    /*
+     * Unregisters the first entry matching callback/arg; a map left with
+     * no callbacks is unlinked and freed. Calls erl_exit() on no match.
+     */
+    erts_cpu_groups_map_t **map_link, *map;
+    erts_cpu_groups_callback_list_t **entry_link, *entry;
+
+    ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+    no_cpu_groups_callbacks--;
+
+    /* Walk both lists through the link that points at the current node. */
+    for (map_link = &cpu_groups_maps;
+	 (map = *map_link) != NULL;
+	 map_link = &map->next) {
+	for (entry_link = &map->callback_list;
+	     (entry = *entry_link) != NULL;
+	     entry_link = &entry->next) {
+	    if (entry->callback != callback || entry->arg != arg)
+		continue;
+	    *entry_link = entry->next;
+	    erts_free(ERTS_ALC_T_CPU_GRPS_MAP, entry);
+
+	    if (!map->callback_list) {
+		*map_link = map->next;
+		if (map->array)
+		    erts_free(ERTS_ALC_T_CPU_GRPS_MAP, map->array);
+		erts_free(ERTS_ALC_T_CPU_GRPS_MAP, map);
+	    }
+	    return;
+	}
+    }
+
+    erl_exit(ERTS_ABORT_EXIT, "Cpu groups not found\n");
+}
+
+static int
+cpu_groups_lookup(erts_cpu_groups_map_t *map,
+		  ErtsSchedulerData *esdp)
+{
+    /*
+     * Returns the scheduler's cpu group: (no - 1) modulo the group count
+     * when cpu_id is negative, else a circular probe of map->array.
+     */
+    int cpu, slot, probes;
+
+    ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rlocked(&cpuinfo_rwmtx)
+		       || erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+    if (esdp->cpu_id < 0)
+	return (((int) esdp->no) - 1) % map->groups;
+
+    cpu = esdp->cpu_id;
+    slot = cpu % map->size;
+    for (probes = 0; probes < map->size; probes++) {
+	if (map->array[slot].logical == cpu) {
+	    int group = map->array[slot].cpu_group;
+	    ASSERT(0 <= group && group < map->groups);
+	    return group;
+	}
+	slot = (slot + 1 == map->size) ? 0 : slot + 1;
+    }
+
+    erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", cpu);
+}
+
+static void
+update_cpu_groups_maps(void)
+{   /* Re-runs make_cpu_groups_map() on every map in cpu_groups_maps. */
+    erts_cpu_groups_map_t *map = cpu_groups_maps;
+    ERTS_SMP_LC_ASSERT(erts_lc_rwmtx_is_rwlocked(&cpuinfo_rwmtx));
+
+    for (; map != NULL; map = map->next)
+	make_cpu_groups_map(map, 0);
+}
+
+void
+erts_add_cpu_groups(int groups,
+		    erts_cpu_groups_callback_t callback,
+		    void *arg)
+{   /* Runs add_cpu_groups() with cpuinfo_rwmtx read/write-locked. */
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    add_cpu_groups(groups, callback, arg); /* returned map is not used */
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}
+
+void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback,
+			    void *arg)
+{   /* Runs remove_cpu_groups() with cpuinfo_rwmtx read/write-locked. */
+    erts_smp_rwmtx_rwlock(&cpuinfo_rwmtx);
+    remove_cpu_groups(callback, arg);
+    erts_smp_rwmtx_rwunlock(&cpuinfo_rwmtx);
+}
diff --git a/erts/emulator/beam/erl_cpu_topology.h b/erts/emulator/beam/erl_cpu_topology.h
new file mode 100644
index 0000000000..c5a9520b61
--- /dev/null
+++ b/erts/emulator/beam/erl_cpu_topology.h
@@ -0,0 +1,105 @@
+/*
+ * %CopyrightBegin%
+ *
+ * Copyright Ericsson AB 2010. All Rights Reserved.
+ *
+ * The contents of this file are subject to the Erlang Public License,
+ * Version 1.1, (the "License"); you may not use this file except in
+ * compliance with the License. You should have received a copy of the
+ * Erlang Public License along with this software. If not, it can be
+ * retrieved online at http://www.erlang.org/.
+ *
+ * Software distributed under the License is distributed on an "AS IS"
+ * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+ * the License for the specific language governing rights and limitations
+ * under the License.
+ *
+ * %CopyrightEnd%
+ */
+
+/*
+ * Description:	CPU topology and related functionality
+ *
+ * Author: 	Rickard Green
+ */
+
+#ifndef ERL_CPU_TOPOLOGY_H__
+#define ERL_CPU_TOPOLOGY_H__
+
+void erts_pre_early_init_cpu_topology(int *max_rg_p,
+				      int *conf_p,
+				      int *onln_p,
+				      int *avail_p);
+void erts_early_init_cpu_topology(int no_schedulers,
+				  int *max_main_threads_p,
+				  int max_reader_groups,
+				  int *reader_groups_p);
+void erts_init_cpu_topology(void);
+
+/* Return values of erts_init_scheduler_bind_type_string(). */
+#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS		0
+#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED		1
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY	2
+#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE	3
+
+int erts_init_scheduler_bind_type_string(char *how);
+
+/* Return values of erts_init_cpu_topology_string(). */
+#define ERTS_INIT_CPU_TOPOLOGY_OK			0
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID		1
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE		2
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY	3
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE		4
+#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES		5
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID		6
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS		7
+#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES	8
+#define ERTS_INIT_CPU_TOPOLOGY_MISSING			9
+
+int erts_init_cpu_topology_string(char *topology_str);
+
+void erts_sched_check_cpu_bind(ErtsSchedulerData *esdp);
+#ifdef ERTS_SMP
+void erts_sched_init_check_cpu_bind(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_prep_suspend(ErtsSchedulerData *esdp);
+void erts_sched_check_cpu_bind_post_suspend(ErtsSchedulerData *esdp);
+#endif
+
+int erts_update_cpu_info(void);
+
+Eterm erts_bind_schedulers(Process *c_p, Eterm how);
+Eterm erts_get_schedulers_binds(Process *c_p);
+
+Eterm erts_get_reader_groups_map(Process *c_p);
+
+Eterm erts_set_cpu_topology(Process *c_p, Eterm term);
+Eterm erts_get_cpu_topology_term(Process *c_p, Eterm which);
+
+/* NOTE(review): conf/onln/avail look like out parameters - confirm. */
+void erts_get_logical_processors(int *conf, int *onln, int *avail);
+
+int erts_sched_bind_atthrcreate_prepare(void);
+int erts_sched_bind_atthrcreate_child(int unbind);
+void erts_sched_bind_atthrcreate_parent(int unbind);
+
+int erts_sched_bind_atfork_prepare(void);
+int erts_sched_bind_atfork_child(int unbind);
+char *erts_sched_bind_atvfork_child(int unbind);
+void erts_sched_bind_atfork_parent(int unbind);
+
+Eterm erts_fake_scheduler_bindings(Process *p, Eterm how);
+Eterm erts_debug_cpu_groups_map(Process *c_p, int groups);
+
+
+typedef void (*erts_cpu_groups_callback_t)(int,
+					   ErtsSchedulerData *,
+					   int,
+					   void *);
+
+void erts_add_cpu_groups(int groups,
+			 erts_cpu_groups_callback_t callback,
+			 void *arg);
+void erts_remove_cpu_groups(erts_cpu_groups_callback_t callback,
+			    void *arg);
+
+#endif
diff --git a/erts/emulator/beam/erl_drv_thread.c b/erts/emulator/beam/erl_drv_thread.c
index d42820ddf3..17b08a71d4 100644
--- a/erts/emulator/beam/erl_drv_thread.c
+++ b/erts/emulator/beam/erl_drv_thread.c
@@ -528,7 +528,7 @@ erl_drv_tsd_get(ErlDrvTSDKey key)
     if (!dtid)
 	return NULL;
 #endif
-    if (ERL_DRV_TSD_LEN__ < key)
+    if (ERL_DRV_TSD_LEN__ <= key)
 	return NULL;
     return ERL_DRV_TSD__[key];
 }
diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c
index 36fefc5cba..a2fd5921a2 100644
--- a/erts/emulator/beam/erl_init.c
+++ b/erts/emulator/beam/erl_init.c
@@ -41,6 +41,7 @@
 #include "erl_printf_term.h"
 #include "erl_misc_utils.h"
 #include "packet_parser.h"
+#include "erl_cpu_topology.h"
 
 #ifdef HIPE
 #include "hipe_mode_switch.h"	/* for hipe_mode_switch_init() */
@@ -63,6 +64,8 @@ extern void ConNormalExit(void);
 extern void ConWaitForExit(void);
 #endif
 
+static void erl_init(int ncpu);
+
 #define ERTS_MIN_COMPAT_REL 7
 
 #ifdef ERTS_SMP
@@ -76,9 +79,6 @@ int erts_initialized = 0;
 static erts_tid_t main_thread;
 #endif
 
-erts_cpu_info_t *erts_cpuinfo;
-
-int erts_reader_groups;
 int erts_use_sender_punish;
 
 /*
@@ -111,7 +111,6 @@ int erts_compat_rel;
 static int use_multi_run_queue;
 static int no_schedulers;
 static int no_schedulers_online;
-static int max_reader_groups;
 
 #ifdef DEBUG
 Uint32 verbose;             /* See erl_debug.h for information about verbose */
@@ -230,18 +229,18 @@ void erl_error(char *fmt, va_list args)
     erts_vfprintf(stderr, fmt, args);
 }
 
-static void early_init(int *argc, char **argv);
+static int early_init(int *argc, char **argv);
 
 void
 erts_short_init(void)
 {
-    early_init(NULL, NULL);
-    erl_init();
+    int ncpu = early_init(NULL, NULL);
+    erl_init(ncpu);
     erts_initialized = 1;
 }
 
-void
-erl_init(void)
+static void
+erl_init(int ncpu)
 {
     init_benchmarking();
 
@@ -252,11 +251,11 @@ erl_init(void)
     erts_init_monitors();
     erts_init_gc();
     init_time();
-    erts_init_process();
+    erts_init_process(ncpu);
     erts_init_scheduling(use_multi_run_queue,
 			 no_schedulers,
 			 no_schedulers_online);
-
+    erts_init_cpu_topology(); /* Must be after init_scheduling */
     H_MIN_SIZE      = erts_next_heap_size(H_MIN_SIZE, 0);
     BIN_VH_MIN_SIZE = erts_next_heap_size(BIN_VH_MIN_SIZE, 0);
 
@@ -588,7 +587,7 @@ static void ethr_ll_free(void *ptr)
 
 #endif
 
-static void
+static int
 early_init(int *argc, char **argv) /*
 				   * Only put things here which are
 				   * really important initialize
@@ -601,6 +600,10 @@ early_init(int *argc, char **argv) /*
     int ncpuavail;
     int schdlrs;
     int schdlrs_onln;
+    int max_main_threads;
+    int max_reader_groups;
+    int reader_groups;
+
     use_multi_run_queue = 1;
     erts_printf_eterm_func = erts_printf_term;
     erts_disable_tolerant_timeofday = 0;
@@ -616,13 +619,11 @@ early_init(int *argc, char **argv) /*
 
     erts_use_sender_punish = 1;
 
-    erts_cpuinfo = erts_cpu_info_create();
-
-#ifdef ERTS_SMP
-    ncpu = erts_get_cpu_configured(erts_cpuinfo);
-    ncpuonln = erts_get_cpu_online(erts_cpuinfo);
-    ncpuavail = erts_get_cpu_available(erts_cpuinfo);
-#else
+    erts_pre_early_init_cpu_topology(&max_reader_groups,
+				     &ncpu,
+				     &ncpuonln,
+				     &ncpuavail);
+#ifndef ERTS_SMP
     ncpu = 1;
     ncpuonln = 1;
     ncpuavail = 1;
@@ -665,15 +666,9 @@ early_init(int *argc, char **argv) /*
 			    ? ncpuavail
 			    : (ncpuonln > 0 ? ncpuonln : no_schedulers));
 
-#ifdef ERTS_SMP
-    erts_max_main_threads = no_schedulers_online;
-#endif
-
     schdlrs = no_schedulers;
     schdlrs_onln = no_schedulers_online;
 
-    max_reader_groups = ERTS_MAX_READER_GROUPS;
-
     if (argc && argv) {
 	int i = 1;
 	while (i < *argc) {
@@ -769,9 +764,13 @@ early_init(int *argc, char **argv) /*
 
     erts_alloc_init(argc, argv, &alloc_opts); /* Handles (and removes)
 						 -M flags. */
-
-    erts_early_init_scheduling(); /* Require allocators */
-    erts_init_utils(); /* Require allocators */
+    /* Require allocators */
+    erts_early_init_scheduling();
+    erts_init_utils();
+    erts_early_init_cpu_topology(no_schedulers,
+				 &max_main_threads,
+				 max_reader_groups,
+				 &reader_groups);
 
 #ifdef USE_THREADS
     {
@@ -785,24 +784,13 @@ early_init(int *argc, char **argv) /*
 	elid.mem.ll.alloc = ethr_ll_alloc;
 	elid.mem.ll.realloc = ethr_ll_realloc;
 	elid.mem.ll.free = ethr_ll_free;
-
-#ifdef ERTS_SMP
-	elid.main_threads = erts_max_main_threads;
-#else
-	elid.main_threads = 1;
-#endif
-	elid.reader_groups = (elid.main_threads > 1
-			      ? elid.main_threads
-			      : 0);
-	if (max_reader_groups <= 1)
-	    elid.reader_groups = 0;
-	if (elid.reader_groups > max_reader_groups)
-	    elid.reader_groups = max_reader_groups;
-	erts_reader_groups = elid.reader_groups;
+	elid.main_threads = max_main_threads;
+	elid.reader_groups = reader_groups;
 
 	erts_thr_late_init(&elid);
     }
 #endif
+
 #ifdef ERTS_ENABLE_LOCK_CHECK
     erts_lc_late_init();
 #endif
@@ -820,6 +808,8 @@ early_init(int *argc, char **argv) /*
 
     erts_ets_realloc_always_moves = 0;
     erts_dist_buf_busy_limit = ERTS_DE_BUSY_LIMIT;
+
+    return ncpu;
 }
 
 #ifndef ERTS_SMP
@@ -853,8 +843,7 @@ erl_start(int argc, char **argv)
     char envbuf[21]; /* enough for any 64-bit integer */
     size_t envbufsz;
     int async_max_threads = erts_async_max_threads;
-
-    early_init(&argc, argv);
+    int ncpu = early_init(&argc, argv);
 
     envbufsz = sizeof(envbuf);
     if (erts_sys_getenv(ERL_MAX_ETS_TABLES_ENV, envbuf, &envbufsz) == 0)
@@ -1111,7 +1100,7 @@ erl_start(int argc, char **argv)
 	    char *sub_param = argv[i]+2;
 	    if (has_prefix("bt", sub_param)) {
 		arg = get_arg(sub_param+2, argv[i+1], &i);
-		res = erts_init_scheduler_bind_type(arg);
+		res = erts_init_scheduler_bind_type_string(arg);
 		if (res != ERTS_INIT_SCHED_BIND_TYPE_SUCCESS) {
 		    switch (res) {
 		    case ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED:
@@ -1136,7 +1125,7 @@ erl_start(int argc, char **argv)
 	    }
 	    else if (has_prefix("ct", sub_param)) {
 		arg = get_arg(sub_param+2, argv[i+1], &i);
-		res = erts_init_cpu_topology(arg);
+		res = erts_init_cpu_topology_string(arg);
 		if (res != ERTS_INIT_CPU_TOPOLOGY_OK) {
 		    switch (res) {
 		    case ERTS_INIT_CPU_TOPOLOGY_INVALID_ID:
@@ -1407,7 +1396,7 @@ erl_start(int argc, char **argv)
     boot_argc = argc - i;  /* Number of arguments to init */
     boot_argv = &argv[i];
 
-    erl_init();
+    erl_init(ncpu);
 
     init_shared_memory(boot_argc, boot_argv);
     load_preloaded();
diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c
index d6138fa4e4..04c7dbd2ec 100644
--- a/erts/emulator/beam/erl_lock_check.c
+++ b/erts/emulator/beam/erl_lock_check.c
@@ -128,8 +128,8 @@ static erts_lc_lock_order_t erts_lock_order[] = {
     {   "removed_fd_pre_alloc_lock",            NULL                    },
     {   "state_prealloc",                       NULL                    },
     {	"schdlr_sspnd",				NULL			},
-    {	"cpu_bind",				NULL			},
     {	"run_queue",				"address"		},
+    {	"cpu_info",				NULL			},
     {	"pollset",				"address"		},
 #ifdef __WIN32__
     {	"pollwaiter",				"address"		},
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index d0b08bf72e..8cdda395df 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -107,7 +107,7 @@ dist_table_alloc(void *dep_tmpl)
     dep->nlinks				= NULL;
     dep->monitors			= NULL;
 
-    erts_smp_spinlock_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr);
+    erts_smp_mtx_init_x(&dep->qlock, "dist_entry_out_queue", chnl_nr);
     dep->qflgs				= 0;
     dep->qsize				= 0;
     dep->out_queue.first		= NULL;
@@ -172,7 +172,7 @@ dist_table_free(void *vdep)
     ASSERT(!dep->cache);
     erts_smp_rwmtx_destroy(&dep->rwmtx);
     erts_smp_mtx_destroy(&dep->lnk_mtx);
-    erts_smp_spinlock_destroy(&dep->qlock);
+    erts_smp_mtx_destroy(&dep->qlock);
 
 #ifdef DEBUG
     sys_memset(vdep, 0x77, sizeof(DistEntry));
@@ -755,9 +755,9 @@ void erts_init_node_tables(void)
     erts_this_dist_entry->nlinks			= NULL;
     erts_this_dist_entry->monitors			= NULL;
 
-    erts_smp_spinlock_init_x(&erts_this_dist_entry->qlock,
-			     "dist_entry_out_queue",
-			     make_small(ERST_INTERNAL_CHANNEL_NO));
+    erts_smp_mtx_init_x(&erts_this_dist_entry->qlock,
+			"dist_entry_out_queue",
+			make_small(ERST_INTERNAL_CHANNEL_NO));
     erts_this_dist_entry->qflgs				= 0;
     erts_this_dist_entry->qsize				= 0;
     erts_this_dist_entry->out_queue.first		= NULL;
diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h
index eb759b87e9..b0a63ae035 100644
--- a/erts/emulator/beam/erl_node_tables.h
+++ b/erts/emulator/beam/erl_node_tables.h
@@ -131,7 +131,7 @@ typedef struct dist_entry_ {
     ErtsLink *nlinks;           /* Link tree with subtrees */
     ErtsMonitor *monitors;      /* Monitor tree */
 
-    erts_smp_spinlock_t qlock;  /* Protects qflgs and out_queue */
+    erts_smp_mtx_t qlock;       /* Protects qflgs and out_queue */
     Uint32 qflgs;
     Sint qsize;
     ErtsDistOutputQueue out_queue;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index b47ce97c46..f252c2cbe2 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -24,7 +24,6 @@
 #endif
 
 #include <stddef.h> /* offsetof() */
-#include <ctype.h>
 #include "sys.h"
 #include "erl_vm.h"
 #include "global.h"
@@ -39,6 +38,7 @@
 #include "erl_threads.h"
 #include "erl_binary.h"
 #include "beam_bp.h"
+#include "erl_cpu_topology.h"
 
 #define ERTS_RUNQ_CHECK_BALANCE_REDS_PER_SCHED (2000*CONTEXT_REDS)
 #define ERTS_RUNQ_CALL_CHECK_BALANCE_REDS \
@@ -63,8 +63,6 @@
 #define ERTS_WAKEUP_OTHER_DEC 10
 #define ERTS_WAKEUP_OTHER_FIXED_INC (CONTEXT_REDS/10)
 
-#define ERTS_MAX_CPU_TOPOLOGY_ID ((int) 0xffff)
-
 #if 0 || defined(DEBUG)
 #define ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
 #endif
@@ -119,10 +117,6 @@ Uint erts_process_tab_index_mask;
 
 static int wakeup_other_limit;
 
-#ifdef ERTS_SMP
-Uint erts_max_main_threads;
-#endif
-
 int erts_sched_thread_suggested_stack_size = -1;
 
 #ifdef ERTS_ENABLE_LOCK_CHECK
@@ -195,48 +189,6 @@ do {							\
 
 #endif
 
-/*
- * Cpu topology hierarchy.
- */
-#define ERTS_TOPOLOGY_NODE		0
-#define ERTS_TOPOLOGY_PROCESSOR		1
-#define ERTS_TOPOLOGY_PROCESSOR_NODE	2
-#define ERTS_TOPOLOGY_CORE		3
-#define ERTS_TOPOLOGY_THREAD		4
-#define ERTS_TOPOLOGY_LOGICAL		5
-
-#define ERTS_TOPOLOGY_MAX_DEPTH		6
-
-typedef struct {
-    int bind_id;
-    int bound_id;
-} ErtsCpuBindData;
-
-static ErtsCpuBindData *scheduler2cpu_map;
-erts_smp_rwmtx_t erts_cpu_bind_rwmtx;
-
-typedef enum {
-    ERTS_CPU_BIND_UNDEFINED,
-    ERTS_CPU_BIND_SPREAD,
-    ERTS_CPU_BIND_PROCESSOR_SPREAD,
-    ERTS_CPU_BIND_THREAD_SPREAD,
-    ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD,
-    ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD,
-    ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD,
-    ERTS_CPU_BIND_NO_SPREAD,
-    ERTS_CPU_BIND_NONE
-} ErtsCpuBindOrder;
-
-#define ERTS_CPU_BIND_DEFAULT_BIND \
-  ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
-
-ErtsCpuBindOrder cpu_bind_order;
-
-static erts_cpu_topology_t *user_cpudata;
-static int user_cpudata_size;
-static erts_cpu_topology_t *system_cpudata;
-static int system_cpudata_size;
-
 erts_sched_stat_t erts_sched_stat;
 
 ErtsRunQueue *erts_common_run_queue;
@@ -259,11 +211,6 @@ ErtsSchedulerData *erts_scheduler_data;
 ErtsAlignedRunQueue *erts_aligned_run_queues;
 Uint erts_no_run_queues;
 
-typedef union {
-    ErtsSchedulerData esd;
-    char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
-} ErtsAlignedSchedulerData;
-
 ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
 
 #ifdef ERTS_SMP
@@ -334,12 +281,6 @@ ERTS_SCHED_PREF_QUICK_ALLOC_IMPL(proclist,
 				 200,
 				 ERTS_ALC_T_PROC_LIST)
 
-#define ERTS_RUNQ_IX(IX)						\
-  (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues),			\
-   &erts_aligned_run_queues[(IX)].runq)
-#define ERTS_SCHEDULER_IX(IX)						\
-  (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers),			\
-   &erts_aligned_scheduler_data[(IX)].esd)
 #define ERTS_SCHED_SLEEP_INFO_IX(IX)					\
   (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers),			\
    &aligned_sched_sleep_info[(IX)].ssi)
@@ -398,22 +339,8 @@ static int stack_element_dump(int to, void *to_arg, Process* p, Eterm* sp,
 #ifdef ERTS_SMP
 static void handle_pending_exiters(ErtsProcList *);
 
-static void cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
-				int size,
-				ErtsCpuBindOrder bind_order,
-				int mk_seq);
-static void signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size);
-
 #endif
 
-static int reader_group_lookup(int logical);
-static void create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata,
-					 int *cpudata_size);
-static void destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata);
-
-static void early_cpu_bind_init(void);
-static void late_cpu_bind_init(void);
-
 #if defined(ERTS_SMP) && defined(ERTS_ENABLE_LOCK_CHECK)
 int
 erts_smp_lc_runq_is_locked(ErtsRunQueue *runq)
@@ -469,13 +396,13 @@ erts_pre_init_process(void)
 
 /* initialize the scheduler */
 void
-erts_init_process(void)
+erts_init_process(int ncpu)
 {
     Uint proc_bits = ERTS_PROC_BITS;
 
 #ifdef ERTS_SMP
     erts_disable_proc_not_running_opt = 0;
-    erts_init_proc_lock();
+    erts_init_proc_lock(ncpu);
 #endif
 
     init_proclist_alloc();
@@ -1060,6 +987,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
 
 	sys_poll_aux_work:
 
+	    ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
 	    erl_sys_schedule(1); /* Might give us something to do */
 
 	    dt = do_time_read_and_reset();
@@ -1155,6 +1084,8 @@ scheduler_wait(long *fcalls, ErtsSchedulerData *esdp, ErtsRunQueue *rq)
 
 	    erts_smp_runq_unlock(rq);
 
+	    ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
 	    erl_sys_schedule(0);
 
 	    dt = do_time_read_and_reset();
@@ -1242,7 +1173,7 @@ wake_scheduler(ErtsRunQueue *rq, int incq, int one)
 	do {
 	    ErtsSchedulerSleepInfo *wake_ssi = ssi;
 	    ssi = ssi->next;
-	    erts_sched_finish_poke(ssi, ssi_flags_set_wake(wake_ssi));
+	    erts_sched_finish_poke(wake_ssi, ssi_flags_set_wake(wake_ssi));
 	} while (ssi);
     }
 }
@@ -1335,6 +1266,31 @@ erts_smp_notify_inc_runq(ErtsRunQueue *runq)
     smp_notify_inc_runq(runq);
 }
 
+void
+erts_sched_notify_check_cpu_bind(void)
+{   /* Sets the check-cpu-bind flags, then wakes schedulers. */
+#ifdef ERTS_SMP
+    int ix;
+    if (erts_common_run_queue) { /* flag is set per scheduler */
+	for (ix = 0; ix < erts_no_schedulers; ix++)
+	    erts_smp_atomic_set(&ERTS_SCHEDULER_IX(ix)->chk_cpu_bind, 1);
+	wake_all_schedulers();
+    }
+    else { /* flag is set per run queue, with that queue locked */
+	for (ix = 0; ix < erts_no_run_queues; ix++) {
+	    ErtsRunQueue *rq = ERTS_RUNQ_IX(ix);
+	    erts_smp_runq_lock(rq);
+	    rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+	    erts_smp_runq_unlock(rq);
+	    wake_scheduler(rq, 0, 1);
+	}
+    }
+#else
+    erts_sched_check_cpu_bind(erts_get_scheduler_data()); /* no flag: call directly */
+#endif
+}
+
+
 #ifdef ERTS_SMP
 
 ErtsRunQueue *
@@ -2379,7 +2335,6 @@ erts_debug_nbalance(void)
 void
 erts_early_init_scheduling(void)
 {
-    early_cpu_bind_init();
     wakeup_other_limit = ERTS_WAKEUP_OTHER_LIMIT_MEDIUM;
 }
 
@@ -2656,8 +2611,6 @@ erts_init_scheduling(int mrq, int no_schedulers, int no_schedulers_online)
 
     /* init port tasks */
     erts_port_task_init();
-
-    late_cpu_bind_init();
 }
 
 ErtsRunQueue *
@@ -2883,12 +2836,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
     long flgs;
     int changing;
     long no = (long) esdp->no;
-    ErtsRunQueue *rq = esdp->run_queue;
     ErtsSchedulerSleepInfo *ssi = esdp->ssi;
     long active_schedulers;
     int curr_online = 1;
     int wake = 0;
-    int reset_read_group = 0;
 #if defined(ERTS_SCHED_NEED_NONBLOCKABLE_AUX_WORK) \
     || defined(ERTS_SCHED_NEED_BLOCKABLE_AUX_WORK)
     long aux_work;
@@ -2909,20 +2860,7 @@ suspend_scheduler(ErtsSchedulerData *esdp)
 
     erts_smp_runq_unlock(esdp->run_queue);
 
-    /* Unbind from cpu */
-    erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-    if (scheduler2cpu_map[esdp->no].bound_id >= 0
-	&& erts_unbind_from_cpu(erts_cpuinfo) == 0) {
-	esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
-	reset_read_group = 1;
-    }
-    erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
-    if (reset_read_group)
-	erts_smp_rwmtx_set_reader_group(0);
-
-    if (esdp->no <= erts_max_main_threads)
-	erts_thr_set_main_status(0, 0);
+    erts_sched_check_cpu_bind_prep_suspend(esdp);
 
     if (erts_system_profile_flags.scheduler)
     	profile_scheduler(make_small(esdp->no), am_inactive);
@@ -3056,17 +2994,10 @@ suspend_scheduler(ErtsSchedulerData *esdp)
     if (erts_system_profile_flags.scheduler)
     	profile_scheduler(make_small(esdp->no), am_active);
 
-    if (esdp->no <= erts_max_main_threads)
-	erts_thr_set_main_status(1, (int) esdp->no);
-
     erts_smp_runq_lock(esdp->run_queue);
     non_empty_runq(esdp->run_queue);
 
-    /* Make sure we check if we should bind to a cpu or not... */
-    if (rq->flags & ERTS_RUNQ_FLG_SHARED_RUNQ)
-	erts_smp_atomic_set(&esdp->chk_cpu_bind, 1);
-    else
-	rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
+    erts_sched_check_cpu_bind_post_suspend(esdp);
 }
 
 #define ERTS_RUNQ_RESET_SUSPEND_INFO(RQ, DBG_ID)			\
@@ -3583,15 +3514,7 @@ sched_thread_func(void *vesdp)
     erts_tsd_set(sched_data_key, vesdp);
 #ifdef ERTS_SMP
 
-    if (no <= erts_max_main_threads) {
-	erts_thr_set_main_status(1, (int) no);
-	if (erts_reader_groups) {
-	    int rg = (int) no;
-	    if (rg > erts_reader_groups)
-		rg = (((int) no) - 1) % erts_reader_groups + 1;
-	    erts_smp_rwmtx_set_reader_group(rg);
-	}
-    }
+    erts_sched_init_check_cpu_bind((ErtsSchedulerData *) vesdp);
 
     erts_proc_lock_prepare_proc_lock_waiter();
     ERTS_SCHED_SLEEP_INFO_IX(no - 1)->event = erts_tse_fetch();
@@ -3693,1907 +3616,6 @@ erts_start_schedulers(void)
 
 #endif /* ERTS_SMP */
 
-static int
-int_cmp(const void *vx, const void *vy)
-{
-    return *((int *) vx) - *((int *) vy);
-}
-
-static int
-cpu_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->processor_node != y->processor_node)
-	return x->processor_node - y->processor_node;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    if (x->node != y->node)
-	return x->node - y->node;
-    return 0;
-}
-
-static int
-cpu_processor_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->processor_node != y->processor_node)
-	return x->processor_node - y->processor_node;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    return 0;
-}
-
-static int
-cpu_thread_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    if (x->processor_node != y->processor_node)
-	return x->processor_node - y->processor_node;
-    if (x->core != y->core)
-	return x->core - y->core;
-    return 0;
-}
-
-static int
-cpu_thread_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    return 0;
-}
-
-static int
-cpu_no_node_processor_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    return 0;
-}
-
-static int
-cpu_no_node_thread_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    if (x->core != y->core)
-	return x->core - y->core;
-    return 0;
-}
-
-static int
-cpu_no_spread_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    if (x->processor_node != y->processor_node)
-	return x->processor_node - y->processor_node;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    return 0;
-}
-
-static ERTS_INLINE void
-make_cpudata_id_seq(erts_cpu_topology_t *cpudata, int size, int no_node)
-{
-    int ix;
-    int node = -1;
-    int processor = -1;
-    int processor_node = -1;
-    int processor_node_node = -1;
-    int core = -1;
-    int thread = -1;
-    int old_node = -1;
-    int old_processor = -1;
-    int old_processor_node = -1;
-    int old_core = -1;
-    int old_thread = -1;
-
-    for (ix = 0; ix < size; ix++) {
-	if (!no_node || cpudata[ix].node >= 0) {
-	    if (old_node == cpudata[ix].node)
-		cpudata[ix].node = node;
-	    else {
-		old_node = cpudata[ix].node;
-		old_processor = processor = -1;
-		if (!no_node)
-		    old_processor_node = processor_node = -1;
-		old_core = core = -1;
-		old_thread = thread = -1;
-		if (no_node || cpudata[ix].node >= 0)
-		    cpudata[ix].node = ++node;
-	    }
-	}
-	if (old_processor == cpudata[ix].processor)
-	    cpudata[ix].processor = processor;
-	else {
-	    old_processor = cpudata[ix].processor;
-	    if (!no_node)
-		processor_node_node = old_processor_node = processor_node = -1;
-	    old_core = core = -1;
-	    old_thread = thread = -1;
-	    cpudata[ix].processor = ++processor;
-	}
-	if (no_node && cpudata[ix].processor_node < 0)
-	    old_processor_node = -1;
-	else {
-	    if (old_processor_node == cpudata[ix].processor_node) {
-		if (no_node)
-		    cpudata[ix].node = cpudata[ix].processor_node = node;
-		else {
-		    if (processor_node_node >= 0)
-			cpudata[ix].node = processor_node_node;
-		    cpudata[ix].processor_node = processor_node;
-		}
-	    }
-	    else {
-		old_processor_node = cpudata[ix].processor_node;
-		old_core = core = -1;
-		old_thread = thread = -1;
-		if (no_node)
-		    cpudata[ix].node = cpudata[ix].processor_node = ++node;
-		else {
-		    cpudata[ix].node = processor_node_node = ++node;
-		    cpudata[ix].processor_node = ++processor_node;
-		}
-	    }
-	}
-	if (!no_node && cpudata[ix].processor_node < 0)
-	    cpudata[ix].processor_node = 0;
-	if (old_core == cpudata[ix].core)
-	    cpudata[ix].core = core;
-	else {
-	    old_core = cpudata[ix].core;
-	    old_thread = thread = -1;
-	    cpudata[ix].core = ++core;
-	}
-	if (old_thread == cpudata[ix].thread)
-	    cpudata[ix].thread = thread;
-	else
-	    old_thread = cpudata[ix].thread = ++thread;
-    }
-}
-
-static void
-cpu_bind_order_sort(erts_cpu_topology_t *cpudata,
-		    int size,
-		    ErtsCpuBindOrder bind_order,
-		    int mk_seq)
-{
-    if (size > 1) {
-	int no_node = 0;
-	int (*cmp_func)(const void *, const void *);
-	switch (bind_order) {
-	case ERTS_CPU_BIND_SPREAD:
-	    cmp_func = cpu_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_PROCESSOR_SPREAD:
-	    cmp_func = cpu_processor_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_THREAD_SPREAD:
-	    cmp_func = cpu_thread_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD:
-	    no_node = 1;
-	    cmp_func = cpu_thread_no_node_processor_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD:
-	    no_node = 1;
-	    cmp_func = cpu_no_node_processor_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD:
-	    no_node = 1;
-	    cmp_func = cpu_no_node_thread_spread_order_cmp;
-	    break;
-	case ERTS_CPU_BIND_NO_SPREAD:
-	    cmp_func = cpu_no_spread_order_cmp;
-	    break;
-	default:
-	    cmp_func = NULL;
-	    erl_exit(ERTS_ABORT_EXIT,
-		     "Bad cpu bind type: %d\n",
-		     (int) cpu_bind_order);
-	    break;
-	}
-
-	if (mk_seq)
-	    make_cpudata_id_seq(cpudata, size, no_node);
-
-	qsort(cpudata, size, sizeof(erts_cpu_topology_t), cmp_func);
-    }
-}
-
-static int
-processor_order_cmp(const void *vx, const void *vy)
-{
-    erts_cpu_topology_t *x = (erts_cpu_topology_t *) vx;
-    erts_cpu_topology_t *y = (erts_cpu_topology_t *) vy;
-
-    if (x->processor != y->processor)
-	return x->processor - y->processor;
-    if (x->node != y->node)
-	return x->node - y->node;
-    if (x->processor_node != y->processor_node)
-	return x->processor_node - y->processor_node;
-    if (x->core != y->core)
-	return x->core - y->core;
-    if (x->thread != y->thread)
-	return x->thread - y->thread;
-    return 0;
-}
-
-static void
-check_cpu_bind(ErtsSchedulerData *esdp)
-{
-    int rg = 0;
-    int res;
-    int cpu_id;
-    erts_smp_runq_unlock(esdp->run_queue);
-    erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-    cpu_id = scheduler2cpu_map[esdp->no].bind_id;
-    if (cpu_id >= 0 && cpu_id != scheduler2cpu_map[esdp->no].bound_id) {
-	res = erts_bind_to_cpu(erts_cpuinfo, cpu_id);
-	if (res == 0)
-	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = cpu_id;
-	else {
-	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
-	    erts_dsprintf(dsbufp, "Scheduler %d failed to bind to cpu %d: %s\n",
-			  (int) esdp->no, cpu_id, erl_errno_id(-res));
-	    erts_send_error_to_logger_nogl(dsbufp);
-	    if (scheduler2cpu_map[esdp->no].bound_id >= 0)
-		goto unbind;
-	}
-    }
-    else if (cpu_id < 0) {
-    unbind:
-	/* Get rid of old binding */
-	res = erts_unbind_from_cpu(erts_cpuinfo);
-	if (res == 0)
-	    esdp->cpu_id = scheduler2cpu_map[esdp->no].bound_id = -1;
-	else if (res != -ENOTSUP) {
-	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
-	    erts_dsprintf(dsbufp, "Scheduler %d failed to unbind from cpu %d: %s\n",
-			  (int) esdp->no, cpu_id, erl_errno_id(-res));
-	    erts_send_error_to_logger_nogl(dsbufp);
-	}
-    }
-    if (erts_reader_groups) {
-	if (esdp->cpu_id >= 0)
-	    rg = reader_group_lookup(esdp->cpu_id);
-	else
-	    rg = (((int) esdp->no) - 1) % erts_reader_groups + 1;
-    }
-    erts_smp_runq_lock(esdp->run_queue);
-#ifdef ERTS_SMP
-    if (erts_common_run_queue)
-	erts_smp_atomic_set(&esdp->chk_cpu_bind, 0);
-    else {
-	esdp->run_queue->flags &= ~ERTS_RUNQ_FLG_CHK_CPU_BIND;
-    }
-#endif
-    erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
-    if (erts_reader_groups)
-	erts_smp_rwmtx_set_reader_group(rg);
-}
-
-static void
-signal_schedulers_bind_change(erts_cpu_topology_t *cpudata, int size)
-{
-    int s_ix = 1;
-    int cpu_ix;
-
-    if (cpu_bind_order != ERTS_CPU_BIND_NONE && size) {
-
-	cpu_bind_order_sort(cpudata, size, cpu_bind_order, 1);
-
-	for (cpu_ix = 0; cpu_ix < size && cpu_ix < erts_no_schedulers; cpu_ix++)
-	    if (erts_is_cpu_available(erts_cpuinfo, cpudata[cpu_ix].logical))
-		scheduler2cpu_map[s_ix++].bind_id = cpudata[cpu_ix].logical;
-    }
-
-    if (s_ix <= erts_no_schedulers)
-	for (; s_ix <= erts_no_schedulers; s_ix++)
-	    scheduler2cpu_map[s_ix].bind_id = -1;
-
-#ifdef ERTS_SMP
-    if (erts_common_run_queue) {
-	for (s_ix = 0; s_ix < erts_no_schedulers; s_ix++)
-	    erts_smp_atomic_set(&ERTS_SCHEDULER_IX(s_ix)->chk_cpu_bind, 1);
-	wake_all_schedulers();
-    }
-    else {
-	for (s_ix = 0; s_ix < erts_no_run_queues; s_ix++) {
-	    ErtsRunQueue *rq = ERTS_RUNQ_IX(s_ix);
-	    erts_smp_runq_lock(rq);
-	    rq->flags |= ERTS_RUNQ_FLG_CHK_CPU_BIND;
-	    erts_smp_runq_unlock(rq);
-	    wake_scheduler(rq, 0, 1);
-	};
-    }
-#else
-    check_cpu_bind(erts_get_scheduler_data());
-#endif
-}
-
-int
-erts_init_scheduler_bind_type(char *how)
-{
-    if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP)
-	return ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED;
-
-    if (!system_cpudata && !user_cpudata)
-	return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY;
-
-    if (sys_strcmp(how, "db") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
-    else if (sys_strcmp(how, "s") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_SPREAD;
-    else if (sys_strcmp(how, "ps") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
-    else if (sys_strcmp(how, "ts") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
-    else if (sys_strcmp(how, "tnnps") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
-    else if (sys_strcmp(how, "nnps") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
-    else if (sys_strcmp(how, "nnts") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
-    else if (sys_strcmp(how, "ns") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
-    else if (sys_strcmp(how, "u") == 0)
-	cpu_bind_order = ERTS_CPU_BIND_NONE;
-    else
-	return ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE;
-
-    return ERTS_INIT_SCHED_BIND_TYPE_SUCCESS;
-}
-
-/*
- * reader groups map
- */
-
-typedef struct {
-    int level[ERTS_TOPOLOGY_MAX_DEPTH+1];
-} erts_avail_cput;
-
-typedef struct {
-    int *map;
-    int size;
-    int groups;
-} erts_reader_groups_map_test;
-
-typedef struct {
-    int id;
-    int sub_levels;
-    int reader_groups;
-} erts_rg_count_t;
-
-typedef struct {
-    int logical;
-    int reader_group;
-} erts_reader_groups_map_t;
-
-typedef struct {
-    erts_reader_groups_map_t *map;
-    int map_size;
-    int logical_processors;
-    int groups;
-} erts_make_reader_groups_map_test;
-
-static int reader_groups_available_cpu_check;
-static int reader_groups_logical_processors;
-static int reader_groups_map_size;
-static erts_reader_groups_map_t *reader_groups_map;
-
-#define ERTS_TOPOLOGY_RG ERTS_TOPOLOGY_MAX_DEPTH
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test);
-
-static Eterm
-get_reader_groups_map(Process *c_p,
-		      erts_reader_groups_map_t *map,
-		      int map_size,
-		      int logical_processors)
-{
-#ifdef DEBUG
-    Eterm *endp;
-#endif
-    Eterm res = NIL, tuple;
-    Eterm *hp;
-    int i;
-
-    hp = HAlloc(c_p, logical_processors*(2+3));
-#ifdef DEBUG
-    endp = hp + logical_processors*(2+3);
-#endif
-    for (i = map_size - 1; i >= 0; i--) {
-	if (map[i].logical >= 0) {
-	    tuple = TUPLE2(hp,
-			   make_small(map[i].logical),
-			   make_small(map[i].reader_group));
-	    hp += 3;
-	    res = CONS(hp, tuple, res);
-	    hp += 2;
-	}
-    }
-    ASSERT(hp == endp);
-    return res;
-}
-
-Eterm
-erts_debug_reader_groups_map(Process *c_p, int groups)
-{
-    Eterm res;
-    erts_make_reader_groups_map_test test;
-
-    test.groups = groups;
-    make_reader_groups_map(&test);
-    if (!test.map)
-	res = NIL;
-    else {
-	res = get_reader_groups_map(c_p,
-				    test.map,
-				    test.map_size,
-				    test.logical_processors);
-	erts_free(ERTS_ALC_T_TMP, test.map);
-    }
-    return res;
-}
-
-
-Eterm
-erts_get_reader_groups_map(Process *c_p)
-{
-    Eterm res;
-    erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-    res = get_reader_groups_map(c_p,
-				reader_groups_map,
-				reader_groups_map_size,
-				reader_groups_logical_processors);
-    erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-    return res;
-}
-
-static void
-make_available_cpu_topology(erts_avail_cput *no,
-			    erts_avail_cput *avail,
-			    erts_cpu_topology_t *cpudata,
-			    int *size,
-			    int test)
-{
-    int len = *size;
-    erts_cpu_topology_t last;
-    int a, i, j;
-
-    no->level[ERTS_TOPOLOGY_NODE] = -1;
-    no->level[ERTS_TOPOLOGY_PROCESSOR] = -1;
-    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE] = -1;
-    no->level[ERTS_TOPOLOGY_CORE] = -1;
-    no->level[ERTS_TOPOLOGY_THREAD] = -1;
-    no->level[ERTS_TOPOLOGY_LOGICAL] = -1;
-
-    last.node = INT_MIN;
-    last.processor = INT_MIN;
-    last.processor_node = INT_MIN;
-    last.core = INT_MIN;
-    last.thread = INT_MIN;
-    last.logical = INT_MIN;
-
-    a = 0;
-
-    for (i = 0; i < len; i++) {
-
-	if (!test && !erts_is_cpu_available(erts_cpuinfo, cpudata[i].logical))
-	    continue;
-
-	if (last.node != cpudata[i].node)
-	    goto node;
-	if (last.processor != cpudata[i].processor)
-	    goto processor;
-	if (last.processor_node != cpudata[i].processor_node)
-	    goto processor_node;
-	if (last.core != cpudata[i].core)
-	    goto core;
-	ASSERT(last.thread != cpudata[i].thread);
-	goto thread;
-
-    node:
-	no->level[ERTS_TOPOLOGY_NODE]++;
-    processor:
-	no->level[ERTS_TOPOLOGY_PROCESSOR]++;
-    processor_node:
-	no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
-    core:
-	no->level[ERTS_TOPOLOGY_CORE]++;
-    thread:
-	no->level[ERTS_TOPOLOGY_THREAD]++;
-
-	no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
-	for (j = 0; j < ERTS_TOPOLOGY_LOGICAL; j++)
-	    avail[a].level[j] = no->level[j];
-
-	avail[a].level[ERTS_TOPOLOGY_LOGICAL] = cpudata[i].logical;
-	avail[a].level[ERTS_TOPOLOGY_RG] = 0;
-
-	ASSERT(last.logical != cpudata[a].logical);
-
-	last = cpudata[i];
-	a++;
-    }
-
-    no->level[ERTS_TOPOLOGY_NODE]++;
-    no->level[ERTS_TOPOLOGY_PROCESSOR]++;
-    no->level[ERTS_TOPOLOGY_PROCESSOR_NODE]++;
-    no->level[ERTS_TOPOLOGY_CORE]++;
-    no->level[ERTS_TOPOLOGY_THREAD]++;
-    no->level[ERTS_TOPOLOGY_LOGICAL]++;
-
-    *size = a;
-}
-
-static int
-reader_group_lookup(int logical)
-{
-    int start = logical % reader_groups_map_size;
-    int ix = start;
-
-    do {
-	if (reader_groups_map[ix].logical == logical) {
-	    ASSERT(reader_groups_map[ix].reader_group > 0);
-	    return reader_groups_map[ix].reader_group;
-	}
-	ix++;
-	if (ix == reader_groups_map_size)
-	    ix = 0;
-    } while (ix != start);
-
-    erl_exit(ERTS_ABORT_EXIT, "Logical cpu id %d not found\n", logical);
-}
-
-static void
-reader_group_insert(erts_reader_groups_map_t *map, int map_size,
-		    int logical, int reader_group)
-{
-    int start = logical % map_size;
-    int ix = start;
-
-    do {
-	if (map[ix].logical < 0) {
-	    map[ix].logical = logical;
-	    map[ix].reader_group = reader_group;
-	    return;
-	}
-	ix++;
-	if (ix == map_size)
-	    ix = 0;
-    } while (ix != start);
-
-    erl_exit(ERTS_ABORT_EXIT, "Reader groups map full\n");
-}
-
-
-static int
-sub_levels(erts_rg_count_t *rgc, int level, int aix, int avail_sz, erts_avail_cput *avail)
-{
-    int sub_level = level+1;
-    int last = -1;
-    rgc->sub_levels = 0;
-
-    do {
-	if (last != avail[aix].level[sub_level]) {
-	    rgc->sub_levels++;
-	    last = avail[aix].level[sub_level];
-	}
-	aix++;
-    }
-    while (aix < avail_sz && rgc->id == avail[aix].level[level]);
-    rgc->reader_groups = 0;
-    return aix;
-}
-
-static int
-write_reader_groups(int *rgp, erts_rg_count_t *rgcp,
-		    int level, int a,
-		    int avail_sz, erts_avail_cput *avail)
-{
-    int rg = *rgp;
-    int sub_level = level+1;
-    int sl_per_gr = rgcp->sub_levels / rgcp->reader_groups;
-    int xsl = rgcp->sub_levels % rgcp->reader_groups;
-    int sls = 0;
-    int last = -1;
-    int xsl_rg_lim = (rgcp->reader_groups - xsl) + rg + 1;
-
-    ASSERT(level < 0 || avail[a].level[level] == rgcp->id)
-
-    do {
-	if (last != avail[a].level[sub_level]) {
-	    if (!sls) {
-		sls = sl_per_gr;
-		rg++;
-		if (rg >= xsl_rg_lim)
-		    sls++;
-	    }
-	    last = avail[a].level[sub_level];
-	    sls--;
-	}
-	avail[a].level[ERTS_TOPOLOGY_RG] = rg;
-	a++;
-    } while (a < avail_sz && (level < 0
-			      || avail[a].level[level] == rgcp->id));
-
-    ASSERT(rgcp->reader_groups == rg - *rgp);
-
-    *rgp = rg;
-
-    return a;
-}
-
-static int
-rg_count_sub_levels_compare(const void *vx, const void *vy)
-{
-    erts_rg_count_t *x = (erts_rg_count_t *) vx;
-    erts_rg_count_t *y = (erts_rg_count_t *) vy;
-    if (x->sub_levels != y->sub_levels)
-	return y->sub_levels - x->sub_levels;
-    return x->id - y->id;
-}
-
-static int
-rg_count_id_compare(const void *vx, const void *vy)
-{
-    erts_rg_count_t *x = (erts_rg_count_t *) vx;
-    erts_rg_count_t *y = (erts_rg_count_t *) vy;
-    return x->id - y->id;
-}
-
-static void
-make_reader_groups_map(erts_make_reader_groups_map_test *test)
-{
-    int i, spread_level, avail_sz;
-    erts_avail_cput no, *avail;
-    erts_cpu_topology_t *cpudata;
-    erts_reader_groups_map_t *map;
-    int map_sz;
-    int groups = erts_reader_groups;
-
-    if (test) {
-	test->map = NULL;
-	test->map_size = 0;
-	groups = test->groups;
-    }
-
-    if (!groups)
-	return;
-
-    if (!test) {
-	if (reader_groups_map)
-	    erts_free(ERTS_ALC_T_RDR_GRPS_MAP, reader_groups_map);
-
-	reader_groups_logical_processors = 0;
-	reader_groups_map_size = 0;
-	reader_groups_map = NULL;
-    }
-
-    create_tmp_cpu_topology_copy(&cpudata, &avail_sz);
-
-    if (!cpudata)
-	return;
-
-    cpu_bind_order_sort(cpudata,
-			avail_sz,
-			ERTS_CPU_BIND_NO_SPREAD,
-			1);
-
-    avail = erts_alloc(ERTS_ALC_T_TMP,
-		       sizeof(erts_avail_cput)*avail_sz);
-
-    make_available_cpu_topology(&no, avail, cpudata,
-				&avail_sz, test != NULL);
-
-    destroy_tmp_cpu_topology_copy(cpudata);
-
-    map_sz = avail_sz*2+1;
-
-    if (test) {
-	map = erts_alloc(ERTS_ALC_T_TMP,
-			 (sizeof(erts_reader_groups_map_t)
-			  * map_sz));
-	test->map = map;
-	test->map_size = map_sz;
-	test->logical_processors = avail_sz;
-    }
-    else {
-	map = erts_alloc(ERTS_ALC_T_RDR_GRPS_MAP,
-			 (sizeof(erts_reader_groups_map_t)
-			  * map_sz));
-	reader_groups_map = map;
-	reader_groups_logical_processors = avail_sz;
-	reader_groups_map_size = map_sz;
-
-    }
-
-    for (i = 0; i < map_sz; i++) {
-	map[i].logical = -1;
-	map[i].reader_group = 0;
-    }
-
-    spread_level = ERTS_TOPOLOGY_CORE;
-    for (i = ERTS_TOPOLOGY_NODE; i < ERTS_TOPOLOGY_THREAD; i++) {
-	if (no.level[i] > groups) {
-	    spread_level = i;
-	    break;
-	}
-    }
-
-    if (no.level[spread_level] <= groups) {
-	int a, rg, last = -1;
-	rg = 0;
-	ASSERT(spread_level == ERTS_TOPOLOGY_CORE);
-	for (a = 0; a < avail_sz; a++) {
-	    if (last != avail[a].level[spread_level]) {
-		rg++;
-		last = avail[a].level[spread_level];
-	    }
-	    reader_group_insert(map,
-				map_sz,
-				avail[a].level[ERTS_TOPOLOGY_LOGICAL],
-				rg);
-	}
-    }
-    else { /* groups < no.level[spread_level] */
-	erts_rg_count_t *rg_count;
-	int a, rg, tl, toplevels;
-
-	tl = spread_level-1;
-
-	if (spread_level == ERTS_TOPOLOGY_NODE)
-	    toplevels = 1;
-	else
-	    toplevels = no.level[tl];
-
-	rg_count = erts_alloc(ERTS_ALC_T_TMP,
-			      toplevels*sizeof(erts_rg_count_t));
-
-	if (toplevels == 1) {
-	    rg_count[0].id = 0;
-	    rg_count[0].sub_levels = no.level[spread_level];
-	    rg_count[0].reader_groups = groups;
-	}
-	else {
-	    int rgs_per_tl, rgs;
-	    rgs = groups;
-	    rgs_per_tl = rgs / toplevels;
-
-	    a = 0;
-	    for (i = 0; i < toplevels; i++) {
-		rg_count[i].id = avail[a].level[tl];
-		a = sub_levels(&rg_count[i], tl, a, avail_sz, avail);
-	    }
-
-	    qsort(rg_count,
-		  toplevels,
-		  sizeof(erts_rg_count_t),
-		  rg_count_sub_levels_compare);
-
-	    for (i = 0; i < toplevels; i++) {
-		if (rg_count[i].sub_levels < rgs_per_tl) {
-		    rg_count[i].reader_groups = rg_count[i].sub_levels;
-		    rgs -= rg_count[i].sub_levels;
-		}
-		else {
-		    rg_count[i].reader_groups = rgs_per_tl;
-		    rgs -= rgs_per_tl;
-		}
-	    }
-
-	    while (rgs > 0) {
-		for (i = 0; i < toplevels; i++) {
-		    if (rg_count[i].sub_levels == rg_count[i].reader_groups)
-			break;
-		    else {
-			rg_count[i].reader_groups++;
-			if (--rgs == 0)
-			    break;
-		    }
-		}
-	    }
-
-	    qsort(rg_count,
-		  toplevels,
-		  sizeof(erts_rg_count_t),
-		  rg_count_id_compare);
-	}
-
-	a = i = rg = 0;
-	while (a < avail_sz) {
-	    a = write_reader_groups(&rg, &rg_count[i], tl,
-				    a, avail_sz, avail);
-	    i++;
-	}
-
-	ASSERT(groups == rg);
-
-	for (a = 0; a < avail_sz; a++)
-	    reader_group_insert(map,
-				map_sz,
-				avail[a].level[ERTS_TOPOLOGY_LOGICAL],
-				avail[a].level[ERTS_TOPOLOGY_RG]);
-
-	erts_free(ERTS_ALC_T_TMP, rg_count);
-    }
-
-    erts_free(ERTS_ALC_T_TMP, avail);
-}
-
-/*
- * CPU topology
- */
-
-typedef struct {
-    int *id;
-    int used;
-    int size;
-} ErtsCpuTopIdSeq;
-
-typedef struct {
-    ErtsCpuTopIdSeq logical;
-    ErtsCpuTopIdSeq thread;
-    ErtsCpuTopIdSeq core;
-    ErtsCpuTopIdSeq processor_node;
-    ErtsCpuTopIdSeq processor;
-    ErtsCpuTopIdSeq node;
-} ErtsCpuTopEntry;
-
-static void
-init_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
-    int size = 10;
-    cte->logical.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-				 sizeof(int)*size);
-    cte->logical.size = size;
-    cte->thread.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-				sizeof(int)*size);
-    cte->thread.size = size;
-    cte->core.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-			      sizeof(int)*size);
-    cte->core.size = size;
-    cte->processor_node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-					sizeof(int)*size);
-    cte->processor_node.size = size;
-    cte->processor.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-				   sizeof(int)*size);
-    cte->processor.size = size;
-    cte->node.id = erts_alloc(ERTS_ALC_T_TMP_CPU_IDS,
-			      sizeof(int)*size);
-    cte->node.size = size;
-}
-
-static void
-destroy_cpu_top_entry(ErtsCpuTopEntry *cte)
-{
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->logical.id);
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->thread.id);
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->core.id);
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor_node.id);
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->processor.id);
-    erts_free(ERTS_ALC_T_TMP_CPU_IDS, cte->node.id);
-}
-
-static int
-get_cput_value_or_range(int *v, int *vr, char **str)
-{
-    long l;
-    char *c = *str;
-    errno = 0;
-    if (!isdigit((unsigned char)*c))
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
-    l = strtol(c, &c, 10);
-    if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID;
-    *v = (int) l;
-    if (*c == '-') {
-	c++;
-	if (!isdigit((unsigned char)*c))
-	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-	l = strtol(c, &c, 10);
-	if (errno != 0 || l < 0 || ERTS_MAX_CPU_TOPOLOGY_ID < l)
-	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-	*vr = (int) l;
-    }
-    *str = c;
-    return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_id_seq(ErtsCpuTopIdSeq *idseq, char **str)
-{
-    int ix = 0;
-    int need_size = 0;
-    char *c = *str;
-
-    while (1) {
-	int res;
-	int val;
-	int nids;
-	int val_range = -1;
-	res = get_cput_value_or_range(&val, &val_range, &c);
-	if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
-	    return res;
-	if (val_range < 0 || val_range == val)
-	    nids = 1;
-	else {
-	    if (val_range > val)
-		nids = val_range - val + 1;
-	    else
-		nids = val - val_range + 1;
-	}
-	need_size += nids;
-	if (need_size > idseq->size) {
-	    idseq->size = need_size + 10;
-	    idseq->id = erts_realloc(ERTS_ALC_T_TMP_CPU_IDS,
-				      idseq->id,
-				      sizeof(int)*idseq->size);
-	}
-	if (nids == 1)
-	    idseq->id[ix++] = val;
-	else if (val_range > val) {
-	    for (; val <= val_range; val++)
-		idseq->id[ix++] = val;
-	}
-	else {
-	    for (; val >= val_range; val--)
-		idseq->id[ix++] = val;
-	}
-	if (*c != ',')
-	    break;
-	c++;
-    }
-    *str = c;
-    idseq->used = ix;
-    return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-get_cput_entry(ErtsCpuTopEntry *cput, char **str)
-{
-    int h;
-    char *c = *str;
-
-    cput->logical.used = 0;
-    cput->thread.id[0] = 0;
-    cput->thread.used = 1;
-    cput->core.id[0] = 0;
-    cput->core.used = 1;
-    cput->processor_node.id[0] = -1;
-    cput->processor_node.used = 1;
-    cput->processor.id[0] = 0;
-    cput->processor.used = 1;
-    cput->node.id[0] = -1;
-    cput->node.used = 1;
-
-    h = ERTS_TOPOLOGY_MAX_DEPTH;
-    while (*c != ':' && *c != '\0') {
-	int res;
-	ErtsCpuTopIdSeq *idseqp;
-	switch (*c++) {
-	case 'L':
-	    if (h <= ERTS_TOPOLOGY_LOGICAL)
-		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-	    idseqp = &cput->logical;
-	    h = ERTS_TOPOLOGY_LOGICAL;
-	    break;
-	case 't':
-	case 'T':
-	    if (h <= ERTS_TOPOLOGY_THREAD)
-		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-	    idseqp = &cput->thread;
-	    h = ERTS_TOPOLOGY_THREAD;
-	    break;
-	case 'c':
-	case 'C':
-	    if (h <= ERTS_TOPOLOGY_CORE)
-		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-	    idseqp = &cput->core;
-	    h = ERTS_TOPOLOGY_CORE;
-	    break;
-	case 'p':
-	case 'P':
-	    if (h <= ERTS_TOPOLOGY_PROCESSOR)
-		return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-	    idseqp = &cput->processor;
-	    h = ERTS_TOPOLOGY_PROCESSOR;
-	    break;
-	case 'n':
-	case 'N':
-	    if (h <= ERTS_TOPOLOGY_PROCESSOR) {
-	    do_node:
-		if (h <= ERTS_TOPOLOGY_NODE)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-		idseqp = &cput->node;
-		h = ERTS_TOPOLOGY_NODE;
-	    }
-	    else {
-		int p_node = 0;
-		char *p_chk = c;
-		while (*p_chk != '\0' && *p_chk != ':') {
-		    if (*p_chk == 'p' || *p_chk == 'P') {
-			p_node = 1;
-			break;
-		    }
-		    p_chk++;
-		}
-		if (!p_node)
-		    goto do_node;
-		if (h <= ERTS_TOPOLOGY_PROCESSOR_NODE)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY;
-		idseqp = &cput->processor_node;
-		h = ERTS_TOPOLOGY_PROCESSOR_NODE;
-	    }
-	    break;
-	default:
-	    return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE;
-	}
-	res = get_cput_id_seq(idseqp, &c);
-	if (res != ERTS_INIT_CPU_TOPOLOGY_OK)
-		return res;
-    }
-
-    if (cput->logical.used < 1)
-	return ERTS_INIT_CPU_TOPOLOGY_MISSING_LID;
-
-    if (*c == ':') {
-	c++;
-    }
-
-    if (cput->thread.used != 1
-	&& cput->thread.used != cput->logical.used)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-    if (cput->core.used != 1
-	&& cput->core.used != cput->logical.used)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-    if (cput->processor_node.used != 1
-	&& cput->processor_node.used != cput->logical.used)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-    if (cput->processor.used != 1
-	&& cput->processor.used != cput->logical.used)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-    if (cput->node.used != 1
-	&& cput->node.used != cput->logical.used)
-	return ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE;
-
-    *str = c;
-    return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-static int
-verify_topology(erts_cpu_topology_t *cpudata, int size)
-{
-    if (size > 0) {
-	int *logical;
-	int node, processor, no_nodes, i;
-
-	/* Verify logical ids */
-	logical = erts_alloc(ERTS_ALC_T_TMP, sizeof(int)*size);
-
-	for (i = 0; i < size; i++)
-	    logical[i] = cpudata[i].logical;
-
-	qsort(logical, size, sizeof(int), int_cmp);
-	for (i = 0; i < size-1; i++) {
-	    if (logical[i] == logical[i+1]) {
-		erts_free(ERTS_ALC_T_TMP, logical);
-		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS;
-	    }
-	}
-
-	erts_free(ERTS_ALC_T_TMP, logical);
-
-	qsort(cpudata, size, sizeof(erts_cpu_topology_t), processor_order_cmp);
-
-	/* Verify unique entities */
-
-	for (i = 1; i < size; i++) {
-	    if (cpudata[i-1].processor == cpudata[i].processor
-		&& cpudata[i-1].node == cpudata[i].node
-		&& (cpudata[i-1].processor_node
-		    == cpudata[i].processor_node)
-		&& cpudata[i-1].core == cpudata[i].core
-		&& cpudata[i-1].thread == cpudata[i].thread) {
-		return ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES;
-	    }
-	}
-
-	/* Verify numa nodes */
-	node = cpudata[0].node;
-	processor = cpudata[0].processor;
-	no_nodes = cpudata[0].node < 0 && cpudata[0].processor_node < 0;
-	for (i = 1; i < size; i++) {
-	    if (no_nodes) {
-		if (cpudata[i].node >= 0 || cpudata[i].processor_node >= 0)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
-	    }
-	    else {
-		if (cpudata[i].processor == processor && cpudata[i].node != node)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
-		node = cpudata[i].node;
-		processor = cpudata[i].processor;
-		if (node >= 0 && cpudata[i].processor_node >= 0)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
-		if (node < 0 && cpudata[i].processor_node < 0)
-		    return ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES;
-	    }
-	}
-    }
-
-    return ERTS_INIT_CPU_TOPOLOGY_OK;
-}
-
-int
-erts_init_cpu_topology(char *topology_str)
-{
-    ErtsCpuTopEntry cput;
-    int need_size;
-    char *c;
-    int ix;
-    int error = ERTS_INIT_CPU_TOPOLOGY_OK;
-
-    if (user_cpudata)
-	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
-    user_cpudata_size = 10;
-
-    user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
-			      (sizeof(erts_cpu_topology_t)
-			       * user_cpudata_size));
-
-    init_cpu_top_entry(&cput);
-
-    ix = 0;
-    need_size = 0;
-
-    c = topology_str;
-    if (*c == '\0') {
-	error = ERTS_INIT_CPU_TOPOLOGY_MISSING;
-	goto fail;
-    }
-    do {
-	int r;
-	error = get_cput_entry(&cput, &c);
-	if (error != ERTS_INIT_CPU_TOPOLOGY_OK)
-	    goto fail;
-	need_size += cput.logical.used;
-	if (user_cpudata_size < need_size) {
-	    user_cpudata_size = need_size + 10;
-	    user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
-					user_cpudata,
-					(sizeof(erts_cpu_topology_t)
-					 * user_cpudata_size));
-	}
-
-	ASSERT(cput.thread.used == 1
-	       || cput.thread.used == cput.logical.used);
-	ASSERT(cput.core.used == 1
-	       || cput.core.used == cput.logical.used);
-	ASSERT(cput.processor_node.used == 1
-	       || cput.processor_node.used == cput.logical.used);
-	ASSERT(cput.processor.used == 1
-	       || cput.processor.used == cput.logical.used);
-	ASSERT(cput.node.used == 1
-	       || cput.node.used == cput.logical.used);
-
-	for (r = 0; r < cput.logical.used; r++) {
-	    user_cpudata[ix].logical = cput.logical.id[r];
-	    user_cpudata[ix].thread =
-		cput.thread.id[cput.thread.used == 1 ? 0 : r];
-	    user_cpudata[ix].core =
-		cput.core.id[cput.core.used == 1 ? 0 : r];
-	    user_cpudata[ix].processor_node =
-		cput.processor_node.id[cput.processor_node.used == 1 ? 0 : r];
-	    user_cpudata[ix].processor =
-		cput.processor.id[cput.processor.used == 1 ? 0 : r];
-	    user_cpudata[ix].node =
-		cput.node.id[cput.node.used == 1 ? 0 : r];
-	    ix++;
-	}
-    } while (*c != '\0');
-
-    if (user_cpudata_size != ix) {
-	user_cpudata_size = ix;
-	user_cpudata = erts_realloc(ERTS_ALC_T_CPUDATA,
-				    user_cpudata,
-				    (sizeof(erts_cpu_topology_t)
-				     * user_cpudata_size));
-    }
-
-    error = verify_topology(user_cpudata, user_cpudata_size);
-    if (error == ERTS_INIT_CPU_TOPOLOGY_OK) {
-	destroy_cpu_top_entry(&cput);
-	return ERTS_INIT_CPU_TOPOLOGY_OK;
-    }
-
- fail:
-    if (user_cpudata)
-	erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
-    user_cpudata_size = 0;
-    destroy_cpu_top_entry(&cput);
-    return error;
-}
-
-#define ERTS_GET_CPU_TOPOLOGY_ERROR		-1
-#define ERTS_GET_USED_CPU_TOPOLOGY		0
-#define ERTS_GET_DETECTED_CPU_TOPOLOGY		1
-#define ERTS_GET_DEFINED_CPU_TOPOLOGY		2
-
-static Eterm get_cpu_topology_term(Process *c_p, int type);
-
-Eterm
-erts_set_cpu_topology(Process *c_p, Eterm term)
-{
-    erts_cpu_topology_t *cpudata = NULL;
-    int cpudata_size = 0;
-    Eterm res;
-
-    erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-    res = get_cpu_topology_term(c_p, ERTS_GET_USED_CPU_TOPOLOGY);
-    if (term == am_undefined) {
-	if (user_cpudata)
-	    erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
-	user_cpudata = NULL;
-	user_cpudata_size = 0;
-
-	if (cpu_bind_order != ERTS_CPU_BIND_NONE && system_cpudata) {
-	    cpudata_size = system_cpudata_size;
-	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
-				 (sizeof(erts_cpu_topology_t)
-				  * cpudata_size));
-
-	    sys_memcpy((void *) cpudata,
-		       (void *) system_cpudata,
-		       sizeof(erts_cpu_topology_t)*cpudata_size);
-	}
-    }
-    else if (is_not_list(term)) {
-    error:
-	res = THE_NON_VALUE;
-	goto done;
-    }
-    else {
-	Eterm list = term;
-	int ix = 0;
-
-	cpudata_size = 100;
-	cpudata = erts_alloc(ERTS_ALC_T_TMP,
-			     (sizeof(erts_cpu_topology_t)
-			      * cpudata_size));
-
-	while (is_list(list)) {
-	    Eterm *lp = list_val(list);
-	    Eterm cpu = CAR(lp);
-	    Eterm* tp;
-	    Sint id;
-		
-	    if (is_not_tuple(cpu))
-		goto error;
-
-	    tp = tuple_val(cpu);
-
-	    if (arityval(tp[0]) != 7 || tp[1] != am_cpu)
-		goto error;
-
-	    if (ix >= cpudata_size) {
-		cpudata_size += 100;
-		cpudata = erts_realloc(ERTS_ALC_T_TMP,
-				       cpudata,
-				       (sizeof(erts_cpu_topology_t)
-					* cpudata_size));
-	    }
-
-	    id = signed_val(tp[2]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].node = (int) id;
-
-	    id = signed_val(tp[3]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].processor = (int) id;
-
-	    id = signed_val(tp[4]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].processor_node = (int) id;
-
-	    id = signed_val(tp[5]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].core = (int) id;
-
-	    id = signed_val(tp[6]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].thread = (int) id;
-
-	    id = signed_val(tp[7]);
-	    if (id < -1 || ERTS_MAX_CPU_TOPOLOGY_ID < id)
-		goto error;
-	    cpudata[ix].logical = (int) id;
-
-	    list = CDR(lp);
-	    ix++;
-	}
-
-	if (is_not_nil(list))
-	    goto error;
-	
-	cpudata_size = ix;
-
-	if (ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(cpudata, cpudata_size))
-	    goto error;
-
-	if (user_cpudata_size != cpudata_size) {
-	    if (user_cpudata)
-		erts_free(ERTS_ALC_T_CPUDATA, user_cpudata);
-	    user_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
-				      sizeof(erts_cpu_topology_t)*cpudata_size);
-	    user_cpudata_size = cpudata_size;
-	}
-
-	sys_memcpy((void *) user_cpudata,
-		   (void *) cpudata,
-		   sizeof(erts_cpu_topology_t)*cpudata_size);
-    }
-
-    make_reader_groups_map(NULL);
-
-    signal_schedulers_bind_change(cpudata, cpudata_size);
-
- done:
-    erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
-    if (cpudata)
-	erts_free(ERTS_ALC_T_TMP, cpudata);
-
-    return res;
-}
-
-static Eterm
-bound_schedulers_term(ErtsCpuBindOrder order)
-{
-    switch (order) {
-    case ERTS_CPU_BIND_SPREAD: {
-	ERTS_DECL_AM(spread);
-	return AM_spread;
-    }
-    case ERTS_CPU_BIND_PROCESSOR_SPREAD: {
-	ERTS_DECL_AM(processor_spread);
-	return AM_processor_spread;
-    }
-    case ERTS_CPU_BIND_THREAD_SPREAD: {
-	ERTS_DECL_AM(thread_spread);
-	return AM_thread_spread;
-    }
-    case ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD: {
-	ERTS_DECL_AM(thread_no_node_processor_spread);
-	return AM_thread_no_node_processor_spread;
-    }
-    case ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD: {
-	ERTS_DECL_AM(no_node_processor_spread);
-	return AM_no_node_processor_spread;
-    }
-    case ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD: {
-	ERTS_DECL_AM(no_node_thread_spread);
-	return AM_no_node_thread_spread;
-    }
-    case ERTS_CPU_BIND_NO_SPREAD: {
-	ERTS_DECL_AM(no_spread);
-	return AM_no_spread;
-    }
-    case ERTS_CPU_BIND_NONE: {
-	ERTS_DECL_AM(unbound);
-	return AM_unbound;
-    }
-    default:
-	ASSERT(0);
-	return THE_NON_VALUE;
-    }
-}
-
-Eterm
-erts_bound_schedulers_term(Process *c_p)
-{
-    ErtsCpuBindOrder order;
-    erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-    order = cpu_bind_order;
-    erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-    return bound_schedulers_term(order);
-}
-
-static void
-create_tmp_cpu_topology_copy(erts_cpu_topology_t **cpudata, int *cpudata_size)
-{
-    if (user_cpudata) {
-	*cpudata_size = user_cpudata_size;
-	*cpudata = erts_alloc(ERTS_ALC_T_TMP,
-			      (sizeof(erts_cpu_topology_t)
-			       * (*cpudata_size)));
-	sys_memcpy((void *) *cpudata,
-		   (void *) user_cpudata,
-		   sizeof(erts_cpu_topology_t)*(*cpudata_size));
-    }
-    else if (system_cpudata) {
-	*cpudata_size = system_cpudata_size;
-	*cpudata = erts_alloc(ERTS_ALC_T_TMP,
-			      (sizeof(erts_cpu_topology_t)
-			       * (*cpudata_size)));
-	sys_memcpy((void *) *cpudata,
-		   (void *) system_cpudata,
-		   sizeof(erts_cpu_topology_t)*(*cpudata_size));
-    }
-    else {
-	*cpudata = NULL;
-	*cpudata_size = 0;
-    }
-}
-
-static void
-destroy_tmp_cpu_topology_copy(erts_cpu_topology_t *cpudata)
-{
-    if (cpudata)
-	erts_free(ERTS_ALC_T_TMP, cpudata);
-}
-
-Eterm
-erts_bind_schedulers(Process *c_p, Eterm how)
-{
-    Eterm res;
-    erts_cpu_topology_t *cpudata;
-    int cpudata_size;
-    ErtsCpuBindOrder old_cpu_bind_order;
-
-    erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-
-    if (erts_bind_to_cpu(erts_cpuinfo, -1) == -ENOTSUP) {
-	ERTS_BIF_PREP_ERROR(res, c_p, EXC_NOTSUP);
-    }
-    else {
-
-	old_cpu_bind_order = cpu_bind_order;
-
-	if (ERTS_IS_ATOM_STR("default_bind", how))
-	    cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
-	else if (ERTS_IS_ATOM_STR("spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_SPREAD;
-	else if (ERTS_IS_ATOM_STR("processor_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
-	else if (ERTS_IS_ATOM_STR("thread_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
-	else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
-	else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
-	else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
-	else if (ERTS_IS_ATOM_STR("no_spread", how))
-	    cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
-	else if (ERTS_IS_ATOM_STR("unbound", how))
-	    cpu_bind_order = ERTS_CPU_BIND_NONE;
-	else {
-	    cpu_bind_order = old_cpu_bind_order;
-	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
-	    goto done;
-	}
-
-	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-
-	if (!cpudata) {
-	    cpu_bind_order = old_cpu_bind_order;
-	    ERTS_BIF_PREP_ERROR(res, c_p, BADARG);
-	    goto done;
-	}
-
-	signal_schedulers_bind_change(cpudata, cpudata_size);
-
-	destroy_tmp_cpu_topology_copy(cpudata);
-    
-	res = bound_schedulers_term(old_cpu_bind_order);
-    }
-
- done:
-
-    erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-
-    return res;
-}
-
-Eterm
-erts_fake_scheduler_bindings(Process *p, Eterm how)
-{
-    ErtsCpuBindOrder fake_cpu_bind_order;
-    erts_cpu_topology_t *cpudata;
-    int cpudata_size;
-    Eterm res;
-
-    if (ERTS_IS_ATOM_STR("default_bind", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_DEFAULT_BIND;
-    else if (ERTS_IS_ATOM_STR("spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_SPREAD;
-    else if (ERTS_IS_ATOM_STR("processor_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_PROCESSOR_SPREAD;
-    else if (ERTS_IS_ATOM_STR("thread_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_SPREAD;
-    else if (ERTS_IS_ATOM_STR("thread_no_node_processor_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD;
-    else if (ERTS_IS_ATOM_STR("no_node_processor_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD;
-    else if (ERTS_IS_ATOM_STR("no_node_thread_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD;
-    else if (ERTS_IS_ATOM_STR("no_spread", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_NO_SPREAD;
-    else if (ERTS_IS_ATOM_STR("unbound", how))
-	fake_cpu_bind_order = ERTS_CPU_BIND_NONE;
-    else {
-	ERTS_BIF_PREP_ERROR(res, p, BADARG);
-	return res;
-    }
-
-    erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-    create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-    erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-
-    if (!cpudata || fake_cpu_bind_order == ERTS_CPU_BIND_NONE)
-	ERTS_BIF_PREP_RET(res, am_false);
-    else {
-	int i;
-	Eterm *hp;
-	
-	cpu_bind_order_sort(cpudata, cpudata_size, fake_cpu_bind_order, 1);
-
-#ifdef ERTS_FAKE_SCHED_BIND_PRINT_SORTED_CPU_DATA
-
-	erts_fprintf(stderr, "node:          ");
-	for (i = 0; i < cpudata_size; i++)
-	    erts_fprintf(stderr, " %2d", cpudata[i].node);
-	erts_fprintf(stderr, "\n");
-	erts_fprintf(stderr, "processor:     ");
-	for (i = 0; i < cpudata_size; i++)
-	    erts_fprintf(stderr, " %2d", cpudata[i].processor);
-	erts_fprintf(stderr, "\n");
-	if (fake_cpu_bind_order != ERTS_CPU_BIND_THREAD_NO_NODE_PROCESSOR_SPREAD
-	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_PROCESSOR_SPREAD
-	    && fake_cpu_bind_order != ERTS_CPU_BIND_NO_NODE_THREAD_SPREAD) {
-	    erts_fprintf(stderr, "processor_node:");
-	    for (i = 0; i < cpudata_size; i++)
-		erts_fprintf(stderr, " %2d", cpudata[i].processor_node);
-	    erts_fprintf(stderr, "\n");
-	}
-	erts_fprintf(stderr, "core:          ");
-	for (i = 0; i < cpudata_size; i++)
-	    erts_fprintf(stderr, " %2d", cpudata[i].core);
-	erts_fprintf(stderr, "\n");
-	erts_fprintf(stderr, "thread:        ");
-	for (i = 0; i < cpudata_size; i++)
-	    erts_fprintf(stderr, " %2d", cpudata[i].thread);
-	erts_fprintf(stderr, "\n");
-	erts_fprintf(stderr, "logical:       ");
-	for (i = 0; i < cpudata_size; i++)
-	    erts_fprintf(stderr, " %2d", cpudata[i].logical);
-	erts_fprintf(stderr, "\n");
-#endif
-
-	hp = HAlloc(p, cpudata_size+1);
-	ERTS_BIF_PREP_RET(res, make_tuple(hp));
-	*hp++ = make_arityval((Uint) cpudata_size);
-	for (i = 0; i < cpudata_size; i++)
-	    *hp++ = make_small((Uint) cpudata[i].logical);
-    }
-
-    destroy_tmp_cpu_topology_copy(cpudata);
-
-    return res;
-}
-
-Eterm
-erts_get_schedulers_binds(Process *c_p)
-{
-    int ix;
-    ERTS_DECL_AM(unbound);
-    Eterm *hp = HAlloc(c_p, erts_no_schedulers+1);
-    Eterm res = make_tuple(hp);
-
-    *(hp++) = make_arityval(erts_no_schedulers);
-    erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-    for (ix = 1; ix <= erts_no_schedulers; ix++)
-	*(hp++) = (scheduler2cpu_map[ix].bound_id >= 0
-		   ? make_small(scheduler2cpu_map[ix].bound_id)
-		   : AM_unbound);
-    erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-    return res;
-}
-
-static Eterm
-bld_topology_term(Eterm **hpp,
-		  Uint *hszp,
-		  erts_cpu_topology_t *cpudata,
-		  int size)
-{
-    Eterm res = NIL;
-    int i;
-
-    if (size == 0)
-	return am_undefined;
-
-    for (i = size-1; i >= 0; i--) {
-	res = erts_bld_cons(hpp,
-			    hszp,
-			    erts_bld_tuple(hpp,
-					   hszp,
-					   7,
-					   am_cpu,
-					   make_small(cpudata[i].node),
-					   make_small(cpudata[i].processor),
-					   make_small(cpudata[i].processor_node),
-					   make_small(cpudata[i].core),
-					   make_small(cpudata[i].thread),
-					   make_small(cpudata[i].logical)),
-			    res);
-    }
-    return res;
-}
-
-static Eterm
-get_cpu_topology_term(Process *c_p, int type)
-{
-#ifdef DEBUG
-    Eterm *hp_end;
-#endif
-    Eterm *hp;
-    Uint hsz;
-    Eterm res = THE_NON_VALUE;
-    erts_cpu_topology_t *cpudata = NULL;
-    int size = 0;
-
-    switch (type) {
-    case ERTS_GET_USED_CPU_TOPOLOGY:
-	if (user_cpudata)
-	    goto defined;
-	else
-	    goto detected;
-    case ERTS_GET_DETECTED_CPU_TOPOLOGY:
-    detected:
-	if (!system_cpudata)
-	    res = am_undefined;
-	else {
-	    size = system_cpudata_size;
-	    cpudata = erts_alloc(ERTS_ALC_T_TMP,
-				 (sizeof(erts_cpu_topology_t)
-				  * size));
-	    sys_memcpy((void *) cpudata,
-		       (void *) system_cpudata,
-		       sizeof(erts_cpu_topology_t)*size);
-	}
-	break;
-    case ERTS_GET_DEFINED_CPU_TOPOLOGY:
-    defined:
-	if (!user_cpudata)
-	    res = am_undefined;
-	else {
-	    size = user_cpudata_size;
-	    cpudata = user_cpudata;
-	}
-	break;
-    default:
-	erl_exit(ERTS_ABORT_EXIT, "Bad cpu topology type: %d\n", type);
-	break;
-    }
-
-    if (res == am_undefined) {
-	ASSERT(!cpudata);
-	return res;
-    }
-
-    hsz = 0;
-
-    bld_topology_term(NULL, &hsz,
-		      cpudata, size);
-
-    hp = HAlloc(c_p, hsz);
-
-#ifdef DEBUG
-    hp_end = hp + hsz;
-#endif
-
-    res = bld_topology_term(&hp, NULL,
-			    cpudata, size);
-
-    ASSERT(hp_end == hp);
-
-    if (cpudata && cpudata != system_cpudata && cpudata != user_cpudata)
-	erts_free(ERTS_ALC_T_TMP, cpudata);
-
-    return res;
-}
-
-Eterm
-erts_get_cpu_topology_term(Process *c_p, Eterm which)
-{
-    Eterm res;
-    int type;
-    erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-    if (ERTS_IS_ATOM_STR("used", which))
-	type = ERTS_GET_USED_CPU_TOPOLOGY;
-    else if (ERTS_IS_ATOM_STR("detected", which))
-	type = ERTS_GET_DETECTED_CPU_TOPOLOGY;
-    else if (ERTS_IS_ATOM_STR("defined", which))
-	type = ERTS_GET_DEFINED_CPU_TOPOLOGY;
-    else
-	type = ERTS_GET_CPU_TOPOLOGY_ERROR;
-    if (type == ERTS_GET_CPU_TOPOLOGY_ERROR)
-	res = THE_NON_VALUE;
-    else
-	res = get_cpu_topology_term(c_p, type);
-    erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-    return res;
-}
-
-static void
-early_cpu_bind_init(void)
-{
-    user_cpudata = NULL;
-    user_cpudata_size = 0;
-
-    system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
-    system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
-				(sizeof(erts_cpu_topology_t)
-				 * system_cpudata_size));
-
-    cpu_bind_order = ERTS_CPU_BIND_UNDEFINED;
-
-    reader_groups_available_cpu_check = 1;
-    reader_groups_logical_processors = 0;
-    reader_groups_map_size = 0;
-    reader_groups_map = NULL;
-
-    if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
-	|| ERTS_INIT_CPU_TOPOLOGY_OK != verify_topology(system_cpudata,
-							system_cpudata_size)) {
-	erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
-	system_cpudata = NULL;
-	system_cpudata_size = 0;
-    }
-}
-
-static void
-late_cpu_bind_init(void)
-{
-    int ix;
-
-    erts_smp_rwmtx_init(&erts_cpu_bind_rwmtx, "cpu_bind");
-
-    scheduler2cpu_map = erts_alloc(ERTS_ALC_T_CPUDATA,
-				   (sizeof(ErtsCpuBindData)
-				    * (erts_no_schedulers+1)));
-    for (ix = 1; ix <= erts_no_schedulers; ix++) {
-	scheduler2cpu_map[ix].bind_id = -1;
-	scheduler2cpu_map[ix].bound_id = -1;
-    }
-
-    if (cpu_bind_order == ERTS_CPU_BIND_UNDEFINED) {
-	int ncpus = erts_get_cpu_configured(erts_cpuinfo);
-	if (ncpus < 1 || erts_no_schedulers < ncpus)
-	    cpu_bind_order = ERTS_CPU_BIND_NONE;
-	else
-	    cpu_bind_order = ((system_cpudata || user_cpudata)
-			      && (erts_bind_to_cpu(erts_cpuinfo, -1) != -ENOTSUP)
-			      ? ERTS_CPU_BIND_DEFAULT_BIND
-			      : ERTS_CPU_BIND_NONE);
-    }
-
-    make_reader_groups_map(NULL);
-
-    if (cpu_bind_order != ERTS_CPU_BIND_NONE) {
-	erts_cpu_topology_t *cpudata;
-	int cpudata_size;
-	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-	signal_schedulers_bind_change(cpudata, cpudata_size);
-	destroy_tmp_cpu_topology_copy(cpudata);
-    }
-}
-
-int
-erts_update_cpu_info(void)
-{
-    int changed;
-    erts_smp_rwmtx_rwlock(&erts_cpu_bind_rwmtx);
-    changed = erts_cpu_info_update(erts_cpuinfo);
-    if (changed) {
-	erts_cpu_topology_t *cpudata;
-	int cpudata_size;
-
-	if (system_cpudata)
-	    erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
-
-	system_cpudata_size = erts_get_cpu_topology_size(erts_cpuinfo);
-	if (!system_cpudata_size)
-	    system_cpudata = NULL;
-	else {
-	    system_cpudata = erts_alloc(ERTS_ALC_T_CPUDATA,
-					(sizeof(erts_cpu_topology_t)
-					 * system_cpudata_size));
-
-	    if (!erts_get_cpu_topology(erts_cpuinfo, system_cpudata)
-		|| (ERTS_INIT_CPU_TOPOLOGY_OK
-		    != verify_topology(system_cpudata,
-				       system_cpudata_size))) {
-		erts_free(ERTS_ALC_T_CPUDATA, system_cpudata);
-		system_cpudata = NULL;
-		system_cpudata_size = 0;
-	    }
-	}
-
-	create_tmp_cpu_topology_copy(&cpudata, &cpudata_size);
-	signal_schedulers_bind_change(cpudata, cpudata_size);
-	destroy_tmp_cpu_topology_copy(cpudata);
-    }
-    erts_smp_rwmtx_rwunlock(&erts_cpu_bind_rwmtx);
-    return changed;
-}
-
 #ifdef ERTS_SMP
 
 static void
@@ -7069,7 +5091,7 @@ Process *schedule(Process *p, int calls)
 	    }
 	    if ((rq->flags & ERTS_RUNQ_FLG_CHK_CPU_BIND)
 		|| erts_smp_atomic_read(&esdp->chk_cpu_bind)) {
-		check_cpu_bind(esdp);
+		erts_sched_check_cpu_bind(esdp);
 	    }
 	}
 
@@ -7165,7 +5187,9 @@ Process *schedule(Process *p, int calls)
 
 	    erts_smp_atomic_set(&function_calls, 0);
 	    fcalls = 0;
+
 	    ASSERT(!erts_port_task_have_outstanding_io_tasks());
+
 #ifdef ERTS_SMP
 	    /* erts_sys_schedule_interrupt(0); */
 #endif
@@ -7497,6 +5521,15 @@ erts_schedule_misc_op(void (*func)(void *), void *arg)
     ErtsRunQueue *rq = erts_get_runq_current(NULL);
     ErtsMiscOpList *molp = misc_op_list_alloc();
 
+    if (!rq) {
+	/*
+	 * This can only happen when the sys msg dispatcher
+	 * thread schedules misc ops (this happens *very*
+	 * seldom; only when trace drivers are unloaded).
+	 */
+	rq =  ERTS_RUNQ_IX(0);
+    }
+
     erts_smp_runq_lock(rq);
 
     while (rq->misc.evac_runq) {
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 4365e409e5..c038e57b65 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -89,7 +89,6 @@ extern int erts_sched_thread_suggested_stack_size;
 #define ERTS_SCHED_THREAD_MAX_STACK_SIZE 8192	/* Kilo words */
 
 #ifdef ERTS_SMP
-extern Uint erts_max_main_threads;
 #include "erl_bits.h"
 #endif
 
@@ -426,6 +425,13 @@ struct ErtsSchedulerData_ {
 #endif
 };
 
+typedef union {
+    ErtsSchedulerData esd;
+    char align[ERTS_ALC_CACHE_LINE_ALIGN_SIZE(sizeof(ErtsSchedulerData))];
+} ErtsAlignedSchedulerData;
+
+extern ErtsAlignedSchedulerData *erts_aligned_scheduler_data;
+
 #ifndef ERTS_SMP
 extern ErtsSchedulerData *erts_scheduler_data;
 #endif
@@ -1007,27 +1013,12 @@ extern struct erts_system_profile_flags_t erts_system_profile_flags;
 	    (p)->flags &= ~F_TIMO; \
     } while (0)
 
-
-#define ERTS_INIT_SCHED_BIND_TYPE_SUCCESS		0
-#define ERTS_INIT_SCHED_BIND_TYPE_NOT_SUPPORTED		1
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_CPU_TOPOLOGY	2
-#define ERTS_INIT_SCHED_BIND_TYPE_ERROR_NO_BAD_TYPE	3
-
-int erts_init_scheduler_bind_type(char *how);
-
-#define ERTS_INIT_CPU_TOPOLOGY_OK			0
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID		1
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_RANGE		2
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_HIERARCHY	3
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_ID_TYPE		4
-#define ERTS_INIT_CPU_TOPOLOGY_INVALID_NODES		5
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING_LID		6
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_LIDS		7
-#define ERTS_INIT_CPU_TOPOLOGY_NOT_UNIQUE_ENTITIES	8
-#define ERTS_INIT_CPU_TOPOLOGY_MISSING			9
-
-int erts_init_cpu_topology(char *topology_str);
-int erts_update_cpu_info(void);
+#define ERTS_RUNQ_IX(IX)						\
+  (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_run_queues),			\
+   &erts_aligned_run_queues[(IX)].runq)
+#define ERTS_SCHEDULER_IX(IX)						\
+  (ASSERT_EXPR(0 <= (IX) && (IX) < erts_no_schedulers),			\
+   &erts_aligned_scheduler_data[(IX)].esd)
 
 void erts_pre_init_process(void);
 void erts_late_init_process(void);
@@ -1058,8 +1049,9 @@ Eterm erts_multi_scheduling_blockers(Process *);
 void erts_start_schedulers(void);
 void erts_smp_notify_check_children_needed(void);
 #endif
+void erts_sched_notify_check_cpu_bind(void);
 Uint erts_active_schedulers(void);
-void erts_init_process(void);
+void erts_init_process(int);
 Eterm erts_process_status(Process *, ErtsProcLocks, Process *, Eterm);
 Uint erts_run_queues_len(Uint *);
 void erts_add_to_runq(Process *);
diff --git a/erts/emulator/beam/erl_process_lock.c b/erts/emulator/beam/erl_process_lock.c
index a4d12139e9..1bebcdb911 100644
--- a/erts/emulator/beam/erl_process_lock.c
+++ b/erts/emulator/beam/erl_process_lock.c
@@ -117,10 +117,9 @@ static int aux_thr_proc_lock_spin_count;
 static void cleanup_tse(void);
 
 void
-erts_init_proc_lock(void)
+erts_init_proc_lock(int cpus)
 {
     int i;
-    int cpus;
     erts_smp_spinlock_init(&qs_lock, "proc_lck_qs_alloc");
     for (i = 0; i < ERTS_NO_OF_PIX_LOCKS; i++) {
 #ifdef ERTS_ENABLE_LOCK_COUNT
@@ -138,7 +137,6 @@ erts_init_proc_lock(void)
     lc_id.proc_lock_msgq	= erts_lc_get_lock_order_id("proc_msgq");
     lc_id.proc_lock_status	= erts_lc_get_lock_order_id("proc_status");
 #endif
-    cpus = erts_get_cpu_configured(erts_cpuinfo);
     if (cpus > 1) {
 	proc_lock_spin_count = ERTS_PROC_LOCK_SPIN_COUNT_BASE;
 	proc_lock_spin_count += (ERTS_PROC_LOCK_SPIN_COUNT_SCHED_INC
diff --git a/erts/emulator/beam/erl_process_lock.h b/erts/emulator/beam/erl_process_lock.h
index 7cfc9893fa..4fe30c7209 100644
--- a/erts/emulator/beam/erl_process_lock.h
+++ b/erts/emulator/beam/erl_process_lock.h
@@ -1,7 +1,7 @@
 /*
  * %CopyrightBegin%
  * 
- * Copyright Ericsson AB 2007-2009. All Rights Reserved.
+ * Copyright Ericsson AB 2007-2010. All Rights Reserved.
  * 
  * The contents of this file are subject to the Erlang Public License,
  * Version 1.1, (the "License"); you may not use this file except in
@@ -334,7 +334,7 @@ erts_proc_lock_flags_cmpxchg(erts_proc_lock_t *lck, ErtsProcLocks new,
 
 extern erts_pix_lock_t erts_pix_locks[ERTS_NO_OF_PIX_LOCKS];
 
-void erts_init_proc_lock(void);
+void erts_init_proc_lock(int cpus);
 void erts_proc_lock_prepare_proc_lock_waiter(void);
 void erts_proc_lock_failed(Process *,
 			   erts_pix_lock_t *,
diff --git a/erts/emulator/beam/erl_threads.h b/erts/emulator/beam/erl_threads.h
index 0b7269262e..a74cf79b8c 100644
--- a/erts/emulator/beam/erl_threads.h
+++ b/erts/emulator/beam/erl_threads.h
@@ -27,9 +27,6 @@
 
 #define ERTS_SPIN_BODY ETHR_SPIN_BODY
 
-#define ERTS_MAX_READER_GROUPS 8
-extern int erts_reader_groups;
-
 #include "sys.h"
 #ifdef USE_THREADS
 
diff --git a/erts/emulator/beam/global.h b/erts/emulator/beam/global.h
index ecd3c8f68a..12536f6cde 100644
--- a/erts/emulator/beam/global.h
+++ b/erts/emulator/beam/global.h
@@ -1728,11 +1728,6 @@ Uint erts_current_reductions(Process* current, Process *p);
 
 int erts_print_system_version(int to, void *arg, Process *c_p);
 
-/*
- * Interface to erl_init
- */
-void erl_init(void);
-
 #define seq_trace_output(token, msg, type, receiver, process) \
 seq_trace_output_generic((token), (msg), (type), (receiver), (process), NIL)
 #define seq_trace_output_exit(token, msg, type, receiver, exitfrom) \
diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c
index 79022d5dd7..9ed92bbe03 100644
--- a/erts/emulator/beam/io.c
+++ b/erts/emulator/beam/io.c
@@ -2802,17 +2802,25 @@ driver_deliver_term(ErlDrvPort port,
 	    break;
 	case ERL_DRV_INT:  /* signed int argument */
 	    ERTS_DDT_CHK_ENOUGH_ARGS(1);
+#if HALFWORD_HEAP
+	    erts_bld_sint64(NULL, &need, (Sint64)ptr[0]);
+#else
 	    /* check for bignum */
 	    if (!IS_SSMALL((Sint)ptr[0]))
 		need += BIG_UINT_HEAP_SIZE;  /* use small_to_big */
+#endif
 	    ptr++;
 	    depth++;
 	    break;
 	case ERL_DRV_UINT:  /* unsigned int argument */
 	    ERTS_DDT_CHK_ENOUGH_ARGS(1);
+#if HALFWORD_HEAP
+	    erts_bld_uint64(NULL, &need, (Uint64)ptr[0]);
+#else
 	    /* check for bignum */
 	    if (!IS_USMALL(0, (Uint)ptr[0]))
 		need += BIG_UINT_HEAP_SIZE;  /* use small_to_big */
+#endif
 	    ptr++;
 	    depth++;
 	    break;
@@ -2979,22 +2987,30 @@ driver_deliver_term(ErlDrvPort port,
 	    break;
 
 	case ERL_DRV_INT:  /* signed int argument */
+#if HALFWORD_HEAP
+	    mess = erts_bld_sint64(&hp, NULL, (Sint64)ptr[0]);
+#else
 	    if (IS_SSMALL((Sint)ptr[0]))
 		mess = make_small((Sint)ptr[0]);
 	    else {
 		mess = small_to_big((Sint)ptr[0], hp);
 		hp += BIG_UINT_HEAP_SIZE;
 	    }
+#endif
 	    ptr++;
 	    break;
 
 	case ERL_DRV_UINT:  /* unsigned int argument */
+#if HALFWORD_HEAP
+	    mess = erts_bld_uint64(&hp, NULL, (Uint64)ptr[0]);
+#else
 	    if (IS_USMALL(0, (Uint)ptr[0]))
 		mess = make_small((Uint)ptr[0]);
 	    else {
 		mess = uint_to_big((Uint)ptr[0], hp);
 		hp += BIG_UINT_HEAP_SIZE;
 	    }
+#endif
 	    ptr++;
 	    break;
 
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index 0031568af6..93203a08a9 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -466,8 +466,6 @@ static const int zero_value = 0, one_value = 1;
 #  endif /* !__WIN32__ */
 #endif /* WANT_NONBLOCKING */
 
-extern erts_cpu_info_t *erts_cpuinfo; /* erl_init.c */
-
 __decl_noreturn void __noreturn erl_exit(int n, char*, ...);
 
 /* Some special erl_exit() codes: */
diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c
index f07e4793d2..18f7cdd15a 100644
--- a/erts/emulator/drivers/common/inet_drv.c
+++ b/erts/emulator/drivers/common/inet_drv.c
@@ -637,15 +637,12 @@ static int my_strncasecmp(const char *s1, const char *s2, size_t n)
 #define IS_BUSY(d) \
   (((d)->state & INET_F_BUSY) == INET_F_BUSY)
 
+#define INET_MAX_OPT_BUFFER (64*1024)
+
 #define INET_DEF_BUFFER     1460        /* default buffer size */
 #define INET_MIN_BUFFER     1           /* internal min buffer */
-#define INET_MAX_BUFFER     (1024*64)   /* internal max buffer */
 
-/* Note: INET_HIGH_WATERMARK MUST be less than 2*INET_MAX_BUFFER */
 #define INET_HIGH_WATERMARK (1024*8) /* 8k pending high => busy  */
-/* Note: INET_LOW_WATERMARK MUST be less than INET_MAX_BUFFER and
-** less than INET_HIGH_WATERMARK
-*/
 #define INET_LOW_WATERMARK  (1024*4) /* 4k pending => allow more */
 
 #define INET_INFINITY  0xffffffff  /* infinity value */
@@ -1256,139 +1253,136 @@ static int load_ip_and_port
 	LOAD_ATOM((spec), (i), (flag) ? am_true : am_false);
 #endif /* HAVE_SCTP */
 
+/* Assume a cache line size of 64 bytes */
+#define INET_DRV_CACHE_LINE_SIZE ((ErlDrvUInt) 64)
+#define INET_DRV_CACHE_LINE_MASK (INET_DRV_CACHE_LINE_SIZE - 1)
+
 /*
 ** Binary Buffer Managment
 ** We keep a stack of usable buffers 
 */
-#define BUFFER_STACK_SIZE 16
-
-static erts_smp_spinlock_t inet_buffer_stack_lock;
-static ErlDrvBinary* buffer_stack[BUFFER_STACK_SIZE];
-static int buffer_stack_pos = 0;
+#define BUFFER_STACK_SIZE 14
+#define BUFFER_STACK_MAX_MEM_SIZE (1024*1024)
 
+ErlDrvTSDKey buffer_stack_key;
 
-/*
- * XXX
- * The erts_smp_spin_* functions should not be used by drivers (but this
- * driver is special). Replace when driver locking api has been implemented.
- * /rickard
- */
-#define BUFSTK_LOCK	erts_smp_spin_lock(&inet_buffer_stack_lock);
-#define BUFSTK_UNLOCK	erts_smp_spin_unlock(&inet_buffer_stack_lock);
-
-#ifdef DEBUG
-static int tot_buf_allocated = 0;  /* memory in use for i_buf */
-static int tot_buf_stacked = 0;   /* memory on stack */
-static int max_buf_allocated = 0; /* max allocated */
-
-#define COUNT_BUF_ALLOC(sz) do { \
-  BUFSTK_LOCK; \
-  tot_buf_allocated += (sz); \
-  if (tot_buf_allocated > max_buf_allocated) \
-    max_buf_allocated = tot_buf_allocated; \
-  BUFSTK_UNLOCK; \
-} while(0)
-
-#define COUNT_BUF_FREE(sz) do { \
- BUFSTK_LOCK; \
- tot_buf_allocated -= (sz); \
- BUFSTK_UNLOCK; \
- } while(0)
-
-#define COUNT_BUF_STACK(sz) do { \
- BUFSTK_LOCK; \
- tot_buf_stacked += (sz); \
- BUFSTK_UNLOCK; \
- } while(0)
+typedef struct {
+    int mem_size;
+    int pos;
+    ErlDrvBinary* stk[BUFFER_STACK_SIZE];
+} InetDrvBufStkBase;
 
-#else
+typedef struct {
+    InetDrvBufStkBase buf;
+    char align[(((sizeof(InetDrvBufStkBase) - 1) / INET_DRV_CACHE_LINE_SIZE) + 1)
+	       * INET_DRV_CACHE_LINE_SIZE];
+} InetDrvBufStk;
+
+static InetDrvBufStk *get_bufstk(void)
+{
+    InetDrvBufStk *bs = erl_drv_tsd_get(buffer_stack_key);
+    if (bs)
+	return bs;
+    bs = driver_alloc(sizeof(InetDrvBufStk)
+		      + INET_DRV_CACHE_LINE_SIZE - 1);
+    if (!bs)
+	return NULL;
+    if ((((ErlDrvUInt) bs) & INET_DRV_CACHE_LINE_MASK) != 0)
+	bs = ((InetDrvBufStk *)
+	      ((((ErlDrvUInt) bs) & ~INET_DRV_CACHE_LINE_MASK)
+	       + INET_DRV_CACHE_LINE_SIZE));
+    erl_drv_tsd_set(buffer_stack_key, bs);
+    bs->buf.pos = 0;
+    bs->buf.mem_size = 0;
 
-#define COUNT_BUF_ALLOC(sz)
-#define COUNT_BUF_FREE(sz)
-#define COUNT_BUF_STACK(sz)
+    ASSERT(bs == erl_drv_tsd_get(buffer_stack_key));
 
-#endif
+    return bs;
+}
 
 static ErlDrvBinary* alloc_buffer(long minsz)
 {
-    ErlDrvBinary* buf = NULL;
+    InetDrvBufStk *bs = get_bufstk();
+
+    DEBUGF(("alloc_buffer: %ld\r\n", minsz));
 
-    BUFSTK_LOCK;
+    if (bs && bs->buf.pos > 0) {
+	long size;
+	ErlDrvBinary* buf = bs->buf.stk[--bs->buf.pos];
+	size = buf->orig_size;
+	bs->buf.mem_size -= size;
+	ASSERT(0 <= bs->buf.mem_size
+	       && bs->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
+	if (size >= minsz)
+	    return buf;
 
-    DEBUGF(("alloc_buffer: sz = %ld, tot = %d, max = %d\r\n", 
-	    minsz, tot_buf_allocated, max_buf_allocated));
+	driver_free_binary(buf);
+    }
 
-    if (buffer_stack_pos > 0) {
-	int origsz;
+    ASSERT(!bs || bs->buf.pos != 0 || bs->buf.mem_size == 0);
 
-	buf = buffer_stack[--buffer_stack_pos];
-	origsz = buf->orig_size;
-	BUFSTK_UNLOCK;
-	COUNT_BUF_STACK(-origsz);
-	if (origsz < minsz) {
-	    if ((buf = driver_realloc_binary(buf, minsz)) == NULL)
-		return NULL;
-	    COUNT_BUF_ALLOC(buf->orig_size - origsz);
+    return driver_alloc_binary(minsz);
+}
+
+/*#define CHECK_DOUBLE_RELEASE 1*/
+#ifdef CHECK_DOUBLE_RELEASE
+static void
+check_double_release(InetDrvBufStk *bs, ErlDrvBinary* buf)
+{
+#ifdef __GNUC__
+#warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator
+#endif
+    int i;
+    for (i = 0; i < bs->buf.pos; ++i) {
+	if (bs->buf.stk[i] == buf) {
+	    erl_exit(ERTS_ABORT_EXIT,
+		     "Multiple buffer release in inet_drv, this "
+		     "is a bug, save the core and send it to "
+		     "[email protected]!");
 	}
     }
-    else {
-	BUFSTK_UNLOCK;
-	if ((buf = driver_alloc_binary(minsz)) == NULL)
-	    return NULL;
-	COUNT_BUF_ALLOC(buf->orig_size);
-    }
-    return buf;
 }
+#endif
 
-/*
-** Max buffer memory "cached" BUFFER_STACK_SIZE * INET_MAX_BUFFER
-** (16 * 64k ~ 1M)
-*/
-/*#define CHECK_DOUBLE_RELEASE 1*/
 static void release_buffer(ErlDrvBinary* buf)
 {
+    InetDrvBufStk *bs;
+    long size;
+
     DEBUGF(("release_buffer: %ld\r\n", (buf==NULL) ? 0 : buf->orig_size));
-    if (buf == NULL)
+
+    if (!buf)
 	return;
-    BUFSTK_LOCK;
-    if ((buf->orig_size > INET_MAX_BUFFER) || 
-	(buffer_stack_pos >= BUFFER_STACK_SIZE)) {
-	BUFSTK_UNLOCK;
-	COUNT_BUF_FREE(buf->orig_size);
+
+    size = buf->orig_size;
+
+    if (size > BUFFER_STACK_MAX_MEM_SIZE)
+	goto free_binary;
+
+    bs = get_bufstk();
+    if (!bs
+	|| (bs->buf.mem_size + size > BUFFER_STACK_MAX_MEM_SIZE)
+	|| (bs->buf.pos >= BUFFER_STACK_SIZE)) {
+    free_binary:
 	driver_free_binary(buf);
     }
     else {
 #ifdef CHECK_DOUBLE_RELEASE
-#ifdef __GNUC__
-#warning CHECK_DOUBLE_RELEASE is enabled, this is a custom build emulator
-#endif
-	int i;
-	for (i = 0; i < buffer_stack_pos; ++i) {
-	    if (buffer_stack[i] == buf) {
-		erl_exit(1,"Multiple buffer release in inet_drv, this is a "
-			 "bug, save the core and send it to "
-			 "[email protected]!");
-	    }
-	}
+	check_double_release(bs, buf);
 #endif
-	buffer_stack[buffer_stack_pos++] = buf;
-	BUFSTK_UNLOCK;
-	COUNT_BUF_STACK(buf->orig_size);
+	ASSERT(bs->buf.pos != 0 || bs->buf.mem_size == 0);
+
+	bs->buf.mem_size += size;
+	bs->buf.stk[bs->buf.pos++] = buf;
+
+	ASSERT(0 <= bs->buf.mem_size
+	       && bs->buf.mem_size <= BUFFER_STACK_MAX_MEM_SIZE);
     }
 }
 
 static ErlDrvBinary* realloc_buffer(ErlDrvBinary* buf, long newsz)
 {
-    ErlDrvBinary* bin;
-#ifdef DEBUG
-    long orig_size =  buf->orig_size;
-#endif
-
-    if ((bin = driver_realloc_binary(buf,newsz)) != NULL) {
-	COUNT_BUF_ALLOC(newsz - orig_size);
-	;
-    }
-    return bin;
+    return driver_realloc_binary(buf, newsz);
 }
 
 /* use a TRICK, access the refc field to see if any one else has
@@ -1402,10 +1396,8 @@ static void free_buffer(ErlDrvBinary* buf)
     if (buf != NULL) {
 	if (driver_binary_get_refc(buf) == 1)
 	    release_buffer(buf);
-	else {
-	    COUNT_BUF_FREE(buf->orig_size);
+	else
 	    driver_free_binary(buf);
-	}
     }
 }
 
@@ -3409,20 +3401,14 @@ static int inet_init()
     if (!sock_init())
 	goto error;
 
-    buffer_stack_pos = 0;
-
-    erts_smp_spinlock_init(&inet_buffer_stack_lock, "inet_buffer_stack_lock");
+    if (0 != erl_drv_tsd_key_create("inet_buffer_stack_key", &buffer_stack_key))
+	goto error;
 
     ASSERT(sizeof(struct in_addr) == 4);
 #   if defined(HAVE_IN6) && defined(AF_INET6)
     ASSERT(sizeof(struct in6_addr) == 16);
 #   endif
 
-#ifdef DEBUG
-    tot_buf_allocated = 0;
-    max_buf_allocated = 0;
-    tot_buf_stacked = 0;
-#endif
     INIT_ATOM(ok);
     INIT_ATOM(tcp);
     INIT_ATOM(udp);
@@ -5165,8 +5151,7 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
 	case INET_LOPT_BUFFER:
 	    DEBUGF(("inet_set_opts(%ld): s=%d, BUFFER=%d\r\n",
 		    (long)desc->port, desc->s, ival));
-	    if (ival > INET_MAX_BUFFER)  ival = INET_MAX_BUFFER;
-	    else if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
+	    if (ival < INET_MIN_BUFFER) ival = INET_MIN_BUFFER;
 	    desc->bufsz = ival;
 	    continue;
 
@@ -5231,7 +5216,6 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
 	    if (desc->stype == SOCK_STREAM) {
 		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
 		if (ival < 0) ival = 0;
-		else if (ival > INET_MAX_BUFFER*2) ival = INET_MAX_BUFFER*2;
 		if (tdesc->low > ival)
 		    tdesc->low = ival;
 		tdesc->high = ival;
@@ -5242,7 +5226,6 @@ static int inet_set_opts(inet_descriptor* desc, char* ptr, int len)
 	    if (desc->stype == SOCK_STREAM) {
 		tcp_descriptor* tdesc = (tcp_descriptor*) desc;
 		if (ival < 0) ival = 0;
-		else if (ival > INET_MAX_BUFFER) ival = INET_MAX_BUFFER;
 		if (tdesc->high < ival)
 		    tdesc->high = ival;
 		tdesc->low = ival;
@@ -5588,9 +5571,6 @@ static int sctp_set_opts(inet_descriptor* desc, char* ptr, int len)
 	case INET_LOPT_BUFFER:
 	    desc->bufsz  = get_int32(curr);		curr += 4;
 
-            if (desc->bufsz > INET_MAX_BUFFER)
-		desc->bufsz = INET_MAX_BUFFER;
-            else
 	    if (desc->bufsz < INET_MIN_BUFFER)
 		desc->bufsz = INET_MIN_BUFFER;
 	    res = 0;	  /* This does not affect the kernel buffer size */
@@ -6028,7 +6008,7 @@ static int inet_fill_opts(inet_descriptor* desc,
 #define PLACE_FOR(Size,Ptr)						   \
     do {								   \
 	int need = dest_used + (Size);					   \
-	if (need > INET_MAX_BUFFER) {					   \
+	if (need > INET_MAX_OPT_BUFFER) {				   \
 	    RETURN_ERROR();						   \
 	}								   \
 	if (need > dest_allocated) {					   \
@@ -6252,7 +6232,7 @@ static int inet_fill_opts(inet_descriptor* desc,
 		buf += 4;
 		data_provided = (int) *buf++;
 		arg_sz = get_int32(buf);
-		if (arg_sz > INET_MAX_BUFFER) {	
+		if (arg_sz > INET_MAX_OPT_BUFFER) {
 		    RETURN_ERROR();
 		}
 		buf += 4;
@@ -6366,7 +6346,7 @@ static int sctp_fill_opts(inet_descriptor* desc, char* buf, int buflen,
 		     "miscalculated buffer size");              \
 	}                                                       \
 	need = (Index) + (N);                                   \
-	if (need > INET_MAX_BUFFER/sizeof(ErlDrvTermData)) {    \
+	if (need > INET_MAX_OPT_BUFFER/sizeof(ErlDrvTermData)) {\
 	    RETURN_ERROR((Spec), -ENOMEM);                      \
 	}                                                       \
 	if (need > spec_allocated) {                            \
@@ -7219,7 +7199,7 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len,
 	      }
 	  }
 	  DEBUGF(("inet_ctl(%ld): GETSTAT\r\n", (long) desc->port)); 
-	  if (dstlen > INET_MAX_BUFFER) /* sanity check */
+	  if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
 	      return 0;
 	  if (dstlen > rsize) {
 	      if ((dst = (char*) ALLOC(dstlen)) == NULL)
@@ -7235,7 +7215,7 @@ static int inet_ctl(inet_descriptor* desc, int cmd, char* buf, int len,
 	  char* dst;
 	  int dstlen = 1 /* Reply code */ + len*5;
 	  DEBUGF(("inet_ctl(%ld): INET_REQ_SUBSCRIBE\r\n", (long) desc->port)); 
-	  if (dstlen > INET_MAX_BUFFER) /* sanity check */
+	  if (dstlen > INET_MAX_OPT_BUFFER) /* sanity check */
 	      return 0;
 	  if (dstlen > rsize) {
 	      if ((dst = (char*) ALLOC(dstlen)) == NULL)
diff --git a/erts/emulator/sys/unix/sys.c b/erts/emulator/sys/unix/sys.c
index af4ab693dc..01ba773688 100644
--- a/erts/emulator/sys/unix/sys.c
+++ b/erts/emulator/sys/unix/sys.c
@@ -75,6 +75,7 @@ static erts_smp_rwmtx_t environ_rwmtx;
 
 #include "erl_sys_driver.h"
 #include "erl_check_io.h"
+#include "erl_cpu_topology.h"
 
 #ifndef DISABLE_VFORK
 #define DISABLE_VFORK 0
@@ -399,7 +400,7 @@ typedef struct {
 #ifdef ERTS_THR_HAVE_SIG_FUNCS
     sigset_t saved_sigmask;
 #endif
-    int unbind_child;
+    int sched_bind_data;
 } erts_thr_create_data_t;
 
 /*
@@ -410,15 +411,13 @@ static void *
 thr_create_prepare(void)
 {
     erts_thr_create_data_t *tcdp;
-    ErtsSchedulerData *esdp;
 
     tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
 
 #ifdef ERTS_THR_HAVE_SIG_FUNCS
     erts_thr_sigmask(SIG_BLOCK, &thr_create_sigmask, &tcdp->saved_sigmask);
 #endif
-    esdp = erts_get_scheduler_data();
-    tcdp->unbind_child = esdp && erts_is_scheduler_bound(esdp);
+    tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
 
     return (void *) tcdp;
 }
@@ -430,6 +429,8 @@ thr_create_cleanup(void *vtcdp)
 {
     erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
 
+    erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
 #ifdef ERTS_THR_HAVE_SIG_FUNCS
     /* Restore signalmask... */
     erts_thr_sigmask(SIG_SETMASK, &tcdp->saved_sigmask, NULL);
@@ -456,12 +457,7 @@ thr_create_prepare_child(void *vtcdp)
     erts_thread_disable_fpe();
 #endif
 
-    if (tcdp->unbind_child) {
-	erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
-	erts_unbind_from_cpu(erts_cpuinfo);
-	erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
-    }
-    
+    erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
 }
 
 #endif /* #ifdef USE_THREADS */
@@ -1461,9 +1457,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
 
     CHLD_STAT_LOCK;
 
-    unbind = erts_is_scheduler_bound(NULL);
-    if (unbind)
-	erts_smp_rwmtx_rlock(&erts_cpu_bind_rwmtx);
+    unbind = erts_sched_bind_atfork_prepare();
 
 #if !DISABLE_VFORK
     /* See fork/vfork discussion before this function. */
@@ -1476,7 +1470,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
 	if (pid == 0) {
 	    /* The child! Setup child... */
 
-	    if (unbind && erts_unbind_from_cpu(erts_cpuinfo) != 0)
+	    if (erts_sched_bind_atfork_child(unbind) != 0)
 		goto child_error;
 
 	    /* OBSERVE!
@@ -1577,8 +1571,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
 
 	cs_argv[CS_ARGV_PROGNAME_IX] = child_setup_prog;
 	cs_argv[CS_ARGV_WD_IX] = opts->wd ? opts->wd : ".";
-	cs_argv[CS_ARGV_UNBIND_IX]
-	    = (unbind ? erts_get_unbind_from_cpu_str(erts_cpuinfo) : "false");
+	cs_argv[CS_ARGV_UNBIND_IX] = erts_sched_bind_atvfork_child(unbind);
 	cs_argv[CS_ARGV_FD_CR_IX] = fd_close_range;
 	for (i = 0; i < CS_ARGV_NO_OF_DUP2_OPS; i++)
 	    cs_argv[CS_ARGV_DUP2_OP_IX(i)] = &dup2_op[i][0];
@@ -1627,8 +1620,7 @@ static ErlDrvData spawn_start(ErlDrvPort port_num, char* name, SysDriverOpts* op
     }
 #endif
 
-    if (unbind)
-	erts_smp_rwmtx_runlock(&erts_cpu_bind_rwmtx);
+    erts_sched_bind_atfork_parent(unbind);
 
     if (pid == -1) {
         saved_errno = errno;
diff --git a/erts/emulator/sys/win32/sys.c b/erts/emulator/sys/win32/sys.c
index 15d4cd7361..d24347b3aa 100644
--- a/erts/emulator/sys/win32/sys.c
+++ b/erts/emulator/sys/win32/sys.c
@@ -31,7 +31,7 @@
 #include "global.h"
 #include "erl_threads.h"
 #include "../../drivers/win32/win_con.h"
-
+#include "erl_cpu_topology.h"
 
 
 void erts_sys_init_float(void);
@@ -2973,13 +2973,50 @@ check_supported_os_version(void)
 }
 
 #ifdef USE_THREADS
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
+typedef struct {
+    int sched_bind_data;
+} erts_thr_create_data_t;
+
+/*
+ * thr_create_prepare() is called in parent thread before thread creation.
+ * Returned value is passed as argument to thr_create_cleanup().
+ */
+static void *
+thr_create_prepare(void)
+{
+    erts_thr_create_data_t *tcdp;
+
+    tcdp = erts_alloc(ERTS_ALC_T_TMP, sizeof(erts_thr_create_data_t));
+    tcdp->sched_bind_data = erts_sched_bind_atthrcreate_prepare();
+
+    return (void *) tcdp;
+}
+
+
+/* thr_create_cleanup() is called in parent thread after thread creation. */
+static void
+thr_create_cleanup(void *vtcdp)
+{
+    erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+    erts_sched_bind_atthrcreate_parent(tcdp->sched_bind_data);
+
+    erts_free(ERTS_ALC_T_TMP, tcdp);
+}
+
 static void
 thr_create_prepare_child(void *vtcdp)
 {
+    erts_thr_create_data_t *tcdp = (erts_thr_create_data_t *) vtcdp;
+
+#ifdef ERTS_ENABLE_LOCK_COUNT
     erts_lcnt_thread_setup();
-}
 #endif /* ERTS_ENABLE_LOCK_COUNT */
+
+    erts_sched_bind_atthrcreate_child(tcdp->sched_bind_data);
+}
+
 #endif /* USE_THREADS */
 
 void
@@ -2991,9 +3028,13 @@ erts_sys_pre_init(void)
 #ifdef USE_THREADS
     {
 	erts_thr_init_data_t eid = ERTS_THR_INIT_DATA_DEF_INITER;
-#ifdef ERTS_ENABLE_LOCK_COUNT
+
 	eid.thread_create_child_func = thr_create_prepare_child;
-#endif
+	/* Runs in the parent thread before the new thread is created */
+	eid.thread_create_prepare_func = thr_create_prepare;
+	/* Runs in the parent thread after the new thread is created */
+	eid.thread_create_parent_func = thr_create_cleanup;
+
 	erts_thr_init(&eid);
 #ifdef ERTS_ENABLE_LOCK_COUNT
 	erts_lcnt_init();
diff --git a/erts/emulator/test/send_term_SUITE.erl b/erts/emulator/test/send_term_SUITE.erl
index 5fd01a9ac5..819aa34886 100644
--- a/erts/emulator/test/send_term_SUITE.erl
+++ b/erts/emulator/test/send_term_SUITE.erl
@@ -1,7 +1,7 @@
 %%
 %% %CopyrightBegin%
 %% 
-%% Copyright Ericsson AB 2005-2009. All Rights Reserved.
+%% Copyright Ericsson AB 2005-2010. All Rights Reserved.
 %% 
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
@@ -61,7 +61,7 @@ basic(Config) when is_list(Config) ->
     ?line ExpectExt2Term = term(P, 5),
 
     %% ERL_DRV_INT, ERL_DRV_UINT
-    ?line case erlang:system_info(wordsize) of
+    ?line case erlang:system_info({wordsize, external}) of
 	      4 ->
 		  ?line {-1, 4294967295} = term(P, 6);
 	      8 ->
diff --git a/erts/emulator/test/system_info_SUITE.erl b/erts/emulator/test/system_info_SUITE.erl
index ba433d4e11..cd940f3ddf 100644
--- a/erts/emulator/test/system_info_SUITE.erl
+++ b/erts/emulator/test/system_info_SUITE.erl
@@ -132,6 +132,7 @@ misc_smoke_tests(Config) when is_list(Config) ->
     ?line true = is_binary(erlang:system_info(procs)),
     ?line true = is_binary(erlang:system_info(loaded)),
     ?line true = is_binary(erlang:system_info(dist)),
+    ?line ok = try erlang:system_info({cpu_topology,erts_get_cpu_topology_error_case}), fail catch error:badarg -> ok end,
     ?line ok.
     
 
diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h
index 4a205699bd..53fa1acdc2 100644
--- a/erts/include/internal/ethread.h
+++ b/erts/include/internal/ethread.h
@@ -239,6 +239,8 @@ typedef DWORD ethr_tsd_key;
 #      include "gcc/ethread.h"
 #      include "libatomic_ops/ethread.h"
 #    endif
+#  elif defined(ETHR_HAVE_LIBATOMIC_OPS)
+#    include "libatomic_ops/ethread.h"
 #  elif defined(ETHR_WIN32_THREADS)
 #    include "win/ethread.h"
 #  endif
@@ -757,7 +759,7 @@ ETHR_INLINE_FUNC_NAME_(ethr_atomic_set_relb)(ethr_atomic_t *var, long val)
 #ifdef ETHR_HAVE_NATIVE_ATOMICS
     ethr_native_atomic_set_relb(var, val);
 #else
-    return ETHR_INLINE_FUNC_NAME_(ethr_atomic_set)(var, val);
+    ETHR_INLINE_FUNC_NAME_(ethr_atomic_set)(var, val);
 #endif
 }
 
diff --git a/erts/lib_src/common/erl_misc_utils.c b/erts/lib_src/common/erl_misc_utils.c
index 498ce6837a..4c881993a5 100644
--- a/erts/lib_src/common/erl_misc_utils.c
+++ b/erts/lib_src/common/erl_misc_utils.c
@@ -71,6 +71,19 @@
 			(CPUSET)) != 0 ? -errno : 0)
 #define ERTS_MU_SET_THR_AFFINITY__(SETP)				\
      (sched_setaffinity(0, sizeof(cpu_set_t), (SETP)) != 0 ? -errno : 0)
+#elif defined(HAVE_CPUSET_xETAFFINITY)
+#  include <sys/param.h>
+#  include <sys/cpuset.h>
+#  define ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__
+#define ERTS_MU_GET_PROC_AFFINITY__(CPUINFOP, CPUSET)			\
+     (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, 	    \
+			sizeof(cpuset_t),				\
+			(CPUSET)) != 0 ? -errno : 0)
+#define ERTS_MU_SET_THR_AFFINITY__(CPUSETP)				\
+     (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,    \
+            sizeof(cpuset_t),               \
+            (CPUSETP)) != 0 ? -errno : 0)
+#  define cpu_set_t cpuset_t
 #elif defined(__WIN32__)
 #  define ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__
 #  define cpu_set_t DWORD
@@ -100,6 +113,11 @@
 #  define ERTS_SYS_CPU_PATH	"/sys/devices/system/cpu"
 #endif
 
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/sysctl.h>
+#endif
+
 static int read_topology(erts_cpu_info_t *cpuinfo);
 
 #if defined(ERTS_HAVE_MISC_UTIL_AFFINITY_MASK__)
@@ -1380,6 +1398,245 @@ read_topology(erts_cpu_info_t *cpuinfo)
     return res;
 }
 
+#elif defined(__FreeBSD__)
+
+/**
+ * FreeBSD topology detection is based on kern.sched.topology_spec XML as
+ * exposed by the ULE scheduler and described in SMP(4). It is available in
+ * 8.0 and higher.
+ *
+ * Threads are identified in this XML chunk with a THREAD flag. The function
+ * (simplistically) distinguishes cores and processors by the amount of cache
+ * they share (0 => processor, otherwise => core). Nodes are not identified
+ * (ULE doesn't handle NUMA yet, I believe).
+ */
+
+/**
+ * Recursively parse the topology_spec <group> element starting at xml.
+ * A leaf group adds one topology entry per id in its <cpu> element; any
+ * other group recurses into its child groups. *processor_p, *core_p and
+ * *index_procs_p carry the running processor id, core id and next free
+ * topology slot between calls. Returns where the caller should resume
+ * scanning, or NULL on malformed input or allocation failure.
+ */
+static
+const char* parse_topology_spec_group(erts_cpu_info_t *cpuinfo, const char* xml, int parentCacheLevel, int* processor_p, int* core_p, int* index_procs_p) {
+    int error = 0;
+    int cacheLevel = parentCacheLevel;
+    const char* next_group_start = strstr(xml + 1, "<group");
+    int is_thread_group = 0;
+    const char *next_cache_level, *next_thread_flag;
+    const char *next_group_end, *next_children, *next_children_end;
+
+    /* parse the cache level */
+    next_cache_level = strstr(xml, "cache-level=\"");
+    if (next_cache_level && (next_group_start == NULL || next_cache_level < next_group_start)) {
+	sscanf(next_cache_level, "cache-level=\"%i\"", &cacheLevel);
+    }
+
+    /* parse the threads flag */
+    next_thread_flag = strstr(xml, "THREAD");
+    if (next_thread_flag && (next_group_start == NULL || next_thread_flag < next_group_start))
+	is_thread_group = 1;
+
+    /* Determine if it's a leaf with the position of the next children tag */
+    next_group_end = strstr(xml, "</group>");
+    next_children = strstr(xml, "<children>");
+    next_children_end = strstr(xml, "</children>");
+    if (next_children == NULL || next_group_end < next_children) {
+	do {
+	    const char *next_cpu_start, *next_cpu_cdata, *next_cpu_end;
+	    int cpu_str_size;
+	    char *cpu_str, *cpu_crsr, *brkb;
+	    int thread = 0;
+	    int index_procs = *index_procs_p;
+
+	    next_cpu_start = strstr(xml, "<cpu");
+	    if (!next_cpu_start) {
+		error = 1;
+		break;
+	    }
+	    next_cpu_cdata = strstr(next_cpu_start, ">");
+	    if (!next_cpu_cdata) {
+		error = 1;
+		break;
+	    }
+	    next_cpu_cdata++; /* step past '>' only once it is known to exist */
+	    next_cpu_end = strstr(next_cpu_cdata, "</cpu>");
+	    if (!next_cpu_end) {
+		error = 1;
+		break;
+	    }
+	    cpu_str_size = next_cpu_end - next_cpu_cdata;
+	    cpu_str = (char*) malloc(cpu_str_size + 1);
+	    if (!cpu_str) {
+		error = 1;
+		break;
+	    }
+	    memcpy(cpu_str, (const char*) next_cpu_cdata, cpu_str_size);
+	    cpu_str[cpu_str_size] = 0;
+	    for (cpu_crsr = strtok_r(cpu_str, " \t,", &brkb); cpu_crsr; cpu_crsr = strtok_r(NULL, " \t,", &brkb)) {
+		int cpu_id;
+		if (index_procs >= cpuinfo->configured) {
+		    void* t = realloc(cpuinfo->topology, (sizeof(erts_cpu_topology_t) * (index_procs + 1)));
+		    if (t) {
+			cpuinfo->topology = t;
+		    } else {
+			error = 1;
+			break;
+		    }
+		}
+		cpu_id = atoi(cpu_crsr);
+		cpuinfo->topology[index_procs].node = -1;
+		cpuinfo->topology[index_procs].processor = *processor_p;
+		cpuinfo->topology[index_procs].processor_node = -1;
+		cpuinfo->topology[index_procs].core = *core_p;
+		cpuinfo->topology[index_procs].thread = thread;
+		cpuinfo->topology[index_procs].logical = cpu_id;
+		if (is_thread_group) {
+		    thread++;
+		} else {
+		    (*core_p)++;
+		}
+		index_procs++;
+	    }
+	    *index_procs_p = index_procs;
+	    free(cpu_str);
+	} while (0);
+	xml = next_group_end;
+    } else {
+	while (next_group_start != NULL && next_group_start < next_children_end) {
+	    xml = parse_topology_spec_group(cpuinfo, next_group_start, cacheLevel, processor_p, core_p, index_procs_p);
+	    if (!xml)
+		break;
+	    next_group_start = strstr(xml, "<group");
+	    next_children_end = strstr(xml, "</children>");
+	}
+    }
+
+    if (cacheLevel == 0) {
+	*core_p = 0;
+	(*processor_p)++;
+    } else {
+	(*core_p)++;
+    }
+
+    return error ? NULL : xml;
+}
+
+/**
+ * Parse the topology_spec XML in xml, filling cpuinfo->topology through
+ * parse_topology_spec_group(). Returns the number of topology entries
+ * written, or 0 if parsing failed (a missing <groups> element included).
+ */
+static
+int parse_topology_spec(erts_cpu_info_t *cpuinfo, const char* xml) {
+    int res = 1;
+    int index_procs = 0;
+    int core = 0;
+    int processor = 0;
+    xml = strstr(xml, "<groups");
+    if (!xml)
+	return 0; /* no <groups> element: report failure as documented */
+
+    /* skip "<groups" itself so the search below finds the first <group> */
+    xml += 7;
+    xml = strstr(xml, "<group");
+    while (xml) {
+	xml = parse_topology_spec_group(cpuinfo, xml, 0, &processor, &core, &index_procs);
+	if (!xml) {
+	    res = 0;
+	    break;
+	}
+	xml = strstr(xml, "<group");
+    }
+
+    return res ? index_procs : 0;
+}
+
+/**
+ * FreeBSD: fill cpuinfo->topology from kern.sched.topology_spec. Returns
+ * the number of topology entries, or a negative errno value on failure.
+ */
+static int
+read_topology(erts_cpu_info_t *cpuinfo)
+{
+    int ix;
+    int res = 0;
+    size_t topology_spec_size = 0;
+    void* topology_spec = NULL;
+
+    errno = 0;
+
+    if (cpuinfo->configured < 1)
+	goto error;
+
+    cpuinfo->topology_size = cpuinfo->configured;
+    cpuinfo->topology = malloc(sizeof(erts_cpu_topology_t) * cpuinfo->configured);
+    if (!cpuinfo->topology) {
+	res = -ENOMEM;
+	goto error;
+    }
+
+    for (ix = 0; ix < cpuinfo->configured; ix++) {
+	cpuinfo->topology[ix].node = -1;
+	cpuinfo->topology[ix].processor = -1;
+	cpuinfo->topology[ix].processor_node = -1;
+	cpuinfo->topology[ix].core = -1;
+	cpuinfo->topology[ix].thread = -1;
+	cpuinfo->topology[ix].logical = -1;
+    }
+
+    if (!sysctlbyname("kern.sched.topology_spec", NULL, &topology_spec_size, NULL, 0)) {
+	topology_spec = malloc(topology_spec_size + 1);
+	if (!topology_spec) {
+	    res = -ENOMEM;
+	    goto error;
+	}
+
+	if (sysctlbyname("kern.sched.topology_spec", topology_spec, &topology_spec_size, NULL, 0)) {
+	    goto error;
+	}
+	((char *) topology_spec)[topology_spec_size] = '\0'; /* for strstr() */
+
+	res = parse_topology_spec(cpuinfo, topology_spec);
+	if (!res || res < cpuinfo->online)
+	    res = 0;
+	else {
+	    cpuinfo->topology_size = res;
+
+	    if (cpuinfo->topology_size != cpuinfo->configured) {
+		void *t = realloc(cpuinfo->topology, (sizeof(erts_cpu_topology_t)
+						  * cpuinfo->topology_size));
+		if (t)
+		    cpuinfo->topology = t;
+	    }
+
+	    adjust_processor_nodes(cpuinfo, 1);
+
+	    qsort(cpuinfo->topology,
+	        cpuinfo->topology_size,
+	        sizeof(erts_cpu_topology_t),
+	        cpu_cmp);
+	}
+    }
+
+error:
+    if (res <= 0) { /* every failure, -ENOMEM included, drops the table */
+	cpuinfo->topology_size = 0;
+	if (cpuinfo->topology) {
+	    free(cpuinfo->topology);
+	    cpuinfo->topology = NULL;
+	}
+	if (res == 0)
+	    res = errno ? -errno : -EINVAL;
+    }
+
+    free(topology_spec);
+
+    return res;
+}
+
 #else
 
 static int
diff --git a/erts/preloaded/src/init.erl b/erts/preloaded/src/init.erl
index 3ab9a1cd6d..24430a3d40 100644
--- a/erts/preloaded/src/init.erl
+++ b/erts/preloaded/src/init.erl
@@ -72,6 +72,7 @@
 		script_id = [],
 		loaded = [],
 		subscribed = []}).
+-type state() :: #state{}.
 
 -define(ON_LOAD_HANDLER, init__boot__on_load_handler).
 
@@ -146,10 +147,10 @@ restart() -> init ! {stop,restart}, ok.
 -spec reboot() -> 'ok'.
 reboot() -> init ! {stop,reboot}, ok.
 
--spec stop() -> no_return().
+-spec stop() -> 'ok'.
 stop() -> init ! {stop,stop}, ok.
 
--spec stop(non_neg_integer() | string()) -> no_return().
+-spec stop(non_neg_integer() | string()) -> 'ok'.
 stop(Status) -> init ! {stop,{stop,Status}}, ok.
 
 -spec boot([binary()]) -> no_return().
@@ -278,7 +279,7 @@ crash(String, List) ->
     halt(halt_string(String, List)).
 
 %% Status is {InternalStatus,ProvidedStatus}
--spec boot_loop(pid(), #state{}) -> no_return().
+-spec boot_loop(pid(), state()) -> no_return().
 boot_loop(BootPid, State) ->
     receive
 	{BootPid,loaded,ModLoaded} ->
diff --git a/lib/common_test/src/ct_master.erl b/lib/common_test/src/ct_master.erl
index 42e4cf08f4..2ea2ba106a 100644
--- a/lib/common_test/src/ct_master.erl
+++ b/lib/common_test/src/ct_master.erl
@@ -101,12 +101,14 @@ run([TS|TestSpecs],AllowUserTerms,InclNodes,ExclNodes) when is_list(TS),
 	    TSRec=#testspec{logdir=AllLogDirs,
 			    config=StdCfgFiles,
 			    userconfig=UserCfgFiles,
+			    include=AllIncludes,
 			    init=AllInitOpts,
 			    event_handler=AllEvHs} ->
 	        AllCfgFiles = {StdCfgFiles, UserCfgFiles},
 		RunSkipPerNode = ct_testspec:prepare_tests(TSRec),
 		RunSkipPerNode2 = exclude_nodes(ExclNodes,RunSkipPerNode),
-		run_all(RunSkipPerNode2,AllLogDirs,AllCfgFiles,AllEvHs,[],[],AllInitOpts,TS1)
+		run_all(RunSkipPerNode2,AllLogDirs,AllCfgFiles,AllEvHs,
+			AllIncludes,[],[],AllInitOpts,TS1)
 	end,
     [{TS,Result} | run(TestSpecs,AllowUserTerms,InclNodes,ExclNodes)];
 run([],_,_,_) ->
@@ -163,11 +165,13 @@ run_on_node([TS|TestSpecs],AllowUserTerms,Node) when is_list(TS),is_atom(Node) -
 	    TSRec=#testspec{logdir=AllLogDirs,
 			    config=StdCfgFiles,
 			    init=AllInitOpts,
+			    include=AllIncludes,
 			    userconfig=UserCfgFiles,
 			    event_handler=AllEvHs} ->
 	        AllCfgFiles = {StdCfgFiles, UserCfgFiles},
 		{Run,Skip} = ct_testspec:prepare_tests(TSRec,Node),
-		run_all([{Node,Run,Skip}],AllLogDirs,AllCfgFiles,AllEvHs,[],[],AllInitOpts,TS1)
+		run_all([{Node,Run,Skip}],AllLogDirs,AllCfgFiles,AllEvHs,
+			AllIncludes, [],[],AllInitOpts,TS1)
 	end,
     [{TS,Result} | run_on_node(TestSpecs,AllowUserTerms,Node)];
 run_on_node([],_,_) ->
@@ -189,7 +193,7 @@ run_on_node(TestSpecs,Node) ->
 
 run_all([{Node,Run,Skip}|Rest],AllLogDirs,
 	{AllStdCfgFiles, AllUserCfgFiles}=AllCfgFiles,
-	AllEvHs,NodeOpts,LogDirs,InitOptions,Specs) ->
+	AllEvHs,AllIncludes,NodeOpts,LogDirs,InitOptions,Specs) ->
     LogDir =
 	lists:foldl(fun({N,Dir},_Found) when N == Node ->
 			    Dir;
@@ -211,6 +215,14 @@ run_all([{Node,Run,Skip}|Rest],AllLogDirs,
 		       ({_N,_F},Fs) -> Fs;
 		       (F,Fs) -> [{userconfig, F}|Fs]
 		    end,[],AllUserCfgFiles),
+    
+    Includes = lists:foldr(fun({N,I},Acc) when N =:= Node ->
+				   [I|Acc];
+			      ({_,_},Acc) ->
+				   Acc;
+			      (I,Acc) ->
+				   [I | Acc]
+			   end, [], AllIncludes),
     EvHs =
 	lists:foldr(fun({N,H,A},Hs) when N == Node -> [{H,A}|Hs];
 		       ({_N,_H,_A},Hs) -> Hs;
@@ -219,10 +231,13 @@ run_all([{Node,Run,Skip}|Rest],AllLogDirs,
 
     NO = {Node,[{prepared_tests,{Run,Skip},Specs},
 		{logdir,LogDir},
+		{include, Includes},
 		{config,StdCfgFiles},
 		{event_handler,EvHs}] ++ UserCfgFiles},
-    run_all(Rest,AllLogDirs,AllCfgFiles,AllEvHs,[NO|NodeOpts],[LogDir|LogDirs],InitOptions,Specs);
-run_all([],AllLogDirs,_,AllEvHs,NodeOpts,LogDirs,InitOptions,Specs) ->
+    run_all(Rest,AllLogDirs,AllCfgFiles,AllEvHs,AllIncludes,
+	    [NO|NodeOpts],[LogDir|LogDirs],InitOptions,Specs);
+run_all([],AllLogDirs,_,AllEvHs,_AllIncludes,
+	NodeOpts,LogDirs,InitOptions,Specs) ->
     Handlers = [{H,A} || {Master,H,A} <- AllEvHs, Master == master],
     MasterLogDir = case lists:keysearch(master,1,AllLogDirs) of
 		       {value,{_,Dir}} -> Dir;
diff --git a/lib/common_test/test/ct_master_SUITE.erl b/lib/common_test/test/ct_master_SUITE.erl
index e0e1f93db2..5ac2866227 100644
--- a/lib/common_test/test/ct_master_SUITE.erl
+++ b/lib/common_test/test/ct_master_SUITE.erl
@@ -33,6 +33,13 @@
 
 -define(eh, ct_test_support_eh).
 
+-define(TEMP_DIR, case os:type() of
+		      {win32,_} ->
+			  "c:/Temp";
+		      _ ->
+			  "/tmp"
+		  end).
+
 %%--------------------------------------------------------------------
 %% TEST SERVER CALLBACK FUNCTIONS
 %%--------------------------------------------------------------------
@@ -43,18 +50,39 @@
 %% there will be clashes with logging processes etc).
 %%--------------------------------------------------------------------
 init_per_suite(Config) ->
-    Config1 = ct_test_support:init_per_suite(Config),
-    Config1.
+    ct_test_support:init_per_suite(Config).
 
 end_per_suite(Config) ->
     ct_test_support:end_per_suite(Config).
 
 init_per_testcase(TestCase, Config) ->
-    ct_test_support:init_per_testcase(TestCase, [{master, true}|Config]).
+    NodeCount = 5,
+    NodeNames = [list_to_atom("t_"++integer_to_list(N)) ||
+		 N <- lists:seq(1, NodeCount)],
+    ct_test_support:init_per_testcase(
+      TestCase,[{node_names,NodeNames},
+		{master, true}|Config]).
 
 end_per_testcase(TestCase, Config) ->
+    case os:type() of
+	{win32,_} ->
+	    %% If this is a windows run the logs are saved to /tmp and
+	    %% then moved to private_dir as a tar because otherwise
+	    %% the file names become too long! :(
+	    Files = filelib:wildcard(filename:join(?TEMP_DIR,"slave.*")),
+	    erl_tar:create(
+	      filename:join(
+		proplists:get_value(priv_dir,Config),"slaves.tar.gz"),
+	      Files,[compressed]),
+	    os:cmd("rm -rf "++filename:join(?TEMP_DIR,"slave.*"));
+	_ ->
+	    ok
+    end,
+    
     ct_test_support:end_per_testcase(TestCase, Config).
 
+all() ->
+    all(suite).
 all(doc) ->
     [""];
 
@@ -67,15 +95,35 @@ all(suite) ->
 %% TEST CASES
 %%--------------------------------------------------------------------
 ct_master_test(Config) when is_list(Config)->
-    NodeCount = 5,
+    NodeNames = proplists:get_value(node_names, Config),
     DataDir = ?config(data_dir, Config),
     PrivDir = ?config(priv_dir, Config),
-    NodeNames = [list_to_atom("testnode_"++integer_to_list(N)) ||
-		 N <- lists:seq(1, NodeCount)],
+
     FileName = filename:join(PrivDir, "ct_master_spec.spec"),
     Suites = [master_SUITE],
     TSFile = make_spec(DataDir, FileName, NodeNames, Suites, Config),
+    ERPid = ct_test_support:start_event_receiver(Config),
+
+    %% Run the spec through ct_master, then fetch the events gathered by
+    %% the receiver started above. The events are logged to priv_dir and,
+    %% for every slave node, must contain the start of init_per_suite,
+    %% the three test cases and end_per_suite in that order; otherwise
+    %% find_events/3 exits with event_not_found.
+
     [{TSFile, ok}] = run_test(ct_master_test, FileName, Config),
+
+    Events = ct_test_support:get_events(ERPid, Config),
+
+    ct_test_support:log_events(groups_suite_1,
+			       reformat(Events, ?eh),
+			       ?config(priv_dir, Config)),
+    find_events(NodeNames, [{tc_start,{master_SUITE,init_per_suite}},
+			    {tc_start,{master_SUITE,first_testcase}},
+			    {tc_start,{master_SUITE,second_testcase}},
+			    {tc_start,{master_SUITE,third_testcase}},
+			    {tc_start,{master_SUITE,end_per_suite}}],
+	       Events),
+
     ok.
 
 %%%-----------------------------------------------------------------
@@ -112,13 +160,25 @@ make_spec(DataDir, FileName, NodeNames, Suites, Config)->
 
     PrivDir = ?config(priv_dir, Config),
     LD = lists:map(fun(NodeName)->
-	     {logdir, NodeName, get_log_dir(PrivDir, NodeName)}
+	     {logdir, NodeName, get_log_dir(os:type(),PrivDir, NodeName)}
          end,
 	 NodeNames) ++ [{logdir, master, PrivDir}],
-
-    ct_test_support:write_testspec(N++C++S++LD++NS, FileName).
-
-get_log_dir(PrivDir, NodeName)->
+    EvHArgs = [{cbm,ct_test_support},{trace_level,?config(trace_level,Config)}],
+    EH = [{event_handler,master,[?eh],EvHArgs}],
+
+    Include = [{include,filename:join([DataDir,"master/include"])}],
+
+    ct_test_support:write_testspec(N++Include++EH++C++S++LD++NS, FileName).
+
+get_log_dir({win32,_},PrivDir, NodeName)->
+    case filelib:is_dir(?TEMP_DIR) of
+	false ->
+	    file:make_dir(?TEMP_DIR);
+	_ ->
+	    ok
+    end,
+    get_log_dir(tmp, ?TEMP_DIR,NodeName);
+get_log_dir(_,PrivDir,NodeName) ->
     LogDir = filename:join(PrivDir, io_lib:format("slave.~p", [NodeName])),
     file:make_dir(LogDir),
     LogDir.
@@ -126,11 +186,34 @@ get_log_dir(PrivDir, NodeName)->
 run_test(_Name, FileName, Config)->
     [{FileName, ok}] = ct_test_support:run(ct_master, run, [FileName], Config).
 
-reformat_events(Events, EH) ->
+reformat(Events, EH) ->
     ct_test_support:reformat(Events, EH).
 
 %%%-----------------------------------------------------------------
 %%% TEST EVENTS
 %%%-----------------------------------------------------------------
+find_events([], _CheckEvents, _) ->
+    ok;
+find_events([NodeName|NodeNames],CheckEvents,AllEvents) ->
+    find_events(NodeNames, CheckEvents,
+		remove_events(add_host(NodeName),CheckEvents, AllEvents, [])).
+
+remove_events(Node,[{Name,Data} | RestChecks],
+	      [{?eh,#event{ name = Name, node = Node, data = Data }}|RestEvs],
+	       Acc) ->
+    remove_events(Node, RestChecks, RestEvs, Acc);
+remove_events(Node, Checks, [Event|RestEvs], Acc) ->
+    remove_events(Node, Checks, RestEvs, [Event | Acc]);
+remove_events(_Node, [], [], Acc) ->
+    lists:reverse(Acc);
+remove_events(Node, Events, [], Acc) ->
+    test_server:format("Could not find events: ~p in ~p for node ~p",
+	   [Events, lists:reverse(Acc), Node]),
+    exit(event_not_found).
+
+add_host(NodeName) ->
+    {ok, HostName} = inet:gethostname(),
+    list_to_atom(atom_to_list(NodeName)++"@"++HostName).
+    
 expected_events(_)->
-[].
+    [].
diff --git a/lib/common_test/test/ct_master_SUITE_data/master/include/test.hrl b/lib/common_test/test/ct_master_SUITE_data/master/include/test.hrl
new file mode 100644
index 0000000000..e69de29bb2
--- /dev/null
+++ b/lib/common_test/test/ct_master_SUITE_data/master/include/test.hrl
diff --git a/lib/common_test/test/ct_master_SUITE_data/master/master_SUITE.erl b/lib/common_test/test/ct_master_SUITE_data/master/master_SUITE.erl
index e37ec3659c..032d69ad9f 100644
--- a/lib/common_test/test/ct_master_SUITE_data/master/master_SUITE.erl
+++ b/lib/common_test/test/ct_master_SUITE_data/master/master_SUITE.erl
@@ -28,6 +28,7 @@
 -compile(export_all).
 
 -include_lib("common_test/include/ct.hrl").
+-include("test.hrl").
 
 suite() ->
     [].
diff --git a/lib/dialyzer/RELEASE_NOTES b/lib/dialyzer/RELEASE_NOTES
index a05b3ac52b..08f274a996 100644
--- a/lib/dialyzer/RELEASE_NOTES
+++ b/lib/dialyzer/RELEASE_NOTES
@@ -5,6 +5,13 @@
 
 Version 2.x.x (in Erlang/OTP R14B01)
 ------------------------------------
+ - Fixed pretty rare infinite loop when refining the types of an SCC whose
+   functions all returned none() (thanks to Stavros Aronis).
+ - Fixed pretty rare crash when taking the infimum of two tuple_sets.
+ - Fixed pretty rare crash when using parameterized types containing unbound
+   variables (thanks to Nicolas Trangez for reporting it).
+ - Deeper unfolding of recursive types (thanks to Maria Christakis).
+ - Fixed some incomplete and erroneous specs in modules of kernel and stdlib.
  - Fixed problems in the handling of remote types in records used as types
    (thanks to Nico Kruber for the report and to Maria Christakis for the fix).
  - Fixed handling of nested opaque types (thanks to Thorsten Schuett for
diff --git a/lib/dialyzer/src/dialyzer.erl b/lib/dialyzer/src/dialyzer.erl
index d8fd073ca6..895089846a 100644
--- a/lib/dialyzer/src/dialyzer.erl
+++ b/lib/dialyzer/src/dialyzer.erl
@@ -225,6 +225,8 @@ plt_info(Plt) ->
 %% Machinery
 %%-----------
 
+-type doit_ret() :: {'ok', dial_ret()} | {'error', string()}.
+
 doit(F) ->
   try
     {ok, F()}
@@ -233,13 +235,17 @@ doit(F) ->
       {error, lists:flatten(Msg)}
   end.
 
+-spec cl_error(string()) -> no_return().
+
 cl_error(Msg) ->
   cl_halt({error, Msg}, #options{}).
 
+-spec gui_halt(doit_ret(), #options{}) -> no_return().
+
 gui_halt(R, Opts) ->
   cl_halt(R, Opts#options{report_mode = quiet}).
 
--spec cl_halt({'ok',dial_ret()} | {'error',string()}, #options{}) -> no_return().
+-spec cl_halt(doit_ret(), #options{}) -> no_return().
 
 cl_halt({ok, R = ?RET_NOTHING_SUSPICIOUS}, #options{report_mode = quiet}) ->
   halt(R);
diff --git a/lib/dialyzer/src/dialyzer_analysis_callgraph.erl b/lib/dialyzer/src/dialyzer_analysis_callgraph.erl
index 3438cc8c7e..abad1f3a75 100644
--- a/lib/dialyzer/src/dialyzer_analysis_callgraph.erl
+++ b/lib/dialyzer/src/dialyzer_analysis_callgraph.erl
@@ -21,7 +21,7 @@
 %%%-------------------------------------------------------------------
 %%% File    : dialyzer_analysis_callgraph.erl
 %%% Author  : Tobias Lindahl <[email protected]>
-%%% Description : 
+%%% Description :
 %%%
 %%% Created :  5 Apr 2005 by Tobias Lindahl <[email protected]>
 %%%-------------------------------------------------------------------
@@ -32,7 +32,7 @@
 
 -include("dialyzer.hrl").
 
--record(analysis_state, 
+-record(analysis_state,
 	{
 	  codeserver                    :: dialyzer_codeserver:codeserver(),
 	  analysis_type  = succ_typings :: anal_type(),
@@ -44,7 +44,7 @@
 	  plt                           :: dialyzer_plt:plt(),
 	  start_from     = byte_code    :: start_from(),
 	  use_contracts  = true         :: boolean(),
-	  behaviours = {false,[]}   :: {boolean(),[atom()]}
+	  behaviours     = {false,[]}   :: {boolean(),[atom()]}
 	 }).
 
 -record(server_state, {parent :: pid(), legal_warnings :: [dial_warn_tag()]}).
@@ -83,10 +83,10 @@ loop(#server_state{parent = Parent, legal_warnings = LegalWarnings} = State,
 	  send_warnings(Parent, SendWarnings)
       end,
       loop(State, Analysis, ExtCalls);
-    {AnalPid, cserver, CServer, Plt} -> 
+    {AnalPid, cserver, CServer, Plt} ->
       send_codeserver_plt(Parent, CServer, Plt),
       loop(State, Analysis, ExtCalls);
-    {AnalPid, done, Plt, DocPlt} ->      
+    {AnalPid, done, Plt, DocPlt} ->
       case ExtCalls =:= none of
 	true ->
 	  send_analysis_done(Parent, Plt, DocPlt);
@@ -176,7 +176,7 @@ analysis_start(Parent, Analysis) ->
   NonExportsList = sets:to_list(NonExports),
   Plt3 = dialyzer_plt:delete_list(State3#analysis_state.plt, NonExportsList),
   Plt4 = dialyzer_plt:delete_contract_list(Plt3, NonExportsList),
-  send_codeserver_plt(Parent, CServer, State3#analysis_state.plt), 
+  send_codeserver_plt(Parent, CServer, State3#analysis_state.plt),
   send_analysis_done(Parent, Plt4, State3#analysis_state.doc_plt).
 
 analyze_callgraph(Callgraph, State) ->
@@ -229,24 +229,24 @@ compile_and_store(Files, #analysis_state{codeserver = CServer,
 		  {error, Reason} ->
 		    {TmpCG, TmpCServer, [{File, Reason}|TmpFailed], TmpNoWarn,
                      TmpMods};
-		  {ok, NewCG, NoWarn, NewCServer, Mod} -> 
+		  {ok, NewCG, NoWarn, NewCServer, Mod} ->
 		    {NewCG, NewCServer, TmpFailed, NoWarn++TmpNoWarn,
                      [Mod|TmpMods]}
 		end
 	    end;
 	  byte_code ->
-	    fun(File, {TmpCG, TmpCServer, TmpFailed, TmpNoWarn, TmpMods}) -> 
+	    fun(File, {TmpCG, TmpCServer, TmpFailed, TmpNoWarn, TmpMods}) ->
 		case compile_byte(File, TmpCG, TmpCServer, UseContracts) of
 		  {error, Reason} ->
 		    {TmpCG, TmpCServer, [{File, Reason}|TmpFailed], TmpNoWarn,
                      TmpMods};
-		  {ok, NewCG, NoWarn, NewCServer, Mod} -> 
+		  {ok, NewCG, NoWarn, NewCServer, Mod} ->
 		    {NewCG, NewCServer, TmpFailed, NoWarn++TmpNoWarn,
                      [Mod|TmpMods]}
 		end
 	    end
 	end,
-  {NewCallgraph1, NewCServer, Failed, NoWarn, Modules} = 
+  {NewCallgraph1, NewCServer, Failed, NoWarn, Modules} =
     lists:foldl(Fun, {Callgraph, CServer, [], [], []}, Files),
   case Failed =:= [] of
     true ->
@@ -255,7 +255,7 @@ compile_and_store(Files, #analysis_state{codeserver = CServer,
         lists:foldl(fun({Mod, F}, Dict) -> dict:append(Mod, F, Dict) end,
                     dict:new(), NewFiles),
       check_for_duplicate_modules(ModDict);
-    false -> 
+    false ->
       Msg = io_lib:format("Could not scan the following file(s): ~p",
 			  [lists:flatten(Failed)]),
       exit({error, Msg})
@@ -268,14 +268,14 @@ compile_and_store(Files, #analysis_state{codeserver = CServer,
   if UnknownBehaviours =:= [] -> ok;
      true -> send_unknown_behaviours(Parent, UnknownBehaviours)
   end,
-  State1 = State#analysis_state{behaviours = {BehChk,KnownBehaviours}},
+  State1 = State#analysis_state{behaviours = {BehChk, KnownBehaviours}},
   NewCallgraph2 = cleanup_callgraph(State1, NewCServer, NewCallgraph1, Modules),
   {T3, _} = statistics(runtime),
   Msg2 = io_lib:format("done in ~.2f secs\n", [(T3-T2)/1000]),
-  send_log(Parent, Msg2),  
+  send_log(Parent, Msg2),
   {NewCallgraph2, sets:from_list(NoWarn), NewCServer}.
 
-cleanup_callgraph(#analysis_state{plt = InitPlt, parent = Parent, 
+cleanup_callgraph(#analysis_state{plt = InitPlt, parent = Parent,
 				  codeserver = CodeServer,
 				  behaviours = {BehChk, KnownBehaviours}
 				 },
@@ -298,9 +298,9 @@ cleanup_callgraph(#analysis_state{plt = InitPlt, parent = Parent,
 		       not dialyzer_plt:contains_mfa(InitPlt, To)],
   {BadCalls1, RealExtCalls} =
     if ExtCalls1 =:= [] -> {[], []};
-       true -> 
+       true ->
 	ModuleSet = sets:from_list(Modules),
-	lists:partition(fun({_From, {M, _F, _A}}) -> 
+	lists:partition(fun({_From, {M, _F, _A}}) ->
 			    sets:is_element(M, ModuleSet) orelse
 			      dialyzer_plt:contains_module(InitPlt, M)
 			end, ExtCalls1)
@@ -367,14 +367,14 @@ compile_byte(File, Callgraph, CServer, UseContracts) ->
 	  case dialyzer_utils:get_record_and_type_info(AbstrCode) of
 	    {error, _} = Error -> Error;
 	    {ok, RecInfo} ->
-	      CServer1 = 
+	      CServer1 =
 		dialyzer_codeserver:store_temp_records(Mod, RecInfo, CServer),
 	      case UseContracts of
 		true ->
 		  case dialyzer_utils:get_spec_info(Mod, AbstrCode, RecInfo) of
 		    {error, _} = Error -> Error;
 		    {ok, SpecInfo} ->
-		      CServer2 = 
+		      CServer2 =
 			dialyzer_codeserver:store_temp_contracts(Mod, SpecInfo,
 								 CServer1),
 		      store_core(Mod, Core, NoWarn, Callgraph, CServer2)
@@ -455,8 +455,12 @@ expand_files([File|Left], Ext, FileAcc) ->
   case filelib:is_dir(File) of
     true ->
       {ok, List} = file:list_dir(File),
-      NewFiles =
-        [filename:join(File, X) || X <- List, filename:extension(X) =:= Ext],
+      NewFiles = lists:foldl(fun (X, Acc) ->
+				 case filename:extension(X) =:= Ext of
+				   true -> [filename:join(File, X)|Acc];
+				   false -> Acc
+				 end
+			     end, FileAcc, List),
       expand_files(Left, Ext, NewFiles);
     false ->
       expand_files(Left, Ext, [File|FileAcc])
diff --git a/lib/dialyzer/src/dialyzer_cl.erl b/lib/dialyzer/src/dialyzer_cl.erl
index 616e2465dc..0250c47ad0 100644
--- a/lib/dialyzer/src/dialyzer_cl.erl
+++ b/lib/dialyzer/src/dialyzer_cl.erl
@@ -559,7 +559,7 @@ cl_loop(State, LogCache) ->
       cl_loop(State, LogCache)
   end.
 
--spec failed_anal_msg(string(), [_]) -> string().
+-spec failed_anal_msg(string(), [_]) -> nonempty_string().
 
 failed_anal_msg(Reason, LogCache) ->
   Msg = "Analysis failed with error: " ++ Reason ++ "\n",
diff --git a/lib/dialyzer/src/dialyzer_contracts.erl b/lib/dialyzer/src/dialyzer_contracts.erl
index bf80c6f470..bcdcf2685d 100644
--- a/lib/dialyzer/src/dialyzer_contracts.erl
+++ b/lib/dialyzer/src/dialyzer_contracts.erl
@@ -163,20 +163,23 @@ process_contract_remote_types(CodeServer) ->
 check_contracts(Contracts, Callgraph, FunTypes) ->
   FoldFun =
     fun(Label, Type, NewContracts) ->
-	{ok, {M,F,A} = MFA} = dialyzer_callgraph:lookup_name(Label, Callgraph),
-	case orddict:find(MFA, Contracts) of
-	  {ok, {_FileLine, Contract}} ->
-	    case check_contract(Contract, Type) of
-	      ok ->
-		case erl_bif_types:is_known(M, F, A) of
-		  true ->
-		    %% Disregard the contracts since
-		    %% this is a known function.
-		    NewContracts;
-		  false ->
-		    [{MFA, Contract}|NewContracts]
+	case dialyzer_callgraph:lookup_name(Label, Callgraph) of
+	  {ok, {M,F,A} = MFA} ->
+	    case orddict:find(MFA, Contracts) of
+	      {ok, {_FileLine, Contract}} ->
+		case check_contract(Contract, Type) of
+		  ok ->
+		    case erl_bif_types:is_known(M, F, A) of
+		      true ->
+			%% Disregard the contracts since
+			%% this is a known function.
+			NewContracts;
+		      false ->
+			[{MFA, Contract}|NewContracts]
+		    end;
+		  {error, _Error} -> NewContracts
 		end;
-	      {error, _Error} -> NewContracts
+	      error -> NewContracts
 	    end;
 	  error -> NewContracts
 	end
diff --git a/lib/dialyzer/src/dialyzer_typesig.erl b/lib/dialyzer/src/dialyzer_typesig.erl
index 3effb1c2e6..f68472d2fc 100644
--- a/lib/dialyzer/src/dialyzer_typesig.erl
+++ b/lib/dialyzer/src/dialyzer_typesig.erl
@@ -314,6 +314,7 @@ traverse(Tree, DefinedVars, State) ->
 	  error -> t_fun(length(Vars), t_none());
 	  {ok, Dom} -> t_fun(Dom, t_none())
 	end,
+      TreeVar = mk_var(Tree),
       State2 =
 	try
 	  State1 = case state__add_prop_constrs(Tree, State0) of
@@ -321,20 +322,21 @@ traverse(Tree, DefinedVars, State) ->
 		     PropState -> PropState
 		   end,
 	  {BodyState, BodyVar} = traverse(Body, DefinedVars1, State1),
-	  state__store_conj(mk_var(Tree), eq,
+	  state__store_conj(TreeVar, eq,
 			    t_fun(mk_var_list(Vars), BodyVar), BodyState)
 	catch
 	  throw:error ->
-	    state__store_conj(mk_var(Tree), eq, FunFailType, State0)
+	    state__store_conj(TreeVar, eq, FunFailType, State0)
 	end,
       Cs = state__cs(State2),
-      State3 = state__store_constrs(mk_var(Tree), Cs, State2),
-      Ref = mk_constraint_ref(mk_var(Tree), get_deps(Cs)),
+      State3 = state__store_constrs(TreeVar, Cs, State2),
+      Ref = mk_constraint_ref(TreeVar, get_deps(Cs)),
       OldCs = state__cs(State),
       State4 = state__new_constraint_context(State3),
       State5 = state__store_conj_list([OldCs, Ref], State4),
       State6 = state__store_fun_arity(Tree, State5),
-      {State6, mk_var(Tree)};
+      State7 = state__add_fun_to_scc(TreeVar, State6),
+      {State7, TreeVar};
     'let' ->
       Vars = cerl:let_vars(Tree),
       Arg = cerl:let_arg(Tree),
@@ -580,7 +582,7 @@ handle_try(Tree, DefinedVars, State) ->
 	      mk_conj_constraint_list([HandlerCs,
 				       mk_constraint(TreeVar, eq, HandlerVar)]),
 	    Disj = mk_disj_constraint_list([Conj1, Conj2]),
-	    {Disj, mk_var(Tree)};
+	    {Disj, TreeVar};
 	  {false, true} ->
 	    {mk_conj_constraint_list([ArgBodyCs,
 				      mk_constraint(TreeVar, eq, BodyVar)]),
@@ -2070,7 +2072,7 @@ new_state(SCC0, NextLabel, CallGraph, Plt, PropTypes) ->
   NameMap = dict:from_list([{MFA, Var} || {MFA, {Var, _Fun}, _Rec} <- SCC0]),
   SCC = [mk_var(Fun) || {_MFA, {_Var, Fun}, _Rec} <- SCC0],
   #state{callgraph = CallGraph, name_map = NameMap, next_label = NextLabel,
-	 prop_types = PropTypes, plt = Plt, scc = SCC}.
+	 prop_types = PropTypes, plt = Plt, scc = ordsets:from_list(SCC)}.
 
 state__set_rec_dict(State, RecDict) ->
   State#state{records = RecDict}.
@@ -2161,6 +2163,9 @@ get_apply_constr(FunLabels, Dst, ArgTypes, #state{callgraph = CG} = State) ->
 state__scc(#state{scc = SCC}) ->
   SCC.
 
+state__add_fun_to_scc(Fun, #state{scc = SCC} = State) ->
+  State#state{scc = ordsets:add_element(Fun, SCC)}.
+
 state__plt(#state{plt = PLT}) ->
   PLT.
 
diff --git a/lib/edoc/src/edoc_layout.erl b/lib/edoc/src/edoc_layout.erl
index 900f0b3040..6cc2f5cd9b 100644
--- a/lib/edoc/src/edoc_layout.erl
+++ b/lib/edoc/src/edoc_layout.erl
@@ -482,7 +482,7 @@ local_defs([]) -> [];
 local_defs(Es) ->
     [?NL,
      {ul, [{class, "definitions"}],
-      lists:concat([[{li, [{tt, localdef(E)}]}, ?NL] || E <- Es])}].
+      lists:append([[{li, [{tt, localdef(E)}]}, ?NL] || E <- Es])}].
 
 localdef(E = #xmlElement{content = Es}) ->
     (case get_elem(typevar, Es) of
diff --git a/lib/edoc/src/edoc_lib.erl b/lib/edoc/src/edoc_lib.erl
index 5b7fb1e0d2..c1f95a7a67 100644
--- a/lib/edoc/src/edoc_lib.erl
+++ b/lib/edoc/src/edoc_lib.erl
@@ -288,11 +288,13 @@ parse_expr(S, L) ->
 %% content in e.g.
 %% <a href="overview-summary.html#mtag-author">`@author'</a> tags.
 
-%% @type info() = #info{name = string(),
-%%                      mail = string(),
-%%                      uri = string()}
+%% @type info() = #info{name  = string(),
+%%                      email = string(),
+%%                      uri   = string()}
 
--record(info, {name = "", email = "", uri = ""}).
+-record(info, {name = ""  :: string(),
+	       email = "" :: string(),
+	       uri = ""   :: string()}).
 
 parse_contact(S, L) ->
     I = scan_name(S, L, #info{}, []),
@@ -988,6 +990,14 @@ get_plugin(Key, Default, Opts) ->
 %% ---------------------------------------------------------------------
 %% Error handling
 
+-type line() :: erl_scan:line().
+-type err()  :: 'eof'
+	      | {'missing', char()}
+	      | {line(), atom(), string()}
+	      | {string(), [term()]} | string(). % {Format, Args} pair
+
+-spec throw_error(err(), line()) -> no_return().
+
 throw_error({missing, C}, L) ->
     throw_error({"missing '~c'.", [C]}, L);
 throw_error(eof, L) ->
diff --git a/lib/edoc/src/edoc_macros.erl b/lib/edoc/src/edoc_macros.erl
index 2874e2940c..5b512cb53a 100644
--- a/lib/edoc/src/edoc_macros.erl
+++ b/lib/edoc/src/edoc_macros.erl
@@ -14,8 +14,6 @@
 %% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 %% USA
 %%
-%% $Id$
-%%
 %% @private
 %% @copyright 2001-2005 Richard Carlsson
 %% @author Richard Carlsson <[email protected]>
@@ -317,6 +315,14 @@ macro_content([C | Cs], As, L, N) ->
 macro_content([], _As, _L, _N) ->
     throw('end').
 
+-type line() :: erl_scan:line().
+-type err()  :: 'unterminated_macro'
+	      | 'macro_name'
+	      | {'macro_name', string()}
+	      | {string(), [string()]}.
+
+-spec throw_error(line(), err()) -> no_return().
+
 throw_error(L, unterminated_macro) ->
     throw_error(L, {"unexpected end of macro.", []});
 throw_error(L, macro_name) ->
diff --git a/lib/edoc/src/edoc_parser.yrl b/lib/edoc/src/edoc_parser.yrl
index 0eea8ae66f..91ee5a1b2b 100644
--- a/lib/edoc/src/edoc_parser.yrl
+++ b/lib/edoc/src/edoc_parser.yrl
@@ -404,6 +404,8 @@ parse_throws(S, L) ->
 
 %% ---------------------------------------------------------------------
 
+-spec throw_error(term(), erl_scan:line()) -> no_return().
+
 throw_error({L, M, D}, _L0) ->
     throw({error,L,{format_error,M,D}});
 throw_error({parse_spec, E}, L) ->
diff --git a/lib/edoc/src/edoc_refs.erl b/lib/edoc/src/edoc_refs.erl
index c2146bbe02..edc30674c0 100644
--- a/lib/edoc/src/edoc_refs.erl
+++ b/lib/edoc/src/edoc_refs.erl
@@ -14,8 +14,6 @@
 %% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 %% USA
 %%
-%% $Id$
-%%
 %% @private
 %% @copyright 2003 Richard Carlsson
 %% @author Richard Carlsson <[email protected]>
diff --git a/lib/edoc/src/edoc_tags.erl b/lib/edoc/src/edoc_tags.erl
index 1f2cb99c75..c0b861e08a 100644
--- a/lib/edoc/src/edoc_tags.erl
+++ b/lib/edoc/src/edoc_tags.erl
@@ -330,6 +330,10 @@ parse_typedef(Data, Line, _Env, _Where) ->
 	    Def
     end.
 
+-type line() :: erl_scan:line().
+
+-spec parse_file(_, line(), _, _) -> no_return().
+
 parse_file(Data, Line, Env, _Where) ->
     case edoc_lib:parse_expr(Data, Line) of
 	{string, _, File0} ->
@@ -344,6 +348,8 @@ parse_file(Data, Line, Env, _Where) ->
 	    throw_error(Line, file_not_string)
     end.
 
+-spec parse_header(_, line(), _, _) -> no_return().
+
 parse_header(Data, Line, Env, {Where, _}) ->
     parse_header(Data, Line, Env, Where);
 parse_header(Data, Line, Env, Where) when is_list(Where) ->
@@ -362,6 +368,13 @@ parse_header(Data, Line, Env, Where) when is_list(Where) ->
 	    throw_error(Line, file_not_string)
     end.
 
+-type err() :: 'file_not_string'
+             | {'file_not_found', file:filename()}
+             | {'read_file', file:filename(), term()}
+             | {string(), [term()]} | string(). % {Format, Args} pair
+
+-spec throw_error(line(), err()) -> no_return().
+
 throw_error(L, {read_file, File, R}) ->
     throw_error(L, {"error reading file '~s': ~w",
 		    [edoc_lib:filename(File), R]});
diff --git a/lib/edoc/src/edoc_types.erl b/lib/edoc/src/edoc_types.erl
index 85c9ee6f2a..b0255f793d 100644
--- a/lib/edoc/src/edoc_types.erl
+++ b/lib/edoc/src/edoc_types.erl
@@ -14,8 +14,6 @@
 %% Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 %% USA
 %%
-%% $Id$
-%%
 %% @private
 %% @copyright 2001-2003 Richard Carlsson
 %% @author Richard Carlsson <[email protected]>
diff --git a/lib/erl_interface/src/decode/decode_big.c b/lib/erl_interface/src/decode/decode_big.c
index efe9c6e5d9..b5e9b45a3b 100644
--- a/lib/erl_interface/src/decode/decode_big.c
+++ b/lib/erl_interface/src/decode/decode_big.c
@@ -74,7 +74,7 @@ erlang_big *ei_alloc_big(unsigned int digit_bytes) {
     memset(b,(char)0,sizeof(erlang_big));
     if ( (b->digits = malloc(2*n)) == NULL) {
         free(b);
-        return 0;
+        return NULL;
     }
    
     b->arity = digit_bytes;
diff --git a/lib/erl_interface/src/epmd/epmd_publish.c b/lib/erl_interface/src/epmd/epmd_publish.c
index a9b8727747..d45fe644c0 100644
--- a/lib/erl_interface/src/epmd/epmd_publish.c
+++ b/lib/erl_interface/src/epmd/epmd_publish.c
@@ -69,6 +69,12 @@ static int ei_epmd_r4_publish (int port, const char *alive, unsigned ms)
   int n;
   int res, creation;
   
+  if (len > sizeof(buf)-2)
+  {
+    erl_errno = ERANGE;
+    return -1;
+  }
+
   s = buf;
   put16be(s,len);
 
diff --git a/lib/erl_interface/src/epmd/epmd_unpublish.c b/lib/erl_interface/src/epmd/epmd_unpublish.c
index 08662fe1ec..495cbab44c 100644
--- a/lib/erl_interface/src/epmd/epmd_unpublish.c
+++ b/lib/erl_interface/src/epmd/epmd_unpublish.c
@@ -59,6 +59,11 @@ int ei_unpublish_tmo(const char *alive, unsigned ms)
     int len = 1 + strlen(alive);
     int fd, res;
 
+    if (len > sizeof(buf)-3) {
+	erl_errno = ERANGE;
+	return -1;
+    }
+
     put16be(s,len);
     put8(s,EI_EPMD_STOP_REQ);
     strcpy(s, alive);
diff --git a/lib/erl_interface/src/legacy/erl_marshal.c b/lib/erl_interface/src/legacy/erl_marshal.c
index 18315bfbd3..5084c65230 100644
--- a/lib/erl_interface/src/legacy/erl_marshal.c
+++ b/lib/erl_interface/src/legacy/erl_marshal.c
@@ -1890,8 +1890,11 @@ static int cmp_big_big(unsigned char**e1, unsigned char **e2)
     ei_get_type((char *)*e1,&i1,&t1,&n1);
     ei_get_type((char *)*e2,&i2,&t2,&n2);
     
-    b1 = ei_alloc_big(n1);
-    b2 = ei_alloc_big(n2);
+    if ( (b1 = ei_alloc_big(n1)) == NULL) return -1;
+    if ( (b2 = ei_alloc_big(n2)) == NULL) {
+        ei_free_big(b1);
+        return 1;
+    }
     
     ei_decode_big((char *)*e1,&i1,b1);
     ei_decode_big((char *)*e2,&i2,b2);
diff --git a/lib/erl_interface/src/misc/ei_portio.c b/lib/erl_interface/src/misc/ei_portio.c
index c4e397f1e0..a3f6f63fff 100644
--- a/lib/erl_interface/src/misc/ei_portio.c
+++ b/lib/erl_interface/src/misc/ei_portio.c
@@ -166,6 +166,9 @@ int ei_writev_fill_t(int fd,  const  struct  iovec  *iov,  int iovcnt, unsigned
 	if (done < sum) {
 	    if (iov_base == NULL) {
 		iov_base = malloc(sizeof(struct iovec) * iovcnt);
+		if (iov_base == NULL) {
+		    return -1;
+		}
 		memcpy(iov_base, iov, sizeof(struct iovec) * iovcnt);
 		current_iov = iov_base;
 	    }
diff --git a/lib/erl_interface/src/misc/ei_printterm.c b/lib/erl_interface/src/misc/ei_printterm.c
index 98473f780e..5fc6b3542c 100644
--- a/lib/erl_interface/src/misc/ei_printterm.c
+++ b/lib/erl_interface/src/misc/ei_printterm.c
@@ -253,7 +253,8 @@ static int print_term(FILE* fp, ei_x_buff* x,
             erlang_big *b;
             char *ds;
 
-            b = ei_alloc_big(n);
+            if ( (b = ei_alloc_big(n)) == NULL) goto err;
+
             if (ei_decode_big(buf, index, b) < 0) {
                 ei_free_big(b);
                 goto err;
diff --git a/lib/hipe/cerl/erl_bif_types.erl b/lib/hipe/cerl/erl_bif_types.erl
index 835f9a205a..696414eb7a 100644
--- a/lib/hipe/cerl/erl_bif_types.erl
+++ b/lib/hipe/cerl/erl_bif_types.erl
@@ -2001,12 +2001,6 @@ type(file, get_cwd, 0, _) ->
 	t_tuple([t_atom('error'), t_file_posix_error()]));
 type(file, make_dir, 1, Xs) ->
   strict(arg_types(file, make_dir, 1), Xs, fun (_) -> t_file_return() end);
-type(file, open, 2, Xs) ->
-  strict(arg_types(file, open, 2), Xs,
-	 fun (_) ->
-	     t_sup([t_tuple([t_atom('ok'), t_file_io_device()]),
-		    t_tuple([t_atom('error'), t_file_posix_error()])])
-	 end);
 type(file, read_file, 1, Xs) ->
   strict(arg_types(file, read_file, 1), Xs,
 	 fun (_) ->
@@ -4214,8 +4208,6 @@ arg_types(file, get_cwd, 0) ->
   [];
 arg_types(file, make_dir, 1) ->
   [t_file_name()];
-arg_types(file, open, 2) ->
-  [t_file_name(), t_list(t_file_open_option())];
 arg_types(file, read_file, 1) ->
   [t_file_name()];
 arg_types(file, set_cwd, 1) ->
diff --git a/lib/hipe/cerl/erl_types.erl b/lib/hipe/cerl/erl_types.erl
index 4dd124a457..1ed85af172 100644
--- a/lib/hipe/cerl/erl_types.erl
+++ b/lib/hipe/cerl/erl_types.erl
@@ -2127,7 +2127,8 @@ t_elements(?identifier(IDs)) ->
 t_elements(?list(_, _, _) = T) -> [T];
 t_elements(?number(_, _) = T) ->
   case T of
-    ?number(?any, ?unknown_qual) -> [T]; 
+    ?number(?any, ?unknown_qual) ->
+      [?float, ?integer(?any)];
     ?float -> [T];
     ?integer(?any) -> [T];
     ?int_range(_, _) -> [T];
@@ -2174,10 +2175,10 @@ t_inf(?var(_), T, _Mode) -> subst_all_vars_to_any(T);
 t_inf(T, ?var(_), _Mode) -> subst_all_vars_to_any(T);
 t_inf(?any, T, _Mode) -> subst_all_vars_to_any(T);
 t_inf(T, ?any, _Mode) -> subst_all_vars_to_any(T);
-t_inf(?unit, _, _Mode) -> ?unit;
-t_inf(_, ?unit, _Mode) -> ?unit;
 t_inf(?none, _, _Mode) -> ?none;
 t_inf(_, ?none, _Mode) -> ?none;
+t_inf(?unit, _, _Mode) -> ?unit;	% ?unit cases should appear below ?none
+t_inf(_, ?unit, _Mode) -> ?unit;
 t_inf(T, T, _Mode) -> subst_all_vars_to_any(T);
 t_inf(?atom(Set1), ?atom(Set2), _) ->
   case set_intersection(Set1, Set2) of
@@ -2386,10 +2387,12 @@ inf_tuple_sets(L1, L2, Mode) ->
     List -> ?tuple_set(List)
   end.
 
-inf_tuple_sets([{Arity, Tuples1}|Left1], [{Arity, Tuples2}|Left2], Acc, Mode) ->
+inf_tuple_sets([{Arity, Tuples1}|Ts1], [{Arity, Tuples2}|Ts2], Acc, Mode) ->
   case inf_tuples_in_sets(Tuples1, Tuples2, Mode) of
-    [] -> inf_tuple_sets(Left1, Left2, Acc, Mode);
-    NewTuples -> inf_tuple_sets(Left1, Left2, [{Arity, NewTuples}|Acc], Mode)
+    [] -> inf_tuple_sets(Ts1, Ts2, Acc, Mode);
+    [?tuple_set([{Arity, NewTuples}])] ->
+      inf_tuple_sets(Ts1, Ts2, [{Arity, NewTuples}|Acc], Mode);
+    NewTuples -> inf_tuple_sets(Ts1, Ts2, [{Arity, NewTuples}|Acc], Mode)
   end;
 inf_tuple_sets([{Arity1, _}|Ts1] = L1, [{Arity2, _}|Ts2] = L2, Acc, Mode) ->
   if Arity1 < Arity2 -> inf_tuple_sets(Ts1, L2, Acc, Mode);
@@ -2766,7 +2769,9 @@ t_subtract_list(T, []) ->
 -spec t_subtract(erl_type(), erl_type()) -> erl_type().
 
 t_subtract(_, ?any) -> ?none;
+t_subtract(_, ?var(_)) -> ?none;
 t_subtract(?any, _) -> ?any;
+t_subtract(?var(_) = T, _) -> T;
 t_subtract(T, ?unit) -> T;
 t_subtract(?unit, _) -> ?unit;
 t_subtract(?none, _) -> ?none;
@@ -2922,7 +2927,7 @@ t_subtract(T, ?product(_)) ->
   T;
 t_subtract(?union(U1), ?union(U2)) ->
   subtract_union(U1, U2);
-t_subtract(T1, T2) ->  
+t_subtract(T1, T2) ->
   ?union(U1) = force_union(T1),
   ?union(U2) = force_union(T2),
   subtract_union(U1, U2).
diff --git a/lib/hipe/icode/hipe_icode_callgraph.erl b/lib/hipe/icode/hipe_icode_callgraph.erl
index 95182fc002..3dba8e1071 100644
--- a/lib/hipe/icode/hipe_icode_callgraph.erl
+++ b/lib/hipe/icode/hipe_icode_callgraph.erl
@@ -25,8 +25,6 @@
 %%           in hipe_icode_type.erl.
 %%
 %% Created :  7 Jun 2004 by Tobias Lindahl <[email protected]>
-%%
-%% $Id$
 %%-----------------------------------------------------------------------
 -module(hipe_icode_callgraph).
 
@@ -48,7 +46,7 @@
 
 -type mfa_icode() :: {mfa(), #icode{}}.
 
--record(icode_callgraph, {codedict :: dict(), ordered_sccs :: [[atom()]]}).
+-record(icode_callgraph, {codedict :: dict(), ordered_sccs :: [[mfa()]]}).
 
 %%------------------------------------------------------------------------
 %% Exported functions
@@ -78,7 +76,7 @@ construct_callgraph(List) ->
 
 to_list(#icode_callgraph{codedict = Dict, ordered_sccs = SCCs}) ->
   FlatList = lists:flatten(SCCs),
-  [{Mod, dict:fetch(Mod, Dict)} || Mod <- FlatList].
+  [{MFA, dict:fetch(MFA, Dict)} || MFA <- FlatList].
 
 %%------------------------------------------------------------------------
 
diff --git a/lib/hipe/icode/hipe_icode_range.erl b/lib/hipe/icode/hipe_icode_range.erl
index bcc857acf4..c7e6a451af 100644
--- a/lib/hipe/icode/hipe_icode_range.erl
+++ b/lib/hipe/icode/hipe_icode_range.erl
@@ -843,7 +843,7 @@ compare_with_integer(N, OldVarRange) ->
 
 %%== Ranges ==================================================================
 
--spec pp_ann(#ann{} | erl_types:erl_type()) -> [string()].
+-spec pp_ann(#ann{} | erl_types:erl_type()) -> string().
 
 pp_ann(#ann{range=#range{range=R, other=false}}) ->
   pp_range(R);
@@ -1365,7 +1365,7 @@ range_bnot(Range) ->
   Minus_one = range_init({-1,-1}, false),
   range_add(range_mult(Range, Minus_one), Minus_one).
 
--spec width(range_rep() | integer()) -> 'pos_inf' | non_neg_integer().
+-spec width(range_rep() | inf_integer()) -> 'pos_inf' | non_neg_integer().
 
 width({Min, Max}) -> inf_max([width(Min), width(Max)]);
 width(pos_inf) -> pos_inf;
diff --git a/lib/hipe/main/hipe.erl b/lib/hipe/main/hipe.erl
index c80fb6a0a2..570e4d9d17 100644
--- a/lib/hipe/main/hipe.erl
+++ b/lib/hipe/main/hipe.erl
@@ -1,20 +1,20 @@
 %% -*- erlang-indent-level: 2 -*-
 %%
 %% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
-%% 
+%%
+%% Copyright Ericsson AB 2001-2010. All Rights Reserved.
+%%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
 %% compliance with the License. You should have received a copy of the
 %% Erlang Public License along with this software. If not, it can be
 %% retrieved online at http://www.erlang.org/.
-%% 
+%%
 %% Software distributed under the License is distributed on an "AS IS"
 %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 %% the License for the specific language governing rights and limitations
 %% under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 %% ====================================================================
@@ -25,7 +25,6 @@
 %%  Purpose  :  
 %%  Notes    : 
 %%  History  : * 1998-01-28 Erik Johansson ([email protected]): Created.
-%%  CVS      : $Id$
 %% ====================================================================
 %% @doc This is the direct interface to the HiPE compiler.
 %%
@@ -506,7 +505,7 @@ compile(Name, File, Opts0) ->
       run_compiler(Name, DisasmFun, IcodeFun, NewOpts)
   end.
 
--spec compile_core(mod(), _, compile_file(), comp_options()) ->
+-spec compile_core(mod(), cerl:c_module(), compile_file(), comp_options()) ->
 	 {'ok', compile_ret()} | {'error', term()}.
 
 compile_core(Name, Core0, File, Opts) ->
@@ -535,7 +534,7 @@ compile_core(Name, Core0, File, Opts) ->
 %%
 %% @see compile/3
 
--spec compile(mod(), _, compile_file(), comp_options()) ->
+-spec compile(mod(), cerl:c_module() | [], compile_file(), comp_options()) ->
 	 {'ok', compile_ret()} | {'error', term()}.
 
 compile(Name, [], File, Opts) ->
@@ -790,7 +789,7 @@ finalize_fun(MfaIcodeList, Exports, Opts) ->
     FalseVal when (FalseVal =:= undefined) orelse (FalseVal =:= false) ->
       [finalize_fun_sequential(MFAIcode, Opts, #comp_servers{})
        || {_MFA, _Icode} = MFAIcode <- MfaIcodeList];
-    TrueVal when (TrueVal =:= true) or (TrueVal =:= debug) ->
+    TrueVal when (TrueVal =:= true) orelse (TrueVal =:= debug) ->
       finalize_fun_concurrent(MfaIcodeList, Exports, Opts)
   end.
 
@@ -939,6 +938,8 @@ assemble(CompiledCode, Closures, Exports, Options) ->
       hipe_sparc_assemble:assemble(CompiledCode, Closures, Exports, Options);
     powerpc ->
       hipe_ppc_assemble:assemble(CompiledCode, Closures, Exports, Options);
+    ppc64 ->
+      hipe_ppc_assemble:assemble(CompiledCode, Closures, Exports, Options);
     arm ->
       hipe_arm_assemble:assemble(CompiledCode, Closures, Exports, Options);
     x86 ->
@@ -1048,7 +1049,7 @@ post(Res, Icode, Options) ->
 %% --------------------------------------------------------------------
 
 %% @doc Returns the current HiPE version as a string().
--spec version() -> string().
+-spec version() -> nonempty_string().
 
 version() ->
   ?VERSION_STRING().
@@ -1390,6 +1391,8 @@ o1_opts() ->
       Common;
     powerpc ->
       Common;
+    ppc64 ->
+      Common;
     arm ->
       Common -- [inline_fp]; % Pointless optimising for absent hardware
     x86 ->
@@ -1411,6 +1414,8 @@ o2_opts() ->
       Common;
     powerpc ->
       Common;
+    ppc64 ->
+      Common;
     arm ->
       Common;
     x86 ->
@@ -1429,6 +1434,8 @@ o3_opts() ->
       Common;
     powerpc ->
       Common;
+    ppc64 ->
+      Common;
     arm ->
       Common;
     x86 ->
diff --git a/lib/hipe/main/hipe_main.erl b/lib/hipe/main/hipe_main.erl
index fe9bc83fd2..e81642fb33 100644
--- a/lib/hipe/main/hipe_main.erl
+++ b/lib/hipe/main/hipe_main.erl
@@ -1,20 +1,20 @@
 %% -*- erlang-indent-level: 2 -*-
 %%
 %% %CopyrightBegin%
-%% 
-%% Copyright Ericsson AB 2001-2009. All Rights Reserved.
-%% 
+%%
+%% Copyright Ericsson AB 2001-2010. All Rights Reserved.
+%%
 %% The contents of this file are subject to the Erlang Public License,
 %% Version 1.1, (the "License"); you may not use this file except in
 %% compliance with the License. You should have received a copy of the
 %% Erlang Public License along with this software. If not, it can be
 %% retrieved online at http://www.erlang.org/.
-%% 
+%%
 %% Software distributed under the License is distributed on an "AS IS"
 %% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 %% the License for the specific language governing rights and limitations
 %% under the License.
-%% 
+%%
 %% %CopyrightEnd%
 %%
 %% @doc	This is the HiPE compiler's main "loop".
@@ -102,7 +102,7 @@ compile_icode(MFA, LinearIcode0, Options, Servers, DebugState) ->
   ?opt_start_timer("Icode"),
   LinearIcode1 = icode_no_comment(LinearIcode0, Options),
   IcodeCfg0 = icode_linear_to_cfg(LinearIcode1, Options),
-  %%hipe_icode_cfg:pp(IcodeCfg1),
+  %% hipe_icode_cfg:pp(IcodeCfg0),
   IcodeCfg1 = icode_handle_exceptions(IcodeCfg0, MFA, Options),
   IcodeCfg3 = icode_inline_bifs(IcodeCfg1, Options),
   pp(IcodeCfg3, MFA, icode, pp_icode, Options, Servers),
diff --git a/lib/hipe/rtl/hipe_rtl.erl b/lib/hipe/rtl/hipe_rtl.erl
index ef06b2abf8..d93f423f0c 100644
--- a/lib/hipe/rtl/hipe_rtl.erl
+++ b/lib/hipe/rtl/hipe_rtl.erl
@@ -354,6 +354,8 @@
          phi_arglist_update/2,
          phi_redirect_pred/3]).
 
+-export_type([alub_cond/0]).
+
 %%
 %% RTL
 %%
@@ -590,6 +592,9 @@ branch_pred(#branch{p=P}) -> P.
 %% alub
 %%
 
+-type alub_cond() :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le'
+                   | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'.
+
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False) ->
   mk_alub(Dst, Src1, Op, Src2, Cond, True, False, 0.5).
 mk_alub(Dst, Src1, Op, Src2, Cond, True, False, P) ->
diff --git a/lib/hipe/rtl/hipe_rtl_arith.inc b/lib/hipe/rtl/hipe_rtl_arith.inc
index 31fedd927e..9e80fa5e13 100644
--- a/lib/hipe/rtl/hipe_rtl_arith.inc
+++ b/lib/hipe/rtl/hipe_rtl_arith.inc
@@ -119,7 +119,8 @@ eval_alu(Op, Arg1, Arg2) ->
 %% there are cases where we can evaluate a subset of the bits, but can
 %% not do a full eval-alub call (eg. a + 0 gives no carry)
 %%
--spec eval_cond_bits(atom(), boolean(), boolean(), boolean(), boolean()) -> boolean().
+-spec eval_cond_bits(hipe_rtl:alub_cond(), boolean(),
+		     boolean(), boolean(), boolean()) -> boolean().
 
 eval_cond_bits(Cond, N, Z, V, C) ->
   case Cond of
@@ -146,9 +147,7 @@ eval_cond_bits(Cond, N, Z, V, C) ->
     'overflow' ->
       V;
     'not_overflow' ->
-      not V;
-    _ ->
-      ?EXIT({'condition code not handled',Cond})
+      not V
   end.
 
 eval_alub(Op, Cond, Arg1, Arg2) ->
diff --git a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
index 76c0a88933..64d723d15d 100644
--- a/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
+++ b/lib/hipe/rtl/hipe_rtl_ssa_const_prop.erl
@@ -93,8 +93,6 @@
 -include("../ssa/hipe_ssa_const_prop.inc").
 
 -type bool_lattice() :: 'true' | 'false' | 'top' | 'bottom'.
--type conditional()  :: 'eq' | 'ne' | 'ge' | 'geu' | 'gt' | 'gtu' | 'le'
-                      | 'leu' | 'lt' | 'ltu' | 'overflow' | 'not_overflow'.
 
 %%-----------------------------------------------------------------------------
 %% Procedure : visit_expression/2
@@ -400,7 +398,7 @@ maybe_top_or_bottom([top | Rest], _) -> maybe_top_or_bottom(Rest, top);
 maybe_top_or_bottom([bottom | _], _) -> bottom;
 maybe_top_or_bottom([_ | Rest],  TB) -> maybe_top_or_bottom(Rest, TB).
 
--spec partial_eval_branch(conditional(), bool_lattice(), bool_lattice(),
+-spec partial_eval_branch(hipe_rtl:alub_cond(), bool_lattice(), bool_lattice(),
 			  bool_lattice() | 0, bool_lattice() | 0) ->
 	 bool_lattice().
 partial_eval_branch(Cond, N0, Z0, V0, C0) ->
@@ -441,14 +439,14 @@ visit_alub(Inst, Env) ->
                  hipe_rtl:alub_false_label(Inst)];
       top    -> [];
       _      ->
-        %if the partial branch cannot be evaluated we must execute the 
-        % instruction at runtime.
+        %% if the partial branch cannot be evaluated we must execute the
+        %% instruction at runtime.
         case partial_eval_branch(hipe_rtl:alub_cond(Inst), N, Z, C, V) of
           bottom -> [hipe_rtl:alub_true_label(Inst), 
                      hipe_rtl:alub_false_label(Inst)];
           top    -> [];
-          true   -> [hipe_rtl:alub_true_label(Inst) ];
-          false  -> [hipe_rtl:alub_false_label(Inst) ]
+          true   -> [hipe_rtl:alub_true_label(Inst)];
+          false  -> [hipe_rtl:alub_false_label(Inst)]
         end
      end,
   {[], NewSSA, NewEnv} = set_to(hipe_rtl:alub_dst(Inst), NewVal,  Env),
@@ -944,8 +942,8 @@ update_branch(Inst, Env) ->
 
 %% some small helpers.
 alub_to_move(Inst, Res, Lab) ->
-  [ hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res),
-    hipe_rtl:mk_goto(Lab) ].
+  [hipe_rtl:mk_move(hipe_rtl:alub_dst(Inst), Res),
+   hipe_rtl:mk_goto(Lab)].
 
 make_alub_subst_list(bottom, _, Tail) ->  Tail;
 make_alub_subst_list(top, Src, _) ->
@@ -970,13 +968,13 @@ update_alub(Inst, Env) ->
       %% move and the branch. We can however replace variable with constants:
       S1 = make_alub_subst_list(Val1, Src1, []),
       S2 = make_alub_subst_list(Val2, Src2, S1),
-      [ hipe_rtl:subst_uses(S2, Inst) ];
-    _ -> % we know where we will be going, let's find out what Dst should be.
-         % knowing where we are going means that at most one of the values is
-         % bottom, hence we can replace the alu-instr with a move. 
-         % remember, a = b + 0 can give us enough info to know what jump to 
-         % do without knowing the value of a. (I wonder if this will ever 
-         % actualy happen ;) 
+      [hipe_rtl:subst_uses(S2, Inst)];
+    _ -> %% we know where we will be going, let's find out what Dst should be.
+         %% knowing where we are going means that at most one of the values is
+         %% bottom, hence we can replace the alu-instr with a move.
+         %% remember, a = b + 0 can give us enough info to know what jump to
+         %% do without knowing the value of a. (I wonder if this will ever
+         %% actually happen ;)
       Res = case ResVal of 
               bottom ->  % something nonconstant.
                 if (Val1 =:= bottom) -> Src1;
@@ -985,11 +983,12 @@ update_alub(Inst, Env) ->
               _ -> hipe_rtl:mk_imm(ResVal)
             end,
       case CondRes of 
-        top -> io:format("oops. something VERY bad: ~w ~w V1 & 2 ~w ~w\n", 
-	                 [Inst, {ResVal, N, Z, C, V} , Val1, Val2]),
-           [Inst ];
-        true   -> alub_to_move(Inst, Res, hipe_rtl:alub_true_label(Inst));
-        false  -> alub_to_move(Inst, Res, hipe_rtl:alub_false_label(Inst))
+        top ->
+	  io:format("oops. something VERY bad: ~w ~w V1 & 2 ~w ~w\n",
+		    [Inst, {ResVal, N, Z, C, V} , Val1, Val2]),
+	  [Inst];
+        true  -> alub_to_move(Inst, Res, hipe_rtl:alub_true_label(Inst));
+        false -> alub_to_move(Inst, Res, hipe_rtl:alub_false_label(Inst))
       end
   end.
 
@@ -1050,7 +1049,7 @@ update_phi(Instruction, Environment) ->
 
 %%-----------------------------------------------------------------------------
 
-%% make sure that all precoloured rgisters are taken out of the equation.
+%% make sure that all precoloured registers are taken out of the equation.
 lookup_lattice_value(X, Environment) ->
   case hipe_rtl_arch:is_precoloured(X) or hipe_rtl:is_const_label(X) of 
     true ->
diff --git a/lib/hipe/tools/hipe_tool.erl b/lib/hipe/tools/hipe_tool.erl
index a1bd79895d..990805ceca 100644
--- a/lib/hipe/tools/hipe_tool.erl
+++ b/lib/hipe/tools/hipe_tool.erl
@@ -56,9 +56,9 @@
 
 -record(state, {win_created = false	:: boolean(),
 		mindex = 0		:: integer(),
-		mod			:: module(),
+		mod			:: atom(),
 		funs = []		:: [fa()],
-		mods = [] 		:: [module()],
+		mods = [] 		:: [atom()],
 		options = [o2]		:: comp_options(),
 		compiling = false	:: 'false' | pid()
 	       }).
@@ -291,8 +291,7 @@ update_code_listbox(State) ->
 		      integer_to_list(length(Mods))++")"),
 	  catch gs:config(code_listbox, [{data, Mods},
 					 {items, Mods},
-					 {selection, 0}
-					]),
+					 {selection, 0}]),
 	  update_module_box(State#state{mods = Mods}, 0, Mods, "")  
       end
   end.
@@ -367,7 +366,7 @@ update_text(Lab, Text) ->
 %% @doc Returns a list of all loaded modules. 
 %%---------------------------------------------------------------------
 
--spec mods() -> [module()].
+-spec mods() -> [atom()].
 
 mods() ->
   [Mod || {Mod,_File} <- code:all_loaded()].
@@ -382,25 +381,26 @@ funs(Mod) ->
 native_code(Mod) ->
   Mod:module_info(native_addresses).
 
--spec mfas(module(), [fa()]) -> [mfa()].
+-spec mfas(atom(), [fa()]) -> [mfa()].
 
 mfas(Mod, Funs) ->
   [{Mod,F,A} || {F,A} <- Funs].
 
--spec fun_names(module(), [fa()], [fa_address()], boolean()) -> string().
+-spec fun_names(atom(), [fa()], [fa_address()], boolean()) -> [string()].
 
 fun_names(M, Funs, NativeCode, Prof) ->
-  [list_to_atom(atom_to_list(F) ++ "/" ++ integer_to_list(A) ++
-		(case in_native(F, A, NativeCode) of
-		   true -> " [native] ";
-		   false -> ""
-		 end)
-		++
-		if Prof -> 
-		    (catch integer_to_list(hipe_bifs:call_count_get({M,F,A})));
-		   true -> ""
-		end) ||
-      {F,A} <- Funs].
+  [atom_to_list(F) ++ "/" ++ integer_to_list(A)
+   ++
+     (case in_native(F, A, NativeCode) of
+	true -> " [native] ";
+	false -> ""
+      end)
+   ++
+     if Prof ->
+	 (catch integer_to_list(hipe_bifs:call_count_get({M,F,A})));
+	true -> ""
+     end
+   || {F,A} <- Funs].
 
 -spec in_native(atom(), arity(), [fa_address()]) -> boolean().
 
@@ -461,7 +461,7 @@ get_compile(Info) ->
     false -> []
   end.
 
--spec is_profiled(module()) -> boolean().
+-spec is_profiled(atom()) -> boolean().
 
 is_profiled(Mod) ->
   case hipe_bifs:call_count_get({Mod,module_info,0}) of
@@ -478,7 +478,7 @@ compile(State) ->
   P = spawn(fun() -> c(Parent, State#state.mod, State#state.options) end),
   State#state{compiling = P}.
 
--spec c(pid(), module(), comp_options()) -> 'ok'.
+-spec c(pid(), atom(), comp_options()) -> 'ok'.
 
 c(Parent, Mod, Options) ->
   Res = hipe:c(Mod, Options),
diff --git a/lib/inets/doc/src/mod_auth.xml b/lib/inets/doc/src/mod_auth.xml
index f3628c8297..9503add2e0 100644
--- a/lib/inets/doc/src/mod_auth.xml
+++ b/lib/inets/doc/src/mod_auth.xml
@@ -111,7 +111,8 @@
       </desc>
     </func>
     <func>
-      <name>list_users(Options) -> {ok, Users} | {error, Reason} &lt;name>list_users(Port, Dir) -> {ok, Users} | {error, Reason}</name>
+      <name>list_users(Options) -> {ok, Users} | {error, Reason}</name>
+      <name>list_users(Port, Dir) -> {ok, Users} | {error, Reason}</name>
       <name>list_users(Address, Port, Dir) -> {ok, Users} | {error, Reason}</name>
       <fsummary>List users in the user database.</fsummary>
       <type>
diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml
index 2044b074ee..64cdd3a8ea 100644
--- a/lib/kernel/doc/src/file.xml
+++ b/lib/kernel/doc/src/file.xml
@@ -603,8 +603,10 @@ f.txt:  {person, "kalle", 25}.
       <type>
         <v>Filename = name()</v>
         <v>Modes = [Mode]</v>
-        <v>&nbsp;Mode = read | write | append | exclusive | raw | binary | {delayed_write, Size, Delay} | delayed_write | {read_ahead, Size} | read_ahead | compressed</v>
+        <v>&nbsp;Mode = read | write | append | exclusive | raw | binary | {delayed_write, Size, Delay} | delayed_write | {read_ahead, Size} | read_ahead | compressed | {encoding, Encoding}</v>
         <v>&nbsp;&nbsp;Size = Delay = int()</v>
+	<v>&nbsp;&nbsp;Encoding = latin1 | unicode | utf8 | utf16 | {utf16, Endian} | utf32 | {utf32, Endian}</v>
+        <v>&nbsp;&nbsp;&nbsp;&nbsp;Endian = big | little</v>
         <v>IoDevice = io_device()</v>
         <v>Reason = ext_posix() | system_limit</v>
       </type>
diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl
index cffe4e3db5..97d914b043 100644
--- a/lib/kernel/src/file.erl
+++ b/lib/kernel/src/file.erl
@@ -81,19 +81,28 @@
 -type io_device() :: pid() | fd().
 -type location()  :: integer() | {'bof', integer()} | {'cur', integer()}
 		   | {'eof', integer()} | 'bof' | 'cur' | 'eof'.
--type mode()      :: 'read' | 'write' | 'append' | 'raw' | 'binary' | 
-		     {'delayed_write', non_neg_integer(), non_neg_integer()} | 
-		     'delayed_write' | {'read_ahead', pos_integer()} | 
-		     'read_ahead' | 'compressed' | 'exclusive'.
+-type mode()      :: 'read' | 'write' | 'append'
+                   | 'exclusive' | 'raw' | 'binary'
+		   | {'delayed_write', non_neg_integer(), non_neg_integer()}
+		   | 'delayed_write' | {'read_ahead', pos_integer()}
+		   | 'read_ahead' | 'compressed'
+		   | {'encoding', unicode:encoding()}.
 -type name()      :: string() | atom() | [name()].
--type posix()     :: atom().
+-type posix()     :: 'eacces'  | 'eagain'  | 'ebadf'   | 'ebusy'  | 'edquot'
+		   | 'eexist'  | 'efault'  | 'efbig'   | 'eintr'  | 'einval'
+		   | 'eio'     | 'eisdir'  | 'eloop'   | 'emfile' | 'emlink'
+		   | 'enametoolong'
+		   | 'enfile'  | 'enodev'  | 'enoent'  | 'enomem' | 'enospc'
+		   | 'enotblk' | 'enotdir' | 'enotsup' | 'enxio'  | 'eperm'
+		   | 'epipe'   | 'erofs'   | 'espipe'  | 'esrch'  | 'estale'
+		   | 'exdev'.
 -type bindings()  :: any().
 
 -type date()      :: {pos_integer(), pos_integer(), pos_integer()}.
 -type time()      :: {non_neg_integer(), non_neg_integer(), non_neg_integer()}.
 -type date_time() :: {date(), time()}.
--type posix_file_advise() :: 'normal' | 'sequential' | 'random' | 'no_reuse' |
-                            'will_need' | 'dont_need'.
+-type posix_file_advise() :: 'normal' | 'sequential' | 'random'
+                           | 'no_reuse' | 'will_need' | 'dont_need'.
 
 %%%-----------------------------------------------------------------
 %%% General functions
@@ -286,7 +295,7 @@ raw_write_file_info(Name, #file_info{} = Info) ->
 %% Contemporary mode specification - list of options
 
 -spec open(Name :: name(), Modes :: [mode()]) ->
-	{'ok', io_device()} | {'error', posix()}.
+	{'ok', io_device()} | {'error', posix() | 'system_limit'}.
 
 open(Item, ModeList) when is_list(ModeList) ->
     case lists:member(raw, ModeList) of
diff --git a/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc
index 0714c7b645..2e2cc386b7 100644
--- a/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc
+++ b/lib/mnesia/doc/src/Mnesia_chap2.xmlsrc
@@ -235,9 +235,7 @@
         <seealso marker="Mnesia_chap3#start_mnesia">Starting Mnesia</seealso>.
         </item>
       </list>
-      <p>Continuing the dialogue with the Erlang shell will produce the following
-        the following:
-        </p>
+      <p>Continuing the dialogue with the Erlang shell will produce the following:</p>
       <pre><![CDATA[
         3> company:init().
         {atomic,ok}
@@ -418,7 +416,7 @@ In_proj</tcaption>
         interchangeably throughout this book. 
         </p>
       <p>A Mnesia table is populated by Mnesia records. For example,
-        the tuple <c>{boss, klacke, bjarne}</c> is an record.  The
+        the tuple <c>{boss, klacke, bjarne}</c> is a record.  The
         second element in this tuple is the key. In order to uniquely
         identify a table row both the key and the table name is
         needed. The term <em>object identifier</em>, 
@@ -553,7 +551,7 @@ In_proj</tcaption>
           stored in the database: 
           </p>
         <pre>
-\011  mnesia:select(employee, [{#employee{sex = female, name = '$1', _ = '_'},[], ['$1']}]).
+mnesia:select(employee, [{#employee{sex = female, name = '$1', _ = '_'},[], ['$1']}]).
         </pre>
         <p>Select must always run within an activity such as a
           transaction. To be able to call from the shell we might
@@ -587,8 +585,8 @@ In_proj</tcaption>
           </p>
         <pre>
           Q = qlc:q([E#employee.name || E <![CDATA[<-]]> mnesia:table(employee),
-\011                                E#employee.sex == female]),
-\011  qlc:e(Q),
+                                E#employee.sex == female]),
+          qlc:e(Q),
         </pre>
         <p>Accessing mnesia tables from a QLC list comprehension must
           always be done within a transaction. Consider the following
diff --git a/lib/mnesia/doc/src/Mnesia_chap3.xml b/lib/mnesia/doc/src/Mnesia_chap3.xml
index 9a382bcb5a..2db9af9cf7 100644
--- a/lib/mnesia/doc/src/Mnesia_chap3.xml
+++ b/lib/mnesia/doc/src/Mnesia_chap3.xml
@@ -132,7 +132,7 @@
             function changes the format on all records in table
             <c>Tab</c>. It applies the argument <c>Fun</c> to all
             records in the table. <c>Fun</c> shall be a function which
-            takes an record of the old type, and returns the record of the new
+            takes a record of the old type, and returns the record of the new
             type. The table key may not be changed.</p>
           <code type="none">
 -record(old, {key, val}).
@@ -418,8 +418,8 @@ skeppet %<input>erl -sname b -mnesia dir '"/ldisc/scratch/Mnesia.company"'</inpu
               type <c>set</c> and <c>bag</c>: </p>
             <pre>
  f() -> F =  fun() ->
-\011      mnesia:write({foo, 1, 2}), mnesia:write({foo, 1, 3}),
-\011      mnesia:read({foo, 1}) end, mnesia:transaction(F).             </pre>
+     mnesia:write({foo, 1, 2}), mnesia:write({foo, 1, 3}),
+     mnesia:read({foo, 1}) end, mnesia:transaction(F).             </pre>
             <p>This transaction will return the list <c>[{foo,1,3}]</c> if
               the <c>foo</c> table is of type <c>set</c>.  However, list
               <c>[{foo,1,2}, {foo,1,3}]</c> will return if the table is
diff --git a/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc
index 7d89c1b0dd..6e8055326b 100644
--- a/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc
+++ b/lib/mnesia/doc/src/Mnesia_chap4.xmlsrc
@@ -514,13 +514,13 @@ The behavior is undefined if any process perform a write
        of the table itself. This is an implementation detail, but remember
        the dirty functions are low level functions. 
       </item>
-      <item><c>mnesia:dirty_last(Tab)</c> This function works exactly as
+      <item><c>mnesia:dirty_last(Tab)</c> This function works exactly like
       <c>mnesia:dirty_first/1</c> but returns the last object in
        Erlang  term  order for the <c>ordered_set</c> table type. For
        all other table types, <c>mnesia:dirty_first/1</c> and 
       <c>mnesia:dirty_last/1</c> are synonyms.
       </item>
-      <item><c>mnesia:dirty_prev(Tab, Key)</c> This function works exactly as
+      <item><c>mnesia:dirty_prev(Tab, Key)</c> This function works exactly like
       <c>mnesia:dirty_next/2</c> but returns the previous object in
        Erlang term  order for the ordered_set table type. For
        all other table types, <c>mnesia:dirty_next/2</c> and 
diff --git a/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc
index 1c7e3662e1..30a8991465 100644
--- a/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc
+++ b/lib/mnesia/doc/src/Mnesia_chap5.xmlsrc
@@ -335,7 +335,7 @@ ok
             explicitly be set at table creation. The default is
             <c>0</c>, but if <c>n_disc_copies</c> and
             <c>n_disc_only_copies</c> also are <c>0</c>,
-            <c>n_ram_copies</c>\011will default be set to <c>1</c>. 
+            <c>n_ram_copies</c> will by default be set to <c>1</c>.
             </p>
         </item>
         <tag><c>{n_disc_copies, Int}</c></tag>
@@ -408,7 +408,7 @@ ok
 (a@sam)4> SecProps = [{foreign_key, {prim_dict, sec_val}}].
 [{foreign_key,{prim_dict,sec_val}}]
 (a@sam)5> mnesia:create_table(sec_dict, 
-\011                      [{frag_properties, SecProps},
+                              [{frag_properties, SecProps},
 (a@sam)5>                      {attributes, [sec_key, sec_val]}]).
 {atomic,ok}
 (a@sam)6> Write = fun(Rec) -> mnesia:write(Rec) end.
@@ -418,23 +418,23 @@ ok
 (a@sam)8> SecKey = 42.
 42
 (a@sam)9> mnesia:activity(sync_dirty, Write,
-\011\011          [{prim_dict, PrimKey, -11}], mnesia_frag).
+                          [{prim_dict, PrimKey, -11}], mnesia_frag).
 ok
 (a@sam)10> mnesia:activity(sync_dirty, Write,
-\011\011           [{sec_dict, SecKey, PrimKey}], mnesia_frag).
+                           [{sec_dict, SecKey, PrimKey}], mnesia_frag).
 ok
 (a@sam)11> mnesia:change_table_frag(prim_dict, {add_frag, [node()]}).
 {atomic,ok}
 (a@sam)12> SecRead = fun(PrimKey, SecKey) ->
-\011\011       mnesia:read({sec_dict, PrimKey}, SecKey, read) end.
+               mnesia:read({sec_dict, PrimKey}, SecKey, read) end.
 #Fun<erl_eval>
 (a@sam)13> mnesia:activity(transaction, SecRead,
-\011\011           [PrimKey, SecKey], mnesia_frag).
+                           [PrimKey, SecKey], mnesia_frag).
 [{sec_dict,42,11}]
 (a@sam)14> Info = fun(Tab, Item) -> mnesia:table_info(Tab, Item) end.
 #Fun<erl_eval>
 (a@sam)15> mnesia:activity(sync_dirty, Info,
-\011\011           [prim_dict, frag_size], mnesia_frag).
+                           [prim_dict, frag_size], mnesia_frag).
 [{prim_dict,0},
  {prim_dict_frag2,0},
  {prim_dict_frag3,0},
@@ -444,7 +444,7 @@ ok
  {prim_dict_frag7,0},
  {prim_dict_frag8,0}]
 (a@sam)16> mnesia:activity(sync_dirty, Info,
-\011\011           [sec_dict, frag_size], mnesia_frag).
+                           [sec_dict, frag_size], mnesia_frag).
 [{sec_dict,0},
  {sec_dict_frag2,0},
  {sec_dict_frag3,0},
@@ -1051,7 +1051,7 @@ ok
            ActivityID will be received.  Note that this event may still be received even
            if no table events with a corresponding ActivityID were received, depending on
            the tables to which the receiving process is subscribed.</p>
-	 <p>Dirty operations always only contain one update and thus no activity event is sent.</p>
+         <p>Dirty operations always only contain one update and thus no activity event is sent.</p>
        </item>
      </taglist>
     </section>
diff --git a/lib/mnesia/doc/src/mnesia.xml b/lib/mnesia/doc/src/mnesia.xml
index 5d3bcf830e..16e78ea0af 100644
--- a/lib/mnesia/doc/src/mnesia.xml
+++ b/lib/mnesia/doc/src/mnesia.xml
@@ -799,7 +799,7 @@ mnesia:change_table_copy_type(person, node(), disc_copies)
           </item>
           <item>
             <p><c>{local_content, Bool}</c>, where <c>Bool</c> must be
-              either <c>true</c> or <c>false</c>. The default value is <c>false</c>.\011      </p>
+              either <c>true</c> or <c>false</c>. The default value is <c>false</c>.</p>
           </item>
         </list>
         <p>For example, the following call creates the <c>person</c> table
@@ -1022,7 +1022,7 @@ mnesia:create_table(person,
       <name>dirty_last(Tab) ->  Key | exit({aborted, Reason}) </name>
       <fsummary>Return the key for the last record in a table.</fsummary>
       <desc>
-        <p>This function works exactly 
+        <p>This function works exactly like
           <c>mnesia:dirty_first/1</c> but returns the last object in
           Erlang  term  order for the <c>ordered_set</c> table type. For
           all other table types, <c>mnesia:dirty_first/1</c> and 
@@ -1063,11 +1063,11 @@ mnesia:create_table(person,
       <name>dirty_prev(Tab, Key) -> Key | exit({aborted, Reason}) </name>
       <fsummary>Return the previous key in a table. </fsummary>
       <desc>
-        <p>This function works exactly 
+        <p>This function works exactly like
           <c>mnesia:dirty_next/2</c> but returns the previous object in
           Erlang term  order for the ordered_set table type. For
           all other table types, <c>mnesia:dirty_next/2</c> and 
-          <c>mnesia:dirty_prev/2</c> are synonyms.\011  </p>
+          <c>mnesia:dirty_prev/2</c> are synonyms.</p>
       </desc>
     </func>
     <func>
@@ -1334,7 +1334,7 @@ mnesia:create_table(person,
       <name>foldr(Function, Acc, Table) -> NewAcc | transaction abort </name>
       <fsummary>Call Function for each record in Table  </fsummary>
       <desc>
-        <p>This function works exactly as 
+        <p>This function works exactly like
           <c>foldl/3</c> but iterates the table in the opposite order 
           for the <c>ordered_set</c> table type. For
           all other table types, <c>foldr/3</c> and 
@@ -1512,14 +1512,14 @@ mnesia:create_table(person,
       <fsummary>Check if code is running in a transaction.</fsummary>
       <desc>
         <p>When this function is executed inside a transaction context
-	  it returns <c>true</c>, otherwise <c>false</c>.</p>
+          it returns <c>true</c>, otherwise <c>false</c>.</p>
       </desc>
     </func>
     <func>
       <name>last(Tab) ->  Key | transaction abort </name>
       <fsummary>Return the key for the last record in a table.</fsummary>
       <desc>
-        <p>This function works exactly 
+        <p>This function works exactly like
           <c>mnesia:first/1</c> but returns the last object in
           Erlang  term  order for the <c>ordered_set</c> table type. For
           all other table types, <c>mnesia:first/1</c> and 
@@ -1698,11 +1698,11 @@ mnesia:create_table(person,
       <name>prev(Tab, Key) -> Key | transaction abort </name>
       <fsummary>Return the previous key in a table. </fsummary>
       <desc>
-        <p>This function works exactly 
+        <p>This function works exactly like
           <c>mnesia:next/2</c> but returns the previous object in
           Erlang term  order for the ordered_set table type. For
           all other table types, <c>mnesia:next/2</c> and 
-          <c>mnesia:prev/2</c> are synonyms.\011  </p>
+          <c>mnesia:prev/2</c> are synonyms.</p>
       </desc>
     </func>
     <func>
@@ -1891,10 +1891,10 @@ mnesia:create_table(person,
         <p>For example to find the names of all male persons with an age over 30 in table
           Tab do:</p>
         <code type="none">
-\011  MatchHead = #person{name='$1', sex=male, age='$2', _='_'},
-\011  Guard = {'>', '$2', 30},
-\011  Result = '$1',
-\011  mnesia:select(Tab,[{MatchHead, [Guard], [Result]}]),
+MatchHead = #person{name='$1', sex=male, age='$2', _='_'},
+Guard = {'>', '$2', 30},
+Result = '$1',
+mnesia:select(Tab,[{MatchHead, [Guard], [Result]}]),
         </code>
       </desc>
     </func>
@@ -2835,7 +2835,7 @@ raise(Name, Amount) ->
     </func>
     <func>
       <name>write(Tab, Record, LockKind) -> transaction abort | ok </name>
-      <fsummary>Write an record into the database.</fsummary>
+      <fsummary>Write a record into the database.</fsummary>
       <desc>
         <p>Writes the record <c>Record</c> to the table <c>Tab</c>.
           </p>
diff --git a/lib/stdlib/doc/src/re.xml b/lib/stdlib/doc/src/re.xml
index 80adc3e347..056e7bc9b9 100644
--- a/lib/stdlib/doc/src/re.xml
+++ b/lib/stdlib/doc/src/re.xml
@@ -37,29 +37,24 @@
   <modulesummary>Perl like regular expressions for Erlang</modulesummary>
   <description>
 
-    <p>This module contains functions for regular expression
-      matching for strings and binaries.</p>
+    <p>This module contains regular expression matching functions for
+    strings and binaries.</p>
 
     <p>The regular expression syntax and semantics resemble that of
-    Perl.  This library in many ways replaces the old regexp library
-    written purely in Erlang, as it has a richer syntax as well as
-    many more options. The library is also faster than the
-    older regexp implementation.</p>
-
-    <p>Although the library's matching algorithms are currently based
-    on the PCRE library, it is not to be viewed as an Erlang to PCRE
-    mapping. Only parts of the PCRE library is interfaced and the re
-    library in some ways extend PCRE. The PCRE documentation contains
-    many parts of no interest to the Erlang programmer, why only the
-    relevant part of the documentation is included here. There should
-    bee no need to go directly to the PCRE library documentation.</p>
+    Perl. This library replaces the deprecated pure-Erlang regexp
+    library; it has a richer syntax, more options and is faster.</p>
+
+    <p>The library's matching algorithms are currently based on the
+    PCRE library, but not all of the PCRE library is interfaced and
+    some parts of the library go beyond what PCRE offers. The sections of
+    the PCRE documentation which are relevant to this module are included
+    here.</p>
 
     <note>
-    <p>The Erlang literal syntax for strings give special
-    meaning to the &quot;\&quot; (backslash) character. To literally write
-    a regular expression or a replacement string containing a
-    backslash in your code or in the shell, two backslashes have to be written:
-    &quot;\\&quot;.</p>
+    <p>The Erlang literal syntax for strings uses the &quot;\&quot;
+    (backslash) character as an escape code. You need to escape
+    backslashes in literal strings, both in your code and in the shell,
+    with an additional backslash, i.e.: &quot;\\&quot;.</p>
     </note>
 
 
@@ -72,7 +67,7 @@
       - a binary is allowed as the tail of the list</code>
       <code type="none">
     unicode_binary() = binary() with characters encoded in UTF-8 coding standard
-    unicode_char() = integer() representing valid unicode codepoint
+    unicode_char() = integer() representing a valid unicode codepoint
 
     chardata() = charlist() | unicode_binary()
 
@@ -82,9 +77,9 @@
       <code type="none">
     mp() = Opaque datatype containing a compiled regular expression.
       - The mp() is guaranteed to be a tuple() having the atom
-	're_pattern' as it's first element, to allow for matching in
+	're_pattern' as its first element, to allow for matching in
         guards. The arity of the tuple() or the content of the other fields
-	is however not to be trusted.</code>
+	may change in future releases.</code>
   </section>
   <funcs>
     <func>
@@ -132,7 +127,7 @@
       <tag><c>dollar_endonly</c></tag>
       <item>A dollar metacharacter in the pattern matches only at the end of the subject string. Without this option, a dollar also matches immediately before a newline at the end of the string (but not before any other newlines). The <c>dollar_endonly</c> option is ignored if <c>multiline</c> is given. There is no equivalent option in Perl, and no way to set it within a pattern.</item>
       <tag><c>dotall</c></tag>
-      <item>A dot maturate in the pattern matches all characters, including those that indicate newline. Without it, a dot does not match when the current position is at a newline. This option is equivalent to Perl's /s option, and it can be changed within a pattern by a (?s) option setting. A negative class such as [^a] always matches newline characters, independent of the setting of this option.</item>
+      <item>A dot in the pattern matches all characters, including those that indicate newline. Without it, a dot does not match when the current position is at a newline. This option is equivalent to Perl's /s option, and it can be changed within a pattern by a (?s) option setting. A negative class such as [^a] always matches newline characters, independent of this option's setting.</item>
       <tag><c>extended</c></tag>
       <item>Whitespace data characters in the pattern are ignored except when escaped or inside a character class. Whitespace does not include the VT character (ASCII 11). In addition, characters between an unescaped # outside a character class and the next newline, inclusive, are also ignored. This is equivalent to Perl's /x option, and it can be changed within a pattern by a (?x) option setting.
 
@@ -214,9 +209,10 @@ This option makes it possible to include comments inside complicated patterns. N
       or as a pre compiled <c>mp()</c> in which case it is executed
       against the subject directly.</p>
 
-      <p>When compilation is involved, the exception <c>badarg</c> is thrown if
-      a compilation error occurs. To locate the error in the regular
-      expression, use the function <c>re:compile/2</c> to get more information.</p>
+      <p>When compilation is involved, the exception <c>badarg</c> is
+      thrown if a compilation error occurs. Call <c>re:compile/2</c>
+      to get information about the location of the error in the
+      regular expression.</p>
 
       <p>If the regular expression is previously compiled, the option
       list can only contain the options <c>anchored</c>,
@@ -246,7 +242,7 @@ This option makes it possible to include comments inside complicated patterns. N
       how captured substrings are to be returned (as index tuples,
       lists or binaries). The <c>capture</c> option makes the function
       quite flexible and powerful. The different options are described
-      in detail below</p>
+      in detail below.</p>
 
       <p>If the capture options describe that no substring capturing
       at all is to be done (<c>{capture, none}</c>), the function will
@@ -256,7 +252,7 @@ This option makes it possible to include comments inside complicated patterns. N
       be done either by specifying <c>none</c> or an empty list as
       <c>ValueSpec</c>.</p>
 
-      <p>A description of all the options relevant for execution follows:</p>
+      <p>The options relevant for execution are:</p>
 
       <taglist>
       <tag><c>anchored</c></tag>
@@ -270,27 +266,25 @@ This option makes it possible to include comments inside complicated patterns. N
       <tag><c>global</c></tag>
       <item>
 
-      <p>Implements global (repetitive) search as the <c>g</c> flag in
-      i.e. Perl. Each match found is returned as a separate
+      <p>Implements global (repetitive) search (the <c>g</c> flag in
+      Perl). Each match is returned as a separate
       <c>list()</c> containing the specific match as well as any
       matching subexpressions (or as specified by the <c>capture
       option</c>). The <c>Captured</c> part of the return value will
-      hence be a <c>list()</c> of <c>list()</c>'s when this
+      hence be a <c>list()</c> of <c>list()</c>s when this
       option is given.</p>
 
-      <p>When the regular expression matches an empty string, the
-      behaviour might seem non-intuitive, why the behaviour requites
-      some clarifying.  With the global option, <c>re:run/3</c>
-      handles empty matches in the same way as Perl, meaning that a
-      match at any point giving an empty string (with length 0) will
-      be retried with the options
-      <c>[anchored, notempty]</c> as well. If that 
-      search gives a result of length &gt; 0, the result is included. 
-      An example:</p>
+      <p>The interaction of the global option with a regular
+      expression which matches an empty string surprises some users.
+      When the global option is given, <c>re:run/3</c> handles empty
+      matches in the same way as Perl: a zero-length match at any
+      point will be retried with the options <c>[anchored,
+      notempty]</c> as well. If that search gives a result of length
+      &gt; 0, the result is included.  For example:</p>
       
 <code>    re:run("cat","(|at)",[global]).</code>
 
-      <p>The matching will be performed as following:</p>
+      <p>The following matching will be performed:</p>
       <taglist>
       <tag>At offset <c>0</c></tag>
       <item>The regexp <c>(|at)</c> will first match at the initial
@@ -302,11 +296,11 @@ This option makes it possible to include comments inside complicated patterns. N
       <item> The search is retried
       with the options <c>[anchored, notempty]</c> at the same
       position, which does not give any interesting result of longer
-      length, why the search position is now advanced to the next
+      length, so the search position is now advanced to the next
       character (<c>a</c>).</item>
       <tag>At offset <c>1</c></tag>
-      <item>Now the search results in
-      <c>[{1,0},{1,0}]</c> meaning this search will also be repeated
+      <item>This time, the search results in
+      <c>[{1,0},{1,0}]</c>, so this search will also be repeated
       with the extra options.</item>
       <tag>At offset <c>1</c> with <c>[anchored, notempty]</c></tag>
       <item>Now the <c>ab</c> alternative
@@ -333,16 +327,17 @@ This option makes it possible to include comments inside complicated patterns. N
       entire match fails. For example, if the pattern</p>
 <code>    a?b?</code>
       <p>is applied to a string not beginning with "a" or "b", it
-      matches the empty string at the start of the subject. With
-      <c>notempty</c> given, this match is not valid, so re:run/3 searches
-      further into the string for occurrences of "a" or "b".</p>
+      would normally match the empty string at the start of the
+      subject. With the <c>notempty</c> option, this match is not
+      valid, so re:run/3 searches further into the string for
+      occurrences of "a" or "b".</p>
 
       <p>Perl has no direct equivalent of <c>notempty</c>, but it does
       make a special case of a pattern match of the empty string
       within its split() function, and when using the /g modifier. It
       is possible to emulate Perl's behavior after matching a null
       string by first trying the match again at the same offset with
-      <c>notempty</c> and <c>anchored</c>, and then if that fails by
+      <c>notempty</c> and <c>anchored</c>, and then, if that fails, by
       advancing the starting offset (see below) and trying an ordinary
       match again.</p>
       </item>
@@ -352,7 +347,7 @@ This option makes it possible to include comments inside complicated patterns. N
       string is not the beginning of a line, so the circumflex
       metacharacter should not match before it. Setting this without
       <c>multiline</c> (at compile time) causes circumflex never to
-      match. This option affects only the behavior of the circumflex
+      match. This option only affects the behavior of the circumflex
       metacharacter. It does not affect \A.</item>
 
       <tag><c>noteol</c></tag>
@@ -388,7 +383,7 @@ This option makes it possible to include comments inside complicated patterns. N
       </taglist>
       </item>
       <tag><c>bsr_anycrlf</c></tag>
-      <item>Specifies specifically that \R is to match only the cr, lf or crlf sequences, not the Unicode specific newline characters.(overrides compilation option)</item>
+      <item>Specifies specifically that \R is to match only the cr, lf or crlf sequences, not the Unicode specific newline characters. (overrides compilation option)</item>
       <tag><c>bsr_unicode</c></tag>
       <item>Specifies specifically that \R is to match all the Unicode newline characters (including crlf etc, the default).(overrides compilation option)</item>
 
@@ -444,7 +439,7 @@ This option makes it possible to include comments inside complicated patterns. N
         <tag><c>none</c></tag>
         <item>Do not return matching subpatterns at all, yielding the single atom <c>match</c> as the return value of the function when matching successfully instead of the <c>{match, list()}</c> return. Specifying an empty list gives the same behavior.</item>
         </taglist>
-      <p>The value list is a list of indexes for the subpatterns to return, where index 0 is for all of the pattern, and 1 is for the first explicit capturing subpattern in the regular expression, and so forth. When using named captured subpatterns (see below) in the regular expression, one can use <c>atom()</c>'s or <c>string()</c>'s to specify the subpatterns to be returned. This deserves an example, consider the following regular expression:</p>
+      <p>The value list is a list of indexes for the subpatterns to return, where index 0 is for all of the pattern, and 1 is for the first explicit capturing subpattern in the regular expression, and so forth. When using named captured subpatterns (see below) in the regular expression, one can use <c>atom()</c>s or <c>string()</c>s to specify the subpatterns to be returned. For example, consider the regular expression:</p>
       <code>    ".*(abcd).*"</code>
       <p>matched against the string ""ABCabcdABC", capturing only the "abcd" part (the first explicit subpattern):</p>
       <code>    re:run("ABCabcdABC",".*(abcd).*",[{capture,[1]}]).</code>
@@ -455,7 +450,7 @@ This option makes it possible to include comments inside complicated patterns. N
       <code>    ".*(?&lt;FOO&gt;abcd).*"</code>
       <p>With this expression, we could still give the index of the subpattern with the following call:</p>
       <code>    re:run("ABCabcdABC",".*(?&lt;FOO&gt;abcd).*",[{capture,[1]}]).</code>
-      <p>giving the same result as before. But as the subpattern is named, we can also give its name in the value list:</p>
+      <p>giving the same result as before. But, since the subpattern is named, we can also specify its name in the value list:</p>
       <code>    re:run("ABCabcdABC",".*(?&lt;FOO&gt;abcd).*",[{capture,['FOO']}]).</code>
       <p>which would yield the same result as the earlier examples, namely:</p>
       <code>    {match,[{3,4}]}</code>
@@ -473,15 +468,15 @@ This option makes it possible to include comments inside complicated patterns. N
       <item><p>Optionally specifies how captured substrings are to be returned. If omitted, the default of <c>index</c> is used. The <c>Type</c> can be one of the following:</p>
         <taglist>
         <tag><c>index</c></tag> 
-        <item>Return captured substrings as pairs of byte indexes into the subject string and length of the matching string in the subject (as if the subject string was flattened with <c>iolist_to_binary/1</c> or <c>unicode:characters_to_binary/2</c> prior to matching). Note that the <c>unicode</c> option results in <em>byte-oriented</em> indexes in a (possibly imagined) <em>UTF-8 encoded</em> binary. A byte index tuple <c>{0,2}</c> might therefore represent one or two characters when <c>unicode</c> is in effect. This might seem contra-intuitive, but has been deemed the most effective and useful way to way to do it. To return lists instead might result in simpler code if that is desired. This return type is the default.</item>
+        <item>Return captured substrings as pairs of byte indexes into the subject string and length of the matching string in the subject (as if the subject string was flattened with <c>iolist_to_binary/1</c> or <c>unicode:characters_to_binary/2</c> prior to matching). Note that the <c>unicode</c> option results in <em>byte-oriented</em> indexes in a (possibly virtual) <em>UTF-8 encoded</em> binary. A byte index tuple <c>{0,2}</c> might therefore represent one or two characters when <c>unicode</c> is in effect. This might seem counter-intuitive, but has been deemed the most effective and useful way to do it. To return lists instead might result in simpler code if that is desired. This return type is the default.</item>
         <tag><c>list</c></tag> 
-        <item>Return matching substrings as lists of characters (Erlang <c>string()</c>'s). It the <c>unicode</c> option is used in combination with the \C sequence in the regular expression, a captured subpattern can contain bytes that has is not valid UTF-8 (\C matches bytes regardless of character encoding). In that case the <c>list</c> capturing may result in the same types of tuples that <c>unicode:characters_to_list/2</c> can return, namely three-tuples with the tag <c>incomplete</c> or <c>error</c>, the successfully converted characters and the invalid UTF-8 tail of the conversion as a binary. The best strategy is to avoid using the \C sequence when capturing lists.</item>
+        <item>Return matching substrings as lists of characters (Erlang <c>string()</c>s). If the <c>unicode</c> option is used in combination with the \C sequence in the regular expression, a captured subpattern can contain bytes that are not valid UTF-8 (\C matches bytes regardless of character encoding). In that case the <c>list</c> capturing may result in the same types of tuples that <c>unicode:characters_to_list/2</c> can return, namely three-tuples with the tag <c>incomplete</c> or <c>error</c>, the successfully converted characters and the invalid UTF-8 tail of the conversion as a binary. The best strategy is to avoid using the \C sequence when capturing lists.</item>
         <tag><c>binary</c></tag> 
-        <item>Return matching substrings as binaries. If the <c>unicode</c> option is used, these binaries is in UTF-8. If the \C sequence is used together with <c>unicode</c> the binaries may be invalid UTF-8.</item>
+        <item>Return matching substrings as binaries. If the <c>unicode</c> option is used, these binaries are in UTF-8. If the \C sequence is used together with <c>unicode</c> the binaries may be invalid UTF-8.</item>
         </taglist>
       </item>
       </taglist>
-      <p>In general, subpatterns that got assigned no value in the match are returned as the tuple <c>{-1,0}</c> when <c>type</c> is <c>index</c>. Unassigned subpatterns are returned as the empty binary or list respectively for other return types. Consider the regular expression:</p>
+      <p>In general, subpatterns that were not assigned a value in the match are returned as the tuple <c>{-1,0}</c> when <c>type</c> is <c>index</c>. Unassigned subpatterns are returned as the empty binary or list, respectively, for other return types. Consider the regular expression:</p>
 <code>    ".*((?&lt;FOO&gt;abdd)|a(..d)).*"</code>
       <p>There are three explicitly capturing subpatterns, where the opening parenthesis position determines the order in the result, hence <c>((?&lt;FOO&gt;abdd)|a(..d))</c> is subpattern index 1, <c>(?&lt;FOO&gt;abdd)</c> is subpattern index 2 and <c>(..d)</c> is subpattern index 3. When matched against the following string:</p>
 <code>    "ABCabcdABC"</code>
@@ -533,8 +528,8 @@ This option makes it possible to include comments inside complicated patterns. N
 	<v>NLSpec = cr | crlf | lf | anycrlf | any </v>
       </type>
       <desc>
-      <p>Replaces the matched part of the <c>Subject</c> string with the content of <c>Replacement</c>.</p>
-      <p>Options are given as to the <c>re:run/3</c> function except that the <c>capture</c> option of <c>re:run/3</c> is not allowed. 
+      <p>Replaces the matched part of the <c>Subject</c> string with the contents of <c>Replacement</c>.</p>
+      <p>The permissible options are the same as for <c>re:run/3</c>, except that the <c>capture</c> option is not allowed.
       Instead a <c>{return, ReturnType}</c> is present. The default return type is <c>iodata</c>, constructed in a 
       way to minimize copying. The <c>iodata</c> result can be used directly in many i/o-operations. If a flat <c>list()</c> is
       desired, specify <c>{return, list}</c> and if a binary is preferred, specify <c>{return, binary}</c>.</p>
@@ -544,7 +539,7 @@ This option makes it possible to include comments inside complicated patterns. N
       a Unicode <c>charlist()</c>. If compilation is done implicitly
       and the <c>unicode</c> compilation option is given to this
       function, both the regular expression and the <c>Subject</c>
-      should be given as valid Unicode <c>charlist()</c>'s.</p>
+      should be given as valid Unicode <c>charlist()</c>s.</p>
 
       <p>The replacement string can contain the special character
       <c>&amp;</c>, which inserts the whole matching expression in the
@@ -554,7 +549,7 @@ This option makes it possible to include comments inside complicated patterns. N
       generated by the regular expression, nothing is inserted.</p>
       <p>To insert an <c>&amp;</c> or <c>\</c> in the result, precede it
       with a <c>\</c>. Note that Erlang already gives a special
-      meaning to <c>\</c> in literal strings, why a single <c>\</c>
+      meaning to <c>\</c> in literal strings, so a single <c>\</c>
       has to be written as <c>"\\"</c> and therefore a double <c>\</c>
       as <c>"\\\\"</c>. Example:</p>
       <code>    re:replace("abcd","c","[&amp;]",[{return,list}]).</code>
@@ -611,7 +606,7 @@ This option makes it possible to include comments inside complicated patterns. N
       a Unicode <c>charlist()</c>. If compilation is done implicitly
       and the <c>unicode</c> compilation option is given to this
       function, both the regular expression and the <c>Subject</c>
-      should be given as valid Unicode <c>charlist()</c>'s.</p>
+      should be given as valid Unicode <c>charlist()</c>s.</p>
 
       <p>The result is given as a list of &quot;strings&quot;, the
       preferred datatype given in the <c>return</c> option (default iodata).</p> 
@@ -656,25 +651,25 @@ This option makes it possible to include comments inside complicated patterns. N
       <p>Here the regular expression matched first the &quot;l&quot;,
       causing &quot;Er&quot; to be the first part in the result. When
       the regular expression matched, the (only) subexpression was
-      bound to the &quot;l&quot;, why the &quot;l&quot; is inserted
+      bound to the &quot;l&quot;, so the &quot;l&quot; is inserted
       in the group together with &quot;Er&quot;. The next match is of
       the &quot;n&quot;, making &quot;a&quot; the next part to be
-      returned. As the subexpression is bound to the substring
+      returned. Since the subexpression is bound to the substring
       &quot;n&quot; in this case, the &quot;n&quot; is inserted into
       this group. The last group consists of the rest of the string,
       as no more matches are found.</p>
 
 
       <p>By default, all parts of the string, including the empty
-      strings are returned from the function. As an example:</p>      
+      strings, are returned from the function. For example:</p>
 
 <code>    re:split("Erlang","[lg]",[{return,list}]).</code>
 
-      <p>The result will be:</p>
+      <p>will return:</p>
 
 <code>    ["Er","an",[]]</code>
 
-      <p>as the matching of the &quot;g&quot; in the end of the string
+      <p>since the matching of the &quot;g&quot; at the end of the string
       leaves an empty rest which is also returned. This behaviour
       differs from the default behaviour of the split function in
       Perl, where empty strings at the end are by default removed. To
@@ -701,10 +696,10 @@ This option makes it possible to include comments inside complicated patterns. N
 
       <p>Note that the last part is &quot;ang&quot;, not
       &quot;an&quot;, as we only specified splitting into two parts,
-      and the splitting stops when enough parts are given, why the
-      result differs from that of <c>trim</c>.</p>
+      and the splitting stops when enough parts are given, which is
+      why the result differs from that of <c>trim</c>.</p>
 
-      <p>More than three parts are not possible with this indata, why</p>
+      <p>More than three parts are not possible with this input data, so</p>
 
 <code>    re:split("Erlang","[lg]",[{return,list},{parts,4}]).</code>
 
@@ -745,7 +740,7 @@ This option makes it possible to include comments inside complicated patterns. N
       the parts of the string matching the subexpressions of the
       regexp.</p>
       <p>The return value from the function will in this case be a
-      <c>list()</c> of <c>list()</c>'s.  Each sublist begins with the
+      <c>list()</c> of <c>list()</c>s.  Each sublist begins with the
       string picked out of the subject string, followed by the parts
       matching each of the subexpressions in order of occurrence in the
       regular expression.</p>
@@ -782,10 +777,8 @@ This option makes it possible to include comments inside complicated patterns. N
     <title>PERL LIKE REGULAR EXPRESSIONS SYNTAX</title>
     <p>The following sections contain reference material for the
     regular expressions used by this module. The regular expression
-    reference is taken from the PCRE documentation, but converted as
-    needed.</p>
-    <p>The documentation is altered where appropriate and where the re 
-    module behaves differently than the PCRE library.</p>
+    reference is based on the PCRE documentation, with changes in
+    cases where the re module behaves differently to the PCRE library.</p>
   </section>
 
 <section><title>PCRE regular expression details</title>
diff --git a/lib/stdlib/src/unicode.erl b/lib/stdlib/src/unicode.erl
index 09b1deff9c..869505ba83 100644
--- a/lib/stdlib/src/unicode.erl
+++ b/lib/stdlib/src/unicode.erl
@@ -25,8 +25,17 @@
 %%                         InEncoding is not {latin1 | unicode | utf8})
 %%
 
--export([characters_to_list/1, characters_to_list_int/2, characters_to_binary/1,characters_to_binary_int/2, characters_to_binary/3,bom_to_encoding/1, encoding_to_bom/1]).
+-export([characters_to_list/1, characters_to_list_int/2,
+	 characters_to_binary/1, characters_to_binary_int/2,
+	 characters_to_binary/3,
+	 bom_to_encoding/1, encoding_to_bom/1]).
 
+-export_type([encoding/0]).
+
+-type encoding()  :: 'latin1' | 'unicode' | 'utf8'
+                   | 'utf16' | {'utf16', endian()}
+                   | 'utf32' | {'utf32', endian()}.
+-type endian()    :: 'big' | 'little'.
 
 characters_to_list(ML) ->
     unicode:characters_to_list(ML,unicode).