Diffstat (limited to 'erts')
61 files changed, 1748 insertions, 555 deletions
diff --git a/erts/autoconf/vxworks/sed.general b/erts/autoconf/vxworks/sed.general index dbb9420b67..efa4e99054 100644 --- a/erts/autoconf/vxworks/sed.general +++ b/erts/autoconf/vxworks/sed.general @@ -57,6 +57,7 @@ s|@ETHR_LIB_NAME@|| s|@ETHR_DEFS@|| s|@ETHR_THR_LIB_BASE@|| s|@ETHR_THR_LIB_BASE_DIR@|| +s|@SYSTEMD_DAEMON_LIBS@|| s|@EMU_THR_DEFS@|| s|@EMU_THR_LIBS@|| s|@EMU_THR_LIB_NAME@|ethread| diff --git a/erts/configure.in b/erts/configure.in index f66110b98b..40b335849c 100644 --- a/erts/configure.in +++ b/erts/configure.in @@ -4821,6 +4821,26 @@ if test "x$GCC" = xyes; then fi dnl ---------------------------------------------------------------------- +dnl Enable -fsanitize= flags. +dnl ---------------------------------------------------------------------- + +m4_define(DEFAULT_SANITIZERS, [address,undefined]) +AC_ARG_ENABLE( + sanitizers, + AS_HELP_STRING( + [--enable-sanitizers@<:@=comma-separated list of sanitizers@:>@], + [Default=DEFAULT_SANITIZERS]), +[ +case "$enableval" in + no) sanitizers= ;; + yes) sanitizers="-fsanitize=DEFAULT_SANITIZERS" ;; + *) sanitizers="-fsanitize=$enableval" ;; +esac +CFLAGS="$CFLAGS $sanitizers" +LDFLAGS="$LDFLAGS $sanitizers" +]) + +dnl ---------------------------------------------------------------------- dnl Output the result. dnl ---------------------------------------------------------------------- diff --git a/erts/doc/src/crash_dump.xml b/erts/doc/src/crash_dump.xml index c59741f250..d3de29b876 100644 --- a/erts/doc/src/crash_dump.xml +++ b/erts/doc/src/crash_dump.xml @@ -85,20 +85,22 @@ operating system.</p> <list type="bulleted"> <item>"<em><A></em>: Cannot allocate <em><N></em> - bytes of memory (of type "<em><T></em>")." - The system - has run out of memory. <A> is the allocator that failed - to allocate memory, <N> is the number of bytes that - <A> tried to allocate, and <T> is the memory block - type that the memory was needed for. The most common case is - that a process stores huge amounts of data. In this case - <T> is most often <c><![CDATA[heap]]></c>, <c><![CDATA[old_heap]]></c>, - <c><![CDATA[heap_frag]]></c>, or <c><![CDATA[binary]]></c>. For more information on - allocators see - <seealso marker="erts_alloc">erts_alloc(3)</seealso>.</item> + bytes of memory (of type "<em><T></em>", thread + <em><I></em>em>)." - The system has run out of memory. <A> + is the allocator that failed to allocate memory, <N> is the + number of bytes that <A> tried to allocate, <T> is the + memory block type that the memory was needed for, and <I> is the + thread identifier. The most common case is that a process stores huge + amounts of data. In this case <T> is most often + <c><![CDATA[heap]]></c>, <c><![CDATA[old_heap]]></c>, + <c><![CDATA[heap_frag]]></c>, or <c><![CDATA[binary]]></c>. + For more information on allocators see + <seealso marker="erts_alloc">erts_alloc(3)</seealso>.</item> <item>"<em><A></em>: Cannot reallocate <em><N></em> - bytes of memory (of type "<em><T></em>")." - Same as - above with the exception that memory was being reallocated - instead of being allocated when the system ran out of memory.</item> + bytes of memory (of type "<em><T></em>", thread + <em><I></em>em>)." 
- Same as above with the exception that memory + was being reallocated instead of being allocated when the system ran + out of memory.</item> <item>"Unexpected op code <em>N</em>" - Error in compiled code, <c><![CDATA[beam]]></c> file damaged or error in the compiler.</item> <item>"Module <em>Name</em> undefined" <c><![CDATA[|]]></c> "Function @@ -246,6 +248,9 @@ <tag><em>Last scheduled in for | Current call</em></tag> <item>The current function of the process. These fields will not always exist.</item> + <tag><em>Run queue</em></tag> + <item>The identifier of the scheduler run queue in which the process is + running.</item> <tag><em>Spawned by</em></tag> <item>The parent of the process, i.e. the process which executed <c><![CDATA[spawn]]></c> or <c><![CDATA[spawn_link]]></c>.</item> diff --git a/erts/doc/src/epmd.xml b/erts/doc/src/epmd.xml index 963d35c3c8..25f819ab50 100644 --- a/erts/doc/src/epmd.xml +++ b/erts/doc/src/epmd.xml @@ -58,12 +58,12 @@ of the IP address and a port number. The name of the node is an atom on the form of <c><![CDATA[Name@Node]]></c>. The job of the <c><![CDATA[epmd]]></c> daemon is to keep track of which - node name listens on which address. Hence, <c><![CDATA[epmd]]></c> map + node name listens on which address. Hence, <c><![CDATA[epmd]]></c> maps symbolic node names to machine addresses.</p> <p>The TCP/IP <c>epmd</c> daemon actually only keeps track of - the <c>Name</c> (first) part of an Erlang node name, the <c>Host</c> - part (whatever is after the <c><![CDATA[@]]></c> is implicit in the + the <c>Name</c> (first) part of an Erlang node name. The <c>Host</c> + part (whatever is after the <c><![CDATA[@]]></c>) is implicit in the node name where the <c>epmd</c> daemon was actually contacted, as is the IP address where the Erlang node can be reached. Consistent and correct TCP naming services are @@ -77,12 +77,12 @@ <p>The daemon is started automatically by the <c>erl</c> command if the node is to be distributed and there is no running instance present. If automatically launched, - environment variables has to be used to alter the behavior of + environment variables have to be used to alter the behavior of the daemon. See the <seealso marker="#environment_variables">Environment variables</seealso> section below.</p> - <p>If the -daemon argument is not given, the + <p>If the -daemon argument is not given, <c><![CDATA[epmd]]></c> runs as a normal program with the controlling terminal of the shell in which it is started. Normally, it should run as a daemon.</p> @@ -122,7 +122,7 @@ comma-separated list of IP addresses and on the loopback address (which is implicitly added to the list if it has not been specified). This can also be set using the - <c><![CDATA[ERL_EPMD_ADDRESS]]></c> environment variable, see the + <c><![CDATA[ERL_EPMD_ADDRESS]]></c> environment variable. See the section <seealso marker="#environment_variables">Environment variables</seealso> below.</p> </item> @@ -130,7 +130,7 @@ <item> <p>Let this instance of epmd listen to another TCP port than default 4369. This can also be set using the - <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the + <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable. See the section <seealso marker="#environment_variables">Environment variables</seealso> below</p> </item> @@ -153,7 +153,7 @@ <p>With relaxed command checking, the <c>epmd</c> daemon can be killed from the localhost with i.e. <c>epmd -kill</c> even if there are active nodes registered. 
Normally only daemons with an empty node database can be killed with the <c>epmd -kill</c> command.</p> </item> <item> - <p>The <c>epmd -stop</c> command (and the corresponding messages to epmd, as can be given using <c>erl_interface/ei</c>) is normally always ignored, as it opens up for strange situation when two nodes of the same name can be alive at the same time. A node unregisters itself by just closing the connection to epmd, why the <c>stop</c> command was only intended for use in debugging situations.</p> + <p>The <c>epmd -stop</c> command (and the corresponding messages to epmd, as can be given using <c>erl_interface/ei</c>) is normally always ignored, as it opens up the possibility of a strange situation where two nodes of the same name can be alive at the same time. A node unregisters itself by just closing the connection to epmd, which is why the <c>stop</c> command was only intended for use in debugging situations.</p> <p>With relaxed command checking enabled, you can forcibly unregister live nodes.</p> </item> </list> @@ -166,7 +166,7 @@ <section> <marker id="debug_flags"></marker> <title>DbgExtra options</title> - <p>These options are purely for debugging and testing epmd clients, they should not be used in normal operation.</p> + <p>These options are purely for debugging and testing epmd clients. They should not be used in normal operation.</p> <taglist> <tag><c><![CDATA[-packet_timeout Seconds]]></c></tag> @@ -177,9 +177,9 @@ </item> <tag><c><![CDATA[-delay_accept Seconds]]></c></tag> <item> - <p>To simulate a busy server you can insert a delay between epmd - gets notified about that a new connection is requested and - when the connections gets accepted.</p> + <p>To simulate a busy server you can insert a delay between when epmd + gets notified that a new connection is requested and + when the connection gets accepted.</p> </item> <tag><c><![CDATA[-delay_write Seconds]]></c></tag> <item> @@ -191,15 +191,15 @@ <section> <marker id="interactive_flags"></marker> <title>Interactive options</title> - <p>These options make <c>epmd</c> run as an interactive command displaying the results of sending queries ta an already running instance of <c>epmd</c>. The epmd contacted is always on the local node, but the <c>-port</c> option can be used to select between instances if several are running using different port on the host.</p> + <p>These options make <c>epmd</c> run as an interactive command, displaying the results of sending queries to an already running instance of <c>epmd</c>. The epmd contacted is always on the local node, but the <c>-port</c> option can be used to select between instances if several are running using different ports on the host.</p> <taglist> <tag><c><![CDATA[-port No]]></c></tag> <item> <p>Contacts the <c>epmd</c> listening on the given TCP port number (default 4369). This can also be set using the - <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable, see the + <c><![CDATA[ERL_EPMD_PORT]]></c> environment variable. See the section <seealso marker="#environment_variables">Environment - variables</seealso> below</p> + variables</seealso> below.</p> </item> <tag><c><![CDATA[-names]]></c></tag> <item> @@ -210,7 +210,7 @@ <p>Kill the currently running <c>epmd</c>.</p> <p>Killing the running <c>epmd</c> is only allowed if <c>epmd - -names</c> show an empty database or + -names</c> shows an empty database or <c>-relaxed_command_check</c> was given when the running instance of <c>epmd</c> was started. 
Note that <c>-relaxed_command_check</c> is given when starting the @@ -228,7 +228,7 @@ <p>This command can only be used when contacting <c>epmd</c> instances started with the <c>-relaxed_command_check</c> flag. Note that relaxed command checking has to be enabled for - the <c>epmd</c> daemon contacted, When running epmd + the <c>epmd</c> daemon contacted. When running epmd interactively, <c>-relaxed_command_check</c> has no effect.</p> </item> @@ -259,7 +259,7 @@ <item> <p>If set prior to start, the <c>epmd</c> daemon will behave as if the <c>-relaxed_command_check</c> option was given at - start-up. If consequently setting this option before starting + start-up. Consequently, if this option is set before starting the Erlang virtual machine, the automatically started <c>epmd</c> will accept the <c>-kill</c> and <c>-stop</c> commands without restrictions.</p> @@ -287,8 +287,8 @@ remote hosts. However, only the query commands are answered (and acted upon) if the query comes from a remote host. It is always an error to try to register a nodename if the client is not a process - located on the same host as the <c>epmd</c> instance is running on, - why such requests are considered hostile and the connection is + located on the same host as the <c>epmd</c> instance is running on- + such requests are considered hostile and the connection is immediately closed.</p> <p>The queries accepted from remote nodes are:</p> @@ -307,3 +307,4 @@ </comref> + diff --git a/erts/doc/src/erl.xml b/erts/doc/src/erl.xml index f8f4d14436..f856b9ab86 100644 --- a/erts/doc/src/erl.xml +++ b/erts/doc/src/erl.xml @@ -851,6 +851,19 @@ </p> </item> <tag><marker id="+SDio"><c><![CDATA[+SDio IOSchedulers]]></c></marker></tag> + <item> + <p>Sets the number of dirty I/O scheduler threads to create when threading + support has been enabled. The valid range is 0-1024. By default, the number + of dirty I/O scheduler threads created is 10, same as the default number of + threads in the <seealso marker="#async_thread_pool_size">async thread pool + </seealso>. + </p> + <p>This option is ignored if the emulator doesn't have threading support + enabled. Currently, <em>this option is experimental</em> and is supported only + if the emulator was configured and built with support for dirty schedulers + enabled (it's disabled by default). + </p> + </item> <tag><c><![CDATA[+sFlag Value]]></c></tag> <item> <p>Scheduling specific flags.</p> @@ -1173,7 +1186,7 @@ utilization. </p> </item> - <tag><marker id="+swct"><c>+sws very_eager|eager|medium|lazy|very_lazy</c></marker></tag> + <tag><marker id="+swct"><c>+swct very_eager|eager|medium|lazy|very_lazy</c></marker></tag> <item> <p> Set scheduler wake cleanup threshold. Default is <c>medium</c>. diff --git a/erts/doc/src/erl_nif.xml b/erts/doc/src/erl_nif.xml index 6b1f4cccf8..1d33b334bb 100644 --- a/erts/doc/src/erl_nif.xml +++ b/erts/doc/src/erl_nif.xml @@ -168,16 +168,18 @@ ok <p><marker id="lengthy_work"/> As mentioned in the <seealso marker="#WARNING">warning</seealso> text at the beginning of this document it is of vital importance that a native function - does return relatively fast. It is hard to give an exact maximum amount + return relatively quickly. It is hard to give an exact maximum amount of time that a native function is allowed to work, but as a rule of thumb - a well behaving native function should return to its caller before a + a well-behaving native function should return to its caller before a millisecond has passed. This can be achieved using different approaches. 
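As a rough sketch (not part of this patch) of the chunked-work approach described here: a NIF can perform a bounded amount of work, report the consumed fraction of the timeslice with enif_consume_timeslice(), and reschedule itself with the new enif_schedule_nif() until the work is done. The helper do_some_work(), the 5% slice, and the argument layout below are hypothetical placeholders; the ErlNifFunc table and ERL_NIF_INIT registration are omitted for brevity.

#include "erl_nif.h"

/* Hypothetical unit of work: process a small chunk starting at 'offset'
 * and return how many elements were handled. */
static unsigned long do_some_work(unsigned long offset)
{
    (void)offset;
    return 100;
}

/* chunked_work(Offset, Total) -> done */
static ERL_NIF_TERM chunked_work(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    unsigned long offset, total;

    if (argc != 2 ||
        !enif_get_ulong(env, argv[0], &offset) ||
        !enif_get_ulong(env, argv[1], &total))
        return enif_make_badarg(env);

    while (offset < total) {
        offset += do_some_work(offset);
        /* Report roughly how much of the timeslice this chunk used; a
         * non-zero return means it is time to yield. */
        if (enif_consume_timeslice(env, 5)) {
            ERL_NIF_TERM newargv[2] = {
                enif_make_ulong(env, offset),
                enif_make_ulong(env, total)
            };
            /* Reschedule this same NIF on a regular scheduler (flags 0) and
             * use the returned term as our own return value. */
            return enif_schedule_nif(env, "chunked_work", 0,
                                     chunked_work, 2, newargv);
        }
    }
    return enif_make_atom(env, "done");
}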
- If you have full control over the code that are to execute in the native + If you have full control over the code to execute in the native function, the best approach is to divide the work into multiple chunks of - work and call the native function multiple times. Function + work and call the native function multiple times, either directly from Erlang code + or by having a native function schedule a future NIF call via the + <seealso marker="#enif_schedule_nif"> enif_schedule_nif</seealso> function. Function <seealso marker="#enif_consume_timeslice">enif_consume_timeslice</seealso> can be - used this facilitate such work division. In some cases, however, this might not - be possible, e.g. when calling third party libraries. Then you typically want + used to help with such work division. In some cases, however, this might not + be possible, e.g. when calling third-party libraries. Then you typically want to dispatch the work to another thread, return from the native function, and wait for the result. The thread can send the result back to the calling thread using message passing. Information @@ -342,29 +344,31 @@ ok libraries might however fail if deprecated features are used. </p></item> - <tag>Dirty NIFs</tag> - <item><p><marker id="dirty_nifs"/><em>Note that the dirty NIF functionality - is experimental</em> and that you have to enable support for dirty - schedulers when building OTP in order to try the functionality out. Native functions + <tag>Long-running NIFs</tag> + <item><p><marker id="dirty_nifs"/>Native functions <seealso marker="#lengthy_work"> must normally run quickly</seealso>, as explained earlier in this document. They generally should execute for no more than a millisecond. But not all native functions can execute so quickly; for example, functions that encrypt large blocks of data or perform lengthy file system operations can often run for tens of seconds or more.</p> - <p>A NIF that cannot execute in a millisecond or less is called a "dirty NIF" since - it performs work that the Erlang runtime cannot handle cleanly. Applications - that make use of such functions must indicate to the runtime that the functions are + <p>If the functionality of a long-running NIF can be split so that its work can be + achieved through a series of shorter NIF calls, the application can either make that series + of NIF calls from the Erlang level, or it can call a NIF that first performs a chunk of the + work, then invokes the <seealso marker="#enif_schedule_nif">enif_schedule_nif</seealso> + function to schedule another NIF call to perform the next chunk. The final call scheduled + in this manner can then return the overall result. Breaking up a long-running function in + this manner enables the VM to regain control between calls to the NIFs, thereby avoiding + degraded responsiveness, scheduler load balancing problems, and other strange behaviours.</p> + <p>A NIF that cannot be split and cannot execute in a millisecond or less is called a "dirty NIF" + because it performs work that the Erlang runtime cannot handle cleanly. + <em>Note that the dirty NIF functionality described here is experimental</em> and that you have to + enable support for dirty schedulers when building OTP in order to try the functionality out. + Applications that make use of such functions must indicate to the runtime that the functions are dirty so they can be handled specially. 
To schedule a dirty NIF for execution, the - application calls <seealso marker="#enif_schedule_dirty_nif">enif_schedule_dirty_nif</seealso>, - passing to it a pointer to the dirty NIF to be executed and indicating with a flag + appropriate flags value can be set for the NIF in its <seealso marker="#ErlNifFunc">ErlNifFunc</seealso> + entry, or the application can call <seealso marker="#enif_schedule_nif">enif_schedule_nif</seealso>, + passing to it a pointer to the dirty NIF to be executed and indicating with the <c>flags</c> argument whether it expects the operation to be CPU-bound or I/O-bound.</p> - <p>All dirty NIFs must ultimately invoke the <seealso marker="#enif_schedule_dirty_nif_finalizer"> - enif_schedule_dirty_nif_finalizer</seealso> as their final action, passing to it the - result they wish to return to the original caller. A finalizer function can either - receive the result and return it directly, or it can return a different value instead. - For convenience, the NIF API provides the <seealso marker="#enif_dirty_nif_finalizer"> - enif_dirty_nif_finalizer</seealso> function that applications can use as a finalizer; - it simply returns its result argument.</p> <note><p>Dirty NIF support is available only when the emulator is configured with dirty schedulers enabled. This feature is currently disabled by default. To determine whether the dirty NIF API is available, native code can check to see if the C preprocessor macro @@ -498,6 +502,7 @@ typedef struct { const char* <em>name</em>; unsigned <em>arity</em>; ERL_NIF_TERM (*<em>fptr</em>)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); + unsigned flags; } ErlNifFunc; </code> <p>Describes a NIF by its name, arity and implementation. @@ -508,7 +513,17 @@ typedef struct { will thus denote the Nth argument to the NIF. Note that the <c>argc</c> argument allows for the same C function to implement several Erlang functions with different arity (but - same name probably).</p> + same name probably). For a regular NIF, <c>flags</c> is 0 (and + so its value can be omitted for statically initialized <c>ErlNifFunc</c> + instances), or it can be used to indicate that the NIF is a <seealso + marker="#dirty_nifs">dirty NIF</seealso> that should be executed + on a dirty scheduler thread (<em>note that the dirty NIF functionality + described here is experimental</em> and that you have to enable + support for dirty schedulers when building OTP in order to try the + functionality out). If the dirty NIF is expected to be + CPU-bound, its <c>flags</c> field should be set to + <c>ERL_NIF_DIRTY_JOB_CPU_BOUND</c>, or for I/O-bound jobs, + <c>ERL_NIF_DIRTY_JOB_IO_BOUND</c>.</p> </item> <tag><marker id="ErlNifBinary"/>ErlNifBinary</tag> <item> @@ -672,18 +687,6 @@ typedef enum { See also the <seealso marker="#WARNING">warning</seealso> text at the beginning of this document.</p> </desc> </func> - <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result)</nametext></name> - <fsummary>Simple dirty NIF result finalizer</fsummary> - <desc> - <p>A convenience function that a dirty NIF can use as a finalizer that simply - return its <c>result</c> argument as its return value. This function is provided - for dirty NIFs with results that should be returned directly to the original caller.</p> - <note><p>This function is available only when the emulator is configured with dirty - schedulers enabled. This feature is currently disabled by default. 
To determine whether - the dirty NIF API is available, native code can check to see if the C preprocessor macro - <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> - </desc> - </func> <func><name><ret>int</ret><nametext>enif_equal_tids(ErlNifTid tid1, ErlNifTid tid2)</nametext></name> <fsummary></fsummary> <desc><p>Same as <seealso marker="erl_driver#erl_drv_equal_tids">erl_drv_equal_tids</seealso>. @@ -811,9 +814,9 @@ typedef enum { built with threading support, dirty scheduler threads are available and <c>enif_have_dirty_schedulers()</c> returns true. If the emulator was built without threading support, <c>enif_have_dirty_schedulers()</c> returns false.</p> - <p>If dirty scheduler threads are not available in the emulator, calls to - <c>enif_schedule_dirty_nif</c> and <c>enif_schedule_dirty_nif_finalizer</c> result in - the NIF and finalizer functions being called directly within the calling thread.</p> + <p>If dirty scheduler threads are not available in the emulator, a call to + <c>enif_schedule_nif</c> with its <c>flags</c> argument set to indicate that the specified + NIF is to be executed on a dirty scheduler thread results in a <c>badarg</c> exception.</p> <note><p>This function is available only when the emulator is configured with dirty schedulers enabled. This feature is currently disabled by default. To determine whether the dirty NIF API is available, native code can check to see if the C preprocessor macro @@ -873,8 +876,8 @@ typedef enum { <p>Check to see if the current NIF is executing on a dirty scheduler thread. If the emulator is built with threading support, calling <c>enif_is_on_dirty_scheduler</c> from within a dirty NIF returns true. It returns false when the calling NIF is a regular - NIF or a NIF finalizer, both of which run on normal scheduler threads, or when the emulator - is built without threading support.</p> + NIF running on a normal scheduler thread, or when the emulator is built without threading + support.</p> <note><p>This function is available only when the emulator is configured with dirty schedulers enabled. This feature is currently disabled by default. To determine whether the dirty NIF API is available, native code can check to see if the C preprocessor macro @@ -1245,46 +1248,27 @@ typedef enum { <desc><p>Same as <seealso marker="erl_driver#erl_drv_rwlock_tryrwlock">erl_drv_rwlock_tryrwlock</seealso>. </p></desc> </func> - <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_schedule_dirty_nif(ErlNifEnv* env, int flags, ERL_NIF_TERM (*fp)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]), int argc, const ERL_NIF_TERM argv[])</nametext></name> - <fsummary>Schedule a dirty NIF for execution</fsummary> + <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_schedule_nif(ErlNifEnv* env, const char* fun_name, int flags, ERL_NIF_TERM (*fp)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]), int argc, const ERL_NIF_TERM argv[])</nametext></name> + <fsummary>Schedule a NIF for execution</fsummary> <desc> - <p>Schedule dirty NIF <c>fp</c> to execute a long-running operation. The <c>flags</c> - argument must be set to either <c>ERL_NIF_DIRTY_JOB_CPU_BOUND</c> if the job is expected to - be primarily CPU-bound, or <c>ERL_NIF_DIRTY_JOB_IO_BOUND</c> for jobs that will be - I/O-bound. The <c>argc</c> and <c>argv</c> arguments can either be the originals passed - into the calling NIF, or they can be values created by the calling NIF. 
The calling - NIF must use the return value of <c>enif_schedule_dirty_nif</c> as its own return value.</p> - <p>Be aware that <c>enif_schedule_dirty_nif</c>, as its name implies, only schedules the - dirty NIF for future execution. The calling NIF does not block waiting for the dirty NIF to - execute and return, which means that the calling NIF can't expect to receive the dirty NIF + <p>Schedule NIF <c>fp</c> to execute. This function allows an application to break up long-running + work into multiple regular NIF calls or to schedule a <seealso marker="#dirty_nifs">dirty NIF</seealso> + to execute on a dirty scheduler thread (<em>note that the dirty NIF functionality described here is + experimental</em> and that you have to enable support for dirty schedulers when building OTP in + order to try the functionality out).</p> + <p>The <c>fun_name</c> argument provides a name for the NIF being scheduled for execution. If it cannot + be converted to an atom, <c>enif_schedule_nif</c> returns a <c>badarg</c> exception.</p> + <p>The <c>flags</c> argument must be set to 0 for a regular NIF, or if the emulator was built the + experimental dirty scheduler support enabled, <c>flags</c> can be set to either <c>ERL_NIF_DIRTY_JOB_CPU_BOUND</c> + if the job is expected to be primarily CPU-bound, or <c>ERL_NIF_DIRTY_JOB_IO_BOUND</c> for jobs that will + be I/O-bound.</p> + <p>The <c>argc</c> and <c>argv</c> arguments can either be the originals passed into the calling NIF, or + they can be values created by the calling NIF.</p> + <p>The calling NIF must use the return value of <c>enif_schedule_nif</c> as its own return value.</p> + <p>Be aware that <c>enif_schedule_nif</c>, as its name implies, only schedules the + NIF for future execution. The calling NIF does not block waiting for the scheduled NIF to + execute and return, which means that the calling NIF can't expect to receive the scheduled NIF return value and use it for further operations.</p> - <p>A dirty NIF may not invoke the <seealso marker="#enif_make_badarg">enif_make_badarg</seealso> - to raise an exception. If it wishes to return an exception, the dirty NIF should pass a - regular result indicating the exception details to its finalizer, and allow the finalizer - to raise the exception on its behalf.</p> - <note><p>This function is available only when the emulator is configured with dirty schedulers - enabled. This feature is currently disabled by default. To determine whether the dirty NIF API - is available, native code can check to see if the C preprocessor macro - <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> - </desc> - </func> - <func><name><ret>ERL_NIF_TERM</ret><nametext>enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result, ERL_NIF_TERM (*fp)(ErlNifEnv* env, ERL_NIF_TERM result))</nametext></name> - <fsummary>Schedule a dirty NIF finalizer</fsummary> - <desc> - <p>When a dirty NIF finishes executing, it must schedule a finalizer function to return - its result to the original NIF caller. The dirty NIF passes <c>result</c> as the value it - wants the finalizer to use as the return value. The <c>fp</c> argument is a pointer to the - finalizer function. The NIF API provides the <seealso marker="#enif_dirty_nif_finalizer"> - enif_dirty_nif_finalizer</seealso> function that can be used as a finalizer that simply - returns its <c>result</c> argument. 
You are also free to write your own custom finalizer - that uses <c>result</c> to derive a different return value, or ignores <c>result</c> - entirely and returns a completely different value.</p> - <p>Without exception, all dirty NIFs must invoke <c>enif_schedule_dirty_nif_finalizer</c> - to complete their execution.</p> - <note><p>This function is available only when the emulator is configured with dirty - schedulers enabled. This feature is currently disabled by default. To determine whether - the dirty NIF API is available, native code can check to see if the C preprocessor macro - <c>ERL_NIF_DIRTY_SCHEDULER_SUPPORT</c> is defined.</p></note> </desc> </func> <func><name><ret>ErlNifPid *</ret><nametext>enif_self(ErlNifEnv* caller_env, ErlNifPid* pid)</nametext></name> @@ -1384,4 +1368,3 @@ typedef enum { <p><seealso marker="erlang#load_nif-2">erlang:load_nif/2</seealso></p> </section> </cref> - diff --git a/erts/doc/src/erl_prim_loader.xml b/erts/doc/src/erl_prim_loader.xml index 6751deda4d..171f84decc 100644 --- a/erts/doc/src/erl_prim_loader.xml +++ b/erts/doc/src/erl_prim_loader.xml @@ -148,6 +148,22 @@ </desc> </func> <func> + <name name="read_link_info" arity="1"/> + <fsummary>Get information about a link or file</fsummary> + <desc> + <p>This function works like + <seealso marker="#read_file_info/1">read_file_info/1</seealso> + except that if <c><anno>Filename</anno></c> is a symbolic link, + information about the link will be returned in the <c>file_info</c> + record and the <c>type</c> field of the record will be set to + <c>symlink</c>.</p> + <p>If <c><anno>Filename</anno></c> is not a symbolic link, this function + returns exactly the same result as <c>read_file_info/1</c>. + On platforms that do not support symbolic links, this function + is always equivalent to <c>read_file_info/1</c>.</p> + </desc> + </func> + <func> <name name="set_path" arity="1"/> <fsummary>Set the path of the loader</fsummary> <desc> diff --git a/erts/doc/src/erlang.xml b/erts/doc/src/erlang.xml index 9ad42374bf..84168397f6 100644 --- a/erts/doc/src/erlang.xml +++ b/erts/doc/src/erlang.xml @@ -4968,7 +4968,7 @@ true</pre> <desc> <p>Note that the run-time is the sum of the run-time for all threads in the Erlang run-time system and may therefore be greater - than the wall-clock time.</p> + than the wall-clock time. The time is returned in milliseconds.</p> <pre> > <input>statistics(runtime).</input> {1690,1620} diff --git a/erts/doc/src/notes.xml b/erts/doc/src/notes.xml index 6b3e41c1f2..5c4bb3ed25 100644 --- a/erts/doc/src/notes.xml +++ b/erts/doc/src/notes.xml @@ -30,6 +30,190 @@ </header> <p>This document describes the changes made to the ERTS application.</p> +<section><title>Erts 6.1.2</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + OTP-11850 fixed filelib:wildcard/1 to work with broken + symlinks. This correction, however, introduced problems + since symlinks were no longer followed for functions like + filelib:ensure_dir/1, filelib:is_dir/1, + filelib:file_size/1, etc. 
This is now corrected.</p> + <p> + Own Id: OTP-12054 Aux Id: seq12660 </p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 6.1.1</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p> + Fixed ETHR_FORCE_INLINE which caused the build to break + on some platforms without adequate thread support + (VxWorks).</p> + <p> + Own Id: OTP-12010</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 6.1</title> + + <section><title>Fixed Bugs and Malfunctions</title> + <list> + <item> + <p>The documentation for <c>spawn_opt/5</c> now has a + note mentioning that the <c>monitor</c> option is not + supported.</p> + <p> + Own Id: OTP-11849</p> + </item> + <item> + <p> + Fix broken system monitoring of <c>large_heap</c> for + non-smp VM. No message for <c>large_heap</c> was ever + sent on non-smp VM. Bug exist since R16B.</p> + <p> + Own Id: OTP-11852</p> + </item> + <item> + <p> + The emulator without SMP support crashed when passing a + message to a process without enough heap space for the + message. This bug was introduced in <c>erts-6.0</c>.</p> + <p> + Own Id: OTP-11887 Aux Id: OTP-11388 </p> + </item> + <item> + <p> + Fix race between ETS table deletion and unfixation that + could cause VM crash. The race could happen between a + terminating process that does not own the table but has a + fixation on it and another process that deletes the table + (maybe the owner terminating) at the same time. Bug + existed since R15B02.</p> + <p> + Own Id: OTP-11892</p> + </item> + <item> + <p>The string following the <c>-eval</c> option when + invoking <c>erl</c> would not be properly translated from + UTF-8 to a list of Unicode characters (as would the + arguments for <c>-run</c>).</p> + <p>That bug would cause the build of Erlang/OTP to fail + when building in a directory whose pathname contained + non-US ASCII characters encoded in UTF-8. (Thanks to Eric + Pailleau for reporting this bug.)</p> + <p> + Own Id: OTP-11916</p> + </item> + <item> + <p> + Fix erts_debug:size/1 to handle Map sizes</p> + <p> + Own Id: OTP-11923</p> + </item> + <item> + <p> + Removed <c>erlang:bitstr_to_list/1</c> and + <c>erlang:list_to_bitstr/1</c>. They were added by + mistake, and have always raised an <c>undefined</c> + exception when called.</p> + <p> + Own Id: OTP-11942</p> + </item> + <item> + <p> + Fixed compilation using mingw-w64 on Windows.</p> + <p> + Thanks to Jani Hakala.</p> + <p> + Own Id: OTP-11945</p> + </item> + <item> + <p> + The git sha is no longer printed in the shell start + header when erlang is built from a tagged git release.</p> + <p> + Own Id: OTP-11961</p> + </item> + <item> + <p> + Fixed a bug where <c>send</c> trace events were + erroneously dropped when the send was done to a + registered process. 
This bug was introduced in R16B.</p> + <p> + Own Id: OTP-11968</p> + </item> + </list> + </section> + + + <section><title>Improvements and New Features</title> + <list> + <item> + <p>The following native functions now bump an appropriate + amount of reductions and yield when out of + reductions:</p> <list> + <item><c>erlang:binary_to_list/1</c></item> + <item><c>erlang:binary_to_list/3</c></item> + <item><c>erlang:bitstring_to_list/1</c></item> + <item><c>erlang:list_to_binary/1</c></item> + <item><c>erlang:iolist_to_binary/1</c></item> + <item><c>erlang:list_to_bitstring/1</c></item> + <item><c>binary:list_to_bin/1</c></item> </list> + <p>Characteristics impact:</p> <taglist> + <tag>Performance</tag> <item>The functions converting + from lists got a performance loss for very small lists, + and a performance gain for very large lists.</item> + <tag>Priority</tag> <item>Previously a process executing + one of these functions effectively got an unfair priority + boost. This priority boost depended on the input size. + The larger the input was, the larger the priority boost + got. This unfair priority boost is now lost. </item> + </taglist> + <p> + Own Id: OTP-11888</p> + </item> + <item> + <p> + The systemd features of epmd have been removed from epmd + by default. To enable them you have to build erlang with + the configure option --enable-systemd.</p> + <p> + Own Id: OTP-11921</p> + </item> + <item> + <p> + Removed Erlang wrapper code used when calling + <c>binary_to_term/1</c>, and <c>binary_to_term/2</c>. + This improves the performance of these BIFs especially + when they are called with small binaries as input.</p> + <p> + Own Id: OTP-11931</p> + </item> + <item> + <p> + Add erlang:system_info(tolerant_timeofday), an API to + check whether compensation for sudden changes of system + time is enabled or not.</p> + <p> + Own Id: OTP-11970</p> + </item> + </list> + </section> + +</section> + <section><title>Erts 6.0.1</title> <section><title>Fixed Bugs and Malfunctions</title> @@ -756,6 +940,27 @@ Thanks to Matwey V. Kornilov</p> <p> Own Id: OTP-11829</p> + </item> + </list> + </section> + +</section> + +<section><title>Erts 5.10.4.1</title> + + <section><title>Known Bugs and Problems</title> + <list> + <item> + <p> + When using gen_tcp:connect and the <c>fd</c> option with + <c>port</c> and/or <c>ip</c>, the <c>port</c> and + <c>ip</c> options were ignored. This has been fixed so + that if <c>port</c> and/or <c>ip</c> is specified + together with <c>fd</c> a bind is requested for that + <c>fd</c>. 
If <c>port</c> and/or <c>ip</c> is not + specified bind will not be called.</p> + <p> + Own Id: OTP-12061</p> </item> </list> </section> diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 1026e5f649..8bfb7d2ad2 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -3503,6 +3503,7 @@ get_map_elements_fail: * I[0]: &&call_nif * I[1]: Function pointer to NIF function * I[2]: Pointer to erl_module_nif + * I[3]: Function pointer to dirty NIF */ BifFunction vbf; @@ -3523,13 +3524,6 @@ get_map_elements_fail: reg[0] = r(0); nif_bif_result = (*fp)(&env, bif_nif_arity, reg); erts_post_nif(&env); -#ifdef ERTS_DIRTY_SCHEDULERS - if (is_non_value(nif_bif_result) && c_p->freason == TRAP) { - Export* ep = ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(c_p); - ep->code[0] = I[-3]; - ep->code[1] = I[-2]; - } -#endif } ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(nif_bif_result)); PROCESS_MAIN_CHK_LOCKS(c_p); diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index e96177cfd9..cfc6146b0a 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2363,7 +2363,11 @@ load_code(LoaderState* stp) if (stp->may_load_nif) { const int finfo_ix = ci - FUNC_INFO_SZ; - enum { MIN_FUNC_SZ = 3 }; +#ifdef ERTS_DIRTY_SCHEDULERS + enum { MIN_FUNC_SZ = 4 }; +#else + enum { MIN_FUNC_SZ = 3 }; +#endif if (finfo_ix - last_func_start < MIN_FUNC_SZ && last_func_start) { /* Must make room for call_nif op */ int pad = MIN_FUNC_SZ - (finfo_ix - last_func_start); diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c index fcbeb6cf5c..f3c05d047d 100644 --- a/erts/emulator/beam/bif.c +++ b/erts/emulator/beam/bif.c @@ -1869,6 +1869,7 @@ do_send(Process *p, Eterm to, Eterm msg, int suspend, Eterm *refp) { } else if (is_external_pid(to)) { dep = external_pid_dist_entry(to); if(dep == erts_this_dist_entry) { +#if DEBUG erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " @@ -1879,6 +1880,7 @@ do_send(Process *p, Eterm to, Eterm msg, int suspend, Eterm *refp) { external_pid_creation(to), erts_this_node->creation); erts_send_error_to_logger(p->group_leader, dsbufp); +#endif return 0; } return remote_send(p, dep, to, to, msg, suspend); @@ -1912,6 +1914,7 @@ do_send(Process *p, Eterm to, Eterm msg, int suspend, Eterm *refp) { } else if (is_external_port(to) && (external_port_dist_entry(to) == erts_this_dist_entry)) { +#if DEBUG erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf(); erts_dsprintf(dsbufp, "Discarding message %T from %T to %T in an old " @@ -1922,6 +1925,7 @@ do_send(Process *p, Eterm to, Eterm msg, int suspend, Eterm *refp) { external_port_creation(to), erts_this_node->creation); erts_send_error_to_logger(p->group_leader, dsbufp); +#endif return 0; } else if (is_internal_port(to)) { int ret_val; diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab index 011e49f1fe..e68b8e6274 100644 --- a/erts/emulator/beam/bif.tab +++ b/erts/emulator/beam/bif.tab @@ -601,6 +601,10 @@ bif maps:values/1 bif erts_internal:cmp_term/2 # +# New in 17.1. 
+# +bif erlang:fun_info_mfa/1 +# # Obsolete # diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c index 41a041eba6..e62caa6b22 100644 --- a/erts/emulator/beam/big.c +++ b/erts/emulator/beam/big.c @@ -274,6 +274,9 @@ _b = _b << _s; \ _vn1 = _b >> H_EXP; \ _vn0 = _b & LO_MASK; \ + /* Sometimes _s is 0 which triggers undefined behaviour for the \ + (_a0>>(D_EXP-_s)) shift, but this is ok because the \ + & -s will make it all to 0 later anyways. */ \ _un32 = (_a1 << _s) | ((_a0>>(D_EXP-_s)) & (-_s >> (D_EXP-1))); \ _un10 = _a0 << _s; \ _un1 = _un10 >> H_EXP; \ @@ -1506,13 +1509,15 @@ Eterm uword_to_big(UWord x, Eterm *y) */ Eterm small_to_big(Sint x, Eterm *y) { + Uint xu; if (x >= 0) { + xu = x; *y = make_pos_bignum_header(1); } else { - x = -x; + xu = -(Uint)x; *y = make_neg_bignum_header(1); } - BIG_DIGIT(y, 0) = x; + BIG_DIGIT(y, 0) = xu; return make_big(y); } @@ -1540,21 +1545,24 @@ Eterm erts_uint64_to_big(Uint64 x, Eterm **hpp) Eterm erts_sint64_to_big(Sint64 x, Eterm **hpp) { Eterm *hp = *hpp; + Uint64 ux; int neg; - if (x >= 0) + if (x >= 0) { neg = 0; + ux = x; + } else { neg = 1; - x = -x; + ux = -(Uint64)x; } #if defined(ARCH_32) || HALFWORD_HEAP - if (x >= (((Uint64) 1) << 32)) { + if (ux >= (((Uint64) 1) << 32)) { if (neg) *hp = make_neg_bignum_header(2); else *hp = make_pos_bignum_header(2); - BIG_DIGIT(hp, 0) = (Uint) (x & ((Uint) 0xffffffff)); - BIG_DIGIT(hp, 1) = (Uint) ((x >> 32) & ((Uint) 0xffffffff)); + BIG_DIGIT(hp, 0) = (Uint) (ux & ((Uint) 0xffffffff)); + BIG_DIGIT(hp, 1) = (Uint) ((ux >> 32) & ((Uint) 0xffffffff)); *hpp += 3; } else @@ -1564,7 +1572,7 @@ Eterm erts_sint64_to_big(Sint64 x, Eterm **hpp) *hp = make_neg_bignum_header(1); else *hp = make_pos_bignum_header(1); - BIG_DIGIT(hp, 0) = (Uint) x; + BIG_DIGIT(hp, 0) = (Uint) ux; *hpp += 2; } return make_big(hp); diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h index d80111822e..da31876d75 100644 --- a/erts/emulator/beam/big.h +++ b/erts/emulator/beam/big.h @@ -101,7 +101,7 @@ typedef Uint dsize_t; /* Vector size type */ #define ERTS_SINT64_HEAP_SIZE(X) \ (IS_SSMALL((X)) \ ? 0 \ - : ERTS_UINT64_BIG_HEAP_SIZE__((X) >= 0 ? (X) : -(X))) + : ERTS_UINT64_BIG_HEAP_SIZE__((X) >= 0 ? (X) : -(Uint64)(X))) #define ERTS_UINT64_HEAP_SIZE(X) \ (IS_USMALL(0, (X)) ? 0 : ERTS_UINT64_BIG_HEAP_SIZE__((X))) diff --git a/erts/emulator/beam/break.c b/erts/emulator/beam/break.c index 7d4f52ee23..08265b590d 100644 --- a/erts/emulator/beam/break.c +++ b/erts/emulator/beam/break.c @@ -256,6 +256,7 @@ print_process_info(int to, void *to_arg, Process *p) p->current[1], p->current[2]); } + erts_print(to, to_arg, "Run queue: %d\n", erts_get_runq_proc(p)->ix); erts_print(to, to_arg, "Spawned by: %T\n", p->parent); approx_started = (time_t) p->approx_started; diff --git a/erts/emulator/beam/erl_alloc.c b/erts/emulator/beam/erl_alloc.c index 05ac24e04d..90cd227fae 100644 --- a/erts/emulator/beam/erl_alloc.c +++ b/erts/emulator/beam/erl_alloc.c @@ -1873,8 +1873,8 @@ erts_alc_fatal_error(int error, int func, ErtsAlcType_t n, ...) 
size = va_arg(argp, Uint); va_end(argp); erl_exit(1, - "%s: Cannot %s %lu bytes of memory (of type \"%s\").\n", - allctr_str, op, size, t_str); + "%s: Cannot %s %lu bytes of memory (of type \"%s\", thread %d).\n", + allctr_str, op, size, t_str, ERTS_ALC_GET_THR_IX()); break; } case ERTS_ALC_E_NOALLCTR: diff --git a/erts/emulator/beam/erl_alloc_util.c b/erts/emulator/beam/erl_alloc_util.c index 45f0cc4312..a4e164bf51 100644 --- a/erts/emulator/beam/erl_alloc_util.c +++ b/erts/emulator/beam/erl_alloc_util.c @@ -3274,6 +3274,15 @@ create_carrier(Allctr_t *allctr, Uint umem_sz, UWord flags) ASSERT(!(flags & CFLG_FORCE_MSEG && flags & CFLG_FORCE_SYS_ALLOC)); + if (umem_sz > (ERTS_UINT_MAX - ERTS_UINT_MAX/100)) { + /* Do an overly conservative _overflow_ check here so we don't + * have to deal with it from here on. I guess we could be more accurate + * but I don't think the need to allocate over 99% of the address space + * will ever arise on any machine, neither 32 nor 64 bit. + */ + return NULL; + } + blk_sz = UMEMSZ2BLKSZ(allctr, umem_sz); #ifdef ERTS_SMP diff --git a/erts/emulator/beam/erl_bif_binary.c b/erts/emulator/beam/erl_bif_binary.c index 7e0e825a0d..3bf78adce7 100644 --- a/erts/emulator/beam/erl_bif_binary.c +++ b/erts/emulator/beam/erl_bif_binary.c @@ -1324,9 +1324,9 @@ static int parse_match_opts_list(Eterm l, Eterm bin, Uint *posp, Uint *endp) goto badarg; } if (len < 0) { - Sint lentmp = -len; + Uint lentmp = -(Uint)len; /* overflow */ - if (lentmp == len || lentmp < 0 || -lentmp != len) { + if ((Sint)lentmp < 0) { goto badarg; } len = lentmp; @@ -1555,9 +1555,9 @@ BIF_RETTYPE erts_binary_part(Process *p, Eterm binary, Eterm epos, Eterm elen) goto badarg; } if (len < 0) { - Sint lentmp = -len; + Uint lentmp = -(Uint)len; /* overflow */ - if (lentmp == len || lentmp < 0 || -lentmp != len) { + if ((Sint)lentmp < 0) { goto badarg; } len = lentmp; @@ -1644,9 +1644,9 @@ BIF_RETTYPE erts_gc_binary_part(Process *p, Eterm *reg, Eterm live, int range_is goto badarg; } if (len < 0) { - Sint lentmp = -len; + Uint lentmp = -(Uint)len; /* overflow */ - if (lentmp == len || lentmp < 0 || -lentmp != len) { + if ((Sint)lentmp < 0) { goto badarg; } len = lentmp; @@ -2213,9 +2213,9 @@ static BIF_RETTYPE binary_bin_to_list_common(Process *p, goto badarg; } if (len < 0) { - Sint lentmp = -len; + Uint lentmp = -(Uint)len; /* overflow */ - if (lentmp == len || lentmp < 0 || -lentmp != len) { + if ((Sint)lentmp < 0) { goto badarg; } len = lentmp; diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c index 4d5e55aaf5..6efe9d9550 100644 --- a/erts/emulator/beam/erl_bif_info.c +++ b/erts/emulator/beam/erl_bif_info.c @@ -3055,6 +3055,25 @@ fun_info_2(BIF_ALIST_2) return TUPLE2(hp, what, val); } +BIF_RETTYPE +fun_info_mfa_1(BIF_ALIST_1) +{ + Process* p = BIF_P; + Eterm fun = BIF_ARG_1; + Eterm* hp; + + if (is_fun(fun)) { + ErlFunThing* funp = (ErlFunThing *) fun_val(fun); + hp = HAlloc(p, 4); + BIF_RET(TUPLE3(hp,funp->fe->module,funp->fe->address[-2],make_small(funp->arity))); + } else if (is_export(fun)) { + Export* exp = (Export *) ((UWord) (export_val(fun))[1]); + hp = HAlloc(p, 4); + BIF_RET(TUPLE3(hp,exp->code[0],exp->code[1],make_small(exp->code[2]))); + } + BIF_ERROR(p, BADARG); +} + BIF_RETTYPE is_process_alive_1(BIF_ALIST_1) { if(is_internal_pid(BIF_ARG_1)) { @@ -3856,16 +3875,19 @@ static Eterm lcnt_build_lock_stats_term(Eterm **hpp, Uint *szp, erts_lcnt_lock_s Uint tries = 0, colls = 0; unsigned long timer_s = 0, timer_ns = 0, timer_n = 0; unsigned int line = 0; + 
unsigned int i; Eterm af, uil; Eterm uit, uic; Eterm uits, uitns, uitn; Eterm tt, tstat, tloc, t; + Eterm thist, vhist[ERTS_LCNT_HISTOGRAM_SLOT_SIZE]; /* term: - * [{{file, line}, {tries, colls, {seconds, nanoseconds, n_blocks}}}] + * [{{file, line}, {tries, colls, {seconds, nanoseconds, n_blocks}}, + * { .. histogram .. }] */ - + tries = (Uint) ethr_atomic_read(&stats->tries); colls = (Uint) ethr_atomic_read(&stats->colls); @@ -3874,23 +3896,27 @@ static Eterm lcnt_build_lock_stats_term(Eterm **hpp, Uint *szp, erts_lcnt_lock_s timer_ns = stats->timer.ns; timer_n = stats->timer_n; - af = erts_atom_put(stats->file, strlen(stats->file), ERTS_ATOM_ENC_LATIN1, 1); + af = erts_atom_put((byte *)stats->file, strlen(stats->file), ERTS_ATOM_ENC_LATIN1, 1); uil = erts_bld_uint( hpp, szp, line); tloc = erts_bld_tuple(hpp, szp, 2, af, uil); - uit = erts_bld_uint( hpp, szp, tries); - uic = erts_bld_uint( hpp, szp, colls); - + uit = erts_bld_uint( hpp, szp, tries); + uic = erts_bld_uint( hpp, szp, colls); + uits = erts_bld_uint( hpp, szp, timer_s); uitns = erts_bld_uint( hpp, szp, timer_ns); uitn = erts_bld_uint( hpp, szp, timer_n); tt = erts_bld_tuple(hpp, szp, 3, uits, uitns, uitn); tstat = erts_bld_tuple(hpp, szp, 3, uit, uic, tt); - - t = erts_bld_tuple(hpp, szp, 2, tloc, tstat); - - res = erts_bld_cons( hpp, szp, t, res); + + for(i = 0; i < ERTS_LCNT_HISTOGRAM_SLOT_SIZE; i++) { + vhist[i] = erts_bld_uint(hpp, szp, stats->hist.ns[i]); + } + thist = erts_bld_tuplev(hpp, szp, ERTS_LCNT_HISTOGRAM_SLOT_SIZE, vhist); + + t = erts_bld_tuple(hpp, szp, 3, tloc, tstat, thist); + res = erts_bld_cons( hpp, szp, t, res); return res; } @@ -3911,13 +3937,13 @@ static Eterm lcnt_build_lock_term(Eterm **hpp, Uint *szp, erts_lcnt_lock_t *lock ASSERT(ltype); - type = erts_atom_put(ltype, strlen(ltype), ERTS_ATOM_ENC_LATIN1, 1); - name = erts_atom_put(lock->name, strlen(lock->name), ERTS_ATOM_ENC_LATIN1, 1); + type = erts_atom_put((byte *)ltype, strlen(ltype), ERTS_ATOM_ENC_LATIN1, 1); + name = erts_atom_put((byte *)lock->name, strlen(lock->name), ERTS_ATOM_ENC_LATIN1, 1); if (lock->flag & ERTS_LCNT_LT_ALLOC) { /* use allocator types names as id's for allocator locks */ ltype = (char *) ERTS_ALC_A2AD(signed_val(lock->id)); - id = erts_atom_put(ltype, strlen(ltype), ERTS_ATOM_ENC_LATIN1, 1); + id = erts_atom_put((byte *)ltype, strlen(ltype), ERTS_ATOM_ENC_LATIN1, 1); } else if (lock->flag & ERTS_LCNT_LT_PROCLOCK) { /* use registered names as id's for process locks if available */ proc = erts_proc_lookup(lock->id); @@ -3928,16 +3954,15 @@ static Eterm lcnt_build_lock_term(Eterm **hpp, Uint *szp, erts_lcnt_lock_t *lock id = lock->id; } } else { - id = lock->id; + id = lock->id; } - + for (i = 0; i < lock->n_stats; i++) { stats = lcnt_build_lock_stats_term(hpp, szp, &(lock->stats[i]), stats); } - - t = erts_bld_tuple(hpp, szp, 4, name, id, type, stats); - - res = erts_bld_cons( hpp, szp, t, res); + + t = erts_bld_tuple(hpp, szp, 4, name, id, type, stats); + res = erts_bld_cons( hpp, szp, t, res); return res; } @@ -3957,12 +3982,12 @@ static Eterm lcnt_build_result_term(Eterm **hpp, Uint *szp, erts_lcnt_data_t *da dtns = erts_bld_uint( hpp, szp, data->duration.ns); tdt = erts_bld_tuple(hpp, szp, 2, dts, dtns); - adur = erts_atom_put(str_duration, strlen(str_duration), ERTS_ATOM_ENC_LATIN1, 1); + adur = erts_atom_put((byte *)str_duration, strlen(str_duration), ERTS_ATOM_ENC_LATIN1, 1); tdur = erts_bld_tuple(hpp, szp, 2, adur, tdt); /* lock tuple */ - aloc = erts_atom_put(str_locks, strlen(str_locks), ERTS_ATOM_ENC_LATIN1, 
1); + aloc = erts_atom_put((byte *)str_locks, strlen(str_locks), ERTS_ATOM_ENC_LATIN1, 1); for (lock = data->current_locks->head; lock != NULL ; lock = lock->next ) { lloc = lcnt_build_lock_term(hpp, szp, lock, lloc); diff --git a/erts/emulator/beam/erl_binary.h b/erts/emulator/beam/erl_binary.h index 6c9f53ce87..06dfeb1260 100644 --- a/erts/emulator/beam/erl_binary.h +++ b/erts/emulator/beam/erl_binary.h @@ -236,6 +236,8 @@ erts_bin_drv_alloc_fnf(Uint size) { Uint bsize = ERTS_SIZEOF_Binary(size) + CHICKEN_PAD; void *res; + if (bsize < size) /* overflow */ + return NULL; res = erts_alloc_fnf(ERTS_ALC_T_DRV_BINARY, bsize); ERTS_CHK_BIN_ALIGNMENT(res); return (Binary *) res; @@ -246,6 +248,8 @@ erts_bin_drv_alloc(Uint size) { Uint bsize = ERTS_SIZEOF_Binary(size) + CHICKEN_PAD; void *res; + if (bsize < size) /* overflow */ + erts_alloc_enomem(ERTS_ALC_T_DRV_BINARY, size); res = erts_alloc(ERTS_ALC_T_DRV_BINARY, bsize); ERTS_CHK_BIN_ALIGNMENT(res); return (Binary *) res; @@ -257,6 +261,8 @@ erts_bin_nrml_alloc(Uint size) { Uint bsize = ERTS_SIZEOF_Binary(size) + CHICKEN_PAD; void *res; + if (bsize < size) /* overflow */ + erts_alloc_enomem(ERTS_ALC_T_BINARY, size); res = erts_alloc(ERTS_ALC_T_BINARY, bsize); ERTS_CHK_BIN_ALIGNMENT(res); return (Binary *) res; @@ -267,11 +273,12 @@ erts_bin_realloc_fnf(Binary *bp, Uint size) { Binary *nbp; Uint bsize = ERTS_SIZEOF_Binary(size) + CHICKEN_PAD; + ErtsAlcType_t type = (bp->flags & BIN_FLAG_DRV) ? ERTS_ALC_T_DRV_BINARY + : ERTS_ALC_T_BINARY; ASSERT((bp->flags & BIN_FLAG_MAGIC) == 0); - if (bp->flags & BIN_FLAG_DRV) - nbp = erts_realloc_fnf(ERTS_ALC_T_DRV_BINARY, (void *) bp, bsize); - else - nbp = erts_realloc_fnf(ERTS_ALC_T_BINARY, (void *) bp, bsize); + if (bsize < size) /* overflow */ + return NULL; + nbp = erts_realloc_fnf(type, (void *) bp, bsize); ERTS_CHK_BIN_ALIGNMENT(nbp); return nbp; } @@ -281,17 +288,14 @@ erts_bin_realloc(Binary *bp, Uint size) { Binary *nbp; Uint bsize = ERTS_SIZEOF_Binary(size) + CHICKEN_PAD; + ErtsAlcType_t type = (bp->flags & BIN_FLAG_DRV) ? ERTS_ALC_T_DRV_BINARY + : ERTS_ALC_T_BINARY; ASSERT((bp->flags & BIN_FLAG_MAGIC) == 0); - if (bp->flags & BIN_FLAG_DRV) - nbp = erts_realloc_fnf(ERTS_ALC_T_DRV_BINARY, (void *) bp, bsize); - else - nbp = erts_realloc_fnf(ERTS_ALC_T_BINARY, (void *) bp, bsize); + if (bsize < size) /* overflow */ + erts_realloc_enomem(type, bp, size); + nbp = erts_realloc_fnf(type, (void *) bp, bsize); if (!nbp) - erts_realloc_n_enomem(ERTS_ALC_T2N(bp->flags & BIN_FLAG_DRV - ? 
ERTS_ALC_T_DRV_BINARY - : ERTS_ALC_T_BINARY), - bp, - bsize); + erts_realloc_enomem(type, bp, bsize); ERTS_CHK_BIN_ALIGNMENT(nbp); return nbp; } @@ -312,6 +316,7 @@ erts_create_magic_binary(Uint size, void (*destructor)(Binary *)) { Uint bsize = ERTS_MAGIC_BIN_SIZE(size); Binary* bptr = erts_alloc_fnf(ERTS_ALC_T_BINARY, bsize); + ASSERT(bsize > size); if (!bptr) erts_alloc_n_enomem(ERTS_ALC_T2N(ERTS_ALC_T_BINARY), bsize); ERTS_CHK_BIN_ALIGNMENT(bptr); diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c index aa15d2cc57..0db42d4325 100644 --- a/erts/emulator/beam/erl_gc.c +++ b/erts/emulator/beam/erl_gc.c @@ -2018,6 +2018,20 @@ setup_rootset(Process *p, Eterm *objv, int nobj, Rootset *rootset) roots[n].sz = 1; n++; } + + /* + * If a NIF has saved arguments, they need to be added + */ + if (ERTS_PROC_GET_NIF_TRAP_EXPORT(p)) { + Eterm* argv; + int argc; + if (erts_setup_nif_gc(p, &argv, &argc)) { + roots[n].v = argv; + roots[n].sz = argc; + n++; + } + } + ASSERT(n <= rootset->size); mp = p->msg.first; diff --git a/erts/emulator/beam/erl_init.c b/erts/emulator/beam/erl_init.c index 5e6d812242..88c4006934 100644 --- a/erts/emulator/beam/erl_init.c +++ b/erts/emulator/beam/erl_init.c @@ -2066,8 +2066,10 @@ erl_exit_vv(int n, int flush_async, char *fmt, va_list args1, va_list args2) system_cleanup(flush_async); save_statistics(); - - an = abs(n); + if (n < 0) + an = -(unsigned int)n; + else + an = n; if (erts_mtrace_enabled) erts_mtrace_exit((Uint32) an); diff --git a/erts/emulator/beam/erl_lock_check.c b/erts/emulator/beam/erl_lock_check.c index c13eb87012..b105ece6f1 100644 --- a/erts/emulator/beam/erl_lock_check.c +++ b/erts/emulator/beam/erl_lock_check.c @@ -139,7 +139,6 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "drv_tsd", NULL }, { "async_enq_mtx", NULL }, #ifdef ERTS_SMP - { "sys_msg_q", NULL }, { "atom_tab", NULL }, { "make_ref", NULL }, { "misc_op_list_pre_alloc_lock", "address" }, @@ -148,6 +147,7 @@ static erts_lc_lock_order_t erts_lock_order[] = { { "btm_pre_alloc_lock", NULL, }, { "dist_entry_out_queue", "address" }, { "port_sched_lock", "port_id" }, + { "sys_msg_q", NULL }, { "port_table", NULL }, #endif { "mtrace_op", NULL }, @@ -227,8 +227,7 @@ rw_op_str(Uint16 flags) case ERTS_LC_FLG_LO_READ: return " (r)"; case ERTS_LC_FLG_LO_WRITE: - erts_fprintf(stderr, "\nInternal error\n"); - lc_abort(); + ERTS_INTERNAL_ERROR("Only write flag present"); default: break; } @@ -311,8 +310,7 @@ static ERTS_INLINE void lc_free(void *p) static void *lc_core_alloc(void) { lc_unlock(); - erts_fprintf(stderr, "Lock checker out of memory!\n"); - lc_abort(); + ERTS_INTERNAL_ERROR("Lock checker out of memory!\n"); } #else @@ -325,8 +323,7 @@ static void *lc_core_alloc(void) fbs = (erts_lc_free_block_t *) malloc(sizeof(erts_lc_free_block_t) * ERTS_LC_FB_CHUNK_SIZE); if (!fbs) { - erts_fprintf(stderr, "Lock checker failed to allocate memory!\n"); - lc_abort(); + ERTS_INTERNAL_ERROR("Lock checker failed to allocate memory!"); } for (i = 1; i < ERTS_LC_FB_CHUNK_SIZE - 1; i++) { #ifdef DEBUG @@ -366,11 +363,11 @@ create_locked_locks(char *thread_name) { erts_lc_locked_locks_t *l_lcks = malloc(sizeof(erts_lc_locked_locks_t)); if (!l_lcks) - lc_abort(); + ERTS_INTERNAL_ERROR("Lock checker failed to allocate memory!"); l_lcks->thread_name = strdup(thread_name ? 
thread_name : "unknown"); if (!l_lcks->thread_name) - lc_abort(); + ERTS_INTERNAL_ERROR("Lock checker failed to allocate memory!"); l_lcks->emu_thread = 0; l_lcks->tid = erts_thr_self(); @@ -691,7 +688,7 @@ erts_lc_set_thread_name(char *thread_name) free((void *) l_lcks->thread_name); l_lcks->thread_name = strdup(thread_name ? thread_name : "unknown"); if (!l_lcks->thread_name) - lc_abort(); + ERTS_INTERNAL_ERROR("strdup failed"); } l_lcks->emu_thread = 1; } @@ -1330,7 +1327,7 @@ erts_lc_init(void) #endif /* #ifdef ERTS_LC_STATIC_ALLOC */ if (ethr_spinlock_init(&free_blocks_lock) != 0) - lc_abort(); + ERTS_INTERNAL_ERROR("spinlock_init failed"); erts_tsd_key_create(&locks_key,"erts_lock_check_key"); } diff --git a/erts/emulator/beam/erl_lock_count.c b/erts/emulator/beam/erl_lock_count.c index 6f44bf097b..cf6996ea06 100644 --- a/erts/emulator/beam/erl_lock_count.c +++ b/erts/emulator/beam/erl_lock_count.c @@ -61,6 +61,25 @@ static ERTS_INLINE void lcnt_unlock(void) { ethr_mutex_unlock(&lcnt_data_lock); } +const int log2_tab64[64] = { + 63, 0, 58, 1, 59, 47, 53, 2, + 60, 39, 48, 27, 54, 33, 42, 3, + 61, 51, 37, 40, 49, 18, 28, 20, + 55, 30, 34, 11, 43, 14, 22, 4, + 62, 57, 46, 52, 38, 26, 32, 41, + 50, 36, 17, 19, 29, 10, 13, 21, + 56, 45, 25, 31, 35, 16, 9, 12, + 44, 24, 15, 8, 23, 7, 6, 5}; + +static ERTS_INLINE int lcnt_log2(Uint64 v) { + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return log2_tab64[((Uint64)((v - (v >> 1))*0x07EDD5E59A4E28C2)) >> 58]; +} static char* lcnt_lock_type(Uint16 flag) { switch(flag & ERTS_LCNT_LT_ALL) { @@ -81,19 +100,20 @@ static void lcnt_clear_stats(erts_lcnt_lock_stats_t *stats) { stats->timer_n = 0; stats->file = (char *)str_undefined; stats->line = 0; + sys_memzero(stats->hist.ns, sizeof(stats->hist.ns)); } static void lcnt_time(erts_lcnt_time_t *time) { -#ifdef HAVE_GETHRTIME +#if 0 || defined(HAVE_GETHRTIME) SysHrTime hr_time; hr_time = sys_gethrtime(); time->s = (unsigned long)(hr_time / 1000000000LL); time->ns = (unsigned long)(hr_time - 1000000000LL*time->s); -#else - SysTimeval tv; - sys_gettimeofday(&tv); - time->s = tv.tv_sec; - time->ns = tv.tv_usec*1000LL; +#else + SysTimeval tv; + sys_gettimeofday(&tv); + time->s = tv.tv_sec; + time->ns = tv.tv_usec*1000LL; #endif } @@ -111,28 +131,29 @@ static void lcnt_time_diff(erts_lcnt_time_t *d, erts_lcnt_time_t *t1, erts_lcnt_ dns += 1000000000LL; } + ASSERT(ds >= 0); + d->s = ds; d->ns = dns; } -/* difference d must be positive */ +/* difference d must be non-negative */ static void lcnt_time_add(erts_lcnt_time_t *t, erts_lcnt_time_t *d) { - unsigned long ngns = 0; - t->s += d->s; t->ns += d->ns; - ngns = t->ns / 1000000000LL; + t->s += t->ns / 1000000000LL; t->ns = t->ns % 1000000000LL; - - t->s += ngns; } static erts_lcnt_thread_data_t *lcnt_thread_data_alloc(void) { erts_lcnt_thread_data_t *eltd; eltd = (erts_lcnt_thread_data_t*)malloc(sizeof(erts_lcnt_thread_data_t)); + if (!eltd) { + ERTS_INTERNAL_ERROR("Lock counter failed to allocate memory!"); + } eltd->timer_set = 0; eltd->lock_in_conflict = 0; @@ -158,59 +179,64 @@ static char* lock_opt(Uint16 flag) { return "--"; } -static void print_lock_x(erts_lcnt_lock_t *lock, Uint16 flag, char *action, char *extra) { - erts_aint_t colls, tries, w_state, r_state; - erts_lcnt_lock_stats_t *stats = NULL; - +static void print_lock_x(erts_lcnt_lock_t *lock, Uint16 flag, char *action) { + erts_aint_t w_state, r_state; char *type; - int i; - + + if (strcmp(lock->name, "run_queue") != 0) return; type = 
lcnt_lock_type(lock->flag); r_state = ethr_atomic_read(&lock->r_state); w_state = ethr_atomic_read(&lock->w_state); - if (lock->flag & flag) { - erts_printf("%20s [%30s] [r/w state %4ld/%4ld] id %T %s\r\n", - action, - lock->name, - r_state, - w_state, - lock->id, - extra); + erts_fprintf(stderr,"%10s [%24s] [r/w state %4ld/%4ld] %2s id %T\r\n", + action, + lock->name, + r_state, + w_state, + type, + lock->id); } } - -static void print_lock(erts_lcnt_lock_t *lock, char *action) { - if (strcmp(lock->name, "proc_main") == 0) { - print_lock_x(lock, ERTS_LCNT_LT_ALL, action, ""); - } -} - #endif static erts_lcnt_lock_stats_t *lcnt_get_lock_stats(erts_lcnt_lock_t *lock, char *file, unsigned int line) { unsigned int i; erts_lcnt_lock_stats_t *stats = NULL; - - for (i = 0; i < lock->n_stats; i++) { - if ((lock->stats[i].file == file) && (lock->stats[i].line == line)) { - return &(lock->stats[i]); - } - } - if (lock->n_stats < ERTS_LCNT_MAX_LOCK_LOCATIONS) { - stats = &lock->stats[lock->n_stats]; - lock->n_stats++; - stats->file = file; - stats->line = line; - return stats; + if (erts_lcnt_rt_options & ERTS_LCNT_OPT_LOCATION) { + for (i = 0; i < lock->n_stats; i++) { + if ((lock->stats[i].file == file) && (lock->stats[i].line == line)) { + return &(lock->stats[i]); + } + } + if (lock->n_stats < ERTS_LCNT_MAX_LOCK_LOCATIONS) { + stats = &lock->stats[lock->n_stats]; + lock->n_stats++; + stats->file = file; + stats->line = line; + return stats; + } } return &lock->stats[0]; +} +static void lcnt_update_stats_hist(erts_lcnt_hist_t *hist, erts_lcnt_time_t *time_wait) { + int idx; + unsigned long r; + + if (time_wait->s > 0 || time_wait->ns > ERTS_LCNT_HISTOGRAM_MAX_NS) { + idx = ERTS_LCNT_HISTOGRAM_SLOT_SIZE - 1; + } else { + r = time_wait->ns >> ERTS_LCNT_HISTOGRAM_RSHIFT; + if (r) idx = lcnt_log2(r); + else idx = 0; + } + hist->ns[idx]++; } -static void lcnt_update_stats(erts_lcnt_lock_stats_t *stats, int lock_in_conflict, erts_lcnt_time_t *time_wait) { +static void lcnt_update_stats(erts_lcnt_lock_stats_t *stats, int lock_in_conflict, + erts_lcnt_time_t *time_wait) { ethr_atomic_inc(&stats->tries); @@ -220,6 +246,7 @@ static void lcnt_update_stats(erts_lcnt_lock_stats_t *stats, int lock_in_conflic if (time_wait) { lcnt_time_add(&(stats->timer), time_wait); stats->timer_n++; + lcnt_update_stats_hist(&stats->hist,time_wait); } } @@ -248,6 +275,9 @@ void erts_lcnt_init() { /* init lcnt structure */ erts_lcnt_data = (erts_lcnt_data_t*)malloc(sizeof(erts_lcnt_data_t)); + if (!erts_lcnt_data) { + ERTS_INTERNAL_ERROR("Lock counter failed to allocate memory!"); + } erts_lcnt_data->current_locks = erts_lcnt_list_init(); erts_lcnt_data->deleted_locks = erts_lcnt_list_init(); @@ -269,6 +299,9 @@ erts_lcnt_lock_list_t *erts_lcnt_list_init(void) { erts_lcnt_lock_list_t *list; list = (erts_lcnt_lock_list_t*)malloc(sizeof(erts_lcnt_lock_list_t)); + if (!list) { + ERTS_INTERNAL_ERROR("Lock counter failed to allocate memory!"); + } list->head = NULL; list->tail = NULL; list->n = 0; @@ -330,8 +363,9 @@ void erts_lcnt_list_delete(erts_lcnt_lock_list_t *list, erts_lcnt_lock_t *lock) /* interface to erl_threads.h */ /* only lock on init and destroy, all others should use atomics */ void erts_lcnt_init_lock(erts_lcnt_lock_t *lock, char *name, Uint16 flag ) { - erts_lcnt_init_lock_x(lock, name, flag, am_undefined); + erts_lcnt_init_lock_x(lock, name, flag, NIL); } + void erts_lcnt_init_lock_x(erts_lcnt_lock_t *lock, char *name, Uint16 flag, Eterm id) { int i; if (!name) { @@ -360,7 +394,6 @@ void 
erts_lcnt_init_lock_x(erts_lcnt_lock_t *lock, char *name, Uint16 flag, Eter } erts_lcnt_list_insert(erts_lcnt_data->current_locks, lock); - lcnt_unlock(); } @@ -375,6 +408,9 @@ void erts_lcnt_destroy_lock(erts_lcnt_lock_t *lock) { /* copy structure and insert the copy */ deleted_lock = (erts_lcnt_lock_t*)malloc(sizeof(erts_lcnt_lock_t)); + if (!deleted_lock) { + ERTS_INTERNAL_ERROR("Lock counter failed to allocate memory!"); + } memcpy(deleted_lock, lock, sizeof(erts_lcnt_lock_t)); deleted_lock->next = NULL; @@ -417,8 +453,9 @@ void erts_lcnt_lock_opt(erts_lcnt_lock_t *lock, Uint16 option) { if ((w_state > 0) || (r_state > 0)) { eltd->lock_in_conflict = 1; - if (eltd->timer_set == 0) + if (eltd->timer_set == 0) { lcnt_time(&eltd->timer); + } eltd->timer_set++; } else { eltd->lock_in_conflict = 0; @@ -433,7 +470,7 @@ void erts_lcnt_lock(erts_lcnt_lock_t *lock) { if (!ERTS_LCNT_LOCK_TYPE(lock)) return; w_state = ethr_atomic_read(&lock->w_state); - ethr_atomic_inc( &lock->w_state); + ethr_atomic_inc(&lock->w_state); eltd = lcnt_get_thread_data(); @@ -446,10 +483,10 @@ void erts_lcnt_lock(erts_lcnt_lock_t *lock) { * 'atomicly'. All other locks will block the thread if w_state > 0 * i.e. locked. */ - if (eltd->timer_set == 0) + if (eltd->timer_set == 0) { lcnt_time(&eltd->timer); + } eltd->timer_set++; - } else { eltd->lock_in_conflict = 0; } @@ -459,11 +496,10 @@ void erts_lcnt_lock(erts_lcnt_lock_t *lock) { void erts_lcnt_lock_unaquire(erts_lcnt_lock_t *lock) { /* should check if this thread was "waiting" */ - if (erts_lcnt_rt_options & ERTS_LCNT_OPT_SUSPEND) return; if (!ERTS_LCNT_LOCK_TYPE(lock)) return; - ethr_atomic_dec( &lock->w_state); + ethr_atomic_dec(&lock->w_state); } /* erts_lcnt_lock_post @@ -491,7 +527,7 @@ void erts_lcnt_lock_post_x(erts_lcnt_lock_t *lock, char *file, unsigned int line if (!(lock->flag & (ERTS_LCNT_LT_RWMUTEX | ERTS_LCNT_LT_RWSPINLOCK))) { flowstate = ethr_atomic_read(&lock->flowstate); ASSERT(flowstate == 0); - ethr_atomic_inc( &lock->flowstate); + ethr_atomic_inc(&lock->flowstate); } #endif @@ -500,19 +536,12 @@ void erts_lcnt_lock_post_x(erts_lcnt_lock_t *lock, char *file, unsigned int line ASSERT(eltd); /* if lock was in conflict, time it */ - - if (erts_lcnt_rt_options & ERTS_LCNT_OPT_LOCATION) { - stats = lcnt_get_lock_stats(lock, file, line); - } else { - stats = &lock->stats[0]; - } - + stats = lcnt_get_lock_stats(lock, file, line); if (eltd->timer_set) { lcnt_time(&timer); lcnt_time_diff(&time_wait, &timer, &(eltd->timer)); lcnt_update_stats(stats, eltd->lock_in_conflict, &time_wait); - eltd->timer_set--; ASSERT(eltd->timer_set >= 0); } else { @@ -541,11 +570,11 @@ void erts_lcnt_unlock(erts_lcnt_lock_t *lock) { /* flowstate */ flowstate = ethr_atomic_read(&lock->flowstate); ASSERT(flowstate == 1); - ethr_atomic_dec( &lock->flowstate); + ethr_atomic_dec(&lock->flowstate); /* write state */ w_state = ethr_atomic_read(&lock->w_state); - ASSERT(w_state > 0) + ASSERT(w_state > 0); #endif ethr_atomic_dec(&lock->w_state); } @@ -582,9 +611,7 @@ void erts_lcnt_trylock(erts_lcnt_lock_t *lock, int res) { ethr_atomic_inc( &lock->flowstate); #endif ethr_atomic_inc(&lock->w_state); - lcnt_update_stats(&(lock->stats[0]), 0, NULL); - } else { ethr_atomic_inc(&lock->stats[0].tries); ethr_atomic_inc(&lock->stats[0].colls); diff --git a/erts/emulator/beam/erl_lock_count.h b/erts/emulator/beam/erl_lock_count.h index 75f7cd028b..ffbb93da1b 100644 --- a/erts/emulator/beam/erl_lock_count.h +++ b/erts/emulator/beam/erl_lock_count.h @@ -35,6 +35,10 @@ * | | | - collisions 
(including trylock busy) * | | | - timer (time spent in waiting for lock) * | | | - n_timer (collisions excluding trylock busy) + * | | | - histogram + * | | | | - # 0 = log2(lock wait_time ns) + * | | | | - ... + * | | | | - # n = log2(lock wait_time ns) * * Each instance of a lock is the unique lock, i.e. set and id in that set. * For each lock there is a set of statistics with where and what impact @@ -68,8 +72,17 @@ #include "ethread.h" +#define ERTS_LCNT_MAX_LOCK_LOCATIONS (10) -#define ERTS_LCNT_MAX_LOCK_LOCATIONS (10) +/* histogram */ +#define ERTS_LCNT_HISTOGRAM_MAX_NS (((unsigned long)1LL << 28) - 1) +#if 0 || defined(HAVE_GETHRTIME) +#define ERTS_LCNT_HISTOGRAM_SLOT_SIZE (30) +#define ERTS_LCNT_HISTOGRAM_RSHIFT (0) +#else +#define ERTS_LCNT_HISTOGRAM_SLOT_SIZE (20) +#define ERTS_LCNT_HISTOGRAM_RSHIFT (10) +#endif #define ERTS_LCNT_LT_SPINLOCK (((Uint16) 1) << 0) #define ERTS_LCNT_LT_RWSPINLOCK (((Uint16) 1) << 1) @@ -104,6 +117,10 @@ typedef struct { extern erts_lcnt_time_t timer_start; +typedef struct { + Uint32 ns[ERTS_LCNT_HISTOGRAM_SLOT_SIZE]; /* log2 array of nano seconds occurences */ +} erts_lcnt_hist_t; + typedef struct erts_lcnt_lock_stats_s { /* "tries" and "colls" needs to be atomic since * trylock busy does not aquire a lock and there @@ -118,6 +135,7 @@ typedef struct erts_lcnt_lock_stats_s { unsigned long timer_n; /* #times waited for lock */ erts_lcnt_time_t timer; /* total wait time for lock */ + erts_lcnt_hist_t hist; } erts_lcnt_lock_stats_t; /* rw locks uses both states, other locks only uses w_state */ diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c index 59a677a12c..8870fac7d9 100644 --- a/erts/emulator/beam/erl_message.c +++ b/erts/emulator/beam/erl_message.c @@ -415,7 +415,13 @@ erts_queue_dist_message(Process *rcvr, if (!(*rcvr_locks & ERTS_PROC_LOCK_MSGQ)) erts_smp_proc_unlock(rcvr, ERTS_PROC_LOCK_MSGQ); - erts_proc_notify_new_message(rcvr); + erts_proc_notify_new_message(rcvr, +#ifdef ERTS_SMP + *rcvr_locks +#else + 0 +#endif + ); } } @@ -542,7 +548,13 @@ queue_message(Process *c_p, if (locked_msgq) erts_smp_proc_unlock(receiver, ERTS_PROC_LOCK_MSGQ); - erts_proc_notify_new_message(receiver); + erts_proc_notify_new_message(receiver, +#ifdef ERTS_SMP + *receiver_locks +#else + 0 +#endif + ); #ifndef ERTS_SMP ERTS_HOLE_CHECK(receiver); diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c index ff551ea3af..1414744763 100644 --- a/erts/emulator/beam/erl_nif.c +++ b/erts/emulator/beam/erl_nif.c @@ -1513,72 +1513,251 @@ int enif_consume_timeslice(ErlNifEnv* env, int percent) return ERTS_BIF_REDS_LEFT(env->proc) == 0; } -#ifdef ERTS_DIRTY_SCHEDULERS - -/* NIFs exports need one more item than the Export struct provides, the - * erl_module_nif*, so the DirtyNifExport below adds that. The Export - * member must be first in the struct. +/* + * NIF exports need a few more items than the Export struct provides, + * including the erl_module_nif* and a NIF function pointer, so the + * NifExport below adds those. The Export member must be first in the + * struct. The saved_mfa, saved_argc, nif_level, alloced_argv_sz and argv + * members are used to track the MFA and arguments of the top NIF in case a + * chain of one or more enif_schedule_nif() calls results in an exception, + * since in that case the original MFA and registers have to be restored + * before returning to Erlang to ensure stacktrace information associated + * with the exception is correct. 
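+ *
+ * As a rough usage sketch (the NIF and helper names below are hypothetical,
+ * not part of this change), a long-running NIF can yield by rescheduling a
+ * continuation of itself through the enif_schedule_nif() call added here:
+ *
+ *     static ERL_NIF_TERM my_nif(ErlNifEnv* env, int argc,
+ *                                const ERL_NIF_TERM argv[])
+ *     {
+ *         if (!work_done(argv))            /* hypothetical helper */
+ *             return enif_schedule_nif(env, "my_nif", 0,
+ *                                      my_nif, argc, argv);
+ *         return make_result(env);         /* hypothetical helper */
+ *     }
+ *
+ * Each rescheduled call passes through the NifExport trap below, which is
+ * why the original MFA is saved only once and restored when the chain ends.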
*/ +typedef ERL_NIF_TERM (*NativeFunPtr)(ErlNifEnv*, int, const ERL_NIF_TERM[]); + typedef struct { Export exp; struct erl_module_nif* m; -} DirtyNifExport; + NativeFunPtr fp; + Eterm saved_mfa[3]; + int saved_argc; + int alloced_argv_sz; + Eterm argv[1]; +} NifExport; -static void -alloc_proc_psd(Process* proc, DirtyNifExport **ep) +/* + * If a process has saved arguments, they need to be part of the GC + * rootset. The function below is called from setup_rootset() in + * erl_gc.c. This function is declared in erl_process.h. + */ +int +erts_setup_nif_gc(Process* proc, Eterm** objv, int* nobj) +{ + NifExport* ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + int gc = (ep && ep->saved_argc > 0); + + if (gc) { + *objv = ep->argv; + *nobj = ep->saved_argc; + } + return gc; +} + +/* + * Allocate a NifExport and set it in proc specific data + */ +static NifExport* +allocate_nif_sched_data(Process* proc, int argc) { + NifExport* ep; + size_t argv_extra, total; int i; - if (!*ep) { - *ep = erts_alloc(ERTS_ALC_T_PSD, sizeof(DirtyNifExport)); - sys_memset((void*) *ep, 0, sizeof(DirtyNifExport)); - for (i=0; i<ERTS_NUM_CODE_IX; i++) { - (*ep)->exp.addressv[i] = &(*ep)->exp.code[3]; - } - (*ep)->exp.code[3] = (BeamInstr) em_call_nif; + + argv_extra = argc > 1 ? sizeof(Eterm)*(argc-1) : 0; + total = sizeof(NifExport) + argv_extra; + ep = erts_alloc(ERTS_ALC_T_PSD, total); + sys_memset((void*) ep, 0, total); + ep->alloced_argv_sz = argc; + for (i=0; i<ERTS_NUM_CODE_IX; i++) { + ep->exp.addressv[i] = &ep->exp.code[3]; } - (void) ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(proc, ERTS_PROC_LOCK_MAIN, &(*ep)->exp); + ep->exp.code[3] = (BeamInstr) em_call_nif; + (void) ERTS_PROC_SET_NIF_TRAP_EXPORT(proc, ERTS_PROC_LOCK_MAIN, &ep->exp); + return ep; } +/* + * Initialize a NifExport struct. Create it if needed and store it in the + * proc. The direct_fp function is what will be invoked by op_call_nif, and + * the indirect_fp function, if not NULL, is what the direct_fp function + * will call. If the allocated NifExport isn't enough to hold all of argv, + * allocate a larger one. Save MFA and registers only if the need_save + * parameter is true. 
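+ *
+ * Roughly, the Export code[] array ends up as: code[0] = module,
+ * code[1] = function, code[2] = argc, code[3] = em_call_nif and
+ * code[4] = direct_fp, so that op_call_nif invokes direct_fp when the
+ * process continues execution.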
+ */ static ERL_NIF_TERM -execute_dirty_nif_finalizer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +init_nif_sched_data(ErlNifEnv* env, NativeFunPtr direct_fp, NativeFunPtr indirect_fp, + int need_save, int argc, const ERL_NIF_TERM argv[]) { - Eterm* reg = ERTS_PROC_GET_SCHDATA(env->proc)->x_reg_array; - ERL_NIF_TERM result, dirty_result = (ERL_NIF_TERM) reg[0]; - typedef ERL_NIF_TERM (*FinalizerFP)(ErlNifEnv*, ERL_NIF_TERM); - FinalizerFP fp; -#if HAVE_INT64 && SIZEOF_LONG != 8 - ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64)); - enif_get_uint64(env, reg[1], (ErlNifUInt64 *) &fp); -#else - ASSERT(sizeof(fp) <= sizeof(unsigned long)); - enif_get_ulong(env, reg[1], (unsigned long *) &fp); -#endif - result = (*fp)(env, dirty_result); - if (erts_refc_dectest(&env->mod_nif->rt_dtor_cnt, 0) == 0 - && env->mod_nif->mod == NULL) - close_lib(env->mod_nif); - return result; + Process* proc = env->proc; + Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; + NifExport* ep; + int i; + + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + if (!ep) + ep = allocate_nif_sched_data(proc, argc); + else if (need_save && ep->alloced_argv_sz < argc) { + NifExport* new_ep = allocate_nif_sched_data(proc, argc); + erts_free(ERTS_ALC_T_PSD, (void*) ep); + ep = new_ep; + } + ERTS_VBUMP_ALL_REDS(proc); + for (i = 0; i < argc; i++) { + if (need_save) + ep->argv[i] = reg[i]; + reg[i] = (Eterm) argv[i]; + } + if (need_save) { + ep->saved_mfa[0] = proc->current[0]; + ep->saved_mfa[1] = proc->current[1]; + ep->saved_mfa[2] = proc->current[2]; + ep->saved_argc = argc; + } + proc->i = (BeamInstr*) ep->exp.addressv[0]; + ep->exp.code[0] = (BeamInstr) proc->current[0]; + ep->exp.code[1] = (BeamInstr) proc->current[1]; + ep->exp.code[2] = argc; + ep->exp.code[4] = (BeamInstr) direct_fp; + ep->m = env->mod_nif; + ep->fp = indirect_fp; + proc->freason = TRAP; + return THE_NON_VALUE; } -#endif /* ERTS_DIRTY_SCHEDULERS */ +/* + * Restore saved MFA and registers. Registers are restored only when the + * exception flag is true. + */ +static void +restore_nif_mfa(Process* proc, NifExport* ep, int exception) +{ + int i; + Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; -#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT + proc->current[0] = ep->saved_mfa[0]; + proc->current[1] = ep->saved_mfa[1]; + proc->current[2] = ep->saved_mfa[2]; + if (exception) + for (i = 0; i < ep->saved_argc; i++) + reg[i] = ep->argv[i]; + ep->saved_argc = 0; + ep->saved_mfa[0] = THE_NON_VALUE; +} -ERL_NIF_TERM -enif_schedule_dirty_nif(ErlNifEnv* env, int flags, - ERL_NIF_TERM (*fp)(ErlNifEnv*, int, const ERL_NIF_TERM[]), - int argc, const ERL_NIF_TERM argv[]) +#ifdef ERTS_DIRTY_SCHEDULERS + +/* + * Finalize a dirty NIF call. This function is scheduled to cause the VM to + * switch the process off a dirty scheduler thread and back onto a regular + * scheduler thread, and then return the result from the dirty NIF. It also + * restores the original NIF MFA when necessary based on the value of + * ep->fp set by execute_dirty_nif via init_nif_sched_data -- non-NULL + * means restore, NULL means do not restore. + */ +static ERL_NIF_TERM +dirty_nif_finalizer(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + Process* proc = env->proc; + NifExport* ep; + + ASSERT(argc == 1); + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(env->proc->scheduler_data)); + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + if (ep->fp) + restore_nif_mfa(proc, ep, 0); + return argv[0]; +} + +/* Finalize a dirty NIF call that raised an exception. 
Otherwise same as + * the dirty_nif_finalizer() function. + */ +static ERL_NIF_TERM +dirty_nif_exception(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + Process* proc = env->proc; + NifExport* ep; + + ASSERT(!ERTS_SCHEDULER_IS_DIRTY(env->proc->scheduler_data)); + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + if (ep->fp) + restore_nif_mfa(proc, ep, 1); + return enif_make_badarg(env); +} + +/* + * Dirty NIF execution wrapper function. Invoke an application's dirty NIF, + * then check the result and schedule the appropriate finalizer function + * where needed. Also restore the original NIF MFA when appropriate. + */ +static ERL_NIF_TERM +execute_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + Process* proc = env->proc; + NativeFunPtr fp = (NativeFunPtr) proc->current[6]; + NifExport* ep; + ERL_NIF_TERM result; + + ASSERT(ERTS_SCHEDULER_IS_DIRTY(env->proc->scheduler_data)); + + /* + * Set ep->fp to NULL before the native call so we know later whether it scheduled another NIF for execution + */ + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + ep->fp = NULL; + result = (*fp)(env, argc, argv); + erts_smp_atomic32_read_band_mb(&proc->state, + ~(ERTS_PSFLG_DIRTY_CPU_PROC + |ERTS_PSFLG_DIRTY_IO_PROC + |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q + |ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); + if (erts_refc_dectest(&env->mod_nif->rt_dtor_cnt, 0) == 0 && env->mod_nif->mod == NULL) + close_lib(env->mod_nif); + /* + * If no more NIFs were scheduled by the native call via + * enif_schedule_nif(), then ep->fp will still be NULL as set above, in + * which case we need to restore the original NIF calling + * context. Reuse fp essentially as a boolean for this, passing it to + * init_nif_sched_data below. Both dirty_nif_exception and + * dirty_nif_finalizer then check ep->fp to decide whether or not to + * restore the original calling context. + */ + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + if (ep->fp) + fp = NULL; + if (is_non_value(result)) { + if (proc->freason != TRAP) { + ASSERT(proc->freason == BADARG); + return init_nif_sched_data(env, dirty_nif_exception, fp, 0, argc, argv); + } else { + if (ep->fp == NULL) + restore_nif_mfa(proc, ep, 1); + return result; + } + } + else + return init_nif_sched_data(env, dirty_nif_finalizer, fp, 0, 1, &result); +} + +/* + * Dirty NIF scheduling wrapper function. Schedule a dirty NIF to execute + * via the execute_dirty_nif() wrapper function. The dirty scheduler thread + * type (CPU or I/O) is indicated in flags parameter. 
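+ *
+ * A NIF library reaches this path either via enif_schedule_nif() with an
+ * ERL_NIF_DIRTY_JOB_CPU_BOUND or ERL_NIF_DIRTY_JOB_IO_BOUND flag, or by
+ * putting the same value in the new ErlNifFunc flags field, e.g. (sketch,
+ * function and NIF names hypothetical):
+ *
+ *     static ErlNifFunc nif_funcs[] = {
+ *         {"compress", 1, compress_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND},
+ *         {"checksum", 1, checksum_nif, 0}   /* normal NIF */
+ *     };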
+ */ +static ERTS_INLINE ERL_NIF_TERM +schedule_dirty_nif(ErlNifEnv* env, int flags, int argc, const ERL_NIF_TERM argv[]) { -#ifdef USE_THREADS erts_aint32_t state, n, a; Process* proc = env->proc; - Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; - DirtyNifExport* ep = NULL; - int i; + NativeFunPtr fp = (NativeFunPtr) proc->current[6]; + NifExport* ep; + int need_save; - int chkflgs = (flags & (ERL_NIF_DIRTY_JOB_IO_BOUND|ERL_NIF_DIRTY_JOB_CPU_BOUND)); - if (chkflgs != ERL_NIF_DIRTY_JOB_IO_BOUND && chkflgs != ERL_NIF_DIRTY_JOB_CPU_BOUND) - return enif_make_badarg(env); + ASSERT(flags==ERL_NIF_DIRTY_JOB_IO_BOUND || flags==ERL_NIF_DIRTY_JOB_CPU_BOUND); a = erts_smp_atomic32_read_acqb(&proc->state); while (1) { @@ -1590,7 +1769,7 @@ enif_schedule_dirty_nif(ErlNifEnv* env, int flags, */ n &= ~(ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_IO_PROC |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q); - if (chkflgs == ERL_NIF_DIRTY_JOB_CPU_BOUND) + if (flags == ERL_NIF_DIRTY_JOB_CPU_BOUND) n |= ERTS_PSFLG_DIRTY_CPU_PROC; else n |= ERTS_PSFLG_DIRTY_IO_PROC; @@ -1598,69 +1777,100 @@ enif_schedule_dirty_nif(ErlNifEnv* env, int flags, if (a == state) break; } - if (!(ep = (DirtyNifExport*) ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc))) - alloc_proc_psd(proc, &ep); - ERTS_VBUMP_ALL_REDS(proc); - ep->exp.code[2] = argc; - for (i = 0; i < argc; i++) { - reg[i] = (Eterm) argv[i]; - } - proc->i = (BeamInstr*) ep->exp.addressv[0]; - ep->exp.code[4] = (BeamInstr) fp; - ep->m = env->mod_nif; - proc->freason = TRAP; - erts_refc_inc(&env->mod_nif->rt_dtor_cnt, 1); + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + need_save = (ep == NULL || is_non_value(ep->saved_mfa[0])); + return init_nif_sched_data(env, execute_dirty_nif, fp, need_save, argc, argv); +} - return THE_NON_VALUE; -#else - return (*fp)(env, argc, argv); -#endif +static ERL_NIF_TERM +schedule_dirty_io_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_IO_BOUND, argc, argv); +} + +static ERL_NIF_TERM +schedule_dirty_cpu_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + return schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_CPU_BOUND, argc, argv); +} + +#endif /* ERTS_DIRTY_SCHEDULERS */ + +/* + * NIF execution wrapper used by enif_schedule_nif() for regular NIFs. It + * calls the actual NIF, restores original NIF MFA if necessary, and + * then returns the NIF result. + */ +static ERL_NIF_TERM +execute_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + Process* proc = env->proc; + NativeFunPtr fp = (NativeFunPtr) proc->current[6]; + NifExport* ep; + ERL_NIF_TERM result; + + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + ep->fp = NULL; + result = (*fp)(env, argc, argv); + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + /* + * If no NIFs were scheduled by the native call via + * enif_schedule_nif(), then ep->fp will still be NULL as set above, in + * which case we need to restore the original NIF MFA. 
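+ *
+ * Note that restore_nif_mfa() is asked to restore the argument registers
+ * only when the call raised an exception (a non-value result without
+ * trapping); per the NifExport comment above, this keeps the stacktrace
+ * of the exception pointing at the original call and its arguments.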
+ */ + if (ep->fp == NULL) + restore_nif_mfa(proc, ep, is_non_value(result) && proc->freason != TRAP); + return result; } ERL_NIF_TERM -enif_schedule_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result, - ERL_NIF_TERM (*fp)(ErlNifEnv*, ERL_NIF_TERM)) +enif_schedule_nif(ErlNifEnv* env, const char* fun_name, int flags, + ERL_NIF_TERM (*fp)(ErlNifEnv*, int, const ERL_NIF_TERM[]), + int argc, const ERL_NIF_TERM argv[]) { -#ifdef USE_THREADS Process* proc = env->proc; - Eterm* reg = ERTS_PROC_GET_SCHDATA(proc)->x_reg_array; - DirtyNifExport* ep; + NifExport* ep; + ERL_NIF_TERM fun_name_atom, result; + int need_save; - erts_smp_atomic32_read_band_mb(&proc->state, - ~(ERTS_PSFLG_DIRTY_CPU_PROC - |ERTS_PSFLG_DIRTY_IO_PROC - |ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q - |ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); - if (!(ep = (DirtyNifExport*) ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(proc))) - alloc_proc_psd(proc, &ep); - ERTS_VBUMP_ALL_REDS(proc); - ep->exp.code[2] = 2; - reg[0] = (Eterm) result; -#if HAVE_INT64 && SIZEOF_LONG != 8 - ASSERT(sizeof(fp) <= sizeof(ErlNifUInt64)); - reg[1] = (Eterm) enif_make_uint64(env, (ErlNifUInt64) fp); -#else - ASSERT(sizeof(fp) <= sizeof(unsigned long)); - reg[1] = (Eterm) enif_make_ulong(env, (unsigned long) fp); -#endif - proc->i = (BeamInstr*) ep->exp.addressv[0]; - ep->exp.code[4] = (BeamInstr) execute_dirty_nif_finalizer; - proc->freason = TRAP; + if (argc > MAX_ARG) + return enif_make_badarg(env); + fun_name_atom = enif_make_atom(env, fun_name); + if (enif_is_exception(env, fun_name_atom)) + return fun_name_atom; - return THE_NON_VALUE; + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + need_save = (ep == NULL || is_non_value(ep->saved_mfa[0])); + + if (flags) { +#ifdef ERTS_DIRTY_SCHEDULERS + NativeFunPtr sched_fun; + int chkflgs = (flags & (ERL_NIF_DIRTY_JOB_IO_BOUND|ERL_NIF_DIRTY_JOB_CPU_BOUND)); + if (chkflgs == ERL_NIF_DIRTY_JOB_IO_BOUND) + sched_fun = schedule_dirty_io_nif; + else if (chkflgs == ERL_NIF_DIRTY_JOB_CPU_BOUND) + sched_fun = schedule_dirty_cpu_nif; + else + return enif_make_badarg(env); + result = init_nif_sched_data(env, sched_fun, fp, need_save, argc, argv); #else - return (*fp)(env, result); + return enif_make_badarg(env); #endif -} + } + else + result = init_nif_sched_data(env, execute_nif, fp, need_save, argc, argv); -/* A simple finalizer that just returns its result argument */ -ERL_NIF_TERM -enif_dirty_nif_finalizer(ErlNifEnv* env, ERL_NIF_TERM result) -{ + ep = (NifExport*) ERTS_PROC_GET_NIF_TRAP_EXPORT(proc); + ASSERT(ep); + ep->exp.code[1] = (BeamInstr) fun_name_atom; return result; } +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT + int enif_is_on_dirty_scheduler(ErlNifEnv* env) { @@ -1977,6 +2187,35 @@ static Eterm load_nif_error(Process* p, const char* atom, const char* format, .. return ret; } +/* + * The function below is for looping through ErlNifFunc arrays, helping + * provide backwards compatibility across the version 2.7 change that added + * the "flags" field to ErlNifFunc. + */ +static ErlNifFunc* next_func(ErlNifEntry* entry, int* incrp, ErlNifFunc* func) +{ + ASSERT(incrp); + if (!*incrp) { + if (entry->major > 2 || (entry->major == 2 && entry->minor >= 7)) + *incrp = sizeof(ErlNifFunc); + else { + /* + * ErlNifFuncV1 below is what ErlNifFunc was before the + * addition of the flags field for 2.7, and is needed to handle + * backward compatibility. 
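+ *
+ * Callers step through the function array with a pattern like (sketch,
+ * mirroring the loops in load_nif_2() below):
+ *
+ *     int incr = 0;
+ *     ErlNifFunc* f = entry->funcs;
+ *     for (i = 0; i < entry->num_of_funcs; i++) {
+ *         ... use f->name, f->arity, f->fptr ...
+ *         f = next_func(entry, &incr, f);
+ *     }
+ *
+ * which works for both the old (three field) and new (four field) layouts.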
+ */ + typedef struct { + const char* name; + unsigned arity; + ERL_NIF_TERM (*fptr)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); + }ErlNifFuncV1; + *incrp = sizeof(ErlNifFuncV1); + } + } + return (ErlNifFunc*) ((char*)func + *incrp); +} + + BIF_RETTYPE load_nif_2(BIF_ALIST_2) { static const char bad_lib[] = "bad_lib"; @@ -2086,22 +2325,48 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) } else { /*erts_fprintf(stderr, "Found module %T\r\n", mod_atom);*/ - + + int maybe_dirty_nifs = ((entry->major > 2 || (entry->major == 2 && entry->minor >= 7)) + && (entry->options & ERL_NIF_DIRTY_NIF_OPTION)); + int incr = 0; + ErlNifFunc* f = entry->funcs; for (i=0; i < entry->num_of_funcs && ret==am_ok; i++) { BeamInstr** code_pp; - ErlNifFunc* f = &entry->funcs[i]; if (!erts_atom_get(f->name, sys_strlen(f->name), &f_atom, ERTS_ATOM_ENC_LATIN1) || (code_pp = get_func_pp(mod->curr.code, f_atom, f->arity))==NULL) { ret = load_nif_error(BIF_P,bad_lib,"Function not found %T:%s/%u", mod_atom, f->name, f->arity); - } - else if (code_pp[1] - code_pp[0] < (5+3)) { + } + else if (maybe_dirty_nifs && f->flags) { + /* + * If the flags field is non-zero and this emulator was + * built with dirty scheduler support, check that the flags + * value is legal. But if this emulator was built without + * dirty scheduler support, treat a non-zero flags field as + * a load error. + */ +#ifdef ERTS_DIRTY_SCHEDULERS + if (f->flags != ERL_NIF_DIRTY_JOB_IO_BOUND && f->flags != ERL_NIF_DIRTY_JOB_CPU_BOUND) + ret = load_nif_error(BIF_P, bad_lib, "Illegal flags field value %d for NIF %T:%s/%u", + f->flags, mod_atom, f->name, f->arity); +#else + ret = load_nif_error(BIF_P, bad_lib, "NIF %T:%s/%u requires a runtime with dirty scheduler support.", + mod_atom, f->name, f->arity); +#endif + } +#ifdef ERTS_DIRTY_SCHEDULERS + else if (code_pp[1] - code_pp[0] < (5+4)) +#else + else if (code_pp[1] - code_pp[0] < (5+3)) +#endif + { ret = load_nif_error(BIF_P,bad_lib,"No explicit call to load_nif" - " in module (%T:%s/%u to small)", - mod_atom, entry->funcs[i].name, entry->funcs[i].arity); + " in module (%T:%s/%u too small)", + mod_atom, f->name, f->arity); } /*erts_fprintf(stderr, "Found NIF %T:%s/%u\r\n", - mod_atom, entry->funcs[i].name, entry->funcs[i].arity);*/ + mod_atom, f->name, f->arity);*/ + f = next_func(entry, &incr, f); } } @@ -2127,7 +2392,8 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) * is deprecated and was only ment as a development feature not to * be used in production systems. 
(See warning below) */ - int k; + int k, old_incr = 0; + ErlNifFunc* old_func; lib->priv_data = mod->curr.nif->priv_data; ASSERT(mod->curr.nif->entry != NULL); @@ -2136,13 +2402,16 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) goto error; } /* Check that no NIF is removed */ + old_func = mod->curr.nif->entry->funcs; for (k=0; k < mod->curr.nif->entry->num_of_funcs; k++) { - ErlNifFunc* old_func = &mod->curr.nif->entry->funcs[k]; + int incr = 0; + ErlNifFunc* f = entry->funcs; for (i=0; i < entry->num_of_funcs; i++) { - if (old_func->arity == entry->funcs[i].arity - && sys_strcmp(old_func->name, entry->funcs[i].name) == 0) { + if (old_func->arity == f->arity + && sys_strcmp(old_func->name, f->name) == 0) { break; } + f = next_func(entry, &incr, f); } if (i == entry->num_of_funcs) { ret = load_nif_error(BIF_P,reload,"Reloaded library missing " @@ -2150,7 +2419,8 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) old_func->name, old_func->arity); goto error; } - } + old_func = next_func(mod->curr.nif->entry, &old_incr, old_func); + } erts_pre_nif(&env, BIF_P, lib); veto = entry->reload(&env, &lib->priv_data, BIF_ARG_2); erts_post_nif(&env); @@ -2197,13 +2467,17 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) /* ** Everything ok, patch the beam code with op_call_nif */ - mod->curr.nif = lib; + + int incr = 0; + ErlNifFunc* f = entry->funcs; + + mod->curr.nif = lib; for (i=0; i < entry->num_of_funcs; i++) { BeamInstr* code_ptr; - erts_atom_get(entry->funcs[i].name, sys_strlen(entry->funcs[i].name), &f_atom, ERTS_ATOM_ENC_LATIN1); - code_ptr = *get_func_pp(mod->curr.code, f_atom, entry->funcs[i].arity); - + erts_atom_get(f->name, sys_strlen(f->name), &f_atom, ERTS_ATOM_ENC_LATIN1); + code_ptr = *get_func_pp(mod->curr.code, f_atom, f->arity); + if (code_ptr[1] == 0) { code_ptr[5+0] = (BeamInstr) BeamOp(op_call_nif); } @@ -2211,10 +2485,21 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2) GenericBp* g = (GenericBp *) code_ptr[1]; ASSERT(code_ptr[5+0] == (BeamInstr) BeamOp(op_i_generic_breakpoint)); - g->orig_instr = (BeamInstr) BeamOp(op_call_nif); - } - code_ptr[5+1] = (BeamInstr) entry->funcs[i].fptr; + g->orig_instr = (BeamInstr) BeamOp(op_call_nif); + } + if ((entry->major > 2 || (entry->major == 2 && entry->minor >= 7)) + && (entry->options & ERL_NIF_DIRTY_NIF_OPTION) && f->flags) { +#ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT + code_ptr[5+3] = (BeamInstr) f->fptr; + code_ptr[5+1] = (f->flags == ERL_NIF_DIRTY_JOB_IO_BOUND) ? 
+ (BeamInstr) schedule_dirty_io_nif : + (BeamInstr) schedule_dirty_cpu_nif; +#endif + } + else + code_ptr[5+1] = (BeamInstr) f->fptr; code_ptr[5+2] = (BeamInstr) lib; + f = next_func(entry, &incr, f); } } else { diff --git a/erts/emulator/beam/erl_nif.h b/erts/emulator/beam/erl_nif.h index 5b93c2398e..226fc199a1 100644 --- a/erts/emulator/beam/erl_nif.h +++ b/erts/emulator/beam/erl_nif.h @@ -42,9 +42,13 @@ ** 2.5: R17 Maps API additions ** 2.6: R17 with maps ** R17 dirty schedulers +** 2.7: 17.3 add enif_schedule_nif +** remove enif_schedule_dirty_nif, enif_schedule_dirty_nif_finalizer, enif_dirty_nif_finalizer +** add ErlNifEntry options +** add ErlNifFunc flags */ #define ERL_NIF_MAJOR_VERSION 2 -#define ERL_NIF_MINOR_VERSION 6 +#define ERL_NIF_MINOR_VERSION 7 /* * The emulator will refuse to load a nif-lib with a major version @@ -125,8 +129,10 @@ typedef struct const char* name; unsigned arity; ERL_NIF_TERM (*fptr)(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]); + unsigned flags; }ErlNifFunc; + typedef struct enif_entry_t { int major; @@ -139,8 +145,11 @@ typedef struct enif_entry_t int (*upgrade)(ErlNifEnv*, void** priv_data, void** old_priv_data, ERL_NIF_TERM load_info); void (*unload) (ErlNifEnv*, void* priv_data); const char* vm_variant; + unsigned options; }ErlNifEntry; +/* Field bits for ErlNifEntry options */ +#define ERL_NIF_DIRTY_NIF_OPTION 1 typedef struct @@ -232,10 +241,21 @@ extern TWinDynNifCallbacks WinDynNifCallbacks; # else # define ERL_NIF_INIT_DECL(MODNAME) __declspec(dllexport) ErlNifEntry* nif_init(TWinDynNifCallbacks* callbacks) # endif -# define ERL_NIF_INIT_BODY memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks)) +# ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +# define ERL_NIF_INIT_BODY do { \ + memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks)); \ + entry.options = ERL_NIF_DIRTY_NIF_OPTION; \ + } while(0) +# else +# define ERL_NIF_INIT_BODY memcpy(&WinDynNifCallbacks,callbacks,sizeof(TWinDynNifCallbacks)) +# endif #else # define ERL_NIF_INIT_GLOB -# define ERL_NIF_INIT_BODY +# ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT +# define ERL_NIF_INIT_BODY entry.options = ERL_NIF_DIRTY_NIF_OPTION +# else +# define ERL_NIF_INIT_BODY +# endif # ifdef STATIC_ERLANG_NIF # define ERL_NIF_INIT_DECL(MODNAME) ErlNifEntry* MODNAME ## _nif_init(void) # else diff --git a/erts/emulator/beam/erl_nif_api_funcs.h b/erts/emulator/beam/erl_nif_api_funcs.h index d7c554e60b..be39816a64 100644 --- a/erts/emulator/beam/erl_nif_api_funcs.h +++ b/erts/emulator/beam/erl_nif_api_funcs.h @@ -141,10 +141,8 @@ ERL_NIF_API_FUNC_DECL(int,enif_is_number,(ErlNifEnv*, ERL_NIF_TERM term)); ERL_NIF_API_FUNC_DECL(void*,enif_dlopen,(const char* lib, void (*err_handler)(void*,const char*), void* err_arg)); ERL_NIF_API_FUNC_DECL(void*,enif_dlsym,(void* handle, const char* symbol, void (*err_handler)(void*,const char*), void* err_arg)); ERL_NIF_API_FUNC_DECL(int,enif_consume_timeslice,(ErlNifEnv*, int percent)); +ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_nif,(ErlNifEnv*,const char*,int,ERL_NIF_TERM (*)(ErlNifEnv*,int,const ERL_NIF_TERM[]),int,const ERL_NIF_TERM[])); #ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif,(ErlNifEnv*,int,ERL_NIF_TERM (*)(ErlNifEnv*,int,const ERL_NIF_TERM[]),int,const ERL_NIF_TERM[])); -ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_schedule_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM,ERL_NIF_TERM (*)(ErlNifEnv*,ERL_NIF_TERM))); 
-ERL_NIF_API_FUNC_DECL(ERL_NIF_TERM,enif_dirty_nif_finalizer,(ErlNifEnv*,ERL_NIF_TERM)); ERL_NIF_API_FUNC_DECL(int,enif_is_on_dirty_scheduler,(ErlNifEnv*)); ERL_NIF_API_FUNC_DECL(int,enif_have_dirty_schedulers,(void)); #endif @@ -289,10 +287,8 @@ ERL_NIF_API_FUNC_DECL(int, enif_map_iterator_get_pair, (ErlNifEnv *env, ErlNifMa # define enif_dlopen ERL_NIF_API_FUNC_MACRO(enif_dlopen) # define enif_dlsym ERL_NIF_API_FUNC_MACRO(enif_dlsym) # define enif_consume_timeslice ERL_NIF_API_FUNC_MACRO(enif_consume_timeslice) +# define enif_schedule_nif ERL_NIF_API_FUNC_MACRO(enif_schedule_nif) #ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT -# define enif_schedule_dirty_nif ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif) -# define enif_schedule_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_schedule_dirty_nif_finalizer) -# define enif_dirty_nif_finalizer ERL_NIF_API_FUNC_MACRO(enif_dirty_nif_finalizer) # define enif_is_on_dirty_scheduler ERL_NIF_API_FUNC_MACRO(enif_is_on_dirty_scheduler) # define enif_have_dirty_schedulers ERL_NIF_API_FUNC_MACRO(enif_have_dirty_schedulers) #endif diff --git a/erts/emulator/beam/erl_port_task.c b/erts/emulator/beam/erl_port_task.c index 31d9a1e26e..682f6f8f4b 100644 --- a/erts/emulator/beam/erl_port_task.c +++ b/erts/emulator/beam/erl_port_task.c @@ -68,6 +68,13 @@ static void chk_task_queues(Port *pp, ErtsPortTask *execq, int processing_busy_q #define DTRACE_DRIVER(PROBE_NAME, PP) do {} while(0) #endif +#define ERTS_SMP_LC_VERIFY_RQ(RQ, PP) \ + do { \ + ERTS_SMP_LC_ASSERT(erts_smp_lc_runq_is_locked(runq)); \ + ERTS_SMP_LC_ASSERT((RQ) == ((ErtsRunQueue *) \ + erts_smp_atomic_read_nob(&(PP)->run_queue))); \ + } while (0) + erts_smp_atomic_t erts_port_task_outstanding_io_tasks; #define ERTS_PT_STATE_SCHEDULED 0 @@ -798,12 +805,13 @@ schedule_port_task_handle_list_free(ErtsPortTaskHandleList *pthlp) static ERTS_INLINE void abort_nosuspend_task(Port *pp, ErtsPortTaskType type, - ErtsPortTaskTypeData *tdp) + ErtsPortTaskTypeData *tdp, + int bpq_data) { ASSERT(type == ERTS_PORT_TASK_PROC_SIG); - if (!pp->sched.taskq.bpq) + if (!bpq_data) tdp->psig.callback(NULL, ERTS_PORT_SFLG_INVALID, ERTS_PROC2PORT_SIG_ABORT_NOSUSPEND, @@ -991,6 +999,7 @@ static ERTS_INLINE int finalize_exec(Port *pp, ErtsPortTask **execq, int processing_busy_q) { erts_aint32_t act; + unsigned int prof_runnable_ports; if (!processing_busy_q) pp->sched.taskq.local.first = *execq; @@ -1007,6 +1016,10 @@ finalize_exec(Port *pp, ErtsPortTask **execq, int processing_busy_q) if (act & ERTS_PTS_FLG_CHK_UNSET_BUSY_PORT_Q) act = check_unset_busy_port_q(pp, act, pp->sched.taskq.bpq); + prof_runnable_ports = erts_system_profile_flags.runnable_ports; + if (prof_runnable_ports) + erts_port_task_sched_lock(&pp->sched); + while (1) { erts_aint32_t new, exp; @@ -1018,12 +1031,24 @@ finalize_exec(Port *pp, ErtsPortTask **execq, int processing_busy_q) act = erts_smp_atomic32_cmpxchg_relb(&pp->sched.flags, new, exp); - ASSERT(!(act & ERTS_PTS_FLG_IN_RUNQ)); + ERTS_LC_ASSERT(!(act & ERTS_PTS_FLG_IN_RUNQ)); + ERTS_LC_ASSERT(!(act & ERTS_PTS_FLG_EXEC_IMM)); if (exp == act) break; } + if (prof_runnable_ports | IS_TRACED_FL(pp, F_TRACE_SCHED_PORTS)) { + /* trace port scheduling, out */ + if (IS_TRACED_FL(pp, F_TRACE_SCHED_PORTS)) + trace_sched_ports(pp, am_out); + if (prof_runnable_ports) { + if (!(act & (ERTS_PTS_FLG_EXEC_IMM|ERTS_PTS_FLG_HAVE_TASKS))) + profile_runnable_port(pp, am_inactive); + erts_port_task_sched_unlock(&pp->sched); + } + } + return (act & ERTS_PTS_FLG_HAVE_TASKS) != 0; } @@ -1345,7 +1370,7 @@ 
erts_port_task_abort_nosuspend_tasks(Port *pp) #endif schedule_port_task_handle_list_free(pthlp); - abort_nosuspend_task(pp, type, &td); + abort_nosuspend_task(pp, type, &td, pp->sched.taskq.bpq != NULL); } } @@ -1369,6 +1394,7 @@ erts_port_task_schedule(Eterm id, Port *pp; ErtsPortTask *ptp = NULL; erts_aint32_t act, add_flags; + unsigned int prof_runnable_ports; if (pthp && erts_port_task_is_scheduled(pthp)) { ASSERT(0); @@ -1457,6 +1483,10 @@ erts_port_task_schedule(Eterm id, if (ns_pthlp) add_flags |= ERTS_PTS_FLG_HAVE_NS_TASKS; + prof_runnable_ports = erts_system_profile_flags.runnable_ports; + if (prof_runnable_ports) + erts_port_task_sched_lock(&pp->sched); + while (1) { erts_aint32_t new, exp; @@ -1481,6 +1511,13 @@ erts_port_task_schedule(Eterm id, goto done; /* Died after our task insert... */ } + if (prof_runnable_ports) { + if (!(act & ERTS_PTS_FLG_EXEC_IMM)) + profile_runnable_port(pp, am_active); + erts_port_task_sched_unlock(&pp->sched); + prof_runnable_ports = 0; + } + /* Enqueue port on run-queue */ runq = erts_port_runq(pp); @@ -1489,8 +1526,10 @@ erts_port_task_schedule(Eterm id, #ifdef ERTS_SMP xrunq = erts_check_emigration_need(runq, ERTS_PORT_PRIO_LEVEL); + ERTS_SMP_LC_ASSERT(runq != xrunq); + ERTS_SMP_LC_VERIFY_RQ(runq, pp); if (xrunq) { - /* Port emigrated ... */ + /* Emigrate port ... */ erts_smp_atomic_set_nob(&pp->run_queue, (erts_aint_t) xrunq); erts_smp_runq_unlock(runq); runq = erts_port_runq(pp); @@ -1500,10 +1539,6 @@ erts_port_task_schedule(Eterm id, #endif enqueue_port(runq, pp); - - if (erts_system_profile_flags.runnable_ports) { - profile_runnable_port(pp, am_active); - } erts_smp_runq_unlock(runq); @@ -1511,6 +1546,9 @@ erts_port_task_schedule(Eterm id, done: + if (prof_runnable_ports) + erts_port_task_sched_unlock(&pp->sched); + #ifdef ERTS_SMP if (dhndl != ERTS_THR_PRGR_DHANDLE_MANAGED) erts_port_dec_refc(pp); @@ -1525,7 +1563,7 @@ abort_nosuspend: erts_port_dec_refc(pp); #endif - abort_nosuspend_task(pp, ptp->type, &ptp->u.alive.td); + abort_nosuspend_task(pp, ptp->type, &ptp->u.alive.td, 0); ASSERT(ns_pthlp); erts_free(ERTS_ALC_T_PT_HNDL_LIST, ns_pthlp); @@ -1609,6 +1647,8 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) goto done; } + ERTS_SMP_LC_VERIFY_RQ(runq, pp); + erts_smp_runq_unlock(runq); *curr_port_pp = pp; @@ -1765,10 +1805,6 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_unblock_fpe(fpe_was_unmasked); - /* trace port scheduling, out */ - if (IS_TRACED_FL(pp, F_TRACE_SCHED_PORTS)) { - trace_sched_ports(pp, am_out); - } if (io_tasks_executed) { ASSERT(erts_smp_atomic_read_nob(&erts_port_task_outstanding_io_tasks) @@ -1791,11 +1827,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) erts_smp_runq_lock(runq); - if (!active) { - if (erts_system_profile_flags.runnable_ports) - profile_runnable_port(pp, am_inactive); - } - else { + if (active) { #ifdef ERTS_SMP ErtsRunQueue *xrunq; #endif @@ -1804,6 +1836,8 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) #ifdef ERTS_SMP xrunq = erts_check_emigration_need(runq, ERTS_PORT_PRIO_LEVEL); + ERTS_SMP_LC_ASSERT(runq != xrunq); + ERTS_SMP_LC_VERIFY_RQ(runq, pp); if (!xrunq) { #endif enqueue_port(runq, pp); @@ -1811,7 +1845,7 @@ erts_port_task_execute(ErtsRunQueue *runq, Port **curr_port_pp) #ifdef ERTS_SMP } else { - /* Port emigrated ... */ + /* Emigrate port... 
*/ erts_smp_atomic_set_nob(&pp->run_queue, (erts_aint_t) xrunq); erts_smp_runq_unlock(runq); diff --git a/erts/emulator/beam/erl_port_task.h b/erts/emulator/beam/erl_port_task.h index 1d30465ec9..9ef0cfcedc 100644 --- a/erts/emulator/beam/erl_port_task.h +++ b/erts/emulator/beam/erl_port_task.h @@ -78,6 +78,7 @@ extern erts_smp_atomic_t erts_port_task_outstanding_io_tasks; #define ERTS_PTS_FLG_PARALLELISM (((erts_aint32_t) 1) << 9) #define ERTS_PTS_FLG_FORCE_SCHED (((erts_aint32_t) 1) << 10) #define ERTS_PTS_FLG_EXITING (((erts_aint32_t) 1) << 11) +#define ERTS_PTS_FLG_EXEC_IMM (((erts_aint32_t) 1) << 12) #define ERTS_PTS_FLGS_BUSY \ (ERTS_PTS_FLG_BUSY_PORT | ERTS_PTS_FLG_BUSY_PORT_Q) @@ -87,6 +88,7 @@ extern erts_smp_atomic_t erts_port_task_outstanding_io_tasks; | ERTS_PTS_FLG_HAVE_BUSY_TASKS \ | ERTS_PTS_FLG_HAVE_TASKS \ | ERTS_PTS_FLG_EXEC \ + | ERTS_PTS_FLG_EXEC_IMM \ | ERTS_PTS_FLG_FORCE_SCHED \ | ERTS_PTS_FLG_EXITING) diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c index b73f9b7f92..685004f267 100644 --- a/erts/emulator/beam/erl_process.c +++ b/erts/emulator/beam/erl_process.c @@ -590,12 +590,10 @@ erts_pre_init_process(void) erts_psd_required_locks[ERTS_PSD_DELAYED_GC_TASK_QS].set_locks = ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS; -#ifdef ERTS_DIRTY_SCHEDULERS - erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].get_locks - = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS; - erts_psd_required_locks[ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT].set_locks - = ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS; -#endif + erts_psd_required_locks[ERTS_PSD_NIF_TRAP_EXPORT].get_locks + = ERTS_PSD_NIF_TRAP_EXPORT_GET_LOCKS; + erts_psd_required_locks[ERTS_PSD_NIF_TRAP_EXPORT].set_locks + = ERTS_PSD_NIF_TRAP_EXPORT_SET_LOCKS; /* Check that we have locks for all entries */ for (ix = 0; ix < ERTS_PSD_SIZE; ix++) { @@ -2211,6 +2209,9 @@ aux_work_timeout_early_init(int no_schedulers) p = (UWord) malloc((sizeof(ErtsAuxWorkTmo) + sizeof(erts_atomic32_t)*(no_schedulers+1)) + ERTS_CACHE_LINE_SIZE-1); + if (!p) { + ERTS_INTERNAL_ERROR("malloc failed to allocate memory!"); + } if (p & ERTS_CACHE_LINE_MASK) p = (p & ~ERTS_CACHE_LINE_MASK) + ERTS_CACHE_LINE_SIZE; ASSERT((p & ERTS_CACHE_LINE_MASK) == 0); @@ -3755,17 +3756,25 @@ evacuate_run_queue(ErtsRunQueue *rq, } #ifdef ERTS_DIRTY_SCHEDULERS else if (state & ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q) { - erts_aint32_t old; - old = erts_smp_atomic32_read_band_nob(&proc->state, - ~(ERTS_PSFLG_DIRTY_CPU_PROC - | ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q)); +#ifdef DEBUG + erts_aint32_t old = +#else + (void) +#endif + erts_smp_atomic32_read_band_nob(&proc->state, + ~(ERTS_PSFLG_DIRTY_CPU_PROC + | ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q)); /* assert that no other dirty flags are set */ ASSERT(!(old & (ERTS_PSFLG_DIRTY_IO_PROC|ERTS_PSFLG_DIRTY_IO_PROC_IN_Q))); } else if (state & ERTS_PSFLG_DIRTY_IO_PROC_IN_Q) { - erts_aint32_t old; - old = erts_smp_atomic32_read_band_nob(&proc->state, - ~(ERTS_PSFLG_DIRTY_IO_PROC - | ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); +#ifdef DEBUG + erts_aint32_t old = +#else + (void) +#endif + erts_smp_atomic32_read_band_nob(&proc->state, + ~(ERTS_PSFLG_DIRTY_IO_PROC + | ERTS_PSFLG_DIRTY_IO_PROC_IN_Q)); /* assert that no other dirty flags are set */ ASSERT(!(old & (ERTS_PSFLG_DIRTY_CPU_PROC|ERTS_PSFLG_DIRTY_CPU_PROC_IN_Q))); } @@ -5874,6 +5883,9 @@ schedule_out_process(ErtsRunQueue *c_rq, erts_aint32_t state, Process *p, Proces case ERTS_ENQUEUE_NOT: if (erts_system_profile_flags.runnable_procs) { + /* Status lock prevents out of order "runnable proc" trace 
msgs */ + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + if (!(a & ERTS_PSFLG_ACTIVE_SYS) && (!(a & ERTS_PSFLG_ACTIVE) || (a & ERTS_PSFLG_SUSPENDED))) { @@ -5987,7 +5999,8 @@ change_proc_schedule_state(Process *p, erts_aint32_t clear_state_flags, erts_aint32_t set_state_flags, erts_aint32_t *statep, - erts_aint32_t *enq_prio_p) + erts_aint32_t *enq_prio_p, + ErtsProcLocks locks) { /* * NOTE: ERTS_PSFLG_RUNNING, ERTS_PSFLG_RUNNING_SYS and @@ -5996,6 +6009,11 @@ change_proc_schedule_state(Process *p, */ erts_aint32_t a = *statep, n; int enqueue; /* < 0 -> use proxy */ + unsigned int prof_runnable_procs = erts_system_profile_flags.runnable_procs; + unsigned int lock_status = (prof_runnable_procs + && !(locks & ERTS_PROC_LOCK_STATUS)); + + ERTS_SMP_LC_ASSERT(locks == erts_proc_lc_my_proc_locks(p)); ASSERT(!(a & ERTS_PSFLG_PROXY)); ASSERT((clear_state_flags & (ERTS_PSFLG_RUNNING @@ -6005,6 +6023,9 @@ change_proc_schedule_state(Process *p, | ERTS_PSFLG_RUNNING_SYS | ERTS_PSFLG_ACTIVE_SYS)) == 0); + if (lock_status) + erts_smp_proc_lock(p, ERTS_PROC_LOCK_STATUS); + while (1) { erts_aint32_t e; n = e = a; @@ -6040,7 +6061,9 @@ change_proc_schedule_state(Process *p, break; } - if (erts_system_profile_flags.runnable_procs) { + if (prof_runnable_procs) { + + /* Status lock prevents out of order "runnable proc" trace msgs */ if (((n & (ERTS_PSFLG_SUSPENDED | ERTS_PSFLG_ACTIVE)) == ERTS_PSFLG_ACTIVE) @@ -6053,15 +6076,18 @@ change_proc_schedule_state(Process *p, profile_runnable_proc(p, am_active); } + if (lock_status) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); } + *statep = a; return enqueue; } static ERTS_INLINE void -schedule_process(Process *p, erts_aint32_t in_state) +schedule_process(Process *p, erts_aint32_t in_state, ErtsProcLocks locks) { erts_aint32_t enq_prio = -1; erts_aint32_t state = in_state; @@ -6069,7 +6095,8 @@ schedule_process(Process *p, erts_aint32_t in_state) 0, ERTS_PSFLG_ACTIVE, &state, - &enq_prio); + &enq_prio, + locks); if (enqueue != ERTS_ENQUEUE_NOT) add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), state, @@ -6077,16 +6104,27 @@ schedule_process(Process *p, erts_aint32_t in_state) } void -erts_schedule_process(Process *p, erts_aint32_t state) +erts_schedule_process(Process *p, erts_aint32_t state, ErtsProcLocks locks) { - schedule_process(p, state); + schedule_process(p, state, locks); } static void schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) { + /* + * Expects status lock to be locked when called, and + * returns with status lock unlocked... + */ erts_aint32_t a = state, n, enq_prio = -1; int enqueue; /* < 0 -> use proxy */ + unsigned int prof_runnable_procs = erts_system_profile_flags.runnable_procs; + + /* Status lock prevents out of order "runnable proc" trace msgs */ + ERTS_SMP_LC_ASSERT(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p)); + + if (!prof_runnable_procs) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); ASSERT(!(state & ERTS_PSFLG_PROXY)); @@ -6095,7 +6133,7 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) n = e = a; if (a & ERTS_PSFLG_FREE) - return; /* We don't want to schedule free processes... */ + goto cleanup; /* We don't want to schedule free processes... 
*/ enqueue = ERTS_ENQUEUE_NOT; n |= ERTS_PSFLG_ACTIVE_SYS; @@ -6108,7 +6146,7 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) goto cleanup; } - if (erts_system_profile_flags.runnable_procs) { + if (prof_runnable_procs) { if (!(a & (ERTS_PSFLG_ACTIVE_SYS | ERTS_PSFLG_RUNNING @@ -6118,6 +6156,8 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) profile_runnable_proc(p, am_active); } + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + prof_runnable_procs = 0; } if (enqueue != ERTS_ENQUEUE_NOT) { @@ -6132,8 +6172,14 @@ schedule_process_sys_task(Process *p, erts_aint32_t state, Process *proxy) } cleanup: + + if (prof_runnable_procs) + erts_smp_proc_unlock(p, ERTS_PROC_LOCK_STATUS); + if (proxy) free_proxy_proc(proxy); + + ERTS_SMP_LC_ASSERT(!(ERTS_PROC_LOCK_STATUS & erts_proc_lc_my_proc_locks(p))); } static ERTS_INLINE int @@ -6200,7 +6246,7 @@ suspend_process(Process *c_p, Process *p) } static ERTS_INLINE void -resume_process(Process *p) +resume_process(Process *p, ErtsProcLocks locks) { erts_aint32_t state, enq_prio = -1; int enqueue; @@ -6217,7 +6263,8 @@ resume_process(Process *p) ERTS_PSFLG_SUSPENDED, 0, &state, - &enq_prio); + &enq_prio, + locks); if (enqueue) add2runq(enqueue > 0 ? p : make_proxy_proc(NULL, p, enq_prio), state, @@ -7818,6 +7865,9 @@ erts_start_schedulers(void) #ifdef ETHR_HAVE_THREAD_NAMES opts.name = malloc(80); + if (!opts.name) { + ERTS_INTERNAL_ERROR("malloc failed to allocate memory!"); + } #endif #ifdef ERTS_SMP @@ -8030,7 +8080,8 @@ handle_pend_sync_suspend(Process *suspendee, } /* suspender is suspended waiting for suspendee to suspend; resume suspender */ - resume_process(suspender); + ASSERT(suspendee != suspender); + resume_process(suspender, ERTS_PROC_LOCK_STATUS); erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_STATUS); } } @@ -8065,7 +8116,7 @@ pid2proc_not_running(Process *c_p, ErtsProcLocks c_p_locks, ASSERT(c_p->flags & F_P2PNR_RESCHED); c_p->flags &= ~F_P2PNR_RESCHED; if (!suspend && rp) - resume_process(rp); + resume_process(rp, rp_locks); } else { @@ -8223,7 +8274,8 @@ handle_pend_bif_sync_suspend(Process *suspendee, } /* suspender is suspended waiting for suspendee to suspend; resume suspender */ - resume_process(suspender); + ASSERT(suspender != suspendee); + resume_process(suspender, ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); erts_smp_proc_unlock(suspender, ERTS_PROC_LOCK_LINK|ERTS_PROC_LOCK_STATUS); } @@ -8583,7 +8635,8 @@ resume_process_1(BIF_ALIST_1) ASSERT(ERTS_PSFLG_SUSPENDED & erts_smp_atomic32_read_nob(&suspendee->state)); - resume_process(suspendee); + ASSERT(BIF_P != suspendee); + resume_process(suspendee, ERTS_PROC_LOCK_STATUS); erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); } @@ -8713,7 +8766,7 @@ erts_resume(Process* process, ErtsProcLocks process_locks) ERTS_SMP_LC_ASSERT(process_locks == erts_proc_lc_my_proc_locks(process)); if (!(process_locks & ERTS_PROC_LOCK_STATUS)) erts_smp_proc_lock(process, ERTS_PROC_LOCK_STATUS); - resume_process(process); + resume_process(process, process_locks|ERTS_PROC_LOCK_STATUS); if (!(process_locks & ERTS_PROC_LOCK_STATUS)) erts_smp_proc_unlock(process, ERTS_PROC_LOCK_STATUS); } @@ -8732,7 +8785,7 @@ erts_resume_processes(ErtsProcList *list) proc = erts_pid2proc(NULL, 0, plp->pid, ERTS_PROC_LOCK_STATUS); if (proc) { if (erts_proclist_same(plp, proc)) { - resume_process(proc); + resume_process(proc, ERTS_PROC_LOCK_STATUS); nresumed++; } erts_smp_proc_unlock(proc, ERTS_PROC_LOCK_STATUS); @@ -9968,8 +10021,10 @@ 
erts_internal_request_system_task_3(BIF_ALIST_3) rp_state = n; } - erts_smp_proc_unlock(rp, ERTS_PROC_LOCK_STATUS); - + /* + * schedule_process_sys_task() unlocks status + * lock on process. + */ schedule_process_sys_task(rp, rp_state, NULL); if (free_stqs) @@ -10714,7 +10769,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader). * Schedule process for execution. */ - schedule_process(p, state); + schedule_process(p, state, 0); VERBOSE(DEBUG_PROCESSES, ("Created a new process: %T\n",p->common.id)); @@ -11035,7 +11090,8 @@ set_proc_exiting(Process *p, ERTS_PSFLG_SUSPENDED|ERTS_PSFLG_PENDING_EXIT, ERTS_PSFLG_EXITING|ERTS_PSFLG_ACTIVE, &state, - &enq_prio); + &enq_prio, + ERTS_PROC_LOCKS_ALL); p->fvalue = reason; if (bp) @@ -11076,7 +11132,8 @@ set_proc_self_exiting(Process *c_p) ERTS_PSFLG_SUSPENDED|ERTS_PSFLG_PENDING_EXIT, ERTS_PSFLG_EXITING|ERTS_PSFLG_ACTIVE, &state, - &enq_prio); + &enq_prio, + ERTS_PROC_LOCKS_ALL); ASSERT(!enqueue); return state; @@ -11721,8 +11778,9 @@ resume_suspend_monitor(ErtsSuspendMonitor *smon, void *vc_p) Process *suspendee = erts_pid2proc((Process *) vc_p, ERTS_PROC_LOCK_MAIN, smon->pid, ERTS_PROC_LOCK_STATUS); if (suspendee) { + ASSERT(suspendee != vc_p); if (smon->active) - resume_process(suspendee); + resume_process(suspendee, ERTS_PROC_LOCK_STATUS); erts_smp_proc_unlock(suspendee, ERTS_PROC_LOCK_STATUS); } erts_destroy_suspend_monitor(smon); @@ -12055,7 +12113,7 @@ timeout_proc(Process* p) state = erts_smp_atomic32_read_acqb(&p->state); if (!(state & ERTS_PSFLG_ACTIVE)) - schedule_process(p, state); + schedule_process(p, state, ERTS_PROC_LOCK_MAIN|ERTS_PROC_LOCK_STATUS); } diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h index ed6dadbffa..9b740f049e 100644 --- a/erts/emulator/beam/erl_process.h +++ b/erts/emulator/beam/erl_process.h @@ -734,13 +734,9 @@ erts_smp_reset_max_len(ErtsRunQueue *rq, ErtsRunQueueInfo *rqi) #define ERTS_PSD_DIST_ENTRY 3 #define ERTS_PSD_CALL_TIME_BP 4 #define ERTS_PSD_DELAYED_GC_TASK_QS 5 -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT 6 +#define ERTS_PSD_NIF_TRAP_EXPORT 6 #define ERTS_PSD_SIZE 7 -#else -#define ERTS_PSD_SIZE 6 -#endif typedef struct { void *data[ERTS_PSD_SIZE]; @@ -767,10 +763,8 @@ typedef struct { #define ERTS_PSD_DELAYED_GC_TASK_QS_GET_LOCKS ERTS_PROC_LOCK_MAIN #define ERTS_PSD_DELAYED_GC_TASK_QS_SET_LOCKS ERTS_PROC_LOCK_MAIN -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_GET_LOCKS ERTS_PROC_LOCK_MAIN -#define ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT_SET_LOCKS ERTS_PROC_LOCK_MAIN -#endif +#define ERTS_PSD_NIF_TRAP_EXPORT_GET_LOCKS ERTS_PROC_LOCK_MAIN +#define ERTS_PSD_NIF_TRAP_EXPORT_SET_LOCKS ERTS_PROC_LOCK_MAIN typedef struct { ErtsProcLocks get_locks; @@ -1367,6 +1361,8 @@ Uint64 erts_get_proc_interval(void); Uint64 erts_ensure_later_proc_interval(Uint64); Uint64 erts_step_proc_interval(void); +int erts_setup_nif_gc(Process* proc, Eterm** objv, int* nobj); /* see erl_nif.c */ + ErtsProcList *erts_proclist_create(Process *); void erts_proclist_destroy(ErtsProcList *); @@ -1704,17 +1700,17 @@ ErtsSchedulerData *erts_get_scheduler_data(void) #endif #endif -void erts_schedule_process(Process *, erts_aint32_t); +void erts_schedule_process(Process *, erts_aint32_t, ErtsProcLocks); -ERTS_GLB_INLINE void erts_proc_notify_new_message(Process *p); +ERTS_GLB_INLINE void erts_proc_notify_new_message(Process *p, ErtsProcLocks locks); #if ERTS_GLB_INLINE_INCL_FUNC_DEF ERTS_GLB_INLINE void 
-erts_proc_notify_new_message(Process *p) +erts_proc_notify_new_message(Process *p, ErtsProcLocks locks) { /* No barrier needed, due to msg lock */ erts_aint32_t state = erts_smp_atomic32_read_nob(&p->state); if (!(state & ERTS_PSFLG_ACTIVE)) - erts_schedule_process(p, state); + erts_schedule_process(p, state, locks); } #endif @@ -1817,12 +1813,10 @@ erts_psd_set(Process *p, ErtsProcLocks plocks, int ix, void *data) #define ERTS_PROC_SET_DELAYED_GC_TASK_QS(P, L, PBT) \ ((ErtsProcSysTaskQs *) erts_psd_set((P), (L), ERTS_PSD_DELAYED_GC_TASK_QS, (void *) (PBT))) -#ifdef ERTS_DIRTY_SCHEDULERS -#define ERTS_PROC_GET_DIRTY_SCHED_TRAP_EXPORT(P) \ - ((Export *) erts_psd_get((P), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT)) -#define ERTS_PROC_SET_DIRTY_SCHED_TRAP_EXPORT(P, L, DSTE) \ - ((Export *) erts_psd_set((P), (L), ERTS_PSD_DIRTY_SCHED_TRAP_EXPORT, (void *) (DSTE))) -#endif +#define ERTS_PROC_GET_NIF_TRAP_EXPORT(P) \ + ((Export *) erts_psd_get((P), ERTS_PSD_NIF_TRAP_EXPORT)) +#define ERTS_PROC_SET_NIF_TRAP_EXPORT(P, L, DSTE) \ + ((Export *) erts_psd_set((P), (L), ERTS_PSD_NIF_TRAP_EXPORT, (void *) (DSTE))) ERTS_GLB_INLINE Eterm erts_proc_get_error_handler(Process *p); diff --git a/erts/emulator/beam/io.c b/erts/emulator/beam/io.c index edf4a28784..ae053fc191 100644 --- a/erts/emulator/beam/io.c +++ b/erts/emulator/beam/io.c @@ -1218,9 +1218,10 @@ typedef struct { static ERTS_INLINE ErtsTryImmDrvCallResult try_imm_drv_call(ErtsTryImmDrvCallState *sp) { + unsigned int prof_runnable_ports; ErtsTryImmDrvCallResult res; int reds_left_in; - erts_aint32_t invalid_state, invalid_sched_flags; + erts_aint32_t act, exp, invalid_state, invalid_sched_flags; Port *prt = sp->port; Process *c_p = sp->c_p; @@ -1247,18 +1248,39 @@ try_imm_drv_call(ErtsTryImmDrvCallState *sp) goto locked_fail; } - sp->sched_flags = erts_smp_atomic32_read_nob(&prt->sched.flags); - if (sp->sched_flags & invalid_sched_flags) { - res = ERTS_TRY_IMM_DRV_CALL_INVALID_SCHED_FLAGS; - goto locked_fail; - } + prof_runnable_ports = erts_system_profile_flags.runnable_ports; + if (prof_runnable_ports) + erts_port_task_sched_lock(&prt->sched); + act = erts_smp_atomic32_read_nob(&prt->sched.flags); + + do { + erts_aint32_t new; + + if (act & invalid_sched_flags) { + res = ERTS_TRY_IMM_DRV_CALL_INVALID_SCHED_FLAGS; + sp->sched_flags = act; + goto locked_fail; + } + exp = act; + new = act | ERTS_PTS_FLG_EXEC_IMM; + act = erts_smp_atomic32_cmpxchg_mb(&prt->sched.flags, new, exp); + } while (act != exp); + + sp->sched_flags = act; if (!c_p) reds_left_in = CONTEXT_REDS/10; else { if (IS_TRACED_FL(c_p, F_TRACE_SCHED_PROCS)) trace_virtual_sched(c_p, am_out); + /* + * No status lock held while sending runnable + * proc trace messages. It is however not needed + * in this case, since only this thread can send + * such messages for this process until the process + * has been scheduled out. 
+ */ if (erts_system_profile_flags.runnable_procs && erts_system_profile_flags.exclusive) profile_runnable_proc(c_p, am_inactive); @@ -1273,11 +1295,14 @@ try_imm_drv_call(ErtsTryImmDrvCallState *sp) ERTS_SMP_CHK_NO_PROC_LOCKS; - if (IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) - trace_sched_ports_where(prt, am_in, sp->port_op); - if (erts_system_profile_flags.runnable_ports - && !erts_port_is_scheduled(prt)) - profile_runnable_port(prt, am_active); + if (prof_runnable_ports | IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) { + if (prof_runnable_ports && !(act & (ERTS_PTS_FLG_IN_RUNQ|ERTS_PTS_FLG_EXEC))) + profile_runnable_port(prt, am_active); + if (IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) + trace_sched_ports_where(prt, am_in, sp->port_op); + if (prof_runnable_ports) + erts_port_task_sched_unlock(&prt->sched); + } sp->fpe_was_unmasked = erts_block_fpe(); @@ -1294,17 +1319,31 @@ finalize_imm_drv_call(ErtsTryImmDrvCallState *sp) int reds; Port *prt = sp->port; Process *c_p = sp->c_p; + erts_aint32_t act; + unsigned int prof_runnable_ports; reds = prt->reds; reds += erts_port_driver_callback_epilogue(prt, NULL); erts_unblock_fpe(sp->fpe_was_unmasked); - if (IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) - trace_sched_ports_where(prt, am_out, sp->port_op); - if (erts_system_profile_flags.runnable_ports - && !erts_port_is_scheduled(prt)) - profile_runnable_port(prt, am_inactive); + prof_runnable_ports = erts_system_profile_flags.runnable_ports; + if (prof_runnable_ports) + erts_port_task_sched_lock(&prt->sched); + + act = erts_smp_atomic32_read_band_mb(&prt->sched.flags, + ~ERTS_PTS_FLG_EXEC_IMM); + ERTS_SMP_LC_ASSERT(act & ERTS_PTS_FLG_EXEC_IMM); + + if (prof_runnable_ports | IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) { + if (IS_TRACED_FL(prt, F_TRACE_SCHED_PORTS)) + trace_sched_ports_where(prt, am_out, sp->port_op); + if (prof_runnable_ports) { + if (!(act & (ERTS_PTS_FLG_IN_RUNQ|ERTS_PTS_FLG_EXEC))) + profile_runnable_port(prt, am_inactive); + erts_port_task_sched_unlock(&prt->sched); + } + } erts_port_release(prt); @@ -1319,6 +1358,13 @@ finalize_imm_drv_call(ErtsTryImmDrvCallState *sp) if (IS_TRACED_FL(c_p, F_TRACE_SCHED_PROCS)) trace_virtual_sched(c_p, am_in); + /* + * No status lock held while sending runnable + * proc trace messages. It is however not needed + * in this case, since only this thread can send + * such messages for this process until the process + * has been scheduled out. 
+ */ if (erts_system_profile_flags.runnable_procs && erts_system_profile_flags.exclusive) profile_runnable_proc(c_p, am_active); @@ -6129,7 +6175,7 @@ driver_pdl_create(ErlDrvPort dp) return NULL; pdl = erts_alloc(ERTS_ALC_T_PORT_DATA_LOCK, sizeof(struct erl_drv_port_data_lock)); - erts_mtx_init(&pdl->mtx, "port_data_lock"); + erts_mtx_init_x(&pdl->mtx, "port_data_lock", pp->common.id, 1); pdl_init_refc(pdl); erts_port_inc_refc(pp); pdl->prt = pp; @@ -7166,7 +7212,7 @@ char *driver_dl_error(void) #define ERL_DRV_SYS_INFO_SIZE(LAST_FIELD) \ - (((size_t) &((ErlDrvSysInfo *) 0)->LAST_FIELD) \ + (offsetof(ErlDrvSysInfo, LAST_FIELD) \ + sizeof(((ErlDrvSysInfo *) 0)->LAST_FIELD)) void diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h index 05f07e57b2..3d8dd9c6d0 100644 --- a/erts/emulator/beam/sys.h +++ b/erts/emulator/beam/sys.h @@ -274,6 +274,7 @@ __decl_noreturn void __noreturn erl_assert_error(const char* expr, const char *f typedef unsigned int Eterm; typedef unsigned int Uint; typedef int Sint; +#define ERTS_UINT_MAX UINT_MAX #define ERTS_SIZEOF_ETERM SIZEOF_INT #define ErtsStrToSint strtol #else @@ -347,6 +348,7 @@ typedef long long Sint; typedef Uint UWord; typedef Sint SWord; +#define ERTS_UINT_MAX ERTS_UWORD_MAX #endif /* HALFWORD_HEAP */ diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c index 72092ec7b0..55f9e68e78 100644 --- a/erts/emulator/beam/utils.c +++ b/erts/emulator/beam/utils.c @@ -3948,6 +3948,9 @@ erts_save_emu_args(int argc, char **argv) size += sz+1; } ptr = (char *) malloc(size); + if (!ptr) { + ERTS_INTERNAL_ERROR("malloc failed to allocate memory!"); + } #ifdef DEBUG end_ptr = ptr + size; #endif diff --git a/erts/emulator/drivers/common/inet_drv.c b/erts/emulator/drivers/common/inet_drv.c index 09bada457d..891589d1c5 100644 --- a/erts/emulator/drivers/common/inet_drv.c +++ b/erts/emulator/drivers/common/inet_drv.c @@ -4372,7 +4372,7 @@ static int erl_inet_close(inet_descriptor* desc) desc_close(desc); desc->state = INET_STATE_CLOSED; } else if (desc->prebound && (desc->s != INVALID_SOCKET)) { - sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE, 0); + sock_select(desc, FD_READ | FD_WRITE | FD_CLOSE | ERL_DRV_USE_NO_CALLBACK, 0); desc->event_mask = 0; #ifdef __WIN32__ desc->forced_events = 0; @@ -4536,7 +4536,8 @@ static ErlDrvSSizeT inet_ctl_open(inet_descriptor* desc, int domain, int type, /* as inet_open but pass in an open socket (MUST BE OF RIGHT TYPE) */ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, - SOCKET s, char** rbuf, ErlDrvSizeT rsize) + SOCKET s, Uint32 bound, + char** rbuf, ErlDrvSizeT rsize) { inet_address name; unsigned int sz = sizeof(name); @@ -4560,7 +4561,12 @@ static ErlDrvSSizeT inet_ctl_fdopen(inet_descriptor* desc, int domain, int type, #ifdef __WIN32__ driver_select(desc->port, desc->event, ERL_DRV_READ, 1); #endif - desc->state = INET_STATE_BOUND; /* assume bound */ + + if (bound) + desc->state = INET_STATE_BOUND; + else + desc->state = INET_STATE_OPEN; + if (type == SOCK_STREAM) { /* check if connected */ sz = sizeof(name); if (!IS_SOCKET_ERROR(sock_peer(s, (struct sockaddr*) &name, &sz))) { @@ -5772,7 +5778,7 @@ done: ia_p->Ipv6IfIndex && ia_p->Ipv6IfIndex != index) { - /* Oops, there was an other interface for IPv6. Possible? XXX */ + /* Oops, there was another interface for IPv6. Possible? 
XXX */ index = ia_p->Ipv6IfIndex; goto index; } @@ -9121,10 +9127,11 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, break; } - case INET_REQ_FDOPEN: { /* pass in an open socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ int domain; + int bound; DEBUGF(("tcp_inet_ctl(%ld): FDOPEN\r\n", (long)desc->inet.port)); - if (len != 6) return ctl_error(EINVAL, rbuf, rsize); + if (len != 6 && len != 10) return ctl_error(EINVAL, rbuf, rsize); switch(buf[0]) { case INET_AF_INET: domain = AF_INET; @@ -9142,8 +9149,13 @@ static ErlDrvSSizeT tcp_inet_ctl(ErlDrvData e, unsigned int cmd, return ctl_error(EINVAL, rbuf, rsize); } if (buf[1] != INET_TYPE_STREAM) return ctl_error(EINVAL, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + return inet_ctl_fdopen(INETP(desc), domain, SOCK_STREAM, - (SOCKET) get_int32(buf+2), rbuf, rsize); + (SOCKET) get_int32(buf+2), + bound, rbuf, rsize); break; } @@ -11116,10 +11128,11 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return replen; - case INET_REQ_FDOPEN: { /* pass in an open (and bound) socket */ + case INET_REQ_FDOPEN: { /* pass in an open (and optionally bound) socket */ SOCKET s; + int bound; DEBUGF(("packet inet_ctl(%ld): FDOPEN\r\n", (long)desc->port)); - if (len != 6) { + if (len != 6 && len != 10) { return ctl_error(EINVAL, rbuf, rsize); } switch (buf[0]) { @@ -11144,7 +11157,11 @@ static ErlDrvSSizeT packet_inet_ctl(ErlDrvData e, unsigned int cmd, char* buf, return ctl_error(EINVAL, rbuf, rsize); } s = (SOCKET)get_int32(buf+2); - replen = inet_ctl_fdopen(desc, af, type, s, rbuf, rsize); + + if (len == 6) bound = 1; + else bound = get_int32(buf+2+4); + + replen = inet_ctl_fdopen(desc, af, type, s, bound, rbuf, rsize); if ((*rbuf)[0] != INET_REP_ERROR) { if (desc->active) diff --git a/erts/emulator/drivers/unix/multi_drv.c b/erts/emulator/drivers/unix/multi_drv.c index 822c96730c..724d325ed5 100644 --- a/erts/emulator/drivers/unix/multi_drv.c +++ b/erts/emulator/drivers/unix/multi_drv.c @@ -20,7 +20,7 @@ /* Purpose: Multidriver interface This is an example of a driver which allows multiple instances of itself. I.e have one erlang process execute open_port(multi......) and - at the same time have an other erlang process open an other port + at the same time have another erlang process open another port running multi there as well. */ diff --git a/erts/emulator/sys/common/erl_poll.c b/erts/emulator/sys/common/erl_poll.c index 0a58a625b2..aa412a20c8 100644 --- a/erts/emulator/sys/common/erl_poll.c +++ b/erts/emulator/sys/common/erl_poll.c @@ -2157,7 +2157,7 @@ ERTS_POLL_EXPORT(erts_poll_wait)(ErtsPollSet ps, #ifdef ERTS_POLL_DEBUG_PRINT erts_printf("Entering erts_poll_wait(), timeout=%d\n", - (int) tv->tv_sec*1000 + tv->tv_usec/1000); + (int) tvp->tv_sec*1000 + tvp->tv_usec/1000); #endif if (ERTS_POLLSET_SET_POLLED_CHK(ps)) { diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile index 0b0568c31a..dfbe47786a 100644 --- a/erts/emulator/test/Makefile +++ b/erts/emulator/test/Makefile @@ -31,6 +31,7 @@ MODULES= \ a_SUITE \ after_SUITE \ alloc_SUITE \ + async_ports_SUITE \ beam_SUITE \ beam_literals_SUITE \ bif_SUITE \ diff --git a/erts/emulator/test/async_ports_SUITE.erl b/erts/emulator/test/async_ports_SUITE.erl new file mode 100644 index 0000000000..c89b3655ff --- /dev/null +++ b/erts/emulator/test/async_ports_SUITE.erl @@ -0,0 +1,118 @@ +-module(async_ports_SUITE). + +-include_lib("common_test/include/ct.hrl"). 
+ +-compile(export_all). + +-define(PACKET_SIZE, (10 * 1024 * 8)). +-define(CPORT_DELAY, 100). +-define(TEST_LOOPS_COUNT, 100000). +-define(SLEEP_BEFORE_CHECK, 1000). +-define(TEST_PROCS_COUNT, 2). +-define(TC_TIMETRAP_SECONDS, 10). + +suite() -> [{ct_hooks,[ts_install_cth]}]. + +all() -> + [ + permanent_busy_test + ]. + +permanent_busy_test(Config) -> + ct:timetrap({seconds, ?TC_TIMETRAP_SECONDS}), + ExePath = filename:join(?config(data_dir, Config), "cport"), + + Self = self(), + spawn_link( + fun() -> + Block = <<0:?PACKET_SIZE>>, + + Port = open_port(ExePath), + + Testers = + lists:map( + fun(_) -> + erlang:spawn_link(?MODULE, run_loop, + [Self, + Port, + Block, + ?TEST_LOOPS_COUNT, + 0]) + end, + lists:seq(1, ?TEST_PROCS_COUNT)), + Self ! {test_info, Port, Testers}, + endless_flush(Port) + end), + + receive + {test_info, Port, Testers} -> + MaxWaitTime = round(0.7 * ?TC_TIMETRAP_SECONDS * 1000), + ct:log("wait testers, maximum ~w mcsec~n", [MaxWaitTime]), + ok = wait_testers(MaxWaitTime, Testers), + timer:sleep(?SLEEP_BEFORE_CHECK), + case erlang:port_command(Port, <<"test">>, [nosuspend]) of + false -> + exit(port_dead); + true -> + ok + end + end. + +wait_testers(Timeout, Testers) -> + lists:foldl( + fun(Pid, AccIn) -> + StartWait = os:timestamp(), + receive + {Pid, port_dead} -> + recalc_timeout(AccIn, StartWait) + after AccIn -> + Pid ! stop, + recalc_timeout(AccIn, StartWait) + end + end, Timeout, Testers), + ok. + +recalc_timeout(TimeoutIn, WaitStart) -> + erlang:max(0, TimeoutIn - round(timer:now_diff(os:timestamp(), WaitStart)) div 1000). + +open_port(ExePath) -> + erlang:open_port({spawn, ExePath ++ " 100"}, [{packet, 4}, eof, exit_status, use_stdio, binary]). + +run_loop(RootProc, Port, Block, CheckLimit, BusyCnt) -> + receive + stop -> + ok + after 0 -> + case erlang:port_command(Port, Block, [nosuspend]) of + true -> + run_loop(RootProc, Port, Block, CheckLimit, 0); + false -> + if + BusyCnt + 1 > CheckLimit -> + check_dead(RootProc, Port, Block, CheckLimit); + true -> + run_loop(RootProc, Port, Block, CheckLimit, BusyCnt + 1) + end + end + end. + +check_dead(RootProc, Port, Block, CheckLimit) -> + ct:log("~p: check port dead~n", [self()]), + timer:sleep(?SLEEP_BEFORE_CHECK), + case erlang:port_command(Port, Block, [nosuspend]) of + true -> + ct:log("not dead~n"), + run_loop(RootProc, Port, Block, CheckLimit, 0); + false -> + ct:log("port dead: ~p~n", [Port]), + RootProc ! {self(), port_dead}, + ok + end. + +endless_flush(Port) -> + receive + {Port, {data, _}} -> + endless_flush(Port); + {Port, SomethingWrong} -> + erlang:error({someting_wrong, SomethingWrong}) + end. 
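The regression check in the new suite hinges on the return value of erlang:port_command/3 with the nosuspend option: true means the port accepted the data, false means the port is busy and the calling process was not suspended. A minimal sketch of that polling pattern, assuming only what the suite above already uses (the retry count and back-off interval here are illustrative, not part of the suite):

    %% Sketch only: retry a nosuspend command a bounded number of times
    %% instead of letting the calling process be suspended on a busy port.
    send_nosuspend(_Port, _Data, 0) ->
        {error, busy};
    send_nosuspend(Port, Data, Retries) when Retries > 0 ->
        case erlang:port_command(Port, Data, [nosuspend]) of
            true ->
                ok;                       %% data accepted by the port
            false ->
                timer:sleep(10),          %% port busy: back off briefly and retry
                send_nosuspend(Port, Data, Retries - 1)
        end.

A port that keeps answering false long after traffic has stopped is exactly the "permanently busy" condition that permanent_busy_test/1 above guards against.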
diff --git a/erts/emulator/test/async_ports_SUITE_data/Makefile.src b/erts/emulator/test/async_ports_SUITE_data/Makefile.src new file mode 100644 index 0000000000..56da3fbe12 --- /dev/null +++ b/erts/emulator/test/async_ports_SUITE_data/Makefile.src @@ -0,0 +1,15 @@ +CC = @CC@ +LD = @LD@ +CFLAGS = @CFLAGS@ @DEFS@ +CROSSLDFLAGS = @CROSSLDFLAGS@ + +PROGS = cport@exe@ + + +all: $(PROGS) + +cport@exe@: cport@obj@ + $(LD) $(CROSSLDFLAGS) -o cport cport@obj@ @LIBS@ + +cport@obj@: cport.c + $(CC) -c -o cport@obj@ $(CFLAGS) cport.c diff --git a/erts/emulator/test/async_ports_SUITE_data/cport.c b/erts/emulator/test/async_ports_SUITE_data/cport.c new file mode 100644 index 0000000000..033aff382a --- /dev/null +++ b/erts/emulator/test/async_ports_SUITE_data/cport.c @@ -0,0 +1,81 @@ +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#ifdef __WIN32__ +# include "windows.h" +# include "winbase.h" +#else +# include <unistd.h> +#endif + +typedef unsigned char byte; + +int read_cmd(byte *buf) +{ + int len; + if (read_exact(buf, 4) != 4) + return(-1); + + len = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; + return read_exact(buf, len); +} + +int write_cmd(byte *buf, int len) +{ + byte li[4]; + li[0] = (len >> 24) & 0xff; + li[1] = (len >> 16) & 0xff; + li[2] = (len >> 8) & 0xff; + li[3] = len & 0xff; + write_exact(&li, 4); + + return write_exact(buf, len); +} + +int read_exact(byte *buf, int len) +{ + int i, got=0; + do { + if ((i = read(0, buf+got, len-got)) <= 0) + { + return(i); + } + got += i; + } while (got<len); + return len; +} + +int write_exact(byte *buf, int len) +{ + int i, wrote = 0; + do { + if ((i = write(1, buf+wrote, len-wrote)) < 0) + return (i); + wrote += i; + } while (wrote<len); + return len; +} + +byte static_buf[31457280]; // 30 mb + +int main(int argc, char **argv) { + int sleep_time = atoi(argv[1]); + int fn, arg, res; + byte *buf = &static_buf[0]; + int len = 0; + if (sleep_time <= 0) + sleep_time = 0; +#ifdef __WIN32__ + else + sleep_time = ((sleep_time - 1) / 1000) + 1; /* Milli seconds */ +#endif + while ((len = read_cmd(buf)) > 0) { +#ifdef __WIN32__ + Sleep((DWORD) sleep_time); +#else + usleep(sleep_time); +#endif + write_cmd(buf, len); + } +} diff --git a/erts/emulator/test/busy_port_SUITE.erl b/erts/emulator/test/busy_port_SUITE.erl index 4b4af0babe..2ed5aaa0d0 100644 --- a/erts/emulator/test/busy_port_SUITE.erl +++ b/erts/emulator/test/busy_port_SUITE.erl @@ -98,8 +98,10 @@ generator(0, Writer, _Data) -> %% Calling process_info(Pid, current_function) on a suspended process %% used to crash Beam. - {current_function, {erlang, send, 2}} = - process_info(Writer, current_function), + case process_info(Writer, [status,current_function]) of + [{status,suspended},{current_function,{erlang,send,2}}] -> ok; + [{status,suspended},{current_function,{erlang,bif_return_trap,_}}] -> ok + end, unlock_slave(); generator(N, Writer, Data) -> Writer ! {exec, Data}, diff --git a/erts/emulator/test/driver_SUITE.erl b/erts/emulator/test/driver_SUITE.erl index c62bc0c454..344bde7c91 100644 --- a/erts/emulator/test/driver_SUITE.erl +++ b/erts/emulator/test/driver_SUITE.erl @@ -1062,10 +1062,9 @@ otp_6602(Config) when is_list(Config) -> %% Inet driver use port locking... {ok, S} = gen_udp:open(0), {ok, Fd} = inet:getfd(S), - {ok, Port} = inet:port(S), %% Steal fd (lock checker used to %% trigger here). - {ok, _S2} = gen_udp:open(Port,[{fd,Fd}]), + {ok, _S2} = gen_udp:open(0,[{fd,Fd}]), Parent ! 
Done end), ?line receive Done -> ok end, diff --git a/erts/emulator/test/fun_SUITE.erl b/erts/emulator/test/fun_SUITE.erl index 8ad5f290ed..2968f5bebb 100644 --- a/erts/emulator/test/fun_SUITE.erl +++ b/erts/emulator/test/fun_SUITE.erl @@ -30,7 +30,7 @@ fun_to_port/1,t_hash/1,t_phash/1,t_phash2/1,md5/1, refc/1,refc_ets/1,refc_dist/1, const_propagation/1,t_arity/1,t_is_function2/1, - t_fun_info/1]). + t_fun_info/1,t_fun_info_mfa/1]). -export([nothing/0]). @@ -42,7 +42,8 @@ all() -> [bad_apply, bad_fun_call, badarity, ext_badarity, equality, ordering, fun_to_port, t_hash, t_phash, t_phash2, md5, refc, refc_ets, refc_dist, - const_propagation, t_arity, t_is_function2, t_fun_info]. + const_propagation, t_arity, t_is_function2, t_fun_info, + t_fun_info_mfa]. groups() -> []. @@ -824,6 +825,24 @@ t_fun_info(Config) when is_list(Config) -> ?line bad_info(<<1,2>>), ok. +t_fun_info_mfa(Config) when is_list(Config) -> + Fun1 = fun spawn_call/2, + {module,M1} = erlang:fun_info(Fun1, module), + {name,F1} = erlang:fun_info(Fun1, name), + {arity,A1} = erlang:fun_info(Fun1, arity), + {M1,F1,A1=2} = erlang:fun_info_mfa(Fun1), + %% Module fun. + Fun2 = fun ?MODULE:t_fun_info/1, + {module,M2} = erlang:fun_info(Fun2, module), + {name,F2} = erlang:fun_info(Fun2, name), + {arity,A2} = erlang:fun_info(Fun2, arity), + {M2,F2,A2=1} = erlang:fun_info_mfa(Fun2), + + %% Not fun. + {'EXIT',_} = (catch erlang:fun_info_mfa(id(d))), + ok. + + bad_info(Term) -> try erlang:fun_info(Term, module) of Any -> diff --git a/erts/emulator/test/nif_SUITE.erl b/erts/emulator/test/nif_SUITE.erl index b2da6f58af..01ee71c4fa 100644 --- a/erts/emulator/test/nif_SUITE.erl +++ b/erts/emulator/test/nif_SUITE.erl @@ -37,7 +37,8 @@ threading/1, send/1, send2/1, send3/1, send_threaded/1, neg/1, is_checks/1, get_length/1, make_atom/1, make_string/1, reverse_list_test/1, - otp_9668/1, consume_timeslice/1, dirty_nif/1, dirty_nif_send/1 + otp_9668/1, consume_timeslice/1, dirty_nif/1, dirty_nif_send/1, + dirty_nif_exception/1, nif_schedule/1 ]). -export([many_args_100/100]). @@ -64,7 +65,8 @@ all() -> resource_takeover, threading, send, send2, send3, send_threaded, neg, is_checks, get_length, make_atom, make_string,reverse_list_test, - otp_9668, consume_timeslice, dirty_nif, dirty_nif_send + otp_9668, consume_timeslice, + nif_schedule, dirty_nif, dirty_nif_send, dirty_nif_exception ]. groups() -> @@ -1524,6 +1526,20 @@ consume_timeslice(Config) when is_list(Config) -> ok. +nif_schedule(Config) when is_list(Config) -> + ensure_lib_loaded(Config), + A = "this is a string", + B = {this,is,a,tuple}, + {B,A} = call_nif_schedule(A, B), + ok = try call_nif_schedule(1, 2) + catch + error:badarg -> + [{?MODULE,call_nif_schedule,[1,2],_}|_] = + erlang:get_stacktrace(), + ok + end, + ok. + dirty_nif(Config) when is_list(Config) -> try erlang:system_info(dirty_cpu_schedulers) of N when is_integer(N) -> @@ -1556,6 +1572,24 @@ dirty_nif_send(Config) when is_list(Config) -> {skipped,"No dirty scheduler support"} end. +dirty_nif_exception(Config) when is_list(Config) -> + try erlang:system_info(dirty_cpu_schedulers) of + N when is_integer(N) -> + ensure_lib_loaded(Config), + try + call_dirty_nif_exception(), + ?t:fail(expected_badarg) + catch + error:badarg -> + [{?MODULE,call_dirty_nif_exception,[],_}|_] = + erlang:get_stacktrace(), + ok + end + catch + error:badarg -> + {skipped,"No dirty scheduler support"} + end. + next_msg(_Pid) -> receive M -> M @@ -1685,8 +1719,10 @@ echo_int(_) -> ?nif_stub. type_sizes() -> ?nif_stub. otp_9668_nif(_) -> ?nif_stub. 
consume_timeslice_nif(_,_) -> ?nif_stub. +call_nif_schedule(_,_) -> ?nif_stub. call_dirty_nif(_,_,_) -> ?nif_stub. send_from_dirty_nif(_) -> ?nif_stub. +call_dirty_nif_exception() -> ?nif_stub. %% maps is_map_nif(_) -> ?nif_stub. diff --git a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c index 955dc64189..ad9d5d9254 100644 --- a/erts/emulator/test/nif_SUITE_data/nif_SUITE.c +++ b/erts/emulator/test/nif_SUITE_data/nif_SUITE.c @@ -1493,6 +1493,31 @@ static ERL_NIF_TERM consume_timeslice_nif(ErlNifEnv* env, int argc, const ERL_NI } } +static ERL_NIF_TERM nif_sched2(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + char s[64]; + if (!enif_get_string(env, argv[2], s, sizeof s, ERL_NIF_LATIN1)) + return enif_make_badarg(env); + return enif_make_tuple2(env, argv[3], argv[2]); +} + +static ERL_NIF_TERM nif_sched1(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + ERL_NIF_TERM new_argv[4]; + new_argv[0] = enif_make_atom(env, "garbage0"); + new_argv[1] = enif_make_atom(env, "garbage1"); + new_argv[2] = argv[0]; + new_argv[3] = argv[1]; + return enif_schedule_nif(env, "nif_sched2", 0, nif_sched2, 4, new_argv); +} + +static ERL_NIF_TERM call_nif_schedule(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +{ + if (argc != 2) + return enif_make_atom(env, "false"); + return enif_schedule_nif(env, "nif_sched1", 0, nif_sched1, argc, argv); +} + #ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT static ERL_NIF_TERM dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { @@ -1507,11 +1532,10 @@ static ERL_NIF_TERM dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[ enif_get_int(env, argv[0], &n); enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1); enif_inspect_binary(env, argv[2], &b); - result = enif_make_tuple3(env, - enif_make_int(env, n), - enif_make_string(env, s, ERL_NIF_LATIN1), - enif_make_binary(env, &b)); - return enif_schedule_dirty_nif_finalizer(env, result, enif_dirty_nif_finalizer); + return enif_make_tuple3(env, + enif_make_int(env, n), + enif_make_string(env, s, ERL_NIF_LATIN1), + enif_make_binary(env, &b)); } static ERL_NIF_TERM call_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) @@ -1526,7 +1550,7 @@ static ERL_NIF_TERM call_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM if (enif_get_int(env, argv[0], &n) && enif_get_string(env, argv[1], s, sizeof s, ERL_NIF_LATIN1) && enif_inspect_binary(env, argv[2], &b)) - return enif_schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_CPU_BOUND, dirty_nif, argc, argv); + return enif_schedule_nif(env, "call_dirty_nif", ERL_NIF_DIRTY_JOB_CPU_BOUND, dirty_nif, argc, argv); else return enif_make_badarg(env); } else { @@ -1534,35 +1558,42 @@ static ERL_NIF_TERM call_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM } } -static ERL_NIF_TERM dirty_sender(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +static ERL_NIF_TERM send_from_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { ERL_NIF_TERM result; ErlNifPid pid; ErlNifEnv* menv; int res; - enif_get_local_pid(env, argv[0], &pid); + if (!enif_get_local_pid(env, argv[0], &pid)) + return enif_make_badarg(env); result = enif_make_tuple2(env, enif_make_atom(env, "ok"), enif_make_pid(env, &pid)); menv = enif_alloc_env(); res = enif_send(env, &pid, menv, result); enif_free_env(menv); if (!res) - /* Note the next line will crash, since dirty nifs can't return exceptions. - * This is intentional, since enif_send should not fail if the test succeeds. 
- */ - return enif_schedule_dirty_nif_finalizer(env, enif_make_badarg(env), enif_dirty_nif_finalizer); + return enif_make_badarg(env); else - return enif_schedule_dirty_nif_finalizer(env, result, enif_dirty_nif_finalizer); + return result; } -static ERL_NIF_TERM send_from_dirty_nif(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) +static ERL_NIF_TERM call_dirty_nif_exception(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[]) { - ERL_NIF_TERM result; - ErlNifPid pid; - - if (!enif_get_local_pid(env, argv[0], &pid)) + switch (argc) { + case 0: { + ERL_NIF_TERM args[255]; + int i; + for (i = 0; i < 255; i++) + args[i] = enif_make_int(env, i); + return enif_schedule_nif(env, "call_dirty_nif_exception", ERL_NIF_DIRTY_JOB_CPU_BOUND, + call_dirty_nif_exception, 255, argv); + } + case 1: return enif_make_badarg(env); - return enif_schedule_dirty_nif(env, ERL_NIF_DIRTY_JOB_CPU_BOUND, dirty_sender, argc, argv); + default: + return enif_schedule_nif(env, "call_dirty_nif_exception", ERL_NIF_DIRTY_JOB_CPU_BOUND, + call_dirty_nif_exception, argc-1, argv); + } } #endif @@ -1742,9 +1773,11 @@ static ErlNifFunc nif_funcs[] = {"type_sizes", 0, type_sizes}, {"otp_9668_nif", 1, otp_9668_nif}, {"consume_timeslice_nif", 2, consume_timeslice_nif}, + {"call_nif_schedule", 2, call_nif_schedule}, #ifdef ERL_NIF_DIRTY_SCHEDULER_SUPPORT {"call_dirty_nif", 3, call_dirty_nif}, - {"send_from_dirty_nif", 1, send_from_dirty_nif}, + {"send_from_dirty_nif", 1, send_from_dirty_nif, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"call_dirty_nif_exception", 0, call_dirty_nif_exception, ERL_NIF_DIRTY_JOB_IO_BOUND}, #endif {"is_map_nif", 1, is_map_nif}, {"get_map_size_nif", 1, get_map_size_nif}, diff --git a/erts/epmd/src/epmd.c b/erts/epmd/src/epmd.c index 3cfa7a782f..9630e0cdf0 100644 --- a/erts/epmd/src/epmd.c +++ b/erts/epmd/src/epmd.c @@ -498,7 +498,11 @@ static void dbg_gen_printf(int onsyslog,int perr,int from_level, #ifdef HAVE_SYSLOG_H if (onsyslog) { - erts_vsnprintf(buf, DEBUG_BUFFER_SIZE, format, args); + int len; + len = erts_vsnprintf(buf, DEBUG_BUFFER_SIZE, format, args); + if (perr != 0 && len < sizeof(buf)) { + erts_snprintf(buf+len, sizeof(buf)-len, ": %s", strerror(perr)); + } syslog(LOG_ERR,"epmd: %s",buf); } #endif diff --git a/erts/include/internal/ethread.h b/erts/include/internal/ethread.h index 31d19902f5..72c054b588 100644 --- a/erts/include/internal/ethread.h +++ b/erts/include/internal/ethread.h @@ -31,6 +31,7 @@ #endif #include <stdlib.h> +#include "ethread_inline.h" #include "erl_errno.h" #if defined(DEBUG) @@ -51,31 +52,6 @@ # endif #endif -#if !defined(__GNUC__) -# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) 0 -#elif !defined(__GNUC_MINOR__) -# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ - ((__GNUC__ << 24) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) -#elif !defined(__GNUC_PATCHLEVEL__) -# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ - (((__GNUC__ << 24) | (__GNUC_MINOR__ << 12)) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) -#else -# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ - (((__GNUC__ << 24) | (__GNUC_MINOR__ << 12) | __GNUC_PATCHLEVEL__) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) -#endif - -#undef ETHR_INLINE -#if defined(__GNUC__) -# define ETHR_INLINE __inline__ -# if ETHR_AT_LEAST_GCC_VSN__(3, 1, 1) -# define ETHR_FORCE_INLINE __inline__ __attribute__((__always_inline__)) -# else -# define ETHR_FORCE_INLINE __inline__ -# endif -#elif defined(__WIN32__) -# define ETHR_INLINE __forceinline -# define ETHR_FORCE_INLINE __forceinline -#endif #if defined(ETHR_DEBUG) || !defined(ETHR_INLINE) || 
ETHR_XCHK \ || (defined(__GNUC__) && defined(ERTS_MIXED_CYGWIN_VC)) # undef ETHR_INLINE diff --git a/erts/include/internal/ethread_inline.h b/erts/include/internal/ethread_inline.h new file mode 100644 index 0000000000..ffb756c84f --- /dev/null +++ b/erts/include/internal/ethread_inline.h @@ -0,0 +1,49 @@ +/* + * %CopyrightBegin% + * + * Copyright Ericsson AB 2004-2014. All Rights Reserved. + * + * The contents of this file are subject to the Erlang Public License, + * Version 1.1, (the "License"); you may not use this file except in + * compliance with the License. You should have received a copy of the + * Erlang Public License along with this software. If not, it can be + * retrieved online at http://www.erlang.org/. + * + * Software distributed under the License is distributed on an "AS IS" + * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See + * the License for the specific language governing rights and limitations + * under the License. + * + * %CopyrightEnd% + */ + +#ifndef ETHREAD_INLINE_H__ +#define ETHREAD_INLINE_H__ + +#if !defined(__GNUC__) +# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) 0 +#elif !defined(__GNUC_MINOR__) +# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ + ((__GNUC__ << 24) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) +#elif !defined(__GNUC_PATCHLEVEL__) +# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ + (((__GNUC__ << 24) | (__GNUC_MINOR__ << 12)) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) +#else +# define ETHR_AT_LEAST_GCC_VSN__(MAJ, MIN, PL) \ + (((__GNUC__ << 24) | (__GNUC_MINOR__ << 12) | __GNUC_PATCHLEVEL__) >= (((MAJ) << 24) | ((MIN) << 12) | (PL))) +#endif + +#undef ETHR_INLINE +#if defined(__GNUC__) +# define ETHR_INLINE __inline__ +# if ETHR_AT_LEAST_GCC_VSN__(3, 1, 1) +# define ETHR_FORCE_INLINE __inline__ __attribute__((__always_inline__)) +# else +# define ETHR_FORCE_INLINE __inline__ +# endif +#elif defined(__WIN32__) +# define ETHR_INLINE __forceinline +# define ETHR_FORCE_INLINE __forceinline +#endif + +#endif /* #ifndef ETHREAD_INLINE_H__ */ diff --git a/erts/lib_src/Makefile.in b/erts/lib_src/Makefile.in index cf1aef518a..b680c03b1d 100644 --- a/erts/lib_src/Makefile.in +++ b/erts/lib_src/Makefile.in @@ -465,6 +465,7 @@ RELEASE_LIBS=$(ERTS_LIBS) INTERNAL_RELEASE_INCLUDES= \ $(ERTS_INCL_INT)/README \ $(ERTS_INCL_INT)/ethread.h \ + $(ERTS_INCL_INT)/ethread_inline.h \ $(ERTS_INCL_INT)/ethr_mutex.h \ $(ERTS_INCL_INT)/ethr_optimized_fallbacks.h \ $(ERTS_INCL_INT)/ethr_atomics.h \ diff --git a/erts/lib_src/common/erl_misc_utils.c b/erts/lib_src/common/erl_misc_utils.c index 8bf7656bb0..d58a28b5cb 100644 --- a/erts/lib_src/common/erl_misc_utils.c +++ b/erts/lib_src/common/erl_misc_utils.c @@ -25,7 +25,7 @@ # include <windows.h> #endif -#include "ethread.h" +#include "ethread_inline.h" #include "erl_misc_utils.h" #if defined(__WIN32__) diff --git a/erts/preloaded/ebin/erl_prim_loader.beam b/erts/preloaded/ebin/erl_prim_loader.beam Binary files differindex eec49f3983..5f2b619322 100644 --- a/erts/preloaded/ebin/erl_prim_loader.beam +++ b/erts/preloaded/ebin/erl_prim_loader.beam diff --git a/erts/preloaded/ebin/erlang.beam b/erts/preloaded/ebin/erlang.beam Binary files differindex 260badbcb3..cf3effc1e5 100644 --- a/erts/preloaded/ebin/erlang.beam +++ b/erts/preloaded/ebin/erlang.beam diff --git a/erts/preloaded/ebin/prim_inet.beam b/erts/preloaded/ebin/prim_inet.beam Binary files differindex 93e70cd623..8420052533 100644 --- a/erts/preloaded/ebin/prim_inet.beam +++ b/erts/preloaded/ebin/prim_inet.beam diff --git 
a/erts/preloaded/src/erl_prim_loader.erl b/erts/preloaded/src/erl_prim_loader.erl index 578913b633..466e0b0020 100644 --- a/erts/preloaded/src/erl_prim_loader.erl +++ b/erts/preloaded/src/erl_prim_loader.erl @@ -42,11 +42,11 @@ %% Public -export([start/3, set_path/1, get_path/0, get_file/1, get_files/2, - list_dir/1, read_file_info/1, get_cwd/0, get_cwd/1]). + list_dir/1, read_file_info/1, read_link_info/1, get_cwd/0, get_cwd/1]). %% Used by erl_boot_server -export([prim_init/0, prim_get_file/2, prim_list_dir/2, - prim_read_file_info/2, prim_get_cwd/2]). + prim_read_file_info/3, prim_get_cwd/2]). %% Used by escript and code -export([set_primary_archive/4, release_archives/0]). @@ -223,6 +223,12 @@ list_dir(Dir) -> read_file_info(File) -> check_file_result(read_file_info, File, request({read_file_info,File})). +-spec read_link_info(Filename) -> {'ok', FileInfo} | 'error' when + Filename :: string(), + FileInfo :: file:file_info(). +read_link_info(File) -> + check_file_result(read_link_info, File, request({read_link_info,File})). + -spec get_cwd() -> {'ok', string()} | 'error'. get_cwd() -> check_file_result(get_cwd, [], request({get_cwd,[]})). @@ -325,6 +331,9 @@ loop(State, Parent, Paths) -> {read_file_info,File} -> {Res,State1} = handle_read_file_info(State, File), {Res,State1,Paths}; + {read_link_info,File} -> + {Res,State1} = handle_read_link_info(State, File), + {Res,State1,Paths}; {get_cwd,[]} -> {Res,State1} = handle_get_cwd(State, []), {Res,State1,Paths}; @@ -387,10 +396,15 @@ handle_list_dir(State = #state{loader = inet}, Dir) -> ?SAFE2(inet_list_dir(State, Dir), State). handle_read_file_info(State = #state{loader = efile}, File) -> - ?SAFE2(efile_read_file_info(State, File), State); + ?SAFE2(efile_read_file_info(State, File, true), State); handle_read_file_info(State = #state{loader = inet}, File) -> ?SAFE2(inet_read_file_info(State, File), State). +handle_read_link_info(State = #state{loader = efile}, File) -> + ?SAFE2(efile_read_file_info(State, File, false), State); +handle_read_link_info(State = #state{loader = inet}, File) -> + ?SAFE2(inet_read_link_info(State, File), State). + handle_get_cwd(State = #state{loader = efile}, Drive) -> ?SAFE2(efile_get_cwd(State, Drive), State); handle_get_cwd(State = #state{loader = inet}, Drive) -> @@ -514,8 +528,8 @@ efile_list_dir(#state{prim_state = PS} = State, Dir) -> {Res, PS2} = prim_list_dir(PS, Dir), {Res, State#state{prim_state = PS2}}. -efile_read_file_info(#state{prim_state = PS} = State, File) -> - {Res, PS2} = prim_read_file_info(PS, File), +efile_read_file_info(#state{prim_state = PS} = State, File, FollowLinks) -> + {Res, PS2} = prim_read_file_info(PS, File, FollowLinks), {Res, State#state{prim_state = PS2}}. efile_get_cwd(#state{prim_state = PS} = State, Drive) -> @@ -718,6 +732,10 @@ inet_list_dir(State, Dir) -> inet_read_file_info(State, File) -> inet_send_and_rcv({read_file_info,File}, read_file_info, State). +%% -> {{ok,Info},State} | {{error,Reason},State} +inet_read_link_info(State, File) -> + inet_send_and_rcv({read_link_info,File}, read_link_info, State). + %% -> {{ok,Cwd},State} | {{error,Reason},State} inet_get_cwd(State, []) -> inet_send_and_rcv(get_cwd, get_cwd, State); @@ -951,16 +969,18 @@ prim_list_dir(PS, Dir) -> debug(PS, {return, Res2}), {Res2, PS3}. --spec prim_read_file_info(prim_state(), file:filename()) -> +-spec prim_read_file_info(prim_state(), file:filename(), boolean()) -> {{'ok', #file_info{}}, prim_state()} | {{'error', term()}, prim_state()}. 
-prim_read_file_info(PS, File) -> +prim_read_file_info(PS, File, FollowLinks) -> debug(PS, {read_file_info, File}), {Res2, PS2} = case name_split(PS#prim_state.primary_archive, File) of {file, PrimFile} -> - Res = prim_file:read_file_info(PrimFile), - {Res, PS}; + case FollowLinks of + true -> {prim_file:read_file_info(PrimFile), PS}; + false -> {prim_file:read_link_info(PrimFile), PS} + end; {archive, ArchiveFile, []} -> %% Fake top directory debug(PS, {archive_read_file_info, ArchiveFile}), diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl index 4ff0513321..98d7a942a6 100644 --- a/erts/preloaded/src/erlang.erl +++ b/erts/preloaded/src/erlang.erl @@ -91,7 +91,7 @@ -export([external_size/2, finish_after_on_load/2, finish_loading/1, float/1]). -export([float_to_binary/1, float_to_binary/2, float_to_list/1, float_to_list/2]). --export([fun_info/2, fun_to_list/1, function_exported/3]). +-export([fun_info/2, fun_info_mfa/1, fun_to_list/1, function_exported/3]). -export([garbage_collect/0, garbage_collect/1, garbage_collect/2]). -export([garbage_collect_message_area/0, get/0, get/1, get_keys/1]). -export([get_module_info/1, get_stacktrace/0, group_leader/0]). @@ -827,6 +827,15 @@ float_to_list(_Float, _Options) -> fun_info(_Fun, _Item) -> erlang:nif_error(undefined). +%% fun_info_mfa/1 +-spec erlang:fun_info_mfa(Fun) -> {Mod, Name, Arity} when + Fun :: function(), + Mod :: atom(), + Name :: atom(), + Arity :: non_neg_integer(). +fun_info_mfa(_Fun) -> + erlang:nif_error(undefined). + %% fun_to_list/1 -spec erlang:fun_to_list(Fun) -> string() when Fun :: function(). diff --git a/erts/preloaded/src/prim_inet.erl b/erts/preloaded/src/prim_inet.erl index 143c718130..79ff013c77 100644 --- a/erts/preloaded/src/prim_inet.erl +++ b/erts/preloaded/src/prim_inet.erl @@ -25,7 +25,7 @@ %% Primitive inet_drv interface --export([open/3, open/4, fdopen/4, close/1]). +-export([open/3, open/4, fdopen/4, fdopen/5, close/1]). -export([bind/3, listen/1, listen/2, peeloff/2]). -export([connect/3, connect/4, async_connect/4]). -export([accept/1, accept/2, async_accept/2]). @@ -70,7 +70,12 @@ open(Protocol, Family, Type, Opts) -> open(Protocol, Family, Type, Opts, ?INET_REQ_OPEN, []). fdopen(Protocol, Family, Type, Fd) when is_integer(Fd) -> - open(Protocol, Family, Type, [], ?INET_REQ_FDOPEN, ?int32(Fd)). + fdopen(Protocol, Family, Type, Fd, true). + +fdopen(Protocol, Family, Type, Fd, Bound) + when is_integer(Fd), Bound == true orelse Bound == false -> + open(Protocol, Family, Type, [], ?INET_REQ_FDOPEN, + [?int32(Fd), enc_value_2(bool, Bound)]). open(Protocol, Family, Type, Opts, Req, Data) -> Drv = protocol2drv(Protocol), diff --git a/erts/vsn.mk b/erts/vsn.mk index fff334c89f..0db4370ea8 100644 --- a/erts/vsn.mk +++ b/erts/vsn.mk @@ -17,7 +17,7 @@ # %CopyrightEnd% # -VSN = 6.0.2 +VSN = 6.1.2 # Port number 4365 in 4.2 # Port number 4366 in 4.3 |
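As a closing usage note on the erlang:fun_info_mfa/1 BIF added above: it returns the {Module, Name, Arity} triple in a single call where three erlang:fun_info/2 calls were needed before, and it raises badarg for anything that is not a fun, as the new t_fun_info_mfa/1 test exercises. A minimal sketch, assuming only the spec shown above (the external fun lists:map/2 and the atom not_a_fun are illustrative only):

    %% Minimal sketch of erlang:fun_info_mfa/1.
    fun_info_mfa_demo() ->
        F = fun lists:map/2,                        %% illustrative external fun
        {lists,map,2} = erlang:fun_info_mfa(F),     %% one call instead of three
        {module,lists} = erlang:fun_info(F, module),
        {name,map}     = erlang:fun_info(F, name),
        {arity,2}      = erlang:fun_info(F, arity),
        %% anything that is not a fun raises badarg
        {'EXIT',{badarg,_}} = (catch erlang:fun_info_mfa(not_a_fun)),
        ok.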