65 files changed, 2492 insertions, 1154 deletions
diff --git a/OTP_VERSION b/OTP_VERSION
index 01c3bca9e3..9854364f85 100644
--- a/OTP_VERSION
+++ b/OTP_VERSION
@@ -1 +1 @@
-22.0.1
+23.0-rc0
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 941c3ebbbe..0de694f449 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -3131,27 +3131,6 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
     return 0;
 }
 
-static int
-is_killed_apply(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val+2 <= Reg.val;
-}
-
-static int
-is_killed(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val <= Reg.val;
-}
-
-static int
-is_killed_by_call_fun(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val+1 <= Reg.val;
-}
-
 /*
  * Test whether register Reg is killed by make_fun instruction that
  * creates the fun given by index idx.
@@ -3172,16 +3151,6 @@ is_killed_by_make_fun(LoaderState* stp, GenOpArg Reg, GenOpArg idx)
 }
 
 /*
- * Test whether register Reg is killed by the send instruction that follows.
- */
-
-static int
-is_killed_by_send(LoaderState* stp, GenOpArg Reg)
-{
-    return Reg.type == TAG_x && 2 <= Reg.val;
-}
-
-/*
  * Generate an instruction for element/2.
  */
 
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index b81056c774..dd04018ce6 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -1987,7 +1987,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp,
                 trace_send(p, portid, msg);
 
             if (have_seqtrace(SEQ_TRACE_TOKEN(p))) {
-                seq_trace_update_send(p);
+                seq_trace_update_serial(p);
                 seq_trace_output(SEQ_TRACE_TOKEN(p), msg,
                                  SEQ_TRACE_SEND, portid, p);
             }
@@ -4866,9 +4866,13 @@ BIF_RETTYPE phash_2(BIF_ALIST_2)
 BIF_RETTYPE phash2_1(BIF_ALIST_1)
 {
     Uint32 hash;
-
-    hash = make_hash2(BIF_ARG_1);
-    BIF_RET(make_small(hash & ((1L << 27) - 1)));
+    Eterm trap_state = THE_NON_VALUE;
+    hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P);
+    if (trap_state == THE_NON_VALUE) {
+        BIF_RET(make_small(hash & ((1L << 27) - 1)));
+    } else {
+        BIF_TRAP1(bif_export[BIF_phash2_1], BIF_P, trap_state);
+    }
 }
 
 BIF_RETTYPE phash2_2(BIF_ALIST_2)
@@ -4876,6 +4880,7 @@ BIF_RETTYPE phash2_2(BIF_ALIST_2)
     Uint32 hash;
     Uint32 final_hash;
     Uint32 range;
+    Eterm trap_state = THE_NON_VALUE;
 
     /* Check for special case 2^32 */
     if (term_equals_2pow32(BIF_ARG_2)) {
@@ -4887,7 +4892,10 @@ BIF_RETTYPE phash2_2(BIF_ALIST_2)
 	}
 	range = (Uint32) u;
     }
-    hash = make_hash2(BIF_ARG_1);
+    hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P);
+    if (trap_state != THE_NON_VALUE) {
+        BIF_TRAP2(bif_export[BIF_phash2_2], BIF_P, trap_state, BIF_ARG_2);
+    }
     if (range) {
 	final_hash = hash % range; /* [0..range-1] */
     } else {
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index ff19ef018e..1329d9d55f 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -1051,7 +1051,7 @@ erts_dsig_send_msg(ErtsDSigSendContext* ctx, Eterm remote, Eterm message)
 #endif
 
     if (have_seqtrace(SEQ_TRACE_TOKEN(sender))) {
-	seq_trace_update_send(sender);
+	seq_trace_update_serial(sender);
 	token = SEQ_TRACE_TOKEN(sender);
 	seq_trace_output(token, message, SEQ_TRACE_SEND, remote, sender);
     }
@@ -1125,7 +1125,7 @@ erts_dsig_send_reg_msg(ErtsDSigSendContext* ctx, Eterm remote_name, Eterm messag
 #endif
 
     if (have_seqtrace(SEQ_TRACE_TOKEN(sender))) {
-	seq_trace_update_send(sender);
+	seq_trace_update_serial(sender);
 	token = SEQ_TRACE_TOKEN(sender);
 	seq_trace_output(token, message, SEQ_TRACE_SEND, remote_name, sender);
     }
@@ -1184,7 +1184,7 @@ erts_dsig_send_exit_tt(ErtsDSigSendContext *ctx, Eterm local, Eterm remote,
         msg = reason;
 
     if (have_seqtrace(token)) {
-	seq_trace_update_send(ctx->c_p);
+	seq_trace_update_serial(ctx->c_p);
 	seq_trace_output_exit(token, reason, SEQ_TRACE_SEND, remote, local);
         if (ctx->dep->flags & DFLAG_EXIT_PAYLOAD) {
             ctl = TUPLE4(&ctx->ctl_heap[0],
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 92e5069c71..58d586453c 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -277,6 +277,7 @@ type	SETUP_CONN_ARG	SHORT_LIVED	PROCESSES	setup_connection_argument
 type    LIST_TRAP       SHORT_LIVED     PROCESSES       list_bif_trap_state
 type    CONT_EXIT_TRAP  SHORT_LIVED     PROCESSES       continue_exit_trap_state
 type    SEQ_YIELD_STATE SHORT_LIVED     SYSTEM          dist_seq_yield_state
+type    PHASH2_TRAP     SHORT_LIVED     PROCESSES       phash2_trap_state
 
 type	ENVIRONMENT	SYSTEM		SYSTEM		environment
 
diff --git a/erts/emulator/beam/erl_bif_trace.c b/erts/emulator/beam/erl_bif_trace.c
index 711e62c795..cf5339feb9 100644
--- a/erts/emulator/beam/erl_bif_trace.c
+++ b/erts/emulator/beam/erl_bif_trace.c
@@ -1858,6 +1858,8 @@ Eterm erts_seq_trace(Process *p, Eterm arg1, Eterm arg2,
 
     if (arg1 == am_send) {
 	current_flag = SEQ_TRACE_SEND;
+    } else if (arg1 == am_spawn) {
+	current_flag = SEQ_TRACE_SPAWN;
     } else if (arg1 == am_receive) {
 	current_flag = SEQ_TRACE_RECEIVE; 
     } else if (arg1 == am_print) {
@@ -1966,8 +1968,9 @@ BIF_RETTYPE erl_seq_trace_info(Process *p, Eterm item)
     }
 
     if (have_no_seqtrace(SEQ_TRACE_TOKEN(p))) {
-	if ((item == am_send)  || (item == am_receive) || 
-	    (item == am_print) || (item == am_timestamp)
+	if ((item == am_send) || (item == am_spawn) ||
+        (item == am_receive) || (item == am_print)
+        || (item == am_timestamp)
 	    || (item == am_monotonic_timestamp)
 	    || (item == am_strict_monotonic_timestamp)) {
 	    hp = HAlloc(p,3);
@@ -1982,6 +1985,8 @@ BIF_RETTYPE erl_seq_trace_info(Process *p, Eterm item)
 
     if (item == am_send) {
 	current_flag = SEQ_TRACE_SEND;
+    } else if (item == am_spawn) {
+	current_flag = SEQ_TRACE_SPAWN;
     } else if (item == am_receive) {
 	current_flag = SEQ_TRACE_RECEIVE; 
     } else if (item == am_print) {
@@ -2031,7 +2036,7 @@ BIF_RETTYPE seq_trace_print_1(BIF_ALIST_1)
     if (have_no_seqtrace(SEQ_TRACE_TOKEN(BIF_P))) {
 	BIF_RET(am_false);
     }
-    seq_trace_update_send(BIF_P);
+    seq_trace_update_serial(BIF_P);
     seq_trace_output(SEQ_TRACE_TOKEN(BIF_P), BIF_ARG_1, 
 		     SEQ_TRACE_PRINT, NIL, BIF_P);
     BIF_RET(am_true);
@@ -2055,7 +2060,7 @@ BIF_RETTYPE seq_trace_print_2(BIF_ALIST_2)
     }
     if (SEQ_TRACE_TOKEN_LABEL(BIF_P) != BIF_ARG_1)
 	BIF_RET(am_false);
-    seq_trace_update_send(BIF_P);
+    seq_trace_update_serial(BIF_P);
     seq_trace_output(SEQ_TRACE_TOKEN(BIF_P), BIF_ARG_2, 
 		     SEQ_TRACE_PRINT, NIL, BIF_P);
     BIF_RET(am_true);
diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c
index 6645341512..c27c3b5423 100644
--- a/erts/emulator/beam/erl_message.c
+++ b/erts/emulator/beam/erl_message.c
@@ -674,7 +674,7 @@ erts_send_message(Process* sender,
          * Make sure we don't use the heap between those instances.
          */
         if (have_seqtrace(stoken)) {
-	    seq_trace_update_send(sender);
+	    seq_trace_update_serial(sender);
 	    seq_trace_output(stoken, message, SEQ_TRACE_SEND,
 			     receiver->common.id, sender);
 
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index 1fbe362330..ce43cb9e71 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -815,7 +815,7 @@ int enif_send(ErlNifEnv* env, const ErlNifPid* to_pid,
             }
 #endif
             if (have_seqtrace(stoken)) {
-                seq_trace_update_send(c_p);
+                seq_trace_update_serial(c_p);
                 seq_trace_output(stoken, msg, SEQ_TRACE_SEND,
                                  rp->common.id, c_p);
             }
diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c
index f58a606d57..fb8a860b27 100644
--- a/erts/emulator/beam/erl_proc_sig_queue.c
+++ b/erts/emulator/beam/erl_proc_sig_queue.c
@@ -995,7 +995,7 @@ send_gen_exit_signal(Process *c_p, Eterm from_tag,
 
     seq_trace = c_p && have_seqtrace(token);
     if (seq_trace)
-        seq_trace_update_send(c_p);
+        seq_trace_update_serial(c_p);
 
 #ifdef USE_VM_PROBES
     utag_sz = 0;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 1f6adb98ef..70f48ddd97 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -11019,8 +11019,13 @@ erts_set_gc_state(Process *c_p, int enable)
     ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p));
 
     if (!enable) {
-	c_p->flags |= F_DISABLE_GC;
-	return 0;
+        /* Strictly speaking it's not illegal to disable the GC when it's
+         * already disabled, but we risk enabling the GC prematurely if (for
+         * example) a BIF were to blindly disable it when trapping and then
+         * re-enable it before returning its result. */
+        ASSERT(!(c_p->flags & F_DISABLE_GC));
+        c_p->flags |= F_DISABLE_GC;
+        return 0;
     }
 
     c_p->flags &= ~F_DISABLE_GC;
@@ -11610,9 +11615,6 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->mbuf_sz = 0;
     erts_atomic_init_nob(&p->psd, (erts_aint_t) NULL);
     p->dictionary = NULL;
-    p->seq_trace_lastcnt = 0;
-    p->seq_trace_clock = 0;
-    SEQ_TRACE_TOKEN(p) = NIL;
 #ifdef USE_VM_PROBES
     DT_UTAG(p) = NIL;
     DT_UTAG_FLAGS(p) = 0;
@@ -11633,6 +11635,45 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->fp_exception = 0;
 #endif
 
+    /* seq_trace is handled before regular tracing as the latter may touch the
+     * trace token. */
+    if (have_seqtrace(SEQ_TRACE_TOKEN(parent))) {
+        Eterm token;
+        Uint token_sz;
+        Eterm *hp;
+
+        ASSERT(SEQ_TRACE_TOKEN_ARITY(parent) == 5);
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_FLAGS(parent)));
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_SERIAL(parent)));
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_LASTCNT(parent)));
+
+        seq_trace_update_serial(parent);
+
+        token = SEQ_TRACE_TOKEN(parent);
+        token_sz = size_object(token);
+
+        hp = HAlloc(p, token_sz);
+        SEQ_TRACE_TOKEN(p) = copy_struct(token, token_sz, &hp, &MSO(p));
+
+        /* The counters behave the same way on spawning as they do on messages;
+         * we don't inherit our parent's lastcnt. */
+        p->seq_trace_lastcnt = parent->seq_trace_clock;
+        p->seq_trace_clock = parent->seq_trace_clock;
+
+        ASSERT((locks & (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) ==
+               (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE));
+
+        locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+        erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+        erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+
+        seq_trace_output(token, NIL, SEQ_TRACE_SPAWN, p->common.id, parent);
+    } else {
+        SEQ_TRACE_TOKEN(p) = NIL;
+        p->seq_trace_lastcnt = 0;
+        p->seq_trace_clock = 0;
+    }
+
     if (IS_TRACED(parent)) {
 	if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOS) {
 	    ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent) & TRACEE_FLAGS);
@@ -11654,9 +11695,14 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
 		}
         }
         if (ARE_TRACE_FLAGS_ON(parent, F_TRACE_PROCS)) {
-            locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
-            erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
-            erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+            /* The locks may already be released if seq_trace is enabled as
+             * well. */
+            if ((locks & (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE))
+                  == (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) {
+                locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+                erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+                erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+            }
             trace_proc_spawn(parent, am_spawn, p->common.id, mod, func, args);
             if (so->flags & SPO_LINK)
                 trace_proc(parent, locks, parent, am_link, p->common.id);
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index 6118c671ee..93a3215c17 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -1481,6 +1481,8 @@ extern int erts_system_profile_ts_type;
 #define SEQ_TRACE_SEND     (1 << 0)
 #define SEQ_TRACE_RECEIVE  (1 << 1)
 #define SEQ_TRACE_PRINT    (1 << 2)
+/* (This three-bit gap contains the timestamp.) */
+#define SEQ_TRACE_SPAWN    (1 << 6)
 
 #define ERTS_SEQ_TRACE_FLAGS_TS_TYPE_SHIFT 3
 
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index c85a7df5ec..6e462b64ef 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -826,7 +826,7 @@ trace_receive(Process* receiver,
 }
 
 int
-seq_trace_update_send(Process *p)
+seq_trace_update_serial(Process *p)
 {
     ErtsTracer seq_tracer = erts_get_system_seq_tracer();
     ASSERT((is_tuple(SEQ_TRACE_TOKEN(p)) || is_nil(SEQ_TRACE_TOKEN(p))));
@@ -894,6 +894,7 @@ seq_trace_output_generic(Eterm token, Eterm msg, Uint type,
 
     switch (type) {
     case SEQ_TRACE_SEND:    type_atom = am_send; break;
+    case SEQ_TRACE_SPAWN:   type_atom = am_spawn; break;
     case SEQ_TRACE_PRINT:   type_atom = am_print; break;
     case SEQ_TRACE_RECEIVE: type_atom = am_receive; break;
     default:
diff --git a/erts/emulator/beam/erl_trace.h b/erts/emulator/beam/erl_trace.h
index b7844d1cb0..bb5c9ac276 100644
--- a/erts/emulator/beam/erl_trace.h
+++ b/erts/emulator/beam/erl_trace.h
@@ -163,7 +163,9 @@ seq_trace_output_generic((token), (msg), (type), (receiver), NULL, (exitfrom))
 void seq_trace_output_generic(Eterm token, Eterm msg, Uint type, 
 			      Eterm receiver, Process *process, Eterm exitfrom);
 
-int seq_trace_update_send(Process *process);
+/* Bump the sequence number if tracing is enabled; must be used before sending
+ * send/spawn trace messages. */
+int seq_trace_update_serial(Process *process);
 
 Eterm erts_seq_trace(Process *process, 
 		     Eterm atom_type, Eterm atom_true_or_false, 
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index 430ac305c5..449243a9b7 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -70,6 +70,7 @@ int erts_fit_in_bits_uint(Uint);
 Sint erts_list_length(Eterm);
 int erts_is_builtin(Eterm, Eterm, int);
 Uint32 make_hash2(Eterm);
+Uint32 trapping_make_hash2(Eterm, Eterm*, struct process*);
 Uint32 make_hash(Eterm);
 Uint32 make_internal_hash(Eterm, Uint32 salt);
 
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 7cffe7fb5c..bc8c1189a8 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -683,10 +683,11 @@ swap(R1, R2) {
     $R2 = V;
 }
 
-swap_temp(R1, R2, Tmp) {
-    Eterm V = $R1;
-    $R1 = $R2;
-    $R2 = $Tmp = V;
+swap2(R1, R2, R3) {
+    Eterm V = $R2;
+    $R2 = $R1;
+    $R1 = $R3;
+    $R3 = V;
 }
 
 test_heap(Nh, Live) {
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 10ca74cd60..1beeb67c1f 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -324,76 +324,15 @@ move_src_window2 y x x
 move_src_window3 y x x x
 move_src_window4 y x x x x
 
-# Swap registers.
-move R1=xy Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp
-
-# The compiler uses x(1022) when swapping registers. It will definitely
-# not be used again.
-swap_temp R1 R2 Tmp=x==1022 => swap R1 R2
-
-swap_temp R1 R2 Tmp | move Src Tmp => swap R1 R2 | move Src Tmp
-
-swap_temp R1 R2 Tmp | line Loc | apply Live | is_killed_apply(Tmp, Live) => \
-  swap R1 R2 | line Loc | apply Live
-swap_temp R1 R2 Tmp | line Loc | apply_last Live D | is_killed_apply(Tmp, Live) => \
-  swap R1 R2 | line Loc | apply_last Live D
-
-swap_temp R1 R2 Tmp | line Loc | call_fun Live | is_killed_by_call_fun(Tmp, Live) => \
-  swap R1 R2 | line Loc | call_fun Live
-swap_temp R1 R2 Tmp | make_fun2 OldIndex=u | is_killed_by_make_fun(Tmp, OldIndex) => \
-  swap R1 R2 | make_fun2 OldIndex
-
-swap_temp R1 R2 Tmp | line Loc | call Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | line Loc | call Live Addr
-swap_temp R1 R2 Tmp | call_only Live Addr | \
-  is_killed(Tmp, Live) => swap R1 R2 | call_only Live Addr
-swap_temp R1 R2 Tmp | call_last Live Addr D | \
-  is_killed(Tmp, Live) => swap R1 R2 | call_last Live Addr D
-
-swap_temp R1 R2 Tmp | line Loc | call_ext Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | line Loc | call_ext Live Addr
-swap_temp R1 R2 Tmp | line Loc | call_ext_only Live Addr | \
-  is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | line Loc | call_ext_last Live Addr D | \
-  is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_last Live Addr D
-
-swap_temp R1 R2 Tmp | call_ext Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext Live Addr
-swap_temp R1 R2 Tmp | call_ext_only Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | call_ext_last Live Addr D | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext_last Live Addr D
-
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-     swap R1 R2 | move Src Any | line Loc | call Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-     swap R1 R2 | move Src Any | line Loc | call_ext Live Addr
-swap_temp R1 R2 Tmp | move Src Any | call_only Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | call_only Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext_only Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | line Loc | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_fun Live | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | line Loc | call_fun Live
-
-swap_temp R1 R2 Tmp | line Loc | send | is_killed_by_send(Tmp) => \
-  swap R1 R2 | line Loc | send
-
-# swap_temp/3 with Y register operands are rare.
-swap_temp R1 R2=y Tmp => swap R1 R2 | move R2 Tmp
-swap_temp R1=y R2 Tmp => swap R1 R2 | move R2 Tmp
-
 swap R1=x R2=y => swap R2 R1
 
-swap_temp x x x
-
 swap xy x
 swap y y
 
+swap R1=x R2=x | swap R3=x R1 => swap2 R1 R2 R3
+
+swap2 x x x
+
 # move_shift
 
 move SD=x    D=x | move Src=cxy SD=x  | distinct(D, Src) => move_shift Src SD D
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index c261c8e117..acc321aa51 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -92,6 +92,12 @@
 #  define ERTS_GLB_INLINE_INCL_FUNC_DEF 0
 #endif
 
+#ifdef __GNUC__
+#  define ERTS_NOINLINE __attribute__((__noinline__))
+#else
+#  define ERTS_NOINLINE
+#endif
+
 #if defined(VALGRIND) && !defined(NO_FPE_SIGNALS)
 #  define NO_FPE_SIGNALS
 #endif
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 0bbae65e28..88cdcc2675 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -1069,54 +1069,237 @@ do {                               \
 
 #define HCONST 0x9e3779b9UL /* the golden ratio; an arbitrary value */
 
-static Uint32
-block_hash(byte *k, Uint length, Uint32 initval)
+typedef struct {
+    Uint32 a,b,c;
+} ErtsBlockHashHelperCtx;
+
+#define BLOCK_HASH_BYTES_PER_ITER 12
+
+/* The three functions below are separated into different functions even
+   though they are always used together to make trapping and handling
+   of unaligned binaries easier. Examples of how they are used can be
+   found in block_hash and make_hash2_helper.*/
+static ERTS_INLINE
+void block_hash_setup(Uint32 initval,
+                      ErtsBlockHashHelperCtx* ctx /* out parameter */)
+{
+    ctx->a = ctx->b = HCONST;
+    ctx->c = initval;           /* the previous hash value */
+}
+
+static ERTS_INLINE
+void block_hash_buffer(byte *buf,
+                       Uint buf_length,
+                       ErtsBlockHashHelperCtx* ctx /* out parameter */)
 {
-   Uint32 a,b,c;
-   Uint len;
-
-   /* Set up the internal state */
-   len = length;
-   a = b = HCONST;
-   c = initval;           /* the previous hash value */
-
-   while (len >= 12)
-   {
-      a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));
-      b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));
-      c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));
-      MIX(a,b,c);
-      k += 12; len -= 12;
-   }
-
-   c += length;
-   switch(len)              /* all the case statements fall through */
-   {
-   case 11: c+=((Uint32)k[10]<<24);
-   case 10: c+=((Uint32)k[9]<<16);
-   case 9 : c+=((Uint32)k[8]<<8);
-      /* the first byte of c is reserved for the length */
-   case 8 : b+=((Uint32)k[7]<<24);
-   case 7 : b+=((Uint32)k[6]<<16);
-   case 6 : b+=((Uint32)k[5]<<8);
-   case 5 : b+=k[4];
-   case 4 : a+=((Uint32)k[3]<<24);
-   case 3 : a+=((Uint32)k[2]<<16);
-   case 2 : a+=((Uint32)k[1]<<8);
-   case 1 : a+=k[0];
-     /* case 0: nothing left to add */
-   }
-   MIX(a,b,c);
-   return c;
+    Uint len = buf_length;
+    byte *k = buf;
+    ASSERT(buf_length % BLOCK_HASH_BYTES_PER_ITER == 0);
+    while (len >= BLOCK_HASH_BYTES_PER_ITER) {
+        ctx->a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));
+        ctx->b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));
+        ctx->c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));
+        MIX(ctx->a,ctx->b,ctx->c);
+        k += BLOCK_HASH_BYTES_PER_ITER; len -= BLOCK_HASH_BYTES_PER_ITER;
+    }
 }
 
+static ERTS_INLINE
+Uint32 block_hash_final_bytes(byte *buf,
+                              Uint buf_length,
+                              Uint full_length,
+                              ErtsBlockHashHelperCtx* ctx)
+{
+    Uint len = buf_length;
+    byte *k = buf;
+    ctx->c += full_length;
+    switch(len)
+    { /* all the case statements fall through */      
+    case 11: ctx->c+=((Uint32)k[10]<<24);
+    case 10: ctx->c+=((Uint32)k[9]<<16);
+    case 9 : ctx->c+=((Uint32)k[8]<<8);
+    /* the first byte of c is reserved for the length */
+    case 8 : ctx->b+=((Uint32)k[7]<<24);
+    case 7 : ctx->b+=((Uint32)k[6]<<16);
+    case 6 : ctx->b+=((Uint32)k[5]<<8);
+    case 5 : ctx->b+=k[4];
+    case 4 : ctx->a+=((Uint32)k[3]<<24);
+    case 3 : ctx->a+=((Uint32)k[2]<<16);
+    case 2 : ctx->a+=((Uint32)k[1]<<8);
+    case 1 : ctx->a+=k[0];
+    /* case 0: nothing left to add */
+    }
+    MIX(ctx->a,ctx->b,ctx->c);
+    return ctx->c;
+}
+
+static
 Uint32
-make_hash2(Eterm term)
+block_hash(byte *block, Uint block_length, Uint32 initval)
 {
+    ErtsBlockHashHelperCtx ctx;
+    Uint no_bytes_not_in_loop =
+        (block_length % BLOCK_HASH_BYTES_PER_ITER);
+    Uint no_bytes_to_process_in_loop =
+        block_length - no_bytes_not_in_loop;
+    byte *final_bytes = block + no_bytes_to_process_in_loop;
+    block_hash_setup(initval, &ctx);
+    block_hash_buffer(block,
+                      no_bytes_to_process_in_loop,
+                      &ctx);
+    return block_hash_final_bytes(final_bytes,
+                                  no_bytes_not_in_loop,
+                                  block_length,
+                                  &ctx);
+}
+
+typedef enum {
+    tag_primary_list,
+    arityval_subtag,
+    hamt_subtag_head_flatmap,
+    map_subtag,
+    fun_subtag,
+    neg_big_subtag,
+    sub_binary_subtag_1,
+    sub_binary_subtag_2,
+    hash2_common_1,
+    hash2_common_2,
+    hash2_common_3,
+} ErtsMakeHash2TrapLocation; 
+
+typedef struct {
+    int c;
+    Uint32 sh;
+    Eterm* ptr;
+} ErtsMakeHash2Context_TAG_PRIMARY_LIST;
+
+typedef struct {
+    int i;
+    int arity;
+    Eterm* elem;
+} ErtsMakeHash2Context_ARITYVAL_SUBTAG;
+
+typedef struct {
+    Eterm *ks;
+    Eterm *vs;
+    int i;
+    Uint size;
+} ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP;
+
+typedef struct {
+    Eterm* ptr;
+    int i;
+} ErtsMakeHash2Context_MAP_SUBTAG;
+
+typedef struct {
+    Uint num_free;
+    Eterm* bptr;
+} ErtsMakeHash2Context_FUN_SUBTAG;
+
+typedef struct {
+    Eterm* ptr;
+    Uint i;
+    Uint n;
+    Uint32 con;
+} ErtsMakeHash2Context_NEG_BIG_SUBTAG;
+
+typedef struct {
+    byte* bptr;
+    Uint sz;
+    Uint bitsize;
+    Uint bitoffs;
+    Uint no_bytes_processed;
+    ErtsBlockHashHelperCtx block_hash_ctx;
+    /* The following fields are only used when bitoffs != 0 */
+    byte* buf;
+    int done;
+
+} ErtsMakeHash2Context_SUB_BINARY_SUBTAG;
+
+typedef struct {
+    int dummy__; /* Empty structs are not supported on all platforms */
+} ErtsMakeHash2Context_EMPTY;
+
+typedef struct {
+    ErtsMakeHash2TrapLocation trap_location;
+    /* specific to the trap location: */
+    union {
+        ErtsMakeHash2Context_TAG_PRIMARY_LIST tag_primary_list;
+        ErtsMakeHash2Context_ARITYVAL_SUBTAG arityval_subtag;
+        ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP hamt_subtag_head_flatmap;
+        ErtsMakeHash2Context_MAP_SUBTAG map_subtag;
+        ErtsMakeHash2Context_FUN_SUBTAG fun_subtag;
+        ErtsMakeHash2Context_NEG_BIG_SUBTAG neg_big_subtag;
+        ErtsMakeHash2Context_SUB_BINARY_SUBTAG sub_binary_subtag_1;
+        ErtsMakeHash2Context_SUB_BINARY_SUBTAG sub_binary_subtag_2;
+        ErtsMakeHash2Context_EMPTY hash2_common_1;
+        ErtsMakeHash2Context_EMPTY hash2_common_2;
+        ErtsMakeHash2Context_EMPTY hash2_common_3;
+    } trap_location_state;
+    /* same for all trap locations: */
+    Eterm term; 
     Uint32 hash;
     Uint32 hash_xor_pairs;
-    DeclareTmpHeapNoproc(tmp_big,2);
+    ErtsEStack stack;
+} ErtsMakeHash2Context;
+
+static int make_hash2_ctx_bin_dtor(Binary *context_bin) {
+    ErtsMakeHash2Context* context = ERTS_MAGIC_BIN_DATA(context_bin);
+    DESTROY_SAVED_ESTACK(&context->stack);
+    if (context->trap_location == sub_binary_subtag_2 &&
+        context->trap_location_state.sub_binary_subtag_2.buf != NULL) {
+        erts_free(ERTS_ALC_T_PHASH2_TRAP, context->trap_location_state.sub_binary_subtag_2.buf);
+    }
+    return 1;
+}
 
+/* hash2_save_trap_state is called seldom so we want to avoid inlining */
+static ERTS_NOINLINE
+Eterm hash2_save_trap_state(Eterm state_mref,
+                            Uint32 hash_xor_pairs,
+                            Uint32 hash,
+                            Process* p,
+                            Eterm term,
+                            Eterm* ESTK_DEF_STACK(s),
+                            ErtsEStack s,
+                            ErtsMakeHash2TrapLocation trap_location,
+                            void* trap_location_state_ptr,
+                            size_t trap_location_state_size) {
+    Binary* state_bin;
+    ErtsMakeHash2Context* context;
+    if (state_mref == THE_NON_VALUE) {
+        Eterm* hp;
+        state_bin = erts_create_magic_binary(sizeof(ErtsMakeHash2Context),
+                                             make_hash2_ctx_bin_dtor);
+        hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+        state_mref = erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+    } else {
+        state_bin = erts_magic_ref2bin(state_mref);
+    }
+    context = ERTS_MAGIC_BIN_DATA(state_bin);
+    context->term = term;
+    context->hash = hash;
+    context->hash_xor_pairs = hash_xor_pairs;
+    ESTACK_SAVE(s, &context->stack);
+    context->trap_location = trap_location;
+    sys_memcpy(&context->trap_location_state,
+               trap_location_state_ptr,
+               trap_location_state_size);
+    erts_set_gc_state(p, 0);
+    BUMP_ALL_REDS(p);
+    return state_mref;
+}
+#undef NOINLINE_HASH2_SAVE_TRAP_STATE
+
+/* Writes back a magic reference to *state_mref_write_back when the
+   function traps */
+static ERTS_INLINE Uint32
+make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_back, Process* p)
+{
+    static const Uint ITERATIONS_PER_RED = 64;
+    Uint32 hash;
+    Uint32 hash_xor_pairs;
+    Eterm term = term_param;
     ERTS_UNDEF(hash_xor_pairs, 0);
 
 /* (HCONST * {2, ..., 22}) mod 2^32 */
@@ -1168,12 +1351,63 @@ make_hash2(Eterm term)
 
 #define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2)
 
+#define NOT_SSMALL28_HASH(SMALL)                          \
+    do {                                                  \
+        Uint64 t;                                         \
+        Uint32 x, y;                                      \
+        Uint32 con;                                       \
+        if (SMALL < 0) {                                  \
+            con = HCONST_10;                              \
+            t = (Uint64)(SMALL * (-1));                   \
+        } else {                                          \
+            con = HCONST_11;                              \
+            t = SMALL;                                    \
+        }                                                 \
+        x = t & 0xffffffff;                               \
+        y = t >> 32;                                      \
+        UINT32_HASH_2(x, y, con);                         \
+    } while(0)
+    
 #ifdef ARCH_64
 #  define POINTER_HASH(Ptr, AConst) UINT32_HASH_2((Uint32)(UWord)(Ptr), (((UWord)(Ptr)) >> 32), AConst)
 #else
 #  define POINTER_HASH(Ptr, AConst) UINT32_HASH(Ptr, AConst)
 #endif
 
+#define TRAP_LOCATION_NO_RED(location_name)                             \
+    do {                                                                \
+        if(can_trap && iterations_until_trap <= 0) {                    \
+                *state_mref_write_back  =                               \
+                    hash2_save_trap_state(state_mref,                   \
+                                          hash_xor_pairs,               \
+                                          hash,                         \
+                                          p,                            \
+                                          term,                         \
+                                          ESTK_DEF_STACK(s),            \
+                                          s,                            \
+                                          location_name,                \
+                                          &ctx,                         \
+                                          sizeof(ctx));                 \
+                return 0;                                               \
+            L_##location_name:                                          \
+                ctx = context->trap_location_state. location_name;      \
+        }                                                               \
+    } while(0)
+
+#define TRAP_LOCATION(location_name)                            \
+    do {                                                        \
+        if (can_trap) {                                         \
+            iterations_until_trap--;                            \
+            TRAP_LOCATION_NO_RED(location_name);                \
+        }                                                       \
+    } while(0)
+
+#define TRAP_LOCATION_NO_CTX(location_name)                             \
+    do {                                                                \
+        ErtsMakeHash2Context_EMPTY ctx;                                 \
+        TRAP_LOCATION(location_name);                                   \
+    } while(0)
+    
     /* Optimization. Simple cases before declaration of estack. */
     if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
 	switch (term & _TAG_IMMED1_MASK) {
@@ -1186,51 +1420,94 @@ make_hash2(Eterm term)
 	    break;
 	case _TAG_IMMED1_SMALL:
 	  {
-	      Sint x = signed_val(term);
-
-	      if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
-		  term = small_to_big(x, tmp_big);
-		  break;
+	      Sint small = signed_val(term);
+	      if (SMALL_BITS > 28 && !IS_SSMALL28(small)) {
+                  hash = 0;
+                  NOT_SSMALL28_HASH(small);
+                  return hash;
 	      }
 	      hash = 0;
-	      SINT32_HASH(x, HCONST);
+	      SINT32_HASH(small, HCONST);
 	      return hash;
 	  }
 	}
     };
     {
     Eterm tmp;
+    long max_iterations = 0;
+    long iterations_until_trap = 0;
+    Eterm state_mref = THE_NON_VALUE;
+    ErtsMakeHash2Context* context = NULL;
     DECLARE_ESTACK(s);
-
-    UseTmpHeapNoproc(2);
+    ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK);
+    if(can_trap){
+#ifdef DEBUG
+        (void)ITERATIONS_PER_RED;
+        iterations_until_trap = max_iterations =
+            (1103515245 * (ERTS_BIF_REDS_LEFT(p)) + 12345)  % 227;
+#else
+        iterations_until_trap = max_iterations =
+            ITERATIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+#endif
+    }
+    if (can_trap && is_internal_magic_ref(term)) {
+        Binary* state_bin;
+        state_mref = term;
+        state_bin = erts_magic_ref2bin(state_mref);
+        if (ERTS_MAGIC_BIN_DESTRUCTOR(state_bin) == make_hash2_ctx_bin_dtor) {
+            /* Restore state after a trap */
+            context = ERTS_MAGIC_BIN_DATA(state_bin);
+            term = context->term;
+            hash = context->hash;
+            hash_xor_pairs = context->hash_xor_pairs;
+            ESTACK_RESTORE(s, &context->stack);
+            ASSERT(p->flags & F_DISABLE_GC);
+            erts_set_gc_state(p, 1);
+            switch (context->trap_location) {
+            case hash2_common_3:           goto L_hash2_common_3;
+            case tag_primary_list:         goto L_tag_primary_list;
+            case arityval_subtag:          goto L_arityval_subtag;
+            case hamt_subtag_head_flatmap: goto L_hamt_subtag_head_flatmap;
+            case map_subtag:               goto L_map_subtag;
+            case fun_subtag:               goto L_fun_subtag;
+            case neg_big_subtag:           goto L_neg_big_subtag;
+            case sub_binary_subtag_1:      goto L_sub_binary_subtag_1;
+            case sub_binary_subtag_2:      goto L_sub_binary_subtag_2;
+            case hash2_common_1:           goto L_hash2_common_1;
+            case hash2_common_2:           goto L_hash2_common_2;
+            }
+        }
+    }
     hash = 0;
     for (;;) {
 	switch (primary_tag(term)) {
 	case TAG_PRIMARY_LIST:
 	{
-	    int c = 0;
-	    Uint32 sh = 0;
-	    Eterm* ptr = list_val(term);
-	    while (is_byte(*ptr)) {
+            ErtsMakeHash2Context_TAG_PRIMARY_LIST ctx = {
+                .c =  0,
+                .sh = 0,
+                .ptr = list_val(term)};
+	    while (is_byte(*ctx.ptr)) {
 		/* Optimization for strings. */
-		sh = (sh << 8) + unsigned_val(*ptr);
-		if (c == 3) {
-		    UINT32_HASH(sh, HCONST_4);
-		    c = sh = 0;
+		ctx.sh = (ctx.sh << 8) + unsigned_val(*ctx.ptr);
+		if (ctx.c == 3) {
+		    UINT32_HASH(ctx.sh, HCONST_4);
+		    ctx.c = ctx.sh = 0;
 		} else {
-		    c++;
+		    ctx.c++;
 		}
-		term = CDR(ptr);
+		term = CDR(ctx.ptr);
 		if (is_not_list(term))
 		    break;
-		ptr = list_val(term);
+		ctx.ptr = list_val(term);
+                TRAP_LOCATION(tag_primary_list);
 	    }
-	    if (c > 0)
-		UINT32_HASH(sh, HCONST_4);
+	    if (ctx.c > 0)
+		UINT32_HASH(ctx.sh, HCONST_4);
 	    if (is_list(term)) {
-		tmp = CDR(ptr);
+		tmp = CDR(ctx.ptr);
                 ESTACK_PUSH(s, tmp);
-		term = CAR(ptr);
+		term = CAR(ctx.ptr);
 	    }
 	}
 	break;
@@ -1241,34 +1518,39 @@ make_hash2(Eterm term)
 	    switch (hdr & _TAG_HEADER_MASK) {
 	    case ARITYVAL_SUBTAG:
 	    {
-		int i;
-		int arity = header_arity(hdr);
-		Eterm* elem = tuple_val(term);
-		UINT32_HASH(arity, HCONST_9);
-		if (arity == 0) /* Empty tuple */
+                ErtsMakeHash2Context_ARITYVAL_SUBTAG ctx = {
+                    .i =  0,
+                    .arity = header_arity(hdr),
+                    .elem = tuple_val(term)};
+		UINT32_HASH(ctx.arity, HCONST_9);
+		if (ctx.arity == 0) /* Empty tuple */
 		    goto hash2_common;
-		for (i = arity; ; i--) {
-		    term = elem[i];
-                    if (i == 1)
+		for (ctx.i = ctx.arity; ; ctx.i--) {
+		    term = ctx.elem[ctx.i];
+                    if (ctx.i == 1)
                         break;
                     ESTACK_PUSH(s, term);
+                    TRAP_LOCATION(arityval_subtag);
 		}
 	    }
 	    break;
             case MAP_SUBTAG:
             {
-                Eterm* ptr = boxed_val(term) + 1;
                 Uint size;
-                int i;
+                ErtsMakeHash2Context_MAP_SUBTAG ctx = {
+                    .ptr = boxed_val(term) + 1,
+                    .i = 0};
                 switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
                 case HAMT_SUBTAG_HEAD_FLATMAP:
                 {
                     flatmap_t *mp = (flatmap_t *)flatmap_val(term);
-                    Eterm *ks = flatmap_get_keys(mp);
-                    Eterm *vs = flatmap_get_values(mp);
-                    size      = flatmap_get_size(mp);
-                    UINT32_HASH(size, HCONST_16);
-                    if (size == 0)
+                    ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP ctx = {
+                        .ks = flatmap_get_keys(mp),
+                        .vs = flatmap_get_values(mp),
+                        .i = 0,
+                        .size = flatmap_get_size(mp)};
+                    UINT32_HASH(ctx.size, HCONST_16);
+                    if (ctx.size == 0)
                         goto hash2_common;
 
                     /* We want a portable hash function that is *independent* of
@@ -1281,17 +1563,18 @@ make_hash2(Eterm term)
                     ESTACK_PUSH(s, HASH_MAP_TAIL);
                     hash = 0;
                     hash_xor_pairs = 0;
-                    for (i = size - 1; i >= 0; i--) {
+                    for (ctx.i = ctx.size - 1; ctx.i >= 0; ctx.i--) {
                         ESTACK_PUSH(s, HASH_MAP_PAIR);
-                        ESTACK_PUSH(s, vs[i]);
-                        ESTACK_PUSH(s, ks[i]);
+                        ESTACK_PUSH(s, ctx.vs[ctx.i]);
+                        ESTACK_PUSH(s, ctx.ks[ctx.i]);
+                        TRAP_LOCATION(hamt_subtag_head_flatmap);
                     }
                     goto hash2_common;
                 }
 
                 case HAMT_SUBTAG_HEAD_ARRAY:
                 case HAMT_SUBTAG_HEAD_BITMAP:
-                    size = *ptr++;
+                    size = *ctx.ptr++;
                     UINT32_HASH(size, HCONST_16);
                     if (size == 0)
                         goto hash2_common;
@@ -1303,27 +1586,28 @@ make_hash2(Eterm term)
                 }
                 switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
                 case HAMT_SUBTAG_HEAD_ARRAY:
-                    i = 16;
+                    ctx.i = 16;
                     break;
                 case HAMT_SUBTAG_HEAD_BITMAP:
                 case HAMT_SUBTAG_NODE_BITMAP:
-                    i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
+                    ctx.i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
                     break;
                 default:
                     erts_exit(ERTS_ERROR_EXIT, "bad header");
                 }
-                while (i) {
-                    if (is_list(*ptr)) {
-                        Eterm* cons = list_val(*ptr);
+                while (ctx.i) {
+                    if (is_list(*ctx.ptr)) {
+                        Eterm* cons = list_val(*ctx.ptr);
                         ESTACK_PUSH(s, HASH_MAP_PAIR);
                         ESTACK_PUSH(s, CDR(cons));
                         ESTACK_PUSH(s, CAR(cons));
                     }
                     else {
-                        ASSERT(is_boxed(*ptr));
-                        ESTACK_PUSH(s, *ptr);
+                        ASSERT(is_boxed(*ctx.ptr));
+                        ESTACK_PUSH(s, *ctx.ptr);
                     }
-                    i--; ptr++;
+                    ctx.i--; ctx.ptr++;
+                    TRAP_LOCATION(map_subtag);
                 }
                 goto hash2_common;
             }
@@ -1344,22 +1628,25 @@ make_hash2(Eterm term)
 	    case FUN_SUBTAG:
 	    {
 		ErlFunThing* funp = (ErlFunThing *) fun_val(term);
-		Uint num_free = funp->num_free;
+                ErtsMakeHash2Context_FUN_SUBTAG ctx = {
+                    .num_free = funp->num_free,
+                    .bptr = NULL};
 		UINT32_HASH_2
-		    (num_free,
+		    (ctx.num_free,
 		     atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue,
 		     HCONST);
 		UINT32_HASH_2
 		    (funp->fe->old_index, funp->fe->old_uniq, HCONST);
-		if (num_free == 0) {
+		if (ctx.num_free == 0) {
 		    goto hash2_common;
 		} else {
-		    Eterm* bptr = funp->env + num_free - 1;
-		    while (num_free-- > 1) {
-			term = *bptr--;
+		    ctx.bptr = funp->env + ctx.num_free - 1;
+		    while (ctx.num_free-- > 1) {
+			term = *ctx.bptr--;
 			ESTACK_PUSH(s, term);
+                        TRAP_LOCATION(fun_subtag);
 		    }
-		    term = *bptr;
+		    term = *ctx.bptr;
 		}
 	    }
 	    break;
@@ -1367,70 +1654,190 @@ make_hash2(Eterm term)
 	    case HEAP_BINARY_SUBTAG:
 	    case SUB_BINARY_SUBTAG:
 	    {
-		byte* bptr;
-		unsigned sz = binary_size(term);
+#define BYTE_BITS 8
+                ErtsMakeHash2Context_SUB_BINARY_SUBTAG ctx = {
+                    .bptr = 0,
+                    /* !!!!!!!!!!!!!!!!!!!! OBS !!!!!!!!!!!!!!!!!!!!
+                     *
+                     * The size is truncated to 32 bits on the line
+                     * below so that the code is compatible with old
+                     * versions of the code. This means that hash
+                     * values for binaries with a size greater than
+                     * 4GB do not take all bytes in consideration.
+                     *
+                     * !!!!!!!!!!!!!!!!!!!! OBS !!!!!!!!!!!!!!!!!!!!
+                     */ 
+                    .sz = (0xFFFFFFFF & binary_size(term)),
+                    .bitsize = 0,
+                    .bitoffs = 0,
+                    .no_bytes_processed = 0
+                };
 		Uint32 con = HCONST_13 + hash;
-		Uint bitoffs;
-		Uint bitsize;
-
-		ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize);
-		if (sz == 0 && bitsize == 0) {
+                Uint iters_for_bin = MAX(1, ctx.sz / BLOCK_HASH_BYTES_PER_ITER);
+		ERTS_GET_BINARY_BYTES(term, ctx.bptr, ctx.bitoffs, ctx.bitsize);
+		if (ctx.sz == 0 && ctx.bitsize == 0) {
 		    hash = con;
-		} else {
-		    if (bitoffs == 0) {
-			hash = block_hash(bptr, sz, con);
-			if (bitsize > 0) {
-			    UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)),
-					  HCONST_15);
-			}
-		    } else {
-			byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP,
-							sz + (bitsize != 0));
-			erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize);
-			hash = block_hash(buf, sz, con);
-			if (bitsize > 0) {
-			    UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)),
-					  HCONST_15);
-			}
-			erts_free(ERTS_ALC_T_TMP, (void *) buf);
-		    }
+		} else if (ctx.bitoffs == 0 &&
+                           (!can_trap ||
+                            (iterations_until_trap - iters_for_bin) > 0)) {
+                    /* No need to trap while hashing binary */
+                    if (can_trap) iterations_until_trap -= iters_for_bin;
+                    hash = block_hash(ctx.bptr, ctx.sz, con);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.bptr[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                } else if (ctx.bitoffs == 0) {
+                    /* Need to trap while hashing binary */
+                    ErtsBlockHashHelperCtx* block_hash_ctx = &ctx.block_hash_ctx;
+                    block_hash_setup(con, block_hash_ctx);
+                    do {
+                        Uint max_bytes_to_process =
+                            iterations_until_trap <= 0 ? BLOCK_HASH_BYTES_PER_ITER :
+                            iterations_until_trap * BLOCK_HASH_BYTES_PER_ITER;
+                        Uint bytes_left = ctx.sz - ctx.no_bytes_processed;
+                        Uint even_bytes_left =
+                            bytes_left - (bytes_left % BLOCK_HASH_BYTES_PER_ITER);
+                        Uint bytes_to_process =
+                            MIN(max_bytes_to_process, even_bytes_left);
+                        block_hash_buffer(&ctx.bptr[ctx.no_bytes_processed],
+                                          bytes_to_process,
+                                          block_hash_ctx);
+                        ctx.no_bytes_processed += bytes_to_process;
+                        iterations_until_trap -=
+                            MAX(1, bytes_to_process / BLOCK_HASH_BYTES_PER_ITER);
+                        TRAP_LOCATION_NO_RED(sub_binary_subtag_1);
+                        block_hash_ctx = &ctx.block_hash_ctx; /* Restore after trap */
+                    } while ((ctx.sz - ctx.no_bytes_processed) >=
+                             BLOCK_HASH_BYTES_PER_ITER);
+                    hash = block_hash_final_bytes(ctx.bptr +
+                                                  ctx.no_bytes_processed,
+                                                  ctx.sz - ctx.no_bytes_processed,
+                                                  ctx.sz,
+                                                  block_hash_ctx);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.bptr[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                } else if (/* ctx.bitoffs != 0 && */
+                           (!can_trap ||
+                            (iterations_until_trap - iters_for_bin) > 0)) {
+                    /* No need to trap while hashing binary */
+                    Uint nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    byte *buf = erts_alloc(ERTS_ALC_T_TMP, nr_of_bytes);
+                    Uint nr_of_bits_to_copy = ctx.sz*BYTE_BITS+ctx.bitsize;
+                    if (can_trap) iterations_until_trap -= iters_for_bin;
+                    erts_copy_bits(ctx.bptr,
+                                   ctx.bitoffs, 1, buf, 0, 1, nr_of_bits_to_copy);
+                    hash = block_hash(buf, ctx.sz, con);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (buf[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                    erts_free(ERTS_ALC_T_TMP, buf);
+                } else /* ctx.bitoffs != 0 && */ {
+#ifdef DEBUG
+#define BINARY_BUF_SIZE (BLOCK_HASH_BYTES_PER_ITER * 3)
+#else
+#define BINARY_BUF_SIZE (BLOCK_HASH_BYTES_PER_ITER * 256)
+#endif
+#define BINARY_BUF_SIZE_BITS (BINARY_BUF_SIZE*BYTE_BITS)
+                    /* Need to trap while hashing binary */
+                    ErtsBlockHashHelperCtx* block_hash_ctx = &ctx.block_hash_ctx;
+                    Uint nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    ERTS_CT_ASSERT(BINARY_BUF_SIZE % BLOCK_HASH_BYTES_PER_ITER == 0);
+                    ctx.buf = erts_alloc(ERTS_ALC_T_PHASH2_TRAP,
+                                         MIN(nr_of_bytes, BINARY_BUF_SIZE));
+                    block_hash_setup(con, block_hash_ctx);
+                    do {
+                        Uint bytes_left =
+                            ctx.sz - ctx.no_bytes_processed;
+                        Uint even_bytes_left =
+                            bytes_left - (bytes_left % BLOCK_HASH_BYTES_PER_ITER);
+                        Uint bytes_to_process =
+                            MIN(BINARY_BUF_SIZE, even_bytes_left);
+                        Uint nr_of_bits_left =
+                            (ctx.sz*BYTE_BITS+ctx.bitsize) -
+                            ctx.no_bytes_processed*BYTE_BITS; 
+                        Uint nr_of_bits_to_copy =
+                            MIN(nr_of_bits_left, BINARY_BUF_SIZE_BITS);
+                        ctx.done = nr_of_bits_left == nr_of_bits_to_copy;
+                        erts_copy_bits(ctx.bptr + ctx.no_bytes_processed,
+                                       ctx.bitoffs, 1, ctx.buf, 0, 1,
+                                       nr_of_bits_to_copy);
+                        block_hash_buffer(ctx.buf,
+                                          bytes_to_process,
+                                          block_hash_ctx);
+                        ctx.no_bytes_processed += bytes_to_process;
+                        iterations_until_trap -=
+                            MAX(1, bytes_to_process / BLOCK_HASH_BYTES_PER_ITER);
+                        TRAP_LOCATION_NO_RED(sub_binary_subtag_2);
+                        block_hash_ctx = &ctx.block_hash_ctx; /* Restore after trap */
+                    } while (!ctx.done);
+                    nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    hash = block_hash_final_bytes(ctx.buf +
+                                                  (ctx.no_bytes_processed -
+                                                   ((nr_of_bytes-1) / BINARY_BUF_SIZE) *  BINARY_BUF_SIZE),
+                                                  ctx.sz - ctx.no_bytes_processed,
+                                                  ctx.sz,
+                                                  block_hash_ctx);
+                    if (ctx.bitsize > 0) {
+                        Uint last_byte_index =
+                            nr_of_bytes - (((nr_of_bytes-1) / BINARY_BUF_SIZE) *  BINARY_BUF_SIZE) -1;
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.buf[last_byte_index] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                    erts_free(ERTS_ALC_T_PHASH2_TRAP, ctx.buf);
+                    context->trap_location_state.sub_binary_subtag_2.buf = NULL;
 		}
 		goto hash2_common;
+#undef BYTE_BITS
+#undef BINARY_BUF_SIZE
+#undef BINARY_BUF_SIZE_BITS
 	    }
 	    break;
 	    case POS_BIG_SUBTAG:
 	    case NEG_BIG_SUBTAG:
 	    {
-		Eterm* ptr = big_val(term);
-		Uint i = 0;
-		Uint n = BIG_SIZE(ptr);
-		Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11;
+		Eterm* big_val_ptr = big_val(term);
+                ErtsMakeHash2Context_NEG_BIG_SUBTAG ctx = {
+                    .ptr = big_val_ptr,
+                    .i = 0,
+                    .n = BIG_SIZE(big_val_ptr),
+                    .con = BIG_SIGN(big_val_ptr) ? HCONST_10 : HCONST_11};
 #if D_EXP == 16
 		do {
 		    Uint32 x, y;
-		    x = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
-		    y = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    x = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    x += (Uint32)(ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0) << 16;
+		    y = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    y += (Uint32)(ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0) << 16;
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #elif D_EXP == 32
 		do {
 		    Uint32 x, y;
-		    x = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    y = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    x = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    y = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #elif D_EXP == 64
 		do {
 		    Uint t;
 		    Uint32 x, y;
-                    ASSERT(i < n);
-		    t = BIG_DIGIT(ptr, i++);
+                    ASSERT(ctx.i < ctx.n);
+		    t = BIG_DIGIT(ctx.ptr, ctx.i++);
 		    x = t & 0xffffffff;
 		    y = t >> 32;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #else
 #error "unsupported D_EXP size"
 #endif
@@ -1508,13 +1915,13 @@ make_hash2(Eterm term)
 		}
 	    case _TAG_IMMED1_SMALL:
 	      {
-		  Sint x = signed_val(term);
+		  Sint small = signed_val(term);
+		  if (SMALL_BITS > 28 && !IS_SSMALL28(small)) {
+                      NOT_SSMALL28_HASH(small);
+		  } else {
+		      SINT32_HASH(small, HCONST);
+                  }
 
-		  if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
-		      term = small_to_big(x, tmp_big);
-		      break;
-		  }
-		  SINT32_HASH(x, HCONST);
 		  goto hash2_common;
 	      }
 	    }
@@ -1529,7 +1936,10 @@ make_hash2(Eterm term)
 
 	    if (ESTACK_ISEMPTY(s)) {
 		DESTROY_ESTACK(s);
-		UnUseTmpHeapNoproc(2);
+                if (can_trap) {
+                    BUMP_REDS(p, (max_iterations - iterations_until_trap) / ITERATIONS_PER_RED);
+                    ASSERT(!(p->flags & F_DISABLE_GC));
+                }
 		return hash;
 	    }
 
@@ -1540,18 +1950,37 @@ make_hash2(Eterm term)
 		    hash = (Uint32) ESTACK_POP(s);
                     UINT32_HASH(hash_xor_pairs, HCONST_19);
 		    hash_xor_pairs = (Uint32) ESTACK_POP(s);
+                    TRAP_LOCATION_NO_CTX(hash2_common_1);
 		    goto hash2_common;
 		}
 		case HASH_MAP_PAIR:
 		    hash_xor_pairs ^= hash;
                     hash = 0;
+                    TRAP_LOCATION_NO_CTX(hash2_common_2);
 		    goto hash2_common;
 		default:
 		    break;
 	    }
+
 	}
+        TRAP_LOCATION_NO_CTX(hash2_common_3);
     }
     }
+#undef TRAP_LOCATION_NO_RED
+#undef TRAP_LOCATION
+#undef TRAP_LOCATION_NO_CTX
+}
+
+Uint32
+make_hash2(Eterm term)
+{
+    return make_hash2_helper(term, 0, NULL, NULL);
+}
+
+Uint32
+trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p)
+{
+    return make_hash2_helper(term, 1, state_mref_write_back, p);
 }
 
 /* Term hash function for internal use.
diff --git a/erts/emulator/nifs/common/prim_file_nif.c b/erts/emulator/nifs/common/prim_file_nif.c
index 3df04e42e2..9e9a14844e 100644
--- a/erts/emulator/nifs/common/prim_file_nif.c
+++ b/erts/emulator/nifs/common/prim_file_nif.c
@@ -231,6 +231,7 @@ static int load(ErlNifEnv *env, void** priv_data, ERL_NIF_TERM prim_file_pid)
     am_append = enif_make_atom(env, "append");
     am_sync = enif_make_atom(env, "sync");
     am_skip_type_check = enif_make_atom(env, "skip_type_check");
+    am_directory = enif_make_atom(env, "directory");
 
     am_read_write = enif_make_atom(env, "read_write");
     am_none = enif_make_atom(env, "none");
@@ -447,6 +448,8 @@ static enum efile_modes_t efile_translate_modelist(ErlNifEnv *env, ERL_NIF_TERM
             modes |= EFILE_MODE_SYNC;
         } else if(enif_is_identical(head, am_skip_type_check)) {
             modes |= EFILE_MODE_SKIP_TYPE_CHECK;
+        } else if (enif_is_identical(head, am_directory)) {
+            modes |= EFILE_MODE_DIRECTORY;
         } else {
             /* Modes like 'raw', 'ram', 'delayed_writes' etc are handled
              * further up the chain. */
diff --git a/erts/emulator/nifs/common/prim_file_nif.h b/erts/emulator/nifs/common/prim_file_nif.h
index b2e30c59dd..020714a03b 100644
--- a/erts/emulator/nifs/common/prim_file_nif.h
+++ b/erts/emulator/nifs/common/prim_file_nif.h
@@ -30,6 +30,8 @@ enum efile_modes_t {
     EFILE_MODE_SKIP_TYPE_CHECK = (1 << 5), /* Special for device files on Unix. */
     EFILE_MODE_NO_TRUNCATE = (1 << 6), /* Special for reopening on VxWorks. */
 
+    EFILE_MODE_DIRECTORY = (1 << 7),
+
     EFILE_MODE_READ_WRITE = EFILE_MODE_READ | EFILE_MODE_WRITE
 };
 
diff --git a/erts/emulator/nifs/unix/unix_prim_file.c b/erts/emulator/nifs/unix/unix_prim_file.c
index 169b193993..20021b9358 100644
--- a/erts/emulator/nifs/unix/unix_prim_file.c
+++ b/erts/emulator/nifs/unix/unix_prim_file.c
@@ -107,7 +107,7 @@ ERL_NIF_TERM efile_get_handle(ErlNifEnv *env, efile_data_t *d) {
     return result;
 }
 
-static int open_file_type_check(const efile_path_t *path, int fd) {
+static int open_file_is_dir(const efile_path_t *path, int fd) {
     struct stat file_info;
     int error;
 
@@ -119,27 +119,14 @@ static int open_file_type_check(const efile_path_t *path, int fd) {
     (void)path;
 #endif
 
-    if(error < 0) {
-        /* If we failed to stat assume success and let the next call handle the
-         * error. The old driver checked whether the file was to be used
-         * immediately in a read within the call, but the new implementation
-         * never does that. */
-         return 1;
-    }
-
-    /* Allow everything that isn't a directory, and error out on the next call
-     * if it's unsupported. */
-    if(S_ISDIR(file_info.st_mode)) {
-        return 0;
-    }
-
-    return 1;
+    /* Assume not a directory on error. */
+    return error == 0 && S_ISDIR(file_info.st_mode);
 }
 
 posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
         ErlNifResourceType *nif_type, efile_data_t **d) {
 
-    int flags, fd;
+    int mode, flags, fd;
 
     flags = 0;
 
@@ -174,18 +161,38 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
 #endif
     }
 
+    if(modes & EFILE_MODE_DIRECTORY) {
+        mode = DIR_MODE;
+#ifdef O_DIRECTORY
+        flags |= O_DIRECTORY;
+#endif
+    } else {
+        mode = FILE_MODE;
+    }
+
     do {
-        fd = open((const char*)path->data, flags, FILE_MODE);
+        fd = open((const char*)path->data, flags, mode);
     } while(fd == -1 && errno == EINTR);
 
     if(fd != -1) {
         efile_unix_t *u;
 
-        if(!(modes & EFILE_MODE_SKIP_TYPE_CHECK) && !open_file_type_check(path, fd)) {
+#ifndef O_DIRECTORY
+        /* On platforms without O_DIRECTORY support, ensure that using the
+         * directory flag to open a file fails. */
+        if(!(modes & EFILE_MODE_SKIP_TYPE_CHECK) &&
+           (modes & EFILE_MODE_DIRECTORY) && !open_file_is_dir(path, fd)) {
             close(fd);
+            return ENOTDIR;
+        }
+#endif
 
-            /* This is blatantly incorrect, but we're documented as returning
-             * this for everything that isn't a file. */
+        /* open() works on directories without the O_DIRECTORY flag but for
+         * consistency across platforms we require that the user has requested
+         * directory mode. */
+        if(!(modes & EFILE_MODE_SKIP_TYPE_CHECK) &&
+           !(modes & EFILE_MODE_DIRECTORY) && open_file_is_dir(path, fd)) {
+            close(fd);
             return EISDIR;
         }
 
diff --git a/erts/emulator/nifs/win32/win_prim_file.c b/erts/emulator/nifs/win32/win_prim_file.c
index e7d3924240..13306104c0 100644
--- a/erts/emulator/nifs/win32/win_prim_file.c
+++ b/erts/emulator/nifs/win32/win_prim_file.c
@@ -270,6 +270,17 @@ static int normalize_path_result(ErlNifBinary *path) {
 }
 
 /* @brief Checks whether all the given attributes are set on the object at the
+ * given handle. Note that it assumes false on errors. */
+static int handle_has_file_attributes(HANDLE handle, DWORD mask) {
+    BY_HANDLE_FILE_INFORMATION native_file_info;
+    if(!GetFileInformationByHandle(handle, &native_file_info)) {
+        return 0;
+    }
+
+    return !!((native_file_info.dwFileAttributes & mask) == mask);
+}
+
+/* @brief Checks whether all the given attributes are set on the object at the
  * given path. Note that it assumes false on errors. */
 static int has_file_attributes(const efile_path_t *path, DWORD mask) {
     DWORD attributes = GetFileAttributesW((WCHAR*)path->data);
@@ -412,10 +423,15 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
 
     ASSERT_PATH_FORMAT(path);
 
+    attributes = 0;
     access_flags = 0;
     open_mode = 0;
 
-    if(modes & EFILE_MODE_READ && !(modes & EFILE_MODE_WRITE)) {
+    if(modes & EFILE_MODE_DIRECTORY) {
+        attributes = FILE_FLAG_BACKUP_SEMANTICS;
+        access_flags = GENERIC_READ;
+        open_mode = OPEN_EXISTING;
+    } else if(modes & EFILE_MODE_READ && !(modes & EFILE_MODE_WRITE)) {
         access_flags = GENERIC_READ;
         open_mode = OPEN_EXISTING;
     } else if(modes & EFILE_MODE_WRITE && !(modes & EFILE_MODE_READ)) {
@@ -438,9 +454,9 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
     }
 
     if(modes & EFILE_MODE_SYNC) {
-        attributes = FILE_FLAG_WRITE_THROUGH;
+        attributes |= FILE_FLAG_WRITE_THROUGH;
     } else {
-        attributes = FILE_ATTRIBUTE_NORMAL;
+        attributes |= FILE_ATTRIBUTE_NORMAL;
     }
 
     handle = CreateFileW((WCHAR*)path->data, access_flags,
@@ -449,6 +465,12 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
     if(handle != INVALID_HANDLE_VALUE) {
         efile_win_t *w;
 
+        /* Directory mode specified, but path is not a directory. */
+        if((modes & EFILE_MODE_DIRECTORY) && !handle_has_file_attributes(handle, FILE_ATTRIBUTE_DIRECTORY)) {
+            CloseHandle(handle);
+            return ENOTDIR;
+        }
+
         w = (efile_win_t*)enif_alloc_resource(nif_type, sizeof(efile_win_t));
         w->handle = handle;
 
@@ -461,7 +483,7 @@ posix_errno_t efile_open(const efile_path_t *path, enum efile_modes_t modes,
 
         /* Rewrite all failures on directories to EISDIR to match the old
          * driver. */
-        if(has_file_attributes(path, FILE_ATTRIBUTE_DIRECTORY)) {
+        if(!(modes & EFILE_MODE_DIRECTORY) && has_file_attributes(path, FILE_ATTRIBUTE_DIRECTORY)) {
             return EISDIR;
         }
 
diff --git a/erts/emulator/test/Makefile b/erts/emulator/test/Makefile
index 019af2162f..731aa66924 100644
--- a/erts/emulator/test/Makefile
+++ b/erts/emulator/test/Makefile
@@ -90,6 +90,7 @@ MODULES= \
 	gc_SUITE \
 	guard_SUITE \
 	hash_SUITE \
+	hash_property_test_SUITE \
 	hibernate_SUITE \
 	hipe_SUITE \
 	iovec_SUITE \
@@ -252,7 +253,7 @@ release_tests_spec: make_emakefile
 	$(INSTALL_DATA) $(NO_OPT_ERL_FILES) "$(RELSYSDIR)"
 	$(INSTALL_DATA) $(NATIVE_ERL_FILES) "$(RELSYSDIR)"
 	chmod -R u+w "$(RELSYSDIR)"
-	tar cf - *_SUITE_data | (cd "$(RELSYSDIR)"; tar xf -)
+	tar cf - *_SUITE_data property_test | (cd "$(RELSYSDIR)"; tar xf -)
 
 release_docs_spec:
 
diff --git a/erts/emulator/test/emulator.spec b/erts/emulator/test/emulator.spec
index 7a6dd83020..087bd8880d 100644
--- a/erts/emulator/test/emulator.spec
+++ b/erts/emulator/test/emulator.spec
@@ -1,2 +1,3 @@
 {enable_builtin_hooks, false}.
 {suites,"../emulator_test",all}.
+{skip_groups,"../emulator_test",hash_SUITE,[phash2_benchmark],"Benchmark only"}.
diff --git a/erts/emulator/test/emulator_bench.spec b/erts/emulator/test/emulator_bench.spec
index 03638bfa23..8b1bb71a40 100644
--- a/erts/emulator/test/emulator_bench.spec
+++ b/erts/emulator/test/emulator_bench.spec
@@ -1,3 +1,4 @@
 {groups,"../emulator_test",estone_SUITE,[estone_bench]}.
 {groups,"../emulator_test",binary_SUITE,[iolist_size_benchmarks]}.
 {groups,"../emulator_test",erts_debug_SUITE,[interpreter_size_bench]}.
+{groups,"../emulator_test",hash_SUITE,[phash2_benchmark]}.
diff --git a/erts/emulator/test/hash_SUITE.erl b/erts/emulator/test/hash_SUITE.erl
index 3cbb3c7d5f..1bf9e033bf 100644
--- a/erts/emulator/test/hash_SUITE.erl
+++ b/erts/emulator/test/hash_SUITE.erl
@@ -33,7 +33,25 @@
 -module(hash_SUITE).
 -export([basic_test/0,cmp_test/1,range_test/0,spread_test/1,
 	 phash2_test/0, otp_5292_test/0,
-         otp_7127_test/0]).
+         otp_7127_test/0, 
+         run_phash2_benchmarks/0,
+         test_phash2_binary_aligned_and_unaligned_equal/1,
+         test_phash2_4GB_plus_bin/1,
+         test_phash2_10MB_plus_bin/1,
+         test_phash2_large_map/1,
+         test_phash2_shallow_long_list/1,
+         test_phash2_deep_list/1,
+         test_phash2_deep_tuple/1,
+         test_phash2_deep_tiny/1,
+         test_phash2_with_42/1,
+         test_phash2_with_short_tuple/1,
+         test_phash2_with_short_list/1,
+         test_phash2_with_tiny_bin/1,
+         test_phash2_with_tiny_unaligned_sub_binary/1,
+         test_phash2_with_small_unaligned_sub_binary/1,
+         test_phash2_with_large_bin/1,
+         test_phash2_with_large_unaligned_sub_binary/1,
+         test_phash2_with_super_large_unaligned_sub_binary/1]).
 
 %%
 %% Define to run outside of test server
@@ -43,13 +61,15 @@
 %%
 %% Define for debug output
 %%
-%-define(debug,1).
+-define(debug,1).
 
 -ifdef(STANDALONE).
 -define(config(A,B),config(A,B)).
+-record(event, {name, data}).
 -export([config/2]).
 -else.
 -include_lib("common_test/include/ct.hrl").
+-include_lib("common_test/include/ct_event.hrl").
 -endif.
 
 -ifdef(debug).
@@ -67,12 +87,15 @@
 -ifdef(STANDALONE).
 config(priv_dir,_) ->
     ".".
+notify(X) -> 
+    erlang:display(X).
 -else.
 %% When run in test server.
--export([all/0, suite/0,
+-export([groups/0, all/0, suite/0,
 	 test_basic/1,test_cmp/1,test_range/1,test_spread/1,
 	 test_phash2/1,otp_5292/1,bit_level_binaries/1,otp_7127/1,
-         test_hash_zero/1]).
+         test_hash_zero/1, init_per_suite/1, end_per_suite/1,
+         init_per_group/2, end_per_group/2]).
 
 suite() ->
     [{ct_hooks,[ts_install_cth]},
@@ -81,7 +104,71 @@ suite() ->
 all() -> 
     [test_basic, test_cmp, test_range, test_spread,
      test_phash2, otp_5292, bit_level_binaries, otp_7127,
-     test_hash_zero].
+     test_hash_zero, test_phash2_binary_aligned_and_unaligned_equal,
+     test_phash2_4GB_plus_bin,
+     test_phash2_10MB_plus_bin,
+     {group, phash2_benchmark_tests},
+     {group, phash2_benchmark}].
+
+get_phash2_benchmarks() ->
+    [
+     test_phash2_large_map,
+     test_phash2_shallow_long_list,
+     test_phash2_deep_list,
+     test_phash2_deep_tuple,
+     test_phash2_deep_tiny,
+     test_phash2_with_42,
+     test_phash2_with_short_tuple,
+     test_phash2_with_short_list,
+     test_phash2_with_tiny_bin,
+     test_phash2_with_tiny_unaligned_sub_binary,
+     test_phash2_with_small_unaligned_sub_binary,
+     test_phash2_with_large_bin,
+     test_phash2_with_large_unaligned_sub_binary,
+     test_phash2_with_super_large_unaligned_sub_binary
+    ].
+
+groups() -> 
+    [
+     {
+      phash2_benchmark_tests,
+      [],
+      get_phash2_benchmarks()
+     },
+     {
+      phash2_benchmark,
+      [],
+      get_phash2_benchmarks()
+     }
+    ].
+
+
+init_per_suite(Config) ->
+    io:format("START APPS~n"),
+    A0 = case application:start(sasl) of
+	     ok -> [sasl];
+	     _ -> []
+	 end,
+    A = case application:start(os_mon) of
+	     ok -> [os_mon|A0];
+	     _ -> A0
+	 end,
+    io:format("APPS STARTED~n"),
+    [{started_apps, A}|Config].
+
+end_per_suite(Config) ->
+    As = proplists:get_value(started_apps, Config),
+    lists:foreach(fun (A) -> application:stop(A) end, As),
+    Config.
+
+init_per_group(phash2_benchmark_tests, Config) ->
+    [phash2_benchmark_tests |Config];
+init_per_group(_, Config) ->
+    Config.
+
+end_per_group(_, Config) ->
+    Config.
+
 
 %% Tests basic functionality of erlang:phash and that the
 %% hashes has not changed (neither hash nor phash)
@@ -119,6 +206,9 @@ otp_7127(Config) when is_list(Config) ->
 
 test_hash_zero(Config) when is_list(Config) ->
     hash_zero_test().
+
+notify(X) ->
+    ct_event:notify(X).
 -endif.
 
 
@@ -354,6 +444,7 @@ phash2_test() ->
 
 	 %% bit-level binaries
 	 {<<0:7>>, 1055790816},
+	 {(fun()-> B = <<255,7:3>>, <<_:4,D/bitstring>> = B, D end)(), 911751529},
 	 {<<"abc",13:4>>, 670412287},
 	 {<<5:3,"12345678901234567890">>, 289973273},
 
@@ -424,6 +515,159 @@ phash2_test() ->
     [] = [{E,H,H2} || {E,H} <- L, (H2 = erlang:phash2(E, Max)) =/= H],
     ok.
 
+test_phash2_binary_aligned_and_unaligned_equal(Config) when is_list(Config) ->
+    erts_debug:set_internal_state(available_internal_state, true),
+    test_aligned_and_unaligned_equal_up_to(256*12+255),
+    erts_debug:set_internal_state(available_internal_state, false).
+
+test_aligned_and_unaligned_equal_up_to(BinSize) ->
+    Results =
+        lists:map(fun(Size) ->
+                          test_aligned_and_unaligned_equal(Size)
+                  end, lists:seq(1, BinSize)),
+    %% DataDir = filename:join(filename:dirname(code:which(?MODULE)), "hash_SUITE_data"),
+    %% ExpResFile = filename:join(DataDir, "phash2_bin_expected_results.txt"),
+    %% {ok, [ExpRes]} = file:consult(ExpResFile),
+    %% %% ok = file:write_file(ExpResFile, io_lib:format("~w.~n", [Results])),
+    %% Results = ExpRes,
+    110469206 = erlang:phash2(Results).
+
+test_aligned_and_unaligned_equal(BinSize) ->
+    Bin = make_random_bin(BinSize),
+    LastByte = last_byte(Bin),
+    LastInBitstring = LastByte rem 11,
+    Bitstring = << Bin/binary, <<LastInBitstring:5>>/bitstring >>,
+    UnalignedBin = make_unaligned_sub_bitstring(Bin),
+    UnalignedBitstring = make_unaligned_sub_bitstring(Bitstring),
+    case erts_debug:get_internal_state(available_internal_state) of
+        false -> erts_debug:set_internal_state(available_internal_state, true);
+        _ -> ok
+    end,
+    erts_debug:set_internal_state(reds_left, 3),
+    BinHash = erlang:phash2(Bin),
+    BinHash = erlang:phash2(Bin),
+    erts_debug:set_internal_state(reds_left, 3),
+    UnalignedBinHash = erlang:phash2(UnalignedBin),
+    UnalignedBinHash = erlang:phash2(UnalignedBin),
+    BinHash = UnalignedBinHash,
+    erts_debug:set_internal_state(reds_left, 3),
+    BitstringHash = erlang:phash2(Bitstring),
+    BitstringHash = erlang:phash2(Bitstring),
+    erts_debug:set_internal_state(reds_left, 3),
+    UnalignedBitstringHash = erlang:phash2(UnalignedBitstring),
+    UnalignedBitstringHash = erlang:phash2(UnalignedBitstring),
+    BitstringHash = UnalignedBitstringHash,
+    {BinHash, BitstringHash}.
+
+last_byte(Bin) ->
+    NotLastByteSize = (erlang:bit_size(Bin)) - 8,
+    <<_:NotLastByteSize/bitstring, LastByte:8>> = Bin,
+    LastByte.
+
+test_phash2_4GB_plus_bin(Config) when is_list(Config) ->
+    run_when_enough_resources(
+      fun() ->
+              erts_debug:set_internal_state(available_internal_state, true),
+              %% Created Bin4GB here so it only needs to be created once
+              erts_debug:set_internal_state(force_gc, self()),
+              Bin4GB = get_4GB_bin(),
+              test_phash2_plus_bin_helper1(Bin4GB, <<>>, <<>>, 13708901),
+              erts_debug:set_internal_state(force_gc, self()),
+              test_phash2_plus_bin_helper1(Bin4GB, <<>>, <<3:5>>, 66617678),
+              erts_debug:set_internal_state(force_gc, self()),
+              test_phash2_plus_bin_helper1(Bin4GB, <<13>>, <<>>, 31308392),
+              erts_debug:set_internal_state(force_gc, self()),
+              erts_debug:set_internal_state(available_internal_state, false)
+      end).
+
+
+test_phash2_10MB_plus_bin(Config) when is_list(Config) ->
+    erts_debug:set_internal_state(available_internal_state, true),
+    erts_debug:set_internal_state(force_gc, self()),
+    Bin10MB = get_10MB_bin(),
+    test_phash2_plus_bin_helper1(Bin10MB, <<>>, <<>>, 22776267),
+    erts_debug:set_internal_state(force_gc, self()),
+    test_phash2_plus_bin_helper1(Bin10MB, <<>>, <<3:5>>, 124488972),
+    erts_debug:set_internal_state(force_gc, self()),
+    test_phash2_plus_bin_helper1(Bin10MB, <<13>>, <<>>, 72958346),
+    erts_debug:set_internal_state(force_gc, self()),
+    erts_debug:set_internal_state(available_internal_state, false).
+
+get_10MB_bin() ->
+    TmpBin = make_random_bin(10239),
+    Bin = erlang:iolist_to_binary([0, TmpBin]),
+    IOList10MB = duplicate_iolist(Bin, 10),
+    Bin10MB = erlang:iolist_to_binary(IOList10MB),
+    10485760 = size(Bin10MB),
+    Bin10MB.
+
+get_4GB_bin() ->
+    TmpBin = make_random_bin(65535),
+    Bin = erlang:iolist_to_binary([0, TmpBin]),
+    IOList4GB = duplicate_iolist(Bin, 16),
+    Bin4GB = erlang:iolist_to_binary(IOList4GB),
+    4294967296 = size(Bin4GB),
+    Bin4GB.
+
+duplicate_iolist(IOList, 0) ->
+    IOList;
+duplicate_iolist(IOList, NrOfTimes) ->
+    duplicate_iolist([IOList, IOList], NrOfTimes - 1).
+
+test_phash2_plus_bin_helper1(Bin4GB, ExtraBytes, ExtraBits, ExpectedHash) ->
+    test_phash2_plus_bin_helper2(Bin4GB, fun id/1, ExtraBytes, ExtraBits, ExpectedHash),
+    test_phash2_plus_bin_helper2(Bin4GB, fun make_unaligned_sub_bitstring/1, ExtraBytes, ExtraBits, ExpectedHash).
+
+test_phash2_plus_bin_helper2(Bin, TransformerFun, ExtraBytes, ExtraBits, ExpectedHash) ->
+    ExtraBitstring = << ExtraBytes/binary, ExtraBits/bitstring >>,
+    LargerBitstring = << ExtraBytes/binary,
+                         ExtraBits/bitstring,
+                         Bin/bitstring >>,
+    LargerTransformedBitstring = TransformerFun(LargerBitstring),
+    ExtraBitstringHash = erlang:phash2(ExtraBitstring),
+    ExpectedHash =
+        case size(LargerTransformedBitstring) < 4294967296 of
+            true ->
+                erts_debug:set_internal_state(force_gc, self()),
+                erts_debug:set_internal_state(reds_left, 1),
+                Hash = erlang:phash2(LargerTransformedBitstring),
+                Hash = erlang:phash2(LargerTransformedBitstring),
+                Hash;
+            false ->
+                erts_debug:set_internal_state(force_gc, self()),
+                erts_debug:set_internal_state(reds_left, 1),
+                ExtraBitstringHash = erlang:phash2(LargerTransformedBitstring),
+                ExtraBitstringHash = erlang:phash2(LargerTransformedBitstring),
+                ExtraBitstringHash
+        end.
+
+run_when_enough_resources(Fun) ->
+    case {total_memory(), erlang:system_info(wordsize)} of
+        {Mem, 8} when is_integer(Mem) andalso Mem >= 31 ->
+            Fun();
+        {Mem, WordSize} ->
+            {skipped,
+             io_lib:format("Not enough resources (System Memory >= ~p, Word Size = ~p)",
+                           [Mem, WordSize])}
+    end.
+
+%% Total memory in GB
+total_memory() ->
+    try
+        MemoryData = memsup:get_system_memory_data(),
+        case lists:keysearch(total_memory, 1, MemoryData) of
+            {value, {total_memory, TM}} ->
+        	TM div (1024*1024*1024);
+            false ->
+        	{value, {system_total_memory, STM}} =
+        	    lists:keysearch(system_total_memory, 1, MemoryData),
+        	STM div (1024*1024*1024)
+        end
+    catch
+        _ : _ ->
+            undefined
+    end.
+
 -ifdef(FALSE).
 f1() ->
     abc.
@@ -436,14 +680,23 @@ f3(X, Y) ->
 -endif.
 
 otp_5292_test() ->
-    PH = fun(E) -> [erlang:phash(E, 1 bsl 32),
-                    erlang:phash(-E, 1 bsl 32),
-                    erlang:phash2(E, 1 bsl 32),
-                    erlang:phash2(-E, 1 bsl 32)]
-            end,
+    PH = fun(E) ->
+                 EInList = [1, 2, 3, E],
+                 EInList2 = [E, 1, 2, 3],
+                 NegEInList = [1, 2, 3, -E],
+                 NegEInList2 = [-E, 1, 2, 3],
+                 [erlang:phash(E, 1 bsl 32),
+                  erlang:phash(-E, 1 bsl 32),
+                  erlang:phash2(E, 1 bsl 32),
+                  erlang:phash2(-E, 1 bsl 32),
+                  erlang:phash2(EInList, 1 bsl 32),
+                  erlang:phash2(EInList2, 1 bsl 32),
+                  erlang:phash2(NegEInList, 1 bsl 32),
+                  erlang:phash2(NegEInList2, 1 bsl 32)]
+         end,
     S2 = md5([md5(hash_int(S, E, PH)) || {Start, N, Sz} <- d(),
                                          {S, E} <- int(Start, N, Sz)]),
-    <<124,81,198,121,174,233,19,137,10,83,33,80,226,111,238,99>> = S2,
+    <<234,63,192,76,253,57,250,32,44,11,73,1,161,102,14,238>> = S2,
     ok.
 
 d() ->
@@ -684,3 +937,313 @@ unaligned_sub_bitstr(Bin0) when is_bitstring(Bin0) ->
 
 id(I) -> I.
     
+
+%% Benchmarks for phash2
+
+run_phash2_benchmarks() ->
+    Benchmarks = [
+                  test_phash2_large_map,
+                  test_phash2_shallow_long_list,
+                  test_phash2_deep_list,
+                  test_phash2_deep_tuple,
+                  test_phash2_deep_tiny,
+                  test_phash2_with_42,
+                  test_phash2_with_short_tuple,
+                  test_phash2_with_short_list,
+                  test_phash2_with_tiny_bin,
+                  test_phash2_with_tiny_unaligned_sub_binary,
+                  test_phash2_with_small_unaligned_sub_binary,
+                  test_phash2_with_large_bin,
+                  test_phash2_with_large_unaligned_sub_binary,
+                  test_phash2_with_super_large_unaligned_sub_binary
+                 ],
+    [print_comment(B) || B <- Benchmarks].
+
+
+print_comment(FunctionName) ->
+    io:format("~p~n", [FunctionName]),
+    io:format("~s~n", [element(2, erlang:apply(?MODULE, FunctionName, [[]]))]).
+
+nr_of_iters(BenchmarkNumberOfIterations, Config) ->
+    case lists:member(phash2_benchmark_tests, Config) of
+        true -> 1;
+        false -> BenchmarkNumberOfIterations
+    end.
+
+
+test_phash2_large_map(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {1000000, 121857429};
+            _ ->
+                {1000, 66609305}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(45, Config),
+                                  get_map(Size),
+                                  ExpectedHash).
+
+test_phash2_shallow_long_list(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {1000000, 78700388};
+            _ ->
+                {1000, 54749638}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(1, Config),
+                                  lists:duplicate(Size, get_complex_tuple()),
+                                  ExpectedHash).
+
+test_phash2_deep_list(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {500000, 17986444};
+            _ ->
+                {1000, 81794308}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(1, Config),
+                                  make_deep_list(Size, get_complex_tuple()),
+                                  ExpectedHash).
+
+test_phash2_deep_tuple(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {500000, 116594715};
+            _ ->
+                {500, 109057352}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(1, Config),
+                                  make_deep_tuple(Size, get_complex_tuple()),
+                                  ExpectedHash).
+
+test_phash2_deep_tiny(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(1000000, Config),
+                                  make_deep_list(19, 42),
+                                  111589624).
+
+test_phash2_with_42(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(20000000, Config),
+                                  42,
+                                  30328728).
+
+test_phash2_with_short_tuple(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(10000000, Config),
+                                  {a,b,<<"hej">>, "hej"},
+                                  50727199).
+
+test_phash2_with_short_list(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(10000000, Config),
+                                  [a,b,"hej", "hello"],
+                                  117108642).
+
+test_phash2_with_tiny_bin(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(20000000, Config),
+                                  make_random_bin(10),
+                                  129616602).
+
+test_phash2_with_tiny_unaligned_sub_binary(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(10000000, Config),
+                                  make_unaligned_sub_binary(make_random_bin(11)),
+                                  59364725).
+
+test_phash2_with_small_unaligned_sub_binary(Config) when is_list(Config) ->
+    run_phash2_test_and_benchmark(nr_of_iters(400000, Config),
+                                  make_unaligned_sub_binary(make_random_bin(1001)),
+                                  130388119).
+
+test_phash2_with_large_bin(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {10000000, 48249379};
+            _ ->
+                {1042, 14679520}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(150, Config),
+                                  make_random_bin(Size),
+                                  ExpectedHash).
+
+test_phash2_with_large_unaligned_sub_binary(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {10000001, 122836437};
+            _ ->
+                {10042, 127144287}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(50, Config),
+                                  make_unaligned_sub_binary(make_random_bin(Size)),
+                                  ExpectedHash).
+
+test_phash2_with_super_large_unaligned_sub_binary(Config) when is_list(Config) ->
+    {Size, ExpectedHash} =
+        case {total_memory(), erlang:system_info(wordsize)} of
+            {Mem, 8} when is_integer(Mem) andalso Mem > 2 ->
+                {20000001, 112086727};
+            _ ->
+                {20042, 91996619}
+        end,
+    run_phash2_test_and_benchmark(nr_of_iters(20, Config),
+                                  make_unaligned_sub_binary(make_random_bin(Size)),
+                                  ExpectedHash).
+
+make_deep_list(1, Item) ->
+    {Item, Item};
+make_deep_list(Depth, Item) ->
+    [{Item, Item}, make_deep_list(Depth - 1, Item)].
+
+make_deep_tuple(1, Item) ->
+    [Item, Item];
+make_deep_tuple(Depth, Item) ->
+    {[Item, Item], make_deep_tuple(Depth - 1, Item)}.
+
+% Helper functions for benchmarking
+
+loop(0, _) -> ok;
+loop(Iterations, Fun) ->
+    Fun(),
+    loop(Iterations - 1, Fun).
+
+run_phash2_test_and_benchmark(Iterations, Term, ExpectedHash) ->
+    Parent = self(),
+    Test =
+        fun() ->
+                Hash = erlang:phash2(Term),
+                case ExpectedHash =:= Hash of
+                    false ->
+                        Parent ! {got_bad_hash, Hash},
+                        ExpectedHash = Hash;
+                    _ -> ok
+                end
+        end,
+    Benchmark =
+        fun() ->
+                garbage_collect(),
+                {Time, _} =timer:tc(fun() -> loop(Iterations, Test) end),
+                Parent ! Time
+        end,
+    spawn(Benchmark),
+    receive
+        {got_bad_hash, Hash} ->
+            ExpectedHash = Hash;
+        Time ->
+            TimeInS = case (Time/1000000) of
+                          0.0 -> 0.0000000001;
+                          T -> T
+                      end,
+            IterationsPerSecond = Iterations / TimeInS,
+            notify(#event{ name = benchmark_data, data = [{value, IterationsPerSecond}]}),
+            {comment, io_lib:format("Iterations per second: ~p, Iterations ~p, Benchmark time: ~p seconds)",
+                                    [IterationsPerSecond, Iterations, Time/1000000])}
+    end.
+
+get_complex_tuple() ->
+    BPort = <<131,102,100,0,13,110,111,110,111,100,101,64,110,111,104,
+              111,115,116,0,0,0,1,0>>,
+    Port = binary_to_term(BPort),
+
+    BXPort = <<131,102,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+               0,0,0,24,3>>,
+    XPort = binary_to_term(BXPort),
+
+    BRef = <<131,114,0,3,100,0,13,110,111,110,111,100,101,64,110,111,104,
+             111,115,116,0,0,0,1,255,0,0,0,0,0,0,0,0>>,
+    Ref = binary_to_term(BRef),
+
+    BXRef = <<131,114,0,3,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+              2,0,0,0,155,0,0,0,0,0,0,0,0>>,
+    XRef = binary_to_term(BXRef),
+
+    BXPid = <<131,103,100,0,11,97,112,97,64,108,101,103,111,108,97,115,
+              0,0,0,36,0,0,0,0,1>>,
+    XPid = binary_to_term(BXPid),
+
+
+    %% X = f1(), Y = f2(), Z = f3(X, Y),
+
+    %% F1 = fun f1/0, % -> abc
+    B1 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+           13,196,76,242,0,0,0,1,0,0,0,0,100,0,1,116,97,1,98,2,195,126,
+           58,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+           115,116,0,0,0,112,0,0,0,0,0>>,
+    F1 = binary_to_term(B1),
+
+    %% F2 = fun f2/0, % -> abd
+    B2 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+           13,196,76,242,0,0,0,2,0,0,0,0,100,0,1,116,97,2,98,3,130,152,
+           185,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+           115,116,0,0,0,112,0,0,0,0,0>>,
+    F2 = binary_to_term(B2),
+
+    %% F3 = fun f3/2, % -> {abc, abd}
+    B3 = <<131,112,0,0,0,66,2,215,206,77,69,249,50,170,17,129,47,21,98,
+           13,196,76,242,0,0,0,3,0,0,0,0,100,0,1,116,97,3,98,7,168,160,
+           93,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+           115,116,0,0,0,112,0,0,0,0,0>>,
+    F3 = binary_to_term(B3),
+
+    %% F4 = fun () -> 123456789012345678901234567 end,
+    B4 = <<131,112,0,0,0,66,0,215,206,77,69,249,50,170,17,129,47,21,98,
+           13,196,76,242,0,0,0,4,0,0,0,0,100,0,1,116,97,4,98,2,230,21,
+           171,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+           115,116,0,0,0,112,0,0,0,0,0>>,
+    F4 = binary_to_term(B4),
+
+    %% F5 = fun() -> {X,Y,Z} end,
+    B5 = <<131,112,0,0,0,92,0,215,206,77,69,249,50,170,17,129,47,21,98,
+           13,196,76,242,0,0,0,5,0,0,0,3,100,0,1,116,97,5,98,0,99,101,
+           130,103,100,0,13,110,111,110,111,100,101,64,110,111,104,111,
+           115,116,0,0,0,112,0,0,0,0,0,100,0,3,97,98,99,100,0,3,97,98,
+           100,104,2,100,0,3,97,98,99,100,0,3,97,98,100>>,
+    F5 = binary_to_term(B5),
+    {{1,{2}},an_atom, 1, 3434.923942394,<<"this is a binary">>,
+     make_unaligned_sub_binary(<<"this is also a binary">>),c,d,e,f,g,h,i,j,k,l,[f],
+     999999999999999999666666662123123123123324234999999999999999, 234234234,
+     BPort, Port, BXPort, XPort, BRef, Ref, BXRef, XRef, BXPid, XPid, F1, F2, F3, F4, F5,
+     #{a => 1, b => 2, c => 3, d => 4, e => 5, f => 6, g => 7, h => 8, i => 9,
+       j => 1, k => 1, l => 123123123123213, m => [1,2,3,4,5,6,7,8], o => 5, p => 6,
+       q => 7, r => 8, s => 9}}.
+
+get_map_helper(MapSoFar, 0) ->
+    MapSoFar;
+get_map_helper(MapSoFar, NumOfItemsToAdd) ->
+    NewMapSoFar = maps:put(NumOfItemsToAdd, NumOfItemsToAdd, MapSoFar),
+    get_map_helper(NewMapSoFar, NumOfItemsToAdd -1).
+
+get_map(Size) ->
+    get_map_helper(#{}, Size).
+
+
+%% Copied from binary_SUITE
+make_unaligned_sub_binary(Bin0) when is_binary(Bin0) ->
+    Bin1 = <<0:3,Bin0/binary,31:5>>,
+    Sz = size(Bin0),
+    <<0:3,Bin:Sz/binary,31:5>> = id(Bin1),
+    Bin.
+
+make_unaligned_sub_bitstring(Bin0) ->
+    Bin1 = <<0:3,Bin0/bitstring,31:5>>,
+    Sz = erlang:bit_size(Bin0),
+    <<0:3,Bin:Sz/bitstring,31:5>> = id(Bin1),
+    Bin.
+
+make_random_bin(Size) ->
+    make_random_bin(Size, []).
+
+make_random_bin(0, Acc) ->
+    iolist_to_binary(Acc);
+make_random_bin(Size, []) ->
+    make_random_bin(Size - 1, [simple_rand() rem 256]);
+make_random_bin(Size, [N | Tail]) ->
+    make_random_bin(Size - 1, [simple_rand(N) rem 256, N |Tail]).
+
+simple_rand() ->
+    123456789.
+simple_rand(Seed) ->
+    A = 1103515245,
+    C = 12345,
+    M = (1 bsl 31),
+    (A * Seed + C) rem M.
diff --git a/erts/emulator/test/hash_property_test_SUITE.erl b/erts/emulator/test/hash_property_test_SUITE.erl
new file mode 100644
index 0000000000..b4c7810a52
--- /dev/null
+++ b/erts/emulator/test/hash_property_test_SUITE.erl
@@ -0,0 +1,103 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2004-2019. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%                                                             %%%
+%%%                       WARNING                               %%%
+%%%                                                             %%%
+%%% This is experimental code which may be changed or removed   %%%
+%%%               anytime without any warning.                  %%%
+%%%                                                             %%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+-module(hash_property_test_SUITE).
+
+-export([suite/0,all/0,groups/0,init_per_suite/1,
+         end_per_suite/1,init_per_group/2,end_per_group/2]).
+
+-export([test_phash2_no_diff/1,
+         test_phash2_no_diff_long/1,
+         test_phash2_no_diff_between_versions/1]).
+
+-include_lib("common_test/include/ct.hrl").
+
+suite() ->
+    [{ct_hooks,[ts_install_cth]}].
+
+all() -> [{group, proper}].
+
+groups() ->
+    [{proper, [], [test_phash2_no_diff,
+                   test_phash2_no_diff_long,
+                   test_phash2_no_diff_between_versions]}].
+
+
+%%% First prepare Config and compile the property tests for the found tool:
+init_per_suite(Config) ->
+    ct_property_test:init_per_suite(Config).
+
+end_per_suite(Config) ->
+    Config.
+
+%%% Only proper is supported
+init_per_group(proper, Config) ->
+    case proplists:get_value(property_test_tool,Config) of
+	proper -> Config;
+	X -> {skip, lists:concat([X," is not supported"])}
+    end;
+init_per_group(_, Config) ->
+    Config.
+
+end_per_group(_, Config) ->
+    Config.
+
+test_phash2_no_diff(Config) when is_list(Config) ->
+    true = ct_property_test:quickcheck(
+             phash2_properties:prop_phash2_same_with_same_input(),
+             Config).
+
+test_phash2_no_diff_long(Config) when is_list(Config) ->
+    true = ct_property_test:quickcheck(
+             phash2_properties:prop_phash2_same_with_same_long_input(),
+             Config).
+
+test_phash2_no_diff_between_versions(Config) when is_list(Config) ->
+    R = "21",
+    case test_server:is_release_available(R) of
+        true ->
+            Rel = {release,R},
+            case test_server:start_node(rel21,peer,[{erl,[Rel]}]) of
+                {error, Reason} -> {skip, io_lib:format("Could not start node: ~p~n", [Reason])};
+                {ok, Node} ->
+                    try
+                        true = ct_property_test:quickcheck(
+                                 phash2_properties:prop_phash2_same_in_different_versions(Node),
+                                 Config),
+                        true = ct_property_test:quickcheck(
+                                 phash2_properties:prop_phash2_same_in_different_versions_with_long_input(Node),
+                                 Config)
+                    after
+                        test_server:stop_node(Node)
+                    end
+            end;
+        false ->
+            {skip, io_lib:format("Release ~s not available~n", [R])}
+    end.
diff --git a/erts/emulator/test/property_test/phash2_properties.erl b/erts/emulator/test/property_test/phash2_properties.erl
new file mode 100644
index 0000000000..b1f3207c56
--- /dev/null
+++ b/erts/emulator/test/property_test/phash2_properties.erl
@@ -0,0 +1,63 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2019-2019. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+%%
+
+-module(phash2_properties).
+
+-ifdef(PROPER).
+
+-include_lib("proper/include/proper.hrl").
+-export([prop_phash2_same_with_same_input/0,
+         prop_phash2_same_with_same_long_input/0,
+         prop_phash2_same_in_different_versions/1,
+         prop_phash2_same_in_different_versions_with_long_input/1]).
+-proptest([proper]).
+
+%%--------------------------------------------------------------------
+%% Properties --------------------------------------------------------
+%%--------------------------------------------------------------------
+
+prop_phash2_same_with_same_input() ->
+    ?FORALL(T, any(), erlang:phash2(T) =:= erlang:phash2(T)).
+
+prop_phash2_same_with_same_long_input() ->
+    ?FORALL(T, any(),
+            begin
+                BigTerm = lists:duplicate(10000, T),
+                erlang:phash2(BigTerm) =:= erlang:phash2(BigTerm)
+            end).
+
+prop_phash2_same_in_different_versions(DifferntVersionNode) ->
+    ?FORALL(T, any(),
+            erlang:phash2(T) =:= rpc:call(DifferntVersionNode,erlang,phash2,[T])).
+
+prop_phash2_same_in_different_versions_with_long_input(DifferntVersionNode) ->
+    ?FORALL(T, any(),
+            begin
+                BigTerm = lists:duplicate(10000, T),
+                RpcRes = rpc:call(DifferntVersionNode,erlang,phash2,[BigTerm]),
+                LocalRes = erlang:phash2(BigTerm),
+                RpcRes =:= LocalRes
+            end).
+
+%%--------------------------------------------------------------------
+%% Generators  -------------------------------------------------------
+%%--------------------------------------------------------------------
+
+-endif.
diff --git a/erts/lib_src/common/ethr_aux.c b/erts/lib_src/common/ethr_aux.c
index 7b156fe01a..931469b386 100644
--- a/erts/lib_src/common/ethr_aux.c
+++ b/erts/lib_src/common/ethr_aux.c
@@ -109,7 +109,8 @@ x86_init(void)
 
     if (eax > 0
 	&& (ETHR_IS_X86_VENDOR("GenuineIntel", ebx, ecx, edx)
-	    || ETHR_IS_X86_VENDOR("AuthenticAMD", ebx, ecx, edx))) {
+	    || ETHR_IS_X86_VENDOR("AuthenticAMD", ebx, ecx, edx)
+	    || ETHR_IS_X86_VENDOR("HygonGenuine", ebx, ecx, edx))) {
 	eax = 1;
 	ethr_x86_cpuid__(&eax, &ebx, &ecx, &edx);
     }
diff --git a/erts/preloaded/src/erlang.erl b/erts/preloaded/src/erlang.erl
index ac73946dc0..0ead6ffbc2 100644
--- a/erts/preloaded/src/erlang.erl
+++ b/erts/preloaded/src/erlang.erl
@@ -2186,7 +2186,7 @@ nodes(_Arg) ->
 -spec open_port(PortName, PortSettings) -> port() when
       PortName :: {spawn, Command :: string() | binary()} |
                   {spawn_driver, Command :: string() | binary()} |
-                  {spawn_executable, FileName :: file:name() } |
+                  {spawn_executable, FileName :: file:name_all() } |
                   {fd, In :: non_neg_integer(), Out :: non_neg_integer()},
       PortSettings :: [Opt],
       Opt :: {packet, N :: 1 | 2 | 4}
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 87b0d345f2..0c1dc30f9c 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -52,7 +52,6 @@ MODULES =  \
 	beam_clean \
 	beam_dict \
 	beam_disasm \
-	beam_except \
 	beam_flatten \
 	beam_jump \
 	beam_listing \
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 707974b2c1..a734ca3a10 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -33,8 +33,9 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
 
 function({function,Name,Arity,CLabel,Is0}) ->
     try
-        Is1 = blockify(Is0),
-        Is = embed_lines(Is1),
+        Is1 = swap_opt(Is0),
+        Is2 = blockify(Is1),
+        Is = embed_lines(Is2),
         {function,Name,Arity,CLabel,Is}
     catch
         Class:Error:Stack ->
@@ -42,6 +43,40 @@ function({function,Name,Arity,CLabel,Is0}) ->
 	    erlang:raise(Class, Error, Stack)
     end.
 
+%%%
+%%% Try to use a `swap` instruction instead of a sequence of moves.
+%%%
+%%% Note that beam_ssa_codegen generates `swap` instructions only for
+%%% the moves within a single SSA instruction (such as `call`), not
+%%% for the moves generated by a sequence of SSA instructions.
+%%% Therefore, this optimization is needed.
+%%%
+
+swap_opt([{move,Reg1,{x,X}=Temp}=Move1,
+          {move,Reg2,Reg1}=Move2,
+          {move,Temp,Reg2}=Move3|Is]) when Reg1 =/= Temp ->
+    case is_unused(X, Is) of
+        true ->
+            [{swap,Reg1,Reg2}|swap_opt(Is)];
+        false ->
+            [Move1|swap_opt([Move2,Move3|Is])]
+    end;
+swap_opt([I|Is]) ->
+    [I|swap_opt(Is)];
+swap_opt([]) -> [].
+
+is_unused(X, [{call,A,_}|_]) when A =< X -> true;
+is_unused(X, [{call_ext,A,_}|_]) when A =< X -> true;
+is_unused(X, [{make_fun2,_,_,_,A}|_]) when A =< X -> true;
+is_unused(X, [{move,Src,Dst}|Is]) ->
+    case {Src,Dst} of
+        {{x,X},_} -> false;
+        {_,{x,X}} -> true;
+        {_,_} -> is_unused(X, Is)
+    end;
+is_unused(X, [{line,_}|Is]) -> is_unused(X, Is);
+is_unused(_, _) -> false.
+
 %% blockify(Instructions0) -> Instructions
 %%  Collect sequences of instructions to basic blocks.
 %%  Also do some simple optimations on instructions outside the blocks.
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index 7299654476..6b2b2ce085 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -34,7 +34,8 @@ module({Mod,Exp,Attr,Fs0,_}, Opts) ->
     Used = find_all_used(WorkList, All, cerl_sets:from_list(WorkList)),
     Fs1 = remove_unused(Order, Used, All),
     {Fs2,Lc} = clean_labels(Fs1),
-    Fs = maybe_remove_lines(Fs2, Opts),
+    Fs3 = fix_swap(Fs2, Opts),
+    Fs = maybe_remove_lines(Fs3, Opts),
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
 %% Determine the rootset, i.e. exported functions and
@@ -137,31 +138,54 @@ function_replace([{function,Name,Arity,Entry,Asm0}|Fs], Dict, Acc) ->
 function_replace([], _, Acc) -> Acc.
 
 %%%
+%%% If compatibility with a previous release (OTP 22 or earlier) has
+%%% been requested, replace swap instructions with a sequence of moves.
+%%%
+
+fix_swap(Fs, Opts) ->
+    case proplists:get_bool(no_swap, Opts) of
+        false -> Fs;
+        true -> fold_functions(fun swap_moves/1, Fs)
+    end.
+
+swap_moves([{swap,Reg1,Reg2}|Is]) ->
+    Temp = {x,1022},
+    [{move,Reg1,Temp},{move,Reg2,Reg1},{move,Temp,Reg2}|swap_moves(Is)];
+swap_moves([I|Is]) ->
+    [I|swap_moves(Is)];
+swap_moves([]) -> [].
+
+%%%
 %%% Remove line instructions if requested.
 %%%
 
 maybe_remove_lines(Fs, Opts) ->
     case proplists:get_bool(no_line_info, Opts) of
 	false -> Fs;
-	true -> remove_lines(Fs)
+	true -> fold_functions(fun remove_lines/1, Fs)
     end.
 
-remove_lines([{function,N,A,Lbl,Is0}|T]) ->
-    Is = remove_lines_fun(Is0),
-    [{function,N,A,Lbl,Is}|remove_lines(T)];
-remove_lines([]) -> [].
-
-remove_lines_fun([{line,_}|Is]) ->
-    remove_lines_fun(Is);
-remove_lines_fun([{block,Bl0}|Is]) ->
+remove_lines([{line,_}|Is]) ->
+    remove_lines(Is);
+remove_lines([{block,Bl0}|Is]) ->
     Bl = remove_lines_block(Bl0),
-    [{block,Bl}|remove_lines_fun(Is)];
-remove_lines_fun([I|Is]) ->
-    [I|remove_lines_fun(Is)];
-remove_lines_fun([]) -> [].
+    [{block,Bl}|remove_lines(Is)];
+remove_lines([I|Is]) ->
+    [I|remove_lines(Is)];
+remove_lines([]) -> [].
 
 remove_lines_block([{set,_,_,{line,_}}|Is]) ->
     remove_lines_block(Is);
 remove_lines_block([I|Is]) ->
     [I|remove_lines_block(Is)];
 remove_lines_block([]) -> [].
+
+
+%%%
+%%% Helpers.
+%%%
+
+fold_functions(F, [{function,N,A,Lbl,Is0}|T]) ->
+    Is = F(Is0),
+    [{function,N,A,Lbl,Is}|fold_functions(F, T)];
+fold_functions(_F, []) -> [].
diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl
index 7d048716e4..45b69d7e95 100644
--- a/lib/compiler/src/beam_disasm.erl
+++ b/lib/compiler/src/beam_disasm.erl
@@ -1123,6 +1123,13 @@ resolve_inst({put_tuple2,[Dst,{{z,1},{u,_},List0}]},_,_,_) ->
     {put_tuple2,Dst,{list,List}};
 
 %%
+%% OTP 23.
+%%
+resolve_inst({swap,[_,_]=List},_,_,_) ->
+    [R1,R2] = resolve_args(List),
+    {swap,R1,R2};
+
+%%
 %% Catches instructions that are not yet handled.
 %%
 resolve_inst(X,_,_,_) -> ?exit({resolve_inst,X}).
diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl
deleted file mode 100644
index 2305502800..0000000000
--- a/lib/compiler/src/beam_except.erl
+++ /dev/null
@@ -1,247 +0,0 @@
-%%
-%% %CopyrightBegin%
-%%
-%% Copyright Ericsson AB 2011-2018. All Rights Reserved.
-%%
-%% Licensed under the Apache License, Version 2.0 (the "License");
-%% you may not use this file except in compliance with the License.
-%% You may obtain a copy of the License at
-%%
-%%     http://www.apache.org/licenses/LICENSE-2.0
-%%
-%% Unless required by applicable law or agreed to in writing, software
-%% distributed under the License is distributed on an "AS IS" BASIS,
-%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-%% See the License for the specific language governing permissions and
-%% limitations under the License.
-%%
-%% %CopyrightEnd%
-%%
-
--module(beam_except).
--export([module/2]).
-
-%%% Rewrite certain calls to erlang:error/{1,2} to specialized
-%%% instructions:
-%%%
-%%% erlang:error({badmatch,Value})       => badmatch Value
-%%% erlang:error({case_clause,Value})    => case_end Value
-%%% erlang:error({try_clause,Value})     => try_case_end Value
-%%% erlang:error(if_clause)              => if_end
-%%% erlang:error(function_clause, Args)  => jump FuncInfoLabel
-%%%
-
--import(lists, [reverse/1,reverse/2,seq/2,splitwith/2]).
-
--spec module(beam_utils:module_code(), [compile:option()]) ->
-                    {'ok',beam_utils:module_code()}.
-
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
-    Fs = [function(F) || F <- Fs0],
-    {ok,{Mod,Exp,Attr,Fs,Lc}}.
-
-function({function,Name,Arity,CLabel,Is0}) ->
-    try
-	Is = function_1(Is0),
-	{function,Name,Arity,CLabel,Is}
-    catch
-        Class:Error:Stack ->
-	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
-	    erlang:raise(Class, Error, Stack)
-    end.
-
--record(st,
-	{lbl :: beam_asm:label(),              %func_info label
-	 loc :: [_],                           %location for func_info
-	 arity :: arity()                      %arity for function
-	 }).
-
-function_1(Is0) ->
-    case Is0 of
-	[{label,Lbl},{line,Loc},{func_info,_,_,Arity}|_] ->
-	    St = #st{lbl=Lbl,loc=Loc,arity=Arity},
-	    translate(Is0, St, []);
-	[{label,_}|_] ->
-	    %% No line numbers. The source must be a .S file.
-	    %% There is no need to do anything.
-	    Is0
-    end.
-
-translate([{call_ext,Ar,{extfunc,erlang,error,Ar}}=I|Is], St, Acc) ->
-    translate_1(Ar, I, Is, St, Acc);
-translate([I|Is], St, Acc) ->
-    translate(Is, St, [I|Acc]);
-translate([], _, Acc) ->
-    reverse(Acc).
-
-translate_1(Ar, I, Is, #st{arity=Arity}=St, [{line,_}=Line|Acc1]=Acc0) ->
-    case dig_out(Ar, Arity, Acc1) of
-	no ->
-	    translate(Is, St, [I|Acc0]);
-	{yes,function_clause,Acc2} ->
-	    case {Is,Line,St} of
-		{[return|_],{line,Loc},#st{lbl=Fi,loc=Loc}} ->
-		    Instr = {jump,{f,Fi}},
-		    translate(Is, St, [Instr|Acc2]);
-		{_,_,_} ->
-                    %% Not a call_only instruction, or not the same
-                    %% location information as in in the line instruction
-                    %% before the func_info instruction. Not safe
-                    %% to translate to a jump.
-		    translate(Is, St, [I|Acc0])
-	    end;
-	{yes,Instr,Acc2} ->
-	    translate(Is, St, [Instr,Line|Acc2])
-    end.
-
-dig_out(1, _Arity, Is) ->
-    dig_out(Is);
-dig_out(2, Arity, Is) ->
-    dig_out_fc(Arity, Is);
-dig_out(_, _, _) -> no.
-
-dig_out([{block,Bl0}|Is]) ->
-    case dig_out_block(reverse(Bl0)) of
-	no -> no;
-	{yes,What,[]} ->
-	    {yes,What,Is};
-	{yes,What,Bl} ->
-	    {yes,What,[{block,Bl}|Is]}
-    end;
-dig_out(_) -> no.
-
-dig_out_block([{set,[{x,0}],[{atom,if_clause}],move}]) ->
-    {yes,if_end,[]};
-dig_out_block([{set,[{x,0}],[{literal,{Exc,Value}}],move}|Is]) ->
-    translate_exception(Exc, {literal,Value}, Is, 0);
-dig_out_block([{set,[{x,0}],[{atom,Exc},Value],put_tuple2}|Is]) ->
-    translate_exception(Exc, Value, Is, 3);
-dig_out_block(_) -> no.
-
-translate_exception(badmatch, Val, Is, Words) ->
-    {yes,{badmatch,Val},fix_block(Is, Words)};
-translate_exception(case_clause, Val, Is, Words) ->
-    {yes,{case_end,Val},fix_block(Is, Words)};
-translate_exception(try_clause, Val, Is, Words) ->
-    {yes,{try_case_end,Val},fix_block(Is, Words)};
-translate_exception(_, _, _, _) -> no.
-
-fix_block(Is, 0) ->
-    reverse(Is);
-fix_block(Is, Words) ->
-    reverse(fix_block_1(Is, Words)).
-
-fix_block_1([{set,[],[],{alloc,Live,{F1,F2,Needed0,F3}}}|Is], Words) ->
-    case Needed0 - Words of
-        0 ->
-            Is;
-        Needed ->
-            true = Needed >= 0,				%Assertion.
-            [{set,[],[],{alloc,Live,{F1,F2,Needed,F3}}}|Is]
-    end;
-fix_block_1([I|Is], Words) ->
-    [I|fix_block_1(Is, Words)];
-fix_block_1([], _Words) ->
-    %% Rare. The heap allocation was probably done by a binary
-    %% construction instruction.
-    [].
-
-dig_out_fc(Arity, Is0) ->
-    Regs0 = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Arity-1)]),
-    {Is,Acc0} = splitwith(fun({label,_}) -> false;
-                             ({test,_,_,_}) -> false;
-                             (_) -> true
-                          end, Is0),
-    {Regs,Acc} = dig_out_fc_1(reverse(Is), Regs0, Acc0),
-    case Regs of
-        #{{x,0}:={atom,function_clause},{x,1}:=Args} ->
-            case moves_from_stack(Args, 0, []) of
-                {Moves,Arity} ->
-                    {yes,function_clause,reverse(Moves, Acc)};
-                {_,_} ->
-                    no
-            end;
-        #{} ->
-            no
-    end.
-
-dig_out_fc_1([{block,Bl}|Is], Regs0, Acc) ->
-    Regs = dig_out_fc_block(Bl, Regs0),
-    dig_out_fc_1(Is, Regs, Acc);
-dig_out_fc_1([{bs_set_position,_,_}=I|Is], Regs, Acc) ->
-    dig_out_fc_1(Is, Regs, [I|Acc]);
-dig_out_fc_1([{bs_get_tail,Src,Dst,Live0}|Is], Regs0, Acc) ->
-    Regs = prune_xregs(Live0, Regs0),
-    Live = dig_out_stack_live(Regs, Live0),
-    I = {bs_get_tail,Src,Dst,Live},
-    dig_out_fc_1(Is, Regs, [I|Acc]);
-dig_out_fc_1([_|_], _Regs, _Acc) ->
-    {#{},[]};
-dig_out_fc_1([], Regs, Acc) ->
-    {Regs,Acc}.
-
-dig_out_fc_block([{set,[],[],{alloc,Live,_}}|Is], Regs0) ->
-    Regs = prune_xregs(Live, Regs0),
-    dig_out_fc_block(Is, Regs);
-dig_out_fc_block([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) ->
-    Regs = Regs0#{Dst=>{cons,get_reg(Hd, Regs0),get_reg(Tl, Regs0)}},
-    dig_out_fc_block(Is, Regs);
-dig_out_fc_block([{set,[Dst],[Src],move}|Is], Regs0) ->
-    Regs = Regs0#{Dst=>get_reg(Src, Regs0)},
-    dig_out_fc_block(Is, Regs);
-dig_out_fc_block([{set,_,_,_}|_], _Regs) ->
-    %% Unknown instruction. Fail.
-    #{};
-dig_out_fc_block([], Regs) -> Regs.
-
-dig_out_stack_live(Regs, Default) ->
-    Reg = {x,2},
-    case Regs of
-        #{Reg:=List} ->
-            dig_out_stack_live_1(List, Default);
-        #{} ->
-            Default
-    end.
-
-dig_out_stack_live_1({cons,{arg,N},T}, Live) ->
-    dig_out_stack_live_1(T, max(N + 1, Live));
-dig_out_stack_live_1({cons,_,T}, Live) ->
-    dig_out_stack_live_1(T, Live);
-dig_out_stack_live_1(nil, Live) ->
-    Live;
-dig_out_stack_live_1(_, Live) -> Live.
-
-prune_xregs(Live, Regs) ->
-    maps:filter(fun({x,X}, _) -> X < Live end, Regs).
-
-moves_from_stack({cons,{arg,N},_}, I, _Acc) when N =/= I ->
-    %% Wrong argument. Give up.
-    {[],-1};
-moves_from_stack({cons,H,T}, I, Acc) ->
-    case H of
-        {arg,I} ->
-            moves_from_stack(T, I+1, Acc);
-        _ ->
-            moves_from_stack(T, I+1, [{move,H,{x,I}}|Acc])
-    end;
-moves_from_stack(nil, I, Acc) ->
-    {reverse(Acc),I};
-moves_from_stack({literal,[H|T]}, I, Acc) ->
-    Cons = {cons,tag_literal(H),tag_literal(T)},
-    moves_from_stack(Cons, I, Acc);
-moves_from_stack(_, _, _) ->
-    %% Not understood. Give up.
-    {[],-1}.
-
-
-get_reg(R, Regs) ->
-    case Regs of
-        #{R:=Val} -> Val;
-        #{} -> R
-    end.
-
-tag_literal([]) -> nil;
-tag_literal(T) when is_atom(T) -> {atom,T};
-tag_literal(T) when is_float(T) -> {float,T};
-tag_literal(T) when is_integer(T) -> {integer,T};
-tag_literal(T) -> {literal,T}.
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index a9977b0b1d..831e6489a9 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -79,7 +79,7 @@
 -type var_base()   :: atom() | non_neg_integer().
 
 -type literal_value() :: atom() | integer() | float() | list() |
-                         nil() | tuple() | map() | binary().
+                         nil() | tuple() | map() | binary() | fun().
 
 -type op()   :: {'bif',atom()} | {'float',float_op()} | prim_op() | cg_prim_op().
 -type anno() :: #{atom() := any()}.
@@ -118,7 +118,7 @@
 
 %% Primops only used internally during code generation.
 -type cg_prim_op() :: 'bs_get' | 'bs_match_string' | 'bs_restore' | 'bs_skip' |
-                      'copy' | 'put_tuple_arity' | 'put_tuple_element' |
+                      'copy' | 'match_fail' | 'put_tuple_arity' | 'put_tuple_element' |
                       'set_tuple_element'.
 
 -import(lists, [foldl/3,keyfind/3,mapfoldl/3,member/2,reverse/1]).
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 07f4c8b461..7248aca5f3 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -28,7 +28,7 @@
 
 -include("beam_ssa.hrl").
 
--import(lists, [foldl/3,keymember/3,keysort/2,last/1,map/2,mapfoldl/3,
+-import(lists, [foldl/3,keymember/3,keysort/2,map/2,mapfoldl/3,
                 reverse/1,reverse/2,sort/1,splitwith/2,takewhile/2]).
 
 -record(cg, {lcount=1 :: beam_label(),          %Label counter
@@ -37,7 +37,8 @@
              used_labels=gb_sets:empty() :: gb_sets:set(ssa_label()),
              regs=#{} :: #{beam_ssa:var_name()=>ssa_register()},
              ultimate_fail=1 :: beam_label(),
-             catches=gb_sets:empty() :: gb_sets:set(ssa_label())
+             catches=gb_sets:empty() :: gb_sets:set(ssa_label()),
+             fc_label=1 :: beam_label()
              }).
 
 -spec module(beam_ssa:b_module(), [compile:option()]) ->
@@ -124,7 +125,7 @@ function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
         Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0},
         St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry),
                      ultimate_fail=Ult},
-        {Body,St} = cg_fun(Blocks, St5),
+        {Body,St} = cg_fun(Blocks, St5#cg{fc_label=Fi}),
         Asm = [{label,Fi},line(Anno),
                {func_info,AtomMod,{atom,Name},Arity}] ++
                add_parameter_annos(Body, Anno) ++
@@ -384,6 +385,7 @@ classify_heap_need(is_tagged_tuple) -> neutral;
 classify_heap_need(kill_try_tag) -> gc;
 classify_heap_need(landingpad) -> gc;
 classify_heap_need(make_fun) -> gc;
+classify_heap_need(match_fail) -> gc;
 classify_heap_need(new_try_tag) -> gc;
 classify_heap_need(peek_message) -> gc;
 classify_heap_need(put_map) -> gc;
@@ -1168,6 +1170,10 @@ cg_block([#cg_set{op=call}=I,
           #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) ->
     %% A call in try/catch block.
     cg_block([I], none, St);
+cg_block([#cg_set{op=match_fail}=I,
+          #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) ->
+    %% A match_fail instruction in a try/catch block.
+    cg_block([I], none, St);
 cg_block([#cg_set{op=get_map_element,dst=Dst0,args=Args0},
           #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
     [Dst,Map,Key] = beam_args([Dst0|Args0], St),
@@ -1229,6 +1235,28 @@ cg_block([#cg_set{op=copy}|_]=T0, Context, St0) ->
         no ->
             {Is,St}
     end;
+cg_block([#cg_set{op=match_fail,args=Args0,anno=Anno}], none, St) ->
+    Args = beam_args(Args0, St),
+    Is = cg_match_fail(Args, line(Anno), none),
+    {Is,St};
+cg_block([#cg_set{op=match_fail,args=Args0,anno=Anno}|T], Context, St0) ->
+    FcLabel = case Context of
+                  {return,_,none} ->
+                      %% There is no stack frame. If this is a function_clause
+                      %% exception, it is safe to jump to the label of the
+                      %% func_info instruction.
+                      St0#cg.fc_label;
+                  _ ->
+                      %% This is most probably not a function_clause.
+                      %% If this is a function_clause exception
+                      %% (rare), it is not safe to jump to the
+                      %% func_info label.
+                      none
+              end,
+    Args = beam_args(Args0, St0),
+    Is0 = cg_match_fail(Args, line(Anno), FcLabel),
+    {Is1,St} = cg_block(T, Context, St0),
+    {Is0++Is1,St};
 cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) ->
     [Dst|Args] = beam_args([Dst0|Args0], St),
     Is = cg_instr(Op, Args, Dst, Set),
@@ -1260,8 +1288,7 @@ cg_copy(T0, St) ->
                            end, T0),
     Moves0 = cg_copy_1(Copies, St),
     Moves1 = [Move || {move,Src,Dst}=Move <- Moves0, Src =/= Dst],
-    Scratch = {x,1022},
-    Moves = order_moves(Moves1, Scratch),
+    Moves = order_moves(Moves1),
     {Moves,T}.
 
 cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) ->
@@ -1502,6 +1529,42 @@ cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0},
     Is = setup_args(Args++[Func], Anno, Context, St) ++ Line ++ Call,
     {Is,St}.
 
+cg_match_fail([{atom,function_clause}|Args], Line, Fc) ->
+    case Fc of
+        none ->
+            %% There is a stack frame (probably because of inlining).
+            %% Jumping to the func_info label is not allowed by
+            %% beam_validator. Rewrite the instruction as a call to
+            %% erlang:error/2.
+            make_fc(Args, Line);
+        _ ->
+            setup_args(Args) ++ [{jump,{f,Fc}}]
+    end;
+cg_match_fail([{atom,Op}], Line, _Fc) ->
+    [Line,Op];
+cg_match_fail([{atom,Op},Val], Line, _Fc) ->
+    [Line,{Op,Val}].
+
+make_fc(Args, Line) ->
+    %% Recreate the original call to erlang:error/2.
+    Live = foldl(fun({x,X}, A) -> max(X+1, A);
+                    (_, A) -> A
+                 end, 0, Args),
+    TmpReg = {x,Live},
+    StkMoves = build_stk(reverse(Args), TmpReg, nil),
+    [{test_heap,2*length(Args),Live}|StkMoves] ++
+        [{move,{atom,function_clause},{x,0}},
+         Line,
+         {call_ext,2,{extfunc,erlang,error,2}}].
+
+build_stk([V], _TmpReg, Tail) ->
+    [{put_list,V,Tail,{x,1}}];
+build_stk([V|Vs], TmpReg, Tail) ->
+    I = {put_list,V,Tail,TmpReg},
+    [I|build_stk(Vs, TmpReg, TmpReg)];
+build_stk([], _TmpReg, nil) ->
+    [{move,nil,{x,1}}].
+
 build_call(call_fun, Arity, _Func, none, Dst) ->
     [{call_fun,Arity}|copy({x,0}, Dst)];
 build_call(call_fun, Arity, _Func, {return,Dst,N}, Dst) when is_integer(N) ->
@@ -1540,15 +1603,15 @@ build_apply(Arity, {return,Val,N}, _Dst) when is_integer(N) ->
 build_apply(Arity, none, Dst) ->
     [{apply,Arity}|copy({x,0}, Dst)].
 
-cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) ->
-    Live = get_live(Set),
-    [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}];
 cg_instr(bs_get_tail, [Src], Dst, Set) ->
     Live = get_live(Set),
     [{bs_get_tail,Src,Dst,Live}];
 cg_instr(bs_get_position, [Ctx], Dst, Set) ->
     Live = get_live(Set),
     [{bs_get_position,Ctx,Dst,Live}];
+cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) ->
+    Live = get_live(Set),
+    [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}];
 cg_instr(Op, Args, Dst, _Set) ->
     cg_instr(Op, Args, Dst).
 
@@ -1718,7 +1781,7 @@ cg_catch(Agg, T0, Context, St0) ->
 
 cg_try(Agg, Tag, T0, Context, St0) ->
     {Moves0,T1} = cg_extract(T0, Agg, St0),
-    Moves = order_moves(Moves0, {x,3}),
+    Moves = order_moves(Moves0),
     [#cg_set{op=kill_try_tag}|T2] = T1,
     {T,St} = cg_block(T2, Context, St0),
     {[{try_case,Tag}|Moves++T],St}.
@@ -1874,8 +1937,7 @@ setup_args([]) ->
     [];
 setup_args([_|_]=Args) ->
     Moves = gen_moves(Args, 0, []),
-    Scratch = {x,1+last(sort([length(Args)-1|[X || {x,X} <- Args]]))},
-    order_moves(Moves, Scratch).
+    order_moves(Moves).
 
 %% kill_yregs(Anno, #cg{}) -> [{kill,{y,Y}}].
 %%  Kill Y registers that will not be used again.
@@ -1895,47 +1957,48 @@ gen_moves([A|As], I, Acc) ->
 gen_moves([], _, Acc) ->
     keysort(3, Acc).
 
-%% order_moves([Move], ScratchReg) -> [Move]
+%% order_moves([Move]) -> [Move]
 %%  Orders move instruction so that source registers are not
 %%  destroyed before they are used. If there are cycles
 %%  (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}),
-%%  the scratch register is used to break up the cycle.
-%%    If possible, the first move of the input list is placed
+%%  swap instructions will be used to break up the cycle.
+%%
+%%  If possible, the first move of the input list is placed
 %%  last in the result list (to make the move to {x,0} occur
 %%  just before the call to allow the Beam loader to coalesce
 %%  the instructions).
 
-order_moves(Ms, Scr) -> order_moves(Ms, Scr, []).
+order_moves(Ms) -> order_moves(Ms, []).
 
-order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) ->
-    {Chain,Ms} = collect_chain(Ms0, [M], ScrReg),
+order_moves([{move,_,_}=M|Ms0], Acc0) ->
+    {Chain,Ms} = collect_chain(Ms0, [M]),
     Acc = reverse(Chain, Acc0),
-    order_moves(Ms, ScrReg, Acc);
-order_moves([], _, Acc) -> Acc.
+    order_moves(Ms, Acc);
+order_moves([], Acc) -> Acc.
 
-collect_chain(Ms, Path, ScrReg) ->
-    collect_chain(Ms, Path, [], ScrReg).
+collect_chain(Ms, Path) ->
+    collect_chain(Ms, Path, []).
 
-collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) ->
+collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others) ->
     case keymember(Src, 3, Path) of
         false ->
-            collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg);
+            collect_chain(reverse(Others, Ms0), [M|Path], []);
         true ->
-            %% There is a cycle, which we must break up.
-            {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)}
+            %% There is a cycle.
+            {break_up_cycle(M, Path),reverse(Others, Ms0)}
     end;
-collect_chain([M|Ms], Path, Others, ScrReg) ->
-    collect_chain(Ms, Path, [M|Others], ScrReg);
-collect_chain([], Path, Others, _) ->
+collect_chain([M|Ms], Path, Others) ->
+    collect_chain(Ms, Path, [M|Others]);
+collect_chain([], Path, Others) ->
     {Path,Others}.
 
-break_up_cycle({move,Src,_}=M, Path, ScrReg) ->
-    [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)].
+break_up_cycle({move,Src,_Dst}=M, Path) ->
+    break_up_cycle_1(Src, [M|Path], []).
 
-break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) ->
-    [{move,Src,ScrReg}|Path];
-break_up_cycle1(Dst, [M|Path], LastMove) ->
-    [M|break_up_cycle1(Dst, Path, LastMove)].
+break_up_cycle_1(Dst, [{move,_Src,Dst}|Path], Acc) ->
+    reverse(Acc, Path);
+break_up_cycle_1(Dst, [{move,S,D}|Path], Acc) ->
+    break_up_cycle_1(Dst, Path, [{swap,S,D}|Acc]).
 
 %%%
 %%% General utility functions.
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index 64b9b3e222..88767456a3 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -730,8 +730,8 @@ will_succeed_1('=/=', A, '=:=', B) when A =:= B -> no;
 will_succeed_1('<', A, '=:=', B)  when B >= A -> no;
 will_succeed_1('<', A, '=/=', B)  when B >= A -> yes;
 will_succeed_1('<', A, '<',   B)  when B >= A -> yes;
-will_succeed_1('<', A, '=<',  B)  when B > A  -> yes;
-will_succeed_1('<', A, '>=',  B)  when B > A  -> no;
+will_succeed_1('<', A, '=<',  B)  when B >= A -> yes;
+will_succeed_1('<', A, '>=',  B)  when B >= A -> no;
 will_succeed_1('<', A, '>',   B)  when B >= A -> no;
 
 will_succeed_1('=<', A, '=:=', B) when B > A  -> no;
@@ -751,9 +751,9 @@ will_succeed_1('>=', A, '>',   B) when B < A  -> yes;
 will_succeed_1('>', A, '=:=', B)  when B =< A -> no;
 will_succeed_1('>', A, '=/=', B)  when B =< A -> yes;
 will_succeed_1('>', A, '<',   B)  when B =< A -> no;
-will_succeed_1('>', A, '=<',  B)  when B < A  -> no;
+will_succeed_1('>', A, '=<',  B)  when B =< A -> no;
 will_succeed_1('>', A, '>=',  B)  when B =< A -> yes;
-will_succeed_1('>', A, '>',   B)  when B < A  -> yes;
+will_succeed_1('>', A, '>',   B)  when B =< A -> yes;
 
 will_succeed_1('==', A, '==', B) ->
     if
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 90c0d3cf16..0c8cefe74d 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -157,6 +157,8 @@ repeated_passes(Opts) ->
           ?PASS(ssa_opt_dead),
           ?PASS(ssa_opt_cse),
           ?PASS(ssa_opt_tail_phis),
+          ?PASS(ssa_opt_tuple_size),
+          ?PASS(ssa_opt_record),
           ?PASS(ssa_opt_type_continue)],        %Must run after ssa_opt_dead to
                                                 %clean up phi nodes.
     passes_1(Ps, Opts).
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 9af72afca7..a5fcb91cc0 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -108,7 +108,8 @@ functions([], _Ps, _UseBSM3) -> [].
              intervals=[] :: [{b_var(),[range()]}],
              res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
              regs=#{} :: #{b_var():=ssa_register()},
-             extra_annos=[] :: [{atom(),term()}]
+             extra_annos=[] :: [{atom(),term()}],
+             location :: term()
             }).
 -define(PASS(N), {N,fun N/1}).
 
@@ -120,6 +121,7 @@ passes(Opts) ->
           %% Preliminaries.
           ?PASS(fix_bs),
           ?PASS(sanitize),
+          ?PASS(match_fail_instructions),
           case FixTuples of
               false -> ignore;
               true -> ?PASS(fix_tuples)
@@ -162,7 +164,9 @@ passes(Opts) ->
 function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
          Ps, UseBSM3) ->
     try
-        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
+        Location = maps:get(location, Anno, none),
+        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,
+                  cnt=Count0,location=Location},
         St = compile:run_sub_passes(Ps, St0),
         #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
         F1 = add_extra_annos(F0, ExtraAnnos),
@@ -854,6 +858,114 @@ prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
                  gb_sets:is_element(Pred, Reachable)],
     Phi#b_set{args=Args}.
 
+%%% Rewrite certain calls to erlang:error/{1,2} to specialized
+%%% instructions:
+%%%
+%%% erlang:error({badmatch,Value})       => badmatch Value
+%%% erlang:error({case_clause,Value})    => case_end Value
+%%% erlang:error({try_clause,Value})     => try_case_end Value
+%%% erlang:error(if_clause)              => if_end
+%%% erlang:error(function_clause, Args)  => jump FuncInfoLabel
+%%%
+%%% In SSA code, we represent those instructions as a 'match_fail'
+%%% instruction with the name of the BEAM instruction as the first
+%%% argument.
+
+match_fail_instructions(#st{ssa=Blocks0,args=Args,location=Location}=St) ->
+    Ls = maps:to_list(Blocks0),
+    Info = {length(Args),Location},
+    Blocks = match_fail_instrs_1(Ls, Info, Blocks0),
+    St#st{ssa=Blocks}.
+
+match_fail_instrs_1([{L,#b_blk{is=Is0}=Blk}|Bs], Arity, Blocks0) ->
+    case match_fail_instrs_blk(Is0, Arity, []) of
+        none ->
+            match_fail_instrs_1(Bs, Arity, Blocks0);
+        Is ->
+            Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
+            match_fail_instrs_1(Bs, Arity, Blocks)
+    end;
+match_fail_instrs_1([], _Arity, Blocks) -> Blocks.
+
+match_fail_instrs_blk([#b_set{op=put_tuple,dst=Dst,
+                              args=[#b_literal{val=Tag},Val]},
+                       #b_set{op=call,
+                              args=[#b_remote{mod=#b_literal{val=erlang},
+                                              name=#b_literal{val=error}},
+                                    Dst]}=Call|Is],
+                      _Arity, Acc) ->
+    match_fail_instr(Call, Tag, Val, Is, Acc);
+match_fail_instrs_blk([#b_set{op=call,
+                              args=[#b_remote{mod=#b_literal{val=erlang},
+                                              name=#b_literal{val=error}},
+                                    #b_literal{val={Tag,Val}}]}=Call|Is],
+                      _Arity, Acc) ->
+    match_fail_instr(Call, Tag, #b_literal{val=Val}, Is, Acc);
+match_fail_instrs_blk([#b_set{op=call,
+                              args=[#b_remote{mod=#b_literal{val=erlang},
+                                              name=#b_literal{val=error}},
+                                    #b_literal{val=if_clause}]}=Call|Is],
+                      _Arity, Acc) ->
+    I = Call#b_set{op=match_fail,args=[#b_literal{val=if_end}]},
+    reverse(Acc, [I|Is]);
+match_fail_instrs_blk([#b_set{op=call,anno=Anno,
+                              args=[#b_remote{mod=#b_literal{val=erlang},
+                                              name=#b_literal{val=error}},
+                                    #b_literal{val=function_clause},
+                                    Stk]}=Call],
+                      {Arity,Location}, Acc) ->
+    case match_fail_stk(Stk, Acc, [], []) of
+        {[_|_]=Vars,Is} when length(Vars) =:= Arity ->
+            case maps:get(location, Anno, none) of
+                Location ->
+                    I = Call#b_set{op=match_fail,
+                                   args=[#b_literal{val=function_clause}|Vars]},
+                    Is ++ [I];
+                _ ->
+                    %% erlang:error/2 has a different location than the
+                    %% func_info instruction at the beginning of the function
+                    %% (probably because of inlining). Keep the original call.
+                    reverse(Acc, [Call])
+            end;
+        _ ->
+            %% Either the stacktrace could not be picked apart (for example,
+            %% if the call to erlang:error/2 was handwritten) or the number
+            %% of arguments in the stacktrace was different from the arity
+            %% of the host function (because it is the implementation of a
+            %% fun). Keep the original call.
+            reverse(Acc, [Call])
+    end;
+match_fail_instrs_blk([I|Is], Arity, Acc) ->
+    match_fail_instrs_blk(Is, Arity, [I|Acc]);
+match_fail_instrs_blk(_, _, _) ->
+    none.
+
+match_fail_instr(Call, Tag, Val, Is, Acc) ->
+    Op = case Tag of
+             badmatch -> Tag;
+             case_clause -> case_end;
+             try_clause -> try_case_end;
+             _ -> none
+         end,
+    case Op of
+        none ->
+            none;
+        _ ->
+            I = Call#b_set{op=match_fail,args=[#b_literal{val=Op},Val]},
+            reverse(Acc, [I|Is])
+    end.
+
+match_fail_stk(#b_var{}=V, [#b_set{op=put_list,dst=V,args=[H,T]}|Is], IAcc, VAcc) ->
+    match_fail_stk(T, Is, IAcc, [H|VAcc]);
+match_fail_stk(#b_literal{val=[H|T]}, Is, IAcc, VAcc) ->
+    match_fail_stk(#b_literal{val=T}, Is, IAcc, [#b_literal{val=H}|VAcc]);
+match_fail_stk(#b_literal{val=[]}, [], IAcc, VAcc) ->
+    {reverse(VAcc),IAcc};
+match_fail_stk(T, [#b_set{op=Op}=I|Is], IAcc, VAcc)
+  when Op =:= bs_get_tail; Op =:= bs_set_position ->
+    match_fail_stk(T, Is, [I|IAcc], VAcc);
+match_fail_stk(_, _, _, _) -> none.
+
 %%%
 %%% Fix tuples.
 %%%
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 57fd7fec60..f1c0030b3c 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -41,8 +41,9 @@
 
 %% Records that represent type information.
 -record(t_atom, {elements=any :: 'any' | [atom()]}).
--record(t_integer, {elements=any :: 'any' | {integer(),integer()}}).
 -record(t_bs_match, {type :: type()}).
+-record(t_fun, {arity=any :: arity() | 'any'}).
+-record(t_integer, {elements=any :: 'any' | {integer(),integer()}}).
 -record(t_tuple, {size=0 :: integer(),
                   exact=false :: boolean(),
                   %% Known element types (1-based index), unknown elements are
@@ -50,8 +51,9 @@
                   elements=#{} :: #{ non_neg_integer() => type() }}).
 
 -type type() :: 'any' | 'none' |
-                #t_atom{} | #t_integer{} | #t_bs_match{} | #t_tuple{} |
-                {'binary',pos_integer()} | 'cons' | 'float' | 'list' | 'map' | 'nil' | 'number'.
+                #t_atom{} | #t_bs_match{} | #t_fun{} | #t_integer{} | #t_tuple{} |
+                {'binary',pos_integer()} | 'cons' | 'float' |
+                'list' | 'map' | 'nil' | 'number'.
 -type type_db() :: #{beam_ssa:var_name():=type()}.
 
 -spec opt_start(Linear, Args, Anno, FuncDb) -> {Linear, FuncDb} when
@@ -157,21 +159,29 @@ opt_finish_1([Arg | Args], [TypeMap | TypeMaps], ParamInfo)
        map_size(TypeMap) =:= 0 ->
     opt_finish_1(Args, TypeMaps, ParamInfo);
 opt_finish_1([Arg | Args], [TypeMap | TypeMaps], ParamInfo0) ->
-    case join(maps:values(TypeMap)) of
+    JoinedType0 = verified_type(join(maps:values(TypeMap))),
+    case validator_anno(JoinedType0) of
         any ->
             opt_finish_1(Args, TypeMaps, ParamInfo0);
         JoinedType ->
-            JoinedType = verified_type(JoinedType),
-            ParamInfo = ParamInfo0#{ Arg => validator_anno(JoinedType) },
+            ParamInfo = ParamInfo0#{ Arg => JoinedType },
             opt_finish_1(Args, TypeMaps, ParamInfo)
     end;
 opt_finish_1([], [], ParamInfo) ->
     ParamInfo.
 
+validator_anno(any) ->
+    any;
+validator_anno(#t_fun{}) ->
+    %% There is no need make funs visible to beam_validator.
+    any;
 validator_anno(#t_tuple{size=Size,exact=Exact,elements=Elements0}) ->
-    Elements = maps:fold(fun(Index, Type, Acc) ->
+    Elements = maps:fold(fun(Index, Type0, Acc) ->
                                  Key = beam_validator:type_anno(integer, Index),
-                                 Acc#{ Key => validator_anno(Type) }
+                                 case validator_anno(Type0) of
+                                     any -> Acc;
+                                     Type -> Acc#{Key=>Type}
+                                 end
                          end, #{}, Elements0),
     beam_validator:type_anno(tuple, Size, Exact, Elements);
 validator_anno(#t_integer{elements={Same,Same}}) ->
@@ -413,6 +423,11 @@ simplify_remote_call(Mod, Name, Args0, I) ->
             end
     end.
 
+opt_call(#b_set{dst=Dst,args=[#b_var{}=Fun|Args]}=I, _D, Ts0, Ds0, Fdb) ->
+    Type = #t_fun{arity=length(Args)},
+    Ts = Ts0#{ Fun => Type, Dst => any },
+    Ds = Ds0#{ Dst => I },
+    {Ts, Ds, Fdb, I};
 opt_call(#b_set{dst=Dst,args=[#b_local{}=Callee|Args]}=I0, D, Ts0, Ds0, Fdb0) ->
     {Ts, Ds, I} = opt_local_call(I0, Ts0, Ds0, Fdb0),
     case Fdb0 of
@@ -440,9 +455,15 @@ opt_local_call(#b_set{dst=Dst,args=[Id|_]}=I0, Ts0, Ds0, Fdb) ->
                #{} -> any
            end,
     I = case Type of
-            any -> I0;
-            none -> I0;
-            _ -> beam_ssa:add_anno(result_type, validator_anno(Type), I0)
+            none ->
+                I0;
+            _ ->
+                case validator_anno(Type) of
+                    any ->
+                        I0;
+                    ValidatorType ->
+                        beam_ssa:add_anno(result_type, ValidatorType, I0)
+                end
         end,
     Ts = Ts0#{ Dst => Type },
     Ds = Ds0#{ Dst => I },
@@ -519,19 +540,36 @@ simplify(#b_set{op={bif,tuple_size},args=[Term]}=I, Ts) ->
         _ ->
             I
     end;
-simplify(#b_set{op={bif,'=='},args=Args}=I, Ts) ->
+simplify(#b_set{op={bif,is_function},args=[Fun,#b_literal{val=Arity}]}=I, Ts)
+  when is_integer(Arity), Arity >= 0 ->
+    case get_type(Fun, Ts) of
+        #t_fun{arity=any} ->
+            I;
+        #t_fun{arity=Arity} ->
+            #b_literal{val=true};
+        any ->
+            I;
+        _ ->
+            #b_literal{val=false}
+    end;
+simplify(#b_set{op={bif,Op0},args=Args}=I, Ts) when Op0 =:= '=='; Op0 =:= '/=' ->
     Types = get_types(Args, Ts),
-    EqEq = case {meet(Types),join(Types)} of
-               {none,any} -> true;
-               {#t_integer{},#t_integer{}} -> true;
-               {float,float} -> true;
-               {{binary,_},_} -> true;
-               {#t_atom{},_} -> true;
-               {_,_} -> false
-           end,
+    EqEq0 = case {meet(Types),join(Types)} of
+                {none,any} -> true;
+                {#t_integer{},#t_integer{}} -> true;
+                {float,float} -> true;
+                {{binary,_},_} -> true;
+                {#t_atom{},_} -> true;
+                {_,_} -> false
+            end,
+    EqEq = EqEq0 orelse any_non_numeric_argument(Args, Ts),
     case EqEq of
         true ->
-            simplify(I#b_set{op={bif,'=:='}}, Ts);
+            Op = case Op0 of
+                     '==' -> '=:=';
+                     '/=' -> '=/='
+                 end,
+            simplify(I#b_set{op={bif,Op}}, Ts);
         false ->
             eval_bif(I, Ts)
     end;
@@ -547,6 +585,17 @@ simplify(#b_set{op={bif,'=:='},args=[A1,_A2]=Args}=I, Ts) ->
                 {true,#t_atom{elements=[true]}} ->
                     %% Bool =:= true  ==>  Bool
                     A1;
+                {true,#t_atom{elements=[false]}} ->
+                    %% Bool =:= false ==> not Bool
+                    %%
+                    %% This will be further optimized to eliminate the
+                    %% 'not', swapping the success and failure
+                    %% branches in the br instruction. If A1 comes
+                    %% from a type test (such as is_atom/1) or a
+                    %% comparison operator (such as >=) that can be
+                    %% translated to test instruction, this
+                    %% optimization will eliminate one instruction.
+                    simplify(I#b_set{op={bif,'not'},args=[A1]}, Ts);
                 {_,_} ->
                     eval_bif(I, Ts)
             end
@@ -597,6 +646,44 @@ simplify(#b_set{op=wait_timeout,args=[#b_literal{val=infinity}]}=I, _Ts) ->
     I#b_set{op=wait,args=[]};
 simplify(I, _Ts) -> I.
 
+any_non_numeric_argument([#b_literal{val=Lit}|_], _Ts) ->
+    is_non_numeric(Lit);
+any_non_numeric_argument([#b_var{}=V|T], Ts) ->
+    is_non_numeric_type(get_type(V, Ts)) orelse any_non_numeric_argument(T, Ts);
+any_non_numeric_argument([], _Ts) -> false.
+
+is_non_numeric([H|T]) ->
+    is_non_numeric(H) andalso is_non_numeric(T);
+is_non_numeric(Tuple) when is_tuple(Tuple) ->
+    is_non_numeric_tuple(Tuple, tuple_size(Tuple));
+is_non_numeric(Map) when is_map(Map) ->
+    %% Note that 17.x and 18.x compare keys in different ways.
+    %% Be very conservative -- require that both keys and values
+    %% are non-numeric.
+    is_non_numeric(maps:to_list(Map));
+is_non_numeric(Num) when is_number(Num) ->
+    false;
+is_non_numeric(_) -> true.
+
+is_non_numeric_tuple(Tuple, El) when El >= 1 ->
+    is_non_numeric(element(El, Tuple)) andalso
+	is_non_numeric_tuple(Tuple, El-1);
+is_non_numeric_tuple(_Tuple, 0) -> true.
+
+is_non_numeric_type(#t_atom{}) -> true;
+is_non_numeric_type({binary,_}) -> true;
+is_non_numeric_type(nil) -> true;
+is_non_numeric_type(#t_tuple{size=Size,exact=true,elements=Types})
+  when map_size(Types) =:= Size ->
+    is_non_numeric_tuple_type(Size, Types);
+is_non_numeric_type(_) -> false.
+
+is_non_numeric_tuple_type(0, _Types) ->
+    true;
+is_non_numeric_tuple_type(Pos, Types) ->
+    is_non_numeric_type(map_get(Pos, Types)) andalso
+        is_non_numeric_tuple_type(Pos - 1, Types).
+
 make_literal_list(Args) ->
     make_literal_list(Args, []).
 
@@ -859,6 +946,13 @@ type(bs_get_tail, _Args, _Ts, _Ds) ->
 type(call, [#b_remote{mod=#b_literal{val=Mod},
                       name=#b_literal{val=Name}}|Args], Ts, _Ds) ->
     case {Mod,Name,Args} of
+        {erlang,make_fun,[_,_,Arity0]} ->
+            case Arity0 of
+                #b_literal{val=Arity} when is_integer(Arity), Arity >= 0 ->
+                    #t_fun{arity=Arity};
+                _ ->
+                    #t_fun{}
+            end;
         {erlang,setelement,[Pos,Tuple,Arg]} ->
             case {get_type(Pos, Ts),get_type(Tuple, Ts)} of
                 {#t_integer{elements={Index,Index}},
@@ -931,6 +1025,8 @@ type(is_nonempty_list, [_], _Ts, _Ds) ->
     t_boolean();
 type(is_tagged_tuple, [_,#b_literal{},#b_literal{}], _Ts, _Ds) ->
     t_boolean();
+type(make_fun, [#b_local{arity=TotalArity}|Env], _Ts, _Ds) ->
+    #t_fun{arity=TotalArity-length(Env)};
 type(put_map, _Args, _Ts, _Ds) ->
     map;
 type(put_list, _Args, _Ts, _Ds) ->
@@ -1112,6 +1208,11 @@ will_succeed(is_float, Type) ->
         number -> maybe;
         _ -> no
     end;
+will_succeed(is_function, Type) ->
+    case Type of
+        #t_fun{} -> yes;
+        _ -> no
+    end;
 will_succeed(is_integer, Type) ->
     case Type of
         #t_integer{} -> yes;
@@ -1351,6 +1452,9 @@ get_type(#b_literal{val=Val}, _Ts) ->
             t_atom(Val);
         is_float(Val) ->
             float;
+        is_function(Val) ->
+            {arity,Arity} = erlang:fun_info(Val, arity),
+            #t_fun{arity=Arity};
         is_integer(Val) ->
             t_integer(Val);
         is_list(Val), Val =/= [] ->
@@ -1744,6 +1848,7 @@ join(#t_atom{elements=any}=T, #t_atom{elements=[_|_]}) -> T;
 join(#t_atom{elements=[_|_]}, #t_atom{elements=any}=T) -> T;
 join({binary,U1}, {binary,U2}) ->
     {binary,gcd(U1, U2)};
+join(#t_fun{}, #t_fun{}) -> #t_fun{};
 join(#t_integer{}, #t_integer{}) -> t_integer();
 join(list, cons) -> list;
 join(cons, list) -> list;
@@ -1861,6 +1966,10 @@ meet(#t_atom{elements=[_|_]}=T, #t_atom{elements=any}) ->
     T;
 meet(#t_atom{elements=any}, #t_atom{elements=[_|_]}=T) ->
     T;
+meet(#t_fun{arity=any}, #t_fun{}=T) ->
+    T;
+meet(#t_fun{}=T, #t_fun{arity=any}) ->
+    T;
 meet(#t_integer{elements={_,_}}=T, #t_integer{elements=any}) ->
     T;
 meet(#t_integer{elements=any}, #t_integer{elements={_,_}}=T) ->
@@ -1950,6 +2059,7 @@ verified_type(none=T) -> T;
 verified_type(#t_atom{elements=any}=T) -> T;
 verified_type(#t_atom{elements=[_|_]}=T) -> T;
 verified_type({binary,U}=T) when is_integer(U) -> T;
+verified_type(#t_fun{arity=Arity}=T) when Arity =:= any; is_integer(Arity) -> T;
 verified_type(#t_integer{elements=any}=T) -> T;
 verified_type(#t_integer{elements={Min,Max}}=T)
   when is_integer(Min), is_integer(Max) -> T;
diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl
index acf3838da4..ad8839cc7d 100644
--- a/lib/compiler/src/beam_trim.erl
+++ b/lib/compiler/src/beam_trim.erl
@@ -244,6 +244,9 @@ remap([{make_fun2,_,_,_,_}=I|T], Map, Acc) ->
 remap([{deallocate,N}|Is], Map, Acc) ->
     I = {deallocate,Map({frame_size,N})},
     remap(Is, Map, [I|Acc]);
+remap([{swap,Reg1,Reg2}|Is], Map, Acc) ->
+    I = {swap,Map(Reg1),Map(Reg2)},
+    remap(Is, Map, [I|Acc]);
 remap([{test,Name,Fail,Ss}|Is], Map, Acc) ->
     I = {test,Name,Fail,[Map(S) || S <- Ss]},
     remap(Is, Map, [I|Acc]);
@@ -382,6 +385,8 @@ frame_size([{bs_set_position,_,_}|Is], Safe) ->
     frame_size(Is, Safe);
 frame_size([{bs_get_tail,_,_,_}|Is], Safe) ->
     frame_size(Is, Safe);
+frame_size([{swap,_,_}|Is], Safe) ->
+    frame_size(Is, Safe);
 frame_size(_, _) -> throw(not_possible).
 
 frame_size_branch(0, Is, Safe) ->
@@ -444,6 +449,8 @@ is_not_used(Y, [{line,_}|Is]) ->
     is_not_used(Y, Is);
 is_not_used(Y, [{make_fun2,_,_,_,_}|Is]) ->
     is_not_used(Y, Is);
+is_not_used(Y, [{swap,Reg1,Reg2}|Is]) ->
+    Y =/= Reg1 andalso Y =/= Reg2 andalso is_not_used(Y, Is);
 is_not_used(Y, [{test,_,_,Ss}|Is]) ->
     not member(Y, Ss) andalso is_not_used(Y, Is);
 is_not_used(Y, [{test,_Op,{f,_},_Live,Ss,Dst}|Is]) ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index ebe9631e09..717ea17475 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -200,7 +200,7 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
          numy=none :: none | undecided | index(),
          %% Available heap size.
          h=0,
-         %Available heap size for floats.
+         %%Available heap size for floats.
          hf=0,
          %% Floating point state.
          fls=undefined,
@@ -240,7 +240,7 @@ index_parameter_types([{function,_,_,Entry,Code0}|Fs], Acc0) ->
 	    Acc = index_parameter_types_1(Is, Entry, Acc0),
 	    index_parameter_types(Fs, Acc);
 	_ ->
-	    %% Something serious is wrong. Ignore it for now.
+	    %% Something is seriously wrong. Ignore it for now.
 	    %% It will be detected and diagnosed later.
 	    index_parameter_types(Fs, Acc0)
     end;
@@ -392,6 +392,23 @@ valfun_1(build_stacktrace=I, Vst) ->
     call(I, 1, Vst);
 valfun_1({move,Src,Dst}, Vst) ->
     assign(Src, Dst, Vst);
+valfun_1({swap,RegA,RegB}, Vst0) ->
+    assert_movable(RegA, Vst0),
+    assert_movable(RegB, Vst0),
+
+    %% We don't expect fragile registers to be swapped.
+    %% Therefore, we can conservatively make both registers
+    %% fragile if one of the register is fragile instead of
+    %% swapping the fragility of the registers.
+    Sources = [RegA,RegB],
+    Vst1 = propagate_fragility(RegA, Sources, Vst0),
+    Vst2 = propagate_fragility(RegB, Sources, Vst1),
+
+    %% Swap the value references.
+    VrefA = get_reg_vref(RegA, Vst2),
+    VrefB = get_reg_vref(RegB, Vst2),
+    Vst = set_reg_vref(VrefB, RegA, Vst2),
+    set_reg_vref(VrefA, RegB, Vst);
 valfun_1({fmove,Src,{fr,_}=Dst}, Vst) ->
     assert_type(float, Src, Vst),
     set_freg(Dst, Vst);
@@ -1848,16 +1865,9 @@ get_reg_vref({y,_}=Src, #vst{current=#st{ys=Ys}}) ->
     end.
 
 set_type(Type, #value_ref{}=Ref, #vst{current=#st{vs=Vs0}=St}=Vst) ->
-    case Vs0 of
-        #{ Ref := #value{}=Entry } ->
-            Vs = Vs0#{ Ref => Entry#value{type=Type} },
-            Vst#vst{current=St#st{vs=Vs}};
-        #{} ->
-            %% Dead references may happen during type inference and are not an
-            %% error in and of themselves. If a problem were to arise from this
-            %% it'll explode elsewhere.
-            Vst
-    end.
+    #{ Ref := #value{}=Entry } = Vs0,
+    Vs = Vs0#{ Ref => Entry#value{type=Type} },
+    Vst#vst{current=St#st{vs=Vs}}.
 
 new_value(Type, Op, Ss, #vst{current=#st{vs=Vs0}=St,ref_ctr=Counter}=Vst) ->
     Ref = #value_ref{id=Counter},
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 28db8986ff..e5e63341b7 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -265,7 +265,9 @@ expand_opt(r19, Os) ->
 expand_opt(r20, Os) ->
     expand_opt_before_21(Os);
 expand_opt(r21, Os) ->
-    [no_put_tuple2 | expand_opt(no_bsm3, Os)];
+    [no_swap, no_put_tuple2 | expand_opt(no_bsm3, Os)];
+expand_opt(r22, Os) ->
+    [no_swap | Os];
 expand_opt({debug_info_key,_}=O, Os) ->
     [encrypt_debug_info,O|Os];
 expand_opt(no_type_opt, Os) ->
@@ -275,7 +277,7 @@ expand_opt(no_type_opt, Os) ->
 expand_opt(O, Os) -> [O|Os].
 
 expand_opt_before_21(Os) ->
-    [no_put_tuple2, no_get_hd_tl, no_ssa_opt_record,
+    [no_swap, no_put_tuple2, no_get_hd_tl, no_ssa_opt_record,
      no_utf8_atoms | expand_opt(no_bsm3, Os)].
 
 %% format_error(ErrorDescriptor) -> string()
@@ -860,8 +862,6 @@ asm_passes() ->
        {unless,no_postopt,
 	[{pass,beam_block},
 	 {iff,dblk,{listing,"block"}},
-	 {unless,no_except,{pass,beam_except}},
-	 {iff,dexcept,{listing,"except"}},
 	 {unless,no_jopt,{pass,beam_jump}},
 	 {iff,djmp,{listing,"jump"}},
 	 {unless,no_peep_opt,{pass,beam_peep}},
@@ -2095,7 +2095,6 @@ pre_load() ->
 	 beam_block,
 	 beam_clean,
 	 beam_dict,
-	 beam_except,
 	 beam_flatten,
 	 beam_jump,
 	 beam_kernel_to_ssa,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index a086a3a8d3..9dc3b6e339 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -27,7 +27,6 @@
 	     beam_clean,
 	     beam_dict,
 	     beam_disasm,
-	     beam_except,
 	     beam_flatten,
 	     beam_jump,
              beam_kernel_to_ssa,
diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab
index 86590fad87..03507bafb3 100755
--- a/lib/compiler/src/genop.tab
+++ b/lib/compiler/src/genop.tab
@@ -596,3 +596,7 @@ BEAM_FORMAT_NUMBER=0
 ## @spec bs_set_positon Ctx Pos
 ## @doc  Sets the current position of Ctx to Pos
 168: bs_set_position/2
+
+## @spec swap Register1 Register2
+## @doc  Swaps the contents of two registers.
+169: swap/2
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index 4939a94a92..63c67639d4 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -99,10 +99,6 @@
               t=#{} :: map(),                       %Types
               in_guard=false}).                     %In guard or not.
 
--type type_info() :: cerl:cerl() | 'bool' | 'integer' | {'fun', pos_integer()}.
--type yes_no_maybe() :: 'yes' | 'no' | 'maybe'.
--type sub() :: #sub{}.
-
 -spec module(cerl:c_module(), [compile:option()]) ->
 	{'ok', cerl:c_module(), [_]}.
 
@@ -315,10 +311,10 @@ expr(#c_seq{arg=Arg0,body=B0}=Seq0, Ctxt, Sub) ->
 	false ->
 	    %% Arg cannot be "values" here - only a single value
 	    %% make sense here.
-            case {Ctxt,is_safe_simple(Arg, Sub)} of
+            case {Ctxt,is_safe_simple(Arg)} of
                 {effect,true} -> B1;
                 {effect,false} ->
-                    case is_safe_simple(B1, Sub) of
+                    case is_safe_simple(B1) of
                         true -> Arg;
                         false -> Seq0#c_seq{arg=Arg,body=B1}
                     end;
@@ -442,7 +438,7 @@ expr(#c_catch{anno=Anno,body=B}, effect, Sub) ->
 expr(#c_catch{body=B0}=Catch, _, Sub) ->
     %% We can remove catch if the value is simple
     B1 = body(B0, value, Sub),
-    case is_safe_simple(B1, Sub) of
+    case is_safe_simple(B1) of
 	true -> B1;
 	false -> Catch#c_catch{body=B1}
     end;
@@ -458,7 +454,7 @@ expr(#c_try{arg=E0,vars=[#c_var{name=X}],body=#c_var{name=X},
 
 	    %% We can remove try/catch if the expression is an
 	    %% expression that cannot fail.
-	    case is_safe_bool_expr(E2, Sub) orelse is_safe_simple(E2, Sub) of
+	    case is_safe_bool_expr(E2) orelse is_safe_simple(E2) of
 		true -> E2;
 		false -> Try#c_try{arg=E2}
 	    end;
@@ -472,7 +468,7 @@ expr(#c_try{anno=A,arg=E0,vars=Vs0,body=B0,evars=Evs0,handler=H0}=Try, _, Sub0)
     E1 = body(E0, value, Sub0),
     {Vs1,Sub1} = var_list(Vs0, Sub0),
     B1 = body(B0, value, Sub1),
-    case is_safe_simple(E1, Sub0) of
+    case is_safe_simple(E1) of
 	true ->
 	    expr(#c_let{anno=A,vars=Vs1,arg=E1,body=B1}, value, Sub0);
 	false ->
@@ -602,20 +598,20 @@ is_literal_fun(_) -> false.
 %%  Currently, we don't attempt to check binaries because they
 %%  are difficult to check.
 
-is_safe_simple(#c_var{}=Var, _) ->
+is_safe_simple(#c_var{}=Var) ->
     not cerl:is_c_fname(Var);
-is_safe_simple(#c_cons{hd=H,tl=T}, Sub) ->
-    is_safe_simple(H, Sub) andalso is_safe_simple(T, Sub);
-is_safe_simple(#c_tuple{es=Es}, Sub) -> is_safe_simple_list(Es, Sub);
-is_safe_simple(#c_literal{}, _) -> true;
+is_safe_simple(#c_cons{hd=H,tl=T}) ->
+    is_safe_simple(H) andalso is_safe_simple(T);
+is_safe_simple(#c_tuple{es=Es}) -> is_safe_simple_list(Es);
+is_safe_simple(#c_literal{}) -> true;
 is_safe_simple(#c_call{module=#c_literal{val=erlang},
 		       name=#c_literal{val=Name},
-		       args=Args}, Sub) when is_atom(Name) ->
+		       args=Args}) when is_atom(Name) ->
     NumArgs = length(Args),
     case erl_internal:bool_op(Name, NumArgs) of
 	true ->
 	    %% Boolean operators are safe if the arguments are boolean.
-	    all(fun(C) -> is_boolean_type(C, Sub) =:= yes end, Args);
+	    all(fun is_bool_expr/1, Args);
 	false ->
 	    %% We need a rather complicated test to ensure that
 	    %% we only allow safe calls that are allowed in a guard.
@@ -624,9 +620,9 @@ is_safe_simple(#c_call{module=#c_literal{val=erlang},
 		      (erl_internal:comp_op(Name, NumArgs) orelse
 		       erl_internal:new_type_test(Name, NumArgs))
     end;
-is_safe_simple(_, _) -> false.
+is_safe_simple(_) -> false.
 
-is_safe_simple_list(Es, Sub) -> all(fun(E) -> is_safe_simple(E, Sub) end, Es).
+is_safe_simple_list(Es) -> all(fun(E) -> is_safe_simple(E) end, Es).
 
 %% will_fail(Expr) -> true|false.
 %%  Determine whether the expression will fail with an exception.
@@ -853,7 +849,7 @@ useless_call(_, _) -> no.
 %%  Anything that will not have any effect will be thrown away.
 
 make_effect_seq([H|T], Sub) ->
-    case is_safe_simple(H, Sub) of
+    case is_safe_simple(H) of
 	true -> make_effect_seq(T, Sub);
 	false -> #c_seq{arg=H,body=make_effect_seq(T, Sub)}
     end;
@@ -959,138 +955,14 @@ fold_lit_args(Call, Module, Name, Args0) ->
 %%  Attempt to evaluate some pure BIF calls with one or more
 %%  non-literals arguments.
 %%
-fold_non_lit_args(Call, erlang, is_boolean, [Arg], Sub) ->
-    eval_is_boolean(Call, Arg, Sub);
 fold_non_lit_args(Call, erlang, length, [Arg], _) ->
     eval_length(Call, Arg);
 fold_non_lit_args(Call, erlang, '++', [Arg1,Arg2], _) ->
     eval_append(Call, Arg1, Arg2);
 fold_non_lit_args(Call, lists, append, [Arg1,Arg2], _) ->
     eval_append(Call, Arg1, Arg2);
-fold_non_lit_args(Call, erlang, is_function, [Arg1], Sub) ->
-    eval_is_function_1(Call, Arg1, Sub);
-fold_non_lit_args(Call, erlang, is_function, [Arg1,Arg2], Sub) ->
-    eval_is_function_2(Call, Arg1, Arg2, Sub);
-fold_non_lit_args(Call, erlang, N, Args, Sub) ->
-    NumArgs = length(Args),
-    case erl_internal:comp_op(N, NumArgs) of
-	true ->
-	    eval_rel_op(Call, N, Args, Sub);
-	false ->
-	    case erl_internal:bool_op(N, NumArgs) of
-		true ->
-		    eval_bool_op(Call, N, Args, Sub);
-		false ->
-		    Call
-	    end
-    end;
 fold_non_lit_args(Call, _, _, _, _) -> Call.
 
-eval_is_function_1(Call, Arg1, Sub) ->
-    case get_type(Arg1, Sub) of
-        none -> Call;
-        {'fun',_} -> #c_literal{anno=cerl:get_ann(Call),val=true};
-        _ -> #c_literal{anno=cerl:get_ann(Call),val=false}
-    end.
-
-eval_is_function_2(Call, Arg1, #c_literal{val=Arity}, Sub)
-  when is_integer(Arity), Arity > 0 ->
-    case get_type(Arg1, Sub) of
-        none -> Call;
-        {'fun',Arity} -> #c_literal{anno=cerl:get_ann(Call),val=true};
-        _ -> #c_literal{anno=cerl:get_ann(Call),val=false}
-    end;
-eval_is_function_2(Call, _Arg1, _Arg2, _Sub) -> Call.
-
-%% Evaluate a relational operation using type information.
-eval_rel_op(Call, Op, [#c_var{name=V},#c_var{name=V}], _) ->
-    Bool = erlang:Op(same, same),
-    #c_literal{anno=cerl:get_ann(Call),val=Bool};
-eval_rel_op(Call, '=:=', [Term,#c_literal{val=true}], Sub) ->
-    %% BoolVar =:= true  ==>  BoolVar
-    case is_boolean_type(Term, Sub) of
-	yes -> Term;
-	maybe -> Call;
-	no -> #c_literal{val=false}
-    end;
-eval_rel_op(Call, '==', Ops, Sub) ->
-    case is_exact_eq_ok(Ops, Sub) of
-	true ->
-	    Name = #c_literal{anno=cerl:get_ann(Call),val='=:='},
-	    Call#c_call{name=Name};
-	false ->
-	    Call
-    end;
-eval_rel_op(Call, '/=', Ops, Sub) ->
-    case is_exact_eq_ok(Ops, Sub) of
-	true ->
-	    Name = #c_literal{anno=cerl:get_ann(Call),val='=/='},
-	    Call#c_call{name=Name};
-	false ->
-	    Call
-    end;
-eval_rel_op(Call, _, _, _) -> Call.
-
-is_exact_eq_ok([A,B]=L, Sub) ->
-    case is_int_type(A, Sub) =:= yes andalso is_int_type(B, Sub) =:= yes of
-	true -> true;
-	false -> is_exact_eq_ok_1(L)
-    end.
-
-is_exact_eq_ok_1([#c_literal{val=Lit}|_]) ->
-    is_non_numeric(Lit);
-is_exact_eq_ok_1([_|T]) ->
-    is_exact_eq_ok_1(T);
-is_exact_eq_ok_1([]) -> false.
-
-is_non_numeric([H|T]) ->
-    is_non_numeric(H) andalso is_non_numeric(T);
-is_non_numeric(Tuple) when is_tuple(Tuple) ->
-    is_non_numeric_tuple(Tuple, tuple_size(Tuple));
-is_non_numeric(Map) when is_map(Map) ->
-    %% Note that 17.x and 18.x compare keys in different ways.
-    %% Be very conservative -- require that both keys and values
-    %% are non-numeric.
-    is_non_numeric(maps:to_list(Map));
-is_non_numeric(Num) when is_number(Num) ->
-    false;
-is_non_numeric(_) -> true.
-
-is_non_numeric_tuple(Tuple, El) when El >= 1 ->
-    is_non_numeric(element(El, Tuple)) andalso
-	is_non_numeric_tuple(Tuple, El-1);
-is_non_numeric_tuple(_Tuple, 0) -> true.
-
-%% Evaluate a bool op using type information. We KNOW that
-%% there must be at least one non-literal argument (i.e.
-%% there is no need to handle the case that all argments
-%% are literal).
-
-eval_bool_op(Call, 'and', [#c_literal{val=true},Term], Sub) ->
-    eval_bool_op_1(Call, Term, Term, Sub);
-eval_bool_op(Call, 'and', [Term,#c_literal{val=true}], Sub) ->
-    eval_bool_op_1(Call, Term, Term, Sub);
-eval_bool_op(Call, 'and', [#c_literal{val=false}=Res,Term], Sub) ->
-    eval_bool_op_1(Call, Res, Term, Sub);
-eval_bool_op(Call, 'and', [Term,#c_literal{val=false}=Res], Sub) ->
-    eval_bool_op_1(Call, Res, Term, Sub);
-eval_bool_op(Call, _, _, _) -> Call.
-
-eval_bool_op_1(Call, Res, Term, Sub) ->
-    case is_boolean_type(Term, Sub) of
-	yes -> Res;
-	no -> eval_failure(Call, badarg);
-	maybe -> Call
-    end.
-
-%% Evaluate is_boolean/1 using type information.
-eval_is_boolean(Call, Term, Sub) ->
-    case is_boolean_type(Term, Sub) of
-	no -> #c_literal{val=false};
-	yes -> #c_literal{val=true};
-	maybe -> Call
-    end.
-
 %% eval_length(Call, List) -> Val.
 %%  Evaluates the length for the prefix of List which has a known
 %%  shape.
@@ -1804,7 +1676,7 @@ opt_bool_case_guard(#c_case{arg=#c_literal{}}=Case) ->
     %%
     Case;
 opt_bool_case_guard(#c_case{arg=Arg,clauses=Cs0}=Case) ->
-    case is_safe_bool_expr(Arg, sub_new()) of
+    case is_safe_bool_expr(Arg) of
 	false ->
 	    Case;
 	true ->
@@ -1945,7 +1817,7 @@ case_opt_arg(E0, Sub, Cs, LitExpr) ->
 		    {error,Cs};
 		false ->
 		    %% If possible, expand this variable to a previously
-		    %% matched term.
+		    %% constructed tuple
 		    E = case_expand_var(E0, Sub),
 		    case_opt_arg_1(E, Cs, LitExpr)
 	    end
@@ -2004,13 +1876,8 @@ case_opt_compiler_generated(Core) ->
 case_expand_var(E, #sub{t=Tdb}) ->
     Key = cerl:var_name(E),
     case Tdb of
-        #{Key:=T} ->
-	    case cerl:is_c_tuple(T) of
-		false -> E;
-		true -> T
-	    end;
-        _ ->
-	    E
+        #{Key:=T} -> T;
+        _ -> E
     end.
 
 %% case_opt_nomatch(E, Clauses, LitExpr) -> Clauses'
@@ -2302,43 +2169,30 @@ is_simple_case_arg(_) -> false.
 %%  Check whether the Core expression is guaranteed to return
 %%  a boolean IF IT RETURNS AT ALL.
 %%
-is_bool_expr(Core) ->
-    is_bool_expr(Core, sub_new()).
 
-%% is_bool_expr(Core, Sub) -> true|false
-%%  Check whether the Core expression is guaranteed to return
-%%  a boolean IF IT RETURNS AT ALL. Uses type information
-%%  to be able to identify more expressions as booleans.
-%%
 is_bool_expr(#c_call{module=#c_literal{val=erlang},
-		     name=#c_literal{val=Name},args=Args}=Call, _) ->
+		     name=#c_literal{val=Name},args=Args}=Call) ->
     NumArgs = length(Args),
     erl_internal:comp_op(Name, NumArgs) orelse
 	erl_internal:new_type_test(Name, NumArgs) orelse
         erl_internal:bool_op(Name, NumArgs) orelse
 	will_fail(Call);
 is_bool_expr(#c_try{arg=E,vars=[#c_var{name=X}],body=#c_var{name=X},
-		   handler=#c_literal{val=false}}, Sub) ->
-    is_bool_expr(E, Sub);
-is_bool_expr(#c_case{clauses=Cs}, Sub) ->
-    is_bool_expr_list(Cs, Sub);
-is_bool_expr(#c_clause{body=B}, Sub) ->
-    is_bool_expr(B, Sub);
-is_bool_expr(#c_let{vars=[V],arg=Arg,body=B}, Sub0) ->
-    Sub = case is_bool_expr(Arg, Sub0) of
-	      true -> update_types(V, [bool], Sub0);
-	      false -> Sub0
-	  end,
-    is_bool_expr(B, Sub);
-is_bool_expr(#c_let{body=B}, Sub) ->
-    %% Binding of multiple variables.
-    is_bool_expr(B, Sub);
-is_bool_expr(C, Sub) ->
-    is_boolean_type(C, Sub) =:= yes.
-
-is_bool_expr_list([C|Cs], Sub) ->
-    is_bool_expr(C, Sub) andalso is_bool_expr_list(Cs, Sub);
-is_bool_expr_list([], _) -> true.
+		   handler=#c_literal{val=false}}) ->
+    is_bool_expr(E);
+is_bool_expr(#c_case{clauses=Cs}) ->
+    is_bool_expr_list(Cs);
+is_bool_expr(#c_clause{body=B}) ->
+    is_bool_expr(B);
+is_bool_expr(#c_let{body=B}) ->
+    is_bool_expr(B);
+is_bool_expr(#c_literal{val=Val}) ->
+    is_boolean(Val);
+is_bool_expr(_) -> false.
+
+is_bool_expr_list([C|Cs]) ->
+    is_bool_expr(C) andalso is_bool_expr_list(Cs);
+is_bool_expr_list([]) -> true.
 
 %% is_safe_bool_expr(Core) -> true|false
 %%  Check whether the Core expression ALWAYS returns a boolean
@@ -2346,17 +2200,17 @@ is_bool_expr_list([], _) -> true.
 %%  is suitable for a guard (no calls to non-guard BIFs, local
 %%  functions, or is_record/2).
 %%
-is_safe_bool_expr(Core, Sub) ->
-    is_safe_bool_expr_1(Core, Sub, cerl_sets:new()).
+is_safe_bool_expr(Core) ->
+    is_safe_bool_expr_1(Core, cerl_sets:new()).
 
 is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
                             name=#c_literal{val=is_record},
                             args=[A,#c_literal{val=Tag},#c_literal{val=Size}]},
-                    Sub, _BoolVars) when is_atom(Tag), is_integer(Size) ->
-    is_safe_simple(A, Sub);
+                    _BoolVars) when is_atom(Tag), is_integer(Size) ->
+    is_safe_simple(A);
 is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
                             name=#c_literal{val=is_record}},
-                    _Sub, _BoolVars) ->
+                    _BoolVars) ->
     %% The is_record/2 BIF is NOT allowed in guards.
     %% The is_record/3 BIF where its second argument is not an atom or its third
     %% is not an integer is NOT allowed in guards.
@@ -2368,49 +2222,49 @@ is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
 is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
                             name=#c_literal{val=is_function},
                             args=[A,#c_literal{val=Arity}]},
-                    Sub, _BoolVars) when is_integer(Arity), Arity >= 0 ->
-    is_safe_simple(A, Sub);
+                    _BoolVars) when is_integer(Arity), Arity >= 0 ->
+    is_safe_simple(A);
 is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
                             name=#c_literal{val=is_function}},
-                    _Sub, _BoolVars) ->
+                    _BoolVars) ->
     false;
 is_safe_bool_expr_1(#c_call{module=#c_literal{val=erlang},
 			    name=#c_literal{val=Name},args=Args},
-		    Sub, BoolVars) ->
+		    BoolVars) ->
     NumArgs = length(Args),
     case (erl_internal:comp_op(Name, NumArgs) orelse
 	  erl_internal:new_type_test(Name, NumArgs)) andalso
-	is_safe_simple_list(Args, Sub) of
+	is_safe_simple_list(Args) of
 	true ->
 	    true;
 	false ->
 	    %% Boolean operators are safe if all arguments are boolean.
 	    erl_internal:bool_op(Name, NumArgs) andalso
-		is_safe_bool_expr_list(Args, Sub, BoolVars)
+		is_safe_bool_expr_list(Args, BoolVars)
     end;
-is_safe_bool_expr_1(#c_let{vars=Vars,arg=Arg,body=B}, Sub, BoolVars) ->
-    case is_safe_simple(Arg, Sub) of
+is_safe_bool_expr_1(#c_let{vars=Vars,arg=Arg,body=B}, BoolVars) ->
+    case is_safe_simple(Arg) of
 	true ->
-	    case {is_safe_bool_expr_1(Arg, Sub, BoolVars),Vars} of
+	    case {is_safe_bool_expr_1(Arg, BoolVars),Vars} of
 		{true,[#c_var{name=V}]} ->
-		    is_safe_bool_expr_1(B, Sub, cerl_sets:add_element(V, BoolVars));
+		    is_safe_bool_expr_1(B, cerl_sets:add_element(V, BoolVars));
 		{false,_} ->
-		    is_safe_bool_expr_1(B, Sub, BoolVars)
+		    is_safe_bool_expr_1(B, BoolVars)
 	    end;
 	false -> false
     end;
-is_safe_bool_expr_1(#c_literal{val=Val}, _Sub, _) ->
+is_safe_bool_expr_1(#c_literal{val=Val}, _BoolVars) ->
     is_boolean(Val);
-is_safe_bool_expr_1(#c_var{name=V}, _Sub, BoolVars) ->
+is_safe_bool_expr_1(#c_var{name=V}, BoolVars) ->
     cerl_sets:is_element(V, BoolVars);
-is_safe_bool_expr_1(_, _, _) -> false.
+is_safe_bool_expr_1(_, _) -> false.
 
-is_safe_bool_expr_list([C|Cs], Sub, BoolVars) ->
-    case is_safe_bool_expr_1(C, Sub, BoolVars) of
-	true -> is_safe_bool_expr_list(Cs, Sub, BoolVars);
+is_safe_bool_expr_list([C|Cs], BoolVars) ->
+    case is_safe_bool_expr_1(C, BoolVars) of
+	true -> is_safe_bool_expr_list(Cs, BoolVars);
 	false -> false
     end;
-is_safe_bool_expr_list([], _, _) -> true.
+is_safe_bool_expr_list([], _) -> true.
 
 %% simplify_let(Let, Sub) -> Expr | impossible
 %%  If the argument part of an let contains a complex expression, such
@@ -2785,7 +2639,7 @@ opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Sub) ->
                     %% with exported variables, but the return value is
                     %% ignored). We can remove the first variable and the
                     %% the first value returned from the 'let' argument.
-                    Arg2 = remove_first_value(Arg1, Sub),
+                    Arg2 = remove_first_value(Arg1),
                     Let1 = Let0#c_let{vars=Vars,arg=Arg2,body=Body},
                     post_opt_let(Let1, Sub);
                 true ->
@@ -2805,36 +2659,36 @@ post_opt_let(Let0, Sub) ->
     opt_build_stacktrace(Let1).
 
 
-%% remove_first_value(Core0, Sub) -> Core.
+%% remove_first_value(Core0) -> Core.
 %%  Core0 is an expression that returns at least two values.
 %%  Remove the first value returned from Core0.
 
-remove_first_value(#c_values{es=[V|Vs]}, Sub) ->
+remove_first_value(#c_values{es=[V|Vs]}) ->
     Values = core_lib:make_values(Vs),
-    case is_safe_simple(V, Sub) of
+    case is_safe_simple(V) of
         false ->
             #c_seq{arg=V,body=Values};
         true ->
             Values
     end;
-remove_first_value(#c_case{clauses=Cs0}=Core, Sub) ->
-    Cs = remove_first_value_cs(Cs0, Sub),
+remove_first_value(#c_case{clauses=Cs0}=Core) ->
+    Cs = remove_first_value_cs(Cs0),
     Core#c_case{clauses=Cs};
-remove_first_value(#c_receive{clauses=Cs0,action=Act0}=Core, Sub) ->
-    Cs = remove_first_value_cs(Cs0, Sub),
-    Act = remove_first_value(Act0, Sub),
+remove_first_value(#c_receive{clauses=Cs0,action=Act0}=Core) ->
+    Cs = remove_first_value_cs(Cs0),
+    Act = remove_first_value(Act0),
     Core#c_receive{clauses=Cs,action=Act};
-remove_first_value(#c_let{body=B}=Core, Sub) ->
-    Core#c_let{body=remove_first_value(B, Sub)};
-remove_first_value(#c_seq{body=B}=Core, Sub) ->
-    Core#c_seq{body=remove_first_value(B, Sub)};
-remove_first_value(#c_primop{}=Core, _Sub) ->
+remove_first_value(#c_let{body=B}=Core) ->
+    Core#c_let{body=remove_first_value(B)};
+remove_first_value(#c_seq{body=B}=Core) ->
+    Core#c_seq{body=remove_first_value(B)};
+remove_first_value(#c_primop{}=Core) ->
     Core;
-remove_first_value(#c_call{}=Core, _Sub) ->
+remove_first_value(#c_call{}=Core) ->
     Core.
 
-remove_first_value_cs(Cs, Sub) ->
-    [C#c_clause{body=remove_first_value(B, Sub)} ||
+remove_first_value_cs(Cs) ->
+    [C#c_clause{body=remove_first_value(B)} ||
         #c_clause{body=B}=C <- Cs].
 
 %% maybe_suppress_warnings(Arg, #c_var{}, PreviousBody) -> Arg'
@@ -2962,54 +2816,6 @@ move_case_into_arg(Expr, _) ->
     Expr.
 
 %%%
-%%% Retrieving information about types.
-%%%
-
--spec get_type(cerl:cerl(), #sub{}) -> type_info() | 'none'.
-
-get_type(#c_var{name=V}, #sub{t=Tdb}) ->
-    case Tdb of
-        #{V:=Type} -> Type;
-        _ -> none
-    end;
-get_type(C, _) ->
-    case cerl:type(C) of
-	binary -> C;
-	map -> C;
-	_ ->
-	    case cerl:is_data(C) of
-		true -> C;
-		false -> none
-	    end
-    end.
-
--spec is_boolean_type(cerl:cerl(), sub()) -> yes_no_maybe().
-
-is_boolean_type(Var, Sub) ->
-    case get_type(Var, Sub) of
-	none ->
-	    maybe;
-	bool ->
-	    yes;
-	C ->
-	    B = cerl:is_c_atom(C) andalso
-		is_boolean(cerl:atom_val(C)),
-	    yes_no(B)
-    end.
-
--spec is_int_type(cerl:cerl(), sub()) -> yes_no_maybe().
-
-is_int_type(Var, Sub) ->
-    case get_type(Var, Sub) of
-	none -> maybe;
-	integer -> yes;
-	C -> yes_no(cerl:is_c_int(C))
-    end.
-
-yes_no(true) -> yes;
-yes_no(false) -> no.
-
-%%%
 %%% Update type information.
 %%%
 
@@ -3020,70 +2826,14 @@ update_let_types(_Vs, _Arg, Sub) ->
     %% that returns multiple values.
     Sub.
 
-update_let_types_1([#c_var{}=V|Vs], [A|As], Sub0) ->
-    Sub = update_types_from_expr(V, A, Sub0),
+update_let_types_1([#c_var{name=V}|Vs], [A|As], Sub0) ->
+    Sub = update_types(V, A, Sub0),
     update_let_types_1(Vs, As, Sub);
 update_let_types_1([], [], Sub) -> Sub.
 
-update_types_from_expr(V, Expr, Sub) ->
-    Type = extract_type(Expr, Sub),
-    update_types(V, [Type], Sub).
-
-extract_type(#c_call{module=#c_literal{val=erlang},
-		     name=#c_literal{val=Name},
-		     args=Args}=Call, Sub) ->
-    case returns_integer(Name, Args) of
-	true -> integer;
-	false -> extract_type_1(Call, Sub)
-    end;
-extract_type(Expr, Sub) ->
-    extract_type_1(Expr, Sub).
-
-extract_type_1(Expr, Sub) ->
-    case is_bool_expr(Expr, Sub) of
-	false -> Expr;
-	true -> bool
-    end.
-
-returns_integer('band', [_,_]) -> true;
-returns_integer('bnot', [_]) -> true;
-returns_integer('bor', [_,_]) -> true;
-returns_integer('bxor', [_,_]) -> true;
-returns_integer(bit_size, [_]) -> true;
-returns_integer('bsl', [_,_]) -> true;
-returns_integer('bsr', [_,_]) -> true;
-returns_integer(byte_size, [_]) -> true;
-returns_integer(ceil, [_]) -> true;
-returns_integer('div', [_,_]) -> true;
-returns_integer(floor, [_]) -> true;
-returns_integer(length, [_]) -> true;
-returns_integer('rem', [_,_]) -> true;
-returns_integer('round', [_]) -> true;
-returns_integer(size, [_]) -> true;
-returns_integer(tuple_size, [_]) -> true;
-returns_integer(trunc, [_]) -> true;
-returns_integer(_, _) -> false.
-
-%% update_types(Expr, Pattern, Sub) -> Sub'
-%%  Update the type database.
-
--spec update_types(cerl:c_var(), [type_info()], sub()) -> sub().
-
-update_types(#c_var{name=V}, Pat, #sub{t=Tdb0}=Sub) ->
-    Tdb = update_types_1(V, Pat, Tdb0),
-    Sub#sub{t=Tdb}.
-
-update_types_1(V, [#c_tuple{}=P], Types) ->
-    Types#{V=>P};
-update_types_1(V, [#c_literal{val=Bool}], Types) when is_boolean(Bool) ->
-    Types#{V=>bool};
-update_types_1(V, [#c_fun{vars=Vars}], Types) ->
-    Types#{V=>{'fun',length(Vars)}};
-update_types_1(V, [#c_var{name={_,Arity}}], Types) ->
-    Types#{V=>{'fun',Arity}};
-update_types_1(V, [Type], Types) when is_atom(Type) ->
-    Types#{V=>Type};
-update_types_1(_, _, Types) -> Types.
+update_types(V, #c_tuple{}=P, #sub{t=Tdb}=Sub) ->
+    Sub#sub{t=Tdb#{V=>P}};
+update_types(_, _, Sub) -> Sub.
 
 %% kill_types(V, Tdb) -> Tdb'
 %%  Kill any entries that references the variable,
@@ -3099,10 +2849,6 @@ kill_types2(V, [{_,#c_tuple{}=Tuple}=Entry|Tdb]) ->
 	false -> [Entry|kill_types2(V, Tdb)];
 	true -> kill_types2(V, Tdb)
     end;
-kill_types2(V, [{_, {'fun',_}}=Entry|Tdb]) ->
-    [Entry|kill_types2(V, Tdb)];
-kill_types2(V, [{_,Atom}=Entry|Tdb]) when is_atom(Atom) ->
-    [Entry|kill_types2(V, Tdb)];
 kill_types2(_, []) -> [].
 
 %% copy_type(DestVar, SrcVar, Tdb) -> Tdb'
diff --git a/lib/compiler/test/beam_ssa_SUITE.erl b/lib/compiler/test/beam_ssa_SUITE.erl
index a741ebbdf9..96cc846799 100644
--- a/lib/compiler/test/beam_ssa_SUITE.erl
+++ b/lib/compiler/test/beam_ssa_SUITE.erl
@@ -346,48 +346,8 @@ cover_ssa_dead(_Config) ->
     40.0 = percentage(4.0, 10.0),
     60.0 = percentage(6, 10),
 
-    %% Cover '=:=', followed by '=/='.
-    false = 'cover__=:=__=/='(41),
-    true = 'cover__=:=__=/='(42),
-    false = 'cover__=:=__=/='(43),
-
-    %% Cover '<', followed by '=/='.
-    true = 'cover__<__=/='(41),
-    false = 'cover__<__=/='(42),
-    false = 'cover__<__=/='(43),
-
-    %% Cover '=<', followed by '=/='.
-    true = 'cover__=<__=/='(41),
-    true = 'cover__=<__=/='(42),
-    false = 'cover__=<__=/='(43),
-
-    %% Cover '>=', followed by '=/='.
-    false = 'cover__>=__=/='(41),
-    true = 'cover__>=__=/='(42),
-    true = 'cover__>=__=/='(43),
-
-    %% Cover '>', followed by '=/='.
-    false = 'cover__>__=/='(41),
-    false = 'cover__>__=/='(42),
-    true = 'cover__>__=/='(43),
-
     ok.
 
-'cover__=:=__=/='(X) when X =:= 42 -> X =/= 43;
-'cover__=:=__=/='(_) -> false.
-
-'cover__<__=/='(X) when X < 42 -> X =/= 42;
-'cover__<__=/='(_) -> false.
-
-'cover__=<__=/='(X) when X =< 42 -> X =/= 43;
-'cover__=<__=/='(_) -> false.
-
-'cover__>=__=/='(X) when X >= 42 -> X =/= 41;
-'cover__>=__=/='(_) -> false.
-
-'cover__>__=/='(X) when X > 42 -> X =/= 42;
-'cover__>__=/='(_) -> false.
-
 format_str(Str, FormatData, IoList, EscChars) ->
     Escapable = FormatData =:= escapable,
     case id(Str) of
diff --git a/lib/compiler/test/compile_SUITE.erl b/lib/compiler/test/compile_SUITE.erl
index 53627b9d81..7e9e641478 100644
--- a/lib/compiler/test/compile_SUITE.erl
+++ b/lib/compiler/test/compile_SUITE.erl
@@ -378,7 +378,6 @@ do_file_listings(DataDir, PrivDir, [File|Files]) ->
             {dprecg, ".precodegen"},
             {dcg, ".codegen"},
             {dblk, ".block"},
-            {dexcept, ".except"},
             {djmp, ".jump"},
             {dclean, ".clean"},
             {dpeep, ".peep"},
@@ -1411,8 +1410,13 @@ bc_options(Config) ->
          {158, small_maps, [r20]},
          {158, small_maps, [r21]},
 
+         {164, small_maps, [r22]},
+         {164, big, [r22]},
          {164, small_maps, []},
-         {164, big, []}
+         {164, big, []},
+
+         {168, small, [r22]},
+         {168, small, []}
         ],
 
     Test = fun({Expected,Mod,Options}) ->
diff --git a/lib/compiler/test/guard_SUITE.erl b/lib/compiler/test/guard_SUITE.erl
index cea7a374cd..d3d62b53f5 100644
--- a/lib/compiler/test/guard_SUITE.erl
+++ b/lib/compiler/test/guard_SUITE.erl
@@ -19,7 +19,7 @@
 %%
 -module(guard_SUITE).
 
--include_lib("common_test/include/ct.hrl").
+-include_lib("syntax_tools/include/merl.hrl").
 
 -export([all/0, suite/0,groups/0,init_per_suite/1, end_per_suite/1, 
 	 init_per_group/2,end_per_group/2,
@@ -31,7 +31,8 @@
 	 old_guard_tests/1,complex_guard/1,
 	 build_in_guard/1,gbif/1,
 	 t_is_boolean/1,is_function_2/1,
-	 tricky/1,rel_ops/1,rel_op_combinations/1,literal_type_tests/1,
+	 tricky/1,rel_ops/1,rel_op_combinations/1,
+         generated_combinations/1,literal_type_tests/1,
 	 basic_andalso_orelse/1,traverse_dcd/1,
 	 check_qlc_hrl/1,andalso_semi/1,t_tuple_size/1,binary_part/1,
 	 bad_constants/1,bad_guards/1,
@@ -51,7 +52,7 @@ groups() ->
        more_xor_guards,build_in_guard,
        old_guard_tests,complex_guard,gbif,
        t_is_boolean,is_function_2,tricky,
-       rel_ops,rel_op_combinations,
+       rel_ops,rel_op_combinations,generated_combinations,
        literal_type_tests,basic_andalso_orelse,traverse_dcd,
        check_qlc_hrl,andalso_semi,t_tuple_size,binary_part,
        bad_constants,bad_guards,guard_in_catch,beam_bool_SUITE,
@@ -1579,6 +1580,122 @@ redundant_12(X) when X >= 50, X =< 80 -> 2*X;
 redundant_12(X) when X < 51 -> 5*X;
 redundant_12(_) -> none.
 
+generated_combinations(Config) ->
+    case ?MODULE of
+	guard_SUITE -> generated_combinations_1(Config);
+	_ -> {skip,"Enough to run this case once."}
+    end.
+
+%% Exhaustively test all combinations of relational operators
+%% to ensure the correctness of the optimizations in beam_ssa_dead.
+
+generated_combinations_1(Config) ->
+    Mod = ?FUNCTION_NAME,
+    RelOps = ['=:=','=/=','==','/=','<','=<','>=','>'],
+    Combinations0 = [{Op1,Op2} || Op1 <- RelOps, Op2 <- RelOps],
+    Combinations1 = gen_lit_combs(Combinations0),
+    Combinations2 = [{neq,Comb} ||
+                        {_Op1,_Lit1,Op2,_Lit2}=Comb <- Combinations1,
+                        Op2 =:= '=/=' orelse Op2 =:= '/='] ++ Combinations1,
+    Combinations = gen_func_names(Combinations2, 0),
+    Fs = gen_rel_op_functions(Combinations),
+    Tree = ?Q(["-module('@Mod@').",
+               "-compile([export_all,nowarn_export_all])."]) ++ Fs,
+    %%merl:print(Tree),
+    Opts = test_lib:opt_opts(?MODULE),
+    {ok,_Bin} = merl:compile_and_load(Tree, Opts),
+    test_combinations(Combinations, Mod).
+
+gen_lit_combs([{Op1,Op2}|T]) ->
+    [{Op1,7,Op2,6},
+     {Op1,7.0,Op2,6},
+     {Op1,7,Op2,6.0},
+     {Op1,7.0,Op2,6.0},
+
+     {Op1,7,Op2,7},
+     {Op1,7.0,Op2,7},
+     {Op1,7,Op2,7.0},
+     {Op1,7.0,Op2,7.0},
+
+     {Op1,6,Op2,7},
+     {Op1,6.0,Op2,7},
+     {Op1,6,Op2,7.0},
+     {Op1,6.0,Op2,7.0}|gen_lit_combs(T)];
+gen_lit_combs([]) -> [].
+
+gen_func_names([E|Es], I) ->
+    Name = list_to_atom("f" ++ integer_to_list(I)),
+    [{Name,E}|gen_func_names(Es, I+1)];
+gen_func_names([], _) -> [].
+
+gen_rel_op_functions([{Name,{neq,{Op1,Lit1,Op2,Lit2}}}|T]) ->
+    %% Note that in the translation to SSA, '=/=' will be
+    %% translated to '=:=' in a guard (with switched success
+    %% and failure labels). Therefore, to test the optimization,
+    %% we must use '=/=' (or '/=') in a body context.
+    %%
+    %% Here is an example of a generated function:
+    %%
+    %%     f160(A) when erlang:'>='(A, 7) ->
+    %%          one;
+    %%     f160(A) ->
+    %%          true = erlang:'/='(A, 7),
+    %%          two.
+    [?Q("'@Name@'(A) when erlang:'@Op1@'(A, _@Lit1@) -> one;
+        '@Name@'(A) -> true = erlang:'@Op2@'(A, _@Lit2@), two. ")|
+     gen_rel_op_functions(T)];
+gen_rel_op_functions([{Name,{Op1,Lit1,Op2,Lit2}}|T]) ->
+    %% Example of a generated function:
+    %%
+    %% f721(A) when erlang:'=<'(A, 7.0) -> one;
+    %% f721(A) when erlang:'<'(A, 6) -> two;
+    %% f721(_) -> three.
+    [?Q("'@Name@'(A) when erlang:'@Op1@'(A, _@Lit1@) -> one;
+        '@Name@'(A) when erlang:'@Op2@'(A, _@Lit2@) -> two;
+        '@Name@'(_) -> three.")|gen_rel_op_functions(T)];
+gen_rel_op_functions([]) -> [].
+
+test_combinations([{Name,E}|T], Mod) ->
+    try
+        test_combinations_1([5,6,7,8,9], E, fun Mod:Name/1),
+        test_combination(6.5, E, fun Mod:Name/1)
+    catch
+        error:Reason:Stk ->
+            io:format("~p: ~p\n", [Name,E]),
+            erlang:raise(error, Reason, Stk)
+    end,
+    test_combinations(T, Mod);
+test_combinations([], _Mod) -> ok.
+
+test_combinations_1([V|Vs], E, Fun) ->
+    test_combination(V, E, Fun),
+    test_combination(float(V), E, Fun),
+    test_combinations_1(Vs, E, Fun);
+test_combinations_1([], _, _) -> ok.
+
+test_combination(Val, {neq,Expr}, Fun) ->
+    Result = eval_combination_expr(Expr, Val),
+    Result = try
+                 Fun(Val)                       %Returns 'one' or 'two'.
+             catch
+                 error:{badmatch,_} ->
+                     three
+             end;
+test_combination(Val, Expr, Fun) ->
+    Result = eval_combination_expr(Expr, Val),
+    Result = Fun(Val).
+
+eval_combination_expr({Op1,Lit1,Op2,Lit2}, Val) ->
+    case erlang:Op1(Val, Lit1) of
+        true ->
+            one;
+        false ->
+            case erlang:Op2(Val, Lit2) of
+                true -> two;
+                false -> three
+            end
+    end.
+
 %% Test type tests on literal values. (From emulator test suites.)
 literal_type_tests(Config) when is_list(Config) ->
     case ?MODULE of
diff --git a/lib/compiler/test/misc_SUITE.erl b/lib/compiler/test/misc_SUITE.erl
index a0b415ceaa..eb60dc049d 100644
--- a/lib/compiler/test/misc_SUITE.erl
+++ b/lib/compiler/test/misc_SUITE.erl
@@ -227,15 +227,6 @@ silly_coverage(Config) when is_list(Config) ->
 		     {label,2}|non_proper_list]}],99},
     expect_error(fun() -> beam_block:module(BlockInput, []) end),
 
-    %% beam_except
-    ExceptInput = {?MODULE,[{foo,0}],[],
-		   [{function,foo,0,2,
-		     [{label,1},
-		      {line,loc},
-		      {func_info,{atom,?MODULE},{atom,foo},0},
-		      {label,2}|non_proper_list]}],99},
-    expect_error(fun() -> beam_except:module(ExceptInput, []) end),
-
     %% beam_jump
     JumpInput = BlockInput,
     expect_error(fun() -> beam_jump:module(JumpInput, []) end),
diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl
index 3348c6e9ea..98210a351c 100644
--- a/lib/compiler/test/test_lib.erl
+++ b/lib/compiler/test/test_lib.erl
@@ -82,6 +82,7 @@ opt_opts(Mod) ->
                      (no_ssa_float) -> true;
                      (no_ssa_opt) -> true;
                      (no_stack_trimming) -> true;
+                     (no_swap) -> true;
                      (no_type_opt) -> true;
                      (_) -> false
                 end, Opts).
diff --git a/lib/hipe/icode/hipe_beam_to_icode.erl b/lib/hipe/icode/hipe_beam_to_icode.erl
index 8e7e56b6c4..42e4ead169 100644
--- a/lib/hipe/icode/hipe_beam_to_icode.erl
+++ b/lib/hipe/icode/hipe_beam_to_icode.erl
@@ -1204,6 +1204,17 @@ trans_fun([{bs_get_position=Name,_,_,_}|_Instructions], _Env) ->
 trans_fun([{bs_set_position=Name,_,_}|_Instructions], _Env) ->
   nyi(Name);
 %%--------------------------------------------------------------------
+%% New instructions added in OTP 23.
+%%--------------------------------------------------------------------
+%%--- swap ---
+trans_fun([{swap,Reg1,Reg2}|Instructions], Env) ->
+  Var1 = mk_var(Reg1),
+  Var2 = mk_var(Reg2),
+  Temp = mk_var(new),
+  [hipe_icode:mk_move(Temp, Var1),
+   hipe_icode:mk_move(Var1, Var2),
+   hipe_icode:mk_move(Var2, Temp) | trans_fun(Instructions, Env)];
+%%--------------------------------------------------------------------
 %%--- ERROR HANDLING ---
 %%--------------------------------------------------------------------
 trans_fun([X|_], _) ->
diff --git a/lib/kernel/doc/src/file.xml b/lib/kernel/doc/src/file.xml
index fc25e83d40..b3e8149cc2 100644
--- a/lib/kernel/doc/src/file.xml
+++ b/lib/kernel/doc/src/file.xml
@@ -939,6 +939,10 @@ f.txt:  {person, "kalle", 25}.
             support for POSIX <c>O_SYNC</c> or equivalent, use of the <c>sync</c>
 	    flag causes <c>open</c> to return <c>{error, enotsup}</c>.</p>
           </item>
+          <tag><c>directory</c></tag>
+          <item>
+            <p>Allows <c>open</c> to work on directories.</p>
+          </item>
         </taglist>
         <p>Returns:</p>
         <taglist>
@@ -985,8 +989,10 @@ f.txt:  {person, "kalle", 25}.
           </item>
           <tag><c>enotdir</c></tag>
           <item>
-            <p>A component of the filename is not a directory. On some
-              platforms, <c>enoent</c> is returned instead.</p>
+            <p>A component of the filename is not a directory, or the
+              filename itself is not a directory if <c>directory</c>
+              mode was specified. On some platforms, <c>enoent</c> is
+              returned instead.</p>
           </item>
           <tag><c>enospc</c></tag>
           <item>
diff --git a/lib/kernel/doc/src/seq_trace.xml b/lib/kernel/doc/src/seq_trace.xml
index aa29223dd0..aa9067f082 100644
--- a/lib/kernel/doc/src/seq_trace.xml
+++ b/lib/kernel/doc/src/seq_trace.xml
@@ -107,6 +107,12 @@ seq_trace:set_token(OldToken), % activate the trace token again
               enables/disables tracing on message sending. Default is
               <c>false</c>.</p>
           </item>
+          <tag><c>set_token('spawn', <anno>Bool</anno>)</c></tag>
+          <item>
+            <p>A trace token flag (<c>true | false</c>) which
+              enables/disables tracing on process spawning. Default is
+              <c>false</c>.</p>
+          </item>
           <tag><c>set_token('receive', <anno>Bool</anno>)</c></tag>
           <item>
             <p>A trace token flag (<c>true | false</c>) which
@@ -257,7 +263,12 @@ TimeStamp = {Seconds, Milliseconds, Microseconds}
       <tag><c>{send, Serial, From, To, Message}</c></tag>
       <item>
         <p>Used when a process <c>From</c> with its trace token flag
-          <c>print</c> set to <c>true</c> has sent a message.</p>
+          <c>send</c> set to <c>true</c> has sent a message.</p>
+      </item>
+      <tag><c>{spawn, Serial, Parent, Child, _}</c></tag>
+      <item>
+        <p>Used when a process <c>Parent</c> with its trace token flag
+          <c>spawn</c> set to <c>true</c> has spawned a process.</p>
       </item>
       <tag><c>{'receive', Serial, From, To, Message}</c></tag>
       <item>
@@ -295,8 +306,8 @@ TimeStamp = {Seconds, Milliseconds, Microseconds}
       is initiated by a single message. In short, it works as follows:</p>
     <p>Each process has a <em>trace token</em>, which can be empty or
       not empty. When not empty, the trace token can be seen as
-      the tuple <c>{Label, Flags, Serial, From}</c>. The trace token is
-      passed invisibly with each message.</p>
+      the tuple <c>{Label, Flags, Serial, From}</c>. The trace token is passed
+      invisibly to spawned processes and with each message sent.</p>
     <p>To start a sequential trace, the user must explicitly set
       the trace token in the process that will send the first message
       in a sequence.</p>
@@ -306,9 +317,10 @@ TimeStamp = {Seconds, Milliseconds, Microseconds}
     <p>On each Erlang node, a process can be set as the <em>system tracer</em>.
       This process will receive trace messages each time
       a message with a trace token is sent or received (if the trace
-      token flag <c>send</c> or <c>'receive'</c> is set). The system
-      tracer can then print each trace event, write it to a file, or
-      whatever suitable.</p>
+      token flag <c>send</c> or <c>'receive'</c> is set), and when a process
+      with a non-empty trace token spawns another (if the trace token flag
+      <c>spawn</c> is set). The system tracer can then print each trace event,
+      write it to a file, or whatever suitable.</p>
     <note>
       <p>The system tracer only receives those trace events that
         occur locally within the Erlang node. To get the whole picture
@@ -322,10 +334,9 @@ TimeStamp = {Seconds, Milliseconds, Microseconds}
 
   <section>
     <title>Trace Token</title>
-    <p>Each process has a current trace token. Initially, the token is
-      empty. When the process sends a message to another process, a
-      copy of the current token is sent "invisibly" along with
-      the message.</p>
+    <p>Each process has a current trace token, which is copied from the process
+      that spawned it. When a process sends a message to another process, a
+      copy of the current token is sent "invisibly" along with the message.</p>
     <p>The current token of a process is set in one of the following two
       ways:</p>
     <list type="bulleted">
@@ -354,8 +365,9 @@ TimeStamp = {Seconds, Milliseconds, Microseconds}
     <p>The algorithm for updating <c>Serial</c> can be described as
       follows:</p>
     <p>Let each process have two counters, <c>prev_cnt</c> and
-      <c>curr_cnt</c>, both are set to <c>0</c> when a process is created.
-      The counters are updated at the following occasions:</p>
+      <c>curr_cnt</c>, both are set to <c>0</c> when a process is created
+      outside of a trace sequence. The counters are updated at the following
+      occasions:</p>
     <list type="bulleted">
       <item>
         <p><em>When the process is about to send a message and the trace token
@@ -370,6 +382,16 @@ tcurr := curr_cnt</pre>
           passed along with the message.</p>
       </item>
       <item>
+        <p><em>When the process is about to spawn another process and the trace
+          token is not empty.</em></p>
+        <p>The counters of the parent process are updated in the same way as
+          for send above. The trace token is then passed to the child process,
+          whose counters will be set as follows:</p>
+        <code>
+curr_cnt := tcurr
+prev_cnt := tcurr</code>
+      </item>
+      <item>
         <p><em>When the process calls</em> <c>seq_trace:print(Label, Info)</c>,
           <c>Label</c> <em>matches the label part of the trace token and the
           trace token print flag is <c>true</c>.</em></p>
@@ -487,9 +509,9 @@ tracer() ->
            print_trace(Label,TraceInfo,false);
         {seq_trace,Label,TraceInfo,Ts} ->
            print_trace(Label,TraceInfo,Ts);
-        Other -> ignore
+        _Other -> ignore
     end,
-    tracer().        
+    tracer().
 
 print_trace(Label,TraceInfo,false) ->
     io:format("~p:",[Label]),
@@ -504,8 +526,11 @@ print_trace({'receive',Serial,From,To,Message}) ->
     io:format("~p Received ~p FROM ~p WITH~n~p~n", 
               [To,Serial,From,Message]);
 print_trace({send,Serial,From,To,Message}) ->
-    io:format("~p Sent ~p TO ~p WITH~n~p~n", 
-              [From,Serial,To,Message]).</code>
+    io:format("~p Sent ~p TO ~p WITH~n~p~n",
+              [From,Serial,To,Message]);
+print_trace({spawn,Serial,Parent,Child,_}) ->
+    io:format("~p Spawned ~p AT ~p~n",
+              [Parent,Child,Serial]).</code>
     <p>The code that creates a process that runs this tracer function
       and sets that process as the system tracer can look like this:</p>
     <code type="none">
diff --git a/lib/kernel/src/file.erl b/lib/kernel/src/file.erl
index 1d4e37196c..a0616da670 100644
--- a/lib/kernel/src/file.erl
+++ b/lib/kernel/src/file.erl
@@ -460,7 +460,7 @@ raw_write_file_info(Name, #file_info{} = Info) ->
 -spec open(File, Modes) -> {ok, IoDevice} | {error, Reason} when
       File :: Filename | iodata(),
       Filename :: name_all(),
-      Modes :: [mode() | ram],
+      Modes :: [mode() | ram | directory],
       IoDevice :: io_device(),
       Reason :: posix() | badarg | system_limit.
 
@@ -1143,7 +1143,7 @@ path_script(Path, File, Bs) ->
              {ok, IoDevice, FullName} | {error, Reason} when
       Path :: [Dir :: name_all()],
       Filename :: name_all(),
-      Modes :: [mode()],
+      Modes :: [mode() | directory],
       IoDevice :: io_device(),
       FullName :: filename_all(),
       Reason :: posix() | badarg | system_limit.
diff --git a/lib/kernel/src/seq_trace.erl b/lib/kernel/src/seq_trace.erl
index 4f9d7b3e5c..327754d9ac 100644
--- a/lib/kernel/src/seq_trace.erl
+++ b/lib/kernel/src/seq_trace.erl
@@ -20,12 +20,14 @@
 
 -module(seq_trace).
 
--define(SEQ_TRACE_SEND, 1).       %(1 << 0)
--define(SEQ_TRACE_RECEIVE, 2).    %(1 << 1)
--define(SEQ_TRACE_PRINT, 4).      %(1 << 2)
--define(SEQ_TRACE_NOW_TIMESTAMP, 8). %(1 << 3)
--define(SEQ_TRACE_STRICT_MON_TIMESTAMP, 16). %(1 << 4)
--define(SEQ_TRACE_MON_TIMESTAMP, 32). %(1 << 5)
+%% Don't forget to update seq_trace_SUITE after changing these.
+-define(SEQ_TRACE_SEND, 1).                     %(1 << 0)
+-define(SEQ_TRACE_RECEIVE, 2).                  %(1 << 1)
+-define(SEQ_TRACE_PRINT, 4).                    %(1 << 2)
+-define(SEQ_TRACE_NOW_TIMESTAMP, 8).            %(1 << 3)
+-define(SEQ_TRACE_STRICT_MON_TIMESTAMP, 16).    %(1 << 4)
+-define(SEQ_TRACE_MON_TIMESTAMP, 32).           %(1 << 5)
+-define(SEQ_TRACE_SPAWN, 64).                   %(1 << 6)
 
 -export([set_token/1,
 	 set_token/2,
@@ -39,7 +41,8 @@
 
 %%---------------------------------------------------------------------------
 
--type flag()       :: 'send' | 'receive' | 'print' | 'timestamp' | 'monotonic_timestamp' | 'strict_monotonic_timestamp'.
+-type flag()       :: 'send' | 'spawn' | 'receive' | 'print' | 'timestamp' |
+                      'monotonic_timestamp' | 'strict_monotonic_timestamp'.
 -type component()  :: 'label' | 'serial' | flag().
 -type value()      :: (Label :: term())
                     | {Previous :: non_neg_integer(),
@@ -142,10 +145,11 @@ set_token2([]) ->
 decode_flags(Flags) ->
     Print = (Flags band ?SEQ_TRACE_PRINT) > 0,
     Send = (Flags band ?SEQ_TRACE_SEND) > 0,
+    Spawn = (Flags band ?SEQ_TRACE_SPAWN) > 0,
     Rec = (Flags band ?SEQ_TRACE_RECEIVE) > 0,
     NowTs = (Flags band ?SEQ_TRACE_NOW_TIMESTAMP) > 0,
     StrictMonTs = (Flags band ?SEQ_TRACE_STRICT_MON_TIMESTAMP) > 0,
     MonTs = (Flags band ?SEQ_TRACE_MON_TIMESTAMP) > 0,
-    [{print,Print},{send,Send},{'receive',Rec},{timestamp,NowTs},
+    [{print,Print},{send,Send},{spawn,Spawn},{'receive',Rec},{timestamp,NowTs},
      {strict_monotonic_timestamp, StrictMonTs},
      {monotonic_timestamp, MonTs}].
diff --git a/lib/kernel/test/file_SUITE.erl b/lib/kernel/test/file_SUITE.erl
index 3bc8e6e828..21aaefa654 100644
--- a/lib/kernel/test/file_SUITE.erl
+++ b/lib/kernel/test/file_SUITE.erl
@@ -987,6 +987,14 @@ new_modes(Config) when is_list(Config) ->
 	     ok
      end,
 
+     % open directory
+     {ok, Fd9} = ?FILE_MODULE:open(NewDir, [directory]),
+     ok = ?FILE_MODULE:close(Fd9),
+
+     % open raw directory
+     {ok, Fd10} = ?FILE_MODULE:open(NewDir, [raw, directory]),
+     ok = ?FILE_MODULE:close(Fd10),
+
      [] = flush(),
      ok.
 
@@ -1236,6 +1244,9 @@ open_errors(Config) when is_list(Config) ->
     {error, E4} = ?FILE_MODULE:open(DataDirSlash, [write]),
     {eisdir,eisdir,eisdir,eisdir} = {E1,E2,E3,E4},
 
+    Real = filename:join(DataDir, "realmen.html"),
+    {error, enotdir} = ?FILE_MODULE:open(Real, [directory]),
+
     [] = flush(),
     ok.
 
diff --git a/lib/kernel/test/gen_tcp_api_SUITE.erl b/lib/kernel/test/gen_tcp_api_SUITE.erl
index 1be016444f..00c9dc5ed5 100644
--- a/lib/kernel/test/gen_tcp_api_SUITE.erl
+++ b/lib/kernel/test/gen_tcp_api_SUITE.erl
@@ -594,10 +594,13 @@ unused_ip() ->
     io:format("we = ~p, unused_ip = ~p~n", [Hent, IP]),
     IP.
 
-unused_ip(_, _, _, 255) -> error;
+unused_ip(255, 255, 255, 255) -> error;
+unused_ip(255, B, C, D) -> unused_ip(1, B + 1, C, D);
+unused_ip(A, 255, C, D) -> unused_ip(A, 1, C + 1, D);
+unused_ip(A, B, 255, D) -> unused_ip(A, B, 1, D + 1);
 unused_ip(A, B, C, D) ->
     case inet:gethostbyaddr({A, B, C, D}) of
-	{ok, _} -> unused_ip(A, B, C, D+1);
+	{ok, _} -> unused_ip(A + 1, B, C, D);
 	{error, _} -> {ok, {A, B, C, D}}
     end.
 
diff --git a/lib/kernel/test/seq_trace_SUITE.erl b/lib/kernel/test/seq_trace_SUITE.erl
index 663f910751..09b4365c8c 100644
--- a/lib/kernel/test/seq_trace_SUITE.erl
+++ b/lib/kernel/test/seq_trace_SUITE.erl
@@ -29,7 +29,7 @@
 	 send/1, distributed_send/1, recv/1, distributed_recv/1,
 	 trace_exit/1, distributed_exit/1, call/1, port/1,
 	 match_set_seq_token/1, gc_seq_token/1, label_capability_mismatch/1,
-         send_literal/1]).
+         send_literal/1,inherit_on_spawn/1,spawn_flag/1]).
 
 %% internal exports
 -export([simple_tracer/2, one_time_receiver/0, one_time_receiver/1,
@@ -51,7 +51,8 @@ all() ->
     [token_set_get, tracer_set_get, print, send, send_literal,
      distributed_send, recv, distributed_recv, trace_exit,
      distributed_exit, call, port, match_set_seq_token,
-     gc_seq_token, label_capability_mismatch].
+     gc_seq_token, label_capability_mismatch,
+     inherit_on_spawn, spawn_flag].
 
 groups() -> 
     [].
@@ -81,14 +82,29 @@ token_set_get(Config) when is_list(Config) ->
     do_token_set_get(timestamp),
     do_token_set_get(monotonic_timestamp),
     do_token_set_get(strict_monotonic_timestamp).
-    
+
+-define(SEQ_TRACE_SEND, 1).                     %(1 << 0)
+-define(SEQ_TRACE_RECEIVE, 2).                  %(1 << 1)
+-define(SEQ_TRACE_PRINT, 4).                    %(1 << 2)
+-define(SEQ_TRACE_NOW_TIMESTAMP, 8).            %(1 << 3)
+-define(SEQ_TRACE_STRICT_MON_TIMESTAMP, 16).    %(1 << 4)
+-define(SEQ_TRACE_MON_TIMESTAMP, 32).           %(1 << 5)
+-define(SEQ_TRACE_SPAWN, 64).                   %(1 << 6)
+
 do_token_set_get(TsType) ->
-    io:format("Testing ~p~n", [TsType]),
+    BaseOpts = ?SEQ_TRACE_SEND bor
+               ?SEQ_TRACE_RECEIVE bor
+               ?SEQ_TRACE_PRINT bor
+               ?SEQ_TRACE_SPAWN,
     Flags = case TsType of
-		timestamp -> 15;
-		strict_monotonic_timestamp -> 23;
-		monotonic_timestamp -> 39
-	    end,
+        timestamp ->
+            BaseOpts bor ?SEQ_TRACE_NOW_TIMESTAMP;
+        strict_monotonic_timestamp ->
+            BaseOpts bor ?SEQ_TRACE_STRICT_MON_TIMESTAMP;
+        monotonic_timestamp ->
+            BaseOpts bor ?SEQ_TRACE_MON_TIMESTAMP
+        end,
+    ct:pal("Type ~p, flags = ~p~n", [TsType, Flags]),
     Self = self(),
     seq_trace:reset_trace(),
     %% Test that initial seq_trace is disabled
@@ -100,6 +116,8 @@ do_token_set_get(TsType) ->
     {print,true} = seq_trace:get_token(print),
     false = seq_trace:set_token(send,true),
     {send,true} = seq_trace:get_token(send),
+    false = seq_trace:set_token(spawn,true),
+    {spawn,true} = seq_trace:get_token(spawn),
     false = seq_trace:set_token('receive',true),
     {'receive',true} = seq_trace:get_token('receive'),
     false = seq_trace:set_token(TsType,true),
@@ -462,8 +480,6 @@ call(Config) when is_list(Config) ->
     1 =
 	erlang:trace(Self, true, 
 		     [call, set_on_spawn, {tracer, TrB(pid)}]),
-    Label = 17,
-    seq_trace:set_token(label, Label), % Token enters here!!
     RefB = make_ref(),
     Pid2B = spawn_link(
 		    fun() ->
@@ -477,6 +493,12 @@ call(Config) when is_list(Config) ->
 			    RefB = call_tracee_1(RefB),
 			    Pid2B ! {self(), msg, RefB}
 		    end),
+
+    %% The token is set *AFTER* spawning to make sure we're testing that the
+    %% token follows on send and not that it inherits on spawn.
+    Label = 17,
+    seq_trace:set_token(label, Label),
+
     Pid1B ! {Self, msg, RefB},
     %% The message is passed Self -> Pid1B -> Pid2B -> Self, and the 
     %% seq_trace token follows invisibly. Traced functions are 
@@ -497,6 +519,62 @@ call(Config) when is_list(Config) ->
     seq_trace:reset_trace(),
     ok.
 
+%% The token should follow spawn, just like it follows messages.
+inherit_on_spawn(Config) when is_list(Config) ->
+    seq_trace:reset_trace(),
+    start_tracer(),
+
+    Ref = make_ref(),
+    seq_trace:set_token(label,Ref),
+    set_token_flags([send]),
+
+    Self = self(),
+    Other = spawn(fun() -> Self ! {gurka,Ref} end),
+
+    receive {gurka,Ref} -> ok end,
+    seq_trace:reset_trace(),
+
+    [{Ref,{send,_,Other,Self,{gurka,Ref}}, _Ts}] = stop_tracer(1),
+
+    ok.
+
+spawn_flag(Config) when is_list(Config) ->
+    seq_trace:reset_trace(),
+    start_tracer(),
+
+    Ref = make_ref(),
+    seq_trace:set_token(label,Ref),
+    set_token_flags([spawn]),
+
+    Self = self(),
+
+    {serial,{0,0}} = seq_trace:get_token(serial),
+
+    %% The serial number is bumped on spawning (just like message passing), so
+    %% our child should inherit a counter of 1.
+    ProcessA = spawn(fun() ->
+                             {serial,{0,1}} = seq_trace:get_token(serial),
+                             Self ! {a,Ref}
+                     end),
+    receive {a,Ref} -> ok end,
+
+    {serial,{1,2}} = seq_trace:get_token(serial),
+
+    ProcessB = spawn(fun() ->
+                             {serial,{2,3}} = seq_trace:get_token(serial),
+                             Self ! {b,Ref} 
+                     end),
+    receive {b,Ref} -> ok end,
+
+    {serial,{3,4}} = seq_trace:get_token(serial),
+
+    seq_trace:reset_trace(),
+
+    [{Ref,{spawn,{0,1},Self,ProcessA,[]}, _Ts},
+     {Ref,{spawn,{2,3},Self,ProcessB,[]}, _Ts}] = stop_tracer(2),
+
+    ok.
+
 %% Send trace messages to a port.
 port(Config) when is_list(Config) ->
     lists:foreach(fun (TsType) -> do_port(TsType, Config) end,
@@ -916,7 +994,7 @@ stop_tracer(N) when is_integer(N) ->
 	    receive
 		{tracerlog,Data} ->
 		    Data
-	    after 1000 ->
+	    after 5000 ->
 		    {error,timeout}
 	    end
     end.
diff --git a/lib/observer/test/ttb_SUITE.erl b/lib/observer/test/ttb_SUITE.erl
index 33133dd78d..f8bb2e5eb0 100644
--- a/lib/observer/test/ttb_SUITE.erl
+++ b/lib/observer/test/ttb_SUITE.erl
@@ -658,11 +658,13 @@ seq_trace(Config) when is_list(Config) ->
     ?line ok = ttb:format(
 		 [filename:join(Privdir,atom_to_list(Node)++"-seq_trace")]),
     ?line [{trace_ts,StartProc,call,{?MODULE,seq,[]},{_,_,_}},
-	   {seq_trace,0,{send,{0,1},StartProc,P1Proc,{Start,P2}}},
-	   {seq_trace,0,{send,{1,2},P1Proc,P2Proc,{P1,Start}}},
-	   {seq_trace,0,{send,{2,3},P2Proc,StartProc,{P2,P1}}},
+	   {seq_trace,0,{send,{First, Seq0},StartProc,P1Proc,{Start,P2}}},
+	   {seq_trace,0,{send,{Seq0,  Seq1},P1Proc,P2Proc,{P1,Start}}},
+	   {seq_trace,0,{send,{Seq1,  Last},P2Proc,StartProc,{P2,P1}}},
 	   end_of_trace] = flush(),
-
+    true = First < Seq0,
+    true = Seq0 < Seq1,
+    true = Seq1 < Last,
    %% Additional test for metatrace
     case StartProc of
 	{Start,_,_} -> ok;
diff --git a/lib/os_mon/test/cpu_sup_SUITE.erl b/lib/os_mon/test/cpu_sup_SUITE.erl
index ba28f31f26..3a8346ac44 100644
--- a/lib/os_mon/test/cpu_sup_SUITE.erl
+++ b/lib/os_mon/test/cpu_sup_SUITE.erl
@@ -162,39 +162,54 @@ util_values(Config) when is_list(Config) ->
     Ref = make_ref(),
     Loop = fun (L) -> L(L) end,
     Spinner = fun () ->
-                      Looper = spawn_link(fun () -> Loop(Loop) end),
+                      NrOfProcesses = 100,
+                      Loopers = [spawn_link(fun () -> Loop(Loop) end)
+                                 || _ <- lists:seq(1,NrOfProcesses)],
                       receive after ?SPIN_TIME -> ok end,
-                      unlink(Looper),
-                      exit(Looper, kill),
-                      Tester ! Ref
-              end,
+                      [(fun () ->
+                                unlink(Looper),
+                                exit(Looper, kill),
+                                Tester ! Ref
+                       end)()
+                       || Looper <- Loopers]
 
+              end,
     cpu_sup:util(),
-
-    spawn_link(Spinner),
-    receive Ref -> ok end,
-    HighUtil1 = cpu_sup:util(),
-
     receive after ?SPIN_TIME -> ok end,
-    LowUtil1 = cpu_sup:util(),
+    LowUtil0 = cpu_sup:util(),
+    NrOfProcessors = erlang:system_info(logical_processors_available),
+    case LowUtil0 of
+        U when U > ((100.0 / NrOfProcessors) * 0.5) ->
+            %% We cannot run this test if the system is doing other
+            %% work at the same time as the result will be unreliable
+            {skip, io_lib:format("CPU utilization was too high (~f%)", [LowUtil0])};
+        _ ->
+            cpu_sup:util(),
+            spawn_link(Spinner),
+            receive Ref -> ok end,
+            HighUtil1 = cpu_sup:util(),
 
-    spawn_link(Spinner),
-    receive Ref -> ok end,
-    HighUtil2 = cpu_sup:util(),
+            receive after ?SPIN_TIME -> ok end,
+            LowUtil1 = cpu_sup:util(),
 
-    receive after ?SPIN_TIME -> ok end,
-    LowUtil2 = cpu_sup:util(),
+            spawn_link(Spinner),
+            receive Ref -> ok end,
+            HighUtil2 = cpu_sup:util(),
 
-    Utils = [{high1,HighUtil1}, {low1,LowUtil1},
-             {high2,HighUtil2}, {low2,LowUtil2}],
-    io:format("Utils: ~p~n", [Utils]),
+            receive after ?SPIN_TIME -> ok end,
+            LowUtil2 = cpu_sup:util(),
 
-    false = LowUtil1 > HighUtil1,
-    false = LowUtil1 > HighUtil2,
-    false = LowUtil2 > HighUtil1,
-    false = LowUtil2 > HighUtil2,
+            Utils = [{high1,HighUtil1}, {low1,LowUtil1},
+                     {high2,HighUtil2}, {low2,LowUtil2}],
+            io:format("Utils: ~p~n", [Utils]),
 
-    ok.
+            false = LowUtil1 > HighUtil1,
+            false = LowUtil1 > HighUtil2,
+            false = LowUtil2 > HighUtil1,
+            false = LowUtil2 > HighUtil2,
+
+            ok
+    end.
 
 
 % Outdated
diff --git a/lib/stdlib/test/ets_SUITE_data/visualize_throughput.html b/lib/stdlib/test/ets_SUITE_data/visualize_throughput.html
index 27d6849c60..239877c257 100644
--- a/lib/stdlib/test/ets_SUITE_data/visualize_throughput.html
+++ b/lib/stdlib/test/ets_SUITE_data/visualize_throughput.html
@@ -4,7 +4,7 @@
 <!-- %% -->
 <!-- %% %CopyrightBegin% -->
 <!-- %% -->
-<!-- %% Copyright Ericsson AB and Kjell Winblad 1996-2018. All Rights Reserved. -->
+<!-- %% Copyright Ericsson AB and Kjell Winblad 1996-2019. All Rights Reserved. -->
 <!-- %% -->
 <!-- %% Licensed under the Apache License, Version 2.0 (the "License"); -->
 <!-- %% you may not use this file except in compliance with the License. -->
@@ -44,6 +44,12 @@
     <br>
     <textarea id="dataField" rows="4" cols="50">#bench_data_placeholder</textarea> 
     <br>
+    <input type="checkbox" id="throughputPlot" checked> Include Throughput Plot
+    <br>
+    <input type="checkbox" id="betterThanWorstPlot"> Include % More Throughput Than Worst Plot
+    <br>
+    <input type="checkbox" id="worseThanBestPlot"> Include % Less Throughput Than Best Plot
+    <br>
     <input type="checkbox" id="barPlot"> Bar Plot
     <br>
     <input type="checkbox" id="sameSpacing" checked> Same X Spacing Between Points
@@ -148,10 +154,52 @@
           }
           return data;
       }
+      function toCompareData(dataParam, compareWithWorst) {
+          var data = $.extend(true, [], dataParam);
+          var worstSoFarMap = {};
+          var defaultSoFarValue = compareWithWorst ? Number.MAX_VALUE : Number.MIN_VALUE;
+          function getWorstBestSoFar(x){
+              return worstSoFarMap[x] === undefined ? defaultSoFarValue : worstSoFarMap[x];
+          }
+          function setWorstBestSoFar(x, y){
+              return worstSoFarMap[x] = y;
+          }
+          function lessOrGreaterThan(n1, n2){
+            return compareWithWorst ? n1 < n2 : n1 > n2;
+          } 
+          $.each(data, function(i, allResConfig) {
+            $.each(allResConfig.y, function(index, res) {
+                var xName = allResConfig.x[index];
+                if(lessOrGreaterThan(res, getWorstBestSoFar(xName))){
+                    setWorstBestSoFar(xName, res);                     
+                }
+            });
+          });
+          $.each(data, function(i, allResConfig) {
+            $.each(allResConfig.y, function(index, res) {
+                var xName = allResConfig.x[index];
+                if(compareWithWorst){
+                    allResConfig.y[index] = ((res / getWorstBestSoFar(xName))-1.0) * 100;
+                }else{
+                    allResConfig.y[index] = (1.0 -(res / getWorstBestSoFar(xName))) * 100;
+                }
+            });
+          });
+          return data;
+      }
+      function toBetterThanWorstData(data){
+        return toCompareData(data, true);
+      }
+      function toWorseThanBestData(data){
+        return toCompareData(data, false);
+      }
       function plotGraphs(){
           var insertPlaceholder = $("#insertPlaceholder");
           var sameSpacing = $('#sameSpacing').is(":checked");
           var barPlot = $('#barPlot').is(":checked");
+          var throughputPlot = $('#throughputPlot').is(":checked");
+          var betterThanWorstPlot = $('#betterThanWorstPlot').is(":checked");
+          var worseThanBestPlot = $('#worseThanBestPlot').is(":checked");
           var lines = $("#dataField").val();
           $('.showCheck').each(function() {
               var item = $(this);
@@ -188,42 +236,59 @@
                           plotGraph(lines, sameSpacing, barPlot, prefix));
               }
           }
+          var nrOfGraphs = 0;
+          function plotScenario(name, plotType) {
+              var data = scenarioDataMap[name];
+              var yAxisTitle = undefined;
+              nrOfGraphs = nrOfGraphs + 1;
+              $("<div class='added' id='graph" + nrOfGraphs + "'>")
+                  .insertBefore(insertPlaceholder);
+              $("<button type='button' class='added' id='fullscreenButton" + nrOfGraphs + "'>Fill screen</button>")
+                  .insertBefore(insertPlaceholder);
+              $("<span class='added'><br><hr><br></span>")
+                  .insertBefore(insertPlaceholder);
+              if (plotType === 'throughput') {
+                  yAxisTitle = 'Operations/Second';
+              } else if (plotType === 'better_than_worst') {
+                  yAxisTitle = '% More Throughput Than Worst';
+                  data = toBetterThanWorstData(data);
+              } else {
+                  yAxisTitle = '% Less Throughput Than Best';
+                  data = toWorseThanBestData(data);
+              }
+              var layout = {
+                  title: name,
+                  xaxis: {
+                      title: '# of Processes'
+                  },
+                  yaxis: {
+                      title: yAxisTitle
+                  }
+              };
+              $("#fullscreenButton" + nrOfGraphs).click(
+                  function () {
+                      $('#graph' + nrOfGraphs).replaceWith(
+                          $("<div class='added' id='graph" + nrOfGraphs + "'>"));
+                      layout = $.extend({}, layout, {
+                          width: $(window).width() - 40,
+                          height: $(window).height() - 40
+                      });
+                      Plotly.newPlot('graph' + nrOfGraphs, data, layout);
+                  });
+              Plotly.newPlot('graph' + nrOfGraphs, data, layout);
+          }
           $.each(scenarioList,
-                 function( index, name ) {
-                     var nrOfGraphs = index + 1;
-                     var data = scenarioDataMap[name];
-                     $( "<div class='added' id='graph"+nrOfGraphs+"'>")
-                         .insertBefore( insertPlaceholder );
-                     $( "<button type='button' class='added' id='fullscreenButton"+nrOfGraphs+"'>Fill screen</button>")
-                         .insertBefore( insertPlaceholder );
-                     $( "<span class='added'><br><hr><br></span>")
-                         .insertBefore( insertPlaceholder );
-                     var layout = {
-                         title:name,
-                         xaxis: {
-                             title: '# of Processes'
-                         },
-                         yaxis: {
-                             title: 'Operations/Second'
-                         }
-                         
-                     };
-                     
-                     $("#fullscreenButton"+nrOfGraphs).click(
-                         function(){
-                             $('#graph'+nrOfGraphs).replaceWith(
-                                 $("<div class='added' id='graph"+nrOfGraphs+"'>"));
-                             layout = $.extend({}, layout, {
-                                 width:$(window).width()-40,
-                                 height:$(window).height()-40
-                             });      
-                             Plotly.newPlot('graph'+nrOfGraphs,  data, layout);                         
-                         });
-                     Plotly.newPlot('graph'+nrOfGraphs,  data, layout);
-
-                 });
-          
-          
+              function (index, name) {
+                  if (throughputPlot) {
+                      plotScenario(name, 'throughput');
+                  }
+                  if (betterThanWorstPlot) {
+                      plotScenario(name, 'better_than_worst');
+                  }
+                  if (worseThanBestPlot) {
+                      plotScenario(name, 'worse_than_best');
+                  }
+              });
       }
     $(document).ready(function(){
         $('#renderButton').click(
diff --git a/make/otp_version_tickets_in_merge b/make/otp_version_tickets_in_merge
index 3728402492..e69de29bb2 100644
--- a/make/otp_version_tickets_in_merge
+++ b/make/otp_version_tickets_in_merge
@@ -1,2 +0,0 @@
-OTP-15823
-OTP-15825
diff --git a/scripts/pre-push b/scripts/pre-push
index 71e9fd1e75..670f1c9796 100755
--- a/scripts/pre-push
+++ b/scripts/pre-push
@@ -22,12 +22,12 @@
 #   <local ref> <local sha1> <remote ref> <remote sha1>
 #
 
-NEW_RELEASES="21 20 19 18 17"
+NEW_RELEASES="22 21 20 19 18 17"
 OLD_RELEASES="r16 r15 r14 r13"
 RELEASES="$NEW_RELEASES $OLD_RELEASES"
 
 # First commit on master, not allowed in other branches
-MASTER_ONLY=aea2a053e28a11497796879715be29ab0c3cd1a0
+MASTER_ONLY=f633fe962ea7078c32f8c81d34950c0ebce0f472
 
 # Number of commits and files allowed in one push by this script
 NCOMMITS_MAX=100