45 files changed, 1633 insertions, 1164 deletions
diff --git a/erts/emulator/beam/beam_bif_load.c b/erts/emulator/beam/beam_bif_load.c
index 4c8ee5178a..04b2ed64b7 100644
--- a/erts/emulator/beam/beam_bif_load.c
+++ b/erts/emulator/beam/beam_bif_load.c
@@ -1125,13 +1125,12 @@ check_process_code(Process* rp, Module* modp, int *redsp, int fcalls)
     mod_size = modp->old.code_length;
 
     /*
-     * Check if current instruction or continuation pointer points into module.
+     * Check if the instruction pointer points into module.
      */
-    if (ErtsInArea(rp->i, mod_start, mod_size)
-	|| ErtsInArea(rp->cp, mod_start, mod_size)) {
+    if (ErtsInArea(rp->i, mod_start, mod_size)) {
 	return am_true;
     }
- 
+
     *redsp += 1;
 
     if (erts_check_nif_export_in_area(rp, mod_start, mod_size))
diff --git a/erts/emulator/beam/beam_bp.c b/erts/emulator/beam/beam_bp.c
index 0832b3f374..10940072ae 100644
--- a/erts/emulator/beam/beam_bp.c
+++ b/erts/emulator/beam/beam_bp.c
@@ -642,46 +642,35 @@ erts_clear_export_break(Module* modp, ErtsCodeInfo *ci)
 }
 
 /*
- * If c_p->cp is a trace return instruction, we set cp
- * to be the place where we again start to execute code.
+ * If the topmost continuation pointer on the stack is a trace return
+ * instruction, we modify it to be the place where we again start to
+ * execute code.
  *
- * cp is used by match spec {caller} to get the calling
- * function, and if we don't do this fixup it will be
- * 'undefined'. This has the odd side effect of {caller}
- * not really being which function is the caller, but
- * rather which function we are about to return to.
+ * This continuation pointer is used by match spec {caller} to get the
+ * calling function, and if we don't do this fixup it will be
+ * 'undefined'. This has the odd side effect of {caller} not really
+ * being the function which is the caller, but rather the function
+ * which we are about to return to.
  */
 static void fixup_cp_before_trace(Process *c_p, int *return_to_trace)
 {
-    Eterm *cpp, *E = c_p->stop;
-    BeamInstr w = *c_p->cp;
-    if (BeamIsOpCode(w, op_return_trace)) {
-        cpp = &E[2];
-    } else if (BeamIsOpCode(w, op_i_return_to_trace)) {
-        *return_to_trace = 1;
-        cpp = &E[0];
-    } else if (BeamIsOpCode(w, op_i_return_time_trace)) {
-        cpp = &E[0];
-    } else {
-        cpp = NULL;
-    }
-    if (cpp) {
-        for (;;) {
-            BeamInstr w = *cp_val(*cpp);
-            if (BeamIsOpCode(w, op_return_trace)) {
-                cpp += 3;
-            } else if (BeamIsOpCode(w, op_i_return_to_trace)) {
-                *return_to_trace = 1;
-                cpp += 1;
-            } else if (BeamIsOpCode(w, op_i_return_time_trace)) {
-                cpp += 2;
-            } else {
-                break;
-            }
+    Eterm *cpp = c_p->stop;
+
+    for (;;) {
+        BeamInstr w = *cp_val(*cpp);
+        if (BeamIsOpCode(w, op_return_trace)) {
+            cpp += 3;
+        } else if (BeamIsOpCode(w, op_i_return_to_trace)) {
+            *return_to_trace = 1;
+            cpp += 1;
+        } else if (BeamIsOpCode(w, op_i_return_time_trace)) {
+            cpp += 2;
+        } else {
+            break;
         }
-        c_p->cp = (BeamInstr *) cp_val(*cpp);
-        ASSERT(is_CP(*cpp));
     }
+    c_p->stop[0] = (Eterm) cp_val(*cpp);
+    ASSERT(is_CP(*cpp));
 }
 
 BeamInstr
@@ -743,12 +732,13 @@ erts_generic_breakpoint(Process* c_p, ErtsCodeInfo *info, Eterm* reg)
 
     if (bp_flags & ERTS_BPF_TIME_TRACE_ACTIVE) {
 	Eterm w;
+        Eterm* E;
 	erts_trace_time_call(c_p, info, bp->time);
-	w = (BeamInstr) *c_p->cp;
+        E = c_p->stop;
+        w = (BeamInstr) E[0];
 	if (! (BeamIsOpCode(w, op_i_return_time_trace) ||
 	       BeamIsOpCode(w, op_return_trace) ||
                BeamIsOpCode(w, op_i_return_to_trace)) ) {
-	    Eterm* E = c_p->stop;
 	    ASSERT(c_p->htop <= E && E <= c_p->hend);
 	    if (E - 2 < c_p->htop) {
 		(void) erts_garbage_collect(c_p, 2, reg, info->mfa.arity);
@@ -759,9 +749,8 @@ erts_generic_breakpoint(Process* c_p, ErtsCodeInfo *info, Eterm* reg)
 	    ASSERT(c_p->htop <= E && E <= c_p->hend);
 
 	    E -= 2;
-	    E[0] = make_cp(erts_codeinfo_to_code(info));
-	    E[1] = make_cp(c_p->cp);     /* original return address */
-	    c_p->cp = beam_return_time_trace;
+	    E[1] = make_cp(erts_codeinfo_to_code(info));
+	    E[0] = (Eterm) beam_return_time_trace;
 	    c_p->stop = E;
 	}
     }
@@ -790,7 +779,7 @@ erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr* I)
     int applying = (I == ep->beam); /* Yup, the apply code for a bif
                                       * is actually in the
                                       * export entry */
-    BeamInstr *cp = p->cp;
+    BeamInstr* cp = (BeamInstr *) p->stop[0];
     GenericBp* g;
     GenericBpData* bp = NULL;
     Uint bp_flags = 0;
@@ -809,7 +798,7 @@ erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr* I)
      * but it is correct during apply of bif.
      */
     if (!applying) {
-	p->cp = I;
+        p->stop[0] = (Eterm) I;
     } else {
         fixup_cp_before_trace(p, &return_to_trace);
     }
@@ -846,7 +835,7 @@ erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr* I)
     }
 
     /* Restore original continuation pointer (if changed). */
-    p->cp = cp;
+    p->stop[0] = (Eterm) cp;
 
     func = bif_table[bif_index].f;
 
@@ -854,7 +843,7 @@ erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr* I)
 
     if (erts_nif_export_check_save_trace(p, result,
 					 applying, ep,
-					 cp, flags,
+					 flags,
 					 flags_meta, I,
 					 meta_tracer)) {
 	/*
@@ -865,24 +854,31 @@ erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr* I)
 	return result;
     }
 
-    return erts_bif_trace_epilogue(p, result, applying, ep, cp,
+    return erts_bif_trace_epilogue(p, result, applying, ep,
 				   flags, flags_meta, I,
 				   meta_tracer);
 }
 
 Eterm
 erts_bif_trace_epilogue(Process *p, Eterm result, int applying,
-			Export* ep, BeamInstr *cp, Uint32 flags,
+			Export* ep, Uint32 flags,
 			Uint32 flags_meta, BeamInstr* I,
 			ErtsTracer meta_tracer)
 {
+    BeamInstr *cp = NULL;
+
     if (applying && (flags & MATCH_SET_RETURN_TO_TRACE)) {
 	BeamInstr i_return_trace      = beam_return_trace[0];
 	BeamInstr i_return_to_trace   = beam_return_to_trace[0];
 	BeamInstr i_return_time_trace = beam_return_time_trace[0];
 	Eterm *cpp;
+
 	/* Maybe advance cp to skip trace stack frames */
-	for (cpp = p->stop;  ;  cp = cp_val(*cpp++)) {
+        cpp = p->stop;
+        while (is_not_CP(*cpp)) {
+            cpp++;
+        }
+        for (cp = cp_val(*cpp++); ;) {
 	    if (*cp == i_return_trace) {
 		/* Skip stack frame variables */
 		while (is_not_CP(*cpp)) cpp++;
@@ -897,8 +893,11 @@ erts_bif_trace_epilogue(Process *p, Eterm result, int applying,
 		 */
 		cp = NULL;
 		break;
-	    } else break;
-	}
+	    } else {
+                break;
+            }
+            cp = cp_val(*cpp++);
+        }
     }
 
     /* Try to get these in the order
@@ -939,7 +938,7 @@ erts_bif_trace_epilogue(Process *p, Eterm result, int applying,
 	    if ((flags & MATCH_SET_RETURN_TO_TRACE) && p->catches > 0) {
 		/* can only happen if(local)*/
 		Eterm *ptr = p->stop;
-		ASSERT(is_CP(*ptr));
+		ASSERT(!applying || is_CP(*ptr));
 		ASSERT(ptr <= STACK_START(p));
 		/* Search the nearest stack frame for a catch */
 		while (++ptr < STACK_START(p)) {
@@ -991,19 +990,19 @@ do_call_trace(Process* c_p, ErtsCodeInfo* info, Eterm* reg,
 	      int local, Binary* ms, ErtsTracer tracer)
 {
     int return_to_trace = 0;
-    BeamInstr *cp_save = c_p->cp;
     Uint32 flags;
     Uint need = 0;
+    Eterm cp_save;
     Eterm* E = c_p->stop;
 
-    fixup_cp_before_trace(c_p, &return_to_trace);
+    cp_save = E[0];
 
+    fixup_cp_before_trace(c_p, &return_to_trace);
     ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
     flags = erts_call_trace(c_p, info, ms, reg, local, &tracer);
     ERTS_REQ_PROC_MAIN_LOCK(c_p);
 
-    /* restore cp after potential fixup */
-    c_p->cp = cp_save;
+    E[0] = cp_save;
 
     ASSERT(!ERTS_PROC_IS_EXITING(c_p));
     if ((flags & MATCH_SET_RETURN_TO_TRACE) && !return_to_trace) {
@@ -1023,28 +1022,23 @@ do_call_trace(Process* c_p, ErtsCodeInfo* info, Eterm* reg,
     if (flags & MATCH_SET_RETURN_TO_TRACE && !return_to_trace) {
 	E -= 1;
 	ASSERT(c_p->htop <= E && E <= c_p->hend);
-	E[0] = make_cp(c_p->cp);
-	c_p->cp = beam_return_to_trace;
+	E[0] = (Eterm) beam_return_to_trace;
+        c_p->stop = E;
     }
-    if (flags & MATCH_SET_RX_TRACE)
-    {
+    if (flags & MATCH_SET_RX_TRACE) {
 	E -= 3;
         c_p->stop = E;
 	ASSERT(c_p->htop <= E && E <= c_p->hend);
 	ASSERT(is_CP((Eterm) (UWord) (&info->mfa.module)));
 	ASSERT(IS_TRACER_VALID(tracer));
-	E[2] = make_cp(c_p->cp);
-        E[1] = copy_object(tracer, c_p);
-	E[0] = make_cp(&info->mfa.module);
-                               /* We ARE at the beginning of an instruction,
-				  the funcinfo is above i. */
-	c_p->cp = (flags & MATCH_SET_EXCEPTION_TRACE) ?
-	    beam_exception_trace : beam_return_trace;
+        E[2] = copy_object(tracer, c_p);
+        E[1] = make_cp(&info->mfa.module);
+        E[0] = (Eterm) ((flags & MATCH_SET_EXCEPTION_TRACE) ?
+                        beam_exception_trace : beam_return_trace);
 	erts_proc_lock(c_p, ERTS_PROC_LOCKS_ALL_MINOR);
 	ERTS_TRACE_FLAGS(c_p) |= F_EXCEPTION_TRACE;
 	erts_proc_unlock(c_p, ERTS_PROC_LOCKS_ALL_MINOR);
-    } else
-        c_p->stop = E;
+    }
     return tracer;
 }
 
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 07c16e3415..9f8b56a5d5 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -141,10 +141,6 @@ do {                                     \
      BeamCodeAddr(IP) < (BeamInstr)LabelAddr(end_emulator_loop))
 #endif /* NO_JUMP_TABLE */
 
-#define SET_CP(p, ip)           \
-   ASSERT(VALID_INSTR(*(ip)));  \
-   (p)->cp = (ip)
-
 #define SET_I(ip) \
    ASSERT(VALID_INSTR(* (Eterm *)(ip))); \
    I = (ip)
@@ -524,7 +520,7 @@ init_emulator(void)
 #define DTRACE_RETURN_FROM_PC(p)                                                        \
     do {                                                                                \
         ErtsCodeMFA* cmfa;                                                                  \
-        if (DTRACE_ENABLED(function_return) && (cmfa = find_function_from_pc((p)->cp))) { \
+        if (DTRACE_ENABLED(function_return) && (cmfa = find_function_from_pc(cp_val((p)->stop[0])))) { \
             DTRACE_RETURN((p), cmfa);                               \
         }                                                                               \
     } while(0)
@@ -1443,7 +1439,7 @@ handle_error(Process* c_p, BeamInstr* pc, Eterm* reg, ErtsCodeMFA *bif_mfa)
 	reg[2] = Value;
 	reg[3] = c_p->ftrace;
         if ((new_pc = next_catch(c_p, reg))) {
-	    c_p->cp = 0;	/* To avoid keeping stale references. */
+            c_p->stop[0] = NIL;  /* To avoid keeping stale references. */
             ERTS_RECV_MARK_CLEAR(c_p); /* No longer safe to use this position */
 	    return new_pc;
 	}
@@ -1481,35 +1477,6 @@ next_catch(Process* c_p, Eterm *reg) {
         return NULL;
     }
 
-    /*
-     * Better safe than sorry here. In debug builds, produce a core
-     * dump if the top of the stack doesn't point to a continuation
-     * pointer. In other builds, ignore a non-CP at the top of stack.
-     */
-    ASSERT(is_CP(*ptr));
-    if ((is_not_CP(*ptr) || (*cp_val(*ptr) != i_return_trace &&
-			     *cp_val(*ptr) != i_return_to_trace &&
-			     *cp_val(*ptr) != i_return_time_trace ))
-	&& c_p->cp) {
-	/* Can not follow cp here - code may be unloaded */
-	BeamInstr *cpp = c_p->cp;
-	if (cpp == beam_exception_trace) {
-            ErtsCodeMFA *mfa = (ErtsCodeMFA*)cp_val(ptr[0]);
-	    erts_trace_exception(c_p, mfa,
-				 reg[1], reg[2],
-                                 ERTS_TRACER_FROM_ETERM(ptr+1));
-	    /* Skip return_trace parameters */
-	    ptr += 2;
-	} else if (cpp == beam_return_trace) {
-	    /* Skip return_trace parameters */
-	    ptr += 2;
-	} else if (cpp == beam_return_time_trace) {
-	    /* Skip return_trace parameters */
-	    ptr += 1;
-	} else if (cpp == beam_return_to_trace) {
-	    have_return_to_trace = !0; /* Record next cp */
-	}
-    }
     while (ptr < STACK_START(c_p)) {
 	if (is_catch(*ptr)) {
 	    if (active_catches) goto found_catch;
@@ -1664,6 +1631,57 @@ expand_error_value(Process* c_p, Uint freason, Eterm Value) {
     return Value;
 }
 
+/* Collect up to 'depth' continuation pointers from p's stack into s->trace. */
+static void
+gather_stacktrace(Process* p, Eterm *ptr, struct StackTrace* s, int depth)
+{
+    BeamInstr *prev;
+    BeamInstr i_return_trace;
+    BeamInstr i_return_to_trace;
+
+    if (depth == 0) {
+        return;
+    }
+    /* Seed the duplicate check with the last saved entry, or s->pc if none. */
+    prev = s->depth ? s->trace[s->depth-1] : s->pc;
+    i_return_trace = beam_return_trace[0];
+    i_return_to_trace = beam_return_to_trace[0];
+
+    /*
+     * Walk from 'ptr' towards STACK_START(p) and append every continuation
+     * pointer that differs from 'prev' to s->trace, until 'depth' entries
+     * have been added or the start of the stack is reached. Continuation
+     * pointers to return_trace/return_to_trace code are skipped, not saved.
+     */
+
+    ASSERT(ptr >= STACK_TOP(p) && ptr <= STACK_START(p));
+
+    while (ptr < STACK_START(p) && depth > 0) {
+        if (is_CP(*ptr)) {
+            if (*cp_val(*ptr) == i_return_trace) {
+                /* Skip stack frame variables */
+                do ++ptr; while (is_not_CP(*ptr));
+                /* Skip return_trace parameters */
+                ptr += 2;
+            } else if (*cp_val(*ptr) == i_return_to_trace) {
+                /* Skip stack frame variables */
+                do ++ptr; while (is_not_CP(*ptr));
+            } else {
+                BeamInstr *cp = cp_val(*ptr);
+                if (cp != prev) {
+                    /* Record non-duplicates only (saved as cp - 1) */
+                    prev = cp;
+                    s->trace[s->depth++] = cp - 1;
+                    depth--;
+                }
+                ptr++;
+            }
+        } else {
+            ptr++;
+        }
+    }
+}
+
 /*
  * Quick-saving the stack trace in an internal form on the heap. Note
  * that c_p->ftrace will point to a cons cell which holds the given args
@@ -1702,6 +1720,7 @@ static void
 save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
 		ErtsCodeMFA *bif_mfa, Eterm args) {
     struct StackTrace* s;
+    Eterm *stack_start;
     int sz;
     int depth = erts_backtrace_depth;    /* max depth (never negative) */
 
@@ -1720,6 +1739,33 @@ save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
     s->depth = 0;
 
     /*
+     * If we crash on an instruction that returns to a return/exception trace
+     * instruction, we must set the stacktrace 'pc' to the actual return
+     * address or we'll lose the top stackframe when gathering the stack
+     * trace.
+     */
+    stack_start = STACK_TOP(c_p);
+    if (stack_start < STACK_START(c_p) && is_CP(*stack_start)) {
+        BeamInstr *cp = cp_val(*stack_start);
+
+        if (cp == pc) {
+            if (pc == beam_exception_trace || pc == beam_return_trace) {
+                ASSERT(&stack_start[3] <= STACK_START(c_p));
+                /* Fake having failed on the first instruction in the function
+                 * pointed to by the tag. */
+                pc = cp_val(stack_start[1]);
+                stack_start += 3;
+            } else if (pc == beam_return_to_trace) {
+                ASSERT(&stack_start[2] <= STACK_START(c_p));
+                pc = cp_val(stack_start[1]);
+                /* Skip both the trace tag and the new 'pc' to avoid
+                 * duplicated entries. */
+                stack_start += 2;
+            }
+        }
+    }
+
+    /*
      * If the failure was in a BIF other than 'error/1', 'error/2',
      * 'exit/1' or 'throw/1', save BIF-MFA and save the argument
      * registers by consing up an arglist.
@@ -1750,11 +1796,6 @@ save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
 	    s->trace[s->depth++] = pc;
 	    depth--;
 	}
-	/* Save second stack entry if CP is valid and different from pc */
-	if (depth > 0 && c_p->cp != 0 && c_p->cp != pc) {
-	    s->trace[s->depth++] = c_p->cp - 1;
-	    depth--;
-	}
 	s->pc = NULL;
 	args = make_arglist(c_p, reg, bif_mfa->arity); /* Overwrite CAR(c_p->ftrace) */
     } else {
@@ -1762,9 +1803,9 @@ save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
     non_bif_stacktrace:
 
 	s->current = c_p->current;
-        /* 
+        /*
 	 * For a function_clause error, the arguments are in the beam
-	 * registers, c_p->cp is valid, and c_p->current is set.
+	 * registers and c_p->current is set.
 	 */
 	if ( (GET_EXC_INDEX(s->freason)) ==
 	     (GET_EXC_INDEX(EXC_FUNCTION_CLAUSE)) ) {
@@ -1772,18 +1813,8 @@ save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
 	    ASSERT(s->current);
 	    a = s->current->arity;
 	    args = make_arglist(c_p, reg, a); /* Overwrite CAR(c_p->ftrace) */
-	    /* Save first stack entry */
-	    ASSERT(c_p->cp);
-	    if (depth > 0) {
-		s->trace[s->depth++] = c_p->cp - 1;
-		depth--;
-	    }
 	    s->pc = NULL; /* Ignore pc */
 	} else {
-	    if (depth > 0 && c_p->cp != 0 && c_p->cp != pc) {
-		s->trace[s->depth++] = c_p->cp - 1;
-		depth--;
-	    }
 	    s->pc = pc;
 	}
     }
@@ -1796,80 +1827,13 @@ save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
     }
 
     /* Save the actual stack trace */
-    erts_save_stacktrace(c_p, s, depth);
+    gather_stacktrace(c_p, stack_start, s, depth);
 }
 
 void
 erts_save_stacktrace(Process* p, struct StackTrace* s, int depth)
 {
-    if (depth > 0) {
-	Eterm *ptr;
-	BeamInstr *prev = s->depth ? s->trace[s->depth-1] : NULL;
-	BeamInstr i_return_trace = beam_return_trace[0];
-	BeamInstr i_return_to_trace = beam_return_to_trace[0];
-
-	/*
-	 * Traverse the stack backwards and add all unique continuation
-	 * pointers to the buffer, up to the maximum stack trace size.
-	 * 
-	 * Skip trace stack frames.
-	 */
-	ptr = p->stop;
-	if (ptr < STACK_START(p) &&
-	    (is_not_CP(*ptr)|| (*cp_val(*ptr) != i_return_trace &&
-				*cp_val(*ptr) != i_return_to_trace)) &&
-	    p->cp) {
-	    /* Cannot follow cp here - code may be unloaded */
-	    BeamInstr *cpp = p->cp;
-	    int trace_cp;
-	    if (cpp == beam_exception_trace || cpp == beam_return_trace) {
-		/* Skip return_trace parameters */
-		ptr += 2;
-		trace_cp = 1;
-	    } else if (cpp == beam_return_to_trace) {
-		/* Skip return_to_trace parameters */
-		ptr += 1;
-		trace_cp = 1;
-	    }
-	    else {
-		trace_cp = 0;
-	    }
-	    if (trace_cp && s->pc == cpp) {
-		/*
-		 * If process 'cp' points to a return/exception trace
-		 * instruction and 'cp' has been saved as 'pc' in
-		 * stacktrace, we need to update 'pc' in stacktrace
-		 * with the actual 'cp' located on the top of the
-		 * stack; otherwise, we will lose the top stackframe
-		 * when building the stack trace.
-		 */
-		ASSERT(is_CP(p->stop[0]));
-		s->pc = cp_val(p->stop[0]);
-	    }
-	}
-	while (ptr < STACK_START(p) && depth > 0) {
-	    if (is_CP(*ptr)) {
-		if (*cp_val(*ptr) == i_return_trace) {
-		    /* Skip stack frame variables */
-		    do ++ptr; while (is_not_CP(*ptr));
-		    /* Skip return_trace parameters */
-		    ptr += 2;
-		} else if (*cp_val(*ptr) == i_return_to_trace) {
-		    /* Skip stack frame variables */
-		    do ++ptr; while (is_not_CP(*ptr));
-		} else {
-		    BeamInstr *cp = cp_val(*ptr);
-		    if (cp != prev) {
-			/* Record non-duplicates only */
-			prev = cp;
-			s->trace[s->depth++] = cp - 1;
-			depth--;
-		    }
-		    ptr++;
-		}
-	    } else ptr++;
-	}
-    }
+    gather_stacktrace(p, STACK_TOP(p), s, depth);
 }
 
 /*
@@ -2144,36 +2108,33 @@ apply_bif_error_adjustment(Process *p, Export *ep,
 	 * erlang:error/1, erlang:error/2, erlang:exit/1,
 	 * or erlang:throw/1. Error handling of these BIFs is
 	 * special!
+         *
+	 * We need the topmost continuation pointer to point into the
+	 * calling function when handling the error after the BIF has
+	 * been applied. This is needed to get the topmost stackframe
+	 * correct.
 	 *
-	 * We need 'p->cp' to point into the calling
-	 * function when handling the error after the BIF has
-	 * been applied. This in order to get the topmost
-	 * stackframe correct. Without the following adjustment,
-	 * 'p->cp' will point into the function that called
-	 * current function when handling the error. We add a
-	 * dummy stackframe in order to achieve this.
-	 *
-	 * Note that these BIFs unconditionally will cause
-	 * an exception to be raised. That is, our modifications
-	 * of 'p->cp' as well as the stack will be corrected by
-	 * the error handling code.
-	 *
-	 * If we find an exception/return-to trace continuation
-	 * pointer as the topmost continuation pointer, we do not
-	 * need to do anything since the information already will
-	 * be available for generation of the stacktrace.
+	 * Note that these BIFs will unconditionally cause an
+	 * exception to be raised. That is, our modifications of the
+	 * stack will be corrected by the error handling code.
 	 */
 	int apply_only = stack_offset == 0;
 	BeamInstr *cpp;
+        Eterm *E;
 
-	if (apply_only) {
-	    ASSERT(p->cp != NULL);
-	    cpp = p->cp;
-	}
-	else {
-	    ASSERT(is_CP(p->stop[0]));
-	    cpp = cp_val(p->stop[0]);
-	}
+        E = p->stop;
+
+        while (is_not_CP(*E)) {
+            E++;
+        }
+        cpp = cp_val(E[0]);
+
+        /*
+	 * If we find an exception/return-to trace continuation
+	 * pointer as the topmost continuation pointer, we do not
+	 * need to do anything since the information will already
+	 * be available for generation of the stacktrace.
+         */
 
 	if (cpp != beam_exception_trace
 	    && cpp != beam_return_trace
@@ -2183,38 +2144,29 @@ apply_bif_error_adjustment(Process *p, Export *ep,
 		need = 1; /* i_apply_only */
 	    if (p->stop - p->htop < need)
 		erts_garbage_collect(p, (int) need, reg, arity+1);
-	    p->stop -= need;
-
 	    if (apply_only) {
 		/*
 		 * Called from the i_apply_only instruction.
 		 *
-		 * 'p->cp' contains continuation pointer pointing
-		 * into the function that called current function.
-		 * We push that continuation pointer onto the stack,
-		 * and set 'p->cp' to point into current function.
+                 * Push the continuation pointer for the current
+                 * function to the stack.
 		 */
-
-		p->stop[0] = make_cp(p->cp);
-		p->cp = I;
-	    }
-	    else {
+                p->stop -= need;
+                p->stop[0] = make_cp(I);
+	    } else {
 		/*
-		 * Called from an i_apply_last_p, or apply_last_IP,
-		 * instruction.
-		 *
-		 * Calling instruction will after we return read
-		 * a continuation pointer from the stack and write
-		 * it to 'p->cp', and then remove the topmost
-		 * stackframe of size 'stack_offset'.
+		 * Called from an i_apply_last_* instruction.
 		 *
-		 * We have sized the dummy-stackframe so that it
-		 * will be removed by the instruction we currently
-		 * are executing, and leave the stackframe that
-		 * normally would have been removed intact.
+                 * The calling instruction will deallocate a stack
+                 * frame of size 'stack_offset'.
 		 *
+                 * Push the continuation pointer for the current
+                 * function to the stack, and then add a dummy
+                 * stackframe for the i_apply_last* instruction
+                 * to discard.
 		 */
-		p->stop[0] = make_cp(I);
+                p->stop[0] = make_cp(I);
+                p->stop -= need;
 	    }
 	}
     }
@@ -2437,10 +2389,10 @@ erts_hibernate(Process* c_p, Eterm* reg)
     c_p->arg_reg[0] = module;
     c_p->arg_reg[1] = function;
     c_p->arg_reg[2] = args;
-    c_p->stop = STACK_START(c_p);
+    c_p->stop = c_p->hend - 1;  /* Keep first continuation pointer */
+    ASSERT(c_p->stop[0] == make_cp(beam_apply+1));
     c_p->catches = 0;
     c_p->i = beam_apply;
-    c_p->cp = (BeamInstr *) beam_apply+1;
 
     /*
      * If there are no waiting messages, garbage collect and
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 35f2ea6688..3d5683f19f 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -315,6 +315,7 @@ typedef struct LoaderState {
 				 * (or 0 if there is no on_load function)
 				 */
     int otp_20_or_higher;       /* Compiled with OTP 20 or higher */
+    unsigned max_opcode;        /* Highest opcode used in module */
 
     /*
      * Atom table.
@@ -1588,6 +1589,17 @@ static int
 read_lambda_table(LoaderState* stp)
 {
     unsigned int i;
+    unsigned int otp_22_or_lower;
+
+    /*
+     * Determine whether this module was compiled with OTP 22 or lower
+     * by looking at the max opcode number. The compiler in OTP 23 will
+     * always set the max opcode to the opcode for `swap` (whether
+     * actually used or not) so that a module compiled for OTP 23
+     * cannot be loaded in earlier versions.
+     */
+
+    otp_22_or_lower = stp->max_opcode < genop_swap_2;
 
     GetInt(stp, 4, stp->num_lambdas);
     if (stp->num_lambdas > stp->lambdas_allocated) {
@@ -1619,6 +1631,29 @@ read_lambda_table(LoaderState* stp)
 	GetInt(stp, 4, Index);
 	GetInt(stp, 4, stp->lambdas[i].num_free);
 	GetInt(stp, 4, OldUniq);
+
+        /*
+         * Fun entries are now keyed by the explicit ("new") index in
+         * the fun entry. That allows multiple make_fun2 instructions
+         * to share the same fun entry (when the `fun F/A` syntax is
+         * used). Before OTP 23, fun entries were keyed by the old
+         * index, which is the order of the entries in the fun
+         * chunk. Each make_fun2 needed to refer to its own fun entry.
+         *
+         * Modules compiled before OTP 23 can safely be loaded if the
+         * old index and the new index are equal. That is true for all
+         * modules compiled with OTP R15 and later.
+         */
+        if (otp_22_or_lower && i != Index) {
+            /*
+             * Compiled with a compiler before OTP R15B. The new indices
+             * are not reliable, so it is not safe to load this module.
+             */
+            LoadError2(stp, "please re-compile this module with an "
+                       ERLANG_OTP_RELEASE " compiler "
+                       "(old-style fun with indices: %d/%d)",
+                       i, Index);
+        }
 	fe = erts_put_fun_entry2(stp->module, OldUniq, i, stp->mod_md5,
 				 Index, arity-stp->lambdas[i].num_free);
 	stp->lambdas[i].fe = fe;
@@ -1839,7 +1874,6 @@ read_code_header(LoaderState* stp)
 {
     unsigned head_size;
     unsigned version;
-    unsigned opcode_max;
     int i;
 
     /*
@@ -1871,8 +1905,8 @@ read_code_header(LoaderState* stp)
     /*
      * Verify the number of the highest opcode used.
      */
-    GetInt(stp, 4, opcode_max);
-    if (opcode_max > MAX_GENERIC_OPCODE) {
+    GetInt(stp, 4, stp->max_opcode);
+    if (stp->max_opcode > MAX_GENERIC_OPCODE) {
 	LoadError2(stp,
 		   "This BEAM file was compiled for a later version"
 		   " of the run-time system than " ERLANG_OTP_RELEASE ".\n"
@@ -1880,7 +1914,7 @@ read_code_header(LoaderState* stp)
 		   ERLANG_OTP_RELEASE " compiler.\n"
 		   "  (Use of opcode %d; this emulator supports "
 		   "only up to %d.)",
-		   opcode_max, MAX_GENERIC_OPCODE);
+		   stp->max_opcode, MAX_GENERIC_OPCODE);
     }
 
     GetInt(stp, 4, stp->num_labels);
@@ -3131,27 +3165,6 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
     return 0;
 }
 
-static int
-is_killed_apply(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val+2 <= Reg.val;
-}
-
-static int
-is_killed(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val <= Reg.val;
-}
-
-static int
-is_killed_by_call_fun(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
-{
-    return Reg.type == TAG_x && Live.type == TAG_u &&
-	Live.val+1 <= Reg.val;
-}
-
 /*
  * Test whether register Reg is killed by make_fun instruction that
  * creates the fun given by index idx.
@@ -3172,16 +3185,6 @@ is_killed_by_make_fun(LoaderState* stp, GenOpArg Reg, GenOpArg idx)
 }
 
 /*
- * Test whether register Reg is killed by the send instruction that follows.
- */
-
-static int
-is_killed_by_send(LoaderState* stp, GenOpArg Reg)
-{
-    return Reg.type == TAG_x && 2 <= Reg.val;
-}
-
-/*
  * Generate an instruction for element/2.
  */
 
diff --git a/erts/emulator/beam/bif.c b/erts/emulator/beam/bif.c
index b81056c774..7afbbfd894 100644
--- a/erts/emulator/beam/bif.c
+++ b/erts/emulator/beam/bif.c
@@ -1915,7 +1915,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp,
 	    erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
 	    erts_dsprintf(dsbufp,
 			  "Discarding message %T from %T to %T in an old "
-			  "incarnation (%d) of this node (%d)\n",
+			  "incarnation (%u) of this node (%u)\n",
 			  msg,
 			  p->common.id,
 			  to,
@@ -1959,7 +1959,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp,
 	erts_dsprintf_buf_t *dsbufp = erts_create_logger_dsbuf();
 	erts_dsprintf(dsbufp,
 		      "Discarding message %T from %T to %T in an old "
-		      "incarnation (%d) of this node (%d)\n",
+		      "incarnation (%u) of this node (%u)\n",
 		      msg,
 		      p->common.id,
 		      to,
@@ -1987,7 +1987,7 @@ do_send(Process *p, Eterm to, Eterm msg, Eterm return_term, Eterm *refp,
                 trace_send(p, portid, msg);
 
             if (have_seqtrace(SEQ_TRACE_TOKEN(p))) {
-                seq_trace_update_send(p);
+                seq_trace_update_serial(p);
                 seq_trace_output(SEQ_TRACE_TOKEN(p), msg,
                                  SEQ_TRACE_SEND, portid, p);
             }
@@ -4866,9 +4866,13 @@ BIF_RETTYPE phash_2(BIF_ALIST_2)
 BIF_RETTYPE phash2_1(BIF_ALIST_1)
 {
     Uint32 hash;
-
-    hash = make_hash2(BIF_ARG_1);
-    BIF_RET(make_small(hash & ((1L << 27) - 1)));
+    Eterm trap_state = THE_NON_VALUE;
+    hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P);
+    if (trap_state == THE_NON_VALUE) {
+        BIF_RET(make_small(hash & ((1L << 27) - 1)));
+    } else {
+        BIF_TRAP1(bif_export[BIF_phash2_1], BIF_P, trap_state);
+    }
 }
 
 BIF_RETTYPE phash2_2(BIF_ALIST_2)
@@ -4876,6 +4880,7 @@ BIF_RETTYPE phash2_2(BIF_ALIST_2)
     Uint32 hash;
     Uint32 final_hash;
     Uint32 range;
+    Eterm trap_state = THE_NON_VALUE;
 
     /* Check for special case 2^32 */
     if (term_equals_2pow32(BIF_ARG_2)) {
@@ -4887,7 +4892,10 @@ BIF_RETTYPE phash2_2(BIF_ALIST_2)
 	}
 	range = (Uint32) u;
     }
-    hash = make_hash2(BIF_ARG_1);
+    hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P);
+    if (trap_state != THE_NON_VALUE) {
+        BIF_TRAP2(bif_export[BIF_phash2_2], BIF_P, trap_state, BIF_ARG_2);
+    }
     if (range) {
 	final_hash = hash % range; /* [0..range-1] */
     } else {
@@ -5156,7 +5164,7 @@ erts_schedule_bif(Process *proc,
 	else if (proc->flags & F_HIPE_MODE) {
 	    /* Pointer to bif export in i */
 	    exp = (Export *) i;
-	    pc = c_p->cp;
+            pc = cp_val(c_p->stop[0]);
 	    mfa = &exp->info.mfa;
 	}
 #endif
@@ -5173,8 +5181,7 @@ erts_schedule_bif(Process *proc,
 	    mfa = &exp->info.mfa;
 	}
 	else if (BeamIsOpCode(*i, op_apply_bif)) {
-	    /* Pointer to bif in i+1, and mfa in i-3 */	    
-	    pc = c_p->cp;
+            pc = cp_val(c_p->stop[0]);
 	    mfa = erts_code_to_codemfa(i);
 	}
 	else {
@@ -5265,7 +5272,6 @@ erts_call_dirty_bif(ErtsSchedulerData *esdp, Process *c_p, BeamInstr *I, Eterm *
     dirty_shadow_proc->freason = c_p->freason;
     dirty_shadow_proc->fvalue = c_p->fvalue;
     dirty_shadow_proc->ftrace = c_p->ftrace;
-    dirty_shadow_proc->cp = c_p->cp;
     dirty_shadow_proc->i = c_p->i;
 
 #ifdef DEBUG
@@ -5312,7 +5318,6 @@ erts_call_dirty_bif(ErtsSchedulerData *esdp, Process *c_p, BeamInstr *I, Eterm *
 	c_p->freason = dirty_shadow_proc->freason;
 	c_p->fvalue = dirty_shadow_proc->fvalue;
 	c_p->ftrace = dirty_shadow_proc->ftrace;
-	c_p->cp = dirty_shadow_proc->cp;
 	c_p->i = dirty_shadow_proc->i;
 	c_p->arity = dirty_shadow_proc->arity;
     }
diff --git a/erts/emulator/beam/bif_instrs.tab b/erts/emulator/beam/bif_instrs.tab
index 8e0caa38a3..f1877882a1 100644
--- a/erts/emulator/beam/bif_instrs.tab
+++ b/erts/emulator/beam/bif_instrs.tab
@@ -280,7 +280,7 @@ call_bif(Exp) {
          * erlang code or by nif_bif.epilogue() when the BIF
          * is done).
          */
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         SET_I(c_p->i);
         SWAPIN;
         Dispatch();
@@ -297,7 +297,7 @@ call_bif(Exp) {
 
 //
 // Call a BIF tail-recursively, storing the result in x(0) and doing
-// a return to the continuation poiner (c_p->cp).
+// a return to the continuation pointer.
 //
 
 call_bif_only(Exp) {
@@ -367,7 +367,7 @@ call_bif_only(Exp) {
     } else if (c_p->freason == TRAP) {
         /*
          * Dispatch to a trap. When the trap is done, a jump
-         * to the continuation pointer (c_p->cp) will be done.
+         * to the continuation pointer on the stack will be done.
          */
         SET_I(c_p->i);
         SWAPIN;
@@ -413,7 +413,7 @@ send() {
         r(0) = result;
         CHECK_TERM(r(0));
     } else if (c_p->freason == TRAP) {
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         SET_I(c_p->i);
         SWAPIN;
         Dispatch();
@@ -570,8 +570,7 @@ nif_bif.epilogue() {
     if (ERTS_LIKELY(is_value(nif_bif_result))) {
         r(0) = nif_bif_result;
         CHECK_TERM(r(0));
-        SET_I(c_p->cp);
-        c_p->cp = 0;
+        $RETURN();
         Goto(*I);
     } else if (c_p->freason == TRAP) {
         SET_I(c_p->i);
@@ -581,6 +580,10 @@ nif_bif.epilogue() {
         }
         Dispatch();
     }
-    I = handle_error(c_p, c_p->cp, reg, c_p->current);
+    {
+        BeamInstr *cp = cp_val(*E);
+        ASSERT(VALID_INSTR(*cp));
+        I = handle_error(c_p, cp, reg, c_p->current);
+    }
     goto post_error_handling;
 }
diff --git a/erts/emulator/beam/big.c b/erts/emulator/beam/big.c
index 522f50287a..7666f23a4f 100644
--- a/erts/emulator/beam/big.c
+++ b/erts/emulator/beam/big.c
@@ -2176,6 +2176,30 @@ term_to_Uint64(Eterm term, Uint64 *up)
 #endif
 }
 
+/*
+ * Convert a non-negative integer term to a Uint32.
+ * Returns 1 and stores the value in *up on success. On 64-bit builds,
+ * returns 0 and stores BADARG in *up unless term is a small in
+ * [0, 2^32-1]; 32-bit builds defer entirely to term_to_Uint().
+ */
+int
+term_to_Uint32(Eterm term, Uint32 *up)
+{
+#if ERTS_SIZEOF_ETERM == 4
+    return term_to_Uint(term,up);
+#else
+    if (is_small(term)) {
+	Sint i = signed_val(term);
+	/* A 64-bit small can exceed 32 bits; reject rather than truncate. */
+	if (i >= 0 && i <= (Sint) 0xFFFFFFFF) {
+            *up = (Uint32) i;
+            return 1;
+        }
+    }
+    *up = BADARG;
+    return 0;
+#endif
+}
+
 
 int term_to_Sint(Eterm term, Sint *sp)
 {
diff --git a/erts/emulator/beam/big.h b/erts/emulator/beam/big.h
index ad19cce395..3fed076419 100644
--- a/erts/emulator/beam/big.h
+++ b/erts/emulator/beam/big.h
@@ -168,6 +168,8 @@ Eterm erts_uint64_array_to_big(Uint **, int, int, Uint64 *);
 int term_to_Uint64(Eterm, Uint64*);
 int term_to_Sint64(Eterm, Sint64*);
 #endif
+int term_to_Uint32(Eterm, Uint32*);
+
 
 Uint32 big_to_uint32(Eterm b);
 int term_equals_2pow32(Eterm);
diff --git a/erts/emulator/beam/dist.c b/erts/emulator/beam/dist.c
index eb9e749a08..dafe805a6f 100644
--- a/erts/emulator/beam/dist.c
+++ b/erts/emulator/beam/dist.c
@@ -1051,7 +1051,7 @@ erts_dsig_send_msg(ErtsDSigSendContext* ctx, Eterm remote, Eterm message)
 #endif
 
     if (have_seqtrace(SEQ_TRACE_TOKEN(sender))) {
-	seq_trace_update_send(sender);
+	seq_trace_update_serial(sender);
 	token = SEQ_TRACE_TOKEN(sender);
 	seq_trace_output(token, message, SEQ_TRACE_SEND, remote, sender);
     }
@@ -1125,7 +1125,7 @@ erts_dsig_send_reg_msg(ErtsDSigSendContext* ctx, Eterm remote_name, Eterm messag
 #endif
 
     if (have_seqtrace(SEQ_TRACE_TOKEN(sender))) {
-	seq_trace_update_send(sender);
+	seq_trace_update_serial(sender);
 	token = SEQ_TRACE_TOKEN(sender);
 	seq_trace_output(token, message, SEQ_TRACE_SEND, remote_name, sender);
     }
@@ -1184,7 +1184,7 @@ erts_dsig_send_exit_tt(ErtsDSigSendContext *ctx, Eterm local, Eterm remote,
         msg = reason;
 
     if (have_seqtrace(token)) {
-	seq_trace_update_send(ctx->c_p);
+	seq_trace_update_serial(ctx->c_p);
 	seq_trace_output_exit(token, reason, SEQ_TRACE_SEND, remote, local);
         if (ctx->dep->flags & DFLAG_EXIT_PAYLOAD) {
             ctl = TUPLE4(&ctx->ctl_heap[0],
@@ -3762,12 +3762,10 @@ int distribution_info(fmtfn_t to, void *arg)	/* Called by break handler */
 BIF_RETTYPE setnode_2(BIF_ALIST_2)
 {
     Process *net_kernel;
-    Uint creation;
+    Uint32 creation;
 
     /* valid creation ? */
-    if(!term_to_Uint(BIF_ARG_2, &creation))
-	goto error;
-    if(creation > 3)
+    if(!term_to_Uint32(BIF_ARG_2, &creation))
 	goto error;
 
     /* valid node name ? */
@@ -3811,7 +3809,7 @@ BIF_RETTYPE setnode_2(BIF_ALIST_2)
     erts_proc_unlock(BIF_P, ERTS_PROC_LOCK_MAIN);
     erts_thr_progress_block();
     inc_no_nodes();
-    erts_set_this_node(BIF_ARG_1, (Uint32) creation);
+    erts_set_this_node(BIF_ARG_1, creation);
     erts_is_alive = 1;
     send_nodes_mon_msgs(NULL, am_nodeup, BIF_ARG_1, am_visible, NIL);
     erts_thr_progress_unblock();
diff --git a/erts/emulator/beam/dist.h b/erts/emulator/beam/dist.h
index a33fb7efcf..37ec88cc55 100644
--- a/erts/emulator/beam/dist.h
+++ b/erts/emulator/beam/dist.h
@@ -54,11 +54,12 @@
 #define DFLAG_DIST_MANDATORY (DFLAG_EXTENDED_REFERENCES         \
                               | DFLAG_EXTENDED_PIDS_PORTS       \
 			      | DFLAG_UTF8_ATOMS                \
-			      | DFLAG_NEW_FUN_TAGS)
+			      | DFLAG_NEW_FUN_TAGS              \
+                              | DFLAG_BIG_CREATION)
 
 /*
  * Additional optimistic flags when encoding toward pending connection.
- * If remote node (erl_interface) does not supporting these then we may need
+ * If remote node (erl_interface) does not support these then we may need
  * to transcode messages enqueued before connection setup was finished.
  */
 #define DFLAG_DIST_HOPEFULLY (DFLAG_EXPORT_PTR_TAG              \
@@ -75,7 +76,6 @@
                             | DFLAG_SMALL_ATOM_TAGS           \
                             | DFLAG_UTF8_ATOMS                \
                             | DFLAG_MAP_TAG                   \
-                            | DFLAG_BIG_CREATION              \
                             | DFLAG_SEND_SENDER               \
                             | DFLAG_BIG_SEQTRACE_LABELS       \
                             | DFLAG_EXIT_PAYLOAD              \
diff --git a/erts/emulator/beam/erl_alloc.types b/erts/emulator/beam/erl_alloc.types
index 21941ba96e..349977ebe7 100644
--- a/erts/emulator/beam/erl_alloc.types
+++ b/erts/emulator/beam/erl_alloc.types
@@ -278,6 +278,7 @@ type	SETUP_CONN_ARG	SHORT_LIVED	PROCESSES	setup_connection_argument
 type    LIST_TRAP       SHORT_LIVED     PROCESSES       list_bif_trap_state
 type    CONT_EXIT_TRAP  SHORT_LIVED     PROCESSES       continue_exit_trap_state
 type    SEQ_YIELD_STATE SHORT_LIVED     SYSTEM          dist_seq_yield_state
+type    PHASH2_TRAP     SHORT_LIVED     PROCESSES       phash2_trap_state
 
 type	ENVIRONMENT	SYSTEM		SYSTEM		environment
 
diff --git a/erts/emulator/beam/erl_bif_info.c b/erts/emulator/beam/erl_bif_info.c
index 4d8c3eb9dd..e5ba79488a 100644
--- a/erts/emulator/beam/erl_bif_info.c
+++ b/erts/emulator/beam/erl_bif_info.c
@@ -2022,14 +2022,16 @@ current_function(Process *c_p, ErtsHeapFactory *hfact, Process* rp,
 
     if (c_p == rp && !(flags & ERTS_PI_FLAG_REQUEST_FOR_OTHER)) {
 	FunctionInfo fi2;
+        BeamInstr* continuation_ptr;
 
 	/*
 	 * The current function is erlang:process_info/{1,2},
 	 * which is not the answer that the application want.
-	 * We will use the function pointed into by rp->cp
-	 * instead if it can be looked up.
+         * We will use the continuation pointer stored at the
+         * top of the stack instead.
 	 */
-	erts_lookup_function_info(&fi2, rp->cp, full_info);
+        continuation_ptr = (BeamInstr *) rp->stop[0];
+        erts_lookup_function_info(&fi2, continuation_ptr, full_info);
 	if (fi2.mfa) {
 	    fi = fi2;
 	    rp->current = fi2.mfa;
@@ -2076,10 +2078,6 @@ current_stacktrace(ErtsHeapFactory *hfact, Process* rp,
 	s->trace[s->depth++] = rp->i;
 	depth--;
     }
-    if (depth > 0 && rp->cp != 0) {
-	s->trace[s->depth++] = rp->cp - 1;
-	depth--;
-    }
     erts_save_stacktrace(rp, s, depth);
 
     depth = s->depth;
@@ -2813,7 +2811,10 @@ BIF_RETTYPE system_info_1(BIF_ALIST_1)
     } else if (BIF_ARG_1 == am_threads) {
 	return am_true;
     } else if (BIF_ARG_1 == am_creation) {
-	return make_small(erts_this_node->creation);
+        Uint hsz = 0;
+        erts_bld_uint(NULL, &hsz, erts_this_node->creation);
+        hp = hsz ? HAlloc(BIF_P, hsz) : NULL;
+        BIF_RET(erts_bld_uint(&hp, NULL, erts_this_node->creation));
     } else if (BIF_ARG_1 == am_break_ignored) {
       extern int ignore_break;
       if (ignore_break) 
diff --git a/erts/emulator/beam/erl_bif_port.c b/erts/emulator/beam/erl_bif_port.c
index ed825d3dda..dd1e884705 100644
--- a/erts/emulator/beam/erl_bif_port.c
+++ b/erts/emulator/beam/erl_bif_port.c
@@ -44,6 +44,7 @@
 #include "erl_bif_unique.h"
 #include "dtrace-wrapper.h"
 #include "erl_proc_sig_queue.h"
+#include "erl_osenv.h"
 
 static Port *open_port(Process* p, Eterm name, Eterm settings, int *err_typep, int *err_nump);
 static int merge_global_environment(erts_osenv_t *env, Eterm key_value_pairs);
diff --git a/erts/emulator/beam/erl_bif_trace.c b/erts/emulator/beam/erl_bif_trace.c
index b31d5b86cb..80ba7d1b3c 100644
--- a/erts/emulator/beam/erl_bif_trace.c
+++ b/erts/emulator/beam/erl_bif_trace.c
@@ -1858,6 +1858,8 @@ Eterm erts_seq_trace(Process *p, Eterm arg1, Eterm arg2,
 
     if (arg1 == am_send) {
 	current_flag = SEQ_TRACE_SEND;
+    } else if (arg1 == am_spawn) {
+	current_flag = SEQ_TRACE_SPAWN;
     } else if (arg1 == am_receive) {
 	current_flag = SEQ_TRACE_RECEIVE; 
     } else if (arg1 == am_print) {
@@ -1976,8 +1978,9 @@ BIF_RETTYPE erl_seq_trace_info(Process *p, Eterm item)
     }
 
     if (have_no_seqtrace(SEQ_TRACE_TOKEN(p))) {
-	if ((item == am_send)  || (item == am_receive) || 
-	    (item == am_print) || (item == am_timestamp)
+	if ((item == am_send) || (item == am_spawn) ||
+        (item == am_receive) || (item == am_print)
+        || (item == am_timestamp)
 	    || (item == am_monotonic_timestamp)
 	    || (item == am_strict_monotonic_timestamp)) {
 	    hp = HAlloc(p,3);
@@ -1992,6 +1995,8 @@ BIF_RETTYPE erl_seq_trace_info(Process *p, Eterm item)
 
     if (item == am_send) {
 	current_flag = SEQ_TRACE_SEND;
+    } else if (item == am_spawn) {
+	current_flag = SEQ_TRACE_SPAWN;
     } else if (item == am_receive) {
 	current_flag = SEQ_TRACE_RECEIVE; 
     } else if (item == am_print) {
@@ -2041,7 +2046,7 @@ BIF_RETTYPE seq_trace_print_1(BIF_ALIST_1)
     if (have_no_seqtrace(SEQ_TRACE_TOKEN(BIF_P))) {
 	BIF_RET(am_false);
     }
-    seq_trace_update_send(BIF_P);
+    seq_trace_update_serial(BIF_P);
     seq_trace_output(SEQ_TRACE_TOKEN(BIF_P), BIF_ARG_1, 
 		     SEQ_TRACE_PRINT, NIL, BIF_P);
     BIF_RET(am_true);
@@ -2062,7 +2067,7 @@ BIF_RETTYPE seq_trace_print_2(BIF_ALIST_2)
     }
     if (!EQ(BIF_ARG_1, SEQ_TRACE_TOKEN_LABEL(BIF_P)))
 	BIF_RET(am_false);
-    seq_trace_update_send(BIF_P);
+    seq_trace_update_serial(BIF_P);
     seq_trace_output(SEQ_TRACE_TOKEN(BIF_P), BIF_ARG_2, 
 		     SEQ_TRACE_PRINT, NIL, BIF_P);
     BIF_RET(am_true);
diff --git a/erts/emulator/beam/erl_db_hash.c b/erts/emulator/beam/erl_db_hash.c
index 4904d3fa42..44ecf7cce5 100644
--- a/erts/emulator/beam/erl_db_hash.c
+++ b/erts/emulator/beam/erl_db_hash.c
@@ -93,11 +93,9 @@
     erts_flxctr_dec_read_centralized(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
 #define RESET_NITEMS(DB)                                                \
     erts_flxctr_reset(&(DB)->common.counters, ERTS_DB_TABLE_NITEMS_COUNTER_ID)
-/* 
- * The following symbols can be manipulated to "tune" the linear hash array 
- */
+
 #define GROW_LIMIT(NACTIVE) ((NACTIVE)*1)
-#define SHRINK_LIMIT(NACTIVE) ((NACTIVE) / 2)
+#define SHRINK_LIMIT(TB) erts_atomic_read_nob(&(TB)->shrink_limit)
 
 /*
 ** We want the first mandatory segment to be small (to reduce minimal footprint)
@@ -137,6 +135,11 @@
 
 #define BUCKET(tb, i) SEGTAB(tb)[SLOT_IX_TO_SEG_IX(i)]->buckets[(i) & EXT_SEGSZ_MASK]
 
+#ifdef DEBUG
+#  define DBG_BUCKET_INACTIVE ((HashDbTerm*)0xdead5107)
+#endif
+
+
 /*
  * When deleting a table, the number of records to delete.
  * Approximate number, because we must delete entire buckets.
@@ -377,7 +380,7 @@ typedef int (*extra_match_validator_t)(int keypos, Eterm match, Eterm guard, Ete
 */
 static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix);
 static void alloc_seg(DbTableHash *tb);
-static int free_seg(DbTableHash *tb, int free_records);
+static int free_seg(DbTableHash *tb);
 static HashDbTerm* next_live(DbTableHash *tb, Uint *iptr, erts_rwmtx_t** lck_ptr,
 			     HashDbTerm *list);
 static HashDbTerm* search_list(DbTableHash* tb, Eterm key, 
@@ -471,10 +474,8 @@ db_finalize_dbterm_hash(int cret, DbUpdateHandle* handle);
 
 static ERTS_INLINE void try_shrink(DbTableHash* tb)
 {
-    int nactive = NACTIVE(tb);
     int nitems = NITEMS(tb);
-    if (nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive)
-	&& !IS_FIXED(tb)) {
+    if (nitems < SHRINK_LIMIT(tb) && !IS_FIXED(tb)) {
 	shrink(tb, nitems);
     }
 }	
@@ -685,6 +686,7 @@ int db_create_hash(Process *p, DbTable *tbl)
 
     erts_atomic_init_nob(&tb->szm, FIRST_SEGSZ_MASK);
     erts_atomic_init_nob(&tb->nactive, FIRST_SEGSZ);
+    erts_atomic_init_nob(&tb->shrink_limit, 0);
     erts_atomic_init_nob(&tb->fixdel, (erts_aint_t)NULL);
     erts_atomic_init_nob(&tb->segtab, (erts_aint_t)NULL);
     SET_SEGTAB(tb, tb->first_segtab);
@@ -771,7 +773,7 @@ static int db_next_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
     b = next_live(tb, &ix, &lck, b->next);
     if (tb->common.status & (DB_BAG | DB_DUPLICATE_BAG)) {
 	while (b != 0) {
-	    if (!has_live_key(tb, b, key, hval)) {
+	    if (!has_key(tb, b, key, hval)) {
 		break;
 	    }
 	    b = next_live(tb, &ix, &lck, b->next);
@@ -781,6 +783,7 @@ static int db_next_hash(Process *p, DbTable *tbl, Eterm key, Eterm *ret)
 	*ret = am_EOT;
     }
     else {
+        ASSERT(!is_pseudo_deleted(b));
 	*ret = db_copy_key(p, tbl, &b->dbterm);
 	RUNLOCK_HASH(lck);
     }    
@@ -2466,7 +2469,7 @@ static SWord db_free_table_continue_hash(DbTable *tbl, SWord reds)
     erts_atomic_set_relb(&tb->fixdel, (erts_aint_t)NULL);
 
     while(tb->nslots != 0) {
-	reds -= EXT_SEGSZ/64 + free_seg(tb, 1);
+	reds -= EXT_SEGSZ/64 + free_seg(tb);
 
 	/*
 	 * If we have done enough work, get out here.
@@ -2664,6 +2667,34 @@ static struct ext_segtab* alloc_ext_segtab(DbTableHash* tb, unsigned seg_ix)
     return est;
 }
 
+static void calc_shrink_limit(DbTableHash* tb)
+{
+    erts_aint_t shrink_limit;
+
+    if (tb->nslots >= (FIRST_SEGSZ + 2*EXT_SEGSZ)) {
+        /*
+         * Start shrink when we can remove one extra segment
+         * and still remain below 50% load.
+         */
+        shrink_limit = (tb->nslots - EXT_SEGSZ) / 2;
+    }
+    else {
+        /*
+         * But don't shrink below two segments.
+         * Why? In order to have a chance of getting rid of the last extra segment,
+         * and rehash it into the first small segment, we either have to start
+         * early and do speculative joining of buckets or we have to join a lot
+         * of buckets during each delete-op.
+         *
+         * Instead keep segment #2 once allocated. I also think it's a good bet
+         * a shrinking large table will grow large again.
+         */
+        shrink_limit = 0;
+    }
+    erts_atomic_set_nob(&tb->shrink_limit, shrink_limit);
+}
+
+
 /* Extend table with one new segment
 */
 static void alloc_seg(DbTableHash *tb)
@@ -2682,8 +2713,17 @@ static void alloc_seg(DbTableHash *tb)
     segtab[seg_ix] = (struct segment*) erts_db_alloc(ERTS_ALC_T_DB_SEG,
                                                      (DbTable *) tb,
                                                      SIZEOF_SEGMENT(EXT_SEGSZ));
-    sys_memset(segtab[seg_ix], 0, SIZEOF_SEGMENT(EXT_SEGSZ));
+#ifdef DEBUG
+    {
+        int i;
+        for (i = 0; i < EXT_SEGSZ; i++) {
+            segtab[seg_ix]->buckets[i] = DBG_BUCKET_INACTIVE;
+        }
+    }
+#endif
     tb->nslots += EXT_SEGSZ;
+
+    calc_shrink_limit(tb);
 }
 
 static void dealloc_ext_segtab(void* lop_data)
@@ -2693,10 +2733,19 @@ static void dealloc_ext_segtab(void* lop_data)
     erts_free(ERTS_ALC_T_DB_SEG, est);
 }
 
-/* Shrink table by freeing the top segment
+struct dealloc_seg_ops {
+    struct segment* segp;
+    Uint seg_sz;
+
+    struct ext_segtab* est;
+};
+
+/* Shrink table by removing the top segment
 ** free_records: 1=free any records in segment, 0=assume segment is empty 
+** ds_ops: (out) Instructions for dealloc_seg().
 */
-static int free_seg(DbTableHash *tb, int free_records)
+static int remove_seg(DbTableHash *tb, int free_records,
+                      struct dealloc_seg_ops *ds_ops)
 {
     const int seg_ix = SLOT_IX_TO_SEG_IX(tb->nslots) - 1;
     struct segment** const segtab = SEGTAB(tb);
@@ -2704,24 +2753,47 @@ static int free_seg(DbTableHash *tb, int free_records)
     Uint seg_sz;
     int nrecords = 0;
 
+    ERTS_LC_ASSERT(IS_TAB_WLOCKED(tb) || tb->common.status & DB_DELETE
+                   || erts_atomic_read_nob(&tb->is_resizing));
+
     ASSERT(segp != NULL);
-#ifndef DEBUG
-    if (free_records)
-#endif
-    {	
-	int i = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ;
-	while (i--) {
-	    HashDbTerm* p = segp->buckets[i];
+    if (free_records) {
+        int ix, n;
+        if (seg_ix == 0) {
+            /* First segment (always fully active) */
+            n = FIRST_SEGSZ;
+            ix = FIRST_SEGSZ-1;
+        }
+        else if (NACTIVE(tb) < tb->nslots) {
+            /* Last extended segment partially active */
+            n = (NACTIVE(tb) - FIRST_SEGSZ) & EXT_SEGSZ_MASK;
+            ix = (NACTIVE(tb)-1) & EXT_SEGSZ_MASK;
+        }
+        else {
+            /* Full extended segment */
+            n = EXT_SEGSZ;
+            ix = EXT_SEGSZ - 1;
+        }
+        for ( ; n > 0; n--, ix--) {
+	    HashDbTerm* p = segp->buckets[ix & EXT_SEGSZ_MASK];
 	    while(p != 0) {		
 		HashDbTerm* nxt = p->next;
-		ASSERT(free_records); /* segment not empty as assumed? */
 		free_term(tb, p);
 		p = nxt;
 		++nrecords;
 	    }
 	}
     }
-    
+#ifdef DEBUG
+    else {
+        int ix = (seg_ix == 0) ? FIRST_SEGSZ-1 : EXT_SEGSZ-1;
+        for ( ; ix >= 0; ix--) {
+            ASSERT(segp->buckets[ix] == DBG_BUCKET_INACTIVE);
+        }
+    }
+#endif
+
+    ds_ops->est = NULL;
     if (seg_ix >= NSEG_1) {
         struct ext_segtab* est = ErtsContainerStruct_(segtab,struct ext_segtab,segtab);
 
@@ -2730,35 +2802,64 @@ static int free_seg(DbTableHash *tb, int free_records)
             SET_SEGTAB(tb, est->prev_segtab);
             tb->nsegs = est->prev_nsegs;
 
-            if (!tb->common.is_thread_safe) {
-                /*
-                 * Table is doing a graceful shrink operation and we must avoid
-                 * deallocating this segtab while it may still be read by other
-                 * threads. Schedule deallocation with thread progress to make
-                 * sure no lingering threads are still hanging in BUCKET macro
-                 * with an old segtab pointer.
-                 */
-                erts_schedule_db_free(&tb->common, dealloc_ext_segtab,
-                                      est, &est->lop,
-                                      SIZEOF_EXT_SEGTAB(est->nsegs));
-            }
-            else
-                erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
-                             SIZEOF_EXT_SEGTAB(est->nsegs));
+            ds_ops->est = est;
         }
     }
+
     seg_sz = (seg_ix == 0) ? FIRST_SEGSZ : EXT_SEGSZ;
-    erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb, segp, SIZEOF_SEGMENT(seg_sz));
+    tb->nslots -= seg_sz;
+    ASSERT(tb->nslots >= 0);
+
+    ds_ops->segp = segp;
+    ds_ops->seg_sz = seg_sz;
     
 #ifdef DEBUG
     if (seg_ix < tb->nsegs)
         SEGTAB(tb)[seg_ix] = NULL;
 #endif
-    tb->nslots -= seg_sz;
-    ASSERT(tb->nslots >= 0);
+    calc_shrink_limit(tb);
     return nrecords;
 }
 
+/*
+ * Deallocate segment removed by remove_seg()
+ */
+static void dealloc_seg(DbTableHash *tb, struct dealloc_seg_ops* ds_ops)
+{
+    struct ext_segtab* est = ds_ops->est;
+
+    if (est) {
+        if (!tb->common.is_thread_safe) {
+            /*
+             * Table is doing a graceful shrink operation and we must avoid
+             * deallocating this segtab while it may still be read by other
+             * threads. Schedule deallocation with thread progress to make
+             * sure no lingering threads are still hanging in BUCKET macro
+             * with an old segtab pointer.
+             */
+            erts_schedule_db_free(&tb->common, dealloc_ext_segtab,
+                                  est, &est->lop,
+                                  SIZEOF_EXT_SEGTAB(est->nsegs));
+        }
+        else
+            erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable*)tb, est,
+                         SIZEOF_EXT_SEGTAB(est->nsegs));
+    }
+
+    erts_db_free(ERTS_ALC_T_DB_SEG, (DbTable *)tb,
+                 ds_ops->segp, SIZEOF_SEGMENT(ds_ops->seg_sz));
+}
+
+/* Remove and deallocate top segment and all its contained objects */
+static int free_seg(DbTableHash *tb)
+{
+    struct dealloc_seg_ops ds_ops;
+    int reds;
+
+    reds = remove_seg(tb, 1, &ds_ops);
+    dealloc_seg(tb, &ds_ops);
+    return reds;
+}
 
 /*
 ** Copy terms from ptr1 until ptr2
@@ -2880,6 +2981,7 @@ static void grow(DbTableHash* tb, int nitems)
         pnext = &BUCKET(tb, from_ix);
         p = *pnext;
         to_pnext = &BUCKET(tb, to_ix);
+        ASSERT(*to_pnext == DBG_BUCKET_INACTIVE);
         while (p != NULL) {
             if (is_pseudo_deleted(p)) { /* rare but possible with fine locking */
                 *pnext = p->next;
@@ -2916,19 +3018,21 @@ abort:
 */
 static void shrink(DbTableHash* tb, int nitems)
 {
-    HashDbTerm** src_bp;
-    HashDbTerm** dst_bp;
+    struct dealloc_seg_ops ds_ops;
+    HashDbTerm* src;
+    HashDbTerm* tail;
     HashDbTerm** bp;
     erts_rwmtx_t* lck;
     int src_ix, dst_ix, low_szm;
     int nactive;
     int loop_limit = 5;
 
+    ds_ops.segp = NULL;
     do {
         if (!begin_resizing(tb))
             return; /* already in progress */
         nactive = NACTIVE(tb);
-        if (!(nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive))) {
+        if (!(nitems < SHRINK_LIMIT(tb))) {
             goto abort; /* already done (race) */
         }
         src_ix = nactive - 1;
@@ -2945,41 +3049,49 @@ static void shrink(DbTableHash* tb, int nitems)
             goto abort;
         }
 
-        src_bp = &BUCKET(tb, src_ix);
-        dst_bp = &BUCKET(tb, dst_ix);
-        bp = src_bp;
-
-        /*
-         * We join lists by appending "dst" at the end of "src"
-         * as we must step through "src" anyway to purge pseudo deleted.
-         */
-        while(*bp != NULL) {
-            if (is_pseudo_deleted(*bp)) {
-                HashDbTerm* deleted = *bp;
-                *bp = deleted->next;
-                free_term(tb, deleted);
-            } else {
-                bp = &(*bp)->next;
-            }
-        }
-        *bp = *dst_bp;
-        *dst_bp = *src_bp;
-        *src_bp = NULL;
-
+        src = BUCKET(tb, src_ix);
+#ifdef DEBUG
+        BUCKET(tb, src_ix) = DBG_BUCKET_INACTIVE;
+#endif
         nactive = src_ix;
         erts_atomic_set_nob(&tb->nactive, nactive);
         if (dst_ix == 0) {
             erts_atomic_set_relb(&tb->szm, low_szm);
         }
-        WUNLOCK_HASH(lck);
-
         if (tb->nslots - src_ix >= EXT_SEGSZ) {
-            free_seg(tb, 0);
+            remove_seg(tb, 0, &ds_ops);
         }
         done_resizing(tb);
 
-    } while (--loop_limit
-             && nactive > FIRST_SEGSZ && nitems < SHRINK_LIMIT(nactive));
+        if (src) {
+            /*
+             * We join buckets by appending "dst" list at the end of "src" list
+             * as we must step through "src" anyway to purge pseudo deleted.
+             */
+            bp = &BUCKET(tb, dst_ix);
+            tail = *bp;
+            *bp = src;
+
+            while(*bp != NULL) {
+                if (is_pseudo_deleted(*bp)) {
+                    HashDbTerm* deleted = *bp;
+                    *bp = deleted->next;
+                    free_term(tb, deleted);
+                } else {
+                    bp = &(*bp)->next;
+                }
+            }
+            *bp = tail;
+        }
+
+        WUNLOCK_HASH(lck);
+
+        if (ds_ops.segp) {
+            dealloc_seg(tb, &ds_ops);
+            ds_ops.segp = NULL;
+        }
+
+    } while (--loop_limit && nitems < SHRINK_LIMIT(tb));
     return;
 
 abort:
diff --git a/erts/emulator/beam/erl_db_hash.h b/erts/emulator/beam/erl_db_hash.h
index 9759d8b466..b26b82056f 100644
--- a/erts/emulator/beam/erl_db_hash.h
+++ b/erts/emulator/beam/erl_db_hash.h
@@ -63,9 +63,10 @@ typedef struct db_table_hash_fine_locks {
 typedef struct db_table_hash {
     DbTableCommon common;
 
-    /* SMP: szm and nactive are write-protected by is_resizing or table write lock */
+    /* szm, nactive, shrink_limit are write-protected by is_resizing or table write lock */
     erts_atomic_t szm;     /* current size mask. */
     erts_atomic_t nactive; /* Number of "active" slots */
+    erts_atomic_t shrink_limit; /* Shrink table when fewer objects than this */
 
     erts_atomic_t segtab;  /* The segment table (struct segment**) */
     struct segment* first_segtab[1];
diff --git a/erts/emulator/beam/erl_db_util.c b/erts/emulator/beam/erl_db_util.c
index 1ea7074d21..6a48f5c74e 100644
--- a/erts/emulator/beam/erl_db_util.c
+++ b/erts/emulator/beam/erl_db_util.c
@@ -2612,7 +2612,10 @@ restart:
 	    break;
         case matchCaller:
             ASSERT(c_p == self);
-	    if (!(c_p->cp) || !(cp = find_function_from_pc(c_p->cp))) {
+            t = c_p->stop[0];
+            if (is_not_CP(t)) {
+                *esp++ = am_undefined;
+            } else if (!(cp = find_function_from_pc(cp_val(t)))) {
  		*esp++ = am_undefined;
  	    } else {
 		ehp = HAllocX(build_proc, 4, HEAP_XTRA);
@@ -5226,7 +5229,7 @@ static Eterm match_spec_test(Process *p, Eterm against, Eterm spec, int trace)
     Eterm l;
     Uint32 ret_flags;
     Uint sz;
-    BeamInstr *save_cp;
+    Eterm save_cp;
 
     if (trace && !(is_list(against) || against == NIL)) {
 	return THE_NON_VALUE;
@@ -5270,13 +5273,13 @@ static Eterm match_spec_test(Process *p, Eterm against, Eterm spec, int trace)
 		++n;
 		l = CDR(list_val(l));
 	    }
-	    save_cp = p->cp;
-	    p->cp = NULL;
+	    save_cp = p->stop[0];
+	    p->stop[0] = NIL;
 	    res = erts_match_set_run_trace(p, p,
                       mps, arr, n,
 		      ERTS_PAM_COPY_RESULT|ERTS_PAM_IGNORE_TRACE_SILENT,
 		      &ret_flags);
-	    p->cp = save_cp;
+	    p->stop[0] = save_cp;
 	} else {
 	    n = 0;
 	    arr = NULL;
diff --git a/erts/emulator/beam/erl_fun.c b/erts/emulator/beam/erl_fun.c
index 9c866250bb..79a1fdb8b9 100644
--- a/erts/emulator/beam/erl_fun.c
+++ b/erts/emulator/beam/erl_fun.c
@@ -100,27 +100,6 @@ int erts_fun_table_sz(void)
 }
 
 ErlFunEntry*
-erts_put_fun_entry(Eterm mod, int uniq, int index)
-{
-    ErlFunEntry template;
-    ErlFunEntry* fe;
-    erts_aint_t refc;
-    ASSERT(is_atom(mod));
-    template.old_uniq = uniq;
-    template.old_index = index;
-    template.module = mod;
-    erts_fun_write_lock();
-    fe = (ErlFunEntry *) hash_put(&erts_fun_table, (void*) &template);
-    sys_memset(fe->uniq, 0, sizeof(fe->uniq));
-    fe->index = 0;
-    refc = erts_refc_inctest(&fe->refc, 0);
-    if (refc < 2) /* New or pending delete */
-	erts_refc_inc(&fe->refc, 1);
-    erts_fun_write_unlock();
-    return fe;
-}
-
-ErlFunEntry*
 erts_put_fun_entry2(Eterm mod, int old_uniq, int old_index,
 		    byte* uniq, int index, int arity)
 {
@@ -130,12 +109,12 @@ erts_put_fun_entry2(Eterm mod, int old_uniq, int old_index,
 
     ASSERT(is_atom(mod));
     template.old_uniq = old_uniq;
-    template.old_index = old_index;
+    template.index = index;
     template.module = mod;
     erts_fun_write_lock();
     fe = (ErlFunEntry *) hash_put(&erts_fun_table, (void*) &template);
     sys_memcpy(fe->uniq, uniq, sizeof(fe->uniq));
-    fe->index = index;
+    fe->old_index = old_index;
     fe->arity = arity;
     refc = erts_refc_inctest(&fe->refc, 0);
     if (refc < 2) /* New or pending delete */
@@ -144,13 +123,6 @@ erts_put_fun_entry2(Eterm mod, int old_uniq, int old_index,
     return fe;
 }
 
-struct my_key {
-    Eterm mod;
-    byte* uniq;
-    int index;
-    ErlFunEntry* fe;
-};
-
 ErlFunEntry*
 erts_get_fun_entry(Eterm mod, int uniq, int index)
 {
@@ -159,7 +131,7 @@ erts_get_fun_entry(Eterm mod, int uniq, int index)
 
     ASSERT(is_atom(mod));
     template.old_uniq = uniq;
-    template.old_index = index;
+    template.index = index;
     template.module = mod;
     erts_fun_read_lock();
     ret = (ErlFunEntry *) hash_get(&erts_fun_table, (void*) &template);
@@ -199,36 +171,33 @@ erts_erase_fun_entry(ErlFunEntry* fe)
     erts_fun_write_unlock();
 }
 
+struct fun_purge_foreach_args {
+    BeamInstr *start;
+    BeamInstr *end;
+};
+
+static void fun_purge_foreach(ErlFunEntry *fe, struct fun_purge_foreach_args *arg)
+{
+    BeamInstr* addr = fe->address;
+    if (arg->start <= addr && addr < arg->end) {
+        fe->pend_purge_address = addr;
+        ERTS_THR_WRITE_MEMORY_BARRIER;
+        fe->address = unloaded_fun;
+#ifdef HIPE
+        fe->pend_purge_native_address = fe->native_address;
+        hipe_set_closure_stub(fe);
+#endif
+        erts_purge_state_add_fun(fe);
+    }
+}
+
 void
 erts_fun_purge_prepare(BeamInstr* start, BeamInstr* end)
 {
-    int limit;
-    HashBucket** bucket;
-    int i;
+    struct fun_purge_foreach_args args = {start, end};
 
     erts_fun_read_lock();
-    limit = erts_fun_table.size;
-    bucket = erts_fun_table.bucket;
-    for (i = 0; i < limit; i++) {
-	HashBucket* b = bucket[i];
-
-	while (b) {
-	    ErlFunEntry* fe = (ErlFunEntry *) b;
-	    BeamInstr* addr = fe->address;
-
-	    if (start <= addr && addr < end) {
-		fe->pend_purge_address = addr;
-		ERTS_THR_WRITE_MEMORY_BARRIER;
-		fe->address = unloaded_fun;
-#ifdef HIPE
-                fe->pend_purge_native_address = fe->native_address;
-                hipe_set_closure_stub(fe);
-#endif
-		erts_purge_state_add_fun(fe);
-	    }
-	    b = b->next;
-	}
-    }
+    hash_foreach(&erts_fun_table, (HFOREACH_FUN)fun_purge_foreach, &args);
     erts_fun_read_unlock();
 }
 
@@ -278,36 +247,34 @@ erts_fun_purge_complete(ErlFunEntry **funs, Uint no)
     ERTS_THR_WRITE_MEMORY_BARRIER;
 }
 
+struct dump_fun_foreach_args {
+    fmtfn_t to;
+    void *to_arg;
+};
+
+static void
+dump_fun_foreach(ErlFunEntry *fe, struct dump_fun_foreach_args *args)
+{
+    erts_print(args->to, args->to_arg, "=fun\n");
+    erts_print(args->to, args->to_arg, "Module: %T\n", fe->module);
+    erts_print(args->to, args->to_arg, "Uniq: %d\n", fe->old_uniq);
+    erts_print(args->to, args->to_arg, "Index: %d\n",fe->old_index);
+    erts_print(args->to, args->to_arg, "Address: %p\n", fe->address);
+#ifdef HIPE
+    erts_print(args->to, args->to_arg, "Native_address: %p\n", fe->native_address);
+#endif
+    erts_print(args->to, args->to_arg, "Refc: %ld\n", erts_refc_read(&fe->refc, 1));
+}
+
 void
 erts_dump_fun_entries(fmtfn_t to, void *to_arg)
 {
-    int limit;
-    HashBucket** bucket;
-    int i;
+    struct dump_fun_foreach_args args = {to, to_arg};
     int lock = !ERTS_IS_CRASH_DUMPING;
 
-
     if (lock)
 	erts_fun_read_lock();
-    limit = erts_fun_table.size;
-    bucket = erts_fun_table.bucket;
-    for (i = 0; i < limit; i++) {
-	HashBucket* b = bucket[i];
-
-	while (b) {
-	    ErlFunEntry* fe = (ErlFunEntry *) b;
-	    erts_print(to, to_arg, "=fun\n");
-	    erts_print(to, to_arg, "Module: %T\n", fe->module);
-	    erts_print(to, to_arg, "Uniq: %d\n", fe->old_uniq);
-	    erts_print(to, to_arg, "Index: %d\n",fe->old_index);
-	    erts_print(to, to_arg, "Address: %p\n", fe->address);
-#ifdef HIPE
-	    erts_print(to, to_arg, "Native_address: %p\n", fe->native_address);
-#endif
-	    erts_print(to, to_arg, "Refc: %ld\n", erts_refc_read(&fe->refc, 1));
-	    b = b->next;
-	}
-    }
+    hash_foreach(&erts_fun_table, (HFOREACH_FUN)dump_fun_foreach, &args);
     if (lock)
 	erts_fun_read_unlock();
 }
@@ -315,15 +282,27 @@ erts_dump_fun_entries(fmtfn_t to, void *to_arg)
 static HashValue
 fun_hash(ErlFunEntry* obj)
 {
-    return (HashValue) (obj->old_uniq ^ obj->old_index ^ atom_val(obj->module));
+    return (HashValue) (obj->old_uniq ^ obj->index ^ atom_val(obj->module));
 }
 
 static int
 fun_cmp(ErlFunEntry* obj1, ErlFunEntry* obj2)
 {
-    return !(obj1->module == obj2->module && 
+    /*
+     * OTP 23: Use 'index' (instead of 'old_index') when comparing fun
+     * entries. In OTP 23, multiple make_fun2 instructions may refer to the
+     * same 'index' (for the wrapper function generated for the
+     * 'fun F/A' syntax).
+     *
+     * This is safe when loading code compiled with OTP R15 and later,
+     * because since R15 (2011), the 'index' has been reliably equal
+     * to 'old_index'. The loader refuses to load modules compiled before
+     * OTP R15.
+     */
+
+    return !(obj1->module == obj2->module &&
 	     obj1->old_uniq == obj2->old_uniq &&
-	     obj1->old_index == obj2->old_index);
+	     obj1->index == obj2->index);
 }
 
 static ErlFunEntry*
@@ -333,7 +312,7 @@ fun_alloc(ErlFunEntry* template)
 						  sizeof(ErlFunEntry));
 
     obj->old_uniq = template->old_uniq;
-    obj->old_index = template->old_index;
+    obj->index = template->index;
     obj->module = template->module;
     erts_refc_init(&obj->refc, -1);
     obj->address = unloaded_fun;
diff --git a/erts/emulator/beam/erl_fun.h b/erts/emulator/beam/erl_fun.h
index fb2901d866..eefc7a95bb 100644
--- a/erts/emulator/beam/erl_fun.h
+++ b/erts/emulator/beam/erl_fun.h
@@ -74,7 +74,6 @@ void erts_init_fun_table(void);
 void erts_fun_info(fmtfn_t, void *);
 int erts_fun_table_sz(void);
 
-ErlFunEntry* erts_put_fun_entry(Eterm mod, int uniq, int index);
 ErlFunEntry* erts_get_fun_entry(Eterm mod, int uniq, int index);
 
 ErlFunEntry* erts_put_fun_entry2(Eterm mod, int old_uniq, int old_index,
diff --git a/erts/emulator/beam/erl_gc.c b/erts/emulator/beam/erl_gc.c
index 13b1f8ab4d..f387960b08 100644
--- a/erts/emulator/beam/erl_gc.c
+++ b/erts/emulator/beam/erl_gc.c
@@ -65,6 +65,8 @@
 #  define HARDDEBUG 1
 #endif
 
+extern BeamInstr beam_apply[2];
+
 /*
  * Returns number of elements in an array.
  */
@@ -934,13 +936,15 @@ garbage_collect_hibernate(Process* p, int check_long_gc)
      */
     erts_atomic32_read_bor_nob(&p->state, ERTS_PSFLG_GC);
     ErtsGcQuickSanityCheck(p);
-    ASSERT(p->stop == p->hend);	/* Stack must be empty. */
+    ASSERT(p->stop == p->hend - 1); /* Only allow one continuation pointer. */
+    ASSERT(p->stop[0] == make_cp(beam_apply+1));
 
     /*
      * Do it.
      */
 
     heap_size = p->heap_sz + (p->old_htop - p->old_heap) + p->mbuf_sz;
+    heap_size += 1;             /* Reserve place for continuation pointer */
 
     heap = (Eterm*) ERTS_HEAP_ALLOC(ERTS_ALC_T_TMP_HEAP,
 				    sizeof(Eterm)*heap_size);
@@ -966,13 +970,11 @@ garbage_collect_hibernate(Process* p, int check_long_gc)
     p->high_water = htop;
     p->htop = htop;
     p->hend = p->heap + heap_size;
-    p->stop = p->hend;
+    p->stop = p->hend - 1;
     p->heap_sz = heap_size;
 
     heap_size = actual_size = p->htop - p->heap;
-    if (heap_size == 0) {
-	heap_size = 1; /* We want a heap... */
-    }
+    heap_size += 1;             /* Reserve place for continuation pointer */
 
     FLAGS(p) &= ~F_FORCE_GC;
     p->live_hf_end = ERTS_INVALID_HFRAG_PTR;
@@ -988,14 +990,15 @@ garbage_collect_hibernate(Process* p, int check_long_gc)
      * hibernated.
      */
 
-    ASSERT(p->hend - p->stop == 0); /* Empty stack */
     ASSERT(actual_size < p->heap_sz);
 
     heap = ERTS_HEAP_ALLOC(ERTS_ALC_T_HEAP, sizeof(Eterm)*heap_size);
     sys_memcpy((void *) heap, (void *) p->heap, actual_size*sizeof(Eterm));
     ERTS_HEAP_FREE(ERTS_ALC_T_TMP_HEAP, p->heap, p->heap_sz*sizeof(Eterm));
 
-    p->stop = p->hend = heap + heap_size;
+    p->hend = heap + heap_size;
+    p->stop = p->hend - 1;
+    p->stop[0] = make_cp(beam_apply+1);
 
     offs = heap - p->heap;
     area = (char *) p->heap;
diff --git a/erts/emulator/beam/erl_message.c b/erts/emulator/beam/erl_message.c
index 1bebf6efe2..42a07a59d6 100644
--- a/erts/emulator/beam/erl_message.c
+++ b/erts/emulator/beam/erl_message.c
@@ -674,7 +674,7 @@ erts_send_message(Process* sender,
          * Make sure we don't use the heap between those instances.
          */
         if (have_seqtrace(stoken)) {
-	    seq_trace_update_send(sender);
+	    seq_trace_update_serial(sender);
 	    seq_trace_output(stoken, message, SEQ_TRACE_SEND,
 			     receiver->common.id, sender);
 
diff --git a/erts/emulator/beam/erl_nfunc_sched.c b/erts/emulator/beam/erl_nfunc_sched.c
index b8cf2bee0e..b2658ef180 100644
--- a/erts/emulator/beam/erl_nfunc_sched.c
+++ b/erts/emulator/beam/erl_nfunc_sched.c
@@ -67,7 +67,7 @@ erts_destroy_nif_export(Process *p)
 
 void
 erts_nif_export_save_trace(Process *c_p, NifExport *nep, int applying,
-			   Export* ep, BeamInstr *cp, Uint32 flags,
+			   Export* ep, Uint32 flags,
 			   Uint32 flags_meta, BeamInstr* I,
 			   ErtsTracer meta_tracer)
 {
@@ -78,7 +78,6 @@ erts_nif_export_save_trace(Process *c_p, NifExport *nep, int applying,
 		      sizeof(NifExportTrace));
     netp->applying = applying;
     netp->ep = ep;
-    netp->cp = cp;
     netp->flags = flags;
     netp->flags_meta = flags_meta;
     netp->I = I;
@@ -93,7 +92,7 @@ erts_nif_export_restore_trace(Process *c_p, Eterm result, NifExport *nep)
     NifExportTrace *netp = nep->trace;
     nep->trace = NULL;
     erts_bif_trace_epilogue(c_p, result, netp->applying, netp->ep,
-			    netp->cp, netp->flags, netp->flags_meta,
+			    netp->flags, netp->flags_meta,
 			    netp->I, netp->meta_tracer);
     erts_tracer_update(&netp->meta_tracer, NIL);
     erts_free(ERTS_ALC_T_NIF_EXP_TRACE, netp);
@@ -148,7 +147,6 @@ erts_nif_export_schedule(Process *c_p, Process *dirty_shadow_proc,
 	for (i = 0; i < (int) mfa->arity; i++)
 	    nep->argv[i] = reg[i];
 	nep->pc = pc;
-	nep->cp = c_p->cp;
 	nep->mfa = mfa;
 	nep->current = c_p->current;
 	ASSERT(argc >= 0);
diff --git a/erts/emulator/beam/erl_nfunc_sched.h b/erts/emulator/beam/erl_nfunc_sched.h
index 1cb252eba5..5c6486cbb8 100644
--- a/erts/emulator/beam/erl_nfunc_sched.h
+++ b/erts/emulator/beam/erl_nfunc_sched.h
@@ -28,7 +28,6 @@
 typedef struct {
     int applying;
     Export* ep;
-    BeamInstr *cp;
     Uint32 flags;
     Uint32 flags_meta;
     BeamInstr* I;
@@ -53,7 +52,6 @@ typedef struct {
     NifExportTrace *trace;
     /* --- The following is only used on error --- */
     BeamInstr *pc;	/* Program counter */
-    BeamInstr *cp;	/* Continuation pointer */
     ErtsCodeMFA *mfa;	/* MFA of original call */
     int argc;		/* Number of arguments in original call */
     int argv_size;	/* Allocated size of argv */
@@ -62,7 +60,7 @@ typedef struct {
 
 NifExport *erts_new_proc_nif_export(Process *c_p, int argc);
 void erts_nif_export_save_trace(Process *c_p, NifExport *nep, int applying,
-				Export* ep, BeamInstr *cp, Uint32 flags,
+				Export* ep, Uint32 flags,
 				Uint32 flags_meta, BeamInstr* I,
 				ErtsTracer meta_tracer);
 void erts_nif_export_restore_trace(Process *c_p, Eterm result, NifExport *nep);
@@ -85,7 +83,7 @@ ERTS_GLB_INLINE void erts_nif_export_restore_error(Process* c_p, BeamInstr **pc,
 						   Eterm *reg, ErtsCodeMFA **nif_mfa);
 ERTS_GLB_INLINE int erts_nif_export_check_save_trace(Process *c_p, Eterm result,
 						     int applying, Export* ep,
-						     BeamInstr *cp, Uint32 flags,
+						     Uint32 flags,
 						     Uint32 flags_meta, BeamInstr* I,
 						     ErtsTracer meta_tracer);
 ERTS_GLB_INLINE Process *erts_proc_shadow2real(Process *c_p);
@@ -131,8 +129,6 @@ erts_check_nif_export_in_area(Process *p, char *start, Uint size)
 	return 0;
     if (ErtsInArea(nep->pc, start, size))
 	return 1;
-    if (ErtsInArea(nep->cp, start, size))
-	return 1;
     if (ErtsInArea(nep->mfa, start, size))
 	return 1;
     if (ErtsInArea(nep->current, start, size))
@@ -164,7 +160,6 @@ erts_nif_export_restore_error(Process* c_p, BeamInstr **pc,
 
     ASSERT(nep);
     *pc = nep->pc;
-    c_p->cp = nep->cp;
     *nif_mfa = nep->mfa;
     for (ix = 0; ix < nep->argc; ix++)
 	reg[ix] = nep->argv[ix];
@@ -174,7 +169,7 @@ erts_nif_export_restore_error(Process* c_p, BeamInstr **pc,
 ERTS_GLB_INLINE int
 erts_nif_export_check_save_trace(Process *c_p, Eterm result,
 				 int applying, Export* ep,
-				 BeamInstr *cp, Uint32 flags,
+				 Uint32 flags,
 				 Uint32 flags_meta, BeamInstr* I,
 				 ErtsTracer meta_tracer)
 {
@@ -182,7 +177,7 @@ erts_nif_export_check_save_trace(Process *c_p, Eterm result,
 	NifExport *nep = ERTS_PROC_GET_NIF_TRAP_EXPORT(c_p);
 	if (nep && nep->argc >= 0) {
 	    erts_nif_export_save_trace(c_p, nep, applying, ep,
-				       cp, flags, flags_meta,
+				       flags, flags_meta,
 				       I, meta_tracer);
 	    return 1;
 	}
diff --git a/erts/emulator/beam/erl_nif.c b/erts/emulator/beam/erl_nif.c
index 1fbe362330..46f7e864fd 100644
--- a/erts/emulator/beam/erl_nif.c
+++ b/erts/emulator/beam/erl_nif.c
@@ -334,7 +334,7 @@ schedule(ErlNifEnv* env, NativeFunPtr direct_fp, NativeFunPtr indirect_fp,
 
     ep = erts_nif_export_schedule(c_p, dirty_shadow_proc,
 				  c_p->current,
-				  c_p->cp,
+                                  cp_val(c_p->stop[0]),
 				  BeamOpCodeAddr(op_call_nif),
 				  direct_fp, indirect_fp,
 				  mod, func_name,
@@ -815,7 +815,7 @@ int enif_send(ErlNifEnv* env, const ErlNifPid* to_pid,
             }
 #endif
             if (have_seqtrace(stoken)) {
-                seq_trace_update_send(c_p);
+                seq_trace_update_serial(c_p);
                 seq_trace_output(stoken, msg, SEQ_TRACE_SEND,
                                  rp->common.id, c_p);
             }
@@ -4117,7 +4117,6 @@ static struct erl_module_nif* create_lib(const ErlNifEntry* src)
     return lib;
 };
 
-
 BIF_RETTYPE load_nif_2(BIF_ALIST_2)
 {
     static const char bad_lib[] = "bad_lib";
@@ -4140,6 +4139,7 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2)
     struct erl_module_nif* lib = NULL;
     struct erl_module_instance* this_mi;
     struct erl_module_instance* prev_mi;
+    BeamInstr* caller_cp;
 
     if (BIF_P->flags & F_HIPE_MODE) {
 	ret = load_nif_error(BIF_P, "notsup", "Calling load_nif from HiPE compiled "
@@ -4175,7 +4175,8 @@ BIF_RETTYPE load_nif_2(BIF_ALIST_2)
     ASSERT(BIF_P->current->module == am_erlang
 	   && BIF_P->current->function == am_load_nif 
 	   && BIF_P->current->arity == 2);
-    caller = find_function_from_pc(BIF_P->cp);
+    caller_cp = cp_val(BIF_P->stop[0]);
+    caller = find_function_from_pc(caller_cp);
     ASSERT(caller != NULL);
     mod_atom = caller->module;
     ASSERT(is_atom(mod_atom));
diff --git a/erts/emulator/beam/erl_node_tables.c b/erts/emulator/beam/erl_node_tables.c
index 50e9812534..285252a53e 100644
--- a/erts/emulator/beam/erl_node_tables.c
+++ b/erts/emulator/beam/erl_node_tables.c
@@ -977,7 +977,7 @@ static void print_node(void *venp, void *vpndp)
 	if(pndp->sysname == NIL) {
 	    erts_print(pndp->to, pndp->to_arg, "Name: %T ", enp->sysname);
 	}
-	erts_print(pndp->to, pndp->to_arg, " %d", enp->creation);
+	erts_print(pndp->to, pndp->to_arg, " %u", enp->creation);
 #ifdef DEBUG
 	erts_print(pndp->to, pndp->to_arg, " (refc=%ld)",
 		   erts_refc_read(&enp->refc, 0));
@@ -1020,7 +1020,7 @@ void erts_print_node_info(fmtfn_t to,
 /* ----------------------------------------------------------------------- */
 
 void
-erts_set_this_node(Eterm sysname, Uint creation)
+erts_set_this_node(Eterm sysname, Uint32 creation)
 {
     ERTS_LC_ASSERT(erts_thr_progress_is_blocking());
     ASSERT(2 <= de_refc_read(erts_this_dist_entry, 2));
diff --git a/erts/emulator/beam/erl_node_tables.h b/erts/emulator/beam/erl_node_tables.h
index ffaafbbbea..beae2df75f 100644
--- a/erts/emulator/beam/erl_node_tables.h
+++ b/erts/emulator/beam/erl_node_tables.h
@@ -264,7 +264,7 @@ void erts_set_dist_entry_pending(DistEntry *);
 void erts_set_dist_entry_connected(DistEntry *, Eterm, Uint);
 ErlNode *erts_find_or_insert_node(Eterm, Uint32, Eterm);
 void erts_schedule_delete_node(ErlNode *);
-void erts_set_this_node(Eterm, Uint);
+void erts_set_this_node(Eterm, Uint32);
 Uint erts_node_table_size(void);
 void erts_init_node_tables(int);
 void erts_node_table_info(fmtfn_t, void *);
diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c
index d5e0e3b218..b60fb64342 100644
--- a/erts/emulator/beam/erl_proc_sig_queue.c
+++ b/erts/emulator/beam/erl_proc_sig_queue.c
@@ -995,7 +995,7 @@ send_gen_exit_signal(Process *c_p, Eterm from_tag,
 
     seq_trace = c_p && have_seqtrace(token);
     if (seq_trace)
-        seq_trace_update_send(c_p);
+        seq_trace_update_serial(c_p);
 
 #ifdef USE_VM_PROBES
     utag_sz = 0;
diff --git a/erts/emulator/beam/erl_process.c b/erts/emulator/beam/erl_process.c
index 1c1ef1db84..4b4337ce17 100644
--- a/erts/emulator/beam/erl_process.c
+++ b/erts/emulator/beam/erl_process.c
@@ -10992,8 +10992,13 @@ erts_set_gc_state(Process *c_p, int enable)
     ERTS_LC_ASSERT(ERTS_PROC_LOCK_MAIN == erts_proc_lc_my_proc_locks(c_p));
 
     if (!enable) {
-	c_p->flags |= F_DISABLE_GC;
-	return 0;
+        /* Strictly speaking it's not illegal to disable the GC when it's
+         * already disabled, but we risk enabling the GC prematurely if (for
+         * example) a BIF were to blindly disable it when trapping and then
+         * re-enable it before returning its result. */
+        ASSERT(!(c_p->flags & F_DISABLE_GC));
+        c_p->flags |= F_DISABLE_GC;
+        return 0;
     }
 
     c_p->flags &= ~F_DISABLE_GC;
@@ -11453,7 +11458,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
 #else
     arg_size = size_object_litopt(args, &litarea);
 #endif
-    heap_need = arg_size;
+    heap_need = arg_size + 1;   /* Reserve place for continuation pointer */
 
     p->flags = flags;
 
@@ -11502,7 +11507,8 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->old_hend = p->old_htop = p->old_heap = NULL;
     p->high_water = p->heap;
     p->gen_gcs = 0;
-    p->stop = p->hend = p->heap + sz;
+    p->hend = p->heap + sz;
+    p->stop = p->hend - 1;     /* Reserve place for continuation pointer */
     p->htop = p->heap;
     p->heap_sz = sz;
     p->abandoned_heap = NULL;
@@ -11520,7 +11526,7 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->current = &p->u.initial;
 
     p->i = (BeamInstr *) beam_apply;
-    p->cp = (BeamInstr *) beam_apply+1;
+    p->stop[0] = make_cp(beam_apply + 1);
 
     p->arg_reg = p->def_arg_reg;
     p->max_arg_reg = sizeof(p->def_arg_reg)/sizeof(p->def_arg_reg[0]);
@@ -11583,9 +11589,6 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->mbuf_sz = 0;
     erts_atomic_init_nob(&p->psd, (erts_aint_t) NULL);
     p->dictionary = NULL;
-    p->seq_trace_lastcnt = 0;
-    p->seq_trace_clock = 0;
-    SEQ_TRACE_TOKEN(p) = NIL;
 #ifdef USE_VM_PROBES
     DT_UTAG(p) = NIL;
     DT_UTAG_FLAGS(p) = 0;
@@ -11606,6 +11609,45 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
     p->fp_exception = 0;
 #endif
 
+    /* seq_trace is handled before regular tracing as the latter may touch the
+     * trace token. */
+    if (have_seqtrace(SEQ_TRACE_TOKEN(parent))) {
+        Eterm token;
+        Uint token_sz;
+        Eterm *hp;
+
+        ASSERT(SEQ_TRACE_TOKEN_ARITY(parent) == 5);
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_FLAGS(parent)));
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_SERIAL(parent)));
+        ASSERT(is_immed(SEQ_TRACE_TOKEN_LASTCNT(parent)));
+
+        seq_trace_update_serial(parent);
+
+        token = SEQ_TRACE_TOKEN(parent);
+        token_sz = size_object(token);
+
+        hp = HAlloc(p, token_sz);
+        SEQ_TRACE_TOKEN(p) = copy_struct(token, token_sz, &hp, &MSO(p));
+
+        /* The counters behave the same way on spawning as they do on messages;
+         * we don't inherit our parent's lastcnt. */
+        p->seq_trace_lastcnt = parent->seq_trace_clock;
+        p->seq_trace_clock = parent->seq_trace_clock;
+
+        ASSERT((locks & (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) ==
+               (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE));
+
+        locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+        erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+        erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+
+        seq_trace_output(token, NIL, SEQ_TRACE_SPAWN, p->common.id, parent);
+    } else {
+        SEQ_TRACE_TOKEN(p) = NIL;
+        p->seq_trace_lastcnt = 0;
+        p->seq_trace_clock = 0;
+    }
+
     if (IS_TRACED(parent)) {
 	if (ERTS_TRACE_FLAGS(parent) & F_TRACE_SOS) {
 	    ERTS_TRACE_FLAGS(p) |= (ERTS_TRACE_FLAGS(parent) & TRACEE_FLAGS);
@@ -11627,9 +11669,14 @@ erl_create_process(Process* parent, /* Parent of process (default group leader).
 		}
         }
         if (ARE_TRACE_FLAGS_ON(parent, F_TRACE_PROCS)) {
-            locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
-            erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
-            erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+            /* The locks may already be released if seq_trace is enabled as
+             * well. */
+            if ((locks & (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE))
+                  == (ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE)) {
+                locks &= ~(ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+                erts_proc_unlock(p, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+                erts_proc_unlock(parent, ERTS_PROC_LOCK_STATUS|ERTS_PROC_LOCK_TRACE);
+            }
             trace_proc_spawn(parent, am_spawn, p->common.id, mod, func, args);
             if (so->flags & SPO_LINK)
                 trace_proc(parent, locks, parent, am_link, p->common.id);
@@ -11793,7 +11840,6 @@ void erts_init_empty_process(Process *p)
     p->u.initial.function = 0;
     p->u.initial.arity = 0;
     p->catches = 0;
-    p->cp = NULL;
     p->i = NULL;
     p->current = NULL;
 
@@ -11871,7 +11917,6 @@ erts_debug_verify_clean_empty_process(Process* p)
     ASSERT(p->bif_timers == NULL);
     ASSERT(p->dictionary == NULL);
     ASSERT(p->catches == 0);
-    ASSERT(p->cp == NULL);
     ASSERT(p->i == NULL);
     ASSERT(p->current == NULL);
 
@@ -13100,9 +13145,6 @@ erts_program_counter_info(fmtfn_t to, void *to_arg, Process *p)
     erts_print(to, to_arg, "Program counter: %p (", p->i);
     print_function_from_pc(to, to_arg, p->i);
     erts_print(to, to_arg, ")\n");
-    erts_print(to, to_arg, "CP: %p (", p->cp);
-    print_function_from_pc(to, to_arg, p->cp);
-    erts_print(to, to_arg, ")\n");
     state = erts_atomic32_read_acqb(&p->state);
     if (!(state & (ERTS_PSFLG_RUNNING
 		   | ERTS_PSFLG_RUNNING_SYS
@@ -13379,9 +13421,6 @@ static void print_current_process_info(fmtfn_t to, void *to_arg,
 	erts_print(to, to_arg, "Current Process Program counter: %p (", p->i);
 	print_function_from_pc(to, to_arg, p->i);
 	erts_print(to, to_arg, ")\n");
-	erts_print(to, to_arg, "Current Process CP: %p (", p->cp);
-	print_function_from_pc(to, to_arg, p->cp);
-	erts_print(to, to_arg, ")\n");
 
 	/* Getting this stacktrace can segfault if we are very very
 	   unlucky if called while a process is being garbage collected.
diff --git a/erts/emulator/beam/erl_process.h b/erts/emulator/beam/erl_process.h
index c0d7cfd13d..a7a6528f92 100644
--- a/erts/emulator/beam/erl_process.h
+++ b/erts/emulator/beam/erl_process.h
@@ -975,7 +975,6 @@ struct process {
     unsigned max_arg_reg;	/* Maximum number of argument registers available. */
     Eterm def_arg_reg[6];	/* Default array for argument registers. */
 
-    BeamInstr* cp;		/* (untagged) Continuation pointer (for threaded code). */
     BeamInstr* i;		/* Program counter for threaded code. */
     Sint catches;		/* Number of catches on stack */
     Sint fcalls;		/* 
@@ -1489,6 +1488,8 @@ extern int erts_system_profile_ts_type;
 #define SEQ_TRACE_SEND     (1 << 0)
 #define SEQ_TRACE_RECEIVE  (1 << 1)
 #define SEQ_TRACE_PRINT    (1 << 2)
+/* (This three-bit gap contains the timestamp.) */
+#define SEQ_TRACE_SPAWN    (1 << 6)
 
 #define ERTS_SEQ_TRACE_FLAGS_TS_TYPE_SHIFT 3
 
diff --git a/erts/emulator/beam/erl_trace.c b/erts/emulator/beam/erl_trace.c
index f6f177887c..5c46a10d64 100644
--- a/erts/emulator/beam/erl_trace.c
+++ b/erts/emulator/beam/erl_trace.c
@@ -830,7 +830,7 @@ trace_receive(Process* receiver,
 }
 
 int
-seq_trace_update_send(Process *p)
+seq_trace_update_serial(Process *p)
 {
     ErtsTracer seq_tracer = erts_get_system_seq_tracer();
     ASSERT((is_tuple(SEQ_TRACE_TOKEN(p)) || is_nil(SEQ_TRACE_TOKEN(p))));
@@ -898,6 +898,7 @@ seq_trace_output_generic(Eterm token, Eterm msg, Uint type,
 
     switch (type) {
     case SEQ_TRACE_SEND:    type_atom = am_send; break;
+    case SEQ_TRACE_SPAWN:   type_atom = am_spawn; break;
     case SEQ_TRACE_PRINT:   type_atom = am_print; break;
     case SEQ_TRACE_RECEIVE: type_atom = am_receive; break;
     default:
diff --git a/erts/emulator/beam/erl_trace.h b/erts/emulator/beam/erl_trace.h
index af38ef52db..f564549ab9 100644
--- a/erts/emulator/beam/erl_trace.h
+++ b/erts/emulator/beam/erl_trace.h
@@ -145,7 +145,7 @@ int erts_trace_flags(Eterm List,
 Eterm erts_bif_trace(int bif_index, Process* p, Eterm* args, BeamInstr *I);
 Eterm
 erts_bif_trace_epilogue(Process *p, Eterm result, int applying,
-			Export* ep, BeamInstr *cp, Uint32 flags,
+			Export* ep, Uint32 flags,
 			Uint32 flags_meta, BeamInstr* I,
 			ErtsTracer meta_tracer);
 
@@ -163,7 +163,9 @@ seq_trace_output_generic((token), (msg), (type), (receiver), NULL, (exitfrom))
 void seq_trace_output_generic(Eterm token, Eterm msg, Uint type, 
 			      Eterm receiver, Process *process, Eterm exitfrom);
 
-int seq_trace_update_send(Process *process);
+/* Bump the sequence number if tracing is enabled; must be used before sending
+ * send/spawn trace messages. */
+int seq_trace_update_serial(Process *process);
 
 Eterm erts_seq_trace(Process *process, 
 		     Eterm atom_type, Eterm atom_true_or_false, 
diff --git a/erts/emulator/beam/erl_utils.h b/erts/emulator/beam/erl_utils.h
index 430ac305c5..449243a9b7 100644
--- a/erts/emulator/beam/erl_utils.h
+++ b/erts/emulator/beam/erl_utils.h
@@ -70,6 +70,7 @@ int erts_fit_in_bits_uint(Uint);
 Sint erts_list_length(Eterm);
 int erts_is_builtin(Eterm, Eterm, int);
 Uint32 make_hash2(Eterm);
+Uint32 trapping_make_hash2(Eterm, Eterm*, struct process*);
 Uint32 make_hash(Eterm);
 Uint32 make_internal_hash(Eterm, Uint32 salt);
 
diff --git a/erts/emulator/beam/export.c b/erts/emulator/beam/export.c
index 946ffeffb8..b928f03b2f 100644
--- a/erts/emulator/beam/export.c
+++ b/erts/emulator/beam/export.c
@@ -196,6 +196,17 @@ init_export_table(void)
     }
 }
 
+static struct export_entry* init_template(struct export_templ* templ,
+					  Eterm m, Eterm f, unsigned a)
+{
+    templ->entry.ep = &templ->exp;
+    templ->entry.slot.index = -1;
+    templ->exp.info.mfa.module = m;
+    templ->exp.info.mfa.function = f;
+    templ->exp.info.mfa.arity = a;
+    return &templ->entry;
+}
+
 /*
  * Return a pointer to the export entry for the given function,
  * or NULL otherwise.  Notes:
@@ -214,41 +225,15 @@ erts_find_export_entry(Eterm m, Eterm f, unsigned int a,ErtsCodeIndex code_ix);
 Export*
 erts_find_export_entry(Eterm m, Eterm f, unsigned int a, ErtsCodeIndex code_ix)
 {
-    HashValue hval = EXPORT_HASH((BeamInstr) m, (BeamInstr) f, (BeamInstr) a);
-    int ix;
-    HashBucket* b;
-
-    ix = hval % export_tables[code_ix].htable.size;
-    b = export_tables[code_ix].htable.bucket[ix];
-
-    /*
-     * Note: We have inlined the code from hash.c for speed.
-     */
-	
-    while (b != (HashBucket*) 0) {
-	Export* ep = ((struct export_entry*) b)->ep;
-	if (ep->info.mfa.module == m &&
-            ep->info.mfa.function == f &&
-            ep->info.mfa.arity == a) {
-	    return ep;
-	}
-	b = b->next;
-    }
+    struct export_templ templ;
+    struct export_entry *ee =
+        hash_fetch(&export_tables[code_ix].htable,
+                   init_template(&templ, m, f, a),
+                   (H_FUN)export_hash, (HCMP_FUN)export_cmp);
+    if (ee) return ee->ep;
     return NULL;
 }
 
-static struct export_entry* init_template(struct export_templ* templ,
-					  Eterm m, Eterm f, unsigned a)
-{
-    templ->entry.ep = &templ->exp;
-    templ->entry.slot.index = -1;
-    templ->exp.info.mfa.module = m;
-    templ->exp.info.mfa.function = f;
-    templ->exp.info.mfa.arity = a;
-    return &templ->entry;
-}
-
-
 /*
  * Find the export entry for a loaded function.
  * Returns a NULL pointer if the given function is not loaded, or
diff --git a/erts/emulator/beam/external.c b/erts/emulator/beam/external.c
index ce61cdf040..5cea253ebe 100644
--- a/erts/emulator/beam/external.c
+++ b/erts/emulator/beam/external.c
@@ -51,18 +51,17 @@
 
 #define MAX_STRING_LEN 0xffff
 
-/* MAX value for the creation field in pid, port and reference
-   for the local node and for the current external format.
-
-   Larger creation values than this are allowed in external pid, port and refs
-   encoded with NEW_PID_EXT, NEW_PORT_EXT and NEWER_REFERENCE_EXT.
-   The point here is to prepare for future upgrade to 32-bit creation.
-   OTP-19 (erts-8.0) can handle big creation values from other (newer) nodes,
-   but do not use big creation values for the local node yet,
-   as we still may have to communicate with older nodes.
+/*
+ * MAX value for the creation field in pid, port and reference
+ * for the old PID_EXT, PORT_EXT, REFERENCE_EXT and NEW_REFERENCE_EXT.
+ * Older nodes (OTP 19-22) will send us these so we must be able to decode them.
+ *
+ * From OTP 23 DFLAG_BIG_CREATION is mandatory so this node will always
+ * encode with new big 32-bit creations using NEW_PID_EXT, NEW_PORT_EXT
+ * and NEWER_REFERENCE_EXT.
 */
-#define ERTS_MAX_LOCAL_CREATION (3)
-#define is_valid_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_LOCAL_CREATION)
+#define ERTS_MAX_TINY_CREATION (3)
+#define is_tiny_creation(Cre) ((unsigned)(Cre) <= ERTS_MAX_TINY_CREATION)
 
 #undef ERTS_DEBUG_USE_DIST_SEP
 #ifdef DEBUG
@@ -2469,7 +2468,8 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags)
     Eterm sysname = ((is_internal_pid(pid) && (dflags & DFLAG_INTERNAL_TAGS))
 		      ? INTERNAL_LOCAL_SYSNAME : pid_node_name(pid));
     Uint32 creation = pid_creation(pid);
-    byte* tagp = ep++;
+
+    *ep++ = NEW_PID_EXT;
 
     /* insert  atom here containing host and sysname  */
     ep = enc_atom(acmp, sysname, ep, dflags);
@@ -2481,15 +2481,8 @@ enc_pid(ErtsAtomCacheMap *acmp, Eterm pid, byte* ep, Uint32 dflags)
     ep += 4;
     put_int32(os, ep);
     ep += 4;
-    if (creation <= ERTS_MAX_LOCAL_CREATION) {
-        *tagp = PID_EXT;
-        *ep++ = creation;
-    } else {
-        ASSERT(is_external_pid(pid));
-        *tagp = NEW_PID_EXT;
-        put_int32(creation, ep);
-        ep += 4;
-    }
+    put_int32(creation, ep);
+    ep += 4;
     return ep;
 }
 
@@ -2609,7 +2602,7 @@ dec_pid(ErtsDistExternal *edep, ErtsHeapFactory* factory, byte* ep,
     if (tag == PID_EXT) {
         cre = get_int8(ep);
         ep += 1;
-        if (!is_valid_creation(cre)) {
+        if (!is_tiny_creation(cre)) {
             return NULL;
         }
     } else {
@@ -2870,25 +2863,18 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep,
 	    Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_ref(obj))
 			     ? INTERNAL_LOCAL_SYSNAME : ref_node_name(obj));
             Uint32 creation = ref_creation(obj);
-            byte* tagp = ep++;
 
 	    ASSERT(dflags & DFLAG_EXTENDED_REFERENCES);
 
 	    erts_magic_ref_save_bin(obj);
 
+            *ep++ = NEWER_REFERENCE_EXT;
 	    i = ref_no_numbers(obj);
 	    put_int16(i, ep);
 	    ep += 2;
 	    ep = enc_atom(acmp, sysname, ep, dflags);
-            if (creation <= ERTS_MAX_LOCAL_CREATION) {
-                *tagp = NEW_REFERENCE_EXT;
-                *ep++ = creation;
-            } else {
-                ASSERT(is_external_ref(obj));
-                *tagp = NEWER_REFERENCE_EXT;
-                put_int32(creation, ep);
-                ep += 4;
-            }
+            put_int32(creation, ep);
+            ep += 4;
 	    ref_num = ref_numbers(obj);
 	    for (j = 0; j < i; j++) {
 		put_int32(ref_num[j], ep);
@@ -2901,21 +2887,14 @@ enc_term_int(TTBEncodeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj, byte* ep,
 	    Eterm sysname = (((dflags & DFLAG_INTERNAL_TAGS) && is_internal_port(obj))
 			     ? INTERNAL_LOCAL_SYSNAME : port_node_name(obj));
             Uint32 creation = port_creation(obj);
-            byte* tagp = ep++;
 
+            *ep++ = NEW_PORT_EXT;
 	    ep = enc_atom(acmp, sysname, ep, dflags);
 	    j = port_number(obj);
 	    put_int32(j, ep);
 	    ep += 4;
-            if (creation <= ERTS_MAX_LOCAL_CREATION) {
-                *tagp = PORT_EXT;
-                *ep++ = creation;
-            } else {
-                ASSERT(is_external_port(obj));
-                *tagp = NEW_PORT_EXT;
-                put_int32(creation, ep);
-                ep += 4;
-            }
+            put_int32(creation, ep);
+            ep += 4;
 	    break;
 	}
 	case LIST_DEF:
@@ -3610,7 +3589,7 @@ dec_term_atom_common:
                 if (tag == PORT_EXT) {
                     cre = get_int8(ep);
                     ep++;
-                    if (!is_valid_creation(cre)) {
+                    if (!is_tiny_creation(cre)) {
                         goto error;
                     }
                 }
@@ -3657,7 +3636,7 @@ dec_term_atom_common:
 
 		cre = get_int8(ep);
 		ep += 1;
-		if (!is_valid_creation(cre)) {
+		if (!is_tiny_creation(cre)) {
 		    goto error;
 		}
 		goto ref_ext_common;
@@ -3671,7 +3650,7 @@ dec_term_atom_common:
 
 		cre = get_int8(ep);
 		ep += 1;
-		if (!is_valid_creation(cre)) {
+		if (!is_tiny_creation(cre)) {
 		    goto error;
 		}
 		r0 = get_int32(ep);
@@ -4066,73 +4045,6 @@ dec_term_atom_common:
 		next = &(funp->creator);
 		break;
 	    }
-	case FUN_EXT:
-	    {
-		ErlFunThing* funp = (ErlFunThing *) hp;
-		Eterm module;
-		Sint old_uniq;
-		Sint old_index;
-		unsigned num_free;
-		int i;
-		Eterm temp;
-
-		num_free = get_int32(ep);
-		ep += 4;
-		hp += ERL_FUN_SIZE;
-		hp += num_free;
-		factory->hp = hp;
-		funp->thing_word = HEADER_FUN;
-		funp->num_free = num_free;
-		*objp = make_fun(funp);
-
-		/* Creator pid */
-		if ((*ep != PID_EXT && *ep != NEW_PID_EXT)
-		    || (ep = dec_pid(edep, factory, ep+1,
-				     &funp->creator, *ep))==NULL) {
-		    goto error;
-		}
-
-		/* Module */
-		if ((ep = dec_atom(edep, ep, &module)) == NULL) {
-		    goto error;
-		}
-
-		/* Index */
-		if ((ep = dec_term(edep, factory, ep, &temp, NULL)) == NULL) {
-		    goto error;
-		}
-		if (!is_small(temp)) {
-		    goto error;
-		}
-		old_index = unsigned_val(temp);
-
-		/* Uniq */
-		if ((ep = dec_term(edep, factory, ep, &temp, NULL)) == NULL) {
-		    goto error;
-		}
-		if (!is_small(temp)) {
-		    goto error;
-		}
-		
-		/*
-		 * It is safe to link the fun into the fun list only when
-		 * no more validity tests can fail.
-		 */
-		funp->next = factory->off_heap->first;
-		factory->off_heap->first = (struct erl_off_heap_header*)funp;
-		old_uniq = unsigned_val(temp);
-
-		funp->fe = erts_put_fun_entry(module, old_uniq, old_index);
-		funp->arity = funp->fe->address[-1] - num_free;
-		hp = factory->hp;
-
-		/* Environment */
-		for (i = num_free-1; i >= 0; i--) {
-		    funp->env[i] = (Eterm) next;
-		    next = funp->env + i;
-		}
-		break;
-	    }
 	case ATOM_INTERNAL_REF2:
 	    n = get_int16(ep);
 	    ep += 2;
@@ -4401,30 +4313,21 @@ encode_size_struct_int(TTBSizeContext* ctx, ErtsAtomCacheMap *acmp, Eterm obj,
 		result += 1 + 4 + 1 + i;  /* tag,size,sign,digits */
 	    break;
         case EXTERNAL_PID_DEF:
-            if (external_pid_creation(obj) > ERTS_MAX_LOCAL_CREATION)
-                result += 3;
-            /*fall through*/
 	case PID_DEF:
 	    result += (1 + encode_size_struct2(acmp, pid_node_name(obj), dflags) +
-		       4 + 4 + 1);
+		       4 + 4 + 4);
 	    break;
         case EXTERNAL_REF_DEF:
-            if (external_ref_creation(obj) > ERTS_MAX_LOCAL_CREATION)
-                result += 3;
-            /*fall through*/
 	case REF_DEF:
 	    ASSERT(dflags & DFLAG_EXTENDED_REFERENCES);
 	    i = ref_no_numbers(obj);
 	    result += (1 + 2 + encode_size_struct2(acmp, ref_node_name(obj), dflags) +
-		       1 + 4*i);
+		       4 + 4*i);
 	    break;
         case EXTERNAL_PORT_DEF:
-            if (external_port_creation(obj) > ERTS_MAX_LOCAL_CREATION)
-                result += 3;
-            /*fall through*/
         case PORT_DEF:
 	    result += (1 + encode_size_struct2(acmp, port_node_name(obj), dflags) +
-		      4 + 1);
+		      4 + 4);
 	    break;
 	case LIST_DEF: {
 	    int is_str = is_external_string(obj, &m);
@@ -4891,9 +4794,6 @@ init_done:
 		total_size = get_int32(ep);
 		CHKSIZE(total_size);		
 		ep += 1+16+4+4;
-		/*FALLTHROUGH*/
-
-	    case FUN_EXT:
 		CHKSIZE(4);
 		num_free = get_int32(ep);
 		ep += 4;
@@ -4904,6 +4804,12 @@ init_done:
 		heap_size += ERL_FUN_SIZE + num_free;
 		break;
 	    }
+	case FUN_EXT:
+            /*
+             * OTP 23: No longer support decoding the old fun
+             * representation.
+             */
+            goto error;
 	case ATOM_INTERNAL_REF2:
 	    SKIP(2+atom_extra_skip);
 	    atom_extra_skip = 0;
diff --git a/erts/emulator/beam/hash.c b/erts/emulator/beam/hash.c
index 8954dbb06c..177b7cc3d1 100644
--- a/erts/emulator/beam/hash.c
+++ b/erts/emulator/beam/hash.c
@@ -30,37 +30,19 @@
 #include "hash.h"
 
 /*
-** List of sizes (all are primes)
-*/
-static const int h_size_table[] = {
-    2, 5, 11, 23, 47, 97, 197, 397, 797,  /* double upto here */
-    1201,   1597,
-    2411,   3203,
-    4813,   6421,
-    9643,   12853,
-    19289,  25717,
-    51437,
-    102877,
-    205759,
-    411527,
-    823117,
-    1646237,
-    3292489,
-    6584983,
-    13169977,
-    26339969,
-    52679969,
-    -1
-};
-
-/*
 ** Get info about hash
 **
 */
 
+#define MAX_SHIFT (ERTS_SIZEOF_TERM * 8)
+
+static int hash_get_slots(Hash *h) {
+    return UWORD_CONSTANT(1) << (MAX_SHIFT - h->shift);
+}
+
 void hash_get_info(HashInfo *hi, Hash *h)
 {
-    int size = h->size;
+    int size = hash_get_slots(h);
     int i;
     int max_depth = 0;
     int objects = 0;
@@ -84,7 +66,7 @@ void hash_get_info(HashInfo *hi, Hash *h)
     ASSERT(objects == h->nobjs);
 
     hi->name  = h->name;
-    hi->size  = h->size;
+    hi->size  = hash_get_slots(h);
     hi->used  = used;
     hi->objs  = h->nobjs;
     hi->depth = max_depth;
@@ -118,15 +100,15 @@ hash_table_sz(Hash *h)
   int i;
   for(i=0;h->name[i];i++);
   i++;
-  return sizeof(Hash) + h->size*sizeof(HashBucket*) + i;
+  return sizeof(Hash) + hash_get_slots(h)*sizeof(HashBucket*) + i;
 }
 
 
 static ERTS_INLINE void set_thresholds(Hash* h)
 {
-    h->grow_threshold = (8*h->size)/5;   /* grow at 160% load */
-    if (h->size_ix > h->min_size_ix)
-        h->shrink_threshold = h->size / 5;  /* shrink at 20% load */
+    h->grow_threshold = (8*hash_get_slots(h))/5;   /* grow at 160% load */
+    if (h->shift < h->max_shift)
+        h->shrink_threshold = hash_get_slots(h) / 5;  /* shrink at 20% load */
     else
         h->shrink_threshold = -1;  /* never shrink below inital size */
 }
@@ -138,29 +120,27 @@ static ERTS_INLINE void set_thresholds(Hash* h)
 Hash* hash_init(int type, Hash* h, char* name, int size, HashFunctions fun)
 {
     int sz;
-    int ix = 0;
+    int shift = 1;
 
     h->meta_alloc_type = type;
 
-    while (h_size_table[ix] != -1 && h_size_table[ix] < size)
-	ix++;
-    if (h_size_table[ix] == -1)
-	return NULL;
-
-    size = h_size_table[ix];
-    sz = size*sizeof(HashBucket*);
+    while ((UWORD_CONSTANT(1) << shift) < size)
+        shift++;
 
-    h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
-
-    memzero(h->bucket, sz);
     h->is_allocated = 0;
     h->name = name;
     h->fun = fun;
-    h->size = size;
-    h->size_ix = ix;
-    h->min_size_ix = ix;
+    h->shift = MAX_SHIFT - shift;
+    h->max_shift = h->shift;
     h->nobjs = 0;
     set_thresholds(h);
+
+    sz = hash_get_slots(h) * sizeof(HashBucket*);
+    h->bucket = (HashBucket**) fun.meta_alloc(h->meta_alloc_type, sz);
+    memzero(h->bucket, sz);
+
+    ASSERT(h->shift > 0 && h->shift < 64);
+
     return h;
 }
 
@@ -183,7 +163,7 @@ Hash* hash_new(int type, char* name, int size, HashFunctions fun)
 */
 void hash_delete(Hash* h)
 {
-    int old_size = h->size;
+    int old_size = hash_get_slots(h);
     int i;
 
     for (i = 0; i < old_size; i++) {
@@ -206,22 +186,20 @@ void hash_delete(Hash* h)
 static void rehash(Hash* h, int grow)
 {
     int sz;
-    int old_size = h->size;
+    int old_size = hash_get_slots(h);
     HashBucket** new_bucket;
     int i;
 
     if (grow) {
-	if ((h_size_table[h->size_ix+1]) == -1)
-	    return;
-	h->size_ix++;
+	h->shift--;
     }
     else {
-	if (h->size_ix == 0)
+	if (h->shift == h->max_shift)
 	    return;
-	h->size_ix--;
+	h->shift++;
     }
-    h->size = h_size_table[h->size_ix];
-    sz = h->size*sizeof(HashBucket*);
+
+    sz = hash_get_slots(h)*sizeof(HashBucket*);
 
     new_bucket = (HashBucket **) h->fun.meta_alloc(h->meta_alloc_type, sz);
     memzero(new_bucket, sz);
@@ -230,7 +208,7 @@ static void rehash(Hash* h, int grow)
 	HashBucket* b = h->bucket[i];
 	while (b != (HashBucket*) 0) {
 	    HashBucket* b_next = b->next;
-	    int ix = b->hvalue % h->size;
+	    Uint ix = hash_get_slot(h, b->hvalue);
 	    b->next = new_bucket[ix];
 	    new_bucket[ix] = b;
 	    b = b_next;
@@ -247,16 +225,7 @@ static void rehash(Hash* h, int grow)
 */
 void* hash_get(Hash* h, void* tmpl)
 {
-    HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
-    HashBucket* b = h->bucket[ix];
-
-    while(b != (HashBucket*) 0) {
-	if ((b->hvalue == hval) && (h->fun.cmp(tmpl, (void*)b) == 0))
-	    return (void*) b;
-	b = b->next;
-    }
-    return (void*) 0;
+    return hash_fetch(h, tmpl, h->fun.hash, h->fun.cmp);
 }
 
 /*
@@ -265,7 +234,7 @@ void* hash_get(Hash* h, void* tmpl)
 void* hash_put(Hash* h, void* tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket* b = h->bucket[ix];
 
     while(b != (HashBucket*) 0) {
@@ -291,7 +260,7 @@ void* hash_put(Hash* h, void* tmpl)
 void* hash_erase(Hash* h, void* tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket* b = h->bucket[ix];
     HashBucket* prev = 0;
 
@@ -323,7 +292,7 @@ void *
 hash_remove(Hash *h, void *tmpl)
 {
     HashValue hval = h->fun.hash(tmpl);
-    int ix = hval % h->size;
+    Uint ix = hash_get_slot(h, hval);
     HashBucket *b = h->bucket[ix];
     HashBucket *prev = NULL;
 
@@ -343,11 +312,11 @@ hash_remove(Hash *h, void *tmpl)
     return NULL;
 }
 
-void hash_foreach(Hash* h, void (*func)(void *, void *), void *func_arg2)
+void hash_foreach(Hash* h, HFOREACH_FUN func, void *func_arg2)
 {
     int i;
 
-    for (i = 0; i < h->size; i++) {
+    for (i = 0; i < hash_get_slots(h); i++) {
 	HashBucket* b = h->bucket[i];
 	while(b != (HashBucket*) 0) {
 	    (*func)((void *) b, func_arg2);
diff --git a/erts/emulator/beam/hash.h b/erts/emulator/beam/hash.h
index d319aaca83..4e8eb6594b 100644
--- a/erts/emulator/beam/hash.h
+++ b/erts/emulator/beam/hash.h
@@ -18,16 +18,16 @@
  * %CopyrightEnd%
  */
 
-/*
-** General hash functions
-**
-*/
+/**
+ * General hash functions
+ *
+ **/
 #ifndef __HASH_H__
 #define __HASH_H__
 
 #include "sys.h"
 
-typedef unsigned long HashValue;
+typedef UWord HashValue;
 typedef struct hash Hash;
 
 typedef int (*HCMP_FUN)(void*, void*);
@@ -38,6 +38,7 @@ typedef void (*HFREE_FUN)(void*);
 typedef void* (*HMALLOC_FUN)(int,size_t);
 typedef void (*HMFREE_FUN)(int,void*);
 typedef int (*HMPRINT_FUN)(fmtfn_t,void*,char*, ...);
+typedef void (*HFOREACH_FUN)(void *, void *);
 
 /*
 ** This bucket must be placed in top of 
@@ -75,11 +76,10 @@ struct hash
     int is_allocated;    /* 0 iff hash structure is on stack or is static */
     int meta_alloc_type; /* argument to pass to meta_alloc and meta_free */
     char* name;          /* Table name (static string, for debugging) */
-    int size;		 /* Number of slots */
+    int shift;		 /* How much to shift the hash value */
+    int max_shift;       /* Never shift more than this value */
     int shrink_threshold;
     int grow_threshold;
-    int size_ix;         /* Size index in size table */
-    int min_size_ix;     /* Never shrink table smaller than this */
     int nobjs;		 /* Number of objects in table */
     HashBucket** bucket; /* Vector of bucket pointers (objects) */
 };
@@ -96,6 +96,54 @@ void* hash_get(Hash*, void*);
 void* hash_put(Hash*, void*);
 void* hash_erase(Hash*, void*);
 void* hash_remove(Hash*, void*);
-void  hash_foreach(Hash*, void (*func)(void *, void *), void *);
+void  hash_foreach(Hash*, HFOREACH_FUN, void *);
+
+ERTS_GLB_INLINE Uint hash_get_slot(Hash *h, HashValue hv);
+ERTS_GLB_INLINE void* hash_fetch(Hash *, void*, H_FUN, HCMP_FUN);
+
+#if ERTS_GLB_INLINE_INCL_FUNC_DEF
+
+ERTS_GLB_INLINE Uint
+hash_get_slot(Hash *h, HashValue hv)
+{
+    /* This slot mapping function uses Fibonacci hashing in order to
+     * protect itself against a very bad hash function. It is not a
+     * hash function in itself, so the user of hash.h should still spend
+     * time to figure out a good hash function for its data.
+     *
+     * See https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+     * for some thoughts and ideas about Fibonacci hashing.
+     */
+
+    /* This is not strictly part of the Fibonacci hashing algorithm
+     * but it does help to spread the values of the mapping function better.
+     */
+    hv ^= hv >> h->shift;
+#ifdef ARCH_64
+    /* 2^64 / 1.61803398875 = 11400714819323198485.... */
+    return (UWORD_CONSTANT(11400714819323198485) * hv) >> h->shift;
+#else
+    /* 2^32 / 1.61803398875 = 2654435769.... */
+    return (UWORD_CONSTANT(2654435769) * hv) >> h->shift;
+#endif
+}
+
+ERTS_GLB_INLINE void* hash_fetch(Hash *h, void* tmpl, H_FUN hash, HCMP_FUN cmp)
+{
+    HashValue hval = hash(tmpl);
+    Uint ix = hash_get_slot(h, hval);
+    HashBucket* b = h->bucket[ix];
+    ASSERT(h->fun.hash == hash);
+    ASSERT(h->fun.cmp == cmp);
+
+    while(b != (HashBucket*) 0) {
+	if ((b->hvalue == hval) && (cmp(tmpl, (void*)b) == 0))
+	    return (void*) b;
+	b = b->next;
+    }
+    return (void*) 0;
+}
+
+#endif /* ERTS_GLB_INLINE_INCL_FUNC_DEF */
 
 #endif
diff --git a/erts/emulator/beam/index.c b/erts/emulator/beam/index.c
index be1771b037..09d3c24424 100644
--- a/erts/emulator/beam/index.c
+++ b/erts/emulator/beam/index.c
@@ -114,35 +114,26 @@ int index_get(IndexTable* t, void* tmpl)
     return -1;
 }
 
-void erts_index_merge(Hash* src, IndexTable* dst)
+static void index_merge_foreach(IndexSlot *p, IndexTable *dst)
 {
-    int limit = src->size;
-    HashBucket** bucket = src->bucket;
-    int i;
-
-    for (i = 0; i < limit; i++) {
-	HashBucket* b = bucket[i];
-	IndexSlot* p;
-	int ix;
-
-	while (b) {
-	    Uint sz;
-	    ix = dst->entries++;
-	    if (ix >= dst->size) {
-		if (ix >= dst->limit) {
-		    erts_exit(ERTS_ERROR_EXIT, "no more index entries in %s (max=%d)\n",
-			     dst->htable.name, dst->limit);
-		}
-		sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*);
-		dst->seg_table[ix>>INDEX_PAGE_SHIFT] = erts_alloc(dst->type, sz);
-		dst->size += INDEX_PAGE_SIZE;
-	    }
-	    p = (IndexSlot*) b;
-	    p->index = ix;
-	    dst->seg_table[ix>>INDEX_PAGE_SHIFT][ix&INDEX_PAGE_MASK] = p;
-	    b = b->next;
-	}
+    Uint sz;
+    int ix = dst->entries++;
+    if (ix >= dst->size) {
+        if (ix >= dst->limit) {
+            erts_exit(ERTS_ERROR_EXIT, "no more index entries in %s (max=%d)\n",
+                      dst->htable.name, dst->limit);
+        }
+        sz = INDEX_PAGE_SIZE*sizeof(IndexSlot*);
+        dst->seg_table[ix>>INDEX_PAGE_SHIFT] = erts_alloc(dst->type, sz);
+        dst->size += INDEX_PAGE_SIZE;
     }
+    p->index = ix;
+    dst->seg_table[ix>>INDEX_PAGE_SHIFT][ix&INDEX_PAGE_MASK] = p;
+}
+
+void erts_index_merge(Hash* src, IndexTable* dst)
+{
+    hash_foreach(src, (HFOREACH_FUN)index_merge_foreach, dst);
 }
 
 void index_erase_latest_from(IndexTable* t, Uint from_ix)
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index 7cffe7fb5c..38b1e5909b 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -19,7 +19,12 @@
 // %CopyrightEnd%
 //
 
-// Stack manipulation instructions
+//
+// Stack manipulation instructions follow.
+//
+// See the comment for AH() in macros.tab for information about
+// the layout of stack frames.
+//
 
 allocate(NeedStack, Live) {
     $AH($NeedStack, 0, $Live);
@@ -58,22 +63,81 @@ allocate_heap_zero(NeedStack, NeedHeap, Live) {
 
 deallocate(Deallocate) {
     //| -no_prefetch
-    SET_CP(c_p, (BeamInstr *) cp_val(*E));
     E = ADD_BYTE_OFFSET(E, $Deallocate);
 }
 
-deallocate_return(Deallocate) {
+//
+// Micro-benchmarks showed that the deallocate_return instruction
+// became slower when the continuation pointer was moved from
+// the process struct to the stack. The reason seems to be read
+// dependencies, i.e. that the CPU cannot figure out beforehand
+// from which position on the stack the continuation pointer
+// should be fetched.
+//
+// Initializing num_bytes with a constant value seems to restore
+// the lost speed, so we've specialized the instruction for the
+// most common values.
+//
+
+deallocate_return0 := dealloc_ret.n0.execute;
+deallocate_return1 := dealloc_ret.n1.execute;
+deallocate_return2 := dealloc_ret.n2.execute;
+deallocate_return3 := dealloc_ret.n3.execute;
+deallocate_return4 := dealloc_ret.n4.execute;
+deallocate_return := dealloc_ret.var.execute;
+
+dealloc_ret.head() {
+    Uint num_bytes;
+}
+
+dealloc_ret.n0() {
+    num_bytes = (0+1) * sizeof(Eterm);
+}
+
+dealloc_ret.n1() {
+    num_bytes = (1+1) * sizeof(Eterm);
+}
+
+dealloc_ret.n2() {
+    num_bytes = (2+1) * sizeof(Eterm);
+}
+
+dealloc_ret.n3() {
+    num_bytes = (3+1) * sizeof(Eterm);
+}
+
+dealloc_ret.n4() {
+    num_bytes = (4+1) * sizeof(Eterm);
+}
+
+dealloc_ret.var(Deallocate) {
+    num_bytes = $Deallocate;
+}
+
+dealloc_ret.execute() {
     //| -no_next
-    int words_to_pop = $Deallocate;
-    SET_I((BeamInstr *) cp_val(*E));
-    E = ADD_BYTE_OFFSET(E, words_to_pop);
+
+    E = ADD_BYTE_OFFSET(E, num_bytes);
+    $RETURN();
     CHECK_TERM(x(0));
     DispatchReturn;
 }
 
 move_deallocate_return(Src, Deallocate) {
-    x(0) = $Src;
-    $deallocate_return($Deallocate);
+    //| -no_next
+
+    /*
+     * Explicitly do reads first to mitigate the impact of read
+     * dependencies.
+     */
+
+    Uint bytes_to_pop = $Deallocate;
+    Eterm src = $Src;
+    E = ADD_BYTE_OFFSET(E, bytes_to_pop);
+    x(0) = src;
+    $RETURN();
+    CHECK_TERM(x(0));
+    DispatchReturn;
 }
 
 // Call instructions
@@ -93,14 +157,16 @@ DISPATCH_ABS(CallDest) {
 }
 
 i_call(CallDest) {
-    SET_CP(c_p, $NEXT_INSTRUCTION);
+    $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
     $DISPATCH_REL($CallDest);
 }
 
 move_call(Src, CallDest) {
-    x(0) = $Src;
-    SET_CP(c_p, $NEXT_INSTRUCTION);
-    $DISPATCH_REL($CallDest);
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
+    x(0) = src;
+    $DISPATCH_REL(call_dest);
 }
 
 i_call_last(CallDest, Deallocate) {
@@ -109,8 +175,11 @@ i_call_last(CallDest, Deallocate) {
 }
 
 move_call_last(Src, CallDest, Deallocate) {
-    x(0) = $Src;
-    $i_call_last($CallDest, $Deallocate);
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    $deallocate($Deallocate);
+    x(0) = src;
+    $DISPATCH_REL(call_dest);
 }
 
 i_call_only(CallDest) {
@@ -118,8 +187,10 @@ i_call_only(CallDest) {
 }
 
 move_call_only(Src, CallDest) {
-    x(0) = $Src;
-    $i_call_only($CallDest);
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    x(0) = src;
+    $DISPATCH_REL(call_dest);
 }
 
 DISPATCHX(Dest) {
@@ -131,22 +202,27 @@ DISPATCHX(Dest) {
 }
 
 i_call_ext(Dest) {
-    SET_CP(c_p, $NEXT_INSTRUCTION);
+    $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
     $DISPATCHX($Dest);
 }
 
-i_move_call_ext(Src, Dest) {
-    x(0) = $Src;
-    $i_call_ext($Dest);
+i_move_call_ext(Src, CallDest) {
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
+    x(0) = src;
+    $DISPATCHX(call_dest);
 }
 
 i_call_ext_only(Dest) {
     $DISPATCHX($Dest);
 }
 
-i_move_call_ext_only(Dest, Src) {
-    x(0) = $Src;
-    $i_call_ext_only($Dest);
+i_move_call_ext_only(CallDest, Src) {
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    x(0) = src;
+    $DISPATCHX(call_dest);
 }
 
 i_call_ext_last(Dest, Deallocate) {
@@ -154,9 +230,12 @@ i_call_ext_last(Dest, Deallocate) {
     $DISPATCHX($Dest);
 }
 
-i_move_call_ext_last(Dest, StackOffset, Src) {
-    x(0) = $Src;
-    $i_call_ext_last($Dest, $StackOffset);
+i_move_call_ext_last(CallDest, Deallocate, Src) {
+    Eterm call_dest = $CallDest;
+    Eterm src = $Src;
+    $deallocate($Deallocate);
+    x(0) = src;
+    $DISPATCHX(call_dest);
 }
 
 APPLY(I, Deallocate, Next) {
@@ -175,7 +254,7 @@ i_apply() {
     BeamInstr *next;
     $APPLY(NULL, 0, next);
     if (ERTS_LIKELY(next != NULL)) {
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         $DISPATCH_ABS(next);
     }
     $HANDLE_APPLY_ERROR();
@@ -211,7 +290,7 @@ apply(Arity) {
     BeamInstr *next;
     $FIXED_APPLY($Arity, NULL, 0, next);
     if (ERTS_LIKELY(next != NULL)) {
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         $DISPATCH_ABS(next);
     }
     $HANDLE_APPLY_ERROR();
@@ -247,7 +326,7 @@ i_apply_fun() {
     BeamInstr *next;
     $APPLY_FUN(next);
     if (ERTS_LIKELY(next != NULL)) {
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         $DISPATCH_FUN(next);
     }
     $HANDLE_APPLY_FUN_ERROR();
@@ -283,7 +362,7 @@ i_call_fun(Fun) {
     BeamInstr *next;
     $CALL_FUN($Fun, next);
     if (ERTS_LIKELY(next != NULL)) {
-        SET_CP(c_p, $NEXT_INSTRUCTION);
+        $SAVE_CONTINUATION_POINTER($NEXT_INSTRUCTION);
         $DISPATCH_FUN(next);
     }
     $HANDLE_APPLY_FUN_ERROR();
@@ -301,15 +380,8 @@ i_call_fun_last(Fun, Deallocate) {
 
 return() {
     //| -no_next
-    SET_I(c_p->cp);
+    $RETURN();
     DTRACE_RETURN_FROM_PC(c_p);
-
-    /*
-     * We must clear the CP to make sure that a stale value do not
-     * create a false module dependcy preventing code upgrading.
-     * It also means that we can use the CP in stack backtraces.
-     */
-    c_p->cp = 0;
     CHECK_TERM(r(0));
     HEAP_SPACE_VERIFIED(0);
     DispatchReturn;
@@ -478,16 +550,21 @@ i_make_fun(FunP, NumFree) {
 }
 
 move_trim(Src, Dst, Words) {
-    Uint cp = E[0];
     $Dst = $Src;
-    E += $Words;
-    E[0] = cp;
+    $i_trim($Words);
 }
 
 i_trim(Words) {
-    Uint cp = E[0];
     E += $Words;
-    E[0] = cp;
+
+    /*
+     * Clear the reserved location for the continuation pointer at
+     * E[0]. This is not strictly necessary for correctness, but if a
+     * GC is triggered before E[0] is overwritten by another
+     * continuation pointer the now dead term at E[0] would be
+     * retained by the GC.
+     */
+    E[0] = NIL;
 }
 
 move(Src, Dst) {
@@ -599,8 +676,7 @@ move_window5(S1, S2, S3, S4, S5, D) {
 move_return(Src) {
     //| -no_next
     x(0) = $Src;
-    SET_I(c_p->cp);
-    c_p->cp = 0;
+    $RETURN();
     DispatchReturn;
 }
 
@@ -683,10 +759,11 @@ swap(R1, R2) {
     $R2 = V;
 }
 
-swap_temp(R1, R2, Tmp) {
-    Eterm V = $R1;
-    $R1 = $R2;
-    $R2 = $Tmp = V;
+swap2(R1, R2, R3) {
+    Eterm V = $R2;
+    $R2 = $R1;
+    $R1 = $R3;
+    $R3 = V;
 }
 
 test_heap(Nh, Live) {
diff --git a/erts/emulator/beam/macros.tab b/erts/emulator/beam/macros.tab
index 1b5e5f66b0..9d183e1f41 100644
--- a/erts/emulator/beam/macros.tab
+++ b/erts/emulator/beam/macros.tab
@@ -104,14 +104,52 @@ GC_TEST_PRESERVE(NeedHeap, Live, PreserveTerm) {
 
 
 // Make sure that there are NeedStack + NeedHeap + 1 words available
-// on the combined heap/stack segment, then allocates NeedHeap + 1
-// words on the stack and saves CP.
+// on the combined heap/stack segment, then decrement the stack
+// pointer by (NeedStack + 1) words. Finally clear the word reserved
+// for the continuation pointer at the top of the stack.
+//
+// Stack frame layout:
+//
+//       +-----------+
+// y(N)  | Term      |
+//       +-----------+
+//            .
+//            .
+//            .
+//       +-----------+
+// y(0)  | Term      |
+//       +-----------+
+// E ==> | NIL or CP |
+//       +-----------+
+//
+// When the function owning the stack frame is the currently executing
+// function, the word at the top of the stack is NIL. When calling
+// another function, the continuation pointer will be stored in the
+// word at the top of the stack. When returning to the function
+// owning the stack frame, the word at the stack top will again be set
+// to NIL.
+
 AH(NeedStack, NeedHeap, Live) {
     unsigned needed = $NeedStack + 1;
     $GC_TEST(needed, $NeedHeap, $Live);
     E -= needed;
-    *E = make_cp(c_p->cp);
-    c_p->cp = 0;
+    *E = NIL;
+}
+
+// Save the continuation pointer in the reserved slot at the
+// top of the stack as preparation for doing a function call.
+
+SAVE_CONTINUATION_POINTER(IP) {
+    ASSERT(VALID_INSTR(*($IP)));
+    *E = (BeamInstr) ($IP);
+}
+
+// Return to the function whose continuation pointer is stored
+// at the top of the stack and set that word to NIL.
+
+RETURN() {
+    SET_I(cp_val(*E));
+    *E = NIL;
 }
 
 NEXT0() {
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index b9d4f6afcc..c0ca9260a0 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -324,76 +324,15 @@ move_src_window2 y x x
 move_src_window3 y x x x
 move_src_window4 y x x x x
 
-# Swap registers.
-move R1=xy Tmp=x | move R2=xy R1 | move Tmp R2 => swap_temp R1 R2 Tmp
-
-# The compiler uses x(1022) when swapping registers. It will definitely
-# not be used again.
-swap_temp R1 R2 Tmp=x==1022 => swap R1 R2
-
-swap_temp R1 R2 Tmp | move Src Tmp => swap R1 R2 | move Src Tmp
-
-swap_temp R1 R2 Tmp | line Loc | apply Live | is_killed_apply(Tmp, Live) => \
-  swap R1 R2 | line Loc | apply Live
-swap_temp R1 R2 Tmp | line Loc | apply_last Live D | is_killed_apply(Tmp, Live) => \
-  swap R1 R2 | line Loc | apply_last Live D
-
-swap_temp R1 R2 Tmp | line Loc | call_fun Live | is_killed_by_call_fun(Tmp, Live) => \
-  swap R1 R2 | line Loc | call_fun Live
-swap_temp R1 R2 Tmp | make_fun2 OldIndex=u | is_killed_by_make_fun(Tmp, OldIndex) => \
-  swap R1 R2 | make_fun2 OldIndex
-
-swap_temp R1 R2 Tmp | line Loc | call Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | line Loc | call Live Addr
-swap_temp R1 R2 Tmp | call_only Live Addr | \
-  is_killed(Tmp, Live) => swap R1 R2 | call_only Live Addr
-swap_temp R1 R2 Tmp | call_last Live Addr D | \
-  is_killed(Tmp, Live) => swap R1 R2 | call_last Live Addr D
-
-swap_temp R1 R2 Tmp | line Loc | call_ext Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | line Loc | call_ext Live Addr
-swap_temp R1 R2 Tmp | line Loc | call_ext_only Live Addr | \
-  is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | line Loc | call_ext_last Live Addr D | \
-  is_killed(Tmp, Live) => swap R1 R2 | line Loc | call_ext_last Live Addr D
-
-swap_temp R1 R2 Tmp | call_ext Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext Live Addr
-swap_temp R1 R2 Tmp | call_ext_only Live Addr | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | call_ext_last Live Addr D | is_killed(Tmp, Live) => \
-  swap R1 R2 | call_ext_last Live Addr D
-
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-     swap R1 R2 | move Src Any | line Loc | call Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-     swap R1 R2 | move Src Any | line Loc | call_ext Live Addr
-swap_temp R1 R2 Tmp | move Src Any | call_only Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | call_only Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_ext_only Live Addr | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | line Loc | call_ext_only Live Addr
-swap_temp R1 R2 Tmp | move Src Any | line Loc | call_fun Live | \
-  is_killed(Tmp, Live) | distinct(Tmp, Src) => \
-    swap R1 R2 | move Src Any | line Loc | call_fun Live
-
-swap_temp R1 R2 Tmp | line Loc | send | is_killed_by_send(Tmp) => \
-  swap R1 R2 | line Loc | send
-
-# swap_temp/3 with Y register operands are rare.
-swap_temp R1 R2=y Tmp => swap R1 R2 | move R2 Tmp
-swap_temp R1=y R2 Tmp => swap R1 R2 | move R2 Tmp
-
 swap R1=x R2=y => swap R2 R1
 
-swap_temp x x x
-
 swap xy x
 swap y y
 
+swap R1=x R2=x | swap R3=x R1 => swap2 R1 R2 R3
+
+swap2 x x x
+
 # move_shift
 
 move SD=x    D=x | move Src=cxy SD=x  | distinct(D, Src) => move_shift Src SD D
@@ -657,8 +596,20 @@ move S x==0 | deallocate D | return => move_deallocate_return S D
 
 move_deallocate_return xycn Q
 
+deallocate u==0 | return => deallocate_return0
+deallocate u==1 | return => deallocate_return1
+deallocate u==2 | return => deallocate_return2
+deallocate u==3 | return => deallocate_return3
+deallocate u==4 | return => deallocate_return4
+
 deallocate D | return => deallocate_return D
 
+deallocate_return0
+deallocate_return1
+deallocate_return2
+deallocate_return3
+deallocate_return4
+
 deallocate_return Q
 
 test_heap Need u==1 | put_list Y=y x==0 x==0 => test_heap_1_put_list Need Y
diff --git a/erts/emulator/beam/register.c b/erts/emulator/beam/register.c
index c7e02c6d48..8e44b527a2 100644
--- a/erts/emulator/beam/register.c
+++ b/erts/emulator/beam/register.c
@@ -265,10 +265,8 @@ Eterm
 erts_whereis_name_to_id(Process *c_p, Eterm name)
 {
     Eterm res = am_undefined;
-    HashValue hval;
-    int ix;
-    HashBucket* b;
     ErtsProcLocks c_p_locks = 0;
+    RegProc *rp, tmpl;
     if (c_p) {
         c_p_locks = ERTS_PROC_LOCK_MAIN;
         ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(c_p);
@@ -278,29 +276,14 @@ erts_whereis_name_to_id(Process *c_p, Eterm name)
     if (c_p && !c_p_locks)
         erts_proc_lock(c_p, ERTS_PROC_LOCK_MAIN);
 
-    hval = REG_HASH(name);
-    ix = hval % process_reg.size;
-    b = process_reg.bucket[ix];
+    tmpl.name = name;
+    rp = hash_fetch(&process_reg, &tmpl, (H_FUN)reg_hash, (HCMP_FUN)reg_cmp);
 
-    /*
-     * Note: We have inlined the code from hash.c for speed.
-     */
-	
-    while (b) {
-	RegProc* rp = (RegProc *) b;
-	if (rp->name == name) {
-	    /*
-	     * SMP NOTE: No need to lock registered entity since it cannot
-	     * be removed without acquiring write reg lock and id on entity
-	     * is read only.
-	     */
-	    if (rp->p)
-		res = rp->p->common.id;
-	    else if (rp->pt)
-		res = rp->pt->common.id;
-	    break;
-	}
-	b = b->next;
+    if (rp) {
+        if (rp->p)
+            res = rp->p->common.id;
+        else if (rp->pt)
+            res = rp->pt->common.id;
     }
 
     reg_read_unlock();
@@ -321,10 +304,7 @@ erts_whereis_name(Process *c_p,
 		  Port** port,
                   int lock_port)
 {
-    RegProc* rp = NULL;
-    HashValue hval;
-    int ix;
-    HashBucket* b;
+    RegProc* rp = NULL, tmpl;
     ErtsProcLocks current_c_p_locks;
     Port *pending_port = NULL;
 
@@ -342,21 +322,8 @@ erts_whereis_name(Process *c_p,
      * - current_c_p_locks (either c_p_locks or 0) on c_p
      */
 
-    hval = REG_HASH(name);
-    ix = hval % process_reg.size;
-    b = process_reg.bucket[ix];
-
-    /*
-     * Note: We have inlined the code from hash.c for speed.
-     */
-
-    while (b) {
-	if (((RegProc *) b)->name == name) {
-	    rp = (RegProc *) b;
-	    break;
-	}
-	b = b->next;
-    }
+    tmpl.name = name;
+    rp = hash_fetch(&process_reg, &tmpl, (H_FUN)reg_hash, (HCMP_FUN)reg_cmp);
 
     if (proc) {
 	if (!rp)
@@ -564,18 +531,6 @@ int erts_unregister_name(Process *c_p,
     return res;
 }
 
-int process_reg_size(void)
-{
-    int size;
-    int lock = !ERTS_IS_CRASH_DUMPING;
-    if (lock)
-	reg_read_lock();
-    size = process_reg.size;
-    if (lock)
-	reg_read_unlock();
-    return size;
-}
-
 int process_reg_sz(void)
 {
     int sz;
@@ -592,15 +547,24 @@ int process_reg_sz(void)
 
 #include "bif.h"
 
+struct registered_foreach_arg {
+    Eterm res;
+    Eterm *hp;
+};
+
+static void
+registered_foreach(RegProc *reg, struct registered_foreach_arg *arg)
+{
+    arg->res = CONS(arg->hp, reg->name, arg->res);
+    arg->hp += 2;
+}
+
 /* return a list of the registered processes */
 
 BIF_RETTYPE registered_0(BIF_ALIST_0)
 {
-    int i;
-    Eterm res;
+    struct registered_foreach_arg arg;
     Uint need;
-    Eterm* hp;
-    HashBucket **bucket;
     ErtsProcLocks proc_locks = ERTS_PROC_LOCK_MAIN;
 
     ERTS_CHK_HAVE_ONLY_MAIN_PROC_LOCK(BIF_P);
@@ -608,41 +572,21 @@ BIF_RETTYPE registered_0(BIF_ALIST_0)
     if (!proc_locks)
 	erts_proc_lock(BIF_P, ERTS_PROC_LOCK_MAIN);
 
-    bucket = process_reg.bucket;
-
-    /* work out how much heap we need & maybe garb, by scanning through
-       the registered process table */
-    need = 0;
-    for (i = 0; i < process_reg.size; i++) {
-	HashBucket *b = bucket[i];
-	while (b != NULL) {
-	    need += 2;
-	    b = b->next;
-	}
-    }
+    /* work out how much heap we need */
+    need = process_reg.nobjs * 2;
 
     if (need == 0) {
 	reg_read_unlock();
 	BIF_RET(NIL);
     }
 
-    hp = HAlloc(BIF_P, need);
-     
-     /* scan through again and make the list */ 
-    res = NIL;
+    /* scan through again and make the list */
+    arg.hp = HAlloc(BIF_P, need);
+    arg.res = NIL;
 
-    for (i = 0; i < process_reg.size; i++) {
-	HashBucket *b = bucket[i];
-	while (b != NULL) {
-	    RegProc *reg = (RegProc *) b;
-
-	    res = CONS(hp, reg->name, res);
-	    hp += 2;
-	    b = b->next;
-	}
-    }
+    hash_foreach(&process_reg, (HFOREACH_FUN)registered_foreach, &arg);
 
     reg_read_unlock();
 
-    BIF_RET(res);
+    BIF_RET(arg.res);
 }
diff --git a/erts/emulator/beam/register.h b/erts/emulator/beam/register.h
index 27a314ca78..c77bd03653 100644
--- a/erts/emulator/beam/register.h
+++ b/erts/emulator/beam/register.h
@@ -41,7 +41,6 @@ typedef struct reg_proc
     Eterm name;         /* Atom name */
 } RegProc;
 
-int process_reg_size(void);
 int process_reg_sz(void);
 void init_register_table(void);
 void register_info(fmtfn_t, void *);
diff --git a/erts/emulator/beam/sys.h b/erts/emulator/beam/sys.h
index c261c8e117..8a59b61b63 100644
--- a/erts/emulator/beam/sys.h
+++ b/erts/emulator/beam/sys.h
@@ -92,6 +92,12 @@
 #  define ERTS_GLB_INLINE_INCL_FUNC_DEF 0
 #endif
 
+#ifdef __GNUC__ /* ERTS_NOINLINE: ask the compiler not to inline a function */
+#  define ERTS_NOINLINE __attribute__((__noinline__))
+#else /* no noinline attribute known here: expands to nothing */
+#  define ERTS_NOINLINE
+#endif /* __GNUC__ */
+
 #if defined(VALGRIND) && !defined(NO_FPE_SIGNALS)
 #  define NO_FPE_SIGNALS
 #endif
@@ -172,7 +178,8 @@ typedef ERTS_SYS_FD_TYPE ErtsSysFdType;
 #  define ERTS_UNLIKELY(BOOL) (BOOL)
 #endif
 
-#if ERTS_AT_LEAST_GCC_VSN__(2, 96, 0)
+/* AIX doesn't like this and claims section conflicts */
+#if ERTS_AT_LEAST_GCC_VSN__(2, 96, 0) && !defined(_AIX)
 #if (defined(__APPLE__) && defined(__MACH__)) || defined(__DARWIN__)
 #  define ERTS_WRITE_UNLIKELY(X) X __attribute__ ((section ("__DATA,ERTS_LOW_WRITE") ))
 #else
@@ -666,7 +673,16 @@ typedef struct preload {
  */
 typedef Eterm ErtsTracer;
 
-#include "erl_osenv.h"
+
+/*
+ * This structure contains the rb tree for the Erlang osenv copy;
+ * see erl_osenv.h for more details.
+ */
+typedef struct __erts_osenv_t {
+    struct __env_rbtnode_t *tree;
+    int variable_count;
+    int content_size;
+} erts_osenv_t;
 
 /*
  * This structure contains options to all built in drivers.
diff --git a/erts/emulator/beam/trace_instrs.tab b/erts/emulator/beam/trace_instrs.tab
index 3eee81c053..9f22587f96 100644
--- a/erts/emulator/beam/trace_instrs.tab
+++ b/erts/emulator/beam/trace_instrs.tab
@@ -20,16 +20,15 @@
 //
 
 return_trace() {
-    ErtsCodeMFA* mfa = (ErtsCodeMFA *)(E[0]);
+    ErtsCodeMFA* mfa = (ErtsCodeMFA *)(E[1]);
 
     SWAPOUT;		/* Needed for shared heap */
     ERTS_UNREQ_PROC_MAIN_LOCK(c_p);
-    erts_trace_return(c_p, mfa, r(0), ERTS_TRACER_FROM_ETERM(E+1)/* tracer */);
+    erts_trace_return(c_p, mfa, r(0), ERTS_TRACER_FROM_ETERM(E+2)/* tracer */);
     ERTS_REQ_PROC_MAIN_LOCK(c_p);
     SWAPIN;
-    c_p->cp = NULL;
-    SET_I((BeamInstr *) cp_val(E[2]));
     E += 3;
+    $RETURN();
     Goto(*I);
     //| -no_next
 }
@@ -45,13 +44,12 @@ i_generic_breakpoint() {
 }
 
 i_return_time_trace() {
-    BeamInstr *pc = (BeamInstr *) (UWord) E[0];
+    BeamInstr *pc = (BeamInstr *) (UWord) E[1];
     SWAPOUT;
     erts_trace_time_return(c_p, erts_code_to_codeinfo(pc));
     SWAPIN;
-    c_p->cp = NULL;
-    SET_I((BeamInstr *) cp_val(E[1]));
     E += 2;
+    $RETURN();
     Goto(*I);
     //| -no_next
 }
@@ -59,8 +57,10 @@ i_return_time_trace() {
 i_return_to_trace() {
     if (IS_TRACED_FL(c_p, F_TRACE_RETURN_TO)) {
         Uint *cpp = (Uint*) E;
+        while (is_not_CP(*cpp)) {
+            cpp++;
+        }
         for(;;) {
-            ASSERT(is_CP(*cpp));
             if (IsOpCode(*cp_val(*cpp), return_trace)) {
                 do
                     ++cpp;
@@ -80,9 +80,8 @@ i_return_to_trace() {
         ERTS_REQ_PROC_MAIN_LOCK(c_p);
         SWAPIN;
     }
-    c_p->cp = NULL;
-    SET_I((BeamInstr *) cp_val(E[0]));
     E += 1;
+    $RETURN();
     Goto(*I);
     //| -no_next
 }
diff --git a/erts/emulator/beam/utils.c b/erts/emulator/beam/utils.c
index 0bbae65e28..fb06d60768 100644
--- a/erts/emulator/beam/utils.c
+++ b/erts/emulator/beam/utils.c
@@ -66,7 +66,7 @@
 #undef M_MMAP_THRESHOLD
 #undef M_MMAP_MAX
 
-#if defined(__GLIBC__) && defined(HAVE_MALLOC_H)
+#if (defined(__GLIBC__) || defined(_AIX)) && defined(HAVE_MALLOC_H)
 #include <malloc.h>
 #endif
 
@@ -907,7 +907,7 @@ tail_recur:
 	    hash = hash * FUNNY_NUMBER10 + num_free;
 	    hash = hash*FUNNY_NUMBER1 +
 		(atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue);
-	    hash = hash*FUNNY_NUMBER2 + funp->fe->old_index;
+	    hash = hash*FUNNY_NUMBER2 + funp->fe->index;
 	    hash = hash*FUNNY_NUMBER2 + funp->fe->old_uniq;
 	    if (num_free > 0) {
 		if (num_free > 1) {
@@ -1069,54 +1069,237 @@ do {                               \
 
 #define HCONST 0x9e3779b9UL /* the golden ratio; an arbitrary value */
 
-static Uint32
-block_hash(byte *k, Uint length, Uint32 initval)
+typedef struct { /* intermediate state of the block hash */
+    Uint32 a,b,c; /* a and b start at HCONST, c at the initial hash value */
+} ErtsBlockHashHelperCtx;
+
+#define BLOCK_HASH_BYTES_PER_ITER 12 /* bytes consumed per MIX() round */
+
+/* The three functions below are always used together, but they are kept
+   as separate functions to make trapping and the handling of unaligned
+   binaries easier. Examples of how they are used can be found in
+   block_hash and make_hash2_helper. */
+static ERTS_INLINE
+void block_hash_setup(Uint32 initval,
+                      ErtsBlockHashHelperCtx* ctx /* out parameter */)
+{
+    ctx->a = ctx->b = HCONST;
+    ctx->c = initval;           /* the previous hash value */
+}
+
+static ERTS_INLINE
+void block_hash_buffer(byte *buf,
+                       Uint buf_length, /* multiple of BLOCK_HASH_BYTES_PER_ITER */
+                       ErtsBlockHashHelperCtx* ctx /* in/out: running hash state */)
 {
-   Uint32 a,b,c;
-   Uint len;
-
-   /* Set up the internal state */
-   len = length;
-   a = b = HCONST;
-   c = initval;           /* the previous hash value */
-
-   while (len >= 12)
-   {
-      a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));
-      b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));
-      c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));
-      MIX(a,b,c);
-      k += 12; len -= 12;
-   }
-
-   c += length;
-   switch(len)              /* all the case statements fall through */
-   {
-   case 11: c+=((Uint32)k[10]<<24);
-   case 10: c+=((Uint32)k[9]<<16);
-   case 9 : c+=((Uint32)k[8]<<8);
-      /* the first byte of c is reserved for the length */
-   case 8 : b+=((Uint32)k[7]<<24);
-   case 7 : b+=((Uint32)k[6]<<16);
-   case 6 : b+=((Uint32)k[5]<<8);
-   case 5 : b+=k[4];
-   case 4 : a+=((Uint32)k[3]<<24);
-   case 3 : a+=((Uint32)k[2]<<16);
-   case 2 : a+=((Uint32)k[1]<<8);
-   case 1 : a+=k[0];
-     /* case 0: nothing left to add */
-   }
-   MIX(a,b,c);
-   return c;
+    Uint len = buf_length;
+    byte *k = buf;
+    ASSERT(buf_length % BLOCK_HASH_BYTES_PER_ITER == 0);
+    while (len >= BLOCK_HASH_BYTES_PER_ITER) { /* one little-endian 12-byte group per round */
+        ctx->a += (k[0] +((Uint32)k[1]<<8) +((Uint32)k[2]<<16) +((Uint32)k[3]<<24));
+        ctx->b += (k[4] +((Uint32)k[5]<<8) +((Uint32)k[6]<<16) +((Uint32)k[7]<<24));
+        ctx->c += (k[8] +((Uint32)k[9]<<8) +((Uint32)k[10]<<16)+((Uint32)k[11]<<24));
+        MIX(ctx->a,ctx->b,ctx->c);
+        k += BLOCK_HASH_BYTES_PER_ITER; len -= BLOCK_HASH_BYTES_PER_ITER;
+    }
 }
 
+static ERTS_INLINE
+Uint32 block_hash_final_bytes(byte *buf,
+                              Uint buf_length,  /* bytes left in buf; the switch handles 1..11 */
+                              Uint full_length, /* added to ctx->c before the final mix */
+                              ErtsBlockHashHelperCtx* ctx)
+{   /* Mixes the last buf_length bytes into ctx and returns the resulting hash. */
+    Uint len = buf_length;
+    byte *k = buf;
+    ctx->c += full_length;
+    switch(len)
+    { /* all the case statements fall through */
+    case 11: ctx->c+=((Uint32)k[10]<<24);
+    case 10: ctx->c+=((Uint32)k[9]<<16);
+    case 9 : ctx->c+=((Uint32)k[8]<<8);
+    /* the first byte of c is reserved for the length */
+    case 8 : ctx->b+=((Uint32)k[7]<<24);
+    case 7 : ctx->b+=((Uint32)k[6]<<16);
+    case 6 : ctx->b+=((Uint32)k[5]<<8);
+    case 5 : ctx->b+=k[4];
+    case 4 : ctx->a+=((Uint32)k[3]<<24);
+    case 3 : ctx->a+=((Uint32)k[2]<<16);
+    case 2 : ctx->a+=((Uint32)k[1]<<8);
+    case 1 : ctx->a+=k[0];
+    /* case 0: nothing left to add */
+    }
+    MIX(ctx->a,ctx->b,ctx->c);
+    return ctx->c;
+}
+
+static /* Hashes block_length bytes of block in one call, seeded with initval. */
 Uint32
-make_hash2(Eterm term)
+block_hash(byte *block, Uint block_length, Uint32 initval)
 {
+    ErtsBlockHashHelperCtx ctx;
+    Uint no_bytes_not_in_loop =
+        (block_length % BLOCK_HASH_BYTES_PER_ITER); /* tail of 0..11 bytes */
+    Uint no_bytes_to_process_in_loop =
+        block_length - no_bytes_not_in_loop; /* whole 12-byte groups */
+    byte *final_bytes = block + no_bytes_to_process_in_loop;
+    block_hash_setup(initval, &ctx);
+    block_hash_buffer(block,
+                      no_bytes_to_process_in_loop,
+                      &ctx);
+    return block_hash_final_bytes(final_bytes, /* tail bytes plus the final mix */
+                                  no_bytes_not_in_loop,
+                                  block_length,
+                                  &ctx);
+}
+
+typedef enum { /* resume points in make_hash2_helper; value X resumes at label L_X */
+    tag_primary_list,
+    arityval_subtag,
+    hamt_subtag_head_flatmap,
+    map_subtag,
+    fun_subtag,
+    neg_big_subtag,
+    sub_binary_subtag_1,
+    sub_binary_subtag_2,
+    hash2_common_1,
+    hash2_common_2,
+    hash2_common_3,
+} ErtsMakeHash2TrapLocation;
+
+typedef struct { /* loop state saved at trap location tag_primary_list */
+    int c;       /* number of bytes currently packed into sh */
+    Uint32 sh;   /* packed bytes; hashed once four have been collected */
+    Eterm* ptr;  /* current cons cell */
+} ErtsMakeHash2Context_TAG_PRIMARY_LIST;
+
+typedef struct { /* loop state saved at trap location arityval_subtag */
+    int i;       /* index of the element being visited; counts down from arity */
+    int arity;
+    Eterm* elem; /* tuple_val() of the tuple being hashed */
+} ErtsMakeHash2Context_ARITYVAL_SUBTAG;
+
+typedef struct { /* loop state saved at trap location hamt_subtag_head_flatmap */
+    Eterm *ks;   /* flatmap keys */
+    Eterm *vs;   /* flatmap values */
+    int i;       /* index of the pair being pushed; counts down from size - 1 */
+    Uint size;
+} ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP;
+
+typedef struct { /* loop state saved at trap location map_subtag */
+    Eterm* ptr;  /* next word of the map node to visit */
+    int i;       /* number of entries left to visit */
+} ErtsMakeHash2Context_MAP_SUBTAG;
+
+typedef struct { /* loop state saved at trap location fun_subtag */
+    Uint num_free; /* loop counter, initialised from funp->num_free */
+    Eterm* bptr;   /* walks funp->env backwards */
+} ErtsMakeHash2Context_FUN_SUBTAG;
+
+typedef struct { /* loop state saved at trap location neg_big_subtag */
+    Eterm* ptr;
+    Uint i;
+    Uint n;
+    Uint32 con;
+} ErtsMakeHash2Context_NEG_BIG_SUBTAG;
+
+typedef struct { /* shared by trap locations sub_binary_subtag_1 and _2 */
+    byte* bptr;
+    Uint sz;     /* binary size in bytes, truncated to 32 bits */
+    Uint bitsize;
+    Uint bitoffs;
+    Uint no_bytes_processed;
+    ErtsBlockHashHelperCtx block_hash_ctx;
+    /* The following fields are only used when bitoffs != 0 */
+    byte* buf;   /* freed by make_hash2_ctx_bin_dtor when non-NULL */
+    int done;
+
+} ErtsMakeHash2Context_SUB_BINARY_SUBTAG;
+
+typedef struct { /* used by the hash2_common_1..3 trap locations */
+    int dummy__; /* Empty structs are not supported on all platforms */
+} ErtsMakeHash2Context_EMPTY;
+
+typedef struct { /* state written by hash2_save_trap_state() when hashing traps */
+    ErtsMakeHash2TrapLocation trap_location;
+    /* specific to the trap location: */
+    union {
+        ErtsMakeHash2Context_TAG_PRIMARY_LIST tag_primary_list;
+        ErtsMakeHash2Context_ARITYVAL_SUBTAG arityval_subtag;
+        ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP hamt_subtag_head_flatmap;
+        ErtsMakeHash2Context_MAP_SUBTAG map_subtag;
+        ErtsMakeHash2Context_FUN_SUBTAG fun_subtag;
+        ErtsMakeHash2Context_NEG_BIG_SUBTAG neg_big_subtag;
+        ErtsMakeHash2Context_SUB_BINARY_SUBTAG sub_binary_subtag_1;
+        ErtsMakeHash2Context_SUB_BINARY_SUBTAG sub_binary_subtag_2;
+        ErtsMakeHash2Context_EMPTY hash2_common_1;
+        ErtsMakeHash2Context_EMPTY hash2_common_2;
+        ErtsMakeHash2Context_EMPTY hash2_common_3;
+    } trap_location_state;
+    /* same for all trap locations: */
+    Eterm term; /* term being processed when the trap was taken */
     Uint32 hash;
     Uint32 hash_xor_pairs;
-    DeclareTmpHeapNoproc(tmp_big,2);
+    ErtsEStack stack; /* written by ESTACK_SAVE; released in make_hash2_ctx_bin_dtor */
+} ErtsMakeHash2Context;
+
+static int make_hash2_ctx_bin_dtor(Binary *context_bin) { /* destructor of the trap-state magic binary */
+    ErtsMakeHash2Context* context = ERTS_MAGIC_BIN_DATA(context_bin);
+    DESTROY_SAVED_ESTACK(&context->stack); /* release the estack saved at trap time */
+    if (context->trap_location == sub_binary_subtag_2 && /* buf is only consulted for this location */
+        context->trap_location_state.sub_binary_subtag_2.buf != NULL) {
+        erts_free(ERTS_ALC_T_PHASH2_TRAP, context->trap_location_state.sub_binary_subtag_2.buf);
+    }
+    return 1;
+}
 
+/* hash2_save_trap_state is called seldom so we want to avoid inlining */
+static ERTS_NOINLINE
+Eterm hash2_save_trap_state(Eterm state_mref, /* THE_NON_VALUE on the first trap */
+                            Uint32 hash_xor_pairs,
+                            Uint32 hash,
+                            Process* p,
+                            Eterm term,
+                            Eterm* ESTK_DEF_STACK(s), /* NOTE(review): presumably required by ESTACK_SAVE(s, ...) -- confirm */
+                            ErtsEStack s,
+                            ErtsMakeHash2TrapLocation trap_location,
+                            void* trap_location_state_ptr, /* location-specific struct to copy */
+                            size_t trap_location_state_size) {
+    Binary* state_bin;
+    ErtsMakeHash2Context* context;
+    if (state_mref == THE_NON_VALUE) { /* no context yet: create the magic binary and its reference */
+        Eterm* hp;
+        state_bin = erts_create_magic_binary(sizeof(ErtsMakeHash2Context),
+                                             make_hash2_ctx_bin_dtor);
+        hp = HAlloc(p, ERTS_MAGIC_REF_THING_SIZE);
+        state_mref = erts_mk_magic_ref(&hp, &MSO(p), state_bin);
+    } else { /* reuse the context behind the existing reference */
+        state_bin = erts_magic_ref2bin(state_mref);
+    }
+    context = ERTS_MAGIC_BIN_DATA(state_bin);
+    context->term = term;
+    context->hash = hash;
+    context->hash_xor_pairs = hash_xor_pairs;
+    ESTACK_SAVE(s, &context->stack);
+    context->trap_location = trap_location;
+    sys_memcpy(&context->trap_location_state, /* copies only trap_location_state_size bytes into the union */
+               trap_location_state_ptr,
+               trap_location_state_size);
+    erts_set_gc_state(p, 0); /* make_hash2_helper calls erts_set_gc_state(p, 1) when it restores */
+    BUMP_ALL_REDS(p);
+    return state_mref; /* the magic reference identifying the saved state */
+}
+#undef NOINLINE_HASH2_SAVE_TRAP_STATE /* NOTE(review): no matching #define is visible here -- confirm it is still needed */
+
+/* Writes back a magic reference to *state_mref_write_back when the
+   function traps */
+static ERTS_INLINE Uint32
+make_hash2_helper(Eterm term_param, const int can_trap, Eterm* state_mref_write_back, Process* p)
+{
+    static const Uint ITERATIONS_PER_RED = 64;
+    Uint32 hash;
+    Uint32 hash_xor_pairs;
+    Eterm term = term_param;
     ERTS_UNDEF(hash_xor_pairs, 0);
 
 /* (HCONST * {2, ..., 22}) mod 2^32 */
@@ -1168,12 +1351,63 @@ make_hash2(Eterm term)
 
 #define IS_SSMALL28(x) (((Uint) (((x) >> (28-1)) + 1)) < 2)
 
+#define NOT_SSMALL28_HASH(SMALL)                          \
+    do { /* hash a small that fails IS_SSMALL28() */      \
+        Uint64 t;                                         \
+        Uint32 x, y;                                      \
+        Uint32 con;                                       \
+        if ((SMALL) < 0) { /* SMALL is evaluated twice */ \
+            con = HCONST_10;                              \
+            t = (Uint64)((SMALL) * (-1)); /* magnitude */ \
+        } else {                                          \
+            con = HCONST_11;                              \
+            t = (SMALL);                                  \
+        }                                                 \
+        x = t & 0xffffffff; /* low 32 bits */             \
+        y = t >> 32;        /* high 32 bits */            \
+        UINT32_HASH_2(x, y, con);                         \
+    } while(0)
+    
 #ifdef ARCH_64
 #  define POINTER_HASH(Ptr, AConst) UINT32_HASH_2((Uint32)(UWord)(Ptr), (((UWord)(Ptr)) >> 32), AConst)
 #else
 #  define POINTER_HASH(Ptr, AConst) UINT32_HASH(Ptr, AConst)
 #endif
 
+#define TRAP_LOCATION_NO_RED(location_name)                             \
+    do {                                                                \
+        if(can_trap && iterations_until_trap <= 0) {                    \
+                *state_mref_write_back  =                               \
+                    hash2_save_trap_state(state_mref,                   \
+                                          hash_xor_pairs,               \
+                                          hash,                         \
+                                          p,                            \
+                                          term,                         \
+                                          ESTK_DEF_STACK(s),            \
+                                          s,                            \
+                                          location_name,                \
+                                          &ctx,                         \
+                                          sizeof(ctx));                 \
+                return 0;                                               \
+            L_##location_name:                                          \
+                ctx = context->trap_location_state. location_name;      \
+        }                                                               \
+    } while(0)
+
+#define TRAP_LOCATION(location_name)                            \
+    do {                                                        \
+        if (can_trap) {                                         \
+            iterations_until_trap--;                            \
+            TRAP_LOCATION_NO_RED(location_name);                \
+        }                                                       \
+    } while(0)
+
+#define TRAP_LOCATION_NO_CTX(location_name)                             \
+    do {                                                                \
+        ErtsMakeHash2Context_EMPTY ctx;                                 \
+        TRAP_LOCATION(location_name);                                   \
+    } while(0)
+    
     /* Optimization. Simple cases before declaration of estack. */
     if (primary_tag(term) == TAG_PRIMARY_IMMED1) {
 	switch (term & _TAG_IMMED1_MASK) {
@@ -1186,51 +1420,94 @@ make_hash2(Eterm term)
 	    break;
 	case _TAG_IMMED1_SMALL:
 	  {
-	      Sint x = signed_val(term);
-
-	      if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
-		  term = small_to_big(x, tmp_big);
-		  break;
+	      Sint small = signed_val(term);
+	      if (SMALL_BITS > 28 && !IS_SSMALL28(small)) {
+                  hash = 0;
+                  NOT_SSMALL28_HASH(small);
+                  return hash;
 	      }
 	      hash = 0;
-	      SINT32_HASH(x, HCONST);
+	      SINT32_HASH(small, HCONST);
 	      return hash;
 	  }
 	}
     };
     {
     Eterm tmp;
+    long max_iterations = 0;
+    long iterations_until_trap = 0;
+    Eterm state_mref = THE_NON_VALUE;
+    ErtsMakeHash2Context* context = NULL;
     DECLARE_ESTACK(s);
-
-    UseTmpHeapNoproc(2);
+    ESTACK_CHANGE_ALLOCATOR(s, ERTS_ALC_T_SAVED_ESTACK);
+    if(can_trap){
+#ifdef DEBUG
+        (void)ITERATIONS_PER_RED;
+        iterations_until_trap = max_iterations =
+            (1103515245 * (ERTS_BIF_REDS_LEFT(p)) + 12345)  % 227;
+#else
+        iterations_until_trap = max_iterations =
+            ITERATIONS_PER_RED * ERTS_BIF_REDS_LEFT(p);
+#endif
+    }
+    if (can_trap && is_internal_magic_ref(term)) {
+        Binary* state_bin;
+        state_mref = term;
+        state_bin = erts_magic_ref2bin(state_mref);
+        if (ERTS_MAGIC_BIN_DESTRUCTOR(state_bin) == make_hash2_ctx_bin_dtor) {
+            /* Restore state after a trap */
+            context = ERTS_MAGIC_BIN_DATA(state_bin);
+            term = context->term;
+            hash = context->hash;
+            hash_xor_pairs = context->hash_xor_pairs;
+            ESTACK_RESTORE(s, &context->stack);
+            ASSERT(p->flags & F_DISABLE_GC);
+            erts_set_gc_state(p, 1);
+            switch (context->trap_location) {
+            case hash2_common_3:           goto L_hash2_common_3;
+            case tag_primary_list:         goto L_tag_primary_list;
+            case arityval_subtag:          goto L_arityval_subtag;
+            case hamt_subtag_head_flatmap: goto L_hamt_subtag_head_flatmap;
+            case map_subtag:               goto L_map_subtag;
+            case fun_subtag:               goto L_fun_subtag;
+            case neg_big_subtag:           goto L_neg_big_subtag;
+            case sub_binary_subtag_1:      goto L_sub_binary_subtag_1;
+            case sub_binary_subtag_2:      goto L_sub_binary_subtag_2;
+            case hash2_common_1:           goto L_hash2_common_1;
+            case hash2_common_2:           goto L_hash2_common_2;
+            }
+        }
+    }
     hash = 0;
     for (;;) {
 	switch (primary_tag(term)) {
 	case TAG_PRIMARY_LIST:
 	{
-	    int c = 0;
-	    Uint32 sh = 0;
-	    Eterm* ptr = list_val(term);
-	    while (is_byte(*ptr)) {
+            ErtsMakeHash2Context_TAG_PRIMARY_LIST ctx = {
+                .c =  0,
+                .sh = 0,
+                .ptr = list_val(term)};
+	    while (is_byte(*ctx.ptr)) {
 		/* Optimization for strings. */
-		sh = (sh << 8) + unsigned_val(*ptr);
-		if (c == 3) {
-		    UINT32_HASH(sh, HCONST_4);
-		    c = sh = 0;
+		ctx.sh = (ctx.sh << 8) + unsigned_val(*ctx.ptr);
+		if (ctx.c == 3) {
+		    UINT32_HASH(ctx.sh, HCONST_4);
+		    ctx.c = ctx.sh = 0;
 		} else {
-		    c++;
+		    ctx.c++;
 		}
-		term = CDR(ptr);
+		term = CDR(ctx.ptr);
 		if (is_not_list(term))
 		    break;
-		ptr = list_val(term);
+		ctx.ptr = list_val(term);
+                TRAP_LOCATION(tag_primary_list);
 	    }
-	    if (c > 0)
-		UINT32_HASH(sh, HCONST_4);
+	    if (ctx.c > 0)
+		UINT32_HASH(ctx.sh, HCONST_4);
 	    if (is_list(term)) {
-		tmp = CDR(ptr);
+		tmp = CDR(ctx.ptr);
                 ESTACK_PUSH(s, tmp);
-		term = CAR(ptr);
+		term = CAR(ctx.ptr);
 	    }
 	}
 	break;
@@ -1241,34 +1518,39 @@ make_hash2(Eterm term)
 	    switch (hdr & _TAG_HEADER_MASK) {
 	    case ARITYVAL_SUBTAG:
 	    {
-		int i;
-		int arity = header_arity(hdr);
-		Eterm* elem = tuple_val(term);
-		UINT32_HASH(arity, HCONST_9);
-		if (arity == 0) /* Empty tuple */
+                ErtsMakeHash2Context_ARITYVAL_SUBTAG ctx = {
+                    .i =  0,
+                    .arity = header_arity(hdr),
+                    .elem = tuple_val(term)};
+		UINT32_HASH(ctx.arity, HCONST_9);
+		if (ctx.arity == 0) /* Empty tuple */
 		    goto hash2_common;
-		for (i = arity; ; i--) {
-		    term = elem[i];
-                    if (i == 1)
+		for (ctx.i = ctx.arity; ; ctx.i--) {
+		    term = ctx.elem[ctx.i];
+                    if (ctx.i == 1)
                         break;
                     ESTACK_PUSH(s, term);
+                    TRAP_LOCATION(arityval_subtag);
 		}
 	    }
 	    break;
             case MAP_SUBTAG:
             {
-                Eterm* ptr = boxed_val(term) + 1;
                 Uint size;
-                int i;
+                ErtsMakeHash2Context_MAP_SUBTAG ctx = {
+                    .ptr = boxed_val(term) + 1,
+                    .i = 0};
                 switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
                 case HAMT_SUBTAG_HEAD_FLATMAP:
                 {
                     flatmap_t *mp = (flatmap_t *)flatmap_val(term);
-                    Eterm *ks = flatmap_get_keys(mp);
-                    Eterm *vs = flatmap_get_values(mp);
-                    size      = flatmap_get_size(mp);
-                    UINT32_HASH(size, HCONST_16);
-                    if (size == 0)
+                    ErtsMakeHash2Context_HAMT_SUBTAG_HEAD_FLATMAP ctx = {
+                        .ks = flatmap_get_keys(mp),
+                        .vs = flatmap_get_values(mp),
+                        .i = 0,
+                        .size = flatmap_get_size(mp)};
+                    UINT32_HASH(ctx.size, HCONST_16);
+                    if (ctx.size == 0)
                         goto hash2_common;
 
                     /* We want a portable hash function that is *independent* of
@@ -1281,17 +1563,18 @@ make_hash2(Eterm term)
                     ESTACK_PUSH(s, HASH_MAP_TAIL);
                     hash = 0;
                     hash_xor_pairs = 0;
-                    for (i = size - 1; i >= 0; i--) {
+                    for (ctx.i = ctx.size - 1; ctx.i >= 0; ctx.i--) {
                         ESTACK_PUSH(s, HASH_MAP_PAIR);
-                        ESTACK_PUSH(s, vs[i]);
-                        ESTACK_PUSH(s, ks[i]);
+                        ESTACK_PUSH(s, ctx.vs[ctx.i]);
+                        ESTACK_PUSH(s, ctx.ks[ctx.i]);
+                        TRAP_LOCATION(hamt_subtag_head_flatmap);
                     }
                     goto hash2_common;
                 }
 
                 case HAMT_SUBTAG_HEAD_ARRAY:
                 case HAMT_SUBTAG_HEAD_BITMAP:
-                    size = *ptr++;
+                    size = *ctx.ptr++;
                     UINT32_HASH(size, HCONST_16);
                     if (size == 0)
                         goto hash2_common;
@@ -1303,27 +1586,28 @@ make_hash2(Eterm term)
                 }
                 switch (hdr & _HEADER_MAP_SUBTAG_MASK) {
                 case HAMT_SUBTAG_HEAD_ARRAY:
-                    i = 16;
+                    ctx.i = 16;
                     break;
                 case HAMT_SUBTAG_HEAD_BITMAP:
                 case HAMT_SUBTAG_NODE_BITMAP:
-                    i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
+                    ctx.i = hashmap_bitcount(MAP_HEADER_VAL(hdr));
                     break;
                 default:
                     erts_exit(ERTS_ERROR_EXIT, "bad header");
                 }
-                while (i) {
-                    if (is_list(*ptr)) {
-                        Eterm* cons = list_val(*ptr);
+                while (ctx.i) {
+                    if (is_list(*ctx.ptr)) {
+                        Eterm* cons = list_val(*ctx.ptr);
                         ESTACK_PUSH(s, HASH_MAP_PAIR);
                         ESTACK_PUSH(s, CDR(cons));
                         ESTACK_PUSH(s, CAR(cons));
                     }
                     else {
-                        ASSERT(is_boxed(*ptr));
-                        ESTACK_PUSH(s, *ptr);
+                        ASSERT(is_boxed(*ctx.ptr));
+                        ESTACK_PUSH(s, *ctx.ptr);
                     }
-                    i--; ptr++;
+                    ctx.i--; ctx.ptr++;
+                    TRAP_LOCATION(map_subtag);
                 }
                 goto hash2_common;
             }
@@ -1344,22 +1628,25 @@ make_hash2(Eterm term)
 	    case FUN_SUBTAG:
 	    {
 		ErlFunThing* funp = (ErlFunThing *) fun_val(term);
-		Uint num_free = funp->num_free;
+                ErtsMakeHash2Context_FUN_SUBTAG ctx = {
+                    .num_free = funp->num_free,
+                    .bptr = NULL};
 		UINT32_HASH_2
-		    (num_free,
+		    (ctx.num_free,
 		     atom_tab(atom_val(funp->fe->module))->slot.bucket.hvalue,
 		     HCONST);
 		UINT32_HASH_2
-		    (funp->fe->old_index, funp->fe->old_uniq, HCONST);
-		if (num_free == 0) {
+		    (funp->fe->index, funp->fe->old_uniq, HCONST);
+		if (ctx.num_free == 0) {
 		    goto hash2_common;
 		} else {
-		    Eterm* bptr = funp->env + num_free - 1;
-		    while (num_free-- > 1) {
-			term = *bptr--;
+		    ctx.bptr = funp->env + ctx.num_free - 1;
+		    while (ctx.num_free-- > 1) {
+			term = *ctx.bptr--;
 			ESTACK_PUSH(s, term);
+                        TRAP_LOCATION(fun_subtag);
 		    }
-		    term = *bptr;
+		    term = *ctx.bptr;
 		}
 	    }
 	    break;
@@ -1367,70 +1654,190 @@ make_hash2(Eterm term)
 	    case HEAP_BINARY_SUBTAG:
 	    case SUB_BINARY_SUBTAG:
 	    {
-		byte* bptr;
-		unsigned sz = binary_size(term);
+#define BYTE_BITS 8
+                ErtsMakeHash2Context_SUB_BINARY_SUBTAG ctx = {
+                    .bptr = 0,
+                    /* !!!!!!!!!!!!!!!!!!!! OBS !!!!!!!!!!!!!!!!!!!!
+                     *
+                     * The size is truncated to 32 bits on the line
+                     * below so that the code is compatible with old
+                     * versions of the code. This means that hash
+                     * values for binaries with a size greater than
+                     * 4GB do not take all bytes into consideration.
+                     *
+                     * !!!!!!!!!!!!!!!!!!!! OBS !!!!!!!!!!!!!!!!!!!!
+                     */ 
+                    .sz = (0xFFFFFFFF & binary_size(term)),
+                    .bitsize = 0,
+                    .bitoffs = 0,
+                    .no_bytes_processed = 0
+                };
 		Uint32 con = HCONST_13 + hash;
-		Uint bitoffs;
-		Uint bitsize;
-
-		ERTS_GET_BINARY_BYTES(term, bptr, bitoffs, bitsize);
-		if (sz == 0 && bitsize == 0) {
+                Uint iters_for_bin = MAX(1, ctx.sz / BLOCK_HASH_BYTES_PER_ITER);
+		ERTS_GET_BINARY_BYTES(term, ctx.bptr, ctx.bitoffs, ctx.bitsize);
+		if (ctx.sz == 0 && ctx.bitsize == 0) {
 		    hash = con;
-		} else {
-		    if (bitoffs == 0) {
-			hash = block_hash(bptr, sz, con);
-			if (bitsize > 0) {
-			    UINT32_HASH_2(bitsize, (bptr[sz] >> (8 - bitsize)),
-					  HCONST_15);
-			}
-		    } else {
-			byte* buf = (byte *) erts_alloc(ERTS_ALC_T_TMP,
-							sz + (bitsize != 0));
-			erts_copy_bits(bptr, bitoffs, 1, buf, 0, 1, sz*8+bitsize);
-			hash = block_hash(buf, sz, con);
-			if (bitsize > 0) {
-			    UINT32_HASH_2(bitsize, (buf[sz] >> (8 - bitsize)),
-					  HCONST_15);
-			}
-			erts_free(ERTS_ALC_T_TMP, (void *) buf);
-		    }
+		} else if (ctx.bitoffs == 0 &&
+                           (!can_trap ||
+                            (iterations_until_trap - iters_for_bin) > 0)) {
+                    /* No need to trap while hashing binary */
+                    if (can_trap) iterations_until_trap -= iters_for_bin;
+                    hash = block_hash(ctx.bptr, ctx.sz, con);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.bptr[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                } else if (ctx.bitoffs == 0) {
+                    /* Need to trap while hashing binary */
+                    ErtsBlockHashHelperCtx* block_hash_ctx = &ctx.block_hash_ctx;
+                    block_hash_setup(con, block_hash_ctx);
+                    do {
+                        Uint max_bytes_to_process =
+                            iterations_until_trap <= 0 ? BLOCK_HASH_BYTES_PER_ITER :
+                            iterations_until_trap * BLOCK_HASH_BYTES_PER_ITER;
+                        Uint bytes_left = ctx.sz - ctx.no_bytes_processed;
+                        Uint even_bytes_left =
+                            bytes_left - (bytes_left % BLOCK_HASH_BYTES_PER_ITER);
+                        Uint bytes_to_process =
+                            MIN(max_bytes_to_process, even_bytes_left);
+                        block_hash_buffer(&ctx.bptr[ctx.no_bytes_processed],
+                                          bytes_to_process,
+                                          block_hash_ctx);
+                        ctx.no_bytes_processed += bytes_to_process;
+                        iterations_until_trap -=
+                            MAX(1, bytes_to_process / BLOCK_HASH_BYTES_PER_ITER);
+                        TRAP_LOCATION_NO_RED(sub_binary_subtag_1);
+                        block_hash_ctx = &ctx.block_hash_ctx; /* Restore after trap */
+                    } while ((ctx.sz - ctx.no_bytes_processed) >=
+                             BLOCK_HASH_BYTES_PER_ITER);
+                    hash = block_hash_final_bytes(ctx.bptr +
+                                                  ctx.no_bytes_processed,
+                                                  ctx.sz - ctx.no_bytes_processed,
+                                                  ctx.sz,
+                                                  block_hash_ctx);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.bptr[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                } else if (/* ctx.bitoffs != 0 && */
+                           (!can_trap ||
+                            (iterations_until_trap - iters_for_bin) > 0)) {
+                    /* No need to trap while hashing binary */
+                    Uint nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    byte *buf = erts_alloc(ERTS_ALC_T_TMP, nr_of_bytes);
+                    Uint nr_of_bits_to_copy = ctx.sz*BYTE_BITS+ctx.bitsize;
+                    if (can_trap) iterations_until_trap -= iters_for_bin;
+                    erts_copy_bits(ctx.bptr,
+                                   ctx.bitoffs, 1, buf, 0, 1, nr_of_bits_to_copy);
+                    hash = block_hash(buf, ctx.sz, con);
+                    if (ctx.bitsize > 0) {
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (buf[ctx.sz] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                    erts_free(ERTS_ALC_T_TMP, buf);
+                } else /* ctx.bitoffs != 0 && */ {
+#ifdef DEBUG
+#define BINARY_BUF_SIZE (BLOCK_HASH_BYTES_PER_ITER * 3)
+#else
+#define BINARY_BUF_SIZE (BLOCK_HASH_BYTES_PER_ITER * 256)
+#endif
+#define BINARY_BUF_SIZE_BITS (BINARY_BUF_SIZE*BYTE_BITS)
+                    /* Need to trap while hashing binary */
+                    ErtsBlockHashHelperCtx* block_hash_ctx = &ctx.block_hash_ctx;
+                    Uint nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    ERTS_CT_ASSERT(BINARY_BUF_SIZE % BLOCK_HASH_BYTES_PER_ITER == 0);
+                    ctx.buf = erts_alloc(ERTS_ALC_T_PHASH2_TRAP,
+                                         MIN(nr_of_bytes, BINARY_BUF_SIZE));
+                    block_hash_setup(con, block_hash_ctx);
+                    do {
+                        Uint bytes_left =
+                            ctx.sz - ctx.no_bytes_processed;
+                        Uint even_bytes_left =
+                            bytes_left - (bytes_left % BLOCK_HASH_BYTES_PER_ITER);
+                        Uint bytes_to_process =
+                            MIN(BINARY_BUF_SIZE, even_bytes_left);
+                        Uint nr_of_bits_left =
+                            (ctx.sz*BYTE_BITS+ctx.bitsize) -
+                            ctx.no_bytes_processed*BYTE_BITS; 
+                        Uint nr_of_bits_to_copy =
+                            MIN(nr_of_bits_left, BINARY_BUF_SIZE_BITS);
+                        ctx.done = nr_of_bits_left == nr_of_bits_to_copy;
+                        erts_copy_bits(ctx.bptr + ctx.no_bytes_processed,
+                                       ctx.bitoffs, 1, ctx.buf, 0, 1,
+                                       nr_of_bits_to_copy);
+                        block_hash_buffer(ctx.buf,
+                                          bytes_to_process,
+                                          block_hash_ctx);
+                        ctx.no_bytes_processed += bytes_to_process;
+                        iterations_until_trap -=
+                            MAX(1, bytes_to_process / BLOCK_HASH_BYTES_PER_ITER);
+                        TRAP_LOCATION_NO_RED(sub_binary_subtag_2);
+                        block_hash_ctx = &ctx.block_hash_ctx; /* Restore after trap */
+                    } while (!ctx.done);
+                    nr_of_bytes = ctx.sz + (ctx.bitsize != 0);
+                    hash = block_hash_final_bytes(ctx.buf +
+                                                  (ctx.no_bytes_processed -
+                                                   ((nr_of_bytes-1) / BINARY_BUF_SIZE) *  BINARY_BUF_SIZE),
+                                                  ctx.sz - ctx.no_bytes_processed,
+                                                  ctx.sz,
+                                                  block_hash_ctx);
+                    if (ctx.bitsize > 0) {
+                        Uint last_byte_index =
+                            nr_of_bytes - (((nr_of_bytes-1) / BINARY_BUF_SIZE) *  BINARY_BUF_SIZE) -1;
+                        UINT32_HASH_2(ctx.bitsize,
+                                      (ctx.buf[last_byte_index] >> (BYTE_BITS - ctx.bitsize)),
+                                      HCONST_15);
+                    }
+                    erts_free(ERTS_ALC_T_PHASH2_TRAP, ctx.buf);
+                    context->trap_location_state.sub_binary_subtag_2.buf = NULL;
 		}
 		goto hash2_common;
+#undef BYTE_BITS
+#undef BINARY_BUF_SIZE
+#undef BINARY_BUF_SIZE_BITS
 	    }
 	    break;
 	    case POS_BIG_SUBTAG:
 	    case NEG_BIG_SUBTAG:
 	    {
-		Eterm* ptr = big_val(term);
-		Uint i = 0;
-		Uint n = BIG_SIZE(ptr);
-		Uint32 con = BIG_SIGN(ptr) ? HCONST_10 : HCONST_11;
+		Eterm* big_val_ptr = big_val(term);
+                ErtsMakeHash2Context_NEG_BIG_SUBTAG ctx = {
+                    .ptr = big_val_ptr,
+                    .i = 0,
+                    .n = BIG_SIZE(big_val_ptr),
+                    .con = BIG_SIGN(big_val_ptr) ? HCONST_10 : HCONST_11};
 #if D_EXP == 16
 		do {
 		    Uint32 x, y;
-		    x = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    x += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
-		    y = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    y += (Uint32)(i < n ? BIG_DIGIT(ptr, i++) : 0) << 16;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    x = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    x += (Uint32)(ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0) << 16;
+		    y = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    y += (Uint32)(ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0) << 16;
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #elif D_EXP == 32
 		do {
 		    Uint32 x, y;
-		    x = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    y = i < n ? BIG_DIGIT(ptr, i++) : 0;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    x = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    y = ctx.i < ctx.n ? BIG_DIGIT(ctx.ptr, ctx.i++) : 0;
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #elif D_EXP == 64
 		do {
 		    Uint t;
 		    Uint32 x, y;
-                    ASSERT(i < n);
-		    t = BIG_DIGIT(ptr, i++);
+                    ASSERT(ctx.i < ctx.n);
+		    t = BIG_DIGIT(ctx.ptr, ctx.i++);
 		    x = t & 0xffffffff;
 		    y = t >> 32;
-		    UINT32_HASH_2(x, y, con);
-		} while (i < n);
+		    UINT32_HASH_2(x, y, ctx.con);
+                    TRAP_LOCATION(neg_big_subtag);
+		} while (ctx.i < ctx.n);
 #else
 #error "unsupported D_EXP size"
 #endif
@@ -1508,13 +1915,13 @@ make_hash2(Eterm term)
 		}
 	    case _TAG_IMMED1_SMALL:
 	      {
-		  Sint x = signed_val(term);
+		  Sint small = signed_val(term);
+		  if (SMALL_BITS > 28 && !IS_SSMALL28(small)) {
+                      NOT_SSMALL28_HASH(small);
+		  } else {
+		      SINT32_HASH(small, HCONST);
+                  }
 
-		  if (SMALL_BITS > 28 && !IS_SSMALL28(x)) {
-		      term = small_to_big(x, tmp_big);
-		      break;
-		  }
-		  SINT32_HASH(x, HCONST);
 		  goto hash2_common;
 	      }
 	    }
@@ -1529,7 +1936,10 @@ make_hash2(Eterm term)
 
 	    if (ESTACK_ISEMPTY(s)) {
 		DESTROY_ESTACK(s);
-		UnUseTmpHeapNoproc(2);
+                if (can_trap) {
+                    BUMP_REDS(p, (max_iterations - iterations_until_trap) / ITERATIONS_PER_RED);
+                    ASSERT(!(p->flags & F_DISABLE_GC));
+                }
 		return hash;
 	    }
 
@@ -1540,18 +1950,37 @@ make_hash2(Eterm term)
 		    hash = (Uint32) ESTACK_POP(s);
                     UINT32_HASH(hash_xor_pairs, HCONST_19);
 		    hash_xor_pairs = (Uint32) ESTACK_POP(s);
+                    TRAP_LOCATION_NO_CTX(hash2_common_1);
 		    goto hash2_common;
 		}
 		case HASH_MAP_PAIR:
 		    hash_xor_pairs ^= hash;
                     hash = 0;
+                    TRAP_LOCATION_NO_CTX(hash2_common_2);
 		    goto hash2_common;
 		default:
 		    break;
 	    }
+
 	}
+        TRAP_LOCATION_NO_CTX(hash2_common_3);
     }
     }
+#undef TRAP_LOCATION_NO_RED
+#undef TRAP_LOCATION
+#undef TRAP_LOCATION_NO_CTX
+}
+
+Uint32 /* returns the helper's 32-bit hash value unchanged */
+make_hash2(Eterm term) /* hash `term` by forwarding to make_hash2_helper */
+{ /* NOTE(review): 2nd argument 0 is presumably the helper's can_trap flag (off) -- confirm against its signature */
+    return make_hash2_helper(term, 0, NULL, NULL); /* no write-back pointer and no process are supplied */
+}
+
+Uint32 /* returns whatever make_hash2_helper returns */
+trapping_make_hash2(Eterm term, Eterm* state_mref_write_back, Process* p) /* forwards to make_hash2_helper with 2nd argument 1 */
+{ /* NOTE(review): 1 is presumably can_trap; state_mref_write_back looks like an out-slot for saved trap state -- confirm */
+    return make_hash2_helper(term, 1, state_mref_write_back, p); /* helper bumps reductions on p when can_trap is set */
+}
 
 /* Term hash function for internal use.
@@ -1731,7 +2160,7 @@ make_internal_hash(Eterm term, Uint32 salt)
 		ErlFunThing* funp = (ErlFunThing *) fun_val(term);
 		Uint num_free = funp->num_free;
                 UINT32_HASH_2(num_free, funp->fe->module, HCONST_20);
-                UINT32_HASH_2(funp->fe->old_index, funp->fe->old_uniq, HCONST_21);
+                UINT32_HASH_2(funp->fe->index, funp->fe->old_uniq, HCONST_21);
 		if (num_free == 0) {
 		    goto pop_next;
 		} else {
@@ -2381,7 +2810,7 @@ tailrecur_ne:
 		    f1 = (ErlFunThing *) fun_val(a);
 		    f2 = (ErlFunThing *) fun_val(b);
 		    if (f1->fe->module != f2->fe->module ||
-			f1->fe->old_index != f2->fe->old_index ||
+			f1->fe->index != f2->fe->index ||
 			f1->fe->old_uniq != f2->fe->old_uniq ||
 			f1->num_free != f2->num_free) {
 			goto not_equal;
@@ -2976,7 +3405,7 @@ tailrecur_ne:
 		    if (diff != 0) {
 			RETURN_NEQ(diff);
 		    }
-		    diff = f1->fe->old_index - f2->fe->old_index;
+		    diff = f1->fe->index - f2->fe->index;
 		    if (diff != 0) {
 			RETURN_NEQ(diff);
 		    }