Merge branch 'bjorn/beam-loader/OTP-9030' into dev

* bjorn/beam-loader/OTP-9030: (43 commits)
  c: Reduce memory footprint
  erl_posix_msg: Reduce memory footprint
  Introduce a few more variations of the move instructions
  Combine a move + jump sequence into the move_jump instruction
  Optimize and clean-up the exact equality/non-equality instructions
  Optimize addition of a small integer to a variable
  Introduce a special instruction for select_val with two values
  Introduce a few more specialized put_list instructions
  Eliminate the "put_list c n Dst" instructions
  Eliminate the specific move_sd instruction
  Eliminate use of GetArg1() in the badmatch and case_end instructions
  Eliminate use of GetArg2() in the i_element instruction
  Eliminate use of GetArg1() in the fast_element instruction
  Eliminate use of GetArg1() in the jump_on_val* instructions
  Eliminate use of GetArg1() in the select_val instruction
  beam_emu: Eliminate sloppy use of tmp_arg1 and tmp_arg2
  beam_emu: Don't inline helper functions into process_main()
  beam_emu: Clean up calling of the error_handler module
  Simplify a select_val instruction that selects only one value
  Optimize creation of tuples
  ...
author: Björn Gustavsson <bjorn@erlang.org> 2011-01-17 15:29:48 +0100
committer: Björn Gustavsson <bjorn@erlang.org> 2011-01-17 15:29:48 +0100
commit: faef041a446314bb228e0e8c88a09241df2798f1 (patch)
tree: 728f73a167f50a46b54617a6cc054807064236e9 /erts
parent: 7bc25db7f9a70190a661b2a97734900893d33169 (diff)
parent: 3f1fce3929cc0cc68d7e5b1ce543bd3f20a31e2b (diff)
download: otp-faef041a446314bb228e0e8c88a09241df2798f1.tar.gz
otp-faef041a446314bb228e0e8c88a09241df2798f1.tar.bz2
otp-faef041a446314bb228e0e8c88a09241df2798f1.zip
13 files changed, 1694 insertions, 862 deletions
diff --git a/erts/configure.in b/erts/configure.in
index 6e983a07b0..627f734409 100644
--- a/erts/configure.in
+++ b/erts/configure.in
@@ -580,6 +580,11 @@ AC_SUBST(WFLAGS)
 AC_SUBST(CFLAG_RUNTIME_LIBRARY_PATH)
 
 AC_CHECK_SIZEOF(void *) # Needed for ARCH and smp checks below
+if test "x$ac_cv_sizeof_void_p" = x8; then
+  AC_SUBST(EXTERNAL_WORD_SIZE, 64)
+else
+  AC_SUBST(EXTERNAL_WORD_SIZE, 32)
+fi
 
 dnl
 dnl Figure out operating system and cpu architecture
diff --git a/erts/emulator/Makefile.in b/erts/emulator/Makefile.in
index 6c33e2ca16..f04df354a8 100644
--- a/erts/emulator/Makefile.in
+++ b/erts/emulator/Makefile.in
@@ -505,8 +505,10 @@ ifdef HIPE_ENABLED
 OPCODE_TABLES += hipe/hipe_ops.tab
 endif
 
-$(TTF_DIR)/beam_opcodes.h $(TTF_DIR)/beam_opcodes.c: $(OPCODE_TABLES)
-	LANG=C $(PERL) utils/beam_makeops -outdir $(TTF_DIR) \
+$(TTF_DIR)/beam_opcodes.h $(TTF_DIR)/beam_opcodes.c: $(OPCODE_TABLES) utils/beam_makeops
+	LANG=C $(PERL) utils/beam_makeops \
+		-wordsize @EXTERNAL_WORD_SIZE@ \
+		-outdir $(TTF_DIR) \
 		-emulator $(OPCODE_TABLES)
 
 # bif and atom table
diff --git a/erts/emulator/beam/beam_debug.c b/erts/emulator/beam/beam_debug.c
index b0bf14b94f..2855241b91 100644
--- a/erts/emulator/beam/beam_debug.c
+++ b/erts/emulator/beam/beam_debug.c
@@ -48,7 +48,6 @@
 void dbg_bt(Process* p, Eterm* sp);
 void dbg_where(BeamInstr* addr, Eterm x0, Eterm* reg);
 
-static void print_big(int to, void *to_arg, Eterm* addr);
 static int print_op(int to, void *to_arg, int op, int size, BeamInstr* addr);
 Eterm
 erts_debug_same_2(Process* p, Eterm term1, Eterm term2)
@@ -157,6 +156,25 @@ void debug_dump_code(BeamInstr *I, int num)
 }
 #endif
 
+BIF_RETTYPE
+erts_debug_instructions_0(BIF_ALIST_0)
+{
+    int i = 0;
+    Uint needed = num_instructions * 2;
+    Eterm* hp;
+    Eterm res = NIL;
+
+    for (i = 0; i < num_instructions; i++) {
+	needed += 2*strlen(opc[i].name);
+    }
+    hp = HAlloc(BIF_P, needed);
+    for (i = num_instructions-1; i >= 0; i--) {
+	Eterm s = erts_bld_string_n(&hp, 0, opc[i].name, strlen(opc[i].name));
+	res = erts_bld_cons(&hp, 0, s, res);
+    }
+    return res;
+}
+
 Eterm
 erts_debug_disassemble_1(Process* p, Eterm addr)
 {
@@ -312,6 +330,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
     BeamInstr packed = 0;		/* Accumulator for packed operations. */
     BeamInstr args[8];		/* Arguments for this instruction. */
     BeamInstr* ap;			/* Pointer to arguments. */
+    BeamInstr* unpacked;		/* Unpacked arguments */
 
     start_prog = opc[op].pack;
 
@@ -360,6 +379,12 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
 		*ap++ = packed & BEAM_LOOSE_MASK;
 		packed >>= BEAM_LOOSE_SHIFT;
 		break;
+#ifdef ARCH_64
+	    case 'w':		/* Shift 32 steps */
+		*ap++ = packed & BEAM_WIDE_MASK;
+		packed >>= BEAM_WIDE_SHIFT;
+		break;
+#endif
 	    case 'p':
 		*sp++ = *--ap;
 		break;
@@ -386,7 +411,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
 	    break;
 	case 'x':		/* x(N) */
 	    if (reg_index(ap[0]) == 0) {
-		erts_print(to, to_arg, "X[0]");
+		erts_print(to, to_arg, "x[0]");
 	    } else {
 		erts_print(to, to_arg, "x(%d)", reg_index(ap[0]));
 	    }
@@ -506,6 +531,7 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
 	    ap++;
 	    break;
 	case 'P':	/* Byte offset into tuple (see beam_load.c) */
+	case 'Q':	/* Like 'P', but packable */
 	    erts_print(to, to_arg, "%d", (*ap / sizeof(Eterm)) - 1);
 	    ap++;
 	    break;
@@ -526,9 +552,12 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
      * Print more information about certain instructions.
      */
 
+    unpacked = ap;
     ap = addr + size;
     switch (op) {
-    case op_i_select_val_sfI:
+    case op_i_select_val_rfI:
+    case op_i_select_val_xfI:
+    case op_i_select_val_yfI:
 	{
 	    int n = ap[-1];
 
@@ -540,7 +569,24 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
 	    }
 	}
 	break;
-    case op_i_jump_on_val_sfII:
+    case op_i_select_tuple_arity_rfI:
+    case op_i_select_tuple_arity_xfI:
+    case op_i_select_tuple_arity_yfI:
+	{
+	    int n = ap[-1];
+
+	    while (n > 0) {
+		Uint arity = arityval(ap[0]);
+		erts_print(to, to_arg, " {%d} f(" HEXF ")", arity, ap[1]);
+		ap += 2;
+		size += 2;
+		n--;
+	    }
+	}
+	break;
+    case op_i_jump_on_val_rfII:
+    case op_i_jump_on_val_xfII:
+    case op_i_jump_on_val_yfII:
 	{
 	    int n;
 	    for (n = ap[-2]; n > 0; n--) {
@@ -550,39 +596,46 @@ print_op(int to, void *to_arg, int op, int size, BeamInstr* addr)
 	    }
 	}
 	break;
-    case op_i_select_big_sf:
-	while (ap[0]) {
-	    Eterm *bigp = (Eterm *) ap;
-	    int arity = thing_arityval(*bigp);
-	    print_big(to, to_arg, bigp);
-	    size += TermWords(arity+1);
-	    ap += TermWords(arity+1);
-	    erts_print(to, to_arg, " f(" HEXF ") ", ap[0]);
-	    ap++;
-	    size++;
+    case op_i_jump_on_val_zero_rfI:
+    case op_i_jump_on_val_zero_xfI:
+    case op_i_jump_on_val_zero_yfI:
+	{
+	    int n;
+	    for (n = ap[-1]; n > 0; n--) {
+		erts_print(to, to_arg, "f(" HEXF ") ", ap[0]);
+		ap++;
+		size++;
+	    }
+	}
+	break;
+    case op_i_put_tuple_rI:
+    case op_i_put_tuple_xI:
+    case op_i_put_tuple_yI:
+	{
+	    int n = unpacked[-1];
+
+	    while (n > 0) {
+		if (!is_header(ap[0])) {
+		    erts_print(to, to_arg, " %T", (Eterm) ap[0]);
+		} else {
+		    switch ((ap[0] >> 2) & 0x03) {
+		    case R_REG_DEF:
+			erts_print(to, to_arg, " x(0)");
+			break;
+		    case X_REG_DEF:
+			erts_print(to, to_arg, " x(%d)", ap[0] >> 4);
+			break;
+		    case Y_REG_DEF:
+			erts_print(to, to_arg, " y(%d)", ap[0] >> 4);
+			break;
+		    }
+		}
+		ap++, size++, n--;
+	    }
 	}
-	ap++;
-	size++;
 	break;
     }
     erts_print(to, to_arg, "\n");
 
     return size;
 }
-
-static void
-print_big(int to, void *to_arg, Eterm* addr)
-{
-    int i;
-    int k;
-
-    i = BIG_SIZE(addr);
-    if (BIG_SIGN(addr))
-	erts_print(to, to_arg, "-#integer(%d) = {", i);
-    else
-	erts_print(to, to_arg, "#integer(%d) = {", i);
-    erts_print(to, to_arg, "0x%x", BIG_DIGIT(addr, 0));
-    for (k = 1; k < i; k++)
-	erts_print(to, to_arg, ",0x%x", BIG_DIGIT(addr, k));
-    erts_print(to, to_arg, "}");
-}
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index 8a0e12dd4f..16741aa2d7 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -344,6 +344,8 @@ extern int count_instructions;
 #define xb(N) (*(Eterm *) (((unsigned char *)reg) + (N)))
 #define yb(N) (*(Eterm *) (((unsigned char *)E) + (N)))
 #define fb(N) (*(double *) (((unsigned char *)&(freg[0].fd)) + (N)))
+#define Qb(N) (N)
+#define Ib(N) (N)
 #define x(N) reg[N]
 #define y(N) E[N]
 #define r(N) x##N
@@ -472,6 +474,13 @@ extern int count_instructions;
     HEAP_SPACE_VERIFIED(need);                                      \
   } while (0)
 
+#define TestHeapPutList(Need, Reg)		\
+  do {						\
+     TestHeap((Need), 1);			\
+     PutList(Reg, r(0), r(0), StoreSimpleDest);	\
+     CHECK_TERM(r(0));				\
+  } while (0)
+
 #ifdef HYBRID
 #ifdef INCREMENTAL
 #define TestGlobalHeap(Nh, Live, hp)                                    \
@@ -516,6 +525,11 @@ extern int count_instructions;
      SWAPIN;							\
   } while (0)
 
+#define PutTuple(Dst, Arity)			\
+ do {						\
+   Dst = make_tuple(HTOP);			\
+   pt_arity = (Arity);				\
+ } while (0)
 
 /*
  * Check that we haven't used the reductions and jump to function pointed to by
@@ -674,6 +688,11 @@ extern int count_instructions;
     SET_I((BeamInstr *) CallDest);			\
     Dispatch();
 
+#define MoveJump(Src)				\
+     r(0) = (Src);				\
+     SET_I((BeamInstr *) Arg(0));		\
+     Goto(*I);
+
 #define GetList(Src, H, T) do {			\
    Eterm* tmp_ptr = list_val(Src);		\
    H = CAR(tmp_ptr);				\
@@ -723,16 +742,8 @@ extern int count_instructions;
      (Dest) = (* (Eterm *) EXPAND_POINTER(tmp_arg1));		\
   } while (0)
 
-#define PutTuple(Arity, Src, Dest)		\
-     ASSERT(is_arity_value(Arity));		\
-     Dest = make_tuple(HTOP);			\
-     HTOP[0] = (Arity);				\
-     HTOP[1] = (Src);				\
-     HTOP += 2
-
-#define Put(Word) *HTOP++ = (Word)
-
 #define EqualImmed(X, Y, Action) if (X != Y) { Action; }
+#define NotEqualImmed(X, Y, Action) if (X == Y) { Action; }
 
 #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; }
 
@@ -984,8 +995,41 @@ extern int count_instructions;
 #define IsPid(Src, Fail) if (is_not_pid(Src)) { Fail; }
 #define IsRef(Src, Fail) if (is_not_ref(Src)) { Fail; }
 
-static BifFunction translate_gc_bif(void* gcf);
-static BeamInstr* handle_error(Process* c_p, BeamInstr* pc, Eterm* reg, BifFunction bf);
+/*
+ * process_main() is already huge, so we want to avoid inlining
+ * into it. Especially functions that are seldom used.
+ */
+#ifdef __GNUC__
+#  define NOINLINE __attribute__((__noinline__))
+#else
+#  define NOINLINE
+#endif
+
+/*
+ * The following functions are called directly by process_main().
+ * Don't inline them.
+ */
+static BifFunction translate_gc_bif(void* gcf) NOINLINE;
+static BeamInstr* handle_error(Process* c_p, BeamInstr* pc,
+			       Eterm* reg, BifFunction bf) NOINLINE;
+static BeamInstr* call_error_handler(Process* p, BeamInstr* ip,
+				     Eterm* reg, Eterm func) NOINLINE;
+static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity) NOINLINE;
+static BeamInstr* apply(Process* p, Eterm module, Eterm function,
+			Eterm args, Eterm* reg) NOINLINE;
+static int hibernate(Process* c_p, Eterm module, Eterm function,
+		     Eterm args, Eterm* reg) NOINLINE;
+static BeamInstr* call_fun(Process* p, int arity,
+			   Eterm* reg, Eterm args) NOINLINE;
+static BeamInstr* apply_fun(Process* p, Eterm fun,
+			    Eterm args, Eterm* reg) NOINLINE;
+static Eterm new_fun(Process* p, Eterm* reg,
+		     ErlFunEntry* fe, int num_free) NOINLINE;
+
+
+/*
+ * Functions not directly called by process_main(). OK to inline.
+ */
 static BeamInstr* next_catch(Process* c_p, Eterm *reg);
 static void terminate_proc(Process* c_p, Eterm Value);
 static Eterm add_stacktrace(Process* c_p, Eterm Value, Eterm exc);
@@ -993,16 +1037,6 @@ static void save_stacktrace(Process* c_p, BeamInstr* pc, Eterm* reg,
 			     BifFunction bf, Eterm args);
 static struct StackTrace * get_trace_from_exc(Eterm exc);
 static Eterm make_arglist(Process* c_p, Eterm* reg, int a);
-static Eterm call_error_handler(Process* p, BeamInstr* ip, Eterm* reg);
-static Eterm call_breakpoint_handler(Process* p, BeamInstr* fi, Eterm* reg);
-static BeamInstr* fixed_apply(Process* p, Eterm* reg, Uint arity);
-static BeamInstr* apply(Process* p, Eterm module, Eterm function,
-		     Eterm args, Eterm* reg);
-static int hibernate(Process* c_p, Eterm module, Eterm function,
-		     Eterm args, Eterm* reg);
-static BeamInstr* call_fun(Process* p, int arity, Eterm* reg, Eterm args);
-static BeamInstr* apply_fun(Process* p, Eterm fun, Eterm args, Eterm* reg);
-static Eterm new_fun(Process* p, Eterm* reg, ErlFunEntry* fe, int num_free);
 
 #if defined(VXWORKS)
 static int init_done;
@@ -1146,6 +1180,8 @@ void process_main(void)
 
     Uint temp_bits; /* Temporary used by BsSkipBits2 & BsGetInteger2 */
 
+    Eterm pt_arity;		/* Used by do_put_tuple */
+
     ERL_BITS_DECLARE_STATEP; /* Has to be last declaration */
 
 
@@ -1246,6 +1282,52 @@ void process_main(void)
 #define STORE_ARITH_RESULT(res) StoreBifResult(2, (res));
 #define ARITH_FUNC(name) erts_gc_##name
 
+	{
+	    Eterm increment_reg_val;
+	    Eterm increment_val;
+	    Uint live;
+	    Eterm result;
+
+	OpCase(i_increment_yIId):
+	    increment_reg_val = yb(Arg(0));
+	    goto do_increment;
+
+	OpCase(i_increment_xIId):
+	    increment_reg_val = xb(Arg(0));
+	    goto do_increment;
+
+	OpCase(i_increment_rIId):
+	    increment_reg_val = r(0);
+	    I--;
+
+	do_increment:
+	    increment_val = Arg(1);
+	    if (is_small(increment_reg_val)) {
+		Sint i = signed_val(increment_reg_val) + increment_val;
+		ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i));
+		if (MY_IS_SSMALL(i)) {
+		    result = make_small(i);
+		store_result:
+		    StoreBifResult(3, result);
+		}
+	    }
+
+	    live = Arg(2);
+	    SWAPOUT;
+	    reg[0] = r(0);
+	    reg[live] = increment_reg_val;
+	    reg[live+1] = make_small(increment_val);
+	    result = erts_gc_mixed_plus(c_p, reg, live);
+	    r(0) = reg[0];
+	    SWAPIN;
+	    ERTS_HOLE_CHECK(c_p);
+	    if (is_value(result)) {
+		goto store_result;
+	    }
+	    ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue));
+	    goto find_func_info;
+	}
+	    
  OpCase(i_plus_jId):
  {
      Eterm result;
@@ -1309,6 +1391,52 @@ void process_main(void)
     }
     Next(1);
 
+    {
+	Eterm is_eq_exact_lit_val;
+
+    OpCase(i_is_eq_exact_literal_xfc):
+	is_eq_exact_lit_val = xb(Arg(0));
+	I++;
+	goto do_is_eq_exact_literal;
+
+    OpCase(i_is_eq_exact_literal_yfc):
+	is_eq_exact_lit_val = yb(Arg(0));
+	I++;
+	goto do_is_eq_exact_literal;
+
+    OpCase(i_is_eq_exact_literal_rfc):
+	is_eq_exact_lit_val = r(0);
+
+    do_is_eq_exact_literal:
+	if (!eq(Arg(1), is_eq_exact_lit_val)) {
+	    ClauseFail();
+	}
+	Next(2);
+    }
+
+    {
+	Eterm is_ne_exact_lit_val;
+
+    OpCase(i_is_ne_exact_literal_xfc):
+	is_ne_exact_lit_val = xb(Arg(0));
+	I++;
+	goto do_is_ne_exact_literal;
+
+    OpCase(i_is_ne_exact_literal_yfc):
+	is_ne_exact_lit_val = yb(Arg(0));
+	I++;
+	goto do_is_ne_exact_literal;
+
+    OpCase(i_is_ne_exact_literal_rfc):
+	is_ne_exact_lit_val = r(0);
+
+    do_is_ne_exact_literal:
+	if (eq(Arg(1), is_ne_exact_lit_val)) {
+	    ClauseFail();
+	}
+	Next(2);
+    }
+
  OpCase(i_move_call_only_fcr): {
      r(0) = Arg(1);
  }
@@ -1392,6 +1520,17 @@ void process_main(void)
      NextPF(1, next);
  }
 
+ OpCase(move_x1_c): {
+	x(1) = Arg(0);
+	Next(1);
+    }
+
+ OpCase(move_x2_c): {
+	x(2) = Arg(0);
+	Next(1);
+    }
+
+
  OpCase(return): {
     SET_I(c_p->cp);
     /*
@@ -1405,16 +1544,6 @@ void process_main(void)
     Goto(*I);
  }
 
- OpCase(test_heap_1_put_list_Iy): {
-     BeamInstr *next;
-
-     PreFetch(2, next);
-     TestHeap(Arg(0), 1);
-     PutList(yb(Arg(1)), r(0), r(0), StoreSimpleDest);
-     CHECK_TERM(r(0));
-     NextPF(2, next);
- }
-
     /*
      * Send is almost a standard call-BIF with two arguments, except for:
      *    1) It cannot be traced.
@@ -1447,24 +1576,36 @@ void process_main(void)
      goto find_func_info;
  }
 
- OpCase(i_element_jssd): {
-     Eterm index;
-     Eterm tuple;
-
-     /*
-      * Inlined version of element/2 for speed.
-      */
-     GetArg2(1, index, tuple);
-     if (is_small(index) && is_tuple(tuple)) {
-	 Eterm* tp = tuple_val(tuple);
-
-	 if ((signed_val(index) >= 1) &&
-	     (signed_val(index) <= arityval(*tp))) {
-	     Eterm result = tp[signed_val(index)];
-	     StoreBifResult(3, result);
-	 }
-     }
- }
+    {
+	Eterm element_index;
+	Eterm element_tuple;
+
+    OpCase(i_element_xjsd):
+	element_tuple = xb(Arg(0));
+	I++;
+	goto do_element;
+
+    OpCase(i_element_yjsd):
+	element_tuple = yb(Arg(0));
+	I++;
+	goto do_element;
+
+    OpCase(i_element_rjsd):
+	element_tuple = r(0);
+	/* Fall through */
+
+    do_element:
+	GetArg1(1, element_index);
+	if (is_small(element_index) && is_tuple(element_tuple)) {
+	    Eterm* tp = tuple_val(element_tuple);
+
+	    if ((signed_val(element_index) >= 1) &&
+		(signed_val(element_index) <= arityval(*tp))) {
+		Eterm result = tp[signed_val(element_index)];
+		StoreBifResult(2, result);
+	    }
+	}
+    }
  /* Fall through */
 
  OpCase(badarg_j):
@@ -1472,24 +1613,32 @@ void process_main(void)
     c_p->freason = BADARG;
     goto lb_Cl_error;
 
- OpCase(i_fast_element_jIsd): {
-     Eterm tuple;
-
-     /*
-      * Inlined version of element/2 for even more speed.
-      * The first argument is an untagged integer >= 1.
-      * The second argument is guaranteed to be a register operand.
-      */
-     GetArg1(2, tuple);
-     if (is_tuple(tuple)) {
-	 Eterm* tp = tuple_val(tuple);
-	 tmp_arg2 = Arg(1);
-	 if (tmp_arg2 <= arityval(*tp)) {
-	     Eterm result = tp[tmp_arg2];
-	     StoreBifResult(3, result);
-	 }
-     }
+    {
+	Eterm fast_element_tuple;
+
+    OpCase(i_fast_element_rjId):
+	fast_element_tuple = r(0);
+
+    do_fast_element:
+	if (is_tuple(fast_element_tuple)) {
+	    Eterm* tp = tuple_val(fast_element_tuple);
+	    Eterm pos = Arg(1);	/* Untagged integer >= 1 */
+	    if (pos <= arityval(*tp)) {
+		Eterm result = tp[pos];
+		StoreBifResult(2, result);
+	    }
+	}
      goto badarg;
+
+    OpCase(i_fast_element_xjId):
+     fast_element_tuple = xb(Arg(0));
+     I++;
+     goto do_fast_element;
+
+    OpCase(i_fast_element_yjId):
+     fast_element_tuple = yb(Arg(0));
+     I++;
+     goto do_fast_element;
  }
 
  OpCase(catch_yf):
@@ -1842,8 +1991,87 @@ void process_main(void)
      NextPF(0, next);
  }
 
- OpCase(i_select_val_sfI):
-     GetArg1(0, tmp_arg1);
+
+ {
+     Eterm select_val2;
+
+ OpCase(i_select_tuple_arity2_yfAfAf):
+     select_val2 = yb(Arg(0));
+     goto do_select_tuple_arity2;
+
+ OpCase(i_select_tuple_arity2_xfAfAf):
+     select_val2 = xb(Arg(0));
+     goto do_select_tuple_arity2;
+
+ OpCase(i_select_tuple_arity2_rfAfAf):
+     select_val2 = r(0);
+     I--;
+
+ do_select_tuple_arity2:
+     if (is_not_tuple(select_val2)) {
+	 goto select_val2_fail;
+     }
+     select_val2 = *tuple_val(select_val2);
+     goto do_select_val2;
+
+ OpCase(i_select_val2_yfcfcf):
+     select_val2 = yb(Arg(0));
+     goto do_select_val2;
+
+ OpCase(i_select_val2_xfcfcf):
+     select_val2 = xb(Arg(0));
+     goto do_select_val2;
+
+ OpCase(i_select_val2_rfcfcf):
+     select_val2 = r(0);
+     I--;
+
+ do_select_val2:
+     if (select_val2 == Arg(2)) {
+	 I += 2;
+     } else if (select_val2 == Arg(4)) {
+	 I += 4;
+     }
+
+ select_val2_fail:
+     SET_I((BeamInstr *) Arg(1));
+     Goto(*I);
+ }
+
+ {
+     Eterm select_val;
+
+ OpCase(i_select_tuple_arity_xfI):
+     select_val = xb(Arg(0));
+     goto do_select_tuple_arity;
+
+ OpCase(i_select_tuple_arity_yfI):
+     select_val = yb(Arg(0));
+     goto do_select_tuple_arity;
+
+ OpCase(i_select_tuple_arity_rfI):
+     select_val = r(0);
+     I--;
+
+ do_select_tuple_arity:
+     if (is_tuple(select_val)) {
+	 select_val = *tuple_val(select_val);
+	 goto do_binary_search;
+     }
+     SET_I((BeamInstr *) Arg(1));
+     Goto(*I);
+
+ OpCase(i_select_val_xfI):
+     select_val = xb(Arg(0));
+     goto do_binary_search;
+
+ OpCase(i_select_val_yfI):
+     select_val = yb(Arg(0));
+     goto do_binary_search;
+     
+ OpCase(i_select_val_rfI):
+     select_val = r(0);
+     I--;
 
  do_binary_search:
  {
@@ -1880,9 +2108,9 @@ void process_main(void)
 	 unsigned int boffset = ((unsigned int)bdiff >> 1) & ~(sizeof(struct Pairs)-1);
 
 	 mid = (struct Pairs*)((char*)low + boffset);
-	 if (tmp_arg1 < mid->val) {
+	 if (select_val < mid->val) {
 	     high = mid;
-	 } else if (tmp_arg1 > mid->val) {
+	 } else if (select_val > mid->val) {
 	     low = mid + 1;
 	 } else {
 	     SET_I(mid->addr);
@@ -1892,16 +2120,28 @@ void process_main(void)
      SET_I((BeamInstr *) Arg(1));
      Goto(*I);
  }
+ }
 
- OpCase(i_jump_on_val_zero_sfI):
  {
-     Eterm index;
-
-     GetArg1(0, index);
-     if (is_small(index)) {
-	 index = signed_val(index);
-	 if (index < Arg(2)) {
-	     SET_I((BeamInstr *) (&Arg(3))[index]);
+     Eterm jump_on_val_zero_index;
+     
+ OpCase(i_jump_on_val_zero_yfI):
+     jump_on_val_zero_index = yb(Arg(0));
+     goto do_jump_on_val_zero_index;
+
+ OpCase(i_jump_on_val_zero_xfI):
+     jump_on_val_zero_index = xb(Arg(0));
+     goto do_jump_on_val_zero_index;
+
+ OpCase(i_jump_on_val_zero_rfI):
+     jump_on_val_zero_index = r(0);
+     I--;
+
+ do_jump_on_val_zero_index:
+     if (is_small(jump_on_val_zero_index)) {
+	 jump_on_val_zero_index = signed_val(jump_on_val_zero_index);
+	 if (jump_on_val_zero_index < Arg(2)) {
+	     SET_I((BeamInstr *) (&Arg(3))[jump_on_val_zero_index]);
 	     Goto(*I);
 	 }
      }
@@ -1909,15 +2149,27 @@ void process_main(void)
      Goto(*I);
  }
 
- OpCase(i_jump_on_val_sfII):
  {
-     Eterm index;
+     Eterm jump_on_val_index;
 
-     GetArg1(0, index);
-     if (is_small(index)) {
-	 index = (Uint) (signed_val(index) - Arg(3));
-	 if (index < Arg(2)) {
-	     SET_I((BeamInstr *) (&Arg(4))[index]);
+ 
+ OpCase(i_jump_on_val_yfII):
+     jump_on_val_index = yb(Arg(0));
+     goto do_jump_on_val_index;
+
+ OpCase(i_jump_on_val_xfII):
+     jump_on_val_index = xb(Arg(0));
+     goto do_jump_on_val_index;
+
+ OpCase(i_jump_on_val_rfII):
+     jump_on_val_index = r(0);
+     I--;
+
+ do_jump_on_val_index:
+     if (is_small(jump_on_val_index)) {
+	 jump_on_val_index = (Uint) (signed_val(jump_on_val_index) - Arg(3));
+	 if (jump_on_val_index < Arg(2)) {
+	     SET_I((BeamInstr *) (&Arg(4))[jump_on_val_index]);
 	     Goto(*I);
 	 }
      }
@@ -1925,6 +2177,32 @@ void process_main(void)
      Goto(*I);
  }
 
+ do_put_tuple: {
+     Eterm* hp = HTOP;
+
+     *hp++ = make_arityval(pt_arity);
+
+     do {
+	 Eterm term = *I++;
+	 switch (term & _TAG_IMMED1_MASK) {
+	 case (R_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER:
+	     *hp++ = r(0);
+	     break;
+	 case (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER:
+	     *hp++ = x(term >> _TAG_IMMED1_SIZE);
+	     break;
+	 case (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER:
+	     *hp++ = y(term >> _TAG_IMMED1_SIZE);
+	     break;
+	 default:
+	     *hp++ = term;
+	     break;
+	 }
+     } while (--pt_arity != 0);
+     HTOP = hp;
+     Goto(*I);
+ }
+
     /*
      * All guards with zero arguments have special instructions:
      * 	self/0
@@ -2562,23 +2840,25 @@ void process_main(void)
 
  OpCase(i_int_bnot_jsId):
  {
-     GetArg1(1, tmp_arg1);
-     if (is_small(tmp_arg1)) {
-	 tmp_arg1 = make_small(~signed_val(tmp_arg1));
+     Eterm bnot_val;
+
+     GetArg1(1, bnot_val);
+     if (is_small(bnot_val)) {
+	 bnot_val = make_small(~signed_val(bnot_val));
      } else {
 	 Uint live = Arg(2);
 	 SWAPOUT;
 	 reg[0] = r(0);
-	 reg[live] = tmp_arg1;
-	 tmp_arg1 = erts_gc_bnot(c_p, reg, live);
+	 reg[live] = bnot_val;
+	 bnot_val = erts_gc_bnot(c_p, reg, live);
 	 r(0) = reg[0];
 	 SWAPIN;
 	 ERTS_HOLE_CHECK(c_p);
-	 if (is_nil(tmp_arg1)) {
+	 if (is_nil(bnot_val)) {
 	     goto lb_Cl_error;
 	 }
      }
-     StoreBifResult(3, tmp_arg1);
+     StoreBifResult(3, bnot_val);
  }
 
  badarith:
@@ -2833,121 +3113,6 @@ void process_main(void)
      goto do_schedule1;
  }
 
- OpCase(i_select_tuple_arity_sfI):
- {
-     GetArg1(0, tmp_arg1);
-
-     if (is_tuple(tmp_arg1)) {
-	 tmp_arg1 = *tuple_val(tmp_arg1);
-	 goto do_binary_search;
-     }
-     SET_I((BeamInstr *) Arg(1));
-     Goto(*I);
- }     
-
- OpCase(i_select_big_sf):
-    {
-	Eterm* bigp;
-	Uint arity;
-	Eterm* given;
-	Uint given_arity;
-	Uint given_size;
-
-	GetArg1(0, tmp_arg1);
-	if (is_big(tmp_arg1)) {
-
-	    /*
-	     * The loader has sorted the bignumbers in descending order
-	     * on the arity word.  Therefore, we know that the search
-	     * has failed as soon as we encounter an arity word less than
-	     * the arity word of the given number.  There is a zero word
-	     * (less than any valid arity word) stored after the last bignumber.
-	     */
-
- 	    given = big_val(tmp_arg1);
-	    given_arity = given[0];
-	    given_size = thing_arityval(given_arity);
-	    bigp = (Eterm *) &Arg(2);
-	    while ((arity = bigp[0]) > given_arity) {
-		bigp += (TermWords(thing_arityval(arity) + 1) + 1) * (sizeof(BeamInstr)/sizeof(Eterm));
-	    }
-	    while (bigp[0] == given_arity) {
-		if (memcmp(bigp+1, given+1, sizeof(Eterm)*given_size) == 0) {
-		    BeamInstr *tmp =
-			((BeamInstr *) (UWord) bigp) + TermWords(given_size + 1);
-		    SET_I((BeamInstr *) *tmp);
-		    Goto(*I);
-		}
-		bigp += (TermWords(thing_arityval(arity) + 1) + 1) * (sizeof(BeamInstr)/sizeof(Eterm));
-	    }
-	}
-
-	/*
-	 * Failed.
-	 */
-
-	SET_I((BeamInstr *) Arg(1));
-	Goto(*I);
-    }
-
-#if defined(ARCH_64) && !HALFWORD_HEAP
- OpCase(i_select_float_sfI):
- {
-     Uint f;
-     int n;
-     struct ValLabel {
-	 Uint f;
-	 BeamInstr* addr;
-     };
-     struct ValLabel* ptr;
-
-     GetArg1(0, tmp_arg1);
-     ASSERT(is_float(tmp_arg1));
-     f = float_val(tmp_arg1)[1];
-     n = Arg(2);
-     ptr = (struct ValLabel *) &Arg(3);
-     while (n-- > 0) {
-	 if (ptr->f == f) {
-	     SET_I(ptr->addr);
-	     Goto(*I);
-	 }
-	 ptr++;
-     }
-     SET_I((Eterm *) Arg(1));
-     Goto(*I);
- }
-#else
- OpCase(i_select_float_sfI):
- {
-     Uint fpart1;
-     Uint fpart2;
-     int n;
-     struct ValLabel {
-	 Uint fpart1;
-	 Uint fpart2;
-	 BeamInstr* addr;
-     };
-     struct ValLabel* ptr;
-
-     GetArg1(0, tmp_arg1);
-     ASSERT(is_float(tmp_arg1));
-     fpart1 = float_val(tmp_arg1)[1];
-     fpart2 = float_val(tmp_arg1)[2];
-
-     n = Arg(2);
-     ptr = (struct ValLabel *) &Arg(3);
-     while (n-- > 0) {
-	 if (ptr->fpart1 == fpart1 && ptr->fpart2 == fpart2) {
-	     SET_I(ptr->addr);
-	     Goto(*I);
-	 }
-	 ptr++;
-     }
-     SET_I((BeamInstr *) Arg(1));
-     Goto(*I);
- }
-#endif
-
  OpCase(set_tuple_element_sdP): {
      Eterm element;
      Eterm tuple;
@@ -2993,15 +3158,17 @@ void process_main(void)
 	the first argument. We also handle atom tags in the first
 	argument for backwards compatibility.
      */
-     GetArg2(0, tmp_arg1, tmp_arg2);
-     c_p->fvalue = tmp_arg2;
+     Eterm raise_val1;
+     Eterm raise_val2;
+     GetArg2(0, raise_val1, raise_val2);
+     c_p->fvalue = raise_val2;
      if (c_p->freason == EXC_NULL) {
        /* a safety check for the R10-0 case; should not happen */
        c_p->ftrace = NIL;
        c_p->freason = EXC_ERROR;
      }
      /* for R10-0 code, keep existing c_p->ftrace and hope it's correct */
-     switch (tmp_arg1) {
+     switch (raise_val1) {
      case am_throw:
        c_p->freason = EXC_THROWN & ~EXF_SAVETRACE;
        break;
@@ -3017,8 +3184,8 @@ void process_main(void)
 	   passed from a user! Currently only expecting generated calls.
 	*/
 	 struct StackTrace *s;
-	 c_p->ftrace = tmp_arg1;
-	 s = get_trace_from_exc(tmp_arg1);
+	 c_p->ftrace = raise_val1;
+	 s = get_trace_from_exc(raise_val1);
 	 if (s == NULL) {
 	   c_p->freason = EXC_ERROR;
 	 } else {
@@ -3029,11 +3196,24 @@ void process_main(void)
      goto find_func_info;
  }
 
- OpCase(badmatch_s): {
-     GetArg1(0, tmp_arg1);
-     c_p->fvalue = tmp_arg1;
-     c_p->freason = BADMATCH;
- }
+    {
+	Eterm badmatch_val;
+
+    OpCase(badmatch_y):
+	badmatch_val = yb(Arg(0));
+	goto do_badmatch;
+
+    OpCase(badmatch_x):
+	badmatch_val = xb(Arg(0));
+	goto do_badmatch;
+
+    OpCase(badmatch_r):
+	badmatch_val = r(0);
+
+    do_badmatch:
+	c_p->fvalue = badmatch_val;
+	c_p->freason = BADMATCH;
+    }
  /* Fall through here */
 
  find_func_info: {
@@ -3056,12 +3236,11 @@ void process_main(void)
      */
     SWAPOUT;
     reg[0] = r(0);
-    tmp_arg1 = call_error_handler(c_p, I-3, reg);
+    I = call_error_handler(c_p, I-3, reg, am_undefined_function);
     r(0) = reg[0];
     SWAPIN;
-    if (tmp_arg1) {
-	SET_I(c_p->i);
-	Dispatch();
+    if (I) {
+	Goto(*I);
     }
 
  /* Fall through */
@@ -3084,128 +3263,142 @@ void process_main(void)
      }
  }
 
- OpCase(call_nif):
-     {
-          /*
-	   * call_nif is always first instruction in function:
-	   *
-	   * I[-3]: Module
-	   * I[-2]: Function
-	   * I[-1]: Arity
-	   * I[0]: &&call_nif
-	   * I[1]: Function pointer to NIF function
-	   * I[2]: Pointer to erl_module_nif
-	   */
-     	 BifFunction vbf;
-
-	 c_p->current = I-3; /* current and vbf set to please handle_error */ 
-	 SWAPOUT;
-	 c_p->fcalls = FCALLS - 1;
-	 PROCESS_MAIN_CHK_LOCKS(c_p);
-	 tmp_arg2 = I[-1];
-	 ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p);
+    {
+	Eterm nif_bif_result;
+	Eterm bif_nif_arity;
 
-	 ASSERT(!ERTS_PROC_IS_EXITING(c_p));
-	 {
-	     typedef Eterm NifF(struct enif_environment_t*, int argc, Eterm argv[]);
-	     NifF* fp = vbf = (NifF*) I[1];
-	     struct enif_environment_t env;
-	     erts_pre_nif(&env, c_p, (struct erl_module_nif*)I[2]);
-	     reg[0] = r(0);
-	     tmp_arg1 = (*fp)(&env, tmp_arg2, reg);
-	     erts_post_nif(&env);
-	 }
-	 ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1));
-	 PROCESS_MAIN_CHK_LOCKS(c_p);
-	 goto apply_bif_or_nif_epilogue;
-	 
- OpCase(apply_bif):
-	/*
-	 * At this point, I points to the code[3] in the export entry for
-	 * the BIF:
-	 *
-	 * code[0]: Module
-	 * code[1]: Function
-	 * code[2]: Arity
-	 * code[3]: &&apply_bif
-	 * code[4]: Function pointer to BIF function
-	 */
+    OpCase(call_nif):
+	{
+	    /*
+	     * call_nif is always first instruction in function:
+	     *
+	     * I[-3]: Module
+	     * I[-2]: Function
+	     * I[-1]: Arity
+	     * I[0]: &&call_nif
+	     * I[1]: Function pointer to NIF function
+	     * I[2]: Pointer to erl_module_nif
+	     */
+	    BifFunction vbf;
 
-	c_p->current = I-3;	/* In case we apply process_info/1,2 or load_nif/1 */
-	c_p->i = I;		/* In case we apply check_process_code/2. */
-	c_p->arity = 0;		/* To allow garbage collection on ourselves
-				 * (check_process_code/2).
-				 */
-	SWAPOUT;
-	c_p->fcalls = FCALLS - 1;
-	vbf = (BifFunction) Arg(0);
-	PROCESS_MAIN_CHK_LOCKS(c_p);
-	tmp_arg2 = I[-1];
-	ASSERT(tmp_arg2 <= 3);
-	ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p);
-	switch (tmp_arg2) {
-	case 3:
+	    c_p->current = I-3; /* current and vbf set to please handle_error */ 
+	    SWAPOUT;
+	    c_p->fcalls = FCALLS - 1;
+	    PROCESS_MAIN_CHK_LOCKS(c_p);
+	    bif_nif_arity = I[-1];
+	    ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p);
+
+	    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
 	    {
-		Eterm (*bf)(Process*, Eterm, Eterm, Eterm, BeamInstr*) = vbf;
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p));
-		tmp_arg1 = (*bf)(c_p, r(0), x(1), x(2), I);
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1));
-		PROCESS_MAIN_CHK_LOCKS(c_p);
+		typedef Eterm NifF(struct enif_environment_t*, int argc, Eterm argv[]);
+		NifF* fp = vbf = (NifF*) I[1];
+		struct enif_environment_t env;
+		erts_pre_nif(&env, c_p, (struct erl_module_nif*)I[2]);
+		reg[0] = r(0);
+		nif_bif_result = (*fp)(&env, bif_nif_arity, reg);
+		erts_post_nif(&env);
 	    }
-	    break;
-	case 2:
-	    {
-		Eterm (*bf)(Process*, Eterm, Eterm, BeamInstr*) = vbf;
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p));
-		tmp_arg1 = (*bf)(c_p, r(0), x(1), I);
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1));
-		PROCESS_MAIN_CHK_LOCKS(c_p);
+	    ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(nif_bif_result));
+	    PROCESS_MAIN_CHK_LOCKS(c_p);
+	    goto apply_bif_or_nif_epilogue;
+	 
+	OpCase(apply_bif):
+	    /*
+	     * At this point, I points to the code[3] in the export entry for
+	     * the BIF:
+	     *
+	     * code[0]: Module
+	     * code[1]: Function
+	     * code[2]: Arity
+	     * code[3]: &&apply_bif
+	     * code[4]: Function pointer to BIF function
+	     */
+
+	    c_p->current = I-3;	/* In case we apply process_info/1,2 or load_nif/1 */
+	    c_p->i = I;		/* In case we apply check_process_code/2. */
+	    c_p->arity = 0;		/* To allow garbage collection on ourselves
+					 * (check_process_code/2).
+					 */
+	    SWAPOUT;
+	    c_p->fcalls = FCALLS - 1;
+	    vbf = (BifFunction) Arg(0);
+	    PROCESS_MAIN_CHK_LOCKS(c_p);
+	    bif_nif_arity = I[-1];
+	    ASSERT(bif_nif_arity <= 3);
+	    ERTS_SMP_UNREQ_PROC_MAIN_LOCK(c_p);
+	    switch (bif_nif_arity) {
+	    case 3:
+		{
+		    Eterm (*bf)(Process*, Eterm, Eterm, Eterm, BeamInstr*) = vbf;
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
+		    nif_bif_result = (*bf)(c_p, r(0), x(1), x(2), I);
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p) ||
+			   is_non_value(nif_bif_result));
+		    PROCESS_MAIN_CHK_LOCKS(c_p);
+		}
+		break;
+	    case 2:
+		{
+		    Eterm (*bf)(Process*, Eterm, Eterm, BeamInstr*) = vbf;
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
+		    nif_bif_result = (*bf)(c_p, r(0), x(1), I);
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p) ||
+			   is_non_value(nif_bif_result));
+		    PROCESS_MAIN_CHK_LOCKS(c_p);
+		}
+		break;
+	    case 1:
+		{
+		    Eterm (*bf)(Process*, Eterm, BeamInstr*) = vbf;
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
+		    nif_bif_result = (*bf)(c_p, r(0), I);
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p) ||
+			   is_non_value(nif_bif_result));
+		    PROCESS_MAIN_CHK_LOCKS(c_p);
+		}
+		break;
+	    case 0:
+		{
+		    Eterm (*bf)(Process*, BeamInstr*) = vbf;
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p));
+		    nif_bif_result = (*bf)(c_p, I);
+		    ASSERT(!ERTS_PROC_IS_EXITING(c_p) ||
+			   is_non_value(nif_bif_result));
+		    PROCESS_MAIN_CHK_LOCKS(c_p);
+		    break;
+		}
+	    default:
+		erl_exit(1, "apply_bif: invalid arity: %u\n",
+			 bif_nif_arity);
 	    }
-	    break;
-	case 1:
-	    {
-		Eterm (*bf)(Process*, Eterm, BeamInstr*) = vbf;
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p));
-		tmp_arg1 = (*bf)(c_p, r(0), I);
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1));
-		PROCESS_MAIN_CHK_LOCKS(c_p);
+
+	apply_bif_or_nif_epilogue:
+	    ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p);
+	    ERTS_HOLE_CHECK(c_p);
+	    if (c_p->mbuf) {
+		reg[0] = r(0);
+		nif_bif_result = erts_gc_after_bif_call(c_p, nif_bif_result,
+						  reg, bif_nif_arity);
+		r(0) = reg[0];
 	    }
-	    break;
-	case 0:
-	    {
-		Eterm (*bf)(Process*, BeamInstr*) = vbf;
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p));
-		tmp_arg1 = (*bf)(c_p, I);
-		ASSERT(!ERTS_PROC_IS_EXITING(c_p) || is_non_value(tmp_arg1));
-		PROCESS_MAIN_CHK_LOCKS(c_p);
-		break;
+	    SWAPIN;  /* There might have been a garbage collection. */
+	    FCALLS = c_p->fcalls;
+	    if (is_value(nif_bif_result)) {
+		r(0) = nif_bif_result;
+		CHECK_TERM(r(0));
+		SET_I(c_p->cp);
+		Goto(*I);
+	    } else if (c_p->freason == TRAP) {
+		SET_I(*((BeamInstr **) (UWord) ((c_p)->def_arg_reg + 3)));
+		r(0) = c_p->def_arg_reg[0];
+		x(1) = c_p->def_arg_reg[1];
+		x(2) = c_p->def_arg_reg[2];
+		Dispatch();
 	    }
-	}
-apply_bif_or_nif_epilogue:
-	ERTS_SMP_REQ_PROC_MAIN_LOCK(c_p);
-	ERTS_HOLE_CHECK(c_p);
-	if (c_p->mbuf) {
 	    reg[0] = r(0);
-	    tmp_arg1 = erts_gc_after_bif_call(c_p, tmp_arg1, reg, tmp_arg2);
-	    r(0) = reg[0];
-	}
-	SWAPIN;			/* There might have been a garbage collection. */
-	FCALLS = c_p->fcalls;
-	if (is_value(tmp_arg1)) {
-	    r(0) = tmp_arg1;
-	    CHECK_TERM(r(0));
-	    SET_I(c_p->cp);
-	    Goto(*I);
-	} else if (c_p->freason == TRAP) {
-	    SET_I(*((BeamInstr **) (UWord) ((c_p)->def_arg_reg + 3)));
-	    r(0) = c_p->def_arg_reg[0];
-	    x(1) = c_p->def_arg_reg[1];
-	    x(2) = c_p->def_arg_reg[2];
-	    Dispatch();
+	    I = handle_error(c_p, c_p->cp, reg, vbf);
+	    goto post_error_handling;
 	}
-	reg[0] = r(0);
-	I = handle_error(c_p, c_p->cp, reg, vbf);
-	goto post_error_handling;
     }
 
  OpCase(i_get_sd):
@@ -3218,11 +3411,26 @@ apply_bif_or_nif_epilogue:
 	StoreBifResult(1, result);
     }
 
- OpCase(case_end_s):
-    GetArg1(0, tmp_arg1);
-    c_p->fvalue = tmp_arg1;
-    c_p->freason = EXC_CASE_CLAUSE;
-    goto find_func_info;
+    {
+	Eterm case_end_val;
+
+    OpCase(case_end_x):
+	case_end_val = xb(Arg(0));
+	goto do_case_end;
+
+    OpCase(case_end_y):
+	case_end_val = yb(Arg(0));
+	goto do_case_end;
+
+    OpCase(case_end_r):
+	case_end_val = r(0);
+	I--;
+
+    do_case_end:
+	c_p->fvalue = case_end_val;
+	c_p->freason = EXC_CASE_CLAUSE;
+	goto find_func_info;
+    }
 
  OpCase(if_end):
     c_p->freason = EXC_IF_CLAUSE;
@@ -3235,10 +3443,13 @@ apply_bif_or_nif_epilogue:
  }
 
  OpCase(try_case_end_s):
-    GetArg1(0, tmp_arg1);
-    c_p->fvalue = tmp_arg1;
-    c_p->freason = EXC_TRY_CLAUSE;
-    goto find_func_info;
+    {
+	Eterm try_case_end_val;
+	GetArg1(0, try_case_end_val);
+	c_p->fvalue = try_case_end_val;
+	c_p->freason = EXC_TRY_CLAUSE;
+	goto find_func_info;
+    }
 
  /*
   * Construction of binaries using new instructions.
@@ -3786,19 +3997,20 @@ apply_bif_or_nif_epilogue:
      Eterm header;
      BeamInstr *next;
      Uint slots;
+     Eterm context;
 
      OpCase(i_bs_start_match2_rfIId): {
-	 tmp_arg1 = r(0);
+	 context = r(0);
 
      do_start_match:
 	 slots = Arg(2);
-	 if (!is_boxed(tmp_arg1)) {
+	 if (!is_boxed(context)) {
 	     ClauseFail();
 	 }
 	 PreFetch(4, next);
-	 header = *boxed_val(tmp_arg1);
+	 header = *boxed_val(context);
 	 if (header_is_bin_matchstate(header)) {
-	     ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(tmp_arg1);
+	     ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(context);
 	     Uint actual_slots = HEADER_NUM_SLOTS(header);
 	     ms->save_offset[0] = ms->mb.offset;
 	     if (actual_slots < slots) {
@@ -3806,8 +4018,8 @@ apply_bif_or_nif_epilogue:
 		 Uint live = Arg(1);
 		 Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
 
-		 TestHeapPreserve(wordsneeded, live, tmp_arg1);
-		 ms = (ErlBinMatchState *) boxed_val(tmp_arg1);
+		 TestHeapPreserve(wordsneeded, live, context);
+		 ms = (ErlBinMatchState *) boxed_val(context);
 		 dst = (ErlBinMatchState *) HTOP;
 		 *dst = *ms;
 		 *HTOP = HEADER_BIN_MATCHSTATE(slots);
@@ -3819,12 +4031,12 @@ apply_bif_or_nif_epilogue:
 	     Eterm result;
 	     Uint live = Arg(1);
 	     Uint wordsneeded = ERL_BIN_MATCHSTATE_SIZE(slots);
-	     TestHeapPreserve(wordsneeded, live, tmp_arg1);
+	     TestHeapPreserve(wordsneeded, live, context);
 	     HEAP_TOP(c_p) = HTOP;
 #ifdef DEBUG
 	     c_p->stop = E;	/* Needed for checking in HeapOnlyAlloc(). */
 #endif
-	     result = erts_bs_start_match_2(c_p, tmp_arg1, slots);
+	     result = erts_bs_start_match_2(c_p, context, slots);
 	     HTOP = HEAP_TOP(c_p);
 	     HEAP_SPACE_VERIFIED(0);
 	     if (is_non_value(result)) {
@@ -3838,12 +4050,12 @@ apply_bif_or_nif_epilogue:
 	 NextPF(4, next);
      }
      OpCase(i_bs_start_match2_xfIId): {
-	 tmp_arg1 = xb(Arg(0));
+	 context = xb(Arg(0));
 	 I++;
 	 goto do_start_match;
      }
      OpCase(i_bs_start_match2_yfIId): {
-	 tmp_arg1 = yb(Arg(0));
+	 context = yb(Arg(0));
 	 I++;
 	 goto do_start_match;
      }
@@ -3936,93 +4148,105 @@ apply_bif_or_nif_epilogue:
      NextPF(2, next);
  }
 
+ {
+     Eterm bs_get_integer8_context;
+
  OpCase(i_bs_get_integer_8_rfd): {
-     tmp_arg1 = r(0);
-     goto do_bs_get_integer_8;
- }
+	 bs_get_integer8_context = r(0);
+	 goto do_bs_get_integer_8;
+     }
 
  OpCase(i_bs_get_integer_8_xfd): {
-     tmp_arg1 = xb(Arg(0));
-     I++;
- }
+	 bs_get_integer8_context = xb(Arg(0));
+	 I++;
+     }
 
  do_bs_get_integer_8: {
-     ErlBinMatchBuffer *_mb;
-     Eterm _result;
-     _mb = ms_matchbuffer(tmp_arg1);
-     if (_mb->size - _mb->offset < 8) {
-	 ClauseFail();
-     }
-     if (BIT_OFFSET(_mb->offset) != 0) {
-	 _result = erts_bs_get_integer_2(c_p, 8, 0, _mb);
-     } else {
-	 _result = make_small(_mb->base[BYTE_OFFSET(_mb->offset)]);
-	 _mb->offset += 8;
+	 ErlBinMatchBuffer *_mb;
+	 Eterm _result;
+	 _mb = ms_matchbuffer(bs_get_integer8_context);
+	 if (_mb->size - _mb->offset < 8) {
+	     ClauseFail();
+	 }
+	 if (BIT_OFFSET(_mb->offset) != 0) {
+	     _result = erts_bs_get_integer_2(c_p, 8, 0, _mb);
+	 } else {
+	     _result = make_small(_mb->base[BYTE_OFFSET(_mb->offset)]);
+	     _mb->offset += 8;
+	 }
+	 StoreBifResult(1, _result);
      }
-     StoreBifResult(1, _result);
  }
 
- OpCase(i_bs_get_integer_16_rfd): {
-     tmp_arg1 = r(0);
+ {
+     Eterm bs_get_integer_16_context;
+
+ OpCase(i_bs_get_integer_16_rfd):
+     bs_get_integer_16_context = r(0);
      goto do_bs_get_integer_16;
- }
 
- OpCase(i_bs_get_integer_16_xfd): {
-     tmp_arg1 = xb(Arg(0));
+ OpCase(i_bs_get_integer_16_xfd):
+     bs_get_integer_16_context = xb(Arg(0));
      I++;
- }
 
- do_bs_get_integer_16: {
-     ErlBinMatchBuffer *_mb;
-     Eterm _result;
-     _mb = ms_matchbuffer(tmp_arg1);
-     if (_mb->size - _mb->offset < 16) {
-	 ClauseFail();
-     }
-     if (BIT_OFFSET(_mb->offset) != 0) {
-	 _result = erts_bs_get_integer_2(c_p, 16, 0, _mb);
-     } else {
-	 _result = make_small(get_int16(_mb->base+BYTE_OFFSET(_mb->offset)));
-	 _mb->offset += 16;
+ do_bs_get_integer_16:
+     {
+	 ErlBinMatchBuffer *_mb;
+	 Eterm _result;
+	 _mb = ms_matchbuffer(bs_get_integer_16_context);
+	 if (_mb->size - _mb->offset < 16) {
+	     ClauseFail(); 
+	 }
+	 if (BIT_OFFSET(_mb->offset) != 0) {
+	     _result = erts_bs_get_integer_2(c_p, 16, 0, _mb);
+	 } else {
+	     _result = make_small(get_int16(_mb->base+BYTE_OFFSET(_mb->offset)));
+	     _mb->offset += 16;
+	 }
+	 StoreBifResult(1, _result);
      }
-     StoreBifResult(1, _result);
  }
 
- OpCase(i_bs_get_integer_32_rfId): {
-     tmp_arg1 = r(0);
+ {
+     Eterm bs_get_integer_32_context;
+
+ OpCase(i_bs_get_integer_32_rfId):
+     bs_get_integer_32_context = r(0);
      goto do_bs_get_integer_32;
- }
+
      
- OpCase(i_bs_get_integer_32_xfId): {
-     tmp_arg1 = xb(Arg(0));
+ OpCase(i_bs_get_integer_32_xfId):
+     bs_get_integer_32_context = xb(Arg(0));
      I++;
- }
 
- do_bs_get_integer_32: {
-     ErlBinMatchBuffer *_mb;
-     Uint32 _integer;
-     Eterm _result;
-     _mb = ms_matchbuffer(tmp_arg1);
-     if (_mb->size - _mb->offset < 32) { ClauseFail(); }
-     if (BIT_OFFSET(_mb->offset) != 0) {
-	 _integer = erts_bs_get_unaligned_uint32(_mb);
-     } else {
-	 _integer = get_int32(_mb->base + _mb->offset/8);
-     }
-     _mb->offset += 32;
+
+ do_bs_get_integer_32:
+     {
+	 ErlBinMatchBuffer *_mb;
+	 Uint32 _integer;
+	 Eterm _result;
+	 _mb = ms_matchbuffer(bs_get_integer_32_context);
+	 if (_mb->size - _mb->offset < 32) { ClauseFail(); }
+	 if (BIT_OFFSET(_mb->offset) != 0) {
+	     _integer = erts_bs_get_unaligned_uint32(_mb);
+	 } else {
+	     _integer = get_int32(_mb->base + _mb->offset/8);
+	 }
+	 _mb->offset += 32;
 #if !defined(ARCH_64) || HALFWORD_HEAP
-     if (IS_USMALL(0, _integer)) {
+	 if (IS_USMALL(0, _integer)) {
 #endif
-	 _result = make_small(_integer);
+	     _result = make_small(_integer);
 #if !defined(ARCH_64) || HALFWORD_HEAP
-     } else {
-	 TestHeap(BIG_UINT_HEAP_SIZE, Arg(1));
-	 _result = uint_to_big((Uint) _integer, HTOP);
-	 HTOP += BIG_UINT_HEAP_SIZE;
-	 HEAP_SPACE_VERIFIED(0);
-     }
+	 } else {
+	     TestHeap(BIG_UINT_HEAP_SIZE, Arg(1));
+	     _result = uint_to_big((Uint) _integer, HTOP);
+	     HTOP += BIG_UINT_HEAP_SIZE;
+	     HEAP_SPACE_VERIFIED(0);
+	 }
 #endif
-     StoreBifResult(2, _result);
+	 StoreBifResult(2, _result);
+     }
  }
 
  /* Operands: Size Live Fail Flags Dst */
@@ -4120,54 +4344,64 @@ apply_bif_or_nif_epilogue:
      StoreBifResult(3, result);
  }
 
- /* Operands: MatchContext Fail Dst */
+ {
+     Eterm get_utf8_context;
+
+     /* Operands: MatchContext Fail Dst */
  OpCase(i_bs_get_utf8_rfd): {
-     tmp_arg1 = r(0);
-     goto do_bs_get_utf8;
- }
+	 get_utf8_context = r(0);
+	 goto do_bs_get_utf8;
+     }
 
  OpCase(i_bs_get_utf8_xfd): {
-     tmp_arg1 = xb(Arg(0));
-     I++;
- }
+	 get_utf8_context = xb(Arg(0));
+	 I++;
+     }
 
- /*
-  * tmp_arg1 = match_context
-  * Operands: Fail Dst
-  */
+     /*
+      * get_utf8_context = match_context
+      * Operands: Fail Dst
+      */
 
-    do_bs_get_utf8: {
-     Eterm result = erts_bs_get_utf8(ms_matchbuffer(tmp_arg1));
-     if (is_non_value(result)) {
-	 ClauseFail();
+ do_bs_get_utf8: {
+	 Eterm result = erts_bs_get_utf8(ms_matchbuffer(get_utf8_context));
+	 if (is_non_value(result)) {
+	     ClauseFail();
+	 }
+	 StoreBifResult(1, result);
      }
-     StoreBifResult(1, result);
  }
 
- /* Operands: MatchContext Fail Flags Dst */
+ {
+     Eterm get_utf16_context;
+
+     /* Operands: MatchContext Fail Flags Dst */
  OpCase(i_bs_get_utf16_rfId): {
-     tmp_arg1 = r(0);
-     goto do_bs_get_utf16;
- }
+	 get_utf16_context = r(0);
+	 goto do_bs_get_utf16;
+     }
 
  OpCase(i_bs_get_utf16_xfId): {
-     tmp_arg1 = xb(Arg(0));
-     I++;
- }
+	 get_utf16_context = xb(Arg(0));
+	 I++;
+     }
 
- /*
-  * tmp_arg1 = match_context
-  * Operands: Fail Flags Dst
-  */
-    do_bs_get_utf16: {
-     Eterm result = erts_bs_get_utf16(ms_matchbuffer(tmp_arg1), Arg(1));
-     if (is_non_value(result)) {
-	 ClauseFail();
+     /*
+      * get_utf16_context = match_context
+      * Operands: Fail Flags Dst
+      */
+ do_bs_get_utf16: {
+	 Eterm result = erts_bs_get_utf16(ms_matchbuffer(get_utf16_context),
+					  Arg(1));
+	 if (is_non_value(result)) {
+	     ClauseFail();
+	 }
+	 StoreBifResult(2, result);
      }
-     StoreBifResult(2, result);
  }
 
  {
+     Eterm context_to_binary_context;
      ErlBinMatchBuffer* mb;
      ErlSubBin* sb;
      Uint size;
@@ -4176,27 +4410,29 @@ apply_bif_or_nif_epilogue:
      Uint hole_size;
 
      OpCase(bs_context_to_binary_r): {
-	 tmp_arg1 = x0;
+	 context_to_binary_context = x0;
 	 I -= 2;
 	 goto do_context_to_binary;
      }
 
      /* Unfortunately, inlining can generate this instruction. */
      OpCase(bs_context_to_binary_y): {
-	 tmp_arg1 = yb(Arg(0));
+	 context_to_binary_context = yb(Arg(0));
 	 goto do_context_to_binary0;
      }
 
      OpCase(bs_context_to_binary_x): {
-	 tmp_arg1 = xb(Arg(0));
+	 context_to_binary_context = xb(Arg(0));
      
      do_context_to_binary0:
 	 I--;
      }
 
  do_context_to_binary:
-     if (is_boxed(tmp_arg1) && header_is_bin_matchstate(*boxed_val(tmp_arg1))) {
-	 ErlBinMatchState* ms = (ErlBinMatchState *) boxed_val(tmp_arg1);
+     if (is_boxed(context_to_binary_context) &&
+	 header_is_bin_matchstate(*boxed_val(context_to_binary_context))) {
+	 ErlBinMatchState* ms;
+	 ms = (ErlBinMatchState *) boxed_val(context_to_binary_context);
 	 mb = &ms->mb;
 	 offs = ms->save_offset[0];
 	 size = mb->size - offs;
@@ -4205,17 +4441,17 @@ apply_bif_or_nif_epilogue:
      Next(2);
 
      OpCase(i_bs_get_binary_all_reuse_rfI): {
-	 tmp_arg1 = x0;
+	 context_to_binary_context = x0;
 	 goto do_bs_get_binary_all_reuse;
      }
 
      OpCase(i_bs_get_binary_all_reuse_xfI): {
-	 tmp_arg1 = xb(Arg(0));
+	 context_to_binary_context = xb(Arg(0));
 	 I++;
      }
 
  do_bs_get_binary_all_reuse:
-     mb = ms_matchbuffer(tmp_arg1);
+     mb = ms_matchbuffer(context_to_binary_context);
      size = mb->size - mb->offset;
      if (size % Arg(1) != 0) {
 	 ClauseFail();
@@ -4224,7 +4460,7 @@ apply_bif_or_nif_epilogue:
 
  do_bs_get_binary_all_reuse_common:
      orig = mb->orig;
-     sb = (ErlSubBin *) boxed_val(tmp_arg1);
+     sb = (ErlSubBin *) boxed_val(context_to_binary_context);
      hole_size = 1 + header_arity(sb->thing_word) - ERL_SUB_BIN_SIZE;
      sb->thing_word = HEADER_SUB_BIN;
      sb->size = BYTE_OFFSET(size);
@@ -4240,12 +4476,14 @@ apply_bif_or_nif_epilogue:
  }
 
  {
+     Eterm match_string_context;
+
      OpCase(i_bs_match_string_rfII): {
-	 tmp_arg1 = r(0);
+	 match_string_context = r(0);
 	 goto do_bs_match_string;
      }
      OpCase(i_bs_match_string_xfII): {
-	 tmp_arg1 = xb(Arg(0));
+	 match_string_context = xb(Arg(0));
 	 I++;
      }
 
@@ -4260,7 +4498,7 @@ apply_bif_or_nif_epilogue:
 	 PreFetch(3, next);
 	 bits = Arg(1);
 	 bytes = (byte *) Arg(2);
-	 mb = ms_matchbuffer(tmp_arg1);
+	 mb = ms_matchbuffer(match_string_context);
 	 if (mb->size - mb->offset < bits) {
 	     ClauseFail();
 	 }
@@ -4723,7 +4961,7 @@ apply_bif_or_nif_epilogue:
      NextPF(2, next);
  }
 
- OpCase(fmove_new_ld): {
+ OpCase(fmove_ld): {
      Eterm fr = Arg(0);
      Eterm dest = make_float(HTOP);
 
@@ -4753,11 +4991,6 @@ apply_bif_or_nif_epilogue:
      NextPF(2, next);
  }
 
- /*
-  * Old allocating fmove.
-  */
-
-
 #ifdef NO_FPE_SIGNALS
      OpCase(fclearerror):
      OpCase(i_fcheckerror):
@@ -4969,12 +5202,11 @@ apply_bif_or_nif_epilogue:
  OpCase(i_debug_breakpoint): {
      SWAPOUT;
      reg[0] = r(0);
-     tmp_arg1 = call_breakpoint_handler(c_p, I-3, reg);
+     I = call_error_handler(c_p, I-3, reg, am_breakpoint);
      r(0) = reg[0];
      SWAPIN;
-     if (tmp_arg1) {
-	 SET_I(c_p->i);
-	 Dispatch();
+     if (I) {
+	 Goto(*I);
      }
      goto no_error_handler;
  }
@@ -5724,8 +5956,8 @@ build_stacktrace(Process* c_p, Eterm exc) {
 }
 
 
-static Eterm
-call_error_handler(Process* p, BeamInstr* fi, Eterm* reg)
+static BeamInstr*
+call_error_handler(Process* p, BeamInstr* fi, Eterm* reg, Eterm func)
 {
     Eterm* hp;
     Export* ep;
@@ -5737,14 +5969,12 @@ call_error_handler(Process* p, BeamInstr* fi, Eterm* reg)
     /*
      * Search for the error_handler module.
      */
-    ep = erts_find_function(erts_proc_get_error_handler(p),
-			    am_undefined_function, 3);
+    ep = erts_find_function(erts_proc_get_error_handler(p), func, 3);
     if (ep == NULL) {		/* No error handler */
 	p->current = fi;
 	p->freason = EXC_UNDEF;
 	return 0;
     }
-    p->i = ep->address;
 
     /*
      * Create a list with all arguments in the x registers.
@@ -5764,63 +5994,14 @@ call_error_handler(Process* p, BeamInstr* fi, Eterm* reg)
     }
 
     /*
-     * Set up registers for call to error_handler:undefined_function/3.
+     * Set up registers for call to error_handler:<func>/3.
      */
     reg[0] = fi[0];
     reg[1] = fi[1];
     reg[2] = args;
-    return 1;
-}
-
-static Eterm
-call_breakpoint_handler(Process* p, BeamInstr* fi, Eterm* reg)
-{
-    Eterm* hp;
-    Export* ep;
-    int arity;
-    Eterm args;
-    Uint sz;
-    int i;
-
-    /*
-     * Search for error handler module.
-     */
-    ep = erts_find_function(erts_proc_get_error_handler(p),
-			    am_breakpoint, 3);
-    if (ep == NULL) {		/* No error handler */
-	p->current = fi;
-	p->freason = EXC_UNDEF;
-	return 0;
-    }
-    p->i = ep->address;
-
-    /*
-     * Create a list with all arguments in the x registers.
-     */
-
-    arity = fi[2];
-    sz = 2 * arity;
-    if (HeapWordsLeft(p) < sz) {
-	erts_garbage_collect(p, sz, reg, arity);
-    }
-    hp = HEAP_TOP(p);
-    HEAP_TOP(p) += sz;
-    args = NIL;
-    for (i = arity-1; i >= 0; i--) {
-	args = CONS(hp, reg[i], args);
-	hp += 2;
-    }
-
-    /*
-     * Set up registers for call to error_handler:breakpoint/3.
-     */
-    reg[0] = fi[0];
-    reg[1] = fi[1];
-    reg[2] = args;
-    return 1;
+    return ep->address;
 }
 
-
 
 static Export*
 apply_setup_error_handler(Process* p, Eterm module, Eterm function, Uint arity, Eterm* reg)
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index df5602b040..e6448931eb 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -89,13 +89,12 @@ typedef struct {
 } Label;
 
 /*
- * Type for a operand for a generic instruction.
+ * Type for an operand for a generic instruction.
  */
 
 typedef struct {
     unsigned type;		/* Type of operand. */
-    BeamInstr val;			/* Value of operand. */
-    Uint bigarity;		/* Arity for bignumbers (only). */
+    BeamInstr val;		/* Value of operand. */
 } GenOpArg;
 
 /*
@@ -326,11 +325,6 @@ typedef struct {
     Literal* literals;		/* Array of literals. */
     LiteralPatch* literal_patches; /* Operands that need to be patched. */
     Uint total_literal_size;	/* Total heap size for all literals. */
-
-    /*
-     * Floating point.
-     */
-    int new_float_instructions;	/* New allocation scheme for floating point. */
 } LoaderState;
 
 typedef struct {
@@ -476,12 +470,14 @@ static int read_code_header(LoaderState* stp);
 static int load_code(LoaderState* stp);
 static GenOp* gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index,
 			  GenOpArg Tuple, GenOpArg Dst);
-static GenOp* gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail,
+static GenOp* gen_split_values(LoaderState* stp, GenOpArg S,
+			       GenOpArg TypeFail, GenOpArg Fail,
 			       GenOpArg Size, GenOpArg* Rest);
 static GenOp* gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 			     GenOpArg Size, GenOpArg* Rest);
-static GenOp* gen_select_big(LoaderState* stp, GenOpArg S, GenOpArg Fail,
-			     GenOpArg Size, GenOpArg* Rest);
+static GenOp* gen_select_literals(LoaderState* stp, GenOpArg S,
+				  GenOpArg Fail, GenOpArg Size,
+				  GenOpArg* Rest);
 static GenOp* const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 			       GenOpArg Size, GenOpArg* Rest);
 static GenOp* gen_func_info(LoaderState* stp, GenOpArg mod, GenOpArg Func,
@@ -818,7 +814,6 @@ init_state(LoaderState* stp)
     stp->total_literal_size = 0;
     stp->literal_patches = 0;
     stp->string_patches = 0;
-    stp->new_float_instructions = 0;
     stp->may_load_nif = 0;
     stp->on_load = 0;
 }
@@ -1618,7 +1613,6 @@ load_code(LoaderState* stp)
 			    BeamInstr val;
 			    BeamInstr words = 0;
 			    
-			    stp->new_float_instructions = 1;
 			    GetTagAndValue(stp, tag, n);
 			    VerifyTag(stp, tag, TAG_u);
 			    while (n-- > 0) {
@@ -1772,7 +1766,7 @@ load_code(LoaderState* stp)
 	    }
 
 	    stp->specific_op = specific;
-	    CodeNeed(opc[stp->specific_op].sz+2); /* Extra margin for packing */
+	    CodeNeed(opc[stp->specific_op].sz+16); /* Extra margin for packing */
 	    code[ci++] = BeamOpCode(stp->specific_op);
 	}
 	
@@ -1936,7 +1930,8 @@ load_code(LoaderState* stp)
 		}
 		code[ci++] = (BeamInstr) stp->import[i].bf;
 		break;
-	    case 'P':		/* Byte offset into tuple */
+	    case 'P':		/* Byte offset into tuple or stack */
+	    case 'Q':		/* Like 'P', but packable */
 		VerifyTag(stp, tag, TAG_u);
 		tmp = tmp_op->a[arg].val;
 		code[ci++] = (BeamInstr) ((tmp_op->a[arg].val+1) * sizeof(Eterm));
@@ -1957,84 +1952,6 @@ load_code(LoaderState* stp)
 	}
 
 	/*
-	 * Load any list arguments using the primitive tags.
-	 */
-
-	for ( ; arg < tmp_op->arity; arg++) {
-	    switch (tmp_op->a[arg].type) {
-	    case TAG_i:
-		CodeNeed(1);
-		code[ci++] = make_small(tmp_op->a[arg].val);
-		break;
-	    case TAG_u:
-	    case TAG_a:
-	    case TAG_v:
-		CodeNeed(1);
-		code[ci++] = tmp_op->a[arg].val;
-		break;
-	    case TAG_f:
-		CodeNeed(1);
-		code[ci] = stp->labels[tmp_op->a[arg].val].patches;
-		stp->labels[tmp_op->a[arg].val].patches = ci;
-		ci++;
-		break;
-	    case TAG_q:
-		{
-		    Eterm lit;
-
-		    lit = stp->literals[tmp_op->a[arg].val].term;
-		    if (is_big(lit)) {
-			Eterm* bigp;
-			Eterm *tmp;
-			Uint size;
-			Uint term_size;
-
-			bigp = big_val(lit);
-			term_size = bignum_header_arity(*bigp);
-			size = TermWords(term_size + 1);
-			CodeNeed(size);
-			tmp = (Eterm *) (code + ci);
-			*tmp++ = *bigp++;
-			while (term_size-- > 0) {
-			    *tmp++ = *bigp++;
-			}
-			ci +=size;
-		    } else if (is_float(lit)) {
-#if defined(ARCH_64) && !HALFWORD_HEAP
-			CodeNeed(1);
-			code[ci++] = float_val(stp->literals[tmp_op->a[arg].val].term)[1];
-#elif HALFWORD_HEAP
-			Eterm* fptr;
-			Uint size;
-			Eterm *tmp;
-
-			fptr = float_val(stp->literals[tmp_op->a[arg].val].term)+1;
-			size = TermWords(2);
-			CodeNeed(size);
-			tmp = (Eterm *) (code + ci);
-			*tmp++ = *fptr++;
-			*tmp = *fptr;
-			ci += size;
-#else
-			Eterm* fptr;
-
-			fptr = float_val(stp->literals[tmp_op->a[arg].val].term)+1;
-			CodeNeed(2);
-			code[ci++] = *fptr++;
-			code[ci++] = *fptr;
-#endif
-		    } else {
-			LoadError0(stp, "literal is neither float nor big");
-		    }
-		}
-		break;
-	    default:
-		LoadError1(stp, "unsupported primitive type '%c'",
-			   tag_to_letter[tmp_op->a[arg].type]);
-	    }
-	}
-
-	/*
 	 * The packing engine.
 	 */
 	if (opc[stp->specific_op].pack[0]) {
@@ -2057,6 +1974,11 @@ load_code(LoaderState* stp)
 		case '6':	/* Shift 16 steps */
 		    packed = (packed << BEAM_LOOSE_SHIFT) | code[--ci];
 		    break;
+#ifdef ARCH_64
+		case 'w':	/* Shift 32 steps */
+		    packed = (packed << BEAM_WIDE_SHIFT) | code[--ci];
+		    break;
+#endif
 		case 'p':	/* Put instruction (from stack). */
 		    code[ci++] = *--sp;
 		    break;
@@ -2072,6 +1994,58 @@ load_code(LoaderState* stp)
 	}
 
 	/*
+	 * Load any list arguments using the primitive tags.
+	 */
+
+	for ( ; arg < tmp_op->arity; arg++) {
+	    switch (tmp_op->a[arg].type) {
+	    case TAG_i:
+		CodeNeed(1);
+		code[ci++] = make_small(tmp_op->a[arg].val);
+		break;
+	    case TAG_u:
+	    case TAG_a:
+	    case TAG_v:
+		CodeNeed(1);
+		code[ci++] = tmp_op->a[arg].val;
+		break;
+	    case TAG_f:
+		CodeNeed(1);
+		code[ci] = stp->labels[tmp_op->a[arg].val].patches;
+		stp->labels[tmp_op->a[arg].val].patches = ci;
+		ci++;
+		break;
+	    case TAG_r:
+		CodeNeed(1);
+		code[ci++] = (R_REG_DEF << _TAG_PRIMARY_SIZE) |
+		    TAG_PRIMARY_HEADER;
+		break;
+	    case TAG_x:
+		CodeNeed(1);
+		code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) |
+		    (X_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER;
+		break;
+	    case TAG_y:
+		CodeNeed(1);
+		code[ci++] = (tmp_op->a[arg].val << _TAG_IMMED1_SIZE) |
+		    (Y_REG_DEF << _TAG_PRIMARY_SIZE) | TAG_PRIMARY_HEADER;
+		break;
+	    case TAG_n:
+		CodeNeed(1);
+		code[ci++] = NIL;
+		break;
+	    case TAG_q:
+		CodeNeed(1);
+		new_literal_patch(stp, ci);
+		code[ci++] = tmp_op->a[arg].val;
+		break;
+	    default:
+		LoadError1(stp, "unsupported primitive type '%c'",
+			   tag_to_letter[tmp_op->a[arg].type]);
+	    }
+	}
+
+	/*
 	 * Handle a few special cases.
 	 */
 	switch (stp->specific_op) {
@@ -2239,11 +2213,12 @@ use_jump_tab(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
 }
 
 /*
- * Predicate to test whether all values in a table are big numbers.
+ * Predicate to test whether all values in a table are either
+ * floats or bignums.
  */
 
 static int
-all_values_are_big(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
+floats_or_bignums(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
 {
     int i;
 
@@ -2255,9 +2230,6 @@ all_values_are_big(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
 	if (Rest[i].type != TAG_q) {
 	    return 0;
 	}
-	if (is_not_big(stp->literals[Rest[i].val].term)) {
-	    return 0;
-	}
 	if (Rest[i+1].type != TAG_f) {
 	    return 0;
 	}
@@ -2317,6 +2289,14 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
     return 0;
 }
 
+/* True if Target is an f operand and Label a u operand with the same value. */
+static int
+same_label(LoaderState* stp, GenOpArg Target, GenOpArg Label)
+{
+    return Target.type == TAG_f && Label.type == TAG_u &&
+	Target.val == Label.val;
+}
+
 /*
  * Generate an instruction for element/2.
  */
@@ -2328,23 +2308,23 @@ gen_element(LoaderState* stp, GenOpArg Fail, GenOpArg Index,
     GenOp* op;
 
     NEW_GENOP(stp, op);
-    op->op = genop_i_element_4;
     op->arity = 4;
-    op->a[0] = Fail;
-    op->a[1] = Index;
-    op->a[2] = Tuple;
-    op->a[3] = Dst;
     op->next = NULL;
 
-    /*
-     * If safe, generate a faster instruction.
-     */
-
     if (Index.type == TAG_i && Index.val > 0 &&
 	(Tuple.type == TAG_r || Tuple.type == TAG_x || Tuple.type == TAG_y)) {
 	op->op = genop_i_fast_element_4;
-	op->a[1].type = TAG_u;
-	op->a[1].val = Index.val;
+	op->a[0] = Tuple;
+	op->a[1] = Fail;
+	op->a[2].type = TAG_u;
+	op->a[2].val = Index.val;
+	op->a[3] = Dst;
+    } else {
+	op->op = genop_i_element_4;
+	op->a[0] = Tuple;
+	op->a[1] = Fail;
+	op->a[2] = Index;
+	op->a[3] = Dst;
     }
 
     return op;
@@ -2595,8 +2575,6 @@ binary_too_big_bits(LoaderState* stp, GenOpArg Size)
     return Size.type == TAG_u && (((Size.val+7)/8) >> (8*sizeof(Uint)-3) != 0);
 }
 
-#define new_float_allocation(Stp) ((Stp)->new_float_instructions)
-
 static GenOp*
 gen_put_binary(LoaderState* stp, GenOpArg Fail,GenOpArg Size,
 	       GenOpArg Unit, GenOpArg Flags, GenOpArg Src)
@@ -2809,6 +2787,52 @@ gen_skip_bits2(LoaderState* stp, GenOpArg Fail, GenOpArg Ms,
     return op;
 }
 
+static GenOp*
+gen_increment(LoaderState* stp, GenOpArg Reg, GenOpArg Integer,
+	      GenOpArg Live, GenOpArg Dst)
+{
+    GenOp* incr;		/* the i_increment instruction being built */
+
+    NEW_GENOP(stp, incr);
+    incr->next = NULL;
+    incr->arity = 4;
+    incr->op = genop_i_increment_4;
+    incr->a[3] = Dst;
+    incr->a[2] = Live;
+    incr->a[1].val = Integer.val;	/* amount to add ... */
+    incr->a[1].type = TAG_u;		/* ... with its type forced to TAG_u */
+    incr->a[0] = Reg;			/* the register operand of the addition */
+    return incr;
+}
+
+/*
+ * Generate an i_increment instruction for a subtraction, "Reg - Integer",
+ * by adding the negated integer instead.  No range check is done here;
+ * the transformation rule guards this with negation_is_small().
+ */
+static GenOp*
+gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer,
+			 GenOpArg Live, GenOpArg Dst)
+{
+    GenOpArg negated = Integer;
+
+    /*
+     * Same instruction layout as gen_increment(), with the sign flipped.
+     */
+    negated.val = -Integer.val;
+    return gen_increment(stp, Reg, negated, Live, Dst);
+}
+
+/*
+ * Predicate: Int is an integer operand (TAG_i) and IS_SSMALL() accepts
+ * its negation.  NOTE(review): confirm that -Int.val is a signed negation.
+ */
+static int
+negation_is_small(LoaderState* stp, GenOpArg Int)
+{
+    return Int.type == TAG_i && IS_SSMALL(-Int.val);
+}
+
 static int
 smp(LoaderState* stp)
 {
@@ -3000,6 +3024,21 @@ gen_select_tuple_arity(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 	ASSERT(op->a[i].val < op->a[i+2].val);
     }
 #endif
+
+    /*
+     * Use a special-cased instruction if there are only two values.
+     */
+    if (size == 2) {
+	op->op = genop_i_select_tuple_arity2_6;
+	op->arity--;
+	op->a[2].type = TAG_u;
+	op->a[2].val = arityval(op->a[3].val);
+	op->a[3] = op->a[4];
+	op->a[4].type = TAG_u;
+	op->a[4].val = arityval(op->a[5].val);
+	op->a[5] = op->a[6];
+    }
+
     return op;
 }
 
@@ -3009,18 +3048,24 @@ gen_select_tuple_arity(LoaderState* stp, GenOpArg S, GenOpArg Fail,
  */
 
 static GenOp*
-gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail,
-		 GenOpArg Size, GenOpArg* Rest)
+gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg TypeFail,
+		 GenOpArg Fail, GenOpArg Size, GenOpArg* Rest)
 
 {
     GenOp* op1;
     GenOp* op2;
     GenOp* label;
-    Uint type;
+    GenOp* is_integer;
     int i;
 
     ASSERT(Size.val >= 2 && Size.val % 2 == 0);
 
+    NEW_GENOP(stp, is_integer);
+    is_integer->op = genop_is_integer_2;
+    is_integer->arity = 2;
+    is_integer->a[0] = TypeFail;
+    is_integer->a[1] = S;
+
     NEW_GENOP(stp, label);
     label->op = genop_label_1;
     label->arity = 1;
@@ -3046,15 +3091,13 @@ gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail,
     op2->a[2].type = TAG_u;
     op2->a[2].val = 0;
 
-    op1->next = label;
-    label->next = op2;
-    op2->next = NULL;
-
-    type = Rest[0].type;
+    /*
+     * Split the list.
+     */
 
     ASSERT(Size.type == TAG_u);
     for (i = 0; i < Size.val; i += 2) {
-	GenOp* op = (Rest[i].type == type) ? op1 : op2;
+	GenOp* op = (Rest[i].type == TAG_q) ? op2 : op1;
 	int dst = 3 + op->a[2].val;
 
 	ASSERT(Rest[i+1].type == TAG_f);
@@ -3063,13 +3106,36 @@ gen_split_values(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 	op->arity += 2;
 	op->a[2].val += 2;
     }
+    ASSERT(op1->a[2].val > 0);
+    ASSERT(op2->a[2].val > 0);
 
     /*
-     * None of the instructions should have zero elements in the list.
+     * Order the instruction sequence appropriately.
      */
 
-    ASSERT(op1->a[2].val > 0);
-    ASSERT(op2->a[2].val > 0);
+    if (TypeFail.val == Fail.val) {
+	/*
+	 * select_val L1 S ... (small numbers)
+	 * label L1
+	 * is_integer Fail S
+	 * select_val Fail S ... (bignums)
+	 */
+	op1->next = label;
+	label->next = is_integer;
+	is_integer->next = op2;
+    } else {
+	/*
+	 * is_integer TypeFail S
+	 * select_val L1 S ... (small numbers)
+	 * label L1
+	 * select_val Fail S ... (bignums)
+	 */
+	is_integer->next = op1;
+	op1->next = label;
+	label->next = op2;
+	op1 = is_integer;
+    }
+    op2->next = NULL;
 
     return op1;
 }
@@ -3091,6 +3157,29 @@ gen_jump_tab(LoaderState* stp, GenOpArg S, GenOpArg Fail, GenOpArg Size, GenOpAr
     ASSERT(Size.val >= 2 && Size.val % 2 == 0);
 
     /*
+     * If there is only one choice, don't generate a jump table.
+     */
+    if (Size.val == 2) {
+	GenOp* jump;
+
+	NEW_GENOP(stp, op);
+	op->arity = 3;
+	op->op = genop_is_ne_exact_3;
+	op->a[0] = Rest[1];
+	op->a[1] = S;
+	op->a[2] = Rest[0];
+
+	NEW_GENOP(stp, jump);
+	jump->next = NULL;
+	jump->arity = 1;
+	jump->op = genop_jump_1;
+	jump->a[0] = Fail;
+
+	op->next = jump;
+	return op;
+    }
+
+    /*
      * Calculate the minimum and maximum values and size of jump table.
      */
 
@@ -3162,8 +3251,9 @@ genopargcompare(GenOpArg* a, GenOpArg* b)
 }
 
 /*
- * Generate a select_val instruction.  We know that a jump table is not suitable,
- * and that all values are of the same type (integer, atoms, floats; never bignums).
+ * Generate a select_val instruction.  We know that a jump table
+ * is not suitable, and that all values are of the same type
+ * (integer or atoms).
  */
 
 static GenOp*
@@ -3177,12 +3267,7 @@ gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 
     NEW_GENOP(stp, op);
     op->next = NULL;
-    if (Rest[0].type != TAG_q) {
-	op->op = genop_i_select_val_3;
-    } else {
-	ASSERT(is_float(stp->literals[Rest[0].val].term));
-	op->op = genop_i_select_float_3;
-    }
+    op->op = genop_i_select_val_3;
     GENOP_ARITY(op, arity);
     op->a[0] = S;
     op->a[1] = Fail;
@@ -3204,19 +3289,19 @@ gen_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
     }
 #endif
 
-    return op;
-}
-
-/* 
- *  Compare function for qsort().
- */
+    /*
+     * Use a special-cased instruction if there are only two values.
+     */
+    if (size == 2) {
+	op->op = genop_i_select_val2_6;
+	op->arity--;
+	op->a[2] = op->a[3];
+	op->a[3] = op->a[4];
+	op->a[4] = op->a[5];
+	op->a[5] = op->a[6];
+    }
 
-static int
-genbigcompare(GenOpArg* a, GenOpArg* b)
-{
-    int val = (int)(b->bigarity - a->bigarity);
-    
-    return val != 0 ? val : ((int) (a->val - b->val));
+    return op;
 }
 
 /*
@@ -3224,37 +3309,35 @@ genbigcompare(GenOpArg* a, GenOpArg* b)
  */
 
 static GenOp*
-gen_select_big(LoaderState* stp, GenOpArg S, GenOpArg Fail,
+gen_select_literals(LoaderState* stp, GenOpArg S, GenOpArg Fail,
 	       GenOpArg Size, GenOpArg* Rest)
 {
     GenOp* op;
-    int arity = Size.val + 2 + 1;
-    int size = Size.val / 2;
+    GenOp* jump;
+    GenOp** prev_next = &op;
+
     int i;
 
-    NEW_GENOP(stp, op);
-    op->next = NULL;
-    op->op = genop_i_select_big_2;
-    GENOP_ARITY(op, arity);
-    op->a[0] = S;
-    op->a[1] = Fail;
     for (i = 0; i < Size.val; i += 2) {
+	GenOp* op;
 	ASSERT(Rest[i].type == TAG_q);
-	op->a[i+2] = Rest[i];
-	op->a[i+2].bigarity = *big_val(stp->literals[op->a[i+2].val].term);
-	op->a[i+3] = Rest[i+1];
-    }
-    ASSERT(i+2 == arity-1);
-    op->a[arity-1].type = TAG_u;
-    op->a[arity-1].val = 0;
-
-    /*
-     * Sort the values in descending arity order.
-     */
-
-    qsort(op->a+2, size, 2*sizeof(GenOpArg), 
-	  (int (*)(const void *, const void *)) genbigcompare);
 
+	NEW_GENOP(stp, op);
+	op->op = genop_is_ne_exact_3;
+	op->arity = 3;
+	op->a[0] = Rest[i+1];
+	op->a[1] = S;
+	op->a[2] = Rest[i];
+	*prev_next = op;
+	prev_next = &op->next;
+    }
+
+    NEW_GENOP(stp, jump);
+    jump->next = NULL;
+    jump->op = genop_jump_1;
+    jump->arity = 1;
+    jump->a[0] = Fail;
+    *prev_next = jump;
     return op;
 }
 
@@ -3272,7 +3355,6 @@ const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
     int i;
 
     ASSERT(Size.type == TAG_u);
-    ASSERT(S.type == TAG_q);
 
     NEW_GENOP(stp, op);
     op->next = NULL;
@@ -3283,18 +3365,32 @@ const_select_val(LoaderState* stp, GenOpArg S, GenOpArg Fail,
      * Search for a literal matching the controlling expression.
      */
 
-    if (S.type == TAG_q) {
-	Eterm expr = stp->literals[S.val].term;
-	for (i = 0; i < Size.val; i += 2) {
-	    if (Rest[i].type == TAG_q) {
-		Eterm term = stp->literals[Rest[i].val].term;
-		if (eq(term, expr)) {
-		    ASSERT(Rest[i+1].type == TAG_f);
-		    op->a[0] = Rest[i+1];
-		    return op;
+    switch (S.type) {
+    case TAG_q:
+	{
+	    Eterm expr = stp->literals[S.val].term;
+	    for (i = 0; i < Size.val; i += 2) {
+		if (Rest[i].type == TAG_q) {
+		    Eterm term = stp->literals[Rest[i].val].term;
+		    if (eq(term, expr)) {
+			ASSERT(Rest[i+1].type == TAG_f);
+			op->a[0] = Rest[i+1];
+			return op;
+		    }
 		}
 	    }
 	}
+	break;
+    case TAG_i:
+    case TAG_a:
+	for (i = 0; i < Size.val; i += 2) {
+	    if (Rest[i].val == S.val && Rest[i].type == S.type) {
+		ASSERT(Rest[i+1].type == TAG_f);
+		op->a[0] = Rest[i+1];
+		return op;
+	    }
+	}
+	break;
     }
 
     /*
@@ -3477,6 +3573,56 @@ gen_guard_bif3(LoaderState* stp, GenOpArg Fail, GenOpArg Live, GenOpArg Bif,
     return op;
 }
 
+/* Rebuild i_put_tuple with the operands of five following "put"s appended. */
+static GenOp*
+tuple_append_put5(LoaderState* stp, GenOpArg Arity, GenOpArg Dst,
+		  GenOpArg* Puts, GenOpArg S1, GenOpArg S2, GenOpArg S3,
+		  GenOpArg S4, GenOpArg S5)
+{
+    GenOp* tuple_op;
+    int collected = Arity.val;	/* Elements already held in Puts */
+    int slot;			/* Index into the new operand array */
+
+    NEW_GENOP(stp, tuple_op);
+    tuple_op->next = NULL;
+    GENOP_ARITY(tuple_op, collected+2+5);
+    tuple_op->op = genop_i_put_tuple_2;
+    tuple_op->a[0] = Dst;
+    tuple_op->a[1].type = TAG_u;
+    tuple_op->a[1].val = collected + 5;
+    for (slot = 2; slot < collected + 2; slot++)
+	tuple_op->a[slot] = Puts[slot-2];
+    tuple_op->a[collected+2] = S1;
+    tuple_op->a[collected+3] = S2;
+    tuple_op->a[collected+4] = S3;
+    tuple_op->a[collected+5] = S4;
+    tuple_op->a[collected+6] = S5;
+    return tuple_op;
+}
+
+/* Rebuild i_put_tuple with the operand of one following "put" appended. */
+static GenOp*
+tuple_append_put(LoaderState* stp, GenOpArg Arity, GenOpArg Dst,
+		 GenOpArg* Puts, GenOpArg S)
+{
+    GenOp* tuple_op;
+    int collected = Arity.val;	/* Elements already held in Puts */
+    int slot;			/* Index into the new operand array */
+
+    NEW_GENOP(stp, tuple_op);
+    tuple_op->next = NULL;
+    GENOP_ARITY(tuple_op, collected+2+1);
+    tuple_op->op = genop_i_put_tuple_2;
+    tuple_op->a[0] = Dst;
+    tuple_op->a[1].type = TAG_u;
+    tuple_op->a[1].val = collected + 1;
+    for (slot = 2; slot < collected + 2; slot++)
+	tuple_op->a[slot] = Puts[slot-2];
+    tuple_op->a[collected+2] = S;
+    return tuple_op;
+}
+
+
 
 /*
  * Freeze the code in memory, move the string table into place,
@@ -3876,11 +4022,23 @@ transform_engine(LoaderState* st)
 	    if (i == 0)
 		goto restart;
 	    break;
+#if defined(TOP_is_eq)
 	case TOP_is_eq:
 	    ASSERT(ap < instr->arity);
 	    if (*pc++ != instr->a[ap].val)
 		goto restart;
 	    break;
+#endif
+	case TOP_is_type_eq:
+	    mask = *pc++;
+
+	    ASSERT(ap < instr->arity);
+	    ASSERT(instr->a[ap].type < BEAM_NUM_TAGS);
+	    if (((1 << instr->a[ap].type) & mask) == 0)
+		goto restart;
+	    if (*pc++ != instr->a[ap].val)
+		goto restart;
+	    break;
 	case TOP_is_same_var:
 	    ASSERT(ap < instr->arity);
 	    i = *pc++;
@@ -4001,14 +4159,17 @@ transform_engine(LoaderState* st)
 	case TOP_rest_args:
 	    {
 		int n = *pc++;
+		int formal_arity = gen_opc[instr->op].arity;
+		int num_vars = n + (instr->arity - formal_arity);
+		int j = formal_arity;
+
 		var = erts_alloc(ERTS_ALC_T_LOADER_TMP,
-				 instr->arity * sizeof(GenOpArg));
+				 num_vars * sizeof(GenOpArg));
 		for (i = 0; i < n; i++) {
 		    var[i] = def_vars[i];
 		}
-		while (i < instr->arity) {
-		    var[i] = instr->a[i];
-		    i++;
+		while (i < num_vars) {
+		    var[i++] = instr->a[j++];
 		}
 	    }
 	    break;
diff --git a/erts/emulator/beam/bif.tab b/erts/emulator/beam/bif.tab
index 60b4b1946b..d9dd80fa8b 100644
--- a/erts/emulator/beam/bif.tab
+++ b/erts/emulator/beam/bif.tab
@@ -660,6 +660,7 @@ bif erts_debug:display/1
 bif 'erl.system.debug':display/1 ebif_erts_debug_display_1
 bif erts_debug:dist_ext_to_term/2
 bif 'erl.system.debug':dist_ext_to_term/2 ebif_erts_debug_dist_ext_to_term_2
+bif erts_debug:instructions/0
 
 #
 # Monitor testing bif's...
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index a2439d5582..e861f97e7a 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -101,16 +101,16 @@ return
 %macro: test_heap TestHeap -pack
 
 allocate t t
-allocate_heap I I I
+allocate_heap t I t
 deallocate I
 init y
 allocate_zero t t
-allocate_heap_zero I I I
+allocate_heap_zero t I t
 
 trim N Remaining => i_trim N
 i_trim I
 
-test_heap I I
+test_heap I t
 
 allocate_heap S u==0 R => allocate S R
 allocate_heap_zero S u==0 R => allocate_zero S R
@@ -124,7 +124,7 @@ init Y1 | init Y2 => init2 Y1 Y2
 
 # Selecting values
 
-select_val S=q Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest)
+select_val S=aiq Fail=f Size=u Rest=* => const_select_val(S, Fail, Size, Rest)
 
 select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \
   gen_jump_tab(S, Fail, Size, Rest)
@@ -132,34 +132,59 @@ select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \
 is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | use_jump_tab(Size, Rest) => \
   gen_jump_tab(S, Fail, Size, Rest)
 
+is_integer TypeFail=f S | select_val S=s Fail=f Size=u Rest=* | \
+	   mixed_types(Size, Rest) => \
+  gen_split_values(S, TypeFail, Fail, Size, Rest)
+
 select_val S=s Fail=f Size=u Rest=* | mixed_types(Size, Rest) => \
-  gen_split_values(S, Fail, Size, Rest)
+  gen_split_values(S, Fail, Fail, Size, Rest)
 
-is_integer Fail=f S | select_val S=s Fail=f Size=u Rest=* | \
+is_integer Fail=f S | select_val S=d Fail=f Size=u Rest=* | \
   fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest)
 
-is_atom Fail=f S | select_val S=s Fail=f Size=u Rest=* | \
+is_atom Fail=f S | select_val S=d Fail=f Size=u Rest=* | \
   fixed_size_values(Size, Rest) => gen_select_val(S, Fail, Size, Rest)
 
-select_val S=s Fail=f Size=u Rest=* | fixed_size_values(Size, Rest) => \
-  gen_select_val(S, Fail, Size, Rest)
+select_val S=s Fail=f Size=u Rest=* | floats_or_bignums(Size, Rest) => \
+  gen_select_literals(S, Fail, Size, Rest)
 
-select_val S=s Fail=f Size=u Rest=* | all_values_are_big(Size, Rest) => \
-  gen_select_big(S, Fail, Size, Rest)
+select_val S=d Fail=f Size=u Rest=* | fixed_size_values(Size, Rest) => \
+  gen_select_val(S, Fail, Size, Rest)
 
-is_tuple Fail=f S | select_tuple_arity S=s Fail=f Size=u Rest=* => \
+is_tuple Fail=f S | select_tuple_arity S=d Fail=f Size=u Rest=* => \
   gen_select_tuple_arity(S, Fail, Size, Rest)
 
-select_tuple_arity S=s Fail=f Size=u Rest=* => \
+select_tuple_arity S=d Fail=f Size=u Rest=* => \
   gen_select_tuple_arity(S, Fail, Size, Rest)
 
-i_select_val s f I
-i_select_tuple_arity s f I
-i_select_big s f
-i_select_float s f I
+i_select_val r f I
+i_select_val x f I
+i_select_val y f I
+
+i_select_val2 r f c f c f
+i_select_val2 x f c f c f
+i_select_val2 y f c f c f
+
+i_select_tuple_arity2 r f A f A f
+i_select_tuple_arity2 x f A f A f
+i_select_tuple_arity2 y f A f A f
+
+i_select_tuple_arity r f I
+i_select_tuple_arity x f I
+i_select_tuple_arity y f I
+
+i_jump_on_val_zero r f I
+i_jump_on_val_zero x f I
+i_jump_on_val_zero y f I
+
+i_jump_on_val r f I I
+i_jump_on_val x f I I
+i_jump_on_val y f I I
 
-i_jump_on_val_zero s f I
-i_jump_on_val s f I I
+jump Target | label Lbl | same_label(Target, Lbl) => label Lbl
+
+is_ne_exact L1 S1 S2 | jump Fail | label L2 | same_label(L1, L2) => \
+  is_eq_exact Fail S1 S2 | label L2
 
 %macro: get_list GetList -pack
 get_list x x x
@@ -234,11 +259,17 @@ is_number Fail Literal=q => move Literal x | is_number Fail x
 
 jump f
 
-case_end Literal=q => move Literal x | case_end x
-badmatch Literal=q => move Literal x | badmatch x
+case_end Literal=cq => move Literal x | case_end x
+badmatch Literal=cq => move Literal x | badmatch x
+
+case_end r
+case_end x
+case_end y
+
+badmatch r
+badmatch x
+badmatch y
 
-case_end s
-badmatch s
 if_end
 raise s s
 
@@ -248,12 +279,33 @@ system_limit j
 
 move R R =>
 
+move C=cxy r | jump Lbl => move_jump Lbl C
+
+%macro: move_jump MoveJump -nonext
+move_jump f n
+move_jump f c
+move_jump f x
+move_jump f y
+
 move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2
 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2
+move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4
+
+move C=aiq X=x==1 => move_x1 C
+move C=aiq X=x==2 => move_x2 C
+
+move_x1 c
+move_x2 c
 
 %macro: move2 Move2 -pack
 move2 x y x y
 move2 y x y x
+move2 x x x x
+
+# The compiler almost never generates a "move Literal y(Y)" instruction,
+# so let's cheat if we encounter one.
+move S=n D=y => init D
+move S=c D=y => move S x | move x D
 
 %macro:move Move -pack -gen_dest
 move x x
@@ -265,15 +317,10 @@ move r x
 move r y
 move c r
 move c x
-move c y
 move n x
 move n r
 move y y
 
-%cold
-move s d
-%hot
-
 # Receive operations.
 
 loop_rec Fail Src | smp_mark_target_label(Fail) => i_loop_rec Fail Src
@@ -306,55 +353,78 @@ i_wait_error_locked
 send
 
 #
-# Comparisions.
+# Optimized comparisons with one immediate/literal operand.
+#
+
+is_eq_exact Lbl R=rxy C=ian => i_is_eq_exact_immed Lbl R C
+is_eq_exact Lbl R=rxy C=q => i_is_eq_exact_literal R Lbl C
+
+is_ne_exact Lbl R=rxy C=ian => i_is_ne_exact_immed Lbl R C
+is_ne_exact Lbl R=rxy C=q => i_is_ne_exact_literal R Lbl C
+
+%macro: i_is_eq_exact_immed EqualImmed -fail_action
+i_is_eq_exact_immed f r c
+i_is_eq_exact_immed f x c
+i_is_eq_exact_immed f y c
+
+i_is_eq_exact_literal r f c
+i_is_eq_exact_literal x f c
+i_is_eq_exact_literal y f c
+
+%macro: i_is_ne_exact_immed NotEqualImmed -fail_action
+i_is_ne_exact_immed f r c
+i_is_ne_exact_immed f x c
+i_is_ne_exact_immed f y c
+
+i_is_ne_exact_literal r f c
+i_is_ne_exact_literal x f c
+i_is_ne_exact_literal y f c
+
+#
+# All other comparisons.
 #
 
-is_eq_exact Lbl=f R=rxy C=ian => i_is_eq_immed Lbl R C
-is_eq Lbl=f R=rxy C=an => i_is_eq_immed Lbl R C
+is_eq_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl
+is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl
 
 is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl
 is_lt Lbl S1 S2 => i_fetch S1 S2 | i_is_lt Lbl
 is_eq Lbl S1 S2 => i_fetch S1 S2 | i_is_eq Lbl
 is_ne Lbl S1 S2 => i_fetch S1 S2 | i_is_ne Lbl
 
-is_eq_exact Lbl=f S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl
-is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl
-
+i_is_eq_exact f
+i_is_ne_exact f
 i_is_lt f
 i_is_ge f
 i_is_eq f
 i_is_ne f
-i_is_eq_exact f
-i_is_ne_exact f
-
-%macro: i_is_eq_immed EqualImmed -fail_action
-i_is_eq_immed f r c
-i_is_eq_immed f x c
-i_is_eq_immed f y c
 
 #
 # Putting things.
 #
 
-put_tuple Arity Dst | put V => i_put_tuple Arity V Dst
+put_tuple Arity Dst => i_put_tuple Dst u
 
-%macro: i_put_tuple PutTuple -pack
-i_put_tuple A x x
-i_put_tuple A y x
-i_put_tuple A r x
-i_put_tuple A n x
-i_put_tuple A c x
-i_put_tuple A x y
-i_put_tuple A x r
-i_put_tuple A y r
-i_put_tuple A n r
-i_put_tuple A c r
+i_put_tuple Dst Arity Puts=* | put S1 | put S2 | \
+  put S3 | put S4 | put S5 => \
+	    tuple_append_put5(Arity, Dst, Puts, S1, S2, S3, S4, S5)
 
-%cold
-i_put_tuple A r y
-i_put_tuple A y y
-i_put_tuple A c y
-%hot
+i_put_tuple Dst Arity Puts=* | put S => \
+	    tuple_append_put(Arity, Dst, Puts, S)
+
+i_put_tuple/2
+
+%macro:i_put_tuple PutTuple -pack -goto:do_put_tuple
+i_put_tuple r I
+i_put_tuple x I
+i_put_tuple y I
+
+#
+# The instruction "put_list Const [] Dst" will not be generated by
+# the current BEAM compiler. But until R15A, play it safe by handling
+# that instruction with the following transformation.
+#
+put_list Const=c n Dst => move Const x | put_list x n Dst
 
 %macro:put_list PutList -pack -gen_dest
 
@@ -362,10 +432,8 @@ put_list x n x
 put_list y n x
 put_list x x x
 put_list y x x
-put_list c n x
 put_list x x r
 put_list y r r
-put_list c n r
 
 put_list y y x
 put_list x y x
@@ -376,6 +444,13 @@ put_list y y r
 put_list y r x
 put_list r n x
 
+put_list x r x
+put_list x y r
+put_list y x r
+put_list y x x
+
+put_list x r r
+
 # put_list SrcReg Constant Dst
 put_list r c r
 put_list r c x
@@ -403,17 +478,9 @@ put_list c y x
 put_list c y y
 
 %cold
-put_list x r r
 put_list s s d
 %hot
 
-%macro: put Put
-put x
-put r
-put y
-put c
-put n
-
 %macro: i_fetch FetchArgs -pack
 i_fetch c c
 i_fetch c r
@@ -464,19 +531,20 @@ move_return n r
 
 move S r | deallocate D | return => move_deallocate_return S r D
 
-%macro: move_deallocate_return MoveDeallocateReturn -nonext
-move_deallocate_return x r P
-move_deallocate_return y r P
-move_deallocate_return c r P
-move_deallocate_return n r P
+%macro: move_deallocate_return MoveDeallocateReturn -pack -nonext
+move_deallocate_return x r Q
+move_deallocate_return y r Q
+move_deallocate_return c r Q
+move_deallocate_return n r Q
 
 deallocate D | return => deallocate_return D
 
 %macro: deallocate_return DeallocateReturn -nonext
-deallocate_return P
+deallocate_return Q
 
 test_heap Need u==1 | put_list Y=y r r => test_heap_1_put_list Need Y
 
+%macro: test_heap_1_put_list TestHeapPutList -pack
 test_heap_1_put_list I y
 
 # Test tuple & arity (head)
@@ -576,14 +644,14 @@ is_list f y
 
 is_nonempty_list Fail=f S=rx | allocate Need Rs => is_nonempty_list_allocate Fail S Need Rs
 
-%macro:is_nonempty_list_allocate IsNonemptyListAllocate -fail_action
-is_nonempty_list_allocate f x I I
-is_nonempty_list_allocate f r I I
+%macro:is_nonempty_list_allocate IsNonemptyListAllocate -fail_action -pack
+is_nonempty_list_allocate f x I t
+is_nonempty_list_allocate f r I t
 
 is_nonempty_list F=f r | test_heap I1 I2 => is_non_empty_list_test_heap F r I1 I2
 
-%macro: is_non_empty_list_test_heap IsNonemptyListTestHeap -fail_action
-is_non_empty_list_test_heap f r I I
+%macro: is_non_empty_list_test_heap IsNonemptyListTestHeap -fail_action -pack
+is_non_empty_list_test_heap f r I t
 
 %macro: is_nonempty_list IsNonemptyList -fail_action
 is_nonempty_list f x
@@ -912,8 +980,13 @@ node x
 node y
 %hot
 
-i_fast_element j I s d
-i_element j s s d
+i_fast_element r j I d
+i_fast_element x j I d
+i_fast_element y j I d
+
+i_element r j s d
+i_element x j s d
+i_element y j s d
 
 bif1 f b s d
 bif1_body b s d
@@ -940,11 +1013,11 @@ move S r | call_last Ar P=f D => move_call_last S r P D
 
 i_move_call_last f P c r
 
-%macro:move_call_last MoveCallLast -arg_f -nonext
+%macro:move_call_last MoveCallLast -arg_f -nonext -pack
 
 move_call_last/4
-move_call_last x r f P
-move_call_last y r f P
+move_call_last x r f Q
+move_call_last y r f Q
 
 move S=c r | call_only Ar P=f => i_move_call_only P S r
 move S=x r | call_only Ar P=f => move_call_only S r P
@@ -1307,6 +1380,8 @@ fconv Arg=iqan Dst=l => move Arg x | fconv x Dst
 
 fmove q l
 fmove d l
+fmove l d
+
 fconv d l
 
 i_fadd l l l
@@ -1322,12 +1397,6 @@ fcheckerror p => i_fcheckerror
 i_fcheckerror
 fclearerror
 
-fmove FR=l Dst=d | new_float_allocation() => fmove_new FR Dst
- 
-# The new instruction for moving a float out of a floating point register.
-# (No allocation.)
-fmove_new l d
-
 #
 # New apply instructions in R10B.
 #
@@ -1336,7 +1405,21 @@ apply I
 apply_last I P
 
 #
-# New GCing arithmetic instructions.
+# Optimize addition and subtraction of small literals using
+# the i_increment/4 instruction (in bodies, not in guards).
+#
+
+gc_bif2 p Live u$bif:erlang:splus/2 Int=i Reg=d Dst => \
+	gen_increment(Reg, Int, Live, Dst)
+gc_bif2 p Live u$bif:erlang:splus/2 Reg=d Int=i Dst => \
+	gen_increment(Reg, Int, Live, Dst)
+
+gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \
+	negation_is_small(Int) => \
+	gen_increment_from_minus(Reg, Int, Live, Dst)
+
+#
+# GCing arithmetic instructions.
 #
 
 gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst
@@ -1359,6 +1442,10 @@ gc_bif1 Fail I u$bif:erlang:bnot/1 Src Dst=d => i_int_bnot Fail Src I Dst
 gc_bif1 Fail I u$bif:erlang:sminus/1 Src Dst=d => i_fetch i Src | i_minus Fail I Dst
 gc_bif1 Fail I u$bif:erlang:splus/1 Src Dst=d => i_fetch i Src | i_plus Fail I Dst
 
+i_increment r I I d
+i_increment x I I d
+i_increment y I I d
+
 i_plus j I d
 i_minus j I d
 i_times j I d
diff --git a/erts/emulator/test/beam_SUITE.erl b/erts/emulator/test/beam_SUITE.erl
index 228ff15341..32ac07cb2d 100644
--- a/erts/emulator/test/beam_SUITE.erl
+++ b/erts/emulator/test/beam_SUITE.erl
@@ -20,7 +20,8 @@
 -module(beam_SUITE).
 
 -export([all/1, packed_registers/1, apply_last/1, apply_last_bif/1,
-	 buildo_mucho/1, heap_sizes/1, big_lists/1, fconv/1]).
+	 buildo_mucho/1, heap_sizes/1, big_lists/1, fconv/1,
+	 select_val/1]).
 
 -export([applied/2]).
 
@@ -28,7 +29,7 @@
 
 all(suite) ->
     [packed_registers, apply_last, apply_last_bif, buildo_mucho,
-     heap_sizes, big_lists].
+     heap_sizes, big_lists, select_val].
 
 
 %% Verify that apply(M, F, A) is really tail recursive.
@@ -302,3 +303,19 @@ do_fconv(nil, Float) when is_float(Float) ->
     Float + [];
 do_fconv(tuple_literal, Float) when is_float(Float) ->
     Float + {a,b}.
+
+select_val(Config) when is_list(Config) ->
+    ?line zero = do_select_val(0),          % matches the small literal clause
+    ?line big = do_select_val(1 bsl 64),    % matches the 1 bsl 64 clause
+    ?line integer = do_select_val(42),      % neither literal; guarded clause
+    ok.
+
+do_select_val(X) ->
+    case X of
+	0 ->
+	    zero;
+	1 bsl 64 ->                         % constant pattern (2^64)
+	    big;
+	Int when is_integer(Int) ->         % any other integer
+	    integer
+    end.
diff --git a/erts/emulator/test/beam_literals_SUITE.erl b/erts/emulator/test/beam_literals_SUITE.erl
index 75841adbfc..1eda939cf8 100644
--- a/erts/emulator/test/beam_literals_SUITE.erl
+++ b/erts/emulator/test/beam_literals_SUITE.erl
@@ -23,7 +23,8 @@
 	 matching_bigs/1, matching_more_bigs/1,
 	 matching_bigs_and_smalls/1, badmatch/1, case_clause/1,
 	 receiving/1, literal_type_tests/1,
-	 put_list/1, fconv/1, literal_case_expression/1]).
+	 put_list/1, fconv/1, literal_case_expression/1,
+	 increment/1]).
 
 -include("test_server.hrl").
 
@@ -32,7 +33,7 @@ all(suite) ->
      matching_bigs, matching_more_bigs,
      matching_bigs_and_smalls, badmatch, case_clause,
      receiving, literal_type_tests,
-     put_list, fconv, literal_case_expression].
+     put_list, fconv, literal_case_expression, increment].
 
 putting(doc) -> "Test creating lists and tuples containing big number literals.";
 putting(Config) when is_list(Config) ->
@@ -48,6 +49,7 @@ matching_bigs(doc) -> "Test matching of a few big number literals (in Beam,"
 matching_bigs(Config) when is_list(Config) ->
     a = matching1(3972907842873739),
     b = matching1(-389789298378939783333333333333333333784),
+    other = matching1(3141699999999999999999999999999999999),
     other = matching1(42).
 
 matching_smalls(doc) -> "Test matching small numbers (both positive and negative).";
@@ -405,14 +407,51 @@ fconv_2(F) when is_float(F) ->
 literal_case_expression(Config) when is_list(Config) ->
     ?line DataDir = ?config(data_dir, Config),
     ?line Src = filename:join(DataDir, "literal_case_expression"),
-    ?line {ok,literal_case_expression=Mod,Code} = compile:file(Src, [from_asm,binary]),
+    ?line {ok,literal_case_expression=Mod,Code} =
+	compile:file(Src, [from_asm,binary]),
     ?line {module,Mod} = code:load_binary(Mod, Src, Code),
     ?line ok = Mod:x(),
     ?line ok = Mod:y(),
+    ?line ok = Mod:zi1(),
+    ?line ok = Mod:zi2(),
+    ?line ok = Mod:za1(),
+    ?line ok = Mod:za2(),
     ?line true = code:delete(Mod),
     ?line code:purge(Mod),
     ok.
 
+%% Test the i_increment instruction (adding/subtracting a small literal).
+increment(Config) when is_list(Config) ->
+    %% In the 32-bit emulator, Neg32 can be represented as a small,
+    %% but -Neg32 cannot. Therefore the i_increment instruction must
+    %% not be used in the subtraction that follows (since i_increment
+    %% cannot handle a bignum literal).
+    Neg32 = -(1 bsl 27),
+    Big32 = id(1 bsl 32),
+    Result32 = (1 bsl 32) + (1 bsl 27),
+    ?line Result32 = Big32 + (1 bsl 27),
+    ?line Result32 = Big32 - Neg32,
+
+    %% Same thing, but for the 64-bit emulator.
+    Neg64 = -(1 bsl 59),
+    Big64 = id(1 bsl 64),
+    Result64 = (1 bsl 64) + (1 bsl 59),
+    ?line Result64 = Big64 + (1 bsl 59),
+    ?line Result64 = Big64 - Neg64,
+
+    %% Test error handling for the i_increment instruction.
+    Bad = id(bad),
+    ?line {'EXIT',{badarith,_}} = (catch Bad + 42),
+
+    %% Small operands, but a big result.
+    Res32 = 1 bsl 27,
+    Small32 = id(Res32-1),
+    ?line Res32 = Small32 + 1,
+    Res64 = 1 bsl 59,
+    Small64 = id(Res64-1),
+    ?line Res64 = Small64 + 1,
+    ok.
+
 %% Help functions.
 
 chksum(Term) ->
diff --git a/erts/emulator/test/beam_literals_SUITE_data/literal_case_expression.S b/erts/emulator/test/beam_literals_SUITE_data/literal_case_expression.S
index c0ffe9ab53..bfdfc079dc 100644
--- a/erts/emulator/test/beam_literals_SUITE_data/literal_case_expression.S
+++ b/erts/emulator/test/beam_literals_SUITE_data/literal_case_expression.S
@@ -1,10 +1,11 @@
 {module, literal_case_expression}.  %% version = 0
 
-{exports, [{module_info,0},{module_info,1},{x,0},{y,0}]}.
+{exports, [{module_info,0},{module_info,1},{x,0},{y,0},
+	   {zi1,0},{zi2,0},{za1,0},{za2,0}]}.
 
 {attributes, []}.
 
-{labels, 15}.
+{labels, 32}.
 
 
 {function, x, 0, 2}.
@@ -52,6 +53,81 @@
   {label,10}.
     {case_end,{float,34.0000}}.
 
+{function, zi1, 0, 16}.
+  {label,15}.
+    {func_info,{atom,literal_case_expression},{atom,zi1},0}.
+  {label,16}.
+    {test,is_integer,{f,19},[{integer,42}]}.
+    {select_val,{integer,42},
+                {f,18},
+                {list,[{integer,42},
+                       {f,17},
+                       {integer,1000},
+                       {f,18}]}}.
+  {label,17}.
+    {move,{atom,ok},{x,0}}.
+    return.
+  {label,18}.
+    {move,{atom,error},{x,0}}.
+    return.
+  {label,19}.
+    {case_end,{integer,42}}.
+
+{function, zi2, 0, 21}.  %% entry point is label 21 (label 16 belongs to zi1)
+  {label,20}.
+    {func_info,{atom,literal_case_expression},{atom,zi2},0}.
+  {label,21}.
+    {test,is_integer,{f,23},[{integer,42}]}.
+    {select_val,{integer,42},
+                {f,23},
+                {list,[{integer,42},
+                       {f,22},
+                       {integer,1000},
+                       {f,23}]}}.
+  {label,22}.
+    {move,{atom,ok},{x,0}}.
+    return.
+  {label,23}.
+    {move,{atom,error},{x,0}}.
+    return.
+
+{function, za1, 0, 25}.
+  {label,24}.
+    {func_info,{atom,literal_case_expression},{atom,za1},0}.
+  {label,25}.
+    {test,is_atom,{f,28},[{atom,x}]}.
+    {select_val,{atom,x},
+                {f,27},
+                {list,[{atom,a},
+                       {f,27},
+                       {atom,x},
+                       {f,26}]}}.
+  {label,26}.
+    {move,{atom,ok},{x,0}}.
+    return.
+  {label,27}.
+    {move,{atom,error},{x,0}}.
+    return.
+  {label,28}.
+    {case_end,{atom,x}}.
+
+{function, za2, 0, 30}.
+  {label,29}.
+    {func_info,{atom,literal_case_expression},{atom,za2},0}.
+  {label,30}.
+    {test,is_atom,{f,32},[{atom,x}]}.
+    {select_val,{atom,x},
+                {f,32},
+                {list,[{atom,a},
+                       {f,32},
+                       {atom,x},
+                       {f,31}]}}.
+  {label,31}.
+    {move,{atom,ok},{x,0}}.
+    return.
+  {label,32}.
+    {move,{atom,error},{x,0}}.
+    return.
 
 {function, module_info, 0, 12}.
   {label,11}.
diff --git a/erts/emulator/test/erts_debug_SUITE.erl b/erts/emulator/test/erts_debug_SUITE.erl
index e60a999df1..934a1b10a4 100644
--- a/erts/emulator/test/erts_debug_SUITE.erl
+++ b/erts/emulator/test/erts_debug_SUITE.erl
@@ -21,10 +21,10 @@
 -include("test_server.hrl").
 
 -export([all/1,init_per_testcase/2,fin_per_testcase/2,
-	 flat_size/1,flat_size_big/1,df/1]).
+	 flat_size/1,flat_size_big/1,df/1,instructions/1]).
 
 all(suite) ->
-    [flat_size,flat_size_big,df].
+    [flat_size,flat_size_big,df,instructions].
 
 init_per_testcase(Func, Config) when is_atom(Func), is_list(Config) ->
     Dog=?t:timetrap(?t:minutes(2)),
@@ -70,3 +70,8 @@ df(Config) when is_list(Config) ->
 
 pps() ->
     {erlang:ports()}.
+
+instructions(Config) when is_list(Config) ->
+    ?line Is = erts_debug:instructions(),
+    ?line _ = [list_to_atom(I) || I <- Is],    % every element must be a string
+    ok.
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index de19a2e35b..e7c57142c0 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -27,6 +27,7 @@ my $outdir = ".";		# Directory for output files.
 my $verbose = 0;
 my $hot = 1;
 my $num_file_opcodes = 0;
+my $wordsize = 32;
 
 # This is shift counts and mask for the packer.
 my $WHOLE_WORD = '';
@@ -36,12 +37,20 @@ my @pack_mask;
 
 $pack_instr[2] = ['6', 'i'];
 $pack_instr[3] = ['0', '0', 'i'];
+$pack_instr[4] = ['6', '6', '6', 'i']; # Only for 64 bit wordsize
 
 $pack_shift[2] = ['0', 'BEAM_LOOSE_SHIFT'];
 $pack_shift[3] = ['0', 'BEAM_TIGHT_SHIFT', '(2*BEAM_TIGHT_SHIFT)'];
+$pack_shift[4] = ['0', 'BEAM_LOOSE_SHIFT', # Only for 64 bit wordsize
+		  '(2*BEAM_LOOSE_SHIFT)',
+		  '(3*BEAM_LOOSE_SHIFT)'];
 
 $pack_mask[2]  = ['BEAM_LOOSE_MASK', $WHOLE_WORD];
 $pack_mask[3]  = ['BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK', 'BEAM_TIGHT_MASK'];
+$pack_mask[4]  = ['BEAM_LOOSE_MASK', # Only for 64 bit wordsize
+		  'BEAM_LOOSE_MASK',
+		  'BEAM_LOOSE_MASK',
+		  $WHOLE_WORD];
 
 # There are two types of instructions: generic and specific.
 # The generic instructions are those generated by the Beam compiler.
@@ -80,6 +89,8 @@ my %cold_code;
 my @unnumbered_generic;
 my %unnumbered;
 
+my %is_transformed;
+
 #
 # Code transformations.
 #
@@ -118,7 +129,8 @@ my %arg_size = ('r' => 0,	# x(0) - x register zero
 		't' => 1,	# untagged integer -- can be packed
 		'b' => 1,	# pointer to bif
 		'A' => 1,	# arity value
-		'P' => 1,	# byte offset into tuple
+		'P' => 1,	# byte offset into tuple or stack
+		'Q' => 1,	# like 'P', but packable
 		'h' => 1,	# character
 		'l' => 1,	# float reg
 		'q' => 1,	# literal term
@@ -157,6 +169,7 @@ my @tag_type;
     $type_bit{'U'} = $type_bit{'u'};
     $type_bit{'e'} = $type_bit{'u'};
     $type_bit{'P'} = $type_bit{'u'};
+    $type_bit{'Q'} = $type_bit{'u'};
 }
 
 #
@@ -169,6 +182,7 @@ while (@ARGV && $ARGV[0] =~ /^-(.*)/) {
     ($target = \&emulator_output), next if /^emulator/;
     ($target = \&compiler_output), next if /^compiler/;
     ($outdir = shift), next if /^outdir/;
+    ($wordsize = shift), next if /^wordsize/;
     ($verbose = 1), next if /^v/;
     die "$0: Bad option: -$_\n";
 }
@@ -474,8 +488,9 @@ sub emulator_output {
 		$gen_transform_offset{$key} : -1;
 	    my($spec_op) = $gen_to_spec{$key};
 	    my($num_specific) = $num_specific{$key};
-	    defined $spec_op or $tr != -1 or
+	    defined $spec_op or
 		$obsolete[$gen_opnum{$name,$arity}] or
+		$is_transformed{$name,$arity} or
 		error("instruction $key has no specific instruction");
 	    $spec_op = -1 unless defined $spec_op;
 	    &init_item($name, $arity, $spec_op, $num_specific, $tr, $min_window{$key});
@@ -498,12 +513,14 @@ sub emulator_output {
     print "#define NUM_SPECIFIC_OPS ", scalar(@op_to_name), "\n";
     print "\n";
     print "#ifdef ARCH_64\n";
+    print "#  define BEAM_WIDE_MASK 0xFFFFUL\n";
     print "#  define BEAM_LOOSE_MASK 0x1FFFUL\n";
     print "#if HALFWORD_HEAP\n";
     print "#  define BEAM_TIGHT_MASK 0x1FFCUL\n";
     print "#else\n";
     print "#  define BEAM_TIGHT_MASK 0x1FF8UL\n";
     print "#endif\n";
+    print "#  define BEAM_WIDE_SHIFT 32\n";
     print "#  define BEAM_LOOSE_SHIFT 16\n";
     print "#  define BEAM_TIGHT_SHIFT 16\n";
     print "#else\n";
@@ -796,6 +813,7 @@ sub basic_generator {
 		     'I' => 1,
 		     't' => 1,
 		     'P' => 1,
+		     'Q' => 1,
 		     );
 
     # Pick up the macro to use and its flags (if any).
@@ -916,7 +934,18 @@ sub basic_generator {
 	$var_decls .= "BeamInstr tmp_packed2;"
 	    if $macro_code =~ /tmp_packed2/;
 	if ($flags =~ /-nonext/) {
-	    $code = "$macro_code\n";
+	    $code = join("\n",
+			 "{ $var_decls",
+			 $macro_code,
+			 "}");
+	} elsif ($flags =~ /-goto:(\S*)/) {
+	    my $goto = $1;
+	    $code = join("\n",
+			 "{ $var_decls",
+			 $macro_code,
+			 "I += $size + 1;",
+			 "goto $goto;",
+			 "}");
 	} else {
 	    $code = join("\n",
 			 "{ $var_decls",
@@ -935,18 +964,31 @@ sub basic_generator {
 
 sub do_pack {
     my(@args) = @_;
-    my($i);
     my($packable_args) = 0;
+    my @is_packable;		# Packability (boolean) for each argument.
+    my $wide_packing = 0;
 
     #
     # Count the number of packable arguments.  If we encounter any 's' or 'd'
     # arguments, packing is not possible.
     #
-    for ($i = 0; $i < @args; $i++) {
-	if ($args[$i] =~ /[xyt]/) {
+    my $packable_types = "xytQ";
+    foreach my $arg (@args) {
+	if ($arg =~ /^[$packable_types]/) {
 	    $packable_args++;
-	} elsif ($args[$i] =~ /[sd]/) {
+	    push @is_packable, 1;
+	} elsif ($arg =~ /^I/ and $wordsize == 64 and $packable_args < 2) {
+	    $wide_packing = 1;
+	    push @is_packable, 1;
+	    if (++$packable_args == 2) {
+		# We can only pack two arguments. Turn off packing
+		# for the rest of the arguments.
+		$packable_types = "\xFF";
+	    }
+	} elsif ($arg =~ /^[sd]/) {
 	    return ('', '', @args);
+	} else {
+	    push @is_packable, 0;
 	}
     }
 
@@ -962,10 +1004,27 @@ sub do_pack {
 				# beginning).
     my($up) = '';		# Pack commands (storing back while
 				# moving forward).
-    my($args_per_word) = $packable_args < 4 ? $packable_args : 2;
-    my(@shift) = @{$pack_shift[$args_per_word]};
-    my(@mask) = @{$pack_mask[$args_per_word]};
-    my(@pack_instr) = @{$pack_instr[$args_per_word]};
+    my $args_per_word;
+    if ($packable_args < 4 or $wordsize == 64) {
+	$args_per_word = $packable_args;
+    } else {
+	# 4 packable arguments, 32 bit wordsize. Need 2 words.
+	$args_per_word = 2;
+    }
+
+    my @shift;
+    my @mask;
+    my @instr;
+
+    if ($wide_packing) {
+	@shift = ('0', 'BEAM_WIDE_SHIFT');
+	@mask = ('BEAM_WIDE_MASK', $WHOLE_WORD);
+	@instr = ('w', 'i');
+    } else {
+	@shift = @{$pack_shift[$args_per_word]};
+	@mask = @{$pack_mask[$args_per_word]};
+	@instr = @{$pack_instr[$args_per_word]};
+    }
 
     #
     # Now generate the packing instructions.  One complication is that
@@ -979,10 +1038,10 @@ sub do_pack {
     my($ap) = 0;		# Argument number within word.
     my($tmpnum) = 1;		# Number of temporary variable.
     my($expr) = '';
-    for ($i = 0; $i < @args; $i++) {
+    for (my $i = 0; $i < @args; $i++) {
 	my($reg) = $args[$i];
 	my($this_size) = $arg_size{$reg};
-	if ($reg =~ /[xyt]/) {
+	if ($is_packable[$i]) {
 	    $this_size = 0;
 	    $did_some_packing = 1;
 
@@ -993,7 +1052,7 @@ sub do_pack {
 		$this_size = 1;
 	    }
 
-	    $down = "$pack_instr[$ap]$down";
+	    $down = "$instr[$ap]$down";
 	    my($unpack) = &make_unpack($tmpnum, $shift[$ap], $mask[$ap]);
 	    $args[$i] = "pack:$this_size:$reg" . "b($unpack)";
 
@@ -1103,6 +1162,10 @@ sub compile_transform {
     if ($obsolete[$gen_opnum{$name,$arity}]) {
 	error("obsolete function must not be used in transformations");
     }
+
+    if ($src) {
+	$is_transformed{$name,$arity} = 1;
+    }
     
     [$name,$arity,@ops];
 }
@@ -1291,13 +1354,28 @@ sub tr_gen_from {
 	    my($var, $type, $type_val, $cond, $val) = @$op;
 
 	    if ($type ne '' && $type ne '*') {
-		my($types) = '';
-		my($type_mask) = 0;
-		foreach (split('', $type)) {
-		    $types .= "$_ ";
-		    $type_mask |= $type_bit{$_};
+		#
+		# The is_bif, is_not_bif, and is_func instructions have
+		# their own built-in type test and don't need to
+		# be guarded with a type test instruction.
+		#
+		unless ($cond eq 'is_bif' or
+			$cond eq 'is_not_bif' or
+			$cond eq 'is_func') {
+		    my($types) = '';
+		    my($type_mask) = 0;
+		    foreach (split('', $type)) {
+			$types .= "$_ ";
+			$type_mask |= $type_bit{$_};
+		    }
+		    if ($cond ne 'is_eq') {
+			push(@code, &make_op($types, 'is_type', $type_mask));
+		    } else {
+			$cond = '';
+			push(@code, &make_op($types, 'is_type_eq',
+					     $type_mask, $val));
+		    }
 		}
-		push(@code, &make_op($types, 'is_type', $type_mask));
 	    }
 
 	    if ($cond eq 'is_func') {
diff --git a/erts/emulator/utils/count b/erts/emulator/utils/count
new file mode 100755
index 0000000000..617f5c25e8
--- /dev/null
+++ b/erts/emulator/utils/count
@@ -0,0 +1,127 @@
+%% -*- erlang -*-
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 1998-2010. All Rights Reserved.
+%%
+%% The contents of this file are subject to the Erlang Public License,
+%% Version 1.1, (the "License"); you may not use this file except in
+%% compliance with the License. You should have received a copy of the
+%% Erlang Public License along with this software. If not, it can be
+%% retrieved online at http://www.erlang.org/.
+%%
+%% Software distributed under the License is distributed on an "AS IS"
+%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
+%% the License for the specific language governing rights and limitations
+%% under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+-mode(compile).
+
+main(_) ->  % Escript entry point (arguments ignored): count instruction usage in every module on the code path.
+    DisDir = "./dis",
+    ok = filelib:ensure_dir(filename:join(DisDir, "dummy")),  % ensure_dir/1 creates the parents of "dummy", i.e. DisDir
+    io:format("Disassembling to ~s\n", [DisDir]),
+    ok = file:set_cwd(DisDir),  % relative file names used from here on resolve inside DisDir
+    Path = code:get_path() -- ["."],
+    Beams0 = [filelib:wildcard(filename:join(Dir, "*.beam")) ||
+		 Dir <- Path],
+    Beams = lists:append(Beams0),
+    Mods0 = [list_to_atom(filename:rootname(filename:basename(F))) ||
+	       F <- Beams],
+    Mods = lists:usort(Mods0),  % sorted, duplicates removed
+    start_sem(),
+    Ps = [begin  % one monitored worker per module; keep only the monitor reference
+	      {_,Ref} = spawn_monitor(fun() -> count(M) end),
+	      Ref
+	  end || M <- Mods],
+    [put(list_to_atom(I), 0) || I <- erts_debug:instructions()],  % start every listed instruction at 0
+    Res = wait_for_all(Ps, 1),
+    OutFile = "count",
+    {ok,Out} = file:open(OutFile, [write]),
+    [io:format(Out, "~s ~p\n", [I,C]) || {I,C} <- Res],  % one "Name Count" line per entry
+    ok = file:close(Out),
+    io:format("\nResult written to ~s\n",
+	      [filename:join(DisDir, OutFile)]),
+    ok.
+
+wait_for_all([], _) ->  % No monitor references left: return the accumulated counters.
+    lists:reverse(lists:keysort(2, get()));  % process dictionary entries, highest second element first
+wait_for_all([_|_]=Ps, I) ->  % Ps: outstanding monitor references; I: progress number printed per finished worker
+    receive
+	{'DOWN',Ref,process,_,Result} ->  % Result is the worker's exit reason
+	    io:format("\r~p", [I]),
+	    [increment(Key, Count) || {Key,Count} <- Result],  % merge the worker's counts into this process's dictionary
+	    wait_for_all(Ps -- [Ref], I+1)
+    end.
+
+count(M) ->  % Worker body: count the instructions of module M, then exit with the counts.
+    down(),  % block until the semaphore grants a slot
+    erts_debug:df(M),  % presumably writes the disassembly to "<M>.dis", the file opened below -- TODO confirm
+    {ok,Fd} = file:open(atom_to_list(M) ++ ".dis", [read,raw]),
+    count_is(Fd),
+    ok = file:close(Fd),
+    exit(get()).  % the whole process dictionary becomes the exit reason
+
+count_is(Fd) ->  % Pass every line read from Fd to count_instr/1 until eof.
+    case file:read_line(Fd) of  % any result other than {ok,Line} or eof raises case_clause
+	{ok,Line} ->
+	    count_instr(Line),
+	    count_is(Fd);
+	eof ->
+	    ok
+    end.
+
+count_instr([$\s|T]) ->  % First space found: the instruction name starts right after it.
+    count_instr_1(T, []);
+count_instr([_|T]) ->  % Skip every character before the first space.
+    count_instr(T);
+count_instr([]) ->
+    %% No space anywhere in the line (for example an empty line).
+    ok.
+
+count_instr_1([$\s|_], Acc) ->  % A space ends the name; Acc holds its characters in reverse order.
+    Instr = list_to_atom(lists:reverse(Acc)),
+    increment(Instr, 1);
+count_instr_1([H|T], Acc) ->  % NOTE(review): no clause for [] -- a name not followed by a space raises function_clause
+    count_instr_1(T, [H|Acc]).
+
+increment(Key, Inc) -> % Add Inc to the counter stored under Key in the process dictionary.
+    case get(Key) of
+	undefined ->  % no entry yet: the counter starts at Inc
+	    put(Key, Inc);
+	Count ->
+	    put(Key, Count+Inc)
+    end.
+
+%%%
+%%% Counting semaphore to limit the number of processes that
+%%% can run concurrently.
+%%%
+
+down() ->  % Request a slot from the process registered as sem and wait for its reply.
+    sem ! {down,self()},
+    receive
+	sem_taken -> ok  % no timeout: blocks until the slot is granted
+    end.
+
+start_sem() ->  % Start the semaphore process with schedulers+1 slots; returns its pid.
+    Slots = erlang:system_info(schedulers) + 1,
+    Sem = spawn(fun() -> process_flag(trap_exit, true), do_sem(Slots) end),  % trap exits: a linked worker's exit frees a slot
+    true = register(sem, Sem),  % register before returning, so that down/0 can never send to an unregistered name
+    Sem.
+
+do_sem(0) ->  % No free slot: wait for an 'EXIT' message, which frees one.
+    receive
+	{'EXIT',_,_} ->
+	    do_sem(1)
+    end;
+do_sem(C) ->  % C free slots: serve the next request; 'EXIT' messages stay queued until C reaches 0.
+    receive
+	{down,Pid} ->
+	    link(Pid),  % with trap_exit set by the starter, the requester's exit arrives as an 'EXIT' message
+	    Pid ! sem_taken,
+	    do_sem(C-1)
+    end.
author	Björn Gustavsson <bjorn@erlang.org>	2011-01-17 15:29:48 +0100
committer	Björn Gustavsson <bjorn@erlang.org>	2011-01-17 15:29:48 +0100
commit	faef041a446314bb228e0e8c88a09241df2798f1 (patch)
tree	728f73a167f50a46b54617a6cc054807064236e9 /erts
parent	7bc25db7f9a70190a661b2a97734900893d33169 (diff)
parent	3f1fce3929cc0cc68d7e5b1ce543bd3f20a31e2b (diff)
download	otp-faef041a446314bb228e0e8c88a09241df2798f1.tar.gz otp-faef041a446314bb228e0e8c88a09241df2798f1.tar.bz2 otp-faef041a446314bb228e0e8c88a09241df2798f1.zip