From 4b507534f27c343fe2b53f07bbe52e94c81e381f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 7 Dec 2015 17:10:37 +0100
Subject: ops.tab: Remove useless transformation

The transformation on the following line will do the job.
---
 erts/emulator/beam/ops.tab | 1 -
 1 file changed, 1 deletion(-)

diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 772460c177..78000160e3 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -1539,7 +1539,6 @@ gen_minus p Live Reg=d Int=i Dst | negation_is_small(Int) => \
 # GCing arithmetic instructions.
 #
 
-gen_plus Fail Live Y=y X=x Dst => i_plus Fail Live X Y Dst
 gen_plus Fail Live S1 S2 Dst => i_plus Fail Live S1 S2 Dst
 
 gen_minus Fail Live S1 S2 Dst => i_minus Fail Live S1 S2 Dst
-- 
cgit v1.2.3


From ec26a0c56cb6e28cc5a35ef72116275e5eeef823 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 1 Apr 2016 14:03:51 +0200
Subject: Refactor calls to transform_engine()

We used to set last_op_next and last_op to NULL just in case.
Setting last_op_next to NULL causes a rescan of the instructions
to find the last instruction in the chain, so we would want
to avoid that unless really necessary.
---
 erts/emulator/beam/beam_load.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 16cbdbffea..2f2b433999 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2028,11 +2028,7 @@ load_code(LoaderState* stp)
 	ASSERT(arity == last_op->arity);
 
     do_transform:
-	if (stp->genop == NULL) {
-	    last_op_next = NULL;
-	    goto get_next_instr;
-	}
-
+	ASSERT(stp->genop != NULL);
 	if (gen_opc[stp->genop->op].transform != -1) {
 	    int need;
 	    tmp_op = stp->genop;
@@ -2045,25 +2041,34 @@ load_code(LoaderState* stp)
 	    }
 	    switch (transform_engine(stp)) {
 	    case TE_FAIL:
-		last_op_next = NULL;
-		last_op = NULL;
+		/*
+		 * No transformation found. stp->genop != NULL and
+		 * last_op_next is still valid. Go ahead and load
+		 * the instruction.
+		 */
 		break;
 	    case TE_OK:
+		/*
+		 * Some transformation was applied. last_op_next is
+		 * no longer valid and stp->genop may be NULL.
+		 * Try to transform again.
+		 */
+		if (stp->genop == NULL) {
+		    last_op_next = &stp->genop;
+		    goto get_next_instr;
+		}
 		last_op_next = NULL;
-		last_op = NULL;
 		goto do_transform;
 	    case TE_SHORT_WINDOW:
-		last_op_next = NULL;
-		last_op = NULL;
+		/*
+		 * No transformation applied. stp->genop != NULL and
+		 * last_op_next is still valid. Fetch a new instruction
+		 * before trying the transformation again.
+		 */
 		goto get_next_instr;
 	    }
 	}
 
-	if (stp->genop == NULL) {
-	    last_op_next = NULL;
-	    goto get_next_instr;
-	}
-
 	/*
 	 * From the collected generic instruction, find the specific
 	 * instruction.
@@ -2584,7 +2589,10 @@ load_code(LoaderState* stp)
 	{
 	    GenOp* next = stp->genop->next;
 	    FREE_GENOP(stp, stp->genop);
-	    stp->genop = next;
+	    if ((stp->genop = next) == NULL) {
+		last_op_next = &stp->genop;
+		goto get_next_instr;
+	    }
 	    goto do_transform;
 	}
     }
-- 
cgit v1.2.3


From 6d51b25958393d95dee32baafd708aa3909ddb5a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Fri, 1 Apr 2016 16:07:37 +0200
Subject: Eliminate allocation of variables in transform_engine()

When an instruction with a variable number of operands (such as
select_val) is seen on the left side of a transformation, the
'next_arg' instruction will allocate a buffer to fit all variables and
all operands will be copied into the buffer. Very often, the 'commit'
instruction will never be reached because of a test or predicate
failing or because of a short window; in that case, the variable
buffer will be deallocated.

Note that originally there were only a few instructions with a variable
number of operands, but now common operations such as tuple building
also have a variable number of operands.

To avoid those frequent allocations and deallocations, modify the
'next_arg' instruction to only save a pointer to the first of the
"rest" arguments. Also move the deallocation of the instructions
on the left side from the 'commit' instruction to the 'end'
instruction to ensure that 'store_rest_args' will still work.
---
 erts/emulator/beam/beam_load.c   | 94 ++++++++++++----------------------------
 erts/emulator/utils/beam_makeops | 65 ++++++++++-----------------
 2 files changed, 49 insertions(+), 110 deletions(-)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 2f2b433999..c6c35e74c9 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -4813,31 +4813,25 @@ transform_engine(LoaderState* st)
     Uint op;
     int ap;			/* Current argument. */
     Uint* restart;		/* Where to restart if current match fails. */
-    GenOpArg def_vars[TE_MAX_VARS]; /* Default buffer for variables. */
-    GenOpArg* var = def_vars;
-    int num_vars = 0;
+    GenOpArg var[TE_MAX_VARS];	/* Buffer for variables. */
+    GenOpArg* rest_args = NULL;
+    int num_rest_args = 0;
     int i;			/* General index. */
     Uint mask;
     GenOp* instr;
+    GenOp* first = st->genop;
+    GenOp* keep = NULL;
     Uint* pc;
-    int rval;
     static Uint restart_fail[1] = {TOP_fail};
 
-    ASSERT(gen_opc[st->genop->op].transform != -1);
-    pc = op_transform + gen_opc[st->genop->op].transform;
-    restart = pc;
+    ASSERT(gen_opc[first->op].transform != -1);
+    restart = op_transform + gen_opc[first->op].transform;
 
  restart:
-    if (var != def_vars) {
-	erts_free(ERTS_ALC_T_LOADER_TMP, (void *) var);
-	var = def_vars;
-    }
     ASSERT(restart != NULL);
     pc = restart;
     ASSERT(*pc < NUM_TOPS);	/* Valid instruction? */
-    instr = st->genop;
-
-#define RETURN(r) rval = (r); goto do_return;
+    instr = first;
 
 #ifdef DEBUG
     restart = NULL;
@@ -4855,7 +4849,7 @@ transform_engine(LoaderState* st)
 		 * We'll need at least one more instruction to decide whether
 		 * this combination matches or not.
 		 */
-		RETURN(TE_SHORT_WINDOW);
+		return TE_SHORT_WINDOW;
 	    }
 	    if (*pc++ != instr->op)
 		goto restart;
@@ -5017,19 +5011,9 @@ transform_engine(LoaderState* st)
 #if defined(TOP_rest_args)
 	case TOP_rest_args:
 	    {
-		int n = *pc++;
 		int formal_arity = gen_opc[instr->op].arity;
-		int j = formal_arity;
-
-		num_vars = n + (instr->arity - formal_arity);
-		var = erts_alloc(ERTS_ALC_T_LOADER_TMP,
-				 num_vars * sizeof(GenOpArg));
-		for (i = 0; i < n; i++) {
-		    var[i] = def_vars[i];
-		}
-		while (i < num_vars) {
-		    var[i++] = instr->a[j++];
-		}
+		num_rest_args = instr->arity - formal_arity;
+		rest_args = instr->a + formal_arity;
 	    }
 	    break;
 #endif
@@ -5038,16 +5022,8 @@ transform_engine(LoaderState* st)
 	    break;
 	case TOP_commit:
 	    instr = instr->next; /* The next_instr was optimized away. */
-
-	    /*
-	     * The left-hand side of this transformation matched.
-	     * Delete all matched instructions.
-	     */
-	    while (st->genop != instr) {
-		GenOp* next = st->genop->next;
-		FREE_GENOP(st, st->genop);
-		st->genop = next;
-	    }
+	    keep = instr;
+	    st->genop = instr;
 #ifdef DEBUG
 	    instr = 0;
 #endif
@@ -5077,22 +5053,19 @@ transform_engine(LoaderState* st)
 		    lastp = &((*lastp)->next);
 		}
 		 
-		instr = instr->next; /* The next_instr was optimized away. */
-
-		/*
-		 * The left-hand side of this transformation matched.
-		 * Delete all matched instructions.
-		 */
-		while (st->genop != instr) {
-		    GenOp* next = st->genop->next;
-		    FREE_GENOP(st, st->genop);
-		    st->genop = next;
-		}
-		*lastp = st->genop;
+		keep = instr->next; /* The next_instr was optimized away. */
+		*lastp = keep;
 		st->genop = new_instr;
 	    }
-	    RETURN(TE_OK);
+	    /* FALLTHROUGH */
 #endif
+	case TOP_end:
+	    while (first != keep) {
+		GenOp* next = first->next;
+		FREE_GENOP(st, first);
+		first = next;
+	    }
+	    return TE_OK;
 	case TOP_new_instr:
 	    /*
 	     * Note that the instructions are generated in reverse order.
@@ -5123,14 +5096,10 @@ transform_engine(LoaderState* st)
 #if defined(TOP_store_rest_args)
 	case TOP_store_rest_args:
 	    {
-		int n = *pc++;
-		int num_extra = num_vars - n;
-
-		ASSERT(n <= num_vars);
-		GENOP_ARITY(instr, instr->arity+num_extra);
+		GENOP_ARITY(instr, instr->arity+num_rest_args);
 		memcpy(instr->a, instr->def_args, ap*sizeof(GenOpArg));
-		memcpy(instr->a+ap, var+n, num_extra*sizeof(GenOpArg));
-		ap += num_extra;
+		memcpy(instr->a+ap, rest_args, num_rest_args*sizeof(GenOpArg));
+		ap += num_rest_args;
 	    }
 	    break;
 #endif
@@ -5142,21 +5111,12 @@ transform_engine(LoaderState* st)
 	case TOP_try_me_else_fail:
 	    restart = restart_fail;
 	    break;
-	case TOP_end:
-	    RETURN(TE_OK);
 	case TOP_fail:
-	    RETURN(TE_FAIL);
+	    return TE_FAIL;
 	default:
 	    ASSERT(0);
 	}
     }
-#undef RETURN
-
- do_return:
-    if (var != def_vars) {
-	erts_free(ERTS_ALC_T_LOADER_TMP, (void *) var);
-    }
-    return rval;
 }
 
 static void
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index f805e7cc64..86bfb5d746 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -1504,8 +1504,6 @@ sub tr_gen_from {
     my($var_num) = 0;
     my(@code);
     my($min_window) = 0;
-    my(@fix_rest_args);
-    my(@fix_pred_funcs);
     my($op, $ref);		# Loop variables.
     my $where = "left side of transformation in line $line: ";
     my %var_used = %$used_ref;
@@ -1530,8 +1528,17 @@ sub tr_gen_from {
 	    my $var;
 	    my(@args);
 
-	    push(@fix_pred_funcs, scalar(@code));
-	    push(@code, [$name, @ops]);
+	    foreach $var (@ops) {
+		error($where, "variable '$var' unbound")
+		    unless defined $var{$var};
+		if ($var_type{$var} eq 'scalar') {
+		    push(@args, "var[$var{$var}]");
+		} else {
+		    push(@args, "rest_args");
+		}
+	    }
+	    my $pi = tr_next_index(\@pred_table, \%pred_table, $name, @args);
+	    push(@code, make_op("$name()", 'pred', $pi));
 	    next;
 	}
 
@@ -1595,12 +1602,16 @@ sub tr_gen_from {
 		    $may_fail = 1;
 		    push(@code, &make_op($var, 'is_same_var', $var{$var}));
 		} elsif ($type eq '*') {
-		    #
-		    # Reserve a hole for a 'rest_args' instruction.
-		    #
+		    foreach my $type (values %var_type) {
+			error("only one use of a '*' variable is " .
+			      "allowed on the left hand side of " .
+			      "a transformation")
+			    if $type eq 'array';
+		    }
 		    $ignored_var = '';
-		    push(@fix_rest_args, scalar(@code));
-		    push(@code, $var);
+		    $var{$var} = 'unnumbered';
+		    $var_type{$var} = 'array';
+		    push(@code, make_op($var, 'rest_args'));
 		} elsif ($var_used{$var}) {
 		    $ignored_var = '';
 		    $var_type{$var} = 'scalar';
@@ -1629,38 +1640,6 @@ sub tr_gen_from {
     #
     push(@code, make_op($may_fail ? '' : 'always reached', 'commit'));
 
-    #
-    # If there is an rest_args instruction, we must insert its correct
-    # variable number (higher than any other).
-    #
-    my $index;
-    &error("only one use of a '*' variable is allowed on the left hand side of a transformation")
-	if @fix_rest_args > 1;
-    foreach $index (@fix_rest_args) {
-	my $var = $code[$index];
-	$var{$var} = $var_num++;
-	$var_type{$var} = 'array';
-	splice(@code, $index, 1, &make_op($var, 'rest_args', $var{$var}));
-    }
-
-    foreach $index (@fix_pred_funcs) {
-	my($name, @ops) = @{$code[$index]};
-	my(@args);
-	my $var;
-
-	foreach $var (@ops) {
-	    &error($where, "variable '$var' unbound")
-		unless defined $var{$var};
-	    if ($var_type{$var} eq 'scalar') {
-		push(@args, "var[$var{$var}]");
-	    } else {
-		push(@args, "var+$var{$var}");
-	    }
-	}
-	my $pi = tr_next_index(\@pred_table, \%pred_table, $name, @args);
-	splice(@code, $index, 1, make_op("$name()", 'pred', $pi));
-    }
-
     $te_max_vars = $var_num
 	if $te_max_vars < $var_num;
     [$min_window, \%var, \%var_type, \@code];
@@ -1697,7 +1676,7 @@ sub tr_gen_to {
 		if ($var_type{$var} eq 'scalar') {
 		    push(@args, "var[$var{$var}]");
 		} else {
-		    push(@args, "var+$var{$var}");
+		    push(@args, "rest_args");
 		}
 	    }
 	    pop(@code);	# Get rid of 'commit' instruction
@@ -1725,7 +1704,7 @@ sub tr_gen_to {
 	    my($var, $type, $type_val) = @$op;
 
 	    if ($type eq '*') {
-		push(@code, make_op($var, 'store_rest_args', $var{$var}));
+		push(@code, make_op($var, 'store_rest_args'));
 	    } elsif ($var ne '') {
 		&error($where, "variable '$var' unbound")
 		    unless defined $var{$var};
-- 
cgit v1.2.3


From c6cabe0b76dda183d209498e1e4e13e3407dcf9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 7 Dec 2015 14:51:44 +0100
Subject: Simplify window management for the transformation engine

Generic instructions have a min_window field. Its purpose is to
avoid calling transform_engine() when there are too few instructions
in the current "transformation window" for a transformation to
succeed.

Currently it does not do much good since the window size will be
decremented by one before being used. The reason for the subtraction
is probably that in some circumstances in the past, the loader could
read past the end of the BEAM module while attempting to fetch
instructions to increase the window size. Therefore, it would not
be safe to just remove the subtraction by one.

The simplest and safest solution seems to be to ensure that there
are always at least TWO instructions when calling transform_engine().
That will be safe, as long as a BEAM module is always finished with
an int_code_end/0 that is not involved in any transformation.
---
 erts/emulator/beam/beam_load.c   | 16 ++++++++--------
 erts/emulator/beam/beam_load.h   |  1 -
 erts/emulator/utils/beam_makeops | 13 +++----------
 3 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index c6c35e74c9..5d03c98657 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2030,14 +2030,14 @@ load_code(LoaderState* stp)
     do_transform:
 	ASSERT(stp->genop != NULL);
 	if (gen_opc[stp->genop->op].transform != -1) {
-	    int need;
-	    tmp_op = stp->genop;
-
-	    for (need = gen_opc[stp->genop->op].min_window-1; need > 0; need--) {
-		if (tmp_op == NULL) {
-		    goto get_next_instr;
-		}
-		tmp_op = tmp_op->next;
+	    if (stp->genop->next == NULL) {
+		/*
+		 * Simple heuristic: Most transformations requires
+		 * at least two instructions, so make sure that
+		 * there are. That will reduce the number of
+		 * TE_SHORT_WINDOWs.
+		 */
+		goto get_next_instr;
 	    }
 	    switch (transform_engine(stp)) {
 	    case TE_FAIL:
diff --git a/erts/emulator/beam/beam_load.h b/erts/emulator/beam/beam_load.h
index 22ab71c868..68f4b96893 100644
--- a/erts/emulator/beam/beam_load.h
+++ b/erts/emulator/beam/beam_load.h
@@ -33,7 +33,6 @@ typedef struct gen_op_entry {
    int specific;
    int num_specific;
    int transform;
-   int min_window;
 } GenOpEntry;
 
 extern GenOpEntry gen_opc[];
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index 86bfb5d746..8f99fdb201 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -113,7 +113,6 @@ my @if_line;
 #
 my $te_max_vars = 0;		# Max number of variables ever needed.
 my %gen_transform;
-my %min_window;
 my %match_engine_ops;		# All opcodes for the match engine.
 my %gen_transform_offset;
 my @transformations;
@@ -382,7 +381,6 @@ while (<>) {
 	    $gen_arity{$name} = $arity;
 	    $gen_to_spec{"$name/$arity"} = undef;
 	    $num_specific{"$name/$arity"} = 0;
-	    $min_window{"$name/$arity"} = 255;
 	    $obsolete[$op_num] = defined $obsolete;
 	} else {		# Unnumbered generic operation.
 	    push(@unnumbered_generic, [$name, $arity]);
@@ -440,7 +438,6 @@ $num_file_opcodes = @gen_opname;
 	$gen_arity{$name} = $arity;
 	$gen_to_spec{"$name/$arity"} = undef;
 	$num_specific{"$name/$arity"} = 0;
-	$min_window{"$name/$arity"} = 255;
     }
 }
 
@@ -607,7 +604,7 @@ sub emulator_output {
 		$is_transformed{$name,$arity} or
 		error("instruction $key has no specific instruction");
 	    $spec_op = -1 unless defined $spec_op;
-	    &init_item($name, $arity, $spec_op, $num_specific, $tr, $min_window{$key});
+	    &init_item($name, $arity, $spec_op, $num_specific, $tr);
 	}
     }
     print "};\n";
@@ -1503,7 +1500,6 @@ sub tr_gen_from {
     my(%var_type);
     my($var_num) = 0;
     my(@code);
-    my($min_window) = 0;
     my($op, $ref);		# Loop variables.
     my $where = "left side of transformation in line $line: ";
     my %var_used = %$used_ref;
@@ -1551,7 +1547,6 @@ sub tr_gen_from {
 	$opnum = $gen_opnum{$name,$arity};
 
 	push(@code, make_op("$name/$arity", 'next_instr', $opnum));
-	$min_window++;
 	foreach $op (@ops) {
 	    my($var, $type, $type_val, $cond, $val) = @$op;
 	    my $ignored_var = "$var (ignored)";
@@ -1642,12 +1637,12 @@ sub tr_gen_from {
 
     $te_max_vars = $var_num
 	if $te_max_vars < $var_num;
-    [$min_window, \%var, \%var_type, \@code];
+    [\%var, \%var_type, \@code];
 }
 
 sub tr_gen_to {
     my($line, $orig_transform, $so_far, @tr) = @_;
-    my($min_window, $var_ref, $var_type_ref, $code_ref) = @$so_far;
+    my($var_ref, $var_type_ref, $code_ref) = @$so_far;
     my(%var) = %$var_ref;
     my(%var_type) = %$var_type_ref;
     my(@code) = @$code_ref;
@@ -1731,8 +1726,6 @@ sub tr_gen_to {
     my($dummy, $arity);
     ($dummy, $op, $arity) = @$first;
     my($comment) = "\n/*\n * Line $line:\n *   $orig_transform\n */\n\n";
-    $min_window{$key} = $min_window
-	if $min_window{$key} > $min_window;
 
     my $prev_last;
     $prev_last = pop(@{$gen_transform{$key}})
-- 
cgit v1.2.3


From e93a66110aa27a5b8228fb46a3459a6de0e626d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 7 Dec 2015 16:12:21 +0100
Subject: Introduce a 'rename' instruction

Introduce a 'rename' instruction that can be used to optimize
simple renaming with unchanged operands such as:

  get_tuple_element Reg P Dst => i_get_tuple_element Reg P Dst

By allowing it to lower the arity of an instruction, transformations
such as the following can be handled:

  trim N Remaining => i_trim N

All in all, currently 67 transformations can be optimized in this
way, including some commonly used ones.
---
 erts/emulator/beam/beam_load.c   |  6 +++++
 erts/emulator/utils/beam_makeops | 55 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 61 insertions(+)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 5d03c98657..ad174664ae 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -5077,6 +5077,12 @@ transform_engine(LoaderState* st)
 	    instr->arity = gen_opc[op].arity;
 	    ap = 0;
 	    break;
+#ifdef TOP_rename
+	case TOP_rename:
+	    instr->op = op = *pc++;
+	    instr->arity = gen_opc[op].arity;
+	    return TE_OK;
+#endif
 	case TOP_store_type:
 	    i = *pc++;
 	    instr->a[ap].type = i;
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index 8f99fdb201..3d4213d55d 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -1718,6 +1718,8 @@ sub tr_gen_to {
     push(@code, make_op('', 'end'))
 	unless is_instr($code[$#code], 'call_end');
 
+    tr_maybe_rename(\@code);
+
     #
     # Chain together all codes segments having the same first operation.
     #
@@ -1743,6 +1745,59 @@ sub tr_gen_to {
     push(@{$gen_transform{$key}}, @code),
 }
 
+sub tr_maybe_rename {
+    my($ref) = @_;
+    my $s = 'left';
+    my $a = 0;
+    my $num_args = 0;
+    my $new_instr;
+    my $first;
+    my $i;
+
+    for ($i = 1; $i < @$ref; $i++) {
+	my $instr = $$ref[$i];
+	my($size, $instr_ref, $comment) = @$instr;
+	my($op, @args) = @$instr_ref;
+
+	if ($s eq 'left') {
+	    if ($op eq 'set_var_next_arg') {
+		if ($num_args == $a and $args[0] == $a) {
+		    $num_args++;
+		}
+		$a++;
+	    } elsif ($op eq 'next_arg') {
+		$a++;
+	    } elsif ($op eq 'commit') {
+		$a = 0;
+		$first = $i;
+		$s = 'committed';
+	    } elsif ($op eq 'next_instr') {
+		return;
+	    }
+	} elsif ($s eq 'committed') {
+	    if ($op eq 'new_instr') {
+		$new_instr = $args[0];
+		$a = 0;
+		$s = 'right';
+	    } else {
+		return;
+	    }
+	} elsif ($s eq 'right') {
+	    if ($op eq 'store_var_next_arg' && $args[0] == $a) {
+		$a++;
+	    } elsif ($op eq 'end' && $a <= $num_args) {
+		my $name = $gen_opname[$new_instr];
+		my $arity = $gen_arity[$new_instr];
+		my $new_op = make_op("$name/$arity", 'rename', $new_instr);
+		splice @$ref, $first, $i-$first+1, ($new_op);
+		return;
+	    } else {
+		return;
+	    }
+	}
+    }
+}
+
 sub tr_code_len {
     my($sum) = 0;
     my($ref);
-- 
cgit v1.2.3


From 937f527054f13dd524588c064cd5d76e3cfd23eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Tue, 5 Apr 2016 12:56:57 +0200
Subject: Avoid rebuilding unchanged instructions

In transformations such as:

  move S X0=x==0 | line Loc  | call_ext Ar Func => \
  line Loc       | move S X0 | call_ext Ar Func

we can avoid rebuilding the last instruction in the sequence
by introducing a 'keep' instruction.

Currently, there are only 13 transformations that are hit by
this optimization, but most of them are frequently used.
---
 erts/emulator/beam/beam_load.c   | 11 ++++++++++-
 erts/emulator/utils/beam_makeops | 41 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index ad174664ae..bdb451a6fe 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -5028,7 +5028,16 @@ transform_engine(LoaderState* st)
 	    instr = 0;
 #endif
 	    break;
-
+#if defined(TOP_keep)
+	case TOP_keep:
+	    /* Keep the current instruction unchanged. */
+	    keep = instr;
+	    st->genop = instr;
+#ifdef DEBUG
+	    instr = 0;
+#endif
+	    break;
+#endif
 #if defined(TOP_call_end)
 	case TOP_call_end:
 	    {
diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index 3d4213d55d..66ffd83ee9 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -1718,6 +1718,7 @@ sub tr_gen_to {
     push(@code, make_op('', 'end'))
 	unless is_instr($code[$#code], 'call_end');
 
+    tr_maybe_keep(\@code);
     tr_maybe_rename(\@code);
 
     #
@@ -1745,6 +1746,46 @@ sub tr_gen_to {
     push(@{$gen_transform{$key}}, @code),
 }
 
+sub tr_maybe_keep {
+    my($ref) = @_;
+    my @last_instr;
+    my $pos;
+    my $reused_instr;
+
+    for (my $i = 0; $i < @$ref; $i++) {
+	my $instr = $$ref[$i];
+	my($size, $instr_ref, $comment) = @$instr;
+	my($op, @args) = @$instr_ref;
+	if ($op eq 'next_instr') {
+	    @last_instr = ($args[0]);
+	} elsif ($op eq 'set_var_next_arg') {
+	    push @last_instr, $args[0];
+	} elsif ($op eq 'next_arg') {
+	    push @last_instr, 'ignored';
+	} elsif ($op eq 'new_instr') {
+	    unless (defined $pos) {
+		# 'new_instr' immediately after 'commit'.
+		$reused_instr = $args[0];
+		return unless shift(@last_instr) == $reused_instr;
+		$pos = $i - 1;
+	    } else {
+		# Second 'new_instr' after 'commit'. The instructions
+		# from $pos up to and including $i - 1 rebuilds the
+		# existing instruction exactly.
+		my $name = $gen_opname[$reused_instr];
+		my $arity = $gen_arity[$reused_instr];
+		my $reuse = make_op("$name/$arity", 'keep');
+		splice @$ref, $pos, $i-$pos, ($reuse);
+		return;
+	    }
+	} elsif ($op eq 'store_var_next_arg') {
+	    return unless shift(@last_instr) eq $args[0];
+	} elsif (defined $pos) {
+	    return;
+	}
+    }
+}
+
 sub tr_maybe_rename {
     my($ref) = @_;
     my $s = 'left';
-- 
cgit v1.2.3


From 72bec464764c919cbfbd2db1c86cce227b2b9c42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Mon, 4 Apr 2016 15:37:46 +0200
Subject: Remove unused variables after code generation

The removal of unused variables on the left side of a transformation
is done while generating the code for the left side.

Postpone removal of unused variables to a later, separate pass to
allow more variables to be eliminated after the optimization
passes introduced in the previous commits.
---
 erts/emulator/utils/beam_makeops | 123 +++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 50 deletions(-)

diff --git a/erts/emulator/utils/beam_makeops b/erts/emulator/utils/beam_makeops
index 66ffd83ee9..2e7073a8f0 100755
--- a/erts/emulator/utils/beam_makeops
+++ b/erts/emulator/utils/beam_makeops
@@ -1402,8 +1402,7 @@ sub tr_gen {
 
     foreach $ref (@g) {
 	my($line, $orig_transform, $from_ref, $to_ref) = @$ref;
-	my $used_ref = used_vars($from_ref, $to_ref);
-	my $so_far = tr_gen_from($line, $used_ref, @$from_ref);
+	my $so_far = tr_gen_from($line, @$from_ref);
 	tr_gen_to($line, $orig_transform, $so_far, @$to_ref);
     }
 
@@ -1454,55 +1453,14 @@ sub tr_gen {
     print "};\n\n";
 }
 
-sub used_vars {
-    my($from_ref,$to_ref) = @_;
-    my %used;
-    my %seen;
-
-    foreach my $ref (@$from_ref) {
-	my($name,$arity,@ops) = @$ref;
-	if ($name =~ /^[.]/) {
-	    foreach my $var (@ops) {
-		$used{$var} = 1;
-	    }
-	} else {
-	    # Any variable that is used at least twice on the
-	    # left-hand side is used. (E.g. "move R R".)
-	    foreach my $op (@ops) {
-		my($var, $type, $type_val) = @$op;
-		next if $var eq '';
-		$used{$var} = 1 if $seen{$var};
-		$seen{$var} = 1;
-	    }
-	}
-    }
-
-    foreach my $ref (@$to_ref) {
-	my($name, $arity, @ops) = @$ref;
-	if ($name =~ /^[.]/) {
-	    foreach my $var (@ops) {
-		$used{$var} = 1;
-	    }
-	} else {
-	    foreach my $op (@ops) {
-		my($var, $type, $type_val) = @$op;
-		next if $var eq '';
-		$used{$var} = 1;
-	    }
-	}
-    }
-    \%used;
-}
-
 sub tr_gen_from {
-    my($line,$used_ref,@tr) = @_;
+    my($line,@tr) = @_;
     my(%var) = ();
     my(%var_type);
     my($var_num) = 0;
     my(@code);
     my($op, $ref);		# Loop variables.
     my $where = "left side of transformation in line $line: ";
-    my %var_used = %$used_ref;
     my $may_fail = 0;
     my $is_first = 1;
 
@@ -1534,7 +1492,10 @@ sub tr_gen_from {
 		}
 	    }
 	    my $pi = tr_next_index(\@pred_table, \%pred_table, $name, @args);
-	    push(@code, make_op("$name()", 'pred', $pi));
+	    my $op = make_op("$name()", 'pred', $pi);
+	    my @slots = grep(/^\d+/, map { $var{$_} } @ops);
+	    op_slot_usage($op, @slots);
+	    push(@code, $op);
 	    next;
 	}
 
@@ -1595,7 +1556,9 @@ sub tr_gen_from {
 		if (defined $var{$var}) {
 		    $ignored_var = '';
 		    $may_fail = 1;
-		    push(@code, &make_op($var, 'is_same_var', $var{$var}));
+		    my $op = make_op($var, 'is_same_var', $var{$var});
+		    op_slot_usage($op, $var{$var});
+		    push(@code, $op);
 		} elsif ($type eq '*') {
 		    foreach my $type (values %var_type) {
 			error("only one use of a '*' variable is " .
@@ -1607,7 +1570,7 @@ sub tr_gen_from {
 		    $var{$var} = 'unnumbered';
 		    $var_type{$var} = 'array';
 		    push(@code, make_op($var, 'rest_args'));
-		} elsif ($var_used{$var}) {
+		} else {
 		    $ignored_var = '';
 		    $var_type{$var} = 'scalar';
 		    $var{$var} = $var_num;
@@ -1677,7 +1640,10 @@ sub tr_gen_to {
 	    pop(@code);	# Get rid of 'commit' instruction
 	    my $index = tr_next_index(\@call_table, \%call_table,
 				      $name, @args);
-	    push(@code, make_op("$name()", 'call_end', $index));
+	    my $op = make_op("$name()", 'call_end', $index);
+	    my @slots = grep(/^\d+/, map { $var{$_} } @ops);
+	    op_slot_usage($op, @slots);
+	    push(@code, $op);
 	    last;
 	}
 
@@ -1703,7 +1669,9 @@ sub tr_gen_to {
 	    } elsif ($var ne '') {
 		&error($where, "variable '$var' unbound")
 		    unless defined $var{$var};
-		push(@code, &make_op($var, 'store_var_next_arg', $var{$var}));
+		my $op = make_op($var, 'store_var_next_arg', $var{$var});
+		op_slot_usage($op, $var{$var});
+		push(@code, $op);
 	    } elsif ($type ne '') {
 		push(@code, &make_op('', 'store_type', "TAG_$type"));
 		if ($type_val) {
@@ -1720,6 +1688,7 @@ sub tr_gen_to {
 
     tr_maybe_keep(\@code);
     tr_maybe_rename(\@code);
+    tr_remove_unused(\@code);
 
     #
     # Chain together all codes segments having the same first operation.
@@ -1839,6 +1808,55 @@ sub tr_maybe_rename {
     }
 }
 
+sub tr_remove_unused {
+    my($ref) = @_;
+    my %used;
+
+    # Collect all used variables.
+    for my $instr (@$ref) {
+	my $uref = $$instr[3];
+	for my $slot (@$uref) {
+	    $used{$slot} = 1;
+	}
+    }
+
+    # Replace 'set_var_next_arg' with 'next_arg' if the variable
+    # is never used.
+    for my $instr (@$ref) {
+	my($size, $instr_ref, $comment) = @$instr;
+	my($op, @args) = @$instr_ref;
+	if ($op eq 'set_var_next_arg') {
+	    my $var = $args[0];
+	    next if $used{$var};
+	    $instr = make_op("$comment (ignored)", 'next_arg');
+	}
+    }
+
+    # Delete a sequence of 'next_arg' instructions when they are
+    # redundant before instructions such as 'commit'.
+    my @opcode;
+    my %ending = (call_end => 1,
+		  commit => 1,
+		  next_instr => 1,
+		  pred => 1,
+		  rename => 1,
+		  keep => 1);
+    for (my $i = 0; $i < @$ref; $i++) {
+	my $instr = $$ref[$i];
+	my($size, $instr_ref, $comment) = @$instr;
+	my($opcode) = @$instr_ref;
+
+	if ($ending{$opcode}) {
+	    my $first = $i;
+	    $first-- while $first > 0 and $opcode[$first-1] eq 'next_arg';
+	    my $n = $i - $first;
+	    splice @$ref, $first, $n;
+	    $i -= $n;
+	}
+	$opcode[$i] = $opcode;
+    }
+}
+
 sub tr_code_len {
     my($sum) = 0;
     my($ref);
@@ -1851,7 +1869,12 @@ sub tr_code_len {
 
 sub make_op {
     my($comment, @op) = @_;
-    [scalar(@op), [@op], $comment];
+    [scalar(@op), [@op], $comment, []];
+}
+
+sub op_slot_usage {
+    my($op_ref, @slots) = @_;
+    $$op_ref[3] = \@slots;
 }
 
 sub is_instr {
-- 
cgit v1.2.3


From 4f33597d52a0cef2e47b07578bc8a35a17c2f969 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 6 Apr 2016 07:02:36 +0200
Subject: Don't let the loader do the compiler's job

Optimizations that are possible to do by the compiler should be
done by the compiler and not by the loader.

If the compiler has done its job correctly, attempting to do the two
transformations only wastes time.
---
 erts/emulator/beam/beam_load.c | 7 -------
 erts/emulator/beam/ops.tab     | 5 -----
 2 files changed, 12 deletions(-)

diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index bdb451a6fe..a98900460e 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2735,13 +2735,6 @@ mixed_types(LoaderState* stp, GenOpArg Size, GenOpArg* Rest)
     return 0;
 }
 
-static int
-same_label(LoaderState* stp, GenOpArg Target, GenOpArg Label)
-{
-    return Target.type = TAG_f && Label.type == TAG_u &&
-	Target.val == Label.val;
-}
-
 static int
 is_killed_apply(LoaderState* stp, GenOpArg Reg, GenOpArg Live)
 {
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 78000160e3..485c072540 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -181,11 +181,6 @@ i_jump_on_val_zero y f I
 i_jump_on_val x f I I
 i_jump_on_val y f I I
 
-jump Target | label Lbl | same_label(Target, Lbl) => label Lbl
-
-is_ne_exact L1 S1 S2 | jump Fail | label L2 | same_label(L1, L2) => \
-  is_eq_exact Fail S1 S2 | label L2
-
 %macro: get_list GetList -pack
 get_list x x x
 get_list x x y
-- 
cgit v1.2.3


From 921c838b8142d3c8d1739c6b30c6d88e39e5f147 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= <bjorn@erlang.org>
Date: Wed, 6 Apr 2016 07:13:26 +0200
Subject: Eliminate unnecessary renaming of bs_put_utf16/3

There is no reason to rename bs_put_utf16/3.

(We rename instructions if we'll need to change the operands or
if we will need to avoid an endless transformation loop. Neither
of these reasons applies to bs_put_utf16/3.)
---
 erts/emulator/beam/beam_emu.c | 2 +-
 erts/emulator/beam/ops.tab    | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index a390422040..09a41f2b56 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -4102,7 +4102,7 @@ do {						\
      StoreBifResult(1, result);
  }
 
- OpCase(i_bs_put_utf16_jIs): {
+ OpCase(bs_put_utf16_jIs): {
      Eterm arg;
 
      GetArg1(2, arg);
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 485c072540..15f27835a8 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -1350,9 +1350,7 @@ bs_put_utf8 Fail u Src=s => i_bs_put_utf8 Fail Src
 
 i_bs_put_utf8 j s
 
-bs_put_utf16 Fail Flags=u Src=s => i_bs_put_utf16 Fail Flags Src
-
-i_bs_put_utf16 j I s
+bs_put_utf16 j I s
 
 bs_put_utf32 Fail=j Flags=u Src=s => \
    i_bs_validate_unicode Fail Src | bs_put_integer Fail i=32 u=1 Flags Src
-- 
cgit v1.2.3