aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBjörn Gustavsson <[email protected]>2019-02-25 11:48:14 +0100
committerBjörn Gustavsson <[email protected]>2019-02-28 12:18:24 +0100
commitcb2eb1766b5db1d5462a63025e99496ec8c6969b (patch)
tree3c0a5061f4bb9c5c1a66a615e237711424976572
parent72b503485b0d029f615c80c3e64680419cd690d9 (diff)
downloadotp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.tar.gz
otp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.tar.bz2
otp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.zip
Tune move instructions
Of the `move_dup` instructions, only `move_dup x x x` was frequently used. Remove the other register combinations. With those instruction `move_dup` instructions removed, it is necessary to add new predicates to avoid unsafe translation to `move_shift` and `move2_par`. Also add additional transformations to transform more `move` instructions into `move2_par`. The existing transformation would require the `move` instructions to be in the "right" order in order to be transformed. Remove `move3 x y x y x y` because it turns out to be rarely executed.
-rw-r--r--erts/emulator/beam/beam_load.c29
-rw-r--r--erts/emulator/beam/ops.tab91
2 files changed, 80 insertions, 40 deletions
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index e9e294cd59..c8618121ea 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -2986,6 +2986,35 @@ compiled_with_otp_20_or_higher(LoaderState* stp)
}
/*
+ * Predicate that tests whether the following two moves are independent:
+ *
+ * move Src1 Dst1
+ * move Src2 Dst2
+ *
+ */
+static int
+independent_moves(LoaderState* stp, GenOpArg Src1, GenOpArg Dst1,
+ GenOpArg Src2, GenOpArg Dst2)
+{
+ return (Src1.type != Dst2.type || Src1.val != Dst2.val) &&
+ (Src2.type != Dst1.type || Src2.val != Dst1.val) &&
+ (Dst1.type != Dst2.type ||Dst1.val != Dst2.val);
+}
+
+/*
+ * Predicate that tests that two registers are distinct.
+ *
+ * move Src1 Dst1
+ * move Src2 Dst2
+ *
+ */
+static int
+distinct(LoaderState* stp, GenOpArg Reg1, GenOpArg Reg2)
+{
+ return Reg1.type != Reg2.type || Reg1.val != Reg2.val;
+}
+
+/*
* Predicate that tests whether a jump table can be used.
*/
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 8e730e42d6..9958d072e7 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -311,69 +311,80 @@ swap_temp x xy x
swap x xy
+# move_dup
+
move Src=x D1=x | move Src=x D2=x => move_dup Src D1 D2
-move Src=x SD=x | move SD=x D=x => move_dup Src SD D
-move Src=x D1=x | move Src=x D2=y => move_dup Src D1 D2
-move Src=y SD=x | move SD=x D=y => move_dup Src SD D
-move Src=x SD=x | move SD=x D=y => move_dup Src SD D
-move Src=y SD=x | move SD=x D=x => move_dup Src SD D
-
-move SD=x D=x | move Src=xy SD=x => move_shift Src SD D
-move SD=y D=x | move Src=x SD=y => move_shift Src SD D
-move SD=x D=y | move Src=x SD=x => move_shift Src SD D
-
-# The transformations above guarantee that the source for
-# the second move is not the same as the destination for
-# the first move. That means that we can do the moves in
-# parallel (fetch both values, then store them) which could
-# be faster.
+move Src=x SD=x | move SD=x D=x => move_dup Src SD D
-move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2
-move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2
+move_dup x x x
-move X1=x X2=x | move X3=x X4=x => move2_par X1 X2 X3 X4
+# move_shift
-move X1=x X2=x | move X3=x Y1=y => move2_par X1 X2 X3 Y1
+move SD=x D=x | move Src=xy SD=x | distinct(D, Src) => move_shift Src SD D
+move SD=y D=x | move Src=x SD=y | distinct(D, Src) => move_shift Src SD D
+move SD=x D=y | move Src=x SD=x | distinct(D, Src) => move_shift Src SD D
-move S1=x S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1
+move_shift x x x
+move_shift y x x
+move_shift x y x
+move_shift x x y
-move S1=y S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1
+# move2_par x x x x
-move Y1=y X1=x | move S1=x D1=x => move2_par Y1 X1 S1 D1
-move S1=x D1=x | move Y1=y X1=x => move2_par S1 D1 Y1 X1
+move X1=x X2=x | move X3=x X4=x | independent_moves(X1, X2, X3, X4) => \
+ move2_par X1 X2 X3 X4
+move2_par x x x x
-move2_par X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3
-move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3
-move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6
+# move2_par x y x y
-move C=aiq X=x==1 => move_x1 C
-move C=aiq X=x==2 => move_x2 C
+move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2
+move2_par x y x y
-move_x1 c
-move_x2 c
+# move2_par x x x y
-move_shift x x x
-move_shift y x x
-move_shift x y x
-move_shift x x y
+move X1=x X2=x | move X3=x Y1=y | independent_moves(X1, X2, X3, Y1) => \
+ move2_par X1 X2 X3 Y1
+move X3=x Y1=y | move X1=x X2=x | independent_moves(X3, Y1, X1, X2) => \
+ move2_par X1 X2 X3 Y1
+move2_par x x x y
-move_dup xy x xy
+# move2_par y x y x
-move2_par x y x y
+move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2
move2_par y x y x
-move2_par x x x x
-move2_par x x x y
+# move2_par y x x y
+move S1=y S2=x | move X1=x Y1=y | independent_moves(S1, S2, X1, Y1) => \
+ move2_par S1 S2 X1 Y1
+move X1=x Y1=y | move S1=y S2=x | independent_moves(S1, S2, X1, Y1) => \
+ move2_par S1 S2 X1 Y1
move2_par y x x y
-move2_par x x y x
+# move2_par y x x x
+
+move Y1=y X1=x | move S1=x D1=x | independent_moves(Y1, X1, S1, D1) => \
+ move2_par Y1 X1 S1 D1
+move S1=x D1=x | move Y1=y X1=x | independent_moves(Y1, X1, S1, D1) => \
+ move2_par Y1 X1 S1 D1
move2_par y x x x
-move3 x y x y x y
+# move3
+
+move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3
+move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6
+
move3 y x y x y x
move3 x x x x x x
+# move_x1, move_x2
+
+move C=aiq X=x==1 => move_x1 C
+move C=aiq X=x==2 => move_x2 C
+
+move_x1 c
+move_x2 c
+
# The compiler almost never generates a "move Literal y(Y)" instruction,
# so let's cheat if we encounter one.
move S=n D=y => init D