diff options
author | Björn Gustavsson <[email protected]> | 2019-02-25 11:48:14 +0100 |
---|---|---|
committer | Björn Gustavsson <[email protected]> | 2019-02-28 12:18:24 +0100 |
commit | cb2eb1766b5db1d5462a63025e99496ec8c6969b (patch) | |
tree | 3c0a5061f4bb9c5c1a66a615e237711424976572 | |
parent | 72b503485b0d029f615c80c3e64680419cd690d9 (diff) | |
download | otp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.tar.gz otp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.tar.bz2 otp-cb2eb1766b5db1d5462a63025e99496ec8c6969b.zip |
Tune move instructions
Of the `move_dup` instructions, only `move_dup x x x` was
frequently used. Remove the other register combinations.
With those `move_dup` instructions removed, it
is necessary to add new predicates to avoid unsafe translation
to `move_shift` and `move2_par`.
Also add additional transformations to transform more `move`
instructions into `move2_par`. The existing transformation
would require the `move` instructions to appear in the "right"
order before they could be transformed.
Remove `move3 x y x y x y` because it turns out to be rarely
executed.
-rw-r--r-- | erts/emulator/beam/beam_load.c | 29 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 91 |
2 files changed, 80 insertions, 40 deletions
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index e9e294cd59..c8618121ea 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -2986,6 +2986,35 @@ compiled_with_otp_20_or_higher(LoaderState* stp) } /* + * Predicate that tests whether the following two moves are independent: + * + * move Src1 Dst1 + * move Src2 Dst2 + * + */ +static int +independent_moves(LoaderState* stp, GenOpArg Src1, GenOpArg Dst1, + GenOpArg Src2, GenOpArg Dst2) +{ + return (Src1.type != Dst2.type || Src1.val != Dst2.val) && + (Src2.type != Dst1.type || Src2.val != Dst1.val) && + (Dst1.type != Dst2.type ||Dst1.val != Dst2.val); +} + +/* + * Predicate that tests that two registers are distinct. + * + * move Src1 Dst1 + * move Src2 Dst2 + * + */ +static int +distinct(LoaderState* stp, GenOpArg Reg1, GenOpArg Reg2) +{ + return Reg1.type != Reg2.type || Reg1.val != Reg2.val; +} + +/* * Predicate that tests whether a jump table can be used. */ diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 8e730e42d6..9958d072e7 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -311,69 +311,80 @@ swap_temp x xy x swap x xy +# move_dup + move Src=x D1=x | move Src=x D2=x => move_dup Src D1 D2 -move Src=x SD=x | move SD=x D=x => move_dup Src SD D -move Src=x D1=x | move Src=x D2=y => move_dup Src D1 D2 -move Src=y SD=x | move SD=x D=y => move_dup Src SD D -move Src=x SD=x | move SD=x D=y => move_dup Src SD D -move Src=y SD=x | move SD=x D=x => move_dup Src SD D - -move SD=x D=x | move Src=xy SD=x => move_shift Src SD D -move SD=y D=x | move Src=x SD=y => move_shift Src SD D -move SD=x D=y | move Src=x SD=x => move_shift Src SD D - -# The transformations above guarantee that the source for -# the second move is not the same as the destination for -# the first move. That means that we can do the moves in -# parallel (fetch both values, then store them) which could -# be faster. 
+move Src=x SD=x | move SD=x D=x => move_dup Src SD D -move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2 -move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2 +move_dup x x x -move X1=x X2=x | move X3=x X4=x => move2_par X1 X2 X3 X4 +# move_shift -move X1=x X2=x | move X3=x Y1=y => move2_par X1 X2 X3 Y1 +move SD=x D=x | move Src=xy SD=x | distinct(D, Src) => move_shift Src SD D +move SD=y D=x | move Src=x SD=y | distinct(D, Src) => move_shift Src SD D +move SD=x D=y | move Src=x SD=x | distinct(D, Src) => move_shift Src SD D -move S1=x S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1 +move_shift x x x +move_shift y x x +move_shift x y x +move_shift x x y -move S1=y S2=x | move X1=x Y1=y => move2_par S1 S2 X1 Y1 +# move2_par x x x x -move Y1=y X1=x | move S1=x D1=x => move2_par Y1 X1 S1 D1 -move S1=x D1=x | move Y1=y X1=x => move2_par S1 D1 Y1 X1 +move X1=x X2=x | move X3=x X4=x | independent_moves(X1, X2, X3, X4) => \ + move2_par X1 X2 X3 X4 +move2_par x x x x -move2_par X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 -move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 -move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 +# move2_par x y x y -move C=aiq X=x==1 => move_x1 C -move C=aiq X=x==2 => move_x2 C +move X1=x Y1=y | move X2=x Y2=y => move2_par X1 Y1 X2 Y2 +move2_par x y x y -move_x1 c -move_x2 c +# move2_par x x x y -move_shift x x x -move_shift y x x -move_shift x y x -move_shift x x y +move X1=x X2=x | move X3=x Y1=y | independent_moves(X1, X2, X3, Y1) => \ + move2_par X1 X2 X3 Y1 +move X3=x Y1=y | move X1=x X2=x | independent_moves(X3, Y1, X1, X2) => \ + move2_par X1 X2 X3 Y1 +move2_par x x x y -move_dup xy x xy +# move2_par y x y x -move2_par x y x y +move Y1=y X1=x | move Y2=y X2=x => move2_par Y1 X1 Y2 X2 move2_par y x y x -move2_par x x x x -move2_par x x x y +# move2_par y x x y +move S1=y S2=x | move X1=x Y1=y | independent_moves(S1, S2, X1, Y1) => \ + move2_par S1 S2 X1 Y1 +move 
X1=x Y1=y | move S1=y S2=x | independent_moves(S1, S2, X1, Y1) => \ + move2_par S1 S2 X1 Y1 move2_par y x x y -move2_par x x y x +# move2_par y x x x + +move Y1=y X1=x | move S1=x D1=x | independent_moves(Y1, X1, S1, D1) => \ + move2_par Y1 X1 S1 D1 +move S1=x D1=x | move Y1=y X1=x | independent_moves(Y1, X1, S1, D1) => \ + move2_par Y1 X1 S1 D1 move2_par y x x x -move3 x y x y x y +# move3 + +move2_par Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 +move2_par X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 + move3 y x y x y x move3 x x x x x x +# move_x1, move_x2 + +move C=aiq X=x==1 => move_x1 C +move C=aiq X=x==2 => move_x2 C + +move_x1 c +move_x2 c + # The compiler almost never generates a "move Literal y(Y)" instruction, # so let's cheat if we encounter one. move S=n D=y => init D |