diff options
author | Björn-Egil Dahlberg <[email protected]> | 2015-04-28 16:38:19 +0200 |
---|---|---|
committer | Björn-Egil Dahlberg <[email protected]> | 2015-04-28 16:38:19 +0200 |
commit | 3ce2fe4c0e88da5ff8f50624f133e1fee9726473 (patch) | |
tree | 966ca8764a1d617eb23776594cbfd0a9727236f1 /erts | |
parent | 22173c2e4d65e47039975b91015560ae4256b3c1 (diff) | |
parent | 979d05fc326d0f6cf7c1c4a5182bb33942852dd7 (diff) | |
download | otp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.tar.gz otp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.tar.bz2 otp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.zip |
Merge branch 'egil/opt-instructions/OTP-12690'
* egil/opt-instructions/OTP-12690:
erts: Specialize minus and plus instruction
erts: Add move2 specialization for common move patterns
erts: Specialize rem instruction for common case
erts: Specialize band instruction for common case
erts: Batch loads and stores for move_window
erts: Fix loader increment from minus instruction
erts: Add move window instruction
erts: Add instruction move3 for xy and xx
erts: Specialize compare instructions
kernel: Add instruction_count helper to erts_debug
Diffstat (limited to 'erts')
-rw-r--r-- | erts/emulator/beam/beam_emu.c | 131 | ||||
-rw-r--r-- | erts/emulator/beam/beam_load.c | 6 | ||||
-rw-r--r-- | erts/emulator/beam/ops.tab | 110 |
3 files changed, 241 insertions, 6 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index fce4fd498a..21faf8fbf2 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -562,7 +562,8 @@ void** beam_ops; Store(term, Dst); \ } while (0) -#define Move2(src1, dst1, src2, dst2) dst1 = (src1); dst2 = (src2) +#define Move2(S1, D1, S2, D2) D1 = (S1); D2 = (S2) +#define Move3(S1, D1, S2, D2, S3, D3) D1 = (S1); D2 = (S2); D3 = (S3) #define MoveGenDest(src, dstp) \ if ((dstp) == NULL) { r(0) = (src); } else { *(dstp) = src; } @@ -662,6 +663,9 @@ void** beam_ops; #define EqualImmed(X, Y, Action) if (X != Y) { Action; } #define NotEqualImmed(X, Y, Action) if (X == Y) { Action; } +#define EqualExact(X, Y, Action) if (!EQ(X,Y)) { Action; } +#define IsLessThan(X, Y, Action) if (CMP_GE(X, Y)) { Action; } +#define IsGreaterEqual(X, Y, Action) if (CMP_LT(X, Y)) { Action; } #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; } @@ -1389,7 +1393,39 @@ void process_main(void) ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue)); goto find_func_info; } - + +#define DO_BIG_ARITH(Func,Arg1,Arg2) \ + do { \ + Uint live = Arg(1); \ + SWAPOUT; \ + reg[0] = r(0); \ + reg[live] = (Arg1); \ + reg[live+1] = (Arg2); \ + result = (Func)(c_p, reg, live); \ + r(0) = reg[0]; \ + SWAPIN; \ + ERTS_HOLE_CHECK(c_p); \ + if (is_value(result)) { \ + StoreBifResult(4,result); \ + } \ + goto lb_Cl_error; \ + } while(0) + + OpCase(i_plus_jIxxd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + Sint i = signed_val(xb(Arg(2))) + signed_val(xb(Arg(3))); + ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); + if (MY_IS_SSMALL(i)) { + result = make_small(i); + StoreBifResult(4, result); + } + } + DO_BIG_ARITH(ARITH_FUNC(mixed_plus), xb(Arg(2)), xb(Arg(3))); + } + OpCase(i_plus_jId): { Eterm result; @@ -1401,12 +1437,26 @@ void process_main(void) result = make_small(i); STORE_ARITH_RESULT(result); } - } arith_func = ARITH_FUNC(mixed_plus); goto do_big_arith2; } + OpCase(i_minus_jIxxd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + Sint i = signed_val(xb(Arg(2))) - signed_val(xb(Arg(3))); + ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); + if (MY_IS_SSMALL(i)) { + result = make_small(i); + StoreBifResult(4, result); + } + } + DO_BIG_ARITH(ARITH_FUNC(mixed_minus), xb(Arg(2)), xb(Arg(3))); + } + OpCase(i_minus_jId): { Eterm result; @@ -1499,6 +1549,52 @@ void process_main(void) Next(2); } + OpCase(move_window3_xxxy): { + BeamInstr *next; + Eterm xt0, xt1, xt2; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(3))); + PreFetch(4, next); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; + NextPF(4, next); + } + OpCase(move_window4_xxxxy): { + BeamInstr *next; + Eterm xt0, xt1, xt2, xt3; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(4))); + PreFetch(5, next); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + xt3 = xb(Arg(3)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; + y[3] = xt3; + NextPF(5, next); + } + OpCase(move_window5_xxxxxy): { + BeamInstr *next; + Eterm xt0, xt1, xt2, xt3, xt4; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(5))); + PreFetch(6, next); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + xt3 = xb(Arg(3)); + xt4 = xb(Arg(4)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; + y[3] = xt3; + y[4] = xt4; + NextPF(6, next); + } + OpCase(i_move_call_only_fcr): { r(0) = Arg(1); } @@ -2835,6 +2931,19 @@ do { \ goto do_big_arith2; } + OpCase(i_rem_jIxxd): + { + Eterm result; + + if (xb(Arg(3)) == SMALL_ZERO) { + goto badarith; + } else if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + result = make_small(signed_val(xb(Arg(2))) % signed_val(xb(Arg(3)))); + StoreBifResult(4, result); + } + DO_BIG_ARITH(ARITH_FUNC(int_rem),xb(Arg(2)),xb(Arg(3))); + } + OpCase(i_rem_jId): { Eterm result; @@ -2850,6 +2959,20 @@ do { \ } } + OpCase(i_band_jIxcd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), Arg(3))) { + /* + * No need to untag -- TAG & TAG == TAG. + */ + result = xb(Arg(2)) & Arg(3); + StoreBifResult(4, result); + } + DO_BIG_ARITH(ARITH_FUNC(band),xb(Arg(2)),Arg(3)); + } + OpCase(i_band_jId): { Eterm result; @@ -2865,6 +2988,8 @@ do { \ goto do_big_arith2; } +#undef DO_BIG_ARITH + do_big_arith2: { Eterm result; diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 8d7beb4eb4..282aa71109 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3172,7 +3172,11 @@ gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer, static int negation_is_small(LoaderState* stp, GenOpArg Int) { - return Int.type == TAG_i && IS_SSMALL(-Int.val); + /* Check for the rare case of overflow in BeamInstr (UWord) -> Sint + * Cast to the correct type before using IS_SSMALL (Sint) */ + return Int.type == TAG_i && + !(Int.val & ~((((BeamInstr)1) << ((sizeof(Sint)*8)-1))-1)) && + IS_SSMALL(-((Sint)Int.val)); } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9bdc9cb88d..ece038131e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -298,10 +298,49 @@ move_jump f c move_jump f x move_jump f y + +# Movement to and from the stack is common +# Try to pack as much as we can into one instruction + +# Window move +move_window/5 +move_window/6 + +# x -> y + +move S1=r S2=y | move X1=x Y1=y => move2 S1 S2 X1 Y1 + +move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \ + move_window X1 X2 X3 Y1 Y3 + +move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \ + move_window X1 X2 X3 X4 Y1 Y4 + +move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \ + move_window5 X1 X2 X3 X4 X5 Y1 + +move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1 +move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1 + +move_window3 x x x y +move_window4 x x x x y +move_window5 x x x x x y + move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 +move S1=x S2=r | move S3=x S4=x => move2 S1 S2 S3 S4 +move S1=x S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 +move S1=y S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 + +move Y1=y X1=x | move S1=r D1=x => move2 Y1 X1 S1 D1 +move S1=r D1=x | move Y1=y X1=x => move2 S1 D1 Y1 X1 + +move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 +move2 Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 +move2 X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 + move C=aiq X=x==1 => move_x1 C move C=aiq X=x==2 => move_x2 C @@ -313,6 +352,20 @@ move2 x y x y move2 y x y x move2 x x x x +move2 x r x x + +move2 x r x y +move2 r y x y +move2 y r x y + +move2 r x y x +move2 y x r x + +%macro: move3 Move3 +move3 x y x y x y +move3 y x y x y x +move3 x x x x x x + # The compiler almost never generates a "move Literal y(Y)" instruction, # so let's cheat if we encounter one. move S=n D=y => init D @@ -392,14 +445,59 @@ i_is_ne_exact_literal x f c i_is_ne_exact_literal y f c # +# Common Compare Specializations +# We don't do all of them since we want +# to keep the instruction set small-ish +# + +is_eq_exact Lbl S1=xy S2=r => is_eq_exact Lbl S2 S1 +is_eq_exact Lbl S1=rx S2=xy => i_is_eq_exact_spec Lbl S1 S2 +%macro: i_is_eq_exact_spec EqualExact -fail_action + +i_is_eq_exact_spec f x x +i_is_eq_exact_spec f x y +i_is_eq_exact_spec f r x +i_is_eq_exact_spec f r y +%cold +i_is_eq_exact_spec f r r +%hot + +is_lt Lbl S1=rxc S2=rxc => i_is_lt_spec Lbl S1 S2 + +%macro: i_is_lt_spec IsLessThan -fail_action + +i_is_lt_spec f x x +i_is_lt_spec f x r +i_is_lt_spec f x c +i_is_lt_spec f r x +i_is_lt_spec f r c +i_is_lt_spec f c x +i_is_lt_spec f c r +%cold +i_is_lt_spec f r r +i_is_lt_spec f c c +%hot + +is_ge Lbl S1=xc S2=xc => i_is_ge_spec Lbl S1 S2 + +%macro: i_is_ge_spec IsGreaterEqual -fail_action + +i_is_ge_spec f x x +i_is_ge_spec f x c +i_is_ge_spec f c x +%cold +i_is_ge_spec f c c +%hot + +# # All other comparisons. # is_eq_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl -is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl is_lt Lbl S1 S2 => i_fetch S1 S2 | i_is_lt Lbl +is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl is_eq Lbl S1 S2 => i_fetch S1 S2 | i_is_eq Lbl is_ne Lbl S1 S2 => i_fetch S1 S2 | i_is_ne Lbl @@ -493,7 +591,6 @@ put_list s s d %hot %macro: i_fetch FetchArgs -pack -i_fetch c c i_fetch c r i_fetch c x i_fetch c y @@ -510,6 +607,7 @@ i_fetch y x i_fetch y y %cold +i_fetch c c i_fetch s s %hot @@ -1562,17 +1660,21 @@ gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \ # GCing arithmetic instructions. # +gc_bif2 Fail I u$bif:erlang:splus/2 S1=x S2=x Dst=d => i_plus Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst +gc_bif2 Fail I u$bif:erlang:sminus/2 S1=x S2=x Dst=d => i_minus Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:sminus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_minus Fail I Dst gc_bif2 Fail I u$bif:erlang:stimes/2 S1 S2 Dst=d => i_fetch S1 S2 | i_times Fail I Dst gc_bif2 Fail I u$bif:erlang:div/2 S1 S2 Dst=d => i_fetch S1 S2 | i_m_div Fail I Dst gc_bif2 Fail I u$bif:erlang:intdiv/2 S1 S2 Dst=d => i_fetch S1 S2 | i_int_div Fail I Dst +gc_bif2 Fail I u$bif:erlang:rem/2 S1=x S2=x Dst=d => i_rem Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:rem/2 S1 S2 Dst=d => i_fetch S1 S2 | i_rem Fail I Dst gc_bif2 Fail I u$bif:erlang:bsl/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsl Fail I Dst gc_bif2 Fail I u$bif:erlang:bsr/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsr Fail I Dst +gc_bif2 Fail I u$bif:erlang:band/2 S1=x S2=c Dst=d => i_band Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:band/2 S1 S2 Dst=d => i_fetch S1 S2 | i_band Fail I Dst gc_bif2 Fail I u$bif:erlang:bor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bor Fail I Dst gc_bif2 Fail I u$bif:erlang:bxor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bxor Fail I Dst @@ -1586,16 +1688,20 @@ i_increment r I I d i_increment x I I d i_increment y I I d +i_plus j I x x d i_plus j I d +i_minus j I x x d i_minus j I d i_times j I d i_m_div j I d i_int_div j I d +i_rem j I x x d i_rem j I d i_bsl j I d i_bsr j I d +i_band j I x c d i_band j I d i_bor j I d i_bxor j I d |