From 177d20a7329d24aec0f4a0cbbbd3fc803bcbba4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Fri, 17 Apr 2015 11:04:09 +0200 Subject: erts: Specialize compare instructions * i_is_lt for r, x registers and constants * i_is_ge for x registers and constants * i_is_exact_eq for r and x registers --- erts/emulator/beam/beam_emu.c | 3 +++ erts/emulator/beam/ops.tab | 49 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 50 insertions(+), 2 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index bb7b799950..aad76e9a93 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -662,6 +662,9 @@ void** beam_ops; #define EqualImmed(X, Y, Action) if (X != Y) { Action; } #define NotEqualImmed(X, Y, Action) if (X == Y) { Action; } +#define EqualExact(X, Y, Action) if (!EQ(X,Y)) { Action; } +#define IsLessThan(X, Y, Action) if (CMP_GE(X, Y)) { Action; } +#define IsGreaterEqual(X, Y, Action) if (CMP_LT(X, Y)) { Action; } #define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9bdc9cb88d..9330192d04 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -391,6 +391,51 @@ i_is_ne_exact_literal r f c i_is_ne_exact_literal x f c i_is_ne_exact_literal y f c +# +# Common Compare Specializations +# We don't do all of them since we want +# to keep the instruction set small-ish +# + +is_eq_exact Lbl S1=xy S2=r => is_eq_exact Lbl S2 S1 +is_eq_exact Lbl S1=rx S2=xy => i_is_eq_exact_spec Lbl S1 S2 +%macro: i_is_eq_exact_spec EqualExact -fail_action + +i_is_eq_exact_spec f x x +i_is_eq_exact_spec f x y +i_is_eq_exact_spec f r x +i_is_eq_exact_spec f r y +%cold +i_is_eq_exact_spec f r r +%hot + +is_lt Lbl S1=rxc S2=rxc => i_is_lt_spec Lbl S1 S2 + +%macro: i_is_lt_spec IsLessThan -fail_action + +i_is_lt_spec f x x +i_is_lt_spec f x r +i_is_lt_spec f x c +i_is_lt_spec f r x +i_is_lt_spec f r c +i_is_lt_spec f c x +i_is_lt_spec f c r +%cold +i_is_lt_spec f r r +i_is_lt_spec f c c +%hot + +is_ge Lbl S1=xc S2=xc => i_is_ge_spec Lbl S1 S2 + +%macro: i_is_ge_spec IsGreaterEqual -fail_action + +i_is_ge_spec f x x +i_is_ge_spec f x c +i_is_ge_spec f c x +%cold +i_is_ge_spec f c c +%hot + # # All other comparisons. # @@ -398,8 +443,8 @@ i_is_ne_exact_literal y f c is_eq_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl -is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl is_lt Lbl S1 S2 => i_fetch S1 S2 | i_is_lt Lbl +is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl is_eq Lbl S1 S2 => i_fetch S1 S2 | i_is_eq Lbl is_ne Lbl S1 S2 => i_fetch S1 S2 | i_is_ne Lbl @@ -493,7 +538,6 @@ put_list s s d %hot %macro: i_fetch FetchArgs -pack -i_fetch c c i_fetch c r i_fetch c x i_fetch c y @@ -510,6 +554,7 @@ i_fetch y x i_fetch y y %cold +i_fetch c c i_fetch s s %hot -- cgit v1.2.3 From 63949dcd13e24bdc73336b0f5e88d4f06cce56c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Mon, 15 Sep 2014 17:58:33 +0200 Subject: erts: Add instruction move3 for xy and xx --- erts/emulator/beam/beam_emu.c | 3 ++- erts/emulator/beam/ops.tab | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index aad76e9a93..8b9e271068 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -562,7 +562,8 @@ void** beam_ops; Store(term, Dst); \ } while (0) -#define Move2(src1, dst1, src2, dst2) dst1 = (src1); dst2 = (src2) +#define Move2(S1, D1, S2, D2) D1 = (S1); D2 = (S2) +#define Move3(S1, D1, S2, D2, S3, D3) D1 = (S1); D2 = (S2); D3 = (S3) #define MoveGenDest(src, dstp) \ if ((dstp) == NULL) { r(0) = (src); } else { *(dstp) = src; } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 9330192d04..66339f8ace 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -302,6 +302,9 @@ move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 +move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 +move2 Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 + move C=aiq X=x==1 => move_x1 C move C=aiq X=x==2 => move_x2 C @@ -313,6 +316,11 @@ move2 x y x y move2 y x y x move2 x x x x +%macro: move3 Move3 +move3 x y x y x y +move3 y x y x y x +move3 x x x x x x + # The compiler almost never generates a "move Literal y(Y)" instruction, # so let's cheat if we encounter one. move S=n D=y => init D -- cgit v1.2.3 From d1321eaf9bd1e417561e3d70fd85749fe589143b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Wed, 22 Apr 2015 20:44:43 +0200 Subject: erts: Add move window instruction Move an entire region of x registers to the stack. This reduces the dispatch pressure of move instructions. Also introduce a move2 specialization for some common move patterns: move r y | move x y -> move2 : As above, moving regions to the stack move x r | move x y -> move2 : A seemingly common pattern --- erts/emulator/beam/beam_emu.c | 34 ++++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index 8b9e271068..b9b2094e6c 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1503,6 +1503,40 @@ void process_main(void) Next(2); } + OpCase(move_window3_xxxy): { + BeamInstr *next; + Eterm *y; + PreFetch(4, next); + y = (Eterm *)(((unsigned char *)E) + (Arg(3))); + y[0] = xb(Arg(0)); + y[1] = xb(Arg(1)); + y[2] = xb(Arg(2)); + NextPF(4, next); + } + OpCase(move_window4_xxxxy): { + BeamInstr *next; + Eterm *y; + PreFetch(5, next); + y = (Eterm *)(((unsigned char *)E) + (Arg(4))); + y[0] = xb(Arg(0)); + y[1] = xb(Arg(1)); + y[2] = xb(Arg(2)); + y[3] = xb(Arg(3)); + NextPF(5, next); + } + OpCase(move_window5_xxxxxy): { + BeamInstr *next; + Eterm *y; + PreFetch(6, next); + y = (Eterm *)(((unsigned char *)E) + (Arg(5))); + y[0] = xb(Arg(0)); + y[1] = xb(Arg(1)); + y[2] = xb(Arg(2)); + y[3] = xb(Arg(3)); + y[4] = xb(Arg(4)); + NextPF(6, next); + } + OpCase(i_move_call_only_fcr): { r(0) = Arg(1); } diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 66339f8ace..3f7e69e07e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -298,12 +298,44 @@ move_jump f c move_jump f x move_jump f y + +# Movement to and from the stack is common +# Try to pack as much as we can into one instruction + +# Window move +move_window/5 +move_window/6 + +# x -> y + +move S1=r S2=y | move X1=x Y1=y => move2 S1 S2 X1 Y1 + +move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \ + move_window X1 X2 X3 Y1 Y3 + +move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \ + move_window X1 X2 X3 X4 Y1 Y4 + +move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \ + move_window5 X1 X2 X3 X4 X5 Y1 + +move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1 +move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1 + +move_window3 x x x y +move_window4 x x x x y +move_window5 x x x x x y + move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 +move S1=x S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 +move S1=y S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 + move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 move2 Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 +move2 X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 move C=aiq X=x==1 => move_x1 C move C=aiq X=x==2 => move_x2 C @@ -316,6 +348,11 @@ move2 x y x y move2 y x y x move2 x x x x +move2 x r x y +move2 r y x y +move2 y r x y + + %macro: move3 Move3 move3 x y x y x y move3 y x y x y x -- cgit v1.2.3 From 06a912c068f62e1e04ebb2aa2002d611b8cc31e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 23 Apr 2015 15:34:07 +0200 Subject: erts: Fix loader increment from minus instruction A type error caused the optimization to never kick in. --- erts/emulator/beam/beam_load.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c index 8d7beb4eb4..282aa71109 100644 --- a/erts/emulator/beam/beam_load.c +++ b/erts/emulator/beam/beam_load.c @@ -3172,7 +3172,11 @@ gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer, static int negation_is_small(LoaderState* stp, GenOpArg Int) { - return Int.type == TAG_i && IS_SSMALL(-Int.val); + /* Check for the rare case of overflow in BeamInstr (UWord) -> Sint + * Cast to the correct type before using IS_SSMALL (Sint) */ + return Int.type == TAG_i && + !(Int.val & ~((((BeamInstr)1) << ((sizeof(Sint)*8)-1))-1)) && + IS_SSMALL(-((Sint)Int.val)); } -- cgit v1.2.3 From 38384c6c5f66cf52d24ae6f48cc71bbe52611aa9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 23 Apr 2015 17:24:18 +0200 Subject: erts: Batch loads and stores for move_window May lessen load/store latency. --- erts/emulator/beam/beam_emu.c | 48 +++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 18 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index b9b2094e6c..beec8967fc 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1505,35 +1505,47 @@ void process_main(void) OpCase(move_window3_xxxy): { BeamInstr *next; - Eterm *y; + Eterm xt0, xt1, xt2; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(3))); PreFetch(4, next); - y = (Eterm *)(((unsigned char *)E) + (Arg(3))); - y[0] = xb(Arg(0)); - y[1] = xb(Arg(1)); - y[2] = xb(Arg(2)); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; NextPF(4, next); } OpCase(move_window4_xxxxy): { BeamInstr *next; - Eterm *y; + Eterm xt0, xt1, xt2, xt3; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(4))); PreFetch(5, next); - y = (Eterm *)(((unsigned char *)E) + (Arg(4))); - y[0] = xb(Arg(0)); - y[1] = xb(Arg(1)); - y[2] = xb(Arg(2)); - y[3] = xb(Arg(3)); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + xt3 = xb(Arg(3)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; + y[3] = xt3; NextPF(5, next); } OpCase(move_window5_xxxxxy): { BeamInstr *next; - Eterm *y; + Eterm xt0, xt1, xt2, xt3, xt4; + Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(5))); PreFetch(6, next); - y = (Eterm *)(((unsigned char *)E) + (Arg(5))); - y[0] = xb(Arg(0)); - y[1] = xb(Arg(1)); - y[2] = xb(Arg(2)); - y[3] = xb(Arg(3)); - y[4] = xb(Arg(4)); + xt0 = xb(Arg(0)); + xt1 = xb(Arg(1)); + xt2 = xb(Arg(2)); + xt3 = xb(Arg(3)); + xt4 = xb(Arg(4)); + y[0] = xt0; + y[1] = xt1; + y[2] = xt2; + y[3] = xt3; + y[4] = xt4; NextPF(6, next); } -- cgit v1.2.3 From 699ad918545ac7e716b77ea05a6a943434616020 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 23 Apr 2015 19:06:03 +0200 Subject: erts: Specialize band instruction for common case * i_band specialization on x registers and constants --- erts/emulator/beam/beam_emu.c | 33 +++++++++++++++++++++++++++++++++ erts/emulator/beam/ops.tab | 2 ++ 2 files changed, 35 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index beec8967fc..aca340db6b 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -2885,6 +2885,23 @@ do { \ goto do_big_arith2; } +#define DO_BIG_ARITH(Func,Arg1,Arg2) \ + do { \ + Uint live = Arg(1); \ + SWAPOUT; \ + reg[0] = r(0); \ + reg[live] = (Arg1); \ + reg[live+1] = (Arg2); \ + result = (Func)(c_p, reg, live); \ + r(0) = reg[0]; \ + SWAPIN; \ + ERTS_HOLE_CHECK(c_p); \ + if (is_value(result)) { \ + StoreBifResult(4,result); \ + } \ + goto lb_Cl_error; \ + } while(0) + OpCase(i_rem_jId): { Eterm result; @@ -2900,6 +2917,22 @@ do { \ } } + OpCase(i_band_jIxcd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), Arg(3))) { + /* + * No need to untag -- TAG & TAG == TAG. + */ + result = xb(Arg(2)) & Arg(3); + StoreBifResult(4, result); + } + DO_BIG_ARITH(ARITH_FUNC(band),xb(Arg(2)),Arg(3)); + } + +#undef DO_BIG_ARITH + OpCase(i_band_jId): { Eterm result; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 3f7e69e07e..3436b664d9 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1663,6 +1663,7 @@ gc_bif2 Fail I u$bif:erlang:rem/2 S1 S2 Dst=d => i_fetch S1 S2 | i_rem Fail I Ds gc_bif2 Fail I u$bif:erlang:bsl/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsl Fail I Dst gc_bif2 Fail I u$bif:erlang:bsr/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsr Fail I Dst +gc_bif2 Fail I u$bif:erlang:band/2 S1=x S2=c Dst=d => i_band Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:band/2 S1 S2 Dst=d => i_fetch S1 S2 | i_band Fail I Dst gc_bif2 Fail I u$bif:erlang:bor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bor Fail I Dst gc_bif2 Fail I u$bif:erlang:bxor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bxor Fail I Dst @@ -1686,6 +1687,7 @@ i_rem j I d i_bsl j I d i_bsr j I d +i_band j I x c d i_band j I d i_bor j I d i_bxor j I d -- cgit v1.2.3 From b58b4718012cfb848dad3106b2ab22c08997c639 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Thu, 23 Apr 2015 19:21:34 +0200 Subject: erts: Specialize rem instruction for common case * i_rem specialization on x registers --- erts/emulator/beam/beam_emu.c | 13 +++++++++++++ erts/emulator/beam/ops.tab | 2 ++ 2 files changed, 15 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index aca340db6b..f369d7b632 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -2902,6 +2902,19 @@ do { \ goto lb_Cl_error; \ } while(0) + OpCase(i_rem_jIxxd): + { + Eterm result; + + if (xb(Arg(3)) == SMALL_ZERO) { + goto badarith; + } else if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + result = make_small(signed_val(xb(Arg(2))) % signed_val(xb(Arg(3)))); + StoreBifResult(4, result); + } + DO_BIG_ARITH(ARITH_FUNC(int_rem),xb(Arg(2)),xb(Arg(3))); + } + OpCase(i_rem_jId): { Eterm result; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 3436b664d9..7f7b0baacc 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1658,6 +1658,7 @@ gc_bif2 Fail I u$bif:erlang:stimes/2 S1 S2 Dst=d => i_fetch S1 S2 | i_times Fail gc_bif2 Fail I u$bif:erlang:div/2 S1 S2 Dst=d => i_fetch S1 S2 | i_m_div Fail I Dst gc_bif2 Fail I u$bif:erlang:intdiv/2 S1 S2 Dst=d => i_fetch S1 S2 | i_int_div Fail I Dst +gc_bif2 Fail I u$bif:erlang:rem/2 S1=x S2=x Dst=d => i_rem Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:rem/2 S1 S2 Dst=d => i_fetch S1 S2 | i_rem Fail I Dst gc_bif2 Fail I u$bif:erlang:bsl/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsl Fail I Dst @@ -1682,6 +1683,7 @@ i_minus j I d i_times j I d i_m_div j I d i_int_div j I d +i_rem j I x x d i_rem j I d i_bsl j I d -- cgit v1.2.3 From dd4d7667f5ab60c76567a6f3d814b3b64583280a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Fri, 24 Apr 2015 16:43:04 +0200 Subject: erts: Add move2 specialization for common move patterns Common pattern seen in SSL: move y x | move r x -> move2 move r x | move y x -> move2 Common pattern seen in SSL and Compiler: move x r | move x x -> move2 --- erts/emulator/beam/ops.tab | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'erts') diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 7f7b0baacc..22c7f14f0c 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -330,9 +330,13 @@ move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2 move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2 move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4 +move S1=x S2=r | move S3=x S4=x => move2 S1 S2 S3 S4 move S1=x S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 move S1=y S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1 +move Y1=y X1=x | move S1=r D1=x => move2 Y1 X1 S1 D1 +move S1=r D1=x | move Y1=y X1=x => move2 S1 D1 Y1 X1 + move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3 move2 Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3 move2 X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6 @@ -348,10 +352,14 @@ move2 x y x y move2 y x y x move2 x x x x +move2 x r x x + move2 x r x y move2 r y x y move2 y r x y +move2 r x y x +move2 y x r x %macro: move3 Move3 move3 x y x y x y -- cgit v1.2.3 From 979d05fc326d0f6cf7c1c4a5182bb33942852dd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn-Egil=20Dahlberg?= Date: Fri, 24 Apr 2015 17:42:24 +0200 Subject: erts: Specialize minus and plus instruction Seen on SSL application where substraction with x registers were prevalent: * i_minus specialization on x registers * i_plus specialization on x registers --- erts/emulator/beam/beam_emu.c | 71 ++++++++++++++++++++++++++++++------------- erts/emulator/beam/ops.tab | 4 +++ 2 files changed, 54 insertions(+), 21 deletions(-) (limited to 'erts') diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c index f369d7b632..7dd7250e2f 100644 --- a/erts/emulator/beam/beam_emu.c +++ b/erts/emulator/beam/beam_emu.c @@ -1393,7 +1393,39 @@ void process_main(void) ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue)); goto find_func_info; } - + +#define DO_BIG_ARITH(Func,Arg1,Arg2) \ + do { \ + Uint live = Arg(1); \ + SWAPOUT; \ + reg[0] = r(0); \ + reg[live] = (Arg1); \ + reg[live+1] = (Arg2); \ + result = (Func)(c_p, reg, live); \ + r(0) = reg[0]; \ + SWAPIN; \ + ERTS_HOLE_CHECK(c_p); \ + if (is_value(result)) { \ + StoreBifResult(4,result); \ + } \ + goto lb_Cl_error; \ + } while(0) + + OpCase(i_plus_jIxxd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + Sint i = signed_val(xb(Arg(2))) + signed_val(xb(Arg(3))); + ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); + if (MY_IS_SSMALL(i)) { + result = make_small(i); + StoreBifResult(4, result); + } + } + DO_BIG_ARITH(ARITH_FUNC(mixed_plus), xb(Arg(2)), xb(Arg(3))); + } + OpCase(i_plus_jId): { Eterm result; @@ -1405,12 +1437,26 @@ void process_main(void) result = make_small(i); STORE_ARITH_RESULT(result); } - } arith_func = ARITH_FUNC(mixed_plus); goto do_big_arith2; } + OpCase(i_minus_jIxxd): + { + Eterm result; + + if (is_both_small(xb(Arg(2)), xb(Arg(3)))) { + Sint i = signed_val(xb(Arg(2))) - signed_val(xb(Arg(3))); + ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i)); + if (MY_IS_SSMALL(i)) { + result = make_small(i); + StoreBifResult(4, result); + } + } + DO_BIG_ARITH(ARITH_FUNC(mixed_minus), xb(Arg(2)), xb(Arg(3))); + } + OpCase(i_minus_jId): { Eterm result; @@ -2885,23 +2931,6 @@ do { \ goto do_big_arith2; } -#define DO_BIG_ARITH(Func,Arg1,Arg2) \ - do { \ - Uint live = Arg(1); \ - SWAPOUT; \ - reg[0] = r(0); \ - reg[live] = (Arg1); \ - reg[live+1] = (Arg2); \ - result = (Func)(c_p, reg, live); \ - r(0) = reg[0]; \ - SWAPIN; \ - ERTS_HOLE_CHECK(c_p); \ - if (is_value(result)) { \ - StoreBifResult(4,result); \ - } \ - goto lb_Cl_error; \ - } while(0) - OpCase(i_rem_jIxxd): { Eterm result; @@ -2944,8 +2973,6 @@ do { \ DO_BIG_ARITH(ARITH_FUNC(band),xb(Arg(2)),Arg(3)); } -#undef DO_BIG_ARITH - OpCase(i_band_jId): { Eterm result; @@ -2961,6 +2988,8 @@ do { \ goto do_big_arith2; } +#undef DO_BIG_ARITH + do_big_arith2: { Eterm result; diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 22c7f14f0c..ece038131e 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -1660,7 +1660,9 @@ gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \ # GCing arithmetic instructions. # +gc_bif2 Fail I u$bif:erlang:splus/2 S1=x S2=x Dst=d => i_plus Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst +gc_bif2 Fail I u$bif:erlang:sminus/2 S1=x S2=x Dst=d => i_minus Fail I S1 S2 Dst gc_bif2 Fail I u$bif:erlang:sminus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_minus Fail I Dst gc_bif2 Fail I u$bif:erlang:stimes/2 S1 S2 Dst=d => i_fetch S1 S2 | i_times Fail I Dst gc_bif2 Fail I u$bif:erlang:div/2 S1 S2 Dst=d => i_fetch S1 S2 | i_m_div Fail I Dst @@ -1686,7 +1688,9 @@ i_increment r I I d i_increment x I I d i_increment y I I d +i_plus j I x x d i_plus j I d +i_minus j I x x d i_minus j I d i_times j I d i_m_div j I d -- cgit v1.2.3