aboutsummaryrefslogtreecommitdiffstats
path: root/erts/emulator/beam
diff options
context:
space:
mode:
authorBjörn-Egil Dahlberg <[email protected]>2015-04-28 16:38:19 +0200
committerBjörn-Egil Dahlberg <[email protected]>2015-04-28 16:38:19 +0200
commit3ce2fe4c0e88da5ff8f50624f133e1fee9726473 (patch)
tree966ca8764a1d617eb23776594cbfd0a9727236f1 /erts/emulator/beam
parent22173c2e4d65e47039975b91015560ae4256b3c1 (diff)
parent979d05fc326d0f6cf7c1c4a5182bb33942852dd7 (diff)
downloadotp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.tar.gz
otp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.tar.bz2
otp-3ce2fe4c0e88da5ff8f50624f133e1fee9726473.zip
Merge branch 'egil/opt-instructions/OTP-12690'
* egil/opt-instructions/OTP-12690: erts: Specialize minus and plus instruction erts: Add move2 specialization for common move patterns erts: Specialize rem instruction for common case erts: Specialize band instruction for common case erts: Batch loads and stores for move_window erts: Fix loader increment from minus instruction erts: Add move window instruction erts: Add instruction move3 for xy and xx erts: Specialize compare instructions kernel: Add instruction_count helper to erts_debug
Diffstat (limited to 'erts/emulator/beam')
-rw-r--r--erts/emulator/beam/beam_emu.c131
-rw-r--r--erts/emulator/beam/beam_load.c6
-rw-r--r--erts/emulator/beam/ops.tab110
3 files changed, 241 insertions, 6 deletions
diff --git a/erts/emulator/beam/beam_emu.c b/erts/emulator/beam/beam_emu.c
index fce4fd498a..21faf8fbf2 100644
--- a/erts/emulator/beam/beam_emu.c
+++ b/erts/emulator/beam/beam_emu.c
@@ -562,7 +562,8 @@ void** beam_ops;
Store(term, Dst); \
} while (0)
-#define Move2(src1, dst1, src2, dst2) dst1 = (src1); dst2 = (src2)
+#define Move2(S1, D1, S2, D2) D1 = (S1); D2 = (S2)
+#define Move3(S1, D1, S2, D2, S3, D3) D1 = (S1); D2 = (S2); D3 = (S3)
#define MoveGenDest(src, dstp) \
if ((dstp) == NULL) { r(0) = (src); } else { *(dstp) = src; }
@@ -662,6 +663,9 @@ void** beam_ops;
#define EqualImmed(X, Y, Action) if (X != Y) { Action; }
#define NotEqualImmed(X, Y, Action) if (X == Y) { Action; }
+#define EqualExact(X, Y, Action) if (!EQ(X,Y)) { Action; }
+#define IsLessThan(X, Y, Action) if (CMP_GE(X, Y)) { Action; }
+#define IsGreaterEqual(X, Y, Action) if (CMP_LT(X, Y)) { Action; }
#define IsFloat(Src, Fail) if (is_not_float(Src)) { Fail; }
@@ -1389,7 +1393,39 @@ void process_main(void)
ASSERT(c_p->freason != BADMATCH || is_value(c_p->fvalue));
goto find_func_info;
}
-
+
+#define DO_BIG_ARITH(Func,Arg1,Arg2) \
+ do { \
+ Uint live = Arg(1); \
+ SWAPOUT; \
+ reg[0] = r(0); \
+ reg[live] = (Arg1); \
+ reg[live+1] = (Arg2); \
+ result = (Func)(c_p, reg, live); \
+ r(0) = reg[0]; \
+ SWAPIN; \
+ ERTS_HOLE_CHECK(c_p); \
+ if (is_value(result)) { \
+ StoreBifResult(4,result); \
+ } \
+ goto lb_Cl_error; \
+ } while(0)
+
+ OpCase(i_plus_jIxxd):
+ {
+ Eterm result;
+
+ if (is_both_small(xb(Arg(2)), xb(Arg(3)))) {
+ Sint i = signed_val(xb(Arg(2))) + signed_val(xb(Arg(3)));
+ ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i));
+ if (MY_IS_SSMALL(i)) {
+ result = make_small(i);
+ StoreBifResult(4, result);
+ }
+ }
+ DO_BIG_ARITH(ARITH_FUNC(mixed_plus), xb(Arg(2)), xb(Arg(3)));
+ }
+
OpCase(i_plus_jId):
{
Eterm result;
@@ -1401,12 +1437,26 @@ void process_main(void)
result = make_small(i);
STORE_ARITH_RESULT(result);
}
-
}
arith_func = ARITH_FUNC(mixed_plus);
goto do_big_arith2;
}
+ OpCase(i_minus_jIxxd):
+ {
+ Eterm result;
+
+ if (is_both_small(xb(Arg(2)), xb(Arg(3)))) {
+ Sint i = signed_val(xb(Arg(2))) - signed_val(xb(Arg(3)));
+ ASSERT(MY_IS_SSMALL(i) == IS_SSMALL(i));
+ if (MY_IS_SSMALL(i)) {
+ result = make_small(i);
+ StoreBifResult(4, result);
+ }
+ }
+ DO_BIG_ARITH(ARITH_FUNC(mixed_minus), xb(Arg(2)), xb(Arg(3)));
+ }
+
OpCase(i_minus_jId):
{
Eterm result;
@@ -1499,6 +1549,52 @@ void process_main(void)
Next(2);
}
+ OpCase(move_window3_xxxy): {
+ BeamInstr *next;
+ Eterm xt0, xt1, xt2;
+ Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(3)));
+ PreFetch(4, next);
+ xt0 = xb(Arg(0));
+ xt1 = xb(Arg(1));
+ xt2 = xb(Arg(2));
+ y[0] = xt0;
+ y[1] = xt1;
+ y[2] = xt2;
+ NextPF(4, next);
+ }
+ OpCase(move_window4_xxxxy): {
+ BeamInstr *next;
+ Eterm xt0, xt1, xt2, xt3;
+ Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(4)));
+ PreFetch(5, next);
+ xt0 = xb(Arg(0));
+ xt1 = xb(Arg(1));
+ xt2 = xb(Arg(2));
+ xt3 = xb(Arg(3));
+ y[0] = xt0;
+ y[1] = xt1;
+ y[2] = xt2;
+ y[3] = xt3;
+ NextPF(5, next);
+ }
+ OpCase(move_window5_xxxxxy): {
+ BeamInstr *next;
+ Eterm xt0, xt1, xt2, xt3, xt4;
+ Eterm *y = (Eterm *)(((unsigned char *)E) + (Arg(5)));
+ PreFetch(6, next);
+ xt0 = xb(Arg(0));
+ xt1 = xb(Arg(1));
+ xt2 = xb(Arg(2));
+ xt3 = xb(Arg(3));
+ xt4 = xb(Arg(4));
+ y[0] = xt0;
+ y[1] = xt1;
+ y[2] = xt2;
+ y[3] = xt3;
+ y[4] = xt4;
+ NextPF(6, next);
+ }
+
OpCase(i_move_call_only_fcr): {
r(0) = Arg(1);
}
@@ -2835,6 +2931,19 @@ do { \
goto do_big_arith2;
}
+ OpCase(i_rem_jIxxd):
+ {
+ Eterm result;
+
+ if (xb(Arg(3)) == SMALL_ZERO) {
+ goto badarith;
+ } else if (is_both_small(xb(Arg(2)), xb(Arg(3)))) {
+ result = make_small(signed_val(xb(Arg(2))) % signed_val(xb(Arg(3))));
+ StoreBifResult(4, result);
+ }
+ DO_BIG_ARITH(ARITH_FUNC(int_rem),xb(Arg(2)),xb(Arg(3)));
+ }
+
OpCase(i_rem_jId):
{
Eterm result;
@@ -2850,6 +2959,20 @@ do { \
}
}
+ OpCase(i_band_jIxcd):
+ {
+ Eterm result;
+
+ if (is_both_small(xb(Arg(2)), Arg(3))) {
+ /*
+ * No need to untag -- TAG & TAG == TAG.
+ */
+ result = xb(Arg(2)) & Arg(3);
+ StoreBifResult(4, result);
+ }
+ DO_BIG_ARITH(ARITH_FUNC(band),xb(Arg(2)),Arg(3));
+ }
+
OpCase(i_band_jId):
{
Eterm result;
@@ -2865,6 +2988,8 @@ do { \
goto do_big_arith2;
}
+#undef DO_BIG_ARITH
+
do_big_arith2:
{
Eterm result;
diff --git a/erts/emulator/beam/beam_load.c b/erts/emulator/beam/beam_load.c
index 8d7beb4eb4..282aa71109 100644
--- a/erts/emulator/beam/beam_load.c
+++ b/erts/emulator/beam/beam_load.c
@@ -3172,7 +3172,11 @@ gen_increment_from_minus(LoaderState* stp, GenOpArg Reg, GenOpArg Integer,
static int
negation_is_small(LoaderState* stp, GenOpArg Int)
{
- return Int.type == TAG_i && IS_SSMALL(-Int.val);
+ /* Check for the rare case of overflow in BeamInstr (UWord) -> Sint
+ * Cast to the correct type before using IS_SSMALL (Sint) */
+ return Int.type == TAG_i &&
+ !(Int.val & ~((((BeamInstr)1) << ((sizeof(Sint)*8)-1))-1)) &&
+ IS_SSMALL(-((Sint)Int.val));
}
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 9bdc9cb88d..ece038131e 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -298,10 +298,49 @@ move_jump f c
move_jump f x
move_jump f y
+
+# Movement to and from the stack is common
+# Try to pack as much as we can into one instruction
+
+# Window move
+move_window/5
+move_window/6
+
+# x -> y
+
+move S1=r S2=y | move X1=x Y1=y => move2 S1 S2 X1 Y1
+
+move X1=x Y1=y | move X2=x Y2=y | move X3=x Y3=y | succ(Y1,Y2) | succ(Y2,Y3) => \
+ move_window X1 X2 X3 Y1 Y3
+
+move_window X1=x X2=x X3=x Y1=y Y3=y | move X4=x Y4=y | succ(Y3,Y4) => \
+ move_window X1 X2 X3 X4 Y1 Y4
+
+move_window X1=x X2=x X3=x X4=x Y1=y Y4=y | move X5=x Y5=y | succ(Y4,Y5) => \
+ move_window5 X1 X2 X3 X4 X5 Y1
+
+move_window X1=x X2=x X3=x Y1=y Y3=y => move_window3 X1 X2 X3 Y1
+move_window X1=x X2=x X3=x X4=x Y1=y Y4=y => move_window4 X1 X2 X3 X4 Y1
+
+move_window3 x x x y
+move_window4 x x x x y
+move_window5 x x x x x y
+
move X1=x Y1=y | move X2=x Y2=y => move2 X1 Y1 X2 Y2
move Y1=y X1=x | move Y2=y X2=x => move2 Y1 X1 Y2 X2
move X1=x X2=x | move X3=x X4=x => move2 X1 X2 X3 X4
+move S1=x S2=r | move S3=x S4=x => move2 S1 S2 S3 S4
+move S1=x S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1
+move S1=y S2=r | move X1=x Y1=y => move2 S1 S2 X1 Y1
+
+move Y1=y X1=x | move S1=r D1=x => move2 Y1 X1 S1 D1
+move S1=r D1=x | move Y1=y X1=x => move2 S1 D1 Y1 X1
+
+move2 X1=x Y1=y X2=x Y2=y | move X3=x Y3=y => move3 X1 Y1 X2 Y2 X3 Y3
+move2 Y1=y X1=x Y2=y X2=x | move Y3=y X3=x => move3 Y1 X1 Y2 X2 Y3 X3
+move2 X1=x X2=x X3=x X4=x | move X5=x X6=x => move3 X1 X2 X3 X4 X5 X6
+
move C=aiq X=x==1 => move_x1 C
move C=aiq X=x==2 => move_x2 C
@@ -313,6 +352,20 @@ move2 x y x y
move2 y x y x
move2 x x x x
+move2 x r x x
+
+move2 x r x y
+move2 r y x y
+move2 y r x y
+
+move2 r x y x
+move2 y x r x
+
+%macro: move3 Move3
+move3 x y x y x y
+move3 y x y x y x
+move3 x x x x x x
+
# The compiler almost never generates a "move Literal y(Y)" instruction,
# so let's cheat if we encounter one.
move S=n D=y => init D
@@ -392,14 +445,59 @@ i_is_ne_exact_literal x f c
i_is_ne_exact_literal y f c
#
+# Common Compare Specializations
+# We don't do all of them since we want
+# to keep the instruction set small-ish
+#
+
+is_eq_exact Lbl S1=xy S2=r => is_eq_exact Lbl S2 S1
+is_eq_exact Lbl S1=rx S2=xy => i_is_eq_exact_spec Lbl S1 S2
+%macro: i_is_eq_exact_spec EqualExact -fail_action
+
+i_is_eq_exact_spec f x x
+i_is_eq_exact_spec f x y
+i_is_eq_exact_spec f r x
+i_is_eq_exact_spec f r y
+%cold
+i_is_eq_exact_spec f r r
+%hot
+
+is_lt Lbl S1=rxc S2=rxc => i_is_lt_spec Lbl S1 S2
+
+%macro: i_is_lt_spec IsLessThan -fail_action
+
+i_is_lt_spec f x x
+i_is_lt_spec f x r
+i_is_lt_spec f x c
+i_is_lt_spec f r x
+i_is_lt_spec f r c
+i_is_lt_spec f c x
+i_is_lt_spec f c r
+%cold
+i_is_lt_spec f r r
+i_is_lt_spec f c c
+%hot
+
+is_ge Lbl S1=xc S2=xc => i_is_ge_spec Lbl S1 S2
+
+%macro: i_is_ge_spec IsGreaterEqual -fail_action
+
+i_is_ge_spec f x x
+i_is_ge_spec f x c
+i_is_ge_spec f c x
+%cold
+i_is_ge_spec f c c
+%hot
+
+#
# All other comparisons.
#
is_eq_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_eq_exact Lbl
is_ne_exact Lbl S1 S2 => i_fetch S1 S2 | i_is_ne_exact Lbl
-is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl
is_lt Lbl S1 S2 => i_fetch S1 S2 | i_is_lt Lbl
+is_ge Lbl S1 S2 => i_fetch S1 S2 | i_is_ge Lbl
is_eq Lbl S1 S2 => i_fetch S1 S2 | i_is_eq Lbl
is_ne Lbl S1 S2 => i_fetch S1 S2 | i_is_ne Lbl
@@ -493,7 +591,6 @@ put_list s s d
%hot
%macro: i_fetch FetchArgs -pack
-i_fetch c c
i_fetch c r
i_fetch c x
i_fetch c y
@@ -510,6 +607,7 @@ i_fetch y x
i_fetch y y
%cold
+i_fetch c c
i_fetch s s
%hot
@@ -1562,17 +1660,21 @@ gc_bif2 p Live u$bif:erlang:sminus/2 Reg=d Int=i Dst | \
# GCing arithmetic instructions.
#
+gc_bif2 Fail I u$bif:erlang:splus/2 S1=x S2=x Dst=d => i_plus Fail I S1 S2 Dst
gc_bif2 Fail I u$bif:erlang:splus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_plus Fail I Dst
+gc_bif2 Fail I u$bif:erlang:sminus/2 S1=x S2=x Dst=d => i_minus Fail I S1 S2 Dst
gc_bif2 Fail I u$bif:erlang:sminus/2 S1 S2 Dst=d => i_fetch S1 S2 | i_minus Fail I Dst
gc_bif2 Fail I u$bif:erlang:stimes/2 S1 S2 Dst=d => i_fetch S1 S2 | i_times Fail I Dst
gc_bif2 Fail I u$bif:erlang:div/2 S1 S2 Dst=d => i_fetch S1 S2 | i_m_div Fail I Dst
gc_bif2 Fail I u$bif:erlang:intdiv/2 S1 S2 Dst=d => i_fetch S1 S2 | i_int_div Fail I Dst
+gc_bif2 Fail I u$bif:erlang:rem/2 S1=x S2=x Dst=d => i_rem Fail I S1 S2 Dst
gc_bif2 Fail I u$bif:erlang:rem/2 S1 S2 Dst=d => i_fetch S1 S2 | i_rem Fail I Dst
gc_bif2 Fail I u$bif:erlang:bsl/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsl Fail I Dst
gc_bif2 Fail I u$bif:erlang:bsr/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bsr Fail I Dst
+gc_bif2 Fail I u$bif:erlang:band/2 S1=x S2=c Dst=d => i_band Fail I S1 S2 Dst
gc_bif2 Fail I u$bif:erlang:band/2 S1 S2 Dst=d => i_fetch S1 S2 | i_band Fail I Dst
gc_bif2 Fail I u$bif:erlang:bor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bor Fail I Dst
gc_bif2 Fail I u$bif:erlang:bxor/2 S1 S2 Dst=d => i_fetch S1 S2 | i_bxor Fail I Dst
@@ -1586,16 +1688,20 @@ i_increment r I I d
i_increment x I I d
i_increment y I I d
+i_plus j I x x d
i_plus j I d
+i_minus j I x x d
i_minus j I d
i_times j I d
i_m_div j I d
i_int_div j I d
+i_rem j I x x d
i_rem j I d
i_bsl j I d
i_bsr j I d
+i_band j I x c d
i_band j I d
i_bor j I d
i_bxor j I d