aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Björn Gustavsson <[email protected]> 2017-11-29 15:13:05 +0100
committer: Björn Gustavsson <[email protected]> 2017-11-30 12:43:22 +0100
commit: 67fd015394185302f769378c2c5e47bddbdc22ea (patch)
tree: 51957138935856b761194837d7e0f7b5c67f8485
parent: 93fde6744a0c94c2d31f99cb1f9019ff6e98f83d (diff)
downloadotp-67fd015394185302f769378c2c5e47bddbdc22ea.tar.gz
otp-67fd015394185302f769378c2c5e47bddbdc22ea.tar.bz2
otp-67fd015394185302f769378c2c5e47bddbdc22ea.zip
Stop trying to maximize the use of x(0)
X register 0 used to be mapped to a hardware register, and was therefore faster than the other registers. Because of that, the compiler tried to use x(0) as much as possible as a temporary register.

That was changed a few releases ago. X register 0 is now placed in the array of all X registers and has no special speed advantage compared to the other registers.

Remove the code in the compiler that attempts to use x(0) as much as possible. As a result, the following type of instruction will be much less frequent:

    {put_list,Src,{x,0},{x,0}}

Instead, the following type of instruction will be more frequent:

    {put_list,Src,{x,X},{x,X}}

(Where X is an arbitrary X register.)

Update the runtime system to specialize that kind of put_list instruction.
-rw-r--r-- erts/emulator/beam/instrs.tab 7
-rw-r--r-- erts/emulator/beam/ops.tab 7
-rw-r--r-- lib/compiler/src/v3_codegen.erl 47
3 files changed, 23 insertions, 38 deletions
diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab
index c17d1a8f69..d934abb217 100644
--- a/erts/emulator/beam/instrs.tab
+++ b/erts/emulator/beam/instrs.tab
@@ -542,6 +542,13 @@ put_list(Hd, Tl, Dst) {
HTOP += 2;
}
+update_list(Hd, Dst) {
+ HTOP[0] = $Hd;
+ HTOP[1] = $Dst;
+ $Dst = make_list(HTOP);
+ HTOP += 2;
+}
+
i_put_tuple := i_put_tuple.make.fill;
i_put_tuple.make(Dst) {
diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab
index 3df91056cb..c30af029ce 100644
--- a/erts/emulator/beam/ops.tab
+++ b/erts/emulator/beam/ops.tab
@@ -503,6 +503,10 @@ i_put_tuple xy I
#
put_list Const=c n Dst => move Const x | put_list x n Dst
+put_list Src Dst=x Dst => update_list Src Dst
+
+update_list xyc x
+
put_list x n x
put_list y n x
put_list x x x
@@ -525,8 +529,6 @@ put_list c y x
# The following put_list instructions using x(0) are frequently used.
-put_list y r r
-put_list x r r
put_list r n r
put_list r n x
put_list r x x
@@ -537,6 +539,7 @@ put_list x x r
put_list s s d
%hot
+
#
# Some more only used by the emulator
#
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 2f81910e59..8edc85bb36 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -711,23 +711,21 @@ cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) ->
Res = make_reservation(As, 0),
Regs0 = reserve(Res, Bef#sr.reg, Bef#sr.stk),
Stk = extend_stack(Bef, Lf, Lf+1, Vdb),
- Int0 = Bef#sr{reg=Regs0,stk=Stk,res=Res},
- X0_v0 = x0_vars(As, Fb, Lf, Vdb),
- {Keis,{Aft,_,St1}} =
+ Int = Bef#sr{reg=Regs0,stk=Stk,res=Res},
+ {Keis,{Aft,St1}} =
flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end,
- {Int0,X0_v0,St0}, need_heap(Kes, Fb)),
+ {Int,St0}, need_heap(Kes, Fb)),
{Keis,Aft,St1}.
-cg_basic_block(#cg_need_heap{}=Ke, {Inta,X0v,Sta}, _Lf, Vdb) ->
- {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta),
- {Keis, {Intb,X0v,Stb}};
-cg_basic_block(Ke, {Inta,X0_v1,Sta}, Lf, Vdb) ->
+cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) ->
+ {Keis,Aft,St1} = cg(Ke, Vdb, Bef, St0),
+ {Keis,{Aft,St1}};
+cg_basic_block(Ke, {Bef,St0}, Lf, Vdb) ->
#l{i=I} = get_kanno(Ke),
- {Sis,Intb} = save_carefully(Inta, I, Lf+1, Vdb),
- {X0_v2,Intc} = allocate_x0(X0_v1, I, Intb),
- Intd = reserve(Intc),
- {Keis,Inte,Stb} = cg(Ke, Vdb, Intd, Sta),
- {Sis ++ Keis, {Inte,X0_v2,Stb}}.
+ {Sis,Int0} = save_carefully(Bef, I, Lf+1, Vdb),
+ Int1 = reserve(Int0),
+ {Keis,Aft,St1} = cg(Ke, Vdb, Int1, St0),
+ {Sis ++ Keis,{Aft,St1}}.
make_reservation([], _) -> [];
make_reservation([#k_var{name=V}|As], I) -> [{I,V}|make_reservation(As, I+1)];
@@ -778,29 +776,6 @@ save_carefully([V|Vs], Bef, Acc) ->
save_carefully(Vs, Bef#sr{stk=Stk1}, [Move|Acc])
end.
-x0_vars([], _Fb, _Lf, _Vdb) -> [];
-x0_vars([#k_var{name=V}|_], Fb, _Lf, Vdb) ->
- {V,F,_L} = VFL = vdb_find(V, Vdb),
- x0_vars1([VFL], Fb, F, Vdb);
-x0_vars([X0|_], Fb, Lf, Vdb) ->
- x0_vars1([{X0,Lf,Lf}], Fb, Lf, Vdb).
-
-x0_vars1(X0, Fb, Xf, Vdb) ->
- Vs0 = [VFL || {_V,F,L}=VFL <- Vdb,
- F >= Fb,
- L < Xf],
- Vs1 = keysort(3, Vs0),
- keysort(2, X0++Vs1).
-
-allocate_x0([], _, Bef) -> {[],Bef#sr{res=[]}};
-allocate_x0([{_,_,L}|Vs], I, Bef) when L =< I ->
- allocate_x0(Vs, I, Bef);
-allocate_x0([{V,_F,_L}=VFL|Vs], _, Bef) ->
- {[VFL|Vs],Bef#sr{res=reserve_x0(V, Bef#sr.res)}}.
-
-reserve_x0(V, [_|Res]) -> [{0,V}|Res];
-reserve_x0(V, []) -> [{0,V}].
-
top_level_block(Keis, #sr{stk=[]}, _MaxRegs, #cg{need_frame=false}) ->
Keis;
top_level_block(Keis, Bef, MaxRegs, _St) ->