From 67fd015394185302f769378c2c5e47bddbdc22ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 29 Nov 2017 15:13:05 +0100 Subject: Stop trying to maximize the use of x(0) X register 0 used to be mapped to a hardware register, and therefore faster than the other registers. Because of that, the compiler tried to use x(0) as much as possible as a temporary register. That was changed a few releases ago. X register 0 is now placed in the array of all X registers and has no special speed advantage compared to the other registers. Remove the code in the compiler that attempts to use x(0) as much as possible. As a result, the following type of instruction will be much less frequent: {put_list,Src,{x,0},{x,0}} Instead, the following type of instruction will be more frequent: {put_list,Src,{x,X},{x,X}} (Where X is an arbitrary X register.) Update the runtime system to specialize that kind of put_list instruction. --- erts/emulator/beam/instrs.tab | 7 ++++++ erts/emulator/beam/ops.tab | 7 ++++-- lib/compiler/src/v3_codegen.erl | 47 ++++++++++------------------------------- 3 files changed, 23 insertions(+), 38 deletions(-) diff --git a/erts/emulator/beam/instrs.tab b/erts/emulator/beam/instrs.tab index c17d1a8f69..d934abb217 100644 --- a/erts/emulator/beam/instrs.tab +++ b/erts/emulator/beam/instrs.tab @@ -542,6 +542,13 @@ put_list(Hd, Tl, Dst) { HTOP += 2; } +update_list(Hd, Dst) { + HTOP[0] = $Hd; + HTOP[1] = $Dst; + $Dst = make_list(HTOP); + HTOP += 2; +} + i_put_tuple := i_put_tuple.make.fill; i_put_tuple.make(Dst) { diff --git a/erts/emulator/beam/ops.tab b/erts/emulator/beam/ops.tab index 3df91056cb..c30af029ce 100644 --- a/erts/emulator/beam/ops.tab +++ b/erts/emulator/beam/ops.tab @@ -503,6 +503,10 @@ i_put_tuple xy I # put_list Const=c n Dst => move Const x | put_list x n Dst +put_list Src Dst=x Dst => update_list Src Dst + +update_list xyc x + put_list x n x put_list y n x put_list x x x @@ -525,8 +529,6 @@ put_list c y x # The following put_list instructions using x(0) are frequently used. -put_list y r r -put_list x r r put_list r n r put_list r n x put_list r x x @@ -537,6 +539,7 @@ put_list x x r put_list s s d %hot + # # Some more only used by the emulator # diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 2f81910e59..8edc85bb36 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -711,23 +711,21 @@ cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) -> Res = make_reservation(As, 0), Regs0 = reserve(Res, Bef#sr.reg, Bef#sr.stk), Stk = extend_stack(Bef, Lf, Lf+1, Vdb), - Int0 = Bef#sr{reg=Regs0,stk=Stk,res=Res}, - X0_v0 = x0_vars(As, Fb, Lf, Vdb), - {Keis,{Aft,_,St1}} = + Int = Bef#sr{reg=Regs0,stk=Stk,res=Res}, + {Keis,{Aft,St1}} = flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end, - {Int0,X0_v0,St0}, need_heap(Kes, Fb)), + {Int,St0}, need_heap(Kes, Fb)), {Keis,Aft,St1}. -cg_basic_block(#cg_need_heap{}=Ke, {Inta,X0v,Sta}, _Lf, Vdb) -> - {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta), - {Keis, {Intb,X0v,Stb}}; -cg_basic_block(Ke, {Inta,X0_v1,Sta}, Lf, Vdb) -> +cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) -> + {Keis,Aft,St1} = cg(Ke, Vdb, Bef, St0), + {Keis,{Aft,St1}}; +cg_basic_block(Ke, {Bef,St0}, Lf, Vdb) -> #l{i=I} = get_kanno(Ke), - {Sis,Intb} = save_carefully(Inta, I, Lf+1, Vdb), - {X0_v2,Intc} = allocate_x0(X0_v1, I, Intb), - Intd = reserve(Intc), - {Keis,Inte,Stb} = cg(Ke, Vdb, Intd, Sta), - {Sis ++ Keis, {Inte,X0_v2,Stb}}. + {Sis,Int0} = save_carefully(Bef, I, Lf+1, Vdb), + Int1 = reserve(Int0), + {Keis,Aft,St1} = cg(Ke, Vdb, Int1, St0), + {Sis ++ Keis,{Aft,St1}}. make_reservation([], _) -> []; make_reservation([#k_var{name=V}|As], I) -> [{I,V}|make_reservation(As, I+1)]; @@ -778,29 +776,6 @@ save_carefully([V|Vs], Bef, Acc) -> save_carefully(Vs, Bef#sr{stk=Stk1}, [Move|Acc]) end. -x0_vars([], _Fb, _Lf, _Vdb) -> []; -x0_vars([#k_var{name=V}|_], Fb, _Lf, Vdb) -> - {V,F,_L} = VFL = vdb_find(V, Vdb), - x0_vars1([VFL], Fb, F, Vdb); -x0_vars([X0|_], Fb, Lf, Vdb) -> - x0_vars1([{X0,Lf,Lf}], Fb, Lf, Vdb). - -x0_vars1(X0, Fb, Xf, Vdb) -> - Vs0 = [VFL || {_V,F,L}=VFL <- Vdb, - F >= Fb, - L < Xf], - Vs1 = keysort(3, Vs0), - keysort(2, X0++Vs1). - -allocate_x0([], _, Bef) -> {[],Bef#sr{res=[]}}; -allocate_x0([{_,_,L}|Vs], I, Bef) when L =< I -> - allocate_x0(Vs, I, Bef); -allocate_x0([{V,_F,_L}=VFL|Vs], _, Bef) -> - {[VFL|Vs],Bef#sr{res=reserve_x0(V, Bef#sr.res)}}. - -reserve_x0(V, [_|Res]) -> [{0,V}|Res]; -reserve_x0(V, []) -> [{0,V}]. - top_level_block(Keis, #sr{stk=[]}, _MaxRegs, #cg{need_frame=false}) -> Keis; top_level_block(Keis, Bef, MaxRegs, _St) -> -- cgit v1.2.3