From 93fde6744a0c94c2d31f99cb1f9019ff6e98f83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 29 Nov 2017 07:41:23 +0100 Subject: Clean up collection of basic blocks --- lib/compiler/src/v3_codegen.erl | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 535c0679d8..2f81910e59 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -679,28 +679,26 @@ basic_block([Ke|Kes], Acc) -> no_block -> {reverse(Acc, [Ke]),Kes} end. -%% #k_put{} instructions that may garbage collect are not allowed in basic blocks. - -collect_block(#k_put{arg=#k_binary{}}) -> - no_block; -collect_block(#k_put{arg=#k_map{}}) -> - no_block; -collect_block(#k_put{}) -> - include; -collect_block(#k_call{op=#k_var{}=Var,args=As}) -> - {block_end,As++[Var]}; +collect_block(#k_put{arg=Arg}) -> + %% #k_put{} instructions that may garbage collect are not allowed + %% in basic blocks. + case Arg of + #k_binary{} -> no_block; + #k_map{} -> no_block; + _ -> include + end; collect_block(#k_call{op=Func,args=As}) -> {block_end,As++func_vars(Func)}; -collect_block(#k_enter{op=#k_var{}=Var,args=As}) -> - {block_end,As++[Var]}; collect_block(#k_enter{op=Func,args=As}) -> {block_end,As++func_vars(Func)}; collect_block(#k_return{args=Rs}) -> {block_end,Rs}; collect_block(#k_break{args=Bs}) -> {block_end,Bs}; -collect_block(_) -> no_block. +collect_block(_) -> no_block. +func_vars(#k_var{}=Var) -> + [Var]; func_vars(#k_remote{mod=M,name=F}) when is_record(M, k_var); is_record(F, k_var) -> [M,F]; -- cgit v1.2.3 From 67fd015394185302f769378c2c5e47bddbdc22ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Wed, 29 Nov 2017 15:13:05 +0100 Subject: Stop trying to maximize the use of x(0) X register 0 used to be mapped to a hardware register, and therefore faster than the other registers. Because of that, the compiler tried to use x(0) as much as possible as a temporary register. That was changed a few releases ago. X register 0 is now placed in the array of all X registers and has no special speed advantage compared to the other registers. Remove the code in the compiler that attempts to use x(0) as much as possible. As a result, the following type of instruction will be much less frequent: {put_list,Src,{x,0},{x,0}} Instead, the following type of instruction will be more frequent: {put_list,Src,{x,X},{x,X}} (Where X is an arbitrary X register.) Update the runtime system to specialize that kind of put_list instruction. --- lib/compiler/src/v3_codegen.erl | 47 ++++++++++------------------------------- 1 file changed, 11 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 2f81910e59..8edc85bb36 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -711,23 +711,21 @@ cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) -> Res = make_reservation(As, 0), Regs0 = reserve(Res, Bef#sr.reg, Bef#sr.stk), Stk = extend_stack(Bef, Lf, Lf+1, Vdb), - Int0 = Bef#sr{reg=Regs0,stk=Stk,res=Res}, - X0_v0 = x0_vars(As, Fb, Lf, Vdb), - {Keis,{Aft,_,St1}} = + Int = Bef#sr{reg=Regs0,stk=Stk,res=Res}, + {Keis,{Aft,St1}} = flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end, - {Int0,X0_v0,St0}, need_heap(Kes, Fb)), + {Int,St0}, need_heap(Kes, Fb)), {Keis,Aft,St1}. -cg_basic_block(#cg_need_heap{}=Ke, {Inta,X0v,Sta}, _Lf, Vdb) -> - {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta), - {Keis, {Intb,X0v,Stb}}; -cg_basic_block(Ke, {Inta,X0_v1,Sta}, Lf, Vdb) -> +cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) -> + {Keis,Aft,St1} = cg(Ke, Vdb, Bef, St0), + {Keis,{Aft,St1}}; +cg_basic_block(Ke, {Bef,St0}, Lf, Vdb) -> #l{i=I} = get_kanno(Ke), - {Sis,Intb} = save_carefully(Inta, I, Lf+1, Vdb), - {X0_v2,Intc} = allocate_x0(X0_v1, I, Intb), - Intd = reserve(Intc), - {Keis,Inte,Stb} = cg(Ke, Vdb, Intd, Sta), - {Sis ++ Keis, {Inte,X0_v2,Stb}}. + {Sis,Int0} = save_carefully(Bef, I, Lf+1, Vdb), + Int1 = reserve(Int0), + {Keis,Aft,St1} = cg(Ke, Vdb, Int1, St0), + {Sis ++ Keis,{Aft,St1}}. make_reservation([], _) -> []; make_reservation([#k_var{name=V}|As], I) -> [{I,V}|make_reservation(As, I+1)]; @@ -778,29 +776,6 @@ save_carefully([V|Vs], Bef, Acc) -> save_carefully(Vs, Bef#sr{stk=Stk1}, [Move|Acc]) end. -x0_vars([], _Fb, _Lf, _Vdb) -> []; -x0_vars([#k_var{name=V}|_], Fb, _Lf, Vdb) -> - {V,F,_L} = VFL = vdb_find(V, Vdb), - x0_vars1([VFL], Fb, F, Vdb); -x0_vars([X0|_], Fb, Lf, Vdb) -> - x0_vars1([{X0,Lf,Lf}], Fb, Lf, Vdb). - -x0_vars1(X0, Fb, Xf, Vdb) -> - Vs0 = [VFL || {_V,F,L}=VFL <- Vdb, - F >= Fb, - L < Xf], - Vs1 = keysort(3, Vs0), - keysort(2, X0++Vs1). - -allocate_x0([], _, Bef) -> {[],Bef#sr{res=[]}}; -allocate_x0([{_,_,L}|Vs], I, Bef) when L =< I -> - allocate_x0(Vs, I, Bef); -allocate_x0([{V,_F,_L}=VFL|Vs], _, Bef) -> - {[VFL|Vs],Bef#sr{res=reserve_x0(V, Bef#sr.res)}}. - -reserve_x0(V, [_|Res]) -> [{0,V}|Res]; -reserve_x0(V, []) -> [{0,V}]. - top_level_block(Keis, #sr{stk=[]}, _MaxRegs, #cg{need_frame=false}) -> Keis; top_level_block(Keis, Bef, MaxRegs, _St) -> -- cgit v1.2.3 From 924895c37df31913077d16c6baabfc70777f1b7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 30 Nov 2017 06:00:00 +0100 Subject: Clean up and comment code generation for basic blocks --- lib/compiler/src/beam_asm.erl | 2 +- lib/compiler/src/v3_codegen.erl | 193 +++++++++++++++++++++++++++------------- 2 files changed, 130 insertions(+), 65 deletions(-) (limited to 'lib') diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl index 3dff51d7f6..453e00fce3 100644 --- a/lib/compiler/src/beam_asm.erl +++ b/lib/compiler/src/beam_asm.erl @@ -24,7 +24,7 @@ -export([module/4]). -export([encode/2]). --export_type([fail/0,label/0,reg/0,src/0,module_code/0,function_name/0]). +-export_type([fail/0,label/0,reg/0,reg_num/0,src/0,module_code/0,function_name/0]). -import(lists, [map/2,member/2,keymember/3,duplicate/2,splitwith/2]). -include("beam_opcodes.hrl"). diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl index 8edc85bb36..b222b25d7c 100644 --- a/lib/compiler/src/v3_codegen.erl +++ b/lib/compiler/src/v3_codegen.erl @@ -51,7 +51,7 @@ set_kanno(Kthing, Anno) -> setelement(2, Kthing, Anno). %% Stack/register state record. -record(sr, {reg=[], %Register table stk=[], %Stack table - res=[]}). %Reserved regs: [{reserved,I,V}] + res=[]}). %Registers to reserve %% Internal records. -record(cg_need_heap, {anno=[] :: term(), @@ -704,14 +704,41 @@ func_vars(#k_remote{mod=M,name=F}) [M,F]; func_vars(_) -> []. -%% cg_basic_block([Kexpr], FirstI, LastI, As, Vdb, StackReg, State) -> +%% cg_basic_block([Kexpr], FirstI, LastI, Arguments, Vdb, StackReg, State) -> %% {[Ainstr],StackReg,State}. +%% +%% Do a specialized code generation for a basic block of #put{} +%% instructions (that don't do any garbage collection) followed by a +%% call, break, or return. +%% +%% 'Arguments' is a list of the variables that must be loaded into +%% consecutive X registers before the last instruction in the block. +%% The point of this specialized code generation is to try put the +%% all of the variables in 'Arguments' into the correct X register +%% to begin with, instead of putting them into the first available +%% X register and having to move them to the correct X register +%% later. +%% +%% To achieve that, we attempt to reserve the X registers that the +%% variables in 'Arguments' will need to be in when the block ends. +%% +%% To make it more likely that reservations will be successful, we +%% will try to save variables that need to be saved to the stack as +%% early as possible (if an X register needed by a variable in +%% Arguments is occupied by another variable, the value in the +%% X register can be evicted if it is saved on the stack). +%% +%% We will take care not to increase the size of stack frame compared +%% to what the standard code generator would have done (that is, to +%% save all X registers at the last possible moment). We will do that +%% by extending the stack frame to the minimal size needed to save +%% all that needs to be saved using extend_stack/4, and use +%% save_carefully/4 during code generation to only save the variables +%% that can be saved without growing the stack frame. cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) -> - Res = make_reservation(As, 0), - Regs0 = reserve(Res, Bef#sr.reg, Bef#sr.stk), - Stk = extend_stack(Bef, Lf, Lf+1, Vdb), - Int = Bef#sr{reg=Regs0,stk=Stk,res=Res}, + Int0 = reserve_arg_regs(As, Bef), + Int = extend_stack(Int0, Lf, Lf+1, Vdb), {Keis,{Aft,St1}} = flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end, {Int,St0}, need_heap(Kes, Fb)), @@ -722,59 +749,88 @@ cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) -> {Keis,{Aft,St1}}; cg_basic_block(Ke, {Bef,St0}, Lf, Vdb) -> #l{i=I} = get_kanno(Ke), + + %% Save all we can to increase the possibility that reserving + %% registers will succeed. {Sis,Int0} = save_carefully(Bef, I, Lf+1, Vdb), Int1 = reserve(Int0), {Keis,Aft,St1} = cg(Ke, Vdb, Int1, St0), {Sis ++ Keis,{Aft,St1}}. -make_reservation([], _) -> []; -make_reservation([#k_var{name=V}|As], I) -> [{I,V}|make_reservation(As, I+1)]; -make_reservation([A|As], I) -> [{I,A}|make_reservation(As, I+1)]. +%% reserve_arg_regs([Argument], Bef) -> Aft. +%% Try to reserve the X registers for all arguments. All registers +%% that we wish to reserve will be saved in Bef#sr.res. + +reserve_arg_regs(As, Bef) -> + Res = reserve_arg_regs_1(As, 0), + reserve(Bef#sr{res=Res}). + +reserve_arg_regs_1([#k_var{name=V}|As], I) -> + [{I,V}|reserve_arg_regs_1(As, I+1)]; +reserve_arg_regs_1([A|As], I) -> + [{I,A}|reserve_arg_regs_1(As, I+1)]; +reserve_arg_regs_1([], _) -> []. + +%% reserve(Bef) -> Aft. +%% Try to reserve more registers. The registers we wish to reserve +%% are found in Bef#sr.res. -reserve(Sr) -> Sr#sr{reg=reserve(Sr#sr.res, Sr#sr.reg, Sr#sr.stk)}. +reserve(#sr{reg=Regs,stk=Stk,res=Res}=Sr) -> + Sr#sr{reg=reserve_1(Res, Regs, Stk)}. -reserve([{I,V}|Rs], [free|Regs], Stk) -> [{reserved,I,V}|reserve(Rs, Regs, Stk)]; -reserve([{I,V}|Rs], [{I,V}|Regs], Stk) -> [{I,V}|reserve(Rs, Regs, Stk)]; -reserve([{I,V}|Rs], [{I,Var}|Regs], Stk) -> +reserve_1([{I,V}|Rs], [free|Regs], Stk) -> + [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; +reserve_1([{I,V}|Rs], [{I,V}|Regs], Stk) -> + [{I,V}|reserve_1(Rs, Regs, Stk)]; +reserve_1([{I,V}|Rs], [{I,Var}|Regs], Stk) -> case on_stack(Var, Stk) of - true -> [{reserved,I,V}|reserve(Rs, Regs, Stk)]; - false -> [{I,Var}|reserve(Rs, Regs, Stk)] + true -> [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; + false -> [{I,Var}|reserve_1(Rs, Regs, Stk)] end; -reserve([{I,V}|Rs], [{reserved,I,_}|Regs], Stk) -> - [{reserved,I,V}|reserve(Rs, Regs, Stk)]; -%reserve([{I,V}|Rs], [Other|Regs], Stk) -> [Other|reserve(Rs, Regs, Stk)]; -reserve([{I,V}|Rs], [], Stk) -> [{reserved,I,V}|reserve(Rs, [], Stk)]; -reserve([], Regs, _) -> Regs. - -extend_stack(Bef, Fb, Lf, Vdb) -> - Stk0 = clear_dead_stk(Bef#sr.stk, Fb, Vdb), - Saves = [V || {V,F,L} <- Vdb, - F < Fb, - L >= Lf, - not on_stack(V, Stk0)], - Stk1 = foldl(fun (V, Stk) -> put_stack(V, Stk) end, Stk0, Saves), - Bef#sr.stk ++ lists:duplicate(length(Stk1) - length(Bef#sr.stk), free). - -save_carefully(Bef, Fb, Lf, Vdb) -> - Stk = Bef#sr.stk, - %% New variables that are in use but not on stack. - New = [VFL || {V,F,L} = VFL <- Vdb, - F < Fb, - L >= Lf, - not on_stack(V, Stk)], - Saves = [V || {V,_,_} <- keysort(2, New)], - save_carefully(Saves, Bef, []). - -save_carefully([], Bef, Acc) -> {reverse(Acc),Bef}; -save_carefully([V|Vs], Bef, Acc) -> - case put_stack_carefully(V, Bef#sr.stk) of - error -> {reverse(Acc),Bef}; +reserve_1([{I,V}|Rs], [{reserved,I,_}|Regs], Stk) -> + [{reserved,I,V}|reserve_1(Rs, Regs, Stk)]; +reserve_1([{I,V}|Rs], [], Stk) -> + [{reserved,I,V}|reserve_1(Rs, [], Stk)]; +reserve_1([], Regs, _) -> Regs. + +%% extend_stack(Bef, FirstBefore, LastFrom, Vdb) -> Aft. +%% Extend the stack enough to fit all variables alive past LastFrom +%% and not already on the stack. + +extend_stack(#sr{stk=Stk0}=Bef, Fb, Lf, Vdb) -> + Stk1 = clear_dead_stk(Stk0, Fb, Vdb), + New = new_not_on_stack(Stk1, Fb, Lf, Vdb), + Stk2 = foldl(fun ({V,_,_}, Stk) -> put_stack(V, Stk) end, Stk1, New), + Stk = Stk0 ++ lists:duplicate(length(Stk2) - length(Stk0), free), + Bef#sr{stk=Stk}. + +%% save_carefully(Bef, FirstBefore, LastFrom, Vdb) -> {[SaveVar],Aft}. +%% Save variables which are used past current point and which are not +%% already on the stack, but only if the variables can be saved without +%% growing the stack frame. + +save_carefully(#sr{stk=Stk}=Bef, Fb, Lf, Vdb) -> + New0 = new_not_on_stack(Stk, Fb, Lf, Vdb), + New = keysort(2, New0), + save_carefully_1(New, Bef, []). + +save_carefully_1([{V,_,_}|Vs], #sr{reg=Regs,stk=Stk0}=Bef, Acc) -> + case put_stack_carefully(V, Stk0) of + error -> + {reverse(Acc),Bef}; Stk1 -> - SrcReg = fetch_reg(V, Bef#sr.reg), + SrcReg = fetch_reg(V, Regs), Move = {move,SrcReg,fetch_stack(V, Stk1)}, {x,_} = SrcReg, %Assertion - must be X register. - save_carefully(Vs, Bef#sr{stk=Stk1}, [Move|Acc]) - end. + save_carefully_1(Vs, Bef#sr{stk=Stk1}, [Move|Acc]) + end; +save_carefully_1([], Bef, Acc) -> + {reverse(Acc),Bef}. + +%% top_level_block([Instruction], Bef, MaxRegs, St) -> [Instruction]. +%% For the top-level block, allocate a stack frame a necessary, +%% adjust Y register numbering and instructions that return +%% from the function. top_level_block(Keis, #sr{stk=[]}, _MaxRegs, #cg{need_frame=false}) -> Keis; @@ -2338,21 +2394,21 @@ break_up_cycle1(Dst, [M|Path], LastMove) -> %% clear_dead(Sr, Until, Vdb) -> Aft. %% Remove all variables in Sr which have died AT ALL so far. -clear_dead(Sr, Until, Vdb) -> - Sr#sr{reg=clear_dead_reg(Sr, Until, Vdb), - stk=clear_dead_stk(Sr#sr.stk, Until, Vdb)}. +clear_dead(#sr{stk=Stk}=Sr0, Until, Vdb) -> + Sr = Sr0#sr{reg=clear_dead_reg(Sr0, Until, Vdb), + stk=clear_dead_stk(Stk, Until, Vdb)}, + reserve(Sr). clear_dead_reg(Sr, Until, Vdb) -> - Reg = [case R of - {_I,V} = IV -> - case vdb_find(V, Vdb) of - {V,_,L} when L > Until -> IV; - _ -> free %Remove anything else - end; - {reserved,_I,_V} = Reserved -> Reserved; - free -> free - end || R <- Sr#sr.reg], - reserve(Sr#sr.res, Reg, Sr#sr.stk). + [case R of + {_I,V} = IV -> + case vdb_find(V, Vdb) of + {V,_,L} when L > Until -> IV; + _ -> free %Remove anything else + end; + {reserved,_I,_V}=Reserved -> Reserved; + free -> free + end || R <- Sr#sr.reg]. clear_dead_stk(Stk, Until, Vdb) -> [case S of @@ -2424,16 +2480,25 @@ adjust_stack(Bef, Fb, Lf, Vdb) -> save_stack(Stk0, Fb, Lf, Vdb) -> %% New variables that are in use but not on stack. - New = [VFL || {V,F,L} = VFL <- Vdb, - F < Fb, - L >= Lf, - not on_stack(V, Stk0)], + New = new_not_on_stack(Stk0, Fb, Lf, Vdb), + %% Add new variables that are not just dropped immediately. %% N.B. foldr works backwards from the end!! Saves = [V || {V,_,_} <- keysort(3, New)], Stk1 = foldr(fun (V, Stk) -> put_stack(V, Stk) end, Stk0, Saves), {Stk1,Saves}. +%% new_not_on_stack(Stack, FirstBefore, LastFrom, Vdb) -> +%% [{Variable,First,Last}] +%% Return information about all variables that are used past current +%% point and that are not already on the stack. + +new_not_on_stack(Stk, Fb, Lf, Vdb) -> + [VFL || {V,F,L} = VFL <- Vdb, + F < Fb, + L >= Lf, + not on_stack(V, Stk)]. + %% saves([SaveVar], Reg, Stk) -> [{move,Reg,Stk}]. %% Generate move instructions to save variables onto stack. The %% stack/reg info used is that after the new stack has been made. -- cgit v1.2.3 From e4b486d24fcc5029b26fe576b9e373fc02b9098a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gustavsson?= Date: Thu, 30 Nov 2017 15:32:05 +0100 Subject: bs_match_SUITE: Cover more clauses in v3_codegen:bs_rename_ctx/4 --- lib/compiler/test/bs_match_SUITE.erl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/compiler/test/bs_match_SUITE.erl b/lib/compiler/test/bs_match_SUITE.erl index e6fa80e143..39f9b5d063 100644 --- a/lib/compiler/test/bs_match_SUITE.erl +++ b/lib/compiler/test/bs_match_SUITE.erl @@ -40,7 +40,7 @@ map_and_binary/1,unsafe_branch_caching/1, bad_literals/1,good_literals/1,constant_propagation/1, parse_xml/1,get_payload/1,escape/1,num_slots_different/1, - check_bitstring_list/1]). + check_bitstring_list/1,guard/1]). -export([coverage_id/1,coverage_external_ignore/2]). @@ -73,7 +73,7 @@ groups() -> map_and_binary,unsafe_branch_caching, bad_literals,good_literals,constant_propagation,parse_xml, get_payload,escape,num_slots_different, - check_bitstring_list]}]. + check_bitstring_list,guard]}]. init_per_suite(Config) -> @@ -1587,6 +1587,18 @@ check_bitstring_list(<<>>, []) -> check_bitstring_list(_, _) -> false. +guard(_Config) -> + Tuple = id({a,b}), + ok = guard_1(<<1,2,3>>, {1,2,3}), + + ok. + +%% Cover handling of #k_put{} in v3_codegen:bsm_rename_ctx/4. + +guard_1(<>, Tuple) when Tuple =:= {A,B,C} -> + ok. + + check(F, R) -> R = F(). -- cgit v1.2.3