diff options
author | Björn Gustavsson <[email protected]> | 2018-01-11 09:58:29 +0100 |
---|---|---|
committer | GitHub <[email protected]> | 2018-01-11 09:58:29 +0100 |
commit | 64baf77671e6106929ea702a6a51b089e48b3409 (patch) | |
tree | 2c249e9cfefeee5febca6e0e375a121b3ab10954 | |
parent | 24e83b0372c5a18d946711fccd0678a664f9635b (diff) | |
parent | cb6fc15c35c7e588604cb9ee767b79b5e9627d46 (diff) | |
download | otp-64baf77671e6106929ea702a6a51b089e48b3409.tar.gz otp-64baf77671e6106929ea702a6a51b089e48b3409.tar.bz2 otp-64baf77671e6106929ea702a6a51b089e48b3409.zip |
Merge pull request #1661 from bjorng/bjorn/compiler/opt-allocate_zero
Optimize allocation of stack frames
-rw-r--r-- | lib/compiler/src/beam_block.erl | 69 | ||||
-rw-r--r-- | lib/compiler/src/beam_type.erl | 30 | ||||
-rw-r--r-- | lib/compiler/src/beam_utils.erl | 45 |
3 files changed, 87 insertions, 57 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index fe1ce6f60b..570fc05ae5 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -23,7 +23,7 @@ -module(beam_block). -export([module/2]). --import(lists, [reverse/1,reverse/2,foldl/3,member/2]). +-import(lists, [reverse/1,reverse/2,member/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. @@ -41,7 +41,8 @@ function({function,Name,Arity,CLabel,Is0}) -> Is4 = move_allocates(Is3), Is5 = beam_utils:live_opt(Is4), Is6 = opt_blocks(Is5), - Is = beam_utils:delete_live_annos(Is6), + Is7 = beam_utils:delete_live_annos(Is6), + Is = opt_allocs(Is7), %% Done. {function,Name,Arity,CLabel,Is} @@ -143,10 +144,9 @@ opt_blocks([I|Is]) -> opt_blocks([]) -> []. opt_block(Is0) -> - Is = find_fixpoint(fun(Is) -> - opt_tuple_element(opt(Is)) - end, Is0), - opt_alloc(Is). + find_fixpoint(fun(Is) -> + opt_tuple_element(opt(Is)) + end, Is0). find_fixpoint(OptFun, Is0) -> case OptFun(Is0) of @@ -411,31 +411,47 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> no end. +%% opt_allocs(Instructions) -> Instructions. Optimize allocate +%% instructions inside blocks. If safe, replace an allocate_zero +%% instruction with the slightly cheaper allocate instruction. + +opt_allocs(Is) -> + D = beam_utils:index_labels(Is), + opt_allocs_1(Is, D). + +opt_allocs_1([{block,Bl0}|Is], D) -> + Bl = opt_alloc(Bl0, {D,Is}), + [{block,Bl}|opt_allocs_1(Is, D)]; +opt_allocs_1([I|Is], D) -> + [I|opt_allocs_1(Is, D)]; +opt_allocs_1([], _) -> []. + %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. opt_alloc([{set,[],[],{alloc,Live0,Info0}}, - {set,[],[],{alloc,Live,Info}}|Is]) -> + {set,[],[],{alloc,Live,Info}}|Is], D) -> Live = Live0, %Assertion. Alloc = combine_alloc(Info0, Info), I = {set,[],[],{alloc,Live,Alloc}}, - opt_alloc([I|Is]); -opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) -> - [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is]; -opt_alloc([I|Is]) -> [I|opt_alloc(Is)]; -opt_alloc([]) -> []. + opt_alloc([I|Is], D); +opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) -> + [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is]; +opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)]; +opt_alloc([], _) -> []. combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) -> {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}. - + %% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr] %% Generates the optimal sequence of instructions for %% allocating and initalizing the stack frame and needed heap. -opt_alloc(_Is, nostack, Nh, LivingRegs) -> +opt_alloc(_Is, _D, nostack, Nh, LivingRegs) -> {alloc,LivingRegs,{nozero,nostack,Nh,[]}}; -opt_alloc(Is, Ns, Nh, LivingRegs) -> - InitRegs = init_yreg(Is, 0), +opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) -> + Is = [{block,Bl}|OuterIs], + InitRegs = init_yregs(Ns, Is, D), case count_ones(InitRegs) of N when N*2 > Ns -> {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}}; @@ -451,19 +467,14 @@ gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 -> gen_init(Fs, Regs, Y, Acc) -> gen_init(Fs, Regs bsr 1, Y+1, Acc). -%% init_yreg(Instructions, RegSet) -> RegSetInitialized -%% Calculate the set of initialized y registers. - -init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg; -init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg; -init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg; -init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg)); -init_yreg(_Is, Reg) -> Reg. - -add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys). - -add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y); -add_yreg(_, Reg) -> Reg. +init_yregs(Y, Is, D) when Y >= 0 -> + case beam_utils:is_killed({y,Y}, Is, D) of + true -> + (1 bsl Y) bor init_yregs(Y-1, Is, D); + false -> + init_yregs(Y-1, Is, D) + end; +init_yregs(_, _, _) -> 0. count_ones(Bits) -> count_ones(Bits, 0). count_ones(0, Acc) -> Acc; diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl index d1f1076e17..e8cca4a0c0 100644 --- a/lib/compiler/src/beam_type.erl +++ b/lib/compiler/src/beam_type.erl @@ -333,29 +333,17 @@ flt_need_heap_2({set,_,_,{put_tuple,_}}, H, Fl) -> {[],H+1,Fl}; flt_need_heap_2({set,_,_,put}, H, Fl) -> {[],H+1,Fl}; -%% Then the "neutral" instructions. We just pass them. -flt_need_heap_2({set,[{fr,_}],_,_}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,[],[],fclearerror}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,[],[],fcheckerror}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,{bif,_,_}}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,move}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,{get_tuple_element,_}}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,get_list}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,{try_catch,_,_}}, H, Fl) -> - {[],H,Fl}; -flt_need_heap_2({set,_,_,init}, H, Fl) -> - {[],H,Fl}; -%% All other instructions should cause the insertion of an allocation +%% The following instructions cause the insertion of an allocation %% instruction if needed. +flt_need_heap_2({set,_,_,{alloc,_,_}}, H, Fl) -> + {flt_alloc(H, Fl),0,0}; +flt_need_heap_2({set,_,_,{set_tuple_element,_}}, H, Fl) -> + {flt_alloc(H, Fl),0,0}; +flt_need_heap_2({'%live',_,_}, H, Fl) -> + {flt_alloc(H, Fl),0,0}; +%% All other instructions are "neutral". We just pass them. flt_need_heap_2(_, H, Fl) -> - {flt_alloc(H, Fl),0,0}. + {[],H,Fl}. flt_alloc(0, 0) -> []; diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl index 60221578bd..a2795e625f 100644 --- a/lib/compiler/src/beam_utils.erl +++ b/lib/compiler/src/beam_utils.erl @@ -118,6 +118,7 @@ is_killed(R, Is, D) -> St = #live{lbl=D,res=gb_trees:empty()}, case check_liveness(R, Is, St) of {killed,_} -> true; + {exit_not_used,_} -> true; {_,_} -> false end. @@ -130,6 +131,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) -> St0 = #live{lbl=D,res=gb_trees:empty()}, case check_liveness_at(R, Lbl, St0) of {killed,_} -> true; + {exit_not_used,_} -> true; {_,_} -> false end. @@ -146,6 +148,7 @@ is_not_used(R, Is, D) -> St = #live{lbl=D,res=gb_trees:empty()}, case check_liveness(R, Is, St) of {used,_} -> false; + {exit_not_used,_} -> false; {_,_} -> true end. @@ -309,7 +312,7 @@ delete_live_annos([]) -> []. combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) -> H1 + H2; combine_heap_needs(H1, H2) -> - combine_alloc_lists([H1,H2]). + {alloc,combine_alloc_lists([H1,H2])}. %% anno_defs(Instructions) -> Instructions' @@ -347,6 +350,9 @@ split_even(Rs) -> split_even(Rs, [], []). %% %% killed - Reg is assigned or killed by an allocation instruction. %% not_used - the value of Reg is not used, but Reg must not be garbage +%% exit_not_used - the value of Reg is not used, but must not be garbage +%% because the stack will be scanned because an +%% exit BIF will raise an exception %% used - Reg is used check_liveness(R, [{block,Blk}|Is], St0) -> @@ -370,6 +376,8 @@ check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) -> case check_liveness_at(R, Fail, St0) of {killed,St1} -> check_liveness(R, Is, St1); + {exit_not_used,St1} -> + check_liveness(R, Is, St1); {not_used,St1} -> not_used(check_liveness(R, Is, St1)); {used,_}=Used -> @@ -432,7 +440,7 @@ check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) -> false -> if R =:= Dst -> {killed,St}; - true -> check_liveness(R, Is, St) + true -> not_used(check_liveness(R, Is, St)) end end end; @@ -466,7 +474,7 @@ check_liveness(R, [{call_ext,Live,_}=I|Is], St) -> %% We must make sure we don't check beyond this %% instruction or we will fall through into random %% unrelated code and get stuck in a loop. - {killed,St} + {exit_not_used,St} end end; check_liveness(R, [{call_fun,Live}|Is], St) -> @@ -631,6 +639,7 @@ check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) -> {Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}} end. +not_used({exit_not_used,St}) -> {not_used,St}; not_used({killed,St}) -> {not_used,St}; not_used({_,_}=Res) -> Res. @@ -659,13 +668,30 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) -> {killed,St0}; true -> case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of - {killed,St} -> {not_used,St}; {transparent,St} -> {alloc_used,St}; - {_,_}=Res -> Res + {_,_}=Res -> not_used(Res) end end; -check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St) -> - check_liveness_block_1(R, Ss, Ds, Op, Is, St); +check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) -> + case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of + {transparent,St} -> {alloc_used,St}; + {_,_}=Res -> not_used(Res) + end; +check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) -> + case Ds of + [R] -> + {killed,St0}; + _ -> + case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of + {exit_not_used,St} -> + {used,St}; + {transparent,St} -> + %% Conservatively assumed that it is used. + {used,St}; + {_,_}=Res -> + Res + end + end; check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) -> check_liveness_block_1(R, Ss, Ds, Op, Is, St); check_liveness_block(_, [], St) -> {transparent,St}. @@ -681,6 +707,11 @@ check_liveness_block_1(R, Ss, Ds, Op, Is, St0) -> true -> {killed,St}; false -> check_liveness_block(R, Is, St) end; + {exit_not_used,St} -> + case member(R, Ds) of + true -> {exit_not_used,St}; + false -> check_liveness_block(R, Is, St) + end; {not_used,St} -> not_used(case member(R, Ds) of true -> {killed,St}; |