diff options
Diffstat (limited to 'lib/compiler/src/beam_block.erl')
-rw-r--r-- | lib/compiler/src/beam_block.erl | 194 |
1 files changed, 127 insertions, 67 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl index 6543e05e20..39ae8d5347 100644 --- a/lib/compiler/src/beam_block.erl +++ b/lib/compiler/src/beam_block.erl @@ -23,30 +23,37 @@ -module(beam_block). -export([module/2]). --import(lists, [reverse/1,reverse/2,foldl/3,member/2]). +-import(lists, [reverse/1,reverse/2,member/2]). -spec module(beam_utils:module_code(), [compile:option()]) -> {'ok',beam_utils:module_code()}. -module({Mod,Exp,Attr,Fs0,Lc}, _Opt) -> - Fs = [function(F) || F <- Fs0], +module({Mod,Exp,Attr,Fs0,Lc}, Opts) -> + Blockify = not member(no_blockify, Opts), + Fs = [function(F, Blockify) || F <- Fs0], {ok,{Mod,Exp,Attr,Fs,Lc}}. -function({function,Name,Arity,CLabel,Is0}) -> +function({function,Name,Arity,CLabel,Is0}, Blockify) -> try %% Collect basic blocks and optimize them. - Is1 = blockify(Is0), - Is2 = embed_lines(Is1), - Is3 = move_allocates(Is2), - Is4 = beam_utils:live_opt(Is3), - Is5 = opt_blocks(Is4), - Is6 = beam_utils:delete_live_annos(Is5), - - %% Done. - {function,Name,Arity,CLabel,Is6} + Is2 = case Blockify of + true -> + Is1 = blockify(Is0), + embed_lines(Is1); + false -> + Is0 + end, + Is3 = beam_utils:anno_defs(Is2), + Is4 = move_allocates(Is3), + Is5 = beam_utils:live_opt(Is4), + Is6 = opt_blocks(Is5), + Is7 = beam_utils:delete_annos(Is6), + Is = opt_allocs(Is7), + + %% Done. + {function,Name,Arity,CLabel,Is} catch - Class:Error -> - Stack = erlang:get_stacktrace(), + Class:Error:Stack -> io:fwrite("Function: ~w/~w\n", [Name,Arity]), erlang:raise(Class, Error, Stack) end. @@ -136,17 +143,16 @@ embed_lines([], Acc) -> Acc. opt_blocks([{block,Bl0}|Is]) -> %% The live annotation at the beginning is not useful. - [{'%live',_,_}|Bl] = Bl0, + [{'%anno',_}|Bl] = Bl0, [{block,opt_block(Bl)}|opt_blocks(Is)]; opt_blocks([I|Is]) -> [I|opt_blocks(Is)]; opt_blocks([]) -> []. opt_block(Is0) -> - Is = find_fixpoint(fun(Is) -> - opt_tuple_element(opt(Is)) - end, Is0), - opt_alloc(Is). + find_fixpoint(fun(Is) -> + opt_tuple_element(opt(Is)) + end, Is0). find_fixpoint(OptFun, Is0) -> case OptFun(Is0) of @@ -173,7 +179,7 @@ find_fixpoint(OptFun, Is0) -> %% safe to assume that if x(N) is initialized, then all lower-numbered %% x registers are also initialized. %% -%% For example, in general it is not safe to transform the following +%% For example, we must be careful when transforming the following %% instructions: %% %% get_tuple_element x(0) Element => x(1) @@ -185,13 +191,9 @@ find_fixpoint(OptFun, Is0) -> %% get_tuple_element x(0) Element => x(1) %% %% The transformation is safe if and only if x(1) has been -%% initialized previously. Unfortunately, beam_reorder may have moved -%% a get_tuple_element instruction so that x(1) is not always -%% initialized when this code is reached. To find whether or not x(1) -%% is initialized, we would need to analyze all code preceding these -%% two instructions (across branches). Since we currently don't have -%% any practical mechanism for doing that, we will have to -%% conservatively assume that the transformation is unsafe. +%% initialized previously. We will use the annotations added by +%% beam_utils:anno_defs/1 to determine whether x(a) has been +%% initialized. move_allocates([{block,Bl0}|Is]) -> Bl = move_allocates_1(reverse(Bl0), []), @@ -200,15 +202,20 @@ move_allocates([I|Is]) -> [I|move_allocates(Is)]; move_allocates([]) -> []. +move_allocates_1([{'%anno',_}|Is], Acc) -> + move_allocates_1(Is, Acc); move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) -> - case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of - {false,_} -> - move_allocates_1(Is, [I|Acc0]); - {true,not_possible} -> - move_allocates_1(Is, [I|Acc0]); - {true,Live} when is_integer(Live) -> - A = {set,[],[],{alloc,Live,Info}}, - move_allocates_1(Is, [A,I|Acc]) + case alloc_may_pass(I) of + false -> + move_allocates_1(Is, [I|Acc0]); + true -> + case alloc_live_regs(I, Is, Live0) of + not_possible -> + move_allocates_1(Is, [I|Acc0]); + Live when is_integer(Live) -> + A = {set,[],[],{alloc,Live,Info}}, + move_allocates_1(Is, [A,I|Acc]) + end end; move_allocates_1([I|Is], Acc) -> move_allocates_1(Is, [I|Acc]); @@ -219,21 +226,34 @@ alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false; alloc_may_pass({set,_,_,put_list}) -> false; alloc_may_pass({set,_,_,put}) -> false; alloc_may_pass({set,_,_,_}) -> true. - + %% opt([Instruction]) -> [Instruction] %% Optimize the instruction stream inside a basic block. opt([{set,[X],[X],move}|Is]) -> opt(Is); +opt([{set,[X],_,move},{set,[X],_,move}=I|Is]) -> + opt([I|Is]); +opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) -> + opt([I1,{set,[D2],[S1],move}|Is]); +opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[S2],move}|Is0]) when S1 =/= D2 -> + %% Place move S x0 at the end of move sequences so that + %% loader can merge with the following instruction + {Ds,Is} = opt_moves([D2], Is0), + [{set,Ds,[S2],move}|opt([I1|Is])]; opt([{set,_,_,{line,_}}=Line1, {set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1, {set,_,_,{line,_}}=Line2, {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is]) when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> opt([Line2,I2,Line1,I1|Is]); +opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1, + {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is]) + when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg -> + opt([I2,I1|Is]); opt([{set,Ds0,Ss,Op}|Is0]) -> {Ds,Is} = opt_moves(Ds0, Is0), [{set,Ds,Ss,Op}|opt(Is)]; -opt([{'%live',_,_}=I|Is]) -> +opt([{'%anno',_}=I|Is]) -> [I|opt(Is)]; opt([]) -> []. @@ -401,31 +421,47 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) -> no end. +%% opt_allocs(Instructions) -> Instructions. Optimize allocate +%% instructions inside blocks. If safe, replace an allocate_zero +%% instruction with the slightly cheaper allocate instruction. + +opt_allocs(Is) -> + D = beam_utils:index_labels(Is), + opt_allocs_1(Is, D). + +opt_allocs_1([{block,Bl0}|Is], D) -> + Bl = opt_alloc(Bl0, {D,Is}), + [{block,Bl}|opt_allocs_1(Is, D)]; +opt_allocs_1([I|Is], D) -> + [I|opt_allocs_1(Is, D)]; +opt_allocs_1([], _) -> []. + %% opt_alloc(Instructions) -> Instructions' %% Optimises all allocate instructions. opt_alloc([{set,[],[],{alloc,Live0,Info0}}, - {set,[],[],{alloc,Live,Info}}|Is]) -> + {set,[],[],{alloc,Live,Info}}|Is], D) -> Live = Live0, %Assertion. Alloc = combine_alloc(Info0, Info), I = {set,[],[],{alloc,Live,Alloc}}, - opt_alloc([I|Is]); -opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) -> - [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is]; -opt_alloc([I|Is]) -> [I|opt_alloc(Is)]; -opt_alloc([]) -> []. + opt_alloc([I|Is], D); +opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) -> + [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is]; +opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)]; +opt_alloc([], _) -> []. combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) -> {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}. - + %% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr] %% Generates the optimal sequence of instructions for %% allocating and initalizing the stack frame and needed heap. -opt_alloc(_Is, nostack, Nh, LivingRegs) -> +opt_alloc(_Is, _D, nostack, Nh, LivingRegs) -> {alloc,LivingRegs,{nozero,nostack,Nh,[]}}; -opt_alloc(Is, Ns, Nh, LivingRegs) -> - InitRegs = init_yreg(Is, 0), +opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) -> + Is = [{block,Bl}|OuterIs], + InitRegs = init_yregs(Ns, Is, D), case count_ones(InitRegs) of N when N*2 > Ns -> {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}}; @@ -441,19 +477,14 @@ gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 -> gen_init(Fs, Regs, Y, Acc) -> gen_init(Fs, Regs bsr 1, Y+1, Acc). -%% init_yreg(Instructions, RegSet) -> RegSetInitialized -%% Calculate the set of initialized y registers. - -init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg; -init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg; -init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg; -init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg)); -init_yreg(_Is, Reg) -> Reg. - -add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys). - -add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y); -add_yreg(_, Reg) -> Reg. +init_yregs(Y, Is, D) when Y >= 0 -> + case beam_utils:is_killed({y,Y}, Is, D) of + true -> + (1 bsl Y) bor init_yregs(Y-1, Is, D); + false -> + init_yregs(Y-1, Is, D) + end; +init_yregs(_, _, _) -> 0. count_ones(Bits) -> count_ones(Bits, 0). count_ones(0, Acc) -> Acc; @@ -463,16 +494,34 @@ count_ones(Bits, Acc) -> %% Calculate the new number of live registers when we move an allocate %% instruction upwards, passing a 'set' instruction. -alloc_live_regs({set,Ds,Ss,_}, Regs0) -> +alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) -> Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)), - live_regs(0, Rset). + Live = live_regs(0, Rset), + case ensure_contiguous(Rset, Live) of + not_possible -> + %% Liveness information (looking forward in the + %% instruction stream) can't prove that moving this + %% allocation instruction is safe. Now use the annotation + %% of defined registers at the beginning of the current + %% block to see whether moving would be safe. + Def0 = defined_regs(Is, 0), + Def = Def0 band ((1 bsl Live) - 1), + ensure_contiguous(Rset bor Def, Live); + Live -> + %% Safe based on liveness information. + Live + end. live_regs(N, 0) -> N; -live_regs(N, Regs) when Regs band 1 =:= 1 -> - live_regs(N+1, Regs bsr 1); -live_regs(_, _) -> - not_possible. +live_regs(N, Regs) -> + live_regs(N+1, Regs bsr 1). + +ensure_contiguous(Regs, Live) -> + case (1 bsl Live) - 1 of + Regs -> Live; + _ -> not_possible + end. x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N))); x_dead([_|Rs], Regs) -> x_dead(Rs, Regs); @@ -481,3 +530,14 @@ x_dead([], Regs) -> Regs. x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N)); x_live([_|Rs], Regs) -> x_live(Rs, Regs); x_live([], Regs) -> Regs. + +%% defined_regs(ReversedInstructions) -> RegBitmap. +%% Given a reversed instruction stream, determine the +%% the registers that are defined. + +defined_regs([{'%anno',{def,Def}}|_], Regs) -> + Def bor Regs; +defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) -> + x_live(Ds, Regs bor ((1 bsl Live) - 1)); +defined_regs([{set,Ds,_,_}|Is], Regs) -> + defined_regs(Is, x_live(Ds, Regs)). |