aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src/beam_block.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compiler/src/beam_block.erl')
-rw-r--r--lib/compiler/src/beam_block.erl194
1 files changed, 127 insertions, 67 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 6543e05e20..39ae8d5347 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -23,30 +23,37 @@
-module(beam_block).
-export([module/2]).
--import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
+-import(lists, [reverse/1,reverse/2,member/2]).
-spec module(beam_utils:module_code(), [compile:option()]) ->
{'ok',beam_utils:module_code()}.
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
- Fs = [function(F) || F <- Fs0],
+module({Mod,Exp,Attr,Fs0,Lc}, Opts) ->
+ Blockify = not member(no_blockify, Opts),
+ Fs = [function(F, Blockify) || F <- Fs0],
{ok,{Mod,Exp,Attr,Fs,Lc}}.
-function({function,Name,Arity,CLabel,Is0}) ->
+function({function,Name,Arity,CLabel,Is0}, Blockify) ->
try
%% Collect basic blocks and optimize them.
- Is1 = blockify(Is0),
- Is2 = embed_lines(Is1),
- Is3 = move_allocates(Is2),
- Is4 = beam_utils:live_opt(Is3),
- Is5 = opt_blocks(Is4),
- Is6 = beam_utils:delete_live_annos(Is5),
-
- %% Done.
- {function,Name,Arity,CLabel,Is6}
+ Is2 = case Blockify of
+ true ->
+ Is1 = blockify(Is0),
+ embed_lines(Is1);
+ false ->
+ Is0
+ end,
+ Is3 = beam_utils:anno_defs(Is2),
+ Is4 = move_allocates(Is3),
+ Is5 = beam_utils:live_opt(Is4),
+ Is6 = opt_blocks(Is5),
+ Is7 = beam_utils:delete_annos(Is6),
+ Is = opt_allocs(Is7),
+
+ %% Done.
+ {function,Name,Arity,CLabel,Is}
catch
- Class:Error ->
- Stack = erlang:get_stacktrace(),
+ Class:Error:Stack ->
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
erlang:raise(Class, Error, Stack)
end.
@@ -136,17 +143,16 @@ embed_lines([], Acc) -> Acc.
opt_blocks([{block,Bl0}|Is]) ->
%% The live annotation at the beginning is not useful.
- [{'%live',_,_}|Bl] = Bl0,
+ [{'%anno',_}|Bl] = Bl0,
[{block,opt_block(Bl)}|opt_blocks(Is)];
opt_blocks([I|Is]) ->
[I|opt_blocks(Is)];
opt_blocks([]) -> [].
opt_block(Is0) ->
- Is = find_fixpoint(fun(Is) ->
- opt_tuple_element(opt(Is))
- end, Is0),
- opt_alloc(Is).
+ find_fixpoint(fun(Is) ->
+ opt_tuple_element(opt(Is))
+ end, Is0).
find_fixpoint(OptFun, Is0) ->
case OptFun(Is0) of
@@ -173,7 +179,7 @@ find_fixpoint(OptFun, Is0) ->
%% safe to assume that if x(N) is initialized, then all lower-numbered
%% x registers are also initialized.
%%
-%% For example, in general it is not safe to transform the following
+%% For example, we must be careful when transforming the following
%% instructions:
%%
%% get_tuple_element x(0) Element => x(1)
@@ -185,13 +191,9 @@ find_fixpoint(OptFun, Is0) ->
%% get_tuple_element x(0) Element => x(1)
%%
%% The transformation is safe if and only if x(1) has been
-%% initialized previously. Unfortunately, beam_reorder may have moved
-%% a get_tuple_element instruction so that x(1) is not always
-%% initialized when this code is reached. To find whether or not x(1)
-%% is initialized, we would need to analyze all code preceding these
-%% two instructions (across branches). Since we currently don't have
-%% any practical mechanism for doing that, we will have to
-%% conservatively assume that the transformation is unsafe.
+%% initialized previously. We will use the annotations added by
+%% beam_utils:anno_defs/1 to determine whether x(1) has been
+%% initialized.
move_allocates([{block,Bl0}|Is]) ->
Bl = move_allocates_1(reverse(Bl0), []),
@@ -200,15 +202,20 @@ move_allocates([I|Is]) ->
[I|move_allocates(Is)];
move_allocates([]) -> [].
+move_allocates_1([{'%anno',_}|Is], Acc) ->
+ move_allocates_1(Is, Acc);
move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
- case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of
- {false,_} ->
- move_allocates_1(Is, [I|Acc0]);
- {true,not_possible} ->
- move_allocates_1(Is, [I|Acc0]);
- {true,Live} when is_integer(Live) ->
- A = {set,[],[],{alloc,Live,Info}},
- move_allocates_1(Is, [A,I|Acc])
+ case alloc_may_pass(I) of
+ false ->
+ move_allocates_1(Is, [I|Acc0]);
+ true ->
+ case alloc_live_regs(I, Is, Live0) of
+ not_possible ->
+ move_allocates_1(Is, [I|Acc0]);
+ Live when is_integer(Live) ->
+ A = {set,[],[],{alloc,Live,Info}},
+ move_allocates_1(Is, [A,I|Acc])
+ end
end;
move_allocates_1([I|Is], Acc) ->
move_allocates_1(Is, [I|Acc]);
@@ -219,21 +226,34 @@ alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false;
alloc_may_pass({set,_,_,put_list}) -> false;
alloc_may_pass({set,_,_,put}) -> false;
alloc_may_pass({set,_,_,_}) -> true.
-
+
%% opt([Instruction]) -> [Instruction]
%% Optimize the instruction stream inside a basic block.
opt([{set,[X],[X],move}|Is]) -> opt(Is);
+opt([{set,[X],_,move},{set,[X],_,move}=I|Is]) ->
+ opt([I|Is]);
+opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) ->
+ opt([I1,{set,[D2],[S1],move}|Is]);
+opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[S2],move}|Is0]) when S1 =/= D2 ->
+ %% Place move S x0 at the end of move sequences so that the
+ %% loader can merge it with the following instruction.
+ {Ds,Is} = opt_moves([D2], Is0),
+ [{set,Ds,[S2],move}|opt([I1|Is])];
opt([{set,_,_,{line,_}}=Line1,
{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,0}}}=I1,
{set,_,_,{line,_}}=Line2,
{set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
opt([Line2,I2,Line1,I1|Is]);
+opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1,
+ {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is])
+ when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
+ opt([I2,I1|Is]);
opt([{set,Ds0,Ss,Op}|Is0]) ->
{Ds,Is} = opt_moves(Ds0, Is0),
[{set,Ds,Ss,Op}|opt(Is)];
-opt([{'%live',_,_}=I|Is]) ->
+opt([{'%anno',_}=I|Is]) ->
[I|opt(Is)];
opt([]) -> [].
@@ -401,31 +421,47 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
no
end.
+%% opt_allocs(Instructions) -> Instructions. Optimize allocate
+%% instructions inside blocks. If safe, replace an allocate_zero
+%% instruction with the slightly cheaper allocate instruction.
+
+opt_allocs(Is) ->
+ D = beam_utils:index_labels(Is),
+ opt_allocs_1(Is, D).
+
+opt_allocs_1([{block,Bl0}|Is], D) ->
+ Bl = opt_alloc(Bl0, {D,Is}),
+ [{block,Bl}|opt_allocs_1(Is, D)];
+opt_allocs_1([I|Is], D) ->
+ [I|opt_allocs_1(Is, D)];
+opt_allocs_1([], _) -> [].
+
%% opt_alloc(Instructions) -> Instructions'
%% Optimises all allocate instructions.
opt_alloc([{set,[],[],{alloc,Live0,Info0}},
- {set,[],[],{alloc,Live,Info}}|Is]) ->
+ {set,[],[],{alloc,Live,Info}}|Is], D) ->
Live = Live0, %Assertion.
Alloc = combine_alloc(Info0, Info),
I = {set,[],[],{alloc,Live,Alloc}},
- opt_alloc([I|Is]);
-opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
- [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
-opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
-opt_alloc([]) -> [].
+ opt_alloc([I|Is], D);
+opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) ->
+ [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is];
+opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)];
+opt_alloc([], _) -> [].
combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]}) ->
{zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
-
+
%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
%% Generates the optimal sequence of instructions for
%% allocating and initalizing the stack frame and needed heap.
-opt_alloc(_Is, nostack, Nh, LivingRegs) ->
+opt_alloc(_Is, _D, nostack, Nh, LivingRegs) ->
{alloc,LivingRegs,{nozero,nostack,Nh,[]}};
-opt_alloc(Is, Ns, Nh, LivingRegs) ->
- InitRegs = init_yreg(Is, 0),
+opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) ->
+ Is = [{block,Bl}|OuterIs],
+ InitRegs = init_yregs(Ns, Is, D),
case count_ones(InitRegs) of
N when N*2 > Ns ->
{alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
@@ -441,19 +477,14 @@ gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
gen_init(Fs, Regs, Y, Acc) ->
gen_init(Fs, Regs bsr 1, Y+1, Acc).
-%% init_yreg(Instructions, RegSet) -> RegSetInitialized
-%% Calculate the set of initialized y registers.
-
-init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
-init_yreg(_Is, Reg) -> Reg.
-
-add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
-
-add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
-add_yreg(_, Reg) -> Reg.
+init_yregs(Y, Is, D) when Y >= 0 ->
+ case beam_utils:is_killed({y,Y}, Is, D) of
+ true ->
+ (1 bsl Y) bor init_yregs(Y-1, Is, D);
+ false ->
+ init_yregs(Y-1, Is, D)
+ end;
+init_yregs(_, _, _) -> 0.
count_ones(Bits) -> count_ones(Bits, 0).
count_ones(0, Acc) -> Acc;
@@ -463,16 +494,34 @@ count_ones(Bits, Acc) ->
%% Calculate the new number of live registers when we move an allocate
%% instruction upwards, passing a 'set' instruction.
-alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
+alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) ->
Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
- live_regs(0, Rset).
+ Live = live_regs(0, Rset),
+ case ensure_contiguous(Rset, Live) of
+ not_possible ->
+ %% Liveness information (looking forward in the
+ %% instruction stream) can't prove that moving this
+ %% allocation instruction is safe. Now use the annotation
+ %% of defined registers at the beginning of the current
+ %% block to see whether moving would be safe.
+ Def0 = defined_regs(Is, 0),
+ Def = Def0 band ((1 bsl Live) - 1),
+ ensure_contiguous(Rset bor Def, Live);
+ Live ->
+ %% Safe based on liveness information.
+ Live
+ end.
live_regs(N, 0) ->
N;
-live_regs(N, Regs) when Regs band 1 =:= 1 ->
- live_regs(N+1, Regs bsr 1);
-live_regs(_, _) ->
- not_possible.
+live_regs(N, Regs) ->
+ live_regs(N+1, Regs bsr 1).
+
+ensure_contiguous(Regs, Live) ->
+ case (1 bsl Live) - 1 of
+ Regs -> Live;
+ _ -> not_possible
+ end.
x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
@@ -481,3 +530,14 @@ x_dead([], Regs) -> Regs.
x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
x_live([_|Rs], Regs) -> x_live(Rs, Regs);
x_live([], Regs) -> Regs.
+
+%% defined_regs(ReversedInstructions, Regs) -> RegBitmap.
+%% Given a reversed instruction stream, determine the
+%% registers that are defined.
+
+defined_regs([{'%anno',{def,Def}}|_], Regs) ->
+ Def bor Regs;
+defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
+ x_live(Ds, Regs bor ((1 bsl Live) - 1));
+defined_regs([{set,Ds,_,_}|Is], Regs) ->
+ defined_regs(Is, x_live(Ds, Regs)).