%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 1999-2016. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose : Partitions assembly instructions into basic blocks and
%% optimizes them.
-module(beam_block).
-export([module/2]).
-import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
%% module(Code0, Options) -> {ok,Code}
%%  Entry point of the pass. Applies function/1 to every function
%%  definition in the module tuple; the other four elements of the
%%  tuple are returned unchanged. The options are ignored.
module({Module,Exports,Attributes,Functions0,Lc}, _Options) ->
    Functions = [function(Function) || Function <- Functions0],
    {ok,{Module,Exports,Attributes,Functions,Lc}}.
%% function(Function0) -> Function
%%  Run all sub passes on the instructions of one function.
%%  If any sub pass raises an exception, the name and arity of the
%%  function being processed are printed, and the exception is then
%%  re-raised with its original class, reason, and stacktrace.
function({function,Name,Arity,CLabel,Is0}) ->
    try
        %% Collect basic blocks and optimize them.
        Is1 = blockify(Is0),
        Is2 = embed_lines(Is1),
        Is3 = move_allocates(Is2),
        Is4 = beam_utils:live_opt(Is3),
        Is5 = opt_blocks(Is4),
        Is6 = beam_utils:delete_live_annos(Is5),

        %% Done.
        {function,Name,Arity,CLabel,Is6}
    catch
        %% The stacktrace is bound directly in the catch pattern.
        %% erlang:get_stacktrace/0 is deprecated since OTP 21 and is
        %% not available in later releases.
        Class:Error:Stack ->
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.
%% blockify(Instructions0) -> Instructions
%%  Collect sequences of instructions into basic blocks.
%%  Also do some simple optimizations on instructions outside the blocks.

blockify(Instructions) ->
    blockify(Instructions, []).

%% blockify(Instructions, Acc) -> Instructions'
%%  Acc holds the instructions processed so far, in reverse order.
blockify([], Acc) ->
    reverse(Acc);
blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Rest], Acc) ->
    %% Useless instruction sequence; all three instructions are dropped.
    blockify(Rest, Acc);
blockify([Instr|Rest]=All, Acc) ->
    case collect(Instr) of
        Set when is_tuple(Set) ->
            %% Instr can be part of a block. Gather it together with
            %% the collectable instructions that follow it.
            {Block,Remaining} = collect_block(All),
            blockify(Remaining, [{block,Block}|Acc]);
        error ->
            %% Not a block instruction; it stays outside of any block.
            blockify(Rest, [Instr|Acc])
    end.
%% collect_block(Instructions) -> {Block,RemainingInstructions}
%%  Collect instructions into a block for as long as collect/1
%%  accepts them. Block is the list of 'set' instructions; the
%%  remaining instructions start with the first instruction that
%%  could not be collected.

collect_block(Instructions) ->
    collect_block(Instructions, []).

%% Collected holds the 'set' instructions gathered so far, in reverse order.
collect_block([], Collected) ->
    {reverse(Collected),[]};
collect_block([{allocate,FrameSize,Live}|Rest0], Collected) ->
    %% The {init,{y,_}} instructions directly following an allocate
    %% are folded into the init list of the alloc instruction.
    IsYInit = fun({init,{y,_}}) -> true;
                 (_) -> false
              end,
    {Inits,Rest} = lists:splitwith(IsYInit, Rest0),
    Alloc = {set,[],[],{alloc,Live,{nozero,FrameSize,0,Inits}}},
    collect_block(Rest, [Alloc|Collected]);
collect_block([{allocate_zero,FrameSize,Live},{test_heap,HeapNeed,Live}|Rest],
              Collected) ->
    %% An allocate_zero directly followed by a test_heap with the same
    %% number of live registers becomes a single alloc instruction.
    Alloc = {set,[],[],{alloc,Live,{zero,FrameSize,HeapNeed,[]}}},
    collect_block(Rest, [Alloc|Collected]);
collect_block([Instr|Rest]=All, Collected) ->
    case collect(Instr) of
        error -> {reverse(Collected),All};
        Set -> collect_block(Rest, [Set|Collected])
    end.
%% collect(Instruction) -> {set,Destinations,Sources,Operation} | error
%%  Translate an instruction that is allowed inside a basic block to
%%  its 'set' form. Returns 'error' for any other instruction.

%% Stack frame and heap allocation. The operation has the form
%% {alloc,Live,{zero|nozero,FrameSize,HeapNeed,Inits}}.
collect({allocate,FrameSize,Live}) ->
    {set,[],[],{alloc,Live,{nozero,FrameSize,0,[]}}};
collect({allocate_zero,FrameSize,Live}) ->
    {set,[],[],{alloc,Live,{zero,FrameSize,0,[]}}};
collect({allocate_heap,FrameSize,HeapNeed,Live}) ->
    {set,[],[],{alloc,Live,{nozero,FrameSize,HeapNeed,[]}}};
collect({allocate_heap_zero,FrameSize,HeapNeed,Live}) ->
    {set,[],[],{alloc,Live,{zero,FrameSize,HeapNeed,[]}}};
collect({init,Dst}) ->
    {set,[Dst],[],init};
collect({test_heap,HeapNeed,Live}) ->
    {set,[],[],{alloc,Live,{nozero,nostack,HeapNeed,[]}}};

%% BIF calls.
collect({bif,Name,Fail,Args,Dst}) ->
    {set,[Dst],Args,{bif,Name,Fail}};
collect({gc_bif,Name,Fail,Live,Args,Dst}) ->
    {set,[Dst],Args,{alloc,Live,{gc_bif,Name,Fail}}};

%% Moving, building, and taking apart terms.
collect({move,Src,Dst}) ->
    {set,[Dst],[Src],move};
collect({put_list,Src1,Src2,Dst}) ->
    {set,[Dst],[Src1,Src2],put_list};
collect({put_tuple,A,Dst}) ->
    {set,[Dst],[],{put_tuple,A}};
collect({put,Src}) ->
    {set,[],[Src],put};
collect({get_tuple_element,Tuple,Pos,Dst}) ->
    {set,[Dst],[Tuple],{get_tuple_element,Pos}};
collect({set_tuple_element,Src,Dst,Pos}) ->
    %% Both operands are listed as sources; there is no destination.
    {set,[],[Src,Dst],{set_tuple_element,Pos}};
collect({get_list,Src,Dst1,Dst2}) ->
    {set,[Dst1,Dst2],[Src],get_list};
collect(remove_message) ->
    {set,[],[],remove_message};
collect({put_map,Fail,Op,Src,Dst,Live,{list,Puts}}) ->
    {set,[Dst],[Src|Puts],{alloc,Live,{put_map,Op,Fail}}};

%% 'catch' and 'try' share the same representation.
collect({Op,Reg,Lbl}) when Op =:= 'catch'; Op =:= 'try' ->
    {set,[Reg],[],{try_catch,Op,Lbl}};

%% Floating point instructions.
collect(fclearerror) ->
    {set,[],[],fclearerror};
collect({fcheckerror,{f,0}}) ->
    {set,[],[],fcheckerror};
collect({fmove,Src,Dst}) ->
    {set,[Dst],[Src],fmove};
collect({fconv,Src,Dst}) ->
    {set,[Dst],[Src],fconv};

%% Everything else must stay outside of blocks.
collect(_) ->
    error.
%% embed_lines([Instruction]) -> [Instruction]
%%  Combine blocks that would be split by line/1 instructions.
%%  Also move a line instruction before a block into the block,
%%  but leave the line/1 instruction after a block outside.

embed_lines(Instructions) ->
    embed_lines(reverse(Instructions), []).

%% The instructions are traversed from the last one to the first;
%% Done holds the result built so far, in forward order.
embed_lines([], Done) ->
    Done;
embed_lines([{block,Later},{line,_}=Line,{block,Earlier}|Rest], Done) ->
    %% Two blocks separated only by a line instruction: merge them,
    %% keeping the line instruction between their contents.
    Merged = Earlier ++ [{set,[],[],Line}|Later],
    embed_lines([{block,Merged}|Rest], Done);
embed_lines([{block,Block},{line,_}=Line|Rest], Done) ->
    %% A line instruction directly before a block becomes the first
    %% instruction of that block.
    embed_lines([{block,[{set,[],[],Line}|Block]}|Rest], Done);
embed_lines([Instr|Rest], Done) ->
    embed_lines(Rest, [Instr|Done]).
%% opt_blocks([Instruction]) -> [Instruction]
%%  Optimize the contents of every block in the instruction stream.
%%  Instructions outside of blocks are left as they are.
opt_blocks([]) ->
    [];
opt_blocks([{block,Annotated}|Rest]) ->
    %% The live annotation at the beginning is not useful.
    [{'%live',_,_}|Body] = Annotated,
    Optimized = opt_block(Body),
    [{block,Optimized}|opt_blocks(Rest)];
opt_blocks([Instr|Rest]) ->
    [Instr|opt_blocks(Rest)].
%% opt_block([Instruction]) -> [Instruction]
%%  Optimize the instructions of one block. opt/1 followed by
%%  opt_tuple_element/1 is repeated until the block no longer
%%  changes; the allocation instructions are optimized last.
opt_block(Block0) ->
    OnePass = fun(Block) ->
                      Optimized = opt(Block),
                      opt_tuple_element(Optimized)
              end,
    Block1 = find_fixpoint(OnePass, Block0),
    opt_alloc(Block1).
%% find_fixpoint(OptFun, Term0) -> Term
%%  Apply OptFun repeatedly, starting with Term0, until the result is
%%  exactly equal to the argument it was given. Return that term.
find_fixpoint(OptFun, Current) ->
    Next = OptFun(Current),
    if
        Next =:= Current -> Current;
        true -> find_fixpoint(OptFun, Next)
    end.
%% move_allocates(Is0) -> Is
%% Move allocate instructions upwards in the instruction stream
%% (within the same block), in the hope of getting more possibilities
%% for optimizing away moves later.
%%
%% For example, we can transform the following instructions:
%%
%% get_tuple_element x(1) Element => x(2)
%% allocate_zero StackSize 3 %% x(0), x(1), x(2) are live
%%
%% to the following instructions:
%%
%% allocate_zero StackSize 2 %% x(0) and x(1) are live
%% get_tuple_element x(1) Element => x(2)
%%
%% NOTE: Since the beam_reorder pass has been run, it is no longer
%% safe to assume that if x(N) is initialized, then all lower-numbered
%% x registers are also initialized.
%%
%% For example, in general it is not safe to transform the following
%% instructions:
%%
%% get_tuple_element x(0) Element => x(1)
%% allocate_zero StackSize 3 %x(0), x(1), x(2) are live
%%
%% to the following instructions:
%%
%% allocate_zero StackSize 3
%% get_tuple_element x(0) Element => x(1)
%%
%% The transformation is safe if and only if x(1) has been
%% initialized previously. Unfortunately, beam_reorder may have moved
%% a get_tuple_element instruction so that x(1) is not always
%% initialized when this code is reached. To find whether or not x(1)
%% is initialized, we would need to analyze all code preceding these
%% two instructions (across branches). Since we currently don't have
%% any practical mechanism for doing that, we will have to
%% conservatively assume that the transformation is unsafe.
move_allocates([]) ->
    [];
move_allocates([{block,Block0}|Rest]) ->
    %% The block is handed over in reverse, so that it is processed
    %% from its last instruction to its first.
    Block = move_allocates_1(reverse(Block0), []),
    [{block,Block}|move_allocates(Rest)];
move_allocates([Instr|Rest]) ->
    %% Instructions outside of blocks are not touched.
    [Instr|move_allocates(Rest)].
%% move_allocates_1(ReversedBlock, Acc) -> Block
%%  Walk the block backwards. Acc holds the part of the block that
%%  has already been processed, in forward order. Whenever Acc starts
%%  with an allocation, try to move the allocation above the
%%  instruction that precedes it.
move_allocates_1([], Acc) ->
    Acc;
move_allocates_1([Instr|Rest], [{set,[],[],{alloc,Live0,Info}}|Tail]=Acc) ->
    case {alloc_may_pass(Instr),alloc_live_regs(Instr, Live0)} of
        {true,Live} when is_integer(Live) ->
            %% Swap the two instructions, updating the number of
            %% live registers of the allocation.
            Alloc = {set,[],[],{alloc,Live,Info}},
            move_allocates_1(Rest, [Alloc,Instr|Tail]);
        {true,not_possible} ->
            %% No valid live register count exists above Instr.
            move_allocates_1(Rest, [Instr|Acc]);
        {false,_} ->
            %% The allocation must not pass this kind of instruction.
            move_allocates_1(Rest, [Instr|Acc])
    end;
move_allocates_1([Instr|Rest], Acc) ->
    move_allocates_1(Rest, [Instr|Acc]).
%% alloc_may_pass({set,_,_,Operation}) -> boolean()
%%  Test whether an allocation may be moved above the given 'set'
%%  instruction. It may not pass another allocation, nor a
%%  set_tuple_element, put_list, or put instruction.
alloc_may_pass({set,_,_,Op}) ->
    case Op of
        {alloc,_,_} -> false;
        {set_tuple_element,_} -> false;
        put_list -> false;
        put -> false;
        _ -> true
    end.
%% opt([Instruction]) -> [Instruction]
%%  Optimize the instruction stream inside a basic block.
opt([]) ->
    [];
opt([{'%live',_,_}=Anno|Rest]) ->
    %% Live annotations are passed through unchanged.
    [Anno|opt(Rest)];
opt([{set,[Reg],[Reg],move}|Rest]) ->
    %% A move from a register to itself is dropped.
    opt(Rest);
opt([{set,_,_,{line,_}}=LineA,
     {set,[DstA],[{integer,PosA},Tuple],{bif,element,{f,0}}}=ElemA,
     {set,_,_,{line,_}}=LineB,
     {set,[DstB],[{integer,PosB},Tuple],{bif,element,{f,0}}}=ElemB|Rest])
  when PosA < PosB, DstA =/= DstB, DstA =/= Tuple, DstB =/= Tuple ->
    %% Two calls to element/2 on the same tuple: reorder them (each
    %% together with its line instruction) so that the call with the
    %% larger index comes first.
    opt([LineB,ElemB,LineA,ElemA|Rest]);
opt([{set,Dsts0,Srcs,Op}|Rest0]) ->
    %% Try to eliminate move instructions that copy the destinations.
    {Dsts,Rest} = opt_moves(Dsts0, Rest0),
    [{set,Dsts,Srcs,Op}|opt(Rest)].
%% opt_moves([Dest], [Instruction]) -> {[Dest],[Instruction]}
%%  For each Dest, does the optimization described in opt_move/2.
%%  When there are two destinations, a new destination is rejected if
%%  it is the same register as the other destination.
opt_moves([], Is) ->
    {[],Is};
opt_moves([Dst0]=Dsts, Is0) ->
    case opt_move(Dst0, Is0) of
        {Dst,Is} -> {[Dst],Is};
        not_possible -> {Dsts,Is0}
    end;
opt_moves([First0,Second0], Is0) ->
    {First,Is1} =
        case opt_move(First0, Is0) of
            not_possible ->
                {First0,Is0};
            {NewFirst,_} when NewFirst =:= Second0 ->
                %% Would clash with the second destination.
                {First0,Is0};
            {NewFirst,Is} ->
                {NewFirst,Is}
        end,
    case opt_move(Second0, Is1) of
        not_possible ->
            {[First,Second0],Is1};
        {NewSecond,_} when NewSecond =:= First ->
            %% Would clash with the (possibly updated) first destination.
            {[First,Second0],Is1};
        {Second,Is2} ->
            {[First,Second],Is2}
    end.
%% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible
%%  If there is a {move,Dest,FinalDest} instruction in the
%%  instruction stream, remove the move instruction and let
%%  FinalDest be the destination.

opt_move(Dest, Is) ->
    opt_move_1(Dest, Is, []).

%% opt_move_1(Dest, [Instruction], Acc) ->
%%      {UpdatedDest,[Instruction]} | not_possible
%%  Search for the move instruction. Acc holds, in reverse order,
%%  the instructions that were passed on the way to it.
opt_move_1(Reg, [{set,[FinalDst],[Reg],move}|Is0], Passed) ->
    %% Provided that the source register is killed by instructions
    %% that follow, the optimization is safe.
    case eliminate_use_of_from_reg(Is0, Reg, FinalDst, []) of
        no -> not_possible;
        {yes,Is} -> opt_move_rev(FinalDst, Passed, Is)
    end;
opt_move_1(_Reg, [{set,_,_,{alloc,_,_}}|_], _Passed) ->
    %% The optimization is either not possible or not safe.
    %%
    %% If Reg is an X register killed by allocation, the optimization
    %% is not safe. On the other hand, if the X register is killed,
    %% there will not follow a 'move' instruction with this X
    %% register as the source.
    %%
    %% If Reg is a Y register, the optimization is still not safe
    %% because the new target register is an X register that cannot
    %% safely pass the alloc instruction.
    not_possible;
opt_move_1(Reg, [{set,_,_,_}=Instr|Is], Passed) ->
    %% If the source register is either killed or used by this
    %% instruction, the optimization is not possible.
    case is_killed_or_used(Reg, Instr) of
        false -> opt_move_1(Reg, Is, [Instr|Passed]);
        true -> not_possible
    end;
opt_move_1(_Reg, _Is, _Passed) ->
    not_possible.
%% opt_tuple_element([Instruction]) -> [Instruction]
%% If possible, move get_tuple_element instructions forward
%% in the instruction stream to a move instruction, eliminating
%% the move instruction. Example:
%%
%% get_tuple_element Tuple Pos Dst1
%% ...
%% move Dst1 Dst2
%%
%% This code may be possible to rewrite to:
%%
%% %%(Moved get_tuple_element instruction)
%% ...
%% get_tuple_element Tuple Pos Dst2
%%
opt_tuple_element([]) ->
    [];
opt_tuple_element([{set,[Dst],[Src],{get_tuple_element,_}}=GetElem|Rest0]) ->
    case opt_tuple_element_1(Rest0, GetElem, {Src,Dst}, []) of
        {yes,Rest} ->
            %% The instruction now sits further down in Rest, in place
            %% of the move instruction. Continue with the rewritten
            %% instruction stream.
            opt_tuple_element(Rest);
        no ->
            [GetElem|opt_tuple_element(Rest0)]
    end;
opt_tuple_element([Instr|Rest]) ->
    [Instr|opt_tuple_element(Rest)].
%% opt_tuple_element_1([Instruction], GetElem, {Tuple,Elem}, Acc) ->
%%      {yes,[Instruction]} | no
%%  GetElem is the get_tuple_element instruction to be moved, Tuple
%%  is its source register, and Elem is its destination register.
%%  Search for a move instruction with Elem as its source. Acc holds,
%%  in reverse order, the instructions passed on the way to it.
opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) ->
    %% Never move the instruction past an allocation.
    no;
opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) ->
    %% Never move the instruction past a 'try' or 'catch'.
    no;
opt_tuple_element_1([{set,[NewDst],[Elem],move}|Is0], GetElem, {_,Elem}, Acc) ->
    case eliminate_use_of_from_reg(Is0, Elem, NewDst, []) of
        {yes,Is} ->
            %% Replace the move instruction with the
            %% get_tuple_element instruction, now writing to NewDst.
            {set,[Elem],Srcs,Op} = GetElem,
            Moved = {set,[NewDst],Srcs,Op},
            {yes,reverse(Acc, [Moved|Is])};
        no ->
            no
    end;
opt_tuple_element_1([{set,Dsts,Srcs,_}=Instr|Is], GetElem, {Tuple,Elem}=Regs, Acc) ->
    %% Give up if this instruction overwrites the tuple register or
    %% reads the element register.
    case member(Tuple, Dsts) orelse member(Elem, Srcs) of
        false -> opt_tuple_element_1(Is, GetElem, Regs, [Instr|Acc]);
        true -> no
    end;
opt_tuple_element_1(_, _, _, _) ->
    no.
%% opt_move_rev(NewDest, ReversedInstructions, Acc) ->
%%      {NewDest,[Instruction]} | not_possible
%%  Reverse the instructions onto Acc, while checking that none of
%%  them would interfere with using the new destination register
%%  (NewDest).
opt_move_rev(NewDst, [], Acc) ->
    {NewDst,Acc};
opt_move_rev(NewDst, [Instr|Rest], Acc) ->
    case is_killed_or_used(NewDst, Instr) of
        false -> opt_move_rev(NewDst, Rest, [Instr|Acc]);
        true -> not_possible
    end.
%% is_killed_or_used(Register, {set,Destinations,Sources,_}) -> bool()
%%  Test whether the register is either read (it is one of the
%%  sources) or written (it is one of the destinations) by the
%%  instruction.
is_killed_or_used(Reg, {set,Dsts,Srcs,_Op}) ->
    member(Reg, Srcs) orelse member(Reg, Dsts).
%% eliminate_use_of_from_reg([Instruction], FromRegister, ToRegister, Acc) ->
%%     {yes,Is} | no
%%  Eliminate any use of FromRegister in the instruction sequence
%%  by replacing uses of FromRegister with ToRegister. If FromRegister
%%  is referenced by an allocation instruction, return 'no' to indicate
%%  that FromRegister is still used and that the optimization is not
%%  possible. Acc holds the instructions already rewritten, in
%%  reverse order.
eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_], {x,X}, _To, _Acc)
  when X < Live ->
    %% The X register is live at the allocation.
    no;
eliminate_use_of_from_reg([{set,_,_,{alloc,_,_}}|_]=Is, {x,_}, _To, Acc) ->
    %% The X register is not live at the allocation; stop here and
    %% leave the allocation and everything after it unchanged.
    {yes,reverse(Acc, Is)};
eliminate_use_of_from_reg([{set,Dsts,Srcs0,Op}=Instr0|Rest], From, To, Acc) ->
    Instr = case member(From, Srcs0) of
                false ->
                    Instr0;
                true ->
                    Srcs = [if
                                Src =:= From -> To;
                                true -> Src
                            end || Src <- Srcs0],
                    {set,Dsts,Srcs,Op}
            end,
    case member(From, Dsts) of
        false ->
            eliminate_use_of_from_reg(Rest, From, To, [Instr|Acc]);
        true ->
            %% From is overwritten here, so the instructions that
            %% follow need no rewriting.
            {yes,reverse(Acc, [Instr|Rest])}
    end;
eliminate_use_of_from_reg([_Last]=Is, From, _To, Acc) ->
    %% A single remaining element that none of the clauses above
    %% handled. Let beam_utils:is_killed_block/2 decide.
    case beam_utils:is_killed_block(From, Is) of
        false -> no;
        true -> {yes,reverse(Acc, Is)}
    end.
%% opt_alloc(Instructions) -> Instructions'
%%  Optimises the allocate instructions. Adjacent allocations are
%%  merged. Once an allocation with an empty init list has been
%%  rewritten, the instructions that follow it are returned as they are.
opt_alloc([]) ->
    [];
opt_alloc([{set,[],[],{alloc,Live0,Info0}},
           {set,[],[],{alloc,Live,Info}}|Rest]) ->
    Live = Live0,                               %Assertion.
    Merged = {set,[],[],{alloc,Live,combine_alloc(Info0, Info)}},
    opt_alloc([Merged|Rest]);
opt_alloc([{set,[],[],{alloc,Live,{_,Ns,Nh,[]}}}|Rest]) ->
    Alloc = opt_alloc(Rest, Ns, Nh, Live),
    [{set,[],[],Alloc}|Rest];
opt_alloc([Instr|Rest]) ->
    [Instr|opt_alloc(Rest)].
%% combine_alloc(Info1, Info2) -> Info
%%  Merge the information of two adjacent allocations. The second
%%  one must have 'nostack' as its frame size and an empty init list.
%%  The frame size and init list are taken from the first allocation,
%%  the heap needs are combined using beam_utils:combine_heap_needs/2,
%%  and the result is always tagged 'zero'.
combine_alloc({_,FrameSize,HeapNeed1,Inits}, {_,nostack,HeapNeed2,[]}) ->
    HeapNeed = beam_utils:combine_heap_needs(HeapNeed1, HeapNeed2),
    {zero,FrameSize,HeapNeed,Inits}.
%% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) ->
%%      {alloc,LivingRegs,Info}
%%  Generates the optimal alloc operation for allocating and
%%  initializing the stack frame and needed heap. Instructions are
%%  the instructions that follow the allocation.
opt_alloc(_Is, nostack, HeapNeed, Live) ->
    {alloc,Live,{nozero,nostack,HeapNeed,[]}};
opt_alloc(Is, FrameSize, HeapNeed, Live) ->
    InitMask = init_yreg(Is, 0),
    case count_ones(InitMask) * 2 > FrameSize of
        true ->
            %% More than half of the y registers are initialized by
            %% the instructions that follow; generate explicit init
            %% instructions for the remaining ones.
            Inits = gen_init(FrameSize, InitMask),
            {alloc,Live,{nozero,FrameSize,HeapNeed,Inits}};
        false ->
            {alloc,Live,{zero,FrameSize,HeapNeed,[]}}
    end.
%% gen_init(FrameSize, InitializedSet) -> [{init,{y,Y}}]
%%  Generate an init instruction for every y register in the range
%%  0..FrameSize-1 whose bit is not set in InitializedSet.

gen_init(FrameSize, InitMask) ->
    gen_init(FrameSize, InitMask, 0, []).

%% Y is the number of the y register that the lowest bit of Mask
%% stands for.
gen_init(FrameSize, _Mask, Y, Inits) when Y =:= FrameSize ->
    reverse(Inits);
gen_init(FrameSize, Mask, Y, Inits0) ->
    Inits = case Mask band 1 of
                0 -> [{init,{y,Y}}|Inits0];
                _ -> Inits0
            end,
    gen_init(FrameSize, Mask bsr 1, Y + 1, Inits).
%% init_yreg(Instructions, RegSet) -> RegSetInitialized
%%  Calculate the set of initialized y registers. The destinations of
%%  the 'set' instructions are added to the set until a bif, gc_bif,
%%  or put_map operation (or something that is not a 'set'
%%  instruction) is reached.
init_yreg([{set,Dsts,_,Op}|Rest], RegSet) ->
    case Op of
        {bif,_,_} -> RegSet;
        {alloc,_,{gc_bif,_,_}} -> RegSet;
        {alloc,_,{put_map,_,_}} -> RegSet;
        _ -> init_yreg(Rest, add_yregs(Dsts, RegSet))
    end;
init_yreg(_Is, RegSet) ->
    RegSet.
%% add_yregs([Register], RegSet) -> RegSet'
%%  Add all y registers in the list to the register set (a bit mask).
add_yregs(Regs, RegSet) ->
    foldl(fun add_yreg/2, RegSet, Regs).

%% add_yreg(Register, RegSet) -> RegSet'
%%  Set bit Y in the register set if the register is {y,Y}.
%%  Anything else leaves the set unchanged.
add_yreg(Reg, RegSet) ->
    case Reg of
        {y,Y} -> RegSet bor (1 bsl Y);
        _ -> RegSet
    end.
%% count_ones(Bits) -> Count
%%  Count the number of bits that are set in a non-negative integer.

count_ones(Bits) ->
    count_ones(Bits, 0).

count_ones(Bits, Count) when Bits =:= 0 ->
    Count;
count_ones(Bits, Count) ->
    LowBit = Bits band 1,
    count_ones(Bits bsr 1, Count + LowBit).
%% alloc_live_regs({set,Destinations,Sources,_}, Live0) ->
%%      Live | not_possible
%%  Calculate the new number of live registers when we move an
%%  allocate instruction upwards, passing a 'set' instruction.
%%  Returns 'not_possible' if the x registers that would be live do
%%  not form an unbroken range starting at x(0).
alloc_live_regs({set,Dsts,Srcs,_Op}, Live0) ->
    %% Bit mask with one bit set for each of x(0)..x(Live0-1).
    LiveBelow = (1 bsl Live0) - 1,
    %% Above the instruction its destinations are not yet live,
    %% while its sources must be.
    LiveAbove = x_live(Srcs, x_dead(Dsts, LiveBelow)),
    live_regs(0, LiveAbove).
%% live_regs(N, RegSet) -> Count | not_possible
%%  Add the number of set bits in the bit mask to N, provided that
%%  the set bits form an unbroken run starting at bit 0. Returns
%%  'not_possible' if there is a cleared bit below a set bit.
live_regs(Count, Mask) ->
    if
        Mask =:= 0 -> Count;
        Mask band 1 =:= 1 -> live_regs(Count + 1, Mask bsr 1);
        true -> not_possible
    end.
%% x_dead([Register], RegSet) -> RegSet'
%%  Clear the bit of every x register in the list. Other elements
%%  of the list are ignored.
x_dead(Regs, Mask0) ->
    foldl(fun({x,N}, Mask) -> Mask band (bnot (1 bsl N));
             (_, Mask) -> Mask
          end, Mask0, Regs).
%% x_live([Register], RegSet) -> RegSet'
%%  Set the bit of every x register in the list. Other elements of
%%  the list are ignored.
x_live(Regs, Mask0) ->
    foldl(fun({x,N}, Mask) -> Mask bor (1 bsl N);
             (_, Mask) -> Mask
          end, Mask0, Regs).