%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Prepare for code generation, including register allocation.
%%
%% The output of this compiler pass is still in the SSA format, but
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
%% the BEAM instructions. For example, the binary matching
%% instructions are transformed from the optimization-friendly
%% internal format to instruction more similar to the actual BEAM
%% instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%% are annotated with a {frame_size,Size} annotation.
%%
%% * 'copy' instructions are added for all variables that need
%% to be saved to the stack frame. Additional 'copy' instructions
%% can be added as an optimization to reuse y registers (see
%% the copy_retval sub pass).
%%
%% * Each function is annotated with a {register,RegisterMap}
%% annotation that maps each variable to a BEAM register. The linear
%% scan algorithm is used to allocate registers.
%%
%% There are four kind of registers. x, y, fr (floating point register),
%% and z. A variable will be allocated to a z register if it is only
%% used by the instruction following the instruction that defines the
%% the variable. The code generator will typically combine those
%% instructions to a test instruction. z registers are also used for
%% some instructions that don't have a return value.
%%
%% References:
%%
%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
%% in the context of SSA form and register constraints. In Proceedings
%% of the International Conference on Compiler Construction, pages
%% 229–246. LNCS 2304, Springer-Verlag, 2002.
%%
%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
%% linear scan register allocator. In Proceedings of the ACM/USENIX
%% International Conference on Virtual Execution Environments, pages
%% 132–141. ACM Press, 2005.
%%
%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
%% Form. In Proceedings of the International Symposium on Code
%% Generation and Optimization, pages 170-179. ACM Press, 2010.
%%
-module(beam_ssa_pre_codegen).
-export([module/2]).
-include("beam_ssa.hrl").
-import(lists, [all/2,any/2,append/1,duplicate/2,
foldl/3,last/1,map/2,member/2,partition/2,
reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).
-spec module(beam_ssa:b_module(), [compile:option()]) ->
{'ok',beam_ssa:b_module()}.
module(#b_module{body=Fs0}=Module, Opts) ->
UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
Ps = passes(Opts),
Fs = functions(Fs0, Ps, UseBSM3),
{ok,Module#b_module{body=Fs}}.
functions([F|Fs], Ps, UseBSM3) ->
[function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)];
functions([], _Ps, _UseBSM3) -> [].
-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
-type instr_number() :: pos_integer().
-type range() :: {instr_number(),instr_number()}.
-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.
-type ypool() :: {'y',beam_ssa:label()}.
-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
ypool() | {yreg(),ypool()} | 'z'.
-type ssa_register() :: beam_ssa_codegen:ssa_register().
-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
-record(st, {ssa :: beam_ssa:block_map(),
args :: [b_var()],
cnt :: beam_ssa:label(),
use_bsm3 :: boolean(),
frames=[] :: [beam_ssa:label()],
intervals=[] :: [{b_var(),[range()]}],
res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
regs=#{} :: #{b_var():=ssa_register()},
extra_annos=[] :: [{atom(),term()}]
}).
-define(PASS(N), {N,fun N/1}).
passes(Opts) ->
AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
FixTuples = proplists:get_bool(no_put_tuple2, Opts),
Ps = [?PASS(assert_no_critical_edges),
%% Preliminaries.
?PASS(fix_bs),
?PASS(sanitize),
case FixTuples of
false -> ignore;
true -> ?PASS(fix_tuples)
end,
?PASS(use_set_tuple_element),
?PASS(place_frames),
?PASS(fix_receives),
%% Find and reserve Y registers.
?PASS(find_yregs),
?PASS(reserve_yregs),
%% Handle legacy binary match instruction that don't
%% accept a Y register as destination.
?PASS(legacy_bs),
%% Improve reuse of Y registers to potentially
%% reduce the size of the stack frame.
?PASS(copy_retval),
?PASS(opt_get_list),
%% Calculate live intervals.
?PASS(number_instructions),
?PASS(live_intervals),
?PASS(reserve_regs),
%% If needed for a .precg file, save the live intervals
%% so they can be included in an annotation.
case AddPrecgAnnos of
false -> ignore;
true -> ?PASS(save_live_intervals)
end,
%% Allocate registers.
?PASS(linear_scan),
?PASS(frame_size),
?PASS(turn_yregs)],
[P || P <- Ps, P =/= ignore].
function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
Ps, UseBSM3) ->
try
St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
St = compile:run_sub_passes(Ps, St0),
#st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
F1 = add_extra_annos(F0, ExtraAnnos),
F = beam_ssa:add_anno(registers, Regs, F1),
F#b_function{bs=Blocks,cnt=Count}
catch
Class:Error:Stack ->
#{func_info:={_,Name,Arity}} = Anno,
io:fwrite("Function: ~w/~w\n", [Name,Arity]),
erlang:raise(Class, Error, Stack)
end.
save_live_intervals(#st{intervals=Intervals}=St) ->
St#st{extra_annos=[{live_intervals,Intervals}]}.
%% Add extra annotations when a .precg listing file is being produced.
add_extra_annos(F, Annos) ->
foldl(fun({Name,Value}, Acc) ->
beam_ssa:add_anno(Name, Value, Acc)
end, F, Annos).
%% assert_no_critical_edges(St0) -> St.
%% The code generator will not work if there are critial edges.
%% Abort if any critical edges are found.
assert_no_critical_edges(#st{ssa=Blocks}=St) ->
F = fun assert_no_ces/3,
beam_ssa:fold_rpo(F, Blocks, Blocks),
St.
assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
%% This block has multiple predecessors. Make sure that none
%% of the precessors have more than one successor.
true = all(fun({_,P}) ->
length(beam_ssa:successors(P, Blocks)) =:= 1
end, Phis), %Assertion.
Blocks;
assert_no_ces(_, _, Blocks) -> Blocks.
%% fix_bs(St0) -> St.
%% Fix up the binary matching instructions:
%%
%% * Insert bs_save and bs_restore instructions where needed.
%%
%% * Combine bs_match and bs_extract instructions to bs_get
%% instructions.
fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
%% Mark the root of the match context list.
[{Dst,{context,Dst}}|A];
(#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
%% Link this match context the previous match context.
[{Dst,ParentCtx}|A];
(_, A) ->
A
end,
case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
[] ->
%% No binary matching in this function.
St;
[_|_]=M ->
CtxChain = maps:from_list(M),
Linear0 = beam_ssa:linearize(Blocks),
%% Insert position instructions where needed.
{Linear1,Count} = case UseBSM3 of
true ->
bs_pos_bsm3(Linear0, CtxChain, Count0);
false ->
bs_pos_bsm2(Linear0, CtxChain, Count0)
end,
%% Rename instructions.
Linear = bs_instrs(Linear1, CtxChain, []),
St#st{ssa=maps:from_list(Linear),cnt=Count}
end.
%% Insert bs_get_position and bs_set_position instructions as needed.
bs_pos_bsm3(Linear0, CtxChain, Count0) ->
Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
Rs = maps:values(Rs0),
S0 = sofs:relation(Rs, [{context,save_point}]),
S1 = sofs:relation_to_family(S0),
S = sofs:to_external(S1),
{SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
{Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []),
{Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []),
%% Now insert all saves and restores.
{bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.
make_bs_setpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
SavePoint = get_savepoint(Ps, SavePoints),
I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
make_bs_setpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
make_bs_setpos_map([], _, Count, Acc) ->
{maps:from_list(Acc),Count}.
make_bs_getpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
Ignored = #b_var{name={'@ssa_ignored',Count}},
Args = [Ctx, get_savepoint(Ps, SavePoints)],
I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
make_bs_getpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
make_bs_getpos_map([], _, Count, Acc) ->
{maps:from_list(Acc),Count}.
get_savepoint({_,_}=Ps, SavePoints) ->
Name = {'@ssa_bs_position', map_get(Ps, SavePoints)},
#b_var{name=Name}.
make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
{Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
make_bs_pos_dict(T, Count, Acc);
make_bs_pos_dict([], Count, Acc) ->
{maps:from_list(Acc), Count}.
make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_bs_pos_dict_1([], Ctx, I, Acc) ->
{[{Ctx,I}|Acc], I}.
%% As bs_position but without OTP-22 instructions. This is only used when
%% cross-compiling to older versions.
bs_pos_bsm2(Linear0, CtxChain, Count0) ->
Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
Rs = maps:values(Rs0),
S0 = sofs:relation(Rs, [{context,save_point}]),
S1 = sofs:relation_to_family(S0),
S = sofs:to_external(S1),
Slots = make_save_point_dict(S, []),
{Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
{Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),
%% Now insert all saves and restores.
{bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.
make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
Ignored = #b_var{name={'@ssa_ignored',Count}},
case make_slot(Ps, Slots) of
#b_literal{val=start} ->
make_save_map(T, Slots, Count, Acc);
Slot ->
I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
end;
make_save_map([], _, Count, Acc) ->
{maps:from_list(Acc),Count}.
make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
Ignored = #b_var{name={'@ssa_ignored',Count}},
I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
make_restore_map([], _, Count, Acc) ->
{maps:from_list(Acc),Count}.
make_slot({Same,Same}, _Slots) ->
#b_literal{val=start};
make_slot({_,_}=Ps, Slots) ->
#b_literal{val=map_get(Ps, Slots)}.
make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
make_save_point_dict(T, Acc);
make_save_point_dict([], Acc) ->
maps:from_list(Acc).
make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
%% Special {atom,start} save point. Does not need a
%% bs_save instruction.
make_save_point_dict_1(T, Ctx, I, Acc);
make_save_point_dict_1([H|T], Ctx, I, Acc) ->
make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_save_point_dict_1([], Ctx, I, Acc) ->
[{Ctx,I}|Acc].
bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
FPos = case D0 of
#{L:=Pos0} -> Pos0;
#{} -> #{}
end,
{SPos,Rs} = bs_restores_is(Is, CtxChain, FPos, Rs0),
D = bs_update_successors(Last, SPos, FPos, D0),
bs_restores(Bs, CtxChain, D, Rs);
bs_restores([], _, _, Rs) -> Rs.
bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
join_positions([{Succ,SPos},{Fail,FPos}], D);
bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
join_positions(Update, D);
bs_update_successors(#b_ret{}, _, _, D) -> D.
join_positions([{L,MapPos0}|T], D) ->
case D of
#{L:=MapPos0} ->
%% Same map.
join_positions(T, D);
#{L:=MapPos1} ->
%% Different maps.
MapPos = join_positions_1(MapPos0, MapPos1),
join_positions(T, D#{L:=MapPos});
#{} ->
join_positions(T, D#{L=>MapPos0})
end;
join_positions([], D) -> D.
join_positions_1(MapPos0, MapPos1) ->
MapPos2 = maps:map(fun(Start, Pos) ->
case MapPos0 of
#{Start:=Pos} -> Pos;
#{Start:=_} -> unknown;
#{} -> Pos
end
end, MapPos1),
maps:merge(MapPos0, MapPos2).
bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
CtxChain, PosMap0, Rs) ->
PosMap = PosMap0#{Start=>Start},
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
CtxChain, PosMap0, Rs0) ->
Start = bs_subst_ctx(NewPos, CtxChain),
[_,FromPos|_] = Args,
case PosMap0 of
#{Start:=FromPos} ->
%% Same position, no restore needed.
PosMap = case bs_match_type(I) of
plain ->
%% Update position to new position.
PosMap0#{Start:=NewPos};
_ ->
%% Position will not change (test_unit
%% instruction or no instruction at
%% all).
PosMap0#{Start:=FromPos}
end,
bs_restores_is(Is, CtxChain, PosMap, Rs0);
#{Start:=_} ->
%% Different positions, might need a restore instruction.
case bs_match_type(I) of
none ->
%% The tail test will be optimized away.
%% No need to do a restore.
PosMap = PosMap0#{Start:=FromPos},
bs_restores_is(Is, CtxChain, PosMap, Rs0);
test_unit ->
%% This match instruction will be replaced by
%% a test_unit instruction. We will need a
%% restore. The new position will be the position
%% restored to (NOT NewPos).
PosMap = PosMap0#{Start:=FromPos},
Rs = Rs0#{NewPos=>{Start,FromPos}},
bs_restores_is(Is, CtxChain, PosMap, Rs);
plain ->
%% Match or skip. Position will be changed.
PosMap = PosMap0#{Start:=NewPos},
Rs = Rs0#{NewPos=>{Start,FromPos}},
bs_restores_is(Is, CtxChain, PosMap, Rs)
end
end;
bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
CtxChain, PosMap, Rs) ->
Start = bs_subst_ctx(FromPos, CtxChain),
#{Start:=FromPos} = PosMap, %Assertion.
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
CtxChain, PosMap0, Rs0) ->
{Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
%% We can land here from any point, so all positions are invalid.
PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
CtxChain, PosMap0, Rs0)
when Op =:= bs_test_tail;
Op =:= bs_get_tail ->
{Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([], _CtxChain, PosMap, Rs) ->
{PosMap,Rs}.
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
#b_literal{val=binary},_Flags,
#b_literal{val=all},#b_literal{val=U}]}) ->
case U of
1 -> none;
_ -> test_unit
end;
bs_match_type(_) ->
plain.
%% Call instructions leave the match position in an undefined state,
%% requiring us to invalidate each affected argument.
bs_invalidate_pos([#b_var{}=Arg|Args], PosMap0, CtxChain) ->
Start = bs_subst_ctx(Arg, CtxChain),
case PosMap0 of
#{Start:=_} ->
PosMap = PosMap0#{Start:=unknown},
bs_invalidate_pos(Args, PosMap, CtxChain);
#{} ->
%% Not a match context.
bs_invalidate_pos(Args, PosMap0, CtxChain)
end;
bs_invalidate_pos([_|Args], PosMap, CtxChain) ->
bs_invalidate_pos(Args, PosMap, CtxChain);
bs_invalidate_pos([], PosMap, _CtxChain) ->
PosMap.
bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) ->
Start = bs_subst_ctx(Arg, CtxChain),
case PosMap0 of
#{Start:=Arg} ->
%% Same position, no restore needed.
bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0);
#{Start:=_} ->
%% Different positions, need a restore instruction.
PosMap = PosMap0#{Start:=Arg},
Rs = Rs0#{Dst=>{Start,Arg}},
bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
#{} ->
%% Not a match context.
bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0)
end;
bs_restore_args([_|Args], PosMap, CtxChain, Dst, Rs) ->
bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) ->
{Rs,PosMap}.
%% Insert all bs_save and bs_restore instructions.
bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
bs_insert_1(Blocks, Saves, Restores, SavePoints, fun(I) -> I end).
bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
%% The old instructions require bs_start_match to be annotated with the
%% number of position slots it needs.
bs_insert_1(Blocks, Saves, Restores, SavePoints,
fun(#b_set{op=bs_start_match,dst=Dst}=I0) ->
NumSlots = case SavePoints of
#{Dst:=NumSlots0} -> NumSlots0;
#{} -> 0
end,
beam_ssa:add_anno(num_slots, NumSlots, I0);
(I) ->
I
end).
bs_insert_1([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots, XFrm) ->
Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
Bs = bs_insert_saves(Is, Bs0, Saves),
[{L,Blk#b_blk{is=Is}}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)];
bs_insert_1([], _, _, _, _) -> [].
bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, SavePoints, XFrm) ->
I = XFrm(I0),
if
Op =:= bs_test_tail;
Op =:= bs_get_tail;
Op =:= bs_match;
Op =:= call ->
Rs = case Restores of
#{Dst:=R} -> [R];
#{} -> []
end,
Rs ++ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)];
true ->
[I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)]
end;
bs_insert_is_1([], _, _, _) -> [].
bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
case Saves of
#{Dst:=S} ->
bs_insert_save(S, Bs);
#{} ->
bs_insert_saves(Is, Bs, Saves)
end;
bs_insert_saves([], Bs, _) -> Bs.
bs_insert_save(Save, [{L,#b_blk{is=Is0}=Blk}|Bs]) ->
Is = case Is0 of
[#b_set{op=bs_extract}=Ex|Is1] ->
[Ex,Save|Is1];
_ ->
[Save|Is0]
end,
[{L,Blk#b_blk{is=Is}}|Bs].
%% Translate bs_match instructions to bs_get, bs_match_string,
%% or bs_skip. Also rename match context variables to use the
%% variable assigned to by the start_match instruction.
bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
case bs_instrs_is(Is0, CtxChain, []) of
[#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
%% Drop this instruction. Rewrite the corresponding
%% bs_match instruction in the previous block to
%% a bs_get instruction.
Acc = bs_combine(Dst, Ctx, Acc0),
bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
Is ->
bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
end;
bs_instrs([], _, Acc) ->
reverse(Acc).
bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
I1 = I0#b_set{args=Args},
I = case {Op,Args} of
{bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
{bs_match,[#b_literal{val=string},Ctx|As]} ->
I1#b_set{op=bs_match_string,args=[Ctx|As]};
{bs_get_tail,[Ctx|As]} ->
I1#b_set{op=bs_get_tail,args=[Ctx|As]};
{_,_} ->
I1
end,
bs_instrs_is(Is, CtxChain, [I|Acc]);
bs_instrs_is([], _, Acc) ->
reverse(Acc).
%% Combine a bs_match instruction with the destination register
%% taken from a bs_extract instruction.
bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
[#b_set{}=Succeeded,
#b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
Succeeded#b_set{args=[Dst]}]),
[{L,Blk#b_blk{is=Is}}|Acc].
bs_subst_ctx(#b_var{}=Var, CtxChain) ->
case CtxChain of
#{Var:={context,Ctx}} ->
Ctx;
#{Var:=ParentCtx} ->
bs_subst_ctx(ParentCtx, CtxChain);
#{} ->
%% Not a match context variable.
Var
end;
bs_subst_ctx(Other, _CtxChain) ->
Other.
%% legacy_bs(St0) -> St.
%% Binary matching instructions in OTP 21 and earlier don't support
%% a Y register as destination. If St#st.use_bsm3 is false,
%% we will need to rewrite those instructions so that the result
%% is first put in an X register and then moved to a Y register
%% if the operation succeeded.
legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
Linear0 = beam_ssa:linearize(Blocks0),
{Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
Blocks = maps:from_list(Linear),
St#st{ssa=Blocks,cnt=Count};
legacy_bs(#st{use_bsm3=true}=St) -> St.
legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
#b_blk{is=Is0,last=Last} = Blk,
Is1 = case Copies0 of
#{L:=Copy} -> [Copy|Is0];
#{} -> Is0
end,
{Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
{Acc,Count}.
legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
#b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
Last, IsYreg, Count0, Copies0, Acc) ->
NeedsFix = is_map_key(Dst, IsYreg) andalso
case Op of
bs_get -> true;
bs_init -> true;
_ -> false
end,
case NeedsFix of
true ->
TempDst = #b_var{name={'@bs_temp_dst',Count0}},
Count = Count0 + 1,
I = I0#b_set{dst=TempDst},
SuccI = SuccI0#b_set{args=[TempDst]},
Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
#b_br{bool=SuccDst,succ=SuccL} = Last,
Copies = Copies0#{SuccL=>Copy},
legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
false ->
legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
end;
legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
{reverse(Acc),Count,Copies}.
%% sanitize(St0) -> St.
%% Remove constructs that can cause problems later:
%%
%% * Unreachable blocks may cause problems for determination of
%% dominators.
%%
%% * Some instructions (such as get_hd) don't accept literal
%% arguments. Evaluate the instructions and remove them.
sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
Ls = beam_ssa:rpo(Blocks0),
{Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}),
St#st{ssa=Blocks,cnt=Count}.
sanitize([L|Ls], Count0, Blocks0, Values0) ->
#b_blk{is=Is0} = Blk0 = map_get(L, Blocks0),
case sanitize_is(Is0, Count0, Values0, false, []) of
no_change ->
sanitize(Ls, Count0, Blocks0, Values0);
{Is,Count,Values} ->
Blk = Blk0#b_blk{is=Is},
Blocks = Blocks0#{L:=Blk},
sanitize(Ls, Count, Blocks, Values)
end;
sanitize([], Count, Blocks0, Values) ->
Blocks = if
map_size(Values) =:= 0 ->
Blocks0;
true ->
beam_ssa:rename_vars(Values, [0], Blocks0)
end,
%% Unreachable blocks can cause problems for the dominator calculations.
Ls = beam_ssa:rpo(Blocks),
Reachable = gb_sets:from_list(Ls),
{case map_size(Blocks) =:= gb_sets:size(Reachable) of
true -> Blocks;
false -> remove_unreachable(Ls, Blocks, Reachable, [])
end,Count}.
sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
Count0, Values, Changed, Acc) ->
case sanitize_args(Args0, Values) of
[#b_literal{}=Map,Key] ->
%% Bind the literal map to a variable.
{MapVar,Count} = new_var('@ssa_map', Count0),
I = I0#b_set{args=[MapVar,Key]},
Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
[_,_]=Args0 ->
sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
[_,_]=Args ->
I = I0#b_set{args=Args},
sanitize_is(Is, Count0, Values, Changed, [I|Acc])
end;
sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
Count, Values, Changed0, Acc) ->
Args = sanitize_args(Args0, Values),
case sanitize_instr(Op, Args, I0) of
{value,Value0} ->
Value = #b_literal{val=Value0},
sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
{ok,I} ->
sanitize_is(Is0, Count, Values, true, [I|Acc]);
ok ->
I = I0#b_set{args=Args},
Changed = Changed0 orelse Args =/= Args0,
sanitize_is(Is0, Count, Values, Changed, [I|Acc])
end;
sanitize_is([], Count, Values, Changed, Acc) ->
case Changed of
true ->
{reverse(Acc),Count,Values};
false ->
no_change
end.
sanitize_args(Args, Values) ->
map(fun(Var) ->
case Values of
#{Var:=New} -> New;
#{} -> Var
end
end, Args).
sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
case erl_bifs:is_pure(erlang, Bif, 1) of
false ->
ok;
true ->
try
{value,erlang:Bif(Lit)}
catch
error:_ ->
ok
end
end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
true = erl_bifs:is_pure(erlang, Bif, 2), %Assertion.
try
{value,erlang:Bif(Lit1, Lit2)}
catch
error:_ ->
ok
end;
sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
{value,Hd};
sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
{value,Tl};
sanitize_instr(get_tuple_element, [#b_literal{val=T},
#b_literal{val=I}], _I)
when I < tuple_size(T) ->
{value,element(I+1, T)};
sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
{value,case Lit of
[_|_] -> true;
_ -> false
end};
sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
#b_literal{val=Arity},
#b_literal{val=Tag}], _I)
when is_integer(Arity), is_atom(Tag) ->
if
tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
{value,true};
true ->
{value,false}
end;
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
if
is_integer(Sz), Sz >= 0 -> ok;
true -> {ok,sanitize_badarg(I0)}
end;
sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
if
is_integer(Sz), Sz >= 0 -> ok;
true -> {ok,sanitize_badarg(I0)}
end;
sanitize_instr(succeeded, [#b_literal{}], _I) ->
{value,true};
sanitize_instr(_, _, _) -> ok.
sanitize_badarg(I) ->
Func = #b_remote{mod=#b_literal{val=erlang},
name=#b_literal{val=error},arity=1},
I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
#b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
case split_phis(Is0) of
{[_|_]=Phis,Rest} ->
Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
Blk = Blk0#b_blk{is=Is},
remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
{[],_} ->
remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc])
end;
remove_unreachable([], _Blocks, _, Acc) ->
maps:from_list(Acc).
prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
Args = [A || {_,Pred}=A <- Args0,
gb_sets:is_element(Pred, Reachable)],
Phi#b_set{args=Args}.
%%%
%%% Fix tuples.
%%%
%% fix_tuples(St0) -> St.
%% If compatibility with a previous version of Erlang has been
%% requested, tuple creation must be split into two instruction to
%% mirror the the way tuples are created in BEAM prior to OTP 22.
%% Each put_tuple instruction is split into put_tuple_arity followed
%% by put_tuple_elements.
fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
Arity = #b_literal{val=length(Args)},
{Ignore,C} = new_var('@ssa_ignore', C0),
{[Put#b_set{op=put_tuple_arity,args=[Arity]},
#b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
(I, C) -> {[I],C}
end,
{Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(F, [0], Count0, Blocks0),
St#st{ssa=Blocks,cnt=Count}.
%%%
%%% Introduce the set_tuple_element instructions to make
%%% multiple-field record updates faster.
%%%
%%% The expansion of record field updates, when more than one field is
%%% updated, but not a majority of the fields, will create a sequence of
%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
%%% first call is the original record tuple, and in the subsequent calls
%%% Tuple is the result of the previous call. Furthermore, all Index
%%% values are constant positive integers, and the first call to
%%% `setelement` will have the greatest index. Thus all the following
%%% calls do not actually need to test at run-time whether Tuple has type
%%% tuple, nor that the index is within the tuple bounds.
%%%
%%% Since this optimization introduces destructive updates, it used to
%%% be done as the very last Core Erlang pass before going to
%%% lower-level code. However, it turns out that this kind of destructive
%%% updates are awkward also in SSA code and can prevent or complicate
%%% type analysis and aggressive optimizations.
%%%
%%% NOTE: Because there no write barriers in the system, this kind of
%%% optimization can only be done when we are sure that garbage
%%% collection will not be triggered between the creation of the tuple
%%% and the destructive updates - otherwise we might insert pointers
%%% from an older generation to a newer.
%%%
use_set_tuple_element(#st{ssa=Blocks0}=St) ->
Uses = count_uses(Blocks0),
RPO = reverse(beam_ssa:rpo(Blocks0)),
Blocks = use_ste_1(RPO, Uses, Blocks0),
St#st{ssa=Blocks}.
use_ste_1([L|Ls], Uses, Blocks0) ->
{Blk0,Blocks} = use_ste_across(L, Uses, Blocks0),
#b_blk{is=Is0} = Blk0,
case use_ste_is(Is0, Uses) of
Is0 ->
use_ste_1(Ls, Uses, Blocks);
Is ->
Blk = Blk0#b_blk{is=Is},
use_ste_1(Ls, Uses, Blocks#{L:=Blk})
end;
use_ste_1([], _, Blocks) -> Blocks.
%%% Optimize within a single block.
use_ste_is([#b_set{}=I|Is0], Uses) ->
Is = use_ste_is(Is0, Uses),
case extract_ste(I) of
none ->
[I|Is];
Extracted ->
use_ste_call(Extracted, I, Is, Uses)
end;
use_ste_is([], _Uses) -> [].
use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
case get_ste_call(Is0, []) of
{Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
when Pos1 > 0, Pos0 > Pos1 ->
case is_single_use(Dst0, Uses) of
true ->
Call = Call1#b_set{dst=Dst1},
Args = [Val1,Dst1,#b_literal{val=Pos1-1}],
Dsetel = Call2#b_set{op=set_tuple_element,
dst=Dst0,
args=Args},
[Call|Prefix] ++ [Dsetel|Is];
false ->
[Call1|Is0]
end;
_ ->
[Call1|Is0]
end.
get_ste_call([#b_set{op=get_tuple_element}=I|Is], Acc) ->
get_ste_call(Is, [I|Acc]);
get_ste_call([#b_set{op=call}=I|Is], Acc) ->
case extract_ste(I) of
none ->
none;
Extracted ->
{reverse(Acc),Extracted,I,Is}
end;
get_ste_call(_, _) -> none.
extract_ste(#b_set{op=call,dst=Dst,
args=[#b_remote{mod=#b_literal{val=M},
name=#b_literal{val=F}}|Args]}) ->
case {M,F,Args} of
{erlang,setelement,[#b_literal{val=Pos},Tuple,Val]} ->
{Dst,Pos,Tuple,Val};
{_,_,_} ->
none
end;
extract_ste(#b_set{}) -> none.
%%% Optimize accross blocks within a try/catch block.
use_ste_across(L, Uses, Blocks) ->
case map_get(L, Blocks) of
#b_blk{last=#b_br{bool=#b_var{}}}=Blk ->
try
use_ste_across_1(L, Blk, Uses, Blocks)
catch
throw:not_possible ->
{Blk,Blocks}
end;
#b_blk{}=Blk ->
{Blk,Blocks}
end.
use_ste_across_1(L, Blk0, Uses, Blocks0) ->
#b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0,
case reverse(IsThis) of
[#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0,
#b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] ->
case is_single_use(Bool, Uses) andalso
is_n_uses(2, Result, Uses) of
true -> ok;
false -> throw(not_possible)
end,
Call2 = use_ste_across_next(Next, Uses, Blocks0),
Is = [Call1,Call2],
case use_ste_is(Is, decrement_uses(Result, Uses)) of
[#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] ->
Blocks1 = use_ste_fix_next(Ste, Next, Blocks0),
Succ = Succ0#b_set{args=[Call#b_set.dst]},
Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])},
Blocks = Blocks1#{L:=Blk},
{Blk,Blocks};
_ ->
throw(not_possible)
end;
_ ->
throw(not_possible)
end.
use_ste_across_next(Next, Uses, Blocks) ->
case map_get(Next, Blocks) of
#b_blk{is=[#b_set{op=call,dst=Result,args=[#b_remote{}|_]}=Call,
#b_set{op=succeeded,dst=Bool,args=[Result]}],
last=#b_br{bool=Bool}} ->
case is_single_use(Bool, Uses) andalso
is_n_uses(2, Result, Uses) of
true -> ok;
false -> throw(not_possible)
end,
Call;
#b_blk{} ->
throw(not_possible)
end.
use_ste_fix_next(Ste, Next, Blocks) ->
Blk0 = map_get(Next, Blocks),
#b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=Br0} = Blk0,
Br = beam_ssa:normalize(Br0#b_br{bool=#b_literal{val=true}}),
Blk = Blk0#b_blk{is=[Ste],last=Br},
Blocks#{Next:=Blk}.
%% Count how many times each variable is used.
count_uses(Blocks) ->
count_uses_blk(maps:values(Blocks), #{}).
count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
F = fun(I, CountMap) ->
foldl(fun(Var, Acc) ->
case Acc of
#{Var:=3} -> Acc;
#{Var:=C} -> Acc#{Var:=C+1};
#{} -> Acc#{Var=>1}
end
end, CountMap, beam_ssa:used(I))
end,
CountMap = F(Last, foldl(F, CountMap0, Is)),
count_uses_blk(Bs, CountMap);
count_uses_blk([], CountMap) -> CountMap.
decrement_uses(V, Uses) ->
#{V:=C} = Uses,
Uses#{V:=C-1}.
is_n_uses(N, V, Uses) ->
case Uses of
#{V:=N} -> true;
#{} -> false
end.
is_single_use(V, Uses) ->
case Uses of
#{V:=1} -> true;
#{} -> false
end.
%%%
%%% Find out where frames should be placed.
%%%
%% place_frames(St0) -> St.
%% Return a list of the labels for the blocks that need stack frame
%% allocation instructions.
%%
%% This function attempts to place stack frames as tight as possible
%% around the code, to avoid building stack frames for code paths
%% that don't need one.
%%
%% Stack frames are placed in blocks that dominate all of their
%% descendants. That guarantees that the deallocation instructions
%% cannot be reached from other execution paths that didn't set up
%% a stack frame or set up a stack frame with a different size.
place_frames(#st{ssa=Blocks}=St) ->
{Doms,_} = beam_ssa:dominators(Blocks),
Ls = beam_ssa:rpo(Blocks),
Tried = gb_sets:empty(),
Frames0 = [],
{Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
St#st{frames=Frames}.
place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
Blk = map_get(L, Blocks),
case need_frame(Blk) of
true ->
%% This block needs a frame. Try to place it here.
{Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),
%% Successfully placed. Try to place more frames in descendants
%% that are not dominated by this block.
place_frames_1(Ls, Blocks, Doms, Tried, Frames);
false ->
try
place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
catch
throw:{need_frame,For,Tried1}=Reason ->
%% An descendant block needs a stack frame. Try to
%% place it here.
case is_dominated_by(For, L, Doms) of
true ->
%% Try to place a frame here.
{Frames,Tried} = do_place_frame(L, Blocks, Doms,
Tried1, Frames0),
place_frames_1(Ls, Blocks, Doms, Tried, Frames);
false ->
%% Wrong place. This block does not dominate
%% the block that needs the frame. Pass it on
%% to our ancestors.
throw(Reason)
end
end
end;
place_frames_1([], _, _, Tried, Frames) ->
{Frames,Tried}.
%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
%% Try to place a frame in this block. This function returns
%% successfully if it either succeds at placing a frame in this
%% block, if an ancestor that dominates this block has already placed
%% a frame, or if we have already tried to put a frame in this block.
%%
%% An {need_frame,Label,Tried} exception will be thrown if this block
%% block is not suitable for having a stack frame (i.e. it does not dominate
%% all of its descendants). The exception means that an ancestor will have to
%% place the frame needed by this block.
do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
case gb_sets:is_element(L, Tried0) of
true ->
%% We have already tried to put a frame in this block.
{Frames,Tried0};
false ->
%% Try to place a frame in this block.
Tried = gb_sets:insert(L, Tried0),
case place_frame_here(L, Blocks, Doms, Frames) of
yes ->
%% We need a frame and it is safe to place it here.
{[L|Frames],Tried};
no ->
%% An ancestor has a frame. Not needed.
{Frames,Tried};
ancestor ->
%% This block does not dominate all of its
%% descendants. We must place the frame in
%% an ancestor.
throw({need_frame,L,Tried})
end
end.
%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
%% Determine whether a frame should be placed in block Label.
place_frame_here(L, Blocks, Doms, Frames) ->
B0 = any(fun(DomBy) ->
is_dominated_by(L, DomBy, Doms)
end, Frames),
case B0 of
true ->
%% This block is dominated by an ancestor block that
%% defines a frame. Not needed/allowed to put a frame
%% here.
no;
false ->
%% No frame in any ancestor. We need a frame.
%% Now check whether the frame can be placed here.
%% If this block dominates all of its descendants
%% and the predecessors of any phi nodes it can be
%% placed here.
Descendants = beam_ssa:rpo([L], Blocks),
PhiPredecessors = phi_predecessors(L, Blocks),
MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
Dominates = all(fun(?BADARG_BLOCK) ->
%% This block defines no variables and calls
%% erlang:error(badarg). It does not matter
%% whether L dominates ?BADARG_BLOCK or not;
%% it is still safe to put the frame in L.
true;
(Bl) ->
is_dominated_by(Bl, L, Doms)
end, MustDominate),
%% Also, this block must not be a loop header.
IsLoopHeader = is_loop_header(L, Blocks),
case Dominates andalso not IsLoopHeader of
true -> yes;
false -> ancestor
end
end.
%% phi_predecessors(Label, Blocks) ->
%% Return all predecessors referenced in phi nodes.
phi_predecessors(L, Blocks) ->
#b_blk{is=Is} = map_get(L, Blocks),
[P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].
%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%% Test whether block Label is dominated by block DominatedBy.
is_dominated_by(L, DomBy, Doms) ->
DominatedBy = map_get(L, Doms),
member(DomBy, DominatedBy).
%% need_frame(#b_blk{}) -> true|false.
%% Test whether any of the instructions in the block requires a stack frame.
need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
need_frame_1(Is, {return,Ret});
need_frame(#b_blk{is=Is}) ->
need_frame_1(Is, body).
need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
%% Since make_fun clobbers X registers, a stack frame is needed if
%% any of the following instructions use any other variable than
%% the one holding the reference to the created fun.
need_frame_1(Is, Context) orelse
case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
[Fun] -> false;
[_|_] -> true
end;
need_frame_1([#b_set{op=new_try_tag}|_], _) ->
true;
need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
if
Val =:= Ret -> need_frame_1(Is, tail);
true -> need_frame_1(Is, body)
end;
need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
case Func of
#b_remote{mod=#b_literal{val=Mod},
name=#b_literal{val=Name},
arity=Arity} when is_atom(Mod), is_atom(Name) ->
case erl_bifs:is_exit_bif(Mod, Name, Arity) of
true ->
false;
false ->
Context =:= body orelse
Is =/= [] orelse
is_trap_bif(Mod, Name, Arity)
end;
#b_remote{} ->
%% This is an apply(), which always needs a frame.
true;
#b_local{} ->
Context =:= body orelse Is =/= [];
_ ->
%% A fun call always needs a frame.
true
end;
need_frame_1([I|Is], Context) ->
beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
need_frame_1([], _) -> false.
%% is_trap_bif(Mod, Name, Arity) -> true|false.
%% Test whether we need a stack frame for this BIF.
is_trap_bif(erlang, '!', 2) -> true;
is_trap_bif(erlang, link, 1) -> true;
is_trap_bif(erlang, unlink, 1) -> true;
is_trap_bif(erlang, monitor_node, 2) -> true;
is_trap_bif(erlang, group_leader, 2) -> true;
is_trap_bif(erlang, exit, 2) -> true;
is_trap_bif(_, _, _) -> false.
%%%
%%% Fix variables used in matching in receive.
%%%
%%% The loop_rec/2 instruction may return a reference to a
%%% message outside of any heap or heap fragment. If the message
%%% does not match, it is not allowed to store any reference to
%%% the message (or part of the message) on the stack. If we do,
%%% the message will be corrupted if there happens to be a GC.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That will make sure that only X registers are
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
%%%
%%% Variables that are matched out and used in the same clause
%%% need copy instructions before the remove_message instruction
%%% in that clause.
%%%
fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
{Blocks,Count} = fix_receives_1(maps:to_list(Blocks0),
Blocks0, Count0),
St#st{ssa=Blocks,cnt=Count}.
fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
case Blk of
#b_blk{is=[#b_set{op=peek_message}|_]} ->
Rm = find_rm_blocks(L, Blocks0),
LoopExit = find_loop_exit(Rm, Blocks0),
Defs0 = beam_ssa:def([L], Blocks0),
CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
{Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
Blocks0, Count0),
Defs = ordsets:subtract(Defs0, CommonUsed),
{Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
fix_receives_1(Ls, Blocks, Count);
#b_blk{} ->
fix_receives_1(Ls, Blocks0, Count0)
end;
fix_receives_1([], Blocks, Count) ->
{Blocks,Count}.
recv_common(_Defs, none, _Blocks) ->
%% There is no common exit block because receive is used
%% in the tail position of a function.
[];
recv_common(Defs, Exit, Blocks) ->
{ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
Def = ordsets:subtract(Defs, ExitDefs),
ordsets:intersection(Def, ExitUsed).
%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
%% Blocks0, Count0) -> {Blocks,Count}.
%% Handle variables alwys defined in a receive and used
%% in the exit block following the receive.
recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
{Msg,Count1} = new_var('@recv', Count0),
Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
N = length(Rm),
{MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
ExitBlk0 = map_get(Exit, Blocks1),
ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
Blocks2 = Blocks1#{Exit:=ExitBlk},
Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
recv_fix_common(T, Exit, Rm, Blocks, Count);
recv_fix_common([], _, _, Blocks, Count) ->
{Blocks,Count}.
recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
Ren = #{Msg=>V},
Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0),
#b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
Copy = #b_set{op=copy,dst=V,args=[Msg]},
Is = insert_after_phis(Is0, [Copy]),
Blk = Blk0#b_blk{is=Is},
Blocks = Blocks1#{Rm:=Blk},
recv_fix_common_1(Vs, Rms, Msg, Blocks);
recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.
fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
Path = beam_ssa:rpo([Rm], Blocks),
Preds = exit_predecessors(Path, Exit, Blocks),
[{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks);
fix_exit_phi_args([], [], _, _) -> [].
exit_predecessors([L|Ls], Exit, Blocks) ->
Blk = map_get(L, Blocks),
case member(Exit, beam_ssa:successors(Blk)) of
true ->
[L|exit_predecessors(Ls, Exit, Blocks)];
false ->
exit_predecessors(Ls, Exit, Blocks)
end;
exit_predecessors([], _Exit, _Blocks) -> [].
%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%% Add a copy instruction for all variables that are matched out and
%% later used within a clause of the receive.
fix_receive([L|Ls], Defs, Blocks0, Count0) ->
{RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
Def = ordsets:subtract(Defs, RmDefs),
Used = ordsets:intersection(Def, Used0),
{NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
Ren = zip(Used, NewVars),
Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
#b_blk{is=Is0} = Blk1 = map_get(L, Blocks1),
CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
Is = insert_after_phis(Is0, CopyIs),
Blk = Blk1#b_blk{is=Is},
Blocks = Blocks1#{L:=Blk},
fix_receive(Ls, Defs, Blocks, Count);
fix_receive([], _Defs, Blocks, Count) ->
{Blocks,Count}.
%% find_loop_exit([Label], Blocks) -> Label | none.
%% Find the block to which control is transferred when the
%% the receive loop is exited.
find_loop_exit([L1,L2|_Ls], Blocks) ->
Path1 = beam_ssa:rpo([L1], Blocks),
Path2 = beam_ssa:rpo([L2], Blocks),
find_loop_exit_1(reverse(Path1), reverse(Path2), none);
find_loop_exit(_, _) -> none.
find_loop_exit_1([H|T1], [H|T2], _) ->
find_loop_exit_1(T1, T2, H);
find_loop_exit_1(_, _, Exit) -> Exit.
%% find_rm_blocks(StartLabel, Blocks) -> [Label].
%% Find all blocks that start with remove_message within the receive
%% loop whose peek_message label is StartLabel.
find_rm_blocks(L, Blocks) ->
Seen = gb_sets:singleton(L),
Blk = map_get(L, Blocks),
Succ = beam_ssa:successors(Blk),
find_rm_blocks_1(Succ, Seen, Blocks).
find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
case gb_sets:is_member(L, Seen0) of
true ->
find_rm_blocks_1(Ls, Seen0, Blocks);
false ->
Seen = gb_sets:insert(L, Seen0),
Blk = map_get(L, Blocks),
case find_rm_act(Blk#b_blk.is) of
prune ->
%% Looping back. Don't look at any successors.
find_rm_blocks_1(Ls, Seen, Blocks);
continue ->
%% Neutral block. Do nothing here, but look at
%% all successors.
Succ = beam_ssa:successors(Blk),
find_rm_blocks_1(Succ++Ls, Seen, Blocks);
found ->
%% Found remove_message instruction.
[L|find_rm_blocks_1(Ls, Seen, Blocks)]
end
end;
find_rm_blocks_1([], _, _) -> [].
find_rm_act([#b_set{op=Op}|Is]) ->
case Op of
remove_message -> found;
peek_message -> prune;
recv_next -> prune;
wait_timeout -> prune;
wait -> prune;
_ -> find_rm_act(Is)
end;
find_rm_act([]) ->
continue.
%%%
%%% Find out which variables need to be stored in Y registers.
%%%
-record(dk, {d :: ordsets:ordset(var_name()),
k :: ordsets:ordset(var_name())
}).
%% find_yregs(St0) -> St.
%% Find all variables that must be stored in Y registers. Annotate
%% the blocks that allocate frames with the set of Y registers
%% used within that stack frame.
%%
%% Basically, we following all execution paths starting from a block
%% that allocates a frame, keeping track of of all defined registers
%% and all registers killed by an instruction that clobbers X
%% registers. For every use of a variable, we check if if it is in
%% the set of killed variables; if it is, it must be stored in an Y
%% register.
find_yregs(#st{frames=[]}=St) ->
St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{}=V <- Args]),
Blocks = find_yregs_1(FrameDefs, Blocks0),
St#st{ssa=Blocks}.
find_yregs_1([{F,Defs}|Fs], Blocks0) ->
DK = #dk{d=Defs,k=[]},
D0 = #{F=>DK},
Ls = beam_ssa:rpo([F], Blocks0),
Yregs0 = [],
Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
Blk0 = map_get(F, Blocks0),
Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
Blocks = Blocks0#{F:=Blk},
find_yregs_1(Fs, Blocks);
find_yregs_1([], Blocks) -> Blocks.
find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
Blk0 = map_get(L, Blocks0),
#b_blk{is=Is,last=Last} = Blk0,
Ys0 = map_get(L, D0),
{Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
Yregs = find_yregs_terminator(Last, Ys, Yregs1),
Successors = beam_ssa:successors(Blk0),
D = find_update_succ(Successors, Ys, D0),
find_yregs_2(Ls, Blocks0, D, Yregs);
find_yregs_2([], _Blocks, _D, Yregs) -> Yregs.
find_defs(Frames, Blocks, Defs) ->
Seen = gb_sets:empty(),
FramesSet = gb_sets:from_list(Frames),
{FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []),
FrameDefs.
find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
case gb_sets:is_member(L, Frames) of
true ->
OrderedDefs = ordsets:from_list(Defs0),
find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
[{L,OrderedDefs}|Acc0]);
false ->
case gb_sets:is_member(L, Seen0) of
true ->
find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
false ->
Seen1 = gb_sets:insert(L, Seen0),
{Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
#b_blk{is=Is} = Blk = map_get(L, Blocks),
Defs = find_defs_is(Is, Defs0),
Successors = beam_ssa:successors(Blk),
find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
end
end;
find_defs_1([], _, _, Seen, _, Acc) ->
{Acc,Seen}.
find_defs_is([#b_set{dst=Dst}|Is], Acc) ->
find_defs_is(Is, [Dst|Acc]);
find_defs_is([], Acc) -> Acc.
find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
case D0 of
#{S:=#dk{d=Defs1,k=Killed1}} ->
Defs = ordsets:intersection(Defs0, Defs1),
Killed = ordsets:union(Killed0, Killed1),
DK = #dk{d=Defs,k=Killed},
D = D0#{S:=DK},
find_update_succ(Ss, DK0, D);
#{} ->
D = D0#{S=>DK0},
find_update_succ(Ss, DK0, D)
end;
find_update_succ([], _, D) -> D.
find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
Used = beam_ssa:used(I),
Yregs1 = ordsets:intersection(Used, Killed0),
Yregs = ordsets:union(Yregs0, Yregs1),
case beam_ssa:clobbers_xregs(I) of
false ->
Defs = ordsets:add_element(Dst, Defs0),
find_yregs_is(Is, Ys#dk{d=Defs}, Yregs);
true ->
Killed = ordsets:union(Defs0, Killed0),
Defs = [Dst],
find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs)
end;
find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}.
find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
Used = beam_ssa:used(Terminator),
Yregs = ordsets:intersection(Used, Killed),
ordsets:union(Yregs0, Yregs).
%%%
%%% Try to reduce the size of the stack frame, by adding an explicit
%%% 'copy' instructions for return values from 'call' and 'make_fun' that
%%% need to be saved in Y registers. Here is an example to show
%%% how that's useful. First, here is the Erlang code:
%%%
%%% f(Pid) ->
%%% Res = foo(42),
%%% _ = node(Pid),
%%% bar(),
%%% Res.
%%%
%%% Compiled to SSA format, the main part of the code looks like this:
%%%
%%% 0:
%%% Res = call local literal foo/1, literal 42
%%% _1 = bif:node Pid
%%% @ssa_bool = succeeded _1
%%% br @ssa_bool, label 3, label 1
%%% 3:
%%% @ssa_ignored = call local literal bar/0
%%% ret Res
%%%
%%% It can be seen that the variables Pid and Res must be saved in Y
%%% registers in order to survive the function calls. A previous sub
%%% pass has inserted a 'copy' instruction to save the value of the
%%% variable Pid:
%%%
%%% 0:
%%% Pid:4 = copy Pid
%%% Res = call local literal foo/1, literal 42
%%% _1 = bif:node Pid:4
%%% @ssa_bool = succeeded _1
%%% br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%% @ssa_ignored = call local literal bar/0
%%% ret Res
%%%
%%% The Res and Pid:4 variables must be assigned to different Y registers
%%% because they are live at the same time. copy_retval() inserts a
%%% 'copy' instruction to copy Res to a new variable:
%%%
%%% 0:
%%% Pid:4 = copy Pid
%%% Res:6 = call local literal foo/1, literal 42
%%% _1 = bif:node Pid:4
%%% @ssa_bool = succeeded _1
%%% br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%% Res = copy Res:6
%%% @ssa_ignored = call local literal bar/0
%%% ret Res
%%%
%%% The new variable Res:6 is used to capture the return value from the call.
%%% The variables Pid:4 and Res are no longer live at the same time, so they
%%% can be assigned to the same Y register.
%%%
copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
{Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
St#st{ssa=Blocks,cnt=Count}.
copy_retval_1([F|Fs], Blocks0, Count0) ->
#b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
Yregs1 = gb_sets:from_list(Yregs0),
Yregs = collect_yregs(Is, Yregs1),
Ls = beam_ssa:rpo([F], Blocks0),
{Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0),
copy_retval_1(Fs, Blocks, Count);
copy_retval_1([], Blocks, Count) ->
{Blocks,Count}.
collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is],
Yregs0) ->
true = gb_sets:is_member(X, Yregs0), %Assertion.
Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)),
collect_yregs(Is, Yregs);
collect_yregs([#b_set{}|Is], Yregs) ->
collect_yregs(Is, Yregs);
collect_yregs([], Yregs) -> Yregs.
copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
#b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
RC = case {Last,Ls} of
{#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
true;
{_,_} ->
false
end,
case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
{Is,Count} ->
case Copy0 =:= none andalso Count0 =:= Count of
true ->
copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
false ->
Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
copy_retval_2(Ls, Yregs, none, Blocks, Count)
end;
{Is,Count,Copy} ->
Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
end;
copy_retval_2([], _Yregs, none, Blocks, Count) ->
{Blocks,Count}.
copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
Copy, Count, Acc) ->
I = I0#b_set{args=copy_sub_args(Args0, Copy)},
{reverse(Acc, [I|acc_copy([], Copy)]),Count};
copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
when Op =:= call; Op =:= make_fun ->
{I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
{reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
{reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
{I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
case gb_sets:is_member(Dst, Yregs) of
true ->
{NewVar,Count} = new_var(RetName, Count1),
Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
I = I1#b_set{dst=NewVar},
copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
false ->
copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
end;
copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
I = I0#b_set{args=copy_sub_args(Args0, Copy)},
case beam_ssa:clobbers_xregs(I) of
true ->
copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
false ->
copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
end;
copy_retval_is([], RC, _, Copy, Count, Acc) ->
case {Copy,RC} of
{none,_} ->
{reverse(Acc),Count};
{#b_set{},true} ->
{reverse(Acc),Count,Copy};
{#b_set{},false} ->
{reverse(Acc, [Copy]),Count}
end.
%%
%% Consider this code:
%%
%% Var = ...
%% ...
%% A1 = call foo/0
%% A = copy A1
%% B = call bar/1, Var
%%
%% If the Var variable is no longer used after this code, its Y register
%% can't be reused for A. To allow the Y register to be reused
%% we will need to insert 'copy' instructions for arguments that are
%% in Y registers:
%%
%% Var = ...
%% ...
%% A1 = call foo/0
%% Var1 = copy Var
%% A = copy A1
%% B = call bar/1, Var1
%%
place_retval_copy(I, _Yregs, none, Count, Acc) ->
{I,Count,Acc};
place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
#b_set{dst=Avoid} = Copy,
{Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
Acc = [Copy|Acc1],
{I#b_set{args=[F|Args]},Count,Acc}.
copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
case gb_sets:is_member(A, Yregs) of
true when A =/= Avoid ->
{NewVar,Count} = new_var(AName, Count0),
Copy = #b_set{op=copy,dst=NewVar,args=[A]},
copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count);
_ ->
copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
end;
copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count);
copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
{reverse(Acc),CopyAcc,Count}.
acc_copy(Acc, none) -> Acc;
acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
copy_sub_args(Args, none) ->
Args;
copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
[sub_arg(A, Dst, Src) || A <- Args].
sub_arg(Old, Old, New) -> New;
sub_arg(Old, _, _) -> Old.
%%%
%%% Consider:
%%%
%%% x1/Hd = get_hd x0/Cons
%%% y0/Tl = get_tl x0/Cons
%%%
%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
%%% a 'move' instruction must be inserted.
%%%
%%% If we swap get_hd and get_tl when Tl is in a Y register,
%%% x0 can be used for Hd if Cons is not used again:
%%%
%%% y0/Tl = get_tl x0/Cons
%%% x0/Hd = get_hd x0/Cons
%%%
opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
ResMap = maps:from_list(Res),
Ls = beam_ssa:rpo(Blocks),
St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.
opt_get_list_1([L|Ls], Res, Blocks0) ->
#b_blk{is=Is0} = Blk = map_get(L, Blocks0),
case opt_get_list_is(Is0, Res, [], false) of
no ->
opt_get_list_1(Ls, Res, Blocks0);
{yes,Is} ->
Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
opt_get_list_1(Ls, Res, Blocks)
end;
opt_get_list_1([], _, Blocks) -> Blocks.
opt_get_list_is([#b_set{op=get_hd,dst=Hd,
args=[Cons]}=GetHd,
#b_set{op=get_tl,dst=Tl,
args=[Cons]}=GetTl|Is],
Res, Acc, Changed) ->
%% Note that when this pass is run, only Y registers have
%% reservations. The absence of an entry for a variable therefore
%% means that the variable will be in an X register.
case Res of
#{Hd:={y,_}} ->
%% Hd will be in a Y register. Don't swap.
opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed);
#{Tl:={y,_}} ->
%% Tl will be in a Y register. Swap.
opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
#{} ->
%% Both are in X registers. Nothing to do.
opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
end;
opt_get_list_is([I|Is], Res, Acc, Changed) ->
opt_get_list_is(Is, Res, [I|Acc], Changed);
opt_get_list_is([], _Res, Acc, Changed) ->
case Changed of
true ->
{yes,reverse(Acc)};
false ->
no
end.
%%%
%%% Number instructions in the order they are executed.
%%%
%% number_instructions(St0) -> St.
%% Number instructions in the order they are executed. Use a step
%% size of 2. Don't number phi instructions. All phi variables in
%% a block will be live one unit before the first non-phi instruction
%% in the block.
number_instructions(#st{ssa=Blocks0}=St) ->
Ls = beam_ssa:rpo(Blocks0),
St#st{ssa=number_is_1(Ls, 1, Blocks0)}.
number_is_1([L|Ls], N0, Blocks0) ->
#b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0),
{Is,N1} = number_is_2(Is0, N0, []),
Last = beam_ssa:add_anno(n, N1, Last0),
N = N1 + 2,
Bl = Bl0#b_blk{is=Is,last=Last},
Blocks = Blocks0#{L:=Bl},
number_is_1(Ls, N, Blocks);
number_is_1([], _, Blocks) -> Blocks.
number_is_2([#b_set{op=phi}=I|Is], N, Acc) ->
number_is_2(Is, N, [I|Acc]);
number_is_2([I0|Is], N, Acc) ->
I = beam_ssa:add_anno(n, N, I0),
number_is_2(Is, N+2, [I|Acc]);
number_is_2([], N, Acc) ->
{reverse(Acc),N}.
%%%
%%% Calculate live intervals.
%%%
live_intervals(#st{args=Args,ssa=Blocks}=St) ->
Vars0 = [{V,{0,1}} || #b_var{}=V <- Args],
F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end,
LiveMap0 = #{},
Acc0 = {[],LiveMap0},
{Vars,_} = beam_ssa:fold_po(F, Acc0, Blocks),
Intervals = merge_ranges(rel2fam(Vars0++Vars)),
St#st{intervals=Intervals}.
merge_ranges([{V,Rs}|T]) ->
[{V,merge_ranges_1(Rs)}|merge_ranges(T)];
merge_ranges([]) -> [].
merge_ranges_1([{A,N},{N,Z}|Rs]) ->
merge_ranges_1([{A,Z}|Rs]);
merge_ranges_1([R|Rs]) ->
[R|merge_ranges_1(Rs)];
merge_ranges_1([]) -> [].
live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
Live0 = [],
Successors = beam_ssa:successors(L, Blocks),
Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0),
%% Add ranges for all variables that are live in the successors.
#b_blk{is=Is,last=Last} = map_get(L, Blocks),
End = beam_ssa:get_anno(n, Last),
Use = [{V,{use,End+1}} || V <- Live1],
%% Determine used and defined variables in this block.
FirstNumber = first_number(Is, Last),
UseDef0 = live_interval_blk_1([Last|reverse(Is)], FirstNumber, Use),
UseDef = rel2fam(UseDef0),
%% Update what is live at the beginning of this block and
%% store it.
Used = [V || {V,[{use,_}|_]} <- UseDef],
Live2 = ordsets:union(Live1, Used),
Killed = [V || {V,[{def,_}|_]} <- UseDef],
Live = ordsets:subtract(Live2, Killed),
LiveMap = LiveMap0#{L=>Live},
%% Construct the ranges for this block.
Vars = make_block_ranges(UseDef, FirstNumber, Vars0),
{Vars,LiveMap}.
make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) ->
make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]);
make_block_ranges([{V,[{def,Def}|Uses]}|Vs], First, Acc) ->
{use,Last} = last(Uses),
make_block_ranges(Vs, First, [{V,{Def,Last}}|Acc]);
make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) ->
{use,Last} = last(Uses),
make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]);
make_block_ranges([], _, Acc) -> Acc.
live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) ->
Acc = [{Dst,{def,FirstNumber}}|Acc0],
live_interval_blk_1(Is, FirstNumber, Acc);
live_interval_blk_1([#b_set{op=bs_start_match}=I|Is],
FirstNumber, Acc0) ->
N = beam_ssa:get_anno(n, I),
#b_set{dst=Dst} = I,
Acc1 = [{Dst,{def,N}}|Acc0],
Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1,
live_interval_blk_1(Is, FirstNumber, Acc);
live_interval_blk_1([I|Is], FirstNumber, Acc0) ->
N = beam_ssa:get_anno(n, I),
Acc1 = case I of
#b_set{dst=Dst} ->
[{Dst,{def,N}}|Acc0];
_ ->
Acc0
end,
Used = beam_ssa:used(I),
Acc = [{V,{use,N}} || V <- Used] ++ Acc1,
live_interval_blk_1(Is, FirstNumber, Acc);
live_interval_blk_1([], _FirstNumber, Acc) ->
Acc.
%% first_number([#b_set{}]) -> InstructionNumber.
%% Return the number for the first instruction for the block.
%% Note that this number is one less than the first
%% non-phi instruction in the block.
first_number([#b_set{op=phi}|Is], Last) ->
first_number(Is, Last);
first_number([I|_], _) ->
beam_ssa:get_anno(n, I) - 1;
first_number([], Last) ->
beam_ssa:get_anno(n, Last) - 1.
update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
Live1 = ordsets:union(Live0, get_live(L, LiveMap)),
#b_blk{is=Is} = map_get(L, Blocks),
Live = update_live_phis(Is, Pred, Live1),
update_successors(Ls, Pred, Blocks, LiveMap, Live);
update_successors([], _, _, _, Live) -> Live.
get_live(L, LiveMap) ->
case LiveMap of
#{L:=Live} -> Live;
#{} -> []
end.
update_live_phis([#b_set{op=phi,dst=Killed,args=Args}|Is],
Pred, Live0) ->
Used = [V || {#b_var{}=V,L} <- Args, L =:= Pred],
Live1 = ordsets:union(ordsets:from_list(Used), Live0),
Live = ordsets:del_element(Killed, Live1),
update_live_phis(Is, Pred, Live);
update_live_phis(_, _, Live) -> Live.
%%%
%%% Reserve Y registers.
%%%
%% reserve_yregs(St0) -> St.
%% In each block that allocates a stack frame, insert instructions
%% that copy variables that must be in Y registers (given by
%% the `yregs` annotation) to new variables.
%%
%% Also allocate specific Y registers for try and catch tags.
%% The outermost try/catch tag is placed in y0, any directly
%% nested tag in y1, and so on. Note that this is the reversed
%% order as required by BEAM; it will be corrected later by
%% turn_yregs().
reserve_yregs(#st{frames=Frames}=St0) ->
foldl(fun reserve_yregs_1/2, St0, Frames).
reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
Blk = map_get(L, Blocks0),
Yregs = beam_ssa:get_anno(yregs, Blk),
{Def,Used} = beam_ssa:def_used([L], Blocks0),
UsedYregs = ordsets:intersection(Yregs, Used),
DefBefore = ordsets:subtract(UsedYregs, Def),
{BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
InsideVars = ordsets:subtract(UsedYregs, DefBefore),
ResTryTags0 = reserve_try_tags(L, Blocks),
ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0],
Vars = BeforeVars ++ InsideVars,
Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0,
St#st{res=Res,ssa=Blocks,cnt=Count+1}.
reserve_try_tags(L, Blocks) ->
Seen = gb_sets:empty(),
{Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}),
Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)],
Res = [{V,{y,Y}} || {V,Y} <- append(Res1)],
ordsets:from_list(Res).
reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
case gb_sets:is_element(L, Seen0) of
true ->
reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
false ->
Seen1 = gb_sets:insert(L, Seen0),
#b_blk{is=Is} = Blk = map_get(L, Blocks),
Active0 = get_active(L, ActMap0),
Active = reserve_try_tags_is(Is, Active0),
Successors = beam_ssa:successors(Blk),
ActMap1 = update_act_map(Successors, Active, ActMap0),
{ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
end;
reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
{ActMap,Seen}.
get_active(L, ActMap) ->
case ActMap of
#{L:=Active} -> Active;
#{} -> #{}
end.
reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) ->
N = map_size(Active),
reserve_try_tags_is(Is, Active#{V=>N});
reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
reserve_try_tags_is(Is, maps:remove(Tag, Active));
reserve_try_tags_is([_|Is], Active) ->
reserve_try_tags_is(Is, Active);
reserve_try_tags_is([], Active) -> Active.
update_act_map([L|Ls], Active0, ActMap0) ->
case ActMap0 of
#{L:=Active1} ->
ActMap = ActMap0#{L=>maps:merge(Active0, Active1)},
update_act_map(Ls, Active0, ActMap);
#{} ->
ActMap = ActMap0#{L=>Active0},
update_act_map(Ls, Active0, ActMap)
end;
update_act_map([], _, ActMap) -> ActMap.
rename_vars([], _, Blocks, Count) ->
{[],Blocks,Count};
rename_vars(Vs, L, Blocks0, Count0) ->
{NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0),
Ren = zip(Vs, NewVars),
Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
#b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
Is = insert_after_phis(Is0, CopyIs),
Blk = Blk0#b_blk{is=Is},
Blocks = Blocks1#{L:=Blk},
{NewVars,Blocks,Count}.
insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
[I|insert_after_phis(Is, InsertIs)];
insert_after_phis(Is, InsertIs) ->
InsertIs ++ Is.
%% frame_size(St0) -> St.
%% Calculate the frame size for each block that allocates a frame.
%% Annotate the block with the frame size. Also annotate all
%% return instructions with {deallocate,FrameSize} to simplify
%% code generation.
frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
Blocks = foldl(fun(L, Blks) ->
frame_size_1(L, Regs, Blks)
end, Blocks0, Frames),
St#st{ssa=Blocks}.
frame_size_1(L, Regs, Blocks0) ->
Def = beam_ssa:def([L], Blocks0),
Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))],
Yregs = ordsets:from_list(Yregs0),
FrameSize = length(ordsets:from_list(Yregs)),
if
FrameSize =/= 0 ->
[{y,0}|_] = Yregs, %Assertion.
{y,Last} = last(Yregs),
Last = FrameSize - 1, %Assertion.
ok;
true ->
ok
end,
Blk0 = map_get(L, Blocks0),
Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
%% Insert an annotation for frame deallocation on
%% each #b_ret{}.
Blocks = Blocks0#{L:=Blk},
Reachable = beam_ssa:rpo([L], Blocks),
frame_deallocate(Reachable, FrameSize, Blocks).
frame_deallocate([L|Ls], Size, Blocks0) ->
Blk0 = map_get(L, Blocks0),
Blk = case Blk0 of
#b_blk{last=#b_ret{}=Ret0} ->
Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
Blk0#b_blk{last=Ret};
#b_blk{} ->
Blk0
end,
Blocks = Blocks0#{L:=Blk},
frame_deallocate(Ls, Size, Blocks);
frame_deallocate([], _, Blocks) -> Blocks.
%% turn_yregs(St0) -> St.
%% Renumber y registers so that {y,0} becomes {y,FrameSize-1},
%% {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
%% catches work. The register allocator (linear_scan()) has given
%% a lower number to the outermost catch.
turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
Regs1 = foldl(fun(L, A) ->
Blk = map_get(L, Blocks),
FrameSize = beam_ssa:get_anno(frame_size, Blk),
Def = beam_ssa:def([L], Blocks),
[turn_yregs_1(Def, FrameSize, Regs0)|A]
end, [], Frames),
Regs = maps:merge(Regs0, maps:from_list(append(Regs1))),
St#st{regs=Regs}.
turn_yregs_1(Def, FrameSize, Regs) ->
Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
Yregs1 = rel2fam(Yregs0),
FrameSize = length(Yregs1),
Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
R0 = sofs:family(Yregs2),
R1 = sofs:family_to_relation(R0),
R = sofs:converse(R1),
sofs:to_external(R).
%%%
%%% Reserving registers before register allocation.
%%%
%% reserve_regs(St0) -> St.
%% Reserve registers prior to register allocation. Y registers
%% have already been reserved. This function will reserve z,
%% fr, and specific x registers.
reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
%% Reserve x0, x1, and so on for the function arguments.
Res1 = reserve_arg_regs(Args, 0, Res0),
%% Reserve Z registers (dummy registers) for instructions with no
%% return values (e.g. remove_message) or pseudo-return values
%% (e.g. landingpad).
Res2 = reserve_zregs(Blocks, Intervals, Res1),
%% Reserve float registers.
Res3 = reserve_fregs(Blocks, Res2),
%% Reserve all remaining unreserved variables as X registers.
Res = maps:from_list(Res3),
St#st{res=reserve_xregs(Blocks, Res)}.
reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
reserve_arg_regs([], _, Acc) -> Acc.
reserve_zregs(Blocks, Intervals, Res) ->
ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
ShortLived = cerl_sets:from_list(ShortLived0),
F = fun(_, #b_blk{is=Is,last=Last}, A) ->
reserve_zreg(Is, Last, ShortLived, A)
end,
beam_ssa:fold_rpo(F, [0], Res, Blocks).
reserve_zreg([#b_set{op=Op,dst=Dst}],
#b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
Op =:= get_tuple_element ->
%% If type optimization has determined that the result of these
%% instructions can be used directly in a branch, we must avoid reserving a
%% z register or code generation will fail.
A;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
#b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
case {Val,Last} of
{#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
%% These two instructions can be combined to a test_arity
%% instruction provided that the arity variable is short-lived.
reserve_zreg_1(Dst, ShortLived, A0);
{_,_} ->
%% Either the arity is too big, or the boolean value is not
%% used in a conditional branch.
A0
end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
#b_switch{}, ShortLived, A) ->
reserve_zreg_1(Dst, ShortLived, A);
reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
%% There is no short, easy way to rewrite 'xor' to a series of
%% test instructions.
A;
reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
%% There is no short, easy way to rewrite is_record/2 to a series of
%% test instructions.
A;
reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
IsZReg = case Op of
bs_match_string -> true;
bs_save -> true;
bs_restore -> true;
bs_set_position -> true;
{float,clearerror} -> true;
kill_try_tag -> true;
landingpad -> true;
put_tuple_elements -> true;
remove_message -> true;
set_tuple_element -> true;
succeeded -> true;
timeout -> true;
wait_timeout -> true;
_ -> false
end,
A = case IsZReg of
true -> [{Dst,z}|A0];
false -> A0
end,
reserve_zreg(Is, Last, ShortLived, A);
reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
reserve_zreg_1(Bool, ShortLived, A);
reserve_zreg([], _, _, A) -> A.
reserve_zreg_1(#b_var{}=V, ShortLived, A) ->
case cerl_sets:is_element(V, ShortLived) of
true -> [{V,z}|A];
false -> A
end;
reserve_zreg_1(#b_literal{}, _, A) -> A.
reserve_fregs(Blocks, Res) ->
F = fun(_, #b_blk{is=Is}, A) ->
reserve_freg(Is, A)
end,
beam_ssa:fold_rpo(F, [0], Res, Blocks).
reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
case Op of
get ->
reserve_freg(Is, Res);
_ ->
reserve_freg(Is, [{V,fr}|Res])
end;
reserve_freg([_|Is], Res) ->
reserve_freg(Is, Res);
reserve_freg([], Res) -> Res.
%% reserve_xregs(St0) -> St.
%% Reserve all remaining variables as X registers.
%%
%% If a variable will need to be in a specific X register for a
%% 'call' or 'make_fun' (and there is nothing that will kill it
%% between the definition and use), reserve the register using a
%% {prefer,{x,X} annotation. That annotation means that the linear
%% scan algorithm will place the variable in the preferred register,
%% unless that register is already occupied.
%%
%% All remaining variables are reserved as X registers. Linear scan
%% will allocate the lowest free X register for the variable.
reserve_xregs(Blocks, Res) ->
Ls = reverse(beam_ssa:rpo(Blocks)),
reserve_xregs(Ls, Blocks, #{}, Res).
reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
#b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),
%% Calculate mapping from variable name to the preferred
%% register.
Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),
%% We need to figure out where the code generator will
%% place instructions that will do a garbage collection.
%% Insert 'gc' markers as pseudo-instructions in the
%% instruction sequence.
Is1 = reverse(Is0),
Is2 = res_place_gc_instrs(Is1, []),
Is = res_place_allocate(Anno, Is2),
%% Add register hints for variables that are defined
%% in the (reversed) instruction sequence.
{Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),
XsMap = XsMap0#{L=>Xs},
reserve_xregs(Ls, Blocks, XsMap, Res);
reserve_xregs([], _, _, Res) -> Res.
%% Insert explicit 'gc' markers points where there will
%% be a garbage collection. (Note that the instruction
%% sequence passed to this function is reversed.)
res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
res_place_gc_instrs(Is, [I|Acc]);
res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
when Op =:= call; Op =:= make_fun ->
case Acc of
[] ->
res_place_gc_instrs(Is, [I|Acc]);
[GC|_] when GC =:= gc; GC =:= test_heap ->
res_place_gc_instrs(Is, [I,gc|Acc]);
[_|_] ->
res_place_gc_instrs(Is, [I,gc|Acc])
end;
res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
case beam_ssa_codegen:classify_heap_need(Op, Args) of
neutral ->
case Acc0 of
[test_heap|Acc] ->
res_place_gc_instrs(Is, [test_heap,I|Acc]);
Acc ->
res_place_gc_instrs(Is, [I|Acc])
end;
{put,_} ->
case Acc0 of
[test_heap|Acc] ->
res_place_gc_instrs(Is, [test_heap,I|Acc]);
Acc ->
res_place_gc_instrs(Is, [test_heap,I|Acc])
end;
_ ->
res_place_gc_instrs(Is, [gc,I|Acc0])
end;
res_place_gc_instrs([], Acc) ->
%% Reverse and replace 'test_heap' markers with 'gc'.
%% (The distinction is no longer useful.)
res_place_gc_instrs_rev(Acc, []).
res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) ->
res_place_gc_instrs_rev(Is, Acc);
res_place_gc_instrs_rev([test_heap|Is], Acc) ->
res_place_gc_instrs_rev(Is, [gc|Acc]);
res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) ->
res_place_gc_instrs_rev(Is, Acc);
res_place_gc_instrs_rev([I|Is], Acc) ->
res_place_gc_instrs_rev(Is, [I|Acc]);
res_place_gc_instrs_rev([], Acc) -> Acc.
res_place_allocate(#{yregs:=_}, Is) ->
%% There will be an 'allocate' instruction inserted here.
Is ++ [gc];
res_place_allocate(#{}, Is) -> Is.
reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
%% At this point, the code generator will place an instruction
%% that does a garbage collection. We must prune the remembered
%% registers.
Xs = res_xregs_prune(Xs0, Used, Res),
reserve_xregs_is(Is, Res, Xs, Used);
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
Res = reserve_xreg(Dst, Xs0, Res0),
Used1 = ordsets:union(Used0, beam_ssa:used(I)),
Used = ordsets:del_element(Dst, Used1),
case Op of
call ->
Xs = reserve_call_args(tl(Args)),
reserve_xregs_is(Is, Res, Xs, Used);
make_fun ->
Xs = reserve_call_args(tl(Args)),
reserve_xregs_is(Is, Res, Xs, Used);
_ ->
reserve_xregs_is(Is, Res, Xs0, Used)
end;
reserve_xregs_is([], Res, Xs, _Used) ->
{Res,Xs}.
%% Pick up register hints from the successors of this blocks.
reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
_Blocks, XsMap, _Res) ->
%% We know that no variables are used at ?BADARG_BLOCK, so
%% any register hints from the success blocks are safe to use.
map_get(Succ, XsMap);
reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
Blocks, XsMap, Res) when Succ =/= Fail ->
#{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
case {SuccBlk,FailBlk} of
{#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
%% Both branches ultimately transfer to the same
%% block (via two blocks with no instructions).
%% Pick up register hints from the phi nodes
%% in the common block.
#{PhiL:=#b_blk{is=PhiIs}} = Blocks,
Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
res_xregs_from_phi(PhiIs, Fail, Res, Xs);
{_,_} when Is =/= [] ->
case last(Is) of
#b_set{op=succeeded,args=[Arg]} ->
%% We know that Arg will not be used at the failure
%% label, so we can pick up register hints from the
%% success label.
Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
#{Arg:=Reg} -> #{Arg=>Reg};
#{} -> #{}
end;
_ ->
%% Register hints from the success block may not
%% be safe at the failure block, and vice versa.
#{}
end;
{_,_} ->
%% Register hints from the success block may not
%% be safe at the failure block, and vice versa.
#{}
end;
reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
Blocks, XsMap, Res) ->
case map_get(Succ, Blocks) of
#b_blk{is=[],last=Last} ->
reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
#b_blk{is=[_|_]=PhiIs} ->
res_xregs_from_phi(PhiIs, L, Res, #{})
end;
reserve_terminator(_, _, _, _, _, _) -> #{}.
%% Pick up a reservation from a phi node.
res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
Pred, Res, Acc) ->
case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
[] ->
%% The value of the phi node for this predecessor
%% is a literal. Nothing to do here.
res_xregs_from_phi(Is, Pred, Res, Acc);
[V] ->
case Res of
#{Dst:={prefer,Reg}} ->
%% Try placing V in the same register as for
%% the phi node.
res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
#{Dst:=_} ->
res_xregs_from_phi(Is, Pred, Res, Acc)
end
end;
res_xregs_from_phi(_, _, _, Acc) -> Acc.
reserve_call_args(Args) ->
reserve_call_args(Args, 0, #{}).
reserve_call_args([#b_var{}=Var|As], X, Xs) ->
reserve_call_args(As, X+1, Xs#{Var=>{x,X}});
reserve_call_args([#b_literal{}|As], X, Xs) ->
reserve_call_args(As, X+1, Xs);
reserve_call_args([], _, Xs) -> Xs.
reserve_xreg(V, Xs, Res) ->
case Res of
#{V:=_} ->
%% Already reserved (but not as an X register).
Res;
#{} ->
case Xs of
#{V:=X} ->
%% Add a hint that this specific X register is
%% preferred, unless it is already in use.
Res#{V=>{prefer,X}};
#{} ->
%% Reserve as an X register in general.
Res#{V=>x}
end
end.
%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
%% Prune the list of preferred registers, to make sure that
%% there are no "holes" (uninitialized X registers) when
%% invoking the garbage collector.
res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
%% The number of safe registers is the number of the X registers
%% used after this point. The actual number of safe registers may
%% be higher than this number, but this is a conservative safe
%% estimate.
NumSafe = foldl(fun(V, N) ->
case Res of
#{V:={x,_}} -> N + 1;
#{V:=_} -> N;
#{} -> N + 1
end
end, 0, Used),
%% Remove unsafe registers from the list of potential
%% preferred registers.
maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
res_xregs_prune(Xs, _Used, _Res) -> Xs.
%%%
%%% Register allocation using linear scan.
%%%
-record(i,
{sort=1 :: instr_number(),
reg=none :: i_reg(),
pool=x :: pool_id(),
var=#b_var{} :: b_var(),
rs=[] :: [range()]
}).
-record(l,
{cur=#i{} :: interval(),
unhandled_res=[] :: [interval()],
unhandled_any=[] :: [interval()],
active=[] :: [interval()],
inactive=[] :: [interval()],
free=#{} :: #{var_name()=>pool(),
{'next',pool_id()}:=reg_num()},
regs=[] :: [{b_var(),ssa_register()}]
}).
-type interval() :: #i{}.
-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
-type pool() :: ordsets:ordset(ssa_register()).
linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
St = St0#st{intervals=[],res=[]},
Free = init_free(maps:to_list(Res)),
Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
Intervals = sort(Intervals1),
IsReserved = fun(#i{reg=Reg}) ->
case Reg of
none -> false;
{prefer,{_,_}} -> false;
{_,_} -> true
end
end,
{UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
L = #l{unhandled_res=UnhandledRes,
unhandled_any=Unhandled,free=Free},
#l{regs=Regs} = do_linear(L),
St#st{regs=maps:from_list(Regs)}.
init_interval({V,[{Start,_}|_]=Rs}, Res) ->
Info = map_get(V, Res),
Pool = case Info of
{prefer,{x,_}} -> x;
x -> x;
{x,_} -> x;
{y,Uniq} -> Uniq;
{{y,_},Uniq} -> Uniq;
z -> z;
fr -> fr
end,
Reg = case Info of
{prefer,{x,_}} -> Info;
{x,_} -> Info;
{{y,_}=Y,_} -> Y;
_ -> none
end,
#i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.
init_free(Res) ->
Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]),
#{x:=Xs0} = Free1 = maps:from_list(Free0),
Xs = init_xregs(Xs0),
Free = Free1#{x:=Xs},
Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free),
maps:merge(Free, maps:from_list(Next)).
init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) ->
[{x,Reg}|init_free_1(Res)];
init_free_1([{_,{x,_}=Reg}|Res]) ->
[{x,Reg}|init_free_1(Res)];
init_free_1([{_,{y,Uniq}}|Res]) ->
[{Uniq,{y,0}}|init_free_1(Res)];
init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) ->
[{Uniq,Reg}|init_free_1(Res)];
init_free_1([{_,z}|Res]) ->
[{z,{z,0}}|init_free_1(Res)];
init_free_1([{_,fr}|Res]) ->
[{fr,{fr,0}}|init_free_1(Res)];
init_free_1([{_,x}|Res]) ->
init_free_1(Res);
init_free_1([]) -> [].
%% Make sure that the pool of xregs is contiguous.
init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M ->
[{x,N}|init_xregs([{x,M}|Is])];
init_xregs([{x,N}|[{x,_}|_]=Is]) ->
[{x,N}|init_xregs([{x,N+1}|Is])];
init_xregs([{x,_}]=Is) -> Is.
do_linear(L0) ->
case set_next_current(L0) of
done ->
L0;
L1 ->
L2 = expire_active(L1),
L3 = check_inactive(L2),
Available = collect_available(L3),
L4 = select_register(Available, L3),
L = make_cur_active(L4),
do_linear(L)
end.
set_next_current(#l{unhandled_res=[Cur1|T1],
unhandled_any=[Cur2|T2]}=L) ->
case {Cur1,Cur2} of
{#i{sort=N1},#i{sort=N2}} when N1 < N2 ->
L#l{cur=Cur1,unhandled_res=T1};
{_,_} ->
L#l{cur=Cur2,unhandled_any=T2}
end;
set_next_current(#l{unhandled_res=[],
unhandled_any=[Cur|T]}=L) ->
L#l{cur=Cur,unhandled_any=T};
set_next_current(#l{unhandled_res=[Cur|T],
unhandled_any=[]}=L) ->
L#l{cur=Cur,unhandled_res=T};
set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
done.
expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) ->
{Act,L} = expire_active(Act0, CurBegin, L0, []),
L#l{active=Act}.
expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
{_,_} = Reg, %Assertion.
case overlap_status(Rs0, CurBegin) of
ends_before_cur ->
L = free_reg(I, L0),
expire_active(Is, CurBegin, L, Acc);
overlapping ->
expire_active(Is, CurBegin, L0, [I|Acc]);
not_overlapping ->
Rs = strip_before_current(Rs0, CurBegin),
L1 = free_reg(I, L0),
L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]},
expire_active(Is, CurBegin, L, Acc)
end;
expire_active([], _CurBegin, L, Acc) ->
{Acc,L}.
check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
{InAct,L} = check_inactive(InAct0, CurBegin, L0, []),
L#l{inactive=InAct}.
check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
case overlap_status(Rs0, CurBegin) of
ends_before_cur ->
check_inactive(Is, CurBegin, L0, Acc);
not_overlapping ->
check_inactive(Is, CurBegin, L0, [I|Acc]);
overlapping ->
Rs = strip_before_current(Rs0, CurBegin),
L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]},
L = reserve_reg(I, L1),
check_inactive(Is, CurBegin, L, Acc)
end;
check_inactive([], _CurBegin, L, Acc) ->
{Acc,L}.
strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin ->
strip_before_current(Rs, CurBegin);
strip_before_current(Rs, _CurBegin) -> Rs.
collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
%% Use the preferred register if it is available.
Avail = collect_available(L#l{cur=I#i{reg=none}}),
case member(Prefer, Avail) of
true -> [Prefer];
false -> Avail
end;
collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
%% Return the already reserved register.
[ReservedReg];
collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
Free = get_pool(Cur, L),
%% Note that since the live intervals are constructed from
%% SSA form, there cannot be any overlap of the current interval
%% with any inactive interval. See [3], page 175. Therefore we
%% only have check the unhandled intervals for overlap with
%% the current interval. As a further optimization, we only need
%% to check the intervals that have reserved registers.
collect_available(Unhandled, Cur, Free).
collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free)
when Pool1 =/= Pool2 ->
%% Wrong pool. Ignore this interval.
collect_available(Is, Cur, Free);
collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
case overlaps(I, Cur) of
true ->
Free = ordsets:del_element(Reg, Free0),
collect_available(Is, Cur, Free);
false ->
collect_available(Is, Cur, Free0)
end;
collect_available([], _, Free) -> Free.
select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) ->
Cur = Cur0#i{reg=Reg},
reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]});
select_register([], #l{cur=Cur0,regs=Regs}=L0) ->
%% Allocate a new register in the pool.
{Reg,L1} = get_next_free(Cur0, L0),
Cur = Cur0#i{reg=Reg},
L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]},
reserve_reg(Cur, L).
make_cur_active(#l{cur=Cur,active=Act}=L) ->
L#l{active=[Cur|Act]}.
overlaps(#i{rs=Rs1}, #i{rs=Rs2}) ->
are_overlapping(Rs1, Rs2).
overlap_status([{S,E}], CurBegin) ->
if
E =< CurBegin -> ends_before_cur;
CurBegin < S -> not_overlapping;
true -> overlapping
end;
overlap_status([{S,E}|Rs], CurBegin) ->
if
E =< CurBegin ->
overlap_status(Rs, CurBegin);
S =< CurBegin ->
overlapping;
true ->
not_overlapping
end.
reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
FreeRegs0 = get_pool(I, L),
FreeRegs = ordsets:del_element(Reg, FreeRegs0),
update_pool(I, FreeRegs, L).
free_reg(#i{reg={_,_}=Reg}=I, L) ->
FreeRegs0 = get_pool(I, L),
FreeRegs = ordsets:add_element(Reg, FreeRegs0),
update_pool(I, FreeRegs, L).
get_pool(#i{pool=Pool}, #l{free=Free}) ->
map_get(Pool, Free).
update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) ->
Free = Free0#{Pool:=New},
L#l{free=Free}.
get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) ->
K = {next,Pool},
N = map_get(K, Free0),
Free = Free0#{K:=N+1},
L = L0#l{free=Free},
if
is_integer(Pool) -> {{y,N},L};
is_atom(Pool) -> {{Pool,N},L}
end.
%%%
%%% Interval utilities.
%%%
are_overlapping([R|Rs1], Rs2) ->
case are_overlapping_1(R, Rs2) of
true ->
true;
false ->
are_overlapping(Rs1, Rs2)
end;
are_overlapping([], _) -> false.
are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 ->
false;
are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
(S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs);
are_overlapping_1({_,_}, []) -> false.
%%%
%%% Utilities.
%%%
%% is_loop_header(L, Blocks) -> false|true.
%% Check whether the block is a loop header.
is_loop_header(L, Blocks) ->
%% We KNOW that a loop header must start with a peek_message
%% instruction.
case map_get(L, Blocks) of
#b_blk{is=[#b_set{op=peek_message}|_]} -> true;
_ -> false
end.
rel2fam(S0) ->
S1 = sofs:relation(S0),
S = sofs:rel2fam(S1),
sofs:to_external(S).
split_phis(Is) ->
splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is).
is_yreg({y,_}) -> true;
is_yreg({x,_}) -> false;
is_yreg({z,_}) -> false;
is_yreg({fr,_}) -> false.
new_vars([Base|Vs0], Count0) ->
{V,Count1} = new_var(Base, Count0),
{Vs,Count} = new_vars(Vs0, Count1),
{[V|Vs],Count};
new_vars([], Count) -> {[],Count}.
new_var({Base,Int}, Count) ->
true = is_integer(Int), %Assertion.
{#b_var{name={Base,Count}},Count+1};
new_var(Base, Count) ->
{#b_var{name={Base,Count}},Count+1}.