aboutsummaryrefslogtreecommitdiffstats
path: root/lib/compiler/src/beam_ssa_pre_codegen.erl
diff options
context:
space:
mode:
Diffstat (limited to 'lib/compiler/src/beam_ssa_pre_codegen.erl')
-rw-r--r--lib/compiler/src/beam_ssa_pre_codegen.erl2634
1 files changed, 2634 insertions, 0 deletions
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
new file mode 100644
index 0000000000..df4de8d7bd
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -0,0 +1,2634 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%% http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Prepare for code generation, including register allocation.
+%%
+%% The output of this compiler pass is still in the SSA format, but
+%% it has been annotated and transformed to help the code generator.
+%%
+%% * Some instructions are translated to other instructions closer to
+%% the BEAM instructions. For example, the binary matching
+%% instructions are transformed from the optimization-friendly
+%% internal format to instructions more similar to the actual BEAM
+%% instructions.
+%%
+%% * Blocks that will need an instruction for allocating a stack frame
+%% are annotated with a {frame_size,Size} annotation.
+%%
+%% * 'copy' instructions are added for all variables that need
+%% to be saved to the stack frame. Additional 'copy' instructions
+%% can be added as an optimization to reuse y registers (see
+%% the copy_retval sub pass).
+%%
+%% * Each function is annotated with a {register,RegisterMap}
+%% annotation that maps each variable to a BEAM register. The linear
+%% scan algorithm is used to allocate registers.
+%%
+%% There are four kinds of registers: x, y, fr (floating point register),
+%% and z. A variable will be allocated to a z register if it is only
+%% used by the instruction following the instruction that defines
+%% the variable. The code generator will typically combine those
+%% instructions to a test instruction. z registers are also used for
+%% some instructions that don't have a return value.
+%%
+%% References:
+%%
+%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
+%% in the context of SSA form and register constraints. In Proceedings
+%% of the International Conference on Compiler Construction, pages
+%% 229–246. LNCS 2304, Springer-Verlag, 2002.
+%%
+%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
+%% linear scan register allocator. In Proceedings of the ACM/USENIX
+%% International Conference on Virtual Execution Environments, pages
+%% 132–141. ACM Press, 2005.
+%%
+%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
+%% Form. In Proceedings of the International Symposium on Code
+%% Generation and Optimization, pages 170-179. ACM Press, 2010.
+%%
+
+-module(beam_ssa_pre_codegen).
+
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [all/2,any/2,append/1,duplicate/2,
+ foldl/3,last/1,map/2,member/2,partition/2,
+ reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+ {'ok',beam_ssa:b_module()}.
+
+%% module(Module, Options) -> {ok,Module}.
+%%  Run the sub passes selected by Options on every function in the
+%%  module body. The BSM3 binary matching instructions are used
+%%  unless the no_bsm3 option is given.
+module(#b_module{body=Functions}=Mod, Options) ->
+    SubPasses = passes(Options),
+    Bsm3 = not proplists:get_bool(no_bsm3, Options),
+    {ok,Mod#b_module{body=functions(Functions, SubPasses, Bsm3)}}.
+
+%% Apply function/3 to each function, in order.
+functions(Fs, Ps, UseBSM3) ->
+    [function(F, Ps, UseBSM3) || F <- Fs].
+
+-type b_var() :: beam_ssa:b_var().
+-type var_name() :: beam_ssa:var_name().
+-type instr_number() :: pos_integer().
+-type range() :: {instr_number(),instr_number()}.
+-type reg_num() :: beam_asm:reg_num().
+-type xreg() :: {'x',reg_num()}.
+-type yreg() :: {'y',reg_num()}.
+-type ypool() :: {'y',beam_ssa:label()}.
+-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
+ ypool() | {yreg(),ypool()} | 'z'.
+-type ssa_register() :: beam_ssa_codegen:ssa_register().
+
+-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
+-record(st, {ssa :: beam_ssa:block_map(),
+ args :: [b_var()],
+ cnt :: beam_ssa:label(),
+ use_bsm3 :: boolean(),
+ frames=[] :: [beam_ssa:label()],
+ intervals=[] :: [{b_var(),[range()]}],
+ res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
+ regs=#{} :: #{b_var():=ssa_register()},
+ extra_annos=[] :: [{atom(),term()}]
+ }).
+-define(PASS(N), {N,fun N/1}).
+
+%% passes(Options) -> [{Name,Fun}].
+%%  Return the sub passes to run, in execution order. Two of the
+%%  passes are optional and are only included when the corresponding
+%%  option is present in Options.
+passes(Opts) ->
+    %% Only included when the no_put_tuple2 option is given.
+    FixTuples = case proplists:get_bool(no_put_tuple2, Opts) of
+                    true -> [?PASS(fix_tuples)];
+                    false -> []
+                end,
+
+    %% If needed for a .precg file, save the live intervals
+    %% so they can be included in an annotation.
+    SaveIntervals = case proplists:get_bool(dprecg, Opts) of
+                        true -> [?PASS(save_live_intervals)];
+                        false -> []
+                    end,
+
+    append([[?PASS(assert_no_critical_edges),
+
+             %% Preliminaries.
+             ?PASS(fix_bs),
+             ?PASS(sanitize)],
+            FixTuples,
+            [?PASS(place_frames),
+             ?PASS(fix_receives),
+
+             %% Find and reserve Y registers.
+             ?PASS(find_yregs),
+             ?PASS(reserve_yregs),
+
+             %% Handle legacy binary match instructions that don't
+             %% accept a Y register as destination.
+             ?PASS(legacy_bs),
+
+             %% Improve reuse of Y registers to potentially
+             %% reduce the size of the stack frame.
+             ?PASS(copy_retval),
+             ?PASS(opt_get_list),
+
+             %% Calculate live intervals.
+             ?PASS(number_instructions),
+             ?PASS(live_intervals),
+             ?PASS(reserve_regs)],
+            SaveIntervals,
+
+            %% Allocate registers.
+            [?PASS(linear_scan),
+             ?PASS(frame_size),
+             ?PASS(turn_yregs)]]).
+
+%% function(Function, Passes, UseBSM3) -> Function.
+%%  Run all sub passes on one function, then store the resulting
+%%  blocks, counter, register map and any extra annotations in the
+%%  function record.
+%%
+%%  If a sub pass raises an exception, the name and arity of the
+%%  function being processed are printed (when available) and the
+%%  exception is re-raised with its original class, reason and
+%%  stack trace.
+function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
+         Ps, UseBSM3) ->
+    try
+        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
+        St = compile:run_sub_passes(Ps, St0),
+        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
+        F1 = add_extra_annos(F0, ExtraAnnos),
+        F = beam_ssa:add_anno(registers, Regs, F1),
+        F#b_function{bs=Blocks,cnt=Count}
+    catch
+        Class:Error:Stack ->
+            %% Never let a missing func_info annotation replace the
+            %% original exception with a badmatch from this handler.
+            case Anno of
+                #{func_info:={_,Name,Arity}} ->
+                    io:fwrite("Function: ~w/~w\n", [Name,Arity]);
+                #{} ->
+                    ok
+            end,
+            erlang:raise(Class, Error, Stack)
+    end.
+
+%% Store the live intervals as the single extra annotation.
+save_live_intervals(#st{}=St) ->
+    Anno = {live_intervals,St#st.intervals},
+    St#st{extra_annos=[Anno]}.
+
+%% Add each {Name,Value} pair as an annotation. Extra annotations
+%% are present when a .precg listing file is being produced.
+add_extra_annos(F, [{Name,Value}|Annos]) ->
+    add_extra_annos(beam_ssa:add_anno(Name, Value, F), Annos);
+add_extra_annos(F, []) ->
+    F.
+
+%% assert_no_critical_edges(St0) -> St.
+%%  The code generator will not work if there are critical edges.
+%%  Abort if any critical edges are found.
+
+assert_no_critical_edges(#st{ssa=Blocks}=St) ->
+    %% Only the assertion inside assert_no_ces/3 matters; the
+    %% result of the fold is discarded.
+    _ = beam_ssa:fold_rpo(fun assert_no_ces/3, Blocks, Blocks),
+    St.
+
+%% assert_no_ces(_, Block, Blocks) -> Blocks.
+%%  Fold callback used by assert_no_critical_edges/1. Crashes with
+%%  a badmatch if a predecessor listed in the first phi node of
+%%  Block has more than one successor. Note that only a leading
+%%  phi node with exactly two arguments is matched by the first
+%%  clause; all other blocks are passed through unchecked.
+assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
+ %% This block has multiple predecessors. Make sure that none
+ %% of the predecessors have more than one successor.
+ true = all(fun({_,P}) ->
+ length(beam_ssa:successors(P, Blocks)) =:= 1
+ end, Phis), %Assertion.
+ Blocks;
+assert_no_ces(_, _, Blocks) -> Blocks.
+
+%% fix_bs(St0) -> St.
+%% Fix up the binary matching instructions:
+%%
+%% * Insert bs_save and bs_restore instructions where needed
+%% (bs_get_position and bs_set_position when use_bsm3 is true).
+%%
+%% * Combine bs_match and bs_extract instructions to bs_get
+%% instructions.
+%%
+%% The state is returned unchanged if the function contains no
+%% bs_start_match or bs_match instructions.
+
+fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
+ %% Collect {Variable,Parent} pairs describing how each match
+ %% context variable is linked to the context it was derived from.
+ F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
+ %% Mark the root of the match context list.
+ [{Dst,{context,Dst}}|A];
+ (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
+ %% Link this match context to the previous match context.
+ [{Dst,ParentCtx}|A];
+ (_, A) ->
+ A
+ end,
+ case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
+ [] ->
+ %% No binary matching in this function.
+ St;
+ [_|_]=M ->
+ CtxChain = maps:from_list(M),
+ Linear0 = beam_ssa:linearize(Blocks),
+
+ %% Insert position instructions where needed.
+ {Linear1,Count} = case UseBSM3 of
+ true ->
+ bs_pos_bsm3(Linear0, CtxChain, Count0);
+ false ->
+ bs_pos_bsm2(Linear0, CtxChain, Count0)
+ end,
+
+ %% Rename instructions.
+ Linear = bs_instrs(Linear1, CtxChain, []),
+
+ St#st{ssa=maps:from_list(Linear),cnt=Count}
+ end.
+
+%% Insert bs_get_position and bs_set_position instructions as needed.
+%% Returns {Linear,Count}, where Count is the updated variable counter.
+bs_pos_bsm3(Linear0, CtxChain, Count0) ->
+    RestoreMap = bs_restores(Linear0, CtxChain, #{}, #{}),
+    Pairs = maps:values(RestoreMap),
+
+    %% Group the save points by match context.
+    Relation = sofs:relation(Pairs, [{context,save_point}]),
+    Family = sofs:to_external(sofs:relation_to_family(Relation)),
+
+    {SavePoints,Count1} = make_bs_pos_dict(Family, Count0, []),
+    {Gets,Count2} = make_bs_setpos_map(Pairs, SavePoints, Count1, []),
+    {Sets,Count} = make_bs_getpos_map(maps:to_list(RestoreMap),
+                                      SavePoints, Count2, []),
+
+    %% Now insert all saves and restores.
+    {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.
+
+%% Build a map from each save point to the instruction that stores
+%% the current position of its match context in the variable
+%% returned by get_savepoint/2. Note that, despite the name of this
+%% function, the instructions built here are bs_get_position
+%% instructions. The counter is advanced once per list element.
+make_bs_setpos_map(Rs, SavePoints, Count0, Acc0) ->
+    {Acc,Count} =
+        foldl(fun({Ctx,Save}=Ps, {A,C}) ->
+                      Dst = get_savepoint(Ps, SavePoints),
+                      I = #b_set{op=bs_get_position,dst=Dst,args=[Ctx]},
+                      {[{Save,I}|A],C+1}
+              end, {Acc0,Count0}, Rs),
+    {maps:from_list(Acc),Count}.
+
+%% Build a map from the destination variable of an instruction to the
+%% instruction that must be inserted before it to move the match
+%% context back to a saved position. Note that, despite the name of
+%% this function, the instructions built here are bs_set_position
+%% instructions. Each one gets a fresh '@ssa_ignored' destination.
+make_bs_getpos_map(Restores, SavePoints, Count0, Acc0) ->
+    {Acc,Count} =
+        foldl(fun({Bef,{Ctx,_}=Ps}, {A,C}) ->
+                      Dst = #b_var{name={'@ssa_ignored',C}},
+                      SavePoint = get_savepoint(Ps, SavePoints),
+                      I = #b_set{op=bs_set_position,dst=Dst,
+                                 args=[Ctx,SavePoint]},
+                      {[{Bef,I}|A],C+1}
+              end, {Acc0,Count0}, Restores),
+    {maps:from_list(Acc),Count}.
+
+%% Return the variable holding the saved position for the
+%% {Context,SavePoint} pair. The pair must be a key in SavePoints.
+get_savepoint({_,_}=Key, SavePoints) ->
+    Index = map_get(Key, SavePoints),
+    #b_var{name={'@ssa_bs_position',Index}}.
+
+%% Number every {Context,SavePoint} pair in the family, threading
+%% the counter through all contexts, and return the numbering as a
+%% map together with the updated counter.
+make_bs_pos_dict(Family, Count0, Acc0) ->
+    {Acc,Count} =
+        foldl(fun({Ctx,Pts}, {A,C}) ->
+                      make_bs_pos_dict_1(Pts, Ctx, C, A)
+              end, {Acc0,Count0}, Family),
+    {maps:from_list(Acc),Count}.
+
+%% Give each save point of Ctx a consecutive number starting at
+%% First. An entry {Ctx,Next} holding the first unused number is
+%% put at the head of the result; Next is also returned.
+make_bs_pos_dict_1(Pts, Ctx, First, Acc0) ->
+    {Acc,Next} =
+        lists:foldl(fun(Pt, {A,N}) ->
+                            {[{{Ctx,Pt},N}|A],N+1}
+                    end, {Acc0,First}, Pts),
+    {[{Ctx,Next}|Acc],Next}.
+
+%% Like bs_pos_bsm3/3, but inserts bs_save and bs_restore
+%% instructions instead of the OTP-22 instructions. This is only
+%% used when cross-compiling to older versions.
+bs_pos_bsm2(Linear0, CtxChain, Count0) ->
+    RestoreMap = bs_restores(Linear0, CtxChain, #{}, #{}),
+    Pairs = maps:values(RestoreMap),
+
+    %% Group the save points by match context.
+    Relation = sofs:relation(Pairs, [{context,save_point}]),
+    Family = sofs:to_external(sofs:relation_to_family(Relation)),
+
+    Slots = make_save_point_dict(Family, []),
+    {Saves,Count1} = make_save_map(Pairs, Slots, Count0, []),
+    {Restores,Count} = make_restore_map(maps:to_list(RestoreMap),
+                                        Slots, Count1, []),
+
+    %% Now insert all saves and restores.
+    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.
+
+%% Build a map from each save point to the bs_save instruction for
+%% it. No instruction (and no new variable) is created for the
+%% special 'start' slot.
+make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
+    case make_slot(Ps, Slots) of
+        #b_literal{val=start} ->
+            make_save_map(T, Slots, Count, Acc);
+        Slot ->
+            Dst = #b_var{name={'@ssa_ignored',Count}},
+            I = #b_set{op=bs_save,dst=Dst,args=[Ctx,Slot]},
+            make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
+    end;
+make_save_map([], _Slots, Count, Acc) ->
+    {maps:from_list(Acc),Count}.
+
+%% Build a map from the destination variable of an instruction to
+%% the bs_restore instruction that must be inserted before it. Each
+%% bs_restore gets a fresh '@ssa_ignored' destination variable.
+make_restore_map(Restores, Slots, Count0, Acc0) ->
+    {Acc,Count} =
+        foldl(fun({Bef,{Ctx,_}=Ps}, {A,C}) ->
+                      Dst = #b_var{name={'@ssa_ignored',C}},
+                      Slot = make_slot(Ps, Slots),
+                      I = #b_set{op=bs_restore,dst=Dst,args=[Ctx,Slot]},
+                      {[{Bef,I}|A],C+1}
+              end, {Acc0,Count0}, Restores),
+    {maps:from_list(Acc),Count}.
+
+%% Return the slot literal for a {Context,SavePoint} pair. When the
+%% save point is the context itself, the slot is the atom 'start';
+%% otherwise the slot number is looked up in Slots.
+make_slot({Ctx,SavePoint}=Ps, Slots) ->
+    case Ctx =:= SavePoint of
+        true -> #b_literal{val=start};
+        false -> #b_literal{val=map_get(Ps, Slots)}
+    end.
+
+%% Number the save points of every context (starting from 0 for
+%% each context) and return the numbering as a map.
+make_save_point_dict(Family, Acc0) ->
+    Acc = foldl(fun({Ctx,Pts}, A) ->
+                        make_save_point_dict_1(Pts, Ctx, 0, A)
+                end, Acc0, Family),
+    maps:from_list(Acc).
+
+%% Give each save point of Ctx a consecutive slot number starting at
+%% First. A save point equal to Ctx itself is the special 'start'
+%% save point; it does not need a bs_save instruction and gets no
+%% slot. An entry {Ctx,NumSlots} is put at the head of the result.
+make_save_point_dict_1(Pts, Ctx, First, Acc0) ->
+    {Acc,NumSlots} =
+        lists:foldl(fun(Pt, {A,N}) when Pt =:= Ctx ->
+                            {A,N};
+                       (Pt, {A,N}) ->
+                            {[{{Ctx,Pt},N}|A],N+1}
+                    end, {Acc0,First}, Pts),
+    [{Ctx,NumSlots}|Acc].
+
+%% Walk the linearized blocks and collect the map of needed
+%% restores. D maps a block label to the position map that holds on
+%% entry to that block; a block without an entry starts out with an
+%% empty position map.
+bs_restores([], _CtxChain, _D, Rs) ->
+    Rs;
+bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
+    EntryPos = maps:get(L, D0, #{}),
+    {ExitPos,Rs} = bs_restores_is(Is, CtxChain, EntryPos, Rs0),
+    D = bs_update_successors(Last, ExitPos, EntryPos, D0),
+    bs_restores(Bs, CtxChain, D, Rs).
+
+%% Propagate position maps to the successors of a block. For a
+%% branch, the success label gets the positions at the end of the
+%% block (SPos) and the failure label gets the positions at its
+%% beginning (FPos). Every target of a switch gets SPos.
+bs_update_successors(#b_ret{}, _SPos, _FPos, D) ->
+    D;
+bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
+    join_positions([{Succ,SPos},{Fail,FPos}], D);
+bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
+    Targets = [L || {_,L} <- List] ++ [Fail],
+    join_positions([{T,SPos} || T <- Targets], D).
+
+%% Record the position map for each label in D. If a label already
+%% has a different position map, the two maps are joined.
+join_positions([{L,MapPos0}|T], D0) ->
+    D = case D0 of
+            #{L:=MapPos0} ->
+                %% Same map.
+                D0;
+            #{L:=MapPos1} ->
+                %% Different maps.
+                D0#{L:=join_positions_1(MapPos0, MapPos1)};
+            #{} ->
+                D0#{L=>MapPos0}
+        end,
+    join_positions(T, D);
+join_positions([], D) ->
+    D.
+
+%% Join two position maps. A key present in both maps keeps its
+%% value if the two values are identical and is set to 'unknown'
+%% otherwise. A key present in only one of the maps keeps its value.
+join_positions_1(MapPos0, MapPos1) ->
+    maps:fold(fun(Start, Pos, Acc) ->
+                      case MapPos0 of
+                          #{Start:=Pos} -> Acc;
+                          #{Start:=_} -> Acc#{Start:=unknown};
+                          #{} -> Acc#{Start=>Pos}
+                      end
+              end, MapPos0, MapPos1).
+
+%% bs_restores_is(Is, CtxChain, PosMap, Rs) -> {PosMap,Rs}.
+%%  Walk the instructions of one block. PosMap maps the variable
+%%  defined by a bs_start_match instruction to the variable for the
+%%  current position of that match context ('unknown' when the
+%%  position is not known). Rs maps the destination variable of an
+%%  instruction that needs a restore before it to {Start,Position},
+%%  where Position is the position to restore to.
+bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
+ CtxChain, PosMap0, Rs) ->
+ %% A new match context starts out at its own start position.
+ PosMap = PosMap0#{Start=>Start},
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
+ CtxChain, PosMap0, Rs0) ->
+ Start = bs_subst_ctx(NewPos, CtxChain),
+ [_,FromPos|_] = Args,
+ case PosMap0 of
+ #{Start:=FromPos} ->
+ %% Same position, no restore needed.
+ PosMap = case bs_match_type(I) of
+ plain ->
+ %% Update position to new position.
+ PosMap0#{Start:=NewPos};
+ _ ->
+ %% Position will not change (test_unit
+ %% instruction or no instruction at
+ %% all).
+ PosMap0#{Start:=FromPos}
+ end,
+ bs_restores_is(Is, CtxChain, PosMap, Rs0);
+ #{Start:=_} ->
+ %% Different positions, might need a restore instruction.
+ case bs_match_type(I) of
+ none ->
+ %% The tail test will be optimized away.
+ %% No need to do a restore.
+ PosMap = PosMap0#{Start:=FromPos},
+ bs_restores_is(Is, CtxChain, PosMap, Rs0);
+ test_unit ->
+ %% This match instruction will be replaced by
+ %% a test_unit instruction. We will need a
+ %% restore. The new position will be the position
+ %% restored to (NOT NewPos).
+ PosMap = PosMap0#{Start:=FromPos},
+ Rs = Rs0#{NewPos=>{Start,FromPos}},
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+ plain ->
+ %% Match or skip. Position will be changed.
+ PosMap = PosMap0#{Start:=NewPos},
+ Rs = Rs0#{NewPos=>{Start,FromPos}},
+ bs_restores_is(Is, CtxChain, PosMap, Rs)
+ end
+ end;
+bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
+ CtxChain, PosMap, Rs) ->
+ Start = bs_subst_ctx(FromPos, CtxChain),
+ #{Start:=FromPos} = PosMap, %Assertion.
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
+ CtxChain, PosMap0, Rs0) ->
+ %% First make sure that every match context argument is at the
+ %% position the call expects, then mark the positions of those
+ %% contexts as unknown after the call.
+ {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+ PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
+ %% We can land here from any point, so all positions are invalid.
+ PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
+ CtxChain, PosMap0, Rs0)
+ when Op =:= bs_test_tail;
+ Op =:= bs_get_tail ->
+ {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
+ %% All other instructions leave the positions unchanged.
+ bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([], _CtxChain, PosMap, Rs) ->
+ {PosMap,Rs}.
+
+%% Classify a bs_match instruction. Skipping a binary segment of size
+%% 'all' is classified as 'none' when the unit is 1 and as
+%% 'test_unit' for any other unit. Everything else is 'plain'.
+bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
+                            #b_literal{val=binary},_Flags,
+                            #b_literal{val=all},#b_literal{val=1}]}) ->
+    none;
+bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
+                            #b_literal{val=binary},_Flags,
+                            #b_literal{val=all},#b_literal{val=_}]}) ->
+    test_unit;
+bs_match_type(_) ->
+    plain.
+
+%% Call instructions leave the match position in an undefined state,
+%% requiring us to invalidate each affected argument. Arguments
+%% that are not match contexts (or not variables) are ignored.
+bs_invalidate_pos([#b_var{}=Arg|Args], PosMap0, CtxChain) ->
+    Start = bs_subst_ctx(Arg, CtxChain),
+    PosMap = case is_map_key(Start, PosMap0) of
+                 true -> PosMap0#{Start:=unknown};
+                 false -> PosMap0           %Not a match context.
+             end,
+    bs_invalidate_pos(Args, PosMap, CtxChain);
+bs_invalidate_pos([_|Args], PosMap, CtxChain) ->
+    bs_invalidate_pos(Args, PosMap, CtxChain);
+bs_invalidate_pos([], PosMap, _CtxChain) ->
+    PosMap.
+
+%% For each argument that is a match context not at the position
+%% given by the argument, record that a restore is needed before
+%% the instruction defining Dst and update the position map.
+%% Returns {Rs,PosMap}.
+bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) ->
+    {Rs,PosMap};
+bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) ->
+    Start = bs_subst_ctx(Arg, CtxChain),
+    {PosMap,Rs} =
+        case PosMap0 of
+            #{Start:=Arg} ->
+                %% Same position, no restore needed.
+                {PosMap0,Rs0};
+            #{Start:=_} ->
+                %% Different positions, need a restore instruction.
+                {PosMap0#{Start:=Arg},Rs0#{Dst=>{Start,Arg}}};
+            #{} ->
+                %% Not a match context.
+                {PosMap0,Rs0}
+        end,
+    bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
+bs_restore_args([_|Args], PosMap, CtxChain, Dst, Rs) ->
+    bs_restore_args(Args, PosMap, CtxChain, Dst, Rs).
+
+%% Insert all position-saving and position-restoring instructions.
+%% No instruction needs to be transformed, so the identity function
+%% is passed as the instruction transformer.
+
+bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
+    Identity = fun(I) -> I end,
+    bs_insert_1(Blocks, Saves, Restores, SavePoints, Identity).
+
+%% Insert all bs_save and bs_restore instructions.
+bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
+    %% The old instructions require bs_start_match to be annotated with the
+    %% number of position slots it needs (0 if none are recorded).
+    Annotate = fun(#b_set{op=bs_start_match,dst=Dst}=I) ->
+                       NumSlots = maps:get(Dst, SavePoints, 0),
+                       beam_ssa:add_anno(num_slots, NumSlots, I);
+                  (I) ->
+                       I
+               end,
+    bs_insert_1(Blocks, Saves, Restores, SavePoints, Annotate).
+
+%% Insert restores into each block. A save for a variable defined
+%% in a block is inserted into the block that follows it in the
+%% list, which is why the remaining blocks are updated before the
+%% recursion continues.
+bs_insert_1([], _Saves, _Restores, _Slots, _XFrm) ->
+    [];
+bs_insert_1([{L,#b_blk{is=Is0}=Blk0}|Bs0], Saves, Restores, Slots, XFrm) ->
+    Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
+    Blk = Blk0#b_blk{is=Is},
+    Bs = bs_insert_saves(Is, Bs0, Saves),
+    [{L,Blk}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)].
+
+%% Apply XFrm to every instruction, and put the restore instruction
+%% (if there is one for the destination variable) in front of each
+%% bs_test_tail, bs_get_tail, bs_match, and call instruction.
+bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is0], Restores, SavePoints, XFrm) ->
+    I = XFrm(I0),
+    Is = bs_insert_is_1(Is0, Restores, SavePoints, XFrm),
+    MayRestore = Op =:= bs_test_tail orelse
+        Op =:= bs_get_tail orelse
+        Op =:= bs_match orelse
+        Op =:= call,
+    case Restores of
+        #{Dst:=R} when MayRestore -> [R,I|Is];
+        #{} -> [I|Is]
+    end;
+bs_insert_is_1([], _Restores, _SavePoints, _XFrm) ->
+    [].
+
+%% Find the first instruction whose destination variable has a save
+%% instruction, and insert that save into the first block of Bs.
+%% Bs is returned unchanged if there is no such instruction.
+bs_insert_saves([], Bs, _Saves) ->
+    Bs;
+bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
+    case maps:find(Dst, Saves) of
+        {ok,Save} -> bs_insert_save(Save, Bs);
+        error -> bs_insert_saves(Is, Bs, Saves)
+    end.
+
+%% Insert Save at the beginning of the first block, but after a
+%% leading bs_extract instruction if there is one.
+bs_insert_save(Save, [{L,#b_blk{is=[#b_set{op=bs_extract}=Ex|Is]}=Blk}|Bs]) ->
+    [{L,Blk#b_blk{is=[Ex,Save|Is]}}|Bs];
+bs_insert_save(Save, [{L,#b_blk{is=Is}=Blk}|Bs]) ->
+    [{L,Blk#b_blk{is=[Save|Is]}}|Bs].
+
+%% Translate bs_match instructions to bs_get, bs_match_string,
+%% or bs_skip. Also rename match context variables to use the
+%% variable assigned to by the start_match instruction.
+
+bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
+    {Is,Acc} =
+        case bs_instrs_is(Is0, CtxChain, []) of
+            [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is1] ->
+                %% Drop this instruction. Rewrite the corresponding
+                %% bs_match instruction in the previous block to
+                %% a bs_get instruction.
+                {Is1,bs_combine(Dst, Ctx, Acc0)};
+            Is1 ->
+                {Is1,Acc0}
+        end,
+    bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
+bs_instrs([], _CtxChain, Acc) ->
+    reverse(Acc).
+
+%% Substitute the root match context for every match context
+%% argument, and rename skip and string bs_match instructions to
+%% bs_skip and bs_match_string, respectively. The translated
+%% instructions are appended to the reversed accumulator.
+bs_instrs_is(Is, CtxChain, Acc) ->
+    Translate =
+        fun(#b_set{op=Op,args=Args0}=I) ->
+                Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
+                case {Op,Args} of
+                    {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
+                        I#b_set{op=bs_skip,args=[Type,Ctx|As]};
+                    {bs_match,[#b_literal{val=string},Ctx|As]} ->
+                        I#b_set{op=bs_match_string,args=[Ctx|As]};
+                    {_,_} ->
+                        I#b_set{args=Args}
+                end
+        end,
+    reverse(Acc, [Translate(I) || I <- Is]).
+
+%% Combine a bs_match instruction with the destination register
+%% taken from a bs_extract instruction. The bs_match instruction
+%% must be the next to last instruction of the most recently
+%% translated block; the last instruction is updated to test the
+%% new destination.
+
+bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
+    [#b_set{}=Succeeded0,
+     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|RevIs] = reverse(Is0),
+    BsGet = BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
+    Succeeded = Succeeded0#b_set{args=[Dst]},
+    Is = reverse(RevIs, [BsGet,Succeeded]),
+    [{L,Blk#b_blk{is=Is}}|Acc].
+
+%% Follow the context chain from a variable to the variable that
+%% is marked as the root match context. Anything that is not a
+%% match context variable is returned unchanged.
+bs_subst_ctx(#b_var{}=Var, CtxChain) ->
+    case maps:find(Var, CtxChain) of
+        {ok,{context,Ctx}} ->
+            Ctx;
+        {ok,ParentCtx} ->
+            bs_subst_ctx(ParentCtx, CtxChain);
+        error ->
+            %% Not a match context variable.
+            Var
+    end;
+bs_subst_ctx(Other, _CtxChain) ->
+    Other.
+
+%% legacy_bs(St0) -> St.
+%%  Binary matching instructions in OTP 21 and earlier don't support
+%%  a Y register as destination. If St#st.use_bsm3 is false,
+%%  we will need to rewrite those instructions so that the result
+%%  is first put in an X register and then moved to a Y register
+%%  if the operation succeeded. Nothing is done if use_bsm3 is true.
+
+legacy_bs(#st{use_bsm3=true}=St) ->
+    St;
+legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
+    %% The variables that have a Y register reservation.
+    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
+    {Linear,Count} = legacy_bs(beam_ssa:linearize(Blocks0),
+                               IsYreg, Count0, #{}, []),
+    St#st{ssa=maps:from_list(Linear),cnt=Count}.
+
+%% Rewrite the blocks one at a time. Copies maps a block label to a
+%% 'copy' instruction that is to be put first in that block. The
+%% rewritten blocks are returned in reverse order.
+legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
+    {Acc,Count};
+legacy_bs([{L,#b_blk{is=Is0,last=Last}=Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
+    Is1 = case maps:find(L, Copies0) of
+              {ok,Copy} -> [Copy|Is0];
+              error -> Is0
+          end,
+    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
+    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]).
+
+%% legacy_bs_is(Is, Last, IsYreg, Count, Copies, Acc) -> {Is,Count,Copies}.
+%%  Look at the last two instructions of the block. If they are a
+%%  bs_get or bs_init instruction whose destination is in IsYreg,
+%%  followed by a 'succeeded' test of that destination, the
+%%  destination is replaced with a fresh temporary variable and a
+%%  'copy' from the temporary to the original destination is
+%%  registered for the success block of the terminator.
+legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
+ #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
+ Last, IsYreg, Count0, Copies0, Acc) ->
+ NeedsFix = is_map_key(Dst, IsYreg) andalso
+ case Op of
+ bs_get -> true;
+ bs_init -> true;
+ _ -> false
+ end,
+ case NeedsFix of
+ true ->
+ TempDst = #b_var{name={'@bs_temp_dst',Count0}},
+ Count = Count0 + 1,
+ I = I0#b_set{dst=TempDst},
+ SuccI = SuccI0#b_set{args=[TempDst]},
+ Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
+ %% The terminator must branch on the result of the
+ %% 'succeeded' instruction (asserted by the match).
+ #b_br{bool=SuccDst,succ=SuccL} = Last,
+ Copies = Copies0#{SuccL=>Copy},
+ legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
+ false ->
+ legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
+ end;
+legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
+ legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
+legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
+ {reverse(Acc),Count,Copies}.
+
+%% sanitize(St0) -> St.
+%%  Remove constructs that can cause problems later:
+%%
+%%  * Unreachable blocks may cause problems for determination of
+%%    dominators.
+%%
+%%  * Some instructions (such as get_hd) don't accept literal
+%%    arguments. Evaluate the instructions and remove them.
+
+sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    {Blocks,Count} = sanitize(beam_ssa:rpo(Blocks0), Count0, Blocks0, #{}),
+    St#st{ssa=Blocks,cnt=Count}.
+
+%% sanitize(Labels, Count, Blocks, Values) -> {Blocks,Count}.
+%%  Sanitize the instructions of each block in turn. Values maps
+%%  the destination variable of each instruction that was evaluated
+%%  and removed to its literal value. When all blocks have been
+%%  processed, those variables are renamed to their values and the
+%%  unreachable blocks are removed.
+sanitize([L|Ls], Count0, Blocks0, Values0) ->
+ #b_blk{is=Is0} = Blk0 = map_get(L, Blocks0),
+ case sanitize_is(Is0, Count0, Values0, false, []) of
+ no_change ->
+ sanitize(Ls, Count0, Blocks0, Values0);
+ {Is,Count,Values} ->
+ Blk = Blk0#b_blk{is=Is},
+ Blocks = Blocks0#{L:=Blk},
+ sanitize(Ls, Count, Blocks, Values)
+ end;
+sanitize([], Count, Blocks0, Values) ->
+ %% Only rename variables if at least one instruction was removed.
+ Blocks = if
+ map_size(Values) =:= 0 ->
+ Blocks0;
+ true ->
+ beam_ssa:rename_vars(Values, [0], Blocks0)
+ end,
+
+ %% Unreachable blocks can cause problems for the dominator calculations.
+ Ls = beam_ssa:rpo(Blocks),
+ Reachable = gb_sets:from_list(Ls),
+ {case map_size(Blocks) =:= gb_sets:size(Reachable) of
+ true -> Blocks;
+ false -> remove_unreachable(Ls, Blocks, Reachable, [])
+ end,Count}.
+
+%% sanitize_is(Is, Count, Values, Changed, Acc) ->
+%%       no_change | {Is,Count,Values}.
+%%  Sanitize the instructions of one block. Arguments that are
+%%  variables found in Values are replaced with their literal
+%%  values. 'no_change' is returned if no instruction in the block
+%%  was changed or removed.
+sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
+ Count0, Values, Changed, Acc) ->
+ case sanitize_args(Args0, Values) of
+ [#b_literal{}=Map,Key] ->
+ %% Bind the literal map to a variable.
+ {MapVar,Count} = new_var('@ssa_map', Count0),
+ I = I0#b_set{args=[MapVar,Key]},
+ Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
+ sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+ [_,_]=Args0 ->
+ %% The arguments were not changed.
+ sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
+ [_,_]=Args ->
+ %% NOTE(review): Changed is passed on unmodified here even
+ %% though the arguments differ from the original ones --
+ %% confirm that this is intended.
+ I = I0#b_set{args=Args},
+ sanitize_is(Is, Count0, Values, Changed, [I|Acc])
+ end;
+sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
+ Count, Values, Changed0, Acc) ->
+ Args = sanitize_args(Args0, Values),
+ case sanitize_instr(Op, Args, I0) of
+ {value,Value0} ->
+ %% The instruction was evaluated. Drop it and remember
+ %% the value of its destination variable.
+ Value = #b_literal{val=Value0},
+ sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
+ {ok,I} ->
+ %% The instruction was replaced with another instruction.
+ sanitize_is(Is0, Count, Values, true, [I|Acc]);
+ ok ->
+ I = I0#b_set{args=Args},
+ Changed = Changed0 orelse Args =/= Args0,
+ sanitize_is(Is0, Count, Values, Changed, [I|Acc])
+ end;
+sanitize_is([], Count, Values, Changed, Acc) ->
+ case Changed of
+ true ->
+ {reverse(Acc),Count,Values};
+ false ->
+ no_change
+ end.
+
+%% Replace each argument that is a key in Values with its value.
+sanitize_args(Args, Values) ->
+    [maps:get(Arg, Values, Arg) || Arg <- Args].
+
+%% sanitize_instr(Op, Args, Instruction) -> {value,Term} | {ok,Instruction} | ok.
+%%  Try to evaluate an instruction whose arguments are literals.
+%%
+%%  {value,Term} means that the instruction was evaluated to Term
+%%  and can be removed. {ok,I} means that the instruction is to be
+%%  replaced with I. 'ok' means that the instruction is to be kept.
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
+    case erl_bifs:is_pure(erlang, Bif, 1) of
+        false ->
+            ok;
+        true ->
+            try
+                {value,erlang:Bif(Lit)}
+            catch
+                error:_ ->
+                    %% Keep the instruction so that the exception
+                    %% is raised at run time.
+                    ok
+            end
+    end;
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
+    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
+    try
+        {value,erlang:Bif(Lit1, Lit2)}
+    catch
+        error:_ ->
+            ok
+    end;
+sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
+    {value,Hd};
+sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
+    {value,Tl};
+sanitize_instr(get_tuple_element, [#b_literal{val=T},
+                                   #b_literal{val=Index}], _I)
+  when is_integer(Index), 0 =< Index, Index < tuple_size(T) ->
+    %% The index is zero-based. The guard makes sure that element/2
+    %% cannot fail; an instruction with any other index (or a
+    %% non-tuple literal) is left alone.
+    {value,element(Index+1, T)};
+sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
+    {value,case Lit of
+               [_|_] -> true;
+               _ -> false
+           end};
+sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
+                                 #b_literal{val=Arity},
+                                 #b_literal{val=Tag}], _I)
+  when is_integer(Arity), is_atom(Tag) ->
+    %% A failing guard test (for example, when Tuple is not a tuple)
+    %% selects the 'true' branch.
+    if
+        tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
+            {value,true};
+        true ->
+            {value,false}
+    end;
+sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
+    %% A literal size that is not a non-negative integer will always
+    %% fail; replace the instruction with a call to erlang:error(badarg).
+    if
+        is_integer(Sz), Sz >= 0 -> ok;
+        true -> {ok,sanitize_badarg(I0)}
+    end;
+sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
+    if
+        is_integer(Sz), Sz >= 0 -> ok;
+        true -> {ok,sanitize_badarg(I0)}
+    end;
+sanitize_instr(succeeded, [#b_literal{}], _I) ->
+    {value,true};
+sanitize_instr(_, _, _) -> ok.
+
+%% Turn the instruction into a call to erlang:error(badarg),
+%% keeping the other fields of the instruction.
+sanitize_badarg(I) ->
+    Erlang = #b_literal{val=erlang},
+    Error = #b_literal{val=error},
+    Func = #b_remote{mod=Erlang,name=Error,arity=1},
+    I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
+
+%% Build a new block map holding only the blocks in the label list.
+%% In blocks that begin with phi nodes, the phi arguments that
+%% refer to unreachable predecessors are removed.
+remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
+    Blk = case split_phis(Is0) of
+              {[],_} ->
+                  Blk0;
+              {[_|_]=Phis,Rest} ->
+                  Pruned = [prune_phi(Phi, Reachable) || Phi <- Phis],
+                  Blk0#b_blk{is=Pruned ++ Rest}
+          end,
+    remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
+remove_unreachable([], _Blocks, _Reachable, Acc) ->
+    maps:from_list(Acc).
+
+%% Keep only the phi arguments whose predecessor is reachable.
+prune_phi(#b_set{args=Args}=Phi, Reachable) ->
+    Phi#b_set{args=[Arg || {_,Pred}=Arg <- Args,
+                           gb_sets:is_member(Pred, Reachable)]}.
+
+%%%
+%%% Fix tuples.
+%%%
+
+%% fix_tuples(St0) -> St.
+%%  If compatibility with a previous version of Erlang has been
+%%  requested, tuple creation must be split into two instructions to
+%%  mirror the way tuples are created in BEAM prior to OTP 22.
+%%  Each put_tuple instruction is split into put_tuple_arity followed
+%%  by put_tuple_elements.
+
+fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    Split = fun(#b_set{op=put_tuple,args=Elements}=Put, C0) ->
+                    %% put_tuple_elements has no result of its own;
+                    %% give it a fresh destination variable.
+                    {Ignore,C} = new_var('@ssa_ignore', C0),
+                    Arity = #b_literal{val=length(Elements)},
+                    PutArity = Put#b_set{op=put_tuple_arity,args=[Arity]},
+                    PutElements = #b_set{dst=Ignore,op=put_tuple_elements,
+                                         args=Elements},
+                    {[PutArity,PutElements],C};
+               (I, C) ->
+                    {[I],C}
+            end,
+    {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(Split, [0],
+                                                     Count0, Blocks0),
+    St#st{ssa=Blocks,cnt=Count}.
+
+%%%
+%%% Find out where frames should be placed.
+%%%
+
+%% place_frames(St0) -> St.
+%%  Store in St#st.frames a list of the labels for the blocks that
+%%  need stack frame allocation instructions.
+%%
+%%  This function attempts to place stack frames as tight as possible
+%%  around the code, to avoid building stack frames for code paths
+%%  that don't need one.
+%%
+%%  Stack frames are placed in blocks that dominate all of their
+%%  descendants. That guarantees that the deallocation instructions
+%%  cannot be reached from other execution paths that didn't set up
+%%  a stack frame or set up a stack frame with a different size.
+
+place_frames(#st{ssa=Blocks}=St) ->
+    {Doms,_} = beam_ssa:dominators(Blocks),
+    {Frames,_Tried} = place_frames_1(beam_ssa:rpo(Blocks), Blocks, Doms,
+                                     gb_sets:empty(), []),
+    St#st{frames=Frames}.
+
+%% place_frames_1(Labels, Blocks, Dominators, Tried0, Frames0) ->
+%%       {Frames,Tried}.
+%%  Visit the blocks in the given order and place frames where they
+%%  are needed. A {need_frame,Label,Tried} exception thrown while
+%%  processing the remaining blocks is caught by the closest
+%%  enclosing block that does not need a frame itself and that
+%%  dominates Label; other blocks re-throw it.
+place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
+ Blk = map_get(L, Blocks),
+ case need_frame(Blk) of
+ true ->
+ %% This block needs a frame. Try to place it here.
+ {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),
+
+ %% Successfully placed. Try to place more frames in descendants
+ %% that are not dominated by this block.
+ place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+ false ->
+ try
+ place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
+ catch
+ throw:{need_frame,For,Tried1}=Reason ->
+ %% A descendant block needs a stack frame. Try to
+ %% place it here.
+ case is_dominated_by(For, L, Doms) of
+ true ->
+ %% Try to place a frame here.
+ {Frames,Tried} = do_place_frame(L, Blocks, Doms,
+ Tried1, Frames0),
+ place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+ false ->
+ %% Wrong place. This block does not dominate
+ %% the block that needs the frame. Pass it on
+ %% to our ancestors.
+ throw(Reason)
+ end
+ end
+ end;
+place_frames_1([], _, _, Tried, Frames) ->
+ {Frames,Tried}.
+
+%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
+%% Try to place a frame in this block. This function returns
+%% successfully if it either succeeds at placing a frame in this
+%% block, if an ancestor that dominates this block has already placed
+%% a frame, or if we have already tried to put a frame in this block.
+%%
+%% A {need_frame,Label,Tried} exception will be thrown if this
+%% block is not suitable for having a stack frame (i.e. it does not dominate
+%% all of its descendants). The exception means that an ancestor will have to
+%% place the frame needed by this block.
+
+do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
+ case gb_sets:is_element(L, Tried0) of
+ true ->
+ %% We have already tried to put a frame in this block.
+ {Frames,Tried0};
+ false ->
+ %% Try to place a frame in this block.
+ Tried = gb_sets:insert(L, Tried0),
+ case place_frame_here(L, Blocks, Doms, Frames) of
+ yes ->
+ %% We need a frame and it is safe to place it here.
+ {[L|Frames],Tried};
+ no ->
+ %% An ancestor has a frame. Not needed.
+ {Frames,Tried};
+ ancestor ->
+ %% This block does not dominate all of its
+ %% descendants. We must place the frame in
+ %% an ancestor.
+ throw({need_frame,L,Tried})
+ end
+ end.
+
+%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
+%% Determine whether a frame should be placed in block Label.
+%%
+%% 'no' means that a block that already has a frame dominates this
+%% block, 'yes' means that the frame can be placed in this block,
+%% and 'ancestor' means that the frame is needed but must be placed
+%% in an ancestor of this block.
+
+place_frame_here(L, Blocks, Doms, Frames) ->
+ B0 = any(fun(DomBy) ->
+ is_dominated_by(L, DomBy, Doms)
+ end, Frames),
+ case B0 of
+ true ->
+ %% This block is dominated by an ancestor block that
+ %% defines a frame. Not needed/allowed to put a frame
+ %% here.
+ no;
+ false ->
+ %% No frame in any ancestor. We need a frame.
+ %% Now check whether the frame can be placed here.
+ %% If this block dominates all of its descendants
+ %% and the predecessors of any phi nodes it can be
+ %% placed here.
+ Descendants = beam_ssa:rpo([L], Blocks),
+ PhiPredecessors = phi_predecessors(L, Blocks),
+ MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
+ Dominates = all(fun(?BADARG_BLOCK) ->
+ %% This block defines no variables and calls
+ %% erlang:error(badarg). It does not matter
+ %% whether L dominates ?BADARG_BLOCK or not;
+ %% it is still safe to put the frame in L.
+ true;
+ (Bl) ->
+ is_dominated_by(Bl, L, Doms)
+ end, MustDominate),
+
+ %% Also, this block must not be a loop header.
+ IsLoopHeader = is_loop_header(L, Blocks),
+ case Dominates andalso not IsLoopHeader of
+ true -> yes;
+ false -> ancestor
+ end
+ end.
+
+%% phi_predecessors(Label, Blocks) -> [PredecessorLabel].
+%%  Collect the predecessor labels mentioned in the arguments of
+%%  all phi nodes in block Label.
+
+phi_predecessors(L, Blocks) ->
+    #b_blk{is=Is} = map_get(L, Blocks),
+    PerPhi = [[Pred || {_,Pred} <- Args] ||
+                 #b_set{op=phi,args=Args} <- Is],
+    lists:append(PerPhi).
+
+%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
+%%  Test whether DominatedBy is a member of the dominator list
+%%  stored for Label in the Dominators map.
+
+is_dominated_by(L, DomBy, Doms) ->
+    member(DomBy, map_get(L, Doms)).
+
+%% need_frame(#b_blk{}) -> true|false.
+%%  Test whether any instruction in the block requires a stack
+%%  frame. The context passed on is {return,Arg} when the block
+%%  ends with a return, and 'body' otherwise.
+
+need_frame(#b_blk{is=Is,last=Last}) ->
+    Context = case Last of
+                  #b_ret{arg=Ret} -> {return,Ret};
+                  _ -> body
+              end,
+    need_frame_1(Is, Context).
+
+%% need_frame_1(Is, Context) -> true|false.
+%% Scan the instructions of a block. Context is {return,Var} when
+%% the block returns Var, or 'body' (see need_frame/1). The clause
+%% for a single remaining call rewrites {return,Var} to 'tail' when
+%% the call result is the returned value, and to 'body' otherwise.
+%% The order of the clauses is significant.
+
+need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
+ %% Since make_fun clobbers X registers, a stack frame is needed if
+ %% any of the following instructions use any other variable than
+ %% the one holding the reference to the created fun.
+ need_frame_1(Is, Context) orelse
+ case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
+ [Fun] -> false;
+ [_|_] -> true
+ end;
+need_frame_1([#b_set{op=new_try_tag}|_], _) ->
+ true;
+need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
+ %% Last instruction is a call in a returning block. Re-dispatch
+ %% with the same instruction list but a context that the generic
+ %% call clause below understands.
+ if
+ Val =:= Ret -> need_frame_1(Is, tail);
+ true -> need_frame_1(Is, body)
+ end;
+need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
+ case Func of
+ #b_remote{mod=#b_literal{val=Mod},
+ name=#b_literal{val=Name},
+ arity=Arity} when is_atom(Mod), is_atom(Name) ->
+ case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+ true ->
+ false;
+ false ->
+ %% A frame is needed unless this is the final call
+ %% in a non-body context and not a trapping BIF.
+ Context =:= body orelse
+ Is =/= [] orelse
+ is_trap_bif(Mod, Name, Arity)
+ end;
+ #b_remote{} ->
+ %% This is an apply(), which always needs a frame.
+ true;
+ #b_local{} ->
+ Context =:= body orelse Is =/= [];
+ _ ->
+ %% A fun call always needs a frame.
+ true
+ end;
+need_frame_1([I|Is], Context) ->
+ %% Any other instruction needs a frame only if it clobbers X registers.
+ beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
+need_frame_1([], _) -> false.
+
+%% is_trap_bif(Mod, Name, Arity) -> true|false.
+%%  Test whether a stack frame is needed for this BIF. Only the
+%%  erlang BIFs listed below qualify.
+
+is_trap_bif(erlang, Name, Arity) ->
+    lists:member({Name,Arity},
+                 [{'!',2},
+                  {link,1},
+                  {unlink,1},
+                  {monitor_node,2},
+                  {group_leader,2},
+                  {exit,2}]);
+is_trap_bif(_, _, _) ->
+    false.
+
+%%%
+%%% Fix variables used in matching in receive.
+%%%
+%%% The loop_rec/2 instruction may return a reference to a
+%%% message outside of any heap or heap fragment. If the message
+%%% does not match, it is not allowed to store any reference to
+%%% the message (or part of the message) on the stack. If we do,
+%%% the message will be corrupted if there happens to be a GC.
+%%%
+%%% Here we make sure to introduce copies of variables that are
+%%% matched out and subsequently used after the remove_message/0
+%%% instructions. That will make sure that only X registers are
+%%% used during matching.
+%%%
+%%% Depending on where variables are defined and used, they must
+%%% be handled in two different ways.
+%%%
+%%% Variables that are always defined in the receive (before branching
+%%% out into the different clauses of the receive) and used after the
+%%% receive must be handled in the following way: Before each
+%%% remove_message instruction, each such variable must be copied, and
+%%% all variables must be consolidated using a phi node in the
+%%% common exit block for the receive.
+%%%
+%%% Variables that are matched out and used in the same clause
+%%% need copy instructions before the remove_message instruction
+%%% in that clause.
+%%%
+
+%% fix_receives(St0) -> St.
+%%  Run fix_receives_1/3 over all blocks and store the updated
+%%  blocks and variable counter in the state.
+
+fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    Worklist = maps:to_list(Blocks0),
+    {Blocks,Count} = fix_receives_1(Worklist, Blocks0, Count0),
+    St#st{ssa=Blocks,cnt=Count}.
+
+%% fix_receives_1([{Label,Block}], Blocks0, Count0) -> {Blocks,Count}.
+%%  For each block that starts with peek_message, fix up the
+%%  variables matched out in that receive. Other blocks are
+%%  left alone.
+
+fix_receives_1([], Blocks, Count) ->
+    {Blocks,Count};
+fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
+    case Blk of
+        #b_blk{is=[#b_set{op=peek_message}|_]} ->
+            RmLabels = find_rm_blocks(L, Blocks0),
+            Exit = find_loop_exit(RmLabels, Blocks0),
+            LoopDefs = beam_ssa:def([L], Blocks0),
+
+            %% First the variables used in the common exit block...
+            Common = recv_common(LoopDefs, Exit, Blocks0),
+            {Blocks1,Count1} = recv_fix_common(Common, Exit, RmLabels,
+                                               Blocks0, Count0),
+
+            %% ...then the remaining ones, clause by clause.
+            ClauseDefs = ordsets:subtract(LoopDefs, Common),
+            {Blocks,Count} = fix_receive(RmLabels, ClauseDefs,
+                                         Blocks1, Count1),
+            fix_receives_1(Ls, Blocks, Count);
+        #b_blk{} ->
+            fix_receives_1(Ls, Blocks0, Count0)
+    end.
+
+%% recv_common(Defs, ExitLabel | none, Blocks) -> [Var].
+%%  Return the variables in Defs that are used, but not defined,
+%%  in the blocks reachable from the exit block.
+
+recv_common(_Defs, none, _Blocks) ->
+    %% No common exit block: the receive is in the tail
+    %% position of the function.
+    [];
+recv_common(Defs, Exit, Blocks) ->
+    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
+    ordsets:intersection(ordsets:subtract(Defs, ExitDefs), ExitUsed).
+
+%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
+%%                 Blocks0, Count0) -> {Blocks,Count}.
+%%  Handle variables always defined in a receive and used in the
+%%  exit block following the receive. Each such variable gets one
+%%  copy per remove_message block, and the copies are joined by a
+%%  new phi node at the start of the exit block.
+
+recv_fix_common([], _Exit, _Rm, Blocks, Count) ->
+    {Blocks,Count};
+recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
+    %% New name for the variable from the exit block and onwards.
+    {Msg,Count1} = new_var('@recv', Count0),
+    Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
+
+    %% One new variable for each remove_message block.
+    Bases = duplicate(length(Rm), '@recv'),
+    {MsgVars,Count} = new_vars(Bases, Count1),
+
+    %% Put the phi node first in the exit block.
+    PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
+    Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
+    ExitBlk0 = map_get(Exit, Blocks1),
+    ExitIs = ExitBlk0#b_blk.is,
+    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitIs]},
+    Blocks2 = Blocks1#{Exit:=ExitBlk},
+
+    %% Insert the copies in the remove_message blocks.
+    Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
+    recv_fix_common(T, Exit, Rm, Blocks, Count).
+
+%% recv_fix_common_1([NewVar], [RemoveMessageLabel], Msg, Blocks0) -> Blocks.
+%%  For each remove_message block, rename Msg to the matching new
+%%  variable from that block onwards and insert a copy of Msg into
+%%  the new variable after any phi nodes in the block.
+
+recv_fix_common_1([], [], _Msg, Blocks) ->
+    Blocks;
+recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
+    Blocks1 = beam_ssa:rename_vars(#{Msg=>V}, [Rm], Blocks0),
+    #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
+    CopyI = #b_set{op=copy,dst=V,args=[Msg]},
+    Blk = Blk0#b_blk{is=insert_after_phis(Is0, [CopyI])},
+    recv_fix_common_1(Vs, Rms, Msg, Blocks1#{Rm:=Blk}).
+
+%% fix_exit_phi_args([Var], [RemoveMessageLabel], Exit, Blocks) -> [{Var,Pred}].
+%%  Build the phi arguments for the exit block: each variable is
+%%  paired with every predecessor of Exit reachable from the
+%%  corresponding remove_message block.
+
+fix_exit_phi_args([], [], _Exit, _Blocks) ->
+    [];
+fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
+    Reachable = beam_ssa:rpo([Rm], Blocks),
+    Args = [{V,Pred} || Pred <- exit_predecessors(Reachable, Exit, Blocks)],
+    Args ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks).
+
+%% exit_predecessors([Label], Exit, Blocks) -> [Label].
+%%  Keep the labels whose blocks have Exit as a successor.
+
+exit_predecessors(Ls, Exit, Blocks) ->
+    [L || L <- Ls,
+          member(Exit, beam_ssa:successors(map_get(L, Blocks)))].
+
+%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
+%%  For each remove_message block, add a copy instruction for
+%%  every variable in Defs that is used, but not defined, from
+%%  that block onwards, and rename the uses to the copy.
+
+fix_receive([], _Defs, Blocks, Count) ->
+    {Blocks,Count};
+fix_receive([L|Ls], Defs, Blocks0, Count0) ->
+    {RmDefs,RmUsed} = beam_ssa:def_used([L], Blocks0),
+    ToCopy = ordsets:intersection(ordsets:subtract(Defs, RmDefs), RmUsed),
+    Bases = [Base || #b_var{name=Base} <- ToCopy],
+    {NewVars,Count} = new_vars(Bases, Count0),
+    Ren = zip(ToCopy, NewVars),
+    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
+    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
+    Blk = Blk0#b_blk{is=insert_after_phis(Is0, Copies)},
+    fix_receive(Ls, Defs, Blocks1#{L:=Blk}, Count).
+
+%% find_loop_exit([Label], Blocks) -> Label | none.
+%%  Find the block to which control is transferred when the
+%%  receive loop is exited. Only the first two labels are
+%%  examined; with fewer than two labels the result is 'none'.
+
+find_loop_exit([First,Second|_], Blocks) ->
+    RevPath1 = reverse(beam_ssa:rpo([First], Blocks)),
+    RevPath2 = reverse(beam_ssa:rpo([Second], Blocks)),
+    find_loop_exit_1(RevPath1, RevPath2, none);
+find_loop_exit(_, _) ->
+    none.
+
+%% find_loop_exit_1(List1, List2, Default) -> Element | Default.
+%%  Return the last element of the common prefix of the two lists,
+%%  or Default if the lists have no common prefix.
+
+find_loop_exit_1([H1|T1], [H2|T2], _Prev) when H1 =:= H2 ->
+    find_loop_exit_1(T1, T2, H1);
+find_loop_exit_1(_, _, Exit) ->
+    Exit.
+
+%% find_rm_blocks(StartLabel, Blocks) -> [Label].
+%%  Find all blocks containing a remove_message instruction within
+%%  the receive loop whose peek_message label is StartLabel. The
+%%  search starts from the successors of StartLabel.
+
+find_rm_blocks(L, Blocks) ->
+    Succ = beam_ssa:successors(map_get(L, Blocks)),
+    find_rm_blocks_1(Succ, gb_sets:singleton(L), Blocks).
+
+%% find_rm_blocks_1([Label], Seen, Blocks) -> [Label].
+%%  Worklist traversal used by find_rm_blocks/2. Each label is
+%%  examined at most once.
+
+find_rm_blocks_1([], _Seen, _Blocks) ->
+    [];
+find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
+    case gb_sets:is_member(L, Seen0) of
+        true ->
+            find_rm_blocks_1(Ls, Seen0, Blocks);
+        false ->
+            Seen = gb_sets:insert(L, Seen0),
+            Blk = map_get(L, Blocks),
+            case find_rm_act(Blk#b_blk.is) of
+                found ->
+                    %% This block has a remove_message instruction.
+                    [L|find_rm_blocks_1(Ls, Seen, Blocks)];
+                prune ->
+                    %% Looping back. Ignore the successors.
+                    find_rm_blocks_1(Ls, Seen, Blocks);
+                continue ->
+                    %% Neutral block. Examine its successors before
+                    %% the rest of the worklist.
+                    Next = beam_ssa:successors(Blk) ++ Ls,
+                    find_rm_blocks_1(Next, Seen, Blocks)
+            end
+    end.
+
+%% find_rm_act([#b_set{}]) -> found | prune | continue.
+%%  Classify a block by the first decisive instruction in it:
+%%  'found' for remove_message, 'prune' for an instruction that
+%%  loops back or waits, 'continue' if there is none.
+
+find_rm_act([]) ->
+    continue;
+find_rm_act([#b_set{op=remove_message}|_]) ->
+    found;
+find_rm_act([#b_set{op=Op}|_])
+  when Op =:= peek_message; Op =:= recv_next;
+       Op =:= wait_timeout; Op =:= wait ->
+    prune;
+find_rm_act([#b_set{}|Is]) ->
+    find_rm_act(Is).
+
+%%%
+%%% Find out which variables need to be stored in Y registers.
+%%%
+
+%% State tracked per block by find_yregs:
+%% d - variables defined since the last instruction that
+%% clobbered X registers
+%% k - variables killed by an instruction that clobbers X registers
+-record(dk, {d :: ordsets:ordset(var_name()),
+ k :: ordsets:ordset(var_name())
+ }).
+
+%% find_yregs(St0) -> St.
+%%  Find all variables that must be stored in Y registers. Annotate
+%%  the blocks that allocate frames with the set of Y registers
+%%  used within that stack frame.
+%%
+%%  Basically, we follow all execution paths starting from a block
+%%  that allocates a frame, keeping track of all defined variables
+%%  and all variables killed by an instruction that clobbers X
+%%  registers. For every use of a variable, we check whether it is
+%%  in the set of killed variables; if it is, it must be stored in
+%%  a Y register.
+
+find_yregs(#st{frames=[]}=St) ->
+    %% No stack frames at all, so nothing to annotate.
+    St;
+find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
+    ArgVars = [V || #b_var{}=V <- Args],
+    FrameDefs = find_defs(Frames, Blocks0, ArgVars),
+    St#st{ssa=find_yregs_1(FrameDefs, Blocks0)}.
+
+%% find_yregs_1([{FrameLabel,Defs}], Blocks0) -> Blocks.
+%%  For each frame block, compute the Y register variables for the
+%%  blocks reachable from it and store them in a 'yregs' annotation
+%%  on the frame block.
+
+find_yregs_1([], Blocks) ->
+    Blocks;
+find_yregs_1([{F,Defs}|Fs], Blocks0) ->
+    Initial = #{F=>#dk{d=Defs,k=[]}},
+    Order = beam_ssa:rpo([F], Blocks0),
+    Yregs = find_yregs_2(Order, Blocks0, Initial, []),
+    Blk = beam_ssa:add_anno(yregs, Yregs, map_get(F, Blocks0)),
+    find_yregs_1(Fs, Blocks0#{F:=Blk}).
+
+%% find_yregs_2([Label], Blocks, DKMap, Yregs0) -> Yregs.
+%%  Process the blocks in the given order, accumulating Y register
+%%  variables and propagating the defined/killed state of each
+%%  block to its successors.
+
+find_yregs_2([], _Blocks, _D, Yregs) ->
+    Yregs;
+find_yregs_2([L|Ls], Blocks, D0, Yregs0) ->
+    #b_blk{is=Is,last=Last} = Blk = map_get(L, Blocks),
+    {Yregs1,DK} = find_yregs_is(Is, map_get(L, D0), Yregs0),
+    Yregs = find_yregs_terminator(Last, DK, Yregs1),
+    D = find_update_succ(beam_ssa:successors(Blk), DK, D0),
+    find_yregs_2(Ls, Blocks, D, Yregs).
+
+%% find_defs([FrameLabel], Blocks, Defs0) -> [{FrameLabel,Defs}].
+%%  Starting from block 0 with the initial definitions Defs0,
+%%  collect the variables defined on the way to each frame block.
+
+find_defs(Frames, Blocks, Defs) ->
+    FrameSet = gb_sets:from_list(Frames),
+    {FrameDefs,_Seen} = find_defs_1([0], Blocks, FrameSet,
+                                    gb_sets:empty(), Defs, []),
+    FrameDefs.
+
+%% find_defs_1([Label], Blocks, FramesSet, Seen0, Defs0, Acc0) -> {Acc,Seen}.
+%% Walk the blocks reachable from the given labels. When a label in
+%% FramesSet is reached, {Label,Defs} is added to the accumulator
+%% (Defs being the definitions collected on the path so far) and the
+%% walk does not continue into that block. For any other unseen
+%% block, the remaining labels are handled first with the current
+%% definitions; then the block's successors are handled with the
+%% block's own definitions added.
+
+find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
+ case gb_sets:is_member(L, Frames) of
+ true ->
+ %% Frame block: record the definitions seen on this path.
+ OrderedDefs = ordsets:from_list(Defs0),
+ find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
+ [{L,OrderedDefs}|Acc0]);
+ false ->
+ case gb_sets:is_member(L, Seen0) of
+ true ->
+ find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
+ false ->
+ Seen1 = gb_sets:insert(L, Seen0),
+ %% Siblings first, still using Defs0.
+ {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
+ #b_blk{is=Is} = Blk = map_get(L, Blocks),
+ Defs = find_defs_is(Is, Defs0),
+ Successors = beam_ssa:successors(Blk),
+ find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
+ end
+ end;
+find_defs_1([], _, _, Seen, _, Acc) ->
+ {Acc,Seen}.
+
+%% find_defs_is([#b_set{}], Defs0) -> Defs.
+%%  Push the destination variable of each instruction onto Defs0.
+
+find_defs_is(Is, Acc) ->
+    lists:foldl(fun(#b_set{dst=Dst}, Defs) -> [Dst|Defs] end, Acc, Is).
+
+%% find_update_succ([Successor], #dk{}, DKMap0) -> DKMap.
+%%  Propagate the defined/killed state of a block to its successors.
+%%  A successor that already has a state gets the intersection of
+%%  the defined sets and the union of the killed sets; otherwise it
+%%  simply inherits the given state.
+
+find_update_succ([], _DK, D) ->
+    D;
+find_update_succ([_|_]=Ss, #dk{d=Defs0,k=Killed0}=DK0, D0) ->
+    lists:foldl(
+      fun(S, D) ->
+              case D of
+                  #{S:=#dk{d=Defs1,k=Killed1}} ->
+                      Merged = #dk{d=ordsets:intersection(Defs0, Defs1),
+                                   k=ordsets:union(Killed0, Killed1)},
+                      D#{S:=Merged};
+                  #{} ->
+                      D#{S=>DK0}
+              end
+      end, D0, Ss).
+
+%% find_yregs_is([#b_set{}], #dk{}, Yregs0) -> {Yregs,#dk{}}.
+%%  Every killed variable used by an instruction is added to the
+%%  Y register set. An instruction that clobbers X registers moves
+%%  all currently defined variables to the killed set, leaving only
+%%  its own destination defined.
+
+find_yregs_is([], DK, Yregs) ->
+    {Yregs,DK};
+find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=DK0, Yregs0) ->
+    UsedKilled = ordsets:intersection(beam_ssa:used(I), Killed0),
+    Yregs = ordsets:union(Yregs0, UsedKilled),
+    DK = case beam_ssa:clobbers_xregs(I) of
+             true ->
+                 DK0#dk{d=[Dst],k=ordsets:union(Defs0, Killed0)};
+             false ->
+                 DK0#dk{d=ordsets:add_element(Dst, Defs0)}
+         end,
+    find_yregs_is(Is, DK, Yregs).
+
+%% find_yregs_terminator(Terminator, #dk{}, Yregs0) -> Yregs.
+%%  Add the killed variables used by the terminator to Yregs0.
+
+find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
+    UsedKilled = ordsets:intersection(beam_ssa:used(Terminator), Killed),
+    ordsets:union(Yregs0, UsedKilled).
+
+%%%
+%%% Try to reduce the size of the stack frame by adding explicit
+%%% 'copy' instructions for return values from 'call' and 'make_fun' that
+%%% need to be saved in Y registers. Here is an example to show
+%%% how that's useful. First, here is the Erlang code:
+%%%
+%%% f(Pid) ->
+%%% Res = foo(42),
+%%% _ = node(Pid),
+%%% bar(),
+%%% Res.
+%%%
+%%% Compiled to SSA format, the main part of the code looks like this:
+%%%
+%%% 0:
+%%% Res = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%% 3:
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% It can be seen that the variables Pid and Res must be saved in Y
+%%% registers in order to survive the function calls. A previous sub
+%%% pass has inserted a 'copy' instruction to save the value of the
+%%% variable Pid:
+%%%
+%%% 0:
+%%% Pid:4 = copy Pid
+%%% Res = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid:4
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% The Res and Pid:4 variables must be assigned to different Y registers
+%%% because they are live at the same time. copy_retval() inserts a
+%%% 'copy' instruction to copy Res to a new variable:
+%%%
+%%% 0:
+%%% Pid:4 = copy Pid
+%%% Res:6 = call local literal foo/1, literal 42
+%%% _1 = bif:node Pid:4
+%%% @ssa_bool = succeeded _1
+%%% br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%% Res = copy Res:6
+%%% @ssa_ignored = call local literal bar/0
+%%% ret Res
+%%%
+%%% The new variable Res:6 is used to capture the return value from the call.
+%%% The variables Pid:4 and Res are no longer live at the same time, so they
+%%% can be assigned to the same Y register.
+%%%
+
+%% copy_retval(St0) -> St.
+%%  Run copy_retval_1/3 over all frame blocks and store the
+%%  updated blocks and variable counter in the state.
+
+copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
+    {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
+    St#st{cnt=Count,ssa=Blocks}.
+
+%% copy_retval_1([FrameLabel], Blocks0, Count0) -> {Blocks,Count}.
+%%  For each frame block, build the set of Y register variables
+%%  from its 'yregs' annotation (adjusted for the copies already
+%%  in the block) and process the blocks reachable from it.
+
+copy_retval_1([], Blocks, Count) ->
+    {Blocks,Count};
+copy_retval_1([F|Fs], Blocks0, Count0) ->
+    #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
+    Yregs = collect_yregs(Is, gb_sets:from_list(Yregs0)),
+    Order = beam_ssa:rpo([F], Blocks0),
+    {Blocks,Count} = copy_retval_2(Order, Yregs, none, Blocks0, Count0),
+    copy_retval_1(Fs, Blocks, Count).
+
+%% collect_yregs([#b_set{}], Yregs0) -> Yregs.
+%%  For every 'copy' of a variable in the block, replace the
+%%  source variable with the destination variable in the set.
+%%  The source must already be a member of the set.
+
+collect_yregs([], Yregs) ->
+    Yregs;
+collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is], Yregs0) ->
+    true = gb_sets:is_member(X, Yregs0),        %Assertion.
+    collect_yregs(Is, gb_sets:insert(Y, gb_sets:delete(X, Yregs0)));
+collect_yregs([#b_set{}|Is], Yregs) ->
+    collect_yregs(Is, Yregs).
+
+%% copy_retval_2([Label], Yregs, Copy0, Blocks0, Count0) -> {Blocks,Count}.
+%% Process the blocks in order. Copy0 is either 'none' or a pending
+%% 'copy' instruction handed over from the previous block. When the
+%% list of labels is exhausted, no copy may be pending.
+
+copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
+ #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
+ %% RC is true when the block ends with a branch whose failure
+ %% label is ?BADARG_BLOCK and whose success label is the next
+ %% block to be processed.
+ RC = case {Last,Ls} of
+ {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
+ true;
+ {_,_} ->
+ false
+ end,
+ case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
+ {Is,Count} ->
+ case Copy0 =:= none andalso Count0 =:= Count of
+ true ->
+ %% No copy came in and no variable was created;
+ %% keep the block as it is.
+ copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
+ false ->
+ Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+ copy_retval_2(Ls, Yregs, none, Blocks, Count)
+ end;
+ {Is,Count,Copy} ->
+ %% A copy is still pending; pass it on to the next block.
+ Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+ copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
+ end;
+copy_retval_2([], _Yregs, none, Blocks, Count) ->
+ {Blocks,Count}.
+
+%% copy_retval_is(Is, RC, Yregs, Copy, Count, Acc) ->
+%% {Is,Count} | {Is,Count,Copy}.
+%% Rewrite the instructions of one block. Copy is 'none' or a pending
+%% 'copy' instruction that has not yet been placed; Acc holds the
+%% already processed instructions in reverse order. RC is the flag
+%% computed by copy_retval_2/5. The three-element result is returned
+%% when a copy is still pending at the end of the block and RC is
+%% true. The order of the clauses is significant.
+
+copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
+ Copy, Count, Acc) ->
+ I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+ {reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+ when Op =:= call; Op =:= make_fun ->
+ {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+ {reverse(Acc, [I]),Count};
+copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
+ %% Place any pending copy before the last instruction.
+ {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
+ %% Place any pending copy before the last two instructions.
+ {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
+ Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
+ {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
+ case gb_sets:is_member(Dst, Yregs) of
+ true ->
+ %% Let the instruction define a new variable and make a
+ %% copy from it to the original destination pending.
+ {NewVar,Count} = new_var(RetName, Count1),
+ Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
+ I = I1#b_set{dst=NewVar},
+ copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
+ false ->
+ copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
+ end;
+copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
+ %% While a copy is pending, uses of its destination are replaced
+ %% by its source.
+ I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+ case beam_ssa:clobbers_xregs(I) of
+ true ->
+ %% The pending copy must be placed before this instruction.
+ copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
+ false ->
+ copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
+ end;
+copy_retval_is([], RC, _, Copy, Count, Acc) ->
+ case {Copy,RC} of
+ {none,_} ->
+ {reverse(Acc),Count};
+ {#b_set{},true} ->
+ %% Hand the pending copy over to the caller.
+ {reverse(Acc),Count,Copy};
+ {#b_set{},false} ->
+ {reverse(Acc, [Copy]),Count}
+ end.
+
+%%
+%% Consider this code:
+%%
+%% Var = ...
+%% ...
+%% A1 = call foo/0
+%% A = copy A1
+%% B = call bar/1, Var
+%%
+%% If the Var variable is no longer used after this code, its Y register
+%% can't be reused for A. To allow the Y register to be reused
+%% we will need to insert 'copy' instructions for arguments that are
+%% in Y registers:
+%%
+%% Var = ...
+%% ...
+%% A1 = call foo/0
+%% Var1 = copy Var
+%% A = copy A1
+%% B = call bar/1, Var1
+%%
+
+%% place_retval_copy(I0, Yregs, Copy | none, Count0, Acc0) -> {I,Count,Acc}.
+%%  Place the pending Copy (if any) on the accumulator, preceded by
+%%  copies of the arguments of I0 that are in Y registers (other
+%%  than the destination of Copy). The arguments of I0 are replaced
+%%  with the copied variables.
+
+place_retval_copy(I, _Yregs, none, Count, Acc) ->
+    {I,Count,Acc};
+place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
+    #b_set{dst=Avoid} = Copy,
+    {Args,CopyAcc,Count} =
+        copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
+    {I#b_set{args=[F|Args]},Count,[Copy|CopyAcc]}.
+
+%% copy_func_args(Args0, Yregs, Avoid, CopyAcc0, Acc, Count0) ->
+%%       {Args,CopyAcc,Count}.
+%%  Replace each variable argument that is in Yregs and differs
+%%  from Avoid with a new variable, pushing the corresponding
+%%  'copy' instruction onto CopyAcc0. Other arguments are kept.
+
+copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
+    {reverse(Acc),CopyAcc,Count};
+copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
+    NeedsCopy = gb_sets:is_member(A, Yregs) andalso A =/= Avoid,
+    case NeedsCopy of
+        true ->
+            {NewVar,Count} = new_var(AName, Count0),
+            Copy = #b_set{op=copy,dst=NewVar,args=[A]},
+            copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc],
+                           [NewVar|Acc], Count);
+        false ->
+            copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
+    end;
+copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
+    %% Not a variable; keep the argument unchanged.
+    copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count).
+
+%% acc_copy(Acc, Copy | none) -> Acc.
+%%  Push the copy instruction, if there is one, onto Acc.
+
+acc_copy(Acc, #b_set{}=Copy) ->
+    [Copy|Acc];
+acc_copy(Acc, none) ->
+    Acc.
+
+%% copy_sub_args(Args0, Copy | none) -> Args.
+%%  Replace every argument equal to the destination of Copy with
+%%  the source of Copy.
+
+copy_sub_args(Args, none) ->
+    Args;
+copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
+    lists:map(fun(A) -> sub_arg(A, Dst, Src) end, Args).
+
+%% sub_arg(Arg, Old, New) -> Arg | New.
+%%  Return New if Arg is exactly equal to Old; otherwise Arg.
+
+sub_arg(Arg, Old, New) when Arg =:= Old ->
+    New;
+sub_arg(Arg, _Old, _New) ->
+    Arg.
+
+%%%
+%%% Consider:
+%%%
+%%% x1/Hd = get_hd x0/Cons
+%%% y0/Tl = get_tl x0/Cons
+%%%
+%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
+%%% a 'move' instruction must be inserted.
+%%%
+%%% If we swap get_hd and get_tl when Tl is in a Y register,
+%%% x0 can be used for Hd if Cons is not used again:
+%%%
+%%% y0/Tl = get_tl x0/Cons
+%%% x0/Hd = get_hd x0/Cons
+%%%
+
+%% opt_get_list(St0) -> St.
+%%  Apply the get_hd/get_tl swap optimization to all blocks,
+%%  using the register reservations made so far.
+
+opt_get_list(#st{ssa=Blocks0,res=Res}=St) ->
+    Reserved = maps:from_list(Res),
+    Order = beam_ssa:rpo(Blocks0),
+    Blocks = opt_get_list_1(Order, Reserved, Blocks0),
+    St#st{ssa=Blocks}.
+
+%% opt_get_list_1([Label], Res, Blocks0) -> Blocks.
+%%  Update only the blocks whose instructions were changed.
+
+opt_get_list_1([], _Res, Blocks) ->
+    Blocks;
+opt_get_list_1([L|Ls], Res, Blocks) ->
+    #b_blk{is=Is0} = Blk = map_get(L, Blocks),
+    case opt_get_list_is(Is0, Res, [], false) of
+        {yes,Is} ->
+            opt_get_list_1(Ls, Res, Blocks#{L:=Blk#b_blk{is=Is}});
+        no ->
+            opt_get_list_1(Ls, Res, Blocks)
+    end.
+
+%% opt_get_list_is(Is0, Res, Acc, Changed) -> {yes,Is} | no.
+%%  Swap a get_hd immediately followed by a get_tl on the same
+%%  cons cell when only the tail will be in a Y register.
+%%  Returns 'no' if nothing was swapped.
+
+opt_get_list_is([#b_set{op=get_hd,dst=Hd,args=[Cons]}=GetHd,
+                 #b_set{op=get_tl,dst=Tl,args=[Cons]}=GetTl|Is],
+                Res, Acc, Changed) ->
+    %% Note that when this pass is run, only Y registers have
+    %% reservations. The absence of an entry for a variable
+    %% therefore means that the variable will be in an X register.
+    Swap = case Res of
+               #{Hd:={y,_}} -> false;   %Hd in a Y register.
+               #{Tl:={y,_}} -> true;    %Only Tl in a Y register.
+               #{} -> false             %Both in X registers.
+           end,
+    case Swap of
+        true ->
+            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
+        false ->
+            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
+    end;
+opt_get_list_is([I|Is], Res, Acc, Changed) ->
+    opt_get_list_is(Is, Res, [I|Acc], Changed);
+opt_get_list_is([], _Res, Acc, Changed) ->
+    case Changed of
+        false -> no;
+        true -> {yes,reverse(Acc)}
+    end.
+
+%%%
+%%% Number instructions in the order they are executed.
+%%%
+
+%% number_instructions(St0) -> St.
+%%  Number instructions in the order they are executed, starting
+%%  at 1 and using a step size of 2. Phi instructions are not
+%%  numbered. All phi variables in a block will be live one unit
+%%  before the first non-phi instruction in the block.
+
+number_instructions(#st{ssa=Blocks0}=St) ->
+    Order = beam_ssa:rpo(Blocks0),
+    Blocks = number_is_1(Order, 1, Blocks0),
+    St#st{ssa=Blocks}.
+
+%% number_is_1([Label], N0, Blocks0) -> Blocks.
+%%  Number the instructions and the terminator of each block,
+%%  continuing the numbering from one block to the next.
+
+number_is_1([], _N, Blocks) ->
+    Blocks;
+number_is_1([L|Ls], N0, Blocks) ->
+    #b_blk{is=Is0,last=Last0} = Blk0 = map_get(L, Blocks),
+    {Is,LastN} = number_is_2(Is0, N0, []),
+    Last = beam_ssa:add_anno(n, LastN, Last0),
+    Blk = Blk0#b_blk{is=Is,last=Last},
+    number_is_1(Ls, LastN + 2, Blocks#{L:=Blk}).
+
+%% number_is_2(Is0, N0, Acc) -> {Is,N}.
+%%  Annotate each non-phi instruction with its number, stepping
+%%  by 2. Acc holds already processed instructions in reverse
+%%  order; they are put back in front of the result.
+
+number_is_2(Is, N0, Acc) ->
+    Number = fun(#b_set{op=phi}=I, N) ->
+                     {I,N};
+                (I, N) ->
+                     {beam_ssa:add_anno(n, N, I),N+2}
+             end,
+    {Numbered,N} = lists:mapfoldl(Number, N0, Is),
+    {lists:reverse(Acc, Numbered),N}.
+
+%%%
+%%% Calculate live intervals.
+%%%
+
+%% live_intervals(St0) -> St.
+%%  Calculate the live intervals for all variables. Function
+%%  arguments get the initial range {0,1}; the remaining ranges
+%%  are collected block by block and merged per variable.
+
+live_intervals(#st{args=Args,ssa=Blocks}=St) ->
+    ArgRanges = [{V,{0,1}} || #b_var{}=V <- Args],
+    PerBlock = fun(L, _Blk, Acc) -> live_interval_blk(L, Blocks, Acc) end,
+    {BlockRanges,_LiveMap} = beam_ssa:fold_po(PerBlock, {[],#{}}, Blocks),
+    Intervals = merge_ranges(rel2fam(ArgRanges ++ BlockRanges)),
+    St#st{intervals=Intervals}.
+
+%% merge_ranges([{Var,[Range]}]) -> [{Var,[Range]}].
+%%  Merge the adjoining ranges of each variable.
+
+merge_ranges(Fam) ->
+    lists:map(fun({V,Rs}) -> {V,merge_ranges_1(Rs)} end, Fam).
+
+%% merge_ranges_1([{Start,End}]) -> [{Start,End}].
+%%  Join two consecutive ranges when the first one ends exactly
+%%  where the second one starts.
+
+merge_ranges_1([{Start,Mid1},{Mid2,End}|Rs]) when Mid1 =:= Mid2 ->
+    merge_ranges_1([{Start,End}|Rs]);
+merge_ranges_1([R|Rs]) ->
+    [R|merge_ranges_1(Rs)];
+merge_ranges_1([]) ->
+    [].
+
+%% live_interval_blk(Label, Blocks, {Vars0,LiveMap0}) -> {Vars,LiveMap}.
+%%  Add the ranges for the variables in block Label to Vars0 and
+%%  record in LiveMap which variables are live at the beginning
+%%  of the block.
+
+live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
+    %% Variables live in the successors are live out of this block.
+    Succs = beam_ssa:successors(L, Blocks),
+    LiveOut = update_successors(Succs, L, Blocks, LiveMap0, []),
+
+    %% They are considered used just after the terminator.
+    #b_blk{is=Is,last=Last} = map_get(L, Blocks),
+    EndN = beam_ssa:get_anno(n, Last),
+    LiveOutUses = [{V,{use,EndN+1}} || V <- LiveOut],
+
+    %% Collect the uses and definitions in this block.
+    First = first_number(Is, Last),
+    UseDef0 = live_interval_blk_1([Last|reverse(Is)], First, LiveOutUses),
+    UseDef = rel2fam(UseDef0),
+
+    %% Live at the beginning: live out or used here, but not
+    %% defined here.
+    UsedHere = [V || {V,[{use,_}|_]} <- UseDef],
+    DefinedHere = [V || {V,[{def,_}|_]} <- UseDef],
+    LiveIn = ordsets:subtract(ordsets:union(LiveOut, UsedHere),
+                              DefinedHere),
+
+    Vars = make_block_ranges(UseDef, First, Vars0),
+    {Vars,LiveMap0#{L=>LiveIn}}.
+
+%% make_block_ranges([{Var,[{def,N}|{use,N}]}], First, Acc0) -> Acc.
+%%  Push one range per variable onto Acc0. A variable defined in
+%%  the block ranges from its definition to its last use (or to
+%%  the definition itself when unused); a variable only used in
+%%  the block ranges from First to its last use.
+
+make_block_ranges(UseDef, First, Acc0) ->
+    lists:foldl(
+      fun({V,[{def,Def}]}, Acc) ->
+              [{V,{Def,Def}}|Acc];
+         ({V,[{def,Def}|Uses]}, Acc) ->
+              {use,Last} = last(Uses),
+              [{V,{Def,Last}}|Acc];
+         ({V,[{use,_}|_]=Uses}, Acc) ->
+              {use,Last} = last(Uses),
+              [{V,{First,Last}}|Acc]
+      end, Acc0, UseDef).
+
+%% live_interval_blk_1([Instruction], FirstNumber, Acc0) -> Acc.
+%%  Collect {Var,{def,N}} and {Var,{use,N}} entries for the given
+%%  instructions (including the terminator). Phi nodes define
+%%  their variable at FirstNumber and contribute no uses here.
+
+live_interval_blk_1([], _FirstNumber, Acc) ->
+    Acc;
+live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc) ->
+    live_interval_blk_1(Is, FirstNumber, [{Dst,{def,FirstNumber}}|Acc]);
+live_interval_blk_1([I|Is], FirstNumber, Acc0) ->
+    N = beam_ssa:get_anno(n, I),
+    Uses = [{V,{use,N}} || V <- beam_ssa:used(I)],
+    Acc = case I of
+              #b_set{dst=Dst} ->
+                  Uses ++ [{Dst,{def,N}}|Acc0];
+              _ ->
+                  %% A terminator defines nothing.
+                  Uses ++ Acc0
+          end,
+    live_interval_blk_1(Is, FirstNumber, Acc).
+
+%% first_number([#b_set{}], Terminator) -> InstructionNumber.
+%%  Return the number for the beginning of the block, which is
+%%  one less than the number of the first non-phi instruction.
+%%  If there is no such instruction, the number of the terminator
+%%  is used instead.
+
+first_number(Is, Last) ->
+    IsPhi = fun(#b_set{op=phi}) -> true;
+               (_) -> false
+            end,
+    case lists:dropwhile(IsPhi, Is) of
+        [I|_] -> beam_ssa:get_anno(n, I) - 1;
+        [] -> beam_ssa:get_anno(n, Last) - 1
+    end.
+
+%% update_successors([Successor], Pred, Blocks, LiveMap, Live0) -> Live.
+%%  Add the variables live in each successor to Live0, adjusted
+%%  for the phi nodes in that successor as seen from Pred.
+
+update_successors(Succs, Pred, Blocks, LiveMap, Live0) ->
+    lists:foldl(
+      fun(S, Live) ->
+              Live1 = ordsets:union(Live, get_live(S, LiveMap)),
+              #b_blk{is=Is} = map_get(S, Blocks),
+              update_live_phis(Is, Pred, Live1)
+      end, Live0, Succs).
+
+%% get_live(Label, LiveMap) -> [Var].
+%%  Return the live set stored for Label, or [] if there is none.
+
+get_live(L, LiveMap) ->
+    maps:get(L, LiveMap, []).
+
+%% update_live_phis([#b_set{}], Pred, Live0) -> Live.
+%%  For each leading phi node, add the variables it takes from
+%%  predecessor Pred to the live set and remove the variable it
+%%  defines. Stops at the first non-phi instruction.
+
+update_live_phis([#b_set{op=phi,dst=Killed,args=Args}|Is], Pred, Live0) ->
+    FromPred = ordsets:from_list([V || {#b_var{}=V,L} <- Args, L =:= Pred]),
+    Live = ordsets:del_element(Killed, ordsets:union(FromPred, Live0)),
+    update_live_phis(Is, Pred, Live);
+update_live_phis(_, _, Live) ->
+    Live.
+
+%%%
+%%% Reserve Y registers.
+%%%
+
+%% reserve_yregs(St0) -> St.
+%%  In each block that allocates a stack frame, insert instructions
+%%  that copy variables that must be in Y registers (given by
+%%  the `yregs` annotation) to new variables.
+%%
+%%  Also allocate specific Y registers for try and catch tags.
+%%  The outermost try/catch tag is placed in y0, any directly
+%%  nested tag in y1, and so on. Note that this is the reverse
+%%  of the order required by BEAM; it will be corrected later by
+%%  turn_yregs().
+
+reserve_yregs(#st{frames=Frames}=St0) ->
+    lists:foldl(fun(L, St) -> reserve_yregs_1(L, St) end, St0, Frames).
+
+%% reserve_yregs_1(FrameLabel, St0) -> St.
+%%  Reserve Y registers for the Y register variables used from
+%%  the frame block and onwards. Variables defined before the
+%%  frame block are first copied to new variables. Every
+%%  reservation made here is tagged with the current value of
+%%  the variable counter, which is then incremented.
+
+reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
+    Yregs = beam_ssa:get_anno(yregs, map_get(L, Blocks0)),
+    {Def,Used} = beam_ssa:def_used([L], Blocks0),
+    UsedYregs = ordsets:intersection(Yregs, Used),
+
+    %% Variables defined before this block need copies.
+    DefBefore = ordsets:subtract(UsedYregs, Def),
+    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
+    InsideVars = ordsets:subtract(UsedYregs, DefBefore),
+
+    TryTagRes = [{V,{Reg,Count}} || {V,Reg} <- reserve_try_tags(L, Blocks)],
+    VarRes = [{V,{y,Count}} || V <- BeforeVars ++ InsideVars],
+    St#st{res=VarRes ++ TryTagRes ++ Res0,ssa=Blocks,cnt=Count+1}.
+
+%% reserve_try_tags(Label, Blocks) -> [{Var,{y,N}}].
+%%  Return an ordered set of Y register reservations for the
+%%  try/catch tags found in the blocks reachable from Label.
+
+reserve_try_tags(L, Blocks) ->
+    {ActMap,_Seen} = reserve_try_tags_1([L], Blocks, gb_sets:empty(), #{}),
+    Res = [{V,{y,Y}} || Active <- maps:values(ActMap),
+                        {V,Y} <- maps:to_list(Active)],
+    ordsets:from_list(Res).
+
+%% reserve_try_tags_1([Label], Blocks, Seen0, ActMap0) -> {ActMap,Seen}.
+%% Visit each reachable block once. ActMap maps a label to the map
+%% of try tags (tag variable => number) active on entry to that
+%% block. The tags active at the end of a block are merged into the
+%% entries of its successors before the rest of the worklist and
+%% then the successors themselves are visited.
+
+reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
+ case gb_sets:is_element(L, Seen0) of
+ true ->
+ reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
+ false ->
+ Seen1 = gb_sets:insert(L, Seen0),
+ #b_blk{is=Is} = Blk = map_get(L, Blocks),
+ Active0 = get_active(L, ActMap0),
+ Active = reserve_try_tags_is(Is, Active0),
+ Successors = beam_ssa:successors(Blk),
+ ActMap1 = update_act_map(Successors, Active, ActMap0),
+ %% Remaining worklist first, then this block's successors.
+ {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
+ reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
+ end;
+reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
+ {ActMap,Seen}.
+
+%% get_active(Label, ActMap) -> Active.
+%%  Return the map of active tags stored for Label, or an empty
+%%  map if there is none.
+
+get_active(L, ActMap) ->
+    maps:get(L, ActMap, #{}).
+
+%% reserve_try_tags_is([Instruction], Active0) -> Active.
+%%  Track the active try tags through a block. A new tag is given
+%%  the number of tags active when it is created; a killed tag is
+%%  removed from the map.
+
+reserve_try_tags_is([], Active) ->
+    Active;
+reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) ->
+    reserve_try_tags_is(Is, Active#{V=>map_size(Active)});
+reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
+    reserve_try_tags_is(Is, maps:remove(Tag, Active));
+reserve_try_tags_is([_|Is], Active) ->
+    reserve_try_tags_is(Is, Active).
+
+%% update_act_map([Label], Active, ActMap0) -> ActMap.
+%%  Store Active for each label. If a label already has an entry,
+%%  Active is merged into it, with the existing entry taking
+%%  precedence for tags present in both.
+
+update_act_map(Ls, Active, ActMap0) ->
+    lists:foldl(
+      fun(L, ActMap) ->
+              case ActMap of
+                  #{L:=Prev} ->
+                      ActMap#{L=>maps:merge(Active, Prev)};
+                  #{} ->
+                      ActMap#{L=>Active}
+              end
+      end, ActMap0, Ls).
+
+%% rename_vars([Var], Label, Blocks0, Count0) -> {[NewVar],Blocks,Count}.
+%%  Create a new variable for each given variable, rename the
+%%  uses from block Label onwards, and insert the corresponding
+%%  'copy' instructions after any phi nodes in block Label.
+
+rename_vars([], _L, Blocks, Count) ->
+    {[],Blocks,Count};
+rename_vars(Vs, L, Blocks0, Count0) ->
+    Bases = [Base || #b_var{name=Base} <- Vs],
+    {NewVars,Count} = new_vars(Bases, Count0),
+    Ren = zip(Vs, NewVars),
+    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
+    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
+    Blk = Blk0#b_blk{is=insert_after_phis(Is0, Copies)},
+    {NewVars,Blocks1#{L:=Blk},Count}.
+
+%% insert_after_phis(Is0, InsertIs) -> Is.
+%%  Insert the instructions InsertIs after the leading phi nodes
+%%  of Is0.
+
+insert_after_phis(Is, InsertIs) ->
+    IsPhi = fun(#b_set{op=phi}) -> true;
+               (_) -> false
+            end,
+    {Phis,Rest} = lists:splitwith(IsPhi, Is),
+    Phis ++ InsertIs ++ Rest.
+
+%% frame_size(St0) -> St.
+%%  Calculate the frame size for each block that allocates a frame.
+%%  Annotate the block with the frame size. Also annotate all
+%%  return instructions with {deallocate,FrameSize} to simplify
+%%  code generation.
+
+frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
+    AddSize = fun(L, Blocks) -> frame_size_1(L, Regs, Blocks) end,
+    St#st{ssa=foldl(AddSize, Blocks0, Frames)}.
+
+%% frame_size_1(FrameLabel, Regs, Blocks0) -> Blocks.
+%%  Annotate the frame block with {frame_size,Size}, where Size is
+%%  the number of distinct Y registers assigned to the variables
+%%  defined in the blocks reachable from FrameLabel. The Y
+%%  registers are asserted to be numbered 0..Size-1. Every return
+%%  in the reachable blocks is annotated with {deallocate,Size}.
+
+frame_size_1(L, Regs, Blocks0) ->
+    Def = beam_ssa:def([L], Blocks0),
+    %% Look up each register only once.
+    Yregs0 = [R || V <- Def, R <- [map_get(V, Regs)], is_yreg(R)],
+    %% Yregs is an ordered set, so its length is the number of
+    %% distinct Y registers.
+    Yregs = ordsets:from_list(Yregs0),
+    FrameSize = length(Yregs),
+    if
+        FrameSize =/= 0 ->
+            [{y,0}|_] = Yregs,                  %Assertion.
+            {y,Last} = last(Yregs),
+            Last = FrameSize - 1,               %Assertion.
+            ok;
+        true ->
+            ok
+    end,
+    Blk0 = map_get(L, Blocks0),
+    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
+
+    %% Insert an annotation for frame deallocation on
+    %% each #b_ret{}.
+    Blocks = Blocks0#{L:=Blk},
+    Reachable = beam_ssa:rpo([L], Blocks),
+    frame_deallocate(Reachable, FrameSize, Blocks).
+
+%% frame_deallocate([Label], Size, Blocks0) -> Blocks.
+%%  Annotate the return instruction of every listed block that
+%%  ends with a return with {deallocate,Size}.
+
+frame_deallocate([], _Size, Blocks) ->
+    Blocks;
+frame_deallocate([L|Ls], Size, Blocks0) ->
+    Blocks = case map_get(L, Blocks0) of
+                 #b_blk{last=#b_ret{}=Ret0}=Blk0 ->
+                     Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
+                     Blocks0#{L:=Blk0#b_blk{last=Ret}};
+                 #b_blk{} ->
+                     %% No return here; the block is unchanged.
+                     Blocks0
+             end,
+    frame_deallocate(Ls, Size, Blocks).
+
+
+%% turn_yregs(St0) -> St.
+%%  Reverse the numbering of the y registers within each frame:
+%%  {y,0} becomes {y,FrameSize-1}, {y,FrameSize-1} becomes {y,0},
+%%  and so on. This is needed to make nested catches work, because
+%%  the register allocator (linear_scan()) has given the outermost
+%%  catch the lowest number.
+
+turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
+    TurnFrame = fun(L) ->
+                        Blk = map_get(L, Blocks),
+                        FrameSize = beam_ssa:get_anno(frame_size, Blk),
+                        Def = beam_ssa:def([L], Blocks),
+                        turn_yregs_1(Def, FrameSize, Regs0)
+                end,
+    %% Frames are processed so that the combined list has the results
+    %% for the last frame first.
+    Updates = append([TurnFrame(L) || L <- reverse(Frames)]),
+    St#st{regs=maps:merge(Regs0, maps:from_list(Updates))}.
+
+%% turn_yregs_1(Def, FrameSize, Regs) -> [{Var,{y,N}}].
+%%  For the variables in Def that live in y registers, return a
+%%  sorted list that maps each variable to its renumbered register
+%%  {y,FrameSize-Y-1}. The number of distinct y registers in use
+%%  must equal FrameSize.
+
+turn_yregs_1(Def, FrameSize, Regs) ->
+    ByReg = rel2fam([{Reg,V} || V <- Def,
+                                Reg <- [map_get(V, Regs)],
+                                is_yreg(Reg)]),
+    FrameSize = length(ByReg),                  %Assertion.
+    ordsets:from_list([{V,{y,FrameSize-Y-1}} ||
+                          {{y,Y},Vs} <- ByReg, V <- Vs]).
+
+%%%
+%%% Reserving registers before register allocation.
+%%%
+
+%% reserve_regs(St0) -> St.
+%%  Reserve registers before register allocation. Reservations
+%%  already present in the state are kept; this function adds
+%%  reservations for the function arguments (specific x registers),
+%%  z registers, float registers, and finally x registers for
+%%  everything that is still unreserved.
+
+reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
+    %% The function arguments live in x0, x1, and so on.
+    ArgRes = reserve_arg_regs(Args, 0, Res0),
+
+    %% Z registers are dummy registers for instructions with no
+    %% return values (e.g. remove_message) or pseudo-return values
+    %% (e.g. landingpad).
+    ZRes = reserve_zregs(Blocks, Intervals, ArgRes),
+
+    %% Float registers.
+    FRes = reserve_fregs(Blocks, ZRes),
+
+    %% Whatever remains unreserved becomes an X register.
+    Reserved = maps:from_list(FRes),
+    St#st{res=reserve_xregs(Blocks, Reserved)}.
+
+%% reserve_arg_regs(Args, N, Acc0) -> Acc.
+%%  Prepend {Arg,{x,I}} to the accumulator for each argument
+%%  variable, numbering the x registers consecutively from N.
+
+reserve_arg_regs(Args, N0, Acc0) ->
+    Reserve = fun(#b_var{}=Arg, {N,Acc}) ->
+                      {N+1,[{Arg,{x,N}}|Acc]}
+              end,
+    {_,Acc} = foldl(Reserve, {N0,Acc0}, Args),
+    Acc.
+
+%% reserve_zregs(Blocks, Intervals, Res0) -> Res.
+%%  Fold over the blocks (beam_ssa:fold_rpo/4 from label 0) and let
+%%  reserve_zreg/4 add z register reservations. A variable counts as
+%%  short-lived when it has a single live range {Start,End} with
+%%  Start+2 =:= End.
+
+reserve_zregs(Blocks, Intervals, Res) ->
+    ShortLived = cerl_sets:from_list([V || {V,[{Start,End}]} <- Intervals,
+                                           Start+2 =:= End]),
+    ReserveBlock = fun(_, #b_blk{is=Is,last=Last}, Acc) ->
+                           reserve_zreg(Is, Last, ShortLived, Acc)
+                   end,
+    beam_ssa:fold_rpo(ReserveBlock, [0], Res, Blocks).
+
+%% reserve_zreg(Is, Last, ShortLived, Acc0) -> Acc.
+%% Walk the instructions of one block together with its terminator
+%% (Last) and prepend a {Var,z} reservation to the accumulator for
+%% each variable that should be placed in a z register. The first
+%% clauses recognize specific instruction sequences at the END of
+%% the block (the remaining instruction list is matched in full), so
+%% the order of the clauses matters.
+
+reserve_zreg([#b_set{op=Op,dst=Dst}],
+ #b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
+ Op =:= get_tuple_element ->
+ %% If type optimization has determined that the result of these
+ %% instructions can be used directly in a branch, we must avoid reserving a
+ %% z register or code generation will fail.
+ A;
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
+ #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+ case {Val,Last} of
+ {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
+ %% These two instructions can be combined to a test_arity
+ %% instruction provided that the arity variable is short-lived.
+ reserve_zreg_1(Dst, ShortLived, A0);
+ {_,_} ->
+ %% Either the arity is too big, or the boolean value is not
+ %% used in a conditional branch.
+ A0
+ end;
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
+ #b_switch{}, ShortLived, A) ->
+ %% A tuple_size result that is only switched on: reserve a z
+ %% register for it if it is short-lived.
+ reserve_zreg_1(Dst, ShortLived, A);
+reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
+ %% There is no short, easy way to rewrite 'xor' to a series of
+ %% test instructions.
+ A;
+reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
+ %% There is no short, easy way to rewrite is_record/2 to a series of
+ %% test instructions.
+ A;
+reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
+ %% General case: the operations listed here always get a
+ %% z register for their destination variable.
+ IsZReg = case Op of
+ bs_match_string -> true;
+ bs_save -> true;
+ bs_restore -> true;
+ bs_set_position -> true;
+ {float,clearerror} -> true;
+ kill_try_tag -> true;
+ landingpad -> true;
+ put_tuple_elements -> true;
+ remove_message -> true;
+ set_tuple_element -> true;
+ succeeded -> true;
+ timeout -> true;
+ wait_timeout -> true;
+ _ -> false
+ end,
+ A = case IsZReg of
+ true -> [{Dst,z}|A0];
+ false -> A0
+ end,
+ reserve_zreg(Is, Last, ShortLived, A);
+reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
+ %% End of the block: the branch condition gets a z register
+ %% if it is a short-lived variable.
+ reserve_zreg_1(Bool, ShortLived, A);
+reserve_zreg([], _, _, A) -> A.
+
+%% reserve_zreg_1(Value, ShortLived, Acc0) -> Acc.
+%%  Prepend {Var,z} to the accumulator if Value is a variable that
+%%  is a member of the ShortLived set. Literals are ignored.
+
+reserve_zreg_1(#b_literal{}, _, A) ->
+    A;
+reserve_zreg_1(#b_var{}=V, ShortLived, A) ->
+    [{V,z} || cerl_sets:is_element(V, ShortLived)] ++ A.
+
+%% reserve_fregs(Blocks, Res0) -> Res.
+%%  Fold over the blocks (beam_ssa:fold_rpo/4 from label 0) and let
+%%  reserve_freg/2 add float register reservations.
+
+reserve_fregs(Blocks, Res) ->
+    ReserveBlock = fun(_, #b_blk{is=Is}, Acc) -> reserve_freg(Is, Acc) end,
+    beam_ssa:fold_rpo(ReserveBlock, [0], Res, Blocks).
+
+%% reserve_freg(Is, Res0) -> Res.
+%%  Prepend {Var,fr} for the destination of every {float,Op}
+%%  instruction except {float,get}. All other instructions are
+%%  skipped.
+
+reserve_freg(Is, Res0) ->
+    Reserve = fun(#b_set{op={float,Op},dst=V}, Res) when Op =/= get ->
+                      [{V,fr}|Res];
+                 (_, Res) ->
+                      Res
+              end,
+    foldl(Reserve, Res0, Is).
+
+%% reserve_xregs(Blocks, Res0) -> Res.
+%% Reserve all remaining variables as X registers.
+%%
+%% If a variable will need to be in a specific X register for a
+%% 'call' or 'make_fun' (and there is nothing that will kill it
+%% between the definition and use), reserve the register using a
+%% {prefer,{x,X}} annotation. That annotation means that the linear
+%% scan algorithm will place the variable in the preferred register,
+%% unless that register is already occupied.
+%%
+%% All remaining variables are reserved as X registers. Linear scan
+%% will allocate the lowest free X register for the variable.
+
+reserve_xregs(Blocks, Res) ->
+    %% Visit the blocks in the reverse of the order given by
+    %% beam_ssa:rpo/1, starting with an empty map of per-block hints.
+    PostOrder = reverse(beam_ssa:rpo(Blocks)),
+    reserve_xregs(PostOrder, Blocks, #{}, Res).
+
+%% reserve_xregs(Ls, Blocks, XsMap, Res0) -> Res.
+%%  Process the blocks in the given order. XsMap remembers, for each
+%%  block already handled, the register hints that were in effect at
+%%  the beginning of that block.
+
+reserve_xregs([L|Ls], Blocks, XsMap, Res0) ->
+    #b_blk{anno=Anno,is=Is,last=Last} = map_get(L, Blocks),
+
+    %% Initial hints (variable => preferred register) picked up
+    %% from the terminator and the successors of the block.
+    Hints0 = reserve_terminator(L, Is, Last, Blocks, XsMap, Res0),
+
+    %% Work on the reversed instruction sequence, with 'gc'
+    %% pseudo-instructions inserted where the code generator
+    %% will place instructions that can do a garbage collection.
+    Marked = res_place_allocate(Anno, res_place_gc_instrs(reverse(Is), [])),
+
+    %% Add reservations and hints for the variables defined
+    %% in the block.
+    {Res,Hints} = reserve_xregs_is(Marked, Res0, Hints0, []),
+    reserve_xregs(Ls, Blocks, XsMap#{L=>Hints}, Res);
+reserve_xregs([], _, _, Res) ->
+    Res.
+
+%% Insert explicit 'gc' markers at the points where there will
+%% be a garbage collection. The instruction sequence passed in is
+%% reversed; the accumulator is therefore built in forward order,
+%% and is reversed again (with 'test_heap' markers turned into
+%% 'gc') by res_place_gc_instrs_rev/2.
+
+res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
+    res_place_gc_instrs(Is, [I|Acc]);
+res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
+  when Op =:= call; Op =:= make_fun ->
+    case Acc of
+        [] ->
+            %% Nothing follows the instruction in this block.
+            res_place_gc_instrs(Is, [I]);
+        [_|_] ->
+            res_place_gc_instrs(Is, [I,gc|Acc])
+    end;
+res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
+    Acc = case {beam_ssa_codegen:classify_heap_need(Op, Args),Acc0} of
+              {neutral,[test_heap|Acc1]} ->
+                  %% Keep the pending marker in front of the instruction.
+                  [test_heap,I|Acc1];
+              {neutral,_} ->
+                  [I|Acc0];
+              {{put,_},[test_heap|Acc1]} ->
+                  [test_heap,I|Acc1];
+              {{put,_},_} ->
+                  [test_heap,I|Acc0];
+              {_,_} ->
+                  [gc,I|Acc0]
+          end,
+    res_place_gc_instrs(Is, Acc);
+res_place_gc_instrs([], Acc) ->
+    %% Reverse and replace 'test_heap' markers with 'gc'.
+    %% (The distinction is no longer useful.)
+    res_place_gc_instrs_rev(Acc, []).
+
+%% res_place_gc_instrs_rev(Is, Acc0) -> Acc.
+%%  Reverse Is onto the accumulator. Every 'test_heap' marker becomes
+%%  'gc', and a marker is dropped when the accumulator already starts
+%%  with 'gc', so there are never two adjacent 'gc' markers.
+
+res_place_gc_instrs_rev(Is, Acc0) ->
+    Push = fun(Marker, [gc|_]=Acc) when Marker =:= test_heap;
+                                        Marker =:= gc ->
+                   Acc;
+              (test_heap, Acc) ->
+                   [gc|Acc];
+              (I, Acc) ->
+                   [I|Acc]
+           end,
+    foldl(Push, Acc0, Is).
+
+%% res_place_allocate(Anno, Is) -> Is.
+%%  If the block annotation has a 'yregs' key, an 'allocate'
+%%  instruction will be inserted in the block, so add a 'gc' marker
+%%  last in the (reversed) instruction sequence.
+
+res_place_allocate(Anno, Is) ->
+    case Anno of
+        #{yregs:=_} -> Is ++ [gc];
+        #{} -> Is
+    end.
+
+%% reserve_xregs_is(Is, Res0, Xs0, Used0) -> {Res,Xs}.
+%%  Go through the reversed instruction sequence (with 'gc' markers),
+%%  reserving a register for each destination variable. Xs holds the
+%%  current register hints and Used the variables used by the
+%%  instructions seen so far.
+
+reserve_xregs_is([gc|Is], Res, Xs, Used) ->
+    %% The code generator will place an instruction that does a
+    %% garbage collection here, so the remembered registers
+    %% must be pruned.
+    reserve_xregs_is(Is, Res, res_xregs_prune(Xs, Used, Res), Used);
+reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
+    Res = reserve_xreg(Dst, Xs0, Res0),
+    Used = ordsets:del_element(Dst, ordsets:union(Used0, beam_ssa:used(I))),
+    Xs = if
+             Op =:= call; Op =:= make_fun ->
+                 %% The hints are replaced by the registers that the
+                 %% arguments (all but the first element of Args)
+                 %% must be placed in.
+                 reserve_call_args(tl(Args));
+             true ->
+                 Xs0
+         end,
+    reserve_xregs_is(Is, Res, Xs, Used);
+reserve_xregs_is([], Res, Xs, _Used) ->
+    {Res,Xs}.
+
+%% reserve_terminator(L, Is, Last, Blocks, XsMap, Res) -> Xs.
+%% Pick up register hints from the successors of this block.
+%% L is the label of the block, Is its instructions, and Last its
+%% terminator. XsMap holds the hints recorded for the blocks that
+%% have already been processed. The result is a map from variables
+%% to preferred registers; an empty map means "no hints".
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+ _Blocks, XsMap, _Res) ->
+ %% We know that no variables are used at ?BADARG_BLOCK, so
+ %% any register hints from the success blocks are safe to use.
+ map_get(Succ, XsMap);
+reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
+ Blocks, XsMap, Res) when Succ =/= Fail ->
+ #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
+ case {SuccBlk,FailBlk} of
+ {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
+ #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
+ %% Both branches ultimately transfer to the same
+ %% block (via two blocks with no instructions).
+ %% Pick up register hints from the phi nodes
+ %% in the common block.
+ #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
+ Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
+ res_xregs_from_phi(PhiIs, Fail, Res, Xs);
+ {_,_} when Is =/= [] ->
+ case last(Is) of
+ #b_set{op=succeeded,args=[Arg]} ->
+ %% We know that Arg will not be used at the failure
+ %% label, so we can pick up register hints from the
+ %% success label.
+ Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
+ %% Only the hint for Arg (if any) is kept.
+ case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
+ #{Arg:=Reg} -> #{Arg=>Reg};
+ #{} -> #{}
+ end;
+ _ ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+ {_,_} ->
+ %% Register hints from the success block may not
+ %% be safe at the failure block, and vice versa.
+ #{}
+ end;
+reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
+ Blocks, XsMap, Res) ->
+ %% Unconditional branch: follow it through blocks without
+ %% instructions, and take hints from the first non-empty block.
+ case map_get(Succ, Blocks) of
+ #b_blk{is=[],last=Last} ->
+ reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
+ #b_blk{is=[_|_]=PhiIs} ->
+ res_xregs_from_phi(PhiIs, L, Res, #{})
+ end;
+reserve_terminator(_, _, _, _, _, _) -> #{}.
+
+%% res_xregs_from_phi(Is, Pred, Res, Acc0) -> Acc.
+%%  Go through the leading phi nodes in Is. If the value that a phi
+%%  node takes from predecessor Pred is a variable, and the phi
+%%  node itself is reserved as {prefer,Reg}, add a hint that the
+%%  variable should be placed in the same register.
+
+res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
+                   Pred, Res, Acc0) ->
+    Acc = case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
+              [] ->
+                  %% The value for this predecessor is a literal.
+                  Acc0;
+              [V] ->
+                  case Res of
+                      #{Dst:={prefer,Reg}} -> Acc0#{V=>Reg};
+                      #{Dst:=_} -> Acc0
+                  end
+          end,
+    res_xregs_from_phi(Is, Pred, Res, Acc);
+res_xregs_from_phi(_, _, _, Acc) ->
+    Acc.
+
+%% reserve_call_args(Args) -> Xs.
+%%  Map each variable argument to the x register given by its
+%%  position in the argument list (the first argument is {x,0}).
+%%  Literal arguments take up a position but get no entry.
+
+reserve_call_args(Args) ->
+    reserve_call_args(Args, 0, #{}).
+
+reserve_call_args([Arg|As], X, Xs) ->
+    case Arg of
+        #b_var{} -> reserve_call_args(As, X+1, Xs#{Arg=>{x,X}});
+        #b_literal{} -> reserve_call_args(As, X+1, Xs)
+    end;
+reserve_call_args([], _, Xs) ->
+    Xs.
+
+%% reserve_xreg(V, Xs, Res0) -> Res.
+%%  Reserve an x register for V unless V already has a reservation.
+%%  If Xs has a hint for V, record it as {prefer,X}; otherwise
+%%  reserve V as an x register in general.
+
+reserve_xreg(V, Xs, Res) ->
+    case {Res,Xs} of
+        {#{V:=_},_} ->
+            %% Already reserved (but not as an X register).
+            Res;
+        {#{},#{V:=X}} ->
+            %% This specific X register is preferred, unless it
+            %% is already in use.
+            Res#{V=>{prefer,X}};
+        {#{},#{}} ->
+            Res#{V=>x}
+    end.
+
+%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
+%%  Drop the register hints that could leave "holes"
+%%  (uninitialized X registers) when the garbage collector
+%%  is invoked.
+
+res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
+    %% A used variable counts as occupying an X register if it is
+    %% reserved as {x,_} or has no reservation at all. The count
+    %% is a conservative estimate of the number of safe registers;
+    %% the actual number may be higher.
+    InXReg = fun(V) ->
+                     case Res of
+                         #{V:={x,_}} -> true;
+                         #{V:=_} -> false;
+                         #{} -> true
+                     end
+             end,
+    NumSafe = length([V || V <- Used, InXReg(V)]),
+
+    %% Keep only the hints for registers below the safe limit.
+    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
+res_xregs_prune(Xs, _Used, _Res) ->
+    Xs.
+
+%%%
+%%% Register allocation using linear scan.
+%%%
+
+%% A live interval for one variable.
+%% sort - sort key; init_interval/2 sets it to the start of the
+%% first live range.
+%% reg - the register (a {Kind,N} tuple) once reserved or selected,
+%% a {prefer,Reg} hint, or 'none'.
+%% pool - the pool that the register is taken from.
+%% var - the variable that the interval belongs to.
+%% rs - the live ranges of the variable.
+-record(i,
+ {sort=1 :: instr_number(),
+ reg=none :: i_reg(),
+ pool=x :: pool_id(),
+ var=#b_var{} :: b_var(),
+ rs=[] :: [range()]
+ }).
+
+%% The state for the linear scan.
+%% cur - the interval currently being handled.
+%% unhandled_res - not yet handled intervals that already have a
+%% specific register (see linear_scan/1).
+%% unhandled_any - all other not yet handled intervals.
+%% active, inactive - intervals already given a register.
+%% free - maps a pool id to its free registers, and {next,PoolId}
+%% to the number of the next new register for that pool.
+%% regs - the accumulated {Variable,Register} assignments.
+-record(l,
+ {cur=#i{} :: interval(),
+ unhandled_res=[] :: [interval()],
+ unhandled_any=[] :: [interval()],
+ active=[] :: [interval()],
+ inactive=[] :: [interval()],
+ free=#{} :: #{var_name()=>pool(),
+ {'next',pool_id()}:=reg_num()},
+ regs=[] :: [{b_var(),ssa_register()}]
+ }).
+
+-type interval() :: #i{}.
+-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
+-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
+-type pool() :: ordsets:ordset(ssa_register()).
+
+%% linear_scan(St0) -> St.
+%%  Allocate registers for all live intervals. The intervals are
+%%  sorted and split into those that already have a specific
+%%  register and the rest. The resulting assignments are stored as a
+%%  map in the 'regs' field; the 'intervals' and 'res' fields are
+%%  reset to empty lists.
+
+linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
+    Free = init_free(maps:to_list(Res)),
+    Intervals = sort([init_interval(Int, Res) || Int <- Intervals0]),
+    IsReserved = fun(#i{reg=none}) -> false;
+                    (#i{reg={prefer,{_,_}}}) -> false;
+                    (#i{reg={_,_}}) -> true
+                 end,
+    {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
+    #l{regs=Regs} = do_linear(#l{unhandled_res=UnhandledRes,
+                                 unhandled_any=Unhandled,
+                                 free=Free}),
+    St0#st{intervals=[],res=[],regs=maps:from_list(Regs)}.
+
+%% init_interval({Var,Ranges}, Res) -> #i{}.
+%%  Build the interval record for a variable from its live ranges
+%%  and its reservation in Res. The reservation determines both the
+%%  register pool and the initial register (a specific register, a
+%%  {prefer,Reg} hint, or 'none').
+
+init_interval({V,[{Start,_}|_]=Rs}, Res) ->
+    {Pool,Reg} = case map_get(V, Res) of
+                     {prefer,{x,_}}=Prefer -> {x,Prefer};
+                     x -> {x,none};
+                     {x,_}=XReg -> {x,XReg};
+                     {y,Uniq} -> {Uniq,none};
+                     {{y,_}=YReg,Uniq} -> {Uniq,YReg};
+                     z -> {z,none};
+                     fr -> {fr,none}
+                 end,
+    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.
+
+%% init_free(Res) -> Free.
+%%  Build the initial map of free registers: one entry per pool
+%%  holding its registers, plus one {next,Pool} entry per pool
+%%  holding the number of registers in the pool. {x,0} is always
+%%  part of the x pool, and the x pool is made contiguous.
+
+init_free(Res) ->
+    Pools0 = maps:from_list(rel2fam([{x,{x,0}}|init_free_1(Res)])),
+    Pools = maps:update_with(x, fun init_xregs/1, Pools0),
+    Next = [{{next,Pool},length(Regs)} ||
+               {Pool,Regs} <- maps:to_list(Pools)],
+    maps:merge(Pools, maps:from_list(Next)).
+
+%% init_free_1([{Var,Reservation}]) -> [{Pool,Register}].
+%%  Translate each reservation to the pool and register that it
+%%  contributes to the initial free map. General 'x' reservations
+%%  contribute nothing.
+
+init_free_1(Res) ->
+    ToPool = fun({_,{prefer,{x,_}=Reg}}) -> [{x,Reg}];
+                ({_,{x,_}=Reg}) -> [{x,Reg}];
+                ({_,{y,Uniq}}) -> [{Uniq,{y,0}}];
+                ({_,{{y,_}=Reg,Uniq}}) -> [{Uniq,Reg}];
+                ({_,z}) -> [{z,{z,0}}];
+                ({_,fr}) -> [{fr,{fr,0}}];
+                ({_,x}) -> []
+             end,
+    lists:flatmap(ToPool, Res).
+
+%% init_xregs(Xregs0) -> Xregs.
+%%  Fill in any gaps in an ascending list of x registers, so that
+%%  the pool of x registers is contiguous.
+
+init_xregs([{x,N}|[{x,M}|_]=Rest]) ->
+    case N+1 of
+        M ->
+            %% No gap between this register and the next one.
+            [{x,N}|init_xregs(Rest)];
+        Next ->
+            %% Insert the missing register and continue from it.
+            [{x,N}|init_xregs([{x,Next}|Rest])]
+    end;
+init_xregs([{x,_}]=Is) ->
+    Is.
+
+%% do_linear(L0) -> L.
+%%  The main loop of the linear scan. Handle one interval per
+%%  iteration until there are no unhandled intervals left.
+
+do_linear(L0) ->
+    case set_next_current(L0) of
+        done ->
+            L0;
+        L1 ->
+            %% Update the active and inactive lists for the start
+            %% position of the current interval.
+            L2 = check_inactive(expire_active(L1)),
+            L3 = select_register(collect_available(L2), L2),
+            do_linear(make_cur_active(L3))
+    end.
+
+%% set_next_current(L0) -> L | done.
+%%  Move the next unhandled interval to the 'cur' field. The
+%%  interval is taken from the list of intervals with reserved
+%%  registers only when it starts strictly before the first of the
+%%  other unhandled intervals (or when there are no others).
+
+set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
+    done;
+set_next_current(#l{unhandled_res=[#i{sort=N1}=Cur|T],
+                    unhandled_any=[#i{sort=N2}|_]}=L) when N1 < N2 ->
+    L#l{cur=Cur,unhandled_res=T};
+set_next_current(#l{unhandled_any=[Cur|T]}=L) ->
+    L#l{cur=Cur,unhandled_any=T};
+set_next_current(#l{unhandled_res=[Cur|T],unhandled_any=[]}=L) ->
+    L#l{cur=Cur,unhandled_res=T}.
+
+%% expire_active(L0) -> L.
+%% Go through the active intervals and compare each one with the
+%% start position of the current interval:
+%% * an interval that has ended has its register returned to the
+%% free pool and is dropped;
+%% * an interval that covers the position stays active;
+%% * an interval that has a hole at the position has its register
+%% returned to the free pool and is moved to the inactive list.
+
+expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) ->
+ {Act,L} = expire_active(Act0, CurBegin, L0, []),
+ L#l{active=Act}.
+
+expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+ {_,_} = Reg, %Assertion.
+ case overlap_status(Rs0, CurBegin) of
+ ends_before_cur ->
+ L = free_reg(I, L0),
+ expire_active(Is, CurBegin, L, Acc);
+ overlapping ->
+ expire_active(Is, CurBegin, L0, [I|Acc]);
+ not_overlapping ->
+ %% Drop the ranges that end at or before the current
+ %% position before making the interval inactive.
+ Rs = strip_before_current(Rs0, CurBegin),
+ L1 = free_reg(I, L0),
+ L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]},
+ expire_active(Is, CurBegin, L, Acc)
+ end;
+expire_active([], _CurBegin, L, Acc) ->
+ {Acc,L}.
+
+%% check_inactive(L0) -> L.
+%% Go through the inactive intervals and compare each one with the
+%% start position of the current interval:
+%% * an interval that has ended is dropped;
+%% * an interval that still has a hole at the position stays
+%% inactive;
+%% * an interval that covers the position is moved to the active
+%% list and its register is removed from the free pool.
+
+check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
+ {InAct,L} = check_inactive(InAct0, CurBegin, L0, []),
+ L#l{inactive=InAct}.
+
+check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+ case overlap_status(Rs0, CurBegin) of
+ ends_before_cur ->
+ check_inactive(Is, CurBegin, L0, Acc);
+ not_overlapping ->
+ check_inactive(Is, CurBegin, L0, [I|Acc]);
+ overlapping ->
+ %% Drop the ranges that end at or before the current
+ %% position before making the interval active again.
+ Rs = strip_before_current(Rs0, CurBegin),
+ L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]},
+ L = reserve_reg(I, L1),
+ check_inactive(Is, CurBegin, L, Acc)
+ end;
+check_inactive([], _CurBegin, L, Acc) ->
+ {Acc,L}.
+
+%% strip_before_current(Ranges0, CurBegin) -> Ranges.
+%%  Remove the leading ranges that end at or before CurBegin.
+
+strip_before_current(Rs, CurBegin) ->
+    HasEnded = fun({_,E}) -> E =< CurBegin;
+                  (_) -> false
+               end,
+    lists:dropwhile(HasEnded, Rs).
+
+%% collect_available(L) -> [Register].
+%% Return the candidate registers for the current interval. The
+%% result is the preferred register alone if it is available, the
+%% reserved register alone if the interval has one, and otherwise
+%% the free registers in the pool that are not taken by an
+%% overlapping unhandled interval with a reserved register. The
+%% result can be empty.
+
+collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
+ %% Use the preferred register if it is available.
+ Avail = collect_available(L#l{cur=I#i{reg=none}}),
+ case member(Prefer, Avail) of
+ true -> [Prefer];
+ false -> Avail
+ end;
+collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
+ %% Return the already reserved register.
+ [ReservedReg];
+collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
+ Free = get_pool(Cur, L),
+
+ %% Note that since the live intervals are constructed from
+ %% SSA form, there cannot be any overlap of the current interval
+ %% with any inactive interval. See [3], page 175. Therefore we
+ %% only have to check the unhandled intervals for overlap with
+ %% the current interval. As a further optimization, we only need
+ %% to check the intervals that have reserved registers.
+ collect_available(Unhandled, Cur, Free).
+
+%% collect_available(Intervals, Cur, Free0) -> Free.
+%%  Remove from Free0 the register of every interval that is in the
+%%  same pool as Cur and overlaps Cur.
+
+collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free)
+  when Pool1 =/= Pool2 ->
+    %% An interval in another pool cannot conflict.
+    collect_available(Is, Cur, Free);
+collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
+    Free = case overlaps(I, Cur) of
+               true -> ordsets:del_element(Reg, Free0);
+               false -> Free0
+           end,
+    collect_available(Is, Cur, Free);
+collect_available([], _, Free) ->
+    Free.
+
+%% select_register(Available, L0) -> L.
+%%  Give the current interval the first available register, or a
+%%  new register from its pool if none is available. The choice is
+%%  recorded in the 'regs' list and the register is removed from
+%%  the free pool.
+
+select_register(Available, #l{cur=Cur0,regs=Regs}=L0) ->
+    {Reg,L1} = case Available of
+                   [{_,_}=First|_] ->
+                       {First,L0};
+                   [] ->
+                       %% Allocate a new register in the pool.
+                       get_next_free(Cur0, L0)
+               end,
+    Cur = Cur0#i{reg=Reg},
+    reserve_reg(Cur, L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}).
+
+%% make_cur_active(L0) -> L.
+%%  Add the current interval to the front of the active list.
+
+make_cur_active(#l{}=L) ->
+    L#l{active=[L#l.cur|L#l.active]}.
+
+%% overlaps(Interval1, Interval2) -> boolean().
+%%  Test whether the live ranges of two intervals overlap.
+
+overlaps(#i{}=I1, #i{}=I2) ->
+    are_overlapping(I1#i.rs, I2#i.rs).
+
+%% overlap_status(Ranges, CurBegin) ->
+%%        ends_before_cur | overlapping | not_overlapping.
+%%  Classify a non-empty list of ranges relative to the position
+%%  CurBegin: every range ends at or before CurBegin
+%%  (ends_before_cur), the first remaining range starts at or before
+%%  CurBegin (overlapping), or it starts after CurBegin
+%%  (not_overlapping).
+
+overlap_status([{_,E}], CurBegin) when E =< CurBegin ->
+    ends_before_cur;
+overlap_status([{_,E}|Rs], CurBegin) when E =< CurBegin ->
+    %% This range has ended; look at the next one.
+    overlap_status(Rs, CurBegin);
+overlap_status([{S,_}|_], CurBegin) when S =< CurBegin ->
+    overlapping;
+overlap_status([{_,_}|_], _CurBegin) ->
+    not_overlapping.
+
+%% reserve_reg(Interval, L0) -> L.
+%%  Remove the register of the interval from the free registers of
+%%  the interval's pool.
+
+reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
+    update_pool(I, ordsets:del_element(Reg, get_pool(I, L)), L).
+
+%% free_reg(Interval, L0) -> L.
+%%  Return the register of the interval to the free registers of
+%%  the interval's pool.
+
+free_reg(#i{reg={_,_}=Reg}=I, L) ->
+    update_pool(I, ordsets:add_element(Reg, get_pool(I, L)), L).
+
+%% get_pool(Interval, L) -> FreeRegisters.
+%%  Fetch the free registers for the pool of the interval.
+
+get_pool(#i{pool=PoolId}, #l{free=FreeMap}) ->
+    maps:get(PoolId, FreeMap).
+
+%% update_pool(Interval, FreeRegisters, L0) -> L.
+%%  Replace the free registers for the pool of the interval. The
+%%  pool must already exist in the free map.
+
+update_pool(#i{pool=PoolId}, New, #l{free=FreeMap}=L) ->
+    L#l{free=FreeMap#{PoolId:=New}}.
+
+%% get_next_free(Interval, L0) -> {Register,L}.
+%%  Create a new register in the pool of the interval, using the
+%%  number stored under {next,Pool}, and increment that number.
+%%  An integer pool id yields a y register; an atom pool id yields
+%%  a register of the kind named by the atom.
+
+get_next_free(#i{pool=Pool}, #l{free=Free}=L0) ->
+    Key = {next,Pool},
+    N = map_get(Key, Free),
+    L = L0#l{free=Free#{Key:=N+1}},
+    Reg = if
+              is_integer(Pool) -> {y,N};
+              is_atom(Pool) -> {Pool,N}
+          end,
+    {Reg,L}.
+
+%%%
+%%% Interval utilities.
+%%%
+
+%% are_overlapping(Ranges1, Ranges2) -> boolean().
+%%  Test whether any range in Ranges1 overlaps any range in
+%%  Ranges2. Two ranges overlap only if each one starts strictly
+%%  before the other one ends.
+
+are_overlapping(Rs1, Rs2) ->
+    lists:any(fun(R) -> are_overlapping_1(R, Rs2) end, Rs1).
+
+%% The search stops at the first range in the list that starts
+%% after the end of the given range.
+are_overlapping_1({_,E1}, [{S2,_}|_]) when E1 < S2 ->
+    false;
+are_overlapping_1({S1,E1}, [{S2,E2}|_]) when S2 < E1, E2 > S1 ->
+    true;
+are_overlapping_1({_,_}=R, [{_,_}|Rs]) ->
+    are_overlapping_1(R, Rs);
+are_overlapping_1({_,_}, []) ->
+    false.
+
+%%%
+%%% Utilities.
+%%%
+
+%% is_loop_header(L, Blocks) -> false|true.
+%%  Check whether the block with label L is a loop header. A loop
+%%  header is recognized by having a peek_message instruction as
+%%  its first instruction.
+
+is_loop_header(L, Blocks) ->
+    Blk = map_get(L, Blocks),
+    case Blk of
+        #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+        _ -> false
+    end.
+
+%% rel2fam([{Key,Value}]) -> [{Key,[Value]}].
+%%  Group the values of a binary relation by key, using sofs.
+
+rel2fam(Pairs) ->
+    sofs:to_external(sofs:rel2fam(sofs:relation(Pairs))).
+
+%% split_phis(Is) -> {Phis,Rest}.
+%%  Split off the leading phi instructions from an instruction
+%%  sequence.
+
+split_phis(Is) ->
+    IsPhi = fun(#b_set{op=phi}) -> true;
+               (#b_set{}) -> false
+            end,
+    splitwith(IsPhi, Is).
+
+%% is_yreg(Register) -> boolean().
+%%  Test whether a register is a y register. Only x, y, z, and
+%%  fr registers are accepted.
+
+is_yreg({y,_}) ->
+    true;
+is_yreg({Kind,_}) when Kind =:= x; Kind =:= z; Kind =:= fr ->
+    false.
+
+%% new_vars(Bases, Count0) -> {Vars,Count}.
+%%  Create one new variable per base name, threading the counter
+%%  through new_var/2 from left to right.
+
+new_vars(Bases, Count0) ->
+    lists:mapfoldl(fun new_var/2, Count0, Bases).
+
+%% new_var(Base, Count0) -> {Var,Count}.
+%%  Create a new variable named {Base,Count0} and return it with the
+%%  incremented counter. If the given name is itself a {Base,Int}
+%%  pair, only its Base part is used.
+
+new_var(Name, Count) ->
+    Base = case Name of
+               {Base0,Int} ->
+                   true = is_integer(Int),      %Assertion.
+                   Base0;
+               _ ->
+                   Name
+           end,
+    {#b_var{name={Base,Count}},Count+1}.