path: root/lib/compiler/src/beam_ssa_pre_codegen.erl



%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Prepare for code generation, including register allocation.
%%
%% The output of this compiler pass is still in the SSA format, but
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
%% the BEAM instructions. For example, the binary matching
%% instructions are transformed from the optimization-friendly
%% internal format to instructions more similar to the actual BEAM
%% instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%% are annotated with a {frame_size,Size} annotation.
%%
%% * 'copy' instructions are added for all variables that need
%% to be saved to the stack frame. Additional 'copy' instructions
%% can be added as an optimization to reuse y registers (see
%% the copy_retval sub pass).
%%
%% * Each function is annotated with a {register,RegisterMap}
%% annotation that maps each variable to a BEAM register. The linear
%% scan algorithm is used to allocate registers.
%%
%% There are four kinds of registers: x, y, fr (floating point register),
%% and z. A variable will be allocated to a z register if it is only
%% used by the instruction following the instruction that defines
%% the variable. The code generator will typically combine those
%% instructions to a test instruction. z registers are also used for
%% some instructions that don't have a return value.
%%
%% References:
%%
%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
%% in the context of SSA form and register constraints. In Proceedings
%% of the International Conference on Compiler Construction, pages
%% 229–246. LNCS 2304, Springer-Verlag, 2002.
%%
%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
%% linear scan register allocator. In Proceedings of the ACM/USENIX
%% International Conference on Virtual Execution Environments, pages
%% 132–141. ACM Press, 2005.
%%
%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
%% Form. In Proceedings of the International Symposium on Code
%% Generation and Optimization, pages 170-179. ACM Press, 2010.
%%

-module(beam_ssa_pre_codegen).

-export([module/2]).

-include("beam_ssa.hrl").

-import(lists, [all/2,any/2,append/1,duplicate/2,
                foldl/3,last/1,map/2,member/2,partition/2,
                reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).

-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

%% Run the configured sub passes over every function in the module.
%% BSM3 instructions are used unless the 'no_bsm3' option is given.
module(#b_module{body=Functions0}=Mod, Opts) ->
    Passes = passes(Opts),
    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
    Functions = functions(Functions0, Passes, UseBSM3),
    {ok,Mod#b_module{body=Functions}}.

%% Apply function/3 to each function, preserving the order.
functions(Fs, Ps, UseBSM3) ->
    [function(F, Ps, UseBSM3) || F <- Fs].

-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
-type instr_number() :: pos_integer().
-type range() :: {instr_number(),instr_number()}.
-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.
-type ypool() :: {'y',beam_ssa:label()}.
-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
                       ypool() | {yreg(),ypool()} | 'z'.
-type ssa_register() :: beam_ssa_codegen:ssa_register().

-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
-record(st, {ssa :: beam_ssa:block_map(),
             args :: [b_var()],
             cnt :: beam_ssa:label(),
             use_bsm3 :: boolean(),
             frames=[] :: [beam_ssa:label()],
             intervals=[] :: [{b_var(),[range()]}],
             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
             regs=#{} :: #{b_var():=ssa_register()},
             extra_annos=[] :: [{atom(),term()}]
            }).
-define(PASS(N), {N,fun N/1}).

%% passes(Opts) -> [{Name,Fun}].
%%  Build the ordered list of sub passes to run on each function.
passes(Opts) ->
    %% Optional passes are modelled as lists holding zero or one
    %% element, so that they can be spliced into the pass list.

    %% Only needed when the no_put_tuple2 option is given.
    FixTuples = [?PASS(fix_tuples) ||
                    proplists:get_bool(no_put_tuple2, Opts)],

    %% If needed for a .precg file, save the live intervals
    %% so they can be included in an annotation.
    SaveIntervals = [?PASS(save_live_intervals) ||
                        proplists:get_bool(dprecg, Opts)],

    Preliminaries = [?PASS(assert_no_critical_edges),
                     ?PASS(fix_bs),
                     ?PASS(sanitize)] ++
        FixTuples ++
        [?PASS(use_set_tuple_element),
         ?PASS(place_frames),
         ?PASS(fix_receives)],

    %% Find and reserve Y registers.
    YRegs = [?PASS(find_yregs),
             ?PASS(reserve_yregs)],

    %% Handle legacy binary match instructions that don't
    %% accept a Y register as destination.
    Legacy = [?PASS(legacy_bs)],

    %% Improve reuse of Y registers to potentially
    %% reduce the size of the stack frame.
    ReuseYRegs = [?PASS(copy_retval),
                  ?PASS(opt_get_list)],

    %% Calculate live intervals.
    Intervals = [?PASS(number_instructions),
                 ?PASS(live_intervals),
                 ?PASS(reserve_regs)],

    %% Allocate registers.
    Allocate = [?PASS(linear_scan),
                ?PASS(frame_size),
                ?PASS(turn_yregs)],

    Preliminaries ++ YRegs ++ Legacy ++ ReuseYRegs ++
        Intervals ++ SaveIntervals ++ Allocate.

%% Run all sub passes on a single function and attach the resulting
%% register map (plus any extra annotations) to it. If a pass crashes,
%% the name of the function is printed before the exception is
%% re-raised with its original stacktrace.
function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
         Ps, UseBSM3) ->
    try
        InitSt = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} =
            compile:run_sub_passes(Ps, InitSt),
        Annotated = beam_ssa:add_anno(registers, Regs,
                                      add_extra_annos(F0, ExtraAnnos)),
        Annotated#b_function{bs=Blocks,cnt=Count}
    catch
        Class:Error:Stack ->
            #{func_info:={_,Name,Arity}} = Anno,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

%% Stash the live intervals as the (only) extra annotation.
save_live_intervals(#st{intervals=Intervals}=St) ->
    Anno = {live_intervals,Intervals},
    St#st{extra_annos=[Anno]}.

%% Add extra annotations when a .precg listing file is being produced.
add_extra_annos(F, [{Name,Value}|Annos]) ->
    add_extra_annos(beam_ssa:add_anno(Name, Value, F), Annos);
add_extra_annos(F, []) ->
    F.

%% assert_no_critical_edges(St0) -> St.
%%  The code generator will not work if there are critical edges.
%%  Abort if any critical edges are found.

assert_no_critical_edges(#st{ssa=Blocks}=St) ->
    %% The fold is run only for the assertions made in assert_no_ces/3;
    %% its result is not used.
    _ = beam_ssa:fold_rpo(fun assert_no_ces/3, Blocks, Blocks),
    St.

%% NOTE(review): the first clause only matches a leading phi node with
%% exactly two arguments; phi nodes with more arguments are not checked
%% here -- confirm that this is intended.
assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
    %% This block has multiple predecessors. Make sure that none
    %% of the predecessors have more than one successor.
    HasSingleSuccessor =
        fun({_,Pred}) ->
                case beam_ssa:successors(Pred, Blocks) of
                    [_] -> true;
                    _ -> false
                end
        end,
    true = lists:all(HasSingleSuccessor, Phis), %Assertion.
    Blocks;
assert_no_ces(_, _, Blocks) ->
    Blocks.

%% fix_bs(St0) -> St.
%%  Fix up the binary matching instructions:
%%
%%    * Insert bs_save and bs_restore instructions where needed.
%%
%%    * Combine bs_match and bs_extract instructions to bs_get
%%      instructions.

fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
    %% Collect {Context,Parent} links for all match contexts. The root
    %% of each chain (bs_start_match) is tagged as {context,Root}.
    Collect = fun(#b_set{op=bs_start_match,dst=Ctx}, Links) ->
                      [{Ctx,{context,Ctx}}|Links];
                 (#b_set{op=bs_match,dst=Ctx,args=[_,Parent|_]}, Links) ->
                      %% Link this match context to the previous one.
                      [{Ctx,Parent}|Links];
                 (_, Links) ->
                      Links
              end,
    case beam_ssa:fold_instrs_rpo(Collect, [0], [], Blocks) of
        [] ->
            %% No binary matching in this function.
            St;
        [_|_]=Links ->
            CtxChain = maps:from_list(Links),

            %% Insert position instructions where needed.
            InsertPositions = case UseBSM3 of
                                  true -> fun bs_pos_bsm3/3;
                                  false -> fun bs_pos_bsm2/3
                              end,
            {Linear1,Count} = InsertPositions(beam_ssa:linearize(Blocks),
                                              CtxChain, Count0),

            %% Rename instructions.
            Linear = bs_instrs(Linear1, CtxChain, []),

            St#st{ssa=maps:from_list(Linear),cnt=Count}
    end.

%% Insert bs_get_position and bs_set_position instructions as needed.
bs_pos_bsm3(Linear0, CtxChain, Count0) ->
    RestoreMap = bs_restores(Linear0, CtxChain, #{}, #{}),
    CtxSavePairs = maps:values(RestoreMap),

    %% Group the save points by match context.
    Family = sofs:to_external(
               sofs:relation_to_family(
                 sofs:relation(CtxSavePairs, [{context,save_point}]))),

    {SavePoints,Count1} = make_bs_pos_dict(Family, Count0, []),

    %% Note that make_bs_setpos_map/4 builds the bs_get_position
    %% instructions and make_bs_getpos_map/4 builds the
    %% bs_set_position instructions.
    {Gets,Count2} = make_bs_setpos_map(CtxSavePairs, SavePoints,
                                       Count1, []),
    {Sets,Count} = make_bs_getpos_map(maps:to_list(RestoreMap),
                                      SavePoints, Count2, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.

%% Build a map from save point variable to the bs_get_position
%% instruction that records that position. (Despite the name of this
%% function, the instructions created are bs_get_position.) The counter
%% is advanced once for every pair.
make_bs_setpos_map(Pairs, SavePoints, Count0, Acc0) ->
    {Acc,Count} =
        lists:foldl(fun({Ctx,Save}=Ps, {A,C}) ->
                            SavePoint = get_savepoint(Ps, SavePoints),
                            I = #b_set{op=bs_get_position,
                                       dst=SavePoint,args=[Ctx]},
                            {[{Save,I}|A],C+1}
                    end, {Acc0,Count0}, Pairs),
    {maps:from_list(Acc),Count}.

%% Build a map from instruction destination (Bef) to the
%% bs_set_position instruction to be inserted before that instruction.
%% (Despite the name of this function, the instructions created are
%% bs_set_position.) Each instruction gets a fresh, ignored destination.
make_bs_getpos_map(Restores, SavePoints, Count0, Acc0) ->
    {Acc,Count} =
        lists:foldl(fun({Bef,{Ctx,_}=Ps}, {A,C}) ->
                            Ignored = #b_var{name={'@ssa_ignored',C}},
                            SavePoint = get_savepoint(Ps, SavePoints),
                            I = #b_set{op=bs_set_position,dst=Ignored,
                                       args=[Ctx,SavePoint]},
                            {[{Bef,I}|A],C+1}
                    end, {Acc0,Count0}, Restores),
    {maps:from_list(Acc),Count}.

%% Return the variable that holds the saved position for the given
%% {Context,SavePoint} pair.
get_savepoint({_,_}=Ps, SavePoints) ->
    Index = map_get(Ps, SavePoints),
    #b_var{name={'@ssa_bs_position',Index}}.

%% Number all save points of all contexts, threading the counter
%% through make_bs_pos_dict_1/4, and return the result as a map.
make_bs_pos_dict(Family, Count0, Acc0) ->
    {Acc,Count} =
        lists:foldl(fun({Ctx,Pts}, {A,C}) ->
                            make_bs_pos_dict_1(Pts, Ctx, C, A)
                    end, {Acc0,Count0}, Family),
    {maps:from_list(Acc),Count}.

%% Assign consecutive numbers (starting at I0) to the save points of
%% one context. An entry {Ctx,Next} holding the next free number is
%% added last; that number is also returned.
make_bs_pos_dict_1(Pts, Ctx, I0, Acc0) ->
    {Acc,I} = lists:foldl(fun(Pt, {A,N}) ->
                                  {[{{Ctx,Pt},N}|A],N+1}
                          end, {Acc0,I0}, Pts),
    {[{Ctx,I}|Acc],I}.

%% Like bs_pos_bsm3/3, but without the OTP 22 instructions (bs_save and
%% bs_restore are used instead). This is only used when cross-compiling
%% to older versions.
bs_pos_bsm2(Linear0, CtxChain, Count0) ->
    RestoreMap = bs_restores(Linear0, CtxChain, #{}, #{}),
    CtxSavePairs = maps:values(RestoreMap),

    %% Group the save points by match context and assign slot numbers.
    Family = sofs:to_external(
               sofs:relation_to_family(
                 sofs:relation(CtxSavePairs, [{context,save_point}]))),
    Slots = make_save_point_dict(Family, []),

    {Saves,Count1} = make_save_map(CtxSavePairs, Slots, Count0, []),
    {Restores,Count} = make_restore_map(maps:to_list(RestoreMap),
                                        Slots, Count1, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.

%% Build a map from save point variable to its bs_save instruction.
%% The special 'start' slot needs no bs_save instruction and does not
%% consume a counter value.
make_save_map([], _Slots, Count, Acc) ->
    {maps:from_list(Acc),Count};
make_save_map([{Ctx,Save}=Ps|Rest], Slots, Count, Acc) ->
    case make_slot(Ps, Slots) of
        #b_literal{val=start} ->
            make_save_map(Rest, Slots, Count, Acc);
        Slot ->
            Ignored = #b_var{name={'@ssa_ignored',Count}},
            SaveI = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
            make_save_map(Rest, Slots, Count+1, [{Save,SaveI}|Acc])
    end.

%% Build a map from instruction destination (Bef) to the bs_restore
%% instruction to be inserted before that instruction.
make_restore_map(Restores, Slots, Count0, Acc0) ->
    {Acc,Count} =
        lists:foldl(fun({Bef,{Ctx,_}=Ps}, {A,C}) ->
                            Ignored = #b_var{name={'@ssa_ignored',C}},
                            Slot = make_slot(Ps, Slots),
                            I = #b_set{op=bs_restore,dst=Ignored,
                                       args=[Ctx,Slot]},
                            {[{Bef,I}|A],C+1}
                    end, {Acc0,Count0}, Restores),
    {maps:from_list(Acc),Count}.

%% Return the slot literal for a {Context,SavePoint} pair. When the
%% save point is the context itself, the special 'start' slot is used.
make_slot({Ctx,Point}=Ps, Slots) ->
    Val = case Ctx =:= Point of
              true -> start;
              false -> map_get(Ps, Slots)
          end,
    #b_literal{val=Val}.

%% Assign slot numbers (starting from 0 for each context) to all save
%% points and return the result as a map.
make_save_point_dict(Family, Acc0) ->
    Acc = lists:foldl(fun({Ctx,Pts}, A) ->
                              make_save_point_dict_1(Pts, Ctx, 0, A)
                      end, Acc0, Family),
    maps:from_list(Acc).

%% Number the save points of one context. A save point equal to the
%% context itself is the special {atom,start} save point; it does not
%% need a bs_save instruction and is therefore not given a slot.
%% The final {Ctx,N} entry records the number of slots used.
make_save_point_dict_1(Pts, Ctx, I0, Acc0) ->
    {Acc,I} = lists:foldl(fun(Pt, {A,N}) when Pt =:= Ctx ->
                                  {A,N};
                             (Pt, {A,N}) ->
                                  {[{{Ctx,Pt},N}|A],N+1}
                          end, {Acc0,I0}, Pts),
    [{Ctx,I}|Acc].

%% Walk the linearized blocks and collect the restores that are needed.
%% D maps a block label to the position map known on entry to that
%% block; a block with no entry in D starts with an empty map.
bs_restores([], _CtxChain, _D, Rs) ->
    Rs;
bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
    EntryPos = maps:get(L, D0, #{}),
    {ExitPos,Rs} = bs_restores_is(Is, CtxChain, EntryPos, Rs0),
    D = bs_update_successors(Last, ExitPos, EntryPos, D0),
    bs_restores(Bs, CtxChain, D, Rs).

%% Propagate position maps to the successors of a block. For a branch,
%% the success label receives SPos and the failure label receives FPos;
%% for a switch, every target (including the failure label) receives
%% SPos. A return has no successors.
bs_update_successors(#b_ret{}, _SPos, _FPos, D) ->
    D;
bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
    Targets = [{Lbl,SPos} || {_,Lbl} <- List],
    join_positions(Targets ++ [{Fail,SPos}], D);
bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
    Updates = [{Succ,SPos},{Fail,FPos}],
    join_positions(Updates, D).

%% Merge each {Label,PositionMap} update into D. A label not yet in D
%% simply gets the new map; otherwise the two maps are joined.
join_positions(Updates, D0) ->
    lists:foldl(
      fun({L,MapPos0}, D) ->
              case D of
                  #{L:=MapPos0} ->
                      %% Same map.
                      D;
                  #{L:=MapPos1} ->
                      %% Different maps.
                      D#{L:=join_positions_1(MapPos0, MapPos1)};
                  #{} ->
                      D#{L=>MapPos0}
              end
      end, D0, Updates).

%% Join two position maps. Keys present in only one of the maps keep
%% their position; keys present in both keep the position if the maps
%% agree and are otherwise set to 'unknown'.
join_positions_1(MapPos0, MapPos1) ->
    maps:fold(fun(Start, Pos, Acc) ->
                      case MapPos0 of
                          #{Start:=Pos} -> Acc;
                          #{Start:=_} -> Acc#{Start:=unknown};
                          #{} -> Acc#{Start=>Pos}
                      end
              end, MapPos0, MapPos1).

%% bs_restores_is(Is, CtxChain, PosMap0, Rs0) -> {PosMap,Rs}.
%%  Scan the instructions of one block while tracking, for each match
%%  context root (Start), the position variable the context currently
%%  stands at (or 'unknown'). Whenever an instruction needs the context
%%  to be at another position than the tracked one, an entry
%%  Dst => {Start,Position} is added to Rs, where Dst is the
%%  destination variable of the instruction that needs the restore.
bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
               CtxChain, PosMap0, Rs) ->
    %% A fresh match context is positioned at its own start.
    PosMap = PosMap0#{Start=>Start},
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
               CtxChain, PosMap0, Rs0) ->
    %% Start is the root context; FromPos is the position this
    %% instruction expects the context to be at.
    Start = bs_subst_ctx(NewPos, CtxChain),
    [_,FromPos|_] = Args,
    case PosMap0 of
        #{Start:=FromPos} ->
            %% Same position, no restore needed.
            PosMap = case bs_match_type(I) of
                         plain ->
                             %% Update position to new position.
                             PosMap0#{Start:=NewPos};
                         _ ->
                             %% Position will not change (test_unit
                             %% instruction or no instruction at
                             %% all).
                             PosMap0#{Start:=FromPos}
                     end,
            bs_restores_is(Is, CtxChain, PosMap, Rs0);
        #{Start:=_} ->
            %% Different positions, might need a restore instruction.
            case bs_match_type(I) of
                none ->
                    %% The tail test will be optimized away.
                    %% No need to do a restore.
                    PosMap = PosMap0#{Start:=FromPos},
                    bs_restores_is(Is, CtxChain, PosMap, Rs0);
                test_unit ->
                    %% This match instruction will be replaced by
                    %% a test_unit instruction. We will need a
                    %% restore. The new position will be the position
                    %% restored to (NOT NewPos).
                    PosMap = PosMap0#{Start:=FromPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs);
                plain ->
                    %% Match or skip. Position will be changed.
                    PosMap = PosMap0#{Start:=NewPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs)
            end
    end;
bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
               CtxChain, PosMap, Rs) ->
    %% An extract must directly use the current position; it never
    %% needs a restore and does not move the position.
    Start = bs_subst_ctx(FromPos, CtxChain),
    #{Start:=FromPos} = PosMap,                 %Assertion.
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0) ->
    %% First make sure that every match context passed to the call is
    %% at the expected position, then mark those contexts as being at
    %% an unknown position after the call.
    {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
    %% We can land here from any point, so all positions are invalid.
    PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0)
  when Op =:= bs_test_tail;
       Op =:= bs_get_tail ->
    %% These instructions need their context argument to be at the
    %% given position, but the position is not invalidated afterwards.
    {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
    %% All other instructions leave the position map unchanged.
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([], _CtxChain, PosMap, Rs) ->
    {PosMap,Rs}.

%% Classify a bs_match instruction:
%%   none      - skipping the rest of a binary with unit 1
%%   test_unit - skipping the rest of a binary with any other unit
%%   plain     - everything else
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                             #b_literal{val=binary},_Flags,
                             #b_literal{val=all},#b_literal{val=1}]}) ->
    none;
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                             #b_literal{val=binary},_Flags,
                             #b_literal{val=all},#b_literal{}]}) ->
    test_unit;
bs_match_type(_) ->
    plain.

%% Call instructions leave the match position in an undefined state,
%% requiring us to invalidate each affected argument.
bs_invalidate_pos(Args, PosMap0, CtxChain) ->
    Invalidate =
        fun(#b_var{}=Arg, PosMap) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case PosMap of
                    #{Start:=_} ->
                        PosMap#{Start:=unknown};
                    #{} ->
                        %% Not a match context.
                        PosMap
                end;
           (_, PosMap) ->
                %% Not a variable.
                PosMap
        end,
    lists:foldl(Invalidate, PosMap0, Args).

%% For every argument that is a match context not currently at the
%% position given by the argument, record that a restore is needed
%% before the instruction defining Dst and update the position map.
%% Returns {Rs,PosMap}.
bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) ->
    Restore =
        fun(#b_var{}=Arg, {Rs,PosMap}=Acc) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case PosMap of
                    #{Start:=Arg} ->
                        %% Same position, no restore needed.
                        Acc;
                    #{Start:=_} ->
                        %% Different positions, need a restore
                        %% instruction.
                        {Rs#{Dst=>{Start,Arg}},PosMap#{Start:=Arg}};
                    #{} ->
                        %% Not a match context.
                        Acc
                end;
           (_, Acc) ->
                Acc
        end,
    lists:foldl(Restore, {Rs0,PosMap0}, Args).

%% Insert all bs_save and bs_restore instructions.

bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
    %% No instruction needs to be transformed.
    Identity = fun(I) -> I end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, Identity).

bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
    %% The old instructions require bs_start_match to be annotated with
    %% the number of position slots it needs (0 if none are recorded).
    Annotate = fun(#b_set{op=bs_start_match,dst=Dst}=I) ->
                       NumSlots = maps:get(Dst, SavePoints, 0),
                       beam_ssa:add_anno(num_slots, NumSlots, I);
                  (I) ->
                       I
               end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, Annotate).

%% Insert restores into each block. A save belonging to an instruction
%% in the current block is inserted into the block that follows it in
%% the list, so the remaining blocks are updated before recursing.
bs_insert_1([], _Saves, _Restores, _Slots, _XFrm) ->
    [];
bs_insert_1([{Lbl,#b_blk{is=Is0}=Blk0}|Rest0], Saves, Restores, Slots, XFrm) ->
    Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
    Blk = Blk0#b_blk{is=Is},
    Rest = bs_insert_saves(Is, Rest0, Saves),
    [{Lbl,Blk}|bs_insert_1(Rest, Saves, Restores, Slots, XFrm)].

%% Apply XFrm to every instruction and put the restore instruction
%% (if any) in front of each instruction that may need one.
bs_insert_is_1(Is, Restores, _SavePoints, XFrm) ->
    MayNeedRestore = [bs_test_tail,bs_get_tail,bs_match,call],
    lists:flatmap(
      fun(#b_set{op=Op,dst=Dst}=I0) ->
              I = XFrm(I0),
              case lists:member(Op, MayNeedRestore) of
                  true ->
                      case Restores of
                          #{Dst:=R} -> [R,I];
                          #{} -> [I]
                      end;
                  false ->
                      [I]
              end
      end, Is).

%% Find the first instruction that has a save associated with it and
%% insert that save into the first of the following blocks.
bs_insert_saves([], Bs, _Saves) ->
    Bs;
bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
    case maps:find(Dst, Saves) of
        {ok,Save} ->
            bs_insert_save(Save, Bs);
        error ->
            bs_insert_saves(Is, Bs, Saves)
    end.

%% Insert the save at the beginning of the first block, but after a
%% leading bs_extract instruction if there is one.
bs_insert_save(Save, [{L,#b_blk{is=[#b_set{op=bs_extract}=Ex|Is]}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Ex,Save|Is]}}|Bs];
bs_insert_save(Save, [{L,#b_blk{is=Is}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Save|Is]}}|Bs].

%% Translate bs_match instructions to bs_get, bs_match_string,
%% or bs_skip. Also rename match context variables to use the
%% variable assigned to by the start_match instruction.

bs_instrs([], _CtxChain, Acc) ->
    lists:reverse(Acc);
bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
    {Is,Acc} =
        case bs_instrs_is(Is0, CtxChain, []) of
            [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is1] ->
                %% Drop this instruction. Rewrite the corresponding
                %% bs_match instruction in the previous block to
                %% a bs_get instruction.
                {Is1,bs_combine(Dst, Ctx, Acc0)};
            Is1 ->
                {Is1,Acc0}
        end,
    bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]).

%% Replace every match context argument with the root context and
%% rename skip/string bs_match instructions to bs_skip and
%% bs_match_string. All other instructions only get their arguments
%% substituted. The rewritten instructions are appended to the
%% (reversed) accumulator.
bs_instrs_is(Is, CtxChain, Acc) ->
    Rewrite =
        fun(#b_set{op=Op,args=Args0}=I) ->
                Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
                case {Op,Args} of
                    {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
                        I#b_set{op=bs_skip,args=[Type,Ctx|As]};
                    {bs_match,[#b_literal{val=string},Ctx|As]} ->
                        I#b_set{op=bs_match_string,args=[Ctx|As]};
                    {_,_} ->
                        I#b_set{args=Args}
                end
        end,
    lists:reverse(Acc, [Rewrite(I) || I <- Is]).

%% Combine a bs_match instruction with the destination register
%% taken from a bs_extract instruction.

bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
    %% The block must end with a bs_match instruction followed by
    %% one more instruction (named Succeeded here).
    [#b_set{}=Succeeded0,
     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|RevPrefix] =
        lists:reverse(Is0),
    BsGet = BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
    Succeeded = Succeeded0#b_set{args=[Dst]},
    Is = lists:reverse(RevPrefix, [BsGet,Succeeded]),
    [{L,Blk#b_blk{is=Is}}|Acc].

%% Follow the context chain from a variable up to the root match
%% context. Anything that is not part of a chain is returned unchanged.
bs_subst_ctx(#b_var{}=Var, CtxChain) ->
    case maps:find(Var, CtxChain) of
        {ok,{context,Ctx}} ->
            Ctx;
        {ok,ParentCtx} ->
            bs_subst_ctx(ParentCtx, CtxChain);
        error ->
            %% Not a match context variable.
            Var
    end;
bs_subst_ctx(Other, _CtxChain) ->
    Other.

%% legacy_bs(St0) -> St.
%%  Binary matching instructions in OTP 21 and earlier don't support
%%  a Y register as destination. If St#st.use_bsm3 is false,
%%  we will need to rewrite those instructions so that the result
%%  is first put in an X register and then moved to a Y register
%%  if the operation succeeded.

legacy_bs(#st{use_bsm3=true}=St) ->
    %% Nothing to do.
    St;
legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
    %% The variables that have been reserved a Y register.
    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
    {Linear,Count} = legacy_bs(beam_ssa:linearize(Blocks0), IsYreg,
                               Count0, #{}, []),
    St#st{ssa=maps:from_list(Linear),cnt=Count}.

%% Rewrite the blocks one at a time. Copies maps a block label to a
%% 'copy' instruction that is to be inserted first in that block.
%% The blocks are returned in reverse order.
legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
    {Acc,Count};
legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
    #b_blk{is=Is0,last=Last} = Blk,
    Is1 = case maps:find(L, Copies0) of
              {ok,Copy} -> [Copy|Is0];
              error -> Is0
          end,
    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]).

%% legacy_bs_is(Is, Last, IsYreg, Count0, Copies0, Acc) ->
%%       {Is,Count,Copies}.
%%  Rewrite the instructions of one block. Only the last two
%%  instructions are of interest: if they are a bs_get or bs_init
%%  whose destination is a Y register variable, followed by the
%%  'succeeded' test for it, the result is redirected to a fresh
%%  temporary variable and a 'copy' to the original destination is
%%  scheduled for the success block.
legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
             Last, IsYreg, Count0, Copies0, Acc) ->
    NeedsFix = is_map_key(Dst, IsYreg) andalso
        case Op of
            bs_get -> true;
            bs_init -> true;
            _ -> false
        end,
    case NeedsFix of
        true ->
            %% Let the instruction and its 'succeeded' test use a
            %% temporary destination instead of the Y register variable.
            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
            Count = Count0 + 1,
            I = I0#b_set{dst=TempDst},
            SuccI = SuccI0#b_set{args=[TempDst]},
            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
            %% The block must end with a branch on the 'succeeded'
            %% result; the copy goes first in the success block.
            #b_br{bool=SuccDst,succ=SuccL} = Last,
            Copies = Copies0#{SuccL=>Copy},
            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
        false ->
            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
    end;
legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
    %% Not at the end of the block yet; keep the instruction as is.
    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
    {reverse(Acc),Count,Copies}.

%% sanitize(St0) -> St.
%%  Remove constructs that can cause problems later:
%%
%%  * Unreachable blocks may cause problems for determination of
%%  dominators.
%%
%%  * Some instructions (such as get_hd) don't accept literal
%%  arguments. Evaluate the instructions and remove them.

sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
    %% Visit the blocks in reverse post order, starting with no
    %% known values.
    {Blocks,Count} = sanitize(beam_ssa:rpo(Blocks0), Count0, Blocks0, #{}),
    St#st{ssa=Blocks,cnt=Count}.

%% Sanitize the instructions of each block. Values maps variables whose
%% defining instruction has been evaluated to the resulting literal.
sanitize([L|Ls], Count0, Blocks0, Values0) ->
    #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
    case sanitize_is(Is0, Count0, Values0, false, []) of
        {Is,Count,Values} ->
            sanitize(Ls, Count, Blocks0#{L:=Blk#b_blk{is=Is}}, Values);
        no_change ->
            sanitize(Ls, Count0, Blocks0, Values0)
    end;
sanitize([], Count, Blocks0, Values) ->
    %% Replace all uses of the removed variables with their values.
    Blocks = case map_size(Values) of
                 0 -> Blocks0;
                 _ -> beam_ssa:rename_vars(Values, [0], Blocks0)
             end,

    %% Unreachable blocks can cause problems for the dominator
    %% calculations.
    Ls = beam_ssa:rpo(Blocks),
    Reachable = gb_sets:from_list(Ls),
    case gb_sets:size(Reachable) of
        N when N =:= map_size(Blocks) ->
            {Blocks,Count};
        _ ->
            {remove_unreachable(Ls, Blocks, Reachable, []),Count}
    end.

%% sanitize_is(Is, Count, Values, Changed, Acc) ->
%%       no_change | {Is,Count,Values}.
%%  Sanitize the instructions in one block. Known literal values are
%%  substituted into the arguments, and instructions that can be
%%  evaluated are removed with their value recorded in Values.
%%  Returns 'no_change' if no instruction was modified or removed.
sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
            Count0, Values, Changed, Acc) ->
    case sanitize_args(Args0, Values) of
        [#b_literal{}=Map,Key] ->
            %% Bind the literal map to a variable.
            {MapVar,Count} = new_var('@ssa_map', Count0),
            I = I0#b_set{args=[MapVar,Key]},
            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
            sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
        [_,_]=Args0 ->
            %% The arguments were not changed by the substitution.
            sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
        [_,_]=Args ->
            %% NOTE(review): Changed is passed on unmodified here even
            %% though the arguments were updated -- confirm intended.
            I = I0#b_set{args=Args},
            sanitize_is(Is, Count0, Values, Changed, [I|Acc])
    end;
sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
            Count, Values, Changed0, Acc) ->
    Args = sanitize_args(Args0, Values),
    case sanitize_instr(Op, Args, I0) of
        {value,Value0} ->
            %% The instruction was evaluated. Drop it and remember
            %% its value.
            Value = #b_literal{val=Value0},
            sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
        {ok,I} ->
            %% The instruction was replaced with another instruction.
            sanitize_is(Is0, Count, Values, true, [I|Acc]);
        ok ->
            %% Keep the instruction, using the substituted arguments.
            I = I0#b_set{args=Args},
            Changed = Changed0 orelse Args =/= Args0,
            sanitize_is(Is0, Count, Values, Changed, [I|Acc])
    end;
sanitize_is([], Count, Values, Changed, Acc) ->
    case Changed of
        true ->
            {reverse(Acc),Count,Values};
        false ->
            no_change
    end.

%% Replace each argument that has a known value with that value.
sanitize_args(Args, Values) ->
    [maps:get(Arg, Values, Arg) || Arg <- Args].

%% sanitize_instr(Op, Args, I) -> {value,Term} | {ok,I} | ok.
%%  Try to evaluate or replace an instruction whose arguments are
%%  literals:
%%
%%    {value,Term}  the instruction can be removed; Term is its value
%%    {ok,I}        the instruction is to be replaced with I
%%    ok            the instruction is to be kept
sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
    case erl_bifs:is_pure(erlang, Bif, 1) of
        false ->
            ok;
        true ->
            %% Keep the instruction if evaluating it raises an error.
            try
                {value,erlang:Bif(Lit)}
            catch
                error:_ ->
                    ok
            end
    end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
    %% Keep the instruction if evaluating it raises an error.
    try
        {value,erlang:Bif(Lit1, Lit2)}
    catch
        error:_ ->
            ok
    end;
sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
    {value,Hd};
sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
    {value,Tl};
sanitize_instr(get_tuple_element, [#b_literal{val=T},
                                   #b_literal{val=I}], _I)
  when I < tuple_size(T) ->
    %% The index is zero-based, while element/2 is one-based.
    {value,element(I+1, T)};
sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
    {value,case Lit of
               [_|_] -> true;
               _ -> false
           end};
sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
                                 #b_literal{val=Arity},
                                 #b_literal{val=Tag}], _I)
  when is_integer(Arity), is_atom(Tag) ->
    %% If Tuple is not a tuple, the guard test fails and the
    %% result is false.
    if
        tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
            {value,true};
        true ->
            {value,false}
    end;
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
    %% A literal size that is not a non-negative integer is turned
    %% into a call to erlang:error(badarg).
    if
        is_integer(Sz), Sz >= 0 -> ok;
        true -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
    %% Same size check as for 'new' above.
    if
        is_integer(Sz), Sz >= 0 -> ok;
        true -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(succeeded, [#b_literal{}], _I) ->
    %% The tested instruction has been replaced with a literal value.
    {value,true};
sanitize_instr(_, _, _) -> ok.

%% Turn the instruction into a call to erlang:error(badarg).
sanitize_badarg(I) ->
    Mod = #b_literal{val=erlang},
    Name = #b_literal{val=error},
    Reason = #b_literal{val=badarg},
    ErrorFun = #b_remote{mod=Mod,name=Name,arity=1},
    I#b_set{op=call,args=[ErrorFun,Reason]}.

%% Build a new block map holding only the blocks in Ls. References to
%% blocks that are not in Reachable are removed from the phi nodes.
remove_unreachable(Ls, Blocks, Reachable, Acc) ->
    Prune =
        fun(L) ->
                #b_blk{is=Is0} = Blk = map_get(L, Blocks),
                case split_phis(Is0) of
                    {[],_} ->
                        {L,Blk};
                    {[_|_]=Phis,Rest} ->
                        Is = [prune_phi(Phi, Reachable) ||
                                 Phi <- Phis] ++ Rest,
                        {L,Blk#b_blk{is=Is}}
                end
        end,
    maps:from_list(lists:reverse([Prune(L) || L <- Ls], Acc)).

%% Keep only the phi arguments whose predecessor block is reachable.
prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
    IsReachable = fun({_,Pred}) -> gb_sets:is_element(Pred, Reachable);
                     (_) -> false
                  end,
    Phi#b_set{args=lists:filter(IsReachable, Args0)}.

%%%
%%% Fix tuples.
%%%

%% fix_tuples(St0) -> St.
%%  If compatibility with a previous version of Erlang has been
%%  requested, tuple creation must be split into two instructions to
%%  mirror the way tuples are created in BEAM prior to OTP 22.
%%  Each put_tuple instruction is split into put_tuple_arity followed
%%  by put_tuple_elements.

fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Split = fun(#b_set{op=put_tuple,args=Elements}=Put, C0) ->
                    Arity = #b_literal{val=length(Elements)},
                    %% put_tuple_elements gets a fresh destination
                    %% variable; the tuple itself is defined by
                    %% put_tuple_arity.
                    {Ignore,C} = new_var('@ssa_ignore', C0),
                    PutArity = Put#b_set{op=put_tuple_arity,args=[Arity]},
                    PutElements = #b_set{dst=Ignore,op=put_tuple_elements,
                                         args=Elements},
                    {[PutArity,PutElements],C};
               (Other, C) ->
                    {[Other],C}
            end,
    {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(Split, [0],
                                                     Count0, Blocks0),
    St#st{ssa=Blocks,cnt=Count}.

%%%
%%% Introduce the set_tuple_element instructions to make
%%% multiple-field record updates faster.
%%%
%%% The expansion of record field updates, when more than one field is
%%% updated, but not a majority of the fields, will create a sequence of
%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
%%% first call is the original record tuple, and in the subsequent calls
%%% Tuple is the result of the previous call. Furthermore, all Index
%%% values are constant positive integers, and the first call to
%%% `setelement` will have the greatest index. Thus all the following
%%% calls do not actually need to test at run-time whether Tuple has type
%%% tuple, nor that the index is within the tuple bounds.
%%%
%%% Since this optimization introduces destructive updates, it used to
%%% be done as the very last Core Erlang pass before going to
%%% lower-level code. However, it turns out that this kind of destructive
%%% update is awkward also in SSA code and can prevent or complicate
%%% type analysis and aggressive optimizations.
%%%
%%% NOTE: Because there are no write barriers in the system, this kind of
%%% optimization can only be done when we are sure that garbage
%%% collection will not be triggered between the creation of the tuple
%%% and the destructive updates - otherwise we might insert pointers
%%% from an older generation to a newer.
%%%

use_set_tuple_element(#st{ssa=Blocks}=St) ->
    UseCounts = count_uses(Blocks),
    %% Visit the blocks in the reverse of the reverse post order.
    Order = lists:reverse(beam_ssa:rpo(Blocks)),
    St#st{ssa=use_ste_1(Order, UseCounts, Blocks)}.

%% use_ste_1([Label], Uses, Blocks0) -> Blocks.
%%  For each block, first try the optimization across the block
%%  boundary and then within the block itself. The block map is
%%  only updated when the instructions actually changed.

use_ste_1([], _Uses, Blocks) ->
    Blocks;
use_ste_1([Lbl|Lbls], Uses, Blocks0) ->
    {Blk,Blocks1} = use_ste_across(Lbl, Uses, Blocks0),
    #b_blk{is=Before} = Blk,
    After = use_ste_is(Before, Uses),
    Blocks = if
                 After =:= Before ->
                     Blocks1;
                 true ->
                     Blocks1#{Lbl:=Blk#b_blk{is=After}}
             end,
    use_ste_1(Lbls, Uses, Blocks).

%%% Optimize within a single block.

%% use_ste_is([Instruction], Uses) -> [Instruction].
%%  Process the instructions from the last one to the first one.
%%  Each call to setelement/3 is handed to use_ste_call/4 together
%%  with the already processed instructions that follow it.

use_ste_is(Is, Uses) ->
    lists:foldr(fun(#b_set{}=Instr, Following) ->
                        case extract_ste(Instr) of
                            none ->
                                [Instr|Following];
                            SetElement ->
                                use_ste_call(SetElement, Instr,
                                             Following, Uses)
                        end
                end, [], Is).

%% use_ste_call(Extracted, Call1, Is0, Uses) -> Is.
%%  Call1 is a call to setelement/3, described by the tuple Extracted
%%  (see extract_ste/1), and Is0 are the instructions following it.
%%
%%  If the next call in Is0 is a setelement/3 that updates the result
%%  of Call1 (only get_tuple_element instructions may come in
%%  between), its position is smaller than the position in Call1, and
%%  the result of Call1 is used only once, then the second call is
%%  rewritten to a set_tuple_element instruction. Otherwise
%%  [Call1|Is0] is returned unchanged.
%%
%%  Both positions must be integers. Testing with `>` alone is not
%%  enough: in the Erlang term order every non-number (an atom, for
%%  example) is greater than any number, which would make the
%%  subtraction below fail, and a float position would result in a
%%  non-integer index in the set_tuple_element instruction.

use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
    case get_ste_call(Is0, []) of
        {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
          when is_integer(Pos0), is_integer(Pos1),
               Pos1 > 0, Pos0 > Pos1 ->
            case is_single_use(Dst0, Uses) of
                true ->
                    %% The first call now produces the final tuple
                    %% (Dst1), which is then updated in place.
                    Call = Call1#b_set{dst=Dst1},
                    %% set_tuple_element is given Pos1-1 as its index.
                    Args = [Val1,Dst1,#b_literal{val=Pos1-1}],
                    Dsetel = Call2#b_set{op=set_tuple_element,
                                         dst=Dst0,
                                         args=Args},
                    [Call|Prefix] ++ [Dsetel|Is];
                false ->
                    [Call1|Is0]
            end;
        _ ->
            [Call1|Is0]
    end.

%% get_ste_call([Instruction], Acc) ->
%%        none | {Prefix,Extracted,Call,RestInstructions}.
%%  Skip over any leading get_tuple_element instructions (returned in
%%  order as Prefix). If the instruction that follows is a call to
%%  setelement/3, return it together with its extracted description
%%  and the remaining instructions. In all other cases return none.

get_ste_call([#b_set{op=Op}=Instr|Rest], Skipped) ->
    case Op of
        get_tuple_element ->
            get_ste_call(Rest, [Instr|Skipped]);
        call ->
            case extract_ste(Instr) of
                none ->
                    none;
                SetElement ->
                    {lists:reverse(Skipped),SetElement,Instr,Rest}
            end;
        _ ->
            none
    end;
get_ste_call(_, _) ->
    none.

%% extract_ste(Instruction) -> none | {Dst,Position,Tuple,Value}.
%%  If the instruction is a remote call to erlang:setelement/3 with
%%  a literal position, return its destination variable, the value
%%  of the position literal, the tuple argument, and the new value.

extract_ste(#b_set{op=call,dst=Dst,
                   args=[#b_remote{mod=#b_literal{val=erlang},
                                   name=#b_literal{val=setelement}},
                         #b_literal{val=Pos},Tuple,Val]}) ->
    {Dst,Pos,Tuple,Val};
extract_ste(#b_set{}) ->
    none.

%%% Optimize across blocks within a try/catch block.

%% use_ste_across(Label, Uses, Blocks0) -> {Blk,Blocks}.
%%  Return the (possibly rewritten) block for Label and the
%%  (possibly updated) block map. The optimization is only attempted
%%  for blocks that end in a branch on a variable; if it turns out
%%  not to be possible, the block and the map are returned unchanged.

use_ste_across(Lbl, Uses, Blocks) ->
    case map_get(Lbl, Blocks) of
        #b_blk{last=#b_br{bool=#b_var{}}}=Candidate ->
            try use_ste_across_1(Lbl, Candidate, Uses, Blocks) of
                Optimized ->
                    Optimized
            catch
                throw:not_possible ->
                    {Candidate,Blocks}
            end;
        #b_blk{}=Other ->
            {Other,Blocks}
    end.

%% use_ste_across_1(Label, Blk0, Uses, Blocks0) -> {Blk,Blocks}.
%%  Try to combine the remote call that ends block Label with the
%%  remote call in its success block (Next), using use_ste_is/2 on
%%  the two calls as if they were in the same block. On success the
%%  updated block for Label and the updated block map are returned.
%%  Throws not_possible if any of the requirements is not fulfilled.

use_ste_across_1(L, Blk0, Uses, Blocks0) ->
    #b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0,
    case reverse(IsThis) of
        %% The block must end with a remote call followed by a
        %% 'succeeded' test of its result, and the terminator must
        %% branch on the outcome of that test.
        [#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0,
         #b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] ->
            %% The outcome of the test must be used only once, and
            %% the result of the call exactly twice (one of the uses
            %% being the 'succeeded' instruction matched above).
            case is_single_use(Bool, Uses) andalso
                is_n_uses(2, Result, Uses) of
                true -> ok;
                false -> throw(not_possible)
            end,
            Call2 = use_ste_across_next(Next, Uses, Blocks0),
            Is = [Call1,Call2],
            %% Don't count the use in the 'succeeded' instruction, so
            %% that the single-use test in use_ste_call/4 can succeed.
            case use_ste_is(Is, decrement_uses(Result, Uses)) of
                [#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] ->
                    %% The second call was rewritten. Put the
                    %% set_tuple_element instruction in the Next block
                    %% and let 'succeeded' test the new destination
                    %% of the first call.
                    Blocks1 = use_ste_fix_next(Ste, Next, Blocks0),
                    Succ = Succ0#b_set{args=[Call#b_set.dst]},
                    Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])},
                    Blocks = Blocks1#{L:=Blk},
                    {Blk,Blocks};
                _ ->
                    throw(not_possible)
            end;
        _ ->
            throw(not_possible)
    end.

%% use_ste_across_next(NextLabel, Uses, Blocks) -> Call.
%%  Return the call instruction of block NextLabel, provided that the
%%  block consists of exactly a remote call followed by a 'succeeded'
%%  test of its result, that the block branches on that test, that
%%  the outcome of the test is used once, and that the result of the
%%  call is used exactly twice. Otherwise not_possible is thrown.

use_ste_across_next(Next, Uses, Blocks) ->
    case map_get(Next, Blocks) of
        #b_blk{is=[#b_set{op=call,dst=Res,args=[#b_remote{}|_]}=Call,
                   #b_set{op=succeeded,dst=Flag,args=[Res]}],
               last=#b_br{bool=Flag}} ->
            case is_single_use(Flag, Uses) andalso
                is_n_uses(2, Res, Uses) of
                true -> Call;
                false -> throw(not_possible)
            end;
        #b_blk{} ->
            throw(not_possible)
    end.

%% use_ste_fix_next(Ste, NextLabel, Blocks0) -> Blocks.
%%  Replace the call and the 'succeeded' instruction in block
%%  NextLabel with the set_tuple_element instruction Ste. The
%%  condition of the branch is replaced with the literal 'true'
%%  and the terminator is normalized.

use_ste_fix_next(Ste, Next, Blocks) ->
    Old = map_get(Next, Blocks),
    #b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=CondBr} = Old,
    AlwaysTaken = CondBr#b_br{bool=#b_literal{val=true}},
    New = Old#b_blk{is=[Ste],last=beam_ssa:normalize(AlwaysTaken)},
    Blocks#{Next:=New}.

%% Count how many times each variable is used.

count_uses(Blocks) ->
    AllBlocks = maps:values(Blocks),
    NoUses = #{},
    count_uses_blk(AllBlocks, NoUses).

%% count_uses_blk([Block], CountMap0) -> CountMap.
%%  Count the uses of each variable in the instructions and in the
%%  terminator of every block. The count saturates at 3, so a count
%%  of 3 means "three or more uses".

count_uses_blk([], Counts) ->
    Counts;
count_uses_blk([#b_blk{is=Is,last=Last}|Blks], Counts0) ->
    Bump = fun(Var, Counts) ->
                   case maps:get(Var, Counts, 0) of
                       3 -> Counts;
                       N -> Counts#{Var=>N+1}
                   end
           end,
    CountOne = fun(Instr, Counts) ->
                       lists:foldl(Bump, Counts, beam_ssa:used(Instr))
               end,
    Counts1 = lists:foldl(CountOne, Counts0, Is),
    count_uses_blk(Blks, CountOne(Last, Counts1)).

%% decrement_uses(Var, Uses0) -> Uses.
%%  Decrement the use count for Var by one. Var must be present
%%  in the map.

decrement_uses(V, Uses) ->
    #{V:=Count} = Uses,
    maps:update(V, Count - 1, Uses).

%% is_n_uses(N, Var, Uses) -> true|false.
%%  Test whether the use count recorded for Var is exactly N.
%%  Returns false if there is no count for Var.

is_n_uses(N, V, Uses) ->
    case Uses of
        #{V:=Count} -> Count =:= N;
        #{} -> false
    end.

%% is_single_use(Var, Uses) -> true|false.
%%  Test whether the use count recorded for Var is exactly 1.
%%  Returns false if there is no count for Var.

is_single_use(V, Uses) ->
    case Uses of
        #{V:=Count} -> Count =:= 1;
        #{} -> false
    end.

%%%
%%% Find out where frames should be placed.
%%%

%% place_frames(St0) -> St.
%%   Return a list of the labels for the blocks that need stack frame
%%   allocation instructions.
%%
%%   This function attempts to place stack frames as tight as possible
%%   around the code, to avoid building stack frames for code paths
%%   that don't need one.
%%
%%   Stack frames are placed in blocks that dominate all of their
%%   descendants. That guarantees that the deallocation instructions
%%   cannot be reached from other execution paths that didn't set up
%%   a stack frame or set up a stack frame with a different size.

place_frames(#st{ssa=Blocks}=St) ->
    {Dominators,_} = beam_ssa:dominators(Blocks),
    Order = beam_ssa:rpo(Blocks),
    %% Start with no frames placed and no blocks tried. The set of
    %% tried blocks is only needed during the traversal.
    {Frames,_Tried} = place_frames_1(Order, Blocks, Dominators,
                                     gb_sets:empty(), []),
    St#st{frames=Frames}.

%% place_frames_1([Label], Blocks, Dominators, Tried0, Frames0) ->
%%        {Frames,Tried}.
%%  Visit the blocks in the given order and collect the labels of
%%  the blocks in which stack frames are to be placed.
%%
%%  For a block that does not need a frame itself, the remaining
%%  blocks are processed inside a 'try', so that a {need_frame,...}
%%  exception thrown for a later block can be handled here if this
%%  block dominates the block that needs the frame. An exception that
%%  is not handled by any block propagates to the caller.

place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
    Blk = map_get(L, Blocks),
    case need_frame(Blk) of
        true ->
            %% This block needs a frame. Try to place it here.
            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),

            %% Successfully placed. Try to place more frames in descendants
            %% that are not dominated by this block.
            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
        false ->
            try
                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
            catch
                throw:{need_frame,For,Tried1}=Reason ->
                    %% A descendant block needs a stack frame. Try to
                    %% place it here.
                    case is_dominated_by(For, L, Doms) of
                        true ->
                            %% Try to place a frame here. The frames
                            %% collected before the exception was thrown
                            %% are lost, so we start over from Frames0,
                            %% but keep the set of tried blocks (Tried1).
                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
                                                            Tried1, Frames0),
                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
                        false ->
                            %% Wrong place. This block does not dominate
                            %% the block that needs the frame. Pass it on
                            %% to our ancestors.
                            throw(Reason)
                    end
            end
    end;
place_frames_1([], _, _, Tried, Frames) ->
    {Frames,Tried}.

%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
%%  Try to place a frame in this block. This function returns
%%  successfully if it either succeeds at placing a frame in this
%%  block, if an ancestor that dominates this block has already placed
%%  a frame, or if we have already tried to put a frame in this block.
%%
%%  A {need_frame,Label,Tried} exception will be thrown if this
%%  block is not suitable for having a stack frame (i.e. it does not
%%  dominate all of its descendants, or it is a loop header). The
%%  exception means that an ancestor will have to place the frame
%%  needed by this block.

do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
    AlreadyTried = gb_sets:is_element(L, Tried0),
    if
        AlreadyTried ->
            %% Nothing more to do; this block has been tried before.
            {Frames,Tried0};
        true ->
            %% First attempt for this block. Remember that we tried.
            Tried = gb_sets:insert(L, Tried0),
            case place_frame_here(L, Blocks, Doms, Frames) of
                yes ->
                    %% A frame is needed and this is a safe place.
                    {[L|Frames],Tried};
                no ->
                    %% Already covered by a frame in an ancestor.
                    {Frames,Tried};
                ancestor ->
                    %% Not a suitable block for the frame. Ask the
                    %% ancestors to place it.
                    throw({need_frame,L,Tried})
            end
    end.

%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
%%  Determine whether a frame should be placed in block Label.

place_frame_here(L, Blocks, Doms, Frames) ->
    Covered = lists:any(fun(FrameLbl) ->
                                is_dominated_by(L, FrameLbl, Doms)
                        end, Frames),
    case Covered of
        true ->
            %% A block that dominates this block already has a
            %% frame. Placing another one here is neither needed
            %% nor allowed.
            no;
        false ->
            %% A frame is needed. It can be placed here if this
            %% block dominates all blocks reachable from it as well
            %% as all predecessors mentioned in its phi nodes.
            Reachable = beam_ssa:rpo([L], Blocks),
            PhiPreds = phi_predecessors(L, Blocks),
            MustDominate = ordsets:from_list(PhiPreds ++ Reachable),
            DominatesAll =
                lists:all(fun(Lbl) ->
                                  %% ?BADARG_BLOCK defines no variables
                                  %% and calls erlang:error(badarg), so
                                  %% it is safe to place the frame in L
                                  %% whether or not L dominates it.
                                  Lbl =:= ?BADARG_BLOCK orelse
                                      is_dominated_by(Lbl, L, Doms)
                          end, MustDominate),

            %% A loop header is never a suitable block for a frame.
            LoopHeader = is_loop_header(L, Blocks),
            case DominatesAll andalso not LoopHeader of
                true -> yes;
                false -> ancestor
            end
    end.

%% phi_predecessors(Label, Blocks) ->
%%  Return all predecessors referenced in phi nodes.

phi_predecessors(L, Blocks) ->
    #b_blk{is=Is} = map_get(L, Blocks),
    %% Collect the predecessor label of every argument of every
    %% phi node; all other instructions contribute nothing.
    lists:flatmap(fun(#b_set{op=phi,args=Args}) ->
                          [Pred || {_,Pred} <- Args];
                     (_) ->
                          []
                  end, Is).

%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%%  Test whether block Label is dominated by block DominatedBy.

is_dominated_by(L, DomBy, Doms) ->
    %% Doms maps each label to the list of labels that dominate it.
    lists:member(DomBy, map_get(L, Doms)).

%% need_frame(#b_blk{}) -> true|false.
%%  Test whether any of the instructions in the block requires a stack frame.

need_frame(#b_blk{is=Is,last=Last}) ->
    %% For a block that ends with a return, the returned value is
    %% needed to find out whether a final call is a tail call.
    Context = case Last of
                  #b_ret{arg=Ret} -> {return,Ret};
                  _ -> body
              end,
    need_frame_1(Is, Context).

%% need_frame_1([Instruction], Context) -> true|false.
%%  Test whether any of the instructions requires a stack frame.
%%  Context is one of:
%%
%%    {return,Ret}  the block ends with a return of Ret
%%    body          the block does not end with a return, or the
%%                  final call is not returned directly
%%    tail          the final call is the value being returned
%%
%%  The order of the clauses is significant.

need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
    %% Since make_fun clobbers X registers, a stack frame is needed if
    %% any of the following instructions use any other variable than
    %% the one holding the reference to the created fun.
    need_frame_1(Is, Context) orelse
        case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
            [Fun] -> false;
            [_|_] -> true
        end;
need_frame_1([#b_set{op=new_try_tag}|_], _) ->
    true;
need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
    %% The last instruction in a block that ends with a return is
    %% a call. Find out whether it is a tail call and re-examine
    %% the call using the clause below.
    if
        Val =:= Ret -> need_frame_1(Is, tail);
        true -> need_frame_1(Is, body)
    end;
need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
    case Func of
        #b_remote{mod=#b_literal{val=Mod},
                  name=#b_literal{val=Name},
                  arity=Arity} when is_atom(Mod), is_atom(Name) ->
            case erl_bifs:is_exit_bif(Mod, Name, Arity) of
                true ->
                    %% No frame is needed for a call to an exit BIF.
                    false;
                false ->
                    %% A frame is needed unless this is a tail call
                    %% (tail context and no instructions following)
                    %% to something that is not a trapping BIF.
                    Context =:= body orelse
                        Is =/= [] orelse
                        is_trap_bif(Mod, Name, Arity)
                end;
        #b_remote{} ->
            %% This is an apply(), which always needs a frame.
            true;
        #b_local{} ->
            %% A local call needs a frame unless it is a tail call.
            Context =:= body orelse Is =/= [];
        _ ->
             %% A fun call always needs a frame.
            true
    end;
need_frame_1([I|Is], Context) ->
    %% Any other instruction needs a frame if it clobbers X registers.
    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
need_frame_1([], _) -> false.

%% is_trap_bif(Mod, Name, Arity) -> true|false.
%%   Test whether we need a stack frame for this BIF.

is_trap_bif(erlang, Name, Arity) ->
    TrapBifs = [{'!',2},
                {link,1},
                {unlink,1},
                {monitor_node,2},
                {group_leader,2},
                {exit,2}],
    lists:member({Name,Arity}, TrapBifs);
is_trap_bif(_, _, _) ->
    false.

%%%
%%% Fix variables used in matching in receive.
%%%
%%% The loop_rec/2 instruction may return a reference to a
%%% message outside of any heap or heap fragment. If the message
%%% does not match, it is not allowed to store any reference to
%%% the message (or part of the message) on the stack. If we do,
%%% the message will be corrupted if there happens to be a GC.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That will make sure that only X registers are
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
%%%
%%% Variables that are matched out and used in the same clause
%%% need copy instructions before the remove_message instruction
%%% in that clause.
%%%

fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
    %% The list of blocks to examine is taken from the original
    %% block map, while the map itself is updated along the way.
    LabeledBlocks = maps:to_list(Blocks0),
    {Blocks,Count} = fix_receives_1(LabeledBlocks, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.

%% fix_receives_1([{Label,Block}], Blocks0, Count0) -> {Blocks,Count}.
%%  Fix up every receive, that is, every block that begins with
%%  a peek_message instruction. All other blocks are skipped.

fix_receives_1([{L,#b_blk{is=[#b_set{op=peek_message}|_]}}|Ls],
               Blocks0, Count0) ->
    RmBlocks = find_rm_blocks(L, Blocks0),
    Exit = find_loop_exit(RmBlocks, Blocks0),
    AllDefs = beam_ssa:def([L], Blocks0),

    %% First handle the variables that are used in the exit block.
    Common = recv_common(AllDefs, Exit, Blocks0),
    {Blocks1,Count1} = recv_fix_common(Common, Exit, RmBlocks,
                                       Blocks0, Count0),

    %% Then handle the remaining variables clause by clause.
    ClauseDefs = ordsets:subtract(AllDefs, Common),
    {Blocks,Count} = fix_receive(RmBlocks, ClauseDefs, Blocks1, Count1),
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([{_,#b_blk{}}|Ls], Blocks, Count) ->
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([], Blocks, Count) ->
    {Blocks,Count}.

%% recv_common(Defs, LoopExit, Blocks) -> [CommonVar].
%%  Return the variables in Defs that beam_ssa:def_used/2, starting
%%  at the exit block, reports as used but not as defined.

recv_common(_Defs, none, _Blocks) ->
    %% The receive is in the tail position of the function, so
    %% there is no common exit block.
    [];
recv_common(Defs, Exit, Blocks) ->
    {DefinedAfter,UsedAfter} = beam_ssa:def_used([Exit], Blocks),
    DefinedBefore = ordsets:subtract(Defs, DefinedAfter),
    ordsets:intersection(DefinedBefore, UsedAfter).

%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
%%                 Blocks0, Count0) -> {Blocks,Count}.
%%  Handle variables always defined in a receive and used
%%  in the exit block following the receive.

recv_fix_common([], _Exit, _Rm, Blocks, Count) ->
    {Blocks,Count};
recv_fix_common([Var|Vars], Exit, Rm, Blocks0, Count0) ->
    %% Merged is the variable that replaces Var from the exit block on.
    {Merged,Count1} = new_var('@recv', Count0),
    Blocks1 = beam_ssa:rename_vars(#{Var=>Merged}, [Exit], Blocks0),

    %% Create one variable for each remove_message block, and a phi
    %% node in the exit block that merges them into Merged.
    {Copies,Count} = new_vars(['@recv' || _ <- Rm], Count1),
    Phi = #b_set{op=phi,dst=Merged,
                 args=fix_exit_phi_args(Copies, Rm, Exit, Blocks1)},
    ExitBlk = map_get(Exit, Blocks1),
    Blocks2 = Blocks1#{Exit:=ExitBlk#b_blk{is=[Phi|ExitBlk#b_blk.is]}},

    %% Finally add the copy instructions to the remove_message blocks.
    Blocks = recv_fix_common_1(Copies, Rm, Var, Blocks2),
    recv_fix_common(Vars, Exit, Rm, Blocks, Count).

%% recv_fix_common_1([NewVar], [RemoveMessageLabel], Var, Blocks0) ->
%%        Blocks.
%%  For each remove_message block, rename Var to the corresponding
%%  new variable (the renaming starts at that block) and insert
%%  a copy of Var to the new variable after the phi nodes of
%%  the block.

recv_fix_common_1([], [], _Var, Blocks) ->
    Blocks;
recv_fix_common_1([New|News], [Rm|Rms], Var, Blocks0) ->
    Blocks1 = beam_ssa:rename_vars(#{Var=>New}, [Rm], Blocks0),
    #b_blk{is=Is} = Blk = map_get(Rm, Blocks1),
    %% The copy is created after the renaming, so that it still
    %% reads the original variable.
    Copy = #b_set{op=copy,dst=New,args=[Var]},
    Blocks = Blocks1#{Rm:=Blk#b_blk{is=insert_after_phis(Is, [Copy])}},
    recv_fix_common_1(News, Rms, Var, Blocks).

%% fix_exit_phi_args([Var], [RemoveMessageLabel], Exit, Blocks) ->
%%        [{Var,Predecessor}].
%%  Build the arguments for a phi node in the exit block. Each
%%  variable is paired with every block that is reachable from the
%%  corresponding remove_message block and has the exit block as one
%%  of its successors.

fix_exit_phi_args([], [], _Exit, _Blocks) ->
    [];
fix_exit_phi_args([Var|Vars], [Rm|Rms], Exit, Blocks) ->
    Reachable = beam_ssa:rpo([Rm], Blocks),
    Args = lists:map(fun(Pred) -> {Var,Pred} end,
                     exit_predecessors(Reachable, Exit, Blocks)),
    Args ++ fix_exit_phi_args(Vars, Rms, Exit, Blocks).

%% exit_predecessors([Label], Exit, Blocks) -> [Label].
%%  Keep the labels of the blocks that have Exit as a successor.

exit_predecessors(Ls, Exit, Blocks) ->
    [L || L <- Ls,
          lists:member(Exit, beam_ssa:successors(map_get(L, Blocks)))].

%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%%  Add a copy instruction for all variables that are matched out and
%%  later used within a clause of the receive.

fix_receive([], _Defs, Blocks, Count) ->
    {Blocks,Count};
fix_receive([L|Ls], Defs, Blocks0, Count0) ->
    {DefinedHere,UsedHere} = beam_ssa:def_used([L], Blocks0),

    %% The variables to copy are the ones in Defs that def_used/2,
    %% starting at this block, reports as used but not as defined.
    Matched = ordsets:subtract(Defs, DefinedHere),
    ToCopy = ordsets:intersection(Matched, UsedHere),

    %% Create a new variable for each of them, and use the new
    %% variables from this block on.
    Bases = [Base || #b_var{name=Base} <- ToCopy],
    {Fresh,Count} = new_vars(Bases, Count0),
    Renaming = lists:zip(ToCopy, Fresh),
    Blocks1 = beam_ssa:rename_vars(Renaming, [L], Blocks0),

    %% Insert the copy instructions after any phi nodes.
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Renaming],
    #b_blk{is=Is} = Blk = map_get(L, Blocks1),
    Blocks = Blocks1#{L:=Blk#b_blk{is=insert_after_phis(Is, Copies)}},
    fix_receive(Ls, Defs, Blocks, Count).

%% find_loop_exit([Label], Blocks) -> Label | none.
%%  Find the block to which control is transferred when
%%  the receive loop is exited.

find_loop_exit([First,Second|_], Blocks) ->
    %% Compare the paths from the first two blocks, starting at
    %% the end of each path.
    FromEnd = fun(L) -> lists:reverse(beam_ssa:rpo([L], Blocks)) end,
    find_loop_exit_1(FromEnd(First), FromEnd(Second), none);
find_loop_exit(_, _) ->
    %% With fewer than two blocks there is nothing to compare.
    none.

%% find_loop_exit_1(Path1, Path2, Exit0) -> Exit.
%%  Return the last element of the longest common prefix of the
%%  two lists, or Exit0 if the lists have no common prefix.

find_loop_exit_1([A|As], [B|Bs], _Last) when A =:= B ->
    find_loop_exit_1(As, Bs, A);
find_loop_exit_1(_, _, Last) ->
    Last.

%% find_rm_blocks(StartLabel, Blocks) -> [Label].
%%  Find all blocks that start with remove_message within the receive
%%  loop whose peek_message label is StartLabel.

find_rm_blocks(L, Blocks) ->
    %% The starting block counts as already seen, and the search
    %% begins with its successors.
    Start = beam_ssa:successors(map_get(L, Blocks)),
    find_rm_blocks_1(Start, gb_sets:singleton(L), Blocks).

%% find_rm_blocks_1([Label], Seen, Blocks) -> [Label].
%%  Search the given blocks and, where find_rm_act/1 says so, their
%%  successors. Return the labels of the blocks that contain
%%  a remove_message instruction. Each block is examined at most once.

find_rm_blocks_1([], _Seen, _Blocks) ->
    [];
find_rm_blocks_1([L|Work], Seen0, Blocks) ->
    case gb_sets:is_member(L, Seen0) of
        true ->
            find_rm_blocks_1(Work, Seen0, Blocks);
        false ->
            Seen = gb_sets:insert(L, Seen0),
            Blk = map_get(L, Blocks),
            case find_rm_act(Blk#b_blk.is) of
                found ->
                    %% This block removes the message. Its
                    %% successors are not searched.
                    [L|find_rm_blocks_1(Work, Seen, Blocks)];
                prune ->
                    %% Looping back. Ignore the successors.
                    find_rm_blocks_1(Work, Seen, Blocks);
                continue ->
                    %% Nothing of interest in this block. Go on
                    %% with its successors.
                    Next = beam_ssa:successors(Blk) ++ Work,
                    find_rm_blocks_1(Next, Seen, Blocks)
            end
    end.

%% find_rm_act([Instruction]) -> found | prune | continue.
%%  Classify a block by the first instruction that is one of the
%%  receive-related instructions listed below: 'found' for
%%  remove_message, and 'prune' for peek_message, recv_next,
%%  wait_timeout, or wait. Return 'continue' if the block contains
%%  none of them.

find_rm_act([]) ->
    continue;
find_rm_act([#b_set{op=remove_message}|_]) ->
    found;
find_rm_act([#b_set{op=Op}|_])
  when Op =:= peek_message;
       Op =:= recv_next;
       Op =:= wait_timeout;
       Op =:= wait ->
    prune;
find_rm_act([#b_set{}|Is]) ->
    find_rm_act(Is).

%%%
%%% Find out which variables need to be stored in Y registers.
%%%

%% State kept for each block while searching for the variables that
%% must be stored in Y registers:
%%
%%   d - variables defined since the most recent instruction that
%%       clobbers X registers (initially, the variables defined
%%       before the block that allocates the frame)
%%   k - "killed" variables, that is, variables that were defined
%%       before an instruction that clobbers X registers
-record(dk, {d :: ordsets:ordset(var_name()),
             k :: ordsets:ordset(var_name())
            }).

%% find_yregs(St0) -> St.
%%  Find all variables that must be stored in Y registers. Annotate
%%  the blocks that allocate frames with the set of Y registers
%%  used within that stack frame.
%%
%%  Basically, we follow all execution paths starting from a block
%%  that allocates a frame, keeping track of all defined registers
%%  and all registers killed by an instruction that clobbers X
%%  registers. For every use of a variable, we check if it is in
%%  the set of killed variables; if it is, it must be stored in a Y
%%  register.

find_yregs(#st{frames=[]}=St) ->
    %% There are no stack frames, so there is nothing to annotate.
    St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks}=St) ->
    %% The function arguments that are variables count as defined
    %% from the start.
    ArgVars = lists:filter(fun(#b_var{}) -> true;
                              (_) -> false
                           end, Args),
    FrameDefs = find_defs(Frames, Blocks, ArgVars),
    St#st{ssa=find_yregs_1(FrameDefs, Blocks)}.

%% find_yregs_1([{FrameLabel,Defs}], Blocks0) -> Blocks.
%%  For each block that allocates a frame, find the variables that
%%  must be stored in Y registers within that frame and store them
%%  in a 'yregs' annotation on the block.

find_yregs_1([], Blocks) ->
    Blocks;
find_yregs_1([{FrameLbl,Defined}|Rest], Blocks0) ->
    %% On entry to the frame block, no variables have been killed.
    Initial = #{FrameLbl=>#dk{d=Defined,k=[]}},
    Order = beam_ssa:rpo([FrameLbl], Blocks0),
    Yregs = find_yregs_2(Order, Blocks0, Initial, []),
    FrameBlk = beam_ssa:add_anno(yregs, Yregs, map_get(FrameLbl, Blocks0)),
    find_yregs_1(Rest, Blocks0#{FrameLbl:=FrameBlk}).

%% find_yregs_2([Label], Blocks, D0, Yregs0) -> Yregs.
%%  Process the blocks in the given order. D maps the label of each
%%  block to the #dk{} state on entry to the block; the state at the
%%  end of a block is propagated to all of its successors.

find_yregs_2([], _Blocks, _D, Yregs) ->
    Yregs;
find_yregs_2([L|Ls], Blocks, D0, Yregs0) ->
    #b_blk{is=Is,last=Last} = Blk = map_get(L, Blocks),
    Entry = map_get(L, D0),
    {Yregs1,AtEnd} = find_yregs_is(Is, Entry, Yregs0),
    Yregs = find_yregs_terminator(Last, AtEnd, Yregs1),
    D = find_update_succ(beam_ssa:successors(Blk), AtEnd, D0),
    find_yregs_2(Ls, Blocks, D, Yregs).

%% find_defs([FrameLabel], Blocks, Defs) -> [{FrameLabel,OrderedDefs}].
%%  Starting at block 0 with the given variables as defined, collect
%%  the variables defined on the way to each block that allocates
%%  a frame.

find_defs(Frames, Blocks, Defs) ->
    FrameSet = gb_sets:from_list(Frames),
    {FrameDefs,_Seen} = find_defs_1([0], Blocks, FrameSet,
                                    gb_sets:empty(), Defs, []),
    FrameDefs.

%% find_defs_1([Label], Blocks, Frames, Seen0, Defs0, Acc0) -> {Acc,Seen}.
%%  Walk the blocks depth-first. Defs0 is the list of variables
%%  defined on the path leading to the blocks in the list. Seen is
%%  the set of visited blocks that do not allocate a frame; it is
%%  threaded through all recursive calls so that each such block is
%%  expanded only once.

find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
    case gb_sets:is_member(L, Frames) of
        true ->
            %% This block allocates a frame. Record the variables
            %% defined so far; the search does not continue into
            %% the successors of this block.
            OrderedDefs = ordsets:from_list(Defs0),
            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
                        [{L,OrderedDefs}|Acc0]);
        false ->
            case gb_sets:is_member(L, Seen0) of
                true ->
                    find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
                false ->
                    Seen1 = gb_sets:insert(L, Seen0),
                    %% First process the remaining blocks in the list
                    %% (they share Defs0), then the successors of this
                    %% block with the variables defined in this block
                    %% added.
                    {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
                    #b_blk{is=Is} = Blk = map_get(L, Blocks),
                    Defs = find_defs_is(Is, Defs0),
                    Successors = beam_ssa:successors(Blk),
                    find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
            end
    end;
find_defs_1([], _, _, Seen, _, Acc) ->
    {Acc,Seen}.

%% find_defs_is([Instruction], Defs0) -> Defs.
%%  Add the destination variable of every instruction to the
%%  (unordered) list of defined variables.

find_defs_is(Is, Acc) ->
    lists:foldl(fun(#b_set{dst=Dst}, Defs) -> [Dst|Defs] end, Acc, Is).

%% find_update_succ([Successor], DK, D0) -> D.
%%  Propagate the state DK to each successor. If a successor already
%%  has a state, the states are merged: a variable is defined only if
%%  it is defined in both states, and killed if it is killed in
%%  either of them.

find_update_succ([], _DK, D) ->
    D;
find_update_succ([Succ|Succs], #dk{d=DefsIn,k=KilledIn}=Incoming, D) ->
    Entry = case D of
                #{Succ:=#dk{d=DefsOld,k=KilledOld}} ->
                    #dk{d=ordsets:intersection(DefsIn, DefsOld),
                        k=ordsets:union(KilledIn, KilledOld)};
                #{} ->
                    Incoming
            end,
    find_update_succ(Succs, Incoming, D#{Succ=>Entry}).

%% find_yregs_is([Instruction], DK0, Yregs0) -> {Yregs,DK}.
%%  Add every killed variable that is used by an instruction to
%%  Yregs, and update the sets of defined and killed variables.

find_yregs_is([], DK, Yregs) ->
    {Yregs,DK};
find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs,k=Killed}=DK0, Yregs0) ->
    %% The use of a killed variable means that the variable must
    %% be stored in a Y register.
    NeedY = ordsets:intersection(beam_ssa:used(I), Killed),
    Yregs = ordsets:union(Yregs0, NeedY),
    DK = case beam_ssa:clobbers_xregs(I) of
             true ->
                 %% All variables defined so far are killed. Only
                 %% the result of this instruction is defined.
                 DK0#dk{d=[Dst],k=ordsets:union(Defs, Killed)};
             false ->
                 DK0#dk{d=ordsets:add_element(Dst, Defs)}
         end,
    find_yregs_is(Is, DK, Yregs).

%% find_yregs_terminator(Terminator, DK, Yregs0) -> Yregs.
%%  Add the killed variables that are used by the terminator
%%  to Yregs.

find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs) ->
    UsedKilled = ordsets:intersection(beam_ssa:used(Terminator), Killed),
    ordsets:union(Yregs, UsedKilled).

%%%
%%% Try to reduce the size of the stack frame by adding explicit
%%% 'copy' instructions for return values from 'call' and 'make_fun' that
%%% need to be saved in Y registers. Here is an example to show
%%% how that's useful. First, here is the Erlang code:
%%%
%%% f(Pid) ->
%%%    Res = foo(42),
%%%    _ = node(Pid),
%%%    bar(),
%%%    Res.
%%%
%%% Compiled to SSA format, the main part of the code looks like this:
%%%
%%% 0:
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% It can be seen that the variables Pid and Res must be saved in Y
%%% registers in order to survive the function calls. A previous sub
%%% pass has inserted a 'copy' instruction to save the value of the
%%% variable Pid:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The Res and Pid:4 variables must be assigned to different Y registers
%%% because they are live at the same time. copy_retval() inserts a
%%% 'copy' instruction to copy Res to a new variable:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res:6 = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   Res = copy Res:6
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The new variable Res:6 is used to capture the return value from the call.
%%% The variables Pid:4 and Res are no longer live at the same time, so they
%%% can be assigned to the same Y register.
%%%

copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St0) ->
    %% Each block that allocates a frame is handled separately.
    {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
    St1 = St0#st{ssa=Blocks},
    St1#st{cnt=Count}.

%% copy_retval_1([FrameLabel], Blocks0, Count0) -> {Blocks,Count}.
%%  Process the blocks reachable from each block that allocates
%%  a frame, using the set of Y register variables for that frame.

copy_retval_1(Frames, Blocks0, Count0) ->
    lists:foldl(
      fun(FrameLbl, {BlocksIn,CountIn}) ->
              #b_blk{anno=#{yregs:=YregList},is=Is} =
                  map_get(FrameLbl, BlocksIn),
              %% Take the 'copy' instructions in the frame block
              %% into account.
              Yregs = collect_yregs(Is, gb_sets:from_list(YregList)),
              Order = beam_ssa:rpo([FrameLbl], BlocksIn),
              {_,_} = copy_retval_2(Order, Yregs, none, BlocksIn, CountIn)
      end, {Blocks0,Count0}, Frames).

%% collect_yregs([Instruction], Yregs0) -> Yregs.
%%  For every 'copy' instruction whose source is a variable, replace
%%  the source variable with the destination variable in the set.
%%  The source variable must be a member of the set.

collect_yregs([], Yregs) ->
    Yregs;
collect_yregs([#b_set{op=copy,dst=Saved,args=[#b_var{}=Src]}|Is],
              Yregs0) ->
    true = gb_sets:is_member(Src, Yregs0),      %Assertion.
    Yregs1 = gb_sets:delete(Src, Yregs0),
    collect_yregs(Is, gb_sets:insert(Saved, Yregs1));
collect_yregs([#b_set{}|Is], Yregs) ->
    collect_yregs(Is, Yregs).

%% copy_retval_2([Label], Yregs, Copy0, Blocks0, Count0) -> {Blocks,Count}.
%%  Process each block in turn. Copy0 is either 'none' or a pending
%%  'copy' instruction that was created while processing the previous
%%  block and has not yet been placed in any block.

copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
    #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
    %% RC is true if a pending copy may be carried over to the next
    %% block in the list. That is only done when this block ends with
    %% a branch whose success label is that very block and whose
    %% failure label is ?BADARG_BLOCK.
    RC = case {Last,Ls} of
             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
                 true;
             {_,_} ->
                 false
         end,
    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
        {Is,Count} ->
            %% No pending copy to carry over to the next block.
            case Copy0 =:= none andalso Count0 =:= Count of
                true ->
                    %% There was no incoming copy and no new variables
                    %% were created, so the block has not been changed.
                    copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
                false ->
                    Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
                    copy_retval_2(Ls, Yregs, none, Blocks, Count)
            end;
        {Is,Count,Copy} ->
            %% The copy is to be placed in the next block.
            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
    end;
copy_retval_2([], _Yregs, none, Blocks, Count) ->
    %% There must be no pending copy after the last block.
    {Blocks,Count}.

%% copy_retval_is([Instruction], RC, Yregs, Copy, Count0, Acc) ->
%%        {[Instruction],Count} | {[Instruction],Count,Copy}.
%%  Process the instructions of one block. Copy is either 'none' or
%%  a pending 'copy' instruction that has been created but not yet
%%  placed. RC tells whether a copy that is still pending at the end
%%  of the block may be carried over to the next block (in which case
%%  it is returned as the third element of the tuple). Acc holds the
%%  already processed instructions in reverse order.
%%
%%  The order of the clauses is significant.

copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
           Copy, Count, Acc) ->
    %% Last instruction of the block. Update its arguments using
    %% copy_sub_args/2 and place any pending copy after it.
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
  when Op =:= call; Op =:= make_fun ->
    %% A call or make_fun as the last instruction of the block. Place
    %% any pending copy; no copy is created for its own return value.
    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
    {reverse(Acc, [I]),Count};
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
    %% Any other last instruction. Place any pending copy before it.
    {reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
    %% The block ends with an instruction followed by 'succeeded'.
    %% Place any pending copy before both of them.
    {reverse(Acc, acc_copy(Is, Copy)),Count};
copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
           Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
    case gb_sets:is_member(Dst, Yregs) of
        true ->
            %% The return value is to be stored in a Y register. Let
            %% the instruction write to a new variable, and create
            %% a pending copy from the new variable to the original
            %% destination.
            {NewVar,Count} = new_var(RetName, Count1),
            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
            I = I1#b_set{dst=NewVar},
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
        false ->
            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
    end;
copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% The pending copy (if any) must be placed before an
            %% instruction that clobbers X registers.
            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
        false ->
            %% Keep the copy pending.
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
        end;
copy_retval_is([], RC, _, Copy, Count, Acc) ->
    case {Copy,RC} of
        {none,_} ->
            {reverse(Acc),Count};
        {#b_set{},true} ->
            %% Carry the pending copy over to the next block.
            {reverse(Acc),Count,Copy};
        {#b_set{},false} ->
            %% Place the pending copy at the end of this block.
            {reverse(Acc, [Copy]),Count}
    end.

%%
%% Consider this code:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   A = copy A1
%%   B = call bar/1, Var
%%
%% If the Var variable is no longer used after this code, its Y register
%% can't be reused for A. To allow the Y register to be reused
%% we will need to insert 'copy' instructions for arguments that are
%% in Y registers:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   Var1 = copy Var
%%   A = copy A1
%%   B = call bar/1, Var1
%%

place_retval_copy(Call, _Yregs, none, Count, Acc) ->
    %% No copy is pending; there is nothing to place.
    {Call,Count,Acc};
place_retval_copy(#b_set{args=[Callee|CallArgs0]}=Call, Yregs, Pending,
                  Count0, Acc0) ->
    %% The destination of the pending copy must not itself be copied.
    #b_set{dst=RetDst} = Pending,
    {CallArgs,ArgCopies,Count} =
        copy_func_args(CallArgs0, Yregs, RetDst, Acc0, [], Count0),
    %% The accumulator is in reverse order: in the final sequence the
    %% argument copies come first, then the pending copy.
    {Call#b_set{args=[Callee|CallArgs]},Count,[Pending|ArgCopies]}.

%% copy_func_args(Args, Yregs, Avoid, CopyAcc, Acc, Count) ->
%%       {NewArgs,CopyAcc,Count}
%%  For every variable argument that is a member of Yregs (except
%%  Avoid), create a fresh variable, push a 'copy' instruction onto
%%  CopyAcc, and use the fresh variable as the argument instead.
copy_func_args([#b_var{name=Base}=Arg|Args], Yregs, Avoid, Copies, Acc, Count0) ->
    NeedsCopy = gb_sets:is_member(Arg, Yregs) andalso Arg =/= Avoid,
    case NeedsCopy of
        true ->
            {Fresh,Count} = new_var(Base, Count0),
            CopyI = #b_set{op=copy,dst=Fresh,args=[Arg]},
            copy_func_args(Args, Yregs, Avoid, [CopyI|Copies],
                           [Fresh|Acc], Count);
        false ->
            copy_func_args(Args, Yregs, Avoid, Copies, [Arg|Acc], Count0)
    end;
copy_func_args([Other|Args], Yregs, Avoid, Copies, Acc, Count) ->
    %% Not a variable; keep the argument as it is.
    copy_func_args(Args, Yregs, Avoid, Copies, [Other|Acc], Count);
copy_func_args([], _Yregs, _Avoid, Copies, Acc, Count) ->
    {reverse(Acc),Copies,Count}.

%% acc_copy(Is, Copy) -> Is'.
%%  Put the pending copy instruction (if there is one) first in Is.
acc_copy(Is, #b_set{}=Pending) ->
    [Pending|Is];
acc_copy(Is, none) ->
    Is.

%% copy_sub_args(Args, Copy) -> Args'.
%%  Replace each occurrence of the destination of the copy
%%  instruction in Args with the source of the copy.
copy_sub_args(Args, none) ->
    Args;
copy_sub_args(Args, #b_set{dst=CopyDst,args=[CopySrc]}) ->
    lists:map(fun(Arg) -> sub_arg(Arg, CopyDst, CopySrc) end, Args).

%% sub_arg(Arg, Old, New) -> Arg'.
%%  Return New if Arg is exactly equal to Old; otherwise return Arg.
sub_arg(Arg, Old, New) when Arg =:= Old -> New;
sub_arg(Arg, _Old, _New) -> Arg.

%%%
%%% Consider:
%%%
%%%   x1/Hd = get_hd x0/Cons
%%%   y0/Tl = get_tl x0/Cons
%%%
%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
%%% a 'move' instruction must be inserted.
%%%
%%% If we swap get_hd and get_tl when Tl is in a Y register,
%%% x0 can be used for Hd if Cons is not used again:
%%%
%%%   y0/Tl = get_tl x0/Cons
%%%   x0/Hd = get_hd x0/Cons
%%%

opt_get_list(#st{ssa=Blocks0,res=Res}=St) ->
    Reservations = maps:from_list(Res),
    Order = beam_ssa:rpo(Blocks0),
    Blocks = opt_get_list_1(Order, Reservations, Blocks0),
    St#st{ssa=Blocks}.

%% Visit each block in turn, replacing the instructions of the blocks
%% in which opt_get_list_is/4 swapped at least one get_hd/get_tl pair.
opt_get_list_1(Labels, Res, Blocks0) ->
    Rewrite = fun(Lbl, Blocks) ->
                      #b_blk{is=Is0} = Blk = map_get(Lbl, Blocks),
                      case opt_get_list_is(Is0, Res, [], false) of
                          no ->
                              Blocks;
                          {yes,Is} ->
                              Blocks#{Lbl:=Blk#b_blk{is=Is}}
                      end
              end,
    lists:foldl(Rewrite, Blocks0, Labels).

opt_get_list_is([#b_set{op=get_hd,dst=Hd,args=[Cons]}=GetHd,
                 #b_set{op=get_tl,dst=Tl,args=[Cons]}=GetTl|Is],
                Res, Acc, Changed) ->
    %% Only Y registers have reservations when this pass runs, so a
    %% variable without an entry in Res will end up in an X register.
    %% Swapping is only worthwhile when Tl, but not Hd, will be in a
    %% Y register.
    Swap = case Res of
               #{Hd:={y,_}} -> false;
               #{Tl:={y,_}} -> true;
               #{} -> false
           end,
    case Swap of
        true ->
            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
        false ->
            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
    end;
opt_get_list_is([Other|Is], Res, Acc, Changed) ->
    opt_get_list_is(Is, Res, [Other|Acc], Changed);
opt_get_list_is([], _Res, Acc, true) ->
    {yes,reverse(Acc)};
opt_get_list_is([], _Res, _Acc, false) ->
    no.

%%%
%%% Number instructions in the order they are executed.
%%%

%% number_instructions(St0) -> St.
%%  Number instructions in the order they are executed. Use a step
%%  size of 2. Don't number phi instructions. All phi variables in
%%  a block will be live one unit before the first non-phi instruction
%%  in the block.

number_instructions(#st{ssa=Blocks0}=St) ->
    %% Numbering starts at 1 and follows the reverse post order.
    Order = beam_ssa:rpo(Blocks0),
    Blocks = number_is_1(Order, 1, Blocks0),
    St#st{ssa=Blocks}.

%% Number the instructions and the terminator of each block in Ls,
%% threading the next free number from one block to the next.
number_is_1(Ls, N0, Blocks0) ->
    NumberBlock =
        fun(L, {N1,Blocks}) ->
                #b_blk{is=Is0,last=Last0} = Blk = map_get(L, Blocks),
                {Is,N2} = number_is_2(Is0, N1, []),
                Last = beam_ssa:add_anno(n, N2, Last0),
                {N2+2,Blocks#{L:=Blk#b_blk{is=Is,last=Last}}}
        end,
    {_,Blocks} = lists:foldl(NumberBlock, {N0,Blocks0}, Ls),
    Blocks.

%% Annotate every non-phi instruction with its number ('n'), using a
%% step of 2. Returns the instructions (preceded by the reversed Acc)
%% and the next free number.
number_is_2(Is0, N0, Acc) ->
    Number = fun(#b_set{op=phi}=Phi, N) ->
                     {Phi,N};
                (I, N) ->
                     {beam_ssa:add_anno(n, N, I),N+2}
             end,
    {Is,N} = lists:mapfoldl(Number, N0, Is0),
    {lists:reverse(Acc, Is),N}.

%%%
%%% Calculate live intervals.
%%%

live_intervals(#st{args=Args,ssa=Blocks}=St) ->
    %% Every argument variable gets the range {0,1}.
    ArgRanges = [{V,{0,1}} || #b_var{}=V <- Args],
    Visit = fun(L, _Blk, Acc) ->
                    live_interval_blk(L, Blocks, Acc)
            end,
    {BlockRanges,_LiveMap} = beam_ssa:fold_po(Visit, {[],#{}}, Blocks),
    Intervals = merge_ranges(rel2fam(ArgRanges ++ BlockRanges)),
    St#st{intervals=Intervals}.

%% merge_ranges([{Var,[Range]}]) -> [{Var,[Range]}].
%%  Merge adjacent ranges for each variable.
merge_ranges(Fam) ->
    lists:map(fun({V,Rs}) -> {V,merge_ranges_1(Rs)} end, Fam).

%% merge_ranges_1([Range]) -> [Range].
%%  Join each range with the following one when the first range ends
%%  exactly where the second one begins.
merge_ranges_1(Ranges) ->
    Join = fun({N,Z}, [{A,N}|Done]) ->
                   %% Continues the most recent range.
                   [{A,Z}|Done];
              (R, Done) ->
                   [R|Done]
           end,
    lists:reverse(lists:foldl(Join, [], Ranges)).

%% live_interval_blk(Label, Blocks, {Vars,LiveMap}) -> {Vars,LiveMap}.
%%  Add the ranges for the variables of block Label to Vars, and
%%  record in LiveMap which variables are live on entry to the block.
live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
    %% Collect what the successors need from this block.
    Succs = beam_ssa:successors(L, Blocks),
    LiveOut = update_successors(Succs, L, Blocks, LiveMap0, []),

    %% Variables that are live in the successors count as used just
    %% after the terminator of this block.
    #b_blk{is=Is,last=Last} = map_get(L, Blocks),
    BlockEnd = beam_ssa:get_anno(n, Last),
    LiveOutUses = [{V,{use,BlockEnd+1}} || V <- LiveOut],

    %% Gather the definitions and uses in this block, per variable.
    First = first_number(Is, Last),
    UseDef = rel2fam(live_interval_blk_1([Last|reverse(Is)], First,
                                         LiveOutUses)),

    %% Live on entry: live out or used here, except for the variables
    %% whose entry starts with a definition.
    UsedHere = [V || {V,[{use,_}|_]} <- UseDef],
    DefinedHere = [V || {V,[{def,_}|_]} <- UseDef],
    LiveIn = ordsets:subtract(ordsets:union(LiveOut, UsedHere), DefinedHere),

    {make_block_ranges(UseDef, First, Vars0),LiveMap0#{L=>LiveIn}}.

%% make_block_ranges(UseDef, First, Acc) -> Acc'.
%%  Push one {Var,{From,To}} range onto Acc for each variable in
%%  UseDef. First is used as the start of the range for variables
%%  that have no definition in the block.
make_block_ranges(UseDef, First, Acc0) ->
    ToRange = fun({V,[{def,Def}]}) ->
                      %% Defined but never used.
                      {V,{Def,Def}};
                 ({V,[{def,Def}|Uses]}) ->
                      {use,End} = last(Uses),
                      {V,{Def,End}};
                 ({V,[{use,_}|_]=Uses}) ->
                      {use,End} = last(Uses),
                      {V,{First,End}}
              end,
    lists:foldl(fun(Entry, Acc) -> [ToRange(Entry)|Acc] end, Acc0, UseDef).

%% live_interval_blk_1(Is, FirstNumber, Acc) -> Acc'.
%%  Push a {Var,{def,N}} or {Var,{use,N}} tuple onto Acc for every
%%  definition and use in Is (instructions and/or a terminator).
%%  Phi nodes only contribute a definition at FirstNumber.
live_interval_blk_1(Is, FirstNumber, Acc0) ->
    Step = fun(#b_set{op=phi,dst=Dst}, Acc) ->
                   [{Dst,{def,FirstNumber}}|Acc];
              (#b_set{dst=Dst}=I, Acc) ->
                   N = beam_ssa:get_anno(n, I),
                   Uses = [{V,{use,N}} || V <- beam_ssa:used(I)],
                   Uses ++ [{Dst,{def,N}}|Acc];
              (Terminator, Acc) ->
                   %% Terminators define nothing.
                   N = beam_ssa:get_anno(n, Terminator),
                   [{V,{use,N}} || V <- beam_ssa:used(Terminator)] ++ Acc
           end,
    lists:foldl(Step, Acc0, Is).

%% first_number([#b_set{}], Last) -> InstructionNumber.
%%  Return the number for the first instruction for the block.
%%  Note that this number is one less than the first
%%  non-phi instruction in the block.

first_number(Is, Last) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    case lists:dropwhile(IsPhi, Is) of
        [I|_] ->
            beam_ssa:get_anno(n, I) - 1;
        [] ->
            %% Only phi nodes (or nothing); use the terminator.
            beam_ssa:get_anno(n, Last) - 1
    end.

%% update_successors(Successors, Pred, Blocks, LiveMap, Live) -> Live'.
%%  Add to Live the variables recorded as live for each successor,
%%  adjusted for the phi nodes in the successor (as seen from the
%%  predecessor Pred).
update_successors(Succs, Pred, Blocks, LiveMap, Live0) ->
    AddSucc = fun(S, Live) ->
                      Merged = ordsets:union(Live, get_live(S, LiveMap)),
                      #b_blk{is=Is} = map_get(S, Blocks),
                      update_live_phis(Is, Pred, Merged)
              end,
    lists:foldl(AddSucc, Live0, Succs).

%% get_live(Label, LiveMap) -> LiveVariables.
%%  Return the entry for Label, or [] if there is no entry.
get_live(L, LiveMap) ->
    maps:get(L, LiveMap, []).

%% update_live_phis(Is, Pred, Live) -> Live'.
%%  For each phi node at the beginning of Is, add the variable that
%%  flows in from Pred (if any) and remove the destination of the phi.
update_live_phis(Is, Pred, Live0) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    Step = fun(#b_set{dst=Killed,args=Args}, Live) ->
                   Incoming = [V || {#b_var{}=V,From} <- Args, From =:= Pred],
                   Added = ordsets:union(ordsets:from_list(Incoming), Live),
                   ordsets:del_element(Killed, Added)
           end,
    lists:foldl(Step, Live0, lists:takewhile(IsPhi, Is)).

%%%
%%% Reserve Y registers.
%%%

%% reserve_yregs(St0) -> St.
%%  In each block that allocates a stack frame, insert instructions
%%  that copy variables that must be in Y registers (given by
%%  the `yregs` annotation) to new variables.
%%
%%  Also allocate specific Y registers for try and catch tags.
%%  The outermost try/catch tag is placed in y0, any directly
%%  nested tag in y1, and so on. Note that this is the reverse of
%%  the order required by BEAM; it will be corrected later by
%%  turn_yregs().

reserve_yregs(#st{frames=Frames}=St) ->
    %% Handle one frame-allocating block at a time.
    foldl(fun(L, StAcc) -> reserve_yregs_1(L, StAcc) end, St, Frames).

reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
    Yregs = beam_ssa:get_anno(yregs, map_get(L, Blocks0)),
    {Def,Used} = beam_ssa:def_used([L], Blocks0),
    NeedY = ordsets:intersection(Yregs, Used),

    %% Variables that are not defined in the blocks reachable from L
    %% are renamed and copied to new variables.
    Incoming = ordsets:subtract(NeedY, Def),
    {Renamed,Blocks,Count} = rename_vars(Incoming, L, Blocks0, Count0),
    Local = ordsets:subtract(NeedY, Incoming),

    %% Count is stored in every reservation made for this frame.
    TryTagRes = [{V,{Reg,Count}} || {V,Reg} <- reserve_try_tags(L, Blocks)],
    VarRes = [{V,{y,Count}} || V <- Renamed ++ Local],
    St#st{res=VarRes ++ TryTagRes ++ Res0,ssa=Blocks,cnt=Count+1}.

%% reserve_try_tags(Label, Blocks) -> [{Var,{y,N}}].
%%  Return an ordered set of Y register reservations for the try/catch
%%  tags found in the blocks reachable from Label.
reserve_try_tags(L, Blocks) ->
    {ActMap,_Seen} = reserve_try_tags_1([L], Blocks, gb_sets:empty(), #{}),
    Res = [{V,{y,Y}} || Active <- maps:values(ActMap),
                        {V,Y} <- maps:to_list(Active)],
    ordsets:from_list(Res).

%% reserve_try_tags_1(Labels, Blocks, Seen, ActMap) -> {ActMap,Seen}.
%%  Visit each block reachable from Labels once. ActMap maps a label
%%  to the tags that are active on entry to that block.
reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
    case not gb_sets:is_element(L, Seen0) of
        false ->
            %% Already visited.
            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
        true ->
            Seen1 = gb_sets:insert(L, Seen0),
            #b_blk{is=Is} = Blk = map_get(L, Blocks),
            ActiveOut = reserve_try_tags_is(Is, get_active(L, ActMap0)),
            Succs = beam_ssa:successors(Blk),
            ActMap1 = update_act_map(Succs, ActiveOut, ActMap0),
            %% Finish the rest of the work list before visiting the
            %% successors of this block.
            {ActMap2,Seen2} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
            reserve_try_tags_1(Succs, Blocks, Seen2, ActMap2)
    end;
reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
    {ActMap,Seen}.

%% get_active(Label, ActMap) -> Active.
%%  Return the entry for Label, or an empty map if there is no entry.
get_active(L, ActMap) ->
    maps:get(L, ActMap, #{}).

%% reserve_try_tags_is(Is, Active) -> Active'.
%%  A new_try_tag instruction adds its destination to Active, numbered
%%  by the number of tags already active; kill_try_tag removes its tag.
reserve_try_tags_is(Is, Active0) ->
    Step = fun(#b_set{op=new_try_tag,dst=V}, Active) ->
                   Active#{V=>map_size(Active)};
              (#b_set{op=kill_try_tag,args=[Tag]}, Active) ->
                   maps:remove(Tag, Active);
              (_, Active) ->
                   Active
           end,
    lists:foldl(Step, Active0, Is).

%% update_act_map(Labels, Active, ActMap) -> ActMap'.
%%  Store Active for each label. If a label already has an entry,
%%  the two maps are merged (the existing entry wins on conflicts).
update_act_map(Ls, Active, ActMap0) ->
    Merge = fun(Existing) -> maps:merge(Active, Existing) end,
    Store = fun(L, ActMap) ->
                    maps:update_with(L, Merge, Active, ActMap)
            end,
    lists:foldl(Store, ActMap0, Ls).

%% rename_vars(Vars, Label, Blocks, Count) -> {NewVars,Blocks,Count}.
%%  Rename each variable in Vars in the blocks reachable from Label,
%%  and insert 'copy' instructions from the old variables to the new
%%  ones after the phi nodes in block Label.
rename_vars([], _, Blocks, Count) ->
    {[],Blocks,Count};
rename_vars(OldVars, L, Blocks0, Count0) ->
    Bases = [Base || #b_var{name=Base} <- OldVars],
    {NewVars,Count} = new_vars(Bases, Count0),
    Renaming = zip(OldVars, NewVars),
    Blocks1 = beam_ssa:rename_vars(Renaming, [L], Blocks0),
    #b_blk{is=Is} = Blk = map_get(L, Blocks1),
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Renaming],
    Blocks = Blocks1#{L:=Blk#b_blk{is=insert_after_phis(Is, Copies)}},
    {NewVars,Blocks,Count}.

%% insert_after_phis(Is, InsertIs) -> Is'.
%%  Insert InsertIs after the phi nodes at the beginning of Is.
insert_after_phis(Is, InsertIs) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    {Phis,Rest} = lists:splitwith(IsPhi, Is),
    Phis ++ InsertIs ++ Rest.

%% frame_size(St0) -> St.
%%  Calculate the frame size for each block that allocates a frame.
%%  Annotate the block with the frame size. Also annotate all
%%  return instructions with {deallocate,FrameSize} to simplify
%%  code generation.

frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
    Annotate = fun(L, Blocks) -> frame_size_1(L, Regs, Blocks) end,
    St#st{ssa=foldl(Annotate, Blocks0, Frames)}.

frame_size_1(L, Regs, Blocks0) ->
    %% The frame size is the number of distinct Y registers used by
    %% the variables defined in the blocks reachable from L.
    Def = beam_ssa:def([L], Blocks0),
    Yregs = ordsets:from_list([R || V <- Def,
                                    R <- [map_get(V, Regs)],
                                    is_yreg(R)]),
    FrameSize = length(Yregs),
    case Yregs of
        [] ->
            ok;
        [_|_] ->
            %% Assert that the Y registers are {y,0} through
            %% {y,FrameSize-1}.
            [{y,0}|_] = Yregs,
            {y,Top} = last(Yregs),
            Top = FrameSize - 1,
            ok
    end,
    Blk = beam_ssa:add_anno(frame_size, FrameSize, map_get(L, Blocks0)),
    Blocks = Blocks0#{L:=Blk},

    %% Annotate each #b_ret{} reachable from L for frame deallocation.
    frame_deallocate(beam_ssa:rpo([L], Blocks), FrameSize, Blocks).

%% frame_deallocate(Labels, Size, Blocks) -> Blocks'.
%%  Add a {deallocate,Size} annotation to the terminator of every
%%  block in Labels that ends with a #b_ret{}.
frame_deallocate(Ls, Size, Blocks0) ->
    Annotate = fun(L, Blocks) ->
                       case map_get(L, Blocks) of
                           #b_blk{last=#b_ret{}=Ret0}=Blk ->
                               Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
                               Blocks#{L:=Blk#b_blk{last=Ret}};
                           #b_blk{} ->
                               Blocks
                       end
               end,
    lists:foldl(Annotate, Blocks0, Ls).


%% turn_yregs(St0) -> St.
%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
%%  catches work. The register allocator (linear_scan()) has given
%%  a lower number to the outermost catch.

turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
    Turn = fun(L) ->
                   Blk = map_get(L, Blocks),
                   FrameSize = beam_ssa:get_anno(frame_size, Blk),
                   turn_yregs_1(beam_ssa:def([L], Blocks), FrameSize, Regs0)
           end,
    %% The results for later frames are placed first in the list of
    %% new register assignments.
    Turned = append([Turn(L) || L <- lists:reverse(Frames)]),
    St#st{regs=maps:merge(Regs0, maps:from_list(Turned))}.

%% turn_yregs_1(Def, FrameSize, Regs) -> [{Var,{y,N}}].
%%  Return the renumbered Y register for each variable in Def that
%%  is in a Y register, as a sorted list.
turn_yregs_1(Def, FrameSize, Regs) ->
    ByReg = rel2fam([{R,V} || V <- Def,
                              R <- [map_get(V, Regs)],
                              is_yreg(R)]),
    FrameSize = length(ByReg),                  %Assertion.
    lists:usort([{V,{y,FrameSize-Y-1}} || {{y,Y},Vs} <- ByReg, V <- Vs]).

%%%
%%% Reserving registers before register allocation.
%%%

%% reserve_regs(St0) -> St.
%%  Reserve registers prior to register allocation. Y registers
%%  have already been reserved. This function will reserve z,
%%  fr, and specific x registers.

reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
    %% The function arguments get x0, x1, and so on.
    ArgRes = reserve_arg_regs(Args, 0, Res0),

    %% Z registers (dummy registers) are for instructions without a
    %% return value (e.g. remove_message) or with a pseudo-return
    %% value (e.g. landingpad).
    ZRes = reserve_zregs(Blocks, Intervals, ArgRes),

    %% Float registers.
    FRes = reserve_fregs(Blocks, ZRes),

    %% Everything that is still unreserved becomes an X register.
    St#st{res=reserve_xregs(Blocks, maps:from_list(FRes))}.

%% reserve_arg_regs(Args, N, Acc) -> Acc'.
%%  Reserve {x,N}, {x,N+1}, and so on for the argument variables.
reserve_arg_regs(Args, N0, Acc0) ->
    Reserve = fun(#b_var{}=Arg, {N,Acc}) ->
                      {N+1,[{Arg,{x,N}}|Acc]}
              end,
    {_,Acc} = lists:foldl(Reserve, {N0,Acc0}, Args),
    Acc.

reserve_zregs(Blocks, Intervals, Res) ->
    %% A variable is short-lived if its only range ends two units
    %% after it begins.
    ShortLived = cerl_sets:from_list([V || {V,[{Start,End}]} <- Intervals,
                                           Start+2 =:= End]),
    PerBlock = fun(_L, #b_blk{is=Is,last=Last}, Acc) ->
                       reserve_zreg(Is, Last, ShortLived, Acc)
               end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_zreg(Is, Last, ShortLived, Acc) -> Acc'.
%%  Push a {Var,z} reservation onto Acc for each variable in the
%%  block that is to be placed in a Z register. Is is the list of
%%  instructions still to be examined, Last is the terminator of the
%%  block, and ShortLived is a set of variables (a cerl_sets set).
%%
%%  The clauses that match a list of exactly one or two instructions
%%  handle special cases at the end of a block and must come before
%%  the general clause.
reserve_zreg([#b_set{op=Op,dst=Dst}],
              #b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
                                                    Op =:= get_tuple_element ->
    %% If type optimization has determined that the result of these
    %% instructions can be used directly in a branch, we must avoid reserving a
    %% z register or code generation will fail.
    A;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
              #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
    case {Val,Last} of
        {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
            %% These two instructions can be combined to a test_arity
            %% instruction provided that the arity variable is short-lived.
            reserve_zreg_1(Dst, ShortLived, A0);
        {_,_} ->
            %% Either the arity is too big, or the boolean value is not
            %% used in a conditional branch.
            A0
    end;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
             #b_switch{}, ShortLived, A) ->
    %% A final tuple_size followed by a switch: reserve a Z register
    %% for the size if the variable is short-lived.
    reserve_zreg_1(Dst, ShortLived, A);
reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
    %% There is no short, easy way to rewrite 'xor' to a series of
    %% test instructions.
    A;
reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
    %% There is no short, easy way to rewrite is_record/2 to a series of
    %% test instructions.
    A;
reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
    %% The general case: the destination of each of the operations
    %% listed below is always reserved as a Z register.
    IsZReg = case Op of
                 bs_match_string -> true;
                 bs_save -> true;
                 bs_restore -> true;
                 bs_set_position -> true;
                 {float,clearerror} -> true;
                 kill_try_tag -> true;
                 landingpad -> true;
                 put_tuple_elements -> true;
                 remove_message -> true;
                 set_tuple_element -> true;
                 succeeded -> true;
                 timeout -> true;
                 wait_timeout -> true;
                 _ -> false
             end,
    A = case IsZReg of
            true -> [{Dst,z}|A0];
            false -> A0
        end,
    reserve_zreg(Is, Last, ShortLived, A);
reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
    %% All instructions have been examined. The branch condition gets
    %% a Z register if it is a short-lived variable.
    reserve_zreg_1(Bool, ShortLived, A);
reserve_zreg([], _, _, A) -> A.

%% reserve_zreg_1(Value, ShortLived, Acc) -> Acc'.
%%  Reserve a Z register for Value if it is a variable that is a
%%  member of ShortLived.
reserve_zreg_1(#b_literal{}, _ShortLived, Acc) ->
    Acc;
reserve_zreg_1(#b_var{}=V, ShortLived, Acc) ->
    case cerl_sets:is_element(V, ShortLived) of
        false -> Acc;
        true -> [{V,z}|Acc]
    end.

%% reserve_fregs(Blocks, Res) -> Res'.
%%  Add float register reservations for all blocks.
reserve_fregs(Blocks, Res) ->
    PerBlock = fun(_L, #b_blk{is=Is}, Acc) -> reserve_freg(Is, Acc) end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_freg(Is, Res) -> Res'.
%%  Reserve 'fr' for the destination of every {float,Op} instruction,
%%  except for {float,get}.
reserve_freg(Is, Res0) ->
    Reserve = fun(#b_set{op={float,get}}, Res) ->
                      Res;
                 (#b_set{op={float,_},dst=V}, Res) ->
                      [{V,fr}|Res];
                 (_, Res) ->
                      Res
              end,
    lists:foldl(Reserve, Res0, Is).

%% reserve_xregs(Blocks, Res0) -> Res.
%%  Reserve all remaining variables as X registers.
%%
%%  If a variable will need to be in a specific X register for a
%%  'call' or 'make_fun' (and there is nothing that will kill it
%%  between the definition and use), reserve the register using a
%%  {prefer,{x,X}} annotation. That annotation means that the linear
%%  scan algorithm will place the variable in the preferred register,
%%  unless that register is already occupied.
%%
%%  All remaining variables are reserved as X registers. Linear scan
%%  will allocate the lowest free X register for the variable.

reserve_xregs(Blocks, Res) ->
    %% Visit the blocks in the reverse of the reverse post order.
    RevOrder = reverse(beam_ssa:rpo(Blocks)),
    reserve_xregs(RevOrder, Blocks, #{}, Res).

reserve_xregs([L|Ls], Blocks, XsMap, Res0) ->
    #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),

    %% Start with the register hints implied by the terminator
    %% (a map from variable to preferred register).
    Hints0 = reserve_terminator(L, Is0, Last, Blocks, XsMap, Res0),

    %% Work on the reversed instruction sequence, with 'gc' markers
    %% as pseudo-instructions at the places where the code generator
    %% will put instructions that do a garbage collection.
    Marked = res_place_allocate(Anno, res_place_gc_instrs(reverse(Is0), [])),

    %% Add register hints for the variables defined in the block.
    {Res,Hints} = reserve_xregs_is(Marked, Res0, Hints0, []),

    reserve_xregs(Ls, Blocks, XsMap#{L=>Hints}, Res);
reserve_xregs([], _Blocks, _XsMap, Res) ->
    Res.

%% Insert explicit 'gc' markers at the points where there will
%% be a garbage collection. (Note that the instruction
%% sequence passed to this function is reversed.)

res_place_gc_instrs([#b_set{op=phi}=Phi|Is], Acc) ->
    res_place_gc_instrs(Is, [Phi|Acc]);
res_place_gc_instrs([#b_set{op=Op}=Call|Is], Acc)
  when Op =:= call; Op =:= make_fun ->
    case Acc of
        [] ->
            res_place_gc_instrs(Is, [Call]);
        [_|_] ->
            %% Something has already been accumulated; separate it
            %% from the call with a 'gc' marker.
            res_place_gc_instrs(Is, [Call,gc|Acc])
    end;
res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
    Need = beam_ssa_codegen:classify_heap_need(Op, Args),
    Acc = case {Need,Acc0} of
              {neutral,[test_heap|T]} ->
                  %% Keep the marker first in the accumulator.
                  [test_heap,I|T];
              {neutral,_} ->
                  [I|Acc0];
              {{put,_},[test_heap|T]} ->
                  [test_heap,I|T];
              {{put,_},_} ->
                  [test_heap,I|Acc0];
              {_,_} ->
                  [gc,I|Acc0]
          end,
    res_place_gc_instrs(Is, Acc);
res_place_gc_instrs([], Acc) ->
    %% Reverse the sequence, turning the 'test_heap' markers into
    %% 'gc' markers (the distinction is no longer needed).
    res_place_gc_instrs_rev(Acc, []).

%% res_place_gc_instrs_rev(Is, Acc) -> Acc'.
%%  Reverse Is onto Acc, replacing each 'test_heap' marker with 'gc'
%%  and never placing a 'gc' marker directly on top of another one.
res_place_gc_instrs_rev(Is, Acc0) ->
    Push = fun(I0, Acc) ->
                   I = case I0 of
                           test_heap -> gc;
                           _ -> I0
                       end,
                   case {I,Acc} of
                       {gc,[gc|_]} -> Acc;
                       {_,_} -> [I|Acc]
                   end
           end,
    lists:foldl(Push, Acc0, Is).

%% res_place_allocate(Anno, Is) -> Is'.
%%  Append a 'gc' marker to Is if the block annotations contain
%%  'yregs', because an 'allocate' instruction will be inserted there.
res_place_allocate(Anno, Is) when is_map_key(yregs, Anno) ->
    Is ++ [gc];
res_place_allocate(#{}, Is) ->
    Is.

%% reserve_xregs_is(Is, Res, Xs, Used) -> {Res,Xs}.
%%  Is is a reversed instruction sequence that may contain 'gc'
%%  markers. Xs maps variables to their preferred registers and Used
%%  is the ordered set of variables used by the instructions already
%%  seen.
reserve_xregs_is([gc|Is], Res, Xs, Used) ->
    %% The code generator will place an instruction that does a
    %% garbage collection here, so the remembered registers must
    %% be pruned.
    reserve_xregs_is(Is, Res, res_xregs_prune(Xs, Used, Res), Used);
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
    Res = reserve_xreg(Dst, Xs0, Res0),
    Used = ordsets:del_element(Dst, ordsets:union(Used0, beam_ssa:used(I))),
    Xs = if
             Op =:= call; Op =:= make_fun ->
                 %% Start over with hints from the arguments.
                 reserve_call_args(tl(Args));
             true ->
                 Xs0
         end,
    reserve_xregs_is(Is, Res, Xs, Used);
reserve_xregs_is([], Res, Xs, _Used) ->
    {Res,Xs}.

%% Pick up register hints from the successors of this block.
%% reserve_terminator(L, Is, Last, Blocks, XsMap, Res) -> Xs.
%%  L is the label of the block, Is its instructions, and Last its
%%  terminator. XsMap maps the label of each already processed block
%%  to the register hints for that block. The result is a map from
%%  variable to preferred register (empty if no hints are safe).
reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
                   _Blocks, XsMap, _Res) ->
    %% We know that no variables are used at ?BADARG_BLOCK, so
    %% any register hints from the success blocks are safe to use.
    map_get(Succ, XsMap);
reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
                   Blocks, XsMap, Res) when Succ =/= Fail ->
    %% A two-way branch with distinct targets.
    #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
    case {SuccBlk,FailBlk} of
        {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
         #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
            %% Both branches ultimately transfer to the same
            %% block (via two blocks with no instructions).
            %% Pick up register hints from the phi nodes
            %% in the common block.
            #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
            Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
            res_xregs_from_phi(PhiIs, Fail, Res, Xs);
        {_,_} when Is =/= [] ->
            case last(Is) of
                #b_set{op=succeeded,args=[Arg]} ->
                    %% We know that Arg will not be used at the failure
                    %% label, so we can pick up register hints from the
                    %% success label.
                    %% This is done by pretending that the block ends
                    %% with an unconditional branch to Succ, keeping
                    %% only the hint for Arg (if there is one).
                    Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
                    case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
                        #{Arg:=Reg} -> #{Arg=>Reg};
                        #{} -> #{}
                    end;
                _ ->
                    %% Register hints from the success block may not
                    %% be safe at the failure block, and vice versa.
                    #{}
            end;
        {_,_} ->
            %% Register hints from the success block may not
            %% be safe at the failure block, and vice versa.
            #{}
    end;
reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
                   Blocks, XsMap, Res) ->
    %% An unconditional branch.
    case map_get(Succ, Blocks) of
        #b_blk{is=[],last=Last} ->
            %% The successor has no instructions; continue with the
            %% terminator of the successor.
            reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
        #b_blk{is=[_|_]=PhiIs} ->
            %% Pick up hints from the phi nodes (if any) at the
            %% beginning of the successor.
            res_xregs_from_phi(PhiIs, L, Res, #{})
    end;
%% Any other terminator gives no hints.
reserve_terminator(_, _, _, _, _, _) -> #{}.

%% Pick up a reservation from a phi node.
res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
                   Pred, Res, Acc0) ->
    Acc = case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
              [] ->
                  %% The value coming from this predecessor is a
                  %% literal, so there is no variable to give a hint.
                  Acc0;
              [V] ->
                  case Res of
                      #{Dst:={prefer,Reg}} ->
                          %% Try to place V in the register that
                          %% is preferred for the phi node.
                          Acc0#{V=>Reg};
                      #{Dst:=_} ->
                          Acc0
                  end
          end,
    res_xregs_from_phi(Is, Pred, Res, Acc);
res_xregs_from_phi(_, _, _, Acc) ->
    %% The first non-phi instruction (or the end of the list).
    Acc.

%% reserve_call_args(Args) -> Xs.
%%  Map each variable argument to the X register for its position in
%%  the argument list. Literals occupy a position but get no entry.
reserve_call_args(Args) ->
    reserve_call_args(Args, 0, #{}).

reserve_call_args(Args, X0, Xs0) ->
    Reserve = fun(#b_var{}=Var, {X,Xs}) ->
                      {X+1,Xs#{Var=>{x,X}}};
                 (#b_literal{}, {X,Xs}) ->
                      {X+1,Xs}
              end,
    {_,Xs1} = lists:foldl(Reserve, {X0,Xs0}, Args),
    Xs1.

%% reserve_xreg(Var, Xs, Res) -> Res'.
%%  Reserve an X register for Var unless it already has a
%%  reservation. If Xs has an entry for Var, that register is
%%  recorded as preferred.
reserve_xreg(V, Xs, Res) ->
    case {Res,Xs} of
        {#{V:=_},_} ->
            %% Already reserved (but not as an X register).
            Res;
        {#{},#{V:=Reg}} ->
            %% Hint that this specific X register is preferred,
            %% unless it is already in use.
            Res#{V=>{prefer,Reg}};
        {#{},#{}} ->
            %% Any X register will do.
            Res#{V=>x}
    end.

%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
%%  Prune the list of preferred registers, to make sure that
%%  there are no "holes" (uninitialized X registers) when
%%  invoking the garbage collector.

res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
    %% Count the used variables that are (or may end up) in X
    %% registers: those reserved as a specific X register and those
    %% without any reservation. This is a conservative estimate of
    %% the number of safe registers; the actual number may be higher.
    CountsAsX = fun(V) ->
                        case Res of
                            #{V:={x,_}} -> true;
                            #{V:=_} -> false;
                            #{} -> true
                        end
                end,
    NumSafe = length([V || V <- Used, CountsAsX(V)]),

    %% Keep only the preferred registers that are safe.
    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
res_xregs_prune(Xs, _Used, _Res) ->
    Xs.

%%%
%%% Register allocation using linear scan.
%%%

%% A live interval for one variable.
%%
%%   sort  The start of the first range; used for ordering intervals.
%%   reg   'none', a specific register, or {prefer,XReg}.
%%   pool  The register pool to allocate from. For Y registers, the
%%         pool is identified by a number that is unique for the
%%         stack frame.
%%   var   The variable.
%%   rs    The ranges for the variable.
-record(i,
        {sort=1 :: instr_number(),
         reg=none :: i_reg(),
         pool=x :: pool_id(),
         var=#b_var{} :: b_var(),
         rs=[] :: [range()]
        }).

%% The state for the linear scan.
%%
%%   cur            The interval currently being handled.
%%   unhandled_res  Not yet handled intervals that have a specific
%%                  register reserved.
%%   unhandled_any  All other not yet handled intervals.
%%   active         Intervals that currently hold a register.
%%   inactive       Intervals that have more ranges, but do not
%%                  overlap the start of the current interval.
%%   free           The registers in each pool (keyed by pool id),
%%                  plus a {next,PoolId} entry for each pool
%%                  (see init_free/1).
%%   regs           The resulting register assignments.
-record(l,
        {cur=#i{} :: interval(),
         unhandled_res=[] :: [interval()],
         unhandled_any=[] :: [interval()],
         active=[] :: [interval()],
         inactive=[] :: [interval()],
         free=#{} :: #{var_name()=>pool(),
                       {'next',pool_id()}:=reg_num()},
         regs=[] :: [{b_var(),ssa_register()}]
        }).

-type interval() :: #i{}.
-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
-type pool() :: ordsets:ordset(ssa_register()).

%% linear_scan(St0) -> St.
%%  Allocate a register for each live interval. The result is stored
%%  in the 'regs' field as a map from variable to register; the
%%  'intervals' and 'res' fields are cleared.
linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
    St = St0#st{intervals=[],res=[]},
    Free = init_free(maps:to_list(Res)),
    Intervals = sort([init_interval(Int, Res) || Int <- Intervals0]),

    %% Intervals with a specific register reserved are kept apart
    %% from the others. A preferred register is only a hint.
    HasFixedReg = fun(#i{reg=none}) -> false;
                     (#i{reg={prefer,{_,_}}}) -> false;
                     (#i{reg={_,_}}) -> true
                  end,
    {UnhandledRes,UnhandledAny} = partition(HasFixedReg, Intervals),
    #l{regs=Regs} = do_linear(#l{unhandled_res=UnhandledRes,
                                 unhandled_any=UnhandledAny,
                                 free=Free}),
    St#st{regs=maps:from_list(Regs)}.

%% init_interval({Var,Ranges}, Res) -> #i{}.
%%  Create the interval record for a variable. The register pool and
%%  the initial register are derived from the reservation for the
%%  variable in Res.
init_interval({V,[{Start,_}|_]=Rs}, Res) ->
    {Pool,Reg} = case map_get(V, Res) of
                     {prefer,{x,_}}=Prefer -> {x,Prefer};
                     x -> {x,none};
                     {x,_}=XReg -> {x,XReg};
                     {y,Uniq} -> {Uniq,none};
                     {{y,_}=YReg,Uniq} -> {Uniq,YReg};
                     z -> {z,none};
                     fr -> {fr,none}
                 end,
    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.

%% init_free([{Var,Reservation}]) -> Free.
%%  Build the initial map of registers for each pool. For each pool
%%  there is also a {next,PoolId} key whose value is the number of
%%  registers initially in the pool.
init_free(Res) ->
    %% Make sure that the x pool exists and contains {x,0}.
    Pools0 = maps:from_list(rel2fam([{x,{x,0}}|init_free_1(Res)])),
    Pools = maps:update_with(x, fun init_xregs/1, Pools0),
    Next = [{{next,Id},length(Regs)} || {Id,Regs} <- maps:to_list(Pools)],
    maps:merge(Pools, maps:from_list(Next)).

%% init_free_1([{Var,Reservation}]) -> [{PoolId,Register}].
%%  Translate each reservation to the register that must be present
%%  in its pool. A general 'x' reservation contributes nothing.
init_free_1(Res) ->
    ToPoolReg = fun({_,{prefer,{x,_}=Reg}}) -> [{x,Reg}];
                   ({_,{x,_}=Reg}) -> [{x,Reg}];
                   ({_,{y,Uniq}}) -> [{Uniq,{y,0}}];
                   ({_,{{y,_}=Reg,Uniq}}) -> [{Uniq,Reg}];
                   ({_,z}) -> [{z,{z,0}}];
                   ({_,fr}) -> [{fr,{fr,0}}];
                   ({_,x}) -> []
                end,
    lists:flatmap(ToPoolReg, Res).

%% init_xregs([XReg]) -> [XReg].
%%  Fill in any gaps in an ordered, non-empty list of x registers so
%%  that the pool becomes contiguous from the first register to the
%%  last one.

init_xregs([{x,_}]=Last) ->
    Last;
init_xregs([{x,N}=Reg|[{x,M}|_]=Rest]) ->
    if
        N+1 =:= M ->
            [Reg|init_xregs(Rest)];
        true ->
            %% Insert the missing register and continue from there.
            [Reg|init_xregs([{x,N+1}|Rest])]
    end.

%% do_linear(L0) -> L.
%%  Main loop of the allocator. Pick the next unhandled interval,
%%  update the active and inactive lists relative to its start,
%%  choose a register for it and make it active. Stop when there
%%  are no more unhandled intervals.

do_linear(L0) ->
    case set_next_current(L0) of
        done ->
            L0;
        L1 ->
            L2 = check_inactive(expire_active(L1)),
            L3 = select_register(collect_available(L2), L2),
            do_linear(make_cur_active(L3))
    end.

%% set_next_current(L0) -> L | done.
%%  Move the next interval to handle into the cur field. When both
%%  lists are non-empty, the reserved interval is taken only if it
%%  starts strictly before the first unreserved one. Return 'done'
%%  when both lists are empty.

set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
    done;
set_next_current(#l{unhandled_res=[Res|ResT],unhandled_any=[]}=L) ->
    L#l{cur=Res,unhandled_res=ResT};
set_next_current(#l{unhandled_res=[],unhandled_any=[Any|AnyT]}=L) ->
    L#l{cur=Any,unhandled_any=AnyT};
set_next_current(#l{unhandled_res=[Res|ResT],
                    unhandled_any=[Any|AnyT]}=L) ->
    ReservedFirst = case {Res,Any} of
                        {#i{sort=N1},#i{sort=N2}} -> N1 < N2;
                        {_,_} -> false
                    end,
    case ReservedFirst of
        true -> L#l{cur=Res,unhandled_res=ResT};
        false -> L#l{cur=Any,unhandled_any=AnyT}
    end.

%% expire_active(L0) -> L.
%%  Compare each active interval with the start of the current
%%  interval (see overlap_status/2):
%%
%%  * ends_before_cur: the interval is dropped and its register is
%%    returned to its pool.
%%
%%  * overlapping: the interval stays active.
%%
%%  * not_overlapping: the register is returned to its pool and the
%%    interval, with its already passed ranges stripped, is moved to
%%    the inactive list.

expire_active(#l{cur=#i{sort=CurBegin},active=Active}=L0) ->
    {StillActive,L} = expire_active(Active, CurBegin, L0, []),
    L#l{active=StillActive}.

expire_active(Active, CurBegin, L0, Acc0) ->
    Classify =
        fun(#i{reg=Reg,rs=Ranges}=I, {Acc,L}) ->
                {_,_} = Reg,                    %Assertion.
                case overlap_status(Ranges, CurBegin) of
                    overlapping ->
                        {[I|Acc],L};
                    ends_before_cur ->
                        {Acc,free_reg(I, L)};
                    not_overlapping ->
                        Remaining = strip_before_current(Ranges, CurBegin),
                        L1 = free_reg(I, L),
                        Inactive = [I#i{rs=Remaining}|L1#l.inactive],
                        {Acc,L1#l{inactive=Inactive}}
                end
        end,
    foldl(Classify, {Acc0,L0}, Active).

%% check_inactive(L0) -> L.
%%  Compare each inactive interval with the start of the current
%%  interval (see overlap_status/2):
%%
%%  * ends_before_cur: the interval is dropped.
%%
%%  * not_overlapping: the interval stays inactive.
%%
%%  * overlapping: the interval, with its already passed ranges
%%    stripped, becomes active again and its register is removed
%%    from the pool of free registers.

check_inactive(#l{cur=#i{sort=CurBegin},inactive=Inactive}=L0) ->
    {StillInactive,L} = check_inactive(Inactive, CurBegin, L0, []),
    L#l{inactive=StillInactive}.

check_inactive(Inactive, CurBegin, L0, Acc0) ->
    Classify =
        fun(#i{rs=Ranges}=I, {Acc,L}) ->
                case overlap_status(Ranges, CurBegin) of
                    not_overlapping ->
                        {[I|Acc],L};
                    ends_before_cur ->
                        {Acc,L};
                    overlapping ->
                        Remaining = strip_before_current(Ranges, CurBegin),
                        Active = [I#i{rs=Remaining}|L#l.active],
                        {Acc,reserve_reg(I, L#l{active=Active})}
                end
        end,
    foldl(Classify, {Acc0,L0}, Inactive).

%% strip_before_current(Ranges, CurBegin) -> Ranges.
%%  Drop the leading ranges that end at or before CurBegin.

strip_before_current(Rs, CurBegin) ->
    EndsBefore = fun({_,E}) -> E =< CurBegin;
                    (_) -> false
                 end,
    lists:dropwhile(EndsBefore, Rs).

%% collect_available(L) -> [Register].
%%  Return the registers that the current interval may use:
%%
%%  * If the interval has a preferred register, return only that
%%    register when it is among the available ones; otherwise return
%%    all available registers.
%%
%%  * If the interval has a reserved register, return only that
%%    register.
%%
%%  * Otherwise return the free registers in the pool of the interval,
%%    minus the registers reserved by overlapping unhandled intervals.

collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
    %% Use the preferred register if it is available.
    %% The available registers are calculated as if the interval
    %% had no register at all.
    Avail = collect_available(L#l{cur=I#i{reg=none}}),
    case member(Prefer, Avail) of
        true -> [Prefer];
        false -> Avail
    end;
collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
    %% Return the already reserved register.
    [ReservedReg];
collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
    Free = get_pool(Cur, L),

    %% Note that since the live intervals are constructed from
    %% SSA form, there cannot be any overlap of the current interval
    %% with any inactive interval. See [3], page 175. Therefore we
    %% only have to check the unhandled intervals for overlap with
    %% the current interval. As a further optimization, we only need
    %% to check the intervals that have reserved registers.
    collect_available(Unhandled, Cur, Free).

%% collect_available(ReservedIntervals, Cur, Free0) -> Free.
%%  Remove from Free0 the register of every reserved interval that
%%  belongs to the same pool as Cur and overlaps with Cur.

collect_available(Reserved, #i{pool=CurPool}=Cur, Free0) ->
    Exclude =
        fun(#i{pool=Pool}, Free) when Pool =/= CurPool ->
                %% Wrong pool. Ignore this interval.
                Free;
           (#i{reg={_,_}=Reg}=I, Free) ->
                case overlaps(I, Cur) of
                    true -> ordsets:del_element(Reg, Free);
                    false -> Free
                end
        end,
    foldl(Exclude, Free0, Reserved).

%% select_register(Available, L0) -> L.
%%  Assign a register to the current interval: the first available
%%  register if there is one, otherwise a new register in the pool
%%  of the interval. The assignment is recorded in the regs list and
%%  the register is removed from the pool of free registers.

select_register([{_,_}=Reg|_], L) ->
    select_register_1(Reg, L);
select_register([], #l{cur=Cur}=L0) ->
    %% Allocate a new register in the pool.
    {Reg,L} = get_next_free(Cur, L0),
    select_register_1(Reg, L).

select_register_1(Reg, #l{cur=Cur0,regs=Regs}=L) ->
    Cur = Cur0#i{reg=Reg},
    reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]}).

%% make_cur_active(L0) -> L.
%%  Put the current interval first in the list of active intervals.

make_cur_active(#l{cur=Current}=L) ->
    L#l{active=[Current|L#l.active]}.

%% overlaps(Interval1, Interval2) -> boolean().
%%  Test whether the ranges of two intervals overlap.

overlaps(#i{rs=RangesA}, #i{rs=RangesB}) ->
    are_overlapping(RangesA, RangesB).

%% overlap_status(Ranges, CurBegin) ->
%%       ends_before_cur | overlapping | not_overlapping.
%%  Classify a non-empty list of ranges relative to the position
%%  CurBegin:
%%
%%  * ends_before_cur: every range ends at or before CurBegin.
%%
%%  * overlapping: the first range that ends after CurBegin starts
%%    at or before CurBegin.
%%
%%  * not_overlapping: the first range that ends after CurBegin
%%    starts after CurBegin.

overlap_status([{_,E}], CurBegin) when E =< CurBegin ->
    ends_before_cur;
overlap_status([{_,E}|Rs], CurBegin) when E =< CurBegin ->
    overlap_status(Rs, CurBegin);
overlap_status([{S,_}|_], CurBegin) when S =< CurBegin ->
    overlapping;
overlap_status([{_,_}|_], _CurBegin) ->
    not_overlapping.

%% reserve_reg(Interval, L0) -> L.
%%  Remove the register of the interval from the free registers in
%%  the pool of the interval.

reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
    update_pool(I, ordsets:del_element(Reg, get_pool(I, L)), L).

%% free_reg(Interval, L0) -> L.
%%  Return the register of the interval to the free registers in
%%  the pool of the interval.

free_reg(#i{reg={_,_}=Reg}=I, L) ->
    update_pool(I, ordsets:add_element(Reg, get_pool(I, L)), L).

%% get_pool(Interval, L) -> FreeRegisters.
%%  Fetch the free registers in the pool of the interval. The pool
%%  must exist.

get_pool(#i{pool=PoolId}, #l{free=FreeMap}) ->
    maps:get(PoolId, FreeMap).

%% update_pool(Interval, FreeRegisters, L0) -> L.
%%  Replace the free registers in the pool of the interval. The pool
%%  must already exist.

update_pool(#i{pool=PoolId}, New, #l{free=FreeMap}=L) ->
    L#l{free=maps:update(PoolId, New, FreeMap)}.

%% get_next_free(Interval, L0) -> {Register,L}.
%%  Create a new register in the pool of the interval, numbered by
%%  the {next,Pool} counter, and increment the counter. An integer
%%  pool identifier yields a y register; an atom pool identifier
%%  yields a register of the kind named by the atom.

get_next_free(#i{pool=Pool}, #l{free=Free}=L0) ->
    NextKey = {next,Pool},
    N = map_get(NextKey, Free),
    L = L0#l{free=Free#{NextKey:=N+1}},
    Kind = if
               is_integer(Pool) -> y;
               is_atom(Pool) -> Pool
           end,
    {{Kind,N},L}.

%%%
%%% Interval utilities.
%%%

%% are_overlapping(Ranges1, Ranges2) -> boolean().
%%  Test whether any range in Ranges1 overlaps with a range in
%%  Ranges2.

are_overlapping(Rs1, Rs2) ->
    any(fun(R) -> are_overlapping_1(R, Rs2) end, Rs1).

%% are_overlapping_1(Range, Ranges) -> boolean().
%%  Test whether Range overlaps with any of Ranges. The search stops
%%  as soon as a range that starts after the end of Range is found.
%%  Ranges that merely touch ({1,3} and {3,5}) do not overlap.

are_overlapping_1({_,_}, []) ->
    false;
are_overlapping_1({S1,E1}=Range, [{S2,E2}|Rest]) ->
    if
        E1 < S2 -> false;
        S2 < E1, S1 < E2 -> true;
        true -> are_overlapping_1(Range, Rest)
    end.

%%%
%%% Utilities.
%%%

%% is_loop_header(L, Blocks) -> false|true.
%%  Check whether the block with label L is a loop header. The block
%%  must exist in Blocks.

is_loop_header(L, Blocks) ->
    is_loop_header_1(map_get(L, Blocks)).

%% We KNOW that a loop header must start with a peek_message
%% instruction.
is_loop_header_1(#b_blk{is=[#b_set{op=peek_message}|_]}) -> true;
is_loop_header_1(_) -> false.

%% rel2fam([{Key,Value}]) -> [{Key,[Value]}].
%%  Group the values of a binary relation by key, using sofs.

rel2fam(Pairs) ->
    sofs:to_external(sofs:rel2fam(sofs:relation(Pairs))).

%% split_phis(Is) -> {Phis,OtherIs}.
%%  Split a list of instructions into its leading phi instructions
%%  and the remaining instructions.

split_phis(Is) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (#b_set{}) -> false
            end,
    splitwith(IsPhi, Is).

%% is_yreg(Register) -> boolean().
%%  Test whether a register is a y register. Only x, y, z and fr
%%  registers are accepted.

is_yreg({Kind,_}) when Kind =:= x; Kind =:= y; Kind =:= z; Kind =:= fr ->
    Kind =:= y.

%% new_vars([Base], Count0) -> {[Var],Count}.
%%  Create one new variable for each base name, numbering them
%%  consecutively from Count0. Return the variables and the next
%%  unused number.

new_vars(Bases, Count0) ->
    lists:mapfoldl(fun new_var/2, Count0, Bases).

%% new_var(Name, Count0) -> {Var,Count}.
%%  Create a new variable named {Base,Count0}. Name is either a base
%%  name or a {Base,Integer} tuple, in which case the old integer is
%%  discarded. Return the variable and the next unused number.

new_var(Name, Count) ->
    Base = case Name of
               {Base0,Int} ->
                   true = is_integer(Int),      %Assertion.
                   Base0;
               _ ->
                   Name
           end,
    {#b_var{name={Base,Count}},Count+1}.
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Prepare for code generation, including register allocation.
%%
%% The output of this compiler pass is still in the SSA format, but
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
%% the BEAM instructions. For example, the binary matching
%% instructions are transformed from the optimization-friendly
%% internal format to instructions more similar to the actual BEAM
%% instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%% are annotated with a {frame_size,Size} annotation.
%%
%% * 'copy' instructions are added for all variables that need
%% to be saved to the stack frame. Additional 'copy' instructions
%% can be added as an optimization to reuse y registers (see
%% the copy_retval sub pass).
%%
%% * Each function is annotated with a {register,RegisterMap}
%% annotation that maps each variable to a BEAM register. The linear
%% scan algorithm is used to allocate registers.
%%
%% There are four kinds of registers: x, y, fr (floating point register),
%% and z. A variable will be allocated to a z register if it is only
%% used by the instruction following the instruction that defines
%% the variable. The code generator will typically combine those
%% instructions to a test instruction. z registers are also used for
%% some instructions that don't have a return value.
%%
%% References:
%%
%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
%% in the context of SSA form and register constraints. In Proceedings
%% of the International Conference on Compiler Construction, pages
%% 229–246. LNCS 2304, Springer-Verlag, 2002.
%%
%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
%% linear scan register allocator. In Proceedings of the ACM/USENIX
%% International Conference on Virtual Execution Environments, pages
%% 132–141. ACM Press, 2005.
%%
%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
%% Form. In Proceedings of the International Symposium on Code
%% Generation and Optimization, pages 170-179. ACM Press, 2010.
%%

-module(beam_ssa_pre_codegen).

-export([module/2]).

-include("beam_ssa.hrl").

-import(lists, [all/2,any/2,append/1,duplicate/2,
                foldl/3,last/1,map/2,member/2,partition/2,
                reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).

%% module(Module0, Options) -> {ok,Module}.
%%  Run the sub passes selected by the options on every function in
%%  the module. The newer binary matching instructions are used
%%  unless the no_bsm3 option is given.

-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

module(#b_module{body=Fs}=Module, Opts) ->
    Passes = passes(Opts),
    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
    {ok,Module#b_module{body=functions(Fs, Passes, UseBSM3)}}.

%% functions([Function0], Passes, UseBSM3) -> [Function].
%%  Process the functions one at a time, in order.

functions(Fs, Ps, UseBSM3) ->
    map(fun(F) -> function(F, Ps, UseBSM3) end, Fs).

%% Shorthands for types defined in other modules, plus the types
%% used for live ranges, registers, and register reservations.
-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
-type instr_number() :: pos_integer().
%% A live range: a pair of instruction numbers.
-type range() :: {instr_number(),instr_number()}.
-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.
-type ypool() :: {'y',beam_ssa:label()}.
%% What is known about the register of a variable before the linear
%% scan: only the kind of register, a preferred x register, or a
%% specific register.
-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
                       ypool() | {yreg(),ypool()} | 'z'.
-type ssa_register() :: beam_ssa_codegen:ssa_register().

%% Wrap an expression in a call to tc/3, passing the file and line.
%% NOTE(review): tc/3 is not defined in this part of the file;
%% presumably it is a timing helper -- confirm before use.
-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).

%% State passed between the sub passes (see passes/1).
%%
%%  ssa          The blocks of the function being processed.
%%  args         The arguments of the function.
%%  cnt          Counter taken from the function and written back
%%               to it; used for numbering new variables.
%%  use_bsm3     false if the no_bsm3 option was given.
%%  frames       List of block labels. NOTE(review): presumably the
%%               blocks that need a stack frame -- confirm against
%%               place_frames.
%%  intervals    The live intervals for the variables.
%%  res          The register reservations, either as a list or
%%               as a map.
%%  regs         The final mapping from variable to register; added
%%               to the function as the 'registers' annotation.
%%  extra_annos  Extra annotations to add to the function.
-record(st, {ssa :: beam_ssa:block_map(),
             args :: [b_var()],
             cnt :: beam_ssa:label(),
             use_bsm3 :: boolean(),
             frames=[] :: [beam_ssa:label()],
             intervals=[] :: [{b_var(),[range()]}],
             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
             regs=#{} :: #{b_var():=ssa_register()},
             extra_annos=[] :: [{atom(),term()}]
            }).
%% A sub pass as a {Name,Fun} pair, where Fun is the local function
%% Name/1.
-define(PASS(N), {N,fun N/1}).

%% passes(Options) -> [{Name,Fun}].
%%  Return the sub passes to run, in order. The fix_tuples pass is
%%  only included if the no_put_tuple2 option is given, and the
%%  save_live_intervals pass is only included if the dprecg option
%%  is given.

passes(Opts) ->
    AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
    FixTuples = proplists:get_bool(no_put_tuple2, Opts),
    Ps = [?PASS(assert_no_critical_edges),

          %% Preliminaries.
          ?PASS(fix_bs),
          ?PASS(sanitize),
          case FixTuples of
              false -> ignore;
              true -> ?PASS(fix_tuples)
          end,
          ?PASS(use_set_tuple_element),
          ?PASS(place_frames),
          ?PASS(fix_receives),

          %% Find and reserve Y registers.
          ?PASS(find_yregs),
          ?PASS(reserve_yregs),

          %% Handle legacy binary match instruction that don't
          %% accept a Y register as destination.
          ?PASS(legacy_bs),

          %% Improve reuse of Y registers to potentially
          %% reduce the size of the stack frame.
          ?PASS(copy_retval),
          ?PASS(opt_get_list),

          %% Calculate live intervals.
          ?PASS(number_instructions),
          ?PASS(live_intervals),
          ?PASS(reserve_regs),

          %% If needed for a .precg file, save the live intervals
          %% so they can be included in an annotation.
          case AddPrecgAnnos of
              false -> ignore;
              true -> ?PASS(save_live_intervals)
          end,

          %% Allocate registers.
          ?PASS(linear_scan),
          ?PASS(frame_size),
          ?PASS(turn_yregs)],
    %% Remove the placeholders for the passes that were not selected.
    [P || P <- Ps, P =/= ignore].

%% function(Function0, Passes, UseBSM3) -> Function.
%%  Run all sub passes on one function. The resulting blocks and
%%  counter are stored in the function, and the register map is added
%%  as the 'registers' annotation together with any extra annotations
%%  collected by the passes.
%%
%%  If a pass raises an exception, the name and arity of the function
%%  are printed before the exception is re-raised with its original
%%  class, reason and stack trace.

function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
         Ps, UseBSM3) ->
    try
        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
        St = compile:run_sub_passes(Ps, St0),
        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
        F1 = add_extra_annos(F0, ExtraAnnos),
        F = beam_ssa:add_anno(registers, Regs, F1),
        F#b_function{bs=Blocks,cnt=Count}
    catch
        Class:Error:Stack ->
            %% Help the person debugging the compiler by showing
            %% which function was being compiled.
            #{func_info:={_,Name,Arity}} = Anno,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

%% save_live_intervals(St0) -> St.
%%  Save the live intervals as the only extra annotation, so that
%%  they will be added to the function as a live_intervals
%%  annotation.

save_live_intervals(#st{}=St) ->
    Anno = {live_intervals,St#st.intervals},
    St#st{extra_annos=[Anno]}.

%% add_extra_annos(Function0, [{Name,Value}]) -> Function.
%%  Add extra annotations when a .precg listing file is being
%%  produced. The annotations are added in list order.

add_extra_annos(F, [{Name,Value}|Annos]) ->
    add_extra_annos(beam_ssa:add_anno(Name, Value, F), Annos);
add_extra_annos(F, []) ->
    F.

%% assert_no_critical_edges(St0) -> St.
%%  The code generator will not work if there are critical edges.
%%  Abort if any critical edges are found. The state is returned
%%  unchanged.

assert_no_critical_edges(#st{ssa=Blocks}=St) ->
    beam_ssa:fold_rpo(fun assert_no_ces/3, Blocks, Blocks),
    St.

%% assert_no_ces(Label, Block, Blocks) -> Blocks.
%%  Fold function for assert_no_critical_edges/1. Crash with a
%%  badmatch if a block that starts with a two-argument phi node
%%  has a predecessor with more than one successor.
%%
%%  NOTE(review): only phi nodes with exactly two arguments are
%%  matched, so blocks with three or more predecessors are not
%%  checked -- confirm that this is intentional.

assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
    %% This block has multiple predecessors. Make sure that none
    %% of the predecessors have more than one successor.
    true = all(fun({_,P}) ->
                       length(beam_ssa:successors(P, Blocks)) =:= 1
               end, Phis),                      %Assertion.
    Blocks;
assert_no_ces(_, _, Blocks) -> Blocks.

%% fix_bs(St0) -> St.
%%  Fix up the binary matching instructions:
%%
%%    * Insert bs_save and bs_restore instructions where needed
%%      (or bs_get_position and bs_set_position instructions if
%%      use_bsm3 is true).
%%
%%    * Combine bs_match and bs_extract instructions to bs_get
%%      instructions.
%%
%%  The state is returned unchanged if the function contains
%%  neither bs_start_match nor bs_match instructions.

fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
    %% Collect, for each match context variable, either a marker
    %% that it is the root of a chain or the variable it was
    %% derived from.
    F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
                %% Mark the root of the match context list.
                [{Dst,{context,Dst}}|A];
           (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
                %% Link this match context the previous match context.
                [{Dst,ParentCtx}|A];
           (_, A) ->
                A
        end,
    case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
        [] ->
            %% No binary matching in this function.
            St;
        [_|_]=M ->
            CtxChain = maps:from_list(M),
            Linear0 = beam_ssa:linearize(Blocks),

            %% Insert position instructions where needed.
            {Linear1,Count} = case UseBSM3 of
                                  true ->
                                      bs_pos_bsm3(Linear0, CtxChain, Count0);
                                  false ->
                                      bs_pos_bsm2(Linear0, CtxChain, Count0)
                              end,

            %% Rename instructions.
            Linear = bs_instrs(Linear1, CtxChain, []),

            St#st{ssa=maps:from_list(Linear),cnt=Count}
    end.

%% bs_pos_bsm3(Linear0, CtxChain, Count0) -> {Linear,Count}.
%%  Insert bs_get_position and bs_set_position instructions as needed.
%%
%%  bs_restores/4 returns a map from the destination variable of each
%%  instruction that needs its position restored to a
%%  {Context,SavePoint} pair. Each distinct pair is given a number
%%  by make_bs_pos_dict/3.
%%
%%  Note that the names of the two helpers are misleading:
%%  make_bs_setpos_map/4 builds the bs_get_position instructions
%%  (bound to Gets) and make_bs_getpos_map/4 builds the
%%  bs_set_position instructions (bound to Sets).

bs_pos_bsm3(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    %% Group the save points by match context.
    S0 = sofs:relation(Rs, [{context,save_point}]),
    S1 = sofs:relation_to_family(S0),
    S = sofs:to_external(S1),

    {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
    {Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []),
    {Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.

%% make_bs_setpos_map([{Ctx,Save}], SavePoints, Count0, Acc) ->
%%       {Map,Count}.
%%  Despite its name, this function builds the bs_get_position
%%  instructions: for each {Ctx,Save} pair, Map maps Save to an
%%  instruction that stores the position of Ctx in the save point
%%  variable for the pair. Count is incremented once for each pair.

make_bs_setpos_map(Rs, SavePoints, Count, Acc0) ->
    AddGet = fun({Ctx,Save}=Ps, Acc) ->
                     Get = #b_set{op=bs_get_position,
                                  dst=get_savepoint(Ps, SavePoints),
                                  args=[Ctx]},
                     [{Save,Get}|Acc]
             end,
    {maps:from_list(foldl(AddGet, Acc0, Rs)),Count + length(Rs)}.

%% make_bs_getpos_map([{Bef,{Ctx,SavePoint}}], SavePoints, Count0, Acc) ->
%%       {Map,Count}.
%%  Despite its name, this function builds the bs_set_position
%%  instructions: Map maps each Bef to an instruction that sets the
%%  position of Ctx from the save point variable for the pair. Each
%%  instruction gets a fresh '@ssa_ignored' destination variable.

make_bs_getpos_map(Restores, SavePoints, Count0, Acc0) ->
    AddSet = fun({Bef,{Ctx,_}=Ps}, {Acc,N}) ->
                     Set = #b_set{op=bs_set_position,
                                  dst=#b_var{name={'@ssa_ignored',N}},
                                  args=[Ctx,get_savepoint(Ps, SavePoints)]},
                     {[{Bef,Set}|Acc],N+1}
             end,
    {Acc,Count} = foldl(AddSet, {Acc0,Count0}, Restores),
    {maps:from_list(Acc),Count}.

%% get_savepoint({Ctx,SavePoint}, SavePoints) -> Var.
%%  Return the '@ssa_bs_position' variable for the pair, numbered
%%  by its entry in SavePoints.

get_savepoint({_,_}=Ps, SavePoints) ->
    #b_var{name={'@ssa_bs_position',map_get(Ps, SavePoints)}}.

%% make_bs_pos_dict([{Ctx,[SavePoint]}], Count0, Acc) -> {Map,Count}.
%%  Give every {Ctx,SavePoint} pair a unique number, starting at
%%  Count0. Map also maps each Ctx to the counter value reached
%%  after its save points have been numbered.

make_bs_pos_dict(Families, Count0, Acc0) ->
    Number = fun({Ctx,Pts}, {Acc,N}) ->
                     make_bs_pos_dict_1(Pts, Ctx, N, Acc)
             end,
    {Acc,Count} = foldl(Number, {Acc0,Count0}, Families),
    {maps:from_list(Acc),Count}.

%% make_bs_pos_dict_1([SavePoint], Ctx, I0, Acc0) -> {Acc,I}.
%%  Number the save points of one match context consecutively from
%%  I0, and finish with an entry for the context itself holding
%%  the next unused number.

make_bs_pos_dict_1(Pts, Ctx, I0, Acc0) ->
    Number = fun(Pt, {Acc,N}) -> {[{{Ctx,Pt},N}|Acc],N+1} end,
    {Acc,I} = foldl(Number, {Acc0,I0}, Pts),
    {[{Ctx,I}|Acc],I}.

%% bs_pos_bsm2(Linear0, CtxChain, Count0) -> {Linear,Count}.
%%  As bs_pos_bsm3/3 but without OTP-22 instructions: bs_save and
%%  bs_restore instructions with numbered slots are inserted instead
%%  of bs_get_position and bs_set_position. This is only used when
%%  cross-compiling to older versions.

bs_pos_bsm2(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    %% Group the save points by match context.
    S0 = sofs:relation(Rs, [{context,save_point}]),
    S1 = sofs:relation_to_family(S0),
    S = sofs:to_external(S1),
    %% Slot numbers start from 0 for each match context.
    Slots = make_save_point_dict(S, []),
    {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
    {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.

%% make_save_map([{Ctx,Save}], Slots, Count0, Acc) -> {Map,Count}.
%%  Map each Save to a bs_save instruction that saves the position
%%  of Ctx in the slot for the pair. No instruction is created for
%%  the special 'start' slot. Each instruction gets a fresh
%%  '@ssa_ignored' destination variable.

make_save_map([], _Slots, Count, Acc) ->
    {maps:from_list(Acc),Count};
make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
    case make_slot(Ps, Slots) of
        #b_literal{val=start} ->
            make_save_map(T, Slots, Count, Acc);
        Slot ->
            SaveI = #b_set{op=bs_save,
                           dst=#b_var{name={'@ssa_ignored',Count}},
                           args=[Ctx,Slot]},
            make_save_map(T, Slots, Count+1, [{Save,SaveI}|Acc])
    end.

%% make_restore_map([{Bef,{Ctx,SavePoint}}], Slots, Count0, Acc) ->
%%       {Map,Count}.
%%  Map each Bef to a bs_restore instruction that restores the
%%  position of Ctx from the slot for the pair. Each instruction
%%  gets a fresh '@ssa_ignored' destination variable.

make_restore_map(Restores, Slots, Count0, Acc0) ->
    AddRestore = fun({Bef,{Ctx,_}=Ps}, {Acc,N}) ->
                         RestoreI = #b_set{op=bs_restore,
                                           dst=#b_var{name={'@ssa_ignored',N}},
                                           args=[Ctx,make_slot(Ps, Slots)]},
                         {[{Bef,RestoreI}|Acc],N+1}
                 end,
    {Acc,Count} = foldl(AddRestore, {Acc0,Count0}, Restores),
    {maps:from_list(Acc),Count}.

%% make_slot({Ctx,SavePoint}, Slots) -> #b_literal{}.
%%  Return the slot for a save point as a literal. The save point
%%  that is the match context itself is the special 'start' slot;
%%  any other save point must have a number in Slots.

make_slot({Ctx,SavePoint}=Ps, Slots) ->
    Val = if
              Ctx =:= SavePoint -> start;
              true -> map_get(Ps, Slots)
          end,
    #b_literal{val=Val}.

%% make_save_point_dict([{Ctx,[SavePoint]}], Acc) -> Map.
%%  Give every {Ctx,SavePoint} pair a slot number, starting from 0
%%  for each match context. Map also maps each Ctx to its number
%%  of slots.

make_save_point_dict(Families, Acc0) ->
    Number = fun({Ctx,Pts}, Acc) ->
                     make_save_point_dict_1(Pts, Ctx, 0, Acc)
             end,
    maps:from_list(foldl(Number, Acc0, Families)).

%% make_save_point_dict_1([SavePoint], Ctx, I0, Acc0) -> Acc.
%%  Number the save points of one match context consecutively from
%%  I0, and finish with an entry for the context itself holding
%%  the next unused number.

make_save_point_dict_1(Pts, Ctx, I0, Acc0) ->
    Number = fun(Pt, {Acc,N}) when Pt =:= Ctx ->
                     %% Special {atom,start} save point. Does not
                     %% need a bs_save instruction.
                     {Acc,N};
                (Pt, {Acc,N}) ->
                     {[{{Ctx,Pt},N}|Acc],N+1}
             end,
    {Acc,I} = foldl(Number, {Acc0,I0}, Pts),
    [{Ctx,I}|Acc].

%% bs_restores(Linear, CtxChain, D, Rs0) -> Rs.
%%  Go through the blocks in order and collect the instructions that
%%  need a restore of the match position. D maps a block label to
%%  the positions known on entry to that block; a block without an
%%  entry starts with no known positions.

bs_restores([], _CtxChain, _D, Rs) ->
    Rs;
bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
    PosAtEntry = maps:get(L, D0, #{}),
    {PosAtExit,Rs} = bs_restores_is(Is, CtxChain, PosAtEntry, Rs0),
    D = bs_update_successors(Last, PosAtExit, PosAtEntry, D0),
    bs_restores(Bs, CtxChain, D, Rs).

%% bs_update_successors(Terminator, SPos, FPos, D0) -> D.
%%  Propagate the known positions to the successors of a block.
%%  For a branch, the success label gets the positions at the end
%%  of the block (SPos) and the failure label gets the positions at
%%  the beginning of the block (FPos). For a switch, all labels get
%%  SPos.

bs_update_successors(#b_ret{}, _SPos, _FPos, D) ->
    D;
bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D0) ->
    D = join_positions([{Succ,SPos}], D0),
    join_positions([{Fail,FPos}], D);
bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D0) ->
    D = join_positions([{L,SPos} || {_,L} <- List], D0),
    join_positions([{Fail,SPos}], D).

%% join_positions([{Label,PosMap}], D0) -> D.
%%  Record the positions known on entry to each label. If positions
%%  have already been recorded for a label, the old and new position
%%  maps are joined.

join_positions(Updates, D0) ->
    Join = fun({L,New}, D) ->
                   case D of
                       #{L:=Old} when Old =:= New ->
                           %% Same map.
                           D;
                       #{L:=Old} ->
                           %% Different maps.
                           D#{L:=join_positions_1(New, Old)};
                       #{} ->
                           D#{L=>New}
                   end
           end,
    foldl(Join, D0, Updates).

%% join_positions_1(MapPos0, MapPos1) -> MapPos.
%%  Join two position maps. A key found in only one of the maps
%%  keeps its position. A key found in both maps keeps its position
%%  if the positions are the same; otherwise the position becomes
%%  'unknown'.

join_positions_1(MapPos0, MapPos1) ->
    Join = fun(Start, Pos, Acc) ->
                   case MapPos0 of
                       #{Start:=Pos} -> Acc;
                       #{Start:=_} -> Acc#{Start:=unknown};
                       #{} -> Acc#{Start=>Pos}
                   end
           end,
    maps:fold(Join, MapPos0, MapPos1).

%% bs_restores_is(Is, CtxChain, PosMap0, Rs0) -> {PosMap,Rs}.
%%  Go through the instructions in a block, keeping track of the
%%  current position of each match context.
%%
%%  PosMap maps the variable defined by each bs_start_match
%%  instruction to the variable that holds the current position of
%%  that match context, or to 'unknown'.
%%
%%  Rs maps the destination variable of each instruction that must
%%  be preceded by a restore to a {Start,Position} pair, giving the
%%  match context and the position to restore to.

bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
               CtxChain, PosMap0, Rs) ->
    %% A new match context starts out at its own start position.
    PosMap = PosMap0#{Start=>Start},
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
               CtxChain, PosMap0, Rs0) ->
    Start = bs_subst_ctx(NewPos, CtxChain),
    [_,FromPos|_] = Args,
    case PosMap0 of
        #{Start:=FromPos} ->
            %% Same position, no restore needed.
            PosMap = case bs_match_type(I) of
                         plain ->
                             %% Update position to new position.
                             PosMap0#{Start:=NewPos};
                         _ ->
                             %% Position will not change (test_unit
                             %% instruction or no instruction at
                             %% all).
                             PosMap0#{Start:=FromPos}
                     end,
            bs_restores_is(Is, CtxChain, PosMap, Rs0);
        #{Start:=_} ->
            %% Different positions, might need a restore instruction.
            case bs_match_type(I) of
                none ->
                    %% The tail test will be optimized away.
                    %% No need to do a restore.
                    PosMap = PosMap0#{Start:=FromPos},
                    bs_restores_is(Is, CtxChain, PosMap, Rs0);
                test_unit ->
                    %% This match instruction will be replaced by
                    %% a test_unit instruction. We will need a
                    %% restore. The new position will be the position
                    %% restored to (NOT NewPos).
                    PosMap = PosMap0#{Start:=FromPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs);
                plain ->
                    %% Match or skip. Position will be changed.
                    PosMap = PosMap0#{Start:=NewPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs)
            end
    end;
bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
               CtxChain, PosMap, Rs) ->
    %% The match context must already be at the position that the
    %% value is extracted from.
    Start = bs_subst_ctx(FromPos, CtxChain),
    #{Start:=FromPos} = PosMap,                 %Assertion.
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0) ->
    %% Restore the positions of any match contexts passed as
    %% arguments before the call, and consider their positions
    %% unknown after the call.
    {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
    %% We can land here from any point, so all positions are invalid.
    PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0)
  when Op =:= bs_test_tail;
       Op =:= bs_get_tail ->
    %% These instructions need the match context to be at the
    %% position given by their argument.
    {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([], _CtxChain, PosMap, Rs) ->
    {PosMap,Rs}.

%% bs_match_type(Instruction) -> none | test_unit | plain.
%%  Classify a bs_match instruction. Skipping the entire rest of a
%%  binary gives 'none' if the unit is 1 and 'test_unit' for any
%%  other unit. Everything else is 'plain'.

bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                           #b_literal{val=binary},_Flags,
                           #b_literal{val=all},#b_literal{val=Unit}]}) ->
    if
        Unit =:= 1 -> none;
        true -> test_unit
    end;
bs_match_type(_) ->
    plain.

%% bs_invalidate_pos(Args, PosMap0, CtxChain) -> PosMap.
%%  Call instructions leave the match position in an undefined state,
%%  requiring us to invalidate each affected argument. The position
%%  of every match context passed as an argument is set to 'unknown'.

bs_invalidate_pos(Args, PosMap0, CtxChain) ->
    Invalidate =
        fun(#b_var{}=Arg, PosMap) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case PosMap of
                    #{Start:=_} ->
                        PosMap#{Start:=unknown};
                    #{} ->
                        %% Not a match context.
                        PosMap
                end;
           (_, PosMap) ->
                PosMap
        end,
    foldl(Invalidate, PosMap0, Args).

%% bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) -> {Rs,PosMap}.
%%  For each argument that is a position in a match context, make
%%  sure that the match context is at that position. If it is not,
%%  note in Rs that the instruction defining Dst needs a restore and
%%  update the position.

bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) ->
    Restore =
        fun(#b_var{}=Arg, {Rs,PosMap}=Acc) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case PosMap of
                    #{Start:=Arg} ->
                        %% Same position, no restore needed.
                        Acc;
                    #{Start:=_} ->
                        %% Different positions, need a restore
                        %% instruction.
                        {Rs#{Dst=>{Start,Arg}},PosMap#{Start:=Arg}};
                    #{} ->
                        %% Not a match context.
                        Acc
                end;
           (_, Acc) ->
                Acc
        end,
    foldl(Restore, {Rs0,PosMap0}, Args).

%% bs_insert_bsm3(Blocks0, Saves, Restores, SavePoints) -> Blocks.
%%  Insert all save and restore instructions. The existing
%%  instructions are left as they are.

bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
    Identity = fun(I) -> I end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, Identity).

%% bs_insert_bsm2(Blocks0, Saves, Restores, SavePoints) -> Blocks.
%%  Insert all bs_save and bs_restore instructions. The old
%%  instructions require bs_start_match to be annotated with the
%%  number of position slots it needs; the number is 0 if the match
%%  context has no entry in SavePoints.

bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
    AnnotateStart =
        fun(#b_set{op=bs_start_match,dst=Dst}=I) ->
                NumSlots = maps:get(Dst, SavePoints, 0),
                beam_ssa:add_anno(num_slots, NumSlots, I);
           (I) ->
                I
        end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, AnnotateStart).

%% bs_insert_1(Blocks0, Saves, Restores, Slots, XFrm) -> Blocks.
%%  Insert the restore instructions into each block and apply XFrm
%%  to every instruction. If one of the instructions in a block has
%%  an entry in Saves, the save instruction is inserted into the
%%  block that follows in the list, before that block is processed.

bs_insert_1([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots, XFrm) ->
    Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
    Bs = bs_insert_saves(Is, Bs0, Saves),
    [{L,Blk#b_blk{is=Is}}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)];
bs_insert_1([], _, _, _, _) -> [].

%% bs_insert_is_1(Is0, Restores, SavePoints, XFrm) -> Is.
%%  Apply XFrm to every instruction. A bs_test_tail, bs_get_tail,
%%  bs_match or call instruction whose destination variable has an
%%  entry in Restores is preceded by that restore instruction.

bs_insert_is_1([], _Restores, _SavePoints, _XFrm) ->
    [];
bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is0], Restores, SavePoints, XFrm) ->
    I = XFrm(I0),
    Is = bs_insert_is_1(Is0, Restores, SavePoints, XFrm),
    MayRestore = member(Op, [bs_test_tail,bs_get_tail,bs_match,call]),
    case Restores of
        #{Dst:=Restore} when MayRestore -> [Restore,I|Is];
        #{} -> [I|Is]
    end.

%% bs_insert_saves(Is, Blocks0, Saves) -> Blocks.
%%  Find the first instruction whose destination variable has an
%%  entry in Saves and insert that save instruction into the first
%%  block of Blocks0. Return Blocks0 unchanged if there is no such
%%  instruction.

bs_insert_saves([], Bs, _Saves) ->
    Bs;
bs_insert_saves([#b_set{dst=Dst}|_], Bs, Saves) when is_map_key(Dst, Saves) ->
    bs_insert_save(map_get(Dst, Saves), Bs);
bs_insert_saves([#b_set{}|Is], Bs, Saves) ->
    bs_insert_saves(Is, Bs, Saves).

%% bs_insert_save(Save, Blocks0) -> Blocks.
%%  Insert the save instruction at the beginning of the first block,
%%  but after a leading bs_extract instruction if there is one.

bs_insert_save(Save, [{L,#b_blk{is=[#b_set{op=bs_extract}=Ex|Is]}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Ex,Save|Is]}}|Bs];
bs_insert_save(Save, [{L,#b_blk{is=Is}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Save|Is]}}|Bs].

%% bs_instrs(Linear0, CtxChain, Acc) -> Linear.
%%  Translate bs_match instructions to bs_get, bs_match_string,
%%  or bs_skip. Also rename match context variables to use the
%%  variable assigned to by the start_match instruction.
%%
%%  Acc holds the blocks processed so far in reverse order, which
%%  means that the previously processed block is first in Acc when
%%  a bs_extract instruction is to be combined with the bs_match
%%  instruction in that block.

bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
    case bs_instrs_is(Is0, CtxChain, []) of
        [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
            %% Drop this instruction. Rewrite the corresponding
            %% bs_match instruction in the previous block to
            %% a bs_get instruction.
            Acc = bs_combine(Dst, Ctx, Acc0),
            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
        Is ->
            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
    end;
bs_instrs([], _, Acc) ->
    reverse(Acc).

%% bs_instrs_is(Is0, CtxChain, Acc) -> Is.
%%  Replace every match context variable used as an argument with
%%  the variable defined by the start_match instruction, and
%%  translate bs_match instructions that skip or match a string to
%%  bs_skip and bs_match_string, respectively. All other
%%  instructions only have their arguments renamed. The translated
%%  instructions are appended to the reversed Acc.

bs_instrs_is(Is, CtxChain, Acc) ->
    Translate =
        fun(#b_set{op=Op,args=Args0}=I0) ->
                Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
                I = I0#b_set{args=Args},
                case {Op,Args} of
                    {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
                        I#b_set{op=bs_skip,args=[Type,Ctx|As]};
                    {bs_match,[#b_literal{val=string},Ctx|As]} ->
                        I#b_set{op=bs_match_string,args=[Ctx|As]};
                    {_,_} ->
                        I
                end
        end,
    reverse(Acc, map(Translate, Is)).

%% bs_combine(Dst, Ctx, Blocks0) -> Blocks.
%%  Combine a bs_match instruction with the destination register
%%  taken from a bs_extract instruction.
%%
%%  The first block in Blocks0 must end with a bs_match instruction
%%  followed by one more instruction (bound to Succeeded). The
%%  bs_match instruction is rewritten to a bs_get instruction that
%%  defines Dst, and the argument of the following instruction is
%%  changed to Dst.

bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
    %% Reverse the instructions to get at the last two of them.
    [#b_set{}=Succeeded,
     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
    Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
                       Succeeded#b_set{args=[Dst]}]),
    [{L,Blk#b_blk{is=Is}}|Acc].

%% bs_subst_ctx(Arg, CtxChain) -> Arg.
%%  If the argument is a match context variable, follow the chain of
%%  parent contexts and return the variable at the root of the chain.
%%  Any other argument is returned unchanged.

bs_subst_ctx(#b_var{}=Var, CtxChain) ->
    case maps:find(Var, CtxChain) of
        {ok,{context,Ctx}} ->
            Ctx;
        {ok,ParentCtx} ->
            bs_subst_ctx(ParentCtx, CtxChain);
        error ->
            %% Not a match context variable.
            Var
    end;
bs_subst_ctx(Other, _CtxChain) ->
    Other.

%% legacy_bs(St0) -> St.
%%  Binary matching instructions in OTP 21 and earlier don't support
%%  a Y register as destination. If St#st.use_bsm3 is false,
%%  we will need to rewrite those instructions so that the result
%%  is first put in an X register and then moved to a Y register
%%  if the operation succeeded. The reservations must be in list
%%  form when this pass is run.

legacy_bs(#st{use_bsm3=true}=St) ->
    St;
legacy_bs(#st{use_bsm3=false,ssa=Blocks,cnt=Count0,res=Res}=St) ->
    YregVars = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
    {Linear,Count} = legacy_bs(beam_ssa:linearize(Blocks), YregVars,
                               Count0, #{}, []),
    St#st{ssa=maps:from_list(Linear),cnt=Count}.

%% legacy_bs(Linear0, IsYreg, Count0, Copies, Acc) -> {Linear,Count}.
%%  Rewrite the blocks one at a time. Copies maps a block label to
%%  a copy instruction that is to be inserted first in that block.
%%  Note that the blocks are returned in reverse order.

legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
    {Acc,Count};
legacy_bs([{L,#b_blk{is=Is0,last=Last}=Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
    Is1 = case maps:find(L, Copies0) of
              {ok,Copy} -> [Copy|Is0];
              error -> Is0
          end,
    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]).

%% legacy_bs_is([Instr], Last, IsYreg, Count0, Copies0, Acc) ->
%%       {[Instr],Count,Copies}.
%%  Look at the last two instructions of a block. If they are a
%%  bs_get or bs_init whose destination is in IsYreg, followed by a
%%  'succeeded' test of that destination, redirect the result to a
%%  fresh temporary variable and register a copy instruction (from
%%  the temporary to the original destination) for the success label
%%  of the terminator Last.

legacy_bs_is([#b_set{op=Op,dst=Dst}=Instr0,
              #b_set{op=succeeded,dst=Bool,args=[Dst]}=Test0],
             Last, IsYreg, Count, Copies, Acc) ->
    IsBsOp = Op =:= bs_get orelse Op =:= bs_init,
    case is_map_key(Dst, IsYreg) andalso IsBsOp of
        true ->
            Tmp = #b_var{name={'@bs_temp_dst',Count}},
            Instr = Instr0#b_set{dst=Tmp},
            Test = Test0#b_set{args=[Tmp]},
            CopyI = #b_set{op=copy,dst=Dst,args=[Tmp]},
            %% The terminator must branch on the result of the test.
            #b_br{bool=Bool,succ=SuccL} = Last,
            {reverse(Acc, [Instr,Test]),Count+1,Copies#{SuccL=>CopyI}};
        false ->
            {reverse(Acc, [Instr0,Test0]),Count,Copies}
    end;
legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
    {reverse(Acc),Count,Copies}.

%% sanitize(St0) -> St.
%%  Get rid of constructs that can cause trouble in later sub passes:
%%
%%  * Unreachable blocks, which may cause problems when
%%  dominators are calculated.
%%
%%  * Instructions (such as get_hd) that don't accept literal
%%  arguments. Such instructions are evaluated and removed.

sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
    {Blocks,Count} = sanitize(beam_ssa:rpo(Blocks0), Count0, Blocks0, #{}),
    St#st{ssa=Blocks,cnt=Count}.

%% sanitize([Label], Count0, Blocks0, Values0) -> {Blocks,Count}.
%%  Sanitize the instructions in each block in turn. Values
%%  accumulates the variables whose defining instructions were
%%  evaluated and removed, mapped to their values. When all blocks
%%  have been processed, those variables are renamed in all blocks
%%  and blocks that are not reachable are dropped.

sanitize([L|Ls], Count0, Blocks0, Values0) ->
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks0),
    {Count,Blocks,Values} =
        case sanitize_is(Is0, Count0, Values0, false, []) of
            no_change ->
                {Count0,Blocks0,Values0};
            {Is,Count1,Values1} ->
                {Count1,Blocks0#{L:=Blk0#b_blk{is=Is}},Values1}
        end,
    sanitize(Ls, Count, Blocks, Values);
sanitize([], Count, Blocks0, Values) ->
    Blocks = case Values =:= #{} of
                 true -> Blocks0;
                 false -> beam_ssa:rename_vars(Values, [0], Blocks0)
             end,

    %% Unreachable blocks can cause problems for the dominator calculations.
    Ls = beam_ssa:rpo(Blocks),
    Reachable = gb_sets:from_list(Ls),
    AllReachable = map_size(Blocks) =:= gb_sets:size(Reachable),
    Final = case AllReachable of
                true -> Blocks;
                false -> remove_unreachable(Ls, Blocks, Reachable, [])
            end,
    {Final,Count}.

%% sanitize_is([Instr], Count0, Values0, Changed, Acc) ->
%%       no_change | {[Instr],Count,Values}.
%%  Sanitize the instructions in one block. Values maps variables
%%  to the literals that replace them; the arguments of every
%%  instruction are substituted using it. Changed is true if any
%%  instruction has been added, removed, or rewritten so far.
%%  If nothing changed, no_change is returned.

sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
            Count0, Values, Changed, Acc) ->
    case sanitize_args(Args0, Values) of
        [#b_literal{}=Map,Key] ->
            %% Bind the literal map to a variable.
            {MapVar,Count} = new_var('@ssa_map', Count0),
            I = I0#b_set{args=[MapVar,Key]},
            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
            sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
        [_,_]=Args0 ->
            %% Args0 is already bound, so this clause only matches
            %% when the substitution left the arguments unchanged.
            sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
        [_,_]=Args ->
            %% The arguments were substituted. Note that Changed is
            %% passed on as is here.
            I = I0#b_set{args=Args},
            sanitize_is(Is, Count0, Values, Changed, [I|Acc])
    end;
sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
            Count, Values, Changed0, Acc) ->
    Args = sanitize_args(Args0, Values),
    case sanitize_instr(Op, Args, I0) of
        {value,Value0} ->
            %% The instruction was evaluated. Drop it and remember
            %% the literal that replaces its destination variable.
            Value = #b_literal{val=Value0},
            sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
        {ok,I} ->
            %% The instruction was replaced by another instruction.
            sanitize_is(Is0, Count, Values, true, [I|Acc]);
        ok ->
            %% Keep the instruction, using the substituted arguments.
            I = I0#b_set{args=Args},
            Changed = Changed0 orelse Args =/= Args0,
            sanitize_is(Is0, Count, Values, Changed, [I|Acc])
    end;
sanitize_is([], Count, Values, Changed, Acc) ->
    case Changed of
        true ->
            {reverse(Acc),Count,Values};
        false ->
            no_change
    end.

%% sanitize_args([Arg], Values) -> [Arg'].
%%  Replace every argument that is a key in Values with the
%%  corresponding value. Other arguments are kept as is.

sanitize_args(Args, Values) ->
    [case Values of
         #{Arg:=Replacement} -> Replacement;
         #{} -> Arg
     end || Arg <- Args].

%% sanitize_instr(Op, Args, Instr) -> {value,Term} | {ok,NewInstr} | ok.
%%  Try to evaluate or rewrite an instruction whose arguments are
%%  literals:
%%
%%  * {value,Term} - the instruction can be replaced by the term Term.
%%  * {ok,NewInstr} - the instruction must be replaced by NewInstr.
%%  * ok - there is nothing to do for this instruction.

sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
    case erl_bifs:is_pure(erlang, Bif, 1) of
        true ->
            %% Evaluate the BIF now. If the call fails, keep the
            %% instruction.
            try erlang:Bif(Lit) of
                Result -> {value,Result}
            catch
                error:_ -> ok
            end;
        false ->
            ok
    end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
    try erlang:Bif(Lit1, Lit2) of
        Result -> {value,Result}
    catch
        error:_ -> ok
    end;
sanitize_instr(get_hd, [#b_literal{val=[Head|_]}], _I) ->
    {value,Head};
sanitize_instr(get_tl, [#b_literal{val=[_|Tail]}], _I) ->
    {value,Tail};
sanitize_instr(get_tuple_element, [#b_literal{val=Tuple},
                                   #b_literal{val=Index}], _I)
  when Index < tuple_size(Tuple) ->
    %% The index in the instruction is zero-based.
    {value,element(Index+1, Tuple)};
sanitize_instr(is_nonempty_list, [#b_literal{val=[_|_]}], _I) ->
    {value,true};
sanitize_instr(is_nonempty_list, [#b_literal{}], _I) ->
    {value,false};
sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
                                 #b_literal{val=Arity},
                                 #b_literal{val=Tag}], _I)
  when is_integer(Arity), is_atom(Tag) ->
    case Tuple of
        _ when tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
            {value,true};
        _ ->
            {value,false}
    end;
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
    case is_integer(Sz) andalso Sz >= 0 of
        true -> ok;
        false -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
    case is_integer(Sz) andalso Sz >= 0 of
        true -> ok;
        false -> {ok,sanitize_badarg(I0)}
    end;
sanitize_instr(succeeded, [#b_literal{}], _I) ->
    %% A literal argument means that the tested instruction was
    %% evaluated, so it succeeded.
    {value,true};
sanitize_instr(_, _, _) -> ok.

%% sanitize_badarg(Instr0) -> Instr.
%%  Turn the instruction into a call to erlang:error(badarg).

sanitize_badarg(I) ->
    Mod = #b_literal{val=erlang},
    Name = #b_literal{val=error},
    ErrorFun = #b_remote{mod=Mod,name=Name,arity=1},
    Reason = #b_literal{val=badarg},
    I#b_set{op=call,args=[ErrorFun,Reason]}.

%% remove_unreachable([Label], Blocks0, Reachable, Acc) -> Blocks.
%%  Build a new block map that only contains the blocks for the
%%  given labels. In the phi nodes of those blocks, arguments
%%  that refer to predecessors not in Reachable are removed.

remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
    Blk = case split_phis(Is0) of
              {[],_} ->
                  %% No phi nodes; the block can be kept as is.
                  Blk0;
              {[_|_]=Phis,Rest} ->
                  Pruned = [prune_phi(Phi, Reachable) || Phi <- Phis],
                  Blk0#b_blk{is=Pruned ++ Rest}
          end,
    remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
remove_unreachable([], _Blocks, _Reachable, Acc) ->
    maps:from_list(Acc).

%% prune_phi(Phi0, Reachable) -> Phi.
%%  Keep only the phi arguments whose predecessor label is a
%%  member of the set Reachable.

prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
    IsLive = fun({_,Pred}) -> gb_sets:is_element(Pred, Reachable);
                (_) -> false
             end,
    Phi#b_set{args=lists:filter(IsLive, Args0)}.

%%%
%%% Fix tuples.
%%%

%% fix_tuples(St0) -> St.
%%  If compatibility with a previous version of Erlang has been
%%  requested, tuple creation must be split into two instructions to
%%  mirror the way tuples are created in BEAM prior to OTP 22.
%%  Every put_tuple instruction is replaced by a put_tuple_arity
%%  instruction (which keeps the original destination) followed by a
%%  put_tuple_elements instruction (whose destination is a new,
%%  otherwise unused variable).

fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Split = fun(#b_set{op=put_tuple,args=Elements}=PutTuple, N0) ->
                    Arity = #b_literal{val=length(Elements)},
                    {Unused,N} = new_var('@ssa_ignore', N0),
                    PutArity = PutTuple#b_set{op=put_tuple_arity,
                                              args=[Arity]},
                    PutElements = #b_set{dst=Unused,
                                         op=put_tuple_elements,
                                         args=Elements},
                    {[PutArity,PutElements],N};
               (Other, N) ->
                    {[Other],N}
            end,
    {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(Split, [0],
                                                     Count0, Blocks0),
    St#st{ssa=Blocks,cnt=Count}.

%%%
%%% Introduce the set_tuple_element instructions to make
%%% multiple-field record updates faster.
%%%
%%% The expansion of record field updates, when more than one field is
%%% updated, but not a majority of the fields, will create a sequence of
%%% calls to `erlang:setelement(Index, Value, Tuple)` where Tuple in the
%%% first call is the original record tuple, and in the subsequent calls
%%% Tuple is the result of the previous call. Furthermore, all Index
%%% values are constant positive integers, and the first call to
%%% `setelement` will have the greatest index. Thus all the following
%%% calls do not actually need to test at run-time whether Tuple has type
%%% tuple, nor that the index is within the tuple bounds.
%%%
%%% Since this optimization introduces destructive updates, it used to
%%% be done as the very last Core Erlang pass before going to
%%% lower-level code. However, it turns out that this kind of destructive
%%% updates are awkward also in SSA code and can prevent or complicate
%%% type analysis and aggressive optimizations.
%%%
%%% NOTE: Because there are no write barriers in the system, this kind of
%%% optimization can only be done when we are sure that garbage
%%% collection will not be triggered between the creation of the tuple
%%% and the destructive updates - otherwise we might insert pointers
%%% from an older generation to a newer.
%%%

%% use_set_tuple_element(St0) -> St.
%%  Visit all blocks in reverse of the order returned by
%%  beam_ssa:rpo/1 and try to replace chained calls to
%%  setelement/3 with set_tuple_element instructions.

use_set_tuple_element(#st{ssa=Blocks0}=St) ->
    Uses = count_uses(Blocks0),
    Labels = reverse(beam_ssa:rpo(Blocks0)),
    St#st{ssa=use_ste_1(Labels, Uses, Blocks0)}.

%% use_ste_1([Label], Uses, Blocks0) -> Blocks.
%%  For each block, first try the optimization across the block
%%  boundary, then within the (possibly updated) block itself.

use_ste_1([], _Uses, Blocks) ->
    Blocks;
use_ste_1([L|Ls], Uses, Blocks0) ->
    {#b_blk{is=Is0}=Blk,Blocks1} = use_ste_across(L, Uses, Blocks0),
    Blocks = case use_ste_is(Is0, Uses) of
                 Is0 ->
                     %% Nothing was changed within the block.
                     Blocks1;
                 Is ->
                     Blocks1#{L:=Blk#b_blk{is=Is}}
             end,
    use_ste_1(Ls, Uses, Blocks).

%%% Optimize within a single block.

%% use_ste_is([Instr], Uses) -> [Instr].
%%  Process the instructions from the last one to the first one.
%%  Each call to setelement/3 is handed to use_ste_call/4 together
%%  with the already processed instructions that follow it.

use_ste_is(Is, Uses) ->
    Step = fun(#b_set{}=I, Following) ->
                   case extract_ste(I) of
                       none ->
                           [I|Following];
                       Extracted ->
                           use_ste_call(Extracted, I, Following, Uses)
                   end
           end,
    lists:foldr(Step, [], Is).

%% use_ste_call(Extracted, Call, [Instr], Uses) -> [Instr].
%%  Call is a call to setelement/3 (described by Extracted) and the
%%  instruction list is what follows it. If the next setelement/3
%%  call (with only get_tuple_element instructions in between)
%%  updates the result of Call at a lower, positive position, and
%%  that result is used only once, the second call is replaced by
%%  a set_tuple_element instruction. The destinations are swapped
%%  so that the first call produces the final result. Otherwise
%%  Call is put back in front of the instructions unchanged.

use_ste_call({Dst0,Pos0,_Var0,_Val0}, Call1, Is0, Uses) ->
    Unchanged = [Call1|Is0],
    case get_ste_call(Is0, []) of
        {Prefix,{Dst1,Pos1,Dst0,Val1},Call2,Is}
          when Pos1 > 0, Pos0 > Pos1 ->
            case is_single_use(Dst0, Uses) of
                false ->
                    Unchanged;
                true ->
                    First = Call1#b_set{dst=Dst1},
                    %% set_tuple_element uses a zero-based position.
                    SteArgs = [Val1,Dst1,#b_literal{val=Pos1-1}],
                    Ste = Call2#b_set{op=set_tuple_element,
                                      dst=Dst0,
                                      args=SteArgs},
                    [First|Prefix ++ [Ste|Is]]
            end;
        _ ->
            Unchanged
    end.

%% get_ste_call([Instr], Acc) -> none | {Prefix,Extracted,Call,Rest}.
%%  Skip any get_tuple_element instructions (returned as Prefix, in
%%  their original order) and test whether the instruction that
%%  follows them is a call to setelement/3.

get_ste_call([#b_set{op=Op}=I|Is], Acc) ->
    case Op of
        get_tuple_element ->
            get_ste_call(Is, [I|Acc]);
        call ->
            case extract_ste(I) of
                none -> none;
                Extracted -> {reverse(Acc),Extracted,I,Is}
            end;
        _ ->
            none
    end;
get_ste_call(_, _) -> none.

%% extract_ste(Instr) -> none | {Dst,Pos,Tuple,Val}.
%%  If the instruction is a remote call to erlang:setelement/3 whose
%%  position argument is a literal, return the destination variable,
%%  the value of the position literal, and the tuple and value
%%  arguments.

extract_ste(#b_set{op=call,dst=Dst,
                   args=[#b_remote{mod=#b_literal{val=erlang},
                                   name=#b_literal{val=setelement}},
                         #b_literal{val=Pos},Tuple,Val]}) ->
    {Dst,Pos,Tuple,Val};
extract_ste(#b_set{}) ->
    none.

%%% Optimize across blocks within a try/catch block.

%% use_ste_across(Label, Uses, Blocks0) -> {Block,Blocks}.
%%  Attempt the optimization for a block that ends with a
%%  two-way branch on a variable. Returns the (possibly updated)
%%  block for Label together with the (possibly updated) block map.
%%  If the optimization is not possible, the block and the block
%%  map are returned unchanged.

use_ste_across(L, Uses, Blocks) ->
    Blk = map_get(L, Blocks),
    case Blk of
        #b_blk{last=#b_br{bool=#b_var{}}} ->
            try
                use_ste_across_1(L, Blk, Uses, Blocks)
            catch
                throw:not_possible ->
                    {Blk,Blocks}
            end;
        #b_blk{} ->
            {Blk,Blocks}
    end.

%% use_ste_across_1(Label, Block0, Uses, Blocks0) -> {Block,Blocks}.
%%  The block must end with a remote call followed by a 'succeeded'
%%  test of its result, and the success block (Next) must consist
%%  of a similar call/succeeded pair. If the two calls can be
%%  combined by use_ste_is/2, the call in this block is replaced
%%  by the combined call and the call in the next block is replaced
%%  by a set_tuple_element instruction.
%%
%%  A not_possible exception is thrown if the optimization
%%  cannot be applied.

use_ste_across_1(L, Blk0, Uses, Blocks0) ->
    #b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0,
    case reverse(IsThis) of
        [#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0,
         #b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] ->
            %% The boolean must only be used by the branch, and the
            %% result of the call must have exactly two uses.
            case is_single_use(Bool, Uses) andalso
                is_n_uses(2, Result, Uses) of
                true -> ok;
                false -> throw(not_possible)
            end,
            Call2 = use_ste_across_next(Next, Uses, Blocks0),
            Is = [Call1,Call2],
            %% Try to optimize the two calls as if they were adjacent
            %% in the same block, with one use of Result discounted.
            case use_ste_is(Is, decrement_uses(Result, Uses)) of
                [#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] ->
                    Blocks1 = use_ste_fix_next(Ste, Next, Blocks0),
                    %% The destination of the call has changed, so the
                    %% 'succeeded' test must test the new destination.
                    Succ = Succ0#b_set{args=[Call#b_set.dst]},
                    Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])},
                    Blocks = Blocks1#{L:=Blk},
                    {Blk,Blocks};
                _ ->
                    throw(not_possible)
            end;
        _ ->
            throw(not_possible)
    end.

%% use_ste_across_next(Label, Uses, Blocks) -> Call.
%%  Return the call instruction in block Label, provided that the
%%  block consists of exactly a remote call followed by a
%%  'succeeded' test of its result, that the terminator branches
%%  on the result of the test, that the boolean is used once, and
%%  that the result of the call is used exactly twice. Otherwise
%%  a not_possible exception is thrown.

use_ste_across_next(Next, Uses, Blocks) ->
    case map_get(Next, Blocks) of
        #b_blk{is=[#b_set{op=call,dst=Result,args=[#b_remote{}|_]}=Call,
                   #b_set{op=succeeded,dst=Bool,args=[Result]}],
               last=#b_br{bool=Bool}} ->
            UsesOk = is_single_use(Bool, Uses) andalso
                is_n_uses(2, Result, Uses),
            case UsesOk of
                true -> Call;
                false -> throw(not_possible)
            end;
        #b_blk{} ->
            throw(not_possible)
    end.

%% use_ste_fix_next(Ste, Label, Blocks0) -> Blocks.
%%  Replace the call/succeeded pair in block Label with the
%%  instruction Ste. The condition of the terminator is replaced
%%  with the literal 'true' and the terminator is normalized.

use_ste_fix_next(Ste, Next, Blocks) ->
    #b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=Br0} = Blk0 =
        map_get(Next, Blocks),
    AlwaysTrue = #b_literal{val=true},
    Br = beam_ssa:normalize(Br0#b_br{bool=AlwaysTrue}),
    Blocks#{Next:=Blk0#b_blk{is=[Ste],last=Br}}.

%% count_uses(Blocks) -> #{Var => Count}.
%%  Count how many times each variable is used in the instructions
%%  and terminators of all blocks.

count_uses(Blocks) ->
    AllBlocks = maps:values(Blocks),
    count_uses_blk(AllBlocks, #{}).

%% count_uses_blk([Block], CountMap0) -> CountMap.
%%  Add the uses in the given blocks to the count map. The count
%%  for a variable is never incremented beyond 3.

count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
    Bump = fun(Var, Counts) ->
                   case Counts of
                       #{Var:=3} -> Counts;
                       #{Var:=N} -> Counts#{Var:=N+1};
                       #{} -> Counts#{Var=>1}
                   end
           end,
    CountInstr = fun(I, Counts) ->
                         foldl(Bump, Counts, beam_ssa:used(I))
                 end,
    CountMap1 = foldl(CountInstr, CountMap0, Is),
    %% The terminator can use variables, too.
    count_uses_blk(Bs, CountInstr(Last, CountMap1));
count_uses_blk([], CountMap) ->
    CountMap.

%% decrement_uses(Var, Uses0) -> Uses.
%%  Decrement the use count for Var by one. Var must be
%%  present in the map.

decrement_uses(V, Uses) ->
    #{V:=Old} = Uses,
    New = Old - 1,
    Uses#{V:=New}.

%% is_n_uses(N, Var, Uses) -> true|false.
%%  Test whether the recorded use count for Var is exactly N.
%%  A variable that is not present in the map gives false.

is_n_uses(N, V, Uses) ->
    case Uses of
        #{V:=Count} -> Count =:= N;
        #{} -> false
    end.

%% is_single_use(Var, Uses) -> true|false.
%%  Test whether the recorded use count for Var is exactly one.
%%  A variable that is not present in the map gives false.

is_single_use(V, Uses) ->
    case Uses of
        #{V:=Count} -> Count =:= 1;
        #{} -> false
    end.

%%%
%%% Find out where frames should be placed.
%%%

%% place_frames(St0) -> St.
%%   Store in St#st.frames the list of labels for the blocks that
%%   need stack frame allocation instructions.
%%
%%   This function attempts to place stack frames as tightly as
%%   possible around the code, to avoid building stack frames for
%%   code paths that don't need one.
%%
%%   Stack frames are placed in blocks that dominate all of their
%%   descendants. That guarantees that the deallocation instructions
%%   cannot be reached from other execution paths that didn't set up
%%   a stack frame or set up a stack frame with a different size.

place_frames(#st{ssa=Blocks}=St) ->
    {Doms,_} = beam_ssa:dominators(Blocks),
    Labels = beam_ssa:rpo(Blocks),
    {Frames,_Tried} = place_frames_1(Labels, Blocks, Doms,
                                     gb_sets:empty(), []),
    St#st{frames=Frames}.

%% place_frames_1([Label], Blocks, Doms, Tried0, Frames0) -> {Frames,Tried}.
%%  Traverse the blocks in the given order. Frames accumulates the
%%  labels of the blocks in which a frame has been placed, and Tried
%%  is the set of labels for which placing a frame has been attempted.
%%
%%  A {need_frame,Label,Tried} exception thrown by do_place_frame/5
%%  propagates out of this function until it reaches a block that
%%  dominates Label; see the 'catch' below.

place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
    Blk = map_get(L, Blocks),
    case need_frame(Blk) of
        true ->
            %% This block needs a frame. Try to place it here.
            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),

            %% Successfully placed. Try to place more frames in descendants
            %% that are not dominated by this block.
            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
        false ->
            %% The recursive call is deliberately made inside the 'try'
            %% so that exceptions thrown for later blocks can be
            %% handled here.
            try
                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
            catch
                throw:{need_frame,For,Tried1}=Reason ->
                    %% A descendant block needs a stack frame. Try to
                    %% place it here.
                    case is_dominated_by(For, L, Doms) of
                        true ->
                            %% Try to place a frame here. Tried1, carried
                            %% by the exception, is used, but the frames
                            %% placed before the exception was thrown are
                            %% discarded (Frames0 is used).
                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
                                                            Tried1, Frames0),
                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
                        false ->
                            %% Wrong place. This block does not dominate
                            %% the block that needs the frame. Pass it on
                            %% to our ancestors.
                            throw(Reason)
                    end
            end
    end;
place_frames_1([], _, _, Tried, Frames) ->
    {Frames,Tried}.

%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
%%  Try to place a frame in this block. This function returns
%%  successfully if it either succeeds at placing a frame in this
%%  block, if an ancestor that dominates this block has already placed
%%  a frame, or if we have already tried to put a frame in this block.
%%
%%  A {need_frame,Label,Tried} exception will be thrown if this
%%  block is not suitable for having a stack frame (i.e. it does not
%%  dominate all of its descendants). The exception means that an
%%  ancestor will have to place the frame needed by this block.

do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
    case gb_sets:is_element(L, Tried0) of
        false ->
            %% First attempt for this block; remember that we tried.
            Tried = gb_sets:insert(L, Tried0),
            case place_frame_here(L, Blocks, Doms, Frames) of
                ancestor ->
                    %% This block does not dominate all of its
                    %% descendants. The frame must be placed in
                    %% an ancestor.
                    throw({need_frame,L,Tried});
                no ->
                    %% An ancestor already has a frame. Not needed.
                    {Frames,Tried};
                yes ->
                    %% A frame is needed and it is safe to place it here.
                    {[L|Frames],Tried}
            end;
        true ->
            %% We have already tried to put a frame in this block.
            {Frames,Tried0}
    end.

%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
%%  Decide what to do about a stack frame for block Label:
%%
%%  * no - Label is dominated by a block in Frames, so a frame
%%  is neither needed nor allowed here.
%%
%%  * yes - a frame can be placed in Label.
%%
%%  * ancestor - the frame must be placed in an ancestor.

place_frame_here(L, Blocks, Doms, Frames) ->
    HasFrame = any(fun(FrameL) ->
                           is_dominated_by(L, FrameL, Doms)
                   end, Frames),
    case HasFrame of
        false ->
            %% There is no frame in any ancestor, so a frame is
            %% needed. It can be placed here if this block dominates
            %% all of its descendants and all predecessors referenced
            %% by its phi nodes.
            Reachable = beam_ssa:rpo([L], Blocks),
            PhiPreds = phi_predecessors(L, Blocks),
            Required = ordsets:from_list(PhiPreds ++ Reachable),
            DominatesAll =
                all(fun(?BADARG_BLOCK) ->
                            %% This block defines no variables and calls
                            %% erlang:error(badarg). It is safe to put
                            %% the frame in L whether or not L dominates
                            %% ?BADARG_BLOCK.
                            true;
                       (Other) ->
                            is_dominated_by(Other, L, Doms)
                    end, Required),

            %% In addition, a frame must not be placed in a loop header.
            LoopHeader = is_loop_header(L, Blocks),
            case DominatesAll andalso not LoopHeader of
                true -> yes;
                false -> ancestor
            end;
        true ->
            no
    end.

%% phi_predecessors(Label, Blocks) -> [Label].
%%  Return the predecessor labels referenced by the phi nodes in
%%  block Label. A label is included once for each phi node
%%  argument that references it.

phi_predecessors(L, Blocks) ->
    #b_blk{is=Is} = map_get(L, Blocks),
    PerPhi = [[Pred || {_,Pred} <- PhiArgs] ||
                 #b_set{op=phi,args=PhiArgs} <- Is],
    lists:append(PerPhi).

%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%%  Test whether DominatedBy is a member of the list that
%%  Dominators associates with Label, that is, whether block
%%  Label is dominated by block DominatedBy.

is_dominated_by(L, DomBy, Doms) ->
    member(DomBy, map_get(L, Doms)).

%% need_frame(#b_blk{}) -> true|false.
%%  Test whether any instruction in the block requires a stack
%%  frame. A block that ends with a return is examined in the
%%  context {return,ReturnValue}; all other blocks are examined
%%  in the context 'body'.

need_frame(#b_blk{is=Is,last=Last}) ->
    Context = case Last of
                  #b_ret{arg=Ret} -> {return,Ret};
                  _ -> body
              end,
    need_frame_1(Is, Context).

%% need_frame_1([Instr], Context) -> true|false.
%%  Test whether any of the instructions requires a stack frame.
%%  Context is one of:
%%
%%  * {return,Ret} - the block ends by returning Ret.
%%  * tail - the instruction is a call that is the last instruction
%%  of the block and whose result is returned.
%%  * body - any other situation.
%%
%%  The order of the clauses is significant.

need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
    %% Since make_fun clobbers X registers, a stack frame is needed if
    %% any of the following instructions use any other variable than
    %% the one holding the reference to the created fun.
    need_frame_1(Is, Context) orelse
        case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
            [Fun] -> false;
            [_|_] -> true
        end;
need_frame_1([#b_set{op=new_try_tag}|_], _) ->
    true;
need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
    %% The call is the last instruction before the return. Refine
    %% the context and let the general clause for calls below decide.
    if
        Val =:= Ret -> need_frame_1(Is, tail);
        true -> need_frame_1(Is, body)
    end;
need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
    case Func of
        #b_remote{mod=#b_literal{val=Mod},
                  name=#b_literal{val=Name},
                  arity=Arity} when is_atom(Mod), is_atom(Name) ->
            case erl_bifs:is_exit_bif(Mod, Name, Arity) of
                true ->
                    %% No frame is needed for this call. The
                    %% remaining instructions are not examined.
                    false;
                false ->
                    %% A frame is needed unless this is a tail call
                    %% to something that is not a trapping BIF.
                    Context =:= body orelse
                        Is =/= [] orelse
                        is_trap_bif(Mod, Name, Arity)
                end;
        #b_remote{} ->
            %% This is an apply(), which always needs a frame.
            true;
        #b_local{} ->
            %% A local call needs a frame unless it is a tail call.
            Context =:= body orelse Is =/= [];
        _ ->
             %% A fun call always needs a frame.
            true
    end;
need_frame_1([I|Is], Context) ->
    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
need_frame_1([], _) -> false.

%% is_trap_bif(Mod, Name, Arity) -> true|false.
%%   Test whether a stack frame is needed for a call to this BIF.

is_trap_bif(erlang, Name, Arity) ->
    case {Name,Arity} of
        {'!',2} -> true;
        {link,1} -> true;
        {unlink,1} -> true;
        {monitor_node,2} -> true;
        {group_leader,2} -> true;
        {exit,2} -> true;
        {_,_} -> false
    end;
is_trap_bif(_, _, _) ->
    false.

%%%
%%% Fix variables used in matching in receive.
%%%
%%% The loop_rec/2 instruction may return a reference to a
%%% message outside of any heap or heap fragment. If the message
%%% does not match, it is not allowed to store any reference to
%%% the message (or part of the message) on the stack. If we do,
%%% the message will be corrupted if there happens to be a GC.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That will make sure that only X registers are
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
%%%
%%% Variables that are matched out and used in the same clause
%%% need copy instructions before the remove_message instruction
%%% in that clause.
%%%

%% fix_receives(St0) -> St.
%%  Introduce copy instructions for the variables matched out in
%%  each receive in the function.

fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Labeled = maps:to_list(Blocks0),
    {Blocks,Count} = fix_receives_1(Labeled, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.

%% fix_receives_1([{Label,Block}], Blocks0, Count0) -> {Blocks,Count}.
%%  Fix up each receive, that is, each block in the list whose
%%  first instruction is peek_message. The list is only used to
%%  find those blocks; all lookups and updates are done in the
%%  block map.

fix_receives_1([{L,#b_blk{is=[#b_set{op=peek_message}|_]}}|Ls],
               Blocks0, Count0) ->
    Rm = find_rm_blocks(L, Blocks0),
    LoopExit = find_loop_exit(Rm, Blocks0),
    Defs0 = beam_ssa:def([L], Blocks0),

    %% First handle the variables that are used in the common
    %% exit block...
    CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
    {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
                                       Blocks0, Count0),

    %% ...then the remaining variables, clause by clause.
    Defs = ordsets:subtract(Defs0, CommonUsed),
    {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([{_,#b_blk{}}|Ls], Blocks, Count) ->
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([], Blocks, Count) ->
    {Blocks,Count}.

%% recv_common(Defs, LoopExit, Blocks) -> [Var].
%%  Return the variables in Defs that are used, but not defined,
%%  in the blocks reachable from the exit block.

recv_common(_Defs, none, _Blocks) ->
    %% The receive is used in the tail position of a function,
    %% so there is no common exit block.
    [];
recv_common(Defs, Exit, Blocks) ->
    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
    ordsets:intersection(ordsets:subtract(Defs, ExitDefs), ExitUsed).

%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
%%                 Blocks0, Count0) -> {Blocks,Count}.
%%  Handle variables always defined in a receive and used
%%  in the exit block following the receive. For each such
%%  variable, a copy is made in every remove_message block and
%%  the copies are joined by a new phi node in the exit block.

recv_fix_common([], _Exit, _Rm, Blocks, Count) ->
    {Blocks,Count};
recv_fix_common([Old|Rest], Exit, Rm, Blocks0, Count0) ->
    %% Merged is the variable defined by the new phi node. It
    %% replaces the old variable from the exit block and onwards.
    {Merged,Count1} = new_var('@recv', Count0),
    Blocks1 = beam_ssa:rename_vars(#{Old=>Merged}, [Exit], Blocks0),

    %% One new variable is needed for each remove_message block.
    {PerClause,Count} = new_vars(duplicate(length(Rm), '@recv'), Count1),
    PhiArgs = fix_exit_phi_args(PerClause, Rm, Exit, Blocks1),
    Phi = #b_set{op=phi,dst=Merged,args=PhiArgs},
    ExitBlk0 = map_get(Exit, Blocks1),
    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
    Blocks2 = recv_fix_common_1(PerClause, Rm, Old, Blocks1#{Exit:=ExitBlk}),
    recv_fix_common(Rest, Exit, Rm, Blocks2, Count).

%% recv_fix_common_1([NewVar], [RemoveMessageLabel], Msg, Blocks0) -> Blocks.
%%  For each remove_message block, rename Msg to the corresponding
%%  new variable in the blocks reachable from that block, and define
%%  the new variable by a copy of Msg placed after any phi nodes in
%%  the block. The two lists must have the same length.

recv_fix_common_1(Vs, Rms, Msg, Blocks) ->
    FixOne = fun({V,Rm}, Blocks0) ->
                     Blocks1 = beam_ssa:rename_vars(#{Msg=>V}, [Rm], Blocks0),
                     #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
                     Copy = #b_set{op=copy,dst=V,args=[Msg]},
                     Blk = Blk0#b_blk{is=insert_after_phis(Is0, [Copy])},
                     Blocks1#{Rm:=Blk}
             end,
    foldl(FixOne, Blocks, zip(Vs, Rms)).

%% fix_exit_phi_args([NewVar], [RemoveMessageLabel], Exit, Blocks) ->
%%       [{NewVar,PredecessorLabel}].
%%  Build the argument list for the phi node in the exit block.
%%  Every block that is reachable from a remove_message block and
%%  has Exit as a successor contributes an argument holding the
%%  variable for that remove_message block.

fix_exit_phi_args(Vs, Rms, Exit, Blocks) ->
    [{V,Pred} ||
        {V,Rm} <- zip(Vs, Rms),
        Pred <- exit_predecessors(beam_ssa:rpo([Rm], Blocks), Exit, Blocks)].

%% exit_predecessors([Label], Exit, Blocks) -> [Label].
%%  Keep the labels of the blocks that have Exit as one of
%%  their successors.

exit_predecessors(Ls, Exit, Blocks) ->
    [L || L <- Ls,
          member(Exit, beam_ssa:successors(map_get(L, Blocks)))].

%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%%  Add a copy instruction for all variables that are matched out and
%%  later used within a clause of the receive. Each Label is the
%%  label of a block with a remove_message instruction; the copies
%%  are placed after any phi nodes in that block, and the blocks
%%  reachable from it are updated to use the copies.

fix_receive([], _Defs, Blocks, Count) ->
    {Blocks,Count};
fix_receive([L|Ls], Defs, Blocks0, Count0) ->
    {RmDefs,RmUsed} = beam_ssa:def_used([L], Blocks0),

    %% Only variables defined before this clause and used within
    %% it need to be copied.
    Used = ordsets:intersection(ordsets:subtract(Defs, RmDefs), RmUsed),
    Bases = [Base || #b_var{name=Base} <- Used],
    {NewVars,Count} = new_vars(Bases, Count0),
    Renames = zip(Used, NewVars),
    Blocks1 = beam_ssa:rename_vars(Renames, [L], Blocks0),
    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Renames],
    Blk = Blk0#b_blk{is=insert_after_phis(Is0, Copies)},
    fix_receive(Ls, Defs, Blocks1#{L:=Blk}, Count).

%% find_loop_exit([Label], Blocks) -> Label | none.
%%  Find the block to which control is transferred when the
%%  receive loop is exited. The paths from the first two
%%  labels are compared from their ends; see find_loop_exit_1/3.
%%  If there are fewer than two labels, 'none' is returned.

find_loop_exit([L1,L2|_], Blocks) ->
    RevPath1 = reverse(beam_ssa:rpo([L1], Blocks)),
    RevPath2 = reverse(beam_ssa:rpo([L2], Blocks)),
    find_loop_exit_1(RevPath1, RevPath2, none);
find_loop_exit(_, _) ->
    none.

%% find_loop_exit_1(Path1, Path2, Exit0) -> Exit.
%%  Return the last element of the longest common prefix of the
%%  two lists, or Exit0 if the lists have no common prefix.

find_loop_exit_1([H1|T1], [H2|T2], _Exit) when H1 =:= H2 ->
    find_loop_exit_1(T1, T2, H1);
find_loop_exit_1(_, _, Exit) ->
    Exit.

%% find_rm_blocks(StartLabel, Blocks) -> [Label].
%%  Find all blocks containing a remove_message instruction within
%%  the receive loop whose peek_message label is StartLabel. The
%%  search starts at the successors of the StartLabel block.

find_rm_blocks(L, Blocks) ->
    Succ = beam_ssa:successors(map_get(L, Blocks)),
    find_rm_blocks_1(Succ, gb_sets:singleton(L), Blocks).

%% find_rm_blocks_1([Label], Seen, Blocks) -> [Label].
%%  Work through the list of labels still to visit. Seen holds the
%%  labels that have already been visited.

find_rm_blocks_1([], _Seen, _Blocks) ->
    [];
find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
    case gb_sets:is_member(L, Seen0) of
        true ->
            find_rm_blocks_1(Ls, Seen0, Blocks);
        false ->
            Seen = gb_sets:insert(L, Seen0),
            Blk = map_get(L, Blocks),
            case find_rm_act(Blk#b_blk.is) of
                found ->
                    %% This block has a remove_message instruction.
                    %% Its successors are not searched.
                    [L|find_rm_blocks_1(Ls, Seen, Blocks)];
                prune ->
                    %% Looping back. The successors must not be
                    %% searched.
                    find_rm_blocks_1(Ls, Seen, Blocks);
                continue ->
                    %% Neutral block. Search all of its successors
                    %% before the remaining labels.
                    Next = beam_ssa:successors(Blk) ++ Ls,
                    find_rm_blocks_1(Next, Seen, Blocks)
            end
    end.

%% find_rm_act([Instr]) -> found | prune | continue.
%%  Classify a block by the first instruction of interest in it:
%%
%%  * found - a remove_message instruction.
%%  * prune - a peek_message, recv_next, wait_timeout, or wait
%%  instruction.
%%  * continue - the block contains none of those instructions.

find_rm_act([#b_set{op=remove_message}|_]) -> found;
find_rm_act([#b_set{op=peek_message}|_]) -> prune;
find_rm_act([#b_set{op=recv_next}|_]) -> prune;
find_rm_act([#b_set{op=wait_timeout}|_]) -> prune;
find_rm_act([#b_set{op=wait}|_]) -> prune;
find_rm_act([#b_set{}|Is]) -> find_rm_act(Is);
find_rm_act([]) -> continue.

%%%
%%% Find out which variables need to be stored in Y registers.
%%%

%% The variables defined (d) and the variables killed (k) on the
%% way to a certain point within a stack frame. A variable becomes
%% killed when an instruction that clobbers X registers is
%% executed while the variable is in the defined set.
-record(dk, {d :: ordsets:ordset(var_name()),
             k :: ordsets:ordset(var_name())
            }).

%% find_yregs(St0) -> St.
%%  Find all variables that must be stored in Y registers. Annotate
%%  the blocks that allocate frames with the set of Y registers
%%  used within that stack frame.
%%
%%  Basically, we follow all execution paths starting from a block
%%  that allocates a frame, keeping track of all defined registers
%%  and all registers killed by an instruction that clobbers X
%%  registers. For every use of a variable, we check if it is in
%%  the set of killed variables; if it is, it must be stored in a Y
%%  register.

find_yregs(#st{frames=[]}=St) ->
    %% No stack frames, and therefore no Y registers.
    St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
    ArgVars = [V || #b_var{}=V <- Args],
    FrameDefs = find_defs(Frames, Blocks0, ArgVars),
    St#st{ssa=find_yregs_1(FrameDefs, Blocks0)}.

%% find_yregs_1([{FrameLabel,Defs}], Blocks0) -> Blocks.
%%  For each frame block, find the variables that need Y registers
%%  in the blocks reachable from it and store them in a 'yregs'
%%  annotation on the frame block. Defs is the set of variables
%%  defined when the frame block is entered.

find_yregs_1([], Blocks) ->
    Blocks;
find_yregs_1([{F,Defs}|Fs], Blocks0) ->
    %% At the entry to the frame block, nothing has been killed yet.
    Initial = #{F=>#dk{d=Defs,k=[]}},
    Labels = beam_ssa:rpo([F], Blocks0),
    Yregs = find_yregs_2(Labels, Blocks0, Initial, []),
    Blk = beam_ssa:add_anno(yregs, Yregs, map_get(F, Blocks0)),
    find_yregs_1(Fs, Blocks0#{F:=Blk}).

%% find_yregs_2([Label], Blocks, D0, Yregs0) -> Yregs.
%%  Process the blocks in the given order. D maps each label to the
%%  #dk{} record that holds at the entry to that block; an entry for
%%  a block must exist by the time the block is processed. The state
%%  at the end of each block is propagated to its successors.

find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
    #b_blk{is=Is,last=Last} = Blk = map_get(L, Blocks0),
    Entry = map_get(L, D0),
    {Yregs1,Exit} = find_yregs_is(Is, Entry, Yregs0),
    Yregs = find_yregs_terminator(Last, Exit, Yregs1),
    D = find_update_succ(beam_ssa:successors(Blk), Exit, D0),
    find_yregs_2(Ls, Blocks0, D, Yregs);
find_yregs_2([], _Blocks, _D, Yregs) ->
    Yregs.

%% find_defs([FrameLabel], Blocks, Defs0) -> [{FrameLabel,Defs}].
%%  Starting from block 0 with Defs0 as the initially defined
%%  variables, find the variables that are defined when each
%%  frame block is reached.

find_defs(Frames, Blocks, Defs) ->
    FrameSet = gb_sets:from_list(Frames),
    {FrameDefs,_Seen} = find_defs_1([0], Blocks, FrameSet,
                                    gb_sets:empty(), Defs, []),
    FrameDefs.

%% find_defs_1(Labels, Blocks, Frames, Seen0, Defs, Acc0) -> {Acc,Seen}.
%%  Walk the blocks that come before the frame-allocating blocks.
%%  The traversal stops at each frame block, recording the
%%  definitions accumulated so far; other blocks are visited once.
find_defs_1([], _Blocks, _Frames, Seen, _Defs, Acc) ->
    {Acc,Seen};
find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
    IsFrame = gb_sets:is_member(L, Frames),
    Visited = gb_sets:is_member(L, Seen0),
    if
        IsFrame ->
            FrameDefs = {L,ordsets:from_list(Defs0)},
            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
                        [FrameDefs|Acc0]);
        Visited ->
            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
        true ->
            %% Handle the sibling labels first, then descend into
            %% the successors of this block.
            Seen1 = gb_sets:insert(L, Seen0),
            {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1,
                                     Defs0, Acc0),
            #b_blk{is=Is} = Blk = map_get(L, Blocks),
            Defs = find_defs_is(Is, Defs0),
            find_defs_1(beam_ssa:successors(Blk), Blocks, Frames,
                        Seen, Defs, Acc)
    end.

%% Prepend the destination variable of every instruction to Acc.
find_defs_is(Is, Acc) ->
    lists:foldl(fun(#b_set{dst=Dst}, Defs) -> [Dst|Defs] end, Acc, Is).

%% find_update_succ(Successors, DK, D0) -> D.
%%  Propagate the state at the end of a block to each successor.
%%  If a successor already has a state, keep only the definitions
%%  common to both states and combine the killed sets.
find_update_succ([], _DK, D) ->
    D;
find_update_succ([S|Ss], #dk{d=Defs,k=Killed}=DK, D0) ->
    Merged = case D0 of
                 #{S:=#dk{d=SuccDefs,k=SuccKilled}} ->
                     #dk{d=ordsets:intersection(Defs, SuccDefs),
                         k=ordsets:union(Killed, SuccKilled)};
                 #{} ->
                     DK
             end,
    find_update_succ(Ss, DK, D0#{S=>Merged}).

%% find_yregs_is(Is, DK0, Yregs0) -> {Yregs,DK}.
%%  Process the instructions of a block. Every used variable that is
%%  in the killed set is added to Yregs. An instruction that clobbers
%%  X registers moves all current definitions to the killed set.
find_yregs_is([], DK, Yregs) ->
    {Yregs,DK};
find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs,k=Killed}=DK0, Yregs0) ->
    KilledUses = ordsets:intersection(beam_ssa:used(I), Killed),
    Yregs = ordsets:union(Yregs0, KilledUses),
    DK = case beam_ssa:clobbers_xregs(I) of
             true ->
                 %% Only the result of this instruction survives.
                 DK0#dk{d=[Dst],k=ordsets:union(Defs, Killed)};
             false ->
                 DK0#dk{d=ordsets:add_element(Dst, Defs)}
         end,
    find_yregs_is(Is, DK, Yregs).

%% Add the killed variables used by the terminator to Yregs.
find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs) ->
    KilledUses = ordsets:intersection(beam_ssa:used(Terminator), Killed),
    ordsets:union(Yregs, KilledUses).

%%%
%%% Try to reduce the size of the stack frame, by adding explicit
%%% 'copy' instructions for return values from 'call' and 'make_fun' that
%%% need to be saved in Y registers. Here is an example to show
%%% how that's useful. First, here is the Erlang code:
%%%
%%% f(Pid) ->
%%%    Res = foo(42),
%%%    _ = node(Pid),
%%%    bar(),
%%%    Res.
%%%
%%% Compiled to SSA format, the main part of the code looks like this:
%%%
%%% 0:
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% It can be seen that the variables Pid and Res must be saved in Y
%%% registers in order to survive the function calls. A previous sub
%%% pass has inserted a 'copy' instruction to save the value of the
%%% variable Pid:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The Res and Pid:4 variables must be assigned to different Y registers
%%% because they are live at the same time. copy_retval() inserts a
%%% 'copy' instruction to copy Res to a new variable:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res:6 = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   Res = copy Res:6
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The new variable Res:6 is used to capture the return value from the call.
%%% The variables Pid:4 and Res are no longer live at the same time, so they
%%% can be assigned to the same Y register.
%%%

%% copy_retval(St0) -> St.
%%  Run the copy_retval sub pass for every stack frame, updating
%%  the blocks and the variable counter.
copy_retval(#st{frames=Frames,ssa=Ssa,cnt=Cnt}=St) ->
    {NewSsa,NewCnt} = copy_retval_1(Frames, Ssa, Cnt),
    St#st{cnt=NewCnt,ssa=NewSsa}.

%% copy_retval_1(Frames, Blocks0, Count0) -> {Blocks,Count}.
%%  Process the blocks reachable from each frame-allocating block.
copy_retval_1([], Blocks, Count) ->
    {Blocks,Count};
copy_retval_1([F|Fs], Blocks0, Count0) ->
    #b_blk{anno=#{yregs:=FrameYregs},is=Is} = map_get(F, Blocks0),
    %% Take the 'copy' instructions in the frame block into account.
    Yregs = collect_yregs(Is, gb_sets:from_list(FrameYregs)),
    Order = beam_ssa:rpo([F], Blocks0),
    {Blocks,Count} = copy_retval_2(Order, Yregs, none, Blocks0, Count0),
    copy_retval_1(Fs, Blocks, Count).

%% collect_yregs(Is, Yregs0) -> Yregs.
%%  For each 'copy' of a variable, replace the source variable
%%  in the set with the destination of the copy.
collect_yregs([], Yregs) ->
    Yregs;
collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is], Yregs0) ->
    true = gb_sets:is_member(X, Yregs0),        %Assertion.
    collect_yregs(Is, gb_sets:insert(Y, gb_sets:delete(X, Yregs0)));
collect_yregs([#b_set{}|Is], Yregs) ->
    collect_yregs(Is, Yregs).

%% copy_retval_2(Labels, Yregs, Copy0, Blocks0, Count0) -> {Blocks,Count}.
%%  Rewrite the instructions in each block. A pending 'copy'
%%  instruction may be carried over to the next block in the list
%%  when this block branches to it on success and to ?BADARG_BLOCK
%%  on failure.
copy_retval_2([], _Yregs, none, Blocks, Count) ->
    {Blocks,Count};
copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
    #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
    RC = case {Last,Ls} of
             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} -> true;
             {_,_} -> false
         end,
    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
        {_,Count0} when Copy0 =:= none ->
            %% Nothing was pending and no new variable was created,
            %% so the block is left untouched.
            copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
        {Is,Count} ->
            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
            copy_retval_2(Ls, Yregs, none, Blocks, Count);
        {Is,Count,Copy} ->
            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
    end.

%% copy_retval_is(Is, RC, Yregs, Copy, Count, Acc) ->
%%         {Is,Count} | {Is,Count,Copy}.
%%  Rewrite the instructions of one block. Copy is either 'none' or
%%  a pending 'copy' instruction that has not yet been placed in the
%%  instruction sequence. Acc holds the instructions processed so far
%%  in reverse order.
%%
%%  When RC is true and a copy is still pending when the end of the
%%  block is reached, the copy is returned as the third element of
%%  the result instead of being placed in this block.
%%
%%  Note that the order of the clauses matters.

%% The last instruction is put_tuple_elements: substitute the source
%% of the pending copy in its arguments and place the copy after it.
copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
           Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
%% The last instruction is a call or make_fun: place any pending copy
%% before it. No copy is created for its own return value.
copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
  when Op =:= call; Op =:= make_fun ->
    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
    {reverse(Acc, [I]),Count};
%% Any other last instruction: place the pending copy before it.
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
    {reverse(Acc, acc_copy(Is, Copy)),Count};
%% An instruction followed by 'succeeded' at the end of the block:
%% place the pending copy before both instructions.
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
    {reverse(Acc, acc_copy(Is, Copy)),Count};
%% A call or make_fun that was not handled by the clauses above.
copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
           Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
    case gb_sets:is_member(Dst, Yregs) of
        true ->
            %% Let the instruction define a new variable and make a
            %% copy from the new variable to Dst pending.
            {NewVar,Count} = new_var(RetName, Count1),
            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
            I = I1#b_set{dst=NewVar},
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
        false ->
            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
    end;
%% Any other instruction: use the source of the pending copy in place
%% of its destination in the arguments.
copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% The pending copy must be placed before this instruction.
            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
        false ->
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
        end;
copy_retval_is([], RC, _, Copy, Count, Acc) ->
    case {Copy,RC} of
        {none,_} ->
            {reverse(Acc),Count};
        {#b_set{},true} ->
            %% Hand the pending copy back to the caller.
            {reverse(Acc),Count,Copy};
        {#b_set{},false} ->
            {reverse(Acc, [Copy]),Count}
    end.

%%
%% Consider this code:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   A = copy A1
%%   B = call bar/1, Var
%%
%% If the Var variable is no longer used after this code, its Y register
%% can't be reused for A. To allow the Y register to be reused
%% we will need to insert 'copy' instructions for arguments that are
%% in Y registers:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   Var1 = copy Var
%%   A = copy A1
%%   B = call bar/1, Var1
%%

%% place_retval_copy(I0, Yregs, Copy, Count0, Acc0) -> {I,Count,Acc}.
%%  Place the pending copy (if any) before instruction I0. Arguments
%%  of I0 that are in Yregs get 'copy' instructions of their own,
%%  which end up before the pending copy in the final sequence.
place_retval_copy(I, _Yregs, none, Count, Acc) ->
    {I,Count,Acc};
place_retval_copy(#b_set{args=[Callee|CallArgs0]}=I, Yregs, Copy,
                  Count0, Acc0) ->
    #b_set{dst=CopyDst} = Copy,
    {CallArgs,ArgCopies,Count} =
        copy_func_args(CallArgs0, Yregs, CopyDst, Acc0, [], Count0),
    {I#b_set{args=[Callee|CallArgs]},Count,[Copy|ArgCopies]}.

%% copy_func_args(Args0, Yregs, Avoid, CopyAcc0, Acc, Count0) ->
%%         {Args,CopyAcc,Count}.
%%  Replace each variable argument that is a member of Yregs (except
%%  Avoid) with a new variable, and add a 'copy' instruction that
%%  defines the new variable to CopyAcc.
copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
    {lists:reverse(Acc),CopyAcc,Count};
copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc,
               Count0) ->
    case gb_sets:is_member(A, Yregs) andalso A =/= Avoid of
        true ->
            {NewVar,Count} = new_var(AName, Count0),
            Copy = #b_set{op=copy,dst=NewVar,args=[A]},
            copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc],
                           [NewVar|Acc], Count);
        false ->
            copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
    end;
copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
    %% Not a variable; keep as is.
    copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count).

%% Push the pending copy instruction (if there is one) onto Acc.
acc_copy(Acc, #b_set{}=Copy) ->
    [Copy|Acc];
acc_copy(Acc, none) ->
    Acc.

%% copy_sub_args(Args0, Copy) -> Args.
%%  Given a pending copy instruction, replace each occurrence of its
%%  destination variable in Args0 with its source variable.
copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
    lists:map(fun(A) -> sub_arg(A, Dst, Src) end, Args);
copy_sub_args(Args, none) ->
    Args.

%% sub_arg(Arg, Old, New) -> Arg | New.
%%  Return New if Arg is exactly equal to Old; otherwise return Arg.
sub_arg(Arg, Old, New) when Arg =:= Old -> New;
sub_arg(Arg, _Old, _New) -> Arg.

%%%
%%% Consider:
%%%
%%%   x1/Hd = get_hd x0/Cons
%%%   y0/Tl = get_tl x0/Cons
%%%
%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
%%% a 'move' instruction must be inserted.
%%%
%%% If we swap get_hd and get_tl when Tl is in a Y register,
%%% x0 can be used for Hd if Cons is not used again:
%%%
%%%   y0/Tl = get_tl x0/Cons
%%%   x0/Hd = get_hd x0/Cons
%%%

%% opt_get_list(St0) -> St.
%%  Swap get_hd and get_tl instructions where that is beneficial,
%%  guided by the register reservations made so far.
opt_get_list(#st{ssa=Blocks0,res=Res}=St) ->
    Reservations = maps:from_list(Res),
    Blocks = opt_get_list_1(beam_ssa:rpo(Blocks0), Reservations, Blocks0),
    St#st{ssa=Blocks}.

%% Update each block in which at least one swap was made.
opt_get_list_1([], _Res, Blocks) ->
    Blocks;
opt_get_list_1([L|Ls], Res, Blocks0) ->
    #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
    Blocks = case opt_get_list_is(Is0, Res, [], false) of
                 {yes,Is} -> Blocks0#{L:=Blk#b_blk{is=Is}};
                 no -> Blocks0
             end,
    opt_get_list_1(Ls, Res, Blocks).

%% opt_get_list_is(Is, Res, Acc, Changed) -> {yes,Is} | no.
%%  Look for a get_hd immediately followed by a get_tl on the same
%%  cons cell and swap the two when only Tl has a Y register
%%  reservation. Return 'no' if no swap was made.
opt_get_list_is([#b_set{op=get_hd,dst=Hd,args=[Cons]}=GetHd,
                 #b_set{op=get_tl,dst=Tl,args=[Cons]}=GetTl|Is],
                Res, Acc, Changed) ->
    %% Note that when this pass is run, only Y registers have
    %% reservations. The absence of an entry for a variable therefore
    %% means that the variable will be in an X register.
    Swap = case Res of
               #{Hd:={y,_}} -> false;   %Hd will be in a Y register.
               #{Tl:={y,_}} -> true;    %Only Tl will be in a Y register.
               #{} -> false             %Both are in X registers.
           end,
    case Swap of
        true ->
            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
        false ->
            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
    end;
opt_get_list_is([I|Is], Res, Acc, Changed) ->
    opt_get_list_is(Is, Res, [I|Acc], Changed);
opt_get_list_is([], _Res, Acc, Changed) ->
    case Changed of
        false -> no;
        true -> {yes,lists:reverse(Acc)}
    end.

%%%
%%% Number instructions in the order they are executed.
%%%

%% number_instructions(St0) -> St.
%%  Number instructions in the order they are executed. Use a step
%%  size of 2. Don't number phi instructions. All phi variables in
%%  a block will be live one unit before the first non-phi instruction
%%  in the block.

number_instructions(#st{ssa=Blocks0}=St) ->
    %% Numbering follows the reverse postorder and begins at 1.
    Order = beam_ssa:rpo(Blocks0),
    Blocks = number_is_1(Order, 1, Blocks0),
    St#st{ssa=Blocks}.

%% number_is_1(Labels, N, Blocks0) -> Blocks.
%%  Number the instructions and the terminator of each block,
%%  continuing the numbering from one block to the next.
number_is_1([], _N, Blocks) ->
    Blocks;
number_is_1([L|Ls], N0, Blocks) ->
    #b_blk{is=Is0,last=Last0} = Blk = map_get(L, Blocks),
    {Is,LastN} = number_is_2(Is0, N0, []),
    Last = beam_ssa:add_anno(n, LastN, Last0),
    number_is_1(Ls, LastN + 2, Blocks#{L:=Blk#b_blk{is=Is,last=Last}}).

%% number_is_2(Is0, N0, Acc) -> {Is,N}.
%%  Annotate each non-phi instruction with its number, using a step
%%  size of 2. Return the next free number together with the
%%  instructions.
number_is_2([], N, Acc) ->
    {lists:reverse(Acc),N};
number_is_2([#b_set{op=phi}=Phi|Is], N, Acc) ->
    number_is_2(Is, N, [Phi|Acc]);
number_is_2([I|Is], N, Acc) ->
    number_is_2(Is, N + 2, [beam_ssa:add_anno(n, N, I)|Acc]).

%%%
%%% Calculate live intervals.
%%%

%% live_intervals(St0) -> St.
%%  Calculate the live intervals for all variables and store them
%%  in the 'intervals' field. Each argument variable is given the
%%  range {0,1} in addition to any ranges found in the blocks.
live_intervals(#st{args=Args,ssa=Blocks}=St) ->
    ArgRanges = [{V,{0,1}} || #b_var{}=V <- Args],
    PerBlock = fun(L, _Blk, Acc) -> live_interval_blk(L, Blocks, Acc) end,
    {BlockRanges,_LiveMap} = beam_ssa:fold_po(PerBlock, {[],#{}}, Blocks),
    St#st{intervals=merge_ranges(rel2fam(ArgRanges++BlockRanges))}.

%% Merge adjoining ranges for each variable.
merge_ranges(Intervals) ->
    lists:map(fun({V,Rs}) -> {V,merge_ranges_1(Rs)} end, Intervals).

%% merge_ranges_1([{Start,End}]) -> [{Start,End}].
%%  Join two consecutive ranges into one when the first ends
%%  where the second begins.
merge_ranges_1([]) ->
    [];
merge_ranges_1([{Start,Mid},{Next,End}|Rest]) when Mid =:= Next ->
    merge_ranges_1([{Start,End}|Rest]);
merge_ranges_1([Range|Rest]) ->
    [Range|merge_ranges_1(Rest)].

%% live_interval_blk(Label, Blocks, {Vars0,LiveMap0}) -> {Vars,LiveMap}.
%%  Add the ranges for the variables used or defined in block Label
%%  to Vars0, and record in the live map which variables are live
%%  at the beginning of the block.
live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
    %% Variables that are live in any of the successors.
    LiveOut = update_successors(beam_ssa:successors(L, Blocks), L,
                                Blocks, LiveMap0, []),

    %% Those variables are considered used just after the terminator.
    #b_blk{is=Is,last=Last} = map_get(L, Blocks),
    End = beam_ssa:get_anno(n, Last),
    LiveOutUses = [{V,{use,End+1}} || V <- LiveOut],

    %% Collect the uses and definitions in this block.
    First = first_number(Is, Last),
    UseDef0 = live_interval_blk_1([Last|reverse(Is)], First, LiveOutUses),
    UseDef = rel2fam(UseDef0),

    %% What is live at the beginning of this block.
    UsedFirst = [V || {V,[{use,_}|_]} <- UseDef],
    DefinedHere = [V || {V,[{def,_}|_]} <- UseDef],
    LiveIn = ordsets:subtract(ordsets:union(LiveOut, UsedFirst),
                              DefinedHere),

    {make_block_ranges(UseDef, First, Vars0),LiveMap0#{L=>LiveIn}}.

%% make_block_ranges(UseDef, First, Acc0) -> Acc.
%%  Construct one range per variable for a block. A variable that
%%  is not defined in the block gets a range starting at First.
make_block_ranges([], _First, Acc) ->
    Acc;
make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) ->
    %% Defined but never used.
    make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]);
make_block_ranges([{V,[{def,Def}|Uses]}|Vs], First, Acc) ->
    {use,End} = lists:last(Uses),
    make_block_ranges(Vs, First, [{V,{Def,End}}|Acc]);
make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) ->
    {use,End} = lists:last(Uses),
    make_block_ranges(Vs, First, [{V,{First,End}}|Acc]).

%% live_interval_blk_1(Is, FirstNumber, Acc0) -> Acc.
%%  Collect {Var,{def,N}} and {Var,{use,N}} tuples for the given
%%  instructions (and terminator). Variables defined by phi nodes
%%  are defined at FirstNumber, and the arguments of phi nodes are
%%  not counted as uses here.
live_interval_blk_1([], _FirstNumber, Acc) ->
    Acc;
live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc) ->
    live_interval_blk_1(Is, FirstNumber, [{Dst,{def,FirstNumber}}|Acc]);
live_interval_blk_1([I|Is], FirstNumber, Acc0) ->
    N = beam_ssa:get_anno(n, I),
    Uses = [{V,{use,N}} || V <- beam_ssa:used(I)],
    Acc = case I of
              #b_set{dst=Dst} ->
                  Uses ++ [{Dst,{def,N}}|Acc0];
              _ ->
                  %% A terminator defines nothing.
                  Uses ++ Acc0
          end,
    live_interval_blk_1(Is, FirstNumber, Acc).

%% first_number([#b_set{}], Last) -> InstructionNumber.
%%  Return the number for the first instruction for the block.
%%  Note that this number is one less than the number of the first
%%  non-phi instruction in the block (or of the terminator Last if
%%  the block has no non-phi instructions).

first_number([], Last) ->
    %% Only phi nodes (or nothing) before the terminator.
    beam_ssa:get_anno(n, Last) - 1;
first_number([#b_set{op=phi}|Is], Last) ->
    first_number(Is, Last);
first_number([I|_], _Last) ->
    beam_ssa:get_anno(n, I) - 1.

%% update_successors(Successors, Pred, Blocks, LiveMap, Live0) -> Live.
%%  Add to Live0 the variables that are live at the beginning of
%%  each successor, adjusted for the phi nodes in the successor as
%%  seen from the predecessor Pred.
update_successors([], _Pred, _Blocks, _LiveMap, Live) ->
    Live;
update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
    #b_blk{is=Is} = map_get(L, Blocks),
    LiveIn = ordsets:union(Live0, get_live(L, LiveMap)),
    Live = update_live_phis(Is, Pred, LiveIn),
    update_successors(Ls, Pred, Blocks, LiveMap, Live).

%% Return the live variables recorded for block L, or [] if
%% nothing has been recorded for it.
get_live(L, LiveMap) ->
    maps:get(L, LiveMap, []).

%% update_live_phis(Is, Pred, Live0) -> Live.
%%  For each phi node at the beginning of Is, add the variable
%%  coming from predecessor Pred to the live set and remove the
%%  variable defined by the phi node.
update_live_phis([#b_set{op=phi,dst=PhiDst,args=PhiArgs}|Is], Pred, Live0) ->
    FromPred = [V || {#b_var{}=V,From} <- PhiArgs, From =:= Pred],
    Live1 = ordsets:union(ordsets:from_list(FromPred), Live0),
    update_live_phis(Is, Pred, ordsets:del_element(PhiDst, Live1));
update_live_phis(_NonPhis, _Pred, Live) ->
    Live.

%%%
%%% Reserve Y registers.
%%%

%% reserve_yregs(St0) -> St.
%%  In each block that allocates a stack frame, insert instructions
%%  that copy variables that must be in Y registers (given by
%%  the `yregs` annotation) to new variables.
%%
%%  Also allocate specific Y registers for try and catch tags.
%%  The outermost try/catch tag is placed in y0, any directly
%%  nested tag in y1, and so on. Note that this is the reverse of
%%  the order required by BEAM; it will be corrected later by
%%  turn_yregs().

reserve_yregs(#st{frames=Frames}=St0) ->
    %% Handle one frame-allocating block at a time.
    lists:foldl(fun(L, St) -> reserve_yregs_1(L, St) end, St0, Frames).

%% reserve_yregs_1(FrameLabel, St0) -> St.
%%  Reserve Y registers for one stack frame. Variables defined
%%  before the frame are renamed and copied by rename_vars/4. All
%%  reservations for the frame are tagged with the current value of
%%  the counter, which is then incremented.
reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
    Yregs = beam_ssa:get_anno(yregs, map_get(L, Blocks0)),
    {Def,Used} = beam_ssa:def_used([L], Blocks0),
    UsedYregs = ordsets:intersection(Yregs, Used),

    %% Variables not defined in the blocks reachable from L.
    DefBefore = ordsets:subtract(UsedYregs, Def),
    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
    InsideVars = ordsets:subtract(UsedYregs, DefBefore),

    TryTagRes = [{V,{Reg,Count}} || {V,Reg} <- reserve_try_tags(L, Blocks)],
    VarRes = [{V,{y,Count}} || V <- BeforeVars ++ InsideVars],
    St#st{res=VarRes ++ TryTagRes ++ Res0,ssa=Blocks,cnt=Count+1}.

%% reserve_try_tags(Label, Blocks) -> [{TagVar,{y,N}}].
%%  Return an ordered set that assigns a specific Y register to
%%  each try/catch tag found in the blocks reachable from Label.
reserve_try_tags(L, Blocks) ->
    {ActMap,_Seen} = reserve_try_tags_1([L], Blocks, gb_sets:empty(), #{}),
    Res = [{V,{y,Y}} || Active <- maps:values(ActMap),
                        {V,Y} <- maps:to_list(Active)],
    ordsets:from_list(Res).

%% reserve_try_tags_1(Labels, Blocks, Seen0, ActMap0) -> {ActMap,Seen}.
%%  Traverse the blocks, visiting each block once. ActMap maps the
%%  label of a block to the try tags that are active on entry to
%%  the block, each tag being mapped to its register number.
reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
    {ActMap,Seen};
reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
    case gb_sets:is_element(L, Seen0) of
        true ->
            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
        false ->
            #b_blk{is=Is} = Blk = map_get(L, Blocks),
            Active = reserve_try_tags_is(Is, get_active(L, ActMap0)),
            Successors = beam_ssa:successors(Blk),
            ActMap1 = update_act_map(Successors, Active, ActMap0),
            Seen1 = gb_sets:insert(L, Seen0),

            %% Finish the remaining labels at this level before
            %% continuing with the successors.
            {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
            reserve_try_tags_1(Successors, Blocks, Seen, ActMap)
    end.

%% Return the active try tags recorded for block L, or an empty
%% map if nothing has been recorded for it.
get_active(L, ActMap) ->
    maps:get(L, ActMap, #{}).

%% reserve_try_tags_is(Is, Active0) -> Active.
%%  A new try tag is given the number of currently active tags as
%%  its register number; killing a tag removes it from the active
%%  tags.
reserve_try_tags_is(Is, Active0) ->
    Step = fun(#b_set{op=new_try_tag,dst=V}, Active) ->
                   Active#{V=>map_size(Active)};
              (#b_set{op=kill_try_tag,args=[Tag]}, Active) ->
                   maps:remove(Tag, Active);
              (_Other, Active) ->
                   Active
           end,
    lists:foldl(Step, Active0, Is).

%% update_act_map(Successors, Active, ActMap0) -> ActMap.
%%  Record Active as the active tags for each successor. If there
%%  already is an entry for a successor, the maps are merged with
%%  the existing entry taking precedence for keys present in both.
update_act_map([], _Active, ActMap) ->
    ActMap;
update_act_map([L|Ls], Active, ActMap0) ->
    Merged = case ActMap0 of
                 #{L:=Existing} -> maps:merge(Active, Existing);
                 #{} -> Active
             end,
    update_act_map(Ls, Active, ActMap0#{L=>Merged}).

%% rename_vars(Vars, Label, Blocks0, Count0) -> {NewVars,Blocks,Count}.
%%  Rename each variable in Vars to a new variable in the blocks
%%  reachable from Label, and insert 'copy' instructions from the
%%  old variables to the new ones after the phi nodes in block Label.
rename_vars([], _L, Blocks, Count) ->
    {[],Blocks,Count};
rename_vars(Vs, L, Blocks0, Count0) ->
    Bases = [Base || #b_var{name=Base} <- Vs],
    {NewVars,Count} = new_vars(Bases, Count0),
    Ren = zip(Vs, NewVars),
    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
    #b_blk{is=Is} = Blk = map_get(L, Blocks1),
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
    Blocks = Blocks1#{L:=Blk#b_blk{is=insert_after_phis(Is, Copies)}},
    {NewVars,Blocks,Count}.

%% Insert the instructions InsertIs directly after the leading
%% phi nodes in Is.
insert_after_phis(Is, InsertIs) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    {Phis,Rest} = lists:splitwith(IsPhi, Is),
    Phis ++ InsertIs ++ Rest.

%% frame_size(St0) -> St.
%%  Calculate the frame size for each block that allocates a frame.
%%  Annotate the block with the frame size. Also annotate all
%%  return instructions with {deallocate,FrameSize} to simplify
%%  code generation.

frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
    %% Annotate the blocks one stack frame at a time.
    AddFrameSize = fun(L, Blocks) -> frame_size_1(L, Regs, Blocks) end,
    St#st{ssa=lists:foldl(AddFrameSize, Blocks0, Frames)}.

%% frame_size_1(FrameLabel, Regs, Blocks0) -> Blocks.
%%  Calculate the frame size for the stack frame allocated in block
%%  FrameLabel as the number of distinct Y registers assigned to the
%%  variables defined in the blocks reachable from FrameLabel.
%%  Annotate the block with {frame_size,Size} and all reachable
%%  return instructions with {deallocate,Size}.
frame_size_1(L, Regs, Blocks0) ->
    Def = beam_ssa:def([L], Blocks0),

    %% Look up the register for each variable only once.
    Yregs = ordsets:from_list([Reg || V <- Def,
                                      Reg <- [map_get(V, Regs)],
                                      is_yreg(Reg)]),
    FrameSize = length(Yregs),
    case FrameSize of
        0 ->
            ok;
        _ ->
            %% The Y registers in use must be numbered consecutively
            %% from zero.
            [{y,0}|_] = Yregs,                  %Assertion.
            {y,Last} = last(Yregs),
            Last = FrameSize - 1,               %Assertion.
            ok
    end,
    Blk0 = map_get(L, Blocks0),
    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),

    %% Insert an annotation for frame deallocation on
    %% each #b_ret{}.
    Blocks = Blocks0#{L:=Blk},
    Reachable = beam_ssa:rpo([L], Blocks),
    frame_deallocate(Reachable, FrameSize, Blocks).

%% frame_deallocate(Labels, Size, Blocks0) -> Blocks.
%%  Add a {deallocate,Size} annotation to the return instruction
%%  of every listed block that ends with a return.
frame_deallocate([], _Size, Blocks) ->
    Blocks;
frame_deallocate([L|Ls], Size, Blocks0) ->
    Blocks = case map_get(L, Blocks0) of
                 #b_blk{last=#b_ret{}=Ret0}=Blk ->
                     Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
                     Blocks0#{L:=Blk#b_blk{last=Ret}};
                 #b_blk{} ->
                     Blocks0
             end,
    frame_deallocate(Ls, Size, Blocks).


%% turn_yregs(St0) -> St.
%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
%%  catches work. The register allocator (linear_scan()) has given
%%  a lower number to the outermost catch.

turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
    %% Calculate the renumbered registers for each frame. The result
    %% for a later frame is placed in front of the results for the
    %% earlier frames.
    Turn = fun(L, Acc) ->
                   FrameBlk = map_get(L, Blocks),
                   FrameSize = beam_ssa:get_anno(frame_size, FrameBlk),
                   Def = beam_ssa:def([L], Blocks),
                   turn_yregs_1(Def, FrameSize, Regs0) ++ Acc
           end,
    Turned = lists:foldl(Turn, [], Frames),
    St#st{regs=maps:merge(Regs0, maps:from_list(Turned))}.

%% turn_yregs_1(Def, FrameSize, Regs) -> [{Var,{y,NewY}}].
%%  For each variable in Def whose register in Regs satisfies
%%  is_yreg/1, return the variable paired with the mirrored
%%  register, that is, {y,Y} becomes {y,FrameSize-Y-1}.
turn_yregs_1(Def, FrameSize, Regs) ->
    Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
    %% Group the variables by register.
    %% NOTE(review): assumes rel2fam/1 (defined elsewhere) returns one
    %% {Register,Variables} entry per distinct register -- confirm.
    Yregs1 = rel2fam(Yregs0),
    %% Assertion: the number of distinct Y registers must be equal
    %% to the frame size.
    FrameSize = length(Yregs1),
    Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
    %% Turn the family of {Register,Variables} back into a list
    %% of {Variable,Register} pairs.
    R0 = sofs:family(Yregs2),
    R1 = sofs:family_to_relation(R0),
    R = sofs:converse(R1),
    sofs:to_external(R).

%%%
%%% Reserving registers before register allocation.
%%%

%% reserve_regs(St0) -> St.
%%  Reserve registers prior to register allocation. Y registers
%%  have already been reserved. This function will reserve z,
%%  fr, and specific x registers.

reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
    %% The function arguments are reserved in x0, x1, and so on.
    ArgRes = reserve_arg_regs(Args, 0, Res0),

    %% Z registers (dummy registers) are reserved for instructions
    %% with no return values (e.g. remove_message) or pseudo-return
    %% values (e.g. landingpad).
    ZRes = reserve_zregs(Blocks, Intervals, ArgRes),

    %% Float registers.
    FRes = reserve_fregs(Blocks, ZRes),

    %% Everything that remains unreserved is reserved as X registers.
    St#st{res=reserve_xregs(Blocks, maps:from_list(FRes))}.

%% Reserve {x,N}, {x,N+1}, and so on for the argument variables.
reserve_arg_regs([], _N, Acc) ->
    Acc;
reserve_arg_regs([#b_var{}=Arg|Args], N, Acc) ->
    reserve_arg_regs(Args, N + 1, [{Arg,{x,N}}|Acc]).

%% reserve_zregs(Blocks, Intervals, Res0) -> Res.
%%  Reserve Z registers. A variable is considered short-lived if it
%%  has a single live range that ends two units after it starts.
reserve_zregs(Blocks, Intervals, Res) ->
    ShortLived = cerl_sets:from_list([V || {V,[{Start,End}]} <- Intervals,
                                           Start+2 =:= End]),
    PerBlock = fun(_L, #b_blk{is=Is,last=Last}, Acc) ->
                       reserve_zreg(Is, Last, ShortLived, Acc)
               end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_zreg(Is, Terminator, ShortLived, Res0) -> Res.
%%  Add {Var,z} reservations for the instructions in one block.
%%  The first clauses handle special instruction sequences at the
%%  end of a block; note that the order of the clauses matters.
reserve_zreg([#b_set{op=Op,dst=Dst}],
              #b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
                                                    Op =:= get_tuple_element ->
    %% If type optimization has determined that the result of these
    %% instructions can be used directly in a branch, we must avoid reserving a
    %% z register or code generation will fail.
    A;
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
              #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
    case {Val,Last} of
        {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
            %% These two instructions can be combined to a test_arity
            %% instruction provided that the arity variable is short-lived.
            reserve_zreg_1(Dst, ShortLived, A0);
        {_,_} ->
            %% Either the arity is too big, or the boolean value is not
            %% used in a conditional branch.
            A0
    end;
%% A tuple_size BIF directly before a switch: reserve a z register
%% for the size if it is short-lived.
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
             #b_switch{}, ShortLived, A) ->
    reserve_zreg_1(Dst, ShortLived, A);
reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
    %% There is no short, easy way to rewrite 'xor' to a series of
    %% test instructions.
    A;
reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
    %% There is no short, easy way to rewrite is_record/2 to a series of
    %% test instructions.
    A;
%% The general case: the destination of each of the operations
%% listed below is always reserved as a z register.
reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
    IsZReg = case Op of
                 bs_match_string -> true;
                 bs_save -> true;
                 bs_restore -> true;
                 bs_set_position -> true;
                 {float,clearerror} -> true;
                 kill_try_tag -> true;
                 landingpad -> true;
                 put_tuple_elements -> true;
                 remove_message -> true;
                 set_tuple_element -> true;
                 succeeded -> true;
                 timeout -> true;
                 wait_timeout -> true;
                 _ -> false
             end,
    A = case IsZReg of
            true -> [{Dst,z}|A0];
            false -> A0
        end,
    reserve_zreg(Is, Last, ShortLived, A);
%% All instructions have been processed. The boolean of a branch is
%% reserved as a z register if it is a short-lived variable.
reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
    reserve_zreg_1(Bool, ShortLived, A);
reserve_zreg([], _, _, A) -> A.

%% Reserve a z register for V if V is a short-lived variable.
%% Literals never get a reservation.
reserve_zreg_1(#b_literal{}, _ShortLived, Res) ->
    Res;
reserve_zreg_1(#b_var{}=V, ShortLived, Res) ->
    case cerl_sets:is_element(V, ShortLived) of
        false -> Res;
        true -> [{V,z}|Res]
    end.

%% Reserve float registers in all blocks reachable from block 0.
reserve_fregs(Blocks, Res) ->
    PerBlock = fun(_L, #b_blk{is=Is}, Acc) -> reserve_freg(Is, Acc) end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_freg(Is, Res0) -> Res.
%%  Reserve a float register for the destination of every
%%  {float,Op} instruction except {float,get}.
reserve_freg([], Res) ->
    Res;
reserve_freg([#b_set{op={float,get}}|Is], Res) ->
    reserve_freg(Is, Res);
reserve_freg([#b_set{op={float,_},dst=V}|Is], Res) ->
    reserve_freg(Is, [{V,fr}|Res]);
reserve_freg([_|Is], Res) ->
    reserve_freg(Is, Res).

%% reserve_xregs(Blocks, Res0) -> Res.
%%  Reserve all remaining variables as X registers.
%%
%%  If a variable will need to be in a specific X register for a
%%  'call' or 'make_fun' (and there is nothing that will kill it
%%  between the definition and use), reserve the register using a
%%  {prefer,{x,X}} annotation. That annotation means that the linear
%%  scan algorithm will place the variable in the preferred register,
%%  unless that register is already occupied.
%%
%%  All remaining variables are reserved as X registers. Linear scan
%%  will allocate the lowest free X register for the variable.

reserve_xregs(Blocks, Res) ->
    %% Visit the blocks in the reverse of the reverse postorder.
    Order = lists:reverse(beam_ssa:rpo(Blocks)),
    reserve_xregs(Order, Blocks, #{}, Res).

reserve_xregs([], _Blocks, _XsMap, Res) ->
    Res;
reserve_xregs([L|Ls], Blocks, XsMap, Res0) ->
    #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),

    %% The mapping from variable name to preferred register,
    %% as seen at the end of this block.
    Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap, Res0),

    %% Figure out where the code generator will place instructions
    %% that will do a garbage collection, and insert 'gc' markers
    %% as pseudo-instructions at those points. The resulting
    %% instruction sequence is in reverse order.
    GcIs = res_place_gc_instrs(lists:reverse(Is0), []),
    Is = res_place_allocate(Anno, GcIs),

    %% Add register hints for the variables that are defined
    %% in the (reversed) instruction sequence.
    {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),

    reserve_xregs(Ls, Blocks, XsMap#{L=>Xs}, Res).

%% Insert explicit 'gc' markers at the points where there will
%% be a garbage collection. (Note that the instruction
%% sequence passed to this function is reversed.)

res_place_gc_instrs([], Acc) ->
    %% Reverse and replace 'test_heap' markers with 'gc'.
    %% (The distinction is no longer useful.)
    res_place_gc_instrs_rev(Acc, []);
res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
    res_place_gc_instrs(Is, [I|Acc]);
res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
  when Op =:= call; Op =:= make_fun ->
    case Acc of
        [] ->
            %% Nothing follows this instruction in the block.
            res_place_gc_instrs(Is, [I]);
        [_|_] ->
            res_place_gc_instrs(Is, [I,gc|Acc])
    end;
res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
    Need = beam_ssa_codegen:classify_heap_need(Op, Args),
    Acc = case {Need,Acc0} of
              {neutral,[test_heap|Acc1]} ->
                  %% Keep the pending 'test_heap' marker in front.
                  [test_heap,I|Acc1];
              {neutral,_} ->
                  [I|Acc0];
              {{put,_},[test_heap|Acc1]} ->
                  [test_heap,I|Acc1];
              {{put,_},_} ->
                  [test_heap,I|Acc0];
              {_,_} ->
                  [gc,I|Acc0]
          end,
    res_place_gc_instrs(Is, Acc).

%% res_place_gc_instrs_rev(Is, Acc0) -> Acc.
%%  Reverse Is onto Acc0, turning every 'test_heap' marker into
%%  a 'gc' marker and never placing a 'gc' marker directly on top
%%  of another 'gc' marker.
res_place_gc_instrs_rev([], Acc) ->
    Acc;
res_place_gc_instrs_rev([Marker|Is], Acc)
  when Marker =:= test_heap; Marker =:= gc ->
    case Acc of
        [gc|_] ->
            res_place_gc_instrs_rev(Is, Acc);
        _ ->
            res_place_gc_instrs_rev(Is, [gc|Acc])
    end;
res_place_gc_instrs_rev([I|Is], Acc) ->
    res_place_gc_instrs_rev(Is, [I|Acc]).

%% Append a 'gc' marker to Is if the block has a 'yregs'
%% annotation; there will be an 'allocate' instruction inserted
%% at that point.
res_place_allocate(Anno, Is) when is_map_key(yregs, Anno) ->
    Is ++ [gc];
res_place_allocate(Anno, Is) when is_map(Anno) ->
    Is.

%% reserve_xregs_is(Is, Res0, Xs0, Used0) -> {Res,Xs}.
%%  Process the (reversed) instruction sequence with 'gc' markers.
%%  Xs holds the remembered register hints and Used the variables
%%  that are used by the instructions processed so far.
reserve_xregs_is([], Res, Xs, _Used) ->
    {Res,Xs};
reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
    %% At this point, the code generator will place an instruction
    %% that does a garbage collection. We must prune the remembered
    %% registers.
    reserve_xregs_is(Is, Res, res_xregs_prune(Xs0, Used, Res), Used);
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
    Res = reserve_xreg(Dst, Xs0, Res0),
    Used = ordsets:del_element(Dst, ordsets:union(Used0, beam_ssa:used(I))),
    Xs = if
             Op =:= call; Op =:= make_fun ->
                 %% Start over with the hints given by the arguments
                 %% (the first argument is not included).
                 reserve_call_args(tl(Args));
             true ->
                 Xs0
         end,
    reserve_xregs_is(Is, Res, Xs, Used).

%% reserve_terminator(L, Is, Terminator, Blocks, XsMap, Res) -> Xs.
%%  Pick up register hints from the successors of this block.
%%  L is the label of the block, Is its instructions, and Terminator
%%  its last instruction. The result is a map of register hints;
%%  it is empty when no hints can be safely used.
reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
                   _Blocks, XsMap, _Res) ->
    %% We know that no variables are used at ?BADARG_BLOCK, so
    %% any register hints from the success blocks are safe to use.
    map_get(Succ, XsMap);
reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
                   Blocks, XsMap, Res) when Succ =/= Fail ->
    %% A two-way branch with distinct targets.
    #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
    case {SuccBlk,FailBlk} of
        {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
         #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
            %% Both branches ultimately transfer to the same
            %% block (via two blocks with no instructions).
            %% Pick up register hints from the phi nodes
            %% in the common block, once for each predecessor.
            #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
            Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
            res_xregs_from_phi(PhiIs, Fail, Res, Xs);
        {_,_} when Is =/= [] ->
            case last(Is) of
                #b_set{op=succeeded,args=[Arg]} ->
                    %% We know that Arg will not be used at the failure
                    %% label, so we can pick up register hints from the
                    %% success label. This is done by pretending that the
                    %% terminator is a one-way branch to the success
                    %% label; of the hints found, only the one for Arg
                    %% (if any) is kept.
                    Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
                    case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
                        #{Arg:=Reg} -> #{Arg=>Reg};
                        #{} -> #{}
                    end;
                _ ->
                    %% Register hints from the success block may not
                    %% be safe at the failure block, and vice versa.
                    #{}
            end;
        {_,_} ->
            %% Register hints from the success block may not
            %% be safe at the failure block, and vice versa.
            #{}
    end;
reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
                   Blocks, XsMap, Res) ->
    %% A one-way branch.
    case map_get(Succ, Blocks) of
        #b_blk{is=[],last=Last} ->
            %% The successor has no instructions. Continue with the
            %% terminator of the successor, which now takes the role
            %% of the current block.
            reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
        #b_blk{is=[_|_]=PhiIs} ->
            %% Pick up hints from the phi nodes (if any) at the
            %% beginning of the successor, with L as the predecessor.
            res_xregs_from_phi(PhiIs, L, Res, #{})
    end;
%% Any other terminator: no hints.
reserve_terminator(_, _, _, _, _, _) -> #{}.

%% res_xregs_from_phi(Is, Pred, Res, Acc) -> Acc.
%%  Pick up reservations from the phi nodes at the beginning of Is.
%%  For each phi node whose value coming from the predecessor Pred
%%  is a variable, and whose destination has a {prefer,Reg}
%%  reservation in Res, add a hint that the variable should be
%%  placed in Reg. Stop at the first element that is not a phi node.

res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
                   Pred, Res, Acc0) ->
    FromPred = [V || {#b_var{}=V,L} <- Args, L =:= Pred],
    Acc = case FromPred of
              [] ->
                  %% The value for this predecessor is a literal.
                  Acc0;
              [V] ->
                  case Res of
                      #{Dst:={prefer,Reg}} ->
                          %% Try to place V in the same register
                          %% as the phi node.
                          Acc0#{V=>Reg};
                      #{Dst:=_} ->
                          Acc0
                  end
          end,
    res_xregs_from_phi(Is, Pred, Res, Acc);
res_xregs_from_phi(_, _, _, Acc) -> Acc.

%% reserve_call_args(Args) -> Xs.
%%  Build register hints for the arguments of a call: a variable
%%  in argument position N (counting from zero) is mapped to
%%  {x,N}. Literal arguments occupy a position but give no hint.

reserve_call_args(Args) ->
    reserve_call_args(Args, 0, #{}).

reserve_call_args([], _Pos, Hints) ->
    Hints;
reserve_call_args([#b_literal{}|Rest], Pos, Hints) ->
    reserve_call_args(Rest, Pos+1, Hints);
reserve_call_args([#b_var{}=Arg|Rest], Pos, Hints) ->
    reserve_call_args(Rest, Pos+1, Hints#{Arg=>{x,Pos}}).

%% reserve_xreg(V, Xs, Res) -> Res.
%%  Reserve an X register for V unless V already has a
%%  reservation in Res. If Xs holds a hint for V, the reservation
%%  is {prefer,Hint}; otherwise it is the atom 'x'.

reserve_xreg(V, Xs, Res) ->
    case Res of
        #{V:=_} ->
            %% V already has a reservation; leave it alone.
            Res;
        #{} ->
            Reservation = case Xs of
                              #{V:=X} ->
                                  %% Prefer this specific X register,
                                  %% unless it is already in use.
                                  {prefer,X};
                              #{} ->
                                  %% Any X register will do.
                                  x
                          end,
            Res#{V=>Reservation}
    end.

%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
%%  Prune the map of preferred registers to make sure that there
%%  are no "holes" (uninitialized X registers) when the garbage
%%  collector is invoked. An empty map is returned unchanged.

res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
    %% Count the used variables that are either reserved as a
    %% specific X register or have no reservation at all. That
    %% count is taken as the number of safe X registers. The
    %% actual number of safe registers may be higher, but this
    %% is a conservative estimate.
    NumSafe = length([V || V <- Used,
                           case Res of
                               #{V:={x,_}} -> true;
                               #{V:=_} -> false;
                               #{} -> true
                           end]),

    %% Keep only the hints that refer to safe registers.
    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
res_xregs_prune(Xs, _Used, _Res) -> Xs.

%%%
%%% Register allocation using linear scan.
%%%

%% The live interval for one variable.
-record(i,
        {%% Sort key; init_interval/2 sets it to the start of the
         %% first range.
         sort=1 :: instr_number(),
         %% The allocated or reserved register, a preferred X
         %% register, or 'none' if nothing has been decided yet.
         reg=none :: i_reg(),
         %% The register pool that the register is taken from.
         pool=x :: pool_id(),
         %% The variable that this interval belongs to.
         var=#b_var{} :: b_var(),
         %% The live ranges of the variable.
         rs=[] :: [range()]
        }).

%% The state of the linear scan allocator.
-record(l,
        {%% The interval currently being processed.
         cur=#i{} :: interval(),
         %% Intervals not yet processed that have a reserved register.
         unhandled_res=[] :: [interval()],
         %% All other intervals not yet processed.
         unhandled_any=[] :: [interval()],
         %% Intervals that currently hold their register.
         active=[] :: [interval()],
         %% Intervals with a register assigned, but that do not
         %% cover the current position.
         inactive=[] :: [interval()],
         %% The free registers for each pool, together with a
         %% {next,Pool} counter used when a new register must be
         %% allocated in a pool.
         %% NOTE(review): get_pool/2 looks up this map by pool id,
         %% so the var_name() key type below looks like it should
         %% be pool_id() -- confirm.
         free=#{} :: #{var_name()=>pool(),
                       {'next',pool_id()}:=reg_num()},
         %% The register assignments made so far.
         regs=[] :: [{b_var(),ssa_register()}]
        }).

-type interval() :: #i{}.
%% A register, a hint that a certain X register is preferred, or 'none'.
-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
%% An integer pool id designates a pool of Y registers
%% (see get_next_free/2).
-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
-type pool() :: ordsets:ordset(ssa_register()).

%% linear_scan(St0) -> St.
%%  Allocate a register for every live interval in St0. The
%%  result is stored as a map in the 'regs' field; the
%%  'intervals' and 'res' fields are cleared.

linear_scan(#st{intervals=Intervals0,res=Res}=St) ->
    Free = init_free(maps:to_list(Res)),
    Intervals = sort([init_interval(Int, Res) || Int <- Intervals0]),

    %% Intervals that already have a definite register are kept in
    %% a list of their own. A mere preference does not count.
    HasReservedReg = fun(#i{reg=none}) -> false;
                        (#i{reg={prefer,{_,_}}}) -> false;
                        (#i{reg={_,_}}) -> true
                     end,
    {Reserved,Unreserved} = partition(HasReservedReg, Intervals),
    Initial = #l{unhandled_res=Reserved,
                 unhandled_any=Unreserved,
                 free=Free},
    #l{regs=Regs} = do_linear(Initial),
    St#st{intervals=[],res=[],regs=maps:from_list(Regs)}.

%% init_interval({Var,Ranges}, Res) -> Interval.
%%  Create the interval record for Var. The register pool and
%%  the initial register (if any) are determined by the
%%  reservation for Var in Res.

init_interval({V,[{Start,_}|_]=Rs}, Res) ->
    {Pool,Reg} =
        case map_get(V, Res) of
            {prefer,{x,_}}=Hint -> {x,Hint};
            x -> {x,none};
            {x,_}=XReg -> {x,XReg};
            {y,Uniq} -> {Uniq,none};
            {{y,_}=YReg,Uniq} -> {Uniq,YReg};
            z -> {z,none};
            fr -> {fr,none}
        end,
    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.

%% init_free(Reservations) -> Free.
%%  Build the initial map of free registers. Each pool maps to
%%  the registers mentioned for it in the reservations (the 'x'
%%  pool always exists and is made contiguous), and {next,Pool}
%%  maps to the number of registers in that pool.

init_free(Res) ->
    Pools0 = maps:from_list(rel2fam([{x,{x,0}}|init_free_1(Res)])),
    Pools = Pools0#{x:=init_xregs(map_get(x, Pools0))},
    Counters = [{{next,Id},length(Regs)} ||
                   {Id,Regs} <- maps:to_list(Pools)],
    maps:merge(Pools, maps:from_list(Counters)).

%% init_free_1(Reservations) -> [{PoolId,Register}].
%%  Translate each reservation to the pool it belongs to and a
%%  register that must exist in that pool. A plain 'x'
%%  reservation contributes nothing.

init_free_1([]) ->
    [];
init_free_1([{_,x}|Res]) ->
    init_free_1(Res);
init_free_1([{_,Info}|Res]) ->
    Entry = case Info of
                {prefer,{x,_}=Preferred} -> {x,Preferred};
                {x,_}=XReg -> {x,XReg};
                {y,Uniq} -> {Uniq,{y,0}};
                {{y,_}=YReg,Pool} -> {Pool,YReg};
                z -> {z,{z,0}};
                fr -> {fr,{fr,0}}
            end,
    [Entry|init_free_1(Res)].

%% init_xregs(XRegs) -> XRegs.
%%  Make sure that the pool of X registers is contiguous by
%%  filling in any missing register between two consecutive
%%  elements of the list.

init_xregs([{x,_}]=Last) ->
    Last;
init_xregs([{x,N}=Reg|[{x,M}|_]=Rest]) ->
    Next = N + 1,
    if
        Next =:= M ->
            [Reg|init_xregs(Rest)];
        true ->
            %% There is a hole after Reg; fill in the next register.
            [Reg|init_xregs([{x,Next}|Rest])]
    end.

%% do_linear(State0) -> State.
%%  The main loop of the allocator. Pick the next unhandled
%%  interval, update the active and inactive lists for its start
%%  position, select a register for it, and make it active.
%%  Return the state when there are no more unhandled intervals.

do_linear(State) ->
    case set_next_current(State) of
        done ->
            State;
        WithCur ->
            Checked = check_inactive(expire_active(WithCur)),
            Selected = select_register(collect_available(Checked), Checked),
            do_linear(make_cur_active(Selected))
    end.

%% set_next_current(State0) -> State | done.
%%  Move the next unhandled interval to the 'cur' field. When both
%%  lists are non-empty, the interval with a reserved register is
%%  taken only if its sort key is strictly less than that of the
%%  other candidate. Return 'done' when both lists are empty.

set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
    done;
set_next_current(#l{unhandled_res=[],unhandled_any=[Next|Rest]}=L) ->
    L#l{cur=Next,unhandled_any=Rest};
set_next_current(#l{unhandled_res=[Next|Rest],unhandled_any=[]}=L) ->
    L#l{cur=Next,unhandled_res=Rest};
set_next_current(#l{unhandled_res=[#i{sort=ResStart}=Next|Rest],
                    unhandled_any=[#i{sort=AnyStart}|_]}=L)
  when ResStart < AnyStart ->
    L#l{cur=Next,unhandled_res=Rest};
set_next_current(#l{unhandled_res=[_|_],
                    unhandled_any=[Next|Rest]}=L) ->
    L#l{cur=Next,unhandled_any=Rest}.

%% expire_active(State0) -> State.
%%  Go through the active intervals. An interval that ends
%%  before the start of the current interval is dropped and its
%%  register is freed. An interval that does not cover the start
%%  of the current interval is moved to the inactive list and its
%%  register is freed. All other intervals remain active.

expire_active(#l{cur=#i{sort=CurBegin},active=Active0}=L0) ->
    {Active,L1} = expire_active(Active0, CurBegin, L0, []),
    L1#l{active=Active}.

expire_active([], _CurBegin, L, Acc) ->
    {Acc,L};
expire_active([#i{reg=Reg,rs=Rs}=I|Is], CurBegin, L0, Acc0) ->
    {_,_} = Reg,                                %Assertion.
    {L,Acc} =
        case overlap_status(Rs, CurBegin) of
            overlapping ->
                {L0,[I|Acc0]};
            ends_before_cur ->
                {free_reg(I, L0),Acc0};
            not_overlapping ->
                %% Forget the ranges that have already ended.
                Parked = I#i{rs=strip_before_current(Rs, CurBegin)},
                #l{inactive=InAct} = L1 = free_reg(I, L0),
                {L1#l{inactive=[Parked|InAct]},Acc0}
        end,
    expire_active(Is, CurBegin, L, Acc).

%% check_inactive(State0) -> State.
%%  Go through the inactive intervals. An interval that ends
%%  before the start of the current interval is dropped. An
%%  interval that covers the start of the current interval is
%%  moved to the active list and its register is taken out of
%%  the free pool. All other intervals remain inactive.

check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
    {InAct,L1} = check_inactive(InAct0, CurBegin, L0, []),
    L1#l{inactive=InAct}.

check_inactive([], _CurBegin, L, Acc) ->
    {Acc,L};
check_inactive([#i{rs=Rs}=I|Is], CurBegin, L0, Acc0) ->
    {L,Acc} =
        case overlap_status(Rs, CurBegin) of
            ends_before_cur ->
                {L0,Acc0};
            not_overlapping ->
                {L0,[I|Acc0]};
            overlapping ->
                %% Forget the ranges that have already ended.
                Resumed = I#i{rs=strip_before_current(Rs, CurBegin)},
                L1 = L0#l{active=[Resumed|L0#l.active]},
                {reserve_reg(I, L1),Acc0}
        end,
    check_inactive(Is, CurBegin, L, Acc).

%% strip_before_current(Ranges, CurBegin) -> Ranges.
%%  Drop the leading ranges that end at or before CurBegin.

strip_before_current(Rs, CurBegin) ->
    case Rs of
        [{_,End}|Later] when End =< CurBegin ->
            strip_before_current(Later, CurBegin);
        _ ->
            Rs
    end.

%% collect_available(State) -> [Register].
%%  Return the registers that can be given to the current
%%  interval. If the interval already has a register, the result
%%  is just that register. If it has a preferred register that is
%%  available, the result is just the preferred register.

collect_available(#l{cur=#i{reg=Reg}=Cur,unhandled_res=Unhandled}=L) ->
    case Reg of
        {prefer,{_,_}=Preferred} ->
            %% Find out what is available when the preference is
            %% ignored, and then honor it if possible.
            Candidates = collect_available(L#l{cur=Cur#i{reg=none}}),
            case member(Preferred, Candidates) of
                true -> [Preferred];
                false -> Candidates
            end;
        {_,_} ->
            %% The register has already been decided.
            [Reg];
        _ ->
            %% Since the live intervals are constructed from SSA
            %% form, the current interval cannot overlap any
            %% inactive interval. See [3], page 175. Therefore we
            %% only have to check the unhandled intervals for
            %% overlap, and of those only the ones that have
            %% reserved registers.
            collect_available(Unhandled, Cur, get_pool(Cur, L))
    end.

collect_available([], _Cur, Free) ->
    Free;
collect_available([#i{pool=Pool}|Is], #i{pool=CurPool}=Cur, Free)
  when Pool =/= CurPool ->
    %% This interval belongs to another pool.
    collect_available(Is, Cur, Free);
collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
    Free = case overlaps(I, Cur) of
               true -> ordsets:del_element(Reg, Free0);
               false -> Free0
           end,
    collect_available(Is, Cur, Free).

%% select_register(Available, State0) -> State.
%%  Give the current interval the first available register, or
%%  allocate a new register in its pool if none is available.
%%  The assignment is recorded in the 'regs' field and the
%%  register is removed from the free pool.

select_register(Available, #l{cur=Cur0}=L0) ->
    {Reg,L1} = case Available of
                   [{_,_}=First|_] -> {First,L0};
                   [] -> get_next_free(Cur0, L0)
               end,
    #i{var=Var} = Cur = Cur0#i{reg=Reg},
    reserve_reg(Cur, L1#l{cur=Cur,regs=[{Var,Reg}|L1#l.regs]}).

%% make_cur_active(State0) -> State.
%%  Put the current interval first in the list of active intervals.

make_cur_active(#l{cur=Current,active=Active}=State) ->
    State#l{active=[Current|Active]}.

%% overlaps(Interval1, Interval2) -> true|false.
%%  Test whether the ranges of the two intervals overlap.

overlaps(#i{rs=RangesA}, #i{rs=RangesB}) ->
    are_overlapping(RangesA, RangesB).

%% overlap_status(Ranges, CurBegin) ->
%%       ends_before_cur | overlapping | not_overlapping.
%%  Classify a non-empty list of ranges relative to the position
%%  CurBegin. Ranges that end at or before CurBegin are skipped;
%%  'ends_before_cur' means that every range was skipped. The
%%  first remaining range decides between 'overlapping' (it has
%%  started at CurBegin) and 'not_overlapping' (it starts later).

overlap_status([{S,E}|Rs], CurBegin) ->
    if
        E =< CurBegin, Rs =:= [] ->
            ends_before_cur;
        E =< CurBegin ->
            overlap_status(Rs, CurBegin);
        S =< CurBegin ->
            overlapping;
        true ->
            not_overlapping
    end.

%% reserve_reg(Interval, State0) -> State.
%%  Remove the register of the interval from the set of free
%%  registers in the pool of the interval.

reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
    update_pool(I, ordsets:del_element(Reg, get_pool(I, L)), L).

%% free_reg(Interval, State0) -> State.
%%  Return the register of the interval to the set of free
%%  registers in the pool of the interval.

free_reg(#i{reg={_,_}=Reg}=I, L) ->
    update_pool(I, ordsets:add_element(Reg, get_pool(I, L)), L).

%% get_pool(Interval, State) -> FreeRegs.
%%  Fetch the free registers in the pool of the interval.
%%  The pool must exist.

get_pool(#i{pool=PoolId}, #l{free=Pools}) ->
    maps:get(PoolId, Pools).

%% update_pool(Interval, FreeRegs, State0) -> State.
%%  Replace the free registers in the pool of the interval.
%%  The pool must already exist.

update_pool(#i{pool=PoolId}, FreeRegs, #l{free=Pools}=State) ->
    State#l{free=maps:update(PoolId, FreeRegs, Pools)}.

%% get_next_free(Interval, State0) -> {Register,State}.
%%  Allocate a register that has not been used before in the pool
%%  of the interval, and bump the {next,Pool} counter. A pool
%%  with an integer id yields Y registers; for any other pool,
%%  the pool id is also the register kind.

get_next_free(#i{pool=Pool}, #l{free=Free}=L0) ->
    Counter = {next,Pool},
    N = map_get(Counter, Free),
    L = L0#l{free=Free#{Counter:=N+1}},
    Kind = if
               is_integer(Pool) -> y;
               is_atom(Pool) -> Pool
           end,
    {{Kind,N},L}.

%%%
%%% Interval utilities.
%%%

%% are_overlapping(Ranges1, Ranges2) -> true|false.
%%  Test whether any range in Ranges1 overlaps any range
%%  in Ranges2.

are_overlapping([], _) ->
    false;
are_overlapping([Range|Ranges], Others) ->
    are_overlapping_1(Range, Others) orelse
        are_overlapping(Ranges, Others).

%% are_overlapping_1(Range, Ranges) -> true|false.
%%  Test whether Range overlaps any of the ranges in Ranges.
%%  Ranges that merely touch do not overlap. The search is
%%  abandoned at the first range in Ranges that starts after
%%  the end of Range.

are_overlapping_1({_,_}, []) ->
    false;
are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
    if
        E1 < S2 ->
            false;
        S2 < E1, S1 < E2 ->
            true;
        true ->
            are_overlapping_1(R, Rs)
    end.

%%%
%%% Utilities.
%%%

%% is_loop_header(L, Blocks) -> false|true.
%%  Check whether the block with label L is a loop header.
%%  The label must be present in Blocks.

is_loop_header(L, Blocks) ->
    %% We KNOW that a loop header must start with a peek_message
    %% instruction.
    case map_get(L, Blocks) of
        #b_blk{is=[#b_set{op=Op}|_]} -> Op =:= peek_message;
        _ -> false
    end.

%% rel2fam([{Key,Value}]) -> [{Key,[Value]}].
%%  Group the values of a binary relation by key. Keys and the
%%  values for each key come out sorted and without duplicates.

rel2fam(Pairs) ->
    sofs:to_external(sofs:rel2fam(sofs:relation(Pairs))).

%% split_phis(Is) -> {PhiIs,RestIs}.
%%  Split the instructions into the leading phi nodes and
%%  the instructions that follow them.

split_phis(Is) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (#b_set{}) -> false
            end,
    splitwith(IsPhi, Is).

%% is_yreg(Register) -> true|false.
%%  Test whether the register is a Y register. The register
%%  must be an x, y, z, or fr register.

is_yreg({Kind,_}) when Kind =:= y; Kind =:= x; Kind =:= z; Kind =:= fr ->
    Kind =:= y.

%% new_vars([Base], Count0) -> {[Var],Count}.
%%  Create one new variable for each base name, threading the
%%  counter through the calls to new_var/2 from left to right.

new_vars(Bases, Count0) ->
    lists:mapfoldl(fun new_var/2, Count0, Bases).

%% new_var(Base, Count0) -> {Var,Count}.
%%  Create a new variable named {Base,Count0} and increment the
%%  counter. If a {Base,Int} tuple is given, only its Base part
%%  is used for the new name.

new_var(Name, Count) ->
    Base = case Name of
               {Base0,Int} ->
                   true = is_integer(Int),      %Assertion.
                   Base0;
               _ ->
                   Name
           end,
    {#b_var{name={Base,Count}},Count+1}.