1 files changed, 2634 insertions, 0 deletions
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
new file mode 100644
index 0000000000..df4de8d7bd
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -0,0 +1,2634 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+%% Purpose: Prepare for code generation, including register allocation.
+%%
+%% The output of this compiler pass is still in the SSA format, but
+%% it has been annotated and transformed to help the code generator.
+%%
+%% * Some instructions are translated to other instructions closer to
+%% the BEAM instructions. For example, the binary matching
+%% instructions are transformed from the optimization-friendly
+%% internal format to instruction more similar to the actual BEAM
+%% instructions.
+%%
+%% * Blocks that will need an instruction for allocating a stack frame
+%% are annotated with a {frame_size,Size} annotation.
+%%
+%% * 'copy' instructions are added for all variables that need
+%% to be saved to the stack frame. Additional 'copy' instructions
+%% can be added as an optimization to reuse y registers (see
+%% the copy_retval sub pass).
+%%
+%% * Each function is annotated with a {register,RegisterMap}
+%% annotation that maps each variable to a BEAM register. The linear
+%% scan algorithm is used to allocate registers.
+%%
+%% There are four kind of registers. x, y, fr (floating point register),
+%% and z. A variable will be allocated to a z register if it is only
+%% used by the instruction following the instruction that defines the
+%% the variable. The code generator will typically combine those
+%% instructions to a test instruction. z registers are also used for
+%% some instructions that don't have a return value.
+%%
+%% References:
+%%
+%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
+%% in the context of SSA form and register constraints. In Proceedings
+%% of the International Conference on Compiler Construction, pages
+%% 229–246. LNCS 2304, Springer-Verlag, 2002.
+%%
+%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
+%% linear scan register allocator. In Proceedings of the ACM/USENIX
+%% International Conference on Virtual Execution Environments, pages
+%% 132–141. ACM Press, 2005.
+%%
+%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
+%% Form. In Proceedings of the International Symposium on Code
+%% Generation and Optimization, pages 170-179. ACM Press, 2010.
+%%
+
+-module(beam_ssa_pre_codegen).
+
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [all/2,any/2,append/1,duplicate/2,
+                foldl/3,last/1,map/2,member/2,partition/2,
+                reverse/1,reverse/2,sort/1,splitwith/2,zip/2]).
+
+-spec module(beam_ssa:b_module(), [compile:option()]) ->
+                    {'ok',beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, Opts) ->
+    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
+    Ps = passes(Opts),
+    Fs = functions(Fs0, Ps, UseBSM3),
+    {ok,Module#b_module{body=Fs}}.
+
+functions([F|Fs], Ps, UseBSM3) ->
+    [function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)];
+functions([], _Ps, _UseBSM3) -> [].
+
+-type b_var() :: beam_ssa:b_var().
+-type var_name() :: beam_ssa:var_name().
+-type instr_number() :: pos_integer().
+-type range() :: {instr_number(),instr_number()}.
+-type reg_num() :: beam_asm:reg_num().
+-type xreg() :: {'x',reg_num()}.
+-type yreg() :: {'y',reg_num()}.
+-type ypool() :: {'y',beam_ssa:label()}.
+-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
+                       ypool() | {yreg(),ypool()} | 'z'.
+-type ssa_register() :: beam_ssa_codegen:ssa_register().
+
+-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
+-record(st, {ssa :: beam_ssa:block_map(),
+             args :: [b_var()],
+             cnt :: beam_ssa:label(),
+             use_bsm3 :: boolean(),
+             frames=[] :: [beam_ssa:label()],
+             intervals=[] :: [{b_var(),[range()]}],
+             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
+             regs=#{} :: #{b_var():=ssa_register()},
+             extra_annos=[] :: [{atom(),term()}]
+            }).
+-define(PASS(N), {N,fun N/1}).
+
+passes(Opts) ->
+    AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
+    FixTuples = proplists:get_bool(no_put_tuple2, Opts),
+    Ps = [?PASS(assert_no_critical_edges),
+
+          %% Preliminaries.
+          ?PASS(fix_bs),
+          ?PASS(sanitize),
+          case FixTuples of
+              false -> ignore;
+              true -> ?PASS(fix_tuples)
+          end,
+          ?PASS(place_frames),
+          ?PASS(fix_receives),
+
+          %% Find and reserve Y registers.
+          ?PASS(find_yregs),
+          ?PASS(reserve_yregs),
+
+          %% Handle legacy binary match instruction that don't
+          %% accept a Y register as destination.
+          ?PASS(legacy_bs),
+
+          %% Improve reuse of Y registers to potentially
+          %% reduce the size of the stack frame.
+          ?PASS(copy_retval),
+          ?PASS(opt_get_list),
+
+          %% Calculate live intervals.
+          ?PASS(number_instructions),
+          ?PASS(live_intervals),
+          ?PASS(reserve_regs),
+
+          %% If needed for a .precg file, save the live intervals
+          %% so they can be included in an annotation.
+          case AddPrecgAnnos of
+              false -> ignore;
+              true -> ?PASS(save_live_intervals)
+          end,
+
+          %% Allocate registers.
+          ?PASS(linear_scan),
+          ?PASS(frame_size),
+          ?PASS(turn_yregs)],
+    [P || P <- Ps, P =/= ignore].
+
+function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
+         Ps, UseBSM3) ->
+    try
+        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
+        St = compile:run_sub_passes(Ps, St0),
+        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
+        F1 = add_extra_annos(F0, ExtraAnnos),
+        F = beam_ssa:add_anno(registers, Regs, F1),
+        F#b_function{bs=Blocks,cnt=Count}
+    catch
+        Class:Error:Stack ->
+            #{func_info:={_,Name,Arity}} = Anno,
+            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+            erlang:raise(Class, Error, Stack)
+    end.
+
+save_live_intervals(#st{intervals=Intervals}=St) ->
+    St#st{extra_annos=[{live_intervals,Intervals}]}.
+
+%% Add extra annotations when a .precg listing file is being produced.
+add_extra_annos(F, Annos) ->
+    foldl(fun({Name,Value}, Acc) ->
+                  beam_ssa:add_anno(Name, Value, Acc)
+          end, F, Annos).
+
+%% assert_no_critical_edges(St0) -> St.
+%%  The code generator will not work if there are critial edges.
+%%  Abort if any critical edges are found.
+
+assert_no_critical_edges(#st{ssa=Blocks}=St) ->
+    F = fun assert_no_ces/3,
+    beam_ssa:fold_rpo(F, Blocks, Blocks),
+    St.
+
+assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
+    %% This block has multiple predecessors. Make sure that none
+    %% of the precessors have more than one successor.
+    true = all(fun({_,P}) ->
+                       length(beam_ssa:successors(P, Blocks)) =:= 1
+               end, Phis),                      %Assertion.
+    Blocks;
+assert_no_ces(_, _, Blocks) -> Blocks.
+
+%% fix_bs(St0) -> St.
+%%  Fix up the binary matching instructions:
+%%
+%%    * Insert bs_save and bs_restore instructions where needed.
+%%
+%%    * Combine bs_match and bs_extract instructions to bs_get
+%%      instructions.
+
+fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
+    F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
+                %% Mark the root of the match context list.
+                [{Dst,{context,Dst}}|A];
+           (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
+                %% Link this match context the previous match context.
+                [{Dst,ParentCtx}|A];
+           (_, A) ->
+                A
+        end,
+    case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
+        [] ->
+            %% No binary matching in this function.
+            St;
+        [_|_]=M ->
+            CtxChain = maps:from_list(M),
+            Linear0 = beam_ssa:linearize(Blocks),
+
+            %% Insert position instructions where needed.
+            {Linear1,Count} = case UseBSM3 of
+                                  true ->
+                                      bs_pos_bsm3(Linear0, CtxChain, Count0);
+                                  false ->
+                                      bs_pos_bsm2(Linear0, CtxChain, Count0)
+                              end,
+
+            %% Rename instructions.
+            Linear = bs_instrs(Linear1, CtxChain, []),
+
+            St#st{ssa=maps:from_list(Linear),cnt=Count}
+    end.
+
+%% Insert bs_get_position and bs_set_position instructions as needed.
+bs_pos_bsm3(Linear0, CtxChain, Count0) ->
+    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
+    Rs = maps:values(Rs0),
+    S0 = sofs:relation(Rs, [{context,save_point}]),
+    S1 = sofs:relation_to_family(S0),
+    S = sofs:to_external(S1),
+
+    {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
+    {Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []),
+    {Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []),
+
+    %% Now insert all saves and restores.
+    {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.
+
+make_bs_setpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
+    SavePoint = get_savepoint(Ps, SavePoints),
+    I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
+    make_bs_setpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
+make_bs_setpos_map([], _, Count, Acc) ->
+    {maps:from_list(Acc),Count}.
+
+make_bs_getpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
+    Ignored = #b_var{name={'@ssa_ignored',Count}},
+    Args = [Ctx, get_savepoint(Ps, SavePoints)],
+    I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
+    make_bs_getpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
+make_bs_getpos_map([], _, Count, Acc) ->
+    {maps:from_list(Acc),Count}.
+
+get_savepoint({_,_}=Ps, SavePoints) ->
+    Name = {'@ssa_bs_position', map_get(Ps, SavePoints)},
+    #b_var{name=Name}.
+
+make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
+    {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
+    make_bs_pos_dict(T, Count, Acc);
+make_bs_pos_dict([], Count, Acc) ->
+    {maps:from_list(Acc), Count}.
+
+make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
+    make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
+make_bs_pos_dict_1([], Ctx, I, Acc) ->
+    {[{Ctx,I}|Acc], I}.
+
+%% As bs_position but without OTP-22 instructions. This is only used when
+%% cross-compiling to older versions.
+bs_pos_bsm2(Linear0, CtxChain, Count0) ->
+    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
+    Rs = maps:values(Rs0),
+    S0 = sofs:relation(Rs, [{context,save_point}]),
+    S1 = sofs:relation_to_family(S0),
+    S = sofs:to_external(S1),
+    Slots = make_save_point_dict(S, []),
+    {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
+    {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),
+
+    %% Now insert all saves and restores.
+    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.
+
+make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
+    Ignored = #b_var{name={'@ssa_ignored',Count}},
+    case make_slot(Ps, Slots) of
+        #b_literal{val=start} ->
+            make_save_map(T, Slots, Count, Acc);
+        Slot ->
+            I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
+            make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
+    end;
+make_save_map([], _, Count, Acc) ->
+    {maps:from_list(Acc),Count}.
+
+make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
+    Ignored = #b_var{name={'@ssa_ignored',Count}},
+    I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
+    make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
+make_restore_map([], _, Count, Acc) ->
+    {maps:from_list(Acc),Count}.
+
+make_slot({Same,Same}, _Slots) ->
+    #b_literal{val=start};
+make_slot({_,_}=Ps, Slots) ->
+    #b_literal{val=map_get(Ps, Slots)}.
+
+make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
+    Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
+    make_save_point_dict(T, Acc);
+make_save_point_dict([], Acc) ->
+    maps:from_list(Acc).
+
+make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
+    %% Special {atom,start} save point. Does not need a
+    %% bs_save instruction.
+    make_save_point_dict_1(T, Ctx, I, Acc);
+make_save_point_dict_1([H|T], Ctx, I, Acc) ->
+    make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
+make_save_point_dict_1([], Ctx, I, Acc) ->
+    [{Ctx,I}|Acc].
+
+bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
+    FPos = case D0 of
+               #{L:=Pos0} -> Pos0;
+               #{} -> #{}
+           end,
+    {SPos,Rs} = bs_restores_is(Is, CtxChain, FPos, Rs0),
+    D = bs_update_successors(Last, SPos, FPos, D0),
+    bs_restores(Bs, CtxChain, D, Rs);
+bs_restores([], _, _, Rs) -> Rs.
+
+bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
+    join_positions([{Succ,SPos},{Fail,FPos}], D);
+bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
+    Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
+    join_positions(Update, D);
+bs_update_successors(#b_ret{}, _, _, D) -> D.
+
+join_positions([{L,MapPos0}|T], D) ->
+    case D of
+        #{L:=MapPos0} ->
+            %% Same map.
+            join_positions(T, D);
+        #{L:=MapPos1} ->
+            %% Different maps.
+            MapPos = join_positions_1(MapPos0, MapPos1),
+            join_positions(T, D#{L:=MapPos});
+        #{} ->
+            join_positions(T, D#{L=>MapPos0})
+    end;
+join_positions([], D) -> D.
+
+join_positions_1(MapPos0, MapPos1) ->
+    MapPos2 = maps:map(fun(Start, Pos) ->
+                               case MapPos0 of
+                                   #{Start:=Pos} -> Pos;
+                                   #{Start:=_} -> unknown;
+                                   #{} -> Pos
+                               end
+                       end, MapPos1),
+    maps:merge(MapPos0, MapPos2).
+
+bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
+               CtxChain, PosMap0, Rs) ->
+    PosMap = PosMap0#{Start=>Start},
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
+               CtxChain, PosMap0, Rs0) ->
+    Start = bs_subst_ctx(NewPos, CtxChain),
+    [_,FromPos|_] = Args,
+    case PosMap0 of
+        #{Start:=FromPos} ->
+            %% Same position, no restore needed.
+            PosMap = case bs_match_type(I) of
+                         plain ->
+                             %% Update position to new position.
+                             PosMap0#{Start:=NewPos};
+                         _ ->
+                             %% Position will not change (test_unit
+                             %% instruction or no instruction at
+                             %% all).
+                             PosMap0#{Start:=FromPos}
+                     end,
+            bs_restores_is(Is, CtxChain, PosMap, Rs0);
+        #{Start:=_} ->
+            %% Different positions, might need a restore instruction.
+            case bs_match_type(I) of
+                none ->
+                    %% The tail test will be optimized away.
+                    %% No need to do a restore.
+                    PosMap = PosMap0#{Start:=FromPos},
+                    bs_restores_is(Is, CtxChain, PosMap, Rs0);
+                test_unit ->
+                    %% This match instruction will be replaced by
+                    %% a test_unit instruction. We will need a
+                    %% restore. The new position will be the position
+                    %% restored to (NOT NewPos).
+                    PosMap = PosMap0#{Start:=FromPos},
+                    Rs = Rs0#{NewPos=>{Start,FromPos}},
+                    bs_restores_is(Is, CtxChain, PosMap, Rs);
+                plain ->
+                    %% Match or skip. Position will be changed.
+                    PosMap = PosMap0#{Start:=NewPos},
+                    Rs = Rs0#{NewPos=>{Start,FromPos}},
+                    bs_restores_is(Is, CtxChain, PosMap, Rs)
+            end
+    end;
+bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
+               CtxChain, PosMap, Rs) ->
+    Start = bs_subst_ctx(FromPos, CtxChain),
+    #{Start:=FromPos} = PosMap,                 %Assertion.
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
+               CtxChain, PosMap0, Rs0) ->
+    {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+    PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
+    %% We can land here from any point, so all positions are invalid.
+    PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
+               CtxChain, PosMap0, Rs0)
+  when Op =:= bs_test_tail;
+       Op =:= bs_get_tail ->
+    {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
+    bs_restores_is(Is, CtxChain, PosMap, Rs);
+bs_restores_is([], _CtxChain, PosMap, Rs) ->
+    {PosMap,Rs}.
+
+bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
+                             #b_literal{val=binary},_Flags,
+                             #b_literal{val=all},#b_literal{val=U}]}) ->
+    case U of
+        1 -> none;
+        _ -> test_unit
+    end;
+bs_match_type(_) ->
+    plain.
+
+%% Call instructions leave the match position in an undefined state,
+%% requiring us to invalidate each affected argument.
+bs_invalidate_pos([#b_var{}=Arg|Args], PosMap0, CtxChain) ->
+    Start = bs_subst_ctx(Arg, CtxChain),
+    case PosMap0 of
+        #{Start:=_} ->
+            PosMap = PosMap0#{Start:=unknown},
+            bs_invalidate_pos(Args, PosMap, CtxChain);
+        #{} ->
+            %% Not a match context.
+            bs_invalidate_pos(Args, PosMap0, CtxChain)
+    end;
+bs_invalidate_pos([_|Args], PosMap, CtxChain) ->
+    bs_invalidate_pos(Args, PosMap, CtxChain);
+bs_invalidate_pos([], PosMap, _CtxChain) ->
+    PosMap.
+
+bs_restore_args([#b_var{}=Arg|Args], PosMap0, CtxChain, Dst, Rs0) ->
+    Start = bs_subst_ctx(Arg, CtxChain),
+    case PosMap0 of
+        #{Start:=Arg} ->
+            %% Same position, no restore needed.
+            bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0);
+        #{Start:=_} ->
+            %% Different positions, need a restore instruction.
+            PosMap = PosMap0#{Start:=Arg},
+            Rs = Rs0#{Dst=>{Start,Arg}},
+            bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
+        #{} ->
+            %% Not a match context.
+            bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0)
+    end;
+bs_restore_args([_|Args], PosMap, CtxChain, Dst, Rs) ->
+    bs_restore_args(Args, PosMap, CtxChain, Dst, Rs);
+bs_restore_args([], PosMap, _CtxChain, _Dst, Rs) ->
+    {Rs,PosMap}.
+
+%% Insert all bs_save and bs_restore instructions.
+
+bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
+    bs_insert_1(Blocks, Saves, Restores, SavePoints, fun(I) -> I end).
+
+bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
+    %% The old instructions require bs_start_match to be annotated with the
+    %% number of position slots it needs.
+    bs_insert_1(Blocks, Saves, Restores, SavePoints,
+                fun(#b_set{op=bs_start_match,dst=Dst}=I0) ->
+                        NumSlots = case SavePoints of
+                                       #{Dst:=NumSlots0} -> NumSlots0;
+                                       #{} -> 0
+                                   end,
+                        beam_ssa:add_anno(num_slots, NumSlots, I0);
+                   (I) ->
+                        I
+                end).
+
+bs_insert_1([{L,#b_blk{is=Is0}=Blk}|Bs0], Saves, Restores, Slots, XFrm) ->
+    Is = bs_insert_is_1(Is0, Restores, Slots, XFrm),
+    Bs = bs_insert_saves(Is, Bs0, Saves),
+    [{L,Blk#b_blk{is=Is}}|bs_insert_1(Bs, Saves, Restores, Slots, XFrm)];
+bs_insert_1([], _, _, _, _) -> [].
+
+bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, SavePoints, XFrm) ->
+    I = XFrm(I0),
+    if
+        Op =:= bs_test_tail;
+        Op =:= bs_get_tail;
+        Op =:= bs_match;
+        Op =:= call ->
+            Rs = case Restores of
+                     #{Dst:=R} -> [R];
+                     #{} -> []
+                 end,
+            Rs ++ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)];
+        true ->
+            [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)]
+    end;
+bs_insert_is_1([], _, _, _) -> [].
+
+bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
+    case Saves of
+        #{Dst:=S} ->
+            bs_insert_save(S, Bs);
+        #{} ->
+            bs_insert_saves(Is, Bs, Saves)
+    end;
+bs_insert_saves([], Bs, _) -> Bs.
+
+bs_insert_save(Save, [{L,#b_blk{is=Is0}=Blk}|Bs]) ->
+    Is = case Is0 of
+             [#b_set{op=bs_extract}=Ex|Is1] ->
+                 [Ex,Save|Is1];
+             _ ->
+                 [Save|Is0]
+         end,
+    [{L,Blk#b_blk{is=Is}}|Bs].
+
+%% Translate bs_match instructions to bs_get, bs_match_string,
+%% or bs_skip. Also rename match context variables to use the
+%% variable assigned to by the start_match instruction.
+
+bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
+    case bs_instrs_is(Is0, CtxChain, []) of
+        [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is] ->
+            %% Drop this instruction. Rewrite the corresponding
+            %% bs_match instruction in the previous block to
+            %% a bs_get instruction.
+            Acc = bs_combine(Dst, Ctx, Acc0),
+            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]);
+        Is ->
+            bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc0])
+    end;
+bs_instrs([], _, Acc) ->
+    reverse(Acc).
+
+bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
+    Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
+    I1 = I0#b_set{args=Args},
+    I = case {Op,Args} of
+            {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
+                I1#b_set{op=bs_skip,args=[Type,Ctx|As]};
+            {bs_match,[#b_literal{val=string},Ctx|As]} ->
+                I1#b_set{op=bs_match_string,args=[Ctx|As]};
+            {bs_get_tail,[Ctx|As]} ->
+                I1#b_set{op=bs_get_tail,args=[Ctx|As]};
+            {_,_} ->
+                I1
+        end,
+    bs_instrs_is(Is, CtxChain, [I|Acc]);
+bs_instrs_is([], _, Acc) ->
+    reverse(Acc).
+
+%% Combine a bs_match instruction with the destination register
+%% taken from a bs_extract instruction.
+
+bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
+    [#b_set{}=Succeeded,
+     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
+    Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
+                       Succeeded#b_set{args=[Dst]}]),
+    [{L,Blk#b_blk{is=Is}}|Acc].
+
+bs_subst_ctx(#b_var{}=Var, CtxChain) ->
+    case CtxChain of
+        #{Var:={context,Ctx}} ->
+            Ctx;
+        #{Var:=ParentCtx} ->
+            bs_subst_ctx(ParentCtx, CtxChain);
+        #{} ->
+            %% Not a match context variable.
+            Var
+    end;
+bs_subst_ctx(Other, _CtxChain) ->
+    Other.
+
+%% legacy_bs(St0) -> St.
+%%  Binary matching instructions in OTP 21 and earlier don't support
+%%  a Y register as destination. If St#st.use_bsm3 is false,
+%%  we will need to rewrite those instructions so that the result
+%%  is first put in an X register and then moved to a Y register
+%%  if the operation succeeded.
+
+legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
+    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
+    Linear0 = beam_ssa:linearize(Blocks0),
+    {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
+    Blocks = maps:from_list(Linear),
+    St#st{ssa=Blocks,cnt=Count};
+legacy_bs(#st{use_bsm3=true}=St) -> St.
+
+legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
+    #b_blk{is=Is0,last=Last} = Blk,
+    Is1 = case Copies0 of
+              #{L:=Copy} -> [Copy|Is0];
+              #{} -> Is0
+          end,
+    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
+    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
+legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
+    {Acc,Count}.
+
+legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
+              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
+             Last, IsYreg, Count0, Copies0, Acc) ->
+    NeedsFix = is_map_key(Dst, IsYreg) andalso
+        case Op of
+            bs_get -> true;
+            bs_init -> true;
+            _ -> false
+        end,
+    case NeedsFix of
+        true ->
+            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
+            Count = Count0 + 1,
+            I = I0#b_set{dst=TempDst},
+            SuccI = SuccI0#b_set{args=[TempDst]},
+            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
+            #b_br{bool=SuccDst,succ=SuccL} = Last,
+            Copies = Copies0#{SuccL=>Copy},
+            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
+        false ->
+            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
+    end;
+legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
+    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
+legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
+    {reverse(Acc),Count,Copies}.
+
+%% sanitize(St0) -> St.
+%%  Remove constructs that can cause problems later:
+%%
+%%  * Unreachable blocks may cause problems for determination of
+%%  dominators.
+%%
+%%  * Some instructions (such as get_hd) don't accept literal
+%%  arguments. Evaluate the instructions and remove them.
+
+sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    Ls = beam_ssa:rpo(Blocks0),
+    {Blocks,Count} = sanitize(Ls, Count0, Blocks0, #{}),
+    St#st{ssa=Blocks,cnt=Count}.
+
+sanitize([L|Ls], Count0, Blocks0, Values0) ->
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks0),
+    case sanitize_is(Is0, Count0, Values0, false, []) of
+        no_change ->
+            sanitize(Ls, Count0, Blocks0, Values0);
+        {Is,Count,Values} ->
+            Blk = Blk0#b_blk{is=Is},
+            Blocks = Blocks0#{L:=Blk},
+            sanitize(Ls, Count, Blocks, Values)
+    end;
+sanitize([], Count, Blocks0, Values) ->
+    Blocks = if
+                 map_size(Values) =:= 0 ->
+                     Blocks0;
+                 true ->
+                     beam_ssa:rename_vars(Values, [0], Blocks0)
+             end,
+
+    %% Unreachable blocks can cause problems for the dominator calculations.
+    Ls = beam_ssa:rpo(Blocks),
+    Reachable = gb_sets:from_list(Ls),
+    {case map_size(Blocks) =:= gb_sets:size(Reachable) of
+         true -> Blocks;
+         false -> remove_unreachable(Ls, Blocks, Reachable, [])
+     end,Count}.
+
+sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
+            Count0, Values, Changed, Acc) ->
+    case sanitize_args(Args0, Values) of
+        [#b_literal{}=Map,Key] ->
+            %% Bind the literal map to a variable.
+            {MapVar,Count} = new_var('@ssa_map', Count0),
+            I = I0#b_set{args=[MapVar,Key]},
+            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
+            sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+        [_,_]=Args0 ->
+            sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
+        [_,_]=Args ->
+            I = I0#b_set{args=Args},
+            sanitize_is(Is, Count0, Values, Changed, [I|Acc])
+    end;
+sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
+            Count, Values, Changed0, Acc) ->
+    Args = sanitize_args(Args0, Values),
+    case sanitize_instr(Op, Args, I0) of
+        {value,Value0} ->
+            Value = #b_literal{val=Value0},
+            sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
+        {ok,I} ->
+            sanitize_is(Is0, Count, Values, true, [I|Acc]);
+        ok ->
+            I = I0#b_set{args=Args},
+            Changed = Changed0 orelse Args =/= Args0,
+            sanitize_is(Is0, Count, Values, Changed, [I|Acc])
+    end;
+sanitize_is([], Count, Values, Changed, Acc) ->
+    case Changed of
+        true ->
+            {reverse(Acc),Count,Values};
+        false ->
+            no_change
+    end.
+
+sanitize_args(Args, Values) ->
+    map(fun(Var) ->
+                case Values of
+                    #{Var:=New} -> New;
+                    #{} -> Var
+                end
+        end, Args).
+
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
+    case erl_bifs:is_pure(erlang, Bif, 1) of
+        false ->
+            ok;
+        true ->
+            try
+                {value,erlang:Bif(Lit)}
+            catch
+                error:_ ->
+                    ok
+            end
+    end;
+sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
+    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
+    try
+        {value,erlang:Bif(Lit1, Lit2)}
+    catch
+        error:_ ->
+            ok
+    end;
+sanitize_instr(get_hd, [#b_literal{val=[Hd|_]}], _I) ->
+    {value,Hd};
+sanitize_instr(get_tl, [#b_literal{val=[_|Tl]}], _I) ->
+    {value,Tl};
+sanitize_instr(get_tuple_element, [#b_literal{val=T},
+                                   #b_literal{val=I}], _I)
+  when I < tuple_size(T) ->
+    {value,element(I+1, T)};
+sanitize_instr(is_nonempty_list, [#b_literal{val=Lit}], _I) ->
+    {value,case Lit of
+               [_|_] -> true;
+               _ -> false
+           end};
+sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
+                                 #b_literal{val=Arity},
+                                 #b_literal{val=Tag}], _I)
+  when is_integer(Arity), is_atom(Tag) ->
+    if
+        tuple_size(Tuple) =:= Arity, element(1, Tuple) =:= Tag ->
+            {value,true};
+        true ->
+            {value,false}
+    end;
+sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
+    if
+        is_integer(Sz), Sz >= 0 -> ok;
+        true -> {ok,sanitize_badarg(I0)}
+    end;
+sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], I0) ->
+    if
+        is_integer(Sz), Sz >= 0 -> ok;
+        true -> {ok,sanitize_badarg(I0)}
+    end;
+sanitize_instr(succeeded, [#b_literal{}], _I) ->
+    {value,true};
+sanitize_instr(_, _, _) -> ok.
+
+sanitize_badarg(I) ->
+    Func = #b_remote{mod=#b_literal{val=erlang},
+                     name=#b_literal{val=error},arity=1},
+    I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
+
+remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
+    case split_phis(Is0) of
+        {[_|_]=Phis,Rest} ->
+            Is = [prune_phi(Phi, Reachable) || Phi <- Phis] ++ Rest,
+            Blk = Blk0#b_blk{is=Is},
+            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk}|Acc]);
+        {[],_} ->
+            remove_unreachable(Ls, Blocks, Reachable, [{L,Blk0}|Acc])
+    end;
+remove_unreachable([], _Blocks, _, Acc) ->
+    maps:from_list(Acc).
+
+prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
+    Args = [A || {_,Pred}=A <- Args0,
+                 gb_sets:is_element(Pred, Reachable)],
+    Phi#b_set{args=Args}.
+
+%%%
+%%% Fix tuples.
+%%%
+
+%% fix_tuples(St0) -> St.
+%%  If compatibility with a previous version of Erlang has been
+%%  requested, tuple creation must be split into two instruction to
+%%  mirror the the way tuples are created in BEAM prior to OTP 22.
+%%  Each put_tuple instruction is split into put_tuple_arity followed
+%%  by put_tuple_elements.
+
+fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    F = fun (#b_set{op=put_tuple,args=Args}=Put, C0) ->
+                Arity = #b_literal{val=length(Args)},
+                {Ignore,C} = new_var('@ssa_ignore', C0),
+                {[Put#b_set{op=put_tuple_arity,args=[Arity]},
+                  #b_set{dst=Ignore,op=put_tuple_elements,args=Args}],C};
+           (I, C) -> {[I],C}
+        end,
+    {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(F, [0], Count0, Blocks0),
+    St#st{ssa=Blocks,cnt=Count}.
+
+%%%
+%%% Find out where frames should be placed.
+%%%
+
+%% place_frames(St0) -> St.
+%%   Return a list of the labels for the blocks that need stack frame
+%%   allocation instructions.
+%%
+%%   This function attempts to place stack frames as tight as possible
+%%   around the code, to avoid building stack frames for code paths
+%%   that don't need one.
+%%
+%%   Stack frames are placed in blocks that dominate all of their
+%%   descendants. That guarantees that the deallocation instructions
+%%   cannot be reached from other execution paths that didn't set up
+%%   a stack frame or set up a stack frame with a different size.
+
+place_frames(#st{ssa=Blocks}=St) ->
+    {Doms,_} = beam_ssa:dominators(Blocks),
+    Ls = beam_ssa:rpo(Blocks),
+    Tried = gb_sets:empty(),
+    Frames0 = [],
+    {Frames,_} = place_frames_1(Ls, Blocks, Doms, Tried, Frames0),
+    St#st{frames=Frames}.
+
+place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
+    Blk = map_get(L, Blocks),
+    case need_frame(Blk) of
+        true ->
+            %% This block needs a frame. Try to place it here.
+            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),
+
+            %% Successfully placed. Try to place more frames in descendants
+            %% that are not dominated by this block.
+            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+        false ->
+            try
+                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
+            catch
+                throw:{need_frame,For,Tried1}=Reason ->
+                    %% An descendant block needs a stack frame. Try to
+                    %% place it here.
+                    case is_dominated_by(For, L, Doms) of
+                        true ->
+                            %% Try to place a frame here.
+                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
+                                                            Tried1, Frames0),
+                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
+                        false ->
+                            %% Wrong place. This block does not dominate
+                            %% the block that needs the frame. Pass it on
+                            %% to our ancestors.
+                            throw(Reason)
+                    end
+            end
+    end;
+place_frames_1([], _, _, Tried, Frames) ->
+    {Frames,Tried}.
+
+%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
+%%  Try to place a frame in this block. This function returns
+%%  successfully if it either succeds at placing a frame in this
+%%  block, if an ancestor that dominates this block has already placed
+%%  a frame, or if we have already tried to put a frame in this block.
+%%
+%%  An {need_frame,Label,Tried} exception will be thrown if this block
+%%  block is not suitable for having a stack frame (i.e. it does not dominate
+%%  all of its descendants). The exception means that an ancestor will have to
+%%  place the frame needed by this block.
+
+do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
+    case gb_sets:is_element(L, Tried0) of
+        true ->
+            %% We have already tried to put a frame in this block.
+            {Frames,Tried0};
+        false ->
+            %% Try to place a frame in this block.
+            Tried = gb_sets:insert(L, Tried0),
+            case place_frame_here(L, Blocks, Doms, Frames) of
+                yes ->
+                    %% We need a frame and it is safe to place it here.
+                    {[L|Frames],Tried};
+                no ->
+                    %% An ancestor has a frame. Not needed.
+                    {Frames,Tried};
+                ancestor ->
+                    %% This block does not dominate all of its
+                    %% descendants. We must place the frame in
+                    %% an ancestor.
+                    throw({need_frame,L,Tried})
+            end
+    end.
+
+%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
+%%  Determine whether a frame should be placed in block Label.
+
+place_frame_here(L, Blocks, Doms, Frames) ->
+    B0 = any(fun(DomBy) ->
+                     is_dominated_by(L, DomBy, Doms)
+             end, Frames),
+    case B0 of
+        true ->
+            %% This block is dominated by an ancestor block that
+            %% defines a frame. Not needed/allowed to put a frame
+            %% here.
+            no;
+        false ->
+            %% No frame in any ancestor. We need a frame.
+            %% Now check whether the frame can be placed here.
+            %% If this block dominates all of its descendants
+            %% and the predecessors of any phi nodes it can be
+            %% placed here.
+            Descendants = beam_ssa:rpo([L], Blocks),
+            PhiPredecessors = phi_predecessors(L, Blocks),
+            MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
+            Dominates = all(fun(?BADARG_BLOCK) ->
+                                    %% This block defines no variables and calls
+                                    %% erlang:error(badarg). It does not matter
+                                    %% whether L dominates ?BADARG_BLOCK or not;
+                                    %% it is still safe to put the frame in L.
+                                    true;
+                               (Bl) ->
+                                    is_dominated_by(Bl, L, Doms)
+                            end, MustDominate),
+
+            %% Also, this block must not be a loop header.
+            IsLoopHeader = is_loop_header(L, Blocks),
+            case Dominates andalso not IsLoopHeader of
+                true -> yes;
+                false -> ancestor
+            end
+    end.
+
+%% phi_predecessors(Label, Blocks) ->
+%%  Return all predecessors referenced in phi nodes.
+
+phi_predecessors(L, Blocks) ->
+    #b_blk{is=Is} = map_get(L, Blocks),
+    [P || #b_set{op=phi,args=Args} <- Is, {_,P} <- Args].
+
+%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
+%%  Test whether block Label is dominated by block DominatedBy.
+
+is_dominated_by(L, DomBy, Doms) ->
+    DominatedBy = map_get(L, Doms),
+    member(DomBy, DominatedBy).
+
+%% need_frame(#b_blk{}) -> true|false.
+%%  Test whether any of the instructions in the block requires a stack frame.
+
+need_frame(#b_blk{is=Is,last=#b_ret{arg=Ret}}) ->
+    need_frame_1(Is, {return,Ret});
+need_frame(#b_blk{is=Is}) ->
+    need_frame_1(Is, body).
+
+need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
+    %% Since make_fun clobbers X registers, a stack frame is needed if
+    %% any of the following instructions use any other variable than
+    %% the one holding the reference to the created fun.
+    need_frame_1(Is, Context) orelse
+        case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
+            [Fun] -> false;
+            [_|_] -> true
+        end;
+need_frame_1([#b_set{op=new_try_tag}|_], _) ->
+    true;
+need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
+    if
+        Val =:= Ret -> need_frame_1(Is, tail);
+        true -> need_frame_1(Is, body)
+    end;
+need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
+    case Func of
+        #b_remote{mod=#b_literal{val=Mod},
+                  name=#b_literal{val=Name},
+                  arity=Arity} when is_atom(Mod), is_atom(Name) ->
+            case erl_bifs:is_exit_bif(Mod, Name, Arity) of
+                true ->
+                    false;
+                false ->
+                    Context =:= body orelse
+                        Is =/= [] orelse
+                        is_trap_bif(Mod, Name, Arity)
+                end;
+        #b_remote{} ->
+            %% This is an apply(), which always needs a frame.
+            true;
+        #b_local{} ->
+            Context =:= body orelse Is =/= [];
+        _ ->
+             %% A fun call always needs a frame.
+            true
+    end;
+need_frame_1([I|Is], Context) ->
+    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
+need_frame_1([], _) -> false.
+
+%% is_trap_bif(Mod, Name, Arity) -> true|false.
+%%   Test whether we need a stack frame for this BIF.
+
+is_trap_bif(erlang, '!', 2) -> true;
+is_trap_bif(erlang, link, 1) -> true;
+is_trap_bif(erlang, unlink, 1) -> true;
+is_trap_bif(erlang, monitor_node, 2) -> true;
+is_trap_bif(erlang, group_leader, 2) -> true;
+is_trap_bif(erlang, exit, 2) -> true;
+is_trap_bif(_, _, _) -> false.
+
+%%%
+%%% Fix variables used in matching in receive.
+%%%
+%%% The loop_rec/2 instruction may return a reference to a
+%%% message outside of any heap or heap fragment. If the message
+%%% does not match, it is not allowed to store any reference to
+%%% the message (or part of the message) on the stack. If we do,
+%%% the message will be corrupted if there happens to be a GC.
+%%%
+%%% Here we make sure to introduce copies of variables that are
+%%% matched out and subsequently used after the remove_message/0
+%%% instructions. That will make sure that only X registers are
+%%% used during matching.
+%%%
+%%% Depending on where variables are defined and used, they must
+%%% be handled in two different ways.
+%%%
+%%% Variables that are always defined in the receive (before branching
+%%% out into the different clauses of the receive) and used after the
+%%% receive must be handled in the following way: Before each
+%%% remove_message instruction, each such variable must be copied, and
+%%% all variables must be consolidated using a phi node in the
+%%% common exit block for the receive.
+%%%
+%%% Variables that are matched out and used in the same clause
+%%% need copy instructions before the remove_message instruction
+%%% in that clause.
+%%%
+
+fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
+    {Blocks,Count} = fix_receives_1(maps:to_list(Blocks0),
+                                    Blocks0, Count0),
+    St#st{ssa=Blocks,cnt=Count}.
+
+fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
+    case Blk of
+        #b_blk{is=[#b_set{op=peek_message}|_]} ->
+            Rm = find_rm_blocks(L, Blocks0),
+            LoopExit = find_loop_exit(Rm, Blocks0),
+            Defs0 = beam_ssa:def([L], Blocks0),
+            CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
+            {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
+                                               Blocks0, Count0),
+            Defs = ordsets:subtract(Defs0, CommonUsed),
+            {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
+            fix_receives_1(Ls, Blocks, Count);
+        #b_blk{} ->
+            fix_receives_1(Ls, Blocks0, Count0)
+    end;
+fix_receives_1([], Blocks, Count) ->
+    {Blocks,Count}.
+
+recv_common(_Defs, none, _Blocks) ->
+    %% There is no common exit block because receive is used
+    %% in the tail position of a function.
+    [];
+recv_common(Defs, Exit, Blocks) ->
+    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
+    Def = ordsets:subtract(Defs, ExitDefs),
+    ordsets:intersection(Def, ExitUsed).
+
+%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
+%%                 Blocks0, Count0) -> {Blocks,Count}.
+%%  Handle variables alwys defined in a receive and used
+%%  in the exit block following the receive.
+
+recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
+    {Msg,Count1} = new_var('@recv', Count0),
+    Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
+    N = length(Rm),
+    {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
+    PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
+    Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
+    ExitBlk0 = map_get(Exit, Blocks1),
+    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
+    Blocks2 = Blocks1#{Exit:=ExitBlk},
+    Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
+    recv_fix_common(T, Exit, Rm, Blocks, Count);
+recv_fix_common([], _, _, Blocks, Count) ->
+    {Blocks,Count}.
+
+recv_fix_common_1([V|Vs], [Rm|Rms], Msg, Blocks0) ->
+    Ren = #{Msg=>V},
+    Blocks1 = beam_ssa:rename_vars(Ren, [Rm], Blocks0),
+    #b_blk{is=Is0} = Blk0 = map_get(Rm, Blocks1),
+    Copy = #b_set{op=copy,dst=V,args=[Msg]},
+    Is = insert_after_phis(Is0, [Copy]),
+    Blk = Blk0#b_blk{is=Is},
+    Blocks = Blocks1#{Rm:=Blk},
+    recv_fix_common_1(Vs, Rms, Msg, Blocks);
+recv_fix_common_1([], [], _Msg, Blocks) -> Blocks.
+
+fix_exit_phi_args([V|Vs], [Rm|Rms], Exit, Blocks) ->
+    Path = beam_ssa:rpo([Rm], Blocks),
+    Preds = exit_predecessors(Path, Exit, Blocks),
+    [{V,Pred} || Pred <- Preds] ++ fix_exit_phi_args(Vs, Rms, Exit, Blocks);
+fix_exit_phi_args([], [], _, _) -> [].
+
+exit_predecessors([L|Ls], Exit, Blocks) ->
+    Blk = map_get(L, Blocks),
+    case member(Exit, beam_ssa:successors(Blk)) of
+        true ->
+            [L|exit_predecessors(Ls, Exit, Blocks)];
+        false ->
+            exit_predecessors(Ls, Exit, Blocks)
+    end;
+exit_predecessors([], _Exit, _Blocks) -> [].
+
+%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
+%%  Add a copy instruction for all variables that are matched out and
+%%  later used within a clause of the receive.
+
+fix_receive([L|Ls], Defs, Blocks0, Count0) ->
+    {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
+    Def = ordsets:subtract(Defs, RmDefs),
+    Used = ordsets:intersection(Def, Used0),
+    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
+    Ren = zip(Used, NewVars),
+    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+    #b_blk{is=Is0} = Blk1 = map_get(L, Blocks1),
+    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
+    Is = insert_after_phis(Is0, CopyIs),
+    Blk = Blk1#b_blk{is=Is},
+    Blocks = Blocks1#{L:=Blk},
+    fix_receive(Ls, Defs, Blocks, Count);
+fix_receive([], _Defs, Blocks, Count) ->
+    {Blocks,Count}.
+
+%% find_loop_exit([Label], Blocks) -> Label | none.
+%%  Find the block to which control is transferred when the
+%%  the receive loop is exited.
+
+find_loop_exit([L1,L2|_Ls], Blocks) ->
+    Path1 = beam_ssa:rpo([L1], Blocks),
+    Path2 = beam_ssa:rpo([L2], Blocks),
+    find_loop_exit_1(reverse(Path1), reverse(Path2), none);
+find_loop_exit(_, _) -> none.
+
+find_loop_exit_1([H|T1], [H|T2], _) ->
+    find_loop_exit_1(T1, T2, H);
+find_loop_exit_1(_, _, Exit) -> Exit.
+
+%% find_rm_blocks(StartLabel, Blocks) -> [Label].
+%%  Find all blocks that start with remove_message within the receive
+%%  loop whose peek_message label is StartLabel.
+
+find_rm_blocks(L, Blocks) ->
+    Seen = gb_sets:singleton(L),
+    Blk = map_get(L, Blocks),
+    Succ = beam_ssa:successors(Blk),
+    find_rm_blocks_1(Succ, Seen, Blocks).
+
+find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
+    case gb_sets:is_member(L, Seen0) of
+        true ->
+            find_rm_blocks_1(Ls, Seen0, Blocks);
+        false ->
+            Seen = gb_sets:insert(L, Seen0),
+            Blk = map_get(L, Blocks),
+            case find_rm_act(Blk#b_blk.is) of
+                prune ->
+                    %% Looping back. Don't look at any successors.
+                    find_rm_blocks_1(Ls, Seen, Blocks);
+                continue ->
+                    %% Neutral block. Do nothing here, but look at
+                    %% all successors.
+                    Succ = beam_ssa:successors(Blk),
+                    find_rm_blocks_1(Succ++Ls, Seen, Blocks);
+                found ->
+                    %% Found remove_message instruction.
+                    [L|find_rm_blocks_1(Ls, Seen, Blocks)]
+            end
+    end;
+find_rm_blocks_1([], _, _) -> [].
+
+find_rm_act([#b_set{op=Op}|Is]) ->
+    case Op of
+        remove_message -> found;
+        peek_message -> prune;
+        recv_next -> prune;
+        wait_timeout -> prune;
+        wait -> prune;
+        _ -> find_rm_act(Is)
+    end;
+find_rm_act([]) ->
+    continue.
+
+%%%
+%%% Find out which variables need to be stored in Y registers.
+%%%
+
+-record(dk, {d :: ordsets:ordset(var_name()),
+             k :: ordsets:ordset(var_name())
+            }).
+
+%% find_yregs(St0) -> St.
+%%  Find all variables that must be stored in Y registers. Annotate
+%%  the blocks that allocate frames with the set of Y registers
+%%  used within that stack frame.
+%%
+%%  Basically, we following all execution paths starting from a block
+%%  that allocates a frame, keeping track of of all defined registers
+%%  and all registers killed by an instruction that clobbers X
+%%  registers. For every use of a variable, we check if if it is in
+%%  the set of killed variables; if it is, it must be stored in an Y
+%%  register.
+
+find_yregs(#st{frames=[]}=St) ->
+    St;
+find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
+    FrameDefs = find_defs(Frames, Blocks0, [V || #b_var{}=V <- Args]),
+    Blocks = find_yregs_1(FrameDefs, Blocks0),
+    St#st{ssa=Blocks}.
+
+find_yregs_1([{F,Defs}|Fs], Blocks0) ->
+    DK = #dk{d=Defs,k=[]},
+    D0 = #{F=>DK},
+    Ls = beam_ssa:rpo([F], Blocks0),
+    Yregs0 = [],
+    Yregs = find_yregs_2(Ls, Blocks0, D0, Yregs0),
+    Blk0 = map_get(F, Blocks0),
+    Blk = beam_ssa:add_anno(yregs, Yregs, Blk0),
+    Blocks = Blocks0#{F:=Blk},
+    find_yregs_1(Fs, Blocks);
+find_yregs_1([], Blocks) -> Blocks.
+
+find_yregs_2([L|Ls], Blocks0, D0, Yregs0) ->
+    Blk0 = map_get(L, Blocks0),
+    #b_blk{is=Is,last=Last} = Blk0,
+    Ys0 = map_get(L, D0),
+    {Yregs1,Ys} = find_yregs_is(Is, Ys0, Yregs0),
+    Yregs = find_yregs_terminator(Last, Ys, Yregs1),
+    Successors = beam_ssa:successors(Blk0),
+    D = find_update_succ(Successors, Ys, D0),
+    find_yregs_2(Ls, Blocks0, D, Yregs);
+find_yregs_2([], _Blocks, _D, Yregs) -> Yregs.
+
+find_defs(Frames, Blocks, Defs) ->
+    Seen = gb_sets:empty(),
+    FramesSet = gb_sets:from_list(Frames),
+    {FrameDefs,_} = find_defs_1([0], Blocks, FramesSet, Seen, Defs, []),
+    FrameDefs.
+
+find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
+    case gb_sets:is_member(L, Frames) of
+        true ->
+            OrderedDefs = ordsets:from_list(Defs0),
+            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
+                        [{L,OrderedDefs}|Acc0]);
+        false ->
+            case gb_sets:is_member(L, Seen0) of
+                true ->
+                    find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
+                false ->
+                    Seen1 = gb_sets:insert(L, Seen0),
+                    {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
+                    #b_blk{is=Is} = Blk = map_get(L, Blocks),
+                    Defs = find_defs_is(Is, Defs0),
+                    Successors = beam_ssa:successors(Blk),
+                    find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
+            end
+    end;
+find_defs_1([], _, _, Seen, _, Acc) ->
+    {Acc,Seen}.
+
+find_defs_is([#b_set{dst=Dst}|Is], Acc) ->
+    find_defs_is(Is, [Dst|Acc]);
+find_defs_is([], Acc) -> Acc.
+
+find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D0) ->
+    case D0 of
+        #{S:=#dk{d=Defs1,k=Killed1}} ->
+            Defs = ordsets:intersection(Defs0, Defs1),
+            Killed = ordsets:union(Killed0, Killed1),
+            DK = #dk{d=Defs,k=Killed},
+            D = D0#{S:=DK},
+            find_update_succ(Ss, DK0, D);
+        #{} ->
+            D = D0#{S=>DK0},
+            find_update_succ(Ss, DK0, D)
+    end;
+find_update_succ([], _, D) -> D.
+
+find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs0,k=Killed0}=Ys, Yregs0) ->
+    Used = beam_ssa:used(I),
+    Yregs1 = ordsets:intersection(Used, Killed0),
+    Yregs = ordsets:union(Yregs0, Yregs1),
+    case beam_ssa:clobbers_xregs(I) of
+        false ->
+            Defs = ordsets:add_element(Dst, Defs0),
+            find_yregs_is(Is, Ys#dk{d=Defs}, Yregs);
+        true ->
+            Killed = ordsets:union(Defs0, Killed0),
+            Defs = [Dst],
+            find_yregs_is(Is, Ys#dk{d=Defs,k=Killed}, Yregs)
+    end;
+find_yregs_is([], Ys, Yregs) -> {Yregs,Ys}.
+
+find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs0) ->
+    Used = beam_ssa:used(Terminator),
+    Yregs = ordsets:intersection(Used, Killed),
+    ordsets:union(Yregs0, Yregs).
+
+%%%
+%%% Try to reduce the size of the stack frame, by adding an explicit
+%%% 'copy' instructions for return values from 'call' and 'make_fun' that
+%%% need to be saved in Y registers. Here is an example to show
+%%% how that's useful. First, here is the Erlang code:
+%%%
+%%% f(Pid) ->
+%%%    Res = foo(42),
+%%%    _ = node(Pid),
+%%%    bar(),
+%%%    Res.
+%%%
+%%% Compiled to SSA format, the main part of the code looks like this:
+%%%
+%%% 0:
+%%%   Res = call local literal foo/1, literal 42
+%%%   _1 = bif:node Pid
+%%%   @ssa_bool = succeeded _1
+%%%   br @ssa_bool, label 3, label 1
+%%% 3:
+%%%   @ssa_ignored = call local literal bar/0
+%%%   ret Res
+%%%
+%%% It can be seen that the variables Pid and Res must be saved in Y
+%%% registers in order to survive the function calls. A previous sub
+%%% pass has inserted a 'copy' instruction to save the value of the
+%%% variable Pid:
+%%%
+%%% 0:
+%%%   Pid:4 = copy Pid
+%%%   Res = call local literal foo/1, literal 42
+%%%   _1 = bif:node Pid:4
+%%%   @ssa_bool = succeeded _1
+%%%   br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%%   @ssa_ignored = call local literal bar/0
+%%%   ret Res
+%%%
+%%% The Res and Pid:4 variables must be assigned to different Y registers
+%%% because they are live at the same time. copy_retval() inserts a
+%%% 'copy' instruction to copy Res to a new variable:
+%%%
+%%% 0:
+%%%   Pid:4 = copy Pid
+%%%   Res:6 = call local literal foo/1, literal 42
+%%%   _1 = bif:node Pid:4
+%%%   @ssa_bool = succeeded _1
+%%%   br @ssa_bool, label 3, label 1
+%%%
+%%% 3:
+%%%   Res = copy Res:6
+%%%   @ssa_ignored = call local literal bar/0
+%%%   ret Res
+%%%
+%%% The new variable Res:6 is used to capture the return value from the call.
+%%% The variables Pid:4 and Res are no longer live at the same time, so they
+%%% can be assigned to the same Y register.
+%%%
+
+copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Count0}=St) ->
+    {Blocks,Count} = copy_retval_1(Frames, Blocks0, Count0),
+    St#st{ssa=Blocks,cnt=Count}.
+
+copy_retval_1([F|Fs], Blocks0, Count0) ->
+    #b_blk{anno=#{yregs:=Yregs0},is=Is} = map_get(F, Blocks0),
+    Yregs1 = gb_sets:from_list(Yregs0),
+    Yregs = collect_yregs(Is, Yregs1),
+    Ls = beam_ssa:rpo([F], Blocks0),
+    {Blocks,Count} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0),
+    copy_retval_1(Fs, Blocks, Count);
+copy_retval_1([], Blocks, Count) ->
+    {Blocks,Count}.
+
+collect_yregs([#b_set{op=copy,dst=Y,args=[#b_var{}=X]}|Is],
+              Yregs0) ->
+    true = gb_sets:is_member(X, Yregs0),        %Assertion.
+    Yregs = gb_sets:insert(Y, gb_sets:delete(X, Yregs0)),
+    collect_yregs(Is, Yregs);
+collect_yregs([#b_set{}|Is], Yregs) ->
+    collect_yregs(Is, Yregs);
+collect_yregs([], Yregs) -> Yregs.
+
+copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
+    #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
+    RC = case {Last,Ls} of
+             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
+                 true;
+             {_,_} ->
+                 false
+         end,
+    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
+        {Is,Count} ->
+            case Copy0 =:= none andalso Count0 =:= Count of
+                true ->
+                    copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
+                false ->
+                    Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+                    copy_retval_2(Ls, Yregs, none, Blocks, Count)
+            end;
+        {Is,Count,Copy} ->
+            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
+            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
+    end;
+copy_retval_2([], _Yregs, none, Blocks, Count) ->
+    {Blocks,Count}.
+
+copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
+           Copy, Count, Acc) ->
+    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
+copy_retval_is([#b_set{op=Op}=I0], false, Yregs, Copy, Count0, Acc0)
+  when Op =:= call; Op =:= make_fun ->
+    {I,Count,Acc} = place_retval_copy(I0, Yregs, Copy, Count0, Acc0),
+    {reverse(Acc, [I]),Count};
+copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
+    {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
+    {reverse(Acc, acc_copy(Is, Copy)),Count};
+copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
+           Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
+    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
+    case gb_sets:is_member(Dst, Yregs) of
+        true ->
+            {NewVar,Count} = new_var(RetName, Count1),
+            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
+            I = I1#b_set{dst=NewVar},
+            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
+        false ->
+            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
+    end;
+copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
+    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
+    case beam_ssa:clobbers_xregs(I) of
+        true ->
+            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
+        false ->
+            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
+        end;
+copy_retval_is([], RC, _, Copy, Count, Acc) ->
+    case {Copy,RC} of
+        {none,_} ->
+            {reverse(Acc),Count};
+        {#b_set{},true} ->
+            {reverse(Acc),Count,Copy};
+        {#b_set{},false} ->
+            {reverse(Acc, [Copy]),Count}
+    end.
+
+%%
+%% Consider this code:
+%%
+%%   Var = ...
+%%   ...
+%%   A1 = call foo/0
+%%   A = copy A1
+%%   B = call bar/1, Var
+%%
+%% If the Var variable is no longer used after this code, its Y register
+%% can't be reused for A. To allow the Y register to be reused
+%% we will need to insert 'copy' instructions for arguments that are
+%% in Y registers:
+%%
+%%   Var = ...
+%%   ...
+%%   A1 = call foo/0
+%%   Var1 = copy Var
+%%   A = copy A1
+%%   B = call bar/1, Var1
+%%
+
+place_retval_copy(I, _Yregs, none, Count, Acc) ->
+    {I,Count,Acc};
+place_retval_copy(#b_set{args=[F|Args0]}=I, Yregs, Copy, Count0, Acc0) ->
+    #b_set{dst=Avoid} = Copy,
+    {Args,Acc1,Count} = copy_func_args(Args0, Yregs, Avoid, Acc0, [], Count0),
+    Acc = [Copy|Acc1],
+    {I#b_set{args=[F|Args]},Count,Acc}.
+
+copy_func_args([#b_var{name=AName}=A|As], Yregs, Avoid, CopyAcc, Acc, Count0) ->
+    case gb_sets:is_member(A, Yregs) of
+        true when A =/= Avoid ->
+            {NewVar,Count} = new_var(AName, Count0),
+            Copy = #b_set{op=copy,dst=NewVar,args=[A]},
+            copy_func_args(As, Yregs, Avoid, [Copy|CopyAcc], [NewVar|Acc], Count);
+        _ ->
+            copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count0)
+    end;
+copy_func_args([A|As], Yregs, Avoid, CopyAcc, Acc, Count) ->
+    copy_func_args(As, Yregs, Avoid, CopyAcc, [A|Acc], Count);
+copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
+    {reverse(Acc),CopyAcc,Count}.
+
+acc_copy(Acc, none) -> Acc;
+acc_copy(Acc, #b_set{}=Copy) -> [Copy|Acc].
+
+copy_sub_args(Args, none) ->
+    Args;
+copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
+    [sub_arg(A, Dst, Src) || A <- Args].
+
+sub_arg(Old, Old, New) -> New;
+sub_arg(Old, _, _) -> Old.
+
+%%%
+%%% Consider:
+%%%
+%%%   x1/Hd = get_hd x0/Cons
+%%%   y0/Tl = get_tl x0/Cons
+%%%
+%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
+%%% a 'move' instruction must be inserted.
+%%%
+%%% If we swap get_hd and get_tl when Tl is in a Y register,
+%%% x0 can be used for Hd if Cons is not used again:
+%%%
+%%%   y0/Tl = get_tl x0/Cons
+%%%   x0/Hd = get_hd x0/Cons
+%%%
+
+opt_get_list(#st{ssa=Blocks,res=Res}=St) ->
+    ResMap = maps:from_list(Res),
+    Ls = beam_ssa:rpo(Blocks),
+    St#st{ssa=opt_get_list_1(Ls, ResMap, Blocks)}.
+
+opt_get_list_1([L|Ls], Res, Blocks0) ->
+    #b_blk{is=Is0} = Blk = map_get(L, Blocks0),
+    case opt_get_list_is(Is0, Res, [], false) of
+        no ->
+            opt_get_list_1(Ls, Res, Blocks0);
+        {yes,Is} ->
+            Blocks = Blocks0#{L:=Blk#b_blk{is=Is}},
+            opt_get_list_1(Ls, Res, Blocks)
+    end;
+opt_get_list_1([], _, Blocks) -> Blocks.
+
+opt_get_list_is([#b_set{op=get_hd,dst=Hd,
+                        args=[Cons]}=GetHd,
+                 #b_set{op=get_tl,dst=Tl,
+                        args=[Cons]}=GetTl|Is],
+                Res, Acc, Changed) ->
+    %% Note that when this pass is run, only Y registers have
+    %% reservations. The absence of an entry for a variable therefore
+    %% means that the variable will be in an X register.
+    case Res of
+        #{Hd:={y,_}} ->
+            %% Hd will be in a Y register. Don't swap.
+            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed);
+        #{Tl:={y,_}} ->
+            %% Tl will be in a Y register. Swap.
+            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
+        #{} ->
+            %% Both are in X registers. Nothing to do.
+            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
+    end;
+opt_get_list_is([I|Is], Res, Acc, Changed) ->
+    opt_get_list_is(Is, Res, [I|Acc], Changed);
+opt_get_list_is([], _Res, Acc, Changed) ->
+    case Changed of
+        true ->
+            {yes,reverse(Acc)};
+        false ->
+            no
+    end.
+
+%%%
+%%% Number instructions in the order they are executed.
+%%%
+
+%% number_instructions(St0) -> St.
+%%  Number instructions in the order they are executed. Use a step
+%%  size of 2. Don't number phi instructions. All phi variables in
+%%  a block will be live one unit before the first non-phi instruction
+%%  in the block.
+
+number_instructions(#st{ssa=Blocks0}=St) ->
+    Ls = beam_ssa:rpo(Blocks0),
+    St#st{ssa=number_is_1(Ls, 1, Blocks0)}.
+
+number_is_1([L|Ls], N0, Blocks0) ->
+    #b_blk{is=Is0,last=Last0} = Bl0 = map_get(L, Blocks0),
+    {Is,N1} = number_is_2(Is0, N0, []),
+    Last = beam_ssa:add_anno(n, N1, Last0),
+    N = N1 + 2,
+    Bl = Bl0#b_blk{is=Is,last=Last},
+    Blocks = Blocks0#{L:=Bl},
+    number_is_1(Ls, N, Blocks);
+number_is_1([], _, Blocks) -> Blocks.
+
+number_is_2([#b_set{op=phi}=I|Is], N, Acc) ->
+    number_is_2(Is, N, [I|Acc]);
+number_is_2([I0|Is], N, Acc) ->
+    I = beam_ssa:add_anno(n, N, I0),
+    number_is_2(Is, N+2, [I|Acc]);
+number_is_2([], N, Acc) ->
+    {reverse(Acc),N}.
+
+%%%
+%%% Calculate live intervals.
+%%%
+
+live_intervals(#st{args=Args,ssa=Blocks}=St) ->
+    Vars0 = [{V,{0,1}} || #b_var{}=V <- Args],
+    F = fun(L, _, A) -> live_interval_blk(L, Blocks, A) end,
+    LiveMap0 = #{},
+    Acc0 = {[],LiveMap0},
+    {Vars,_} = beam_ssa:fold_po(F, Acc0, Blocks),
+    Intervals = merge_ranges(rel2fam(Vars0++Vars)),
+    St#st{intervals=Intervals}.
+
+merge_ranges([{V,Rs}|T]) ->
+    [{V,merge_ranges_1(Rs)}|merge_ranges(T)];
+merge_ranges([]) -> [].
+
+merge_ranges_1([{A,N},{N,Z}|Rs]) ->
+    merge_ranges_1([{A,Z}|Rs]);
+merge_ranges_1([R|Rs]) ->
+    [R|merge_ranges_1(Rs)];
+merge_ranges_1([]) -> [].
+
+live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
+    Live0 = [],
+    Successors = beam_ssa:successors(L, Blocks),
+    Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0),
+
+    %% Add ranges for all variables that are live in the successors.
+    #b_blk{is=Is,last=Last} = map_get(L, Blocks),
+    End = beam_ssa:get_anno(n, Last),
+    Use = [{V,{use,End+1}} || V <- Live1],
+
+    %% Determine used and defined variables in this block.
+    FirstNumber = first_number(Is, Last),
+    UseDef0 = live_interval_blk_1([Last|reverse(Is)], FirstNumber, Use),
+    UseDef = rel2fam(UseDef0),
+
+    %% Update what is live at the beginning of this block and
+    %% store it.
+    Used = [V || {V,[{use,_}|_]} <- UseDef],
+    Live2 = ordsets:union(Live1, Used),
+    Killed = [V || {V,[{def,_}|_]} <- UseDef],
+    Live = ordsets:subtract(Live2, Killed),
+    LiveMap = LiveMap0#{L=>Live},
+
+    %% Construct the ranges for this block.
+    Vars = make_block_ranges(UseDef, FirstNumber, Vars0),
+    {Vars,LiveMap}.
+
+make_block_ranges([{V,[{def,Def}]}|Vs], First, Acc) ->
+    make_block_ranges(Vs, First, [{V,{Def,Def}}|Acc]);
+make_block_ranges([{V,[{def,Def}|Uses]}|Vs], First, Acc) ->
+    {use,Last} = last(Uses),
+    make_block_ranges(Vs, First, [{V,{Def,Last}}|Acc]);
+make_block_ranges([{V,[{use,_}|_]=Uses}|Vs], First, Acc) ->
+    {use,Last} = last(Uses),
+    make_block_ranges(Vs, First, [{V,{First,Last}}|Acc]);
+make_block_ranges([], _, Acc) -> Acc.
+
+live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc0) ->
+    Acc = [{Dst,{def,FirstNumber}}|Acc0],
+    live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([#b_set{op=bs_start_match}=I|Is],
+                    FirstNumber, Acc0) ->
+    N = beam_ssa:get_anno(n, I),
+    #b_set{dst=Dst} = I,
+    Acc1 = [{Dst,{def,N}}|Acc0],
+    Acc = [{V,{use,N}} || V <- beam_ssa:used(I)] ++ Acc1,
+    live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([I|Is], FirstNumber, Acc0) ->
+    N = beam_ssa:get_anno(n, I),
+    Acc1 = case I of
+               #b_set{dst=Dst} ->
+                   [{Dst,{def,N}}|Acc0];
+               _ ->
+                   Acc0
+           end,
+    Used = beam_ssa:used(I),
+    Acc = [{V,{use,N}} || V <- Used] ++ Acc1,
+    live_interval_blk_1(Is, FirstNumber, Acc);
+live_interval_blk_1([], _FirstNumber, Acc) ->
+    Acc.
+
+%% first_number([#b_set{}]) -> InstructionNumber.
+%%  Return the number for the first instruction for the block.
+%%  Note that this number is one less than the first
+%%  non-phi instruction in the block.
+
+first_number([#b_set{op=phi}|Is], Last) ->
+    first_number(Is, Last);
+first_number([I|_], _) ->
+    beam_ssa:get_anno(n, I) - 1;
+first_number([], Last) ->
+    beam_ssa:get_anno(n, Last) - 1.
+
+update_successors([L|Ls], Pred, Blocks, LiveMap, Live0) ->
+    Live1 = ordsets:union(Live0, get_live(L, LiveMap)),
+    #b_blk{is=Is} = map_get(L, Blocks),
+    Live = update_live_phis(Is, Pred, Live1),
+    update_successors(Ls, Pred, Blocks, LiveMap, Live);
+update_successors([], _, _, _, Live) -> Live.
+
+get_live(L, LiveMap) ->
+    case LiveMap of
+        #{L:=Live} -> Live;
+        #{} -> []
+    end.
+
+update_live_phis([#b_set{op=phi,dst=Killed,args=Args}|Is],
+                 Pred, Live0) ->
+    Used = [V || {#b_var{}=V,L} <- Args, L =:= Pred],
+    Live1 = ordsets:union(ordsets:from_list(Used), Live0),
+    Live = ordsets:del_element(Killed, Live1),
+    update_live_phis(Is, Pred, Live);
+update_live_phis(_, _, Live) -> Live.
+
+%%%
+%%% Reserve Y registers.
+%%%
+
+%% reserve_yregs(St0) -> St.
+%%  In each block that allocates a stack frame, insert instructions
+%%  that copy variables that must be in Y registers (given by
+%%  the `yregs` annotation) to new variables.
+%%
+%%  Also allocate specific Y registers for try and catch tags.
+%%  The outermost try/catch tag is placed in y0, any directly
+%%  nested tag in y1, and so on. Note that this is the reversed
+%%  order as required by BEAM; it will be corrected later by
+%%  turn_yregs().
+
+reserve_yregs(#st{frames=Frames}=St0) ->
+    foldl(fun reserve_yregs_1/2, St0, Frames).
+
+reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
+    Blk = map_get(L, Blocks0),
+    Yregs = beam_ssa:get_anno(yregs, Blk),
+    {Def,Used} = beam_ssa:def_used([L], Blocks0),
+    UsedYregs = ordsets:intersection(Yregs, Used),
+    DefBefore = ordsets:subtract(UsedYregs, Def),
+    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
+    InsideVars = ordsets:subtract(UsedYregs, DefBefore),
+    ResTryTags0 = reserve_try_tags(L, Blocks),
+    ResTryTags = [{V,{Reg,Count}} || {V,Reg} <- ResTryTags0],
+    Vars = BeforeVars ++ InsideVars,
+    Res = [{V,{y,Count}} || V <- Vars] ++ ResTryTags ++ Res0,
+    St#st{res=Res,ssa=Blocks,cnt=Count+1}.
+
+reserve_try_tags(L, Blocks) ->
+    Seen = gb_sets:empty(),
+    {Res0,_} = reserve_try_tags_1([L], Blocks, Seen, #{}),
+    Res1 = [maps:to_list(M) || {_,M} <- maps:to_list(Res0)],
+    Res = [{V,{y,Y}} || {V,Y} <- append(Res1)],
+    ordsets:from_list(Res).
+
+reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
+    case gb_sets:is_element(L, Seen0) of
+        true ->
+            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
+        false ->
+            Seen1 = gb_sets:insert(L, Seen0),
+            #b_blk{is=Is} = Blk = map_get(L, Blocks),
+            Active0 = get_active(L, ActMap0),
+            Active = reserve_try_tags_is(Is, Active0),
+            Successors = beam_ssa:successors(Blk),
+            ActMap1 = update_act_map(Successors, Active, ActMap0),
+            {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
+            reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
+    end;
+reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
+    {ActMap,Seen}.
+
+get_active(L, ActMap) ->
+    case ActMap of
+        #{L:=Active} -> Active;
+        #{} -> #{}
+    end.
+
+reserve_try_tags_is([#b_set{op=new_try_tag,dst=V}|Is], Active) ->
+    N = map_size(Active),
+    reserve_try_tags_is(Is, Active#{V=>N});
+reserve_try_tags_is([#b_set{op=kill_try_tag,args=[Tag]}|Is], Active) ->
+    reserve_try_tags_is(Is, maps:remove(Tag, Active));
+reserve_try_tags_is([_|Is], Active) ->
+    reserve_try_tags_is(Is, Active);
+reserve_try_tags_is([], Active) -> Active.
+
+update_act_map([L|Ls], Active0, ActMap0) ->
+    case ActMap0 of
+        #{L:=Active1} ->
+            ActMap = ActMap0#{L=>maps:merge(Active0, Active1)},
+            update_act_map(Ls, Active0, ActMap);
+        #{} ->
+            ActMap = ActMap0#{L=>Active0},
+            update_act_map(Ls, Active0, ActMap)
+    end;
+update_act_map([], _, ActMap) -> ActMap.
+
+rename_vars([], _, Blocks, Count) ->
+    {[],Blocks,Count};
+rename_vars(Vs, L, Blocks0, Count0) ->
+    {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Vs], Count0),
+    Ren = zip(Vs, NewVars),
+    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks1),
+    CopyIs = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
+    Is = insert_after_phis(Is0, CopyIs),
+    Blk = Blk0#b_blk{is=Is},
+    Blocks = Blocks1#{L:=Blk},
+    {NewVars,Blocks,Count}.
+
+insert_after_phis([#b_set{op=phi}=I|Is], InsertIs) ->
+    [I|insert_after_phis(Is, InsertIs)];
+insert_after_phis(Is, InsertIs) ->
+    InsertIs ++ Is.
+
+%% frame_size(St0) -> St.
+%%  Calculate the frame size for each block that allocates a frame.
+%%  Annotate the block with the frame size. Also annotate all
+%%  return instructions with {deallocate,FrameSize} to simplify
+%%  code generation.
+
+frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
+    Blocks = foldl(fun(L, Blks) ->
+                           frame_size_1(L, Regs, Blks)
+                   end, Blocks0, Frames),
+    St#st{ssa=Blocks}.
+
+frame_size_1(L, Regs, Blocks0) ->
+    Def = beam_ssa:def([L], Blocks0),
+    Yregs0 = [map_get(V, Regs) || V <- Def, is_yreg(map_get(V, Regs))],
+    Yregs = ordsets:from_list(Yregs0),
+    FrameSize = length(ordsets:from_list(Yregs)),
+    if
+        FrameSize =/= 0 ->
+            [{y,0}|_] = Yregs,                  %Assertion.
+            {y,Last} = last(Yregs),
+            Last = FrameSize - 1,               %Assertion.
+            ok;
+        true ->
+            ok
+    end,
+    Blk0 = map_get(L, Blocks0),
+    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),
+
+    %% Insert an annotation for frame deallocation on
+    %% each #b_ret{}.
+    Blocks = Blocks0#{L:=Blk},
+    Reachable = beam_ssa:rpo([L], Blocks),
+    frame_deallocate(Reachable, FrameSize, Blocks).
+
+frame_deallocate([L|Ls], Size, Blocks0) ->
+    Blk0 = map_get(L, Blocks0),
+    Blk = case Blk0 of
+              #b_blk{last=#b_ret{}=Ret0} ->
+                  Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
+                  Blk0#b_blk{last=Ret};
+              #b_blk{} ->
+                  Blk0
+          end,
+    Blocks = Blocks0#{L:=Blk},
+    frame_deallocate(Ls, Size, Blocks);
+frame_deallocate([], _, Blocks) -> Blocks.
+
+
+%% turn_yregs(St0) -> St.
+%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
+%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
+%%  catches work. The register allocator (linear_scan()) has given
+%%  a lower number to the outermost catch.
+
+turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
+    Regs1 = foldl(fun(L, A) ->
+                          Blk = map_get(L, Blocks),
+                          FrameSize = beam_ssa:get_anno(frame_size, Blk),
+                          Def = beam_ssa:def([L], Blocks),
+                          [turn_yregs_1(Def, FrameSize, Regs0)|A]
+                  end, [], Frames),
+    Regs = maps:merge(Regs0, maps:from_list(append(Regs1))),
+    St#st{regs=Regs}.
+
+turn_yregs_1(Def, FrameSize, Regs) ->
+    Yregs0 = [{map_get(V, Regs),V} || V <- Def, is_yreg(map_get(V, Regs))],
+    Yregs1 = rel2fam(Yregs0),
+    FrameSize = length(Yregs1),
+    Yregs2 = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- Yregs1],
+    R0 = sofs:family(Yregs2),
+    R1 = sofs:family_to_relation(R0),
+    R = sofs:converse(R1),
+    sofs:to_external(R).
+
+%%%
+%%% Reserving registers before register allocation.
+%%%
+
+%% reserve_regs(St0) -> St.
+%%  Reserve registers prior to register allocation. Y registers
+%%  have already been reserved. This function will reserve z,
+%%  fr, and specific x registers.
+
+reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=Res0}=St) ->
+    %% Reserve x0, x1, and so on for the function arguments.
+    Res1 = reserve_arg_regs(Args, 0, Res0),
+
+    %% Reserve Z registers (dummy registers) for instructions with no
+    %% return values (e.g. remove_message) or pseudo-return values
+    %% (e.g. landingpad).
+    Res2 = reserve_zregs(Blocks, Intervals, Res1),
+
+    %% Reserve float registers.
+    Res3 = reserve_fregs(Blocks, Res2),
+
+    %% Reserve all remaining unreserved variables as X registers.
+    Res = maps:from_list(Res3),
+    St#st{res=reserve_xregs(Blocks, Res)}.
+
+reserve_arg_regs([#b_var{}=Arg|Is], N, Acc) ->
+    reserve_arg_regs(Is, N+1, [{Arg,{x,N}}|Acc]);
+reserve_arg_regs([], _, Acc) -> Acc.
+
+reserve_zregs(Blocks, Intervals, Res) ->
+    ShortLived0 = [V || {V,[{Start,End}]} <- Intervals, Start+2 =:= End],
+    ShortLived = cerl_sets:from_list(ShortLived0),
+    F = fun(_, #b_blk{is=Is,last=Last}, A) ->
+                reserve_zreg(Is, Last, ShortLived, A)
+        end,
+    beam_ssa:fold_rpo(F, [0], Res, Blocks).
+
+reserve_zreg([#b_set{op=Op,dst=Dst}],
+              #b_br{bool=Dst}, _ShortLived, A) when Op =:= call;
+                                                    Op =:= get_tuple_element ->
+    %% If type optimization has determined that the result of these
+    %% instructions can be used directly in a branch, we must avoid reserving a
+    %% z register or code generation will fail.
+    A;
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
+              #b_set{op={bif,'=:='},args=[Dst,Val]}], Last, ShortLived, A0) ->
+    case {Val,Last} of
+        {#b_literal{val=Arity},#b_br{bool=#b_var{}}} when Arity bsr 32 =:= 0 ->
+            %% These two instructions can be combined to a test_arity
+            %% instruction provided that the arity variable is short-lived.
+            reserve_zreg_1(Dst, ShortLived, A0);
+        {_,_} ->
+            %% Either the arity is too big, or the boolean value is not
+            %% used in a conditional branch.
+            A0
+    end;
+reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
+             #b_switch{}, ShortLived, A) ->
+    reserve_zreg_1(Dst, ShortLived, A);
+reserve_zreg([#b_set{op={bif,'xor'}}], _Last, _ShortLived, A) ->
+    %% There is no short, easy way to rewrite 'xor' to a series of
+    %% test instructions.
+    A;
+reserve_zreg([#b_set{op={bif,is_record}}], _Last, _ShortLived, A) ->
+    %% There is no short, easy way to rewrite is_record/2 to a series of
+    %% test instructions.
+    A;
+reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
+    IsZReg = case Op of
+                 bs_match_string -> true;
+                 bs_save -> true;
+                 bs_restore -> true;
+                 bs_set_position -> true;
+                 {float,clearerror} -> true;
+                 kill_try_tag -> true;
+                 landingpad -> true;
+                 put_tuple_elements -> true;
+                 remove_message -> true;
+                 set_tuple_element -> true;
+                 succeeded -> true;
+                 timeout -> true;
+                 wait_timeout -> true;
+                 _ -> false
+             end,
+    A = case IsZReg of
+            true -> [{Dst,z}|A0];
+            false -> A0
+        end,
+    reserve_zreg(Is, Last, ShortLived, A);
+reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
+    reserve_zreg_1(Bool, ShortLived, A);
+reserve_zreg([], _, _, A) -> A.
+
+reserve_zreg_1(#b_var{}=V, ShortLived, A) ->
+    case cerl_sets:is_element(V, ShortLived) of
+        true -> [{V,z}|A];
+        false -> A
+    end;
+reserve_zreg_1(#b_literal{}, _, A) -> A.
+
+reserve_fregs(Blocks, Res) ->
+    F = fun(_, #b_blk{is=Is}, A) ->
+                reserve_freg(Is, A)
+        end,
+    beam_ssa:fold_rpo(F, [0], Res, Blocks).
+
+reserve_freg([#b_set{op={float,Op},dst=V}|Is], Res) ->
+    case Op of
+        get ->
+            reserve_freg(Is, Res);
+        _ ->
+            reserve_freg(Is, [{V,fr}|Res])
+    end;
+reserve_freg([_|Is], Res) ->
+    reserve_freg(Is, Res);
+reserve_freg([], Res) -> Res.
+
+%% reserve_xregs(St0) -> St.
+%%  Reserve all remaining variables as X registers.
+%%
+%%  If a variable will need to be in a specific X register for a
+%%  'call' or 'make_fun' (and there is nothing that will kill it
+%%  between the definition and use), reserve the register using a
+%%  {prefer,{x,X} annotation. That annotation means that the linear
+%%  scan algorithm will place the variable in the preferred register,
+%%  unless that register is already occupied.
+%%
+%%  All remaining variables are reserved as X registers. Linear scan
+%%  will allocate the lowest free X register for the variable.
+
+reserve_xregs(Blocks, Res) ->
+    Ls = reverse(beam_ssa:rpo(Blocks)),
+    reserve_xregs(Ls, Blocks, #{}, Res).
+
+reserve_xregs([L|Ls], Blocks, XsMap0, Res0) ->
+    #b_blk{anno=Anno,is=Is0,last=Last} = map_get(L, Blocks),
+
+    %% Calculate mapping from variable name to the preferred
+    %% register.
+    Xs0 = reserve_terminator(L, Is0, Last, Blocks, XsMap0, Res0),
+
+    %% We need to figure out where the code generator will
+    %% place instructions that will do a garbage collection.
+    %% Insert 'gc' markers as pseudo-instructions in the
+    %% instruction sequence.
+    Is1 = reverse(Is0),
+    Is2 = res_place_gc_instrs(Is1, []),
+    Is = res_place_allocate(Anno, Is2),
+
+    %% Add register hints for variables that are defined
+    %% in the (reversed) instruction sequence.
+    {Res,Xs} = reserve_xregs_is(Is, Res0, Xs0, []),
+
+    XsMap = XsMap0#{L=>Xs},
+    reserve_xregs(Ls, Blocks, XsMap, Res);
+reserve_xregs([], _, _, Res) -> Res.
+
+%% Insert explicit 'gc' markers points where there will
+%% be a garbage collection. (Note that the instruction
+%% sequence passed to this function is reversed.)
+
+res_place_gc_instrs([#b_set{op=phi}=I|Is], Acc) ->
+    res_place_gc_instrs(Is, [I|Acc]);
+res_place_gc_instrs([#b_set{op=Op}=I|Is], Acc)
+  when Op =:= call; Op =:= make_fun ->
+    case Acc of
+        [] ->
+            res_place_gc_instrs(Is, [I|Acc]);
+        [GC|_] when GC =:= gc; GC =:= test_heap ->
+            res_place_gc_instrs(Is, [I,gc|Acc]);
+        [_|_] ->
+            res_place_gc_instrs(Is, [I,gc|Acc])
+    end;
+res_place_gc_instrs([#b_set{op=Op,args=Args}=I|Is], Acc0) ->
+    case beam_ssa_codegen:classify_heap_need(Op, Args) of
+        neutral ->
+            case Acc0 of
+                [test_heap|Acc] ->
+                    res_place_gc_instrs(Is, [test_heap,I|Acc]);
+                Acc ->
+                    res_place_gc_instrs(Is, [I|Acc])
+            end;
+        {put,_} ->
+            case Acc0 of
+                [test_heap|Acc] ->
+                    res_place_gc_instrs(Is, [test_heap,I|Acc]);
+                Acc ->
+                    res_place_gc_instrs(Is, [test_heap,I|Acc])
+            end;
+        _ ->
+            res_place_gc_instrs(Is, [gc,I|Acc0])
+    end;
+res_place_gc_instrs([], Acc) ->
+    %% Reverse and replace 'test_heap' markers with 'gc'.
+    %% (The distinction is no longer useful.)
+    res_place_gc_instrs_rev(Acc, []).
+
+res_place_gc_instrs_rev([test_heap|Is], [gc|_]=Acc) ->
+    res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([test_heap|Is], Acc) ->
+    res_place_gc_instrs_rev(Is, [gc|Acc]);
+res_place_gc_instrs_rev([gc|Is], [gc|_]=Acc) ->
+    res_place_gc_instrs_rev(Is, Acc);
+res_place_gc_instrs_rev([I|Is], Acc) ->
+    res_place_gc_instrs_rev(Is, [I|Acc]);
+res_place_gc_instrs_rev([], Acc) -> Acc.
+
+res_place_allocate(#{yregs:=_}, Is) ->
+    %% There will be an 'allocate' instruction inserted here.
+    Is ++ [gc];
+res_place_allocate(#{}, Is) -> Is.
+
+reserve_xregs_is([gc|Is], Res, Xs0, Used) ->
+    %% At this point, the code generator will place an instruction
+    %% that does a garbage collection. We must prune the remembered
+    %% registers.
+    Xs = res_xregs_prune(Xs0, Used, Res),
+    reserve_xregs_is(Is, Res, Xs, Used);
+reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
+    Res = reserve_xreg(Dst, Xs0, Res0),
+    Used1 = ordsets:union(Used0, beam_ssa:used(I)),
+    Used = ordsets:del_element(Dst, Used1),
+    case Op of
+        call ->
+            Xs = reserve_call_args(tl(Args)),
+            reserve_xregs_is(Is, Res, Xs, Used);
+        make_fun ->
+            Xs = reserve_call_args(tl(Args)),
+            reserve_xregs_is(Is, Res, Xs, Used);
+        _ ->
+            reserve_xregs_is(Is, Res, Xs0, Used)
+    end;
+reserve_xregs_is([], Res, Xs, _Used) ->
+    {Res,Xs}.
+
+%% Pick up register hints from the successors of this blocks.
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+                   _Blocks, XsMap, _Res) ->
+    %% We know that no variables are used at ?BADARG_BLOCK, so
+    %% any register hints from the success blocks are safe to use.
+    map_get(Succ, XsMap);
+reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},
+                   Blocks, XsMap, Res) when Succ =/= Fail ->
+    #{Succ:=SuccBlk,Fail:=FailBlk} = Blocks,
+    case {SuccBlk,FailBlk} of
+        {#b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}},
+         #b_blk{is=[],last=#b_br{succ=PhiL,fail=PhiL}}} ->
+            %% Both branches ultimately transfer to the same
+            %% block (via two blocks with no instructions).
+            %% Pick up register hints from the phi nodes
+            %% in the common block.
+            #{PhiL:=#b_blk{is=PhiIs}} = Blocks,
+            Xs = res_xregs_from_phi(PhiIs, Succ, Res, #{}),
+            res_xregs_from_phi(PhiIs, Fail, Res, Xs);
+        {_,_} when Is =/= [] ->
+            case last(Is) of
+                #b_set{op=succeeded,args=[Arg]} ->
+                    %% We know that Arg will not be used at the failure
+                    %% label, so we can pick up register hints from the
+                    %% success label.
+                    Br = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
+                    case reserve_terminator(L, [], Br, Blocks, XsMap, Res) of
+                        #{Arg:=Reg} -> #{Arg=>Reg};
+                        #{} -> #{}
+                    end;
+                _ ->
+                    %% Register hints from the success block may not
+                    %% be safe at the failure block, and vice versa.
+                    #{}
+            end;
+        {_,_} ->
+            %% Register hints from the success block may not
+            %% be safe at the failure block, and vice versa.
+            #{}
+    end;
+reserve_terminator(L, Is, #b_br{bool=#b_literal{val=true},succ=Succ},
+                   Blocks, XsMap, Res) ->
+    case map_get(Succ, Blocks) of
+        #b_blk{is=[],last=Last} ->
+            reserve_terminator(Succ, Is, Last, Blocks, XsMap, Res);
+        #b_blk{is=[_|_]=PhiIs} ->
+            res_xregs_from_phi(PhiIs, L, Res, #{})
+    end;
+reserve_terminator(_, _, _, _, _, _) -> #{}.
+
+%% Pick up a reservation from a phi node.
+res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
+                   Pred, Res, Acc) ->
+    case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
+        [] ->
+            %% The value of the phi node for this predecessor
+            %% is a literal. Nothing to do here.
+            res_xregs_from_phi(Is, Pred, Res, Acc);
+        [V] ->
+            case Res of
+                #{Dst:={prefer,Reg}} ->
+                    %% Try placing V in the same register as for
+                    %% the phi node.
+                    res_xregs_from_phi(Is, Pred, Res, Acc#{V=>Reg});
+                #{Dst:=_} ->
+                    res_xregs_from_phi(Is, Pred, Res, Acc)
+            end
+    end;
+res_xregs_from_phi(_, _, _, Acc) -> Acc.
+
+reserve_call_args(Args) ->
+    reserve_call_args(Args, 0, #{}).
+
+reserve_call_args([#b_var{}=Var|As], X, Xs) ->
+    reserve_call_args(As, X+1, Xs#{Var=>{x,X}});
+reserve_call_args([#b_literal{}|As], X, Xs) ->
+    reserve_call_args(As, X+1, Xs);
+reserve_call_args([], _, Xs) -> Xs.
+
+reserve_xreg(V, Xs, Res) ->
+    case Res of
+        #{V:=_} ->
+            %% Already reserved (but not as an X register).
+            Res;
+        #{} ->
+            case Xs of
+                #{V:=X} ->
+                    %% Add a hint that this specific X register is
+                    %% preferred, unless it is already in use.
+                    Res#{V=>{prefer,X}};
+                #{} ->
+                    %% Reserve as an X register in general.
+                    Res#{V=>x}
+            end
+    end.
+
+%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
+%%  Prune the list of preferred registers, to make sure that
+%%  there are no "holes" (uninitialized X registers) when
+%%  invoking the garbage collector.
+
+res_xregs_prune(Xs, Used, Res) when map_size(Xs) =/= 0 ->
+    %% The number of safe registers is the number of the X registers
+    %% used after this point. The actual number of safe registers may
+    %% be higher than this number, but this is a conservative safe
+    %% estimate.
+    NumSafe = foldl(fun(V, N) ->
+                            case Res of
+                                #{V:={x,_}} -> N + 1;
+                                #{V:=_} -> N;
+                                #{} -> N + 1
+                            end
+                    end, 0, Used),
+
+    %% Remove unsafe registers from the list of potential
+    %% preferred registers.
+    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs);
+res_xregs_prune(Xs, _Used, _Res) -> Xs.
+
+%%%
+%%% Register allocation using linear scan.
+%%%
+
+-record(i,
+        {sort=1 :: instr_number(),
+         reg=none :: i_reg(),
+         pool=x :: pool_id(),
+         var=#b_var{} :: b_var(),
+         rs=[] :: [range()]
+        }).
+
+-record(l,
+        {cur=#i{} :: interval(),
+         unhandled_res=[] :: [interval()],
+         unhandled_any=[] :: [interval()],
+         active=[] :: [interval()],
+         inactive=[] :: [interval()],
+         free=#{} :: #{var_name()=>pool(),
+                       {'next',pool_id()}:=reg_num()},
+         regs=[] :: [{b_var(),ssa_register()}]
+        }).
+
+-type interval() :: #i{}.
+-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
+-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
+-type pool() :: ordsets:ordset(ssa_register()).
+
+linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
+    St = St0#st{intervals=[],res=[]},
+    Free = init_free(maps:to_list(Res)),
+    Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
+    Intervals = sort(Intervals1),
+    IsReserved = fun(#i{reg=Reg}) ->
+                         case Reg of
+                             none -> false;
+                             {prefer,{_,_}} -> false;
+                             {_,_} -> true
+                         end
+                 end,
+    {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
+    L = #l{unhandled_res=UnhandledRes,
+           unhandled_any=Unhandled,free=Free},
+    #l{regs=Regs} = do_linear(L),
+    St#st{regs=maps:from_list(Regs)}.
+
+init_interval({V,[{Start,_}|_]=Rs}, Res) ->
+    Info = map_get(V, Res),
+    Pool = case Info of
+               {prefer,{x,_}} -> x;
+               x -> x;
+               {x,_} -> x;
+               {y,Uniq} -> Uniq;
+               {{y,_},Uniq} -> Uniq;
+               z -> z;
+               fr -> fr
+           end,
+    Reg = case Info of
+              {prefer,{x,_}} -> Info;
+              {x,_} -> Info;
+              {{y,_}=Y,_} -> Y;
+              _ -> none
+          end,
+    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.
+
+init_free(Res) ->
+    Free0 = rel2fam([{x,{x,0}}|init_free_1(Res)]),
+    #{x:=Xs0} = Free1 = maps:from_list(Free0),
+    Xs = init_xregs(Xs0),
+    Free = Free1#{x:=Xs},
+    Next = maps:fold(fun(K, V, A) -> [{{next,K},length(V)}|A] end, [], Free),
+    maps:merge(Free, maps:from_list(Next)).
+
+init_free_1([{_,{prefer,{x,_}=Reg}}|Res]) ->
+    [{x,Reg}|init_free_1(Res)];
+init_free_1([{_,{x,_}=Reg}|Res]) ->
+    [{x,Reg}|init_free_1(Res)];
+init_free_1([{_,{y,Uniq}}|Res]) ->
+    [{Uniq,{y,0}}|init_free_1(Res)];
+init_free_1([{_,{{y,_}=Reg,Uniq}}|Res]) ->
+    [{Uniq,Reg}|init_free_1(Res)];
+init_free_1([{_,z}|Res]) ->
+    [{z,{z,0}}|init_free_1(Res)];
+init_free_1([{_,fr}|Res]) ->
+    [{fr,{fr,0}}|init_free_1(Res)];
+init_free_1([{_,x}|Res]) ->
+    init_free_1(Res);
+init_free_1([]) -> [].
+
+%% Make sure that the pool of xregs is contiguous.
+init_xregs([{x,N},{x,M}|Is]) when N+1 =:= M ->
+    [{x,N}|init_xregs([{x,M}|Is])];
+init_xregs([{x,N}|[{x,_}|_]=Is]) ->
+    [{x,N}|init_xregs([{x,N+1}|Is])];
+init_xregs([{x,_}]=Is) -> Is.
+
+do_linear(L0) ->
+    case set_next_current(L0) of
+        done ->
+            L0;
+        L1 ->
+            L2 = expire_active(L1),
+            L3 = check_inactive(L2),
+            Available = collect_available(L3),
+            L4 = select_register(Available, L3),
+            L = make_cur_active(L4),
+            do_linear(L)
+    end.
+
+set_next_current(#l{unhandled_res=[Cur1|T1],
+                    unhandled_any=[Cur2|T2]}=L) ->
+    case {Cur1,Cur2} of
+        {#i{sort=N1},#i{sort=N2}} when N1 < N2 ->
+            L#l{cur=Cur1,unhandled_res=T1};
+        {_,_} ->
+            L#l{cur=Cur2,unhandled_any=T2}
+    end;
+set_next_current(#l{unhandled_res=[],
+                    unhandled_any=[Cur|T]}=L) ->
+    L#l{cur=Cur,unhandled_any=T};
+set_next_current(#l{unhandled_res=[Cur|T],
+                    unhandled_any=[]}=L) ->
+    L#l{cur=Cur,unhandled_res=T};
+set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
+    done.
+
+expire_active(#l{cur=#i{sort=CurBegin},active=Act0}=L0) ->
+    {Act,L} = expire_active(Act0, CurBegin, L0, []),
+    L#l{active=Act}.
+
+expire_active([#i{reg=Reg,rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+    {_,_} = Reg,                                %Assertion.
+    case overlap_status(Rs0, CurBegin) of
+        ends_before_cur ->
+            L = free_reg(I, L0),
+            expire_active(Is, CurBegin, L, Acc);
+        overlapping ->
+            expire_active(Is, CurBegin, L0, [I|Acc]);
+        not_overlapping ->
+            Rs = strip_before_current(Rs0, CurBegin),
+            L1 = free_reg(I, L0),
+            L = L1#l{inactive=[I#i{rs=Rs}|L1#l.inactive]},
+            expire_active(Is, CurBegin, L, Acc)
+    end;
+expire_active([], _CurBegin, L, Acc) ->
+    {Acc,L}.
+
+check_inactive(#l{cur=#i{sort=CurBegin},inactive=InAct0}=L0) ->
+    {InAct,L} = check_inactive(InAct0, CurBegin, L0, []),
+    L#l{inactive=InAct}.
+
+check_inactive([#i{rs=Rs0}=I|Is], CurBegin, L0, Acc) ->
+    case overlap_status(Rs0, CurBegin) of
+        ends_before_cur ->
+            check_inactive(Is, CurBegin, L0, Acc);
+        not_overlapping ->
+            check_inactive(Is, CurBegin, L0, [I|Acc]);
+        overlapping ->
+            Rs = strip_before_current(Rs0, CurBegin),
+            L1 = L0#l{active=[I#i{rs=Rs}|L0#l.active]},
+            L = reserve_reg(I, L1),
+            check_inactive(Is, CurBegin, L, Acc)
+    end;
+check_inactive([], _CurBegin, L, Acc) ->
+    {Acc,L}.
+
+strip_before_current([{_,E}|Rs], CurBegin) when E =< CurBegin ->
+    strip_before_current(Rs, CurBegin);
+strip_before_current(Rs, _CurBegin) -> Rs.
+
+collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=I}=L) ->
+    %% Use the preferred register if it is available.
+    Avail = collect_available(L#l{cur=I#i{reg=none}}),
+    case member(Prefer, Avail) of
+        true -> [Prefer];
+        false -> Avail
+    end;
+collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
+    %% Return the already reserved register.
+    [ReservedReg];
+collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
+    Free = get_pool(Cur, L),
+
+    %% Note that since the live intervals are constructed from
+    %% SSA form, there cannot be any overlap of the current interval
+    %% with any inactive interval. See [3], page 175. Therefore we
+    %% only have check the unhandled intervals for overlap with
+    %% the current interval. As a further optimization, we only need
+    %% to check the intervals that have reserved registers.
+    collect_available(Unhandled, Cur, Free).
+
+collect_available([#i{pool=Pool1}|Is], #i{pool=Pool2}=Cur, Free)
+  when Pool1 =/= Pool2 ->
+    %% Wrong pool. Ignore this interval.
+    collect_available(Is, Cur, Free);
+collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free0) ->
+    case overlaps(I, Cur) of
+        true ->
+            Free = ordsets:del_element(Reg, Free0),
+            collect_available(Is, Cur, Free);
+        false ->
+            collect_available(Is, Cur, Free0)
+    end;
+collect_available([], _, Free) -> Free.
+
+select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) ->
+    Cur = Cur0#i{reg=Reg},
+    reserve_reg(Cur, L#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]});
+select_register([], #l{cur=Cur0,regs=Regs}=L0) ->
+    %% Allocate a new register in the pool.
+    {Reg,L1} = get_next_free(Cur0, L0),
+    Cur = Cur0#i{reg=Reg},
+    L = L1#l{cur=Cur,regs=[{Cur#i.var,Reg}|Regs]},
+    reserve_reg(Cur, L).
+
+make_cur_active(#l{cur=Cur,active=Act}=L) ->
+    L#l{active=[Cur|Act]}.
+
+overlaps(#i{rs=Rs1}, #i{rs=Rs2}) ->
+    are_overlapping(Rs1, Rs2).
+
+overlap_status([{S,E}], CurBegin) ->
+    if
+        E =< CurBegin -> ends_before_cur;
+        CurBegin < S -> not_overlapping;
+        true -> overlapping
+    end;
+overlap_status([{S,E}|Rs], CurBegin) ->
+    if
+        E =< CurBegin ->
+            overlap_status(Rs, CurBegin);
+        S =< CurBegin ->
+            overlapping;
+        true ->
+            not_overlapping
+    end.
+
+reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
+    FreeRegs0 = get_pool(I, L),
+    FreeRegs = ordsets:del_element(Reg, FreeRegs0),
+    update_pool(I, FreeRegs, L).
+
+free_reg(#i{reg={_,_}=Reg}=I, L) ->
+    FreeRegs0 = get_pool(I, L),
+    FreeRegs = ordsets:add_element(Reg, FreeRegs0),
+    update_pool(I, FreeRegs, L).
+
+get_pool(#i{pool=Pool}, #l{free=Free}) ->
+    map_get(Pool, Free).
+
+update_pool(#i{pool=Pool}, New, #l{free=Free0}=L) ->
+    Free = Free0#{Pool:=New},
+    L#l{free=Free}.
+
+get_next_free(#i{pool=Pool}, #l{free=Free0}=L0) ->
+    K = {next,Pool},
+    N = map_get(K, Free0),
+    Free = Free0#{K:=N+1},
+    L = L0#l{free=Free},
+    if
+        is_integer(Pool) -> {{y,N},L};
+        is_atom(Pool)    -> {{Pool,N},L}
+    end.
+
+%%%
+%%% Interval utilities.
+%%%
+
+are_overlapping([R|Rs1], Rs2) ->
+    case are_overlapping_1(R, Rs2) of
+        true ->
+            true;
+        false ->
+            are_overlapping(Rs1, Rs2)
+    end;
+are_overlapping([], _) -> false.
+
+are_overlapping_1({_S1,E1}, [{S2,_E2}|_]) when E1 < S2 ->
+    false;
+are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
+    (S2 < E1 andalso E2 > S1) orelse are_overlapping_1(R, Rs);
+are_overlapping_1({_,_}, []) -> false.
+
+%%%
+%%% Utilities.
+%%%
+
+%% is_loop_header(L, Blocks) -> false|true.
+%%  Check whether the block is a loop header.
+
+is_loop_header(L, Blocks) ->
+    %% We KNOW that a loop header must start with a peek_message
+    %% instruction.
+    case map_get(L, Blocks) of
+        #b_blk{is=[#b_set{op=peek_message}|_]} -> true;
+        _ -> false
+    end.
+
+rel2fam(S0) ->
+    S1 = sofs:relation(S0),
+    S = sofs:rel2fam(S1),
+    sofs:to_external(S).
+
+split_phis(Is) ->
+    splitwith(fun(#b_set{op=Op}) -> Op =:= phi end, Is).
+
+is_yreg({y,_}) -> true;
+is_yreg({x,_}) -> false;
+is_yreg({z,_}) -> false;
+is_yreg({fr,_}) -> false.
+
+new_vars([Base|Vs0], Count0) ->
+    {V,Count1} = new_var(Base, Count0),
+    {Vs,Count} = new_vars(Vs0, Count1),
+    {[V|Vs],Count};
+new_vars([], Count) -> {[],Count}.
+
+new_var({Base,Int}, Count)  ->
+    true = is_integer(Int),                     %Assertion.
+    {#b_var{name={Base,Count}},Count+1};
+new_var(Base, Count) ->
+    {#b_var{name={Base,Count}},Count+1}.