aboutsummaryrefslogblamecommitdiffstats
path: root/lib/compiler/src/beam_ssa_pre_codegen.erl
blob: 917593137545199d74e1b1845293a24afb41e1d5 (plain) (tree)
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
























                                                                           



                                                                 


















































                                                                        


                                                    

                                  


                                                          
















                                                                      
                                   

                                                   





                                                                             


                                                        




                                          



                                       






                                          



                                                              







                                                        
                              


                                                                 
                               





                                                

                             
                                 
 

                                                                   
       
                                                                     














                                                                        































                                                                          
                                                         
















                                                                      






                                                                             






                                                       
































                                                                                 
 




                                                              
 



                                                       
 


                                                                         









                                                                              
                                                            



















































                                                                          
                                                                       













































































                                                                     








                                                                      


                                                    
                            






                                                                     









                                                                        
















                                                                    





















                                                                    


















                                                                           
                                         

                                                                         
 

                                                                            

                            
                           





                                      
                                                                     
               
                                                              
        
                                  












































                                                                

                                                       





























                                                                           




















































                                                                             







































                                                                            














                                                               
                                                      

                                            






                                                                    


                                                             








                                               







                                       




















































































                                                                               




                                                                     



                                                       
                                                        

































































































































































                                                                                
                                                                     



                                                                      
                                                                 

























                                                              

                                              
            

                                                






























                                                                
                                     


                                                                      
                                                             














































                                                                        
                                            

                                                                  
                                                              













                                                            
                                            







                                                    

                                                                            

                                      








                                                    








                                                                     
                                                                            


                                                      
                                                                      
























































































                                                                     
                                                                      





















































                                                                                     
                                          
















                                                           
                                                                           




























































































                                                                             
                                                           







































                                                                                      
                                                                             

                                                                        
                                         
               
                                                      
















































                                                                                
                             



                                                                              



                                                                                




















































                                                                                  
                                         
                                           
                                         






























































                                                                      
                                              

                                                            

                                                 
                                                   
                               










                                             
                                                 










                                                                      
                                                                        











                                                             
                   










                                                           
                                                                      
                                         


                                                     
                                
                        
                                
                                                         

                                                 

                                
                                 





                                             


                                              

























                                                              
                                                          
                                
                                                     











                                                                   
                                             





















































                                                                           
                                                                 

                                           
                                                                       


















                                                               
                                                                          


                                                      
                                                                      


                                        
                           









































































































                                                                             
                                              























                                                                               
                                                                 
                       
                                         
                                 

                                         



















                                                    
                                            











                                               
                                                      





























                                                                     
                                                                             

































                                                                                 
                                                         
                                     
                                                     














                                                                    

                                                



















































                                                                     















































































































































































































































































































                                                                             


                                       
                   
                                  
 
                              
                                                           


                                        
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Prepare for code generation, including register allocation.
%%
%% The output of this compiler pass is still in the SSA format, but
%% it has been annotated and transformed to help the code generator.
%%
%% * Some instructions are translated to other instructions closer to
%% the BEAM instructions. For example, the binary matching
%% instructions are transformed from the optimization-friendly
%% internal format to instructions more similar to the actual BEAM
%% instructions.
%%
%% * Blocks that will need an instruction for allocating a stack frame
%% are annotated with a {frame_size,Size} annotation.
%%
%% * 'copy' instructions are added for all variables that need
%% to be saved to the stack frame. Additional 'copy' instructions
%% can be added as an optimization to reuse y registers (see
%% the copy_retval sub pass).
%%
%% * Each function is annotated with a {register,RegisterMap}
%% annotation that maps each variable to a BEAM register. The linear
%% scan algorithm is used to allocate registers.
%%
%% There are four kinds of registers: x, y, fr (floating point register),
%% and z. A variable will be allocated to a z register if it is only
%% used by the instruction following the instruction that defines
%% the variable. The code generator will typically combine those
%% instructions to a test instruction. z registers are also used for
%% some instructions that don't have a return value.
%%
%% References:
%%
%% [1] H. Mössenböck and M. Pfeiffer. Linear scan register allocation
%% in the context of SSA form and register constraints. In Proceedings
%% of the International Conference on Compiler Construction, pages
%% 229–246. LNCS 2304, Springer-Verlag, 2002.
%%
%% [2] C. Wimmer and H. Mössenböck. Optimized interval splitting in a
%% linear scan register allocator. In Proceedings of the ACM/USENIX
%% International Conference on Virtual Execution Environments, pages
%% 132–141. ACM Press, 2005.
%%
%% [3] C. Wimmer and M. Franz. Linear Scan Register Allocation on SSA
%% Form. In Proceedings of the International Symposium on Code
%% Generation and Optimization, pages 170-179. ACM Press, 2010.
%%

-module(beam_ssa_pre_codegen).

-export([module/2]).

-include("beam_ssa.hrl").

-import(lists, [all/2,any/2,append/1,duplicate/2,
                foldl/3,last/1,map/2,member/2,partition/2,
                reverse/1,reverse/2,sort/1,zip/2]).

-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

module(#b_module{body=Fs0}=Module, Opts) ->
    UseBSM3 = not proplists:get_bool(no_bsm3, Opts),
    Ps = passes(Opts),
    Fs = functions(Fs0, Ps, UseBSM3),
    {ok,Module#b_module{body=Fs}}.

functions([F|Fs], Ps, UseBSM3) ->
    [function(F, Ps, UseBSM3)|functions(Fs, Ps, UseBSM3)];
functions([], _Ps, _UseBSM3) -> [].

-type b_var() :: beam_ssa:b_var().
-type var_name() :: beam_ssa:var_name().
-type instr_number() :: pos_integer().
-type range() :: {instr_number(),instr_number()}.
-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.
-type ypool() :: {'y',beam_ssa:label()}.
-type reservation() :: 'fr' | {'prefer',xreg()} | 'x' | {'x',xreg()} |
                       ypool() | {yreg(),ypool()} | 'z'.
-type ssa_register() :: beam_ssa_codegen:ssa_register().

-define(TC(Body), tc(fun() -> Body end, ?FILE, ?LINE)).
-record(st, {ssa :: beam_ssa:block_map(),
             args :: [b_var()],
             cnt :: beam_ssa:label(),
             use_bsm3 :: boolean(),
             frames=[] :: [beam_ssa:label()],
             intervals=[] :: [{b_var(),[range()]}],
             res=[] :: [{b_var(),reservation()}] | #{b_var():=reservation()},
             regs=#{} :: #{b_var():=ssa_register()},
             extra_annos=[] :: [{atom(),term()}]
            }).
-define(PASS(N), {N,fun N/1}).

passes(Opts) ->
    AddPrecgAnnos = proplists:get_bool(dprecg, Opts),
    FixTuples = proplists:get_bool(no_put_tuple2, Opts),
    Ps = [?PASS(assert_no_critical_edges),

          %% Preliminaries.
          ?PASS(fix_bs),
          ?PASS(sanitize),
          case FixTuples of
              false -> ignore;
              true -> ?PASS(fix_tuples)
          end,
          ?PASS(place_frames),
          ?PASS(fix_receives),

          %% Find and reserve Y registers.
          ?PASS(find_yregs),
          ?PASS(reserve_yregs),

          %% Handle legacy binary match instruction that don't
          %% accept a Y register as destination.
          ?PASS(legacy_bs),

          %% Improve reuse of Y registers to potentially
          %% reduce the size of the stack frame.
          ?PASS(copy_retval),
          ?PASS(opt_get_list),

          %% Calculate live intervals.
          ?PASS(number_instructions),
          ?PASS(live_intervals),
          ?PASS(reserve_regs),

          %% If needed for a .precg file, save the live intervals
          %% so they can be included in an annotation.
          case AddPrecgAnnos of
              false -> ignore;
              true -> ?PASS(save_live_intervals)
          end,

          %% Allocate registers.
          ?PASS(linear_scan),
          ?PASS(frame_size),
          ?PASS(turn_yregs)],
    [P || P <- Ps, P =/= ignore].

function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
         Ps, UseBSM3) ->
    try
        St0 = #st{ssa=Blocks0,args=Args,use_bsm3=UseBSM3,cnt=Count0},
        St = compile:run_sub_passes(Ps, St0),
        #st{ssa=Blocks,cnt=Count,regs=Regs,extra_annos=ExtraAnnos} = St,
        F1 = add_extra_annos(F0, ExtraAnnos),
        F = beam_ssa:add_anno(registers, Regs, F1),
        F#b_function{bs=Blocks,cnt=Count}
    catch
        Class:Error:Stack ->
            #{func_info:={_,Name,Arity}} = Anno,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

save_live_intervals(#st{intervals=Intervals}=St) ->
    St#st{extra_annos=[{live_intervals,Intervals}]}.

%% Add extra annotations when a .precg listing file is being produced.
add_extra_annos(F, Annos) ->
    foldl(fun({Name,Value}, Acc) ->
                  beam_ssa:add_anno(Name, Value, Acc)
          end, F, Annos).

%% assert_no_critical_edges(St0) -> St.
%%  The code generator will not work if there are critial edges.
%%  Abort if any critical edges are found.

assert_no_critical_edges(#st{ssa=Blocks}=St) ->
    F = fun assert_no_ces/3,
    beam_ssa:fold_rpo(F, Blocks, Blocks),
    St.

assert_no_ces(_, #b_blk{is=[#b_set{op=phi,args=[_,_]=Phis}|_]}, Blocks) ->
    %% This block has multiple predecessors. Make sure that none
    %% of the precessors have more than one successor.
    true = all(fun({_,P}) ->
                       length(beam_ssa:successors(P, Blocks)) =:= 1
               end, Phis),                      %Assertion.
    Blocks;
assert_no_ces(_, _, Blocks) -> Blocks.

%% fix_bs(St0) -> St.
%%  Fix up the binary matching instructions:
%%
%%    * Insert bs_save and bs_restore instructions where needed.
%%
%%    * Combine bs_match and bs_extract instructions to bs_get
%%      instructions.

fix_bs(#st{ssa=Blocks,cnt=Count0,use_bsm3=UseBSM3}=St) ->
    F = fun(#b_set{op=bs_start_match,dst=Dst}, A) ->
                %% Mark the root of the match context list.
                [{Dst,{context,Dst}}|A];
           (#b_set{op=bs_match,dst=Dst,args=[_,ParentCtx|_]}, A) ->
                %% Link this match context the previous match context.
                [{Dst,ParentCtx}|A];
           (_, A) ->
                A
        end,
    case beam_ssa:fold_instrs_rpo(F, [0], [],Blocks) of
        [] ->
            %% No binary matching in this function.
            St;
        [_|_]=M ->
            CtxChain = maps:from_list(M),
            Linear0 = beam_ssa:linearize(Blocks),

            %% Insert position instructions where needed.
            {Linear1,Count} = case UseBSM3 of
                                  true ->
                                      bs_pos_bsm3(Linear0, CtxChain, Count0);
                                  false ->
                                      bs_pos_bsm2(Linear0, CtxChain, Count0)
                              end,

            %% Rename instructions.
            Linear = bs_instrs(Linear1, CtxChain, []),

            St#st{ssa=maps:from_list(Linear),cnt=Count}
    end.

%% Insert bs_get_position and bs_set_position instructions as needed.
bs_pos_bsm3(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    S0 = sofs:relation(Rs, [{context,save_point}]),
    S1 = sofs:relation_to_family(S0),
    S = sofs:to_external(S1),

    {SavePoints,Count1} = make_bs_pos_dict(S, Count0, []),
    {Gets,Count2} = make_bs_setpos_map(Rs, SavePoints, Count1, []),
    {Sets,Count} = make_bs_getpos_map(maps:to_list(Rs0), SavePoints, Count2, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm3(Linear0, Gets, Sets, SavePoints),Count}.

make_bs_setpos_map([{Ctx,Save}=Ps|T], SavePoints, Count, Acc) ->
    SavePoint = get_savepoint(Ps, SavePoints),
    I = #b_set{op=bs_get_position,dst=SavePoint,args=[Ctx]},
    make_bs_setpos_map(T, SavePoints, Count+1, [{Save,I}|Acc]);
make_bs_setpos_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

make_bs_getpos_map([{Bef,{Ctx,_}=Ps}|T], SavePoints, Count, Acc) ->
    Ignored = #b_var{name={'@ssa_ignored',Count}},
    Args = [Ctx, get_savepoint(Ps, SavePoints)],
    I = #b_set{op=bs_set_position,dst=Ignored,args=Args},
    make_bs_getpos_map(T, SavePoints, Count+1, [{Bef,I}|Acc]);
make_bs_getpos_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

get_savepoint({_,_}=Ps, SavePoints) ->
    Name = {'@ssa_bs_position', maps:get(Ps, SavePoints)},
    #b_var{name=Name}.

make_bs_pos_dict([{Ctx,Pts}|T], Count0, Acc0) ->
    {Acc, Count} = make_bs_pos_dict_1(Pts, Ctx, Count0, Acc0),
    make_bs_pos_dict(T, Count, Acc);
make_bs_pos_dict([], Count, Acc) ->
    {maps:from_list(Acc), Count}.

make_bs_pos_dict_1([H|T], Ctx, I, Acc) ->
    make_bs_pos_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_bs_pos_dict_1([], Ctx, I, Acc) ->
    {[{Ctx,I}|Acc], I}.

%% As bs_position but without OTP-22 instructions. This is only used when
%% cross-compiling to older versions.
bs_pos_bsm2(Linear0, CtxChain, Count0) ->
    Rs0 = bs_restores(Linear0, CtxChain, #{}, #{}),
    Rs = maps:values(Rs0),
    S0 = sofs:relation(Rs, [{context,save_point}]),
    S1 = sofs:relation_to_family(S0),
    S = sofs:to_external(S1),
    Slots = make_save_point_dict(S, []),
    {Saves,Count1} = make_save_map(Rs, Slots, Count0, []),
    {Restores,Count} = make_restore_map(maps:to_list(Rs0), Slots, Count1, []),

    %% Now insert all saves and restores.
    {bs_insert_bsm2(Linear0, Saves, Restores, Slots),Count}.

make_save_map([{Ctx,Save}=Ps|T], Slots, Count, Acc) ->
    Ignored = #b_var{name={'@ssa_ignored',Count}},
    case make_slot(Ps, Slots) of
        #b_literal{val=start} ->
            make_save_map(T, Slots, Count, Acc);
        Slot ->
            I = #b_set{op=bs_save,dst=Ignored,args=[Ctx,Slot]},
            make_save_map(T, Slots, Count+1, [{Save,I}|Acc])
    end;
make_save_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

make_restore_map([{Bef,{Ctx,_}=Ps}|T], Slots, Count, Acc) ->
    Ignored = #b_var{name={'@ssa_ignored',Count}},
    I = #b_set{op=bs_restore,dst=Ignored,args=[Ctx,make_slot(Ps, Slots)]},
    make_restore_map(T, Slots, Count+1, [{Bef,I}|Acc]);
make_restore_map([], _, Count, Acc) ->
    {maps:from_list(Acc),Count}.

make_slot({Same,Same}, _Slots) ->
    #b_literal{val=start};
make_slot({_,_}=Ps, Slots) ->
    #b_literal{val=maps:get(Ps, Slots)}.

make_save_point_dict([{Ctx,Pts}|T], Acc0) ->
    Acc = make_save_point_dict_1(Pts, Ctx, 0, Acc0),
    make_save_point_dict(T, Acc);
make_save_point_dict([], Acc) ->
    maps:from_list(Acc).

make_save_point_dict_1([Ctx|T], Ctx, I, Acc) ->
    %% Special {atom,start} save point. Does not need a
    %% bs_save instruction.
    make_save_point_dict_1(T, Ctx, I, Acc);
make_save_point_dict_1([H|T], Ctx, I, Acc) ->
    make_save_point_dict_1(T, Ctx, I+1, [{{Ctx,H},I}|Acc]);
make_save_point_dict_1([], Ctx, I, Acc) ->
    [{Ctx,I}|Acc].

bs_restores([{L,#b_blk{is=Is,last=Last}}|Bs], CtxChain, D0, Rs0) ->
    FPos = case D0 of
               #{L:=Pos0} -> Pos0;
               #{} -> #{}
           end,
    {SPos,Rs} = bs_restores_is(Is, CtxChain, FPos, Rs0),
    D = bs_update_successors(Last, SPos, FPos, D0),
    bs_restores(Bs, CtxChain, D, Rs);
bs_restores([], _, _, Rs) -> Rs.

bs_update_successors(#b_br{succ=Succ,fail=Fail}, SPos, FPos, D) ->
    join_positions([{Succ,SPos},{Fail,FPos}], D);
bs_update_successors(#b_switch{fail=Fail,list=List}, SPos, _FPos, D) ->
    Update = [{L,SPos} || {_,L} <- List] ++ [{Fail,SPos}],
    join_positions(Update, D);
bs_update_successors(#b_ret{}, _, _, D) -> D.

join_positions([{L,MapPos0}|T], D) ->
    case D of
        #{L:=MapPos0} ->
            %% Same map.
            join_positions(T, D);
        #{L:=MapPos1} ->
            %% Different maps.
            MapPos = join_positions_1(MapPos0, MapPos1),
            join_positions(T, D#{L:=MapPos});
        #{} ->
            join_positions(T, D#{L=>MapPos0})
    end;
join_positions([], D) -> D.

join_positions_1(MapPos0, MapPos1) ->
    MapPos2 = maps:map(fun(Start, Pos) ->
                               case MapPos0 of
                                   #{Start:=Pos} -> Pos;
                                   #{Start:=_} -> unknown;
                                   #{} -> Pos
                               end
                       end, MapPos1),
    maps:merge(MapPos0, MapPos2).

bs_restores_is([#b_set{op=bs_start_match,dst=Start}|Is],
               CtxChain, PosMap0, Rs) ->
    PosMap = PosMap0#{Start=>Start},
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=bs_match,dst=NewPos,args=Args}=I|Is],
               CtxChain, PosMap0, Rs0) ->
    Start = bs_subst_ctx(NewPos, CtxChain),
    [_,FromPos|_] = Args,
    case PosMap0 of
        #{Start:=FromPos} ->
            %% Same position, no restore needed.
            PosMap = case bs_match_type(I) of
                         plain ->
                             %% Update position to new position.
                             PosMap0#{Start:=NewPos};
                         _ ->
                             %% Position will not change (test_unit
                             %% instruction or no instruction at
                             %% all).
                             PosMap0#{Start:=FromPos}
                     end,
            bs_restores_is(Is, CtxChain, PosMap, Rs0);
        #{Start:=_} ->
            %% Different positions, might need a restore instruction.
            case bs_match_type(I) of
                none ->
                    %% The tail test will be optimized away.
                    %% No need to do a restore.
                    PosMap = PosMap0#{Start:=FromPos},
                    bs_restores_is(Is, CtxChain, PosMap, Rs0);
                test_unit ->
                    %% This match instruction will be replaced by
                    %% a test_unit instruction. We will need a
                    %% restore. The new position will be the position
                    %% restored to (NOT NewPos).
                    PosMap = PosMap0#{Start:=FromPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs);
                plain ->
                    %% Match or skip. Position will be changed.
                    PosMap = PosMap0#{Start:=NewPos},
                    Rs = Rs0#{NewPos=>{Start,FromPos}},
                    bs_restores_is(Is, CtxChain, PosMap, Rs)
            end
    end;
bs_restores_is([#b_set{op=bs_extract,args=[FromPos|_]}|Is],
               CtxChain, PosMap, Rs) ->
    Start = bs_subst_ctx(FromPos, CtxChain),
    #{Start:=FromPos} = PosMap,                 %Assertion.
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=call,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0) ->
    {Rs,PosMap1} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    PosMap = bs_invalidate_pos(Args, PosMap1, CtxChain),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=landingpad}|Is], CtxChain, PosMap0, Rs) ->
    %% We can land here from any point, so all positions are invalid.
    PosMap = maps:map(fun(_Start,_Pos) -> unknown end, PosMap0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([#b_set{op=Op,dst=Dst,args=Args}|Is],
               CtxChain, PosMap0, Rs0)
  when Op =:= bs_test_tail;
       Op =:= bs_get_tail ->
    {Rs,PosMap} = bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0),
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([_|Is], CtxChain, PosMap, Rs) ->
    bs_restores_is(Is, CtxChain, PosMap, Rs);
bs_restores_is([], _CtxChain, PosMap, Rs) ->
    {PosMap,Rs}.

%% bs_match_type(Instruction) -> none | test_unit | plain.
%%  Classify an instruction by the shape of its arguments:
%%    none      - a skip of type binary, size all, and unit 1
%%    test_unit - a skip of type binary, size all, and any other unit
%%    plain     - anything else
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                             #b_literal{val=binary},_Flags,
                             #b_literal{val=all},#b_literal{val=1}]}) ->
    none;
bs_match_type(#b_set{args=[#b_literal{val=skip},_Ctx,
                             #b_literal{val=binary},_Flags,
                             #b_literal{val=all},#b_literal{}]}) ->
    test_unit;
bs_match_type(_) ->
    plain.

%% Call instructions leave the match position in an undefined state.
%% Every variable argument that resolves (through CtxChain) to a key
%% of the position map gets its position replaced by 'unknown';
%% all other arguments are ignored.
bs_invalidate_pos(Args, PosMap, CtxChain) ->
    Invalidate =
        fun(#b_var{}=Arg, Acc) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case is_map_key(Start, Acc) of
                    true -> Acc#{Start:=unknown};
                    false -> Acc        %Not a match context.
                end;
           (_, Acc) ->
                Acc
        end,
    lists:foldl(Invalidate, PosMap, Args).

%% bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) -> {Rs,PosMap}.
%%  For every variable argument that resolves to a key of the position
%%  map but whose recorded position differs from the argument itself,
%%  record a restore keyed by Dst and update the position map.
bs_restore_args(Args, PosMap0, CtxChain, Dst, Rs0) ->
    Restore =
        fun(#b_var{}=Arg, {Rs,PosMap}=Acc) ->
                Start = bs_subst_ctx(Arg, CtxChain),
                case PosMap of
                    #{Start:=Arg} ->
                        %% Already at the wanted position.
                        Acc;
                    #{Start:=_} ->
                        %% At some other position; a restore is needed.
                        {Rs#{Dst=>{Start,Arg}},PosMap#{Start:=Arg}};
                    #{} ->
                        %% Not a match context.
                        Acc
                end;
           (_, Acc) ->
                Acc
        end,
    lists:foldl(Restore, {Rs0,PosMap0}, Args).

%% Insert all bs_save and bs_restore instructions.

bs_insert_bsm3(Blocks, Saves, Restores, SavePoints) ->
    %% No per-instruction rewriting is wanted here, so hand the common
    %% worker an identity transformation.
    Identity = fun(Instr) -> Instr end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, Identity).

bs_insert_bsm2(Blocks, Saves, Restores, SavePoints) ->
    %% The old instructions require bs_start_match to be annotated with
    %% the number of position slots it needs. A context without an entry
    %% in SavePoints gets zero slots.
    AddNumSlots =
        fun(#b_set{op=bs_start_match,dst=Ctx}=Start) ->
                Slots = maps:get(Ctx, SavePoints, 0),
                beam_ssa:add_anno(num_slots, Slots, Start);
           (Other) ->
                Other
        end,
    bs_insert_1(Blocks, Saves, Restores, SavePoints, AddNumSlots).

%% Process the blocks in order. Restores are inserted into the current
%% block; a save triggered by an instruction of the current block is
%% inserted into the block that follows it in the list.
bs_insert_1([], _Saves, _Restores, _Slots, _XFrm) ->
    [];
bs_insert_1([{Lbl,#b_blk{is=Instrs0}=Blk0}|Rest0],
            Saves, Restores, Slots, XFrm) ->
    Instrs = bs_insert_is_1(Instrs0, Restores, Slots, XFrm),
    Rest = bs_insert_saves(Instrs, Rest0, Saves),
    Blk = Blk0#b_blk{is=Instrs},
    [{Lbl,Blk}|bs_insert_1(Rest, Saves, Restores, Slots, XFrm)].

%% Apply XFrm to every instruction and put the restore registered for
%% an instruction (if any) directly in front of it.
bs_insert_is_1([], _Restores, _SavePoints, _XFrm) ->
    [];
bs_insert_is_1([#b_set{op=Op,dst=Dst}=I0|Is], Restores, SavePoints, XFrm) ->
    I = XFrm(I0),
    %% Restores are only looked up for these operations.
    CanRestore = lists:member(Op, [bs_test_tail,bs_get_tail,bs_match,call]),
    Prefix = case CanRestore of
                 true ->
                     case Restores of
                         #{Dst:=Restore} -> [Restore];
                         #{} -> []
                     end;
                 false ->
                     []
             end,
    Prefix ++ [I|bs_insert_is_1(Is, Restores, SavePoints, XFrm)].

%% Find the first instruction whose destination has a save registered
%% and insert that save into the first block of Bs. If there is no such
%% instruction, Bs is returned unchanged.
bs_insert_saves([], Bs, _Saves) ->
    Bs;
bs_insert_saves([#b_set{dst=Dst}|Is], Bs, Saves) ->
    case maps:find(Dst, Saves) of
        {ok,Save} ->
            bs_insert_save(Save, Bs);
        error ->
            bs_insert_saves(Is, Bs, Saves)
    end.

%% Insert Save at the beginning of the first block, but after a leading
%% bs_extract instruction if the block starts with one.
bs_insert_save(Save, [{L,#b_blk{is=[#b_set{op=bs_extract}=Ex|Is]}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Ex,Save|Is]}}|Bs];
bs_insert_save(Save, [{L,#b_blk{is=Is}=Blk}|Bs]) ->
    [{L,Blk#b_blk{is=[Save|Is]}}|Bs].

%% Translate bs_match instructions to bs_get, bs_match_string,
%% or bs_skip. Also rename match context variables to use the
%% variable assigned to by the start_match instruction.

bs_instrs([], _CtxChain, Acc) ->
    reverse(Acc);
bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
    {Is,Acc} =
        case bs_instrs_is(Is0, CtxChain, []) of
            [#b_set{op=bs_extract,dst=Dst,args=[Ctx]}|Is1] ->
                %% A leading bs_extract is dropped from this block and
                %% folded into the block most recently added to the
                %% accumulator.
                {Is1,bs_combine(Dst, Ctx, Acc0)};
            Is1 ->
                {Is1,Acc0}
        end,
    bs_instrs(Bs, CtxChain, [{L,Blk#b_blk{is=Is}}|Acc]).

%% Substitute match contexts in the arguments of every instruction and
%% rewrite bs_match instructions of kind 'skip' and 'string' into
%% bs_skip and bs_match_string, respectively. The translated
%% instructions are returned after the (reversed) accumulator.
bs_instrs_is(Is, CtxChain, Acc) ->
    Translate =
        fun(#b_set{op=Op,args=Args0}=Instr0) ->
                Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
                Instr = Instr0#b_set{args=Args},
                case {Op,Args} of
                    {bs_match,[#b_literal{val=skip},Ctx,Type|As]} ->
                        Instr#b_set{op=bs_skip,args=[Type,Ctx|As]};
                    {bs_match,[#b_literal{val=string},Ctx|As]} ->
                        Instr#b_set{op=bs_match_string,args=[Ctx|As]};
                    {_,_} ->
                        Instr
                end
        end,
    reverse(Acc, [Translate(Each) || Each <- Is]).

%% Combine a bs_match instruction with the destination register
%% taken from a bs_extract instruction.

bs_combine(Dst, Ctx, [{L,#b_blk{is=Is0}=Blk}|Acc]) ->
    %% Only the first block of the accumulator is rewritten. Its last
    %% two instructions must be a bs_match followed by exactly one more
    %% instruction (bound to Succeeded); any other shape fails the
    %% match below.
    [#b_set{}=Succeeded,
     #b_set{op=bs_match,args=[Type,_|As]}=BsMatch|Is1] = reverse(Is0),
    %% The bs_match becomes a bs_get that writes to Dst and reads from
    %% Ctx, and the final instruction is made to refer to Dst.
    Is = reverse(Is1, [BsMatch#b_set{op=bs_get,dst=Dst,args=[Type,Ctx|As]},
                       Succeeded#b_set{args=[Dst]}]),
    [{L,Blk#b_blk{is=Is}}|Acc].

%% Follow the chain of entries in CtxChain starting at a variable until
%% a {context,Ctx} entry is found, and return Ctx. A variable without
%% an entry, or a term that is not a variable, is returned as is.
bs_subst_ctx(#b_var{}=Var, CtxChain) ->
    case maps:find(Var, CtxChain) of
        {ok,{context,Ctx}} ->
            Ctx;
        {ok,Parent} ->
            bs_subst_ctx(Parent, CtxChain);
        error ->
            %% Not a match context variable.
            Var
    end;
bs_subst_ctx(NotVar, _CtxChain) ->
    NotVar.

%% legacy_bs(St0) -> St.
%%  Binary matching instructions in OTP 21 and earlier don't support
%%  a Y register as destination. If St#st.use_bsm3 is false,
%%  we will need to rewrite those instructions so that the result
%%  is first put in an X register and then moved to a Y register
%%  if the operation succeeded.

legacy_bs(#st{use_bsm3=true}=St) ->
    St;
legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
    %% Collect the variables whose entry in Res is a {y,_} tuple.
    YregVars = [{V,true} || {V,{y,_}} <- Res],
    IsYreg = maps:from_list(YregVars),
    Linear0 = beam_ssa:linearize(Blocks0),
    {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
    St#st{ssa=maps:from_list(Linear),cnt=Count}.

%% Rewrite the blocks one by one. The accumulated blocks are returned
%% in reverse order of processing (the list is not reversed).
legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
    {Acc,Count};
legacy_bs([{L,#b_blk{is=Is0,last=Last}=Blk}|Bs],
          IsYreg, Count0, Copies0, Acc) ->
    %% A copy instruction registered for this label goes first in
    %% the block.
    Is1 = case maps:find(L, Copies0) of
              {ok,Copy} -> [Copy|Is0];
              error -> Is0
          end,
    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]).

%% legacy_bs_is(Is, Last, IsYreg, Count0, Copies0, Acc) -> {Is,Count,Copies}.
%%  Copy the instructions of a block, looking only at its last two
%%  instructions: an instruction immediately followed by a 'succeeded'
%%  test of its result. If that instruction is a bs_get or bs_init
%%  whose destination is a key in IsYreg, it is made to write to a
%%  fresh temporary variable, and a copy from the temporary to the
%%  original destination is registered in Copies under the success
%%  label of the block's terminator.
legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
             Last, IsYreg, Count0, Copies0, Acc) ->
    NeedsFix = is_map_key(Dst, IsYreg) andalso
        case Op of
            bs_get -> true;
            bs_init -> true;
            _ -> false
        end,
    case NeedsFix of
        true ->
            %% The counter makes the name of the temporary unique.
            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
            Count = Count0 + 1,
            I = I0#b_set{dst=TempDst},
            SuccI = SuccI0#b_set{args=[TempDst]},
            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
            %% Assertion: the terminator must branch on the result of
            %% the succeeded test. Its success label is where the copy
            %% will be placed.
            #b_br{bool=SuccDst,succ=SuccL} = Last,
            Copies = Copies0#{SuccL=>Copy},
            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
        false ->
            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
    end;
legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
    %% Any other instruction is kept as is.
    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
    {reverse(Acc),Count,Copies}.

%% sanitize(St0) -> St.
%%  Remove constructs that can cause problems later:
%%
%%  * Unreachable blocks may cause problems for determination of
%%  dominators.
%%
%%  * Some instructions (such as get_hd) don't accept literal
%%  arguments. Evaluate the instructions and remove them.

sanitize(#st{ssa=Blocks0,cnt=Count0}=St) ->
    %% Visit the blocks in the order given by beam_ssa:rpo/1, starting
    %% out with no known values for any variable.
    Labels = beam_ssa:rpo(Blocks0),
    NoValues = #{},
    {Blocks,Count} = sanitize(Labels, Count0, Blocks0, NoValues),
    St#st{ssa=Blocks,cnt=Count}.

%% sanitize(Labels, Count0, Blocks0, Values0) -> {Blocks,Count}.
%%  Sanitize the instructions of each block in Labels. Values maps
%%  variables to the literal values that sanitize_is/5 has computed
%%  for them so far.
sanitize([L|Ls], Count0, Blocks0, Values0) ->
    #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks0),
    case sanitize_is(Is0, Count0, Values0, false, []) of
        no_change ->
            %% Nothing was rewritten; keep the block, the counter,
            %% and the values as they are.
            sanitize(Ls, Count0, Blocks0, Values0);
        {Is,Count,Values} ->
            Blk = Blk0#b_blk{is=Is},
            Blocks = Blocks0#{L:=Blk},
            sanitize(Ls, Count, Blocks, Values)
    end;
sanitize([], Count, Blocks0, Values) ->
    %% If any values were collected, substitute them for the
    %% corresponding variables, starting from block 0.
    Blocks = if
                 map_size(Values) =:= 0 ->
                     Blocks0;
                 true ->
                     beam_ssa:rename_vars(Values, [0], Blocks0)
             end,

    %% Unreachable blocks can cause problems for the dominator calculations.
    %% If the number of blocks differs from the number of labels returned
    %% by beam_ssa:rpo/1, rebuild the block map from those labels only.
    Ls = beam_ssa:rpo(Blocks),
    Reachable = gb_sets:from_list(Ls),
    {case map_size(Blocks) =:= gb_sets:size(Reachable) of
         true -> Blocks;
         false -> remove_unreachable(Ls, Blocks, Reachable, [])
     end,Count}.

%% sanitize_is(Is, Count0, Values0, Changed, Acc) ->
%%       no_change | {Is,Count,Values}.
%%  Sanitize the instructions of one block. Changed records whether
%%  anything has been rewritten so far; if nothing was, no_change is
%%  returned instead of the instructions.
sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
            Count0, Values, Changed, Acc) ->
    case sanitize_args(Args0, Values) of
        [#b_literal{}=Map,Key] ->
            %% Bind the literal map to a variable.
            {MapVar,Count} = new_var('@ssa_map', Count0),
            I = I0#b_set{args=[MapVar,Key]},
            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
            sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
        [_,_]=Args0 ->
            %% Args0 is already bound, so this clause matches only when
            %% the substitution did not change any argument.
            sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
        [_,_]=Args ->
            %% NOTE(review): the arguments changed here, but Changed is
            %% passed on as is. Presumably the final renaming done in
            %% sanitize/4 makes up for that -- confirm.
            I = I0#b_set{args=Args},
            sanitize_is(Is, Count0, Values, Changed, [I|Acc])
    end;
sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
            Count, Values, Changed0, Acc) ->
    Args = sanitize_args(Args0, Values),
    case sanitize_instr(Op, Args, I0) of
        {value,Value0} ->
            %% The instruction was evaluated. Drop it and remember the
            %% literal value of its destination variable.
            Value = #b_literal{val=Value0},
            sanitize_is(Is0, Count, Values#{Dst=>Value}, true, Acc);
        {ok,I} ->
            %% The instruction was replaced by another instruction.
            sanitize_is(Is0, Count, Values, true, [I|Acc]);
        ok ->
            %% Keep the instruction, with substituted arguments.
            I = I0#b_set{args=Args},
            Changed = Changed0 orelse Args =/= Args0,
            sanitize_is(Is0, Count, Values, Changed, [I|Acc])
    end;
sanitize_is([], Count, Values, Changed, Acc) ->
    case Changed of
        true ->
            {reverse(Acc),Count,Values};
        false ->
            no_change
    end.

%% Replace every argument that is a key in Values with the value
%% stored for it; all other arguments are kept.
sanitize_args(Args, Values) ->
    [maps:get(Arg, Values, Arg) || Arg <- Args].

%% sanitize_instr(Op, Args, Instruction) -> {value,Term} | {ok,NewI} | ok.
%%  Try to evaluate an instruction whose arguments are literals.
%%    {value,Term} - the instruction evaluates to Term
%%    {ok,NewI}    - the instruction is to be replaced by NewI
%%    ok           - the instruction is left alone
sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
    case erl_bifs:is_pure(erlang, Bif, 1) of
        true ->
            try erlang:Bif(Lit) of
                Result -> {value,Result}
            catch
                error:_ -> ok
            end;
        false ->
            ok
    end;
sanitize_instr({bif,Bif}, [#b_literal{val=Lit1},#b_literal{val=Lit2}], _I) ->
    true = erl_bifs:is_pure(erlang, Bif, 2),    %Assertion.
    try erlang:Bif(Lit1, Lit2) of
        Result -> {value,Result}
    catch
        error:_ -> ok
    end;
sanitize_instr(get_hd, [#b_literal{val=[Head|_]}], _I) ->
    {value,Head};
sanitize_instr(get_tl, [#b_literal{val=[_|Tail]}], _I) ->
    {value,Tail};
sanitize_instr(get_tuple_element, [#b_literal{val=Tuple},
                                   #b_literal{val=Index}], _I)
  when Index < tuple_size(Tuple) ->
    %% Index counts from zero; element/2 counts from one.
    {value,element(Index+1, Tuple)};
sanitize_instr(is_nonempty_list, [#b_literal{val=[_|_]}], _I) ->
    {value,true};
sanitize_instr(is_nonempty_list, [#b_literal{}], _I) ->
    {value,false};
sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
                                 #b_literal{val=Arity},
                                 #b_literal{val=Tag}], _I)
  when is_integer(Arity), is_atom(Tag) ->
    %% Guard against non-tuples and the empty tuple before looking at
    %% the first element.
    IsTagged = is_tuple(Tuple) andalso
        tuple_size(Tuple) =:= Arity andalso
        Arity >= 1 andalso
        element(1, Tuple) =:= Tag,
    {value,IsTagged};
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], _I)
  when is_integer(Sz), Sz >= 0 ->
    ok;
sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{}|_], I0) ->
    %% The literal size is not a non-negative integer.
    {ok,sanitize_badarg(I0)};
sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{val=Sz}|_], _I)
  when is_integer(Sz), Sz >= 0 ->
    ok;
sanitize_instr(bs_init, [#b_literal{val=append},_,#b_literal{}|_], I0) ->
    %% The literal size is not a non-negative integer.
    {ok,sanitize_badarg(I0)};
sanitize_instr(succeeded, [#b_literal{}], _I) ->
    {value,true};
sanitize_instr(_, _, _) ->
    ok.

%% Turn the instruction into a call to erlang:error(badarg), keeping
%% all other fields of the instruction.
sanitize_badarg(I) ->
    ErlangError = #b_remote{mod=#b_literal{val=erlang},
                            name=#b_literal{val=error},
                            arity=1},
    Reason = #b_literal{val=badarg},
    I#b_set{op=call,args=[ErlangError,Reason]}.

%% Build a new block map holding only the blocks in the given list of
%% labels. Phi nodes at the beginning of a block are pruned so that
%% they only refer to predecessors that are members of Reachable.
remove_unreachable(Ls, Blocks, Reachable, Acc0) ->
    Keep =
        fun(L, Acc) ->
                #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks),
                case split_phis(Is0) of
                    {[],_} ->
                        [{L,Blk0}|Acc];
                    {[_|_]=Phis0,Rest} ->
                        Phis = [prune_phi(Phi, Reachable) || Phi <- Phis0],
                        [{L,Blk0#b_blk{is=Phis ++ Rest}}|Acc]
                end
        end,
    maps:from_list(lists:foldl(Keep, Acc0, Ls)).

%% Keep only the phi arguments whose predecessor label is a member
%% of Reachable.
prune_phi(#b_set{args=Args0}=Phi, Reachable) ->
    IsLive = fun({_,Pred}) -> gb_sets:is_element(Pred, Reachable);
                (_) -> false
             end,
    Phi#b_set{args=lists:filter(IsLive, Args0)}.

%%%
%%% Fix tuples.
%%%

%% fix_tuples(St0) -> St.
%%  If compatibility with a previous version of Erlang has been
%%  requested, tuple creation must be split into two instructions to
%%  mirror the way tuples are created in BEAM prior to OTP 22.
%%  Each put_tuple instruction is split into put_tuple_arity followed
%%  by put_tuple_elements.

fix_tuples(#st{ssa=Blocks0,cnt=Count0}=St) ->
    Split =
        fun(#b_set{op=put_tuple,args=Elements}=Put, Cnt0) ->
                Arity = #b_literal{val=length(Elements)},
                %% put_tuple_elements gets a fresh destination variable
                %% of its own; the original destination stays with
                %% put_tuple_arity.
                {Ignored,Cnt} = new_var('@ssa_ignore', Cnt0),
                PutArity = Put#b_set{op=put_tuple_arity,args=[Arity]},
                PutElements = #b_set{dst=Ignored,op=put_tuple_elements,
                                     args=Elements},
                {[PutArity,PutElements],Cnt};
           (Other, Cnt) ->
                {[Other],Cnt}
        end,
    {Blocks,Count} = beam_ssa:flatmapfold_instrs_rpo(Split, [0],
                                                     Count0, Blocks0),
    St#st{ssa=Blocks,cnt=Count}.

%%%
%%% Find out where frames should be placed.
%%%

%% place_frames(St0) -> St.
%%   Return a list of the labels for the blocks that need stack frame
%%   allocation instructions.
%%
%%   This function attempts to place stack frames as tight as possible
%%   around the code, to avoid building stack frames for code paths
%%   that don't need one.
%%
%%   Stack frames are placed in blocks that dominate all of their
%%   descendants. That guarantees that the deallocation instructions
%%   cannot be reached from other execution paths that didn't set up
%%   a stack frame or set up a stack frame with a different size.

place_frames(#st{ssa=Blocks}=St) ->
    Doms = beam_ssa:dominators(Blocks),
    Labels = beam_ssa:rpo(Blocks),
    %% Start out with no block tried and no frame placed. Only the
    %% resulting list of frame labels is kept.
    NoneTried = gb_sets:empty(),
    {Frames,_Tried} = place_frames_1(Labels, Blocks, Doms, NoneTried, []),
    St#st{frames=Frames}.

%% place_frames_1(Labels, Blocks, Doms, Tried0, Frames0) -> {Frames,Tried}.
%%  Go through the labels in order, trying to place a frame in every
%%  block that needs one. A {need_frame,Label,Tried} exception thrown
%%  while handling the remaining labels is caught by the nearest
%%  enclosing call whose block dominates Label; other calls rethrow it.
place_frames_1([L|Ls], Blocks, Doms, Tried0, Frames0) ->
    Blk = maps:get(L, Blocks),
    case need_frame(Blk) of
        true ->
            %% This block needs a frame. Try to place it here.
            {Frames,Tried} = do_place_frame(L, Blocks, Doms, Tried0, Frames0),

            %% Successfully placed. Try to place more frames in descendants
            %% that are not dominated by this block.
            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
        false ->
            try
                place_frames_1(Ls, Blocks, Doms, Tried0, Frames0)
            catch
                throw:{need_frame,For,Tried1}=Reason ->
                    %% A descendant block needs a stack frame. Try to
                    %% place it here.
                    case is_dominated_by(For, L, Doms) of
                        true ->
                            %% Try to place a frame here. The remaining
                            %% labels are then processed again, now with
                            %% the updated Tried and Frames.
                            {Frames,Tried} = do_place_frame(L, Blocks, Doms,
                                                            Tried1, Frames0),
                            place_frames_1(Ls, Blocks, Doms, Tried, Frames);
                        false ->
                            %% Wrong place. This block does not dominate
                            %% the block that needs the frame. Pass it on
                            %% to our ancestors.
                            throw(Reason)
                    end
            end
    end;
place_frames_1([], _, _, Tried, Frames) ->
    {Frames,Tried}.

%% do_place_frame(Label, Blocks, Dominators, Tried0, Frames0) -> {Frames,Tried}.
%%  Try to place a frame in this block. This function returns
%%  successfully if it either succeeds at placing a frame in this
%%  block, if an ancestor that dominates this block has already placed
%%  a frame, or if we have already tried to put a frame in this block.
%%
%%  A {need_frame,Label,Tried} exception will be thrown if this
%%  block is not suitable for having a stack frame (i.e. it does not dominate
%%  all of its descendants). The exception means that an ancestor will have to
%%  place the frame needed by this block.

do_place_frame(L, Blocks, Doms, Tried0, Frames) ->
    AlreadyTried = gb_sets:is_element(L, Tried0),
    if
        AlreadyTried ->
            %% This block has been attempted before; nothing to do.
            {Frames,Tried0};
        true ->
            %% First attempt for this block. Remember that.
            Tried = gb_sets:insert(L, Tried0),
            case place_frame_here(L, Blocks, Doms, Frames) of
                ancestor ->
                    %% The frame cannot be placed here; some ancestor
                    %% will have to take care of it.
                    throw({need_frame,L,Tried});
                no ->
                    %% A frame placed earlier already covers this block.
                    {Frames,Tried};
                yes ->
                    %% Place the frame in this block.
                    {[L|Frames],Tried}
            end
    end.

%% place_frame_here(Label, Blocks, Doms, Frames) -> no|yes|ancestor.
%%  Determine whether a frame should be placed in block Label.

place_frame_here(L, Blocks, Doms, Frames) ->
    %% Is this block dominated by any block that already has a frame?
    B0 = any(fun(DomBy) ->
                     is_dominated_by(L, DomBy, Doms)
             end, Frames),
    case B0 of
        true ->
            %% This block is dominated by an ancestor block that
            %% defines a frame. Not needed/allowed to put a frame
            %% here.
            no;
        false ->
            %% No frame in any ancestor. We need a frame.
            %% Now check whether the frame can be placed here.
            %% If this block dominates all of its descendants
            %% and the predecessors of any phi nodes it can be
            %% placed here.
            Descendants = beam_ssa:rpo([L], Blocks),
            PhiPredecessors = phi_predecessors(L, Blocks),
            MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
            Dominates = all(fun(?BADARG_BLOCK) ->
                                    %% This block defines no variables and calls
                                    %% erlang:error(badarg). It does not matter
                                    %% whether L dominates ?BADARG_BLOCK or not;
                                    %% it is still safe to put the frame in L.
                                    true;
                               (Bl) ->
                                    is_dominated_by(Bl, L, Doms)
                            end, MustDominate),

            %% Also, this block must not be a loop header.
            IsLoopHeader = is_loop_header(L, Blocks),
            %% 'yes' places the frame here; 'ancestor' tells the caller
            %% that the frame must be placed in some ancestor instead.
            case Dominates andalso not IsLoopHeader of
                true -> yes;
                false -> ancestor
            end
    end.

%% phi_predecessors(Label, Blocks) ->
%%  Return all predecessors referenced in phi nodes.

phi_predecessors(L, Blocks) ->
    #b_blk{is=Is} = maps:get(L, Blocks),
    %% One list of predecessor labels per phi node, concatenated in order.
    PerPhi = [[Pred || {_,Pred} <- PhiArgs] ||
                 #b_set{op=phi,args=PhiArgs} <- Is],
    lists:append(PerPhi).

%% is_dominated_by(Label, DominatedBy, Dominators) -> true|false.
%%  Test whether block Label is dominated by block DominatedBy.

is_dominated_by(L, DomBy, Doms) ->
    %% Doms maps each label to the ordered set of labels dominating it.
    ordsets:is_element(DomBy, maps:get(L, Doms)).

%% need_frame(#b_blk{}) -> true|false.
%%  Test whether any of the instructions in the block requires a stack frame.

need_frame(#b_blk{is=Is,last=Last}) ->
    %% A block that ends with a return is examined in a 'return'
    %% context carrying the returned value; any other block is
    %% examined in a 'body' context.
    Context = case Last of
                  #b_ret{arg=Ret} -> {return,Ret};
                  _ -> body
              end,
    need_frame_1(Is, Context).

%% need_frame_1(Is, Context) -> true|false.
%%  Context is {return,Var} for a block ending with a return of Var,
%%  and otherwise 'body'. A call that is the last instruction of a
%%  returning block is re-examined with the context 'tail' if its
%%  result is the returned value, and with 'body' if it is not.
need_frame_1([#b_set{op=make_fun,dst=Fun}|Is], {return,_}=Context) ->
    %% Since make_fun clobbers X registers, a stack frame is needed if
    %% any of the following instructions use any other variable than
    %% the one holding the reference to the created fun.
    need_frame_1(Is, Context) orelse
        case beam_ssa:used(#b_blk{is=Is,last=#b_ret{arg=Fun}}) of
            [Fun] -> false;
            [_|_] -> true
        end;
need_frame_1([#b_set{op=new_try_tag}|_], _) ->
    true;
need_frame_1([#b_set{op=call,dst=Val}]=Is, {return,Ret}) ->
    %% The call is the last instruction of a block that returns.
    %% Replace the context and let the next clause decide.
    if
        Val =:= Ret -> need_frame_1(Is, tail);
        true -> need_frame_1(Is, body)
    end;
need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
    case Func of
        #b_remote{mod=#b_literal{val=Mod},
                  name=#b_literal{val=Name},
                  arity=Arity} ->
            case erl_bifs:is_exit_bif(Mod, Name, Arity) of
                true ->
                    %% A call to an exit BIF does not need a frame.
                    false;
                false ->
                    %% A frame is needed unless this is the last
                    %% instruction in a tail context and the callee
                    %% is not a trapping BIF.
                    Context =:= body orelse
                        Is =/= [] orelse
                        is_trap_bif(Mod, Name, Arity)
                end;
        #b_remote{} ->
            %% This is an apply(), which always needs a frame.
            true;
        #b_local{} ->
            %% A local call needs a frame unless it is the last
            %% instruction in a tail context.
            Context =:= body orelse Is =/= [];
        _ ->
             %% A fun call always needs a frame.
            true
    end;
need_frame_1([I|Is], Context) ->
    beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
need_frame_1([], _) -> false.

%% is_trap_bif(Mod, Name, Arity) -> true|false.
%%   Test whether we need a stack frame for this BIF.

is_trap_bif(erlang, Name, Arity) ->
    %% The name/arity pairs in module erlang that need a stack frame.
    TrapBifs = [{'!',2},
                {link,1},
                {unlink,1},
                {monitor_node,2},
                {group_leader,2},
                {exit,2}],
    lists:member({Name,Arity}, TrapBifs);
is_trap_bif(_, _, _) ->
    false.

%%%
%%% Fix variables used in matching in receive.
%%%
%%% The loop_rec/2 instruction may return a reference to a
%%% message outside of any heap or heap fragment. If the message
%%% does not match, it is not allowed to store any reference to
%%% the message (or part of the message) on the stack. If we do,
%%% the message will be corrupted if there happens to be a GC.
%%%
%%% Here we make sure to introduce copies of variables that are
%%% matched out and subsequently used after the remove_message/0
%%% instructions. That will make sure that only X registers are
%%% used during matching.
%%%
%%% Depending on where variables are defined and used, they must
%%% be handled in two different ways.
%%%
%%% Variables that are always defined in the receive (before branching
%%% out into the different clauses of the receive) and used after the
%%% receive must be handled in the following way: Before each
%%% remove_message instruction, each such variable must be copied, and
%%% all variables must be consolidated using a phi node in the
%%% common exit block for the receive.
%%%
%%% Variables that are matched out and used in the same clause
%%% need copy instructions before the remove_message instruction
%%% in that clause.
%%%

fix_receives(#st{ssa=Blocks0,cnt=Count0}=St) ->
    %% Iterate over a list snapshot of the blocks; the block map itself
    %% is threaded through and updated along the way.
    Snapshot = maps:to_list(Blocks0),
    {Blocks,Count} = fix_receives_1(Snapshot, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.

%% Only blocks that begin with a peek_message instruction are
%% processed; all other blocks are passed over.
fix_receives_1([{L,#b_blk{is=[#b_set{op=peek_message}|_]}}|Ls],
               Blocks0, Count0) ->
    RmLabels = find_rm_blocks(L, Blocks0),
    Exit = find_loop_exit(RmLabels, Blocks0),
    AllDefs = beam_ssa:def([L], Blocks0),

    %% First handle the variables used in the common exit block...
    Common = recv_common(AllDefs, Exit, Blocks0),
    {Blocks1,Count1} = recv_fix_common(Common, Exit, RmLabels,
                                       Blocks0, Count0),

    %% ...then the remaining variables, clause by clause.
    ClauseDefs = ordsets:subtract(AllDefs, Common),
    {Blocks,Count} = fix_receive(RmLabels, ClauseDefs, Blocks1, Count1),
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([{_L,#b_blk{}}|Ls], Blocks, Count) ->
    fix_receives_1(Ls, Blocks, Count);
fix_receives_1([], Blocks, Count) ->
    {Blocks,Count}.

%% Return the variables in Defs that are used, but not defined, in the
%% blocks that beam_ssa:def_used/2 reports for the exit block.
recv_common(_Defs, none, _Blocks) ->
    %% Without a common exit block there are no common variables.
    [];
recv_common(Defs, Exit, Blocks) ->
    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
    DefinedOutside = ordsets:subtract(Defs, ExitDefs),
    ordsets:intersection(DefinedOutside, ExitUsed).

%% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
%%                 Blocks0, Count0) -> {Blocks,Count}.
%%  Handle variables always defined in a receive and used
%%  in the exit block following the receive.

recv_fix_common([Msg0|T], Exit, Rm, Blocks0, Count0) ->
    %% Msg is the new variable that replaces Msg0 from the exit
    %% block onwards.
    {Msg,Count1} = new_var('@recv', Count0),
    Blocks1 = beam_ssa:rename_vars(#{Msg0=>Msg}, [Exit], Blocks0),
    %% Create one new variable per remove_message block.
    N = length(Rm),
    {MsgVars,Count} = new_vars(duplicate(N, '@recv'), Count1),
    %% Join those variables in a phi node placed first in the exit
    %% block, defining Msg.
    PhiArgs = fix_exit_phi_args(MsgVars, Rm, Exit, Blocks1),
    Phi = #b_set{op=phi,dst=Msg,args=PhiArgs},
    ExitBlk0 = maps:get(Exit, Blocks1),
    ExitBlk = ExitBlk0#b_blk{is=[Phi|ExitBlk0#b_blk.is]},
    Blocks2 = Blocks1#{Exit:=ExitBlk},
    %% Finally, copy Msg0 to the corresponding new variable in each
    %% remove_message block.
    Blocks = recv_fix_common_1(MsgVars, Rm, Msg0, Blocks2),
    recv_fix_common(T, Exit, Rm, Blocks, Count);
recv_fix_common([], _, _, Blocks, Count) ->
    {Blocks,Count}.

%% For each pair of a new variable and a remove_message label: rename
%% Msg to the new variable starting from that label, and insert a copy
%% from Msg to the new variable after any phi nodes in that block.
recv_fix_common_1([], [], _Msg, Blocks) ->
    Blocks;
recv_fix_common_1([NewVar|NewVars], [RmL|RmLs], Msg, Blocks0) ->
    %% The renaming is done before the copy is inserted, so the copy
    %% still reads the original variable.
    Blocks1 = beam_ssa:rename_vars(#{Msg=>NewVar}, [RmL], Blocks0),
    #b_blk{is=Is} = RmBlk = maps:get(RmL, Blocks1),
    Copy = #b_set{op=copy,dst=NewVar,args=[Msg]},
    Blocks = Blocks1#{RmL:=RmBlk#b_blk{is=insert_after_phis(Is, [Copy])}},
    recv_fix_common_1(NewVars, RmLs, Msg, Blocks).

%% Build the phi arguments for the exit block: each variable is paired
%% with every predecessor of the exit block found among the blocks
%% that beam_ssa:rpo/2 returns for the corresponding label.
fix_exit_phi_args([], [], _Exit, _Blocks) ->
    [];
fix_exit_phi_args([Var|Vars], [RmL|RmLs], Exit, Blocks) ->
    Reachable = beam_ssa:rpo([RmL], Blocks),
    Preds = exit_predecessors(Reachable, Exit, Blocks),
    PhiArgs = [{Var,Pred} || Pred <- Preds],
    PhiArgs ++ fix_exit_phi_args(Vars, RmLs, Exit, Blocks).

%% Return the labels (in their original order) of the blocks that
%% have Exit as one of their successors.
exit_predecessors(Ls, Exit, Blocks) ->
    [L || L <- Ls,
          member(Exit, beam_ssa:successors(map_get(L, Blocks)))].

%% fix_receive([Label], Defs, Blocks0, Count0) -> {Blocks,Count}.
%%  Add a copy instruction for all variables that are matched out and
%%  later used within a clause of the receive.

fix_receive([], _Defs, Blocks, Count) ->
    {Blocks,Count};
fix_receive([L|Ls], Defs, Blocks0, Count0) ->
    {RmDefs,RmUsed} = beam_ssa:def_used([L], Blocks0),
    %% The variables of interest are those in Defs that are used, but
    %% not defined, from this label onwards.
    Used = ordsets:intersection(ordsets:subtract(Defs, RmDefs), RmUsed),
    Bases = [Base || #b_var{name=Base} <- Used],
    {NewVars,Count} = new_vars(Bases, Count0),
    Ren = zip(Used, NewVars),
    Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
    %% Insert the copies after the renaming, so that they read the
    %% original variables.
    #b_blk{is=Is0} = Blk0 = maps:get(L, Blocks1),
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Ren],
    Blk = Blk0#b_blk{is=insert_after_phis(Is0, Copies)},
    fix_receive(Ls, Defs, maps:put(L, Blk, Blocks1), Count).

%% find_loop_exit([Label], Blocks) -> Label | none.
%%  Find the block to which control is transferred when the
%%  receive loop is exited.

find_loop_exit([L1,L2|_], Blocks) ->
    %% Only the first two labels are considered. Their block lists are
    %% reversed so that the comparison starts from the far end.
    [Rev1,Rev2] = [reverse(beam_ssa:rpo([L], Blocks)) || L <- [L1,L2]],
    find_loop_exit_1(Rev1, Rev2, none);
find_loop_exit(_, _) ->
    %% Fewer than two labels.
    none.

%% Walk both lists in step for as long as their heads are equal and
%% return the last element they had in common, or the initial Exit
%% if the very first heads differ (or either list is empty).
find_loop_exit_1(Path1, Path2, Exit) ->
    case {Path1,Path2} of
        {[Common|Rest1],[Common|Rest2]} ->
            find_loop_exit_1(Rest1, Rest2, Common);
        {_,_} ->
            Exit
    end.

%% find_rm_blocks(StartLabel, Blocks) -> [Label].
%%  Find all blocks that start with remove_message within the receive
%%  loop whose peek_message label is StartLabel.

find_rm_blocks(L, Blocks) ->
    %% The search starts at the successors of the start block. The
    %% start block itself is marked as seen from the beginning.
    Successors = beam_ssa:successors(maps:get(L, Blocks)),
    find_rm_blocks_1(Successors, gb_sets:singleton(L), Blocks).

%% find_rm_blocks_1(Worklist, Seen, Blocks) -> [Label].
%%  Process the labels in the worklist, visiting each label at most
%%  once. The successors of a neutral block are put at the front of
%%  the worklist.
find_rm_blocks_1([L|Ls], Seen0, Blocks) ->
    case gb_sets:is_member(L, Seen0) of
        true ->
            %% Already visited.
            find_rm_blocks_1(Ls, Seen0, Blocks);
        false ->
            Seen = gb_sets:insert(L, Seen0),
            Blk = maps:get(L, Blocks),
            case find_rm_act(Blk#b_blk.is) of
                prune ->
                    %% Looping back. Don't look at any successors.
                    find_rm_blocks_1(Ls, Seen, Blocks);
                continue ->
                    %% Neutral block. Do nothing here, but look at
                    %% all successors.
                    Succ = beam_ssa:successors(Blk),
                    find_rm_blocks_1(Succ++Ls, Seen, Blocks);
                found ->
                    %% Found remove_message instruction. The successors
                    %% of this block are not searched.
                    [L|find_rm_blocks_1(Ls, Seen, Blocks)]
            end
    end;
find_rm_blocks_1([], _, _) -> [].

%% Decide what to do with a block, based on the first instruction in
%% it that is one of the operations below:
%%   found    - remove_message
%%   prune    - peek_message, recv_next, wait_timeout, or wait
%%   continue - none of those operations occur in the block
find_rm_act([]) ->
    continue;
find_rm_act([#b_set{op=remove_message}|_]) ->
    found;
find_rm_act([#b_set{op=Op}|_])
  when Op =:= peek_message;
       Op =:= recv_next;
       Op =:= wait_timeout;
       Op =:= wait ->
    prune;
find_rm_act([#b_set{}|Is]) ->
    find_rm_act(Is).

%%%
%%% Find out which variables need to be stored in Y registers.
%%%

%% Per-block state used while looking for Y registers:
%%   d - ordered set of the variables defined so far
%%   k - ordered set of the variables killed so far
-record(dk, {d :: ordsets:ordset(var_name()),
             k :: ordsets:ordset(var_name())
            }).

%% find_yregs(St0) -> St.
%%  Find all variables that must be stored in Y registers. Annotate
%%  the blocks that allocate frames with the set of Y registers
%%  used within that stack frame.
%%
%%  Basically, we follow all execution paths starting from a block
%%  that allocates a frame, keeping track of all defined registers
%%  and all registers killed by an instruction that clobbers X
%%  registers. For every use of a variable, we check if it is in
%%  the set of killed variables; if it is, it must be stored in a Y
%%  register.

find_yregs(#st{frames=[]}=St) ->
    %% No frames, nothing to do.
    St;
find_yregs(#st{frames=[_|_]=Frames,args=Args,ssa=Blocks0}=St) ->
    %% The function arguments that are variables count as defined
    %% from the start.
    ArgVars = [V || #b_var{}=V <- Args],
    FrameDefs = find_defs(Frames, Blocks0, ArgVars),
    St#st{ssa=find_yregs_1(FrameDefs, Blocks0)}.

%% For each frame block, compute the Y registers used within the frame
%% and store them in a 'yregs' annotation on the frame block.
find_yregs_1([], Blocks) ->
    Blocks;
find_yregs_1([{F,Defs}|Fs], Blocks0) ->
    %% On entry to the frame block, Defs are defined and nothing
    %% has been killed.
    Initial = #{F=>#dk{d=Defs,k=[]}},
    Ls = beam_ssa:rpo([F], Blocks0),
    Yregs = find_yregs_2(Ls, Blocks0, Initial, []),
    FrameBlk = beam_ssa:add_anno(yregs, Yregs, maps:get(F, Blocks0)),
    find_yregs_1(Fs, Blocks0#{F:=FrameBlk}).

%% find_yregs_2(Labels, Blocks, DKMap, Yregs0) -> Yregs.
%%  Visit the blocks in the given order. The defined/killed state
%%  for a block is fetched from DKMap, updated by the instructions
%%  and the terminator of the block, and then propagated to all
%%  successors of the block.
find_yregs_2([], _Blocks, _D, Yregs) ->
    Yregs;
find_yregs_2([L|Ls], Blocks, D0, Yregs0) ->
    #b_blk{is=Is,last=Last} = Blk = maps:get(L, Blocks),
    {Yregs1,DK} = find_yregs_is(Is, maps:get(L, D0), Yregs0),
    Yregs = find_yregs_terminator(Last, DK, Yregs1),
    D = find_update_succ(beam_ssa:successors(Blk), DK, D0),
    find_yregs_2(Ls, Blocks, D, Yregs).

%% find_defs(Frames, Blocks, Defs) -> [{FrameLabel,OrderedDefs}].
%%  Starting from block 0, find the variables that are defined
%%  when each of the frame-allocating blocks in Frames is reached.
%%  Defs holds the variables that are defined on entry to block 0.
find_defs(Frames, Blocks, Defs) ->
    FrameSet = gb_sets:from_list(Frames),
    {FrameDefs,_Seen} = find_defs_1([0], Blocks, FrameSet,
                                    gb_sets:empty(), Defs, []),
    FrameDefs.

%% find_defs_1(Labels, Blocks, Frames, Seen0, Defs, Acc0) -> {Acc,Seen}.
%%  Walk the blocks reachable from Labels. Frames is a gb_set of the
%%  labels of the frame-allocating blocks, Seen0 is a gb_set of the
%%  ordinary blocks already visited, and Defs is the (unordered) list
%%  of variables defined on the path leading to Labels.
%%
%%  Each time a frame block is reached, {Label,OrderedDefs} is
%%  prepended to the accumulator.
find_defs_1([L|Ls], Blocks, Frames, Seen0, Defs0, Acc0) ->
    case gb_sets:is_member(L, Frames) of
        true ->
            %% A block that allocates a frame. Record the variables
            %% defined so far. Its successors are not followed, and
            %% it is not added to the set of seen blocks.
            OrderedDefs = ordsets:from_list(Defs0),
            find_defs_1(Ls, Blocks, Frames, Seen0, Defs0,
                        [{L,OrderedDefs}|Acc0]);
        false ->
            case gb_sets:is_member(L, Seen0) of
                true ->
                    %% Already visited; skip it.
                    find_defs_1(Ls, Blocks, Frames, Seen0, Defs0, Acc0);
                false ->
                    %% Mark this block as seen, and handle the remaining
                    %% labels at this level (using the definitions valid
                    %% on entry to this block) before descending.
                    Seen1 = gb_sets:insert(L, Seen0),
                    {Acc,Seen} = find_defs_1(Ls, Blocks, Frames, Seen1, Defs0, Acc0),
                    %% The successors also see the variables defined
                    %% by the instructions in this block.
                    #b_blk{is=Is} = Blk = maps:get(L, Blocks),
                    Defs = find_defs_is(Is, Defs0),
                    Successors = beam_ssa:successors(Blk),
                    find_defs_1(Successors, Blocks, Frames, Seen, Defs, Acc)
            end
    end;
find_defs_1([], _, _, Seen, _, Acc) ->
    {Acc,Seen}.

%% find_defs_is(Is, Defs0) -> Defs.
%%  Prepend the destination variable of each instruction to Defs0.
find_defs_is(Is, Acc) ->
    lists:foldl(fun(#b_set{dst=Dst}, Defs) -> [Dst|Defs] end, Acc, Is).

%% find_update_succ(Successors, DK, DKMap0) -> DKMap.
%%  Propagate the defined/killed state DK to each successor. If a
%%  successor already has a state (from another predecessor), the
%%  defined sets are intersected and the killed sets are joined.
find_update_succ([], _DK, D) ->
    D;
find_update_succ([S|Ss], #dk{d=Defs0,k=Killed0}=DK0, D) ->
    DK = case D of
             #{S:=#dk{d=Defs1,k=Killed1}} ->
                 #dk{d=ordsets:intersection(Defs0, Defs1),
                     k=ordsets:union(Killed0, Killed1)};
             #{} ->
                 DK0
         end,
    find_update_succ(Ss, DK0, D#{S=>DK}).

%% find_yregs_is(Is, DK0, Yregs0) -> {Yregs,DK}.
%%  Process the instructions of a block. Every used variable that
%%  is in the killed set is added to Yregs. An instruction that
%%  clobbers the X registers moves all currently defined variables
%%  to the killed set.
find_yregs_is([], DK, Yregs) ->
    {Yregs,DK};
find_yregs_is([#b_set{dst=Dst}=I|Is], #dk{d=Defs,k=Killed}=DK0, Yregs0) ->
    NeedY = ordsets:intersection(beam_ssa:used(I), Killed),
    Yregs = ordsets:union(Yregs0, NeedY),
    DK = case beam_ssa:clobbers_xregs(I) of
             true ->
                 %% Only the result of this instruction is defined
                 %% (and not killed) afterwards.
                 #dk{d=[Dst],k=ordsets:union(Defs, Killed)};
             false ->
                 DK0#dk{d=ordsets:add_element(Dst, Defs)}
         end,
    find_yregs_is(Is, DK, Yregs).

%% find_yregs_terminator(Terminator, DK, Yregs0) -> Yregs.
%%  Add the killed variables used by the terminator to Yregs0.
find_yregs_terminator(Terminator, #dk{k=Killed}, Yregs) ->
    NeedY = ordsets:intersection(beam_ssa:used(Terminator), Killed),
    ordsets:union(Yregs, NeedY).

%%%
%%% Try to reduce the size of the stack frame by adding explicit
%%% 'copy' instructions for return values from 'call' and 'make_fun' that
%%% need to be saved in Y registers. Here is an example to show
%%% how that's useful. First, here is the Erlang code:
%%%
%%% f(Pid) ->
%%%    Res = foo(42),
%%%    _ = node(Pid),
%%%    bar(),
%%%    Res.
%%%
%%% Compiled to SSA format, the main part of the code looks like this:
%%%
%%% 0:
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% It can be seen that the variables Pid and Res must be saved in Y
%%% registers in order to survive the function calls. A previous sub
%%% pass has inserted a 'copy' instruction to save the value of the
%%% variable Pid:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The Res and Pid:4 variables must be assigned to different Y registers
%%% because they are live at the same time. copy_retval() inserts a
%%% 'copy' instruction to copy Res to a new variable:
%%%
%%% 0:
%%%   Pid:4 = copy Pid
%%%   Res:6 = call local literal foo/1, literal 42
%%%   _1 = bif:node Pid:4
%%%   @ssa_bool = succeeded _1
%%%   br @ssa_bool, label 3, label 1
%%%
%%% 3:
%%%   Res = copy Res:6
%%%   @ssa_ignored = call local literal bar/0
%%%   ret Res
%%%
%%% The new variable Res:6 is used to capture the return value from the call.
%%% The variables Pid:4 and Res are no longer live at the same time, so they
%%% can be assigned to the same Y register.
%%%

%% copy_retval(St0) -> St.
%%  Run the copy insertion for every frame-allocating block,
%%  threading the variable counter through the rewrite.
copy_retval(#st{frames=Frames,ssa=Blocks0,cnt=Cnt0}=St0) ->
    {Blocks,Cnt} = copy_retval_1(Frames, Blocks0, Cnt0),
    St0#st{ssa=Blocks,cnt=Cnt}.

%% copy_retval_1(Frames, Blocks0, Count0) -> {Blocks,Count}.
%%  For each frame block, build the set of variables that are in
%%  Y registers (adjusted for the 'copy' instructions already
%%  present in the frame block) and rewrite all blocks reachable
%%  from the frame block.
copy_retval_1(Frames, Blocks, Count) ->
    PerFrame =
        fun(F, {Blocks0,Count0}) ->
                #b_blk{anno=#{yregs:=Yregs0},is=Is} = maps:get(F, Blocks0),
                Yregs = collect_yregs(Is, gb_sets:from_list(Yregs0)),
                Ls = beam_ssa:rpo([F], Blocks0),
                {_,_} = copy_retval_2(Ls, Yregs, none, Blocks0, Count0)
        end,
    lists:foldl(PerFrame, {Blocks,Count}, Frames).

%% collect_yregs(Is, Yregs0) -> Yregs.
%%  For each 'copy' of a variable, replace the source variable in
%%  the set with the destination variable. The source must already
%%  be a member of the set.
collect_yregs(Is, Yregs) ->
    Step = fun(#b_set{op=copy,dst=New,args=[#b_var{}=Old]}, Set0) ->
                   true = gb_sets:is_member(Old, Set0), %Assertion.
                   Set = gb_sets:delete(Old, Set0),
                   gb_sets:insert(New, Set);
              (#b_set{}, Set) ->
                   Set
           end,
    lists:foldl(Step, Yregs, Is).

%% copy_retval_2(Labels, Yregs, PendingCopy, Blocks0, Count0) ->
%%       {Blocks,Count}.
%%  Rewrite the instructions of each block in Labels. PendingCopy is
%%  either 'none' or a 'copy' instruction that has been created for
%%  the previous block but has not yet been placed in the code.
copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
    #b_blk{is=Is0,last=Last} = Blk = maps:get(L, Blocks0),
    %% RC is true if a copy still pending at the end of this block
    %% may be handed over to the next block: the block must end in
    %% a branch whose failure label is ?BADARG_BLOCK and whose success
    %% label is the next block to be processed.
    RC = case {Last,Ls} of
             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
                 true;
             {_,_} ->
                 false
         end,
    case copy_retval_is(Is0, RC, Yregs, Copy0, Count0, []) of
        {Is,Count} ->
            %% No copy is pending after this block.
            case Copy0 =:= none andalso Count0 =:= Count of
                true ->
                    %% There was no incoming copy and no new variable
                    %% was created; leave the block untouched.
                    copy_retval_2(Ls, Yregs, none, Blocks0, Count0);
                false ->
                    Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
                    copy_retval_2(Ls, Yregs, none, Blocks, Count)
            end;
        {Is,Count,Copy} ->
            %% A copy is still pending; pass it on to the next block.
            Blocks = Blocks0#{L=>Blk#b_blk{is=Is}},
            copy_retval_2(Ls, Yregs, Copy, Blocks, Count)
    end;
%% No copy is allowed to be pending when all blocks have been processed.
copy_retval_2([], _Yregs, none, Blocks, Count) ->
    {Blocks,Count}.

%% copy_retval_is(Is, RC, Yregs, PendingCopy, Count0, Acc) ->
%%       {Is,Count} | {Is,Count,PendingCopy}.
%%  Rewrite the instructions of one block. Acc holds the already
%%  processed instructions in reverse order. PendingCopy is 'none'
%%  or a 'copy' instruction waiting to be placed. The three-tuple is
%%  returned only when RC is true and a copy is still pending at the
%%  end of the block.

%% The last instruction is put_tuple_elements and the copy may not
%% be carried over: substitute its arguments and place the pending
%% copy after it.
copy_retval_is([#b_set{op=put_tuple_elements,args=Args0}=I0], false, _Yregs,
           Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    {reverse(Acc, [I|acc_copy([], Copy)]),Count};
%% A single remaining instruction and the copy may not be carried
%% over: place the pending copy before that instruction.
copy_retval_is([#b_set{}]=Is, false, _Yregs, Copy, Count, Acc) ->
    {reverse(Acc, acc_copy(Is, Copy)),Count};
%% The block ends with an instruction followed by 'succeeded':
%% place the pending copy before both of them.
copy_retval_is([#b_set{},#b_set{op=succeeded}]=Is, false, _Yregs, Copy, Count, Acc) ->
    {reverse(Acc, acc_copy(Is, Copy)),Count};
%% A 'call' or 'make_fun'. Any pending copy is placed before it.
%% If the result must be in a Y register, capture the result in a
%% fresh variable and make a copy to the original variable pending.
copy_retval_is([#b_set{op=Op,dst=#b_var{name=RetName}=Dst}=I0|Is], RC, Yregs,
           Copy0, Count0, Acc0) when Op =:= call; Op =:= make_fun ->
    {I1,Count1,Acc} = place_retval_copy(I0, Yregs, Copy0, Count0, Acc0),
    case gb_sets:is_member(Dst, Yregs) of
        true ->
            {NewVar,Count} = new_var(RetName, Count1),
            Copy = #b_set{op=copy,dst=Dst,args=[NewVar]},
            I = I1#b_set{dst=NewVar},
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc]);
        false ->
            copy_retval_is(Is, RC, Yregs, none, Count1, [I1|Acc])
    end;
%% Any other instruction. While the copy is pending, references to
%% its destination are replaced with its source.
copy_retval_is([#b_set{args=Args0}=I0|Is], RC, Yregs, Copy, Count, Acc) ->
    I = I0#b_set{args=copy_sub_args(Args0, Copy)},
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% The pending copy must be placed before this
            %% instruction (Acc is in reverse order).
            copy_retval_is(Is, RC, Yregs, none, Count, [I|acc_copy(Acc, Copy)]);
        false ->
            copy_retval_is(Is, RC, Yregs, Copy, Count, [I|Acc])
        end;
%% End of the block.
copy_retval_is([], RC, _, Copy, Count, Acc) ->
    case {Copy,RC} of
        {none,_} ->
            {reverse(Acc),Count};
        {#b_set{},true} ->
            %% Hand over the pending copy to the caller.
            {reverse(Acc),Count,Copy};
        {#b_set{},false} ->
            %% Place the pending copy last in the block.
            {reverse(Acc, [Copy]),Count}
    end.

%%
%% Consider this code:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   A = copy A1
%%   B = call bar/1, Var
%%
%% If the Var variable is no longer used after this code, its Y register
%% can't be reused for A. To allow the Y register to be reused
%% we will need to insert 'copy' instructions for arguments that are
%% in Y registers:
%%
%%   Var = ...
%%   ...
%%   A1 = call foo/0
%%   Var1 = copy Var
%%   A = copy A1
%%   B = call bar/1, Var1
%%

place_retval_copy(I, _Yregs, none, Count, Acc) ->
    %% There is no pending copy to place.
    {I,Count,Acc};
place_retval_copy(#b_set{args=[Callee|Args0]}=I0, Yregs, Copy, Count0, Acc0) ->
    %% The destination of the pending copy must not itself be copied.
    #b_set{dst=RetVar} = Copy,
    {Args,ArgCopies,Count} =
        copy_func_args(Args0, Yregs, RetVar, Acc0, [], Count0),
    I = I0#b_set{args=[Callee|Args]},
    %% The accumulator is in reverse order, so the pending copy
    %% ends up after the copies of the arguments.
    {I,Count,[Copy|ArgCopies]}.

%% copy_func_args(Args0, Yregs, Avoid, CopyAcc0, Acc, Count0) ->
%%       {Args,CopyAcc,Count}.
%%  For each argument that is a variable in a Y register (except
%%  the variable Avoid), create a 'copy' instruction to a fresh
%%  variable and use the fresh variable as the argument instead.
%%  The new 'copy' instructions are prepended to CopyAcc0.
copy_func_args([], _Yregs, _Avoid, CopyAcc, Acc, Count) ->
    {reverse(Acc),CopyAcc,Count};
copy_func_args([#b_var{name=Base}=Arg|Args], Yregs, Avoid,
               CopyAcc, Acc, Count0) ->
    case gb_sets:is_member(Arg, Yregs) andalso Arg =/= Avoid of
        true ->
            {Fresh,Count} = new_var(Base, Count0),
            Copy = #b_set{op=copy,dst=Fresh,args=[Arg]},
            copy_func_args(Args, Yregs, Avoid,
                           [Copy|CopyAcc], [Fresh|Acc], Count);
        false ->
            copy_func_args(Args, Yregs, Avoid, CopyAcc, [Arg|Acc], Count0)
    end;
copy_func_args([Arg|Args], Yregs, Avoid, CopyAcc, Acc, Count) ->
    %% Not a variable; keep the argument as is.
    copy_func_args(Args, Yregs, Avoid, CopyAcc, [Arg|Acc], Count).

%% acc_copy(Is, PendingCopy) -> Is'.
%%  Prepend the pending 'copy' instruction (if there is one) to
%%  the list of instructions.
acc_copy(Is, #b_set{}=Copy) ->
    [Copy|Is];
acc_copy(Is, none) ->
    Is.

%% copy_sub_args(Args0, PendingCopy) -> Args.
%%  While a 'copy' is pending, its destination is not yet defined.
%%  Replace every reference to the destination with the source
%%  of the copy.
copy_sub_args(Args, none) ->
    Args;
copy_sub_args(Args, #b_set{dst=Dst,args=[Src]}) ->
    lists:map(fun(Arg) -> sub_arg(Arg, Dst, Src) end, Args).

%% sub_arg(Arg, From, To) -> Arg'.
%%  Return To if Arg is exactly equal to From; otherwise return Arg.
sub_arg(Arg, From, To) when Arg =:= From -> To;
sub_arg(Arg, _From, _To) -> Arg.

%%%
%%% Consider:
%%%
%%%   x1/Hd = get_hd x0/Cons
%%%   y0/Tl = get_tl x0/Cons
%%%
%%% Register x0 can't be reused for Hd. If Hd needs to be in x0,
%%% a 'move' instruction must be inserted.
%%%
%%% If we swap get_hd and get_tl when Tl is in a Y register,
%%% x0 can be used for Hd if Cons is not used again:
%%%
%%%   y0/Tl = get_tl x0/Cons
%%%   x0/Hd = get_hd x0/Cons
%%%

%% opt_get_list(St0) -> St.
%%  Apply the get_hd/get_tl swap described above to all blocks,
%%  guided by the register reservations made so far.
opt_get_list(#st{ssa=Blocks0,res=Res}=St) ->
    Reservations = maps:from_list(Res),
    Blocks = opt_get_list_1(beam_ssa:rpo(Blocks0), Reservations, Blocks0),
    St#st{ssa=Blocks}.

%% opt_get_list_1(Labels, Res, Blocks0) -> Blocks.
%%  Update each block in which at least one get_hd/get_tl pair
%%  was swapped; other blocks are left untouched.
opt_get_list_1(Ls, Res, Blocks) ->
    lists:foldl(fun(L, Blocks0) ->
                        #b_blk{is=Is0} = Blk = maps:get(L, Blocks0),
                        case opt_get_list_is(Is0, Res, [], false) of
                            {yes,Is} ->
                                Blocks0#{L:=Blk#b_blk{is=Is}};
                            no ->
                                Blocks0
                        end
                end, Blocks, Ls).

%% opt_get_list_is(Is0, Res, Acc, Changed) -> no | {yes,Is}.
%%  Look for a get_hd instruction directly followed by a get_tl
%%  instruction operating on the same list cell, and swap them
%%  if that is beneficial. Return 'no' if nothing was swapped.
opt_get_list_is([], _Res, Acc, true) ->
    {yes,reverse(Acc)};
opt_get_list_is([], _Res, _Acc, false) ->
    no;
opt_get_list_is([#b_set{op=get_hd,dst=Hd,args=[Cons]}=GetHd,
                 #b_set{op=get_tl,dst=Tl,args=[Cons]}=GetTl|Is],
                Res, Acc, Changed) ->
    %% When this pass is run, only Y registers have been reserved.
    %% A variable without a reservation will therefore end up in
    %% an X register.
    Swap = case Res of
               #{Hd:={y,_}} ->
                   %% Hd will be in a Y register.
                   false;
               #{Tl:={y,_}} ->
                   %% Only Tl will be in a Y register.
                   true;
               #{} ->
                   %% Both will be in X registers.
                   false
           end,
    case Swap of
        true ->
            opt_get_list_is([GetHd|Is], Res, [GetTl|Acc], true);
        false ->
            opt_get_list_is([GetTl|Is], Res, [GetHd|Acc], Changed)
    end;
opt_get_list_is([I|Is], Res, Acc, Changed) ->
    opt_get_list_is(Is, Res, [I|Acc], Changed).

%%%
%%% Number instructions in the order they are executed.
%%%

%% number_instructions(St0) -> St.
%%  Number instructions in the order they are executed. Use a step
%%  size of 2. Don't number phi instructions. All phi variables in
%%  a block will be live one unit before the first non-phi instruction
%%  in the block.

number_instructions(#st{ssa=Blocks0}=St) ->
    %% The blocks are numbered in reverse post order, starting at 1.
    Blocks = number_is_1(beam_ssa:rpo(Blocks0), 1, Blocks0),
    St#st{ssa=Blocks}.

%% number_is_1(Labels, FirstNumber, Blocks0) -> Blocks.
%%  Number the instructions and the terminator of each block,
%%  continuing the numbering from block to block.
number_is_1(Ls, N, Blocks) ->
    Number =
        fun(L, {N0,Blocks0}) ->
                #b_blk{is=Is0,last=Last0} = Blk0 = maps:get(L, Blocks0),
                {Is,N1} = number_is_2(Is0, N0, []),
                %% The terminator gets the number following the
                %% last instruction.
                Last = beam_ssa:add_anno(n, N1, Last0),
                Blk = Blk0#b_blk{is=Is,last=Last},
                {N1+2,Blocks0#{L=>Blk}}
        end,
    {_,Numbered} = lists:foldl(Number, {N,Blocks}, Ls),
    Numbered.

%% number_is_2(Is0, N0, Acc) -> {Is,N}.
%%  Annotate each non-phi instruction with its number, stepping
%%  the number by 2. Phi instructions are passed through unchanged.
%%  Acc holds preceding instructions in reverse order.
number_is_2(Is0, N0, Acc) ->
    Number = fun(#b_set{op=phi}=Phi, N) ->
                     {Phi,N};
                (I, N) ->
                     {beam_ssa:add_anno(n, N, I),N+2}
             end,
    {Is,N} = lists:mapfoldl(Number, N0, Is0),
    {lists:reverse(Acc, Is),N}.

%%%
%%% Calculate live intervals.
%%%

%% live_intervals(St0) -> St.
%%  Calculate the live ranges for all variables and store them,
%%  one list of merged ranges per variable, in the state.
live_intervals(#st{args=Args,ssa=Blocks}=St) ->
    %% The variables among the function arguments get the
    %% range {0,1}.
    ArgRanges = [{Arg,{0,1}} || #b_var{}=Arg <- Args],
    PerBlock = fun(L, _Blk, Acc) -> live_interval_blk(L, Blocks, Acc) end,
    {BlockRanges,_LiveMap} = beam_ssa:fold_po(PerBlock, {[],#{}}, Blocks),
    Families = rel2fam(ArgRanges ++ BlockRanges),
    St#st{intervals=merge_ranges(Families)}.

%% merge_ranges([{Var,Ranges0}]) -> [{Var,Ranges}].
%%  Merge adjoining ranges for each variable.
merge_ranges(Fam) ->
    lists:map(fun({V,Rs}) -> {V,merge_ranges_1(Rs)} end, Fam).

%% merge_ranges_1(Ranges0) -> Ranges.
%%  Join two consecutive ranges when the end of the first one is
%%  exactly equal to the start of the second one.
merge_ranges_1([]) ->
    [];
merge_ranges_1([{Start,Mid1},{Mid2,End}|Rs]) when Mid1 =:= Mid2 ->
    merge_ranges_1([{Start,End}|Rs]);
merge_ranges_1([R|Rs]) ->
    [R|merge_ranges_1(Rs)].

%% live_interval_blk(Label, Blocks, {Vars0,LiveMap0}) -> {Vars,LiveMap}.
%%  Calculate the live ranges within the block Label and prepend them
%%  to Vars0. LiveMap0 maps the label of each already processed block
%%  to the variables that are live at the beginning of that block;
%%  an entry for this block is added.
live_interval_blk(L, Blocks, {Vars0,LiveMap0}) ->
    %% Collect the variables that are live in the successors,
    %% taking the phi nodes of the successors into account.
    Live0 = [],
    Successors = beam_ssa:successors(L, Blocks),
    Live1 = update_successors(Successors, L, Blocks, LiveMap0, Live0),

    %% Add ranges for all variables that are live in the successors.
    %% They are considered used one unit after the terminator.
    #b_blk{is=Is,last=Last} = maps:get(L, Blocks),
    End = beam_ssa:get_anno(n, Last),
    Use = [{V,{use,End+1}} || V <- Live1],

    %% Determine used and defined variables in this block.
    %% UseDef holds, for each variable, a sorted list of
    %% {def,N} and {use,N} tuples.
    FirstNumber = first_number(Is, Last),
    UseDef0 = live_interval_blk_1([Last|reverse(Is)], FirstNumber, Use),
    UseDef = rel2fam(UseDef0),

    %% Update what is live at the beginning of this block and
    %% store it. A variable whose list starts with a 'use' is not
    %% defined in this block; a variable whose list starts with
    %% a 'def' is defined here and therefore not live on entry.
    Used = [V || {V,[{use,_}|_]} <- UseDef],
    Live2 = ordsets:union(Live1, Used),
    Killed = [V || {V,[{def,_}|_]} <- UseDef],
    Live = ordsets:subtract(Live2, Killed),
    LiveMap = LiveMap0#{L=>Live},

    %% Construct the ranges for this block.
    Vars = make_block_ranges(UseDef, FirstNumber, Vars0),
    {Vars,LiveMap}.

%% make_block_ranges(UseDef, FirstNumber, Acc0) -> Acc.
%%  Construct one range for each variable in UseDef and prepend
%%  it to Acc0. The range starts at the definition of the variable,
%%  or at FirstNumber if the variable is not defined in the block,
%%  and it ends at the last use of the variable (or at the
%%  definition if the variable is never used).
make_block_ranges(UseDef, First, Acc) ->
    Range = fun({V,[{def,Def}]}, A) ->
                    [{V,{Def,Def}}|A];
               ({V,[{def,Def}|Uses]}, A) ->
                    {use,Last} = lists:last(Uses),
                    [{V,{Def,Last}}|A];
               ({V,[{use,_}|_]=Uses}, A) ->
                    {use,Last} = lists:last(Uses),
                    [{V,{First,Last}}|A]
            end,
    lists:foldl(Range, Acc, UseDef).

%% live_interval_blk_1(ReversedIs, FirstNumber, Acc0) -> Acc.
%%  Collect {Var,{def,N}} and {Var,{use,N}} tuples for the
%%  terminator and the instructions of a block. A phi node defines
%%  its variable at FirstNumber, and its arguments are not counted
%%  as uses here.
live_interval_blk_1([], _FirstNumber, Acc) ->
    Acc;
live_interval_blk_1([#b_set{op=phi,dst=Dst}|Is], FirstNumber, Acc) ->
    live_interval_blk_1(Is, FirstNumber, [{Dst,{def,FirstNumber}}|Acc]);
live_interval_blk_1([#b_set{dst=Dst}=I|Is], FirstNumber, Acc0) ->
    %% An ordinary instruction (including bs_start_match) defines
    %% its destination and uses its arguments at its own number.
    N = beam_ssa:get_anno(n, I),
    Uses = [{V,{use,N}} || V <- beam_ssa:used(I)],
    live_interval_blk_1(Is, FirstNumber, Uses ++ [{Dst,{def,N}}|Acc0]);
live_interval_blk_1([Terminator|Is], FirstNumber, Acc0) ->
    %% A terminator only uses variables.
    N = beam_ssa:get_anno(n, Terminator),
    Uses = [{V,{use,N}} || V <- beam_ssa:used(Terminator)],
    live_interval_blk_1(Is, FirstNumber, Uses ++ Acc0).

%% first_number([#b_set{}], Terminator) -> InstructionNumber.
%%  Return the number for the first instruction for the block.
%%  Note that this number is one less than the number of the first
%%  non-phi instruction in the block (or of the terminator if the
%%  block contains no non-phi instructions).

first_number(Is, Last) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    %% Skip the leading phi nodes; they have no numbers. Fall back
    %% to the terminator if nothing else remains.
    First = case lists:dropwhile(IsPhi, Is) of
                [I|_] -> I;
                [] -> Last
            end,
    beam_ssa:get_anno(n, First) - 1.

%% update_successors(Successors, Pred, Blocks, LiveMap, Live0) -> Live.
%%  Calculate the variables that are live at the end of the block
%%  Pred by joining what is live at the beginning of each successor,
%%  adjusted for the phi nodes in the successor.
update_successors(Ls, Pred, Blocks, LiveMap, Live0) ->
    lists:foldl(fun(L, Live1) ->
                        Live2 = ordsets:union(Live1, get_live(L, LiveMap)),
                        #b_blk{is=Is} = maps:get(L, Blocks),
                        update_live_phis(Is, Pred, Live2)
                end, Live0, Ls).

%% get_live(Label, LiveMap) -> LiveVars.
%%  Return the variables recorded as live at the beginning of the
%%  block Label, or [] if there is no entry for the block yet.
get_live(L, LiveMap) ->
    maps:get(L, LiveMap, []).

%% update_live_phis(Is, Pred, Live0) -> Live.
%%  Adjust the set of live variables for the phi nodes at the
%%  beginning of a block, as seen from the predecessor Pred:
%%  variables flowing into a phi node from Pred become live, and
%%  the variable defined by the phi node is removed. Processing
%%  stops at the first instruction that is not a phi node.
update_live_phis([#b_set{op=phi,dst=PhiDst,args=PhiArgs}|Is],
                 Pred, Live0) ->
    FromPred = ordsets:from_list([V || {#b_var{}=V,From} <- PhiArgs,
                                       From =:= Pred]),
    Live = ordsets:del_element(PhiDst, ordsets:union(FromPred, Live0)),
    update_live_phis(Is, Pred, Live);
update_live_phis(_NonPhis, _Pred, Live) ->
    Live.

%%%
%%% Reserve Y registers.
%%%

%% reserve_yregs(St0) -> St.
%%  In each block that allocates a stack frame, insert instructions
%%  that copy variables that must be in Y registers (given by
%%  the `yregs` annotation) to new variables.
%%
%%  Also allocate specific Y registers for try and catch tags.
%%  The outermost try/catch tag is placed in y0, any directly
%%  nested tag in y1, and so on. Note that this is the reverse of
%%  the order required by BEAM; it will be corrected later by
%%  turn_yregs().

reserve_yregs(#st{frames=Frames}=St0) ->
    %% Handle one frame-allocating block at a time, threading the state.
    lists:foldl(fun(L, St) -> reserve_yregs_1(L, St) end, St0, Frames).

%% reserve_yregs_1(FrameLabel, St0) -> St.
%%  Reserve Y registers for one stack frame. The value of the
%%  counter after any renaming is used to tag all reservations
%%  belonging to this frame.
reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
    Yregs = beam_ssa:get_anno(yregs, maps:get(L, Blocks0)),
    {Def,Used} = beam_ssa:def_used([L], Blocks0),
    UsedYregs = ordsets:intersection(Yregs, Used),

    %% Variables defined before the frame was allocated are copied
    %% to new variables at the beginning of the frame block.
    DefBefore = ordsets:subtract(UsedYregs, Def),
    {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),

    %% The remaining variables are defined inside the frame.
    InsideVars = ordsets:subtract(UsedYregs, DefBefore),
    VarRes = [{V,{y,Count}} || V <- BeforeVars ++ InsideVars],

    %% Try/catch tags get specific Y registers.
    TagRes = [{Tag,{Reg,Count}} || {Tag,Reg} <- reserve_try_tags(L, Blocks)],
    St#st{res=VarRes ++ TagRes ++ Res0,ssa=Blocks,cnt=Count+1}.

%% reserve_try_tags(FrameLabel, Blocks) -> [{TagVar,{y,N}}].
%%  Return an ordset with a specific Y register for each try/catch
%%  tag that is active in some block reachable from FrameLabel.
reserve_try_tags(L, Blocks) ->
    {ActMap,_Seen} = reserve_try_tags_1([L], Blocks, gb_sets:empty(), #{}),
    Res = [{Tag,{y,Y}} || Active <- maps:values(ActMap),
                          {Tag,Y} <- maps:to_list(Active)],
    ordsets:from_list(Res).

%% reserve_try_tags_1(Labels, Blocks, Seen0, ActMap0) -> {ActMap,Seen}.
%%  Walk the blocks reachable from Labels. ActMap maps a block label
%%  to the try/catch tags that are active on entry to that block;
%%  each active tag is mapped to the number of its Y register.
reserve_try_tags_1([L|Ls], Blocks, Seen0, ActMap0) ->
    case gb_sets:is_element(L, Seen0) of
        true ->
            %% Already visited.
            reserve_try_tags_1(Ls, Blocks, Seen0, ActMap0);
        false ->
            Seen1 = gb_sets:insert(L, Seen0),
            #b_blk{is=Is} = Blk = maps:get(L, Blocks),
            %% Calculate the tags that are active at the end of this
            %% block and propagate them to all successors.
            Active0 = get_active(L, ActMap0),
            Active = reserve_try_tags_is(Is, Active0),
            Successors = beam_ssa:successors(Blk),
            ActMap1 = update_act_map(Successors, Active, ActMap0),
            %% Handle the remaining labels at this level before
            %% descending into the successors of this block.
            {ActMap,Seen} = reserve_try_tags_1(Ls, Blocks, Seen1, ActMap1),
            reserve_try_tags_1(Successors, Blocks, Seen,ActMap)
    end;
reserve_try_tags_1([], _Blocks, Seen, ActMap) ->
    {ActMap,Seen}.

%% get_active(Label, ActMap) -> ActiveTags.
%%  Return the map of try/catch tags active on entry to the block
%%  Label, or an empty map if nothing has been recorded for it.
get_active(L, ActMap) ->
    maps:get(L, ActMap, #{}).

%% reserve_try_tags_is(Is, Active0) -> Active.
%%  Update the map of active try/catch tags for the instructions
%%  in a block. A new tag is given the number of tags currently
%%  active as its register number; a killed tag is removed.
reserve_try_tags_is(Is, Active0) ->
    lists:foldl(fun(#b_set{op=new_try_tag,dst=Tag}, Active) ->
                        Active#{Tag=>map_size(Active)};
                   (#b_set{op=kill_try_tag,args=[Tag]}, Active) ->
                        maps:remove(Tag, Active);
                   (_, Active) ->
                        Active
                end, Active0, Is).

%% update_act_map(Successors, Active, ActMap0) -> ActMap.
%%  Record Active as the active try/catch tags for each successor.
%%  If a successor already has an entry, the two maps are merged,
%%  with the values in the existing entry taking precedence.
update_act_map(Ls, Active, ActMap) ->
    lists:foldl(fun(L, Map) ->
                        Merged = case Map of
                                     #{L:=Prev} -> maps:merge(Active, Prev);
                                     #{} -> Active
                                 end,
                        Map#{L=>Merged}
                end, ActMap, Ls).

%% rename_vars(Vars, FrameLabel, Blocks0, Count0) ->
%%       {NewVars,Blocks,Count}.
%%  Create a new variable for each variable in Vars, rename the
%%  variables in the code starting at FrameLabel, and insert 'copy'
%%  instructions from the old to the new variables after the phi
%%  nodes in the frame block.
rename_vars([], _L, Blocks, Count) ->
    {[],Blocks,Count};
rename_vars(OldVars, L, Blocks0, Count0) ->
    Bases = [Base || #b_var{name=Base} <- OldVars],
    {NewVars,Count} = new_vars(Bases, Count0),
    Renames = lists:zip(OldVars, NewVars),
    Blocks1 = beam_ssa:rename_vars(Renames, [L], Blocks0),
    #b_blk{is=Is} = Blk = maps:get(L, Blocks1),
    Copies = [#b_set{op=copy,dst=New,args=[Old]} || {Old,New} <- Renames],
    Blocks = Blocks1#{L=>Blk#b_blk{is=insert_after_phis(Is, Copies)}},
    {NewVars,Blocks,Count}.

%% insert_after_phis(Is0, InsertIs) -> Is.
%%  Insert the instructions InsertIs directly after the phi nodes
%%  at the beginning of Is0.
insert_after_phis(Is, InsertIs) ->
    IsPhi = fun(#b_set{op=phi}) -> true;
               (_) -> false
            end,
    {Phis,Rest} = lists:splitwith(IsPhi, Is),
    Phis ++ InsertIs ++ Rest.

%% frame_size(St0) -> St.
%%  Calculate the frame size for each block that allocates a frame.
%%  Annotate the block with the frame size. Also annotate all
%%  return instructions with {deallocate,FrameSize} to simplify
%%  code generation.

frame_size(#st{frames=Frames,regs=Regs,ssa=Blocks0}=St) ->
    %% Annotate one frame at a time, threading the block map.
    Annotate = fun(L, Blocks) -> frame_size_1(L, Regs, Blocks) end,
    St#st{ssa=lists:foldl(Annotate, Blocks0, Frames)}.

%% frame_size_1(FrameLabel, Regs, Blocks0) -> Blocks.
%%  Calculate the frame size for the stack frame allocated in the
%%  block FrameLabel as the number of distinct Y registers assigned
%%  to the variables defined in the blocks reachable from FrameLabel.
%%  Annotate the frame block with 'frame_size' and every reachable
%%  #b_ret{} with 'deallocate'.
frame_size_1(L, Regs, Blocks0) ->
    Def = beam_ssa:def([L], Blocks0),
    %% Look up each variable once and keep the distinct Y registers.
    Yregs = ordsets:from_list([Reg || V <- Def,
                                      Reg <- [maps:get(V, Regs)],
                                      is_yreg(Reg)]),
    FrameSize = length(Yregs),
    if
        FrameSize =/= 0 ->
            %% The Y registers in use must be numbered consecutively
            %% from zero.
            [{y,0}|_] = Yregs,                  %Assertion.
            {y,Last} = lists:last(Yregs),
            Last = FrameSize - 1,               %Assertion.
            ok;
        true ->
            ok
    end,
    Blk0 = maps:get(L, Blocks0),
    Blk = beam_ssa:add_anno(frame_size, FrameSize, Blk0),

    %% Insert an annotation for frame deallocation on
    %% each #b_ret{}.
    Blocks = maps:put(L, Blk, Blocks0),
    Reachable = beam_ssa:rpo([L], Blocks),
    frame_deallocate(Reachable, FrameSize, Blocks).

%% frame_deallocate(Labels, FrameSize, Blocks0) -> Blocks.
%%  Add a 'deallocate' annotation holding FrameSize to the
%%  terminator of each block in Labels that ends in #b_ret{}.
frame_deallocate(Ls, Size, Blocks) ->
    lists:foldl(fun(L, Blocks0) ->
                        case maps:get(L, Blocks0) of
                            #b_blk{last=#b_ret{}=Ret0}=Blk ->
                                Ret = beam_ssa:add_anno(deallocate, Size, Ret0),
                                Blocks0#{L:=Blk#b_blk{last=Ret}};
                            #b_blk{} ->
                                Blocks0
                        end
                end, Blocks, Ls).


%% turn_yregs(St0) -> St.
%%  Renumber y registers so that {y,0} becomes {y,FrameSize-1},
%%  {y,FrameSize-1} becomes {y,0} and so on. This is to make nested
%%  catches work. The register allocator (linear_scan()) has given
%%  a lower number to the outermost catch.

turn_yregs(#st{frames=Frames,regs=Regs0,ssa=Blocks}=St) ->
    Turn = fun(L) ->
                   FrameSize = beam_ssa:get_anno(frame_size,
                                                 maps:get(L, Blocks)),
                   Def = beam_ssa:def([L], Blocks),
                   turn_yregs_1(Def, FrameSize, Regs0)
           end,
    %% The results are combined with the last frame first, so
    %% that any entry from an earlier frame takes precedence.
    PerFrame = lists:reverse([Turn(L) || L <- Frames]),
    Turned = maps:from_list(lists:append(PerFrame)),
    St#st{regs=maps:merge(Regs0, Turned)}.

%% turn_yregs_1(DefinedVars, FrameSize, Regs) -> [{Var,{y,N}}].
%%  Return the renumbered Y register for each variable in
%%  DefinedVars that has been assigned a Y register.
turn_yregs_1(Def, FrameSize, Regs) ->
    %% Group the variables by their current Y register.
    ByReg = rel2fam([{maps:get(V, Regs),V} ||
                        V <- Def, is_yreg(maps:get(V, Regs))]),
    FrameSize = length(ByReg),                  %Assertion.
    Turned = [{{y,FrameSize-Y-1},Vs} || {{y,Y},Vs} <- ByReg],
    %% Flatten the family back to {Var,Reg} pairs.
    Rel = sofs:family_to_relation(sofs:family(Turned)),
    sofs:to_external(sofs:converse(Rel)).

%%%
%%% Reserving registers before register allocation.
%%%

%% reserve_regs(St0) -> St.
%%  Reserve registers prior to register allocation. Y registers
%%  have already been reserved. This function will reserve z,
%%  fr, and specific x registers.

reserve_regs(#st{args=Args,ssa=Blocks,intervals=Intervals,res=YRes}=St) ->
    %% The function arguments are in x0, x1, and so on.
    ArgRes = reserve_arg_regs(Args, 0, YRes),

    %% Instructions without a return value (such as remove_message)
    %% or with a pseudo return value (such as landingpad) get
    %% Z registers, which are dummy registers.
    ZRes = reserve_zregs(Blocks, Intervals, ArgRes),

    %% Floating point instructions get float registers.
    FRes = reserve_fregs(Blocks, ZRes),

    %% Every variable that has no reservation at this point is
    %% reserved as an X register.
    ResMap = maps:from_list(FRes),
    St#st{res=reserve_xregs(Blocks, ResMap)}.

%% reserve_arg_regs(Args, FirstX, Res0) -> Res.
%%  Reserve consecutive X registers, starting at FirstX, for the
%%  argument variables and prepend the reservations to Res0.
reserve_arg_regs(Args, N0, Acc0) ->
    Reserve = fun(#b_var{}=Arg, {N,Acc}) ->
                      {N+1,[{Arg,{x,N}}|Acc]}
              end,
    {_,Res} = lists:foldl(Reserve, {N0,Acc0}, Args),
    Res.

%% reserve_zregs(Blocks, Intervals, Res0) -> Res.
%%  Reserve Z registers. A variable is considered short-lived if
%%  it has a single live range spanning exactly two units; only
%%  short-lived variables are eligible in some of the cases
%%  handled by reserve_zreg/4.
reserve_zregs(Blocks, Intervals, Res) ->
    ShortLived = cerl_sets:from_list([V || {V,[{Start,End}]} <- Intervals,
                                           Start+2 =:= End]),
    PerBlock = fun(_L, #b_blk{is=Is,last=Last}, Acc) ->
                       reserve_zreg(Is, Last, ShortLived, Acc)
               end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_zreg(Is, Terminator, ShortLived, Res0) -> Res.
%%  Add {Var,z} reservations for the instructions of one block.
%%  The first two clauses match the remaining instructions as a
%%  whole, so they only apply at the very end of a block.

%% The block ends with tuple_size followed by '=:=' comparing the
%% size with something.
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst},
              #b_set{op={bif,'=:='},args=[Dst,Val]}], _Last, ShortLived, A0) ->
    case Val of
        #b_literal{val=Arity} when Arity bsr 32 =:= 0 ->
            %% These two instructions can be combined to a test_arity
            %% instruction provided that the arity variable is short-lived.
            reserve_zreg_1(Dst, ShortLived, A0);
        _ ->
            A0
    end;
%% The block ends with tuple_size followed by a switch terminator.
reserve_zreg([#b_set{op={bif,tuple_size},dst=Dst}],
             #b_switch{}, ShortLived, A) ->
    reserve_zreg_1(Dst, ShortLived, A);
%% The destination of each of the operations listed below is always
%% reserved as a Z register.
reserve_zreg([#b_set{op=Op,dst=Dst}|Is], Last, ShortLived, A0) ->
    IsZReg = case Op of
                 bs_match_string -> true;
                 bs_save -> true;
                 bs_restore -> true;
                 bs_set_position -> true;
                 {float,clearerror} -> true;
                 kill_try_tag -> true;
                 landingpad -> true;
                 put_tuple_elements -> true;
                 remove_message -> true;
                 set_tuple_element -> true;
                 succeeded -> true;
                 timeout -> true;
                 wait_timeout -> true;
                 _ -> false
             end,
    A = case IsZReg of
            true -> [{Dst,z}|A0];
            false -> A0
        end,
    reserve_zreg(Is, Last, ShortLived, A);
%% End of the block: the condition of a branch is reserved as
%% a Z register if it is a short-lived variable.
reserve_zreg([], #b_br{bool=Bool}, ShortLived, A) ->
    reserve_zreg_1(Bool, ShortLived, A);
reserve_zreg([], _, _, A) -> A.

%% reserve_zreg_1(Value, ShortLived, Res0) -> Res.
%%  Reserve a Z register for Value if it is a variable that is
%%  a member of ShortLived. Literals never get a reservation.
reserve_zreg_1(#b_literal{}, _ShortLived, Res) ->
    Res;
reserve_zreg_1(#b_var{}=V, ShortLived, Res) ->
    case cerl_sets:is_element(V, ShortLived) of
        false -> Res;
        true -> [{V,z}|Res]
    end.

%% reserve_fregs(Blocks, Res0) -> Res.
%%  Reserve float registers for the floating point instructions
%%  in all blocks reachable from block 0.
reserve_fregs(Blocks, Res) ->
    PerBlock = fun(_L, #b_blk{is=Is}, Acc) -> reserve_freg(Is, Acc) end,
    beam_ssa:fold_rpo(PerBlock, [0], Res, Blocks).

%% reserve_freg(Is, Res0) -> Res.
%%  Reserve a float register for the destination of every
%%  {float,_} instruction except {float,get}.
reserve_freg([], Res) ->
    Res;
reserve_freg([#b_set{op={float,get}}|Is], Res) ->
    reserve_freg(Is, Res);
reserve_freg([#b_set{op={float,_},dst=V}|Is], Res) ->
    reserve_freg(Is, [{V,fr}|Res]);
reserve_freg([_|Is], Res) ->
    reserve_freg(Is, Res).

%% reserve_xregs(Blocks, Res0) -> Res.
%%  Reserve all remaining variables as X registers.
%%
%%  If a variable will need to be in a specific X register for a
%%  'call' or 'make_fun' (and there is nothing that will kill it
%%  between the definition and use), reserve the register using a
%%  {prefer,{x,X}} annotation. That annotation means that the linear
%%  scan algorithm will place the variable in the preferred register,
%%  unless that register is already occupied.
%%
%%  All remaining variables are reserved as X registers. Linear scan
%%  will allocate the lowest free X register for the variable.

reserve_xregs(Blocks, Res) ->
    PerBlock =
        fun(L, #b_blk{is=Is,last=Last}, Res0) ->
                %% Each block is processed backwards, starting with
                %% its terminator.
                {Xs,Used} = reserve_terminator(L, Last, Blocks, Res0),
                reserve_xregs_is(lists:reverse(Is), Res0, Xs, Used)
        end,
    beam_ssa:fold_po(PerBlock, Res, Blocks).

%% reserve_xregs_is(ReversedIs, Res0, PreferredXs, Used) -> Res.
%%  Process the instructions of a block backwards. PreferredXs maps
%%  variables to the X registers they should preferably be placed in,
%%  and Used is the ordset of variables used after the current
%%  instruction.
reserve_xregs_is([], Res, _Xs, _Used) ->
    Res;
reserve_xregs_is([#b_set{op=Op,dst=Dst,args=Args}=I|Is], Res0, Xs0, Used0) ->
    Xs1 = case is_gc_safe(I) of
              true ->
                  Xs0;
              false ->
                  %% There may be a garbage collection after executing
                  %% this instruction. We will need to prune the list
                  %% of preferred X registers.
                  res_xregs_prune(Xs0, Used0, Res0)
          end,
    Res = reserve_xreg(Dst, Xs1, Res0),
    Used = ordsets:del_element(Dst, ordsets:union(Used0, beam_ssa:used(I))),
    Xs = if
             Op =:= call; Op =:= make_fun ->
                 %% The arguments (everything but the first element
                 %% of Args) should preferably be in x0, x1, and so on.
                 reserve_call_args(tl(Args));
             true ->
                 Xs1
         end,
    reserve_xregs_is(Is, Res, Xs, Used).

%% reserve_terminator(Label, Terminator, Blocks, Res) ->
%%       {PreferredXs,Used}.
%%  Return the initial map of preferred X registers and the ordset
%%  of used variables for processing the block Label backwards.

%% An unconditional branch.
reserve_terminator(L, #b_br{bool=#b_literal{val=true},succ=Succ}, Blocks, Res) ->
    case maps:get(Succ, Blocks) of
        #b_blk{is=[],last=Last} ->
            %% The successor contains no instructions. Look at
            %% its terminator instead.
            reserve_terminator(Succ, Last, Blocks, Res);
        #b_blk{is=[_|_]=Is} ->
            %% Variables that flow into phi nodes at the beginning of
            %% the successor inherit the preferences of the phi nodes.
            {res_xregs_from_phi(Is, L, Res, #{}),[]}
    end;
%% Any other terminator: no preferred registers; the variables used
%% by the terminator are the initial set of used variables.
reserve_terminator(_, Last, _, _) ->
    {#{},beam_ssa:used(Last)}.

%% res_xregs_from_phi(Is, Pred, Res, Acc0) -> Acc.
%%  For each phi node at the beginning of Is: if the value coming
%%  from the predecessor Pred is a variable and the destination of
%%  the phi node has a {prefer,Reg} reservation, record Reg as the
%%  preferred register for that variable. Processing stops at the
%%  first instruction that is not a phi node.
res_xregs_from_phi([#b_set{op=phi,dst=Dst,args=Args}|Is],
                   Pred, Res, Acc0) ->
    Acc = case [V || {#b_var{}=V,L} <- Args, L =:= Pred] of
              [] ->
                  Acc0;
              [Var] ->
                  case Res of
                      #{Dst:={prefer,Reg}} -> Acc0#{Var=>Reg};
                      #{Dst:=_} -> Acc0
                  end
          end,
    res_xregs_from_phi(Is, Pred, Res, Acc);
res_xregs_from_phi(_, _, _, Acc) ->
    Acc.

%% reserve_call_args(Args) -> PreferredXs.
%%  Map each argument that is a variable to the X register given
%%  by its position in the argument list. Literals occupy a
%%  position but get no entry in the map.
reserve_call_args(Args) ->
    reserve_call_args(Args, 0, #{}).

reserve_call_args([], _X, Xs) ->
    Xs;
reserve_call_args([#b_literal{}|Args], X, Xs) ->
    reserve_call_args(Args, X+1, Xs);
reserve_call_args([#b_var{}=Var|Args], X, Xs) ->
    reserve_call_args(Args, X+1, Xs#{Var=>{x,X}}).

%% reserve_xreg(Var, PreferredXs, Res0) -> Res.
%%  Reserve an X register for Var unless Var already has
%%  a reservation.
reserve_xreg(V, Xs, Res) ->
    case {Res,Xs} of
        {#{V:=_},_} ->
            %% Already reserved.
            Res;
        {#{},#{V:=X}} ->
            %% Add a hint that a specific X register is
            %% preferred, unless it is already in use.
            Res#{V=>{prefer,X}};
        {#{},#{}} ->
            %% Reserve as an X register in general.
            Res#{V=>x}
    end.

%% is_gc_safe(Instruction) -> boolean().
%%  Return true if the heap need of the instruction is classified
%%  as 'neutral' or {put,_} by beam_ssa_codegen. Phi nodes are
%%  never considered safe.
is_gc_safe(#b_set{op=phi}) ->
    false;
is_gc_safe(#b_set{op=Op,args=Args}) ->
    HeapNeed = beam_ssa_codegen:classify_heap_need(Op, Args),
    case HeapNeed of
        neutral -> true;
        {put,_} -> true;
        _ -> false
    end.

%% res_xregs_prune(PreferredRegs, Used, Res) -> PreferredRegs.
%%  Prune the map of preferred registers to only include X registers
%%  that are guaranteed to survive a garbage collection.

res_xregs_prune(Xs, Used, Res) ->
    %% The number of safe registers is the number of X registers
    %% used after this point. The actual number of safe registers
    %% may be higher than this number, but this is a conservative
    %% safe estimate.
    %%
    %% A used variable is counted if it is reserved as a specific
    %% X register or if it has no reservation at all.
    InXReg = fun(V) ->
                     case Res of
                         #{V:={x,_}} -> true;
                         #{V:=_} -> false;
                         #{} -> true
                     end
             end,
    NumSafe = length(lists:filter(InXReg, Used)),

    %% Remove unsafe registers from the map of potential
    %% preferred registers.
    maps:filter(fun(_, {x,X}) -> X < NumSafe end, Xs).

%%%
%%% Register allocation using linear scan.
%%%

%% A live interval for one variable, as used by linear scan.
%% (Field descriptions are based on init_interval/2.)
-record(i,
        {sort=1 :: instr_number(),              %Start of the first range.
         reg=none :: i_reg(),                   %Reserved or preferred
                                                % register, or 'none'.
         pool=x :: pool_id(),                   %Pool to allocate from.
         var=#b_var{} :: b_var(),               %The variable itself.
         rs=[] :: [range()]                     %All ranges for the variable.
        }).

%% The state for linear scan. linear_scan/1 initializes
%% 'unhandled_res' with the intervals that have a reserved or
%% preferred register, 'unhandled_any' with all other intervals,
%% and 'free' with the result of init_free/1. The result of the
%% allocation is read from 'regs'.
%% NOTE(review): the roles of 'cur', 'active', and 'inactive' are
%% presumably those of the classic linear scan algorithm; confirm
%% against do_linear/1.
-record(l,
        {cur=#i{} :: interval(),
         unhandled_res=[] :: [interval()],
         unhandled_any=[] :: [interval()],
         active=[] :: [interval()],
         inactive=[] :: [interval()],
         free=#{} :: #{var_name()=>pool(),
                       {'next',pool_id()}:=reg_num()},
         regs=[] :: [{b_var(),ssa_register()}]
        }).

%% interval()  A live interval record.
%% i_reg()     A specific register, a preference for an X register,
%%             or 'none' when nothing has been reserved.
%% pool_id()   Identifies a register pool. Integer pools hand out
%%             Y registers (see get_next_free/2); the atom pools
%%             hand out registers tagged with the atom itself.
%% pool()      Ordered set of the registers that are free in a pool.
-type interval() :: #i{}.
-type i_reg() :: ssa_register() | {'prefer',xreg()} | 'none'.
-type pool_id() :: 'fr' | 'x' | 'z' | instr_number().
-type pool() :: ordsets:ordset(ssa_register()).

%% linear_scan(St0) -> St.
%%  Assign a register to every live interval using linear scan.
%%  The `intervals` and `res` fields of the state are cleared and
%%  the `regs` field is set to a map from variable to register.

linear_scan(#st{intervals=RawIntervals,res=Res}=St) ->
    Free = init_free(maps:to_list(Res)),
    Sorted = sort([init_interval(Int, Res) || Int <- RawIntervals]),

    %% Intervals that carry a reservation or a preference are kept
    %% in a queue of their own.
    {Reserved,Any} = partition(fun(#i{reg=Reg}) -> Reg =/= none end,
                               Sorted),
    #l{regs=Regs} = do_linear(#l{unhandled_res=Reserved,
                                 unhandled_any=Any,
                                 free=Free}),
    St#st{intervals=[],res=[],regs=maps:from_list(Regs)}.

%% init_interval({Variable,Ranges}, Res) -> Interval.
%%  Build the interval record for a variable. The register pool and
%%  the initial register (a reservation, a preference, or 'none')
%%  are derived from the entry for the variable in Res. The sort
%%  key is the start of the first range.

init_interval({V,[{Start,_}|_]=Rs}, Res) ->
    Info = maps:get(V, Res),
    {Pool,Reg} = case Info of
                     {prefer,{x,_}} -> {x,Info};
                     x -> {x,none};
                     {x,_} -> {x,Info};
                     {y,Uniq} -> {Uniq,none};
                     {{y,_}=Y,Uniq} -> {Uniq,Y};
                     z -> {z,none};
                     fr -> {fr,none}
                 end,
    #i{sort=Start,var=V,reg=Reg,pool=Pool,rs=Rs}.

%% init_free(ResList) -> FreeMap.
%%  Build the initial map of free registers from the list of
%%  reservations. Each pool maps to its registers; {x,0} is always
%%  part of the x pool, which is also made contiguous. For every
%%  pool there is an additional key {next,Pool} holding the number
%%  of registers initially in that pool.

init_free(Res) ->
    Pools0 = maps:from_list(rel2fam([{x,{x,0}}|init_free_1(Res)])),
    #{x:=Xs} = Pools0,
    Pools = Pools0#{x:=init_xregs(Xs)},
    Next = [{{next,Pool},length(Regs)} ||
               {Pool,Regs} <- maps:to_list(Pools)],
    maps:merge(Pools, maps:from_list(Next)).

%% init_free_1(ResList) -> [{PoolId,Register}].
%%  Translate each reservation to a {PoolId,Register} pair. A plain
%%  `x` reservation contributes nothing; `{y,Uniq}`, `z`, and `fr`
%%  contribute register number 0 of their pool.

init_free_1(Res) ->
    [Entry || Item <- Res, Entry <- init_free_entry(Item)].

%% Return the pool entries (zero or one) for a single reservation.
init_free_entry({_,{prefer,{x,_}=Reg}}) -> [{x,Reg}];
init_free_entry({_,{x,_}=Reg}) -> [{x,Reg}];
init_free_entry({_,{y,Uniq}}) -> [{Uniq,{y,0}}];
init_free_entry({_,{{y,_}=Reg,Uniq}}) -> [{Uniq,Reg}];
init_free_entry({_,z}) -> [{z,{z,0}}];
init_free_entry({_,fr}) -> [{fr,{fr,0}}];
init_free_entry({_,x}) -> [].

%% init_xregs(XRegs) -> XRegs.
%%  Make sure that the pool of X registers is contiguous by filling
%%  in every register number missing between two neighbours of the
%%  ordered, non-empty list.

init_xregs([{x,_}]=Last) ->
    Last;
init_xregs([{x,N}=Reg|[{x,M}|_]=Rest]) ->
    Next = N + 1,
    case M of
        Next ->
            %% No gap before the following register.
            [Reg|init_xregs(Rest)];
        _ ->
            %% Fill the gap one register at a time.
            [Reg|init_xregs([{x,Next}|Rest])]
    end.

%% do_linear(L0) -> L.
%%  Main loop of the allocator. For each unhandled interval in turn:
%%  update the active and inactive lists, pick a register for the
%%  interval, and make the interval active. Returns the state when
%%  there are no more unhandled intervals.

do_linear(L0) ->
    case set_next_current(L0) of
        done ->
            L0;
        L1 ->
            L2 = check_inactive(expire_active(L1)),
            L3 = select_register(collect_available(L2), L2),
            do_linear(make_cur_active(L3))
    end.

%% set_next_current(L0) -> L | done.
%%  Move the next unhandled interval to the `cur` field. When both
%%  queues are non-empty, the head of `unhandled_res` is chosen only
%%  if its sort key is strictly less than that of the head of
%%  `unhandled_any`. Returns `done` when both queues are empty.

set_next_current(#l{unhandled_res=[],unhandled_any=[]}) ->
    done;
set_next_current(#l{unhandled_res=[Cur|Rest],unhandled_any=[]}=L) ->
    L#l{cur=Cur,unhandled_res=Rest};
set_next_current(#l{unhandled_res=[],unhandled_any=[Cur|Rest]}=L) ->
    L#l{cur=Cur,unhandled_any=Rest};
set_next_current(#l{unhandled_res=[#i{sort=N1}=Cur|Rest],
                    unhandled_any=[#i{sort=N2}|_]}=L) when N1 < N2 ->
    L#l{cur=Cur,unhandled_res=Rest};
set_next_current(#l{unhandled_res=[_|_],unhandled_any=[Cur|Rest]}=L) ->
    L#l{cur=Cur,unhandled_any=Rest}.

%% expire_active(L0) -> L.
%%  Re-examine every active interval against the start of the
%%  current interval and keep only those that still overlap it.

expire_active(#l{cur=#i{sort=CurBegin},active=Active}=L0) ->
    {StillActive,L} = expire_active(Active, CurBegin, L0, []),
    L#l{active=StillActive}.

%% expire_active(Active, CurBegin, L0, Acc) -> {StillActive,L}.
%%  Classify each active interval relative to CurBegin:
%%    - overlapping: keep it active;
%%    - ends_before_cur: release its register and drop it;
%%    - not_overlapping: release its register and move it to the
%%      inactive list with the ranges that have ended stripped.

expire_active([], _CurBegin, L, Acc) ->
    {Acc,L};
expire_active([#i{reg=Reg,rs=Rs}=I|Is], CurBegin, L, Acc) ->
    {_,_} = Reg,                                %Assertion.
    case overlap_status(Rs, CurBegin) of
        overlapping ->
            expire_active(Is, CurBegin, L, [I|Acc]);
        ends_before_cur ->
            expire_active(Is, CurBegin, free_reg(I, L), Acc);
        not_overlapping ->
            #l{inactive=InAct} = Freed = free_reg(I, L),
            Stripped = I#i{rs=strip_before_current(Rs, CurBegin)},
            expire_active(Is, CurBegin,
                          Freed#l{inactive=[Stripped|InAct]}, Acc)
    end.

%% check_inactive(L0) -> L.
%%  Re-examine every inactive interval against the start of the
%%  current interval; intervals that overlap it again are moved
%%  back to the active list.

check_inactive(#l{cur=#i{sort=CurBegin},inactive=Inactive}=L0) ->
    {StillInactive,L} = check_inactive(Inactive, CurBegin, L0, []),
    L#l{inactive=StillInactive}.

%% check_inactive(Inactive, CurBegin, L0, Acc) -> {StillInactive,L}.
%%  Classify each inactive interval relative to CurBegin:
%%    - not_overlapping: keep it inactive;
%%    - ends_before_cur: drop it;
%%    - overlapping: reserve its register again and move it to the
%%      active list with the ranges that have ended stripped.

check_inactive([], _CurBegin, L, Acc) ->
    {Acc,L};
check_inactive([#i{rs=Rs}=I|Is], CurBegin, L0, Acc) ->
    case overlap_status(Rs, CurBegin) of
        not_overlapping ->
            check_inactive(Is, CurBegin, L0, [I|Acc]);
        ends_before_cur ->
            check_inactive(Is, CurBegin, L0, Acc);
        overlapping ->
            #l{active=Act} = L0,
            Revived = I#i{rs=strip_before_current(Rs, CurBegin)},
            L = reserve_reg(I, L0#l{active=[Revived|Act]}),
            check_inactive(Is, CurBegin, L, Acc)
    end.

%% strip_before_current(Ranges, CurBegin) -> Ranges.
%%  Drop the leading ranges that end at or before CurBegin.

strip_before_current([{_,End}|Tail]=Ranges, CurBegin) ->
    if
        End =< CurBegin -> strip_before_current(Tail, CurBegin);
        true -> Ranges
    end;
strip_before_current(Ranges, _CurBegin) ->
    Ranges.

%% collect_available(L) -> [Register].
%%  Return the registers that the current interval may use:
%%    - with a preference: only the preferred register if it is
%%      available, otherwise everything that is available;
%%    - with a reservation: only the reserved register;
%%    - otherwise: the free registers of the pool, minus those
%%      reserved by overlapping unhandled intervals.

collect_available(#l{cur=#i{reg={prefer,{_,_}=Prefer}}=Cur}=L) ->
    %% Find out what would be available without the preference,
    %% then narrow it down to the preferred register if possible.
    Avail = collect_available(L#l{cur=Cur#i{reg=none}}),
    case member(Prefer, Avail) of
        false -> Avail;
        true -> [Prefer]
    end;
collect_available(#l{cur=#i{reg={_,_}=ReservedReg}}) ->
    %% The register has already been reserved.
    [ReservedReg];
collect_available(#l{unhandled_res=Unhandled,cur=Cur}=L) ->
    %% Note that since the live intervals are constructed from
    %% SSA form, there cannot be any overlap of the current interval
    %% with any inactive interval. See [3], page 175. Therefore we
    %% only have to check the unhandled intervals for overlap with
    %% the current interval. As a further optimization, we only need
    %% to check the intervals that have reserved registers.
    collect_available(Unhandled, Cur, get_pool(Cur, L)).

%% collect_available(Unhandled, Cur, Free0) -> Free.
%%  Remove from the ordered set Free0 the register of every
%%  unhandled interval that belongs to the same pool as Cur and
%%  overlaps Cur.

collect_available([], _Cur, Free) ->
    Free;
collect_available([#i{pool=Pool}|Is], #i{pool=CurPool}=Cur, Free)
  when Pool =/= CurPool ->
    %% An interval in another pool cannot conflict with Cur.
    collect_available(Is, Cur, Free);
collect_available([#i{reg={_,_}=Reg}=I|Is], Cur, Free) ->
    case overlaps(I, Cur) of
        false ->
            collect_available(Is, Cur, Free);
        true ->
            collect_available(Is, Cur, ordsets:del_element(Reg, Free))
    end.

%% select_register(Available, L0) -> L.
%%  Assign a register to the current interval: the first available
%%  register, or a newly allocated register from the pool when none
%%  is available. The assignment is recorded in the `regs` field
%%  and the register is removed from the free registers.

select_register([{_,_}=Reg|_], #l{cur=Cur0,regs=Regs}=L) ->
    Cur = Cur0#i{reg=Reg},
    Assigned = [{Cur#i.var,Reg}|Regs],
    reserve_reg(Cur, L#l{cur=Cur,regs=Assigned});
select_register([], #l{cur=Cur}=L0) ->
    %% Allocate a new register in the pool, then assign it in the
    %% same way as an available register.
    {Reg,L} = get_next_free(Cur, L0),
    select_register([Reg], L).

%% make_cur_active(L0) -> L.
%%  Add the current interval to the front of the active list.

make_cur_active(#l{cur=Current,active=Active0}=L) ->
    Active = [Current|Active0],
    L#l{active=Active}.

%% overlaps(Interval1, Interval2) -> true|false.
%%  Test whether the ranges of the two intervals overlap.

overlaps(#i{rs=RangesA}, #i{rs=RangesB}) ->
    are_overlapping(RangesA, RangesB).

%% overlap_status(Ranges, CurBegin) ->
%%       ends_before_cur | overlapping | not_overlapping.
%%  Classify a non-empty list of ranges relative to CurBegin:
%%    - ends_before_cur: every range ends at or before CurBegin;
%%    - overlapping: the first range that ends after CurBegin
%%      starts at or before CurBegin;
%%    - not_overlapping: the first range that ends after CurBegin
%%      starts after CurBegin.

overlap_status([{_,E}], CurBegin) when E =< CurBegin ->
    ends_before_cur;
overlap_status([{_,E}|Rs], CurBegin) when E =< CurBegin ->
    overlap_status(Rs, CurBegin);
overlap_status([{S,_}|_], CurBegin) when S =< CurBegin ->
    overlapping;
overlap_status([{_,_}|_], _CurBegin) ->
    not_overlapping.

%% reserve_reg(Interval, L0) -> L.
%%  Remove the register of the interval from the free registers of
%%  the interval's pool.

reserve_reg(#i{reg={_,_}=Reg}=I, L) ->
    Remaining = ordsets:del_element(Reg, get_pool(I, L)),
    update_pool(I, Remaining, L).

%% free_reg(Interval, L0) -> L.
%%  Return the register of the interval to the free registers of
%%  the interval's pool.

free_reg(#i{reg={_,_}=Reg}=I, L) ->
    Extended = ordsets:add_element(Reg, get_pool(I, L)),
    update_pool(I, Extended, L).

%% get_pool(Interval, L) -> FreeRegs.
%%  Fetch the free registers of the pool that the interval
%%  belongs to.

get_pool(#i{pool=PoolId}, #l{free=FreeMap}) ->
    FreeRegs = maps:get(PoolId, FreeMap),
    FreeRegs.

%% update_pool(Interval, FreeRegs, L0) -> L.
%%  Replace the free registers of the pool that the interval
%%  belongs to.

update_pool(#i{pool=PoolId}, New, #l{free=FreeMap}=L) ->
    L#l{free=FreeMap#{PoolId=>New}}.

%% get_next_free(Interval, L0) -> {Register,L}.
%%  Allocate a register that has not been used before in the pool
%%  of the interval, and bump the {next,Pool} counter. An integer
%%  pool yields a Y register; an atom pool yields a register tagged
%%  with the pool name.

get_next_free(#i{pool=Pool}, #l{free=Free}=L0) ->
    Counter = {next,Pool},
    N = maps:get(Counter, Free),
    L = L0#l{free=Free#{Counter:=N+1}},
    Reg = if
              is_integer(Pool) -> {y,N};
              is_atom(Pool) -> {Pool,N}
          end,
    {Reg,L}.

%%%
%%% Interval utilities.
%%%

%% are_overlapping(Ranges1, Ranges2) -> true|false.
%%  Test whether any range in Ranges1 overlaps any range in Ranges2.

are_overlapping([], _) ->
    false;
are_overlapping([R|Rs1], Rs2) ->
    are_overlapping_1(R, Rs2) orelse are_overlapping(Rs1, Rs2).

%% are_overlapping_1(Range, Ranges) -> true|false.
%%  Test whether Range overlaps any range in the list. Ranges that
%%  merely touch (the end of one equals the start of the other) do
%%  not overlap. The search stops at the first listed range that
%%  starts after Range ends.

are_overlapping_1({_,_}, []) ->
    false;
are_overlapping_1({S1,E1}=R, [{S2,E2}|Rs]) ->
    if
        E1 < S2 -> false;
        S2 < E1, S1 < E2 -> true;
        true -> are_overlapping_1(R, Rs)
    end.

%%%
%%% Utilities.
%%%

%% is_loop_header(L, Blocks) -> false|true.
%%  Check whether the block with label L is a loop header, that is,
%%  whether its first instruction is a peek_message instruction.

is_loop_header(L, Blocks) ->
    %% We KNOW that a loop header must start with a peek_message
    %% instruction.
    Blk = maps:get(L, Blocks),
    case Blk of
        #b_blk{is=[#b_set{op=Op}|_]} -> Op =:= peek_message;
        _ -> false
    end.

%% rel2fam([{Key,Value}]) -> [{Key,[Value]}].
%%  Convert a list of pairs to a family: an ordered list with one
%%  entry per key, each holding the ordered set of values for
%%  that key.

rel2fam(Pairs) ->
    sofs:to_external(sofs:rel2fam(sofs:relation(Pairs))).

%% split_phis(Instructions) -> {Phis,Others}.
%%  Partition the instructions into the phi instructions and all
%%  other instructions, preserving their relative order.

split_phis(Is) ->
    IsPhi = fun(#b_set{op=Op}) -> Op =:= phi end,
    partition(IsPhi, Is).

%% is_yreg(Register) -> true|false.
%%  Test whether the register is a Y register. Only registers
%%  tagged y, x, z, or fr are accepted.

is_yreg({Tag,_}) when Tag =:= x; Tag =:= z; Tag =:= fr ->
    false;
is_yreg({y,_}) ->
    true.

%% new_vars([Base], Count0) -> {[Variable],Count}.
%%  Create one new variable for each base name, from left to right,
%%  threading the counter through new_var/2.

new_vars(Bases, Count0) ->
    lists:mapfoldl(fun new_var/2, Count0, Bases).

%% new_var(Base, Count0) -> {Variable,Count}.
%%  Create a new variable named {Base,Count0} and return it together
%%  with the incremented counter. If the given base is itself a
%%  {Base,Integer} pair, the integer is discarded.

new_var(Base0, Count) ->
    Base = case Base0 of
               {Stem,Int} ->
                   true = is_integer(Int),      %Assertion.
                   Stem;
               _ ->
                   Base0
           end,
    {#b_var{name={Base,Count}},Count+1}.