aboutsummaryrefslogblamecommitdiffstats
path: root/lib/compiler/src/beam_ssa_codegen.erl
blob: 1c7563faa07bac5b3f92a940bfc1a967125a7222 (plain) (tree)
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748













































































































                                                                             

                                                         















                                                                        


                                                        






















                                                                          










                                                         







































































































































                                                                              

                                          


























                                                                            
                                      





                                           

                                               







                                              





































































































































































































































































                                                                                
                                                                    















                                                 
                                          






                                            
                                       





















                                                            
                                                             





















































                                                                   

                                































                                                              
                                                                               

                                         
                                                                              

                                         
                                                   













                                                                                  
                                                                  









                                                                                    
                                                               

































                                                                  

                                                 























































































































































































































































                                                                                    


                                            








                                                                           


































































































































                                                                                









                                                          















































































                                                                          
                                                                            





                                                       
                                                 
                                                                   







                                                                  



                                                                     

                                                                    



















































                                                                                    





                                             














                                                            

                                             

                                                            















                                                                    

                                       













































































































































































































































































































































                                                                                 
                                          





















































































                                                                


                                                           










































                                                                
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%
%% Purpose: Generate BEAM assembly code from the SSA format.

-module(beam_ssa_codegen).

-export([module/2]).
-export([classify_heap_need/2]).    %Called from beam_ssa_pre_codegen.

-export_type([ssa_register/0]).

-include("beam_ssa.hrl").

-import(lists, [foldl/3,keymember/3,keysort/2,last/1,map/2,mapfoldl/3,
                reverse/1,reverse/2,sort/1,splitwith/2,takewhile/2]).

-record(cg, {lcount=1 :: beam_label(),          %Label counter
	     functable=#{} :: #{fa()=>beam_label()},
             labels=#{} :: #{ssa_label()=>0|beam_label()},
             used_labels=gb_sets:empty() :: gb_sets:set(ssa_label()),
             regs=#{} :: #{beam_ssa:var_name()=>ssa_register()},
             ultimate_fail=1 :: beam_label(),
             catches=gb_sets:empty() :: gb_sets:set(ssa_label())
             }).

-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_asm:module_code()}.

module(#b_module{name=Mod,exports=Es,attributes=Attrs,body=Fs}, _Opts) ->
    {Asm,St} = functions(Fs, {atom,Mod}),
    {ok,{Mod,Es,Attrs,Asm,St#cg.lcount}}.

-record(need, {h=0 :: non_neg_integer(),
               f=0 :: non_neg_integer()}).

-record(cg_blk, {anno=#{} :: anno(),
                 is=[] :: [instruction()],
                 last :: terminator()}).

-record(cg_set, {anno=#{} :: anno(),
                 dst :: b_var(),
                 op :: beam_ssa:op(),
                 args :: [beam_ssa:argument() | xreg()]}).

-record(cg_alloc, {anno=#{} :: anno(),
                   stack=none :: 'none' | pos_integer(),
                   words=#need{} :: #need{},
                   live :: 'undefined' | pos_integer(),
                   def_yregs=[] :: [yreg()]
                  }).

-record(cg_br, {bool :: beam_ssa:value(),
                succ :: ssa_label(),
                fail :: ssa_label()
               }).
-record(cg_ret, {arg :: cg_value(),
                 dealloc=none :: 'none' | pos_integer()
                }).
-record(cg_switch, {arg :: cg_value(),
                    fail :: ssa_label(),
                    list :: [sw_list_item()]
                   }).

-type fa() :: {beam_asm:function_name(),arity()}.
-type ssa_label() :: beam_ssa:label().
-type beam_label() :: beam_asm:label().

-type anno() :: beam_ssa:anno().

-type b_var() :: beam_ssa:b_var().
-type b_literal() :: beam_ssa:b_literal().

-type cg_value() :: beam_ssa:value() | xreg().

-type cg_set() :: #cg_set{}.
-type cg_alloc() :: #cg_alloc{}.

-type instruction() :: cg_set() | cg_alloc().

-type cg_br() :: #cg_br{}.
-type cg_ret() :: #cg_ret{}.
-type cg_switch() :: #cg_switch{}.
-type terminator() :: cg_br() | cg_ret() | cg_switch().

-type sw_list_item() :: {b_literal(),ssa_label()}.

-type reg_num() :: beam_asm:reg_num().
-type xreg() :: {'x',reg_num()}.
-type yreg() :: {'y',reg_num()}.

-type ssa_register() :: xreg() | yreg() | {'fr',reg_num()} | {'z',reg_num()}.

functions(Forms, AtomMod) ->
    mapfoldl(fun (F, St) -> function(F, AtomMod, St) end,
             #cg{lcount=1}, Forms).

function(#b_function{anno=Anno,bs=Blocks}, AtomMod, St0) ->
    #{func_info:={_,Name,Arity}} = Anno,
    try
        assert_badarg_block(Blocks),            %Assertion.
        Regs = maps:get(registers, Anno),
        St1 = St0#cg{labels=#{},used_labels=gb_sets:empty(),
                     regs=Regs},
        {Fi,St2} = new_label(St1),              %FuncInfo label
        {Entry,St3} = local_func_label(Name, Arity, St2),
        {Ult,St4} = new_label(St3),             %Ultimate failure
        Labels = (St4#cg.labels)#{0=>Entry,?BADARG_BLOCK=>0},
        St5 = St4#cg{labels=Labels,used_labels=gb_sets:singleton(Entry),
                     ultimate_fail=Ult},
        {Body,St} = cg_fun(Blocks, St5),
        Asm = [{label,Fi},line(Anno),
               {func_info,AtomMod,{atom,Name},Arity}] ++
               add_parameter_annos(Body, Anno) ++
               [{label,Ult},if_end],
        Func = {function,Name,Arity,Entry,Asm},
        {Func,St}
    catch
        Class:Error:Stack ->
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

assert_badarg_block(Blocks) ->
    %% Assertion: ?BADARG_BLOCK must be the call erlang:error(badarg).
    case Blocks of
        #{?BADARG_BLOCK:=Blk} ->
            #b_blk{is=[#b_set{op=call,dst=Ret,
                              args=[#b_remote{mod=#b_literal{val=erlang},
                                              name=#b_literal{val=error}},
                                    #b_literal{val=badarg}]}],
                   last=#b_ret{arg=Ret}} = Blk,
            ok;
        #{} ->
            %% ?BADARG_BLOCK has been removed because it was never used.
            ok
    end.

add_parameter_annos([{label, _}=Entry | Body], Anno) ->
    ParamInfo = maps:get(parameter_type_info, Anno, #{}),
    Annos = maps:fold(
        fun(K, V, Acc) when is_map_key(K, ParamInfo) ->
                TypeInfo = maps:get(K, ParamInfo),
                [{'%', {type_info, V, TypeInfo}} | Acc];
           (_K, _V, Acc) ->
                Acc
        end, [], maps:get(registers, Anno)),
    [Entry | Annos] ++ Body.

cg_fun(Blocks, St0) ->
    Linear0 = linearize(Blocks),
    St = collect_catch_labels(Linear0, St0),
    Linear1 = need_heap(Linear0),
    Linear2 = prefer_xregs(Linear1, St),
    Linear3 = liveness(Linear2, St),
    Linear4 = defined(Linear3, St),
    Linear = opt_allocate(Linear4, St),
    cg_linear(Linear, St).

%% collect_catch_labels(Linear, St) -> St.
%%  Collect all catch labels (labels for blocks that begin
%%  with 'landingpad' instructions) for later use.

collect_catch_labels(Linear, St) ->
    Labels = collect_catch_labels_1(Linear),
    St#cg{catches=gb_sets:from_list(Labels)}.

collect_catch_labels_1([{L,#cg_blk{is=[#cg_set{op=landingpad}|_]}}|Bs]) ->
    [L|collect_catch_labels_1(Bs)];
collect_catch_labels_1([_|Bs]) ->
    collect_catch_labels_1(Bs);
collect_catch_labels_1([]) -> [].

%% need_heap([{BlockLabel,Block]) -> [{BlockLabel,Block}].
%%  Insert need_heap instructions in the instruction list. Try to be smart and
%%  collect them together as much as possible.

need_heap(Bs0) ->
    Bs1 = need_heap_allocs(Bs0, #{}),
    {Bs,#need{h=0,f=0}} = need_heap_blks(reverse(Bs1), #need{}, []),
    Bs.

need_heap_allocs([{L,#cg_blk{is=Is0,last=Terminator}=Blk0}|Bs], Counts0) ->
    Next = next_block(Bs),
    Successors = successors(Terminator),
    Counts = foldl(fun(S, Cnts) ->
                           case Cnts of
                               #{S:=C} -> Cnts#{S:=C+1};
                               #{} when S =:= Next -> Cnts#{S=>1};
                               #{} -> Cnts#{S=>42}
                           end
                   end, Counts0, Successors),
    case Counts of
        #{L:=1} ->
            [{L,Blk0}|need_heap_allocs(Bs, Counts)];
        #{L:=_} ->
            %% This block has multiple predecessors. Force an allocation
            %% in this block so that the predecessors don't need to do
            %% an allocation on behalf of this block.
            Is = case need_heap_never(Is0) of
                     true -> Is0;
                     false -> [#cg_alloc{}|Is0]
                 end,
            Blk = Blk0#cg_blk{is=Is},
            [{L,Blk}|need_heap_allocs(Bs, Counts)];
        #{} ->
            [{L,Blk0}|need_heap_allocs(Bs, Counts)]
    end;
need_heap_allocs([], _) -> [].

need_heap_never([#cg_alloc{}|_]) -> true;
need_heap_never([#cg_set{op=recv_next}|_]) -> true;
need_heap_never([#cg_set{op=wait}|_]) -> true;
need_heap_never(_) -> false.

need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) ->
    {Is1,H1} = need_heap_is(reverse(Is0), H0, []),
    {Ns,H} = need_heap_terminator(Bs, H1),
    Is = Ns ++ Is1,
    Blk = Blk0#cg_blk{is=Is},
    need_heap_blks(Bs, H, [{L,Blk}|Acc]);
need_heap_blks([], H, Acc) ->
    {Acc,H}.

need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) ->
    Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)},
    need_heap_is(Is, #need{}, [Alloc|Acc]);
need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
    case classify_heap_need(Op, Args) of
        {put,Words} ->
            %% Pass through adding to needed heap.
            need_heap_is(Is, add_heap_words(N, Words), [I|Acc]);
        put_float ->
            need_heap_is(Is, add_heap_float(N), [I|Acc]);
        neutral ->
            need_heap_is(Is, N, [I|Acc]);
        gc ->
            need_heap_is(Is, #need{}, [I]++need_heap_need(N)++Acc)
    end;
need_heap_is([], N, Acc) ->
    {Acc,N}.

need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) ->
    {[],N};
need_heap_terminator([{_,#cg_blk{}}|_], N) ->
    {need_heap_need(N),#need{}};
need_heap_terminator([], H) ->
    {need_heap_need(H),#need{}}.

need_heap_need(#need{h=0,f=0}) -> [];
need_heap_need(#need{}=N) -> [#cg_alloc{words=N}].

add_heap_words(#need{h=H1,f=F1}, #need{h=H2,f=F2}) ->
    #need{h=H1+H2,f=F1+F2};
add_heap_words(#need{h=Heap}=N, Words) when is_integer(Words) ->
    N#need{h=Heap+Words}.

add_heap_float(#need{f=F}=N) ->
    N#need{f=F+1}.

%% classify_heap_need(Operation, Arguments) ->
%%        gc | neutral | {put,Words} | put_float.
%%  Classify the heap need for this instruction. The return
%%  values have the following meaning.
%%
%%  {put,Words} means that the instruction will use Words words to build
%%  something on the heap.
%%
%%  'put_float' means that the instruction will build one floating point
%%  number on the heap.
%%
%%  'gc' means that that the instruction can potentially do a GC or throw an
%%  exception. That means that an allocation instruction for any building
%%  must be placed after this instruction.
%%
%%  'neutral' means that the instruction does nothing to disturb the heap.

-spec classify_heap_need(beam_ssa:op(), [beam_ssa:value()]) ->
                                'gc' | 'neutral' |
                                {'put',non_neg_integer()} | 'put_float'.

classify_heap_need(put_list, _) ->
    {put,2};
classify_heap_need(put_tuple_arity, [#b_literal{val=Words}]) ->
    {put,Words+1};
classify_heap_need(put_tuple, Elements) ->
    {put,length(Elements)+1};
classify_heap_need({bif,Name}, Args) ->
    case is_gc_bif(Name, Args) of
        false -> neutral;
        true -> gc
    end;
classify_heap_need({float,Op}, _Args) ->
    case Op of
        get -> put_float;
        _ -> neutral
    end;
classify_heap_need(Name, _Args) ->
    classify_heap_need(Name).

%% classify_heap_need(Operation) -> gc | neutral.
%%  Return either 'gc' or 'neutral'.
%%
%%  'gc' means that that the instruction can potentially do a GC or throw an
%%  exception. That means that an allocation instruction for any building
%%  must be placed after this instruction.
%%
%%  'neutral' means that the instruction does nothing to disturb the heap.
%%
%%  Note: Only handle operations in this function that are not handled
%%  by classify_heap_need/2.

classify_heap_need(bs_add) -> gc;
classify_heap_need(bs_get) -> gc;
classify_heap_need(bs_get_tail) -> gc;
classify_heap_need(bs_init) -> gc;
classify_heap_need(bs_init_writable) -> gc;
classify_heap_need(bs_match_string) -> gc;
classify_heap_need(bs_put) -> neutral;
classify_heap_need(bs_restore) -> neutral;
classify_heap_need(bs_save) -> neutral;
classify_heap_need(bs_get_position) -> gc;
classify_heap_need(bs_set_position) -> neutral;
classify_heap_need(bs_skip) -> gc;
classify_heap_need(bs_start_match) -> neutral;
classify_heap_need(bs_test_tail) -> neutral;
classify_heap_need(bs_utf16_size) -> neutral;
classify_heap_need(bs_utf8_size) -> neutral;
classify_heap_need(build_stacktrace) -> gc;
classify_heap_need(call) -> gc;
classify_heap_need(catch_end) -> gc;
classify_heap_need(copy) -> neutral;
classify_heap_need(extract) -> gc;
classify_heap_need(get_hd) -> neutral;
classify_heap_need(get_map_element) -> neutral;
classify_heap_need(get_tl) -> neutral;
classify_heap_need(get_tuple_element) -> neutral;
classify_heap_need(has_map_field) -> neutral;
classify_heap_need(is_nonempty_list) -> neutral;
classify_heap_need(is_tagged_tuple) -> neutral;
classify_heap_need(kill_try_tag) -> gc;
classify_heap_need(landingpad) -> gc;
classify_heap_need(make_fun) -> gc;
classify_heap_need(new_try_tag) -> gc;
classify_heap_need(peek_message) -> gc;
classify_heap_need(put_map) -> gc;
classify_heap_need(put_tuple_elements) -> neutral;
classify_heap_need(raw_raise) -> gc;
classify_heap_need(recv_next) -> gc;
classify_heap_need(remove_message) -> neutral;
classify_heap_need(resume) -> gc;
classify_heap_need(set_tuple_element) -> gc;
classify_heap_need(succeeded) -> neutral;
classify_heap_need(timeout) -> gc;
classify_heap_need(wait) -> gc;
classify_heap_need(wait_timeout) -> gc.

%%%
%%% Because beam_ssa_pre_codegen has inserted 'copy' instructions to copy
%%% variables that must be saved on the stack, a value can for some time
%%% be in both an X register and a Y register.
%%%
%%% Here we will keep track of variables that have the same value and
%%% rewrite instructions to use the variable that refers to the X
%%% register instead of the Y register. That could improve performance,
%%% since the BEAM interpreter have more optimized instructions
%%% operating on X registers than on Y registers.
%%%
%%% 'call' and 'make_fun' are handled somewhat specially. If a value
%%% already is in the correct X register, the X register will always
%%% be used instead of the Y register. However, if there are one or more
%%% values in the wrong X registers, the X registers variables will be
%%% used only if that does not cause more 'move' instructions to be
%%% be emitted than if the Y register variables were used.
%%%
%%% Here are some examples. The first example shows how a 'move' from
%%% an Y register is eliminated:
%%%
%%%     move x0 y1
%%%     move y1 x0   %%Will be eliminated.
%%%
%%%     call f/1
%%%
%%% Here is an example when x0 and x1 must be swapped to load the argument
%%% registers. Here the 'call' instruction will use the Y registers to
%%% avoid introducing an extra 'move' insruction:
%%%
%%%     move x0 y0
%%%     move x1 y1
%%%
%%%     move y0 x1
%%%     move y1 x0
%%%
%%%     call f/2
%%%
%%% Using the X register to load the argument registers would need
%%% an extra 'move' instruction like this:
%%%
%%%     move x0 y0
%%%     move x1 y1
%%%
%%%     move x1 x2
%%%     move x0 x1
%%%     move x2 x0
%%%
%%%     call f/2
%%%

prefer_xregs(Linear, St) ->
    prefer_xregs(Linear, St, #{0=>#{}}).

prefer_xregs([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], St, Map0) ->
    Copies0 = maps:get(L, Map0),
    {Is,Copies} = prefer_xregs_is(Is0, St, Copies0, []),
    Last = prefer_xregs_terminator(Last0, Copies, St),
    Blk = Blk0#cg_blk{is=Is,last=Last},
    Successors = successors(Last),
    Map = prefer_xregs_successors(Successors, Copies, Map0),
    [{L,Blk}|prefer_xregs(Bs, St, Map)];
prefer_xregs([], _St, _Map) -> [].

prefer_xregs_successors([L|Ls], Copies0, Map0) ->
    case Map0 of
        #{L:=Copies1} ->
            Copies = merge_copies(Copies0, Copies1),
            Map = Map0#{L:=Copies},
            prefer_xregs_successors(Ls, Copies0, Map);
        #{} ->
            Map = Map0#{L=>Copies0},
            prefer_xregs_successors(Ls, Copies0, Map)
    end;
prefer_xregs_successors([], _, Map) -> Map.

prefer_xregs_is([#cg_alloc{}=I|Is], St, Copies0, Acc) ->
    Copies = case I of
                 #cg_alloc{stack=none,words=#need{h=0,f=0}} ->
                     Copies0;
                 #cg_alloc{} ->
                     #{}
             end,
    prefer_xregs_is(Is, St, Copies, [I|Acc]);
prefer_xregs_is([#cg_set{op=copy,dst=Dst,args=[Src]}=I|Is], St, Copies0, Acc) ->
    Copies1 = prefer_xregs_prune(I, Copies0, St),
    Copies = case beam_args([Src,Dst], St) of
                 [Same,Same] -> Copies1;
                 [_,_] -> Copies1#{Dst=>Src}
             end,
    prefer_xregs_is(Is, St, Copies, [I|Acc]);
prefer_xregs_is([#cg_set{op=call,dst=Dst}=I0|Is], St, Copies, Acc) ->
    I = prefer_xregs_call(I0, Copies, St),
    prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]);
prefer_xregs_is([#cg_set{op=make_fun,dst=Dst}=I0|Is], St, Copies, Acc) ->
    I = prefer_xregs_call(I0, Copies, St),
    prefer_xregs_is(Is, St, #{Dst=>{x,0}}, [I|Acc]);
prefer_xregs_is([#cg_set{op=set_tuple_element}=I|Is], St, Copies, Acc) ->
    %% FIXME: HiPE translates the following code segment incorrectly:
    %%     {call_ext,3,{extfunc,erlang,setelement,3}}.
    %%     {move,{x,0},{y,3}}.
    %%     {set_tuple_element,{y,1},{y,3},1}.
    %% Therefore, skip the translation of the arguments for set_tuple_element.
    prefer_xregs_is(Is, St, Copies, [I|Acc]);
prefer_xregs_is([#cg_set{args=Args0}=I0|Is], St, Copies0, Acc) ->
    Args = [do_prefer_xreg(A, Copies0, St) || A <- Args0],
    I = I0#cg_set{args=Args},
    Copies = prefer_xregs_prune(I, Copies0, St),
    prefer_xregs_is(Is, St, Copies, [I|Acc]);
prefer_xregs_is([], _St, Copies, Acc) ->
    {reverse(Acc),Copies}.

prefer_xregs_terminator(#cg_br{bool=Arg0}=I, Copies, St) ->
    Arg = do_prefer_xreg(Arg0, Copies, St),
    I#cg_br{bool=Arg};
prefer_xregs_terminator(#cg_ret{arg=Arg0}=I, Copies, St) ->
    Arg = do_prefer_xreg(Arg0, Copies, St),
    I#cg_ret{arg=Arg};
prefer_xregs_terminator(#cg_switch{arg=Arg0}=I, Copies, St) ->
    Arg = do_prefer_xreg(Arg0, Copies, St),
    I#cg_switch{arg=Arg}.

prefer_xregs_prune(#cg_set{anno=#{clobbers:=true}}, _, _) ->
    #{};
prefer_xregs_prune(#cg_set{dst=Dst}, Copies, St) ->
    DstReg = beam_arg(Dst, St),
    F = fun(_, Alias) ->
                beam_arg(Alias, St) =/= DstReg
        end,
    maps:filter(F, Copies).

%% prefer_xregs_call(Instruction, Copies, St) -> Instruction.
%%  Given a 'call' or 'make_fun' instruction, minimize the number
%%  of 'move' instructions to set up the argument registers.
%%  Prefer using X registers over Y registers, unless that will
%%  result in more 'move' instructions.

prefer_xregs_call(#cg_set{args=[_]}=I, _Copies, _St) ->
    I;
prefer_xregs_call(#cg_set{args=[F|Args0]}=I, Copies, St) ->
    case Args0 of
        [A0] ->
            %% Only one argument. Always prefer the X register
            %% if available.
            A = do_prefer_xreg(A0, Copies, St),
            I#cg_set{args=[F,A]};
        [_|_] ->
            %% Two or more arguments. Try rewriting arguments in
            %% two ways and see which way produces the least
            %% number of 'move' instructions.
            Args1 = prefer_xregs_call_1(Args0, Copies, 0, St),
            Args2 = [do_prefer_xreg(A, Copies, St) || A <- Args0],
            case {count_moves(Args1, St),count_moves(Args2, St)} of
                {N1,N2} when N1 < N2 ->
                    %% There will be fewer 'move' instructions if
                    %% we keep using Y registers.
                    I#cg_set{args=[F|Args1]};
                {_,_} ->
                    %% Always use the values in X registers.
                    I#cg_set{args=[F|Args2]}
            end
    end.

count_moves(Args, St) ->
    length(setup_args(beam_args(Args, St))).

prefer_xregs_call_1([#b_var{}=A|As], Copies, X, St) ->
    case {beam_arg(A, St),Copies} of
        {{y,_},#{A:=Other}} ->
            case beam_arg(Other, St) of
                {x,X} ->
                    %% This value is already in the correct X register.
                    %% It is always benefical to use the X register variable.
                    [Other|prefer_xregs_call_1(As, Copies, X+1, St)];
                _ ->
                    %% This value is another X register. Keep using
                    %% the Y register variable.
                    [A|prefer_xregs_call_1(As, Copies, X+1, St)]
            end;
        {_,_} ->
            %% The value is not available in an X register.
            [A|prefer_xregs_call_1(As, Copies, X+1, St)]
    end;
prefer_xregs_call_1([A|As], Copies, X, St) ->
    [A|prefer_xregs_call_1(As, Copies, X+1, St)];
prefer_xregs_call_1([], _, _, _) -> [].

do_prefer_xreg(#b_var{}=A, Copies, St) ->
    case {beam_arg(A, St),Copies} of
        {{y,_},#{A:=Copy}} ->
            Copy;
        {_,_} ->
            A
    end;
do_prefer_xreg(A, _, _) -> A.

merge_copies(Copies0, Copies1) when map_size(Copies0) =< map_size(Copies1) ->
    maps:filter(fun(K, V) ->
                        case Copies1 of
                            #{K:=V} -> true;
                            #{} -> false
                        end
                end, Copies0);
merge_copies(Copies0, Copies1) ->
    merge_copies(Copies1, Copies0).


%%%
%%% Add annotations for the number of live registers.
%%%

liveness(Linear, #cg{regs=Regs}) ->
    liveness(reverse(Linear), #{}, Regs, []).

liveness([{L,#cg_blk{is=Is0,last=Last0}=Blk0}|Bs], LiveMap0, Regs, Acc) ->
    Successors = liveness_successors(Last0),
    Live0 = ordsets:union([liveness_get(S, LiveMap0) || S <- Successors]),
    Live1 = liveness_terminator(Last0, Live0),
    {Is,Live} = liveness_is(reverse(Is0), Regs, Live1, []),
    LiveMap = LiveMap0#{L=>Live},
    Blk = Blk0#cg_blk{is=Is},
    liveness(Bs, LiveMap, Regs, [{L,Blk}|Acc]);
liveness([], _LiveMap, _Regs, Acc) -> Acc.

liveness_get(S, LiveMap) ->
    case LiveMap of
        #{S:=Live} -> Live;
        #{} -> []
    end.

liveness_successors(Terminator) ->
    successors(Terminator) -- [?BADARG_BLOCK].

liveness_is([#cg_alloc{}=I0|Is], Regs, Live, Acc) ->
    I = I0#cg_alloc{live=num_live(Live, Regs)},
    liveness_is(Is, Regs, Live, [I|Acc]);
liveness_is([#cg_set{dst=Dst,args=Args}=I0|Is], Regs, Live0, Acc) ->
    Live1 = liveness_clobber(I0, Live0, Regs),
    I1 = liveness_yregs_anno(I0, Live1, Regs),
    Live2 = liveness_args(Args, Live1),
    Live = ordsets:del_element(Dst, Live2),
    I = liveness_anno(I1, Live, Regs),
    liveness_is(Is, Regs, Live, [I|Acc]);
liveness_is([], _, Live, Acc) ->
    {Acc,Live}.

liveness_terminator(#cg_br{bool=Arg}, Live) ->
    liveness_terminator_1(Arg, Live);
liveness_terminator(#cg_switch{arg=Arg}, Live) ->
    liveness_terminator_1(Arg, Live);
liveness_terminator(#cg_ret{arg=Arg}, Live) ->
    liveness_terminator_1(Arg, Live).

liveness_terminator_1(#b_var{}=V, Live) ->
    ordsets:add_element(V, Live);
liveness_terminator_1(#b_literal{}, Live) ->
    Live;
liveness_terminator_1(Reg, Live) ->
    _ = verify_beam_register(Reg),
    ordsets:add_element(Reg, Live).

liveness_args([#b_var{}=V|As], Live) ->
    liveness_args(As, ordsets:add_element(V, Live));
liveness_args([#b_remote{mod=Mod,name=Name}|As], Live) ->
    liveness_args([Mod,Name|As], Live);
liveness_args([A|As], Live) ->
    case is_beam_register(A) of
        true ->
            liveness_args(As, ordsets:add_element(A, Live));
        false ->
            liveness_args(As, Live)
    end;
liveness_args([], Live) -> Live.

liveness_anno(#cg_set{op=Op}=I, Live, Regs) ->
    case need_live_anno(Op) of
        true ->
            NumLive = num_live(Live, Regs),
            Anno = (I#cg_set.anno)#{live=>NumLive},
            I#cg_set{anno=Anno};
        false ->
            I
    end.

liveness_yregs_anno(#cg_set{op=Op,dst=Dst}=I, Live0, Regs) ->
    case need_live_anno(Op) of
        true ->
            Live = ordsets:del_element(Dst, Live0),
            LiveYregs = [V || V <- Live, is_yreg(V, Regs)],
            Anno = (I#cg_set.anno)#{live_yregs=>LiveYregs},
            I#cg_set{anno=Anno};
        false ->
            I
    end.

liveness_clobber(#cg_set{anno=Anno}, Live, Regs) ->
    case Anno of
        #{clobbers:=true} ->
            [R || R <- Live, is_yreg(R, Regs)];
        _ ->
            Live
    end.

is_yreg(R, Regs) ->
    case Regs of
        #{R:={y,_}} -> true;
        #{} -> false
    end.

num_live(Live, Regs) ->
    Rs = ordsets:from_list([get_register(V, Regs) || V <- Live]),
    num_live_1(Rs, 0).

num_live_1([{x,X}|T], X) ->
    num_live_1(T, X+1);
num_live_1([{x,_}|_]=T, X) ->
    %% error({hole,{x,X},expected,Next});
    num_live_1(T, X+1);
num_live_1([{y,_}|_], X) ->
    X;
num_live_1([{z,_}|_], X) ->
    X;
num_live_1([{fr,_}|T], X) ->
    num_live_1(T, X);
num_live_1([], X) ->
    X.

get_live(#cg_set{anno=#{live:=Live}}) ->
    Live.

%% need_live_anno(Operation) -> true|false.
%%  Return 'true' if the instruction needs a 'live' annotation with
%%  the number live X registers, or 'false' otherwise.

need_live_anno(Op) ->
    case Op of
        {bif,_} -> true;
        bs_get -> true;
        bs_init -> true;
        bs_get_position -> true;
        bs_get_tail -> true;
        bs_start_match -> true;
        bs_skip -> true;
        call -> true;
        put_map -> true;
        _ -> false
    end.

%%%
%%% Add annotations for defined Y registers.
%%%

defined(Linear, #cg{regs=Regs}) ->
    def(Linear, #{}, Regs).

def([{L,#cg_blk{is=Is0,last=Last}=Blk0}|Bs], DefMap0, Regs) ->
    Def0 = def_get(L, DefMap0),
    {Is,Def} = def_is(Is0, Regs, Def0, []),
    Successors = successors(Last),
    DefMap = def_successors(Successors, Def, DefMap0),
    Blk = Blk0#cg_blk{is=Is},
    [{L,Blk}|def(Bs, DefMap, Regs)];
def([], _, _) -> [].

def_get(L, DefMap) ->
    case DefMap of
        #{L:=Def} -> Def;
        #{} -> []
    end.

def_is([#cg_alloc{anno=Anno0}=I0|Is], Regs, Def, Acc) ->
    I = I0#cg_alloc{anno=Anno0#{def_yregs=>Def}},
    def_is(Is, Regs, Def, [I|Acc]);
def_is([#cg_set{op=kill_try_tag,args=[#b_var{}=Tag]}=I|Is], Regs, Def0, Acc) ->
    Def = ordsets:del_element(Tag, Def0),
    def_is(Is, Regs, Def, [I|Acc]);
def_is([#cg_set{op=catch_end,args=[#b_var{}=Tag|_]}=I|Is], Regs, Def0, Acc) ->
    Def = ordsets:del_element(Tag, Def0),
    def_is(Is, Regs, Def, [I|Acc]);
def_is([#cg_set{anno=Anno0,op=call,dst=Dst}=I0|Is],
       Regs, Def0, Acc) ->
    #{live_yregs:=LiveYregVars} = Anno0,
    LiveRegs = gb_sets:from_list([maps:get(V, Regs) || V <- LiveYregVars]),
    Kill0 = ordsets:subtract(Def0, LiveYregVars),

    %% Kill0 is the set of variables that have just died. However, the registers
    %% used for killed variables may have been reused, so we must check that the
    %% registers to be killed are not used by other variables.
    Kill = [K || K <- Kill0, not gb_sets:is_element(maps:get(K, Regs), LiveRegs)],
    Anno = Anno0#{def_yregs=>Def0,kill_yregs=>Kill},
    I = I0#cg_set{anno=Anno},
    Def1 = ordsets:subtract(Def0, Kill),
    Def = def_add_yreg(Dst, Def1, Regs),
    def_is(Is, Regs, Def, [I|Acc]);
def_is([#cg_set{anno=Anno0,op={bif,Bif},dst=Dst,args=Args}=I0|Is],
       Regs, Def0, Acc) ->
    Arity = length(Args),
    I = case is_gc_bif(Bif, Args) orelse not erl_bifs:is_safe(erlang, Bif, Arity) of
            true ->
                I0#cg_set{anno=Anno0#{def_yregs=>Def0}};
            false ->
                I0
        end,
    Def = def_add_yreg(Dst, Def0, Regs),
    def_is(Is, Regs, Def, [I|Acc]);
def_is([#cg_set{anno=Anno0,dst=Dst}=I0|Is], Regs, Def0, Acc) ->
    I = case need_y_init(I0) of
            true ->
                I0#cg_set{anno=Anno0#{def_yregs=>Def0}};
            false ->
                I0
        end,
    Def = def_add_yreg(Dst, Def0, Regs),
    def_is(Is, Regs, Def, [I|Acc]);
def_is([], _, Def, Acc) ->
    {reverse(Acc),Def}.

def_add_yreg(Dst, Def, Regs) ->
    case is_yreg(Dst, Regs) of
        true -> ordsets:add_element(Dst, Def);
        false -> Def
    end.

def_successors([S|Ss], Def0, DefMap) ->
    case DefMap of
        #{S:=Def1} ->
            Def = ordsets:intersection(Def0, Def1),
            def_successors(Ss, Def0, DefMap#{S:=Def});
        #{} ->
            def_successors(Ss, Def0, DefMap#{S=>Def0})
    end;
def_successors([], _, DefMap) -> DefMap.

%% need_y_init(#cg_set{}) -> true|false.
%%  Return true if this instructions needs initialized Y registers
%%  (because the instruction may do a GC or cause an exception
%%  so that the stack will be scanned), or false otherwise.

need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers;
need_y_init(#cg_set{op=bs_get}) -> true;
need_y_init(#cg_set{op=bs_get_position}) -> true;
need_y_init(#cg_set{op=bs_get_tail}) -> true;
need_y_init(#cg_set{op=bs_init}) -> true;
need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) ->
    case Type of
        utf8 -> true;
        utf16 -> true;
        utf32 -> true;
        _ -> false
    end;
need_y_init(#cg_set{op=bs_start_match}) -> true;
need_y_init(#cg_set{op=put_map}) -> true;
need_y_init(#cg_set{}) -> false.

%% opt_allocate([{BlockLabel,Block}], #st{}) -> [BeamInstruction].
%%  Update the def_yregs field of each #cg_alloc{} that allocates
%%  a stack frame. #cg_alloc.def_yregs will list all Y registers
%%  that will be initialized by the subsequent code (thus, the
%%  listed Y registers don't require init/1 instructions).

opt_allocate(Linear, #cg{regs=Regs}) ->
    opt_allocate_1(Linear, Regs).

opt_allocate_1([{L,#cg_blk{is=[#cg_alloc{stack=Stk}=I0|Is]}=Blk0}|Bs]=Bs0, Regs)
  when is_integer(Stk) ->
    Yregs = opt_alloc_def(Bs0, gb_sets:singleton(L), []),
    I = I0#cg_alloc{def_yregs=Yregs},
    [{L,Blk0#cg_blk{is=[I|Is]}}|opt_allocate_1(Bs, Regs)];
opt_allocate_1([B|Bs], Regs) ->
    [B|opt_allocate_1(Bs, Regs)];
opt_allocate_1([], _) -> [].

opt_alloc_def([{L,#cg_blk{is=Is,last=Last}}|Bs], Ws0, Def0) ->
    case gb_sets:is_member(L, Ws0) of
        false ->
            opt_alloc_def(Bs, Ws0, Def0);
        true ->
            case opt_allocate_is(Is) of
                none ->
                    Succ = successors(Last),
                    Ws = gb_sets:union(Ws0, gb_sets:from_list(Succ)),
                    opt_alloc_def(Bs, Ws, Def0);
                Def1 when is_list(Def1) ->
                    Def = [Def1|Def0],
                    opt_alloc_def(Bs, Ws0, Def)
            end
    end;
opt_alloc_def([], _, Def) ->
    ordsets:intersection(Def).

opt_allocate_is([#cg_set{anno=Anno}|Is]) ->
    case Anno of
        #{def_yregs:=Yregs} ->
            Yregs;
        #{} ->
            opt_allocate_is(Is)
    end;
opt_allocate_is([#cg_alloc{anno=#{def_yregs:=Yregs},stack=none}|_]) ->
    Yregs;
opt_allocate_is([#cg_alloc{}|Is]) ->
    opt_allocate_is(Is);
opt_allocate_is([]) -> none.

%%%
%%% Here follows the main code generation functions.
%%%

%% cg_linear([{BlockLabel,Block}]) -> [BeamInstruction].
%%  Generate BEAM instructions.

cg_linear([{L,#cg_blk{anno=#{recv_set:=L}=Anno0}=B0}|Bs], St0) ->
    Anno = maps:remove(recv_set, Anno0),
    B = B0#cg_blk{anno=Anno},
    {Is,St1} = cg_linear([{L,B}|Bs], St0),
    {Fail,St} = use_block_label(L, St1),
    {[{recv_set,Fail}|Is],St};
cg_linear([{L,#cg_blk{is=Is0,last=Last}}|Bs], St0) ->
    Next = next_block(Bs),
    St1 = new_block_label(L, St0),
    {Is1,St2} = cg_block(Is0, Last, Next, St1),
    {Is2,St} = cg_linear(Bs, St2),
    {def_block_label(L, St)++Is1++Is2,St};
cg_linear([], St) -> {[],St}.

cg_block([#cg_set{op=recv_next}], #cg_br{succ=Lr0}, _Next, St0) ->
    {Lr,St} = use_block_label(Lr0, St0),
    {[{loop_rec_end,Lr}],St};
cg_block([#cg_set{op=wait}], #cg_br{succ=Lr0}, _Next, St0) ->
    {Lr,St} = use_block_label(Lr0, St0),
    {[{wait,Lr}],St};
cg_block(Is0, Last, Next, St0) ->
    case Last of
        #cg_br{succ=Next,fail=Next} ->
            cg_block(Is0, none, St0);
        #cg_br{succ=Same,fail=Same} ->
            {Fail,St1} = use_block_label(Same, St0),
            {Is,St} = cg_block(Is0, none, St1),
            {Is++[jump(Fail)],St};
        #cg_br{bool=Bool,succ=Next,fail=Fail0} ->
            {Fail,St1} = use_block_label(Fail0, St0),
            {Is,St} = cg_block(Is0, {Bool,Fail}, St1),
            {Is,St};
        #cg_br{bool=Bool,succ=Succ0,fail=Fail0} ->
            {[Succ,Fail],St1} = use_block_labels([Succ0,Fail0], St0),
            {Is,St} = cg_block(Is0, {Bool,Fail}, St1),
            {Is++[jump(Succ)],St};
        #cg_ret{arg=Src0,dealloc=N} ->
            Src = beam_arg(Src0, St0),
            cg_block(Is0, {return,Src,N}, St0);
        #cg_switch{} ->
            cg_switch(Is0, Last, St0)
    end.

cg_switch(Is0, Last, St0) ->
    #cg_switch{arg=Src0,fail=Fail0,list=List0} = Last,
    Src = beam_arg(Src0, St0),
    {Fail1,St1} = use_block_label(Fail0, St0),
    Fail = ensure_label(Fail1, St1),
    {List1,St2} =
        flatmapfoldl(fun({V,L}, S0) ->
                             {Lbl,S} = use_block_label(L, S0),
                             {[beam_arg(V, S),Lbl],S}
                     end, St1, List0),
    {Is1,St} = cg_block(Is0, none, St2),
    case reverse(Is1) of
        [{bif,tuple_size,_,[Tuple],{z,_}=Src}|More] ->
            List = map(fun({integer,Arity}) -> Arity;
                          ({f,_}=F) -> F
                       end, List1),
            Is = reverse(More, [{select_tuple_arity,Tuple,Fail,{list,List}}]),
            {Is,St};
        _ ->
            SelectVal = {select_val,Src,Fail,{list,List1}},
            {Is1 ++ [SelectVal],St}
    end.

jump({f,_}=Fail) ->
    {jump,Fail};
jump({catch_tag,Fail}) ->
    {jump,Fail}.

bif_fail({f,_}=Fail) -> Fail;
bif_fail({catch_tag,_}) -> {f,0}.

next_block([]) -> none;
next_block([{Next,_}|_]) -> Next.

ensure_label(Fail0, #cg{ultimate_fail=Lbl}) ->
    case bif_fail(Fail0) of
        {f,0} -> {f,Lbl};
        {f,_}=Fail -> Fail
    end.

cg_block([#cg_set{anno=#{recv_mark:=L}=Anno0}=I0|T], Context, St0) ->
    Anno = maps:remove(recv_mark, Anno0),
    I = I0#cg_set{anno=Anno},
    {Is,St1} = cg_block([I|T], Context, St0),
    {Fail,St} = use_block_label(L, St1),
    {[{recv_mark,Fail}|Is],St};
cg_block([#cg_set{op=new_try_tag,dst=Tag,args=Args}], {Tag,Fail0}, St) ->
    {catch_tag,Fail} = Fail0,
    [Reg,{atom,Kind}] = beam_args([Tag|Args], St),
    {[{Kind,Reg,Fail}],St};
cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I,
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
    [Dst|Args] = beam_args([Dst0|Args0], St),
    Line0 = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno),
    Fail = bif_fail(Fail0),
    Line = case Fail of
               {f,0} -> Line0;
               {f,_} -> []
           end,
    case is_gc_bif(Name, Args) of
        true ->
            Live = get_live(I),
            Kill = kill_yregs(Anno, St),
            {Kill++Line++[{gc_bif,Name,Fail,Live,Args,Dst}],St};
        false ->
            {Line++[{bif,Name,Fail,Args,Dst}],St}
    end;
cg_block([#cg_set{op={bif,tuple_size},dst=Arity0,args=[Tuple0]},
          #cg_set{op={bif,'=:='},dst=Bool,args=[Arity0,#b_literal{val=Ar}]}=Eq],
         {Bool,Fail}=Context, St0) ->
    Tuple = beam_arg(Tuple0, St0),
    case beam_arg(Arity0, St0) of
        {z,_} ->
            %% The size will only be used once. Combine to a test_arity instruction.
            Test = {test,test_arity,ensure_label(Fail, St0),[Tuple,Ar]},
            {[Test],St0};
        Arity ->
            %% The size will be used more than once. Must do an explicit
            %% BIF call followed by the '==' test.
            TupleSize = {bif,tuple_size,{f,0},[Tuple],Arity},
            {Is,St} = cg_block([Eq], Context, St0),
            {[TupleSize|Is],St}
    end;
cg_block([#cg_set{op={bif,Name},dst=Dst0,args=Args0}]=Is0, {Dst0,Fail}, St0) ->
    [Dst|Args] = beam_args([Dst0|Args0], St0),
    case Dst of
        {z,_} ->
            %% The result of the BIF call will only be used once. Convert to
            %% a test instruction.
            Test = bif_to_test(Name, Args, ensure_label(Fail, St0)),
            {Test,St0};
        _ ->
            %% Must explicitly call the BIF since the result will be used
            %% more than once.
            {Is1,St1} = cg_block(Is0, none, St0),
            {Is2,St} = cg_block([], {Dst0,Fail}, St1),
            {Is1++Is2,St}
    end;
cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I|T],
         Context, St0) ->
    [Dst|Args] = beam_args([Dst0|Args0], St0),
    {Is0,St} = cg_block(T, Context, St0),
    case is_gc_bif(Name, Args) of
        true ->
            Line = call_line(body, {extfunc,erlang,Name,length(Args)}, Anno),
            Live = get_live(I),
            Kill = kill_yregs(Anno, St),
            Is = Kill++Line++[{gc_bif,Name,{f,0},Live,Args,Dst}|Is0],
            {Is,St};
        false ->
            Is = [{bif,Name,{f,0},Args,Dst}|Is0],
            {Is,St}
    end;
cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
    Fail = bif_fail(Fail0),
    Line = line(Anno),
    [#b_literal{val=Kind}|Args1] = Args0,
    case Kind of
        new ->
            [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St),
            Live = get_live(I),
            {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St};
        private_append ->
            [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
            Flags = {field_flags,[]},
            Is = [Line,{bs_private_append,Fail,Bits,Unit,Src,Flags,Dst}],
            {Is,St};
        append ->
            [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
            Flags = {field_flags,[]},
            Live = get_live(I),
            Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}],
            {Is,St}
    end;
cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I,
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
    [Dst,Bin1] = beam_args([Ctx0,Bin0], St),
    {Bin,Pre} = force_reg(Bin1, Dst),
    Live = get_live(I),
    %% num_slots is only set when using the old instructions.
    case maps:find(num_slots, Anno) of
        {ok, Slots} ->
            Is = Pre ++ [{test,bs_start_match2,Fail,Live,[Bin,Slots],Dst}],
            {Is,St};
        error ->
            Is = Pre ++ [{test,bs_start_match3,Fail,Live,[Bin],Dst}],
            {Is,St}
    end;
cg_block([#cg_set{op=bs_get}=Set,
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
    {cg_bs_get(Fail, Set, St),St};
cg_block([#cg_set{op=bs_match_string,args=[CtxVar,#b_literal{val=String}]},
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
    CtxReg = beam_arg(CtxVar, St),
    Is = [{test,bs_match_string,Fail,[CtxReg,String]}],
    {Is,St};
cg_block([#cg_set{dst=Dst0,op=landingpad,args=Args0}|T], Context, St0) ->
    [Dst,{atom,Kind},Tag] = beam_args([Dst0|Args0], St0),
    case Kind of
        'catch' ->
            cg_catch(Dst, T, Context, St0);
        'try' ->
            cg_try(Dst, Tag, T, Context, St0)
    end;
cg_block([#cg_set{op=kill_try_tag,args=Args0}|Is], Context, St0) ->
    [Reg] = beam_args(Args0, St0),
    {Is0,St} = cg_block(Is, Context, St0),
    {[{try_end,Reg}|Is0],St};
cg_block([#cg_set{op=catch_end,dst=Dst0,args=Args0}|Is], Context, St0) ->
    [Dst,Reg,{x,0}] = beam_args([Dst0|Args0], St0),
    {Is0,St} = cg_block(Is, Context, St0),
    {[{catch_end,Reg}|copy({x,0}, Dst)++Is0],St};
cg_block([#cg_set{op=call}=I,
          #cg_set{op=succeeded,dst=Bool}], {Bool,_Fail}, St) ->
    %% A call in try/catch block.
    cg_block([I], none, St);
cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=I,
          #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
    [Dst|Args] = beam_args([Dst0|Args0], St),
    {cg_test(Op, bif_fail(Fail), Args, Dst, I),St};
cg_block([#cg_set{op=bs_put,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
    Args = beam_args(Args0, St),
    {cg_bs_put(bif_fail(Fail), Args),St};
cg_block([#cg_set{op=bs_test_tail,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
    [Ctx,{integer,Bits}] = beam_args(Args0, St),
    {[{test,bs_test_tail2,bif_fail(Fail),[Ctx,Bits]}],St};
cg_block([#cg_set{op={float,checkerror},dst=Bool}], {Bool,Fail}, St) ->
    {[{fcheckerror,bif_fail(Fail)}],St};
cg_block([#cg_set{op=is_tagged_tuple,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
    [Src,{integer,Arity},Tag] = beam_args(Args0, St),
    {[{test,is_tagged_tuple,ensure_label(Fail, St),[Src,Arity,Tag]}],St};
cg_block([#cg_set{op=is_nonempty_list,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
    Args = beam_args(Args0, St),
    {[{test,is_nonempty_list,ensure_label(Fail, St),Args}],St};
cg_block([#cg_set{op=has_map_field,dst=Bool,args=Args0}], {Bool,Fail}, St) ->
    [Src,Key] = beam_args(Args0, St),
    {[{test,has_map_fields,Fail,Src,{list,[Key]}}],St};
cg_block([#cg_set{op=call}=Call], {_Bool,_Fail}=Context, St0) ->
    {Is0,St1} = cg_call(Call, body, none, St0),
    {Is1,St} = cg_block([], Context, St1),
    {Is0++Is1,St};
cg_block([#cg_set{op=call,dst=Dst0}=Call], Context, St) ->
    Dst = beam_arg(Dst0, St),
    case Context of
        {return,Dst,_} ->
            cg_call(Call, tail, Context, St);
        _ ->
            cg_call(Call, body, Context, St)
    end;
cg_block([#cg_set{op=call}=Call|T], Context, St0) ->
    {Is0,St1} = cg_call(Call, body, none, St0),
    {Is1,St} = cg_block(T, Context, St1),
    {Is0++Is1,St};
cg_block([#cg_set{op=make_fun,dst=Dst0,args=[Local|Args0]}|T],
         Context, St0) ->
    #b_local{name=#b_literal{val=Func},arity=Arity} = Local,
    [Dst|Args] = beam_args([Dst0|Args0], St0),
    {FuncLbl,St1} = local_func_label(Func, Arity, St0),
    Is0 = setup_args(Args) ++
        [{make_fun2,{f,FuncLbl},0,0,length(Args)}|copy({x,0}, Dst)],
    {Is1,St} = cg_block(T, Context, St1),
    {Is0++Is1,St};
cg_block([#cg_set{op=copy}|_]=T0, Context, St0) ->
    {Is0,T} = cg_copy(T0, St0),
    {Is1,St} = cg_block(T, Context, St0),
    Is = Is0 ++ Is1,
    case is_call(T) of
        {yes,Arity} ->
            {opt_call_moves(Is, Arity),St};
        no ->
            {Is,St}
    end;
cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set], none, St) ->
    [Dst|Args] = beam_args([Dst0|Args0], St),
    Is = cg_instr(Op, Args, Dst, Set),
    {Is,St};
cg_block([#cg_set{op=Op,dst=Dst0,args=Args0}=Set|T], Context, St0) ->
    [Dst|Args] = beam_args([Dst0|Args0], St0),
    Is0 = cg_instr(Op, Args, Dst, Set),
    {Is1,St} = cg_block(T, Context, St0),
    {Is0++Is1,St};
cg_block([#cg_alloc{}=Alloc|T], Context, St0) ->
    Is0 = cg_alloc(Alloc, St0),
    {Is1,St} = cg_block(T, Context, St0),
    {Is0++Is1,St};
cg_block([], {return,Arg,none}, St) ->
    Is = copy(Arg, {x,0}) ++ [return],
    {Is,St};
cg_block([], {return,Arg,N}, St) ->
    Is = copy(Arg, {x,0}) ++ [{deallocate,N},return],
    {Is,St};
cg_block([], none, St) ->
    {[],St};
cg_block([], {Bool0,Fail}, St) ->
    [Bool] = beam_args([Bool0], St),
    {[{test,is_eq_exact,Fail,[Bool,{atom,true}]}],St}.

cg_copy(T0, St) ->
    {Copies,T} = splitwith(fun(#cg_set{op=copy}) -> true;
                              (_) -> false
                           end, T0),
    Moves0 = cg_copy_1(Copies, St),
    Moves1 = [Move || {move,Src,Dst}=Move <- Moves0, Src =/= Dst],
    Scratch = {x,1022},
    Moves = order_moves(Moves1, Scratch),
    {Moves,T}.

cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) ->
    [Dst,Src] = beam_args([Dst0|Args], St),
    Copies = cg_copy_1(T, St),
    case keymember(Dst, 3, Copies) of
        true ->
            %% Will be overwritten. Don't generate a move instruction.
            Copies;
        false ->
            [{move,Src,Dst}|Copies]
    end;
cg_copy_1([], _St) -> [].

bif_to_test('and', [V1,V2], Fail) ->
    [{test,is_eq_exact,Fail,[V1,{atom,true}]},
     {test,is_eq_exact,Fail,[V2,{atom,true}]}];
bif_to_test('or', [V1,V2], {f,Lbl}=Fail) when Lbl =/= 0 ->
    %% Labels are spaced 2 apart. We can create a new
    %% label by incrementing the Fail label.
    SuccLabel = Lbl + 1,
    [{test,is_eq_exact,{f,SuccLabel},[V1,{atom,false}]},
     {test,is_eq_exact,Fail,[V2,{atom,true}]},
     {label,SuccLabel}];
bif_to_test('not', [Var], Fail) ->
    [{test,is_eq_exact,Fail,[Var,{atom,false}]}];
bif_to_test(Name, Args, Fail) ->
    [beam_utils:bif_to_test(Name, Args, Fail)].

opt_call_moves(Is0, Arity) ->
    {Moves0,Is} = splitwith(fun({move,_,_}) -> true;
                               (_) -> false
                            end, Is0),
    Moves = opt_call_moves_1(Moves0, Arity),
    Moves ++ Is.

opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) ->
    case is_killed(Tmp, Is, Arity) of
        true ->
            %% The X register Tmp is never used again. We can collapse
            %% the two move instruction into one.
            [{move,Src,Dst}|opt_call_moves_1(Is, Arity)];
        false ->
            [M1|opt_call_moves_1([M2|Is], Arity)]
    end;
opt_call_moves_1([M|Ms], Arity) ->
    [M|opt_call_moves_1(Ms, Arity)];
opt_call_moves_1([], _Arity) -> [].

is_killed(R, [{move,R,_}|_], _) ->
    false;
is_killed(R, [{move,_,R}|_], _) ->
    true;
is_killed(R, [{move,_,_}|Is], Arity) ->
    is_killed(R, Is, Arity);
is_killed({x,X}, [], Arity) ->
    X >= Arity.

cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) ->
    [];
cg_alloc(#cg_alloc{stack=none,words=Need,live=Live}, _St) ->
    [{test_heap,alloc(Need),Live}];
cg_alloc(#cg_alloc{stack=Stk,words=Need,live=Live,def_yregs=DefYregs},
         #cg{regs=Regs}) when is_integer(Stk) ->
    Alloc = alloc(Need),
    All = [{y,Y} || Y <- lists:seq(0, Stk-1)],
    Def = ordsets:from_list([maps:get(V, Regs) || V <- DefYregs]),
    NeedInit = ordsets:subtract(All, Def),
    NoZero = length(Def)*2 > Stk,
    I = case {NoZero,Alloc} of
            {true,0} -> {allocate,Stk,Live};
            {true,_} -> {allocate_heap,Stk,Alloc,Live};
            {false,0} -> {allocate_zero,Stk,Live};
            {false,_} -> {allocate_heap_zero,Stk,Alloc,Live}
        end,
    [I|case NoZero of
           true -> [{init,Y} || Y <- NeedInit];
           false -> []
       end].

alloc(#need{h=Words,f=0}) ->
    Words;
alloc(#need{h=Words,f=Floats}) ->
    {alloc,[{words,Words},{floats,Floats}]}.

is_call([#cg_set{op=call,args=[#b_var{}|Args]}|_]) ->
    {yes,1+length(Args)};
is_call([#cg_set{op=call,args=[_|Args]}|_]) ->
    {yes,length(Args)};
is_call([#cg_set{op=make_fun,args=[_|Args]}|_]) ->
    {yes,length(Args)};
is_call(_) ->
    no.

cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=[#b_local{}=Func0|Args0]},
        Where, Context, St0) ->
    [Dst|Args] = beam_args([Dst0|Args0], St0),
    #b_local{name=Name0,arity=Arity} = Func0,
    {atom,Name} = beam_arg(Name0, St0),
    {FuncLbl,St} = local_func_label(Name, Arity, St0),
    Line = call_line(Where, local, Anno),
    Call = build_call(call, Arity, {f,FuncLbl}, Context, Dst),
    Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
    {Is,St};
cg_call(#cg_set{anno=Anno0,op=call,dst=Dst0,args=[#b_remote{}=Func0|Args0]},
        Where, Context, St) ->
    [Dst|Args] = beam_args([Dst0|Args0], St),
    #b_remote{mod=Mod0,name=Name0,arity=Arity} = Func0,
    case {beam_arg(Mod0, St),beam_arg(Name0, St)} of
        {{atom,Mod},{atom,Name}} ->
            Func = {extfunc,Mod,Name,Arity},
            Line = call_line(Where, Func, Anno0),
            Call = build_call(call_ext, Arity, Func, Context, Dst),
            Anno = case erl_bifs:is_exit_bif(Mod, Name, Arity) of
                       true ->
                           %% There is no need to kill Y registers
                           %% before calling an exit BIF.
                           maps:remove(kill_yregs, Anno0);
                       false ->
                           Anno0
                   end,
            Is = setup_args(Args, Anno, Context, St) ++ Line ++ Call,
            {Is,St};
        {Mod,Name} ->
            Apply = build_apply(Arity, Context, Dst),
            Is = setup_args(Args++[Mod,Name], Anno0, Context, St) ++
                [line(Anno0)] ++ Apply,
            {Is,St}
    end;
cg_call(#cg_set{anno=Anno,op=call,dst=Dst0,args=Args0},
        Where, Context, St) ->
    [Dst,Func|Args] = beam_args([Dst0|Args0], St),
    Line = call_line(Where, Func, Anno),
    Arity = length(Args),
    Call = build_call(call_fun, Arity, Func, Context, Dst),
    Is = setup_args(Args++[Func], Anno, Context, St) ++ Line ++ Call,
    {Is,St}.

build_call(call_fun, Arity, _Func, none, Dst) ->
    [{call_fun,Arity}|copy({x,0}, Dst)];
build_call(call_fun, Arity, _Func, {return,Dst,N}, Dst) when is_integer(N) ->
    [{call_fun,Arity},{deallocate,N},return];
build_call(call_fun, Arity, _Func, {return,Val,N}, _Dst) when is_integer(N) ->
    [{call_fun,Arity},{move,Val,{x,0}},{deallocate,N},return];
build_call(call_ext, 2, {extfunc,erlang,'!',2}, none, Dst) ->
    [send|copy({x,0}, Dst)];
build_call(call_ext, 2, {extfunc,erlang,'!',2}, {return,Dst,N}, Dst)
  when is_integer(N) ->
    [send,{deallocate,N},return];
build_call(Prefix, Arity, Func, {return,Dst,none}, Dst) ->
    I = case Prefix of
            call -> call_only;
            call_ext -> call_ext_only
        end,
    [{I,Arity,Func}];
build_call(call_ext, Arity, {extfunc,Mod,Name,Arity}=Func, {return,_,none}, _Dst) ->
    true = erl_bifs:is_exit_bif(Mod, Name, Arity), %Assertion.
    [{call_ext_only,Arity,Func}];
build_call(Prefix, Arity, Func, {return,Dst,N}, Dst) when is_integer(N) ->
    I = case Prefix of
            call -> call_last;
            call_ext -> call_ext_last
        end,
    [{I,Arity,Func,N}];
build_call(I, Arity, Func, {return,Val,N}, _Dst) when is_integer(N) ->
    [{I,Arity,Func}|copy(Val, {x,0})++[{deallocate,N},return]];
build_call(I, Arity, Func, none, Dst) ->
    [{I,Arity,Func}|copy({x,0}, Dst)].

build_apply(Arity, {return,Dst,N}, Dst) when is_integer(N) ->
    [{apply_last,Arity,N}];
build_apply(Arity, {return,Val,N}, _Dst) when is_integer(N) ->
    [{apply,Arity}|copy(Val, {x,0})++[{deallocate,N},return]];
build_apply(Arity, none, Dst) ->
    [{apply,Arity}|copy({x,0}, Dst)].

cg_instr(put_map, [{atom,assoc},SrcMap|Ss], Dst, Set) ->
    Live = get_live(Set),
    [{put_map_assoc,{f,0},SrcMap,Dst,Live,{list,Ss}}];
cg_instr(bs_get_tail, [Src], Dst, Set) ->
    Live = get_live(Set),
    [{bs_get_tail,Src,Dst,Live}];
cg_instr(bs_get_position, [Ctx], Dst, Set) ->
    Live = get_live(Set),
    [{bs_get_position,Ctx,Dst,Live}];
cg_instr(Op, Args, Dst, _Set) ->
    cg_instr(Op, Args, Dst).

cg_instr(bs_init_writable, Args, Dst) ->
    setup_args(Args) ++ [bs_init_writable|copy({x,0}, Dst)];
cg_instr(bs_restore, [Ctx,Slot], _Dst) ->
    case Slot of
        {integer,N} ->
            [{bs_restore2,Ctx,N}];
        {atom,start} ->
            [{bs_restore2,Ctx,Slot}]
    end;
cg_instr(bs_save, [Ctx,Slot], _Dst) ->
    {integer,N} = Slot,
    [{bs_save2,Ctx,N}];
cg_instr(bs_set_position, [Ctx,Pos], _Dst) ->
    [{bs_set_position,Ctx,Pos}];
cg_instr(build_stacktrace, Args, Dst) ->
    setup_args(Args) ++ [build_stacktrace|copy({x,0}, Dst)];
cg_instr(set_tuple_element=Op, [New,Tuple,{integer,Index}], _Dst) ->
    [{Op,New,Tuple,Index}];
cg_instr({float,clearerror}, [], _Dst) ->
    [fclearerror];
cg_instr({float,get}, [Src], Dst) ->
    [{fmove,Src,Dst}];
cg_instr({float,put}, [Src], Dst) ->
    [{fmove,Src,Dst}];
cg_instr(get_hd=Op, [Src], Dst) ->
    [{Op,Src,Dst}];
cg_instr(get_tl=Op, [Src], Dst) ->
    [{Op,Src,Dst}];
cg_instr(get_tuple_element=Op, [Src,{integer,N}], Dst) ->
    [{Op,Src,N,Dst}];
cg_instr(put_list=Op, [Hd,Tl], Dst) ->
    [{Op,Hd,Tl,Dst}];
cg_instr(put_tuple, Elements, Dst) ->
    [{put_tuple2,Dst,{list,Elements}}];
cg_instr(put_tuple_arity, [{integer,Arity}], Dst) ->
    [{put_tuple,Arity,Dst}];
cg_instr(put_tuple_elements, Elements, _Dst) ->
    [{put,E} || E <- Elements];
cg_instr(raw_raise, Args, Dst) ->
    setup_args(Args) ++ [raw_raise|copy({x,0}, Dst)];
cg_instr(remove_message, [], _Dst) ->
    [remove_message];
cg_instr(resume, [A,B], _Dst) ->
    [{bif,raise,{f,0},[A,B],{x,0}}];
cg_instr(timeout, [], _Dst) ->
    [timeout].

cg_test(bs_add=Op, Fail, [Src1,Src2,{integer,Unit}], Dst, _I) ->
    [{Op,Fail,[Src1,Src2,Unit],Dst}];
cg_test(bs_skip, Fail, Args, _Dst, I) ->
    cg_bs_skip(Fail, Args, I);
cg_test(bs_utf8_size=Op, Fail, [Src], Dst, _I) ->
    [{Op,Fail,Src,Dst}];
cg_test(bs_utf16_size=Op, Fail, [Src], Dst, _I) ->
    [{Op,Fail,Src,Dst}];
cg_test({float,convert}, Fail, [Src], Dst, _I) ->
    {f,0} = Fail,                               %Assertion.
    [{fconv,Src,Dst}];
cg_test({float,Op0}, Fail, Args, Dst, #cg_set{anno=Anno}) ->
    Op = case Op0 of
             '+' -> fadd;
             '-' when length(Args) =:= 2 -> fsub;
             '-' -> fnegate;
             '*' -> fmul;
             '/' -> fdiv
         end,
    [line(Anno),{bif,Op,Fail,Args,Dst}];
cg_test(get_map_element, Fail, [Map,Key], Dst, _I) ->
    [{get_map_elements,Fail,Map,{list,[Key,Dst]}}];
cg_test(peek_message, Fail, [], Dst, _I) ->
    [{loop_rec,Fail,{x,0}}|copy({x,0}, Dst)];
cg_test(put_map, Fail, [{atom,exact},SrcMap|Ss], Dst, Set) ->
    Live = get_live(Set),
    [{put_map_exact,Fail,SrcMap,Dst,Live,{list,Ss}}];
cg_test(wait_timeout, Fail, [Timeout], _Dst, _) ->
    case Timeout of
        {atom,infinity} ->
            [{wait,Fail}];
        _ ->
            [{wait_timeout,Fail,Timeout}]
    end.

cg_bs_get(Fail, #cg_set{dst=Dst0,args=[#b_literal{val=Type}|Ss0]}=Set, St) ->
    Op = case Type of
             integer -> bs_get_integer2;
             float   -> bs_get_float2;
             binary  -> bs_get_binary2;
             utf8    -> bs_get_utf8;
             utf16   -> bs_get_utf16;
             utf32   -> bs_get_utf32
         end,
    [Dst|Ss1] = beam_args([Dst0|Ss0], St),
    Ss = case Ss1 of
             [Ctx,{literal,Flags},Size,{integer,Unit}] ->
                 %% Plain integer/float/binary.
                 [Ctx,Size,Unit,field_flags(Flags, Set)];
             [Ctx,{literal,Flags}] ->
                 %% Utf8/16/32.
                 [Ctx,field_flags(Flags, Set)]
         end,
    Live = get_live(Set),
    [{test,Op,Fail,Live,Ss,Dst}].

cg_bs_skip(Fail, [{atom,Type}|Ss0], Set) ->
    Op = case Type of
             utf8 -> bs_skip_utf8;
             utf16 -> bs_skip_utf16;
             utf32 -> bs_skip_utf32;
             _ -> bs_skip_bits2
         end,
    Live = get_live(Set),
    Ss = case Ss0 of
             [Ctx,{literal,Flags},Size,{integer,Unit}] ->
                 %% Plain integer/float/binary.
                 [Ctx,Size,Unit,field_flags(Flags, Set)];
             [Ctx,{literal,Flags}] ->
                 %% Utf8/16/32.
                 [Ctx,Live,field_flags(Flags, Set)]
         end,
    case {Type,Ss} of
        {binary,[_,{atom,all},1,_]} ->
            [];
        {binary,[R,{atom,all},U,_]} ->
            [{test,bs_test_unit,Fail,[R,U]}];
        {_,_} ->
            [{test,Op,Fail,Ss}]
    end.

field_flags(Flags, #cg_set{anno=#{location:={File,Line}}}) ->
    {field_flags,[{anno,[Line,{file,File}]}|Flags]};
field_flags(Flags, _) ->
    {field_flags,Flags}.

cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) ->
    Op = case Type of
             integer -> bs_put_integer;
             float   -> bs_put_float;
             binary  -> bs_put_binary;
             utf8    -> bs_put_utf8;
             utf16   -> bs_put_utf16;
             utf32   -> bs_put_utf32
         end,
    case Args of
        [Src,Size,{integer,Unit}] ->
            [{Op,Fail,Size,Unit,{field_flags,Flags},Src}];
        [Src] ->
            [{Op,Fail,{field_flags,Flags},Src}]
    end.

cg_bs_init(Dst, Size0, Unit, Live, Fail) ->
    Op = case Unit of
             1 -> bs_init_bits;
             8 -> bs_init2
         end,
    Size = cg_bs_init_size(Size0),
    [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}].

cg_bs_init_size({x,_}=R) -> R;
cg_bs_init_size({y,_}=R) -> R;
cg_bs_init_size({integer,Int}) -> Int.

cg_catch(Agg, T0, Context, St0) ->
    {Moves,T1} = cg_extract(T0, Agg, St0),
    {T,St} = cg_block(T1, Context, St0),
    {Moves++T,St}.

cg_try(Agg, Tag, T0, Context, St0) ->
    {Moves0,T1} = cg_extract(T0, Agg, St0),
    Moves = order_moves(Moves0, {x,3}),
    [#cg_set{op=kill_try_tag}|T2] = T1,
    {T,St} = cg_block(T2, Context, St0),
    {[{try_case,Tag}|Moves++T],St}.

cg_extract([#cg_set{op=extract,dst=Dst0,args=Args0}|Is0], Agg, St) ->
    [Dst,Agg,{integer,X}] = beam_args([Dst0|Args0], St),
    {Ds,Is} = cg_extract(Is0, Agg, St),
    case keymember(Dst, 3, Ds) of
        true ->
            %% This destination will be overwritten.
            {Ds,Is};
        false ->
            {copy({x,X}, Dst)++Ds,Is}
    end;
cg_extract(Is, _, _) ->
    {[],Is}.

copy(Src, Src) -> [];
copy(Src, Dst) -> [{move,Src,Dst}].

force_reg({literal,_}=Lit, Reg) ->
    {Reg,[{move,Lit,Reg}]};
force_reg({Kind,_}=R, _) when Kind =:= x; Kind =:= y ->
    {R,[]}.

%% successors(Terminator) -> [Successor].
%%  Return an ordset of all successors for the given terminator.

successors(#cg_br{succ=Succ,fail=Fail}) ->
    ordsets:from_list([Succ,Fail]);
successors(#cg_switch{fail=Fail,list=List}) ->
    ordsets:from_list([Fail|[Lbl || {_,Lbl} <- List]]);
successors(#cg_ret{}) -> [].

%% linearize(Blocks) -> [{BlockLabel,#cg_blk{}}].
%%  Linearize the intermediate representation of the code. Also
%%  translate blocks from the SSA records to internal record types
%%  used only in this module.

linearize(Blocks) ->
    Linear = beam_ssa:linearize(Blocks),
    linearize_1(Linear, Blocks).

linearize_1([{?BADARG_BLOCK,_}|Ls], Blocks) ->
    linearize_1(Ls, Blocks);
linearize_1([{L,Block0}|Ls], Blocks) ->
    Block = translate_block(L, Block0, Blocks),
    [{L,Block}|linearize_1(Ls, Blocks)];
linearize_1([], _Blocks) -> [].

%% translate_block(BlockLabel, #b_blk{}, Blocks) -> #cg_blk{}.
%%  Translate a block to the internal records used in this module.
%%  Also eliminate phi nodes, replacing them with 'copy' instructions
%%  in the predecessor blocks.

translate_block(L, #b_blk{anno=Anno,is=Is0,last=Last0}, Blocks) ->
    Last = translate_terminator(Last0),
    PhiCopies = translate_phis(L, Last, Blocks),
    Is1 = translate_is(Is0, PhiCopies),
    Is = case Anno of
             #{frame_size:=Size} ->
                 Alloc = #cg_alloc{stack=Size},
                 [Alloc|Is1];
             #{} -> Is1
         end,
    #cg_blk{anno=Anno,is=Is,last=Last}.

translate_is([#b_set{op=phi}|Is], Tail) ->
    translate_is(Is, Tail);
translate_is([#b_set{anno=Anno0,op=Op,dst=Dst,args=Args}=I|Is], Tail) ->
    Anno = case beam_ssa:clobbers_xregs(I) of
               true -> Anno0#{clobbers=>true};
               false -> Anno0
           end,
    [#cg_set{anno=Anno,op=Op,dst=Dst,args=Args}|translate_is(Is, Tail)];
translate_is([], Tail) -> Tail.

translate_terminator(#b_ret{anno=Anno,arg=Arg}) ->
    Dealloc = case Anno of
                  #{deallocate:=N} -> N;
                  #{} -> none
              end,
    #cg_ret{arg=Arg,dealloc=Dealloc};
translate_terminator(#b_br{bool=#b_literal{val=true},succ=Succ}) ->
    #cg_br{bool=#b_literal{val=true},succ=Succ,fail=Succ};
translate_terminator(#b_br{bool=#b_literal{val=false},fail=Fail}) ->
    #cg_br{bool=#b_literal{val=true},succ=Fail,fail=Fail};
translate_terminator(#b_br{bool=Bool,succ=Succ,fail=Fail}) ->
    #cg_br{bool=Bool,succ=Succ,fail=Fail};
translate_terminator(#b_switch{arg=Bool,fail=Fail,list=List}) ->
    #cg_switch{arg=Bool,fail=Fail,list=List}.

translate_phis(L, #cg_br{succ=Target,fail=Target}, Blocks) ->
    #b_blk{is=Is} = maps:get(Target, Blocks),
    Phis = takewhile(fun(#b_set{op=phi}) -> true;
                        (#b_set{}) -> false
                     end, Is),
    phi_copies(Phis, L);
translate_phis(_, _, _) -> [].

phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) ->
    CopyArgs = [V || {V,Target} <- PhiArgs, Target =:= L],
    [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)];
phi_copies([], _) -> [].

%% setup_args(Args, Anno, Context) -> [Instruction].
%% setup_args(Args) -> [Instruction].
%%  Set up X registers for a call.

setup_args(Args, Anno, none, St) ->
    setup_args(Args) ++ kill_yregs(Anno, St);
setup_args(Args, _, _, _) ->
    setup_args(Args).

setup_args([]) ->
    [];
setup_args([_|_]=Args) ->
    Moves = gen_moves(Args, 0, []),
    Scratch = {x,1+last(sort([length(Args)-1|[X || {x,X} <- Args]]))},
    order_moves(Moves, Scratch).

%% kill_yregs(Anno, #cg{}) -> [{kill,{y,Y}}].
%%  Kill Y registers that will not be used again.

kill_yregs(#{kill_yregs:=Kill}, #cg{regs=Regs}) ->
    ordsets:from_list([{kill,maps:get(V, Regs)} || V <- Kill]);
kill_yregs(#{}, #cg{}) -> [].

%% gen_moves(As, I, Acc)
%%  Generate the basic move instruction to move the arguments
%%  to their proper registers. The list will be sorted on
%%  destinations. (I.e. the move to {x,0} will be first --
%%  see the comment to order_moves/2.)

gen_moves([A|As], I, Acc) ->
    gen_moves(As, I+1, copy(A, {x,I}) ++ Acc);
gen_moves([], _, Acc) ->
    keysort(3, Acc).

%% order_moves([Move], ScratchReg) -> [Move]
%%  Orders move instruction so that source registers are not
%%  destroyed before they are used. If there are cycles
%%  (such as {move,{x,0},{x,1}}, {move,{x,1},{x,1}}),
%%  the scratch register is used to break up the cycle.
%%    If possible, the first move of the input list is placed
%%  last in the result list (to make the move to {x,0} occur
%%  just before the call to allow the Beam loader to coalesce
%%  the instructions).

order_moves(Ms, Scr) -> order_moves(Ms, Scr, []).

order_moves([{move,_,_}=M|Ms0], ScrReg, Acc0) ->
    {Chain,Ms} = collect_chain(Ms0, [M], ScrReg),
    Acc = reverse(Chain, Acc0),
    order_moves(Ms, ScrReg, Acc);
order_moves([], _, Acc) -> Acc.

collect_chain(Ms, Path, ScrReg) ->
    collect_chain(Ms, Path, [], ScrReg).

collect_chain([{move,Src,Same}=M|Ms0], [{move,Same,_}|_]=Path, Others, ScrReg) ->
    case keymember(Src, 3, Path) of
        false ->
            collect_chain(reverse(Others, Ms0), [M|Path], [], ScrReg);
        true ->
            %% There is a cycle, which we must break up.
            {break_up_cycle(M, Path, ScrReg),reverse(Others, Ms0)}
    end;
collect_chain([M|Ms], Path, Others, ScrReg) ->
    collect_chain(Ms, Path, [M|Others], ScrReg);
collect_chain([], Path, Others, _) ->
    {Path,Others}.

break_up_cycle({move,Src,_}=M, Path, ScrReg) ->
    [{move,ScrReg,Src},M|break_up_cycle1(Src, Path, ScrReg)].

break_up_cycle1(Dst, [{move,Src,Dst}|Path], ScrReg) ->
    [{move,Src,ScrReg}|Path];
break_up_cycle1(Dst, [M|Path], LastMove) ->
    [M|break_up_cycle1(Dst, Path, LastMove)].

%%%
%%% General utility functions.
%%%

verify_beam_register({x,_}=Reg) -> Reg.

is_beam_register({x,_}) -> true;
is_beam_register(_) -> false.

get_register(V, Regs) ->
    case is_beam_register(V) of
        true -> V;
        false -> maps:get(V, Regs)
    end.

beam_args(As, St) ->
    [beam_arg(A, St) || A <- As].

beam_arg(#b_var{}=Name, #cg{regs=Regs}) ->
    maps:get(Name, Regs);
beam_arg(#b_literal{val=Val}, _) ->
    if
        is_atom(Val) -> {atom,Val};
        is_float(Val) -> {float,Val};
        is_integer(Val) -> {integer,Val};
        Val =:= [] -> nil;
        true -> {literal,Val}
    end;
beam_arg(Reg, _) ->
    verify_beam_register(Reg).

new_block_label(L, St0) ->
    {_Lbl,St} = label_for_block(L, St0),
    St.

def_block_label(L, #cg{labels=Labels,used_labels=Used}) ->
    Lbl = maps:get(L, Labels),
    case gb_sets:is_member(Lbl, Used) of
        false -> [];
        true -> [{label,Lbl}]
    end.

use_block_labels(Ls, St) ->
    mapfoldl(fun use_block_label/2, St, Ls).

use_block_label(L, #cg{used_labels=Used,catches=Catches}=St0) ->
    {Lbl,St} = label_for_block(L, St0),
    case gb_sets:is_member(L, Catches) of
        true ->
            {{catch_tag,{f,Lbl}},
             St#cg{used_labels=gb_sets:add(Lbl, Used)}};
        false ->
            {{f,Lbl},St#cg{used_labels=gb_sets:add(Lbl, Used)}}
    end.

label_for_block(L, #cg{labels=Labels0}=St0) ->
    case Labels0 of
        #{L:=Lbl} ->
            {Lbl,St0};
        #{} ->
            {Lbl,St} = new_label(St0),
            Labels = Labels0#{L=>Lbl},
            {Lbl,St#cg{labels=Labels}}
    end.

%% local_func_label(Name, Arity, State) -> {Label,State'}
%% local_func_label({Name,Arity}, State) -> {Label,State'}
%%  Get the function entry label for a local function.

local_func_label(Name, Arity, St) ->
    local_func_label({Name,Arity}, St).

local_func_label(Key, #cg{functable=Map}=St0) ->
    case Map of
       #{Key := Label} ->
            {Label,St0};
        _ ->
            {Label,St} = new_label(St0),
            {Label,St#cg{functable=Map#{Key => Label}}}
    end.

%% is_gc_bif(Name, Args) -> true|false.
%%  Determines whether the BIF Name/Arity might do a GC.

-spec is_gc_bif(atom(), [beam_ssa:value()]) -> boolean().

is_gc_bif(hd, [_]) -> false;
is_gc_bif(tl, [_]) -> false;
is_gc_bif(self, []) -> false;
is_gc_bif(node, []) -> false;
is_gc_bif(node, [_]) -> false;
is_gc_bif(element, [_,_]) -> false;
is_gc_bif(get, [_]) -> false;
is_gc_bif(is_map_key, [_,_]) -> false;
is_gc_bif(map_get, [_,_]) -> false;
is_gc_bif(tuple_size, [_]) -> false;
is_gc_bif(Bif, Args) ->
    Arity = length(Args),
    not (erl_internal:bool_op(Bif, Arity) orelse
	 erl_internal:new_type_test(Bif, Arity) orelse
	 erl_internal:comp_op(Bif, Arity)).

%% new_label(St) -> {L,St}.

new_label(#cg{lcount=Next}=St) ->
    %% Advance the label counter by 2 to allow us to create
    %% a label for 'or' by incrementing an existing label.
    {Next,St#cg{lcount=Next+2}}.

%% call_line(tail|body, Func, Anno) -> [] | [{line,...}].
%%  Produce a line instruction if it will be needed by the
%%  call to Func.

call_line(_Context, {extfunc,Mod,Name,Arity}, Anno) ->
    case erl_bifs:is_safe(Mod, Name, Arity) of
	false ->
	    %% The call could be to a BIF.
	    %% We'll need a line instruction in case the
	    %% BIF call fails.
	    [line(Anno)];
	true ->
	    %% Call to a safe BIF. Since it cannot fail,
	    %% we don't need any line instruction here.
	    []
    end;
call_line(body, _, Anno) ->
    [line(Anno)];
call_line(tail, local, _) ->
    %% Tail-recursive call to a local function. A line
    %% instruction will not be useful.
    [];
call_line(tail, _, Anno) ->
    %% Call to a fun.
    [line(Anno)].

%% line(Le) -> {line,[] | {location,File,Line}}
%%  Create a line instruction, containing information about
%%  the current filename and line number. A line information
%%  instruction should be placed before any operation that could
%%  cause an exception.

line(#{location:={File,Line}}) ->
    {line,[{location,File,Line}]};
line(#{}) ->
    {line,[]}.

flatmapfoldl(F, Accu0, [Hd|Tail]) ->
    {R,Accu1} = F(Hd, Accu0),
    {Rs,Accu2} = flatmapfoldl(F, Accu1, Tail),
    {R++Rs,Accu2};
flatmapfoldl(_, Accu, []) -> {[],Accu}.