aboutsummaryrefslogblamecommitdiffstats
path: root/lib/compiler/src/beam_ssa_opt.erl
blob: 83ee918b25787f6156747af3cae96e0ff4229e4a (plain) (tree)
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627

















































                                                                              
                                       


                                   




                                                                      

                              
                              




                                                             


                                      
                                    
                            





























                                                                         


                                         




























                                                                            































































































                                                                                      








































































































































































































                                                                                     



                                                               
                    

                                                          





























































                                                                          






                                                    

                                                          


                                                       

                                                       























                                                                      

                                                     




                                                

                                             











                                                    
                                                   




                                                    
                                                 





                                                     



                                       
 
                                                        

                                              







                                                                         


























































                                                                   


                                                                          

                                          


                                                                     
            























                                                                           
        

                                    























                                                                   























































                                                                      























































                                                                    









                                                                                     










































                                                                           
                                                               





                                                                                
                                              









                                                                                       












































































































































                                                                              















                                                     































                                                                













                                             
   























































































































                                                                                     







































































































                                                                                          




















































































































































































































































































































                                                                                   



                                         
                                                         
                                  
                                                 
                                          

                            
                                             

                            
                                           

                            
                       







                                                                
%%
%% %CopyrightBegin%
%%
%% Copyright Ericsson AB 2018. All Rights Reserved.
%%
%% Licensed under the Apache License, Version 2.0 (the "License");
%% you may not use this file except in compliance with the License.
%% You may obtain a copy of the License at
%%
%%     http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing, software
%% distributed under the License is distributed on an "AS IS" BASIS,
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
%% See the License for the specific language governing permissions and
%% limitations under the License.
%%
%% %CopyrightEnd%
%%

-module(beam_ssa_opt).
-export([module/2]).

-include("beam_ssa.hrl").
-import(lists, [all/2,append/1,foldl/3,keyfind/3,member/2,reverse/1,reverse/2,
                splitwith/2,takewhile/2,unzip/1]).

-spec module(beam_ssa:b_module(), [compile:option()]) ->
                    {'ok',beam_ssa:b_module()}.

module(#b_module{body=Fs0}=Module, Opts) ->
    Ps = passes(Opts),
    Fs = functions(Fs0, Ps),
    {ok,Module#b_module{body=Fs}}.

functions([F|Fs], Ps) ->
    [function(F, Ps)|functions(Fs, Ps)];
functions([], _Ps) -> [].

-type b_blk() :: beam_ssa:b_blk().
-type b_var() :: beam_ssa:b_var().
-type label() :: beam_ssa:label().

-record(st, {ssa :: beam_ssa:block_map() | [{label(),b_blk()}],
             args :: [b_var()],
             cnt :: label()}).
-define(PASS(N), {N,fun N/1}).

passes(Opts0) ->
    Ps = [?PASS(ssa_opt_split_blocks),
          ?PASS(ssa_opt_coalesce_phis),
          ?PASS(ssa_opt_element),
          ?PASS(ssa_opt_linearize),
          ?PASS(ssa_opt_record),

          %% Run ssa_opt_cse twice, because it will help ssa_opt_dead,
          %% and ssa_opt_dead will help ssa_opt_cse. Run ssa_opt_live
          %% twice, because it will help ssa_opt_dead and ssa_opt_dead
          %% will help ssa_opt_live.
          ?PASS(ssa_opt_cse),
          ?PASS(ssa_opt_type),
          ?PASS(ssa_opt_live),
          ?PASS(ssa_opt_dead),
          ?PASS(ssa_opt_cse),                   %Second time.
          ?PASS(ssa_opt_float),
          ?PASS(ssa_opt_live),                  %Second time.

          ?PASS(ssa_opt_bsm),
          ?PASS(ssa_opt_bsm_shortcut),
          ?PASS(ssa_opt_misc),
          ?PASS(ssa_opt_tuple_size),
          ?PASS(ssa_opt_sw),
          ?PASS(ssa_opt_blockify),
          ?PASS(ssa_opt_sink),
          ?PASS(ssa_opt_merge_blocks)],
    Negations = [{list_to_atom("no_"++atom_to_list(N)),N} ||
                    {N,_} <- Ps],
    Opts = proplists:substitute_negations(Negations, Opts0),
    [case proplists:get_value(Name, Opts, true) of
         true ->
             P;
         false ->
             {NoName,Name} = keyfind(Name, 2, Negations),
             {NoName,fun(S) -> S end}
     end || {Name,_}=P <- Ps].

function(#b_function{anno=Anno,bs=Blocks0,args=Args,cnt=Count0}=F, Ps) ->
    try
        St = #st{ssa=Blocks0,args=Args,cnt=Count0},
        #st{ssa=Blocks,cnt=Count} = compile:run_sub_passes(Ps, St),
        F#b_function{bs=Blocks,cnt=Count}
    catch
        Class:Error:Stack ->
            #{func_info:={_,Name,Arity}} = Anno,
            io:fwrite("Function: ~w/~w\n", [Name,Arity]),
            erlang:raise(Class, Error, Stack)
    end.

%%%
%%% Trivial sub passes.
%%%

ssa_opt_dead(#st{ssa=Linear}=St) ->
    St#st{ssa=beam_ssa_dead:opt(Linear)}.

ssa_opt_linearize(#st{ssa=Blocks}=St) ->
    St#st{ssa=beam_ssa:linearize(Blocks)}.

ssa_opt_type(#st{ssa=Linear,args=Args}=St) ->
    St#st{ssa=beam_ssa_type:opt(Linear, Args)}.

ssa_opt_blockify(#st{ssa=Linear}=St) ->
    St#st{ssa=maps:from_list(Linear)}.

%%%
%%% Split blocks before certain instructions to enable more optimizations.
%%%
%%% Splitting before element/2 enables the optimization that swaps
%%% element/2 instructions.
%%%
%%% Splitting before call and make_fun instructions gives more opportunities
%%% for sinking get_tuple_element instructions.
%%%

ssa_opt_split_blocks(#st{ssa=Blocks0,cnt=Count0}=St) ->
    P = fun(#b_set{op={bif,element}}) -> true;
           (#b_set{op=call}) -> true;
           (#b_set{op=make_fun}) -> true;
           (_) -> false
        end,
    {Blocks,Count} = beam_ssa:split_blocks(P, Blocks0, Count0),
    St#st{ssa=Blocks,cnt=Count}.

%%%
%%% Coalesce phi nodes.
%%%
%%% Nested cases can led to code such as this:
%%%
%%%     10:
%%%       _1 = phi {literal value1, label 8}, {Var, label 9}
%%%       br 11
%%%
%%%     11:
%%%       _2 = phi {_1, label 10}, {literal false, label 3}
%%%
%%% The phi nodes can be coalesced like this:
%%%
%%%     11:
%%%       _2 = phi {literal value1, label 8}, {Var, label 9}, {literal false, label 3}
%%%
%%% Coalescing can help other optimizations, and can in some cases reduce register
%%% shuffling (if the phi variables for two phi nodes happens to be allocated to
%%% different registers).
%%%

ssa_opt_coalesce_phis(#st{ssa=Blocks0}=St) ->
    Ls = beam_ssa:rpo(Blocks0),
    Blocks = c_phis_1(Ls, Blocks0),
    St#st{ssa=Blocks}.

c_phis_1([L|Ls], Blocks0) ->
    case maps:get(L, Blocks0) of
        #b_blk{is=[#b_set{op=phi}|_]}=Blk ->
            Blocks = c_phis_2(L, Blk, Blocks0),
            c_phis_1(Ls, Blocks);
        #b_blk{} ->
            c_phis_1(Ls, Blocks0)
    end;
c_phis_1([], Blocks) -> Blocks.

c_phis_2(L, #b_blk{is=Is0}=Blk0, Blocks0) ->
    case c_phis_args(Is0, Blocks0) of
        none ->
            Blocks0;
        {_,_,Preds}=Info ->
            Is = c_rewrite_phis(Is0, Info),
            Blk = Blk0#b_blk{is=Is},
            Blocks = Blocks0#{L:=Blk},
            c_fix_branches(Preds, L, Blocks)
    end.

c_phis_args([#b_set{op=phi,args=Args0}|Is], Blocks) ->
    case c_phis_args_1(Args0, Blocks) of
        none ->
            c_phis_args(Is, Blocks);
        Res ->
            Res
    end;
c_phis_args(_, _Blocks) -> none.

c_phis_args_1([{Var,Pred}|As], Blocks) ->
    case c_get_pred_vars(Var, Pred, Blocks) of
        none ->
            c_phis_args_1(As, Blocks);
        Result ->
            Result
    end;
c_phis_args_1([], _Blocks) -> none.

c_get_pred_vars(Var, Pred, Blocks) ->
    case maps:get(Pred, Blocks) of
        #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
            {Var,Pred,Args};
        #b_blk{} ->
            none
    end.

c_rewrite_phis([#b_set{op=phi,args=Args0}=I|Is], Info) ->
    Args = c_rewrite_phi(Args0, Info),
    [I#b_set{args=Args}|c_rewrite_phis(Is, Info)];
c_rewrite_phis(Is, _Info) -> Is.

c_rewrite_phi([{Var,Pred}|As], {Var,Pred,Values}) ->
    Values ++ As;
c_rewrite_phi([{Value,Pred}|As], {_,Pred,Values}) ->
    [{Value,P} || {_,P} <- Values] ++ As;
c_rewrite_phi([A|As], Info) ->
    [A|c_rewrite_phi(As, Info)];
c_rewrite_phi([], _Info) -> [].

c_fix_branches([{_,Pred}|As], L, Blocks0) ->
    #b_blk{last=Last0} = Blk0 = maps:get(Pred, Blocks0),
    #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
    Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
    Blk = Blk0#b_blk{last=Last},
    Blocks = Blocks0#{Pred:=Blk},
    c_fix_branches(As, L, Blocks);
c_fix_branches([], _, Blocks) -> Blocks.

%%%
%%% Order element/2 calls.
%%%
%%% Order an unbroken chain of element/2 calls for the same tuple
%%% with the same failure label so that the highest element is
%%% retrieved first. That will allow the other element/2 calls to
%%% be replaced with get_tuple_element/3 instructions.
%%%

ssa_opt_element(#st{ssa=Blocks}=St) ->
    %% Collect the information about element instructions in this
    %% function.
    GetEls = collect_element_calls(beam_ssa:linearize(Blocks)),

    %% Collect the element instructions into chains. The
    %% element calls in each chain are ordered in reverse
    %% execution order.
    Chains = collect_chains(GetEls, []),

    %% For each chain, swap the first element call with the
    %% element call with the highest index.
    St#st{ssa=swap_element_calls(Chains, Blocks)}.

collect_element_calls([{L,#b_blk{is=Is0,last=Last}}|Bs]) ->
    case {Is0,Last} of
        {[#b_set{op={bif,element},dst=Element,
                 args=[#b_literal{val=N},#b_var{}=Tuple]},
          #b_set{op=succeeded,dst=Bool,args=[Element]}],
         #b_br{bool=Bool,succ=Succ,fail=Fail}} ->
            Info = {L,Succ,{Tuple,Fail},N},
            [Info|collect_element_calls(Bs)];
        {_,_} ->
            collect_element_calls(Bs)
    end;
collect_element_calls([]) -> [].

collect_chains([{This,_,V,_}=El|Els], [{_,This,V,_}|_]=Chain) ->
    %% Add to the previous chain.
    collect_chains(Els, [El|Chain]);
collect_chains([El|Els], [_,_|_]=Chain) ->
    %% Save the previous chain and start a new chain.
    [Chain|collect_chains(Els, [El])];
collect_chains([El|Els], _Chain) ->
    %% The previous chain is too short; discard it and start a new.
    collect_chains(Els, [El]);
collect_chains([], [_,_|_]=Chain) ->
    %% Save the last chain.
    [Chain];
collect_chains([], _) ->  [].

swap_element_calls([[{L,_,_,N}|_]=Chain|Chains], Blocks0) ->
    Blocks = swap_element_calls_1(Chain, {N,L}, Blocks0),
    swap_element_calls(Chains, Blocks);
swap_element_calls([], Blocks) -> Blocks.

swap_element_calls_1([{L1,_,_,N1}], {N2,L2}, Blocks) when N2 > N1 ->
    %% We have reached the end of the chain, and the first
    %% element instrution to be executed. Its index is lower
    %% than the maximum index found while traversing the chain,
    %% so we will need to swap the instructions.
    #{L1:=Blk1,L2:=Blk2} = Blocks,
    [#b_set{dst=Dst1}=GetEl1,Succ1] = Blk1#b_blk.is,
    [#b_set{dst=Dst2}=GetEl2,Succ2] = Blk2#b_blk.is,
    Is1 = [GetEl2,Succ1#b_set{args=[Dst2]}],
    Is2 = [GetEl1,Succ2#b_set{args=[Dst1]}],
    Blocks#{L1:=Blk1#b_blk{is=Is1},L2:=Blk2#b_blk{is=Is2}};
swap_element_calls_1([{L,_,_,N1}|Els], {N2,_}, Blocks) when N1 > N2 ->
    swap_element_calls_1(Els, {N2,L}, Blocks);
swap_element_calls_1([_|Els], Highest, Blocks) ->
    swap_element_calls_1(Els, Highest, Blocks);
swap_element_calls_1([], _, Blocks) ->
    %% Nothing to do. The element call with highest index
    %% is already the first one to be executed.
    Blocks.

%%%
%%% Record optimization.
%%%
%%% Replace tuple matching with an is_tagged_tuple instruction
%%% when applicable.
%%%

ssa_opt_record(#st{ssa=Linear}=St) ->
    Blocks = maps:from_list(Linear),
    St#st{ssa=record_opt(Linear, Blocks)}.

record_opt([{L,#b_blk{is=Is0,last=Last}=Blk0}|Bs], Blocks) ->
    Is = record_opt_is(Is0, Last, Blocks),
    Blk = Blk0#b_blk{is=Is},
    [{L,Blk}|record_opt(Bs, Blocks)];
record_opt([], _Blocks) -> [].

record_opt_is([#b_set{op={bif,is_tuple},dst=#b_var{name=Bool},
                      args=[Tuple]}=Set],
              Last, Blocks) ->
    case is_tagged_tuple(Tuple, Bool, Last, Blocks) of
        {yes,Size,Tag} ->
            Args = [Tuple,Size,Tag],
            [Set#b_set{op=is_tagged_tuple,args=Args}];
        no ->
            [Set]
    end;
record_opt_is([I|Is], Last, Blocks) ->
    [I|record_opt_is(Is, Last, Blocks)];
record_opt_is([], _Last, _Blocks) -> [].

is_tagged_tuple(#b_var{name=Tuple}, Bool,
                #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail},
                Blocks) ->
    SuccBlk = maps:get(Succ, Blocks),
    is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks);
is_tagged_tuple(_, _, _, _) -> no.

is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) ->
    case Is of
        [#b_set{op={bif,tuple_size},dst=#b_var{name=ArityVar},
                args=[#b_var{name=Tuple}]},
         #b_set{op={bif,'=:='},
                dst=#b_var{name=Bool},
                args=[#b_var{name=ArityVar},
                      #b_literal{val=ArityVal}=Arity]}]
        when is_integer(ArityVal) ->
            case Last of
                #b_br{bool=#b_var{name=Bool},succ=Succ,fail=Fail} ->
                    SuccBlk = maps:get(Succ, Blocks),
                    case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
                        no ->
                            no;
                        {yes,Tag} ->
                            {yes,Arity,Tag}
                    end;
                _ ->
                    no
            end;
        _ ->
            no
    end.

is_tagged_tuple_2(#b_blk{is=Is,
                         last=#b_br{bool=#b_var{name=Bool},fail=Fail}},
                  Tuple, Fail) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_2(#b_blk{}, _, _) -> no.

is_tagged_tuple_3([#b_set{op=get_tuple_element,
                          dst=#b_var{name=TagVar},
                          args=[#b_var{name=Tuple},#b_literal{val=0}]}|Is],
                  Bool, Tuple) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_3([_|Is], Bool, Tuple) ->
    is_tagged_tuple_3(Is, Bool, Tuple);
is_tagged_tuple_3([], _, _) -> no.

is_tagged_tuple_4([#b_set{op={bif,'=:='},dst=#b_var{name=Bool},
                          args=[#b_var{name=TagVar},
                                #b_literal{val=TagVal}=Tag]}],
                 Bool, TagVar) when is_atom(TagVal) ->
    {yes,Tag};
is_tagged_tuple_4([_|Is], Bool, TagVar) ->
    is_tagged_tuple_4(Is, Bool, TagVar);
is_tagged_tuple_4([], _, _) -> no.

%%%
%%% Common subexpression elimination (CSE).
%%%
%%% Eliminate repeated evaluation of identical expressions. To avoid
%%% increasing the size of the stack frame, we don't eliminate
%%% subexpressions across instructions that clobber the X registers.
%%%

ssa_opt_cse(#st{ssa=Linear}=St) ->
    M = #{0=>#{}},
    St#st{ssa=cse(Linear, #{}, M)}.

cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
    Es0 = maps:get(L, M0),
    {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
    Last = sub(Last0, Sub),
    M = cse_successors(Is1, Blk, Es, M0),
    Is = reverse(Is1),
    [{L,Blk#b_blk{is=Is,last=Last}}|cse(Bs, Sub, M)];
cse([], _, _) -> [].

cse_successors([#b_set{op=succeeded,args=[Src]},Bif|_], Blk, EsSucc, M0) ->
    case cse_suitable(Bif) of
        true ->
            %% The previous instruction only has a valid value at the success branch.
            %% We must remove the substitution for Src from the failure branch.
            #b_blk{last=#b_br{succ=Succ,fail=Fail}} = Blk,
            M = cse_successors_1([Succ], EsSucc, M0),
            EsFail = maps:filter(fun(_, Val) -> Val =/= Src end, EsSucc),
            cse_successors_1([Fail], EsFail, M);
        false ->
            %% There can't be any replacement for Src in EsSucc. No need for
            %% any special handling.
            cse_successors_1(beam_ssa:successors(Blk), EsSucc, M0)
    end;
cse_successors(_Is, Blk, Es, M) ->
    cse_successors_1(beam_ssa:successors(Blk), Es, M).

cse_successors_1([L|Ls], Es0, M) ->
    case M of
        #{L:=Es1} when map_size(Es1) =:= 0 ->
            %% The map is already empty. No need to do anything
            %% since the intersection will be empty.
            cse_successors_1(Ls, Es0, M);
        #{L:=Es1} ->
            %% Calculate the intersection of the two maps.
            %% Both keys and values must match.
            Es = maps:filter(fun(Key, Value) ->
                                     case Es1 of
                                         #{Key:=Value} -> true;
                                         #{} -> false
                                     end
                             end, Es0),
            cse_successors_1(Ls, Es0, M#{L:=Es});
        #{} ->
            cse_successors_1(Ls, Es0, M#{L=>Es0})
    end;
cse_successors_1([], _, M) -> M.

cse_is([#b_set{op=succeeded,dst=Bool,args=[Src]}=I0|Is], Es, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case I of
        #b_set{args=[Src]} ->
            cse_is(Is, Es, Sub0, [I|Acc]);
        #b_set{} ->
            %% The previous instruction has been eliminated. Eliminate the
            %% 'succeeded' instruction too.
            Sub = Sub0#{Bool=>#b_literal{val=true}},
            cse_is(Is, Es, Sub, Acc)
    end;
cse_is([#b_set{dst=Dst}=I0|Is], Es0, Sub0, Acc) ->
    I = sub(I0, Sub0),
    case beam_ssa:clobbers_xregs(I) of
        true ->
            %% Retaining the expressions map across calls and other
            %% clobbering instructions would work, but it would cause
            %% the common subexpressions to be saved to Y registers,
            %% which would probably increase the size of the stack
            %% frame.
            cse_is(Is, #{}, Sub0, [I|Acc]);
        false ->
            case cse_expr(I) of
                none ->
                    %% Not suitable for CSE.
                    cse_is(Is, Es0, Sub0, [I|Acc]);
                {ok,ExprKey} ->
                    case Es0 of
                        #{ExprKey:=Src} ->
                            Sub = Sub0#{Dst=>Src},
                            cse_is(Is, Es0, Sub, Acc);
                        #{} ->
                            Es = Es0#{ExprKey=>Dst},
                            cse_is(Is, Es, Sub0, [I|Acc])
                    end
            end
    end;
cse_is([], Es, Sub, Acc) ->
    {Acc,Es,Sub}.

cse_expr(#b_set{op=Op,args=Args}=I) ->
    case cse_suitable(I) of
        true -> {ok,{Op,Args}};
        false -> none
    end.

cse_suitable(#b_set{op=get_hd}) -> true;
cse_suitable(#b_set{op=get_tl}) -> true;
cse_suitable(#b_set{op=put_list}) -> true;
cse_suitable(#b_set{op=put_tuple}) -> true;
cse_suitable(#b_set{op={bif,tuple_size}}) ->
    %% Doing CSE for tuple_size/1 can prevent the
    %% creation of test_arity and select_tuple_arity
    %% instructions. That could decrease performance
    %% and beam_validator could fail to understand
    %% that tuple operations that follow are safe.
    false;
cse_suitable(#b_set{anno=Anno,op={bif,Name},args=Args}) ->
    %% Doing CSE for floating point operators is unsafe.
    %% Doing CSE for comparison operators would prevent
    %% creation of 'test' instructions.
    Arity = length(Args),
    not (is_map_key(float_op, Anno) orelse
         erl_internal:new_type_test(Name, Arity) orelse
         erl_internal:comp_op(Name, Arity) orelse
         erl_internal:bool_op(Name, Arity));
cse_suitable(#b_set{}) -> false.

%%%
%%% Using floating point instructions.
%%%
%%% Use the special floating points version of arithmetic
%%% instructions, if the operands are known to be floats or the result
%%% of the operation will be a float.
%%%
%%% The float instructions were never used in guards before, so we
%%% will take special care to keep not using them in guards.  Using
%%% them in guards would require a new version of the 'fconv'
%%% instruction that would take a failure label.  Since it is unlikely
%%% that using float instructions in guards would be benefical, why
%%% bother implementing a new instruction?  Also, implementing float
%%% instructions in guards in HiPE could turn out to be a lot of work.
%%%

-record(fs,
        {s=undefined :: 'undefined' | 'cleared',
         regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
         fail=none :: 'none' | beam_ssa:label(),
         non_guards :: gb_sets:set(beam_ssa:label()),
         bs :: beam_ssa:block_map()
        }).

ssa_opt_float(#st{ssa=Linear0,cnt=Count0}=St) ->
    NonGuards0 = float_non_guards(Linear0),
    NonGuards = gb_sets:from_list(NonGuards0),
    Blocks = maps:from_list(Linear0),
    Fs = #fs{non_guards=NonGuards,bs=Blocks},
    {Linear,Count} = float_opt(Linear0, Count0, Fs),
    St#st{ssa=Linear,cnt=Count}.

float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
    case Is of
        [#b_set{op=landingpad}|_] ->
            [L|float_non_guards(Bs)];
        _ ->
            float_non_guards(Bs)
    end;
float_non_guards([]) -> [?BADARG_BLOCK].

float_opt([{L,#b_blk{last=#b_br{fail=F}}=Blk}|Bs0],
            Count0, #fs{non_guards=NonGuards}=Fs) ->
    case gb_sets:is_member(F, NonGuards) of
        true ->
            %% This block is not inside a guard.
            %% We can do the optimization.
            float_opt_1(L, Blk, Bs0, Count0, Fs);
        false ->
            %% This block is inside a guard. Don't do
            %% any floating point optimizations.
            {Bs,Count} = float_opt(Bs0, Count0, Fs),
            {[{L,Blk}|Bs],Count}
    end;
float_opt([{L,Blk}|Bs], Count, Fs) ->
    float_opt_1(L, Blk, Bs, Count, Fs);
float_opt([], Count, _Fs) ->
    {[],Count}.

float_opt_1(L, #b_blk{is=Is0}=Blk0, Bs0, Count0, Fs0) ->
    case float_opt_is(Is0, Fs0, Count0, []) of
        {Is1,Fs1,Count1} ->
            Fs2 = float_fail_label(Blk0, Fs1),
            Fail = Fs2#fs.fail,
            {Flush,Blk,Fs,Count2} = float_maybe_flush(Blk0, Fs2, Count1),
            Split = float_split_conv(Is1, Blk),
            {Blks0,Count3} = float_number(Split, L, Count2),
            {Blks,Count4} = float_conv(Blks0, Fail, Count3),
            {Bs,Count} = float_opt(Bs0, Count4, Fs),
            {Blks++Flush++Bs,Count};
        none ->
            {Bs,Count} = float_opt(Bs0, Count0, Fs0),
            {[{L,Blk0}|Bs],Count}
    end.

%% Split {float,convert} instructions into individual blocks.
float_split_conv(Is0, Blk) ->
    Br = #b_br{bool=#b_literal{val=true},succ=0,fail=0},
    case splitwith(fun(#b_set{op=Op}) ->
                           Op =/= {float,convert}
                   end, Is0) of
        {Is,[]} ->
            [Blk#b_blk{is=Is}];
        {[_|_]=Is1,[#b_set{op={float,convert}}=Conv|Is2]} ->
            [#b_blk{is=Is1,last=Br},
             #b_blk{is=[Conv],last=Br}|float_split_conv(Is2, Blk)];
        {[],[#b_set{op={float,convert}}=Conv|Is1]} ->
            [#b_blk{is=[Conv],last=Br}|float_split_conv(Is1, Blk)]
    end.

%% Number the blocks that were split.
float_number([B|Bs0], FirstL, Count0) ->
    {Bs,Count} = float_number(Bs0, Count0),
    {[{FirstL,B}|Bs],Count}.

float_number([B|Bs0], Count0) ->
    {Bs,Count} = float_number(Bs0, Count0+1),
    {[{Count0,B}|Bs],Count};
float_number([], Count) ->
    {[],Count}.

%% Insert 'succeeded' instructions after each {float,convert}
%% instruction.
float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
    case Is0 of
        [#b_set{op={float,convert}}=Conv] ->
            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
            Bool = #b_var{name=Bool0},
            Succeeded = #b_set{op=succeeded,dst=Bool,
                               args=[Conv#b_set.dst]},
            Is = [Conv,Succeeded],
            [{NextL,_}|_] = Bs0,
            Br = #b_br{bool=Bool,succ=NextL,fail=Fail},
            Blk = Blk0#b_blk{is=Is,last=Br},
            {Bs,Count} = float_conv(Bs0, Fail, Count1),
            {[{L,Blk}|Bs],Count};
        [_|_] ->
            case Bs0 of
                [{NextL,_}|_] ->
                    Br = #b_br{bool=#b_literal{val=true},
                               succ=NextL,fail=NextL},
                    Blk = Blk0#b_blk{last=Br},
                    {Bs,Count} = float_conv(Bs0, Fail, Count0),
                    {[{L,Blk}|Bs],Count};
                [] ->
                    {[{L,Blk0}],Count0}
            end
    end.

float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
    #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
    #b_blk{is=Is} = maps:get(Succ, Blocks),
    case Is of
        [#b_set{anno=#{float_op:=_}}|_] ->
            %% The next operation is also a floating point operation.
            %% No flush needed.
            {[],Blk0,Fs0,Count0};
        _ ->
            %% Flush needed.
            {Bool0,Count1} = new_reg('@ssa_bool', Count0),
            Bool = #b_var{name=Bool0},

            %% Allocate block numbers.
            CheckL = Count1,              %For checkerror.
            FlushL = Count1 + 1,          %For flushing of float regs.
            Count = Count1 + 2,
            Blk = Blk0#b_blk{last=Br#b_br{succ=CheckL}},

            %% Build the block with the checkerror instruction.
            CheckIs = [#b_set{op={float,checkerror},dst=Bool}],
            CheckBr = #b_br{bool=Bool,succ=FlushL,fail=Fail},
            CheckBlk = #b_blk{is=CheckIs,last=CheckBr},

            %% Build the block that flushes all registers.
            FlushIs = float_flush_regs(Fs0),
            FlushBr = #b_br{bool=#b_literal{val=true},succ=Succ,fail=Succ},
            FlushBlk = #b_blk{is=FlushIs,last=FlushBr},

            %% Update state and blocks.
            Fs = Fs0#fs{s=undefined,regs=#{},fail=none},
            FlushBs = [{CheckL,CheckBlk},{FlushL,FlushBlk}],
            {FlushBs,Blk,Fs,Count}
    end;
float_maybe_flush(Blk, Fs, Count) ->
    {[],Blk,Fs,Count}.

float_opt_is([#b_set{op=succeeded,args=[Src]}=I0],
             #fs{regs=Rs}=Fs, Count, Acc) ->
    case Rs of
        #{Src:=Fr} ->
            I = I0#b_set{args=[Fr]},
            {reverse(Acc, [I]),Fs,Count};
        #{} ->
            {reverse(Acc, [I0]),Fs,Count}
    end;
float_opt_is([#b_set{anno=Anno0}=I0|Is0], Fs0, Count0, Acc) ->
    case Anno0 of
        #{float_op:=FTypes} ->
            Anno = maps:remove(float_op, Anno0),
            I1 = I0#b_set{anno=Anno},
            {Is,Fs,Count} = float_make_op(I1, FTypes, Fs0, Count0),
            float_opt_is(Is0, Fs, Count, reverse(Is, Acc));
        #{} ->
            float_opt_is(Is0, Fs0#fs{regs=#{}}, Count0, [I0|Acc])
    end;
float_opt_is([], Fs, _Count, _Acc) ->
    #fs{s=undefined} = Fs,                      %Assertion.
    none.

float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
              Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
    {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
    {As,Is0} = unzip(As1),
    {Fr,Count2} = new_reg('@fr', Count1),
    FrDst = #b_var{name=Fr},
    I = I0#b_set{op={float,Op},dst=FrDst,args=As},
    Rs = Rs1#{Dst=>FrDst},
    Is = append(Is0) ++ [I],
    case S of
        undefined ->
            {Ignore,Count} = new_reg('@ssa_ignore', Count2),
            C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
            {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
        cleared ->
            {Is,Fs#fs{regs=Rs},Count2}
    end.

float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
    {Load,Rs,Count} = float_reg_arg(A, T, Rs0, Count0),
    float_load(As, Ts, Rs, Count, [Load|Acc]);
float_load([], [], Rs, Count, Acc) ->
    {reverse(Acc),Rs,Count}.

float_reg_arg(A, T, Rs, Count0) ->
    case Rs of
        #{A:=Fr} ->
            {{Fr,[]},Rs,Count0};
        #{} ->
            {Fr,Count} = new_float_copy_reg(Count0),
            Dst = #b_var{name=Fr},
            I = float_load_reg(T, A, Dst),
            {{Dst,[I]},Rs#{A=>Dst},Count}
    end.

float_load_reg(convert, #b_var{}=Src, Dst) ->
    #b_set{op={float,convert},dst=Dst,args=[Src]};
float_load_reg(convert, #b_literal{val=Val}=Src, Dst) ->
    try float(Val) of
        F ->
            #b_set{op={float,put},dst=Dst,args=[#b_literal{val=F}]}
    catch
        error:_ ->
            %% Let the exception happen at runtime.
            #b_set{op={float,convert},dst=Dst,args=[Src]}
    end;
float_load_reg(float, Src, Dst) ->
    #b_set{op={float,put},dst=Dst,args=[Src]}.

new_float_copy_reg(Count) ->
    new_reg('@fr_copy', Count).

new_reg(Base, Count) ->
    Fr = {Base,Count},
    {Fr,Count+1}.

float_fail_label(#b_blk{last=Last}, Fs) ->
    case Last of
        #b_br{bool=#b_var{},fail=Fail} ->
            Fs#fs{fail=Fail};
        _ ->
            Fs
    end.

float_flush_regs(#fs{regs=Rs}) ->
    maps:fold(fun(_, #b_var{name={'@fr_copy',_}}, Acc) ->
                      Acc;
                 (Dst, Fr, Acc) ->
                      [#b_set{op={float,get},dst=Dst,args=[Fr]}|Acc]
              end, [], Rs).

%%%
%%% Live optimization.
%%%
%%% Optimize instructions whose values are not used. They could be
%%% removed if they have no side effects, or in a few cases replaced
%%% with a cheaper instructions
%%%

ssa_opt_live(#st{ssa=Linear}=St) ->
    St#st{ssa=live_opt(reverse(Linear), #{}, [])}.

live_opt([{L,Blk0}|Bs], LiveMap0, Acc) ->
    Successors = beam_ssa:successors(Blk0),
    Live0 = live_opt_succ(Successors, L, LiveMap0),
    {Blk,Live} = live_opt_blk(Blk0, Live0),
    LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
    live_opt(Bs, LiveMap, [{L,Blk}|Acc]);
live_opt([], _, Acc) -> Acc.

live_opt_succ([S|Ss], L, LiveMap) ->
    Live0 = live_opt_succ(Ss, L, LiveMap),
    Key = {S,L},
    case LiveMap of
        #{Key:=Live} ->
            gb_sets:union(Live, Live0);
        #{S:=Live} ->
            gb_sets:union(Live, Live0);
        #{} ->
            Live0
    end;
live_opt_succ([], _, _) ->
    gb_sets:empty().

live_opt_phis(Is, L, Live0, LiveMap0) ->
    LiveMap = LiveMap0#{L=>Live0},
    Phis = takewhile(fun(#b_set{op=Op}) -> Op =:= phi end, Is),
    case Phis of
        [] ->
            LiveMap;
        [_|_] ->
            PhiArgs = append([Args || #b_set{args=Args} <- Phis]),
            case [{P,V} || {#b_var{name=V},P} <- PhiArgs] of
                [_|_]=PhiVars ->
                    PhiLive0 = rel2fam(PhiVars),
                    PhiLive = [{{L,P},gb_sets:union(gb_sets:from_list(Vs), Live0)} ||
                                  {P,Vs} <- PhiLive0],
                    maps:merge(LiveMap, maps:from_list(PhiLive));
                [] ->
                    %% There were only literals in the phi node(s).
                    LiveMap
            end
    end.

live_opt_blk(#b_blk{is=Is0,last=Last}=Blk, Live0) ->
    Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(Last))),
    {Is,Live} = live_opt_is(reverse(Is0), Live1, []),
    {Blk#b_blk{is=Is},Live}.

live_opt_is([#b_set{op=phi,dst=#b_var{name=Dst}}=I|Is], Live, Acc) ->
    case gb_sets:is_member(Dst, Live) of
        true ->
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            live_opt_is(Is, Live, Acc)
    end;
live_opt_is([#b_set{op=succeeded,dst=#b_var{name=SuccDst}=SuccDstVar,
                    args=[#b_var{name=Dst}]}=SuccI,
             #b_set{dst=#b_var{name=Dst}}=I|Is], Live0, Acc) ->
    case gb_sets:is_member(Dst, Live0) of
        true ->
            case gb_sets:is_member(SuccDst, Live0) of
                true ->
                    Live1 = gb_sets:add(Dst, Live0),
                    Live = gb_sets:delete_any(SuccDst, Live1),
                    live_opt_is([I|Is], Live, [SuccI|Acc]);
                false ->
                    live_opt_is([I|Is], Live0, Acc)
            end;
        false ->
            case live_opt_unused(I) of
                {replace,NewI0} ->
                    NewI = NewI0#b_set{dst=SuccDstVar},
                    live_opt_is([NewI|Is], Live0, Acc);
                keep ->
                    case gb_sets:is_member(SuccDst, Live0) of
                        true ->
                            Live1 = gb_sets:add(Dst, Live0),
                            Live = gb_sets:delete_any(SuccDst, Live1),
                            live_opt_is([I|Is], Live, [SuccI|Acc]);
                        false ->
                            live_opt_is([I|Is], Live0, Acc)
                    end
            end
    end;
live_opt_is([#b_set{dst=#b_var{name=Dst}}=I|Is], Live0, Acc) ->
    case gb_sets:is_member(Dst, Live0) of
        true ->
            Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
            Live = gb_sets:delete_any(Dst, Live1),
            live_opt_is(Is, Live, [I|Acc]);
        false ->
            case beam_ssa:no_side_effect(I) of
                true ->
                    live_opt_is(Is, Live0, Acc);
                false ->
                    Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
                    live_opt_is(Is, Live, [I|Acc])
            end
    end;
live_opt_is([], Live, Acc) ->
    {Acc,Live}.

live_opt_unused(#b_set{op=get_map_element}=Set) ->
    {replace,Set#b_set{op=has_map_field}};
live_opt_unused(_) -> keep.

%%%
%%% Optimize binary matching instructions.
%%%

ssa_opt_bsm(#st{ssa=Linear}=St) ->
    Extracted0 = bsm_extracted(Linear),
    Extracted = cerl_sets:from_list(Extracted0),
    St#st{ssa=bsm_skip(Linear, Extracted)}.

bsm_skip([{L,#b_blk{is=Is0}=Blk}|Bs], Extracted) ->
    Is = bsm_skip_is(Is0, Extracted),
    [{L,Blk#b_blk{is=Is}}|bsm_skip(Bs, Extracted)];
bsm_skip([], _) -> [].

bsm_skip_is([I0|Is], Extracted) ->
    case I0 of
        #b_set{op=bs_match,args=[#b_literal{val=string}|_]} ->
            [I0|bsm_skip_is(Is, Extracted)];
        #b_set{op=bs_match,dst=Ctx,args=[Type,PrevCtx|Args0]} ->
            I = case cerl_sets:is_element(Ctx, Extracted) of
                    true ->
                        I0;
                    false ->
                        %% The value is never extracted.
                        Args = [#b_literal{val=skip},PrevCtx,Type|Args0],
                        I0#b_set{args=Args}
                end,
            [I|Is];
        #b_set{} ->
            [I0|bsm_skip_is(Is, Extracted)]
    end;
bsm_skip_is([], _) -> [].

bsm_extracted([{_,#b_blk{is=Is}}|Bs]) ->
    case Is of
        [#b_set{op=bs_extract,args=[Ctx]}|_] ->
            [Ctx|bsm_extracted(Bs)];
        _ ->
            bsm_extracted(Bs)
    end;
bsm_extracted([]) -> [].

%%%
%%% Short-cutting binary matching instructions.
%%%

ssa_opt_bsm_shortcut(#st{ssa=Linear}=St) ->
    Positions = bsm_positions(Linear, #{}),
    case map_size(Positions) of
        0 ->
            %% No binary matching instructions.
            St;
        _ ->
            St#st{ssa=bsm_shortcut(Linear, Positions)}
    end.

bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
    PosMap = bsm_positions_is(Is, PosMap0),
    case {Is,Last} of
        {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
         #b_br{bool=Bool,fail=Fail}} ->
            Bits = Bits0 + maps:get(Ctx, PosMap0),
            bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
        {_,_} ->
            bsm_positions(Bs, PosMap)
    end;
bsm_positions([], PosMap) -> PosMap.

bsm_positions_is([#b_set{op=bs_start_match,dst=New}|Is], PosMap0) ->
    PosMap = PosMap0#{New=>0},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([#b_set{op=bs_match,dst=New,args=Args}|Is], PosMap0) ->
    [_,Old|_] = Args,
    #{Old:=Bits0} = PosMap0,
    Bits = bsm_update_bits(Args, Bits0),
    PosMap = PosMap0#{New=>Bits},
    bsm_positions_is(Is, PosMap);
bsm_positions_is([_|Is], PosMap) ->
    bsm_positions_is(Is, PosMap);
bsm_positions_is([], PosMap) -> PosMap.

bsm_update_bits([#b_literal{val=string},_,#b_literal{val=String}], Bits) ->
    Bits + bit_size(String);
bsm_update_bits([#b_literal{val=utf8}|_], Bits) ->
    Bits + 8;
bsm_update_bits([#b_literal{val=utf16}|_], Bits) ->
    Bits + 16;
bsm_update_bits([#b_literal{val=utf32}|_], Bits) ->
    Bits + 32;
bsm_update_bits([_,_,_,#b_literal{val=Sz},#b_literal{val=U}], Bits)
  when is_integer(Sz) ->
    Bits + Sz*U;
bsm_update_bits(_, Bits) -> Bits.

bsm_shortcut([{L,#b_blk{is=Is,last=Last0}=Blk}|Bs], PosMap) ->
    case {Is,Last0} of
        {[#b_set{op=bs_match,dst=New,args=[_,Old|_]},
          #b_set{op=succeeded,dst=Bool,args=[New]}],
         #b_br{bool=Bool,fail=Fail}} ->
            case PosMap of
                #{Old:=Bits,Fail:={TailBits,NextFail}} when Bits > TailBits ->
                    Last = Last0#b_br{fail=NextFail},
                    [{L,Blk#b_blk{last=Last}}|bsm_shortcut(Bs, PosMap)];
                #{} ->
                    [{L,Blk}|bsm_shortcut(Bs, PosMap)]
            end;
        {_,_} ->
            [{L,Blk}|bsm_shortcut(Bs, PosMap)]
    end;
bsm_shortcut([], _PosMap) -> [].

%%%
%%% Miscellanous optimizations in execution order.
%%%

ssa_opt_misc(#st{ssa=Linear}=St) ->
    St#st{ssa=misc_opt(Linear, #{})}.

misc_opt([{L,#b_blk{is=Is0,last=Last0}=Blk0}|Bs], Sub0) ->
    {Is,Sub} = misc_opt_is(Is0, Sub0, []),
    Last = sub(Last0, Sub),
    Blk = Blk0#b_blk{is=Is,last=Last},
    [{L,Blk}|misc_opt(Bs, Sub)];
misc_opt([], _) -> [].

misc_opt_is([#b_set{op=phi}=I0|Is], Sub0, Acc) ->
    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub0),
    case all_same(Args) of
        true ->
            %% Eliminate the phi node if there is just one source
            %% value or if the values are identical.
            [{Val,_}|_] = Args,
            Sub = Sub0#{Dst=>Val},
            misc_opt_is(Is, Sub, Acc);
        false ->
            misc_opt_is(Is, Sub0, [I|Acc])
    end;
misc_opt_is([#b_set{op={bif,'and'}}=I0], Sub, Acc) ->
    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
    case eval_and(Args) of
        error ->
            misc_opt_is([], Sub, [I|Acc]);
        Val ->
            misc_opt_is([], Sub#{Dst=>Val}, Acc)
    end;
misc_opt_is([#b_set{op={bif,'or'}}=I0], Sub, Acc) ->
    #b_set{dst=Dst,args=Args} = I = sub(I0, Sub),
    case eval_or(Args) of
        error ->
            misc_opt_is([], Sub, [I|Acc]);
        Val ->
            misc_opt_is([], Sub#{Dst=>Val}, Acc)
    end;
misc_opt_is([#b_set{}=I0|Is], Sub, Acc) ->
    #b_set{op=Op,dst=Dst,args=Args} = I = sub(I0, Sub),
    case make_literal(Op, Args) of
        #b_literal{}=Literal ->
            misc_opt_is(Is, Sub#{Dst=>Literal}, Acc);
        error ->
            misc_opt_is(Is, Sub, [I|Acc])
    end;
misc_opt_is([], Sub, Acc) ->
    {reverse(Acc),Sub}.

all_same([{H,_}|T]) ->
    all(fun({E,_}) -> E =:= H end, T).

make_literal(put_tuple, Args) ->
    case make_literal_list(Args, []) of
        error ->
            error;
        List ->
            #b_literal{val=list_to_tuple(List)}
    end;
make_literal(put_list, [#b_literal{val=H},#b_literal{val=T}]) ->
    #b_literal{val=[H|T]};
make_literal(_, _) -> error.

make_literal_list([#b_literal{val=H}|T], Acc) ->
    make_literal_list(T, [H|Acc]);
make_literal_list([_|_], _) ->
    error;
make_literal_list([], Acc) ->
    reverse(Acc).

eval_and(Args) ->
    case Args of
        [_,#b_literal{val=false}=Res] -> Res;
        [Res,#b_literal{val=true}] -> Res;
        [_,_] -> error
    end.

eval_or(Args) ->
    case Args of
        [Res,#b_literal{val=false}] -> Res;
        [_,#b_literal{val=true}=Res] -> Res;
        [_,_] -> error
    end.

%%%
%%% Optimize expressions such as "tuple_size(Var) =:= 2".
%%%
%%% Consider this code:
%%%
%%% 0:
%%%   .
%%%   .
%%%   .
%%%   Size = bif:tuple_size Var
%%%   BoolVar1 = succeeded Size
%%%   br BoolVar1, label 4, label 3
%%%
%%% 4:
%%%   BoolVar2 = bif:'=:=' Size, literal 2
%%%   br BoolVar2, label 6, label 3
%%%
%%% 6: ...   %% OK
%%%
%%% 3: ...   %% Not a tuple of size 2
%%%
%%% The BEAM code will look this:
%%%
%%%   {bif,tuple_size,{f,3},[{x,0}],{x,0}}.
%%%   {test,is_eq_exact,{f,3},[{x,0},{integer,2}]}.
%%%
%%% Better BEAM code will be produced if we transform the
%%% code like this:
%%%
%%% 0:
%%%   .
%%%   .
%%%   .
%%%   br label 10
%%%
%%% 10:
%%%   NewBoolVar = bif:is_tuple Var
%%%   br NewBoolVar, label 11, label 3
%%%
%%% 11:
%%%   Size = bif:tuple_size Var
%%%   br label 4
%%%
%%% 4:
%%%   BoolVar2 = bif:'=:=' Size, literal 2
%%%   br BoolVar2, label 6, label 3
%%%
%%% (The key part of the transformation is the removal of
%%% the 'succeeded' instruction to signal to the code generator
%%% that the call to tuple_size/1 can't fail.)
%%%
%%% The BEAM code will look like:
%%%
%%%   {test,is_tuple,{f,3},[{x,0}]}.
%%%   {test_arity,{f,3},[{x,0},2]}.
%%%
%%% Those two instructions will be combined into a single
%%% is_tuple_of_arity instruction by the loader.
%%%

ssa_opt_tuple_size(#st{ssa=Linear0,cnt=Count0}=St) ->
    {Linear,Count} = opt_tup_size(Linear0, Count0, []),
    St#st{ssa=Linear,cnt=Count}.

opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], Count0, Acc0) ->
    case {Is,Last} of
        {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
         #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
            {Acc,Count} = opt_tup_size_1(Tup, L, Count0, Acc0),
            opt_tup_size(Bs, Count, [{L,Blk}|Acc]);
        {_,_} ->
            opt_tup_size(Bs, Count0, [{L,Blk}|Acc0])
    end;
opt_tup_size([], Count, Acc) ->
    {reverse(Acc),Count}.

opt_tup_size_1(Size, EqL, Count0, [{L,Blk0}|Acc]) ->
    case Blk0 of
        #b_blk{is=Is0,last=#b_br{bool=Bool,succ=EqL,fail=Fail}} ->
            case opt_tup_size_is(Is0, Bool, Size, []) of
                none ->
                    {[{L,Blk0}|Acc],Count0};
                {PreIs,TupleSizeIs,Tuple} ->
                    opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
                                   Tuple, Fail, Count0, Acc)
            end;
        #b_blk{} ->
            {[{L,Blk0}|Acc],Count0}
    end;
opt_tup_size_1(_, _, Count, Acc) ->
    {Acc,Count}.

opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
    IsTupleL = Count0,
    TupleSizeL = Count0 + 1,
    Bool = #b_var{name={'@ssa_bool',Count0+2}},
    Count = Count0 + 3,

    True = #b_literal{val=true},
    PreBr = #b_br{bool=True,succ=IsTupleL,fail=IsTupleL},
    PreBlk = #b_blk{is=PreIs,last=PreBr},

    IsTupleIs = [#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}],
    IsTupleBr = #b_br{bool=Bool,succ=TupleSizeL,fail=Fail},
    IsTupleBlk = #b_blk{is=IsTupleIs,last=IsTupleBr},

    TupleSizeBr = #b_br{bool=True,succ=EqL,fail=EqL},
    TupleSizeBlk = #b_blk{is=TupleSizeIs,last=TupleSizeBr},
    {[{TupleSizeL,TupleSizeBlk},
      {IsTupleL,IsTupleBlk},
      {PreL,PreBlk}|Acc],Count}.

opt_tup_size_is([#b_set{op={bif,tuple_size},dst=Size,args=[Tuple]}=I,
                 #b_set{op=succeeded,dst=Bool,args=[Size]}],
                Bool, Size, Acc) ->
    {reverse(Acc),[I],Tuple};
opt_tup_size_is([I|Is], Bool, Size, Acc) ->
    opt_tup_size_is(Is, Bool, Size, [I|Acc]);
opt_tup_size_is([], _, _, _Acc) -> none.

%%%
%%% Optimize #b_switch{} instructions.
%%%
%%% If the argument for a #b_switch{} comes from a phi node with all
%%% literals, any values in the switch list which are not in the phi
%%% node can be removed.
%%%
%%% If the values in the phi node and switch list are the same,
%%% the failure label can't be reached and be eliminated.
%%%
%%% A #b_switch{} with only one value can be rewritten to
%%% a #b_br{}. A switch that only verifies that the argument
%%% is 'true' or 'false' can be rewritten to a is_boolean test.
%%%

ssa_opt_sw(#st{ssa=Linear0,cnt=Count0}=St) ->
    {Linear,Count} = opt_sw(Linear0, #{}, Count0, []),
    St#st{ssa=Linear,cnt=Count}.

opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) ->
    Phis = opt_sw_phis(Is, Phis0),
    case opt_sw_last(Last0, Phis) of
        #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
            %% Rewrite a single value switch to a br.
            Bool = #b_var{name={'@ssa_bool',Count0}},
            Count = Count0 + 1,
            IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
            opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
        #b_switch{arg=Arg,fail=Fail,
                  list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
          when B1 =:= not B2 ->
            %% Replace with is_boolean test.
            Bool = #b_var{name={'@ssa_bool',Count0}},
            Count = Count0 + 1,
            IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
            Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
            Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
            opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
        Last0 ->
            opt_sw(Bs, Phis, Count0, [{L,Blk0}|Acc]);
        Last ->
            Blk = Blk0#b_blk{last=Last},
            opt_sw(Bs, Phis, Count0, [{L,Blk}|Acc])
    end;
opt_sw([{L,#b_blk{is=Is}=Blk}|Bs], Phis0, Count, Acc) ->
    Phis = opt_sw_phis(Is, Phis0),
    opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
opt_sw([], _Phis, Count, Acc) ->
    {reverse(Acc),Count}.

opt_sw_phis([#b_set{op=phi,dst=Dst,args=Args}|Is], Phis) ->
    case opt_sw_literals(Args, []) of
        error ->
            opt_sw_phis(Is, Phis);
        Literals ->
            opt_sw_phis(Is, Phis#{Dst=>Literals})
    end;
opt_sw_phis(_, Phis) -> Phis.

opt_sw_last(#b_switch{arg=Arg,fail=Fail,list=List0}=Sw0, Phis) ->
    case Phis of
        #{Arg:=Values0} ->
            Values = gb_sets:from_list(Values0),

            %% Prune the switch list to only contain the possible values.
            List1 = [P || {Lit,_}=P <- List0, gb_sets:is_member(Lit, Values)],

            %% Now test whether the failure label can ever be reached.
            Sw = case gb_sets:size(Values) =:= length(List1) of
                     true ->
                         %% The switch list has the same number of values as the phi node.
                         %% The values must be the same, because the values that were not
                         %% possible were pruned from the switch list. Therefore, the
                         %% failure label can't possibly be reached, and we can choose a
                         %% a new failure label by picking a value from the list.
                         case List1 of
                             [{#b_literal{},Lbl}|List] ->
                                 Sw0#b_switch{fail=Lbl,list=List};
                             [] ->
                                 Sw0#b_switch{list=List1}
                         end;
                     false ->
                         %% There are some values in the phi node that are not in the
                         %% switch list; thus, the failure label can still be reached.
                         Sw0
                 end,
            beam_ssa:normalize(Sw);
        #{} ->
            %% Ensure that no label in the switch list is the same
            %% as the failure label.
            List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail],
            Sw = Sw0#b_switch{list=List},
            beam_ssa:normalize(Sw)
    end.

opt_sw_literals([{#b_literal{}=Lit,_}|T], Acc) ->
    opt_sw_literals(T, [Lit|Acc]);
opt_sw_literals([_|_], _Acc) ->
    error;
opt_sw_literals([], Acc) -> Acc.


%%%
%%% Merge blocks.
%%%

ssa_opt_merge_blocks(#st{ssa=Blocks}=St) ->
    Preds = beam_ssa:predecessors(Blocks),
    St#st{ssa=merge_blocks_1(beam_ssa:rpo(Blocks), Preds, Blocks)}.

merge_blocks_1([L|Ls], Preds0, Blocks0) ->
    case Preds0 of
        #{L:=[P]} ->
            #{P:=Blk0,L:=Blk1} = Blocks0,
            case is_merge_allowed(L, Blk0, Blk1) of
                true ->
                    #b_blk{is=Is0} = Blk0,
                    #b_blk{is=Is1} = Blk1,
                    Is = Is0 ++ Is1,
                    Blk = Blk1#b_blk{is=Is},
                    Blocks1 = maps:remove(L, Blocks0),
                    Blocks2 = maps:put(P, Blk, Blocks1),
                    Successors = beam_ssa:successors(Blk),
                    Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
                    Preds = merge_update_preds(Successors, L, P, Preds0),
                    merge_blocks_1(Ls, Preds, Blocks);
                false ->
                    merge_blocks_1(Ls, Preds0, Blocks0)
            end;
        #{} ->
            merge_blocks_1(Ls, Preds0, Blocks0)
    end;
merge_blocks_1([], _Preds, Blocks) -> Blocks.

merge_update_preds([L|Ls], From, To, Preds0) ->
    Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)],
    Preds = maps:put(L, Ps, Preds0),
    merge_update_preds(Ls, From, To, Preds);
merge_update_preds([], _, _, Preds) -> Preds.

rename_label(From, From, To) -> To;
rename_label(Lbl, _, _) -> Lbl.

is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
    false;
is_merge_allowed(L, Blk0, #b_blk{}) ->
    case beam_ssa:successors(Blk0) of
        [L] -> true;
        [_|_] -> false
    end.

%%%
%%% When a tuple is matched, the pattern matching compiler generates a
%%% get_tuple_element instruction for every tuple element that will
%%% ever be used in the rest of the function. That often forces the
%%% extracted tuple elements to be stored in Y registers until it's
%%% time to use them. It could also mean that there could be execution
%%% paths that will never use the extracted elements.
%%%
%%% This optimization will sink get_tuple_element instructions, that
%%% is, move them forward in the execution stream to the last possible
%%% block there they will still dominate all uses. That may reduce the
%%% size of stack frames, reduce register shuffling, and avoid
%%% extracting tuple elements on execution paths that never use the
%%% extracted values.
%%%

ssa_opt_sink(#st{ssa=Blocks0}=St) ->
    Linear = beam_ssa:linearize(Blocks0),

    %% Create a map with all variables that define get_tuple_element
    %% instructions. The variable name map to the block it is defined in.
    Defs = maps:from_list(def_blocks(Linear)),

    %% Now find all the blocks that use variables defined by get_tuple_element
    %% instructions.
    Used = used_blocks(Linear, Defs, []),

    %% Calculate dominators.
    Dom0 = beam_ssa:dominators(Blocks0),

    %% It is not safe to move get_tuple_element instructions to blocks
    %% that begin with certain instructions. It is also unsafe to move
    %% the instructions into any part of a receive. To avoid such
    %% unsafe moves, pretend that the unsuitable blocks are not
    %% dominators.
    Unsuitable = unsuitable(Linear, Blocks0),
    Dom = case gb_sets:is_empty(Unsuitable) of
              true ->
                  Dom0;
              false ->
                  F = fun(_, DomBy) ->
                              [L || L <- DomBy,
                                    not gb_sets:is_element(L, Unsuitable)]
                      end,
                  maps:map(F, Dom0)
          end,

    %% Calculate new positions for get_tuple_element instructions. The new
    %% position is a block that dominates all uses of the variable.
    DefLoc = new_def_locations(Used, Defs, Dom),

    %% Now move all suitable get_tuple_element instructions to their
    %% new blocks.
    Blocks = foldl(fun({V,To}, A) ->
                           From = maps:get(V, Defs),
                           move_defs(V, From, To, A)
                   end, Blocks0, DefLoc),
    St#st{ssa=Blocks}.

def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
    def_blocks_is(Is, L, def_blocks(Bs));
def_blocks([]) -> [].

def_blocks_is([#b_set{op=get_tuple_element,dst=#b_var{name=Dst}}|Is], L, Acc) ->
    def_blocks_is(Is, L, [{Dst,L}|Acc]);
def_blocks_is([_|Is], L, Acc) ->
    def_blocks_is(Is, L, Acc);
def_blocks_is([], _, Acc) -> Acc.

used_blocks([{L,Blk}|Bs], Def, Acc0) ->
    Used = beam_ssa:used(Blk),
    Acc = [{V,L} || V <- Used, maps:is_key(V, Def)] ++ Acc0,
    used_blocks(Bs, Def, Acc);
used_blocks([], _Def, Acc) ->
    rel2fam(Acc).

%% unsuitable(Linear, Blocks) -> Unsuitable.
%%  Return an ordset of block labels for the blocks that are not
%%  suitable for sinking of get_tuple_element instructions.

unsuitable(Linear, Blocks) ->
    Predecessors = beam_ssa:predecessors(Blocks),
    Unsuitable0 = unsuitable_1(Linear),
    Unsuitable1 = unsuitable_recv(Linear, Blocks, Predecessors),
    gb_sets:from_list(Unsuitable0 ++ Unsuitable1).

unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
    Unsuitable = case Op of
                     bs_extract -> true;
                     bs_put -> true;
                     {float,_} -> true;
                     landingpad -> true;
                     peek_message -> true;
                     wait_timeout -> true;
                     _ -> false
                 end,
    case Unsuitable of
        true ->
            [L|unsuitable_1(Bs)];
        false ->
            unsuitable_1(Bs)
    end;
unsuitable_1([{_,#b_blk{}}|Bs]) ->
    unsuitable_1(Bs);
unsuitable_1([]) -> [].

unsuitable_recv([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs], Blocks, Predecessors) ->
    Ls = case Op of
             remove_message ->
                 unsuitable_loop(L, Blocks, Predecessors);
             recv_next ->
                 unsuitable_loop(L, Blocks, Predecessors);
             _ ->
                 []
         end,
    Ls ++ unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([_|Bs], Blocks, Predecessors) ->
    unsuitable_recv(Bs, Blocks, Predecessors);
unsuitable_recv([], _, _) -> [].

unsuitable_loop(L, Blocks, Predecessors) ->
    unsuitable_loop(L, Blocks, Predecessors, []).

unsuitable_loop(L, Blocks, Predecessors, Acc) ->
    Ps = maps:get(L, Predecessors),
    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).

unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
    case maps:get(P, Blocks) of
        #b_blk{is=[#b_set{op=peek_message}|_]} ->
            unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
        #b_blk{} ->
            case ordsets:is_element(P, Acc0) of
                false ->
                    Acc1 = ordsets:add_element(P, Acc0),
                    Acc = unsuitable_loop(P, Blocks, Predecessors, Acc1),
                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc);
                true ->
                    unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0)
            end
    end;
unsuitable_loop_1([], _, _, Acc) -> Acc.

%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) ->
%%          [{Variable,NewDefinitionBlock}]
%%  Calculate new locations for get_tuple_element instructions. For each
%%  variable, the new location is a block that dominates all uses of
%%  variable and as near to the uses of as possible. If no such block
%%  distinct from the block where the instruction currently is, the
%%  variable will not be included in the result list.

new_def_locations([{V,UsedIn}|Vs], Defs, Dom) ->
    DefIn = maps:get(V, Defs),
    case common_dom(UsedIn, DefIn, Dom) of
        [] ->
            new_def_locations(Vs, Defs, Dom);
        [_|_]=BetterDef ->
            L = most_dominated(BetterDef, Dom),
            [{V,L}|new_def_locations(Vs, Defs, Dom)]
    end;
new_def_locations([], _, _) -> [].

common_dom([L|Ls], DefIn, Dom) ->
    DomBy0 = maps:get(L, Dom),
    DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)),
    common_dom_1(Ls, Dom, DomBy).

common_dom_1(_, _, []) ->
    [];
common_dom_1([L|Ls], Dom, [_|_]=DomBy0) ->
    DomBy1 = maps:get(L, Dom),
    DomBy = ordsets:intersection(DomBy0, DomBy1),
    common_dom_1(Ls, Dom, DomBy);
common_dom_1([], _, DomBy) -> DomBy.

most_dominated([L|Ls], Dom) ->
    most_dominated(Ls, L, maps:get(L, Dom), Dom).

most_dominated([L|Ls], L0, DomBy, Dom) ->
    case member(L, DomBy) of
        true ->
            most_dominated(Ls, L0, DomBy, Dom);
        false ->
            most_dominated(Ls, L, maps:get(L, Dom), Dom)
    end;
most_dominated([], L, _, _) -> L.


%% Move get_tuple_element instructions to their new locations.

move_defs(V, From, To, Blocks) ->
    #{From:=FromBlk0,To:=ToBlk0} = Blocks,
    {Def,FromBlk} = remove_def(V, FromBlk0),
    try insert_def(V, Def, ToBlk0) of
        ToBlk ->
            %%io:format("~p: ~p => ~p\n", [V,From,To]),
            Blocks#{From:=FromBlk,To:=ToBlk}
    catch
        throw:not_possible ->
            Blocks
    end.

remove_def(V, #b_blk{is=Is0}=Blk) ->
    {Def,Is} = remove_def_is(Is0, V, []),
    {Def,Blk#b_blk{is=Is}}.

remove_def_is([#b_set{dst=#b_var{name=Dst}}=Def|Is], Dst, Acc) ->
    {Def,reverse(Acc, Is)};
remove_def_is([I|Is], Dst, Acc) ->
    remove_def_is(Is, Dst, [I|Acc]).

insert_def(V, Def, #b_blk{is=Is0}=Blk) ->
    Is = insert_def_is(Is0, V, Def),
    Blk#b_blk{is=Is}.

insert_def_is([#b_set{op=phi}=I|Is], V, Def) ->
    case member(V, beam_ssa:used(I)) of
        true ->
            throw(not_possible);
        false ->
            [I|insert_def_is(Is, V, Def)]
    end;
insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
    Action0 = case Op of
                  call -> beyond;
                  'catch_end' -> beyond;
                  set_tuple_element -> beyond;
                  timeout -> beyond;
                  _ -> here
              end,
    Action = case Is of
                 [#b_set{op=succeeded}|_] -> here;
                 _ -> Action0
             end,
    case Action of
        beyond ->
            case member(V, beam_ssa:used(I)) of
                true ->
                    %% The variable is used by this instruction. We must
                    %% place the definition before this instruction.
                    [Def|Is0];
                false ->
                    %% Place it beyond the current instruction.
                    [I|insert_def_is(Is, V, Def)]
            end;
        here ->
            [Def|Is0]
    end;
insert_def_is([], _V, Def) ->
    [Def].


%%%
%%% Common utilities.
%%%

rel2fam(S0) ->
    S1 = sofs:relation(S0),
    S = sofs:rel2fam(S1),
    sofs:to_external(S).

sub(I, Sub) ->
    beam_ssa:normalize(sub_1(I, Sub)).

sub_1(#b_set{op=phi,args=Args}=I, Sub) ->
    I#b_set{args=[{sub_arg(A, Sub),P} || {A,P} <- Args]};
sub_1(#b_set{args=Args}=I, Sub) ->
    I#b_set{args=[sub_arg(A, Sub) || A <- Args]};
sub_1(#b_br{bool=#b_var{}=Old}=Br, Sub) ->
    New = sub_arg(Old, Sub),
    Br#b_br{bool=New};
sub_1(#b_switch{arg=#b_var{}=Old}=Sw, Sub) ->
    New = sub_arg(Old, Sub),
    Sw#b_switch{arg=New};
sub_1(#b_ret{arg=#b_var{}=Old}=Ret, Sub) ->
    New = sub_arg(Old, Sub),
    Ret#b_ret{arg=New};
sub_1(Last, _) -> Last.

sub_arg(#b_remote{mod=Mod,name=Name}=Rem, Sub) ->
    Rem#b_remote{mod=sub_arg(Mod, Sub),name=sub_arg(Name, Sub)};
sub_arg(Old, Sub) ->
    case Sub of
        #{Old:=New} -> New;
        #{} -> Old
    end.