1 files changed, 159 insertions, 106 deletions
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index a5fcb91cc0..61c42fdb6d 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -120,6 +120,7 @@ passes(Opts) ->
 
           %% Preliminaries.
           ?PASS(fix_bs),
+          ?PASS(exception_trampolines),
           ?PASS(sanitize),
           ?PASS(match_fail_instructions),
           case FixTuples of
@@ -158,7 +159,9 @@ passes(Opts) ->
           %% Allocate registers.
           ?PASS(linear_scan),
           ?PASS(frame_size),
-          ?PASS(turn_yregs)],
+          ?PASS(turn_yregs),
+
+          ?PASS(assert_no_critical_edges)],
     [P || P <- Ps, P =/= ignore].
 
 function(#b_function{anno=Anno,args=Args,bs=Blocks0,cnt=Count0}=F0,
@@ -600,6 +603,10 @@ bs_instrs([{L,#b_blk{is=Is0}=Blk}|Bs], CtxChain, Acc0) ->
 bs_instrs([], _, Acc) ->
     reverse(Acc).
 
+bs_instrs_is([#b_set{op=succeeded}=I|Is], CtxChain, Acc) ->
+    %% This instruction refers to a specific operation, so we must not
+    %% substitute the context argument.
+    bs_instrs_is(Is, CtxChain, [I | Acc]);
 bs_instrs_is([#b_set{op=Op,args=Args0}=I0|Is], CtxChain, Acc) ->
     Args = [bs_subst_ctx(A, CtxChain) || A <- Args0],
     I1 = I0#b_set{args=Args},
@@ -693,6 +700,44 @@ legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
 legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
     {reverse(Acc),Count,Copies}.
 
+%% exception_trampolines(St0) -> St.
+%%
+%% Removes the "exception trampolines" that were added to prevent exceptions
+%% from being optimized away.
+
+exception_trampolines(#st{ssa=Blocks0}=St) ->
+    RPO = reverse(beam_ssa:rpo(Blocks0)),
+    Blocks = et_1(RPO, #{}, Blocks0),
+    St#st{ssa=Blocks}.
+
+et_1([L | Ls], Trampolines, Blocks) ->
+    #{ L := #b_blk{is=Is,last=Last0}=Block0 } = Blocks,
+    case {Is, Last0} of
+        {[#b_set{op=exception_trampoline}], #b_br{succ=Succ}} ->
+            et_1(Ls, Trampolines#{ L => Succ }, maps:remove(L, Blocks));
+        {_, #b_br{succ=Same,fail=Same}} when Same =:= ?EXCEPTION_BLOCK ->
+            %% The exception block is just a marker saying that we should raise
+            %% an exception (= {f,0}) instead of jumping to a particular fail
+            %% block. Since it's not a reachable block we can't allow
+            %% unconditional jumps to it except through a trampoline.
+            error({illegal_jump_to_exception_block, L});
+        {_, #b_br{succ=Succ0,fail=Fail0}} ->
+            Succ = maps:get(Succ0, Trampolines, Succ0),
+            Fail = maps:get(Fail0, Trampolines, Fail0),
+            if
+                Succ =/= Succ0; Fail =/= Fail0 ->
+                    Last = Last0#b_br{succ=Succ,fail=Fail},
+                    Block = Block0#b_blk{last=Last},
+                    et_1(Ls, Trampolines, Blocks#{ L := Block });
+                Succ =:= Succ0, Fail =:= Fail0 ->
+                    et_1(Ls, Trampolines, Blocks)
+            end;
+        {_, _} ->
+             et_1(Ls, Trampolines, Blocks)
+    end;
+et_1([], _Trampolines, Blocks) ->
+    Blocks.
+
 %% sanitize(St0) -> St.
 %%  Remove constructs that can cause problems later:
 %%
@@ -1021,9 +1066,8 @@ use_set_tuple_element(#st{ssa=Blocks0}=St) ->
     Blocks = use_ste_1(RPO, Uses, Blocks0),
     St#st{ssa=Blocks}.
 
-use_ste_1([L|Ls], Uses, Blocks0) ->
-    {Blk0,Blocks} = use_ste_across(L, Uses, Blocks0),
-    #b_blk{is=Is0} = Blk0,
+use_ste_1([L|Ls], Uses, Blocks) ->
+    #b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
     case use_ste_is(Is0, Uses) of
         Is0 ->
             use_ste_1(Ls, Uses, Blocks);
@@ -1086,69 +1130,6 @@ extract_ste(#b_set{op=call,dst=Dst,
     end;
 extract_ste(#b_set{}) -> none.
 
-%%% Optimize accross blocks within a try/catch block.
-
-use_ste_across(L, Uses, Blocks) ->
-    case map_get(L, Blocks) of
-        #b_blk{last=#b_br{bool=#b_var{}}}=Blk ->
-            try
-                use_ste_across_1(L, Blk, Uses, Blocks)
-            catch
-                throw:not_possible ->
-                    {Blk,Blocks}
-            end;
-        #b_blk{}=Blk ->
-            {Blk,Blocks}
-    end.
-
-use_ste_across_1(L, Blk0, Uses, Blocks0) ->
-    #b_blk{is=IsThis,last=#b_br{bool=Bool,succ=Next}} = Blk0,
-    case reverse(IsThis) of
-        [#b_set{op=succeeded,dst=Bool,args=[Result]}=Succ0,
-         #b_set{op=call,args=[#b_remote{}|_],dst=Result}=Call1|Prefix] ->
-            case is_single_use(Bool, Uses) andalso
-                is_n_uses(2, Result, Uses) of
-                true -> ok;
-                false -> throw(not_possible)
-            end,
-            Call2 = use_ste_across_next(Next, Uses, Blocks0),
-            Is = [Call1,Call2],
-            case use_ste_is(Is, decrement_uses(Result, Uses)) of
-                [#b_set{}=Call,#b_set{op=set_tuple_element}=Ste] ->
-                    Blocks1 = use_ste_fix_next(Ste, Next, Blocks0),
-                    Succ = Succ0#b_set{args=[Call#b_set.dst]},
-                    Blk = Blk0#b_blk{is=reverse(Prefix, [Call,Succ])},
-                    Blocks = Blocks1#{L:=Blk},
-                    {Blk,Blocks};
-                _ ->
-                    throw(not_possible)
-            end;
-        _ ->
-            throw(not_possible)
-    end.
-
-use_ste_across_next(Next, Uses, Blocks) ->
-    case map_get(Next, Blocks) of
-        #b_blk{is=[#b_set{op=call,dst=Result,args=[#b_remote{}|_]}=Call,
-                   #b_set{op=succeeded,dst=Bool,args=[Result]}],
-               last=#b_br{bool=Bool}} ->
-            case is_single_use(Bool, Uses) andalso
-                is_n_uses(2, Result, Uses) of
-                true -> ok;
-                false -> throw(not_possible)
-            end,
-            Call;
-        #b_blk{} ->
-            throw(not_possible)
-    end.
-
-use_ste_fix_next(Ste, Next, Blocks) ->
-    Blk0 = map_get(Next, Blocks),
-    #b_blk{is=[#b_set{op=call},#b_set{op=succeeded}],last=Br0} = Blk0,
-    Br = beam_ssa:normalize(Br0#b_br{bool=#b_literal{val=true}}),
-    Blk = Blk0#b_blk{is=[Ste],last=Br},
-    Blocks#{Next:=Blk}.
-
 %% Count how many times each variable is used.
 
 count_uses(Blocks) ->
@@ -1158,7 +1139,7 @@ count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
     F = fun(I, CountMap) ->
                 foldl(fun(Var, Acc) ->
                               case Acc of
-                                  #{Var:=3} -> Acc;
+                                  #{Var:=2} -> Acc;
                                   #{Var:=C} -> Acc#{Var:=C+1};
                                   #{} ->       Acc#{Var=>1}
                               end
@@ -1168,16 +1149,6 @@ count_uses_blk([#b_blk{is=Is,last=Last}|Bs], CountMap0) ->
     count_uses_blk(Bs, CountMap);
 count_uses_blk([], CountMap) -> CountMap.
 
-decrement_uses(V, Uses) ->
-    #{V:=C} = Uses,
-    Uses#{V:=C-1}.
-
-is_n_uses(N, V, Uses) ->
-    case Uses of
-        #{V:=N} -> true;
-        #{} -> false
-    end.
-
 is_single_use(V, Uses) ->
     case Uses of
         #{V:=1} -> true;
@@ -1299,10 +1270,10 @@ place_frame_here(L, Blocks, Doms, Frames) ->
             Descendants = beam_ssa:rpo([L], Blocks),
             PhiPredecessors = phi_predecessors(L, Blocks),
             MustDominate = ordsets:from_list(PhiPredecessors ++ Descendants),
-            Dominates = all(fun(?BADARG_BLOCK) ->
+            Dominates = all(fun(?EXCEPTION_BLOCK) ->
                                     %% This block defines no variables and calls
                                     %% erlang:error(badarg). It does not matter
-                                    %% whether L dominates ?BADARG_BLOCK or not;
+                                    %% whether L dominates ?EXCEPTION_BLOCK or not;
                                     %% it is still safe to put the frame in L.
                                     true;
                                (Bl) ->
@@ -1433,10 +1404,11 @@ fix_receives_1([{L,Blk}|Ls], Blocks0, Count0) ->
             LoopExit = find_loop_exit(Rm, Blocks0),
             Defs0 = beam_ssa:def([L], Blocks0),
             CommonUsed = recv_common(Defs0, LoopExit, Blocks0),
-            {Blocks1,Count1} = recv_fix_common(CommonUsed, LoopExit, Rm,
-                                               Blocks0, Count0),
+            {Blocks1,Count1} = recv_crit_edges(Rm, LoopExit, Blocks0, Count0),
+            {Blocks2,Count2} = recv_fix_common(CommonUsed, LoopExit, Rm,
+                                               Blocks1, Count1),
             Defs = ordsets:subtract(Defs0, CommonUsed),
-            {Blocks,Count} = fix_receive(Rm, Defs, Blocks1, Count1),
+            {Blocks,Count} = fix_receive(Rm, Defs, Blocks2, Count2),
             fix_receives_1(Ls, Blocks, Count);
         #b_blk{} ->
             fix_receives_1(Ls, Blocks0, Count0)
@@ -1449,9 +1421,60 @@ recv_common(_Defs, none, _Blocks) ->
     %% in the tail position of a function.
     [];
 recv_common(Defs, Exit, Blocks) ->
-    {ExitDefs,ExitUsed} = beam_ssa:def_used([Exit], Blocks),
+    {ExitDefs,ExitUnused} = beam_ssa:def_unused([Exit], Defs, Blocks),
     Def = ordsets:subtract(Defs, ExitDefs),
-    ordsets:intersection(Def, ExitUsed).
+    ordsets:subtract(Def, ExitUnused).
+
+%% recv_crit_edges([RemoveMessageLabel], LoopExit,
+%%                 Blocks0, Count0) -> {Blocks,Count}.
+%%
+%%  Adds dummy blocks on all conditional jumps to the exit block so that
+%%  recv_fix_common/5 can insert phi nodes without having to worry about
+%%  critical edges.
+
+recv_crit_edges(_Rms, none, Blocks0, Count0) ->
+    {Blocks0, Count0};
+recv_crit_edges(Rms, Exit, Blocks0, Count0) ->
+    Ls = beam_ssa:rpo(Rms, Blocks0),
+    rce_insert_edges(Ls, Exit, Count0, Blocks0).
+
+rce_insert_edges([L | Ls], Exit, Count0, Blocks0) ->
+    Successors = beam_ssa:successors(map_get(L, Blocks0)),
+    case member(Exit, Successors) of
+        true when Successors =/= [Exit] ->
+            {Blocks, Count} = rce_insert_edge(L, Exit, Count0, Blocks0),
+            rce_insert_edges(Ls, Exit, Count, Blocks);
+        _ ->
+            rce_insert_edges(Ls, Exit, Count0, Blocks0)
+    end;
+rce_insert_edges([], _Exit, Count, Blocks) ->
+    {Blocks, Count}.
+
+rce_insert_edge(L, Exit, Count, Blocks0) ->
+    #b_blk{last=Last0} = FromBlk0 = map_get(L, Blocks0),
+
+    ToExit = #b_br{bool=#b_literal{val=true},succ=Exit,fail=Exit},
+
+    FromBlk = FromBlk0#b_blk{last=rce_reroute_terminator(Last0, Exit, Count)},
+    EdgeBlk = #b_blk{anno=#{},is=[],last=ToExit},
+
+    Blocks = Blocks0#{ Count => EdgeBlk, L => FromBlk },
+    {Blocks, Count + 1}.
+
+rce_reroute_terminator(#b_br{succ=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_br{succ=New}, Exit, New);
+rce_reroute_terminator(#b_br{fail=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_br{fail=New}, Exit, New);
+rce_reroute_terminator(#b_br{}=Last, _Exit, _New) ->
+    Last;
+rce_reroute_terminator(#b_switch{fail=Exit}=Last, Exit, New) ->
+    rce_reroute_terminator(Last#b_switch{fail=New}, Exit, New);
+rce_reroute_terminator(#b_switch{list=List0}=Last, Exit, New) ->
+    List = [if
+                Lbl =:= Exit -> {Arg, New};
+                Lbl =/= Exit -> {Arg, Lbl}
+            end || {Arg, Lbl} <- List0],
+    Last#b_switch{list=List}.
 
 %% recv_fix_common([CommonVar], LoopExit, [RemoveMessageLabel],
 %%                 Blocks0, Count0) -> {Blocks,Count}.
@@ -1505,9 +1528,9 @@ exit_predecessors([], _Exit, _Blocks) -> [].
 %%  later used within a clause of the receive.
 
 fix_receive([L|Ls], Defs, Blocks0, Count0) ->
-    {RmDefs,Used0} = beam_ssa:def_used([L], Blocks0),
+    {RmDefs,Unused} = beam_ssa:def_unused([L], Defs, Blocks0),
     Def = ordsets:subtract(Defs, RmDefs),
-    Used = ordsets:intersection(Def, Used0),
+    Used = ordsets:subtract(Def, Unused),
     {NewVars,Count} = new_vars([Base || #b_var{name=Base} <- Used], Count0),
     Ren = zip(Used, NewVars),
     Blocks1 = beam_ssa:rename_vars(Ren, [L], Blocks0),
@@ -1521,21 +1544,51 @@ fix_receive([], _Defs, Blocks, Count) ->
     {Blocks,Count}.
 
 %% find_loop_exit([Label], Blocks) -> Label | none.
-%%  Find the block to which control is transferred when the
-%%  the receive loop is exited.
-
-find_loop_exit([L1,L2|_Ls], Blocks) ->
-    Path1 = beam_ssa:rpo([L1], Blocks),
-    Path2 = beam_ssa:rpo([L2], Blocks),
-    find_loop_exit_1(Path1, cerl_sets:from_list(Path2));
-find_loop_exit(_, _) -> none.
-
-find_loop_exit_1([H|T], OtherPath) ->
-    case cerl_sets:is_element(H, OtherPath) of
-        true -> H;
-        false ->  find_loop_exit_1(T, OtherPath)
+%%  Given the list of all blocks with the remove_message instructions
+%%  for this receive, find the block to which control is transferred
+%%  when the receive loop is exited (if any).
+
+find_loop_exit([_,_|_]=RmBlocks, Blocks) ->
+    %% We used to only analyze the path from two of the remove_message
+    %% blocks. That would fail to find a common block if one or both
+    %% of the blocks happened to raise an exception. To be sure that
+    %% we always find a common block if there is one (shared by at
+    %% least two clauses), we must analyze the path from all
+    %% remove_message blocks.
+    {Dominators,_} = beam_ssa:dominators(Blocks),
+    RmSet = cerl_sets:from_list(RmBlocks),
+    Rpo = beam_ssa:rpo(RmBlocks, Blocks),
+    find_loop_exit_1(Rpo, RmSet, Dominators);
+find_loop_exit(_, _) ->
+    %% There is (at most) a single clause. There is no common
+    %% loop exit block.
+    none.
+
+find_loop_exit_1([?EXCEPTION_BLOCK|Ls], RmSet, Dominators) ->
+    %% ?EXCEPTION_BLOCK is a marker and not an actual block, so it is not
+    %% the block we are looking for.
+    find_loop_exit_1(Ls, RmSet, Dominators);
+find_loop_exit_1([L|Ls], RmSet, Dominators) ->
+    DomBy = map_get(L, Dominators),
+    case any(fun(E) -> cerl_sets:is_element(E, RmSet) end, DomBy) of
+        true ->
+            %% This block is dominated by one of the remove_message blocks,
+            %% which means that the block is part of only one clause.
+            %% It is not the block we are looking for.
+            find_loop_exit_1(Ls, RmSet, Dominators);
+        false ->
+            %% This block is the first block that is not dominated by
+            %% any of the blocks with remove_message instructions,
+            %% which means that at least two of the receive clauses
+            %% will ultimately transfer control to it. It is the block
+            %% we are looking for.
+            L
     end;
-find_loop_exit_1([], _) -> none.
+find_loop_exit_1([], _, _) ->
+    %% None of clauses transfers control to a common block after the receive
+    %% statement. That means that the receive statement is a the end of a
+    %% function (or that all clauses raise exceptions).
+    none.
 
 %% find_rm_blocks(StartLabel, Blocks) -> [Label].
 %%  Find all blocks that start with remove_message within the receive
@@ -1784,7 +1837,7 @@ collect_yregs([], Yregs) -> Yregs.
 copy_retval_2([L|Ls], Yregs, Copy0, Blocks0, Count0) ->
     #b_blk{is=Is0,last=Last} = Blk = map_get(L, Blocks0),
     RC = case {Last,Ls} of
-             {#b_br{succ=Succ,fail=?BADARG_BLOCK},[Succ|_]} ->
+             {#b_br{succ=Succ,fail=?EXCEPTION_BLOCK},[Succ|_]} ->
                  true;
              {_,_} ->
                  false
@@ -2133,8 +2186,8 @@ reserve_yregs(#st{frames=Frames}=St0) ->
 reserve_yregs_1(L, #st{ssa=Blocks0,cnt=Count0,res=Res0}=St) ->
     Blk = map_get(L, Blocks0),
     Yregs = beam_ssa:get_anno(yregs, Blk),
-    {Def,Used} = beam_ssa:def_used([L], Blocks0),
-    UsedYregs = ordsets:intersection(Yregs, Used),
+    {Def,Unused} = beam_ssa:def_unused([L], Yregs, Blocks0),
+    UsedYregs = ordsets:subtract(Yregs, Unused),
     DefBefore = ordsets:subtract(UsedYregs, Def),
     {BeforeVars,Blocks,Count} = rename_vars(DefBefore, L, Blocks0, Count0),
     InsideVars = ordsets:subtract(UsedYregs, DefBefore),
@@ -2523,9 +2576,9 @@ reserve_xregs_is([], Res, Xs, _Used) ->
     {Res,Xs}.
 
 %% Pick up register hints from the successors of this blocks.
-reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?BADARG_BLOCK},
+reserve_terminator(_L, _Is, #b_br{bool=#b_var{},succ=Succ,fail=?EXCEPTION_BLOCK},
                    _Blocks, XsMap, _Res) ->
-    %% We know that no variables are used at ?BADARG_BLOCK, so
+    %% We know that no variables are used at ?EXCEPTION_BLOCK, so
     %% any register hints from the success blocks are safe to use.
     map_get(Succ, XsMap);
 reserve_terminator(L, Is, #b_br{bool=#b_var{},succ=Succ,fail=Fail},