1 files changed, 129 insertions, 83 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 32b64b393f..580abf4ed9 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -158,6 +158,7 @@ repeated_passes(Opts) ->
           ?PASS(ssa_opt_dead),
           ?PASS(ssa_opt_cse),
           ?PASS(ssa_opt_tail_phis),
+          ?PASS(ssa_opt_sink),
           ?PASS(ssa_opt_tuple_size),
           ?PASS(ssa_opt_record),
           ?PASS(ssa_opt_type_continue)],        %Must run after ssa_opt_dead to
@@ -175,8 +176,8 @@ epilogue_passes(Opts) ->
           ?PASS(ssa_opt_bsm),
           ?PASS(ssa_opt_bsm_units),
           ?PASS(ssa_opt_bsm_shortcut),
-          ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_sink),
+          ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_merge_blocks),
           ?PASS(ssa_opt_get_tuple_element),
           ?PASS(ssa_opt_trim_unreachable)],
@@ -901,44 +902,52 @@ cse_suitable(#b_set{}) -> false.
 -record(fs,
         {s=undefined :: 'undefined' | 'cleared',
          regs=#{} :: #{beam_ssa:b_var():=beam_ssa:b_var()},
+         vars=cerl_sets:new() :: cerl_sets:set(),
          fail=none :: 'none' | beam_ssa:label(),
          non_guards :: gb_sets:set(beam_ssa:label()),
          bs :: beam_ssa:block_map()
         }).
 
 ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
-    NonGuards0 = float_non_guards(Linear0),
-    NonGuards = gb_sets:from_list(NonGuards0),
+    NonGuards = non_guards(Linear0),
     Blocks = maps:from_list(Linear0),
     Fs = #fs{non_guards=NonGuards,bs=Blocks},
     {Linear,Count} = float_opt(Linear0, Count0, Fs),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
-float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) ->
-    not gb_sets:is_member(F, NonGuards);
-float_blk_is_in_guard(#b_blk{}, #fs{}) ->
+%% The fconv instruction doesn't support jumping to a fail label, so we have to
+%% skip this optimization if the fail block is a guard.
+%%
+%% We also skip the optimization in blocks that always fail, as it's both
+%% difficult and pointless to rewrite them to use float ops.
+float_can_optimize_blk(#b_blk{last=#b_br{bool=#b_var{},fail=F}},
+                       #fs{non_guards=NonGuards}) ->
+    gb_sets:is_member(F, NonGuards);
+float_can_optimize_blk(#b_blk{}, #fs{}) ->
     false.
 
-float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
-    case Is of
-        [#b_set{op=landingpad}|_] ->
-            [L|float_non_guards(Bs)];
-        _ ->
-            float_non_guards(Bs)
-    end;
-float_non_guards([]) -> [?BADARG_BLOCK].
-
+float_opt([{L,#b_blk{is=[#b_set{op=exception_trampoline,args=[Var]}]}=Blk0} |
+           Bs0], Count0, Fs) ->
+    %% If we've replaced a BIF with float operations, we'll have a lot of extra
+    %% blocks that jump to the same failure block, which may have a trampoline
+    %% that refers to the original operation.
+    %%
+    %% Since the point of the trampoline is to keep the BIF from being removed
+    %% by liveness optimization, we can discard it as the liveness pass leaves
+    %% floats alone.
+    Blk = case cerl_sets:is_element(Var, Fs#fs.vars) of
+              true -> Blk0#b_blk{is=[]};
+              false -> Blk0
+          end,
+    {Bs, Count} = float_opt(Bs0, Count0, Fs),
+    {[{L,Blk}|Bs],Count};
 float_opt([{L,Blk}|Bs0], Count0, Fs) ->
-    case float_blk_is_in_guard(Blk, Fs) of
+    case float_can_optimize_blk(Blk, Fs) of
         true ->
-            %% This block is inside a guard. Don't do
-            %% any floating point optimizations.
-            {Bs,Count} = float_opt(Bs0, Count0, Fs),
-            {[{L,Blk}|Bs],Count};
+            float_opt_1(L, Blk, Bs0, Count0, Fs);
         false ->
-            %% This block is not inside a guard.
-            %% We can do the optimization.
-            float_opt_1(L, Blk, Bs0, Count0, Fs)
+            {Bs,Count} = float_opt(Bs0, Count0, Fs),
+            {[{L,Blk}|Bs],Count}
     end;
 float_opt([], Count, _Fs) ->
     {[],Count}.
@@ -1017,12 +1026,12 @@ float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
     #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
 
     %% If the success block starts with a floating point operation, we can
-    %% defer flushing to that block as long as it isn't a guard.
+    %% defer flushing to that block as long as it's suitable for optimization.
     #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks),
-    SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0),
+    CanOptimizeSucc = float_can_optimize_blk(SuccBlk, Fs0),
 
     case Is of
-        [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard ->
+        [#b_set{anno=#{float_op:=_}}|_] when CanOptimizeSucc ->
             %% No flush needed.
             {[],Blk0,Fs0,Count0};
         _ ->
@@ -1078,21 +1087,22 @@ float_opt_is([], Fs, _Count, _Acc) ->
     none.
 
 float_make_op(#b_set{op={bif,Op},dst=Dst,args=As0}=I0,
-              Ts, #fs{s=S,regs=Rs0}=Fs, Count0) ->
+              Ts, #fs{s=S,regs=Rs0,vars=Vs0}=Fs, Count0) ->
     {As1,Rs1,Count1} = float_load(As0, Ts, Rs0, Count0, []),
     {As,Is0} = unzip(As1),
     {Fr,Count2} = new_reg('@fr', Count1),
     FrDst = #b_var{name=Fr},
     I = I0#b_set{op={float,Op},dst=FrDst,args=As},
+    Vs = cerl_sets:add_element(Dst, Vs0),
     Rs = Rs1#{Dst=>FrDst},
     Is = append(Is0) ++ [I],
     case S of
         undefined ->
             {Ignore,Count} = new_reg('@ssa_ignore', Count2),
             C = #b_set{op={float,clearerror},dst=#b_var{name=Ignore}},
-            {[C|Is],Fs#fs{s=cleared,regs=Rs},Count};
+            {[C|Is],Fs#fs{s=cleared,regs=Rs,vars=Vs},Count};
         cleared ->
-            {Is,Fs#fs{regs=Rs},Count2}
+            {Is,Fs#fs{regs=Rs,vars=Vs},Count2}
     end.
 
 float_load([A|As], [T|Ts], Rs0, Count0, Acc) ->
@@ -1221,34 +1231,31 @@ live_opt_is([#b_set{op=phi,dst=Dst}=I|Is], Live, Acc) ->
         false ->
             live_opt_is(Is, Live, Acc)
     end;
-live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
-                    args=[Dst]}=SuccI,
-             #b_set{dst=Dst}=I|Is], Live0, Acc) ->
-    case gb_sets:is_member(Dst, Live0) of
-        true ->
-            Live1 = gb_sets:add(Dst, Live0),
-            Live = gb_sets:delete_any(SuccDst, Live1),
-            live_opt_is([I|Is], Live, [SuccI|Acc]);
-        false ->
-            case live_opt_unused(I) of
-                {replace,NewI0} ->
-                    NewI = NewI0#b_set{dst=SuccDstVar},
-                    live_opt_is([NewI|Is], Live0, Acc);
-                keep ->
-                    case gb_sets:is_member(SuccDst, Live0) of
-                        true ->
-                            Live1 = gb_sets:add(Dst, Live0),
-                            Live = gb_sets:delete(SuccDst, Live1),
-                            live_opt_is([I|Is], Live, [SuccI|Acc]);
-                        false ->
-                            live_opt_is([I|Is], Live0, Acc)
-                    end
-            end
+live_opt_is([#b_set{op=succeeded,dst=SuccDst,args=[MapDst]}=SuccI,
+             #b_set{op=get_map_element,dst=MapDst}=MapI | Is],
+            Live0, Acc) ->
+    case {gb_sets:is_member(SuccDst, Live0),
+          gb_sets:is_member(MapDst, Live0)} of
+        {true, true} ->
+            Live = gb_sets:delete(SuccDst, Live0),
+            live_opt_is([MapI | Is], Live, [SuccI | Acc]);
+        {true, false} ->
+            %% 'get_map_element' is unused; replace 'succeeded' with
+            %% 'has_map_field'
+            NewI = MapI#b_set{op=has_map_field,dst=SuccDst},
+            live_opt_is([NewI | Is], Live0, Acc);
+        {false, true} ->
+            %% 'succeeded' is unused (we know it will succeed); discard it and
+            %% keep 'get_map_element'
+            live_opt_is([MapI | Is], Live0, Acc);
+        {false, false} ->
+            live_opt_is(Is, Live0, Acc)
     end;
 live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
     case gb_sets:is_member(Dst, Live0) of
         true ->
-            Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+            LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)),
+            Live1 = gb_sets:union(Live0, LiveUsed),
             Live = gb_sets:delete(Dst, Live1),
             live_opt_is(Is, Live, [I|Acc]);
         false ->
@@ -1256,17 +1263,14 @@ live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
                 true ->
                     live_opt_is(Is, Live0, Acc);
                 false ->
-                    Live = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
+                    LiveUsed = gb_sets:from_ordset(beam_ssa:used(I)),
+                    Live = gb_sets:union(Live0, LiveUsed),
                     live_opt_is(Is, Live, [I|Acc])
             end
     end;
 live_opt_is([], Live, Acc) ->
     {Acc,Live}.
 
-live_opt_unused(#b_set{op=get_map_element}=Set) ->
-    {replace,Set#b_set{op=has_map_field}};
-live_opt_unused(_) -> keep.
-
 %%%
 %%% Optimize binary matching.
 %%%
@@ -1777,35 +1781,44 @@ opt_bs_put_split_int_1(Int, L, R) ->
 %%%
 
 ssa_opt_tuple_size({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
-    {Linear,Count} = opt_tup_size(Linear0, Count0, []),
+    %% This optimization is only safe in guards, as prefixing tuple_size with
+    %% an is_tuple check prevents it from throwing an exception.
+    NonGuards = non_guards(Linear0),
+    {Linear,Count} = opt_tup_size(Linear0, NonGuards, Count0, []),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
-opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], Count0, Acc0) ->
+opt_tup_size([{L,#b_blk{is=Is,last=Last}=Blk}|Bs], NonGuards, Count0, Acc0) ->
     case {Is,Last} of
         {[#b_set{op={bif,'=:='},dst=Bool,args=[#b_var{}=Tup,#b_literal{val=Arity}]}],
          #b_br{bool=Bool}} when is_integer(Arity), Arity >= 0 ->
-            {Acc,Count} = opt_tup_size_1(Tup, L, Count0, Acc0),
-            opt_tup_size(Bs, Count, [{L,Blk}|Acc]);
+            {Acc,Count} = opt_tup_size_1(Tup, L, NonGuards, Count0, Acc0),
+            opt_tup_size(Bs, NonGuards, Count, [{L,Blk}|Acc]);
         {_,_} ->
-            opt_tup_size(Bs, Count0, [{L,Blk}|Acc0])
+            opt_tup_size(Bs, NonGuards, Count0, [{L,Blk}|Acc0])
     end;
-opt_tup_size([], Count, Acc) ->
+opt_tup_size([], _NonGuards, Count, Acc) ->
     {reverse(Acc),Count}.
 
-opt_tup_size_1(Size, EqL, Count0, [{L,Blk0}|Acc]) ->
-    case Blk0 of
-        #b_blk{is=Is0,last=#b_br{bool=Bool,succ=EqL,fail=Fail}} ->
-            case opt_tup_size_is(Is0, Bool, Size, []) of
-                none ->
+opt_tup_size_1(Size, EqL, NonGuards, Count0, [{L,Blk0}|Acc]) ->
+    #b_blk{is=Is0,last=Last} = Blk0,
+    case Last of
+        #b_br{bool=Bool,succ=EqL,fail=Fail} ->
+            case gb_sets:is_member(Fail, NonGuards) of
+                true ->
                     {[{L,Blk0}|Acc],Count0};
-                {PreIs,TupleSizeIs,Tuple} ->
-                    opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
-                                   Tuple, Fail, Count0, Acc)
+                false ->
+                    case opt_tup_size_is(Is0, Bool, Size, []) of
+                        none ->
+                            {[{L,Blk0}|Acc],Count0};
+                        {PreIs,TupleSizeIs,Tuple} ->
+                            opt_tup_size_2(PreIs, TupleSizeIs, L, EqL,
+                                           Tuple, Fail, Count0, Acc)
+                    end
             end;
-        #b_blk{} ->
+        _ ->
             {[{L,Blk0}|Acc],Count0}
     end;
-opt_tup_size_1(_, _, Count, Acc) ->
+opt_tup_size_1(_, _, _, Count, Acc) ->
     {Acc,Count}.
 
 opt_tup_size_2(PreIs, TupleSizeIs, PreL, EqL, Tuple, Fail, Count0, Acc) ->
@@ -1943,12 +1956,28 @@ verify_merge_is(_) ->
 
 is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
     false;
-is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{}) ->
+is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=exception_trampoline}|_]}) ->
+    false;
+is_merge_allowed(_, #b_blk{is=[#b_set{op=exception_trampoline}|_]}, #b_blk{}) ->
+    false;
+is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{is=Is}) ->
     %% The predecessor block must have exactly one successor (L) for
     %% the merge to be safe.
     case beam_ssa:successors(Blk) of
-        [L] -> true;
-        [_|_] -> false
+        [L] ->
+            case Is of
+                [#b_set{op=phi,args=[_]}|_] ->
+                    %% The type optimizer pass must have been
+                    %% turned off, since it would have removed this
+                    %% redundant phi node. Refuse to merge the blocks
+                    %% to ensure that this phi node remains at the
+                    %% beginning of a block.
+                    false;
+                _ ->
+                    true
+            end;
+        [_|_] ->
+            false
     end;
 is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
     false.
@@ -1969,9 +1998,7 @@ is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
 %%% extracted values.
 %%%
 
-ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
-    Linear = beam_ssa:linearize(Blocks0),
-
+ssa_opt_sink({#st{ssa=Linear}=St, FuncDb}) ->
     %% Create a map with all variables that define get_tuple_element
     %% instructions. The variable name map to the block it is defined in.
     case def_blocks(Linear) of
@@ -1980,10 +2007,12 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
             {St, FuncDb};
         [_|_]=Defs0 ->
             Defs = maps:from_list(Defs0),
-            {do_ssa_opt_sink(Linear, Defs, St), FuncDb}
+            {do_ssa_opt_sink(Defs, St), FuncDb}
     end.
 
-do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
+do_ssa_opt_sink(Defs, #st{ssa=Linear}=St) ->
+    Blocks0 = maps:from_list(Linear),
+
     %% Now find all the blocks that use variables defined by get_tuple_element
     %% instructions.
     Used = used_blocks(Linear, Defs, []),
@@ -2008,7 +2037,8 @@ do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
                            From = map_get(V, Defs),
                            move_defs(V, From, To, A)
                    end, Blocks0, DefLoc),
-    St#st{ssa=Blocks}.
+
+    St#st{ssa=beam_ssa:linearize(Blocks)}.
 
 def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
     def_blocks_is(Is, L, def_blocks(Bs));
@@ -2041,6 +2071,7 @@ unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}|_]}}|Bs]) ->
     Unsuitable = case Op of
                      bs_extract -> true;
                      bs_put -> true;
+                     exception_trampoline -> true;
                      {float,_} -> true;
                      landingpad -> true;
                      peek_message -> true;
@@ -2244,6 +2275,21 @@ gcd(A, B) ->
         X -> gcd(B, X)
     end.
 
+non_guards(Linear) ->
+    gb_sets:from_list(non_guards_1(Linear)).
+
+non_guards_1([{L,#b_blk{is=Is}}|Bs]) ->
+    case Is of
+        [#b_set{op=exception_trampoline}|_] ->
+            [L | non_guards_1(Bs)];
+        [#b_set{op=landingpad}|_] ->
+            [L | non_guards_1(Bs)];
+        _ ->
+            non_guards_1(Bs)
+    end;
+non_guards_1([]) ->
+    [?EXCEPTION_BLOCK].
+
 rel2fam(S0) ->
     S1 = sofs:relation(S0),
     S = sofs:rel2fam(S1),