1 files changed, 221 insertions, 222 deletions
diff --git a/lib/compiler/src/beam_ssa_opt.erl b/lib/compiler/src/beam_ssa_opt.erl
index 2c898ba6f8..90c0d3cf16 100644
--- a/lib/compiler/src/beam_ssa_opt.erl
+++ b/lib/compiler/src/beam_ssa_opt.erl
@@ -79,14 +79,12 @@ module(Module, Opts) ->
     {ok, finish(Module, StMap)}.
 
 phase([FuncId | Ids], Ps, StMap, FuncDb0) ->
-    try
-        {St, FuncDb} =
-            compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}),
-
-        phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb)
+    try compile:run_sub_passes(Ps, {map_get(FuncId, StMap), FuncDb0}) of
+        {St, FuncDb} ->
+            phase(Ids, Ps, StMap#{ FuncId => St }, FuncDb)
     catch
         Class:Error:Stack ->
-            #b_local{name=Name,arity=Arity} = FuncId,
+            #b_local{name=#b_literal{val=Name},arity=Arity} = FuncId,
             io:fwrite("Function: ~w/~w\n", [Name,Arity]),
             erlang:raise(Class, Error, Stack)
     end;
@@ -166,15 +164,18 @@ repeated_passes(Opts) ->
 epilogue_passes(Opts) ->
     Ps = [?PASS(ssa_opt_type_finish),
           ?PASS(ssa_opt_float),
-          ?PASS(ssa_opt_live),                  %One last time to clean up the
-                                                %mess left by the float pass.
+          ?PASS(ssa_opt_sw),
+
+          %% Run live one more time to clean up after the float and sw
+          %% passes.
+          ?PASS(ssa_opt_live),
           ?PASS(ssa_opt_bsm),
           ?PASS(ssa_opt_bsm_units),
           ?PASS(ssa_opt_bsm_shortcut),
-          ?PASS(ssa_opt_sw),
           ?PASS(ssa_opt_blockify),
           ?PASS(ssa_opt_sink),
           ?PASS(ssa_opt_merge_blocks),
+          ?PASS(ssa_opt_get_tuple_element),
           ?PASS(ssa_opt_trim_unreachable)],
     passes_1(Ps, Opts).
 
@@ -251,22 +252,14 @@ fdb_update(Caller, Callee, FuncDb) ->
     FuncDb#{ Caller => CallerVertex#func_info{out=Calls},
              Callee => CalleeVertex#func_info{in=CalledBy} }.
 
-%% Returns the post-order of all local calls in this module. That is, it starts
-%% with the functions that don't call any others and then walks up the call
-%% chain.
+%% Returns the post-order of all local calls in this module. That is,
+%% called functions will be ordered before the functions calling them.
 %%
 %% Functions where module-level optimization is disabled are added last in
 %% arbitrary order.
 
 get_call_order_po(StMap, FuncDb) ->
-    Leaves = maps:fold(fun(Id, #func_info{out=[]}, Acc) ->
-                               [Id | Acc];
-                          (_, _, Acc) ->
-                               Acc
-                       end, [], FuncDb),
-
-    Order = gco_po_1(sort(Leaves), FuncDb, [], #{}),
-
+    Order = gco_po(FuncDb),
     Order ++ maps:fold(fun(K, _V, Acc) ->
                                case is_map_key(K, FuncDb) of
                                    false -> [K | Acc];
@@ -274,20 +267,23 @@ get_call_order_po(StMap, FuncDb) ->
                                end
                        end, [], StMap).
 
-gco_po_1([Id | Ids], FuncDb, Children, Seen) when not is_map_key(Id, Seen) ->
-    [Id | gco_po_1(Ids, FuncDb, [Id | Children], Seen#{ Id => true })];
-gco_po_1([_Id | Ids], FuncDb, Children, Seen) ->
-    gco_po_1(Ids, FuncDb, Children, Seen);
-gco_po_1([], FuncDb, [_|_]=Children, Seen) ->
-    gco_po_1(gco_po_parents(Children, FuncDb), FuncDb, [], Seen);
-gco_po_1([], _FuncDb, [], _Seen) ->
-    [].
+gco_po(FuncDb) ->
+    All = sort(maps:keys(FuncDb)),
+    {RPO,_} = gco_rpo(All, FuncDb, cerl_sets:new(), []),
+    reverse(RPO).
 
-gco_po_parents([Child | Children], FuncDb) ->
-    #{ Child := #func_info{in=Parents}} = FuncDb,
-    Parents ++ gco_po_parents(Children, FuncDb);
-gco_po_parents([], _FuncDb) ->
-    [].
+gco_rpo([Id|Ids], FuncDb, Seen0, Acc0) ->
+    case cerl_sets:is_element(Id, Seen0) of
+        true ->
+            gco_rpo(Ids, FuncDb, Seen0, Acc0);
+        false ->
+            #func_info{out=Successors} = map_get(Id, FuncDb),
+            Seen1 = cerl_sets:add_element(Id, Seen0),
+            {Acc,Seen} = gco_rpo(Successors, FuncDb, Seen1, Acc0),
+            gco_rpo(Ids, FuncDb, Seen, [Id|Acc])
+    end;
+gco_rpo([], _, Seen, Acc) ->
+    {Acc,Seen}.
 
 %%%
 %%% Trivial sub passes.
@@ -364,7 +360,7 @@ ssa_opt_coalesce_phis({#st{ssa=Blocks0}=St, FuncDb}) ->
     {St#st{ssa=Blocks}, FuncDb}.
 
 c_phis_1([L|Ls], Blocks0) ->
-    case maps:get(L, Blocks0) of
+    case map_get(L, Blocks0) of
         #b_blk{is=[#b_set{op=phi}|_]}=Blk ->
             Blocks = c_phis_2(L, Blk, Blocks0),
             c_phis_1(Ls, Blocks);
@@ -403,7 +399,7 @@ c_phis_args_1([{Var,Pred}|As], Blocks) ->
 c_phis_args_1([], _Blocks) -> none.
 
 c_get_pred_vars(Var, Pred, Blocks) ->
-    case maps:get(Pred, Blocks) of
+    case map_get(Pred, Blocks) of
         #b_blk{is=[#b_set{op=phi,dst=Var,args=Args}]} ->
             {Var,Pred,Args};
         #b_blk{} ->
@@ -424,7 +420,7 @@ c_rewrite_phi([A|As], Info) ->
 c_rewrite_phi([], _Info) -> [].
 
 c_fix_branches([{_,Pred}|As], L, Blocks0) ->
-    #b_blk{last=Last0} = Blk0 = maps:get(Pred, Blocks0),
+    #b_blk{last=Last0} = Blk0 = map_get(Pred, Blocks0),
     #b_br{bool=#b_literal{val=true}} = Last0,   %Assertion.
     Last = Last0#b_br{bool=#b_literal{val=true},succ=L,fail=L},
     Blk = Blk0#b_blk{last=Last},
@@ -687,6 +683,14 @@ record_opt_is([#b_set{op={bif,is_tuple},dst=Bool,args=[Tuple]}=Set],
         no ->
             [Set]
     end;
+record_opt_is([I|Is]=Is0, #b_br{bool=Bool}=Last, Blocks) ->
+    case is_tagged_tuple_1(Is0, Last, Blocks) of
+        {yes,_Fail,Tuple,Arity,Tag} ->
+            Args = [Tuple,Arity,Tag],
+            [I#b_set{op=is_tagged_tuple,dst=Bool,args=Args}];
+        no ->
+            [I|record_opt_is(Is, Last, Blocks)]
+    end;
 record_opt_is([I|Is], Last, Blocks) ->
     [I|record_opt_is(Is, Last, Blocks)];
 record_opt_is([], _Last, _Blocks) -> [].
@@ -694,29 +698,30 @@ record_opt_is([], _Last, _Blocks) -> [].
 is_tagged_tuple(#b_var{}=Tuple, Bool,
                 #b_br{bool=Bool,succ=Succ,fail=Fail},
                 Blocks) ->
-    SuccBlk = maps:get(Succ, Blocks),
-    is_tagged_tuple_1(SuccBlk, Tuple, Fail, Blocks);
+    #b_blk{is=Is,last=Last} = map_get(Succ, Blocks),
+    case is_tagged_tuple_1(Is, Last, Blocks) of
+        {yes,Fail,Tuple,Arity,Tag} ->
+            {yes,Arity,Tag};
+        _ ->
+            no
+    end;
 is_tagged_tuple(_, _, _, _) -> no.
 
-is_tagged_tuple_1(#b_blk{is=Is,last=Last}, Tuple, Fail, Blocks) ->
-    case Is of
-        [#b_set{op={bif,tuple_size},dst=ArityVar,
-                args=[#b_var{}=Tuple]},
-         #b_set{op={bif,'=:='},
-                dst=Bool,
-                args=[ArityVar, #b_literal{val=ArityVal}=Arity]}]
-        when is_integer(ArityVal) ->
-            case Last of
-                #b_br{bool=Bool,succ=Succ,fail=Fail} ->
-                    SuccBlk = maps:get(Succ, Blocks),
-                    case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
-                        no ->
-                            no;
-                        {yes,Tag} ->
-                            {yes,Arity,Tag}
-                    end;
-                _ ->
-                    no
+is_tagged_tuple_1(Is, Last, Blocks) ->
+    case {Is,Last} of
+        {[#b_set{op={bif,tuple_size},dst=ArityVar,
+                 args=[#b_var{}=Tuple]},
+          #b_set{op={bif,'=:='},
+                 dst=Bool,
+                 args=[ArityVar, #b_literal{val=ArityVal}=Arity]}],
+         #b_br{bool=Bool,succ=Succ,fail=Fail}}
+          when is_integer(ArityVal) ->
+            SuccBlk = map_get(Succ, Blocks),
+            case is_tagged_tuple_2(SuccBlk, Tuple, Fail) of
+                no ->
+                    no;
+                {yes,Tag} ->
+                    {yes,Fail,Tuple,Arity,Tag}
             end;
         _ ->
             no
@@ -759,7 +764,7 @@ ssa_opt_cse({#st{ssa=Linear}=St, FuncDb}) ->
     {St#st{ssa=cse(Linear, #{}, M)}, FuncDb}.
 
 cse([{L,#b_blk{is=Is0,last=Last0}=Blk}|Bs], Sub0, M0) ->
-    Es0 = maps:get(L, M0),
+    Es0 = map_get(L, M0),
     {Is1,Es,Sub} = cse_is(Is0, Es0, Sub0, []),
     Last = sub(Last0, Sub),
     M = cse_successors(Is1, Blk, Es, M0),
@@ -854,6 +859,7 @@ cse_expr(#b_set{op=Op,args=Args}=I) ->
 cse_suitable(#b_set{op=get_hd}) -> true;
 cse_suitable(#b_set{op=get_tl}) -> true;
 cse_suitable(#b_set{op=put_list}) -> true;
+cse_suitable(#b_set{op=get_tuple_element}) -> true;
 cse_suitable(#b_set{op=put_tuple}) -> true;
 cse_suitable(#b_set{op={bif,tuple_size}}) ->
     %% Doing CSE for tuple_size/1 can prevent the
@@ -905,6 +911,11 @@ ssa_opt_float({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
     {Linear,Count} = float_opt(Linear0, Count0, Fs),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
+float_blk_is_in_guard(#b_blk{last=#b_br{fail=F}}, #fs{non_guards=NonGuards}) ->
+    not gb_sets:is_member(F, NonGuards);
+float_blk_is_in_guard(#b_blk{}, #fs{}) ->
+    false.
+
 float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
     case Is of
         [#b_set{op=landingpad}|_] ->
@@ -914,21 +925,18 @@ float_non_guards([{L,#b_blk{is=Is}}|Bs]) ->
     end;
 float_non_guards([]) -> [?BADARG_BLOCK].
 
-float_opt([{L,#b_blk{last=#b_br{fail=F}}=Blk}|Bs0],
-            Count0, #fs{non_guards=NonGuards}=Fs) ->
-    case gb_sets:is_member(F, NonGuards) of
+float_opt([{L,Blk}|Bs0], Count0, Fs) ->
+    case float_blk_is_in_guard(Blk, Fs) of
         true ->
-            %% This block is not inside a guard.
-            %% We can do the optimization.
-            float_opt_1(L, Blk, Bs0, Count0, Fs);
-        false ->
             %% This block is inside a guard. Don't do
             %% any floating point optimizations.
             {Bs,Count} = float_opt(Bs0, Count0, Fs),
-            {[{L,Blk}|Bs],Count}
+            {[{L,Blk}|Bs],Count};
+        false ->
+            %% This block is not inside a guard.
+            %% We can do the optimization.
+            float_opt_1(L, Blk, Bs0, Count0, Fs)
     end;
-float_opt([{L,Blk}|Bs], Count, Fs) ->
-    float_opt_1(L, Blk, Bs, Count, Fs);
 float_opt([], Count, _Fs) ->
     {[],Count}.
 
@@ -1004,10 +1012,14 @@ float_conv([{L,#b_blk{is=Is0}=Blk0}|Bs0], Fail, Count0) ->
 
 float_maybe_flush(Blk0, #fs{s=cleared,fail=Fail,bs=Blocks}=Fs0, Count0) ->
     #b_blk{last=#b_br{bool=#b_var{},succ=Succ}=Br} = Blk0,
-    #b_blk{is=Is} = maps:get(Succ, Blocks),
+
+    %% If the success block starts with a floating point operation, we can
+    %% defer flushing to that block as long as it isn't a guard.
+    #b_blk{is=Is} = SuccBlk = map_get(Succ, Blocks),
+    SuccIsGuard = float_blk_is_in_guard(SuccBlk, Fs0),
+
     case Is of
-        [#b_set{anno=#{float_op:=_}}|_] ->
-            %% The next operation is also a floating point operation.
+        [#b_set{anno=#{float_op:=_}}|_] when not SuccIsGuard ->
             %% No flush needed.
             {[],Blk0,Fs0,Count0};
         _ ->
@@ -1151,25 +1163,28 @@ ssa_opt_live({#st{ssa=Linear0}=St, FuncDb}) ->
 live_opt([{L,Blk0}|Bs], LiveMap0, Blocks) ->
     Blk1 = beam_ssa_share:block(Blk0, Blocks),
     Successors = beam_ssa:successors(Blk1),
-    Live0 = live_opt_succ(Successors, L, LiveMap0),
+    Live0 = live_opt_succ(Successors, L, LiveMap0, gb_sets:empty()),
     {Blk,Live} = live_opt_blk(Blk1, Live0),
     LiveMap = live_opt_phis(Blk#b_blk.is, L, Live, LiveMap0),
     live_opt(Bs, LiveMap, Blocks#{L:=Blk});
 live_opt([], _, Acc) -> Acc.
 
-live_opt_succ([S|Ss], L, LiveMap) ->
-    Live0 = live_opt_succ(Ss, L, LiveMap),
+live_opt_succ([S|Ss], L, LiveMap, Live0) ->
     Key = {S,L},
     case LiveMap of
         #{Key:=Live} ->
-            gb_sets:union(Live, Live0);
+            %% The successor has a phi node, and the value for
+            %% this block in the phi node is a variable.
+            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
         #{S:=Live} ->
-            gb_sets:union(Live, Live0);
+            %% No phi node in the successor, or the value for
+            %% this block in the phi node is a literal.
+            live_opt_succ(Ss, L, LiveMap, gb_sets:union(Live, Live0));
         #{} ->
-            Live0
+            %% A peek_message block which has not been processed yet.
+            live_opt_succ(Ss, L, LiveMap, Live0)
     end;
-live_opt_succ([], _, _) ->
-    gb_sets:empty().
+live_opt_succ([], _, _, Acc) -> Acc.
 
 live_opt_phis(Is, L, Live0, LiveMap0) ->
     LiveMap = LiveMap0#{L=>Live0},
@@ -1220,7 +1235,7 @@ live_opt_is([#b_set{op=succeeded,dst=SuccDst=SuccDstVar,
                     case gb_sets:is_member(SuccDst, Live0) of
                         true ->
                             Live1 = gb_sets:add(Dst, Live0),
-                            Live = gb_sets:delete_any(SuccDst, Live1),
+                            Live = gb_sets:delete(SuccDst, Live1),
                             live_opt_is([I|Is], Live, [SuccI|Acc]);
                         false ->
                             live_opt_is([I|Is], Live0, Acc)
@@ -1231,7 +1246,7 @@ live_opt_is([#b_set{dst=Dst}=I|Is], Live0, Acc) ->
     case gb_sets:is_member(Dst, Live0) of
         true ->
             Live1 = gb_sets:union(Live0, gb_sets:from_ordset(beam_ssa:used(I))),
-            Live = gb_sets:delete_any(Dst, Live1),
+            Live = gb_sets:delete(Dst, Live1),
             live_opt_is(Is, Live, [I|Acc]);
         false ->
             case beam_ssa:no_side_effect(I) of
@@ -1375,7 +1390,7 @@ bsm_positions([{L,#b_blk{is=Is,last=Last}}|Bs], PosMap0) ->
     case {Is,Last} of
         {[#b_set{op=bs_test_tail,dst=Bool,args=[Ctx,#b_literal{val=Bits0}]}],
          #b_br{bool=Bool,fail=Fail}} ->
-            Bits = Bits0 + maps:get(Ctx, PosMap0),
+            Bits = Bits0 + map_get(Ctx, PosMap0),
             bsm_positions(Bs, PosMap#{L=>{Bits,Fail}});
         {_,_} ->
             bsm_positions(Bs, PosMap)
@@ -1467,7 +1482,7 @@ bsm_units_skip_1([#b_set{op=bs_match,
                  Block0, Units) ->
     [#b_set{op=succeeded,dst=Bool,args=[New]}] = Test, %Assertion.
     #b_br{bool=Bool} = Last0 = Block0#b_blk.last, %Assertion.
-    CtxUnit = maps:get(Ctx, Units),
+    CtxUnit = map_get(Ctx, Units),
     if
         CtxUnit rem OpUnit =:= 0 ->
             Is = takewhile(fun(I) -> I =/= Skip end, Block0#b_blk.is),
@@ -1479,7 +1494,7 @@ bsm_units_skip_1([#b_set{op=bs_match,
     end;
 bsm_units_skip_1([#b_set{op=bs_match,dst=New,args=Args}|_], Block, Units) ->
     [_,Ctx|_] = Args,
-    CtxUnit = maps:get(Ctx, Units),
+    CtxUnit = map_get(Ctx, Units),
     OpUnit = bsm_op_unit(Args),
     {Block, Units#{ New => gcd(OpUnit, CtxUnit) }};
 bsm_units_skip_1([_I | Is], Block, Units) ->
@@ -1507,23 +1522,23 @@ bsm_op_unit(_) ->
 %% may differ between them, so we can only keep the information that is common
 %% to all paths.
 bsm_units_join(Lbl, MapA, UnitMaps0) when is_map_key(Lbl, UnitMaps0) ->
-    MapB = maps:get(Lbl, UnitMaps0),
+    MapB = map_get(Lbl, UnitMaps0),
     Merged = if
                  map_size(MapB) =< map_size(MapA) ->
                      bsm_units_join_1(maps:keys(MapB), MapA, MapB);
                  map_size(MapB) > map_size(MapA) ->
                      bsm_units_join_1(maps:keys(MapA), MapB, MapA)
              end,
-    maps:put(Lbl, Merged, UnitMaps0);
+    UnitMaps0#{Lbl := Merged};
 bsm_units_join(Lbl, MapA, UnitMaps0) when MapA =/= #{} ->
-    maps:put(Lbl, MapA, UnitMaps0);
+    UnitMaps0#{Lbl => MapA};
 bsm_units_join(_Lbl, _MapA, UnitMaps0) ->
     UnitMaps0.
 
 bsm_units_join_1([Key | Keys], Left, Right) when is_map_key(Key, Left) ->
-    UnitA = maps:get(Key, Left),
-    UnitB = maps:get(Key, Right),
-    bsm_units_join_1(Keys, Left, maps:put(Key, gcd(UnitA, UnitB), Right));
+    UnitA = map_get(Key, Left),
+    UnitB = map_get(Key, Right),
+    bsm_units_join_1(Keys, Left, Right#{Key := gcd(UnitA, UnitB)});
 bsm_units_join_1([Key | Keys], Left, Right) ->
     bsm_units_join_1(Keys, Left, maps:remove(Key, Right));
 bsm_units_join_1([], _MapA, Right) ->
@@ -1834,12 +1849,16 @@ opt_tup_size_is([], _, _, _Acc) -> none.
 %%%
 
 ssa_opt_sw({#st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
-    {Linear,Count} = opt_sw(Linear0, #{}, Count0, []),
+    {Linear,Count} = opt_sw(Linear0, Count0, []),
     {St#st{ssa=Linear,cnt=Count}, FuncDb}.
 
-opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) ->
-    Phis = opt_sw_phis(Is, Phis0),
-    case opt_sw_last(Last0, Phis) of
+opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Sw0}=Blk0}|Bs], Count0, Acc) ->
+    %% Ensure that no label in the switch list is the same
+    %% as the failure label.
+    #b_switch{fail=Fail,list=List0} = Sw0,
+    List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail],
+    Sw1 = beam_ssa:normalize(Sw0#b_switch{list=List}),
+    case Sw1 of
         #b_switch{arg=Arg,fail=Fail,list=[{Lit,Lbl}]} ->
             %% Rewrite a single value switch to a br.
             Bool = #b_var{name={'@ssa_bool',Count0}},
@@ -1847,7 +1866,7 @@ opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) -
             IsEq = #b_set{op={bif,'=:='},dst=Bool,args=[Arg,Lit]},
             Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
             Blk = Blk0#b_blk{is=Is++[IsEq],last=Br},
-            opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
+            opt_sw(Bs, Count, [{L,Blk}|Acc]);
         #b_switch{arg=Arg,fail=Fail,
                   list=[{#b_literal{val=B1},Lbl},{#b_literal{val=B2},Lbl}]}
           when B1 =:= not B2 ->
@@ -1857,71 +1876,18 @@ opt_sw([{L,#b_blk{is=Is,last=#b_switch{}=Last0}=Blk0}|Bs], Phis0, Count0, Acc) -
             IsBool = #b_set{op={bif,is_boolean},dst=Bool,args=[Arg]},
             Br = #b_br{bool=Bool,succ=Lbl,fail=Fail},
             Blk = Blk0#b_blk{is=Is++[IsBool],last=Br},
-            opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
-        Last0 ->
-            opt_sw(Bs, Phis, Count0, [{L,Blk0}|Acc]);
-        Last ->
-            Blk = Blk0#b_blk{last=Last},
-            opt_sw(Bs, Phis, Count0, [{L,Blk}|Acc])
+            opt_sw(Bs, Count, [{L,Blk}|Acc]);
+        Sw0 ->
+            opt_sw(Bs, Count0, [{L,Blk0}|Acc]);
+        Sw ->
+            Blk = Blk0#b_blk{last=Sw},
+            opt_sw(Bs, Count0, [{L,Blk}|Acc])
     end;
-opt_sw([{L,#b_blk{is=Is}=Blk}|Bs], Phis0, Count, Acc) ->
-    Phis = opt_sw_phis(Is, Phis0),
-    opt_sw(Bs, Phis, Count, [{L,Blk}|Acc]);
-opt_sw([], _Phis, Count, Acc) ->
+opt_sw([{L,#b_blk{}=Blk}|Bs], Count, Acc) ->
+    opt_sw(Bs, Count, [{L,Blk}|Acc]);
+opt_sw([], Count, Acc) ->
     {reverse(Acc),Count}.
 
-opt_sw_phis([#b_set{op=phi,dst=Dst,args=Args}|Is], Phis) ->
-    case opt_sw_literals(Args, []) of
-        error ->
-            opt_sw_phis(Is, Phis);
-        Literals ->
-            opt_sw_phis(Is, Phis#{Dst=>Literals})
-    end;
-opt_sw_phis(_, Phis) -> Phis.
-
-opt_sw_last(#b_switch{arg=Arg,fail=Fail,list=List0}=Sw0, Phis) ->
-    case Phis of
-        #{Arg:=Values0} ->
-            Values = gb_sets:from_list(Values0),
-
-            %% Prune the switch list to only contain the possible values.
-            List1 = [P || {Lit,_}=P <- List0, gb_sets:is_member(Lit, Values)],
-
-            %% Now test whether the failure label can ever be reached.
-            Sw = case gb_sets:size(Values) =:= length(List1) of
-                     true ->
-                         %% The switch list has the same number of values as the phi node.
-                         %% The values must be the same, because the values that were not
-                         %% possible were pruned from the switch list. Therefore, the
-                         %% failure label can't possibly be reached, and we can choose a
-                         %% a new failure label by picking a value from the list.
-                         case List1 of
-                             [{#b_literal{},Lbl}|List] ->
-                                 Sw0#b_switch{fail=Lbl,list=List};
-                             [] ->
-                                 Sw0#b_switch{list=List1}
-                         end;
-                     false ->
-                         %% There are some values in the phi node that are not in the
-                         %% switch list; thus, the failure label can still be reached.
-                         Sw0
-                 end,
-            beam_ssa:normalize(Sw);
-        #{} ->
-            %% Ensure that no label in the switch list is the same
-            %% as the failure label.
-            List = [{Val,Lbl} || {Val,Lbl} <- List0, Lbl =/= Fail],
-            Sw = Sw0#b_switch{list=List},
-            beam_ssa:normalize(Sw)
-    end.
-
-opt_sw_literals([{#b_literal{}=Lit,_}|T], Acc) ->
-    opt_sw_literals(T, [Lit|Acc]);
-opt_sw_literals([_|_], _Acc) ->
-    error;
-opt_sw_literals([], Acc) -> Acc.
-
-
 %%%
 %%% Merge blocks.
 %%%
@@ -1943,7 +1909,7 @@ merge_blocks_1([L|Ls], Preds0, Blocks0) ->
                     Is = Is0 ++ Is1,
                     Blk = Blk1#b_blk{is=Is},
                     Blocks1 = maps:remove(L, Blocks0),
-                    Blocks2 = maps:put(P, Blk, Blocks1),
+                    Blocks2 = Blocks1#{P:=Blk},
                     Successors = beam_ssa:successors(Blk),
                     Blocks = beam_ssa:update_phi_labels(Successors, L, P, Blocks2),
                     Preds = merge_update_preds(Successors, L, P, Preds0),
@@ -1957,8 +1923,8 @@ merge_blocks_1([L|Ls], Preds0, Blocks0) ->
 merge_blocks_1([], _Preds, Blocks) -> Blocks.
 
 merge_update_preds([L|Ls], From, To, Preds0) ->
-    Ps = [rename_label(P, From, To) || P <- maps:get(L, Preds0)],
-    Preds = maps:put(L, Ps, Preds0),
+    Ps = [rename_label(P, From, To) || P <- map_get(L, Preds0)],
+    Preds = Preds0#{L:=Ps},
     merge_update_preds(Ls, From, To, Preds);
 merge_update_preds([], _, _, Preds) -> Preds.
 
@@ -1972,13 +1938,17 @@ verify_merge_is([#b_set{op=Op}|_]) ->
 verify_merge_is(_) ->
     ok.
 
-is_merge_allowed(_, _, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
+is_merge_allowed(_, #b_blk{}, #b_blk{is=[#b_set{op=peek_message}|_]}) ->
     false;
-is_merge_allowed(L, Blk0, #b_blk{}) ->
-    case beam_ssa:successors(Blk0) of
+is_merge_allowed(L, #b_blk{last=#b_br{}}=Blk, #b_blk{}) ->
+    %% The predecessor block must have exactly one successor (L) for
+    %% the merge to be safe.
+    case beam_ssa:successors(Blk) of
         [L] -> true;
         [_|_] -> false
-    end.
+    end;
+is_merge_allowed(_, #b_blk{last=#b_switch{}}, #b_blk{}) ->
+    false.
 
 %%%
 %%% When a tuple is matched, the pattern matching compiler generates a
@@ -2001,14 +1971,22 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
 
     %% Create a map with all variables that define get_tuple_element
     %% instructions. The variable name map to the block it is defined in.
-    Defs = maps:from_list(def_blocks(Linear)),
+    case def_blocks(Linear) of
+        [] ->
+            %% No get_tuple_element instructions, so there is nothing to do.
+            {St, FuncDb};
+        [_|_]=Defs0 ->
+            Defs = maps:from_list(Defs0),
+            {do_ssa_opt_sink(Linear, Defs, St), FuncDb}
+    end.
 
+do_ssa_opt_sink(Linear, Defs, #st{ssa=Blocks0}=St) ->
     %% Now find all the blocks that use variables defined by get_tuple_element
     %% instructions.
     Used = used_blocks(Linear, Defs, []),
 
     %% Calculate dominators.
-    Dom0 = beam_ssa:dominators(Blocks0),
+    {Dom,Numbering} = beam_ssa:dominators(Blocks0),
 
     %% It is not safe to move get_tuple_element instructions to blocks
     %% that begin with certain instructions. It is also unsafe to move
@@ -2016,28 +1994,18 @@ ssa_opt_sink({#st{ssa=Blocks0}=St, FuncDb}) ->
     %% unsafe moves, pretend that the unsuitable blocks are not
     %% dominators.
     Unsuitable = unsuitable(Linear, Blocks0),
-    Dom = case gb_sets:is_empty(Unsuitable) of
-              true ->
-                  Dom0;
-              false ->
-                  F = fun(_, DomBy) ->
-                              [L || L <- DomBy,
-                                    not gb_sets:is_element(L, Unsuitable)]
-                      end,
-                  maps:map(F, Dom0)
-          end,
 
     %% Calculate new positions for get_tuple_element instructions. The new
     %% position is a block that dominates all uses of the variable.
-    DefLoc = new_def_locations(Used, Defs, Dom),
+    DefLoc = new_def_locations(Used, Defs, Dom, Numbering, Unsuitable),
 
     %% Now move all suitable get_tuple_element instructions to their
     %% new blocks.
     Blocks = foldl(fun({V,To}, A) ->
-                           From = maps:get(V, Defs),
+                           From = map_get(V, Defs),
                            move_defs(V, From, To, A)
                    end, Blocks0, DefLoc),
-    {St#st{ssa=Blocks}, FuncDb}.
+    St#st{ssa=Blocks}.
 
 def_blocks([{L,#b_blk{is=Is}}|Bs]) ->
     def_blocks_is(Is, L, def_blocks(Bs));
@@ -2104,11 +2072,11 @@ unsuitable_loop(L, Blocks, Predecessors) ->
     unsuitable_loop(L, Blocks, Predecessors, []).
 
 unsuitable_loop(L, Blocks, Predecessors, Acc) ->
-    Ps = maps:get(L, Predecessors),
+    Ps = map_get(L, Predecessors),
     unsuitable_loop_1(Ps, Blocks, Predecessors, Acc).
 
 unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
-    case maps:get(P, Blocks) of
+    case map_get(P, Blocks) of
         #b_blk{is=[#b_set{op=peek_message}|_]} ->
             unsuitable_loop_1(Ps, Blocks, Predecessors, Acc0);
         #b_blk{} ->
@@ -2123,50 +2091,42 @@ unsuitable_loop_1([P|Ps], Blocks, Predecessors, Acc0) ->
     end;
 unsuitable_loop_1([], _, _, Acc) -> Acc.
 
-%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs, Dominators) ->
-%%          [{Variable,NewDefinitionBlock}]
-%%  Calculate new locations for get_tuple_element instructions. For each
-%%  variable, the new location is a block that dominates all uses of
-%%  variable and as near to the uses of as possible. If no such block
-%%  distinct from the block where the instruction currently is, the
-%%  variable will not be included in the result list.
-
-new_def_locations([{V,UsedIn}|Vs], Defs, Dom) ->
-    DefIn = maps:get(V, Defs),
-    case common_dom(UsedIn, DefIn, Dom) of
-        [] ->
-            new_def_locations(Vs, Defs, Dom);
-        [_|_]=BetterDef ->
-            L = most_dominated(BetterDef, Dom),
-            [{V,L}|new_def_locations(Vs, Defs, Dom)]
-    end;
-new_def_locations([], _, _) -> [].
-
-common_dom([L|Ls], DefIn, Dom) ->
-    DomBy0 = maps:get(L, Dom),
-    DomBy = ordsets:subtract(DomBy0, maps:get(DefIn, Dom)),
-    common_dom_1(Ls, Dom, DomBy).
-
-common_dom_1(_, _, []) ->
-    [];
-common_dom_1([L|Ls], Dom, [_|_]=DomBy0) ->
-    DomBy1 = maps:get(L, Dom),
-    DomBy = ordsets:intersection(DomBy0, DomBy1),
-    common_dom_1(Ls, Dom, DomBy);
-common_dom_1([], _, DomBy) -> DomBy.
-
-most_dominated([L|Ls], Dom) ->
-    most_dominated(Ls, L, maps:get(L, Dom), Dom).
-
-most_dominated([L|Ls], L0, DomBy, Dom) ->
-    case member(L, DomBy) of
+%% new_def_locations([{Variable,[UsedInBlock]}|Vs], Defs,
+%%                   Dominators, Numbering, Unsuitable) ->
+%%  [{Variable,NewDefinitionBlock}]
+%%
+%%  Calculate new locations for get_tuple_element instructions. For
+%%  each variable, the new location is a block that dominates all uses
+%%  of the variable and as near to the uses of as possible.
+
+new_def_locations([{V,UsedIn}|Vs], Defs, Dom, Numbering, Unsuitable) ->
+    DefIn = map_get(V, Defs),
+    Common = common_dominator(UsedIn, Dom, Numbering, Unsuitable),
+    case member(Common, map_get(DefIn, Dom)) of
         true ->
-            most_dominated(Ls, L0, DomBy, Dom);
+            %% The common dominator is either DefIn or an
+            %% ancestor of DefIn.
+            new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable);
         false ->
-            most_dominated(Ls, L, maps:get(L, Dom), Dom)
+            %% We have found a suitable descendant of DefIn,
+            %% to which the get_tuple_element instruction can
+            %% be sunk.
+            [{V,Common}|new_def_locations(Vs, Defs, Dom, Numbering, Unsuitable)]
     end;
-most_dominated([], L, _, _) -> L.
+new_def_locations([], _, _, _, _) -> [].
 
+common_dominator(Ls0, Dom, Numbering, Unsuitable) ->
+    [Common|_] = beam_ssa:common_dominators(Ls0, Dom, Numbering),
+    case gb_sets:is_member(Common, Unsuitable) of
+        true ->
+            %% It is not allowed to place the instruction here. Try
+            %% to find another suitable dominating block by going up
+            %% one step in the dominator tree.
+            [Common,OneUp|_] = map_get(Common, Dom),
+            common_dominator([OneUp], Dom, Numbering, Unsuitable);
+        false ->
+            Common
+    end.
 
 %% Move get_tuple_element instructions to their new locations.
 
@@ -2206,7 +2166,6 @@ insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
     Action0 = case Op of
                   call -> beyond;
                   'catch_end' -> beyond;
-                  set_tuple_element -> beyond;
                   timeout -> beyond;
                   _ -> here
               end,
@@ -2231,6 +2190,46 @@ insert_def_is([#b_set{op=Op}=I|Is]=Is0, V, Def) ->
 insert_def_is([], _V, Def) ->
     [Def].
 
+%%%
+%%% Order consecutive get_tuple_element instructions in ascending
+%%% position order. This will give the loader more opportunities
+%%% for combining get_tuple_element instructions.
+%%%
+
+ssa_opt_get_tuple_element({#st{ssa=Blocks0}=St, FuncDb}) ->
+    Blocks = opt_get_tuple_element(maps:to_list(Blocks0), Blocks0),
+    {St#st{ssa=Blocks}, FuncDb}.
+
+opt_get_tuple_element([{L,#b_blk{is=Is0}=Blk0}|Bs], Blocks) ->
+    case opt_get_tuple_element_is(Is0, false, []) of
+        {yes,Is} ->
+            Blk = Blk0#b_blk{is=Is},
+            opt_get_tuple_element(Bs, Blocks#{L:=Blk});
+        no ->
+            opt_get_tuple_element(Bs, Blocks)
+    end;
+opt_get_tuple_element([], Blocks) -> Blocks.
+
+opt_get_tuple_element_is([#b_set{op=get_tuple_element,
+                                 args=[#b_var{}=Src,_]}=I0|Is0],
+                         _AnyChange, Acc) ->
+    {GetIs0,Is} = collect_get_tuple_element(Is0, Src, [I0]),
+    GetIs1 = sort([{Pos,I} || #b_set{args=[_,Pos]}=I <- GetIs0]),
+    GetIs = [I || {_,I} <- GetIs1],
+    opt_get_tuple_element_is(Is, true, reverse(GetIs, Acc));
+opt_get_tuple_element_is([I|Is], AnyChange, Acc) ->
+    opt_get_tuple_element_is(Is, AnyChange, [I|Acc]);
+opt_get_tuple_element_is([], AnyChange, Acc) ->
+    case AnyChange of
+        true -> {yes,reverse(Acc)};
+        false -> no
+    end.
+
+collect_get_tuple_element([#b_set{op=get_tuple_element,
+                                  args=[Src,_]}=I|Is], Src, Acc) ->
+    collect_get_tuple_element(Is, Src, [I|Acc]);
+collect_get_tuple_element(Is, _Src, Acc) ->
+    {Acc,Is}.
 
 %%%
 %%% Common utilities.