21 files changed, 969 insertions, 950 deletions
diff --git a/lib/compiler/src/Makefile b/lib/compiler/src/Makefile
index 26ae6566e6..d475e5a19a 100644
--- a/lib/compiler/src/Makefile
+++ b/lib/compiler/src/Makefile
@@ -63,6 +63,7 @@ MODULES =  \
 	beam_ssa_bsm \
 	beam_ssa_codegen \
 	beam_ssa_dead \
+	beam_ssa_funs \
 	beam_ssa_lint \
 	beam_ssa_opt \
 	beam_ssa_pp \
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index 0abc845310..dd2537a699 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -59,6 +59,9 @@ rename_instrs([{test,is_eq_exact,_,[Dst,Src]}=Test,
 rename_instrs([{test,is_eq_exact,_,[Same,Same]}|Is]) ->
     %% Same literal or same register. Will always succeed.
     rename_instrs(Is);
+rename_instrs([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_},{label,Fail}|Is]) ->
+    %% This instruction sequence does nothing.
+    rename_instrs(Is);
 rename_instrs([{apply_last,A,N}|Is]) ->
     [{apply,A},{deallocate,N},return|rename_instrs(Is)];
 rename_instrs([{call_last,A,F,N}|Is]) ->
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index d28c0fd9e4..9d8d5b2b0c 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -49,9 +49,6 @@ function({function,Name,Arity,CLabel,Is0}) ->
 blockify(Is) ->
     blockify(Is, []).
 
-blockify([{loop_rec,{f,Fail},{x,0}},{loop_rec_end,_Lbl},{label,Fail}|Is], Acc) ->
-    %% Useless instruction sequence.
-    blockify(Is, Acc);
 blockify([I|Is0]=IsAll, Acc) ->
     case collect(I) of
 	error -> blockify(Is0, [I|Acc]);
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index f5f0ac2218..7299654476 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -23,17 +23,15 @@
 
 -export([module/2]).
 -export([clean_labels/1]).
--import(lists, [foldl/3]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
 
 module({Mod,Exp,Attr,Fs0,_}, Opts) ->
     Order = [Lbl || {function,_,_,Lbl,_} <- Fs0],
-    All = foldl(fun({function,_,_,Lbl,_}=Func,D) -> dict:store(Lbl, Func, D) end,
-		dict:new(), Fs0),
+    All = maps:from_list([{Lbl,Func} || {function,_,_,Lbl,_}=Func <- Fs0]),
     WorkList = rootset(Fs0, Exp, Attr),
-    Used = find_all_used(WorkList, All, sets:from_list(WorkList)),
+    Used = find_all_used(WorkList, All, cerl_sets:from_list(WorkList)),
     Fs1 = remove_unused(Order, Used, All),
     {Fs2,Lc} = clean_labels(Fs1),
     Fs = maybe_remove_lines(Fs2, Opts),
@@ -55,16 +53,16 @@ rootset(Fs, Root0, Attr) ->
 %% Remove the unused functions.
 
 remove_unused([F|Fs], Used, All) ->
-    case sets:is_element(F, Used) of
+    case cerl_sets:is_element(F, Used) of
 	false -> remove_unused(Fs, Used, All);
-	true -> [dict:fetch(F, All)|remove_unused(Fs, Used, All)]
+	true -> [map_get(F, All)|remove_unused(Fs, Used, All)]
     end;
 remove_unused([], _, _) -> [].
-	    
+
 %% Find all used functions.
 
 find_all_used([F|Fs0], All, Used0) ->
-    {function,_,_,_,Code} = dict:fetch(F, All),
+    {function,_,_,_,Code} = map_get(F, All),
     {Fs,Used} = update_work_list(Code, {Fs0,Used0}),
     find_all_used(Fs, All, Used);
 find_all_used([], _All, Used) -> Used.
@@ -78,9 +76,9 @@ update_work_list([_|Is], Sets) ->
 update_work_list([], Sets) -> Sets.
 
 add_to_work_list(F, {Fs,Used}=Sets) ->
-    case sets:is_element(F, Used) of
+    case cerl_sets:is_element(F, Used) of
 	true -> Sets;
-	false -> {[F|Fs],sets:add_element(F, Used)}
+	false -> {[F|Fs],cerl_sets:add_element(F, Used)}
     end.
 
 
diff --git a/lib/compiler/src/beam_except.erl b/lib/compiler/src/beam_except.erl
index 98831d87a7..49bfb5606f 100644
--- a/lib/compiler/src/beam_except.erl
+++ b/lib/compiler/src/beam_except.erl
@@ -31,7 +31,7 @@
 %%% erlang:error(function_clause, Args)  => jump FuncInfoLabel
 %%%
 
--import(lists, [reverse/1,seq/2]).
+-import(lists, [reverse/1,seq/2,splitwith/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
@@ -74,13 +74,13 @@ translate([I|Is], St, Acc) ->
 translate([], _, Acc) ->
     reverse(Acc).
 
-translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) ->
-    case dig_out(Ar, Acc1) of
+translate_1(Ar, I, Is, #st{arity=Arity}=St, [{line,_}=Line|Acc1]=Acc0) ->
+    case dig_out(Ar, Arity, Acc1) of
 	no ->
 	    translate(Is, St, [I|Acc0]);
-	{yes,{function_clause,Arity},Acc2} ->
+	{yes,function_clause,Acc2} ->
 	    case {Line,St} of
-		{{line,Loc},#st{lbl=Fi,loc=Loc,arity=Arity}} ->
+		{{line,Loc},#st{lbl=Fi,loc=Loc}} ->
 		    Instr = {jump,{f,Fi}},
 		    translate(Is, St, [Instr|Acc2]);
 		{_,_} ->
@@ -92,9 +92,13 @@ translate_1(Ar, I, Is, St, [{line,_}=Line|Acc1]=Acc0) ->
 	    translate(Is, St, [Instr,Line|Acc2])
     end.
 
-dig_out(Ar, [{kill,_}|Is]) ->
-    dig_out(Ar, Is);
-dig_out(1, [{block,Bl0}|Is]) ->
+dig_out(1, _Arity, Is) ->
+    dig_out(Is);
+dig_out(2, Arity, Is) ->
+    dig_out_fc(Arity, Is);
+dig_out(_, _, _) -> no.
+
+dig_out([{block,Bl0}|Is]) ->
     case dig_out_block(reverse(Bl0)) of
 	no -> no;
 	{yes,What,[]} ->
@@ -102,12 +106,7 @@ dig_out(1, [{block,Bl0}|Is]) ->
 	{yes,What,Bl} ->
 	    {yes,What,[{block,Bl}|Is]}
     end;
-dig_out(2, [{block,Bl}|Is]) ->
-    case dig_out_block_fc(Bl) of
-	no -> no;
-	{yes,What} -> {yes,What,Is}
-    end;
-dig_out(_, _) -> no.
+dig_out(_) -> no.
 
 dig_out_block([{set,[{x,0}],[{atom,if_clause}],move}]) ->
     {yes,if_end,[]};
@@ -141,33 +140,64 @@ fix_block_1([{set,[],[],{alloc,Live,{F1,F2,Needed0,F3}}}|Is], Words) ->
 fix_block_1([I|Is], Words) ->
     [I|fix_block_1(Is, Words)].
 
-dig_out_block_fc([{set,[],[],{alloc,Live,_}}|Bl]) ->
-    Regs = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Live-1)]),
-    dig_out_fc(Bl, Regs);
-dig_out_block_fc(_) -> no.
 
-dig_out_fc([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) ->
+dig_out_fc(Arity, Is0) ->
+    Regs0 = maps:from_list([{{x,X},{arg,X}} || X <- seq(0, Arity-1)]),
+    {Is,Acc0} = splitwith(fun({label,_}) -> false;
+                             ({test,_,_,_}) -> false;
+                             (_) -> true
+                          end, Is0),
+    {Regs,Acc} = dig_out_fc_1(reverse(Is), Regs0, Acc0),
+    case is_fc(Arity, Regs) of
+        true ->
+            {yes,function_clause,Acc};
+        false ->
+            no
+    end.
+
+dig_out_fc_1([{block,Bl}|Is], Regs0, Acc) ->
+    Regs = dig_out_fc_block(Bl, Regs0),
+    dig_out_fc_1(Is, Regs, Acc);
+dig_out_fc_1([{bs_set_position,_,_}=I|Is], Regs, Acc) ->
+    dig_out_fc_1(Is, Regs, [I|Acc]);
+dig_out_fc_1([{bs_get_tail,_,_,Live}=I|Is], Regs0, Acc) ->
+    Regs = prune_xregs(Live, Regs0),
+    dig_out_fc_1(Is, Regs, [I|Acc]);
+dig_out_fc_1([_|_], _Regs, _Acc) ->
+    {#{},[]};
+dig_out_fc_1([], Regs, Acc) ->
+    {Regs,Acc}.
+
+dig_out_fc_block([{set,[],[],{alloc,Live,_}}|Is], Regs0) ->
+    Regs = prune_xregs(Live, Regs0),
+    dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,[Dst],[Hd,Tl],put_list}|Is], Regs0) ->
     Regs = Regs0#{Dst=>{cons,get_reg(Hd, Regs0),get_reg(Tl, Regs0)}},
-    dig_out_fc(Is, Regs);
-dig_out_fc([{set,[Dst],[Src],move}|Is], Regs0) ->
+    dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,[Dst],[Src],move}|Is], Regs0) ->
     Regs = Regs0#{Dst=>get_reg(Src, Regs0)},
-    dig_out_fc(Is, Regs);
-dig_out_fc([{set,_,_,_}|_], _Regs) ->
-    %% Unknown instruction. It is not a function_clause error.
-    no;
-dig_out_fc([], Regs) ->
+    dig_out_fc_block(Is, Regs);
+dig_out_fc_block([{set,_,_,_}|_], _Regs) ->
+    %% Unknown instruction. Forget all register contents.
+    #{};
+dig_out_fc_block([], Regs) -> Regs.
+
+prune_xregs(Live, Regs) ->
+    maps:filter(fun({x,X}, _) -> X < Live end, Regs).
+
+is_fc(Arity, Regs) ->
     case Regs of
         #{{x,0}:={atom,function_clause},{x,1}:=Args} ->
-            dig_out_fc_1(Args, 0);
+            is_fc_1(Args, 0) =:= Arity;
         #{} ->
-            no
+            false
     end.
 
-dig_out_fc_1({cons,{arg,I},T}, I) ->
-    dig_out_fc_1(T, I+1);
-dig_out_fc_1(nil, I) ->
-    {yes,{function_clause,I}};
-dig_out_fc_1(_, _) -> no.
+is_fc_1({cons,{arg,I},T}, I) ->
+    is_fc_1(T, I+1);
+is_fc_1(nil, I) ->
+    I;
+is_fc_1(_, _) -> -1.
 
 get_reg(R, Regs) ->
     case Regs of
diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl
index 973d16a1bc..3e6bc1b1ed 100644
--- a/lib/compiler/src/beam_flatten.erl
+++ b/lib/compiler/src/beam_flatten.erl
@@ -32,8 +32,7 @@ module({Mod,Exp,Attr,Fs,Lc}, _Opt) ->
     {ok,{Mod,Exp,Attr,[function(F) || F <- Fs],Lc}}.
 
 function({function,Name,Arity,CLabel,Is0}) ->
-    Is1 = block(Is0),
-    Is = opt(Is1),
+    Is = block(Is0),
     {function,Name,Arity,CLabel,Is}.
 
 block(Is) ->
@@ -43,21 +42,12 @@ block([{block,Is0}|Is1], Acc) -> block(Is1, norm_block(Is0, Acc));
 block([I|Is], Acc) -> block(Is, [I|Acc]);
 block([], Acc) -> reverse(Acc).
 
-norm_block([{set,[],[],{alloc,R,{_,nostack,_,_}=Alloc}}|Is], Acc0) ->
-    case insert_alloc_in_bs_init(Acc0, Alloc) of
-	impossible ->
-	    norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0));
-	Acc ->
-	    norm_block(Is, Acc)
-    end;
 norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) ->
     norm_block(Is, reverse(norm_allocate(Alloc, R), Acc0));
-norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) ->
-    I = {get_list,S,D1,D2},
-    norm_block(Is, [I|Acc]);
-norm_block([I|Is], Acc) -> norm_block(Is, [norm(I)|Acc]);
+norm_block([I|Is], Acc) ->
+    norm_block(Is, [norm(I)|Acc]);
 norm_block([], Acc) -> Acc.
-    
+
 norm({set,[D],As,{bif,N,F}})      -> {bif,N,F,As,D};
 norm({set,[D],As,{alloc,R,{gc_bif,N,F}}}) -> {gc_bif,N,F,R,As,D};
 norm({set,[D],[],init})           -> {init,D};
@@ -91,57 +81,3 @@ norm_allocate({nozero,Ns,0,Inits}, Regs) ->
     [{allocate,Ns,Regs}|Inits];
 norm_allocate({nozero,Ns,Nh,Inits}, Regs) ->
     [{allocate_heap,Ns,Nh,Regs}|Inits].
-
-%% insert_alloc_in_bs_init(ReverseInstructionStream, AllocationInfo) ->
-%%                                  impossible | ReverseInstructionStream'
-%%   A bs_init/6 instruction should not be followed by a test heap instruction.
-%%   Given the AllocationInfo from a test heap instruction, merge the
-%%   allocation amounts into the previous bs_init/6 instruction (if any).
-%%
-insert_alloc_in_bs_init([{bs_put,_,_,_}=I|Is], Alloc) ->
-    %% The instruction sequence ends with an bs_put/4 instruction.
-    %% We'll need to search backwards for the bs_init/6 instruction.
-    insert_alloc_1(Is, Alloc, [I]);
-insert_alloc_in_bs_init(_, _) -> impossible.
-
-insert_alloc_1([{bs_init=Op,Fail,Info0,Live,Ss,Dst}|Is],
-	       {_,nostack,Ws2,[]}, Acc) when is_integer(Live) ->
-    %% The number of extra heap words is always in the second position
-    %% in the Info tuple.
-    Ws1 = element(2, Info0),
-    Al = beam_utils:combine_heap_needs(Ws1, Ws2),
-    Info = setelement(2, Info0, Al),
-    I = {Op,Fail,Info,Live,Ss,Dst},
-    reverse(Acc, [I|Is]);
-insert_alloc_1([{bs_put,_,_,_}=I|Is], Alloc, Acc) ->
-    insert_alloc_1(Is, Alloc, [I|Acc]).
-
-%% opt(Is0) -> Is
-%%  Simple peep-hole optimization to move a {move,Any,{x,0}} past
-%%  any kill up to the next call instruction. (To give the loader
-%%  an opportunity to combine the 'move' and the 'call' instructions.)
-%%
-opt(Is) ->
-    opt_1(Is, []).
-
-opt_1([{move,_,{x,0}}=I|Is0], Acc0) ->
-    case move_past_kill(Is0, I, Acc0) of
-	impossible -> opt_1(Is0, [I|Acc0]);
-	{Is,Acc} -> opt_1(Is, Acc)
-    end;
-opt_1([I|Is], Acc) ->
-    opt_1(Is, [I|Acc]);
-opt_1([], Acc) -> reverse(Acc).
-
-move_past_kill([{kill,Src}|_], {move,Src,_}, _) ->
-    impossible;
-move_past_kill([{kill,_}=I|Is], Move, Acc) ->
-    move_past_kill(Is, Move, [I|Acc]);
-move_past_kill([{trim,N,_}=I|Is], {move,Src,Dst}=Move, Acc) ->
-    case Src of
-	{y,Y} when Y < N-> impossible;
-	{y,Y} -> {Is,[{move,{y,Y-N},Dst},I|Acc]};
-	_ -> {Is,[Move,I|Acc]}
-    end;
-move_past_kill(Is, Move, Acc) ->
-    {Is,[Move|Acc]}.
diff --git a/lib/compiler/src/beam_jump.erl b/lib/compiler/src/beam_jump.erl
index fbff4cfd79..d3a618d211 100644
--- a/lib/compiler/src/beam_jump.erl
+++ b/lib/compiler/src/beam_jump.erl
@@ -144,13 +144,19 @@ module({Mod,Exp,Attr,Fs0,Lc0}, _Opt) ->
 %%
 %%  NOTE: This function assumes that there are no labels inside blocks.
 function({function,Name,Arity,CLabel,Asm0}, Lc0) ->
-    Asm1 = eliminate_moves(Asm0),
-    {Asm2,Lc} = insert_labels(Asm1, Lc0, []),
-    Asm3 = share(Asm2),
-    Asm4 = move(Asm3),
-    Asm5 = opt(Asm4, CLabel),
-    Asm = remove_unused_labels(Asm5),
-    {{function,Name,Arity,CLabel,Asm},Lc}.
+    try
+        Asm1 = eliminate_moves(Asm0),
+        {Asm2,Lc} = insert_labels(Asm1, Lc0, []),
+        Asm3 = share(Asm2),
+        Asm4 = move(Asm3),
+        Asm5 = opt(Asm4, CLabel),
+        Asm = remove_unused_labels(Asm5),
+        {{function,Name,Arity,CLabel,Asm},Lc}
+    catch
+        Class:Error:Stack ->
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
 
 %%%
 %%% Scan instructions in execution order and remove redundant 'move'
@@ -196,22 +202,19 @@ no_fallthrough([I|_]) ->
     is_unreachable_after(I).
 
 already_has_value(Lit, Lbl, Reg, D) ->
-    Key = {Lbl,Reg},
     case D of
-        #{Lbl:=unsafe} ->
-            false;
-        #{Key:=Lit} ->
+        #{Lbl:={Reg,Lit}} ->
             true;
         #{} ->
             false
     end.
 
 update_value_dict([Lit,{f,Lbl}|T], Reg, D0) ->
-    Key = {Lbl,Reg},
     D = case D0 of
-            #{Key := inconsistent} -> D0;
-            #{Key := _} -> D0#{Key := inconsistent};
-            _ -> D0#{Key => Lit}
+            #{Lbl:=unsafe} -> D0;
+            #{Lbl:={Reg,Lit}} -> D0;
+            #{Lbl:=_} -> D0#{Lbl:=unsafe};
+            #{} -> D0#{Lbl=>{Reg,Lit}}
         end,
     update_value_dict(T, Reg, D);
 update_value_dict([], _, D) -> D.
@@ -407,7 +410,7 @@ find_fixpoint(OptFun, Is0) ->
 	Is -> find_fixpoint(OptFun, Is)
     end.
 
-opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) ->
+opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc, St) ->
     %% We have
     %%    Test Label Ops
     %%    jump Label
@@ -416,23 +419,20 @@ opt([{test,_,{f,L}=Lbl,_}=I|[{jump,{f,L}}|_]=Is], Acc0, St0) ->
     case beam_utils:is_pure_test(I) of
 	false ->
 	    %% Test is not pure; we must keep it.
-	    opt(Is, [I|Acc0], label_used(Lbl, St0));
+	    opt(Is, [I|Acc], label_used(Lbl, St));
 	true ->
 	    %% The test is pure and its failure label is the same
 	    %% as in the jump that follows -- thus it is not needed.
-            %% Check if any of the previous instructions could also be eliminated.
-            {Acc,St} = opt_useless_loads(Acc0, L, St0),
 	    opt(Is, Acc, St)
     end;
-opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc0, St0) ->
+opt([{test,_,{f,L}=Lbl,_}=I|[{label,L}|_]=Is], Acc, St) ->
     %% Similar to the above, except we have a fall-through rather than jump
     %%    Test Label Ops
     %%    label Label
     case beam_utils:is_pure_test(I) of
 	false ->
-	    opt(Is, [I|Acc0], label_used(Lbl, St0));
+	    opt(Is, [I|Acc], label_used(Lbl, St));
 	true ->
-            {Acc,St} = opt_useless_loads(Acc0, L, St0),
 	    opt(Is, Acc, St)
     end;
 opt([{test,Test0,{f,L}=Lbl,Ops}=I|[{jump,To}|Is]=Is0], Acc, St) ->
@@ -499,46 +499,6 @@ normalize_replace([{From,To0}|Rest], Replace, Acc) ->
 normalize_replace([], _Replace, Acc) ->
     maps:from_list(Acc).
 
-%% After eliminating a test, it might happen, that a register was only used
-%% in this test. Let's check if that was the case and if it was so, we can
-%% eliminate the load into the register completely.
-opt_useless_loads([{block,_}|_]=Is, L, #st{index={lazy,FIs}}=St) ->
-    opt_useless_loads(Is, L, St#st{index=beam_utils:index_labels(FIs)});
-opt_useless_loads([{block,Block0}|Is], L, #st{index=Index}=St) ->
-    case opt_useless_block_loads(Block0, L, Index) of
-        [] ->
-            opt_useless_loads(Is, L, St);
-        [_|_]=Block ->
-            {[{block,Block}|Is],St}
-    end;
-%% After eliminating the test and useless blocks, it might happen,
-%% that the previous test could also be eliminated.
-%% It might be that the label was already marked as used, even if ultimately,
-%% it never will be - we can't do much about it at that point, though
-opt_useless_loads([{test,_,{f,L},_}=I|Is], L, St) ->
-    case beam_utils:is_pure_test(I) of
-        false ->
-            {[I|Is],St};
-        true ->
-            opt_useless_loads(Is, L, St)
-    end;
-opt_useless_loads(Is, _L, St) ->
-    {Is,St}.
-
-opt_useless_block_loads([{set,[Dst],_,_}=I|Is], L, Index) ->
-    BlockJump = [{block,Is},{jump,{f,L}}],
-    case beam_utils:is_killed(Dst, BlockJump, Index) of
-        true ->
-            %% The register is killed and not used, we can remove the load
-            opt_useless_block_loads(Is, L, Index);
-        false ->
-            [I|opt_useless_block_loads(Is, L, Index)]
-    end;
-opt_useless_block_loads([I|Is], L, Index) ->
-    [I|opt_useless_block_loads(Is, L, Index)];
-opt_useless_block_loads([], _L, _Index) ->
-    [].
-
 collect_labels(Is, Label, #st{entry=Entry,replace=Replace} = St) ->
     collect_labels_1(Is, Label, Entry, Replace, St).
 
diff --git a/lib/compiler/src/beam_listing.erl b/lib/compiler/src/beam_listing.erl
index 8a0ce5b50a..6121593b11 100644
--- a/lib/compiler/src/beam_listing.erl
+++ b/lib/compiler/src/beam_listing.erl
@@ -66,7 +66,7 @@ module(Stream, [_|_]=Fs) ->
     foreach(fun (F) -> io:format(Stream, "~p.\n", [F]) end, Fs).
 
 format_asm([{label,L}|Is]) ->
-    ["  {label,",integer_to_list(L),"}.\n"|format_asm(Is)];
+    [io_lib:format("  {label,~p}.\n", [L])|format_asm(Is)];
 format_asm([I|Is]) ->
     [io_lib:format("    ~p", [I]),".\n"|format_asm(Is)];
 format_asm([]) -> [].
diff --git a/lib/compiler/src/beam_ssa.erl b/lib/compiler/src/beam_ssa.erl
index 1a2e759965..c5e23d2ae0 100644
--- a/lib/compiler/src/beam_ssa.erl
+++ b/lib/compiler/src/beam_ssa.erl
@@ -198,6 +198,7 @@ no_side_effect(#b_set{op=Op}) ->
         has_map_field -> true;
         is_nonempty_list -> true;
         is_tagged_tuple -> true;
+        make_fun -> true;
         put_map -> true;
         put_list -> true;
         put_tuple -> true;
@@ -578,11 +579,11 @@ used(_) -> [].
 
 -spec definitions(Blocks :: block_map()) -> definition_map().
 definitions(Blocks) ->
-    beam_ssa:fold_instrs_rpo(fun(#b_set{ dst = Var }=I, Acc) ->
-                                     maps:put(Var, I, Acc);
-                                (_Terminator, Acc) ->
-                                     Acc
-                             end, [0], #{}, Blocks).
+    fold_instrs_rpo(fun(#b_set{ dst = Var }=I, Acc) ->
+                            maps:put(Var, I, Acc);
+                       (_Terminator, Acc) ->
+                            Acc
+                    end, [0], #{}, Blocks).
 
 -spec uses(Blocks :: block_map()) -> usage_map().
 uses(Blocks) ->
@@ -592,7 +593,7 @@ uses(Blocks) ->
       From :: [label()],
       Blocks :: block_map().
 uses(From, Blocks) ->
-    beam_ssa:fold_rpo(fun fold_uses_block/3, From, #{}, Blocks).
+    fold_rpo(fun fold_uses_block/3, From, #{}, Blocks).
 
 fold_uses_block(Lbl, #b_blk{is=Is,last=Last}, UseMap0) ->
     F = fun(I, UseMap) ->
@@ -600,7 +601,7 @@ fold_uses_block(Lbl, #b_blk{is=Is,last=Last}, UseMap0) ->
                               Uses0 = maps:get(Var, Acc, []),
                               Uses = [{Lbl, I} | Uses0],
                               maps:put(Var, Uses, Acc)
-                      end, UseMap, beam_ssa:used(I))
+                      end, UseMap, used(I))
         end,
     F(Last, foldl(F, UseMap0, Is)).
 
@@ -797,8 +798,8 @@ split_blocks_1([L|Ls], P, Blocks0, Count0) ->
             BefBlk = Blk#b_blk{is=Bef,last=Br},
             NewBlk = Blk#b_blk{is=Aft},
             Blocks1 = Blocks0#{L:=BefBlk,NewLbl=>NewBlk},
-            Successors = beam_ssa:successors(NewBlk),
-            Blocks = beam_ssa:update_phi_labels(Successors, L, NewLbl, Blocks1),
+            Successors = successors(NewBlk),
+            Blocks = update_phi_labels(Successors, L, NewLbl, Blocks1),
             split_blocks_1([NewLbl|Ls], P, Blocks, Count);
         no ->
             split_blocks_1(Ls, P, Blocks0, Count0)
diff --git a/lib/compiler/src/beam_ssa_bsm.erl b/lib/compiler/src/beam_ssa_bsm.erl
index 2efeb6b5b6..9631bf3334 100644
--- a/lib/compiler/src/beam_ssa_bsm.erl
+++ b/lib/compiler/src/beam_ssa_bsm.erl
@@ -354,6 +354,11 @@ amb_get_alias(#b_var{}=Arg, Lbl, State) ->
         error ->
             {Arg, State}
     end;
+amb_get_alias(#b_remote{mod=Mod0,name=Name0}=Arg0, Lbl, State0) ->
+    {Mod, State1} = amb_get_alias(Mod0, Lbl, State0),
+    {Name, State} = amb_get_alias(Name0, Lbl, State1),
+    Arg = Arg0#b_remote{mod=Mod,name=Name},
+    {Arg, State};
 amb_get_alias(Arg, _Lbl, State) ->
     {Arg, State}.
 
@@ -391,15 +396,22 @@ amb_insert_promotions(Blocks0, State) ->
                 Block = maps:get(Lbl, Blocks),
 
                 Alias = Promotion#b_set.dst,
-                {Before, After} = splitwith(fun(#b_set{args=Args}) ->
-                                                    not member(Alias, Args)
-                                            end, Block#b_blk.is),
+                {Before, After} = splitwith(
+                                    fun(#b_set{args=Args}) ->
+                                            not is_var_in_args(Alias, Args)
+                                    end, Block#b_blk.is),
                 Is = Before ++ [Promotion | After],
 
                 maps:put(Lbl, Block#b_blk{is=Is}, Blocks)
         end,
     maps:fold(F, Blocks0, State#amb.promotions).
 
+is_var_in_args(Var, [Var | _]) -> true;
+is_var_in_args(Var, [#b_remote{name=Var} | _]) -> true;
+is_var_in_args(Var, [#b_remote{mod=Var} | _]) -> true;
+is_var_in_args(Var, [_ | Args]) -> is_var_in_args(Var, Args);
+is_var_in_args(_Var, []) -> false.
+
 %%%
 %%% Subpasses
 %%%
@@ -732,6 +744,10 @@ aca_cs_args([Arg | Args], VRs) ->
 aca_cs_args([], _VRs) ->
     [].
 
+aca_cs_arg(#b_remote{mod=Mod0,name=Name0}=Rem, VRs) ->
+    Mod = aca_cs_arg(Mod0, VRs),
+    Name = aca_cs_arg(Name0, VRs),
+    Rem#b_remote{mod=Mod,name=Name};
 aca_cs_arg(Arg, VRs) ->
     case VRs of
         #{ Arg := New } -> New;
diff --git a/lib/compiler/src/beam_ssa_codegen.erl b/lib/compiler/src/beam_ssa_codegen.erl
index 1c7563faa0..3c14062d0b 100644
--- a/lib/compiler/src/beam_ssa_codegen.erl
+++ b/lib/compiler/src/beam_ssa_codegen.erl
@@ -231,7 +231,7 @@ need_heap_never(_) -> false.
 
 need_heap_blks([{L,#cg_blk{is=Is0}=Blk0}|Bs], H0, Acc) ->
     {Is1,H1} = need_heap_is(reverse(Is0), H0, []),
-    {Ns,H} = need_heap_terminator(Bs, H1),
+    {Ns,H} = need_heap_terminator(Bs, L, H1),
     Is = Ns ++ Is1,
     Blk = Blk0#cg_blk{is=Is},
     need_heap_blks(Bs, H, [{L,Blk}|Acc]);
@@ -241,6 +241,13 @@ need_heap_blks([], H, Acc) ->
 need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) ->
     Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)},
     need_heap_is(Is, #need{}, [Alloc|Acc]);
+need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) ->
+    Alloc = case need_heap_need(N) of
+                [#cg_alloc{words=Need}] -> alloc(Need);
+                [] -> 0
+            end,
+    I = I0#cg_set{anno=Anno#{alloc=>Alloc}},
+    need_heap_is(Is, #need{}, [I|Acc]);
 need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
     case classify_heap_need(Op, Args) of
         {put,Words} ->
@@ -256,11 +263,31 @@ need_heap_is([#cg_set{op=Op,args=Args}=I|Is], N, Acc) ->
 need_heap_is([], N, Acc) ->
     {Acc,N}.
 
-need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=Same,fail=Same}}}|_], N) ->
+need_heap_terminator([{_,#cg_blk{last=#cg_br{succ=L,fail=L}}}|_], L, N) ->
+    %% Fallthrough.
     {[],N};
-need_heap_terminator([{_,#cg_blk{}}|_], N) ->
+need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) ->
+    case need_heap_need(N) of
+        [] ->
+            {[],#need{}};
+        [_|_]=Alloc ->
+            %% If the preceding instructions are a binary construction,
+            %% hoist the allocation and incorporate it into the bs_init
+            %% instruction.
+            case reverse(Is) of
+                [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] ->
+                    {[],N};
+                [#cg_set{op=bs_put}|_] ->
+                    {[],N};
+                _ ->
+                    %% Not binary construction. Must emit an allocation
+                    %% instruction in this block.
+                    {Alloc,#need{}}
+            end
+    end;
+need_heap_terminator([{_,#cg_blk{}}|_], _, N) ->
     {need_heap_need(N),#need{}};
-need_heap_terminator([], H) ->
+need_heap_terminator([], _, H) ->
     {need_heap_need(H),#need{}}.
 
 need_heap_need(#need{h=0,f=0}) -> [];
@@ -1041,12 +1068,13 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
           #cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
     Fail = bif_fail(Fail0),
     Line = line(Anno),
+    Alloc = map_get(alloc, Anno),
     [#b_literal{val=Kind}|Args1] = Args0,
     case Kind of
         new ->
             [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Live = get_live(I),
-            {[Line|cg_bs_init(Dst, Size, Unit, Live, Fail)],St};
+            {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St};
         private_append ->
             [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Flags = {field_flags,[]},
@@ -1056,7 +1084,7 @@ cg_block([#cg_set{op=bs_init,dst=Dst0,args=Args0,anno=Anno}=I,
             [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
             Flags = {field_flags,[]},
             Live = get_live(I),
-            Is = [Line,{bs_append,Fail,Bits,0,Live,Unit,Src,Flags,Dst}],
+            Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}],
             {Is,St}
     end;
 cg_block([#cg_set{anno=Anno,op=bs_start_match,dst=Ctx0,args=[Bin0]}=I,
@@ -1204,6 +1232,12 @@ cg_copy_1([#cg_set{dst=Dst0,args=Args}|T], St) ->
     end;
 cg_copy_1([], _St) -> [].
 
+-define(IS_LITERAL(Val), (Val =:= nil orelse
+                          element(1, Val) =:= integer orelse
+                          element(1, Val) =:= float orelse
+                          element(1, Val) =:= atom orelse
+                          element(1, Val) =:= literal)).
+
 bif_to_test('and', [V1,V2], Fail) ->
     [{test,is_eq_exact,Fail,[V1,{atom,true}]},
      {test,is_eq_exact,Fail,[V2,{atom,true}]}];
@@ -1217,15 +1251,99 @@ bif_to_test('or', [V1,V2], {f,Lbl}=Fail) when Lbl =/= 0 ->
 bif_to_test('not', [Var], Fail) ->
     [{test,is_eq_exact,Fail,[Var,{atom,false}]}];
 bif_to_test(Name, Args, Fail) ->
-    [beam_utils:bif_to_test(Name, Args, Fail)].
+    [bif_to_test_1(Name, Args, Fail)].
+
+bif_to_test_1(is_atom,     [_]=Ops, Fail) ->
+    {test,is_atom,Fail,Ops};
+bif_to_test_1(is_boolean,  [_]=Ops, Fail) ->
+    {test,is_boolean,Fail,Ops};
+bif_to_test_1(is_binary,   [_]=Ops, Fail) ->
+    {test,is_binary,Fail,Ops};
+bif_to_test_1(is_bitstring,[_]=Ops, Fail) ->
+    {test,is_bitstr,Fail,Ops};
+bif_to_test_1(is_float,    [_]=Ops, Fail) ->
+    {test,is_float,Fail,Ops};
+bif_to_test_1(is_function, [_]=Ops, Fail) ->
+    {test,is_function,Fail,Ops};
+bif_to_test_1(is_function, [_,_]=Ops, Fail) ->
+    {test,is_function2,Fail,Ops};
+bif_to_test_1(is_integer,  [_]=Ops, Fail) ->
+    {test,is_integer,Fail,Ops};
+bif_to_test_1(is_list,     [_]=Ops, Fail) ->
+    {test,is_list,Fail,Ops};
+bif_to_test_1(is_map,      [_]=Ops, Fail) ->
+    {test,is_map,Fail,Ops};
+bif_to_test_1(is_number,   [_]=Ops, Fail) ->
+    {test,is_number,Fail,Ops};
+bif_to_test_1(is_pid,      [_]=Ops, Fail) ->
+    {test,is_pid,Fail,Ops};
+bif_to_test_1(is_port,     [_]=Ops, Fail) ->
+    {test,is_port,Fail,Ops};
+bif_to_test_1(is_reference, [_]=Ops, Fail) ->
+    {test,is_reference,Fail,Ops};
+bif_to_test_1(is_tuple,    [_]=Ops, Fail) ->
+    {test,is_tuple,Fail,Ops};
+bif_to_test_1('=<', [A,B], Fail) ->
+    {test,is_ge,Fail,[B,A]};
+bif_to_test_1('>', [A,B], Fail) ->
+    {test,is_lt,Fail,[B,A]};
+bif_to_test_1('<', [_,_]=Ops, Fail) ->
+    {test,is_lt,Fail,Ops};
+bif_to_test_1('>=', [_,_]=Ops, Fail) ->
+    {test,is_ge,Fail,Ops};
+bif_to_test_1('==', [C,A], Fail) when ?IS_LITERAL(C) ->
+    {test,is_eq,Fail,[A,C]};
+bif_to_test_1('==', [_,_]=Ops, Fail) ->
+    {test,is_eq,Fail,Ops};
+bif_to_test_1('/=', [C,A], Fail) when ?IS_LITERAL(C) ->
+    {test,is_ne,Fail,[A,C]};
+bif_to_test_1('/=', [_,_]=Ops, Fail) ->
+    {test,is_ne,Fail,Ops};
+bif_to_test_1('=:=', [C,A], Fail) when ?IS_LITERAL(C) ->
+    {test,is_eq_exact,Fail,[A,C]};
+bif_to_test_1('=:=', [_,_]=Ops, Fail) ->
+    {test,is_eq_exact,Fail,Ops};
+bif_to_test_1('=/=', [C,A], Fail) when ?IS_LITERAL(C) ->
+    {test,is_ne_exact,Fail,[A,C]};
+bif_to_test_1('=/=', [_,_]=Ops, Fail) ->
+    {test,is_ne_exact,Fail,Ops}.
 
 opt_call_moves(Is0, Arity) ->
     {Moves0,Is} = splitwith(fun({move,_,_}) -> true;
+                               ({kill,_}) -> true;
                                (_) -> false
                             end, Is0),
     Moves = opt_call_moves_1(Moves0, Arity),
     Moves ++ Is.
 
+opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1|[{kill,_}|_]=Is], Arity) ->
+    %% There could be a {move,Tmp,{x,0}} instruction after the
+    %% kill/1 instructions (moved there by opt_move_to_x0/1).
+    case splitwith(fun({kill,_}) -> true;
+                      (_) -> false
+                   end, Is) of
+        {Kills,[{move,{x,_}=Tmp,{x,0}}=M2]} ->
+            %% The two move/2 instructions (M1 and M2) can be combined
+            %% to one. The question is, though, is it safe to place
+            %% them after the kill/1 instructions?
+            case is_killed(Src, Kills, Arity) of
+                true ->
+                    %% Src (a Y register) is killed by one of the
+                    %% kill/1 instructions. Thus M1 and M2
+                    %% must be placed before the kill/1 instructions
+                    %% (essentially undoing what opt_move_to_x0/1
+                    %% did, which turned out to be a pessimization
+                    %% in this case).
+                    opt_call_moves_1([M1,M2|Kills], Arity);
+                false ->
+                    %% Src is not killed by any of the kill/1
+                    %% instructions. Thus it is safe to place
+                    %% M1 and M2 after the kill/1 instructions.
+                    opt_call_moves_1(Kills++[M1,M2], Arity)
+            end;
+        {_,_} ->
+            [M1|Is]
+    end;
 opt_call_moves_1([{move,Src,{x,_}=Tmp}=M1,{move,Tmp,Dst}=M2|Is], Arity) ->
     case is_killed(Tmp, Is, Arity) of
         true ->
@@ -1239,6 +1357,10 @@ opt_call_moves_1([M|Ms], Arity) ->
     [M|opt_call_moves_1(Ms, Arity)];
 opt_call_moves_1([], _Arity) -> [].
 
+is_killed(Y, [{kill,Y}|_], _) ->
+    true;
+is_killed(R, [{kill,_}|Is], Arity) ->
+    is_killed(R, Is, Arity);
 is_killed(R, [{move,R,_}|_], _) ->
     false;
 is_killed(R, [{move,_,R}|_], _) ->
@@ -1246,7 +1368,9 @@ is_killed(R, [{move,_,R}|_], _) ->
 is_killed(R, [{move,_,_}|Is], Arity) ->
     is_killed(R, Is, Arity);
 is_killed({x,X}, [], Arity) ->
-    X >= Arity.
+    X >= Arity;
+is_killed({y,_}, [], _) ->
+    false.
 
 cg_alloc(#cg_alloc{stack=none,words=#need{h=0,f=0}}, _St) ->
     [];
@@ -1527,13 +1651,13 @@ cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) ->
             [{Op,Fail,{field_flags,Flags},Src}]
     end.
 
-cg_bs_init(Dst, Size0, Unit, Live, Fail) ->
+cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) ->
     Op = case Unit of
              1 -> bs_init_bits;
              8 -> bs_init2
          end,
     Size = cg_bs_init_size(Size0),
-    [{Op,Fail,Size,0,Live,{field_flags,[]},Dst}].
+    [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}].
 
 cg_bs_init_size({x,_}=R) -> R;
 cg_bs_init_size({y,_}=R) -> R;
@@ -1652,12 +1776,41 @@ phi_copies([#b_set{dst=Dst,args=PhiArgs}|Sets], L) ->
     [#cg_set{op=copy,dst=Dst,args=CopyArgs}|phi_copies(Sets, L)];
 phi_copies([], _) -> [].
 
+%% opt_move_to_x0([Instruction]) -> [Instruction].
+%%  Simple peep-hole optimization to move a {move,Any,{x,0}} past
+%%  any kill up to the next call instruction. (To give the loader
+%%  an opportunity to combine the 'move' and the 'call' instructions.)
+
+opt_move_to_x0(Moves) ->
+    opt_move_to_x0(Moves, []).
+
+opt_move_to_x0([{move,_,{x,0}}=I|Is0], Acc0) ->
+    case move_past_kill(Is0, I, Acc0) of
+       impossible -> opt_move_to_x0(Is0, [I|Acc0]);
+       {Is,Acc} -> opt_move_to_x0(Is, Acc)
+    end;
+opt_move_to_x0([I|Is], Acc) ->
+    opt_move_to_x0(Is, [I|Acc]);
+opt_move_to_x0([], Acc) -> reverse(Acc).
+
+move_past_kill([{kill,Src}|_], {move,Src,_}, _) ->
+    impossible;
+move_past_kill([{kill,_}=I|Is], Move, Acc) ->
+    move_past_kill(Is, Move, [I|Acc]);
+move_past_kill(Is, Move, Acc) ->
+    {Is,[Move|Acc]}.
+
 %% setup_args(Args, Anno, Context) -> [Instruction].
 %% setup_args(Args) -> [Instruction].
 %%  Set up X registers for a call.
 
 setup_args(Args, Anno, none, St) ->
-    setup_args(Args) ++ kill_yregs(Anno, St);
+    case {setup_args(Args),kill_yregs(Anno, St)} of
+        {Moves,[]} ->
+            Moves;
+        {Moves,Kills} ->
+            opt_move_to_x0(Moves ++ Kills)
+    end;
 setup_args(Args, _, _, _) ->
     setup_args(Args).
 
diff --git a/lib/compiler/src/beam_ssa_dead.erl b/lib/compiler/src/beam_ssa_dead.erl
index c20652580d..067d9a6741 100644
--- a/lib/compiler/src/beam_ssa_dead.erl
+++ b/lib/compiler/src/beam_ssa_dead.erl
@@ -135,7 +135,8 @@ shortcut_terminator(Last, _Is, _Bs, _St) ->
     Last.
 
 shortcut_switch([{Lit,L0}|T], Bool, Bs, St0) ->
-    St = St0#st{rel_op=normalize_op({bif,'=:='}, [Bool,Lit])},
+    RelOp = {'=:=',Bool,Lit},
+    St = St0#st{rel_op=RelOp},
     #b_br{bool=#b_literal{val=true},succ=L} =
         shortcut(L0, bind_var(Bool, Lit, Bs), St#st{target=one_way}),
     [{Lit,L}|shortcut_switch(T, Bool, Bs, St0)];
@@ -388,41 +389,43 @@ eval_terminator(#b_switch{arg=Arg,fail=Fail,list=List}=Sw, Bs, St) ->
             %% Literal argument. Simplify to a `br`.
             beam_ssa:normalize(Sw#b_switch{arg=Val});
         #b_var{} ->
-            case St of
-                #st{rel_op=none} ->
-                    %% No previous relational operator is stored.
-                    %% Give up.
+            %% Try optimizing the switch.
+            case eval_switch(List, Arg, St, Fail) of
+                none ->
                     none;
-                #st{} ->
-                    %% There is a previous relational operator stored.
-                    %% Try optimizing the switch.
-                    case eval_switch(List, Arg, St, Fail) of
-                        none ->
-                            none;
-                        To when is_integer(To) ->
-                            %% Either one of the values in the switch
-                            %% matched a previous value in a '=:=' test, or
-                            %% none of the values matched a previous test.
-                            #b_br{bool=#b_literal{val=true},succ=To,fail=To}
-                    end
+                To when is_integer(To) ->
+                    %% Either one of the values in the switch
+                    %% matched a previous value in a '=:=' test, or
+                    %% none of the values matched a previous test.
+                    #b_br{bool=#b_literal{val=true},succ=To,fail=To}
             end
     end;
 eval_terminator(#b_ret{}, _Bs, _St) ->
     none.
 
-eval_switch([{Lit,Lbl}|T], Arg, St, Fail) ->
-    case eval_rel_op({bif,'=:='}, [Arg,Lit], St) of
-        none ->
-            %% This label could be reached.
-            eval_switch(T, Arg, St, none);
-        #b_literal{val=false} ->
-            %% This branch will never be taken.
-            eval_switch(T, Arg, St, Fail);
-        #b_literal{val=true} ->
+eval_switch(List, Arg, #st{rel_op={_,Arg,_}=PrevOp}, Fail) ->
+    %% There is a previous relational operator testing the same variable.
+    %% Optimization may be possible.
+    eval_switch_1(List, Arg, PrevOp, Fail);
+eval_switch(_, _, _, _) ->
+    %% There is either no previous relational operator, or it tests
+    %% a different variable. Nothing to optimize.
+    none.
+
+eval_switch_1([{Lit,Lbl}|T], Arg, PrevOp, Fail) ->
+    RelOp = {'=:=',Arg,Lit},
+    case will_succeed(PrevOp, RelOp) of
+        yes ->
             %% Success. This branch will always be taken.
-            Lbl
+            Lbl;
+        no ->
+            %% This branch will never be taken.
+            eval_switch_1(T, Arg, PrevOp, Fail);
+        maybe ->
+            %% This label could be reached.
+            eval_switch_1(T, Arg, PrevOp, none)
     end;
-eval_switch([], _Arg, _St, Fail) ->
+eval_switch_1([], _Arg, _PrevOp, Fail) ->
     %% Fail is now either the failure label or 'none'.
     Fail.
 
diff --git a/lib/compiler/src/beam_ssa_funs.erl b/lib/compiler/src/beam_ssa_funs.erl
new file mode 100644
index 0000000000..38df50fd74
--- /dev/null
+++ b/lib/compiler/src/beam_ssa_funs.erl
@@ -0,0 +1,149 @@
+%%
+%% %CopyrightBegin%
+%%
+%% Copyright Ericsson AB 2018. All Rights Reserved.
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at
+%%
+%%     http://www.apache.org/licenses/LICENSE-2.0
+%%
+%% Unless required by applicable law or agreed to in writing, software
+%% distributed under the License is distributed on an "AS IS" BASIS,
+%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+%% See the License for the specific language governing permissions and
+%% limitations under the License.
+%%
+%% %CopyrightEnd%
+%%
+
+%%%
+%%% If a fun is defined locally and only used for calls, it can be replaced
+%%% with direct calls to the relevant function. This greatly speeds up "named
+%%% functions" (which rely on make_fun to recreate themselves) and macros that
+%%% wrap their body in a fun.
+%%%
+
+-module(beam_ssa_funs).
+
+-export([module/2]).
+
+-include("beam_ssa.hrl").
+
+-import(lists, [foldl/3]).
+
+-spec module(Module, Options) -> Result when
+      Module :: beam_ssa:b_module(),
+      Options :: [compile:option()],
+      Result :: {ok, beam_ssa:b_module()}.
+
+module(#b_module{body=Fs0}=Module, _Opts) ->
+    Trampolines = foldl(fun find_trampolines/2, #{}, Fs0),
+    Fs = [lfo(F, Trampolines) || F <- Fs0],
+    {ok, Module#b_module{body=Fs}}.
+
+%% If a function does absolutely nothing beyond calling another function with
+%% the same arguments in the same order, we can shave off a call by short-
+%% circuiting it.
+find_trampolines(#b_function{args=Args,bs=Blocks}=F, Trampolines) ->
+    case maps:get(0, Blocks) of
+        #b_blk{is=[#b_set{op=call,
+                          args=[#b_local{}=Actual | Args],
+                          dst=Dst}],
+               last=#b_ret{arg=Dst}} ->
+            {_, Name, Arity} = beam_ssa:get_anno(func_info, F),
+            Trampoline = #b_local{name=#b_literal{val=Name},arity=Arity},
+            maps:put(Trampoline, Actual, Trampolines);
+        _ ->
+            Trampolines
+    end.
+
+lfo(#b_function{bs=Blocks0}=F, Trampolines) ->
+    Linear0 = beam_ssa:linearize(Blocks0),
+    Linear = lfo_optimize(Linear0, lfo_analyze(Linear0, #{}), Trampolines),
+    F#b_function{bs=maps:from_list(Linear)}.
+
+%% Gather a map of the locally defined funs that are only used for calls.
+lfo_analyze([{_L,#b_blk{is=Is,last=Last}}|Bs], LFuns0) ->
+    LFuns = lfo_analyze_last(Last, lfo_analyze_is(Is, LFuns0)),
+    lfo_analyze(Bs, LFuns);
+lfo_analyze([], LFuns) ->
+    LFuns.
+
+lfo_analyze_is([#b_set{op=make_fun,
+                       dst=Dst,
+                       args=[#b_local{} | FreeVars]}=Def | Is],
+               LFuns0) ->
+    LFuns = maps:put(Dst, Def, maps:without(FreeVars, LFuns0)),
+    lfo_analyze_is(Is, LFuns);
+lfo_analyze_is([#b_set{op=call,
+                       args=[Fun | CallArgs]} | Is],
+               LFuns) when is_map_key(Fun, LFuns) ->
+    #b_set{args=[#b_local{arity=Arity} | FreeVars]} = maps:get(Fun, LFuns),
+    case length(CallArgs) + length(FreeVars) of
+        Arity ->
+            lfo_analyze_is(Is, maps:without(CallArgs, LFuns));
+        _ ->
+            %% This will `badarity` at runtime, and it's easier to disable the
+            %% optimization than to simulate it.
+            lfo_analyze_is(Is, maps:without([Fun | CallArgs], LFuns))
+    end;
+lfo_analyze_is([#b_set{args=Args} | Is], LFuns) when map_size(LFuns) =/= 0 ->
+    %% We disqualify funs that are used outside calls because this forces them
+    %% to be created anyway, and the slight performance gain from direct calls
+    %% is not enough to offset the potential increase in stack frame size (the
+    %% free variables need to be kept alive until the call).
+    %%
+    %% This is also a kludge to make HiPE work, as the latter will generate
+    %% code with the assumption that the functions referenced in a make_fun
+    %% will only be used by funs, which will not be the case if we mix it with
+    %% direct calls. See cerl_cconv.erl for details.
+    %%
+    %% Future optimizations like delaying fun creation until use may require us
+    %% to copy affected functions so that HiPE gets its own to play with (until
+    %% HiPE is fixed anyway).
+    lfo_analyze_is(Is, maps:without(Args, LFuns));
+lfo_analyze_is([_ | Is], LFuns) ->
+    lfo_analyze_is(Is, LFuns);
+lfo_analyze_is([], LFuns) ->
+    LFuns.
+
+lfo_analyze_last(#b_switch{arg=Arg}, LFuns) ->
+    maps:remove(Arg, LFuns);
+lfo_analyze_last(#b_ret{arg=Arg}, LFuns) ->
+    maps:remove(Arg, LFuns);
+lfo_analyze_last(_, LFuns) ->
+    LFuns.
+
+%% Replace all calls of suitable funs with a direct call to their
+%% implementation. Liveness optimization will get rid of the make_fun
+%% instruction.
+lfo_optimize(Linear, LFuns, _Trampolines) when map_size(LFuns) =:= 0 ->
+    Linear;
+lfo_optimize(Linear, LFuns, Trampolines) ->
+    lfo_optimize_1(Linear, LFuns, Trampolines).
+
+lfo_optimize_1([{L,#b_blk{is=Is0}=Blk}|Bs], LFuns, Trampolines) ->
+    Is = lfo_optimize_is(Is0, LFuns, Trampolines),
+    [{L,Blk#b_blk{is=Is}} | lfo_optimize_1(Bs, LFuns, Trampolines)];
+lfo_optimize_1([], _LFuns, _Trampolines) ->
+    [].
+
+lfo_optimize_is([#b_set{op=call,
+                        args=[Fun | CallArgs]}=Call0 | Is],
+                LFuns, Trampolines) when is_map_key(Fun, LFuns) ->
+    #b_set{args=[Local | FreeVars]} = maps:get(Fun, LFuns),
+    Args = [lfo_short_circuit(Local, Trampolines) | CallArgs ++ FreeVars],
+    Call = beam_ssa:add_anno(local_fun_opt, Fun, Call0#b_set{args=Args}),
+    [Call | lfo_optimize_is(Is, LFuns, Trampolines)];
+lfo_optimize_is([I | Is], LFuns, Trampolines) ->
+    [I | lfo_optimize_is(Is, LFuns, Trampolines)];
+lfo_optimize_is([], _LFuns, _Trampolines) ->
+    [].
+
+lfo_short_circuit(Call, Trampolines) ->
+    case maps:find(Call, Trampolines) of
+        {ok, Other} -> lfo_short_circuit(Other, Trampolines);
+        error -> Call
+    end.
diff --git a/lib/compiler/src/beam_ssa_pre_codegen.erl b/lib/compiler/src/beam_ssa_pre_codegen.erl
index 36137ef046..c60c6da9ea 100644
--- a/lib/compiler/src/beam_ssa_pre_codegen.erl
+++ b/lib/compiler/src/beam_ssa_pre_codegen.erl
@@ -131,6 +131,10 @@ passes(Opts) ->
           ?PASS(find_yregs),
           ?PASS(reserve_yregs),
 
+          %% Handle legacy binary match instructions that don't
+          %% accept a Y register as destination.
+          ?PASS(legacy_bs),
+
           %% Improve reuse of Y registers to potentially
           %% reduce the size of the stack frame.
           ?PASS(copy_retval),
@@ -612,6 +616,59 @@ bs_subst_ctx(#b_var{}=Var, CtxChain) ->
 bs_subst_ctx(Other, _CtxChain) ->
     Other.
 
+%% legacy_bs(St0) -> St.
+%%  Binary matching instructions in OTP 21 and earlier don't support
+%%  a Y register as destination. If St#st.use_bsm3 is false,
+%%  we will need to rewrite those instructions so that the result
+%%  is first put in an X register and then moved to a Y register
+%%  if the operation succeeded.
+
+legacy_bs(#st{use_bsm3=false,ssa=Blocks0,cnt=Count0,res=Res}=St) ->
+    IsYreg = maps:from_list([{V,true} || {V,{y,_}} <- Res]),
+    Linear0 = beam_ssa:linearize(Blocks0),
+    {Linear,Count} = legacy_bs(Linear0, IsYreg, Count0, #{}, []),
+    Blocks = maps:from_list(Linear),
+    St#st{ssa=Blocks,cnt=Count};
+legacy_bs(#st{use_bsm3=true}=St) -> St.
+
+legacy_bs([{L,Blk}|Bs], IsYreg, Count0, Copies0, Acc) ->
+    #b_blk{is=Is0,last=Last} = Blk,
+    Is1 = case Copies0 of
+              #{L:=Copy} -> [Copy|Is0];
+              #{} -> Is0
+          end,
+    {Is,Count,Copies} = legacy_bs_is(Is1, Last, IsYreg, Count0, Copies0, []),
+    legacy_bs(Bs, IsYreg, Count, Copies, [{L,Blk#b_blk{is=Is}}|Acc]);
+legacy_bs([], _IsYreg, Count, _Copies, Acc) ->
+    {Acc,Count}.
+
+legacy_bs_is([#b_set{op=Op,dst=Dst}=I0,
+              #b_set{op=succeeded,dst=SuccDst,args=[Dst]}=SuccI0],
+             Last, IsYreg, Count0, Copies0, Acc) ->
+    NeedsFix = is_map_key(Dst, IsYreg) andalso
+        case Op of
+            bs_get -> true;
+            bs_init -> true;
+            _ -> false
+        end,
+    case NeedsFix of
+        true ->
+            TempDst = #b_var{name={'@bs_temp_dst',Count0}},
+            Count = Count0 + 1,
+            I = I0#b_set{dst=TempDst},
+            SuccI = SuccI0#b_set{args=[TempDst]},
+            Copy = #b_set{op=copy,dst=Dst,args=[TempDst]},
+            #b_br{bool=SuccDst,succ=SuccL} = Last,
+            Copies = Copies0#{SuccL=>Copy},
+            legacy_bs_is([], Last, IsYreg, Count, Copies, [SuccI,I|Acc]);
+        false ->
+            legacy_bs_is([], Last, IsYreg, Count0, Copies0, [SuccI0,I0|Acc])
+    end;
+legacy_bs_is([I|Is], Last, IsYreg, Count, Copies, Acc) ->
+    legacy_bs_is(Is, Last, IsYreg, Count, Copies, [I|Acc]);
+legacy_bs_is([], _Last, _IsYreg, Count, Copies, Acc) ->
+    {reverse(Acc),Count,Copies}.
+
 %% sanitize(St0) -> St.
 %%  Remove constructs that can cause problems later:
 %%
@@ -652,21 +709,24 @@ sanitize([], Count, Blocks0, Values) ->
          false -> remove_unreachable(Ls, Blocks, Reachable, [])
      end,Count}.
 
-sanitize_is([#b_set{op=get_map_element,
-                    args=[#b_literal{}=Map,Key]}=I0|Is],
-            Count0, Values, _Changed, Acc) ->
-    {MapVar,Count} = new_var('@ssa_map', Count0),
-    I = I0#b_set{args=[MapVar,Key]},
-    Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
-    sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
+            Count0, Values, Changed, Acc) ->
+    case sanitize_args(Args0, Values) of
+        [#b_literal{}=Map,Key] ->
+            %% Bind the literal map to a variable.
+            {MapVar,Count} = new_var('@ssa_map', Count0),
+            I = I0#b_set{args=[MapVar,Key]},
+            Copy = #b_set{op=copy,dst=MapVar,args=[Map]},
+            sanitize_is(Is, Count, Values, true, [I,Copy|Acc]);
+        [_,_]=Args0 ->
+            sanitize_is(Is, Count0, Values, Changed, [I0|Acc]);
+        [_,_]=Args ->
+            I = I0#b_set{args=Args},
+            sanitize_is(Is, Count0, Values, Changed, [I|Acc])
+    end;
 sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
-            Count, Values, Changed, Acc) ->
-    Args = map(fun(Var) ->
-                       case Values of
-                           #{Var:=New} -> New;
-                           #{} -> Var
-                       end
-               end, Args0),
+            Count, Values, Changed0, Acc) ->
+    Args = sanitize_args(Args0, Values),
     case sanitize_instr(Op, Args, I0) of
         {value,Value0} ->
             Value = #b_literal{val=Value0},
@@ -674,7 +734,9 @@ sanitize_is([#b_set{op=Op,dst=Dst,args=Args0}=I0|Is0],
         {ok,I} ->
             sanitize_is(Is0, Count, Values, true, [I|Acc]);
         ok ->
-            sanitize_is(Is0, Count, Values, Changed, [I0|Acc])
+            I = I0#b_set{args=Args},
+            Changed = Changed0 orelse Args =/= Args0,
+            sanitize_is(Is0, Count, Values, Changed, [I|Acc])
     end;
 sanitize_is([], Count, Values, Changed, Acc) ->
     case Changed of
@@ -684,6 +746,14 @@ sanitize_is([], Count, Values, Changed, Acc) ->
             no_change
     end.
 
+sanitize_args(Args, Values) ->
+    map(fun(Var) ->
+                case Values of
+                    #{Var:=New} -> New;
+                    #{} -> Var
+                end
+        end, Args).
+
 sanitize_instr({bif,Bif}, [#b_literal{val=Lit}], _I) ->
     case erl_bifs:is_pure(erlang, Bif, 1) of
         false ->
@@ -973,11 +1043,11 @@ need_frame_1([#b_set{op=call,args=[Func|_]}|Is], Context) ->
         #b_remote{} ->
             %% This is an apply(), which always needs a frame.
             true;
-        #b_var{} ->
-            %% A fun call always needs a frame.
-            true;
+        #b_local{} ->
+            Context =:= body orelse Is =/= [];
         _ ->
-            Context =:= body orelse Is =/= []
+             %% A fun call always needs a frame.
+            true
     end;
 need_frame_1([I|Is], Context) ->
     beam_ssa:clobbers_xregs(I) orelse need_frame_1(Is, Context);
@@ -2141,7 +2211,13 @@ linear_scan(#st{intervals=Intervals0,res=Res}=St0) ->
     Free = init_free(maps:to_list(Res)),
     Intervals1 = [init_interval(Int, Res) || Int <- Intervals0],
     Intervals = sort(Intervals1),
-    IsReserved = fun (#i{reg=Reg}) -> Reg =/= none end,
+    IsReserved = fun(#i{reg=Reg}) ->
+                         case Reg of
+                             none -> false;
+                             {prefer,{_,_}} -> false;
+                             {_,_} -> true
+                         end
+                 end,
     {UnhandledRes,Unhandled} = partition(IsReserved, Intervals),
     L = #l{unhandled_res=UnhandledRes,
            unhandled_any=Unhandled,free=Free},
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
index 18e6e73a46..95fc3bb0e9 100644
--- a/lib/compiler/src/beam_ssa_type.erl
+++ b/lib/compiler/src/beam_ssa_type.erl
@@ -27,9 +27,10 @@
 
 -define(UNICODE_INT, #t_integer{elements={0,16#10FFFF}}).
 
--record(d, {ds :: #{beam_ssa:var_name():=beam_ssa:b_set()},
+-record(d, {ds :: #{beam_ssa:b_var():=beam_ssa:b_set()},
             ls :: #{beam_ssa:label():=type_db()},
-            sub :: #{beam_ssa:var_name():=beam_ssa:value()}
+            once :: cerl_sets:set(beam_ssa:b_var()),
+            sub :: #{beam_ssa:b_var():=beam_ssa:value()}
            }).
 
 -define(ATOM_SET_SIZE, 5).
@@ -56,13 +57,15 @@
       Block :: beam_ssa:b_blk().
 
 opt(Linear, Args) ->
+    UsedOnce = used_once(Linear, Args),
     Ts = maps:from_list([{V,any} || #b_var{}=V <- Args]),
     FakeCall = #b_set{op=call,args=[#b_remote{mod=#b_literal{val=unknown},
                                               name=#b_literal{val=unknown},
                                               arity=0}]},
     Defs = maps:from_list([{Var,FakeCall#b_set{dst=Var}} ||
                               #b_var{}=Var <- Args]),
-    D = #d{ds=Defs,ls=#{0=>Ts},sub=#{}},
+    D = #d{ds=Defs,ls=#{0=>Ts,?BADARG_BLOCK=>#{}},
+           once=UsedOnce,sub=#{}},
     opt_1(Linear, D).
 
 opt_1([{L,Blk}|Bs], #d{ls=Ls}=D) ->
@@ -425,16 +428,43 @@ opt_terminator(#b_ret{}=Ret, _Ts, _Ds) -> Ret.
 
 update_successors(#b_br{bool=#b_literal{val=true},succ=S}, Ts, D) ->
     update_successor(S, Ts, D);
-update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts, D0) ->
-    D = update_successor_bool(Bool, false, Fail, Ts, D0),
-    SuccTs = infer_types(Bool, Ts, D0),
-    update_successor_bool(Bool, true, Succ, SuccTs, D);
-update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts, D0) ->
-    D = update_successor(Fail, Ts, D0),
-    foldl(fun({Val,S}, A) ->
-                  T = get_type(Val, Ts),
-                  update_successor(S, Ts#{V=>T}, A)
-          end, D, List);
+update_successors(#b_br{bool=#b_var{}=Bool,succ=Succ,fail=Fail}, Ts0, D0) ->
+    case cerl_sets:is_element(Bool, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this br terminator. Therefore, there is
+            %% no need to include it in the type database passed on to the
+            %% successors of this block.
+            Ts = maps:remove(Bool, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            SuccTs = infer_types(Bool, Ts, D0),
+            update_successor(Succ, SuccTs, D);
+        false ->
+            D = update_successor_bool(Bool, false, Fail, Ts0, D0),
+            SuccTs = infer_types(Bool, Ts0, D0),
+            update_successor_bool(Bool, true, Succ, SuccTs, D)
+    end;
+update_successors(#b_switch{arg=#b_var{}=V,fail=Fail,list=List}, Ts0, D0) ->
+    case cerl_sets:is_element(V, D0#d.once) of
+        true ->
+            %% This variable is defined in this block and is only
+            %% referenced by this switch terminator. Therefore, there is
+            %% no need to include it in the type database passed on to the
+            %% successors of this block.
+            Ts = maps:remove(V, Ts0),
+            D = update_successor(Fail, Ts, D0),
+            F = fun({_Val,S}, A) ->
+                        update_successor(S, Ts, A)
+                end,
+            foldl(F, D, List);
+        false ->
+            D = update_successor(Fail, Ts0, D0),
+            F = fun({Val,S}, A) ->
+                        T = get_type(Val, Ts0),
+                        update_successor(S, Ts0#{V=>T}, A)
+                end,
+            foldl(F, D, List)
+        end;
 update_successors(#b_ret{}, _Ts, D) -> D.
 
 update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
@@ -447,6 +477,11 @@ update_successor_bool(#b_var{}=Var, BoolValue, S, Ts, D) ->
             update_successor(S, Ts, D)
     end.
 
+update_successor(?BADARG_BLOCK, _Ts, #d{}=D) ->
+    %% We KNOW that no variables are used in the ?BADARG_BLOCK,
+    %% so there is no need to update the type information. That
+    %% can be a huge timesaver for huge functions.
+    D;
 update_successor(S, Ts0, #d{ls=Ls}=D) ->
     case Ls of
         #{S:=Ts1} ->
@@ -766,6 +801,48 @@ simplify_not(#b_br{bool=#b_var{}=V,succ=Succ,fail=Fail}=Br0, Ts, Ds) ->
             Br0
     end.
 
+%%%
+%%% Calculate the set of variables that are only used once in the
+%%% block that they are defined in. That will allow us to discard type
+%%% information for variables that will never be referenced by the
+%%% successor blocks, potentially improving compilation times.
+%%%
+
+used_once(Linear, Args) ->
+    Map0 = used_once_1(reverse(Linear), #{}),
+    Map = maps:without(Args, Map0),
+    cerl_sets:from_list(maps:keys(Map)).
+
+used_once_1([{L,#b_blk{is=Is,last=Last}}|Bs], Uses0) ->
+    Uses = used_once_2([Last|reverse(Is)], L, Uses0),
+    used_once_1(Bs, Uses);
+used_once_1([], Uses) -> Uses.
+
+used_once_2([I|Is], L, Uses0) ->
+    Uses = used_once_uses(beam_ssa:used(I), L, Uses0),
+    case I of
+        #b_set{dst=Dst} ->
+            case Uses of
+                #{Dst:=[L]} ->
+                    used_once_2(Is, L, Uses);
+                #{} ->
+                    used_once_2(Is, L, maps:remove(Dst, Uses))
+            end;
+        _ ->
+            used_once_2(Is, L, Uses)
+    end;
+used_once_2([], _, Uses) -> Uses.
+
+used_once_uses([V|Vs], L, Uses) ->
+    case Uses of
+        #{V:=Us} ->
+            used_once_uses(Vs, L, Uses#{V:=[L|Us]});
+        #{} ->
+            used_once_uses(Vs, L, Uses#{V=>[L]})
+    end;
+used_once_uses([], _, Uses) -> Uses.
+
+
 get_types(Values, Ts) ->
     [get_type(Val, Ts) || Val <- Values].
 -spec get_type(beam_ssa:value(), type_db()) -> type().
diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl
index 1acbedd45b..51ff580a7a 100644
--- a/lib/compiler/src/beam_trim.erl
+++ b/lib/compiler/src/beam_trim.erl
@@ -21,12 +21,11 @@
 -module(beam_trim).
 -export([module/2]).
 
--import(lists, [reverse/1,reverse/2,splitwith/2,sort/1]).
+-import(lists, [any/2,member/2,reverse/1,reverse/2,splitwith/2,sort/1]).
 
 -record(st,
-	{safe :: gb_sets:set(beam_asm:label()), %Safe labels.
-	 lbl :: beam_utils:code_index()         %Code at each label.
-	 }).
+	{safe :: cerl_sets:set(beam_asm:label()) %Safe labels.
+        }).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
@@ -36,10 +35,15 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opts) ->
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
 function({function,Name,Arity,CLabel,Is0}) ->
-    %%ok = io:fwrite("~w: ~p\n", [?LINE,{Name,Arity}]),
-    St = #st{safe=safe_labels(Is0, []),lbl=beam_utils:index_labels(Is0)},
-    Is = trim(Is0, St, []),
-    {function,Name,Arity,CLabel,Is}.
+    try
+        St = #st{safe=safe_labels(Is0, [])},
+        Is = trim(Is0, St, []),
+        {function,Name,Arity,CLabel,Is}
+    catch
+        Class:Error:Stack ->
+	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
+	    erlang:raise(Class, Error, Stack)
+    end.
 
 trim([{kill,_}|_]=Is0, St, Acc) ->
     {Kills0,Is1} = splitwith(fun({kill,_}) -> true;
@@ -47,14 +51,33 @@ trim([{kill,_}|_]=Is0, St, Acc) ->
 			    end, Is0),
     Kills = sort(Kills0),
     try
-	{FrameSize,Layout} = frame_layout(Is1, Kills, St),
-	Configs = trim_instructions(Layout),
-	try_remap(Configs, Is1, FrameSize)
-	of
+        %% Find out the size and layout of the stack frame.
+        %% Example of a layout:
+        %%
+        %%    [{kill,{y,0}},{dead,{y,1}},{live,{y,2}},{kill,{y,3}}]
+        %%
+        %% That means that y0 and y3 are to be killed, that y1
+        %% has been killed previously, and that y2 is live.
+        {FrameSize,Layout} = frame_layout(Is1, Kills, St),
+
+        %% Calculate all recipes that are not worse in terms
+        %% of estimated execution time. The recipes are ordered
+        %% in descending order of how much they trim.
+        Recipes = trim_recipes(Layout),
+
+        %% Try the recipes in order. A recipe may not work out because
+        %% a register that was previously killed may be
+        %% resurrected. If that happens, the next recipe, which trims
+        %% less, will be tried.
+        try_remap(Recipes, Is1, FrameSize)
+    of
 	{Is,TrimInstr} ->
+            %% One of the recipes was applied.
 	    trim(Is, St, reverse(TrimInstr)++Acc)
     catch
 	not_possible ->
+            %% No recipe worked out. Use the original kill
+            %% instructions.
 	    trim(Is1, St, reverse(Kills, Acc))
     end;
 trim([I|Is], St, Acc) ->
@@ -62,34 +85,42 @@ trim([I|Is], St, Acc) ->
 trim([], _, Acc) ->
     reverse(Acc).
 
-%% trim_instructions([{kill,R}|{live,R}|{dead,R}]) -> {[Instruction],MapFun}
-%%  Figure out the sequence of moves and trim to use.
+%% trim_recipes([{kill,R}|{live,R}|{dead,R}]) -> [Recipe].
+%%      Recipe = {Kills,NumberToTrim,Moves}
+%%      Kills = [{kill,Y}]
+%%      Moves = [{move,SrcY,DstY}]
+%%
+%%  Calculate how to best trim the stack and kill the correct
+%%  Y registers. Return a list of possible recipes. The best
+%%  recipe (the one that trims the most) is first in the list.
+%%  All of the recipes are no worse in estimated execution time
+%%  than the original sequences of kill instructions.
 
-trim_instructions(Layout) ->
+trim_recipes(Layout) ->
     Cost = length([I || {kill,_}=I <- Layout]),
-    trim_instructions_1(Layout, 0, [], {Cost,[]}).
+    trim_recipes_1(Layout, 0, [], {Cost,[]}).
 
-trim_instructions_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Config0) ->
+trim_recipes_1([{kill,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) ->
     Trim = Trim0 + 1,
-    Config = save_config(Ks, Trim, Moves, Config0),
-    trim_instructions_1(Ks, Trim, Moves, Config);
-trim_instructions_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Config0) ->
+    Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+    trim_recipes_1(Ks, Trim, Moves, Recipes);
+trim_recipes_1([{dead,{y,Trim0}}|Ks], Trim0, Moves, Recipes0) ->
     Trim = Trim0 + 1,
-    Config = save_config(Ks, Trim, Moves, Config0),
-    trim_instructions_1(Ks, Trim, Moves, Config);
-trim_instructions_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Config0) ->
+    Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+    trim_recipes_1(Ks, Trim, Moves, Recipes);
+trim_recipes_1([{live,{y,Trim0}=Src}|Ks0], Trim0, Moves0, Recipes0) ->
     case take_last_dead(Ks0) of
 	none ->
-	    {_,ConfigList} = Config0,
-	    ConfigList;
+            {_,RecipesList} = Recipes0,
+            RecipesList;
 	{Dst,Ks} ->
 	    Trim = Trim0 + 1,
 	    Moves = [{move,Src,Dst}|Moves0],
-	    Config = save_config(Ks, Trim, Moves, Config0),
-	    trim_instructions_1(Ks, Trim, Moves, Config)
+            Recipes = save_recipe(Ks, Trim, Moves, Recipes0),
+            trim_recipes_1(Ks, Trim, Moves, Recipes)
     end;
-trim_instructions_1([], _, _, {_,ConfigList}) ->
-    ConfigList.
+trim_recipes_1([], _, _, {_,RecipesList}) ->
+    RecipesList.
 
 take_last_dead(L) ->
     take_last_dead_1(reverse(L)).
@@ -100,28 +131,48 @@ take_last_dead_1([{dead,Reg}|Is]) ->
     {Reg,reverse(Is)};
 take_last_dead_1(_) -> none.
 
-save_config(Ks, Trim, Moves, {MaxCost,Acc}=Config) ->
-    case config_cost(Ks, Moves) of
-	Cost when Cost =< MaxCost ->
-	    {MaxCost,[{Ks,Trim,Moves}|Acc]};
+save_recipe(Ks, Trim, Moves, {MaxCost,Acc}=Recipes) ->
+    case recipe_cost(Ks, Moves) of
+        Cost when Cost =< MaxCost ->
+            %% The price is right.
+            {MaxCost,[{Ks,Trim,Moves}|Acc]};
 	_Cost ->
-	    Config
+            %% Too expensive.
+            Recipes
     end.
 
-config_cost(Ks, Moves) ->
+recipe_cost(Ks, Moves) ->
     %% We estimate that a {move,{y,_},{y,_}} instruction is roughly twice as
     %% expensive as a {kill,{y,_}} instruction. A {trim,_} instruction is
     %% roughly as expensive as a {kill,{y,_}} instruction.
 
-    config_cost_1(Ks, 1+2*length(Moves)).
+    recipe_cost_1(Ks, 1+2*length(Moves)).
 
-config_cost_1([{kill,_}|Ks], Cost) ->
-    config_cost_1(Ks, Cost+1);
-config_cost_1([_|Ks], Cost) ->
-    config_cost_1(Ks, Cost);
-config_cost_1([], Cost) -> Cost.
+recipe_cost_1([{kill,_}|Ks], Cost) ->
+    recipe_cost_1(Ks, Cost+1);
+recipe_cost_1([_|Ks], Cost) ->
+    recipe_cost_1(Ks, Cost);
+recipe_cost_1([], Cost) -> Cost.
+
+%% try_remap([Recipe], [Instruction], FrameSize) ->
+%%           {[Instruction],[TrimInstruction]}.
+%%  Try to renumber Y registers in the instruction stream. The
+%%  first recipe that works will be used.
+%%
+%%  This function will issue a `not_possible` exception if none
+%%  of the recipes were possible to apply.
+
+try_remap([R|Rs], Is, FrameSize) ->
+    {TrimInstr,Map} = expand_recipe(R, FrameSize),
+    try
+	{remap(Is, Map, []),TrimInstr}
+    catch
+	throw:not_possible ->
+	    try_remap(Rs, Is, FrameSize)
+    end;
+try_remap([], _, _) -> throw(not_possible).
 
-expand_config({Layout,Trim,Moves}, FrameSize) ->
+expand_recipe({Layout,Trim,Moves}, FrameSize) ->
     Kills = [Kill || {kill,_}=Kill <- Layout],
     {Kills++reverse(Moves, [{trim,Trim,FrameSize-Trim}]),create_map(Trim, Moves)}.
 
@@ -132,16 +183,16 @@ create_map(Trim, []) ->
        (Any) -> Any
     end;
 create_map(Trim, Moves) ->
-    GbTree0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves],
-    GbTree = gb_trees:from_orddict(sort(GbTree0)),
-    IllegalTargets = gb_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]),
+    Map0 = [{Src,Dst-Trim} || {move,{y,Src},{y,Dst}} <- Moves],
+    Map = maps:from_list(Map0),
+    IllegalTargets = cerl_sets:from_list([Dst || {move,_,{y,Dst}} <- Moves]),
     fun({y,Y0}) when Y0 < Trim ->
-	    case gb_trees:lookup(Y0, GbTree) of
-		{value,Y} -> {y,Y};
-		none -> throw(not_possible)
-	    end;
+            case Map of
+                #{Y0:=Y} -> {y,Y};
+                #{} -> throw(not_possible)
+            end;
        ({y,Y}) ->
-	    case gb_sets:is_element(Y, IllegalTargets) of
+	    case cerl_sets:is_element(Y, IllegalTargets) of
 		true -> throw(not_possible);
 		false -> {y,Y-Trim}
 	    end;
@@ -149,19 +200,15 @@ create_map(Trim, Moves) ->
        (Any) -> Any
     end.
 
-try_remap([C|Cs], Is, FrameSize) ->
-    {TrimInstr,Map} = expand_config(C, FrameSize),
-    try
-	{remap(Is, Map, []),TrimInstr}
-    catch
-	throw:not_possible ->
-	    try_remap(Cs, Is, FrameSize)
-    end;
-try_remap([], _, _) -> throw(not_possible).
-
 remap([{block,Bl0}|Is], Map, Acc) ->
     Bl = remap_block(Bl0, Map, []),
     remap(Is, Map, [{block,Bl}|Acc]);
+remap([{bs_get_tail,Src,Dst,Live}|Is], Map, Acc) ->
+    I = {bs_get_tail,Map(Src),Map(Dst),Live},
+    remap(Is, Map, [I|Acc]);
+remap([{bs_set_position,Src1,Src2}|Is], Map, Acc) ->
+    I = {bs_set_position,Map(Src1),Map(Src2)},
+    remap(Is, Map, [I|Acc]);
 remap([{call_fun,_}=I|Is], Map, Acc) ->
     remap(Is, Map, [I|Acc]);
 remap([{call,_,_}=I|Is], Map, Acc) ->
@@ -205,35 +252,66 @@ remap([return|_]=Is, _, Acc) ->
     reverse(Acc, Is);
 remap([{line,_}=I|Is], Map, Acc) ->
     remap(Is, Map, [I|Acc]).
-    
+
 remap_block([{set,Ds0,Ss0,Info}|Is], Map, Acc) ->
     Ds = [Map(D) || D <- Ds0],
     Ss = [Map(S) || S <- Ss0],
     remap_block(Is, Map, [{set,Ds,Ss,Info}|Acc]);
 remap_block([], _, Acc) -> reverse(Acc).
-    
-safe_labels([{label,L},{line,_},{badmatch,{Tag,_}}|Is], Acc) when Tag =/= y ->
-    safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},{line,_},{case_end,{Tag,_}}|Is], Acc) when Tag =/= y ->
-    safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},{line,_},if_end|Is], Acc) ->
-    safe_labels(Is, [L|Acc]);
-safe_labels([{label,L},
-	     {block,[{set,[{x,0}],[{Tag,_}],move}]},
-	     {line,_},
-	     {call_ext,1,{extfunc,erlang,error,1}}|Is], Acc) when Tag =/= y ->
-    safe_labels(Is, [L|Acc]);
+
+%% safe_labels([Instruction], Accumulator) -> cerl_sets:set()
+%%  Build a set (cerl_sets) of safe labels. The code at a safe
+%%  label does not depend on the values in a specific
+%%  Y register, only that all Y registers are initialized
+%%  so that it is safe to scan the stack when an exception
+%%  is generated.
+%%
+%%  In other words, code at a safe label will continue
+%%  to work if Y registers have been renumbered and
+%%  the size of the stack frame has changed.
+
+safe_labels([{label,L}|Is], Acc) ->
+    case is_safe_label(Is) of
+        true -> safe_labels(Is, [L|Acc]);
+        false -> safe_labels(Is, Acc)
+    end;
 safe_labels([_|Is], Acc) ->
     safe_labels(Is, Acc);
-safe_labels([], Acc) -> gb_sets:from_list(Acc).
+safe_labels([], Acc) -> cerl_sets:from_list(Acc).
+
+is_safe_label([{line,_}|Is]) ->
+    is_safe_label(Is);
+is_safe_label([{badmatch,{Tag,_}}|_]) ->
+    Tag =/= y;
+is_safe_label([{case_end,{Tag,_}}|_]) ->
+    Tag =/= y;
+is_safe_label([{try_case_end,{Tag,_}}|_]) ->
+    Tag =/= y;
+is_safe_label([if_end|_]) ->
+    true;
+is_safe_label([{block,Bl}|Is]) ->
+    is_safe_label_block(Bl) andalso is_safe_label(Is);
+is_safe_label([{call_ext,_,{extfunc,M,F,A}}|_]) ->
+    erl_bifs:is_exit_bif(M, F, A);
+is_safe_label(_) -> false.
+
+is_safe_label_block([{set,Ds,Ss,_}|Is]) ->
+    IsYreg = fun({y,_}) -> true;
+                (_) -> false
+             end,
+    %% This instruction is safe if the instruction
+    %% neither reads nor writes Y registers.
+    not (any(IsYreg, Ss) orelse any(IsYreg, Ds)) andalso
+        is_safe_label_block(Is);
+is_safe_label_block([]) -> true.
 
 %% frame_layout([Instruction], [{kill,_}], St) ->
 %%      [{kill,Reg} | {live,Reg} | {dead,Reg}]
 %%  Figure out the layout of the stack frame.
 
-frame_layout(Is, Kills, #st{safe=Safe,lbl=D}) ->
+frame_layout(Is, Kills, #st{safe=Safe}) ->
     N = frame_size(Is, Safe),
-    IsKilled = fun(R) -> beam_utils:is_not_used(R, Is, D) end,
+    IsKilled = fun(R) -> is_not_used(R, Is) end,
     {N,frame_layout_1(Kills, 0, N, IsKilled, [])}.
 
 frame_layout_1([{kill,{y,Y}}=I|Ks], Y, N, IsKilled, Acc) ->
@@ -253,6 +331,11 @@ frame_layout_2(Is) -> reverse(Is).
 
 %% frame_size([Instruction], SafeLabels) -> FrameSize
 %%  Find out the frame size by looking at the code that follows.
+%%
+%%  Implicitly, also check that the instructions are a straight
+%%  sequence of code that ends in a return. Any branches are
+%%  to safe labels (i.e., the code at those labels doesn't depend
+%%  on the contents of any Y register).
 
 frame_size([{block,_}|Is], Safe) ->
     frame_size(Is, Safe);
@@ -285,15 +368,92 @@ frame_size([{make_fun2,_,_,_,_}|Is], Safe) ->
     frame_size(Is, Safe);
 frame_size([{get_map_elements,{f,L},_,_}|Is], Safe) ->
     frame_size_branch(L, Is, Safe);
-frame_size([{deallocate,N}|_], _) -> N;
+frame_size([{deallocate,N}|_], _) ->
+    N;
 frame_size([{line,_}|Is], Safe) ->
     frame_size(Is, Safe);
+frame_size([{bs_set_position,_,_}|Is], Safe) ->
+    frame_size(Is, Safe);
+frame_size([{bs_get_tail,_,_,_}|Is], Safe) ->
+    frame_size(Is, Safe);
 frame_size(_, _) -> throw(not_possible).
 
 frame_size_branch(0, Is, Safe) ->
     frame_size(Is, Safe);
 frame_size_branch(L, Is, Safe) ->
-    case gb_sets:is_member(L, Safe) of
+    case cerl_sets:is_element(L, Safe) of
 	false -> throw(not_possible);
 	true -> frame_size(Is, Safe)
     end.
+
+%% is_not_used(Y, [Instruction]) -> true|false.
+%%  Test whether the value of Y is unused in the instruction sequence.
+%%  Return true if the value of Y is not used, and false if it is used.
+%%
+%%  This function handles the same instructions as frame_size/2. It
+%%  assumes that any labels in the instructions are safe labels.
+
+is_not_used(Y, [{apply,_}|Is]) ->
+    is_not_used(Y, Is);
+is_not_used(Y, [{bif,_,{f,_},Ss,Dst}|Is]) ->
+    is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{block,Bl}|Is]) ->
+    case is_not_used_block(Y, Bl) of
+        used -> false;
+        killed -> true;
+        transparent -> is_not_used(Y, Is)
+    end;
+is_not_used(Y, [{bs_get_tail,Src,Dst,_}|Is]) ->
+    is_not_used_ss_dst(Y, [Src], Dst, Is);
+is_not_used(Y, [{bs_init,_,_,_,Ss,Dst}|Is]) ->
+    is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{bs_put,{f,_},_,Ss}|Is]) ->
+    not member(Y, Ss) andalso is_not_used(Y, Is);
+is_not_used(Y, [{bs_set_position,Src1,Src2}|Is]) ->
+    Y =/= Src1 andalso Y =/= Src2 andalso
+        is_not_used(Y, Is);
+is_not_used(Y, [{call,_,_}|Is]) ->
+    is_not_used(Y, Is);
+is_not_used(Y, [{call_ext,_,_}=I|Is]) ->
+    beam_jump:is_exit_instruction(I) orelse is_not_used(Y, Is);
+is_not_used(Y, [{call_fun,_}|Is]) ->
+    is_not_used(Y, Is);
+is_not_used(_Y, [{deallocate,_}|_]) ->
+    true;
+is_not_used(Y, [{gc_bif,_,{f,_},_Live,Ss,Dst}|Is]) ->
+    is_not_used_ss_dst(Y, Ss, Dst, Is);
+is_not_used(Y, [{get_map_elements,{f,_},S,{list,List}}|Is]) ->
+    {Ss,Ds} = beam_utils:split_even(List),
+    case member(Y, [S|Ss]) of
+	true ->
+	    false;
+	false ->
+            member(Y, Ds) orelse is_not_used(Y, Is)
+    end;
+is_not_used(Y, [{kill,Yreg}|Is]) ->
+    Y =:= Yreg orelse is_not_used(Y, Is);
+is_not_used(Y, [{line,_}|Is]) ->
+    is_not_used(Y, Is);
+is_not_used(Y, [{make_fun2,_,_,_,_}|Is]) ->
+    is_not_used(Y, Is);
+is_not_used(Y, [{test,_,_,Ss}|Is]) ->
+    not member(Y, Ss) andalso is_not_used(Y, Is);
+is_not_used(Y, [{test,_Op,{f,_},_Live,Ss,Dst}|Is]) ->
+    is_not_used_ss_dst(Y, Ss, Dst, Is).
+
+is_not_used_block(Y, [{set,Ds,Ss,_}|Is]) ->
+    case member(Y, Ss) of
+        true ->
+            used;
+        false ->
+            case member(Y, Ds) of
+                true ->
+                    killed;
+                false ->
+                    is_not_used_block(Y, Is)
+            end
+    end;
+is_not_used_block(_Y, []) -> transparent.
+
+is_not_used_ss_dst(Y, Ss, Dst, Is) ->
+    not member(Y, Ss) andalso (Y =:= Dst orelse is_not_used(Y, Is)).
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 626e041ea0..6e6574c0b3 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -18,27 +18,16 @@
 %% %CopyrightEnd%
 %%
 %% Purpose : Common utilities used by several optimization passes.
-%% 
+%%
 
 -module(beam_utils).
--export([is_killed/3,is_killed_at/3,is_not_used/3,
-	 empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
-	 code_at/2,bif_to_test/3,is_pure_test/1,
-	 combine_heap_needs/2,
-	 split_even/1
-        ]).
+-export([replace_labels/4,is_pure_test/1,split_even/1]).
 
 -export_type([code_index/0,module_code/0,instruction/0]).
 
--import(lists, [flatmap/2,map/2,member/2,sort/1,reverse/1]).
-
--define(is_const(Val), (Val =:= nil orelse
-                        element(1, Val) =:= integer orelse
-                        element(1, Val) =:= float orelse
-                        element(1, Val) =:= atom orelse
-                        element(1, Val) =:= literal)).
+-import(lists, [map/2,reverse/1]).
 
-%% instruction() describes all instructions that are used during optimzation
+%% instruction() describes all instructions that are used during optimization
 %% (from beam_a to beam_z).
 -type instruction() :: atom() | tuple().
 
@@ -54,97 +43,6 @@
 -type fail() :: beam_asm:fail() | 'fail'.
 -type test() :: {'test',atom(),fail(),[beam_asm:src()]} |
 		{'test',atom(),fail(),integer(),list(),beam_asm:reg()}.
--type result_cache() :: gb_trees:tree(beam_asm:label(), 'killed' | 'used').
-
--record(live,
-	{lbl :: code_index(),            %Label to code index.
-	 res :: result_cache()}).        %Result cache for each label.
-
-%% is_killed(Register, [Instruction], State) -> true|false
-%%  Determine whether a register is killed by the instruction sequence.
-%%  If true is returned, it means that the register will not be
-%%  referenced in ANY way (not even indirectly by an allocate instruction);
-%%  i.e. it is OK to enter the instruction sequence with Register
-%%  containing garbage.
-%%
-%%  The state (constructed by index_instructions/1) is used to allow us
-%%  to determine the kill state across branches.
-
--spec is_killed(beam_asm:reg(), [instruction()], code_index()) -> boolean().
-
-is_killed(R, Is, D) ->
-    St = #live{lbl=D,res=gb_trees:empty()},
-    case check_liveness(R, Is, St) of
-	{killed,_} -> true;
-	{exit_not_used,_} -> false;
-	{_,_} -> false
-    end.
-
-%% is_killed_at(Reg, Lbl, State) -> true|false
-%%  Determine whether Reg is killed at label Lbl.
-
--spec is_killed_at(beam_asm:reg(), beam_asm:label(), code_index()) -> boolean().
-
-is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
-    St0 = #live{lbl=D,res=gb_trees:empty()},
-    case check_liveness_at(R, Lbl, St0) of
-	{killed,_} -> true;
-	{exit_not_used,_} -> false;
-	{_,_} -> false
-    end.
-
-%% is_not_used(Register, [Instruction], State) -> true|false
-%%  Determine whether a register is never used in the instruction sequence
-%%  (it could still be referenced by an allocate instruction, meaning that
-%%  it MUST be initialized, but that its value does not matter).
-%%    The state is used to allow us to determine the usage state
-%%  across branches.
-
--spec is_not_used(beam_asm:reg(), [instruction()], code_index()) -> boolean().
-
-is_not_used(R, Is, D) ->
-    St = #live{lbl=D,res=gb_trees:empty()},
-    case check_liveness(R, Is, St) of
-	{used,_} -> false;
-	{exit_not_used,_} -> true;
-	{_,_} -> true
-    end.
-
-%% index_labels(FunctionIs) -> State
-%%  Index the instruction sequence so that we can quickly
-%%  look up the instruction following a specific label.
-
--spec index_labels([instruction()]) -> code_index().
-
-index_labels(Is) ->
-    index_labels_1(Is, []).
-
-%% empty_label_index() -> State
-%%  Create an empty label index.
-
--spec empty_label_index() -> code_index().
-
-empty_label_index() ->
-    gb_trees:empty().
-
-%% index_label(Label, [Instruction], State) -> State
-%%  Add an index for a label.
-
--spec index_label(beam_asm:label(), [instruction()], code_index()) ->
-   code_index().
-
-index_label(Lbl, Is0, Acc) ->
-    Is = drop_labels(Is0),
-    gb_trees:enter(Lbl, Is, Acc).
-
-
-%% code_at(Label, State) -> [I].
-%%  Retrieve the code at the given label.
-
--spec code_at(beam_asm:label(), code_index()) -> [instruction()].
-
-code_at(L, Ll) ->
-    gb_trees:get(L, Ll).
 
 %% replace_labels(FunctionIs, Tail, ReplaceDb, Fallback) -> FunctionIs.
 %%  Replace all labels in instructions according to the ReplaceDb.
@@ -158,44 +56,6 @@ code_at(L, Ll) ->
 replace_labels(Is, Acc, D, Fb) ->
     replace_labels_1(Is, Acc, D, Fb).
 
-%% bif_to_test(Bif, [Op], Fail) -> {test,Test,Fail,[Op]}
-%%  Convert a BIF to a test. Fail if not possible.
-
--spec bif_to_test(atom(), list(), fail()) -> test().
-
-bif_to_test(is_atom,     [_]=Ops, Fail) -> {test,is_atom,Fail,Ops};
-bif_to_test(is_boolean,  [_]=Ops, Fail) -> {test,is_boolean,Fail,Ops};
-bif_to_test(is_binary,   [_]=Ops, Fail) -> {test,is_binary,Fail,Ops};
-bif_to_test(is_bitstring,[_]=Ops, Fail) -> {test,is_bitstr,Fail,Ops};
-bif_to_test(is_float,    [_]=Ops, Fail) -> {test,is_float,Fail,Ops};
-bif_to_test(is_function, [_]=Ops, Fail) -> {test,is_function,Fail,Ops};
-bif_to_test(is_function, [_,_]=Ops, Fail) -> {test,is_function2,Fail,Ops};
-bif_to_test(is_integer,  [_]=Ops, Fail) -> {test,is_integer,Fail,Ops};
-bif_to_test(is_list,     [_]=Ops, Fail) -> {test,is_list,Fail,Ops};
-bif_to_test(is_map,      [_]=Ops, Fail) -> {test,is_map,Fail,Ops};
-bif_to_test(is_number,   [_]=Ops, Fail) -> {test,is_number,Fail,Ops};
-bif_to_test(is_pid,      [_]=Ops, Fail) -> {test,is_pid,Fail,Ops};
-bif_to_test(is_port,     [_]=Ops, Fail) -> {test,is_port,Fail,Ops};
-bif_to_test(is_reference, [_]=Ops, Fail) -> {test,is_reference,Fail,Ops};
-bif_to_test(is_tuple,    [_]=Ops, Fail)     -> {test,is_tuple,Fail,Ops};
-bif_to_test('=<', [A,B], Fail) -> {test,is_ge,Fail,[B,A]};
-bif_to_test('>', [A,B], Fail) -> {test,is_lt,Fail,[B,A]};
-bif_to_test('<', [_,_]=Ops, Fail) -> {test,is_lt,Fail,Ops};
-bif_to_test('>=', [_,_]=Ops, Fail) -> {test,is_ge,Fail,Ops};
-bif_to_test('==', [C,A], Fail) when ?is_const(C) ->
-    {test,is_eq,Fail,[A,C]};
-bif_to_test('==', [_,_]=Ops, Fail) -> {test,is_eq,Fail,Ops};
-bif_to_test('/=', [C,A], Fail) when ?is_const(C) ->
-    {test,is_ne,Fail,[A,C]};
-bif_to_test('/=', [_,_]=Ops, Fail) -> {test,is_ne,Fail,Ops};
-bif_to_test('=:=', [C,A], Fail) when ?is_const(C) ->
-    {test,is_eq_exact,Fail,[A,C]};
-bif_to_test('=:=', [_,_]=Ops, Fail) -> {test,is_eq_exact,Fail,Ops};
-bif_to_test('=/=', [C,A], Fail) when ?is_const(C) ->
-    {test,is_ne_exact,Fail,[A,C]};
-bif_to_test('=/=', [_,_]=Ops, Fail) -> {test,is_ne_exact,Fail,Ops}.
-
-
 %% is_pure_test({test,Op,Fail,Ops}) -> true|false.
 %%  Return 'true' if the test instruction does not modify any
 %%  registers and/or bit syntax matching state.
@@ -215,22 +75,9 @@ is_pure_test({test,test_arity,_,[_,_]}) -> true;
 is_pure_test({test,has_map_fields,_,[_|_]}) -> true;
 is_pure_test({test,is_bitstr,_,[_]}) -> true;
 is_pure_test({test,is_function2,_,[_,_]}) -> true;
-is_pure_test({test,Op,_,Ops}) -> 
+is_pure_test({test,Op,_,Ops}) ->
     erl_internal:new_type_test(Op, length(Ops)).
 
-%% combine_heap_needs(HeapNeed1, HeapNeed2) -> HeapNeed
-%%  Combine the heap need for two allocation instructions.
-
--type heap_need_tag() :: 'floats' | 'words'.
--type heap_need() :: non_neg_integer() |
-                     {'alloc',[{heap_need_tag(),non_neg_integer()}]}.
--spec combine_heap_needs(heap_need(), heap_need()) -> heap_need().
-
-combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) ->
-    H1 + H2;
-combine_heap_needs(H1, H2) ->
-    {alloc,combine_alloc_lists([H1,H2])}.
-
 %% split_even/1
 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]}
 
@@ -242,438 +89,6 @@ split_even(Rs) -> split_even(Rs, [], []).
 %%% Local functions.
 %%%
 
-
-%% check_liveness(Reg, [Instruction], #live{}) ->
-%%                      {killed | not_used | used, #live{}}
-%%  Find out whether Reg is used or killed in instruction sequence.
-%%
-%%    killed - Reg is assigned or killed by an allocation instruction.
-%%    not_used - the value of Reg is not used, but Reg must not be garbage
-%%    exit_not_used - the value of Reg is not used, but must not be garbage
-%%                    because the stack will be scanned because an
-%%                    exit BIF will raise an exception
-%%    used - Reg is used
-
-check_liveness({fr,_}, _, St) ->
-    %% Conservatively always consider the floating point register used.
-    {used,St};
-check_liveness(R, [{block,Blk}|Is], St0) ->
-    case check_liveness_block(R, Blk, St0) of
-	{transparent,St1} ->
-	    check_liveness(R, Is, St1);
-	{alloc_used,St1} ->
-            %% Used by an allocating instruction, but value not referenced.
-            %% Must check the rest of the instructions.
-	    not_used(check_liveness(R, Is, St1));
-	{Other,_}=Res when is_atom(Other) ->
-	    Res
-    end;
-check_liveness(R, [{label,_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) ->
-    case member(R, As) of
-	true ->
-	    {used,St0};
-	false ->
-	    case check_liveness_at(R, Fail, St0) of
-		{killed,St1} ->
-		    check_liveness(R, Is, St1);
-		{exit_not_used,St1} ->
-		    not_used(check_liveness(R, Is, St1));
-		{not_used,St1} ->
-		    not_used(check_liveness(R, Is, St1));
-		{used,_}=Used ->
-		    Used
-	    end
-    end;
-check_liveness(R, [{test,Op,Fail,Live,Ss,Dst}|Is], St) ->
-    %% Check this instruction as a block to get a less conservative
-    %% result if the caller is is_not_used/3.
-    Block = [{set,[Dst],Ss,{alloc,Live,{bif,Op,Fail}}}],
-    check_liveness(R, [{block,Block}|Is], St);
-check_liveness(R, [{select,_,R,_,_}|_], St) ->
-    {used,St};
-check_liveness(R, [{select,_,_,Fail,Branches}|_], St) ->
-    check_liveness_everywhere(R, [Fail|Branches], St);
-check_liveness(R, [{jump,{f,F}}|_], St) ->
-    check_liveness_at(R, F, St);
-check_liveness(R, [{case_end,Used}|_], St) ->
-    check_liveness_exit(R, Used, St);
-check_liveness(R, [{try_case_end,Used}|_], St) ->
-    check_liveness_exit(R, Used, St);
-check_liveness(R, [{badmatch,Used}|_], St) ->
-    check_liveness_exit(R, Used, St);
-check_liveness(R, [if_end|_], St) ->
-    check_liveness_exit(R, ignore, St);
-check_liveness(R, [{func_info,_,_,Ar}|_], St) ->
-    case R of
-	{x,X} when X < Ar -> {used,St};
-	_ -> {killed,St}
-    end;
-check_liveness(R, [{kill,R}|_], St) ->
-    {killed,St};
-check_liveness(R, [{kill,_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) ->
-    case member(R, Ss) of
-	true ->
-	    {used,St};
-	false ->
-	    if
-		R =:= Dst -> {killed,St};
-		true -> check_liveness(R, Is, St)
-	    end
-    end;
-check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
-    case R of
-	{x,X} ->
-            case member(R, Ss) of
-                true ->
-                    {used,St};
-                false ->
-                    if
-                        X < Live ->
-                            not_used(check_liveness(R, Is, St));
-                        true ->
-                            {killed,St}
-                    end
-	    end;
-	{y,_} ->
-	    case member(R, Ss) of
-		true -> {used,St};
-		false ->
-                    %% If the exception is taken, the stack may
-                    %% be scanned. Therefore the register is not
-                    %% guaranteed to be killed.
-		    if
-                        R =:= Dst -> {not_used,St};
-			true -> not_used(check_liveness(R, Is, St))
-		    end
-	    end
-    end;
-check_liveness(R, [{deallocate,_}|Is], St) ->
-    case R of
-	{y,_} -> {killed,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness({x,_}=R, [return|_], St) ->
-    case R of
-	{x,0} -> {used,St};
-	{x,_} -> {killed,St}
-    end;
-check_liveness(R, [{call,Live,_}|Is], St) ->
-    case R of
-	{x,X} when X < Live -> {used,St};
-	{x,_} -> {killed,St};
-	{y,_} -> not_used(check_liveness(R, Is, St))
-    end;
-check_liveness(R, [{call_ext,Live,_}=I|Is], St) ->
-    case R of
-	{x,X} when X < Live ->
-	    {used,St};
-	{x,_} ->
-	    {killed,St};
-	{y,_} ->
-	    case beam_jump:is_exit_instruction(I) of
-		false ->
-		    not_used(check_liveness(R, Is, St));
-		true ->
-		    %% We must make sure we don't check beyond this
-		    %% instruction or we will fall through into random
-		    %% unrelated code and get stuck in a loop.
-		    {exit_not_used,St}
-	    end
-    end;
-check_liveness(R, [{call_fun,Live}|Is], St) ->
-    case R of
-	{x,X} when X =< Live -> {used,St};
-	{x,_} -> {killed,St};
-	{y,_} -> not_used(check_liveness(R, Is, St))
-    end;
-check_liveness(R, [{apply,Args}|Is], St) ->
-    case R of
-	{x,X} when X < Args+2 -> {used,St};
-	{x,_} -> {killed,St};
-	{y,_} -> not_used(check_liveness(R, Is, St))
-    end;
-check_liveness(R, [{bif,Op,Fail,Ss,D}|Is], St) ->
-    Set = {set,[D],Ss,{bif,Op,Fail}},
-    check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{gc_bif,Op,{f,Fail},Live,Ss,D}|Is], St) ->
-    Set = {set,[D],Ss,{alloc,Live,{gc_bif,Op,Fail}}},
-    check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{bs_put,{f,0},_,Ss}|Is], St) ->
-    case member(R, Ss) of
-	true -> {used,St};
-	false -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_restore2,S,_}|Is], St) ->
-    case R of
-	S -> {used,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{bs_save2,S,_}|Is], St) ->
-    case R of
-	S -> {used,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{move,S,D}|Is], St) ->
-    case R of
-	S -> {used,St};
-	D -> {killed,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) ->
-    case R of
-	{x,X} when X < NumFree -> {used,St};
-	{x,_} -> {killed,St};
-	{y,_} -> not_used(check_liveness(R, Is, St))
-    end;
-check_liveness(R, [{'catch'=Op,Y,Fail}|Is], St) ->
-    Set = {set,[Y],[],{try_catch,Op,Fail}},
-    check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{'try'=Op,Y,Fail}|Is], St) ->
-    Set = {set,[Y],[],{try_catch,Op,Fail}},
-    check_liveness(R, [{block,[Set]}|Is], St);
-check_liveness(R, [{try_end,Y}|Is], St) ->
-    case R of
-	Y ->
-	    {killed,St};
-	{y,_} ->
-	    %% y registers will be used if an exception occurs and
-	    %% control transfers to the label given in the previous
-	    %% try/2 instruction.
-	    {used,St};
-	_ ->
-	    check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{catch_end,Y}|Is], St) ->
-    case R of
-	Y -> {killed,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{get_tuple_element,S,_,D}|Is], St) ->
-    case R of
-	S -> {used,St};
-	D -> {killed,St};
-	_ -> check_liveness(R, Is, St)
-    end;
-check_liveness(R, [{loop_rec,{f,_},{x,0}}|_], St) ->
-    case R of
-	{x,_} ->
-	    {killed,St};
-	_ ->
-	    %% y register. Rarely happens. Be very conversative and
-	    %% assume it's used.
-	    {used,St}
-    end;
-check_liveness(R, [{loop_rec_end,{f,Fail}}|_], St) ->
-    check_liveness_at(R, Fail, St);
-check_liveness(R, [{line,_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) ->
-    {Ss,Ds} = split_even(L),
-    case member(R, [S|Ss]) of
-	true ->
-	    {used,St0};
-	false ->
-	    case check_liveness_at(R, Fail, St0) of
-		{killed,St}=Killed ->
-		    case member(R, Ds) of
-			true -> Killed;
-			false -> check_liveness(R, Is, St)
-		    end;
-		Other ->
-		    Other
-	    end
-    end;
-check_liveness(R, [{put_map,F,Op,S,D,Live,{list,Puts}}|Is], St) ->
-    Set = {set,[D],[S|Puts],{alloc,Live,{put_map,Op,F}}},
-    check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{put_tuple,Ar,D}|Is], St) ->
-    Set = {set,[D],[],{put_tuple,Ar}},
-    check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{put_list,S1,S2,D}|Is], St) ->
-    Set = {set,[D],[S1,S2],put_list},
-    check_liveness(R, [{block,[Set]}||Is], St);
-check_liveness(R, [{test_heap,N,Live}|Is], St) ->
-    I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]},
-    check_liveness(R, [I|Is], St);
-check_liveness(R, [{allocate_zero,N,Live}|Is], St) ->
-    I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]},
-    check_liveness(R, [I|Is], St);
-check_liveness(R, [{get_hd,S,D}|Is], St) ->
-    I = {block,[{set,[D],[S],get_hd}]},
-    check_liveness(R, [I|Is], St);
-check_liveness(R, [{get_tl,S,D}|Is], St) ->
-    I = {block,[{set,[D],[S],get_tl}]},
-    check_liveness(R, [I|Is], St);
-check_liveness(R, [remove_message|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness({x,X}, [build_stacktrace|_], St) when X > 0 ->
-    {killed,St};
-check_liveness(R, [{recv_mark,_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(R, [{recv_set,_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(R, [{'%',_}|Is], St) ->
-    check_liveness(R, Is, St);
-check_liveness(_R, Is, St) when is_list(Is) ->
-    %% Not implemented. Conservatively assume that the register is used.
-    {used,St}.
-
-check_liveness_everywhere(R, Lbls, St0) ->
-    check_liveness_everywhere_1(R, Lbls, killed, St0).
-
-check_liveness_everywhere_1(R, [{f,Lbl}|T], Res0, St0) ->
-    {Res1,St} = check_liveness_at(R, Lbl, St0),
-    Res = case Res1 of
-	      killed -> Res0;
-	      _ -> Res1
-	  end,
-    case Res of
-	used -> {used,St};
-	_ -> check_liveness_everywhere_1(R, T, Res, St)
-    end;
-check_liveness_everywhere_1(R, [_|T], Res, St) ->
-    check_liveness_everywhere_1(R, T, Res, St);
-check_liveness_everywhere_1(_, [], Res, St) ->
-    {Res,St}.
-
-check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) ->
-    case gb_trees:lookup(Lbl, ResMemorized) of
-	{value,Res} ->
-	    {Res,St0};
-	none ->
-	    {Res,St} = case gb_trees:lookup(Lbl, Ll) of
-			   {value,Is} -> check_liveness(R, Is, St0);
-			   none -> {used,St0}
-		       end,
-	    {Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}}
-    end.
-
-not_used({used,_}=Res) -> Res;
-not_used({_,St}) -> {not_used,St}.
-
-check_liveness_exit(R, R, St) -> {used,St};
-check_liveness_exit({x,_}, _, St) -> {killed,St};
-check_liveness_exit({y,_}, _, St) -> {exit_not_used,St}.
-
-%% check_liveness_block(Reg, [Instruction], State) ->
-%%     {killed | not_used | used | alloc_used | transparent,State'}
-%%  Finds out how Reg is used in the instruction sequence inside a block.
-%%  Returns one of:
-%%    killed - Reg is assigned a new value or killed by an
-%%       allocation instruction
-%%    not_used - The value is not used, but the register is referenced
-%%       e.g. by an allocation instruction
-%%    transparent - Reg is neither used nor killed
-%%    alloc_used - Used only in an allocate instruction
-%%    used - Reg is explicitly used by an instruction
-%%
-%%  Annotations are not allowed.
-%%
-%%  (Unknown instructions will cause an exception.)
-
-check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
-    if 
-	X >= Live ->
-	    {killed,St0};
-	true ->
-	    case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
-                {transparent,St} -> {alloc_used,St};
-		{_,_}=Res -> not_used(Res)
-	    end
-    end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) ->
-    case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
-        {transparent,St} -> {alloc_used,St};
-        {_,_}=Res -> not_used(Res)
-    end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) ->
-    case Ds of
-        [R] ->
-            {killed,St0};
-        _ ->
-            case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
-                {exit_not_used,St} ->
-                    {used,St};
-                {transparent,St} ->
-                    %% Conservatively assumed that it is used.
-                    {used,St};
-                {_,_}=Res ->
-                    Res
-            end
-    end;
-check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) ->
-    check_liveness_block_1(R, Ss, Ds, Op, Is, St);
-check_liveness_block(_, [], St) -> {transparent,St}.
-
-check_liveness_block_1(R, Ss, Ds, Op, Is, St0) ->
-    case member(R, Ss) of
-	true ->
-	    {used,St0};
-	false ->
-	    case check_liveness_block_2(R, Op, Ss, St0) of
-		{killed,St} ->
-		    case member(R, Ds) of
-			true -> {killed,St};
-			false -> check_liveness_block(R, Is, St)
-		    end;
-		{exit_not_used,St} ->
-		    case member(R, Ds) of
-			true -> {exit_not_used,St};
-			false -> check_liveness_block(R, Is, St)
-		    end;
-		{not_used,St} ->
-		    not_used(case member(R, Ds) of
-				 true -> {killed,St};
-				 false -> check_liveness_block(R, Is, St)
-			     end);
-		{used,St} ->
-		    {used,St}
-	    end
-    end.
-
-check_liveness_block_2(R, {gc_bif,Op,{f,Lbl}}, Ss, St) ->
-    check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St);
-check_liveness_block_2(R, {bif,Op,{f,Lbl}}, Ss, St) ->
-    Arity = length(Ss),
-    case erl_internal:comp_op(Op, Arity) orelse
-	erl_internal:new_type_test(Op, Arity) of
-	true ->
-	    {killed,St};
-	false ->
-	    check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St)
-    end;
-check_liveness_block_2(R, {put_map,_Op,{f,Lbl}}, _Ss, St) ->
-    check_liveness_block_3(R, Lbl, {unsafe,0}, St);
-check_liveness_block_2(_, _, _, St) ->
-    {killed,St}.
-
-check_liveness_block_3({x,_}, 0, _FA, St) ->
-    {killed,St};
-check_liveness_block_3({y,_}, 0, {F,A}, St) ->
-    %% If the exception is thrown, the stack may be scanned,
-    %% thus implicitly using the y register.
-    case erl_bifs:is_safe(erlang, F, A) of
-        true -> {killed,St};
-        false -> {used,St}
-    end;
-check_liveness_block_3(R, Lbl, _FA, St0) ->
-    check_liveness_at(R, Lbl, St0).
-
-index_labels_1([{label,Lbl}|Is0], Acc) ->
-    Is = drop_labels(Is0),
-    index_labels_1(Is0, [{Lbl,Is}|Acc]);
-index_labels_1([_|Is], Acc) ->
-    index_labels_1(Is, Acc);
-index_labels_1([], Acc) -> gb_trees:from_orddict(sort(Acc)).
-
-drop_labels([{label,_}|Is]) -> drop_labels(Is);
-drop_labels(Is) -> Is.
-
-
 replace_labels_1([{test,Test,{f,Lbl},Ops}|Is], Acc, D, Fb) ->
     replace_labels_1(Is, [{test,Test,{f,label(Lbl, D, Fb)},Ops}|Acc], D, Fb);
 replace_labels_1([{test,Test,{f,Lbl},Live,Ops,Dst}|Is], Acc, D, Fb) ->
@@ -729,21 +144,6 @@ label(Old, D, Fb) ->
         _ -> Fb(Old)
     end.
 
-%% Help function for combine_heap_needs.
-
-combine_alloc_lists(Al0) ->
-    Al1 = flatmap(fun(Words) when is_integer(Words) ->
-                         [{words,Words}];
-                    ({alloc,List}) ->
-                         List
-                 end, Al0),
-    Al2 = sofs:relation(Al1),
-    Al3 = sofs:relation_to_family(Al2),
-    Al4 = sofs:to_external(Al3),
-    [{Tag,lists:sum(L)} || {Tag,L} <- Al4].
-
-%% live_opt/4.
-
 split_even([], Ss, Ds) ->
     {reverse(Ss),reverse(Ds)};
 split_even([S,D|Rs], Ss, Ds) ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index ca065295d6..7d908df3bf 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -479,16 +479,20 @@ valfun_1({try_case,Reg}, #vst{current=#st{ct=[Fail|Fails]}}=Vst0) ->
 	    error({bad_type,Type})
     end;
 valfun_1({get_list,Src,D1,D2}, Vst0) ->
+    assert_not_literal(Src),
     assert_type(cons, Src, Vst0),
     Vst = set_type_reg(term, Src, D1, Vst0),
     set_type_reg(term, Src, D2, Vst);
 valfun_1({get_hd,Src,Dst}, Vst) ->
+    assert_not_literal(Src),
     assert_type(cons, Src, Vst),
     set_type_reg(term, Src, Dst, Vst);
 valfun_1({get_tl,Src,Dst}, Vst) ->
+    assert_not_literal(Src),
     assert_type(cons, Src, Vst),
     set_type_reg(term, Src, Dst, Vst);
 valfun_1({get_tuple_element,Src,I,Dst}, Vst) ->
+    assert_not_literal(Src),
     assert_type({tuple_element,I+1}, Src, Vst),
     set_type_reg(term, Src, Dst, Vst);
 valfun_1({jump,{f,Lbl}}, Vst) ->
@@ -917,6 +921,7 @@ valfun_4(_, _) ->
     error(unknown_instruction).
 
 verify_get_map(Fail, Src, List, Vst0) ->
+    assert_not_literal(Src),                    %OTP 22.
     assert_type(map, Src, Vst0),
     Vst1 = foldl(fun(D, Vsti) ->
                          case is_reg_defined(D,Vsti) of
@@ -1466,6 +1471,10 @@ assert_term(Src, Vst) ->
     get_term_type(Src, Vst),
     ok.
 
+%% Accept only x and y register operands; reject any other source.
+assert_not_literal({Kind,_}) when Kind =:= x; Kind =:= y -> ok;
+assert_not_literal(Other) -> error({literal_not_allowed,Other}).
+
 %% The possible types.
 %%
 %% First non-term types:
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index d894694c79..65c4f140c9 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -210,8 +210,11 @@ do_compile(Input, Opts0) ->
                               {error,Reason}
                       end
              end,
-    %% Dialyzer has already spawned workers.
-    case lists:member(dialyzer, Opts) of
+    %% Some tools, like Dialyzer, have already spawned workers,
+    %% and spawning extra workers actually slows the compilation
+    %% down instead of speeding it up, so we provide a mechanism
+    %% to bypass the compiler process.
+    case lists:member(no_spawn_compiler_process, Opts) of
         true ->
             IntFun();
         false ->
@@ -823,6 +826,9 @@ kernel_passes() ->
      {unless,no_bsm_opt,{pass,beam_ssa_bsm}},
      {iff,dssabsm,{listing,"ssabsm"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
+     {unless,no_fun_opt,{pass,beam_ssa_funs}},
+     {iff,dssafuns,{listing,"ssafuns"}},
+     {iff,ssalint,{pass,beam_ssa_lint}},
      {unless,no_ssa_opt,{pass,beam_ssa_opt}},
      {iff,dssaopt,{listing,"ssaopt"}},
      {iff,ssalint,{pass,beam_ssa_lint}},
@@ -862,7 +868,9 @@ asm_passes() ->
        %% need to do a few clean-ups to code.
        {iff,no_postopt,[{pass,beam_clean}]},
 
+       {iff,diffable,?pass(diffable)},
        {pass,beam_z},
+       {iff,diffable,{listing,"S"}},
        {iff,dz,{listing,"z"}},
        {iff,dopt,{listing,"optimize"}},
        {iff,'S',{listing,"S"}},
@@ -1006,11 +1014,17 @@ parse_module(_Code, St0) ->
     end.
 
 do_parse_module(DefEncoding, #compile{ifile=File,options=Opts,dir=Dir}=St) ->
+    SourceName0 = proplists:get_value(source, Opts, File),
+    SourceName = case member(deterministic, Opts) of
+                     true -> filename:basename(SourceName0);
+                     false -> SourceName0
+                 end,
     R = epp:parse_file(File,
-		       [{includes,[".",Dir|inc_paths(Opts)]},
-			{macros,pre_defs(Opts)},
-			{default_encoding,DefEncoding},
-			extra]),
+                       [{includes,[".",Dir|inc_paths(Opts)]},
+                        {source_name, SourceName},
+                        {macros,pre_defs(Opts)},
+                        {default_encoding,DefEncoding},
+                        extra]),
     case R of
 	{ok,Forms,Extra} ->
 	    Encoding = proplists:get_value(encoding, Extra),
@@ -1914,6 +1928,39 @@ restore_expand_module([F|Fs]) ->
     [F|restore_expand_module(Fs)];
 restore_expand_module([]) -> [].
 
+%%%
+%%% Transform the BEAM code to make it more friendly for
+%%% diffing: use function names instead of labels for
+%%% local calls and number the labels relative to each function.
+%%%
+
+diffable(Code0, St) ->
+    {Mod,Exp,Attr,Fs0,NumLabels} = Code0,
+    %% Entry label of each function => its {Name,Arity} pair.
+    EntryMap = maps:from_list([{Lbl,{F,A}} ||
+                                  {function,F,A,Lbl,_} <- Fs0]),
+    Fs = lists:map(fun(Fun) -> diffable_fix_function(Fun, EntryMap) end,
+                   Fs0),
+    {ok,{Mod,Exp,Attr,Fs,NumLabels},St}.
+
+diffable_fix_function({function,Name,Arity,Entry0,Is0}, LabelMap0) ->
+    EntryName = maps:get(Entry0, LabelMap0),    %New name for the entry label.
+    {RevIs,Labels} = diffable_label_map(Is0, 1, LabelMap0, []),
+    NoFallback = fun(Unknown) -> error({no_fb,Unknown}) end,
+    Renamed = beam_utils:replace_labels(RevIs, [], Labels, NoFallback),
+    {function,Name,Arity,EntryName,Renamed}.
+
+%% Walk the instructions, giving every label not already in Db the
+%% next number counting up from Next. Returns the instructions in
+%% reverse order together with the extended Db.
+diffable_label_map([], _Next, Db, RevIs) -> {RevIs,Db};
+diffable_label_map([{label,L}|T], Next, Db, RevIs) ->
+    case Db of
+        #{L:=Known} -> diffable_label_map(T, Next, Db, [{label,Known}|RevIs]);
+        #{} -> diffable_label_map(T, Next+1, Db#{L=>Next}, [{label,Next}|RevIs])
+    end;
+diffable_label_map([Instr|T], Next, Db, RevIs) ->
+    diffable_label_map(T, Next, Db, [Instr|RevIs]).
 
 -spec options() -> 'ok'.
 
@@ -2047,6 +2094,7 @@ pre_load() ->
 	 beam_ssa_bsm,
 	 beam_ssa_codegen,
 	 beam_ssa_dead,
+         beam_ssa_funs,
 	 beam_ssa_opt,
 	 beam_ssa_pre_codegen,
 	 beam_ssa_recv,
diff --git a/lib/compiler/src/compiler.app.src b/lib/compiler/src/compiler.app.src
index 7b802fdd62..86259270bd 100644
--- a/lib/compiler/src/compiler.app.src
+++ b/lib/compiler/src/compiler.app.src
@@ -39,6 +39,7 @@
              beam_ssa_bsm,
              beam_ssa_codegen,
              beam_ssa_dead,
+             beam_ssa_funs,
              beam_ssa_lint,
              beam_ssa_opt,
              beam_ssa_pp,
diff --git a/lib/compiler/src/erl_bifs.erl b/lib/compiler/src/erl_bifs.erl
index 71ab0e872a..ce9762899e 100644
--- a/lib/compiler/src/erl_bifs.erl
+++ b/lib/compiler/src/erl_bifs.erl
@@ -108,6 +108,7 @@ is_pure(erlang, list_to_atom, 1) -> true;
 is_pure(erlang, list_to_binary, 1) -> true;
 is_pure(erlang, list_to_float, 1) -> true;
 is_pure(erlang, list_to_integer, 1) -> true;
+is_pure(erlang, list_to_integer, 2) -> true;
 is_pure(erlang, list_to_pid, 1) -> true;
 is_pure(erlang, list_to_tuple, 1) -> true;
 is_pure(erlang, max, 2) -> true;