4 files changed, 457 insertions, 120 deletions
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 79c4b651db..fe1ce6f60b 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -37,13 +37,14 @@ function({function,Name,Arity,CLabel,Is0}) ->
 	%% Collect basic blocks and optimize them.
 	Is1 = blockify(Is0),
 	Is2 = embed_lines(Is1),
-	Is3 = move_allocates(Is2),
-	Is4 = beam_utils:live_opt(Is3),
-	Is5 = opt_blocks(Is4),
-	Is6 = beam_utils:delete_live_annos(Is5),
-
-	%% Done.
-	{function,Name,Arity,CLabel,Is6}
+        Is3 = beam_utils:anno_defs(Is2),
+        Is4 = move_allocates(Is3),
+        Is5 = beam_utils:live_opt(Is4),
+        Is6 = opt_blocks(Is5),
+        Is = beam_utils:delete_live_annos(Is6),
+
+        %% Done.
+        {function,Name,Arity,CLabel,Is}
     catch
         Class:Error:Stack ->
 	    io:fwrite("Function: ~w/~w\n", [Name,Arity]),
@@ -172,7 +173,7 @@ find_fixpoint(OptFun, Is0) ->
 %%  safe to assume that if x(N) is initialized, then all lower-numbered
 %%  x registers are also initialized.
 %%
-%%  For example, in general it is not safe to transform the following
+%%  For example, we must be careful when transforming the following
 %%  instructions:
 %%
 %%     get_tuple_element x(0) Element => x(1)
@@ -184,13 +185,9 @@ find_fixpoint(OptFun, Is0) ->
 %%     get_tuple_element x(0) Element => x(1)
 %%
 %%  The transformation is safe if and only if x(1) has been
-%%  initialized previously. Unfortunately, beam_reorder may have moved
-%%  a get_tuple_element instruction so that x(1) is not always
-%%  initialized when this code is reached. To find whether or not x(1)
-%%  is initialized, we would need to analyze all code preceding these
-%%  two instructions (across branches). Since we currently don't have
-%%  any practical mechanism for doing that, we will have to
-%%  conservatively assume that the transformation is unsafe.
+%%  initialized previously.  We will use the annotations added by
+%%  beam_utils:anno_defs/1 to determine whether x(a) has been
+%%  initialized.
 
 move_allocates([{block,Bl0}|Is]) ->
     Bl = move_allocates_1(reverse(Bl0), []),
@@ -199,15 +196,20 @@ move_allocates([I|Is]) ->
     [I|move_allocates(Is)];
 move_allocates([]) -> [].
 
+move_allocates_1([{'%def',_}|Is], Acc) ->
+    move_allocates_1(Is, Acc);
 move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
-    case {alloc_may_pass(I),alloc_live_regs(I, Live0)} of
-	{false,_} ->
-	    move_allocates_1(Is, [I|Acc0]);
-	{true,not_possible} ->
-	    move_allocates_1(Is, [I|Acc0]);
-	{true,Live} when is_integer(Live) ->
-	    A = {set,[],[],{alloc,Live,Info}},
-	    move_allocates_1(Is, [A,I|Acc])
+    case alloc_may_pass(I) of
+        false ->
+            move_allocates_1(Is, [I|Acc0]);
+        true ->
+            case alloc_live_regs(I, Is, Live0) of
+                not_possible ->
+                    move_allocates_1(Is, [I|Acc0]);
+                Live when is_integer(Live) ->
+                    A = {set,[],[],{alloc,Live,Info}},
+                    move_allocates_1(Is, [A,I|Acc])
+            end
     end;
 move_allocates_1([I|Is], Acc) ->
     move_allocates_1(Is, [I|Acc]);
@@ -471,16 +473,34 @@ count_ones(Bits, Acc) ->
 %% Calculate the new number of live registers when we move an allocate
 %% instruction upwards, passing a 'set' instruction.
 
-alloc_live_regs({set,Ds,Ss,_}, Regs0) ->
+alloc_live_regs({set,Ds,Ss,_}, Is, Regs0) ->
     Rset = x_live(Ss, x_dead(Ds, (1 bsl Regs0)-1)),
-    live_regs(0, Rset).
+    Live = live_regs(0, Rset),
+    case ensure_contiguous(Rset, Live) of
+        not_possible ->
+            %% Liveness information (looking forward in the
+            %% instruction stream) can't prove that moving this
+            %% allocation instruction is safe. Now use the annotation
+            %% of defined registers at the beginning of the current
+            %% block to see whether moving would be safe.
+            Def0 = defined_regs(Is, 0),
+            Def = Def0 band ((1 bsl Live) - 1),
+            ensure_contiguous(Rset bor Def, Live);
+        Live ->
+            %% Safe based on liveness information.
+            Live
+    end.
 
 live_regs(N, 0) ->
     N;
-live_regs(N, Regs) when Regs band 1 =:= 1 ->
-    live_regs(N+1, Regs bsr 1);
-live_regs(_, _) ->
-    not_possible.
+live_regs(N, Regs) ->
+    live_regs(N+1, Regs bsr 1).
+
+ensure_contiguous(Regs, Live) ->
+    case (1 bsl Live) - 1 of
+        Regs -> Live;
+        _ -> not_possible
+    end.
 
 x_dead([{x,N}|Rs], Regs) -> x_dead(Rs, Regs band (bnot (1 bsl N)));
 x_dead([_|Rs], Regs) -> x_dead(Rs, Regs);
@@ -489,3 +509,14 @@ x_dead([], Regs) -> Regs.
 x_live([{x,N}|Rs], Regs) -> x_live(Rs, Regs bor (1 bsl N));
 x_live([_|Rs], Regs) -> x_live(Rs, Regs);
 x_live([], Regs) -> Regs.
+
+%% defined_regs(ReversedInstructions) -> RegBitmap.
+%%  Given a reversed instruction stream, determine the
+%%  the registers that are defined.
+
+defined_regs([{'%def',Def}|_], Regs) ->
+    Def bor Regs;
+defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
+    x_live(Ds, Regs bor ((1 bsl Live) - 1));
+defined_regs([{set,Ds,_,_}|Is], Regs) ->
+    defined_regs(Is, x_live(Ds, Regs)).
diff --git a/lib/compiler/src/beam_record.erl b/lib/compiler/src/beam_record.erl
index e4775ca6a4..db1053e48c 100644
--- a/lib/compiler/src/beam_record.erl
+++ b/lib/compiler/src/beam_record.erl
@@ -15,19 +15,12 @@
 %%
 %% %CopyrightEnd%
 %%
-%% File:    beam_record.erl
-%% Author:  Björn-Egil Dahlberg
-%% Created: 2014-09-03
-%%
-
--module(beam_record).
--export([module/2]).
 
 %% Rewrite the instruction stream on tagged tuple tests.
-%% Tagged tuples means a tuple of any arity with an atom as its first element.
-%% Typically records, ok-tuples and error-tuples.
-%% 
-%% from:
+%% Tagged tuples means a tuple of any arity with an atom as its
+%% first element, such as records and error tuples.
+%%
+%% From:
 %%     ...
 %%     {test,is_tuple,Fail,[Src]}.
 %%     {test,test_arity,Fail,[Src,Sz]}.
@@ -36,13 +29,16 @@
 %%     ...
 %%     {test,is_eq_exact,Fail,[Dst,Atom]}.
 %%     ...
-%% to:
+%% To:
 %%     ...
 %%     {test,is_tagged_tuple,Fail,[Src,Sz,Atom]}.
 %%     ...
+%%
 
+-module(beam_record).
+-export([module/2]).
 
--import(lists, [reverse/1]).
+-import(lists, [reverse/1,reverse/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
@@ -51,55 +47,85 @@ module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
     Fs = [function(F) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
-function({function,Name,Arity,CLabel,Is}) ->
+function({function,Name,Arity,CLabel,Is0}) ->
     try
-        Idx = beam_utils:index_labels(Is),
-        {function,Name,Arity,CLabel,rewrite(Is,Idx)}
+        Is1 = beam_utils:anno_defs(Is0),
+        Idx = beam_utils:index_labels(Is1),
+        Is = rewrite(reverse(Is1), Idx),
+        {function,Name,Arity,CLabel,Is}
     catch
         Class:Error:Stack ->
             io:fwrite("Function: ~w/~w\n", [Name,Arity]),
             erlang:raise(Class, Error, Stack)
     end.
 
-rewrite(Is,Idx) ->
-    rewrite(Is,Idx,[]).
+rewrite(Is, Idx) ->
+    rewrite(Is, Idx, 0, []).
 
-rewrite([{test,is_tuple,Fail,[Src]}=I1,
-         {test,test_arity,Fail,[Src,N]}=I2|Is],Idx,Acc) ->
-    case is_tagged_tuple(Is,Fail,Src,Idx) of
+rewrite([{test,test_arity,Fail,[Src,N]}=TA,
+         {test,is_tuple,Fail,[Src]}=TT|Is], Idx, Def, Acc0) ->
+    case is_tagged_tuple(Acc0, Def, Fail, Src, Idx) of
         no ->
-            rewrite(Is,Idx,[I2,I1|Acc]);
-        {Atom,[{block,[]}|Is1]} ->
-            rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc]);
-        {Atom,Is1} ->
-            rewrite(Is1,Idx,[{test,is_tagged_tuple,Fail,[Src,N,Atom]}|Acc])
+            rewrite(Is, Idx, 0, [TT,TA|Acc0]);
+        {yes,Atom,Acc} ->
+            I = {test,is_tagged_tuple,Fail,[Src,N,Atom]},
+            rewrite(Is, Idx, Def, [I|Acc])
     end;
-rewrite([I|Is],Idx,Acc) ->
-    rewrite(Is,Idx,[I|Acc]);
-rewrite([],_,Acc) -> reverse(Acc).
-
-is_tagged_tuple([{block,[{set,[Dst],[Src],{get_tuple_element,0}}=B|Bs]},
-                 {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is],Fail,Src,Idx) ->
+rewrite([{block,[{'%def',Def}|Bl]}|Is], Idx, _Def, Acc) ->
+    rewrite(Is, Idx, Def, [{block,Bl}|Acc]);
+rewrite([{label,L}=I|Is], Idx0, Def, Acc) ->
+    Idx = beam_utils:index_label(L, Acc, Idx0),
+    rewrite(Is, Idx, Def, [I|Acc]);
+rewrite([I|Is], Idx, Def, Acc) ->
+    rewrite(Is, Idx, Def, [I|Acc]);
+rewrite([], _, _, Acc) -> Acc.
 
-    %% if Dst is killed in the instruction stream and at fail label,
-    %% we can safely remove get_tuple_element.
-    %%
-    %% if Dst is not killed in the stream, we cannot remove get_tuple_element
-    %% since it is referenced.
-
-    case is_killed(Dst,Is,Fail,Idx) of
-        true  -> {Atom,[{block,Bs}|Is]};
-        false -> {Atom,[{block,[B|Bs]}|Is]}
+is_tagged_tuple([{block,Bl},
+                 {test,is_eq_exact,Fail,[Dst,{atom,_}=Atom]}|Is],
+                Def, Fail, Src, Idx) ->
+    case is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, []) of
+        no ->
+            no;
+        {yes,[]} ->
+            {yes,Atom,Is};
+        {yes,[_|_]=Block} ->
+            {yes,Atom,[{block,Block}|Is]}
     end;
-is_tagged_tuple([{block,[{set,_,_,_}=B|Bs]},
-                 {test,is_eq_exact,_,_}=I|Is],Fail,Src,Idx) ->
-    case is_tagged_tuple([{block,Bs},I|Is],Fail,Src,Idx) of
-        {Atom,[{block,Bsr}|Isr]} -> {Atom,[{block,[B|Bsr]}|Isr]};
-        no                       -> no
+is_tagged_tuple(_, _, _, _, _) ->
+    no.
+
+is_tagged_tuple_1([{set,[Dst],[Src],{get_tuple_element,0}}=I|Bl],
+                  Is, Fail, Src, Dst, Idx, Def, Acc) ->
+    %% Check usage of Dst to find out whether the get_tuple_element
+    %% is needed.
+    case usage(Dst, Is, Fail, Idx) of
+        killed ->
+            %% Safe to remove the get_tuple_element instruction.
+            {yes,reverse(Acc, Bl)};
+        used ->
+            %% Actively used. Must keep instruction.
+            {yes,reverse(Acc, [I|Bl])};
+        not_used ->
+            %% Not actually used (but must be initialized).
+            case is_defined(Dst, Def) of
+                false ->
+                    %% Dst must be initialized, but the
+                    %% actual value does not matter.
+                    Kill = {set,[Dst],[nil],move},
+                    {yes,reverse(Acc, [Kill|Bl])};
+                true ->
+                    %% The register is previously initialized.
+                    %% We can remove the instruction.
+                    {yes,reverse(Acc, Bl)}
+            end
     end;
-is_tagged_tuple(_Is,_Fail,_Src,_Idx) ->
+is_tagged_tuple_1([I|Bl], Is, Fail, Src, Dst, Idx, Def, Acc) ->
+    is_tagged_tuple_1(Bl, Is, Fail, Src, Dst, Idx, Def, [I|Acc]);
+is_tagged_tuple_1(_, _, _, _, _, _, _, _) ->
     no.
 
-is_killed(Dst,Is,{_,Lbl},Idx) ->
-    beam_utils:is_killed(Dst,Is,Idx) andalso
-        beam_utils:is_killed_at(Dst,Lbl,Idx).
+usage(Dst, Is, Fail, Idx) ->
+    beam_utils:usage(Dst, [{test,is_number,Fail,[nil]}|Is], Idx).
+
+is_defined({x,X}, Def) ->
+    (Def bsr X) band 1 =:= 1.
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 941ae95110..e61d6a43b4 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -22,11 +22,13 @@
 
 -module(beam_utils).
 -export([is_killed_block/2,is_killed/3,is_killed_at/3,
-	 is_not_used/3,
+	 is_not_used/3,usage/3,
 	 empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
 	 code_at/2,bif_to_test/3,is_pure_test/1,
 	 live_opt/1,delete_live_annos/1,combine_heap_needs/2,
-	 split_even/1]).
+         anno_defs/1,
+	 split_even/1
+        ]).
 
 -export_type([code_index/0,module_code/0,instruction/0]).
 
@@ -60,6 +62,23 @@
 	{lbl :: code_index(),            %Label to code index.
 	 res :: result_cache()}).        %Result cache for each label.
 
+%% usage(Register, [Instruction], State) -> killed|not_used|used.
+%%  Determine the usage of Register in the instruction sequence.
+%%  The return value is one of:
+%%
+%%  killed   - The register is not used in any way.
+%%  not_used - The register is referenced only by an allocating instruction
+%%             (the actual value does not matter).
+%%  used     - The register is used (its value do matter).
+
+-spec usage(beam_asm:reg(), [instruction()], code_index()) ->
+  'killed' | 'not_used' | 'used'.
+
+usage(R, Is, D) ->
+    St = #live{lbl=D,res=gb_trees:empty()},
+    {Usage,_} = check_liveness(R, Is, St),
+    Usage.
+
 
 %% is_killed_block(Register, [Instruction]) -> true|false
 %%  Determine whether a register is killed by the instruction sequence inside
@@ -293,6 +312,23 @@ combine_heap_needs(H1, H2) ->
     combine_alloc_lists([H1,H2]).
 
 
+%% anno_defs(Instructions) -> Instructions'
+%%  Add {'%def',RegisterBitmap} annotations to the beginning of
+%%  each block.  Iff bit X is set in the the bitmap, it means
+%%  that {x,X} is defined when the block is entered.
+
+-spec anno_defs([instruction()]) -> [instruction()].
+
+anno_defs(Is0) ->
+    {Bef,[Fi|Is1]} =
+	splitwith(fun({func_info,_,_,_}) -> false;
+		     (_) -> true
+		  end, Is0),
+    {func_info,_,_,Arity} = Fi,
+    Regs = init_def_regs(Arity),
+    Is = defs(Is1, Regs, #{}),
+    Bef ++ [Fi|Is].
+
 %% split_even/1
 %% [1,2,3,4,5,6] -> {[1,3,5],[2,4,6]}
 
@@ -300,7 +336,6 @@ combine_heap_needs(H1, H2) ->
 
 split_even(Rs) -> split_even(Rs, [], []).
 
-
 %%%
 %%% Local functions.
 %%%
@@ -318,6 +353,10 @@ check_liveness(R, [{block,Blk}|Is], St0) ->
     case check_liveness_block(R, Blk, St0) of
 	{transparent,St1} ->
 	    check_liveness(R, Is, St1);
+	{alloc_used,St1} ->
+            %% Used by an allocating instruction, but value not referenced.
+            %% Must check the rest of the instructions.
+	    not_used(check_liveness(R, Is, St1));
 	{Other,_}=Res when is_atom(Other) ->
 	    Res
     end;
@@ -376,9 +415,16 @@ check_liveness(R, [{bs_init,_,_,none,Ss,Dst}|Is], St) ->
 check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
     case R of
 	{x,X} ->
-	    case X < Live orelse member(R, Ss) of
-		true -> {used,St};
-		false -> {killed,St}
+            case member(R, Ss) of
+                true ->
+                    {used,St};
+                false ->
+                    if
+                        X < Live ->
+                            not_used(check_liveness(R, Is, St));
+                        true ->
+                            {killed,St}
+                    end
 	    end;
 	{y,_} ->
 	    case member(R, Ss) of
@@ -588,7 +634,7 @@ check_liveness_ret(R, R, St) -> {used,St};
 check_liveness_ret(_, _, St) -> {killed,St}.
 
 %% check_liveness_block(Reg, [Instruction], State) ->
-%%     {killed | not_used | used | transparent,State'}
+%%     {killed | not_used | used | alloc_used | transparent,State'}
 %%  Finds out how Reg is used in the instruction sequence inside a block.
 %%  Returns one of:
 %%    killed - Reg is assigned a new value or killed by an
@@ -596,6 +642,7 @@ check_liveness_ret(_, _, St) -> {killed,St}.
 %%    not_used - The value is not used, but the register is referenced
 %%       e.g. by an allocation instruction
 %%    transparent - Reg is neither used nor killed
+%%    alloc_used - Used only in an allocate instruction
 %%    used - Reg is explicitly used by an instruction
 %%
 %%  '%live' annotations are not allowed.
@@ -609,7 +656,7 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
 	true ->
 	    case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
 		{killed,St} -> {not_used,St};
-		{transparent,St} -> {not_used,St};
+                {transparent,St} -> {alloc_used,St};
 		{_,_}=Res -> Res
 	    end
     end;
@@ -949,3 +996,208 @@ split_even([], Ss, Ds) ->
     {reverse(Ss),reverse(Ds)};
 split_even([S,D|Rs], Ss, Ds) ->
     split_even(Rs, [S|Ss], [D|Ds]).
+
+%%%
+%%% Add annotations for defined registers.
+%%%
+%%% This analysis is done by scanning the instructions in
+%%% execution order.
+%%%
+
+defs([{apply,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{bif,_,{f,Fail},_Src,Dst}=I|Is], Regs0, D) ->
+    Regs = def_regs([Dst], Regs0),
+    [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([{block,Block0}|Is], Regs0, D0) ->
+    {Block,Regs,D} = defs_list(Block0, Regs0, D0),
+    [{block,[{'%def',Regs0}|Block]}|defs(Is, Regs, D)];
+defs([{bs_init,{f,L},_,_,_,Dst}=I|Is], Regs0, D) ->
+    Regs = def_regs([Dst], Regs0),
+    [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([{bs_put,{f,L},_,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([build_stacktrace=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{call,_,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{call_ext,_,{extfunc,M,F,A}}=I|Is], _Regs, D) ->
+    case erl_bifs:is_exit_bif(M, F, A) of
+        false ->
+            [I|defs(Is, 1, D)];
+        true ->
+            [I|defs_unreachable(Is, D)]
+    end;
+defs([{call_ext,_,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{call_fun,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{'catch',_,{f,L}}=I|Is], Regs, D) ->
+    RegsAtLabel = init_def_regs(1),
+    [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))];
+defs([{catch_end,_}=I|Is], _Regs, D) ->
+    Regs = init_def_regs(1),
+    [I|defs(Is, Regs, D)];
+defs([{gc_bif,_,{f,Fail},Live,_Src,Dst}=I|Is], Regs0, D) ->
+    true = all_defined(Live, Regs0),            %Assertion.
+    Regs = def_regs([Dst], init_def_regs(Live)),
+    [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([{get_map_elements,{f,L},_Src,{list,DstList}}=I|Is], Regs0, D) ->
+    {_,Ds} = beam_utils:split_even(DstList),
+    Regs = def_regs(Ds, Regs0),
+    [I|defs(Is, Regs, update_regs(L, Regs0, D))];
+defs([{get_tuple_element,_,_,Dst}=I|Is], Regs0, D) ->
+    Regs = def_regs([Dst], Regs0),
+    [I|defs(Is, Regs, D)];
+defs([{jump,{f,L}}=I|Is], Regs, D) ->
+    [I|defs_unreachable(Is, update_regs(L, Regs, D))];
+defs([{label,L}=I|Is], Regs0, D) ->
+    case D of
+        #{L:=Regs1} ->
+            Regs = Regs0 band Regs1,
+            [I|defs(Is, Regs, D)];
+        #{} ->
+            [I|defs(Is, Regs0, D)]
+    end;
+defs([{loop_rec,{f,L},{x,0}}=I|Is], _Regs, D0) ->
+    RegsAtLabel = init_def_regs(0),
+    D = update_regs(L, RegsAtLabel, D0),
+    [I|defs(Is, init_def_regs(1), D)];
+defs([{loop_rec_end,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 0, D)];
+defs([{make_fun2,_,_,_,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
+defs([{move,_,Dst}=I|Is], Regs0, D) ->
+    Regs = def_regs([Dst], Regs0),
+    [I|defs(Is, Regs, D)];
+defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) ->
+    Regs = def_regs([Dst], Regs0),
+    [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([return=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) ->
+    D = update_list([Fail|List], Regs, D0),
+    [I|defs_unreachable(Is, D)];
+defs([{test,_,{f,L},_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, update_regs(L, Regs, D))];
+defs([{test,_,{f,L},Live,_,Dst}=I|Is], Regs0, D) ->
+    true = all_defined(Live, Regs0),             %Assertion.
+    Regs = def_regs([Dst], init_def_regs(Live)),
+    [I|defs(Is, Regs, update_regs(L, Regs0, D))];
+defs([{'try',_,{f,L}}=I|Is], Regs, D) ->
+    RegsAtLabel = init_def_regs(3),
+    [I|defs(Is, Regs, update_regs(L, RegsAtLabel, D))];
+defs([{try_case,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, init_def_regs(3), D)];
+defs([{wait,_}=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+defs([{wait_timeout,_,_}=I|Is], _Regs, D) ->
+    [I|defs(Is, 0, D)];
+
+%% Exceptions.
+defs([{badmatch,_}=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+defs([{case_end,_}=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+defs([if_end=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+defs([{try_case_end,_}=I|Is], _Regs, D) ->
+    [I|defs_unreachable(Is, D)];
+
+%% Neutral instructions
+defs([{bs_context_to_binary,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{bs_restore2,_,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{bs_save2,_,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{deallocate,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{kill,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{line,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{recv_mark,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{recv_set,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([timeout=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{trim,_,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{try_end,_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([{'%',_}=I|Is], Regs, D) ->
+    [I|defs(Is, Regs, D)];
+defs([], _, _) -> [].
+
+defs_unreachable([{label,L}=I|Is], D) ->
+    case D of
+        #{L:=Regs} ->
+            [I|defs(Is, Regs, D)];
+        #{} ->
+            defs_unreachable(Is, D)
+    end;
+defs_unreachable([_|Is], D) ->
+    defs_unreachable(Is, D);
+defs_unreachable([], _D) -> [].
+
+defs_list(Is, Regs, D) ->
+    defs_list(Is, Regs, D, []).
+
+defs_list([{set,Ds,_,{alloc,Live,Info}}=I|Is], Regs0, D0, Acc) ->
+    true = all_defined(Live, Regs0),             %Assertion.
+    D = case Info of
+            {gc_bif,_,{f,Fail}} ->
+                update_regs(Fail, Regs0, D0);
+            {put_map,_,{f,Fail}} ->
+                update_regs(Fail, Regs0, D0);
+            _ ->
+                D0
+        end,
+    Regs = def_regs(Ds, init_def_regs(Live)),
+    defs_list(Is, Regs, D, [I|Acc]);
+defs_list([{set,Ds,_,Info}=I|Is], Regs0, D0, Acc) ->
+    D = case Info of
+            {bif,_,{f,Fail}} ->
+                update_regs(Fail, Regs0, D0);
+            {try_catch,'catch',{f,Fail}} ->
+                update_regs(Fail, init_def_regs(1), D0);
+            {try_catch,'try',{f,Fail}} ->
+                update_regs(Fail, init_def_regs(3), D0);
+            _ ->
+                D0
+        end,
+    Regs = def_regs(Ds, Regs0),
+    defs_list(Is, Regs, D, [I|Acc]);
+defs_list([], Regs, D, Acc) ->
+    {reverse(Acc),Regs,D}.
+
+init_def_regs(Arity) ->
+    (1 bsl Arity) - 1.
+
+def_regs([{x,X}|T], Regs) ->
+    def_regs(T, Regs bor (1 bsl X));
+def_regs([_|T], Regs) ->
+    def_regs(T, Regs);
+def_regs([], Regs) -> Regs.
+
+update_list([{f,L}|T], Regs, D0) ->
+    D = update_regs(L, Regs, D0),
+    update_list(T, Regs, D);
+update_list([_|T], Regs, D) ->
+    update_list(T, Regs, D);
+update_list([], _Regs, D) -> D.
+
+update_regs(L, Regs0, D) ->
+    case D of
+        #{L:=Regs1} ->
+            Regs = Regs0 band Regs1,
+            D#{L:=Regs};
+        #{} ->
+            D#{L=>Regs0}
+    end.
+
+all_defined(Live, Regs) ->
+    All = (1 bsl Live) - 1,
+    Regs band All =:= All.
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 8189420c1c..62c6c54b9f 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -446,7 +446,7 @@ cg_fun(Les, Hvs, Vdb, AtomMod, NameArity, Anno, St0) ->
 					   put_reg(V, Reg)
 				   end, [], Hvs),
 			 stk=[]}, 0, Vdb),
-    {B,_Aft,St} = cg_list(Les, 0, Vdb, Bef,
+    {B,_Aft,St} = cg_list(Les, Vdb, Bef,
 			  St3#cg{bfail=0,
 				 ultimate_failure=UltimateMatchFail,
 				 is_top_block=true}),
@@ -498,19 +498,19 @@ cg(#cg_need_heap{h=H}, _Vdb, Bef, St) ->
 
 %% cg_list([Kexpr], FirstI, Vdb, StackReg, St) -> {[Ainstr],StackReg,St}.
 
-cg_list(Kes, I, Vdb, Bef, St0) ->
+cg_list(Kes, Vdb, Bef, St0) ->
     {Keis,{Aft,St1}} =
 	flatmapfoldl(fun (Ke, {Inta,Sta}) ->
 			     {Keis,Intb,Stb} = cg(Ke, Vdb, Inta, Sta),
 			     {Keis,{Intb,Stb}}
-		     end, {Bef,St0}, need_heap(Kes, I)),
+		     end, {Bef,St0}, need_heap(Kes)),
     {Keis,Aft,St1}.
 
 %% need_heap([Lkexpr], I, St) -> [Lkexpr].
 %%  Insert need_heap instructions in Kexpr list.  Try to be smart and
 %%  collect them together as much as possible.
 
-need_heap(Kes0, _I) ->
+need_heap(Kes0) ->
     {Kes,H} = need_heap_0(reverse(Kes0), 0, []),
 
     %% Prepend need_heap if necessary.
@@ -741,26 +741,55 @@ block_cg(Es, Le, _Vdb, Bef, St) ->
     block_cg(Es, Le, Bef, St).
 
 block_cg(Es, Le, Bef, #cg{is_top_block=false}=St) ->
-    cg_block(Es, Le#l.i, Le#l.vdb, Bef, St);
-block_cg(Es, Le, Bef, St0) ->
-    {Is0,Aft,St} = cg_block(Es, Le#l.i, Le#l.vdb, Bef,
-			    St0#cg{is_top_block=false,need_frame=false}),
-    Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
-    {Is,Aft,St#cg{is_top_block=true}}.
-
-cg_block([], _I, _Vdb, Bef, St0) ->
+    cg_block(Es, Le#l.vdb, Bef, St);
+block_cg(Es, Le, Bef, #cg{is_top_block=true}=St0) ->
+    %% No stack frame has been established yet. Do we need one?
+    case need_stackframe(Es) of
+        true ->
+            %% We need a stack frame. Generate the code and add the
+            %% code for creating and deallocating the stack frame.
+            {Is0,Aft,St} = cg_block(Es, Le#l.vdb, Bef,
+                                    St0#cg{is_top_block=false,need_frame=false}),
+            Is = top_level_block(Is0, Aft, max_reg(Bef#sr.reg), St),
+            {Is,Aft,St#cg{is_top_block=true}};
+        false ->
+            %% This sequence of instructions ending in a #k_match{} (a
+            %% 'case' or 'if') in the Erlang code does not need a
+            %% stack frame yet. Delay the creation (if a stack frame
+            %% is needed at all, it will be created inside the
+            %% #k_match{}).
+            cg_list(Es, Le#l.vdb, Bef, St0)
+    end.
+
+%% need_stackframe([Kexpr]) -> true|false.
+%%  Does this list of instructions need a stack frame?
+%%
+%%  A sequence of instructions that don't clobber the X registers
+%%  followed by a single #k_match{} doesn't need a stack frame.
+
+need_stackframe([H|T]) ->
+    case H of
+        #k_bif{op=#k_internal{}} -> true;
+        #k_put{arg=#k_binary{}} -> true;
+        #k_bif{} -> need_stackframe(T);
+        #k_put{} -> need_stackframe(T);
+        #k_guard_match{} -> need_stackframe(T);
+        #k_match{} when T =:= [] -> false;
+        _ -> true
+    end;
+need_stackframe([]) -> false.
+
+cg_block([], _Vdb, Bef, St0) ->
     {[],Bef,St0};
-cg_block(Kes0, I, Vdb, Bef, St0) ->
+cg_block(Kes0, Vdb, Bef, St0) ->
     {Kes2,Int1,St1} =
 	case basic_block(Kes0) of
 	    {Kes1,LastI,Args,Rest} ->
-		Ke = hd(Kes1),
-                #l{i=Fb} = get_kanno(Ke),
-		cg_basic_block(Kes1, Fb, LastI, Args, Vdb, Bef, St0);
+		cg_basic_block(Kes1, LastI, Args, Vdb, Bef, St0);
 	    {Kes1,Rest} ->
-		cg_list(Kes1, I, Vdb, Bef, St0)
+		cg_list(Kes1, Vdb, Bef, St0)
 	end,
-    {Kes3,Int2,St2} = cg_block(Rest, I, Vdb, Int1, St1),
+    {Kes3,Int2,St2} = cg_block(Rest, Vdb, Int1, St1),
     {Kes2 ++ Kes3,Int2,St2}.
 
 basic_block(Kes) -> basic_block(Kes, []).
@@ -838,12 +867,12 @@ func_vars(_) -> [].
 %%  save_carefully/4 during code generation to only save the variables
 %%  that can be saved without growing the stack frame.
 
-cg_basic_block(Kes, Fb, Lf, As, Vdb, Bef, St0) ->
+cg_basic_block(Kes, Lf, As, Vdb, Bef, St0) ->
     Int0 = reserve_arg_regs(As, Bef),
     Int = extend_stack(Int0, Lf, Lf+1, Vdb),
     {Keis,{Aft,St1}} =
 	flatmapfoldl(fun(Ke, St) -> cg_basic_block(Ke, St, Lf, Vdb) end,
-		     {Int,St0}, need_heap(Kes, Fb)),
+		     {Int,St0}, need_heap(Kes)),
     {Keis,Aft,St1}.
 
 cg_basic_block(#cg_need_heap{}=Ke, {Bef,St0}, _Lf, Vdb) ->
@@ -1478,8 +1507,8 @@ guard_clause_cg(#k_guard_clause{anno=#l{vdb=Vdb},guard=G,body=B}, Fail, Bef, St0
 %%  the correct exit point.  Primops and tests all go to the next
 %%  instruction on success or jump to a failure label.
 
-guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{i=I,vdb=Pdb}}, Fail, _Vdb, Bef, St) ->
-    protected_cg(Ts, Rs, Fail, I, Pdb, Bef, St);
+guard_cg(#k_protected{arg=Ts,ret=Rs,anno=#l{vdb=Pdb}}, Fail, _Vdb, Bef, St) ->
+    protected_cg(Ts, Rs, Fail, Pdb, Bef, St);
 guard_cg(#k_test{anno=#l{i=I},op=Test0,args=As,inverted=Inverted},
          Fail, Vdb, Bef, St0) ->
     #k_remote{mod=#k_atom{val=erlang},name=#k_atom{val=Test}} = Test0,
@@ -1500,13 +1529,13 @@ guard_cg(G, _Fail, Vdb, Bef, St) ->
 %% guard_cg_list([Kexpr], Fail, I, Vdb, StackReg, St) ->
 %%      {[Ainstr],StackReg,St}.
 
-guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) ->
+guard_cg_list(Kes, Fail, Vdb, Bef, St0) ->
     {Keis,{Aft,St1}} =
 	flatmapfoldl(fun (Ke, {Inta,Sta}) ->
 			     {Keis,Intb,Stb} =
 				 guard_cg(Ke, Fail, Vdb, Inta, Sta),
 			     {Keis,{Intb,Stb}}
-		     end, {Bef,St0}, need_heap(Kes, I)),
+		     end, {Bef,St0}, need_heap(Kes)),
     {Keis,Aft,St1}.
 
 %% protected_cg([Kexpr], [Ret], Fail, I, Vdb, Bef, St) -> {[Ainstr],Aft,St}.
@@ -1516,15 +1545,14 @@ guard_cg_list(Kes, Fail, I, Vdb, Bef, St0) ->
 %%  return values then these must be set to 'false' on failure,
 %%  control always passes to the next instruction.
 
-protected_cg(Ts, [], Fail, I, Vdb, Bef, St0) ->
+protected_cg(Ts, [], Fail, Vdb, Bef, St0) ->
     %% Protect these calls, revert when done.
-    {Tis,Aft,St1} = guard_cg_list(Ts, Fail, I, Vdb, Bef,
-				  St0#cg{bfail=Fail}),
+    {Tis,Aft,St1} = guard_cg_list(Ts, Fail, Vdb, Bef, St0#cg{bfail=Fail}),
     {Tis,Aft,St1#cg{bfail=St0#cg.bfail}};
-protected_cg(Ts, Rs, _Fail, I, Vdb, Bef, St0) ->
+protected_cg(Ts, Rs, _Fail, Vdb, Bef, St0) ->
     {Pfail,St1} = new_label(St0),
     {Psucc,St2} = new_label(St1),
-    {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, I, Vdb, Bef,
+    {Tis,Aft,St3} = guard_cg_list(Ts, Pfail, Vdb, Bef,
 				  St2#cg{bfail=Pfail}),
     %%ok = io:fwrite("cg ~w: ~p~n", [?LINE,{Rs,I,Vdb,Aft}]),
     %% Set return values to false.
@@ -1896,17 +1924,17 @@ cg_recv_wait(#k_atom{val=infinity}, #cg_block{anno=Le,es=Tes}, I, Bef, St0) ->
     %% But to keep the stack and register information up to date,
     %% we will generate the code for the 'after' body, and then discard it.
     Int1 = clear_dead(Bef, I, Le#l.vdb),
-    {_,Int2,St1} = cg_block(Tes, Le#l.i, Le#l.vdb,
+    {_,Int2,St1} = cg_block(Tes, Le#l.vdb,
                             Int1#sr{reg=clear_regs(Int1#sr.reg)}, St0),
     {[{wait,{f,St1#cg.recv}}],Int2,St1};
 cg_recv_wait(#k_int{val=0}, #cg_block{anno=Le,es=Tes}, _I, Bef, St0) ->
-    {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb, Bef, St0),
+    {Tis,Int,St1} = cg_block(Tes, Le#l.vdb, Bef, St0),
     {[timeout|Tis],Int,St1};
 cg_recv_wait(Te, #cg_block{anno=Le,es=Tes}, I, Bef, St0) ->
     Reg = cg_reg_arg(Te, Bef),
     %% Must have empty registers here!  Bug if anything in registers.
     Int0 = clear_dead(Bef, I, Le#l.vdb),
-    {Tis,Int,St1} = cg_block(Tes, Le#l.i, Le#l.vdb,
+    {Tis,Int,St1} = cg_block(Tes, Le#l.vdb,
 			     Int0#sr{reg=clear_regs(Int0#sr.reg)}, St0),
     {[{wait_timeout,{f,St1#cg.recv},Reg},timeout] ++ Tis,Int,St1}.
 
@@ -1967,7 +1995,7 @@ catch_cg(#cg_block{es=C}, #k_var{name=R}, Le, Vdb, Bef, St0) ->
     CatchTag = Le#l.i,
     Int1 = Bef#sr{stk=put_catch(CatchTag, Bef#sr.stk)},
     CatchReg = fetch_stack({catch_tag,CatchTag}, Int1#sr.stk),
-    {Cis,Int2,St2} = cg_block(C, Le#l.i, Le#l.vdb, Int1,
+    {Cis,Int2,St2} = cg_block(C, Le#l.vdb, Int1,
 			      St1#cg{break=B,in_catch=true}),
     [] = Int2#sr.reg,				%Assertion.
     Aft = Int2#sr{reg=[{0,R}],stk=drop_catch(CatchTag, Int2#sr.stk)},