19 files changed, 924 insertions, 446 deletions
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index 7df2edd714..91acb19971 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -61,6 +61,14 @@ rename_instrs([{'%live',_}|Is]) ->
     %% Ignore old type of live annotation. Only happens when compiling
     %% from very old .S files.
     rename_instrs(Is);
+rename_instrs([{get_list,S,D1,D2}|Is]) ->
+    %% Only happens when compiling from old .S files.
+    if
+        D1 =:= S ->
+            [{get_tl,S,D2},{get_hd,S,D1}|rename_instrs(Is)];
+        true ->
+            [{get_hd,S,D1},{get_tl,S,D2}|rename_instrs(Is)]
+    end;
 rename_instrs([I|Is]) ->
     [rename_instr(I)|rename_instrs(Is)];
 rename_instrs([]) -> [].
diff --git a/lib/compiler/src/beam_asm.erl b/lib/compiler/src/beam_asm.erl
index 453e00fce3..5ef340c831 100644
--- a/lib/compiler/src/beam_asm.erl
+++ b/lib/compiler/src/beam_asm.erl
@@ -407,7 +407,17 @@ encode_arg({atom, Atom}, Dict0) when is_atom(Atom) ->
     {Index, Dict} = beam_dict:atom(Atom, Dict0),
     {encode(?tag_a, Index), Dict};
 encode_arg({integer, N}, Dict) ->
-    {encode(?tag_i, N), Dict};
+    %% Conservatily assume that all integers whose absolute
+    %% value is greater than 1 bsl 128 will be bignums in
+    %% the runtime system.
+    if
+        N >= 1 bsl 128 ->
+            encode_arg({literal, N}, Dict);
+        N =< -(1 bsl 128) ->
+            encode_arg({literal, N}, Dict);
+        true ->
+            {encode(?tag_i, N), Dict}
+    end;
 encode_arg(nil, Dict) ->
     {encode(?tag_a, 0), Dict};
 encode_arg({f, W}, Dict) ->
@@ -465,7 +475,7 @@ encode_alloc_list_1([{floats,Floats}|T], Dict, Acc0) ->
 encode_alloc_list_1([], Dict, Acc) ->
     {iolist_to_binary(Acc),Dict}.
 
--spec encode(non_neg_integer(), pos_integer()) -> iodata().
+-spec encode(non_neg_integer(), integer()) -> iodata().
 
 encode(Tag, N) when N < 0 ->
     encode1(Tag, negative_to_bytes(N));
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index 39ae8d5347..8cd271e1dc 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -36,19 +36,18 @@ module({Mod,Exp,Attr,Fs0,Lc}, Opts) ->
 function({function,Name,Arity,CLabel,Is0}, Blockify) ->
     try
 	%% Collect basic blocks and optimize them.
-        Is2 = case Blockify of
-                  true ->
-                      Is1 = blockify(Is0),
-                      embed_lines(Is1);
-                  false ->
-                      Is0
+        Is1 = case Blockify of
+                  false -> Is0;
+                  true -> blockify(Is0)
               end,
-        Is3 = beam_utils:anno_defs(Is2),
-        Is4 = move_allocates(Is3),
-        Is5 = beam_utils:live_opt(Is4),
-        Is6 = opt_blocks(Is5),
-        Is7 = beam_utils:delete_annos(Is6),
-        Is = opt_allocs(Is7),
+        Is2 = embed_lines(Is1),
+        Is3 = local_cse(Is2),
+        Is4 = beam_utils:anno_defs(Is3),
+        Is5 = move_allocates(Is4),
+        Is6 = beam_utils:live_opt(Is5),
+        Is7 = opt_blocks(Is6),
+        Is8 = beam_utils:delete_annos(Is7),
+        Is = opt_allocs(Is8),
 
         %% Done.
         {function,Name,Arity,CLabel,Is}
@@ -109,7 +108,8 @@ collect({put_tuple,A,D})     -> {set,[D],[],{put_tuple,A}};
 collect({put,S})             -> {set,[],[S],put};
 collect({get_tuple_element,S,I,D}) -> {set,[D],[S],{get_tuple_element,I}};
 collect({set_tuple_element,S,D,I}) -> {set,[],[S,D],{set_tuple_element,I}};
-collect({get_list,S,D1,D2})  -> {set,[D1,D2],[S],get_list};
+collect({get_hd,S,D})  ->       {set,[D],[S],get_hd};
+collect({get_tl,S,D})  ->       {set,[D],[S],get_tl};
 collect(remove_message)      -> {set,[],[],remove_message};
 collect({put_map,F,Op,S,D,R,{list,Puts}}) ->
     {set,[D],[S|Puts],{alloc,R,{put_map,Op,F}}};
@@ -137,6 +137,11 @@ embed_lines([{block,B2},{line,_}=Line,{block,B1}|T], Acc) ->
 embed_lines([{block,B1},{line,_}=Line|T], Acc) ->
     B = {block,[{set,[],[],Line}|B1]},
     embed_lines([B|T], Acc);
+embed_lines([{block,B2},{block,B1}|T], Acc) ->
+    %% This can only happen when beam_block is run for
+    %% the second time.
+    B = {block,B1++B2},
+    embed_lines([B|T], Acc);
 embed_lines([I|Is], Acc) ->
     embed_lines(Is, [I|Acc]);
 embed_lines([], Acc) -> Acc.
@@ -204,7 +209,7 @@ move_allocates([]) -> [].
 
 move_allocates_1([{'%anno',_}|Is], Acc) ->
     move_allocates_1(Is, Acc);
-move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
+move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info0}}|Acc]=Acc0) ->
     case alloc_may_pass(I) of
         false ->
             move_allocates_1(Is, [I|Acc0]);
@@ -213,6 +218,7 @@ move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
                 not_possible ->
                     move_allocates_1(Is, [I|Acc0]);
                 Live when is_integer(Live) ->
+                    Info = safe_info(Info0),
                     A = {set,[],[],{alloc,Live,Info}},
                     move_allocates_1(Is, [A,I|Acc])
             end
@@ -221,17 +227,25 @@ move_allocates_1([I|Is], Acc) ->
     move_allocates_1(Is, [I|Acc]);
 move_allocates_1([], Acc) -> Acc.
 
+alloc_may_pass({set,_,[{fr,_}],fmove}) -> false;
 alloc_may_pass({set,_,_,{alloc,_,_}}) -> false;
 alloc_may_pass({set,_,_,{set_tuple_element,_}}) -> false;
 alloc_may_pass({set,_,_,put_list}) -> false;
 alloc_may_pass({set,_,_,put}) -> false;
 alloc_may_pass({set,_,_,_}) -> true.
 
+safe_info({nozero,Stack,Heap,_}) ->
+    %% nozero is not safe if the allocation instruction is moved
+    %% upwards past an instruction that may throw an exception
+    %% (such as element/2).
+    {zero,Stack,Heap,[]};
+safe_info(Info) -> Info.
+
 %% opt([Instruction]) -> [Instruction]
 %%  Optimize the instruction stream inside a basic block.
 
 opt([{set,[X],[X],move}|Is]) -> opt(Is);
-opt([{set,[X],_,move},{set,[X],_,move}=I|Is]) ->
+opt([{set,[Dst],_,move},{set,[Dst],[Src],move}=I|Is]) when Dst =/= Src ->
     opt([I|Is]);
 opt([{set,[{x,0}],[S1],move}=I1,{set,[D2],[{x,0}],move}|Is]) ->
     opt([I1,{set,[D2],[S1],move}|Is]);
@@ -250,6 +264,16 @@ opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1,
      {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is])
   when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
     opt([I2,I1|Is]);
+opt([{set,Hd0,Cons,get_hd}=GetHd,
+     {set,Tl0,Cons,get_tl}=GetTl|Is0]) ->
+    case {opt_moves(Hd0, [GetTl|Is0]),opt_moves(Tl0, [GetHd|Is0])} of
+        {{Hd0,Is},{Tl0,_}} ->
+            [GetHd|opt(Is)];
+        {{Hd,Is},{Tl0,_}} ->
+            [{set,Hd,Cons,get_hd}|opt(Is)];
+        {{_,_},{Tl,Is}} ->
+            [{set,Tl,Cons,get_tl}|opt(Is)]
+    end;
 opt([{set,Ds0,Ss,Op}|Is0]) ->
     {Ds,Is} = opt_moves(Ds0, Is0),
     [{set,Ds,Ss,Op}|opt(Is)];
@@ -265,17 +289,6 @@ opt_moves([D0]=Ds, Is0) ->
     case opt_move(D0, Is0) of
 	not_possible -> {Ds,Is0};
 	{D1,Is} -> {[D1],Is}
-    end;
-opt_moves([X0,Y0], Is0) ->
-    {X,Is2} = case opt_move(X0, Is0) of
-		  not_possible -> {X0,Is0};
-		  {Y0,_} -> {X0,Is0};
-		  {_X1,_Is1} = XIs1 -> XIs1
-	      end,
-    case opt_move(Y0, Is2) of
-	not_possible -> {[X,Y0],Is2};
-	{X,_} -> {[X,Y0],Is2};
-	{Y,Is} -> {[X,Y],Is}
     end.
 
 %% opt_move(Dest, [Instruction]) -> {UpdatedDest,[Instruction]} | not_possible
@@ -289,7 +302,7 @@ opt_move(Dest, Is) ->
 opt_move_1(R, [{set,[D],[R],move}|Is0], Acc) ->
     %% Provided that the source register is killed by instructions
     %% that follow, the optimization is safe.
-    case eliminate_use_of_from_reg(Is0, R, D, []) of
+    case eliminate_use_of_from_reg(Is0, R, D) of
 	{yes,Is} -> opt_move_rev(D, Acc, Is);
 	no -> not_possible
     end;
@@ -347,13 +360,21 @@ opt_tuple_element_1([{set,_,_,{alloc,_,_}}|_], _, _, _) ->
 opt_tuple_element_1([{set,_,_,{try_catch,_,_}}|_], _, _, _) ->
     no;
 opt_tuple_element_1([{set,[D],[S],move}|Is0], I0, {_,S}, Acc) ->
-    case eliminate_use_of_from_reg(Is0, S, D, []) of
+    case eliminate_use_of_from_reg(Is0, S, D) of
 	no ->
 	    no;
-	{yes,Is} ->
+	{yes,Is1} ->
 	    {set,[S],Ss,Op} = I0,
 	    I = {set,[D],Ss,Op},
-	    {yes,reverse(Acc, [I|Is])}
+            case opt_move_rev(S, Acc, [I|Is1]) of
+                not_possible ->
+                    %% Not safe because the move of the
+                    %% get_tuple_element instruction would cause the
+                    %% result of a previous instruction to be ignored.
+                    no;
+                {_,Is} ->
+                    {yes,Is}
+            end
     end;
 opt_tuple_element_1([{set,Ds,Ss,_}=I|Is], MovedI, {S,D}=Regs, Acc) ->
     case member(S, Ds) orelse member(D, Ss) of
@@ -389,6 +410,14 @@ is_killed_or_used(R, {set,Ss,Ds,_}) ->
 %%  that FromRegister is still used and that the optimization is not
 %%  possible.
 
+eliminate_use_of_from_reg(Is, From, To) ->
+    try
+        eliminate_use_of_from_reg(Is, From, To, [])
+    catch
+        throw:not_possible ->
+            no
+    end.
+
 eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
     if
 	X < Live ->
@@ -397,21 +426,32 @@ eliminate_use_of_from_reg([{set,_,_,{alloc,Live,_}}|_]=Is0, {x,X}, _, Acc) ->
 	    {yes,reverse(Acc, Is0)}
     end;
 eliminate_use_of_from_reg([{set,Ds,Ss0,Op}=I0|Is], From, To, Acc) ->
+    ensure_safe_tuple(I0, To),
     I = case member(From, Ss0) of
-	    true ->
-		Ss = [case S of
-			  From -> To;
-			  _ -> S
-		      end || S <- Ss0],
-		{set,Ds,Ss,Op};
-	    false ->
-		I0
-	end,
+            true ->
+                Ss = [case S of
+                          From -> To;
+                          _ -> S
+                      end || S <- Ss0],
+                {set,Ds,Ss,Op};
+            false ->
+                I0
+        end,
     case member(From, Ds) of
-	true ->
-	    {yes,reverse(Acc, [I|Is])};
-	false ->
-	    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
+        true ->
+            {yes,reverse(Acc, [I|Is])};
+        false ->
+            case member(To, Ds) of
+                true ->
+                    case beam_utils:is_killed_block(From, Is) of
+                        true ->
+                            {yes,reverse(Acc, [I|Is])};
+                        false ->
+                            no
+                    end;
+                false ->
+                    eliminate_use_of_from_reg(Is, From, To, [I|Acc])
+            end
     end;
 eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
     case beam_utils:is_killed_block(From, [I]) of
@@ -421,6 +461,10 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
 	    no
     end.
 
+ensure_safe_tuple({set,[To],[],{put_tuple,_}}, To) ->
+    throw(not_possible);
+ensure_safe_tuple(_, _) -> ok.
+
 %% opt_allocs(Instructions) -> Instructions.  Optimize allocate
 %%  instructions inside blocks. If safe, replace an allocate_zero
 %%  instruction with the slightly cheaper allocate instruction.
@@ -541,3 +585,109 @@ defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
     x_live(Ds, Regs bor ((1 bsl Live) - 1));
 defined_regs([{set,Ds,_,_}|Is], Regs) ->
     defined_regs(Is, x_live(Ds, Regs)).
+
+%%%
+%%% Do local common sub expression elimination (CSE) in each block.
+%%%
+
+local_cse([{block,Bl0}|Is]) ->
+    Bl = cse_block(Bl0, orddict:new(), []),
+    [{block,Bl}|local_cse(Is)];
+local_cse([I|Is]) ->
+    [I|local_cse(Is)];
+local_cse([]) -> [].
+
+cse_block([I|Is], Es0, Acc0) ->
+    Es1 = cse_clear(I, Es0),
+    case cse_expr(I) of
+        none ->
+            %% Instruction is not suitable for CSE.
+            cse_block(Is, Es1, [I|Acc0]);
+        {ok,D,Expr} ->
+            %% Suitable instruction. First update the dictionary of
+            %% suitable expressions for the next iteration.
+            Es = cse_add(D, Expr, Es1),
+
+            %% Search for a previous identical expression.
+            case cse_find(Expr, Es0) of
+                error ->
+                    %% Nothing found
+                    cse_block(Is, Es, [I|Acc0]);
+                Src ->
+                    %% Use the previously calculated result.
+                    %% Also eliminate any line instruction.
+                    Move = {set,[D],[Src],move},
+                    case Acc0 of
+                        [{set,_,_,{line,_}}|Acc] ->
+                            cse_block(Is, Es, [Move|Acc]);
+                        [_|_] ->
+                            cse_block(Is, Es, [Move|Acc0])
+                    end
+            end
+    end;
+cse_block([], _, Acc) ->
+    reverse(Acc).
+
+%% cse_find(Expr, Expressions) -> error | Register.
+%%  Find a previously evaluated expression whose result can be reused,
+%%  or return 'error' if no such expression is found.
+
+cse_find(Expr, Es) ->
+    case orddict:find(Expr, Es) of
+        {ok,{Src,_}} -> Src;
+        error -> error
+    end.
+
+cse_expr({set,[D],Ss,{bif,N,_}}) ->
+    case D of
+        {fr,_} ->
+            %% There are too many things that can go wrong.
+            none;
+        _ ->
+            {ok,D,{{bif,N},Ss}}
+    end;
+cse_expr({set,[D],Ss,{alloc,_,{gc_bif,N,_}}}) ->
+    {ok,D,{{gc_bif,N},Ss}};
+cse_expr({set,[D],Ss,put_list}) ->
+    {ok,D,{put_list,Ss}};
+cse_expr(_) -> none.
+
+%% cse_clear(Instr, Expressions0) -> Expressions.
+%%  Remove all previous expressions that will become
+%%  invalid when this instruction is executed. Basically,
+%%  an expression is no longer safe to reuse when the
+%%  register it has been stored to has been modified, killed,
+%%  or if any of the source operands have changed.
+
+cse_clear({set,Ds,_,{alloc,Live,_}}, Es) ->
+    cse_clear_1(Es, Live, Ds);
+cse_clear({set,Ds,_,_}, Es) ->
+    cse_clear_1(Es, all, Ds).
+
+cse_clear_1(Es, Live, Ds0) ->
+    Ds = ordsets:from_list(Ds0),
+    [E || E <- Es, cse_is_safe(E, Live, Ds)].
+
+cse_is_safe({_,{Dst,Interfering}}, Live, Ds) ->
+    ordsets:is_disjoint(Interfering, Ds) andalso
+        case Dst of
+            {x,X} ->
+                X < Live;
+            _ ->
+                true
+        end.
+
+%% cse_add(Dest, Expr, Expressions0) -> Expressions.
+%%  Provided that it is safe, add a new expression to the dictionary
+%%  of already evaluated expressions.
+
+cse_add(D, {_,Ss}=Expr, Es) ->
+    case member(D, Ss) of
+        false ->
+            Interfering = ordsets:from_list([D|Ss]),
+            orddict:store(Expr, {D,Interfering}, Es);
+        true ->
+            %% Unsafe because the instruction overwrites one of
+            %% source operands.
+            Es
+    end.
diff --git a/lib/compiler/src/beam_clean.erl b/lib/compiler/src/beam_clean.erl
index e094c2c320..7ddf9fa2e2 100644
--- a/lib/compiler/src/beam_clean.erl
+++ b/lib/compiler/src/beam_clean.erl
@@ -24,7 +24,7 @@
 -export([module/2]).
 -export([bs_clean_saves/1]).
 -export([clean_labels/1]).
--import(lists, [foldl/3,reverse/1,filter/2]).
+-import(lists, [foldl/3,reverse/1]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
@@ -303,8 +303,21 @@ maybe_remove_lines(Fs, Opts) ->
     end.
 
 remove_lines([{function,N,A,Lbl,Is0}|T]) ->
-    Is = filter(fun({line,_}) -> false;
-		   (_)  -> true
-		end, Is0),
+    Is = remove_lines_fun(Is0),
     [{function,N,A,Lbl,Is}|remove_lines(T)];
 remove_lines([]) -> [].
+
+remove_lines_fun([{line,_}|Is]) ->
+    remove_lines_fun(Is);
+remove_lines_fun([{block,Bl0}|Is]) ->
+    Bl = remove_lines_block(Bl0),
+    [{block,Bl}|remove_lines_fun(Is)];
+remove_lines_fun([I|Is]) ->
+    [I|remove_lines_fun(Is)];
+remove_lines_fun([]) -> [].
+
+remove_lines_block([{set,_,_,{line,_}}|Is]) ->
+    remove_lines_block(Is);
+remove_lines_block([I|Is]) ->
+    [I|remove_lines_block(Is)];
+remove_lines_block([]) -> [].
diff --git a/lib/compiler/src/beam_dead.erl b/lib/compiler/src/beam_dead.erl
index da944f3ce6..dbbaae05eb 100644
--- a/lib/compiler/src/beam_dead.erl
+++ b/lib/compiler/src/beam_dead.erl
@@ -294,24 +294,25 @@ backward([{jump,{f,To}}=J|[{gc_bif,_,{f,To},_,_,_Dst}|Is]], D, Acc) ->
     %% register is initialized, and it is therefore no need to test
     %% for liveness of the destination register at label To.
     backward([J|Is], D, Acc);
-backward([{test,bs_start_match2,F,Live,[R,_]=Args,Ctxt}|Is], D,
-	 [{test,bs_match_string,F,[Ctxt,Bs]},
-	  {test,bs_test_tail2,F,[Ctxt,0]}|Acc0]=Acc) ->
+backward([{test,bs_start_match2,F,Live,[Src,_]=Args,Ctxt}|Is], D, Acc0) ->
     {f,To0} = F,
-    case beam_utils:is_killed(Ctxt, Acc0, D) of
-	true ->
-	    To = shortcut_bs_context_to_binary(To0, R, D),
-	    Eq = {test,is_eq_exact,{f,To},[R,{literal,Bs}]},
-	    backward(Is, D, [Eq|Acc0]);
-	false ->
-	    To = shortcut_bs_start_match(To0, R, D),
-	    I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
-	    backward(Is, D, [I|Acc])
+    case test_bs_literal(F, Ctxt, D, Acc0) of
+        {none,Acc} ->
+            %% Ctxt killed immediately after bs_start_match2.
+            To = shortcut_bs_context_to_binary(To0, Src, D),
+            I = {test,is_bitstr,{f,To},[Src]},
+            backward(Is, D, [I|Acc]);
+        {Literal,Acc} ->
+            %% Ctxt killed after matching a literal.
+            To = shortcut_bs_context_to_binary(To0, Src, D),
+            Eq = {test,is_eq_exact,{f,To},[Src,{literal,Literal}]},
+            backward(Is, D, [Eq|Acc]);
+        not_killed ->
+            %% Ctxt not killed. Not much to do.
+            To = shortcut_bs_start_match(To0, Src, D),
+            I = {test,bs_start_match2,{f,To},Live,Args,Ctxt},
+            backward(Is, D, [I|Acc0])
     end;
-backward([{test,bs_start_match2,{f,To0},Live,[Src|_]=Info,Dst}|Is], D, Acc) ->
-    To = shortcut_bs_start_match(To0, Src, D),
-    I = {test,bs_start_match2,{f,To},Live,Info,Dst},
-    backward(Is, D, [I|Acc]);
 backward([{test,Op,{f,To0},Ops0}|Is], D, Acc) ->
     To1 = shortcut_bs_test(To0, Is, D),
     To2 = shortcut_label(To1, D),
@@ -511,6 +512,22 @@ remove_from_list(Lit, [Val,{f,_}=Fail|T]) ->
     [Val,Fail|remove_from_list(Lit, T)];
 remove_from_list(_, []) -> [].
 
+
+test_bs_literal(F, Ctxt, D,
+                [{test,bs_match_string,F,[Ctxt,Bs]},
+                 {test,bs_test_tail2,F,[Ctxt,0]}|Acc]) ->
+    test_bs_literal_1(Ctxt, Acc, D, Bs);
+test_bs_literal(F, Ctxt, D, [{test,bs_test_tail2,F,[Ctxt,0]}|Acc]) ->
+    test_bs_literal_1(Ctxt, Acc, D, <<>>);
+test_bs_literal(_, Ctxt, D, Acc) ->
+    test_bs_literal_1(Ctxt, Acc, D, none).
+
+test_bs_literal_1(Ctxt, Is, D, Literal) ->
+    case beam_utils:is_killed(Ctxt, Is, D) of
+        true -> {Literal,Is};
+        false -> not_killed
+    end.
+
 %% shortcut_bs_test(TargetLabel, ReversedInstructions, D) -> TargetLabel'
 %%  Try to shortcut the failure label for bit syntax matching.
 
diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl
index 50b76d7f29..a68c4b5367 100644
--- a/lib/compiler/src/beam_disasm.erl
+++ b/lib/compiler/src/beam_disasm.erl
@@ -1090,6 +1090,10 @@ resolve_inst({build_stacktrace,[]},_,_,_) ->
     build_stacktrace;
 resolve_inst({raw_raise,[]},_,_,_) ->
     raw_raise;
+resolve_inst({get_hd,[Src,Dst]},_,_,_) ->
+    {get_hd,Src,Dst};
+resolve_inst({get_tl,[Src,Dst]},_,_,_) ->
+    {get_tl,Src,Dst};
 
 %%
 %% Catches instructions that are not yet handled.
diff --git a/lib/compiler/src/beam_flatten.erl b/lib/compiler/src/beam_flatten.erl
index a4d45a4ca6..c60211f516 100644
--- a/lib/compiler/src/beam_flatten.erl
+++ b/lib/compiler/src/beam_flatten.erl
@@ -50,6 +50,9 @@ norm_block([{set,[],[],{alloc,R,Alloc}}|Is], Acc0) ->
 	Acc ->
 	    norm_block(Is, Acc)
     end;
+norm_block([{set,[D1],[S],get_hd},{set,[D2],[S],get_tl}|Is], Acc) ->
+    I = {get_list,S,D1,D2},
+    norm_block(Is, [I|Acc]);
 norm_block([I|Is], Acc) -> norm_block(Is, [norm(I)|Acc]);
 norm_block([], Acc) -> Acc.
     
@@ -64,12 +67,14 @@ norm({set,[D],[],{put_tuple,A}})  -> {put_tuple,A,D};
 norm({set,[],[S],put})            -> {put,S};
 norm({set,[D],[S],{get_tuple_element,I}}) -> {get_tuple_element,S,I,D};
 norm({set,[],[S,D],{set_tuple_element,I}}) -> {set_tuple_element,S,D,I};
-norm({set,[D1,D2],[S],get_list})          -> {get_list,S,D1,D2};
+norm({set,[D],[S],get_hd})        -> {get_hd,S,D};
+norm({set,[D],[S],get_tl})        -> {get_tl,S,D};
 norm({set,[D],[S|Puts],{alloc,R,{put_map,Op,F}}}) ->
     {put_map,F,Op,S,D,R,{list,Puts}};
 norm({set,[],[],remove_message})   -> remove_message;
 norm({set,[],[],fclearerror}) -> fclearerror;
-norm({set,[],[],fcheckerror}) -> {fcheckerror,{f,0}}.
+norm({set,[],[],fcheckerror}) -> {fcheckerror,{f,0}};
+norm({set,[],[],{line,_}=Line}) -> Line.
 
 norm_allocate({_Zero,nostack,Nh,[]}, Regs) ->
     [{test_heap,Nh,Regs}];
diff --git a/lib/compiler/src/beam_split.erl b/lib/compiler/src/beam_split.erl
index d041f18806..52dd89b5bb 100644
--- a/lib/compiler/src/beam_split.erl
+++ b/lib/compiler/src/beam_split.erl
@@ -50,8 +50,9 @@ split_block([{set,[R],[_,_,_]=As,{bif,is_record,{f,Lbl}}}|Is], Bl, Acc) ->
     split_block(Is, [], [{bif,is_record,{f,Lbl},As,R}|make_block(Bl, Acc)]);
 split_block([{set,[R],As,{bif,N,{f,Lbl}=Fail}}|Is], Bl, Acc) when Lbl =/= 0 ->
     split_block(Is, [], [{bif,N,Fail,As,R}|make_block(Bl, Acc)]);
-split_block([{set,[R],As,{bif,raise,{f,_}=Fail}}|Is], Bl, Acc) ->
-    split_block(Is, [], [{bif,raise,Fail,As,R}|make_block(Bl, Acc)]);
+split_block([{set,[],[],{line,_}=Line},
+             {set,[R],As,{bif,raise,{f,_}=Fail}}|Is], Bl, Acc) ->
+    split_block(Is, [], [{bif,raise,Fail,As,R},Line|make_block(Bl, Acc)]);
 split_block([{set,[R],As,{alloc,Live,{gc_bif,N,{f,Lbl}=Fail}}}|Is], Bl, Acc)
   when Lbl =/= 0 ->
     split_block(Is, [], [{gc_bif,N,Fail,Live,As,R}|make_block(Bl, Acc)]);
@@ -61,8 +62,6 @@ split_block([{set,[D],[S|Puts],{alloc,R,{put_map,Op,{f,Lbl}=Fail}}}|Is],
 			 make_block(Bl, Acc)]);
 split_block([{set,[R],[],{try_catch,Op,L}}|Is], Bl, Acc) ->
     split_block(Is, [], [{Op,R,L}|make_block(Bl, Acc)]);
-split_block([{set,[],[],{line,_}=Line}|Is], Bl, Acc) ->
-    split_block(Is, [], [Line|make_block(Bl, Acc)]);
 split_block([I|Is], Bl, Acc) ->
     split_block(Is, [I|Bl], Acc);
 split_block([], Bl, Acc) -> make_block(Bl, Acc).
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index 3b6bf49961..28f36db399 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -17,14 +17,15 @@
 %%
 %% %CopyrightEnd%
 %%
-%% Purpose : Type-based optimisations.
+%% Purpose: Type-based optimisations. See the comment for verified_type/1
+%% the very end of this file for a description of the types in the
+%% type database.
 
 -module(beam_type).
 
 -export([module/2]).
 
--import(lists, [filter/2,foldl/3,keyfind/3,member/2,
-		reverse/1,reverse/2,sort/1]).
+-import(lists, [foldl/3,member/2,reverse/1,reverse/2,sort/1]).
 
 -define(UNICODE_INT, {integer,{0,16#10FFFF}}).
 
@@ -80,96 +81,81 @@ simplify(Is0, TypeDb0) ->
 %%  Basic simplification, mostly tuples, no floating point optimizations.
 
 simplify_basic(Is, Ts) ->
-    simplify_basic_1(Is, Ts, []).
-    
-simplify_basic_1([{set,[D],[{integer,Index},Reg],{bif,element,_}}=I0|Is], Ts0, Acc) ->
-    I = case max_tuple_size(Reg, Ts0) of
-	    Sz when 0 < Index, Index =< Sz ->
-		{set,[D],[Reg],{get_tuple_element,Index-1}};
-	    _Other -> I0
-    end,
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is, Ts, [I|Acc]);
-simplify_basic_1([{set,[D],[TupleReg],{get_tuple_element,0}}=I|Is0], Ts0, Acc) ->
-    case tdb_find(TupleReg, Ts0) of
-	{tuple,_,_,[Contents]} ->
-	    simplify_basic_1([{set,[D],[Contents],move}|Is0], Ts0, Acc);
-	_ ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is0, Ts, [I|Acc])
+    simplify_basic(Is, Ts, []).
+
+simplify_basic([I0|Is], Ts0, Acc) ->
+    case simplify_instr(I0, Ts0) of
+        [] ->
+            simplify_basic(Is, Ts0, Acc);
+        [I] ->
+            Ts = update(I, Ts0),
+            simplify_basic(Is, Ts, [I|Acc])
+    end;
+simplify_basic([], Ts, Acc) ->
+    {reverse(Acc),Ts}.
+
+%% simplify_instr(Instruction, Ts) -> [Instruction].
+
+%%  Simplify a simple instruction using type information. Return an
+%%  empty list if the instruction should be removed, or a list with
+%%  the original or modified instruction.
+
+simplify_instr({set,[D],[{integer,Index},Reg],{bif,element,_}}=I, Ts) ->
+    case max_tuple_size(Reg, Ts) of
+        Sz when 0 < Index, Index =< Sz ->
+            [{set,[D],[Reg],{get_tuple_element,Index-1}}];
+        _ -> [I]
     end;
-simplify_basic_1([{set,_,_,{try_catch,_,_}}=I|Is], _Ts, Acc) ->
-    simplify_basic_1(Is, tdb_new(), [I|Acc]);
-simplify_basic_1([{test,is_atom,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,Test,Fail,[R]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	boolean -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        any ->
+            [I];
+        Type ->
+            case will_succeed(Test, Type) of
+                yes -> [];
+                no -> [{jump,Fail}];
+                maybe -> [I]
+            end
+    end;
+simplify_instr({set,[D],[TupleReg],{get_tuple_element,0}}=I, Ts) ->
+    case tdb_find(TupleReg, Ts) of
+        {tuple,_,_,[Contents]} ->
+            [{set,[D],[Contents],move}];
+        _ ->
+            [I]
     end;
-simplify_basic_1([{test,is_integer,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,test_arity,_,[R,Arity]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	integer -> simplify_basic_1(Is, Ts, Acc);
-	{integer,_} -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        {tuple,exact_size,Arity,_} -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_tuple,_,[R]}=I|Is], Ts, Acc) ->
+simplify_instr({test,is_eq_exact,Fail,[R,{atom,A}=Atom]}=I, Ts) ->
     case tdb_find(R, Ts) of
-	{tuple,_,_,_} -> simplify_basic_1(Is, Ts, Acc);
-	_ -> simplify_basic_1(Is, Ts, [I|Acc])
+        {atom,_}=Atom -> [];
+        boolean when is_boolean(A) -> [I];
+        any -> [I];
+        _ -> [{jump,Fail}]
     end;
-simplify_basic_1([{test,test_arity,_,[R,Arity]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	{tuple,exact_size,Arity,_} ->
-	    simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
+simplify_instr({test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I, Ts) ->
+    case tdb_find(R, Ts) of
+        {tuple,exact_size,Arity,[Tag]} -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_map,_,[R]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	map -> simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
+simplify_instr({select,select_val,Reg,_,_}=I, Ts) ->
+    [case tdb_find(Reg, Ts) of
+         {integer,Range} ->
+             simplify_select_val_int(I, Range);
+         boolean ->
+             simplify_select_val_bool(I);
+         _ ->
+             I
+     end];
+simplify_instr({test,bs_test_unit,_,[Src,Unit]}=I, Ts) ->
+    case tdb_find(Src, Ts) of
+        {binary,U} when U rem Unit =:= 0 -> [];
+        _ -> [I]
     end;
-simplify_basic_1([{test,is_nonempty_list,_,[R]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	nonempty_list -> simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
-    end;
-simplify_basic_1([{test,is_eq_exact,Fail,[R,{atom,_}=Atom]}=I|Is0], Ts0, Acc0) ->
-    Acc = case tdb_find(R, Ts0) of
-	      {atom,_}=Atom -> Acc0;
-	      {atom,_} -> [{jump,Fail}|Acc0];
-	      _ -> [I|Acc0]
-	  end,
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is0, Ts, Acc);
-simplify_basic_1([{test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I|Is], Ts0, Acc) ->
-    case tdb_find(R, Ts0) of
-	{tuple,exact_size,Arity,[Tag]} ->
-	    simplify_basic_1(Is, Ts0, Acc);
-	_Other ->
-	    Ts = update(I, Ts0),
-	    simplify_basic_1(Is, Ts, [I|Acc])
-    end;
-simplify_basic_1([{select,select_val,Reg,_,_}=I0|Is], Ts, Acc) ->
-    I = case tdb_find(Reg, Ts) of
-	    {integer,Range} ->
-		simplify_select_val_int(I0, Range);
-	    boolean ->
-		simplify_select_val_bool(I0);
-	    _ ->
-		I0
-	end,
-    simplify_basic_1(Is, tdb_new(), [I|Acc]);
-simplify_basic_1([I|Is], Ts0, Acc) ->
-    Ts = update(I, Ts0),
-    simplify_basic_1(Is, Ts, [I|Acc]);
-simplify_basic_1([], Ts, Acc) ->
-    Is = reverse(Acc),
-    {Is,Ts}.
+simplify_instr(I, _) -> [I].
 
 simplify_select_val_int({select,select_val,R,_,L0}=I, {Min,Max}) ->
     Vs = sort([V || {integer,V} <- L0]),
@@ -197,6 +183,53 @@ eq_ranges([H], H, H) -> true;
 eq_ranges([H|T], H, Max) -> eq_ranges(T, H+1, Max);
 eq_ranges(_, _, _) -> false.
 
+%% will_succeed(TestOperation, Type) -> yes|no|maybe.
+%%  Test whether TestOperation applied to an argument of type Type
+%%  will succeed.  Return yes, no, or maybe.
+%%
+%%  Type is a type as described in the comment for verified_type/1 at
+%%  the very end of this file, but it will *never* be 'any'.
+
+will_succeed(is_atom, Type) ->
+    case Type of
+        {atom,_} -> yes;
+        boolean -> yes;
+        _ -> no
+    end;
+will_succeed(is_binary, Type) ->
+    case Type of
+        {binary,U} when U rem 8 =:= 0 -> yes;
+        {binary,_} -> maybe;
+        _ -> no
+    end;
+will_succeed(is_bitstr, Type) ->
+    case Type of
+        {binary,_} -> yes;
+        _ -> no
+    end;
+will_succeed(is_integer, Type) ->
+    case Type of
+        integer -> yes;
+        {integer,_} -> yes;
+        _ -> no
+    end;
+will_succeed(is_map, Type) ->
+    case Type of
+        map -> yes;
+        _ -> no
+    end;
+will_succeed(is_nonempty_list, Type) ->
+    case Type of
+        nonempty_list -> yes;
+        _ -> no
+    end;
+will_succeed(is_tuple, Type) ->
+    case Type of
+        {tuple,_,_,_} -> yes;
+        _ -> no
+    end;
+will_succeed(_, _) -> maybe.
+
 %% simplify_float([Instruction], TypeDatabase) ->
 %%                 {[Instruction],TypeDatabase'} | not_possible
 %%  Simplify floating point operations in blocks.
@@ -226,7 +259,7 @@ simplify_float_1([{set,[D0],[A0],{alloc,_,{gc_bif,'-',{f,0}}}}=I|Is]=Is0,
 	    {D,Rs} = find_dest(D0, Rs1),
 	    Areg = fetch_reg(A, Rs),
 	    Acc = [{set,[D],[Areg],{bif,fnegate,{f,0}}}|clearerror(Acc1)],
-	    Ts = tdb_update([{D0,float}], Ts0),
+	    Ts = tdb_store(D0, float, Ts0),
 	    simplify_float_1(Is, Ts, Rs, Acc);
 	_Other ->
 	    Ts = update(I, Ts0),
@@ -249,7 +282,7 @@ simplify_float_1([{set,[D0],[A0,B0],{alloc,_,{gc_bif,Op0,{f,0}}}}=I|Is]=Is0,
 	    Areg = fetch_reg(A, Rs),
 	    Breg = fetch_reg(B, Rs),
 	    Acc = [{set,[D],[Areg,Breg],{bif,Op,{f,0}}}|clearerror(Acc2)],
-	    Ts = tdb_update([{D0,float}], Ts0),
+	    Ts = tdb_store(D0, float, Ts0),
 	    simplify_float_1(Is, Ts, Rs, Acc)
     end;
 simplify_float_1([{set,_,_,{try_catch,_,_}}=I|Is]=Is0, _Ts, Rs0, Acc0) ->
@@ -422,100 +455,100 @@ update({'%anno',_}, Ts) ->
     Ts;
 update({set,[D],[S],move}, Ts) ->
     tdb_copy(S, D, Ts);
-update({set,[D],[{integer,I},Reg],{bif,element,_}}, Ts0) ->
-    tdb_update([{Reg,{tuple,min_size,I,[]}},{D,kill}], Ts0);
-update({set,[D],[_Index,Reg],{bif,element,_}}, Ts0) ->
-    tdb_update([{Reg,{tuple,min_size,0,[]}},{D,kill}], Ts0);
-update({set,[D],Args,{bif,N,_}}, Ts0) ->
+update({set,[D],[Index,Reg],{bif,element,_}}, Ts0) ->
+    MinSize = case Index of
+                  {integer,I} -> I;
+                  _ -> 0
+              end,
+    Ts = tdb_meet(Reg, {tuple,min_size,MinSize,[]}, Ts0),
+    tdb_store(D, any, Ts);
+update({set,[D],Args,{bif,N,_}}, Ts) ->
     Ar = length(Args),
     BoolOp = erl_internal:new_type_test(N, Ar) orelse
 	erl_internal:comp_op(N, Ar) orelse
 	erl_internal:bool_op(N, Ar),
-    case BoolOp of
-	true ->
-	    tdb_update([{D,boolean}], Ts0);
-	false ->
-	    tdb_update([{D,kill}], Ts0)
+    Type = case BoolOp of
+               true -> boolean;
+               false -> unary_op_type(N)
+           end,
+    tdb_store(D, Type, Ts);
+update({set,[D],[S],{get_tuple_element,0}}, Ts0) ->
+    if
+        D =:= S ->
+            tdb_store(D, any, Ts0);
+        true ->
+            Ts = tdb_store(D, {tuple_element,S,0}, Ts0),
+            tdb_store(S, {tuple,min_size,1,[]}, Ts)
     end;
-update({set,[D],[S],{get_tuple_element,0}}, Ts) ->
-    tdb_update([{D,{tuple_element,S,0}}], Ts);
 update({set,[D],[S],{alloc,_,{gc_bif,float,{f,0}}}}, Ts0) ->
     %% Make sure we reject non-numeric literal argument.
     case possibly_numeric(S) of
-	true ->  tdb_update([{D,float}], Ts0);
-	false -> Ts0
+        true ->  tdb_store(D, float, Ts0);
+        false -> Ts0
     end;
 update({set,[D],[S1,S2],{alloc,_,{gc_bif,'band',{f,0}}}}, Ts) ->
-    case keyfind(integer, 1, [S1,S2]) of
-	{integer,N} ->
-	    update_band(N, D, Ts);
-	false ->
-	    tdb_update([{D,integer}], Ts)
-    end;
-update({set,[D],[S1,S2],{alloc,_,{gc_bif,'/',{f,0}}}}, Ts0) ->
+    Type = band_type(S1, S2, Ts),
+    tdb_store(D, Type, Ts);
+update({set,[D],[S1,S2],{alloc,_,{gc_bif,'/',{f,0}}}}, Ts) ->
     %% Make sure we reject non-numeric literals.
     case possibly_numeric(S1) andalso possibly_numeric(S2) of
-	true ->  tdb_update([{D,float}], Ts0);
-	false -> Ts0
+        true -> tdb_store(D, float, Ts);
+        false -> Ts
     end;
 update({set,[D],[S1,S2],{alloc,_,{gc_bif,Op,{f,0}}}}, Ts0) ->
     case op_type(Op) of
 	integer ->
-	    tdb_update([{D,integer}], Ts0);
-	{float,_} ->
-	    case {tdb_find(S1, Ts0),tdb_find(S2, Ts0)} of
-		{float,_} -> tdb_update([{D,float}], Ts0);
-		{_,float} -> tdb_update([{D,float}], Ts0);
-		{_,_} -> tdb_update([{D,kill}], Ts0)
-	    end;
-	unknown ->
-	    tdb_update([{D,kill}], Ts0)
-    end;
-update({set,[],_Src,_Op}, Ts0) -> Ts0;
-update({set,[D],_Src,_Op}, Ts0) ->
-    tdb_update([{D,kill}], Ts0);
-update({set,[D1,D2],_Src,_Op}, Ts0) ->
-    tdb_update([{D1,kill},{D2,kill}], Ts0);
+	    tdb_store(D, integer, Ts0);
+        {float,_} ->
+            case {tdb_find(S1, Ts0),tdb_find(S2, Ts0)} of
+                {float,_} -> tdb_store(D, float, Ts0);
+                {_,float} -> tdb_store(D, float, Ts0);
+                {_,_} -> tdb_store(D, any, Ts0)
+            end;
+        Type ->
+            tdb_store(D, Type, Ts0)
+    end;
+update({set,[D],[_],{alloc,_,{gc_bif,Op,{f,0}}}}, Ts) ->
+    tdb_store(D, unary_op_type(Op), Ts);
+update({set,[],_Src,_Op}, Ts) ->
+    Ts;
+update({set,[D],_Src,_Op}, Ts) ->
+    tdb_store(D, any, Ts);
 update({kill,D}, Ts) ->
-    tdb_update([{D,kill}], Ts);
+    tdb_store(D, any, Ts);
 
 %% Instructions outside of blocks.
-update({test,is_float,_Fail,[Src]}, Ts0) ->
-    tdb_update([{Src,float}], Ts0);
-update({test,test_arity,_Fail,[Src,Arity]}, Ts0) ->
-    tdb_update([{Src,{tuple,exact_size,Arity,[]}}], Ts0);
-update({test,is_map,_Fail,[Src]}, Ts0) ->
-    tdb_update([{Src,map}], Ts0);
+update({test,test_arity,_Fail,[Src,Arity]}, Ts) ->
+    tdb_meet(Src, {tuple,exact_size,Arity,[]}, Ts);
 update({get_map_elements,_,Src,{list,Elems0}}, Ts0) ->
+    Ts1 = tdb_meet(Src, map, Ts0),
     {_Ss,Ds} = beam_utils:split_even(Elems0),
-    Elems = [{Dst,kill} || Dst <- Ds],
-    tdb_update([{Src,map}|Elems], Ts0);
-update({test,is_nonempty_list,_Fail,[Src]}, Ts0) ->
-    tdb_update([{Src,nonempty_list}], Ts0);
-update({test,is_eq_exact,_,[Reg,{atom,_}=Atom]}, Ts) ->
-    case tdb_find(Reg, Ts) of
-	error ->
-	    Ts;
-	{tuple_element,TupleReg,0} ->
-	    tdb_update([{TupleReg,{tuple,min_size,1,[Atom]}}], Ts);
-	_ ->
-	    Ts
-    end;
+    foldl(fun(Dst, A) -> tdb_store(Dst, any, A) end, Ts1, Ds);
+update({test,is_eq_exact,_,[Reg,{atom,_}=Atom]}, Ts0) ->
+    Ts = case tdb_find_source_tuple(Reg, Ts0) of
+             {source_tuple,TupleReg} ->
+                 tdb_meet(TupleReg, {tuple,min_size,1,[Atom]}, Ts0);
+             none ->
+                 Ts0
+         end,
+    tdb_meet(Reg, Atom, Ts);
 update({test,is_record,_Fail,[Src,Tag,{integer,Arity}]}, Ts) ->
-    tdb_update([{Src,{tuple,exact_size,Arity,[Tag]}}], Ts);
+    tdb_meet(Src, {tuple,exact_size,Arity,[Tag]}, Ts);
 
-%% Binary matching
+%% Binaries and binary matching.
 
 update({test,bs_get_integer2,_,_,Args,Dst}, Ts) ->
-    tdb_update([{Dst,get_bs_integer_type(Args)}], Ts);
+    tdb_store(Dst, get_bs_integer_type(Args), Ts);
 update({test,bs_get_utf8,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,?UNICODE_INT}], Ts);
+    tdb_store(Dst, ?UNICODE_INT, Ts);
 update({test,bs_get_utf16,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,?UNICODE_INT}], Ts);
+    tdb_store(Dst, ?UNICODE_INT, Ts);
 update({test,bs_get_utf32,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,?UNICODE_INT}], Ts);
+    tdb_store(Dst, ?UNICODE_INT, Ts);
+update({bs_init,_,{bs_init2,_,_},_,_,Dst}, Ts) ->
+    tdb_store(Dst, {binary,8}, Ts);
 update({bs_init,_,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
+    tdb_store(Dst, {binary,1}, Ts);
 update({bs_put,_,_,_}, Ts) ->
     Ts;
 update({bs_save2,_,_}, Ts) ->
@@ -523,14 +556,31 @@ update({bs_save2,_,_}, Ts) ->
 update({bs_restore2,_,_}, Ts) ->
     Ts;
 update({bs_context_to_binary,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
-update({test,bs_start_match2,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
-update({test,bs_get_binary2,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,kill}], Ts);
+    tdb_store(Dst, {binary,1}, Ts);
+update({test,bs_start_match2,_,_,[Src,_],Dst}, Ts0) ->
+    Ts = tdb_meet(Src, {binary,1}, Ts0),
+    tdb_copy(Src, Dst, Ts);
+update({test,bs_get_binary2,_,_,[_,_,Unit,_],Dst}, Ts) ->
+    true = is_integer(Unit),                    %Assertion.
+    tdb_store(Dst, {binary,Unit}, Ts);
 update({test,bs_get_float2,_,_,_,Dst}, Ts) ->
-    tdb_update([{Dst,float}], Ts);
-
+    tdb_store(Dst, float, Ts);
+update({test,bs_test_unit,_,[Src,Unit]}, Ts) ->
+    tdb_meet(Src, {binary,Unit}, Ts);
+
+%% Other test instructions
+update({test,Test,_Fail,[Src]}, Ts) ->
+    Type = case Test of
+               is_binary -> {binary,8};
+               is_bitstr -> {binary,1};
+               is_boolean -> boolean;
+               is_float -> float;
+               is_integer -> integer;
+               is_map -> map;
+               is_nonempty_list -> nonempty_list;
+               _ -> any
+           end,
+    tdb_meet(Src, Type, Ts);
 update({test,_Test,_Fail,_Other}, Ts) ->
     Ts;
 
@@ -538,7 +588,7 @@ update({test,_Test,_Fail,_Other}, Ts) ->
 
 update({call_ext,Ar,{extfunc,math,Math,Ar}}, Ts) ->
     case is_math_bif(Math, Ar) of
-	true -> tdb_update([{{x,0},float}], Ts);
+	true -> tdb_store({x,0}, float, Ts);
 	false -> tdb_kill_xregs(Ts)
     end;
 update({call_ext,3,{extfunc,erlang,setelement,3}}, Ts0) ->
@@ -555,7 +605,7 @@ update({call_ext,3,{extfunc,erlang,setelement,3}}, Ts0) ->
 			%% first element of the tuple.
 			{tuple,SzKind,Sz,[]}
 		end,
-	    tdb_update([{{x,0},T}], Ts);
+            tdb_store({x,0}, T, Ts);
 	_ ->
 	    Ts
     end;
@@ -566,24 +616,32 @@ update({call_fun, _}, Ts) -> tdb_kill_xregs(Ts);
 update({apply, _}, Ts) -> tdb_kill_xregs(Ts);
 
 update({line,_}, Ts) -> Ts;
+update({'%',_}, Ts) -> Ts;
 
 %% The instruction is unknown.  Kill all information.
 update(_I, _Ts) -> tdb_new().
 
-update_band(N, Reg, Ts) ->
-    Type = update_band_1(N, 0),
-    tdb_update([{Reg,Type}], Ts).
+band_type({integer,Int}, Other, Ts) ->
+    band_type_1(Int, Other, Ts);
+band_type(Other, {integer,Int}, Ts) ->
+    band_type_1(Int, Other, Ts);
+band_type(_, _, _) -> integer.
 
-update_band_1(N, Bits) when Bits < 64 ->
+band_type_1(Int, OtherSrc, Ts) ->
+    Type = band_type_2(Int, 0),
+    OtherType = tdb_find(OtherSrc, Ts),
+    meet(Type, OtherType).
+
+band_type_2(N, Bits) when Bits < 64 ->
     case 1 bsl Bits of
 	P when P =:= N + 1 ->
 	    {integer,{0,N}};
 	P when P > N + 1 ->
 	    integer;
 	_ ->
-	    update_band_1(N, Bits+1)
+	    band_type_2(N, Bits+1)
     end;
-update_band_1(_, _) ->
+band_type_2(_, _) ->
     %% Negative or large positive number. Give up.
     integer.
 
@@ -707,7 +765,15 @@ op_type('bxor') -> integer;
 op_type('bsl') -> integer;
 op_type('bsr') -> integer;
 op_type('div') -> integer;
-op_type(_) -> unknown.
+op_type(_) -> any.
+
+unary_op_type(bit_size) -> integer;
+unary_op_type(byte_size) -> integer;
+unary_op_type(length) -> integer;
+unary_op_type(map_size) -> integer;
+unary_op_type(size) -> integer;
+unary_op_type(tuple_size) -> integer;
+unary_op_type(_) -> any.
 
 flush(Rs, [{set,[_],[_,_,_],{bif,is_record,_}}|_]=Is0, Acc0) ->
     Acc = flush_all(Rs, Is0, Acc0),
@@ -790,38 +856,39 @@ checkerror_1([], OrigIs) -> OrigIs.
 checkerror_2(OrigIs) -> [{set,[],[],fcheckerror}|OrigIs].
 
 
-%%% Routines for maintaining a type database.  The type database 
+%%% Routines for maintaining a type database.  The type database
 %%% associates type information with registers.
 %%%
-%%% {tuple,min_size,Size,First} means that the corresponding register contains
-%%% a tuple with *at least* Size elements (conversely, exact_size means that it
-%%% contains a tuple with *exactly* Size elements). An tuple with unknown size
-%%% is represented as {tuple,min_size,0,[]}. First is either [] (meaning that
-%%% the tuple's first element is unknown) or [FirstElement] (the contents of
-%%% the first element).
-%%%
-%%% 'float' means that the register contains a float.
-%%%
-%%% 'integer' or {integer,{Min,Max}} that the register contains an
-%%% integer.
+%%% See the comment for verified_type/1 at the end of module for
+%%% a description of the possible types.
 
 %% tdb_new() -> EmptyDataBase
 %%  Creates a new, empty type database.
 
 tdb_new() -> [].
 
-%% tdb_find(Register, Db) -> Information|error
+%% tdb_find(Register, Db) -> Type
 %%  Returns type information or the atom error if there is no type
 %%  information available for Register.
+%%
+%%  See the comment for verified_type/1 at the end of module for
+%%  a description of the possible types.
 
-tdb_find({x,_}=K, Ts) -> tdb_find_1(K, Ts);
-tdb_find({y,_}=K, Ts) -> tdb_find_1(K, Ts);
-tdb_find(_, _) -> error.
+tdb_find(Reg, Ts) ->
+    case tdb_find_raw(Reg, Ts) of
+        {tuple_element,_,_} -> any;
+        Type -> Type
+    end.
 
-tdb_find_1(K, Ts) ->
-    case orddict:find(K, Ts) of
-	{ok,Val} -> Val;
-	error -> error
+%% tdb_find_source_tuple(Register, Ts) -> {source_tuple,Register} | 'none'.
+%%  Find the tuple whose first element was fetched to the register Register.
+
+tdb_find_source_tuple(Reg, Ts) ->
+    case tdb_find_raw(Reg, Ts) of
+        {tuple_element,Src,0} ->
+            {source_tuple,Src};
+        _ ->
+            none
     end.
 
 %% tdb_copy(Source, Dest, Db) -> Db'
@@ -829,9 +896,9 @@ tdb_find_1(K, Ts) ->
 %%  as the Source.
 
 tdb_copy({Tag,_}=S, D, Ts) when Tag =:= x; Tag =:= y ->
-    case tdb_find(S, Ts) of
-	error -> orddict:erase(D, Ts);
-	Type -> orddict:store(D, Type, Ts)
+    case tdb_find_raw(S, Ts) of
+        any -> orddict:erase(D, Ts);
+        Type -> orddict:store(D, Type, Ts)
     end;
 tdb_copy(Literal, D, Ts) ->
     Type = case Literal of
@@ -843,14 +910,89 @@ tdb_copy(Literal, D, Ts) ->
 	       {literal,Tuple} when tuple_size(Tuple) >= 1 ->
 		   Lit = tag_literal(element(1, Tuple)),
 		   {tuple,exact_size,tuple_size(Tuple),[Lit]};
-	       _ -> term
+	       _ -> any
 	   end,
-    if
-	Type =:= term ->
-	    orddict:erase(D, Ts);
-	true ->
-	    verify_type(Type),
-	    orddict:store(D, Type, Ts)
+    tdb_store(D, verified_type(Type), Ts).
+
+%% tdb_store(Register, Type, Ts0) -> Ts.
+%%  Store a new type for register Register. Return the update type
+%%  database. Use this function when a new value is assigned to
+%%  a register.
+%%
+%%  See the comment for verified_type/1 at the end of module for
+%%  a description of the possible types.
+
+tdb_store(Reg, any, Ts) ->
+    erase(Reg, Ts);
+tdb_store(Reg, Type, Ts) ->
+    store(Reg, verified_type(Type), Ts).
+
+store(Key, New, [{K,_}|_]=Dict) when Key < K ->
+    [{Key,New}|Dict];
+store(Key, New, [{K,Val}=E|Dict]) when Key > K ->
+    case Val of
+        {tuple_element,Key,_} -> store(Key, New, Dict);
+        _ -> [E|store(Key, New, Dict)]
+    end;
+store(Key, New, [{_K,Old}|Dict]) ->		%Key == K
+    case Old of
+        {tuple,_,_,_} ->
+            [{Key,New}|erase_tuple_element(Key, Dict)];
+        _ ->
+            [{Key,New}|Dict]
+    end;
+store(Key, New, []) -> [{Key,New}].
+
+erase(Key, [{K,_}=E|Dict]) when Key < K ->
+    [E|Dict];
+erase(Key, [{K,Val}=E|Dict]) when Key > K ->
+    case Val of
+        {tuple_element,Key,_} -> erase(Key, Dict);
+        _ -> [E|erase(Key, Dict)]
+    end;
+erase(Key, [{_K,Val}|Dict]) ->                 %Key == K
+    case Val of
+        {tuple,_,_,_} -> erase_tuple_element(Key, Dict);
+        _ -> Dict
+    end;
+erase(_, []) -> [].
+
+erase_tuple_element(Key, [{_,{tuple_element,Key,_}}|Dict]) ->
+    erase_tuple_element(Key, Dict);
+erase_tuple_element(Key, [E|Dict]) ->
+    [E|erase_tuple_element(Key, Dict)];
+erase_tuple_element(_Key, []) -> [].
+
+%% tdb_meet(Register, Type, Ts0) -> Ts.
+%%  Update information of a register that is used as the source for an
+%%  instruction. The type Type will be combined using the meet operation
+%%  with the previous type information for the register, resulting in
+%%  narrower (more specific) type.
+%%
+%%  For example, if the previous type is {tuple,min_size,2,[]} and the
+%%  the new type is {tuple,exact_size,5,[]}, the meet of the types will
+%%  be {tuple,exact_size,5,[]}.
+%%
+%%  See the comment for verified_type/1 at the end of module for
+%%  a description of the possible types.
+
+tdb_meet(Reg, NewType, Ts) ->
+    Update = fun(Type0) -> meet(Type0, NewType) end,
+    orddict:update(Reg, Update, NewType, Ts).
+
+%%%
+%%% Here follows internal helper functions for accessing and
+%%% updating the type database.
+%%%
+
+tdb_find_raw({x,_}=K, Ts) -> tdb_find_raw_1(K, Ts);
+tdb_find_raw({y,_}=K, Ts) -> tdb_find_raw_1(K, Ts);
+tdb_find_raw(_, _) -> any.
+
+tdb_find_raw_1(K, Ts) ->
+    case orddict:find(K, Ts) of
+	{ok,Val} -> Val;
+	error -> any
     end.
 
 tag_literal(A) when is_atom(A) -> {atom,A};
@@ -859,45 +1001,6 @@ tag_literal(I) when is_integer(I) -> {integer,I};
 tag_literal([]) -> nil;
 tag_literal(Lit) -> {literal,Lit}.
 
-%% tdb_update([UpdateOp], Db) -> NewDb
-%%        UpdateOp = {Register,kill}|{Register,NewInfo}
-%%  Updates a type database.  If a 'kill' operation is given, the type
-%%  information for that register will be removed from the database.
-%%  A kill operation takes precedence over other operations for the same
-%%  register (i.e. [{{x,0},kill},{{x,0},{tuple,min_size,5,[]}}] means that the
-%%  the existing type information, if any, will be discarded, and the
-%%  the '{tuple,min_size,5,[]}' information ignored.
-%%
-%%  If NewInfo information is given and there exists information about
-%%  the register, the old and new type information will be merged.
-%%  For instance, {tuple,min_size,5,_} and {tuple,min_size,10,_} will be merged
-%%  to produce {tuple,min_size,10,_}.
-
-tdb_update(Uis0, Ts0) ->
-    Uis1 = filter(fun ({{x,_},_Op}) -> true;
-		      ({{y,_},_Op}) -> true;
-		      (_) -> false
-		  end, Uis0),
-    tdb_update1(lists:sort(Uis1), Ts0).
-
-tdb_update1([{Key,kill}|Ops], [{K,_Old}|_]=Db) when Key < K ->
-    tdb_update1(remove_key(Key, Ops), Db);
-tdb_update1([{Key,Type}=New|Ops], [{K,_Old}|_]=Db) when Key < K ->
-    verify_type(Type),
-    [New|tdb_update1(Ops, Db)];
-tdb_update1([{Key,kill}|Ops], [{Key,_}|Db]) ->
-    tdb_update1(remove_key(Key, Ops), Db);
-tdb_update1([{Key,NewInfo}|Ops], [{Key,OldInfo}|Db]) ->
-    [{Key,merge_type_info(NewInfo, OldInfo)}|tdb_update1(Ops, Db)];
-tdb_update1([{_,_}|_]=Ops, [Old|Db]) ->
-    [Old|tdb_update1(Ops, Db)];
-tdb_update1([{Key,kill}|Ops], []) ->
-    tdb_update1(remove_key(Key, Ops), []);
-tdb_update1([{_,Type}=New|Ops], []) ->
-    verify_type(Type),
-    [New|tdb_update1(Ops, [])];
-tdb_update1([], Db) -> Db.
-
 %% tdb_kill_xregs(Db) -> NewDb
 %%  Kill all information about x registers. Also kill all tuple_element
 %%  dependencies from y registers to x registers.
@@ -906,41 +1009,106 @@ tdb_kill_xregs([{{x,_},_Type}|Db]) -> tdb_kill_xregs(Db);
 tdb_kill_xregs([{{y,_},{tuple_element,{x,_},_}}|Db]) -> tdb_kill_xregs(Db);
 tdb_kill_xregs([Any|Db]) -> [Any|tdb_kill_xregs(Db)];
 tdb_kill_xregs([]) -> [].
-    
-remove_key(Key, [{Key,_Op}|Ops]) -> remove_key(Key, Ops);
-remove_key(_, Ops) -> Ops.
 
-merge_type_info(I, I) -> I;
-merge_type_info({tuple,min_size,Sz1,Same}, {tuple,min_size,Sz2,Same}=Max) when Sz1 < Sz2 ->
+%% meet(Type1, Type2) -> Type
+%%  Returns the "meet" of Type1 and Type2. The meet is a narrower
+%%  type than Type1 and Type2. For example:
+%%
+%%     meet(integer, {integer,{0,3}}) -> {integer,{0,3}}
+%%
+%%  The meet for two different types result in 'none', which is
+%%  the bottom element for our type lattice:
+%%
+%%     meet(integer, map) -> none
+
+meet(T, T) ->
+    T;
+meet({integer,_}=T, integer) ->
+    T;
+meet(integer, {integer,_}=T) ->
+    T;
+meet({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) ->
+    {integer,{max(Min1, Min2),min(Max1, Max2)}};
+meet({tuple,min_size,Sz1,Same}, {tuple,min_size,Sz2,Same}=Max) when Sz1 < Sz2 ->
     Max;
-merge_type_info({tuple,min_size,Sz1,Same}=Max, {tuple,min_size,Sz2,Same}) when Sz1 > Sz2 ->
+meet({tuple,min_size,Sz1,Same}=Max, {tuple,min_size,Sz2,Same}) when Sz1 > Sz2 ->
     Max;
-merge_type_info({tuple,exact_size,_,Same}=Exact, {tuple,_,_,Same}) ->
+meet({tuple,exact_size,_,Same}=Exact, {tuple,_,_,Same}) ->
     Exact;
-merge_type_info({tuple,_,_,Same},{tuple,exact_size,_,Same}=Exact) ->
+meet({tuple,_,_,Same},{tuple,exact_size,_,Same}=Exact) ->
     Exact;
-merge_type_info({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) ->
-    merge_type_info({tuple,SzKind1,Sz1,First}, Tuple2);
-merge_type_info({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) ->
-    merge_type_info(Tuple1, {tuple,SzKind2,Sz2,First});
-merge_type_info(integer, {integer,_}=Int) ->
-    Int;
-merge_type_info({integer,_}=Int, integer) ->
-    Int;
-merge_type_info({integer,{Min1,Max1}}, {integer,{Min2,Max2}}) ->
-    {integer,{max(Min1, Min2),min(Max1, Max2)}};
-merge_type_info(NewType, _) ->
-    verify_type(NewType),
-    NewType.
-
-verify_type({atom,_}) -> ok;
-verify_type(boolean) -> ok;
-verify_type(integer) -> ok;
-verify_type({integer,{Min,Max}})
-  when is_integer(Min), is_integer(Max) -> ok;
-verify_type(map) -> ok;
-verify_type(nonempty_list) -> ok;
-verify_type({tuple,_,Sz,[]}) when is_integer(Sz) -> ok;
-verify_type({tuple,_,Sz,[_]}) when is_integer(Sz) -> ok;
-verify_type({tuple_element,_,_}) -> ok;
-verify_type(float) -> ok.
+meet({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) ->
+    meet({tuple,SzKind1,Sz1,First}, Tuple2);
+meet({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) ->
+    meet(Tuple1, {tuple,SzKind2,Sz2,First});
+meet({binary,U1}, {binary,U2}) ->
+    {binary,max(U1, U2)};
+meet(T1, T2) ->
+    case is_any(T1) of
+        true ->
+            verified_type(T2);
+        false ->
+            case is_any(T2) of
+                true ->
+                    verified_type(T1);
+                false ->
+                    none                        %The bottom element.
+            end
+    end.
+
+is_any(any) -> true;
+is_any({tuple_element,_,_}) -> true;
+is_any(_) -> false.
+
+%% verified_type(Type) -> Type
+%%  Returns the passed in type if it is one of the defined types.
+%%  Crashes if there is anything wrong with the type.
+%%
+%%  Here are all possible types:
+%%
+%%  any                  Any Erlang term (top element for the type lattice).
+%%
+%%  {atom,Atom}          The specific atom Atom.
+%%  {binary,Unit}        Binary/bitstring aligned to unit Unit.
+%%  boolean              'true' | 'false'
+%%  float                Floating point number.
+%%  integer              Integer.
+%%  {integer,{Min,Max}}  Integer in the inclusive range Min through Max.
+%%  map                  Map.
+%%  nonempty_list        Nonempty list.
+%%  {tuple,_,_,_}        Tuple (see below).
+%%
+%%  none                 No type (bottom element for the type lattice).
+%%
+%%  {tuple,min_size,Size,First} means that the corresponding register
+%%  contains a tuple with *at least* Size elements (conversely,
+%%  {tuple,exact_size,Size,First} means that it contains a tuple with
+%%  *exactly* Size elements). An tuple with unknown size is
+%%  represented as {tuple,min_size,0,[]}. First is either [] (meaning
+%%  that the tuple's first element is unknown) or [FirstElement] (the
+%%  contents of the first element).
+%%
+%%  There is also a pseudo-type called {tuple_element,_,_}:
+%%
+%%    {tuple_element,SrcTuple,ElementNumber}
+%%
+%%  that does not provide any information about the type of the
+%%  register itself, but provides a link back to the source tuple that
+%%  the register got its value from.
+%%
+%%  Note that {tuple_element,_,_} will *never* be returned by tdb_find/2.
+%%  Use tdb_find_source_tuple/2 to locate the source tuple for a register.
+
+verified_type(any=T) -> T;
+verified_type({atom,_}=T) -> T;
+verified_type({binary,U}=T) when is_integer(U) -> T;
+verified_type(boolean=T) -> T;
+verified_type(integer=T) -> T;
+verified_type({integer,{Min,Max}}=T)
+  when is_integer(Min), is_integer(Max) -> T;
+verified_type(map=T) -> T;
+verified_type(nonempty_list=T) -> T;
+verified_type({tuple,_,Sz,[]}=T) when is_integer(Sz) -> T;
+verified_type({tuple,_,Sz,[_]}=T) when is_integer(Sz) -> T;
+verified_type({tuple_element,_,_}=T) -> T;
+verified_type(float=T) -> T.
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 5333925589..047cd5a569 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -118,7 +118,7 @@ is_killed(R, Is, D) ->
     St = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness(R, Is, St) of
 	{killed,_} -> true;
-	{exit_not_used,_} -> true;
+	{exit_not_used,_} -> false;
 	{_,_} -> false
     end.
 
@@ -131,7 +131,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
     St0 = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness_at(R, Lbl, St0) of
 	{killed,_} -> true;
-	{exit_not_used,_} -> true;
+	{exit_not_used,_} -> false;
 	{_,_} -> false
     end.
 
@@ -148,7 +148,7 @@ is_not_used(R, Is, D) ->
     St = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness(R, Is, St) of
 	{used,_} -> false;
-	{exit_not_used,_} -> false;
+	{exit_not_used,_} -> true;
 	{_,_} -> true
     end.
 
@@ -440,8 +440,11 @@ check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
 	    case member(R, Ss) of
 		true -> {used,St};
 		false ->
+                    %% If the exception is taken, the stack may
+                    %% be scanned. Therefore the register is not
+                    %% guaranteed to be killed.
 		    if
-			R =:= Dst -> {killed,St};
+                        R =:= Dst -> {not_used,St};
 			true -> not_used(check_liveness(R, Is, St))
 		    end
 	    end
@@ -602,8 +605,11 @@ check_liveness(R, [{test_heap,N,Live}|Is], St) ->
 check_liveness(R, [{allocate_zero,N,Live}|Is], St) ->
     I = {block,[{set,[],[],{alloc,Live,{zero,N,0,[]}}}]},
     check_liveness(R, [I|Is], St);
-check_liveness(R, [{get_list,S,D1,D2}|Is], St) ->
-    I = {block,[{set,[D1,D2],[S],get_list}]},
+check_liveness(R, [{get_hd,S,D}|Is], St) ->
+    I = {block,[{set,[D],[S],get_hd}]},
+    check_liveness(R, [I|Is], St);
+check_liveness(R, [{get_tl,S,D}|Is], St) ->
+    I = {block,[{set,[D],[S],get_tl}]},
     check_liveness(R, [I|Is], St);
 check_liveness(R, [remove_message|Is], St) ->
     check_liveness(R, Is, St);
@@ -732,8 +738,8 @@ check_liveness_block_1(R, Ss, Ds, Op, Is, St0) ->
 	    end
     end.
 
-check_liveness_block_2(R, {gc_bif,_Op,{f,Lbl}}, _Ss, St) ->
-    check_liveness_block_3(R, Lbl, St);
+check_liveness_block_2(R, {gc_bif,Op,{f,Lbl}}, Ss, St) ->
+    check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St);
 check_liveness_block_2(R, {bif,Op,{f,Lbl}}, Ss, St) ->
     Arity = length(Ss),
     case erl_internal:comp_op(Op, Arity) orelse
@@ -741,16 +747,23 @@ check_liveness_block_2(R, {bif,Op,{f,Lbl}}, Ss, St) ->
 	true ->
 	    {killed,St};
 	false ->
-	    check_liveness_block_3(R, Lbl, St)
+	    check_liveness_block_3(R, Lbl, {Op,length(Ss)}, St)
     end;
 check_liveness_block_2(R, {put_map,_Op,{f,Lbl}}, _Ss, St) ->
-    check_liveness_block_3(R, Lbl, St);
+    check_liveness_block_3(R, Lbl, {unsafe,0}, St);
 check_liveness_block_2(_, _, _, St) ->
     {killed,St}.
 
-check_liveness_block_3(_, 0, St) ->
+check_liveness_block_3({x,_}, 0, _FA, St) ->
     {killed,St};
-check_liveness_block_3(R, Lbl, St0) ->
+check_liveness_block_3({y,_}, 0, {F,A}, St) ->
+    %% If the exception is thrown, the stack may be scanned,
+    %% thus implicitly using the y register.
+    case erl_bifs:is_safe(erlang, F, A) of
+        true -> {killed,St};
+        false -> {used,St}
+    end;
+check_liveness_block_3(R, Lbl, _FA, St0) ->
     check_liveness_at(R, Lbl, St0).
 
 index_labels_1([{label,Lbl}|Is0], Acc) ->
@@ -788,6 +801,10 @@ replace_labels_1([{wait,{f,Lbl}}|Is], Acc, D, Fb) ->
     replace_labels_1(Is, [{wait,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
 replace_labels_1([{wait_timeout,{f,Lbl},To}|Is], Acc, D, Fb) ->
     replace_labels_1(Is, [{wait_timeout,{f,label(Lbl, D, Fb)},To}|Acc], D, Fb);
+replace_labels_1([{recv_mark=Op,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{Op,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
+replace_labels_1([{recv_set=Op,{f,Lbl}}|Is], Acc, D, Fb) ->
+    replace_labels_1(Is, [{Op,{f,label(Lbl, D, Fb)}}|Acc], D, Fb);
 replace_labels_1([{bif,Name,{f,Lbl},As,R}|Is], Acc, D, Fb) when Lbl =/= 0 ->
     replace_labels_1(Is, [{bif,Name,{f,label(Lbl, D, Fb)},As,R}|Acc], D, Fb);
 replace_labels_1([{gc_bif,Name,{f,Lbl},Live,As,R}|Is], Acc, D, Fb) when Lbl =/= 0 ->
@@ -991,47 +1008,52 @@ live_opt([{recv_mark,_}=I|Is], Regs, D, Acc) ->
 
 live_opt([], _, _, Acc) -> Acc.
 
-live_opt_block([{set,Ds,Ss,Op0}|Is], Regs0, D, Acc) ->
-    Regs1 = x_live(Ss, x_dead(Ds, Regs0)),
-    {Op, Regs} = live_opt_block_op(Op0, Regs1, D),
-    I = {set, Ds, Ss, Op},
-
-    case Ds of
-        [{x,X}] ->
-            case (not is_live(X, Regs0)) andalso Op =:= move of
-                true ->
-                    live_opt_block(Is, Regs0, D, Acc);
-                false ->
-                    live_opt_block(Is, Regs, D, [I|Acc])
-            end;
-        _ ->
-            live_opt_block(Is, Regs, D, [I|Acc])
+live_opt_block([{set,[{x,X}]=Ds,Ss,move}=I|Is], Regs0, D, Acc) ->
+    Regs = x_live(Ss, x_dead(Ds, Regs0)),
+    case is_live(X, Regs0) of
+        true ->
+            live_opt_block(Is, Regs, D, [I|Acc]);
+        false ->
+            %% Useless move, will never be used.
+            live_opt_block(Is, Regs, D, Acc)
     end;
-live_opt_block([{'%anno',_}|Is], Regs, D, Acc) ->
-    live_opt_block(Is, Regs, D, Acc);
-live_opt_block([], Regs, _, Acc) -> {Acc,Regs}.
-
-live_opt_block_op({alloc,Live0,AllocOp}, Regs0, D) ->
-    Regs =
-        case AllocOp of
-            {Kind, _N, Fail} when Kind =:= gc_bif; Kind =:= put_map ->
-                live_join_label(Fail, D, Regs0);
-            _ ->
-                Regs0
-        end,
+live_opt_block([{set,Ds,Ss,{alloc,Live0,AllocOp}}|Is], Regs0, D, Acc) ->
+    %% Calculate liveness from the point of view of the GC.
+    %% There will never be a GC if the instruction fails, so we should
+    %% ignore the failure branch.
+    GcRegs1 = x_dead(Ds, Regs0),
+    GcRegs = x_live(Ss, GcRegs1),
+    Live = live_regs(GcRegs),
 
     %% The life-time analysis used by the code generator is sometimes too
     %% conservative, so it may be possible to lower the number of live
     %% registers based on the exact liveness information. The main benefit is
     %% that more optimizations that depend on liveness information (such as the
-    %% beam_bool and beam_dead passes) may be applied.
-    Live = live_regs(Regs),
-    true = Live =< Live0,
-    {{alloc,Live,AllocOp}, live_call(Live)};
-live_opt_block_op({bif,_N,Fail} = Op, Regs, D) ->
-    {Op, live_join_label(Fail, D, Regs)};
-live_opt_block_op(Op, Regs, _D) ->
-    {Op, Regs}.
+    %% beam_dead pass) may be applied.
+    true = Live =< Live0,                       %Assertion.
+    I = {set,Ds,Ss,{alloc,Live,AllocOp}},
+
+    %% Calculate liveness from the point of view of the preceding instruction.
+    %% The liveness is the union of live registers in the GC and the live
+    %% registers at the failure label.
+    Regs1 = live_call(Live),
+    Regs = live_join_alloc(AllocOp, D, Regs1),
+    live_opt_block(Is, Regs, D, [I|Acc]);
+live_opt_block([{set,Ds,Ss,{bif,_,Fail}}=I|Is], Regs0, D, Acc) ->
+    Regs1 = x_dead(Ds, Regs0),
+    Regs2 = x_live(Ss, Regs1),
+    Regs = live_join_label(Fail, D, Regs2),
+    live_opt_block(Is, Regs, D, [I|Acc]);
+live_opt_block([{set,Ds,Ss,_}=I|Is], Regs0, D, Acc) ->
+    Regs = x_live(Ss, x_dead(Ds, Regs0)),
+    live_opt_block(Is, Regs, D, [I|Acc]);
+live_opt_block([{'%anno',_}|Is], Regs, D, Acc) ->
+    live_opt_block(Is, Regs, D, Acc);
+live_opt_block([], Regs, _, Acc) -> {Acc,Regs}.
+
+live_join_alloc({Kind,_Name,Fail}, D, Regs) when Kind =:= gc_bif; Kind =:= put_map ->
+    live_join_label(Fail, D, Regs);
+live_join_alloc(_, _, Regs) -> Regs.
 
 live_join_labels([{f,L}|T], D, Regs0) when L =/= 0 ->
     Regs = gb_trees:get(L, D) bor Regs0,
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 22ceef097c..c30ab34ac7 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -85,8 +85,6 @@ format_error(Error) ->
 %%% Things currently not checked. XXX
 %%%
 %%% - Heap allocation for binaries.
-%%% - That put_tuple is followed by the correct number of
-%%%   put instructions.
 %%%
 
 %% validate(Module, [Function]) -> [] | [Error]
@@ -148,7 +146,8 @@ validate_0(Module, [{function,Name,Ar,Entry,Code}|Fs], Ft) ->
 	 hf=0,				%Available heap size for floats.
 	 fls=undefined,			%Floating point state.
 	 ct=[],				%List of hot catch/try labels
-	 setelem=false			%Previous instruction was setelement/3.
+         setelem=false,                 %Previous instruction was setelement/3.
+         puts_left=none                 %put/1 instructions left.
 	}).
 
 -type label()        :: integer().
@@ -340,11 +339,25 @@ valfun_1({put_list,A,B,Dst}, Vst0) ->
     Vst = eat_heap(2, Vst0),
     set_type_reg(cons, Dst, Vst);
 valfun_1({put_tuple,Sz,Dst}, Vst0) when is_integer(Sz) ->
+    Vst1 = eat_heap(1, Vst0),
+    Vst = set_type_reg(tuple_in_progress, Dst, Vst1),
+    #vst{current=St0} = Vst,
+    St = St0#st{puts_left={Sz,{Dst,{tuple,Sz}}}},
+    Vst#vst{current=St};
+valfun_1({put,Src}, Vst0) ->
+    assert_term(Src, Vst0),
     Vst = eat_heap(1, Vst0),
-    set_type_reg({tuple,Sz}, Dst, Vst);
-valfun_1({put,Src}, Vst) ->
-    assert_term(Src, Vst),
-    eat_heap(1, Vst);
+    #vst{current=St0} = Vst,
+    case St0 of
+        #st{puts_left=none} ->
+            error(not_building_a_tuple);
+        #st{puts_left={1,{Dst,Type}}} ->
+            St = St0#st{puts_left=none},
+            set_type_reg(Type, Dst, Vst#vst{current=St});
+        #st{puts_left={PutsLeft,Info}} when is_integer(PutsLeft) ->
+            St = St0#st{puts_left={PutsLeft-1,Info}},
+            Vst#vst{current=St}
+    end;
 %% Instructions for optimization of selective receives.
 valfun_1({recv_mark,{f,Fail}}, Vst) when is_integer(Fail) ->
     Vst;
@@ -578,6 +591,12 @@ valfun_4({get_list,Src,D1,D2}, Vst0) ->
     assert_type(cons, Src, Vst0),
     Vst = set_type_reg(term, D1, Vst0),
     set_type_reg(term, D2, Vst);
+valfun_4({get_hd,Src,Dst}, Vst) ->
+    assert_type(cons, Src, Vst),
+    set_type_reg(term, Dst, Vst);
+valfun_4({get_tl,Src,Dst}, Vst) ->
+    assert_type(cons, Src, Vst),
+    set_type_reg(term, Dst, Vst);
 valfun_4({get_tuple_element,Src,I,Dst}, Vst) ->
     assert_type({tuple_element,I+1}, Src, Vst),
     set_type_reg(term, Dst, Vst);
@@ -1134,6 +1153,7 @@ set_type_y(Type, {y,Y}=Reg, #vst{current=#st{y=Ys0}=St}=Vst)
 	     {value,_} ->
 		 gb_trees:update(Y, Type, Ys0)
 	 end,
+    check_try_catch_tags(Type, Y, Ys0),
     Vst#vst{current=St#st{y=Ys}};
 set_type_y(Type, Reg, #vst{}) -> error({invalid_store,Reg,Type}).
 
@@ -1141,6 +1161,29 @@ set_catch_end({y,Y}, #vst{current=#st{y=Ys0}=St}=Vst) ->
     Ys = gb_trees:update(Y, initialized, Ys0),
     Vst#vst{current=St#st{y=Ys}}.
 
+check_try_catch_tags(Type, LastY, Ys) ->
+    case is_try_catch_tag(Type) of
+        false ->
+            ok;
+        true ->
+            %% Every catch or try/catch must use a lower Y register
+            %% number than any enclosing catch or try/catch. That will
+            %% ensure that when the stack is scanned when an
+            %% exception occurs, the innermost try/catch tag is found
+            %% first.
+            Bad = [{{y,Y},Tag} || {Y,Tag} <- gb_trees:to_list(Ys),
+                                  Y < LastY, is_try_catch_tag(Tag)],
+            case Bad of
+                [] ->
+                    ok;
+                [_|_] ->
+                    error({bad_try_catch_nesting,{y,LastY},Bad})
+            end
+    end.
+
+is_try_catch_tag({catchtag,_}) -> true;
+is_try_catch_tag({trytag,_}) -> true;
+is_try_catch_tag(_) -> false.
 
 is_reg_defined({x,_}=Reg, Vst) -> is_type_defined_x(Reg, Vst);
 is_reg_defined({y,_}=Reg, Vst) -> is_type_defined_y(Reg, Vst);
@@ -1274,6 +1317,7 @@ get_move_term_type(Src, Vst) ->
 	initialized -> error({unassigned,Src});
 	{catchtag,_} -> error({catchtag,Src});
 	{trytag,_} -> error({trytag,Src});
+        tuple_in_progress -> error({tuple_in_progress,Src});
 	Type -> Type
     end.
 
@@ -1282,10 +1326,7 @@ get_move_term_type(Src, Vst) ->
 %%  a standard Erlang type (no catch/try tags or match contexts).
 
 get_term_type(Src, Vst) ->
-    case get_term_type_1(Src, Vst) of
-	initialized -> error({unassigned,Src});
-	{catchtag,_} -> error({catchtag,Src});
-	{trytag,_} -> error({trytag,Src});
+    case get_move_term_type(Src, Vst) of
 	#ms{} -> error({match_context,Src});
 	Type -> Type
     end.
@@ -1332,7 +1373,12 @@ branch_arities([Sz,{f,L}|T], Tuple, #vst{current=St}=Vst0)
     Vst = branch_state(L, Vst1),
     branch_arities(T, Tuple, Vst#vst{current=St}).
 
-branch_state(0, #vst{}=Vst) -> Vst;
+branch_state(0, #vst{}=Vst) ->
+    %% If the instruction fails, the stack may be scanned
+    %% looking for a catch tag. Therefore the Y registers
+    %% must be initialized at this point.
+    verify_y_init(Vst),
+    Vst;
 branch_state(L, #vst{current=St,branched=B}=Vst) ->
     Vst#vst{
       branched=case gb_trees:is_defined(L, B) of
diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl
index 1c56b95a9e..6c3a6995d7 100644
--- a/lib/compiler/src/beam_z.erl
+++ b/lib/compiler/src/beam_z.erl
@@ -24,18 +24,20 @@
 
 -export([module/2]).
 
--import(lists, [dropwhile/2]).
+-import(lists, [dropwhile/2,map/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_asm:module_code()}.
 
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
-    Fs = [function(F) || F <- Fs0],
+module({Mod,Exp,Attr,Fs0,Lc}, Opts) ->
+    NoGetHdTl = proplists:get_bool(no_get_hd_tl, Opts),
+    Fs = [function(F, NoGetHdTl) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
-function({function,Name,Arity,CLabel,Is0}) ->
+function({function,Name,Arity,CLabel,Is0}, NoGetHdTl) ->
     try
-	Is = undo_renames(Is0),
+	Is1 = undo_renames(Is0),
+        Is = maybe_eliminate_get_hd_tl(Is1, NoGetHdTl),
 	{function,Name,Arity,CLabel,Is}
     catch
         Class:Error:Stack ->
@@ -65,6 +67,10 @@ undo_renames([{bif,raise,_,_,_}=I|Is0]) ->
 		      (_) -> true
 		   end, Is0),
     [I|undo_renames(Is)];
+undo_renames([{get_hd,Src,Dst1},{get_tl,Src,Dst2}|Is]) ->
+    [{get_list,Src,Dst1,Dst2}|undo_renames(Is)];
+undo_renames([{get_tl,Src,Dst2},{get_hd,Src,Dst1}|Is]) ->
+    [{get_list,Src,Dst1,Dst2}|undo_renames(Is)];
 undo_renames([I|Is]) ->
     [undo_rename(I)|undo_renames(Is)];
 undo_renames([]) -> [].
@@ -107,3 +113,17 @@ undo_rename({get_map_elements,Fail,Src,{list,List}}) ->
 undo_rename({select,I,Reg,Fail,List}) ->
     {I,Reg,Fail,{list,List}};
 undo_rename(I) -> I.
+
+%%%
+%%% Eliminate get_hd/get_tl instructions if requested by
+%%% the no_get_hd_tl option.
+%%%
+
+maybe_eliminate_get_hd_tl(Is, true) ->
+    map(fun({get_hd,Cons,Hd}) ->
+                {get_list,Cons,Hd,{x,1022}};
+           ({get_tl,Cons,Tl}) ->
+                {get_list,Cons,{x,1022},Tl};
+           (I) -> I
+        end, Is);
+maybe_eliminate_get_hd_tl(Is, false) -> Is.
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 1409c358c2..c6a0056a70 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -219,13 +219,15 @@ expand_opt(report, Os) ->
 expand_opt(return, Os) ->
     [return_errors,return_warnings|Os];
 expand_opt(r16, Os) ->
-    [no_record_opt,no_utf8_atoms|Os];
+    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
 expand_opt(r17, Os) ->
-    [no_record_opt,no_utf8_atoms|Os];
+    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
 expand_opt(r18, Os) ->
-    [no_record_opt,no_utf8_atoms|Os];
+    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
 expand_opt(r19, Os) ->
-    [no_record_opt,no_utf8_atoms|Os];
+    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
+expand_opt(r20, Os) ->
+    [no_get_hd_tl,no_record_opt,no_utf8_atoms|Os];
 expand_opt({debug_info_key,_}=O, Os) ->
     [encrypt_debug_info,O|Os];
 expand_opt(no_float_opt, Os) ->
diff --git a/lib/compiler/src/core_parse.yrl b/lib/compiler/src/core_parse.yrl
index 79a7cccd98..85444023c6 100644
--- a/lib/compiler/src/core_parse.yrl
+++ b/lib/compiler/src/core_parse.yrl
@@ -496,7 +496,7 @@ make_lit_bin(Acc, [#c_bitstr{val=I0,size=Sz0,unit=U0,type=Type0,flags=F0}|T]) ->
 	    throw(impossible)
     end,
     if
-	Sz =< 8, T =:= [] ->
+	0 =< Sz, Sz =< 8, T =:= [] ->
 	    <<Acc/binary,I:Sz>>;
 	Sz =:= 8 ->
 	    make_lit_bin(<<Acc/binary,I:8>>, T);
diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab
index d59bb241a8..a47d4e8cf7 100755
--- a/lib/compiler/src/genop.tab
+++ b/lib/compiler/src/genop.tab
@@ -564,3 +564,13 @@ BEAM_FORMAT_NUMBER=0
 ##       exception, but store the atom 'badarg' in x(0) and execute the
 ##       next instruction.
 161: raw_raise/0
+
+## @spec get_hd  Source Head
+## @doc  Get the head (or car) part of a list (a cons cell) from Source and
+##       put it into the register Head.
+162: get_hd/2
+
+## @spec get_tl  Source Tail
+## @doc  Get the tail (or cdr) part of a list (a cons cell) from Source and
+##       put it into the register Tail.
+163: get_tl/2
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index a96d58a903..a8f4926e55 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -1495,28 +1495,34 @@ select_extract_map(Src, Vs, Fail, I, Vdb, Bef, St) ->
     {Code, Aft, St}.
 
 
-select_extract_cons(Src, [#k_var{name=Hd}, #k_var{name=Tl}], I, Vdb, Bef, St) ->
-    {Es,Aft} = case {vdb_find(Hd, Vdb), vdb_find(Tl, Vdb)} of
-		   {{_,_,Lhd}, {_,_,Ltl}} when Lhd =< I, Ltl =< I ->
-		       %% Both head and tail are dead.  No need to generate
-		       %% any instruction.
-		       {[], Bef};
-		   _ ->
-		       %% At least one of head and tail will be used,
-		       %% but we must always fetch both.  We will call
-		       %% clear_dead/2 to allow reuse of the register
-		       %% in case only of them is used.
-
-		       Reg0 = put_reg(Tl, put_reg(Hd, Bef#sr.reg)),
-		       Int0 = Bef#sr{reg=Reg0},
-		       Rsrc = fetch_var(Src, Int0),
-		       Rhd = fetch_reg(Hd, Reg0),
-		       Rtl = fetch_reg(Tl, Reg0),
-		       Int1 = clear_dead(Int0, I, Vdb),
-		       {[{get_list,Rsrc,Rhd,Rtl}], Int1}
-	       end,
-    {Es,Aft,St}.
-    
+select_extract_cons(Src, [#k_var{name=Hd},#k_var{name=Tl}], I, Vdb, Bef, St) ->
+    Rsrc = fetch_var(Src, Bef),
+    Int = clear_dead(Bef, I, Vdb),
+    {{_,_,Lhd},{_,_,Ltl}} = {vdb_find(Hd, Vdb),vdb_find(Tl, Vdb)},
+    case {Lhd =< I, Ltl =< I} of
+        {true,true} ->
+            %% Both dead.
+            {[],Bef,St};
+        {true,false} ->
+            %% Head dead.
+            Reg0 = put_reg(Tl, Bef#sr.reg),
+            Aft = Int#sr{reg=Reg0},
+            Rtl = fetch_reg(Tl, Reg0),
+            {[{get_tl,Rsrc,Rtl}],Aft,St};
+        {false,true} ->
+            %% Tail dead.
+            Reg0 = put_reg(Hd, Bef#sr.reg),
+            Aft = Int#sr{reg=Reg0},
+            Rhd = fetch_reg(Hd, Reg0),
+            {[{get_hd,Rsrc,Rhd}],Aft,St};
+        {false,false} ->
+            %% Both used.
+            Reg0 = put_reg(Tl, put_reg(Hd, Bef#sr.reg)),
+            Aft = Bef#sr{reg=Reg0},
+            Rhd = fetch_reg(Hd, Reg0),
+            Rtl = fetch_reg(Tl, Reg0),
+            {[{get_hd,Rsrc,Rhd},{get_tl,Rsrc,Rtl}],Aft,St}
+    end.
 
 guard_clause_cg(#k_guard_clause{anno=#l{vdb=Vdb},guard=G,body=B}, Fail, Bef, St0) ->
     {Gis,Int,St1} = guard_cg(G, Fail, Vdb, Bef, St0),
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
index 6029b91cdc..8cf8c69fef 100644
--- a/lib/compiler/src/v3_core.erl
+++ b/lib/compiler/src/v3_core.erl
@@ -1152,7 +1152,7 @@ fun_tq(Cs0, L, St0, NameInfo) ->
 %% lc_tq(Line, Exp, [Qualifier], Mc, State) -> {LetRec,[PreExp],State}.
 %%  This TQ from Simon PJ pp 127-138.  
 
-lc_tq(Line, E, [#igen{anno=GAnno,ceps=Ceps,
+lc_tq(Line, E, [#igen{anno=#a{anno=GA}=GAnno,ceps=Ceps,
 		      acc_pat=AccPat,acc_guard=AccGuard,
                       skip_pat=SkipPat,tail=Tail,tail_pat=TailPat,
                       arg={Pre,Arg}}|Qs], Mc, St0) ->
@@ -1162,7 +1162,7 @@ lc_tq(Line, E, [#igen{anno=GAnno,ceps=Ceps,
     F = #c_var{anno=LA,name={Name,1}},
     Nc = #iapply{anno=GAnno,op=F,args=[Tail]},
     {Var,St2} = new_var(St1),
-    Fc = function_clause([Var], LA, {Name,1}),
+    Fc = function_clause([Var], GA, {Name,1}),
     TailClause = #iclause{anno=LAnno,pats=[TailPat],guard=[],body=[Mc]},
     Cs0 = case {AccPat,AccGuard} of
               {SkipPat,[]} ->
@@ -1185,9 +1185,9 @@ lc_tq(Line, E, [#igen{anno=GAnno,ceps=Ceps,
                                   body=Lps ++ [Lc]}|Cs0],
                         St3}
                end,
-    Fun = #ifun{anno=LAnno,id=[],vars=[Var],clauses=Cs,fc=Fc},
-    {#iletrec{anno=LAnno#a{anno=[list_comprehension|LA]},defs=[{{Name,1},Fun}],
-              body=Pre ++ [#iapply{anno=LAnno,op=F,args=[Arg]}]},
+    Fun = #ifun{anno=GAnno,id=[],vars=[Var],clauses=Cs,fc=Fc},
+    {#iletrec{anno=GAnno#a{anno=[list_comprehension|GA]},defs=[{{Name,1},Fun}],
+              body=Pre ++ [#iapply{anno=GAnno,op=F,args=[Arg]}]},
      Ceps,St4};
 lc_tq(Line, E, [#ifilter{}=Filter|Qs], Mc, St) ->
     filter_tq(Line, E, Filter, Mc, St, Qs, fun lc_tq/5);
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl
index fd73e5a7dc..dfe8d26afb 100644
--- a/lib/compiler/src/v3_kernel.erl
+++ b/lib/compiler/src/v3_kernel.erl
@@ -2377,12 +2377,11 @@ uexpr(#k_try{anno=A,arg=A0,vars=Vs,body=B0,evars=Evs,handler=H0},
 	    {A1,Au,St2} = ubody(A0, {break,Avs}, St1),
 	    {B1,Bu,St3} = ubody(B0, Br, St2),
 	    {H1,Hu,St4} = ubody(H0, Br, St3),
-	    {Rs1,St5} = ensure_return_vars(Rs0, St4),
 	    Used = union([Au,subtract(Bu, lit_list_vars(Vs)),
 			  subtract(Hu, lit_list_vars(Evs))]),
-	    {#k_try{anno=#k{us=Used,ns=lit_list_vars(Rs1),a=A},
-		    arg=A1,vars=Vs,body=B1,evars=Evs,handler=H1,ret=Rs1},
-	     Used,St5}
+	    {#k_try{anno=#k{us=Used,ns=lit_list_vars(Rs0),a=A},
+		    arg=A1,vars=Vs,body=B1,evars=Evs,handler=H1,ret=Rs0},
+	     Used,St4}
     end;
 uexpr(#k_try{anno=A,arg=A0,vars=Vs,body=B0,evars=Evs,handler=H0},
       return, St0) ->
@@ -2390,13 +2389,11 @@ uexpr(#k_try{anno=A,arg=A0,vars=Vs,body=B0,evars=Evs,handler=H0},
     {A1,Au,St2} = ubody(A0, {break,Avs}, St1),	%Must break to clean up here!
     {B1,Bu,St3} = ubody(B0, return, St2),
     {H1,Hu,St4} = ubody(H0, return, St3),
-    NumNew = 1,
-    {Ns,St5} = new_vars(NumNew, St4),
     Used = union([Au,subtract(Bu, lit_list_vars(Vs)),
 		  subtract(Hu, lit_list_vars(Evs))]),
-    {#k_try_enter{anno=#k{us=Used,ns=Ns,a=A},
+    {#k_try_enter{anno=#k{us=Used,ns=[],a=A},
 		  arg=A1,vars=Vs,body=B1,evars=Evs,handler=H1},
-     Used,St5};
+     Used,St4};
 uexpr(#k_catch{anno=A,body=B0}, {break,Rs0}, St0) ->
     {Rb,St1} = new_var(St0),
     {B1,Bu,St2} = ubody(B0, {break,[Rb]}, St1),
diff --git a/lib/compiler/src/v3_kernel_pp.erl b/lib/compiler/src/v3_kernel_pp.erl
index ac91039ae0..e9cbe81088 100644
--- a/lib/compiler/src/v3_kernel_pp.erl
+++ b/lib/compiler/src/v3_kernel_pp.erl
@@ -248,7 +248,7 @@ format_1(#k_put{arg=A,ret=Rs}, Ctxt) ->
     [format(A, Ctxt),
      format_ret(Rs, ctxt_bump_indent(Ctxt, 1))
     ];
-format_1(#k_try{arg=A,vars=Vs,body=B,evars=Evs,handler=H}, Ctxt) ->
+format_1(#k_try{arg=A,vars=Vs,body=B,evars=Evs,handler=H,ret=Rs}, Ctxt) ->
     Ctxt1 = ctxt_bump_indent(Ctxt, Ctxt#ctxt.body_indent),
     ["try",
      nl_indent(Ctxt1),
@@ -264,7 +264,8 @@ format_1(#k_try{arg=A,vars=Vs,body=B,evars=Evs,handler=H}, Ctxt) ->
      nl_indent(Ctxt1),
      format(H, Ctxt1),
      nl_indent(Ctxt),
-     "end"
+     "end",
+     format_ret(Rs, Ctxt)
     ];
 format_1(#k_try_enter{arg=A,vars=Vs,body=B,evars=Evs,handler=H}, Ctxt) ->
     Ctxt1 = ctxt_bump_indent(Ctxt, Ctxt#ctxt.body_indent),