14 files changed, 525 insertions, 338 deletions
diff --git a/lib/compiler/src/beam_a.erl b/lib/compiler/src/beam_a.erl
index 6f09dc4be4..7df2edd714 100644
--- a/lib/compiler/src/beam_a.erl
+++ b/lib/compiler/src/beam_a.erl
@@ -58,7 +58,8 @@ rename_instrs([{call_only,A,F}|Is]) ->
 rename_instrs([{call_ext_only,A,F}|Is]) ->
     [{call_ext,A,F},return|rename_instrs(Is)];
 rename_instrs([{'%live',_}|Is]) ->
-    %% When compiling from old .S files.
+    %% Ignore old type of live annotation. Only happens when compiling
+    %% from very old .S files.
     rename_instrs(Is);
 rename_instrs([I|Is]) ->
     [rename_instr(I)|rename_instrs(Is)];
diff --git a/lib/compiler/src/beam_block.erl b/lib/compiler/src/beam_block.erl
index fe1ce6f60b..39ae8d5347 100644
--- a/lib/compiler/src/beam_block.erl
+++ b/lib/compiler/src/beam_block.erl
@@ -23,25 +23,32 @@
 -module(beam_block).
 
 -export([module/2]).
--import(lists, [reverse/1,reverse/2,foldl/3,member/2]).
+-import(lists, [reverse/1,reverse/2,member/2]).
 
 -spec module(beam_utils:module_code(), [compile:option()]) ->
                     {'ok',beam_utils:module_code()}.
 
-module({Mod,Exp,Attr,Fs0,Lc}, _Opt) ->
-    Fs = [function(F) || F <- Fs0],
+module({Mod,Exp,Attr,Fs0,Lc}, Opts) ->
+    Blockify = not member(no_blockify, Opts),
+    Fs = [function(F, Blockify) || F <- Fs0],
     {ok,{Mod,Exp,Attr,Fs,Lc}}.
 
-function({function,Name,Arity,CLabel,Is0}) ->
+function({function,Name,Arity,CLabel,Is0}, Blockify) ->
     try
 	%% Collect basic blocks and optimize them.
-	Is1 = blockify(Is0),
-	Is2 = embed_lines(Is1),
+        Is2 = case Blockify of
+                  true ->
+                      Is1 = blockify(Is0),
+                      embed_lines(Is1);
+                  false ->
+                      Is0
+              end,
         Is3 = beam_utils:anno_defs(Is2),
         Is4 = move_allocates(Is3),
         Is5 = beam_utils:live_opt(Is4),
         Is6 = opt_blocks(Is5),
-        Is = beam_utils:delete_live_annos(Is6),
+        Is7 = beam_utils:delete_annos(Is6),
+        Is = opt_allocs(Is7),
 
         %% Done.
         {function,Name,Arity,CLabel,Is}
@@ -136,17 +143,16 @@ embed_lines([], Acc) -> Acc.
 
 opt_blocks([{block,Bl0}|Is]) ->
     %% The live annotation at the beginning is not useful.
-    [{'%live',_,_}|Bl] = Bl0,
+    [{'%anno',_}|Bl] = Bl0,
     [{block,opt_block(Bl)}|opt_blocks(Is)];
 opt_blocks([I|Is]) ->
     [I|opt_blocks(Is)];
 opt_blocks([]) -> [].
 
 opt_block(Is0) ->
-    Is = find_fixpoint(fun(Is) ->
-			       opt_tuple_element(opt(Is))
-		       end, Is0),
-    opt_alloc(Is).
+    find_fixpoint(fun(Is) ->
+                          opt_tuple_element(opt(Is))
+                  end, Is0).
 
 find_fixpoint(OptFun, Is0) ->
     case OptFun(Is0) of
@@ -196,7 +202,7 @@ move_allocates([I|Is]) ->
     [I|move_allocates(Is)];
 move_allocates([]) -> [].
 
-move_allocates_1([{'%def',_}|Is], Acc) ->
+move_allocates_1([{'%anno',_}|Is], Acc) ->
     move_allocates_1(Is, Acc);
 move_allocates_1([I|Is], [{set,[],[],{alloc,Live0,Info}}|Acc]=Acc0) ->
     case alloc_may_pass(I) of
@@ -240,10 +246,14 @@ opt([{set,_,_,{line,_}}=Line1,
      {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,0}}}=I2|Is])
   when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
     opt([Line2,I2,Line1,I1|Is]);
+opt([{set,[D1],[{integer,Idx1},Reg],{bif,element,{f,L}}}=I1,
+     {set,[D2],[{integer,Idx2},Reg],{bif,element,{f,L}}}=I2|Is])
+  when Idx1 < Idx2, D1 =/= D2, D1 =/= Reg, D2 =/= Reg ->
+    opt([I2,I1|Is]);
 opt([{set,Ds0,Ss,Op}|Is0]) ->
     {Ds,Is} = opt_moves(Ds0, Is0),
     [{set,Ds,Ss,Op}|opt(Is)];
-opt([{'%live',_,_}=I|Is]) ->
+opt([{'%anno',_}=I|Is]) ->
     [I|opt(Is)];
 opt([]) -> [].
 
@@ -411,31 +421,47 @@ eliminate_use_of_from_reg([I]=Is, From, _To, Acc) ->
 	    no
     end.
 
+%% opt_allocs(Instructions) -> Instructions.  Optimize allocate
+%%  instructions inside blocks. If safe, replace an allocate_zero
+%%  instruction with the slightly cheaper allocate instruction.
+
+opt_allocs(Is) ->
+    D = beam_utils:index_labels(Is),
+    opt_allocs_1(Is, D).
+
+opt_allocs_1([{block,Bl0}|Is], D) ->
+    Bl = opt_alloc(Bl0, {D,Is}),
+    [{block,Bl}|opt_allocs_1(Is, D)];
+opt_allocs_1([I|Is], D) ->
+    [I|opt_allocs_1(Is, D)];
+opt_allocs_1([], _) -> [].
+
 %% opt_alloc(Instructions) -> Instructions'
 %%  Optimises all allocate instructions.
 
 opt_alloc([{set,[],[],{alloc,Live0,Info0}},
-	   {set,[],[],{alloc,Live,Info}}|Is]) ->
+           {set,[],[],{alloc,Live,Info}}|Is], D) ->
     Live = Live0,				%Assertion.
     Alloc = combine_alloc(Info0, Info),
     I = {set,[],[],{alloc,Live,Alloc}},
-    opt_alloc([I|Is]);
-opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is]) ->
-    [{set,[],[],opt_alloc(Is, Ns, Nh, R)}|Is];
-opt_alloc([I|Is]) -> [I|opt_alloc(Is)];
-opt_alloc([]) -> [].
+    opt_alloc([I|Is], D);
+opt_alloc([{set,[],[],{alloc,R,{_,Ns,Nh,[]}}}|Is], D) ->
+    [{set,[],[],opt_alloc(Is, D, Ns, Nh, R)}|Is];
+opt_alloc([I|Is], D) -> [I|opt_alloc(Is, D)];
+opt_alloc([], _) -> [].
 
 combine_alloc({_,Ns,Nh1,Init}, {_,nostack,Nh2,[]})  ->
     {zero,Ns,beam_utils:combine_heap_needs(Nh1, Nh2),Init}.
-	
+
 %% opt_alloc(Instructions, FrameSize, HeapNeed, LivingRegs) -> [Instr]
 %%  Generates the optimal sequence of instructions for
 %%  allocating and initalizing the stack frame and needed heap.
 
-opt_alloc(_Is, nostack, Nh, LivingRegs) ->
+opt_alloc(_Is, _D, nostack, Nh, LivingRegs) ->
     {alloc,LivingRegs,{nozero,nostack,Nh,[]}};
-opt_alloc(Is, Ns, Nh, LivingRegs) ->
-    InitRegs = init_yreg(Is, 0),
+opt_alloc(Bl, {D,OuterIs}, Ns, Nh, LivingRegs) ->
+    Is = [{block,Bl}|OuterIs],
+    InitRegs = init_yregs(Ns, Is, D),
     case count_ones(InitRegs) of
 	N when N*2 > Ns ->
 	    {alloc,LivingRegs,{nozero,Ns,Nh,gen_init(Ns, InitRegs)}};
@@ -451,19 +477,14 @@ gen_init(Fs, Regs, Y, Acc) when Regs band 1 =:= 0 ->
 gen_init(Fs, Regs, Y, Acc) ->
     gen_init(Fs, Regs bsr 1, Y+1, Acc).
 
-%% init_yreg(Instructions, RegSet) -> RegSetInitialized
-%%  Calculate the set of initialized y registers.
-
-init_yreg([{set,_,_,{bif,_,_}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{gc_bif,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,_,_,{alloc,_,{put_map,_,_}}}|_], Reg) -> Reg;
-init_yreg([{set,Ds,_,_}|Is], Reg) -> init_yreg(Is, add_yregs(Ds, Reg));
-init_yreg(_Is, Reg) -> Reg.
-
-add_yregs(Ys, Reg) -> foldl(fun(Y, R0) -> add_yreg(Y, R0) end, Reg, Ys).
-    
-add_yreg({y,Y}, Reg) -> Reg bor (1 bsl Y);
-add_yreg(_, Reg)     -> Reg.
+init_yregs(Y, Is, D) when Y >= 0 ->
+    case beam_utils:is_killed({y,Y}, Is, D) of
+        true ->
+            (1 bsl Y) bor init_yregs(Y-1, Is, D);
+        false ->
+            init_yregs(Y-1, Is, D)
+    end;
+init_yregs(_, _, _) -> 0.
 
 count_ones(Bits) -> count_ones(Bits, 0).
 count_ones(0, Acc) -> Acc;
@@ -514,7 +535,7 @@ x_live([], Regs) -> Regs.
 %%  Given a reversed instruction stream, determine the
 %%  the registers that are defined.
 
-defined_regs([{'%def',Def}|_], Regs) ->
+defined_regs([{'%anno',{def,Def}}|_], Regs) ->
     Def bor Regs;
 defined_regs([{set,Ds,_,{alloc,Live,_}}|_], Regs) ->
     x_live(Ds, Regs bor ((1 bsl Live) - 1));
diff --git a/lib/compiler/src/beam_bsm.erl b/lib/compiler/src/beam_bsm.erl
index 9ea5a3eb92..9f3b9d788f 100644
--- a/lib/compiler/src/beam_bsm.erl
+++ b/lib/compiler/src/beam_bsm.erl
@@ -124,20 +124,21 @@ btb_opt_1([{test,bs_get_binary2,F,_,[Reg,{atom,all},U,Fs],Reg}=I0|Is], D, Acc0)
 		  end,
 	    btb_opt_1(Is, D, Acc)
     end;
-btb_opt_1([{test,bs_get_binary2,F,_,[Ctx,{atom,all},U,Fs],Dst}=I0|Is], D, Acc0) ->
-    case btb_reaches_match(Is, [Ctx,Dst], D) of
+btb_opt_1([{test,bs_get_binary2,F,_,[Ctx,{atom,all},U,Fs],Dst}=I0|Is0], D, Acc0) ->
+    case btb_reaches_match(Is0, [Ctx,Dst], D) of
 	{error,Reason} ->
 	    Comment = btb_comment_no_opt(Reason, Fs),
-	    btb_opt_1(Is, D, [Comment,I0|Acc0]);
+	    btb_opt_1(Is0, D, [Comment,I0|Acc0]);
 	{ok,MustSave} when U =:= 1 ->
 	    Comment = btb_comment_opt(Fs),
-	    Acc1 = btb_gen_save(MustSave, Ctx, [Comment|Acc0]),
-	    Acc = [{move,Ctx,Dst}|Acc1],
+            Acc = btb_gen_save(MustSave, Ctx, [Comment|Acc0]),
+            Is = prepend_move(Ctx, Dst, Is0),
 	    btb_opt_1(Is, D, Acc);
 	{ok,MustSave} ->
 	    Comment = btb_comment_opt(Fs),
 	    Acc1 = btb_gen_save(MustSave, Ctx, [Comment|Acc0]),
-	    Acc = [{move,Ctx,Dst},{test,bs_test_unit,F,[Ctx,U]}|Acc1],
+            Acc = [{test,bs_test_unit,F,[Ctx,U]}|Acc1],
+            Is = prepend_move(Ctx, Dst, Is0),
 	    btb_opt_1(Is, D, Acc)
     end;
 btb_opt_1([I|Is], D, Acc) ->
@@ -150,6 +151,12 @@ btb_gen_save(true, Reg, Acc) ->
     [{bs_save2,Reg,{atom,start}}|Acc];
 btb_gen_save(false, _, Acc) -> Acc.
 
+prepend_move(Ctx, Dst, [{block,Bl0}|Is]) ->
+    Bl = [{set,[Dst],[Ctx],move}|Bl0],
+    [{block,Bl}|Is];
+prepend_move(Ctx, Dst, Is) ->
+    [{move,Ctx,Dst}|Is].
+
 %% btb_reaches_match([Instruction], [Register], D) ->
 %%   {ok,MustSave}|{error,Reason}
 %%
diff --git a/lib/compiler/src/beam_disasm.erl b/lib/compiler/src/beam_disasm.erl
index 22ba86fa38..50b76d7f29 100644
--- a/lib/compiler/src/beam_disasm.erl
+++ b/lib/compiler/src/beam_disasm.erl
@@ -1088,6 +1088,8 @@ resolve_inst({get_map_elements,Args0},_,_,_) ->
 
 resolve_inst({build_stacktrace,[]},_,_,_) ->
     build_stacktrace;
+resolve_inst({raw_raise,[]},_,_,_) ->
+    raw_raise;
 
 %%
 %% Catches instructions that are not yet handled.
diff --git a/lib/compiler/src/beam_disasm.hrl b/lib/compiler/src/beam_disasm.hrl
index 8cc0bcf99b..c3326c15a0 100644
--- a/lib/compiler/src/beam_disasm.hrl
+++ b/lib/compiler/src/beam_disasm.hrl
@@ -27,7 +27,7 @@
 %%      PROPER TYPES FOR THE SET OF BEAM INSTRUCTIONS.
 %%
 -type beam_instr() :: 'bs_init_writable' | 'build_stacktrace'
-                    | 'fclearerror' | 'if_end'
+                    | 'fclearerror' | 'if_end' | 'raw_raise'
                     | 'remove_message' | 'return' | 'send' | 'timeout'
                     | tuple().  %% XXX: Very underspecified - FIX THIS
 
diff --git a/lib/compiler/src/beam_record.erl b/lib/compiler/src/beam_record.erl
index db1053e48c..58a6de6775 100644
--- a/lib/compiler/src/beam_record.erl
+++ b/lib/compiler/src/beam_record.erl
@@ -71,7 +71,7 @@ rewrite([{test,test_arity,Fail,[Src,N]}=TA,
             I = {test,is_tagged_tuple,Fail,[Src,N,Atom]},
             rewrite(Is, Idx, Def, [I|Acc])
     end;
-rewrite([{block,[{'%def',Def}|Bl]}|Is], Idx, _Def, Acc) ->
+rewrite([{block,[{'%anno',{def,Def}}|Bl]}|Is], Idx, _Def, Acc) ->
     rewrite(Is, Idx, Def, [{block,Bl}|Acc]);
 rewrite([{label,L}=I|Is], Idx0, Def, Acc) ->
     Idx = beam_utils:index_label(L, Acc, Idx0),
diff --git a/lib/compiler/src/beam_type.erl b/lib/compiler/src/beam_type.erl
index e9f62a5765..3b6bf49961 100644
--- a/lib/compiler/src/beam_type.erl
+++ b/lib/compiler/src/beam_type.erl
@@ -40,7 +40,7 @@ function({function,Name,Arity,CLabel,Asm0}) ->
 	Asm1 = beam_utils:live_opt(Asm0),
 	Asm2 = opt(Asm1, [], tdb_new()),
 	Asm3 = beam_utils:live_opt(Asm2),
-	Asm = beam_utils:delete_live_annos(Asm3),
+	Asm = beam_utils:delete_annos(Asm3),
 	{function,Name,Arity,CLabel,Asm}
     catch
         Class:Error:Stack ->
@@ -92,7 +92,7 @@ simplify_basic_1([{set,[D],[{integer,Index},Reg],{bif,element,_}}=I0|Is], Ts0, A
     simplify_basic_1(Is, Ts, [I|Acc]);
 simplify_basic_1([{set,[D],[TupleReg],{get_tuple_element,0}}=I|Is0], Ts0, Acc) ->
     case tdb_find(TupleReg, Ts0) of
-	{tuple,_,[Contents]} ->
+	{tuple,_,_,[Contents]} ->
 	    simplify_basic_1([{set,[D],[Contents],move}|Is0], Ts0, Acc);
 	_ ->
 	    Ts = update(I, Ts0),
@@ -113,9 +113,17 @@ simplify_basic_1([{test,is_integer,_,[R]}=I|Is], Ts, Acc) ->
     end;
 simplify_basic_1([{test,is_tuple,_,[R]}=I|Is], Ts, Acc) ->
     case tdb_find(R, Ts) of
-	{tuple,_,_} -> simplify_basic_1(Is, Ts, Acc);
+	{tuple,_,_,_} -> simplify_basic_1(Is, Ts, Acc);
 	_ -> simplify_basic_1(Is, Ts, [I|Acc])
     end;
+simplify_basic_1([{test,test_arity,_,[R,Arity]}=I|Is], Ts0, Acc) ->
+    case tdb_find(R, Ts0) of
+	{tuple,exact_size,Arity,_} ->
+	    simplify_basic_1(Is, Ts0, Acc);
+	_Other ->
+	    Ts = update(I, Ts0),
+	    simplify_basic_1(Is, Ts, [I|Acc])
+    end;
 simplify_basic_1([{test,is_map,_,[R]}=I|Is], Ts0, Acc) ->
     case tdb_find(R, Ts0) of
 	map -> simplify_basic_1(Is, Ts0, Acc);
@@ -138,6 +146,14 @@ simplify_basic_1([{test,is_eq_exact,Fail,[R,{atom,_}=Atom]}=I|Is0], Ts0, Acc0) -
 	  end,
     Ts = update(I, Ts0),
     simplify_basic_1(Is0, Ts, Acc);
+simplify_basic_1([{test,is_record,_,[R,{atom,_}=Tag,{integer,Arity}]}=I|Is], Ts0, Acc) ->
+    case tdb_find(R, Ts0) of
+	{tuple,exact_size,Arity,[Tag]} ->
+	    simplify_basic_1(Is, Ts0, Acc);
+	_Other ->
+	    Ts = update(I, Ts0),
+	    simplify_basic_1(Is, Ts, [I|Acc])
+    end;
 simplify_basic_1([{select,select_val,Reg,_,_}=I0|Is], Ts, Acc) ->
     I = case tdb_find(Reg, Ts) of
 	    {integer,Range} ->
@@ -284,7 +300,7 @@ clearerror([], OrigIs) -> [{set,[],[],fclearerror}|OrigIs].
 %%  Combine two blocks and eliminate any move instructions that assign
 %%  to registers that are killed later in the block.
 %%
-merge_blocks(B1, [{'%live',_,_}|B2]) ->
+merge_blocks(B1, [{'%anno',_}|B2]) ->
     merge_blocks_1(B1++[{set,[],[],stop_here}|B2]).
 
 merge_blocks_1([{set,[],_,stop_here}|Is]) -> Is;
@@ -333,27 +349,17 @@ flt_need_heap_2({set,_,_,{put_tuple,_}}, H, Fl) ->
     {[],H+1,Fl};
 flt_need_heap_2({set,_,_,put}, H, Fl) ->
     {[],H+1,Fl};
-%% Then the "neutral" instructions. We just pass them.
-flt_need_heap_2({set,[{fr,_}],_,_}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,[],[],fclearerror}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,[],[],fcheckerror}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,_,_,{bif,_,_}}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,_,_,move}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,_,_,{get_tuple_element,_}}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,_,_,get_list}, H, Fl) ->
-    {[],H,Fl};
-flt_need_heap_2({set,_,_,{try_catch,_,_}}, H, Fl) ->
-    {[],H,Fl};
-%% All other instructions should cause the insertion of an allocation
+%% The following instructions cause the insertion of an allocation
 %% instruction if needed.
+flt_need_heap_2({set,_,_,{alloc,_,_}}, H, Fl) ->
+    {flt_alloc(H, Fl),0,0};
+flt_need_heap_2({set,_,_,{set_tuple_element,_}}, H, Fl) ->
+    {flt_alloc(H, Fl),0,0};
+flt_need_heap_2({'%anno',_}, H, Fl) ->
+    {flt_alloc(H, Fl),0,0};
+%% All other instructions are "neutral". We just pass them.
 flt_need_heap_2(_, H, Fl) ->
-    {flt_alloc(H, Fl),0,0}.
+    {[],H,Fl}.
 
 flt_alloc(0, 0) ->
     [];
@@ -376,7 +382,7 @@ build_alloc(Words, Floats) -> {alloc,[{words,Words},{floats,Floats}]}.
 %%  is not continous at an allocation function (e.g. if {x,0} and {x,2}
 %%  are live, but not {x,1}).
 
-flt_liveness([{'%live',_Live,Regs}=LiveInstr|Is]) ->
+flt_liveness([{'%anno',{used,Regs}}=LiveInstr|Is]) ->
     flt_liveness_1(Is, Regs, [LiveInstr]).
 
 flt_liveness_1([{set,Ds,Ss,{alloc,Live0,Alloc}}|Is], Regs0, Acc) ->
@@ -388,7 +394,7 @@ flt_liveness_1([{set,Ds,Ss,{alloc,Live0,Alloc}}|Is], Regs0, Acc) ->
 flt_liveness_1([{set,Ds,_,_}=I|Is], Regs0, Acc) ->
     Regs = x_live(Ds, Regs0),
     flt_liveness_1(Is, Regs, [I|Acc]);
-flt_liveness_1([{'%live',_,_}], _Regs, Acc) ->
+flt_liveness_1([{'%anno',_}], _Regs, Acc) ->
     reverse(Acc).
 
 init_regs(Live) ->
@@ -412,13 +418,14 @@ x_live([], Regs) -> Regs.
 %%  Update the type database to account for executing an instruction.
 %%
 %%  First the cases for instructions inside basic blocks.
-update({'%live',_,_}, Ts) -> Ts;
+update({'%anno',_}, Ts) ->
+    Ts;
 update({set,[D],[S],move}, Ts) ->
     tdb_copy(S, D, Ts);
 update({set,[D],[{integer,I},Reg],{bif,element,_}}, Ts0) ->
-    tdb_update([{Reg,{tuple,I,[]}},{D,kill}], Ts0);
+    tdb_update([{Reg,{tuple,min_size,I,[]}},{D,kill}], Ts0);
 update({set,[D],[_Index,Reg],{bif,element,_}}, Ts0) ->
-    tdb_update([{Reg,{tuple,0,[]}},{D,kill}], Ts0);
+    tdb_update([{Reg,{tuple,min_size,0,[]}},{D,kill}], Ts0);
 update({set,[D],Args,{bif,N,_}}, Ts0) ->
     Ar = length(Args),
     BoolOp = erl_internal:new_type_test(N, Ar) orelse
@@ -476,7 +483,7 @@ update({kill,D}, Ts) ->
 update({test,is_float,_Fail,[Src]}, Ts0) ->
     tdb_update([{Src,float}], Ts0);
 update({test,test_arity,_Fail,[Src,Arity]}, Ts0) ->
-    tdb_update([{Src,{tuple,Arity,[]}}], Ts0);
+    tdb_update([{Src,{tuple,exact_size,Arity,[]}}], Ts0);
 update({test,is_map,_Fail,[Src]}, Ts0) ->
     tdb_update([{Src,map}], Ts0);
 update({get_map_elements,_,Src,{list,Elems0}}, Ts0) ->
@@ -490,12 +497,12 @@ update({test,is_eq_exact,_,[Reg,{atom,_}=Atom]}, Ts) ->
 	error ->
 	    Ts;
 	{tuple_element,TupleReg,0} ->
-	    tdb_update([{TupleReg,{tuple,1,[Atom]}}], Ts);
+	    tdb_update([{TupleReg,{tuple,min_size,1,[Atom]}}], Ts);
 	_ ->
 	    Ts
     end;
 update({test,is_record,_Fail,[Src,Tag,{integer,Arity}]}, Ts) ->
-    tdb_update([{Src,{tuple,Arity,[Tag]}}], Ts);
+    tdb_update([{Src,{tuple,exact_size,Arity,[Tag]}}], Ts);
 
 %% Binary matching
 
@@ -537,7 +544,7 @@ update({call_ext,Ar,{extfunc,math,Math,Ar}}, Ts) ->
 update({call_ext,3,{extfunc,erlang,setelement,3}}, Ts0) ->
     Ts = tdb_kill_xregs(Ts0),
     case tdb_find({x,1}, Ts0) of
-	{tuple,Sz,_}=T0 ->
+	{tuple,SzKind,Sz,_}=T0 ->
 	    T = case tdb_find({x,0}, Ts0) of
 		    {integer,{I,I}} when I > 1 ->
 			%% First element is not changed. The result
@@ -546,7 +553,7 @@ update({call_ext,3,{extfunc,erlang,setelement,3}}, Ts0) ->
 		    _ ->
 			%% Position is 1 or unknown. May change the
 			%% first element of the tuple.
-			{tuple,Sz,[]}
+			{tuple,SzKind,Sz,[]}
 		end,
 	    tdb_update([{{x,0},T}], Ts);
 	_ ->
@@ -630,7 +637,7 @@ possibly_numeric(_) -> false.
 
 max_tuple_size(Reg, Ts) ->
     case tdb_find(Reg, Ts) of
-	{tuple,Sz,_} -> Sz;
+	{tuple,_,Sz,_} -> Sz;
 	_Other -> 0
     end.
 
@@ -786,11 +793,12 @@ checkerror_2(OrigIs) -> [{set,[],[],fcheckerror}|OrigIs].
 %%% Routines for maintaining a type database.  The type database 
 %%% associates type information with registers.
 %%%
-%%% {tuple,Size,First} means that the corresponding register contains a
-%%% tuple with *at least* Size elements.  An tuple with unknown
-%%% size is represented as {tuple,0,[]}. First is either [] (meaning that
-%%% the tuple's first element is unknown) or [FirstElement] (the contents
-%%% of the first element).
+%%% {tuple,min_size,Size,First} means that the corresponding register contains
+%%% a tuple with *at least* Size elements (conversely, exact_size means that it
+%%% contains a tuple with *exactly* Size elements). An tuple with unknown size
+%%% is represented as {tuple,min_size,0,[]}. First is either [] (meaning that
+%%% the tuple's first element is unknown) or [FirstElement] (the contents of
+%%% the first element).
 %%%
 %%% 'float' means that the register contains a float.
 %%%
@@ -834,7 +842,7 @@ tdb_copy(Literal, D, Ts) ->
 	       {literal,#{}} -> map;
 	       {literal,Tuple} when tuple_size(Tuple) >= 1 ->
 		   Lit = tag_literal(element(1, Tuple)),
-		   {tuple,tuple_size(Tuple),[Lit]};
+		   {tuple,exact_size,tuple_size(Tuple),[Lit]};
 	       _ -> term
 	   end,
     if
@@ -856,14 +864,14 @@ tag_literal(Lit) -> {literal,Lit}.
 %%  Updates a type database.  If a 'kill' operation is given, the type
 %%  information for that register will be removed from the database.
 %%  A kill operation takes precedence over other operations for the same
-%%  register (i.e. [{{x,0},kill},{{x,0},{tuple,5,[]}}] means that the
+%%  register (i.e. [{{x,0},kill},{{x,0},{tuple,min_size,5,[]}}] means that the
 %%  the existing type information, if any, will be discarded, and the
-%%  the '{tuple,5,[]}' information ignored.
+%%  the '{tuple,min_size,5,[]}' information ignored.
 %%
 %%  If NewInfo information is given and there exists information about
 %%  the register, the old and new type information will be merged.
-%%  For instance, {tuple,5,_} and {tuple,10,_} will be merged to produce
-%%  {tuple,10,_}.
+%%  For instance, {tuple,min_size,5,_} and {tuple,min_size,10,_} will be merged
+%%  to produce {tuple,min_size,10,_}.
 
 tdb_update(Uis0, Ts0) ->
     Uis1 = filter(fun ({{x,_},_Op}) -> true;
@@ -901,16 +909,20 @@ tdb_kill_xregs([]) -> [].
     
 remove_key(Key, [{Key,_Op}|Ops]) -> remove_key(Key, Ops);
 remove_key(_, Ops) -> Ops.
-    
+
 merge_type_info(I, I) -> I;
-merge_type_info({tuple,Sz1,Same}, {tuple,Sz2,Same}=Max) when Sz1 < Sz2 ->
+merge_type_info({tuple,min_size,Sz1,Same}, {tuple,min_size,Sz2,Same}=Max) when Sz1 < Sz2 ->
     Max;
-merge_type_info({tuple,Sz1,Same}=Max, {tuple,Sz2,Same}) when Sz1 > Sz2 ->
+merge_type_info({tuple,min_size,Sz1,Same}=Max, {tuple,min_size,Sz2,Same}) when Sz1 > Sz2 ->
     Max;
-merge_type_info({tuple,Sz1,[]}, {tuple,_Sz2,First}=Tuple2) ->
-    merge_type_info({tuple,Sz1,First}, Tuple2);
-merge_type_info({tuple,_Sz1,First}=Tuple1, {tuple,Sz2,_}) ->
-    merge_type_info(Tuple1, {tuple,Sz2,First});
+merge_type_info({tuple,exact_size,_,Same}=Exact, {tuple,_,_,Same}) ->
+    Exact;
+merge_type_info({tuple,_,_,Same},{tuple,exact_size,_,Same}=Exact) ->
+    Exact;
+merge_type_info({tuple,SzKind1,Sz1,[]}, {tuple,_SzKind2,_Sz2,First}=Tuple2) ->
+    merge_type_info({tuple,SzKind1,Sz1,First}, Tuple2);
+merge_type_info({tuple,_SzKind1,_Sz1,First}=Tuple1, {tuple,SzKind2,Sz2,_}) ->
+    merge_type_info(Tuple1, {tuple,SzKind2,Sz2,First});
 merge_type_info(integer, {integer,_}=Int) ->
     Int;
 merge_type_info({integer,_}=Int, integer) ->
@@ -928,7 +940,7 @@ verify_type({integer,{Min,Max}})
   when is_integer(Min), is_integer(Max) -> ok;
 verify_type(map) -> ok;
 verify_type(nonempty_list) -> ok;
-verify_type({tuple,Sz,[]}) when is_integer(Sz) -> ok;
-verify_type({tuple,Sz,[_]}) when is_integer(Sz) -> ok;
+verify_type({tuple,_,Sz,[]}) when is_integer(Sz) -> ok;
+verify_type({tuple,_,Sz,[_]}) when is_integer(Sz) -> ok;
 verify_type({tuple_element,_,_}) -> ok;
 verify_type(float) -> ok.
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
index 60221578bd..5333925589 100644
--- a/lib/compiler/src/beam_utils.erl
+++ b/lib/compiler/src/beam_utils.erl
@@ -25,7 +25,7 @@
 	 is_not_used/3,usage/3,
 	 empty_label_index/0,index_label/3,index_labels/1,replace_labels/4,
 	 code_at/2,bif_to_test/3,is_pure_test/1,
-	 live_opt/1,delete_live_annos/1,combine_heap_needs/2,
+	 live_opt/1,delete_annos/1,combine_heap_needs/2,
          anno_defs/1,
 	 split_even/1
         ]).
@@ -95,7 +95,7 @@ is_killed_block({x,X}, [{set,_,_,{alloc,Live,_}}|_]) ->
     X >= Live;
 is_killed_block(R, [{set,Ds,Ss,_Op}|Is]) ->
     not member(R, Ss) andalso (member(R, Ds) orelse is_killed_block(R, Is));
-is_killed_block(R, [{'%live',_,Regs}|Is]) ->
+is_killed_block(R, [{'%anno',{used,Regs}}|Is]) ->
     case R of
 	{x,X} when (Regs bsr X) band 1 =:= 0 -> true;
 	_ -> is_killed_block(R, Is)
@@ -118,6 +118,7 @@ is_killed(R, Is, D) ->
     St = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness(R, Is, St) of
 	{killed,_} -> true;
+	{exit_not_used,_} -> true;
 	{_,_} -> false
     end.
 
@@ -130,6 +131,7 @@ is_killed_at(R, Lbl, D) when is_integer(Lbl) ->
     St0 = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness_at(R, Lbl, St0) of
 	{killed,_} -> true;
+	{exit_not_used,_} -> true;
 	{_,_} -> false
     end.
 
@@ -146,6 +148,7 @@ is_not_used(R, Is, D) ->
     St = #live{lbl=D,res=gb_trees:empty()},
     case check_liveness(R, Is, St) of
 	{used,_} -> false;
+	{exit_not_used,_} -> false;
 	{_,_} -> true
     end.
 
@@ -267,7 +270,7 @@ is_pure_test({test,Op,_,Ops}) ->
 %%  Go through the instruction sequence in reverse execution
 %%  order, keep track of liveness and remove 'move' instructions
 %%  whose destination is a register that will not be used.
-%%  Also insert {'%live',Live,Regs} annotations at the beginning
+%%  Also insert {used,Regs} annotations at the beginning
 %%  and end of each block.
 
 -spec live_opt([instruction()]) -> [instruction()].
@@ -282,22 +285,22 @@ live_opt(Is0) ->
     Bef ++ [Fi|live_opt(reverse(Is), 0, D, [])].
 
 
-%% delete_live_annos([Instruction]) -> [Instruction].
-%%  Delete all live annotations.
+%% delete_annos([Instruction]) -> [Instruction].
+%%  Delete all annotations.
 
--spec delete_live_annos([instruction()]) -> [instruction()].
+-spec delete_annos([instruction()]) -> [instruction()].
 
-delete_live_annos([{block,Bl0}|Is]) ->
-    case delete_live_annos(Bl0) of
-	[] -> delete_live_annos(Is);
-	[_|_]=Bl -> [{block,Bl}|delete_live_annos(Is)]
+delete_annos([{block,Bl0}|Is]) ->
+    case delete_annos(Bl0) of
+	[] -> delete_annos(Is);
+	[_|_]=Bl -> [{block,Bl}|delete_annos(Is)]
     end;
-delete_live_annos([{'%live',_,_}|Is]) ->
-    delete_live_annos(Is);
-delete_live_annos([I|Is]) ->
-    [I|delete_live_annos(Is)];
-delete_live_annos([]) -> [].
-    
+delete_annos([{'%anno',_}|Is]) ->
+    delete_annos(Is);
+delete_annos([I|Is]) ->
+    [I|delete_annos(Is)];
+delete_annos([]) -> [].
+
 %% combine_heap_needs(HeapNeed1, HeapNeed2) -> HeapNeed
 %%  Combine the heap need for two allocation instructions.
 
@@ -309,11 +312,11 @@ delete_live_annos([]) -> [].
 combine_heap_needs(H1, H2) when is_integer(H1), is_integer(H2) ->
     H1 + H2;
 combine_heap_needs(H1, H2) ->
-    combine_alloc_lists([H1,H2]).
+    {alloc,combine_alloc_lists([H1,H2])}.
 
 
 %% anno_defs(Instructions) -> Instructions'
-%%  Add {'%def',RegisterBitmap} annotations to the beginning of
+%%  Add {def,RegisterBitmap} annotations to the beginning of
 %%  each block.  Iff bit X is set in the the bitmap, it means
 %%  that {x,X} is defined when the block is entered.
 
@@ -347,6 +350,9 @@ split_even(Rs) -> split_even(Rs, [], []).
 %%
 %%    killed - Reg is assigned or killed by an allocation instruction.
 %%    not_used - the value of Reg is not used, but Reg must not be garbage
+%%    exit_not_used - the value of Reg is not used, but must not be garbage
+%%                    because the stack will be scanned because an
+%%                    exit BIF will raise an exception
 %%    used - Reg is used
 
 check_liveness(R, [{block,Blk}|Is], St0) ->
@@ -370,6 +376,8 @@ check_liveness(R, [{test,_,{f,Fail},As}|Is], St0) ->
 	    case check_liveness_at(R, Fail, St0) of
 		{killed,St1} ->
 		    check_liveness(R, Is, St1);
+		{exit_not_used,St1} ->
+		    check_liveness(R, Is, St1);
 		{not_used,St1} ->
 		    not_used(check_liveness(R, Is, St1));
 		{used,_}=Used ->
@@ -389,6 +397,8 @@ check_liveness(R, [{jump,{f,F}}|_], St) ->
     check_liveness_at(R, F, St);
 check_liveness(R, [{case_end,Used}|_], St) -> 
     check_liveness_ret(R, Used, St);
+check_liveness(R, [{try_case_end,Used}|_], St) ->
+    check_liveness_ret(R, Used, St);
 check_liveness(R, [{badmatch,Used}|_], St) ->
     check_liveness_ret(R, Used, St);
 check_liveness(_, [if_end|_], St) ->
@@ -432,7 +442,7 @@ check_liveness(R, [{bs_init,_,_,Live,Ss,Dst}|Is], St) ->
 		false ->
 		    if
 			R =:= Dst -> {killed,St};
-			true -> check_liveness(R, Is, St)
+			true -> not_used(check_liveness(R, Is, St))
 		    end
 	    end
     end;
@@ -466,7 +476,7 @@ check_liveness(R, [{call_ext,Live,_}=I|Is], St) ->
 		    %% We must make sure we don't check beyond this
 		    %% instruction or we will fall through into random
 		    %% unrelated code and get stuck in a loop.
-		    {killed,St}
+		    {exit_not_used,St}
 	    end
     end;
 check_liveness(R, [{call_fun,Live}|Is], St) ->
@@ -514,16 +524,12 @@ check_liveness(R, [{make_fun2,_,_,_,NumFree}|Is], St) ->
 	{x,_} -> {killed,St};
 	{y,_} -> not_used(check_liveness(R, Is, St))
     end;
-check_liveness({x,_}=R, [{'catch',_,_}|Is], St) ->
-    %% All x registers will be killed if an exception occurs.
-    %% Therefore we only need to check the liveness for the
-    %% instructions following the catch instruction.
-    check_liveness(R, Is, St);
-check_liveness({x,_}=R, [{'try',_,_}|Is], St) ->
-    %% All x registers will be killed if an exception occurs.
-    %% Therefore we only need to check the liveness for the
-    %% instructions inside the 'try' block.
-    check_liveness(R, Is, St);
+check_liveness(R, [{'catch'=Op,Y,Fail}|Is], St) ->
+    Set = {set,[Y],[],{try_catch,Op,Fail}},
+    check_liveness(R, [{block,[Set]}|Is], St);
+check_liveness(R, [{'try'=Op,Y,Fail}|Is], St) ->
+    Set = {set,[Y],[],{try_catch,Op,Fail}},
+    check_liveness(R, [{block,[Set]}|Is], St);
 check_liveness(R, [{try_end,Y}|Is], St) ->
     case R of
 	Y ->
@@ -584,6 +590,12 @@ check_liveness(R, [{get_map_elements,{f,Fail},S,{list,L}}|Is], St0) ->
 check_liveness(R, [{put_map,F,Op,S,D,Live,{list,Puts}}|Is], St) ->
     Set = {set,[D],[S|Puts],{alloc,Live,{put_map,Op,F}}},
     check_liveness(R, [{block,[Set]}||Is], St);
+check_liveness(R, [{put_tuple,Ar,D}|Is], St) ->
+    Set = {set,[D],[],{put_tuple,Ar}},
+    check_liveness(R, [{block,[Set]}||Is], St);
+check_liveness(R, [{put_list,S1,S2,D}|Is], St) ->
+    Set = {set,[D],[S1,S2],put_list},
+    check_liveness(R, [{block,[Set]}||Is], St);
 check_liveness(R, [{test_heap,N,Live}|Is], St) ->
     I = {block,[{set,[],[],{alloc,Live,{nozero,nostack,N,[]}}}]},
     check_liveness(R, [I|Is], St);
@@ -597,6 +609,12 @@ check_liveness(R, [remove_message|Is], St) ->
     check_liveness(R, Is, St);
 check_liveness({x,X}, [build_stacktrace|_], St) when X > 0 ->
     {killed,St};
+check_liveness(R, [{recv_mark,_}|Is], St) ->
+    check_liveness(R, Is, St);
+check_liveness(R, [{recv_set,_}|Is], St) ->
+    check_liveness(R, Is, St);
+check_liveness(R, [{'%',_}|Is], St) ->
+    check_liveness(R, Is, St);
 check_liveness(_R, Is, St) when is_list(Is) ->
     %% Not implemented. Conservatively assume that the register is used.
     {used,St}.
@@ -631,6 +649,7 @@ check_liveness_at(R, Lbl, #live{lbl=Ll,res=ResMemorized}=St0) ->
 	    {Res,St#live{res=gb_trees:insert(Lbl, Res, St#live.res)}}
     end.
 
+not_used({exit_not_used,St}) -> {not_used,St};
 not_used({killed,St}) -> {not_used,St};
 not_used({_,_}=Res) -> Res.
 
@@ -649,7 +668,7 @@ check_liveness_ret(_, _, St) -> {killed,St}.
 %%    alloc_used - Used only in an allocate instruction
 %%    used - Reg is explicitly used by an instruction
 %%
-%%  '%live' annotations are not allowed.
+%%  Annotations are not allowed.
 %%
 %%  (Unknown instructions will cause an exception.)
 
@@ -659,13 +678,30 @@ check_liveness_block({x,X}=R, [{set,Ds,Ss,{alloc,Live,Op}}|Is], St0) ->
 	    {killed,St0};
 	true ->
 	    case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
-		{killed,St} -> {not_used,St};
                 {transparent,St} -> {alloc_used,St};
-		{_,_}=Res -> Res
+		{_,_}=Res -> not_used(Res)
 	    end
     end;
-check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St) ->
-    check_liveness_block_1(R, Ss, Ds, Op, Is, St);
+check_liveness_block({y,_}=R, [{set,Ds,Ss,{alloc,_Live,Op}}|Is], St0) ->
+    case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
+        {transparent,St} -> {alloc_used,St};
+        {_,_}=Res -> not_used(Res)
+    end;
+check_liveness_block({y,_}=R, [{set,Ds,Ss,{try_catch,_,Op}}|Is], St0) ->
+    case Ds of
+        [R] ->
+            {killed,St0};
+        _ ->
+            case check_liveness_block_1(R, Ss, Ds, Op, Is, St0) of
+                {exit_not_used,St} ->
+                    {used,St};
+                {transparent,St} ->
+                    %% Conservatively assumed that it is used.
+                    {used,St};
+                {_,_}=Res ->
+                    Res
+            end
+    end;
 check_liveness_block(R, [{set,Ds,Ss,Op}|Is], St) ->
     check_liveness_block_1(R, Ss, Ds, Op, Is, St);
 check_liveness_block(_, [], St) -> {transparent,St}.
@@ -681,6 +717,11 @@ check_liveness_block_1(R, Ss, Ds, Op, Is, St0) ->
 			true -> {killed,St};
 			false -> check_liveness_block(R, Is, St)
 		    end;
+		{exit_not_used,St} ->
+		    case member(R, Ds) of
+			true -> {exit_not_used,St};
+			false -> check_liveness_block(R, Is, St)
+		    end;
 		{not_used,St} ->
 		    not_used(case member(R, Ds) of
 				 true -> {killed,St};
@@ -824,12 +865,14 @@ live_opt([{test,bs_start_match2,Fail,Live,[Src,_],_}=I|Is], _, D, Acc) ->
 
 %% Other instructions.
 live_opt([{block,Bl0}|Is], Regs0, D, Acc) ->
-    Live0 = {'%live',live_regs(Regs0),Regs0},
+    Live0 = make_anno({used,Regs0}),
     {Bl,Regs} = live_opt_block(reverse(Bl0), Regs0, D, [Live0]),
-    Live = {'%live',live_regs(Regs),Regs},
+    Live = make_anno({used,Regs}),
     live_opt(Is, Regs, D, [{block,[Live|Bl]}|Acc]);
 live_opt([build_stacktrace=I|Is], _, D, Acc) ->
     live_opt(Is, live_call(1), D, [I|Acc]);
+live_opt([raw_raise=I|Is], _, D, Acc) ->
+    live_opt(Is, live_call(3), D, [I|Acc]);
 live_opt([{label,L}=I|Is], Regs, D0, Acc) ->
     D = gb_trees:insert(L, Regs, D0),
     live_opt(Is, Regs, D, [I|Acc]);
@@ -871,7 +914,8 @@ live_opt([{test,_,Fail,Live,Ss,_}=I|Is], _, D, Acc) ->
     Regs1 = x_live(Ss, Regs0),
     Regs = live_join_label(Fail, D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
-live_opt([{select,_,Src,Fail,List}=I|Is], Regs0, D, Acc) ->
+live_opt([{select,_,Src,Fail,List}=I|Is], _, D, Acc) ->
+    Regs0 = 0,
     Regs1 = x_live([Src], Regs0),
     Regs = live_join_labels([Fail|List], D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
@@ -894,6 +938,25 @@ live_opt([{get_map_elements,Fail,Src,{list,List}}=I|Is], Regs0, D, Acc) ->
     Regs1 = x_live([Src|Ss], x_dead(Ds, Regs0)),
     Regs = live_join_label(Fail, D, Regs1),
     live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{gc_bif,N,F,R,As,Dst}=I|Is], Regs0, D, Acc) ->
+    Bl = [{set,[Dst],As,{alloc,R,{gc_bif,N,F}}}],
+    {_,Regs} = live_opt_block(Bl, Regs0, D, []),
+    live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{bif,N,F,As,Dst}=I|Is], Regs0, D, Acc) ->
+    Bl = [{set,[Dst],As,{bif,N,F}}],
+    {_,Regs} = live_opt_block(Bl, Regs0, D, []),
+    live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{get_tuple_element,Src,Idx,Dst}=I|Is], Regs0, D, Acc) ->
+    Bl = [{set,[Dst],[Src],{get_tuple_element,Idx}}],
+    {_,Regs} = live_opt_block(Bl, Regs0, D, []),
+    live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{move,Src,Dst}=I|Is], Regs0, D, Acc) ->
+    Regs = x_live([Src], x_dead([Dst], Regs0)),
+    live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{put_map,F,Op,S,Dst,R,{list,Puts}}=I|Is], Regs0, D, Acc) ->
+    Bl = [{set,[Dst],[S|Puts],{alloc,R,{put_map,Op,F}}}],
+    {_,Regs} = live_opt_block(Bl, Regs0, D, []),
+    live_opt(Is, Regs, D, [I|Acc]);
 
 %% Transparent instructions - they neither use nor modify x registers.
 live_opt([{deallocate,_}=I|Is], Regs, D, Acc) ->
@@ -910,6 +973,10 @@ live_opt([{wait_timeout,_,{Tag,_}}=I|Is], Regs, D, Acc) when Tag =/= x ->
     live_opt(Is, Regs, D, [I|Acc]);
 live_opt([{line,_}=I|Is], Regs, D, Acc) ->
     live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{'catch',_,_}=I|Is], Regs, D, Acc) ->
+    live_opt(Is, Regs, D, [I|Acc]);
+live_opt([{'try',_,_}=I|Is], Regs, D, Acc) ->
+    live_opt(Is, Regs, D, [I|Acc]);
 
 %% The following instructions can occur if the "compilation" has been
 %% started from a .S file using the 'from_asm' option.
@@ -940,7 +1007,7 @@ live_opt_block([{set,Ds,Ss,Op0}|Is], Regs0, D, Acc) ->
         _ ->
             live_opt_block(Is, Regs, D, [I|Acc])
     end;
-live_opt_block([{'%live',_,_}|Is], Regs, D, Acc) ->
+live_opt_block([{'%anno',_}|Is], Regs, D, Acc) ->
     live_opt_block(Is, Regs, D, Acc);
 live_opt_block([], Regs, _, Acc) -> {Acc,Regs}.
 
@@ -1015,7 +1082,7 @@ defs([{bif,_,{f,Fail},_Src,Dst}=I|Is], Regs0, D) ->
     [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
 defs([{block,Block0}|Is], Regs0, D0) ->
     {Block,Regs,D} = defs_list(Block0, Regs0, D0),
-    [{block,[{'%def',Regs0}|Block]}|defs(Is, Regs, D)];
+    [{block,[make_anno({def,Regs0})|Block]}|defs(Is, Regs, D)];
 defs([{bs_init,{f,L},_,_,_,Dst}=I|Is], Regs0, D) ->
     Regs = def_regs([Dst], Regs0),
     [I|defs(Is, Regs, update_regs(L, Regs, D))];
@@ -1077,6 +1144,8 @@ defs([{move,_,Dst}=I|Is], Regs0, D) ->
 defs([{put_map,{f,Fail},_,_,Dst,_,_}=I|Is], Regs0, D) ->
     Regs = def_regs([Dst], Regs0),
     [I|defs(Is, Regs, update_regs(Fail, Regs0, D))];
+defs([raw_raise=I|Is], _Regs, D) ->
+    [I|defs(Is, 1, D)];
 defs([return=I|Is], _Regs, D) ->
     [I|defs_unreachable(Is, D)];
 defs([{select,_,_Src,Fail,List}=I|Is], Regs, D0) ->
@@ -1205,3 +1274,13 @@ update_regs(L, Regs0, D) ->
 all_defined(Live, Regs) ->
     All = (1 bsl Live) - 1,
     Regs band All =:= All.
+
+%%%
+%%% Utilities.
+%%%
+
+%% make_anno(Anno) -> WrappedAnno.
+%%  Wrap an annotation term.
+
+make_anno(Anno) ->
+    {'%anno',Anno}.
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
index 2ad9747940..22ceef097c 100644
--- a/lib/compiler/src/beam_validator.erl
+++ b/lib/compiler/src/beam_validator.erl
@@ -524,15 +524,18 @@ valfun_4({bif,element,{f,Fail},[Pos,Tuple],Dst}, Vst0) ->
 valfun_4({bif,raise,{f,0},Src,_Dst}, Vst) ->
     validate_src(Src, Vst),
     kill_state(Vst);
+valfun_4(raw_raise=I, Vst) ->
+    call(I, 3, Vst);
 valfun_4({bif,Op,{f,Fail},Src,Dst}, Vst0) ->
     validate_src(Src, Vst0),
     Vst = branch_state(Fail, Vst0),
     Type = bif_type(Op, Src, Vst),
     set_type_reg(Type, Dst, Vst);
 valfun_4({gc_bif,Op,{f,Fail},Live,Src,Dst}, #vst{current=St0}=Vst0) ->
+    verify_live(Live, Vst0),
+    verify_y_init(Vst0),
     St = kill_heap_allocation(St0),
     Vst1 = Vst0#vst{current=St},
-    verify_live(Live, Vst1),
     Vst2 = branch_state(Fail, Vst1),
     Vst = prune_x_regs(Live, Vst2),
     validate_src(Src, Vst),
@@ -686,6 +689,7 @@ valfun_4({bs_utf16_size,{f,Fail},A,Dst}, Vst) ->
     set_type_reg({integer,[]}, Dst, branch_state(Fail, Vst));
 valfun_4({bs_init2,{f,Fail},Sz,Heap,Live,_,Dst}, Vst0) ->
     verify_live(Live, Vst0),
+    verify_y_init(Vst0),
     if
 	is_integer(Sz) ->
 	    ok;
@@ -698,6 +702,7 @@ valfun_4({bs_init2,{f,Fail},Sz,Heap,Live,_,Dst}, Vst0) ->
     set_type_reg(binary, Dst, Vst);
 valfun_4({bs_init_bits,{f,Fail},Sz,Heap,Live,_,Dst}, Vst0) ->
     verify_live(Live, Vst0),
+    verify_y_init(Vst0),
     if
 	is_integer(Sz) ->
 	    ok;
@@ -710,6 +715,7 @@ valfun_4({bs_init_bits,{f,Fail},Sz,Heap,Live,_,Dst}, Vst0) ->
     set_type_reg(binary, Dst, Vst);
 valfun_4({bs_append,{f,Fail},Bits,Heap,Live,_Unit,Bin,_Flags,Dst}, Vst0) ->
     verify_live(Live, Vst0),
+    verify_y_init(Vst0),
     assert_term(Bits, Vst0),
     assert_term(Bin, Vst0),
     Vst1 = heap_alloc(Heap, Vst0),
@@ -945,6 +951,7 @@ deallocate(#vst{current=St}=Vst) ->
 
 test_heap(Heap, Live, Vst0) ->
     verify_live(Live, Vst0),
+    verify_y_init(Vst0),
     Vst = prune_x_regs(Live, Vst0),
     heap_alloc(Heap, Vst).
 
diff --git a/lib/compiler/src/compile.erl b/lib/compiler/src/compile.erl
index 770aa2c6c1..1409c358c2 100644
--- a/lib/compiler/src/compile.erl
+++ b/lib/compiler/src/compile.erl
@@ -775,6 +775,8 @@ asm_passes() ->
 	 {iff,drecv,{listing,"recv"}},
 	 {unless,no_record_opt,{pass,beam_record}},
 	 {iff,drecord,{listing,"record"}},
+         {unless,no_blk2,?pass(block2)},
+	 {iff,dblk2,{listing,"block2"}},
 	 {unless,no_stack_trimming,{pass,beam_trim}},
 	 {iff,dtrim,{listing,"trim"}},
 	 {pass,beam_flatten}]},
@@ -1350,6 +1352,10 @@ v3_kernel(Code0, #compile{options=Opts,warnings=Ws0}=St) ->
 	    {ok,Code,St}
     end.
 
+block2(Code0, #compile{options=Opts}=St) ->
+    {ok,Code} = beam_block:module(Code0, [no_blockify|Opts]),
+    {ok,Code,St}.
+
 test_old_inliner(#compile{options=Opts}) ->
     %% The point of this test is to avoid loading the old inliner
     %% if we know that it will not be used.
diff --git a/lib/compiler/src/genop.tab b/lib/compiler/src/genop.tab
index 397e478e1e..d59bb241a8 100755
--- a/lib/compiler/src/genop.tab
+++ b/lib/compiler/src/genop.tab
@@ -554,3 +554,13 @@ BEAM_FORMAT_NUMBER=0
 ##       Do a garbage collection if necessary to allocate space on the heap
 ##       for the result.
 160: build_stacktrace/0
+
+## @spec raw_raise
+## @doc  This instruction works like the erlang:raise/3 BIF, except that the
+##       stacktrace in x(2) must be a raw stacktrace.
+##       x(0) is the class of the exception (error, exit, or throw),
+##       x(1) is the exception term, and x(2) is the raw stackframe.
+##       If x(0) is not a valid class, the instruction will not throw an
+##       exception, but store the atom 'badarg' in x(0) and execute the
+##       next instruction.
+161: raw_raise/0
diff --git a/lib/compiler/src/sys_core_bsm.erl b/lib/compiler/src/sys_core_bsm.erl
index 37e071fafa..65580f79e3 100644
--- a/lib/compiler/src/sys_core_bsm.erl
+++ b/lib/compiler/src/sys_core_bsm.erl
@@ -24,7 +24,7 @@
 -export([module/2,format_error/1]).
 
 -include("core_parse.hrl").
--import(lists, [member/2,nth/2,reverse/1,usort/1]).
+-import(lists, [member/2,reverse/1,usort/1]).
 
 -spec module(cerl:c_module(), [compile:option()]) -> {'ok', cerl:c_module()}.
 
@@ -59,13 +59,6 @@ format_error(bin_opt_alias) ->
 format_error(bin_partition) ->
     "INFO: matching non-variables after a previous clause matching a variable "
 	"will prevent delayed sub binary optimization";
-format_error(bin_left_var_used_in_guard) ->
-    "INFO: a variable to the left of the binary pattern is used in a guard; "
-	"will prevent delayed sub binary optimization";
-format_error(bin_argument_order) ->
-    "INFO: matching anything else but a plain variable to the left of "
-	"binary pattern will prevent delayed sub binary optimization; "
-	"SUGGEST changing argument order";
 format_error(bin_var_used) ->
     "INFO: using a matched out sub binary will prevent "
 	"delayed sub binary optimization";
@@ -96,46 +89,41 @@ bsm_an(#c_case{arg=#c_values{es=Es}}=Case) ->
 bsm_an(Other) ->
     {ok,Other}.
 
-bsm_an_1(Vs, #c_case{clauses=Cs}=Case) ->
-    case bsm_leftmost(Cs) of
-	none -> {ok,Case};
-	Pos -> bsm_an_2(Vs, Cs, Case, Pos)
-    end.
-
-bsm_an_2(Vs, Cs, Case, Pos) ->
-    case bsm_nonempty(Cs, Pos) of
-	true -> bsm_an_3(Vs, Cs, Case, Pos);
-	false -> {ok,Case}
+bsm_an_1(Vs0, #c_case{clauses=Cs0}=Case) ->
+    case bsm_leftmost(Cs0) of
+	none ->
+            {ok,Case};
+        1 ->
+            bsm_an_2(Vs0, Cs0, Case);
+        Pos ->
+            Vs = move_from_col(Pos, Vs0),
+            Cs = [C#c_clause{pats=move_from_col(Pos, Ps)} ||
+                     #c_clause{pats=Ps}=C <- Cs0],
+            bsm_an_2(Vs, Cs, Case)
     end.
 
-bsm_an_3(Vs, Cs, Case, Pos) ->
+bsm_an_2(Vs, Cs, Case) ->
     try
-	bsm_ensure_no_partition(Cs, Pos),
-	{ok,bsm_do_an(Vs, Pos, Cs, Case)}
+        bsm_ensure_no_partition(Cs),
+        {ok,bsm_do_an(Vs, Cs, Case)}
     catch
-	throw:{problem,Where,What} ->
-	    {ok,Case,{Where,What}}
+        throw:{problem,Where,What} ->
+            {ok,Case,{Where,What}}
     end.
 
-bsm_do_an(Vs0, Pos, Cs0, Case) ->
-    case nth(Pos, Vs0) of
-	#c_var{name=Vname}=V0 ->
-	    Cs = bsm_do_an_var(Vname, Pos, Cs0, []),
-	    V = bsm_annotate_for_reuse(V0),
-	    Bef = lists:sublist(Vs0, Pos-1),
-	    Aft = lists:nthtail(Pos, Vs0),
-	    case Bef ++ [V|Aft] of
-		[_] ->
-		    Case#c_case{arg=V,clauses=Cs};
-		Vs ->
-		    Case#c_case{arg=#c_values{es=Vs},clauses=Cs}
-	    end;
-	_ ->
-	    Case
-    end.
+move_from_col(Pos, L) ->
+    {First,[Col|Rest]} = lists:split(Pos - 1, L),
+    [Col|First] ++ Rest.
 
-bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) ->
-    case nth(S, Ps) of
+bsm_do_an([#c_var{name=Vname}=V0|Vs0], Cs0, Case) ->
+    Cs = bsm_do_an_var(Vname, Cs0),
+    V = bsm_annotate_for_reuse(V0),
+    Vs = core_lib:make_values([V|Vs0]),
+    Case#c_case{arg=Vs,clauses=Cs};
+bsm_do_an(_Vs, _Cs, Case) -> Case.
+
+bsm_do_an_var(V, [#c_clause{pats=[P|_],guard=G,body=B0}=C0|Cs]) ->
+    case P of
 	#c_var{name=VarName} ->
 	    case core_lib:is_var_used(V, G) of
 		true -> bsm_problem(C0, orig_bin_var_used_in_guard);
@@ -148,23 +136,23 @@ bsm_do_an_var(V, S, [#c_clause{pats=Ps,guard=G,body=B0}=C0|Cs], Acc) ->
 	    B1 = bsm_maybe_ctx_to_binary(VarName, B0),
 	    B = bsm_maybe_ctx_to_binary(V, B1),
 	    C = C0#c_clause{body=B},
-	    bsm_do_an_var(V, S, Cs, [C|Acc]);
-	#c_alias{}=P ->
+            [C|bsm_do_an_var(V, Cs)];
+        #c_alias{} ->
 	    case bsm_could_match_binary(P) of
 		false ->
-		    bsm_do_an_var(V, S, Cs, [C0|Acc]);
+		    [C0|bsm_do_an_var(V, Cs)];
 		true ->
 		    bsm_problem(C0, bin_opt_alias)
 	    end;
-	P ->
+        _ ->
 	    case bsm_could_match_binary(P) andalso bsm_is_var_used(V, G, B0) of
 		false ->
-		    bsm_do_an_var(V, S, Cs, [C0|Acc]);
+		    [C0|bsm_do_an_var(V, Cs)];
 		true ->
 		    bsm_problem(C0, bin_var_used)
 	    end
     end;
-bsm_do_an_var(_, _, [], Acc) -> reverse(Acc).
+bsm_do_an_var(_, []) -> [].
 
 bsm_annotate_for_reuse(#c_var{anno=Anno}=Var) ->
     Var#c_var{anno=[reuse_for_context|Anno]}.
@@ -192,131 +180,82 @@ previous_ctx_to_binary(V, Core) ->
     end.
 
 %% bsm_leftmost(Cs) -> none | ArgumentNumber
-%%  Find the leftmost argument that does binary matching. Return
-%%  the number of the argument (1-N).
+%%  Find the leftmost argument that matches a nonempty binary.
+%%  Return either 'none' or the argument number (1-N).
 
 bsm_leftmost(Cs) ->
     bsm_leftmost_1(Cs, none).
 
+bsm_leftmost_1([_|_], 1) ->
+    1;
 bsm_leftmost_1([#c_clause{pats=Ps}|Cs], Pos) ->
     bsm_leftmost_2(Ps, Cs, 1, Pos);
 bsm_leftmost_1([], Pos) -> Pos.
 
 bsm_leftmost_2(_, Cs, Pos, Pos) ->
     bsm_leftmost_1(Cs, Pos);
-bsm_leftmost_2([#c_binary{}|_], Cs, N, _) ->
+bsm_leftmost_2([#c_binary{segments=[_|_]}|_], Cs, N, _) ->
     bsm_leftmost_1(Cs, N);
 bsm_leftmost_2([_|Ps], Cs, N, Pos) ->
     bsm_leftmost_2(Ps, Cs, N+1, Pos);
 bsm_leftmost_2([], Cs, _, Pos) ->
     bsm_leftmost_1(Cs, Pos).
 
-%% bsm_nonempty(Cs, Pos) -> true|false
-%%  Check if at least one of the clauses matches a non-empty
-%%  binary in the given argument position.
+%% bsm_ensure_no_partition(Cs) -> ok     (exception if problem)
+%%  There must only be a single bs_start_match2 instruction if we
+%%  are to reuse the binary variable for the match context.
+%%
+%%  To make sure that there is only a single bs_start_match2
+%%  instruction, we will check for partitions such as:
 %%
-bsm_nonempty([#c_clause{pats=Ps}|Cs], Pos) ->
-    case nth(Pos, Ps) of
-	#c_binary{segments=[_|_]} ->
-	    true;
-	_ ->
-	    bsm_nonempty(Cs, Pos)
-    end;
-bsm_nonempty([], _ ) -> false.
-
-%% bsm_ensure_no_partition(Cs, Pos) -> ok     (exception if problem)
-%%  We must make sure that matching is not partitioned between
-%%  variables like this:
 %%             foo(<<...>>) -> ...
 %%             foo(<Variable>) when ... -> ...
-%%             foo(<Any non-variable pattern>) ->
-%%  If there is such partition, we are not allowed to reuse the binary variable
-%%  for the match context.
+%%             foo(<Non-variable pattern>) ->
 %%
-%%  Also, arguments to the left of the argument that is matched
-%%  against a binary, are only allowed to be simple variables, not
-%%  used in guards. The reason is that we must know that the binary is
-%%  only matched in one place (i.e. there must be only one bs_start_match2
-%%  instruction emitted).
+%%  If there is such partition, we reject the optimization.
 
-bsm_ensure_no_partition(Cs, Pos) ->
-    bsm_ensure_no_partition_1(Cs, Pos, before).
+bsm_ensure_no_partition(Cs) ->
+    bsm_ensure_no_partition_1(Cs, before).
 
 %% Loop through each clause.
-bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], Pos, State0) ->
-    State = bsm_ensure_no_partition_2(Ps, Pos, G, simple_vars, State0),
+bsm_ensure_no_partition_1([#c_clause{pats=Ps,guard=G}|Cs], State0) ->
+    State = bsm_ensure_no_partition_2(Ps, G, State0),
     case State of
 	'after' ->
-	    bsm_ensure_no_partition_after(Cs, Pos);
+	    bsm_ensure_no_partition_after(Cs);
 	_ ->
 	    ok
     end,
-    bsm_ensure_no_partition_1(Cs, Pos, State);
-bsm_ensure_no_partition_1([], _, _) -> ok.
+    bsm_ensure_no_partition_1(Cs, State);
+bsm_ensure_no_partition_1([], _) -> ok.
 
-%% Loop through each pattern for this clause.
-bsm_ensure_no_partition_2([#c_binary{}=Where|_], 1, _, Vstate, State) ->
-    case State of
-	before when Vstate =:= simple_vars -> within;
-	before -> bsm_problem(Where, Vstate);
-	within when Vstate =:= simple_vars -> within;
-	within -> bsm_problem(Where, Vstate)
-    end;
-bsm_ensure_no_partition_2([#c_alias{}=Alias|_], 1, N, Vstate, State) ->
+bsm_ensure_no_partition_2([#c_binary{}|_], _, _State) ->
+    within;
+bsm_ensure_no_partition_2([#c_alias{}=Alias|_], N, State) ->
     %% Retrieve the real pattern that the alias refers to and check that.
     P = bsm_real_pattern(Alias),
-    bsm_ensure_no_partition_2([P], 1, N, Vstate, State);
-bsm_ensure_no_partition_2([_|_], 1, _, _Vstate, before=State) ->
+    bsm_ensure_no_partition_2([P], N, State);
+bsm_ensure_no_partition_2([_|_], _, before=State) ->
     %% No binary matching yet - therefore no partition.
     State;
-bsm_ensure_no_partition_2([P|_], 1, _, Vstate, State) ->
+bsm_ensure_no_partition_2([P|_], _, State) ->
     case bsm_could_match_binary(P) of
 	false ->
-	    %% If clauses can be freely arranged (Vstate =:= simple_vars),
-	    %% a clause that cannot match a binary will not partition the clause.
-	    %% Example:
-	    %%
-	    %% a(Var, <<>>) -> ...
-	    %% a(Var, []) -> ...
-	    %% a(Var, <<B>>) -> ...
-	    %%
-	    %% But if the clauses can't be freely rearranged, as in
-	    %%
-	    %% b(Var, <<X>>) -> ...
-	    %% b(1, 2) -> ...
-	    %%
-	    %% we do have a problem.
-	    %%
-	    case Vstate of
-		simple_vars -> State;
-		_ -> bsm_problem(P, Vstate)
-	    end;
+            State;
 	true ->
 	    %% The pattern P *may* match a binary, so we must update the state.
 	    %% (P must be a variable.)
-	    case State of
-		within -> 'after';
-		'after' -> 'after'
-	    end
-    end;
-bsm_ensure_no_partition_2([#c_var{name=V}|Ps], N, G, Vstate, S) ->
-    case core_lib:is_var_used(V, G) of
-	false ->
-	    bsm_ensure_no_partition_2(Ps, N-1, G, Vstate, S);
-	true ->
-	    bsm_ensure_no_partition_2(Ps, N-1, G, bin_left_var_used_in_guard, S)
-    end;
-bsm_ensure_no_partition_2([_|Ps], N, G, _, S) ->
-    bsm_ensure_no_partition_2(Ps, N-1, G, bin_argument_order, S).
+            'after'
+    end.
 
-bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs], Pos) ->
-    case nth(Pos, Ps) of
-	#c_var{} ->
-	    bsm_ensure_no_partition_after(Cs, Pos);
-	_ ->
-	    bsm_problem(C, bin_partition)
+bsm_ensure_no_partition_after([#c_clause{pats=Ps}=C|Cs]) ->
+    case Ps of
+        [#c_var{}|_] ->
+            bsm_ensure_no_partition_after(Cs);
+        _ ->
+            bsm_problem(C, bin_partition)
     end;
-bsm_ensure_no_partition_after([], _) -> ok.
+bsm_ensure_no_partition_after([]) -> ok.
 
 bsm_could_match_binary(#c_alias{pat=P}) -> bsm_could_match_binary(P);
 bsm_could_match_binary(#c_cons{}) -> false;
diff --git a/lib/compiler/src/sys_core_fold.erl b/lib/compiler/src/sys_core_fold.erl
index e28d48acf5..a9bd363ee1 100644
--- a/lib/compiler/src/sys_core_fold.erl
+++ b/lib/compiler/src/sys_core_fold.erl
@@ -145,14 +145,9 @@ find_fixpoint(OptFun, Core0, Max) ->
 body(Body, Sub) ->
     body(Body, value, Sub).
 
-body(#c_values{anno=A,es=Es0}, Ctxt, Sub) ->
-    Es1 = expr_list(Es0, Ctxt, Sub),
-    case Ctxt of
-	value ->
-	    #c_values{anno=A,es=Es1};
-	effect ->
-	    make_effect_seq(Es1, Sub)
-    end;
+body(#c_values{anno=A,es=Es0}, value, Sub) ->
+    Es1 = expr_list(Es0, value, Sub),
+    #c_values{anno=A,es=Es1};
 body(E, Ctxt, Sub) ->
     ?ASSERT(verify_scope(E, Sub)),
     expr(E, Ctxt, Sub).
@@ -313,9 +308,15 @@ expr(#c_seq{arg=Arg0,body=B0}=Seq0, Ctxt, Sub) ->
 	false ->
 	    %% Arg cannot be "values" here - only a single value
 	    %% make sense here.
-	    case is_safe_simple(Arg, Sub) of
-		true -> B1;
-		false -> Seq0#c_seq{arg=Arg,body=B1}
+            case {Ctxt,is_safe_simple(Arg, Sub)} of
+                {effect,true} -> B1;
+                {effect,false} ->
+                    case is_safe_simple(B1, Sub) of
+                        true -> Arg;
+                        false -> Seq0#c_seq{arg=Arg,body=B1}
+                    end;
+                {value,true} -> B1;
+                {value,false} -> Seq0#c_seq{arg=Arg,body=B1}
 	    end
     end;
 expr(#c_let{}=Let0, Ctxt, Sub) ->
@@ -379,10 +380,7 @@ expr(#c_case{}=Case0, Ctxt, Sub) ->
 	    Case = Case1#c_case{arg=Arg2,clauses=Cs2},
 	    warn_no_clause_match(Case1, Case),
 	    Expr = eval_case(Case, Sub),
-            case move_case_into_arg(Case, Sub) of
-                impossible -> Expr;
-                Other -> Other
-            end;
+            move_case_into_arg(Expr, Sub);
 	Other ->
 	    expr(Other, Ctxt, Sub)
     end;
@@ -2509,6 +2507,72 @@ are_all_failing_clauses(Cs) ->
 is_failing_clause(#c_clause{body=B}) ->
     will_fail(B).
 
+%% opt_build_stacktrace(Let) -> Core.
+%%  If the stacktrace is *only* used in a call to erlang:raise/3,
+%%  there is no need to build a cooked stackframe using build_stacktrace/1.
+
+opt_build_stacktrace(#c_let{vars=[#c_var{name=Cooked}],
+                            arg=#c_primop{name=#c_literal{val=build_stacktrace},
+                                          args=[RawStk]},
+                            body=Body}=Let) ->
+    case Body of
+        #c_call{module=#c_literal{val=erlang},
+                name=#c_literal{val=raise},
+                args=[Class,Exp,#c_var{name=Cooked}]} ->
+            %% The stacktrace is only used in a call to erlang:raise/3.
+            %% There is no need to build the stacktrace. Replace the
+            %% call to erlang:raise/3 with the the raw_raise/3 instruction,
+            %% which will use a raw stacktrace.
+            #c_primop{name=#c_literal{val=raw_raise},
+                      args=[Class,Exp,RawStk]};
+        #c_let{vars=[#c_var{name=V}],arg=Arg,body=B0} when V =/= Cooked ->
+            case core_lib:is_var_used(Cooked, Arg) of
+                false ->
+                    %% The built stacktrace is not used in the argument,
+                    %% so we can sink the building of the stacktrace into
+                    %% the body of the let.
+                    B = opt_build_stacktrace(Let#c_let{body=B0}),
+                    Body#c_let{body=B};
+                true ->
+                    Let
+            end;
+        #c_seq{arg=Arg,body=B0} ->
+            case core_lib:is_var_used(Cooked, Arg) of
+                false ->
+                    %% The built stacktrace is not used in the argument,
+                    %% so we can sink the building of the stacktrace into
+                    %% the body of the sequence.
+                    B = opt_build_stacktrace(Let#c_let{body=B0}),
+                    Body#c_seq{body=B};
+                true ->
+                    Let
+            end;
+        #c_case{arg=Arg,clauses=Cs0} ->
+            case core_lib:is_var_used(Cooked, Arg) orelse
+                is_used_in_any_guard(Cooked, Cs0) of
+                false ->
+                    %% The built stacktrace is not used in the argument,
+                    %% so we can sink the building of the stacktrace into
+                    %% each arm of the case.
+                    Cs = [begin
+                              B = opt_build_stacktrace(Let#c_let{body=B0}),
+                              C#c_clause{body=B}
+                          end || #c_clause{body=B0}=C <- Cs0],
+                    Body#c_case{clauses=Cs};
+                true ->
+                    Let
+            end;
+        _ ->
+            Let
+    end;
+opt_build_stacktrace(Expr) ->
+    Expr.
+
+is_used_in_any_guard(V, Cs) ->
+    any(fun(#c_clause{guard=G}) ->
+                core_lib:is_var_used(V, G)
+        end, Cs).
+
 %% opt_case_in_let(Let) -> Let'
 %%  Try to avoid building tuples that are immediately matched.
 %%  A common pattern is:
@@ -2664,53 +2728,94 @@ opt_simple_let_1(#c_let{vars=Vs0,body=B0}=Let, Arg0, Ctxt, Sub0) ->
     %% Optimise let and add new substitutions.
     {Vs,Args,Sub1} = let_substs(Vs0, Arg0, Sub0),
     BodySub = update_let_types(Vs, Args, Sub1),
+    Sub = Sub1#sub{v=[],s=cerl_sets:new()},
     B = body(B0, Ctxt, BodySub),
     Arg = core_lib:make_values(Args),
-    opt_simple_let_2(Let, Vs, Arg, B, B0, Ctxt, Sub1).
+    opt_simple_let_2(Let, Vs, Arg, B, B0, Sub).
 
-opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Ctxt, Sub) ->
+
+%% opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Ctxt, Sub) -> Core.
+%%  Do final simplifications of the let.
+%%
+%%  Note that the substitutions and scope in Sub have been cleared
+%%  and should not be used.
+
+opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Sub) ->
     case {Vs0,Arg0,Body} of
-	{[#c_var{name=N1}],Arg1,#c_var{name=N2}} ->
-	    case N1 =:= N2 of
-		true ->
-		    %% let <Var> = Arg in <Var>  ==>  Arg
-		    Arg1;
-		false ->
-		    %% let <Var> = Arg in <OtherVar>  ==>  seq Arg OtherVar
-		    Arg = maybe_suppress_warnings(Arg1, Vs0, PrevBody),
-		    #c_seq{arg=Arg,body=Body}
-	    end;
+	{[#c_var{name=V}],Arg1,#c_var{name=V}} ->
+            %% let <Var> = Arg in <Var>  ==>  Arg
+            Arg1;
 	{[],#c_values{es=[]},_} ->
 	    %% No variables left.
 	    Body;
-	{Vs,Arg1,#c_literal{}} ->
-	    Arg = maybe_suppress_warnings(Arg1, Vs, PrevBody),
-	    case Ctxt of
-		effect ->
-		    %% Throw away the literal body.
-		    Arg;
-		value ->
-		    %% Since the variable is not used in the body, we
-		    %% can rewrite the let to a sequence.
-		    %%  let <Var> = Arg in Literal ==> seq Arg Literal
-		    #c_seq{arg=Arg,body=Body}
-	    end;
-	{Vs,Arg1,Body} ->
-	    %% If none of the variables are used in the body, we can
-	    %% rewrite the let to a sequence:
-	    %%    let <Var> = Arg in BodyWithoutVar ==>
-	    %%        seq Arg BodyWithoutVar
-	    case is_any_var_used(Vs, Body) of
-		false ->
-		    Arg = maybe_suppress_warnings(Arg1, Vs, PrevBody),
-		    #c_seq{arg=Arg,body=Body};
-		true ->
-		    Let1 = Let0#c_let{vars=Vs,arg=Arg1,body=Body},
-		    opt_bool_case_in_let(Let1, Sub)
+	{[#c_var{name=V}=Var|Vars]=Vars0,Arg1,Body} ->
+            case core_lib:is_var_used(V, Body) of
+                false when Vars =:= [] ->
+                    %% If the variable is not used in the body, we can
+                    %% rewrite the let to a sequence:
+                    %%    let <Var> = Arg in BodyWithoutVar ==>
+                    %%        seq Arg BodyWithoutVar
+                    Arg = maybe_suppress_warnings(Arg1, Var, PrevBody),
+                    #c_seq{arg=Arg,body=Body};
+                false ->
+                    %% There are multiple values returned by the argument
+                    %% and the first value is not used (this is a 'case'
+                    %% with exported variables, but the return value is
+                    %% ignored). We can remove the first variable and the
+                    %% the first value returned from the 'let' argument.
+                    Arg2 = remove_first_value(Arg1, Sub),
+                    Let1 = Let0#c_let{vars=Vars,arg=Arg2,body=Body},
+                    post_opt_let(Let1, Sub);
+                true ->
+                    Let1 = Let0#c_let{vars=Vars0,arg=Arg1,body=Body},
+                    post_opt_let(Let1, Sub)
 	    end
     end.
 
-%% maybe_suppress_warnings(Arg, [#c_var{}], PreviousBody) -> Arg'
+%% post_opt_let(Let, Sub)
+%%  Final optimizations of the let.
+%%
+%%  Note that the substitutions and scope in Sub have been cleared
+%%  and should not be used.
+
+post_opt_let(Let0, Sub) ->
+    Let1 = opt_bool_case_in_let(Let0, Sub),
+    opt_build_stacktrace(Let1).
+
+
+%% remove_first_value(Core0, Sub) -> Core.
+%%  Core0 is an expression that returns at least two values.
+%%  Remove the first value returned from Core0.
+
+remove_first_value(#c_values{es=[V|Vs]}, Sub) ->
+    Values = core_lib:make_values(Vs),
+    case is_safe_simple(V, Sub) of
+        false ->
+            #c_seq{arg=V,body=Values};
+        true ->
+            Values
+    end;
+remove_first_value(#c_case{clauses=Cs0}=Core, Sub) ->
+    Cs = remove_first_value_cs(Cs0, Sub),
+    Core#c_case{clauses=Cs};
+remove_first_value(#c_receive{clauses=Cs0,action=Act0}=Core, Sub) ->
+    Cs = remove_first_value_cs(Cs0, Sub),
+    Act = remove_first_value(Act0, Sub),
+    Core#c_receive{clauses=Cs,action=Act};
+remove_first_value(#c_let{body=B}=Core, Sub) ->
+    Core#c_let{body=remove_first_value(B, Sub)};
+remove_first_value(#c_seq{body=B}=Core, Sub) ->
+    Core#c_seq{body=remove_first_value(B, Sub)};
+remove_first_value(#c_primop{}=Core, _Sub) ->
+    Core;
+remove_first_value(#c_call{}=Core, _Sub) ->
+    Core.
+
+remove_first_value_cs(Cs, Sub) ->
+    [C#c_clause{body=remove_first_value(B, Sub)} ||
+        #c_clause{body=B}=C <- Cs].
+
+%% maybe_suppress_warnings(Arg, #c_var{}, PreviousBody) -> Arg'
 %%  Try to suppress false warnings when a variable is not used.
 %%  For instance, we don't expect a warning for useless building in:
 %%
@@ -2721,12 +2826,12 @@ opt_simple_let_2(Let0, Vs0, Arg0, Body, PrevBody, Ctxt, Sub) ->
 %%  referenced in the original unoptimized code. If they were, we will
 %%  consider the warning false and suppress it.
 
-maybe_suppress_warnings(Arg, Vs, PrevBody) ->
+maybe_suppress_warnings(Arg, #c_var{name=V}, PrevBody) ->
     case should_suppress_warning(Arg) of
 	true ->
 	    Arg;				%Already suppressed.
 	false ->
-	    case is_any_var_used(Vs, PrevBody) of
+	    case core_lib:is_var_used(V, PrevBody) of
 		true ->
 		    suppress_warning([Arg]);
 		false ->
@@ -2815,7 +2920,7 @@ move_case_into_arg(#c_case{arg=#c_case{arg=OuterArg,
 	    Outer#c_case{arg=OuterArg,
 			 clauses=[OuterCa,OuterCb]};
         false ->
-            impossible
+            Inner0
     end;
 move_case_into_arg(#c_case{arg=#c_seq{arg=OuterArg,body=InnerArg}=Outer,
                            clauses=InnerClauses}=Inner, _Sub) ->
@@ -2831,15 +2936,8 @@ move_case_into_arg(#c_case{arg=#c_seq{arg=OuterArg,body=InnerArg}=Outer,
     %%
     Outer#c_seq{arg=OuterArg,
                 body=Inner#c_case{arg=InnerArg,clauses=InnerClauses}};
-move_case_into_arg(_, _) ->
-    impossible.
-
-is_any_var_used([#c_var{name=V}|Vs], Expr) ->
-    case core_lib:is_var_used(V, Expr) of
-	false -> is_any_var_used(Vs, Expr);
-	true -> true
-    end;
-is_any_var_used([], _) -> false.
+move_case_into_arg(Expr, _) ->
+    Expr.
 
 %%%
 %%% Retrieving information about types.
diff --git a/lib/compiler/src/v3_codegen.erl b/lib/compiler/src/v3_codegen.erl
index 8f3399d133..a96d58a903 100644
--- a/lib/compiler/src/v3_codegen.erl
+++ b/lib/compiler/src/v3_codegen.erl
@@ -1855,7 +1855,12 @@ internal_cg(guard_error, [ExitCall], _Rs, Le, Vdb, Bef, St) ->
     {Ms,_} = cg_call_args(As, Bef, Le#l.i, Vdb),
     Call = {call_ext,Arity,{extfunc,Mod,Name,Arity}},
     Is = Ms++[line(Le),Call],
-    {Is,Bef,St}.
+    {Is,Bef,St};
+internal_cg(raw_raise=I, As, Rs, Le, Vdb, Bef, St) ->
+    %% This behaves like a function call.
+    {Sis,Int} = cg_setup_call(As, Bef, Le#l.i, Vdb),
+    Reg = load_vars(Rs, clear_regs(Int#sr.reg)),
+    {Sis++[I],clear_dead(Int#sr{reg=Reg}, Le#l.i, Vdb),St}.
 
 %% bif_cg(Bif, [Arg], [Ret], Le, Vdb, StackReg, State) ->
 %%      {[Ainstr],StackReg,State}.